#!/usr/bin/perl

#=============================================================================
# sqlconv: map one MySQL table to another
#
#  Author: Steve Shreeve
#   Email: shreeve@uci.edu
# Website: http://s2s.org
#
#    Bugs: Everything is currently quoted, this may be bad... (ie - NULL's)
#    Todo: Allow for unquoted fields (ie - NULL or now())
#
# Version: 1.2 (17 Sep 1999)
#
#   - fixed a bug with handling \' sequences, had to convert all these to ''
#
# Version: 1.1 (27 May 1999)
#   - fixed a bug for lines containing the string ');' 
#
# Version: 1.0 (08 May 1999)
#   - initial release
#
#=============================================================================
# check usage
unless ($#ARGV == 2) {
  my $prog = $0; $prog =~ s|^(.*/)||;
  die <<DONE;

Usage: $prog orig_table dest_table 'format'

Where format is a comma separated list using the following tokens:

  n    => pick field n from the input table
  n-m  => pick fields n through m
  n-   => pick alls fields from n to the end
  -m   => pick alls fields from the beginning to m
  *    => ''
  n*   => '','','',... (n of them)
  0    => 0
  else => chop any leading/trailing quotes and push the rest

For example,

  $prog users new_users "4,-3,0,5-8,*,11-9,2*,'1999-05-08',12-" < old.sql

This would read the MySQL dump of the old 'users' table and produce valid
MySQL insert statements to populate a new table called 'new_users'. The fields
for the new_users table are built from the fields of the old users table using
this format string. A play-by-play of the above example is:

  copy field 4, copy fields 1-3, produce a '0', copy fields 5 through 8,
  produce an empty field, copy fields 11-9 (reverse), produce 2 empty fields,
  insert a date, and then copy fields 12 through the end of the original
table. 

DONE
}

# read command-line parameters
($tbl1,$tbl2,$flds) = @ARGV[0 .. 2];
@flds = split(/\s*,\s*/,$flds);
$tabs = $tbl1 eq '-' ? 1 : 0;

# parse SQL file
while ($text = <STDIN>) {
  if ($tabs) {
    chomp($text);
    @orig = split(/\t/,$text);
    $size = $#orig;
  } else {
    next unless $text =~ s|^insert into (\w+) values \(||i;
    next unless $tbl1 eq $1;
    $text =~ s|\);\s*$||;
    *orig = &parse_sql($text);
    $size = $#orig;
  }
  @dest = ();
  for $item (@flds) {
    if ($item eq '*') {
      push(@dest,'');
    } elsif ($item =~ m|#(\w+)\(([^\)]+)\)|) {
      if ($1 eq 'mdy2ymd') {
        if (my $valu = $orig[$2 - 1]) {
	  $valu =~ s|\D||g;
	  $valu =~ m|^(\d{4})(\d{4})(\d{6})?$| or die "Bad date... '$valu'";
	  push(@dest,"$2$1$3");
	} else {
	  push(@dest,'');
	}
      } else {
        die "Unknown function '$1'";
      }
    } elsif (($coun) = ($item =~ m|^(\d+)\*|)) {
      push(@dest,'') while ($coun--);
    } elsif ($item eq '0') {
      push(@dest,0);
    } elsif ($item =~ m|^\d+$|) {
      die "Index '$item' out of bounds\n" if ($item-1) > $size;
      push(@dest,$orig[$item - 1]);
    } elsif ($item =~ m|^(\d*)-(\d*)$|) {
      my($from,$to) = ($1,$2);
      die "Improper [from-to] in: '$item'\n" unless $from || $to;
      $from ||= 1;
      $to   ||= $size + 1;
      die "Index '$from' out of bounds\n" if ($from-1) > $size;
      die "Index '$to' out of bounds\n"   if ($to-1)   > $size;
      if ($to < $from) {
        for ($to .. $from) { push(@dest,$orig[$from + $to - $_ - 1]) }
      } else {
        for ($from .. $to) { push(@dest,$orig[$_ - 1]) }
      }
    } else {
      $item =~ s|^'||; # chop any leading quote
      $item =~ s|'$||; # chop any trailing quote
      push(@dest,$item);
    }
  }

  # produce the new MySQL line (here's where we can fix the quoting probs)
  print "insert into $tbl2 values ('",join("','",@dest),"');\n";
}
exit;

# parse an SQL statement into fields
sub parse_sql {
  my ($text,$count) = @_;
  my ($i,$field);
  my (@raw,@fields);

  # parse a CSV line into individual fields
  $text =~ s|\\'|''|g; # correctly escape single quotes
  @raw = split(/,/, $text); # split text up by commas
  for ($i = 0; $i <= $#raw; $i++) { # for each comma chunk
    $field = $raw[$i]; # begin a new field
    if ($field =~ s|^\s*'|| && $field !~ s|'\s*$||) { # if quoted only at the start
      for ($i++; $i <= $#raw; $i++) { # then append chunks
        $field .= ',' . $raw[$i]; # but, first restore the comma
        if ($field =~ m|('+)$|) { # now, grab any trailing quotes
          if (length($1) & 0x01) { # if it's an odd number
            $field =~ s|'$||; # we've reached the field end, so chop it
            last; # and finish the loop
          } # if it's even numbered, then these are escaped quotes
        } # so, keep going
      }
    }
    $field =~ s|^\s+||; # kill leading whitespace
    $field =~ s|\s+$||; # kill trailing whitespace
    push(@fields,$field); # and add this field to our list
  } # until we've finished the list

  # return either the fields or an empty array (signalling an error)
  if ($count && $count != ($#fields + 1)) { # field count is wrong
    return []; # return a reference to an empty array
  } else {
    return \@fields; # return a reference to the parsed fields
  }
}
