1 # A state machine to turn an SQL file into list of requests
2 # (represented by an array of strings)
5 # This file is copyright 2002, 2008 Roland Mas <99.roland.mas@aist.enst.fr>,
7 # This is Free Software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License version 2, as published by the
9 # Free Software Foundation.
12 # @reqlist = @{ &parse_sql_file ("blah.sql") } ;
13 # foreach $req (@reqlist) {
14 # $sth = $dbh->prepare ($query) ;
20 # * No real bugs known, but see TODO.
21 # * Should bugs appear, please notify me (patches are of course welcome)
24 # * Make sure the output of pg_dump is interpreted the way it should.
27 use subs qw/ &parse_sql_file &sql_parser_debug / ;
29 sub sql_parser_debug ( $ ) ;
30 sub parse_sql_file ( $ ) ;
32 sub parse_sql_file ( $ ) {
34 open F, $f or die "Could not open file $f: $!\n" ;
36 # This is a state machine to parse potentially complex SQL files
37 # into individual SQL requests/statements
39 my %states = ('INIT' => 0,
50 'IN_SQL_COMMENT' => 11,
53 my ($state, $l, $par_level, $com_level, $chunk, $rest, $sql, @sql_list, $copy_table, $copy_field_list, $copy_rest, @copy_data, @copy_data_tmp, $copy_field, @doldolstack) ;
54 $l = $sql = $chunk = $rest = '';
56 # Init the state machine
58 $state = $states{INIT} ;
62 STATE_LOOP: while ($state != $states{DONE}) { # State machine main loop
63 sql_parser_debug "STATE_LOOP: state = $state" ;
64 sql_parser_debug "l=$l, sql=$sql, chunk=$chunk, rest=$rest";
65 STATE_SWITCH: { # State machine step processing
66 $state == $states{INIT} && do {
67 sql_parser_debug "State = INIT" ;
71 $l = $sql = $chunk = $rest = "" ;
73 $copy_table = $copy_field_list = $copy_rest = "" ;
74 @copy_data = @copy_data_tmp = () ;
77 $state = $states{SCAN} ;
79 } ; # End of INIT state
81 $state == $states{SCAN} && do {
82 sql_parser_debug "State = SCAN" ;
84 ( ($l eq "") or ($l =~ /^\s*$/) or ($l =~ /^\s*--/) ) && do {
87 $state = $states{DONE} ;
88 last SCAN_STATE_SWITCH ;
92 last SCAN_STATE_SWITCH ;
95 ( ($l =~ m/\s*copy\s+\"[\w_]+\"\s*(\([\w, "]+\))?\s*from\s+stdin\s*;/i)
96 or ($l =~ m/\s*copy\s+[\w_]+\s*(\([\w, "]+\))?\s*from\s+stdin\s*;/i) ) && do {
99 $state = $states{START_COPY} ;
100 last SCAN_STATE_SWITCH ;
106 $state = $states{SQL_SCAN} ;
107 last SCAN_STATE_SWITCH ;
110 } # SCAN_STATE_SWITCH
112 } ; # End of SCAN state
114 $state == $states{IN_COMMENT} && do {
115 sql_parser_debug "State = IN_COMMENT" ;
116 IN_COMMENT_STATE_SWITCH: {
117 ( ($l eq "") or ($l =~ /^\s*$/) ) && do {
120 sql_parser_debug "End of file detected during a comment." ;
121 $state = $states{ERROR} ;
122 last IN_COMMENT_STATE_SWITCH ;
126 $state = $states{IN_COMMENT} ;
127 last IN_COMMENT_STATE_SWITCH ;
130 ( ($l =~ m,\*/,) || ($l =~ m,/\*,) ) && do {
131 $l =~ s,.*?((/\*)|(\*/)),$1, ;
132 ($chunk, $rest) = ($l =~ /^(..)(.*)/) ;
136 if ($chunk eq '/*') {
142 if ($com_level == 0) {
143 $state = $states{SQL_SCAN} ;
144 last IN_COMMENT_STATE_SWITCH ;
146 $state = $states{IN_COMMENT} ;
147 last IN_COMMENT_STATE_SWITCH ;
153 sql_parser_debug "Examining $l\n" ;
155 $state = $states{ERROR} ;
156 last IN_COMMENT_STATE_SWITCH ;
160 $state = $states{IN_COMMENT} ;
161 last IN_COMMENT_STATE_SWITCH ;
164 } # IN_COMMENT_STATE_SWITCH
166 } ; # End of IN_COMMENT state
168 $state == $states{IN_SQL_COMMENT} && do {
169 sql_parser_debug "State = IN_SQL_COMMENT" ;
170 IN_SQL_COMMENT_STATE_SWITCH: {
171 ( ($rest eq "") or ($rest =~ /^\s*$/) ) && do {
174 sql_parser_debug "End of file detected during a comment." ;
175 $state = $states{ERROR} ;
176 last IN_SQL_COMMENT_STATE_SWITCH ;
180 $state = $states{IN_SQL_COMMENT} ;
181 last IN_SQL_COMMENT_STATE_SWITCH ;
184 ( ($rest =~ m,\*/,) || ($rest =~ m,/\*,) ) && do {
185 $rest =~ s,.*?((/\*)|(\*/)),$1, ;
186 ($chunk, my $rest2) = ($rest =~ /^(..)(.*)/) ;
190 if ($chunk eq '/*') {
196 if ($com_level == 0) {
197 $state = $states{IN_SQL} ;
198 last IN_SQL_COMMENT_STATE_SWITCH ;
200 $state = $states{IN_SQL_COMMENT} ;
201 last IN_SQL_COMMENT_STATE_SWITCH ;
208 sql_parser_debug "End of file detected during a comment." ;
209 $state = $states{ERROR} ;
210 last IN_SQL_COMMENT_STATE_SWITCH ;
214 $state = $states{IN_SQL_COMMENT} ;
215 last IN_SQL_COMMENT_STATE_SWITCH ;
218 } # IN_SQL_COMMENT_STATE_SWITCH
220 } ; # End of IN_SQL_COMMENT state
222 $state == $states{SQL_SCAN} && do {
223 sql_parser_debug "State = SQL_SCAN" ;
224 SQL_SCAN_STATE_SWITCH: {
225 ( ($l eq "") or ($l =~ /^\s*$/) or ($l =~ /^--/) ) && do {
228 sql_parser_debug "End of file detected during an SQL statement." ;
229 $state = $states{ERROR} ;
230 last SQL_SCAN_STATE_SWITCH ;
234 $state = $states{SQL_SCAN} ;
235 last SQL_SCAN_STATE_SWITCH ;
238 ($l =~ m,^\s*/\*,) && do {
241 $state = $states{IN_COMMENT} ;
242 last SQL_SCAN_STATE_SWITCH ;
245 ($l =~ m,^(.*?)\$([\w]*)\$,) && do {
247 push @doldolstack, $2 ;
248 sql_parser_debug "---$sql---$doldolstack[0]---" ;
249 $l =~ s,^(.*?)\$[\w]*\$,, ;
250 $state = $states{IN_DOLDOL} ;
251 last SQL_SCAN_STATE_SWITCH ;
255 ($chunk, $rest) = ($l =~ m,^([^()\';-]*)(.*),) ;
258 $state = $states{IN_SQL} ;
259 last SQL_SCAN_STATE_SWITCH ;
262 } # SQL_SCAN_STATE_SWITCH
264 } ; # End of SQL_SCAN state
266 $state == $states{IN_COMMENT} && do {
267 sql_parser_debug "State = IN_COMMENT" ;
268 IN_COMMENT_STATE_SWITCH: {
269 ( ($l eq "") or ($l =~ /^\s*$/) ) && do {
272 sql_parser_debug "End of file detected during a comment." ;
273 $state = $states{ERROR} ;
274 last IN_COMMENT_STATE_SWITCH ;
278 $state = $states{IN_COMMENT} ;
279 last IN_COMMENT_STATE_SWITCH ;
282 ( ($l !~ m,\*/,) || ($l !~ m,/\*,) ) && do {
283 $l =~ s,.*?((/\*)|(\*/)),$1, ;
284 ($chunk, $rest) = ($l =~ /^(..)(.*)/) ;
288 if ($chunk eq '/*') {
294 if ($com_level == 0) {
295 $state = $states{SQL_SCAN} ;
296 last IN_COMMENT_STATE_SWITCH ;
298 $state = $states{IN_COMMENT} ;
299 last IN_COMMENT_STATE_SWITCH ;
306 sql_parser_debug "End of file detected during a comment." ;
307 $state = $states{ERROR} ;
308 last IN_COMMENT_STATE_SWITCH ;
312 $state = $states{IN_COMMENT} ;
313 last IN_COMMENT_STATE_SWITCH ;
316 } # IN_COMMENT_STATE_SWITCH
318 } ; # End of IN_COMMENT state
320 $state == $states{IN_SQL} && do {
321 sql_parser_debug "State = IN_SQL" ;
323 IN_SQL_STATE_SWITCH: {
324 ($rest =~ m,^\s*/\*,) && do {
325 $rest =~ s,^\s*/\*,, ;
327 $state = $states{IN_SQL_COMMENT} ;
328 last IN_SQL_STATE_SWITCH ;
332 ($rest =~ /^\(/) && do {
335 $rest = substr $rest, 1 ;
338 $state = $states{SQL_SCAN} ;
339 last IN_SQL_STATE_SWITCH ;
342 ( ($rest =~ /^\)/) and ($par_level > 0) ) && do {
345 $rest = substr $rest, 1 ;
348 $state = $states{SQL_SCAN} ;
349 last IN_SQL_STATE_SWITCH ;
352 ($rest =~ /^\)/) && do {
353 sql_parser_debug "Detected ')' without any matching '('." ;
354 $state = $states{ERROR} ;
355 last IN_SQL_STATE_SWITCH ;
358 ($rest =~ /^--/) && do {
362 $state = $states{SQL_SCAN} ;
363 last IN_SQL_STATE_SWITCH ;
366 ($rest =~ /^-[^-]/) && do {
368 $rest = substr $rest, 1 ;
371 $state = $states{SQL_SCAN} ;
372 last IN_SQL_STATE_SWITCH ;
375 ( ($rest =~ /^;/) and ($par_level == 0) ) && do {
377 $rest = substr $rest, 1 ;
379 $state = $states{END_SQL} ;
380 last IN_SQL_STATE_SWITCH ;
383 ($rest =~ /^;/) && do {
384 sql_parser_debug "Detected ';' within a parenthesis." ;
385 $state = $states{ERROR} ;
386 last IN_SQL_STATE_SWITCH ;
389 ($rest eq "") && do {
393 $state = $states{SQL_SCAN} ;
394 last IN_SQL_STATE_SWITCH ;
397 ($rest =~ /^\'/) && do {
399 $rest = substr $rest, 1 ;
402 $state = $states{IN_QUOTE} ;
403 last IN_SQL_STATE_SWITCH ;
407 sql_parser_debug "Unknown event in IN_SQL state" ;
408 $state = $states{ERROR} ;
409 last IN_SQL_STATE_SWITCH ;
411 } # IN_SQL_STATE_SWITCH
413 } ; # End of IN_SQL state
415 $state == $states{END_SQL} && do {
416 sql_parser_debug "State = END_SQL" ;
417 END_SQL_STATE_SWITCH: {
418 ($sql =~ /^\s*$/) && do {
422 $state = $states{SCAN} ;
423 last END_SQL_STATE_SWITCH ;
427 push @sql_list, $sql ;
428 sql_parser_debug ("Found SQL $sql\n") ;
432 $state = $states{SCAN} ;
433 last END_SQL_STATE_SWITCH ;
436 } # END_SQL_STATE_SWITCH
438 } ; # End of END_SQL state
440 $state == $states{QUOTE_SCAN} && do {
441 sql_parser_debug "State = QUOTE_SCAN" ;
442 QUOTE_SCAN_STATE_SWITCH: {
443 ($rest eq "") && do {
447 sql_parser_debug "Detected end of file inside a quoted string." ;
448 $state = $states{ERROR} ;
449 last QUOTE_SCAN_STATE_SWITCH ;
454 last QUOTE_SCAN_STATE_SWITCH ;
458 ($chunk, $rest) = ($l =~ /^([^\\\']*)(.*)/) ;
461 $state = $states{IN_QUOTE} ;
462 last QUOTE_SCAN_STATE_SWITCH ;
465 } # QUOTE_SCAN_STATE_SWITCH
467 } ; # End of QUOTE_SCAN state
469 $state == $states{IN_QUOTE} && do {
470 sql_parser_debug "State = IN_QUOTE" ;
471 IN_QUOTE_STATE_SWITCH: {
472 ($rest =~ /^\'/) && do {
474 $rest = substr $rest, 1 ;
477 $state = $states{SQL_SCAN} ;
478 last IN_QUOTE_STATE_SWITCH ;
481 ($rest =~ /^\\\'/) && do {
483 $rest = substr $rest, 2 ;
485 last IN_QUOTE_STATE_SWITCH ;
488 ($rest =~ /^\\[^\\]/) && do {
490 $rest = substr $rest, 1 ;
492 last IN_QUOTE_STATE_SWITCH ;
495 ($rest =~ /^\\$/) && do {
497 $rest = substr $rest, 1 ;
499 last IN_QUOTE_STATE_SWITCH ;
505 $state = $states{QUOTE_SCAN} ;
506 last IN_QUOTE_STATE_SWITCH ;
509 } # IN_QUOTE_STATE_SWITCH
511 } ; # End of IN_QUOTE state
513 $state == $states{IN_DOLDOL} && do {
514 sql_parser_debug "State = IN_DOLDOL" ;
515 IN_DOLDOL_STATE_SWITCH: {
516 my $cur = $doldolstack[0] ;
518 ($l =~ m,^(.*?)\$([\w]*)\$,) && do {
521 if ($found eq $cur) {
523 if ($#doldolstack >= 0) {
524 $state = $states{IN_DOLDOL} ;
527 $rest =~ s,^(.*?)\$[\w]*\$,, ;
528 sql_parser_debug "Exiting DOLDOL for $cur (current = $sql) ($rest)" ;
529 $state = $states{SQL_SCAN} ;
532 push @doldolstack, $found ;
534 $l =~ s,^(.*?)\$[\w]*\$,, ;
535 last IN_DOLDOL_STATE_SWITCH ;
543 sql_parser_debug "Detected end of file within a dollar-quoted string." ;
544 $state = $states{ERROR} ;
545 last IN_DOLDOL_STATE_SWITCH ;
549 $state = $states{IN_DOLDOL} ;
550 last IN_DOLDOL_STATE_SWITCH ;
553 } # IN_DOLDOL_STATE_SWITCH
555 } ; # End of IN_DOLDOL state
557 $state == $states{START_COPY} && do {
558 sql_parser_debug "State = START_COPY" ;
559 START_COPY_STATE_SWITCH: {
560 ($l =~ m/\s*copy\s+\"[\w_]+\"\s*(\([\w, "]+\))?\s*from\s+stdin\s*;/i) && do {
561 ($copy_table, $copy_field_list, undef, $copy_rest) = ($l =~ /\s*copy\s+\"([\w_]+)\"\s*((\([\w, "]+\))?)\s*from\s+stdin\s*;(.*)/i) ;
562 $copy_field_list =~ s/^\s+//;
563 $copy_field_list =~ s/\s+$//;
564 $copy_field_list = ' '.$copy_field_list unless $copy_field_list eq '';
567 sql_parser_debug "Detected end of file within a COPY statement." ;
568 $state = $states{ERROR} ;
569 last START_COPY_STATE_SWITCH ;
573 $state = $states{IN_COPY} ;
574 last START_COPY_STATE_SWITCH ;
577 ($l =~ m/\s*copy\s+[\w_]+\s*(\([\w, "]+\))?\s*from\s+stdin\s*;/i) && do {
578 ($copy_table, $copy_field_list, undef, $copy_rest) = ($l =~ /\s*copy\s+([\w_]+)\s*((\([\w, "]+\))?)\s*from\s+stdin\s*;(.*)/i) ;
579 $copy_field_list =~ s/^\s+//;
580 $copy_field_list =~ s/\s+$//;
581 $copy_field_list = ' '.$copy_field_list unless $copy_field_list eq '';
584 sql_parser_debug "Detected end of file within a COPY statement." ;
585 $state = $states{ERROR} ;
586 last START_COPY_STATE_SWITCH ;
590 $state = $states{IN_COPY} ;
591 last START_COPY_STATE_SWITCH ;
595 sql_parser_debug "Unknown event in START_COPY state." ;
596 $state = $states{ERROR} ;
597 last START_COPY_STATE_SWITCH ;
600 } # START_COPY_STATE_SWITCH
602 } ; # End of START_COPY state
604 $state == $states{IN_COPY} && do {
605 sql_parser_debug "State = IN_COPY" ;
606 IN_COPY_STATE_SWITCH: {
607 ($l =~ /^\\\.$/) && do {
610 $state = $states{SCAN} ;
611 last IN_COPY_STATE_SWITCH ;
616 @copy_data_tmp = split /\t/, $l ;
617 foreach $copy_field (@copy_data_tmp) {
618 if ($copy_field eq '\N') {
619 $copy_field = 'NULL' ;
621 $copy_field =~ s/\'/\'\'/g ;
622 $copy_field = "'" . $copy_field . "'" ;
624 push @copy_data, $copy_field ;
626 $sql = "INSERT INTO \"$copy_table\"$copy_field_list VALUES (" ;
627 $sql .= join (", ", @copy_data) ;
629 push @sql_list, $sql ;
632 sql_parser_debug "Detected end of file within a COPY statement." ;
633 $state = $states{ERROR} ;
634 last IN_COPY_STATE_SWITCH ;
638 last IN_COPY_STATE_SWITCH ;
643 } ; # End of IN_COPY state
645 $state == $states{DONE} && do {
646 sql_parser_debug "State = DONE" ;
648 } ; # End of DONE state
650 $state == $states{ERROR} && do {
651 sql_parser_debug "State = ERROR" ;
652 sql_parser_debug "Reached the ERROR state. Dying." ;
653 die "State machine is buggy." ;
656 } ; # End of ERROR state
659 sql_parser_debug "State machine went in an unknown state... Redirecting to ERROR." ;
660 $state = $states{ERROR} ;
671 sub sql_parser_debug ( $ ) {
675 print STDERR "$v\n" ;