#!/dfs/prod/ipn/bin/perl # We need this version of Perl, not AIX's ancient /usr/bin/perl, because I # use exists on an array (ie, if (exists $CoC_Start[0]) { ...) # and want to avoid getting this error msg, # exists operator argument is not a HASH element at scp.old.US line ... $debug=1; # If debug, will be a lot more verbose & will not scp. $debug=0; # This program will read one of the US back-file DVD's, which are filled with # thousands of multi-page tif files for older US images. As of 8-26-2003, we # have a total 423 of these DVDs to process, usp001 through usp423, of which # 229 have already been done (well, partially done) by Danny in April, 2001. # The 229 that were partially done, were usp001-usp249 except for 212, 215, # 222-224, 229-230, 235, 242-245, and 248. # # Input is a DVD name of one of these DVDs, e.g. usp001, and optionally, the # number of children processes to spawn and/or whether or not to force the # scp to Southbury should an image NOT be in the Cumulative Index file. # # If the DVD name is one of the ones Danny had already partially processed, # then we look for a "ToDo" file (eg, ~/ToDo/usp001) that should contain the # output of my ~rickjas/old_US_Images/check.usp.pl program in Southbury. # # Unlike previous image-handling Perl programs, we will actually scp the file # directly to Southbury's /dfs/images/US tree, writing the file with its CD label, # e.g. /dfs/images/US/30/05/US00000530.usp001.tif. We even go directly to the DFS # server that is actually hosting the destination DFS fileset. We are able to # do this without passwords due to # A) I've set up my rickjas userid on all 8 DFS servers (dephds005-012) to be # able to ssh/scp without passwords. See my ~jasper/aixnotes/ssh.install.notes # for details on how this was done. # B) I set up this userid (eg, jasper's AFS id in San Jose) so that I can # get in. This entails setting 3 files in my/your ~/.ssh2 directory. 
# C) All the /dfs/images/US/nn/nn directories have the proper DFS ACLs, # most importantly, {any_other rwx-i-}. Note in particular that the delete # permission is missing. This is intentional and prevents an unauthenticated # web server to erase image files via some cgi back door. We can create # files, but not erase or modify them. # D) I hardcode in this script, the DFS images.US.nn fileset to DFS server mapping. # # Another improvement is the spawning of umpteen children to do all the scp-ing. # Once we determine the number of images we need to copy, we spawn up to 30 child # processes and do that many scp's at a time, splitting the work among the children. # # The other results of this program are five files, written to the # /dfs/cdrom/ directory. The interesting thing about these output # files, is they are all shared by the children processes and thus require Perl's # flock to synchronize writing to each. # 1) done is a list of files that were scp'd to Southbury. This will # be used to do post-processing in Southbury, namely to create the link, # e.g. /dfs/images/US/30/05/US00000530.tif -> US00000530.usp001.tif # 2) InsertDB2, which is a DB2 script of a bunch of insert commands used to # insert imag table rows. This will be run on all the patent databases, # dephds059, dephds061, trantor and rhino with this command, # db2 -f /dfs/cdrom/$cdlabel/InsertDB2 # Note that we should be safe to do imports because I've synchronized all # the imag tables with what is in DFS for each already partially-copied DVD. # We ARE guaranteed that anything we scp over, will not already be in imag # with this uspnnn cdlabel. It might be there from another CD/DVD, but in # those cases, we'll be adding a second imag row (and NOT creating the link). # 3) excluded.files, which is a list of images not scp'd to Southbury due # to being too small (< 1,000 bytes) or not in the Cumulative Index file # found on the usp423 DVD. # 4) non-fatal.errors. eg no metadata. 
# 5) Errors, which is a list of really bizarre errors that I don't expect # to ever happen. # use Errno qw(EAGAIN); # Gets you the EAGAIN symbol, used in fork loop. use POSIX ":sys_wait_h"; # Gets you the WNOHANG symbol, used in waitpid call. use Fcntl qw(:DEFAULT :flock); # Gets you the LOCK_* constants used in flock call %DFS_Server=( "00" => "05", "01" => "05", "02" => "05", "03" => "05", "04" => "05", "05" => "08", "06" => "05", "07" => "05", "08" => "05", "09" => "05", "10" => "05", "11" => "11", "12" => "05", "13" => "05", "14" => "05", "15" => "05", "16" => "05", "17" => "11", "18" => "06", "19" => "06", "20" => "06", "21" => "06", "22" => "06", "23" => "11", "24" => "06", "25" => "06", "26" => "06", "27" => "06", "28" => "06", "29" => "11", "30" => "06", "31" => "06", "32" => "06", "33" => "06", "34" => "06", "35" => "11", "36" => "06", "37" => "06", "38" => "06", "39" => "06", "40" => "06", "41" => "11", "42" => "06", "43" => "06", "44" => "06", "45" => "06", "46" => "06", "47" => "11", "48" => "07", "49" => "07", "50" => "07", "51" => "07", "52" => "07", "53" => "12", "54" => "07", "55" => "07", "56" => "07", "57" => "07", "58" => "07", "59" => "09", "60" => "07", "61" => "07", "62" => "07", "63" => "07", "64" => "07", "65" => "08", "66" => "07", "67" => "07", "68" => "07", "69" => "07", "70" => "07", "71" => "08", "72" => "07", "73" => "07", "74" => "07", "75" => "07", "76" => "07", "77" => "08", "78" => "08", "79" => "08", "80" => "08", "81" => "08", "82" => "08", "83" => "09", "84" => "08", "85" => "08", "86" => "08", "87" => "08", "88" => "08", "89" => "09", "90" => "08", "91" => "08", "92" => "08", "93" => "08", "94" => "08", "95" => "09", "96" => "08", "97" => "08", "98" => "08", "99" => "09"); if ($ARGV[0]) { # Was a parameter given to us? $cdlabel = lc $ARGV[0]; if ($cdlabel =~ /^\d*$/) { # Accept just the numeric portion, eg 1 $cdlabel=sprintf("usp%03d",$cdlabel); } } else { die "$0 needs the label name of an back-file US DVD, e.g. 
usp001.\n"; } $HOME=$ENV{"HOME"}; $number_of_children = 30; $heartbeat=5; # Have each child give a heartbeat every 5 image files. $Force_scp=0; shift @ARGV; while (@ARGV) { if ($ARGV[0] =~ /^\d*$/) { # If numeric, must be number of children to spawn. $number_of_children = $ARGV[0]; } elsif ($ARGV[0] =~ /^force$/i) { $Force_scp=1; # Force-copy even if missing from Cumulative Index file. } else { die "Invalid argument given.\n"; } shift @ARGV; } # Determine where our input directory is. Use in order, the $CDROMROOT # environment variable, our current directory, or /cdrom (the normal case). # always set. if ($ENV{"CDROMROOT"}) {$CDROMROOT=$ENV{"CDROMROOT"}; } elsif (-d "$ENV{'PWD'}/$cdlabel") {$CDROMROOT=$ENV{'PWD'}} elsif (-d "/cdrom/$cdlabel") {$CDROMROOT="/cdrom"} # Normal case. else {$CDROMROOT=$ENV{'PWD'}} if (! -d "$CDROMROOT/$cdlabel") { die "Could not find $CDROMROOT/$cdlabel input directory\n"} $outdir = "/dfs/cdrom/$cdlabel"; # Establish our output directory. if (! -d "$outdir") { print STDOUT "Creating output directory at $outdir ...\n"; mkdir "$outdir", 0777; # I've pre-split off each DVD from the master Cumulative Index file on usp423. `cp -p /afs/d/u/jasper/Cum.Idx/$cdlabel $outdir/Cum.Idx`; } $Cum_Idx_File="$outdir/Cum.Idx"; if (! -r "$Cum_Idx_File") { if (-r "/afs/d/u/jasper/Cum.Idx/$cdlabel") { print "Copying $cdlabel portion of Cumulative Index file from ~jasper/Cum.Idx directory.\n"; `/usr/bin/cp -p /afs/d/u/jasper/Cum.Idx/$cdlabel $Cum_Idx_File`; if (! -r "$Cum_Idx_File") {die "Could not find Cumulative Index file ($Cum_Idx_File)."} } } # Programs $any2any="/dfs/prod/ipn/bin/any2any"; if (! -r "$any2any") { die "Could not find or execute $any2any program."} $anyinfo="/dfs/prod/ipn/bin/anyinfo"; if (! -r "$anyinfo") { die "Could not find or execute $anyinfo program."} $tiffcp="/local/bin/tiffcp"; if (! -r "$anyinfo") { die "Could not find or execute $tiffcp program."} $tiffinfo="/local/bin/tiffinfo"; if (! 
-r "$anyinfo") { die "Could not find or execute $tiffinfo program."} $SBY_userid="rickjas"; $scp="/local/bin/scp"; if (! -r "$scp") { die "Could not find or execute $scp program."} # Since I get fancy in this program and scp directly to Southbury, insure this userid's # ~/.ssh2 directory is set up with the proper keys. We need 3 things done, # 1) The string 'IdKey id_dsa_1024_a.009' in the ~/.ssh2/identifiation file. # 2) cp -p /afs/d/u/jasper/.ssh/id_dsa_1024_a.009 ~/.ssh2 # 3) cp -p /afs/d/u/jasper/.ssh/id_dsa_1024_a.009.pub ~/.ssh2 if (! `/usr/bin/grep id_dsa_1024_a.009 $HOME/.ssh2/identification 2>/dev/null`) {`echo 'IdKey id_dsa_1024_a.009' >> $HOME/.ssh2/identification`} if (! -r "$HOME/.ssh2/id_dsa_1024_a.009") {`/usr/bin/cp -p /afs/d/u/jasper/.ssh2/id_dsa_1024_a.009 $HOME/.ssh2`} if (! -r "$HOME/.ssh2/id_dsa_1024_a.009.pub") {`/usr/bin/cp -p /afs/d/u/jasper/.ssh2/id_dsa_1024_a.009.pub $HOME/.ssh2`} # If this is a DVD that Danny partially copied already, then work off of the # /dfs/cdrom/$cdlabel/ToDo file, else we'll scan the whole bloody DVD and # build up our own list. if ($cdlabel le "usp249" && ! grep {m/$cdlabel/} qw(usp212 usp215 usp222 usp223 usp224 usp229 usp230 usp235 usp242 usp243 usp244 usp245 usp248) ) { if (! -r "$outdir/ToDo") { `$scp $SBY_userid\@dephds008:$outdir/ToDo $outdir/ToDo`; if (! -r "$outdir/ToDo") {die "Could not find $outdir/ToDo file. Maybe you need to scp from Southbury, e.g.\n scp -p $SBY_userid\@dephds008:$outdir/ToDo $outdir\n"} } print STDOUT "Will scp only those files in our ToDo list ...\n" if ($debug); @file_list = `/usr/bin/cat $outdir/ToDo`; } else { print STDOUT "Will scp over the whole bloody DVD ...\n" if ($debug); print STDOUT "Scanning $CDROMROOT/$cdlabel directory for input files at " . timestamp() . ".\nThis takes a few minutes ...\n"; @file_list = `/usr/bin/find $CDROMROOT/$cdlabel -type f -name '*.tif`; # For debugging a shorter list, uncomment the find statement above in preference for this one. 
# @file_list = `/usr/bin/find $CDROMROOT/$cdlabel/$cdlabel/00/000 -type f -name '*.tif`; } # At this point, the file_list array holds all the image files we need to do. # It might have come from our ToDo file, or from the find command if we're # doing "the whole bloody DVD". chomp(@file_list); # In list mode, chomp works on each element. $total_number_TODO = scalar(@file_list); if ($total_number_TODO == 0) { die "No image files found at $CDROMROOT/$cdlabel\n" } if ($total_number_TODO < $number_of_children) {$number_of_children=$total_number_TODO} if (-s "$outdir/Done") { $Done_Count=`/usr/bin/wc -l $outdir/Done 2>/dev/null`; chomp $Done_Count; if ($Done_Count == $total_number_TODO) { print "All $total_number_TODO images are already done.\n"; exit; } } else { $Done_Count=0; } print STDOUT "Reading the $cdlabel lines from the Cumulative Index ($Cum_Idx_File) ...\n"; @lines = `/usr/bin/awk '{print \$1}' $Cum_Idx_File`; chomp(@lines); # In list mode, chomp works on each element. $Cum_Idx_count=0; # This array is shared between all children. foreach $this_line (@lines) { # Lines originally are like so patent CDlabel Pages # 0000003 USP001 4 # or D001504 USP001 2 # or RD25414 USP001 2 # or RE00021 USP001 4 # or X011280 USP001 1 # but since I return just the first token, all I've got are the patent numbers. # Now encode the patent number to Delphion's standard patn for our hash's key. 
if ($this_line =~ /^\d{7}$/) { $patn = "US0${this_line}__"} elsif ($this_line =~ /^([DHTX])(\d{5,6})$/) { $patn = "US${1}0${2}__"} elsif ($this_line =~ /^(PP|AI|RE|RD|RX)(\d{5})$/) { $patn = "US${1}0${2}__"} elsif ($this_line =~ /^([X0D])(\d{5})H$/) { $patn = "US${1}00${2}12"} elsif ($this_line =~ /^([X0D])(\d{5})D$/) { $patn = "US${1}00${2}14"} elsif ($this_line =~ /^([X0D])(\d{5})L$/) { $patn = "US${1}00${2}34"} elsif ($this_line =~ /^([X0D])(\d{5})N$/) { $patn = "US${1}00${2}78"} else {die "Did not match $this_line from Cumulative Index file.\n"} $In_Cum_Idx{$patn} = 1; # eg, $In_Cum_Idx{US05224775__}=1 $Cum_Idx_count++; } if (-s "$outdir/Done") { print STDOUT "Reading the Done file ($outdir/Done) ...\n"; @lines = `/usr/bin/awk '{print \$1}' $outdir/Done`; chomp(@lines); # In list mode, chomp works on each element. foreach $patn (@lines) { $Done{$patn}=1; # Lines contain just the patn } } # Before spawning all our children, open up our 5 output files so that we # all can share the same file descriptor. open(DONE,">>$outdir/Done") || die "Could not open $outdir/Done"; select(DONE); $|=1; if (-s "$outdir/InsertDB2") { open(DB2,">>$outdir/InsertDB2") || die "Could not open $outdir/InsertDB2"; } else { open(DB2,">$outdir/InsertDB2") || die "Could not open $outdir/InsertDB2"; print DB2 "-- db2 -f /dfs/cdrom/$cdlabel/InsertDB2 | egrep -v '^\$|completed successfully'\n"; } select(DB2); $|=1; open(ERRORS,">>$outdir/Errors") || die "Could not open $outdir/Errors"; select(ERRORS); $|=1; open(NONFATAL,">>$outdir/Non-Fatal_Errors") || die "Could not open $outdir/Non-Fatal_Errors"; select(NONFATAL); $|=1; open(EXCLUDED,">>$outdir/Excluded_Files") || die "Could not open $outdir/Excluded_Files"; select(EXCLUDED); $|=1; select(STDOUT); $|=1; print STDOUT "Starting to process $total_number_TODO image files among $number_of_children processes " . timestamp() . " ...\n"; for (1 .. 
$number_of_children) { # print STDOUT "Spawning child #$_ ...\n"; FORK: { if ($pid = fork) { # If the fork was successful, it returns to the parent, a non-zero # number (true), which is the child's PID. Save it. # print STDOUT "Parent spawned child #$_ as PID $pid ...\n"; $PIDs{$pid}=$_; # The PIDs array has key=child's PID & value=child's number (1-n) } else { # Else we're in the child process (most likely) or the if (! defined $pid) { # fork failed. If $pid is not defined, then the fork failed. print STDERR "fork error spawning Child #$_. Will retry in 2 seconds ...\n"; sleep 2; redo FORK; } ###################################################### # # # CC H H III L DDD RRR EEEE N N # # C C H H I L D D R R E NN N # # C HHHH I L D D RRR EEE N N N # # C C H H I L D D R R E N NN # # CC H H III LLLL DDD R R EEEE N N # # # ###################################################### # If we get here, then we're inside the just-spawned child process, # who share the already-opened file descriptors for our 5 output # files, and have their own copy of all other variables. $child_number=$_; # Ranges from 1 to $number_of_children # Determine the beginning and ending line number for the images this # child process should handle. We carve the list of files to do, # into $number_of_children blocks, and each child does their block. # If not evenly divisable (the normal case), then all the ealier # children do one more image than all the later children. # # For example, if 10 images to do among 4 children, # $number_todo_for_each_child will be 2, but # child 1 will do 3, beginning will be 0 & end will be 2. # child 2 will do 3, beginning will be 3 & end will be 5. # child 3 will do 2, beginning will be 6 & end will be 7. # and child 4 will do 2, beginning will be 8 & end will be 9. # This is the minimum number each child will do and might be zero. # Some number of the first children will do one more each. 
$number_todo_for_each_child=int($total_number_TODO / $number_of_children); # This will get adjusted later. $beginning=$number_todo_for_each_child * ($child_number-1); # This is the number of first children that will do one more. # This might be zero if it's evenly divisable, but that's ok. $number_of_first_children = $total_number_TODO % $number_of_children; if ($child_number <= $number_of_first_children) { # One of the first children? $beginning += $child_number - 1; $end=$beginning + $number_todo_for_each_child; } else { # One of the later children. $beginning += $number_of_first_children; $end=$beginning + $number_todo_for_each_child - 1; } $number_for_me_TODO=$end-$beginning+1; # print STDOUT "Child #$child_number will handle $beginning to $end inclusive ($number_for_me_TODO total).\n" if ($debug); $file_num=0; # This child's file counter for ($index=$beginning; $index <= $end; $index++) { $this_file=$file_list[$index]; $file_num++; # print STDOUT "Child #$child_number file #$file_num out of $number_for_me_TODO is $this_file\n" if ($debug); Handle_Image_File(); # Handle this one image file. if ($file_num%$heartbeat == 0) { print STDOUT "Child #$child_number working on file #$file_num out of $number_for_me_TODO = $this_file at " . timestamp() . " ...\n"; } } # print STDOUT "After $file_num files, child #$child_number (PID $$) is ending at " . timestamp() . ".\n" if ($debug); exit; # That is, this one child is exiting. } } } ########################################### # # # PPP A RRR EEEE N N TTTTT # # P P A A R R E NN N T # # PPP AAAAA RRR EEE N N N T # # P A A R R E N NN T # # P A A R R EEEE N N T # # # ########################################### # At this point, we're back in the parent after spawning all our # children, who are likely running at this point. All that's left # for the parent to do, is reap the kiddies. 
# print STDOUT "Rick, the Parent's PID list is\n"; # foreach $PID (keys(%PIDs)) { print STDOUT "\$PIDs\{$PID} is Child # $PIDs{$PID}\n" } do { $kidpid = waitpid(-1,&WNOHANG); # waitpid returns 0 if the child is still running # -1 if the child is no longer around (i.e. it's been reaped) # the $pid the first time we waitpid after the child dies. if ($kidpid == 0) { # There's at least one child still running. sleep 5; # Wait a bit before checking on kids again. # print STDOUT "$$: At ", timestamp(), ", we're waiting on \$PIDs\{$PID}=>"; # foreach $PID (keys(%PIDs)) { # print STDOUT "$PID=$PIDs{$PID} "; # } # print STDOUT "<=\n"; } else { # Hey, status has changed for some child! Either one quit/ended, # print STDOUT "In reap loop at ", timestamp(), ", child #$PIDs{$kidpid} (PID=$kidpid) exited with status= $?.\n"; delete $PIDs{$kidpid} unless ($kidpid == -1); # or there are no more children. } } until $kidpid == -1; # At this point, all the children have ended. close DONE; close DB2; close ERRORS; close NONFATAL; close EXCLUDED; $Done_Count=`/usr/bin/wc -l $outdir/Done`; chomp $Done_Count; if ($Done_Count == $total_number_TODO) { print "It appears that we're finished with all $total_number_TODO images from $cdlabel,"; print "so I'll copy the Done & InsertDB2 files over to Southbury.\n"; `$scp -p $outdir/Done $SBY_userid\@dephds009:$outdir/Done`; `$scp -p $outdir/InsertDB2 $SBY_userid\@dephds009:$outdir/InsertDB2`; } exit; # This is the parent ending. ##################################################################################### # # # Return the current timestamp in yyyy/mm/dd hh:mm:ss format. # # # ##################################################################################### sub timestamp() { my ($sec,$min,$hour,$mday,$mon,$year,undef,undef,undef)=localtime(); # # localtime() returns sec, min, hour, mday as you'd expect, but # mon = 0-11, where January = 0 and December = 11. 
# and year = Number of years since 1900 $year=1900+$year; # Add 1900 back to year. return sprintf("$year/%02d/%02d %02d:%02d:%02d",($mon+1),$mday,$hour,$min,$sec); } # End of timestamp subroutine. ###################################################### # # # CC H H III L DDD RRR EEEE N N # # C C H H I L D D R R E NN N # # C HHHH I L D D RRR EEE N N N # # C C H H I L D D R R E N NN # # CC H H III LLLL DDD R R EEEE N N # # # ###################################################### # Do what we need to do for this one image file, $this_file. # $this_file is for example, /cdrom/usp001/usp001/00/000/00000001.tif # or /cdrom/usp001/usp001/rx/000/rx000105.tif # # Here we process that image with the following 5 steps; # 1) See if we want this image copied, that is, it's in the # Cumulative Index file and it's not too small and it # hasn't already been copied. # 2) Count the number of pages in the file. # 3) Extract tagdata & metadata if possible. # 3A: If the metadata indicates this is a "Consolidated" image, # i.e. it has Certificate of Corrections and/or Reexaminations # tacked on the end, then split off all the pieces, scp-ing # each piece to Southbury and adding the appropriate rows into # our InsertDB2 file. # 3B: Else this isn't a "Consolidated" image (the normal case). # Check for tif errors, rewriting the image with tiffcp if need be. # 4) scp this image (or original piece if split), to Southbury. # 5) Write an InsertDB2 line for this image (or original piece if split). # 6) If all's well, then write a line into our DONE file. sub Handle_Image_File() { # Parse out the patent number, in our examples, 00000001 or RX000105. if ($this_file !~ /\/(....(..)(..))\.tif$/) { print STDERR "Unknown filename ($this_file) ignored by child #$child_number after $file_num files.\n"; flock(ERRORS,LOCK_EX) or die "Can't lock Errors file at 1"; seek(ERRORS,0,2); # In case someone appended while we were waiting. 
(Thanks, Tom) print ERRORS "Unknown filename ($this_file) ignored by child #$child_number after $file_num files.\n"; flock(ERRORS,LOCK_UN); return; } $Image_fn=$1; # Just the file name without the trailing .tif (eg 00000001) $dir1=$2; # Used for /dfs/images/US directory names $dir2=$3; # Used also for indexing into DFS_Server associative array. # Convert DVD filename, to Delphion-standard patn. Mostly this is to convert # the rare fractional patents (all on usp422). if ($Image_fn =~ /^[A-Z]{0,2}\d{6,8}$/) { $patn = "US${Image_fn}__"; } elsif ($Image_fn =~ /^([X0D])(\d{6})H$/ ) { $patn = "US${1}0${2}12"; } elsif ($Image_fn =~ /^([X0D])(\d{6})D$/ ) { $patn = "US${1}0${2}14"; } elsif ($Image_fn =~ /^([X0D])(\d{6})L$/ ) { $patn = "US${1}0${2}34"; } elsif ($Image_fn =~ /^([X0D])(\d{6})N$/ ) { $patn = "US${1}0${2}78"; } else {die "Could not make sense of $Image_fn file name.\n"} # Step 1) Do we want this image in Southbury? There are two reasons why # an image on DVD, should not get transferred to SBY. # - It's not in the final Cumulative Index file on the last DVD # (usp423). Each DVD had a Cumulative Index file, but the # very last one on usp423, is the master. # - Or the image is too small. There are many images on DVD # that are about 863 bytes large (give or take) that simply # say "WITHDRAWN". Sometimes (most of the times? I don't know, # I didn't check), the image is rescanned on a later DVD # (e.g. USRX00002 on usp001 says "WITHDRAWN", but is ok on usp301). # Other times, it's never rescanned (e.g. US00000100 on USP001 or # US002163300 on USP422). Oh, well. # Honor the "force" flag if it was specified. $size = (-s $this_file); if ((! $In_Cum_Idx{$patn} && ! $Force_scp) || $size < 1000) { # We definitely don't want this image over in Southbury. if ((! $In_Cum_Idx{$patn}) && ($size < 1000)) { $reason="not in the index file & too small."; } elsif (! 
$In_Cum_Idx{$patn}) { $reason="not in the index file ($size bytes)."; } else { $reason="too small."; } print STDOUT "$this_file excluded because it's $reason\n"; flock(EXCLUDED,LOCK_EX) or die "Can't lock EXCLUDED file at 1"; seek(EXCLUDED,0,2); # In case someone appended while we were waiting. (Thanks, Tom) print EXCLUDED "$this_file excluded because it's $reason\n"; flock(EXCLUDED,LOCK_UN); return; } if ($Done{$patn}) { # If we've previously scp'd this patent to Southbury, return; # then great. Just return quietly. } # Ok, we're definitely going to process this image. # Reset all temporary variables. $bad_file = $abs = $desc = $drawing = $claim = $biblio = ""; $image_pages = $db2_set_clause = $temp_file_list = ""; # $Image_Dir="/dfs/images/US/$dir2/$dir1"; $destination="$Image_Dir/$patn.$cdlabel.tif"; # print STDOUT "Rick, I got patn=>$patn< and Image_Dir=>$Image_Dir< and\ndestination=>$destination< from this_file=>$this_file<\n" if ($debug); # Step 2) Count how many pages are in this image. $pages=Count_Pages($this_file); if (! $pages) { flock(ERRORS,LOCK_EX) or die "Can't lock Errors file at 2"; seek(ERRORS,0,2); # In case someone appended while we were waiting. (Thanks, Tom) print ERRORS "Bizarre error counting pages of $this_file.\n"; flock(ERRORS,LOCK_UN); die "Bizarre error counting pages of $this_file.\n"; # I don't ever expect this. } # Step 3) Try to extract tagdata & metadata so we can split off the pieces. if (There_Is_Tagdata($this_file)) { # If any tag data was found, then the There_Is_Tagdata subroutine has already set # up many variables for us. In DB2 column order, we have # abs, desc, drawing, claim, and biblio. # And if there was also metadata present, we also have $numCoCs and $numReexams, # with their associated Start/End numbers, # ($CoC_Start[$i], $CoC_End[$i], $Reexam_Start[$i] and $Reexam_End[$i]). 
# print STDOUT "Page start data is abs=$abs, desc=$desc, drawing=$drawing, claim=$claim, biblio=$biblio.\n" if ($debug); if ($numCoCs || $numReexams) { # Step 3A) If there are any CoC's or Reexams, $pages=Split_Off_Pieces(); # then break out the CoC's & Reexams, scp-ing and # adding rows to DB2 as we go. Returns number of # pages in the now-split original. # Warning! The Split_Off_Pieces routine also changes $this_file to point # to the newly-created, split-off original image in /dfs/cdrom/$cdlabel. } else { # Step 3B) There are no CoC or Reexams. Just check for tif errors. # The reason we don't have to check for tif errors above, is because # the Split_Off_Pieces routine already rewrites the image file, thus # correcting any errors. # If there are errors, the tiffcp program will return error messages (thus this "if" # statement will be true), or run quietly (thus false) if there were no errors. if (`$tiffcp $this_file /dev/null 2>&1`) { # Ooops, tiffcp detected an errror. Use tiffcp to really rewrite the # image file and change $this_file to the newly-rewritten image file. $bad_file = $this_file; # Note this change! From here on, $this_file no longer points to the DVD # image file. It now is pointing to the fixed image in /dfs/cdrom/$cdlabel. $this_file="$outdir/$patn.tif"; `$tiffcp $bad_file $this_file 2>/dev/null`; if ($?) {die "Couldn't rewrite $bad_file to $this_file with tiffcp."} print STDOUT "Rewrote $bad_file to fix tif errors. Continuing ...\n"; flock(NONFATAL,LOCK_EX) or die "Can't lock NONFATAL file at 1"; seek(NONFATAL,0,2); # In case someone appended while we were waiting. (Thanks, Tom) print NONFATAL "Rewrote $bad_file to fix tif errors.\n"; flock(NONFATAL,LOCK_UN); $temp_file_list="$this_file"; # Erase this file when done. } } } # End of Step 3) "Is There Any Tagdata?" # Step 4) scp $this_image to Southbury. 
Note at this point, $this_image could be # - The original DVD image (normal case), # - or the newly-created, split-off original image in /dfs/cdrom/$cdlabel, # - or the fixed image in /dfs/cdrom/$cdlabel. $target="$SBY_userid\@dephds0" . $DFS_Server{$dir2}; print STDOUT "$scp-ing $this_file to $target:$destination\n" if ($debug); $scp_string_output = `$scp -p $this_file $target:$destination 2>&1`; $status = $?; if ($status != 0) { print STDERR "scp error: $this_file to $target:$destination ($size bytes).\n"; flock(ERRORS,LOCK_EX) or die "Can't lock Errors file at 3"; seek(ERRORS,0,2); # In case someone appended while we were waiting. (Thanks, Tom) print ERRORS "scp error: $this_file to $target:$destination ($size bytes)\n"; flock(ERRORS,LOCK_UN); return; # Abort from our Handle_Image_File routine. We did not # successfully handle this image file. } # print STDOUT "Child #$child_number copied $this_file to $destination ok.\n" if ($debug); # Step 5) Write an insert line for this image (or original piece if split). # Note that we're assured that this image will not be in DB2 at all. $column_names="patn,image_pages,cdlabel,datasrc,namesrc"; $values="'$patn',$pages,'$cdlabel','USG','$cdlabel'"; if ($abs) { $column_names .= ",abs"; $values .= ",$abs"; } if ($desc) { $column_names .= ",desc"; $values .= ",$desc"; } if ($drawing) { $column_names .= ",drawing"; $values .= ",$drawing"; } if ($claim) { $column_names .= ",claim"; $values .= ",$claim"; } if ($biblio) { $column_names .= ",biblio"; $values .= ",$biblio"; } # print STDOUT "inserting into imag ($column_names) values($values)\n" if ($debug); flock(DB2,LOCK_EX) or die "Can't lock DB2 file at 1"; seek(DB2,0,2); # In case someone appended while we were waiting. (Thanks, Tom) print DB2 "insert into imag ($column_names) values($values)\n"; flock(DB2,LOCK_UN); # Step 6) If all's well, then write a line into our DONE file. 
flock(DONE,LOCK_EX) or die "Can't lock DONE file at 1"; seek(DONE,0,2); # In case someone appended while we were waiting. (Thanks, Tom) printf DONE "$patn $size $destination\n"; flock(DONE,LOCK_UN); # Erase any temporary files we may have created. if ($temp_file_list) {unlink $temp_file_list} } # Search this image file for a "Software" tif header tag (decimal 305) and if there and in # the right format, parse out the tag data. If we're lucky, there will also be metadata # at the end of the image file. # # We return 0 if we don't find any tag data or metadata, else 1 (we did). sub There_Is_Tagdata { my $fn=$_[0]; $numCoCs=0; $numReexams=0; # print STDOUT "Checking $fn for tag data ...\n"; $tagdata=`$tiffinfo $fn 2>/dev/null | /usr/bin/grep Software 2>/dev/null | /usr/bin/head -1 | /usr/bin/cut -f2 -d'\"'`; # Typically, the tag data is something like [6]0;3;2;6;6;META-535202;002 # where [nn] = Total number of pages # n; = Bibliographic data Page Start # n; = Abstracts data Page Start # n; = Drawing data Page Start # n; = Description data Page Start # n; = Claim data Page Start # META-nnnnn; = Metadata offset # print STDOUT "Preliminary tagdata =>$tagdata<\n" if ($debug); if ($tagdata =~ /\[(\d*)\](\d*);(\d*);(\d*);(\d*);(\d*);META-(\d*)\D/) { $numPages=$1; $biblio=$2; $abs=$3; $drawing=$4; $desc=$5; $claim=$6; $metadata_Offset=$7; # print STDOUT "I've got $numPages $biblio $abs $drawing $desc $claim $metadata_Offset from $tagdata\n" if ($debug); # The metadata starts at that metadata_Offset, which is at the end of the image file. 
$file_size = -s $fn; $tail_parm = $file_size - $metadata_Offset +1; if ($tail_parm < 300) { $metadata=`/usr/bin/tail -c $tail_parm $fn`; # When there's no Certificate of Corrections or Reexaminations, the metadata usually looks like # D0000002,US,S1,18430224,2,Y,NULL,1,2,0,0,2,2,2,2,0,0,0,0,CERT_OF_CORR=0,RE_EXAM=0 # ======== == == ======== = = ==== === === === === === === # 8-char patn-/ | | | | | | | | | | | \-Reexaminations Start,End # Country Code-/ | | | | | | | | | \-Certificate of Corrections Start,End # Kind-/ | | | | | | | \-Claim Start,End # Issue Date (YYYYMMDD)-/ | | | | | \-Description Start,End # Total Number of Pages-/ | | | \-Drawing Start,End # Missing Page Flag-/ | \-Abstracts Start,End # Withdrawn Flag-/ # # When there is Certificate of Corrections or Reexaminations data, the metadata is # 05224775,US,A1,19930706,28,NULL,NULL,1,1,2,10,11,18,18,20,21,21,22,28, # CERT_OF_CORR=1,19941011,NULL,21,21,RE_EXAM=2,19940719,B1,NULL,22,25,20020423,C2,NULL,26,28 # = =================== = ====================== ====================== # After the equal sign, is the number of CoC's or Reexams, followed by metadata for each. # The metadata includes Publication Date (YYYYMMDD), # the kind code for Reexaminations only, # a Missing Pages Flag, # and Start,End page numbers. # Verify the metadata starts with the 8-character patn. If not correct, try to find the # metadata on our own. US0036163 on usp001 for example, had the metadata tag truncated # (159041, not 1590412), yet the metadata was there in the last 92 bytes. 
# eg, metadata=>05224775,US,A1,19930706,28,NULL,NULL,1,1,2,10,11,18,18,20,21,21,22,28, # CERT_OF_CORR=1,19941011,NULL,21,21, # RE_EXAM=2,19940719,B1,NULL,22,25,20020423,C2,NULL,26,28< # and $Image_fn=>05224775< and 8-character patn=>05224775< # print STDOUT "0: metadata=>$metadata<\n and \$Image_fn=>$Image_fn< and 8-character patn=>" , substr($metadata,0,8) , "<\n"; if (substr($metadata,0,8) ne $Image_fn) { $metadata=`/usr/bin/tail -c 300 $fn`; if ($metadata !~ /($Image_fn.*)$/) { print STDOUT "1: $fn metadata could not be salvaged. Continuing ...\n" if ($debug); flock(NONFATAL,LOCK_EX) or die "Can't lock NONFATAL file at 2"; seek(NONFATAL,0,2); # In case someone appended while we were waiting. (Thanks, Tom) print NONFATAL "1: $fn metadata could not be salvaged.\n"; flock(NONFATAL,LOCK_UN); return 1; # There was tag data, but no metadata. } else { $metadata=$1; print STDOUT "2: $fn metadata salvaged >$metadata< Continuing ...\n" if ($debug); # This is quite common and not worth logging. # flock(NONFATAL,LOCK_EX) or die "Can't lock NONFATAL file at 3"; # seek(NONFATAL,0,2); # In case someone appended while we were waiting. (Thanks, Tom) # print NONFATAL "2: $fn metadata salvaged >$metadata<\n"; # flock(NONFATAL,LOCK_UN); } } } else { $metadata=`/usr/bin/tail -c 300 $fn`; if ($metadata !~ /($Image_fn.*)$/) { print STDOUT "3: $fn metadata could not be salvaged. Continuing ...\n" if ($debug); flock(NONFATAL,LOCK_EX) or die "Can't lock NONFATAL file at 4"; seek(NONFATAL,0,2); # In case someone appended while we were waiting. (Thanks, Tom) print NONFATAL "3: $fn metadata could not be salvaged.\n"; flock(NONFATAL,LOCK_UN); return 1; # There was tag data, but no metadata. } else { $metadata=$1; print STDOUT "4: $fn metadata salvaged >$metadata< Continuing ...\n" if ($debug); # I was curious whether this really happened, and I found a case where # yes, it did. /cdrom/usp001/usp001/00/008/00008417.tif had apparently # good tagdata, but its metadata offset was wrong. 
# flock(NONFATAL,LOCK_EX) or die "Can't lock NONFATAL file at 5"; # seek(NONFATAL,0,2); # In case someone appended while we were waiting. (Thanks, Tom) # print NONFATAL "4: $fn metadata salvaged >$metadata<\n"; # flock(NONFATAL,LOCK_UN); } } # The metadata looks good (well, at least it starts with our 8-character patn). # Parse out the CoC & Reexam data if present, and set in variables # $numCoCs & $numReexams, along with the page start info in arrays. # print STDOUT "6: metadata=>$metadata<\n and \$Image_fn=>$Image_fn< and 8-character patn=>" , substr($metadata,0,8) , "<\n"; if ($metadata =~ /,CERT_OF_CORR=(\d*),(.*),?RE_EXAM=(\d*),?(.*)/) { $numCoCs=$1; $CoC_data=$2; $numReexams=$3; $Reexam_data=$4; if ($numCoCs || $numReexams) { print STDOUT "Found $numCoCs CoCs and $numReexams reexaminations in $fn. Continuing ...\n"; flock(NONFATAL,LOCK_EX) or die "Can't lock NONFATAL file at 6"; seek(NONFATAL,0,2); # In case someone appended while we were waiting. (Thanks, Tom) print NONFATAL "Found $numCoCs CoCs and $numReexams reexaminations in $fn.\n"; flock(NONFATAL,LOCK_UN); } # print STDOUT "Parsing CoC Data >$CoC_data<\n"; $counter=0; while ($CoC_data =~ /\d*,.*?,(\d*),(\d*),?/g) { # Do a sanity check first. Is there this many pages? On US00229322 (usp006), # the CoC supposedly on page 12 was missing. There was only 11 pages total. if ($1 <= $pages) { $CoC_Start[$counter]=$1; $CoC_End[$counter]=$2; if ($CoC_End[$counter] > $pages) { $CoC_End[$counter]=$pages; } # print STDOUT "Got a CoC on pages $CoC_Start[$counter]-$CoC_End[$counter] (>$`< remaining)\n" if ($debug); $counter++; } } if ($counter != $numCoCs) { print STDOUT "Wrong number of CoC stanzas in the metadata for $patn. Was $counter, should have been $numCoCs. Continuing ...\n"; flock(NONFATAL,LOCK_EX) or die "Can't lock NONFATAL file at 7"; seek(NONFATAL,0,2); # In case someone appended while we were waiting. (Thanks, Tom) print NONFATAL "Wrong number of CoC stanzas in the metadata for $patn. 
Was $counter, should have been $numCoCs.\n";
                flock(NONFATAL,LOCK_UN);
            }
            # print STDOUT "Parsing Reexam Data >$Reexam_data<\n";
            $counter=0;
            while ($Reexam_data =~ /\d*,(\w*),\w*,(\d*),(\d*),?/g) {
                $Reexam_Kind[$counter]=$1;
                $Reexam_Start[$counter]=$2;
                $Reexam_End[$counter]=$3;
                $np=$Reexam_End[$counter] - $Reexam_Start[$counter] + 1;
                # print STDOUT "Got a $Reexam_Kind[$counter] Reexam on pages $Reexam_Start[$counter]-$Reexam_End[$counter] (>$'< remaining)\n" if ($debug);
                if ($Reexam_Start[$counter] <= $pages) {
                    $counter++;
                }
            }
            if ($counter != $numReexams) {
                print STDOUT "Wrong number of Reexam stanzas in the metadata for $patn. Was $counter, should have been $numReexams. Continuing ...\n";
                flock(NONFATAL,LOCK_EX) or die "Can't lock NONFATAL file at 8";
                seek(NONFATAL,0,2); # In case someone appended while we were waiting. (Thanks, Tom)
                print NONFATAL "Wrong number of Reexam stanzas in the metadata for $patn. Was $counter, should have been $numReexams.\n";
                flock(NONFATAL,LOCK_UN);
            }
        }
    } else {
        print STDOUT "No tagdata for $fn? Curious. Continuing ...\n" if ($debug);
        flock(NONFATAL,LOCK_EX) or die "Can't lock NONFATAL file at 9";
        seek(NONFATAL,0,2); # In case someone appended while we were waiting. (Thanks, Tom)
        print NONFATAL "No tagdata for $fn? Curious.\n";
        flock(NONFATAL,LOCK_UN);
        return 0; # No tag data, so no metadata (probably - we didn't really check.)
    }
    return 1;
}

# Count the number of pages in the passed filename.
#
# Runs the external $anyinfo program on the file and scans its output for a
# line that starts with "<number> page." or "<number> pages.".
#
# Returns the number of pages in the image, or zero if no such line was
# found (i.e. there was an error).
sub Count_Pages {
    my ($fn) = @_;
    my @lines = `$anyinfo $fn`;
    foreach my $line (@lines) {
        return $1 if ( $line =~ /^([0-9]+) pages?\./ );
    }
    return 0;
}

# Write an any2any control file that selects pages $first_page through
# $last_page of $source_image, one "filename/activepagerange" stanza per page.
#
#   $ctl_file     - name of the control file to create (overwritten).
#   $source_image - image file the pages are taken from.
#   $first_page   - first page number to select.
#   $last_page    - last page number to select; nothing is written when the
#                   range is empty.
#   $what         - "Original", "CoC" or "Reexam"; only used in error messages.
#
# Dies if the control file cannot be opened, written or closed.
sub _Write_CTL_File {
    my ($ctl_file, $source_image, $first_page, $last_page, $what) = @_;
    open(my $ctl, '>', $ctl_file) or die "Error opening $what control file ($ctl_file).";
    for my $page ($first_page .. $last_page) {
        # Must be "or", not "||": "||" would bind to the (always true) string
        # being printed, and the die could then never fire.
        print {$ctl} "filename $source_image\nactivepagerange $page\n"
            or die "Error writing to $what control file ($ctl_file).";
    }
    # Buffered write errors only show up at close time, so check that too.
    close($ctl) or die "Error writing to $what control file ($ctl_file).";
    return;
}

# Append $message to the shared ERRORS file while holding an exclusive lock,
# then die with that same message.
#
#   $site    - number identifying the call site; it only appears in the
#              "Can't lock Errors file at N" message.
#   $message - complete error text. Callers end it with "\n" so that die does
#              not append the script name and line number.
#
# Never returns.
sub _Log_Error_And_Die {
    my ($site, $message) = @_;
    flock(ERRORS,LOCK_EX) or die "Can't lock Errors file at $site";
    seek(ERRORS,0,2); # In case someone appended while we were waiting. (Thanks, Tom)
    print ERRORS $message;
    flock(ERRORS,LOCK_UN);
    die $message;
}

# Split the consolidated image file $this_file up into its pieces:
#
#   Original        : Page 1 up to just before CoC_Start[0] or Reexam_Start[0]
#   CoC (if any)    : CoC_Start[i] - CoC_End[i]
#   Reexams (if any): Reexam_Start[i] - Reexam_End[i]
#
# Each CoC and Reexam piece is extracted with any2any, scp'd to
# $target:$Image_Dir, recorded as an insert statement in the DB2 file and then
# erased locally. The Original piece is left in $outdir; $this_file is pointed
# at it and $temp_file_list is set to its name.
#
# Reads the globals $this_file, $outdir, $patn, $cdlabel, $numCoCs,
# $numReexams, $Image_Dir, $SBY_userid, $dir2, $scp, $debug, %DFS_Server,
# @CoC_Start, @CoC_End, @Reexam_Start, @Reexam_End and @Reexam_Kind.
#
# Returns the number of pages written into the Original image.
# Dies on any error, 'cause I don't know how to handle errors gracefully
# from here.
sub Split_Off_Pieces {
    if (exists $CoC_Start[0]) {                      # If there are any CoC's, then the original
        $last_original_page = $CoC_Start[0] - 1;     # ends just before where the first CoC begins.
    } else {                                         # Else there are no CoC's, so there must be a
        $last_original_page = $Reexam_Start[0] - 1;  # Reexam and that's where the original ends.
    }

    $Temp_CTL_File="$outdir/$patn.original.ctl";
    _Write_CTL_File($Temp_CTL_File, $this_file, 1, $last_original_page, "Original");

    $Original_fn = "$outdir/$patn.$cdlabel.tif";
    # print STDOUT "Rick, calling Call_any2any with ($Temp_CTL_File,$Original_fn)\n" if ($debug);

    # Now call any2any to create the original image from the consolidated image.
    # Call_any2any returns the number of pages wrote (zero if there was an error).
    my $Original_Pages = Call_any2any($Temp_CTL_File,$Original_fn);

    # Ensure any2any's page count matches how many I thought I should've written.
    if ($Original_Pages != $last_original_page) {
        _Log_Error_And_Die(4, "Page count mismatch for $Original_fn. Wrote $Original_Pages, not $last_original_page.\n");
    }

    # Normal case. All's ok.
    print STDOUT "Wrote $Original_Pages pages into $Original_fn at ", scalar localtime, ".\n" if ($debug);
    unlink $Temp_CTL_File;            # Erase our temporary control file.
    $temp_file_list="$Original_fn";   # Erase this Original file when done.

    # If there are any Certificate of Corrections, carve them out of the consolidated image.
    # The filenames for the X6 CoCs will be US01234567X6 instead of US01234567__, and if
    # there are more than 1 CoC (so that just $cdlabel wouldn't be unique), we'll append
    # suffixes to the CD label to ensure uniqueness. So the first CoC will be
    # US01234567__.usp001_1.tif, the second, US01234567__.usp001_2.tif, etc.
    my $Base_Patn=substr($patn,0,10);        # EG, US01010772
    my $This_X6_Patn = $Base_Patn . "X6";    # EG, US01010772X6

    for (my $CoC_Number=0; $CoC_Number<$numCoCs; $CoC_Number++) {
        # print STDOUT "Extracting CoC number " , $CoC_Number+1 , " ...\n" if ($debug);
        $Temp_CTL_File="$outdir/$patn.CoC_$CoC_Number.ctl";
        _Write_CTL_File($Temp_CTL_File, $this_file,
                        $CoC_Start[$CoC_Number], $CoC_End[$CoC_Number], "CoC");

        my $CoC_fn="$outdir/$This_X6_Patn.tif";

        # Now call any2any to create the image for this CoC from the consolidated image,
        # and ensure its page count matches how many I thought I should've written.
        $pages_wrote = Call_any2any($Temp_CTL_File,$CoC_fn);
        my $expected_pages = $CoC_End[$CoC_Number] - $CoC_Start[$CoC_Number] + 1;
        if ($pages_wrote != $expected_pages) {
            _Log_Error_And_Die(5, "Page count mismatch for $patn CoC #" . ($CoC_Number+1)
                                . ". Wrote $pages_wrote, not $expected_pages.\n");
        }

        # Normal case. All's ok.
        print STDOUT "Wrote $pages_wrote pages into $CoC_fn at ", scalar localtime, ".\n" if ($debug);
        unlink $Temp_CTL_File;   # Erase our temporary control file.

        # Now scp over this newly-created, X6, CoC image to Southbury.
        # Figure out which DFS name to call it. If only 1 CoC (normal case), then use the
        # real CD label. Otherwise, suffix all CD labels with _digit.
        # ($#CoC_Start is zero, i.e. false, only when @CoC_Start holds exactly one entry.)
        if ($#CoC_Start) {
            $This_X6_CD_Label = "${cdlabel}_" . ($CoC_Number+1);   # EG, usp037_1, usp037_2, ...
        } else {
            $This_X6_CD_Label = $cdlabel;                          # EG, usp037
        }
        $This_X6_fqn = "$Image_Dir/$This_X6_Patn.$This_X6_CD_Label.tif";
        # EG, "/dfs/images/US/72/07/US01010772X6.usp037.tif"

        $target="$SBY_userid\@dephds0" . $DFS_Server{$dir2};
        print STDOUT "$scp-ing $CoC_fn to $target:$This_X6_fqn\n" if ($debug);
        $scp_string_output = `$scp -p $CoC_fn $target:$This_X6_fqn 2>&1`;
        $status = $?;
        if ($status != 0) {
            _Log_Error_And_Die(6, "scp error: $CoC_fn to $target:$This_X6_fqn.\n");
        }

        # Write row into imag to identify this X6 image.
        flock(DB2,LOCK_EX) or die "Can't lock DB2 file at 2";
        seek(DB2,0,2); # In case someone appended while we were waiting. (Thanks, Tom)
        print DB2 "insert into imag (patn,image_pages,cdlabel,datasrc,namesrc) values('$This_X6_Patn',$pages_wrote,'$This_X6_CD_Label','USG','$This_X6_CD_Label')\n";
        flock(DB2,LOCK_UN);

        unlink $CoC_fn;   # Erase our X6 image.
    }

    # If there are any Reexaminations, carve them out of the consolidated image.
    # The Reexams kinds are nicely provided to us in the metadata, and will be B1 or C1.
    # "B" if $Reexam_Issue_Year < 2002, or "C" if $Reexam_Issue_Year > 2001.
    # The numeric portion increments for each Reexam. For example, for 05224775 on
    # usp364, the first reexam was issued in 1994, so it was a B1,
    # the second reexam was issued in 2002, so it was a C2.
    # Because of that ever-increasing digit, we don't have to play games with the CD label
    # to ensure uniqueness, unlike what we had to do for the CoC's.
    for (my $Reexam_Number=0; $Reexam_Number<$numReexams; $Reexam_Number++) {
        # print STDOUT "Extracting Reexam number " , $Reexam_Number+1 , " ...\n" if ($debug);
        $Temp_CTL_File="$outdir/$patn.Reexam_$Reexam_Number.ctl";
        _Write_CTL_File($Temp_CTL_File, $this_file,
                        $Reexam_Start[$Reexam_Number], $Reexam_End[$Reexam_Number], "Reexam");

        if ($Reexam_Kind[$Reexam_Number] !~ /^[BC]\d$/) {   # Sanity check.
            die "Invalid kind ($Reexam_Kind[$Reexam_Number]) in $this_file metadata.";
            # If this ever happens, you could go back and recapture the $Reexam_Issue_Year and
            # build your own kind, ala
            # $kind = ($Reexam_Issue_Year[$Reexam_Number] < 2002 ? "B" : "C") . $Reexam_Number+1;
        }

        my $This_Reexam_Patn="$Base_Patn$Reexam_Kind[$Reexam_Number]";   # EG, US05224775B1
                                                                         # or US05224775C2
        my $Reexam_fn="$outdir/$This_Reexam_Patn.tif";

        # Now call any2any to create the image for this Reexam from the consolidated image,
        # and ensure its page count matches how many I thought I should've written.
        $pages_wrote = Call_any2any($Temp_CTL_File,$Reexam_fn);
        my $expected_pages = $Reexam_End[$Reexam_Number] - $Reexam_Start[$Reexam_Number] + 1;
        if ($pages_wrote != $expected_pages) {
            _Log_Error_And_Die(7, "Page count mismatch for $patn Reexam #" . ($Reexam_Number+1)
                                . ". Wrote $pages_wrote, not $expected_pages.\n");
        }

        # Normal case. All's ok.
        print STDOUT "Wrote $pages_wrote pages into $Reexam_fn at ", scalar localtime, ".\n" if ($debug);
        unlink $Temp_CTL_File;   # Erase our temporary control file.

        # Now scp over this newly-created, Reexam image to Southbury.
        # Build up the complete name for this reexam image, e.g.
        # /dfs/images/US/75/47/US05224775C2.usp364.tif
        $This_Reexam_fqn = "$Image_Dir/$This_Reexam_Patn.$cdlabel.tif";
        $target="$SBY_userid\@dephds0" . $DFS_Server{$dir2};
        print STDOUT "$scp-ing $Reexam_fn to $target:$This_Reexam_fqn\n" if ($debug);
        $scp_string_output = `$scp -p $Reexam_fn $target:$This_Reexam_fqn 2>&1`;
        $status = $?;
        if ($status != 0) {
            # Report the Reexam file that failed; $CoC_fn is not in scope in this loop.
            _Log_Error_And_Die(8, "scp error: $Reexam_fn to $target:$This_Reexam_fqn.\n");
        }

        # Write row into imag to identify this Reexam image.
        flock(DB2,LOCK_EX) or die "Can't lock DB2 file at 3";
        seek(DB2,0,2); # In case someone appended while we were waiting. (Thanks, Tom)
        print DB2 "insert into imag (patn,image_pages,cdlabel,datasrc,namesrc) values('$This_Reexam_Patn',$pages_wrote,'$cdlabel','USG','$cdlabel')\n";
        flock(DB2,LOCK_UN);

        unlink $Reexam_fn;   # Erase our Reexam image.
    }

    # Resets this_file for the rest of the processing for this image file,
    # so that we scp this split-off Original in /dfs/cdrom/,
    # rather than the consolidated image file from the DVD.
    $this_file=$Original_fn;

    return $Original_Pages;
}

# Given an any2any control file and an output file name, call any2any to convert/create
# the image, and return the number of pages any2any wrote (our caller will probably
# compare the number of pages wrote against what he expected).
#
#   $CTL_File    - any2any control file describing the pages to extract.
#   $Output_File - name of the image file any2any should create.
#
# Returns the page count parsed from any2any's "N page(s) written OK" message,
# or zero if any2any exited non-zero or no such message was found.
#
# If there is an error with any2any, we will spit out error messages to STDERR.
# The exit status is also left in the global $rc.
#
# Called like so, my $Original_Pages = Call_any2any($Temp_CTL_File,$Original_fn);
sub Call_any2any {
    my ($CTL_File, $Output_File)=@_;
    my $any2any_page_count = 0;

    # print STDOUT "Calling $any2any $CTL_File $Output_File 2>&1 ...\n" if ($debug);
    my @lines = `$any2any $CTL_File $Output_File 2>&1`;
    $rc = $?;
    if ( $rc ) {                          # Did any2any fail?
        print STDERR "any2any failed for $Output_File at ", scalar localtime, "\n";
        foreach my $line (@lines) {       # Print out any messages from any2any.
            print STDERR "$line";
        }
    } else {                              # any2any worked ok. Great. How many pages did it write?
        foreach my $this_line (@lines) {
            # If only 1 page got written, any2any messages says "1 page", not "1 pages".
            if ( $this_line =~ /([0-9]+) pages? written OK./) {
                $any2any_page_count = $1;
                last;
            }
        }
    }
    return $any2any_page_count;
}

# $Header$
# $Log$