#!/usr/bin/perl
#------------------------------------------------------------
# Automated script to copy image files from the /dfs/cdrom
# filesystem into /dfs/images.
#
# Author: Dale Posey (modified by Rick Jasper & Cristian Manasoiu)
#
# INPUT: name of a file containing the list of directories to process,
#        e.g. jp.1
#
# Operator controls (files looked for in the current directory):
#   abort - checked before each CD; closes the logs and exits
#   stall - checked before each CD; sleeps in 60 second steps while present
#   kill  - checked after each patent; exits immediately
#
# NOTE(review): @cd_list, $cd_pointer, $image_dir, init_log_files() and
# the REC/LF filehandles are not defined in this file; presumably they
# come from common.pl - confirm.  Because state is shared that way, the
# pre-existing variables are left as package globals and 'use strict'
# is not enabled here.
#-----------------------------------------------------------
use lib "/tmp";
# use lib "/home/dale";
use File::Basename;
# use DBI qw(:sql_types);
require 'ctime.pl';
require 'common.pl';

umask(022);

# The first arg is our input file name, eg jp.1
$ipf = $ARGV[0];
if ( !defined($ipf) || $ipf eq "" ) {
    die "Gotta have a filename, eg jp.1.\n";
}

# stat() signals failure by returning an empty list.  $? only holds the
# status of the last child process and is never set by stat().
@s_if = stat($ipf);
if ( !@s_if ) {
    die "Oooops, can't stat the input file, $ipf. Please fix and retry\n";
}

$collection = "JP";
init_log_files();

#
# we will not make it through this list, but we proceed
# as far as we can
#
$ToDo_cd_count  = @cd_list;
$Doing_cd_count = 0;

while ( $cd = $cd_list[$cd_pointer] ) {

    if ( -e "abort" ) {
        print STDOUT "Aborting by request.\n";
        close REC;
        close LF;
        exit;
    }

    while ( -e "stall" ) {
        # lets wait a bit and try again
        # print STDOUT "Stalling for 1 minute.\n";
        sleep(60);
    }

    #
    # Get rid of the trailing \n character and any blanks (the
    # substitution removes every blank in the name, not only trailing
    # ones).  This is only necessary for our initial input file
    # (eg /s1jb0.us), not for the checkpoint.log file.
    #
    chomp $cd;
    $cd =~ s/ *//g;

    $Doing_cd_count++;
    print STDOUT "Started processing cd $cd [$Doing_cd_count of $ToDo_cd_count] at ",&ctime(time),"\n";

    $idx_name = "/dfs/cdrom/$cd/exchange.idx";
    undef @in_list;
    undef @patents;

    print STDOUT "Getting list of files from '$idx_name' - please wait\n";
    open(my $idx_fh, '<', $idx_name)
        || die "Could not open index file '$idx_name' on CD '$cd'\n";

    #                                       >page number
    #     --patn-- ------------ junk --------------- dir dir dir patn |
    #                                       > zeros
    # JPA 11000009 000000010000000000002900000053850\DOC\DOCUJPA\DOC110V1\D11000D1\11000009\00000001.TIF
    # JPA 11000009 SGMLSGML0000000000003200000020810\DOC\DOCUJPA\DOC110V1\D11000D1\11000009\SGML.NRM
    #
    # need to create:
    # tar -cvf /imagesx/JP/xx/yy/JPnnnnyyxxA2.tar /cdrom/mijpxxxxxx/doc/yyyyyyy/xxxxxxx/zzzzzzzz/00000001.tif \
    #          /cdrom/mijpxxxxxx/doc/yyyyyyy/xxxxxxx/zzzzzzzz/sgml.nrm
    #
    # Loop through each line in the exchange.idx file. Each time we
    # transition to a new patent, remember the tar command we've built
    # for the previous patent. That tar command will be executed later.

    # Start every CD with no "previous patent"; otherwise the last
    # patent of the previous CD would be queued again for this one.
    $prev_patn = "";

    while ( <$idx_fh> ) {
        # Patterns below are written in lower case.
        tr/A-Z/a-z/;

        # Name of the file this index line contributes to the tar command.
        my $member;

        if (/(..)(..) (.*) .*\\doc\\(.*)\\(.*)\\(.*)\\(........)\\00000001.tif/) {
            ( $patn, $d1, $d2, $d3 ) = ( $3, $4, $5, $6 );
            $ep     = "JP";
            $member = "00000001.tif";
        }
        elsif (/(..)(..) (.*) .*\\doc\\(.*)\\(.*)\\(.*)\\(........)\\sgml.nrm/) {
            ( $ep, $patn, $d1, $d2, $d3 ) = ( $1, $3, $4, $5, $6 );
            $ep =~ tr/a-z/A-Z/;
            $member = "sgml.nrm";
        }
        else {
            print STDOUT "Unrecognized line:$_\n";
            next;
        }

        # per Eric these patents need A2 as the kind.
        $kind = "A2";

        # Left-pad the patent number to 8 characters for BOTH file
        # types, so the tif and sgml lines of one patent always compare
        # equal and the directory digits below are always present.
        $patn = "0" x ( 8 - length($patn) ) . $patn if length($patn) < 8;

        if ( $patn ne $prev_patn ) {
            if ( $prev_patn ne "" ) {
                # We've changed patents, so remember everything built
                # for the previous one: the tar command, the directory
                # to cd into, the name of the tar.Z file and the
                # compress command.  They are executed later.
                $cmds{$prev_patn}      = $tar_cmd;
                $cd_dirs{$prev_patn}   = $cd_dir;
                $tar_files{$prev_patn} = $tar_file;
                $cmp_files{$prev_patn} = $cmp_cmd;
                push(@patents, $prev_patn);
            }
            $prev_patn = $patn;

            # Target is $image_dir/<chars 7-8>/<chars 5-6>/<ep><patn><kind>.tar
            my $tar_path = "$image_dir/"
                         . substr($patn, 6, 2) . "/"
                         . substr($patn, 4, 2)
                         . "/$ep$patn$kind.tar";

            # Start the tar command with whichever file we met first;
            # the other one is appended when its index line shows up.
            $cmp_cmd  = "compress -f $tar_path";
            $tar_cmd  = "tar -cvf $tar_path $member ";
            $tar_file = "${tar_path}.Z";
            $cd_dir   = "/dfs/cdrom/$cd/doc/$d1/$d2/$d3/$patn ";
        }
        else {
            # Same patent as the previous line: append this file to the
            # tar command already started for it.
            $tar_cmd = $tar_cmd . "$member ";
        }
    }
    close($idx_fh);

    # When all done with the exchange.idx file, save our last set of
    # commands - unless the index produced no patent at all, in which
    # case there is nothing to run.
    if ( $prev_patn ne "" ) {
        $tar_files{$prev_patn} = $tar_file;
        $cd_dirs{$prev_patn}   = $cd_dir;
        $cmp_files{$prev_patn} = $cmp_cmd;
        $cmds{$prev_patn}      = $tar_cmd;
        push(@patents, $prev_patn);
    }
    else {
        print STDOUT "No recognizable patent entries found in '$idx_name'.\n";
    }

    print STDOUT "Commands for CD:'$cd' created - continuing.\n";

    $cnt  = 1;
    $todo = @patents;

    # Check once per CD that the ACL check pipeline succeeds; a
    # non-zero status is reported as missing DCE credentials.
    @lines = `/usr/bin/dcecp -c acl check /dfs/images/JP/00/00 | grep w | grep id 1>/dev/null`;
    $rc = $?;
    if ( $rc ) {
        print STDOUT "I don't have the right DCE credentials to write into the /dfs/images/JP directories.\n";
        exit;
    }

    foreach $p (@patents) {
        # (A per-patent repeat of the ACL check above used to live here
        # and is disabled.)
        # print STDOUT "The commands for $p [$cnt of $todo] are\n\tcd $cd_dirs{$p}\n\t$cmds{$p}\n\t$cmp_files{$p}\n";

        # Try each patent at most 5 times, then move on to the next one.
        $err_count = 0;
        while ( $err_count < 5 ) {
            print STDOUT "Processing patent $p [$cnt of $todo] ...";

            # First cd to the proper directory, then do a relative tar,
            # and finally the compress.
            system("cd $cd_dirs{$p} && $cmds{$p} 1>/dev/null && $cmp_files{$p}");
            $rc = $?;
            if ( $rc ) {
                $errno = $!;
                print STDOUT " - Commands failed - $rc - $errno\n";
                $err_count++;
                next;
            }

            print STDOUT " - Done\n";
            $cnt++;
            last;
        }

        if ( -e "kill" ) {
            exit;
        }
    }

    # Record the finished CD in the LF log and advance to the next one.
    print LF "DONE $cd\n";
    $cd_pointer++;
}