#!/usr/local/bin/perl
#------------------------------------------------------------
# Script to copy "old" US DVD cd image files to
# local filesystem and then backup to ADSM.
#
# Author: Dale Posey
# Org:    IBM Global Services
# Date:   June 26, 2000
#
# INPUT:  name of file for list of cd's to process
#         i.e. /usr1-25.txt
#         level number (1 or 2) as the second argument
#
# For every CD in @cd_list the script:
#   1. honours the "$base_dir/abort" and "$base_dir/stall" flag files,
#   2. calls check_adsm() and stops when the tape pool is > 95% used,
#   3. reads /cdrom/<cd>/open.idx and writes one "<patent>.ctl" control
#      file per patent into $tmp_dir (one "filename ..." line per page),
#   4. creates the target directory tree under $image_dir/<cd>,
#   5. merges every patent with /ips/bin/any2any (up to 5 attempts),
#      reads the page count with /ips/bin/anyinfo and queues a record
#      in @db2list,
#   6. calls backup_to_adsm().
#
# NOTE(review): the following names are used here but not defined in
# this file; presumably they come from common.pl - confirm:
#   subs:      init_log_files, check_adsm, rm_dirs, backup_to_adsm
#   variables: @cd_list, $cd_pointer, $base_dir, $tmp_dir, $image_dir,
#              $mode, $max_pool, $max_tape, $pct_util_tape
#   handles:   DBG, REC, LF
# Because that state is shared through package globals, existing
# variables are deliberately left undeclared (no "use strict"/"my").
#-----------------------------------------------------------
use lib "/";
use File::Basename;
require 'ctime.pl';
require "common.pl";

# The first arg is our input file name, eg /usr1-25.txt
$ipf = $ARGV[0];
# determines if we go to images1 or 2
$lvl = $ARGV[1];

if ( !defined $lvl || $lvl eq "" ) {
    die "Gotta have a level number. 1 or 2.";
}
if ( !defined $ipf || $ipf eq "" ) {
    die "Gotta have a filename.\n";
}

# stat() signals failure by returning an empty list; it does not set $?.
@s_if = stat($ipf);
if ( !@s_if ) {
    die "Oooops, can't stat the input file....please fix and retry\n";
}

$collection = "US";
$server     = "reindeer";
init_log_files();

#
# we will not make it through this list, but we proceed
# as far as we can
#
$ToDo_cd_count  = @cd_list;
$Doing_cd_count = 0;

# NOTE(review): $cd_pointer is not advanced anywhere in this file;
# presumably one of the common.pl routines does it - confirm.
CD_LOOP:
while ( $cd = $cd_list[$cd_pointer] ) {

    if ( -e "$base_dir/abort" ) {
        _cdcopy_log("Aborting by request.\n");
        _cdcopy_shutdown();
    }

    while ( -e "$base_dir/stall" ) {
        # lets wait a few minutes and try again
        print STDOUT "Stalling for 1 minute.\n";
        sleep(60);
    }

    #
    # Get rid of trailing \n character and any blanks.
    # This is only necessary for our initial input file (eg /s1jb0.us),
    # not for the checkpoint.log file.
    chomp $cd;
    $cd =~ tr/ //d;

    $Doing_cd_count++;
    _cdcopy_log(
        "Started processing cd $cd [$Doing_cd_count of $ToDo_cd_count] at ",
        ctime(time), "\n"
    );

    check_adsm();
    _cdcopy_log("max_pool=$max_pool\t max_tape=$max_tape\t pct_util_tape=$pct_util_tape\n");
    if ( $pct_util_tape > 95 ) {
        _cdcopy_log("The tape space is $pct_util_tape percent full. Can't do any more CDs.\n");
        _cdcopy_shutdown();
    }

    # this is the path for US type with cd names beginning usp.
    $idx_name = "/cdrom/$cd/open.idx";

    _cdcopy_log("Clearing out ctl files from the '$tmp_dir'.\n");
    if ( $tmp_dir eq "" ) {
        # Never run the find/rm below against an empty path.
        _cdcopy_log("tmp_dir not set up\n");
        _cdcopy_shutdown();
    }
    @lines = `find $tmp_dir -name '*.ctl' -exec rm {} \\; 2>&1`;
    $rc    = $?;
    if ($rc) {
        _cdcopy_log(@lines);
    }

    # Start every CD with a clean slate: no queued patents and no
    # "previous patent" carried over from the CD before.
    @in_list   = ();
    $prev_patn = "";
    $prev_kind = "";
    my $pending_entry;    # "<patent> <out file>" of the ctl file currently open

    _cdcopy_log("Getting list of files from '$idx_name' - please wait\n");
    open( CD, "< $idx_name" )
      || die "Could not open index file '$idx_name' on CD '$cd'\n";

    #  --patn-- ------------ junk -------------- dir      dir      dir      dir      file
    # EPA2 00921504 00010000006600271646730000041456\I0921504\IMG092V1\I09215D1\09215042\PAGE0001
  INDEX_LINE:
    while (<CD>) {
        tr/A-Z/a-z/;    # the pattern below is written in lower case
        if (/(..)(..) (.*) .*\\(.*)\\(.*)\\(.*)\\(.*)\\page(....) /) {
            # for the UK patents we do something different?????
            ( $ep, $kind, $patn, $d1, $d2, $d3, $d4, $page_num ) =
              ( $1, $2, $3, $4, $5, $6, $7, $8 );
            $ep   =~ tr/a-z/A-Z/;
            $kind =~ tr/a-z/A-Z/;
            $kind =~ tr/ /_/;    # a blank in the kind code becomes '_'

            # Patent numbers are identifiers, so compare them as strings.
            if ( !defined $pending_entry || $patn ne $prev_patn ) {
                if ( defined $pending_entry ) {
                    print STDOUT "Beginning new patent '$patn'\n";
                    # close tmp file of the patent we just finished
                    close TF;
                    push @in_list, $pending_entry;
                }
                $prev_patn     = $patn;
                $prev_kind     = $kind;
                $out_fn        = "$ep$patn$kind.tif";
                $pending_entry = "$patn $out_fn";

                $tmp_file = "$tmp_dir/$patn.ctl";
                open( TF, "> $tmp_file" )
                  || die "Can't create temporyary file - '$tmp_file'\n";

                # Unbuffer TF, then put the default output handle back so
                # later bare prints do not land in the control file.
                my $old_fh = select(TF);
                $| = 1;
                select($old_fh);
            }
            print TF "filename /cdrom/$cd/$d1/$d2/$d3/$d4/page$page_num\n";
        }
        else {
            print DBG "Unrecognized line:$_\n";
            next INDEX_LINE;
        }
    }
    close CD;

    # Queue the last patent of the CD - but only if there was one, so
    # that an empty/unreadable index really yields an empty list.
    if ( defined $pending_entry ) {
        close TF;
        push @in_list, $pending_entry;
    }

    $nfiles = @in_list;
    if ( !$nfiles ) {
        # I've seen this before on s2jb1 where the jukebox was apparently hung.
        _cdcopy_log("The $cd CD doesn't appear to have any tif files on it. Something's wrong. Help!\n");
        _cdcopy_shutdown();
    }

    print STDOUT "control files for CD:'$cd' created - continuing.\n";
    print STDOUT "Removing old directories and files\n";
    rm_dirs();
    print STDOUT "Building new directories\n";

    # build directories needed
    $dir1_cnt = 0;
    $dir2_cnt = 0;
    if ( !-d "$image_dir/$cd" ) {
        # Don't count this high-level directory. Of course, we have to mkdir it.
        mkdir( "$image_dir/$cd", $mode ) || die "can't mkdir of $image_dir/$cd";
    }
    foreach my $entry (@in_list) {
        ( $p, $f ) = split( / /, $entry );
        if ( $p !~ /([0-9][0-9])([0-9][0-9])$/ ) {
            _cdcopy_log("Can't handle the file $entry on the $cd CD, because the name isn't .*digit-digit-digit-digit.??,\n");
            next;
        }
        # Layout is <image_dir>/<cd>/<last two digits>/<preceding two digits>
        $d1   = $1;
        $d2   = $2;
        $dir1 = "$image_dir/$cd/$d2";
        $dir2 = "$image_dir/$cd/$d2/$d1";
        if ( !-d $dir1 ) {
            mkdir( $dir1, $mode ) || die "can't mkdir of $dir1";
            $dir1_cnt++;
        }
        if ( !-d $dir2 ) {
            mkdir( $dir2, $mode ) || die "can't mkdir of $dir2";
            $dir2_cnt++;
        }
    }
    _cdcopy_log("Created $dir1_cnt+$dir2_cnt directories to copy $nfiles files.\n");

    #
    # Merge the pages of every patent into one output file.
    #
    $cnt = 1;
  PATENT:
    foreach my $entry (@in_list) {
        ( $p, $f ) = split( / /, $entry );
        _cdcopy_log("Processing '$p $f'\n");

        # Derive the target directory exactly as the mkdir loop above did,
        # so the output always lands in a directory that was created.
        if ( $p !~ /([0-9][0-9])([0-9][0-9])$/ ) {
            _cdcopy_log("Skipping '$p $f' - no target directory was created for it.\n");
            next PATENT;
        }
        $d1 = $1;
        $d2 = $2;

        $in_file  = "$tmp_dir/$p.ctl";
        $out_file = "$image_dir/$cd/$d2/$d1/$f";

        print STDOUT "merging [$cnt of $nfiles] $in_file -> $out_file";
        $err_count = 0;
      ATTEMPT:
        while ( $err_count < 5 ) {
            @lines = `/ips/bin/any2any $in_file $out_file 2>&1`;
            $rc    = $?;
            if ($rc) {
                $errno = $!;
                _cdcopy_log(" - failed\nCopy of $in_file to $out_file failed - $rc - $errno\n");
                # if any2any failed for strange reason then
                # we need to do something about it here.........
                $err_count++;
                next ATTEMPT;
            }

            print STDOUT " - done.\n";
            print DBG "[$cnt of $nfiles] merged $in_file -> $out_file\n";

            # get page count of this patent.
            $page_count = 0;
            @lines      = `/ips/bin/anyinfo $out_file`;
            foreach my $info (@lines) {
                if ( $info =~ /^([0-9]+) pages?\./ ) {
                    $page_count = $1;
                    last;
                }
            }
            if ( !$page_count ) {
                _cdcopy_log("This patent has zero pages - $out_file\n");
                foreach my $info (@lines) {
                    print STDOUT "'$info'";
                }
                # Count this as a failed attempt; otherwise a patent that
                # always merges to zero pages would be retried forever.
                $err_count++;
                next ATTEMPT;
            }

            # build a record for this patent.
            # use db2 date format 2000-04-21-10.55.42.000000
            @pcs      = localtime(time);
            $rec_date = sprintf(
                "%d-%02d-%02d-%02d.%02d.%02d.000000",
                $pcs[5] + 1900,
                $pcs[4] + 1,
                $pcs[3], $pcs[2], $pcs[1], $pcs[0]
            );
            $rec = "$cd $p $rec_date $out_file $page_count";
            #
            # we only put out this list *after* it has been backed up to ADSM.
            #
            push @db2list, $rec;
            last ATTEMPT;
        }
        if ( $err_count >= 5 ) {
            _cdcopy_log("Giving up on $in_file -> $out_file after $err_count failed attempts\n");
        }
    }
    continue {
        # Runs for skipped patents too, so "[n of m]" stays positional.
        $cnt++;
    }

    _cdcopy_log( "Starting adsm backup for $cd at ", ctime(time), "\n" );
    backup_to_adsm();
}

# Write the same message to the console (STDOUT) and to the DBG log.
# All arguments are concatenated, like the argument list of print.
sub _cdcopy_log {
    my $msg = join( "", @_ );
    print STDOUT $msg;
    print DBG $msg;
}

# Close the DBG, REC and LF handles and end the run. Exits with the
# default status, exactly as the inline "close ...; exit;" sequences did.
sub _cdcopy_shutdown {
    close DBG;
    close REC;
    close LF;
    exit;
}