#!/dfs/prod/ipn/bin/perl
# This script will query the patent database to summarize the INPADOC coverage.
# For each country and kind code for that country, we find
#   - The earliest Issue Date,
#   - The lowest patent for that earliest Issue Date,
#   - The latest Issue Date,
#   - The highest patent for that latest Issue Date,
#   - The number of patents.
#
# The output will be an HTML file that we scp to Patolis.  To see a sample
# output file, see http://rick.jasperfamily.org/delphion/inpadoc.collections.hti
# A plain-text version of the same report is also written locally.
#
# This typically runs on dephds059 via this inst1 crontab entry,
#
#   # Every Sunday morning at 5 AM, summarize our INPADOC coverage and
#   # scp the summary to Patolis in Japan.   Rick Jasper
#   0 5 * * 0 /home/inst1/Japio.US.Update/inpadoc.summary.pl

use strict;
use warnings;

# Set to 1 for testing: prints progress messages and redirects any fallback
# e-mail to Rick only.
my $debug = 0;

my ($mday, $mon, $year) = (localtime())[3, 4, 5];
my $date     = sprintf("%04d/%02d/%02d", 1900 + $year, $mon + 1, $mday);
my $filedate = sprintf("%04d%02d%02d",   1900 + $year, $mon + 1, $mday);

my $html_outfile    = "/home/inst1/Japio.US.Update/inpadoc.collections.html";      # Local HTML Output File
my $text_outfile    = "/home/inst1/Japio.US.Update/inpadoc.collections.$filedate"; # Local Text Output File
my $destination_fn  = "inpadoc.collections.html.$filedate";                        # File name on the Patolis side
my $scp_destination = "inst1\@ips01i:/ips/prod/include/$destination_fn";
my $mail_recipients = "k_uchiyama\@patolis.co.jp, t_nakada\@patolis.co.jp, n_muro\@patolis.co.jp, m_mori_ibm\@patolis.co.jp, y_takashima\@patolis.co.jp";
$mail_recipients = "jasper\@delphion.com" if ($debug);    # For testing purposes only.

# Sanity threshold: an HTML report smaller than this many bytes is assumed to
# be broken and is not sent.
# NOTE(review): the HTML markup below was reconstructed (see the note where the
# table is written), so the per-row size may differ from the original report.
# Confirm that a healthy report still exceeds this threshold.
my $min_html_bytes = 40000;

# DB2 credentials used for "db2 connect".
# NOTE(review): the password is stored in plain text in this script.
my $db2_user     = "inst1";
my $db2_password = "inst1_password";

# When running from cron, inst1's PATH only includes
#     /usr/bin:/etc:/usr/sbin:/usr/ucb:/usr/bin/X11:/sbin
# It does not include a bunch of other directories normally there.
# We need at least the instance's sqllib/bin directory so that db2
# commands work.  We'll fully-qualify the scp1 command so we don't
# have to put /usr/local/bin in our path.
#
# Note also that our instance is different on different systems.
# The candidates are tried in order; the first one whose profile exists is used.
# NOTE(review): the first entry pairs /home/inst1/db2profile with
# /home/db2inst1/sqllib/bin; this is copied from the original -- confirm it is intended.
my @db2_environments = (
    {
        profile  => "/home/inst1/db2profile",
        bin_dir  => "/home/db2inst1/sqllib/bin",
        instance => "inst1",
        database => "patent",
    },
    {
        profile  => "/home/caeadmin/sqllib/db2profile",
        bin_dir  => "/home/caeadmin/sqllib/bin",
        instance => "caeadmin",
        database => "pdbsrch1",
    },
);

# Text report layout: the five columns plus two-space gaps are 68 characters
# wide, which is why the subtotal lines are printed with %68d.
my $text_row_format      = "%2s  %2s  %21s  %25s  %10s\r\n";
my $text_header_format   = "%-2s %-4s %21s  %25s  %10s\r\n";
my $text_subtotal_format = "%68d\r\n";

#---------------------------------------------------------------------------
# Helpers
#---------------------------------------------------------------------------

# Prepare the environment for one DB2 instance and connect to its database.
# Takes a hash ref with the keys profile, bin_dir, instance and database.
# Dies (after dumping diagnostics to STDERR) if "db2 connect" fails.
sub connect_to_database {
    my ($env) = @_;

    # These settings are needed only in a cron environment, but they
    # don't hurt if you're running this from the command line.
    $ENV{PATH}        = join(":", grep { defined($_) && length($_) } $ENV{PATH}, $env->{bin_dir});
    $ENV{DB2INSTANCE} = $env->{instance};

    # NOTE(review): sourcing the profile inside backticks runs in a child
    # shell, so it cannot change this process's environment.  It is kept only
    # because the original script ran it.
    my $profile_output = `. $env->{profile}`;

    my $connect_output = `db2 connect to $env->{database} user $db2_user using $db2_password`;
    if ($?) {
        print STDERR "Failed to connect to the database. The db2 connect command output was \n$connect_output\n";
        print STDERR "Perhaps DB2 is down? I'm PID $$ and here's a ps -ef command to help sort things out.\n";
        print STDERR `ps -ef`;
        die "db2 connect error.\n";
    }
    return;
}

# Start "db2 -x" (no column headers) for the given SQL and return a read
# handle on its output.  Dies with $error_message if the pipe can't be opened.
sub open_db2_query {
    my ($sql, $error_message) = @_;
    my $command = "db2 -x \"$sql\"";
    open(my $fh, '-|', $command) or die $error_message;
    return $fh;
}

# Run a query that yields a single value and return the first output line,
# chomped.  Returns an empty string if db2 printed nothing.
sub fetch_single_value {
    my ($sql, $error_message) = @_;
    my $fh    = open_db2_query($sql, $error_message);
    my $value = <$fh>;
    close($fh);
    $value = "" unless defined $value;
    chomp $value;
    return $value;
}

# Convert a DB2 issue date from MM/DD/YYYY to YYYY/MM/DD.
sub isd_to_ymd {
    my ($isd) = @_;
    return substr($isd, 6, 4) . "/" . substr($isd, 0, 2) . "/" . substr($isd, 3, 2);
}

#---------------------------------------------------------------------------
# Main flow
#---------------------------------------------------------------------------

# We're gonna have to connect to the database, so do it now.
my ($db2_environment) = grep { -f $_->{profile} } @db2_environments;
die "Could not connect to a database.\n" unless $db2_environment;
connect_to_database($db2_environment);

# Extract the distinct country & kind codes and for each, the minimum & maximum
# issue date (ISD).  We exclude the 11/11/1111 ISD's, 'cause they're invalid.
print "Extracting distinct country & kind codes data.  This will take about 6 minutes ...\n" if ($debug);

my (%cc_of, %kind_of, %min_isd_of, %max_isd_of);    # all keyed by "CC:KIND"

my $range_fh = open_db2_query(
    "select icnt,itype,min(isd),max(isd) from main where datasrc='IFD' and isd<>'11/11/1111' group by icnt,itype order by icnt,itype",
    "Can't execute DB2 command.\n"
);
while (my $line = <$range_fh>) {
    chomp $line;
    # Because of -x the input doesn't have the DB2 headers.  They're shown
    # here just to help.  The input looks like,
    #
    #   ICNT ITYPE MIN(ISD)   MAX(ISD)
    #   ---- ----- ---------- ----------
    #   AP   A     02/28/1986 03/31/2003
    #   AP   A0    11/01/1984 03/31/2003
    #   AP   DO    03/31/2003 03/31/2003
    #   AP   U     06/06/2002 06/06/2002
    #   AR   A1    02/08/1973 12/30/1991
    #   AT   U3    12/27/1994 10/27/2003
    #   AU   A3    11/17/1994 10/10/2002
    #   ...
    if ($line =~ /^(\w{2})\s*(\w*)\s*(\S{10})\s*(\S{10})\s*$/) {
        my ($cc, $kind, $min_isd, $max_isd) = ($1, $2, $3, $4);
        my $key = "$cc:$kind";
        $cc_of{$key}      = $cc;
        $kind_of{$key}    = $kind;
        $min_isd_of{$key} = $min_isd;
        $max_isd_of{$key} = $max_isd;
    }
    else {
        die "Did not understand $line from DB2 data.\n";
    }
}
close($range_fh);

# For each Country & Kind, count how many patents there are.
# This query formerly was included in the above query, but we wanted to NOT include
# the 11/11/1111 ISD patents in the above query, and want to include them here.
print "Extracting counts now.  This will take another 6 minutes ...\n" if ($debug);

my %count_of;           # keyed by "CC:KIND"
my %total_for;          # keyed by country code
my $grand_total = 0;

my $count_fh = open_db2_query(
    "select icnt,itype,count(*) from main where datasrc='IFD' group by icnt,itype order by icnt,itype",
    "Can't execute DB2 command.\n"
);
while (my $line = <$count_fh>) {
    chomp $line;
    # Again no DB2 headers because of -x.  The input looks like,
    #
    #   ICNT ITYPE COUNT
    #   ---- ----- ------
    #   AP   A       1122
    #   AP   A0      2722
    #   AP   DO         1
    #   AP   U          1
    #   AR   A1     38826
    #   AR   A2      1970
    #   ...
    if ($line =~ /^(\w{2})\s*(\w*)\s*(\d*)\s*$/) {
        my ($cc, $kind, $count) = ($1, $2, $3);
        $count = 0 unless length $count;    # the pattern allows an empty count
        $count_of{"$cc:$kind"} = $count;
        $total_for{$cc} += $count;
        $grand_total    += $count;
    }
    else {
        die "Did not understand $line from DB2 data.\n";
    }
}
close($count_fh);

open(my $html_fh, '>', $html_outfile) or die "Can't write to $html_outfile.\n";
open(my $text_fh, '>', $text_outfile) or die "Can't write to $text_outfile.\n";

# NOTE(review): the HTML tags in this script were lost from the copy this was
# restored from; only the cell text survived (a heading, a five-column header
# row, data rows, and one-cell subtotal rows).  The markup below is a
# reconstruction -- compare against a previously generated report and adjust
# tags/attributes if the receiving page depends on them.
print $html_fh "<h3>$grand_total Total Patents as of $date</h3>\n"
             . "<table border=\"1\">\n"
             . "<tr><th>Country</th><th>Type</th><th>Date Range</th><th>Patent Range</th><th>Count</th></tr>\n";

# Data goes in as arguments, never as part of the format string.
printf $text_fh "%s Total Patents as of %s\r\n\r\n", $grand_total, $date;
printf $text_fh $text_header_format, "CC", "Type", "Date Range", "Patent Range", "Count";

# Now loop through those distinct country/kind codes, getting the rest of the data for each.
# A subtotal row is written each time the country changes, and once more after the loop.
my $last_country;
foreach my $key (sort keys %cc_of) {
    my $cc    = $cc_of{$key};
    my $kind  = $kind_of{$key};
    my $where = "datasrc='IFD' and icnt='$cc' and itype='$kind'";

    my $min_patn = fetch_single_value(
        "select min(patn) from main where $where and isd='$min_isd_of{$key}'",
        "Can't execute min(ISD) DB2 command.\n"
    );
    my $max_patn = fetch_single_value(
        "select max(patn) from main where $where and isd='$max_isd_of{$key}'",
        "Can't execute max(ISD) DB2 command.\n"
    );

    if (defined $last_country && $last_country ne $cc) {
        my $subtotal = $total_for{$last_country} || 0;
        print $html_fh "<tr><td colspan=\"4\">&nbsp;</td><td align=\"right\"><b>$subtotal</b></td></tr>\n";
        printf $text_fh $text_subtotal_format, $subtotal;
    }

    my $date_range = isd_to_ymd($min_isd_of{$key}) . "-" . isd_to_ymd($max_isd_of{$key});
    my $patn_range = "$min_patn-$max_patn";
    my $count      = defined $count_of{$key} ? $count_of{$key} : "";

    print $html_fh "<tr><td>$cc</td><td>$kind</td><td>$date_range</td><td>$patn_range</td><td align=\"right\">$count</td></tr>\n";
    printf $text_fh $text_row_format, $cc, $kind, $date_range, $patn_range, $count;

    $last_country = $cc;
}

my $terminate_output = `db2 terminate`;

# Subtotal for the final country, then close the table.
if (defined $last_country) {
    my $subtotal = $total_for{$last_country} || 0;
    print $html_fh "<tr><td colspan=\"4\">&nbsp;</td><td align=\"right\"><b>$subtotal</b></td></tr>\n";
    printf $text_fh $text_subtotal_format, $subtotal;
}
print $html_fh "</table>\n";

# Buffered write errors only surface at close, and we are about to ship this file.
close($html_fh) or die "Error closing $html_outfile: $!\n";
close($text_fh) or die "Error closing $text_outfile: $!\n";

my $html_size = (-s $html_outfile) || 0;
if ($html_size < $min_html_bytes) {    # Have a little bit of a sanity check here.
    print STDERR "Did not scp the HTML output file ($html_outfile) because it's too small.\nSomething must have gone wrong.\n";
}
else {
    print "Trying /usr/local/bin/scp1 -p $html_outfile $scp_destination 2>&1\n" if ($debug);
    my $scp_output = `/usr/local/bin/scp1 -p $html_outfile $scp_destination 2>&1`;
    my $status     = $?;
    if ($status != 0) {
        # scp failed: fall back to mailing the report as a uuencoded attachment.
        print "scp1 failed (status $status): $scp_output\n" if ($debug);
        print "Will send mail instead to $mail_recipients.\n" if ($debug);
        open(my $mail_fh, '|-', "/usr/bin/mail -s'Weekly INPADOC Coverage From Delphion' $mail_recipients")
            or die "Cannot send mail to $mail_recipients";
        print $mail_fh "The following attachment is the weekly INPADOC coverage from Delphion's\n";
        print $mail_fh "site in Southbury.  It was sent via e-mail like this because the automated\n";
        print $mail_fh "program got an error when it tried to scp the file directly to ips01i's\n";
        print $mail_fh "$scp_destination.\n\n";
        print $mail_fh "Please detach this attachment and put it on ips01i where it belongs,\nat $scp_destination\n";
        print $mail_fh "\n\nIf you have any questions about this e-mail, ask Rick Jasper at\n";
        print $mail_fh "rick\@delphion.com.  His phone number in San Jose, California is (408) 960-7529.\n";
        print $mail_fh "Do not reply to the sender of this e-mail.  Your reply will be undeliverable.\n";

        # To send an attachment via AIX's sendmail, we simply uuencode it.
        my $attachment = `/usr/bin/uuencode $html_outfile $destination_fn`;
        print $mail_fh $attachment;
        close($mail_fh) or warn "/usr/bin/mail reported a problem sending to $mail_recipients\n";
    }
}