#!../bin/perl # Pre-process a user query; send it to a k2tunnel or pattunnel for search; process # the output. (See end of file for history.) # (NOTE: NIPO/UK must uncomment one line in filter_IC) # Input name/value pairs which will override our defaults are: # &app=currentResults (the default)|pdfExpress|dataExtract|snapshot|clustering|patentLab|fhist # and the value determines the tab content # &GENERAL=query a complete user query, often including Verity search language # elements # &-g=n n=1 is the default and it writes html to stdout which # includes the site header/footer, the tabs and tab content # for the &app application, and a result set for the query # n=3 writes an xml result set to stdout # n=4 causes the patent numbers and scores of the result set # to be written into db2, followed by a redirect to # whatever is in patquery's REDIR_URL environment, or # whatever is specified in the -r value # n=5 causes both 1 and 4 to be done # &-m=pagesize the default is 20 (and 0 is allowed) # &-s=startrow the default is 1 # &-c=list-of-collections the list may be comma separated; multiple -c's are allowed # &-i=list-of-fields to show the list may be comma separated; multiple -i's are allowed # (and the number after the i, e.g. -i6, determines the order) # &-o=field-name the sort column (default is score); ties are resolved by # later issued date, then by patent number # &-r=cluster or wisdomain or a url cluster and wisdomain get translated to appropriate # url's (and their are some security checks) -- default is empty # &-e=add'l-args, for the -r multiple -e's are allowed; a number after the e is allowed # &-l=name-of-invoking-page this value will appear in the patquery log files # (any page/code calling either patsearch or a patquery tunnel # should supply this value, so we can easily track where searches # are originating) # &pn=list-of-selected-patents Any patent in this list will get a check mark # &pf=1 produce printer-friendly html (only with -g=1) # &k2=0 to use pattunnel instead of the default k2tunnel # &vs=dephds0NN to change to a Verity server instead of localhost # &fulltext= if present, with a value of 1 or y(es) or t(rue), all collections named are upgraded to their # named are upgraded to their fulltext versions, and downgraded with any # other non-empty value # &GENSTART/&GENEND=query fragment the query becomes GENSTART(GENERAL)GENEND -- cht's # hack for adding a on the front and/or a date # restriction on the back, of a GENERAL query # &qid=1 should only be used by the saved search/run search world and # it results in a "call" to IPNqrytime after a -g=1 page has # been formed # &nchost=wwwX or elephant or full https:// url -- to change nc server # &cr=n1+n2+n3 (obsolete) sets the startrow to 1/n1/n2/n3 depending on # whether we have First.x, Prev.x, Next.x or Last.x present # &language=de or es ... to change languages (obsolete) # &Search.x or .y ignored (for use with a Search button) # (some other .x .y arguments are ignored as well) # &skipX=anything is ignored (X can be any string allowed in a name, or empty) # # &workfile= &wlref= &add= are arguments only used in the Workfile # &wlname= part of the current results page (it also uses &pn). # NOTE! Anything not mentioned above becomes part of the query. In particular, the # &FieldNN_Type= and &FieldNN_Text= arguments passed by the various search forms become # part of the query passed on through the patquery tunnel. All of the "-" arguments # are passed on through the tunnel, for example, -a (see warning below). use strict; sub text_filter_sed(\$); sub getDatasrc(\%); sub getImagePages(\%); sub getDerwentTitle(\%); sub getAge($); sub getInventor(\%); sub renderThumbnail($$$$); sub trim(\$;\$\$\$\$); sub hashDump(\%); sub sortFields(\@\@); sub getUserLists($); sub getAbstract($$); sub details_url($); sub formatILang($); sub ul1_open($); sub ul1_close($); sub ul1_closebr($); sub getPndisp(\$$); sub insertHitTable($\@); sub getHitTable($$\@); sub displayHtlmError($); sub setDelphionCookie(\%\$); sub trimPath(\$); sub url_decode($); sub url_encode($); sub trackQuery($$); sub callTrack($); sub flushout($); sub readproperties($); use CGI qw/:standard/; use CGI::Cookie; use FCGI; use constant PnhashDatasrc => 0; use constant PnhashImagePages => 1; use constant PnhashDrawing => 2; use constant PnhashQueryTps => 3; use constant PnhashIsOnTps => 4; use constant PnhashDwPan => 5; use constant PnhashDwTitle => 6; use constant PnhashInventor => 7; my $fcgiRequest = FCGI::Request(); my $max_fcgi_requests = 1000; my $fcgi_requests = 0; my $use_db2 = 1; use DBI qw(:sql_types); use LWP::Simple qw(get); use HTTP::Request::Common; use LWP::UserAgent; use File::Basename; my $patsearch; if ($0 =~ /.*\/(.*)\/(.*)/) { $patsearch = "/$1/$2"; } else { $patsearch = "/cgi-bin/patsearch"; } my $debug = 0; my $netdatasnap=1; #------- Set (some of) our global variables ------- my (%free_coll, %hitlist_op); $free_coll{"bibonly"} = 1; $free_coll{"lblistings"} = 1; my $default_pagesize = 20; my $default_body_color = qq(BGCOLOR="WHITE"); my $default_left_color = qq(BGCOLOR="#cccc99"); my $default_showsort_color = qq(BGCOLOR="#dddddd"); my ($body_color, $left_color, $showsort_color); my $default_row1_color = qq(BGCOLOR="WHITE"); my $default_row2_color = qq(BGCOLOR="#FAFAEA"); my $geneva = qq!FACE="Verdana,Geneva,Arial,Helvetica"!; my $empty_col = "  \n"; my $nbsp = " "; # makes code a little less ugly, no major upside my $A_etag = ""; use constant HeaderLineCt => 3; use constant QueryInListCt => 50; # Remove this hash when non-subscriber popups are removed +++ $hitlist_op{"scroll"} = 1; $hitlist_op{"refine"} = 1; $hitlist_op{"sort"} = 1; $hitlist_op{"choose"} = 1; # for doing complete urlencoding - from RFC 1738... my %urlenc; for (my $i=0x20; $i<=0x7e; $i++) { $urlenc{chr($i)}=chr($i); } $urlenc{' '}="+"; $urlenc{'"'}="%22"; # how bout that $urlenc{'#'}="%23"; $urlenc{'%'}="%25"; $urlenc{'&'}="%26"; $urlenc{':'}="%3A"; $urlenc{';'}="%3B"; $urlenc{'<'}="%3C"; $urlenc{'='}="%3D"; $urlenc{'>'}="%3E"; $urlenc{'?'}="%3F"; $urlenc{'@'}="%40"; $urlenc{'['}="%5B"; $urlenc{'\\'}="%5C"; $urlenc{']'}="%5D"; $urlenc{'^'}="%5E"; $urlenc{'`'}="%60"; $urlenc{'{'}="%7B"; $urlenc{'}'}="%7D"; $urlenc{'|'}="%7C"; $urlenc{'~'}="%7E"; my %opposite_coll=( usapps => 'usappsft', usappsft => 'usapps', bibonly => 'patentft', patentft => 'bibonly', epa => 'epaft', epaft => 'epa', epb => 'epbft', epbft => 'epb', pct => 'pctft', pctft => 'pct', deapps => 'deappsft', deappsft => 'deapps', de => 'deft', deft => 'de' ); my $txt_contact_support = "Please feel free to contact our customer support team should you have any questions:\n"; $txt_contact_support .= qq! Contact Customer Support.\n!; # preload HTML template file(s) my %template; my %templatefiles; $templatefiles{'tabs'}="../templates/patsearchTabs.html"; $templatefiles{'currentResults'}="../templates/currentResults.html"; $templatefiles{'0results'}="../templates/currentResults0.html"; $templatefiles{'topNav'}="../templates/currentResults2.html"; $templatefiles{'bottomNav'}="../templates/currentResults3.html"; $templatefiles{'choose'}="../templates/currentResults4.html"; $templatefiles{'orderBox'}="../templates/orderBox.html"; $templatefiles{'workFile'}="../templates/workFile.html"; $templatefiles{'clustering'}="../templates/clustering.html"; $templatefiles{'snapshot'}="../templates/snapshot.html"; $templatefiles{'printerfriendly'}="../templates/printerFriendly.html"; $templatefiles{'printerfriendlyTop'}="../templates/printerFriendly2.html"; $templatefiles{'trailer'}="../htdocs/hti/trailer.hti"; $templatefiles{'header'}="../htdocs/hti/header.hti"; while (my ($key,$fn) = each %templatefiles) { $template{"$key"}=""; if (open(DATA,$fn)) { while () { $template{"$key"}.=$_; } close DATA; } else { print STDERR "patsearch: missing template file $fn\n"; } } my %preferences; # ------- Global constants belonging to g1OutputFilter, here because we use fcgi ------- # strings for various "wordings" my $txt_ia_fast_buy = "PDF"; my $txt_ia_fast_buy_short = "PDF"; my $txt_ia_add_cart_short = "Select"; my $txt_pl_fields = "Show fields: "; my $txt_desc = "Sort descending"; my $txt_asc = "Sort ascending"; my $sort_href_title = "Sort DIRECTION"; my $sort_href_windowstatus = "Click to sort DIRECTION"; my $ascending_direction = "ascending"; my $descending_direction = "descending"; my $txt_sort_help = "(To sort a column, click label at top)"; my $txt_sick_server = "We are experiencing a temporary service outage and we are unable to complete your search.\n"; $txt_sick_server .= " We are working to fix this problem as quickly as possible and apologize for any inconvenience.\n"; $txt_sick_server .= " We appreciate your patience; please try your search again later.\n"; my $txt_bad_collection = "We do not recognize the collection you specified.\n"; $txt_bad_collection .= " Please recreate your search using one of our search pages,\n"; $txt_bad_collection .= qq!the Quick/Number\n!; $txt_bad_collection .= " search page is a good place to start.\n"; $txt_bad_collection .= " You can also review descriptions of all of our collections on our\n"; $txt_bad_collection .= qq! Collection Details page.\n!; # g1OutputFilter's constant hashes and arrays my (%hd_txt,%hd_sort,%colldesc,%col_pos); my (@selectable_colls,@selectable_ftcolls,@non_ftcolls); # col_order indicates left-to-right column positioning, col names must be same as hd_txt, hd_sort, etc. my @col_order = ("VDKVGWKEY","IMG", "TITLE","DWT", "AB", "INVT", "ASSG","PD","AD","DP","CLAS","PATN","WEEK","ASSC", "SCORE"); $hd_txt{"VDKVGWKEY"} = "Publication"; $hd_txt{"PD"} = "Pub. Date"; $hd_txt{"TITLE"} = "Title"; $hd_txt{"CLAS"} = "IPC Code"; $hd_txt{"ASSG"} = "Assignee"; $hd_txt{"SCORE"} = "Score"; $hd_txt{"PATN"} = "Patent"; # Derwent extra fields from here $hd_txt{"WEEK"} = "Week"; $hd_txt{"AD"} = "Filed"; $hd_txt{"DP"} = "Priority"; $hd_txt{"ASSC"} = "Code"; $hd_txt{"INVT"} = "Inventors"; $hd_txt{"AB"} = "Abstract"; $hd_txt{"DWT"} = "Derwent Title"; # Default sort directions (0 is descending; 1 is ascending) $hd_sort{"VDKVGWKEY"} = 0; $hd_sort{"PD"} = 0; $hd_sort{"CLAS"} = 1; $hd_sort{"ASSG"} = 1; $hd_sort{"TITLE"} = 1; $hd_sort{"SCORE"} = 0; $hd_sort{"PATN"} = 0; # Derwent extra fields from here $hd_sort{"WEEK"} = 0; $hd_sort{"AD"} = 0; $hd_sort{"DP"} = 0; $hd_sort{"ASSC"} = 1; $hd_sort{"INVT"} = 1; $colldesc{"usapps"} = "US (Applications)"; $colldesc{"usappsft"} = "US (Applications - Full text)"; $colldesc{"bibonly"} = "US (Granted)"; $colldesc{"patentft"} = "US (Granted - Full text)"; $colldesc{"epa"} = "European (Applications)"; $colldesc{"epb"} = "European (Granted)"; $colldesc{"epaft"} = "European (Applications - Full text)"; $colldesc{"epbft"} = "European (Granted - Full text)"; $colldesc{"deapps"} = "German (Applications)"; $colldesc{"de"} = "German (Granted)"; $colldesc{"deappsft"} = "German (Applications - Full text)"; $colldesc{"deft"} = "German (Granted - Full text)"; $colldesc{"japan"} = "Abstracts of Japan"; $colldesc{"pct"} = "WIPO PCT Publications"; $colldesc{"pctft"} = "WIPO PCT Publications (Full text)"; $colldesc{"inpadoc"} = "INPADOC"; $colldesc{"derwent"} = "Derwent (Test)"; $colldesc{"derdemo"} = "Derwent (Demo)"; $colldesc{"lblistings"} = "Patents with IP Listings"; $colldesc{"ipcom"} = "IP.com"; @selectable_colls=qw(bibonly usapps epb epa de deapps japan pct inpadoc); @selectable_ftcolls=qw(patentft usappsft epbft epaft deft deappsft pctft); @non_ftcolls=qw(japan inpadoc lblistings); my %subset_coll=(epaft=>1, epbft=>1, deappsft=>1, deft=>1, pctft=>1); # one-time initialization for the CurrentResults and other apps (before the fcgi loop) my $header_body_color = $default_body_color; my $header_left_color = $default_left_color; my $header_showsort_color = $default_showsort_color; # get colors from lines like: # if ($template{'header'} =~ //) { $header_body_color = $1; $header_body_color =~ s/\\//g; } if ($template{'header'} =~ //) { $header_left_color = $1; $header_left_color =~ s/\\//g; } if ($template{'header'} =~ //) { $header_showsort_color = $1; $header_showsort_color =~ s/\\//g; } my %info; my ($yyyy,$mmdd)=split(' ',`date +"%Y /%m/%d"`); $info{"refineOneyear"}=sprintf(" AND PD>%4d%s",$yyyy-1,$mmdd); $info{"refineTwoyears"}=sprintf(" AND PD>%4d%s",$yyyy-2,$mmdd); $info{"refineThreeyears"}=sprintf(" AND PD>%4d%s",$yyyy-3,$mmdd); $info{"refineFiveyears"}=sprintf(" AND PD>%4d%s",$yyyy-5,$mmdd); $info{"refineTenyears"}=sprintf(" AND PD>%4d%s",$yyyy-10,$mmdd); # Capture values from the initial environment, before we enter the fcgi request loop # (where %ENV will have a different request environment, each time) my $default_gmode = $ENV{'DEFAULT_GMODE'}; my $IPNROOT = $ENV{'IPNROOT'}; if(!$IPNROOT) { $IPNROOT = ".."; # .. = devel/ipn/, prod/ipn/, stage/ipn } # Determine Verity Server to be used (if &vs= isn't set in the QUERY_STRING) # (All verity servers use port 80) # We also set the default tunnel here, and the default database, # since they also depend on which environment we are running in. my ($default_verity_server, $default_tunnel); my $RULE_FILE = $ENV{'RULE_FILE'}; my $default_pqdatabase; if (defined($RULE_FILE)) { $RULE_FILE =~ m|(.*/)?(.+)|; if (index("$2","dev") >= 0) { # use localhost for development nodes $default_verity_server="localhost"; $default_tunnel = "pattunnel"; $default_pqdatabase = "pdbfree" } elsif (index("$2","ipn") >= 0) { # use network dispatched searchfree for regular nodes $default_verity_server="search"; $default_tunnel = "k2tunnel"; $default_pqdatabase = "pdbfree" } else { # use localhost/k2tunnel for any other environment $default_verity_server="localhost"; $default_tunnel = "k2tunnel"; $default_pqdatabase = "pdbfree" } } else { $default_verity_server="localhost"; $default_tunnel = "k2tunnel"; $default_pqdatabase = "patent" } my $PQDATABASE = $ENV{'PQDATABASE'}; $PQDATABASE = $default_pqdatabase unless ($PQDATABASE); my $PQUSERID = $ENV{'PQUSERID'}; $PQUSERID = "ipsrun" unless ($PQUSERID); my $PQPW = $ENV{'PQPW'}; $PQPW = "ipsrun_password" unless ($PQPW); my $unique = $ENV{'RANDOM'}; $unique = $$ unless $unique; my $response_html = "/tmp/search.$unique.html"; my $response_http = "/tmp/search.$unique.http"; my $qsfilename = "/tmp/search.$unique.qs"; my $httppost = "$IPNROOT/bin/httppost"; my $httppost_mode = ($debug) ? "-g" : ""; my $fcgi_mode = $fcgiRequest->IsFastCGI(); my $only_once = ($fcgi_mode) ? 0 : 1; my $TrackableApp = '^(quicksearch|advquery|boolquery)$'; # Connect to db2 just once if we are using fcgi, and prepare our selects for image pages # and for testing the database status (i.e. is it now out of rotation) my ($db2h, $db2sth, $statsth, $db2dsrch, $db2imagh, $db2ageh, $db2dwtitle, $db2dwtitleh, $db2invth); if ($fcgi_mode && $use_db2) { setupDB2(); connectDB2(); } # Declarations of global variables set in our fcgi accept() loop my ($hprefix, $cookie, $userid, $shopperid, $userLevel, $server, $server_addr, $annotation, $features); my ($remote_addr, $request_uri, $whoami, $debugfromenv); my ($gmode,$pagesize,$lval, $pf, $k2, $tunnel, $vs, $url_to_call, $nchost, $dbname); my ($language, $langarg, $workfile, $qid, $genstart, $genend, $isExplicitFields); my ($SINPUT, %SARRAY); my $appName; my (%coll_index, $fulltext_field); my @pn; my %delphionCookie; # set up a signal to quit gracefully my $gotsig = 0; $SIG{'INT'} = sub {$gotsig = 1; print "$patsearch($$)...Quitting as requested\n"}; while($only_once || ($fcgi_mode && !$gotsig && $fcgi_requests++ < $max_fcgi_requests && $fcgiRequest->Accept() >= 0)) { # How do we set an FCGI ExitStatus in this perl world? +++ $only_once = 0; print STDOUT "Content-type: text/html\n\n









Debugging
" if ($debug > 1); # Everytime we get a new request, if we are in fcgi mode, we do a quick check to # see if the database we are using is still in rotation. If it is not, we set up # to exceed our life quota, but, we do process this current request with the current db. if ($db2h && $statsth && $fcgi_mode) { my $rotation = ""; my $ok = $statsth->execute(); if ($ok) { my ($attval, $attname); $statsth->bind_col(1, \$attval); $statsth->bind_col(2, \$attname); while ($statsth->fetch()) { trim($attname, $attval); if ($attname eq 'DBSTATUS') {$rotation = $attval;} elsif ($attname eq 'DBNAME') {$dbname = $attval;} } } else { warn "$patsearch($$): fetch of dbstatus failed\n ", $db2h->errstr; $rotation = "OUT"; # if we can't get there, it must be out of service } $statsth->finish(); if (uc($rotation) eq "OUT") { $fcgi_requests = $max_fcgi_requests; warn "$patsearch($$): quitting after this request, dbstatus of $PQDATABASE is OUT\n"; } } $hprefix = "http"; $hprefix = "https" if (defined($ENV{HTTPS}) && uc($ENV{'HTTPS'}) eq 'ON'); # ON if ssl, reliably $cookie = (defined($ENV{HTTP_COOKIE})) ? $ENV{'HTTP_COOKIE'} : ""; $server = (defined($ENV{SERVER_NAME})) ? $ENV{'SERVER_NAME'} : ""; $server_addr = $ENV{'SERVER_ADDR'}; $remote_addr = $ENV{'REMOTE_ADDR'}; $request_uri = (defined($ENV{"REQUEST_URI"})) ? $ENV{"REQUEST_URI"} : $patsearch; $whoami = ($request_uri =~ /(.*)\?.*/) ? $1 : $request_uri; %delphionCookie = (); # clear/reset setDelphionCookie(%delphionCookie, $cookie); $userid = ""; $shopperid = ""; $userLevel="unknown"; if ($delphionCookie{"IPNuser.user"}) { $userid=$delphionCookie{"IPNuser.user"}; $userLevel="basic"; # he is at least a basic user -- the features test could upgrade } $annotation=""; if ($delphionCookie{"IPNuser.annotval"}) { # this value is url encoded; decode and minimally html encode $annotation=$delphionCookie{"IPNuser.annotval"}; $annotation =~ s/\+/ /g; $annotation =~ s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge; $annotation = minimal_htmlencode($annotation); } if ($delphionCookie{"Ticket.id"}) { $shopperid=$delphionCookie{"Ticket.id"}; } $features=""; if ($delphionCookie{"Ticket.features"}) { # this value has just letters, numbers, and _'s so no html encode needed $features=$delphionCookie{"Ticket.features"}; if ($features =~ /(premier|unlimited)/i) { $userLevel=lc($1); } } # Get our input string my $METHOD = (defined($ENV{REQUEST_METHOD})) ? $ENV{'REQUEST_METHOD'} : ""; if ( $METHOD eq 'POST') { read(STDIN, $SINPUT, $ENV{'CONTENT_LENGTH'}); # POSTs may have a trailing newline chomp $SINPUT; } elsif ($METHOD eq 'GET' ) { $SINPUT = $ENV{'QUERY_STRING'}; } else { print STDERR "patsearch($hprefix): invoked with invalid REQUEST_METHOD: $METHOD\n"; print STDOUT "Content-type: text/html\n\n"; print STDOUT "UNEXPECTED METHOD $METHOD\n"; if ($debug>1) { print STDOUT "

Environment:\n"; while (my ($key,$value) = each %ENV) { print STDOUT "
$key=$value\n"; } } next; $fcgi_requests += 100; # lose 100 lives, instead of exit(1); } $debugfromenv = qq| \n|; $debugfromenv .= qq| \n|; $debugfromenv .= qq| \n|; $debugfromenv .= qq| \n|; $debugfromenv .= qq| \n|; if ($SINPUT eq "") { # Would it be better to return HTTP error, and let server handle reply? select(STDOUT); $| = 1; print STDOUT "Content-type: text/html\n\n"; OutputHeader("No search terms supplied","error"); print STDOUT $debugfromenv if ($debug); my $tail = "

"; $tail .= "No search terms were supplied."; $tail .= "
\n"; $tail .= "
To continue, please choose one of the IP Search items "; $tail .= "or use your browser's Back button. \n"; $tail .= "Bookmarking of search results is not supported.\n"; $tail .= "

\n"; $tail .= "\n"; # end the table started by OutputHeader() print STDOUT $tail; OutputTrailer(); next; } # Build the hash with name/value pairs, removing the url encoding undef %SARRAY; $isExplicitFields = 0; my @combo = split(/&/, $SINPUT); foreach my $combo (@combo) { $combo =~ s/\+/ /g; my ($input_name, $value) = split(/=/,$combo); if (!$isExplicitFields && $input_name =~ /^-i/) {$isExplicitFields = 1;} $input_name =~ s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge; $value =~ s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge; $value =~ s/\s/ /g; # embedded \n's and such need to get normalized print "[$combo][$input_name][$value]
\n" if ($debug > 2); next if ($input_name =~ /^skip/i); # remove all tags named skipXXX if($SARRAY{$input_name}) { # If an entry appears more than once, append a space and new # value to the old value. (ex: -c coll_f ... -c coll_b -> -c coll_f coll_b) $SARRAY{$input_name} .= " $value"; } else { $SARRAY{$input_name} = $value; } } # JTI the app value is the application mode (tab button) to display $appName="currentResults"; # default mode if (defined($SARRAY{"app"})) { $appName=$SARRAY{"app"}; delete $SARRAY{"app"}; } # (Leave in the original, not-quite-right, argument name for a while +++) if (defined($SARRAY{"-app"})) { $appName=$SARRAY{"-app"}; delete $SARRAY{"-app"}; } # The -l value was supposed to be a simple name, identifying the html page this patsearch # was invoked from. But, some pages don't seem to pass this at all (which causes us not # to log the search on the Verity Server), and some pages seem to think that is a good # name for a patsearch-invoking button. Try to repair any mess here. delete $SARRAY{"x"}; delete $SARRAY{"y"}; delete $SARRAY{"-l.x"}; delete $SARRAY{"-l.y"}; $lval = $SARRAY{"-l"}; if (!defined($lval) || $lval eq "") { $lval = "unknown"; $SARRAY{"-l"} = "$lval"; } # See if our user is permitted to search other than free collections # (This test quickly skims off the obvious cases -- we check again # later, after we have completely filtered the input.) if (TestSubscriber() == 1) { next; # we redirected him } # Look for the "g" mode tag and value; set default if not present $gmode = $SARRAY{"-g"}; if (!defined($gmode) || $gmode eq "") { $gmode = ($default_gmode) ? $default_gmode : "1"; $SARRAY{"-g"} = "$gmode"; } $pagesize = $SARRAY{"-m"}; if (!defined($pagesize) || !($pagesize =~ /\d+/)) { $pagesize = $default_pagesize; # default $SARRAY{"-m"} = $pagesize if ($gmode eq '1' || $gmode eq '5'); # tell patquery } # Reject xml requests (-g 3) if not from localhost if (($gmode eq "3") && ($remote_addr !~ /localhost|127\.0\.0\.1|$server_addr/)) { OutputXML401(); next; } # Only for -g 1 mode, respect a request for a printer-friendly page $pf = $SARRAY{"pf"}; if (defined($pf)) { delete $SARRAY{"pf"}; # remove from the query $pf = 0 if ($gmode ne "1"); # +++ what is gmode for derwent? } # Obtain k2 Server tag and value, if present $k2 = $SARRAY{"k2"}; if (defined($k2)) { $tunnel = ($k2) ? "k2tunnel" : (("$k2" eq "0")?"pattunnel":$default_tunnel); delete $SARRAY{"k2"}; # remove from the query } else { $tunnel = $default_tunnel; } # Obtain Verity Server tag and value, if present $vs = $SARRAY{"vs"}; if ($vs) { $url_to_call="http://$vs/fcgi-bin/$tunnel"; } else { $url_to_call="http://$default_verity_server/fcgi-bin/$tunnel"; } delete $SARRAY{"vs"} if (defined($vs)); # Obtain the language, if present, and remove from the query $language = $SARRAY{"language"}; delete $SARRAY{"language"} if (defined($language)); $langarg = ($language) ? "&language=$language" : ""; # Obtain nchost tag and value, if present, and remove from the query. # Or, set from the environment if NCHOST is present. $nchost = $SARRAY{"nchost"}; if (defined($nchost)) { delete $SARRAY{"nchost"}; } else { $nchost = $ENV{'NCHOST'}; } # Remove any "search" tag from the query (the Search button uses these # so that netscape users will get a nice mouseover display). delete $SARRAY{"Search.x"}; delete $SARRAY{"Search.y"}; delete $SARRAY{"search.x"}; delete $SARRAY{"search.y"}; delete $SARRAY{"SEARCH.x"}; delete $SARRAY{"SEARCH.y"}; # Remove any sorting button, and print friendly button, tags from the query # (If the NAME we put on the button changes, these have to change.) delete $SARRAY{"Sort ascending.x"}; delete $SARRAY{"Sort ascending.y"}; delete $SARRAY{"Sort descending.x"}; delete $SARRAY{"Sort descending.y"}; delete $SARRAY{"printer friendly.x"}; delete $SARRAY{"printer friendly.y"}; # Set -s for navigation (cr is now unused and should be removed soon +++) my $cr = $SARRAY{"cr"}; if(defined($cr)) { delete $SARRAY{"cr"}; $cr =~ /(\d*)(\s)+(\d*)(\s)+(\d*)/; $SARRAY{"-s1"} = 1 if(defined($SARRAY{"First.x"})); $SARRAY{"-s$1"} = 1 if(defined($SARRAY{"Prev.x"})); $SARRAY{"-s$3"} = 1 if(defined($SARRAY{"Next.x"})); $SARRAY{"-s$5"} = 1 if(defined($SARRAY{"Last.x"})); delete $SARRAY{"First.x"}; delete $SARRAY{"First.y"}; delete $SARRAY{"Prev.x"}; delete $SARRAY{"Prev.y"}; delete $SARRAY{"Next.x"}; delete $SARRAY{"Next.y"}; delete $SARRAY{"Last.x"}; delete $SARRAY{"Last.y"}; } if (defined($SARRAY{"-r"})) { # Lookup table for -r value my %url_hash; $url_hash{"cluster"} = "http://localhost/cluster"; $url_hash{"wisdomain"} ="$nchost/servlet/com.delphion.ipn.user.IPNAddWDItem"; # Before we overwrite a -r value, ensure proper clustering authentication if ($SARRAY{"-r"} eq "cluster" && basename($0) ne "clustersearch") { # Tom said to put in this check, but didn't say what he wanted # to do for failure, so, put a message in the httpd-error log and # say something about not being authorized to the user. print STDERR "Unauthorized clustering requested, $0 was called\n"; OutputUnauthorized(); next; } $SARRAY{"-r"} = $url_hash{$SARRAY{"-r"}}; } # Alter collection names, based on the setting of the fulltext arg if (defined($SARRAY{"fulltext"})) { my $fulltext = $SARRAY{"fulltext"}; delete $SARRAY{"fulltext"}; if (defined(my $collections = $SARRAY{"-c"}) && $fulltext ne "") { $fulltext = $fulltext =~ /^[1yt]/i; my @colls = split(/[\ ,]+/, $collections); $collections=""; foreach my $cname (@colls) { if (($cname =~ /ft/ && !$fulltext)||($cname !~ /ft/ && $fulltext)) { $cname = $opposite_coll{$cname} if (defined($opposite_coll{$cname})); } $collections .= " $cname"; } $SARRAY{"-c"} = substr($collections, 1); } } # Get the qid of a saved search (passed by a Run of a saved search) $qid = $SARRAY{"qid"}; if (defined($qid)) { delete $SARRAY{"qid"}; } if (defined($SARRAY{'pn'})) { @pn = split(/[,\s]+/, $SARRAY{'pn'}); delete $SARRAY{'pn'}; } else { @pn = (); } # Names of collections which may cause various transforms my @cnames = qw(bibonly patent patentft usapps usappsft epb epbft epa epaft de deft deapps deappsft pct pctft japan inpadoc inpadup); undef %coll_index; foreach my $cname (@cnames) { $coll_index{$cname} = -1; } # Do filtering on what is left of the whole array (should only be query parts now). inputFilter(); # Now double-check on whether the user is really allowed to do this search if (TestSubscriber() == 1) { next; # we redirected him } # Modify the filtered GENERAL query string by any GENSTART and GENEND values $genstart = $SARRAY{"GENSTART"}; $genend = $SARRAY{"GENEND"}; delete $SARRAY{"GENSTART"} if (defined($genstart)); delete $SARRAY{"GENEND"} if (defined($genend)); if (($genstart || $genend) && defined($SARRAY{"GENERAL"})) { $SARRAY{"GENERAL"} = "$genstart(".$SARRAY{"GENERAL"}.")$genend"; } # Check for assigneefamily my $input_name_test; my $value_test; my $af_seen = 0; #== DISABLED == my $NEVER = 0; if($NEVER){ while ( ($input_name_test, $value_test) = each(%SARRAY) ) { if(($input_name_test =~ /assigneefamily/i)||($value_test =~ /assigneefamily/i)) { print STDERR "Denied assigneefamily field use, $0 was called\n"; OutputUnauthorized("The assigneefamily field has been temporarily disabled.
\nFor more information please see Hierarchy Data Availability."); $af_seen=1; last; } } } if($af_seen) { $af_seen=0; next; } # If work file administration $workfile = $SARRAY{"workfile.x"}; if (defined($workfile)) { delete $SARRAY{"workfile.x"}; # remove from the query delete $SARRAY{"workfile.y"}; ManageWorkfile(); # This will redirect next; } if (!$isExplicitFields) { $SARRAY{"-i"} = "VDKVGWKEY TITLE PD SCORE"; } elsif ($gmode!=3 && ($SARRAY{"-i"} . $SARRAY{"-i0"} . $SARRAY{"-i1"}) !~ /VDKVGWKEY/i) { # patquery doesn't force key column into output so make sure all result sets have key $SARRAY{"-i"} = "VDKVGWKEY " . $SARRAY{"-i"}; # (the above test, and the assignment, need "code improvement" +++) } # Build the url encoded query string. my $amper = ""; my $i = 0; my $out_qstring = ""; foreach my $tag (%SARRAY) { if ($i == 0) { my $value = $SARRAY{$tag}; $tag =~ s/([^ a-zA-Z0-9-_])/sprintf("%%%02lx",unpack('C',$1))/ge; $tag =~ s/[ ]/+/g; $value =~ s/([^ a-zA-Z0-9-_])/sprintf("%%%02lx",unpack('C',$1))/ge; $value =~ s/[ ]/+/g; print "tag=$tag val=$value
\n" if ($debug > 1); $out_qstring .= $amper . $tag . "=" . $value; $amper = "&"; } $i = !$i; } # Create a file containing the QUERY_STRING # (This is never supposed to fail. If it does, the httppost will fail, so there will # be no html file to process and we will end up telling the user that the server is # down -- better than a die here.) open(QSFILE, ">$qsfilename"); print QSFILE "$out_qstring"; close(QSFILE); # Call URL and save responses, both HTML and HTTPS system "$httppost $httppost_mode \"$url_to_call?\" $qsfilename $response_html >$response_http;" ; # Depending on the mode used to invoked the patquery tunnel, write the # proper output, based on the response file, to STDOUT. See patquery help text for gmode explanation. if ($gmode eq "4" || $gmode eq "8") { redirOutputFilter(); } elsif ($gmode eq "3") { # Perhaps someday we should avoid the extra write/read from disk +++ g3PassThru(); } else { g1OutputFilter(); } # Cleanup (for non-fcgi_mode, redirOutputFilter did its own and never returned) unlink ($qsfilename, $response_http) unless ($debug); unlink ($response_html) unless ($debug); exit(0) if (!$fcgi_mode); $fcgiRequest->Flush(); $debug = 0; # Don't leave debug on, for the next user, in fcgi-mode } # while FCGI accept() # cleanup before exiting if ($db2sth) { $db2sth->finish(); $db2h->disconnect(); } print STDERR "$patsearch($$): $max_fcgi_requests lives used up, exiting...\n" if ($fcgi_requests>$max_fcgi_requests); exit(0); # Subroutine to test whether or not the current search is allowed. If not, # we redirect the user off to access jail. # returns 1 if the user was sent to jail; 0 otherwise # sub TestSubscriber { return 0 if ($debug>1); # +++ debug +++ return 0 if ($remote_addr =~ /localhost|127\.0\.0\.1|$server_addr/); my @colls = split(/[\ ,]+/, $SARRAY{"-c"}); my $allfree = 1; my @opts = split(/[\ ,]+/, $SARRAY{"-l"}); foreach my $opname (@opts) { if($opname eq "wisdomain"){ $allfree = 0; last; } } foreach my $cname (@colls) { if (!$free_coll{$cname}) { $allfree = 0; last; } } if (!($allfree) and ($userLevel !~ /premier|unlimited/) and !($hitlist_op{$lval})) { # He's not select(STDOUT); $| = 1; my $location = "/access_control/go.html.en$whoami?$SINPUT\n"; print "Location: $location"; print "Content-type: text/html\n\n"; system("echo \"$location\" >$qsfilename") if ($debug); return(1); } else { return(0); } } # TestSubscriber # --------------------------------------------------------------------------------- # Here we loop through our HTML-variables, mostly to detect search strings that we # need to filter, but we also process -c (collection name). # # There can be great confusion in this discussion if one didn't clearly understand # the term "field". In most cases, e.g. in Delphion's Help pages, in the Boolean # screen's "All Fields" box, or even here referring to the FieldNN_Type variable # names, "field" just means anything searchable, but in the Verity world there's a # big difference between a zone and a field. Here, I use "field" to mean anything # searchable, and I hereby coin "Vfield" to designate a Verity field. # # The 5 cases for search strings, are # 1) The variable's name is GENERAL, whose value is the search string, which # might contain the full Verity syntax. # # 2) The variable's name is of the form FieldNN_Type (where NN is a number), # whose value is RAW, in which case the corresponding FieldNN_Text variable # contains the search string, which also may contain the full Verity syntax. # # 3) The variable's name is of the form FieldNN_Type, whose value is not RAW. # In this case, the variable's value specifies the zone/Vfield to search, and # the corresponding FieldNN_Text variable contains the search string, which # cannot have the full Verity syntax. # # 4) The variable's name is of the form FieldNN_Type, whose value is not RAW # (just like case 3), but the corresponding FieldNN_Text variable is undefined. # Where is the search string, you ask? In hidden -a form variables. See below. # # 5) The variable's name is of the form FieldNN_Type, whose value is not RAW # (just like 3 & 4), but the corresponding FieldNN_Text variable is defined, # but empty/null. We delete these FieldNN_Type/FieldNN_Text pair of variables. # # Realize that the full search criteria can be in multiple pieces. The Boolean # screen for example, can pass many different FieldNN_Type/FieldNN_Text variable # pairs to us. We must look at each piece and filter what we can. # # ------------------------------------------------------------------------------- my %filters; sub inputFilter { # This "Branch Tree" consolidates the previous bunch of nested if/elseif/elseif ... # statements by maintaining a list of zone keywords and their corresponding # filtering routine. Testing is case-insensitive, so any of ki, Ki, kI, KI, will # match. Note how multiple keywords can be defined for the same routine. # (class and mainclass, and the nationalclass full zone names, are not # included here since there has to be some sane way to avoid the class # filters, and the mistakes they make) %filters = ("KI" => \&filter_KI, "IC" => \&filter_IC, "MC" => \&filter_IC, "INDEXCLASS" => \&filter_IC, "NC" => \&filter_NC, "CNC" => \&filter_NC, "UP" => \&filter_UP, "PN" => \&filter_PN, "USREFS" => \&filter_PN); $fulltext_field = 0; # Loop through all variable's pairs of name/value. my ($input_name, $value); while ( ($input_name, $value) = each(%SARRAY) ) { if ( $input_name =~ / ^\s*GENERAL\s*$ /ix ) { # Temporary code to recognize (and remove) &d at the end of a refine'd user query, # to set $debug to 1, and GET instead of POST if ( $value =~ s/&d(\)*)\s*$/$1/ ) { $debug = 1; } # Farm off the handling of input strings containing the full Verity syntax, replacing # our original search string, $SARRAY{$input_name} with the filtered equivalent. $SARRAY{$input_name} = Parse_and_Filter_Search_String($value); } elsif ( $input_name =~ /^(Field\d+)_Type$/i ) { # For a FieldNN_Type variable name, the search string is the value of the corresponding # FieldNN_Text variable, whose name we now determine, being careful to preserve case. my $type_input_name=$input_name; # Squirrel away original variable name. $input_name="${1}_Text"; # Note we are changing our while-loop variable, so # Warning! Beware of the -a backdoor through patsearch, into patquery. In both the # Advanced and Boolean screens, in the From/To sets of Year/Month/Day pull-downs, # Tom sets up FieldNN_Type variables, without their corresponding FieldNN_Text # partners. "Where are the values/search strings?" you ask? Hidden away in a variable # named -a (yes, with the dash). For example, specifying only a "From" year of 2002 will # generate these html variable name/value pairs, # Name Value # =========== ================================= # Field46_Type RAW # Field46_OP ) AND # -a &Field46_Text=AD%3e=2002-01-01 # with Field46_Text being undefined, so we fail this test below (in Perl, undefined # variables equal ""). # if ($SARRAY{$input_name} eq "") { # Is FieldNN_Text string undefined or empty? if ( defined($SARRAY{$input_name}) ) { # If it's really empty and not just delete $SARRAY{$type_input_name}; # undefined, then delete the pair. delete $SARRAY{$input_name}; } } else { # We want to remove search strings that are really empty, i.e. they look like any of # () () () if ( $SARRAY{$input_name} =~ /^\s*(<\s*(and|or)\s*>\s*)?\(\s*\)\s*$/i ) { delete $SARRAY{$type_input_name}; # Pretend FieldNN_Type & FieldNN_Text delete $SARRAY{$input_name}; # pair, never existed. } # Ok, we have a corresponding FieldNN_Text variable and it's not empty. if ($value =~ /^\s*RAW\s*$/i) { # Farm off the handling of input strings containing the full Verity syntax. $SARRAY{$input_name} = Parse_and_Filter_Search_String($SARRAY{$input_name}); } else { # We've got a FieldNN_Type variable name whose value is not RAW, so its # value must be the zone or Vfield to search. Use our filters branch # tree to call the appropriate filter for this zone, if one exists. if (defined($filters{uc "$value"})) { # Allow mixed case zones. # All filter routines take the search string as input and return it, possibly modified. $SARRAY{$input_name} = &{$filters{uc "$value"}}($SARRAY{$input_name}); } # If we're searching in any of the CLAIMS, DESCRIPTION, or BACKGROUND zones, # then insure we're doing it in the Full-Text collections. # Insure we properly handle the "(TITLE,ABSTRACT,CLAIMS)" case you'll get from # the Boolean screen, when you select "Title, Abstract or Claims". if ($value =~ /\b(CLAIMS|DESCRIPTION|BACKGROUND)\b/i) { $fulltext_field = 1; # Upgrade the collection being searched to full text. } } } # This variable's name is not GENERAL nor one of our FieldNN_Type guys. What else do we handle? } elsif ( $input_name =~ / -c /x ) { # Determine absence or presence of collections; -1 indicates absence my @colls = split(/[\ ,]+/, $value); foreach my $cname (@colls) { $coll_index{$cname} = 1; } } # else it's not a variable name we handle here. Just ignore it. } # End of our "while" loop, going through all variable names. # Now that we have parsed all of the input, as much as we are going to, # see if we need any collection adjustments. # Ensure that there is at most one US (Granted) collection searched, and at most # one US (Applications), and at most one EPx or PCT # collection searched, and, that we are using the fulltext version if the user is # searching claims (or claimsLL), or description (or descriptionLL) or background. if ( $coll_index{"patent"} >= 0 ) { # we automatically upgrade patent to patentft $coll_index{"patentft"} = 1; $coll_index{"patent"} = -1; } my $have_us = coll_test_and_upgrade("bibonly", "patentft"); my $have_usapps = coll_test_and_upgrade("usapps", "usappsft"); my $have_epa = coll_test_and_upgrade("epa", "epaft"); my $have_epb = coll_test_and_upgrade("epb", "epbft"); my $have_deapps = coll_test_and_upgrade("deapps", "deappsft"); my $have_de = coll_test_and_upgrade("de", "deft"); my $have_pct = coll_test_and_upgrade("pct", "pctft"); # We also modify the specification of the inpadoc collection, # depending on what other collections are searched. If no other # collection is being searched, we add all of inpadup (the collections # which contain patents that we have elsewhere in our database); # otherwise, we add only the inpadup collections which don't overlap. if ( $coll_index{"inpadoc"} >= 0 && $coll_index{"inpadup"} == -1 ) { my $ncolls = 0; foreach my $coll (keys %coll_index) { $ncolls++ if ( $coll_index{$coll} >= 0 ); } if ( $ncolls == 1 ) { # just inpadoc $coll_index{"inpadup"} = 1; } else { if ( $coll_index{"japan"} == -1 ) { $coll_index{"inpadup2"} = 1; $coll_index{"inpadup3"} = 1; $coll_index{"inpadup4"} = 1; $coll_index{"inpadup5"} = 1; } $coll_index{"inpadup6"} = 1,$coll_index{"inpadup7"} = 1 if ( $have_us == 0 ); $coll_index{"inpadup10"} = 1 if ( $have_usapps == 0 ); } } # reform the collection input variable, with the upgrades and inpadups, # in a cannonical order, so that we will always get the same search results # no matter what order the various search pages specify collections (although # as of 6/12/02, they all used the same order, which is our cannonical order) my $colls = ""; foreach my $name (sort(keys %coll_index)) { if ( $coll_index{$name} >= 0 ) { $colls .= " " . $name; } } $SARRAY{"-c"} = substr($colls, 1); } # End of inputFilter subroutine # The functions below are used by the input filter # -------------------------------------------------------------------------------------- # Function to parse the passed search string which can contain the full Verity syntax. # Our goals are to identify the pieces of the search string that require filtering, # call the appropriate filter subroutine, and pass back the filtered string. # We also detect full-text zones and upgrade to the full-text collections if need be. # # This routine is called when the html-form-variable's name is GENERAL, or one of the # FieldNN_type names whose value is RAW. # # Like the filter routines, we are passed the search string and we return the search # string, possibly modified. For example, given # (( bananas claims) and (084/DIG.022 < IN> NC )) # we want to pick out the 084/DIG.022, let filter_NC mangle it to 084DIG022, and # return the prettier (( bananas claims ) and ( 084DIG022 NC)) # # ================================================================================== # ============= Important Warning to Future Filter Writers ============= # ================================================================================== # # Note that we do not attempt to parse all legal Verity syntax. That has proven to # be too complicated and besides, we don't have to. Our purpose is to call the filter # subroutines only when necessary, which convert or normalize naive/beginner users' # input, into our canonical form that one would not expect the beginner to necessarily # know. We can always claim a part of the search term is "too complicated", thus # identifying an experienced user, and not handle it. # # Our job is made easier due to # 1) The filters only handle Verity zones, not Vfields, so we only need look for # the keyword. (Previous versions of patsearch also filtered date Vfields # and were looking for Vfield operators, e.g. PD >= 2004-04-22.) # 2) We only filter search strings targeted towards exactly one zone. Boolean screen # "All-Fields" searches, or GENERAL searches from the Advanced or Results screens, # that don't specify a zone at all (eg, 084/DIG.022), will not get filtered. # But if a zone is specified (eg, 084/DIG.022 NC), it will get filtered. # 3) This also allows us to skip the handling of this otherwise perfectly valid # Verity syntax: "ABC (IC,CLAIMS)" by saying it's "too complicated". # We'll only filter the search string if exactly one zone is specified. # (We do however parse those zones and do the full-text upgrades if appropriate) # 4) The search terms that we DO filter, are in a known format with only certain # characters allowed. This means that don't need to worry about some keyword # being inside quotes. For example, "084/DIG.022" NC (with the quotes) # won't get filtered. Should it get filtered? Arguably yes, but I don't want # to code the parsing of junk like "084 and (022)" NC, or worse, # "really sneaky" NC. Remember, filtering is to help beginners. # If we see a quote or any unusual character, they're not a beginner and their # search string doesn't get filtered. # # Yet even with the above simplifications, this routine can still get things wrong # with really pathological cases, e.g. "(000) NC" CLAIMS # This 000 will be passed to filter_NC (oops!) # We also don't detect and may filter invalid Verity syntax, e.g. # (AND 000) NC Ah, well. Tough. Verity will complain. # -------------------------------------------------------------------------------------- sub Parse_and_Filter_Search_String { my ($search_string) = @_; my $filtered_search_string=""; my ($next_unconsumed); my $next_unconsumed=0; # Our cursor as we consume the search string. my $end_of_search_string=length($search_string); # For end-of-string checking. # Loop through our passed search string, picking out any and all 's. # Note the use of Perl's /g modifier, which continues scanning our search string # where we last left off. Also see note regarding pos() function at the end of # this loop. In this loop, we'll build up our $filtered_search_string variable, # which will get returned to our caller. while ($search_string =~ / \s* < \s* (in) \s* > \s* /ixg) { # We found an . Capture before and after pieces. my $IN = $1; # Preserve case of IN keyword. my $before_piece=substr($`,$next_unconsumed,(length($`)+$next_unconsumed)); my $after_piece=$'; my $after_piece_length=length($after_piece); my $zone_start_position = pos($search_string); my ($after_parenL, $paren_level, $zone_name, $after_parenR, $filtered_search_term, $before_this_search_term); # Parse out the zone name(s). Handle any of # NC ( NC ) (CLAIMS) (NC,IC) NC and more stuff NC,IC ((NC)) # Set $zone_name and $zone_end_position variables. my $zone_cursor = 0; my $atom=substr($after_piece,$zone_cursor,1); if ($atom eq '(') { # If the zone starts with a parenthesis, $after_parenL="("; # remember it, then scan right to the # matching ")" or the end of the string, $paren_level=1; # properly handling nested parenthesis. while ($paren_level && $zone_cursor<$after_piece_length) { $zone_cursor++; # Advance right one character. $atom=substr($after_piece,$zone_cursor,1); if ($atom eq "(") {$paren_level++} elsif ($atom eq ")") {$paren_level--} } if ($zone_cursor<$after_piece_length) { # Normal case where we found our matching ")". $after_parenR=")"; # Remember ")" so we can restore it later. $zone_name = substr($after_piece,1,$zone_cursor-1); } else { $after_parenR=""; # No matching ")" found. It's ok, we'll do $zone_cursor--; # the best we can, setting the zone_name to $zone_name = substr($after_piece,1); # the complete after_piece. } $zone_name =~ s/^\s+//; # Remove multiple leading white-space $zone_name =~ s/\s+/ /g; # Compress multiple imbeddded white-space $zone_name =~ s/ , /,/g; # Change "aa , bb" to "aa,bb" $zone_name =~ s/\s+$//; # Remove multiple trailing white-space } else { # This zone name does not start with an "(" (normal case). $after_parenL=""; # Remember lack of parenthesis so our filtered search $after_parenR=""; # string follows suit. # Here we find the end of our zone relying on the fact that all zone names only # contain alphanumeric characters. if ($after_piece =~ /^(\w+)/) { $zone_cursor=length($&)-1; # Set position of last character of zone name. $zone_name=$1; } else { # No zone specified. Bizarre case of , or $zone_name=""; # at the end of the search string, or some such. $zone_cursor=-1; # Fudge position of last good character of zone name. } } # Ok, we've parsed out the zone name. See if we've got a filter for it. # If so, only then do we look at the before_piece. Note that an exact match # is required here. 000NC and 000(NC) will match ok and get filtered, # but 000"NC" and 000("NC") and 000(IC,NC) although perfectly # valid Verity syntax, won't match and will not get filtered. # if (! defined($filters{uc "$zone_name"})) { # If there's no filter for this zone, $before_this_search_term=$before_piece; # then set things up so we return $filtered_search_term=""; # the original search term. } else { # Else we have a filter for this zone. We can't postpone this any longer. # It's time to start looking at the search string before our . my ($this_search_term, $search_term_parenL, $search_term_parenR); my $before_cursor = length($before_piece)-1; if ($before_cursor==-1) { # No search term? How bizarre. $before_this_search_term = ""; $this_search_term = ""; } else { # Normal case where we have a search term. my $search_term_end; $atom=substr($before_piece,$before_cursor,1); # Find the beginning of our search term. # if ($atom eq ")") { # If our search string term with a right parenthesis, # then blindly go left to the matching "(" or to the # start of the string. $search_term_parenR=")"; $search_term_end = $before_cursor-1; $paren_level=1; while ($paren_level && $before_cursor) { $before_cursor--; # Back up another character. if ($before_cursor==-1) {last} # Ran out of before_string. $atom=substr($before_piece,$before_cursor,1); if ($atom eq ")") {$paren_level++} elsif ($atom eq "(") {$paren_level--} } if ($atom="(" && ! $paren_level) { # Did we end normally, by finding the matching "("? $search_term_parenL="("; # Remember the left parenthesis for later restoration. $before_this_search_term = substr($before_piece,0,$before_cursor); $before_cursor++; # Go back to first character after matching "(". $this_search_term = substr($before_piece,$before_cursor,$search_term_end-$before_cursor+1); } else { # Ran out of before_string before finding the matching "(" $search_term_parenL=""; # That's ok. Remember lack of left parenthesis and $before_this_search_term = ""; # we'll call the whole before_string, our search term. $this_search_term = substr($before_piece,$before_cursor,$search_term_end-$before_cursor+1); } $this_search_term =~ s/^\s+/ /; # Pretty up our search term, removing excess whitespace. $this_search_term =~ s/\s+$/ /; $this_search_term =~ s/ +/ /g; } else { # Am in non-parenthesis mode. Scan leftward 'till beginning-of-string # accepting any word character (letter, digit, _), [/-.], space, or # bareword and|or|not. $search_term_end = $before_cursor; while ($before_cursor>=0) { if ((($before_cursor > 3) && (substr($before_piece,$before_cursor-4,5)=~/\Wand\W/i)) || (($before_cursor== 3) && (substr($before_piece,$before_cursor-3,4)=~/not\W/i )) || (($before_cursor > 3) && (substr($before_piece,$before_cursor-4,5)=~/\Wnot\W/i)) || (($before_cursor > 2) && (substr($before_piece,$before_cursor-3,4)=~/\Wor\W/i ))) { $before_cursor++; # We found a keyword. Terminate this search term. last; } elsif ($atom !~ /[\w\/\. ,*?;]/) { # Check for valid search term characters. $before_cursor++; # Terminate scan at first invalid character. last; } # Else this character is ok inside a search term. $before_cursor--; # Continue scanning left-ward. $atom=substr($before_piece,$before_cursor,1); } if ($before_cursor==-1) { # Did we run out of before_piece before finding $before_this_search_term = ""; # some other reason to terminate our search term? $this_search_term = $before_piece; # Yep. } else { # Nope. Carve out search term. $before_this_search_term = substr($before_piece,0,$before_cursor); $this_search_term = substr($before_piece,$before_cursor); } } # End of Before non-parenthesis mode. } # End of normal case where we have a search term. $this_search_term =~ s/^\s+//; # Remove multiple leading white-space $this_search_term =~ s/\s+/ /g; # Compress multiple imbeddded white-space $this_search_term =~ s/\s+$//; # Remove multiple trailing white-space if ($this_search_term) { # If there's anything of our search term left, # then filter it. $filtered_search_term="$search_term_parenL" . &{$filters{uc "$zone_name"}}($this_search_term) . "$search_term_parenR"; } else { # else it's an empty, bogus search term. $filtered_search_term="$search_term_parenL$search_term_parenR"; # Maintain search term parenthesis. } } $filtered_search_term =~ s/^\s+/ /; # Compress multiple leading white-space $filtered_search_term =~ s/\s\s/ /g; # Compress multiple imbeddded white-space $filtered_search_term =~ s/\s+$/ /; # Compress multiple trailing white-space $filtered_search_string.=" $before_this_search_term $filtered_search_term <$IN> $after_parenL$zone_name$after_parenR"; if ($zone_name =~ /\b(CLAIMS|DESCRIPTION|BACKGROUND)\b/i) { $fulltext_field = 1; # Upgrade the collection being searched to full text. } # Advance Perl's position inside our search string, to the next character beyond our # zone name(s). The pos() function works in tandem with Perl's /g pattern matching. $next_unconsumed = $zone_start_position + $zone_cursor + 1; pos($search_string)=$next_unconsumed; } # End of our while loop, looking for 's. # When there are no more 's (or there were none at all), append the rest of # our search_string, that is, the part we haven't consumed yet. if ($next_unconsumed < $end_of_search_string) { $filtered_search_string.=substr($search_string,$next_unconsumed); } $filtered_search_string =~ s/^\s+//; # Remove multiple leading white-space $filtered_search_string =~ s/\s+/ /g; # Compress multiple imbedded white-space $filtered_search_string =~ s/\s+$//; # Remove multiple trailing white-space return $filtered_search_string; } # Conversion of * in the KIND field. We filter specifically, any letter followed by *, # accepting multiple kind code search arguments separated by commas. For example, # We filter c* or A1,B2,C*,D4 # into (c,c?) into A1,B2,(C,C?),D4 sub filter_KI { my ($in_var_string) = @_; my $output=""; # If there's any kind of funny character at all in this search string, then don't # modify it at all. Just pass everything on untouched. if ($in_var_string !~ /^[\w *?,]*$/) { $output = $in_var_string; } else { # Else iterate across all comma-separated search foreach (split /,/, $in_var_string) { # terms, building up our filtered output. my $term; # Filter this term if it's letter*, else don't muck with it. if ( /^\s*([A-Za-z])\*\s*$/) { $term = "($1,$1?)"; } else { $term = $_; } # Rebuild the search string with the commas we're parsing on, in our foreach/split loop. if ($output eq "") { # First term? I.E. First time through? $output = $term; # If so, it doesn't get a comma. } else { # Else separate this term from earlier ones with a $output .= ",$term"; # comma. } } # End of foreach split ... } return $output; } # Filter input of IPC Class Search Term # Name: IC, MC, INDEXCLASS # (class and mainclass are not included here since there has to be some sane # way to avoid this filter, and the mistakes it makes) # # The format of IPC class numbers as rendered to the user and thus, in our input, is in # three parts, with the first part mandatory and the other two optional (but if the second # part exists, then so does the third). The parts are separated by a space and slash and # leading zeros in the second part are removed, eg G02F 1/31. # # In DB/2, we normalize the second part (if it exists), zero-padding it on the left to # three characters. We also remove the slash, leaving us with G02F 00131, for example. # Verity indexes this as two separate words. When we search, we give search arguments to # Verity like G02F 00131. Verity treats this as two words which must follow each other, # thus returns what we want. # # But before searching, Verity does it checking to see if this search might return too many # results and this is done on a word boundary. Thus if the user entered "G02F 1/*", we # filter this to "G02F 001*" and this will fail because of the 001* term (there are too # many patents indexed with words starting with 001). We've got to do something about that. # # Our solution is based upon the length of valid IPC classes. After padding the second # part to three characters, there are five possible lengths of valid IPC classes. # Length of 4 (e.g. G02F ) ipc_ttl table has 630 different 4-character IPC's. # Length of 10 (e.g. G02F 00200 ) ipc_ttl table has 61,147 different 10-character IPC's. # Length of 11 (e.g. G02F 001383 ) ipc_ttl table has 6,127 different 11-character IPC's. # Length of 12 (e.g. G02F 0011333 ) ipc_ttl table has 405 different 12-character IPC's. # Length of 13 (e.g. G02F 00113363) ipc_ttl table has 2 different 13-character IPC's. # If we filter anything, we will never pass along *'s to Verity in the first two parts. # Instead, we will always pad the first two parts with ?'s, to these fixed lengths. # # For example (these examples are not exhaustive or comprehensive), # # G* will get filtered to G??? # G0* will get filtered to G0?? # G02* will get filtered to G02? # # G02F will get filtered to just G02F, which will also pick up all the 10-13 # or G02F* character IPC classes, e.g. G02F 001/383 since Verity has indexed # or G02F * these as two words. # # G02F *1 will get filtered to (G02F ??1??G02F ??1???G02F ??1????) # G02F *12 will get filtered to (G02F ?12??G02F ?12???G02F ?12????) # # G02F 1 you might think G02F 1* would get filtered to the impossibly long # or G02F 1* (G02F 1????G02F 1?????G02F 1?????? # (G02F 01???G02F 01????G02F 01????? # (G02F 001??G02F 001???G02F 001????) # but due to the 1?* terms, this query is too complex for Verity, so just like # we did in the National Class filter (filter_NC below), we'll interpret this as # G02F 1/* and filter both to # (G02F 001??G02F 001???G02F 001????) # If the user doesn't like it, tough. He can put in his own multi-term search or # easier, to put some question marks or extra parenthesis in his search term, # which we then, won't filter at all. # # G02F 12 again, you might think G02F 12* would get filtered to some impossibly long thing, # or G02F 12* but to keep things consistent with the G02F 1* case above, we simply filter this to # (G02F 012??G02F 012???G02F 012????) # # G02F 1*2 will get filtered to # (G02F 1?2??G02F 1?2???G02F 1?2???? # G02F 012??G02F 012???G02F 012????) # # G02F 123/x will never match since the third part is always 2 or more characters long, # so we guess he meant 0x and filter to G02F 1230x. # G02F 123/* will get filtered to (G02F 123??G02F 123???G02F 123????) # # We also try to handle more complex input, for example, G02* 1*2/* will get filtered to # (G02? 1?2??G02? 1?2???G02? 1?2???? # G02? 012??G02? 012???G02? 012????) # We may wind up with *'s in the third part, for example, # G02F 3/*x will get filtered to G02F 003*x. # # This code used to include terms with nnn?????, but, since there are way too # many occurrences of 8-digit numbers being indexed (for patent numbers) for # such a search to work; and, because there are only two classes in the whole # space of IPC's which have an 8-digit group/subgroup cht removed them. # (For the curious, the two classes are: G02F 00113357 and G02F 00113363.) sub filter_IC { my ($in_var_string) = @_; my ($output, $part1, $part2, $part3, $length1, $length2, $length3, $part2b, $length2b, $term); $output = $in_var_string; # Default output to input in case some of these tests fail. # If there's any Verity keyword (and, or, not) or any kind of funny character in # this search string, then don't modify it at all. if ( ($in_var_string !~ /^[A-Z\d\/ \*,]*$/i) || ($in_var_string =~ /[ <]and[ >]/i ) || ($in_var_string =~ /[ <]or[ >]/i ) || ($in_var_string =~ /[ <]not[ >]/i ) ) { $output = $in_var_string; # Pass everything on untouched. } else { $output=""; # Reset output to null 'cause we're gonna # split our input (the search argument) into separate, comma-delimited terms, # and iterate across all search terms, rebuilding our filtered output. foreach (split /,/, $in_var_string) { # Parse out the three parts of the IPC Class search argument, # 0-4 characters, delimited by an optional space, with possible asterisks (*), # 0-3 characters, delimited by an optional slash (/) with possible asterisks (*), # and the rest. # If this term doesn't follow this syntax, then don't muck with it. if ( /^\s* ([\*a-zA-Z\d]{0,4}) \s* \ ? # First part \s* ([\*a-zA-Z\d]{0,3}) \s* \/? # Second part \s* ([\*a-zA-Z\d]* ) \s* $/x ) { # Third part $part1=$1; $part2=$2; $part3=$3; $length1 = length($part1); $length2 = length($part2); $length3 = length($part3); IC_CASE1: { # If part1 is null or "*", e.g. " 123/45" or "* 123/45", then we can # just drop it and let Verity search on simply part2 & part3, # unless a single * is the whole term. if ($length1 == 0 || $part1 eq "*" ) { # " 123/45" or "* 123/45" $part1 = "" if ($length2>0); # Just search on the rest. last IC_CASE1; } # The philosophy of whether or not to append an asterisk to any of the # parts or not, is to do so if he has not asked for a specific class. # A one-, two-, or three-character first part can never be a specific # class, so we wild card that here, but four-character first parts # that contain no wild-card characters, are specific. Remember, due # to the check for "funny" characters above, we don't see ?'s here. # [cht does not agree with this philosophy; she thinks we should be # adding wildcard ?'s only if the user already had an * in his search term] # Only *s wild-card characters are possible. if (length($part1) < 4 && index($part1,"*")<0) { $part1 = substr($part1 . "????",0,4); # Pad to length 4. last IC_CASE1; } # If he's used *'s in his first term, e.g. C* or C*D or *D, we're # going to be nice and expand them to C??? or C??D or ???D for him. # But it's too much to ask us to do anything with *0*. That 0 could # in any position, It's not reasonable for us to filter that into # (0??? ?0?? ??0? ???0). Likewise for *0**. So only # handle here, exactly one * in something that's 4 characters or less. # Perl Note: $part1=~tr/*// counts how many *'s there are in $part1. if ( $part1=~tr/*// == 1 && length($part1) <= 4 ) { substr($part1, index($part1,"*"), 1) = "?" x (5-length($part1)); last IC_CASE1; } } # End of IC_CASE1 block. # The problem with wild-carding the second part is, we also zero-pad it. # Thus if the guy asks for G02F *1/04, we need to make that G02F ??1/04. # That one is easy, as is the 2-character case, G02F *12/04 to G02F ?1204. # But asterisks on the right side are more difficult. You might guess # that G02F 1* should catch G02F 001*, G02F 01?*, and G02F 1??*, but # as mentioned above, Verity fails the 1????? search as too complex, so we # interpret G02F 1* as G02F 1/*, yielding # (G02F 001??G02F 001???G02F 001????) $length1 = length($part1); $part2b = ""; IC_CASE2: { if ( ($length2 == 0 || $part2 eq "*") && ($length3 == 0 || $part3 eq "*") ) { $part2=""; last IC_CASE2; } if ( $length2 == 0 || $part2 eq "*") { # /something or */something ==> ??? $part2 = "???"; last IC_CASE2; } if ( $length2 == 1 ) { # C ==> 00C $part2 = "00$part2"; last IC_CASE2; } if ( $part2 =~ /^\*(.)$/ ) { # *C ==> ??C $part2 = "??$1"; last IC_CASE2; } if ( $part2 =~ /^(.)\*$/ ) { # C* ==> 00C (intentionally not C?? or 0C?) $part2 = "00$1"; last IC_CASE2; } if ( $length2 == 2 ) { # CC ==> 0CC $part2 = "0$part2"; last IC_CASE2; } if ( $part2 =~ /^\*(..)$/ ) { # *CC ==> ?CC $part2 = "?$1"; last IC_CASE2; } if ( $part2 =~ /^(.)\*(.)$/ ) { # C*C ==> C?C or 0CC, since * could be null. $part2 = "$1?$2"; $part2b = "0$1$2"; last IC_CASE2; } if ( $part2 =~ /^(..)\*$/ ) { # CC* ==> 0CC (intentionally not CC? to keep $part2 = "0$1$2"; # consistent with the C* case above. last IC_CASE2; } } # End of IC_CASE2 block. # As a nice guy, we'll convert C09D 123/4 to C09D 123/04, else it'll never match. if ( length($part3) == 1 && $part3 ne "*" ) { $part3 = "0$part3" } $length1 = length($part1); # Recompute current lengths. $length2 = length($part2); $length2b = length($part2b); # If length of $part3 is less than 2, it must be null or "*" (used below). $length3 = length($part3); # If there are any holes in my logic here, at least default to something reasonable. $term = $part1 . ("$part2$part3" ? " $part2$part3" : ""); IC_CASE3: { # For the C* or C0* or C09* or C09D or C09* / or C09D /* or C09D */* cases # (i.e. a null part2 & part3), all we need do return is $part1. if ( $length3 < 2 && ($length2 == 0 || $part2 eq "???") ) { $term = $part1; # If just * or " /", term would be blank. Oh, well. last IC_CASE3; } # Any $part1, $part2 not C*C, and null or * part3. e.g. C09D 123 or C09D 123/*. if ( $length3 < 2 && $length2b == 0 ) { $term = "($part1 $part2??$part1 $part2???$part1 $part2????)"; last IC_CASE3; } # E.G. C09D C*C or C09D C*C/* if ( $length3 < 2 && $length2b > 0) { $term = "($part1 $part2??$part1 $part2???$part1 $part2????"; $term .= "$part1 $part2b??$part1 $part2b???$part1 $part2b????)"; last IC_CASE3; } # E.G. C09D CCC/xx or C09D CC*/xx or C09D CCC/xxx or others if ( $length3 > 1 && $part3=~tr/*?// == 0 && $length2b == 0) { $term = "$part1 $part2$part3"; last IC_CASE3; } # E.G. C09D C*C/xx or C09D C*C/xxx if ( $length3 > 1 && $part3=~tr/*?// == 0 ) { $term = "$part1 $part2$part3$part1 $part2b$part3"; last IC_CASE3; } # All others C09D C*C cases, e.g. C09D C*C/12*3 if ( $length2b > 0 ) { $term = "$part1 $part2$part3$part1 $part2b$part3"; last IC_CASE3; } } # End of IC_CASE3 block. } else { # Else this term didn't match our "don't muck with it" match. $term = $_; } # Rebuild the search string with the commas we're parsing on, in our foreach/split loop. if ($output eq "") { # First term? I.E. First time through? $output = $term; # If so, it doesn't get a comma } else { # Else separate this term from earlier ones with a $output = $output . "," . $term; # comma. Note that we remove trailing spaces after } # the comma. EG "A01, B02" => "A01,B02" } # End of foreach split ... # For the National Patent Offices, uncomment this line (4 of 4). # $output =~ s/ //g; } return $output; } # filter_IC # Filter input of US National Class # Name: filter_NC # Example Input: 084/174; 084/423.R, 084/DIG.022, D5, 084DIG23, AB* # Output: 084174, 084423R, 084DIG022, (0D5???*,D05???*), 084DIG23, (AB????*,0AB???*) # Note: This filter should not be used for national GB classes, the UKPO # version of patsearch will only use this filter if only the US # collection is being searched -sjb sub filter_NC { my ($in_var_string) = @_; my ($output, $ls1, $ts1, $ls2, $ts2, $slash, $dot, $length, $first2chars, $part1, $part2, $part3, $part1b, $part2b, $term); $output = $in_var_string; # Default output to input in case some of these tests fail. # If there's any Verity keyword (and, or, not) or any kind of funny character in this # search string, don't modify is at all. The idea is, if this guy knows what he's # doing by specifying a Verity special symbol (and ? ) in his search, we don't # want to get in his way (Carol's suggestion), or he's given us some junk, we'll # rely on Verity to fail on this search. if ( $in_var_string !~ /^[a-zA-Z\d\/\.,; \*]*$/ || ($in_var_string =~ / and /i ) || ($in_var_string =~ / or /i ) || ($in_var_string =~ / not /i ) ) { $output = $in_var_string; # Pass everything on untouched. } else { $in_var_string =~ tr?\*\*//\.\.??s; # Also eliminate repeated special characters, eg ** # We want to accept multiple national class search arguments, which in the Verity # search language, are separated by commas or spaces (e.g. 084/423,432/436). # An improvement suggested by CHT is, because the detail page shows national # class separated by semicolons, somebody might "swipe" (cut & paste) that # semicolon-separated list (e.g. 084/423;084/432) and use it. Thus, here we $in_var_string =~ s/;/,/g; # translate all semicolons into commas. $output=""; # Reset output to null 'cause we're gonna # split our input (the search argument) into separate, comma-delimited terms, # and iterate across all search terms, rebuilding our filtered output. foreach (split /,/, $in_var_string) { # Parse out the three parts of the National Class search argument, # 0-3 characters, delimited by an optional slash (/) with possible asterisks (*), # 0-3 characters, delimited by an optional comma (.) with possible asterisks (*), # and the rest. # If this term doesn't follow this syntax, then don't muck with it. if ( /^(\*?) ([a-zA-Z\d]{0,3}) (\*?) (\/?) # First part (\*?) ([a-zA-Z\d]{0,3}) (\*?) (\.?) # Second part ([\*a-zA-Z\d]*) $/x ) { # Third part $ls1=($1 eq "*"); # True if there was a leading star on the first part. $part1=$2; $ts1=($3 eq "*"); # True if there was a trailing star on the first part. $slash=($4 eq "/"); # True if we found a slash. $ls2=($5 eq "*"); # True if there was a leading star on the second part. $part2=$6; $ts2=($7 eq "*"); # True if there was a trailing star on the second part. $dot=($8 eq "."); # True if we found a period. $part3=$9; $length = length($part1); $first2chars = substr($part1,0,2); $part1b = ""; # Interpret part 1. In the comments for these cases below, # l = Any letter [A-Z] # d = Any digit [0-9] # c = Any character [A-Z0-9] # D = The letter D or d # 0 = The digit zero # () = Optional part NC_CASE1: { # If nothing was specified for part1, make it ???. if ( $length == 0 ) { # / or . or * or */ or *. ==> ??? $part1 = "???"; last NC_CASE1; } if ( $length == 1 && $ls1 && !$ts1) { # *C/ ==> ??C $part1 = "??$part1"; last NC_CASE1; } # C* is a special case. One could try to interpret this as a search for # C?????* or 0C????* or 00C???*, but the C?????* guarantees that the # search will fail since there are many words starting with any single # character you choose. So we're going to interpret this as the most # restrictive, yet still plausible and guaranteed to at least work as # far as Verity is concerned, 00C???*. Again, if the user doesn't like # the way we interpreted their search, they can use a different syntax. # # So C* or C*/ is handled the same as C or C/, ignoring the trailing *. # This may be counter-intuitive, but heck, searching for every National # Class starting with a given single character, doesn't make sense. # Or worse, *C*. # # If the intent was that 3*/ would find 030-039/* and/or 300-399/*, # you would need to specify 03?/ or 3??/. if ( $length == 1 ) { # C or C* or *C* ==> 00C $part1 = "00$part1"; last NC_CASE1; } # [Dd][0-9] (e.g. D5) is a very special case because these national classes # are in the DB/2 database and Verity index in two different ways, 0D5 or D05. # The simpliest case, D5, must be converted to 2 terms, D05 or 0D5. Likewise, # 0D has to be converted to 0D? or D0?. But to really complicate things, # D5*/ might be expected to include D50-D59/ as well as D5, so that would # have to be converted to 0D5???* or D05???* or D5????*. Aaaaargh! No, we # only handle the simpliest two cases now. if ( $length == 2 && $first2chars=~"0([Dd])" && !$ls1 && !$ts1 ) { # 0D => 0D? or D0? $part1 = "0$1?"; $part1b = "${1}0?"; last NC_CASE1; } if ( $length == 2 && $first2chars=~"([Dd])([0-9])" && !$ls1 && !$ts1 ) { # D5 => 0D5 or D05 $part1 = "0$1$2"; $part1b = "${1}0$2"; last NC_CASE1; } if ( $length == 2 && $first2chars=~"([Dd])([0-9])" && $ls1 && !$ts1 ) { # *D5 => ?D5 or D05 /..(.)/; $part1 = "?$1$2"; $part1b = "${1}0$2"; last NC_CASE1; } if ( $length == 2 && $ls1 && !$ts1 ) { # *CC ==> ?CC $part1 = "?$part1"; last NC_CASE1; } if ( $length == 2 && !$ls1 && $ts1 ) { # CC* ==> CC? or 0CC $part1b = "0$part1"; $part1 = "$part1?"; last NC_CASE1; } if ( $length == 2 ) { # CC or *CC* ==> 0CC $part1 = "0$part1"; last NC_CASE1; } if ( $length == 3 && $first2chars=~"0([Dd])" && !$ls1 && !$ts1 ) { # 0D5 => 0D5 or D05 $part1b = "${1}0" . substr($part1,2,1); last NC_CASE1; } if ( $length == 3 && $first2chars=~"([Dd])0" && !$ls1 && !$ts1 ) { # D05 => D05 or 0D5 $part1b = "0$1" . substr($part1,2,1); last NC_CASE1; } } # End of NC_CASE1 block. $length = length($part2); $part2b = ""; NC_CASE2: { # If nothing was specified for part2, make it ???. if ( $length == 0 ) { # /. or . or * or */ or *. ==> ??? $part2 = "???"; last NC_CASE2; } if ( $length == 1 && $ls2 && !$ts2) { # *C ==> ??C $part2 = "??$part2"; last NC_CASE2; } # A note on the C* case: We expand C* to 00C* only, intentionally NOT expanding # it to include C??* and 0C?* for two reasons: First, we don't expand C* like # that for our part1 above, so to be consistent, we don't for part2 as well. # Second, this is how we expand part2 in filter_IC so although the IC reason # does not apply here, to be consistent, we do the same expansion here. if ( $length == 1 ) { # C or C* or *C* ==> 00C $part2 = "00$part2"; last NC_CASE2; } if ( $length == 2 && $ls2 && !$ts2 ) { # *CC ==> ?CC $part2 = "?$part2"; last NC_CASE2; } if ( $length == 2 && !$ls2 && $ts2 ) { # CC* ==> CC? or 0CC $part2b = "0$part2"; $part2 = "$part2?"; last NC_CASE2; } if ( $length == 2 ) { # CC or *CC* ==> 0CC $part2 = "0$part2"; last NC_CASE2; } } # End of NC_CASE2 block. # The philosophy of whether or not to append an asterisk if part3 wasn't specified, # is to do so if he has not asked for a specific class, which is easily identified # by checking for wild card characters (?) in any of the first two parts. # Thus, 123, which is interpreted as 123/*, gets an asterisk (123???*), but # 12/34 is a specific class, so doesn't (012/034). # A later fix modified this statement to pick up the 1234* and 123456* cases, # where our part2 was "004" and "456" respectively (note no ?'s), so we were # dropping the trailing * (oops). So now we include the check for $ts2. if ($part3 eq "" && (index($part1.$part1b.$part2.$part2b,"?")>=0 || $ts2) ) { $part3="*" } if ($part1b eq "" && $part2b eq "") { $term = sprintf '%03s%03s%s', $part1, $part2, $part3; } elsif ($part1b ne "" && $part2b eq "") { $term = sprintf '(%03s%03s%s,%03s%03s%s)', $part1, $part2, $part3, $part1b, $part2, $part3; } elsif ($part1b eq "" && $part2b ne "") { $term = sprintf '(%03s%03s%s,%03s%03s%s)', $part1, $part2, $part3, $part1, $part2b, $part3; } else { $term = sprintf '(%03s%03s%s,%03s%03s%s,%03s%03s%s,%03s%03s%s)', $part1, $part2, $part3, $part1b, $part2, $part3, $part1, $part2b, $part3, $part1b, $part2b, $part3; } } else { # Else this term didn't match our "don't muck with it" match. $term = $_; } # Rebuild the search string with the commas we're parsing on, in our foreach/split loop. # Recombine all but the first term, with commas. Note that we remove trailing spaces # (if any) after the comma, EG "A/1, B/2" becomes "00A001,00B002". $output .= ($output ne "" ? "," : "").$term; } # End of foreach split ... } # End of else don't pass everything on untouched return $output; } # Filter "Unadorned" patent numbers (UP), which is (mostly) the full patent # number without the country code, kind, or leading zeros (eg 5551212). # # This simple rule gets tweaked a bit when you talk about non-utility US # patents (eg USRE028481__) or Italian patents, which have a 2-character # city code (eg ITMI031377A0). The upnumber zone has the imbedded zeros # indexed (eg RE028481 and MI031377). # # Another tweak is for US Applications, Japanese patents, and 2004 and beyond # WO patents, which get two forms indexed (eg, 21001149 and 20010001149) in # the upnumber zone. # # One often finds publication numbers with slashes in them. We try to allow # these as input, filtering to what they should be. The formats we handle # are either 2- or 4-digit years, a slash, and either 6- or 7-digit numbers. # # We Filter This => Into This # ============== ================================= # US2001/1149 (200100011491001149101149 # US5551212A1 5551212 # USPP3987 PP003987 # PP3987 PP003987 # ITMI26A0 MI000026 # ITMI26 MI000026 (Italian patent with City Code. ) # MI123456 MI123456 (Same thing, but some 2-letter, but ) # US123456 123456 (prefixes are taken as country codes) # DE1234987 1234987 (Because 7-digits, not Italian city.) # 2001/1149 (200100011491001149101149) # 04/123456 (200401234564123456) # 04/1234567 20041234567 # # One last note on this filter. Like all the other filters, we do handle # multiple terms, eg (US5551212A1,USPP3987,US2001/1149)UP, but unlike all # the other filters, we also accept spaces as term delimiters. For example, # (uspp3987 d1234 itmi26 mi26 de000026 04/1234567)UP # becomes (pp003987,d0001234,mi000026,mi000026,26,20041234567)UP # Note how the spaces in our input, becomes commas!! It's only this UP filter # that can get away with replacing spaces with commas, because the UPNUMBER # zone is always single words, unlike PN or IC where you might have C09D 123. # It never makes sense to search for one UP followed by another. sub filter_UP { my($in_var_string) = @_; # If there's any kind of "funny" character in this search string, don't modify it at all. return $in_var_string if (($in_var_string !~ /^[a-zA-Z\d\/_\?\*,\ ]*$/) || ($in_var_string =~ / and /i ) || ($in_var_string =~ / or /i ) || ($in_var_string =~ / not /i ) ); my $output = ""; my @pns = split(/[\ |,]/, $in_var_string); foreach my $pns (@pns) { if ($pns ne "") { my ($term_year, $term_num, $term_type, $term1, $term2, $term3); my $term = $pns; if ($pns =~ /\*/) { } elsif ($pns =~ /[A-Z]{0,2}((?:[0-9\?]{2})?[0-9\?]{2})\/0*([\d\?]{1,7})[A-Z_\?]*[0-9]*/i) { # Slash form with yyyy/ or yy/, and either with or without the CC and KI. # Drop the leading CC, the middle slash, & trailing kind, and pad the # middle digits to 5, 6, or 7 characters. # if (length($1)==2 && $1>60) {$term_year="19$1"} elsif (length($1)==2 ) {$term_year="20$1"} else {$term_year="$1"} $term_num=$2; if ($term_num<= 99999) { # 1-5 Digits (yy/d yy/ddddd yyyy/d yyyy/ddddd). # Could be US App or 5-digit WO or 6-digit WO. Filter to # "(yyyy00nnnnnyy0nnnnnyynnnnn)" with leading zeros removed. $term1="$term_year" . "0" x (7 - length($term_num)) . "$term_num"; $term2=substr($term_year,2) . "0" x (6 - length($term_num)) . $term_num; $term2=~s/^0*(.)/$1/; # Strip all leading zeros. $term3=substr($term_year,2) . "0" x (5 - length($term_num)) . $term_num; $term3=~s/^0*(.)/$1/; # Strip all leading zeros. $term = "($term1$term2$term3)"; } elsif ($term_num<=999999) { # 6 Digits (yy/dddddd or yyyy/dddddd). # Could be US App or 6-digit WO. Filter to "(yyyy0ddddddyydddddd)". $term1 = "${term_year}0$term_num"; $term2 = substr($term_year,2) . $term_num; $term2=~s/^0*(.)/$1/; # Strip all leading zeros. $term = "($term1$term2)"; } else { # 7 Digits (yy/ddddddd or yyyy/ddddddd). $term = "$term_year$term_num"; # Must be US App. Filter to yyyyddddddd. } } elsif ($pns =~ /([I\?][T\?])([A-Z]{1,2})0*([\d\?]{1,11})([A-Z_\?]*[0-9]*)/ix) { # Some Italian pno12 and pno10 forms need an 8-char UP, for example, # ITMI26A0 or ITMI26 becomes MI000026, but don't touch IT89000210. # That gets handled below. $term = $3; $term = $2 . "0" x (8 - length($2) - length($term)) . $term; } elsif ($pns =~ /^([A-Z?]{2})([\d\?]{1,6})$/i) { # Catch good Italian UPs, eg LI000002, which is a perfectly good UP for # ITLI000002U1, but allow US, DE, EP, JP, and WO cases slip through 'cause # I know these aren't Italian city codes (ie ain't no ITUS*, ITDE* ITEP*, etc). my $city_code=$1; $term_num=$2; if ($city_code=~/(US|DE|EP|JP|WO)/i) { # Does city code look like a country code? $term = $term_num; # If so, filter to nnn with no leading zeros. $term=~s/^0*(.)/$1/; } else { # Filter apparent city codes to CC00nnnn. $term = $city_code . "0" x (6 - length($term_num)) . $term_num; } } elsif ($pns =~ /([A-Z\?]{0,2})([A-Z\?]{0,2})0*([\d\?]{1,11})([A-Z_\?]*[0-9]*)/i) { # Remove unwanted/unnecessary, leading or imbedded zeros. Again, we allow & drop # leading CC and trailing kinds, so any of US00001244__, US00001244, US1244__, # US1244, 00001244__, 00001244, 1244__ all get filtered here to just 1244. # Also, our IT89000210 example from above gets filtered here to 89000210. # # We also special case the more common, US non-utility types (RE, PP, & D). Any # of USPP000003987__, PP0003987__, PP0000003987 all get filtered into PP003987. # (Thank goodness we don't have any Reunion patents. Never heard of Reunion? # It's a French-owned island off of Africa, whose country code is RE.) $term_num = $3; if (($2 eq ""?$1:$2) =~ /^(RE|PP|D)$/i) { # Is this a US patent type? $term = $1 . "0" x (8 - length($1) - length($term_num)) . $term_num; } else { # Probably is a country code, then. $term = $term_num; # Drop country code & leading zeros. $term=~s/^0*(.)/$1/; } } $output .= ($output ne "" ? "," : "").$term; } } return $output; } # Normalize patent numbers. # # We Filter This => Into This # ================ ================================================== # US2003/0186395A1 (US20030186395A1US003186395A1) # ??2003/100000A1 (??20030100000A1??003100000A1) # wo2004/17056A1 (wo20040017056A1wo00417056A1wo04017056A1) # WO03/123456A2 WO03123456A2 # WO03/12345A (WO00312345A*wo03012345A*) # wo03/000001234 (wo00301234*wo03001234*) # # A note on searching for US Granted patents PN. The double-underscore # kind, __, is considered by Verity to be punctuation and is not indexed, and # when specified in a search term, is dropped. So don't get confused when # searching for US05551212pn finds something, but wo03012345pn, does not. # Normally, you always need to provide a kind (eg wo03012345A1pn). # sub filter_PN { my($in_var_string) = @_; # If there's any kind of "funny" character in this search string, don't modify it at all. return $in_var_string if (($in_var_string !~ /^[a-zA-Z\d\/_\?\*,\ ]*$/) || ($in_var_string =~ / and /i ) || ($in_var_string =~ / or /i ) || ($in_var_string =~ / not /i ) ); my $output = ""; my @pns = split(/,+/, $in_var_string); foreach my $pns (@pns) { if ($pns ne "") { # (so we ignore a trailing comma after the last number) my ($term_year, $term_num, $term_type, $term_CC); my $term = $pns; if ($pns =~ /\*/) { # If there's an * in this search term, skip it. # US references, US Application images, or 2004 WO images show the publication # number like CC YYYY/nnnnn KK. If that's what we got, we'll filter it to # the US Application format of CCYYYY00nnnnnKK # the old WO format of CC0YYnnnnnKK (unless nnnnn>5 digits) # the new WO format of CCYY0nnnnnKK (unless nnnnn>6 digits) } elsif ($pns =~ /([A-Z\?]{2})([0-9\?]{4})\/([\d\?]{1,11})([A-Z_\?]*[0-9]*)/i) { my ($term_yy); $term_CC=$1; # EG, WO $term_year=$2; # and yyyy $term_num=$3+0; # and 1234 (The +0 strips leading zeros) $term_type=$4; # and A1 $term_yy=substr($term_year,2); $term_type.="*" if (length($term_type)<2); # If A, now A*. If null, now *. if (length($term_num)<6) { # Numeric portion is 1-5 digits long. $term = "$term_CC$term_year" . "0" x (7 - length($term_num)) . "$term_num$term_type" . "${term_CC}0$term_yy" . "0" x (5 - length($term_num)) . "$term_num$term_type" . "$term_CC$term_yy" . "0" x (6 - length($term_num)) . "$term_num$term_type"; } elsif (length($term_num)<7) { # Numeric portion is 6 digits long. $term = "$term_CC$term_year" . "0" x (7 - length($term_num)) . "$term_num$term_type" . "${term_CC}0$term_yy" . "0" x (5 - length($term_num)) . "$term_num$term_type"; } # Else Numeric portion is 7+ digits long. Don't know what this is, so don't filter. # On the other hand, sometimes WO images show only two digits in the year. # But since those 2 yy digits could be in two different places, we need # to filter from WOyy/1234A1 to (WO0yy01234A1WOyy001234A1). # (Is it inconsistent that we don't allow US Apps to have 2-digit years? Tough.) } elsif ($pns =~ /([A-Z\?]{2})([0-9\?]{2})\/([\d\?]{1,11})([A-Z_\?]*[0-9]*)/i) { $term_CC=$1; # EG, WO $term_year=$2; # and yy (and i don't care if it's <2000 or not) $term_num=$3+0; # and 1234 (The +0 strips leading zeros) $term_type=$4; # and A1 $term_type.="*" if (length($term_type)<2); # If A, now A*. If null, now *. if ($term_num>99999) { # Already 6 digits. Don't have to pad. if ($term_num<=999999) { # But not greater than 6 digits. $term = "$term_CC$term_year$term_num$term_type"; } # Else the Numeric portion is too long. Don't know what this is, so don't filter. } else { # 5-digits or less. Need to pad into both forms. my ($term1, $term2); $term1 = "${term_CC}0$term_year" . "0" x (5 - length($term_num)) . "$term_num$term_type"; $term2 = "$term_CC$term_year" . "0" x (6 - length($term_num)) . "$term_num$term_type"; $term = "($term1$term2)"; } # This filters USD123 or USD123__ or USPP123_ or WO123A # into USD0000123* or USD0000123__ or USPP000123_* or WO00000123A* } elsif ($pns =~ /([A-Z\?]{2})([A-Z\?]{0,2})([\d\?]{1,11})([A-Z_\?]*[0-9]*)/ix) { $term = $1 . $2 . "0" x (8 - length($2) - length($3)) . $3. $4. (length($4)==2?"":"*"); } $output .= ($output ne "" ? "," : "").$term; } } return $output; } # ----------------------------------------------------------------------------------- # Function to ensure that only one version of a collection pair is present, in the # set being searched, and that it's the fulltext version if we have a fulltext # zone mentioned. It takes a pair of collections names, e.g. "epa","eapft" as # input, and returns a 1 if either of that pair should be searched. # ----------------------------------------------------------------------------------- sub coll_test_and_upgrade { my ($plain, $ft) = @_; my $ft_index = $coll_index{$ft}; my $plain_index = $coll_index{$plain}; my $have_it = 0; if ( $plain_index >= 0 ) { # plain version $have_it = 1; if ( $ft_index >= 0 ) { # fulltext version too $coll_index{$plain} = -1; # remove plain one } else { # only plain, upgrade if needed if ($fulltext_field) { $coll_index{$plain} = -1; $coll_index{$ft} = 1; } } } elsif ( $ft_index >= 0 ) { # only fulltext version $have_it = 1; } return $have_it; } # End of coll_test_and_upgrade # "redir" handling of output placed in a db2 hitlist (taken from dofilter) # (There used to be inactive SSI code in here. cht removed it 6/30/00. # If anyone wants to see it, ask cht for an archive copy.) sub redirOutputFilter { # (The following httppost command was done above, for all modes.) # system "$httppost -g \"$url_to_call?\" $qsfilename $response_html >$response_http;" ; my $PQ_REDIR_URL = ""; open(IN, $response_html) ; while() { $PQ_REDIR_URL .= $_; } close(IN); # Done with all temp files unlink ($qsfilename, $response_html, $response_http) unless ($fcgi_mode || $debug); # Eat HTTP Header: lines - we'll create our own $PQ_REDIR_URL =~ s/HTTP Header: .*\n//g; # Force flush after every write select(STDOUT); $| = 1; # Tease-out the Net.data URL from the HTML redirect $PQ_REDIR_URL =~ s/.*URL=(.*)\".*/$1/g; chomp $PQ_REDIR_URL; # Eat any newlines if ($PQ_REDIR_URL eq "") { my $get_stdout=get("http://localhost/messages/500error.html"); die "Call to get() for 500error.html failed.\n" if (!defined($get_stdout)); $get_stdout=~s/^HTTP Header:.*\n//g; print STDOUT "Content-type: text/html\n\n"; print STDOUT $get_stdout; } else { # Pass language if passed to us originally # $PQ_REDIR_URL .= "$langarg"; breaks net commerce print STDOUT "Final redir url is: $PQ_REDIR_URL\n" if ($debug); # cluster.d2w needs cookie info to make time tracking call $PQ_REDIR_URL .= "&HTTP_COOKIE=" . url_encode($cookie); # Get a Net.Data-generated hitlist (or a cluster ...), eating any HTTP headers. # (We depend on this url being shorter than 2048 -- there is no query in it, just # a pool table and database and timestamp.) my $get_stdout=get("$PQ_REDIR_URL"); if (defined($get_stdout)) { $get_stdout=~s/^HTTP Header:.*\n//g; print STDOUT "Content-type: text/html\n\n"; print STDOUT $get_stdout; } else { # Not supposed to happen!! warn "$patsearch: call to get() for $PQ_REDIR_URL failed.\n"; OutputUnauthorized(); # May not be right, but, better than blank! } } } # redirOutputFilter #------- Produce a site header ------ # Input: Text for the title tag # Output: none # sub OutputHeader { my($in_var, $type) = @_; my $html = $template{'header'}; my $head = "\n\n$in_var\n"; if ($type eq "error") { $head .= qq!\n!; } else { $head .=< EOF ; } # If Hoovers set -l=hoovers, cookie the user for NC revenue tracking if ($lval eq "hoovers") { $head .= qq!\n"; } print STDOUT "$head"; # Since server side include processing does not work in our world, we # need to emulate the following: # $head .= qq!!; # This is made outrageously complicated by the fact that there are server # side variables which are set in this header, from environment variables # as well as directly, which are then either used later in the # header/trailer/ad world, or, which we need the values of (like colors). # Anyway, we get our colors from the request environment, or failing that, from the # header file, or failing that, we use our defaults. # Note that the whole BGCOLOR phrase is in the environment, or the header # variables (and it is conceivable that someone could slip in other # phrases someday, so, we are going to have to follow along with # this "architecture"). # (We grabbed the header default colors when we initially read the header file.) # So, we massage the header file template here, processing, substituting for the # netdata variable $(nchostu) the current real value. $body_color = $ENV{'BODY_COLOR'}; $body_color = $header_body_color unless $body_color; $left_color = $ENV{'LEFT_COLOR'}; $left_color = $header_left_color unless $left_color; $showsort_color = $ENV{'SHOWSORT_COLOR'}; $showsort_color = $header_showsort_color unless $showsort_color; $html =~ s/\$\(nchostu\)/$nchost/g; print STDOUT $html; $head = qq!\n!; $head .= "
\n"; print STDOUT "$head"; $info{'bodyColor'}=$body_color; } # OutputHeader #------- Produce a site trailer ------ # Input: none # Output: none # sub OutputTrailer { print STDOUT $template{'trailer'}; } # OutputTrailer #------- Produce an unauthorized page ------ # Input: none # Output: none # sub OutputUnauthorized { my $ou_message = "You do not seem to be authorized for this operation"; my ($in_arg) = @_; if ($in_arg) { $ou_message = $in_arg; } select(STDOUT); $| = 1; print STDOUT "Content-type: text/html\n\n"; OutputHeader("Not Authorized","error"); print STDOUT $debugfromenv if ($debug); my $tail = "

"; $tail .= qq!\n!; $tail .= qq! \n!; $tail .= "
 \n!; $tail .= "
"; $tail .= $ou_message; $tail .= "
\n"; $tail .= "
To continue, please choose one of the IP Search items "; $tail .= "or use your browser's Back button.\n"; $tail .= "

\n"; $tail .= $txt_contact_support; $tail .= "

\n"; $tail .= qq!

 
\n"; $tail .= "
\n"; # end the table started by OutputHeader() print STDOUT $tail; OutputTrailer(); } # OutputUnauthorized #------- Produce a 401 return page ------ # Input: none # Output: none # sub OutputXML401 { select(STDOUT); $| = 1; print STDOUT "Content-type: text/xml\n\n"; print STDOUT "401\n" } # OutputXML401 #------- Put commas into a number ------ # Input: a string with the number # Output: a string with the commified number # sub commify { my $out = reverse $_[0]; $out =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; return scalar reverse $out; } # commify #------- Turn a canonical ICL into a readable one ------ # Input: the db2 form of an icl, usually A99A 99999 # Output: the readable form, e.g. A99A 999/99 # sub FormatICL { my($out) = @_; my ($p1, $p2, $p3); if ($out =~ /(.... )(...)(.*)/) { $p1 = $1; $p2 = $2; $p3 = $3; $p2 =~ /0?0?(.*)/; $out = "$p1$1/$p3"; } return $out; } # FormatICL #------- Environment needed to connect to Delphion db2 ------ sub setupDB2 { if (!defined($ENV{DB2INSTANCE})) { # DB2INSTANCE should always be defined, but use Southbury value, just in case $ENV{DB2INSTANCE}="caeadmin"; } $ENV{DB2DIR}="/home/inst1/sqllib"; $ENV{INSTHOME}="/home/inst1"; $ENV{PATH}.=":/home/inst1/sqllib/bin:/home/inst1/sqllib/adm:/home/inst1/sqllib/misc"; $ENV{LD_LIBRARY_PATH}.=":/home/inst1/sqllib/lib"; } #------- Subroutine to connect to a Delphion db and prepare our stmts ------ sub connectDB2 { $db2h = DBI->connect("dbi:DB2:$PQDATABASE", $PQUSERID, $PQPW, 'DB2'); if ($db2h) { my $db2stmt = qq{ select image_pages, drawing from inst1.imag where patn=? and image_pages is not null for fetch only}; $db2sth = $db2h->prepare($db2stmt); warn "Could not prepare the imag select\n", $db2h->errstr if (!$db2sth); my $statstmt = qq{ select attr_value, attr_name from inst1.ipndb_attr for fetch only }; $statsth = $db2h->prepare($statstmt); warn "Could not prepare the dbstatus select\n", $db2h->errstr if (!$statsth); my $db2dsrc = "select patn, datasrc from inst1.main where patn in "; my $db2imag = "select patn, image_pages, drawing, cdlabel from inst1.imag where image_pages is not null and patn in "; my $db2age = "select (days(current date) - days(max(isd))) from inst1.main where patn=? for fetch only"; my $db2dwtitle = "select f.patn, f.pan, m.title from inst1.dw_family f, inst1.dw_main m where f.pan = m.pan and f.patn in "; my $db2invt = "select patn, datasrc, nam, num from inst1.invt where (patn, datasrc) in "; my $idx = 0; my ($liststr, $list2str); while ($idx < QueryInListCt) {$liststr .= "?,"; $idx++;} chop($liststr); $idx = 0; while ($idx < QueryInListCt) {$list2str .= "((CAST (? AS CHAR(12))), (CAST (? AS CHAR(3)))),"; $idx++;} chop($list2str); $db2dsrc .= "(" . $liststr . ") order by patn, datasrc for fetch only"; $db2imag .= "(" . $liststr . ") order by patn for fetch only"; $db2dwtitle .= "(" . $liststr . ") order by f.patn for fetch only"; $db2invt .= "(values" . $list2str . ") order by patn, datasrc, num for fetch only"; $db2dsrch = $db2h->prepare($db2dsrc); warn "Could not prepare the datasrc select\n", $db2h->errstr if (!$db2dsrch); $db2imagh = $db2h->prepare($db2imag); warn "Could not prepare the image_pages select\n", $db2h->errstr if (!$db2imagh); $db2ageh = $db2h->prepare($db2age); warn "Could not prepare the age select\n", $db2h->errstr if (!$db2ageh); $db2dwtitleh = $db2h->prepare($db2dwtitle); warn "Could not prepare the age select\n", $db2h->errstr if (!$db2dwtitleh); $db2invth = $db2h->prepare($db2invt); warn "Could not prepare the inventor select\n", $db2h->errstr if (!$db2invth); } else { warn "Patsearch could not db2 connect to dbi:DB2:$PQDATABASE, $PQUSERID, $PQPW, 'DB2'\n", $DBI::errstr; } } # connectDB2 sub getDatasrc(\%) { my $phash = $_[0]; my ($rowct, $parmct) = (0,0); my $phash_size = scalar keys %$phash; foreach my $patn (keys %$phash) { $rowct++; $parmct++; unless($db2dsrch->bind_param($parmct, $patn)) {warn "bind_param failure for data source query, patn=$patn"; return;} if ($parmct == QueryInListCt || $rowct == $phash_size) { for (my $i = 0, my $itct = QueryInListCt - $parmct; $i < $itct; $i++) { unless($db2dsrch->bind_param($parmct + $i + 1, "XXX")) {warn "bind_param failure for data source query, patn=XXX"; return;} } $parmct = 0; unless($db2dsrch->execute()) {warn "execute failure for data source query " . $db2h->errstr; return;} my ($fetch_patn, $fetch_datasrc, $curr_patn, $curr_datasrc); unless($db2dsrch->bind_columns(\$fetch_patn, \$fetch_datasrc)) {warn "bind_columns failed for getDatasrc"; return;} while($db2dsrch->fetch()) { trim($fetch_patn, $fetch_datasrc); if ($curr_patn ne $fetch_patn) { if ($curr_patn) { $$phash{$curr_patn}[PnhashDatasrc] = $curr_datasrc; } $curr_patn = $fetch_patn; $curr_datasrc = $fetch_datasrc; } elsif ($fetch_datasrc ne "IFD") { $curr_datasrc = $fetch_datasrc; } } if ($fetch_patn) { $$phash{$curr_patn}[PnhashDatasrc] = $curr_datasrc; } } } } # getDatasrc sub getImagePages(\%) { my $phash = $_[0]; my ($rowct, $parmct) = (0,0); my $phash_size = scalar keys %$phash; foreach my $patn (keys %$phash) { $rowct++; $parmct++; $$phash{$patn}[PnhashQueryTps] = 1; unless($db2imagh->bind_param($parmct, $patn)) {warn "bind_param failure for image page query, patn=$patn"; return;} if ($parmct == QueryInListCt || $rowct == $phash_size) { for (my $i = 0, my $itct = QueryInListCt - $parmct; $i < $itct; $i++) { unless($db2imagh->bind_param($parmct + $i + 1, "XXX")) {warn "bind_param failure for image page query, patn=XXX"; return;} } $parmct = 0; unless($db2imagh->execute()) {warn "execute failure for image page query " . $db2h->errstr; return;} my ($fetch_patn, $fetch_image_pages, $fetch_drawing, $curr_patn, $cdlabel); unless($db2imagh->bind_columns(\$fetch_patn, \$fetch_image_pages, \$fetch_drawing, \$cdlabel)) {warn "bind_columns failed for getImagePages"; return;} while($db2imagh->fetch()) { trim($fetch_patn); trim($cdlabel); if ($curr_patn ne $fetch_patn) { if ($cdlabel) { # This treats drawing=0 like drawing=NULL if ($fetch_image_pages) {$$phash{$fetch_patn}[PnhashImagePages] = $fetch_image_pages;} if ($fetch_drawing) {$$phash{$fetch_patn}[PnhashDrawing] = $fetch_drawing;} $$phash{$fetch_patn}[PnhashQueryTps] = 0; if ($cdlabel eq 'TPS') { $$phash{$fetch_patn}[PnhashIsOnTps] = 1; } } $curr_patn = $fetch_patn; } } } } } # getImagePages sub getInventor(\%) { # Must call getDatasrc before this my $phash = $_[0]; my ($rowct, $parmct) = (0,0); my $phash_size = scalar keys %$phash; my $locQueryInListCt = QueryInListCt; foreach my $patn (keys %$phash) { $rowct++; $parmct++; unless($db2invth->bind_param(2 * $parmct - 1, $patn)) {warn "bind_param failure for inventor query, patn=$patn"; return;} unless($db2invth->bind_param(2 * $parmct, $$phash{$patn}[PnhashDatasrc])) {warn "bind_param failure for inventor query, patn= $patn"; return;} if ($parmct == $locQueryInListCt || $rowct == $phash_size) { for (my $i = 0, my $itct = $locQueryInListCt - $parmct; $i < $itct; $i++) { unless($db2invth->bind_param(($parmct + $i + 1) * 2 - 1, "XXXXXXXXXXXX")) {warn "bind_param failure for inventor query, patn=XXX"; return;} unless($db2invth->bind_param(($parmct + $i + 1) * 2, "XXX")) {warn "bind_param failure for inventor query, patn=XXX"; r eturn;} } $parmct = 0; unless($db2invth->execute()) {warn "execute failure for inventor query " . $db2h->errstr; return;} my ($fetch_patn, $fetch_datasrc, $fetch_nam, $fetch_num, $curr_patn, $curr_datasrc); unless($db2invth->bind_columns(\$fetch_patn, \$fetch_datasrc, \$fetch_nam, \$fetch_num)) {warn "bind_columns failed for ge tInventor"; return;} while($db2invth->fetch()) { trim($fetch_patn); if ($$phash{$fetch_patn}[PnhashInventor]) {$$phash{$fetch_patn}[PnhashInventor] .= "; $fetch_nam";} else {$$phash{$fetch_patn}[PnhashInventor] = $fetch_nam;} } } } } sub getDerwentTitle(\%) { my $phash = $_[0]; my ($rowct, $parmct) = (0,0); my $phash_size = scalar keys %$phash; $parmct = 0; foreach my $patn (keys %$phash) { $rowct++; $parmct++; unless($db2dwtitleh->bind_param($parmct, $patn)) {warn "bind_param failure for derwent title query, patn=$patn"; return;} if ($parmct == QueryInListCt || $rowct == $phash_size) { for (my $i = 0, my $itct = QueryInListCt - $parmct; $i < $itct; $i++) { unless($db2dwtitleh->bind_param($parmct + $i + 1, "XXX")) {warn "bind_param failure for derwent title query, patn=XXX"; return;} } $parmct = 0; unless($db2dwtitleh->execute()) {warn "execute failure for derwent title query " . $db2h->errstr; return;} my ($fetch_patn, $pan, $title); unless($db2dwtitleh->bind_columns(\$fetch_patn, \$pan, \$title)) {warn "bind_columns failed for derwent title"; return;} while ($db2dwtitleh->fetch()) { trim($fetch_patn); trim($pan); $$phash{$fetch_patn}[PnhashDwPan] = $pan; # Derwent had old (pre 1998) markup in titles using @ = get rid of these $title =~ s/[=]/ /g; $title =~ s/[@]//g; $$phash{$fetch_patn}[PnhashDwTitle] = $title; undef $pan; undef $title; } } } } sub getAge($) { my ($pn) = @_; my $defaultdays = 100; # default is to pretend the patent is oldish my ($days, $ok); if ($db2h && $db2ageh) { unless ($db2ageh->bind_param(1, $pn, $DBI::SQL_CHAR)) { warn "$patsearch($$): bind_param failure for age query\n", $db2h->errstr; return $defaultdays; } $ok = $db2ageh->execute(); if ($ok) { $db2ageh->bind_col(1, \$days); $db2ageh->fetch(); } else { warn "$patsearch($$): execute failure for age query on $pn\n", $db2h->errstr; } $db2ageh->finish(); } return defined($days) ? $days : $defaultdays; } # getAge #------- Subroutine used by g1OutputFilter to read the first three lines returned by #------- patquery, and set these global variables based on them # (This section should be moved to inside of g1OutputFilter.) my ($query, $hits, $other, @qstats, $merged, $retrieved, $processed, $collections, @colls, $startrow, $rows, $sortcol, $sortasc, @fields, $rc, $errormsg, @dbinfo, $table_name, $db_name, $time_stamp); my $txt_patmsg1 = "$patsearch did not receive enough data from the patquery tunnel"; sub Parse3HeaderLines { open(IN, "$response_html"); chop($query = ); chop($hits = ); chop($other = ); close(IN); if ($other eq "") { # Things are not right print STDERR "$txt_patmsg1\n"; return 1; } @qstats = split(/\t/,$hits); $merged = "$qstats[0]"; $retrieved = "$qstats[1]"; $processed = "$qstats[2]"; $startrow = "$qstats[3]"; $rows = "$qstats[4]"; $sortcol = "$qstats[5]"; $sortasc = "$qstats[6]"; my $fieldstr = $qstats[7]; $fieldstr =~ s/\ +$//; # strip trailing blanks @fields = split(/\ +/, $fieldstr); @dbinfo = split(/\t/, $other); $table_name = "$dbinfo[0]"; $db_name = "$dbinfo[1]"; $time_stamp = "$dbinfo[2]"; $collections = "$dbinfo[3]"; $collections =~ s/\ +$//; # strip trailing blanks @colls = split(/ /, $collections); $rc = "$dbinfo[4]"; $errormsg = "$dbinfo[5]"; return 0; } # Parse3HeaderLines # Output-filter between Verity (Patquery) and HTML # (History at the bottom of the file) # ----------------------------------------------- # # expects the name of the file containing what patquery sent back in # $response_html, and the query posted to patquery in $SINPUT sub g1OutputFilter { use strict 'vars'; # Declare (some of) our variables up top here, including our multi-language ones my ($blankTDSpan,$dataSpan,$nonblankTDSpan,$orderBox); my ($query_html,$endrow,$collstr,$sortname,$sortoname); my ($curcol,$desc_img,$asc_img,$anch_head); my $maxdocs = 500; # furthest a user can scroll; get from environment variable?? my $wisdomain_maxdocs = 20000; # Max number to request on wisdomain button my $cluster_maxdocs = 20000; # Max number to request on cluster button my $snapshot_maxdocs = 20000; # Max number to request on snapshot button my %selected = (); select(STDOUT); $| = 1; # Force flush after every write print STDOUT "Content-type: text/html\n\n"; if (-z $response_html) { # no output file returned, try one more time if not localhost if ($default_verity_server !~ "localhost") { print STDERR "patsearch httppost retried\n"; sleep 1; # give a dead patquery time to restart system "$httppost $httppost_mode \"$url_to_call?\" $qsfilename $response_html >$response_http;" ; } if (-z $response_html) { # give up chop(my $ts = `date +%T`); print STDERR "$ts: patsearch httppost returned no data\n"; print STDOUT "No response from search server; please try later"; print STDOUT "
SINPUT: |$SINPUT|" if ($debug); return; } } if ($debug > 1) { print STDOUT "\n
SINPUT: |$SINPUT|"; print STDOUT "\n
SARRAY: |", %SARRAY, "|\n"; } my $have_images = "EP US WO CH GB FI"; my ($image_pages, $drawing); # Set "what features the user sees" controlling variables my ($refine_query, $save_search, $dofastbuy, $dopinkdots, $orderbox_at_bottom, $show_sort_arrows, $show_field_choice, $scrolling_controls, $printer_friendly_button); if (!$pf) { # normal $refine_query = 1; $save_search = 1; $dofastbuy = 1; $dopinkdots = 1; $orderbox_at_bottom = 1; $show_sort_arrows = 1; $show_field_choice = 1; $scrolling_controls = 1; $printer_friendly_button = 1; } else { # printer friendly $refine_query = 0; $save_search = 0; $dopinkdots = 1; $orderbox_at_bottom = 0; $show_sort_arrows = 0; $show_field_choice = 0; $scrolling_controls = 0; $printer_friendly_button = 0; } my $date_format = "us"; # "ymd" is only other choice now, and we get ymd my $GETPOST = ($debug)?"GET":"POST"; if ($nchost =~ /:/) { # we already have a complete url for the netcommerce host } else { if ($server =~ "penguin") { # is this still necessary? (yes 6/30/00) $nchost = "bear" if (!defined($nchost)); $nchost = "https://$nchost.delphion.com"; } else { $nchost = "www2" if (!defined($nchost)); $nchost = "https://$nchost.delphion.com"; } } my $details_url = "/details?pn="; my $details_url_derwent = "/derwent/p/dwdetails?pan="; my $details_url_ipcom = "http://$server:8080/db2xml/servlet/DB2XMLServlet?mode=ipcomdetails&pubid="; my $order_cmd = "$nchost/cgi-bin/ncommerce3/IPNAddItems"; # my $order_cmd https://bear.delphion.com/cgi-bin/ncommerce3/ExecMacro/IPN/IPNlistprod.d2w/report"; my $pink_url = "/lb"; my $pinkdbm = "/dfs/.rw/ipnlistings/lblistings"; # patquery's -g 1 cgi mode returns: # line1: the query # line2: merged retrieved processed startrow rows sortcol sortasc field-name-list (tab separated) # line3: db2tablename dbname timestamp collections rc errormsgs (all tab separated) # line4-: tab separated fields (score, patn, date, title, by default) # default sortcols are: score=1 patn=2 date=3 title=4 # sortasc is 1 (TRUE) for ascending, 0 for descending # We now parse these first 3 header lines into global variables, checking for disasters # in communicating to the search server, or, a sick search server if ( Parse3HeaderLines() != 0 || ($processed == 0 && !($rc == -40 || $rc == 11 || $rc == 111)) ) { # Our search server is probably sick my $still_bad = 0; if ($rc == -2 ) { # Check for unknown collection foreach my $coll (@colls) { $coll = basename($coll); $still_bad = 2 if (!defined($colldesc{$coll})); } } if ($still_bad == 0 && $rc != 1) { # wait a little while, and retry this same query sleep 2; system "$httppost $httppost_mode \"$url_to_call?\" $qsfilename $response_html >$response_http;" ; if (-z $response_html) { # this is totally hopeless, give up $still_bad = 1; } else { # process the new resulting header lines if ( Parse3HeaderLines() != 0 || $processed == 0) { $still_bad = 1; } } } if ($still_bad) { my $hdr = ($still_bad == 1) ? "Search server unavailable" : "Bad Collection"; OutputHeader($hdr,"error"); # ("error" says use site the style sheet) print STDOUT $debugfromenv if ($debug); # This is so dumb that we can't put out plain text, but must make a nested table! my $tail = "

"; $tail .= qq!\n!; $tail .= qq! \n!; $tail .= "
 \n!; $tail .= ($still_bad == 1) ? $txt_sick_server : $txt_bad_collection; $tail .= "

\n"; $tail .= $txt_contact_support; $tail .= "

\n"; $tail .= qq!

 
\n"; $tail .= "\n"; # end the table started by OutputHeader() print STDOUT $tail; OutputTrailer(); $fcgi_requests += 100; # lose 100 lives, something which should have worked didn't return; } } $query_html = minimal_htmlencode($query); $endrow = $startrow - 1 + $rows; if ($maxdocs > $retrieved) { $maxdocs = $retrieved; } $save_search = 0 if ($rc != 0); $appName="currentResults" if ($retrieved == 0); # Stay on this tab if 0 results #------- Set some variables needed to create the hitlist table ------- my $derwent = ($collections =~ /der(went|demo)/ix) ? 1 : 0; my $ipcom = ($collections =~ /ipcom/) ? 1 : 0; # Derwent and IPCOM details use different link URLs $details_url = $details_url_derwent if ($derwent); $details_url = $details_url_ipcom if ($ipcom); $collstr = ""; foreach my $coll (@colls) { $coll = basename($coll); if (defined($colldesc{$coll})) { $collstr .= $colldesc{$coll}.", "; } else { $collstr .= "Unknown, " if ( index( $coll, "inpa" ) == -1 ); } } $collstr = substr($collstr, 0, length($collstr)-2); # remove last , $sortname = ($sortcol-1 <= $#fields) ? $fields[$sortcol-1] : "SCORE"; $sortoname = ($sortasc) ? "-$sortname" : $sortname; # Some columns are treated specially -- we need to know which they are # so, set up a hash (indicating which columns the user will see). undef %col_pos; $curcol = 0; foreach my $col (@fields) { $curcol++; $col_pos{$col} = $curcol; } #------- Now, set up to read the rest of the lines, with the hitlist data ------ # open(IN, "tail +4 $response_html | sed -f $IPNROOT/cgi-bin/text_filter.sed |"); open(IN, "$response_html"); my $lct = 0; my %pnhash; while ($lct < HeaderLineCt && ) {$lct++;} my $hitlist_start = tell IN; # load patent numbers into hash while () { chop; s/\t\t$/\t \t/; # in case the last element is empty split(/\t/); $pnhash{$_[$col_pos{"VDKVGWKEY"} - 1]} = (); # Array, eventually has items with indexes defined by constants Pnhash?? } seek(IN, $hitlist_start, 0); # getUserLists() takes .003 sec - old POST using LWP::UserAgent took 1.5sec # Also fetches and sets %preferences my $wf_list = getUserLists($shopperid); $blankTDSpan = 1; # 2; # colspan of initial blank cols for blue sections above/below hitlist $dataSpan = keys(%col_pos); # chosen fields (plus vdkvgwkey) $nonblankTDSpan = $dataSpan+2-$blankTDSpan; $orderBox=$template{'orderBox'}; $info{'blankTDSpan'}=$blankTDSpan; $info{'nonblankTDSpan'}=$nonblankTDSpan; $info{'fhSelected'}=""; $info{'pdfSelected'}=""; $info{'tifSelected'}=""; $info{'moreSelected'}=""; if($preferences{"orderform.product"}) { if($preferences{"orderform.product"} eq "3400") { $info{'fhSelected'}="SELECTED"; } if($preferences{"orderform.product"} eq "2422") { $info{'pdfSelected'}="SELECTED"; } if($preferences{"orderform.product"} eq "7944") { $info{'tifSelected'}="SELECTED"; } if($preferences{"orderform.product"} eq "0") { $info{'moreSelected'}="SELECTED"; } } else { $info{'fhSelected'}="SELECTED x"; } $orderBox=~s/\<\!\-\-(\w+)\-\-\>/$info{$1}/g; # We cannot use an HREF for sort arrows, because a) it causes the # user's query to get logged (and we say we don't log those), and # b) it let's user's see our arg lists and try to manufacture their # own "get"s, or bookmarks, or, worse, html page links to our site. if ($show_sort_arrows) { # these are the sort arrows my $img_tail = qq! BORDER="0">!; $desc_img .= qq! 1) { # initialize sort variables when we have no db2 hitlist $anch_head = qq! \n!; $anch_head .= qq! \n!; $anch_head .= qq! \n!; $anch_head .= qq! \n!; $anch_head .= qq! \n!; $anch_head .= qq! \n!; $anch_head .= qq! \n!; # also pass on various optional arguments $anch_head .= qq! \n! if ($k2); $anch_head .= qq! \n! if ($vs); $anch_head .= qq! \n! if ($language); } my $query_urlenc=$query; $query_urlenc=~s/([ -~])/$urlenc{$1}/g; $info{query}=$query_html; $info{querylength}=length($query); $info{collections}=$collections; $info{fields}="@fields"; $info{sort}=$sortoname; $info{startrow}=$startrow; $info{pagesize}=$pagesize; $info{K2}=($k2) ? $k2 : ""; $info{VS}=($vs) ? $vs : ""; $info{language}=($language) ? $language : ""; $info{patsearchMethod}=$GETPOST; $info{patsearchAction}=$patsearch; $info{ncHost}=$nchost; # This bunch should be removed when no templates use them anymore +++ $info{'refineMethod'}=$GETPOST; $info{'refineAction'}=$patsearch; $info{"saveMethod"}="POST"; $info{"saveAction"}="$nchost/cgi-bin/ncommerce3/ExecMacro/IPN/IPNqrysave.d2w/input"; $info{"saveURL"}="$nchost/cgi-bin/ncommerce3/ExecMacro/IPN/IPNqrysave.d2w/input?query=$query_html&-c=$collections&-i=@fields&-o=$sortoname&mode=add&form=patsearch"; #------- Produce header (up thru start of "main" table) ------ if ($pf) { my $html = $template{'printerfriendly'}; my $ts; chop($ts = `date -u`); $info{ts} = $ts; $html=~s/\<\!\-\-(\w+)\-\-\>/$info{$1}/g; print STDOUT "\n",$html; } else { OutputHeader("$query"); # (Note that the header processing sets the background colors we are to use.) # (It also begins the outermost table (and that table is closed after we put # out the appropriate tab.) } #------- Application-Specific tabs and display bit ------ # Set the default tab variables for all of our apps (each will modify his own) my @allapps=("CR","PX","DX","SN","CL","PL","FH" ); foreach my $app (@allapps) { $info{"${app}bg"}="#CCCCCC"; $info{"${app}curve"}="gray"; $info{"${app}a"}=($retrieved>0)?"a":"INACTIVEa"; $info{"${app}fntclr"}=($retrieved>0)?"black":"#eeeeee"; } $info{'patsearchTabsIncludeSearchform'}=""; $info{'patsearchTabsEndIncludeSearchform'}=""; $info{'patsearchTabsIncludeFirstcolspan'}=""; $info{'patsearchTabsEndIncludeFirstcolspan'}=""; $info{'tabContents'}=""; print STDOUT $debugfromenv if ($debug); if ("$appName"eq"dataExtract" && !$pf) { my $useourtabs = 0; my $html = ""; $info{"app"}="dataExtract"; $info{"DXbg"}="#B5CFF8"; $info{"DXcurve"}="blue"; $info{"DXa"}="INACTIVEa"; # (anything which can look like an anchor tag, but isn't) $info{"XXcolsBefore"}="8"; $info{"XXcolsAfter"}="12"; # Map selected fields, in order, to fieldnames expected by DE my $temp_fields = join(' ',@fields); my $sel="patent_number,"; if($temp_fields =~ m/IMG/) { } if($temp_fields =~ m/TITLE/){ $sel .= "main_patenttitle,"; } if($temp_fields =~ m/DWT/) { } if($temp_fields =~ m/AB/) { $sel .= "abstract,"; } if($temp_fields =~ m/ASSG/) { $sel .= "assignee_name,"; } if($temp_fields =~ m/PD/) { $sel .= "main_issuedate,"; } if($temp_fields =~ m/AD/) { $sel .= "main_applicationdate,"; } if($temp_fields =~ m/DP/) { $sel .= "priority_date,"; } if($temp_fields =~ m/CLAS/) { $sel .= "intlclass_code,"; } if($preferences{"dataextract.selitem"}) { $sel = $preferences{"dataextract.selitem"}; } # POST version my $ua = LWP::UserAgent->new; my $res = $ua->request(POST "http://localhost/fcgi-bin/dataExtract.fpl", [ 'searchstr' => $query_html, # don't need urlenc now 'userlevel' => $userLevel, 'pagesize' => $pagesize, 'k2' => $k2, 'vs' => $vs, 'language' => $language, 'collections' => $collections, 'fields' => join(' ',@fields), 'sort' => $sortoname, 'startrow' => $startrow, 'saveMethod' => $info{'saveMethod'}, 'saveAction' => $info{'saveAction'}, 'saveURL' => $info{'saveURL'}, 'patsearchMethod' => $info{'patsearchMethod'}, 'patsearchAction' => $info{'patsearchAction'}, 'tunnel' => $url_to_call, 'showtab' => "on", 'annotation' => $annotation, 'sel' => $sel, 'nitems' => $retrieved, 'format' => $preferences{"dataextract.format"}, 'zipit' => $preferences{"dataextract.zipit"}, 'debug' => ($debug)?"low":"" ]); my $get_stdout=""; if ($res->is_success) { $get_stdout = $res->content; } else { $get_stdout = "Error ".$res->code.", ".$res->message; } if ($useourtabs) { $html=$template{'tabs'}; $info{'tabContents'}=$get_stdout; $html=~s/\<\!\-\-(\w+)\-\-\>/$info{$1}/g; print STDOUT $html; } else { print STDOUT $get_stdout; } } elsif ("$appName"eq"pdfExpress" && !$pf) { $info{"app"}="pdfExpress"; $info{"PXbg"}="#B5CFF8"; $info{"PXcurve"}="blue"; $info{"PXa"}="INACTIVEa"; # (anything which can look like an anchor tag, but isn't) $info{"XXcolsBefore"}="4"; $info{"XXcolsAfter"}="16"; # POST version my $ua = LWP::UserAgent->new; my $res = $ua->request(POST "http://localhost/fcgi-bin/pdfExpress.fpl", [ 'searchstr' => $query_html, 'userlevel' => $userLevel, 'pagesize' => $pagesize, 'k2' => $k2, 'vs' => $vs, 'language' => $language, 'collections' => $collections, 'fields' => join(' ',@fields), 'sort' => $sortoname, 'startrow' => $startrow, 'saveMethod' => $info{'saveMethod'}, 'saveAction' => $info{'saveAction'}, 'saveURL' => $info{'saveURL'}, 'patsearchMethod' => $info{'patsearchMethod'}, 'patsearchAction' => $info{'patsearchAction'}, 'tunnel' => $url_to_call, 'showtab' => "on", 'debug' => ($debug)?"low":"", 'annotation' => $annotation, 'nitems' => $retrieved ]); my $get_stdout=""; if ($res->is_success) { $get_stdout = $res->content; } else { $get_stdout = "Error ".$res->code.", ".$res->message; } print STDOUT $get_stdout; } elsif ("$appName"eq"snapshot" && !$pf) { my $html=$template{'tabs'}; $info{"app"}="snapshot"; $info{"SNbg"}="#B5CFF8"; $info{"SNcurve"}="blue"; $info{"SNa"}="INACTIVEa"; # (anything which can look like an anchor tag, but isn't) $info{"XXcolsBefore"}="12"; $info{"XXcolsAfter"}="8"; my $urlqs = "-c=" . urlencode($collections) . "&GENERAL=" . $query_urlenc; my $rs_title="Result Set for Query:$query_html    " .qq!! ."Save Search/Create Alert"; $info{"snapshotURL"}="$url_to_call?$urlqs&-m=0&-g=5&-l=snapshot"; $info{"snapshotURLMax"}="$url_to_call?$urlqs&-m=0&-g=9&-l=snapshot"; if ($netdatasnap) { # Retrieve tab contents from Net.Data my $ua = LWP::UserAgent->new; my $res = ""; if ($retrieved>500) { $res = $ua->request(POST "http://localhost/cgi-bin/db2www.cmd/v4/snapshot_form.d2w/ss_form", [ 'rs_title' => $rs_title, # no urlencode! 'nitems' => $retrieved, # WARNING: Snapshot can't tell if 500 or all items 'url' => $info{"snapshotURL"}, 'urlmax' => $info{"snapshotURLMax"}, 'dcols' => $preferences{"snapshot.dcols"}, 'csort' => $preferences{"snapshot.csort"}, 'cb' => $preferences{"snapshot.cb"} ]); } else { $res = $ua->request(POST "http://localhost/cgi-bin/db2www.cmd/v4/snapshot_form.d2w/ss_form", [ 'rs_title' => $rs_title, # no urlencode! 'nitems' => $retrieved, 'url' => $info{"snapshotURL"}, 'dcols' => $preferences{"snapshot.dcols"}, 'csort' => $preferences{"snapshot.csort"}, 'cb' => $preferences{"snapshot.cb"} ]); } my $get_stdout=""; if ($res->is_success) { $get_stdout = $res->content; } else { $get_stdout = "Error ".$res->code.", ".$res->message; } # Insert this into the template for the tab $info{"tabContents"}=$get_stdout; } else { # Use a patsearch flavor template $info{"snapshotMethod"}="GET"; $info{"snapshotAction"}="/snapshot/clist"; $info{"snapshotRs_title"}=minimal_htmlencode($rs_title); # Now we figure out which alternate sections of the template we want my $docs=($retrieved>$snapshot_maxdocs)?$snapshot_maxdocs:$retrieved; if ($docs<=$maxdocs) { $info{"snapshotN2"}="$docs"; $info{"snapshotN"}="$docs"; $info{"snapshotPrettyN"}=commify($docs); $info{"snapshotIncludeFullset"}=""; $info{"snapshotIncludedEntire"}=""; $info{"snapshotAltIncludedEntire"}=""; # (no --> can be in these 3) $info{"snapshotEndIncludedEntire"}=""; } else { $info{"snapshotN"}="$maxdocs"; $info{"snapshotPrettyN"}="First $maxdocs"; $info{"snapshotN2"}="$docs"; $info{"snapshotPrettyN2"}=commify("$docs"); $info{"snapshotIncludeFullset"}=""; $info{"snapshotEndIncludeFullset"}=""; if ($retrieved>$snapshot_maxdocs) { $info{"snapshotIncludedEntire"}=""; $info{"snapshotEndIncludedEntire"}=""; } else { $info{"snapshotIncludedEntire"}=""; $info{"snapshotAltIncludedEntire"}=""; } } $info{"tabContents"}=$template{'snapshot'}; $info{"tabContents"}=~s/\<\!\-\-(\w+)\-\-\>/$info{$1}/g; } $html=~s/\<\!\-\-(\w+)\-\-\>/$info{$1}/g; print STDOUT $html; } elsif ("$appName"eq"clustering" && !$pf) { my $html=$template{'tabs'}; $info{"tabContents"}=$template{'clustering'}; $info{"app"}="clustering"; $info{"CLbg"}="#B5CFF8"; $info{"CLcurve"}="blue"; $info{"CLa"}="INACTIVEa"; # (anything which can look like an anchor tag, but isn't) $info{"XXcolsBefore"}="16"; $info{"XXcolsAfter"}="4"; $info{"clusterMethod"}="GET"; $info{"clusterAction"}="/cgi-bin/clustersearch"; $info{"clusterMaxdocs"}="$cluster_maxdocs"; $info{"nitems"}="$retrieved"; # Now we figure out which alternate sections of the template we want my $docs=($retrieved>$cluster_maxdocs)?$cluster_maxdocs:$retrieved; if ($docs<=$maxdocs) { $info{"clusterN"}="$docs"; $info{"clusterFirstLineType"}="hidden"; $info{"clusterIncludeFullset"}=""; $info{"clusterIncludedEntire"}=""; $info{"clusterAltIncludedEntire"}=""; # (no --> can be in these 3) $info{"clusterEndIncludedEntire"}=""; } else { $info{"clusterN"}="First $maxdocs"; $info{"clusterFirstLineType"}="radio"; $info{"clusterN2"}=commify("$docs"); $info{"clusterIncludeFullset"}=""; $info{"clusterEndIncludeFullset"}=""; if ($retrieved>$cluster_maxdocs) { $info{"clusterIncludedEntire"}=""; $info{"clusterEndIncludedEntire"}=""; } else { $info{"clusterIncludedEntire"}=""; $info{"clusterAltIncludedEntire"}=""; } } my $nclusters = 50; $nclusters = $preferences{"cluster.clusters"}; $info{"clusterN5selected"}=""; $info{"clusterN10selected"}=""; $info{"clusterN20selected"}=""; $info{"clusterN30selected"}=""; $info{"clusterN50selected"}=""; if($nclusters==5){ $info{"clusterN5selected" }="SELECTED";} if($nclusters==10){ $info{"clusterN10selected"}="SELECTED";} if($nclusters==20){ $info{"clusterN20selected"}="SELECTED";} if($nclusters==30){ $info{"clusterN30selected"}="SELECTED";} if($nclusters==50){ $info{"clusterN50selected"}="SELECTED";} $info{"tabContents"}=~s/\<\!\-\-(\w+)\-\-\>/$info{$1}/g; $html=~s/\<\!\-\-(\w+)\-\-\>/$info{$1}/g; print STDOUT $html; } elsif ("$appName"eq"patentLab" && !$pf) { my $doer=($patsearch=~/fcgi-bin/)?"fcgi-bin/patentLab.fpl":"cgi-bin/patentLab.pl"; my $html=$template{'tabs'}; $info{"app"}="patentLab"; $info{"PLbg"}="#B5CFF8"; $info{"PLcurve"}="blue"; $info{"PLa"}="INACTIVEa"; # (anything which can look like an anchor tag, but isn't) $info{"XXcolsBefore"}="20"; $info{"XXcolsAfter"}="0"; # Retrieve tab contents from patentLab.fpl my $ua = LWP::UserAgent->new; my $urlqs = "-c=" . urlencode($collections) . "&GENERAL=" . $query_urlenc; my $res = $ua->request(POST "http://localhost/$doer", [ 'nitems' => $retrieved, 'rs_title' => "", 'url' => "$url_to_call?$urlqs&-m=0&-g=5&-l=wisdomain", 'annotation' => $annotation, 'features' => $features ]); my $get_stdout=""; if ($res->is_success) { $get_stdout = $res->content; } else { $get_stdout = "Error ".$res->code.", ".$res->message; } $get_stdout=~s/\<\!\-\-(\w+)\-\-\>/$info{$1}/g; $info{"tabContents"}=$get_stdout; $html=~s/\<\!\-\-(\w+)\-\-\>/$info{$1}/g; print STDOUT $html; } elsif ("$appName"eq"fhist" && !$pf) { my $html=$template{'tabs'}; $info{"app"}="fhist"; $info{"FHbg"}="#B5CFF8"; $info{"FHcurve"}="blue"; $info{"FHa"}="INACTIVEa"; # (anything which can look like an anchor tag, but isn't) $info{"XXcolsBefore"}="20"; $info{"XXcolsAfter"}="0"; my $urlqs = "-c=" . urlencode($collections) . "&GENERAL=" . $query_urlenc; my $rs_title="Result Set for Query:$query_html    " .qq!! ."Save Search/Create Alert"; $info{"snapshotURL"}="$url_to_call?$urlqs&-m=0&-g=5&-l=snapshot"; $info{"snapshotURLMax"}="$url_to_call?$urlqs&-m=0&-g=9&-l=snapshot"; # Retrieve tab contents from Net.Data my $ua = LWP::UserAgent->new; my $res = ""; $res = $ua->request(POST "http://localhost/cgi-bin/db2www.cmd/v4/fh_form.d2w/fhist_form", [ 'rs_title' => $rs_title, # no urlencode! 'nitems' => $retrieved, 'url' => $info{"snapshotURL"}, 'dcols' => $preferences{"snapshot.dcols"}, 'csort' => $preferences{"snapshot.csort"}, 'cb' => $preferences{"snapshot.cb"} ]); my $get_stdout=""; if ($res->is_success) { $get_stdout = $res->content; } else { $get_stdout = "Error ".$res->code.", ".$res->message; } # Insert this into the template for the tab $info{"tabContents"}=$get_stdout; $html=~s/\<\!\-\-(\w+)\-\-\>/$info{$1}/g; print STDOUT $html; } elsif (!$pf) { # default to current results set print STDERR "patsearch called with invalid app $appName\n" if ("$appName"ne"currentResults"); my $html=$template{'tabs'}; $info{"tabContents"}=$template{'currentResults'}; $info{"CRbg"}="#B5CFF8"; $info{"CRcurve"}="blue"; $info{"CRa"}="INACTIVEa"; # (anything which can look like an anchor tag, but isn't) $info{"XXcolsBefore"}="0"; $info{"XXcolsAfter"}="20"; $info{"CRfntclr"}="black"; # (even if $retrieved==0 we want this tab black) # We want to use our common tabs template, but, no searchform (and, we are # the first column, and a colspan=0 doesn't work). $info{'patsearchTabsIncludeSearchform'}=""; $info{'patsearchTabsIncludeFirstcolspan'}=""; # Create the Collection Selection options, marking as selected the proper set. # When we get here, we have already done the search, on whatever set of # collections the user was allowed to choose, from any of our search pages. # (Right now, there is no way for a user to search, say epa and epbft together, # and, I depend on that below, in the sense that if any collection is fulltext, # we are going to offer the entire "fulltext" set, and not some mixture (where # we have both, anyway). # State: $collections is the space-separated list we got back from patquery, and # our %coll_index gets rebuilt here, based on the actual set we got back. # reload the coll_index hash with the collections actually searched undef %coll_index; my @colls = split(/[\ ,]+/, $collections); my $completefulltext=0; foreach my $cname (@colls) { $coll_index{$cname} = 1; my $oppname = $opposite_coll{$cname}; if (defined($oppname) &&(!$subset_coll{$cname})&&(!$subset_coll{$oppname})) { # complete subset $completefulltext++; } } my $selects=""; my $fulltext=($collections=~/ft/)?1:0; my $need_footnote = 0; $info{'refineFullTextIncludeSubset'}=""; $info{'refineFullTextEndIncludeSubset'}=""; # (Remember if you change this code that you can't have nested comments in html) if ($userLevel =~ /premier|unlimited/ && !$coll_index{'lblistings'}) { $info{"refineIncludeCSelect"}=""; $info{"refineEndIncludeCSelect"}=""; $info{"refineAltCSelect"}=""; $info{"refineImproperColls"}=""; # This is always true for now, till Marketing changes its mind if (($completefulltext == scalar(@colls)) || 1) { # remove our subset "warning" only if all selected colls have a complete # fulltext collection $info{'refineFullTextIncludeSubset'}=""; } # Set up the collections SELECT section, and prepare the $info variables to show # the correct radio buttons for swapping between fulltext and front pages. if ($fulltext) { $info{'refineFrontPagesChecked'}=""; $info{'refineFullTextChecked'}="checked"; foreach my $coll (@selectable_ftcolls) { # We used to have a simple select here, now marketing wants checkboxes :-( # Then they got changed, by marketing, back to the simple select, so, leave # one sample of the checkbox code here, commented out. # (I suppose the template could have contained Javascript to render the # set of collections, either way, if we set two Javascript array variables # to the collections and their descriptions, and, another array variable # with the selected set.) $selects .= qq!