#!/usr/bin/perl ############################################################################## # Pre-process a user query and send it to patquery for search. The resulting # pooltable is being displayed by hitlist_sql.d2w. The NPO patsearch always # operates in g-mode 4. # It is based on the 06/24/99 version of the IPN patsearch. It contains # some filtering updates. The IPN HTML code has been removed. # # (c) Copyright Delphion, Inc. 2000 # Portions (c) IBM Corporation 1997-2000 # # The source code for this program is not published or otherwise divested of # its trade secrets, irrespective of what has been deposited with the U.S. # Copyright Office. # # $Header: /cvsroot_ipsfdb2/ips/cgi-bin/patsearch,v 1.17 2002/03/21 20:59:56 jasper Exp $ # # $Log: patsearch,v $ # Revision 1.17 2002/03/21 20:59:56 jasper # Add two more characters, "<" and ">", to the ending delimiter of a date in # the converteer_datum_velden subroutine. This is to fix bug #78. # # Revision 1.16 2002/03/08 00:44:03 jasper # Sander's ipnmode changes. Fix Rick's typo. # # Revision 1.15 2002/02/14 23:57:05 sander # Different UP filtering in IPN mode # # Revision 1.14 2002/02/06 02:14:47 sander # Command line parameter -query causes patsearch to return the filtered query # # Revision 1.13 2001/11/20 23:57:36 jasper # Handle dropped *'s in filter_NC if the input was 1234* or 123456*. # Also put in a couple more comments (I can't resist). # # Revision 1.12 2001/11/13 13:58:21 servaas # This patsearch does not need external grep function anymore # # Revision 1.11 2001/11/09 09:52:31 servaas # Patsearch uses extended grep function in perlscript sgrep to search lines longer than 2048 bytes # # Revision 1.10 2001/09/24 23:40:07 jasper # Update filter_UP to strip leading zeros (not pad them) from patent number. # # Revision 1.9 2001/05/17 19:47:58 jasper # Fix a few typos. (Next time I'll check in the fully-debugged version) # # Revision 1.8 2001/05/16 20:19:03 jasper # Redo filter_IC logic, this time doing it right. # # Revision 1.7 2001/05/10 20:14:21 jasper # Improve IPC Class and National Class filters. # # Revision 1.6 2001/05/08 23:24:07 sander # Cookie check before searching # # # ############################################################################## # Assume being called from commandline if no request method has been set # If called with the -query flag, read the query from stdin and write the # filtered one to stdout. In this way, patsearch operates as a real query # filter. $ipnmode=0; # By default, use NPO filtering style if(!($ENV{REQUEST_METHOD})) { print "At 101, \$#ARGV=$#ARGV and \$ARGV[0]=>$ARGV[0]<.\n"; if(($#ARGV == 0 && $ARGV[0] eq "-query") || ($#ARGV == 1 && ($ARGV[0] eq "-query" || $ARGV[1] eq "-query"))) { $ipnmode = ($#ARGV == 1 && ($ARGV[0] eq "-filtermode=ipn" || $ARGV[1] eq "-filtermode=ipn")); $query = ; # Read the query from stdin chop $query; # Remove newline $SARRAY{"GENERAL"} = $query; inputFilter(); print STDOUT $SARRAY{"GENERAL"}; exit(0); } } require "local.pl"; # Site and language dependents $debug = 0; srand; # one-time init of random number generator seed $randval = int rand(10000000); # used for ads $HTTPS = $ENV{'HTTPS'}; # ON if gold, reliably $IPNROOT = $ENV{'IPNROOT'}; if(!$IPNROOT) { $IPNROOT = ".."; # .. = proj/ipn/, prod/ipn/, proj/ipnfb, or prod/ipnfb } $response_html = "/tmp/search.$$.html"; $response_http = "/tmp/search.$$.http"; $qsfile = ">/tmp/search.$$.qs"; $qsfilename = "/tmp/search.$$.qs"; $httppost = "$IPNROOT/bin/httppost"; #$httppost_mode = ($debug) ? "-g" : ""; $httppost_mode = "-g"; $header_file = "/hti/eheader.hti.$language"; $trailer_file = "hti/trailer.hti.$language"; if ($SINPUT eq "") { # Would it be better to return HTTP error, and let server handle reply select(STDOUT); $| = 1; # print STDOUT "Content-type: text/html\n\n"; # OutputHeader("$txt_no_sterms_title"); # print STDOUT $txt_no_sterms_body; # OutputTrailer(); exit(0); } #Build the array with name/value pairs @combo = split(/&/, $SINPUT); foreach $combo (@combo) { $combo =~ s/\+/ /g; ($input_name, $value) = split(/=/,$combo); $input_name =~ s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge; $value =~ s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge; #print "[$combo][$input_name][$value]
"; if($SARRAY{$input_name}) { # If an entry appears more than once, append a space and new # value to the old value. (ex: -c coll_f, -c coll_b -> -c coll_f coll_b) $SARRAY{$input_name} = $SARRAY{$input_name} . " " . $value; } else { $SARRAY{$input_name} = $value; } } # Obtain k2 Server tag and value, if present $k2 = $SARRAY{"k2"}; if ($k2) { $tunnel="k2tunnel"; } else { # $tunnel="pattunnel"; $tunnel="patquery"; } delete $SARRAY{"k2"} if (defined($k2)); # remove from the query # Obtain Verity Server tag and value, if present $vs = $SARRAY{"vs"}; if ($vs) { $url_to_call="http://$vs/fcgi-bin/$tunnel?"; } else { $url_to_call="http://$default_verity_server/fcgi-bin/$tunnel?"; } delete $SARRAY{"vs"} if (defined($vs)); # Look for the "g" mode tab and value; set default if not present $gmode = $SARRAY{"-g"}; if ($gmode eq "") { if ($HTTPS eq 'ON' ) { # use db2 hitlists for gold $gmode = "4"; $gmode = "1"; # when we have db2 problems } else { $gmode = "1"; } $SARRAY{"-g"} = "$gmode"; } # Obtain the language, if present, and remove from the query $language = $SARRAY{"language"}; delete $SARRAY{"language"} if (defined($language)); $langarg = ($language) ? "&language=$language" : ""; # Obtain advertising tag and value, if present, and remove from the query $ad = $SARRAY{"ad"}; delete $SARRAY{"ad"} if (defined($ad)); $adarg = ($ad) ? "&ad=$ad" : ""; #Convert the whole array. inputFilter(); #Build the output string. $amper = ""; $i = 0; foreach $tag (%SARRAY) { if ($i == 0) { $value = $SARRAY{$tag}; $tag =~ s/([^ a-zA-Z0-9-_])/sprintf("%%%02lx",unpack('C',$1))/ge; $tag =~ s/[ ]/+/g; $value =~ s/([^ a-zA-Z0-9-_])/sprintf("%%%02lx",unpack('C',$1))/ge; $value =~ s/[ ]/+/g; #print "tag=$tag val=$value
"; $out_qstring .= $amper . $tag . "=" . $value; $amper = "&"; } $i = !$i; } #print "qsout = $out_qstring\n"; $QUERY_STRING = $out_qstring; # Create a file containing the QUERY_STRING open qsfile; select qsfile; print "$QUERY_STRING"; select STDOUT; close qsfile; # Call URL and save responses, both HTML and HTTPS system "$httppost $httppost_mode \"$url_to_call\" $qsfilename $response_html >$response_http;" ; # Depending on the mode used to invoked the patquery tunnel, write the # proper output, based on the response file, to STDOUT. if ($gmode eq "4") { redirOutputFilter(); } else { die "gmode not supported"; } # Cleanup (redirOutputFilter does its own; it never returns) unlink ($qsfilename, $response_http) unless ($debug); unlink ($response_html) unless ($debug); exit(0); # Here starts the real filtercode # The input filter # ---------------- sub inputFilter { %DAYS_MONTHS = ( "1", "31" , "2", "28" , "3", "31" , "4", "30" , "5", "31" , "6", "30" , "7", "31" , "8", "31" , "9", "30" , "10", "31" , "11", "30" , "12", "31" ); %DUTCH_MONTHS = ( "jan", "01", "feb", "02", "fev", "02", "maa", "03", "mar", "03", "mrt", "03", "apr", "04", "avr", "04", "mai", "05", "may", "05", "mei", "05", "juin", "06", "jun", "06", "juil", "07", "jul", "07", "aou", "08", "aug", "08", "sep", "09", "oct", "10", "okt", "10", "nov", "11", "dec", "12", "dez", "12" ); while ( ($input_name , $value) = each(%SARRAY) ) { # Whole question in GENERAL or RAW field # Here more fields are mentioned with an operator ( , = , ) # and "and/or/and not" connections if ( $input_name =~ / ^\s*GENERAL\s*$ | ^\s*RAW\s*$ /ix ) { $position_in_value = 0; $position_of_field_type = 0; $length_of_field_type = 0; $position_of_field_value = 0; $length_of_field_value = 0; print STDOUT "I have a GENERAL or RAW field inside inputFilter, \$input_name=>$input_name< and \$value=>$value<.\n"; while (1) # Continue until we can find no more strings { print "Looking for starting at $position_in_value of $value\n"; $tmp_position_in_value = index( $value , "" , $position_in_value ); if ($tmp_position_in_value == -1) { $tmp_position_in_value = index( $value , "" , $position_in_value ); } $position_in_value = $tmp_position_in_value; last if $position_in_value == -1; $position_in_value += 3; determine_position_and_length_of_field_type(); determine_position_and_length_of_field_value(); $field_type = substr( $value, $position_of_field_type, $length_of_field_type ); print STDOUT "At inputFilter:1, \$field_type starts at $position_of_field_type, goes for $length_of_field_type and is =>$field_type<\n"; $field_value = substr( $value, $position_of_field_value, $length_of_field_value ); print STDOUT "At inputFilter:1, \$field_value starts at $position_of_field_value, goes for $length_of_field_value and is =>$field_value<\n"; $field_value = filter_QUOTE($field_value); if ( $field_type =~ /^\s*KI\s*$/i ) { $field_value = filter_KI($field_value); } elsif ( $field_type =~ /^\s*IC\s*$/i || $field_type =~ /^\s*MC\s*$/i || $field_type =~ /^\s*MAINCLAS\s*$/i || $field_type =~ /^\s*CLASS\s*$/i ) { $field_value = filter_IC($field_value); } elsif ( $field_type =~ /^\s*NC\s*$/i ) { $field_value = filter_NC($field_value); } elsif ( $field_type =~ /^\s*UP\s*$/i ) { $field_value = filter_UP($field_value); } elsif ( $field_type =~ /^\s*PN\s*$/i ) { $field_value = filter_PN($field_value); } # See the notes in the determine_position_and_length_of_field_value subroutine, but # this next statement might be substituting a null for a null. substr( $value, $position_of_field_value, $length_of_field_value ) = $field_value; $position_in_value += length( $field_value ) - $length_of_field_value; } $position_in_value = 0; $position_of_date_in_tmp = 0; $length_of_date = 0; converteer_datum_velden(); @SARRAY{$input_name} = $value; next; } elsif ( $input_name =~ / Field\d+_Type /ix ) { $input_name =~ / Field\d+ /ix; $input_name = $&; $input_name = $input_name . "_Text"; if (@SARRAY{$input_name} ne "") { # String is not empty $joiner_key = $& . "_Joiner"; @SARRAY{$input_name} = filter_QUOTE(@SARRAY{$input_name}); if ( $value =~ /^\s*KI\s*$/i ) { @SARRAY{$input_name} = filter_KI(@SARRAY{$input_name}); next; } elsif ( $value =~ /^\s*IC\s*$ | ^\s*MC\s*$ | ^\s*MAINCLASS\s*$ | ^\s*CLASS\s*$ /ix ) { @SARRAY{$input_name} = filter_IC(@SARRAY{$input_name}); next; } elsif ( $value =~ /^\s*NC\s*$/i ) { @SARRAY{$input_name} = filter_NC(@SARRAY{$input_name}); next; } elsif ( $value =~ /^\s*UP\s*$/i ) { @SARRAY{$input_name} = filter_UP(@SARRAY{$input_name}); next; } elsif ( $value =~ /^\s*PN\s*$/i ) { @SARRAY{$input_name} = filter_PN(@SARRAY{$input_name}); next; } elsif ( $value =~ /^\s*PD\s*$ | ^\s*AD\s*$ | ^\s*DP\s*$ /ix ) { @SARRAY{$input_name} =~ / \s* (\W*?) \s* (?=\w) /xi; my($symbol) = $1; @SARRAY{$input_name} = $'; @SARRAY{$input_name} =~ s/\s*$//; @SARRAY{$input_name} =~ s/ [\/\.-] /\ /igx; @SARRAY{$input_name} = filter_DUTCH_MONTH(@SARRAY{$input_name}); @SARRAY{$input_name} =~ s/^\s*//; if ( $symbol =~ / ^<=$ | ^>=$ | ^<$ | ^>$ | ^=$ /ix ) { @SARRAY{$joiner_key} = " " . $symbol . " "; } else { @arrayOfElements = split( /\ +/ , @SARRAY{$input_name}); $numberOfElements = @arrayOfElements; SWITCH: { $numberOfElements =~ /^1$/ && do { @SARRAY{$joiner_key} = " >= "; @SARRAY{$input_name} = "01 01 $arrayOfElements[0] and $value <= 31 12 $arrayOfElements[0]"; last SWITCH; }; $numberOfElements =~ /^2$/ && do { @SARRAY{$joiner_key} = " >= "; my $year_idx= 1; # Do you use [mm yy] or [yyyy mm] format? my $month_idx = 0; if($arrayOfElements[$month_idx] > 1000) { ($year_idx, $month_idx) = ($month_idx, $year_idx); } $year = $arrayOfElements[$year_idx]; # 7 september 2000 : added millennium window: # year smaller than 26 : above 2000 # year in between 26 and 99 : in the range of 19xx if ($year < 26 ) {$year = 2000 + $year ;} elsif ( ( $year >= 26 ) && ( $year < 100 ) ) {$year = 1900 + $year ;} %local_DAYS_MONTHS = %DAYS_MONTHS; while ( ($month_no, $days) = each(%local_DAYS_MONTHS) ) { if ($arrayOfElements[$month_idx] =~ / ^0?$month_no$ /xi) { if (($month_no == "02") && ((($year%4 == 0) && ($year%100 != 0)) || ($year%400 == 0))) { $days = $days + 1; } @SARRAY{$input_name} = "01 $month_no $year and $value <= $days $month_no $year"; last; } } last SWITCH; }; $numberOfElements =~ /^3$/ && do { @SARRAY{$joiner_key} = " = "; last SWITCH; } } } } } } } } # Below mentioned functions are used by the # input filter # ---------------------------------------------- # Below mentioned function determines the location and length # of -| field type-| # ( based on / initiated by the variable -| $position_in_value-| , # which represents de location of in $value ) # -------------------------------------------------- sub determine_position_and_length_of_field_type { $position_of_field_type = $position_in_value; $position_of_field_type++; while ( substr($value, $position_of_field_type , 1) eq " " ) { $position_of_field_type++; } substr($value, $position_of_field_type) =~ /^(\w+)/; $length_of_field_type = length($1); } # Given the input string, $value (eg "(term1 NC)"), and with # $position_in_value pointing to the ending ">" of the "" # (10 in our example), this function determines the location # and length of the field_value (term1 in our example) by # setting the $position_of_field_value # and $length_of_field_value # variables, in our example, to 1 and 5. # --------------------------------------------------------------- sub determine_position_and_length_of_field_value { # To start with, $position_in_value is pointing to the ending > of the # Back up 4 positions to the first character before the , probably a blank, # but not necessarily if for example, $value = "(term1NC)". $position_of_field_value = $position_in_value; $position_of_field_value -= 4; # In our example, if $position_in_value was 10, pointing to the # ending ">" of the "", then now $position_of_field_value is 6, # which points to the character before the "". # Now back up to the first left parenthesis you see (if any). "First" that is, when # going to the left. while ( substr($value, $position_of_field_value, 1) =~ /[^\(]/ ) { $position_of_field_value--; last if $position_of_field_value == -1; # Ooops, went beyond the left. There is no (. } # Go forward to character after the left parenthesis, or first character if -1 (which # means there was no left parenthesis). $position_of_field_value++; # Skip forward any leading spaces if any to get to the real first character of the search term. while ( substr($value, $position_of_field_value , 1) eq " " ) { $position_of_field_value++; } # Now $position_of_field_value is pointing to the first real character of the value field, # eg in our "(term1 NC)" example, 1, the "t". # Find the end of the search term by looking at the input, starting at the first real character # of our search term (e.g. "term1 NC)"). The "end" is determined kind of funny here and # it's known to be wrong in some cases. # # This originally was simply, # substr($value, $position_of_field_value) =~ /^([^ ][^<]*[^ ])\s* NC"), or imbedded Verity terms ("abcdef NC"). It also counted trailing # parenthesis as part of the term, e.g. "((abc) IC)" would say the length=4 or "abc)". # # It's not at all obvious how this worked for failing match cases, but what would happen is # the original code would set $length_of_field_value to 0 ($1 is unchanged by the failing match, # but because we're inside a subroutine, it was null to begin with, and length(null)=0). # Then inputFilter (our caller) would call the filter_xx routines with a null argument strings, # and as long as all filters returned null when given a null (a BIG assumption, but apparently # the case), then inputFilter would replace a null (due to $length_of_field_value=0) term in the # original input, with null (what the filter returned), so everything worked. If you ask me, # the fact it worked was purely accidental. It couldn't have been planned this way, right? # # There's also the problem of multiple terms, eg "1/ 2/ MC". Both 1/ and 2/ should # be filtered, but most of the filters aren't expecting multiple terms in their input, so they # do the wrong thing when you pass the whole "1/ 2/" to them. Better is have a real, # complete Verity syntax parser, and loop for multiple terms in inputFilter. CHT said she'd # write the BNF for it and Sander said he'd code it, but don't expect it anytime soon. # # This code only partially fixes the situation. It at least calls the filter with a one-character # term and doesn't count trailing parenthesis, but there's still the other problems, e.g. # multiple terms. We'll leave it broken as is 'till CHT & Sander get this fixed by writing a # real parser. # # I also unneccessarily, but explicitly, set $length_of_field_value to 0 instead of relying # on Perl's default behavior of $1="", which was too obscure to be intentional. if ( substr($value, $position_of_field_value) =~ /^(.*?)\s*[\()<>]/ ) { $length_of_field_value = length($1); } else { # Did not match. $length_of_field_value = 0; } } # Below mentioned function filters the date fields in $value # in the case that $input_name is equal to "GENERAL" # or "RAW" # ----------------------------------------------------- sub converteer_datum_velden { $tmp_value = substr ( $value , $position_in_value ); if ( $tmp_value =~ / = \s* \d | [^n]> \s* \d | <[^i] \s* \d /igx ) { $position_of_date_in_tmp = length($`) + length($&) - 1; substr ( $tmp_value , $position_of_date_in_tmp ) =~ / ^ ([^\)<>]* [^\)<> ]) /ix; $date = $1; $date =~ s/ [\/\.-] /\ /igx; $length_of_date = length($date); $concat = $position_in_value; $position_in_value += $position_of_date_in_tmp + $length_of_date; $date = filter_DUTCH_MONTH($date); substr( $tmp_value, $position_of_date_in_tmp, $length_of_date ) = $date; $position_in_value += length( $date ) - $length_of_date; $value = substr ( $value , 0 , $concat ) . $tmp_value; converteer_datum_velden(); } } # Conversion of * in the KIND field # Name: KI # Input: c* # Output: c. sub filter_KI { local($in_var) = @_; if($in_var =~ /\*/) { if($in_var =~ /([A-Z])\*/ix) { $in_var = "($1, $1\?)"; # Translate A* to (A, A?) } elsif($in_var =~ /\*([A-Z])/ix) { $in_var = "($1, \?$1)"; # Translate *1 to (1, ?1) } } return $in_var; } # Filter input of IPC Class Search Term # Name: IC, MC, MAINCLASS, CLASS # # The format of IPC class numbers as rendered to the user and thus, in our input, is in # three parts, with the first part mandatory and the other two optional (but if the second # part exists, then so does the third). The parts are separated by a space and slash and # leading zeros in the second part are removed, eg C09D 3/84. # # In DB/2, we normalize the second part (if it exists), zero-padding it on the left to # three characters. We also remove the slash, leaving us with C09D 00384, for example. # Verity indexes this as two separate words. When we search, we give search arguments to # Verity like C09D 00384. Verity treats this as two words which must follow each other, # thus returns what we want. # # But before searching, Verity does it checking to see if this search might return too many # results and this is done on a word boundary. Thus if the user entered "C09D 5/*", we # filter this to "C09D 005*" and this will fail because of the 005* term (there are too # many patents indexed with words starting at 005, e.g. the patent numbers in the 5 million # range. We've got to do something about that. # # Our solution is based upon the length of valid IPC classes. After padding the second # part to three characters, there are three possible lengths of valid IPC classes. # Length of 4, e.g. C09D # Length of 10, e.g. C09D 00384 # Length of 11, e.g. C09D 003733 # If we filter anything, we will never pass along *'s to Verity in the first two parts. # Instead, we will always pad the first two parts with ?'s, to these fixed lengths. # # For example (these examples are not exhaustive or comprehensive), # C* will get filtered to C??? # C0* will get filtered to C0?? # C09* will get filtered to C09? # # C09D will get filtered to C09D, which will also pick up all the 10- and # or C09D* 11-character IPC classes, e.g. C09D 003/733 since Verity has indexed # or C09D * these as two words. # # C09D *1 will get filtered to C09D ??1?? or C09D ??1??? # C09D *12 will get filtered to C09D ?12?? or C09D ?12??? # # C09D 1 you might think C09D 1* would get filtered to the very long (C09D 1???? # or C09D 1* C09D 1????? C09D 01??? C09D 01???? C09D 001?? C09D 001???) # but due to the 1????? term, this query is too complex for Verity, so just like # we did in the National Class filter (filter_NC below), we'll interpret this as # C09D 1/* and filter both to (C09D 001?? C09D 001???). If the guy doesn't # like it, tough. He can put in his own multi-term search or easier, to put some # question marks in his search term, which we then, won't filter at all. # # C09D 12 again, you might think C09D 12* would get filtered to (C09D 12??? # or C09D 12* C09D 12???? C09D 012?? C09D 012???), but to keep things consistent with # the C09D 1* case above, we simply filter this to (C09D 012?? C09D 012???). # # C09D 1*2 will get filtered to (C09D 1?2?? C09D 1?2??? C09D 012?? C09D 012???) # # C09D 123/x will never match since the third part is always 2- or 3-characters long, # so we guess he meant 0x and filter to C09D 1230x. # C09D 123/* will get filtered to (C09D 123?? C09D 123???) # We also try to handle more complex input, for example, # C09* 1*2/* will get filtered to (C09? 1?2?? C09? 1?2??? C09? 012?? C09? 012???) # We may wind up with *'s in the third part, for example, # C09D 3/*x will get filtered to C09D 003*x. # # In the National Patent Offices on the other hand, the space is removed from the IPC # class when stored in DB/2. This changes completely how Verity indexes and thus, how # we should filter, namely # 1) No longer do you need to worry about the "C09D 1*" failing due to the 1????? term. # Instead what you could have is "C09D001*" which would work fine. However, we use the # same logic as for the US, padding to fixed lengths, and simply remove any spaces at # the end of it all. # 2) In the C*, C0*, C09*, and C09D* cases, the filtered output can no longer be simply # the 4-character output it was before. They now have to include the two longer cases. # So "C0*" now becomes (C0??C0???????C0???????). # There are 4, 1-line changes for the National Patent Offices indicated with comments. sub filter_IC { local($in_var_string) = @_; $output = @in_var_string; # Default output to input in case some of these tests fail. # If there's any kind of funny character in this search string, don't modify is at all. if ( $in_var_string !~ /^[a-zA-Z\d\/ \*,]*$/ ) { $output = $in_var_string; # Pass everything on untouched. } else { # We want to accept multiple national class search arguments, which in the Verity # search language, are separated by commas or spaces (e.g. C09D 1/23,C09D *12/3* $output=""; # Reset output to null 'cause we're gonna # split our input (the search argument) into separate, comma-delimited terms, # and iterate across all search terms, rebuilding our filtered output. foreach (split /,/, $in_var_string) { # Parse out the three parts of the IPC Class search argument, # 0-4 characters, delimited by an optional space, with possible asterisks (*), # 0-3 characters, delimited by an optional slash (/) with possible asterisks (*), # and the rest. # If this term doesn't follow this syntax, then don't muck with it. if ( /^\s* ([\*a-zA-Z\d]{0,4}) \s* \ ? # First part \s* ([\*a-zA-Z\d]{0,3}) \s* \/? # Second part \s* ([\*a-zA-Z\d]* ) $/x ) { # Third part $part1=$1; $part2=$2; $part3=$3; $length1 = length($part1); IC_CASE1: { # If part1 is null or "*", e.g. " 123/45" or "* 123/45", then we can # just drop it and let Verity search on simply part2 & part3. # Of course, this statement only applies to the US. if ($length1 == 0 || $part1 eq "*" ) { # " 123/45" or "* 123/45" $part1 = ""; # Just search on the rest. # For the National Patent Offices, uncomment this line (1 of 4). $part1 = "????" unless($ipnmode); last IC_CASE1; } # The philosophy of whether or not to append an asterisk to any of the # parts or not, is to do so if he has not asked for a specific class. # A one-, two-, or three-character first part can never be a specific # class, so we wild card that here, but four-character first parts # that contain no wild-card characters, are specific. Remember, due # to the check for "funny" characters above, we don't see ?'s here. # Only *s wild-card characters are possible. if (length($part1) < 4 && index($part1,"*")<0) { $part1 = substr($part1 . "????",0,4); # Pad to length 4. last IC_CASE1; } # If he's used *'s in his first term, e.g. C* or C*D or *D, we're # going to be nice and expand them to C??? or C??D or ???D for him. # But it's too much to ask us to do anything with *0*. That 0 oculd # in any position, It's not reasonable for us to filter that into # (0??? ?0?? ??0? ???0). Likewise for *0**. So only # handle here, exactly one * in something that's 4 characters or less. # Perl Note: $part1=~tr/*// counts how many *'s there are in $part1. if ( $part1=~tr/*// == 1 && length($part1) <= 4 ) { substr($part1, index($part1,"*"), 1) = "?" x (5-length($part1)); last IC_CASE1; } } # End of IC_CASE1 block. # The problem with wild-carding the second part is, we also zero-pad it. # Thus if the guy asks for H01B *1/04, we need to make that H01B ??1/04. # That one is easy, as is the 2-character case, H01B *12/04 to H01B ?1204. # But asterisks on the right side are more difficult. You might guess # that H01B 1* should catch H01B 001*, H01B 01?*, and H01B 1??*, but # as mentioned above, Verity fails the 1????? search as too complex, so we # interpret H01B 1* as H01B 1/*, yielding (H01B 001?? H01B 001???) only. $length1 = length($part1); $length2 = length($part2); $length3 = length($part3); $part2b = ""; IC_CASE2: { # In general, if a specific class is given, we don't want to wild-card # the search. For example, C09D should not find C09D 003/84. In the US # however, due to the space in the IPC class in DB/2, there's nothing we # can do about it. C09D will find all classes, C09D as well as all # C09D * classes. # # If nothing was specified for part2 or part3, and part1 is a valid # specific class (4 characters and no wildcards), then do nothing to the # null part2 and part3. In particular, we don't want C09D to find # C09D 00384 because C09D is a specific class. However in the US, we # can't get what we want. C09D *will* find all classes, C09D as well as # all C09D * classes). This is because of the space preserved in the # IPC class, so even though it's not the desired action, there's nothing # we can do to avoid it. # # In the National Patent Office sites though, this space is stripped, so # that's where the behavior would differ and we'd get what we wanted. # There, we don't doctor C09D in any way, but if there are any wild-card # characters in the already-filtered $part1, e.g. C09? along with null $part2 # & $part3, we want to filter the rest to (C09? C09?????? C09??????). # Here, we take one step to accomplish this, make $part2="???". # # null or / or /* or * or */ or */* if ( ($length2 == 0 || $part2 eq "*") && ($length3 == 0 || $part3 eq "*") ) { $part2=""; # For the National Patent Offices, uncomment this line (2 of 4). if ( !($ipnmode) && $part1=~tr/?*// > 0 ) { $part2="???"; } last IC_CASE2; } if ( $length2 == 0 || $part2 eq "*") { # /something or */something ==> ??? $part2 = "???"; last IC_CASE2; } if ( $length2 == 1 ) { # C ==> 00C $part2 = "00$part2"; last IC_CASE2; } if ( $part2 =~ /^\*(.)$/ ) { # *C ==> ??C $part2 = "??$1"; last IC_CASE2; } if ( $part2 =~ /^(.)\*$/ ) { # C* ==> 00C (intentionally not C?? or 0C?) $part2 = "00$1"; last IC_CASE2; } if ( $length2 == 2 ) { # CC ==> 0CC $part2 = "0$part2"; last IC_CASE2; } if ( $part2 =~ /^\*(..)$/ ) { # *CC ==> ?CC $part2 = "?$1"; last IC_CASE2; } if ( $part2 =~ /^(.)\*(.)$/ ) { # C*C ==> C?C or 0CC, since * could be null. $part2 = "$1?$2"; $part2b = "0$1$2"; last IC_CASE2; } if ( $part2 =~ /^(..)\*$/ ) { # CC* ==> 0CC (intentionally not CC? to keep $part2 = "0$1$2"; # consistent with the C* case above. last IC_CASE2; } } # End of IC_CASE2 block. # As a nice guy, we'll convert C09D 123/4 to C09D 123/04, else it'll never match. if ( length($part3) == 1 && $part3 ne "*" ) { $part3 = "0$part3" } $length1 = length($part1); # Recompute current lengths. $length2 = length($part2); $length2b = length($part2b); # If length of $part3 is less than 2, it must be null or "*" (used below). $length3 = length($part3); $term = "$part1 $part2$part3"; # If there are any holes in my logic here, at least # default to something reasonable. IC_CASE3: { # In the US, for the C* or C0* or C09* or C09D or C09* / or C09D /* or C09D */* cases # (i.e. a null part2 & part3), all we need do return is $part1. if ( $length3 < 2 && ($length2 == 0 || $part2 eq "???") ) { $term = $part1; # If just * or " /", term would be blank. Oh, well. # For the National Patent Offices, uncomment this line (3 of 4). $term = "($part1 $part1????? $part1??????)" unless($ipnmode); last IC_CASE3; } # Any $part1, $part2 = C*C, and null or * part3. e.g. C09D 123 or C09D 123 # or C09D 123/* and other cases. if ( $length3 < 2 && $length2b == 0 ) { $term = "($part1 $part2?? $part1 $part2???)"; last IC_CASE3; } # E.G. C09D C*C or C09D C*C/* if ( $length3 < 2 && $length2b > 0) { $term = "($part1 $part2?? $part1 $part2??? $part1 $part2b?? $part1 $part2b???)"; last IC_CASE3; } # E.G. C09D CCC/xx or C09D CC*/xx or C09D CCC/xxx or others if ( $length3 > 1 && $part3=~tr/*?// == 0 && $length2b == 0) { $term = "$part1 $part2$part3"; last IC_CASE3; } # E.G. C09D C*C/xx or C09D C*C/xxx if ( $length3 > 1 && $part3=~tr/*?// == 0 ) { $term = "$part1 $part2$part3 $part1 $part2b$part3"; last IC_CASE3; } # All others C09D C*C cases, e.g. C09D C*C/12*3 if ( $length2b > 0 ) { $term = "$part1 $part2$part3 $part1 $part2b$part3"; last IC_CASE3; } } # End of IC_CASE3 block. } else { # Else this term didn't match our "don't muck with it" match. $term = $_; } # Rebuild the search string with the commas we're parsing on, in our foreach/split loop. if ($output eq "") { # First term? I.E. First time through? $output = $term; # If so, it doesn't get a comma } else { # Else separate this term from earlier ones with a $output = $output . "," . $term; # comma. Note that we remove trailing spaces after } # the comma. EG "A01, B02" => "A01,B02" } # End of foreach split ... # For the National Patent Offices, uncomment this line (4 of 4). $output =~ s/ //g unless($ipnmode); } return $output; } # Filter input of US National Class # Name: filter_NC # Example Input: 084/174; 084/423.R, 084/DIG.022, D5, 084DIG23, AB* # Output: 084174, 084423R, 084DIG022, (0D5???*,D05???*), 084DIG23, (AB????*,0AB???*) # Note: This filter should not be used for national GB classes, the UKPO # version of patsearch will only use this filter if only the US # collection is being searched -sjb sub filter_NC { local($in_var_string) = @_; $output = $in_var_string; # Default output to input in case some of these tests fail. # If there's any kind of funny character in this search string, don't modify is at all. # The idea is, if this guy knows what he's doing by specifying a Verity special symbol # (? ) in his search, in which case we don't want to get in his way (Carol's # suggestion), or he's given us some junk, in which case we'll rely on Verity to fail # on this search. if ( ($in_var_string !~ /^[a-zA-Z\d\/\.,; \*]*$/) || (($SARRAY{"-c"} !~ /us/) && $ENV{REQUEST_METHOD})) { # -sjb, only filter US NC $output = $in_var_string; # Pass everything on untouched. } else { $in_var_string =~ s/\s//g; # Get rid of any whitespace characters. $in_var_string =~ tr?\*\*//\.\.??s; # Also eliminate repeated special characters, eg ** # We want to accept multiple national class search arguments, which in the Verity # search language, are separated by commas or spaces (e.g. 084/423,432/436). # An improvement suggested by CHT is, because the detail page shows national # class separated by semicolons, somebody might "swipe" (cut & paste) that # semicolon-separated list (e.g. 084/423;084/432) and use it. Thus, here we $in_var_string =~ s/;/,/g; # translate all semicolons into commas. $output=""; # Reset output to null 'cause we're gonna # split our input (the search argument) into separate, comma-delimited terms, # and iterate across all search terms, rebuilding our filtered output. foreach (split /,/, $in_var_string) { # Parse out the three parts of the National Class search argument, # 0-3 characters, delimited by an optional slash (/) with possible asterisks (*), # 0-3 characters, delimited by an optional comma (.) with possible asterisks (*), # and the rest. # If this term doesn't follow this syntax, then don't muck with it. if ( /^(\*?) ([a-zA-Z\d]{0,3}) (\*?) (\/?) # First part (\*?) ([a-zA-Z\d]{0,3}) (\*?) (\.?) # Second part ([\*a-zA-Z\d]*) $/x ) { # Third part $ls1=($1 eq "*"); # True if there was a leading star on the first part. $part1=$2; $ts1=($3 eq "*"); # True if there was a trailing star on the first part. $slash=($4 eq "/"); # True if we found a slash. $ls2=($5 eq "*"); # True if there was a leading star on the second part. $part2=$6; $ts2=($7 eq "*"); # True if there was a trailing star on the second part. $dot=($8 eq "."); # True if we found a period. $part3=$9; $length = length($part1); $first2chars = substr($part1,0,2); $part1b = ""; # Interpret part 1. In the comments for these cases below, # l = Any letter [A-Z] # d = Any digit [0-9] # c = Any character [A-Z0-9] # D = The letter D or d # 0 = The digit zero # () = Optional part NC_CASE1: { # If nothing was specified for part1, make it ???. if ( $length == 0 ) { # / or . or * or */ or *. ==> ??? $part1 = "???"; last NC_CASE1; } if ( $length == 1 && $ls1 && !$ts1) { # *C/ ==> ??C $part1 = "??$part1"; last NC_CASE1; } # C* is a special case. One could try to interpret this as a search for # C?????* or 0C????* or 00C???*, but the C?????* guarantees that the # search will fail since there are many words starting with any single # character you choose. So we're going to interpret this as the most # restrictive, yet still plausible and guaranteed to at least work as # far as Verity is concerned, 00C???*. Again, if the user doesn't like # the way we interpreted their search, they can use a different syntax. # # So C* or C*/ is handled the same as C or C/, ignoring the trailing *. # This may be counter-intuitive, but heck, searching for every National # Class starting with a given single character, doesn't make sense. # Or worse, *C*. # # If the intent was that 3*/ would find 030-039/* and/or 300-399/*, # you would need to specify 03?/ or 3??/. if ( $length == 1 ) { # C or C* or *C* ==> 00C $part1 = "00$part1"; last NC_CASE1; } # [Dd][0-9] (e.g. D5) is a very special case because these national classes # are in the DB/2 database and Verity index in two different ways, 0D5 or D05. # The simpliest case, D5, must be converted to 2 terms, D05 or 0D5. Likewise, # 0D has to be converted to 0D? or D0?. But to really complicate things, # D5*/ might be expected to include D50-D59/ as well as D5, so that would # have to be converted to 0D5???* or D05???* or D5????*. Aaaaargh! No, we # only handle the simpliest two cases now. if ( $length == 2 && $first2chars=~"0([Dd])" && !$ls1 && !$ts1 ) { # 0D => 0D? or D0? $part1 = "0$1?"; $part1b = "${1}0?"; last NC_CASE1; } if ( $length == 2 && $first2chars=~"([Dd])([0-9])" && !$ls1 && !$ts1 ) { # D5 => 0D5 or D05 $part1 = "0$1$2"; $part1b = "${1}0$2"; last NC_CASE1; } if ( $length == 2 && $first2chars=~"([Dd])([0-9])" && $ls1 && !$ts1 ) { # *D5 => ?D5 or D05 /..(.)/; $part1 = "?$1$2"; $part1b = "${1}0$2"; last NC_CASE1; } if ( $length == 2 && $ls1 && !$ts1 ) { # *CC ==> ?CC $part1 = "?$part1"; last NC_CASE1; } if ( $length == 2 && !$ls1 && $ts1 ) { # CC* ==> CC? or 0CC $part1b = "0$part1"; $part1 = "$part1?"; last NC_CASE1; } if ( $length == 2 ) { # CC or *CC* ==> 0CC $part1 = "0$part1"; last NC_CASE1; } if ( $length == 3 && $first2chars=~"0([Dd])" && !$ls1 && !$ts1 ) { # 0D5 => 0D5 or D05 $part1b = "${1}0" . substr($part1,2,1); last NC_CASE1; } if ( $length == 3 && $first2chars=~"([Dd])0" && !$ls1 && !$ts1 ) { # D05 => D05 or 0D5 $part1b = "0$1" . substr($part1,2,1); last NC_CASE1; } } # End of NC_CASE1 block. $length = length($part2); $part2b = ""; NC_CASE2: { # If nothing was specified for part2, make it ???. if ( $length == 0 ) { # /. or . or * or */ or *. ==> ??? $part2 = "???"; last NC_CASE2; } if ( $length == 1 && $ls2 && !$ts2) { # *C ==> ??C $part2 = "??$part2"; last NC_CASE2; } # A note on the C* case: We expand C* to 00C* only, intentionally NOT expanding # it to include C??* and 0C?* for two reasons: First, we don't expand C* like # that for our part1 above, so to be consistent, we don't for part2 as well. # Second, this is how we expand part2 in filter_IC so although the IC reason # does not apply here, to be consistent, we do the same expansion here. if ( $length == 1 ) { # C or C* or *C* ==> 00C $part2 = "00$part2"; last NC_CASE2; } if ( $length == 2 && $ls2 && !$ts2 ) { # *CC ==> ?CC $part2 = "?$part2"; last NC_CASE2; } if ( $length == 2 && !$ls2 && $ts2 ) { # CC* ==> CC? or 0CC $part2b = "0$part2"; $part2 = "$part2?"; last NC_CASE2; } if ( $length == 2 ) { # CC or *CC* ==> 0CC $part2 = "0$part2"; last NC_CASE2; } } # End of NC_CASE2 block. # The philosophy of whether or not to append an asterisk if part3 wasn't specified, # is to do so if he has not asked for a specific class, which is easily identified # by checking for wild card characters (?) in any of the first two parts. # Thus, 123, which is interpreted as 123/*, gets an asterisk (123???*), but # 12/34 is a specific class, so doesn't (012/034). # A later fix modified this statement to pick up the 1234* and 123456* cases, # where our part2 was "004" and "456" respectively (note no ?'s), so we were # dropping the trailing * (oops). So now we include the check for $ts2. if ($part3 eq "" && (index($part1.$part1b.$part2.$part2b,"?")>=0 || $ts2) ) { $part3="*" } if ($part1b eq "" && $part2b eq "") { $term = sprintf '%03s%03s%s', $part1, $part2, $part3; } elsif ($part1b ne "" && $part2b eq "") { $term = sprintf '(%03s%03s%s,%03s%03s%s)', $part1, $part2, $part3, $part1b, $part2, $part3; } elsif ($part1b eq "" && $part2b ne "") { $term = sprintf '(%03s%03s%s,%03s%03s%s)', $part1, $part2, $part3, $part1, $part2b, $part3; } else { $term = sprintf '(%03s%03s%s,%03s%03s%s,%03s%03s%s,%03s%03s%s)', $part1, $part2, $part3, $part1b, $part2, $part3, $part1, $part2b, $part3, $part1b, $part2b, $part3; } } else { # Else this term didn't match our "don't muck with it" match. $term = $_; } # Rebuild the search string with the commas we're parsing on, in our foreach/split loop. if ($output eq "") { # First term? I.E. First time through? $output = $term; # If so, it doesn't get a comma } else { # Else separate this term from earlier ones with a $output = $output . "," . $term; # comma. Note that we remove trailing spaces after } # the comma. EG "A/1, B/2" => "00A001,00B002" } # End of foreach split ... } # End of else don't pass everything on untouched return $output; } # Unadorned Patent Numbers, i.e. the "number" only. # Name: UP # Function: Remove leading zeros and pad internal numeric portion for each term. # Input: 00123, 0RE0123,0RE123, RE123, D1234 # Output: 123,RE00123,RE00123,RE00123,D001234 sub filter_UP { my ($in_var_string) = @_; my ($output,$term); # If there's any kind of "funny" character in this search string, don't modify it at all. # That is, we only filter if input is alphanumeric or a Verity operator ?*, or a comma or space. return $output if ($in_var_string !~ /^[a-zA-Z\d\\?\*,\ ]*$/); # Split our input (the search argument) into separate, comma-delimited terms, # and iterate across all search terms, rebuilding our filtered output. foreach (split /,/, $in_var_string) { # For each term, after stripping off blanks and leading zeros, we handle 2 cases only. # 1) One Letter followed by all numeric. We pad numeric portion with zeros to 7 digits. # For example, D123 or D000123 => D0000123 # 2) Two Letters followed by all numeric. We pad numeric portion with zeros to 6 digits. # For example, RE123 or RE00123 => RE000123 my $term = $_; if ( $term =~ /^ *(.*) *$/ ) { # Strip off leading and trailing blanks, if any. $term=$1; } if ( $term =~ /^0+(.*)$/ ) { # Strip off leading zeros if any. $term=$1; } if($ipnmode) { # IPN mode (as D0000123 or RR000123, indexed as D0000123 or RR000123) if ( $term =~ /^([a-zA-Z])0*(\d{1,6})$/ ) { $term = $1 . "0" x (7-length($2)) . $2; # Input is D00123, output is D0000123, e.g. } elsif ( $term =~ /^([a-zA-Z]{2})0*(\d{1,5})$/ ) { $term = $1 . "0" x (6-length($2)) . $2; # Input is RR0123, output is RR000123, e.g. } } else { # NPO mode (as 0D000123 or 0RR00123, indexed as D000123 or RR00123) if ( $term =~ /^([a-zA-Z])0*(\d{1,6})$/ ) { $term = $1 . "0" x (6-length($2)) . $2; # Input is D00123, e.g. } elsif ( $term =~ /^([a-zA-Z]{2})0*(\d{1,5})$/ ) { $term = $1 . "0" x (5-length($2)) . $2; # Input is RR0123, e.g. } } # Rebuild the search string with the commas we're parsing on, in our foreach/split loop. if ($output eq "") { # First term? I.E. First time through? $output = $term; # If so, it doesn't get a comma } else { # Else separate this term from earlier ones with a $output = $output . "," . $term; # comma. } } # End of foreach split ... return $output; } # Leading zeros with patentnumber # Name: PN # Input: Less than 8 digits and no wildcard # Output: fill up with leading zeros till CcnnnnnnnnKI with PN sub filter_PN { local($in_var) = @_; if($in_var =~ /\*/) { return $in_var; } elsif($in_var =~ /([A-Z\?]{2})([\d\?]{1,8})([A-Z\?]*[0-9]*)/ix) { return $1 . "0" x (8 - length($2)) . $2 . $3. (length($3)==2?"":"*"); } else { return $in_var; } #new code handles multiple values. not complete yet so don't activate -sb # my $in_var = shift; # @ssvar = split /\s+/, $in_var; # for($i = 0; $i <= $#ssvar; $i++) { # @csvar = split /,/, $ssvar[$i]; # for($j = 0; $j <= $#csvar; $j++) { # if($csvar[$j] =~ /([A-Z\?]{2})([\d\?]{1,8})([A-Z\?]*[0-9]*)/ix) { # $csvar[$j] = $1 . "0" x (8 - length($2)) . $2 . $3. (length($3)==2?"":"*"); # } # } # @ssvar[$i] = join ',', @csvar; # } # return join ' ', @ssvar; } # Quotes are automatically provided around words with an accent character # Name : All text fields # Input: Word string with an accent ( ascii > 127 ) # Output: Word string between ` ` # Filter checks if action is needed. sub filter_QUOTE { local($in_var) = @_; if ( $convert_case && $in_var =~ /^\s*[^'"`].*[^'"`]\s*$/) { # IGE wants that non quoted words are always case insensitive $in_var = lc $in_var; } if ( $in_var =~ /[\x80-\xFF]/ ) { # OBSOLETE: $in_var =~ s/^\s*([\ -\xFF]+)\s*$/`$1`/; # There is a word with an accent # $in_var =~ s/(\w+[\x80-\xFF]+\w*)/`$1`/gx; # 14-12-99 replaced by marco $in_var =~ s/(\S+[\x80-\xFF]+\S*)/`$1`/gx; } return $in_var; } # Input of dutch date, abbravations included # Name : All date fields (PD,AD,DP) # Input : Part of a dutch name (part of date) # Output: number sub filter_DUTCH_MONTH { local($in_var) = @_; %local_DUTCH_MONTHS = %DUTCH_MONTHS; while ( ($month_name, $month_no) = each(%local_DUTCH_MONTHS) ) { last if $in_var =~ s/ (\b\s* $month_name [a-zA-Z\ ]*) /\ $month_no\ /xi; } return $in_var; } # "redir" handling of output placed in a db2 hitlist (taken from dofilter) sub redirOutputFilter { if(0) { # Call URL and save responses, both HTML and HTTPS #system "$httppost -g \"$url_to_call\" $qsfilename $response_html >$response_http;" ; # If return HTML is empty (no data) then emit an error message, cleanup & exit if (-z$response_html) { system("cat - $qsfilename $response_http $orderlog; rm -f $qsfilename $response_html $response_http"); die "Call to patquery failed"; } # Strip header (first 8 lines) and line (links) after the order summary. # Remove the links they add and tags too. #system("sed -f $IPNROOT/cgi-bin/patqueryt.sed $response_html | tee -a $orderlog"); # Cleanup most files unlink ($qsfilename, $response_http); # Copy response to STDOUT print STDOUT "Content-type: text/html\n\n"; open(IN, $response_html) ; while() { print STDOUT $_; } close(IN); # Finish cleanup unlink ($response_html); } else { #========================================================= # Fix for "back" button problem. # Make above if(0) an if(1) for SSI #========================================================= $supercp_cmd = "$IPNROOT/bin/supercp"; $grep_cmd = "$IPNROOT/bin/sgrep"; #========================================================= # Directly open httppost as a pipe, with its output forced to stdout. # This avoids two temporary files. #========================================================= #system "$httppost -g \"$url_to_call\" $qsfilename $response_html >$response_http;" ; $PQ_REDIR_URL = ""; open(IN, $response_html) ; while() { $PQ_REDIR_URL .= $_; } close(IN); unlink ($qsfilename) unless($debug); # Done with query file unlink ($response_html, $response_http) unless($debug); # Eat HTTP Header: lines - we'll create our own $PQ_REDIR_URL =~ s/HTTP Header: .*\n//g; # Force flush after every write select(STDOUT); $| = 1; # patquery_passthru should be used once SSI is available # Uncomment the next line to pass-on whatever patquery returned #$patquery_passthru = 1; if($patquery_passthru) { # Emit our own HTTP header for HTML print STDOUT "Content-type: text/html\n\n"; # Echo whatever patquery emitted - it should be an SSI clause print STDOUT "$PQ_REDIR_URL"; select(STDOUT); $| = 1; exit; # DONE } else { # Tease-out the Net.data URL from the HTML redirect $PQ_REDIR_URL =~ s/.*URL=(.*)\".*/$1/g; chomp $PQ_REDIR_URL; # Eat any newlines # Emit our own HTTP header for HTML ## print STDOUT "Content-type: text/x-ssi-html\n\n"; print STDOUT "Content-type: text/html\n\n"; if ($PQ_REDIR_URL eq "") { open ERROR_MSG, "$supercp_cmd \"http://localhost/messages/500error.html\" \"%stdout\" |" or die "Call to $supercp_cmd failed.\n"; $ignore_HTTP = 1 ; #true at the start while () { if (/^$/) { $ignore_HTTP = 0 ;} # empty line indicates that the text HTTP Header could be in the page itself next if ( /^HTTP Header: / && $ignore_HTTP ) ; print $_; } close ERROR_MSG or die "Bad errormessage: $! $?"; # exec "$supercp_cmd \"http://localhost/messages/500error.html\" \"%stdout\" | $grep_cmd 'HTTP Header: '" or die "Call to $supercp_cmd failed.\n"; } else { # Pass special variable if this script is run in HTTPS (gold) # so that hitlist_sql.d2w will know this is gold if ($HTTPS eq 'ON' ) { $PQ_REDIR_URL .= "&gold1=1"; } # Pass language and/or ad if passed to us originally $PQ_REDIR_URL .= "$adarg$langarg"; #Call supercp to fetch Net.Data-generated hitlist, eating any HTTP headers. #Once again, this is constructed to avoid needing temporary files. $ignore_HTTP = 1 ; #true at the start open HTTP_MSG , "$supercp_cmd \"$PQ_REDIR_URL\" \"%stdout\" |" or die "Call to $supercp_cmd failed.\n"; while () { if (/^$/) { $ignore_HTTP = 0 ;} # empty line is not anymore ignoring of text HTTP Header next if ( /^HTTP Header: / && $ignore_HTTP ) ; print $_; } close HTTP_MSG or die "Bad HTTP file: $! $?"; #exec "$supercp_cmd \"$PQ_REDIR_URL\" \"%stdout\" | $grep_cmd 'HTTP Header: '" or die "Call to $supercp_cmd failed.\n"; # Uncomment the following to use an SSI exec call instead of supercp # NOTE: ICSS Version 4.2.1.7 will not work with POSTed CGI SSI exec requests. # GO 4.6.2.2 seems to handle it ok though. -tdg # $PQ_REDIR_URL =~ s/http:\/\/localhost//g; # $PQ_REDIR_URL =~ s/&/\\&/g; # print STDOUT "Content-type: text/html\n\n\n"; } # DONE - exec never returns } #========================================================= # End fix for "back" button problem. #========================================================= } } # redirOutputFilter #------- Produce a site header ------ # Input: Text for the title tag # Output: none # sub OutputHeader { local($in_var) = @_; $head = "\n"; $head .= "\n"; $head .= "$in_var\n"; # The first line of the included header file is a (logical, huh?) # $head .= ""; print STDOUT "$head"; system("cat $IPNROOT/htdocs/$language/$header_file"); } # OutputHeader sub OutputTrailer { #------- Produce trailer ------- # (This file starts with ) # (It also references the closing_tags variable) system("cat $IPNROOT/htdocs/$language/$trailer_file"); } # OutputTrailer