#!/dcs/bin/perl5 -w ##------------------------------------------------------------------------## ## File: ## marc-search.cgi ## Author: ## Eric D. Friedman friedman@uci.edu ## Documentation: ## POD style documentation is included at the end of the Marc.pm file. ## PODs can be read with any of several tools in the standard perl5 ## distribution, including pod2text, pod2man, pod2html, and pod2latex. ## From a shell, try one of the following: ## pod2text Marc.pm | more ## pod2man Marc.pm | nroff -man | more ##------------------------------------------------------------------------## ## Configurable options - described in great detail in the POD - READ IT! my $server = 'http://eee.uci.edu'; my $help = $server . '/toolbox/marc-search.html'; my $doc_root = '/Web/doc'; my $script = $server . '/cgi-bin/marc-search.edf.cgi'; my $usersubdir = 'public_html'; # Tells which subdirectory of # a user's home directory # (file system) corresponds to # http://server/~user/ . (Only # needed if marc-search # supports URLs with tildes.) ##------------------------------------------------------------------------## ## External Modules to load - others are loaded in the Marc_Search package # This next line tells perl where the CGI_Lite module was installed. use lib '/users/asc/friedman/www/cgi-bin'; use lib '/Web/httpd/cgi-bin'; use CGI_Lite; # Substitute other CGI libraries if need be (see next section) use strict; require 5.002; ##------------------------------------------------------------------------## ## Read and Parse the input values from the Web client # This next section is where the input from the Web client is read and # parsed. I've chosen to use Shishir Gundavaram's CGI_Lite module # because it is lightweight and efficient. You can download a copy from # . You are also free to # use a different module or library as needed. The only requirement is # that the form values must be loaded into a hash table for which $form # is a reference. That is, if you have a hash called %input, you'll need # to create a reference to it thusly: $form = \%input; my $cgi = new CGI_Lite; my $platform = 'Unix'; # Not really needed $cgi->set_platform ($platform); # on unix systems my $form = $cgi->parse_form_data; # The object supplied by CGI_Lite is not used below this point. If # you're using a different module/library, changes below here should # not be needed provided that you've created the $form reference as # described above. ##------------------------------------------------------------------------## ## Real work begins here - no changes should be necessary from here $| = 1; # Don't buffer output print "Content-type: text/html\n\n"; # Output Mime type $form->{submit} = 'Find' # Some browsers allow forms to be if not exists $form->{submit}; # submitted by pressing enter ##------------------------------------------------------------------------## ## Output a search form if ( defined $ENV{'REQUEST_METHOD'} and $ENV{'REQUEST_METHOD'} eq 'GET' # Script called from a simple link or $form->{submit} eq 'Start Search')# Script initiated from a form { use Marc::Form; my $query = new Marc::Form; # Create search object $query->error("Sorry, your client needs to set HTTP_REFERER") unless defined $ENV{'HTTP_REFERER'}; $query->doc_root($doc_root); $query->script ($script); $query->help ($help); my $rel_path; ($rel_path = $ENV{'HTTP_REFERER'}) =~ s,http://.*?/(.*)/.*,/$1,; if ($rel_path =~ m,/~(.*?)/(.*),) # path has a tilde in it { my $user = $1; my $path = $2; my $home = (getpwnam($user))[7]; # get the user's home directory # and use the value in $usersubdir # to complete the path to the # working directory $query->home ($home); $query->user ('~' . $user); $query->working_dir($home . '/' . $usersubdir . '/' . $path); } else { $query->working_dir($doc_root . $rel_path); # Path to the archive } $query->base_href($server . $rel_path . '/'); # URL for the archive my $dir = $query->working_dir; if (-e "$dir/.marc-search.cfg") { $query->read_config_file; } else { my $name; ($name = $rel_path) =~ s,.*/(.*),$1,; $query->archive_name($name); # Name of the archive } $query->print_form; exit 0; } elsif ($form->{submit} eq 'New Search') # Script called from mid-search { use Marc::Form; my $query = new Marc::Form; # Create search object $query->doc_root ($doc_root); # Document Root $query->script ($script); # Script URL $query->help ($help); # Help URL $query->working_dir ($form->{working_dir}); # Working Directory $query->archive_name ($form->{archive_name}); # Name of the Archive $query->base_href ($form->{base_href}); # Base for URLs $query->home ($form->{home}); # home directory $query->user ($form->{user}); # home directory my $dir = $query->working_dir; if (-e "$dir/.marc-search.cfg") { $query->read_config_file; } $query->print_form; exit 0; } ##------------------------------------------------------------------------## ## This is a search use Marc::Search; my $search = new Marc::Search; # Create search object $search->doc_root ($doc_root); $search->script ($script); $search->usersubdir ($usersubdir); ##------------------------------------------------------------------------## ## Get info from the hidden form fields $search->working_dir ($form->{working_dir}); # Working Directory $search->archive_name ($form->{archive_name}); # Name of the Archive $search->base_href ($form->{base_href}); # Base for URLs $search->home ($form->{home}); # home directory $search->user ($form->{user}); # home directory $search->directories ($form->{directories}); # Directories to search if ($form->{submit} eq 'Continue Searching') { $search->body_count ($form->{body_count}); # Found in Body $search->date_count ($form->{date_count}); # Found in Date $search->from_count ($form->{from_count}); # Found in From Field $search->subj_count ($form->{subj_count}); # Found in Subject $search->previous ($form->{previous}); # End of Last Search } ##------------------------------------------------------------------------## ## Read in custom sort routine my $dir = $search->working_dir; if (-e "$dir/.marc-search.cfg") { $search->read_config_file; } ##------------------------------------------------------------------------## ## Do a little error checking if ( not (defined($search->base_href)) or not (defined($search->working_dir)) or not (defined($search->archive_name)) ) { $search->error("base_href, working_dir, or archive_name were not set"); } elsif ($form->{key_word} =~ /^\s*$/) # User didn't enter any search terms { $search->error("Please enter a search term and try again."); } ##------------------------------------------------------------------------## ## Check Form Options during a search # Use top directory if no subdirectories are chosen $search->directories($search->working_dir) if (not (defined($search->directories))); # set the limit of results per page $search->limit ($form->{limit}); # set the boolean which determines the order in which files are searched $search->age (1) if ( $form->{age} eq 'new' or $form->{age} eq '1'); # set the boolean to control partial word matches $search->match (1) if ( $form->{match} eq 'partial' or $form->{match} eq '1'); ##------------------------------------------------------------------------## ## Process the search term(s) according to user-selected options # Split search terms on whitespace my @words = split(/\s+/,$form->{key_word}); # Keep a copy of the search terms $search->words (\@words); $search->clean_words ($form->{key_word}); # Make another copy for use in a matching function my @clean_words = @words; my $i; for $i (0 .. $#words) { # protect match delimiter $words[$i] =~ s,/,\\/,g; # Anchor words in 'exact' search $words[$i] = '\b' . $words[$i] . '\b' if ($form->{match} eq 'exact'); } # only used for bolding results $search->key_word (join('|',@words)); if ($form->{case} eq 'off') { # turn case boolean on $search->case (1); # Perl5-style case insensitivity $search->key_word ('(?i)' . $search->key_word); } # OR search if ($form->{how} eq 'any') { $search->function2 ($search->match_any(@words)); $search->how ('any'); } # AND search elsif ($form->{how} eq 'all') { $search->function1 ($search->body_match_all(@clean_words,@words)); $search->function2 ($search->match_all(@words)); $search->how ('all'); } # PHRASE search else { $search->function2 ($search->match_this(@words)); $search->how ('phrase'); } ##------------------------------------------------------------------------## ## Establish Search Criteria # The booleans set by these methods determine what areas of the messages # will be searched. $search->subj (defined($form->{subj})); # 'Subject' Header $search->from (defined($form->{from})); # 'From' Header $search->date (defined($form->{date})); # 'Date' Header $search->body (defined($form->{body})); # 'Body' of the mesgs ## Body searching is the default if all boxes are unchecked $search->body (1) if ( not ($search->subj) and not ($search->from) and not ($search->body) and not ($search->date) ); ##------------------------------------------------------------------------## ## The Heart of the Search Engine $search->header; # Print the top of the page my $searched = $search->search; $search->searched($searched); # Read all the files? if ($searched < $search->file_count) {$search->footer(1)} else {$search->footer(0)}; exit 0; # All finished!