package Marc::Search; use Marc; use File::Find; use strict; use vars qw($VERSION $AUTOLOAD @MSGFILES @ISA); @ISA = qw(Marc); $VERSION = "4.3"; ##------------------------------------------------------------------------## ## Constructor my %fields = ( age => 0, archive_name => undef, base_href => undef, body => undef, body_count => 0, case => 0, clean_words => undef, date => undef, date_count => 0, directories => undef, doc_root => undef, file_count => 0, from => undef, from_count => 0, function1 => undef, function2 => undef, home => undef, how => undef, key_word => undef, limit => 25, match => 0, previous => undef, searched => 0, script => undef, subj => undef, subj_count => 0, user => undef, usersubdir => undef, words => undef, working_dir => undef, ); sub new { my $class = shift; my $self = Marc->new(\%fields); bless $self, $class; return $self; } ##------------------------------------------------------------------------## ## These accessor methods keep a running count of matches in each area ## PUBLIC METHOD sub body_count { my $self = shift; my $count = shift || 0; return $self->{body_count} += $count; } sub date_count { my $self = shift; my $count = shift || 0; return $self->{date_count} += $count; } sub from_count { my $self = shift; my $count = shift || 0; return $self->{from_count} += $count; } sub subj_count { my $self = shift; my $count = shift || 0; return $self->{subj_count} += $count; } ##------------------------------------------------------------------------## ## Handle Actual Search ## PRIVATE METHOD sub _find_match { my($self,$file,$subj,$from,$date,$body_ref) = @_; my $body_string = ''; my $match = undef; # Check for a match in subject if (($self->subj) && ($_ = $subj) && (&{$self->{function2}})) { $subj =~ s,($self->{key_word}),$1,go; # Bold any matches $self->subj_count(1); # Keeping count $match = 1; # We'll be printing this one } # Check for a match in from if (($self->from) && ($_ = $from) && (&{$self->{function2}})) { $from =~ s,($self->{key_word}),$1,go; $self->from_count(1); $match = 1; } # Check for a match in date if (($self->date) && ($_ = $date) && (&{$self->{function2}})) { $date =~ s,($self->{key_word}),$1,go; $self->date_count(1); $match = 1; } # Is this a full? if (defined($body_ref)) { my @body = @$body_ref; # use routine generated by body_match_all if (defined($self->function1)) { my @words = @{$self->words}; my $i; BODY: for $i (0 .. $#body) { my %matches = (); my $hit = ''; $_ = $body[$i]; my @linematches = &{$self->{function1}}; foreach $hit (@linematches) { # key=searchterm; value=line $matches{$hit} = $i; } # all keys = all terms? if (keys %matches == @words) { # Add to the running total $self->body_count(1); my $line; $match = 1; foreach $hit (sort {$matches{$a} <=> $matches{$b}} keys %matches) { # no duplicates please next if ($matches{$hit} + 1 == $line); # arrays start from 0 $line = $matches{$hit} + 1; $body_string .= "line $line: $body[$matches{$hit}]"; } $body_string =~ s,($self->{key_word}),$1,go; last BODY; } } } # otherwise use routine supplied by match_any or match_this else { my $i; BODY: for $i (0 .. $#body) { if (($_ = $body[$i]) && (&{$self->{function2}})) { ($body_string = $body[($i - 1)] . $body[$i] . $body[($i + 1)]) =~ s,($self->{key_word}),$1,go; $self->body_count(1); $match = 1; last BODY; } } } } if (defined $match) { print "
$subj -- \n","$date\n", "
$from","
$body_string
\n"; } return $match; # 1 if match suceeds; 0 otherwise } ##------------------------------------------------------------------------## ## Build up a list of files to search; read in the relevant portions; ## pass those parts off for checking (and printing if there's a match) ## by the _find_match method ## PUBLIC METHOD sub search { my $self = shift; my $doc_root = $self->doc_root; my $limit = $self->limit; my $previous = $self->previous || 0; my $dirs = $self->directories; my $user = $self->user || ''; my $body = $self->body || 0; my $home = $self->home if (defined($self->home)); my $usersubdir = $self->usersubdir; # More than one directory to search? if (ref($dirs) eq 'ARRAY') { find(\&_get_file_list,@{$dirs}); } # Only only one directory to search else { find(\&_get_file_list,$dirs); } # File::Find returns these in somewhat haphazard order. @MSGFILES = sort @MSGFILES; # Newest files first! @MSGFILES = reverse(@MSGFILES) if $self->age; # The *real* number of files $self->file_count($#MSGFILES); @MSGFILES = splice(@MSGFILES,$previous) if $previous; my $file; my $i = 1; # Arrays are numbered from 0 # Avoid doing a lot of extra math inside the loop $limit += $previous; foreach $file (@MSGFILES) { my ($subj,$from,$date,$body_ref); open(FH,"<$file") or $self->error("Couldn't access file $file: $!"); # Need this loop because newer versions of MHonArc put a version # number on the first line of the message. Just in case Earl # decides to change this again, we'll loop until the subject # comment tag is found. Thanks to Douglas Gray Stephens for # pointing this out, and more importantly, for suggesting a good # solution (though ultimately not the one in place here). That # DGS was able to contribute to this modest little program is, I # think, a good argument in favor of open source code! while () { if (/^/) { $subj = $1; last; } } ($from = ) =~ s/^/$1/; ($date = ) =~ s/^/$1/; if ($body) { while () { # Messages are contained between Body-of-Message tags next unless (/^/); $_ = ; while ($_ !~ /^/) { push(@$body_ref,$_); $_ = ; } last; } } close(FH) or $self->error("Couldn't close file $file: $!"); if ($home) { $file =~ s/$home/\/$user/o; $file =~ s/\/$usersubdir//o; } else { $file =~ s/$doc_root//o; } if ($self->_find_match($file,$subj,$from,$date,$body_ref)) { return ($i + $previous) if ( $self->body_count == $limit or $self->subj_count == $limit or $self->from_count == $limit or $self->date_count == $limit); } $i++; } return $self->file_count + 1; } sub header { my $self = shift; my $base_href = $self->base_href; my $clean_words = $self->clean_words; my $archive_name = $self->archive_name; print <Search Results for MHonArc

Search Results for MHonArc

Search for xxxEOFxxx # Some reminders about the search options that were selected if ($self->how eq 'phrase') {print "this phrase"} elsif ($self->how eq 'any') {print "any of these words"} else {print "all of these words"} print < "$clean_words" xxxEOFxxx if ($self->case) { print "(case Insensitive, " } else { print "(case Sensitive, "} if ($self->match) { print "Partial Word Search)"} else { print "Exact Word Search)"} print < Return to the $archive_name archive
xxxEOFxxx if ($self->age) { print "Newest Messages appear first

\n"; } else { print "Oldest Messages appear first

\n"; } } ##------------------------------------------------------------------------## ## Output Search Results ## PUBLIC METHOD sub footer { my ($self,$continue) = @_; my $num = $self->file_count + 1; # Arrays are numbered from 0 my $body_count = $self->body_count; my $date_count = $self->date_count; my $from_count = $self->from_count; my $subj_count = $self->subj_count; my $body = $self->body || 0; my $subj = $self->subj || 0; my $from = $self->from || 0; my $date = $self->date || 0; my $dirs = $self->directories; my $base_href = $self->base_href; my $archive_name = $self->archive_name; my $clean_words = $self->clean_words; my $working_dir = $self->working_dir; my $script = $self->script; my $how = $self->how; my $home = $self->home; my $user = $self->user; my $age = $self->age; my $case = $self->case; my $match = $self->match; my $limit = $self->limit; my $searched = $self->searched; my $fields = ''; print < Summary of Search Results

Searched $searched out of $num messages...
xxxEOFxxx # No need to print extraneous form fields, so we won't if ($body) { print "
$body_count matches in Body\n
"; $fields .= < EOF } if ($subj) { print "
$subj_count matches in Subject\n
"; $fields .= < EOF } if ($from) { print "
$from_count matches in From\n
"; $fields .= < EOF } if ($date) { print "
$date_count matches in Date\n
"; $fields .= < EOF } if (ref($dirs) eq 'ARRAY') { for (@{$dirs}) { $fields .= < EOF } } else { $fields .= < EOF } print <
$fields xxxEOFxxx print < xxxEOFxxx print < xxxEOFxxx print < MHonArc Search Engine -- Return to the $self->{archive_name} archive
Written by Eric D. Friedman -- friedman\@uci.edu
Based on Jason Lin's code -- jlin\@uci.edu
Last updated March 29, 1997
Please send comments to eee\@uci.edu xxxEOFxxx } ##------------------------------------------------------------------------## ## Function for use with File::Find -- recursive ## PRIVATE METHOD sub _get_file_list { /^msg/ && push @MSGFILES,$File::Find::name; } ##------------------------------------------------------------------------## ## Eval anonymous pattern match functions based on user search terms ## PUBLIC METHOD sub match_any { my $self = shift; my ($tail,$pat); if ($self->case) {$tail = '/i'} else {$tail = '/'}; my $code = < 5; study; EOCODE for $pat (@_) { $code .= <$code"; # use for debugging my $function = eval $code; die "bad pattern: $@" if $@; return $function; } ## PUBLIC METHOD sub body_match_all { my($self,@ret) = @_; my($len) = ($#ret + 1) / 2; my(@pat) = splice(@ret,$len); my $tail; if ($self->case) {$tail = '/i'} else {$tail = '/'}; my $code = < 5; study; EOCODE my $i; for $i (0 .. $#pat) { $code .= <$code"; # used for debugging my $function = eval $code; die "bad pattern: $@" if $@; return $function; } ## PUBLIC METHOD sub match_all { my $self = shift; my ($sep,$tail); if ($self->case) { $sep = "/i && /"; $tail = "/i }"; } else { $sep = "/ && /"; $tail = "/ }"; } my $code = "sub { /" . join ("$sep", @_) . $tail; # print "
$code
"; # debugging my $function = eval $code; die "bad pattern: $@" if $@; return $function; } ## PUBLIC METHOD sub match_this { my $self = shift; my $string = join(' ', @_); $string = '(?i)' . $string if ($self->case); my $code = "sub { /" . $string . "/ }"; # print "
$code
"; # Yep, you guessed it, debugging my $function = eval $code; die "bad pattern: $@" if $@; return $function; } 1;