#! /usr/bin/perl

BEGIN { require "../../perl/lib/Sitedefs.pm"; }

#######################################################################
#
# Todo:       Add functionality for key values set to "all"
#
# Program:    search.cgi
# Date:       17 June 1998
# Programmer: Laurie Dickinson
# Purpose:    enables user to search Science U, using a query string
#             as well as limiting search by site-determined metatags.
# Logic:      
#             1.  Define and initialize local variables.
#             2.  Get form parameters.
#             3.  If first time through, just display search form.
#             4.  If subsequent times through:
#                 a.  Execute Search:
#                 b.  If no errors, display search data.
#                 c.  If errors, display error data.
#                 d.  Display search form.
#             5.  Display footer.
#
#######################################################################

use ErrorLog;
use CGITracker;
use Exception;
use strict;
use Keywords;
use PageTemplate;
use searchutils;

#- swish-e Variables --------------------------------------------#

# Path to SWISH-E executable
my $swish = "$Sitedefs::SUROOT/local/swish-e/$Sitedefs::OSTYPE/swish-e";

# Path to the SWISH-E index
my $index = "$Sitedefs::SUROOT/local/swish-e/scienceu.index";

# URL of this cgi script (used as the form action and in paging links)
my $searchcgi = "$Sitedefs::ROOTURL/library/search.cgi";

# Navigation file handed to printPageTop
my $navfile = "$Sitedefs::ROOTPATH/library/navigation.html";

#- Main Program ------------------------------------------------------#

# File-level state.  Several of these are also read directly by the
# subroutines further down, so they must stay declared at this scope.
my ($tpl, $q, $query, $results, $first, $displaynum, $new, $pagetitle);
my (@SU_Doctype, $SU_Doctype);
my (@SU_Section, $SU_Section);
my (@SU_Subject, $SU_Subject);

$q   = CGITracker->new;
$tpl = PageTemplate->new;

# Fill in the display defaults for any paging parameter left blank.
my @display_defaults = (
    [ 'results',    '2000' ],
    [ 'first',      '1'    ],
    [ 'displaynum', '20'   ],
);
for my $default (@display_defaults) {
    my ($name, $value) = @$default;
    $q->param($name, $value) if $q->param($name) eq "";
}

if ($q->param('new') eq "yes") {
    # First time through: no search yet, only the page header.
    $q->param('query', '');
    $q->param('new', 'no');

    $pagetitle = "Search Engine";
    &printPageTop($pagetitle, $navfile, $q, $tpl);
}
else {
    # Subsequent times through: collect the request and run the search.
    $query      = $q->param('query');
    $results    = $q->param('results');
    $first      = $q->param('first');
    $displaynum = $q->param('displaynum');

    # Multi-valued checkbox groups are flattened to comma-separated strings.
    @SU_Doctype = ($q->param('SU_Doctype'));
    @SU_Section = $q->param('SU_Section');
    @SU_Subject = $q->param('SU_Subject');
    $SU_Doctype = join(",", @SU_Doctype);
    $SU_Section = join(",", @SU_Section);
    $SU_Subject = join(",", @SU_Subject);

    # execute search and display results (or the error page)
    &do_search($q, $query, $results, $first, $displaynum,
               $SU_Doctype, $SU_Section, $SU_Subject);

    # A search submitted from the form always starts again at item 1.
    $q->param('first', '1');
}

# Both paths finish with the search form followed by the page footer.
&printQueryForm($q, $query, $SU_Doctype, $displaynum, $results, $first, $new);
&printPageBottom($q, $tpl);


#- Subroutines -------------------------------------------------------#

# Subroutine for constructing the Swish-E search request and formatting
# the results.
#
# Arguments (positional):
#   $q           - query object; page output is written with $q->Print
#   $query       - free-text query typed by the user (may be empty)
#   $results     - maximum number of hits to request from swish-e (-m)
#   $first       - 1-based number of the first hit to display
#   $displaynum  - number of hits to display per page
#   $SU_Doctype, $SU_Section, $SU_Subject
#                - comma-separated metatag values that limit the search
#
# Prints the results page, or an error page via search_error().  Appends
# one line per search to SEARCH_LOG when that file can be opened.
# Reads the file-level $swish, $index, $navfile and $tpl settings.

sub do_search {
    my ($q, $query, $results, $first, $displaynum,
        $SU_Doctype, $SU_Section, $SU_Subject) = @_;

    # Treat missing text parameters as empty strings.
    for my $text ($query, $SU_Doctype, $SU_Section, $SU_Subject) {
        $text = "" unless defined $text;
    }

    # The paging values arrive straight from the request.  Anything that
    # is not a positive integer falls back to the script's usual default.
    $first      = _positive_int($first,      1);
    $displaynum = _positive_int($displaynum, 20);
    $results    = _positive_int($results,    2000);

    my ($swishargs, $swishquery) = _swish_request(
        $query,
        [ 'su_doctype', $SU_Doctype ],
        [ 'su_section', $SU_Section ],
        [ 'su_subject', $SU_Subject ],
    );

    my $hits     = [];
    my $errormsg = "";
    if (@$swishargs) {
        ($hits, $errormsg) = _run_swish($swishargs, $results);
    }
    else {
        # Nothing to search for, so swish-e is not run at all.
        $errormsg = "You must enter at least one search parameter.  Please try again.";
    }
    my $count = scalar @$hits;

    # Print search query and results count to file.  Logging is best
    # effort: a log that cannot be opened must not break the search.
    if (open(my $log, '>>', 'SEARCH_LOG')) {
        (my $logged = $swishquery) =~ s/[\r\n]+/ /g;   # keep one line per search
        print {$log} "Query: $logged; Results: $count\n";
        close $log;
    }

    if ($count == 0 && $errormsg eq "") {
        $errormsg = "No items matched your search request.  Please try again.";
    }

    if ($errormsg ne "") {
        search_error($q, $errormsg);
        return;
    }

    #Print the results page
    my $pagetitle = "Search Engine Results";
    &printPageTop($pagetitle, $navfile, $q, $tpl);

    # A start position past the end would show an empty page; restart at 1.
    $first = 1 if $first > $count;
    my $last = $first + $displaynum - 1;
    $last = $count if $last > $count;

    # Summary of what was searched for.  The query text is user input and
    # is escaped before being echoed back into the page.
    $q->Print("<hr><FONT SIZE=-1>");
    if ($query ne "") {
        $q->Print("<b>Search query string</b>: " . _escape_html($query) . ".  <br>\n");
    }
    my @facets = (
        [ "Subject(s)",      $SU_Subject, \%Keywords::SU_KeyLabels ],
        [ "Content Type(s)", $SU_Doctype, \%Keywords::SU_Doctype   ],
        [ "Section Type(s)", $SU_Section, \%Keywords::SU_Section   ],
    );
    for my $facet (@facets) {
        my ($heading, $csv, $label_for) = @$facet;
        next if $csv eq "";
        $q->Print("<b>$heading</b>:  ");
        # Labels are listed last-selected first, as before.
        for my $key (reverse split(/,/, $csv)) {
            my $label = exists $label_for->{$key} ? $label_for->{$key} : "";
            $q->Print("$label. ");
        }
        $q->Print("<br>");
    }
    $q->Print("</FONT>");

    PrintLastNext($q, $first, $displaynum, $count, $SU_Section, $SU_Doctype, $SU_Subject, $query);

    $q->Print("<TABLE BORDER=1 WIDTH=100% CELLPADDING=5>\n",
              "<TR><TD COLSPAN=2 BGCOLOR=\"CCCCFF\"><FONT SIZE=-1 FACE=\"ARIAL,HELVETICA\">Your search returned <b>$count</b> Items.  Currently displaying records <b>$first</b> through <b>$last</b> in order of computed relevance:</FONT></TD>\n",
              "</TR>\n");

    for my $resultnum ($first .. $last) {
        # Each hit line is split as:  <rank> <url> "<title>" <size>
        my ($lead, $title, $filesize) = split(/\"/, $hits->[$resultnum - 1]);
        $title    = "" unless defined $title;
        $filesize = "" unless defined $filesize;
        $filesize =~ s/ //;    # remove leading blank
        my (undef, $url) = split(/ /, $lead);
        $url = "" unless defined $url;

        # Prefer the title stored with the page over the indexed one.
        my ($newtitle, $description) = &GetTitleDescription($url);
        $title = $newtitle if defined $newtitle && $newtitle ne "";

        # Alternate the row background: odd rows grey, even rows white.
        my $rowcolor = ($resultnum % 2) == 1 ? "E7E7E7" : "FFFFFF";

        $q->Print("<TR><TD ALIGN=CENTER VALIGN=TOP BGCOLOR=\"CFCFCF\">");
        $q->Print("$resultnum</TD>\n");
        $q->Print("<TD VALIGN=TOP BGCOLOR=\"$rowcolor\">");
        $q->Print("<A HREF=\"$url\"><FONT FACE=\"ARIAL,HELVETICA\"><b>$title</b></FONT></A><BR>\n");
        if (defined $description && $description ne "") {
            $q->Print("$description<br>");
        }
        $q->Print("<FONT SIZE=-2><b>URL</b>: $url (${filesize}K)</FONT>");
        $q->Print("\n</TD></TR>");
    }
    $q->Print("</TABLE>");

    PrintLastNext($q, $first, $displaynum, $count, $SU_Section, $SU_Doctype, $SU_Subject, $query);
    $q->Print("<hr>\n");
    return;
}

# Returns $value as a number when it is a positive integer, else $default.
sub _positive_int {
    my ($value, $default) = @_;
    return $default unless defined $value && $value =~ /\A\d+\z/ && $value >= 1;
    return $value + 0;
}

# Escapes the characters that are special in HTML text and attributes.
sub _escape_html {
    my ($text) = @_;
    $text = "" unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Builds the swish-e "-w" arguments from the free-text query and the
# [metaname, comma-separated-values] filters.  Returns (\@args, $printable),
# where $printable is a one-string rendering of the request for the log.
sub _swish_request {
    my ($query, @filters) = @_;

    # One argument per query word.  Leading dashes are dropped so that a
    # search word can never be taken for a swish-e command-line switch.
    my @args;
    for my $word (split(' ', $query)) {
        $word =~ s/\A-+//;
        push @args, $word if $word ne "";
    }
    my $printable = join(" ", @args);

    for my $filter (@filters) {
        my ($metaname, $csv) = @$filter;
        next if $csv eq "";
        (my $choices = $csv) =~ s/,/ or /g;
        my $clause = "$metaname =($choices)";
        if (@args) {
            push @args, "and";
            $printable .= " and ";
        }
        push @args, $clause;    # a whole clause travels as one argument
        $printable .= "\"$clause\"";
    }
    return (\@args, $printable);
}

# Runs swish-e with the given "-w" arguments and collects its hit lines.
# Returns (\@hits, $errormsg); $errormsg is "" when nothing went wrong.
sub _run_swish {
    my ($swishargs, $results) = @_;

    # swish-e error lines and the message shown to the user for each.
    my %message_for = (
        "err: no results"
            => "No items matched your search request.  Please Try again.",
        "err: could not open index file"
            => "Could not open search index $index.",
        "err: no search words specified"
            => "You must enter at least one search parameter.  Please Try again.",
        "err: a word is too common"
            => "One of your search terms is too common.  Please Try again.",
    );

    # List-form pipe open: the arguments are handed to swish-e directly and
    # never pass through a shell, so query text cannot be run as a command.
    my $swish_out;
    if (!open($swish_out, '-|', $swish, '-w', @$swishargs,
              '-v', 0, '-m', $results, '-f', $index)) {
        return ([], "The search engine could not be started.  Please try again later.");
    }

    my @hits;
    my $errormsg = "";
    while (my $line = <$swish_out>) {
        chomp $line;
        $errormsg = $message_for{$line} if exists $message_for{$line};
        next unless $line =~ /\A\d/;    # hit lines start with the numeric rank
        push @hits, $line;
    }
    close $swish_out;

    return (\@hits, $errormsg);
}

#
# Subroutine for printing the query form that appears on each
# search page.  The form should "remember" the parameters that
# were set in the last search.
#
# Arguments (positional):
#   $q           - query object used to build and print the form
#   $query       - text to pre-fill in the query field
#   $SU_Doctype  - comma-separated content types to pre-check
#   $displaynum  - pre-selected "records per page" value
#   $results     - pre-selected "maximum retrieved" value
#   $first, $new - still accepted from existing callers but not used;
#                  the form always submits first=1 and new=no
#
# The Subject and Section selections are read back from $q itself.
# The form is written with $q->Print; nothing is returned.
#

sub printQueryForm {
    my ($q, $query, $SU_Doctype, $displaynum, $results) = @_;

    my @SU_Doctype = defined $SU_Doctype ? split(",", $SU_Doctype) : ();

    # These two groups are not passed in, so take the previous selections
    # from the request in order for the form to remember them.
    my @SU_Subject = $q->param('SU_Subject');
    my @SU_Section = $q->param('SU_Section');

    # Retrieve metakey categories from Keywords hash tables
    # for automatic display.
    my @subvalues = sort keys %Keywords::SU_Subject2;
    my @docvalues = sort keys %Keywords::SU_Doctype;
    my @secvalues = sort keys %Keywords::SU_Section;

    $q->Print($q->start_form(-method=>'GET',
                             -action=>$searchcgi),"\n",
              "<div align=\"right\"><A HREF=\"\#help\"><img src=\"images\/help.gif\" border=\"0\" alt=\"Help!\"><\/A><\/div>",
              "Search for:<br>\n",
              $q->textfield(-name=>'query',
                            -default=>$query,
                            -size=>35),"\n",
              $q->image_button(-name=>'Find it!',
                               -src=>'images/go.gif',
                               -border=>'0',
                               -alt=>'Find it!'),"<br>\n",
              "<DIV ALIGN=LEFT><FONT SIZE=-1>Display ",
              $q->popup_menu(-name=>'displaynum',
                             -values=>[qw/10 20 50/],
                             -default=>$displaynum),
              " records per page out of a possible ",
              # The default here is the retrieval limit, not the page size.
              $q->popup_menu(-name=>'results',
                             -values=>[qw/100 2000 50000/],
                             -default=>$results),
              " retrieved.<\/FONT><\/DIV>",
              $q->hidden(-name=>'first',
                         -value=>'1',
                         -default=>'1'),
              $q->hidden(-name=>'new',
                         -value=>'no',
                         -default=>'no'),
              "<TABLE CELLPADDING=6 ALIGN=CENTER WIDTH=\"100%\">\n",
              "<TR ALIGN=LEFT VALIGN=TOP>\n<TD COLSPAN=2 BGCOLOR=#CFCFCF><FONT FACE=\"ARIAL,HELVETICA\"><b>Select from the criteria below to limit your search</b></FONT>:</TD></TR>",
              "<TR ALIGN=LEFT VALIGN=TOP>\n<TD BGCOLOR=#E7E7E7><b>Subject<\/b>: ","<br>\n",
              $q->checkbox_group(-name=>'SU_Subject',
                                 -linebreak=>'true',
                                 -values=>[@subvalues],
                                 -labels=>\%Keywords::SU_KeyLabels,
                                 -defaults=>[@SU_Subject]),
              "</TD>",
              "<TD BGCOLOR=#CCCCFF ROWSPAN=2><b>Content Type<\/b>: <br>\n",
              $q->checkbox_group(-name=>'SU_Doctype',
                                 -values=>[@docvalues],
                                 -linebreak=>'true',
                                 -labels=>\%Keywords::SU_Doctype,
                                 -defaults=>[@SU_Doctype]),"<br>\n",
              # The Section cell sits in its own row beneath Subject; the
              # Content Type cell beside them spans both rows.
              "</TD></TR>\n<TR ALIGN=LEFT VALIGN=TOP>\n<TD BGCOLOR=#E7E7E7>",
              "<b>Section<\/b>: ","<br>\n",
              $q->checkbox_group(-name=>'SU_Section',
                                 -linebreak=>'true',
                                 -values=>[@secvalues],
                                 -labels=>\%Keywords::SU_Section,
                                 -defaults=>[@SU_Section]),"</TD></TR>\n",
              "<TR BGCOLOR=#CFCFCF><TD COLSPAN=2><FONT FACE=\"ARIAL,HELVETICA\"><A NAME=\"help\"><b>Search Help:\n</b></A></FONT></TD></TR>\n",
              "<TR BGCOLOR=#FFFFCC><TD COLSPAN=2>\n",
              "<FONT SIZE=\"-1\"><OL>\n",
              "<LI><B>Entering a query string:</B> <BR>\n",
              "If you enter a group of words, the search engine will find all documents that\n",
              "contain all of the words you enter. You may use the &quot;OR&quot;\n",
              "and &quot;AND&quot; and &quot;NOT&quot; boolean operators to specify your \n",
              "search more particularly. For example:<BR>\n",
              "Query String: geometry polyhedra tetrahedra - found pages will contain all\n",
              "of the terms.<BR>\n",
              "Query String: geometry AND polyhedra AND tetrahedra - found pages will contain \n",
              "all of the terms (equivalent to the above).<BR>\n",
              "Query String: geometry AND polyhedra OR tetrahedra - found pages will contain \n",
              "both geometry and polyhedra or they will contain tetrahedra. Items at the \n",
              "beginning of the search string are grouped together first, so this will be \n",
              "read as (geometry AND polyhedra) or tetrahedra. Note, however, that parentheses \n",
              "are not allowed in search strings.<BR>\n",
              "Query String: geometry AND (polyhedra OR tetrahedra) - illegal construction. \n",
              "No parentheses allowed. <br>\n",
              "<LI><B>Using Selection Boxes</B>: <BR>\n",
              "When you choose a selection box you are indicating that you would like to \n",
              "narrow your search to only the criteria selected. So, for example, if you \n",
              "type in the word &quot;saturn&quot; in the query box and select &quot;Educational \n",
              "Materials&quot; and &quot;Simulations&quot; from the Content Type checkboxes, \n",
              "you will receive a list of pages that contain the word &quot;saturn&quot; \n",
              "on the page and are either educational materials or online simulations. If \n",
              "in addition you click the &quot;Observatory&quot; checkbox under the Sections \n",
              "area, you'll receive only those pages that match the previous search parameters \n",
              "and are in addition located in the Observatory on the site. (In this case, \n",
              "the search will probably turn up the same pages, since it is doubtful that \n",
              "a page containing the word &quot;saturn&quot; will appear anywhere else than \n",
              "in the Observatory. If, on the other hand, you select &quot;saturn&quot; and \n",
              "go on to select &quot;Geometry&quot; from the Subjects categories or &quot;Geometry \n",
              "Center&quot; from the sections categories, chances are your search will turn \n",
              "up empty.) <br>\n",
              "<LI><B>Refining Your Search:<BR>\n",
              "</B>If your search turns up pages that are not quite what you wanted, you \n",
              "can go down to the bottom of the page and edit any of the search parameters \n",
              "you entered and resubmit your search query. \n",
              "</OL></FONT>\n",
              "</TD></TR></TABLE>\n",
              $q->end_form,"\n");

    return;
}

#
# Subroutine for printing error messages.
#   Prints message with appropriate header
#
#   $q             - query object the page is written with
#   $error_message - text shown, centred, beneath the page header
#

sub search_error {
    my ($q, $error_message) = @_;

    # Header first, under the error-page title.
    my $pagetitle = "Search Error";
    &printPageTop($pagetitle, $navfile, $q, $tpl);

    # Then the message itself, followed by a horizontal rule.
    $q->Print( "<P ALIGN=CENTER>
               $error_message
	       <\/P>\n
	       <HR>" );
}



# Prints the centred "Previous / Next" paging line for a results page.
#
# Arguments (positional):
#   $q           - query object; output is written with $q->Print
#   $first       - 1-based number of the first hit on the current page
#   $displaynum  - number of hits shown per page
#   $count       - total number of hits found
#   $SU_Section, $SU_Doctype, $SU_Subject
#                - comma-separated metatag selections to carry in the links
#   $query       - free-text query to carry in the links
#
# "Previous" is a link only when $first is past 1, and "Next" only when
# hits remain after the current page; otherwise each is plain text.
# Reads the file-level $searchcgi and $results settings.
sub PrintLastNext {
    my ($q, $first, $displaynum, $count,
        $SU_Section, $SU_Doctype, $SU_Subject, $query) = @_;

    # Percent-encodes a value for use inside a URL query string.  The
    # encoded form contains no characters that are special in HTML.
    my $url_encode = sub {
        my $value = defined $_[0] ? $_[0] : "";
        utf8::encode($value) if utf8::is_utf8($value);
        $value =~ s/([^A-Za-z0-9_.~-])/sprintf("%%%02X", ord($1))/ge;
        return $value;
    };

    # Repeat each selected metatag value as its own parameter, in the
    # same (reversed) order the links have always used.
    my $filters = "";
    my @facets = (
        [ 'SU_Section', $SU_Section ],
        [ 'SU_Doctype', $SU_Doctype ],
        [ 'SU_Subject', $SU_Subject ],
    );
    for my $facet (@facets) {
        my ($name, $csv) = @$facet;
        next unless defined $csv;
        for my $value (reverse split(/,/, $csv)) {
            $filters .= "&amp;$name=" . $url_encode->($value);
        }
    }

    # Builds one paging link that starts the results at $newfirst.
    my $link_to = sub {
        my ($newfirst, $label) = @_;
        return "<A HREF=\"$searchcgi?query=" . $url_encode->($query)
             . $filters
             . "&amp;results=" . $url_encode->($results)
             . "&amp;first=" . $url_encode->($newfirst)
             . "&amp;displaynum=" . $url_encode->($displaynum)
             . "\">$label</A>";
    };

    $q->Print("<DIV ALIGN=CENTER>");
    if ($first > 1) {
        # Stepping back from a short first page must not go below item 1.
        my $newfirst = $first - $displaynum;
        $newfirst = 1 if $newfirst < 1;
        $q->Print($link_to->($newfirst, "Previous"));
    }
    else {
        $q->Print("Previous");
    }
    $q->Print(" / ");
    if (($first + $displaynum - 1) < $count) {
        $q->Print($link_to->($first + $displaynum, "Next"));
    }
    else {
        $q->Print("Next");
    }
    $q->Print("</DIV>");
    return;
}


