4 # FILE: SearchEngine.php
6 # Open Source Metadata Archive Search Engine (OSMASE)
7 # Copyright 2002-2011 Edward Almasy and Internet Scout
8 # http://scout.wisc.edu
13 # ---- PUBLIC INTERFACE --------------------------------------------------
15 # possible types of logical operators
19 # flags used for indicating field types
28 # save database object for our use
31 # save item access parameters
35 # define flags used for indicating word states
36 if (!defined(
"WORD_PRESENT")) { define(
"WORD_PRESENT", 1); }
37 if (!defined(
"WORD_EXCLUDED")) { define(
"WORD_EXCLUDED", 2); }
38 if (!defined(
"WORD_REQUIRED")) { define(
"WORD_REQUIRED", 4); }
40 # set default debug state
44 # add field to be searched
46 $FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch)
49 $this->FieldInfo[$FieldName][
"DBFieldName"] = $DBFieldName;
50 $this->FieldInfo[$FieldName][
"FieldType"] = $FieldType;
51 $this->FieldInfo[$FieldName][
"Weight"] = $Weight;
52 $this->FieldInfo[$FieldName][
"InKeywordSearch"] = $UsedInKeywordSearch;
55 # retrieve info about tables and fields (useful for child objects)
59 {
return $this->FieldInfo[$FieldName][
"DBFieldName"]; }
61 {
return $this->FieldInfo[$FieldName][
"FieldType"]; }
63 {
return $this->FieldInfo[$FieldName][
"Weight"]; }
65 {
return $this->FieldInfo[$FieldName][
"InKeywordSearch"]; }
74 # ---- search functions
76 # perform keyword search
77 function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10,
78 $SortByField = NULL, $SortDescending = TRUE)
80 $SearchString = $this->SetDebugLevel($SearchString);
81 $this->
DMsg(0,
"In Search() with search string \"".$SearchString.
"\"");
83 # save start time to use in calculating search time
84 $StartTime = microtime(TRUE);
87 $this->InclusiveTermCount = 0;
88 $this->RequiredTermCount = 0;
89 $this->ExcludedTermCount = 0;
91 # parse search string into terms
92 $Words = $this->ParseSearchStringForWords($SearchString);
93 $this->
DMsg(1,
"Found ".count($Words).
" words");
95 # parse search string for phrases
96 $Phrases = $this->ParseSearchStringForPhrases($SearchString);
97 $this->
DMsg(1,
"Found ".count($Phrases).
" phrases");
99 # if only excluded terms specified
100 if ($this->ExcludedTermCount && !$this->InclusiveTermCount)
103 $this->
DMsg(1,
"Loading all records");
104 $Scores = $this->LoadScoresForAllRecords();
109 $Scores = $this->SearchForWords($Words);
110 $this->
DMsg(1,
"Found ".count($Scores).
" results after word search");
111 $Scores = $this->SearchForPhrases($Phrases, $Scores);
112 $this->
DMsg(1,
"Found ".count($Scores).
" results after phrase search");
115 # if search results found
116 if (count($Scores) > 0)
118 # handle any excluded words
119 $Scores = $this->FilterOnExcludedWords($Words, $Scores);
121 # strip off any results that don't contain required words
122 $Scores = $this->FilterOnRequiredWords($Scores);
125 # count, sort, and trim search result scores list
126 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
127 $SortByField, $SortDescending);
130 $this->LastSearchTime = microtime(TRUE) - $StartTime;
132 # return list of items to caller
133 $this->
DMsg(0,
"Ended up with ".$this->NumberOfResultsAvailable.
" results");
137 # perform search across multiple fields and return trimmed results to caller
138 function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
139 $SortByField = NULL, $SortDescending = TRUE)
141 $SearchStrings = $this->SetDebugLevel($SearchStrings);
142 $this->
DMsg(0,
"In FieldedSearch() with "
143 .count($SearchStrings).
" search strings");
145 # save start time to use in calculating search time
146 $StartTime = microtime(TRUE);
149 $Scores = $this->SearchAcrossFields($SearchStrings);
150 $Scores = ($Scores === NULL) ? array() : $Scores;
152 # count, sort, and trim search result scores list
153 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
154 $SortByField, $SortDescending);
157 $this->LastSearchTime = microtime(TRUE) - $StartTime;
159 # return list of items to caller
160 $this->
DMsg(0,
"Ended up with ".$this->NumberOfResultsAvailable.
" results");
164 # perform search with logical groups of fielded searches
165 function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10,
166 $SortByField = NULL, $SortDescending = TRUE)
168 foreach ($SearchGroups as $Index => $Groups)
170 if (isset($SearchGroups[$Index][
"SearchStrings"]))
172 $SearchGroups[$Index][
"SearchStrings"] =
173 $this->SetDebugLevel($SearchGroups[$Index][
"SearchStrings"]);
176 $this->
DMsg(0,
"In GroupedSearch() with "
177 .count($SearchGroups).
" search groups");
179 # save start time to use in calculating search time
180 $StartTime = microtime(TRUE);
182 # start with no results
185 # save AND/OR search setting
188 # for each search group
190 foreach ($SearchGroups as $Group)
192 $this->
DMsg(0,
"----- GROUP ---------------------------");
194 # if group has AND/OR setting specified
195 if (isset($Group[
"Logic"]))
197 # use specified AND/OR setting
202 # use saved AND/OR setting
205 $this->
DMsg(2,
"Logic is "
208 # if we have search strings for this group
209 if (isset($Group[
"SearchStrings"]))
212 $GroupScores = $this->SearchAcrossFields($Group[
"SearchStrings"]);
214 # if search was conducted
215 if ($GroupScores !== NULL)
217 # if saved AND/OR setting is OR or this is first search
218 if (($SavedSearchLogic == self::LOGIC_OR) || $FirstSearch)
220 # add search results to result list
221 foreach ($GroupScores as $ItemId => $Score)
223 if (isset($Scores[$ItemId]))
225 $Scores[$ItemId] += $Score;
229 $Scores[$ItemId] = $Score;
233 # (reset flag indicating first search)
234 $FirstSearch = FALSE;
238 # AND search results with previous results
239 $OldScores = $Scores;
241 foreach ($GroupScores as $ItemId => $Score)
243 if (isset($OldScores[$ItemId]))
245 $Scores[$ItemId] = $OldScores[$ItemId] + $Score;
253 # restore AND/OR search setting
256 # count, sort, and trim search result scores list
257 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
258 $SortByField, $SortDescending);
261 $this->LastSearchTime = microtime(TRUE) - $StartTime;
263 # return search results to caller
264 $this->
DMsg(0,
"Ended up with ".$this->NumberOfResultsAvailable.
" results");
268 # add function that will be called to filter search results
271 # save filter function name
272 $this->FilterFuncs[] = $FunctionName;
275 # get or set default search logic (AND or OR)
278 if ($NewSetting != NULL)
304 return $this->SearchTermList;
312 # report total weight for all fields involved in search
316 $IncludedKeywordSearch = FALSE;
317 foreach ($SearchStrings as $FieldName => $SearchStringArray)
319 if ($FieldName ==
"XXXKeywordXXX")
321 $IncludedKeywordSearch = TRUE;
325 $Weight += $this->FieldInfo[$FieldName][
"Weight"];
328 if ($IncludedKeywordSearch)
330 foreach ($this->FieldInfo as $FieldName => $Info)
332 if ($Info[
"InKeywordSearch"])
334 $Weight += $Info[
"Weight"];
342 # ---- search database update functions
344 # update search DB for the specified item
347 # bail out if item ID is negative (indicating a temporary record)
348 if ($ItemId < 0) {
return; }
350 # clear word count added flags for this item
351 unset($this->WordCountAdded);
353 # delete any existing info for this item
354 $this->DB->Query(
"DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);
356 # for each metadata field
357 foreach ($this->FieldInfo as $FieldName => $Info)
359 # if search weight for field is positive
360 if ($Info[
"Weight"] > 0)
362 # retrieve text for field
368 # for each text string in array
369 foreach ($Text as $String)
371 # record search info for text
372 $this->RecordSearchInfoForText($ItemId, $FieldName,
373 $Info[
"Weight"], $String,
374 $Info[
"InKeywordSearch"]);
379 # record search info for text
380 $this->RecordSearchInfoForText($ItemId, $FieldName,
381 $Info[
"Weight"], $Text,
382 $Info[
"InKeywordSearch"]);
388 # update search DB for the specified range of items
391 # retrieve IDs for specified number of items starting at specified ID
392 $this->DB->Query(
"SELECT ".$this->
ItemIdFieldName.
" FROM ".$this->ItemTableName
393 .
" WHERE ".$this->ItemIdFieldName.
" >= ".$StartingItemId
394 .
" ORDER BY ".$this->ItemIdFieldName.
" LIMIT ".$NumberOfItems);
397 # for each retrieved item ID
398 foreach ($ItemIds as $ItemId)
400 # update search info for item
404 # return ID of last item updated to caller
408 # drop all data pertaining to item from search DB
411 # drop all entries pertaining to item from word count table
412 $this->DB->Query(
"DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);
415 # drop all data pertaining to field from search DB
418 # retrieve our ID for field
419 $FieldId = $this->DB->Query(
"SELECT FieldId FROM SearchFields "
420 .
"WHERE FieldName = '".addslashes($FieldName).
"'",
"FieldId");
# drop all entries pertaining to field from word counts table
# (a single quote needs no escaping inside a double-quoted PHP string;
#       writing \' there sends a literal backslash to the database and
#       yields malformed SQL, so plain quotes are used)
$this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = '".$FieldId."'");

# drop field from our fields table
$this->DB->Query("DELETE FROM SearchFields WHERE FieldId = '".$FieldId."'");
429 # return total number of terms indexed by search engine
432 return $this->DB->Query(
"SELECT COUNT(*) AS TermCount"
433 .
" FROM SearchWords",
"TermCount");
436 # return total number of items indexed by search engine
439 return $this->DB->Query(
"SELECT COUNT(DISTINCT ItemId) AS ItemCount"
440 .
" FROM SearchWordCounts",
"ItemCount");
451 # assume no synonyms will be added
455 $WordId = $this->GetWordId($Word, TRUE);
457 # for each synonym passed in
458 foreach ($Synonyms as $Synonym)
461 $SynonymId = $this->GetWordId($Synonym, TRUE);
463 # if synonym is not already in database
464 $this->DB->Query(
"SELECT * FROM SearchWordSynonyms"
465 .
" WHERE (WordIdA = ".$WordId
466 .
" AND WordIdB = ".$SynonymId.
")"
467 .
" OR (WordIdB = ".$WordId
468 .
" AND WordIdA = ".$SynonymId.
")");
469 if ($this->DB->NumRowsSelected() == 0)
471 # add synonym entry to database
472 $this->DB->Query(
"INSERT INTO SearchWordSynonyms"
473 .
" (WordIdA, WordIdB)"
474 .
" VALUES (".$WordId.
", ".$SynonymId.
")");
479 # report to caller number of new synonyms added
487 $WordId = $this->GetWordId($Word);
490 if ($WordId !== NULL)
492 # if no specific synonyms provided
493 if ($Synonyms === NULL)
495 # remove all synonyms for word
496 $this->DB->Query(
"DELETE FROM SearchWordSynonyms"
497 .
" WHERE WordIdA = '".$WordId.
"'"
498 .
" OR WordIdB = '".$WordId.
"'");
502 # for each specified synonym
503 foreach ($Synonyms as $Synonym)
505 # look up ID for synonym
506 $SynonymId = $this->GetWordId($Synonym);
508 # if synonym ID was found
509 if ($SynonymId !== NULL)
511 # delete synonym entry
512 $this->DB->Query(
"DELETE FROM SearchWordSynonyms"
513 .
" WHERE (WordIdA = '".$WordId.
"'"
514 .
" AND WordIdB = '".$SynonymId.
"')"
515 .
" OR (WordIdB = '".$WordId.
"'"
516 .
" AND WordIdA = '".$SynonymId.
"')");
523 # remove all synonyms
526 $this->DB->Query(
"DELETE FROM SearchWordSynonyms");
529 # get synonyms for word (returns array of synonyms)
532 # assume no synonyms will be found
535 # look up ID for word
536 $WordId = $this->GetWordId($Word);
538 # if word ID was found
539 if ($WordId !== NULL)
541 # look up IDs of all synonyms for this word
542 $this->DB->Query(
"SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
543 .
" WHERE WordIdA = ".$WordId
544 .
" OR WordIdB = ".$WordId);
# collect the ID on the "other side" of each synonym pair
# (FetchRow must be invoked as a method; without the parentheses it is
#       read as an undefined property, so the loop never executes and no
#       synonyms are ever found)
$SynonymIds = array();
while ($Record = $this->DB->FetchRow())
{
    $SynonymIds[] = ($Record["WordIdA"] == $WordId)
            ? $Record["WordIdB"] : $Record["WordIdA"];
}
552 # for each synonym ID
553 foreach ($SynonymIds as $SynonymId)
555 # look up synonym word and add to synonym list
556 $Synonyms[] = $this->GetWord($SynonymId);
560 # return synonyms to caller
564 # get all synonyms (returns 2D array w/ words as first index)
567 # assume no synonyms will be found
568 $SynonymList = array();
570 # for each synonym ID pair
572 $OurDB->Query(
"SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
573 while ($Record = $OurDB->FetchRow())
576 $Word = $this->GetWord($Record[
"WordIdA"]);
577 $Synonym = $this->GetWord($Record[
"WordIdB"]);
579 # if we do not already have an entry for the word
580 # or synonym is not listed for this word
581 if (!isset($SynonymList[$Word])
582 || !in_array($Synonym, $SynonymList[$Word]))
584 # add entry for synonym
585 $SynonymList[$Word][] = $Synonym;
588 # if we do not already have an entry for the synonym
589 # or word is not listed for this synonym
590 if (!isset($SynonymList[$Synonym])
591 || !in_array($Word, $SynonymList[$Synonym]))
594 $SynonymList[$Synonym][] = $Word;
599 # (this loop removes reciprocal duplicates)
600 foreach ($SynonymList as $Word => $Synonyms)
602 # for each synonym for that word
603 foreach ($Synonyms as $Synonym)
605 # if synonym has synonyms and word is one of them
606 if (isset($SynonymList[$Synonym])
607 && isset($SynonymList[$Word])
608 && in_array($Word, $SynonymList[$Synonym])
609 && in_array($Synonym, $SynonymList[$Word]))
611 # if word has fewer synonyms than synonym
612 if (count($SynonymList[$Word])
613 < count($SynonymList[$Synonym]))
615 # remove synonym from synonym list for word
616 $SynonymList[$Word] = array_diff(
617 $SynonymList[$Word], array($Synonym));
619 # if no synonyms left for word
620 if (!count($SynonymList[$Word]))
622 # remove empty synonym list for word
623 unset($SynonymList[$Word]);
628 # remove word from synonym list for synonym
629 $SynonymList[$Synonym] = array_diff(
630 $SynonymList[$Synonym], array($Word));
632 # if no synonyms left for synonym
633 if (!count($SynonymList[$Synonym]))
635 # remove empty synonym list for synonym
636 unset($SynonymList[$Synonym]);
643 # sort array alphabetically (just for convenience)
644 foreach ($SynonymList as $Word => $Synonyms)
646 asort($SynonymList[$Word]);
650 # return 2D array of synonyms to caller
654 # set all synonyms (accepts 2D array w/ words as first index)
657 # remove all existing synonyms
660 # for each synonym entry passed in
661 foreach ($SynonymList as $Word => $Synonyms)
663 # add synonyms for word
678 # assume no synonyms will be added
681 # read in contents of file
682 $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
684 # if file contained lines
687 # for each line of file
688 foreach ($Lines as $Line)
690 # if line is not a comment
691 if (!preg_match(
"/[\s]*#/", $Line))
693 # split line into words
694 $Words = preg_split(
"/[\s,]+/", $Line);
697 if (count($Words) > 1)
699 # separate out word and synonyms
700 $Word = array_shift($Words);
709 # return count of synonyms added to caller
713 # suggest alternatives
720 # ---- PRIVATE INTERFACE -------------------------------------------------
733 private $WordCountAdded;
736 private $RequiredTermCount;
737 private $RequiredTermCounts;
738 private $InclusiveTermCount;
739 private $ExcludedTermCount;
740 private $SearchTermList;
745 # ---- common private functions (used in both searching and DB build)
747 # normalize and parse search string into list of search terms
748 private function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE)
750 # strip off any surrounding whitespace
751 $Text = trim($SearchString);
753 # set up normalization replacement strings
755 "/'s[^a-z0-9\\-+~]+/i", #
get rid of possessive plurals
756 "/'/", #
get rid of single quotes / apostrophes
757 "/\"[^\"]*\"/", #
get rid of phrases (NOTE: HARD-CODED INDEX BELOW!!!)
"
758 "/\\([^)]*\\)/
", # get rid of groups (NOTE: HARD-CODED INDEX BELOW!!!)
759 "/[^a-z0-9\\-+~]+/i
", # convert non-alphanumerics / non-minus/plus to a space
760 "/([^\\s])-+/i
", # convert minus preceded by anything but whitespace to a space
761 "/([^\\s])\\++/i
", # convert plus preceded by anything but whitespace to a space
762 "/-\\s/i
", # convert minus followed by whitespace to a space
763 "/\\+\\s/i
", # convert plus followed by whitespace to a space
764 "/~\\s/i
", # convert tilde followed by whitespace to a space
765 "/[ ]+/
" # convert multiple spaces to one space
767 $Replacements = array(
781 # if we are supposed to ignore phrases and groups (series of words in quotes or surrounded by parens)
784 # switch phrase removal to double quote removal (HARD-CODED INDEX INTO PATTERN LIST!!)
785 $Patterns[2] = "/\
"/";
787 # switch group removal to paren removal (HARD-CODED INDEX INTO PATTERN LIST!!)
788 $Patterns[3] =
"/[\(\)]+/";
791 # remove punctuation from text and normalize whitespace
792 $Text = preg_replace($Patterns, $Replacements, $Text);
793 $this->
DMsg(2,
"Normalized search string is '".$Text.
"'");
795 # convert text to lower case
796 $Text = strtolower($Text);
798 # strip off any extraneous whitespace
801 # start with an empty array
804 # if we have words left after parsing
805 if (strlen($Text) != 0)
808 foreach (explode(
" ", $Text) as $Word)
810 # grab first character of word
811 $FirstChar = substr($Word, 0, 1);
813 # strip off option characters and set flags appropriately
814 $Flags = WORD_PRESENT;
815 if ($FirstChar ==
"-")
817 $Word = substr($Word, 1);
818 $Flags |= WORD_EXCLUDED;
819 if (!isset($Words[$Word]))
821 $this->ExcludedTermCount++;
826 if ($FirstChar ==
"~")
828 $Word = substr($Word, 1);
831 || ($FirstChar ==
"+"))
833 if ($FirstChar ==
"+")
835 $Word = substr($Word, 1);
837 $Flags |= WORD_REQUIRED;
838 if (!isset($Words[$Word]))
840 $this->RequiredTermCount++;
843 if (!isset($Words[$Word]))
845 $this->InclusiveTermCount++;
846 $this->SearchTermList[] = $Word;
850 # store flags to indicate word found
851 $Words[$Word] = $Flags;
852 $this->
DMsg(3,
"Word identified (".$Word.
")");
856 # return normalized words to caller
# look up (creating if necessary) the SearchFields ID for a field name
# (IDs are cached in $this->FieldIds so each name is resolved at most once)
protected function GetFieldId($FieldName)
{
    # hand back cached ID right away when we already have one
    if (isset($this->FieldIds[$FieldName]))
    {
        return $this->FieldIds[$FieldName];
    }

    # escape name once for use in both queries below
    $EscapedName = addslashes($FieldName);

    # ask database for existing entry for field
    $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".$EscapedName."'");
    $Row = $this->DB->FetchRow();

    if ($Row)
    {
        # existing entry found, so take ID from it
        $Id = $Row["FieldId"];
    }
    else
    {
        # no entry yet, so create one and use its new ID
        $this->DB->Query("INSERT INTO SearchFields (FieldName) "
                ."VALUES ('".$EscapedName."')");
        $Id = $this->DB->LastInsertId("SearchFields");
    }

    # remember ID for later calls
    $this->FieldIds[$FieldName] = $Id;

    # return cached ID to caller
    return $this->FieldIds[$FieldName];
}
893 # retrieve ID for specified word (returns NULL if no ID found)
894 private function GetWordId($Word, $AddIfNotFound = FALSE)
898 # if word was in ID cache
899 if (isset($WordIdCache[$Word]))
902 $WordId = $WordIdCache[$Word];
906 # look up ID in database
907 $WordId = $this->DB->Query(
"SELECT WordId"
909 .
" WHERE WordText='".addslashes($Word).
"'",
912 # if ID was not found and caller requested it be added
913 if (($WordId === NULL) && $AddIfNotFound)
915 # add word to database
916 $this->DB->Query(
"INSERT INTO SearchWords (WordText)"
917 .
" VALUES ('".addslashes(strtolower($Word)).
"')");
919 # get ID for newly added word
920 $WordId = $this->DB->LastInsertId(
"SearchWords");
924 $WordIdCache[$Word] = $WordId;
927 # return ID to caller
931 # retrieve ID for specified word stem (returns NULL if no ID found)
932 private function GetStemId($Stem, $AddIfNotFound = FALSE)
936 # if stem was in ID cache
937 if (isset($StemIdCache[$Stem]))
940 $StemId = $StemIdCache[$Stem];
944 # look up ID in database
945 $StemId = $this->DB->Query(
"SELECT WordId"
947 .
" WHERE WordText='".addslashes($Stem).
"'",
950 # if ID was not found and caller requested it be added
951 if (($StemId === NULL) && $AddIfNotFound)
953 # add stem to database
954 $this->DB->Query(
"INSERT INTO SearchStems (WordText)"
955 .
" VALUES ('".addslashes(strtolower($Stem)).
"')");
957 # get ID for newly added stem
958 $StemId = $this->DB->LastInsertId(
"SearchStems");
961 # adjust from DB ID value to stem ID value
962 $StemId += self::STEM_ID_OFFSET;
965 $StemIdCache[$Stem] = $StemId;
968 # return ID to caller
972 # retrieve word for specified word ID (returns FALSE if no word found)
973 private function GetWord($WordId)
977 # if word was in cache
978 if (isset($WordCache[$WordId]))
980 # use word from cache
981 $Word = $WordCache[$WordId];
985 # adjust search location and word ID if word is stem
986 $TableName =
"SearchWords";
987 if ($WordId >= self::STEM_ID_OFFSET)
989 $TableName =
"SearchStems";
990 $WordId -= self::STEM_ID_OFFSET;
993 # look up word in database
994 $Word = $this->DB->Query(
"SELECT WordText"
996 .
" WHERE WordId='".$WordId.
"'",
1000 $WordCache[$WordId] = $Word;
1003 # return word to caller
1008 # ---- private functions used in searching
1010 # perform search across multiple fields and return raw results to caller
1011 private function SearchAcrossFields($SearchStrings)
1013 # start by assuming no search will be done
1017 $this->InclusiveTermCount = 0;
1018 $this->RequiredTermCount = 0;
1019 $this->ExcludedTermCount = 0;
1022 $NeedComparisonSearch = FALSE;
1023 foreach ($SearchStrings as $FieldName => $SearchStringArray)
1025 # convert search string to array if needed
1026 if (!is_array($SearchStringArray))
1028 $SearchStringArray = array($SearchStringArray);
1031 # for each search string for this field
1032 foreach ($SearchStringArray as $SearchString)
1034 # if field is keyword or field is text and does not look like comparison match
1035 if (($FieldName ==
"XXXKeywordXXX")
1036 || (isset($this->FieldInfo[$FieldName])
1037 && ($this->FieldInfo[$FieldName][
"FieldType"] == self::FIELDTYPE_TEXT)
1038 && !preg_match(
"/^[><!]=./", $SearchString)
1039 && !preg_match(
"/^[><=]./", $SearchString)))
1041 $this->DMsg(0,
"Searching text field \""
1042 .$FieldName.
"\" for string \"$SearchString\"");
1044 # normalize text and split into words
1045 $Words[$FieldName] =
1046 $this->ParseSearchStringForWords($SearchString);
1048 # calculate scores for matching items
1049 if (count($Words[$FieldName]))
1051 $Scores = $this->SearchForWords(
1052 $Words[$FieldName], $FieldName, $Scores);
1053 $this->DMsg(3,
"Have "
1054 .count($Scores).
" results after word search");
1057 # split into phrases
1058 $Phrases[$FieldName] =
1059 $this->ParseSearchStringForPhrases($SearchString);
1061 # handle any phrases
1062 if (count($Phrases[$FieldName]))
1064 $Scores = $this->SearchForPhrases(
1065 $Phrases[$FieldName], $Scores, $FieldName, TRUE, FALSE);
1066 $this->DMsg(3,
"Have "
1067 .count($Scores).
" results after phrase search");
1072 # set flag to indicate possible comparison search candidate found
1073 $NeedComparisonSearch = TRUE;
1078 # perform comparison searches
1079 if ($NeedComparisonSearch)
1081 $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores);
1082 $this->DMsg(3,
"Have ".count($Scores).
" results after comparison search");
1085 # if no results found and exclusions specified
1086 if (!count($Scores) && $this->ExcludedTermCount)
1089 $Scores = $this->LoadScoresForAllRecords();
1092 # if search results found
1095 # for each search text string
1096 foreach ($SearchStrings as $FieldName => $SearchStringArray)
1098 # convert search string to array if needed
1099 if (!is_array($SearchStringArray))
1101 $SearchStringArray = array($SearchStringArray);
1104 # for each search string for this field
1105 foreach ($SearchStringArray as $SearchString)
1108 if (($FieldName ==
"XXXKeywordXXX")
1109 || (isset($this->FieldInfo[$FieldName])
1110 && ($this->FieldInfo[$FieldName][
"FieldType"]
1111 == self::FIELDTYPE_TEXT)))
1113 # if there are words in search text
1114 if (isset($Words[$FieldName]))
1116 # handle any excluded words
1117 $Scores = $this->FilterOnExcludedWords($Words[$FieldName], $Scores, $FieldName);
1120 # handle any excluded phrases
1121 if (isset($Phrases[$FieldName]))
1123 $Scores = $this->SearchForPhrases(
1124 $Phrases[$FieldName], $Scores, $FieldName, FALSE, TRUE);
1130 # strip off any results that don't contain required words
1131 $Scores = $this->FilterOnRequiredWords($Scores);
1134 # return search result scores to caller
1138 # search for words in specified field
1139 private function SearchForWords(
1140 $Words, $FieldName =
"XXXKeywordXXX", $Scores = NULL)
1144 # start with empty search result scores list if none passed in
1145 if ($Scores == NULL)
1151 $FieldId = $this->GetFieldId($FieldName);
1154 foreach ($Words as $Word => $Flags)
1156 $this->DMsg(2,
"Searching for word '${Word}' in field ".$FieldName);
1158 # if word is not excluded
1159 if (!($Flags & WORD_EXCLUDED))
1161 # look up record ID for word
1162 $this->DMsg(2,
"Looking up word \"".$Word.
"\"");
1163 $WordId = $this->GetWordId($Word);
1166 if ($WordId !== NULL)
1168 # look up counts for word
1169 $DB->Query(
"SELECT ItemId,Count FROM SearchWordCounts "
1170 .
"WHERE WordId = ".$WordId
1171 .
" AND FieldId = ".$FieldId);
1172 $Counts = $DB->FetchColumn(
"Count",
"ItemId");
1174 # if synonym support is enabled
1175 if ($this->SynonymsEnabled)
1177 # look for any synonyms
1178 $DB->Query(
"SELECT WordIdA, WordIdB"
1179 .
" FROM SearchWordSynonyms"
1180 .
" WHERE WordIdA = ".$WordId
1181 .
" OR WordIdB = ".$WordId);
1183 # if synonyms were found
1184 if ($DB->NumRowsSelected())
1186 # retrieve synonym IDs
1187 $SynonymIds = array();
1188 while ($Record = $DB->FetchRow())
1190 $SynonymIds[] = ($Record[
"WordIdA"] == $WordId)
1191 ? $Record[
"WordIdB"]
1192 : $Record[
"WordIdA"];
1196 foreach ($SynonymIds as $SynonymId)
1198 # retrieve counts for synonym
1199 $DB->Query(
"SELECT ItemId,Count"
1200 .
" FROM SearchWordCounts"
1201 .
" WHERE WordId = ".$SynonymId
1202 .
" AND FieldId = ".$FieldId);
1203 $SynonymCounts = $DB->FetchColumn(
"Count",
"ItemId");
1206 foreach ($SynonymCounts as $ItemId => $Count)
1208 # adjust count because it's a synonym
1209 $AdjustedCount = ceil($Count / 2);
1211 # add count to existing counts
1212 if (isset($Counts[$ItemId]))
1214 $Counts[$ItemId] += $AdjustedCount;
1218 $Counts[$ItemId] = $AdjustedCount;
1226 # if stemming is enabled
1227 if ($this->StemmingEnabled)
1231 $this->DMsg(2,
"Looking up stem \"".$Stem.
"\"");
1232 $StemId = $this->GetStemId($Stem);
1234 # if ID found for stem
1235 if ($StemId !== NULL)
1237 # retrieve counts for stem
1238 $DB->Query(
"SELECT ItemId,Count"
1239 .
" FROM SearchWordCounts"
1240 .
" WHERE WordId = ".$StemId
1241 .
" AND FieldId = ".$FieldId);
1242 $StemCounts = $DB->FetchColumn(
"Count",
"ItemId");
1245 foreach ($StemCounts as $ItemId => $Count)
1247 # adjust count because it's a stem
1248 $AdjustedCount = ceil($Count / 2);
1250 # add count to existing counts
1251 if (isset($Counts[$ItemId]))
1253 $Counts[$ItemId] += $AdjustedCount;
1257 $Counts[$ItemId] = $AdjustedCount;
1263 # if counts were found
1267 foreach ($Counts as $ItemId => $Count)
1269 # if word flagged as required
1270 if ($Flags & WORD_REQUIRED)
1272 # increment required word count for record
1273 if (isset($this->RequiredTermCounts[$ItemId]))
1275 $this->RequiredTermCounts[$ItemId]++;
1279 $this->RequiredTermCounts[$ItemId] = 1;
1283 # add to item record score
1284 if (isset($Scores[$ItemId]))
1286 $Scores[$ItemId] += $Count;
1290 $Scores[$ItemId] = $Count;
1297 # return basic scores to caller
1301 # extract phrases (terms surrounded by quotes) from search string
1302 private function ParseSearchStringForPhrases($SearchString)
1304 # split into chunks delimited by double quote marks
1305 $Pieces = explode(
"\"", $SearchString); #
"
1307 # for each pair of chunks
1310 while ($Index < count($Pieces))
1312 # grab phrase from chunk
1313 $Phrase = trim(addslashes($Pieces[$Index - 1]));
1314 $Flags = WORD_PRESENT;
1316 # grab first character of phrase
1317 $FirstChar = substr($Pieces[$Index - 2], -1);
1319 # set flags to reflect any option characters
1320 if ($FirstChar == "-
")
1322 $Flags |= WORD_EXCLUDED;
1323 if (!isset($Phrases[$Phrase]))
1325 $this->ExcludedTermCount++;
1330 if ((($this->DefaultSearchLogic == self::LOGIC_AND) && ($FirstChar != "~
"))
1331 || ($FirstChar == "+
"))
1333 $Flags |= WORD_REQUIRED;
1334 if (!isset($Phrases[$Phrase]))
1336 $this->RequiredTermCount++;
1339 if (!isset($Phrases[$Phrase]))
1341 $this->InclusiveTermCount++;
1342 $this->SearchTermList[] = $Phrase;
1345 $Phrases[$Phrase] = $Flags;
1347 # move to next pair of chunks
1351 # return phrases to caller
1355 # extract groups (terms surrounded by parens) from search string
1356 # (NOTE: NOT YET IMPLEMENTED!!!)
1357 private function ParseSearchStringForGroups($SearchString)
1359 # split into chunks delimited by open paren
1360 $Pieces = explode("(
", $SearchString);
1364 while ($Index < count($Pieces))
1366 # grab phrase from chunk
1367 $Group = trim(addslashes($Pieces[$Index - 1]));
1370 # move to next pair of chunks
1374 # return phrases to caller
1378 protected function SearchFieldForPhrases($FieldName, $Phrase)
1381 exit("<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\
n");
1384 private function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
1385 $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
1387 # if phrases are found
1388 if (count($Phrases) > 0)
1390 # if this is a keyword search
1391 if ($FieldName ==
"XXXKeywordXXX")
1394 foreach ($this->FieldInfo as $KFieldName => $Info)
1396 # if field is marked to be included in keyword searches
1397 if ($Info[
"InKeywordSearch"])
1399 # call ourself with that field
1400 $Scores = $this->SearchForPhrases($Phrases, $Scores, $KFieldName,
1401 $ProcessNonExcluded, $ProcessExcluded);
1408 foreach ($Phrases as $Phrase => $Flags)
1410 $this->DMsg(2,
"Searching for phrase '".$Phrase
1411 .
"' in field ".$FieldName);
1413 # if phrase flagged as excluded and we are doing excluded phrases
1414 # or phrase flagged as non-excluded and we are doing non-excluded phrases
1415 if (($ProcessExcluded && ($Flags & WORD_EXCLUDED))
1416 || ($ProcessNonExcluded && !($Flags & WORD_EXCLUDED)))
1418 # initialize score list if necessary
1419 if ($Scores === NULL) { $Scores = array(); }
1421 # retrieve list of items that contain phrase
1422 $ItemIds = $this->SearchFieldForPhrases(
1423 $FieldName, $Phrase);
1425 # for each item that contains phrase
1426 foreach ($ItemIds as $ItemId)
1428 # if we are doing excluded phrases and phrase flagged as excluded
1429 if ($ProcessExcluded && ($Flags & WORD_EXCLUDED))
1431 # knock item off of list
1432 unset($Scores[$ItemId]);
1434 elseif ($ProcessNonExcluded)
1436 # calculate phrase value based on number of words and field weight
1437 $PhraseScore = count(preg_split(
"/[\s]+/", $Phrase, -1, PREG_SPLIT_NO_EMPTY))
1438 * $this->FieldInfo[$FieldName][
"Weight"];
1439 $this->DMsg(2,
"Phrase score is ".$PhraseScore);
1441 # bump up item record score
1442 if (isset($Scores[$ItemId]))
1444 $Scores[$ItemId] += $PhraseScore;
1448 $Scores[$ItemId] = $PhraseScore;
1451 # if phrase flagged as required
1452 if ($Flags & WORD_REQUIRED)
1454 # increment required word count for record
1455 if (isset($this->RequiredTermCounts[$ItemId]))
1457 $this->RequiredTermCounts[$ItemId]++;
1461 $this->RequiredTermCounts[$ItemId] = 1;
1471 # return updated scores to caller
1475 private function FilterOnExcludedWords($Words, $Scores, $FieldName =
"XXXKeywordXXX")
1480 $FieldId = $this->GetFieldId($FieldName);
1483 foreach ($Words as $Word => $Flags)
1485 # if word flagged as excluded
1486 if ($Flags & WORD_EXCLUDED)
1488 # look up record ID for word
1489 $WordId = $this->GetWordId($Word);
1492 if ($WordId !== NULL)
1494 # look up counts for word
1495 $DB->Query(
"SELECT ItemId FROM SearchWordCounts "
1496 .
"WHERE WordId=${WordId} AND FieldId=${FieldId}");
1499 while ($Record = $DB->FetchRow())
1501 # if item record is in score list
1502 $ItemId = $Record[
"ItemId"];
1503 if (isset($Scores[$ItemId]))
1505 # remove item record from score list
1506 $this->DMsg(3,
"Filtering out item ".$ItemId
1507 .
" because it contained word \"".$Word.
"\"");
1508 unset($Scores[$ItemId]);
1515 # return filtered score list to caller
# Remove from the score list every item that did not match all of the
# required terms in the search.
# @param array $Scores Item scores indexed by item ID.
# @return array Score list with items lacking required terms removed.
private function FilterOnRequiredWords($Scores)
{
    # nothing to filter unless the search had required terms
    if (!($this->RequiredTermCount > 0))
    {
        return $Scores;
    }

    # for each item
    foreach ($Scores as $ItemId => $Score)
    {
        # find out how many required terms were matched for item
        # (NULL means no required term was matched at all)
        $MatchedCount = isset($this->RequiredTermCounts[$ItemId])
                ? $this->RequiredTermCounts[$ItemId] : NULL;

        # if item does not meet required word count
        if (($MatchedCount === NULL)
                || ($MatchedCount < $this->RequiredTermCount))
        {
            # filter out item
            $this->DMsg(4, "Filtering out item ".$ItemId
                    ." because it didn't have required word count of "
                    .$this->RequiredTermCount
                    .(($MatchedCount !== NULL)
                            ? " (only had ".$MatchedCount.")" : ""));
            unset($Scores[$ItemId]);
        }
    }

    # return filtered list to caller
    return $Scores;
}
# Filter, count, sort, and trim search result scores list.
# Side effect: stores the number of results remaining after the filter
# callbacks (before trimming) in $this->NumberOfResultsAvailable.
# @param array $Scores Item scores indexed by item ID.
# @param int $StartingResult Offset of first result to return.
# @param int $NumberOfResults Maximum number of results to return.
# @param mixed $SortByField Field to sort by, or NULL to sort by score.
# @param bool $SortDescending TRUE to sort in descending order.
# @return array Requested slice of the score list, indexed by item ID.
private function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # perform any requested filtering
    $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # save total number of results available
    $this->NumberOfResultsAvailable = count($Scores);

    # if no sorting field specified
    if ($SortByField === NULL)
    {
        # sort result list by score
        if ($SortDescending)
        {
            arsort($Scores, SORT_NUMERIC);
        }
        else
        {
            asort($Scores, SORT_NUMERIC);
        }
    }
    else
    {
        # get list of item IDs in sorted order
        $SortedIds = $this->GetItemIdsSortedByField(
                $SortByField, $SortDescending);

        # if we have sorted item IDs
        if (count($SortedIds) && count($Scores))
        {
            # strip sorted ID list down to those that appear in search results
            $SortedIds = array_intersect($SortedIds, array_keys($Scores));

            # rebuild score list in sorted order (start from an empty list
            #       so that no overlap yields no results rather than an
            #       undefined variable)
            $NewScores = array();
            foreach ($SortedIds as $Id)
            {
                $NewScores[$Id] = $Scores[$Id];
            }
            $Scores = $NewScores;
        }
        else
        {
            # sort result list by score
            arsort($Scores, SORT_NUMERIC);
        }
    }

    # trim result list to match range requested by caller
    # (item IDs are the array keys, so keys must be preserved)
    $TrimmedScores = array_slice(
            $Scores, $StartingResult, $NumberOfResults, TRUE);

    # return cleaned search result scores list to caller
    return $TrimmedScores;
}
# Run each result past the registered filter callbacks and drop any result
# that a callback rejects.  A callback is called with the item ID and
# rejects the item by returning a value that evaluates to TRUE.
# @param array $Scores Item scores indexed by item ID.
# @return array Score list with rejected items removed.
protected function FilterOnSuppliedFunctions($Scores)
{
    # if filter functions have been set
    if (isset($this->FilterFuncs))
    {
        # for each result
        foreach ($Scores as $ItemId => $Score)
        {
            # for each filter function
            foreach ($this->FilterFuncs as $FilterFunc)
            {
                # if filter function return TRUE for item
                if (call_user_func($FilterFunc, $ItemId))
                {
                    # get printable name for callback (callback may be an
                    #       array or a closure, which cannot be concatenated
                    #       into a string directly)
                    $FuncLabel = "";
                    is_callable($FilterFunc, TRUE, $FuncLabel);

                    # discard result
                    $this->DMsg(2, "Filter callback <i>".$FuncLabel
                            ."</i> rejected item ".$ItemId);
                    unset($Scores[$ItemId]);

                    # bail out of filter func loop
                    break;
                }
            }
        }
    }

    # return filtered list to caller
    return $Scores;
}
# Run comparison searches (search strings such as ">=2005" or "!=foo", and
# any search string for a non-text field) and merge the matching items
# into the score list.  The keyword pseudo-field is ignored.
# @param array $SearchStrings Search strings (or arrays of search strings)
#       indexed by field name.
# @param array $Scores Current item scores indexed by item ID (may be NULL).
# @return array Updated item scores (returned untouched when no comparison
#       was found in the search strings).
private function SearchForComparisonMatches($SearchStrings, $Scores)
{
    # comparisons are collected in three parallel arrays that share an index
    $FieldNames = array();
    $Operators = array();
    $Values = array();
    $Index = 0;

    # for each field
    foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
    {
        # skip keyword field
        if ($SearchFieldName == "XXXKeywordXXX")
        {
            continue;
        }

        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # check whether search string starts with a comparison operator
            $FoundOperator = preg_match("/^[><!]=./", $SearchString)
                    || preg_match("/^[><=]./", $SearchString);

            # search strings for non-text fields are treated as comparisons
            #       even when they do not start with an operator
            $IsNonTextField =
                    isset($this->FieldInfo[$SearchFieldName]["FieldType"])
                    && ($this->FieldInfo[$SearchFieldName]["FieldType"]
                            != self::FIELDTYPE_TEXT);
            if (!$FoundOperator && !$IsNonTextField)
            {
                continue;
            }

            # determine value by stripping off any leading operator
            $Patterns = array("/^[><!]=/", "/^[><=]/");
            $Replacements = array("", "");
            $Value = trim(preg_replace($Patterns, $Replacements, $SearchString));

            # determine operator (equality is assumed when none was given)
            $Operator = NULL;
            if (!$FoundOperator)
            {
                $Operator = "=";
            }
            else
            {
                # substr() is used rather than string offsets so that a
                #       one-character term does not read past its end
                $Term = trim($SearchString);
                $FirstChar = substr($Term, 0, 1);
                $FirstTwoChars = substr($Term, 0, 2);
                if (in_array($FirstTwoChars, array(">=", "<=", "!="), TRUE))
                {
                    $Operator = $FirstTwoChars;
                }
                elseif (in_array($FirstChar, array(">", "<", "="), TRUE))
                {
                    $Operator = $FirstChar;
                }
            }

            # if operator was found
            if ($Operator !== NULL)
            {
                # save operator, value, and field name
                $Operators[$Index] = $Operator;
                $Values[$Index] = $Value;
                $FieldNames[$Index] = $SearchFieldName;
                $this->DMsg(3, "Added comparison (field = <i>"
                        .$FieldNames[$Index]."</i> op = <i>"
                        .$Operators[$Index]."</i> val = <i>"
                        .$Values[$Index]."</i>)");

                # move to next comparison array entry
                $Index++;
            }
        }
    }

    # if comparisons found
    if (count($Operators))
    {
        # treat a missing score list as an empty one
        if ($Scores === NULL)
        {
            $Scores = array();
        }

        # perform comparisons on fields and gather results
        $Results = $this->SearchFieldsForComparisonMatches(
                $FieldNames, $Operators, $Values);

        # if search logic is set to AND
        if ($this->DefaultSearchLogic == self::LOGIC_AND)
        {
            if (!count($Results))
            {
                # no item satisfied the comparisons, so no item can
                #       satisfy the search as a whole
                $Scores = array();
            }
            elseif ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
            {
                # there were no prior results and no terms for keyword
                #       search, so add all results to scores
                foreach ($Results as $ItemId)
                {
                    $Scores[$ItemId] = 1;
                }
            }
            else
            {
                # remove anything from scores that is not part of results
                $Scores = array_intersect_key($Scores, array_flip($Results));
            }
        }
        else
        {
            # add result items to scores
            foreach ($Results as $ItemId)
            {
                if (isset($Scores[$ItemId]))
                {
                    $Scores[$ItemId] += 1;
                }
                else
                {
                    $Scores[$ItemId] = 1;
                }
            }
        }
    }

    # return results to caller
    return $Scores;
}
# Pass every search string through ExtractDebugLevel(), so that any debug
# level indicator is acted upon and stripped out.
# @param mixed $SearchStrings Single search string, or array (indexed by
#       field name) of search strings or arrays of search strings.
# @return mixed Search info in the same shape as it was supplied, holding
#       the values returned by ExtractDebugLevel().
private function SetDebugLevel($SearchStrings)
{
    # a lone search string can be handled directly
    if (!is_array($SearchStrings))
    {
        return $this->ExtractDebugLevel($SearchStrings);
    }

    # otherwise walk through the entries for each field
    foreach ($SearchStrings as $Field => $Entry)
    {
        # entry is a single search string
        if (!is_array($Entry))
        {
            $SearchStrings[$Field] = $this->ExtractDebugLevel($Entry);
            continue;
        }

        # entry is a list of search strings
        foreach ($Entry as $Key => $Text)
        {
            $SearchStrings[$Field][$Key] = $this->ExtractDebugLevel($Text);
        }
    }

    # hand back updated search info
    return $SearchStrings;
}
# Look for a debug level indicator ("DBUGLVL=" followed by one or two
# digits in the range 1-9) at the start of a search string and, if one is
# found, set the debug level and strip the indicator out of the string.
# Strings without a well-formed leading indicator are returned untouched.
# @param mixed $SearchString Search string to examine.
# @return mixed Search string with any recognized indicator removed.
private function ExtractDebugLevel($SearchString)
{
    # if search string starts with a debug level indicator
    # (the level is captured directly, so a string that merely contains
    #       "DBUGLVL=" somewhere can never end up being used as the level)
    if (is_string($SearchString)
            && preg_match("/^\\s*DBUGLVL=([1-9]{1,2})/", $SearchString, $Matches))
    {
        # set debug level
        $Level = intval($Matches[1]);
        $this->DebugLevel = $Level;
        $this->DMsg(0, "Setting debug level to ".$Level);

        # remove indicator from search string
        $SearchString = preg_replace(
                "/DBUGLVL=".$Level."/", "", $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
# Build a search result scores list that contains every item in the item
# table, with each item given a score of 1.
# @return array Scores (all 1) indexed by item ID.
private function LoadScoresForAllRecords()
{
    $IdColumn = $this->ItemIdFieldName;
    $AllScores = array();

    # fetch the ID of every item
    $this->DB->Query("SELECT ".$IdColumn." FROM ".$this->ItemTableName);
    while ($Row = $this->DB->FetchRow())
    {
        # every item starts out with the same score
        $AllScores[$Row[$IdColumn]] = 1;
    }

    # hand back the complete list
    return $AllScores;
}
1840 # ---- private functions used in building search database
# Add to the stored count for a word within one field of one item.
# The first time a word/field pair is seen (per $this->WordCountAdded) a
# new SearchWordCounts row is inserted with Count set to $Weight; after
# that $Weight is added to the Count of the existing row.
# When stemming is enabled the same is done for the stem of the word, using
# half of the weight (rounded up).
# @param string $Word Word to update count for.
# @param int $ItemId ID of item that contains the word.
# @param int $FieldId ID of field that contains the word.
# @param int $Weight Amount to add to the count (defaults to 1).
# NOTE(review): WordCountAdded is indexed by word and field but not by
#       item -- presumably it is reset elsewhere for each item; confirm.
1849 private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
1851 # retrieve ID for word
# NOTE(review): the TRUE argument presumably asks for the word to be added
#       if it is not yet known -- confirm against GetWordId()
1852 $WordIds[] = $this->GetWordId($Word, TRUE);
1854 # if stemming is enabled
1855 if ($this->StemmingEnabled)
1857 # retrieve ID for stem of word
# NOTE(review): $Stem is not assigned anywhere in the lines visible here;
#       presumably it is derived from $Word just before this call -- confirm
1859 $WordIds[] = $this->GetStemId($Stem, TRUE);
1862 # for word and stem of word
1863 foreach ($WordIds as $WordId)
1865 # if word count already added to database
1866 if (isset($this->WordCountAdded[$WordId][$FieldId]))
# add weight to the count in the existing row
1869 $this->DB->Query(
"UPDATE SearchWordCounts SET Count=Count+".$Weight
1870 .
" WHERE WordId=".$WordId
1871 .
" AND ItemId=".$ItemId
1872 .
" AND FieldId=".$FieldId);
1876 # add word count to DB
1877 $this->DB->Query(
"INSERT INTO SearchWordCounts"
1878 .
" (WordId, ItemId, FieldId, Count) VALUES"
1879 .
" (".$WordId.
", ".$ItemId.
", ".$FieldId.
", ".$Weight.
")");
1881 # remember that we added count for this word
1882 $this->WordCountAdded[$WordId][$FieldId] = TRUE;
1885 # decrease weight for stem
# (the word is first in the list, so the halved weight applies to the stem)
1886 $Weight = ceil($Weight / 2);
# Retrieve content of a field for an item.  This implementation has no
# content to return: it prints an error message and ends the script.
# NOTE(review): presumably meant to be overridden by child classes -- confirm.
# @param mixed $ItemId ID of item (unused here).
# @param string $FieldName Name of field (unused here).
protected function GetFieldContent($ItemId, $FieldName)
{
    # report the missing implementation and stop
    $Notice = "<br>SE - ERROR: GetFieldContent() not implemented<br>\n";
    exit($Notice);
}
# Record word counts for a piece of text that belongs to one field of an
# item.  Each word is counted toward the field itself (using the default
# weight of UpdateWordCount()) and, if requested, toward the keyword
# pseudo-field as well (using the supplied weight).
# @param mixed $ItemId ID of item that the text belongs to.
# @param string $FieldName Name of field that the text belongs to.
# @param int $Weight Weight to use for keyword field counts.
# @param string $Text Text to record.
# @param bool $IncludeInKeyword TRUE to also count words toward the
#       keyword pseudo-field.
private function RecordSearchInfoForText(
        $ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
{
    # break text up into words
    $ParsedWords = $this->ParseSearchStringForWords($Text, TRUE);

    # there is nothing to record if no words were left after parsing
    if (!(count($ParsedWords) > 0))
    {
        return;
    }

    # look up ID for field, and for keyword field if it will be needed
    $TargetFieldId = $this->GetFieldId($FieldName);
    $KeywordFieldId = NULL;
    if ($IncludeInKeyword)
    {
        $KeywordFieldId = $this->GetFieldId("XXXKeywordXXX");
    }

    # words are the array indexes (the flag values are not needed here)
    foreach (array_keys($ParsedWords) as $Term)
    {
        # count word toward field
        $this->UpdateWordCount($Term, $ItemId, $TargetFieldId);

        # count word toward keyword field if requested
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $Term, $ItemId, $KeywordFieldId, $Weight);
        }
    }
}
# Print a debug message, but only when the current debug level is higher
# than the level of the message.
# @param int $Level Level of message.
# @param string $Msg Text of message.
protected function DMsg($Level, $Msg)
{
    # stay quiet unless debug level exceeds message level
    if (!($this->DebugLevel > $Level))
    {
        return;
    }

    echo "SE: ".$Msg."<br>\n";
}
1941 # ---- BACKWARD COMPATIBILITY --------------------------------------------
1943 # possible types of logical operators
# NOTE(review): presumably older names kept for existing callers, standing
#       in for the LOGIC_AND / LOGIC_OR constants used elsewhere in the
#       class -- confirm that the values match those constants
1944 const SEARCHLOGIC_AND = 1;
1945 const SEARCHLOGIC_OR = 2;