CWIS Developer Documentation
SearchEngine.php
Go to the documentation of this file.
1 <?PHP
2 #
3 # FILE: SearchEngine.php
4 #
5 # Open Source Metadata Archive Search Engine (OSMASE)
6 # Copyright 2002-2016 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu
8 #
9 
13 abstract class SearchEngine
14 {
15 
16  # ---- PUBLIC INTERFACE --------------------------------------------------
17 
# NOTE(review): CombineScores() and ParseSearchStringForPhrases() compare
#       their $Logic argument against the strings "AND" and "OR" rather
#       than these integer values -- confirm how callers map between them
18  # possible types of logical operators
19  const LOGIC_AND = 1;
20  const LOGIC_OR = 2;
21 
# field types (FIELDTYPE_TEXT fields are the ones eligible for word and
#       phrase searching in SearchAcrossFields())
22  # flags used for indicating field types
23  const FIELDTYPE_TEXT = 1;
24  const FIELDTYPE_NUMERIC = 2;
25  const FIELDTYPE_DATE = 3;
27 
# word states are bit flags (values are powers of two) that are combined
#       with "|=" and tested with "&" when search strings are parsed
28  # flags used for indicating word states
29  const WORD_PRESENT = 1;
30  const WORD_EXCLUDED = 2;
31  const WORD_REQUIRED = 4;
32 
/**
 * Object constructor.
 * NOTE(review): the parameter list was missing from this listing and has
 *      been reconstructed from the three values saved below -- confirm
 *      the parameter order against existing callers.
 * @param string $ItemTableName Name of database table containing items.
 * @param string $ItemIdFieldName Name of column containing item IDs.
 * @param string $ItemTypeFieldName Name of column containing item types.
 */
public function __construct(
        $ItemTableName, $ItemIdFieldName, $ItemTypeFieldName)
{
    # create database object for our use
    $this->DB = new Database();

    # save item access parameters (used to build the item query
    #       in UpdateForItems())
    $this->ItemTableName = $ItemTableName;
    $this->ItemIdFieldName = $ItemIdFieldName;
    $this->ItemTypeFieldName = $ItemTypeFieldName;

    # set default debug state
    $this->DebugLevel = 0;
}
55 
/**
 * Register a field (or update its settings) for use in searching.
 * @param mixed $FieldId ID of field.
 * @param int $FieldType Field type (one of the FIELDTYPE_ constants).
 * @param mixed $ItemTypes Item type or array of item types to which the
 *      field applies.
 * @param int $Weight Weight to give the field when scoring.
 * @param bool $UsedInKeywordSearch Whether the field should be included
 *      in keyword searches.
 */
public function AddField($FieldId, $FieldType, $ItemTypes,
        $Weight, $UsedInKeywordSearch)
{
    # a single item type is stored as a one-element list
    if (!is_array($ItemTypes))
    {
        $ItemTypes = array($ItemTypes);
    }

    # store settings for field
    $this->FieldInfo[$FieldId]["FieldType"] = $FieldType;
    $this->FieldInfo[$FieldId]["Weight"] = $Weight;
    $this->FieldInfo[$FieldId]["InKeywordSearch"] = (bool)$UsedInKeywordSearch;
    $this->FieldInfo[$FieldId]["ItemTypes"] = $ItemTypes;
}
77 
/**
 * Retrieve the type recorded for a field by AddField().
 * @param mixed $FieldId ID of field.
 * @return int Field type (one of the FIELDTYPE_ constants).
 */
public function FieldType($FieldId)
{
    $Info = $this->FieldInfo[$FieldId];
    return $Info["FieldType"];
}
87 
/**
 * Retrieve the search weight recorded for a field by AddField().
 * @param mixed $FieldId ID of field.
 * @return int Weight for field.
 */
public function FieldWeight($FieldId)
{
    $Info = $this->FieldInfo[$FieldId];
    return $Info["Weight"];
}
97 
/**
 * Report whether a field is included in keyword searches.
 * @param mixed $FieldId ID of field.
 * @return bool TRUE if field was registered as used in keyword searches,
 *      otherwise FALSE.
 */
public function FieldInKeywordSearch($FieldId)
{
    $Info = $this->FieldInfo[$FieldId];
    return $Info["InKeywordSearch"];
}
107 
/**
 * Set the debug output level.  (The level starts at 0 after construction.)
 * @param int $NewValue New debug level.
 */
public function DebugLevel($NewValue)
{
    # save new debug level
    $this->DebugLevel = $NewValue;
}
116 
117 
118  # ---- search functions
119 
/**
 * Perform a search and return the results as a single flat list.
 * @param mixed $SearchParams Search parameter set or keyword search
 *      string (passed straight through to SearchAll()).
 * @return array Result of SearchAll(), flattened with
 *      FlattenMultiTypeResults().
 */
public function Search($SearchParams)
{
    $ResultsByType = $this->SearchAll($SearchParams);
    return self::FlattenMultiTypeResults($ResultsByType);
}
134 
/**
 * Perform a search, recording the time taken and the terms searched for.
 * @param mixed $SearchParams Search parameter set, or a string to be
 *      treated as a keyword search.
 * @return array Scores as returned by SortScores().
 */
public function SearchAll($SearchParams)
{
    # wrap bare keyword search string in a search parameter set
    if (is_string($SearchParams))
    {
        $KeywordString = $SearchParams;
        $SearchParams = new SearchParameterSet();
        $SearchParams->AddParameter($KeywordString);
    }

    # strip the debug level keyword (if any) out of keyword strings,
    #       swapping in the filtered version of any string that changed
    foreach ($SearchParams->GetKeywordSearchStrings() as $Original)
    {
        $Filtered = $this->ExtractDebugLevel($Original);
        if ($Filtered != $Original)
        {
            $SearchParams->RemoveParameter($Original);
            $SearchParams->AddParameter($Filtered);
        }
    }
    $this->DMsg(0, "Description: ".$SearchParams->TextDescription());

    # note when search started
    $StartTime = microtime(TRUE);

    # start with empty parsed search term list
    $this->SearchTermList = array();

    # run search, then filter and sort the resulting scores
    $RawScores = $this->RawSearch($SearchParams);
    $SortedScores = $this->SortScores($RawScores,
            $SearchParams->SortBy(), $SearchParams->SortDescending());

    # record how long search took
    $this->LastSearchTime = microtime(TRUE) - $StartTime;

    # hand results back to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $SortedScores;
}
188 
/**
 * Add a function to the list of result filter functions.
 * @param mixed $FunctionName Name of filter function.
 */
public function AddResultFilterFunction($FunctionName)
{
    # append function to filter list
    $this->FilterFuncs[] = $FunctionName;
}
198 
/**
 * Get the number of results found by the most recent search.
 * @param mixed $ItemType Item type to report the count for, or NULL for
 *      the overall count.  (OPTIONAL, defaults to NULL)
 * @return int Result count (0 for an item type with no recorded count).
 */
public function NumberOfResults($ItemType = NULL)
{
    # report overall count if no item type specified
    if ($ItemType === NULL)
    {
        return $this->NumberOfResultsAvailable;
    }

    # report count for item type if we have one
    if (isset($this->NumberOfResultsPerItemType[$ItemType]))
    {
        return $this->NumberOfResultsPerItemType[$ItemType];
    }

    # nothing recorded for item type
    return 0;
}
211 
/**
 * Get the terms parsed out of the search strings by the most recent
 * search.  (The list is cleared at the start of each SearchAll() call.)
 * @return array List of search terms.
 */
public function SearchTerms()
{
    $Terms = $this->SearchTermList;
    return $Terms;
}
220 
/**
 * Get the time taken by the most recent search, as measured in
 * SearchAll() with microtime(TRUE).
 * @return float Elapsed time in seconds.
 */
public function SearchTime()
{
    $Elapsed = $this->LastSearchTime;
    return $Elapsed;
}
229 
/**
 * Total up the weights of all fields involved in a search: each known
 * field returned by GetFields(), plus (when keyword search strings are
 * present) every field that is included in keyword searches.
 * @param SearchParameterSet $SearchParams Search parameters.
 * @return int Sum of field weights.
 */
public function FieldedSearchWeightScale($SearchParams)
{
    $TotalWeight = 0;

    # add in weights for explicitly-searched fields that we know about
    foreach ($SearchParams->GetFields() as $SearchedFieldId)
    {
        if (array_key_exists($SearchedFieldId, $this->FieldInfo))
        {
            $TotalWeight += $this->FieldInfo[$SearchedFieldId]["Weight"];
        }
    }

    # add in weights for keyword fields if there is a keyword search
    $KeywordStringCount = count($SearchParams->GetKeywordSearchStrings());
    if ($KeywordStringCount)
    {
        foreach ($this->FieldInfo as $FieldSettings)
        {
            if ($FieldSettings["InKeywordSearch"])
            {
                $TotalWeight += $FieldSettings["Weight"];
            }
        }
    }

    return $TotalWeight;
}
259 
260 
261  # ---- search database update functions
262 
/**
 * Rebuild the search database entries for one item.  Existing word count
 * and item type rows for the item are deleted and then regenerated from
 * the content of every field that has a positive weight and applies to
 * the item's type.
 * @param int $ItemId ID of item.
 * @param int $ItemType Type of item.
 */
public function UpdateForItem($ItemId, $ItemType)
{
    # clear word count added flags for this item
    unset($this->WordCountAdded);

    # force ID to an integer before it goes into SQL (the INSERT below
    #       already did this, the DELETEs previously did not)
    $SafeItemId = intval($ItemId);

    # delete any existing info for this item
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$SafeItemId);
    $this->DB->Query("DELETE FROM SearchItemTypes WHERE ItemId = ".$SafeItemId);

    # save item type
    $this->DB->Query("INSERT INTO SearchItemTypes (ItemId, ItemType)"
            ." VALUES (".$SafeItemId.", ".intval($ItemType).")");

    # for each metadata field
    foreach ($this->FieldInfo as $FieldId => $Info)
    {
        # skip field if no valid search weight or field does not apply
        #       to this type of item
        if (($Info["Weight"] <= 0)
                || !in_array($ItemType, $Info["ItemTypes"]))
        {
            continue;
        }

        # retrieve text for field (may be one string or an array of them)
        $Text = $this->GetFieldContent($ItemId, $FieldId);
        $Strings = is_array($Text) ? $Text : array($Text);

        # record search info for each text string
        foreach ($Strings as $String)
        {
            $this->RecordSearchInfoForText($ItemId, $FieldId,
                    $Info["Weight"], $String,
                    $Info["InKeywordSearch"]);
        }
    }
}
313 
/**
 * Rebuild the search database entries for a range of items, in ascending
 * item ID order.
 * @param int $StartingItemId ID of first item to update.
 * @param int $NumberOfItems Maximum number of items to update.
 * @return mixed ID of last item updated, or NULL if no items were found
 *      at or after the starting ID.
 */
public function UpdateForItems($StartingItemId, $NumberOfItems)
{
    # retrieve IDs for specified number of items starting at specified ID
    #       (numeric arguments are forced to integers before use in SQL)
    $this->DB->Query("SELECT ".$this->ItemIdFieldName.", ".$this->ItemTypeFieldName
            ." FROM ".$this->ItemTableName
            ." WHERE ".$this->ItemIdFieldName." >= ".intval($StartingItemId)
            ." ORDER BY ".$this->ItemIdFieldName." LIMIT ".intval($NumberOfItems));
    $ItemTypes = $this->DB->FetchColumn(
            $this->ItemTypeFieldName, $this->ItemIdFieldName);

    # update search info for each retrieved item, tracking the last ID
    #       explicitly so the return value is defined even when the
    #       query finds nothing
    $LastItemId = NULL;
    foreach ($ItemTypes as $ItemId => $ItemType)
    {
        $this->UpdateForItem($ItemId, $ItemType);
        $LastItemId = $ItemId;
    }

    # return ID of last item updated to caller
    return $LastItemId;
}
340 
/**
 * Remove all search database entries for an item.
 * @param int $ItemId ID of item.
 */
public function DropItem($ItemId)
{
    # force ID to an integer before it goes into SQL
    $SafeItemId = intval($ItemId);

    # drop all entries pertaining to item from word count and
    #       item type tables
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$SafeItemId);
    $this->DB->Query("DELETE FROM SearchItemTypes WHERE ItemId = ".$SafeItemId);
}
351 
/**
 * Remove all word count entries for a field.
 * @param mixed $FieldId ID of field.
 */
public function DropField($FieldId)
{
    # drop all entries pertaining to field from word counts table
    # (the quotes were previously written as \' which, inside a
    #       double-quoted PHP string, puts a literal backslash into
    #       the SQL and makes the statement invalid)
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = '"
            .addslashes($FieldId)."'");
}
361 
/**
 * Get the number of rows in the SearchWords table.
 * @return mixed Value of COUNT(*) as returned by the database object.
 */
public function SearchTermCount()
{
    $Sql = "SELECT COUNT(*) AS TermCount"
            ." FROM SearchWords";
    return $this->DB->Query($Sql, "TermCount");
}
371 
/**
 * Get the number of distinct items that have entries in the
 * SearchWordCounts table.
 * @return mixed Item count as returned by the database object.
 */
public function ItemCount()
{
    $Sql = "SELECT COUNT(DISTINCT ItemId) AS ItemCount"
            ." FROM SearchWordCounts";
    return $this->DB->Query($Sql, "ItemCount");
}
381 
/**
 * Add synonyms for a word.  A pairing is only stored if it is not already
 * present in either direction.
 * @param string $Word Word to add synonyms for.
 * @param array $Synonyms Synonyms to add.
 * @return int Number of new synonym pairings stored.
 */
public function AddSynonyms($Word, $Synonyms)
{
    $NewPairCount = 0;

    # look up ID for word (TRUE is passed to GetWordId() for both the
    #       word and each synonym)
    $WordId = $this->GetWordId($Word, TRUE);

    foreach ($Synonyms as $Synonym)
    {
        $SynonymId = $this->GetWordId($Synonym, TRUE);

        # look for existing pairing in either direction
        $this->DB->Query("SELECT * FROM SearchWordSynonyms"
                ." WHERE (WordIdA = ".$WordId
                    ." AND WordIdB = ".$SynonymId.")"
                ." OR (WordIdB = ".$WordId
                    ." AND WordIdA = ".$SynonymId.")");
        if ($this->DB->NumRowsSelected() != 0)
        {
            continue;
        }

        # pairing is new, so store it
        $this->DB->Query("INSERT INTO SearchWordSynonyms"
                ." (WordIdA, WordIdB)"
                ." VALUES (".$WordId.", ".$SynonymId.")");
        $NewPairCount++;
    }

    # report to caller number of new synonyms added
    return $NewPairCount;
}
422 
/**
 * Remove synonyms for a word.  Nothing is done if the word is not known.
 * @param string $Word Word to remove synonyms for.
 * @param array $Synonyms Synonyms to remove, or NULL to remove every
 *      synonym for the word.  (OPTIONAL, defaults to NULL)
 */
public function RemoveSynonyms($Word, $Synonyms = NULL)
{
    # nothing to do if word is unknown
    $WordId = $this->GetWordId($Word);
    if ($WordId === NULL)
    {
        return;
    }

    # with no specific synonyms given, remove every pairing for word
    if ($Synonyms === NULL)
    {
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE WordIdA = '".$WordId."'"
                ." OR WordIdB = '".$WordId."'");
        return;
    }

    # otherwise remove just the pairings with the specified synonyms
    foreach ($Synonyms as $Synonym)
    {
        # skip synonyms that are not known words
        $SynonymId = $this->GetWordId($Synonym);
        if ($SynonymId === NULL)
        {
            continue;
        }

        # delete pairing, whichever direction it was stored in
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE (WordIdA = '".$WordId."'"
                    ." AND WordIdB = '".$SynonymId."')"
                ." OR (WordIdB = '".$WordId."'"
                    ." AND WordIdA = '".$SynonymId."')");
    }
}
467 
/**
 * Remove every synonym pairing from the SearchWordSynonyms table.
 */
public function RemoveAllSynonyms()
{
    $Sql = "DELETE FROM SearchWordSynonyms";
    $this->DB->Query($Sql);
}
475 
/**
 * Get the synonyms stored for a word.
 * @param string $Word Word to look up synonyms for.
 * @return array List of synonyms (empty if the word is unknown or has
 *      no synonyms).
 */
public function GetSynonyms($Word)
{
    # assume no synonyms will be found
    $Synonyms = array();

    # look up ID for word
    $WordId = $this->GetWordId($Word);

    # if word ID was found
    if ($WordId !== NULL)
    {
        # look up IDs of all synonyms for this word
        $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
                ." WHERE WordIdA = ".$WordId
                ." OR WordIdB = ".$WordId);

        # collect the IDs first, before any GetWord() lookups are done
        # (FetchRow must be called as a method; it was previously written
        #       without parentheses, which read a property instead and
        #       meant no rows were ever fetched)
        $SynonymIds = array();
        while ($Record = $this->DB->FetchRow())
        {
            # pairing may be stored in either direction, so take
            #       whichever side is not the word itself
            $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                    ? $Record["WordIdB"] : $Record["WordIdA"];
        }

        # look up synonym word for each ID and add to synonym list
        foreach ($SynonymIds as $SynonymId)
        {
            $Synonyms[] = $this->GetWord($SynonymId);
        }
    }

    # return synonyms to caller
    return $Synonyms;
}
514 
/**
 * Get all stored synonyms.  Every stored pairing is first listed in both
 * directions, then each reciprocal pair is reduced to a single direction,
 * and finally the lists are sorted.
 * @return array Lists of synonyms, indexed by word.
 */
519  public function GetAllSynonyms()
520  {
521  # assume no synonyms will be found
522  $SynonymList = array();
523 
524  # for each synonym ID pair
# (a separate Database object is used for this query -- presumably so
#       that the GetWord() lookups inside the loop cannot disturb the
#       result set being walked; TODO confirm)
525  $OurDB = new Database();
526  $OurDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
527  while ($Record = $OurDB->FetchRow())
528  {
529  # look up words
530  $Word = $this->GetWord($Record["WordIdA"]);
531  $Synonym = $this->GetWord($Record["WordIdB"]);
532 
533  # if we do not already have an entry for the word
534  # or synonym is not listed for this word
535  if (!isset($SynonymList[$Word])
536  || !in_array($Synonym, $SynonymList[$Word]))
537  {
538  # add entry for synonym
539  $SynonymList[$Word][] = $Synonym;
540  }
541 
542  # if we do not already have an entry for the synonym
543  # or word is not listed for this synonym
544  if (!isset($SynonymList[$Synonym])
545  || !in_array($Word, $SynonymList[$Synonym]))
546  {
547  # add entry for word
548  $SynonymList[$Synonym][] = $Word;
549  }
550  }
551 
552  # for each word
553  # (this loop removes reciprocal duplicates)
# (the loops walk the lists as they were when each foreach started, while
#       removals are made to $SynonymList itself, which is why the isset()
#       and in_array() checks below re-test the current list contents)
554  foreach ($SynonymList as $Word => $Synonyms)
555  {
556  # for each synonym for that word
557  foreach ($Synonyms as $Synonym)
558  {
559  # if synonym has synonyms and word is one of them
560  if (isset($SynonymList[$Synonym])
561  && isset($SynonymList[$Word])
562  && in_array($Word, $SynonymList[$Synonym])
563  && in_array($Synonym, $SynonymList[$Word]))
564  {
565  # if word has less synonyms than synonym
# (the pairing is kept on whichever side has the longer list, with
#       ties going to the word currently being examined)
566  if (count($SynonymList[$Word])
567  < count($SynonymList[$Synonym]))
568  {
569  # remove synonym from synonym list for word
570  $SynonymList[$Word] = array_diff(
571  $SynonymList[$Word], array($Synonym));
572 
573  # if no synonyms left for word
574  if (!count($SynonymList[$Word]))
575  {
576  # remove empty synonym list for word
577  unset($SynonymList[$Word]);
578  }
579  }
580  else
581  {
582  # remove word from synonym list for synonym
583  $SynonymList[$Synonym] = array_diff(
584  $SynonymList[$Synonym], array($Word));
585 
586  # if no synonyms left for synonym
587  if (!count($SynonymList[$Synonym]))
588  {
589  # remove empty synonym list for synonym
590  unset($SynonymList[$Synonym]);
591  }
592  }
593  }
594  }
595  }
596 
597  # sort array alphabetically (just for convenience)
# (asort() keeps the existing keys within each synonym list)
598  foreach ($SynonymList as $Word => $Synonyms)
599  {
600  asort($SynonymList[$Word]);
601  }
602  ksort($SynonymList);
603 
604  # return 2D array of synonyms to caller
605  return $SynonymList;
606  }
607 
/**
 * Replace all stored synonyms with a new set.
 * @param array $SynonymList Lists of synonyms, indexed by word.
 */
public function SetAllSynonyms($SynonymList)
{
    # start from a clean slate
    $this->RemoveAllSynonyms();

    # store the synonyms supplied for each word
    foreach ($SynonymList as $BaseWord => $WordSynonyms)
    {
        $this->AddSynonyms($BaseWord, $WordSynonyms);
    }
}
625 
/**
 * Load synonyms from a file.  Each usable line holds a word followed by
 * one or more synonyms, separated by whitespace and/or commas.
 * @param string $FileName Name of file to load.
 * @return int Number of new synonym pairings added (0 if the file could
 *      not be read).
 */
public function LoadSynonymsFromFile($FileName)
{
    # asssume no synonyms will be added
    $AddCount = 0;

    # read in contents of file
    $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);

    # bail out if file could not be read (file() returns FALSE on
    #       failure, which must not be passed to count())
    if ($Lines === FALSE)
    {
        return $AddCount;
    }

    # for each line of file
    foreach ($Lines as $Line)
    {
        # skip comment lines
        # NOTE(review): pattern is not anchored, so any line containing
        #       a "#" anywhere is skipped -- confirm that is intended
        if (preg_match("/[\s]*#/", $Line))
        {
            continue;
        }

        # split line into words
        $Words = preg_split("/[\s,]+/", $Line);

        # if synonyms found
        if (count($Words) > 1)
        {
            # separate out word and synonyms
            $Word = array_shift($Words);

            # add synonyms
            $AddCount += $this->AddSynonyms($Word, $Words);
        }
    }

    # return count of synonyms added to caller
    return $AddCount;
}
670 
/**
 * Merge per-item-type result lists into one list.  If the same item ID
 * appears under more than one item type, the first score encountered
 * is the one kept.
 * @param array $Results Arrays of scores (indexed by item ID), indexed
 *      by item type.
 * @return array Scores, indexed by item ID.
 */
public static function FlattenMultiTypeResults($Results)
{
    $Merged = [];
    foreach ($Results as $ScoresForType)
    {
        foreach ($ScoresForType as $Id => $Score)
        {
            # keep existing entry if item has already been seen
            if (!array_key_exists($Id, $Merged))
            {
                $Merged[$Id] = $Score;
            }
        }
    }

    return $Merged;
}
688 
/**
 * Split a flat result list into per-item-type lists, using the item
 * types recorded in the SearchItemTypes table.
 * @param array $Results Scores, indexed by item ID.
 * @return array Arrays of scores (indexed by item ID), indexed by
 *      item type.
 */
public static function BuildMultiTypeResults($Results)
{
    # load item type for every item in the search database
    $TypeDB = new Database();
    $TypeDB->Query("SELECT * FROM SearchItemTypes");
    $TypeForItem = $TypeDB->FetchColumn("ItemType", "ItemId");

    # file each score under the type of its item
    $ScoresByType = [];
    foreach ($Results as $Id => $Score)
    {
        $ScoresByType[$TypeForItem[$Id]][$Id] = $Score;
    }

    return $ScoresByType;
}
711 
712  # ---- PRIVATE INTERFACE -------------------------------------------------
713 
# NOTE(review): the code in this class also uses $ItemTypeFieldName,
#       $NumberOfResultsAvailable, and $NumberOfResultsPerItemType, which
#       have no declaration in this listing (listing lines 719, 721, and
#       722 are absent) -- confirm they are declared in the real file
# database object created in the constructor
714  protected $DB;
# current debug output level
715  protected $DebugLevel;
# list of result filter function names added by AddResultFilterFunction()
716  protected $FilterFuncs;
# name of item ID column and name of item table, used by UpdateForItems()
717  protected $ItemIdFieldName;
718  protected $ItemTableName;
# duration of most recent search, as recorded by SearchAll()
720  protected $LastSearchTime;
# whether SearchForWords() should also look up word stems and synonyms
723  protected $StemmingEnabled = TRUE;
724  protected $SynonymsEnabled = TRUE;
725 
# counts of excluded/inclusive/required terms found while parsing the
#       search strings (reset at the start of SearchAcrossFields())
726  private $ExcludedTermCount;
727  private $FieldIds;
# per-field settings saved by AddField(), indexed by field ID
728  private $FieldInfo;
729  private $InclusiveTermCount;
730  private $RequiredTermCount;
# number of required terms matched so far, indexed by item ID
731  private $RequiredTermCounts;
# terms parsed out of the search strings for the current search
732  private $SearchTermList;
733  private $WordCountAdded;
734 
# pseudo field ID under which keyword search strings are filed
735  const KEYWORD_FIELD_ID = -100;
736  const STEM_ID_OFFSET = 1000000;
737 
738 
739  # ---- private methods (searching)
740 
/**
 * Perform a search for a parameter set, recursing into its subgroups,
 * without any sorting or callback filtering of the results.
 * @param SearchParameterSet $SearchParams Search parameters.
 * @return array Scores, indexed by item ID (empty if nothing was
 *      searched for).
 */
private function RawSearch(SearchParameterSet $SearchParams)
{
    # retrieve search strings
    $SearchStrings = $SearchParams->GetSearchStrings();
    $KeywordSearchStrings = $SearchParams->GetKeywordSearchStrings();

    # add keyword searches (if any) to fielded searches
    if (count($KeywordSearchStrings))
    {
        $SearchStrings[self::KEYWORD_FIELD_ID] = $KeywordSearchStrings;
    }

    # normalize search strings (trim them and drop any that are empty)
    $NormalizedSearchStrings = array();
    foreach ($SearchStrings as $FieldId => $SearchStringArray)
    {
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }
        foreach ($SearchStringArray as $String)
        {
            $String = trim($String);
            if (strlen($String))
            {
                $NormalizedSearchStrings[$FieldId][] = $String;
            }
        }
    }
    $SearchStrings = $NormalizedSearchStrings;

    # scores stay NULL until some search has actually been performed
    $Scores = NULL;

    # if we have strings to search for
    if (count($SearchStrings))
    {
        # perform search
        $Scores = $this->SearchAcrossFields(
                $SearchStrings, $SearchParams->Logic());
    }

    # for each subgroup
    $SubgroupsProcessed = FALSE;
    foreach ($SearchParams->GetSubgroups() as $Subgroup)
    {
        # perform subgroup search
        $NewScores = $this->RawSearch($Subgroup);
        $SubgroupsProcessed = TRUE;

        # add subgroup search scores to previous scores as appropriate
        $Scores = ($Scores !== NULL)
                ? $this->CombineScores(
                        $Scores, $NewScores, $SearchParams->Logic())
                : $NewScores;
    }
    if ($SubgroupsProcessed)
    {
        $this->DMsg(2, "Have ".count($Scores)
                ." results after subgroup processing");
    }

    # pare down results to just allowed item types (if specified)
    # (skipped when no search was performed, because there are then no
    #       scores to pare; previously that case iterated over and
    #       counted an undefined variable)
    $AllowedItemTypes = $SearchParams->ItemTypes();
    if ($AllowedItemTypes && ($Scores !== NULL))
    {
        foreach ($Scores as $ItemId => $Score)
        {
            if (!in_array($this->GetItemType($ItemId), $AllowedItemTypes))
            {
                unset($Scores[$ItemId]);
            }
        }
        $this->DMsg(3, "Have ".count($Scores)
                ." results after paring to allowed item types");
    }

    # return search results to caller
    return ($Scores !== NULL) ? $Scores : array();
}
828 
/**
 * Combine two sets of scores.  With "OR" logic the result holds every
 * item from either set, with scores summed for items in both.  With any
 * other logic the result holds only items present in both sets, with
 * their scores summed.
 * @param array $ScoresA First set of scores, indexed by item ID.
 * @param array $ScoresB Second set of scores, indexed by item ID.
 * @param string $Logic Logic to use ("OR" is the only value tested for).
 * @return array Combined scores, indexed by item ID.
 */
private function CombineScores($ScoresA, $ScoresB, $Logic)
{
    # any logic other than OR keeps only items found in both sets
    if ($Logic != "OR")
    {
        $Both = array();
        foreach ($ScoresA as $Id => $ScoreFromA)
        {
            if (isset($ScoresB[$Id]))
            {
                $Both[$Id] = $ScoreFromA + $ScoresB[$Id];
            }
        }
        return $Both;
    }

    # OR logic starts with the first set and folds in the second
    $Either = $ScoresA;
    foreach ($ScoresB as $Id => $ScoreFromB)
    {
        if (isset($Either[$Id]))
        {
            $Either[$Id] += $ScoreFromB;
        }
        else
        {
            $Either[$Id] = $ScoreFromB;
        }
    }
    return $Either;
}
866 
/**
 * Run word, phrase, and comparison searches for a set of per-field search
 * strings, then apply exclusions and required-term filtering.
 * @param array $SearchStrings Arrays of search strings, indexed by field
 *      ID (self::KEYWORD_FIELD_ID for keyword search strings).
 * @param mixed $Logic Search logic, passed through to the parsing and
 *      comparison search methods.
 * @return array Scores, indexed by item ID.
 */
private function SearchAcrossFields($SearchStrings, $Logic)
{
    # start by assuming no search will be done
    $Scores = array();

    # parsed words and phrases, indexed by field ID (initialized here so
    #       the exclusion pass below never reads undefined variables)
    # NOTE(review): when a field has several search strings, each string
    #       overwrites the entry for the previous one, so only the last
    #       string's words/phrases are seen by the exclusion pass
    $Words = array();
    $Phrases = array();

    # clear word counts
    $this->ExcludedTermCount = 0;
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->RequiredTermCounts = array();

    # for each field
    $NeedComparisonSearch = FALSE;
    foreach ($SearchStrings as $FieldId => $SearchStringArray)
    {
        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # if field is keyword or field is text and does not look
            #       like comparison match
            $NotComparisonSearch = !preg_match(
                    self::COMPARISON_OPERATOR_PATTERN, $SearchString);
            if (($FieldId == self::KEYWORD_FIELD_ID)
                    || (isset($this->FieldInfo[$FieldId])
                            && ($this->FieldInfo[$FieldId]["FieldType"]
                                    == self::FIELDTYPE_TEXT)
                            && $NotComparisonSearch))
            {
                if ($FieldId == self::KEYWORD_FIELD_ID)
                {
                    $this->DMsg(0, "Performing keyword search for string \""
                            .$SearchString."\"");
                }
                else
                {
                    $this->DMsg(0, "Searching text field "
                            .$FieldId." for string \"".$SearchString."\"");
                }

                # normalize text and split into words
                $Words[$FieldId] =
                        $this->ParseSearchStringForWords($SearchString, $Logic);

                # calculate scores for matching items
                if (count($Words[$FieldId]))
                {
                    $Scores = $this->SearchForWords(
                            $Words[$FieldId], $FieldId, $Scores);
                    $this->DMsg(3, "Have "
                            .count($Scores)." results after word search");
                }

                # split into phrases
                $Phrases[$FieldId] = $this->ParseSearchStringForPhrases(
                        $SearchString, $Logic);

                # handle any (non-excluded) phrases
                if (count($Phrases[$FieldId]))
                {
                    $Scores = $this->SearchForPhrases(
                            $Phrases[$FieldId], $Scores, $FieldId, TRUE, FALSE);
                    $this->DMsg(3, "Have "
                            .count($Scores)." results after phrase search");
                }
            }
            else
            {
                # set flag to indicate possible comparison search candidate found
                $NeedComparisonSearch = TRUE;
            }
        }
    }

    # perform comparison searches
    if ($NeedComparisonSearch)
    {
        $Scores = $this->SearchForComparisonMatches(
                $SearchStrings, $Logic, $Scores);
        $this->DMsg(3, "Have ".count($Scores)." results after comparison search");
    }

    # if no results found, no required terms, and exclusions specified
    # (i.e. the search only said what to leave out, so start from all
    #       records and let the exclusion pass below remove items)
    if ((count($Scores) == 0) &&
            ($this->RequiredTermCount == 0) &&
            ($this->ExcludedTermCount > 0) )
    {
        # determine which item types are implicated for keyword searches
        $KeywordItemTypes = [];
        foreach ($this->FieldInfo as $FieldId => $Info)
        {
            if ($Info["InKeywordSearch"])
            {
                $KeywordItemTypes = array_merge(
                        $KeywordItemTypes,
                        $Info["ItemTypes"]);
            }
        }
        $KeywordItemTypes = array_unique($KeywordItemTypes);

        # determine what item types were in use for the fields we
        #       are searching
        $FieldTypes = [];
        foreach ($SearchStrings as $FieldId => $Info)
        {
            if ($FieldId == self::KEYWORD_FIELD_ID)
            {
                $MyTypes = $KeywordItemTypes;
            }
            elseif (isset($this->FieldInfo[$FieldId]))
            {
                $MyTypes = $this->FieldInfo[$FieldId]["ItemTypes"];
            }
            else
            {
                # field was never registered with AddField(), so it has
                #       no item types to contribute (previously this read
                #       an undefined index and passed NULL to array_merge())
                continue;
            }

            $FieldTypes = array_merge(
                    $FieldTypes, $MyTypes);
        }
        $FieldTypes = array_unique($FieldTypes);

        # load all records for these field types
        $Scores = $this->LoadScoresForAllRecords($FieldTypes);
    }

    # if search results found
    if (count($Scores))
    {
        # for each search text string
        foreach ($SearchStrings as $FieldId => $SearchStringArray)
        {
            # for each search string for this field
            foreach ($SearchStringArray as $SearchString)
            {
                # if field is text
                if (($FieldId == self::KEYWORD_FIELD_ID)
                        || (isset($this->FieldInfo[$FieldId])
                                && ($this->FieldInfo[$FieldId]["FieldType"]
                                        == self::FIELDTYPE_TEXT)))
                {
                    # if there are words in search text
                    if (isset($Words[$FieldId]))
                    {
                        # handle any excluded words
                        $Scores = $this->FilterOnExcludedWords(
                                $Words[$FieldId], $Scores, $FieldId);
                    }

                    # handle any excluded phrases
                    if (isset($Phrases[$FieldId]))
                    {
                        $Scores = $this->SearchForPhrases(
                                $Phrases[$FieldId], $Scores,
                                $FieldId, FALSE, TRUE);
                    }
                }
            }
            $this->DMsg(3, "Have ".count($Scores)
                    ." results after processing exclusions");
        }

        # strip off any results that don't contain required words
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # return search result scores to caller
    return $Scores;
}
1036 
/**
 * Search for words in a field, adding the occurrence counts found to the
 * scores.  Counts for synonyms and for the word's stem (where enabled)
 * are added in at half value, rounded up.  Words flagged as excluded
 * are not searched for here.
 * @param array $Words Word flags (WORD_ bit flags), indexed by word.
 * @param mixed $FieldId ID of field to search.
 * @param array $Scores Existing scores, indexed by item ID.  (OPTIONAL)
 * @return array Updated scores, indexed by item ID.
 */
private function SearchForWords($Words, $FieldId, $Scores = NULL)
{
    $DB = $this->DB;

    # start with empty search result scores list if none passed in
    if ($Scores == NULL)
    {
        $Scores = array();
    }

    foreach ($Words as $Word => $Flags)
    {
        $this->DMsg(2, ($FieldId == self::KEYWORD_FIELD_ID)
                ? "Performing keyword search for word \"".$Word."\""
                : "Searching for word \"".$Word."\" in field ".$FieldId);

        # nothing to look up here for excluded words
        if ($Flags & self::WORD_EXCLUDED)
        {
            continue;
        }

        # no counts found yet for this word
        $Counts = NULL;

        # look up record ID for word
        $this->DMsg(2, "Looking up word \"".$Word."\"");
        $WordId = $this->GetWordId($Word);

        # if word is in DB
        if ($WordId !== NULL)
        {
            # look up counts for word
            $DB->Query("SELECT ItemId,Count FROM SearchWordCounts "
                    ."WHERE WordId = ".$WordId
                    ." AND FieldId = ".$FieldId);
            $Counts = $DB->FetchColumn("Count", "ItemId");

            # if synonym support is enabled
            if ($this->SynonymsEnabled)
            {
                # look for any synonyms
                $DB->Query("SELECT WordIdA, WordIdB"
                        ." FROM SearchWordSynonyms"
                        ." WHERE WordIdA = ".$WordId
                        ." OR WordIdB = ".$WordId);

                # if synonyms were found
                if ($DB->NumRowsSelected())
                {
                    # gather synonym IDs (the side of each pairing that
                    #       is not the word itself)
                    $SynonymIds = array();
                    while ($Row = $DB->FetchRow())
                    {
                        $SynonymIds[] = ($Row["WordIdA"] == $WordId)
                                ? $Row["WordIdB"]
                                : $Row["WordIdA"];
                    }

                    foreach ($SynonymIds as $SynonymId)
                    {
                        # retrieve counts for synonym
                        $DB->Query("SELECT ItemId,Count"
                                ." FROM SearchWordCounts"
                                ." WHERE WordId = ".$SynonymId
                                ." AND FieldId = ".$FieldId);
                        $SynonymCounts = $DB->FetchColumn("Count", "ItemId");

                        # fold synonym counts in at half value
                        foreach ($SynonymCounts as $ItemId => $Count)
                        {
                            $HalfCount = ceil($Count / 2);
                            if (isset($Counts[$ItemId]))
                            {
                                $Counts[$ItemId] += $HalfCount;
                            }
                            else
                            {
                                $Counts[$ItemId] = $HalfCount;
                            }
                        }
                    }
                }
            }
        }

        # if stemming is enabled
        if ($this->StemmingEnabled)
        {
            # stem only matters if it differs from the word itself
            $Stem = PorterStemmer::Stem($Word);
            if ($Stem != $Word)
            {
                # retrieve stem ID
                $this->DMsg(2, "Looking up stem \"".$Stem."\"");
                $StemId = $this->GetStemId($Stem);

                # if ID found for stem
                if ($StemId !== NULL)
                {
                    # retrieve counts for stem
                    $DB->Query("SELECT ItemId,Count"
                            ." FROM SearchWordCounts"
                            ." WHERE WordId = ".$StemId
                            ." AND FieldId = ".$FieldId);
                    $StemCounts = $DB->FetchColumn("Count", "ItemId");

                    # fold stem counts in at half value
                    foreach ($StemCounts as $ItemId => $Count)
                    {
                        $HalfCount = ceil($Count / 2);
                        if (isset($Counts[$ItemId]))
                        {
                            $Counts[$ItemId] += $HalfCount;
                        }
                        else
                        {
                            $Counts[$ItemId] = $HalfCount;
                        }
                    }
                }
            }
        }

        # move on if no counts were found for word
        if (!isset($Counts))
        {
            continue;
        }

        $WordIsRequired = ($Flags & self::WORD_REQUIRED);
        foreach ($Counts as $ItemId => $Count)
        {
            # tally required word matches for item
            if ($WordIsRequired)
            {
                if (isset($this->RequiredTermCounts[$ItemId]))
                {
                    $this->RequiredTermCounts[$ItemId]++;
                }
                else
                {
                    $this->RequiredTermCounts[$ItemId] = 1;
                }
            }

            # add count to item score
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += $Count;
            }
            else
            {
                $Scores[$ItemId] = $Count;
            }
        }
    }

    # return basic scores to caller
    return $Scores;
}
1217 
/**
 * Extract double-quoted phrases from a search string.  The character
 * immediately before the opening quote acts as a modifier: "-" marks the
 * phrase as excluded, "+" as required, and "~" stops the phrase being
 * required under "AND" logic.  Term counts and the search term list are
 * updated the first time each distinct phrase is seen.
 * @param string $SearchString Search string to parse.
 * @param string $Logic Search logic ("AND" makes phrases required
 *      by default).
 * @return array Phrase flags (WORD_ bit flags), indexed by phrase.
 */
private function ParseSearchStringForPhrases($SearchString, $Logic)
{
    # split on double quotes, so that odd-numbered pieces are the quoted
    #       phrases and even-numbered pieces are the text between them
    $Pieces = explode("\"", $SearchString);

    $Phrases = array();
    for ($Index = 2;  $Index < count($Pieces);  $Index += 2)
    {
        # grab phrase and the character just before its opening quote
        $Phrase = trim(addslashes($Pieces[$Index - 1]));
        $Modifier = substr($Pieces[$Index - 2], -1);
        $IsNewPhrase = !isset($Phrases[$Phrase]);

        $Flags = self::WORD_PRESENT;
        if ($Modifier == "-")
        {
            # phrase is excluded
            $Flags |= self::WORD_EXCLUDED;
            if ($IsNewPhrase)
            {
                $this->ExcludedTermCount++;
            }
        }
        else
        {
            # phrase is required if explicitly marked or if logic is AND
            #       and phrase has not been marked as optional
            $IsRequired = ($Modifier == "+")
                    || (($Logic == "AND") && ($Modifier != "~"));
            if ($IsRequired)
            {
                $Flags |= self::WORD_REQUIRED;
                if ($IsNewPhrase)
                {
                    $this->RequiredTermCount++;
                }
            }
            if ($IsNewPhrase)
            {
                $this->InclusiveTermCount++;
                $this->SearchTermList[] = $Phrase;
            }
        }

        # save flags for phrase (replacing those from any earlier
        #       occurrence of the same phrase)
        $Phrases[$Phrase] = $Flags;
    }

    # return phrases to caller
    return $Phrases;
}
1278 
/**
 * Search a field for a phrase.  This implementation only terminates the
 * script with an error message.  SearchForPhrases() iterates over this
 * method's return value as a list of item IDs, so it is presumably meant
 * to be overridden by child classes -- TODO confirm.
 * @param mixed $FieldId ID of field to search.
 * @param string $Phrase Phrase to look for.
 */
protected function SearchFieldForPhrases($FieldId, $Phrase)
{
    # no phrase searching available here, so bail out
    $Message = "<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\n";
    exit($Message);
}
1289 
/**
 * Search for phrases in a field, either adding to the scores of matching
 * items (non-excluded phrases) or removing matching items from the scores
 * (excluded phrases).  For keyword searches, each field included in
 * keyword searches is searched in turn.
 * @param array $Phrases Phrase flags (WORD_ bit flags), indexed by phrase.
 * @param array $Scores Existing scores, indexed by item ID.
 * @param mixed $FieldId ID of field to search.
 * @param bool $ProcessNonExcluded Whether to handle phrases not flagged
 *      as excluded.  (OPTIONAL, defaults to TRUE)
 * @param bool $ProcessExcluded Whether to handle phrases flagged as
 *      excluded.  (OPTIONAL, defaults to TRUE)
 * @return array Updated scores, indexed by item ID.
 */
private function SearchForPhrases($Phrases, $Scores, $FieldId,
        $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
{
    # nothing to do if there are no phrases
    if (count($Phrases) <= 0)
    {
        return $Scores;
    }

    # a keyword search is run against every keyword-searchable field
    if ($FieldId == self::KEYWORD_FIELD_ID)
    {
        foreach ($this->FieldInfo as $KeywordFieldId => $FieldSettings)
        {
            if ($FieldSettings["InKeywordSearch"])
            {
                $Scores = $this->SearchForPhrases(
                        $Phrases, $Scores, $KeywordFieldId,
                        $ProcessNonExcluded, $ProcessExcluded);
            }
        }
        return $Scores;
    }

    foreach ($Phrases as $Phrase => $Flags)
    {
        $this->DMsg(2, "Searching for phrase '".$Phrase
                ."' in field ".$FieldId);

        # skip phrase unless we were asked to handle its kind
        $IsExcluded = ($Flags & self::WORD_EXCLUDED) ? TRUE : FALSE;
        $ShouldProcess = $IsExcluded ? $ProcessExcluded : $ProcessNonExcluded;
        if (!$ShouldProcess)
        {
            continue;
        }

        # initialize score list if necessary
        if ($Scores === NULL)
        {
            $Scores = array();
        }

        # retrieve list of items that contain phrase
        $MatchingItemIds = $this->SearchFieldForPhrases($FieldId, $Phrase);

        foreach ($MatchingItemIds as $ItemId)
        {
            # items containing an excluded phrase are knocked off the list
            if ($IsExcluded)
            {
                unset($Scores[$ItemId]);
                continue;
            }

            # phrase is worth its word count times the field weight
            $WordsInPhrase = preg_split("/[\s]+/",
                    $Phrase, -1, PREG_SPLIT_NO_EMPTY);
            $PhraseScore = count($WordsInPhrase)
                    * $this->FieldInfo[$FieldId]["Weight"];
            $this->DMsg(2, "Phrase score is ".$PhraseScore);

            # bump up item score
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += $PhraseScore;
            }
            else
            {
                $Scores[$ItemId] = $PhraseScore;
            }

            # tally required phrase matches for item
            if ($Flags & self::WORD_REQUIRED)
            {
                if (isset($this->RequiredTermCounts[$ItemId]))
                {
                    $this->RequiredTermCounts[$ItemId]++;
                }
                else
                {
                    $this->RequiredTermCounts[$ItemId] = 1;
                }
            }
        }
    }

    # return updated scores to caller
    return $Scores;
}
1396 
/**
 * Remove from the scores any item that contains, in the specified field,
 * a word flagged as excluded.
 * @param array $Words Word flags (WORD_ bit flags), indexed by word.
 * @param array $Scores Scores, indexed by item ID.
 * @param mixed $FieldId ID of field to check.
 * @return array Filtered scores, indexed by item ID.
 */
private function FilterOnExcludedWords($Words, $Scores, $FieldId)
{
    $DB = $this->DB;

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        # only words flagged as excluded are of interest
        if (!($Flags & self::WORD_EXCLUDED))
        {
            continue;
        }

        # look up record ID for word (words not in DB cannot appear
        #       in any item)
        $WordId = $this->GetWordId($Word);
        if ($WordId === NULL)
        {
            continue;
        }

        # look up items that contain word in this field
        # (built by concatenation because "${var}" string interpolation
        #       is deprecated as of PHP 8.2)
        $DB->Query("SELECT ItemId FROM SearchWordCounts "
                ."WHERE WordId=".$WordId." AND FieldId=".$FieldId);

        # for each item found
        while ($Record = $DB->FetchRow())
        {
            # if item record is in score list
            $ItemId = $Record["ItemId"];
            if (isset($Scores[$ItemId]))
            {
                # remove item record from score list
                $this->DMsg(3, "Filtering out item ".$ItemId
                        ." because it contained word \"".$Word."\"");
                unset($Scores[$ItemId]);
            }
        }
    }

    # returned filtered score list to caller
    return $Scores;
}
1445 
/**
 * Drop items that did not match every required search term.
 * An item is kept only when its entry in RequiredTermCounts has reached
 * RequiredTermCount; when RequiredTermCount is not greater than zero
 * the scores are returned untouched.
 * @param array $Scores Current scores, with item IDs for the index.
 * @return array Scores with items lacking required terms removed.
 */
private function FilterOnRequiredWords($Scores)
{
    # nothing to enforce if there were no required terms
    $Needed = $this->RequiredTermCount;
    if (!($Needed > 0))
    {
        return $Scores;
    }

    foreach ($Scores as $ItemId => $Score)
    {
        # keep item if it has matched enough required terms
        $HasCount = isset($this->RequiredTermCounts[$ItemId]);
        if ($HasCount && !($this->RequiredTermCounts[$ItemId] < $Needed))
        {
            continue;
        }

        # otherwise report what the item had and discard it
        $Found = $HasCount
                ? " (only had ".$this->RequiredTermCounts[$ItemId]
                : " (had none";
        $this->DMsg(4, "Filtering out item ".$ItemId
                ." because it didn't have required word count of "
                .$Needed.$Found.")");
        unset($Scores[$ItemId]);
    }

    # return filtered list to caller
    return $Scores;
}
1481 
/**
 * Filter search scores, split them up by item type, and sort the
 * results for each item type.
 * Side effects: sets NumberOfResultsAvailable (count after the filter
 * callbacks have run) and NumberOfResultsPerItemType.
 * @param array $Scores Current scores, with item IDs for the index.
 * @param mixed $SortByField Field to sort by, or FALSE to sort by score.
 *      May be a single value used for all item types, or an array with
 *      item types for the index (types not in the array sort by score).
 * @param mixed $SortDescending Whether to sort in descending order.  May
 *      be a single value or an array with item types for the index
 *      (types not in the array sort descending).
 * @return array Two-dimensional array of scores, with item types for the
 *      first index and item IDs for the second.
 */
private function SortScores($Scores, $SortByField, $SortDescending)
{
    # run results through any filter callbacks before counting them
    $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
    $Scores = $this->FilterOnSuppliedFunctions($Scores);
    $this->NumberOfResultsAvailable = count($Scores);

    # bin scores by item type (items with no known type are dropped)
    $Binned = array();
    foreach ($Scores as $ItemId => $ItemScore)
    {
        $Type = $this->GetItemType($ItemId);
        if ($Type === NULL) {  continue;  }
        $Binned[$Type][$ItemId] = $ItemScore;
    }

    foreach ($Binned as $Type => $TypeScores)
    {
        # work out sort field for this item type
        if (is_array($SortByField))
        {
            $Field = isset($SortByField[$Type]) ? $SortByField[$Type] : FALSE;
        }
        else
        {
            $Field = $SortByField;
        }

        # work out sort direction for this item type
        if (is_array($SortDescending))
        {
            $Descending = isset($SortDescending[$Type])
                    ? $SortDescending[$Type] : TRUE;
        }
        else
        {
            $Descending = $SortDescending;
        }

        # save number of results for this item type
        $this->NumberOfResultsPerItemType[$Type] = count($TypeScores);

        # with no sort field, order by score in the requested direction
        if ($Field === FALSE)
        {
            if ($Descending)
            {
                arsort($Binned[$Type], SORT_NUMERIC);
            }
            else
            {
                asort($Binned[$Type], SORT_NUMERIC);
            }
            continue;
        }

        # otherwise get the item IDs for this type in field order
        $SortedIds = $this->GetItemIdsSortedByField(
                $Type, $Field, $Descending);

        # if no ordering is available, fall back to highest score first
        if (!(count($SortedIds) && count($TypeScores)))
        {
            arsort($Binned[$Type], SORT_NUMERIC);
            continue;
        }

        # rebuild score list in field order, using only IDs in the results
        $Ordered = array();
        $MatchingIds = array_intersect($SortedIds, array_keys($TypeScores));
        foreach ($MatchingIds as $ItemId)
        {
            $Ordered[$ItemId] = $TypeScores[$ItemId];
        }
        $Binned[$Type] = $Ordered;
    }

    # return sorted scores (grouped by item type) to caller
    return $Binned;
}
1582 
/**
 * Filter search scores through any supplied functions.
 * Each callback in FilterFuncs is called with an item ID; the item is
 * discarded as soon as one callback returns a true value, and remaining
 * callbacks are not called for that item.
 * @param array $Scores Current scores, with item IDs for the index.
 * @return array Scores with rejected items removed.
 */
protected function FilterOnSuppliedFunctions($Scores)
{
    # nothing to do if no filter functions have been set
    if (!isset($this->FilterFuncs))
    {
        return $Scores;
    }

    foreach ($Scores as $ItemId => $Score)
    {
        foreach ($this->FilterFuncs as $Func)
        {
            # callback returning TRUE means item should be discarded
            if (!call_user_func($Func, $ItemId)) {  continue;  }

            # get printable name for callback (same text as before for
            #       function names given as strings, but also safe for
            #       array callables and closures, which cannot be
            #       concatenated into a string directly)
            $FuncName = "";
            is_callable($Func, TRUE, $FuncName);
            $this->DMsg(2, "Filter callback <i>".$FuncName
                    ."</i> rejected item ".$ItemId);
            unset($Scores[$ItemId]);

            # no need to consult remaining callbacks for this item
            break;
        }
    }

    # return filtered list to caller
    return $Scores;
}
1617 
/**
 * Pull explicit comparisons (e.g. ">=2010") out of the fielded search
 * strings, run them, and merge the matching items into the scores.
 * A search string is treated as a comparison if it matches
 * COMPARISON_OPERATOR_PATTERN or if its field is not a text field (in
 * which case "=" is assumed when no operator is present).  Strings for
 * KEYWORD_FIELD_ID are ignored.
 * @param array $SearchStrings Search strings, with field IDs for the
 *      index and arrays of strings for the values.
 * @param mixed $Logic Search logic ("AND" narrows the existing scores to
 *      matching items, anything else adds matching items to them).
 * @param array|null $Scores Current scores, with item IDs for the index.
 * @return array|null Updated scores.
 */
private function SearchForComparisonMatches($SearchStrings, $Logic, $Scores)
{
    # gather comparisons into parallel lists (one entry per comparison)
    $FieldIds = array();
    $Operators = array();
    $Values = array();
    foreach ($SearchStrings as $SearchFieldId => $SearchStringArray)
    {
        # keyword strings are never comparisons
        if ($SearchFieldId == self::KEYWORD_FIELD_ID) {  continue;  }

        foreach ($SearchStringArray as $SearchString)
        {
            # look for comparison operator
            $FoundOperator = preg_match(
                    self::COMPARISON_OPERATOR_PATTERN,
                    $SearchString, $Matches);

            # skip string if it has no operator and is for a text field
            if (!$FoundOperator
                    && ($this->FieldInfo[$SearchFieldId]["FieldType"]
                            == self::FIELDTYPE_TEXT))
            {
                continue;
            }

            # save comparison (assuming equality if no operator was given)
            $FieldIds[] = $SearchFieldId;
            $Operators[] = $FoundOperator ? $Matches[1] : "=";
            $Values[] = trim(preg_replace(
                    self::COMPARISON_OPERATOR_PATTERN, '\2',
                    $SearchString));
            $Last = count($FieldIds) - 1;
            $this->DMsg(3, "Added comparison (field = <i>"
                    .$FieldIds[$Last]."</i> op = <i>"
                    .$Operators[$Last]."</i> val = <i>"
                    .$Values[$Last]."</i>)");
        }
    }

    # scores are unchanged if there were no comparisons
    if (count($Operators) == 0)
    {
        return $Scores;
    }

    # perform comparisons on fields and gather results
    $Results = $this->SearchFieldsForComparisonMatches(
            $FieldIds, $Operators, $Values, $Logic);

    if ($Logic == "AND")
    {
        # (scores may be NULL here, just as in the OR case below, so
        #       NULL is treated as "no prior results" rather than being
        #       passed to count(), which is a TypeError under PHP 8)
        $HavePriorScores = ($Scores !== NULL) && (count($Scores) != 0);

        if (!count($Results))
        {
            # nothing matched comparisons, so nothing can match search
            $Scores = array();
        }
        elseif (!$HavePriorScores && ($this->InclusiveTermCount == 0))
        {
            # comparisons were the only search criteria, so all
            #       matching items become the results
            if ($Scores === NULL) {  $Scores = array();  }
            foreach ($Results as $ItemId)
            {
                $Scores[$ItemId] = 1;
            }
        }
        elseif ($Scores !== NULL)
        {
            # keep only items that also matched comparisons (results are
            #       flipped into index form so that each item costs one
            #       hash lookup rather than a scan of the whole list)
            $Scores = array_intersect_key($Scores, array_flip($Results));
        }
    }
    else
    {
        # add one point to score of every matching item
        if ($Scores === NULL) {  $Scores = array();  }
        foreach ($Results as $ItemId)
        {
            $Scores[$ItemId] =
                    (isset($Scores[$ItemId]) ? $Scores[$ItemId] : 0) + 1;
        }
    }

    # return results to caller
    return $Scores;
}
1749 
/**
 * Look for a debug level indicator in the supplied search string(s),
 * set the debug level if one is found, and return the search string(s)
 * as processed by ExtractDebugLevel().
 * @param mixed $SearchStrings Single search string, array of search
 *      strings, or array of arrays of search strings.
 * @return mixed Search string(s) in the same structure as passed in.
 */
private function SetDebugLevel($SearchStrings)
{
    # a lone search string can be handled directly
    if (!is_array($SearchStrings))
    {
        return $this->ExtractDebugLevel($SearchStrings);
    }

    foreach ($SearchStrings as $FieldId => $FieldStrings)
    {
        if (is_array($FieldStrings))
        {
            # process every string for field (keys are preserved)
            $SearchStrings[$FieldId] = array_map(
                    array($this, "ExtractDebugLevel"), $FieldStrings);
        }
        else
        {
            # field has just the one string
            $SearchStrings[$FieldId] =
                    $this->ExtractDebugLevel($FieldStrings);
        }
    }

    # return new search info to caller
    return $SearchStrings;
}
1793 
/**
 * If the search string begins with a debug level indicator of the form
 * "DBUGLVL=N" (N from 1 to 99, optionally preceded by whitespace), set
 * the debug level to N and strip the indicator from the string.
 * @param string $SearchString Search string to examine.
 * @return string Search string with any debug level indicator removed.
 */
private function ExtractDebugLevel($SearchString)
{
    # look for indicator at start of string
    # (the level is captured with preg_match() so that a string that
    #       merely contains "DBUGLVL=" somewhere else is left alone,
    #       rather than the whole string being mistaken for the level,
    #       and the second digit may be 0 so that levels like 10 work)
    if (preg_match('/^\s*DBUGLVL=([1-9][0-9]?)/', $SearchString, $Matches))
    {
        # set debug level
        $Level = (int)$Matches[1];
        $this->DebugLevel = $Level;
        $this->DMsg(0, "Setting debug level to ".$Level);

        # remove indicator (and any whitespace around it) from string
        $SearchString = preg_replace(
                '/\s*DBUGLVL='.$Level.'\s*/', "", $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
1819 
/**
 * Build a score list containing every item of the specified types,
 * with each item given a score of 1.
 * @param array $ItemTypes Item types to include.
 * @return array Scores (all 1), with item IDs for the index.  Empty if
 *      no item types were supplied.
 */
private function LoadScoresForAllRecords($ItemTypes)
{
    # with no item types there can be no items
    if (!count($ItemTypes))
    {
        return array();
    }

    # look up IDs of all items that have one of the specified types
    $TypeList = implode(",", $ItemTypes);
    $Query = "SELECT ".$this->ItemIdFieldName." AS ItemId"
            ." FROM ".$this->ItemTableName
            ." WHERE ".$this->ItemTypeFieldName." IN(".$TypeList.")";
    $this->DB->Query($Query);
    $ItemIds = $this->DB->FetchColumn("ItemId");

    # return array with a score of 1 for every item
    return array_fill_keys($ItemIds, 1);
}
1841 
1842  # ---- private methods (search DB building)
1843 
/**
 * Add to the stored count for a word (and, when stemming is enabled and
 * the word is not numeric, for the stem of the word) for the specified
 * item and field.  The stem is counted at half the weight (rounded up).
 * @param string $Word Word to update count for.
 * @param int $ItemId ID of item that contains word.
 * @param int $FieldId ID of field that contains word.
 * @param int $Weight Amount to add to count.  (OPTIONAL, defaults to 1)
 */
private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
{
    # word itself is always counted (and added to DB if new)
    $Ids = array($this->GetWordId($Word, TRUE));

    # stem is also counted if it differs from word
    if ($this->StemmingEnabled && !is_numeric($Word))
    {
        $Stem = PorterStemmer::Stem($Word, TRUE);
        if ($Stem != $Word)
        {
            $Ids[] = $this->GetStemId($Stem, TRUE);
        }
    }

    foreach ($Ids as $Id)
    {
        # update existing count if we have already added one for this
        #       word and field, otherwise add new count
        $AlreadyAdded = isset($this->WordCountAdded[$Id][$FieldId]);
        if ($AlreadyAdded)
        {
            $Query = "UPDATE SearchWordCounts SET Count=Count+".$Weight
                    ." WHERE WordId=".$Id
                    ." AND ItemId=".$ItemId
                    ." AND FieldId=".$FieldId;
        }
        else
        {
            $Query = "INSERT INTO SearchWordCounts"
                    ." (WordId, ItemId, FieldId, Count) VALUES"
                    ." (".$Id.", ".$ItemId.", ".$FieldId.", ".$Weight.")";
        }
        $this->DB->Query($Query);

        # remember that count now exists for this word and field
        if (!$AlreadyAdded)
        {
            $this->WordCountAdded[$Id][$FieldId] = TRUE;
        }

        # any ID after the first is the stem, which gets half weight
        $Weight = ceil($Weight / 2);
    }
}
1897 
/**
 * Retrieve content for specified field for specified item.  This base
 * version has no way to do that and always throws an exception.
 * NOTE(review): presumably child classes are expected to override this
 * -- confirm against the classes that extend SearchEngine.
 * @param int $ItemId ID of item.
 * @param int $FieldId ID of field.
 * @throws Exception Always.
 */
protected function GetFieldContent($ItemId, $FieldId)
{
    # no implementation is available at this level
    $Message = "GetFieldContent() not implemented.";
    throw new Exception($Message);
}
1908 
/**
 * Record word counts in the search database for a piece of text.
 * Every word is counted against the field with the default weight, and
 * (if requested) also against the keyword field with the supplied weight.
 * @param int $ItemId ID of item that text belongs to.
 * @param int $FieldId ID of field that text belongs to.
 * @param int $Weight Weight to use for keyword field counts.
 * @param string $Text Text to record.
 * @param bool $IncludeInKeyword If TRUE, words are also counted for the
 *      keyword field.
 */
private function RecordSearchInfoForText(
        $ItemId, $FieldId, $Weight, $Text, $IncludeInKeyword)
{
    # break text up into normalized words (ignoring phrases and groups)
    $Words = array_keys(
            $this->ParseSearchStringForWords($Text, "OR", TRUE));

    foreach ($Words as $Word)
    {
        # count word against its own field
        $this->UpdateWordCount($Word, $ItemId, $FieldId);

        # count word against keyword field if appropriate
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $Word, $ItemId, self::KEYWORD_FIELD_ID, $Weight);
        }
    }
}
1943 
1944  # ---- common private methods (used in both searching and DB build)
1945 
/**
 * Normalize a search string and break it up into individual words.
 * A leading "-" marks a word as excluded, a leading "+" marks it as
 * required, and a leading "~" marks it as optional even when the logic
 * is "AND" (which otherwise makes every non-excluded word required).
 * Side effects: increments ExcludedTermCount, RequiredTermCount, and
 * InclusiveTermCount, and appends to SearchTermList, once for each word
 * not already seen in this string.
 * @param string $SearchString String to parse.
 * @param mixed $Logic Search logic ("AND" makes words required).
 * @param bool $IgnorePhrases If TRUE, double quotes and parentheses are
 *      stripped and their contents treated as ordinary words, rather
 *      than quoted phrases and parenthesized groups being removed.
 *      (OPTIONAL, defaults to FALSE)
 * @return array Bitmasks of WORD_ flags, with (lower case) words for
 *      the index.
 */
private function ParseSearchStringForWords(
        $SearchString, $Logic, $IgnorePhrases = FALSE)
{
    # strip off any surrounding whitespace
    $Text = trim($SearchString);

    # phrases and groups are normally removed whole, but when ignoring
    #       phrasing we remove just the quotes and parens instead
    $PhrasePattern = $IgnorePhrases ? "/\"/" : "/\"[^\"]*\"/";
    $GroupPattern = $IgnorePhrases ? "/[\(\)]+/" : "/\\([^)]*\\)/";

    # set up search string normalization replacement strings (NOTE: these
    #       are performed in sequence, so the order IS SIGNIFICANT)
    $ReplacementPatterns = array(
            # get rid of possessive plurals
            "/'s[^a-z0-9\\-+~]+/i" => " ",
            # get rid of single quotes / apostrophes
            "/'/" => "",
            # get rid of phrases (or just double quotes)
            $PhrasePattern => " ",
            # get rid of groups (or just parentheses)
            $GroupPattern => " ",
            # convert everything but alphanumerics and minus/plus/tilde
            #       to a space (pattern has no capture group, so the
            #       "\1" in the replacement expands to nothing)
            "/[^a-z0-9\\-+~]+/i" => "\\1 ",
            # truncate any runs of minus/plus/tilde to just the first char
            "/([~+-])[~+-]+/" => "\\1",
            # convert two alphanumerics segments separated by a minus into
            #       both separate words and a single combined word
            "/([~+-]?)([a-z0-9]+)-([a-z0-9]+)/i" => "\\1\\2 \\1\\3 \\1\\2\\3",
            # convert minus/plus/tilde preceded by anything but
            #       whitespace to a space
            "/([^\\s])[~+-]+/i" => "\\1 ",
            # convert minus/plus/tilde followed by whitespace to a space
            "/[~+-]+\\s/i" => " ",
            # convert multiple spaces to one space
            "/[ ]+/" => " ",
            );

    # remove punctuation from text and normalize whitespace
    $Text = preg_replace(array_keys($ReplacementPatterns),
            array_values($ReplacementPatterns), $Text);
    $this->DMsg(2, "Normalized search string is \"".$Text."\"");

    # convert text to lower case and strip off any extraneous whitespace
    $Text = trim(strtolower($Text));

    # if nothing was left after normalization then there are no words
    $Words = array();
    if (strlen($Text) == 0)
    {
        return $Words;
    }

    foreach (explode(" ", $Text) as $Word)
    {
        # strip off option character (if any)
        $FirstChar = substr($Word, 0, 1);
        if (($FirstChar == "-") || ($FirstChar == "~") || ($FirstChar == "+"))
        {
            $Word = (string)substr($Word, 1);
        }

        # skip option characters that had no word attached (e.g. the
        #       "+" in "foo +"), as otherwise an empty required or
        #       excluded term would be recorded that nothing can match
        if ($Word === "") {  continue;  }

        # set flags and update term counts (counting each word only
        #       the first time that it is encountered)
        $Flags = self::WORD_PRESENT;
        $IsNewWord = !isset($Words[$Word]);
        if ($FirstChar == "-")
        {
            $Flags |= self::WORD_EXCLUDED;
            if ($IsNewWord)
            {
                $this->ExcludedTermCount++;
            }
        }
        else
        {
            # word is required if explicitly marked as such or if logic
            #       is AND and word was not explicitly marked as optional
            if (($FirstChar != "~")
                    && (($Logic == "AND") || ($FirstChar == "+")))
            {
                $Flags |= self::WORD_REQUIRED;
                if ($IsNewWord)
                {
                    $this->RequiredTermCount++;
                }
            }
            if ($IsNewWord)
            {
                $this->InclusiveTermCount++;
                $this->SearchTermList[] = $Word;
            }
        }

        # store flags to indicate word found
        $Words[$Word] = $Flags;
        $this->DMsg(3, "Word identified (".$Word.")");
    }

    # return normalized words to caller
    return $Words;
}
2084 
/**
 * Get ID for a word from the SearchWords table, optionally adding the
 * word to the table if it is not already there.  IDs that have been
 * found are cached for the rest of the PHP invocation.
 * @param string $Word Word to look up.
 * @param bool $AddIfNotFound If TRUE, word is added to the database if
 *      not found.  (OPTIONAL, defaults to FALSE)
 * @return mixed ID of word, or NULL if word was not found and not added.
 */
private function GetWordId($Word, $AddIfNotFound = FALSE)
{
    static $KnownIds;

    # use ID from cache if available
    if (isset($KnownIds[$Word]))
    {
        return $KnownIds[$Word];
    }

    # look up ID in database
    $Id = $this->DB->Query("SELECT WordId FROM SearchWords"
            ." WHERE WordText='".addslashes($Word)."'", "WordId");

    # add word to database if it was not found and caller wants it added
    if ($AddIfNotFound && ($Id === NULL))
    {
        $this->DB->Query("INSERT INTO SearchWords (WordText)"
                ." VALUES ('".addslashes(strtolower($Word))."')");
        $Id = $this->DB->LastInsertId();
    }

    # save ID to cache (a NULL here does not register as cached, so a
    #       word that was not found will be looked up again next time)
    $KnownIds[$Word] = $Id;

    # return ID to caller
    return $Id;
}
2128 
/**
 * Get ID for a word stem from the SearchStems table, optionally adding
 * the stem to the table if it is not already there.  The value returned
 * is the database ID plus STEM_ID_OFFSET.  IDs that have been found are
 * cached for the rest of the PHP invocation.
 * @param string $Stem Word stem to look up.
 * @param bool $AddIfNotFound If TRUE, stem is added to the database if
 *      not found.  (OPTIONAL, defaults to FALSE)
 * @return mixed ID of stem (with offset applied), or NULL if stem was
 *      not found and not added.
 */
private function GetStemId($Stem, $AddIfNotFound = FALSE)
{
    static $StemIdCache;

    # use ID from cache if available
    if (isset($StemIdCache[$Stem]))
    {
        return $StemIdCache[$Stem];
    }

    # look up ID in database
    $StemId = $this->DB->Query("SELECT WordId"
            ." FROM SearchStems"
            ." WHERE WordText='".addslashes($Stem)."'",
            "WordId");

    # if ID was not found and caller requested it be added
    if (($StemId === NULL) && $AddIfNotFound)
    {
        # add stem to database
        $this->DB->Query("INSERT INTO SearchStems (WordText)"
                ." VALUES ('".addslashes(strtolower($Stem))."')");

        # get ID for newly added stem
        $StemId = $this->DB->LastInsertId();
    }

    # if there is no such stem, report that rather than a bogus ID
    # (adding the offset to NULL would yield STEM_ID_OFFSET, which
    #       would then be cached and handed back even to a later
    #       caller asking for the stem to be added)
    if ($StemId === NULL)
    {
        return NULL;
    }

    # adjust from DB ID value to stem ID value
    $StemId += self::STEM_ID_OFFSET;

    # save ID to cache
    $StemIdCache[$Stem] = $StemId;

    # return ID to caller
    return $StemId;
}
2175 
/**
 * Get the text of a word or word stem from its ID.  IDs at or above
 * STEM_ID_OFFSET are looked up in SearchStems (after subtracting the
 * offset), and all other IDs are looked up in SearchWords.  Text that
 * has been found is cached for the rest of the PHP invocation.
 * @param int $WordId ID of word or stem.
 * @return mixed Text of word or stem, as returned by the database.
 */
private function GetWord($WordId)
{
    static $WordCache;

    # use word from cache if available
    if (isset($WordCache[$WordId]))
    {
        return $WordCache[$WordId];
    }

    # work out where to look (stems live in their own table, under
    #       their ID without the offset)
    # (the adjusted ID is kept in a separate variable, so that the
    #       caller's ID is what indexes the cache -- caching a stem
    #       under its adjusted ID would make a later lookup of the
    #       ordinary word with that same ID return the stem instead)
    $TableName = "SearchWords";
    $DbId = $WordId;
    if ($WordId >= self::STEM_ID_OFFSET)
    {
        $TableName = "SearchStems";
        $DbId = $WordId - self::STEM_ID_OFFSET;
    }

    # look up word in database
    $Word = $this->DB->Query("SELECT WordText"
            ." FROM ".$TableName
            ." WHERE WordId='".$DbId."'",
            "WordText");

    # save word to cache
    $WordCache[$WordId] = $Word;

    # return word to caller
    return $Word;
}
2214 
/**
 * Get the type of an item, using the SearchItemTypes table.  The whole
 * table is loaded on first use and then reused for later calls.
 * @param int $ItemId ID of item.
 * @return int|null Item type, or NULL if there is no entry for item.
 */
private function GetItemType($ItemId)
{
    static $TypesById;

    # load all item types the first time through
    if (!isset($TypesById))
    {
        $this->DB->Query("SELECT * FROM SearchItemTypes");
        $TypesById = $this->DB->FetchColumn("ItemType", "ItemId");
    }

    # report items that have no type recorded
    if (!isset($TypesById[$ItemId]))
    {
        return NULL;
    }

    return (int)$TypesById[$ItemId];
}
2231 
/**
 * Print debug message if level set high enough.  The message is
 * printed only when the current debug level is greater than the level
 * supplied, and is prefixed with "SE: " and followed by an HTML line
 * break.
 * @param int $Level Debug level that must be exceeded for message to
 *      be printed.
 * @param string $Msg Message to print.
 */
protected function DMsg($Level, $Msg)
{
    # stay quiet unless debug level is above that of message
    if (!($this->DebugLevel > $Level))
    {
        return;
    }

    echo "SE: ".$Msg."<br>\n";
}
2244 
2245  # ---- BACKWARD COMPATIBILITY --------------------------------------------
2246 
2247  # possible types of logical operators
2248  const SEARCHLOGIC_AND = 1;
2249  const SEARCHLOGIC_OR = 2;
2250 
2251  # pattern to detect search strings that are explicit comparisons
2252  const COMPARISON_OPERATOR_PATTERN = '/^([><=^$@]+|!=)([^><=^$@])/';
2253 }
SearchTermCount()
Get total number of search terms indexed by search engine.
SetAllSynonyms($SynonymList)
Set all synonyms.
DropItem($ItemId)
Drop all data pertaining to item from search database.
AddField($FieldId, $FieldType, $ItemTypes, $Weight, $UsedInKeywordSearch)
Add field to include in searching.
RemoveSynonyms($Word, $Synonyms=NULL)
Remove synonym(s).
const KEYWORD_FIELD_ID
LoadSynonymsFromFile($FileName)
Load synonyms from a file.
Set of parameters used to perform a search.
SQL database abstraction object with smart query caching.
Definition: Database.php:22
SearchFieldForPhrases($FieldId, $Phrase)
Search for phrase in specified field.
GetAllSynonyms()
Get all synonyms.
const SEARCHLOGIC_OR
const FIELDTYPE_NUMERIC
FilterOnSuppliedFunctions($Scores)
Filter search scores through any supplied functions.
Logic($NewValue=NULL)
Get/set logic for set.
UpdateForItem($ItemId, $ItemType)
Update search database for the specified item.
AddSynonyms($Word, $Synonyms)
Add synonyms.
const FIELDTYPE_DATERANGE
const FIELDTYPE_DATE
const SEARCHLOGIC_AND
SearchAll($SearchParams)
Perform search with specified parameters, returning results separated by item type.
ItemTypes($ItemTypes=NULL)
Get/set allowed item types.
SearchTerms()
Get normalized list of search terms.
const WORD_EXCLUDED
NumberOfResults($ItemType=NULL)
Get number of results found by most recent search.
FieldWeight($FieldId)
Get search weight for specified field.
FieldType($FieldId)
Get type of specified field (text/numeric/date/daterange).
ItemCount()
Get total number of items indexed by search engine.
static BuildMultiTypeResults($Results)
Expand a one-dimensional array(ItemId => ItemScore) into a two-dimensional array(ItemType => array(It...
__construct($ItemTableName, $ItemIdFieldName, $ItemTypeFieldName)
Object constructor.
const STEM_ID_OFFSET
GetSearchStrings($IncludeSubgroups=FALSE)
Get search strings in set.
RemoveAllSynonyms()
Remove all synonyms.
DMsg($Level, $Msg)
Print debug message if level set high enough.
const WORD_PRESENT
DropField($FieldId)
Drop all data pertaining to field from search database.
GetFieldContent($ItemId, $FieldId)
Retrieve content for specified field for specified item.
Core metadata archive search engine class.
GetKeywordSearchStrings()
Get keyword search strings in set.
Search($SearchParams)
Perform search with specified parameters, returning results in a flat array indexed by item ID...
const COMPARISON_OPERATOR_PATTERN
const FIELDTYPE_TEXT
const WORD_REQUIRED
DebugLevel($NewValue)
Set debug output level.
UpdateForItems($StartingItemId, $NumberOfItems)
Update search database for the specified range of items.
FieldedSearchWeightScale($SearchParams)
Get total of weights for all fields involved in search, useful for assessing scale of scores in searc...
static FlattenMultiTypeResults($Results)
Flatten a two-dimensional array keyed by ItemType with results for each type as the outer values into...
FieldInKeywordSearch($FieldId)
Get whether specified field is included in keyword searches.
AddResultFilterFunction($FunctionName)
Add function that will be called to filter search results.
SearchTime()
Get time that last search took, in seconds.
GetSubgroups()
Get parameter subgroups.
GetSynonyms($Word)
Get synonyms for word.