CWIS Developer Documentation
SearchEngine.php
Go to the documentation of this file.
1 <?PHP
2 #
3 # FILE: SearchEngine.php
4 #
5 # Open Source Metadata Archive Search Engine (OSMASE)
6 # Copyright 2002-2016 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu
8 #
9 
14 {
15 
16  # ---- PUBLIC INTERFACE --------------------------------------------------
17 
18  # possible types of logical operators
19  const LOGIC_AND = 1;
20  const LOGIC_OR = 2;
21 
22  # flags used for indicating field types
23  const FIELDTYPE_TEXT = 1;
24  const FIELDTYPE_NUMERIC = 2;
25  const FIELDTYPE_DATE = 3;
27 
28  # flags used for indicating word states
29  const WORD_PRESENT = 1;
30  const WORD_EXCLUDED = 2;
31  const WORD_REQUIRED = 4;
32 
/**
 * Object constructor.
 * @param string $ItemTableName Name of database table that holds items
 *       (used as the FROM table when rebuilding search data).
 * @param string $ItemIdFieldName Name of column that holds item IDs.
 * @param string $ItemTypeFieldName Name of column that holds item types.
 */
public function __construct(
        $ItemTableName, $ItemIdFieldName, $ItemTypeFieldName)
{
    # create database object for our use
    $this->DB = new Database();

    # save item access parameters
    $this->ItemTableName = $ItemTableName;
    $this->ItemIdFieldName = $ItemIdFieldName;
    $this->ItemTypeFieldName = $ItemTypeFieldName;

    # set default debug state
    $this->DebugLevel = 0;
}
55 
/**
 * Register a field that should be considered when indexing and searching.
 * @param mixed $FieldId ID of field.
 * @param int $FieldType Field type (one of the FIELDTYPE_ constants).
 * @param mixed $ItemTypes Item type or array of item types the field
 *       applies to.
 * @param int $Weight Weight to give field when scoring.
 * @param bool $UsedInKeywordSearch Whether field participates in
 *       keyword searches.
 */
public function AddField($FieldId, $FieldType, $ItemTypes,
        $Weight, $UsedInKeywordSearch)
{
    # a single item type is stored as a one-element list
    $TypeList = $ItemTypes;
    if (!is_array($TypeList))
    {
        $TypeList = array($TypeList);
    }

    # record field settings
    $this->FieldInfo[$FieldId]["FieldType"] = $FieldType;
    $this->FieldInfo[$FieldId]["Weight"] = $Weight;
    $this->FieldInfo[$FieldId]["InKeywordSearch"] = (bool)$UsedInKeywordSearch;
    $this->FieldInfo[$FieldId]["ItemTypes"] = $TypeList;
}
77 
/**
 * Retrieve type that was registered for a field.
 * @param mixed $FieldId ID of field.
 * @return int Field type as passed to AddField().
 */
public function FieldType($FieldId)
{
    $Info = $this->FieldInfo[$FieldId];
    return $Info["FieldType"];
}
87 
/**
 * Retrieve search weight that was registered for a field.
 * @param mixed $FieldId ID of field.
 * @return int Weight as passed to AddField().
 */
public function FieldWeight($FieldId)
{
    $Info = $this->FieldInfo[$FieldId];
    return $Info["Weight"];
}
97 
/**
 * Report whether a field takes part in keyword searches.
 * @param mixed $FieldId ID of field.
 * @return bool TRUE if field was registered for keyword searching.
 */
public function FieldInKeywordSearch($FieldId)
{
    $Info = $this->FieldInfo[$FieldId];
    return $Info["InKeywordSearch"];
}
107 
/**
 * Set debug output level.
 * @param int $NewValue New debugging level (the constructor starts
 *       the level at 0).
 */
public function DebugLevel($NewValue)
{
    # remember requested verbosity
    $this->DebugLevel = $NewValue;
}
116 
117 
118  # ---- search functions
119 
/**
 * Perform search with specified parameters, returning results in an
 * array indexed by item ID.
 * @param mixed $SearchParams Search parameter set, or a plain keyword
 *       search string (which is wrapped in a new SearchParameterSet).
 * @param int $StartingResult Starting index into results.  (OPTIONAL,
 *       defaults to 0)
 * @param int $NumberOfResults Number of results to return.  (OPTIONAL,
 *       defaults to PHP_INT_MAX)
 * @param mixed $SortByField Field to sort results by.  (OPTIONAL,
 *       defaults to NULL)
 * @param bool $SortDescending Whether to sort in descending order.
 *       (OPTIONAL, defaults to TRUE)
 * @return array Result scores as produced by CleanScores().
 */
public function Search(
        $SearchParams, $StartingResult = 0, $NumberOfResults = PHP_INT_MAX,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # if keyword search string was passed in
    if (is_string($SearchParams))
    {
        # convert string to search parameter set
        $SearchString = $SearchParams;
        $SearchParams = new SearchParameterSet();
        $SearchParams->AddParameter($SearchString);
    }

    # interpret and filter out magic debugging keyword (if any)
    $KeywordStrings = $SearchParams->GetKeywordSearchStrings();
    foreach ($KeywordStrings as $String)
    {
        $FilteredString = $this->ExtractDebugLevel($String);

        # (strict comparison so that numeric-looking strings that differ
        #       textually are not treated as unchanged)
        if ($FilteredString !== $String)
        {
            $SearchParams->RemoveParameter($String);
            $SearchParams->AddParameter($FilteredString);
        }
    }

    # save start time to use in calculating search time
    $StartTime = microtime(TRUE);

    # clear parsed search term list
    $this->SearchTermList = array();

    # perform search
    $Scores = $this->RawSearch($SearchParams);

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # record search time
    $this->LastSearchTime = microtime(TRUE) - $StartTime;

    # return search results to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
183 
/**
 * Perform search across multiple fields, returning trimmed and sorted
 * result scores.
 * @param mixed $SearchStrings Array of search strings indexed by field,
 *       or a SearchParameterSet (which is handed off to GroupedSearch()).
 * @param int $StartingResult Starting index into results.  (OPTIONAL,
 *       defaults to 0)
 * @param int $NumberOfResults Number of results to return.  (OPTIONAL,
 *       defaults to 10)
 * @param mixed $SortByField Field to sort results by.  (OPTIONAL,
 *       defaults to NULL)
 * @param bool $SortDescending Whether to sort in descending order.
 *       (OPTIONAL, defaults to TRUE)
 * @return array Result scores as produced by CleanScores().
 */
public function FieldedSearch(
        $SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # pass off the request to grouped search (for now) if appropriate
    if ($SearchStrings instanceof SearchParameterSet)
    {
        return $this->GroupedSearch($SearchStrings, $StartingResult,
                $NumberOfResults, $SortByField, $SortDescending);
    }

    # interpret and filter out magic debugging keyword (if any)
    $SearchStrings = $this->SetDebugLevel($SearchStrings);
    $this->DMsg(0, "In FieldedSearch() with "
            .count($SearchStrings)." search strings");

    # save start time to use in calculating search time
    $StartTime = microtime(TRUE);

    # perform search
    # (SearchAcrossFields() requires a logic argument, and omitting it
    #       is a fatal error on PHP 7.1+;  NULL means terms are only
    #       required when explicitly prefixed with "+")
    # NOTE(review): confirm whether "AND" was the intended logic here
    $Scores = $this->SearchAcrossFields($SearchStrings, NULL);
    $Scores = ($Scores === NULL) ? array() : $Scores;

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # record search time
    $this->LastSearchTime = microtime(TRUE) - $StartTime;

    # return list of items to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
237 
/**
 * Add function that will be called to filter search results.
 * @param callable $FunctionName Filter function (appended to the list
 *       of registered filters).
 */
public function AddResultFilterFunction($FunctionName)
{
    # append to list of registered filter callbacks
    $this->FilterFuncs[] = $FunctionName;
}
247 
/**
 * Get number of results found by most recent search.
 * @param mixed $ItemType Item type to report count for, or NULL for the
 *       overall count.  (OPTIONAL, defaults to NULL)
 * @return int Result count (0 if nothing is recorded for the specified
 *       item type).
 */
public function NumberOfResults($ItemType = NULL)
{
    # overall count requested
    if ($ItemType === NULL)
    {
        return $this->NumberOfResultsAvailable;
    }

    # per-type count requested
    if (isset($this->NumberOfResultsPerItemType[$ItemType]))
    {
        return $this->NumberOfResultsPerItemType[$ItemType];
    }
    return 0;
}
260 
/**
 * Get normalized list of search terms collected while parsing the most
 * recent search.
 * @return array Search terms.
 */
public function SearchTerms()
{
    $Terms = $this->SearchTermList;
    return $Terms;
}
269 
/**
 * Get time that the most recent search took.
 * @return float Elapsed time in seconds (difference of two
 *       microtime(TRUE) readings).
 */
public function SearchTime()
{
    $Elapsed = $this->LastSearchTime;
    return $Elapsed;
}
278 
/**
 * Get total of weights for all fields involved in a search.
 * @param SearchParameterSet $SearchParams Search parameters.
 * @return int Sum of weights of the registered fields named in the
 *       parameters, plus (when keyword strings are present) the weights
 *       of all fields flagged for keyword searching.
 */
public function FieldedSearchWeightScale($SearchParams)
{
    $Total = 0;

    # add weights of explicitly-searched fields that we know about
    foreach ($SearchParams->GetFields() as $SearchedId)
    {
        if (!array_key_exists($SearchedId, $this->FieldInfo))
        {
            continue;
        }
        $Total += $this->FieldInfo[$SearchedId]["Weight"];
    }

    # nothing more to add if there are no keyword searches
    if (!count($SearchParams->GetKeywordSearchStrings()))
    {
        return $Total;
    }

    # add weights of all fields included in keyword searches
    foreach ($this->FieldInfo as $Settings)
    {
        if ($Settings["InKeywordSearch"])
        {
            $Total += $Settings["Weight"];
        }
    }
    return $Total;
}
308 
309 
310  # ---- search database update functions
311 
/**
 * Update search database for the specified item.
 * @param int $ItemId ID of item.
 * @param int $ItemType Numerical type of item.
 */
public function UpdateForItem($ItemId, $ItemType)
{
    # clear word count added flags for this item
    unset($this->WordCountAdded);

    # delete any existing info for this item
    # (ID is forced to an integer, matching the insert below)
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = "
            .intval($ItemId));
    $this->DB->Query("DELETE FROM SearchItemTypes WHERE ItemId = "
            .intval($ItemId));

    # save item type
    $this->DB->Query("INSERT INTO SearchItemTypes (ItemId, ItemType)"
            ." VALUES (".intval($ItemId).", ".intval($ItemType).")");

    # for each metadata field
    foreach ($this->FieldInfo as $FieldId => $Info)
    {
        # if valid search weight for field and field applies to this item
        if (($Info["Weight"] > 0)
                && in_array($ItemType, $Info["ItemTypes"]))
        {
            # retrieve text for field
            $Text = $this->GetFieldContent($ItemId, $FieldId);

            # treat a single string as a one-element list
            $Strings = is_array($Text) ? $Text : array($Text);

            # for each text string
            foreach ($Strings as $String)
            {
                # record search info for text
                $this->RecordSearchInfoForText($ItemId, $FieldId,
                        $Info["Weight"], $String,
                        $Info["InKeywordSearch"]);
            }
        }
    }
}
362 
/**
 * Update search database for the specified range of items.
 * @param int $StartingItemId ID of item to start with.
 * @param int $NumberOfItems Maximum number of items to update.
 * @return mixed ID of last item updated, or NULL if no items with an ID
 *       at or above the starting ID were found.
 */
public function UpdateForItems($StartingItemId, $NumberOfItems)
{
    # retrieve IDs for specified number of items starting at specified ID
    # (numeric arguments are forced to integers before going into SQL)
    $this->DB->Query("SELECT ".$this->ItemIdFieldName.", ".$this->ItemTypeFieldName
            ." FROM ".$this->ItemTableName
            ." WHERE ".$this->ItemIdFieldName." >= ".intval($StartingItemId)
            ." ORDER BY ".$this->ItemIdFieldName
            ." LIMIT ".intval($NumberOfItems));
    $ItemIds = $this->DB->FetchColumn(
            $this->ItemTypeFieldName, $this->ItemIdFieldName);

    # start with no last item, so the return value is defined even when
    #       the query matched nothing
    $LastItemId = NULL;

    # for each retrieved item ID
    foreach ($ItemIds as $ItemId => $ItemType)
    {
        # update search info for item
        $this->UpdateForItem($ItemId, $ItemType);
        $LastItemId = $ItemId;
    }

    # return ID of last item updated to caller
    return $LastItemId;
}
389 
/**
 * Drop all data pertaining to item from search database.
 * @param int $ItemId ID of item to drop.
 */
public function DropItem($ItemId)
{
    # force ID to an integer before it goes into SQL (as is done for the
    #       insert in UpdateForItem())
    $Id = intval($ItemId);

    # drop all entries pertaining to item from word count and type tables
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$Id);
    $this->DB->Query("DELETE FROM SearchItemTypes WHERE ItemId = ".$Id);
}
400 
/**
 * Drop all data pertaining to field from search database.
 * @param int $FieldId ID of field to drop.
 */
public function DropField($FieldId)
{
    # drop all entries pertaining to field from word counts table
    # (the previous \' inside a double-quoted string put literal
    #       backslashes into the SQL;  field IDs are used as bare
    #       integers in the other SearchWordCounts queries)
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = "
            .intval($FieldId));
}
410 
/**
 * Get total number of search terms indexed by search engine.
 * @return int Number of rows in the SearchWords table.
 */
public function SearchTermCount()
{
    $Sql = "SELECT COUNT(*) AS TermCount"
            ." FROM SearchWords";
    return $this->DB->Query($Sql, "TermCount");
}
420 
/**
 * Get total number of items indexed by search engine.
 * @return int Number of distinct item IDs in the SearchWordCounts table.
 */
public function ItemCount()
{
    $Sql = "SELECT COUNT(DISTINCT ItemId) AS ItemCount"
            ." FROM SearchWordCounts";
    return $this->DB->Query($Sql, "ItemCount");
}
430 
/**
 * Add synonyms for a word.
 * @param string $Word Word for which synonyms should be added.
 * @param array $Synonyms Synonyms to add.
 * @return int Number of synonym pairs that were not already recorded
 *       (in either direction) and were therefore inserted.
 */
public function AddSynonyms($Word, $Synonyms)
{
    # nothing added yet
    $Added = 0;

    # get ID for word
    $WordId = $this->GetWordId($Word, TRUE);

    foreach ($Synonyms as $Candidate)
    {
        # get ID for synonym
        $CandidateId = $this->GetWordId($Candidate, TRUE);

        # skip pair if it is already in database (in either order)
        $this->DB->Query("SELECT * FROM SearchWordSynonyms"
                ." WHERE (WordIdA = ".$WordId
                ." AND WordIdB = ".$CandidateId.")"
                ." OR (WordIdB = ".$WordId
                ." AND WordIdA = ".$CandidateId.")");
        if ($this->DB->NumRowsSelected() != 0)
        {
            continue;
        }

        # add synonym entry to database
        $this->DB->Query("INSERT INTO SearchWordSynonyms"
                ." (WordIdA, WordIdB)"
                ." VALUES (".$WordId.", ".$CandidateId.")");
        $Added += 1;
    }

    # report to caller number of new synonyms added
    return $Added;
}
471 
/**
 * Remove synonym(s).
 * @param string $Word Word for which synonyms should be removed.
 * @param array $Synonyms Synonyms to remove, or NULL to remove every
 *       synonym for the word.  (OPTIONAL, defaults to NULL)
 */
public function RemoveSynonyms($Word, $Synonyms = NULL)
{
    # nothing to do if word is unknown
    $WordId = $this->GetWordId($Word);
    if ($WordId === NULL)
    {
        return;
    }

    # with no specific synonyms given, remove all synonyms for word
    if ($Synonyms === NULL)
    {
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE WordIdA = '".$WordId."'"
                ." OR WordIdB = '".$WordId."'");
        return;
    }

    # otherwise remove each listed synonym that we can find an ID for
    foreach ($Synonyms as $Target)
    {
        $TargetId = $this->GetWordId($Target);
        if ($TargetId === NULL)
        {
            continue;
        }

        # delete synonym entry (stored in either order)
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE (WordIdA = '".$WordId."'"
                ." AND WordIdB = '".$TargetId."')"
                ." OR (WordIdB = '".$WordId."'"
                ." AND WordIdA = '".$TargetId."')");
    }
}
516 
/**
 * Remove all synonyms.
 */
public function RemoveAllSynonyms()
{
    # empty the synonym table
    $Sql = "DELETE FROM SearchWordSynonyms";
    $this->DB->Query($Sql);
}
524 
/**
 * Get synonyms for word.
 * @param string $Word Word for which synonyms should be returned.
 * @return array Synonyms (empty if the word is unknown or has none).
 */
public function GetSynonyms($Word)
{
    # assume no synonyms will be found
    $Synonyms = array();

    # look up ID for word
    $WordId = $this->GetWordId($Word);

    # if word ID was found
    if ($WordId !== NULL)
    {
        # look up IDs of all synonyms for this word
        $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
                ." WHERE WordIdA = ".$WordId
                ." OR WordIdB = ".$WordId);

        # collect all IDs first, before GetWord() is called below
        # (FetchRow must be called as a method;  without the parentheses
        #       it was read as a property and no rows were ever fetched)
        $SynonymIds = array();
        while ($Record = $this->DB->FetchRow())
        {
            # the synonym is whichever side of the pair is not our word
            $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                    ? $Record["WordIdB"] : $Record["WordIdA"];
        }

        # for each synonym ID
        foreach ($SynonymIds as $SynonymId)
        {
            # look up synonym word and add to synonym list
            $Synonyms[] = $this->GetWord($SynonymId);
        }
    }

    # return synonyms to caller
    return $Synonyms;
}
563 
/**
 * Get all synonyms.
 * @return array Two-dimensional array with words for the first index
 *       and lists of synonyms for the second, with reciprocal duplicates
 *       removed and both levels sorted alphabetically.
 */
public function GetAllSynonyms()
{
    $List = array();

    # build reciprocal lists from every stored synonym pair
    # (separate database object is used for the outer query)
    $PairDB = new Database();
    $PairDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
    while ($Row = $PairDB->FetchRow())
    {
        # look up words
        $Left = $this->GetWord($Row["WordIdA"]);
        $Right = $this->GetWord($Row["WordIdB"]);

        # list right-hand word under left-hand word if not already there
        if (!isset($List[$Left]) || !in_array($Right, $List[$Left]))
        {
            $List[$Left][] = $Right;
        }

        # list left-hand word under right-hand word if not already there
        if (!isset($List[$Right]) || !in_array($Left, $List[$Right]))
        {
            $List[$Right][] = $Left;
        }
    }

    # remove reciprocal duplicates
    # (iteration is over a snapshot, while $List itself is modified)
    foreach ($List as $Term => $Peers)
    {
        foreach ($Peers as $Peer)
        {
            # skip unless both entries still exist and list each other
            if (!isset($List[$Peer]) || !isset($List[$Term])
                    || !in_array($Term, $List[$Peer])
                    || !in_array($Peer, $List[$Term]))
            {
                continue;
            }

            # drop the link from whichever side has fewer synonyms
            # (from the peer's side when the counts are equal)
            if (count($List[$Term]) < count($List[$Peer]))
            {
                $Owner = $Term;
                $Dropped = $Peer;
            }
            else
            {
                $Owner = $Peer;
                $Dropped = $Term;
            }
            $List[$Owner] = array_diff($List[$Owner], array($Dropped));

            # remove entry entirely if no synonyms are left for it
            if (!count($List[$Owner]))
            {
                unset($List[$Owner]);
            }
        }
    }

    # sort alphabetically (just for convenience)
    foreach (array_keys($List) as $Term)
    {
        asort($List[$Term]);
    }
    ksort($List);

    # return 2D array of synonyms to caller
    return $List;
}
656 
/**
 * Set all synonyms, replacing any existing ones.
 * @param array $SynonymList Two-dimensional array with words for the
 *       first index and lists of synonyms for the second.
 */
public function SetAllSynonyms($SynonymList)
{
    # start from a clean slate
    $this->RemoveAllSynonyms();

    # add the synonyms listed for each word
    foreach ($SynonymList as $Term => $Peers)
    {
        $this->AddSynonyms($Term, $Peers);
    }
}
674 
/**
 * Load synonyms from a file.  Each non-comment line is split on
 * whitespace and commas;  the first word on the line is the word and
 * the remaining words are its synonyms.  Any line containing "#" is
 * treated as a comment and skipped.
 * @param string $FileName Name of file containing synonyms.
 * @return int Number of new synonyms added (0 if file could not be read).
 */
public function LoadSynonymsFromFile($FileName)
{
    # assume no synonyms will be added
    $AddCount = 0;

    # read in contents of file
    $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);

    # bail out if file could not be read
    # (file() returns FALSE on failure, which must not reach count())
    if ($Lines === FALSE)
    {
        return $AddCount;
    }

    # for each line of file
    foreach ($Lines as $Line)
    {
        # skip comment lines
        if (preg_match("/[\s]*#/", $Line))
        {
            continue;
        }

        # split line into words, discarding the empty strings that
        #       leading or trailing separators would otherwise produce
        $Words = preg_split("/[\s,]+/", $Line, -1, PREG_SPLIT_NO_EMPTY);

        # if synonyms found
        if (count($Words) > 1)
        {
            # separate out word and synonyms
            $Word = array_shift($Words);

            # add synonyms
            $AddCount += $this->AddSynonyms($Word, $Words);
        }
    }

    # return count of synonyms added to caller
    return $AddCount;
}
719 
720 
721  # ---- PRIVATE INTERFACE -------------------------------------------------
722 
723  protected $DB;
724  protected $DebugLevel;
725  protected $FilterFuncs;
726  protected $ItemIdFieldName;
727  protected $ItemTableName;
729  protected $LastSearchTime;
731  protected $StemmingEnabled = TRUE;
732  protected $SynonymsEnabled = TRUE;
733 
734  private $ExcludedTermCount;
735  private $FieldIds;
736  private $FieldInfo;
737  private $InclusiveTermCount;
738  private $RequiredTermCount;
739  private $RequiredTermCounts;
740  private $SearchTermList;
741  private $WordCountAdded;
742 
743  const KEYWORD_FIELD_ID = -100;
744  const STEM_ID_OFFSET = 1000000;
745 
746 
747  # ---- private methods (searching)
748 
/**
 * Perform search based on supplied parameters, recursing into any
 * subgroups.  Results are not counted, sorted, or trimmed here.
 * @param SearchParameterSet $SearchParams Search parameter set.
 * @return array Result scores, indexed by item ID.
 */
private function RawSearch($SearchParams)
{
    # retrieve search strings
    $SearchStrings = $SearchParams->GetSearchStrings();
    $KeywordSearchStrings = $SearchParams->GetKeywordSearchStrings();

    # add keyword searches (if any) to fielded searches
    if (count($KeywordSearchStrings))
    {
        $SearchStrings[self::KEYWORD_FIELD_ID] = $KeywordSearchStrings;
    }

    # normalize search strings (trim, and drop empty strings)
    $NormalizedSearchStrings = array();
    foreach ($SearchStrings as $FieldId => $SearchStringArray)
    {
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }
        foreach ($SearchStringArray as $String)
        {
            $String = trim($String);
            if (strlen($String))
            {
                $NormalizedSearchStrings[$FieldId][] = $String;
            }
        }
    }
    $SearchStrings = $NormalizedSearchStrings;

    # scores stay NULL until some search has actually been run, so that
    #       the first subgroup result is adopted rather than combined
    #       with an empty list
    $Scores = NULL;

    # if we have strings to search for
    if (count($SearchStrings))
    {
        # perform search
        $Scores = $this->SearchAcrossFields(
                $SearchStrings, $SearchParams->Logic());
    }

    # for each subgroup
    $HadSubgroups = FALSE;
    foreach ($SearchParams->GetSubgroups() as $Subgroup)
    {
        # perform subgroup search
        $NewScores = $this->RawSearch($Subgroup);
        $HadSubgroups = TRUE;

        # add subgroup search scores to previous scores as appropriate
        if ($Scores !== NULL)
        {
            $Scores = $this->CombineScores(
                    $Scores, $NewScores, $SearchParams->Logic());
        }
        else
        {
            $Scores = $NewScores;
        }
    }
    if ($HadSubgroups)
    {
        $this->DMsg(2, "Have ".count($Scores)
                ." results after subgroup processing");
    }

    # no search strings and no subgroups means no results
    # (also keeps the item type filtering below from operating on an
    #       undefined value)
    if ($Scores === NULL)
    {
        $Scores = array();
    }

    # pare down results to just allowed item types (if specified)
    if ($SearchParams->ItemTypes())
    {
        $AllowedItemTypes = $SearchParams->ItemTypes();
        foreach ($Scores as $ItemId => $Score)
        {
            if (!in_array($this->GetItemType($ItemId), $AllowedItemTypes))
            {
                unset($Scores[$ItemId]);
            }
        }
        $this->DMsg(3, "Have ".count($Scores)
                ." results after paring to allowed item types");
    }

    # return search results to caller
    return $Scores;
}
836 
/**
 * Combine two sets of search result scores.
 * @param array $ScoresA First set of scores, indexed by item ID.
 * @param array $ScoresB Second set of scores, indexed by item ID.
 * @param string $Logic "OR" to keep items from either set (summing
 *       scores of items in both);  anything else keeps only items
 *       present in both sets, with their scores summed.
 * @return array Combined scores, indexed by item ID.
 */
private function CombineScores($ScoresA, $ScoresB, $Logic)
{
    # intersection: keep only items found in both sets
    if ($Logic != "OR")
    {
        $Combined = array();
        foreach ($ScoresA as $Id => $Value)
        {
            if (!isset($ScoresB[$Id]))
            {
                continue;
            }
            $Combined[$Id] = $Value + $ScoresB[$Id];
        }
        return $Combined;
    }

    # union: start with first set and fold in second
    $Combined = $ScoresA;
    foreach ($ScoresB as $Id => $Value)
    {
        $Combined[$Id] = isset($Combined[$Id])
                ? ($Combined[$Id] + $Value) : $Value;
    }
    return $Combined;
}
874 
/**
 * Perform search across multiple fields and return raw result scores.
 * Text/keyword strings are searched by word and phrase, everything else
 * is handed to the comparison search;  exclusions and required terms
 * are then applied to the combined scores.
 * @param array $SearchStrings Arrays of search strings, indexed by
 *       field ID (self::KEYWORD_FIELD_ID for keyword searches).
 * @param string $Logic Search logic ("AND" or "OR").
 * @return array Result scores, indexed by item ID.
 */
private function SearchAcrossFields($SearchStrings, $Logic)
{
    # start by assuming no search will be done
    $Scores = array();

    # parsed words and phrases, indexed by field ID
    $Words = array();
    $Phrases = array();

    # clear word counts
    $this->ExcludedTermCount = 0;
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->RequiredTermCounts = array();

    # for each field
    $NeedComparisonSearch = FALSE;
    foreach ($SearchStrings as $FieldId => $SearchStringArray)
    {
        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # if field is keyword or field is text and does not look
            #       like comparison match
            $NotComparisonSearch = !preg_match(
                    self::COMPARISON_OPERATOR_PATTERN, $SearchString);
            if (($FieldId == self::KEYWORD_FIELD_ID)
                    || (isset($this->FieldInfo[$FieldId])
                            && ($this->FieldInfo[$FieldId]["FieldType"]
                                    == self::FIELDTYPE_TEXT)
                            && $NotComparisonSearch))
            {
                $this->DMsg(0, "Searching text field \""
                        .$FieldId."\" for string \"$SearchString\"");

                # normalize text and split into words
                $Words[$FieldId] =
                        $this->ParseSearchStringForWords($SearchString, $Logic);

                # calculate scores for matching items
                if (count($Words[$FieldId]))
                {
                    $Scores = $this->SearchForWords(
                            $Words[$FieldId], $FieldId, $Scores);
                    $this->DMsg(3, "Have "
                            .count($Scores)." results after word search");
                }

                # split into phrases
                $Phrases[$FieldId] = $this->ParseSearchStringForPhrases(
                        $SearchString, $Logic);

                # handle any phrases
                if (count($Phrases[$FieldId]))
                {
                    $Scores = $this->SearchForPhrases(
                            $Phrases[$FieldId], $Scores, $FieldId, TRUE, FALSE);
                    $this->DMsg(3, "Have "
                            .count($Scores)." results after phrase search");
                }
            }
            else
            {
                # set flag to indicate possible comparison search candidate found
                $NeedComparisonSearch = TRUE;
            }
        }
    }

    # perform comparison searches
    if ($NeedComparisonSearch)
    {
        $Scores = $this->SearchForComparisonMatches(
                $SearchStrings, $Logic, $Scores);
        $this->DMsg(3, "Have ".count($Scores)." results after comparison search");
    }

    # if no results found, no required terms, and exclusions specified
    #       (i.e. a purely negative search), start from all records so
    #       that the exclusions have something to filter
    if ((count($Scores) == 0) &&
            ($this->RequiredTermCount == 0) &&
            ($this->ExcludedTermCount > 0) )
    {
        # determine which item types are implicated for keyword searches
        $KeywordItemTypes = [];
        foreach ($this->FieldInfo as $FieldId => $Info)
        {
            if ($Info["InKeywordSearch"])
            {
                $KeywordItemTypes = array_merge(
                        $KeywordItemTypes,
                        $Info["ItemTypes"]);
            }
        }
        $KeywordItemTypes = array_unique($KeywordItemTypes);

        # determine what item types were in use for the fields we
        #       are searching
        $FieldTypes = [];
        foreach ($SearchStrings as $FieldId => $Info)
        {
            if ($FieldId == self::KEYWORD_FIELD_ID)
            {
                $MyTypes = $KeywordItemTypes;
            }
            elseif (isset($this->FieldInfo[$FieldId]))
            {
                $MyTypes = $this->FieldInfo[$FieldId]["ItemTypes"];
            }
            else
            {
                # field was never registered, so it has no item types
                #       to contribute (and array_merge() must not be
                #       handed a non-array)
                continue;
            }

            $FieldTypes = array_merge(
                    $FieldTypes, $MyTypes);
        }
        $FieldTypes = array_unique($FieldTypes);

        # load all records for these field types
        $Scores = $this->LoadScoresForAllRecords($FieldTypes);
    }

    # if search results found
    if (count($Scores))
    {
        # for each search text string
        foreach ($SearchStrings as $FieldId => $SearchStringArray)
        {
            # for each search string for this field
            foreach ($SearchStringArray as $SearchString)
            {
                # if field is text
                if (($FieldId == self::KEYWORD_FIELD_ID)
                        || (isset($this->FieldInfo[$FieldId])
                                && ($this->FieldInfo[$FieldId]["FieldType"]
                                        == self::FIELDTYPE_TEXT)))
                {
                    # if there are words in search text
                    if (isset($Words[$FieldId]))
                    {
                        # handle any excluded words
                        $Scores = $this->FilterOnExcludedWords(
                                $Words[$FieldId], $Scores, $FieldId);
                    }

                    # handle any excluded phrases
                    if (isset($Phrases[$FieldId]))
                    {
                        $Scores = $this->SearchForPhrases(
                                $Phrases[$FieldId], $Scores,
                                $FieldId, FALSE, TRUE);
                    }
                }
            }
            $this->DMsg(3, "Have ".count($Scores)
                    ." results after processing exclusions");
        }

        # strip off any results that don't contain required words
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # return search result scores to caller
    return $Scores;
}
1036 
/**
 * Search for words in specified field, adding word counts (plus halved,
 * rounded-up counts for synonyms and stems) to item scores.
 * @param array $Words Flags (WORD_ constants), indexed by word.
 * @param int $FieldId ID of field to search.
 * @param array $Scores Existing scores indexed by item ID.  (OPTIONAL,
 *       defaults to NULL, which starts a new list)
 * @return array Updated scores, indexed by item ID.
 */
private function SearchForWords($Words, $FieldId, $Scores = NULL)
{
    $DB = $this->DB;

    # start with empty search result scores list if none passed in
    if ($Scores == NULL)
    {
        $Scores = array();
    }

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        # no counts found yet for this word
        $Counts = NULL;
        $this->DMsg(2, "Searching for word '".$Word."' in field ".$FieldId);

        # if word is not excluded
        if (!($Flags & self::WORD_EXCLUDED))
        {
            # look up record ID for word
            $this->DMsg(2, "Looking up word \"".$Word."\"");
            $WordId = $this->GetWordId($Word);

            # if word is in DB
            if ($WordId !== NULL)
            {
                # look up counts for word
                $DB->Query("SELECT ItemId,Count FROM SearchWordCounts "
                        ."WHERE WordId = ".$WordId
                        ." AND FieldId = ".$FieldId);
                $Counts = $DB->FetchColumn("Count", "ItemId");

                # if synonym support is enabled
                if ($this->SynonymsEnabled)
                {
                    # look for any synonyms
                    $DB->Query("SELECT WordIdA, WordIdB"
                            ." FROM SearchWordSynonyms"
                            ." WHERE WordIdA = ".$WordId
                            ." OR WordIdB = ".$WordId);

                    # if synonyms were found
                    if ($DB->NumRowsSelected())
                    {
                        # retrieve synonym IDs
                        # (all are collected before the DB object is
                        #       reused for the count queries below)
                        $SynonymIds = array();
                        while ($Record = $DB->FetchRow())
                        {
                            $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                                    ? $Record["WordIdB"]
                                    : $Record["WordIdA"];
                        }

                        # for each synonym
                        foreach ($SynonymIds as $SynonymId)
                        {
                            # retrieve counts for synonym
                            $DB->Query("SELECT ItemId,Count"
                                    ." FROM SearchWordCounts"
                                    ." WHERE WordId = ".$SynonymId
                                    ." AND FieldId = ".$FieldId);
                            $SynonymCounts = $DB->FetchColumn("Count", "ItemId");

                            # for each count
                            foreach ($SynonymCounts as $ItemId => $Count)
                            {
                                # adjust count because it's a synonym
                                $AdjustedCount = ceil($Count / 2);

                                # add count to existing counts
                                if (isset($Counts[$ItemId]))
                                {
                                    $Counts[$ItemId] += $AdjustedCount;
                                }
                                else
                                {
                                    $Counts[$ItemId] = $AdjustedCount;
                                }
                            }
                        }
                    }
                }
            }

            # if stemming is enabled
            if ($this->StemmingEnabled)
            {
                # retrieve word stem
                $Stem = PorterStemmer::Stem($Word);

                # if stem was different from word
                if ($Stem != $Word)
                {
                    # retrieve stem ID
                    $this->DMsg(2, "Looking up stem \"".$Stem."\"");
                    $StemId = $this->GetStemId($Stem);

                    # if ID found for stem
                    if ($StemId !== NULL)
                    {
                        # retrieve counts for stem
                        $DB->Query("SELECT ItemId,Count"
                                ." FROM SearchWordCounts"
                                ." WHERE WordId = ".$StemId
                                ." AND FieldId = ".$FieldId);
                        $StemCounts = $DB->FetchColumn("Count", "ItemId");

                        # for each count
                        foreach ($StemCounts as $ItemId => $Count)
                        {
                            # adjust count because it's a stem
                            $AdjustedCount = ceil($Count / 2);

                            # add count to existing counts
                            if (isset($Counts[$ItemId]))
                            {
                                $Counts[$ItemId] += $AdjustedCount;
                            }
                            else
                            {
                                $Counts[$ItemId] = $AdjustedCount;
                            }
                        }
                    }
                }
            }

            # if counts were found
            if (isset($Counts))
            {
                # for each count
                foreach ($Counts as $ItemId => $Count)
                {
                    # if word flagged as required
                    if ($Flags & self::WORD_REQUIRED)
                    {
                        # increment required word count for record
                        if (isset($this->RequiredTermCounts[$ItemId]))
                        {
                            $this->RequiredTermCounts[$ItemId]++;
                        }
                        else
                        {
                            $this->RequiredTermCounts[$ItemId] = 1;
                        }
                    }

                    # add to item record score
                    if (isset($Scores[$ItemId]))
                    {
                        $Scores[$ItemId] += $Count;
                    }
                    else
                    {
                        $Scores[$ItemId] = $Count;
                    }
                }
            }
        }
    }

    # return basic scores to caller
    return $Scores;
}
1210 
/**
 * Extract double-quoted phrases (terms surrounded by quotes) from a
 * search string.  The character immediately before the opening quote
 * is treated as an option character ("-" excludes, "+" requires, "~"
 * suppresses the implicit requirement under AND logic).
 * @param string $SearchString Search string.
 * @param string $Logic Search logic ("AND" or "OR").
 * @return array Flags (WORD_ constants), indexed by phrase.
 */
private function ParseSearchStringForPhrases($SearchString, $Logic)
{
    # split into chunks delimited by double quote marks, so that
    #       odd-numbered chunks are the quoted text
    $Chunks = explode("\"", $SearchString);
    $ChunkCount = count($Chunks);

    # walk quoted chunks that have a closing quote mark
    $Found = array();
    for ($Pos = 1;  ($Pos + 1) < $ChunkCount;  $Pos += 2)
    {
        # grab phrase from chunk
        $Phrase = trim(addslashes($Chunks[$Pos]));
        $Flags = self::WORD_PRESENT;
        $IsNew = !isset($Found[$Phrase]);

        # grab option character (last character before opening quote)
        $Option = substr($Chunks[$Pos - 1], -1);

        # set flags and bump term counts to reflect option character
        if ($Option == "-")
        {
            $Flags |= self::WORD_EXCLUDED;
            if ($IsNew)
            {
                $this->ExcludedTermCount++;
            }
        }
        else
        {
            $IsRequired = ($Option == "+")
                    || (($Logic == "AND") && ($Option != "~"));
            if ($IsRequired)
            {
                $Flags |= self::WORD_REQUIRED;
                if ($IsNew)
                {
                    $this->RequiredTermCount++;
                }
            }
            if ($IsNew)
            {
                $this->InclusiveTermCount++;
                $this->SearchTermList[] = $Phrase;
            }
        }
        $Found[$Phrase] = $Flags;
    }

    # return phrases to caller
    return $Found;
}
1271 
/**
 * Search for phrase in specified field.  This base implementation
 * only terminates the script with an error message.
 * @param int $FieldId ID of field to search.
 * @param string $Phrase Phrase to look for.
 */
protected function SearchFieldForPhrases($FieldId, $Phrase)
{
    # not implemented here, so error out
    $Message = "<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\n";
    exit($Message);
}
1282 
/**
 * Search for specified phrases in specified field, adding to the scores
 * of items containing included phrases and removing items containing
 * excluded phrases.  For keyword searches, every field flagged for
 * keyword searching is searched in turn.
 * @param array $Phrases Flags (WORD_ constants), indexed by phrase.
 * @param array $Scores Current scores, indexed by item ID.
 * @param int $FieldId ID of field to search.
 * @param bool $ProcessNonExcluded Whether to handle phrases that are
 *       not flagged as excluded.  (OPTIONAL, defaults to TRUE)
 * @param bool $ProcessExcluded Whether to handle phrases that are
 *       flagged as excluded.  (OPTIONAL, defaults to TRUE)
 * @return array Updated scores, indexed by item ID.
 */
private function SearchForPhrases($Phrases, $Scores, $FieldId,
        $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
{
    # nothing to do if there are no phrases
    if (count($Phrases) <= 0)
    {
        return $Scores;
    }

    # keyword search: call ourself for each keyword-searchable field
    if ($FieldId == self::KEYWORD_FIELD_ID)
    {
        foreach ($this->FieldInfo as $KFieldId => $Info)
        {
            if ($Info["InKeywordSearch"])
            {
                $Scores = $this->SearchForPhrases(
                        $Phrases, $Scores, $KFieldId,
                        $ProcessNonExcluded, $ProcessExcluded);
            }
        }
        return $Scores;
    }

    # single field search
    foreach ($Phrases as $Phrase => $Flags)
    {
        $this->DMsg(2, "Searching for phrase '".$Phrase
                ."' in field ".$FieldId);

        # work out whether this phrase is handled on this pass
        $IsExcluded = ($Flags & self::WORD_EXCLUDED) ? TRUE : FALSE;
        $DoExclusion = $ProcessExcluded && $IsExcluded;
        $DoInclusion = $ProcessNonExcluded && !$IsExcluded;
        if (!$DoExclusion && !$DoInclusion)
        {
            continue;
        }

        # initialize score list if necessary
        if ($Scores === NULL)
        {
            $Scores = array();
        }

        # phrase is worth one point per word (times field weight)
        $WordCount = count(preg_split("/[\s]+/",
                $Phrase, -1, PREG_SPLIT_NO_EMPTY));

        # for each item that contains phrase
        foreach ($this->SearchFieldForPhrases($FieldId, $Phrase) as $ItemId)
        {
            # excluded phrase knocks item off of list
            if ($DoExclusion)
            {
                unset($Scores[$ItemId]);
                continue;
            }

            # calculate phrase value based on number of words and
            #       field weight
            $PhraseScore = $WordCount * $this->FieldInfo[$FieldId]["Weight"];
            $this->DMsg(2, "Phrase score is ".$PhraseScore);

            # bump up item record score
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += $PhraseScore;
            }
            else
            {
                $Scores[$ItemId] = $PhraseScore;
            }

            # count required phrase toward item's required term total
            if ($Flags & self::WORD_REQUIRED)
            {
                if (isset($this->RequiredTermCounts[$ItemId]))
                {
                    $this->RequiredTermCounts[$ItemId]++;
                }
                else
                {
                    $this->RequiredTermCounts[$ItemId] = 1;
                }
            }
        }
    }

    # return updated scores to caller
    return $Scores;
}
1389 
/**
 * Filter out of the scores list any items that contain words flagged
 * as excluded in the specified field.
 * @param array $Words Flags (WORD_ constants), indexed by word.
 * @param array $Scores Current scores, indexed by item ID.
 * @param int $FieldId ID of field to check.
 * @return array Filtered scores, indexed by item ID.
 */
private function FilterOnExcludedWords($Words, $Scores, $FieldId)
{
    $DB = $this->DB;

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        # skip words not flagged as excluded
        if (!($Flags & self::WORD_EXCLUDED))
        {
            continue;
        }

        # look up record ID for word (skip word if not in DB)
        $WordId = $this->GetWordId($Word);
        if ($WordId === NULL)
        {
            continue;
        }

        # look up items containing word
        # (built by concatenation, because "${...}" interpolation is
        #       deprecated as of PHP 8.2)
        $DB->Query("SELECT ItemId FROM SearchWordCounts "
                ."WHERE WordId=".$WordId." AND FieldId=".$FieldId);

        # for each item found
        while ($Record = $DB->FetchRow())
        {
            # if item record is in score list
            $ItemId = $Record["ItemId"];
            if (isset($Scores[$ItemId]))
            {
                # remove item record from score list
                $this->DMsg(3, "Filtering out item ".$ItemId
                        ." because it contained word \"".$Word."\"");
                unset($Scores[$ItemId]);
            }
        }
    }

    # returned filtered score list to caller
    return $Scores;
}
1438 
/**
 * Remove from the score list every item whose required-term tally is
 * missing or below the number of required terms in the search.
 * @param array $Scores Current scores, indexed by item ID.
 * @return array Score list with items lacking required terms removed.
 */
private function FilterOnRequiredWords($Scores)
{
    # nothing to filter when the search had no required terms
    $Needed = $this->RequiredTermCount;
    if (!($Needed > 0))
    {
        return $Scores;
    }

    foreach ($Scores as $ItemId => $Score)
    {
        # fetch tally of required terms found for this item (if any)
        $Found = isset($this->RequiredTermCounts[$ItemId])
                ? $this->RequiredTermCounts[$ItemId] : NULL;

        # keep items that reached the required tally
        if (($Found !== NULL) && !($Found < $Needed))
        {
            continue;
        }

        # report and discard everything else
        $HadMsg = ($Found !== NULL)
                ? " (only had ".$Found
                : " (had none";
        $this->DMsg(4, "Filtering out item ".$ItemId
                ." because it didn't have required word count of "
                .$Needed.$HadMsg.")");
        unset($Scores[$ItemId]);
    }

    # return filtered list to caller
    return $Scores;
}
1474 
/**
 * Filter, group, sort, and trim raw search scores.
 * Scores are run through the supplied filter functions, binned by item
 * type (items with no known type are dropped), sorted within each bin,
 * and optionally cut down to the requested window.  Result counts are
 * saved in NumberOfResultsAvailable and NumberOfResultsPerItemType.
 * @param array $Scores Raw scores, indexed by item ID.
 * @param int $StartingResult Offset of first result to keep in each bin.
 * @param int $NumberOfResults Maximum number of results to keep per bin.
 * @param mixed $SortByField Field to sort by, or array of fields indexed
 *       by item type.  NULL (or a missing array entry) sorts by score.
 * @param mixed $SortDescending Sort direction, or array of directions
 *       indexed by item type.  A missing array entry means descending.
 * @return array Scores indexed by item type, then by item ID.
 */
private function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # run results through any filter callbacks
    $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # remember how many results survived filtering
    $this->NumberOfResultsAvailable = count($Scores);

    # bin scores by item type, skipping items with no known type
    $Binned = array();
    foreach ($Scores as $ItemId => $Score)
    {
        $Type = $this->GetItemType($ItemId);
        if ($Type === NULL)
        {
            continue;
        }
        $Binned[$Type][$ItemId] = $Score;
    }

    # determine once whether only a window of results was requested
    $WantSubset = ($StartingResult > 0)
            || ($NumberOfResults < PHP_INT_MAX);

    foreach ($Binned as $Type => $TypeScores)
    {
        # resolve sort field for this item type
        if (!is_array($SortByField))
        {
            $Field = $SortByField;
        }
        elseif (isset($SortByField[$Type]))
        {
            $Field = $SortByField[$Type];
        }
        else
        {
            $Field = NULL;
        }

        # resolve sort direction for this item type
        if (!is_array($SortDescending))
        {
            $Descending = $SortDescending;
        }
        elseif (isset($SortDescending[$Type]))
        {
            $Descending = $SortDescending[$Type];
        }
        else
        {
            $Descending = TRUE;
        }

        # save number of results for this item type
        $this->NumberOfResultsPerItemType[$Type] = count($TypeScores);

        $Ordered = $TypeScores;
        if ($Field === NULL)
        {
            # no sort field, so order by score in requested direction
            if ($Descending)
            {
                arsort($Ordered, SORT_NUMERIC);
            }
            else
            {
                asort($Ordered, SORT_NUMERIC);
            }
        }
        else
        {
            # ask for all item IDs of this type in field-sorted order
            $SortedIds = $this->GetItemIdsSortedByField(
                    $Type, $Field, $Descending);

            if (count($SortedIds) && count($TypeScores))
            {
                # rebuild scores following the sorted ID order, keeping
                # only IDs that actually appear in the results
                $Ordered = array();
                $KeptIds = array_intersect(
                        $SortedIds, array_keys($TypeScores));
                foreach ($KeptIds as $Id)
                {
                    $Ordered[$Id] = $TypeScores[$Id];
                }
            }
            else
            {
                # no sorted IDs available, so fall back to score order
                arsort($Ordered, SORT_NUMERIC);
            }
        }

        # trim back to requested window, keeping item IDs as keys
        if ($WantSubset)
        {
            $Ordered = array_slice(
                    $Ordered, $StartingResult, $NumberOfResults, TRUE);
        }

        $Binned[$Type] = $Ordered;
    }

    # return cleaned scores to caller
    return $Binned;
}
1592 
/**
 * Filter search scores through any supplied filter functions.
 * An item is discarded as soon as one filter function returns a true
 * value for its ID; remaining functions are not called for that item.
 * @param array $Scores Current scores, indexed by item ID.
 * @return array Score list with rejected items removed.
 */
protected function FilterOnSuppliedFunctions($Scores)
{
    # with no filter functions set, results pass through untouched
    if (!isset($this->FilterFuncs))
    {
        return $Scores;
    }

    foreach ($Scores as $ItemId => $Score)
    {
        foreach ($this->FilterFuncs as $FilterFunc)
        {
            # move on to next function if this one accepts the item
            if (!call_user_func($FilterFunc, $ItemId))
            {
                continue;
            }

            # item was rejected, so discard it and stop checking it
            $this->DMsg(2, "Filter callback <i>".$FilterFunc
                    ."</i> rejected item ".$ItemId);
            unset($Scores[$ItemId]);
            break;
        }
    }

    # return filtered list to caller
    return $Scores;
}
1627 
/**
 * Run comparison searches (explicit operators, or any search against a
 * non-text field) and merge the matching items into the score list.
 * With "AND" logic the comparison results narrow the existing scores
 * (or seed them when there are no prior scores and no keyword terms);
 * otherwise each matching item has its score increased by one.
 * @param array $SearchStrings Arrays of search strings, indexed by
 *       field ID.  The keyword pseudo-field is ignored.
 * @param string $Logic Search logic ("AND" or anything else for OR).
 * @param array|null $Scores Current scores, indexed by item ID, or NULL
 *       if no scores have been established yet.
 * @return array|null Updated scores.
 */
private function SearchForComparisonMatches($SearchStrings, $Logic, $Scores)
{
    # gather comparisons from all non-keyword fields
    $FieldIds = array();
    $Operators = array();
    $Values = array();
    foreach ($SearchStrings as $SearchFieldId => $SearchStringArray)
    {
        if ($SearchFieldId == self::KEYWORD_FIELD_ID)
        {
            continue;
        }

        foreach ($SearchStringArray as $SearchString)
        {
            # look for comparison operator at start of search string
            $FoundOperator = preg_match(
                    self::COMPARISON_OPERATOR_PATTERN,
                    $SearchString, $Matches);

            # text fields are only compared when an operator was given,
            # while all other field types are always comparison searches
            if (!$FoundOperator
                    && ($this->FieldInfo[$SearchFieldId]["FieldType"]
                            == self::FIELDTYPE_TEXT))
            {
                continue;
            }

            # strip operator (if any) to get value to compare against,
            # and assume equality when no operator was supplied
            $Value = trim(preg_replace(
                    self::COMPARISON_OPERATOR_PATTERN, '\2',
                    $SearchString));
            $Operator = $FoundOperator ? $Matches[1] : "=";

            # save comparison
            $FieldIds[] = $SearchFieldId;
            $Operators[] = $Operator;
            $Values[] = $Value;
            $this->DMsg(3, "Added comparison (field = <i>"
                    .$SearchFieldId."</i> op = <i>"
                    .$Operator."</i> val = <i>"
                    .$Value."</i>)");
        }
    }

    # nothing further to do if no comparisons were found
    if (count($Operators) == 0)
    {
        return $Scores;
    }

    # perform comparisons on fields and gather results
    $Results = $this->SearchFieldsForComparisonMatches(
            $FieldIds, $Operators, $Values, $Logic);

    if ($Logic == "AND")
    {
        if (count($Results) == 0)
        {
            # nothing matched the comparisons, so nothing can match overall
            $Scores = array();
        }
        elseif (empty($Scores) && ($this->InclusiveTermCount == 0))
        {
            # no prior results and no keyword terms, so the comparison
            # results are the results ($Scores may be NULL here, which
            # is why empty() is used rather than count())
            if ($Scores === NULL) {  $Scores = array();  }
            foreach ($Results as $ItemId)
            {
                $Scores[$ItemId] = 1;
            }
        }
        elseif (is_array($Scores))
        {
            # remove anything from scores that is not part of results
            foreach ($Scores as $ItemId => $Score)
            {
                if (in_array($ItemId, $Results) == FALSE)
                {
                    unset($Scores[$ItemId]);
                }
            }
        }
    }
    else
    {
        # add result items to scores
        if ($Scores === NULL) {  $Scores = array();  }
        foreach ($Results as $ItemId)
        {
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += 1;
            }
            else
            {
                $Scores[$ItemId] = 1;
            }
        }
    }

    # return results to caller
    return $Scores;
}
1759 
/**
 * Pass every search string through ExtractDebugLevel(), which may set
 * the debug level and strip the debug indicator from the string.
 * @param mixed $SearchStrings Single search string, or array of search
 *       strings and/or arrays of search strings, indexed by field ID.
 * @return mixed Search string(s) in the same structure as supplied,
 *       after processing by ExtractDebugLevel().
 */
private function SetDebugLevel($SearchStrings)
{
    # a lone search string is processed directly
    if (!is_array($SearchStrings))
    {
        return $this->ExtractDebugLevel($SearchStrings);
    }

    foreach ($SearchStrings as $FieldId => $Entry)
    {
        if (!is_array($Entry))
        {
            # single search string for this field
            $SearchStrings[$FieldId] = $this->ExtractDebugLevel($Entry);
            continue;
        }

        # list of search strings for this field
        foreach ($Entry as $Index => $SearchString)
        {
            $SearchStrings[$FieldId][$Index] =
                    $this->ExtractDebugLevel($SearchString);
        }
    }

    # return new search info to caller
    return $SearchStrings;
}
1803 
/**
 * Set the debug level from a "DBUGLVL=N" indicator at the start of a
 * search string (leading whitespace allowed, N made of digits 1-9, at
 * most two of which are used), and strip the indicator from the string.
 * Strings without a leading indicator are returned unchanged and leave
 * the debug level alone.
 * @param string $SearchString Search string to examine.
 * @return string Search string with any recognized indicator removed.
 */
private function ExtractDebugLevel($SearchString)
{
    # capture the level directly, so that a string that merely contains
    # "DBUGLVL=" somewhere else is never mistaken for a level value
    if (preg_match("/^\\s*DBUGLVL=([1-9]{1,2})/", $SearchString, $Matches))
    {
        # set debug level
        $Level = (int)$Matches[1];
        $this->DebugLevel = $Level;
        $this->DMsg(0, "Setting debug level to ".$Level);

        # remove indicator (with surrounding whitespace) from string
        $SearchString = preg_replace(
                "/\\s*DBUGLVL=".$Level."\\s*/", "", $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
1829 
/**
 * Build a score list that gives a score of 1 to every item of the
 * specified types found in the item table.
 * @param array $ItemTypes Item types to load.
 * @return array Scores (all 1), indexed by item ID.  Empty if no item
 *       types were supplied.
 */
private function LoadScoresForAllRecords($ItemTypes)
{
    # without any item types there is nothing to load
    if (!count($ItemTypes))
    {
        return [];
    }

    # retrieve IDs of all items that have one of the given types
    $TypeList = implode(",", $ItemTypes);
    $Query = "SELECT ".$this->ItemIdFieldName." AS ItemId"
            ." FROM ".$this->ItemTableName
            ." WHERE ".$this->ItemTypeFieldName." IN(".$TypeList.")";
    $this->DB->Query($Query);
    $ItemIds = $this->DB->FetchColumn("ItemId");

    # give every retrieved item the same score
    return array_fill_keys($ItemIds, 1);
}
1851 
1852  # ---- private methods (search DB building)
1853 
/**
 * Add to the stored count for a word (and, when stemming is enabled
 * and the word is not numeric, for its stem if that differs).
 * The word receives the full weight and the stem half of it, rounded up.
 * @param string $Word Word to record.
 * @param int $ItemId ID of item the word was found in.
 * @param int $FieldId ID of field the word was found in.
 * @param int $Weight Amount to add to the count.  (OPTIONAL, default 1)
 */
private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
{
    # begin with ID for the word itself, adding word to DB if needed
    $TermIds = array($this->GetWordId($Word, TRUE));

    # include ID for stem of word when stemming applies and stem differs
    if ($this->StemmingEnabled && !is_numeric($Word))
    {
        $Stem = PorterStemmer::Stem($Word, TRUE);
        if ($Stem != $Word)
        {
            $TermIds[] = $this->GetStemId($Stem, TRUE);
        }
    }

    foreach ($TermIds as $TermId)
    {
        if (!isset($this->WordCountAdded[$TermId][$FieldId]))
        {
            # first sighting, so create count record and note that we did
            $this->DB->Query("INSERT INTO SearchWordCounts"
                    ." (WordId, ItemId, FieldId, Count) VALUES"
                    ." (".$TermId.", ".$ItemId.", ".$FieldId.", ".$Weight.")");
            $this->WordCountAdded[$TermId][$FieldId] = TRUE;
        }
        else
        {
            # count record already exists, so add to it
            $this->DB->Query("UPDATE SearchWordCounts SET Count=Count+".$Weight
                    ." WHERE WordId=".$TermId
                    ." AND ItemId=".$ItemId
                    ." AND FieldId=".$FieldId);
        }

        # whatever comes next (the stem) gets half the weight, rounded up
        $Weight = ceil($Weight / 2);
    }
}
1907 
/**
 * Retrieve content for specified field for specified item.  This
 * implementation always throws.
 * @param int $ItemId ID of item.
 * @param int $FieldId ID of field.
 * @throws Exception Always, because no implementation is provided here.
 */
protected function GetFieldContent($ItemId, $FieldId)
{
    # error out ("new" is required to construct the exception; without it
    # PHP attempts to call an undefined function named Exception)
    throw new Exception("GetFieldContent() not implemented.");
}
1918 
/**
 * Record word counts for a piece of text belonging to an item field.
 * Each distinct word parsed from the text is counted once for the field
 * (with the default weight) and, if requested, once for the keyword
 * pseudo-field with the supplied weight.
 * @param int $ItemId ID of item the text belongs to.
 * @param int $FieldId ID of field the text came from.
 * @param int $Weight Weight used for the keyword field count.
 * @param string $Text Text to index.
 * @param bool $IncludeInKeyword TRUE to also record keyword field counts.
 */
private function RecordSearchInfoForText(
        $ItemId, $FieldId, $Weight, $Text, $IncludeInKeyword)
{
    # break text down into normalized words, ignoring phrases and groups
    $Words = $this->ParseSearchStringForWords($Text, "OR", TRUE);

    foreach (array_keys($Words) as $Word)
    {
        # count word for the field itself
        $this->UpdateWordCount($Word, $ItemId, $FieldId);

        # count word for keyword searching as well when requested
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $Word, $ItemId, self::KEYWORD_FIELD_ID, $Weight);
        }
    }
}
1953 
1954  # ---- common private methods (used in both searching and DB build)
1955 
/**
 * Normalize a search string (or text being indexed) and break it into
 * individual lower-case words, each with WORD_* flags attached.
 * A leading "-" marks a word as excluded, a leading "~" keeps it from
 * being required, and a leading "+" (or $Logic being "AND") marks it as
 * required.  As a side effect, ExcludedTermCount, RequiredTermCount,
 * InclusiveTermCount, and SearchTermList are updated the first time each
 * word is seen within this call.
 * @param string $SearchString Text to parse.
 * @param string $Logic Search logic; "AND" makes unprefixed words required.
 * @param bool $IgnorePhrases If TRUE, only the quote and parenthesis
 *       characters are removed, rather than the whole quoted phrases and
 *       parenthesized groups.  (OPTIONAL, defaults to FALSE)
 * @return array Word flags (bitwise OR of WORD_* values), indexed by word.
 */
1966  private function ParseSearchStringForWords(
1967  $SearchString, $Logic, $IgnorePhrases = FALSE)
1968  {
1969  # strip off any surrounding whitespace
1970  $Text = trim($SearchString);
1971
1972  # define phrase and group search patterns separately, so that we can
1973  # later replace them easily if necessary
1974  $PhraseSearchPattern = "/\"[^\"]*\"/";
1975  $GroupSearchPattern = "/\\([^)]*\\)/";
1976
1977  # set up search string normalization replacement strings (NOTE: these
1978  # are performed in sequence, so the order IS SIGNIFICANT)
1979  $ReplacementPatterns = array(
1980  # get rid of possessive endings (apostrophe-s followed by a separator)
1981  "/'s[^a-z0-9\\-+~]+/i" => " ",
1982  # get rid of single quotes / apostrophes
1983  "/'/" => "",
1984  # get rid of phrases
1985  $PhraseSearchPattern => " ",
1986  # get rid of groups
1987  $GroupSearchPattern => " ",
1988  # convert everything but alphanumerics and minus/plus/tilde to a space
1989  # (the pattern has no capture group, so \1 in the replacement is empty)
1989  "/[^a-z0-9\\-+~]+/i" => "\\1 ",
1990  # truncate any runs of minus/plus/tilde to just the first char
1991  "/([~+-])[~+-]+/" => "\\1",
1992  # convert two alphanumerics segments separated by a minus into
1993  # both separate words and a single combined word
1994  "/([~+-]?)([a-z0-9]+)-([a-z0-9]+)/i" => "\\1\\2 \\1\\3 \\1\\2\\3",
1995  # convert minus/plus/tilde preceded by anything but whitespace to a space
1996  "/([^\\s])[~+-]+/i" => "\\1 ",
1997  # convert minus/plus/tilde followed by whitespace to a space
1998  "/[~+-]+\\s/i" => " ",
1999  # convert multiple spaces to one space
2000  "/[ ]+/" => " ",
2001  );
2002
2003  # if we are supposed to ignore phrasing (series of words in quotes)
2004  # and grouping (series of words surrounded by parens)
2005  if ($IgnorePhrases)
2006  {
2007  # switch phrase removal to double quote removal
2008  # and switch group removal to paren removal
2009  # (the list is rebuilt entry by entry so that the order is preserved)
2009  foreach ($ReplacementPatterns as $Pattern => $Replacement)
2010  {
2011  if ($Pattern == $PhraseSearchPattern)
2012  {
2013  $Pattern = "/\"/";
2014  }
2015  elseif ($Pattern == $GroupSearchPattern)
2016  {
2017  $Pattern = "/[\(\)]+/";
2018  }
2019  $NewReplacementPatterns[$Pattern] = $Replacement;
2020  }
2021  $ReplacementPatterns = $NewReplacementPatterns;
2022  }
2023
2024  # remove punctuation from text and normalize whitespace
2025  $Text = preg_replace(array_keys($ReplacementPatterns),
2026  $ReplacementPatterns, $Text);
2027  $this->DMsg(2, "Normalized search string is '".$Text."'");
2028
2029  # convert text to lower case
2030  $Text = strtolower($Text);
2031
2032  # strip off any extraneous whitespace
2033  $Text = trim($Text);
2034
2035  # start with an empty array
2036  $Words = array();
2037
2038  # if we have words left after parsing
2039  if (strlen($Text) != 0)
2040  {
2041  # for each word
2042  foreach (explode(" ", $Text) as $Word)
2043  {
2044  # grab first character of word
2045  $FirstChar = substr($Word, 0, 1);
2046
2047  # strip off option characters and set flags appropriately
2048  $Flags = self::WORD_PRESENT;
2049  if ($FirstChar == "-")
2050  {
2051  $Word = substr($Word, 1);
2052  $Flags |= self::WORD_EXCLUDED;
2053  # count excluded term only the first time the word is seen
2053  if (!isset($Words[$Word]))
2054  {
2055  $this->ExcludedTermCount++;
2056  }
2057  }
2058  else
2059  {
2060  # tilde prefix keeps word from being required, even with AND logic
2060  if ($FirstChar == "~")
2061  {
2062  $Word = substr($Word, 1);
2063  }
2064  elseif (($Logic == "AND")
2065  || ($FirstChar == "+"))
2066  {
2067  if ($FirstChar == "+")
2068  {
2069  $Word = substr($Word, 1);
2070  }
2071  $Flags |= self::WORD_REQUIRED;
2072  if (!isset($Words[$Word]))
2073  {
2074  $this->RequiredTermCount++;
2075  }
2076  }
2077  # count non-excluded term only the first time the word is seen
2077  if (!isset($Words[$Word]))
2078  {
2079  $this->InclusiveTermCount++;
2080  $this->SearchTermList[] = $Word;
2081  }
2082  }
2083
2084  # store flags to indicate word found
2085  # (flags from a later occurrence replace those from an earlier one)
2085  $Words[$Word] = $Flags;
2086  $this->DMsg(3, "Word identified (".$Word.")");
2087  }
2088  }
2089
2090  # return normalized words to caller
2091  return $Words;
2092  }
2093 
/**
 * Look up the ID for a word in the SearchWords table, optionally adding
 * the word if it is not there.  IDs that were found or added are kept in
 * a static cache and reused on later calls.
 * @param string $Word Word to look up.
 * @param bool $AddIfNotFound If TRUE, a word that is not found is added
 *       to the table.  (OPTIONAL, defaults to FALSE)
 * @return mixed ID for word, or NULL if word was not found and not added.
 */
private function GetWordId($Word, $AddIfNotFound = FALSE)
{
    static $KnownIds;

    # answer from cache whenever possible
    if (isset($KnownIds[$Word]))
    {
        return $KnownIds[$Word];
    }

    # otherwise look up ID in database
    $WordId = $this->DB->Query(
            "SELECT WordId FROM SearchWords"
                    ." WHERE WordText='".addslashes($Word)."'",
            "WordId");

    # add word to database if it was missing and caller asked for that
    if ($AddIfNotFound && ($WordId === NULL))
    {
        $this->DB->Query("INSERT INTO SearchWords (WordText)"
                ." VALUES ('".addslashes(strtolower($Word))."')");
        $WordId = $this->DB->LastInsertId();
    }

    # remember ID for next time and hand it back
    $KnownIds[$Word] = $WordId;
    return $WordId;
}
2137 
/**
 * Look up the ID for a word stem in the SearchStems table, optionally
 * adding the stem if it is not there.  Stem IDs are the database ID
 * plus STEM_ID_OFFSET.  IDs that were found or added are kept in a
 * static cache and reused on later calls.
 * @param string $Stem Word stem to look up.
 * @param bool $AddIfNotFound If TRUE, a stem that is not found is added
 *       to the table.  (OPTIONAL, defaults to FALSE)
 * @return int|null ID for stem, or NULL if stem was not found and not
 *       added.
 */
private function GetStemId($Stem, $AddIfNotFound = FALSE)
{
    static $StemIdCache;

    # if stem was in ID cache
    if (isset($StemIdCache[$Stem]))
    {
        # use ID from cache
        $StemId = $StemIdCache[$Stem];
    }
    else
    {
        # look up ID in database
        $StemId = $this->DB->Query("SELECT WordId"
                ." FROM SearchStems"
                ." WHERE WordText='".addslashes($Stem)."'",
                "WordId");

        # if ID was not found and caller requested it be added
        if (($StemId === NULL) && $AddIfNotFound)
        {
            # add stem to database
            $this->DB->Query("INSERT INTO SearchStems (WordText)"
                    ." VALUES ('".addslashes(strtolower($Stem))."')");

            # get ID for newly added stem
            $StemId = $this->DB->LastInsertId();
        }

        # adjust from DB ID value to stem ID value, but only when there
        # is a DB ID to adjust (otherwise a missing stem would come back
        # as STEM_ID_OFFSET and that bogus ID would be cached, hiding the
        # fact that the stem is missing from later add-if-not-found calls)
        if ($StemId !== NULL)
        {
            $StemId += self::STEM_ID_OFFSET;
        }

        # save ID to cache
        $StemIdCache[$Stem] = $StemId;
    }

    # return ID to caller
    return $StemId;
}
2184 
/**
 * Get the text of the word or stem with the specified ID.  IDs at or
 * above STEM_ID_OFFSET are looked up in SearchStems (after subtracting
 * the offset) and all others in SearchWords.  Text that was found is
 * kept in a static cache and reused on later calls.
 * @param int $WordId ID of word or stem.
 * @return mixed Text of word or stem, as returned by the database lookup.
 */
private function GetWord($WordId)
{
    static $WordCache;

    # if word was in cache
    if (isset($WordCache[$WordId]))
    {
        # use word from cache
        return $WordCache[$WordId];
    }

    # hold on to the ID we were asked about for use as the cache index,
    # so that a stem is never cached under the (colliding) ID of a word
    $CacheKey = $WordId;

    # adjust search location and word ID if word is stem
    $TableName = "SearchWords";
    if ($WordId >= self::STEM_ID_OFFSET)
    {
        $TableName = "SearchStems";
        $WordId -= self::STEM_ID_OFFSET;
    }

    # look up word in database
    $Word = $this->DB->Query("SELECT WordText"
            ." FROM ".$TableName
            ." WHERE WordId='".$WordId."'",
            "WordText");

    # save word to cache
    $WordCache[$CacheKey] = $Word;

    # return word to caller
    return $Word;
}
2223 
/**
 * Get the type of the specified item.  All item types are loaded from
 * the SearchItemTypes table on first use and kept in a static cache.
 * @param int $ItemId ID of item.
 * @return int|null Item type, or NULL if no type is recorded for item.
 */
private function GetItemType($ItemId)
{
    static $TypesById;

    # load types for all items the first time through
    if ($TypesById === NULL)
    {
        $this->DB->Query("SELECT * FROM SearchItemTypes");
        $TypesById = $this->DB->FetchColumn("ItemType", "ItemId");
    }

    # report no type for items that have none recorded
    if (!isset($TypesById[$ItemId]))
    {
        return NULL;
    }

    return (int)$TypesById[$ItemId];
}
2240 
/**
 * Print debug message if the current debug level is above the level
 * specified for the message.
 * @param int $Level Level the debug setting must exceed for output.
 * @param string $Msg Message to print.
 */
protected function DMsg($Level, $Msg)
{
    # stay quiet unless debug level is above message level
    if (!($this->DebugLevel > $Level))
    {
        return;
    }

    print "SE: ".$Msg."<br>\n";
}
2253 
2254  # ---- BACKWARD COMPATIBILITY --------------------------------------------
2255 
2256  # possible types of logical operators
2257  const SEARCHLOGIC_AND = 1;
2258  const SEARCHLOGIC_OR = 2;
2259 
2260  # pattern to detect search strings that are explicit comparisons
2261  const COMPARISON_OPERATOR_PATTERN = '/^([><=^$@]+|!=)([^><=^$@])/';
2262 }
SearchTermCount()
Get total number of search terms indexed by search engine.
SetAllSynonyms($SynonymList)
Set all synonyms.
DropItem($ItemId)
Drop all data pertaining to item from search database.
AddField($FieldId, $FieldType, $ItemTypes, $Weight, $UsedInKeywordSearch)
Add field to include in searching.
RemoveSynonyms($Word, $Synonyms=NULL)
Remove synonym(s).
const KEYWORD_FIELD_ID
LoadSynonymsFromFile($FileName)
Load synonyms from a file.
Set of parameters used to perform a search.
SQL database abstraction object with smart query caching.
Definition: Database.php:22
SearchFieldForPhrases($FieldId, $Phrase)
Search for phrase in specified field.
GetAllSynonyms()
Get all synonyms.
const SEARCHLOGIC_OR
const FIELDTYPE_NUMERIC
FilterOnSuppliedFunctions($Scores)
Filter search scores through any supplied functions.
UpdateForItem($ItemId, $ItemType)
Update search database for the specified item.
AddSynonyms($Word, $Synonyms)
Add synonyms.
const FIELDTYPE_DATERANGE
const FIELDTYPE_DATE
const SEARCHLOGIC_AND
SearchTerms()
Get normalized list of search terms.
const WORD_EXCLUDED
NumberOfResults($ItemType=NULL)
Get number of results found by most recent search.
FieldWeight($FieldId)
Get search weight for specified field.
FieldType($FieldId)
Get type of specified field (text/numeric/date/daterange).
ItemCount()
Get total number of items indexed by search engine.
FieldedSearch($SearchStrings, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform search across multiple fields, with different values or comparisons specified for each field...
__construct($ItemTableName, $ItemIdFieldName, $ItemTypeFieldName)
Object constructor.
const STEM_ID_OFFSET
Search($SearchParams, $StartingResult=0, $NumberOfResults=PHP_INT_MAX, $SortByField=NULL, $SortDescending=TRUE)
Perform search with specified parameters.
RemoveAllSynonyms()
Remove all synonyms.
DMsg($Level, $Msg)
Print debug message if level set high enough.
const WORD_PRESENT
DropField($FieldId)
Drop all data pertaining to field from search database.
GetFieldContent($ItemId, $FieldId)
Retrieve content for specified field for specified item.
Core metadata archive search engine class.
const COMPARISON_OPERATOR_PATTERN
const FIELDTYPE_TEXT
const WORD_REQUIRED
DebugLevel($NewValue)
Set debug output level.
UpdateForItems($StartingItemId, $NumberOfItems)
Update search database for the specified range of items.
FieldedSearchWeightScale($SearchParams)
Get total of weights for all fields involved in search, useful for assessing scale of scores in searc...
FieldInKeywordSearch($FieldId)
Get whether specified field is included in keyword searches.
AddResultFilterFunction($FunctionName)
Add function that will be called to filter search results.
SearchTime()
Get time that last search took, in seconds.
GetSynonyms($Word)
Get synonyms for word.