3 # FILE: SPTSearchEngine.php
5 # Part of the Collection Workflow Integration System (CWIS)
6 # Copyright 2011-2013 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu/cwis/
14 # create a database handle
17 # pass database handle and config values to real search engine object
26 # for each field defined in schema
28 $Fields = $this->Schema->GetFields();
29 foreach ($Fields as $Field)
31 # determine field type for searching
32 switch ($Field->Type())
44 $FieldType = self::FIELDTYPE_TEXT;
49 $FieldType = self::FIELDTYPE_NUMERIC;
53 $FieldType = self::FIELDTYPE_DATERANGE;
57 $FieldType = self::FIELDTYPE_DATE;
65 exit(
"ERROR: unknown field type "
66 .$Field->Type().
" in SPTSearchEngine.php");
70 if ($FieldType !== NULL)
72 # add field to search engine
73 $this->
AddField($Field->Name(), $Field->DBFieldName(), $FieldType,
74 $Field->SearchWeight(), $Field->IncludeInKeywordSearch());
79 # overloaded version of method to retrieve text from DB
85 # retrieve text (including variants) from resource object and return to caller
86 return $Resource->Get($FieldName, FALSE, TRUE);
89 # overloaded version of method to retrieve resource/phrase match list
92 # normalize and escape search phrase for use in SQL query
93 $SearchPhrase = strtolower(addslashes($Phrase));
95 # query DB for matching list based on field type
96 $Field = $this->Schema->GetFieldByName($FieldName);
97 switch ($Field->Type())
103 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
104 .
"WHERE POSITION('".$SearchPhrase.
"'"
105 .
" IN LOWER(`".$Field->DBFieldName().
"`)) ";
109 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
110 .
"WHERE POSITION('".$SearchPhrase.
"'"
111 .
" IN LOWER(`".$Field->DBFieldName().
"AltText`)) ";
115 $NameTableSize = $this->DB->Query(
"SELECT COUNT(*) AS NameCount"
116 .
" FROM ControlledNames",
"NameCount");
117 $QueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
118 .
"FROM ResourceNameInts, ControlledNames "
119 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(ControlledName)) "
120 .
"AND ControlledNames.ControlledNameId"
121 .
" = ResourceNameInts.ControlledNameId "
122 .
"AND ControlledNames.FieldId = ".$Field->Id();
123 $SecondQueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
124 .
"FROM ResourceNameInts, ControlledNames, VariantNames "
125 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(VariantName)) "
126 .
"AND VariantNames.ControlledNameId"
127 .
" = ResourceNameInts.ControlledNameId "
128 .
"AND ControlledNames.ControlledNameId"
129 .
" = ResourceNameInts.ControlledNameId "
130 .
"AND ControlledNames.FieldId = ".$Field->Id();
134 $QueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
135 .
"FROM ResourceNameInts, ControlledNames "
136 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(ControlledName)) "
137 .
"AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
138 .
"AND ControlledNames.FieldId = ".$Field->Id();
142 $QueryString =
"SELECT DISTINCT ResourceClassInts.ResourceId "
143 .
"FROM ResourceClassInts, Classifications "
144 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(ClassificationName)) "
145 .
"AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
146 .
"AND Classifications.FieldId = ".$Field->Id();
150 $UserId = $this->DB->Query(
"SELECT UserId FROM APUsers "
151 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(UserName)) "
152 .
"OR POSITION('".$SearchPhrase.
"' IN LOWER(RealName))",
"UserId");
155 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
156 .
"WHERE `".$Field->DBFieldName().
"` = ".$UserId;
161 if ($SearchPhrase > 0)
163 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
164 .
"WHERE `".$Field->DBFieldName().
"` = ".(int)$SearchPhrase;
172 # (these types not yet handled by search engine for phrases)
176 # build match list based on results returned from DB
177 if (isset($QueryString))
179 $this->
DMsg(7,
"Performing phrase search query (<i>".$QueryString.
"</i>)");
180 if ($this->
DebugLevel > 9) { $StartTime = microtime(TRUE); }
181 $this->DB->Query($QueryString);
184 $EndTime = microtime(TRUE);
185 if (($StartTime - $EndTime) > 0.1)
187 printf(
"SE: Query took %.2f seconds<br>\n",
188 ($EndTime - $StartTime));
191 $MatchList = $this->DB->FetchColumn(
"ResourceId");
192 if (isset($SecondQueryString))
194 $this->
DMsg(7,
"Performing second phrase search query"
195 .
" (<i>".$SecondQueryString.
"</i>)");
196 if ($this->
DebugLevel > 9) { $StartTime = microtime(TRUE); }
197 $this->DB->Query($SecondQueryString);
200 $EndTime = microtime(TRUE);
201 if (($StartTime - $EndTime) > 0.1)
203 printf(
"SE: query took %.2f seconds<br>\n",
204 ($EndTime - $StartTime));
207 $MatchList = $MatchList + $this->DB->FetchColumn(
"ResourceId");
212 $MatchList = array();
215 # return list of matching resources to caller
219 # search field for records that meet comparison
222 # use SQL keyword appropriate to current search logic for combining operations
225 # for each comparison
226 foreach ($FieldNames as $Index => $FieldName)
228 $Operator = $Operators[$Index];
229 $Value = $Values[$Index];
231 # determine query based on field type
232 $Field = $this->Schema->GetFieldByName($FieldName);
235 switch ($Field->Type())
243 if (isset($Queries[
"Resources"]))
245 $Queries[
"Resources"] .= $CombineWord;
249 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId FROM Resources WHERE ";
253 $User =
new CWUser($Value);
254 $Value = $User->Id();
256 $Queries[
"Resources"] .=
"`".$Field->DBFieldName().
"` ".$Operator.
" '".addslashes($Value).
"' ";
260 $QueryIndex =
"ResourceNameInts".$Field->Id();
261 if (!isset($Queries[$QueryIndex][
"A"]))
263 $Queries[$QueryIndex][
"A"] =
264 "SELECT DISTINCT ResourceId"
265 .
" FROM ResourceNameInts, ControlledNames "
266 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
268 $CloseQuery[$QueryIndex][
"A"] = TRUE;
269 $ComparisonCount[$QueryIndex][
"A"] = 1;
270 $ComparisonCountField[$QueryIndex][
"A"] =
"ControlledName";
274 $Queries[$QueryIndex][
"A"] .=
" OR ";
275 $ComparisonCount[$QueryIndex][
"A"]++;
277 $Queries[$QueryIndex][
"A"] .=
278 "((ResourceNameInts.ControlledNameId"
279 .
" = ControlledNames.ControlledNameId"
280 .
" AND ControlledName "
281 .$Operator.
" '".addslashes($Value).
"'))";
282 if (!isset($Queries[$QueryIndex][
"B"]))
284 $Queries[$QueryIndex][
"B"] =
285 "SELECT DISTINCT ResourceId"
286 .
" FROM ResourceNameInts, ControlledNames,"
288 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
290 $CloseQuery[$QueryIndex][
"B"] = TRUE;
291 $ComparisonCount[$QueryIndex][
"B"] = 1;
292 $ComparisonCountField[$QueryIndex][
"B"] =
"ControlledName";
296 $Queries[$QueryIndex][
"B"] .=
" OR ";
297 $ComparisonCount[$QueryIndex][
"B"]++;
299 $Queries[$QueryIndex][
"B"] .=
300 "((ResourceNameInts.ControlledNameId"
301 .
" = ControlledNames.ControlledNameId"
302 .
" AND ResourceNameInts.ControlledNameId"
303 .
" = VariantNames.ControlledNameId"
305 .$Operator.
" '".addslashes($Value).
"'))";
309 $QueryIndex =
"ResourceNameInts".$Field->Id();
310 if (!isset($Queries[$QueryIndex]))
312 $Queries[$QueryIndex] =
313 "SELECT DISTINCT ResourceId FROM ResourceNameInts, ControlledNames "
314 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
316 $CloseQuery[$QueryIndex] = TRUE;
317 $ComparisonCount[$QueryIndex] = 1;
318 $ComparisonCountField[$QueryIndex] =
"ControlledName";
322 $Queries[$QueryIndex] .=
" OR ";
323 $ComparisonCount[$QueryIndex]++;
325 $Queries[$QueryIndex] .=
"(ResourceNameInts.ControlledNameId = ControlledNames.ControlledNameId"
326 .
" AND ControlledName ".$Operator.
" '".addslashes($Value).
"')";
330 $QueryIndex =
"ResourceClassInts".$Field->Id();
331 if (!isset($Queries[$QueryIndex]))
333 $Queries[$QueryIndex] =
"SELECT DISTINCT ResourceId FROM ResourceClassInts, Classifications "
334 .
" WHERE ResourceClassInts.ClassificationId = Classifications.ClassificationId"
335 .
" AND Classifications.FieldId = ".$Field->Id().
" AND ( ";
336 $CloseQuery[$QueryIndex] = TRUE;
337 $ComparisonCount[$QueryIndex] = 1;
338 $ComparisonCountField[$QueryIndex] =
"ClassificationName";
342 $Queries[$QueryIndex] .=
" OR ";
343 $ComparisonCount[$QueryIndex]++;
345 $Queries[$QueryIndex] .=
" ClassificationName ".$Operator.
" '".addslashes($Value).
"'";
349 # if value appears to have time component or text description
350 if (strpos($Value,
":")
351 || strstr($Value,
"day")
352 || strstr($Value,
"week")
353 || strstr($Value,
"month")
354 || strstr($Value,
"year")
355 || strstr($Value,
"hour")
356 || strstr($Value,
"minute"))
358 if (isset($Queries[
"Resources"]))
360 $Queries[
"Resources"] .= $CombineWord;
364 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
365 .
" FROM Resources WHERE ";
368 # flip operator if necessary
369 if (strstr($Value,
"ago"))
371 $OperatorFlipMap = array(
377 $Operator = isset($OperatorFlipMap[$Operator])
378 ? $OperatorFlipMap[$Operator] : $Operator;
381 # use strtotime method to build condition
382 $TimestampValue = strtotime($Value);
383 if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
385 if ((date(
"H:i:s", $TimestampValue) ==
"00:00:00")
386 && (strpos($Value,
"00:00") === FALSE)
387 && ($Operator ==
"<="))
390 date(
"Y-m-d", $TimestampValue).
" 23:59:59";
394 $NormalizedValue = date(
"Y-m-d H:i:s", $TimestampValue);
399 $NormalizedValue = addslashes($Value);
401 $Queries[
"Resources"] .=
402 " ( `".$Field->DBFieldName().
"` "
404 .
" '".$NormalizedValue.
"' ) ";
408 # use Date object method to build condition
409 $Date =
new Date($Value);
410 if ($Date->Precision())
412 if (isset($Queries[
"Resources"]))
414 $Queries[
"Resources"] .= $CombineWord;
418 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
419 .
" FROM Resources WHERE ";
421 $Queries[
"Resources"] .=
" ( ".$Date->SqlCondition(
422 $Field->DBFieldName(), NULL, $Operator).
" ) ";
428 $Date =
new Date($Value);
429 if ($Date->Precision())
431 if (isset($Queries[
"Resources"]))
433 $Queries[
"Resources"] .= $CombineWord;
437 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
438 .
" FROM Resources WHERE ";
440 $Queries[
"Resources"] .=
" ( ".$Date->SqlCondition(
441 $Field->DBFieldName().
"Begin",
442 $Field->DBFieldName().
"End", $Operator).
" ) ";
447 $QueryIndex =
"ReferenceInts".$Field->Id();
448 if (!isset($Queries[$QueryIndex]))
450 if (!isset($NameField))
453 $this->Schema->GetFieldByMappedName(
456 $Queries[$QueryIndex] =
457 "SELECT DISTINCT RI.SrcResourceId AS ResourceId"
458 .
" FROM ReferenceInts AS RI, Resources AS R "
459 .
" WHERE RI.FieldId = ".$Field->Id()
461 $CloseQuery[$QueryIndex] = TRUE;
465 $Queries[$QueryIndex] .= $CombineWord;
467 $Queries[$QueryIndex] .=
"(R.`".$NameField->DBFieldName().
"` "
468 .$Operator.
" '".addslashes($Value).
"'"
469 .
" AND R.ResourceId = RI.DstResourceId)";
474 # (these types not yet handled by search engine for comparisons)
483 # for each assembled query
484 foreach ($Queries as $QueryIndex => $Query)
486 # if query has multiple parts
487 if (is_array($Query))
489 # for each part of query
490 $ResourceIds = array();
491 foreach ($Query as $PartIndex => $PartQuery)
493 # add closing paren if query was flagged to be closed
494 if (isset($CloseQuery[$QueryIndex][$PartIndex]))
499 $PartQuery .=
"GROUP BY ResourceId HAVING"
501 .$ComparisonCountField[$QueryIndex][$PartIndex]
503 .$ComparisonCount[$QueryIndex][$PartIndex];
507 # perform query and retrieve IDs
508 $this->
DMsg(5,
"Performing comparison query (<i>"
509 .$PartQuery.
"</i>)");
510 $this->DB->Query($PartQuery);
511 $ResourceIds = $ResourceIds
512 + $this->DB->FetchColumn(
"ResourceId");
513 $this->
DMsg(5,
"Comparison query produced <i>"
514 .count($ResourceIds).
"</i> results");
519 # add closing paren if query was flagged to be closed
520 if (isset($CloseQuery[$QueryIndex]))
525 $Query .=
"GROUP BY ResourceId HAVING"
527 .$ComparisonCountField[$QueryIndex]
529 .$ComparisonCount[$QueryIndex];
533 # perform query and retrieve IDs
534 $this->
DMsg(5,
"Performing comparison query (<i>".$Query.
"</i>)");
535 $this->DB->Query($Query);
536 $ResourceIds = $this->DB->FetchColumn(
"ResourceId");
537 $this->
DMsg(5,
"Comparison query produced <i>"
538 .count($ResourceIds).
"</i> results");
541 # if we already have some results
544 # if search logic is set to AND
547 # remove anything from results that was not returned from query
548 $Results = array_intersect($Results, $ResourceIds);
552 # add values returned from query to results
553 $Results = array_unique(array_merge($Results, $ResourceIds));
558 # set results to values returned from query
559 $Results = $ResourceIds;
565 # initialize results to empty list
569 # return results to caller
576 return $RFactory->GetResourceIdsSortedBy($FieldName, !$SortDescending);
583 $TaskDescription =
"Update search data for"
584 .
" <a href=\"r".$ItemId.
"\"><i>"
585 .$Item->GetMapped(
"Title").
"</i></a>";
586 $GLOBALS[
"AF"]->QueueUniqueTask(array(__CLASS__,
"RunUpdateForItem"),
587 array(intval($ItemId)), $TaskPriority, $TaskDescription);
592 # check that resource still exists
594 if (!$RFactory->ItemExists($ItemId)) {
return; }
596 # update search data for resource
598 $SearchEngine->UpdateForItem($ItemId);
600 # And updated the cached Controlled Name and Classification mappings
601 # which are used to suggest search facets:
604 # Get all the classifications that are associated with this resource:
605 $DB->Query(
"SELECT DISTINCT ClassificationId FROM ResourceClassInts "
606 .
"WHERE ResourceId=".intval($ItemId));
607 $MyClasses = array();
608 foreach (
$DB->FetchRows() as $Row)
609 $MyClasses []= $Row[
"ClassificationId"];
611 # Update the class map cache for this resource:
612 $DB->Query(
"DELETE FROM ClassificationFacetCache WHERE ResourceId=".intval($ItemId));
614 if (count($MyClasses)>0)
615 $DB->Query(
"INSERT INTO ClassificationFacetCache (ResourceId, Classes) VALUES ".
616 "(".intval($ItemId).
",'".implode(
",",$MyClasses).
"')");
618 # Get all of the names that are associated with this resource:
619 $DB->Query(
"SELECT DISTINCT ControlledNameId FROM ResourceNameInts "
620 .
"WHERE ResourceId=".intval($ItemId));
623 foreach (
$DB->FetchRows() as $Row)
624 $MyNames []= $Row[
"ControlledNameId"];
626 # Update the name map cache for this resource:
627 $DB->Query(
"DELETE FROM ControlledNameFacetCache WHERE ResourceId=".intval($ItemId));
629 if (count($MyNames)>0)
630 $DB->Query(
"INSERT INTO ControlledNameFacetCache (ResourceId, Names) VALUES ".
631 "(".intval($ItemId).
",'".implode(
",",$MyNames).
"')");
642 # Classifications and names associated with these search results:
643 $SearchClasses = array();
644 $SearchNames = array();
646 # Disable DB cache for the search suggestions process,
647 # This avoids memory exhaustion.
651 if (count($SearchResults)>0)
653 # Pull out all the Classifications that were associated with our search results:
654 $DB->Query(
"SELECT * FROM ClassificationFacetCache "
655 .
"WHERE ResourceId IN "
656 .
"(".implode(
",",array_keys($SearchResults)).
")");
657 while ($Row=
$DB->FetchRow())
659 foreach (explode(
',',$Row[
"Classes"]) as $ClassId)
661 $SearchClasses[$ClassId] []= $Row[
"ResourceId"];
665 # Similarly with controlled names
666 $DB->Query(
"SELECT * from ControlledNameFacetCache "
667 .
"WHERE ResourceId in "
668 .
"(".implode(
",",array_keys($SearchResults)).
")");
669 while ($Row=
$DB->FetchRow())
671 foreach (explode(
',',$Row[
"Names"]) as $NameId)
673 $SearchNames[$NameId] []= $Row[
"ResourceId"];
678 # Generate a map of FieldId -> Field Names for all of the generated facets:
679 $SuggestionsById = array();
681 # Pull relevant Classification names out of the DB
682 if ( count($SearchClasses) > 0)
684 $DB->Query(
"SELECT FieldId,ClassificationId,ClassificationName FROM Classifications "
685 .
"WHERE ClassificationId IN (".implode(
",",array_keys($SearchClasses)).
")");
686 while ($Row =
$DB->FetchRow())
687 $SuggestionsById[$Row[
"FieldId"]] []=
688 array(
"Id" => $Row[
"ClassificationId"],
689 "Name" => $Row[
"ClassificationName"],
690 "Count" => count( $SearchClasses[ $Row[
"ClassificationId"] ] ) );
693 if (count($SearchNames)>0)
695 # Pull relevant ControlledNames out of the DB
696 $DB->Query(
"SELECT FieldId,ControlledNameId,ControlledName FROM ControlledNames "
697 .
"WHERE ControlledNameId IN (".implode(
",",array_keys($SearchNames)).
")");
698 while ($Row =
$DB->FetchRow())
699 $SuggestionsById[$Row[
"FieldId"]] []=
700 array(
"Id" => $Row[
"ControlledNameId"],
701 "Name" => $Row[
"ControlledName"],
702 "Count" => count( $SearchNames[ $Row[
"ControlledNameId"] ] ) );
705 # Now, translate the suggestions that we have in terms of the
706 # FieldIds to suggestions in terms of the Field Names.
707 $SuggestionsByFieldName = array();
709 # If we have *any* suggestions to offer:
710 if (count($SuggestionsById)>0)
712 # We'll need G_User for permissions checks:
716 # Fill in an array that maps FieldNames to search links
717 # which would be appropriate for that field
718 foreach ($SuggestionsById as $FieldId => $FieldValues)
720 $ThisField = $Schema->GetField($FieldId);
722 # Bail on fields taht didn't exist (possibly as a result of stale cache data)
723 # and on fields that the current user cannot view, and on fields that are disabled
724 # for advanced searching:
725 if (is_object($ThisField) &&
727 $ThisField->IncludeInFacetedSearch() &&
728 $ThisField->Enabled() &&
729 ($ThisField->ViewingPrivilege() == FALSE ||
730 $G_User->HasPriv($ThisField->ViewingPrivilege()) ) )
732 $SuggestionsByFieldName[ $ThisField->Name() ] = array();
734 foreach ($FieldValues as $Value)
735 $SuggestionsByFieldName [ $ThisField->Name() ] [$Value[
"Id"]] =
736 array(
"Name" => $Value[
"Name"],
"Count" => $Value[
"Count"] );
741 ksort($SuggestionsByFieldName);
743 return $SuggestionsByFieldName;
748 # functions for backward compatability w/ old SPT code
SearchFieldForPhrases($FieldName, $Phrase)
const PRIORITY_LOW
Lower priority.
SQL database abstraction object with smart query caching.
UpdateForResource($ItemId)
AddField($FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch)
GetFieldContent($ItemId, $FieldName)
SearchEngine(&$DB, $ItemTableName, $ItemIdFieldName, $ReferenceTableName, $ReferenceSourceIdFieldName, $ReferenceDestinationIdFieldName)
static RunUpdateForItem($ItemId)
static GetResultFacets($SearchResults)
Generate a list of suggested additional search terms that can be used for faceted searching...
SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
Represents a "resource" in CWIS.
static GetItemIdsSortedByField($FieldName, $SortDescending)
DefaultSearchLogic($NewSetting=NULL)
Factory for Resource objects.
CWIS-specific user class.
static QueueUpdateForItem($ItemId, $TaskPriority=ApplicationFramework::PRIORITY_LOW)