00001 <?PHP
00002 #
00003 # FILE: SPTSearchEngine.php
00004 #
00005 # Part of the Collection Workflow Integration System (CWIS)
00006 # Copyright 2011 Edward Almasy and Internet Scout Project
00007 # http://scout.wisc.edu/
00008 #
00009
00010 class SPTSearchEngine extends SearchEngine {
00011
# Object constructor.  Initializes the parent search engine to work against
# the "Resources" table (keyed by "ResourceId") and then registers every
# field in the metadata schema that has a corresponding search field type.
function SPTSearchEngine()
{
    # give parent class a database handle plus item table and ID column names
    $Database = new Database();
    $this->SearchEngine($Database, "Resources", "ResourceId");

    # load schema (kept for later field lookups by the search methods)
    $this->Schema = new MetadataSchema();

    # walk through all fields defined in schema
    foreach ($this->Schema->GetFields() as $SchemaField)
    {
        # map metadata field type onto search engine field type
        $MetadataType = $SchemaField->Type();
        switch ($MetadataType)
        {
            case MetadataSchema::MDFTYPE_TEXT:
            case MetadataSchema::MDFTYPE_PARAGRAPH:
            case MetadataSchema::MDFTYPE_USER:
            case MetadataSchema::MDFTYPE_TREE:
            case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
            case MetadataSchema::MDFTYPE_OPTION:
            case MetadataSchema::MDFTYPE_IMAGE:
            case MetadataSchema::MDFTYPE_FILE:
            case MetadataSchema::MDFTYPE_URL:
                $SearchType = self::FIELDTYPE_TEXT;
                break;

            case MetadataSchema::MDFTYPE_NUMBER:
            case MetadataSchema::MDFTYPE_FLAG:
                $SearchType = self::FIELDTYPE_NUMERIC;
                break;

            case MetadataSchema::MDFTYPE_DATE:
                $SearchType = self::FIELDTYPE_DATERANGE;
                break;

            case MetadataSchema::MDFTYPE_TIMESTAMP:
                $SearchType = self::FIELDTYPE_DATE;
                break;

            case MetadataSchema::MDFTYPE_POINT:
                # (point fields are not registered with the search engine)
                $SearchType = NULL;
                break;

            default:
                # bail out entirely on a field type we do not know about
                exit("ERROR: unknown field type "
                        .$MetadataType." in SPTSearchEngine.php");
        }

        # skip fields that have no search field type
        if ($SearchType === NULL) {  continue;  }

        # register field with search engine
        $this->AddField(
                $SchemaField->Name(),
                $SchemaField->DBFieldName(),
                $SearchType,
                $SchemaField->SearchWeight(),
                $SchemaField->IncludeInKeywordSearch());
    }
}
00071
# Overloaded version of method to retrieve field content for indexing.
#   $ItemId - ID of resource to load
#   $FieldName - name of metadata field to retrieve
# Returns whatever Resource::Get() yields for the field when called with
# (FALSE, TRUE) as its second and third arguments (text including variants).
function GetFieldContent($ItemId, $FieldName)
{
    # load the resource that the item ID refers to
    $Item = new Resource($ItemId);

    # pull field content (including variants) out of resource
    $Content = $Item->Get($FieldName, FALSE, TRUE);

    # hand content back to caller
    return $Content;
}
00081
# Overloaded version of method to retrieve resource/phrase match list.
# Builds and runs an SQL query (two for controlled name fields) that finds
# resources whose value for the given field contains the phrase, using a
# case-insensitive substring match (phrase is lowercased and compared
# against LOWER() of the column).
#   $FieldName - name of metadata field to search
#   $Phrase - phrase to search for
# Returns array of IDs of matching resources.  An empty array is returned
# when the field is unknown, when the field type is not handled for phrase
# searches, or when no query could be built from the phrase.
function SearchFieldForPhrases($FieldName, $Phrase)
{
    # normalize and escape search phrase for use in SQL query
    $SearchPhrase = strtolower(addslashes($Phrase));

    # look up field (unknown fields produce no matches, mirroring the way
    #       SearchFieldsForComparisonMatches() skips them)
    $Field = $this->Schema->GetFieldByName($FieldName);
    if ($Field == NULL) {  return array();  }

    # build query for matching list based on field type
    $QueryString = NULL;
    $SecondQueryString = NULL;
    switch ($Field->Type())
    {
        case MetadataSchema::MDFTYPE_TEXT:
        case MetadataSchema::MDFTYPE_PARAGRAPH:
        case MetadataSchema::MDFTYPE_FILE:
        case MetadataSchema::MDFTYPE_URL:
            $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                    ."WHERE POSITION('".$SearchPhrase."'"
                    ." IN LOWER(`".$Field->DBFieldName()."`)) ";
            break;

        case MetadataSchema::MDFTYPE_IMAGE:
            # (image fields are searched via their alt text column)
            $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                    ."WHERE POSITION('".$SearchPhrase."'"
                    ." IN LOWER(`".$Field->DBFieldName()."AltText`)) ";
            break;

        case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
            # first query matches against the controlled names themselves
            $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                    ."FROM ResourceNameInts, ControlledNames "
                    ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                    ."AND ControlledNames.ControlledNameId"
                            ." = ResourceNameInts.ControlledNameId "
                    ."AND ControlledNames.FieldId = ".$Field->Id();

            # second query matches against variants of the controlled names
            $SecondQueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                    ."FROM ResourceNameInts, ControlledNames, VariantNames "
                    ."WHERE POSITION('".$SearchPhrase."' IN LOWER(VariantName)) "
                    ."AND VariantNames.ControlledNameId"
                            ." = ResourceNameInts.ControlledNameId "
                    ."AND ControlledNames.ControlledNameId"
                            ." = ResourceNameInts.ControlledNameId "
                    ."AND ControlledNames.FieldId = ".$Field->Id();
            break;

        case MetadataSchema::MDFTYPE_OPTION:
            $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                    ."FROM ResourceNameInts, ControlledNames "
                    ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                    ."AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
                    ."AND ControlledNames.FieldId = ".$Field->Id();
            break;

        case MetadataSchema::MDFTYPE_TREE:
            $QueryString = "SELECT DISTINCT ResourceClassInts.ResourceId "
                    ."FROM ResourceClassInts, Classifications "
                    ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ClassificationName)) "
                    ."AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
                    ."AND Classifications.FieldId = ".$Field->Id();
            break;

        case MetadataSchema::MDFTYPE_USER:
            # find a user whose login or real name contains the phrase
            # NOTE(review): only a single UserId is retrieved here, so when
            #       several users match presumably just one of them is
            #       searched for -- confirm against Database::Query()
            $UserId = $this->DB->Query("SELECT UserId FROM APUsers "
                    ."WHERE POSITION('".$SearchPhrase."' IN LOWER(UserName)) "
                    ."OR POSITION('".$SearchPhrase."' IN LOWER(RealName))", "UserId");
            if ($UserId != NULL)
            {
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE `".$Field->DBFieldName()."` = ".$UserId;
            }
            break;

        case MetadataSchema::MDFTYPE_NUMBER:
            # (only positive values are searched for)
            if ($SearchPhrase > 0)
            {
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE `".$Field->DBFieldName()."` = ".(int)$SearchPhrase;
            }
            break;

        case MetadataSchema::MDFTYPE_FLAG:
        case MetadataSchema::MDFTYPE_DATE:
        case MetadataSchema::MDFTYPE_TIMESTAMP:
            # (these types not yet handled by search engine for phrases)
            break;
    }

    # build match list based on results returned from DB
    $MatchList = array();
    if ($QueryString !== NULL)
    {
        $this->DMsg(7, "Performing phrase search query (<i>".$QueryString."</i>)");
        if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
        $this->DB->Query($QueryString);
        if ($this->DebugLevel > 9)
        {
            # report queries that took noticeably long (elapsed = end - start)
            $EndTime = microtime(TRUE);
            if (($EndTime - $StartTime) > 0.1)
            {
                printf("SE: Query took %.2f seconds<br>\n",
                        ($EndTime - $StartTime));
            }
        }
        $MatchList = $this->DB->FetchColumn("ResourceId");

        if ($SecondQueryString !== NULL)
        {
            $this->DMsg(7, "Performing second phrase search query"
                    ." (<i>".$SecondQueryString."</i>)");
            if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
            $this->DB->Query($SecondQueryString);
            if ($this->DebugLevel > 9)
            {
                $EndTime = microtime(TRUE);
                if (($EndTime - $StartTime) > 0.1)
                {
                    printf("SE: query took %.2f seconds<br>\n",
                            ($EndTime - $StartTime));
                }
            }

            # combine both ID lists by value (the array "+" operator keeps
            #       the left-hand entry for every key present in both
            #       arrays, which would drop IDs from the second list)
            $MatchList = array_unique(array_merge(
                    $MatchList, $this->DB->FetchColumn("ResourceId")));
        }
    }

    # return list of matching resources to caller
    return $MatchList;
}
00210
# Search fields for records that meet the supplied comparisons.
# The three parameters are parallel arrays: entry N of each describes one
# comparison.  Conditions against columns of the Resources table are
# gathered into one query, while controlled name, option, and tree fields
# get their own query per field.  Conditions inside a query are joined with
# AND or OR according to the current default search logic, and the ID lists
# produced by the separate queries are intersected (AND) or merged (OR).
#   $FieldNames - names of metadata fields to compare against
#   $Operators - SQL comparison operators, indexed like $FieldNames
#   $Values - values to compare with, indexed like $FieldNames
# Returns array of IDs of matching resources (empty array when no query
# could be built).  Unknown fields are skipped, as are field types that
# have no case in the switch below.
function SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
{
    # use SQL keyword appropriate to current search logic for combining operations
    $CombineWord = ($this->DefaultSearchLogic == self::LOGIC_AND) ? " AND " : " OR ";

    # opening text shared by every query run directly against Resources
    $ResourcesQueryStart = "SELECT DISTINCT ResourceId FROM Resources WHERE ";

    # queries assembled so far (string, or array of strings for multi-part
    #       queries) and flags for queries that still need a closing paren
    $Queries = array();
    $CloseQuery = array();

    # for each comparison
    # NOTE(review): $Operator is inserted into the SQL below exactly as
    #       received -- presumably the caller restricts it to valid
    #       comparison operators; confirm
    foreach ($FieldNames as $Index => $FieldName)
    {
        $Operator = $Operators[$Index];
        $Value = $Values[$Index];

        # skip comparison if field is not in schema
        $Field = $this->Schema->GetFieldByName($FieldName);
        if ($Field == NULL) {  continue;  }

        # determine query based on field type
        switch ($Field->Type())
        {
            case MetadataSchema::MDFTYPE_TEXT:
            case MetadataSchema::MDFTYPE_PARAGRAPH:
            case MetadataSchema::MDFTYPE_NUMBER:
            case MetadataSchema::MDFTYPE_FLAG:
            case MetadataSchema::MDFTYPE_USER:
            case MetadataSchema::MDFTYPE_URL:
                # start or extend query against Resources table
                $Queries["Resources"] = isset($Queries["Resources"])
                        ? $Queries["Resources"].$CombineWord
                        : $ResourcesQueryStart;

                # user fields are compared by the ID of the specified user
                if ($Field->Type() == MetadataSchema::MDFTYPE_USER)
                {
                    $User = new SPTUser($Value);
                    $Value = $User->Id();
                }
                $Queries["Resources"] .= "`".$Field->DBFieldName()."` ".$Operator." '".addslashes($Value)."' ";
                break;

            case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                $QueryIndex = "ResourceNameInts".$Field->Id();

                # part A compares against the controlled names themselves
                if (!isset($Queries[$QueryIndex]["A"]))
                {
                    $Queries[$QueryIndex]["A"] =
                            "SELECT DISTINCT ResourceId"
                            ." FROM ResourceNameInts, ControlledNames "
                            ." WHERE ControlledNames.FieldId = ".$Field->Id()
                            ." AND ( ";
                    $CloseQuery[$QueryIndex]["A"] = TRUE;
                }
                else
                {
                    $Queries[$QueryIndex]["A"] .= $CombineWord;
                }
                $Queries[$QueryIndex]["A"] .=
                        "((ResourceNameInts.ControlledNameId"
                                ." = ControlledNames.ControlledNameId"
                        ." AND ControlledName "
                                .$Operator." '".addslashes($Value)."'))";

                # part B compares against variants of the controlled names
                if (!isset($Queries[$QueryIndex]["B"]))
                {
                    $Queries[$QueryIndex]["B"] =
                            "SELECT DISTINCT ResourceId"
                            . " FROM ResourceNameInts, ControlledNames,"
                                    ." VariantNames "
                            ." WHERE ControlledNames.FieldId = ".$Field->Id()
                            ." AND ( ";
                    $CloseQuery[$QueryIndex]["B"] = TRUE;
                }
                else
                {
                    $Queries[$QueryIndex]["B"] .= $CombineWord;
                }
                $Queries[$QueryIndex]["B"] .=
                        "((ResourceNameInts.ControlledNameId"
                                ." = ControlledNames.ControlledNameId"
                        ." AND ResourceNameInts.ControlledNameId"
                                ." = VariantNames.ControlledNameId"
                        ." AND VariantName "
                                .$Operator." '".addslashes($Value)."'))";
                break;

            case MetadataSchema::MDFTYPE_OPTION:
                $QueryIndex = "ResourceNameInts".$Field->Id();
                if (!isset($Queries[$QueryIndex]))
                {
                    $Queries[$QueryIndex] =
                            "SELECT DISTINCT ResourceId FROM ResourceNameInts, ControlledNames "
                            ." WHERE ControlledNames.FieldId = ".$Field->Id()
                            ." AND ( ";
                    $CloseQuery[$QueryIndex] = TRUE;
                }
                else
                {
                    $Queries[$QueryIndex] .= $CombineWord;
                }
                $Queries[$QueryIndex] .= "(ResourceNameInts.ControlledNameId = ControlledNames.ControlledNameId"
                        ." AND ControlledName ".$Operator." '".addslashes($Value)."')";
                break;

            case MetadataSchema::MDFTYPE_TREE:
                $QueryIndex = "ResourceClassInts".$Field->Id();
                if (!isset($Queries[$QueryIndex]))
                {
                    $Queries[$QueryIndex] = "SELECT DISTINCT ResourceId FROM ResourceClassInts, Classifications "
                            ." WHERE ResourceClassInts.ClassificationId = Classifications.ClassificationId"
                            ." AND Classifications.FieldId = ".$Field->Id()." AND ( ";
                    $CloseQuery[$QueryIndex] = TRUE;
                }
                else
                {
                    $Queries[$QueryIndex] .= $CombineWord;
                }
                $Queries[$QueryIndex] .= " ClassificationName ".$Operator." '".addslashes($Value)."'";
                break;

            case MetadataSchema::MDFTYPE_TIMESTAMP:
                # if value appears to have time component or text description
                # (strpos() yields 0 for a leading colon, which is falsy, so
                #       such values take the Date object path further down)
                if (strpos($Value, ":")
                        || strstr($Value, "day")
                        || strstr($Value, "week")
                        || strstr($Value, "month")
                        || strstr($Value, "year")
                        || strstr($Value, "hour")
                        || strstr($Value, "minute"))
                {
                    # start or extend query against Resources table
                    $Queries["Resources"] = isset($Queries["Resources"])
                            ? $Queries["Resources"].$CombineWord
                            : $ResourcesQueryStart;

                    # flip operator if necessary (for "... ago" values a
                    #       larger amount means an earlier point in time)
                    if (strstr($Value, "ago"))
                    {
                        $OperatorFlipMap = array(
                                "<" => ">=",
                                ">" => "<=",
                                "<=" => ">",
                                ">=" => "<",
                                );
                        $Operator = isset($OperatorFlipMap[$Operator])
                                ? $OperatorFlipMap[$Operator] : $Operator;
                    }

                    # use strtotime method to build condition
                    $TimestampValue = strtotime($Value);
                    if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
                    {
                        # when value resolved to midnight without the time
                        #       being given explicitly, make "<=" cover the
                        #       whole of that day
                        if ((date("H:i:s", $TimestampValue) == "00:00:00")
                                && (strpos($Value, "00:00") === FALSE)
                                && ($Operator == "<="))
                        {
                            $NormalizedValue =
                                    date("Y-m-d", $TimestampValue)." 23:59:59";
                        }
                        else
                        {
                            $NormalizedValue = date("Y-m-d H:i:s", $TimestampValue);
                        }
                    }
                    else
                    {
                        # value could not be parsed, so compare against it as is
                        $NormalizedValue = addslashes($Value);
                    }
                    $Queries["Resources"] .=
                            " ( `".$Field->DBFieldName()."` "
                            .$Operator
                            ." '".$NormalizedValue."' ) ";
                }
                else
                {
                    # use Date object method to build condition
                    $Date = new Date($Value);
                    if ($Date->Precision())
                    {
                        $Queries["Resources"] = isset($Queries["Resources"])
                                ? $Queries["Resources"].$CombineWord
                                : $ResourcesQueryStart;
                        $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                                $Field->DBFieldName(), NULL, $Operator)." ) ";
                    }
                }
                break;

            case MetadataSchema::MDFTYPE_DATE:
                # date fields are stored as a begin/end pair of columns
                $Date = new Date($Value);
                if ($Date->Precision())
                {
                    $Queries["Resources"] = isset($Queries["Resources"])
                            ? $Queries["Resources"].$CombineWord
                            : $ResourcesQueryStart;
                    $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                            $Field->DBFieldName()."Begin",
                            $Field->DBFieldName()."End", $Operator)." ) ";
                }
                break;

            case MetadataSchema::MDFTYPE_IMAGE:
            case MetadataSchema::MDFTYPE_FILE:
                # (these types not yet handled by search engine for comparisons)
                break;
        }
    }

    # for each assembled query
    $Results = NULL;
    foreach ($Queries as $QueryIndex => $Query)
    {
        # if query has multiple parts
        if (is_array($Query))
        {
            # for each part of query
            $ResourceIds = array();
            foreach ($Query as $PartIndex => $PartQuery)
            {
                # add closing paren if query was flagged to be closed
                if (isset($CloseQuery[$QueryIndex][$PartIndex]))
                        {  $PartQuery .= " ) ";  }

                # perform query and add IDs to those found by earlier parts
                # (merged by value because the array "+" operator keeps the
                #       left-hand entry for every key present in both
                #       arrays, which would drop IDs from later parts)
                $this->DMsg(5, "Performing comparison query (<i>"
                        .$PartQuery."</i>)");
                $this->DB->Query($PartQuery);
                $ResourceIds = array_unique(array_merge(
                        $ResourceIds, $this->DB->FetchColumn("ResourceId")));
                $this->DMsg(5, "Comparison query produced <i>"
                        .count($ResourceIds)."</i> results");
            }
        }
        else
        {
            # add closing paren if query was flagged to be closed
            if (isset($CloseQuery[$QueryIndex])) {  $Query .= " ) ";  }

            # perform query and retrieve IDs
            $this->DMsg(5, "Performing comparison query (<i>".$Query."</i>)");
            $this->DB->Query($Query);
            $ResourceIds = $this->DB->FetchColumn("ResourceId");
            $this->DMsg(5, "Comparison query produced <i>"
                    .count($ResourceIds)."</i> results");
        }

        if ($Results === NULL)
        {
            # first query, so its IDs become the initial results
            $Results = $ResourceIds;
        }
        elseif ($this->DefaultSearchLogic == self::LOGIC_AND)
        {
            # remove anything from results that was not returned from query
            $Results = array_intersect($Results, $ResourceIds);
        }
        else
        {
            # add values returned from query to results
            $Results = array_unique(array_merge($Results, $ResourceIds));
        }
    }

    # return results to caller (empty list if no queries were assembled)
    return ($Results === NULL) ? array() : $Results;
}
00505
# Retrieve resource IDs sorted by the specified field.
#   $FieldName - name of field to sort by
#   $SortDescending - TRUE to sort in descending order
# Returns the result of ResourceFactory::GetResourceIdsSortedBy(), which is
# passed the negated descending flag as its second argument.
static function GetItemIdsSortedByField($FieldName, $SortDescending)
{
    # factory expects the opposite sense of the sort direction flag
    $SortAscending = !$SortDescending;

    # let resource factory do the sorting
    $Factory = new ResourceFactory();
    return $Factory->GetResourceIdsSortedBy($FieldName, $SortAscending);
}
00511
# Queue a task that will update the search data for an item.
#   $ItemId - ID of item (converted to an integer before being queued)
#   $Priority - priority for task (OPTIONAL, defaults to
#       ApplicationFramework::PRIORITY_LOW)
static function QueueUpdateForItem($ItemId,
        $Priority = ApplicationFramework::PRIORITY_LOW)
{
    # application framework instance comes from global scope
    global $AF;

    # task will call RunUpdateForItem() on this class with the item ID
    $Callback = array(__CLASS__, "RunUpdateForItem");
    $Parameters = array(intval($ItemId));

    # queue task via the framework's unique-task call
    $AF->QueueUniqueTask($Callback, $Parameters, $Priority);
}
00519
# Update the search data for an item, provided that the item still exists
# (this is the method queued as a task by QueueUpdateForItem()).
#   $ItemId - ID of item to update
static function RunUpdateForItem($ItemId)
{
    # only proceed if resource has not gone away in the meantime
    $Factory = new ResourceFactory();
    if ($Factory->ItemExists($ItemId))
    {
        # rebuild search data for resource
        $Engine = new SPTSearchEngine();
        $Engine->UpdateForItem($ItemId);
    }
}
00530
# metadata schema used for field lookups (set up in constructor)
private $Schema;

# alias for UpdateForItem(), kept for backward compatibility w/ old SPT code
function UpdateForResource($ItemId)
{
    $this->UpdateForItem($ItemId);
}
00535 }