00001 <?PHP
00002
00003 #
00004 # FILE: SPTSearchEngine.php
00005 #
00006 # Part of the Collection Workflow Integration System (CWIS)
00007 # Copyright 2002-2011 Edward Almasy and Internet Scout
00008 # http://scout.wisc.edu
00009 #
00010
class SPTSearchEngine extends SearchEngine {

    # Set up the search engine for the "Resources" table and register every
    # searchable metadata field from the schema with the parent engine.
    # NOTE(review): this is a PHP 4-style constructor (method named after the
    #       class); newer PHP versions only recognize __construct().  It is
    #       kept as-is because the parent is initialized via SearchEngine().
    function SPTSearchEngine()
    {
        # create a database handle
        $DB = new Database();

        # pass database handle and config values to real search engine object
        $this->SearchEngine($DB, "Resources", "ResourceId");

        # for each field defined in schema
        $this->Schema = new MetadataSchema();
        $Fields = $this->Schema->GetFields();
        foreach ($Fields as $Field)
        {
            # determine field type for searching
            switch ($Field->Type())
            {
                case MetadataSchema::MDFTYPE_TEXT:
                case MetadataSchema::MDFTYPE_PARAGRAPH:
                case MetadataSchema::MDFTYPE_USER:
                case MetadataSchema::MDFTYPE_TREE:
                case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                case MetadataSchema::MDFTYPE_OPTION:
                case MetadataSchema::MDFTYPE_IMAGE:
                case MetadataSchema::MDFTYPE_FILE:
                case MetadataSchema::MDFTYPE_URL:
                    $FieldType = self::FIELDTYPE_TEXT;
                    break;

                case MetadataSchema::MDFTYPE_NUMBER:
                case MetadataSchema::MDFTYPE_FLAG:
                    $FieldType = self::FIELDTYPE_NUMERIC;
                    break;

                case MetadataSchema::MDFTYPE_DATE:
                    $FieldType = self::FIELDTYPE_DATERANGE;
                    break;

                case MetadataSchema::MDFTYPE_TIMESTAMP:
                    $FieldType = self::FIELDTYPE_DATE;
                    break;

                case MetadataSchema::MDFTYPE_POINT:
                    # (point fields are not added to the search engine)
                    $FieldType = NULL;
                    break;

                default:
                    # unknown field type is treated as fatal
                    exit("ERROR: unknown field type "
                            .$Field->Type()." in SPTSearchEngine.php");
            }

            if ($FieldType !== NULL)
            {
                # add field to search engine
                $this->AddField($Field->Name(), $Field->DBFieldName(), $FieldType,
                        $Field->SearchWeight(), $Field->IncludeInKeywordSearch());
            }
        }
    }

    # overloaded version of method to retrieve text from DB
    # (builds a Resource for the item ID and returns the value of the named field)
    function GetFieldContent($ItemId, $FieldName)
    {
        # get resource object
        $Resource = new Resource($ItemId);

        # retrieve text (including variants) from resource object and return to caller
        return $Resource->Get($FieldName, FALSE, TRUE);
    }

    # overloaded version of method to retrieve resource/phrase match list
    # (returns a list of IDs of resources whose named field contains the
    #  phrase, compared case-insensitively; returns an empty list when the
    #  field is unknown or its type is not handled for phrase searches)
    function SearchFieldForPhrases($FieldName, $Phrase)
    {
        # normalize and escape search phrase for use in SQL query
        # NOTE(review): escaping relies on addslashes(); if the Database class
        #       offers its own escaping, that would be the safer choice
        $SearchPhrase = strtolower(addslashes($Phrase));

        # bail out if field is not in schema (same check as comparison search)
        $Field = $this->Schema->GetFieldByName($FieldName);
        if ($Field == NULL) { return array(); }

        # query DB for matching list based on field type
        switch ($Field->Type())
        {
            case MetadataSchema::MDFTYPE_TEXT:
            case MetadataSchema::MDFTYPE_PARAGRAPH:
            case MetadataSchema::MDFTYPE_FILE:
            case MetadataSchema::MDFTYPE_URL:
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                        ." IN LOWER(`".$Field->DBFieldName()."`)) ";
                break;

            case MetadataSchema::MDFTYPE_IMAGE:
                # (images are matched against their alt text column)
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                        ." IN LOWER(`".$Field->DBFieldName()."AltText`)) ";
                break;

            case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                # first query matches controlled names themselves
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();

                # second query matches variants of controlled names
                $SecondQueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames, VariantNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(VariantName)) "
                        ."AND VariantNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_OPTION:
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_TREE:
                $QueryString = "SELECT DISTINCT ResourceClassInts.ResourceId "
                        ."FROM ResourceClassInts, Classifications "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ClassificationName)) "
                        ."AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
                        ."AND Classifications.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_USER:
                # look up a user whose login or real name contains the phrase
                # (only the single UserId value returned by the query is used)
                $UserId = $this->DB->Query("SELECT UserId FROM APUsers "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(UserName)) "
                        ."OR POSITION('".$SearchPhrase."' IN LOWER(RealName))", "UserId");
                if ($UserId != NULL)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                            ."WHERE `".$Field->DBFieldName()."` = ".$UserId;
                }
                break;

            case MetadataSchema::MDFTYPE_NUMBER:
                # test the same integer value that is placed in the query, so
                #       that the check and the SQL cannot disagree
                $NumericValue = (int)$SearchPhrase;
                if ($NumericValue > 0)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                            ."WHERE `".$Field->DBFieldName()."` = ".$NumericValue;
                }
                break;

            case MetadataSchema::MDFTYPE_FLAG:
            case MetadataSchema::MDFTYPE_DATE:
            case MetadataSchema::MDFTYPE_TIMESTAMP:
                # (these types not yet handled by search engine for phrases)
                break;
        }

        # build match list based on results returned from DB
        if (isset($QueryString))
        {
            $this->DMsg(7, "Performing phrase search query (<i>".$QueryString."</i>)");
            if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
            $this->DB->Query($QueryString);
            if ($this->DebugLevel > 9)
            {
                # report slow queries (elapsed = end time minus start time)
                $ElapsedTime = microtime(TRUE) - $StartTime;
                if ($ElapsedTime > 0.1)
                {
                    printf("SE:  Query took %.2f seconds<br>\n", $ElapsedTime);
                }
            }
            $MatchList = $this->DB->FetchColumn("ResourceId");

            if (isset($SecondQueryString))
            {
                $this->DMsg(7, "Performing second phrase search query"
                        ." (<i>".$SecondQueryString."</i>)");
                if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
                $this->DB->Query($SecondQueryString);
                if ($this->DebugLevel > 9)
                {
                    $ElapsedTime = microtime(TRUE) - $StartTime;
                    if ($ElapsedTime > 0.1)
                    {
                        printf("SE:  query took %.2f seconds<br>\n", $ElapsedTime);
                    }
                }

                # combine ID lists by value (the array "+" operator merges by
                #       key, which would discard entries of the second list
                #       whose keys already exist in the first)
                $MatchList = array_values(array_unique(array_merge(
                        $MatchList, $this->DB->FetchColumn("ResourceId"))));
            }
        }
        else
        {
            $MatchList = array();
        }

        # return list of matching resources to caller
        return $MatchList;
    }

    # search field for records that meet comparison
    # ($FieldNames, $Operators, and $Values are parallel arrays sharing the
    #  same indexes, one entry per comparison; returns a list of IDs of
    #  matching resources, combined according to the default search logic)
    # NOTE(review): $Operator is inserted into the SQL verbatim -- this assumes
    #       the caller has already restricted it to valid comparison operators
    function SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
    {
        # use SQL keyword appropriate to current search logic for combining operations
        $CombineWord = ($this->DefaultSearchLogic == self::LOGIC_AND) ? " AND " : " OR ";

        # queries are keyed by table (plus field ID for name/class tables);
        #       an entry is either an SQL string or an array of SQL strings
        $Queries = array();
        $CloseQuery = array();

        # for each comparison
        foreach ($FieldNames as $Index => $FieldName)
        {
            $Operator = $Operators[$Index];
            $Value = $Values[$Index];

            # skip comparison if field is not in schema
            $Field = $this->Schema->GetFieldByName($FieldName);
            if ($Field == NULL) {  continue;  }

            # determine query based on field type
            # (conditions on the Resources table are collected here and
            #       appended to the shared query after the switch)
            $ResourceCondition = NULL;
            switch ($Field->Type())
            {
                case MetadataSchema::MDFTYPE_TEXT:
                case MetadataSchema::MDFTYPE_PARAGRAPH:
                case MetadataSchema::MDFTYPE_NUMBER:
                case MetadataSchema::MDFTYPE_FLAG:
                case MetadataSchema::MDFTYPE_USER:
                case MetadataSchema::MDFTYPE_URL:
                    if ($Field->Type() == MetadataSchema::MDFTYPE_USER)
                    {
                        # user fields are compared by user ID
                        $User = new SPTUser($Value);
                        $Value = $User->Id();
                    }
                    $ResourceCondition = "`".$Field->DBFieldName()."` "
                            .$Operator." '".addslashes($Value)."' ";
                    break;

                case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                    $QueryIndex = "ResourceNameInts".$Field->Id();

                    # part A compares against controlled names
                    if (!isset($Queries[$QueryIndex]["A"]))
                    {
                        $Queries[$QueryIndex]["A"] =
                                "SELECT DISTINCT ResourceId"
                                ." FROM ResourceNameInts, ControlledNames "
                                ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                ." AND ( ";
                        $CloseQuery[$QueryIndex]["A"] = TRUE;
                    }
                    else
                    {
                        $Queries[$QueryIndex]["A"] .= $CombineWord;
                    }
                    $Queries[$QueryIndex]["A"] .=
                            "((ResourceNameInts.ControlledNameId"
                                    ." = ControlledNames.ControlledNameId"
                            ." AND ControlledName "
                                    .$Operator." '".addslashes($Value)."'))";

                    # part B compares against variant names
                    if (!isset($Queries[$QueryIndex]["B"]))
                    {
                        $Queries[$QueryIndex]["B"] =
                                "SELECT DISTINCT ResourceId"
                                . " FROM ResourceNameInts, ControlledNames,"
                                        ." VariantNames "
                                ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                ." AND ( ";
                        $CloseQuery[$QueryIndex]["B"] = TRUE;
                    }
                    else
                    {
                        $Queries[$QueryIndex]["B"] .= $CombineWord;
                    }
                    $Queries[$QueryIndex]["B"] .=
                            "((ResourceNameInts.ControlledNameId"
                                    ." = ControlledNames.ControlledNameId"
                            ." AND ResourceNameInts.ControlledNameId"
                                    ." = VariantNames.ControlledNameId"
                            ." AND VariantName "
                                    .$Operator." '".addslashes($Value)."'))";
                    break;

                case MetadataSchema::MDFTYPE_OPTION:
                    $QueryIndex = "ResourceNameInts".$Field->Id();
                    if (!isset($Queries[$QueryIndex]))
                    {
                        $Queries[$QueryIndex] =
                                "SELECT DISTINCT ResourceId FROM ResourceNameInts, ControlledNames "
                                ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                ." AND ( ";
                        $CloseQuery[$QueryIndex] = TRUE;
                    }
                    else
                    {
                        $Queries[$QueryIndex] .= $CombineWord;
                    }
                    $Queries[$QueryIndex] .= "(ResourceNameInts.ControlledNameId = ControlledNames.ControlledNameId"
                            ." AND ControlledName ".$Operator." '".addslashes($Value)."')";
                    break;

                case MetadataSchema::MDFTYPE_TREE:
                    $QueryIndex = "ResourceClassInts".$Field->Id();
                    if (!isset($Queries[$QueryIndex]))
                    {
                        $Queries[$QueryIndex] = "SELECT DISTINCT ResourceId FROM ResourceClassInts, Classifications "
                                ." WHERE ResourceClassInts.ClassificationId = Classifications.ClassificationId"
                                ." AND Classifications.FieldId = ".$Field->Id()." AND ( ";
                        $CloseQuery[$QueryIndex] = TRUE;
                    }
                    else
                    {
                        $Queries[$QueryIndex] .= $CombineWord;
                    }
                    $Queries[$QueryIndex] .= " ClassificationName ".$Operator." '".addslashes($Value)."'";
                    break;

                case MetadataSchema::MDFTYPE_TIMESTAMP:
                    # if value appears to have time component or text description
                    # (a single pattern match also catches ":" at offset 0)
                    if (preg_match('/:|day|week|month|year|hour|minute/', $Value))
                    {
                        # flip operator if necessary
                        if (strstr($Value, "ago"))
                        {
                            $OperatorFlipMap = array(
                                    "<" => ">=",
                                    ">" => "<=",
                                    "<=" => ">",
                                    ">=" => "<",
                                    );
                            $Operator = isset($OperatorFlipMap[$Operator])
                                    ? $OperatorFlipMap[$Operator] : $Operator;
                        }

                        # use strtotime method to build condition
                        $TimestampValue = strtotime($Value);
                        if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
                        {
                            # a "<=" comparison against a date given without a
                            #       time is extended to the end of that day
                            if ((date("H:i:s", $TimestampValue) == "00:00:00")
                                    && (strpos($Value, "00:00") === FALSE)
                                    && ($Operator == "<="))
                            {
                                $NormalizedValue =
                                        date("Y-m-d", $TimestampValue)." 23:59:59";
                            }
                            else
                            {
                                $NormalizedValue = date("Y-m-d H:i:s", $TimestampValue);
                            }
                        }
                        else
                        {
                            # value could not be parsed, so use it as given
                            $NormalizedValue = addslashes($Value);
                        }
                        $ResourceCondition =
                                " ( `".$Field->DBFieldName()."` "
                                .$Operator
                                ." '".$NormalizedValue."' ) ";
                    }
                    else
                    {
                        # use Date object method to build condition
                        $Date = new Date($Value);
                        if ($Date->Precision())
                        {
                            $ResourceCondition = " ( ".$Date->SqlCondition(
                                    $Field->DBFieldName(), NULL, $Operator)." ) ";
                        }
                    }
                    break;

                case MetadataSchema::MDFTYPE_DATE:
                    # date fields are stored as a Begin/End column pair
                    $Date = new Date($Value);
                    if ($Date->Precision())
                    {
                        $ResourceCondition = " ( ".$Date->SqlCondition(
                                $Field->DBFieldName()."Begin",
                                $Field->DBFieldName()."End", $Operator)." ) ";
                    }
                    break;

                case MetadataSchema::MDFTYPE_IMAGE:
                case MetadataSchema::MDFTYPE_FILE:
                    # (these types not yet handled by search engine for comparisons)
                    break;
            }

            # if comparison produced a condition on the Resources table
            if ($ResourceCondition !== NULL)
            {
                # start Resources query or extend existing one
                if (isset($Queries["Resources"]))
                {
                    $Queries["Resources"] .= $CombineWord;
                }
                else
                {
                    $Queries["Resources"] =
                            "SELECT DISTINCT ResourceId FROM Resources WHERE ";
                }
                $Queries["Resources"] .= $ResourceCondition;
            }
        }

        # for each assembled query
        $Results = NULL;
        foreach ($Queries as $QueryIndex => $Query)
        {
            # treat single-part queries as a one-element list of parts
            $QueryParts = is_array($Query) ? $Query : array($Query);

            # for each part of query
            $ResourceIds = array();
            foreach ($QueryParts as $PartQuery)
            {
                # add closing paren if query was flagged to be closed
                if (isset($CloseQuery[$QueryIndex])) {  $PartQuery .= " ) ";  }

                # perform query and retrieve IDs
                $this->DMsg(5, "Performing comparison query (<i>"
                        .$PartQuery."</i>)");
                $this->DB->Query($PartQuery);
                $PartIds = $this->DB->FetchColumn("ResourceId");

                # combine IDs from all parts by value (the array "+" operator
                #       merges by key and would discard IDs from later parts)
                $ResourceIds = count($ResourceIds)
                        ? array_values(array_unique(
                                array_merge($ResourceIds, $PartIds)))
                        : $PartIds;
                $this->DMsg(5, "Comparison query produced <i>"
                        .count($ResourceIds)."</i> results");
            }

            # if we already have some results
            if ($Results !== NULL)
            {
                # if search logic is set to AND
                if ($this->DefaultSearchLogic == self::LOGIC_AND)
                {
                    # remove anything from results that was not returned from query
                    $Results = array_intersect($Results, $ResourceIds);
                }
                else
                {
                    # add values returned from query to results
                    $Results = array_unique(array_merge($Results, $ResourceIds));
                }
            }
            else
            {
                # set results to values returned from query
                $Results = $ResourceIds;
            }
        }

        # return results to caller (empty list if no queries were assembled)
        return ($Results === NULL) ? array() : $Results;
    }

    # retrieve resource IDs sorted by specified field
    # (delegates to ResourceFactory, passing the inverse of $SortDescending)
    static function GetItemIdsSortedByField($FieldName, $SortDescending)
    {
        $RFactory = new ResourceFactory();
        return $RFactory->GetResourceIdsSortedBy($FieldName, !$SortDescending);
    }

    # queue a task to update search data for the specified item
    # (task is queued via the global application framework object as a
    #  "unique" task that will call RunUpdateForItem() with the integer ID)
    static function QueueUpdateForItem($ItemId,
            $Priority = ApplicationFramework::PRIORITY_LOW)
    {
        global $AF;
        $AF->QueueUniqueTask(array(__CLASS__, "RunUpdateForItem"),
                array(intval($ItemId)), $Priority);
    }

    # update search data for the specified item (task callback)
    static function RunUpdateForItem($ItemId)
    {
        # check that resource still exists
        $RFactory = new ResourceFactory();
        if (!$RFactory->ItemExists($ItemId)) {  return;  }

        # update search data for resource
        $SearchEngine = new SPTSearchEngine();
        $SearchEngine->UpdateForItem($ItemId);
    }

    # metadata schema object (created in constructor, used for field lookups)
    private $Schema;

    # functions for backward compatibility w/ old SPT code
    function UpdateForResource($ItemId) {  $this->UpdateForItem($ItemId);  }
}