SPTSearchEngine.php
Go to the documentation of this file.
<?PHP

#
#   FILE:  SPTSearchEngine.php
#
#   Part of the Collection Workflow Integration System (CWIS)
#   Copyright 2002-2011 Edward Almasy and Internet Scout
#   http://scout.wisc.edu
#

/**
* Search engine that operates on the "Resources" table (keyed by
* "ResourceId"), with searchable fields taken from the metadata schema.
* Overrides the content-retrieval, phrase-search, and comparison-search
* hooks of the parent SearchEngine class.
*/
class SPTSearchEngine extends SearchEngine {

    /**
    * Object constructor.  Initializes the parent search engine with a new
    * database handle, then registers every schema field that maps onto a
    * search field type.  Point fields are not registered.  Terminates the
    * script via exit() if a field reports a type this class does not know.
    */
    function SPTSearchEngine()
    {
        # create a database handle
        $DB = new Database();

        # pass database handle and config values to real search engine object
        $this->SearchEngine($DB, "Resources", "ResourceId");

        # for each field defined in schema
        $this->Schema = new MetadataSchema();
        $Fields = $this->Schema->GetFields();
        foreach ($Fields as $Field)
        {
            # determine field type for searching
            switch ($Field->Type())
            {
                case MetadataSchema::MDFTYPE_TEXT:
                case MetadataSchema::MDFTYPE_PARAGRAPH:
                case MetadataSchema::MDFTYPE_USER:
                case MetadataSchema::MDFTYPE_TREE:
                case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                case MetadataSchema::MDFTYPE_OPTION:
                case MetadataSchema::MDFTYPE_IMAGE:
                case MetadataSchema::MDFTYPE_FILE:
                case MetadataSchema::MDFTYPE_URL:
                    $FieldType = self::FIELDTYPE_TEXT;
                    break;

                case MetadataSchema::MDFTYPE_NUMBER:
                case MetadataSchema::MDFTYPE_FLAG:
                    $FieldType = self::FIELDTYPE_NUMERIC;
                    break;

                case MetadataSchema::MDFTYPE_DATE:
                    $FieldType = self::FIELDTYPE_DATERANGE;
                    break;

                case MetadataSchema::MDFTYPE_TIMESTAMP:
                    $FieldType = self::FIELDTYPE_DATE;
                    break;

                case MetadataSchema::MDFTYPE_POINT:
                    # (NULL marks the field as one to leave out of the search engine)
                    $FieldType = NULL;
                    break;

                default:
                    exit("ERROR: unknown field type "
                            .$Field->Type()." in SPTSearchEngine.php");
                    break;
            }

            if ($FieldType !== NULL)
            {
                # add field to search engine
                $this->AddField($Field->Name(), $Field->DBFieldName(), $FieldType,
                        $Field->SearchWeight(), $Field->IncludeInKeywordSearch());
            }
        }
    }

    /**
    * Overloaded version of method to retrieve text from DB.
    * @param mixed $ItemId ID of resource to load.
    * @param string $FieldName Name of metadata field to retrieve.
    * @return mixed Value returned by Resource::Get() for the field (the
    *       original comment states that this includes variants).
    */
    function GetFieldContent($ItemId, $FieldName)
    {
        # get resource object
        $Resource = new Resource($ItemId);

        # retrieve text (including variants) from resource object and return to caller
        return $Resource->Get($FieldName, FALSE, TRUE);
    }

    /**
    * Overloaded version of method to retrieve resource/phrase match list.
    * The phrase is lower-cased and matched as a substring against the
    * lower-cased field content.  Flag, Date, and Timestamp fields are not
    * handled for phrases and always produce an empty list.
    * @param string $FieldName Name of metadata field to search.
    * @param string $Phrase Phrase to look for.
    * @return array IDs of resources that matched (empty if field is unknown,
    *       field type is unsupported, or nothing could be searched for).
    */
    function SearchFieldForPhrases($FieldName, $Phrase)
    {
        # normalize and escape search phrase for use in SQL query
        # NOTE(review): addslashes() is not a complete SQL escaping mechanism;
        #       switch to the Database layer's escaping if it provides one
        $SearchPhrase = strtolower(addslashes($Phrase));

        # bail out with no matches if field is not in schema
        #       (same guard as in SearchFieldsForComparisonMatches())
        $Field = $this->Schema->GetFieldByName($FieldName);
        if ($Field == NULL) {  return array();  }

        # query DB for matching list based on field type
        switch ($Field->Type())
        {
            case MetadataSchema::MDFTYPE_TEXT:
            case MetadataSchema::MDFTYPE_PARAGRAPH:
            case MetadataSchema::MDFTYPE_FILE:
            case MetadataSchema::MDFTYPE_URL:
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."`)) ";
                break;

            case MetadataSchema::MDFTYPE_IMAGE:
                # (image fields are searched via their alt text column)
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."AltText`)) ";
                break;

            case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                # first query matches controlled names, second matches variants
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                $SecondQueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames, VariantNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(VariantName)) "
                        ."AND VariantNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_OPTION:
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_TREE:
                $QueryString = "SELECT DISTINCT ResourceClassInts.ResourceId "
                        ."FROM ResourceClassInts, Classifications "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ClassificationName)) "
                        ."AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
                        ."AND Classifications.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_USER:
                # look up user whose login or real name contains phrase
                # (only the single value returned by the query is used)
                $UserId = $this->DB->Query("SELECT UserId FROM APUsers "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(UserName)) "
                        ."OR POSITION('".$SearchPhrase."' IN LOWER(RealName))", "UserId");
                if ($UserId != NULL)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                            ."WHERE `".$Field->DBFieldName()."` = ".intval($UserId);
                }
                break;

            case MetadataSchema::MDFTYPE_NUMBER:
                # convert before testing, so that non-numeric or fractional
                #       phrases cannot slip through as a search for zero
                $NumericValue = (int)$SearchPhrase;
                if ($NumericValue > 0)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                            ."WHERE `".$Field->DBFieldName()."` = ".$NumericValue;
                }
                break;

            case MetadataSchema::MDFTYPE_FLAG:
            case MetadataSchema::MDFTYPE_DATE:
            case MetadataSchema::MDFTYPE_TIMESTAMP:
                # (these types not yet handled by search engine for phrases)
                break;
        }

        # build match list based on results returned from DB
        if (isset($QueryString))
        {
            $this->DMsg(7, "Performing phrase search query (<i>".$QueryString."</i>)");
            if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
            $this->DB->Query($QueryString);
            if ($this->DebugLevel > 9)
            {
                # report queries that took longer than a tenth of a second
                $EndTime = microtime(TRUE);
                if (($EndTime - $StartTime) > 0.1)
                {
                    printf("SE: Query took %.2f seconds<br>\n",
                            ($EndTime - $StartTime));
                }
            }
            $MatchList = $this->DB->FetchColumn("ResourceId");
            if (isset($SecondQueryString))
            {
                $this->DMsg(7, "Performing second phrase search query"
                        ." (<i>".$SecondQueryString."</i>)");
                if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
                $this->DB->Query($SecondQueryString);
                if ($this->DebugLevel > 9)
                {
                    $EndTime = microtime(TRUE);
                    if (($EndTime - $StartTime) > 0.1)
                    {
                        printf("SE: query took %.2f seconds<br>\n",
                                ($EndTime - $StartTime));
                    }
                }

                # combine both lists by value (the array union operator
                #       works by key, and so would drop entries from the
                #       second list whose positions exist in the first)
                $MatchList = array_values(array_unique(array_merge(
                        $MatchList, $this->DB->FetchColumn("ResourceId"))));
            }
        }
        else
        {
            $MatchList = array();
        }

        # return list of matching resources to caller
        return $MatchList;
    }

    /**
    * Search fields for records that meet comparisons.  The three arguments
    * are parallel arrays: the entry at a given index in each describes one
    * comparison.  Comparisons are combined with AND or OR according to the
    * current default search logic.  Unknown fields are skipped, as are Image
    * and File fields, which are not handled for comparisons.
    * NOTE(review): operators are inserted into the SQL as given, without
    *       validation or escaping -- confirm that callers only pass trusted
    *       operator strings.
    * @param array $FieldNames Names of metadata fields to compare against.
    * @param array $Operators Comparison operators (SQL syntax).
    * @param array $Values Values to compare with.
    * @return array IDs of resources that met the comparisons (empty if no
    *       usable comparison was supplied).
    */
    function SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
    {
        # use SQL keyword appropriate to current search logic for combining operations
        $CombineWord = ($this->DefaultSearchLogic == self::LOGIC_AND) ? " AND " : " OR ";

        # for each comparison
        foreach ($FieldNames as $Index => $FieldName)
        {
            $Operator = $Operators[$Index];
            $Value = $Values[$Index];

            # determine query based on field type
            $Field = $this->Schema->GetFieldByName($FieldName);
            if ($Field != NULL)
            {
                switch ($Field->Type())
                {
                    case MetadataSchema::MDFTYPE_TEXT:
                    case MetadataSchema::MDFTYPE_PARAGRAPH:
                    case MetadataSchema::MDFTYPE_NUMBER:
                    case MetadataSchema::MDFTYPE_FLAG:
                    case MetadataSchema::MDFTYPE_USER:
                    case MetadataSchema::MDFTYPE_URL:
                        if (isset($Queries["Resources"]))
                        {
                            $Queries["Resources"] .= $CombineWord;
                        }
                        else
                        {
                            $Queries["Resources"] = "SELECT DISTINCT ResourceId FROM Resources WHERE ";
                        }
                        if ($Field->Type() == MetadataSchema::MDFTYPE_USER)
                        {
                            # user fields are compared by user ID
                            $User = new SPTUser($Value);
                            $Value = $User->Id();
                        }
                        $Queries["Resources"] .= "`".$Field->DBFieldName()."` ".$Operator
                                ." '".addslashes($Value)."' ";
                        break;

                    case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                        # part "A" compares controlled names, part "B" compares variants
                        $QueryIndex = "ResourceNameInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]["A"]))
                        {
                            $Queries[$QueryIndex]["A"] =
                                    "SELECT DISTINCT ResourceId"
                                    ." FROM ResourceNameInts, ControlledNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex]["A"] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex]["A"] .= $CombineWord;
                        }
                        $Queries[$QueryIndex]["A"] .=
                                "((ResourceNameInts.ControlledNameId"
                                        ." = ControlledNames.ControlledNameId"
                                ." AND ControlledName "
                                        .$Operator." '".addslashes($Value)."'))";
                        if (!isset($Queries[$QueryIndex]["B"]))
                        {
                            $Queries[$QueryIndex]["B"] =
                                    "SELECT DISTINCT ResourceId"
                                    . " FROM ResourceNameInts, ControlledNames,"
                                            ." VariantNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex]["B"] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex]["B"] .= $CombineWord;
                        }
                        $Queries[$QueryIndex]["B"] .=
                                "((ResourceNameInts.ControlledNameId"
                                        ." = ControlledNames.ControlledNameId"
                                ." AND ResourceNameInts.ControlledNameId"
                                        ." = VariantNames.ControlledNameId"
                                ." AND VariantName "
                                        .$Operator." '".addslashes($Value)."'))";
                        break;

                    case MetadataSchema::MDFTYPE_OPTION:
                        $QueryIndex = "ResourceNameInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]))
                        {
                            $Queries[$QueryIndex] =
                                    "SELECT DISTINCT ResourceId FROM ResourceNameInts, ControlledNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex] .= $CombineWord;
                        }
                        $Queries[$QueryIndex] .= "(ResourceNameInts.ControlledNameId = ControlledNames.ControlledNameId"
                                ." AND ControlledName ".$Operator." '".addslashes($Value)."')";
                        break;

                    case MetadataSchema::MDFTYPE_TREE:
                        $QueryIndex = "ResourceClassInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]))
                        {
                            $Queries[$QueryIndex] = "SELECT DISTINCT ResourceId FROM ResourceClassInts, Classifications "
                                    ." WHERE ResourceClassInts.ClassificationId = Classifications.ClassificationId"
                                    ." AND Classifications.FieldId = ".$Field->Id()." AND ( ";
                            $CloseQuery[$QueryIndex] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex] .= $CombineWord;
                        }
                        $Queries[$QueryIndex] .= " ClassificationName ".$Operator." '".addslashes($Value)."'";
                        break;

                    case MetadataSchema::MDFTYPE_TIMESTAMP:
                        # if value appears to have time component or text description
                        if (strpos($Value, ":")
                                || strstr($Value, "day")
                                || strstr($Value, "week")
                                || strstr($Value, "month")
                                || strstr($Value, "year")
                                || strstr($Value, "hour")
                                || strstr($Value, "minute"))
                        {
                            if (isset($Queries["Resources"]))
                            {
                                $Queries["Resources"] .= $CombineWord;
                            }
                            else
                            {
                                $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                        ." FROM Resources WHERE ";
                            }

                            # flip operator if necessary
                            # (a larger amount of time "ago" is an earlier timestamp)
                            if (strstr($Value, "ago"))
                            {
                                $OperatorFlipMap = array(
                                        "<" => ">=",
                                        ">" => "<=",
                                        "<=" => ">",
                                        ">=" => "<",
                                        );
                                $Operator = isset($OperatorFlipMap[$Operator])
                                        ? $OperatorFlipMap[$Operator] : $Operator;
                            }

                            # use strtotime method to build condition
                            $TimestampValue = strtotime($Value);
                            if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
                            {
                                # if value resolved to midnight without a time
                                #       having been explicitly given, extend
                                #       "<=" comparisons to cover the whole day
                                if ((date("H:i:s", $TimestampValue) == "00:00:00")
                                        && (strpos($Value, "00:00") === FALSE)
                                        && ($Operator == "<="))
                                {
                                    $NormalizedValue =
                                            date("Y-m-d", $TimestampValue)." 23:59:59";
                                }
                                else
                                {
                                    $NormalizedValue = date("Y-m-d H:i:s", $TimestampValue);
                                }
                            }
                            else
                            {
                                # (value could not be parsed, so pass it through escaped)
                                $NormalizedValue = addslashes($Value);
                            }
                            $Queries["Resources"] .=
                                    " ( `".$Field->DBFieldName()."` "
                                    .$Operator
                                    ." '".$NormalizedValue."' ) ";
                        }
                        else
                        {
                            # use Date object method to build condition
                            $Date = new Date($Value);
                            if ($Date->Precision())
                            {
                                if (isset($Queries["Resources"]))
                                {
                                    $Queries["Resources"] .= $CombineWord;
                                }
                                else
                                {
                                    $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                            ." FROM Resources WHERE ";
                                }
                                $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                                        $Field->DBFieldName(), NULL, $Operator)." ) ";
                            }
                        }
                        break;

                    case MetadataSchema::MDFTYPE_DATE:
                        # (date fields are stored in separate Begin and End columns)
                        $Date = new Date($Value);
                        if ($Date->Precision())
                        {
                            if (isset($Queries["Resources"]))
                            {
                                $Queries["Resources"] .= $CombineWord;
                            }
                            else
                            {
                                $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                        ." FROM Resources WHERE ";
                            }
                            $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                                    $Field->DBFieldName()."Begin",
                                    $Field->DBFieldName()."End", $Operator)." ) ";
                        }
                        break;

                    case MetadataSchema::MDFTYPE_IMAGE:
                    case MetadataSchema::MDFTYPE_FILE:
                        # (these types not yet handled by search engine for comparisons)
                        break;
                }
            }
        }

        # if queries found
        if (isset($Queries))
        {
            # for each assembled query
            foreach ($Queries as $QueryIndex => $Query)
            {
                # if query has multiple parts
                if (is_array($Query))
                {
                    # for each part of query
                    $ResourceIds = array();
                    foreach ($Query as $PartIndex => $PartQuery)
                    {
                        # add closing paren if query was flagged to be closed
                        if (isset($CloseQuery[$QueryIndex])) {  $PartQuery .= " ) ";  }

                        # perform query and retrieve IDs
                        $this->DMsg(5, "Performing comparison query (<i>"
                                .$PartQuery."</i>)");
                        $this->DB->Query($PartQuery);

                        # combine lists by value (the array union operator
                        #       works by key, and so would drop IDs from later
                        #       parts whose positions were already taken)
                        $ResourceIds = array_values(array_unique(array_merge(
                                $ResourceIds,
                                $this->DB->FetchColumn("ResourceId"))));
                        $this->DMsg(5, "Comparison query produced <i>"
                                .count($ResourceIds)."</i> results");
                    }
                }
                else
                {
                    # add closing paren if query was flagged to be closed
                    if (isset($CloseQuery[$QueryIndex])) {  $Query .= " ) ";  }

                    # perform query and retrieve IDs
                    $this->DMsg(5, "Performing comparison query (<i>".$Query."</i>)");
                    $this->DB->Query($Query);
                    $ResourceIds = $this->DB->FetchColumn("ResourceId");
                    $this->DMsg(5, "Comparison query produced <i>"
                            .count($ResourceIds)."</i> results");
                }

                # if we already have some results
                if (isset($Results))
                {
                    # if search logic is set to AND
                    if ($this->DefaultSearchLogic == self::LOGIC_AND)
                    {
                        # remove anything from results that was not returned from query
                        $Results = array_intersect($Results, $ResourceIds);
                    }
                    else
                    {
                        # add values returned from query to results
                        $Results = array_unique(array_merge($Results, $ResourceIds));
                    }
                }
                else
                {
                    # set results to values returned from query
                    $Results = $ResourceIds;
                }
            }
        }
        else
        {
            # initialize results to empty list
            $Results = array();
        }

        # return results to caller
        return $Results;
    }

    /**
    * Get resource IDs sorted by the specified field.
    * @param string $FieldName Name of field to sort by.
    * @param bool $SortDescending Sort direction; passed on negated to
    *       ResourceFactory::GetResourceIdsSortedBy() (presumably as an
    *       "ascending" flag -- TODO confirm).
    * @return mixed Value returned by ResourceFactory::GetResourceIdsSortedBy().
    */
    static function GetItemIdsSortedByField($FieldName, $SortDescending)
    {
        $RFactory = new ResourceFactory();
        return $RFactory->GetResourceIdsSortedBy($FieldName, !$SortDescending);
    }

    /**
    * Queue a background task to update search data for an item, via the
    * global application framework object ($AF).
    * @param mixed $ItemId ID of item to update (converted to an integer).
    * @param mixed $Priority Priority for the task (OPTIONAL, defaults to
    *       ApplicationFramework::PRIORITY_LOW).
    */
    static function QueueUpdateForItem($ItemId,
            $Priority = ApplicationFramework::PRIORITY_LOW)
    {
        global $AF;
        $AF->QueueUniqueTask(array(__CLASS__, "RunUpdateForItem"),
                array(intval($ItemId)), $Priority);
    }

    /**
    * Update search data for an item.  (This is the callback queued by
    * QueueUpdateForItem().)  Does nothing if the item no longer exists.
    * @param mixed $ItemId ID of item to update.
    */
    static function RunUpdateForItem($ItemId)
    {
        # check that resource still exists
        $RFactory = new ResourceFactory();
        if (!$RFactory->ItemExists($ItemId)) {  return;  }

        # update search data for resource
        $SearchEngine = new SPTSearchEngine();
        $SearchEngine->UpdateForItem($ItemId);
    }

    # metadata schema used to look up field definitions
    private $Schema;

    # functions for backward compatibility w/ old SPT code
    function UpdateForResource($ItemId) {  $this->UpdateForItem($ItemId);  }
}