SPTSearchEngine.php

Go to the documentation of this file.
00001 <?PHP
00002 
00003 #
00004 #   FILE:  SPTSearchEngine.php
00005 #
00006 #   Part of the Collection Workflow Integration System (CWIS)
00007 #   Copyright 2002-2011 Edward Almasy and Internet Scout
00008 #   http://scout.wisc.edu
00009 #
00010 
/**
* Search engine for resource records.  Extends the generic SearchEngine
* class, registering every searchable metadata schema field with it and
* supplying the resource-specific lookups (field content, phrase matches,
* comparison matches, sorting, and queued index updates).
*
* NOTE(review): DB, DebugLevel, DefaultSearchLogic, DMsg(), AddField(),
* UpdateForItem(), and the FIELDTYPE_* / LOGIC_* constants are not defined in
* this file and are presumably inherited from SearchEngine -- confirm there.
*/
class SPTSearchEngine extends SearchEngine {

    /**
    * Object constructor.  Sets up the parent search engine against the
    * Resources table (keyed on ResourceId) and adds each metadata schema
    * field to it with the corresponding search field type.  Point fields
    * are not added.  Exits with an error message if a field has a type
    * that is not recognized.
    *
    * NOTE(review): this is a PHP 4-style (class-named) constructor, which
    * PHP 7 deprecates and PHP 8 no longer treats as a constructor.  It is
    * left as-is because it chains to the parent's class-named constructor,
    * whose definition is not visible from this file.
    */
    function SPTSearchEngine()
    {
        # create a database handle
        $DB = new Database();

        # pass database handle and config values to real search engine object
        $this->SearchEngine($DB, "Resources", "ResourceId");

        # for each field defined in schema
        $this->Schema = new MetadataSchema();
        $Fields = $this->Schema->GetFields();
        foreach ($Fields as $Field)
        {
            # determine field type for searching
            switch ($Field->Type())
            {
                case MetadataSchema::MDFTYPE_TEXT:
                case MetadataSchema::MDFTYPE_PARAGRAPH:
                case MetadataSchema::MDFTYPE_USER:
                case MetadataSchema::MDFTYPE_TREE:
                case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                case MetadataSchema::MDFTYPE_OPTION:
                case MetadataSchema::MDFTYPE_IMAGE:
                case MetadataSchema::MDFTYPE_FILE:
                case MetadataSchema::MDFTYPE_URL:
                    $FieldType = self::FIELDTYPE_TEXT;
                    break;

                case MetadataSchema::MDFTYPE_NUMBER:
                case MetadataSchema::MDFTYPE_FLAG:
                    $FieldType = self::FIELDTYPE_NUMERIC;
                    break;

                case MetadataSchema::MDFTYPE_DATE:
                    $FieldType = self::FIELDTYPE_DATERANGE;
                    break;

                case MetadataSchema::MDFTYPE_TIMESTAMP:
                    $FieldType = self::FIELDTYPE_DATE;
                    break;

                case MetadataSchema::MDFTYPE_POINT:
                    # (NULL marks the field as one to leave out of the engine)
                    $FieldType = NULL;
                    break;

                default:
                    exit("ERROR: unknown field type "
                            .$Field->Type()." in SPTSearchEngine.php");
                    break;
            }

            if ($FieldType !== NULL)
            {
                # add field to search engine
                $this->AddField($Field->Name(), $Field->DBFieldName(), $FieldType,
                                $Field->SearchWeight(), $Field->IncludeInKeywordSearch());
            }
        }
    }

    /**
    * Overloaded version of method to retrieve text from DB.
    * @param mixed $ItemId ID of resource to retrieve content for.
    * @param string $FieldName Name of metadata field to retrieve.
    * @return mixed Value returned by Resource::Get() for the field.
    */
    function GetFieldContent($ItemId, $FieldName)
    {
        # get resource object
        $Resource = new Resource($ItemId);

        # retrieve text (including variants) from resource object and return to caller
        return $Resource->Get($FieldName, FALSE, TRUE);
    }

    /**
    * Overloaded version of method to retrieve resource/phrase match list.
    * The phrase is matched case-insensitively as a substring, using a query
    * chosen according to the type of the field.  Flag, date, and timestamp
    * fields are not searched for phrases.
    * @param string $FieldName Name of metadata field to search.
    * @param string $Phrase Phrase to look for.
    * @return array IDs of matching resources (empty if the field is unknown,
    *       the field type is not handled, or nothing matches).
    */
    function SearchFieldForPhrases($FieldName, $Phrase)
    {
        # normalize and escape search phrase for use in SQL query
        # NOTE(review): addslashes() is not a charset-aware SQL escape;
        #       switch to the Database class' own escaping if it offers one.
        $SearchPhrase = strtolower(addslashes($Phrase));

        # report no matches if field is not known to schema
        # (same check that SearchFieldsForComparisonMatches() makes)
        $Field = $this->Schema->GetFieldByName($FieldName);
        if ($Field == NULL) {  return array();  }

        # query DB for matching list based on field type
        switch ($Field->Type())
        {
            case MetadataSchema::MDFTYPE_TEXT:
            case MetadataSchema::MDFTYPE_PARAGRAPH:
            case MetadataSchema::MDFTYPE_FILE:
            case MetadataSchema::MDFTYPE_URL:
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."`)) ";
                break;

            case MetadataSchema::MDFTYPE_IMAGE:
                # (image fields are searched via their alt text column)
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."AltText`)) ";
                break;

            case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                # first query matches against controlled names themselves
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();

                # second query matches against variants of controlled names
                $SecondQueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames, VariantNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(VariantName)) "
                        ."AND VariantNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_OPTION:
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_TREE:
                $QueryString = "SELECT DISTINCT ResourceClassInts.ResourceId "
                        ."FROM ResourceClassInts, Classifications "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ClassificationName)) "
                        ."AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
                        ."AND Classifications.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_USER:
                # look up a user whose login or real name contains the phrase
                # NOTE(review): only a single UserId value is used here, so
                #       presumably just the first matching user is searched
                #       for -- confirm against Database::Query().
                $UserId = $this->DB->Query("SELECT UserId FROM APUsers "
                                           ."WHERE POSITION('".$SearchPhrase."' IN LOWER(UserName)) "
                                           ."OR POSITION('".$SearchPhrase."' IN LOWER(RealName))", "UserId");
                if ($UserId != NULL)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                                     ."WHERE `".$Field->DBFieldName()."` = ".intval($UserId);
                }
                break;

            case MetadataSchema::MDFTYPE_NUMBER:
                # convert phrase to integer before testing it, so that the
                #       result does not depend on how PHP compares a
                #       non-numeric string to a number
                $NumericValue = (int)$SearchPhrase;
                if ($NumericValue > 0)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                                     ."WHERE `".$Field->DBFieldName()."` = ".$NumericValue;
                }
                break;

            case MetadataSchema::MDFTYPE_FLAG:
            case MetadataSchema::MDFTYPE_DATE:
            case MetadataSchema::MDFTYPE_TIMESTAMP:
                # (these types not yet handled by search engine for phrases)
                break;
        }

        # build match list based on results returned from DB
        if (isset($QueryString))
        {
            $this->DMsg(7, "Performing phrase search query (<i>".$QueryString."</i>)");
            if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
            $this->DB->Query($QueryString);
            if ($this->DebugLevel > 9)
            {
                # report queries that took more than a tenth of a second
                $EndTime = microtime(TRUE);
                if (($EndTime - $StartTime) > 0.1)
                {
                    printf("SE:  Query took %.2f seconds<br>\n",
                            ($EndTime - $StartTime));
                }
            }
            $MatchList = $this->DB->FetchColumn("ResourceId");
            if (isset($SecondQueryString))
            {
                $this->DMsg(7, "Performing second phrase search query"
                        ." (<i>".$SecondQueryString."</i>)");
                if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
                $this->DB->Query($SecondQueryString);
                if ($this->DebugLevel > 9)
                {
                    $EndTime = microtime(TRUE);
                    if (($EndTime - $StartTime) > 0.1)
                    {
                        printf("SE:  query took %.2f seconds<br>\n",
                                ($EndTime - $StartTime));
                    }
                }

                # combine results from both queries by value (the array
                #       "+" operator combines by key, which would discard
                #       any entry whose index was already in the first list)
                $MatchList = array_unique(array_merge(
                        $MatchList, $this->DB->FetchColumn("ResourceId")));
            }
        }
        else
        {
            $MatchList = array();
        }

        # return list of matching resources to caller
        return $MatchList;
    }

    /**
    * Search fields for records that meet comparisons.  The three parameters
    * are parallel arrays, with entries at the same index describing one
    * comparison.  Comparisons against unknown fields, and against image and
    * file fields, are ignored.  Results are combined according to the
    * current default search logic (intersection for AND, union otherwise).
    *
    * NOTE(review): operators are inserted into the SQL as given, so they
    * are presumably validated by the caller -- confirm in SearchEngine.
    *
    * @param array $FieldNames Names of metadata fields to compare.
    * @param array $Operators SQL comparison operators, indexed like $FieldNames.
    * @param array $Values Values to compare against, indexed like $FieldNames.
    * @return array IDs of matching resources.
    */
    function SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
    {
        # use SQL keyword appropriate to current search logic for combining operations
        $CombineWord = ($this->DefaultSearchLogic == self::LOGIC_AND) ? " AND " : " OR ";

        # start with no queries assembled
        # ($Queries entries are either a query string or an array of part
        #       queries; $CloseQuery flags queries needing a closing paren)
        $Queries = array();
        $CloseQuery = array();

        # for each comparison
        foreach ($FieldNames as $Index => $FieldName)
        {
            $Operator = $Operators[$Index];
            $Value = $Values[$Index];

            # determine query based on field type
            $Field = $this->Schema->GetFieldByName($FieldName);
            if ($Field != NULL)
            {
                switch ($Field->Type())
                {
                    case MetadataSchema::MDFTYPE_TEXT:
                    case MetadataSchema::MDFTYPE_PARAGRAPH:
                    case MetadataSchema::MDFTYPE_NUMBER:
                    case MetadataSchema::MDFTYPE_FLAG:
                    case MetadataSchema::MDFTYPE_USER:
                    case MetadataSchema::MDFTYPE_URL:
                        if (isset($Queries["Resources"]))
                        {
                            $Queries["Resources"] .= $CombineWord;
                        }
                        else
                        {
                            $Queries["Resources"] = "SELECT DISTINCT ResourceId FROM Resources WHERE ";
                        }
                        if ($Field->Type() == MetadataSchema::MDFTYPE_USER)
                        {
                            # user fields are compared by user ID
                            $User = new SPTUser($Value);
                            $Value = $User->Id();
                        }
                        $Queries["Resources"] .= "`".$Field->DBFieldName()."` ".$Operator." '".addslashes($Value)."' ";
                        break;

                    case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                        # part "A" compares against controlled names
                        $QueryIndex = "ResourceNameInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]["A"]))
                        {
                            $Queries[$QueryIndex]["A"] =
                                    "SELECT DISTINCT ResourceId"
                                    ." FROM ResourceNameInts, ControlledNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex]["A"] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex]["A"] .= $CombineWord;
                        }
                        $Queries[$QueryIndex]["A"] .=
                                "((ResourceNameInts.ControlledNameId"
                                        ." = ControlledNames.ControlledNameId"
                                ." AND ControlledName "
                                        .$Operator." '".addslashes($Value)."'))";

                        # part "B" compares against variant names
                        if (!isset($Queries[$QueryIndex]["B"]))
                        {
                            $Queries[$QueryIndex]["B"] =
                                    "SELECT DISTINCT ResourceId"
                                    . " FROM ResourceNameInts, ControlledNames,"
                                            ." VariantNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex]["B"] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex]["B"] .= $CombineWord;
                        }
                        $Queries[$QueryIndex]["B"] .=
                                "((ResourceNameInts.ControlledNameId"
                                        ." = ControlledNames.ControlledNameId"
                                ." AND ResourceNameInts.ControlledNameId"
                                        ." = VariantNames.ControlledNameId"
                                ." AND VariantName "
                                        .$Operator." '".addslashes($Value)."'))";
                        break;

                    case MetadataSchema::MDFTYPE_OPTION:
                        $QueryIndex = "ResourceNameInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]))
                        {
                            $Queries[$QueryIndex] =
                                    "SELECT DISTINCT ResourceId FROM ResourceNameInts, ControlledNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex] .= $CombineWord;
                        }
                        $Queries[$QueryIndex] .= "(ResourceNameInts.ControlledNameId = ControlledNames.ControlledNameId"
                                                       ." AND ControlledName ".$Operator." '".addslashes($Value)."')";
                        break;

                    case MetadataSchema::MDFTYPE_TREE:
                        $QueryIndex = "ResourceClassInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]))
                        {
                            $Queries[$QueryIndex] = "SELECT DISTINCT ResourceId FROM ResourceClassInts, Classifications "
                                                 ." WHERE ResourceClassInts.ClassificationId = Classifications.ClassificationId"
                                                 ." AND Classifications.FieldId = ".$Field->Id()." AND ( ";
                            $CloseQuery[$QueryIndex] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex] .= $CombineWord;
                        }
                        $Queries[$QueryIndex] .= " ClassificationName ".$Operator." '".addslashes($Value)."'";
                        break;

                    case MetadataSchema::MDFTYPE_TIMESTAMP:
                        # if value appears to have time component or text description
                        # (strpos() result is compared against FALSE because
                        #       a match at offset 0 is itself falsy)
                        if ((strpos($Value, ":") !== FALSE)
                                || strstr($Value, "day")
                                || strstr($Value, "week")
                                || strstr($Value, "month")
                                || strstr($Value, "year")
                                || strstr($Value, "hour")
                                || strstr($Value, "minute"))
                        {
                            if (isset($Queries["Resources"]))
                            {
                                $Queries["Resources"] .= $CombineWord;
                            }
                            else
                            {
                                $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                        ." FROM Resources WHERE ";
                            }

                            # flip operator if necessary
                            # (for "... ago" values a larger amount means
                            #       an earlier point in time)
                            if (strstr($Value, "ago"))
                            {
                                $OperatorFlipMap = array(
                                        "<" => ">=",
                                        ">" => "<=",
                                        "<=" => ">",
                                        ">=" => "<",
                                        );
                                $Operator = isset($OperatorFlipMap[$Operator])
                                        ? $OperatorFlipMap[$Operator] : $Operator;
                            }

                            # use strtotime method to build condition
                            $TimestampValue = strtotime($Value);
                            if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
                            {
                                # if value resolved to midnight without a
                                #       time having been given and operator
                                #       is "<=", extend to end of that day
                                if ((date("H:i:s", $TimestampValue) == "00:00:00")
                                        && (strpos($Value, "00:00") === FALSE)
                                        && ($Operator == "<="))
                                {
                                    $NormalizedValue =
                                            date("Y-m-d", $TimestampValue)." 23:59:59";
                                }
                                else
                                {
                                    $NormalizedValue = date("Y-m-d H:i:s", $TimestampValue);
                                }
                            }
                            else
                            {
                                # value could not be parsed, so use it as supplied
                                $NormalizedValue = addslashes($Value);
                            }
                            $Queries["Resources"] .=
                                    " ( `".$Field->DBFieldName()."` "
                                    .$Operator
                                    ." '".$NormalizedValue."' ) ";
                        }
                        else
                        {
                            # use Date object method to build condition
                            $Date = new Date($Value);
                            if ($Date->Precision())
                            {
                                if (isset($Queries["Resources"]))
                                {
                                    $Queries["Resources"] .= $CombineWord;
                                }
                                else
                                {
                                    $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                            ." FROM Resources WHERE ";
                                }
                                $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                                        $Field->DBFieldName(), NULL, $Operator)." ) ";
                            }
                        }
                        break;

                    case MetadataSchema::MDFTYPE_DATE:
                        # (date fields are stored in separate begin and end columns)
                        $Date = new Date($Value);
                        if ($Date->Precision())
                        {
                            if (isset($Queries["Resources"]))
                            {
                                $Queries["Resources"] .= $CombineWord;
                            }
                            else
                            {
                                $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                        ." FROM Resources WHERE ";
                            }
                            $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                                    $Field->DBFieldName()."Begin",
                                    $Field->DBFieldName()."End", $Operator)." ) ";
                        }
                        break;

                    case MetadataSchema::MDFTYPE_IMAGE:
                    case MetadataSchema::MDFTYPE_FILE:
                        # (these types not yet handled by search engine for comparisons)
                        break;
                }
            }
        }

        # start with no results (NULL indicates no query has been run yet)
        $Results = NULL;

        # for each assembled query
        foreach ($Queries as $QueryIndex => $Query)
        {
            # if query has multiple parts
            if (is_array($Query))
            {
                # for each part of query
                $ResourceIds = array();
                foreach ($Query as $PartIndex => $PartQuery)
                {
                    # add closing paren if query was flagged to be closed
                    if (isset($CloseQuery[$QueryIndex])) {  $PartQuery .= " ) ";  }

                    # perform query and retrieve IDs
                    # (results are combined by value, because the array "+"
                    #       operator combines by key and would discard IDs
                    #       whose index was already present)
                    $this->DMsg(5, "Performing comparison query (<i>"
                            .$PartQuery."</i>)");
                    $this->DB->Query($PartQuery);
                    $ResourceIds = array_unique(array_merge(
                            $ResourceIds, $this->DB->FetchColumn("ResourceId")));
                    $this->DMsg(5, "Comparison query produced <i>"
                            .count($ResourceIds)."</i> results");
                }
            }
            else
            {
                # add closing paren if query was flagged to be closed
                if (isset($CloseQuery[$QueryIndex])) {  $Query .= " ) ";  }

                # perform query and retrieve IDs
                $this->DMsg(5, "Performing comparison query (<i>".$Query."</i>)");
                $this->DB->Query($Query);
                $ResourceIds = $this->DB->FetchColumn("ResourceId");
                $this->DMsg(5, "Comparison query produced <i>"
                        .count($ResourceIds)."</i> results");
            }

            # if this is the first query to be run
            if ($Results === NULL)
            {
                # set results to values returned from query
                $Results = $ResourceIds;
            }
            # else if search logic is set to AND
            elseif ($this->DefaultSearchLogic == self::LOGIC_AND)
            {
                # remove anything from results that was not returned from query
                $Results = array_intersect($Results, $ResourceIds);
            }
            else
            {
                # add values returned from query to results
                $Results = array_unique(array_merge($Results, $ResourceIds));
            }
        }

        # return results to caller (empty list if no queries were run)
        return ($Results !== NULL) ? $Results : array();
    }

    /**
    * Retrieve resource IDs sorted by the specified field.
    * @param string $FieldName Name of metadata field to sort by.
    * @param bool $SortDescending TRUE to sort in descending order.
    * @return mixed Value returned by ResourceFactory::GetResourceIdsSortedBy().
    */
    static function GetItemIdsSortedByField($FieldName, $SortDescending)
    {
        # (factory method is passed the inverse of the descending flag)
        $RFactory = new ResourceFactory();
        return $RFactory->GetResourceIdsSortedBy($FieldName, !$SortDescending);
    }

    /**
    * Queue a task that will update the search data for a resource.
    * Relies on the global application framework object ($AF).
    * @param mixed $ItemId ID of resource (converted to an integer).
    * @param mixed $Priority Priority to queue task with.  (OPTIONAL,
    *       defaults to ApplicationFramework::PRIORITY_LOW)
    */
    static function QueueUpdateForItem($ItemId,
            $Priority = ApplicationFramework::PRIORITY_LOW)
    {
        global $AF;
        $AF->QueueUniqueTask(array(__CLASS__, "RunUpdateForItem"),
                array(intval($ItemId)), $Priority);
    }

    /**
    * Update the search data for a resource, if the resource still exists.
    * (This is the callback queued by QueueUpdateForItem().)
    * @param mixed $ItemId ID of resource.
    */
    static function RunUpdateForItem($ItemId)
    {
        # check that resource still exists
        $RFactory = new ResourceFactory();
        if (!$RFactory->ItemExists($ItemId)) {  return;  }

        # update search data for resource
        $SearchEngine = new SPTSearchEngine();
        $SearchEngine->UpdateForItem($ItemId);
    }

    # metadata schema that searchable fields are drawn from (set in constructor)
    private $Schema;

    # functions for backward compatibility w/ old SPT code
    function UpdateForResource($ItemId) {  $this->UpdateForItem($ItemId);  }
}