Search:

CWIS Developers Documentation

  • Main Page
  • Classes
  • Files
  • File List
  • File Members

OAIClient.php

Go to the documentation of this file.
00001 <?PHP
00002 
00003 #
00004 #   FILE:  Scout--OAIClient.php
00005 #     Provides a client for pulling data from OAI-PMH providers
00006 #     For protocol documentation, see:
00007 #     http://www.openarchives.org/OAI/openarchivesprotocol.html
00008 #
00009 #   METHODS PROVIDED:
00010 #       OAIClient(ServerUrl, Cache)
00011 #           - constructor
00012 #       ServerUrl(NewValue)
00013 #           - Change the base url of the remote repository
00014 #       MetadataPrefix($pfx)
00015 #           - Set the schema we will request from remote
00016 #       SetSpec($set)
00017 #           - Restrict queries to a single set
00018 #             for details, see
00019 #             http://www.openarchives.org/OAI/openarchivesprotocol.html#Set
00020 #       GetIdentification()
00021 #           - Fetch identifying information about the remote repository
00022 #       GetFormats()
00023 #           - Fetch information about what schemas remote can serve
00024 #       GetRecords($start,$end)
00025 #           - Pull records in batches, optionally with date restrictions
00026 #       GetRecord($id)
00027 #           - Pull a single record using a unique identifier
00028 #       MoreRecordsAvailable()
00029 #           - Determine if a batch pull is complete or not
00030 #       ResetRecordPointer()
00031 #           - Restart a batch pull from the beginning
00032 #       SetDebugLevel($NewLevel)
00033 #           - Set the verbosity of debugging output
00034 #
00035 #   Copyright 2008 Edward Almasy and Internet Scout
00036 #   http://scout.wisc.edu
00037 #
00038 
00039 require_once("XMLParser.php");
00040 
00041 
/**
 * Client for pulling data from OAI-PMH providers.
 *
 * Typical batch use: construct with the repository base URL, optionally
 * adjust MetadataPrefix() / SetSpec(), then call GetRecords() repeatedly
 * while MoreRecordsAvailable() returns TRUE.
 */
class OAIClient {

    # ---- PUBLIC INTERFACE --------------------------------------------------

    /**
     * Class constructor.
     * @param string $ServerUrl Base URL of the remote OAI-PMH repository.
     * @param string $Cache Optional directory used to cache ListRecords
     *       responses.  If the directory already exists, GetRecords() reads
     *       responses from it instead of querying the server; if it does
     *       not exist it is created, and responses fetched from the server
     *       are written into it.
     */
    public function __construct($ServerUrl, $Cache = NULL)
    {
        # set default debug level
        $this->DebugLevel = 0;

        # save OAI server URL
        $this->ServerUrl = $ServerUrl;

        # set default metadata prefix
        $this->MetadataPrefix = "oai_dc";

        # set default set specification for queries
        $this->SetSpec = NULL;

        # no batch pull is in progress yet
        $this->ResumptionToken = NULL;

        # set up cache (if requested)
        $this->CacheSequenceNumber = 0;
        $this->Cache = NULL;
        $this->UsingCache = FALSE;
        if ($Cache !== NULL)
        {
            $this->Cache = $Cache;

            # an existing directory means we replay from it, otherwise
            #       we create it and populate it as queries are made
            $this->UsingCache = is_dir($Cache);
            if ($this->UsingCache == FALSE)
            {
                mkdir($Cache);
            }
        }
    }

    /**
     * Legacy (PHP 4 style) constructor name.  PHP 8 no longer treats a
     * method named after the class as a constructor, so the real work
     * lives in __construct(); this alias is retained only so that existing
     * code which calls it explicitly continues to work.
     * @param string $ServerUrl Base URL of the remote OAI-PMH repository.
     * @param string $Cache Optional cache directory (see __construct()).
     */
    public function OAIClient($ServerUrl, $Cache = NULL)
    {
        $this->__construct($ServerUrl, $Cache);
    }

    /**
     * Get or set the base URL of the remote repository.
     * @param string $NewValue New base URL.  (OPTIONAL -- values that
     *       compare loosely equal to NULL leave the current URL unchanged)
     * @return string Current base URL.
     */
    public function ServerUrl($NewValue = NULL)
    {
        if ($NewValue != NULL)
        {
            $this->ServerUrl = $NewValue;
        }
        return $this->ServerUrl;
    }

    /**
     * Get or set the metadata schema that will be requested from the
     * remote repository.
     * @param string $NewValue New metadata prefix.  (OPTIONAL -- values
     *       that compare loosely equal to NULL leave the prefix unchanged)
     * @return string Current metadata prefix.
     */
    public function MetadataPrefix($NewValue = NULL)
    {
        if ($NewValue != NULL)
        {
            $this->MetadataPrefix = $NewValue;
        }
        return $this->MetadataPrefix;
    }

    /**
     * Get or set the set specification used to restrict queries.
     * @param string $NewValue New set specification, or NULL to clear the
     *       restriction.  (OPTIONAL)
     * @return string Current set specification (NULL if none).
     */
    public function SetSpec($NewValue = "X-NOSETSPECVALUE-X")
    {
        # a sentinel default is used so that NULL can be passed in to
        #       clear the set spec; compare strictly so that no other
        #       value can be mistaken for the sentinel
        if ($NewValue !== "X-NOSETSPECVALUE-X")
        {
            $this->SetSpec = $NewValue;
        }
        return $this->SetSpec;
    }

    /**
     * Fetch identifying information about the remote repository.
     * @return array Associative array with whichever of the keys "Name",
     *       "Email", and "URL" were present in the Identify response
     *       (empty array if the query failed or had no Identify section).
     */
    public function GetIdentification()
    {
        # query server for XML text
        $XmlText = $this->PerformQuery("Identify");
        $this->DebugOutVar(8,__METHOD__,"XmlText",htmlspecialchars($XmlText));

        # convert XML text into object (nothing to parse if query failed)
        $Xml = strlen($XmlText) ? simplexml_load_string($XmlText) : FALSE;
        $this->DebugOutVar(9, __METHOD__, "Xml", $Xml);

        # if identification info was found
        $Info = array();
        if (($Xml !== FALSE) && isset($Xml->Identify))
        {
            # extract info
            $Ident = $Xml->Identify;
            $this->GetValFromXml($Ident, "repositoryName", "Name", $Info);
            $this->GetValFromXml($Ident, "adminEmail", "Email", $Info);
            $this->GetValFromXml($Ident, "baseURL", "URL", $Info);
        }

        # return info to caller
        return $Info;
    }

    /**
     * Fetch information about the metadata formats the remote repository
     * can serve.
     * @return array List of associative arrays, one per format, each with
     *       whichever of the keys "Name", "Schema", and "Namespace" were
     *       present (empty array if the query failed or listed no formats).
     */
    public function GetFormats()
    {
        # query server for XML text
        $XmlText = $this->PerformQuery("ListMetadataFormats");
        $this->DebugOutVar(8,__METHOD__,"XmlText",htmlspecialchars($XmlText));

        # convert XML text into object (nothing to parse if query failed)
        $Xml = strlen($XmlText) ? simplexml_load_string($XmlText) : FALSE;
        $this->DebugOutVar(9, __METHOD__, "Xml", $Xml);

        # if format info was found
        $Formats = array();
        if (($Xml !== FALSE)
                && isset($Xml->ListMetadataFormats->metadataFormat))
        {
            # extract info for each format
            foreach ($Xml->ListMetadataFormats->metadataFormat as $Format)
            {
                # start from an explicit array so that a format with no
                #       recognized fields yields an empty array, not NULL
                $FormatInfo = array();
                $this->GetValFromXml(
                        $Format, "metadataPrefix", "Name", $FormatInfo);
                $this->GetValFromXml(
                        $Format, "schema", "Schema", $FormatInfo);
                $this->GetValFromXml(
                        $Format, "metadataNamespace", "Namespace",
                        $FormatInfo);
                $Formats[] = $FormatInfo;
            }
        }

        # return info to caller
        return $Formats;
    }

    /**
     * Pull the next batch of records, optionally restricted by date.  If
     * a resumption token was saved by a previous call, it is sent as the
     * sole query argument and the date arguments are ignored.
     * @param string $StartDate Value for the OAI "from" argument.  (OPTIONAL)
     * @param string $EndDate Value for the OAI "until" argument.  (OPTIONAL)
     * @return array List of records, each an associative array with
     *       "identifier" and "datestamp" entries, plus "format",
     *       "metadata", and "about" entries when those were found.
     */
    public function GetRecords($StartDate = NULL, $EndDate = NULL)
    {
        # work out which cache file corresponds to this batch (if caching)
        $HaveCache = ($this->Cache != NULL);
        $CacheFileName = NULL;
        if ($HaveCache)
        {
            $CacheFileName = sprintf("%s/%010x",
                                     $this->Cache,
                                     $this->CacheSequenceNumber);
            $this->CacheSequenceNumber++;
        }

        if ($HaveCache && $this->UsingCache)
        {
            # get XML text from the cache
            $XmlText = file_get_contents($CacheFileName);

            # treat an unreadable cache file as an empty response
            if ($XmlText === FALSE)
            {
                $XmlText = "";
            }
        }
        else
        {
            # if we have resumption token from prior query
            $Args = array();
            if (isset($this->ResumptionToken))
            {
                # use resumption token as sole argument
                $Args["resumptionToken"] = $this->ResumptionToken;
            }
            else
            {
                # set up arguments for query
                $Args["metadataPrefix"] = $this->MetadataPrefix;
                if ($StartDate) {  $Args["from"] = $StartDate;  }
                if ($EndDate)   {  $Args["until"] = $EndDate;  }
                if ($this->SetSpec) {  $Args["set"] = $this->SetSpec;  }
            }

            # query server for XML text
            $XmlText = $this->PerformQuery("ListRecords", $Args);

            # save response so that it can be replayed later
            if ($HaveCache)
            {
                file_put_contents($CacheFileName, $XmlText);
            }
        }

        $this->DebugOutVar(8, __METHOD__,"XmlText",htmlspecialchars($XmlText));

        return $this->GetRecordsFromXML($XmlText, "listrecords");
    }

    /**
     * Pull a single record using its unique identifier.
     * NOTE(review): the response is run through the same parsing code as
     * GetRecords(), which also refreshes the saved resumption token, so
     * calling this in the middle of a batch pull appears to discard the
     * batch position -- confirm against XMLParser::SeekTo() behavior.
     * @param string $Id OAI identifier of the record to fetch.
     * @return array List of records in the same format GetRecords() returns.
     */
    public function GetRecord($Id)
    {
        $Args = array(
                "metadataPrefix" => $this->MetadataPrefix,
                "identifier" => $Id,
                );

        # query server for XML text
        $XmlText = $this->PerformQuery("GetRecord", $Args);
        $this->DebugOutVar(8, __METHOD__,"XmlText",htmlspecialchars($XmlText));

        return $this->GetRecordsFromXML($XmlText, "getrecord");
    }

    /**
     * Determine whether a batch pull still has records left to retrieve.
     * @return bool TRUE if the most recent response supplied a resumption
     *       token, otherwise FALSE.
     */
    public function MoreRecordsAvailable()
    {
        return isset($this->ResumptionToken);
    }

    /**
     * Restart a batch pull from the beginning, by discarding any saved
     * resumption token and rewinding the cache sequence number.
     */
    public function ResetRecordPointer()
    {
        $this->ResumptionToken = NULL;
        $this->CacheSequenceNumber = 0;
    }

    /**
     * Set the verbosity of debugging output.
     * @param int $NewLevel New debug level.  Output is printed by
     *       DebugOutVar() when this level is at or above the level of the
     *       message (raw XML is at level 8, parsed objects at level 9).
     */
    public function SetDebugLevel($NewLevel)
    {
        $this->DebugLevel = $NewLevel;
    }


    # ---- PRIVATE INTERFACE -------------------------------------------------

    private $ServerUrl;
    private $MetadataPrefix;
    private $SetSpec;
    private $DebugLevel;
    private $ResumptionToken;
    private $Cache;
    private $UsingCache;
    private $CacheSequenceNumber;

    /**
     * Perform OAI query and return resulting data to caller.
     * @param string $QueryVerb OAI-PMH verb to send.
     * @param array $Args Additional query arguments (name => value), which
     *       are URL-encoded before being appended.  (OPTIONAL)
     * @return string Response body, or an empty string if the stream to
     *       the server could not be opened or read.
     */
    private function PerformQuery($QueryVerb, $Args = NULL)
    {
        # build query URL, allowing for base URLs that already have
        #       a query string
        $Separator = (strpos($this->ServerUrl, "?") === FALSE) ? "?" : "&";
        $QueryUrl = $this->ServerUrl.$Separator."verb=".$QueryVerb;
        if ($Args)
        {
            foreach ($Args as $ArgName => $ArgValue)
            {
                $QueryUrl .= "&".urlencode($ArgName)."=".urlencode($ArgValue);
            }
        }

        # open stream to OAI server
        $FHndl = fopen($QueryUrl, "r");

        # if stream was successfully opened
        $Text = "";
        if ($FHndl !== FALSE)
        {
            # read entire response from server
            $Response = stream_get_contents($FHndl);
            if ($Response !== FALSE)
            {
                $Text = $Response;
            }

            # close OAI server stream (only valid for an open handle)
            fclose($FHndl);
        }

        # return query result data to caller
        return $Text;
    }

    /**
     * Set array value if available in simplexml object.
     * @param object $Xml SimpleXML element to look in.
     * @param string $SrcName Name of child element to look for.
     * @param string $DstName Key to store the trimmed value under.
     * @param array $Results Array to add the value to (passed by reference
     *       and left untouched if the child element is absent).
     */
    private function GetValFromXml($Xml, $SrcName, $DstName, &$Results)
    {
        if (isset($Xml->$SrcName))
        {
            $Results[$DstName] = trim((string)$Xml->$SrcName);
        }
    }

    /**
     * Print variable contents if debug level is at or above specified level.
     * @param int $Level Minimum debug level at which to print.
     * @param string $MethodName Name of calling method (used as a label).
     * @param string $VarName Name of variable (used as a label).
     * @param mixed $VarValue Value to print (via print_r()).
     */
    private function DebugOutVar($Level, $MethodName, $VarName, $VarValue)
    {
        if ($this->DebugLevel >= $Level)
        {
            print("\n<pre>".$MethodName."()  ".$VarName." = \n");
            print_r($VarValue);
            print("</pre>\n");
        }
    }

    /**
     * Recursively dump tags inside a metadata section, flattening them
     * as we go.  Starting at the parser's current tag, each sibling tag is
     * visited in turn; tags with children are descended into, and data
     * from tags without children is appended to
     * $Records[$Index]["metadata"] under a "parent/child" style key.
     * @param array $Records Record list to add values to (by reference).
     * @param int $Index Index of the record currently being filled in.
     * @param object $Parser XMLParser positioned at the first tag to dump.
     * @param string $ParentTagName Flattened name of the enclosing tag, or
     *       NULL at the top level.  (OPTIONAL)
     */
    private function DumpTagsRecursive(&$Records, $Index, $Parser, $ParentTagName=NULL)
    {
        $TagName = $Parser->GetTagName();
        do
        {
            # prefix tag name with names of enclosing tags
            $StorageTagName = ($ParentTagName!==NULL) ?
                $ParentTagName."/".$TagName : $TagName;

            if ($Parser->SeekToChild())
            {
                # tag has children, so dump those and then come back up
                $this->DumpTagsRecursive(
                        $Records, $Index, $Parser, $StorageTagName);
                $Parser->SeekToParent();
            }
            else
            {
                # no children, so save data for this tag
                $Records[$Index]["metadata"][$StorageTagName][] =
                        $Parser->GetData();
            }
        } while ($TagName = $Parser->NextTag());
    }

    /**
     * Query has been sent, we need to retrieve records that came from it.
     * Also refreshes the saved resumption token from the response.
     * @param string $XmlText Raw XML text of the response.
     * @param string $ParseTo Lower-case name of the response section that
     *       contains the records ("listrecords" or "getrecord").
     * @return array List of records (see GetRecords() for format).
     */
    private function GetRecordsFromXML($XmlText, $ParseTo)
    {
        # create XML parser and pass it text
        $Parser = new XMLParser();
        $Parser->ParseText($XmlText);

        $this->DebugOutVar(9, __METHOD__, "Parser", $Parser);

        # if records were found
        $Records = array();
        $ItemCount = $Parser->SeekTo("oai-pmh", $ParseTo, "record");
        if ($ItemCount)
        {
            # for each record
            $Index = 0;
            do
            {
                # grab record identifier and date
                $Records[$Index]["identifier"]=$Parser->GetData("header",
                                                                "identifier");
                $Records[$Index]["datestamp"]=$Parser->GetData("header",
                                                               "datestamp");

                # grab metadata
                $SeekResult = $Parser->SeekTo("metadata");
                if ($SeekResult)
                {
                    # first child of metadata section names the format
                    $SeekResult = $Parser->SeekToChild();
                    if ($SeekResult)
                    {
                        $Records[$Index]["format"] = $Parser->GetTagName();
                        $SeekResult = $Parser->SeekToChild();
                        if ($SeekResult)
                        {
                            $this->DumpTagsRecursive($Records, $Index, $Parser);
                            $Parser->SeekToParent();
                        }
                        $Parser->SeekToParent();
                    }
                    $Parser->SeekToParent();
                }

                # grab search info (if any)
                $SeekResult = $Parser->SeekTo("about");
                if ($SeekResult)
                {
                    $SeekResult = $Parser->SeekTo("searchInfo");
                    if ($SeekResult)
                    {
                        $SeekResult = $Parser->SeekToChild();
                        if ($SeekResult)
                        {
                            $TagName = $Parser->GetTagName();
                            do
                            {
                                $Records[$Index]["about"]["SEARCHINFO"][$TagName][] =
                                        $Parser->GetData();
                            } while ($TagName = $Parser->NextTag());
                            $Parser->SeekToParent();
                        }
                        $Parser->SeekToParent();
                    }
                    $Parser->SeekToParent();
                }

                $Index++;
            }
            while ($Parser->NextItem());
        }

        # look for resumption token
        $Parser->SeekToRoot();
        $SeekResult = $Parser->SeekTo(
                "oai-pmh", "listrecords", "resumptiontoken");
        $Token = ($SeekResult !== NULL) ? $Parser->GetData() : NULL;

        # OAI-PMH marks the final batch of a list with an empty
        #       resumptionToken element, so a blank token means that
        #       there is nothing left to resume
        if (is_string($Token) && (trim($Token) === ""))
        {
            $Token = NULL;
        }

        # save token if found (NULL makes MoreRecordsAvailable() FALSE)
        $this->ResumptionToken = $Token;

        # return records to caller
        return $Records;
    }

}
00484 
00485 ?>

CWIS logo doxygen
Copyright 2010 Internet Scout