Search:

CWIS Developers Documentation

  • Main Page
  • Classes
  • Files
  • File List
  • File Members

RSSClient.php

Go to the documentation of this file.
00001 <?PHP
00002 
00003 #
00004 #   FILE:  Scout--RSSClient.php
00005 #
00006 #   METHODS PROVIDED:
00007 #       RSSClient()
00008 #           - constructor
00009 #       GetItems($NumberOfItems, $ChannelName)
00010 #           - retrieve items from the RSS feed
00011 #
00012 #   AUTHOR:  Edward Almasy
00013 #
00014 #   Copyright 2005 Internet Scout Project
00015 #   http://scout.wisc.edu
00016 #
00017 
/**
 * Client for retrieving and reading an RSS (or RDF-based RSS) feed, with
 * optional database-backed caching of the fetched XML.
 *
 * The XML is parsed with the project's XMLParser class; the cache is
 * accessed through a database object exposing Query(), FetchField(),
 * FetchRow() and NumRowsSelected().
 */
class RSSClient {

    # ---- PUBLIC INTERFACE --------------------------------------------------

    /**
     * Legacy (PHP 4 style) constructor name, kept so that existing code
     * calling RSSClient() explicitly (e.g. from a subclass) keeps working.
     * It is declared ahead of __construct(), which is the real constructor.
     * @see RSSClient::__construct()
     */
    function RSSClient($ServerUrl, $CacheDB = NULL, $RefreshTime = 600, $Encoding = "UTF-8", $DebugLevel = 0)
    {
        $this->__construct($ServerUrl, $CacheDB, $RefreshTime, $Encoding, $DebugLevel);
    }

    /**
     * Object constructor.  Fetches the feed (from the cache when possible)
     * and parses it.
     * @param string $ServerUrl URL of the RSS feed.
     * @param object $CacheDB Database object used for caching, or NULL to
     *       disable caching.
     * @param int $RefreshTime Maximum age of a cache entry, in seconds.
     * @param string $Encoding Character encoding handed to the XML parser.
     * @param int $DebugLevel Debug output level (0 = no output); levels
     *       above 3 also enable debug output in the XML parser.
     */
    function __construct($ServerUrl, $CacheDB = NULL, $RefreshTime = 600, $Encoding = "UTF-8", $DebugLevel = 0)
    {
        # set default debug level
        $this->DebugLevel = $DebugLevel;

        # set default encoding
        $this->Encoding = $Encoding;

        # save cache details
        $this->CacheDB = $CacheDB;
        $this->RefreshTime = $RefreshTime;

        # query server (or cache) for XML text
        $this->XmlText = $this->QueryServerWithCaching(
            $ServerUrl, $CacheDB, $RefreshTime);

        # create XML parser and parse text
        $this->ParseXmlText();

        if ($this->DebugLevel) {  print("RSSClient->RSSClient() returned ".strlen($this->XmlText)." characters from server query<br>\n");  }
    }

    /**
     * Get/set the feed URL.  Setting a different URL re-fetches and
     * re-parses the feed.
     * @param string $NewValue New feed URL (OPTIONAL).
     * @return string Current feed URL.
     */
    function ServerUrl($NewValue = NULL)
    {
        # if new RSS server URL supplied
        if (($NewValue != NULL) && ($NewValue != $this->ServerUrl))
        {
            # save new value
            $this->ServerUrl = $NewValue;

            # re-read XML from server at new URL
            $this->XmlText = $this->QueryServerWithCaching(
                $NewValue,
                $this->CacheDB,
                $this->RefreshTime);

            # create new XML parser (using the current encoding) and parse text
            $this->ParseXmlText();
        }

        # return RSS server URL to caller
        return $this->ServerUrl;
    }

    /**
     * Get/set the character encoding.  Setting a different encoding
     * re-fetches and re-parses the feed with the new encoding.
     * @param string $NewValue New encoding (OPTIONAL).
     * @return string Current encoding.
     */
    function Encoding($NewValue = NULL)
    {
        # if new encoding supplied
        if (($NewValue != NULL) && ($NewValue != $this->Encoding))
        {
            # save new value
            $this->Encoding = $NewValue;

            # re-read XML from server
            $this->XmlText = $this->QueryServerWithCaching(
                $this->ServerUrl,
                $this->CacheDB,
                $this->RefreshTime);

            # create new XML parser and parse text
            $this->ParseXmlText();
        }

        # return encoding to caller
        return $this->Encoding;
    }

    /**
     * Try to determine the encoding of the feed and switch to it.  The
     * encoding named in the XML declaration wins; failing that, the charset
     * recorded in the cache (taken from the Content-Type response header
     * when the feed was fetched) is used; failing that, ISO-8859-1.
     */
    function AutodetectEncoding()
    {
        # if neither the XML file nor the HTTP response headers specify an
        # encoding, there is an overwhelming chance that it's ISO-8859-1, so
        # use it as the default
        $Encoding = "ISO-8859-1";

        # only match up to the encoding portion of the XML declaration
        # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-prolog-dtd
        # (S is one or more whitespace characters; the optional form is
        # spelled out separately because appending "?" to "+" would make
        # the quantifier lazy rather than optional)
        $S = '[ \t\r\n]+';
        $OptS = '[ \t\r\n]*';
        $Eq = "{$OptS}={$OptS}";
        $VersionNum = '1\.0';
        $EncName = '[A-Za-z]([A-Za-z0-9._]|-)*';
        $VersionInfo = "{$S}version{$Eq}('{$VersionNum}'|\"{$VersionNum}\")";
        $EncodingDecl = "{$S}encoding{$Eq}('{$EncName}'|\"{$EncName}\")";
        $XMLDecl = "<\?xml{$VersionInfo}({$EncodingDecl})?";
        $RegEx = "/{$XMLDecl}/";

        # try to find the encoding, index 3 will be set if encoding is declared
        $Matches = array();
        preg_match($RegEx, (string)$this->XmlText, $Matches);

        # give precedence to the encoding specified within the XML file since
        # a RSS feed publisher might not have access to HTTP response headers
        if (count($Matches) >= 4)
        {
            # also need to strip off the quotes
            $Encoding = trim($Matches[3], "'\"");
        }

        # then give precedence to the charset parameter in the Content-Type
        # response header
        else if ($this->CacheDB)
        {
            $DB = $this->CacheDB;
            $ServerUrl = addslashes($this->ServerUrl);

            # get the cache entry for this feed (if any)
            $DB->Query("
                SELECT * FROM RSSClientCache
                WHERE ServerUrl = '".$ServerUrl."'");
            $Exists = ($DB->NumRowsSelected() > 0);
            $Cache = $DB->FetchRow();

            # if cached and charset parameter was given in the response headers
            if ($Exists && isset($Cache["Charset"]) && strlen($Cache["Charset"]))
            {
                $Encoding = $Cache["Charset"];
            }
        }

        $this->Encoding($Encoding);
    }

    /**
     * Retrieve RSS items from the feed.
     * @param int $NumberOfItems Maximum number of items to return, or NULL
     *       (or 0) for no limit.
     * @param string $ChannelName Currently unused.
     * @return array Zero-based list of items, each an array with the keys
     *       "title", "description", "link" and "enclosure".
     */
    function GetItems($NumberOfItems = NULL, $ChannelName = NULL)
    {
        # start by assuming no items will be found
        $Items = array();

        # move parser to area in XML with items ("item" is looked for inside
        # "channel" below "rss", or directly below "rdf:RDF" otherwise)
        $Parser = $this->Parser;
        $Parser->SeekToRoot();
        $Result = $Parser->SeekTo("rss");
        if ($Result === NULL)
        {
            $Result = $Parser->SeekTo("rdf:RDF");
        }
        else
        {
            $Parser->SeekTo("channel");
        }

        # if items are found
        $ItemCount = $Parser->SeekTo("item");
        if ($ItemCount)
        {
            # for each record
            $Index = 0;
            do
            {
                # retrieve item info
                $Items[$Index]["title"] = $Parser->GetData("title");
                $Items[$Index]["description"] = $Parser->GetData("description");
                $Items[$Index]["link"] = $Parser->GetData("link");
                $Items[$Index]["enclosure"] = $Parser->GetAttributes("enclosure");

                $Index++;
            }
            while ($Parser->NextItem() && (($NumberOfItems == NULL) || ($Index < $NumberOfItems)));
        }

        # return records to caller
        return $Items;
    }

    /**
     * Retrieve site name as given in feed.
     * @return string Channel title.
     */
    function GetChannelTitle()
    {
        if (!isset($this->ChannelTitle)) {  $this->LoadChannelInfo();  }
        return $this->ChannelTitle;
    }

    /**
     * Retrieve site link as given in feed.
     * @return string Channel link.
     */
    function GetChannelLink()
    {
        if (!isset($this->ChannelLink)) {  $this->LoadChannelInfo();  }
        return $this->ChannelLink;
    }

    /**
     * Retrieve site description as given in feed.
     * @return string Channel description.
     */
    function GetChannelDescription()
    {
        if (!isset($this->ChannelDescription)) {  $this->LoadChannelInfo();  }
        return $this->ChannelDescription;
    }

    /**
     * Tell caller whether the most recent fetch was served from the cache.
     * @return bool TRUE if cached data was used, otherwise FALSE.
     */
    function UsedCachedData()
    {
        return $this->CachedDataWasUsed;
    }


    # ---- PRIVATE INTERFACE -------------------------------------------------

    var $CacheDB;
    var $RefreshTime;
    var $ServerUrl;
    # (the next two are not referenced anywhere in this class)
    var $MetadataPrefix;
    var $SetSpec;
    var $DebugLevel;
    var $Encoding;
    var $XmlText;
    var $Parser;
    var $ChannelTitle;
    var $ChannelLink;
    var $ChannelDescription;
    var $CachedDataWasUsed;

    /**
     * Set current debug output level (0-9).
     * @param int $NewLevel New debug level.
     */
    function SetDebugLevel($NewLevel)
    {
        $this->DebugLevel = $NewLevel;
    }

    /**
     * Fetch the content at a URL, along with the media type and charset
     * from its Content-Type response header when that header carries a
     * charset parameter.
     * @param string $Url URL to fetch.
     * @return array Three values: the text (FALSE if the fetch failed), the
     *       media type (or NULL), and the charset (or NULL).
     */
    function GetXmlInfo($Url)
    {
        $Text = @file_get_contents($Url);
        $Type = NULL;
        $Charset = NULL;

        # get the type and charset if the fetch was successful
        if ($Text !== FALSE)
        {
            # this must come after file_get_contents() and before any other remote
            # fetching is done (the variable is only populated by the HTTP
            # wrapper, so it is absent for other kinds of fetches)
            $Headers = (isset($http_response_header)
                    && is_array($http_response_header))
                    ? $http_response_header : array();

            # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
            $LWS = '([ \t]*|\r\n[ \t]+)';
            $Token = '[!\x23-\x27*+-.\x30-\x39\x41-\x5A\x5E-\x7A|~]+';
            # quoted-pair is a backslash followed by any character (four
            # backslashes here give the regex an escaped backslash)
            $QuotedPair = '\\\\[\x00-\x7F]';
            $QdText = "([^\\x00-\\x1F\\x7F\"]|{$LWS})";
            $QuotedString = "\"({$QdText}|{$QuotedPair})*\"";
            $Value = "({$Token}|{$QuotedString})";
            $Parameter = "{$Token}{$LWS}={$LWS}{$Value}";

            # these make the Content-Type regex specific to Content-Type
            # values with charset parameters in them, but make capturing
            # the charset much easier
            $BasicParameter = "(;{$LWS}{$Parameter})*";
            $CharsetParameter = "(;{$LWS}charset{$LWS}={$LWS}{$Value})";
            $ModParameter = "{$BasicParameter}{$CharsetParameter}{$BasicParameter}";
            $MediaType = "({$Token}{$LWS}\\/{$LWS}{$Token}){$LWS}{$ModParameter}";

            # back to the spec
            $ContentType = "Content-Type{$LWS}:{$LWS}{$MediaType}{$LWS}";
            $RegEx = "/^{$ContentType}$/i";

            # capture group 3 is the media type and group 19 is the value
            # of the charset parameter
            foreach ($Headers as $Header)
            {
                $Matches = array();
                preg_match($RegEx, $Header, $Matches);

                if (isset($Matches[3]) && isset($Matches[19]))
                {
                    $Type = $Matches[3];

                    # the value may be a quoted string, so drop the quotes
                    $Charset = trim($Matches[19], '"');
                    break;
                }
            }
        }

        return array($Text, $Type, $Charset);
    }

    /**
     * Load RSS XML from the cache or, failing that, from the server.  The
     * server is queried directly when no cache database is available.
     * Updates ServerUrl, CacheDB (when one is supplied) and
     * CachedDataWasUsed.
     * @param string $ServerUrl URL of the RSS feed.
     * @param object $CacheDB Database object used for caching, or NULL to
     *       use the one already saved in this object (if any).
     * @param int $RefreshTime Maximum age of a cache entry, in seconds.
     * @return string XML text, or an empty string if the fetch failed.
     */
    function QueryServerWithCaching($ServerUrl, $CacheDB, $RefreshTime)
    {
        # save RSS server URL
        $this->ServerUrl = $ServerUrl;

        # save caching info (if any)
        if ($CacheDB)
        {
            $this->CacheDB = $CacheDB;
        }

        # start out assuming that nothing comes from the cache
        $this->CachedDataWasUsed = FALSE;
        $DB = $this->CacheDB;

        # NOTE(review): the SQL below is assembled with addslashes(); if the
        # database class offers parameterized queries or its own escaping
        # they would be the safer choice

        # if caching info was supplied
        if ($DB)
        {
            # look up cached information for this server
            $QueryTimeCutoff = date("Y-m-d H:i:s", (time() - $RefreshTime));
            $DB->Query("
                SELECT * FROM RSSClientCache
                WHERE ServerUrl = '".addslashes($ServerUrl)."'
                AND LastQueryTime > '".$QueryTimeCutoff."'");

            # if we have cached info that has not expired
            if ($CachedXml = $DB->FetchField("CachedXml"))
            {
                # use cached info
                $this->CachedDataWasUsed = TRUE;
                return $CachedXml;
            }
        }

        # query server for XML text
        list($Text, $Type, $Charset) = $this->GetXmlInfo($ServerUrl);
        $QueryResult = "";

        # if query was successful
        if ($Text !== FALSE)
        {
            $QueryResult = $Text;

            if ($DB)
            {
                # clear out any old cache entries
                $DB->Query("
                    DELETE FROM RSSClientCache
                    WHERE ServerUrl = '".addslashes($ServerUrl)."'");

                # save info in cache (type and charset are NULL when the
                # response had no usable Content-Type header, and are
                # stored as empty strings in that case)
                $DB->Query("
                    INSERT INTO RSSClientCache
                    (ServerUrl, CachedXml, Type, Charset, LastQueryTime)
                    VALUES (
                        '".addslashes($ServerUrl)."',
                        '".addslashes($Text)."',
                        '".addslashes((string)$Type)."',
                        '".addslashes((string)$Charset)."',
                        NOW())");
            }
        }

        # return query result to caller
        return $QueryResult;
    }

    /**
     * Read the channel title, link and description out of the parsed feed
     * and save them in this object.
     */
    function LoadChannelInfo()
    {
        $Parser = $this->Parser;
        $Parser->SeekToRoot();
        $Result = $Parser->SeekTo("rss");
        if ($Result === NULL)
        {
            $Result = $Parser->SeekTo("rdf:RDF");
        }
        $Parser->SeekTo("channel");
        $this->ChannelTitle = $Parser->GetData("title");
        $this->ChannelLink = $Parser->GetData("link");
        $this->ChannelDescription = $Parser->GetData("description");
    }

    /**
     * Create a fresh XML parser for the current encoding, parse the current
     * XML text with it, and discard channel info loaded from any earlier
     * parse so that it is re-read on the next request.
     */
    private function ParseXmlText()
    {
        $this->Parser = new XMLParser($this->Encoding);
        if ($this->DebugLevel > 3) {  $this->Parser->SetDebugLevel($this->DebugLevel - 3);  }
        $this->Parser->ParseText($this->XmlText);

        $this->ChannelTitle = NULL;
        $this->ChannelLink = NULL;
        $this->ChannelDescription = NULL;
    }
}

CWIS logo doxygen
Copyright 2010 Internet Scout