CWIS Developer Documentation
RSSClient.php
Go to the documentation of this file.
1 <?PHP
2 #
3 # FILE: RSSClient.php
4 #
5 # Part of the ScoutLib application support library
6 # Copyright 2002-2013 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu/
8 #
9 
13 class RSSClient
14 {
15  # ---- PUBLIC INTERFACE --------------------------------------------------
16 
/**
 * Object constructor.  Retrieves the feed XML (from the cache when
 * a fresh cached copy is available) and parses it.
 * @param string $ServerUrl URL of the RSS feed.
 * @param object $CacheDB Database object used to cache feed XML, or
 *       NULL for no caching.  (OPTIONAL, defaults to NULL)
 * @param int $RefreshTime Maximum age of a cached copy, in seconds.
 *       (OPTIONAL, defaults to 600)
 * @param string $Encoding Encoding handed to the XML parser.
 *       (OPTIONAL, defaults to "UTF-8")
 * @param int $DebugLevel Any non-zero value prints a summary of the
 *       query; values above 3 also enable parser debugging at
 *       ($DebugLevel - 3).  (OPTIONAL, defaults to 0)
 */
public function __construct($ServerUrl, $CacheDB = NULL, $RefreshTime = 600,
        $Encoding = "UTF-8", $DebugLevel = 0)
{
    # remember caching configuration
    $this->RefreshTime = $RefreshTime;
    $this->CacheDB = $CacheDB;

    # remember encoding and debug verbosity
    $this->Encoding = $Encoding;
    $this->DebugLevel = $DebugLevel;

    # fetch XML text from server (or cache)
    $XmlText = $this->QueryServerWithCaching(
            $ServerUrl, $CacheDB, $RefreshTime);
    $this->XmlText = $XmlText;

    # build a parser for the configured encoding
    $Parser = new XMLParser($this->Encoding);
    $this->Parser = $Parser;

    # parser debugging only kicks in above debug level 3
    if ($this->DebugLevel > 3)
    {
        $Parser->SetDebugLevel($this->DebugLevel - 3);
    }

    # parse the retrieved text
    $Parser->ParseText($this->XmlText);

    # report how much text came back if debugging is enabled
    if ($this->DebugLevel)
    {
        $Length = strlen($this->XmlText);
        print("RSSClient->RSSClient() returned ".$Length
                ." characters from server query<br>\n");
    }
}
60 
/**
 * Get or set the RSS feed URL.  Setting a URL that differs from the
 * current one re-reads the feed (via the cache, if any) and re-parses it.
 * @param string $NewValue New RSS feed URL.  (OPTIONAL)
 * @return string Current RSS feed URL.
 */
public function ServerUrl($NewValue = NULL)
{
    # if new RSS server URL supplied
    if (($NewValue != NULL) && ($NewValue != $this->ServerUrl))
    {
        # save new value
        $this->ServerUrl = $NewValue;

        # re-read XML from server at new URL
        $this->XmlText = $this->QueryServerWithCaching(
                $NewValue,
                $this->CacheDB,
                $this->RefreshTime);

        # create new XML parser, using the configured encoding so that
        #       the parser matches the one built by the constructor and
        #       by Encoding()
        $this->Parser = new XMLParser($this->Encoding);
        if ($this->DebugLevel > 3)
        {
            $this->Parser->SetDebugLevel($this->DebugLevel - 3);
        }
        $this->Parser->ParseText($this->XmlText);

        # discard channel info loaded from the previous feed so that the
        #       channel getters reload it from the newly-parsed XML
        $this->ChannelTitle = NULL;
        $this->ChannelLink = NULL;
        $this->ChannelDescription = NULL;
    }

    # return RSS server URL to caller
    return $this->ServerUrl;
}
92 
/**
 * Get or set the character encoding of the RSS feed.  Setting an
 * encoding that differs from the current one re-reads the feed (via the
 * cache, if any) and re-parses it with the new encoding.
 * @param string $NewValue New character encoding.  (OPTIONAL)
 * @return string Current character encoding.
 */
public function Encoding($NewValue = NULL)
{
    # if new encoding supplied
    if (($NewValue != NULL) && ($NewValue != $this->Encoding))
    {
        # save new value
        $this->Encoding = $NewValue;

        # re-read XML from server
        $this->XmlText = $this->QueryServerWithCaching(
                $this->ServerUrl,
                $this->CacheDB,
                $this->RefreshTime);

        # create new XML parser and parse text
        $this->Parser = new XMLParser($this->Encoding);
        if ($this->DebugLevel > 3)
        {
            $this->Parser->SetDebugLevel($this->DebugLevel - 3);
        }
        $this->Parser->ParseText($this->XmlText);

        # discard channel info that was parsed with the old encoding so
        #       that the channel getters reload it from the new parse
        $this->ChannelTitle = NULL;
        $this->ChannelLink = NULL;
        $this->ChannelDescription = NULL;
    }

    # return encoding to caller
    return $this->Encoding;
}
125 
/**
 * Try to automatically detect and set the encoding of the RSS feed.
 * The encoding declared in the XML declaration wins; otherwise the
 * charset recorded in the cache for this feed URL is used (when a cache
 * database is configured); otherwise ISO-8859-1 is assumed.  The result
 * is applied via Encoding().
 */
public function AutodetectEncoding()
{
    # if neither the XML file nor the HTTP response headers specify an
    # encoding, there is an overwhelming chance that it's ISO-8859-1, so
    # use it as the default
    $Encoding = "ISO-8859-1";

    # only match up to the encoding portion of the XML declaration
    # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-prolog-dtd
    # (S is one or more whitespace characters, Eq is S? '=' S?)
    $S = '[ \t\r\n]+';
    $OptS = '[ \t\r\n]*';
    $Eq = "{$OptS}={$OptS}";
    $VersionNum = '1\.[0-9]+';
    $EncName = '[A-Za-z]([A-Za-z0-9._]|-)*';
    $VersionInfo = "{$S}version{$Eq}('{$VersionNum}'|\"{$VersionNum}\")";
    $EncodingDecl = "{$S}encoding{$Eq}('{$EncName}'|\"{$EncName}\")";
    $XMLDecl = "<\?xml{$VersionInfo}({$EncodingDecl})?";
    $RegEx = "/{$XMLDecl}/";

    # try to find the encoding, index 3 will be set if encoding is declared
    $Matches = array();
    preg_match($RegEx, (string)$this->XmlText, $Matches);

    # give precedence to the encoding specified within the XML file since
    # a RSS feed publisher might not have access to HTTP response headers
    if (isset($Matches[3]) && strlen($Matches[3]))
    {
        # also need to strip off the quotes
        $Encoding = trim($Matches[3], "'\"");
    }

    # then give precedence to the charset parameter in the Content-Type
    # response header, as recorded in the cache
    else if ($this->CacheDB)
    {
        $DB = $this->CacheDB;
        $ServerUrl = addslashes($this->ServerUrl);

        # look up the cache entry for this feed
        $DB->Query("
                SELECT * FROM RSSClientCache
                WHERE ServerUrl = '".$ServerUrl."'");
        $Exists = ($DB->NumRowsSelected() > 0);
        $Cache = $DB->FetchRow();

        # if cached and charset parameter was given in the response headers
        if ($Exists && isset($Cache["Charset"]) && strlen($Cache["Charset"]))
        {
            $Encoding = $Cache["Charset"];
        }
    }

    # apply the detected encoding (re-parses the feed if it changed)
    $this->Encoding($Encoding);
}
184 
/**
 * Retrieve the RSS items from the RSS feed.
 * @param int $NumberOfItems Maximum number of items to return, or NULL
 *       for no limit.  (OPTIONAL, defaults to NULL)
 * @param string $ChannelName Not used by this implementation.  (OPTIONAL)
 * @return array List of items, each an array with "title", "description",
 *       "link", and "enclosure" entries.  Empty if no items were found.
 */
public function GetItems($NumberOfItems = NULL, $ChannelName = NULL)
{
    $Parser = $this->Parser;

    # position the parser where the items live:  inside <channel> for
    #       an <rss> document, otherwise directly under <rdf:RDF>
    $Parser->SeekToRoot();
    if ($Parser->SeekTo("rss") === NULL)
    {
        $Parser->SeekTo("rdf:RDF");
    }
    else
    {
        $Parser->SeekTo("channel");
    }

    # nothing to return if the feed has no items
    $Items = array();
    if (!$Parser->SeekTo("item"))
    {
        return $Items;
    }

    # collect items until the parser runs out or the limit is reached
    do
    {
        $Items[] = array(
                "title" => $Parser->GetData("title"),
                "description" => $Parser->GetData("description"),
                "link" => $Parser->GetData("link"),
                "enclosure" => $Parser->GetAttributes("enclosure"),
                );
    }
    while ($Parser->NextItem()
            && (($NumberOfItems == NULL)
                    || (count($Items) < $NumberOfItems)));

    # hand collected items back to caller
    return $Items;
}
234 
/**
 * Retrieve the channel title as given in the RSS feed.
 * @return string Channel title.
 */
public function GetChannelTitle()
{
    # load channel info from the parsed feed on first use
    if (isset($this->ChannelTitle) === FALSE)
    {
        $this->LoadChannelInfo();
    }

    $Title = $this->ChannelTitle;
    return $Title;
}
244 
/**
 * Retrieve the URL to the site of the channel in the RSS feed.
 * @return string Channel link.
 */
public function GetChannelLink()
{
    # load channel info from the parsed feed on first use
    if (isset($this->ChannelLink) === FALSE)
    {
        $this->LoadChannelInfo();
    }

    $Link = $this->ChannelLink;
    return $Link;
}
254 
/**
 * Get the description of the channel as given in the RSS feed.
 * @return string Channel description.
 */
public function GetChannelDescription()
{
    # load channel info from the parsed feed on first use
    if (isset($this->ChannelDescription) === FALSE)
    {
        $this->LoadChannelInfo();
    }

    $Description = $this->ChannelDescription;
    return $Description;
}
264 
/**
 * Determine whether the RSS client is using cached data.
 * @return bool Value of the flag recorded by the most recent feed query.
 */
public function UsedCachedData()
{
    $WasCached = $this->CachedDataWasUsed;
    return $WasCached;
}
273 
274  # ---- PRIVATE INTERFACE -------------------------------------------------
275 
# cache database handle, cache lifetime (seconds), and feed URL
private $CacheDB, $RefreshTime, $ServerUrl;

# (not referenced by any method in this class)
private $MetadataPrefix, $SetSpec;

# debug verbosity and feed character encoding
private $DebugLevel, $Encoding;

# raw feed XML and the parser loaded with it
private $XmlText, $Parser;

# channel info, loaded on demand by LoadChannelInfo()
private $ChannelTitle, $ChannelLink, $ChannelDescription;

# whether the most recent feed query was answered from the cache
private $CachedDataWasUsed;
289 
/**
 * Set the debug level used by this client.
 * @param int $NewLevel New debug level.
 */
private function SetDebugLevel($NewLevel)
{
    # record new verbosity for subsequent operations
    $Level = $NewLevel;
    $this->DebugLevel = $Level;
}
299 
/**
 * Fetch the content at a URL, along with the media type and charset
 * given in its Content-Type response header (when that header carries a
 * charset parameter).
 * @param string $Url URL to fetch.
 * @return array Three values:  the fetched text (FALSE if the fetch
 *       failed), the media type (NULL if not found), and the charset
 *       (NULL if not found).
 */
private function GetXmlInfo($Url)
{
    # errors are suppressed deliberately:  failure is reported to the
    #       caller via the FALSE return value
    $Text = @file_get_contents($Url);
    $Type = NULL;
    $Charset = NULL;

    # nothing more to learn if the fetch failed
    if ($Text === FALSE)
    {
        return array($Text, $Type, $Charset);
    }

    # this must come after file_get_contents() and before any other remote
    # fetching is done (the variable is only defined when the fetch went
    # through the HTTP wrapper, so fall back to no headers otherwise)
    $Headers = isset($http_response_header)
            ? $http_response_header : array();

    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
    $LWS = '([ \t]*|\r\n[ \t]+)';
    $Token = '[!\x23-\x27*+-.\x30-\x39\x41-\x5A\x5E-\x7A|~]+';
    # quoted-pair is a literal backslash followed by any character, so
    #       the backslash has to be escaped for both PHP and PCRE
    $QuotedPair = '\\\\[\x00-\x7F]';
    $QdText = "([^\\x00-\\x1F\\x7F\"]|{$LWS})";
    $QuotedString = "\"({$QdText}|{$QuotedPair})*\"";
    $Value = "({$Token}|{$QuotedString})";
    $Parameter = "{$Token}{$LWS}={$LWS}{$Value}";

    # these make the Content-Type regex specific to Content-Type
    # values with charset parameters in them, but make capturing
    # the charset much easier
    $BasicParameter = "(;{$LWS}{$Parameter})*";
    $CharsetParameter = "(;{$LWS}charset{$LWS}={$LWS}{$Value})";
    $ModParameter = "{$BasicParameter}{$CharsetParameter}{$BasicParameter}";
    $MediaType = "({$Token}{$LWS}\\/{$LWS}{$Token}){$LWS}{$ModParameter}";

    # back to the spec
    $ContentType = "Content-Type{$LWS}:{$LWS}{$MediaType}{$LWS}";
    $RegEx = "/^{$ContentType}$/i";

    # use the first header that matches (capture group 3 is the media
    #       type, capture group 19 is the charset value)
    foreach ($Headers as $Header)
    {
        $Matches = array();
        preg_match($RegEx, $Header, $Matches);

        if (isset($Matches[3]) && isset($Matches[19]))
        {
            $Type = $Matches[3];

            # the charset may have been sent as a quoted string, so
            #       strip the surrounding quotes
            $Charset = trim($Matches[19], '"');
            break;
        }
    }

    return array($Text, $Type, $Charset);
}
357 
/**
 * Retrieve the XML text for a feed, using the cache database when one
 * is available and holds an unexpired copy, and querying the server
 * otherwise.  Also records the URL (and cache database, if supplied) on
 * the object, and records whether cached data was used.
 * @param string $ServerUrl URL of the RSS feed.
 * @param object $CacheDB Database object to use for caching, or NULL to
 *       keep using any previously-saved cache database.
 * @param int $RefreshTime Maximum age of a usable cached copy, in seconds.
 * @return string XML text for the feed, or an empty string if it could
 *       not be retrieved.
 */
private function QueryServerWithCaching($ServerUrl, $CacheDB, $RefreshTime)
{
    # save RSS server URL
    $this->ServerUrl = $ServerUrl;

    # save caching info (if any)
    if ($CacheDB)
    {
        $this->CacheDB = $CacheDB;
    }

    # start out assuming nothing was retrieved and no cache was used
    $QueryResult = "";
    $this->CachedDataWasUsed = FALSE;

    # NOTE(review): values are escaped with addslashes() as in the rest
    #       of this class -- switch to the database layer's own escaping
    #       or parameter binding if it provides one
    $EscapedUrl = addslashes($ServerUrl);

    # if caching info was supplied
    $DB = $this->CacheDB;
    if ($DB)
    {
        # look up cached information for this server
        $QueryTimeCutoff = date("Y-m-d H:i:s", (time() - $RefreshTime));
        $DB->Query("
                SELECT * FROM RSSClientCache
                WHERE ServerUrl = '".$EscapedUrl."'
                AND LastQueryTime > '".$QueryTimeCutoff."'");

        # if we have cached info that has not expired
        if ($CachedXml = $DB->FetchField("CachedXml"))
        {
            # use cached info
            $this->CachedDataWasUsed = TRUE;
            return $CachedXml;
        }
    }

    # query server for XML text (this also covers the case where no
    #       cache database is available at all)
    list($Text, $Type, $Charset) = $this->GetXmlInfo($ServerUrl);

    # if query was successful
    if ($Text !== FALSE)
    {
        $QueryResult = $Text;

        # if we have somewhere to cache the result
        if ($DB)
        {
            # clear out any old cache entries
            $DB->Query("
                    DELETE FROM RSSClientCache
                    WHERE ServerUrl = '".$EscapedUrl."'");

            # save info in cache (type and charset may be NULL)
            $DB->Query("
                    INSERT INTO RSSClientCache
                    (ServerUrl, CachedXml, Type, Charset, LastQueryTime)
                    VALUES (
                    '".$EscapedUrl."',
                    '".addslashes($Text)."',
                    '".addslashes((string)$Type)."',
                    '".addslashes((string)$Charset)."',
                    NOW())");
        }
    }

    # return query result to caller
    return $QueryResult;
}
433 
/**
 * Load the channel title, link, and description from the parsed feed
 * into the corresponding object properties.
 */
private function LoadChannelInfo()
{
    $Parser = $this->Parser;

    # start from the top and descend into <rss>, falling back to
    #       <rdf:RDF> when there is no <rss> element
    $Parser->SeekToRoot();
    if ($Parser->SeekTo("rss") === NULL)
    {
        $Parser->SeekTo("rdf:RDF");
    }

    # channel info lives under <channel>
    $Parser->SeekTo("channel");

    # pull out the values of interest
    $Title = $Parser->GetData("title");
    $this->ChannelTitle = $Title;
    $Link = $Parser->GetData("link");
    $this->ChannelLink = $Link;
    $Description = $Parser->GetData("description");
    $this->ChannelDescription = $Description;
}
452 }
UsedCachedData()
Determine whether the RSS client is using cached data.
Definition: RSSClient.php:269
GetItems($NumberOfItems=NULL, $ChannelName=NULL)
Retrieve the RSS items from the RSS feed.
Definition: RSSClient.php:193
GetChannelTitle()
Retrieve the channel title as given in the RSS feed.
Definition: RSSClient.php:239
Implements an RSS client for fetching, parsing, and caching RSS feeds.
Definition: RSSClient.php:13
GetChannelLink()
Retrieve the URL to the site of the channel in the RSS feed.
Definition: RSSClient.php:249
ServerUrl($NewValue=NULL)
Get or set the RSS feed URL.
Definition: RSSClient.php:66
Encoding($NewValue=NULL)
Get or set the character encoding of the RSS feed.
Definition: RSSClient.php:99
AutodetectEncoding()
Try to automatically detect and set the encoding of the RSS feed.
Definition: RSSClient.php:131
__construct($ServerUrl, $CacheDB=NULL, $RefreshTime=600, $Encoding="UTF-8", $DebugLevel=0)
Object constructor.
Definition: RSSClient.php:29
GetChannelDescription()
Get the description of the channel as given in the RSS feed.
Definition: RSSClient.php:259