CWIS Developer Documentation
RSSClient.php
Go to the documentation of this file.
1 <?PHP
2 
3 #
4 # FILE: Scout--RSSClient.php
5 #
6 # METHODS PROVIDED:
7 # RSSClient()
8 # - constructor
9 # SomeMethod($SomeParameter, $AnotherParameter)
10 # - short description of method
11 #
12 # AUTHOR: Edward Almasy
13 #
14 # Copyright 2005 Internet Scout Project
15 # http://scout.wisc.edu
16 #
17 
18 class RSSClient {
19 
20  # ---- PUBLIC INTERFACE --------------------------------------------------
21 
22  # object constructor
23  function RSSClient($ServerUrl, $CacheDB = NULL, $RefreshTime = 600, $Encoding = "UTF-8", $DebugLevel = 0)
24  {
25  # set default debug level
26  $this->DebugLevel = $DebugLevel;
27 
28  # set default encoding
29  $this->Encoding = $Encoding;
30 
31  # save cache details
32  $this->CacheDB = $CacheDB;
33  $this->RefreshTime = $RefreshTime;
34 
35  # query server (or cache) for XML text
36  $this->XmlText = $this->QueryServerWithCaching(
38 
39  # create XML parser and parse text
40  $this->Parser = new XMLParser($this->Encoding);
41  if ($this->DebugLevel > 3) { $Parser->SetDebugLevel($this->DebugLevel - 3); }
42  $this->Parser->ParseText($this->XmlText);
43 
44  if ($this->DebugLevel) { print("RSSClient->RSSClient() returned ".strlen($this->XmlText)." characters from server query<br>\n"); }
45  }
46 
47  # get/set server URL
48  function ServerUrl($NewValue = NULL)
49  {
50  # if new RSS server URL supplied
51  if (($NewValue != NULL) && ($NewValue != $this->ServerUrl))
52  {
53  # save new value
54  $this->ServerUrl = $NewValue;
55 
56  # re-read XML from server at new URL
57  $this->XmlText = $this->QueryServerWithCaching(
58  $NewValue,
59  $this->CacheDB,
60  $this->RefreshTime);
61 
62  # create new XML parser and parse text
63  $this->Parser = new XMLParser();
64  if ($this->DebugLevel > 3) { $Parser->SetDebugLevel($this->DebugLevel - 3); }
65  $this->Parser->ParseText($this->XmlText);
66  }
67 
68  # return RSS server URL to caller
69  return $this->ServerUrl;
70  }
71 
72  # get/set encoding
73  function Encoding($NewValue = NULL)
74  {
75  # if new encoding supplied
76  if (($NewValue != NULL) && ($NewValue != $this->Encoding))
77  {
78  # save new value
79  $this->Encoding = $NewValue;
80 
81  # re-read XML from server
82  $this->XmlText = $this->QueryServerWithCaching(
83  $this->ServerUrl,
84  $this->CacheDB,
85  $this->RefreshTime);
86 
87  # create new XML parser and parse text
88  $this->Parser = new XMLParser($this->Encoding);
89  if ($this->DebugLevel > 3) { $Parser->SetDebugLevel($this->DebugLevel - 3); }
90  $this->Parser->ParseText($this->XmlText);
91  }
92 
93  # return encoding to caller
94  return $this->Encoding;
95  }
96 
103  {
104  # if neither the XML file nor the HTTP response headers specify an
105  # encoding, there is an overwhelming chance that it's ISO-8859-1, so
106  # use it as the default
107  $Encoding = "ISO-8859-1";
108 
109  # only get up to the encoding portion of the XML declaration
110  # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-prolog-dtd
111  $S = '[ \t\r\n]';
112  $Eq = "{$S}?={$S}?";
113  $VersionNum = '1.0';
114  $EncName = '[A-Za-z]([A-Za-z0-9._]|-)*';
115  $VersionInfo = "{$S}version{$Eq}('{$VersionNum}'|\"{$VersionNum}\")";
116  $EncodingDecl = "{$S}encoding{$Eq}('{$EncName}'|\"{$EncName}\")";
117  $XMLDecl = "<\?xml{$VersionInfo}({$EncodingDecl})?";
118  $RegEx = "/{$XMLDecl}/";
119 
120  # try to find the encoding, index 3 will be set if encoding is declared
121  preg_match($RegEx, $this->XmlText, $Matches);
122 
123  # give precedence to the encoding specified within the XML file since
124  # a RSS feed publisher might not have access to HTTP response headers
125  if (count($Matches) >= 4)
126  {
127  # also need to strip off the quotes
128  $Encoding = trim($Matches[3], "'\"");
129  }
130 
131  # then give precedence to the charset parameter in the Content-Type
132  # response header
133  else if ($this->CacheDB)
134  {
135  # set up for looking up this server's entry in the cache
136  $DB = $this->CacheDB;
137  $ServerUrl = addslashes($this->ServerUrl);
138 
139  # get the cache value
140  $DB->Query("
141  SELECT * FROM RSSClientCache
142  WHERE ServerUrl = '".$ServerUrl."'");
143  $Exists = ($DB->NumRowsSelected() > 0);
144  $Cache = $DB->FetchRow();
145 
146  # if cached and charset parameter was given in the response headers
147  if ($Exists && strlen($Cache["Charset"]))
148  {
149  $Encoding = $Cache["Charset"];
150  }
151  }
152 
153  $this->Encoding($Encoding);
154  }
155 
156  # retrieve RSS items (from first channel if not otherwise specified)
157  function GetItems($NumberOfItems = NULL, $ChannelName = NULL)
158  {
159  # start by assuming no items will be found
160  $Items = array();
161 
162  # move parser to area in XML with items
164  $Parser->SeekToRoot();
165  $Result = $Parser->SeekTo("rss");
166  if ($Result === NULL)
167  {
168  $Result = $Parser->SeekTo("rdf:RDF");
169  }
170  else
171  {
172  $Parser->SeekTo("channel");
173  }
174 
175  # if items are found
176  $ItemCount = $Parser->SeekTo("item");
177  if ($ItemCount)
178  {
179  # for each record
180  $Index = 0;
181  do
182  {
183  # retrieve item info
184  $Items[$Index]["title"] = $Parser->GetData("title");
185  $Items[$Index]["description"] = $Parser->GetData("description");
186  $Items[$Index]["link"] = $Parser->GetData("link");
187  $Items[$Index]["enclosure"] = $Parser->GetAttributes("enclosure");
188 
189  $Index++;
190  }
191  while ($Parser->NextItem() && (($NumberOfItems == NULL) || ($Index < $NumberOfItems)));
192  }
193 
194  # return records to caller
195  return $Items;
196  }
197 
198  # retrieve site name as given in feed
199  function GetChannelTitle()
200  {
201  if (!isset($this->ChannelTitle)) { $this->LoadChannelInfo(); }
202  return $this->ChannelTitle;
203  }
204 
205  # retrieve site link as given in feed
206  function GetChannelLink()
207  {
208  if (!isset($this->ChannelLink)) { $this->LoadChannelInfo(); }
209  return $this->ChannelLink;
210  }
211 
212  # retrieve site description as given in feed
214  {
215  if (!isset($this->ChannelDescription)) { $this->LoadChannelInfo(); }
217  }
218 
219  # tell caller whether client is using cached data
220  function UsedCachedData()
221  {
223  }
224 
225 
226  # ---- PRIVATE INTERFACE -------------------------------------------------
227 
228  var $CacheDB;
232  var $SetSpec;
235  var $XmlText;
236  var $Parser;
241 
242  # set current debug output level (0-9)
243  function SetDebugLevel($NewLevel)
244  {
245  $this->DebugLevel = $NewLevel;
246  }
247 
257  function GetXmlInfo($Url)
258  {
259  $Text = @file_get_contents($Url);
260  $Type = NULL;
261  $Charset = NULL;
262 
263  # get the type and charset if the fetch was successful
264  if ($Text !== FALSE)
265  {
266  # this must come after file_get_contents() and before any other remote
267  # fetching is done
268  $Headers = $http_response_header;
269 
270  # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
271  $LWS = '([ \t]*|\r\n[ \t]+)';
272  $Token = '[!\x23-\x27*+-.\x30-\x39\x41-\x5A\x5E-\x7A|~]+';
273  $QuotedPair = '\\[\x00-\x7F]';
274  $QdText = "([^\\x00-\\x1F\\x7F\"]|{$LWS})";
275  $QuotedString = "\"({$QdText}|{$QuotedPair})*\"";
276  $Value = "({$Token}|{$QuotedString})";
277  $Parameter = "{$Token}{$LWS}={$LWS}{$Value}";
278 
279  # these make the Content-Type regex specific to Content-Type
280  # values with charset parameters in them, but make capturing
281  # the charset much easier
282  $BasicParameter = "(;{$LWS}{$Parameter})*";
283  $CharsetParameter = "(;{$LWS}charset{$LWS}={$LWS}{$Value})";
284  $ModParameter = "{$BasicParameter}{$CharsetParameter}{$BasicParameter}";
285  $MediaType = "({$Token}{$LWS}\\/{$LWS}{$Token}){$LWS}{$ModParameter}";
286 
287  # back to the spec
288  $ContentType = "Content-Type{$LWS}:{$LWS}{$MediaType}{$LWS}";
289  $RegEx = "/^{$ContentType}$/i";
290 
291  foreach ($Headers as $Header)
292  {
293  preg_match($RegEx, $Header, $Matches);
294 
295  if (isset($Matches[3]) && isset($Matches[19]))
296  {
297  $Type = $Matches[3];
298  $Charset = $Matches[19];
299  break;
300  }
301  }
302  }
303 
304  return array($Text, $Type, $Charset);
305  }
306 
307  # load RSS XML from server or cache
309  {
310  # save RSS server URL
311  $this->ServerUrl = $ServerUrl;
312 
313  # save caching info (if any)
314  if ($CacheDB)
315  {
316  $this->CacheDB = $CacheDB;
317  }
318 
319  # if caching info was supplied
320  if ($this->CacheDB)
321  {
322  $DB = $this->CacheDB;
323 
324  # look up cached information for this server
325  $QueryTimeCutoff = date("Y-m-d H:i:s", (time() - $RefreshTime));
326  $DB->Query("
327  SELECT * FROM RSSClientCache
328  WHERE ServerUrl = '".addslashes($ServerUrl)."'
329  AND LastQueryTime > '".$QueryTimeCutoff."'");
330 
331  # if we have cached info that has not expired
332  if ($CachedXml = $DB->FetchField("CachedXml"))
333  {
334  # use cached info
335  $QueryResult = $CachedXml;
336  $this->CachedDataWasUsed = TRUE;
337  }
338  else
339  {
340  $this->CachedDataWasUsed = FALSE;
341 
342  # query server for XML text
343  list($Text, $Type, $Charset) = $this->GetXmlInfo($ServerUrl);
344  $QueryResult = "";
345 
346  # if query was successful
347  if ($Text !== FALSE)
348  {
349  $QueryResult = $Text;
350 
351  # clear out any old cache entries
352  $DB->Query("
353  DELETE FROM RSSClientCache
354  WHERE ServerUrl = '".addslashes($ServerUrl)."'");
355 
356  # save info in cache
357  $DB->Query("
358  INSERT INTO RSSClientCache
359  (ServerUrl, CachedXml, Type, Charset, LastQueryTime)
360  VALUES (
361  '".addslashes($ServerUrl)."',
362  '".addslashes($Text)."',
363  '".addslashes($Type)."',
364  '".addslashes($Charset)."',
365  NOW())");
366  }
367  }
368  }
369 
370  # return query result to caller
371  return $QueryResult;
372  }
373 
374  function LoadChannelInfo()
375  {
377  $Parser->SeekToRoot();
378  $Result = $Parser->SeekTo("rss");
379  if ($Result === NULL)
380  {
381  $Result = $Parser->SeekTo("rdf:RDF");
382  }
383  $Parser->SeekTo("channel");
384  $this->ChannelTitle = $Parser->GetData("title");
385  $this->ChannelLink = $Parser->GetData("link");
386  $this->ChannelDescription = $Parser->GetData("description");
387  }
388 }