5 # Part of the ScoutLib application support library 6 # Copyright 2002-2013 Edward Almasy and Internet Scout Research Group 7 # http://scout.wisc.edu/ 15 # ---- PUBLIC INTERFACE -------------------------------------------------- 29 public function __construct($ServerUrl, $CacheDB = NULL, $RefreshTime = 600,
30 $Encoding =
"UTF-8", $DebugLevel = 0)
32 # set default debug level 33 $this->DebugLevel = $DebugLevel;
35 # set default encoding 39 $this->CacheDB = $CacheDB;
40 $this->RefreshTime = $RefreshTime;
42 # query server (or cache) for XML text 43 $this->XmlText = $this->QueryServerWithCaching(
44 $ServerUrl, $CacheDB, $RefreshTime);
46 # create XML parser and parse text 48 if ($this->DebugLevel > 3) { $Parser->SetDebugLevel($this->DebugLevel - 3); }
49 $this->Parser->ParseText($this->XmlText);
51 if ($this->DebugLevel)
53 print(
"RSSClient->RSSClient() returned ".strlen($this->XmlText)
54 .
" characters from server query<br>\n");
65 # if new RSS server URL supplied 66 if (($NewValue != NULL) && ($NewValue != $this->
ServerUrl))
71 # re-read XML from server at new URL 72 $this->XmlText = $this->QueryServerWithCaching(
77 # create new XML parser and parse text 79 if ($this->DebugLevel > 3)
81 $Parser->SetDebugLevel($this->DebugLevel - 3);
83 $this->Parser->ParseText($this->XmlText);
86 # return RSS server URL to caller 87 return $this->ServerUrl;
98 # if new encoding supplied 99 if (($NewValue != NULL) && ($NewValue != $this->
Encoding))
104 # re-read XML from server 105 $this->XmlText = $this->QueryServerWithCaching(
110 # create new XML parser and parse text 112 if ($this->DebugLevel > 3)
114 $Parser->SetDebugLevel($this->DebugLevel - 3);
116 $this->Parser->ParseText($this->XmlText);
119 # return encoding to caller 120 return $this->Encoding;
130 # if neither the XML file nor the HTTP response headers specify an 131 # encoding, there is an overwhelming chance that it's ISO-8859-1, so 132 # use it as the default 133 $Encoding =
"ISO-8859-1";
135 # only get up to the encoding portion of the XML declaration 136 # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-prolog-dtd 140 $EncName =
'[A-Za-z]([A-Za-z0-9._]|-)*';
141 $VersionInfo =
"{$S}version{$Eq}('{$VersionNum}'|\"{$VersionNum}\")";
142 $EncodingDecl =
"{$S}encoding{$Eq}('{$EncName}'|\"{$EncName}\")";
143 $XMLDecl =
"<\?xml{$VersionInfo}({$EncodingDecl})?";
144 $RegEx =
"/{$XMLDecl}/";
146 # try to find the encoding, index 3 will be set if encoding is declared 147 preg_match($RegEx, $this->XmlText, $Matches);
149 # give precedence to the encoding specified within the XML file since 150 # a RSS feed publisher might not have access to HTTP response headers 151 if (count($Matches) >= 4)
153 # also need to strip off the quotes 154 $Encoding = trim($Matches[3],
"'\"");
157 # then give precedence to the charset parameter in the Content-Type 159 else if ($this->CacheDB)
161 # create cache table if it doesn't exist 162 $DB = $this->CacheDB;
163 $ServerUrl = addslashes($this->
ServerUrl);
165 # get the cache value 167 SELECT * FROM RSSClientCache 168 WHERE ServerUrl = '".$ServerUrl.
"'");
169 $Exists = ($DB->NumRowsSelected() > 0);
170 $Cache = $DB->FetchRow();
172 # if cached and charset parameter was given in the response headers 173 if ($Exists && strlen($Cache[
"Charset"]))
175 $Encoding = $Cache[
"Charset"];
190 public function GetItems($NumberOfItems = NULL, $ChannelName = NULL)
192 # start by assuming no items will be found 195 # move parser to area in XML with items 196 $Parser = $this->Parser;
197 $Parser->SeekToRoot();
198 $Result = $Parser->SeekTo(
"rss");
199 if ($Result === NULL)
201 $Result = $Parser->SeekTo(
"rdf:RDF");
205 $Parser->SeekTo(
"channel");
209 $ItemCount = $Parser->SeekTo(
"item");
217 $Items[$Index][
"title"] = $Parser->GetData(
"title");
218 $Items[$Index][
"description"] = $Parser->GetData(
"description");
219 $Items[$Index][
"link"] = $Parser->GetData(
"link");
220 $Items[$Index][
"enclosure"] = $Parser->GetAttributes(
"enclosure");
224 while ($Parser->NextItem()
225 && (($NumberOfItems == NULL) || ($Index < $NumberOfItems)));
228 # return records to caller 238 if (!isset($this->ChannelTitle)) { $this->LoadChannelInfo(); }
239 return $this->ChannelTitle;
248 if (!isset($this->ChannelLink)) { $this->LoadChannelInfo(); }
249 return $this->ChannelLink;
258 if (!isset($this->ChannelDescription)) { $this->LoadChannelInfo(); }
259 return $this->ChannelDescription;
268 return $this->CachedDataWasUsed;
271 # ---- PRIVATE INTERFACE ------------------------------------------------- 274 private $RefreshTime;
276 private $MetadataPrefix;
282 private $ChannelTitle;
283 private $ChannelLink;
284 private $ChannelDescription;
285 private $CachedDataWasUsed;
/**
 * Set the debug level used by this client.
 *
 * The value is stored unchanged in $this->DebugLevel; other methods in
 * this class compare that property numerically (e.g. "> 3") to decide
 * whether to emit debug output or pass a reduced level to the parser.
 *
 * @param mixed $NewLevel New debug level to store.
 */
private function SetDebugLevel($NewLevel)
{
    # remember the requested level for later checks
    $this->DebugLevel = $NewLevel;
}
305 private function GetXmlInfo($Url)
307 $Text = @file_get_contents($Url);
311 # get the type and charset if the fetch was successful 314 # this must come after file_get_contents() and before any other remote 316 $Headers = $http_response_header;
318 # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17 319 $LWS =
'([ \t]*|\r\n[ \t]+)';
320 $Token =
'[!\x23-\x27*+-.\x30-\x39\x41-\x5A\x5E-\x7A|~]+';
321 $QuotedPair =
'\\[\x00-\x7F]';
322 $QdText =
"([^\\x00-\\x1F\\x7F\"]|{$LWS})";
323 $QuotedString =
"\"({$QdText}|{$QuotedPair})*\"";
324 $Value =
"({$Token}|{$QuotedString})";
325 $Parameter =
"{$Token}{$LWS}={$LWS}{$Value}";
327 # these make the Content-Type regex specific to Content-Type 328 # values with charset parameters in them, but make capturing 329 # the charset much easier 330 $BasicParameter =
"(;{$LWS}{$Parameter})*";
331 $CharsetParameter =
"(;{$LWS}charset{$LWS}={$LWS}{$Value})";
332 $ModParameter =
"{$BasicParameter}{$CharsetParameter}{$BasicParameter}";
333 $MediaType =
"({$Token}{$LWS}\\/{$LWS}{$Token}){$LWS}{$ModParameter}";
336 $ContentType =
"Content-Type{$LWS}:{$LWS}{$MediaType}{$LWS}";
337 $RegEx =
"/^{$ContentType}$/i";
339 foreach ($Headers as $Header)
341 preg_match($RegEx, $Header, $Matches);
343 if (isset($Matches[3]) && isset($Matches[19]))
346 $Charset = $Matches[19];
352 return array($Text, $Type, $Charset);
365 private function QueryServerWithCaching($ServerUrl, $CacheDB, $RefreshTime)
367 # save RSS server URL 370 # save caching info (if any) 373 $this->CacheDB = $CacheDB;
376 # if caching info was supplied 379 $DB = $this->CacheDB;
381 # look up cached information for this server 382 $QueryTimeCutoff = date(
"Y-m-d H:i:s", (time() - $RefreshTime));
384 SELECT * FROM RSSClientCache 385 WHERE ServerUrl = '".addslashes($ServerUrl).
"' 386 AND LastQueryTime > '".$QueryTimeCutoff.
"'");
388 # if we have cached info that has not expired 389 if ($CachedXml = $DB->FetchField(
"CachedXml"))
392 $QueryResult = $CachedXml;
393 $this->CachedDataWasUsed = TRUE;
397 $this->CachedDataWasUsed = FALSE;
399 # query server for XML text 400 list($Text, $Type, $Charset) = $this->GetXmlInfo($ServerUrl);
403 # if query was successful 406 $QueryResult = $Text;
408 # clear out any old cache entries 410 DELETE FROM RSSClientCache 411 WHERE ServerUrl = '".addslashes($ServerUrl).
"'");
415 INSERT INTO RSSClientCache 416 (ServerUrl, CachedXml, Type, Charset, LastQueryTime) 418 '".addslashes($ServerUrl).
"', 419 '".addslashes($Text).
"', 420 '".addslashes($Type).
"', 421 '".addslashes($Charset).
"', 427 # return query result to caller 435 private function LoadChannelInfo()
437 $Parser = $this->Parser;
438 $Parser->SeekToRoot();
439 $Result = $Parser->SeekTo(
"rss");
440 if ($Result === NULL)
442 $Result = $Parser->SeekTo(
"rdf:RDF");
444 $Parser->SeekTo(
"channel");
445 $this->ChannelTitle = $Parser->GetData(
"title");
446 $this->ChannelLink = $Parser->GetData(
"link");
447 $this->ChannelDescription = $Parser->GetData(
"description");