5 # Part of the ScoutLib application support library 6 # Copyright 2002-2013 Edward Almasy and Internet Scout Research Group 7 # http://scout.wisc.edu/ 15 # ---- PUBLIC INTERFACE -------------------------------------------------- 29 public function __construct($ServerUrl, $CacheDB = NULL, $RefreshTime = 600,
30 $Encoding =
"UTF-8", $DebugLevel = 0)
32 # set default debug level 33 $this->DebugLevel = $DebugLevel;
35 # set default encoding 39 $this->CacheDB = $CacheDB;
40 $this->RefreshTime = $RefreshTime;
42 # query server (or cache) for XML text 43 $this->XmlText = $this->QueryServerWithCaching(
44 $ServerUrl, $CacheDB, $RefreshTime);
46 # create XML parser and parse text 48 if ($this->DebugLevel > 3)
50 $this->Parser->SetDebugLevel($this->DebugLevel - 3);
52 $this->Parser->ParseText($this->XmlText);
54 if ($this->DebugLevel)
56 print(
"RSSClient->RSSClient() returned ".strlen($this->XmlText)
57 .
" characters from server query<br>\n");
68 # if new RSS server URL supplied 69 if (($NewValue != NULL) && ($NewValue != $this->
ServerUrl))
74 # re-read XML from server at new URL 75 $this->XmlText = $this->QueryServerWithCaching(
80 # create new XML parser and parse text 82 if ($this->DebugLevel > 3)
84 $this->Parser->SetDebugLevel($this->DebugLevel - 3);
86 $this->Parser->ParseText($this->XmlText);
89 # return RSS server URL to caller 90 return $this->ServerUrl;
101 # if new encoding supplied 102 if (($NewValue != NULL) && ($NewValue != $this->
Encoding))
107 # re-read XML from server 108 $this->XmlText = $this->QueryServerWithCaching(
113 # create new XML parser and parse text 115 if ($this->DebugLevel > 3)
117 $this->Parser->SetDebugLevel($this->DebugLevel - 3);
119 $this->Parser->ParseText($this->XmlText);
122 # return encoding to caller 123 return $this->Encoding;
133 # if neither the XML file nor the HTTP response headers specify an 134 # encoding, there is an overwhelming chance that it's ISO-8859-1, so 135 # use it as the default 136 $Encoding =
"ISO-8859-1";
138 # only get up to the encoding portion of the XML declaration 139 # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-prolog-dtd 143 $EncName =
'[A-Za-z]([A-Za-z0-9._]|-)*';
144 $VersionInfo =
"{$S}version{$Eq}('{$VersionNum}'|\"{$VersionNum}\")";
145 $EncodingDecl =
"{$S}encoding{$Eq}('{$EncName}'|\"{$EncName}\")";
146 $XMLDecl =
"<\?xml{$VersionInfo}({$EncodingDecl})?";
147 $RegEx =
"/{$XMLDecl}/";
149 # try to find the encoding, index 3 will be set if encoding is declared 150 preg_match($RegEx, $this->XmlText, $Matches);
152 # give precedence to the encoding specified within the XML file since 153 # a RSS feed publisher might not have access to HTTP response headers 154 if (count($Matches) >= 4)
156 # also need to strip off the quotes 157 $Encoding = trim($Matches[3],
"'\"");
160 # then give precedence to the charset parameter in the Content-Type 162 else if ($this->CacheDB)
164 # create cache table if it doesn't exist 165 $DB = $this->CacheDB;
166 $ServerUrl = addslashes($this->
ServerUrl);
168 # get the cache value 170 SELECT * FROM RSSClientCache 171 WHERE ServerUrl = '".$ServerUrl.
"'");
172 $Exists = ($DB->NumRowsSelected() > 0);
173 $Cache = $DB->FetchRow();
175 # if cached and charset parameter was given in the response headers 176 if ($Exists && strlen($Cache[
"Charset"]))
178 $Encoding = $Cache[
"Charset"];
193 public function GetItems($NumberOfItems = NULL, $ChannelName = NULL)
195 # start by assuming no items will be found 198 # move parser to area in XML with items 199 $Parser = $this->Parser;
200 $Parser->SeekToRoot();
201 $Result = $Parser->SeekTo(
"rss");
202 if ($Result === NULL)
204 $Result = $Parser->SeekTo(
"rdf:RDF");
208 $Parser->SeekTo(
"channel");
212 $ItemCount = $Parser->SeekTo(
"item");
220 $Items[$Index][
"title"] = $Parser->GetData(
"title");
221 $Items[$Index][
"description"] = $Parser->GetData(
"description");
222 $Items[$Index][
"link"] = $Parser->GetData(
"link");
223 $Items[$Index][
"enclosure"] = $Parser->GetAttributes(
"enclosure");
227 while ($Parser->NextItem()
228 && (($NumberOfItems == NULL) || ($Index < $NumberOfItems)));
231 # return records to caller 241 if (!isset($this->ChannelTitle)) { $this->LoadChannelInfo(); }
242 return $this->ChannelTitle;
251 if (!isset($this->ChannelLink)) { $this->LoadChannelInfo(); }
252 return $this->ChannelLink;
261 if (!isset($this->ChannelDescription)) { $this->LoadChannelInfo(); }
262 return $this->ChannelDescription;
271 return $this->CachedDataWasUsed;
274 # ---- PRIVATE INTERFACE ------------------------------------------------- 277 private $RefreshTime;
279 private $MetadataPrefix;
285 private $ChannelTitle;
286 private $ChannelLink;
287 private $ChannelDescription;
288 private $CachedDataWasUsed;
# Store a new debug level on this object.
# Elsewhere in this class the stored level gates diagnostic output:
# any non-zero level enables the status print after a server query,
# and levels above 3 are passed on to the XML parser reduced by 3.
# This setter only records the value; it does not itself update the
# parser's debug level.
# @param $NewLevel New debug level (stored as-is, no validation).
295 private function SetDebugLevel($NewLevel)
297 $this->DebugLevel = $NewLevel;
308 private function GetXmlInfo($Url)
310 $Text = @file_get_contents($Url);
314 # get the type and charset if the fetch was successful 317 # this must come after file_get_contents() and before any other remote 319 $Headers = $http_response_header;
321 # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17 322 $LWS =
'([ \t]*|\r\n[ \t]+)';
323 $Token =
'[!\x23-\x27*+-.\x30-\x39\x41-\x5A\x5E-\x7A|~]+';
324 $QuotedPair =
'\\[\x00-\x7F]';
325 $QdText =
"([^\\x00-\\x1F\\x7F\"]|{$LWS})";
326 $QuotedString =
"\"({$QdText}|{$QuotedPair})*\"";
327 $Value =
"({$Token}|{$QuotedString})";
328 $Parameter =
"{$Token}{$LWS}={$LWS}{$Value}";
330 # these make the Content-Type regex specific to Content-Type 331 # values with charset parameters in them, but make capturing 332 # the charset much easier 333 $BasicParameter =
"(;{$LWS}{$Parameter})*";
334 $CharsetParameter =
"(;{$LWS}charset{$LWS}={$LWS}{$Value})";
335 $ModParameter =
"{$BasicParameter}{$CharsetParameter}{$BasicParameter}";
336 $MediaType =
"({$Token}{$LWS}\\/{$LWS}{$Token}){$LWS}{$ModParameter}";
339 $ContentType =
"Content-Type{$LWS}:{$LWS}{$MediaType}{$LWS}";
340 $RegEx =
"/^{$ContentType}$/i";
342 foreach ($Headers as $Header)
344 preg_match($RegEx, $Header, $Matches);
346 if (isset($Matches[3]) && isset($Matches[19]))
349 $Charset = $Matches[19];
355 return array($Text, $Type, $Charset);
368 private function QueryServerWithCaching($ServerUrl, $CacheDB, $RefreshTime)
370 # save RSS server URL 373 # save caching info (if any) 376 $this->CacheDB = $CacheDB;
379 # if caching info was supplied 382 $DB = $this->CacheDB;
384 # look up cached information for this server 385 $QueryTimeCutoff = date(
"Y-m-d H:i:s", (time() - $RefreshTime));
387 SELECT * FROM RSSClientCache 388 WHERE ServerUrl = '".addslashes($ServerUrl).
"' 389 AND LastQueryTime > '".$QueryTimeCutoff.
"'");
391 # if we have cached info that has not expired 392 if ($CachedXml = $DB->FetchField(
"CachedXml"))
395 $QueryResult = $CachedXml;
396 $this->CachedDataWasUsed = TRUE;
400 $this->CachedDataWasUsed = FALSE;
402 # query server for XML text 403 list($Text, $Type, $Charset) = $this->GetXmlInfo($ServerUrl);
406 # if query was successful 409 $QueryResult = $Text;
411 # clear out any old cache entries 413 DELETE FROM RSSClientCache 414 WHERE ServerUrl = '".addslashes($ServerUrl).
"'");
418 INSERT INTO RSSClientCache 419 (ServerUrl, CachedXml, Type, Charset, LastQueryTime) 421 '".addslashes($ServerUrl).
"', 422 '".addslashes($Text).
"', 423 '".addslashes($Type).
"', 424 '".addslashes($Charset).
"', 430 # return query result to caller 438 private function LoadChannelInfo()
# Body of LoadChannelInfo(): read the channel-level title, link and
# description out of the already-parsed feed and store them in the
# ChannelTitle / ChannelLink / ChannelDescription properties.  The
# channel accessors call this when their property is not yet set.
440 $Parser = $this->Parser;
# start from the top of the parsed document (per the SeekToRoot() name)
441 $Parser->SeekToRoot();
# look for an "rss" root element first ...
442 $Result = $Parser->SeekTo(
"rss");
# ... and fall back to an "rdf:RDF" root (the RSS 1.0 root element)
# when that seek returned NULL
443 if ($Result === NULL)
445 $Result = $Parser->SeekTo(
"rdf:RDF");
# NOTE(review): $Result is not examined again in the visible lines, so
# if neither root element is found the reads below still run -- confirm
# what SeekTo()/GetData() return in that case
447 $Parser->SeekTo(
"channel");
# cache the three channel fields for later accessor calls
448 $this->ChannelTitle = $Parser->GetData(
"title");
449 $this->ChannelLink = $Parser->GetData(
"link");
450 $this->ChannelDescription = $Parser->GetData(
"description");