4 # FILE: Scout--RSSClient.php
9 # SomeMethod($SomeParameter, $AnotherParameter)
10 # - short description of method
12 # AUTHOR: Edward Almasy
14 # Copyright 2005 Internet Scout Project
15 # http://scout.wisc.edu
20 # ---- PUBLIC INTERFACE --------------------------------------------------
25 # set default debug level
28 # set default encoding
35 # query server (or cache) for XML text
39 # create XML parser and parse text
41 if ($this->DebugLevel > 3) {
$Parser->SetDebugLevel($this->DebugLevel - 3); }
42 $this->Parser->ParseText($this->XmlText);
44 if ($this->DebugLevel) { print(
"RSSClient->RSSClient() returned ".strlen($this->XmlText).
" characters from server query<br>\n"); }
50 # if new RSS server URL supplied
51 if (($NewValue != NULL) && ($NewValue != $this->
ServerUrl))
56 # re-read XML from server at new URL
62 # create new XML parser and parse text
64 if ($this->DebugLevel > 3) {
$Parser->SetDebugLevel($this->DebugLevel - 3); }
65 $this->Parser->ParseText($this->XmlText);
68 # return RSS server URL to caller
75 # if new encoding supplied
76 if (($NewValue != NULL) && ($NewValue != $this->
Encoding))
81 # re-read XML from server
87 # create new XML parser and parse text
89 if ($this->DebugLevel > 3) {
$Parser->SetDebugLevel($this->DebugLevel - 3); }
90 $this->Parser->ParseText($this->XmlText);
93 # return encoding to caller
104 # if neither the XML file nor the HTTP response headers specify an
105 # encoding, there is an overwhelming chance that it's ISO-8859-1, so
106 # use it as the default
109 # only get up to the encoding portion of the XML declaration
110 # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-prolog-dtd
114 $EncName =
'[A-Za-z]([A-Za-z0-9._]|-)*';
115 $VersionInfo =
"{$S}version{$Eq}('{$VersionNum}'|\"{$VersionNum}\")";
116 $EncodingDecl =
"{$S}encoding{$Eq}('{$EncName}'|\"{$EncName}\")";
117 $XMLDecl =
"<\?xml{$VersionInfo}({$EncodingDecl})?";
118 $RegEx =
"/{$XMLDecl}/";
120 # try to find the encoding, index 3 will be set if encoding is declared
121 preg_match($RegEx, $this->XmlText, $Matches);
123 # give precedence to the encoding specified within the XML file since
124 # an RSS feed publisher might not have access to HTTP response headers
125 if (count($Matches) >= 4)
127 # also need to strip off the quotes
131 # then give precedence to the charset parameter in the Content-Type
133 else if ($this->CacheDB)
135 # create cache table if it doesn't exist
139 # get the cache value
141 SELECT * FROM RSSClientCache
143 $Exists = ($DB->NumRowsSelected() > 0);
144 $Cache = $DB->FetchRow();
146 # if cached and charset parameter was given in the response headers
147 if ($Exists && strlen($Cache[
"Charset"]))
156 # retrieve RSS items (from first channel if not otherwise specified)
157 function GetItems($NumberOfItems = NULL, $ChannelName = NULL)
159 # start by assuming no items will be found
162 # move parser to area in XML with items
165 $Result =
$Parser->SeekTo(
"rss");
166 if ($Result === NULL)
168 $Result =
$Parser->SeekTo(
"rdf:RDF");
176 $ItemCount =
$Parser->SeekTo(
"item");
185 $Items[$Index][
"description"] =
$Parser->GetData(
"description");
187 $Items[$Index][
"enclosure"] =
$Parser->GetAttributes(
"enclosure");
191 while (
$Parser->NextItem() && (($NumberOfItems == NULL) || ($Index < $NumberOfItems)));
194 # return records to caller
198 # retrieve site name as given in feed
205 # retrieve site link as given in feed
212 # retrieve site description as given in feed
219 # tell caller whether client is using cached data
226 # ---- PRIVATE INTERFACE -------------------------------------------------
242 # set current debug output level (0-9)
245 $this->DebugLevel = $NewLevel;
259 $Text = @file_get_contents($Url);
263 # get the type and charset if the fetch was successful
266 # this must come after file_get_contents() and before any other remote
268 $Headers = $http_response_header;
270 # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
271 $LWS =
'([ \t]*|\r\n[ \t]+)';
272 $Token =
'[!\x23-\x27*+-.\x30-\x39\x41-\x5A\x5E-\x7A|~]+';
273 $QuotedPair =
'\\[\x00-\x7F]';
274 $QdText =
"([^\\x00-\\x1F\\x7F\"]|{$LWS})";
275 $QuotedString =
"\"({$QdText}|{$QuotedPair})*\"";
276 $Value =
"({$Token}|{$QuotedString})";
277 $Parameter =
"{$Token}{$LWS}={$LWS}{$Value}";
279 # these make the Content-Type regex specific to Content-Type
280 # values with charset parameters in them, but make capturing
281 # the charset much easier
282 $BasicParameter =
"(;{$LWS}{$Parameter})*";
283 $CharsetParameter =
"(;{$LWS}charset{$LWS}={$LWS}{$Value})";
284 $ModParameter =
"{$BasicParameter}{$CharsetParameter}{$BasicParameter}";
285 $MediaType =
"({$Token}{$LWS}\\/{$LWS}{$Token}){$LWS}{$ModParameter}";
288 $ContentType =
"Content-Type{$LWS}:{$LWS}{$MediaType}{$LWS}";
289 $RegEx =
"/^{$ContentType}$/i";
291 foreach ($Headers as $Header)
293 preg_match($RegEx, $Header, $Matches);
295 if (isset($Matches[3]) && isset($Matches[19]))
298 $Charset = $Matches[19];
304 return array($Text, $Type, $Charset);
307 # load RSS XML from server or cache
310 # save RSS server URL
313 # save caching info (if any)
319 # if caching info was supplied
324 # look up cached information for this server
325 $QueryTimeCutoff = date(
"Y-m-d H:i:s", (time() -
$RefreshTime));
327 SELECT * FROM RSSClientCache
328 WHERE ServerUrl = '".addslashes(
$ServerUrl).
"'
329 AND LastQueryTime > '".$QueryTimeCutoff.
"'");
331 # if we have cached info that has not expired
332 if ($CachedXml = $DB->FetchField(
"CachedXml"))
335 $QueryResult = $CachedXml;
336 $this->CachedDataWasUsed = TRUE;
340 $this->CachedDataWasUsed = FALSE;
342 # query server for XML text
346 # if query was successful
349 $QueryResult = $Text;
351 # clear out any old cache entries
353 DELETE FROM RSSClientCache
354 WHERE ServerUrl = '".addslashes(
$ServerUrl).
"'");
358 INSERT INTO RSSClientCache
359 (ServerUrl, CachedXml, Type, Charset, LastQueryTime)
362 '".addslashes($Text).
"',
363 '".addslashes($Type).
"',
364 '".addslashes($Charset).
"',
370 # return query result to caller
378 $Result =
$Parser->SeekTo(
"rss");
379 if ($Result === NULL)
381 $Result =
$Parser->SeekTo(
"rdf:RDF");
384 $this->ChannelTitle =
$Parser->GetData(
"title");
385 $this->ChannelLink =
$Parser->GetData(
"link");
386 $this->ChannelDescription =
$Parser->GetData(
"description");