Simpletest Coverage - modules/aggregator/aggregator.parser.inc

1 <?php
2 // $Id: aggregator.parser.inc,v 1.4 2009/07/15 21:32:43 dries Exp $
3
4 /**
5 * @file
6 * Parser functions for the aggregator module.
7 */
8
/**
 * Implement hook_aggregator_parse_info().
 *
 * Describes the parser implemented in this file.
 *
 * @return
 *   An array with the keys 'title' and 'description', both passed through t().
 */
function aggregator_aggregator_parse_info() {
  $info = array();
  $info['title'] = t('Default parser');
  $info['description'] = t('Parses RSS, Atom and RDF feeds.');

  return $info;
}
18
/**
 * Implement hook_aggregator_parse().
 *
 * Runs aggregator_parse_feed() on the feed's source string and, when parsing
 * succeeds, merges the channel-level data into the aggregator_feed table,
 * clears the cache and reports the update.
 *
 * @param $feed
 *   The feed object. This function reads its source_string, http_headers,
 *   fid, url and title properties and checks whether redirected is set.
 */
function aggregator_aggregator_parse($feed) {
  global $channel, $image;

  // Filter the input data; there is nothing to store if parsing failed.
  if (!aggregator_parse_feed($feed->source_string, $feed)) {
    return;
  }

  // Caching information taken from the HTTP response headers.
  $modified = 0;
  if (!empty($feed->http_headers['Last-Modified'])) {
    $modified = strtotime($feed->http_headers['Last-Modified']);
  }
  $etag = '';
  if (!empty($feed->http_headers['ETag'])) {
    $etag = $feed->http_headers['ETag'];
  }

  // Strip surrounding whitespace from the channel data.
  foreach ($channel as $name => $text) {
    $channel[$name] = trim($text);
  }

  // Strip surrounding whitespace from the image data (if any).
  foreach ($image as $name => $text) {
    $image[$name] = trim($text);
  }

  // The global $image is replaced by linked image markup, or by an empty
  // string when link, url or title is missing.
  $has_image = !empty($image['link']) && !empty($image['url']) && !empty($image['title']);
  $image = $has_image ? l(theme('image', $image['url'], $image['title']), $image['link'], array('html' => TRUE)) : '';

  // Update the feed data.
  $fields = array(
    'url' => $feed->url,
    'checked' => REQUEST_TIME,
    'link' => empty($channel['link']) ? '' : $channel['link'],
    'description' => empty($channel['description']) ? '' : $channel['description'],
    'image' => $image,
    'hash' => md5($feed->source_string),
    'etag' => $etag,
    'modified' => $modified,
  );
  db_merge('aggregator_feed')
    ->key(array('fid' => $feed->fid))
    ->fields($fields)
    ->execute();

  // Clear the cache.
  cache_clear_all();

  if (isset($feed->redirected)) {
    watchdog('aggregator', 'Updated URL for feed %title to %url.', array('%title' => $feed->title, '%url' => $feed->url));
  }

  $site = array('%site' => $feed->title);
  watchdog('aggregator', 'There is new syndicated content from %site.', $site);
  drupal_set_message(t('There is new syndicated content from %site.', $site));
}
74
/**
 * Parse a feed and store its items.
 *
 * Resets the global parser state ($items, $image, $channel and the
 * element/item/tag trackers), runs the XML parser with the aggregator_element_*
 * callbacks, and then normalizes every collected item into $feed->items.
 *
 * @param $data
 *   The feed data. Taken by reference and handed to
 *   drupal_xml_parser_create() and xml_parse().
 * @param $feed
 *   An object describing the feed to be parsed. Its title is used in error
 *   messages, its link is the fallback for items without a link, and its
 *   items property is overwritten with the parsed items.
 * @return
 *   FALSE on error, TRUE otherwise.
 */
function aggregator_parse_feed(&$data, $feed) {
  global $items, $image, $channel;

  // Unset the global variables before we use them.
  unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag']);
  $items = array();
  $image = array();
  $channel = array();

  // Parse the data.
  $xml_parser = drupal_xml_parser_create($data);
  xml_set_element_handler($xml_parser, 'aggregator_element_start', 'aggregator_element_end');
  xml_set_character_data_handler($xml_parser, 'aggregator_element_data');

  if (!xml_parse($xml_parser, $data, 1)) {
    // Read the error details once, then release the parser so that the
    // failure path does not leave it allocated.
    $variables = array(
      '%site' => $feed->title,
      '%error' => xml_error_string(xml_get_error_code($xml_parser)),
      '%line' => xml_get_current_line_number($xml_parser),
    );
    xml_parser_free($xml_parser);

    watchdog('aggregator', 'The feed from %site seems to be broken, due to an error "%error" on line %line.', $variables, WATCHDOG_WARNING);
    drupal_set_message(t('The feed from %site seems to be broken, because of error "%error" on line %line.', $variables), 'error');
    return FALSE;
  }
  xml_parser_free($xml_parser);

  // We reverse the array such that we store the first item last, and the last
  // item first. In the database, the newest item should be at the top.
  $items = array_reverse($items);

  // Initialize items array.
  $feed->items = array();
  foreach ($items as $item) {

    // Prepare the item:
    foreach ($item as $key => $value) {
      $item[$key] = trim($value);
    }

    // Resolve the item's title. If no title is found, we use up to 40
    // characters of the description ending at a word boundary, but not
    // splitting potential entities.
    if (empty($item['title'])) {
      if (!empty($item['description'])) {
        $item['title'] = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", truncate_utf8($item['description'], 40));
      }
      else {
        $item['title'] = '';
      }
    }

    // Resolve the item's link; fall back to the feed's own link.
    if (empty($item['link'])) {
      $item['link'] = $feed->link;
    }
    $item['guid'] = isset($item['guid']) ? $item['guid'] : '';

    // Atom feeds have a content and/or summary tag instead of a description tag.
    if (!empty($item['content:encoded'])) {
      $item['description'] = $item['content:encoded'];
    }
    elseif (!empty($item['summary'])) {
      $item['description'] = $item['summary'];
    }
    elseif (!empty($item['content'])) {
      $item['description'] = $item['content'];
    }

    // Try to resolve and parse the item's publication date. The first
    // non-empty candidate wins.
    $date = '';
    foreach (array('pubdate', 'dc:date', 'dcterms:issued', 'dcterms:created', 'dcterms:modified', 'issued', 'created', 'modified', 'published', 'updated') as $key) {
      if (!empty($item[$key])) {
        $date = $item[$key];
        break;
      }
    }

    $item['timestamp'] = strtotime($date);

    if ($item['timestamp'] === FALSE) {
      // aggregator_parse_w3cdtf() returns FALSE on failure, so the timestamp
      // stays FALSE when neither parser understands the date.
      $item['timestamp'] = aggregator_parse_w3cdtf($date);
    }

    // Resolve dc:creator tag as the item author if author tag is not set.
    if (empty($item['author']) && !empty($item['dc:creator'])) {
      $item['author'] = $item['dc:creator'];
    }

    // Guarantee that these keys exist on every item.
    $item += array('author' => '', 'description' => '');

    // Store on $feed object. This is where processors will look for parsed items.
    $feed->items[] = $item;
  }

  return TRUE;
}
180
/**
 * Callback function used by the XML parser for opening tags.
 *
 * Tracks which container element the parser is in (global $element), which
 * tag was opened last (global $tag), and the index of the current item
 * (global $item). Atom-style link elements carry their target in the href
 * attribute, which is stored here as the item's or the channel's link.
 *
 * @param $parser
 *   The XML parser invoking the callback (unused).
 * @param $name
 *   The name of the opened element, in any letter case.
 * @param $attributes
 *   An array of the element's attributes, keyed by attribute name in any
 *   letter case.
 */
function aggregator_element_start($parser, $name, $attributes) {
  global $item, $element, $tag, $items, $channel;

  $name = strtolower($name);
  // Attribute names need the same normalization as the element name: with
  // PHP's default XML case folding they arrive upper-cased ('REL', 'HREF'),
  // so lower-case lookups would otherwise never match.
  $attributes = array_change_key_case($attributes, CASE_LOWER);

  switch ($name) {
    case 'image':
    case 'textinput':
    case 'content':
    case 'summary':
    case 'tagline':
    case 'subtitle':
    case 'logo':
    case 'info':
      $element = $name;
      break;
    case 'id':
      if ($element != 'item') {
        $element = $name;
      }
      // No break here, matching the original control flow: an id element
      // continues into the link handling below.
    case 'link':
      // Only links that actually carry an href are of interest. According to
      // RFC 4287, an Atom link without a rel attribute must be interpreted as
      // though its relation type were "alternate".
      if (!empty($attributes['href']) && (empty($attributes['rel']) || $attributes['rel'] == 'alternate')) {
        if ($element == 'item') {
          $items[$item]['link'] = $attributes['href'];
        }
        else {
          $channel['link'] = $attributes['href'];
        }
      }
      break;
    case 'item':
      $element = $name;
      $item += 1;
      break;
    case 'entry':
      // Atom entries are handled exactly like RSS items.
      $element = 'item';
      $item += 1;
      break;
  }

  $tag = $name;
}
225
/**
 * Callback function used by the XML parser for closing tags.
 *
 * Clears the global $element marker when a tracked container element closes,
 * so that following character data is no longer attributed to it.
 *
 * @param $parser
 *   The XML parser invoking the callback (unused).
 * @param $name
 *   The name of the closed element, in any letter case.
 */
function aggregator_element_end($parser, $name) {
  global $element;

  // aggregator_element_start() lower-cases element names before comparing
  // them; the same has to happen here. With PHP's default XML case folding
  // the names arrive upper-cased and would never match the labels below.
  switch (strtolower($name)) {
    case 'image':
    case 'textinput':
    case 'item':
    case 'entry':
    case 'content':
    case 'info':
      $element = '';
      break;
    case 'id':
      // Only reset if the id element itself was being tracked; inside an
      // item the marker stays on 'item'.
      if ($element == 'id') {
        $element = '';
      }
      break;
  }
}
247
/**
 * Callback function used by the XML parser for character data.
 *
 * Appends the data to the item, image or channel array, depending on the
 * container element currently tracked in the global $element. The parser may
 * deliver the text of one element in several chunks, so values are always
 * appended rather than assigned.
 *
 * @param $parser
 *   The XML parser invoking the callback (unused).
 * @param $data
 *   The chunk of character data.
 */
function aggregator_element_data($parser, $data) {
  global $channel, $element, $items, $item, $image, $tag;

  // Make sure the current item has an array to write into, but only for the
  // elements that store data on an item. Doing this unconditionally would
  // register an empty pseudo-item as soon as channel-level text arrives
  // before the first item, and aggregator_parse_feed() would then turn it
  // into a blank entry.
  if (in_array($element, array('item', 'link', 'content', 'summary'), TRUE) && !isset($items[$item])) {
    $items[$item] = array();
  }

  switch ($element) {
    case 'item':
      $items[$item] += array($tag => '');
      $items[$item][$tag] .= $data;
      break;
    case 'image':
    case 'logo':
      $image += array($tag => '');
      $image[$tag] .= $data;
      break;
    case 'link':
      // NOTE(review): aggregator_element_start() in this file never sets
      // $element to 'link', so this branch looks unreachable - confirm.
      if ($data) {
        $items[$item] += array($tag => '');
        $items[$item][$tag] .= $data;
      }
      break;
    case 'content':
      $items[$item] += array('content' => '');
      $items[$item]['content'] .= $data;
      break;
    case 'summary':
      $items[$item] += array('summary' => '');
      $items[$item]['summary'] .= $data;
      break;
    case 'tagline':
    case 'subtitle':
      // Both are stored as the channel description.
      $channel += array('description' => '');
      $channel['description'] .= $data;
      break;
    case 'info':
    case 'id':
    case 'textinput':
      // The sub-element is not supported. However, we must recognize
      // it or its contents will end up in the item array.
      break;
    default:
      // Anything outside a tracked element is channel data.
      $channel += array($tag => '');
      $channel[$tag] .= $data;
  }
}
294
/**
 * Parse the W3C date/time format, a subset of ISO 8601.
 *
 * PHP date parsing functions do not handle this format.
 * See http://www.w3.org/TR/NOTE-datetime for more information.
 * Originally from MagpieRSS (http://magpierss.sourceforge.net/).
 *
 * Accepted is a date and time of the form YYYY-MM-DDThh:mm, optionally
 * followed by :ss and by a time zone designator, which is either "Z" or an
 * offset such as +02:00 or -0530. Without a designator the time is taken to
 * be GMT.
 *
 * @param $date_str
 *   A string with a potentially W3C DTF date.
 * @return
 *   A timestamp if parsed successfully or FALSE if not.
 */
function aggregator_parse_w3cdtf($date_str) {
  // Capture groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 ":ss",
  // 7 seconds, 8 offset sign, 9 offset hours, 10 offset minutes, 11 "Z".
  $pattern = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';
  if (!preg_match($pattern, $date_str, $match)) {
    return FALSE;
  }

  // preg_match() leaves out trailing groups that did not take part in the
  // match; pad them so that every group can be read safely.
  $match += array_fill(0, 12, '');

  // Calculate the epoch for current date assuming GMT. The seconds are in
  // group 7; group 6 still includes the leading colon. Missing seconds
  // count as zero.
  $epoch = gmmktime((int) $match[4], (int) $match[5], (int) $match[7], (int) $match[2], (int) $match[3], (int) $match[1]);

  // Z is zulu time, aka GMT, and needs no correction; neither does a date
  // without any time zone designator.
  if ($match[11] != 'Z' && $match[8] !== '') {
    $offset_secs = (((int) $match[9] * 60) + (int) $match[10]) * 60;
    // Is timezone ahead of GMT? If yes, subtract offset.
    if ($match[8] == '+') {
      $offset_secs *= -1;
    }
    $epoch += $offset_secs;
  }

  return $epoch;
}
334

Legend

Missed
Lines of code that were not exercised during program execution.
Covered
Lines of code that were exercised during program execution.
Comment/non executable
Comment or non-executable line of code.
Dead
Lines of code that, according to Xdebug, could not be executed. These are counted as covered code because in almost all cases the code is runnable.