Mercurial > defr > drupal > core
diff modules/aggregator/aggregator.module @ 1:c1f4ac30525a 6.0
Drupal 6.0
author | Franck Deroche <webmaster@defr.org> |
---|---|
date | Tue, 23 Dec 2008 14:28:28 +0100 |
parents | |
children | 2427550111ae |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modules/aggregator/aggregator.module Tue Dec 23 14:28:28 2008 +0100 @@ -0,0 +1,924 @@ +<?php +// $Id: aggregator.module,v 1.374 2008/01/15 08:06:32 dries Exp $ + +/** + * @file + * Used to aggregate syndicated content (RSS, RDF, and Atom). + */ + +/** + * Implementation of hook_help(). + */ +function aggregator_help($path, $arg) { + switch ($path) { + case 'admin/help#aggregator': + $output = '<p>'. t('The aggregator is a powerful on-site syndicator and news reader that gathers fresh content from RSS-, RDF-, and Atom-based feeds made available across the web. Thousands of sites (particularly news sites and blogs) publish their latest headlines and posts in feeds, using a number of standardized XML-based formats. Formats supported by the aggregator include <a href="@rss">RSS</a>, <a href="@rdf">RDF</a>, and <a href="@atom">Atom</a>.', array('@rss' => 'http://cyber.law.harvard.edu/rss/', '@rdf' => 'http://www.w3.org/RDF/', '@atom' => 'http://www.atomenabled.org')) .'</p>'; + $output .= '<p>'. t('Feeds contain feed items, or individual posts published by the site providing the feed. Feeds may be grouped in categories, generally by topic. Users view feed items in the <a href="@aggregator">main aggregator display</a> or by <a href="@aggregator-sources">their source</a>. Administrators can <a href="@feededit">add, edit and delete feeds</a> and choose how often to check each feed for newly updated items. The most recent items in either a feed or category can be displayed as a block through the <a href="@admin-block">blocks administration page</a>. A <a href="@aggregator-opml">machine-readable OPML file</a> of all feeds is available. A correctly configured <a href="@cron">cron maintenance task</a> is required to update feeds automatically.', array('@aggregator' => url('aggregator'), '@aggregator-sources' => url('aggregator/sources'), '@feededit' => url('admin/content/aggregator'), '@admin-block' => url('admin/build/block'), '@aggregator-opml' => url('aggregator/opml'), '@cron' => url('admin/reports/status'))) .'</p>'; + $output .= '<p>'. t('For more information, see the online handbook entry for <a href="@aggregator">Aggregator module</a>.', array('@aggregator' => 'http://drupal.org/handbook/modules/aggregator/')) .'</p>'; + return $output; + case 'admin/content/aggregator': + $output = '<p>'. t('Thousands of sites (particularly news sites and blogs) publish their latest headlines and posts in feeds, using a number of standardized XML-based formats. Formats supported by the aggregator include <a href="@rss">RSS</a>, <a href="@rdf">RDF</a>, and <a href="@atom">Atom</a>.', array('@rss' => 'http://cyber.law.harvard.edu/rss/', '@rdf' => 'hsttp://www.w3.org/RDF/', '@atom' => 'http://www.atomenabled.org')) .'</p>'; + $output .= '<p>'. t('Current feeds are listed below, and <a href="@addfeed">new feeds may be added</a>. For each feed or feed category, the <em>latest items</em> block may be enabled at the <a href="@block">blocks administration page</a>.', array('@addfeed' => url('admin/content/aggregator/add/feed'), '@block' => url('admin/build/block'))) .'</p>'; + return $output; + case 'admin/content/aggregator/add/feed': + return '<p>'. t('Add a feed in RSS, RDF or Atom format. A feed may only have one entry.') .'</p>'; + case 'admin/content/aggregator/add/category': + return '<p>'. t('Categories allow feed items from different feeds to be grouped together. For example, several sport-related feeds may belong to a category named <em>Sports</em>. Feed items may be grouped automatically (by selecting a category when creating or editing a feed) or manually (via the <em>Categorize</em> page available from feed item listings). Each category provides its own feed page and block.') .'</p>'; + } +} + +/** + * Implementation of hook_theme() + */ +function aggregator_theme() { + return array( + 'aggregator_wrapper' => array( + 'arguments' => array('content' => NULL), + 'file' => 'aggregator.pages.inc', + 'template' => 'aggregator-wrapper', + ), + 'aggregator_categorize_items' => array( + 'arguments' => array('form' => NULL), + 'file' => 'aggregator.pages.inc', + ), + 'aggregator_feed_source' => array( + 'arguments' => array('feed' => NULL), + 'file' => 'aggregator.pages.inc', + 'template' => 'aggregator-feed-source', + ), + 'aggregator_block_item' => array( + 'arguments' => array('item' => NULL, 'feed' => 0), + ), + 'aggregator_summary_items' => array( + 'arguments' => array('summary_items' => NULL, 'source' => NULL), + 'file' => 'aggregator.pages.inc', + 'template' => 'aggregator-summary-items', + ), + 'aggregator_summary_item' => array( + 'arguments' => array('item' => NULL), + 'file' => 'aggregator.pages.inc', + 'template' => 'aggregator-summary-item', + ), + 'aggregator_item' => array( + 'arguments' => array('item' => NULL), + 'file' => 'aggregator.pages.inc', + 'template' => 'aggregator-item', + ), + 'aggregator_page_opml' => array( + 'arguments' => array('feeds' => NULL), + 'file' => 'aggregator.pages.inc', + ), + 'aggregator_page_rss' => array( + 'arguments' => array('feeds' => NULL, 'category' => NULL), + 'file' => 'aggregator.pages.inc', + ), + ); +} + +/** + * Implementation of hook_menu(). + */ +function aggregator_menu() { + $items['admin/content/aggregator'] = array( + 'title' => 'Feed aggregator', + 'description' => "Configure which content your site aggregates from other sites, how often it polls them, and how they're categorized.", + 'page callback' => 'aggregator_admin_overview', + 'access arguments' => array('administer news feeds'), + 'file' => 'aggregator.admin.inc', + ); + $items['admin/content/aggregator/add/feed'] = array( + 'title' => 'Add feed', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('aggregator_form_feed'), + 'access arguments' => array('administer news feeds'), + 'type' => MENU_LOCAL_TASK, + 'parent' => 'admin/content/aggregator', + 'file' => 'aggregator.admin.inc', + ); + $items['admin/content/aggregator/add/category'] = array( + 'title' => 'Add category', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('aggregator_form_category'), + 'access arguments' => array('administer news feeds'), + 'type' => MENU_LOCAL_TASK, + 'parent' => 'admin/content/aggregator', + 'file' => 'aggregator.admin.inc', + ); + $items['admin/content/aggregator/remove/%aggregator_feed'] = array( + 'title' => 'Remove items', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('aggregator_admin_remove_feed', 4), + 'access arguments' => array('administer news feeds'), + 'type' => MENU_CALLBACK, + 'file' => 'aggregator.admin.inc', + ); + $items['admin/content/aggregator/update/%aggregator_feed'] = array( + 'title' => 'Update items', + 'page callback' => 'aggregator_admin_refresh_feed', + 'page arguments' => array(4), + 'access arguments' => array('administer news feeds'), + 'type' => MENU_CALLBACK, + 'file' => 'aggregator.admin.inc', + ); + $items['admin/content/aggregator/list'] = array( + 'title' => 'List', + 'type' => MENU_DEFAULT_LOCAL_TASK, + 'weight' => -10, + ); + $items['admin/content/aggregator/settings'] = array( + 'title' => 'Settings', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('aggregator_admin_settings'), + 'type' => MENU_LOCAL_TASK, + 'weight' => 10, + 'access arguments' => array('administer news feeds'), + 'file' => 'aggregator.admin.inc', + ); + $items['aggregator'] = array( + 'title' => 'Feed aggregator', + 'page callback' => 'aggregator_page_last', + 'access arguments' => array('access news feeds'), + 'weight' => 5, + 'file' => 'aggregator.pages.inc', + ); + $items['aggregator/sources'] = array( + 'title' => 'Sources', + 'page callback' => 'aggregator_page_sources', + 'access arguments' => array('access news feeds'), + 'file' => 'aggregator.pages.inc', + ); + $items['aggregator/categories'] = array( + 'title' => 'Categories', + 'page callback' => 'aggregator_page_categories', + 'access callback' => '_aggregator_has_categories', + 'file' => 'aggregator.pages.inc', + ); + $items['aggregator/rss'] = array( + 'title' => 'RSS feed', + 'page callback' => 'aggregator_page_rss', + 'access arguments' => array('access news feeds'), + 'type' => MENU_CALLBACK, + 'file' => 'aggregator.pages.inc', + ); + $items['aggregator/opml'] = array( + 'title' => 'OPML feed', + 'page callback' => 'aggregator_page_opml', + 'access arguments' => array('access news feeds'), + 'type' => MENU_CALLBACK, + 'file' => 'aggregator.pages.inc', + ); + $items['aggregator/categories/%aggregator_category'] = array( + 'title callback' => '_aggregator_category_title', + 'title arguments' => array(2), + 'page callback' => 'aggregator_page_category', + 'page arguments' => array(2), + 'access callback' => 'user_access', + 'access arguments' => array('access news feeds'), + 'file' => 'aggregator.pages.inc', + ); + $items['aggregator/categories/%aggregator_category/view'] = array( + 'title' => 'View', + 'type' => MENU_DEFAULT_LOCAL_TASK, + 'weight' => -10, + ); + $items['aggregator/categories/%aggregator_category/categorize'] = array( + 'title' => 'Categorize', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('aggregator_page_category', 2), + 'access arguments' => array('administer news feeds'), + 'type' => MENU_LOCAL_TASK, + 'file' => 'aggregator.pages.inc', + ); + $items['aggregator/categories/%aggregator_category/configure'] = array( + 'title' => 'Configure', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('aggregator_form_category', 2), + 'access arguments' => array('administer news feeds'), + 'type' => MENU_LOCAL_TASK, + 'weight' => 1, + 'file' => 'aggregator.admin.inc', + ); + $items['aggregator/sources/%aggregator_feed'] = array( + 'page callback' => 'aggregator_page_source', + 'page arguments' => array(2), + 'type' => MENU_CALLBACK, + 'file' => 'aggregator.pages.inc', + ); + $items['aggregator/sources/%aggregator_feed/view'] = array( + 'title' => 'View', + 'type' => MENU_DEFAULT_LOCAL_TASK, + 'weight' => -10, + ); + $items['aggregator/sources/%aggregator_feed/categorize'] = array( + 'title' => 'Categorize', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('aggregator_page_source', 2), + 'access arguments' => array('administer news feeds'), + 'type' => MENU_LOCAL_TASK, + 'file' => 'aggregator.pages.inc', + ); + $items['aggregator/sources/%aggregator_feed/configure'] = array( + 'title' => 'Configure', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('aggregator_form_feed', 2), + 'access arguments' => array('administer news feeds'), + 'type' => MENU_LOCAL_TASK, + 'weight' => 1, + 'file' => 'aggregator.admin.inc', + ); + $items['admin/content/aggregator/edit/feed/%aggregator_feed'] = array( + 'title' => 'Edit feed', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('aggregator_form_feed', 5), + 'access arguments' => array('administer news feeds'), + 'type' => MENU_CALLBACK, + 'file' => 'aggregator.admin.inc', + ); + $items['admin/content/aggregator/edit/category/%aggregator_category'] = array( + 'title' => 'Edit category', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('aggregator_form_category', 5), + 'access arguments' => array('administer news feeds'), + 'type' => MENU_CALLBACK, + 'file' => 'aggregator.admin.inc', + ); + + return $items; +} + +/** + * Menu callback. + * + * @return + * An aggregator category title. + */ +function _aggregator_category_title($category) { + return $category['title']; +} + +/** + * Implementation of hook_init(). + */ +function aggregator_init() { + drupal_add_css(drupal_get_path('module', 'aggregator') .'/aggregator.css'); +} + +/** + * Find out whether there are any aggregator categories. + * + * @return + * TRUE if there is at least one category and the user has access to them, FALSE otherwise. + */ +function _aggregator_has_categories() { + return user_access('access news feeds') && db_result(db_query('SELECT COUNT(*) FROM {aggregator_category}')); +} + +/** + * Implementation of hook_perm(). + */ +function aggregator_perm() { + return array('administer news feeds', 'access news feeds'); +} + +/** + * Implementation of hook_cron(). + * + * Checks news feeds for updates once their refresh interval has elapsed. + */ +function aggregator_cron() { + $result = db_query('SELECT * FROM {aggregator_feed} WHERE checked + refresh < %d', time()); + while ($feed = db_fetch_array($result)) { + aggregator_refresh($feed); + } +} + +/** + * Implementation of hook_block(). + * + * Generates blocks for the latest news items in each category and feed. + */ +function aggregator_block($op = 'list', $delta = 0, $edit = array()) { + if (user_access('access news feeds')) { + if ($op == 'list') { + $result = db_query('SELECT cid, title FROM {aggregator_category} ORDER BY title'); + while ($category = db_fetch_object($result)) { + $block['category-'. $category->cid]['info'] = t('!title category latest items', array('!title' => $category->title)); + } + $result = db_query('SELECT fid, title FROM {aggregator_feed} ORDER BY fid'); + while ($feed = db_fetch_object($result)) { + $block['feed-'. $feed->fid]['info'] = t('!title feed latest items', array('!title' => $feed->title)); + } + } + else if ($op == 'configure') { + list($type, $id) = explode('-', $delta); + if ($type == 'category') { + $value = db_result(db_query('SELECT block FROM {aggregator_category} WHERE cid = %d', $id)); + } + else { + $value = db_result(db_query('SELECT block FROM {aggregator_feed} WHERE fid = %d', $id)); + } + $form['block'] = array('#type' => 'select', '#title' => t('Number of news items in block'), '#default_value' => $value, '#options' => drupal_map_assoc(array(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20))); + return $form; + } + else if ($op == 'save') { + list($type, $id) = explode('-', $delta); + if ($type == 'category') { + $value = db_query('UPDATE {aggregator_category} SET block = %d WHERE cid = %d', $edit['block'], $id); + } + else { + $value = db_query('UPDATE {aggregator_feed} SET block = %d WHERE fid = %d', $edit['block'], $id); + } + } + else if ($op == 'view') { + list($type, $id) = explode('-', $delta); + switch ($type) { + case 'feed': + if ($feed = db_fetch_object(db_query('SELECT fid, title, block FROM {aggregator_feed} WHERE fid = %d', $id))) { + $block['subject'] = check_plain($feed->title); + $result = db_query_range('SELECT * FROM {aggregator_item} WHERE fid = %d ORDER BY timestamp DESC, iid DESC', $feed->fid, 0, $feed->block); + $read_more = theme('more_link', url('aggregator/sources/'. $feed->fid), t("View this feed's recent news.")); + } + break; + + case 'category': + if ($category = db_fetch_object(db_query('SELECT cid, title, block FROM {aggregator_category} WHERE cid = %d', $id))) { + $block['subject'] = check_plain($category->title); + $result = db_query_range('SELECT i.* FROM {aggregator_category_item} ci LEFT JOIN {aggregator_item} i ON ci.iid = i.iid WHERE ci.cid = %d ORDER BY i.timestamp DESC, i.iid DESC', $category->cid, 0, $category->block); + $read_more = theme('more_link', url('aggregator/categories/'. $category->cid), t("View this category's recent news.")); + } + break; + } + $items = array(); + while ($item = db_fetch_object($result)) { + $items[] = theme('aggregator_block_item', $item); + } + + // Only display the block if there are items to show. + if (count($items) > 0) { + $block['content'] = theme('item_list', $items) . $read_more; + } + } + if (isset($block)) { + return $block; + } + } +} + +/** + * Add/edit/delete aggregator categories. + * + * @param $edit + * An associative array describing the category to be added/edited/deleted. + */ +function aggregator_save_category($edit) { + $link_path = 'aggregator/categories/'; + if (!empty($edit['cid'])) { + $link_path .= $edit['cid']; + if (!empty($edit['title'])) { + db_query("UPDATE {aggregator_category} SET title = '%s', description = '%s' WHERE cid = %d", $edit['title'], $edit['description'], $edit['cid']); + $op = 'update'; + } + else { + db_query('DELETE FROM {aggregator_category} WHERE cid = %d', $edit['cid']); + $edit['title'] = ''; + $op = 'delete'; + } + } + else if (!empty($edit['title'])) { + // A single unique id for bundles and feeds, to use in blocks + db_query("INSERT INTO {aggregator_category} (title, description, block) VALUES ('%s', '%s', 5)", $edit['title'], $edit['description']); + $link_path .= db_last_insert_id('aggregator', 'cid'); + $op = 'insert'; + } + if (isset($op)) { + menu_link_maintain('aggregator', $op, $link_path, $edit['title']); + } +} + +/** + * Add/edit/delete an aggregator feed. + * + * @param $edit + * An associative array describing the feed to be added/edited/deleted. + */ +function aggregator_save_feed($edit) { + if (!empty($edit['fid'])) { + // An existing feed is being modified, delete the category listings. + db_query('DELETE FROM {aggregator_category_feed} WHERE fid = %d', $edit['fid']); + } + if (!empty($edit['fid']) && !empty($edit['title'])) { + db_query("UPDATE {aggregator_feed} SET title = '%s', url = '%s', refresh = %d WHERE fid = %d", $edit['title'], $edit['url'], $edit['refresh'], $edit['fid']); + } + else if (!empty($edit['fid'])) { + $items = array(); + $result = db_query('SELECT iid FROM {aggregator_item} WHERE fid = %d', $edit['fid']); + while ($item = db_fetch_object($result)) { + $items[] = "iid = $item->iid"; + } + if (!empty($items)) { + db_query('DELETE FROM {aggregator_category_item} WHERE '. implode(' OR ', $items)); + } + db_query('DELETE FROM {aggregator_feed} WHERE fid = %d', $edit['fid']); + db_query('DELETE FROM {aggregator_item} WHERE fid = %d', $edit['fid']); + } + else if (!empty($edit['title'])) { + db_query("INSERT INTO {aggregator_feed} (title, url, refresh, block, description, image) VALUES ('%s', '%s', %d, 5, '', '')", $edit['title'], $edit['url'], $edit['refresh']); + // A single unique id for bundles and feeds, to use in blocks. + $edit['fid'] = db_last_insert_id('aggregator_feed', 'fid'); + } + if (!empty($edit['title'])) { + // The feed is being saved, save the categories as well. + if (!empty($edit['category'])) { + foreach ($edit['category'] as $cid => $value) { + if ($value) { + db_query('INSERT INTO {aggregator_category_feed} (fid, cid) VALUES (%d, %d)', $edit['fid'], $cid); + } + } + } + } +} + +/** + * Removes all items from a feed. + * + * @param $feed + * An associative array describing the feed to be cleared. + */ +function aggregator_remove($feed) { + $result = db_query('SELECT iid FROM {aggregator_item} WHERE fid = %d', $feed['fid']); + while ($item = db_fetch_object($result)) { + $items[] = "iid = $item->iid"; + } + if (!empty($items)) { + db_query('DELETE FROM {aggregator_category_item} WHERE '. implode(' OR ', $items)); + } + db_query('DELETE FROM {aggregator_item} WHERE fid = %d', $feed['fid']); + db_query("UPDATE {aggregator_feed} SET checked = 0, etag = '', modified = 0 WHERE fid = %d", $feed['fid']); + drupal_set_message(t('The news items from %site have been removed.', array('%site' => $feed['title']))); +} + +/** + * Call-back function used by the XML parser. + */ +function aggregator_element_start($parser, $name, $attributes) { + global $item, $element, $tag, $items, $channel; + + switch ($name) { + case 'IMAGE': + case 'TEXTINPUT': + case 'CONTENT': + case 'SUMMARY': + case 'TAGLINE': + case 'SUBTITLE': + case 'LOGO': + case 'INFO': + $element = $name; + break; + case 'ID': + if ($element != 'ITEM') { + $element = $name; + } + case 'LINK': + if (!empty($attributes['REL']) && $attributes['REL'] == 'alternate') { + if ($element == 'ITEM') { + $items[$item]['LINK'] = $attributes['HREF']; + } + else { + $channel['LINK'] = $attributes['HREF']; + } + } + break; + case 'ITEM': + $element = $name; + $item += 1; + break; + case 'ENTRY': + $element = 'ITEM'; + $item += 1; + break; + } + + $tag = $name; +} + +/** + * Call-back function used by the XML parser. + */ +function aggregator_element_end($parser, $name) { + global $element; + + switch ($name) { + case 'IMAGE': + case 'TEXTINPUT': + case 'ITEM': + case 'ENTRY': + case 'CONTENT': + case 'INFO': + $element = ''; + break; + case 'ID': + if ($element == 'ID') { + $element = ''; + } + } +} + +/** + * Call-back function used by the XML parser. + */ +function aggregator_element_data($parser, $data) { + global $channel, $element, $items, $item, $image, $tag; + $items += array($item => array()); + switch ($element) { + case 'ITEM': + $items[$item] += array($tag => ''); + $items[$item][$tag] .= $data; + break; + case 'IMAGE': + case 'LOGO': + $image += array($tag => ''); + $image[$tag] .= $data; + break; + case 'LINK': + if ($data) { + $items[$item] += array($tag => ''); + $items[$item][$tag] .= $data; + } + break; + case 'CONTENT': + $items[$item] += array('CONTENT' => ''); + $items[$item]['CONTENT'] .= $data; + break; + case 'SUMMARY': + $items[$item] += array('SUMMARY' => ''); + $items[$item]['SUMMARY'] .= $data; + break; + case 'TAGLINE': + case 'SUBTITLE': + $channel += array('DESCRIPTION' => ''); + $channel['DESCRIPTION'] .= $data; + break; + case 'INFO': + case 'ID': + case 'TEXTINPUT': + // The sub-element is not supported. However, we must recognize + // it or its contents will end up in the item array. + break; + default: + $channel += array($tag => ''); + $channel[$tag] .= $data; + } +} + +/** + * Checks a news feed for new items. + * + * @param $feed + * An associative array describing the feed to be refreshed. + */ +function aggregator_refresh($feed) { + global $channel, $image; + + // Generate conditional GET headers. + $headers = array(); + if ($feed['etag']) { + $headers['If-None-Match'] = $feed['etag']; + } + if ($feed['modified']) { + $headers['If-Modified-Since'] = gmdate('D, d M Y H:i:s', $feed['modified']) .' GMT'; + } + + // Request feed. + $result = drupal_http_request($feed['url'], $headers); + + // Process HTTP response code. + switch ($result->code) { + case 304: + db_query('UPDATE {aggregator_feed} SET checked = %d WHERE fid = %d', time(), $feed['fid']); + drupal_set_message(t('There is no new syndicated content from %site.', array('%site' => $feed['title']))); + break; + case 301: + $feed['url'] = $result->redirect_url; + watchdog('aggregator', 'Updated URL for feed %title to %url.', array('%title' => $feed['title'], '%url' => $feed['url'])); + // Deliberate no break. + case 200: + case 302: + case 307: + // Filter the input data: + if (aggregator_parse_feed($result->data, $feed)) { + $modified = empty($result->headers['Last-Modified']) ? 0 : strtotime($result->headers['Last-Modified']); + + // Prepare the channel data. + foreach ($channel as $key => $value) { + $channel[$key] = trim($value); + } + + // Prepare the image data (if any). + foreach ($image as $key => $value) { + $image[$key] = trim($value); + } + + if (!empty($image['LINK']) && !empty($image['URL']) && !empty($image['TITLE'])) { + // Note, we should really use theme_image() here but that only works with local images it won't work with images fetched with a URL unless PHP version > 5 + $image = '<a href="'. check_url($image['LINK']) .'" class="feed-image"><img src="'. check_url($image['URL']) .'" alt="'. check_plain($image['TITLE']) .'" /></a>'; + } + else { + $image = NULL; + } + + $etag = empty($result->headers['ETag']) ? '' : $result->headers['ETag']; + // Update the feed data. + db_query("UPDATE {aggregator_feed} SET url = '%s', checked = %d, link = '%s', description = '%s', image = '%s', etag = '%s', modified = %d WHERE fid = %d", $feed['url'], time(), $channel['LINK'], $channel['DESCRIPTION'], $image, $etag, $modified, $feed['fid']); + + // Clear the cache. + cache_clear_all(); + + watchdog('aggregator', 'There is new syndicated content from %site.', array('%site' => $feed['title'])); + drupal_set_message(t('There is new syndicated content from %site.', array('%site' => $feed['title']))); + break; + } + $result->error = t('feed not parseable'); + // Deliberate no break. + default: + watchdog('aggregator', 'The feed from %site seems to be broken, due to "%error".', array('%site' => $feed['title'], '%error' => $result->code .' '. $result->error), WATCHDOG_WARNING); + drupal_set_message(t('The feed from %site seems to be broken, because of error "%error".', array('%site' => $feed['title'], '%error' => $result->code .' '. $result->error))); + module_invoke('system', 'check_http_request'); + } +} + +/** + * Parse the W3C date/time format, a subset of ISO 8601. PHP date parsing + * functions do not handle this format. + * See http://www.w3.org/TR/NOTE-datetime for more information. + * Originally from MagpieRSS (http://magpierss.sourceforge.net/). + * + * @param $date_str + * A string with a potentially W3C DTF date. + * @return + * A timestamp if parsed successfully or FALSE if not. + */ +function aggregator_parse_w3cdtf($date_str) { + if (preg_match('/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/', $date_str, $match)) { + list($year, $month, $day, $hours, $minutes, $seconds) = array($match[1], $match[2], $match[3], $match[4], $match[5], $match[6]); + // calc epoch for current date assuming GMT + $epoch = gmmktime($hours, $minutes, $seconds, $month, $day, $year); + if ($match[10] != 'Z') { // Z is zulu time, aka GMT + list($tz_mod, $tz_hour, $tz_min) = array($match[8], $match[9], $match[10]); + // zero out the variables + if (!$tz_hour) { + $tz_hour = 0; + } + if (!$tz_min) { + $tz_min = 0; + } + $offset_secs = (($tz_hour * 60) + $tz_min) * 60; + // is timezone ahead of GMT? then subtract offset + if ($tz_mod == '+') { + $offset_secs *= -1; + } + $epoch += $offset_secs; + } + return $epoch; + } + else { + return FALSE; + } +} + +/** + * Parse a feed and store its items. + * + * @param $data + * The feed data. + * @param $feed + * An associative array describing the feed to be parsed. + * @return + * 0 on error, 1 otherwise. + */ +function aggregator_parse_feed(&$data, $feed) { + global $items, $image, $channel; + + // Unset the global variables before we use them: + unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag']); + $items = array(); + $image = array(); + $channel = array(); + + // parse the data: + $xml_parser = drupal_xml_parser_create($data); + xml_set_element_handler($xml_parser, 'aggregator_element_start', 'aggregator_element_end'); + xml_set_character_data_handler($xml_parser, 'aggregator_element_data'); + + if (!xml_parse($xml_parser, $data, 1)) { + watchdog('aggregator', 'The feed from %site seems to be broken, due to an error "%error" on line %line.', array('%site' => $feed['title'], '%error' => xml_error_string(xml_get_error_code($xml_parser)), '%line' => xml_get_current_line_number($xml_parser)), WATCHDOG_WARNING); + drupal_set_message(t('The feed from %site seems to be broken, because of error "%error" on line %line.', array('%site' => $feed['title'], '%error' => xml_error_string(xml_get_error_code($xml_parser)), '%line' => xml_get_current_line_number($xml_parser))), 'error'); + return 0; + } + xml_parser_free($xml_parser); + + // We reverse the array such that we store the first item last, and the last + // item first. In the database, the newest item should be at the top. + $items = array_reverse($items); + + // Initialize variables. + $title = $link = $author = $description = $guid = NULL; + foreach ($items as $item) { + unset($title, $link, $author, $description, $guid); + + // Prepare the item: + foreach ($item as $key => $value) { + $item[$key] = trim($value); + } + + // Resolve the item's title. If no title is found, we use up to 40 + // characters of the description ending at a word boundary but not + // splitting potential entities. + if (!empty($item['TITLE'])) { + $title = $item['TITLE']; + } + elseif (!empty($item['DESCRIPTION'])) { + $title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", truncate_utf8($item['DESCRIPTION'], 40)); + } + else { + $title = ''; + } + + // Resolve the items link. + if (!empty($item['LINK'])) { + $link = $item['LINK']; + } + else { + $link = $feed['link']; + } + $guid = isset($item['GUID']) ? $item['GUID'] : ''; + + // Atom feeds have a CONTENT and/or SUMMARY tag instead of a DESCRIPTION tag. + if (!empty($item['CONTENT:ENCODED'])) { + $item['DESCRIPTION'] = $item['CONTENT:ENCODED']; + } + else if (!empty($item['SUMMARY'])) { + $item['DESCRIPTION'] = $item['SUMMARY']; + } + else if (!empty($item['CONTENT'])) { + $item['DESCRIPTION'] = $item['CONTENT']; + } + + // Try to resolve and parse the item's publication date. If no date is + // found, we use the current date instead. + $date = 'now'; + foreach (array('PUBDATE', 'DC:DATE', 'DCTERMS:ISSUED', 'DCTERMS:CREATED', 'DCTERMS:MODIFIED', 'ISSUED', 'CREATED', 'MODIFIED', 'PUBLISHED', 'UPDATED') as $key) { + if (!empty($item[$key])) { + $date = $item[$key]; + break; + } + } + + $timestamp = strtotime($date); // As of PHP 5.1.0, strtotime returns FALSE on failure instead of -1. + if ($timestamp <= 0) { + $timestamp = aggregator_parse_w3cdtf($date); // Returns FALSE on failure + if (!$timestamp) { + $timestamp = time(); // better than nothing + } + } + + // Save this item. Try to avoid duplicate entries as much as possible. If + // we find a duplicate entry, we resolve it and pass along its ID is such + // that we can update it if needed. + if (!empty($guid)) { + $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND guid = '%s'", $feed['fid'], $guid)); + } + else if ($link && $link != $feed['link'] && $link != $feed['url']) { + $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND link = '%s'", $feed['fid'], $link)); + } + else { + $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND title = '%s'", $feed['fid'], $title)); + } + $item += array('AUTHOR' => '', 'DESCRIPTION' => ''); + aggregator_save_item(array('iid' => (isset($entry->iid) ? $entry->iid: ''), 'fid' => $feed['fid'], 'timestamp' => $timestamp, 'title' => $title, 'link' => $link, 'author' => $item['AUTHOR'], 'description' => $item['DESCRIPTION'], 'guid' => $guid)); + } + + // Remove all items that are older than flush item timer. + $age = time() - variable_get('aggregator_clear', 9676800); + $result = db_query('SELECT iid FROM {aggregator_item} WHERE fid = %d AND timestamp < %d', $feed['fid'], $age); + + $items = array(); + $num_rows = FALSE; + while ($item = db_fetch_object($result)) { + $items[] = $item->iid; + $num_rows = TRUE; + } + if ($num_rows) { + db_query('DELETE FROM {aggregator_category_item} WHERE iid IN ('. implode(', ', $items) .')'); + db_query('DELETE FROM {aggregator_item} WHERE fid = %d AND timestamp < %d', $feed['fid'], $age); + } + + return 1; +} + +/** + * Add/edit/delete an aggregator item. + * + * @param $edit + * An associative array describing the item to be added/edited/deleted. + */ +function aggregator_save_item($edit) { + if ($edit['iid'] && $edit['title']) { + db_query("UPDATE {aggregator_item} SET title = '%s', link = '%s', author = '%s', description = '%s', guid = '%s', timestamp = %d WHERE iid = %d", $edit['title'], $edit['link'], $edit['author'], $edit['description'], $edit['guid'], $edit['timestamp'], $edit['iid']); + } + else if ($edit['iid']) { + db_query('DELETE FROM {aggregator_item} WHERE iid = %d', $edit['iid']); + db_query('DELETE FROM {aggregator_category_item} WHERE iid = %d', $edit['iid']); + } + else if ($edit['title'] && $edit['link']) { + db_query("INSERT INTO {aggregator_item} (fid, title, link, author, description, timestamp, guid) VALUES (%d, '%s', '%s', '%s', '%s', %d, '%s')", $edit['fid'], $edit['title'], $edit['link'], $edit['author'], $edit['description'], $edit['timestamp'], $edit['guid']); + $edit['iid'] = db_last_insert_id('aggregator_item', 'iid'); + // file the items in the categories indicated by the feed + $categories = db_query('SELECT cid FROM {aggregator_category_feed} WHERE fid = %d', $edit['fid']); + while ($category = db_fetch_object($categories)) { + db_query('INSERT INTO {aggregator_category_item} (cid, iid) VALUES (%d, %d)', $category->cid, $edit['iid']); + } + } +} + +/** + * Load an aggregator feed. + * + * @param $fid + * The feed id. + * @return + * An associative array describing the feed. + */ +function aggregator_feed_load($fid) { + static $feeds; + if (!isset($feeds[$fid])) { + $feeds[$fid] = db_fetch_array(db_query('SELECT * FROM {aggregator_feed} WHERE fid = %d', $fid)); + } + return $feeds[$fid]; +} + +/** + * Load an aggregator category. + * + * @param $cid + * The category id. + * @return + * An associative array describing the category. + */ +function aggregator_category_load($cid) { + static $categories; + if (!isset($categories[$cid])) { + $categories[$cid] = db_fetch_array(db_query('SELECT * FROM {aggregator_category} WHERE cid = %d', $cid)); + } + return $categories[$cid]; +} + +/** + * Format an individual feed item for display in the block. + * + * @param $item + * The item to be displayed. + * @param $feed + * Not used. + * @return + * The item HTML. + * @ingroup themeable + */ +function theme_aggregator_block_item($item, $feed = 0) { + global $user; + + $output = ''; + if ($user->uid && module_exists('blog') && user_access('create blog entries')) { + if ($image = theme('image', 'misc/blog.png', t('blog it'), t('blog it'))) { + $output .= '<div class="icon">'. l($image, 'node/add/blog', array('attributes' => array('title' => t('Comment on this news item in your personal blog.'), 'class' => 'blog-it'), 'query' => "iid=$item->iid", 'html' => TRUE)) .'</div>'; + } + } + + // Display the external link to the item. + $output .= '<a href="'. check_url($item->link) .'">'. check_plain($item->title) ."</a>\n"; + + return $output; +} + +/** + * Safely render HTML content, as allowed. + * + * @param $value + * The content to be filtered. + * @return + * The filtered content. + */ +function aggregator_filter_xss($value) { + return filter_xss($value, preg_split('/\s+|<|>/', variable_get('aggregator_allowed_html_tags', '<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>'), -1, PREG_SPLIT_NO_EMPTY)); +} + +/** + * Helper function for drupal_map_assoc. + * + * @param $count + * Items count. + * @return + * Plural-formatted "@count items" + */ +function _aggregator_items($count) { + return format_plural($count, '1 item', '@count items'); +}