#!/usr/bin/env python # -*- coding: UTF-8 -*- """Planet aggregator library. This package is a library for developing web sites or software that aggregate RSS, CDF and Atom feeds taken from elsewhere into a single, combined feed. """ __version__ = "2.0" __authors__ = [ "Scott James Remnant ", "Jeff Waugh " ] __license__ = "Python" # Modules available without separate import import cache import feedparser import sanitize import htmltmpl import sgmllib try: import logging except: import compat_logging as logging # Limit the effect of "from planet import *" __all__ = ("cache", "feedparser", "htmltmpl", "logging", "Planet", "Channel", "NewsItem") import os import md5 import time import dbhash import re try: from xml.sax.saxutils import escape except: def escape(data): return data.replace("&","&").replace(">",">").replace("<","<") # Version information (for generator headers) VERSION = ("Planet/%s +http://www.planetplanet.org" % __version__) # Default User-Agent header to send when retreiving feeds USER_AGENT = VERSION + " " + feedparser.USER_AGENT # Default cache directory CACHE_DIRECTORY = "cache" # Default number of items to display from a new feed NEW_FEED_ITEMS = 10 # Useful common date/time formats TIMEFMT_ISO = "%Y-%m-%dT%H:%M:%S+00:00" TIMEFMT_822 = "%a, %d %b %Y %H:%M:%S +0000" # Log instance to use here log = logging.getLogger("planet") try: log.warning except: log.warning = log.warn # Defaults for the template file config sections ENCODING = "utf-8" ITEMS_PER_PAGE = 60 DAYS_PER_PAGE = 0 OUTPUT_DIR = "output" DATE_FORMAT = "%B %d, %Y %I:%M %p" NEW_DATE_FORMAT = "%B %d, %Y" ACTIVITY_THRESHOLD = 0 class stripHtml(sgmllib.SGMLParser): "remove all tags from the data" def __init__(self, data): sgmllib.SGMLParser.__init__(self) self.result='' self.feed(data) self.close() def handle_data(self, data): if data: self.result+=data def template_info(item, date_format): """Produce a dictionary of template information.""" info = {} for key in item.keys(): if item.key_type(key) == item.DATE: date = item.get_as_date(key) info[key] = time.strftime(date_format, date) info[key + "_iso"] = time.strftime(TIMEFMT_ISO, date) info[key + "_822"] = time.strftime(TIMEFMT_822, date) else: info[key] = item[key] if 'title' in item.keys(): info['title_plain'] = stripHtml(info['title']).result return info class Planet: """A set of channels. This class represents a set of channels for which the items will be aggregated together into one combined feed. Properties: user_agent User-Agent header to fetch feeds with. cache_directory Directory to store cached channels in. new_feed_items Number of items to display from a new feed. filter A regular expression that articles must match. exclude A regular expression that articles must not match. """ def __init__(self, config): self.config = config self._channels = [] self.user_agent = USER_AGENT self.cache_directory = CACHE_DIRECTORY self.new_feed_items = NEW_FEED_ITEMS self.filter = None self.exclude = None def tmpl_config_get(self, template, option, default=None, raw=0, vars=None): """Get a template value from the configuration, with a default.""" if self.config.has_option(template, option): return self.config.get(template, option, raw=raw, vars=None) elif self.config.has_option("Planet", option): return self.config.get("Planet", option, raw=raw, vars=None) else: return default def gather_channel_info(self, template_file="Planet"): date_format = self.tmpl_config_get(template_file, "date_format", DATE_FORMAT, raw=1) activity_threshold = int(self.tmpl_config_get(template_file, "activity_threshold", ACTIVITY_THRESHOLD)) if activity_threshold: activity_horizon = \ time.gmtime(time.time()-86400*activity_threshold) else: activity_horizon = 0 channels = {} channels_list = [] for channel in self.channels(hidden=1): channels[channel] = template_info(channel, date_format) channels_list.append(channels[channel]) # identify inactive feeds if activity_horizon: latest = channel.items(sorted=1) if len(latest)==0 or latest[0].date < activity_horizon: channels[channel]["message"] = \ "no activity in %d days" % activity_threshold # report channel level errors if not channel.url_status: continue status = int(channel.url_status) if status == 403: channels[channel]["message"] = "403: forbidden" elif status == 404: channels[channel]["message"] = "404: not found" elif status == 408: channels[channel]["message"] = "408: request timeout" elif status == 410: channels[channel]["message"] = "410: gone" elif status == 500: channels[channel]["message"] = "internal server error" elif status >= 400: channels[channel]["message"] = "http status %s" % status return channels, channels_list def gather_items_info(self, channels, template_file="Planet", channel_list=None): items_list = [] prev_date = [] prev_channel = None date_format = self.tmpl_config_get(template_file, "date_format", DATE_FORMAT, raw=1) items_per_page = int(self.tmpl_config_get(template_file, "items_per_page", ITEMS_PER_PAGE)) days_per_page = int(self.tmpl_config_get(template_file, "days_per_page", DAYS_PER_PAGE)) new_date_format = self.tmpl_config_get(template_file, "new_date_format", NEW_DATE_FORMAT, raw=1) for newsitem in self.items(max_items=items_per_page, max_days=days_per_page, channels=channel_list): item_info = template_info(newsitem, date_format) chan_info = channels[newsitem._channel] for k, v in chan_info.items(): item_info["channel_" + k] = v # Check for the start of a new day if prev_date[:3] != newsitem.date[:3]: prev_date = newsitem.date item_info["new_date"] = time.strftime(new_date_format, newsitem.date) # Check for the start of a new channel if item_info.has_key("new_date") \ or prev_channel != newsitem._channel: prev_channel = newsitem._channel item_info["new_channel"] = newsitem._channel.url items_list.append(item_info) return items_list def run(self, planet_name, planet_link, template_files, offline = False): log = logging.getLogger("planet.runner") # Create a planet log.info("Loading cached data") if self.config.has_option("Planet", "cache_directory"): self.cache_directory = self.config.get("Planet", "cache_directory") if self.config.has_option("Planet", "new_feed_items"): self.new_feed_items = int(self.config.get("Planet", "new_feed_items")) self.user_agent = "%s +%s %s" % (planet_name, planet_link, self.user_agent) if self.config.has_option("Planet", "filter"): self.filter = self.config.get("Planet", "filter") # The other configuration blocks are channels to subscribe to for feed_url in self.config.sections(): if feed_url == "Planet" or feed_url in template_files: continue # Create a channel, configure it and subscribe it channel = Channel(self, feed_url) self.subscribe(channel) # Update it try: if not offline and not channel.url_status == '410': channel.update() except KeyboardInterrupt: raise except: log.exception("Update of <%s> failed", feed_url) def generate_all_files(self, template_files, planet_name, planet_link, planet_feed, owner_name, owner_email): log = logging.getLogger("planet.runner") # Go-go-gadget-template for template_file in template_files: manager = htmltmpl.TemplateManager() log.info("Processing template %s", template_file) try: template = manager.prepare(template_file) except htmltmpl.TemplateError: template = manager.prepare(os.path.basename(template_file)) # Read the configuration output_dir = self.tmpl_config_get(template_file, "output_dir", OUTPUT_DIR) date_format = self.tmpl_config_get(template_file, "date_format", DATE_FORMAT, raw=1) encoding = self.tmpl_config_get(template_file, "encoding", ENCODING) # We treat each template individually base = os.path.splitext(os.path.basename(template_file))[0] url = os.path.join(planet_link, base) output_file = os.path.join(output_dir, base) # Gather information channels, channels_list = self.gather_channel_info(template_file) items_list = self.gather_items_info(channels, template_file) # Gather item information # Process the template tp = htmltmpl.TemplateProcessor(html_escape=0) tp.set("Items", items_list) tp.set("Channels", channels_list) # Generic information tp.set("generator", VERSION) tp.set("name", planet_name) tp.set("link", planet_link) tp.set("owner_name", owner_name) tp.set("owner_email", owner_email) tp.set("url", url) if planet_feed: tp.set("feed", planet_feed) tp.set("feedtype", planet_feed.find('rss')>=0 and 'rss' or 'atom') # Update time date = time.gmtime() tp.set("date", time.strftime(date_format, date)) tp.set("date_iso", time.strftime(TIMEFMT_ISO, date)) tp.set("date_822", time.strftime(TIMEFMT_822, date)) try: log.info("Writing %s", output_file) output_fd = open(output_file, "w") if encoding.lower() in ("utf-8", "utf8"): # UTF-8 output is the default because we use that internally output_fd.write(tp.process(template)) elif encoding.lower() in ("xml", "html", "sgml"): # Magic for Python 2.3 users output = tp.process(template).decode("utf-8") output_fd.write(output.encode("ascii", "xmlcharrefreplace")) else: # Must be a "known" encoding output = tp.process(template).decode("utf-8") output_fd.write(output.encode(encoding, "replace")) output_fd.close() except KeyboardInterrupt: raise except: log.exception("Write of %s failed", output_file) def channels(self, hidden=0, sorted=1): """Return the list of channels.""" channels = [] for channel in self._channels: if hidden or not channel.has_key("hidden"): channels.append((channel.name, channel)) if sorted: channels.sort() return [ c[-1] for c in channels ] def find_by_basename(self, basename): for channel in self._channels: if basename == channel.cache_basename(): return channel def subscribe(self, channel): """Subscribe the planet to the channel.""" self._channels.append(channel) def unsubscribe(self, channel): """Unsubscribe the planet from the channel.""" self._channels.remove(channel) def items(self, hidden=0, sorted=1, max_items=0, max_days=0, channels=None): """Return an optionally filtered list of items in the channel. The filters are applied in the following order: If hidden is true then items in hidden channels and hidden items will be returned. If sorted is true then the item list will be sorted with the newest first. If max_items is non-zero then this number of items, at most, will be returned. If max_days is non-zero then any items older than the newest by this number of days won't be returned. Requires sorted=1 to work. The sharp-eyed will note that this looks a little strange code-wise, it turns out that Python gets *really* slow if we try to sort the actual items themselves. Also we use mktime here, but it's ok because we discard the numbers and just need them to be relatively consistent between each other. """ planet_filter_re = None if self.filter: planet_filter_re = re.compile(self.filter, re.I) planet_exclude_re = None if self.exclude: planet_exclude_re = re.compile(self.exclude, re.I) items = [] seen_guids = {} if not channels: channels=self.channels(hidden=hidden, sorted=0) for channel in channels: for item in channel._items.values(): if hidden or not item.has_key("hidden"): channel_filter_re = None if channel.filter: channel_filter_re = re.compile(channel.filter, re.I) channel_exclude_re = None if channel.exclude: channel_exclude_re = re.compile(channel.exclude, re.I) if (planet_filter_re or planet_exclude_re \ or channel_filter_re or channel_exclude_re): title = "" if item.has_key("title"): title = item.title content = item.get_content("content") if planet_filter_re: if not (planet_filter_re.search(title) \ or planet_filter_re.search(content)): continue if planet_exclude_re: if (planet_exclude_re.search(title) \ or planet_exclude_re.search(content)): continue if channel_filter_re: if not (channel_filter_re.search(title) \ or channel_filter_re.search(content)): continue if channel_exclude_re: if (channel_exclude_re.search(title) \ or channel_exclude_re.search(content)): continue if not seen_guids.has_key(item.id): seen_guids[item.id] = 1; items.append((time.mktime(item.date), item.order, item)) # Sort the list if sorted: items.sort() items.reverse() # Apply max_items filter if len(items) and max_items: items = items[:max_items] # Apply max_days filter if len(items) and max_days: max_count = 0 max_time = items[0][0] - max_days * 84600 for item in items: if item[0] > max_time: max_count += 1 else: items = items[:max_count] break return [ i[-1] for i in items ] class Channel(cache.CachedInfo): """A list of news items. This class represents a list of news items taken from the feed of a website or other source. Properties: url URL of the feed. url_etag E-Tag of the feed URL. url_modified Last modified time of the feed URL. url_status Last HTTP status of the feed URL. hidden Channel should be hidden (True if exists). name Name of the feed owner, or feed title. next_order Next order number to be assigned to NewsItem updated Correct UTC-Normalised update time of the feed. last_updated Correct UTC-Normalised time the feed was last updated. id An identifier the feed claims is unique (*). title One-line title (*). link Link to the original format feed (*). tagline Short description of the feed (*). info Longer description of the feed (*). modified Date the feed claims to have been modified (*). author Name of the author (*). publisher Name of the publisher (*). generator Name of the feed generator (*). category Category name (*). copyright Copyright information for humans to read (*). license Link to the licence for the content (*). docs Link to the specification of the feed format (*). language Primary language (*). errorreportsto E-Mail address to send error reports to (*). image_url URL of an associated image (*). image_link Link to go with the associated image (*). image_title Alternative text of the associated image (*). image_width Width of the associated image (*). image_height Height of the associated image (*). filter A regular expression that articles must match. exclude A regular expression that articles must not match. Properties marked (*) will only be present if the original feed contained them. Note that the optional 'modified' date field is simply a claim made by the item and parsed from the information given, 'updated' (and 'last_updated') are far more reliable sources of information. Some feeds may define additional properties to those above. """ IGNORE_KEYS = ("links", "contributors", "textinput", "cloud", "categories", "url", "href", "url_etag", "url_modified", "tags", "itunes_explicit") def __init__(self, planet, url): if not os.path.isdir(planet.cache_directory): os.makedirs(planet.cache_directory) cache_filename = cache.filename(planet.cache_directory, url) cache_file = dbhash.open(cache_filename, "c", 0666) cache.CachedInfo.__init__(self, cache_file, url, root=1) self._items = {} self._planet = planet self._expired = [] self.url = url # retain the original URL for error reporting self.configured_url = url self.url_etag = None self.url_status = None self.url_modified = None self.name = None self.updated = None self.last_updated = None self.filter = None self.exclude = None self.next_order = "0" self.cache_read() self.cache_read_entries() if planet.config.has_section(url): for option in planet.config.options(url): value = planet.config.get(url, option) self.set_as_string(option, value, cached=0) def has_item(self, id_): """Check whether the item exists in the channel.""" return self._items.has_key(id_) def get_item(self, id_): """Return the item from the channel.""" return self._items[id_] # Special methods __contains__ = has_item def items(self, hidden=0, sorted=0): """Return the item list.""" items = [] for item in self._items.values(): if hidden or not item.has_key("hidden"): items.append((time.mktime(item.date), item.order, item)) if sorted: items.sort() items.reverse() return [ i[-1] for i in items ] def __iter__(self): """Iterate the sorted item list.""" return iter(self.items(sorted=1)) def cache_read_entries(self): """Read entry information from the cache.""" keys = self._cache.keys() for key in keys: if key.find(" ") != -1: continue if self.has_key(key): continue item = NewsItem(self, key) self._items[key] = item def cache_basename(self): return cache.filename('',self._id) def cache_write(self, sync=1): """Write channel and item information to the cache.""" for item in self._items.values(): item.cache_write(sync=0) for item in self._expired: item.cache_clear(sync=0) cache.CachedInfo.cache_write(self, sync) self._expired = [] def feed_information(self): """ Returns a description string for the feed embedded in this channel. This will usually simply be the feed url embedded in <>, but in the case where the current self.url has changed from the original self.configured_url the string will contain both pieces of information. This is so that the URL in question is easier to find in logging output: getting an error about a URL that doesn't appear in your config file is annoying. """ if self.url == self.configured_url: return "<%s>" % self.url else: return "<%s> (formerly <%s>)" % (self.url, self.configured_url) def update(self): """Download the feed to refresh the information. This does the actual work of pulling down the feed and if it changes updates the cached information about the feed and entries within it. """ info = feedparser.parse(self.url, etag=self.url_etag, modified=self.url_modified, agent=self._planet.user_agent) if info.has_key("status"): self.url_status = str(info.status) elif info.has_key("entries") and len(info.entries)>0: self.url_status = str(200) elif info.bozo and info.bozo_exception.__class__.__name__=='Timeout': self.url_status = str(408) else: self.url_status = str(500) if self.url_status == '301' and \ (info.has_key("entries") and len(info.entries)>0): log.warning("Feed has moved from <%s> to <%s>", self.url, info.url) try: os.link(cache.filename(self._planet.cache_directory, self.url), cache.filename(self._planet.cache_directory, info.url)) except: pass self.url = info.url elif self.url_status == '304': log.info("Feed %s unchanged", self.feed_information()) return elif self.url_status == '410': log.info("Feed %s gone", self.feed_information()) self.cache_write() return elif self.url_status == '408': log.warning("Feed %s timed out", self.feed_information()) return elif int(self.url_status) >= 400: log.error("Error %s while updating feed %s", self.url_status, self.feed_information()) return else: log.info("Updating feed %s", self.feed_information()) self.url_etag = info.has_key("etag") and info.etag or None self.url_modified = info.has_key("modified") and info.modified or None if self.url_etag is not None: log.debug("E-Tag: %s", self.url_etag) if self.url_modified is not None: log.debug("Last Modified: %s", time.strftime(TIMEFMT_ISO, self.url_modified)) self.update_info(info.feed) self.update_entries(info.entries) self.cache_write() def update_info(self, feed): """Update information from the feed. This reads the feed information supplied by feedparser and updates the cached information about the feed. These are the various potentially interesting properties that you might care about. """ for key in feed.keys(): if key in self.IGNORE_KEYS or key + "_parsed" in self.IGNORE_KEYS: # Ignored fields pass elif feed.has_key(key + "_parsed"): # Ignore unparsed date fields pass elif key.endswith("_detail"): # retain name and email sub-fields if feed[key].has_key('name') and feed[key].name: self.set_as_string(key.replace("_detail","_name"), \ feed[key].name) if feed[key].has_key('email') and feed[key].email: self.set_as_string(key.replace("_detail","_email"), \ feed[key].email) elif key == "items": # Ignore items field pass elif key.endswith("_parsed"): # Date fields if feed[key] is not None: self.set_as_date(key[:-len("_parsed")], feed[key]) elif key == "image": # Image field: save all the information if feed[key].has_key("url"): self.set_as_string(key + "_url", feed[key].url) if feed[key].has_key("link"): self.set_as_string(key + "_link", feed[key].link) if feed[key].has_key("title"): self.set_as_string(key + "_title", feed[key].title) if feed[key].has_key("width"): self.set_as_string(key + "_width", str(feed[key].width)) if feed[key].has_key("height"): self.set_as_string(key + "_height", str(feed[key].height)) elif isinstance(feed[key], (str, unicode)): # String fields try: detail = key + '_detail' if feed.has_key(detail) and feed[detail].has_key('type'): if feed[detail].type == 'text/html': feed[key] = sanitize.HTML(feed[key]) elif feed[detail].type == 'text/plain': feed[key] = escape(feed[key]) self.set_as_string(key, feed[key]) except KeyboardInterrupt: raise except: log.exception("Ignored '%s' of <%s>, unknown format", key, self.url) def update_entries(self, entries): """Update entries from the feed. This reads the entries supplied by feedparser and updates the cached information about them. It's at this point we update the 'updated' timestamp and keep the old one in 'last_updated', these provide boundaries for acceptable entry times. If this is the first time a feed has been updated then most of the items will be marked as hidden, according to Planet.new_feed_items. If the feed does not contain items which, according to the sort order, should be there; those items are assumed to have been expired from the feed or replaced and are removed from the cache. """ if not len(entries): return self.last_updated = self.updated self.updated = time.gmtime() new_items = [] feed_items = [] for entry in entries: # Try really hard to find some kind of unique identifier if entry.has_key("id"): entry_id = cache.utf8(entry.id) elif entry.has_key("link"): entry_id = cache.utf8(entry.link) elif entry.has_key("title"): entry_id = (self.url + "/" + md5.new(cache.utf8(entry.title)).hexdigest()) elif entry.has_key("summary"): entry_id = (self.url + "/" + md5.new(cache.utf8(entry.summary)).hexdigest()) else: log.error("Unable to find or generate id, entry ignored") continue # Create the item if necessary and update if self.has_item(entry_id): item = self._items[entry_id] else: item = NewsItem(self, entry_id) self._items[entry_id] = item new_items.append(item) item.update(entry) feed_items.append(entry_id) # Hide excess items the first time through if self.last_updated is None and self._planet.new_feed_items \ and len(feed_items) > self._planet.new_feed_items: item.hidden = "yes" log.debug("Marked <%s> as hidden (new feed)", entry_id) # Assign order numbers in reverse new_items.reverse() for item in new_items: item.order = self.next_order = str(int(self.next_order) + 1) # Check for expired or replaced items feed_count = len(feed_items) log.debug("Items in Feed: %d", feed_count) for item in self.items(sorted=1): if feed_count < 1: break elif item.id in feed_items: feed_count -= 1 elif item._channel.url_status != '226': del(self._items[item.id]) self._expired.append(item) log.debug("Removed expired or replaced item <%s>", item.id) def get_name(self, key): """Return the key containing the name.""" for key in ("name", "title"): if self.has_key(key) and self.key_type(key) != self.NULL: return self.get_as_string(key) return "" class NewsItem(cache.CachedInfo): """An item of news. This class represents a single item of news on a channel. They're created by members of the Channel class and accessible through it. Properties: id Channel-unique identifier for this item. id_hash Relatively short, printable cryptographic hash of id date Corrected UTC-Normalised update time, for sorting. order Order in which items on the same date can be sorted. hidden Item should be hidden (True if exists). title One-line title (*). link Link to the original format text (*). summary Short first-page summary (*). content Full HTML content. modified Date the item claims to have been modified (*). issued Date the item claims to have been issued (*). created Date the item claims to have been created (*). expired Date the item claims to expire (*). author Name of the author (*). publisher Name of the publisher (*). category Category name (*). comments Link to a page to enter comments (*). license Link to the licence for the content (*). source_name Name of the original source of this item (*). source_link Link to the original source of this item (*). Properties marked (*) will only be present if the original feed contained them. Note that the various optional date fields are simply claims made by the item and parsed from the information given, 'date' is a far more reliable source of information. Some feeds may define additional properties to those above. """ IGNORE_KEYS = ("categories", "contributors", "enclosures", "links", "guidislink", "date", "tags") def __init__(self, channel, id_): cache.CachedInfo.__init__(self, channel._cache, id_) self._channel = channel self.id = id_ self.id_hash = md5.new(id_).hexdigest() self.date = None self.order = None self.content = None self.cache_read() def update(self, entry): """Update the item from the feedparser entry given.""" for key in entry.keys(): if key in self.IGNORE_KEYS or key + "_parsed" in self.IGNORE_KEYS: # Ignored fields pass elif entry.has_key(key + "_parsed"): # Ignore unparsed date fields pass elif key.endswith("_detail"): # retain name, email, and language sub-fields if entry[key].has_key('name') and entry[key].name: self.set_as_string(key.replace("_detail","_name"), \ entry[key].name) if entry[key].has_key('email') and entry[key].email: self.set_as_string(key.replace("_detail","_email"), \ entry[key].email) if entry[key].has_key('language') and entry[key].language and \ (not self._channel.has_key('language') or \ entry[key].language != self._channel.language): self.set_as_string(key.replace("_detail","_language"), \ entry[key].language) elif key.endswith("_parsed"): # Date fields if entry[key] is not None: self.set_as_date(key[:-len("_parsed")], entry[key]) elif key == "source": # Source field: save both url and value if entry[key].has_key("value"): self.set_as_string(key + "_name", entry[key].value) if entry[key].has_key("url"): self.set_as_string(key + "_link", entry[key].url) elif key == "content": # Content field: concatenate the values value = "" for item in entry[key]: if item.type == 'text/html': item.value = sanitize.HTML(item.value) elif item.type == 'text/plain': item.value = escape(item.value) if item.has_key('language') and item.language and \ (not self._channel.has_key('language') or item.language != self._channel.language) : self.set_as_string(key + "_language", item.language) value += cache.utf8(item.value) self.set_as_string(key, value) elif isinstance(entry[key], (str, unicode)): # String fields try: detail = key + '_detail' if entry.has_key(detail): if entry[detail].has_key('type'): if entry[detail].type == 'text/html': entry[key] = sanitize.HTML(entry[key]) elif entry[detail].type == 'text/plain': entry[key] = escape(entry[key]) self.set_as_string(key, entry[key]) except KeyboardInterrupt: raise except: log.exception("Ignored '%s' of <%s>, unknown format", key, self.id) # Generate the date field if we need to self.get_date("date") def get_date(self, key): """Get (or update) the date key. We check whether the date the entry claims to have been changed is since we last updated this feed and when we pulled the feed off the site. If it is then it's probably not bogus, and we'll sort accordingly. If it isn't then we bound it appropriately, this ensures that entries appear in posting sequence but don't overlap entries added in previous updates and don't creep into the next one. """ for other_key in ("updated", "modified", "published", "issued", "created"): if self.has_key(other_key): date = self.get_as_date(other_key) break else: date = None if date is not None: if date > self._channel.updated: date = self._channel.updated # elif date < self._channel.last_updated: # date = self._channel.updated elif self.has_key(key) and self.key_type(key) != self.NULL: return self.get_as_date(key) else: date = self._channel.updated self.set_as_date(key, date) return date def get_content(self, key): """Return the key containing the content.""" for key in ("content", "tagline", "summary"): if self.has_key(key) and self.key_type(key) != self.NULL: return self.get_as_string(key) return ""