#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Item cache.

Between runs of Planet we need somewhere to store the feed information
we parsed, this is so we don't lose information when a particular feed
goes away or is too short to hold enough items.

This module provides the code to handle this cache transparently enough
that the rest of the code can take the persistence for granted.
"""

import os
import re

try:
    unicode
except NameError:
    # Interpreters without a separate ``unicode`` builtin use ``str``.
    unicode = str


# Regular expressions to sanitise cache filenames
re_url_scheme    = re.compile(r'^[^:]*://')
re_slash         = re.compile(r'[?/]+')
re_initial_cruft = re.compile(r'^[,.]*')
re_final_cruft   = re.compile(r'[,.]*$')


class CachedInfo:
    """Cached information.

    This class is designed to hold information that is stored in a cache
    between instances.  It can act both as a dictionary (c['foo']) and
    as an object (c.foo) to get and set values and supports both string
    and date values.

    If you wish to support special fields you can derive a class off this
    and implement get_FIELD and set_FIELD functions which will be
    automatically called.

    Layout inside the backing cache mapping (as written by cache_write):

    * the keys index: a space-separated list of key names, stored under
      " keys" for a root object or under the object's id otherwise;
    * one entry per key holding the value, named by cache_key();
    * one entry per key holding the type, named cache_key() + " type".

    Note that item assignment (c['foo'] = x) is bound to set_as_string,
    whereas attribute assignment (c.foo = x) goes through set().
    """
    STRING = "string"
    DATE   = "date"
    NULL   = "null"

    def __init__(self, cache, id_, root=0):
        """Create an object backed by the mapping `cache`.

        `id_` namespaces this object's entries in the cache (spaces are
        replaced by "%20" because a space separates id and key).  A true
        `root` stores keys without the id prefix.
        """
        self._type = {}
        self._value = {}
        self._cached = {}

        self._cache = cache
        self._id = id_.replace(" ", "%20")
        self._root = root

    def _keys_key(self):
        """Return the cache entry name that holds the list of keys."""
        if self._root:
            return " keys"
        return self._id

    def _stored_keys(self):
        """Return the key names listed in the cache, or None if unlisted.

        Empty names are dropped: an object written with no cached keys
        stores "" as its index, and "".split(" ") would otherwise yield
        a bogus empty key that has no value entry in the cache.
        """
        keys_key = self._keys_key()
        if keys_key not in self._cache:
            return None
        return [name for name in self._cache[keys_key].split(" ") if name]

    def cache_key(self, key):
        """Return the cache key name for the given key."""
        key = key.replace(" ", "_")
        if self._root:
            return key
        else:
            return self._id + " " + key

    def cache_read(self):
        """Read information from the cache.

        Does nothing when the cache holds no keys index for this object.
        Keys that were set locally as non-cached are left untouched.
        """
        keys = self._stored_keys()
        if keys is None:
            return

        for key in keys:
            cache_key = self.cache_key(key)
            # Load when the key hasn't been loaded yet, or is one that
            # belongs to the cache; skip locally-set non-cached keys.
            if self._cached.get(key, 1):
                self._value[key] = self._cache[cache_key]
                self._type[key] = self._cache[cache_key + " type"]
                self._cached[key] = 1

    def cache_write(self, sync=1):
        """Write information to the cache.

        Existing entries for this object are cleared first, then every
        cached key is written along with its type and the keys index.
        The cache's sync() method is called afterwards if `sync` is true.
        """
        self.cache_clear(sync=0)

        keys = []
        for key in self.keys():
            cache_key = self.cache_key(key)
            if not self._cached[key]:
                if cache_key in self._cache:
                    # Non-cached keys need to be cleared
                    del self._cache[cache_key]
                    del self._cache[cache_key + " type"]
                continue

            keys.append(key)
            self._cache[cache_key] = self._value[key]
            self._cache[cache_key + " type"] = self._type[key]

        self._cache[self._keys_key()] = " ".join(keys)
        if sync:
            self._cache.sync()

    def cache_clear(self, sync=1):
        """Remove information from the cache.

        Returns immediately (without syncing) when the cache holds no
        keys index for this object.
        """
        keys = self._stored_keys()
        if keys is None:
            return
        del self._cache[self._keys_key()]

        for key in keys:
            cache_key = self.cache_key(key)
            del self._cache[cache_key]
            del self._cache[cache_key + " type"]

        if sync:
            self._cache.sync()

    def has_key(self, key):
        """Check whether the key exists."""
        key = key.replace(" ", "_")
        return key in self._value

    def key_type(self, key):
        """Return the key type."""
        key = key.replace(" ", "_")
        return self._type[key]

    def set(self, key, value, cached=1):
        """Set the value of the given key.

        If a set_KEY function exists that is called otherwise the
        string function is called and the date function if that fails
        (it nearly always will).

        `cached` is passed on to the typed setter so that cached=0 really
        keeps the key out of the cache.  A custom set_KEY function is
        called as set_KEY(key, value) and does not receive it.
        """
        key = key.replace(" ", "_")

        try:
            func = getattr(self, "set_" + key)
        except AttributeError:
            pass
        else:
            return func(key, value)

        if value is None:
            return self.set_as_null(key, value, cached)

        try:
            return self.set_as_string(key, value, cached)
        except TypeError:
            # utf8() rejects non-string values such as time tuples.
            return self.set_as_date(key, value, cached)

    def get(self, key):
        """Return the value of the given key.

        If a get_KEY function exists that is called otherwise the
        correctly typed function is called if that exists.

        Raises KeyError if the key is unknown.
        """
        key = key.replace(" ", "_")

        try:
            func = getattr(self, "get_" + key)
        except AttributeError:
            pass
        else:
            return func(key)

        try:
            func = getattr(self, "get_as_" + self._type[key])
        except AttributeError:
            pass
        else:
            return func(key)

        return self._value[key]

    def set_as_string(self, key, value, cached=1):
        """Set the key to the string value.

        The value is converted to UTF-8 if it is a Unicode string, otherwise
        it's assumed to have failed decoding (feedparser tries pretty hard)
        so has all non-ASCII characters stripped.

        Raises TypeError (from utf8) if the value is not a string.
        """
        value = utf8(value)

        key = key.replace(" ", "_")
        self._value[key] = value
        self._type[key] = self.STRING
        self._cached[key] = cached

    def get_as_string(self, key):
        """Return the key as a string value."""
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        return self._value[key]

    def set_as_date(self, key, value, cached=1):
        """Set the key to the date value.

        The date should be a 9-item tuple as returned by time.gmtime().
        It is stored as its items joined by single spaces.
        """
        value = " ".join([str(s) for s in value])

        key = key.replace(" ", "_")
        self._value[key] = value
        self._type[key] = self.DATE
        self._cached[key] = cached

    def get_as_date(self, key):
        """Return the key as a date value (a tuple of integers)."""
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        value = self._value[key]
        return tuple([int(i) for i in value.split(" ")])

    def set_as_null(self, key, value, cached=1):
        """Set the key to the null value.

        This only exists to make things less magic.  `value` is ignored;
        an empty string is stored with the NULL type.
        """
        key = key.replace(" ", "_")
        self._value[key] = ""
        self._type[key] = self.NULL
        self._cached[key] = cached

    def get_as_null(self, key):
        """Return the key as the null value."""
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        return None

    def del_key(self, key):
        """Delete the given key.

        Raises KeyError if the key does not exist.
        """
        key = key.replace(" ", "_")
        if not self.has_key(key):
            raise KeyError(key)

        del self._value[key]
        del self._type[key]
        del self._cached[key]

    def keys(self):
        """Return the list of cached keys."""
        return list(self._value.keys())

    def __iter__(self):
        """Iterate the cached keys."""
        # Iterate over a snapshot so keys may be deleted while looping.
        return iter(list(self._value.keys()))

    # Special methods
    __contains__ = has_key
    __setitem__  = set_as_string
    __getitem__  = get
    __delitem__  = del_key
    __delattr__  = del_key

    def __setattr__(self, key, value):
        """Store underscore names on the instance, anything else as a key."""
        if key.startswith("_"):
            self.__dict__[key] = value
        else:
            self.set(key, value)

    def __getattr__(self, key):
        """Return the value of `key` for attributes not found normally."""
        # Check __dict__ directly: reaching for self._value on an instance
        # whose __init__ has not run would re-enter __getattr__ forever.
        if "_value" in self.__dict__ and self.has_key(key):
            return self.get(key)
        raise AttributeError(key)


def filename(directory, filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in: the URL scheme is removed, runs of
    "/" and "?" become a single ",", and leading or trailing "," and "."
    characters are dropped.  The result is joined onto `directory`.
    """
    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)
    filename = re_initial_cruft.sub("", filename)
    filename = re_final_cruft.sub("", filename)

    return os.path.join(directory, filename)


def utf8(value):
    """Return the value as a UTF-8 string.

    Unicode strings are encoded directly.  Byte strings are decoded as
    UTF-8, falling back to ISO-8859-1 and finally to ASCII with
    replacement characters, then re-encoded as UTF-8.

    Raises TypeError for values that are not strings; CachedInfo.set()
    relies on that to detect date tuples.
    """
    # isinstance also covers unicode subclasses, which unicode(value, enc)
    # would reject with a TypeError.
    if isinstance(value, unicode):
        return value.encode("utf-8")

    try:
        return unicode(value, "utf-8").encode("utf-8")
    except UnicodeError:
        try:
            return unicode(value, "iso-8859-1").encode("utf-8")
        except UnicodeError:
            return unicode(value, "ascii", "replace").encode("utf-8")