diff --git a/DJAGEN/branches/mustafa_branch/00_default_vhost.conf b/DJAGEN/branches/mustafa_branch/00_default_vhost.conf
new file mode 100755
index 0000000..ce876c4
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/00_default_vhost.conf
@@ -0,0 +1,126 @@
+### Section 3: Virtual Hosts
+#
+# VirtualHost: If you want to maintain multiple domains/hostnames on your
+# machine you can setup VirtualHost containers for them. Most configurations
+# use only name-based virtual hosts so the server doesn't need to worry about
+# IP addresses. This is indicated by the asterisks in the directives below.
+#
+# Please see the documentation at
+#
+# for further details before you try to setup virtual hosts.
+#
+# You may use the command line option '-S' to verify your virtual host
+# configuration.
+Listen 80
+#
+# Use name-based virtual hosting.
+#
+NameVirtualHost *:80
+
+#
+# VirtualHost example:
+# Almost any Apache directive may go into a VirtualHost container.
+# The first VirtualHost section is used for requests without a known
+# server name.
+#
+#<VirtualHost *:80>
+# ServerAdmin webmaster@dummy-host.example.com
+# DocumentRoot /www/docs/dummy-host.example.com
+# ServerName dummy-host.example.com
+# ErrorLog @rel_logfiledir@/dummy-host.example.com-error_log
+# CustomLog @rel_logfiledir@/dummy-host.example.com-access_log common
+#</VirtualHost>
+
+#
+# The First Virtual Host is also your DEFAULT Virtual Host.
+# This means any requests that do not match any other vhosts will
+# go to this virtual host.
+#
+
+<VirtualHost *:80>
+
+ #
+ # DocumentRoot: The directory out of which you will serve your
+ # documents. By default, all requests are taken from this directory, but
+ # symbolic links and aliases may be used to point to other locations.
+ #
+ DocumentRoot "/var/www/localhost/htdocs"
+
+ #
+ # This should be changed to whatever you set DocumentRoot to.
+ #
+    <Directory "/var/www/localhost/htdocs">
+
+ #
+ # Possible values for the Options directive are "None", "All",
+ # or any combination of:
+ # Indexes Includes FollowSymLinks SymLinksifOwnerMatch ExecCGI MultiViews
+ #
+ # Note that "MultiViews" must be named *explicitly* --- "Options All"
+ # doesn't give it to you.
+ #
+ # The Options directive is both complicated and important. Please see
+ # http://httpd.apache.org/docs-2.0/mod/core.html#options
+ # for more information.
+ #
+ Options Indexes FollowSymLinks
+
+ #
+ # AllowOverride controls what directives may be placed in .htaccess files.
+ # It can be "All", "None", or any combination of the keywords:
+ # Options FileInfo AuthConfig Limit
+ #
+ AllowOverride None
+
+ #
+ # Controls who can get stuff from this server.
+ #
+ Order allow,deny
+ Allow from all
+
+    </Directory>
+
+    <IfModule peruser.c>
+ # this must match a Processor
+ ServerEnvironment apache apache
+
+ # these are optional - defaults to the values specified in httpd.conf
+ MinSpareProcessors 4
+ MaxProcessors 20
+    </IfModule>
+
+
+
+
+ ServerName /
+ ServerAlias */
+Alias /phpmyadmin/ /var/www/localhost/htdocs/phpmyadmin/
+<Directory /var/www/localhost/htdocs/phpmyadmin/>
+order deny,allow
+Allow from all
+</Directory>
+
+Alias /djagenmedia/ /var/www/localhost/htdocs/djagen/
+<Directory /var/www/localhost/htdocs/djagen/>
+order deny,allow
+Allow from all
+</Directory>
+
+Alias /admin_media/ /usr/lib/python2.5/site-packages/django/contrib/admin/media
+<Directory /usr/lib/python2.5/site-packages/django/contrib/admin/media>
+order deny,allow
+Allow from all
+</Directory>
+
+WSGIScriptAlias / /home/cad/Workspace/djagen_ws/gezegen/branches/mustafa_branch/djagen/wsgi_handler.py
+WSGIDaemonProcess djagen user=cad group=root processes=1 threads=10
+WSGIProcessGroup djagen
+
+<Directory /home/cad/Workspace/djagen_ws/gezegen/branches/mustafa_branch/djagen>
+Order deny,allow
+Allow from all
+</Directory>
+
+</VirtualHost>
+
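The vhost above hands every request under "/" to mod_wsgi via wsgi_handler.py, running in the "djagen" daemon process group. That handler is not part of this hunk; as a rough sketch (not the file from this branch), a Django 1.x-era WSGI handler for this layout would look something like the following, assuming the djagen.settings module referenced in planet.py further down:

    # wsgi_handler.py -- minimal sketch, assuming a Django 1.x-era project and
    # the djagen.settings module used elsewhere in this branch.
    import os
    import sys

    # Put the directory that contains the "djagen" package on sys.path so the
    # mod_wsgi daemon process can import djagen.settings.
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    os.environ['DJANGO_SETTINGS_MODULE'] = 'djagen.settings'

    import django.core.handlers.wsgi

    # mod_wsgi looks for a module-level callable named "application".
    application = django.core.handlers.wsgi.WSGIHandler()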
diff --git a/DJAGEN/branches/mustafa_branch/djagen/__init__.py b/DJAGEN/branches/mustafa_branch/djagen/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/__init__.py b/DJAGEN/branches/mustafa_branch/djagen/captcha/__init__.py
new file mode 100755
index 0000000..ac47d9a
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/captcha/__init__.py
@@ -0,0 +1,12 @@
+VERSION = (0, 1, 7)
+
+def get_version(svn=False):
+ "Returns the version as a human-format string."
+ v = '.'.join([str(i) for i in VERSION])
+ if svn:
+ from django.utils.version import get_svn_revision
+ import os
+ svn_rev = get_svn_revision(os.path.dirname(__file__))
+ if svn_rev:
+ v = '%s-%s' % (v, svn_rev)
+ return v
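get_version() simply joins the VERSION tuple with dots, so for this release it yields '0.1.7'; a quick illustration (not part of the diff):

    # Illustration of the helper above, with VERSION = (0, 1, 7).
    from captcha import get_version

    print get_version()        # '0.1.7'
    # get_version(svn=True) appends the SVN revision (e.g. '0.1.7-SVN-1234')
    # when the package directory is an SVN checkout.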
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/conf/__init__.py b/DJAGEN/branches/mustafa_branch/djagen/captcha/conf/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/conf/settings.py b/DJAGEN/branches/mustafa_branch/djagen/captcha/conf/settings.py
new file mode 100755
index 0000000..ddfe82f
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/captcha/conf/settings.py
@@ -0,0 +1,49 @@
+import os
+from django.conf import settings
+
+CAPTCHA_FONT_PATH = getattr(settings,'CAPTCHA_FONT_PATH', os.path.normpath(os.path.join(os.path.dirname(__file__), '..', 'fonts/Vera.ttf')))
+CAPTCHA_FONT_SIZE = getattr(settings,'CAPTCHA_FONT_SIZE', 22)
+CAPTCHA_LETTER_ROTATION = getattr(settings, 'CAPTCHA_LETTER_ROTATION', (-35,35))
+CAPTCHA_BACKGROUND_COLOR = getattr(settings,'CAPTCHA_BACKGROUND_COLOR', '#ffffff')
+CAPTCHA_FOREGROUND_COLOR= getattr(settings,'CAPTCHA_FOREGROUND_COLOR', '#001100')
+CAPTCHA_CHALLENGE_FUNCT = getattr(settings,'CAPTCHA_CHALLENGE_FUNCT','captcha.helpers.random_char_challenge')
+CAPTCHA_NOISE_FUNCTIONS = getattr(settings,'CAPTCHA_NOISE_FUNCTIONS', ('captcha.helpers.noise_arcs','captcha.helpers.noise_dots',))
+CAPTCHA_FILTER_FUNCTIONS = getattr(settings,'CAPTCHA_FILTER_FUNCTIONS',('captcha.helpers.post_smooth',))
+CAPTCHA_WORDS_DICTIONARY = getattr(settings,'CAPTCHA_WORDS_DICTIONARY', '/usr/share/dict/words')
+CAPTCHA_FLITE_PATH = getattr(settings,'CAPTCHA_FLITE_PATH',None)
+CAPTCHA_TIMEOUT = getattr(settings, 'CAPTCHA_TIMEOUT', 5) # Minutes
+CAPTCHA_LENGTH = int(getattr(settings, 'CAPTCHA_LENGTH', 4)) # Chars
+CAPTCHA_IMAGE_BEFORE_FIELD = getattr(settings,'CAPTCHA_IMAGE_BEFORE_FIELD', True)
+CAPTCHA_DICTIONARY_MIN_LENGTH = getattr(settings,'CAPTCHA_DICTIONARY_MIN_LENGTH', 0)
+CAPTCHA_DICTIONARY_MAX_LENGTH = getattr(settings,'CAPTCHA_DICTIONARY_MAX_LENGTH', 99)
+if CAPTCHA_IMAGE_BEFORE_FIELD:
+ CAPTCHA_OUTPUT_FORMAT = getattr(settings,'CAPTCHA_OUTPUT_FORMAT', u'%(image)s %(hidden_field)s %(text_field)s')
+else:
+ CAPTCHA_OUTPUT_FORMAT = getattr(settings,'CAPTCHA_OUTPUT_FORMAT', u'%(hidden_field)s %(text_field)s %(image)s')
+
+
+# Failsafe
+if CAPTCHA_DICTIONARY_MIN_LENGTH > CAPTCHA_DICTIONARY_MAX_LENGTH:
+ CAPTCHA_DICTIONARY_MIN_LENGTH, CAPTCHA_DICTIONARY_MAX_LENGTH = CAPTCHA_DICTIONARY_MAX_LENGTH, CAPTCHA_DICTIONARY_MIN_LENGTH
+
+
+def _callable_from_string(string_or_callable):
+ if callable(string_or_callable):
+ return string_or_callable
+ else:
+ return getattr(__import__( '.'.join(string_or_callable.split('.')[:-1]), {}, {}, ['']), string_or_callable.split('.')[-1])
+
+def get_challenge():
+ return _callable_from_string(CAPTCHA_CHALLENGE_FUNCT)
+
+
+def noise_functions():
+ if CAPTCHA_NOISE_FUNCTIONS:
+ return map(_callable_from_string, CAPTCHA_NOISE_FUNCTIONS)
+ return list()
+
+def filter_functions():
+ if CAPTCHA_FILTER_FUNCTIONS:
+ return map(_callable_from_string, CAPTCHA_FILTER_FUNCTIONS)
+ return list()
+
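CAPTCHA_CHALLENGE_FUNCT, CAPTCHA_NOISE_FUNCTIONS and CAPTCHA_FILTER_FUNCTIONS are dotted paths that _callable_from_string() resolves at call time, so a project can swap in its own generators from settings.py. A challenge callable has to return a (challenge_text, expected_response) pair, as the helpers further down do; a hedged sketch with a hypothetical module name:

    # myproject/captcha_extras.py -- hypothetical module, sketching a custom
    # challenge function that follows the (challenge, response) contract used
    # by captcha.helpers.random_char_challenge and friends.
    import random

    def digits_challenge():
        digits = u''.join(random.choice(u'0123456789') for _ in range(4))
        return digits, digits   # text drawn on the image, expected answer

    # The project settings would then point the dotted path at it:
    # CAPTCHA_CHALLENGE_FUNCT = 'myproject.captcha_extras.digits_challenge'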
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/fields.py b/DJAGEN/branches/mustafa_branch/djagen/captcha/fields.py
new file mode 100755
index 0000000..7df0f03
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/captcha/fields.py
@@ -0,0 +1,81 @@
+from django.forms.fields import CharField, MultiValueField
+from django.forms import ValidationError
+from django.forms.widgets import TextInput, MultiWidget, HiddenInput
+from django.utils.safestring import mark_safe
+from django.utils.translation import ugettext_lazy as _
+from django.core.urlresolvers import reverse
+from captcha.models import CaptchaStore
+from captcha.conf import settings
+from captcha.helpers import *
+import datetime
+
+class CaptchaTextInput(MultiWidget):
+ def __init__(self,attrs=None):
+ widgets = (
+ HiddenInput(attrs),
+ TextInput(attrs),
+ )
+
+ for key in ('image','hidden_field','text_field'):
+ if '%%(%s)s'%key not in settings.CAPTCHA_OUTPUT_FORMAT:
+ raise KeyError('All of %s must be present in your CAPTCHA_OUTPUT_FORMAT setting. Could not find %s' %(
+ ', '.join(['%%(%s)s'%k for k in ('image','hidden_field','text_field')]),
+ '%%(%s)s'%key
+ ))
+
+ super(CaptchaTextInput,self).__init__(widgets,attrs)
+
+ def decompress(self,value):
+ if value:
+ return value.split(',')
+ return [None,None]
+
+ def format_output(self, rendered_widgets):
+ hidden_field, text_field = rendered_widgets
+ return settings.CAPTCHA_OUTPUT_FORMAT %dict(image=self.image_and_audio, hidden_field=hidden_field, text_field=text_field)
+
+ def render(self, name, value, attrs=None):
+ challenge,response= settings.get_challenge()()
+
+ store = CaptchaStore.objects.create(challenge=challenge,response=response)
+ key = store.hashkey
+ value = [key, u'']
+
+        self.image_and_audio = '<img src="%s" alt="captcha" class="captcha" />' % reverse('captcha-image', kwargs=dict(key=key))
+        if settings.CAPTCHA_FLITE_PATH:
+            self.image_and_audio = '<a href="%s" title="%s">%s</a>' % (reverse('captcha-audio', kwargs=dict(key=key)), unicode(_('Play captcha as audio file')), self.image_and_audio)
+ #fields = super(CaptchaTextInput, self).render(name, value, attrs=attrs)
+
+ return super(CaptchaTextInput, self).render(name, value, attrs=attrs)
+
+class CaptchaField(MultiValueField):
+ widget=CaptchaTextInput
+
+ def __init__(self, *args,**kwargs):
+ fields = (
+ CharField(show_hidden_initial=True),
+ CharField(),
+ )
+ if 'error_messages' not in kwargs or 'invalid' not in kwargs.get('error_messages'):
+ if 'error_messages' not in kwargs:
+ kwargs['error_messages'] = dict()
+ kwargs['error_messages'].update(dict(invalid=_('Invalid CAPTCHA')))
+
+
+ super(CaptchaField,self).__init__(fields=fields, *args, **kwargs)
+
+ def compress(self,data_list):
+ if data_list:
+ return ','.join(data_list)
+ return None
+
+ def clean(self, value):
+ super(CaptchaField, self).clean(value)
+ response, value[1] = value[1].strip().lower(), ''
+ CaptchaStore.remove_expired()
+ try:
+ store = CaptchaStore.objects.get(response=response, hashkey=value[0], expiration__gt=datetime.datetime.now())
+ store.delete()
+ except Exception:
+ raise ValidationError(getattr(self,'error_messages',dict()).get('invalid', _('Invalid CAPTCHA')))
+ return value
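The field is meant to be declared on an ordinary Django form; the bundled tests further down exercise it through a 'captcha-test' view that posts captcha_0 (the hash key) and captcha_1 (the user's answer). A sketch of typical form usage, assuming a simple contact form:

    # forms.py -- sketch; the subject/sender names mirror the fields posted in
    # the tests below, everything else is illustrative.
    from django import forms
    from captcha.fields import CaptchaField

    class ContactForm(forms.Form):
        subject = forms.CharField(max_length=100)
        sender = forms.EmailField()
        captcha = CaptchaField()   # renders hidden key + text input + image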
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/fonts/COPYRIGHT.TXT b/DJAGEN/branches/mustafa_branch/djagen/captcha/fonts/COPYRIGHT.TXT
new file mode 100755
index 0000000..e651be1
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/captcha/fonts/COPYRIGHT.TXT
@@ -0,0 +1,124 @@
+Bitstream Vera Fonts Copyright
+
+The fonts have a generous copyright, allowing derivative works (as
+long as "Bitstream" or "Vera" are not in the names), and full
+redistribution (so long as they are not *sold* by themselves). They
+can be bundled, redistributed and sold with any software.
+
+The fonts are distributed under the following copyright:
+
+Copyright
+=========
+
+Copyright (c) 2003 by Bitstream, Inc. All Rights Reserved. Bitstream
+Vera is a trademark of Bitstream, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the fonts accompanying this license ("Fonts") and associated
+documentation files (the "Font Software"), to reproduce and distribute
+the Font Software, including without limitation the rights to use,
+copy, merge, publish, distribute, and/or sell copies of the Font
+Software, and to permit persons to whom the Font Software is furnished
+to do so, subject to the following conditions:
+
+The above copyright and trademark notices and this permission notice
+shall be included in all copies of one or more of the Font Software
+typefaces.
+
+The Font Software may be modified, altered, or added to, and in
+particular the designs of glyphs or characters in the Fonts may be
+modified and additional glyphs or characters may be added to the
+Fonts, only if the fonts are renamed to names not containing either
+the words "Bitstream" or the word "Vera".
+
+This License becomes null and void to the extent applicable to Fonts
+or Font Software that has been modified and is distributed under the
+"Bitstream Vera" names.
+
+The Font Software may be sold as part of a larger software package but
+no copy of one or more of the Font Software typefaces may be sold by
+itself.
+
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL
+BITSTREAM OR THE GNOME FOUNDATION BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL,
+OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT
+SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE.
+
+Except as contained in this notice, the names of Gnome, the Gnome
+Foundation, and Bitstream Inc., shall not be used in advertising or
+otherwise to promote the sale, use or other dealings in this Font
+Software without prior written authorization from the Gnome Foundation
+or Bitstream Inc., respectively. For further information, contact:
+fonts at gnome dot org.
+
+Copyright FAQ
+=============
+
+ 1. I don't understand the resale restriction... What gives?
+
+ Bitstream is giving away these fonts, but wishes to ensure its
+ competitors can't just drop the fonts as is into a font sale system
+ and sell them as is. It seems fair that if Bitstream can't make money
+ from the Bitstream Vera fonts, their competitors should not be able to
+ do so either. You can sell the fonts as part of any software package,
+ however.
+
+ 2. I want to package these fonts separately for distribution and
+ sale as part of a larger software package or system. Can I do so?
+
+ Yes. A RPM or Debian package is a "larger software package" to begin
+ with, and you aren't selling them independently by themselves.
+ See 1. above.
+
+ 3. Are derivative works allowed?
+ Yes!
+
+ 4. Can I change or add to the font(s)?
+ Yes, but you must change the name(s) of the font(s).
+
+ 5. Under what terms are derivative works allowed?
+
+ You must change the name(s) of the fonts. This is to ensure the
+ quality of the fonts, both to protect Bitstream and Gnome. We want to
+ ensure that if an application has opened a font specifically of these
+ names, it gets what it expects (though of course, using fontconfig,
+ substitutions could still could have occurred during font
+ opening). You must include the Bitstream copyright. Additional
+ copyrights can be added, as per copyright law. Happy Font Hacking!
+
+ 6. If I have improvements for Bitstream Vera, is it possible they might get
+ adopted in future versions?
+
+ Yes. The contract between the Gnome Foundation and Bitstream has
+ provisions for working with Bitstream to ensure quality additions to
+ the Bitstream Vera font family. Please contact us if you have such
+ additions. Note, that in general, we will want such additions for the
+ entire family, not just a single font, and that you'll have to keep
+ both Gnome and Jim Lyles, Vera's designer, happy! To make sense to add
+ glyphs to the font, they must be stylistically in keeping with Vera's
+ design. Vera cannot become a "ransom note" font. Jim Lyles will be
+ providing a document describing the design elements used in Vera, as a
+ guide and aid for people interested in contributing to Vera.
+
+ 7. I want to sell a software package that uses these fonts: Can I do so?
+
+ Sure. Bundle the fonts with your software and sell your software
+ with the fonts. That is the intent of the copyright.
+
+ 8. If applications have built the names "Bitstream Vera" into them,
+ can I override this somehow to use fonts of my choosing?
+
+ This depends on exact details of the software. Most open source
+ systems and software (e.g., Gnome, KDE, etc.) are now converting to
+ use fontconfig (see www.fontconfig.org) to handle font configuration,
+ selection and substitution; it has provisions for overriding font
+ names and subsituting alternatives. An example is provided by the
+ supplied local.conf file, which chooses the family Bitstream Vera for
+ "sans", "serif" and "monospace". Other software (e.g., the XFree86
+ core server) has other mechanisms for font substitution.
+
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/fonts/README.TXT b/DJAGEN/branches/mustafa_branch/djagen/captcha/fonts/README.TXT
new file mode 100755
index 0000000..0f71795
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/captcha/fonts/README.TXT
@@ -0,0 +1,11 @@
+Contained herein is the Bitstream Vera font family.
+
+The Copyright information is found in the COPYRIGHT.TXT file (along
+with being incorporated into the fonts themselves).
+
+The release notes are found in the file "RELEASENOTES.TXT".
+
+We hope you enjoy Vera!
+
+ Bitstream, Inc.
+ The Gnome Project
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/fonts/Vera.ttf b/DJAGEN/branches/mustafa_branch/djagen/captcha/fonts/Vera.ttf
new file mode 100755
index 0000000..58cd6b5
Binary files /dev/null and b/DJAGEN/branches/mustafa_branch/djagen/captcha/fonts/Vera.ttf differ
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/helpers.py b/DJAGEN/branches/mustafa_branch/djagen/captcha/helpers.py
new file mode 100755
index 0000000..b400700
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/captcha/helpers.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+import random
+from captcha.conf import settings
+
+def math_challenge():
+ operators = ('+','*','-',)
+ operands = (random.randint(1,10),random.randint(1,10))
+ operator = random.choice(operators)
+ if operands[0] < operands[1] and '-' == operator:
+ operands = (operands[1],operands[0])
+ challenge = '%d%s%d' %(operands[0],operator,operands[1])
+ return u'%s=' %(challenge), unicode(eval(challenge))
+
+def random_char_challenge():
+ chars,ret = u'abcdefghijklmnopqrstuvwxyz', u''
+ for i in range(settings.CAPTCHA_LENGTH):
+ ret += random.choice(chars)
+ return ret.upper(),ret
+
+def unicode_challenge():
+ chars,ret = u'äàáëéèïíîöóòüúù', u''
+ for i in range(settings.CAPTCHA_LENGTH):
+ ret += random.choice(chars)
+ return ret.upper(), ret
+
+def word_challenge():
+ fd = file(settings.CAPTCHA_WORDS_DICTIONARY,'rb')
+ l = fd.readlines()
+ fd.close()
+ while True:
+ word = random.choice(l).strip()
+ if len(word) >= settings.CAPTCHA_DICTIONARY_MIN_LENGTH and len(word) <= settings.CAPTCHA_DICTIONARY_MAX_LENGTH:
+ break
+ return word.upper(), word.lower()
+
+def noise_arcs(draw,image):
+ size = image.size
+ draw.arc([-20,-20, size[0],20], 0, 295, fill=settings.CAPTCHA_FOREGROUND_COLOR)
+ draw.line([-20,20, size[0]+20,size[1]-20], fill=settings.CAPTCHA_FOREGROUND_COLOR)
+ draw.line([-20,0, size[0]+20,size[1]], fill=settings.CAPTCHA_FOREGROUND_COLOR)
+ return draw
+
+def noise_dots(draw,image):
+ size = image.size
+ for p in range(int(size[0]*size[1]*0.1)):
+ draw.point((random.randint(0, size[0]),random.randint(0, size[1])), fill=settings.CAPTCHA_FOREGROUND_COLOR )
+ return draw
+
+def post_smooth(image):
+ import ImageFilter
+ return image.filter(ImageFilter.SMOOTH)
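Each challenge helper returns a (challenge, response) pair: the text rendered into the image and the answer the user must type. Illustrative values (a sketch, not code from the diff):

    # Illustration of the helper contracts above.
    from captcha.helpers import math_challenge, random_char_challenge

    challenge, response = math_challenge()         # e.g. (u'7+2=', u'9')
    challenge, response = random_char_challenge()  # e.g. (u'QWJZ', u'qwjz')

The noise helpers receive (draw, image) and return the draw object; filter helpers such as post_smooth receive the image and return a filtered image, which is how several of them can be chained via the settings tuples.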
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/management/__init__.py b/DJAGEN/branches/mustafa_branch/djagen/captcha/management/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/management/commands/__init__.py b/DJAGEN/branches/mustafa_branch/djagen/captcha/management/commands/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/management/commands/captcha_clean.py b/DJAGEN/branches/mustafa_branch/djagen/captcha/management/commands/captcha_clean.py
new file mode 100755
index 0000000..9a66e48
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/captcha/management/commands/captcha_clean.py
@@ -0,0 +1,28 @@
+from django.core.management.base import BaseCommand, CommandError
+import sys
+
+from optparse import make_option
+
+class Command(BaseCommand):
+ help = "Clean up expired captcha hashkeys."
+
+ def handle(self, **options):
+ from captcha.models import CaptchaStore
+ import datetime
+ verbose = int(options.get('verbosity'))
+ expired_keys = CaptchaStore.objects.filter(expiration__lte=datetime.datetime.now()).count()
+ if verbose >= 1:
+ print "Currently %s expired hashkeys" % expired_keys
+ try:
+ CaptchaStore.remove_expired()
+ except:
+ if verbose >= 1 :
+ print "Unable to delete expired hashkeys."
+ sys.exit(1)
+ if verbose >= 1:
+ if expired_keys > 0:
+ print "Expired hashkeys removed."
+ else:
+ print "No keys to remove."
+
+
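Besides running it from cron as a manage.py command, the cleanup can be triggered from Python through Django's management API; a sketch:

    # Sketch: invoking the cleanup command programmatically.
    from django.core.management import call_command

    # Equivalent to "python manage.py captcha_clean"; verbosity=1 makes it
    # print the counts shown in handle() above.
    call_command('captcha_clean', verbosity=1)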
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/models.py b/DJAGEN/branches/mustafa_branch/djagen/captcha/models.py
new file mode 100755
index 0000000..fc8c599
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/captcha/models.py
@@ -0,0 +1,46 @@
+from django.db import models
+from captcha.conf import settings as captcha_settings
+import datetime, unicodedata, random, time
+
+# Heavily based on session key generation in Django
+# Use the system (hardware-based) random number generator if it exists.
+if hasattr(random, 'SystemRandom'):
+ randrange = random.SystemRandom().randrange
+else:
+ randrange = random.randrange
+MAX_RANDOM_KEY = 18446744073709551616L # 2 << 63
+
+
+try:
+ import hashlib # sha for Python 2.5+
+except ImportError:
+ import sha # sha for Python 2.4 (deprecated in Python 2.6)
+ hashlib = False
+
+class CaptchaStore(models.Model):
+ challenge = models.CharField(blank=False, max_length=32)
+ response = models.CharField(blank=False, max_length=32)
+ hashkey = models.CharField(blank=False, max_length=40, unique=True)
+ expiration = models.DateTimeField(blank=False)
+
+ def save(self,*args,**kwargs):
+ self.response = self.response.lower()
+ if not self.expiration:
+ self.expiration = datetime.datetime.now() + datetime.timedelta(minutes= int(captcha_settings.CAPTCHA_TIMEOUT))
+ if not self.hashkey:
+ key_ = unicodedata.normalize('NFKD', str(randrange(0,MAX_RANDOM_KEY)) + str(time.time()) + unicode(self.challenge)).encode('ascii', 'ignore') + unicodedata.normalize('NFKD', unicode(self.response)).encode('ascii', 'ignore')
+ if hashlib:
+ self.hashkey = hashlib.new('sha', key_).hexdigest()
+ else:
+ self.hashkey = sha.new(key_).hexdigest()
+ del(key_)
+ super(CaptchaStore,self).save(*args,**kwargs)
+
+ def __unicode__(self):
+ return self.challenge
+
+
+ def remove_expired(cls):
+ cls.objects.filter(expiration__lte=datetime.datetime.now()).delete()
+ remove_expired = classmethod(remove_expired)
+
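save() lower-cases the response, stamps an expiration CAPTCHA_TIMEOUT minutes ahead and derives a unique SHA-1 hashkey from a random number, the current time and the challenge/response pair; remove_expired() is called both from CaptchaField.clean() above and from the captcha_clean management command. A sketch of the resulting behaviour:

    # Sketch of the CaptchaStore behaviour described above.
    from captcha.models import CaptchaStore

    store = CaptchaStore.objects.create(challenge=u'QWJZ', response=u'QWJZ')
    print store.response     # u'qwjz' -- lower-cased on save()
    print store.hashkey      # 40-character SHA-1 hex digest, unique per row
    print store.expiration   # roughly now + CAPTCHA_TIMEOUT minutes

    CaptchaStore.remove_expired()   # deletes every row whose expiration passed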
diff --git a/DJAGEN/branches/mustafa_branch/djagen/captcha/tests/__init__.py b/DJAGEN/branches/mustafa_branch/djagen/captcha/tests/__init__.py
new file mode 100755
index 0000000..ded5948
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/captcha/tests/__init__.py
@@ -0,0 +1,153 @@
+# -*- coding: utf-8 -*-
+from captcha.conf import settings
+from captcha.models import CaptchaStore
+from django.core.urlresolvers import reverse
+from django.test import TestCase
+from django.utils.translation import ugettext_lazy as _
+import datetime
+
+
+class CaptchaCase(TestCase):
+ urls = 'captcha.tests.urls'
+
+ def setUp(self):
+ self.default_challenge = settings.get_challenge()()
+ self.math_challenge = settings._callable_from_string('captcha.helpers.math_challenge')()
+ self.chars_challenge = settings._callable_from_string('captcha.helpers.random_char_challenge')()
+ self.unicode_challenge = settings._callable_from_string('captcha.helpers.unicode_challenge')()
+
+ self.default_store, created = CaptchaStore.objects.get_or_create(challenge=self.default_challenge[0],response=self.default_challenge[1])
+ self.math_store, created = CaptchaStore.objects.get_or_create(challenge=self.math_challenge[0],response=self.math_challenge[1])
+ self.chars_store, created = CaptchaStore.objects.get_or_create(challenge=self.chars_challenge[0],response=self.chars_challenge[1])
+ self.unicode_store, created = CaptchaStore.objects.get_or_create(challenge=self.unicode_challenge[0],response=self.unicode_challenge[1])
+
+
+
+
+ def testImages(self):
+ for key in (self.math_store.hashkey, self.chars_store.hashkey, self.default_store.hashkey, self.unicode_store.hashkey):
+ response = self.client.get(reverse('captcha-image',kwargs=dict(key=key)))
+ self.failUnlessEqual(response.status_code, 200)
+ self.assertTrue(response.has_header('content-type'))
+ self.assertEquals(response._headers.get('content-type'), ('Content-Type', 'image/png'))
+
+ def testAudio(self):
+ if not settings.CAPTCHA_FLITE_PATH:
+ return
+ for key in (self.math_store.hashkey, self.chars_store.hashkey, self.default_store.hashkey, self.unicode_store.hashkey):
+ response = self.client.get(reverse('captcha-audio',kwargs=dict(key=key)))
+ self.failUnlessEqual(response.status_code, 200)
+ self.assertTrue(len(response.content) > 1024)
+ self.assertTrue(response.has_header('content-type'))
+ self.assertEquals(response._headers.get('content-type'), ('Content-Type', 'audio/x-wav'))
+
+ def testFormSubmit(self):
+ r = self.client.get(reverse('captcha-test'))
+ self.failUnlessEqual(r.status_code, 200)
+ hash_ = r.content[r.content.find('value="')+7:r.content.find('value="')+47]
+ try:
+ response = CaptchaStore.objects.get(hashkey=hash_).response
+ except:
+ self.fail()
+
+ r = self.client.post(reverse('captcha-test'), dict(captcha_0=hash_,captcha_1=response, subject='xxx', sender='asasd@asdasd.com'))
+ self.failUnlessEqual(r.status_code, 200)
+ self.assertTrue(r.content.find('Form validated') > 0)
+
+ r = self.client.post(reverse('captcha-test'), dict(captcha_0=hash_,captcha_1=response, subject='xxx', sender='asasd@asdasd.com'))
+ self.failUnlessEqual(r.status_code, 200)
+ self.assertFalse(r.content.find('Form validated') > 0)
+
+
+
+ def testWrongSubmit(self):
+ r = self.client.get(reverse('captcha-test'))
+ self.failUnlessEqual(r.status_code, 200)
+ r = self.client.post(reverse('captcha-test'), dict(captcha_0='abc',captcha_1='wrong response', subject='xxx', sender='asasd@asdasd.com'))
+ self.assertFormError(r,'form','captcha',_('Invalid CAPTCHA'))
+
+ def testDeleteExpired(self):
+ self.default_store.expiration = datetime.datetime.now() - datetime.timedelta(minutes=5)
+ self.default_store.save()
+ hash_ = self.default_store.hashkey
+ r = self.client.post(reverse('captcha-test'), dict(captcha_0=hash_,captcha_1=self.default_store.response, subject='xxx', sender='asasd@asdasd.com'))
+
+ self.failUnlessEqual(r.status_code, 200)
+ self.assertFalse(r.content.find('Form validated') > 0)
+
+ # expired -> deleted
+ try:
+ CaptchaStore.objects.get(hashkey=hash_)
+ self.fail()
+ except:
+ pass
+
+ def testCustomErrorMessage(self):
+ r = self.client.get(reverse('captcha-test-custom-error-message'))
+ self.failUnlessEqual(r.status_code, 200)
+
+ # Wrong answer
+ r = self.client.post(reverse('captcha-test-custom-error-message'), dict(captcha_0='abc',captcha_1='wrong response'))
+ self.assertFormError(r,'form','captcha','TEST CUSTOM ERROR MESSAGE')
+ # empty answer
+ r = self.client.post(reverse('captcha-test-custom-error-message'), dict(captcha_0='abc',captcha_1=''))
+ self.assertFormError(r,'form','captcha',_('This field is required.'))
+
+ def testRepeatedChallenge(self):
+ store = CaptchaStore.objects.create(challenge='xxx',response='xxx')
+ try:
+ store2 = CaptchaStore.objects.create(challenge='xxx',response='xxx')
+ except Exception:
+ self.fail()
+
+
+ def testRepeatedChallengeFormSubmit(self):
+ settings.CAPTCHA_CHALLENGE_FUNCT = 'captcha.tests.trivial_challenge'
+
+ r1 = self.client.get(reverse('captcha-test'))
+ r2 = self.client.get(reverse('captcha-test'))
+ self.failUnlessEqual(r1.status_code, 200)
+ self.failUnlessEqual(r2.status_code, 200)
+ hash_1 = r1.content[r1.content.find('value="')+7:r1.content.find('value="')+47]
+ hash_2 = r2.content[r2.content.find('value="')+7:r2.content.find('value="')+47]
+ try:
+ store_1 = CaptchaStore.objects.get(hashkey=hash_1)
+ store_2 = CaptchaStore.objects.get(hashkey=hash_2)
+ except:
+ self.fail()
+
+ self.assertTrue(store_1.pk != store_2.pk)
+ self.assertTrue(store_1.response == store_2.response)
+ self.assertTrue(hash_1 != hash_2)
+
+
+
+ r1 = self.client.post(reverse('captcha-test'), dict(captcha_0=hash_1,captcha_1=store_1.response, subject='xxx', sender='asasd@asdasd.com'))
+ self.failUnlessEqual(r1.status_code, 200)
+ self.assertTrue(r1.content.find('Form validated') > 0)
+
+ try:
+ store_2 = CaptchaStore.objects.get(hashkey=hash_2)
+ except:
+ self.fail()
+
+ r2 = self.client.post(reverse('captcha-test'), dict(captcha_0=hash_2,captcha_1=store_2.response, subject='xxx', sender='asasd@asdasd.com'))
+ self.failUnlessEqual(r2.status_code, 200)
+ self.assertTrue(r2.content.find('Form validated') > 0)
+
+ def testOutputFormat(self):
+        settings.CAPTCHA_OUTPUT_FORMAT = u'%(image)s<p>Hello, captcha world</p>%(hidden_field)s%(text_field)s'
+        r = self.client.get(reverse('captcha-test'))
+        self.failUnlessEqual(r.status_code, 200)
+        self.assertTrue('<p>Hello, captcha world</p>' in r.content)
+ Linux Gezegeni, Türkiye'de Linux ve Özgür Yazılım konusunda çalışmalar yapan arkadaşlarımızın internet üzerindeki günlüklerini bir tek sayfadan okumamızı ve kendi dünyalarına ulaşmamızı sağlayan basit bir web sitesidir.
+
+ Linux Gezegeni Gezegen Ekibi tarafından yönetilmektedir, Gezegen hakkındaki sorularınızı ve Gezegen'e iniş başvurularınızı e-posta ile iletebilirsiniz.
+
+
+ Gezegene iniş başvurularınızda Gezegen Kuralları'na uyan RSS/Atom beslemenizi ve gezegen içerisinde kullanmak istediğiniz (en fazla 80x80 çözünürlüklü) resminizi (bir başka deyişle hackergotchi); varsa jabber adresinizle birlikte e-posta yoluyla göndermenizi rica ediyoruz.
+
+
+
+
+
+
diff --git a/DJAGEN/branches/mustafa_branch/djagen/gezegen/gezegen/simple.html.tmplc b/DJAGEN/branches/mustafa_branch/djagen/gezegen/gezegen/simple.html.tmplc
new file mode 100755
index 0000000..d466e42
Binary files /dev/null and b/DJAGEN/branches/mustafa_branch/djagen/gezegen/gezegen/simple.html.tmplc differ
diff --git a/DJAGEN/branches/mustafa_branch/djagen/gezegen/gezegen/zaman.sh b/DJAGEN/branches/mustafa_branch/djagen/gezegen/gezegen/zaman.sh
new file mode 100755
index 0000000..e0c9a2b
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/gezegen/gezegen/zaman.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+while read x
+do
+ echo "$(date)::$x"
+done
diff --git a/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet-cache.py b/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet-cache.py
new file mode 100755
index 0000000..9334583
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet-cache.py
@@ -0,0 +1,194 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+"""Planet cache tool.
+
+"""
+
+__authors__ = [ "Scott James Remnant ",
+ "Jeff Waugh " ]
+__license__ = "Python"
+
+
+import os
+import sys
+import time
+import dbhash
+import ConfigParser
+
+import planet
+
+
+def usage():
+ print "Usage: planet-cache [options] CACHEFILE [ITEMID]..."
+ print
+ print "Examine and modify information in the Planet cache."
+ print
+ print "Channel Commands:"
+ print " -C, --channel Display known information on the channel"
+ print " -L, --list List items in the channel"
+ print " -K, --keys List all keys found in channel items"
+ print
+ print "Item Commands (need ITEMID):"
+ print " -I, --item Display known information about the item(s)"
+ print " -H, --hide Mark the item(s) as hidden"
+ print " -U, --unhide Mark the item(s) as not hidden"
+ print
+ print "Other Options:"
+ print " -h, --help Display this help message and exit"
+ sys.exit(0)
+
+def usage_error(msg, *args):
+ print >>sys.stderr, msg, " ".join(args)
+ print >>sys.stderr, "Perhaps you need --help ?"
+ sys.exit(1)
+
+def print_keys(item, title):
+ keys = item.keys()
+ keys.sort()
+ key_len = max([ len(k) for k in keys ])
+
+ print title + ":"
+ for key in keys:
+ if item.key_type(key) == item.DATE:
+ value = time.strftime(planet.TIMEFMT_ISO, item[key])
+ else:
+ value = str(item[key])
+ print " %-*s %s" % (key_len, key, fit_str(value, 74 - key_len))
+
+def fit_str(string, length):
+ if len(string) <= length:
+ return string
+ else:
+ return string[:length-4] + " ..."
+
+
+if __name__ == "__main__":
+ cache_file = None
+ want_ids = 0
+ ids = []
+
+ command = None
+
+ for arg in sys.argv[1:]:
+ if arg == "-h" or arg == "--help":
+ usage()
+ elif arg == "-C" or arg == "--channel":
+ if command is not None:
+ usage_error("Only one command option may be supplied")
+ command = "channel"
+ elif arg == "-L" or arg == "--list":
+ if command is not None:
+ usage_error("Only one command option may be supplied")
+ command = "list"
+ elif arg == "-K" or arg == "--keys":
+ if command is not None:
+ usage_error("Only one command option may be supplied")
+ command = "keys"
+ elif arg == "-I" or arg == "--item":
+ if command is not None:
+ usage_error("Only one command option may be supplied")
+ command = "item"
+ want_ids = 1
+ elif arg == "-H" or arg == "--hide":
+ if command is not None:
+ usage_error("Only one command option may be supplied")
+ command = "hide"
+ want_ids = 1
+ elif arg == "-U" or arg == "--unhide":
+ if command is not None:
+ usage_error("Only one command option may be supplied")
+ command = "unhide"
+ want_ids = 1
+ elif arg.startswith("-"):
+ usage_error("Unknown option:", arg)
+ else:
+ if cache_file is None:
+ cache_file = arg
+ elif want_ids:
+ ids.append(arg)
+ else:
+ usage_error("Unexpected extra argument:", arg)
+
+ if cache_file is None:
+ usage_error("Missing expected cache filename")
+ elif want_ids and not len(ids):
+ usage_error("Missing expected entry ids")
+
+ # Open the cache file directly to get the URL it represents
+ try:
+ db = dbhash.open(cache_file)
+ url = db["url"]
+ db.close()
+ except dbhash.bsddb._db.DBError, e:
+ print >>sys.stderr, cache_file + ":", e.args[1]
+ sys.exit(1)
+ except KeyError:
+ print >>sys.stderr, cache_file + ": Probably not a cache file"
+ sys.exit(1)
+
+ # Now do it the right way :-)
+ my_planet = planet.Planet(ConfigParser.ConfigParser())
+ my_planet.cache_directory = os.path.dirname(cache_file)
+ channel = planet.Channel(my_planet, url)
+
+ for item_id in ids:
+ if not channel.has_item(item_id):
+ print >>sys.stderr, item_id + ": Not in channel"
+ sys.exit(1)
+
+ # Do the user's bidding
+ if command == "channel":
+ print_keys(channel, "Channel Keys")
+
+ elif command == "item":
+ for item_id in ids:
+ item = channel.get_item(item_id)
+ print_keys(item, "Item Keys for %s" % item_id)
+
+ elif command == "list":
+ print "Items in Channel:"
+ for item in channel.items(hidden=1, sorted=1):
+ print " " + item.id
+ print " " + time.strftime(planet.TIMEFMT_ISO, item.date)
+ if hasattr(item, "title"):
+ print " " + fit_str(item.title, 70)
+ if hasattr(item, "hidden"):
+ print " (hidden)"
+
+ elif command == "keys":
+ keys = {}
+ for item in channel.items():
+ for key in item.keys():
+ keys[key] = 1
+
+ keys = keys.keys()
+ keys.sort()
+
+ print "Keys used in Channel:"
+ for key in keys:
+ print " " + key
+ print
+
+ print "Use --item to output values of particular items."
+
+ elif command == "hide":
+ for item_id in ids:
+ item = channel.get_item(item_id)
+ if hasattr(item, "hidden"):
+ print item_id + ": Already hidden."
+ else:
+ item.hidden = "yes"
+
+ channel.cache_write()
+ print "Done."
+
+ elif command == "unhide":
+ for item_id in ids:
+ item = channel.get_item(item_id)
+ if hasattr(item, "hidden"):
+ del(item.hidden)
+ else:
+ print item_id + ": Not hidden."
+
+ channel.cache_write()
+ print "Done."
diff --git a/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet.py b/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet.py
new file mode 100755
index 0000000..a245a76
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python
+"""The Planet aggregator.
+
+A flexible and easy-to-use aggregator for generating websites.
+
+Visit http://www.planetplanet.org/ for more information and to download
+the latest version.
+
+Requires Python 2.1, recommends 2.3.
+"""
+
+__authors__ = [ "Scott James Remnant ",
+ "Jeff Waugh " ]
+__license__ = "Python"
+
+import datetime
+
+import os
+import sys
+import time
+import locale
+import urlparse
+
+import planet
+
+from ConfigParser import ConfigParser
+
+# Default configuration file path
+CONFIG_FILE = "config.ini"
+
+# Defaults for the [Planet] config section
+PLANET_NAME = "Unconfigured Planet"
+PLANET_LINK = "Unconfigured Planet"
+PLANET_FEED = None
+OWNER_NAME = "Anonymous Coward"
+OWNER_EMAIL = ""
+LOG_LEVEL = "WARNING"
+FEED_TIMEOUT = 20 # seconds
+
+# Default template file list
+TEMPLATE_FILES = "examples/basic/planet.html.tmpl"
+
+#part for django api usage
+import sys
+import os
+# In order to reduce integration issues, this path gets defined automatically.
+sys.path.append(os.path.abspath('../..'))
+
+os.environ['DJANGO_SETTINGS_MODULE'] = 'djagen.settings'
+from djagen.collector.models import *
+
+def config_get(config, section, option, default=None, raw=0, vars=None):
+ """Get a value from the configuration, with a default."""
+ if config.has_option(section, option):
+ return config.get(section, option, raw=raw, vars=None)
+ else:
+ return default
+
+def main():
+ config_file = CONFIG_FILE
+ offline = 0
+ verbose = 0
+
+ for arg in sys.argv[1:]:
+ if arg == "-h" or arg == "--help":
+ print "Usage: planet [options] [CONFIGFILE]"
+ print
+ print "Options:"
+ print " -v, --verbose DEBUG level logging during update"
+ print " -o, --offline Update the Planet from the cache only"
+ print " -h, --help Display this help message and exit"
+ print
+ sys.exit(0)
+ elif arg == "-v" or arg == "--verbose":
+ verbose = 1
+ elif arg == "-o" or arg == "--offline":
+ offline = 1
+ elif arg.startswith("-"):
+ print >>sys.stderr, "Unknown option:", arg
+ sys.exit(1)
+ else:
+ config_file = arg
+
+ # Read the configuration file
+ config = ConfigParser()
+ config.read(config_file)
+ if not config.has_section("Planet"):
+ print >>sys.stderr, "Configuration missing [Planet] section."
+ sys.exit(1)
+
+ # Read the [Planet] config section
+ planet_name = config_get(config, "Planet", "name", PLANET_NAME)
+ planet_link = config_get(config, "Planet", "link", PLANET_LINK)
+ planet_feed = config_get(config, "Planet", "feed", PLANET_FEED)
+ owner_name = config_get(config, "Planet", "owner_name", OWNER_NAME)
+ owner_email = config_get(config, "Planet", "owner_email", OWNER_EMAIL)
+ if verbose:
+ log_level = "DEBUG"
+ else:
+ log_level = config_get(config, "Planet", "log_level", LOG_LEVEL)
+ feed_timeout = config_get(config, "Planet", "feed_timeout", FEED_TIMEOUT)
+ template_files = config_get(config, "Planet", "template_files",
+ TEMPLATE_FILES).split(" ")
+
+ # Default feed to the first feed for which there is a template
+ if not planet_feed:
+ for template_file in template_files:
+ name = os.path.splitext(os.path.basename(template_file))[0]
+ if name.find('atom')>=0 or name.find('rss')>=0:
+ planet_feed = urlparse.urljoin(planet_link, name)
+ break
+
+ # Define locale
+ if config.has_option("Planet", "locale"):
+ # The user can specify more than one locale (separated by ":") as
+ # fallbacks.
+ locale_ok = False
+ for user_locale in config.get("Planet", "locale").split(':'):
+ user_locale = user_locale.strip()
+ try:
+ locale.setlocale(locale.LC_ALL, user_locale)
+ except locale.Error:
+ pass
+ else:
+ locale_ok = True
+ break
+ if not locale_ok:
+ print >>sys.stderr, "Unsupported locale setting."
+ sys.exit(1)
+
+ # Activate logging
+ planet.logging.basicConfig()
+ planet.logging.getLogger().setLevel(planet.logging.getLevelName(log_level))
+ log = planet.logging.getLogger("planet.runner")
+ try:
+ log.warning
+ except:
+ log.warning = log.warn
+
+ # timeoutsocket allows feedparser to time out rather than hang forever on
+ # ultra-slow servers. Python 2.3 now has this functionality available in
+ # the standard socket library, so under 2.3 you don't need to install
+ # anything. But you probably should anyway, because the socket module is
+ # buggy and timeoutsocket is better.
+ if feed_timeout:
+ try:
+ feed_timeout = float(feed_timeout)
+ except:
+ log.warning("Feed timeout set to invalid value '%s', skipping", feed_timeout)
+ feed_timeout = None
+
+ if feed_timeout and not offline:
+ try:
+ from planet import timeoutsocket
+ timeoutsocket.setDefaultSocketTimeout(feed_timeout)
+ log.debug("Socket timeout set to %d seconds", feed_timeout)
+ except ImportError:
+ import socket
+ if hasattr(socket, 'setdefaulttimeout'):
+ log.debug("timeoutsocket not found, using python function")
+ socket.setdefaulttimeout(feed_timeout)
+ log.debug("Socket timeout set to %d seconds", feed_timeout)
+ else:
+ log.error("Unable to set timeout to %d seconds", feed_timeout)
+
+ # run the planet
+ my_planet = planet.Planet(config)
+ my_planet.run(planet_name, planet_link, template_files, offline)
+
+
+
+ ## This is where archiving is done! ##
+ #add the current channels to the db
+ channels = my_planet.channels()
+ for channel in channels:
+
+ author_name = channel.name
+
+ try:
+ author_face = channel.face
+ except:
+ author_face = None
+ try:
+ channel_subtitle = channel.subtitle
+ except:
+ channel_subtitle = None
+ try:
+ channel_title = channel.title
+ except:
+ channel_title = None
+
+ channel_url = channel.url
+
+ try:
+ channel_link = channel.link
+ except:
+ channel_link = None
+
+ try:
+ channel_urlstatus = channel.url_status
+ except:
+ channel_urlstatus = None
+
+ label = channel.label
+
+ label_personal = 0
+ label_lkd = 0
+ label_community = 0
+ label_eng = 0
+ if label == "Personal":
+ label_personal = 1
+ if label == "LKD":
+ label_lkd = 1
+ if label == "Community":
+ label_community = 1
+ if label == "Eng":
+ label_eng = 1
+
+ id = channel.id
+
+ try:
+ author = Authors.objects.get(author_id=id)
+
+ #update the values with the ones at the config file
+ author.author_name = author_name
+ #print author_name
+ author.author_face = author_face
+ author.channel_subtitle = channel_subtitle
+ author.channel_title = channel_title
+ author.channel_url = channel_url
+ author.channel_link = channel_link
+ author.channel_url_status = channel_urlstatus
+ author.label_personal = label_personal
+ author.label_lkd = label_lkd
+ author.label_community = label_community
+ author.label_eng = label_eng
+
+ except Exception, ex:
+ #print ex
+ author = Authors(author_id=id, author_name=author_name, author_face=author_face, channel_subtitle=channel_subtitle, channel_title=channel_title, channel_url=channel_url, channel_link=channel_link, channel_urlstatus=channel_urlstatus, label_personal=label_personal, label_lkd=label_lkd, label_community=label_community, label_eng=label_eng)
+
+
+ author.save()
+
+ #entry issues
+ items = channel.items()
+ for item in items:
+ id_hash = item.id_hash
+
+ try:
+ entry = author.entries_set.get(id_hash = id_hash)
+ entry.title = item.title
+ entry.content_html = item.content
+ entry.content_text = entry.sanitize(item.content)
+ entry.summary = item.summary
+ entry.link = item.link
+ d = item.date
+ entry.date = datetime.datetime(d[0], d[1], d[2], d[3], d[4], d[5])
+ except:
+ content_html = item.content
+ #content_text = entry.sanitize(content_html)
+ d = item.date
+ if not item.has_key('summary'): summary = None
+ else: summary = item.summary
+ entry = author.entries_set.create(id_hash=id_hash, title=item.title, content_html=item.content, summary=summary, link=item.link, date=datetime.datetime(d[0], d[1], d[2], d[3], d[4], d[5]))
+ entry.content_text = entry.sanitize(content_html)
+
+ entry.save()
+
+ #datetime issue
+ r = RunTime()
+ r.save()
+
+ my_planet.generate_all_files(template_files, planet_name,
+ planet_link, planet_feed, owner_name, owner_email)
+
+
+if __name__ == "__main__":
+ main()
+
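After a run the aggregated feeds are archived in the djagen.collector models as well as rendered through the templates, so the Django side can read them back with ordinary ORM calls. A sketch using only the fields referenced in main() above (the filter, ordering and limit are illustrative):

    # Sketch: reading back the archive written by main() above.
    import os
    os.environ['DJANGO_SETTINGS_MODULE'] = 'djagen.settings'
    from djagen.collector.models import Authors

    for author in Authors.objects.filter(label_personal=1):
        # entries_set is the same reverse relation main() uses when archiving
        for entry in author.entries_set.all().order_by('-date')[:5]:
            print author.author_name, entry.date, entry.title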
diff --git a/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet/__init__.py b/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet/__init__.py
new file mode 100755
index 0000000..7829731
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet/__init__.py
@@ -0,0 +1,969 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+"""Planet aggregator library.
+
+This package is a library for developing web sites or software that
+aggregate RSS, CDF and Atom feeds taken from elsewhere into a single,
+combined feed.
+"""
+
+__version__ = "2.0"
+__authors__ = [ "Scott James Remnant ",
+ "Jeff Waugh " ]
+__license__ = "Python"
+
+import locale
+
+# Modules available without separate import
+import cache
+import feedparser
+import sanitize
+import htmltmpl
+import sgmllib
+try:
+ import logging
+except:
+ import compat_logging as logging
+
+# Limit the effect of "from planet import *"
+__all__ = ("cache", "feedparser", "htmltmpl", "logging",
+ "Planet", "Channel", "NewsItem")
+
+
+import os
+import md5
+import time
+import dbhash
+import re
+
+try:
+ from xml.sax.saxutils import escape
+except:
+ def escape(data):
+        return data.replace("&","&amp;").replace(">","&gt;").replace("<","&lt;")
+
+# Version information (for generator headers)
+VERSION = ("Planet/%s +http://www.planetplanet.org" % __version__)
+
+# Default User-Agent header to send when retreiving feeds
+USER_AGENT = VERSION + " " + feedparser.USER_AGENT
+
+# Default cache directory
+CACHE_DIRECTORY = "cache"
+
+# Default number of items to display from a new feed
+NEW_FEED_ITEMS = 10
+
+# Useful common date/time formats
+TIMEFMT_ISO = "%Y-%m-%dT%H:%M:%S+00:00"
+TIMEFMT_822 = "%a, %d %b %Y %H:%M:%S +0000"
+
+
+# Log instance to use here
+log = logging.getLogger("planet")
+try:
+ log.warning
+except:
+ log.warning = log.warn
+
+# Defaults for the template file config sections
+ENCODING = "utf-8"
+ITEMS_PER_PAGE = 60
+DAYS_PER_PAGE = 0
+OUTPUT_DIR = "output"
+DATE_FORMAT = "%B %d, %Y %I:%M %p"
+NEW_DATE_FORMAT = "%B %d, %Y"
+ACTIVITY_THRESHOLD = 0
+
+class stripHtml(sgmllib.SGMLParser):
+ "remove all tags from the data"
+ def __init__(self, data):
+ sgmllib.SGMLParser.__init__(self)
+ self.result=''
+ self.feed(data)
+ self.close()
+ def handle_data(self, data):
+ if data: self.result+=data
+
+def template_info(item, date_format):
+ """Produce a dictionary of template information."""
+ info = {}
+
+ #set the locale so that the dates at the feeds will be in english
+ lc=locale.getlocale()
+ if lc[0] == None:
+ try:
+ locale.setlocale(locale.LC_ALL, '')
+ except:
+ pass
+ elif lc[0].find("tr") != -1:
+ try:
+ locale.setlocale(locale.LC_ALL, '')
+ except:
+ pass
+
+ for key in item.keys():
+ if item.key_type(key) == item.DATE:
+ date = item.get_as_date(key)
+ info[key] = time.strftime(date_format, date)
+ info[key + "_iso"] = time.strftime(TIMEFMT_ISO, date)
+ info[key + "_822"] = time.strftime(TIMEFMT_822, date)
+ else:
+ info[key] = item[key]
+ if 'title' in item.keys():
+ info['title_plain'] = stripHtml(info['title']).result
+
+ return info
+
+
+class Planet:
+ """A set of channels.
+
+ This class represents a set of channels for which the items will
+ be aggregated together into one combined feed.
+
+ Properties:
+ user_agent User-Agent header to fetch feeds with.
+ cache_directory Directory to store cached channels in.
+ new_feed_items Number of items to display from a new feed.
+ filter A regular expression that articles must match.
+ exclude A regular expression that articles must not match.
+ """
+ def __init__(self, config):
+ self.config = config
+
+ self._channels = []
+
+ self.user_agent = USER_AGENT
+ self.cache_directory = CACHE_DIRECTORY
+ self.new_feed_items = NEW_FEED_ITEMS
+ self.filter = None
+ self.exclude = None
+
+ def tmpl_config_get(self, template, option, default=None, raw=0, vars=None):
+ """Get a template value from the configuration, with a default."""
+ if self.config.has_option(template, option):
+ return self.config.get(template, option, raw=raw, vars=None)
+ elif self.config.has_option("Planet", option):
+ return self.config.get("Planet", option, raw=raw, vars=None)
+ else:
+ return default
+
+ def gather_channel_info(self, template_file="Planet"):
+ date_format = self.tmpl_config_get(template_file,
+ "date_format", DATE_FORMAT, raw=1)
+
+ activity_threshold = int(self.tmpl_config_get(template_file,
+ "activity_threshold",
+ ACTIVITY_THRESHOLD))
+
+ if activity_threshold:
+ activity_horizon = \
+ time.gmtime(time.time()-86400*activity_threshold)
+ else:
+ activity_horizon = 0
+
+ channels = {}
+ channels_list = []
+ for channel in self.channels(hidden=1):
+ channels[channel] = template_info(channel, date_format)
+ channels_list.append(channels[channel])
+
+ # identify inactive feeds
+ if activity_horizon:
+ latest = channel.items(sorted=1)
+ if len(latest)==0 or latest[0].date < activity_horizon:
+ channels[channel]["message"] = \
+ "no activity in %d days" % activity_threshold
+
+ # report channel level errors
+ if not channel.url_status: continue
+ status = int(channel.url_status)
+ if status == 403:
+ channels[channel]["message"] = "403: forbidden"
+ elif status == 404:
+ channels[channel]["message"] = "404: not found"
+ elif status == 408:
+ channels[channel]["message"] = "408: request timeout"
+ elif status == 410:
+ channels[channel]["message"] = "410: gone"
+ elif status == 500:
+ channels[channel]["message"] = "internal server error"
+ elif status >= 400:
+ channels[channel]["message"] = "http status %s" % status
+
+ return channels, channels_list
+
+ def gather_items_info(self, channels, template_file="Planet", channel_list=None):
+ items_list = []
+ prev_date = []
+ prev_channel = None
+
+ date_format = self.tmpl_config_get(template_file,
+ "date_format", DATE_FORMAT, raw=1)
+ items_per_page = int(self.tmpl_config_get(template_file,
+ "items_per_page", ITEMS_PER_PAGE))
+ days_per_page = int(self.tmpl_config_get(template_file,
+ "days_per_page", DAYS_PER_PAGE))
+ new_date_format = self.tmpl_config_get(template_file,
+ "new_date_format", NEW_DATE_FORMAT, raw=1)
+
+ for newsitem in self.items(max_items=items_per_page,
+ max_days=days_per_page,
+ channels=channel_list):
+ item_info = template_info(newsitem, date_format)
+ chan_info = channels[newsitem._channel]
+ for k, v in chan_info.items():
+ item_info["channel_" + k] = v
+
+ # Check for the start of a new day
+ if prev_date[:3] != newsitem.date[:3]:
+ prev_date = newsitem.date
+ item_info["new_date"] = time.strftime(new_date_format,
+ newsitem.date)
+
+ # Check for the start of a new channel
+ if item_info.has_key("new_date") \
+ or prev_channel != newsitem._channel:
+ prev_channel = newsitem._channel
+ item_info["new_channel"] = newsitem._channel.url
+
+ items_list.append(item_info)
+
+ return items_list
+
+ def run(self, planet_name, planet_link, template_files, offline = False):
+ log = logging.getLogger("planet.runner")
+
+ # Create a planet
+ log.info("Loading cached data")
+ if self.config.has_option("Planet", "cache_directory"):
+ self.cache_directory = self.config.get("Planet", "cache_directory")
+ if self.config.has_option("Planet", "new_feed_items"):
+ self.new_feed_items = int(self.config.get("Planet", "new_feed_items"))
+ self.user_agent = "%s +%s %s" % (planet_name, planet_link,
+ self.user_agent)
+ if self.config.has_option("Planet", "filter"):
+ self.filter = self.config.get("Planet", "filter")
+
+ # The other configuration blocks are channels to subscribe to
+ for feed_url in self.config.sections():
+ if feed_url == "Planet" or feed_url in template_files:
+ continue
+ log.info(feed_url)
+ # Create a channel, configure it and subscribe it
+ channel = Channel(self, feed_url)
+ self.subscribe(channel)
+
+ # Update it
+ try:
+ if not offline and not channel.url_status == '410':
+ channel.update()
+ except KeyboardInterrupt:
+ raise
+ except:
+ log.exception("Update of <%s> failed", feed_url)
+
+ def generate_all_files(self, template_files, planet_name,
+ planet_link, planet_feed, owner_name, owner_email):
+
+ log = logging.getLogger("planet.runner")
+ # Go-go-gadget-template
+ for template_file in template_files:
+ manager = htmltmpl.TemplateManager()
+ log.info("Processing template %s", template_file)
+ try:
+ template = manager.prepare(template_file)
+ except htmltmpl.TemplateError:
+ template = manager.prepare(os.path.basename(template_file))
+ # Read the configuration
+ output_dir = self.tmpl_config_get(template_file,
+ "output_dir", OUTPUT_DIR)
+ date_format = self.tmpl_config_get(template_file,
+ "date_format", DATE_FORMAT, raw=1)
+ encoding = self.tmpl_config_get(template_file, "encoding", ENCODING)
+
+ # We treat each template individually
+ base = os.path.splitext(os.path.basename(template_file))[0]
+ url = os.path.join(planet_link, base)
+ output_file = os.path.join(output_dir, base)
+
+ # Gather information
+ channels, channels_list = self.gather_channel_info(template_file)
+ items_list = self.gather_items_info(channels, template_file)
+
+ # Gather item information
+
+ # Process the template
+ tp = htmltmpl.TemplateProcessor(html_escape=0)
+ tp.set("Items", items_list)
+ tp.set("Channels", channels_list)
+
+ # Generic information
+ tp.set("generator", VERSION)
+ tp.set("name", planet_name)
+ tp.set("link", planet_link)
+ tp.set("owner_name", owner_name)
+ tp.set("owner_email", owner_email)
+ tp.set("url", url)
+
+ if planet_feed:
+ tp.set("feed", planet_feed)
+ tp.set("feedtype", planet_feed.find('rss')>=0 and 'rss' or 'atom')
+
+ # Update time
+ date = time.localtime()
+ tp.set("date", time.strftime(date_format, date))
+ tp.set("date_iso", time.strftime(TIMEFMT_ISO, date))
+ tp.set("date_822", time.strftime(TIMEFMT_822, date))
+
+ try:
+ log.info("Writing %s", output_file)
+ output_fd = open(output_file, "w")
+ if encoding.lower() in ("utf-8", "utf8"):
+ # UTF-8 output is the default because we use that internally
+ output_fd.write(tp.process(template))
+ elif encoding.lower() in ("xml", "html", "sgml"):
+ # Magic for Python 2.3 users
+ output = tp.process(template).decode("utf-8")
+ output_fd.write(output.encode("ascii", "xmlcharrefreplace"))
+ else:
+ # Must be a "known" encoding
+ output = tp.process(template).decode("utf-8")
+ output_fd.write(output.encode(encoding, "replace"))
+ output_fd.close()
+ except KeyboardInterrupt:
+ raise
+ except:
+ log.exception("Write of %s failed", output_file)
+
+ def channels(self, hidden=0, sorted=1):
+ """Return the list of channels."""
+ channels = []
+ for channel in self._channels:
+ if hidden or not channel.has_key("hidden"):
+ channels.append((channel.name, channel))
+
+ if sorted:
+ channels.sort()
+
+ return [ c[-1] for c in channels ]
+
+ def find_by_basename(self, basename):
+ for channel in self._channels:
+ if basename == channel.cache_basename(): return channel
+
+ def subscribe(self, channel):
+ """Subscribe the planet to the channel."""
+ self._channels.append(channel)
+
+ def unsubscribe(self, channel):
+ """Unsubscribe the planet from the channel."""
+ self._channels.remove(channel)
+
+ def items(self, hidden=0, sorted=1, max_items=0, max_days=0, channels=None):
+ """Return an optionally filtered list of items in the channel.
+
+ The filters are applied in the following order:
+
+ If hidden is true then items in hidden channels and hidden items
+ will be returned.
+
+ If sorted is true then the item list will be sorted with the newest
+ first.
+
+ If max_items is non-zero then this number of items, at most, will
+ be returned.
+
+ If max_days is non-zero then any items older than the newest by
+ this number of days won't be returned. Requires sorted=1 to work.
+
+
+ The sharp-eyed will note that this looks a little strange code-wise,
+ it turns out that Python gets *really* slow if we try to sort the
+ actual items themselves. Also we use mktime here, but it's ok
+ because we discard the numbers and just need them to be relatively
+ consistent between each other.
+ """
+ planet_filter_re = None
+ if self.filter:
+ planet_filter_re = re.compile(self.filter, re.I)
+ planet_exclude_re = None
+ if self.exclude:
+ planet_exclude_re = re.compile(self.exclude, re.I)
+
+ items = []
+ seen_guids = {}
+ if not channels: channels=self.channels(hidden=hidden, sorted=0)
+ for channel in channels:
+ for item in channel._items.values():
+ if hidden or not item.has_key("hidden"):
+
+ channel_filter_re = None
+ if channel.filter:
+ channel_filter_re = re.compile(channel.filter,
+ re.I)
+ channel_exclude_re = None
+ if channel.exclude:
+ channel_exclude_re = re.compile(channel.exclude,
+ re.I)
+ if (planet_filter_re or planet_exclude_re \
+ or channel_filter_re or channel_exclude_re):
+ title = ""
+ if item.has_key("title"):
+ title = item.title
+ content = item.get_content("content")
+
+ if planet_filter_re:
+ if not (planet_filter_re.search(title) \
+ or planet_filter_re.search(content)):
+ continue
+
+ if planet_exclude_re:
+ if (planet_exclude_re.search(title) \
+ or planet_exclude_re.search(content)):
+ continue
+
+ if channel_filter_re:
+ if not (channel_filter_re.search(title) \
+ or channel_filter_re.search(content)):
+ continue
+
+ if channel_exclude_re:
+ if (channel_exclude_re.search(title) \
+ or channel_exclude_re.search(content)):
+ continue
+
+ if not seen_guids.has_key(item.id):
+ seen_guids[item.id] = 1
+ items.append((time.mktime(item.date), item.order, item))
+
+ # Sort the list
+ if sorted:
+ items.sort()
+ items.reverse()
+
+ # Apply max_items filter
+ if len(items) and max_items:
+ items = items[:max_items]
+
+ # Apply max_days filter
+ if len(items) and max_days:
+ max_count = 0
+ max_time = items[0][0] - max_days * 86400  # 86400 seconds per day
+ for item in items:
+ if item[0] > max_time:
+ max_count += 1
+ else:
+ items = items[:max_count]
+ break
+
+ return [ i[-1] for i in items ]
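+
+ # Illustrative sketch (not part of the original Planet code): a typical
+ # caller asks for a bounded, newest-first item list. The name my_planet
+ # below is hypothetical.
+ #
+ #   recent = my_planet.items(max_items=20, max_days=7)
+ #   for item in recent:
+ #       print item.id
+ #
+ # Note that max_days is measured from the newest item returned, not from
+ # "now", and it only makes sense together with sorted=1 (the default).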
+
+class Channel(cache.CachedInfo):
+ """A list of news items.
+
+ This class represents a list of news items taken from the feed of
+ a website or other source.
+
+ Properties:
+ url URL of the feed.
+ url_etag E-Tag of the feed URL.
+ url_modified Last modified time of the feed URL.
+ url_status Last HTTP status of the feed URL.
+ hidden Channel should be hidden (True if exists).
+ name Name of the feed owner, or feed title.
+ next_order Next order number to be assigned to NewsItem
+
+ updated Correct UTC-Normalised update time of the feed.
+ last_updated Correct UTC-Normalised time the feed was last updated.
+
+ id An identifier the feed claims is unique (*).
+ title One-line title (*).
+ link Link to the original format feed (*).
+ tagline Short description of the feed (*).
+ info Longer description of the feed (*).
+
+ modified Date the feed claims to have been modified (*).
+
+ author Name of the author (*).
+ publisher Name of the publisher (*).
+ generator Name of the feed generator (*).
+ category Category name (*).
+ copyright Copyright information for humans to read (*).
+ license Link to the licence for the content (*).
+ docs Link to the specification of the feed format (*).
+ language Primary language (*).
+ errorreportsto E-Mail address to send error reports to (*).
+
+ image_url URL of an associated image (*).
+ image_link Link to go with the associated image (*).
+ image_title Alternative text of the associated image (*).
+ image_width Width of the associated image (*).
+ image_height Height of the associated image (*).
+
+ filter A regular expression that articles must match.
+ exclude A regular expression that articles must not match.
+
+ Properties marked (*) will only be present if the original feed
+ contained them. Note that the optional 'modified' date field is simply
+ a claim made by the item and parsed from the information given, 'updated'
+ (and 'last_updated') are far more reliable sources of information.
+
+ Some feeds may define additional properties to those above.
+ """
+ IGNORE_KEYS = ("links", "contributors", "textinput", "cloud", "categories",
+ "url", "href", "url_etag", "url_modified", "tags", "itunes_explicit")
+
+ def __init__(self, planet, url):
+ if not os.path.isdir(planet.cache_directory):
+ os.makedirs(planet.cache_directory)
+ cache_filename = cache.filename(planet.cache_directory, url)
+ cache_file = dbhash.open(cache_filename, "c", 0666)
+
+ cache.CachedInfo.__init__(self, cache_file, url, root=1)
+
+ self._items = {}
+ self._planet = planet
+ self._expired = []
+ self.url = url
+ # retain the original URL for error reporting
+ self.configured_url = url
+ self.url_etag = None
+ self.url_status = None
+ self.url_modified = None
+ self.name = None
+ self.updated = None
+ self.last_updated = None
+ self.filter = None
+ self.exclude = None
+ self.next_order = "0"
+ self.cache_read()
+ self.cache_read_entries()
+
+ if planet.config.has_section(url):
+ for option in planet.config.options(url):
+ value = planet.config.get(url, option)
+ self.set_as_string(option, value, cached=0)
+
+ def has_item(self, id_):
+ """Check whether the item exists in the channel."""
+ return self._items.has_key(id_)
+
+ def get_item(self, id_):
+ """Return the item from the channel."""
+ return self._items[id_]
+
+ # Special methods
+ __contains__ = has_item
+
+ def items(self, hidden=0, sorted=0):
+ """Return the item list."""
+ items = []
+ for item in self._items.values():
+ if hidden or not item.has_key("hidden"):
+ items.append((time.mktime(item.date), item.order, item))
+
+ if sorted:
+ items.sort()
+ items.reverse()
+
+ return [ i[-1] for i in items ]
+
+ def __iter__(self):
+ """Iterate the sorted item list."""
+ return iter(self.items(sorted=1))
+
+ def cache_read_entries(self):
+ """Read entry information from the cache."""
+ keys = self._cache.keys()
+ for key in keys:
+ if key.find(" ") != -1: continue
+ if self.has_key(key): continue
+
+ item = NewsItem(self, key)
+ self._items[key] = item
+
+ def cache_basename(self):
+ return cache.filename('',self._id)
+
+ def cache_write(self, sync=1):
+
+ """Write channel and item information to the cache."""
+ for item in self._items.values():
+ item.cache_write(sync=0)
+ for item in self._expired:
+ item.cache_clear(sync=0)
+ cache.CachedInfo.cache_write(self, sync)
+
+ self._expired = []
+
+ def feed_information(self):
+ """
+ Returns a description string for the feed embedded in this channel.
+
+ This will usually simply be the feed url embedded in <>, but in the
+ case where the current self.url has changed from the original
+ self.configured_url the string will contain both pieces of information.
+ This is so that the URL in question is easier to find in logging
+ output: getting an error about a URL that doesn't appear in your config
+ file is annoying.
+ """
+ if self.url == self.configured_url:
+ return "<%s>" % self.url
+ else:
+ return "<%s> (formerly <%s>)" % (self.url, self.configured_url)
+
+ def update(self):
+ """Download the feed to refresh the information.
+
+ This does the actual work of pulling down the feed and if it changes
+ updates the cached information about the feed and entries within it.
+ """
+ info = feedparser.parse(self.url,
+ etag=self.url_etag, modified=self.url_modified,
+ agent=self._planet.user_agent)
+ if info.has_key("status"):
+ self.url_status = str(info.status)
+ elif info.has_key("entries") and len(info.entries)>0:
+ self.url_status = str(200)
+ elif info.bozo and info.bozo_exception.__class__.__name__=='Timeout':
+ self.url_status = str(408)
+ else:
+ self.url_status = str(500)
+
+ if self.url_status == '301' and \
+ (info.has_key("entries") and len(info.entries)>0):
+ log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
+ try:
+ os.link(cache.filename(self._planet.cache_directory, self.url),
+ cache.filename(self._planet.cache_directory, info.url))
+ except:
+ pass
+ self.url = info.url
+ elif self.url_status == '304':
+ log.info("Feed %s unchanged", self.feed_information())
+ return
+ elif self.url_status == '410':
+ log.info("Feed %s gone", self.feed_information())
+ self.cache_write()
+ return
+ elif self.url_status == '408':
+ log.warning("Feed %s timed out", self.feed_information())
+ return
+ elif int(self.url_status) >= 400:
+ log.error("Error %s while updating feed %s",
+ self.url_status, self.feed_information())
+ return
+ else:
+ log.info("Updating feed %s", self.feed_information())
+
+ self.url_etag = info.has_key("etag") and info.etag or None
+ self.url_modified = info.has_key("modified") and info.modified or None
+ if self.url_etag is not None:
+ log.debug("E-Tag: %s", self.url_etag)
+ if self.url_modified is not None:
+ log.debug("Last Modified: %s",
+ time.strftime(TIMEFMT_ISO, self.url_modified))
+
+ self.update_info(info.feed)
+ self.update_entries(info.entries)
+ self.cache_write()
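+
+ # Illustrative sketch (not part of the original Planet code): the usual
+ # life cycle of a channel, with hypothetical names (my_planet, feed_url):
+ #
+ #   channel = Channel(my_planet, feed_url)
+ #   my_planet.subscribe(channel)
+ #   channel.update()     # fetches the feed, honouring ETag/Last-Modified
+ #
+ # update() calls cache_write() itself unless the fetch returned 304
+ # (unchanged), 408 (timeout) or another error status.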
+
+ def update_info(self, feed):
+ """Update information from the feed.
+
+ This reads the feed information supplied by feedparser and updates
+ the cached information about the feed. These are the various
+ potentially interesting properties that you might care about.
+ """
+ for key in feed.keys():
+ if key in self.IGNORE_KEYS or key + "_parsed" in self.IGNORE_KEYS:
+ # Ignored fields
+ pass
+ elif feed.has_key(key + "_parsed"):
+ # Ignore unparsed date fields
+ pass
+ elif key.endswith("_detail"):
+ # retain name and email sub-fields
+ if feed[key].has_key('name') and feed[key].name:
+ self.set_as_string(key.replace("_detail","_name"), \
+ feed[key].name)
+ if feed[key].has_key('email') and feed[key].email:
+ self.set_as_string(key.replace("_detail","_email"), \
+ feed[key].email)
+ elif key == "items":
+ # Ignore items field
+ pass
+ elif key.endswith("_parsed"):
+ # Date fields
+ if feed[key] is not None:
+ self.set_as_date(key[:-len("_parsed")], feed[key])
+ elif key == "image":
+ # Image field: save all the information
+ if feed[key].has_key("url"):
+ self.set_as_string(key + "_url", feed[key].url)
+ if feed[key].has_key("link"):
+ self.set_as_string(key + "_link", feed[key].link)
+ if feed[key].has_key("title"):
+ self.set_as_string(key + "_title", feed[key].title)
+ if feed[key].has_key("width"):
+ self.set_as_string(key + "_width", str(feed[key].width))
+ if feed[key].has_key("height"):
+ self.set_as_string(key + "_height", str(feed[key].height))
+ elif isinstance(feed[key], (str, unicode)):
+ # String fields
+ try:
+ detail = key + '_detail'
+ if feed.has_key(detail) and feed[detail].has_key('type'):
+ if feed[detail].type == 'text/html':
+ feed[key] = sanitize.HTML(feed[key])
+ elif feed[detail].type == 'text/plain':
+ feed[key] = escape(feed[key])
+ self.set_as_string(key, feed[key])
+ except KeyboardInterrupt:
+ raise
+ except:
+ log.exception("Ignored '%s' of <%s>, unknown format",
+ key, self.url)
+
+ def update_entries(self, entries):
+ """Update entries from the feed.
+
+ This reads the entries supplied by feedparser and updates the
+ cached information about them. It's at this point that we update
+ the 'updated' timestamp and keep the old one in 'last_updated';
+ these provide boundaries for acceptable entry times.
+
+ If this is the first time a feed has been updated then most of the
+ items will be marked as hidden, according to Planet.new_feed_items.
+
+ If the feed does not contain items which, according to the sort order,
+ should be there, those items are assumed to have been expired from
+ the feed or replaced, and they are removed from the cache.
+ """
+ if not len(entries):
+ return
+
+ self.last_updated = self.updated
+ self.updated = time.gmtime()
+
+ new_items = []
+ feed_items = []
+ for entry in entries:
+ # Try really hard to find some kind of unique identifier
+ if entry.has_key("id"):
+ entry_id = cache.utf8(entry.id)
+ elif entry.has_key("link"):
+ entry_id = cache.utf8(entry.link)
+ elif entry.has_key("title"):
+ entry_id = (self.url + "/"
+ + md5.new(cache.utf8(entry.title)).hexdigest())
+ elif entry.has_key("summary"):
+ entry_id = (self.url + "/"
+ + md5.new(cache.utf8(entry.summary)).hexdigest())
+ else:
+ log.error("Unable to find or generate id, entry ignored")
+ continue
+
+ # Create the item if necessary and update
+ if self.has_item(entry_id):
+ item = self._items[entry_id]
+ else:
+ item = NewsItem(self, entry_id)
+ self._items[entry_id] = item
+ new_items.append(item)
+ item.update(entry)
+ feed_items.append(entry_id)
+
+ # Hide excess items the first time through
+ if self.last_updated is None and self._planet.new_feed_items \
+ and len(feed_items) > self._planet.new_feed_items:
+ item.hidden = "yes"
+ log.debug("Marked <%s> as hidden (new feed)", entry_id)
+
+ # Assign order numbers in reverse
+ new_items.reverse()
+ for item in new_items:
+ item.order = self.next_order = str(int(self.next_order) + 1)
+
+ # Check for expired or replaced items
+ feed_count = len(feed_items)
+ log.debug("Items in Feed: %d", feed_count)
+ for item in self.items(sorted=1):
+ if feed_count < 1:
+ break
+ elif item.id in feed_items:
+ feed_count -= 1
+ elif item._channel.url_status != '226':
+ del(self._items[item.id])
+ self._expired.append(item)
+ log.debug("Removed expired or replaced item <%s>", item.id)
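+
+ # Illustrative sketch (not part of the original Planet code): when an
+ # entry carries neither an id nor a link, the identifier is derived from
+ # a hash of its title (or summary), namespaced by the feed URL, using the
+ # md5 module this file already imports:
+ #
+ #   entry_id = "http://example.org/feed" + "/" + \
+ #              md5.new("Some entry title").hexdigest()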
+
+ def get_name(self, key):
+ """Return the key containing the name."""
+ for key in ("name", "title"):
+ if self.has_key(key) and self.key_type(key) != self.NULL:
+ return self.get_as_string(key)
+
+ return ""
+
+class NewsItem(cache.CachedInfo):
+ """An item of news.
+
+ This class represents a single item of news on a channel. They're
+ created by members of the Channel class and accessible through it.
+
+ Properties:
+ id Channel-unique identifier for this item.
+ id_hash Relatively short, printable cryptographic hash of id
+ date Corrected UTC-Normalised update time, for sorting.
+ order Order in which items on the same date can be sorted.
+ hidden Item should be hidden (True if exists).
+
+ title One-line title (*).
+ link Link to the original format text (*).
+ summary Short first-page summary (*).
+ content Full HTML content.
+
+ modified Date the item claims to have been modified (*).
+ issued Date the item claims to have been issued (*).
+ created Date the item claims to have been created (*).
+ expired Date the item claims to expire (*).
+
+ author Name of the author (*).
+ publisher Name of the publisher (*).
+ category Category name (*).
+ comments Link to a page to enter comments (*).
+ license Link to the licence for the content (*).
+ source_name Name of the original source of this item (*).
+ source_link Link to the original source of this item (*).
+
+ Properties marked (*) will only be present if the original feed
+ contained them. Note that the various optional date fields are
+ simply claims made by the item and parsed from the information
+ given, 'date' is a far more reliable source of information.
+
+ Some feeds may define additional properties to those above.
+ """
+ IGNORE_KEYS = ("categories", "contributors", "enclosures", "links",
+ "guidislink", "date", "tags")
+
+ def __init__(self, channel, id_):
+ cache.CachedInfo.__init__(self, channel._cache, id_)
+
+ self._channel = channel
+ self.id = id_
+ self.id_hash = md5.new(id_).hexdigest()
+ self.date = None
+ self.order = None
+ self.content = None
+ self.cache_read()
+
+ def update(self, entry):
+ """Update the item from the feedparser entry given."""
+ for key in entry.keys():
+ if key in self.IGNORE_KEYS or key + "_parsed" in self.IGNORE_KEYS:
+ # Ignored fields
+ pass
+ elif entry.has_key(key + "_parsed"):
+ # Ignore unparsed date fields
+ pass
+ elif key.endswith("_detail"):
+ # retain name, email, and language sub-fields
+ if entry[key].has_key('name') and entry[key].name:
+ self.set_as_string(key.replace("_detail","_name"), \
+ entry[key].name)
+ if entry[key].has_key('email') and entry[key].email:
+ self.set_as_string(key.replace("_detail","_email"), \
+ entry[key].email)
+ if entry[key].has_key('language') and entry[key].language and \
+ (not self._channel.has_key('language') or \
+ entry[key].language != self._channel.language):
+ self.set_as_string(key.replace("_detail","_language"), \
+ entry[key].language)
+ elif key.endswith("_parsed"):
+ # Date fields
+ if entry[key] is not None:
+ self.set_as_date(key[:-len("_parsed")], entry[key])
+ elif key == "source":
+ # Source field: save both url and value
+ if entry[key].has_key("value"):
+ self.set_as_string(key + "_name", entry[key].value)
+ if entry[key].has_key("url"):
+ self.set_as_string(key + "_link", entry[key].url)
+ elif key == "content":
+ # Content field: concatenate the values
+ value = ""
+ for item in entry[key]:
+ if item.type == 'text/html':
+ item.value = sanitize.HTML(item.value)
+ elif item.type == 'text/plain':
+ item.value = escape(item.value)
+ if item.has_key('language') and item.language and \
+ (not self._channel.has_key('language') or
+ item.language != self._channel.language) :
+ self.set_as_string(key + "_language", item.language)
+ value += cache.utf8(item.value)
+ self.set_as_string(key, value)
+ elif isinstance(entry[key], (str, unicode)):
+ # String fields
+ try:
+ detail = key + '_detail'
+ if entry.has_key(detail):
+ if entry[detail].has_key('type'):
+ if entry[detail].type == 'text/html':
+ entry[key] = sanitize.HTML(entry[key])
+ elif entry[detail].type == 'text/plain':
+ entry[key] = escape(entry[key])
+ self.set_as_string(key, entry[key])
+ except KeyboardInterrupt:
+ raise
+ except:
+ log.exception("Ignored '%s' of <%s>, unknown format",
+ key, self.id)
+
+ # Generate the date field if we need to
+ self.get_date("date")
+
+ def get_date(self, key):
+ """Get (or update) the date key.
+
+ We check whether the date the entry claims to have been changed is
+ since we last updated this feed and when we pulled the feed off the
+ site.
+
+ If it is then it's probably not bogus, and we'll sort accordingly.
+
+ If it isn't then we bound it appropriately; this ensures that
+ entries appear in posting sequence but don't overlap entries
+ added in previous updates and don't creep into the next one.
+ """
+
+ for other_key in ("updated", "modified", "published", "issued", "created"):
+ if self.has_key(other_key):
+ date = self.get_as_date(other_key)
+ break
+ else:
+ date = None
+
+ if date is not None:
+ if date > self._channel.updated:
+ date = self._channel.updated
+# elif date < self._channel.last_updated:
+# date = self._channel.updated
+ elif self.has_key(key) and self.key_type(key) != self.NULL:
+ return self.get_as_date(key)
+ else:
+ date = self._channel.updated
+
+ self.set_as_date(key, date)
+ return date
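+
+ # Illustrative sketch (not part of the original Planet code): if an entry
+ # claims an update time later than the moment we fetched the feed, the
+ # claim is distrusted and clamped to the channel's own update time:
+ #
+ #   claimed = time.gmtime(time.time() + 3600)   # one hour in the future
+ #   if claimed > self._channel.updated:
+ #       claimed = self._channel.updated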
+
+ def get_content(self, key):
+ """Return the key containing the content."""
+ for key in ("content", "tagline", "summary"):
+ if self.has_key(key) and self.key_type(key) != self.NULL:
+ return self.get_as_string(key)
+
+ return ""
diff --git a/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet/atomstyler.py b/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet/atomstyler.py
new file mode 100755
index 0000000..9220702
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet/atomstyler.py
@@ -0,0 +1,124 @@
+from xml.dom import minidom, Node
+from urlparse import urlparse, urlunparse
+from xml.parsers.expat import ExpatError
+from htmlentitydefs import name2codepoint
+import re
+
+# select and apply an xml:base for this entry
+class relativize:
+ def __init__(self, parent):
+ self.score = {}
+ self.links = []
+ self.collect_and_tally(parent)
+ self.base = self.select_optimal_base()
+ if self.base:
+ if not parent.hasAttribute('xml:base'):
+ self.rebase(parent)
+ parent.setAttribute('xml:base', self.base)
+
+ # collect and tally cite, href and src attributes
+ def collect_and_tally(self,parent):
+ uri = None
+ if parent.hasAttribute('cite'): uri=parent.getAttribute('cite')
+ if parent.hasAttribute('href'): uri=parent.getAttribute('href')
+ if parent.hasAttribute('src'): uri=parent.getAttribute('src')
+
+ if uri:
+ parts=urlparse(uri)
+ if parts[0].lower() == 'http':
+ parts = (parts[1]+parts[2]).split('/')
+ base = None
+ for i in range(1,len(parts)):
+ base = tuple(parts[0:i])
+ self.score[base] = self.score.get(base,0) + len(base)
+ if base and base not in self.links: self.links.append(base)
+
+ for node in parent.childNodes:
+ if node.nodeType == Node.ELEMENT_NODE:
+ self.collect_and_tally(node)
+
+ # select the xml:base with the highest score
+ def select_optimal_base(self):
+ if not self.score: return None
+ for link in self.links:
+ self.score[link] = 0
+ winner = max(self.score.values())
+ if not winner: return None
+ for key in self.score.keys():
+ if self.score[key] == winner:
+ if winner == len(key): return None
+ return urlunparse(('http', key[0], '/'.join(key[1:]), '', '', '')) + '/'
+
+ # rewrite cite, href and src attributes using this base
+ def rebase(self,parent):
+ uri = None
+ if parent.hasAttribute('cite'): uri=parent.getAttribute('cite')
+ if parent.hasAttribute('href'): uri=parent.getAttribute('href')
+ if parent.hasAttribute('src'): uri=parent.getAttribute('src')
+ if uri and uri.startswith(self.base):
+ uri = uri[len(self.base):] or '.'
+ if parent.hasAttribute('href'): parent.setAttribute('href', uri)
+ if parent.hasAttribute('src'): parent.setAttribute('src', uri)
+
+ for node in parent.childNodes:
+ if node.nodeType == Node.ELEMENT_NODE:
+ self.rebase(node)
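+
+ # Illustrative sketch (not part of the original code): relativize is applied
+ # to an entry's DOM element so that repeated absolute URLs collapse into a
+ # single xml:base attribute. The XML below is hypothetical; minidom is
+ # already imported at the top of this module.
+ #
+ #   doc = minidom.parseString(
+ #       '<entry>'
+ #       '<link href="http://example.org/blog/2010/foo"/>'
+ #       '<link href="http://example.org/blog/2010/bar"/>'
+ #       '</entry>')
+ #   relativize(doc.documentElement)
+ #   # the <entry> gains xml:base="http://example.org/blog/" and the href
+ #   # attributes are rewritten to "2010/foo" and "2010/bar".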
+
+# convert type="html" to type="plain" or type="xhtml" as appropriate
+def retype(parent):
+ for node in parent.childNodes:
+ if node.nodeType == Node.ELEMENT_NODE:
+
+ if node.hasAttribute('type') and node.getAttribute('type') == 'html':
+ if len(node.childNodes)==0:
+ node.removeAttribute('type')
+ elif len(node.childNodes)==1:
+
+ # replace html entity defs with utf-8
+ chunks=re.split('&(\w+);', node.childNodes[0].nodeValue)
+ for i in range(1,len(chunks),2):
+ if chunks[i] in ['amp', 'lt', 'gt', 'apos', 'quot']:
+ chunks[i] ='&' + chunks[i] +';'
+ elif chunks[i] in name2codepoint:
+ chunks[i]=unichr(name2codepoint[chunks[i]])
+ else:
+ chunks[i]='&' + chunks[i] + ';'
+ text = u"".join(chunks)
+
+ try:
+ # see if the resulting text is a well-formed XML fragment
+ div = '<div xmlns="http://www.w3.org/1999/xhtml">%s</div>'
+ # only if all the remaining content is nested underneath it.
+ # This means that the divs would be retained in the following:
+ #    <div>foo</div><div>bar</div>
+ if pieces and (pieces[0] == '<div>' or pieces[0].startswith('<div ')) and pieces[-1] == '</div>':
+ depth = 0
+ for piece in pieces[:-1]:
+ if piece.startswith('</'):
+ depth -= 1
+ if depth == 0: break
+ elif piece.startswith('<') and not piece.endswith('/>'):
+ depth += 1
+ else:
+ pieces = pieces[1:-1]
+
+ output = ''.join(pieces)
+ if stripWhitespace:
+ output = output.strip()
+ if not expectingText: return output
+
+ # decode base64 content
+ if base64 and self.contentparams.get('base64', 0):
+ try:
+ output = base64.decodestring(output)
+ except binascii.Error:
+ pass
+ except binascii.Incomplete:
+ pass
+
+ # resolve relative URIs
+ if (element in self.can_be_relative_uri) and output:
+ output = self.resolveURI(output)
+
+ # decode entities within embedded markup
+ if not self.contentparams.get('base64', 0):
+ output = self.decodeEntities(element, output)
+
+ # remove temporary cruft from contentparams
+ try:
+ del self.contentparams['mode']
+ except KeyError:
+ pass
+ try:
+ del self.contentparams['base64']
+ except KeyError:
+ pass
+
+ # resolve relative URIs within embedded markup
+ if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types:
+ if element in self.can_contain_relative_uris:
+ output = _resolveRelativeURIs(output, self.baseuri, self.encoding)
+
+ # sanitize embedded markup
+ if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types:
+ if element in self.can_contain_dangerous_markup:
+ output = _sanitizeHTML(output, self.encoding)
+
+ if self.encoding and type(output) != type(u''):
+ try:
+ output = unicode(output, self.encoding)
+ except:
+ pass
+
+ # address common error where people take data that is already
+ # utf-8, presume that it is iso-8859-1, and re-encode it.
+ if self.encoding=='utf-8' and type(output) == type(u''):
+ try:
+ output = unicode(output.encode('iso-8859-1'), 'utf-8')
+ except:
+ pass
+
+ # map win-1252 extensions to the proper code points
+ if type(output) == type(u''):
+ output = u''.join([c in cp1252 and cp1252[c] or c for c in output])
+
+ # categories/tags/keywords/whatever are handled in _end_category
+ if element == 'category':
+ return output
+
+ # store output in appropriate place(s)
+ if self.inentry and not self.insource:
+ if element == 'content':
+ self.entries[-1].setdefault(element, [])
+ contentparams = copy.deepcopy(self.contentparams)
+ contentparams['value'] = output
+ self.entries[-1][element].append(contentparams)
+ elif element == 'link':
+ self.entries[-1][element] = output
+ if output:
+ self.entries[-1]['links'][-1]['href'] = output
+ else:
+ if element == 'description':
+ element = 'summary'
+ self.entries[-1][element] = output
+ if self.incontent:
+ contentparams = copy.deepcopy(self.contentparams)
+ contentparams['value'] = output
+ self.entries[-1][element + '_detail'] = contentparams
+ elif (self.infeed or self.insource) and (not self.intextinput) and (not self.inimage):
+ context = self._getContext()
+ if element == 'description':
+ element = 'subtitle'
+ context[element] = output
+ if element == 'link':
+ context['links'][-1]['href'] = output
+ elif self.incontent:
+ contentparams = copy.deepcopy(self.contentparams)
+ contentparams['value'] = output
+ context[element + '_detail'] = contentparams
+ return output
+
+ def pushContent(self, tag, attrsD, defaultContentType, expectingText):
+ self.incontent += 1
+ self.contentparams = FeedParserDict({
+ 'type': self.mapContentType(attrsD.get('type', defaultContentType)),
+ 'language': self.lang,
+ 'base': self.baseuri})
+ self.contentparams['base64'] = self._isBase64(attrsD, self.contentparams)
+ self.push(tag, expectingText)
+
+ def popContent(self, tag):
+ value = self.pop(tag)
+ self.incontent -= 1
+ self.contentparams.clear()
+ return value
+
+ def _mapToStandardPrefix(self, name):
+ colonpos = name.find(':')
+ if colonpos <> -1:
+ prefix = name[:colonpos]
+ suffix = name[colonpos+1:]
+ prefix = self.namespacemap.get(prefix, prefix)
+ name = prefix + ':' + suffix
+ return name
+
+ def _getAttribute(self, attrsD, name):
+ return attrsD.get(self._mapToStandardPrefix(name))
+
+ def _isBase64(self, attrsD, contentparams):
+ if attrsD.get('mode', '') == 'base64':
+ return 1
+ if self.contentparams['type'].startswith('text/'):
+ return 0
+ if self.contentparams['type'].endswith('+xml'):
+ return 0
+ if self.contentparams['type'].endswith('/xml'):
+ return 0
+ return 1
+
+ def _itsAnHrefDamnIt(self, attrsD):
+ href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None)))
+ if href:
+ try:
+ del attrsD['url']
+ except KeyError:
+ pass
+ try:
+ del attrsD['uri']
+ except KeyError:
+ pass
+ attrsD['href'] = href
+ return attrsD
+
+ def _save(self, key, value):
+ context = self._getContext()
+ context.setdefault(key, value)
+
+ def _start_rss(self, attrsD):
+ versionmap = {'0.91': 'rss091u',
+ '0.92': 'rss092',
+ '0.93': 'rss093',
+ '0.94': 'rss094'}
+ if not self.version:
+ attr_version = attrsD.get('version', '')
+ version = versionmap.get(attr_version)
+ if version:
+ self.version = version
+ elif attr_version.startswith('2.'):
+ self.version = 'rss20'
+ else:
+ self.version = 'rss'
+
+ def _start_dlhottitles(self, attrsD):
+ self.version = 'hotrss'
+
+ def _start_channel(self, attrsD):
+ self.infeed = 1
+ self._cdf_common(attrsD)
+ _start_feedinfo = _start_channel
+
+ def _cdf_common(self, attrsD):
+ if attrsD.has_key('lastmod'):
+ self._start_modified({})
+ self.elementstack[-1][-1] = attrsD['lastmod']
+ self._end_modified()
+ if attrsD.has_key('href'):
+ self._start_link({})
+ self.elementstack[-1][-1] = attrsD['href']
+ self._end_link()
+
+ def _start_feed(self, attrsD):
+ self.infeed = 1
+ versionmap = {'0.1': 'atom01',
+ '0.2': 'atom02',
+ '0.3': 'atom03'}
+ if not self.version:
+ attr_version = attrsD.get('version')
+ version = versionmap.get(attr_version)
+ if version:
+ self.version = version
+ else:
+ self.version = 'atom'
+
+ def _end_channel(self):
+ self.infeed = 0
+ _end_feed = _end_channel
+
+ def _start_image(self, attrsD):
+ self.inimage = 1
+ self.push('image', 0)
+ context = self._getContext()
+ context.setdefault('image', FeedParserDict())
+
+ def _end_image(self):
+ self.pop('image')
+ self.inimage = 0
+
+ def _start_textinput(self, attrsD):
+ self.intextinput = 1
+ self.push('textinput', 0)
+ context = self._getContext()
+ context.setdefault('textinput', FeedParserDict())
+ _start_textInput = _start_textinput
+
+ def _end_textinput(self):
+ self.pop('textinput')
+ self.intextinput = 0
+ _end_textInput = _end_textinput
+
+ def _start_author(self, attrsD):
+ self.inauthor = 1
+ self.push('author', 1)
+ _start_managingeditor = _start_author
+ _start_dc_author = _start_author
+ _start_dc_creator = _start_author
+ _start_itunes_author = _start_author
+
+ def _end_author(self):
+ self.pop('author')
+ self.inauthor = 0
+ self._sync_author_detail()
+ _end_managingeditor = _end_author
+ _end_dc_author = _end_author
+ _end_dc_creator = _end_author
+ _end_itunes_author = _end_author
+
+ def _start_itunes_owner(self, attrsD):
+ self.inpublisher = 1
+ self.push('publisher', 0)
+
+ def _end_itunes_owner(self):
+ self.pop('publisher')
+ self.inpublisher = 0
+ self._sync_author_detail('publisher')
+
+ def _start_contributor(self, attrsD):
+ self.incontributor = 1
+ context = self._getContext()
+ context.setdefault('contributors', [])
+ context['contributors'].append(FeedParserDict())
+ self.push('contributor', 0)
+
+ def _end_contributor(self):
+ self.pop('contributor')
+ self.incontributor = 0
+
+ def _start_dc_contributor(self, attrsD):
+ self.incontributor = 1
+ context = self._getContext()
+ context.setdefault('contributors', [])
+ context['contributors'].append(FeedParserDict())
+ self.push('name', 0)
+
+ def _end_dc_contributor(self):
+ self._end_name()
+ self.incontributor = 0
+
+ def _start_name(self, attrsD):
+ self.push('name', 0)
+ _start_itunes_name = _start_name
+
+ def _end_name(self):
+ value = self.pop('name')
+ if self.inpublisher:
+ self._save_author('name', value, 'publisher')
+ elif self.inauthor:
+ self._save_author('name', value)
+ elif self.incontributor:
+ self._save_contributor('name', value)
+ elif self.intextinput:
+ context = self._getContext()
+ context['textinput']['name'] = value
+ _end_itunes_name = _end_name
+
+ def _start_width(self, attrsD):
+ self.push('width', 0)
+
+ def _end_width(self):
+ value = self.pop('width')
+ try:
+ value = int(value)
+ except:
+ value = 0
+ if self.inimage:
+ context = self._getContext()
+ context['image']['width'] = value
+
+ def _start_height(self, attrsD):
+ self.push('height', 0)
+
+ def _end_height(self):
+ value = self.pop('height')
+ try:
+ value = int(value)
+ except:
+ value = 0
+ if self.inimage:
+ context = self._getContext()
+ context['image']['height'] = value
+
+ def _start_url(self, attrsD):
+ self.push('href', 1)
+ _start_homepage = _start_url
+ _start_uri = _start_url
+
+ def _end_url(self):
+ value = self.pop('href')
+ if self.inauthor:
+ self._save_author('href', value)
+ elif self.incontributor:
+ self._save_contributor('href', value)
+ elif self.inimage:
+ context = self._getContext()
+ context['image']['href'] = value
+ elif self.intextinput:
+ context = self._getContext()
+ context['textinput']['link'] = value
+ _end_homepage = _end_url
+ _end_uri = _end_url
+
+ def _start_email(self, attrsD):
+ self.push('email', 0)
+ _start_itunes_email = _start_email
+
+ def _end_email(self):
+ value = self.pop('email')
+ if self.inpublisher:
+ self._save_author('email', value, 'publisher')
+ elif self.inauthor:
+ self._save_author('email', value)
+ elif self.incontributor:
+ self._save_contributor('email', value)
+ _end_itunes_email = _end_email
+
+ def _getContext(self):
+ if self.insource:
+ context = self.sourcedata
+ elif self.inentry:
+ context = self.entries[-1]
+ else:
+ context = self.feeddata
+ return context
+
+ def _save_author(self, key, value, prefix='author'):
+ context = self._getContext()
+ context.setdefault(prefix + '_detail', FeedParserDict())
+ context[prefix + '_detail'][key] = value
+ self._sync_author_detail()
+
+ def _save_contributor(self, key, value):
+ context = self._getContext()
+ context.setdefault('contributors', [FeedParserDict()])
+ context['contributors'][-1][key] = value
+
+ def _sync_author_detail(self, key='author'):
+ context = self._getContext()
+ detail = context.get('%s_detail' % key)
+ if detail:
+ name = detail.get('name')
+ email = detail.get('email')
+ if name and email:
+ context[key] = '%s (%s)' % (name, email)
+ elif name:
+ context[key] = name
+ elif email:
+ context[key] = email
+ else:
+ author = context.get(key)
+ if not author: return
+ emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))''', author)
+ if not emailmatch: return
+ email = emailmatch.group(0)
+ # probably a better way to do the following, but it passes all the tests
+ author = author.replace(email, '')
+ author = author.replace('()', '')
+ author = author.strip()
+ if author and (author[0] == '('):
+ author = author[1:]
+ if author and (author[-1] == ')'):
+ author = author[:-1]
+ author = author.strip()
+ context.setdefault('%s_detail' % key, FeedParserDict())
+ context['%s_detail' % key]['name'] = author
+ context['%s_detail' % key]['email'] = email
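+
+ # Illustrative sketch (not part of the original feedparser code): when a
+ # feed only supplies a combined author string, _sync_author_detail splits
+ # it back into name and e-mail, so
+ #
+ #   <author>Jane Doe (jane@example.org)</author>
+ #
+ # ends up as author_detail = {'name': 'Jane Doe', 'email': 'jane@example.org'},
+ # while the reverse direction rebuilds 'author' as
+ # 'Jane Doe (jane@example.org)' from an existing author_detail.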
+
+ def _start_subtitle(self, attrsD):
+ self.pushContent('subtitle', attrsD, 'text/plain', 1)
+ _start_tagline = _start_subtitle
+ _start_itunes_subtitle = _start_subtitle
+
+ def _end_subtitle(self):
+ self.popContent('subtitle')
+ _end_tagline = _end_subtitle
+ _end_itunes_subtitle = _end_subtitle
+
+ def _start_rights(self, attrsD):
+ self.pushContent('rights', attrsD, 'text/plain', 1)
+ _start_dc_rights = _start_rights
+ _start_copyright = _start_rights
+
+ def _end_rights(self):
+ self.popContent('rights')
+ _end_dc_rights = _end_rights
+ _end_copyright = _end_rights
+
+ def _start_item(self, attrsD):
+ self.entries.append(FeedParserDict())
+ self.push('item', 0)
+ self.inentry = 1
+ self.guidislink = 0
+ id = self._getAttribute(attrsD, 'rdf:about')
+ if id:
+ context = self._getContext()
+ context['id'] = id
+ self._cdf_common(attrsD)
+ _start_entry = _start_item
+ _start_product = _start_item
+
+ def _end_item(self):
+ self.pop('item')
+ self.inentry = 0
+ _end_entry = _end_item
+
+ def _start_dc_language(self, attrsD):
+ self.push('language', 1)
+ _start_language = _start_dc_language
+
+ def _end_dc_language(self):
+ self.lang = self.pop('language')
+ _end_language = _end_dc_language
+
+ def _start_dc_publisher(self, attrsD):
+ self.push('publisher', 1)
+ _start_webmaster = _start_dc_publisher
+
+ def _end_dc_publisher(self):
+ self.pop('publisher')
+ self._sync_author_detail('publisher')
+ _end_webmaster = _end_dc_publisher
+
+ def _start_published(self, attrsD):
+ self.push('published', 1)
+ _start_dcterms_issued = _start_published
+ _start_issued = _start_published
+
+ def _end_published(self):
+ value = self.pop('published')
+ self._save('published_parsed', _parse_date(value))
+ _end_dcterms_issued = _end_published
+ _end_issued = _end_published
+
+ def _start_updated(self, attrsD):
+ self.push('updated', 1)
+ _start_modified = _start_updated
+ _start_dcterms_modified = _start_updated
+ _start_pubdate = _start_updated
+ _start_dc_date = _start_updated
+
+ def _end_updated(self):
+ value = self.pop('updated')
+ parsed_value = _parse_date(value)
+ self._save('updated_parsed', parsed_value)
+ _end_modified = _end_updated
+ _end_dcterms_modified = _end_updated
+ _end_pubdate = _end_updated
+ _end_dc_date = _end_updated
+
+ def _start_created(self, attrsD):
+ self.push('created', 1)
+ _start_dcterms_created = _start_created
+
+ def _end_created(self):
+ value = self.pop('created')
+ self._save('created_parsed', _parse_date(value))
+ _end_dcterms_created = _end_created
+
+ def _start_expirationdate(self, attrsD):
+ self.push('expired', 1)
+
+ def _end_expirationdate(self):
+ self._save('expired_parsed', _parse_date(self.pop('expired')))
+
+ def _start_cc_license(self, attrsD):
+ self.push('license', 1)
+ value = self._getAttribute(attrsD, 'rdf:resource')
+ if value:
+ self.elementstack[-1][2].append(value)
+ self.pop('license')
+
+ def _start_creativecommons_license(self, attrsD):
+ self.push('license', 1)
+
+ def _end_creativecommons_license(self):
+ self.pop('license')
+
+ def _addTag(self, term, scheme, label):
+ context = self._getContext()
+ tags = context.setdefault('tags', [])
+ if (not term) and (not scheme) and (not label): return
+ value = FeedParserDict({'term': term, 'scheme': scheme, 'label': label})
+ if value not in tags:
+ tags.append(FeedParserDict({'term': term, 'scheme': scheme, 'label': label}))
+
+ def _start_category(self, attrsD):
+ if _debug: sys.stderr.write('entering _start_category with %s\n' % repr(attrsD))
+ term = attrsD.get('term')
+ scheme = attrsD.get('scheme', attrsD.get('domain'))
+ label = attrsD.get('label')
+ self._addTag(term, scheme, label)
+ self.push('category', 1)
+ _start_dc_subject = _start_category
+ _start_keywords = _start_category
+
+ def _end_itunes_keywords(self):
+ for term in self.pop('itunes_keywords').split():
+ self._addTag(term, 'http://www.itunes.com/', None)
+
+ def _start_itunes_category(self, attrsD):
+ self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None)
+ self.push('category', 1)
+
+ def _end_category(self):
+ value = self.pop('category')
+ if not value: return
+ context = self._getContext()
+ tags = context['tags']
+ if value and len(tags) and not tags[-1]['term']:
+ tags[-1]['term'] = value
+ else:
+ self._addTag(value, None, None)
+ _end_dc_subject = _end_category
+ _end_keywords = _end_category
+ _end_itunes_category = _end_category
+
+ def _start_cloud(self, attrsD):
+ self._getContext()['cloud'] = FeedParserDict(attrsD)
+
+ def _start_link(self, attrsD):
+ attrsD.setdefault('rel', 'alternate')
+ attrsD.setdefault('type', 'text/html')
+ attrsD = self._itsAnHrefDamnIt(attrsD)
+ if attrsD.has_key('href'):
+ attrsD['href'] = self.resolveURI(attrsD['href'])
+ expectingText = self.infeed or self.inentry or self.insource
+ context = self._getContext()
+ context.setdefault('links', [])
+ context['links'].append(FeedParserDict(attrsD))
+ if attrsD['rel'] == 'enclosure':
+ self._start_enclosure(attrsD)
+ if attrsD.has_key('href'):
+ expectingText = 0
+ if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
+ context['link'] = attrsD['href']
+ else:
+ self.push('link', expectingText)
+ _start_producturl = _start_link
+
+ def _end_link(self):
+ value = self.pop('link')
+ context = self._getContext()
+ if self.intextinput:
+ context['textinput']['link'] = value
+ if self.inimage:
+ context['image']['link'] = value
+ _end_producturl = _end_link
+
+ def _start_guid(self, attrsD):
+ self.guidislink = (attrsD.get('ispermalink', 'true') == 'true')
+ self.push('id', 1)
+
+ def _end_guid(self):
+ value = self.pop('id')
+ self._save('guidislink', self.guidislink and not self._getContext().has_key('link'))
+ if self.guidislink:
+ # guid acts as link, but only if 'ispermalink' is not present or is 'true',
+ # and only if the item doesn't already have a link element
+ self._save('link', value)
+
+ def _start_title(self, attrsD):
+ self.pushContent('title', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)
+ _start_dc_title = _start_title
+ _start_media_title = _start_title
+
+ def _end_title(self):
+ value = self.popContent('title')
+ context = self._getContext()
+ if self.intextinput:
+ context['textinput']['title'] = value
+ elif self.inimage:
+ context['image']['title'] = value
+ _end_dc_title = _end_title
+ _end_media_title = _end_title
+
+ def _start_description(self, attrsD):
+ context = self._getContext()
+ if context.has_key('summary'):
+ self._summaryKey = 'content'
+ self._start_content(attrsD)
+ else:
+ self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource)
+
+ def _start_abstract(self, attrsD):
+ self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)
+
+ def _end_description(self):
+ if self._summaryKey == 'content':
+ self._end_content()
+ else:
+ value = self.popContent('description')
+ context = self._getContext()
+ if self.intextinput:
+ context['textinput']['description'] = value
+ elif self.inimage:
+ context['image']['description'] = value
+ self._summaryKey = None
+ _end_abstract = _end_description
+
+ def _start_info(self, attrsD):
+ self.pushContent('info', attrsD, 'text/plain', 1)
+ _start_feedburner_browserfriendly = _start_info
+
+ def _end_info(self):
+ self.popContent('info')
+ _end_feedburner_browserfriendly = _end_info
+
+ def _start_generator(self, attrsD):
+ if attrsD:
+ attrsD = self._itsAnHrefDamnIt(attrsD)
+ if attrsD.has_key('href'):
+ attrsD['href'] = self.resolveURI(attrsD['href'])
+ self._getContext()['generator_detail'] = FeedParserDict(attrsD)
+ self.push('generator', 1)
+
+ def _end_generator(self):
+ value = self.pop('generator')
+ context = self._getContext()
+ if context.has_key('generator_detail'):
+ context['generator_detail']['name'] = value
+
+ def _start_admin_generatoragent(self, attrsD):
+ self.push('generator', 1)
+ value = self._getAttribute(attrsD, 'rdf:resource')
+ if value:
+ self.elementstack[-1][2].append(value)
+ self.pop('generator')
+ self._getContext()['generator_detail'] = FeedParserDict({'href': value})
+
+ def _start_admin_errorreportsto(self, attrsD):
+ self.push('errorreportsto', 1)
+ value = self._getAttribute(attrsD, 'rdf:resource')
+ if value:
+ self.elementstack[-1][2].append(value)
+ self.pop('errorreportsto')
+
+ def _start_summary(self, attrsD):
+ context = self._getContext()
+ if context.has_key('summary'):
+ self._summaryKey = 'content'
+ self._start_content(attrsD)
+ else:
+ self._summaryKey = 'summary'
+ self.pushContent(self._summaryKey, attrsD, 'text/plain', 1)
+ _start_itunes_summary = _start_summary
+
+ def _end_summary(self):
+ if self._summaryKey == 'content':
+ self._end_content()
+ else:
+ self.popContent(self._summaryKey or 'summary')
+ self._summaryKey = None
+ _end_itunes_summary = _end_summary
+
+ def _start_enclosure(self, attrsD):
+ attrsD = self._itsAnHrefDamnIt(attrsD)
+ self._getContext().setdefault('enclosures', []).append(FeedParserDict(attrsD))
+ href = attrsD.get('href')
+ if href:
+ context = self._getContext()
+ if not context.get('id'):
+ context['id'] = href
+
+ def _start_source(self, attrsD):
+ self.insource = 1
+
+ def _end_source(self):
+ self.insource = 0
+ self._getContext()['source'] = copy.deepcopy(self.sourcedata)
+ self.sourcedata.clear()
+
+ def _start_content(self, attrsD):
+ self.pushContent('content', attrsD, 'text/plain', 1)
+ src = attrsD.get('src')
+ if src:
+ self.contentparams['src'] = src
+ self.push('content', 1)
+
+ def _start_prodlink(self, attrsD):
+ self.pushContent('content', attrsD, 'text/html', 1)
+
+ def _start_body(self, attrsD):
+ self.pushContent('content', attrsD, 'application/xhtml+xml', 1)
+ _start_xhtml_body = _start_body
+
+ def _start_content_encoded(self, attrsD):
+ self.pushContent('content', attrsD, 'text/html', 1)
+ _start_fullitem = _start_content_encoded
+
+ def _end_content(self):
+ copyToDescription = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types)
+ value = self.popContent('content')
+ if copyToDescription:
+ self._save('description', value)
+ _end_body = _end_content
+ _end_xhtml_body = _end_content
+ _end_content_encoded = _end_content
+ _end_fullitem = _end_content
+ _end_prodlink = _end_content
+
+ def _start_itunes_image(self, attrsD):
+ self.push('itunes_image', 0)
+ self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')})
+ _start_itunes_link = _start_itunes_image
+
+ def _end_itunes_block(self):
+ value = self.pop('itunes_block', 0)
+ self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0
+
+ def _end_itunes_explicit(self):
+ value = self.pop('itunes_explicit', 0)
+ self._getContext()['itunes_explicit'] = (value == 'yes') and 1 or 0
+
+if _XML_AVAILABLE:
+ class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler):
+ def __init__(self, baseuri, baselang, encoding):
+ if _debug: sys.stderr.write('trying StrictFeedParser\n')
+ xml.sax.handler.ContentHandler.__init__(self)
+ _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
+ self.bozo = 0
+ self.exc = None
+
+ def startPrefixMapping(self, prefix, uri):
+ self.trackNamespace(prefix, uri)
+
+ def startElementNS(self, name, qname, attrs):
+ namespace, localname = name
+ lowernamespace = str(namespace or '').lower()
+ if lowernamespace.find('backend.userland.com/rss') <> -1:
+ # match any backend.userland.com namespace
+ namespace = 'http://backend.userland.com/rss'
+ lowernamespace = namespace
+ if qname and qname.find(':') > 0:
+ givenprefix = qname.split(':')[0]
+ else:
+ givenprefix = None
+ prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
+ if givenprefix and (prefix == None or (prefix == '' and lowernamespace == '')) and not self.namespacesInUse.has_key(givenprefix):
+ raise UndeclaredNamespace, "'%s' is not associated with a namespace" % givenprefix
+ if prefix:
+ localname = prefix + ':' + localname
+ localname = str(localname).lower()
+ if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname))
+
+ # qname implementation is horribly broken in Python 2.1 (it
+ # doesn't report any), and slightly broken in Python 2.2 (it
+ # doesn't report the xml: namespace). So we match up namespaces
+ # with a known list first, and then possibly override them with
+ # the qnames the SAX parser gives us (if indeed it gives us any
+ # at all). Thanks to MatejC for helping me test this and
+ # tirelessly telling me that it didn't work yet.
+ attrsD = {}
+ for (namespace, attrlocalname), attrvalue in attrs._attrs.items():
+ lowernamespace = (namespace or '').lower()
+ prefix = self._matchnamespaces.get(lowernamespace, '')
+ if prefix:
+ attrlocalname = prefix + ':' + attrlocalname
+ attrsD[str(attrlocalname).lower()] = attrvalue
+ for qname in attrs.getQNames():
+ attrsD[str(qname).lower()] = attrs.getValueByQName(qname)
+ self.unknown_starttag(localname, attrsD.items())
+
+ def characters(self, text):
+ self.handle_data(text)
+
+ def endElementNS(self, name, qname):
+ namespace, localname = name
+ lowernamespace = str(namespace or '').lower()
+ if qname and qname.find(':') > 0:
+ givenprefix = qname.split(':')[0]
+ else:
+ givenprefix = ''
+ prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
+ if prefix:
+ localname = prefix + ':' + localname
+ localname = str(localname).lower()
+ self.unknown_endtag(localname)
+
+ def error(self, exc):
+ self.bozo = 1
+ self.exc = exc
+
+ def fatalError(self, exc):
+ self.error(exc)
+ raise exc
+
+class _BaseHTMLProcessor(sgmllib.SGMLParser):
+ elements_no_end_tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
+ 'img', 'input', 'isindex', 'link', 'meta', 'param']
+
+ def __init__(self, encoding):
+ self.encoding = encoding
+ if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding)
+ sgmllib.SGMLParser.__init__(self)
+
+ def reset(self):
+ self.pieces = []
+ sgmllib.SGMLParser.reset(self)
+
+ def _shorttag_replace(self, match):
+ tag = match.group(1)
+ if tag in self.elements_no_end_tag:
+ return '<' + tag + ' />'
+ else:
+ return '<' + tag + '></' + tag + '>'
+
+ def feed(self, data):
+ data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
+ #data = re.sub(r'<(\S+?)\s*?/>', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace
+ data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data)
+ data = data.replace('&#39;', "'")
+ data = data.replace('&#34;', '"')
+ if self.encoding and type(data) == type(u''):
+ data = data.encode(self.encoding)
+ sgmllib.SGMLParser.feed(self, data)
+ sgmllib.SGMLParser.close(self)
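+
+ # Illustrative sketch (not part of the original feedparser code): the
+ # short-tag rewriting above expands XML-style empty elements before the
+ # data reaches SGMLParser, e.g.
+ #
+ #   p = _BaseHTMLProcessor('utf-8')
+ #   re.sub(r'<([^<\s]+?)\s*/>', p._shorttag_replace, '<br/> <span/>')
+ #   # -> '<br /> <span></span>'
+ #
+ # 'br' is in elements_no_end_tag so it stays self-closing, while 'span'
+ # is rewritten as an explicit open/close pair.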
+
+ def normalize_attrs(self, attrs):
+ # utility method to be called by descendants
+ attrs = [(k.lower(), v) for k, v in attrs]
+ attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs]
+ return attrs
+
+ def unknown_starttag(self, tag, attrs):
+ # called for each start tag
+ # attrs is a list of (attr, value) tuples
+ # e.g. for <pre class='screen'>, tag='pre', attrs=[('class', 'screen')]
+ if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
+ uattrs = []
+ # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
+ for key, value in attrs:
+ if type(value) != type(u''):
+ value = unicode(value, self.encoding)
+ uattrs.append((unicode(key, self.encoding), value))
+ strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs]).encode(self.encoding)
+ if tag in self.elements_no_end_tag:
+ self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
+ else:
+ self.pieces.append('<%(tag)s%(strattrs)s>' % locals())
+
+ def unknown_endtag(self, tag):
+ # called for each end tag, e.g. for </pre>, tag will be 'pre'
+ <strong>HTMLTMPL WARNING:</strong><br />
+ Cannot include template: <strong>%s</strong>
+ </p>
+ <br />
+ """ % filename
+ self.DEB("CANNOT INCLUDE WARNING")
+
+ elif token.startswith("<TMPL_") or token.startswith("</TMPL_"):
+ # Unknown processing directive.
+ raise TemplateError, "Invalid statement %s." % token
+
+ elif DISABLE_OUTPUT not in output_control:
+ # Raw textual template data.
+ # If output of current block is not disabled, then
+ # append template data to the output buffer.
+ out += token
+
+ i += 1
+ # end of the big while loop
+
+ # Check whether all opening statements were closed.
+ if loop_name: raise TemplateError, "Missing </TMPL_LOOP>."
+ if output_control: raise TemplateError, "Missing </TMPL_IF> or </TMPL_UNLESS>."
+ return out
+
+ ##############################################
+ # PRIVATE METHODS #
+ ##############################################
+
+ def DEB(self, str):
+ """ Print debugging message to stderr if debugging is enabled.
+ @hidden
+ """
+ if self._debug: print >> sys.stderr, str
+
+ def find_value(self, var, loop_name, loop_pass, loop_total,
+ global_override=None):
+ """ Search the self._vars data structure for the variable var
+ in the pass of the loop that is currently being processed.
+ If the variable is an ordinary variable, return it.
+
+ If the variable is the identifier of a loop, return the
+ total number of times that loop will be executed.
+
+ Return an empty string if the variable is not found at all.
+
+ @hidden
+ """
+ # Search for the requested variable in magic vars if the name
+ # of the variable starts with "__" and if we are inside a loop.
+ if self._magic_vars and var.startswith("__") and loop_name:
+ return self.magic_var(var, loop_pass[-1], loop_total[-1])
+
+ # Search for an ordinary variable or for a loop.
+ # Recursively search in self._vars for the requested variable.
+ scope = self._vars
+ globals = []
+ for i in range(len(loop_name)):
+ # If global lookup is on then push the value on the stack.
+ if ((self._global_vars and global_override != "0") or \
+ global_override == "1") and scope.has_key(var) and \
+ self.is_ordinary_var(scope[var]):
+ globals.append(scope[var])
+
+ # Descent deeper into the hierarchy.
+ if scope.has_key(loop_name[i]) and scope[loop_name[i]]:
+ scope = scope[loop_name[i]][loop_pass[i]]
+ else:
+ return ""
+
+ if scope.has_key(var):
+ # Value exists in current loop.
+ if type(scope[var]) == ListType:
+ # The requested value is a loop.
+ # Return total number of its passes.
+ return len(scope[var])
+ else:
+ return scope[var]
+ elif globals and \
+ ((self._global_vars and global_override != "0") or \
+ global_override == "1"):
+ # Return globally looked up value.
+ return globals.pop()
+ else:
+ # No value found.
+ if var[0].isupper():
+ # This is a loop name.
+ # Return zero, because the user wants to know number
+ # of its passes.
+ return 0
+ else:
+ return ""
+
+ def magic_var(self, var, loop_pass, loop_total):
+ """ Resolve and return value of a magic variable.
+ Raise an exception if the magic variable is not recognized.
+
+ @hidden
+ """
+ self.DEB("MAGIC: '%s', PASS: %d, TOTAL: %d"\
+ % (var, loop_pass, loop_total))
+ if var == "__FIRST__":
+ if loop_pass == 0:
+ return 1
+ else:
+ return 0
+ elif var == "__LAST__":
+ if loop_pass == loop_total - 1:
+ return 1
+ else:
+ return 0
+ elif var == "__INNER__":
+ # If this is neither the first nor the last pass.
+ if loop_pass != 0 and loop_pass != loop_total - 1:
+ return 1
+ else:
+ return 0
+ elif var == "__PASS__":
+ # Magic variable __PASS__ counts passes from one.
+ return loop_pass + 1
+ elif var == "__PASSTOTAL__":
+ return loop_total
+ elif var == "__ODD__":
+ # Internally pass numbers stored in loop_pass are counted from
+ # zero. But the template language presents them counted from one.
+ # Therefore we must add one to the actual loop_pass value to get
+ # the value we present to the user.
+ if (loop_pass + 1) % 2 != 0:
+ return 1
+ else:
+ return 0
+ elif var.startswith("__EVERY__"):
+ # Magic variable __EVERY__x is never true in first or last pass.
+ if loop_pass != 0 and loop_pass != loop_total - 1:
+ # Check if an integer follows the variable name.
+ try:
+ every = int(var[9:]) # nine is length of "__EVERY__"
+ except ValueError:
+ raise TemplateError, "Magic variable __EVERY__x: "\
+ "Invalid pass number."
+ else:
+ if not every:
+ raise TemplateError, "Magic variable __EVERY__x: "\
+ "Pass number cannot be zero."
+ elif (loop_pass + 1) % every == 0:
+ self.DEB("MAGIC: EVERY: " + str(every))
+ return 1
+ else:
+ return 0
+ else:
+ return 0
+ else:
+ raise TemplateError, "Invalid magic variable '%s'." % var
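+
+ # Illustrative sketch (not part of the original htmltmpl code): inside a
+ # <TMPL_LOOP> the magic variables resolve per pass. For a loop with
+ # loop_total == 4 the passes (counted from zero internally) give:
+ #
+ #   pass 0:  __FIRST__=1  __LAST__=0  __INNER__=0  __PASS__=1  __ODD__=1
+ #   pass 1:  __FIRST__=0  __LAST__=0  __INNER__=1  __PASS__=2  __ODD__=0
+ #   pass 2:  __FIRST__=0  __LAST__=0  __INNER__=1  __PASS__=3  __ODD__=1
+ #   pass 3:  __FIRST__=0  __LAST__=1  __INNER__=0  __PASS__=4  __ODD__=0
+ #
+ # __EVERY__2 would be 1 only in pass 1 here, since it is never true in
+ # the first or last pass.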
+
+ def escape(self, str, override=""):
+ """ Escape a string either by HTML escaping or by URL escaping.
+ @hidden
+ """
+ ESCAPE_QUOTES = 1
+ if (self._html_escape and override != "NONE" and override != "0" and \
+ override != "URL") or override == "HTML" or override == "1":
+ return cgi.escape(str, ESCAPE_QUOTES)
+ elif override == "URL":
+ return urllib.quote_plus(str)
+ else:
+ return str
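+
+ # Illustrative sketch (not part of the original htmltmpl code): with HTML
+ # escaping enabled (the default) a value is run through cgi.escape, while
+ # the "URL" override switches to urllib.quote_plus:
+ #
+ #   cgi.escape('<b>"x"</b>', 1)   # -> '&lt;b&gt;&quot;x&quot;&lt;/b&gt;'
+ #   urllib.quote_plus('a b&c')    # -> 'a+b%26c'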
+
+ def is_ordinary_var(self, var):
+ """ Return true if var is a scalar (not a reference to a loop).
+ @hidden
+ """
+ if type(var) == StringType or type(var) == IntType or \
+ type(var) == LongType or type(var) == FloatType:
+ return 1
+ else:
+ return 0
+
+
+##############################################
+# CLASS: TemplateCompiler #
+##############################################
+
+class TemplateCompiler:
+ """ Preprocess, parse, tokenize and compile the template.
+
+ This class parses the template and produces a 'compiled' form
+ of it. This compiled form is an instance of the Template
+ class. The compiled form is used as input for the TemplateProcessor
+ which uses it to actually process the template.
+
+ This class should be used directly only when you need to compile
+ a template from a string. If your template is in a file, then you
+ should use the TemplateManager class which provides
+ a higher level interface to this class and also can save the
+ compiled template to disk in a precompiled form.
+ """
+
+ def __init__(self, include=1, max_include=5, comments=1, gettext=0,
+ debug=0):
+ """ Constructor.
+
+ @header __init__(include=1, max_include=5, comments=1, gettext=0,
+ debug=0)
+
+ @param include Enable or disable included templates.
+ @param max_include Maximum depth of nested inclusions.
+ @param comments Enable or disable template comments.
+ @param gettext Enable or disable gettext support.
+ @param debug Enable or disable debugging messages.
+ """
+
+ self._include = include
+ self._max_include = max_include
+ self._comments = comments
+ self._gettext = gettext
+ self._debug = debug
+
+ # This is a list of filenames of all included templates.
+ # It's modified by the include_templates() method.
+ self._include_files = []
+
+ # This is a counter of current inclusion depth. It's used to prevent
+ # infinite recursive includes.
+ self._include_level = 0
+
+ def compile(self, file):
+ """ Compile template from a file.
+
+ @header compile(file)
+ @return Compiled template.
+ The return value is an instance of the Template
+ class.
+
+ @param file Filename of the template.
+ See the prepare() method of the TemplateManager
+ class for an explanation of this parameter.
+ """
+
+ self.DEB("COMPILING FROM FILE: " + file)
+ self._include_path = os.path.join(os.path.dirname(file), INCLUDE_DIR)
+ tokens = self.parse(self.read(file))
+ compile_params = (self._include, self._max_include, self._comments,
+ self._gettext)
+ return Template(__version__, file, self._include_files,
+ tokens, compile_params, self._debug)
+
+ def compile_string(self, data):
+ """ Compile template from a string.
+
+ This method compiles a template from a string. The
+ template cannot include any templates.
+ TMPL_INCLUDE statements are turned into warnings.
+
+ @header compile_string(data)
+ @return Compiled template.
+ The return value is an instance of the Template
+ class.
+
+ @param data String containing the template data.
+ """
+ self.DEB("COMPILING FROM STRING")
+ self._include = 0
+ tokens = self.parse(data)
+ compile_params = (self._include, self._max_include, self._comments,
+ self._gettext)
+ return Template(__version__, None, None, tokens, compile_params,
+ self._debug)
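+
+ # Illustrative sketch (not part of the original htmltmpl code): compiling
+ # straight from a string, e.g. for a quick test. TemplateProcessor is the
+ # processor class defined earlier in this module.
+ #
+ #   tmpl = TemplateCompiler().compile_string('Hello, <TMPL_VAR name>!')
+ #   proc = TemplateProcessor()
+ #   proc.set('name', 'world')
+ #   print proc.process(tmpl)   # -> 'Hello, world!'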
+
+ ##############################################
+ # PRIVATE METHODS #
+ ##############################################
+
+ def DEB(self, str):
+ """ Print debugging message to stderr if debugging is enabled.
+ @hidden
+ """
+ if self._debug: print >> sys.stderr, str
+
+ def read(self, filename):
+ """ Read content of file and return it. Raise an error if a problem
+ occurs.
+ @hidden
+ """
+ self.DEB("READING: " + filename)
+ try:
+ f = None
+ try:
+ f = open(filename, "r")
+ data = f.read()
+ except IOError, (errno, errstr):
+ raise TemplateError, "IO error while reading template '%s': "\
+ "(%d) %s" % (filename, errno, errstr)
+ else:
+ return data
+ finally:
+ if f: f.close()
+
+ def parse(self, template_data):
+ """ Parse the template. This method is recursively called from
+ within the include_templates() method.
+
+ @return List of processing tokens.
+ @hidden
+ """
+ if self._comments:
+ self.DEB("PREPROCESS: COMMENTS")
+ template_data = self.remove_comments(template_data)
+ tokens = self.tokenize(template_data)
+ if self._include:
+ self.DEB("PREPROCESS: INCLUDES")
+ self.include_templates(tokens)
+ return tokens
+
+ def remove_comments(self, template_data):
+ """ Remove comments from the template data.
+ @hidden
+ """
+ pattern = r"### .*"
+ return re.sub(pattern, "", template_data)
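+
+ # For example, a template line such as
+ # Hello ### greeting, stripped before tokenization
+ # is reduced to just "Hello " by the substitution above.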
+
+ def include_templates(self, tokens):
+ """ Process TMPL_INCLUDE statements. Use the include_level counter
+ to prevent infinite recursion. Record paths to all included
+ templates to self._include_files.
+ @hidden
+ """
+ i = 0
+ out = "" # buffer for output
+ skip_params = 0
+
+ # Process the list of tokens.
+ while 1:
+ if i == len(tokens): break
+ if skip_params:
+ skip_params = 0
+ i += PARAMS_NUMBER
+ continue
+
+ token = tokens[i]
+ if token == "."
+ self._include_level += 1
+ if self._include_level > self._max_include:
+ # Do not include the template.
+ # Protection against infinite recursive includes.
+ skip_params = 1
+ self.DEB("INCLUDE: LIMIT REACHED: " + filename)
+ else:
+ # Include the template.
+ skip_params = 0
+ include_file = os.path.join(self._include_path, filename)
+ self._include_files.append(include_file)
+ include_data = self.read(include_file)
+ include_tokens = self.parse(include_data)
+
+ # Append the tokens from the included template to actual
+ # position in the tokens list, replacing the TMPL_INCLUDE
+ # token and its parameters.
+ tokens[i:i+PARAMS_NUMBER+1] = include_tokens
+ i = i + len(include_tokens)
+ self.DEB("INCLUDED: " + filename)
+ continue # Do not increment 'i' below.
+ i += 1
+ # end of the main while loop
+
+ if self._include_level > 0: self._include_level -= 1
+ return out
+
+ def tokenize(self, template_data):
+ """ Split the template into tokens separated by template statements.
+ The statements themselves and their associated parameters are also
+ included separately in the resulting list of tokens.
+ Return the list of tokens.
+
+ @hidden
+ """
+ self.DEB("TOKENIZING TEMPLATE")
+ # NOTE: The TWO double quotes in character class in the regexp below
+ # are there only to prevent confusion of syntax highlighter in Emacs.
+ pattern = r"""
+ (?:^[ \t]+)? # eat spaces, tabs (opt.)
+ (<
+ (?:!--[ ])? # comment start + space (opt.)
+ /?TMPL_[A-Z]+ # closing slash / (opt.) + statement
+ [ a-zA-Z0-9""/.=:_\\-]* # this spans also comments ending (--)
+ >)
+ [%s]? # eat trailing newline (opt.)
+ """ % os.linesep
+ rc = re.compile(pattern, re.VERBOSE | re.MULTILINE)
+ split = rc.split(template_data)
+ tokens = []
+ for statement in split:
+ if statement.startswith(" 0 and '=' not in params[0]:
+ # implicit identifier
+ name = params[0]
+ del params[0]
+ else:
+ # explicit identifier as a 'NAME' parameter
+ name = self.find_param("NAME", params)
+ self.DEB("TOKENIZER: NAME: " + str(name))
+ return name
+
+ def find_param(self, param, params):
+ """ Extract value of parameter from a statement.
+ @hidden
+ """
+ for pair in params:
+ name, value = pair.split("=")
+ if not name or not value:
+ raise TemplateError, "Syntax error in template."
+ if name == param:
+ if value[0] == '"':
+ # The value is in double quotes.
+ ret_value = value[1:-1]
+ else:
+ # The value is without double quotes.
+ ret_value = value
+ self.DEB("TOKENIZER: PARAM: '%s' => '%s'" % (param, ret_value))
+ return ret_value
+ else:
+ self.DEB("TOKENIZER: PARAM: '%s' => NOT DEFINED" % param)
+ return None
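+
+ # A rough sketch of the token layout this produces: a statement such as
+ # <TMPL_VAR NAME="title" ESCAPE="HTML">
+ # contributes the tokens
+ # ['<TMPL_VAR>', 'title', 'HTML', None]
+ # i.e. the statement itself followed by its NAME, ESCAPE and GLOBAL
+ # parameters, with None standing in for parameters that are absent.
+ # The exact offsets are governed by the PARAMS_NUMBER and PARAM_*
+ # constants defined earlier in this module.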
+
+
+##############################################
+# CLASS: Template #
+##############################################
+
+class Template:
+ """ This class represents a compiled template.
+
+ This class provides storage and methods for the compiled template
+ and associated metadata. It's serialized by pickle if we need to
+ save the compiled template to disk in a precompiled form.
+
+ You should never instantiate this class directly. Always use the
+ TemplateManager or TemplateCompiler classes to
+ create instances of this class.
+
+ The only method which you can directly use is the is_uptodate
+ method.
+ """
+
+ def __init__(self, version, file, include_files, tokens, compile_params,
+ debug=0):
+ """ Constructor.
+ @hidden
+ """
+ self._version = version
+ self._file = file
+ self._tokens = tokens
+ self._compile_params = compile_params
+ self._debug = debug
+ self._mtime = None
+ self._include_mtimes = {}
+
+ if not file:
+ self.DEB("TEMPLATE WAS COMPILED FROM A STRING")
+ return
+
+ # Save the modification time of the main template file.
+ if os.path.isfile(file):
+ self._mtime = os.path.getmtime(file)
+ else:
+ raise TemplateError, "Template: file does not exist: '%s'" % file
+
+ # Save the modification times of all included template files.
+ for inc_file in include_files:
+ if os.path.isfile(inc_file):
+ self._include_mtimes[inc_file] = os.path.getmtime(inc_file)
+ else:
+ raise TemplateError, "Template: file does not exist: '%s'"\
+ % inc_file
+
+ self.DEB("NEW TEMPLATE CREATED")
+
+ def is_uptodate(self, compile_params=None):
+ """ Check whether the compiled template is uptodate.
+
+ Return true if this compiled template is uptodate.
+ Return false, if the template source file was changed on the
+ disk since it was compiled.
+ Works by comparison of modification times.
+ Also takes modification times of all included templates
+ into account.
+
+ @header is_uptodate(compile_params=None)
+ @return True if the template is up to date, false otherwise.
+
+ @param compile_params Only for internal use.
+ Do not use this optional parameter. It's intended only for
+ internal use by the TemplateManager.
+ """
+ if not self._file:
+ self.DEB("TEMPLATE COMPILED FROM A STRING")
+ return 0
+
+ if self._version != __version__:
+ self.DEB("TEMPLATE: VERSION NOT UPTODATE")
+ return 0
+
+ if compile_params != None and compile_params != self._compile_params:
+ self.DEB("TEMPLATE: DIFFERENT COMPILATION PARAMS")
+ return 0
+
+ # Check modification times of the main template and all included
+ # templates. If the included template no longer exists, then
+ # the problem will be resolved when the template is recompiled.
+
+ # Main template file.
+ if not (os.path.isfile(self._file) and \
+ self._mtime == os.path.getmtime(self._file)):
+ self.DEB("TEMPLATE: NOT UPTODATE: " + self._file)
+ return 0
+
+ # Included templates.
+ for inc_file in self._include_mtimes.keys():
+ if not (os.path.isfile(inc_file) and \
+ self._include_mtimes[inc_file] == \
+ os.path.getmtime(inc_file)):
+ self.DEB("TEMPLATE: NOT UPTODATE: " + inc_file)
+ return 0
+ else:
+ self.DEB("TEMPLATE: UPTODATE")
+ return 1
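+
+ # A sketch of the typical caller-side pattern (e.g. in the
+ # TemplateManager): recompile only when the compiled form is stale.
+ #
+ # if not template.is_uptodate():
+ # template = TemplateCompiler().compile(template.file())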
+
+ def tokens(self):
+ """ Get tokens of this template.
+ @hidden
+ """
+ return self._tokens
+
+ def file(self):
+ """ Get filename of the main file of this template.
+ @hidden
+ """
+ return self._file
+
+ def debug(self, debug):
+ """ Get debugging state.
+ @hidden
+ """
+ self._debug = debug
+
+ ##############################################
+ # PRIVATE METHODS #
+ ##############################################
+
+ def __getstate__(self):
+ """ Used by pickle when the class is serialized.
+ Remove the 'debug' attribute before serialization.
+ @hidden
+ """
+ dict = copy.copy(self.__dict__)
+ del dict["_debug"]
+ return dict
+
+ def __setstate__(self, dict):
+ """ Used by pickle when the class is unserialized.
+ Add the 'debug' attribute.
+ @hidden
+ """
+ dict["_debug"] = 0
+ self.__dict__ = dict
+
+
+ def DEB(self, str):
+ """ Print debugging message to stderr.
+ @hidden
+ """
+ if self._debug: print >> sys.stderr, str
+
+
+##############################################
+# EXCEPTIONS #
+##############################################
+
+class TemplateError(Exception):
+ """ Fatal exception. Raised on runtime or template syntax errors.
+
+ This exception is raised when a runtime error occurs or when a syntax
+ error in the template is found. It has one parameter, which is always
+ a string containing a description of the error.
+
+ All potential IOError exceptions are handled by the module and are
+ converted to TemplateError exceptions. That means you should catch the
+ TemplateError exception if there is a possibility that, for example,
+ the template file will not be accessible.
+
+ The exception can be raised by constructors or by any method of any
+ class.
+
+ The instance is no longer usable when this exception is raised.
+ """
+
+ def __init__(self, error):
+ """ Constructor.
+ @hidden
+ """
+ Exception.__init__(self, "Htmltmpl error: " + error)
+
+
+class PrecompiledError(Exception):
+ """ This exception is _PRIVATE_ and non fatal.
+ @hidden
+ """
+
+ def __init__(self, template):
+ """ Constructor.
+ @hidden
+ """
+ Exception.__init__(self, template)
+
diff --git a/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet/sanitize.py b/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet/sanitize.py
new file mode 100755
index 0000000..c98b14d
--- /dev/null
+++ b/DJAGEN/branches/mustafa_branch/djagen/gezegen/planet/sanitize.py
@@ -0,0 +1,354 @@
+"""
+sanitize: bringing sanity to the world of messed-up data
+"""
+
+__author__ = ["Mark Pilgrim ",
+ "Aaron Swartz "]
+__contributors__ = ["Sam Ruby "]
+__license__ = "BSD"
+__version__ = "0.25"
+
+_debug = 0
+
+# If you want sanitize to automatically run HTML markup through HTML Tidy, set
+# this to 1. Requires mxTidy
+# or utidylib.
+TIDY_MARKUP = 0
+
+# List of Python interfaces for HTML Tidy, in order of preference. Only useful
+# if TIDY_MARKUP = 1
+PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"]
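+
+# For example, to have sanitize clean markup with HTML Tidy (assuming the
+# uTidy binding is installed), one would set:
+#
+# TIDY_MARKUP = 1
+# PREFERRED_TIDY_INTERFACES = ["uTidy"]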
+
+import sgmllib, re, sys
+
+# chardet library auto-detects character encodings
+# Download from http://chardet.feedparser.org/
+try:
+ import chardet
+ if _debug:
+ import chardet.constants
+ chardet.constants._debug = 1
+
+ _chardet = lambda data: chardet.detect(data)['encoding']
+except:
+ chardet = None
+ _chardet = lambda data: None
+
+class _BaseHTMLProcessor(sgmllib.SGMLParser):
+ elements_no_end_tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
+ 'img', 'input', 'isindex', 'link', 'meta', 'param']
+
+ # Escape bare '<!' sequences that do not start a DOCTYPE, comment or
+ # CDATA section, bare '&' characters that do not start an entity, and
+ # normalize XML-style short tags such as <br/>.
+ _r_barebang = re.compile(r'<!((?!DOCTYPE|--|\[))')
+ _r_bareamp = re.compile(r'&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)')
+ _r_shorttag = re.compile(r'<([^<\s]+?)\s*/>')
+
+ def __init__(self, encoding):
+ self.encoding = encoding
+ if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding)
+ sgmllib.SGMLParser.__init__(self)
+
+ def reset(self):
+ self.pieces = []
+ sgmllib.SGMLParser.reset(self)
+
+ def _shorttag_replace(self, match):
+ tag = match.group(1)
+ if tag in self.elements_no_end_tag:
+ return '<' + tag + ' />'
+ else:
+ return '<' + tag + '></' + tag + '>'
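+
+ # For example (illustrative): '<br/>' becomes '<br />', while a
+ # self-closed tag that normally needs an end tag, such as '<span/>',
+ # becomes '<span></span>'.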
+
+ def feed(self, data):
+ data = self._r_barebang.sub(r'&lt;!\1', data)
+ data = self._r_bareamp.sub("&amp;", data)
+ data = self._r_shorttag.sub(self._shorttag_replace, data)
+ if self.encoding and type(data) == type(u''):
+ data = data.encode(self.encoding)
+ sgmllib.SGMLParser.feed(self, data)
+
+ def normalize_attrs(self, attrs):
+ # utility method to be called by descendants
+ attrs = [(k.lower(), v) for k, v in attrs]
+ attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs]
+ return attrs
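+
+ # For example (illustrative):
+ # normalize_attrs([('HREF', 'http://example.com/'), ('Rel', 'NOFOLLOW')])
+ # returns [('href', 'http://example.com/'), ('rel', 'nofollow')]
+ # -- attribute names are lowercased, and the values of 'rel' and 'type'
+ # attributes are lowercased as well.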
+
+ def unknown_starttag(self, tag, attrs):
+ # called for each start tag
+ # attrs is a list of (attr, value) tuples
+ # e.g. for <pre class='screen'>, tag='pre', attrs=[('class', 'screen')]
+ if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
+ uattrs = []
+ # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
+ for key, value in attrs:
+ if type(value) != type(u''):
+ value = unicode(value, self.encoding)
+ uattrs.append((unicode(key, self.encoding), value))
+ strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs]).encode(self.encoding)
+ if tag in self.elements_no_end_tag:
+ self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
+ else:
+ self.pieces.append('<%(tag)s%(strattrs)s>' % locals())
+
+ def unknown_endtag(self, tag):
+ # called for each end tag, e.g. for </pre>, tag will be 'pre'