* planet/__init__.py.orig, planet/__init__.py.backup: deleted these unnecessary
files since they are old backups. * gezegen/index.html.tmpl: did some code enhancement, such as fixing indentation and code style, and used template variables for the hardcoded planet URL. * www/foafroll.xml, www/opml.xml, www/rss10.xml, www/rss20.xml, www/index.html: including these generated files for now. * www/images/planet.png: added a favicon to the planet; we might change this in the future.
This commit is contained in:
parent
998d78bc1d
commit
7071612bec
|
@ -1,161 +1,195 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title><TMPL_VAR name></title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<link rel="stylesheet" href="http://gezegen.linux.org.tr/generic.css" type="text/css" />
|
||||
<link rel="stylesheet" href="http://gezegen.linux.org.tr/layout.css" type="text/css" />
|
||||
<link rel="stylesheet" href="http://gezegen.linux.org.tr/planet.css" type="text/css" />
|
||||
<link rel="stylesheet" href="http://gezegen.linux.org.tr/bloggers.css" type="text/css" />
|
||||
<!--
|
||||
FIXME: add favicon
|
||||
<link rel="icon" type="image/png" href="images/logo.png" />
|
||||
<link rel="shortcut icon" type="image/png" href="images/logo.png" />
|
||||
-->
|
||||
<link rel="alternate" type="application/rss+xml" title="<TMPL_VAR name>" href="http://gezegen.linux.org.tr/rss20.xml" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="hdr">
|
||||
<div id="banner"><img src="http://gezegen.linux.org.tr/images/spacer.png" alt="spacer" /></div>
|
||||
<div id="logo"><a href="http://gezegen.linux.org.tr/"><img src="http://gezegen.linux.org.tr/images/spacer.png" alt="Anasayfa" /></a></div>
|
||||
<!--
|
||||
<div id="hdrNav">
|
||||
<a href="http://gezegenlinux.blogspot.com/">Linux Gezegeni Haberleri</a>
|
||||
</div>-->
|
||||
<!--
|
||||
<div id="hdrNav">
|
||||
<center><a href="http://www.cebitbilisim.com/tr" target="_blank"><img src="http://gezegen.linux.org.tr/images/banner2006-tr.gif" border="0"></a></center>
|
||||
</div>
|
||||
-->
|
||||
</div>
|
||||
<head>
|
||||
<title><TMPL_VAR name></title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<link rel="stylesheet" href="<TMPL_VAR link>/generic.css" type="text/css" />
|
||||
<link rel="stylesheet" href="<TMPL_VAR link>/layout.css" type="text/css" />
|
||||
<link rel="stylesheet" href="<TMPL_VAR link>/planet.css" type="text/css" />
|
||||
<link rel="stylesheet" href="<TMPL_VAR link>/bloggers.css" type="text/css" />
|
||||
<link rel="icon" type="image/png" href="images/planet.png" />
|
||||
<link rel="alternate" type="application/rss+xml" title="<TMPL_VAR name>" href="<TMPL_VAR link>/rss20.xml" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="hdr">
|
||||
<div id="banner">
|
||||
<img src="<TMPL_VAR link>/images/spacer.png" alt="spacer" />
|
||||
</div>
|
||||
<div id="logo">
|
||||
<a href="<TMPL_VAR link>/">
|
||||
<img src="<TMPL_VAR link>/images/spacer.png" alt="Anasayfa" />
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="body">
|
||||
<TMPL_LOOP Items>
|
||||
<TMPL_IF new_date>
|
||||
<h2 class="date"><TMPL_VAR new_date></h2>
|
||||
</TMPL_IF>
|
||||
<div id="body">
|
||||
<TMPL_LOOP Items>
|
||||
<TMPL_IF new_date>
|
||||
<h2 class="date"><TMPL_VAR new_date></h2>
|
||||
</TMPL_IF>
|
||||
|
||||
<div class="entry <TMPL_IF channel_nick><TMPL_VAR channel_nick></TMPL_IF>">
|
||||
<div class="person-info">
|
||||
<a href="<TMPL_VAR channel_link ESCAPE="HTML">" title="<TMPL_VAR channel_title ESCAPE="HTML">">
|
||||
<TMPL_IF channel_face>
|
||||
<img class="face" src="http://gezegen.linux.org.tr/images/heads/<TMPL_VAR channel_face ESCAPE="HTML">" title="<TMPL_VAR channel_name>" /><br />
|
||||
<TMPL_ELSE>
|
||||
<img class="face" src="http://gezegen.linux.org.tr/images/heads/nobody.png" title="<TMPL_VAR channel_name>" /><br />
|
||||
</TMPL_IF>
|
||||
<TMPL_VAR channel_name><TMPL_IF channel_nick><br />(<TMPL_VAR channel_nick>)</TMPL_IF>
|
||||
</a>
|
||||
</div>
|
||||
<div class="entry <TMPL_IF channel_nick><TMPL_VAR channel_nick></TMPL_IF>">
|
||||
<div class="person-info">
|
||||
<a href="<TMPL_VAR channel_link ESCAPE="HTML">" title="<TMPL_VAR channel_title ESCAPE="HTML">">
|
||||
<TMPL_IF channel_face>
|
||||
<img class="face" src="<TMPL_VAR link>/images/heads/<TMPL_VAR channel_face ESCAPE="HTML">" title="<TMPL_VAR channel_name>" />
|
||||
<br />
|
||||
<TMPL_ELSE>
|
||||
<img class="face" src="<TMPL_VAR link>/images/heads/nobody.png" title="<TMPL_VAR channel_name>" />
|
||||
<br />
|
||||
</TMPL_IF>
|
||||
<TMPL_VAR channel_name>
|
||||
<TMPL_IF channel_nick>
|
||||
<br />
|
||||
(<TMPL_VAR channel_nick>)
|
||||
</TMPL_IF>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div class="post">
|
||||
<div class="post2">
|
||||
<div class="post-header">
|
||||
<TMPL_IF title>
|
||||
<h4 class="post-title"><a href="<TMPL_VAR link ESCAPE="HTML">"><TMPL_VAR title></a></h4>
|
||||
<TMPL_ELSE>
|
||||
<div class="post-title"><span> </span></div>
|
||||
</TMPL_IF>
|
||||
</div>
|
||||
<br />
|
||||
<div class="post-contents">
|
||||
<TMPL_VAR content>
|
||||
<br />
|
||||
<br />
|
||||
<div id="post-links" style="text-align: center;">
|
||||
<TMPL_IF comments><a href="<TMPL_VAR comments ESCAPE="HTML">"><img src="images/yorum.png" border="0" title="Yorumlar" /></a></TMPL_IF>
|
||||
<a href="http://del.icio.us/post?url=<TMPL_VAR link ESCAPE="HTML">&title=<TMPL_VAR title ESCAPE="HTML">" target="_blank"><img src="images/delicious.png" border="0" title="del.icio.us'a gönder" /></a>
|
||||
<a href="http://technorati.com/search/<TMPL_VAR link ESCAPE="HTML">" target="_blank"><img src="images/technorati.png" border="0" title="technorati'de ara" /></a>
|
||||
</div>
|
||||
</div>
|
||||
<div class="post-footer">
|
||||
<p><a href="<TMPL_VAR link ESCAPE="HTML">"><TMPL_VAR date></a></p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</TMPL_LOOP>
|
||||
</div>
|
||||
<div class="post">
|
||||
<div class="post2">
|
||||
<div class="post-header">
|
||||
<TMPL_IF title>
|
||||
<h4 class="post-title">
|
||||
<a href="<TMPL_VAR link ESCAPE="HTML">">
|
||||
<TMPL_VAR title>
|
||||
</a>
|
||||
</h4>
|
||||
<TMPL_ELSE>
|
||||
<div class="post-title">
|
||||
<span> </span>
|
||||
</div>
|
||||
</TMPL_IF>
|
||||
</div>
|
||||
<br />
|
||||
<div class="post-contents">
|
||||
<TMPL_VAR content>
|
||||
<br />
|
||||
<br />
|
||||
<div id="post-links" style="text-align: center;">
|
||||
<TMPL_IF comments>
|
||||
<a href="<TMPL_VAR comments ESCAPE="HTML">">
|
||||
<img src="images/yorum.png" border="0" title="Yorumlar" />
|
||||
</a>
|
||||
</TMPL_IF>
|
||||
<a href="http://del.icio.us/post?url=<TMPL_VAR link ESCAPE="HTML">&amp;title=<TMPL_VAR title ESCAPE="HTML">" target="_blank">
|
||||
<img src="images/delicious.png" border="0" title="del.icio.us'a gönder" />
|
||||
</a>
|
||||
<a href="http://technorati.com/search/<TMPL_VAR link ESCAPE="HTML">" target="_blank">
|
||||
<img src="images/technorati.png" border="0" title="technorati'de ara" />
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
<div class="post-footer">
|
||||
<p>
|
||||
<a href="<TMPL_VAR link ESCAPE="HTML">">
|
||||
<TMPL_VAR date>
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</TMPL_LOOP>
|
||||
</div>
|
||||
|
||||
<div id="sidebar">
|
||||
<div class="section">
|
||||
<h3>Gezegen Hakkında</h3>
|
||||
<p>Linux Gezegeni, Türkiye'de Linux ve Özgür Yazılım konusunda çalışmalar yapan arkadaşlarımızın web üzerindeki günlüklerini bir tek sayfadan okumamızı ve kendi dünyalarına ulaşmamızı sağlayan basit bir web sitesidir.</p>
|
||||
<p>Gezegeni <a href="http://www.planetplanet.org/">Planet</a> ile oluşturuyoruz, tasarım <a href="http://www.actsofvolition.com/">Steven Garrity</a>'nin eseri.</p>
|
||||
</div>
|
||||
<div id="sidebar">
|
||||
<div class="section">
|
||||
<h3>Gezegen Hakkında</h3>
|
||||
<p>
|
||||
Linux Gezegeni, Türkiye'de Linux ve Özgür Yazılım konusunda çalışmalar yapan arkadaşlarımızın web üzerindeki günlüklerini bir tek sayfadan okumamızı ve kendi dünyalarına ulaşmamızı sağlayan basit bir web sitesidir.
|
||||
</p>
|
||||
<p>
|
||||
Gezegeni <a href="http://www.planetplanet.org/">Planet</a> ile oluşturuyoruz, tasarım <a href="http://www.actsofvolition.com/">Steven Garrity</a>'nin eseri.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<div class="section">
|
||||
<a href='http://reklam.lkd.org.tr/www/delivery/ck.php?n=a78599b7&cb=INSERT_RANDOM_NUMBER_HERE' target='_blank'>
|
||||
<img src='http://reklam.lkd.org.tr/www/delivery/avw.php?zoneid=2&cb=INSERT_RANDOM_NUMBER_HERE&n=a78599b7' border='0' alt='' />
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<a href='http://reklam.lkd.org.tr/www/delivery/ck.php?n=a78599b7&cb=INSERT_RANDOM_NUMBER_HERE' target='_blank'><img
|
||||
src='http://reklam.lkd.org.tr/www/delivery/avw.php?zoneid=2&cb=INSERT_RANDOM_NUMBER_HERE&n=a78599b7' border='0' alt='' /></a>
|
||||
<div class="bloggers section" id="bloggers">
|
||||
<h3>Üyeler</h3>
|
||||
<ul>
|
||||
<TMPL_LOOP Channels>
|
||||
<li>
|
||||
<div>
|
||||
<TMPL_IF face>
|
||||
<img class="head" src="images/heads/<TMPL_VAR face ESCAPE="HTML">" title="<TMPL_VAR face>" />
|
||||
<TMPL_ELSE>
|
||||
<img class="head" src="images/heads/nobody.png" title="<TMPL_VAR channel_name>" />
|
||||
</TMPL_IF>
|
||||
<div class="ircnick"> </div>
|
||||
</div>
|
||||
<a href="<TMPL_VAR url ESCAPE="HTML">" title="subscribe">
|
||||
<img src="images/feed-icon-10x10.png" alt="(feed)">
|
||||
</a>
|
||||
<a <TMPL_IF link>href="<TMPL_VAR link ESCAPE="HTML">" </TMPL_IF><TMPL_IF message>class="message" title="<TMPL_VAR message ESCAPE="HTML">"</TMPL_IF><TMPL_UNLESS message>title="<TMPL_VAR title_plain ESCAPE="HTML">"</TMPL_UNLESS>>
|
||||
<TMPL_VAR name>
|
||||
</a>
|
||||
</li>
|
||||
</TMPL_LOOP>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="section">
|
||||
<h3>Takip edin</h3>
|
||||
<ul>
|
||||
<li><a href="<TMPL_VAR link>/rss20.xml">RSS 2.0</a></li>
|
||||
<li><a href="<TMPL_VAR link>/rss10.xml">RSS 1.0</a></li>
|
||||
<li><a href="<TMPL_VAR link>/foafroll.xml">FOAF</a></li>
|
||||
<li><a href="<TMPL_VAR link>/opml.xml">OPML</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="bloggers section" id="bloggers">
|
||||
<h3>Üyeler</h3>
|
||||
<ul>
|
||||
<TMPL_LOOP Channels>
|
||||
<li>
|
||||
<div>
|
||||
<TMPL_IF face><img class="head" src="images/heads/<TMPL_VAR face ESCAPE="HTML">" title="<TMPL_VAR face>" />
|
||||
<TMPL_ELSE><img class="head" src="images/heads/nobody.png" title="<TMPL_VAR channel_name>" /></TMPL_IF>
|
||||
<div class="ircnick"> </div>
|
||||
</div>
|
||||
<a href="<TMPL_VAR url ESCAPE="HTML">" title="subscribe"><img src="images/feed-icon-10x10.png" alt="(feed)"></a>
|
||||
<a <TMPL_IF link>href="<TMPL_VAR link ESCAPE="HTML">" </TMPL_IF><TMPL_IF message>class="message" title="<TMPL_VAR message ESCAPE="HTML">"</TMPL_IF><TMPL_UNLESS message>title="<TMPL_VAR title_plain ESCAPE="HTML">"</TMPL_UNLESS>><TMPL_VAR name></a>
|
||||
</li>
|
||||
</TMPL_LOOP>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="section">
|
||||
<h3>Diğer Gezegenler</h3>
|
||||
<ul>
|
||||
<li><a href="http://gezegen.pardus.org.tr/">Pardus</a></li>
|
||||
<li><a href="http://www.kernelplanet.org/">Kernel</a></li>
|
||||
<li><a href="http://www.planetkde.org/">KDE</a></li>
|
||||
<li><a href="http://planet.gnome.org">Gnome</a></li>
|
||||
<li><a href="http://www.planetsuse.org/">SuSE</a></li>
|
||||
<li><a href="http://planet.python.org">Python</a></li>
|
||||
<li><a href="http://planet.gentoo.org">Gentoo</a></li>
|
||||
<li><a href="http://www.go-mono.com/monologue/">MONOlogue</a></li>
|
||||
<li><a href="http://planetjava.org">Java</a></li>
|
||||
<li><a href="http://planet.lisp.org">LISP</a></li>
|
||||
<li><a href="http://planet.perl.org">Perl</a></li>
|
||||
<li><a href="http://fedoraproject.org/people/">Fedora</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h3>Takip edin</h3>
|
||||
<ul>
|
||||
<li><a href="http://gezegen.linux.org.tr/rss20.xml">RSS 2.0</a></li>
|
||||
<li><a href="http://gezegen.linux.org.tr/rss10.xml">RSS 1.0</a></li>
|
||||
<li><a href="http://gezegen.linux.org.tr/foafroll.xml">FOAF</a></li>
|
||||
<li><a href="http://gezegen.linux.org.tr/opml.xml">OPML</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="section">
|
||||
<h3>Güncelleme</h3>
|
||||
<p>Gezegen her 10 dakikada bir yenilenir.</p>
|
||||
<p>
|
||||
Son güncelleme:
|
||||
<br />
|
||||
<TMPL_VAR date>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h3>Diğer Gezegenler</h3>
|
||||
<ul>
|
||||
<li><a href="http://gezegen.pardus.org.tr/">Pardus</a></li>
|
||||
<li><a href="http://www.kernelplanet.org/">Kernel</a></li>
|
||||
<li><a href="http://www.planetkde.org/">KDE</a></li>
|
||||
<li><a href="http://planet.gnome.org">Gnome</a></li>
|
||||
<li><a href="http://www.planetsuse.org/">SuSE</a></li>
|
||||
<li><a href="http://planet.python.org">Python</a></li>
|
||||
<li><a href="http://planet.gentoo.org">Gentoo</a></li>
|
||||
<li><a href="http://www.go-mono.com/monologue/">MONOlogue</a></li>
|
||||
<li><a href="http://planetjava.org">Java</a></li>
|
||||
<li><a href="http://planet.lisp.org">LISP</a></li>
|
||||
<li><a href="http://planet.perl.org">Perl</a></li>
|
||||
<li><a href="http://fedoraproject.org/people/">Fedora</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="section">
|
||||
<h3>İletişim</h3>
|
||||
<p>
|
||||
Linux Gezegeni <a href="mailto:gezegen [at] linux.org.tr">Gezegen Ekibi</a> tarafından yönetilmektedir, Gezegen hakkındaki sorularınızı ve Gezegen'e iniş başvurularınızı e-posta ile iletebilirsiniz.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h3>Güncelleme</h3>
|
||||
<p>Gezegen her 10 dakikada bir yenilenir.</p>
|
||||
<p>Son güncelleme: <br /><TMPL_VAR date></p>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h3>İletişim</h3>
|
||||
<p>Linux Gezegeni <a href="mailto:gezegen [at] linux.org.tr">Gezegen Ekibi</a> tarafından yönetilmektedir, Gezegen hakkındaki sorularınızı ve Gezegen'e iniş başvurularınızı e-posta ile iletebilirsiniz.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div id="copyright">
|
||||
Bu sayfa içerisinde yazılanlar doğru veya yanlış herhangi bir biçimde <a href="http://www.lkd.org.tr/">Linux Kullanıcıları Derneği</a>'ni bağlamaz. <br />
|
||||
LKD yalnızca Linux Gezegeni için teknik olanakları (sunucu, yazılım, bant genişliği) sağlar.<br />
|
||||
Ayrıca Gezegen istatistiklere <a href="http://gezegen.linux.org.tr/stats">buradan</a> ulaşabilirsiniz.<br />
|
||||
<!-- Start of StatCounter Code -->
|
||||
<a href="http://www.statcounter.com/" target="_blank"><img src="http://c18.statcounter.com/counter.php?sc_project=1860933&java=0&security=e27e04a9&invisible=0" alt="free tracking" border="0"></a>
|
||||
<!-- End of StatCounter Code -->
|
||||
|
||||
</div>
|
||||
</body>
|
||||
<div id="copyright">
|
||||
Bu sayfa içerisinde yazılanlar doğru veya yanlış herhangi bir biçimde <a href="http://www.lkd.org.tr/">Linux Kullanıcıları Derneği</a>'ni bağlamaz.
|
||||
<br />
|
||||
LKD yalnızca Linux Gezegeni için teknik olanakları (sunucu, yazılım, bant genişliği) sağlar.
|
||||
<br />
|
||||
Ayrıca Gezegen istatistiklere <a href="<TMPL_VAR link>/stats">buradan</a> ulaşabilirsiniz.
|
||||
<br />
|
||||
<!-- Start of StatCounter Code -->
|
||||
<a href="http://www.statcounter.com/" target="_blank"><img src="http://c18.statcounter.com/counter.php?sc_project=1860933&amp;java=0&amp;security=e27e04a9&amp;invisible=0" alt="free tracking" border="0"></a>
|
||||
<!-- End of StatCounter Code -->
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
|
@ -1,948 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""Planet aggregator library.
|
||||
|
||||
This package is a library for developing web sites or software that
|
||||
aggregate RSS, CDF and Atom feeds taken from elsewhere into a single,
|
||||
combined feed.
|
||||
"""
|
||||
|
||||
__version__ = "1.0"
|
||||
__authors__ = [ "Scott James Remnant <scott@netsplit.com>",
|
||||
"Jeff Waugh <jdub@perkypants.org>" ]
|
||||
__license__ = "Python"
|
||||
|
||||
|
||||
# Modules available without separate import
|
||||
import cache
|
||||
import feedparser
|
||||
import sanitize
|
||||
import htmltmpl
|
||||
import sgmllib
|
||||
try:
|
||||
import logging
|
||||
except:
|
||||
import compat_logging as logging
|
||||
|
||||
# Limit the effect of "from planet import *"
|
||||
__all__ = ("cache", "feedparser", "htmltmpl", "logging",
|
||||
"Planet", "Channel", "NewsItem")
|
||||
|
||||
|
||||
import locale
|
||||
import os
|
||||
import md5
|
||||
import time
|
||||
import dbhash
|
||||
import re
|
||||
import xml.sax.saxutils
|
||||
|
||||
|
||||
# Version information (for generator headers)
|
||||
VERSION = ("Planet/%s +http://www.planetplanet.org" % __version__)
|
||||
|
||||
# Default User-Agent header to send when retreiving feeds
|
||||
USER_AGENT = VERSION + " " + feedparser.USER_AGENT
|
||||
|
||||
# Default cache directory
|
||||
CACHE_DIRECTORY = "cache"
|
||||
|
||||
# Default number of items to display from a new feed
|
||||
NEW_FEED_ITEMS = 10
|
||||
|
||||
# Useful common date/time formats
|
||||
TIMEFMT_ISO = "%Y-%m-%dT%H:%M:%S+00:00"
|
||||
TIMEFMT_822 = "%a, %d %b %Y %H:%M:%S +0000"
|
||||
|
||||
|
||||
# Log instance to use here
|
||||
log = logging.getLogger("planet")
|
||||
try:
|
||||
log.warning
|
||||
except:
|
||||
log.warning = log.warn
|
||||
|
||||
# Defaults for the template file config sections
|
||||
ENCODING = "utf-8"
|
||||
ITEMS_PER_PAGE = 60
|
||||
DAYS_PER_PAGE = 0
|
||||
OUTPUT_DIR = "output"
|
||||
DATE_FORMAT = "%B %d, %Y %I:%M %p"
|
||||
NEW_DATE_FORMAT = "%B %d, %Y"
|
||||
ACTIVITY_THRESHOLD = 0
|
||||
|
||||
class stripHtml(sgmllib.SGMLParser):
|
||||
"remove all tags from the data"
|
||||
def __init__(self, data):
|
||||
sgmllib.SGMLParser.__init__(self)
|
||||
self.result=''
|
||||
self.feed(data)
|
||||
self.close()
|
||||
def handle_data(self, data):
|
||||
if data: self.result+=data
|
||||
|
||||
def template_info(item, date_format):
|
||||
"""Produce a dictionary of template information."""
|
||||
info = {}
|
||||
for key in item.keys():
|
||||
if item.key_type(key) == item.DATE:
|
||||
date = item.get_as_date(key)
|
||||
info[key] = time.strftime(date_format, date)
|
||||
info[key + "_iso"] = time.strftime(TIMEFMT_ISO, date)
|
||||
info[key + "_822"] = time.strftime(TIMEFMT_822, date)
|
||||
else:
|
||||
info[key] = item[key]
|
||||
if 'title' in item.keys():
|
||||
info['title_plain'] = stripHtml(info['title']).result
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class Planet:
|
||||
"""A set of channels.
|
||||
|
||||
This class represents a set of channels for which the items will
|
||||
be aggregated together into one combined feed.
|
||||
|
||||
Properties:
|
||||
user_agent User-Agent header to fetch feeds with.
|
||||
cache_directory Directory to store cached channels in.
|
||||
new_feed_items Number of items to display from a new feed.
|
||||
filter A regular expression that articles must match.
|
||||
exclude A regular expression that articles must not match.
|
||||
"""
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
|
||||
self._channels = []
|
||||
|
||||
self.user_agent = USER_AGENT
|
||||
self.cache_directory = CACHE_DIRECTORY
|
||||
self.new_feed_items = NEW_FEED_ITEMS
|
||||
self.filter = None
|
||||
self.exclude = None
|
||||
|
||||
def tmpl_config_get(self, template, option, default=None, raw=0, vars=None):
|
||||
"""Get a template value from the configuration, with a default."""
|
||||
if self.config.has_option(template, option):
|
||||
return self.config.get(template, option, raw=raw, vars=None)
|
||||
elif self.config.has_option("Planet", option):
|
||||
return self.config.get("Planet", option, raw=raw, vars=None)
|
||||
else:
|
||||
return default
|
||||
|
||||
def gather_channel_info(self, template_file="Planet"):
|
||||
date_format = self.tmpl_config_get(template_file,
|
||||
"date_format", DATE_FORMAT, raw=1)
|
||||
|
||||
activity_threshold = int(self.tmpl_config_get(template_file,
|
||||
"activity_threshold",
|
||||
ACTIVITY_THRESHOLD))
|
||||
|
||||
if activity_threshold:
|
||||
activity_horizon = \
|
||||
time.gmtime(time.time()-86400*activity_threshold)
|
||||
else:
|
||||
activity_horizon = 0
|
||||
|
||||
channels = {}
|
||||
channels_list = []
|
||||
for channel in self.channels(hidden=1):
|
||||
channels[channel] = template_info(channel, date_format)
|
||||
channels_list.append(channels[channel])
|
||||
|
||||
# identify inactive feeds
|
||||
if activity_horizon:
|
||||
latest = channel.items(sorted=1)
|
||||
if len(latest)==0 or latest[0].date < activity_horizon:
|
||||
channels[channel]["message"] = \
|
||||
"no activity in %d days" % activity_threshold
|
||||
|
||||
# report channel level errors
|
||||
if not channel.url_status: continue
|
||||
status = int(channel.url_status)
|
||||
if status == 403:
|
||||
channels[channel]["message"] = "403: forbidden"
|
||||
elif status == 404:
|
||||
channels[channel]["message"] = "404: not found"
|
||||
elif status == 408:
|
||||
channels[channel]["message"] = "408: request timeout"
|
||||
elif status == 410:
|
||||
channels[channel]["message"] = "410: gone"
|
||||
elif status == 500:
|
||||
channels[channel]["message"] = "internal server error"
|
||||
elif status >= 400:
|
||||
channels[channel]["message"] = "http status %s" % status
|
||||
|
||||
return channels, channels_list
|
||||
|
||||
def gather_items_info(self, channels, template_file="Planet", channel_list=None):
|
||||
items_list = []
|
||||
prev_date = []
|
||||
prev_channel = None
|
||||
|
||||
date_format = self.tmpl_config_get(template_file,
|
||||
"date_format", DATE_FORMAT, raw=1)
|
||||
items_per_page = int(self.tmpl_config_get(template_file,
|
||||
"items_per_page", ITEMS_PER_PAGE))
|
||||
days_per_page = int(self.tmpl_config_get(template_file,
|
||||
"days_per_page", DAYS_PER_PAGE))
|
||||
new_date_format = self.tmpl_config_get(template_file,
|
||||
"new_date_format", NEW_DATE_FORMAT, raw=1)
|
||||
|
||||
for newsitem in self.items(max_items=items_per_page,
|
||||
max_days=days_per_page,
|
||||
channels=channel_list):
|
||||
newsitem.date = time.localtime(time.mktime(newsitem.date)+7200)
|
||||
item_info = template_info(newsitem, date_format)
|
||||
chan_info = channels[newsitem._channel]
|
||||
for k, v in chan_info.items():
|
||||
item_info["channel_" + k] = v
|
||||
|
||||
# Check for the start of a new day
|
||||
if prev_date[:3] != newsitem.date[:3]:
|
||||
prev_date = newsitem.date
|
||||
item_info["new_date"] = time.strftime(new_date_format,
|
||||
newsitem.date)
|
||||
|
||||
# Check for the start of a new channel
|
||||
if item_info.has_key("new_date") \
|
||||
or prev_channel != newsitem._channel:
|
||||
prev_channel = newsitem._channel
|
||||
item_info["new_channel"] = newsitem._channel.url
|
||||
|
||||
items_list.append(item_info)
|
||||
|
||||
return items_list
|
||||
|
||||
def run(self, planet_name, planet_link, template_files, offline = False):
|
||||
log = logging.getLogger("planet.runner")
|
||||
|
||||
# Create a planet
|
||||
log.info("Loading cached data")
|
||||
if self.config.has_option("Planet", "cache_directory"):
|
||||
self.cache_directory = self.config.get("Planet", "cache_directory")
|
||||
if self.config.has_option("Planet", "new_feed_items"):
|
||||
self.new_feed_items = int(self.config.get("Planet", "new_feed_items"))
|
||||
self.user_agent = "%s +%s %s" % (planet_name, planet_link,
|
||||
self.user_agent)
|
||||
if self.config.has_option("Planet", "filter"):
|
||||
self.filter = self.config.get("Planet", "filter")
|
||||
|
||||
# The other configuration blocks are channels to subscribe to
|
||||
for feed_url in self.config.sections():
|
||||
if feed_url == "Planet" or feed_url in template_files:
|
||||
continue
|
||||
|
||||
# Create a channel, configure it and subscribe it
|
||||
channel = Channel(self, feed_url)
|
||||
self.subscribe(channel)
|
||||
|
||||
# Update it
|
||||
try:
|
||||
if not offline and not channel.url_status == '410':
|
||||
channel.update()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
log.exception("Update of <%s> failed", feed_url)
|
||||
|
||||
def generate_all_files(self, template_files, planet_name,
|
||||
planet_link, planet_feed, owner_name, owner_email):
|
||||
|
||||
log = logging.getLogger("planet.runner")
|
||||
# Go-go-gadget-template
|
||||
for template_file in template_files:
|
||||
manager = htmltmpl.TemplateManager()
|
||||
log.info("Processing template %s", template_file)
|
||||
template = manager.prepare(template_file)
|
||||
# Read the configuration
|
||||
output_dir = self.tmpl_config_get(template_file,
|
||||
"output_dir", OUTPUT_DIR)
|
||||
date_format = self.tmpl_config_get(template_file,
|
||||
"date_format", DATE_FORMAT, raw=1)
|
||||
encoding = self.tmpl_config_get(template_file, "encoding", ENCODING)
|
||||
|
||||
# We treat each template individually
|
||||
base = os.path.splitext(os.path.basename(template_file))[0]
|
||||
url = os.path.join(planet_link, base)
|
||||
output_file = os.path.join(output_dir, base)
|
||||
|
||||
# Gather information
|
||||
channels, channels_list = self.gather_channel_info(template_file)
|
||||
items_list = self.gather_items_info(channels, template_file)
|
||||
|
||||
# Gather item information
|
||||
|
||||
# Process the template
|
||||
tp = htmltmpl.TemplateProcessor(html_escape=0)
|
||||
tp.set("Items", items_list)
|
||||
tp.set("Channels", channels_list)
|
||||
|
||||
# Generic information
|
||||
tp.set("generator", VERSION)
|
||||
tp.set("name", planet_name)
|
||||
tp.set("link", planet_link)
|
||||
tp.set("owner_name", owner_name)
|
||||
tp.set("owner_email", owner_email)
|
||||
tp.set("url", url)
|
||||
|
||||
if planet_feed:
|
||||
tp.set("feed", planet_feed)
|
||||
tp.set("feedtype", planet_feed.find('rss')>=0 and 'rss' or 'atom')
|
||||
|
||||
# Update time
|
||||
date = time.localtime()
|
||||
tp.set("date", time.strftime(date_format, date))
|
||||
tp.set("date_iso", time.strftime(TIMEFMT_ISO, date))
|
||||
tp.set("date_822", time.strftime(TIMEFMT_822, date))
|
||||
|
||||
try:
|
||||
log.info("Writing %s", output_file)
|
||||
output_fd = open(output_file, "w")
|
||||
if encoding.lower() in ("utf-8", "utf8"):
|
||||
# UTF-8 output is the default because we use that internally
|
||||
output_fd.write(tp.process(template))
|
||||
elif encoding.lower() in ("xml", "html", "sgml"):
|
||||
# Magic for Python 2.3 users
|
||||
output = tp.process(template).decode("utf-8")
|
||||
output_fd.write(output.encode("ascii", "xmlcharrefreplace"))
|
||||
else:
|
||||
# Must be a "known" encoding
|
||||
output = tp.process(template).decode("utf-8")
|
||||
output_fd.write(output.encode(encoding, "replace"))
|
||||
output_fd.close()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
log.exception("Write of %s failed", output_file)
|
||||
|
||||
def channels(self, hidden=0, sorted=1):
|
||||
"""Return the list of channels."""
|
||||
channels = []
|
||||
for channel in self._channels:
|
||||
if hidden or not channel.has_key("hidden"):
|
||||
channels.append((channel.name, channel))
|
||||
|
||||
if sorted:
|
||||
locale.setlocale(locale.LC_ALL,"tr_TR.UTF-8")
|
||||
channels.sort(key=lambda x: locale.strxfrm(x[0]))
|
||||
locale.setlocale(locale.LC_ALL,"C")
|
||||
|
||||
|
||||
return [ c[-1] for c in channels ]
|
||||
|
||||
def find_by_basename(self, basename):
|
||||
for channel in self._channels:
|
||||
if basename == channel.cache_basename(): return channel
|
||||
|
||||
def subscribe(self, channel):
|
||||
"""Subscribe the planet to the channel."""
|
||||
self._channels.append(channel)
|
||||
|
||||
def unsubscribe(self, channel):
|
||||
"""Unsubscribe the planet from the channel."""
|
||||
self._channels.remove(channel)
|
||||
|
||||
def items(self, hidden=0, sorted=1, max_items=0, max_days=0, channels=None):
|
||||
"""Return an optionally filtered list of items in the channel.
|
||||
|
||||
The filters are applied in the following order:
|
||||
|
||||
If hidden is true then items in hidden channels and hidden items
|
||||
will be returned.
|
||||
|
||||
If sorted is true then the item list will be sorted with the newest
|
||||
first.
|
||||
|
||||
If max_items is non-zero then this number of items, at most, will
|
||||
be returned.
|
||||
|
||||
If max_days is non-zero then any items older than the newest by
|
||||
this number of days won't be returned. Requires sorted=1 to work.
|
||||
|
||||
|
||||
The sharp-eyed will note that this looks a little strange code-wise,
|
||||
it turns out that Python gets *really* slow if we try to sort the
|
||||
actual items themselves. Also we use mktime here, but it's ok
|
||||
because we discard the numbers and just need them to be relatively
|
||||
consistent between each other.
|
||||
"""
|
||||
planet_filter_re = None
|
||||
if self.filter:
|
||||
planet_filter_re = re.compile(self.filter, re.I)
|
||||
planet_exclude_re = None
|
||||
if self.exclude:
|
||||
planet_exclude_re = re.compile(self.exclude, re.I)
|
||||
|
||||
items = []
|
||||
seen_guids = {}
|
||||
if not channels: channels=self.channels(hidden=hidden, sorted=0)
|
||||
for channel in channels:
|
||||
for item in channel._items.values():
|
||||
if hidden or not item.has_key("hidden"):
|
||||
|
||||
channel_filter_re = None
|
||||
if channel.filter:
|
||||
channel_filter_re = re.compile(channel.filter,
|
||||
re.I)
|
||||
channel_exclude_re = None
|
||||
if channel.exclude:
|
||||
channel_exclude_re = re.compile(channel.exclude,
|
||||
re.I)
|
||||
if (planet_filter_re or planet_exclude_re \
|
||||
or channel_filter_re or channel_exclude_re):
|
||||
title = ""
|
||||
if item.has_key("title"):
|
||||
title = item.title
|
||||
content = item.get_content("content")
|
||||
|
||||
if planet_filter_re:
|
||||
if not (planet_filter_re.search(title) \
|
||||
or planet_filter_re.search(content)):
|
||||
continue
|
||||
|
||||
if planet_exclude_re:
|
||||
if (planet_exclude_re.search(title) \
|
||||
or planet_exclude_re.search(content)):
|
||||
continue
|
||||
|
||||
if channel_filter_re:
|
||||
if not (channel_filter_re.search(title) \
|
||||
or channel_filter_re.search(content)):
|
||||
continue
|
||||
|
||||
if channel_exclude_re:
|
||||
if (channel_exclude_re.search(title) \
|
||||
or channel_exclude_re.search(content)):
|
||||
continue
|
||||
|
||||
if not seen_guids.has_key(item.id):
|
||||
seen_guids[item.id] = 1;
|
||||
items.append((time.mktime(item.date), item.order, item))
|
||||
|
||||
# Sort the list
|
||||
if sorted:
|
||||
items.sort()
|
||||
items.reverse()
|
||||
|
||||
# Apply max_items filter
|
||||
if len(items) and max_items:
|
||||
items = items[:max_items]
|
||||
|
||||
# Apply max_days filter
|
||||
if len(items) and max_days:
|
||||
max_count = 0
|
||||
max_time = items[0][0] - max_days * 84600
|
||||
for item in items:
|
||||
if item[0] > max_time:
|
||||
max_count += 1
|
||||
else:
|
||||
items = items[:max_count]
|
||||
break
|
||||
|
||||
return [ i[-1] for i in items ]
|
||||
|
||||
class Channel(cache.CachedInfo):
|
||||
"""A list of news items.
|
||||
|
||||
This class represents a list of news items taken from the feed of
|
||||
a website or other source.
|
||||
|
||||
Properties:
|
||||
url URL of the feed.
|
||||
url_etag E-Tag of the feed URL.
|
||||
url_modified Last modified time of the feed URL.
|
||||
url_status Last HTTP status of the feed URL.
|
||||
hidden Channel should be hidden (True if exists).
|
||||
name Name of the feed owner, or feed title.
|
||||
next_order Next order number to be assigned to NewsItem
|
||||
|
||||
updated Correct UTC-Normalised update time of the feed.
|
||||
last_updated Correct UTC-Normalised time the feed was last updated.
|
||||
|
||||
id An identifier the feed claims is unique (*).
|
||||
title One-line title (*).
|
||||
link Link to the original format feed (*).
|
||||
tagline Short description of the feed (*).
|
||||
info Longer description of the feed (*).
|
||||
|
||||
modified Date the feed claims to have been modified (*).
|
||||
|
||||
author Name of the author (*).
|
||||
publisher Name of the publisher (*).
|
||||
generator Name of the feed generator (*).
|
||||
category Category name (*).
|
||||
copyright Copyright information for humans to read (*).
|
||||
license Link to the licence for the content (*).
|
||||
docs Link to the specification of the feed format (*).
|
||||
language Primary language (*).
|
||||
errorreportsto E-Mail address to send error reports to (*).
|
||||
|
||||
image_url URL of an associated image (*).
|
||||
image_link Link to go with the associated image (*).
|
||||
image_title Alternative text of the associated image (*).
|
||||
image_width Width of the associated image (*).
|
||||
image_height Height of the associated image (*).
|
||||
|
||||
filter A regular expression that articles must match.
|
||||
exclude A regular expression that articles must not match.
|
||||
|
||||
Properties marked (*) will only be present if the original feed
|
||||
contained them. Note that the optional 'modified' date field is simply
|
||||
a claim made by the item and parsed from the information given, 'updated'
|
||||
(and 'last_updated') are far more reliable sources of information.
|
||||
|
||||
Some feeds may define additional properties to those above.
|
||||
"""
|
||||
IGNORE_KEYS = ("links", "contributors", "textinput", "cloud", "categories",
|
||||
"url", "href", "url_etag", "url_modified", "tags", "itunes_explicit")
|
||||
|
||||
def __init__(self, planet, url):
|
||||
if not os.path.isdir(planet.cache_directory):
|
||||
os.makedirs(planet.cache_directory)
|
||||
cache_filename = cache.filename(planet.cache_directory, url)
|
||||
cache_file = dbhash.open(cache_filename, "c", 0666)
|
||||
|
||||
cache.CachedInfo.__init__(self, cache_file, url, root=1)
|
||||
|
||||
self._items = {}
|
||||
self._planet = planet
|
||||
self._expired = []
|
||||
self.url = url
|
||||
# retain the original URL for error reporting
|
||||
self.configured_url = url
|
||||
self.url_etag = None
|
||||
self.url_status = None
|
||||
self.url_modified = None
|
||||
self.name = None
|
||||
self.updated = None
|
||||
self.last_updated = None
|
||||
self.filter = None
|
||||
self.exclude = None
|
||||
self.next_order = "0"
|
||||
self.cache_read()
|
||||
self.cache_read_entries()
|
||||
|
||||
if planet.config.has_section(url):
|
||||
for option in planet.config.options(url):
|
||||
value = planet.config.get(url, option)
|
||||
self.set_as_string(option, value, cached=0)
|
||||
|
||||
def has_item(self, id_):
|
||||
"""Check whether the item exists in the channel."""
|
||||
return self._items.has_key(id_)
|
||||
|
||||
def get_item(self, id_):
|
||||
"""Return the item from the channel."""
|
||||
return self._items[id_]
|
||||
|
||||
# Special methods
|
||||
__contains__ = has_item
|
||||
|
||||
def items(self, hidden=0, sorted=0):
|
||||
"""Return the item list."""
|
||||
items = []
|
||||
for item in self._items.values():
|
||||
if hidden or not item.has_key("hidden"):
|
||||
items.append((time.mktime(item.date), item.order, item))
|
||||
|
||||
if sorted:
|
||||
items.sort()
|
||||
items.reverse()
|
||||
|
||||
return [ i[-1] for i in items ]
|
||||
|
||||
def __iter__(self):
|
||||
"""Iterate the sorted item list."""
|
||||
return iter(self.items(sorted=1))
|
||||
|
||||
def cache_read_entries(self):
|
||||
"""Read entry information from the cache."""
|
||||
keys = self._cache.keys()
|
||||
for key in keys:
|
||||
if key.find(" ") != -1: continue
|
||||
if self.has_key(key): continue
|
||||
|
||||
item = NewsItem(self, key)
|
||||
self._items[key] = item
|
||||
|
||||
def cache_basename(self):
|
||||
return cache.filename('',self._id)
|
||||
|
||||
def cache_write(self, sync=1):
|
||||
"""Write channel and item information to the cache."""
|
||||
for item in self._items.values():
|
||||
item.cache_write(sync=0)
|
||||
for item in self._expired:
|
||||
item.cache_clear(sync=0)
|
||||
cache.CachedInfo.cache_write(self, sync)
|
||||
|
||||
self._expired = []
|
||||
|
||||
def feed_information(self):
|
||||
"""
|
||||
Returns a description string for the feed embedded in this channel.
|
||||
|
||||
This will usually simply be the feed url embedded in <>, but in the
|
||||
case where the current self.url has changed from the original
|
||||
self.configured_url the string will contain both pieces of information.
|
||||
This is so that the URL in question is easier to find in logging
|
||||
output: getting an error about a URL that doesn't appear in your config
|
||||
file is annoying.
|
||||
"""
|
||||
if self.url == self.configured_url:
|
||||
return "<%s>" % self.url
|
||||
else:
|
||||
return "<%s> (formerly <%s>)" % (self.url, self.configured_url)
|
||||
|
||||
def update(self):
|
||||
"""Download the feed to refresh the information.
|
||||
|
||||
This does the actual work of pulling down the feed and if it changes
|
||||
updates the cached information about the feed and entries within it.
|
||||
"""
|
||||
info = feedparser.parse(self.url,
|
||||
etag=self.url_etag, modified=self.url_modified,
|
||||
agent=self._planet.user_agent)
|
||||
if info.has_key("status"):
|
||||
self.url_status = str(info.status)
|
||||
elif info.has_key("entries") and len(info.entries)>0:
|
||||
self.url_status = str(200)
|
||||
elif info.bozo and info.bozo_exception.__class__.__name__=='Timeout':
|
||||
self.url_status = str(408)
|
||||
else:
|
||||
self.url_status = str(500)
|
||||
|
||||
if self.url_status == '301' and (info.has_key("entries") and len(info.entries)>0):
|
||||
if self.url != info.url:
|
||||
log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
|
||||
os.link(cache.filename(self._planet.cache_directory, self.url),
|
||||
cache.filename(self._planet.cache_directory, info.url))
|
||||
self.url != info.url
|
||||
elif self.url_status == '304':
|
||||
log.info("Feed %s unchanged", self.feed_information())
|
||||
return
|
||||
elif self.url_status == '410':
|
||||
log.info("Feed %s gone", self.feed_information())
|
||||
self.cache_write()
|
||||
return
|
||||
elif self.url_status == '408':
|
||||
log.warning("Feed %s timed out", self.feed_information())
|
||||
return
|
||||
elif int(self.url_status) >= 400:
|
||||
log.error("Error %s while updating feed %s",
|
||||
self.url_status, self.feed_information())
|
||||
return
|
||||
else:
|
||||
log.info("Updating feed %s", self.feed_information())
|
||||
|
||||
self.url_etag = info.has_key("etag") and info.etag or None
|
||||
self.url_modified = info.has_key("modified") and info.modified or None
|
||||
if self.url_etag is not None:
|
||||
log.debug("E-Tag: %s", self.url_etag)
|
||||
if self.url_modified is not None:
|
||||
log.debug("Last Modified: %s",
|
||||
time.strftime(TIMEFMT_ISO, self.url_modified))
|
||||
|
||||
self.update_info(info.feed)
|
||||
self.update_entries(info.entries)
|
||||
self.cache_write()
|
||||
|
||||
def update_info(self, feed):
|
||||
"""Update information from the feed.
|
||||
|
||||
This reads the feed information supplied by feedparser and updates
|
||||
the cached information about the feed. These are the various
|
||||
potentially interesting properties that you might care about.
|
||||
"""
|
||||
for key in feed.keys():
|
||||
if key in self.IGNORE_KEYS or key + "_parsed" in self.IGNORE_KEYS:
|
||||
# Ignored fields
|
||||
pass
|
||||
elif feed.has_key(key + "_parsed"):
|
||||
# Ignore unparsed date fields
|
||||
pass
|
||||
elif key.endswith("_detail"):
|
||||
# retain name and email sub-fields
|
||||
if feed[key].has_key('name') and feed[key].name:
|
||||
self.set_as_string(key.replace("_detail","_name"), \
|
||||
feed[key].name)
|
||||
if feed[key].has_key('email') and feed[key].email:
|
||||
self.set_as_string(key.replace("_detail","_email"), \
|
||||
feed[key].email)
|
||||
elif key == "items":
|
||||
# Ignore items field
|
||||
pass
|
||||
elif key.endswith("_parsed"):
|
||||
# Date fields
|
||||
if feed[key] is not None:
|
||||
self.set_as_date(key[:-len("_parsed")], feed[key])
|
||||
elif key == "image":
|
||||
# Image field: save all the information
|
||||
if feed[key].has_key("url"):
|
||||
self.set_as_string(key + "_url", feed[key].url)
|
||||
if feed[key].has_key("link"):
|
||||
self.set_as_string(key + "_link", feed[key].link)
|
||||
if feed[key].has_key("title"):
|
||||
self.set_as_string(key + "_title", feed[key].title)
|
||||
if feed[key].has_key("width"):
|
||||
self.set_as_string(key + "_width", str(feed[key].width))
|
||||
if feed[key].has_key("height"):
|
||||
self.set_as_string(key + "_height", str(feed[key].height))
|
||||
elif isinstance(feed[key], (str, unicode)):
|
||||
# String fields
|
||||
try:
|
||||
detail = key + '_detail'
|
||||
if feed.has_key(detail) and feed[detail].has_key('type'):
|
||||
if feed[detail].type == 'text/html':
|
||||
feed[key] = sanitize.HTML(feed[key])
|
||||
elif feed[detail].type == 'text/plain':
|
||||
feed[key] = xml.sax.saxutils.escape(feed[key])
|
||||
self.set_as_string(key, feed[key])
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
log.exception("Ignored '%s' of <%s>, unknown format",
|
||||
key, self.url)
|
||||
|
||||
def update_entries(self, entries):
|
||||
"""Update entries from the feed.
|
||||
|
||||
This reads the entries supplied by feedparser and updates the
|
||||
cached information about them. It's at this point we update
|
||||
the 'updated' timestamp and keep the old one in 'last_updated',
|
||||
these provide boundaries for acceptable entry times.
|
||||
|
||||
If this is the first time a feed has been updated then most of the
|
||||
items will be marked as hidden, according to Planet.new_feed_items.
|
||||
|
||||
If the feed does not contain items which, according to the sort order,
|
||||
should be there; those items are assumed to have been expired from
|
||||
the feed or replaced and are removed from the cache.
|
||||
"""
|
||||
if not len(entries):
|
||||
return
|
||||
|
||||
self.last_updated = self.updated
|
||||
self.updated = time.gmtime()
|
||||
|
||||
new_items = []
|
||||
feed_items = []
|
||||
for entry in entries:
|
||||
# Try really hard to find some kind of unique identifier
|
||||
if entry.has_key("id"):
|
||||
entry_id = cache.utf8(entry.id)
|
||||
elif entry.has_key("link"):
|
||||
entry_id = cache.utf8(entry.link)
|
||||
elif entry.has_key("title"):
|
||||
entry_id = (self.url + "/"
|
||||
+ md5.new(cache.utf8(entry.title)).hexdigest())
|
||||
elif entry.has_key("summary"):
|
||||
entry_id = (self.url + "/"
|
||||
+ md5.new(cache.utf8(entry.summary)).hexdigest())
|
||||
else:
|
||||
log.error("Unable to find or generate id, entry ignored")
|
||||
continue
|
||||
|
||||
# Create the item if necessary and update
|
||||
if self.has_item(entry_id):
|
||||
item = self._items[entry_id]
|
||||
else:
|
||||
item = NewsItem(self, entry_id)
|
||||
self._items[entry_id] = item
|
||||
new_items.append(item)
|
||||
item.update(entry)
|
||||
feed_items.append(entry_id)
|
||||
|
||||
# Hide excess items the first time through
|
||||
if self.last_updated is None and self._planet.new_feed_items \
|
||||
and len(feed_items) > self._planet.new_feed_items:
|
||||
item.hidden = "yes"
|
||||
log.debug("Marked <%s> as hidden (new feed)", entry_id)
|
||||
|
||||
# Assign order numbers in reverse
|
||||
new_items.reverse()
|
||||
for item in new_items:
|
||||
item.order = self.next_order = str(int(self.next_order) + 1)
|
||||
|
||||
# Check for expired or replaced items
|
||||
feed_count = len(feed_items)
|
||||
log.debug("Items in Feed: %d", feed_count)
|
||||
for item in self.items(sorted=1):
|
||||
if feed_count < 1:
|
||||
break
|
||||
elif item.id in feed_items:
|
||||
feed_count -= 1
|
||||
elif item._channel.url_status != '226':
|
||||
del(self._items[item.id])
|
||||
self._expired.append(item)
|
||||
log.debug("Removed expired or replaced item <%s>", item.id)
|
||||
|
||||
def get_name(self, key):
|
||||
"""Return the key containing the name."""
|
||||
for key in ("name", "title"):
|
||||
if self.has_key(key) and self.key_type(key) != self.NULL:
|
||||
return self.get_as_string(key)
|
||||
|
||||
return ""
|
||||
|
||||
class NewsItem(cache.CachedInfo):
|
||||
"""An item of news.
|
||||
|
||||
This class represents a single item of news on a channel. They're
|
||||
created by members of the Channel class and accessible through it.
|
||||
|
||||
Properties:
|
||||
id Channel-unique identifier for this item.
|
||||
id_hash Relatively short, printable cryptographic hash of id
|
||||
date Corrected UTC-Normalised update time, for sorting.
|
||||
order Order in which items on the same date can be sorted.
|
||||
hidden Item should be hidden (True if exists).
|
||||
|
||||
title One-line title (*).
|
||||
link Link to the original format text (*).
|
||||
summary Short first-page summary (*).
|
||||
content Full HTML content.
|
||||
|
||||
modified Date the item claims to have been modified (*).
|
||||
issued Date the item claims to have been issued (*).
|
||||
created Date the item claims to have been created (*).
|
||||
expired Date the item claims to expire (*).
|
||||
|
||||
author Name of the author (*).
|
||||
publisher Name of the publisher (*).
|
||||
category Category name (*).
|
||||
comments Link to a page to enter comments (*).
|
||||
license Link to the licence for the content (*).
|
||||
source_name Name of the original source of this item (*).
|
||||
source_link Link to the original source of this item (*).
|
||||
|
||||
Properties marked (*) will only be present if the original feed
|
||||
contained them. Note that the various optional date fields are
|
||||
simply claims made by the item and parsed from the information
|
||||
given, 'date' is a far more reliable source of information.
|
||||
|
||||
Some feeds may define additional properties to those above.
|
||||
"""
|
||||
IGNORE_KEYS = ("categories", "contributors", "enclosures", "links",
|
||||
"guidislink", "date", "tags")
|
||||
|
||||
def __init__(self, channel, id_):
|
||||
cache.CachedInfo.__init__(self, channel._cache, id_)
|
||||
|
||||
self._channel = channel
|
||||
self.id = id_
|
||||
self.id_hash = md5.new(id_).hexdigest()
|
||||
self.date = None
|
||||
self.order = None
|
||||
self.content = None
|
||||
self.cache_read()
|
||||
|
||||
def update(self, entry):
|
||||
"""Update the item from the feedparser entry given."""
|
||||
for key in entry.keys():
|
||||
if key in self.IGNORE_KEYS or key + "_parsed" in self.IGNORE_KEYS:
|
||||
# Ignored fields
|
||||
pass
|
||||
elif entry.has_key(key + "_parsed"):
|
||||
# Ignore unparsed date fields
|
||||
pass
|
||||
elif key.endswith("_detail"):
|
||||
# retain name, email, and language sub-fields
|
||||
if entry[key].has_key('name') and entry[key].name:
|
||||
self.set_as_string(key.replace("_detail","_name"), \
|
||||
entry[key].name)
|
||||
if entry[key].has_key('email') and entry[key].email:
|
||||
self.set_as_string(key.replace("_detail","_email"), \
|
||||
entry[key].email)
|
||||
if entry[key].has_key('language') and entry[key].language and \
|
||||
(not self._channel.has_key('language') or \
|
||||
entry[key].language != self._channel.language):
|
||||
self.set_as_string(key.replace("_detail","_language"), \
|
||||
entry[key].language)
|
||||
elif key.endswith("_parsed"):
|
||||
# Date fields
|
||||
if entry[key] is not None:
|
||||
self.set_as_date(key[:-len("_parsed")], entry[key])
|
||||
elif key == "source":
|
||||
# Source field: save both url and value
|
||||
if entry[key].has_key("value"):
|
||||
self.set_as_string(key + "_name", entry[key].value)
|
||||
if entry[key].has_key("url"):
|
||||
self.set_as_string(key + "_link", entry[key].url)
|
||||
elif key == "content":
|
||||
# Content field: concatenate the values
|
||||
value = ""
|
||||
for item in entry[key]:
|
||||
if item.type == 'text/html':
|
||||
item.value = sanitize.HTML(item.value)
|
||||
elif item.type == 'text/plain':
|
||||
item.value = xml.sax.saxutils.escape(item.value)
|
||||
if item.has_key('language') and item.language and \
|
||||
(not self._channel.has_key('language') or
|
||||
item.language != self._channel.language) :
|
||||
self.set_as_string(key + "_language", item.language)
|
||||
value += cache.utf8(item.value)
|
||||
self.set_as_string(key, value)
|
||||
elif isinstance(entry[key], (str, unicode)):
|
||||
# String fields
|
||||
try:
|
||||
detail = key + '_detail'
|
||||
if entry.has_key(detail):
|
||||
if entry[detail].has_key('type'):
|
||||
if entry[detail].type == 'text/html':
|
||||
entry[key] = sanitize.HTML(entry[key])
|
||||
elif entry[detail].type == 'text/plain':
|
||||
entry[key] = xml.sax.saxutils.escape(entry[key])
|
||||
self.set_as_string(key, entry[key])
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
log.exception("Ignored '%s' of <%s>, unknown format",
|
||||
key, self.id)
|
||||
|
||||
# Generate the date field if we need to
|
||||
self.get_date("date")
|
||||
|
||||
def get_date(self, key):
|
||||
"""Get (or update) the date key.
|
||||
|
||||
We check whether the date the entry claims to have been changed is
|
||||
since we last updated this feed and when we pulled the feed off the
|
||||
site.
|
||||
|
||||
If it is then it's probably not bogus, and we'll sort accordingly.
|
||||
|
||||
If it isn't then we bound it appropriately, this ensures that
|
||||
entries appear in posting sequence but don't overlap entries
|
||||
added in previous updates and don't creep into the next one.
|
||||
"""
|
||||
|
||||
for other_key in ("updated", "modified", "published", "issued", "created"):
|
||||
if self.has_key(other_key):
|
||||
date = self.get_as_date(other_key)
|
||||
break
|
||||
else:
|
||||
date = None
|
||||
|
||||
if date is not None:
|
||||
if date > self._channel.updated:
|
||||
date = self._channel.updated
|
||||
# elif date < self._channel.last_updated:
|
||||
# date = self._channel.updated
|
||||
elif self.has_key(key) and self.key_type(key) != self.NULL:
|
||||
return self.get_as_date(key)
|
||||
else:
|
||||
date = self._channel.updated
|
||||
|
||||
self.set_as_date(key, date)
|
||||
return date
|
||||
|
||||
def get_content(self, key):
|
||||
"""Return the key containing the content."""
|
||||
for key in ("content", "tagline", "summary"):
|
||||
if self.has_key(key) and self.key_type(key) != self.NULL:
|
||||
return self.get_as_string(key)
|
||||
|
||||
return ""
|
|
@ -1,953 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
"""Planet aggregator library.
|
||||
|
||||
This package is a library for developing web sites or software that
|
||||
aggregate RSS, CDF and Atom feeds taken from elsewhere into a single,
|
||||
combined feed.
|
||||
"""
|
||||
|
||||
__version__ = "2.0"
|
||||
__authors__ = [ "Scott James Remnant <scott@netsplit.com>",
|
||||
"Jeff Waugh <jdub@perkypants.org>" ]
|
||||
__license__ = "Python"
|
||||
|
||||
|
||||
# Modules available without separate import
|
||||
import cache
|
||||
import feedparser
|
||||
import sanitize
|
||||
import htmltmpl
|
||||
import sgmllib
|
||||
try:
|
||||
import logging
|
||||
except:
|
||||
import compat_logging as logging
|
||||
|
||||
# Limit the effect of "from planet import *"
|
||||
__all__ = ("cache", "feedparser", "htmltmpl", "logging",
|
||||
"Planet", "Channel", "NewsItem")
|
||||
|
||||
|
||||
import os
|
||||
import md5
|
||||
import time
|
||||
import dbhash
|
||||
import re
|
||||
|
||||
try:
|
||||
from xml.sax.saxutils import escape
|
||||
except:
|
||||
def escape(data):
|
||||
return data.replace("&","&").replace(">",">").replace("<","<")
|
||||
|
||||
# Version information (for generator headers)
|
||||
VERSION = ("Planet/%s +http://www.planetplanet.org" % __version__)
|
||||
|
||||
# Default User-Agent header to send when retreiving feeds
|
||||
USER_AGENT = VERSION + " " + feedparser.USER_AGENT
|
||||
|
||||
# Default cache directory
|
||||
CACHE_DIRECTORY = "cache"
|
||||
|
||||
# Default number of items to display from a new feed
|
||||
NEW_FEED_ITEMS = 10
|
||||
|
||||
# Useful common date/time formats
|
||||
TIMEFMT_ISO = "%Y-%m-%dT%H:%M:%S+00:00"
|
||||
TIMEFMT_822 = "%a, %d %b %Y %H:%M:%S +0000"
|
||||
|
||||
|
||||
# Log instance to use here
|
||||
log = logging.getLogger("planet")
|
||||
try:
|
||||
log.warning
|
||||
except:
|
||||
log.warning = log.warn
|
||||
|
||||
# Defaults for the template file config sections
|
||||
ENCODING = "utf-8"
|
||||
ITEMS_PER_PAGE = 60
|
||||
DAYS_PER_PAGE = 0
|
||||
OUTPUT_DIR = "output"
|
||||
DATE_FORMAT = "%B %d, %Y %I:%M %p"
|
||||
NEW_DATE_FORMAT = "%B %d, %Y"
|
||||
ACTIVITY_THRESHOLD = 0
|
||||
|
||||
class stripHtml(sgmllib.SGMLParser):
|
||||
"remove all tags from the data"
|
||||
def __init__(self, data):
|
||||
sgmllib.SGMLParser.__init__(self)
|
||||
self.result=''
|
||||
self.feed(data)
|
||||
self.close()
|
||||
def handle_data(self, data):
|
||||
if data: self.result+=data
|
||||
|
||||
def template_info(item, date_format):
|
||||
"""Produce a dictionary of template information."""
|
||||
info = {}
|
||||
for key in item.keys():
|
||||
if item.key_type(key) == item.DATE:
|
||||
date = item.get_as_date(key)
|
||||
info[key] = time.strftime(date_format, date)
|
||||
info[key + "_iso"] = time.strftime(TIMEFMT_ISO, date)
|
||||
info[key + "_822"] = time.strftime(TIMEFMT_822, date)
|
||||
else:
|
||||
info[key] = item[key]
|
||||
if 'title' in item.keys():
|
||||
info['title_plain'] = stripHtml(info['title']).result
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class Planet:
|
||||
"""A set of channels.
|
||||
|
||||
This class represents a set of channels for which the items will
|
||||
be aggregated together into one combined feed.
|
||||
|
||||
Properties:
|
||||
user_agent User-Agent header to fetch feeds with.
|
||||
cache_directory Directory to store cached channels in.
|
||||
new_feed_items Number of items to display from a new feed.
|
||||
filter A regular expression that articles must match.
|
||||
exclude A regular expression that articles must not match.
|
||||
"""
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
|
||||
self._channels = []
|
||||
|
||||
self.user_agent = USER_AGENT
|
||||
self.cache_directory = CACHE_DIRECTORY
|
||||
self.new_feed_items = NEW_FEED_ITEMS
|
||||
self.filter = None
|
||||
self.exclude = None
|
||||
|
||||
def tmpl_config_get(self, template, option, default=None, raw=0, vars=None):
|
||||
"""Get a template value from the configuration, with a default."""
|
||||
if self.config.has_option(template, option):
|
||||
return self.config.get(template, option, raw=raw, vars=None)
|
||||
elif self.config.has_option("Planet", option):
|
||||
return self.config.get("Planet", option, raw=raw, vars=None)
|
||||
else:
|
||||
return default
|
||||
|
||||
def gather_channel_info(self, template_file="Planet"):
|
||||
date_format = self.tmpl_config_get(template_file,
|
||||
"date_format", DATE_FORMAT, raw=1)
|
||||
|
||||
activity_threshold = int(self.tmpl_config_get(template_file,
|
||||
"activity_threshold",
|
||||
ACTIVITY_THRESHOLD))
|
||||
|
||||
if activity_threshold:
|
||||
activity_horizon = \
|
||||
time.gmtime(time.time()-86400*activity_threshold)
|
||||
else:
|
||||
activity_horizon = 0
|
||||
|
||||
channels = {}
|
||||
channels_list = []
|
||||
for channel in self.channels(hidden=1):
|
||||
channels[channel] = template_info(channel, date_format)
|
||||
channels_list.append(channels[channel])
|
||||
|
||||
# identify inactive feeds
|
||||
if activity_horizon:
|
||||
latest = channel.items(sorted=1)
|
||||
if len(latest)==0 or latest[0].date < activity_horizon:
|
||||
channels[channel]["message"] = \
|
||||
"no activity in %d days" % activity_threshold
|
||||
|
||||
# report channel level errors
|
||||
if not channel.url_status: continue
|
||||
status = int(channel.url_status)
|
||||
if status == 403:
|
||||
channels[channel]["message"] = "403: forbidden"
|
||||
elif status == 404:
|
||||
channels[channel]["message"] = "404: not found"
|
||||
elif status == 408:
|
||||
channels[channel]["message"] = "408: request timeout"
|
||||
elif status == 410:
|
||||
channels[channel]["message"] = "410: gone"
|
||||
elif status == 500:
|
||||
channels[channel]["message"] = "internal server error"
|
||||
elif status >= 400:
|
||||
channels[channel]["message"] = "http status %s" % status
|
||||
|
||||
return channels, channels_list
|
||||
|
||||
def gather_items_info(self, channels, template_file="Planet", channel_list=None):
|
||||
items_list = []
|
||||
prev_date = []
|
||||
prev_channel = None
|
||||
|
||||
date_format = self.tmpl_config_get(template_file,
|
||||
"date_format", DATE_FORMAT, raw=1)
|
||||
items_per_page = int(self.tmpl_config_get(template_file,
|
||||
"items_per_page", ITEMS_PER_PAGE))
|
||||
days_per_page = int(self.tmpl_config_get(template_file,
|
||||
"days_per_page", DAYS_PER_PAGE))
|
||||
new_date_format = self.tmpl_config_get(template_file,
|
||||
"new_date_format", NEW_DATE_FORMAT, raw=1)
|
||||
|
||||
for newsitem in self.items(max_items=items_per_page,
|
||||
max_days=days_per_page,
|
||||
channels=channel_list):
|
||||
item_info = template_info(newsitem, date_format)
|
||||
chan_info = channels[newsitem._channel]
|
||||
for k, v in chan_info.items():
|
||||
item_info["channel_" + k] = v
|
||||
|
||||
# Check for the start of a new day
|
||||
if prev_date[:3] != newsitem.date[:3]:
|
||||
prev_date = newsitem.date
|
||||
item_info["new_date"] = time.strftime(new_date_format,
|
||||
newsitem.date)
|
||||
|
||||
# Check for the start of a new channel
|
||||
if item_info.has_key("new_date") \
|
||||
or prev_channel != newsitem._channel:
|
||||
prev_channel = newsitem._channel
|
||||
item_info["new_channel"] = newsitem._channel.url
|
||||
|
||||
items_list.append(item_info)
|
||||
|
||||
return items_list
|
||||
|
||||
def run(self, planet_name, planet_link, template_files, offline = False):
|
||||
log = logging.getLogger("planet.runner")
|
||||
|
||||
# Create a planet
|
||||
log.info("Loading cached data")
|
||||
if self.config.has_option("Planet", "cache_directory"):
|
||||
self.cache_directory = self.config.get("Planet", "cache_directory")
|
||||
if self.config.has_option("Planet", "new_feed_items"):
|
||||
self.new_feed_items = int(self.config.get("Planet", "new_feed_items"))
|
||||
self.user_agent = "%s +%s %s" % (planet_name, planet_link,
|
||||
self.user_agent)
|
||||
if self.config.has_option("Planet", "filter"):
|
||||
self.filter = self.config.get("Planet", "filter")
|
||||
|
||||
# The other configuration blocks are channels to subscribe to
|
||||
for feed_url in self.config.sections():
|
||||
if feed_url == "Planet" or feed_url in template_files:
|
||||
continue
|
||||
|
||||
# Create a channel, configure it and subscribe it
|
||||
channel = Channel(self, feed_url)
|
||||
self.subscribe(channel)
|
||||
|
||||
# Update it
|
||||
try:
|
||||
if not offline and not channel.url_status == '410':
|
||||
channel.update()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
log.exception("Update of <%s> failed", feed_url)
|
||||
|
||||
def generate_all_files(self, template_files, planet_name,
|
||||
planet_link, planet_feed, owner_name, owner_email):
|
||||
|
||||
log = logging.getLogger("planet.runner")
|
||||
# Go-go-gadget-template
|
||||
for template_file in template_files:
|
||||
manager = htmltmpl.TemplateManager()
|
||||
log.info("Processing template %s", template_file)
|
||||
try:
|
||||
template = manager.prepare(template_file)
|
||||
except htmltmpl.TemplateError:
|
||||
template = manager.prepare(os.path.basename(template_file))
|
||||
# Read the configuration
|
||||
output_dir = self.tmpl_config_get(template_file,
|
||||
"output_dir", OUTPUT_DIR)
|
||||
date_format = self.tmpl_config_get(template_file,
|
||||
"date_format", DATE_FORMAT, raw=1)
|
||||
encoding = self.tmpl_config_get(template_file, "encoding", ENCODING)
|
||||
|
||||
# We treat each template individually
|
||||
base = os.path.splitext(os.path.basename(template_file))[0]
|
||||
url = os.path.join(planet_link, base)
|
||||
output_file = os.path.join(output_dir, base)
|
||||
|
||||
# Gather information
|
||||
channels, channels_list = self.gather_channel_info(template_file)
|
||||
items_list = self.gather_items_info(channels, template_file)
|
||||
|
||||
# Gather item information
|
||||
|
||||
# Process the template
|
||||
tp = htmltmpl.TemplateProcessor(html_escape=0)
|
||||
tp.set("Items", items_list)
|
||||
tp.set("Channels", channels_list)
|
||||
|
||||
# Generic information
|
||||
tp.set("generator", VERSION)
|
||||
tp.set("name", planet_name)
|
||||
tp.set("link", planet_link)
|
||||
tp.set("owner_name", owner_name)
|
||||
tp.set("owner_email", owner_email)
|
||||
tp.set("url", url)
|
||||
|
||||
if planet_feed:
|
||||
tp.set("feed", planet_feed)
|
||||
tp.set("feedtype", planet_feed.find('rss')>=0 and 'rss' or 'atom')
|
||||
|
||||
# Update time
|
||||
date = time.gmtime()
|
||||
tp.set("date", time.strftime(date_format, date))
|
||||
tp.set("date_iso", time.strftime(TIMEFMT_ISO, date))
|
||||
tp.set("date_822", time.strftime(TIMEFMT_822, date))
|
||||
|
||||
try:
|
||||
log.info("Writing %s", output_file)
|
||||
output_fd = open(output_file, "w")
|
||||
if encoding.lower() in ("utf-8", "utf8"):
|
||||
# UTF-8 output is the default because we use that internally
|
||||
output_fd.write(tp.process(template))
|
||||
elif encoding.lower() in ("xml", "html", "sgml"):
|
||||
# Magic for Python 2.3 users
|
||||
output = tp.process(template).decode("utf-8")
|
||||
output_fd.write(output.encode("ascii", "xmlcharrefreplace"))
|
||||
else:
|
||||
# Must be a "known" encoding
|
||||
output = tp.process(template).decode("utf-8")
|
||||
output_fd.write(output.encode(encoding, "replace"))
|
||||
output_fd.close()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
log.exception("Write of %s failed", output_file)
|
||||
|
||||
def channels(self, hidden=0, sorted=1):
|
||||
"""Return the list of channels."""
|
||||
channels = []
|
||||
for channel in self._channels:
|
||||
if hidden or not channel.has_key("hidden"):
|
||||
channels.append((channel.name, channel))
|
||||
|
||||
if sorted:
|
||||
channels.sort()
|
||||
|
||||
return [ c[-1] for c in channels ]
|
||||
|
||||
def find_by_basename(self, basename):
|
||||
for channel in self._channels:
|
||||
if basename == channel.cache_basename(): return channel
|
||||
|
||||
def subscribe(self, channel):
|
||||
"""Subscribe the planet to the channel."""
|
||||
self._channels.append(channel)
|
||||
|
||||
def unsubscribe(self, channel):
|
||||
"""Unsubscribe the planet from the channel."""
|
||||
self._channels.remove(channel)
|
||||
|
||||
def items(self, hidden=0, sorted=1, max_items=0, max_days=0, channels=None):
    """Return an optionally filtered list of items in the channel.

    The filters are applied in the following order:

    If hidden is true then items in hidden channels and hidden items
    will be returned.

    If sorted is true then the item list will be sorted with the newest
    first.

    If max_items is non-zero then this number of items, at most, will
    be returned.

    If max_days is non-zero then any items older than the newest by
    this number of days won't be returned.  Requires sorted=1 to work.


    The sharp-eyed will note that this looks a little strange code-wise,
    it turns out that Python gets *really* slow if we try to sort the
    actual items themselves.  Also we use mktime here, but it's ok
    because we discard the numbers and just need them to be relatively
    consistent between each other.
    """
    # Planet-wide include/exclude filters, compiled once (case-insensitive).
    planet_filter_re = None
    if self.filter:
        planet_filter_re = re.compile(self.filter, re.I)
    planet_exclude_re = None
    if self.exclude:
        planet_exclude_re = re.compile(self.exclude, re.I)

    items = []
    seen_guids = {}
    if not channels: channels = self.channels(hidden=hidden, sorted=0)
    for channel in channels:
        for item in channel._items.values():
            if hidden or not item.has_key("hidden"):

                # Per-channel filters apply in addition to the
                # planet-wide ones.
                channel_filter_re = None
                if channel.filter:
                    channel_filter_re = re.compile(channel.filter,
                                                   re.I)
                channel_exclude_re = None
                if channel.exclude:
                    channel_exclude_re = re.compile(channel.exclude,
                                                    re.I)
                # Only fetch title/content when some filter will
                # actually inspect them.
                if (planet_filter_re or planet_exclude_re \
                    or channel_filter_re or channel_exclude_re):
                    title = ""
                    if item.has_key("title"):
                        title = item.title
                    content = item.get_content("content")

                if planet_filter_re:
                    if not (planet_filter_re.search(title) \
                            or planet_filter_re.search(content)):
                        continue

                if planet_exclude_re:
                    if (planet_exclude_re.search(title) \
                        or planet_exclude_re.search(content)):
                        continue

                if channel_filter_re:
                    if not (channel_filter_re.search(title) \
                            or channel_filter_re.search(content)):
                        continue

                if channel_exclude_re:
                    if (channel_exclude_re.search(title) \
                        or channel_exclude_re.search(content)):
                        continue

                # Deduplicate by guid across channels: only the first
                # occurrence of an id is kept.  We sort decorated
                # (timestamp, order, item) tuples, not items (see the
                # docstring for why).
                if item.id not in seen_guids:
                    seen_guids[item.id] = 1
                    items.append((time.mktime(item.date), item.order, item))

    # Sort the list, newest first
    if sorted:
        items.sort()
        items.reverse()

    # Apply max_items filter
    if len(items) and max_items:
        items = items[:max_items]

    # Apply max_days filter (only meaningful when the list is sorted,
    # since the newest item must be at index 0)
    if len(items) and max_days:
        max_count = 0
        # BUGFIX: a day is 86400 seconds (24*60*60); this was mistyped
        # as 84600, silently shortening the window by 30 min per day.
        max_time = items[0][0] - max_days * 86400
        for item in items:
            if item[0] > max_time:
                max_count += 1
            else:
                items = items[:max_count]
                break

    return [ i[-1] for i in items ]
class Channel(cache.CachedInfo):
    """A list of news items.

    This class represents a list of news items taken from the feed of
    a website or other source.

    Properties:
        url             URL of the feed.
        url_etag        E-Tag of the feed URL.
        url_modified    Last modified time of the feed URL.
        url_status      Last HTTP status of the feed URL.
        hidden          Channel should be hidden (True if exists).
        name            Name of the feed owner, or feed title.
        next_order      Next order number to be assigned to NewsItem

        updated         Correct UTC-Normalised update time of the feed.
        last_updated    Correct UTC-Normalised time the feed was last updated.

        id              An identifier the feed claims is unique (*).
        title           One-line title (*).
        link            Link to the original format feed (*).
        tagline         Short description of the feed (*).
        info            Longer description of the feed (*).

        modified        Date the feed claims to have been modified (*).

        author          Name of the author (*).
        publisher       Name of the publisher (*).
        generator       Name of the feed generator (*).
        category        Category name (*).
        copyright       Copyright information for humans to read (*).
        license         Link to the licence for the content (*).
        docs            Link to the specification of the feed format (*).
        language        Primary language (*).
        errorreportsto  E-Mail address to send error reports to (*).

        image_url       URL of an associated image (*).
        image_link      Link to go with the associated image (*).
        image_title     Alternative text of the associated image (*).
        image_width     Width of the associated image (*).
        image_height    Height of the associated image (*).

        filter          A regular expression that articles must match.
        exclude         A regular expression that articles must not match.

    Properties marked (*) will only be present if the original feed
    contained them.  Note that the optional 'modified' date field is simply
    a claim made by the item and parsed from the information given, 'updated'
    (and 'last_updated') are far more reliable sources of information.

    Some feeds may define additional properties to those above.
    """
    # Feedparser keys that are never copied into the cache (structured
    # or redundant data the templates have no use for).
    IGNORE_KEYS = ("links", "contributors", "textinput", "cloud", "categories",
                   "url", "href", "url_etag", "url_modified", "tags", "itunes_explicit")

    def __init__(self, planet, url):
        # One dbhash cache file per feed, created on demand.
        if not os.path.isdir(planet.cache_directory):
            os.makedirs(planet.cache_directory)
        cache_filename = cache.filename(planet.cache_directory, url)
        cache_file = dbhash.open(cache_filename, "c", 0666)

        cache.CachedInfo.__init__(self, cache_file, url, root=1)

        self._items = {}
        self._planet = planet
        self._expired = []
        self.url = url
        # retain the original URL for error reporting
        self.configured_url = url
        self.url_etag = None
        self.url_status = None
        self.url_modified = None
        self.name = None
        self.updated = None
        self.last_updated = None
        self.filter = None
        self.exclude = None
        self.next_order = "0"
        # Cached values (from a previous run) override the defaults above;
        # config-file options override both.
        self.cache_read()
        self.cache_read_entries()

        if planet.config.has_section(url):
            for option in planet.config.options(url):
                value = planet.config.get(url, option)
                self.set_as_string(option, value, cached=0)

    def has_item(self, id_):
        """Check whether the item exists in the channel."""
        return self._items.has_key(id_)

    def get_item(self, id_):
        """Return the item from the channel."""
        return self._items[id_]

    # Special methods
    __contains__ = has_item

    def items(self, hidden=0, sorted=0):
        """Return the item list.

        Items with a "hidden" key are skipped unless hidden is true;
        with sorted true the newest item comes first.
        """
        items = []
        for item in self._items.values():
            # Decorate with (timestamp, order) so tuples, not items,
            # get compared during the sort.
            if hidden or not item.has_key("hidden"):
                items.append((time.mktime(item.date), item.order, item))

        if sorted:
            items.sort()
            items.reverse()

        return [ i[-1] for i in items ]

    def __iter__(self):
        """Iterate the sorted item list."""
        return iter(self.items(sorted=1))

    def cache_read_entries(self):
        """Read entry information from the cache."""
        keys = self._cache.keys()
        for key in keys:
            # Keys containing a space are property sub-keys, not entry
            # ids; keys already present on the channel are channel
            # properties rather than entries.
            if key.find(" ") != -1: continue
            if self.has_key(key): continue

            item = NewsItem(self, key)
            self._items[key] = item

    def cache_basename(self):
        # Basename of this channel's cache file, derived from its id.
        return cache.filename('',self._id)

    def cache_write(self, sync=1):
        """Write channel and item information to the cache."""
        # Write items first without syncing; the final CachedInfo write
        # performs the sync for the whole file.
        for item in self._items.values():
            item.cache_write(sync=0)
        for item in self._expired:
            item.cache_clear(sync=0)
        cache.CachedInfo.cache_write(self, sync)

        self._expired = []

    def feed_information(self):
        """
        Returns a description string for the feed embedded in this channel.

        This will usually simply be the feed url embedded in <>, but in the
        case where the current self.url has changed from the original
        self.configured_url the string will contain both pieces of information.
        This is so that the URL in question is easier to find in logging
        output: getting an error about a URL that doesn't appear in your config
        file is annoying.
        """
        if self.url == self.configured_url:
            return "<%s>" % self.url
        else:
            return "<%s> (formerly <%s>)" % (self.url, self.configured_url)

    def update(self):
        """Download the feed to refresh the information.

        This does the actual work of pulling down the feed and if it changes
        updates the cached information about the feed and entries within it.
        """
        info = feedparser.parse(self.url,
                                etag=self.url_etag, modified=self.url_modified,
                                agent=self._planet.user_agent)
        # Derive an HTTP-like status even when feedparser reports none:
        # entries present -> assume 200, timeout -> 408, otherwise 500.
        if info.has_key("status"):
            self.url_status = str(info.status)
        elif info.has_key("entries") and len(info.entries)>0:
            self.url_status = str(200)
        elif info.bozo and info.bozo_exception.__class__.__name__=='Timeout':
            self.url_status = str(408)
        else:
            self.url_status = str(500)

        # 301: permanent redirect -- adopt the new URL, hard-linking the
        # old cache file to the new name so history is kept (best-effort).
        if self.url_status == '301' and \
           (info.has_key("entries") and len(info.entries)>0):
            log.warning("Feed has moved from <%s> to <%s>", self.url, info.url)
            try:
                os.link(cache.filename(self._planet.cache_directory, self.url),
                        cache.filename(self._planet.cache_directory, info.url))
            except:
                pass
            self.url = info.url
        elif self.url_status == '304':
            # Not modified: nothing to do.
            log.info("Feed %s unchanged", self.feed_information())
            return
        elif self.url_status == '410':
            # Gone: record the status but keep the cached entries.
            log.info("Feed %s gone", self.feed_information())
            self.cache_write()
            return
        elif self.url_status == '408':
            log.warning("Feed %s timed out", self.feed_information())
            return
        elif int(self.url_status) >= 400:
            log.error("Error %s while updating feed %s",
                      self.url_status, self.feed_information())
            return
        else:
            log.info("Updating feed %s", self.feed_information())

        # Remember validators for the next conditional GET.
        self.url_etag = info.has_key("etag") and info.etag or None
        self.url_modified = info.has_key("modified") and info.modified or None
        if self.url_etag is not None:
            log.debug("E-Tag: %s", self.url_etag)
        if self.url_modified is not None:
            log.debug("Last Modified: %s",
                      time.strftime(TIMEFMT_ISO, self.url_modified))

        self.update_info(info.feed)
        self.update_entries(info.entries)
        self.cache_write()

    def update_info(self, feed):
        """Update information from the feed.

        This reads the feed information supplied by feedparser and updates
        the cached information about the feed.  These are the various
        potentially interesting properties that you might care about.
        """
        for key in feed.keys():
            if key in self.IGNORE_KEYS or key + "_parsed" in self.IGNORE_KEYS:
                # Ignored fields
                pass
            elif feed.has_key(key + "_parsed"):
                # Ignore unparsed date fields
                pass
            elif key.endswith("_detail"):
                # retain name and email sub-fields
                if feed[key].has_key('name') and feed[key].name:
                    self.set_as_string(key.replace("_detail","_name"), \
                        feed[key].name)
                if feed[key].has_key('email') and feed[key].email:
                    self.set_as_string(key.replace("_detail","_email"), \
                        feed[key].email)
            elif key == "items":
                # Ignore items field
                pass
            elif key.endswith("_parsed"):
                # Date fields
                if feed[key] is not None:
                    self.set_as_date(key[:-len("_parsed")], feed[key])
            elif key == "image":
                # Image field: save all the information
                if feed[key].has_key("url"):
                    self.set_as_string(key + "_url", feed[key].url)
                if feed[key].has_key("link"):
                    self.set_as_string(key + "_link", feed[key].link)
                if feed[key].has_key("title"):
                    self.set_as_string(key + "_title", feed[key].title)
                if feed[key].has_key("width"):
                    self.set_as_string(key + "_width", str(feed[key].width))
                if feed[key].has_key("height"):
                    self.set_as_string(key + "_height", str(feed[key].height))
            elif isinstance(feed[key], (str, unicode)):
                # String fields: sanitise HTML, escape plain text
                try:
                    detail = key + '_detail'
                    if feed.has_key(detail) and feed[detail].has_key('type'):
                        if feed[detail].type == 'text/html':
                            feed[key] = sanitize.HTML(feed[key])
                        elif feed[detail].type == 'text/plain':
                            feed[key] = escape(feed[key])
                    self.set_as_string(key, feed[key])
                except KeyboardInterrupt:
                    raise
                except:
                    log.exception("Ignored '%s' of <%s>, unknown format",
                                  key, self.url)

    def update_entries(self, entries):
        """Update entries from the feed.

        This reads the entries supplied by feedparser and updates the
        cached information about them.  It's at this point we update
        the 'updated' timestamp and keep the old one in 'last_updated',
        these provide boundaries for acceptable entry times.

        If this is the first time a feed has been updated then most of the
        items will be marked as hidden, according to Planet.new_feed_items.

        If the feed does not contain items which, according to the sort order,
        should be there; those items are assumed to have been expired from
        the feed or replaced and are removed from the cache.
        """
        if not len(entries):
            return

        self.last_updated = self.updated
        self.updated = time.gmtime()

        new_items = []
        feed_items = []
        for entry in entries:
            # Try really hard to find some kind of unique identifier:
            # explicit id, then link, then a digest of title or summary.
            if entry.has_key("id"):
                entry_id = cache.utf8(entry.id)
            elif entry.has_key("link"):
                entry_id = cache.utf8(entry.link)
            elif entry.has_key("title"):
                entry_id = (self.url + "/"
                            + md5.new(cache.utf8(entry.title)).hexdigest())
            elif entry.has_key("summary"):
                entry_id = (self.url + "/"
                            + md5.new(cache.utf8(entry.summary)).hexdigest())
            else:
                log.error("Unable to find or generate id, entry ignored")
                continue

            # Create the item if necessary and update
            if self.has_item(entry_id):
                item = self._items[entry_id]
            else:
                item = NewsItem(self, entry_id)
                self._items[entry_id] = item
                new_items.append(item)
            item.update(entry)
            feed_items.append(entry_id)

            # Hide excess items the first time through
            if self.last_updated is None and self._planet.new_feed_items \
                   and len(feed_items) > self._planet.new_feed_items:
                item.hidden = "yes"
                log.debug("Marked <%s> as hidden (new feed)", entry_id)

        # Assign order numbers in reverse so that, within a feed, items
        # that appear earlier get higher order numbers.
        new_items.reverse()
        for item in new_items:
            item.order = self.next_order = str(int(self.next_order) + 1)

        # Check for expired or replaced items: anything newer than the
        # oldest entry still present in the feed, but itself absent from
        # the feed, is dropped from the cache.
        feed_count = len(feed_items)
        log.debug("Items in Feed: %d", feed_count)
        for item in self.items(sorted=1):
            if feed_count < 1:
                break
            elif item.id in feed_items:
                feed_count -= 1
            elif item._channel.url_status != '226':
                # NOTE(review): '226' appears to exempt delta-encoded
                # (RFC 3229 "IM Used") responses from expiry -- confirm.
                del(self._items[item.id])
                self._expired.append(item)
                log.debug("Removed expired or replaced item <%s>", item.id)

    def get_name(self, key):
        """Return the key containing the name."""
        # NOTE(review): the 'key' parameter is immediately shadowed by
        # the loop variable and therefore unused.
        for key in ("name", "title"):
            if self.has_key(key) and self.key_type(key) != self.NULL:
                return self.get_as_string(key)

        return ""
class NewsItem(cache.CachedInfo):
    """An item of news.

    This class represents a single item of news on a channel.  They're
    created by members of the Channel class and accessible through it.

    Properties:
        id              Channel-unique identifier for this item.
        id_hash         Relatively short, printable cryptographic hash of id
        date            Corrected UTC-Normalised update time, for sorting.
        order           Order in which items on the same date can be sorted.
        hidden          Item should be hidden (True if exists).

        title           One-line title (*).
        link            Link to the original format text (*).
        summary         Short first-page summary (*).
        content         Full HTML content.

        modified        Date the item claims to have been modified (*).
        issued          Date the item claims to have been issued (*).
        created         Date the item claims to have been created (*).
        expired         Date the item claims to expire (*).

        author          Name of the author (*).
        publisher       Name of the publisher (*).
        category        Category name (*).
        comments        Link to a page to enter comments (*).
        license         Link to the licence for the content (*).
        source_name     Name of the original source of this item (*).
        source_link     Link to the original source of this item (*).

    Properties marked (*) will only be present if the original feed
    contained them.  Note that the various optional date fields are
    simply claims made by the item and parsed from the information
    given, 'date' is a far more reliable source of information.

    Some feeds may define additional properties to those above.
    """
    # Feedparser keys never copied into the cache.
    IGNORE_KEYS = ("categories", "contributors", "enclosures", "links",
                   "guidislink", "date", "tags")

    def __init__(self, channel, id_):
        # Items share the dbhash cache file of their owning channel.
        cache.CachedInfo.__init__(self, channel._cache, id_)

        self._channel = channel
        self.id = id_
        self.id_hash = md5.new(id_).hexdigest()
        self.date = None
        self.order = None
        self.content = None
        self.cache_read()

    def update(self, entry):
        """Update the item from the feedparser entry given."""
        for key in entry.keys():
            if key in self.IGNORE_KEYS or key + "_parsed" in self.IGNORE_KEYS:
                # Ignored fields
                pass
            elif entry.has_key(key + "_parsed"):
                # Ignore unparsed date fields
                pass
            elif key.endswith("_detail"):
                # retain name, email, and language sub-fields
                if entry[key].has_key('name') and entry[key].name:
                    self.set_as_string(key.replace("_detail","_name"), \
                        entry[key].name)
                if entry[key].has_key('email') and entry[key].email:
                    self.set_as_string(key.replace("_detail","_email"), \
                        entry[key].email)
                # Only record the language when it differs from the
                # channel-level language.
                if entry[key].has_key('language') and entry[key].language and \
                   (not self._channel.has_key('language') or \
                   entry[key].language != self._channel.language):
                    self.set_as_string(key.replace("_detail","_language"), \
                        entry[key].language)
            elif key.endswith("_parsed"):
                # Date fields
                if entry[key] is not None:
                    self.set_as_date(key[:-len("_parsed")], entry[key])
            elif key == "source":
                # Source field: save both url and value
                if entry[key].has_key("value"):
                    self.set_as_string(key + "_name", entry[key].value)
                if entry[key].has_key("url"):
                    self.set_as_string(key + "_link", entry[key].url)
            elif key == "content":
                # Content field: concatenate the values
                value = ""
                for item in entry[key]:
                    if item.type == 'text/html':
                        item.value = sanitize.HTML(item.value)
                    elif item.type == 'text/plain':
                        item.value = escape(item.value)
                    if item.has_key('language') and item.language and \
                       (not self._channel.has_key('language') or
                        item.language != self._channel.language) :
                        self.set_as_string(key + "_language", item.language)
                    value += cache.utf8(item.value)
                self.set_as_string(key, value)
            elif isinstance(entry[key], (str, unicode)):
                # String fields: sanitise HTML, escape plain text
                try:
                    detail = key + '_detail'
                    if entry.has_key(detail):
                        if entry[detail].has_key('type'):
                            if entry[detail].type == 'text/html':
                                entry[key] = sanitize.HTML(entry[key])
                            elif entry[detail].type == 'text/plain':
                                entry[key] = escape(entry[key])
                    self.set_as_string(key, entry[key])
                except KeyboardInterrupt:
                    raise
                except:
                    log.exception("Ignored '%s' of <%s>, unknown format",
                                  key, self.id)

        # Generate the date field if we need to
        self.get_date("date")

    def get_date(self, key):
        """Get (or update) the date key.

        We check whether the date the entry claims to have been changed is
        since we last updated this feed and when we pulled the feed off the
        site.

        If it is then it's probably not bogus, and we'll sort accordingly.

        If it isn't then we bound it appropriately, this ensures that
        entries appear in posting sequence but don't overlap entries
        added in previous updates and don't creep into the next one.
        """

        # First claimed date wins, in decreasing order of reliability.
        for other_key in ("updated", "modified", "published", "issued", "created"):
            if self.has_key(other_key):
                date = self.get_as_date(other_key)
                break
        else:
            date = None

        if date is not None:
            # Clamp future-dated claims to the channel's update time.
            if date > self._channel.updated:
                date = self._channel.updated
#            elif date < self._channel.last_updated:
#                date = self._channel.updated
        elif self.has_key(key) and self.key_type(key) != self.NULL:
            # No claimed date; keep any previously stored value.
            return self.get_as_date(key)
        else:
            date = self._channel.updated

        self.set_as_date(key, date)
        return date

    def get_content(self, key):
        """Return the key containing the content."""
        # NOTE(review): the 'key' parameter is immediately shadowed by
        # the loop variable and therefore unused.
        for key in ("content", "tagline", "summary"):
            if self.has_key(key) and self.key_type(key) != self.NULL:
                return self.get_as_string(key)

        return ""
|
@ -15,8 +15,8 @@
|
|||
<foaf:Agent>
|
||||
<foaf:name>A. Murat Eren</foaf:name>
|
||||
<foaf:weblog>
|
||||
<foaf:Document rdf:about="">
|
||||
<dc:title></dc:title>
|
||||
<foaf:Document rdf:about="http://cekirdek.pardus.org.tr/~meren/blog/">
|
||||
<dc:title>There is no silver bullet..</dc:title>
|
||||
<rdfs:seeAlso>
|
||||
<rss:channel rdf:about="" />
|
||||
</rdfs:seeAlso>
|
||||
|
@ -119,8 +119,8 @@
|
|||
<foaf:Agent>
|
||||
<foaf:name>Alper Somuncu</foaf:name>
|
||||
<foaf:weblog>
|
||||
<foaf:Document rdf:about="http://www.alpersomuncu.com/weblog/">
|
||||
<dc:title>alper somuncu nokta com - IBM AIX</dc:title>
|
||||
<foaf:Document rdf:about="">
|
||||
<dc:title></dc:title>
|
||||
<rdfs:seeAlso>
|
||||
<rss:channel rdf:about="" />
|
||||
</rdfs:seeAlso>
|
||||
|
@ -379,8 +379,8 @@
|
|||
<foaf:Agent>
|
||||
<foaf:name>Gökmen Göksel</foaf:name>
|
||||
<foaf:weblog>
|
||||
<foaf:Document rdf:about="">
|
||||
<dc:title></dc:title>
|
||||
<foaf:Document rdf:about="http://cekirdek.pardus.org.tr/~gokmen/zangetsu/blog/">
|
||||
<dc:title>Human UNITS</dc:title>
|
||||
<rdfs:seeAlso>
|
||||
<rss:channel rdf:about="" />
|
||||
</rdfs:seeAlso>
|
||||
|
@ -432,7 +432,7 @@
|
|||
<foaf:name>Hüseyin Uslu</foaf:name>
|
||||
<foaf:weblog>
|
||||
<foaf:Document rdf:about="http://www.huseyinuslu.net/_export/xhtml/topics_linux_feed">
|
||||
<dc:title>Regular (S)expressions » linux</dc:title>
|
||||
<dc:title></dc:title>
|
||||
<rdfs:seeAlso>
|
||||
<rss:channel rdf:about="" />
|
||||
</rdfs:seeAlso>
|
||||
|
@ -548,8 +548,8 @@
|
|||
<foaf:Agent>
|
||||
<foaf:name>Levent Yalçın</foaf:name>
|
||||
<foaf:weblog>
|
||||
<foaf:Document rdf:about="http://leoman.gen.tr/blg">
|
||||
<dc:title>Leoman® » LKD-Gezegen</dc:title>
|
||||
<foaf:Document rdf:about="">
|
||||
<dc:title></dc:title>
|
||||
<rdfs:seeAlso>
|
||||
<rss:channel rdf:about="" />
|
||||
</rdfs:seeAlso>
|
||||
|
|
BIN
www/images/planet.png
Normal file
BIN
www/images/planet.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 809 B |
3424
www/index.html
3424
www/index.html
File diff suppressed because one or more lines are too long
|
@ -2,8 +2,8 @@
|
|||
<opml version="1.1">
|
||||
<head>
|
||||
<title>Linux Gezegeni</title>
|
||||
<dateCreated>Pzt, 20 Eki 2008 01:21:58 +0000</dateCreated>
|
||||
<dateModified>Pzt, 20 Eki 2008 01:21:58 +0000</dateModified>
|
||||
<dateCreated>Per, 23 Eki 2008 23:22:40 +0000</dateCreated>
|
||||
<dateModified>Per, 23 Eki 2008 23:22:40 +0000</dateModified>
|
||||
<ownerName>Gezegen Ekibi</ownerName>
|
||||
<ownerEmail>gezegen@linux.org.tr</ownerEmail>
|
||||
</head>
|
||||
|
|
525
www/rss10.xml
525
www/rss10.xml
File diff suppressed because one or more lines are too long
577
www/rss20.xml
577
www/rss20.xml
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user