[mod_python] [ANN] slashbox - class to get/display slashdot.org news stories
richard offer
richard at whitequeen.com
Thu Jul 6 21:13:04 EST 2000
Hi,
Here's a quick hack of a class to get and format news stories from a
number of sites.
It was designed with mod_python in mind, so all you need to do is
import slashbox
...
def handler(req):
...
req.write(str(slashbox.Slashdot()))
To add the latest stories from slashdot to your page.
A number of other sites are included; it's easy to add new ones simply
by sub-classing slashbox. The trick is to find the site's .rss/.rdf file.
You'll need the latest PyXML code.
richard.
# $Id: slashbox.py,v 1.2 2000/07/08 21:34:46 richard Exp $
# Copyright (c) 2000 Richard Offer <richard at whitequeen.com>.
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# Richard Offer BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of Richard Offer shall not be
# used in advertising or otherwise to promote the sale, use or other dealings
# in this Software without prior written authorization from Richard Offer.
#
#
# A python class to grab the Slashdot (and other sites using RSS/RDF) stories.
import stat
import xmllib
import urllib
import urlparse
import os
import time
import string
import sys
from xml.sax import saxlib,saxexts
class _myrdf(saxlib.HandlerBase):
    """SAX document handler that collects the <item> entries of a feed.

    Every story is a dict with the keys "title", "link" and "description".
    The text of each field has its whitespace runs collapsed to single
    spaces.  The finished stories are available from stories().
    """

    def __init__(self):
        # 1 while the parser is inside the element of that name, else 0.
        self._in = {"title": 0, "link": 0, "description": 0}
        # Scratch record that characters() accumulates text into.
        self._txt = self._blank()
        self._stories = []

    def _blank(self):
        """Return a new, empty story record."""
        return {"title": "", "link": "", "description": ""}

    def startElement(self, ele, attr):
        """Begin a new story on <item>, or start capturing a tracked field."""
        if ele == 'item':
            # Drop whatever was gathered outside an item (channel title etc.).
            self._txt = self._blank()
        elif self._in.has_key(ele):
            self._in[ele] = 1

    def endElement(self, ele):
        """Finish a tracked field, or store the completed story on </item>."""
        if self._in.has_key(ele):
            self._in[ele] = 0
            # split() + join() collapses newlines/indentation to single spaces.
            self._txt[ele] = string.join(string.split(self._txt[ele]))
        elif ele == 'item':
            self._stories.append(self._txt)
            # Switch to a fresh scratch record.  Without this the dict just
            # stored would keep receiving the text of any title/link/
            # description element that follows the item (for instance a
            # trailing text-input block), corrupting the last story.
            self._txt = self._blank()

    def characters(self, ch, start, length):
        """Append character data to every field that is currently open."""
        for i in self._in.keys():
            if self._in[i] == 1:
                self._txt[i] = self._txt[i] + ch[start:start+length]

    def stories(self):
        """Return the list of story dicts collected so far."""
        return self._stories
# class Site()
class slashbox:
    """
    Create a single slashbox.

    slashbox(url,freq)
        url is the url of the RSS/RDF file, freq is the update period (mins).
        The freq cannot be less than 30 (smaller values are raised to 30).

    stories()
        returns a list of stories (each story is stored in a dict with the
        keys "title", "link" and "description").

    __str__()
        returns a HTML table containing the stories.

    setName(name)
        changes the name shown in the table heading.

    Several subclasses are available, these already have the url defined,
    so to get the headlines from slashdot.org simply do

    >>> print Slashdot()

    The list of built-in sites is
        Slashdot()
        Freshmeat()
        SecurityFocus()
        Parnassus()
        LWN()
        Fool()
        LinuxNetNews()
        SourceForgeReleases()
        SourceForgeNews()

    If you are using mod_python, you can embed a slashbox simply by

        import slashbox
        ...
        req.write(str(slashbox.Slashdot()))
    """

    def __init__(self, url='http://slashdot.org/slashdot.rdf', freq=60):
        """Fetch (or reuse the cached copy of) the feed at url and parse it."""
        self._url = url
        # Never refresh more often than every 30 minutes.
        if freq < 30:
            freq = 30
        self._freq = freq

        _u = urlparse.urlparse(self._url)
        base = os.path.basename(url)
        if _u[1] == '':
            # No host part in the url: name the cache after the file alone.
            self._file = base
        else:
            self._file = _u[1] + "-" + base
        # NOTE(review): predictable file name in a world-writable directory;
        # consider a private cache directory if other local users are not
        # trusted.
        self._file = "/tmp/slashbox-" + self._file
        self.site = _u[0] + "://" + _u[1] + "/"
        self.name = _u[1]

        if self._cache_is_stale():
            urllib.urlretrieve(self._url, self._file)

        self.dh = _myrdf()
        xmlp = saxexts.make_parser()
        xmlp.setDocumentHandler(self.dh)
        fp = open(self._file)
        try:
            xmlp.parseFile(fp)
        finally:
            # Close the cache file even when the feed fails to parse.
            fp.close()

    def _cache_is_stale(self):
        """Return true when the cache file is missing or older than freq mins."""
        try:
            st = os.stat(self._file)
        except OSError:
            # No cached copy yet.
            return 1
        return st[stat.ST_MTIME] < (time.time() - (self._freq * 60))

    def stories(self):
        """Return the list of story dicts parsed from the feed."""
        return self.dh.stories()

    def __str__(self):
        """Return the stories formatted as a nested HTML table."""
        # Imported here so the block does not rely on a new file-level import.
        from xml.sax.saxutils import escape
        # The link ends up inside a double-quoted attribute, so quotes must
        # be escaped there as well.
        quot = {'"': '&quot;'}

        html = "<SMALL>\n<TABLE BORDER=2 WIDTH=200>\n"
        html = html + '<TH><A HREF="%s">%s</A></TH>\n' % (self.site,
                                                          self.name)
        html = html + "<TR><TD>\n <TABLE><TBODY>\n"
        for story in self.dh.stories():
            # Feed text comes from a remote site: escape it so that markup
            # characters in a headline cannot break (or inject into) the page.
            html = html + ' <TR><TD><A HREF="%s">%s</A>' % (
                escape(story['link'], quot), escape(story['title']))
            # The parser always supplies the key, so test for actual content
            # rather than emitting an empty <SMALL> block.
            if story.get("description"):
                html = html + "<BR><SMALL>%s</SMALL>" % (
                    escape(story['description']),)
            html = html + '</TD></TR>\n'
        html = html + " </TBODY></TABLE></TD></TR>\n</TABLE></SMALL>\n\n"
        return html

    def setName(self, name):
        """Set the name displayed in the heading of the HTML table."""
        self.name = name
class Slashdot(slashbox):
    """Ready-made slashbox showing the Slashdot.org headlines."""

    def __init__(self):
        # Default refresh period; only the feed location and label are fixed.
        slashbox.__init__(self, url='http://slashdot.org/slashdot.rdf')
        self.setName('Slashdot')
class Freshmeat(slashbox):
    """Ready-made slashbox showing the Freshmeat.net headlines."""

    def __init__(self):
        # Default refresh period; only the feed location and label are fixed.
        slashbox.__init__(self, url='http://freshmeat.net/backend/fm.rdf')
        self.setName('Freshmeat')
class SecurityFocus(slashbox):
    """Ready-made slashbox showing the SecurityFocus.com top news."""

    def __init__(self):
        # Default refresh period; only the feed location and label are fixed.
        slashbox.__init__(
            self, url='http://www.securityfocus.com/topnews-rss.html')
        self.setName('Security Focus')
class Parnassus(slashbox):
    """Ready-made slashbox showing the Vaults of Parnassus headlines."""

    def __init__(self):
        # Default refresh period; only the feed location and label are fixed.
        slashbox.__init__(
            self, url='http://www.vex.net/parnassus/parnassus.rss')
        self.setName('Vaults of Parnassus')
class LWN(slashbox):
    """Ready-made slashbox showing the Linux Weekly News headlines."""

    def __init__(self):
        # Default refresh period; only the feed location and label are fixed.
        slashbox.__init__(self, url='http://lwn.net/headlines/rss')
        self.setName('Linux Weekly News')
class Fool(slashbox):
    """Ready-made slashbox showing the Motley Fool headlines."""

    def __init__(self):
        # Default refresh period; only the feed location and label are fixed.
        slashbox.__init__(
            self, url='http://www.fool.com/About/headlines/rss_headlines.asp')
        self.setName('The Motley Fool')
class LinuxNetNews(slashbox):
    """Ready-made slashbox showing the Linux Net News headlines."""

    def __init__(self):
        # Default refresh period; only the feed location and label are fixed.
        slashbox.__init__(
            self, url='http://www.netnews.opensrc.org/index.rdf')
        self.setName('Linux Net News')
class SourceForgeReleases(slashbox):
    """Ready-made slashbox listing the new releases on SourceForge."""

    def __init__(self):
        # Default refresh period; only the feed location and label are fixed.
        slashbox.__init__(
            self, url='http://sourceforge.net/export/rss_sfnewreleases.php')
        self.setName('Sourceforge New Releases')
class SourceForgeNews(slashbox):
    """Ready-made slashbox listing the project news at SourceForge."""

    def __init__(self):
        # Default refresh period; only the feed location and label are fixed.
        slashbox.__init__(
            self, url='http://sourceforge.net/export/rss_sfnews.php')
        self.setName('Sourceforge News')
if __name__ == "__main__":
    # Run as a script: dump the Slashdot box (HTML) to standard output.
    sys.stdout.write(str(Slashdot()) + "\n")
--
richard offer @ home
84 FE 48 E4 74 D0 26 D4 31 8E B6 86 98 74 E2 7C 8A FB BF A3
___________________________________http://216.185.15.144/users/richard/
More information about the Mod_python
mailing list