richard offer
richard at whitequeen.com
Thu Jul 6 21:13:04 EST 2000
Hi,

Here's a quick hack of a class to get and format news stories from a number of sites.

It was designed with mod_python in mind, so all you need to do is

    import slashbox
    ...
    def handler(req):
        ...
        req.write(str(slashbox.Slashdot()))

to add the latest stories from Slashdot to your page.

A number of other sites are included; it's easy to add new ones simply by sub-classing slashbox. The trick is to find the site's .rss/.rdf file.

You'll need the latest PyXML code.

richard.

# $Id: slashbox.py,v 1.2 2000/07/08 21:34:46 richard Exp $
# Copyright (c) 2000 Richard Offer <richard at whitequeen.com>.
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# Richard Offer BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of Richard Offer shall not be
# used in advertising or otherwise to promote the sale, use or other dealings
# in this Software without prior written authorization from Richard Offer.
#
#
# A python class to grab the Slashdot (and other sites using RSS/RDF) stories.
import stat import xmllib import urllib import urlparse import os import time import string import sys from xml.sax import saxlib,saxexts class _myrdf(saxlib.HandlerBase): def __init__(self): self._in={} self._in["title"]=0 self._in["link"]=0 self._in["description"]=0 self._txt={} self._txt["title"]="" self._txt["link"]="" self._txt["description"]="" self._stories=[] def startElement(self,ele,attr): if ele == 'item' : self._txt={} self._txt["title"]="" self._txt["link"]="" self._txt["description"]="" elif self._in.has_key(ele) : self._in[ele]=1 else: pass def endElement(self,ele): if self._in.has_key(ele) : self._in[ele] = 0 self._txt[ele] = string.join(string.split((self._txt[ele]))) elif ele == 'item' : self._stories.append(self._txt) def characters(self,ch,start,length): for i in self._in.keys(): if self._in[i] == 1: self._txt[i] = self._txt[i] + ch[start:start+length] def stories(self): return self._stories # class Site() class slashbox: """ Create a single slashbox. slashbox(url,freq) url is the url of the RSS/RDF file, freq is the update period (mins). The freq cannot be less than 30 raw() returns a list of stories (each story is stored in a dict). __str__() returns a HTML table containing the stories. Several subclasses are available, these already have the url defined, so to get the headlines from slashdot.org simply do >>> print Slashdot() The list of built-in sites is Slashdot() Freshmeat() SecurityFocus() Parnassus() LWN() Fool() LinuxNetNews() SourceForgeReleases() SourceForgeNews() If you are using mod_python, you can embed a slashbox simply by import slashbox ... 
req.write(str(slashbox.Slashdot())) """ def __init__(self, url='http://slashdot.org/slashdot.rdf', freq=60): self._url=url if freq < 30: freq=30 self._freq=freq _u=urlparse.urlparse(self._url) self._file=_u[1] + "-" + os.path.basename(url) if self._file == '': self._file = os.path.basename(url) self._file = "/tmp/slashbox-" + self._file self.site= _u[0]+ "://" + _u[1] + "/" self.name= _u[1] try: st=os.stat(self._file) if st[8] < ( time.time() - (self._freq*60) ): (fname,headers)=urllib.urlretrieve(self._url, self._file) except OSError: (fname,headers)=urllib.urlretrieve(self._url,self._file) fp=open(self._file) xmlp=saxexts.make_parser() self.dh=_myrdf() xmlp.setDocumentHandler(self.dh) xmlp.parseFile(fp) def stories(self): return self.dh.stories() def __str__(self): str="<SMALL>\n<TABLE BORDER=2 WIDTH=200>\n" str=str + '<TH><A HREF="%s">%s</A></TH>\n' % ( self.site, self.name ) str=str + "<TR><TD>\n <TABLE><TBODY>\n" for i in self.dh.stories(): str=str + ' <TR><TD><A HREF="%s">%s</A>' % ( i['link'],i['title'] ) if i.has_key("description"): str=str + "<BR><SMALL>%s</SMALL>" % ( i['description']) str=str + '</TD></TR>\n' str=str+" </TBODY></TABLE></TD></TR>\n</TABLE></SMALL>\n\n" return str def setName(self,name): self.name=name class Slashdot(slashbox): """slashbox sub-class for Slashdot.org""" def __init__(self): _url='http://slashdot.org/slashdot.rdf' slashbox.__init__(self,url=_url) self.setName('Slashdot') class Freshmeat(slashbox): """slashbox sub-class for Freshmeat.net""" def __init__(self): _url='http://freshmeat.net/backend/fm.rdf' slashbox.__init__(self,url=_url) self.setName('Freshmeat') class SecurityFocus(slashbox): """slashbox sub-class for SecurityFocus.com""" def __init__(self): _url='http://www.securityfocus.com/topnews-rss.html' slashbox.__init__(self,url=_url) self.setName('Security Focus') class Parnassus(slashbox): """slashbox sub-class for the Vaults of Parnassus""" def __init__(self): _url='http://www.vex.net/parnassus/parnassus.rss' 
slashbox.__init__(self,url=_url) self.setName('Vaults of Parnassus') class LWN(slashbox): """slashbox sub-class for the Linux Weekly News""" def __init__(self): _url='http://lwn.net/headlines/rss' slashbox.__init__(self,url=_url) self.setName('Linux Weekly News') class Fool(slashbox): """slashbox sub-class for the Motley Fool""" def __init__(self): _url='http://www.fool.com/About/headlines/rss_headlines.asp' slashbox.__init__(self,url=_url) self.setName('The Motley Fool') class LinuxNetNews(slashbox): """slashbox sub-class for Linux Net News""" def __init__(self): _url='http://www.netnews.opensrc.org/index.rdf' slashbox.__init__(self,url=_url) self.setName('Linux Net News') class SourceForgeReleases(slashbox): """slashbox sub-class for new releases on SourceForge""" def __init__(self): _url='http://sourceforge.net/export/rss_sfnewreleases.php' slashbox.__init__(self,url=_url) self.setName('Sourceforge New Releases') class SourceForgeNews(slashbox): """slashbox sub-class for Project News at SourceForge""" def __init__(self): _url='http://sourceforge.net/export/rss_sfnews.php' slashbox.__init__(self,url=_url) self.setName('Sourceforge News') if __name__ == "__main__": print Slashdot() -- richard offer @ home 84 FE 48 E4 74 D0 26 D4 31 8E B6 86 98 74 E2 7C 8A FB BF A3 ___________________________________http://216.185.15.144/users/richard/
|