123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
- from nntplib import NNTP, decode_header
- from urllib.request import urlopen
- import textwrap
- import re
- class NewsAgent:
- """
- An object that can distribute news items from news sources to news
- destinations.
- """
- def __init__(self):
- self.sources = []
- self.destinations = []
- def add_source(self, source):
- self.sources.append(source)
- def addDestination(self, dest):
- self.destinations.append(dest)
- def distribute(self):
- """
- Retrieve all news items from all sources, and Distribute them to all
- destinations.
- """
- items = []
- for source in self.sources:
- items.extend(source.get_items())
- for dest in self.destinations:
- dest.receive_items(items)
- class NewsItem:
- """
- A simple news item consisting of a title and body text.
- """
- def __init__(self, title, body):
- self.title = title
- self.body = body
- class NNTPSource:
- """
- A news source that retrieves news items from an NNTP group.
- """
- def __init__(self, servername, group, howmany):
- self.servername = servername
- self.group = group
- self.howmany = howmany
- def get_items(self):
- server = NNTP(self.servername)
- resp, count, first, last, name = server.group(self.group)
- start = last - self.howmany + 1
- resp, overviews = server.over((start, last))
- for id, over in overviews:
- title = decode_header(over['subject'])
- resp, info = server.body(id)
- body = '\n'.join(line.decode('latin')
- for line in info.lines) + '\n\n'
- yield NewsItem(title, body)
- server.quit()
- class SimpleWebSource:
- """
- A news source that extracts news items from a web page using regular
- expressions.
- """
- def __init__(self, url, title_pattern, body_pattern, encoding='utf8'):
- self.url = url
- self.title_pattern = re.compile(title_pattern)
- self.body_pattern = re.compile(body_pattern)
- self.encoding = encoding
- def get_items(self):
- text = urlopen(self.url).read().decode(self.encoding)
- titles = self.title_pattern.findall(text)
- bodies = self.body_pattern.findall(text)
- for title, body in zip(titles, bodies):
- yield NewsItem(title, textwrap.fill(body) + '\n')
- class PlainDestination:
- """
- A news destination that formats all its news items as plain text.
- """
- def receive_items(self, items):
- for item in items:
- print(item.title)
- print('-' * len(item.title))
- print(item.body)
- class HTMLDestination:
- """
- A news destination that formats all its news items as HTML.
- """
- def __init__(self, filename):
- self.filename = filename
- def receive_items(self, items):
- out = open(self.filename, 'w')
- print("""
- <html>
- <head>
- <title>Today's News</title>
- </head>
- <body>
- <h1>Today's News</h1>
- """, file=out)
- print('<ul>', file=out)
- id = 0
- for item in items:
- id += 1
- print(' <li><a href="#{}">{}</a></li>'
- .format(id, item.title), file=out)
- print('</ul>', file=out)
- id = 0
- for item in items:
- id += 1
- print('<h2><a name="{}">{}</a></h2>'
- .format(id, item.title), file=out)
- print('<pre>{}</pre>'.format(item.body), file=out)
- print("""
- </body>
- </html>
- """, file=out)
- def runDefaultSetup():
- """
- A default setup of sources and destination. Modify to taste.
- """
- agent = NewsAgent()
- # A SimpleWebSource that retrieves news from Reuters:
- reuters_url = 'http://www.reuters.com/news/world'
- reuters_title = r'<h2><a href="[^"]*"\s*>(.*?)</a>'
- reuters_body = r'</h2><p>(.*?)</p>'
- reuters = SimpleWebSource(reuters_url, reuters_title, reuters_body)
- agent.add_source(reuters)
- # An NNTPSource that retrieves news from comp.lang.python.announce:
- clpa_server = 'news.foo.bar' # Insert real server name
- clpa_server = 'news.ntnu.no'
- clpa_group = 'comp.lang.python.announce'
- clpa_howmany = 10
- clpa = NNTPSource(clpa_server, clpa_group, clpa_howmany)
- agent.add_source(clpa)
- # Add plain-text destination and an HTML destination:
- agent.addDestination(PlainDestination())
- agent.addDestination(HTMLDestination('news.html'))
- # Distribute the news items:
- agent.distribute()
- if __name__ == '__main__': runDefaultSetup()
|