# listing23-2.py (4.6 KB)

import html
import re
import textwrap
from nntplib import NNTP, decode_header
from urllib.request import urlopen
  5. class NewsAgent:
  6. """
  7. An object that can distribute news items from news sources to news
  8. destinations.
  9. """
  10. def __init__(self):
  11. self.sources = []
  12. self.destinations = []
  13. def add_source(self, source):
  14. self.sources.append(source)
  15. def addDestination(self, dest):
  16. self.destinations.append(dest)
  17. def distribute(self):
  18. """
  19. Retrieve all news items from all sources, and Distribute them to all
  20. destinations.
  21. """
  22. items = []
  23. for source in self.sources:
  24. items.extend(source.get_items())
  25. for dest in self.destinations:
  26. dest.receive_items(items)
  27. class NewsItem:
  28. """
  29. A simple news item consisting of a title and body text.
  30. """
  31. def __init__(self, title, body):
  32. self.title = title
  33. self.body = body
  34. class NNTPSource:
  35. """
  36. A news source that retrieves news items from an NNTP group.
  37. """
  38. def __init__(self, servername, group, howmany):
  39. self.servername = servername
  40. self.group = group
  41. self.howmany = howmany
  42. def get_items(self):
  43. server = NNTP(self.servername)
  44. resp, count, first, last, name = server.group(self.group)
  45. start = last - self.howmany + 1
  46. resp, overviews = server.over((start, last))
  47. for id, over in overviews:
  48. title = decode_header(over['subject'])
  49. resp, info = server.body(id)
  50. body = '\n'.join(line.decode('latin')
  51. for line in info.lines) + '\n\n'
  52. yield NewsItem(title, body)
  53. server.quit()
  54. class SimpleWebSource:
  55. """
  56. A news source that extracts news items from a web page using regular
  57. expressions.
  58. """
  59. def __init__(self, url, title_pattern, body_pattern, encoding='utf8'):
  60. self.url = url
  61. self.title_pattern = re.compile(title_pattern)
  62. self.body_pattern = re.compile(body_pattern)
  63. self.encoding = encoding
  64. def get_items(self):
  65. text = urlopen(self.url).read().decode(self.encoding)
  66. titles = self.title_pattern.findall(text)
  67. bodies = self.body_pattern.findall(text)
  68. for title, body in zip(titles, bodies):
  69. yield NewsItem(title, textwrap.fill(body) + '\n')
  70. class PlainDestination:
  71. """
  72. A news destination that formats all its news items as plain text.
  73. """
  74. def receive_items(self, items):
  75. for item in items:
  76. print(item.title)
  77. print('-' * len(item.title))
  78. print(item.body)
  79. class HTMLDestination:
  80. """
  81. A news destination that formats all its news items as HTML.
  82. """
  83. def __init__(self, filename):
  84. self.filename = filename
  85. def receive_items(self, items):
  86. out = open(self.filename, 'w')
  87. print("""
  88. <html>
  89. <head>
  90. <title>Today's News</title>
  91. </head>
  92. <body>
  93. <h1>Today's News</h1>
  94. """, file=out)
  95. print('<ul>', file=out)
  96. id = 0
  97. for item in items:
  98. id += 1
  99. print(' <li><a href="#{}">{}</a></li>'
  100. .format(id, item.title), file=out)
  101. print('</ul>', file=out)
  102. id = 0
  103. for item in items:
  104. id += 1
  105. print('<h2><a name="{}">{}</a></h2>'
  106. .format(id, item.title), file=out)
  107. print('<pre>{}</pre>'.format(item.body), file=out)
  108. print("""
  109. </body>
  110. </html>
  111. """, file=out)
  112. def runDefaultSetup():
  113. """
  114. A default setup of sources and destination. Modify to taste.
  115. """
  116. agent = NewsAgent()
  117. # A SimpleWebSource that retrieves news from Reuters:
  118. reuters_url = 'http://www.reuters.com/news/world'
  119. reuters_title = r'<h2><a href="[^"]*"\s*>(.*?)</a>'
  120. reuters_body = r'</h2><p>(.*?)</p>'
  121. reuters = SimpleWebSource(reuters_url, reuters_title, reuters_body)
  122. agent.add_source(reuters)
  123. # An NNTPSource that retrieves news from comp.lang.python.announce:
  124. clpa_server = 'news.foo.bar' # Insert real server name
  125. clpa_server = 'news.ntnu.no'
  126. clpa_group = 'comp.lang.python.announce'
  127. clpa_howmany = 10
  128. clpa = NNTPSource(clpa_server, clpa_group, clpa_howmany)
  129. agent.add_source(clpa)
  130. # Add plain-text destination and an HTML destination:
  131. agent.addDestination(PlainDestination())
  132. agent.addDestination(HTMLDestination('news.html'))
  133. # Distribute the news items:
  134. agent.distribute()
  135. if __name__ == '__main__': runDefaultSetup()