listing20-6.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. import sys, re
  2. from handlers import *
  3. from util import *
  4. from rules import *
  class Parser:
      """
      A Parser reads a text file, applying rules and controlling a handler.

      For every block of the input, the registered filters are applied first
      (in the order they were added), then the rules are tried in the order
      they were added.

      The handler is expected to provide start(name), end(name) and sub(name);
      each rule is expected to provide condition(block) and
      action(block, handler).
      """

      def __init__(self, handler):
          """Store the handler and start with no rules and no filters."""
          self.handler = handler
          self.rules = []
          self.filters = []

      def addRule(self, rule):
          """Append rule to the list of rules tried for every block."""
          self.rules.append(rule)

      def addFilter(self, pattern, name):
          """
          Register a filter that rewrites every match of pattern in a block.

          The replacement passed to re.sub is whatever handler.sub(name)
          returns.  It is looked up each time the filter runs, on the handler
          given to the filter at that moment.
          """
          # Called apply_filter (not filter) so the builtin is not shadowed.
          def apply_filter(block, handler):
              return re.sub(pattern, handler.sub(name), block)

          self.filters.append(apply_filter)

      def parse(self, file):
          """
          Parse file block by block, driving self.handler.

          Emits handler.start('document') before the first block and
          handler.end('document') after the last one.
          """
          self.handler.start('document')
          # NOTE(review): blocks() is not defined here; presumably it comes
          # from `from util import *` -- confirm.
          for block in blocks(file):
              for apply_filter in self.filters:
                  block = apply_filter(block, self.handler)
              for rule in self.rules:
                  if rule.condition(block):
                      last = rule.action(block, self.handler)
                      # A truthy return value stops the remaining rules from
                      # being tried on this block.
                      if last:
                          break
          self.handler.end('document')
  class BasicTextParser(Parser):
      """
      A specific Parser that adds rules and filters in its constructor.
      """

      def __init__(self, handler):
          """Initialise the base Parser, then register the rules and filters."""
          Parser.__init__(self, handler)

          # Order matters: Parser.parse tries rules in the order they were
          # added and stops at the first one whose action returns a truthy
          # value.
          self.addRule(ListRule())
          self.addRule(ListItemRule())
          self.addRule(TitleRule())
          self.addRule(HeadingRule())
          self.addRule(ParagraphRule())

          # Inline filters, applied to every block before the rules run.
          # Text between a pair of asterisks (shortest match).
          self.addFilter(r'\*(.+?)\*', 'emphasis')
          # 'http://' followed by letters, dots and slashes only; digits,
          # hyphens and other schemes are not matched by this pattern.
          self.addFilter(r'(http://[\.a-zA-Z/]+)', 'url')
          # Letters/dots, '@', letters/dots, ending in at least one letter.
          self.addFilter(r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z]+)', 'mail')
  # Script entry: build a parser around an HTMLRenderer and parse the text
  # supplied on standard input.
  # NOTE(review): HTMLRenderer is not defined in this file; presumably it comes
  # from `from handlers import *` -- confirm.
  handler = HTMLRenderer()
  parser = BasicTextParser(handler)
  parser.parse(sys.stdin)