# test3.py (1.2 KB)

  1. import requests
  2. import scrapy
  3. import re
  4. import time
  5. # 功能:检查字符串str是否符合正则表达式re_exp
  6. # re_exp:正则表达式
  7. # str:待检查的字符串
  8. def check_string(re_exp, str):
  9. res = re.search(re_exp, str)
  10. if res:
  11. return True
  12. else:
  13. return False
  14. def getText(page_url):
  15. res = requests.get(url=page_url)
  16. html = res.text
  17. sel = scrapy.Selector(text=html)
  18. # 解析抓取“热点要闻”
  19. words = sel.css('div[id="chaptercontent"]::text').extract()
  20. pt_next = sel.css('a[id="pt_next"]').xpath('@href').get()
  21. str = ''
  22. # 获取到文字
  23. # 存储
  24. # 执行下一步
  25. #
  26. # for word in words:
  27. # if(word != '\n' and word != 'm.2188c.com'):
  28. # str = str + word + '\n'
  29. # with open('data.txt', 'a') as f: # 设置文件对象
  30. # f.write(str) # 将字符串写入文件中
  31. # if ("llo" in "hello, python"):
  32. # time.sleep(1);
  33. # url = 'https://m.2188c.com'+pt_next
  34. # with open('line.txt', 'w') as f: # 设置文件对象
  35. # f.write(url) # 将字符串写入文件中
  36. # getText(url)
  37. getText('https://m.2188c.com/txt/31063/93347948.html');