1234567891011121314151617181920212223242526272829303132333435363738394041424344 |
- import requests
- import scrapy
- import re
- import time
def check_string(re_exp, str):
    """Report whether a string matches a regular expression.

    Args:
        re_exp: Regular expression pattern handed to ``re.search``.
        str: The string to check. The name shadows the builtin ``str``
            inside this function; it is kept so that existing keyword
            callers continue to work.

    Returns:
        True if ``re_exp`` matches anywhere in ``str``, otherwise False.
    """
    # re.search returns a match object on success and None on failure.
    return re.search(re_exp, str) is not None
def getText(page_url):
    """Download one page and parse its chapter text and next-page link.

    The page is fetched with ``requests`` and parsed with a scrapy
    ``Selector``. The follow-up steps (saving the text and moving on to the
    next page) are currently disabled, so the parsed values are not used
    further and the function returns ``None``.

    Args:
        page_url: URL of the page to fetch.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server does not respond within the timeout.
    """
    # Without a timeout, requests waits indefinitely on a stalled server.
    res = requests.get(url=page_url, timeout=10)
    sel = scrapy.Selector(text=res.text)

    # Text nodes directly inside <div id="chaptercontent">.
    words = sel.css('div[id="chaptercontent"]::text').extract()
    # href attribute of the <a id="pt_next"> link (None when absent).
    pt_next = sel.css('a[id="pt_next"]').xpath('@href').get()

    # Disabled pipeline, kept for reference: collect the text, store it,
    # then continue with the next page.
    # NOTE(review): the original indentation was lost, so the nesting below
    # is a best guess -- confirm before re-enabling. The accumulator was
    # named `str` originally; it is shown as `text` here to avoid shadowing
    # the builtin.
    #
    # text = ''
    # for word in words:
    #     if word != '\n' and word != 'm.2188c.com':
    #         text = text + word + '\n'
    # with open('data.txt', 'a') as f:   # append the chapter text
    #     f.write(text)
    # if "llo" in "hello, python":
    #     time.sleep(1)
    #     url = 'https://m.2188c.com' + pt_next
    #     with open('line.txt', 'w') as f:   # remember the next page URL
    #         f.write(url)
    #     getText(url)
# Script entry: fetch and parse the starting page.
getText(page_url='https://m.2188c.com/txt/31063/93347948.html')
|