import requests
import scrapy
import re
import time


def check_string(re_exp, str):
    """Return True if the regular expression matches anywhere in the string.

    Args:
        re_exp: Regular expression pattern to search for.
        str: String to check. (The name shadows the builtin; it is kept
            so that existing keyword callers continue to work.)

    Returns:
        True when ``re.search`` finds a match, False otherwise.
    """
    return re.search(re_exp, str) is not None


def getText(page_url, *, timeout=10):
    """Download ``page_url`` and parse the chapter text and next-page link.

    The text nodes of the element ``div#chaptercontent`` and the ``href``
    of the element ``a#pt_next`` are extracted. The code that stored the
    text and followed the next-page link is currently disabled (see the
    commented block below), so at the moment the function only fetches
    and parses the page.

    Args:
        page_url: Absolute URL of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    res = requests.get(url=page_url, timeout=timeout)
    html = res.text
    sel = scrapy.Selector(text=html)

    # Text nodes directly inside the chapter container.
    words = sel.css('div[id="chaptercontent"]::text').extract()
    # Link to the next page; None when the anchor is absent.
    pt_next = sel.css('a[id="pt_next"]').xpath('@href').get()

    # Disabled crawl logic, kept for reference. Intended steps:
    #   1. collect the text
    #   2. store it
    #   3. continue with the next page
    #
    # text = ''
    # for word in words:
    #     if (word != '\n' and word != 'm.2188c.com'):
    #         text = text + word + '\n'
    # with open('data.txt', 'a') as f:  # open the output file
    #     f.write(text)  # append the chapter text to the file
    # if ("llo" in "hello, python"):
    #     time.sleep(1)
    #     url = 'https://m.2188c.com' + pt_next
    #     with open('line.txt', 'w') as f:  # open the progress file
    #         f.write(url)  # record the URL being fetched next
    #     getText(url)


# Runs at import time as in the original script.
getText('https://m.2188c.com/txt/31063/93347948.html')