# python / python-reptile-novels

import requests
import scrapy
import re
import time

def check_string(re_exp, str):
    """Report whether a regular expression matches anywhere in a string.

    Args:
        re_exp: Regular expression pattern to search for.
        str: The string to check.

    Returns:
        True if ``re.search`` finds a match, otherwise False.
    """
    # re.search yields a match object on success and None on failure.
    return re.search(re_exp, str) is not None

def getText(page_url):
    """Fetch a page and parse its chapter text and next-page link.

    Downloads ``page_url`` with ``requests`` and parses the HTML with a
    ``scrapy.Selector``. The parsed values are currently neither stored nor
    returned: the save-and-follow step is still disabled (see the
    commented-out draft at the end of the body).

    Args:
        page_url: URL of the page to download.

    Returns:
        None.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            the timeout.
    """
    # Without an explicit timeout, requests can wait forever on a server
    # that accepts the connection but never answers.
    res = requests.get(url=page_url, timeout=10)
    html = res.text

    sel = scrapy.Selector(text=html)
    # Text nodes directly inside <div id="chaptercontent">.
    words = sel.css('div[id="chaptercontent"]::text').extract()
    # href attribute of the <a id="pt_next"> element.
    pt_next = sel.css('a[id="pt_next"]').xpath('@href').get()

    # Planned steps (disabled draft kept below):
    #   1. collect the text
    #   2. store it
    #   3. move on to the next page
    #
    # NOTE(review): the `"llo" in "hello, python"` condition is a placeholder
    # that is always true, so enabling the draft as-is would recurse without
    # a stop condition.
    #
    # text = ''
    # for word in words:
    #     if(word != '\n' and word != 'm.2188c.com'):
    #         text = text + word + '\n'
    # with open('data.txt', 'a') as f:  # open the output file
    #     f.write(text)  # append the chapter text to the file
    # if ("llo" in "hello, python"):
    #     time.sleep(1);
    #     url = 'https://m.2188c.com'+pt_next
    #     with open('line.txt', 'w') as f:  # open the progress file
    #         f.write(url)  # record the next URL
    #     getText(url)


# Script entry point: fetch and parse this chapter page.
getText('https://m.2188c.com/txt/31063/93347948.html')