python
/
python-reptile-novels


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
							import requests
import scrapy
import re
import sqlite3

import time

# Connect to the SQLite database file and create the module-level cursor.
# The functions in this file reference `con` and `cur` as globals.
con = sqlite3.connect("轮回乐园.db")
cur = con.cursor()


# Merge data: append every row of the `book` table to data.txt.
def dowload():
    """Append each row of the ``book`` table to ``data.txt``.

    For every row, column 0 is printed, then columns 1 and 3 are
    concatenated, terminated with a newline and appended to ``data.txt``.
    The function pauses for two seconds after each row.

    Uses the module-level cursor ``cur``.
    """
    cur.execute("SELECT * FROM book")
    # Walk the fetched rows directly so that every row, including the first,
    # is handled exactly once and the loop ends cleanly after the last row
    # (a manual fetchone() loop that re-fetches before use skips the first
    # row and ends up indexing into None).
    for row in cur.fetchall():
        print(row[0])
        line = row[1] + row[3] + '\n'
        with open('data.txt', 'a') as f:  # append mode keeps earlier output
            f.write(line)
        time.sleep(2)


# dowload()

# Check whether a schema object with the given name is missing.
def is_table(table_name):
    """Report whether ``table_name`` is absent from ``sqlite_master``.

    NOTE: despite the name, the result is ``True`` when NO entry with this
    name exists and ``False`` when one does. Callers use it as a
    "needs to be created" check. The lookup matches on name only and does not
    filter by object type.

    Args:
        table_name: Name to look up in ``sqlite_master``.

    Returns:
        ``True`` if no matching entry was found, otherwise ``False``.
    """
    # Bind the name as a parameter instead of concatenating it into the SQL,
    # so names containing quotes cannot break or alter the statement.
    res = cur.execute(
        "SELECT name FROM sqlite_master WHERE name=?", (table_name,)
    )
    return res.fetchone() is None


# Create the new_book table (columns: url, title, pt_next) if it is missing.
# NOTE: is_table() returns True when NO entry with that name exists in
# sqlite_master, so the CREATE TABLE only runs when the table is absent.
if is_table('new_book'):
    cur.execute("CREATE TABLE new_book(url, title, pt_next)")


# Fetch all rows that share a title.
def get_more_data(title):
    """Return every row of the ``book`` table whose title equals ``title``.

    Args:
        title: Title value to match exactly.

    Returns:
        A list of row tuples as returned by ``fetchall()``; empty if nothing
        matches.
    """
    # Pass the title as a bound parameter: titles may contain quote
    # characters, which would break a statement built by concatenation.
    res = cur.execute("SELECT * FROM book WHERE title=?", (title,))
    return res.fetchall()


# Connect to the database again. This rebinds the module-level `con` and
# `cur` (already assigned earlier in this file) to a new connection and
# cursor on the same database file.
# NOTE(review): this looks redundant with the earlier connect — confirm
# whether it can be removed.
con = sqlite3.connect("轮回乐园.db")
cur = con.cursor()


# Record data that has already been processed.
def inset_new_book(url, title, pt_next):
    """Insert one ``(url, title, pt_next)`` row into ``new_book`` and commit.

    Args:
        url: Value stored in the first column.
        title: Value stored in the second column.
        pt_next: Value stored in the third column.
    """
    record = (url, title, pt_next)
    insert_sql = "INSERT INTO new_book VALUES(?, ?, ?)"
    cur.executemany(insert_sql, [record])
    # Persist the insert immediately so the row is visible to later lookups.
    con.commit()


# Check whether the given title is still missing from new_book.
def is_title_in_table(title):
    """Report whether ``title`` is absent from the ``new_book`` table.

    NOTE: despite the name, the result is ``True`` when NO row with this
    title exists and ``False`` when one does. Callers use it as a
    "not processed yet" check.

    Args:
        title: Title value to look up.

    Returns:
        ``True`` if no row in ``new_book`` has this title, otherwise ``False``.
    """
    # Pass the title as a bound parameter: titles may contain quote
    # characters, which would break a statement built by concatenation.
    res = cur.execute("SELECT title FROM new_book WHERE title=?", (title,))
    return res.fetchone() is None


# Filter data: export each title once, with the text of all its rows merged.
def clear_data():
    """Append every not-yet-recorded title and its merged text to ``data.txt``.

    Reads all rows of ``book`` and prints each row's title (column 1). When
    ``is_title_in_table`` reports that the title is not recorded in
    ``new_book``, all ``book`` rows with that title are fetched through
    ``get_more_data``, their column 3 values are placed one per line under
    the title, the result is appended to ``data.txt``, and the row's url,
    title and pt_next (columns 0-2) are stored via ``inset_new_book``.
    """
    cur.execute("SELECT * FROM book")
    for record in cur.fetchall():
        url, title, pt_next = record[0], record[1], record[2]
        print(title)
        # is_title_in_table() is True when the title is NOT yet in new_book;
        # anything already recorded is skipped.
        if not is_title_in_table(title):
            continue
        body = ''.join(chunk[3] + '\n' for chunk in get_more_data(title))
        payload = title + '\n' + body
        with open('data.txt', 'a') as out:  # append to the export file
            out.write(payload)
        inset_new_book(url=url, title=title, pt_next=pt_next)

# Run the filter/export step. There is no __main__ guard, so this call
# executes whenever the module is run or imported.
clear_data()