import sqlite3
import time

# Connect to the database
con = sqlite3.connect("轮回乐园.db")
cur = con.cursor()


# Merge the data: append each chapter's title and body to data.txt
def dowload():
    cur.execute("SELECT * FROM book")
    row = cur.fetchone()
    while row is not None:
        print(row[0])
        text = row[1] + row[3] + '\n'  # title + chapter body
        with open('data.txt', 'a') as f:  # open the output file
            f.write(text)  # write the string to the file
        time.sleep(2)
        row = cur.fetchone()  # fetch the next row only after the current one is written

# dowload()


# Return True if the given table does NOT exist yet
def is_table(table_name):
    res = cur.execute("SELECT name FROM sqlite_master WHERE name=?", (table_name,))
    return res.fetchone() is None


# Create the article/topic table if it is missing
if is_table('new_book'):
    cur.execute("CREATE TABLE new_book(url, title, pt_next)")


# Fetch all rows in book that share the same title
def get_more_data(title):
    res = cur.execute("SELECT * FROM book WHERE title=?", (title,))
    return res.fetchall()


# Record a title that has already been processed
def inset_new_book(url, title, pt_next):
    cur.execute("INSERT INTO new_book VALUES(?, ?, ?)", (url, title, pt_next))
    con.commit()  # remember to commit the transaction after executing INSERT


# Return True if the title has NOT been recorded in new_book yet
def is_title_in_table(title):
    res = cur.execute("SELECT title FROM new_book WHERE title=?", (title,))
    return res.fetchone() is None


# Filter the data: merge every chapter of each not-yet-processed title into data.txt
def clear_data():
    cur.execute("SELECT * FROM book")
    row_all = cur.fetchall()
    for row in row_all:
        url = row[0]
        title = row[1]
        pt_next = row[2]
        print(title)
        if is_title_in_table(title):
            all_row = get_more_data(title)
            content = ''
            for text in all_row:
                content = content + text[3] + '\n'
            merged = title + '\n' + content
            with open('data.txt', 'a') as f:  # open the output file
                f.write(merged)  # write the string to the file
            inset_new_book(url=url, title=title, pt_next=pt_next)


clear_data()
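
# Sketch, not part of the original script: assuming nothing else needs the
# connection once clear_data() has returned, it can be closed explicitly so
# the SQLite handle is released before the process exits.
con.close()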