test5.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. import requests
  2. import scrapy
  3. import re
  4. import sqlite3
  5. import time
  6. # 链接数据库
  7. con = sqlite3.connect("轮回乐园.db")
  8. cur = con.cursor()
  9. # 合并数据
  10. def dowload():
  11. cur.execute("SELECT * FROM book");
  12. row = cur.fetchone()
  13. while row is not None:
  14. row = cur.fetchone()
  15. print(row[0])
  16. str = row[1] + row[3] + '\n'
  17. with open('data.txt', 'a') as f: # 设置文件对象
  18. f.write(str) # 将字符串写入文件中
  19. time.sleep(2);
  20. # dowload()
  21. # 判读数据表是否存在
  22. def is_table(table_name):
  23. res = cur.execute("SELECT name FROM sqlite_master WHERE name='" + table_name + "'")
  24. return res.fetchone() is None
  25. # 创建文章主题表
  26. if is_table('new_book'):
  27. cur.execute("CREATE TABLE new_book(url, title, pt_next)")
  28. # 获取批量数据
  29. def get_more_data(title):
  30. res = cur.execute("SELECT * FROM book WHERE title='" + title + "'")
  31. return res.fetchall()
  32. # 链接数据库
  33. con = sqlite3.connect("轮回乐园.db")
  34. cur = con.cursor()
  35. # 记录已经处理过的数据
  36. def inset_new_book(url, title, pt_next):
  37. cur.executemany("INSERT INTO new_book VALUES(?, ?, ?)", [(url, title, pt_next)])
  38. con.commit() # Remember to commit the transaction after executing INSERT.
  39. # 判断当前title是否存在数据库中
  40. def is_title_in_table(title):
  41. res = cur.execute("SELECT title FROM new_book WHERE title='" + title + "'")
  42. return res.fetchone() is None
  43. # 过滤数据
  44. def clear_data():
  45. cur.execute("SELECT * FROM book");
  46. row_all = cur.fetchall()
  47. for row in row_all:
  48. url = row[0]
  49. title = row[1]
  50. pt_next = row[2]
  51. print(title);
  52. if is_title_in_table(title):
  53. all_row = get_more_data(title)
  54. centent = ''
  55. for text in all_row:
  56. centent = centent + text[3] + '\n'
  57. str = title + '\n' + centent;
  58. with open('data.txt', 'a') as f: # 设置文件对象
  59. f.write(str) # 将字符串写入文件中
  60. inset_new_book(url=url, title=title, pt_next=pt_next)
  61. clear_data()