// 04-page.js
//
// Loads every entry listed in ./xindaming with Puppeteer, scrapes its heading
// and text, and writes the result to ./book/OEBPS/Text/text<index>.xhtml.
  // Node's file-system module; used to write the generated .xhtml files.
  const fs = require('fs');
  // Headless-browser driver used to load and scrape each page.
  const puppeteer = require('puppeteer');
  // Project helper; its page() function is applied to each entry and its scrape
  // result before the output is written.
  const utils = require('./utils/base');
  // List of entries to process. Each entry is read for `href`, `index` and
  // `title` (presumably one entry per book chapter - confirm against ./xindaming).
  const $xindaming = require('./xindaming');
  // Number of entries, captured once for the "remaining" progress message.
  const len = $xindaming.length;
  // Position of the entry currently being processed; advanced by forEachUrl().
  let index = 0;
  /**
   * Opens `url` in a freshly launched headless browser and scrapes the page.
   *
   * @param {string} url - Address of the page to load.
   * @returns {Promise<{title: string, page: string}>} `title` is the text of the
   *   first <h1>; `page` is the text of the first <p> inside the second element
   *   with class "main1".
   * @throws Rejects when launch or navigation fails, or when the expected
   *   elements are missing. Once launched, the browser is always closed.
   */
  const createPage = async (url) => {
    const browser = await puppeteer.launch({
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
      headless: true, // default is true (headless): no browser window is shown
    });
    try {
      // Use the first already-open tab rather than creating another one,
      // so the browser only ever has a single tab.
      const [page] = await browser.pages();
      await page.goto(url);
      // The callback runs inside the page. evaluate() already returns a promise
      // for the callback's value, so a plain object is enough here.
      return await page.evaluate(() => ({
        title: document.getElementsByTagName('h1')[0].innerText,
        page: document.getElementsByClassName('main1')[1].getElementsByTagName('p')[0].innerText,
      }));
    } finally {
      // Close the browser on failure as well as success, so that a broken page
      // does not leave a browser process running.
      await browser.close();
    }
  };
  // Kick off processing with the first entry (function declarations are hoisted).
  forEachUrl();

  /**
   * Processes the entry at the module-level `index`, then schedules itself for
   * the next entry; does nothing once `index` is past the end of the list.
   *
   * For one entry it prints progress, scrapes `href` via createPage(), passes
   * the entry and the scrape result to utils.page(), and writes what that
   * returns to ./book/OEBPS/Text/text<entry.index>.xhtml.
   *
   * On any failure the error is reported, the process exit code is set to 1 and
   * no further entries are scheduled.
   */
  function forEachUrl() {
    const entry = $xindaming[index];
    if (entry === undefined) {
      return; // no entry at this position: the run is finished
    }

    console.clear();
    console.log(`还剩${len - index}; 当前进度:${entry.index} ${entry.title}`);

    createPage(entry.href)
      .then((scraped) => utils.page(entry, scraped))
      .then((output) => {
        fs.writeFileSync(`./book/OEBPS/Text/text${entry.index}.xhtml`, output);
        // Wait 200 ms before moving on to the next entry.
        setTimeout(() => {
          index += 1;
          forEachUrl();
        }, 200);
      })
      .catch((err) => {
        // Without this handler a scrape or write error would surface only as an
        // unhandled promise rejection, with no hint about which entry failed.
        console.error(`Failed to process entry ${entry.index} (${entry.href}):`, err);
        process.exitCode = 1;
      });
  }