代码之家  ›  专栏  ›  技术社区  ›  ssp singh

使用 Puppeteer.js 为 URL 列表截图

  •  0
  • ssp singh  · 技术社区  · 6 年前

    假设我有一个URL列表或SVG文件的完整路径,现在我想为每个URL逐个截图。

    这是测试代码,我用它一个接一个地截图,但它并没有按预期工作!

    这段代码对所有 URL 只启动了一个 headless Chrome 页面实例,并且 Node.js 抛出了以下错误:

    (node:3412) MaxListenersExceededWarning: Possible EventEmitter memory leak detected. 11 lifecycleevent listeners added. Use emitter.setMaxListeners() to increase limit

    但我想一个接一个地截图。

    'use strict';
    
    const fs        = require('fs');
    const glob      = require('glob');
    const validUrl  = require("valid-url")
    const puppeteer = require('puppeteer');
    const devices   = require('puppeteer/DeviceDescriptors');
    const iPhone    = devices['iPhone 6'];
    
    /**
     * Loading Application Config
     *
     * NOTE(review): the page callback further down declares its own local
     * `inputUrl` (a filesystem path), which shadows this constant there; no
     * other code in this listing reads this value.
     */
    const inputUrl = 'http://www.google.com';
    
    /**
     * Using Puppeteer.js
     */
    // Entry point of the script: launch headless Chrome, open ONE page, build
    // the list of files to capture, then start a screenshot for each of them
    // on that single page.
    console.log('-- Trying to Launch Puppeteer');
    // `browser` receives the promise returned by .then(), not the browser
    // instance itself; the instance is only available as `browserObj` inside
    // the callback.
    const browser = puppeteer.launch({
        headless: true
    }).then(function(browserObj)
    {
        console.log('-- Trying to Open New Page');
        // The promise from newPage().then(...) is not returned, so the outer
        // chain does not wait for the work done inside this callback.
        browserObj.newPage().then(function(pageObj)
        {
            // Logs a notice and closes the browser passed in.
            async function closeHeadlesssChrome(browserObj) {
                console.log('-- Trying to Close Chome Headless Window');
                await browserObj.close();
            }
    
            // Applies a fixed 1366x738 non-mobile, non-touch viewport to the page.
            async function setChromeViewport(pageObj) {
                console.log('-- Trying to Update page viewPort');
                await pageObj.setViewport({
                    width: 1366,
                    height: 738,
                    deviceScaleFactor: 1,
                    isMobile: false,
                    hasTouch: false,
                    isLandscape: false
                });
            }
    
            // Navigates `pageObj` to `srcUrl`, then writes a PNG named
            // `srcUrl + '.png'`, clipped to the 795x1125 region that starts at
            // the page's top-left corner.
            var takeScreenshot = async function(pageObj, srcUrl) {
                console.log('-- Trying to Load Web Page ' + srcUrl);
                await pageObj.goto(srcUrl);
    
                console.log('-- Trying to Take Screenshot');
                await pageObj.screenshot({
                    path: srcUrl + '.png',
                    clip: {
                        x: 0,
                        y: 0,
                        width: 795,
                        height: 1125
                    }
                })
            }
    
    
            // Input or Source Url
            // This local constant shadows the module-level `inputUrl`.
            const inputUrl = "C:/Users/ssp/Music/BR PUBLIC INTER COLLEGE";
            var matchedFiles = [];
    
            // Check if given Url/Path exists
            if (fs.existsSync(inputUrl)) 
            {
                const inputUrlObj = fs.statSync(inputUrl);
                if (inputUrlObj.isDirectory()) 
                {
                    // Directory: collect every .svg file found anywhere beneath it.
                    matchedFiles = glob.GlobSync(inputUrl + '/**/*.svg').found;
                } 
                else if (inputUrlObj.isFile()) 
                {
                    // Single file: capture just that file.
                    matchedFiles.push(inputUrl );
                }
            } 
            else 
            {
                console.log('-- Input Url not exists')
                // This is the only place in the listing where the browser is closed.
                return closeHeadlesssChrome(browserObj);
            }
    
            // NOTE(review): not awaited, so the viewport change is not guaranteed
            // to have completed before the first navigation begins.
            setChromeViewport(pageObj);
    
            // NOTE(review): each takeScreenshot() promise is neither awaited nor
            // returned, so every navigation is started back to back on the same
            // `pageObj` instead of one after another.
            matchedFiles.map(function(srcUrl){
                takeScreenshot(pageObj, srcUrl);
            });
        });
    });
    

    谢谢

    1 回复  |  直到 6 年前
        1
  •  2
  •   Md. Abu Taher    6 年前

    请使用 for..of 配合 async/await,而不是 .map。.map 不会暂停执行,而 for..of 循环中的 await 会等待每一步完成。

    browserObj.newPage().then(async function(pageObj) { // <-- turn the main function into async function
     // ... many lines later
     for(let srcUrl of matchedFiles){
      await takeScreenshot(pageObj, srcUrl);
     }
    

    旁注:由于您的所有函数都是相互独立的,也许您可以将它们移出回调块,并在调用 browserObj.newPage() 之前声明它们。

    这是重构后的代码,如果有问题请原谅,但你明白了。

    "use strict";
    
    const fs = require("fs");
    const glob = require("glob");
    const validUrl = require("valid-url");
    const puppeteer = require("puppeteer");
    const devices = require("puppeteer/DeviceDescriptors");
    const iPhone = devices["iPhone 6"];
    
    /**
     * Loading Application Config
     *
     * NOTE(review): no code visible in this listing reads this module-level
     * `inputUrl`; getURLList() works from an `inputUrl` of its own.
     */
    const inputUrl = "http://www.google.com";
    
    /**
     * Controller functions
     */
    
    /**
     * Logs a notice and then closes the given browser.
     *
     * @param {object} browserObj - Object exposing an async `close()` method
     *   (in this script, the value resolved by `puppeteer.launch()`).
     * @returns {Promise<void>} Settles once `close()` has settled.
     */
    async function closeHeadlesssChrome(browserObj) {
      const notice = "-- Trying to Close Chome Headless Window";
      console.log(notice);

      await browserObj.close();
    }
    
    /**
     * Applies the script's fixed desktop viewport to a page.
     *
     * @param {object} pageObj - Object exposing an async `setViewport(options)`
     *   method (in this script, a Puppeteer page).
     * @returns {Promise<void>} Settles once `setViewport()` has settled.
     */
    async function setChromeViewport(pageObj) {
      console.log("-- Trying to Update page viewPort");

      // 1366x738 at scale factor 1, with mobile/touch/landscape emulation off.
      const desktopViewport = {
        width: 1366,
        height: 738,
        deviceScaleFactor: 1,
        isMobile: false,
        hasTouch: false,
        isLandscape: false,
      };

      await pageObj.setViewport(desktopViewport);
    }
    
    /**
     * Navigates a page to `srcUrl` and then saves a clipped PNG next to it.
     *
     * The output path is `srcUrl` with ".png" appended, and only the 795x1125
     * region starting at the page's top-left corner is captured.
     *
     * @param {object} pageObj - Object exposing async `goto(url)` and
     *   `screenshot(options)` methods (in this script, a Puppeteer page).
     * @param {string} srcUrl - Target handed to `goto()`; also the base of the
     *   output file name.
     * @returns {Promise<void>} Settles once the screenshot call has settled.
     */
    var takeScreenshot = async function(pageObj, srcUrl) {
      console.log(`-- Trying to Load Web Page ${srcUrl}`);
      await pageObj.goto(srcUrl);

      console.log("-- Trying to Take Screenshot");
      const outputPath = `${srcUrl}.png`;
      const clip = { x: 0, y: 0, width: 795, height: 1125 };
      await pageObj.screenshot({ path: outputPath, clip });
    };
    
    /**
     * Builds the list of files to screenshot.
     *
     * - If `inputUrl` is a directory, returns every `.svg` file found anywhere
     *   beneath it (recursive glob).
     * - If `inputUrl` is a regular file, returns a one-element list with it.
     * - If `inputUrl` exists but is neither, returns an empty list.
     * - If `inputUrl` does not exist, returns `undefined`, which callers use to
     *   tell "nothing to do" apart from "path is missing".
     *
     * @param {string} [inputUrl] - Directory or file path to scan. Defaults to
     *   the path that used to be hard-coded here, so calling with no argument
     *   behaves as before.
     * @returns {string[]|undefined} Matched file paths, or `undefined` when the
     *   path does not exist.
     */
    function getURLList(inputUrl = "C:/Users/ssp/Music/BR PUBLIC INTER COLLEGE") {
      // Check if given Url/Path exists
      if (!fs.existsSync(inputUrl)) {
        return undefined;
      }

      const inputStats = fs.statSync(inputUrl);
      if (inputStats.isDirectory()) {
        // glob.sync() is the documented synchronous entry point and returns the
        // same array that GlobSync(...).found exposed.
        return glob.sync(`${inputUrl}/**/*.svg`);
      }

      return inputStats.isFile() ? [inputUrl] : [];
    }
    
    /**
     * Using Puppeteer.js
     */
    
    // Script entry point: resolve the file list, then capture the files one at
    // a time on a single page of a single headless browser.
    (async () => {
      // get url list
      const matchedFiles = getURLList();
      if (!matchedFiles) {
        console.log("-- Input Url not exists");
        // if there is no url, then no need to even launch the browser and waste resources
        return;
      }

      console.log("-- Trying to Launch Puppeteer");
      const browserObj = await puppeteer.launch({
        headless: true
      });

      // Everything after a successful launch runs inside try/finally so the
      // browser is closed even when a navigation or screenshot rejects;
      // otherwise the Chrome child process keeps the Node process alive.
      try {
        console.log('-- Trying to Open New Page');
        const pageObj = await browserObj.newPage();

        console.log('-- Change Viewport');
        await setChromeViewport(pageObj);

        console.log('-- Run thru the url list');
        // Sequential on purpose: all screenshots share one page, so each
        // capture must finish before the next navigation starts.
        for (const srcUrl of matchedFiles) {
          await takeScreenshot(pageObj, srcUrl);
        }
      } finally {
        await closeHeadlesssChrome(browserObj);
      }
    })().catch((err) => {
      // Report the failure instead of leaving an unhandled rejection, and
      // signal it through the exit code.
      console.error(err);
      process.exitCode = 1;
    });