I can't seem to figure out why Pyppeteer hangs in headless mode, but
if headless mode is turned off the script works perfectly.
By "hanging" I mean that nothing happens (I even waited 5 minutes):
nothing is printed, no errors are raised, etc.
import requests
from bs4 import BeautifulSoup
import time
import os
import pyppeteer
from pyppeteer import launch
import asyncio
import subprocess
# Path of the "agents" directory located next to this file.  os.path.join
# inserts the path separator that plain string concatenation omitted (the old
# value was "<dir>agents"), and abspath keeps the result independent of the
# current working directory.
AGENT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'agents')
# Page listing the most common user agents.
URL = 'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/'
def get_latest_agents(*, headless=True):
    """Scrape the user-agent list from ``URL`` and return it as lines.

    Launches a browser through pyppeteer, opens ``URL``, waits a fixed
    9 seconds, then reads the text of the first element matching the CSS
    selector ``.get-the-list`` and splits it on newlines.

    Args:
        headless: Forwarded to ``launch``.  Defaults to ``True``, which is
            what the function used before this parameter existed.

    Returns:
        The list of lines found in the ``.get-the-list`` element.  The list
        is also printed, as before.

    Raises:
        IndexError: If the loaded page has no ``.get-the-list`` element.
    """
    async def scrape():
        browser = await launch(headless=headless)
        try:
            page = await browser.newPage()
            await page.goto(URL)
            # Fixed delay (milliseconds) so the page can finish populating.
            await page.waitFor(9000)
            content = await page.content()
        finally:
            # close() is a coroutine: without ``await`` it never runs and
            # the browser process is left behind.  The ``finally`` makes
            # sure it is shut down even when navigation fails.
            await browser.close()

        soup = BeautifulSoup(content, 'html.parser')
        matches = soup.select('.get-the-list')
        if not matches:
            # Same exception type as indexing an empty result, but with a
            # message that says what was actually missing.
            raise IndexError(
                "no element matching '.get-the-list' found at " + URL
            )
        return matches[0].text.split('\n')

    loop = asyncio.get_event_loop()
    agents = loop.run_until_complete(scrape())
    print(agents)
    return agents
if __name__ == '__main__':
    # Force-kill any chrome.exe processes before scraping.  ``taskkill`` is a
    # Windows-only command; calling it on another platform raises
    # FileNotFoundError, so it is skipped there.
    if os.name == 'nt':
        subprocess.call(['taskkill', '/F', '/im', 'chrome.exe'])
    get_latest_agents()
question from:
https://stackoverflow.com/questions/65894850/pyppetter-hangs-when-in-headless-mode 与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…