ports/misc/py-aider-chat/files/patch-aider_scrape.py
Dave Cottlehuber eeff19a86c misc/py-aider-chat: new port - AI pair programming in your terminal
Sponsored by:	SkunkWerks, GmbH
2024-05-22 17:11:40 +00:00

77 lines
2.3 KiB
Python

--- aider/scrape.py.orig 2024-05-13 18:19:39 UTC
+++ aider/scrape.py
@@ -6,7 +6,6 @@ from bs4 import BeautifulSoup
import httpx
import pypandoc
from bs4 import BeautifulSoup
-from playwright.sync_api import sync_playwright
from aider import __version__
@@ -42,14 +41,10 @@ class Scraper:
"""
Scrape a url and turn it into readable markdown.
- `url` - the URLto scrape.
+ `url` - the URL to scrape.
"""
- self.try_playwright()
- if self.playwright_available:
- content = self.scrape_with_playwright(url)
- else:
- content = self.scrape_with_httpx(url)
+ content = self.scrape_with_httpx(url)
if not content:
return
@@ -62,49 +57,6 @@ class Scraper:
return content
# Internals...
- def scrape_with_playwright(self, url):
- with sync_playwright() as p:
- try:
- browser = p.chromium.launch()
- except Exception as e:
- self.playwright_available = False
- self.print_error(e)
- return
-
- page = browser.new_page()
-
- user_agent = page.evaluate("navigator.userAgent")
- user_agent = user_agent.replace("Headless", "")
- user_agent = user_agent.replace("headless", "")
- user_agent += " " + aider_user_agent
-
- page = browser.new_page(user_agent=user_agent)
- page.goto(url)
- content = page.content()
- browser.close()
-
- return content
-
- def try_playwright(self):
- if self.playwright_available is not None:
- return
-
- with sync_playwright() as p:
- try:
- p.chromium.launch()
- self.playwright_available = True
- except Exception:
- self.playwright_available = False
-
- def get_playwright_instructions(self):
- if self.playwright_available in (True, None):
- return
- if self.playwright_instructions_shown:
- return
-
- self.playwright_instructions_shown = True
- return PLAYWRIGHT_INFO
-
def scrape_with_httpx(self, url):
headers = {"User-Agent": f"Mozilla./5.0 ({aider_user_agent})"}
try: