ports/misc/py-aider-chat/files/patch-aider_scrape.py

--- aider/scrape.py.orig	2024-05-13 18:19:39 UTC
+++ aider/scrape.py
@@ -6,7 +6,6 @@ from bs4 import BeautifulSoup
 import httpx
 import pypandoc
 from bs4 import BeautifulSoup
-from playwright.sync_api import sync_playwright

 from aider import __version__

@@ -42,14 +41,10 @@ class Scraper:
         """
         Scrape a url and turn it into readable markdown.

-        `url` - the URLto scrape.
+        `url` - the URL to scrape.
         """
-        self.try_playwright()

-        if self.playwright_available:
-            content = self.scrape_with_playwright(url)
-        else:
-            content = self.scrape_with_httpx(url)
+        content = self.scrape_with_httpx(url)

         if not content:
             return
@@ -62,49 +57,6 @@ class Scraper:
         return content

     # Internals...
-    def scrape_with_playwright(self, url):
-        with sync_playwright() as p:
-            try:
-                browser = p.chromium.launch()
-            except Exception as e:
-                self.playwright_available = False
-                self.print_error(e)
-                return
-
-            page = browser.new_page()
-
-            user_agent = page.evaluate("navigator.userAgent")
-            user_agent = user_agent.replace("Headless", "")
-            user_agent = user_agent.replace("headless", "")
-            user_agent += " " + aider_user_agent
-
-            page = browser.new_page(user_agent=user_agent)
-            page.goto(url)
-            content = page.content()
-            browser.close()
-
-        return content
-
-    def try_playwright(self):
-        if self.playwright_available is not None:
-            return
-
-        with sync_playwright() as p:
-            try:
-                p.chromium.launch()
-                self.playwright_available = True
-            except Exception:
-                self.playwright_available = False
-
-    def get_playwright_instructions(self):
-        if self.playwright_available in (True, None):
-            return
-        if self.playwright_instructions_shown:
-            return
-
-        self.playwright_instructions_shown = True
-        return PLAYWRIGHT_INFO
-
     def scrape_with_httpx(self, url):
         headers = {"User-Agent": f"Mozilla./5.0 ({aider_user_agent})"}
         try: