Mirror of https://git.freebsd.org/ports.git, synced 2025-06-18 19:20:36 -04:00
139 lines · 4.3 KiB · Python
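Disable the optional Playwright code path: install_playwright() is stubbed to
return False, Scraper.playwright_available is pinned to False, and scrape()
always falls back to scrape_with_httpx(). Playwright ships its own prebuilt
Chromium, which is not available on FreeBSD, so httpx is the only workable
backend for this port.
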
--- aider/scrape.py.orig	2024-09-09 10:28:04 UTC
+++ aider/scrape.py
@@ -15,57 +15,8 @@ def install_playwright(io):
 
 
 def install_playwright(io):
-    try:
-        from playwright.sync_api import sync_playwright
+    return False
 
-        has_pip = True
-    except ImportError:
-        has_pip = False
-
-    try:
-        with sync_playwright() as p:
-            p.chromium.launch()
-            has_chromium = True
-    except Exception:
-        has_chromium = False
-
-    if has_pip and has_chromium:
-        return True
-
-    pip_cmd = utils.get_pip_install(["aider-chat[playwright]"])
-    chromium_cmd = "-m playwright install --with-deps chromium"
-    chromium_cmd = [sys.executable] + chromium_cmd.split()
-
-    cmds = ""
-    if not has_pip:
-        cmds += " ".join(pip_cmd) + "\n"
-    if not has_chromium:
-        cmds += " ".join(chromium_cmd) + "\n"
-
-    text = f"""For the best web scraping, install Playwright:
-
-{cmds}
-See {urls.enable_playwright} for more info.
-"""
-
-    io.tool_output(text)
-    if not io.confirm_ask("Install playwright?", default="y"):
-        return
-
-    if not has_pip:
-        success, output = utils.run_install(pip_cmd)
-        if not success:
-            io.tool_error(output)
-            return
-
-    success, output = utils.run_install(chromium_cmd)
-    if not success:
-        io.tool_error(output)
-        return
-
-    return True
-
-
 class Scraper:
     pandoc_available = None
     playwright_available = None
@@ -82,7 +33,7 @@ class Scraper:
         else:
             self.print_error = print
 
-        self.playwright_available = playwright_available
+        self.playwright_available = False
         self.verify_ssl = verify_ssl
 
     def scrape(self, url):
@@ -93,10 +44,7 @@
         `url` - the URL to scrape.
         """
 
-        if self.playwright_available:
-            content, mime_type = self.scrape_with_playwright(url)
-        else:
-            content, mime_type = self.scrape_with_httpx(url)
+        content, mime_type = self.scrape_with_httpx(url)
 
         if not content:
             self.print_error(f"Failed to retrieve content from {url}")
@@ -130,56 +78,6 @@ class Scraper:
         return False
 
     # Internals...
-    def scrape_with_playwright(self, url):
-        import playwright  # noqa: F401
-        from playwright.sync_api import Error as PlaywrightError
-        from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
-        from playwright.sync_api import sync_playwright
-
-        with sync_playwright() as p:
-            try:
-                browser = p.chromium.launch()
-            except Exception as e:
-                self.playwright_available = False
-                self.print_error(str(e))
-                return None, None
-
-            try:
-                context = browser.new_context(ignore_https_errors=not self.verify_ssl)
-                page = context.new_page()
-
-                user_agent = page.evaluate("navigator.userAgent")
-                user_agent = user_agent.replace("Headless", "")
-                user_agent = user_agent.replace("headless", "")
-                user_agent += " " + aider_user_agent
-
-                page.set_extra_http_headers({"User-Agent": user_agent})
-
-                response = None
-                try:
-                    response = page.goto(url, wait_until="networkidle", timeout=5000)
-                except PlaywrightTimeoutError:
-                    self.print_error(f"Timeout while loading {url}")
-                except PlaywrightError as e:
-                    self.print_error(f"Error navigating to {url}: {str(e)}")
-                    return None, None
-
-                try:
-                    content = page.content()
-                    mime_type = None
-                    if response:
-                        content_type = response.header_value("content-type")
-                        if content_type:
-                            mime_type = content_type.split(";")[0]
-                except PlaywrightError as e:
-                    self.print_error(f"Error retrieving page content: {str(e)}")
-                    content = None
-                    mime_type = None
-            finally:
-                browser.close()
-
-        return content, mime_type
-
     def scrape_with_httpx(self, url):
         import httpx
 