"""Command-line helper that fetches a URL, renders it, and prints its text as JSON.

Usage: ``python <script> <url>``

On success a JSON object ``{"text": ...}`` is written to stdout and the
process exits with status 0. On failure the process exits with status 1.
"""

import sys
import json
import os

from requests_html import HTMLSession

# Seconds allowed for the initial HTTP request.
_FETCH_TIMEOUT_S = 15
# Seconds allowed for requests_html to render the page.
_RENDER_TIMEOUT_S = 20
# Upper bound on the number of characters of page text returned.
_MAX_TEXT_CHARS = 8000

# When a system Chromium binary is present, point pyppeteer's revision
# setting at it.
# NOTE(review): pyppeteer documents PYPPETEER_CHROMIUM_REVISION as a Chromium
# revision identifier rather than an executable path, and requests_html has
# already been imported by the time this runs -- confirm this override has
# the intended effect before relying on it.
if os.path.exists("/usr/bin/chromium"):
    os.environ['PYPPETEER_CHROMIUM_REVISION'] = '/usr/bin/chromium'


def scrape_website(url: str) -> dict:
    """Fetch *url*, render it, and return its visible text.

    Whitespace runs in the page text are collapsed to single spaces and the
    result is truncated to the first ``_MAX_TEXT_CHARS`` characters.

    Args:
        url: Address of the page to scrape.

    Returns:
        ``{"text": <page text>}`` on success, or
        ``{"error": <message>}`` if fetching or rendering raised. This
        function reports failures through the returned dict instead of
        raising, so callers must check for the ``"error"`` key.
    """
    session = HTMLSession()
    try:
        res = session.get(url, timeout=_FETCH_TIMEOUT_S)
        res.html.render(timeout=_RENDER_TIMEOUT_S)
        # split()/join collapses every run of whitespace (including
        # newlines) into a single space.
        text = " ".join(res.html.text.split())
        return {"text": text[:_MAX_TEXT_CHARS]}
    except Exception as e:
        # Deliberate catch-all boundary: any failure is converted into an
        # error payload for the caller.
        return {"error": f"Scraping failed for {url}: {e}"}
    finally:
        # Always release the session, on success and on failure alike.
        session.close()


def main() -> int:
    """Run the CLI and return the process exit status.

    A missing URL argument is reported as a JSON error object on stdout;
    a scraping failure is reported as a plain message on stderr. Both
    yield exit status 1. Success prints the JSON result to stdout and
    yields 0.
    """
    if len(sys.argv) < 2:
        print(json.dumps({"error": "No URL provided"}))
        return 1

    result = scrape_website(sys.argv[1])
    if "error" in result:
        print(result["error"], file=sys.stderr)
        return 1

    print(json.dumps(result))
    return 0


if __name__ == "__main__":
    sys.exit(main())