Spaces:
Running
Running
| import sys | |
| import json | |
| import os | |
| from requests_html import HTMLSession | |
# Location checked for a system-provided Chromium binary.
_CHROMIUM_PATH = "/usr/bin/chromium"

# When that binary exists, export its path through the pyppeteer environment
# variable so it is visible before any page rendering happens.
# NOTE(review): pyppeteer's documentation describes PYPPETEER_CHROMIUM_REVISION
# as a Chromium revision identifier rather than an executable path -- confirm
# that storing a path here has the intended effect.
if os.path.exists(_CHROMIUM_PATH):
    os.environ["PYPPETEER_CHROMIUM_REVISION"] = _CHROMIUM_PATH
def scrape_website(
    url: str,
    *,
    request_timeout: float = 15,
    render_timeout: float = 20,
    max_chars: int = 8000,
) -> dict:
    """Fetch *url*, render the page, and return its whitespace-normalised text.

    The page is requested through an ``HTMLSession``, rendered with
    ``res.html.render`` and its text is collapsed so that every run of
    whitespace becomes a single space before being truncated.

    Args:
        url: Address of the page to scrape.
        request_timeout: Timeout value passed to ``session.get``.
        render_timeout: Timeout value passed to ``res.html.render``.
        max_chars: Maximum number of characters of page text to return.

    Returns:
        ``{"text": <str>}`` on success, or ``{"error": <str>}`` describing
        the failure when fetching, rendering or text extraction raises.
        Failures are reported through the returned dict rather than raised.
    """
    # The session is used as a context manager so it is closed on every exit
    # path, including the early returns below.
    with HTMLSession() as session:
        try:
            res = session.get(url, timeout=request_timeout)
            res.html.render(timeout=render_timeout)
            # split()/join collapses newlines, tabs and repeated spaces.
            text = " ".join(res.html.text.split())
            return {"text": text[:max_chars]}
        except Exception as e:
            # Boundary handler: callers inspect the "error" key instead of
            # catching exceptions.
            return {"error": f"Scraping failed for {url}: {e}"}
if __name__ == "__main__":
    # Command-line entry point: the first argument is the URL to scrape.
    cli_args = sys.argv[1:]
    if not cli_args:
        # Missing URL: report as a JSON object on stdout and exit non-zero.
        print(json.dumps({"error": "No URL provided"}))
        sys.exit(1)

    outcome = scrape_website(cli_args[0])

    if "error" not in outcome:
        # Success: emit the result as JSON on stdout.
        print(json.dumps(outcome))
        sys.exit(0)

    # Failure: the plain error message goes to stderr, exit status is 1.
    print(outcome["error"], file=sys.stderr)
    sys.exit(1)