File size: 895 Bytes
723bbe6
 
925c4eb
 
723bbe6
925c4eb
 
723bbe6
 
 
 
 
 
 
91df0bf
723bbe6
91df0bf
723bbe6
 
 
 
 
 
 
 
 
 
9cdbd5b
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import sys
import json
import os

from requests_html import HTMLSession
# Location of a distro-packaged Chromium binary. When it is present, its path
# is exported through the PYPPETEER_CHROMIUM_REVISION environment variable.
# NOTE(review): the variable name suggests it carries a revision identifier
# rather than a filesystem path, and this assignment runs after requests_html
# has already been imported -- confirm that pyppeteer actually honours it.
_SYSTEM_CHROMIUM = "/usr/bin/chromium"
if os.path.exists(_SYSTEM_CHROMIUM):
    os.environ["PYPPETEER_CHROMIUM_REVISION"] = _SYSTEM_CHROMIUM

def scrape_website(
    url: str,
    *,
    request_timeout: float = 15,
    render_timeout: float = 20,
    max_chars: int = 8000,
) -> dict:
    """Fetch *url*, render it in a headless browser, and return its text.

    Failures while fetching or rendering are not raised; they are reported
    through the returned dict so callers can branch on the ``"error"`` key.

    Args:
        url: Address of the page to scrape.
        request_timeout: Seconds allowed for the initial HTTP GET.
        render_timeout: Seconds allowed for the browser render step.
        max_chars: Upper bound on the number of characters of text returned.

    Returns:
        ``{"text": <str>}`` on success, where the text has every run of
        whitespace collapsed to a single space and is truncated to
        ``max_chars`` characters; ``{"error": <message>}`` on failure.
    """
    # Reject blank input up front: no session or browser is needed to know
    # that an empty URL cannot be scraped.
    if not (url and url.strip()):
        return {"error": f"Scraping failed for {url}: empty URL"}

    session = HTMLSession()
    try:
        response = session.get(url, timeout=request_timeout)
        # Execute the page's JavaScript so dynamically inserted content is
        # included in the extracted text.
        response.html.render(timeout=render_timeout)
        # split()/join collapses newlines, tabs and repeated spaces.
        text = " ".join(response.html.text.split())
        return {"text": text[:max_chars]}
    except Exception as e:
        # Deliberate catch-all at the scraping boundary: any network, render
        # or parsing problem is converted into an error payload.
        return {"error": f"Scraping failed for {url}: {e}"}
    finally:
        # Always release the session, whether scraping succeeded or not.
        session.close()

if __name__ == "__main__":
    # Command-line entry point: expects the target URL as the first argument.
    cli_args = sys.argv[1:]
    if not cli_args:
        # Missing argument: emit a JSON error object on stdout and fail.
        print(json.dumps({"error": "No URL provided"}))
        sys.exit(1)

    outcome = scrape_website(cli_args[0])
    if "error" not in outcome:
        # Success: the scraped payload is written to stdout as JSON.
        print(json.dumps(outcome))
        sys.exit(0)

    # Scrape failure: the plain-text message goes to stderr, non-zero exit.
    print(outcome["error"], file=sys.stderr)
    sys.exit(1)