File size: 1,595 Bytes
0ec1bb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import json
import os
import time

import requests
from bs4 import BeautifulSoup

SERPAPI_KEY = "c3e3e8fd8d12ca55d8a8954a14bf827f2d4261ef55373b381661f23b1440a2af"  # Replace with your SerpAPI key

def google_search(query, num_results=1000):
    """Return result URLs for *query* via the SerpAPI Google engine.

    Args:
        query: Search phrase passed through to Google.
        num_results: Requested number of results. NOTE(review): SerpAPI caps
            ``num`` at 100 per request, so values above that are clamped
            server-side — confirm whether pagination is needed here.

    Returns:
        list[str]: The ``link`` field of each organic result, in rank order.

    Raises:
        requests.HTTPError: If SerpAPI responds with an error status.
        requests.Timeout: If the API does not answer within 30 seconds.
    """
    url = "https://serpapi.com/search"
    params = {
        "engine": "google",
        "q": query,
        "api_key": SERPAPI_KEY,
        "num": num_results,
    }
    # Timeout prevents the whole scrape run from hanging on a stalled call;
    # raise_for_status surfaces auth/quota errors instead of letting .json()
    # fail obscurely or return an error payload that parses as "no results".
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()
    # Keep only organic results that actually carry a link.
    return [r["link"] for r in data.get("organic_results", []) if r.get("link")]

def scrape_page(url, max_chars=20000):
    """Fetch *url* and return its visible text, truncated to *max_chars*.

    Args:
        url: Page to download.
        max_chars: Maximum number of characters of extracted text to return
            (default 20,000 — the old inline comment claiming "2000 chars"
            was wrong).

    Returns:
        str: The page's text content, or an ``"[SCRAPE ERROR] ..."`` string
        describing the failure. Errors are reported inline rather than
        raised so one bad page does not abort a whole scraping run.
    """
    try:
        resp = requests.get(url, timeout=10, headers={"User-Agent": "Mozilla/5.0"})
        soup = BeautifulSoup(resp.content, "html.parser")
        text = soup.get_text(separator="\n", strip=True)
        return text[:max_chars]
    except Exception as e:  # deliberate best-effort: report, don't crash the run
        return f"[SCRAPE ERROR] {e}"

def scrape_topic_and_save(topic, filename="results.json", delay=0):
    """Search for *topic*, scrape each result page, and save all to JSON.

    Args:
        topic: Search phrase handed to :func:`google_search`.
        filename: Output path for the JSON results file.
        delay: Optional politeness pause (seconds) between page fetches.
            Defaults to 0 — the old code called ``time.sleep(0)``, a no-op.

    Side effects:
        Writes *filename* and prints progress to stdout.
    """
    links = google_search(topic)
    results = []
    for url in links:
        print(f"Scraping: {url}")
        results.append({"url": url, "content": scrape_page(url)})
        if delay:
            time.sleep(delay)
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    # BUG FIX: the previous version printed the literal text "(unknown)"
    # here instead of interpolating the actual output filename.
    print(f"Saved {len(results)} results to {filename}")

if __name__ == "__main__":
    # Interactive entry point: prompt for a topic, then run the pipeline
    # with the default output filename.
    user_topic = input("Enter topic to search: ")
    scrape_topic_and_save(user_topic)