File size: 1,115 Bytes
4e5c781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import httpx
from bs4 import BeautifulSoup
import urllib.parse

def test_scrape():
    """Fetch a DuckDuckGo HTML search page and print what the parser finds.

    Requests the HTML search endpoint for a fixed query, prints the HTTP
    status code, and reports how many ``<div class="result">`` elements
    were parsed. When none are found, prints diagnostics instead: the
    first 1000 characters of the response body, the total number of
    ``<a>`` tags, and the class, href and first 30 characters of text of
    up to ten of them.

    Returns:
        None. All output goes to stdout.
    """
    search_terms = "IPL 2026 Live Scores"
    encoded_terms = urllib.parse.quote(search_terms)
    url = f"https://html.duckduckgo.com/html/?q={encoded_terms}"
    # Desktop Chrome User-Agent; presumably needed so the endpoint serves
    # a regular results page -- confirm against the site's behaviour.
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    response = httpx.get(url, headers=browser_headers)
    print("STATUS CODE:", response.status_code)

    page = BeautifulSoup(response.text, 'html.parser')

    # Count the containers carrying the 'result' class.
    result_divs = page.find_all('div', class_='result')
    print("Found 'div' with class 'result':", len(result_divs))

    # Results were found: nothing further to diagnose.
    if result_divs:
        return

    # No results: dump the start of the raw markup to see what came back.
    print("HTML Snippet (first 1000 chars):")
    print(response.text[:1000])

    # List a few anchors to help spot the selector the page actually uses.
    anchors = page.find_all('a')
    print("Total anchor links:", len(anchors))
    for anchor in anchors[:10]:
        print("Anchor:", anchor.get('class'), anchor.get('href'), anchor.text[:30])

# Run the scrape diagnostic only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_scrape()