Spaces:
Running
Running
import urllib.parse

import httpx
from bs4 import BeautifulSoup
def test_scrape():
    """Probe DuckDuckGo's HTML search endpoint and print what the parser sees.

    Sends one hard-coded query with a desktop-browser User-Agent header,
    prints the HTTP status code, and reports how many ``<div class="result">``
    elements BeautifulSoup finds in the response body. When none are found,
    prints the first 1000 characters of the body and, for up to ten anchor
    tags, their ``class``, ``href`` and the first 30 characters of text.

    A request-level failure (connection error, timeout, ...) is reported on
    stdout and ends the probe early instead of surfacing as a traceback.
    Nothing is returned; all output goes to stdout.
    """
    query = "IPL 2026 Live Scores"
    url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    try:
        response = httpx.get(url, headers=headers)
    except httpx.RequestError as exc:
        # Without a response there is nothing to inspect, so stop here.
        print("REQUEST FAILED:", repr(exc))
        return
    print("STATUS CODE:", response.status_code)

    soup = BeautifulSoup(response.text, 'html.parser')

    # First guess at the result markup: <div class="result"> containers.
    results = soup.find_all('div', class_='result')
    print("Found 'div' with class 'result':", len(results))
    if results:
        return

    # The selector matched nothing: dump raw material so the real page
    # structure can be worked out by eye.
    # NOTE(review): the indentation of the original was lost; the anchor dump
    # below is assumed to belong to this no-results branch -- confirm.
    print("HTML Snippet (first 1000 chars):")
    print(response.text[:1000])

    anchors = soup.find_all('a')
    print("Total anchor links:", len(anchors))
    for anchor in anchors[:10]:
        print("Anchor:", anchor.get('class'), anchor.get('href'), anchor.text[:30])
# Run the probe only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_scrape()