Spaces:
Running
Running
File size: 1,115 Bytes
import httpx
from bs4 import BeautifulSoup
import urllib.parse
def test_scrape():
    """Fetch a DuckDuckGo HTML results page and print scraping diagnostics.

    Sends one GET request for a fixed query with a browser-like User-Agent,
    then prints:

    * the HTTP status code,
    * how many ``div`` elements carry the class ``result``,
    * the first 1000 characters of the response body when no such element
      was found,
    * the total number of anchor tags, and the class, href and first 30
      text characters of the first ten of them.

    This performs a live network request. No exception is caught here, so
    any error raised by ``httpx.get`` propagates to the caller.
    """
    query = "IPL 2026 Live Scores"
    url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    response = httpx.get(url, headers=headers)
    print("STATUS CODE:", response.status_code)

    soup = BeautifulSoup(response.text, 'html.parser')

    # Count the containers the scraper expects to hold individual results.
    results = soup.find_all('div', class_='result')
    print("Found 'div' with class 'result':", len(results))

    # Nothing matched: dump the start of the raw HTML so the actual page
    # structure can be inspected.
    if not results:
        print("HTML Snippet (first 1000 chars):")
        print(response.text[:1000])

    # NOTE(review): the original indentation was lost; the anchor dump is
    # assumed to run unconditionally rather than only when no result was
    # found -- confirm against the original file.
    links = soup.find_all('a')
    print("Total anchor links:", len(links))
    for anchor in links[:10]:
        print("Anchor:", anchor.get('class'), anchor.get('href'), anchor.text[:30])
# Run the scrape diagnostic only when this file is executed directly as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    test_scrape()
|