Spaces:

Vijayadhith7
/

AURA-Backend

Running

AURA-Backend / python_backend /scratch_scrape_test.py

Upload 16 files

4e5c781 verified 9 days ago

1.12 kB

	import httpx
	from bs4 import BeautifulSoup
	import urllib.parse

	def test_scrape():
	    """Fetch a DuckDuckGo HTML results page and print parsing diagnostics.

	    Sends one GET request for a fixed query with a desktop-browser
	    User-Agent header, parses the response body with BeautifulSoup's
	    ``html.parser`` and prints:

	    * the HTTP status code,
	    * the number of ``<div>`` elements carrying the ``result`` class,
	    * the first 1000 characters of the body when no such element exists,
	    * the class, href and first 30 text characters of the first ten
	      ``<a>`` elements.

	    Nothing is returned or asserted. Exceptions raised by ``httpx.get``
	    are not caught and propagate to the caller.
	    """
	    query = "IPL 2026 Live Scores"
	    url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}"
	    headers = {
	        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
	    }
	    response = httpx.get(url, headers=headers)
	    print("STATUS CODE:", response.status_code)

	    soup = BeautifulSoup(response.text, "html.parser")

	    # Count the <div> elements that carry the 'result' class.
	    results = soup.find_all("div", class_="result")
	    print("Found 'div' with class 'result':", len(results))

	    # Nothing matched: dump the start of the raw body so the markup that
	    # actually came back can be inspected by eye.
	    if not results:
	        print("HTML Snippet (first 1000 chars):")
	        print(response.text[:1000])

	    # NOTE(review): the original indentation was lost, so it cannot be
	    # determined whether this anchor listing was nested under the branch
	    # above. It runs unconditionally here -- confirm.
	    links = soup.find_all("a")
	    print("Total anchor links:", len(links))
	    for anchor in links[:10]:
	        print("Anchor:", anchor.get("class"), anchor.get("href"), anchor.text[:30])

	# Run the scrape check only when this file is executed as a script.
	if __name__ == "__main__":
	    test_scrape()