Spaces:

snakeeee
/

scholar-rag-engine

Sleeping

Initial commit - Scholar RAG Engine

1505bbf about 1 month ago

399 Bytes

	import requests
	from bs4 import BeautifulSoup

	def scrape_url(url, timeout=30):
	    """Fetch a web page and return the text of its headings, paragraphs and list items.

	    The page is requested with a browser-like ``User-Agent`` header, parsed
	    with BeautifulSoup's ``html.parser``, and the whitespace-stripped text of
	    every ``h1``, ``h2``, ``h3``, ``p`` and ``li`` element is joined with
	    single spaces. Elements whose stripped text is empty are skipped.

	    The HTTP status code is not checked, so the body of an error page
	    (e.g. a 404) is parsed like any other response.

	    Args:
	        url: Address of the page to fetch.
	        timeout: Seconds to wait for the server, passed straight to
	            ``requests.get``. May be a single number or a
	            ``(connect, read)`` tuple; ``None`` waits indefinitely.

	    Returns:
	        The extracted text as a single string; empty if no matching element
	        contains text.

	    Raises:
	        requests.exceptions.RequestException: If the request fails, including
	            when ``timeout`` is exceeded.
	    """
	    headers = {"User-Agent": "Mozilla/5.0"}

	    # requests applies no timeout by default; without one an unresponsive
	    # server would block the caller forever.
	    response = requests.get(url, headers=headers, timeout=timeout)

	    soup = BeautifulSoup(response.text, "html.parser")
	    elements = soup.find_all(["h1", "h2", "h3", "p", "li"])

	    # Extract each element's text once, then drop the empty results.
	    stripped = (el.get_text(strip=True) for el in elements)
	    return " ".join(chunk for chunk in stripped if chunk)