Spaces:
Running
Running
File size: 611 Bytes
94b06be 9710aee 94b06be 9710aee 94b06be 9710aee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
import httpx
from bs4 import BeautifulSoup
from settings import USER_AGENT
class FetchTools:
    """Utilities for downloading a web page and reducing it to plain text."""

    async def get_text_from_url(self, url: str) -> str:
        """Download *url* and return its text content on a single line.

        The page is requested with the configured ``USER_AGENT`` header,
        following redirects, with a timeout of 60 passed to the HTTP client.
        ``<script>``, ``<style>`` and ``<noscript>`` elements are removed
        before the text is extracted, and every run of whitespace is
        collapsed to a single space.

        Args:
            url: Address of the page to fetch.

        Returns:
            The whitespace-normalised text of the parsed document.

        Raises:
            Whatever ``httpx`` raises for transport failures, and whatever
            ``Response.raise_for_status()`` raises for an error status.
        """
        request_headers = {"User-Agent": USER_AGENT}

        async with httpx.AsyncClient(timeout=60, follow_redirects=True) as session:
            response = await session.get(url, headers=request_headers)
            # Fail loudly on an error status instead of parsing an error page.
            response.raise_for_status()
            markup = response.text

        document = BeautifulSoup(markup, "html.parser")

        # Drop elements whose contents are never rendered as page text.
        for element in document.find_all(["script", "style", "noscript"]):
            element.decompose()

        # split() with no arguments discards all whitespace runs, so joining
        # the pieces back yields single-space separated text with no
        # leading or trailing blanks.
        words = document.get_text(separator=" ").split()
        return " ".join(words)
|