Spaces:
Sleeping
Sleeping
| import httpx | |
| from bs4 import BeautifulSoup | |
| from settings import USER_AGENT | |
class FetchTools:
    """Helpers for retrieving web pages and reducing them to plain text."""

    async def get_text_from_url(self, url: str) -> str:
        """Download *url* and return its visible text as a single line.

        The page is requested with an HTTP GET that sends the configured
        ``USER_AGENT``, follows redirects, and uses a 60-second timeout.
        ``<script>``, ``<style>`` and ``<noscript>`` elements are removed
        before text extraction, and every run of whitespace in the result
        is collapsed to one space.

        Args:
            url: Address of the page to fetch.

        Returns:
            The whitespace-normalised text content of the page.

        Raises:
            Any exception raised by ``httpx`` for a failed request, including
            the error raised by ``raise_for_status()`` on an error status.
        """
        async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
            response = await client.get(url, headers={"User-Agent": USER_AGENT})
            response.raise_for_status()
            markup = response.text

        document = BeautifulSoup(markup, "html.parser")

        # Drop elements whose contents are never rendered as page text.
        for element in document.find_all(["script", "style", "noscript"]):
            element.decompose()

        # split() with no arguments discards all whitespace runs (including
        # newlines), so re-joining with a single space normalises spacing.
        words = document.get_text(separator=" ").split()
        return " ".join(words)