File size: 611 Bytes
94b06be
9710aee
94b06be
 
 
 
9710aee
 
 
94b06be
9710aee
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import httpx
from bs4 import BeautifulSoup
from settings import USER_AGENT

class FetchTools:
    """Helpers for downloading web pages and reducing them to plain text."""

    async def get_text_from_url(self, url: str) -> str:
        """Fetch *url* over HTTP and return the page text as one line.

        The request is sent with the configured ``USER_AGENT`` header,
        follows redirects, and uses a timeout of 60. The response body is
        parsed as HTML; ``<script>``, ``<style>`` and ``<noscript>``
        elements are removed before the text is extracted.

        Args:
            url: Address of the page to download.

        Returns:
            The remaining text of the document, with every run of
            whitespace collapsed to a single space and no leading or
            trailing whitespace.

        Raises:
            httpx.HTTPStatusError: Propagated from ``raise_for_status()``
                when the response carries an error status code.
        """
        # The client only lives for this one request; the context manager
        # closes it before any parsing work starts.
        async with httpx.AsyncClient(timeout=60, follow_redirects=True) as session:
            response = await session.get(url, headers={"User-Agent": USER_AGENT})
            response.raise_for_status()
            markup = response.text

        document = BeautifulSoup(markup, "html.parser")

        # Drop elements whose contents are code or fallback markup, not prose.
        for element in document.find_all(["script", "style", "noscript"]):
            element.decompose()

        # split() with no arguments discards all whitespace runs, so joining
        # with a single space normalises the spacing in one pass.
        words = document.get_text(separator=" ").split()
        return " ".join(words)