Hana Celeste committed on
Update app/fetch.py
Browse files- app/fetch.py +12 -45
app/fetch.py
CHANGED
|
@@ -1,53 +1,20 @@
|
|
| 1 |
-
import
|
| 2 |
-
from fastapi import HTTPException
|
| 3 |
-
from urllib.parse import quote_plus
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
SCRAPINGANT_KEY = "fdd15bd8d6f042f893d8bb93a8e47722"
|
| 7 |
-
SCRAPINGANT_ENDPOINT = "https://api.scrapingant.com/v2/general"
|
| 8 |
|
| 9 |
|
| 10 |
class Fetcher:
|
|
|
|
|
|
|
|
|
|
| 11 |
async def start(self):
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
async def stop(self):
|
| 16 |
-
|
|
|
|
|
|
|
| 17 |
|
| 18 |
async def fetch(self, url: str):
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
"
|
| 22 |
-
"proxy_type": "residential",
|
| 23 |
-
"proxy_country": "VN",
|
| 24 |
-
"browser": "false",
|
| 25 |
-
}
|
| 26 |
-
|
| 27 |
-
try:
|
| 28 |
-
async with httpx.AsyncClient(timeout=20) as client:
|
| 29 |
-
resp = await client.get(
|
| 30 |
-
SCRAPINGANT_ENDPOINT,
|
| 31 |
-
params=params,
|
| 32 |
-
headers={
|
| 33 |
-
"Accept": "application/json, text/plain, */*",
|
| 34 |
-
"User-Agent": "Mozilla/5.0",
|
| 35 |
-
},
|
| 36 |
-
)
|
| 37 |
-
|
| 38 |
-
if resp.status_code != 200:
|
| 39 |
-
raise HTTPException(
|
| 40 |
-
status_code=resp.status_code,
|
| 41 |
-
detail="Upstream error",
|
| 42 |
-
)
|
| 43 |
-
|
| 44 |
-
text = resp.text.strip()
|
| 45 |
-
|
| 46 |
-
# ScrapingAnt trả JSON thẳng nếu API
|
| 47 |
-
try:
|
| 48 |
-
return resp.json()
|
| 49 |
-
except Exception:
|
| 50 |
-
return {"raw": text}
|
| 51 |
-
|
| 52 |
-
except httpx.RequestError as e:
|
| 53 |
-
raise HTTPException(502, f"Proxy error: {str(e)}")
|
|
|
|
| 1 |
+
import aiohttp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
class Fetcher:
    """Fetch URLs over HTTP through one shared ``aiohttp.ClientSession``.

    Lifecycle: call ``await start()`` before the first ``fetch()`` and
    ``await stop()`` when done so the underlying session is closed.
    """

    def __init__(self):
        # aiohttp.ClientSession once start() has run; None before start()
        # and after stop().
        self.session = None

    async def start(self) -> None:
        """Create the shared session if there is no usable one.

        Calling this repeatedly is harmless: an open session is reused,
        while a missing or already-closed one is replaced.
        """
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession()

    async def stop(self) -> None:
        """Close the shared session, if any, and forget it."""
        session = self.session
        if session is None:
            return
        try:
            await session.close()
        finally:
            # Drop the reference even if close() raises, so a later
            # start() always builds a fresh session.
            self.session = None

    async def fetch(self, url: str, *, timeout: float = 30) -> dict:
        """GET *url* and return its status and decoded body.

        Args:
            url: Address to request.
            timeout: Total time budget for the request, in seconds.

        Returns:
            A dict ``{"ok": True, "status": <HTTP status>, "data": <body text>}``.

        Raises:
            RuntimeError: If no session is open, i.e. ``start()`` has not
                been called or ``stop()`` already ran.
        """
        session = self.session
        if session is None:
            raise RuntimeError(
                "Fetcher session is not started; call 'await start()' first"
            )

        # A bare number for ``timeout`` is deprecated by aiohttp; the
        # supported form is an explicit ClientTimeout.
        budget = aiohttp.ClientTimeout(total=timeout)
        async with session.get(url, timeout=budget) as resp:
            text = await resp.text()
            # NOTE(review): "ok" is True for every completed response,
            # including 4xx/5xx statuses; callers must inspect "status".
            return {"ok": True, "status": resp.status, "data": text}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|