Hana Celeste committed on
Commit
4cd5dc8
·
verified ·
1 Parent(s): e3c24ac

Update app/fetch.py

Browse files
Files changed (1) hide show
  1. app/fetch.py +39 -13
app/fetch.py CHANGED
@@ -1,14 +1,14 @@
1
  import random
2
  import json
 
3
  from fastapi import HTTPException
4
- from playwright.async_api import async_playwright
5
 
6
 
7
  UAS = [
8
  # Windows Chrome
9
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
10
  "(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
11
-
12
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
13
  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
14
 
@@ -40,11 +40,11 @@ class Fetcher:
40
  if self.pw:
41
  await self.pw.stop()
42
 
43
- async def fetch(self, url: str):
44
- ua = random.choice(UAS)
45
 
46
- context = await self.browser.new_context(
47
- user_agent=ua,
 
48
  locale="en-US",
49
  timezone_id="Asia/Ho_Chi_Minh",
50
  extra_http_headers={
@@ -53,17 +53,20 @@ class Fetcher:
53
  },
54
  )
55
 
 
 
56
  page = await context.new_page()
57
 
58
  try:
59
- # warm homepage (bắt buộc với ihentai.dog)
60
  await page.goto(
61
  "https://ihentai.dog/",
62
  wait_until="domcontentloaded",
63
  timeout=15000,
64
  )
 
65
 
66
- # load API như browser thật
67
  resp = await page.goto(
68
  url,
69
  wait_until="domcontentloaded",
@@ -74,15 +77,38 @@ class Fetcher:
74
  raise HTTPException(500, "No response")
75
 
76
  status = resp.status
77
- text = await resp.text()
 
 
 
 
 
78
 
79
- if status != 200:
80
- raise HTTPException(status, "Fetch failed")
 
 
 
 
81
 
 
82
  try:
83
- return json.loads(text)
84
  except Exception:
85
- return {"raw": text}
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  finally:
88
  await page.close()
 
1
  import random
2
  import json
3
+ import asyncio
4
  from fastapi import HTTPException
5
+ from playwright.async_api import async_playwright, TimeoutError
6
 
7
 
8
  UAS = [
9
  # Windows Chrome
10
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
11
  "(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
 
12
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
13
  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
14
 
 
40
  if self.pw:
41
  await self.pw.stop()
42
 
43
+ # =============================
 
44
 
45
+ async def _new_context(self):
46
+ return await self.browser.new_context(
47
+ user_agent=random.choice(UAS),
48
  locale="en-US",
49
  timezone_id="Asia/Ho_Chi_Minh",
50
  extra_http_headers={
 
53
  },
54
  )
55
 
56
+ async def fetch(self, url: str, retry: int = 1):
57
+ context = await self._new_context()
58
  page = await context.new_page()
59
 
60
  try:
61
+ # ===== CF WARM UP =====
62
  await page.goto(
63
  "https://ihentai.dog/",
64
  wait_until="domcontentloaded",
65
  timeout=15000,
66
  )
67
+ await page.wait_for_timeout(1200) # CF settle
68
 
69
+ # ===== REAL NAVIGATION =====
70
  resp = await page.goto(
71
  url,
72
  wait_until="domcontentloaded",
 
77
  raise HTTPException(500, "No response")
78
 
79
  status = resp.status
80
+ headers = resp.headers
81
+ body = await resp.text()
82
+
83
+ # ===== STATUS HANDLE =====
84
+ if status >= 400:
85
+ raise HTTPException(status, "Upstream error")
86
 
87
+ if not body:
88
+ return {
89
+ "status": status,
90
+ "headers": headers,
91
+ "data": None,
92
+ }
93
 
94
+ # ===== JSON SAFE PARSE =====
95
  try:
96
+ data = json.loads(body)
97
  except Exception:
98
+ data = body
99
+
100
+ return {
101
+ "status": status,
102
+ "headers": headers,
103
+ "data": data,
104
+ }
105
+
106
+ except TimeoutError:
107
+ if retry > 0:
108
+ await page.close()
109
+ await context.close()
110
+ return await self.fetch(url, retry=retry - 1)
111
+ raise HTTPException(504, "Timeout")
112
 
113
  finally:
114
  await page.close()