plan291037 committed on
Commit
3f99a4f
·
verified ·
1 Parent(s): 5a7418a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -20
app.py CHANGED
@@ -1,12 +1,14 @@
1
  import os, asyncio, time
2
  from fastapi import FastAPI, HTTPException, Header, Response
3
  from pydantic import BaseModel, HttpUrl
4
- import undetected_chromedriver as uc
5
 
6
  API_KEY = os.getenv("OPENER_API_KEY", "")
7
- PROFILE = "/tmp/uc-profile"
8
  UA = os.getenv("UA", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36")
9
- CHROME_BIN = os.getenv("CHROME_BIN", "/usr/bin/chromium")
 
 
10
 
11
  SIGNS = (
12
  "/sorry/", "consent.google.com", "detected unusual traffic",
@@ -20,6 +22,7 @@ drv = None
20
  queue = None
21
  worker = None
22
  last_status = {"ts": 0, "url": "", "captcha": False, "sig": "", "title": ""}
 
23
 
24
  class OpenPayload(BaseModel):
25
  url: HttpUrl
@@ -33,12 +36,37 @@ def _check_captcha():
33
  return True, s
34
  return False, ""
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  async def _worker():
37
  while True:
38
  url = await queue.get()
39
  try:
40
- await asyncio.to_thread(drv.get, url)
41
- bad, sig = _check_captcha()
42
  last_status.update({"ts": int(time.time()), "url": drv.current_url or url, "captcha": bad, "sig": sig, "title": drv.title or ""})
43
  print("[check]", {"captcha": bad, "sig": sig, "url": last_status["url"]})
44
  print("[open] done", url)
@@ -48,20 +76,18 @@ async def _worker():
48
  @app.on_event("startup")
49
  async def startup():
50
  global drv, queue, worker
51
- os.environ["HOME"] = "/tmp"
52
- os.makedirs("/tmp/.local/share", exist_ok=True)
53
  os.makedirs(PROFILE, exist_ok=True)
54
- o = uc.ChromeOptions()
55
- o.headless = True
56
- o.binary_location = CHROME_BIN
57
- o.add_argument(f"--user-data-dir={PROFILE}")
58
- o.add_argument("--no-sandbox")
59
- o.add_argument("--disable-dev-shm-usage")
60
- o.add_argument("--disable-gpu")
61
- o.add_argument(f"--user-agent={UA}")
62
- o.add_argument("--lang=th-TH")
63
- drv = uc.Chrome(options=o, use_subprocess=True)
64
- drv.get("data:text/html,ready")
65
  queue = asyncio.Queue()
66
  worker = asyncio.create_task(_worker())
67
  print("[startup] browser ready")
@@ -83,8 +109,8 @@ async def health():
83
  async def last():
84
  return last_status
85
 
86
- async def _enqueue(payload: OpenPayload, x_opener_key: str | None):
87
- if API_KEY and x_opener_key != API_KEY:
88
  raise HTTPException(status_code=401, detail="bad key")
89
  print("[recv]", str(payload.url))
90
  await queue.put(str(payload.url))
 
1
  import os, asyncio, time
2
  from fastapi import FastAPI, HTTPException, Header, Response
3
  from pydantic import BaseModel, HttpUrl
4
+ from seleniumbase import Driver
5
 
6
  API_KEY = os.getenv("OPENER_API_KEY", "")
7
+ PROFILE = os.getenv("SB_PROFILE_DIR", os.path.abspath("./.sb-profile"))
8
  UA = os.getenv("UA", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36")
9
+ HEADLESS = os.getenv("HEADLESS", "1") != "0"
10
+ PLS = os.getenv("SB_PAGE_LOAD_STRATEGY", "normal")
11
+ MIN_GAP_MS = int(os.getenv("MIN_GAP_MS", "800"))
12
 
13
  SIGNS = (
14
  "/sorry/", "consent.google.com", "detected unusual traffic",
 
22
  queue = None
23
  worker = None
24
  last_status = {"ts": 0, "url": "", "captcha": False, "sig": "", "title": ""}
25
+ _last_open_ms = 0
26
 
27
  class OpenPayload(BaseModel):
28
  url: HttpUrl
 
36
  return True, s
37
  return False, ""
38
 
39
def _warmup():
    """Drive the shared browser through a short warm-up visit.

    Loads the Google home page (``hl=th``), pauses, sets the window to
    1280x900, scrolls down 300px, and pauses again. The sleeps block the
    calling thread for roughly 1.8 seconds in total.

    NOTE(review): presumably this exists to make the session look like
    ordinary browsing before/after a captcha hit — confirm intent.
    """
    landing_page = "https://www.google.com/?hl=th"
    viewport = (1280, 900)
    scroll_js = "window.scrollTo(0, 300);"

    drv.get(landing_page)
    time.sleep(1.2)  # pause after the page load before touching the window
    drv.set_window_size(*viewport)
    drv.execute_script(scroll_js)
    time.sleep(0.6)  # pause again after the scroll
45
+
46
def _respect_rate_limit():
    """Block until at least ``MIN_GAP_MS`` ms have passed since the last open.

    Sleeps the calling thread for whatever remains of the minimum gap, then
    records the current time in the module-level ``_last_open_ms``.

    Timestamps are taken from ``time.monotonic()`` rather than the wall
    clock, so a system clock adjustment can neither stretch the sleep nor
    skip it. ``_last_open_ms`` therefore holds a monotonic reading in
    milliseconds, not an epoch time.
    """
    global _last_open_ms
    now_ms = int(time.monotonic() * 1000)
    remaining_ms = _last_open_ms + MIN_GAP_MS - now_ms
    if remaining_ms > 0:
        time.sleep(remaining_ms / 1000)
    # Stamp after the sleep so the next gap is measured from the actual open.
    _last_open_ms = int(time.monotonic() * 1000)
53
+
54
def _open_with_retry(url: str):
    """Open *url* in the shared browser, retrying once if a captcha shows up.

    Each attempt waits for the rate limiter, navigates, and runs the captcha
    check. If the first attempt is flagged, the browser is warmed up and the
    same URL is tried one more time.

    Returns the ``(flagged, signature)`` pair from the last captcha check
    that ran.
    """
    def _attempt():
        # One rate-limited navigation followed by the captcha check.
        _respect_rate_limit()
        drv.get(url)
        return _check_captcha()

    flagged, signature = _attempt()
    if not flagged:
        return flagged, signature

    # First attempt hit a captcha sign: warm up, then try exactly once more.
    _warmup()
    flagged, signature = _attempt()
    return flagged, signature
64
+
65
  async def _worker():
66
  while True:
67
  url = await queue.get()
68
  try:
69
+ bad, sig = await asyncio.to_thread(_open_with_retry, url)
 
70
  last_status.update({"ts": int(time.time()), "url": drv.current_url or url, "captcha": bad, "sig": sig, "title": drv.title or ""})
71
  print("[check]", {"captcha": bad, "sig": sig, "url": last_status["url"]})
72
  print("[open] done", url)
 
76
  @app.on_event("startup")
77
  async def startup():
78
  global drv, queue, worker
 
 
79
  os.makedirs(PROFILE, exist_ok=True)
80
+ drv = Driver(
81
+ uc=True,
82
+ headless=HEADLESS,
83
+ incognito=False,
84
+ user_data_dir=PROFILE,
85
+ page_load_strategy=PLS,
86
+ agent=UA,
87
+ locale_code="th",
88
+ )
89
+ drv.set_window_size(1280, 900)
90
+ _warmup()
91
  queue = asyncio.Queue()
92
  worker = asyncio.create_task(_worker())
93
  print("[startup] browser ready")
 
109
  async def last():
110
  return last_status
111
 
112
+ async def _enqueue(payload: OpenPayload, key: str | None):
113
+ if API_KEY and key != API_KEY:
114
  raise HTTPException(status_code=401, detail="bad key")
115
  print("[recv]", str(payload.url))
116
  await queue.put(str(payload.url))