plan291037 committed on
Commit
0e3b18d
·
verified ·
1 Parent(s): 3ded09d

Update app/lens_text_core.py

Browse files
Files changed (1) hide show
  1. app/lens_text_core.py +51 -70
app/lens_text_core.py CHANGED
@@ -1,7 +1,7 @@
1
 
2
- import os, time, asyncio, base64, re, threading, hashlib, logging, shutil, atexit, tempfile, uuid
3
  from io import BytesIO
4
- from typing import Any, Dict, List, Union, Optional
5
  from urllib.parse import urlparse
6
 
7
  import httpx
@@ -10,7 +10,6 @@ from PIL import Image
10
  from selenium import webdriver
11
  from selenium.webdriver.chrome.options import Options as ChromeOptions
12
  from selenium.webdriver.chrome.service import Service as ChromeService
13
- from selenium.common.exceptions import SessionNotCreatedException
14
  from selenium.webdriver.common.by import By
15
  from selenium.webdriver.support.ui import WebDriverWait
16
  from selenium.webdriver.support import expected_conditions as EC
@@ -33,81 +32,63 @@ CHROME_EXTRA_ARGS = os.getenv(
33
  "--window-size=1920,1080 --headless=new",
34
  ).split()
35
 
36
- _CACHE_TTL = 600
37
- _BROWSER_TTL = 900
38
  _IDLE_TIMEOUT = int(os.getenv("CHROME_IDLE_SECONDS", "60"))
 
39
 
40
- def _mk_profile_dir() -> str:
41
- base = tempfile.gettempdir()
42
- p = os.path.join(base, f"chrome-profile-{os.getpid()}-{threading.get_ident()}-{uuid.uuid4().hex}")
43
- os.makedirs(p, exist_ok=True)
44
-
45
- atexit.register(lambda: shutil.rmtree(p, ignore_errors=True))
46
- return p
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- def _build_chrome(cookie_dict: Dict[str, str] | None = None,
49
- *, domain: str = ".google.com",
50
- user_data_dir: Optional[str] = None,
51
- reuse_profile: bool = False):
52
  opts = ChromeOptions()
53
-
54
- extra = os.getenv(
55
- "CHROME_EXTRA_ARGS",
56
- "--no-sandbox --disable-dev-shm-usage --disable-gpu --window-size=1920,1080 --headless=new",
57
- ).split()
58
- for a in extra:
59
- if a:
60
- opts.add_argument(a)
61
-
62
- if user_data_dir:
63
- profile_dir = user_data_dir
64
- os.makedirs(profile_dir, exist_ok=True)
65
- elif reuse_profile:
66
- base = os.getenv("CHROME_PROFILE_BASE", "/tmp")
67
- profile_dir = os.path.join(base, "chrome-profile")
68
- os.makedirs(profile_dir, exist_ok=True)
69
- else:
70
- profile_dir = _mk_profile_dir()
71
-
72
- opts.add_argument(f"--user-data-dir={profile_dir}")
73
- opts.add_argument("--profile-directory=Default")
74
-
75
- bin_loc = os.getenv("CHROME_BINARY")
76
- if bin_loc and os.path.exists(bin_loc):
77
- opts.binary_location = bin_loc
78
-
79
- drv_path = os.getenv("CHROMEDRIVER")
80
-
81
- def _new_driver():
82
- if drv_path and os.path.exists(drv_path):
83
- service = ChromeService(executable_path=drv_path)
84
- return webdriver.Chrome(service=service, options=opts)
85
- return webdriver.Chrome(options=opts)
86
-
87
- try:
88
- drv = _new_driver()
89
- except SessionNotCreatedException:
90
- fresh = _mk_profile_dir()
91
- opts.arguments = [x for x in opts.arguments if not x.startswith("--user-data-dir=")]
92
- opts.add_argument(f"--user-data-dir={fresh}")
93
- drv = _new_driver()
94
-
95
- if cookie_dict:
96
  try:
97
- drv.execute_cdp_cmd("Network.enable", {})
98
- cookies = [
99
- {"name": k, "value": v, "domain": domain, "path": "/", "secure": True}
100
- for k, v in cookie_dict.items()
101
- ]
102
- drv.execute_cdp_cmd("Network.setCookies", {"cookies": cookies})
103
  except Exception:
 
 
 
 
 
 
 
104
  try:
105
- drv.get(f"https://{domain.lstrip('.')}/")
106
- for k, v in cookie_dict.items():
107
- drv.add_cookie({"name": k, "value": v, "domain": domain, "path": "/", "secure": True})
 
108
  except Exception:
109
  pass
110
-
111
  return drv
112
 
113
  _cached_cookie, _cached_cookie_ts, _cookie_lock = None, 0.0, threading.Lock()
@@ -228,7 +209,7 @@ def _parse_calc_value(calc: str, dim: float) -> float:
228
  return base - off if op == "-" else base + off
229
 
230
  def _extract_boxes(drv, w: int, h: int) -> List[Dict[str, Any]]:
231
- WebDriverWait(drv, 30).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.lv6PAb")))
232
  nodes = drv.find_elements(By.XPATH, "//div[contains(@class,'lv6PAb') and @aria-label]")
233
 
234
  out: List[Dict[str,Any]] = []
 
1
 
2
+ import os, time, asyncio, base64, re, threading, hashlib, logging, shutil
3
  from io import BytesIO
4
+ from typing import Any, Dict, List, Union
5
  from urllib.parse import urlparse
6
 
7
  import httpx
 
10
  from selenium import webdriver
11
  from selenium.webdriver.chrome.options import Options as ChromeOptions
12
  from selenium.webdriver.chrome.service import Service as ChromeService
 
13
  from selenium.webdriver.common.by import By
14
  from selenium.webdriver.support.ui import WebDriverWait
15
  from selenium.webdriver.support import expected_conditions as EC
 
32
  "--window-size=1920,1080 --headless=new",
33
  ).split()
34
 
35
+ _CACHE_TTL = 300
 
36
  _IDLE_TIMEOUT = int(os.getenv("CHROME_IDLE_SECONDS", "60"))
37
+ _BROWSER_TTL = 900
38
 
39
+ _COMMON_CHROME_PATHS = [
40
+ # Linux
41
+ "/usr/bin/google-chrome", "/usr/bin/chromium", "/usr/bin/chromium-browser",
42
+ "/snap/bin/chromium",
43
+ "/opt/google/chrome/google-chrome",
44
+ # macOS
45
+ "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
46
+ "/Applications/Chromium.app/Contents/MacOS/Chromium",
47
+ # Windows
48
+ r"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
49
+ r"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
50
+ ]
51
+
52
+ def _find_chrome_binary() -> str:
53
+ if (env := os.getenv("CHROME_BINARY")):
54
+ return env
55
+
56
+ for cmd in ("google-chrome", "chromium-browser", "chromium", "chrome"):
57
+ path = shutil.which(cmd)
58
+ if path:
59
+ return path
60
+
61
+ for path in _COMMON_CHROME_PATHS:
62
+ if os.path.exists(path):
63
+ return path
64
+
65
+ raise RuntimeError(
66
+ "Chrome binary not found; set CHROME_BINARY env var or install Chrome/Chromium"
67
+ )
68
 
69
+ def _build_chrome(cookie_dict: Dict[str, str] | None = None):
70
+ bin_loc = _find_chrome_binary()
71
+ drv_path = os.getenv("CHROMEDRIVER", "/usr/bin/chromedriver")
 
72
  opts = ChromeOptions()
73
+ opts.binary_location = bin_loc
74
+ for flag in CHROME_EXTRA_ARGS:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  try:
76
+ opts.add_argument(flag)
 
 
 
 
 
77
  except Exception:
78
+ pass
79
+ service = ChromeService(executable_path=drv_path)
80
+ drv = webdriver.Chrome(service=service, options=opts)
81
+
82
+ drv.execute_cdp_cmd("Network.enable", {})
83
+ if cookie_dict:
84
+ for name, val in cookie_dict.items():
85
  try:
86
+ drv.execute_cdp_cmd("Network.setCookie", {
87
+ "name": name, "value": val,
88
+ "domain": ".google.com", "path": "/", "secure": True
89
+ })
90
  except Exception:
91
  pass
 
92
  return drv
93
 
94
  _cached_cookie, _cached_cookie_ts, _cookie_lock = None, 0.0, threading.Lock()
 
209
  return base - off if op == "-" else base + off
210
 
211
  def _extract_boxes(drv, w: int, h: int) -> List[Dict[str, Any]]:
212
+ WebDriverWait(drv, 10).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.lv6PAb")))
213
  nodes = drv.find_elements(By.XPATH, "//div[contains(@class,'lv6PAb') and @aria-label]")
214
 
215
  out: List[Dict[str,Any]] = []