plan291037 committed on
Commit
edd390b
·
verified ·
1 Parent(s): 1f2ce78

Update app/lens_text_core.py

Browse files
Files changed (1) hide show
  1. app/lens_text_core.py +53 -25
app/lens_text_core.py CHANGED
@@ -1,7 +1,7 @@
1
 
2
  import os, time, asyncio, base64, re, threading, hashlib, logging, shutil, atexit, tempfile, uuid
3
  from io import BytesIO
4
- from typing import Any, Dict, List, Union
5
  from urllib.parse import urlparse
6
 
7
  import httpx
@@ -33,55 +33,83 @@ CHROME_EXTRA_ARGS = os.getenv(
33
  "--window-size=1920,1080 --headless=new",
34
  ).split()
35
 
36
- _PROFILE_DIRS = []
37
-
38
  _CACHE_TTL = 300
39
  _IDLE_TIMEOUT = int(os.getenv("CHROME_IDLE_SECONDS", "60"))
40
  _BROWSER_TTL = 900
41
 
42
  def _mk_profile_dir() -> str:
43
- base = os.getenv("CHROME_PROFILE_BASE", tempfile.gettempdir())
44
  p = os.path.join(base, f"chrome-profile-{os.getpid()}-{threading.get_ident()}-{uuid.uuid4().hex}")
45
  os.makedirs(p, exist_ok=True)
46
- _PROFILE_DIRS.append(p)
 
47
  return p
48
 
49
- def _cleanup_profiles():
50
- for p in _PROFILE_DIRS:
51
- try:
52
- shutil.rmtree(p, ignore_errors=True)
53
- except Exception:
54
- pass
55
-
56
- atexit.register(_cleanup_profiles)
57
-
58
- def _build_chrome() -> webdriver.Chrome:
59
  opts = ChromeOptions()
60
 
61
  extra = os.getenv(
62
  "CHROME_EXTRA_ARGS",
63
- "--disable-gpu --no-sandbox --disable-dev-shm-usage --window-size=1920,1080 --headless=new",
64
  ).split()
65
  for a in extra:
66
  if a:
67
  opts.add_argument(a)
68
 
69
- profile_dir = _mk_profile_dir()
 
 
 
 
 
 
 
 
 
70
  opts.add_argument(f"--user-data-dir={profile_dir}")
71
  opts.add_argument("--profile-directory=Default")
72
 
 
 
 
 
73
  drv_path = os.getenv("CHROMEDRIVER")
74
- try:
 
75
  if drv_path and os.path.exists(drv_path):
76
- return webdriver.Chrome(service=ChromeService(executable_path=drv_path), options=opts)
77
- return webdriver.Chrome(options=opts)
78
- except SessionNotCreatedException as e:
79
- LOGGER.warning("SessionNotCreated: %s; retry with a fresh profile dir", e)
80
- profile_dir2 = _mk_profile_dir()
81
- opts.arguments = [a for a in opts.arguments if not a.startswith("--user-data-dir=")]
82
- opts.add_argument(f"--user-data-dir={profile_dir2}")
83
  return webdriver.Chrome(options=opts)
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  _cached_cookie, _cached_cookie_ts, _cookie_lock = None, 0.0, threading.Lock()
86
  _global_driver, _driver_last_use, _driver_lock = None, 0.0, threading.Lock()
87
 
 
1
 
2
  import os, time, asyncio, base64, re, threading, hashlib, logging, shutil, atexit, tempfile, uuid
3
  from io import BytesIO
4
+ from typing import Any, Dict, List, Union, Optional
5
  from urllib.parse import urlparse
6
 
7
  import httpx
 
33
  "--window-size=1920,1080 --headless=new",
34
  ).split()
35
 
 
 
36
  _CACHE_TTL = 300
37
  _IDLE_TIMEOUT = int(os.getenv("CHROME_IDLE_SECONDS", "60"))
38
  _BROWSER_TTL = 900
39
 
40
  def _mk_profile_dir() -> str:
41
+ base = tempfile.gettempdir()
42
  p = os.path.join(base, f"chrome-profile-{os.getpid()}-{threading.get_ident()}-{uuid.uuid4().hex}")
43
  os.makedirs(p, exist_ok=True)
44
+
45
+ atexit.register(lambda: shutil.rmtree(p, ignore_errors=True))
46
  return p
47
 
48
+ def _build_chrome(cookie_dict: Dict[str, str] | None = None,
49
+ *, domain: str = ".google.com",
50
+ user_data_dir: Optional[str] = None,
51
+ reuse_profile: bool = False):
 
 
 
 
 
 
52
  opts = ChromeOptions()
53
 
54
  extra = os.getenv(
55
  "CHROME_EXTRA_ARGS",
56
+ "--no-sandbox --disable-dev-shm-usage --disable-gpu --window-size=1920,1080 --headless=new",
57
  ).split()
58
  for a in extra:
59
  if a:
60
  opts.add_argument(a)
61
 
62
+ if user_data_dir:
63
+ profile_dir = user_data_dir
64
+ os.makedirs(profile_dir, exist_ok=True)
65
+ elif reuse_profile:
66
+ base = os.getenv("CHROME_PROFILE_BASE", "/tmp")
67
+ profile_dir = os.path.join(base, "chrome-profile")
68
+ os.makedirs(profile_dir, exist_ok=True)
69
+ else:
70
+ profile_dir = _mk_profile_dir()
71
+
72
  opts.add_argument(f"--user-data-dir={profile_dir}")
73
  opts.add_argument("--profile-directory=Default")
74
 
75
+ bin_loc = os.getenv("CHROME_BINARY")
76
+ if bin_loc and os.path.exists(bin_loc):
77
+ opts.binary_location = bin_loc
78
+
79
  drv_path = os.getenv("CHROMEDRIVER")
80
+
81
+ def _new_driver():
82
  if drv_path and os.path.exists(drv_path):
83
+ service = ChromeService(executable_path=drv_path)
84
+ return webdriver.Chrome(service=service, options=opts)
 
 
 
 
 
85
  return webdriver.Chrome(options=opts)
86
 
87
+ try:
88
+ drv = _new_driver()
89
+ except SessionNotCreatedException:
90
+ fresh = _mk_profile_dir()
91
+ opts.arguments = [x for x in opts.arguments if not x.startswith("--user-data-dir=")]
92
+ opts.add_argument(f"--user-data-dir={fresh}")
93
+ drv = _new_driver()
94
+
95
+ if cookie_dict:
96
+ try:
97
+ drv.execute_cdp_cmd("Network.enable", {})
98
+ cookies = [
99
+ {"name": k, "value": v, "domain": domain, "path": "/", "secure": True}
100
+ for k, v in cookie_dict.items()
101
+ ]
102
+ drv.execute_cdp_cmd("Network.setCookies", {"cookies": cookies})
103
+ except Exception:
104
+ try:
105
+ drv.get(f"https://{domain.lstrip('.')}/")
106
+ for k, v in cookie_dict.items():
107
+ drv.add_cookie({"name": k, "value": v, "domain": domain, "path": "/", "secure": True})
108
+ except Exception:
109
+ pass
110
+
111
+ return drv
112
+
113
  _cached_cookie, _cached_cookie_ts, _cookie_lock = None, 0.0, threading.Lock()
114
  _global_driver, _driver_last_use, _driver_lock = None, 0.0, threading.Lock()
115