duck3-create Claude Opus 4.6 committed on
Commit
431f64f
·
1 Parent(s): ac36889

Add dual API fallback and proxy support

Browse files

- Try plain API (no cookies) first, fall back to cookies
- Fixes Railway IP mismatch issue (cookies from home IP rejected on cloud)
- Add PROXY_URL env var support for optional residential proxy
- Remove retry loop in favor of smarter plain→cookies fallback
- Skip fallback for genuine "no subtitles" errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. main.py +29 -17
main.py CHANGED
@@ -26,10 +26,24 @@ app.add_middleware(
26
 
27
  _executor = ThreadPoolExecutor(max_workers=5)
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  _cookie_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "cookies.txt")
30
 
31
  try:
32
- # If no local cookies.txt, try creating one from YOUTUBE_COOKIES_BASE64 env var
33
  if not os.path.exists(_cookie_path):
34
  import base64
35
  cookies_b64 = os.environ.get("YOUTUBE_COOKIES_BASE64", "")
@@ -47,14 +61,14 @@ try:
47
  _cookie_jar.load(ignore_discard=True, ignore_expires=True)
48
  _session = requests.Session()
49
  _session.cookies = _cookie_jar
50
- _yt_api = YouTubeTranscriptApi(http_client=_session)
51
- logger.info(f"Using cookies from {_cookie_path}")
 
 
52
  else:
53
- _yt_api = YouTubeTranscriptApi()
54
  logger.info("No cookies found, running without cookies")
55
  except Exception as e:
56
  logger.error(f"Failed to load cookies: {e}")
57
- _yt_api = YouTubeTranscriptApi()
58
 
59
 
60
  class TranscriptRequest(BaseModel):
@@ -103,9 +117,6 @@ KOREAN_FILLERS = {
103
 
104
  NOISE_PATTERN = re.compile(r"^\[.*\]$")
105
 
106
- MAX_RETRIES = 3
107
- RETRY_DELAY = 1.0
108
-
109
 
110
  def denoise_text(text: str) -> str:
111
  lines = text.split("\n")
@@ -133,10 +144,14 @@ def _fetch_transcript(video_id: str, language: str, denoise: bool, fmt: str, kee
133
  elif language == "en":
134
  languages.append("ko")
135
 
 
 
 
 
136
  last_error = None
137
- for attempt in range(1, MAX_RETRIES + 1):
138
  try:
139
- data = _yt_api.fetch(video_id, languages=languages)
140
 
141
  if fmt == "json":
142
  entries = [
@@ -168,16 +183,13 @@ def _fetch_transcript(video_id: str, language: str, denoise: bool, fmt: str, kee
168
  return {"transcript": text, "error": None}
169
  except Exception as e:
170
  last_error = str(e)
171
- logger.error(f"Attempt {attempt}/{MAX_RETRIES} failed for {video_id}: {last_error}")
172
 
173
- # Don't retry if video genuinely has no subtitles or IP is blocked
174
- if "No transcripts" in last_error or "disabled" in last_error.lower() or "Could not retrieve" in last_error:
175
  break
176
 
177
- if attempt < MAX_RETRIES:
178
- time.sleep(RETRY_DELAY)
179
-
180
- # All retries exhausted or non-retryable error
181
  error_msg = last_error or "Unknown error"
182
  if "No transcripts" in error_msg or "Could not retrieve" in error_msg:
183
  error_msg = f"자막을 찾을 수 없습니다. ({error_msg[:120]})"
 
26
 
27
  _executor = ThreadPoolExecutor(max_workers=5)
28
 
29
+ # --- Proxy support (optional PROXY_URL env var) ---
30
+ _proxy_url = os.environ.get("PROXY_URL", "")
31
+ _proxy_config = None
32
+ if _proxy_url:
33
+ from youtube_transcript_api.proxies import GenericProxyConfig
34
+ _proxy_config = GenericProxyConfig(
35
+ http_url=_proxy_url,
36
+ https_url=_proxy_url,
37
+ )
38
+ logger.info(f"Using proxy: {_proxy_url[:30]}...")
39
+
40
+ # --- API instances: plain (no cookies) + with cookies (fallback) ---
41
+ _yt_api = YouTubeTranscriptApi(proxy_config=_proxy_config)
42
+ _yt_api_cookies = None
43
+
44
  _cookie_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "cookies.txt")
45
 
46
  try:
 
47
  if not os.path.exists(_cookie_path):
48
  import base64
49
  cookies_b64 = os.environ.get("YOUTUBE_COOKIES_BASE64", "")
 
61
  _cookie_jar.load(ignore_discard=True, ignore_expires=True)
62
  _session = requests.Session()
63
  _session.cookies = _cookie_jar
64
+ if _proxy_url:
65
+ _session.proxies = {"http": _proxy_url, "https": _proxy_url}
66
+ _yt_api_cookies = YouTubeTranscriptApi(http_client=_session)
67
+ logger.info(f"Cookies loaded from {_cookie_path} (used as fallback)")
68
  else:
 
69
  logger.info("No cookies found, running without cookies")
70
  except Exception as e:
71
  logger.error(f"Failed to load cookies: {e}")
 
72
 
73
 
74
  class TranscriptRequest(BaseModel):
 
117
 
118
  NOISE_PATTERN = re.compile(r"^\[.*\]$")
119
 
 
 
 
120
 
121
  def denoise_text(text: str) -> str:
122
  lines = text.split("\n")
 
144
  elif language == "en":
145
  languages.append("ko")
146
 
147
+ apis_to_try = [("plain", _yt_api)]
148
+ if _yt_api_cookies:
149
+ apis_to_try.append(("cookies", _yt_api_cookies))
150
+
151
  last_error = None
152
+ for api_name, api in apis_to_try:
153
  try:
154
+ data = api.fetch(video_id, languages=languages)
155
 
156
  if fmt == "json":
157
  entries = [
 
183
  return {"transcript": text, "error": None}
184
  except Exception as e:
185
  last_error = str(e)
186
+ logger.warning(f"[{api_name}] Failed for {video_id}: {last_error[:100]}")
187
 
188
+ # Don't try cookies fallback if video genuinely has no subtitles
189
+ if "No transcripts" in last_error or "disabled" in last_error.lower():
190
  break
191
 
192
+ # All attempts failed
 
 
 
193
  error_msg = last_error or "Unknown error"
194
  if "No transcripts" in error_msg or "Could not retrieve" in error_msg:
195
  error_msg = f"자막을 찾을 수 없습니다. ({error_msg[:120]})"