Spaces:
Sleeping
Sleeping
rsnarsna committed on
Commit ·
ade38de
1
Parent(s): 0bcc65e
fix: Update Google OAuth token and refresh token handling; add error handling for token refresh and improve transcript fetching with fallback mechanisms
Browse files
- Google_oauth_token.json +1 -1
- app.py +9 -3
- gemini_transcript.py +44 -7
Google_oauth_token.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"token": "ya29.
|
|
|
|
| 1 |
+
{"token": "ya29.a0AQvPyINisnTmbbUbKvU_Oon2bsyEI0RplGRg35lszdgHJSGyHGEgIIJ9egNQxH2WYTL620YwTJxDsK7sZv2nbfSw5kUQc76A8mklMLSxn762j0NYpdeXt5RbOQg8KJf6prPxH8CsPuwz7ks1bK0BFz4vy04QkPBMlNSRtMkba_OA_TL_ly7_03bGAaCK29Q_z2RWGCkaCgYKAS4SARcSFQHGX2MiWhTmNjFUY-_7XXKWJbeV9w0206", "refresh_token": "1//0gSFNaVNizGvECgYIARAAGBASNwF-L9Ir0TRSCojKNffp98YYGDMRNlnndT_iKz3QfIcYtWShLJRGy_S2QDCzQhPPwcrAJlH12lo", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "769133159215-9gbq0l5v49kmclfcq7vbq7tutck0aphd.apps.googleusercontent.com", "client_secret": "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", "scopes": ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/gmail.send", "https://www.googleapis.com/auth/drive.file", "https://www.googleapis.com/auth/youtube.force-ssl"], "universe_domain": "googleapis.com", "account": "", "expiry": "2026-05-31T16:17:24Z"}
|
app.py
CHANGED
|
@@ -183,8 +183,14 @@ def load_credentials() -> Credentials | None:
|
|
| 183 |
creds = Credentials.from_authorized_user_file(TOKEN_PATH, SCOPES)
|
| 184 |
if not creds.valid:
|
| 185 |
if creds.expired and creds.refresh_token:
|
| 186 |
-
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
else:
|
| 189 |
return None
|
| 190 |
return creds
|
|
@@ -1118,4 +1124,4 @@ def generate(payload: GenerateRequest):
|
|
| 1118 |
|
| 1119 |
if __name__ == "__main__":
|
| 1120 |
import uvicorn
|
| 1121 |
-
uvicorn.run("
|
|
|
|
| 183 |
creds = Credentials.from_authorized_user_file(TOKEN_PATH, SCOPES)
|
| 184 |
if not creds.valid:
|
| 185 |
if creds.expired and creds.refresh_token:
|
| 186 |
+
try:
|
| 187 |
+
creds.refresh(GoogleRequest())
|
| 188 |
+
Path(TOKEN_PATH).write_text(creds.to_json(), encoding="utf-8")
|
| 189 |
+
except Exception as e:
|
| 190 |
+
print(f"[WARN] Failed to refresh token: {e}. Forcing re-auth.")
|
| 191 |
+
if os.path.exists(TOKEN_PATH):
|
| 192 |
+
os.remove(TOKEN_PATH)
|
| 193 |
+
return None
|
| 194 |
else:
|
| 195 |
return None
|
| 196 |
return creds
|
|
|
|
| 1124 |
|
| 1125 |
if __name__ == "__main__":
|
| 1126 |
import uvicorn
|
| 1127 |
+
uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
|
gemini_transcript.py
CHANGED
|
@@ -261,7 +261,15 @@ class YouTubeTranscriptFetcher:
|
|
| 261 |
raise ValueError(f"Unsupported YouTube URL: {url}")
|
| 262 |
|
| 263 |
def _fetch_once(self) -> str:
|
| 264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
return " ".join(item.text for item in transcript)
|
| 266 |
|
| 267 |
def _try_all_tiers(self) -> tuple[str, str]:
|
|
@@ -399,8 +407,10 @@ class YtDlpTranscriptFetcher:
|
|
| 399 |
"""
|
| 400 |
Search manual subtitles first, then auto-generated, for a
|
| 401 |
matching language + preferred format.
|
|
|
|
| 402 |
Returns (url, format_ext).
|
| 403 |
"""
|
|
|
|
| 404 |
for subs_dict in (manual_subs, auto_subs):
|
| 405 |
if not subs_dict:
|
| 406 |
continue
|
|
@@ -420,9 +430,31 @@ class YtDlpTranscriptFetcher:
|
|
| 420 |
if track.get("url"):
|
| 421 |
return track["url"], track.get("ext", "vtt")
|
| 422 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
raise RuntimeError(
|
| 424 |
-
f"No subtitles found in yt-dlp metadata for "
|
| 425 |
-
f"languages {self.languages} (video: {self.video_id})"
|
| 426 |
)
|
| 427 |
|
| 428 |
def fetch(self) -> str:
|
|
@@ -580,10 +612,15 @@ class YouTubeApiTranscriptFetcher:
|
|
| 580 |
|
| 581 |
if not caption_id:
|
| 582 |
available = [i["snippet"]["language"] for i in items]
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 587 |
|
| 588 |
# Step 3: Download caption content as SRT
|
| 589 |
caption_content = youtube.captions().download(
|
|
|
|
| 261 |
raise ValueError(f"Unsupported YouTube URL: {url}")
|
| 262 |
|
| 263 |
def _fetch_once(self) -> str:
|
| 264 |
+
try:
|
| 265 |
+
transcript = self.api.fetch(self.video_id, languages=self.languages)
|
| 266 |
+
except NoTranscriptFound:
|
| 267 |
+
logger.info("[Tier 1] Requested languages %s not found. Finding first available transcript...", self.languages)
|
| 268 |
+
transcript_list = self.api.list_transcripts(self.video_id)
|
| 269 |
+
first_transcript = next(iter(transcript_list))
|
| 270 |
+
logger.info("[Tier 1] Falling back to language: %s", first_transcript.language_code)
|
| 271 |
+
transcript = first_transcript.fetch()
|
| 272 |
+
|
| 273 |
return " ".join(item.text for item in transcript)
|
| 274 |
|
| 275 |
def _try_all_tiers(self) -> tuple[str, str]:
|
|
|
|
| 407 |
"""
|
| 408 |
Search manual subtitles first, then auto-generated, for a
|
| 409 |
matching language + preferred format.
|
| 410 |
+
If requested languages are not available, fallback to any available language.
|
| 411 |
Returns (url, format_ext).
|
| 412 |
"""
|
| 413 |
+
# 1. Try preferred languages
|
| 414 |
for subs_dict in (manual_subs, auto_subs):
|
| 415 |
if not subs_dict:
|
| 416 |
continue
|
|
|
|
| 430 |
if track.get("url"):
|
| 431 |
return track["url"], track.get("ext", "vtt")
|
| 432 |
|
| 433 |
+
# 2. Fallback to ANY available language
|
| 434 |
+
logger.info("[yt-dlp] Preferred languages %s not found. Falling back to any available language.", self.languages)
|
| 435 |
+
for subs_dict in (manual_subs, auto_subs):
|
| 436 |
+
if not subs_dict:
|
| 437 |
+
continue
|
| 438 |
+
# Try preferred formats across all languages
|
| 439 |
+
for fmt in self.PREFERRED_FORMATS:
|
| 440 |
+
for lang, tracks in subs_dict.items():
|
| 441 |
+
if not tracks:
|
| 442 |
+
continue
|
| 443 |
+
for track in tracks:
|
| 444 |
+
if track.get("ext") == fmt and track.get("url"):
|
| 445 |
+
logger.info("[yt-dlp] Falling back to language: %s", lang)
|
| 446 |
+
return track["url"], fmt
|
| 447 |
+
# No preferred format matched — use first available with URL across all languages
|
| 448 |
+
for lang, tracks in subs_dict.items():
|
| 449 |
+
if not tracks:
|
| 450 |
+
continue
|
| 451 |
+
for track in tracks:
|
| 452 |
+
if track.get("url"):
|
| 453 |
+
logger.info("[yt-dlp] Falling back to language: %s", lang)
|
| 454 |
+
return track["url"], track.get("ext", "vtt")
|
| 455 |
+
|
| 456 |
raise RuntimeError(
|
| 457 |
+
f"No subtitles found in yt-dlp metadata for video: {self.video_id}"
|
|
|
|
| 458 |
)
|
| 459 |
|
| 460 |
def fetch(self) -> str:
|
|
|
|
| 612 |
|
| 613 |
if not caption_id:
|
| 614 |
available = [i["snippet"]["language"] for i in items]
|
| 615 |
+
if available:
|
| 616 |
+
logger.info("[YT-API] Preferred languages %s not found. Falling back to any available language.", self.languages)
|
| 617 |
+
caption_id = items[0]["id"]
|
| 618 |
+
lang = items[0]["snippet"]["language"]
|
| 619 |
+
logger.info("[YT-API] Falling back to language: %s, id=%s", lang, caption_id)
|
| 620 |
+
else:
|
| 621 |
+
raise RuntimeError(
|
| 622 |
+
f"No caption tracks found for video {self.video_id}"
|
| 623 |
+
)
|
| 624 |
|
| 625 |
# Step 3: Download caption content as SRT
|
| 626 |
caption_content = youtube.captions().download(
|