tool-YoutubeTranscript-blog

Sleeping

VPCSinfo committed on Feb 22, 2025

Commit

f279b36

1 Parent(s): b984651

[FIX] improve transcript extraction by handling missing language captions

Files changed (1) hide show

tool.py CHANGED Viewed

@@ -80,12 +80,13 @@ class YouTubeTranscriptExtractor(Tool):
             # Create a YouTube object
             yt = YouTube(video_url)
             lang='en'
-            # Get the video transcript
-            if lang in yt.captions:
-                transcript = yt.captions['en'].generate_srt_captions()
-            else:
-                transcript = yt.captions.all()[0].generate_srt_captions()
-                lang = yt.captions.all()[0].code
             # Clean up the transcript by removing timestamps and line numbers
             cleaned_transcript = ""

             # Create a YouTube object
             yt = YouTube(video_url)
             lang='en'
+           # Get the video transcript
+            try:
+                transcript = yt.captions[lang].generate_srt_captions()
+            except KeyError:
+                # If the specified language is not available, get the first available transcript
+                transcript = next(iter(yt.captions.values())).generate_srt_captions()
+                lang = next(iter(yt.captions.keys()))
             # Clean up the transcript by removing timestamps and line numbers
             cleaned_transcript = ""