[FIX] improve transcript extraction by handling missing language captions
Browse files
tool.py
CHANGED
|
@@ -80,12 +80,13 @@ class YouTubeTranscriptExtractor(Tool):
|
|
| 80 |
# Create a YouTube object
|
| 81 |
yt = YouTube(video_url)
|
| 82 |
lang='en'
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
transcript = yt.captions[
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
| 89 |
|
| 90 |
# Clean up the transcript by removing timestamps and line numbers
|
| 91 |
cleaned_transcript = ""
|
|
|
|
| 80 |
# Create a YouTube object
|
| 81 |
yt = YouTube(video_url)
|
| 82 |
lang='en'
|
| 83 |
+
# Get the video transcript
|
| 84 |
+
try:
|
| 85 |
+
transcript = yt.captions[lang].generate_srt_captions()
|
| 86 |
+
except KeyError:
|
| 87 |
+
# If the specified language is not available, get the first available transcript
|
| 88 |
+
transcript = next(iter(yt.captions.values())).generate_srt_captions()
|
| 89 |
+
lang = next(iter(yt.captions.keys()))
|
| 90 |
|
| 91 |
# Clean up the transcript by removing timestamps and line numbers
|
| 92 |
cleaned_transcript = ""
|