Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -86,41 +86,63 @@ def download_and_transcribe_with_whisper(youtube_url):
|
|
| 86 |
def get_transcript_from_youtube_api(video_id, video_length):
|
| 87 |
"""Fetches transcript using YouTube API if available."""
|
| 88 |
try:
|
|
|
|
| 89 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
| 90 |
|
|
|
|
| 91 |
for transcript in transcript_list:
|
| 92 |
-
if not transcript.is_generated:
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
if video_length > 15:
|
|
|
|
|
|
|
| 97 |
auto_transcript = transcript_list.find_generated_transcript(['en'])
|
| 98 |
if auto_transcript:
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
|
|
|
|
|
|
| 104 |
|
| 105 |
except Exception as e:
|
| 106 |
print(f"Error fetching transcript: {e}")
|
| 107 |
return None
|
| 108 |
|
|
|
|
| 109 |
def get_transcript(youtube_url, api_key):
|
| 110 |
"""Gets transcript from YouTube API or Whisper if unavailable."""
|
| 111 |
-
video_id =
|
| 112 |
-
if not video_id:
|
| 113 |
-
print("Invalid or unsupported YouTube URL.")
|
| 114 |
-
return None
|
| 115 |
-
|
| 116 |
video_length = get_video_duration(video_id, api_key)
|
|
|
|
| 117 |
if video_length is not None:
|
| 118 |
-
print(f"Video length: {video_length
|
|
|
|
|
|
|
| 119 |
transcript = get_transcript_from_youtube_api(video_id, video_length)
|
|
|
|
|
|
|
| 120 |
if transcript:
|
|
|
|
| 121 |
return transcript
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
| 124 |
else:
|
| 125 |
print("Error fetching video duration.")
|
| 126 |
return None
|
|
|
|
| 86 |
def get_transcript_from_youtube_api(video_id, video_length):
    """Fetch a transcript for a video through the YouTube transcript API.

    A manually created transcript is preferred and returned as soon as one is
    found. Otherwise, an auto-generated English transcript is used, but only
    when ``video_length`` is greater than 15 (the log messages describe this
    value as minutes).

    Args:
        video_id: Identifier passed to ``YouTubeTranscriptApi.list_transcripts``.
        video_length: Video length compared against the 15 threshold.

    Returns:
        The transcript text joined with single spaces, or ``None`` when no
        transcript is used (shorter video, nothing available, or any error).
        Errors are printed and never propagated.
    """

    def _segment_text(segment):
        # NOTE(review): segments are dicts with a 'text' key in older
        # youtube-transcript-api releases; newer releases appear to yield
        # snippet objects exposing `.text` -- confirm against the installed
        # version. Both shapes are accepted here.
        if isinstance(segment, dict):
            return segment['text']
        return segment.text

    try:
        # Fetch available transcripts
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

        # Look for manually created transcripts first
        for transcript in transcript_list:
            if transcript.is_generated:
                continue
            manual_transcript = transcript.fetch()
            try:
                # Accept any iterable payload, not just `list`.
                segments = iter(manual_transcript)
            except TypeError:
                print("Manual transcript is not iterable.")
                return None
            # Return the first manual transcript immediately
            return " ".join(_segment_text(segment) for segment in segments)

        # No manual transcript found: short videos are left to Whisper,
        # which the caller handles when this function returns None.
        if not video_length > 15:
            print("Video is shorter than 15 minutes, using Whisper for transcription.")
            return None

        print("Video is longer than 15 minutes, using auto-generated transcript.")
        auto_transcript = transcript_list.find_generated_transcript(['en'])
        if not auto_transcript:
            print("No auto-generated transcript available.")
            return None

        # Extract the text from the auto-generated transcript
        return " ".join(
            _segment_text(segment) for segment in auto_transcript.fetch()
        )

    except Exception as e:
        # Best-effort boundary: any API failure means "no transcript".
        print(f"Error fetching transcript: {e}")
        return None
|
| 125 |
|
| 126 |
+
|
| 127 |
def _extract_video_id(youtube_url):
    """Return the video ID found in ``youtube_url``, or ``""`` if none is found."""
    from urllib.parse import parse_qs, urlparse

    if not isinstance(youtube_url, str):
        return ""
    url = youtube_url.strip()
    if not url:
        return ""

    # Scheme-less input ("youtu.be/ID") only parses into host/path with a scheme.
    candidate = url if "://" in url else "https://" + url
    try:
        parsed = urlparse(candidate)
        host = (parsed.hostname or "").lower()
    except ValueError:
        return ""

    path_parts = [part for part in parsed.path.split("/") if part]

    # Short links: https://youtu.be/<id>
    if host == "youtu.be" or host.endswith(".youtu.be"):
        return path_parts[0] if path_parts else ""

    # Watch links: ...?v=<id>&t=10s
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    if len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
        return path_parts[1]

    # Legacy behaviour: text after the last "v=", minus trailing parameters.
    if "v=" in url:
        return url.split("v=")[-1].split("&")[0].split("#")[0].strip()

    # A bare video ID was previously accepted unchanged; keep accepting it.
    if all(ch.isalnum() or ch in "-_" for ch in url):
        return url

    return ""


def get_transcript(youtube_url, api_key):
    """Get a transcript from the YouTube API, or from Whisper if unavailable.

    Args:
        youtube_url: Video URL (watch, youtu.be, embed, shorts) or bare video ID.
        api_key: Key forwarded to ``get_video_duration``.

    Returns:
        The transcript returned by ``get_transcript_from_youtube_api`` when it
        is truthy, otherwise the result of
        ``download_and_transcribe_with_whisper``. ``None`` when the URL yields
        no video ID or the duration lookup returns ``None``.
    """
    video_id = _extract_video_id(youtube_url)
    if not video_id:
        print("Invalid or unsupported YouTube URL.")
        return None

    video_length = get_video_duration(video_id, api_key)
    if video_length is None:
        print("Error fetching video duration.")
        return None

    print(f"Video length: {video_length} minutes.")

    # Fetch transcript using YouTube API
    transcript = get_transcript_from_youtube_api(video_id, video_length)
    if transcript:
        print("Transcript found.")
        return transcript

    # No transcript from the YouTube API: Whisper gets the original URL,
    # not the extracted ID.
    print("No transcript found on YouTube, using Whisper for transcription.")
    return download_and_transcribe_with_whisper(youtube_url)
|