Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -126,7 +126,7 @@ with st.sidebar:
|
|
| 126 |
process_url_clicked = st.button("Process URL")
|
| 127 |
|
| 128 |
st.header("Enter YouTube URL")
|
| 129 |
-
youtube_url = st.text_input("YouTube URL", placeholder="e.g., https://www.youtube.com/watch?v=
|
| 130 |
process_youtube_clicked = st.button("Process YouTube Video")
|
| 131 |
|
| 132 |
# Main content container
|
|
@@ -240,11 +240,34 @@ if process_youtube_clicked:
|
|
| 240 |
st.stop()
|
| 241 |
|
| 242 |
st.text("Fetching Transcript...Started...✅✅✅")
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
if not transcript_text.strip():
|
| 247 |
-
st.error("
|
| 248 |
st.stop()
|
| 249 |
|
| 250 |
# Create a Document object from the transcript
|
|
@@ -253,10 +276,10 @@ if process_youtube_clicked:
|
|
| 253 |
embeddings = st.session_state.embeddings
|
| 254 |
process_content([doc], embeddings)
|
| 255 |
except TranscriptsDisabled:
|
| 256 |
-
st.error("Transcripts are disabled for this video. Try a different video.")
|
| 257 |
st.stop()
|
| 258 |
except NoTranscriptFound:
|
| 259 |
-
st.error("No transcript found in
|
| 260 |
st.stop()
|
| 261 |
except Exception as e:
|
| 262 |
st.error(f"Error processing YouTube video: {str(e)}")
|
|
|
|
| 126 |
process_url_clicked = st.button("Process URL")
|
| 127 |
|
| 128 |
st.header("Enter YouTube URL")
|
| 129 |
+
youtube_url = st.text_input("YouTube URL", placeholder="e.g., https://www.youtube.com/watch?v=pxiP-HJLCx0")
|
| 130 |
process_youtube_clicked = st.button("Process YouTube Video")
|
| 131 |
|
| 132 |
# Main content container
|
|
|
|
| 240 |
st.stop()
|
| 241 |
|
| 242 |
st.text("Fetching Transcript...Started...✅✅✅")
|
| 243 |
+
# Get the list of available transcripts
|
| 244 |
+
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
| 245 |
+
|
| 246 |
+
transcript = None
|
| 247 |
+
# Try to find a manually created transcript in English
|
| 248 |
+
try:
|
| 249 |
+
transcript = transcript_list.find_manually_created_transcript(['en'])
|
| 250 |
+
except NoTranscriptFound:
|
| 251 |
+
# If no manual transcript, try an auto-generated one in English
|
| 252 |
+
try:
|
| 253 |
+
transcript = transcript_list.find_generated_transcript(['en'])
|
| 254 |
+
except NoTranscriptFound:
|
| 255 |
+
# If no English transcript, get the first available transcript and translate to English
|
| 256 |
+
for t in transcript_list:
|
| 257 |
+
if t.is_translatable:
|
| 258 |
+
transcript = t.translate('en')
|
| 259 |
+
break
|
| 260 |
+
|
| 261 |
+
if not transcript:
|
| 262 |
+
st.error("No transcript available or translatable to English. Try a different video.")
|
| 263 |
+
st.stop()
|
| 264 |
+
|
| 265 |
+
# Fetch the transcript data
|
| 266 |
+
transcript_data = transcript.fetch()
|
| 267 |
+
transcript_text = " ".join([entry['text'] for entry in transcript_data])
|
| 268 |
+
|
| 269 |
if not transcript_text.strip():
|
| 270 |
+
st.error("Transcript is empty. Try a different video.")
|
| 271 |
st.stop()
|
| 272 |
|
| 273 |
# Create a Document object from the transcript
|
|
|
|
| 276 |
embeddings = st.session_state.embeddings
|
| 277 |
process_content([doc], embeddings)
|
| 278 |
except TranscriptsDisabled:
|
| 279 |
+
st.error("Transcripts are disabled for this video by the creator. Try a different video.")
|
| 280 |
st.stop()
|
| 281 |
except NoTranscriptFound:
|
| 282 |
+
st.error("No transcript found in any language. Try a different video.")
|
| 283 |
st.stop()
|
| 284 |
except Exception as e:
|
| 285 |
st.error(f"Error processing YouTube video: {str(e)}")
|