MahatirTusher committed on
Commit
f777d19
·
verified ·
1 Parent(s): 5d35abc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -7
app.py CHANGED
@@ -126,7 +126,7 @@ with st.sidebar:
126
  process_url_clicked = st.button("Process URL")
127
 
128
  st.header("Enter YouTube URL")
129
- youtube_url = st.text_input("YouTube URL", placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ")
130
  process_youtube_clicked = st.button("Process YouTube Video")
131
 
132
  # Main content container
@@ -240,11 +240,34 @@ if process_youtube_clicked:
240
  st.stop()
241
 
242
  st.text("Fetching Transcript...Started...✅✅✅")
243
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en', 'bn'])
244
- transcript_text = " ".join([entry['text'] for entry in transcript])
245
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  if not transcript_text.strip():
247
- st.error("No transcript available for this video. Try a different video.")
248
  st.stop()
249
 
250
  # Create a Document object from the transcript
@@ -253,10 +276,10 @@ if process_youtube_clicked:
253
  embeddings = st.session_state.embeddings
254
  process_content([doc], embeddings)
255
  except TranscriptsDisabled:
256
- st.error("Transcripts are disabled for this video. Try a different video.")
257
  st.stop()
258
  except NoTranscriptFound:
259
- st.error("No transcript found in the supported languages (English or Bengali). Try a different video.")
260
  st.stop()
261
  except Exception as e:
262
  st.error(f"Error processing YouTube video: {str(e)}")
 
126
  process_url_clicked = st.button("Process URL")
127
 
128
  st.header("Enter YouTube URL")
129
+ youtube_url = st.text_input("YouTube URL", placeholder="e.g., https://www.youtube.com/watch?v=pxiP-HJLCx0")
130
  process_youtube_clicked = st.button("Process YouTube Video")
131
 
132
  # Main content container
 
240
  st.stop()
241
 
242
  st.text("Fetching Transcript...Started...✅✅✅")
243
+ # Get the list of available transcripts
244
+ transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
245
+
246
+ transcript = None
247
+ # Try to find a manually created transcript in English
248
+ try:
249
+ transcript = transcript_list.find_manually_created_transcript(['en'])
250
+ except NoTranscriptFound:
251
+ # If no manual transcript, try an auto-generated one in English
252
+ try:
253
+ transcript = transcript_list.find_generated_transcript(['en'])
254
+ except NoTranscriptFound:
255
+ # If no English transcript, get the first available transcript and translate to English
256
+ for t in transcript_list:
257
+ if t.is_translatable:
258
+ transcript = t.translate('en')
259
+ break
260
+
261
+ if not transcript:
262
+ st.error("No transcript available or translatable to English. Try a different video.")
263
+ st.stop()
264
+
265
+ # Fetch the transcript data
266
+ transcript_data = transcript.fetch()
267
+ transcript_text = " ".join([entry['text'] for entry in transcript_data])
268
+
269
  if not transcript_text.strip():
270
+ st.error("Transcript is empty. Try a different video.")
271
  st.stop()
272
 
273
  # Create a Document object from the transcript
 
276
  embeddings = st.session_state.embeddings
277
  process_content([doc], embeddings)
278
  except TranscriptsDisabled:
279
+ st.error("Transcripts are disabled for this video by the creator. Try a different video.")
280
  st.stop()
281
  except NoTranscriptFound:
282
+ st.error("No transcript found in any language. Try a different video.")
283
  st.stop()
284
  except Exception as e:
285
  st.error(f"Error processing YouTube video: {str(e)}")