Sayiqa committed on
Commit
e7be820
·
verified ·
1 Parent(s): d271517

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -44
app.py CHANGED
@@ -383,7 +383,6 @@ def process_youtube_video(url="", keywords=""):
383
  summary = ""
384
  sentiment_label = "N/A"
385
  recommendations = ""
386
- text = ""
387
 
388
  if not url.strip():
389
  return thumbnail, "Please enter a YouTube URL", sentiment_label, recommendations
@@ -394,45 +393,41 @@ def process_youtube_video(url="", keywords=""):
394
 
395
  thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
396
 
397
- # Method 1: Direct transcript fetch
 
 
 
 
398
  try:
399
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
400
  text = " ".join([t['text'] for t in transcript])
401
- except:
402
- # Method 2: List available transcripts
 
 
 
403
  try:
404
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
405
- transcript = None
406
-
407
- # Try English variants
408
- for lang_code in ['en', 'en-US', 'en-GB', 'a.en']:
409
- try:
410
- transcript = transcript_list.find_transcript([lang_code])
411
- if transcript:
412
- text = " ".join([t['text'] for t in transcript.fetch()])
413
- break
414
- except:
415
- continue
416
-
417
- # Try auto-generated if no manual transcript
418
- if not text:
419
- try:
420
- transcript = transcript_list.find_generated_transcript(['en'])
421
- text = " ".join([t['text'] for t in transcript.fetch()])
422
- except:
423
- # Try translation as last resort
424
- try:
425
- manual_transcript = transcript_list.find_manually_created_transcript()
426
- translated = manual_transcript.translate('en')
427
- text = " ".join([t['text'] for t in translated.fetch()])
428
- except:
429
- raise Exception("No available transcripts found")
430
 
 
 
 
 
 
 
431
  except Exception as e:
432
- return thumbnail, f"⚠️ No transcripts available: {str(e)}", sentiment_label, recommendations
433
 
434
  if not text:
435
- return thumbnail, "⚠️ Could not extract transcript text", sentiment_label, recommendations
 
436
 
437
  # Process valid transcript
438
  try:
@@ -451,9 +446,16 @@ def process_youtube_video(url="", keywords=""):
451
  f"Subjectivity: {subjectivity:.2f}"
452
  )
453
 
454
- # Generate summary
455
  model = genai.GenerativeModel("gemini-pro")
456
- summary = model.generate_content(f"Summarize this content: {cleaned_text[:4000]}").text
 
 
 
 
 
 
 
457
 
458
  except Exception as e:
459
  return thumbnail, f"⚠️ Error processing content: {str(e)}", sentiment_label, recommendations
@@ -468,15 +470,6 @@ def process_youtube_video(url="", keywords=""):
468
  print(f"Debug - Main Error: {str(e)}")
469
  return None, f"Error: {str(e)}", "N/A", ""
470
 
471
-
472
-
473
-
474
-
475
-
476
-
477
-
478
-
479
-
480
  def get_recommendations(keywords, max_results=5):
481
  if not keywords:
482
  return "Please provide search keywords"
 
383
  summary = ""
384
  sentiment_label = "N/A"
385
  recommendations = ""
 
386
 
387
  if not url.strip():
388
  return thumbnail, "Please enter a YouTube URL", sentiment_label, recommendations
 
393
 
394
  thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
395
 
396
+ # Multiple methods to get transcript
397
+ text = ""
398
+ error_messages = []
399
+
400
+ # Method 1: YouTube Transcript API
401
  try:
402
+ transcript = YouTubeTranscriptApi.get_transcript(video_id)
403
  text = " ".join([t['text'] for t in transcript])
404
+ except Exception as e:
405
+ error_messages.append(str(e))
406
+
407
+ # Method 2: PyTube if first method fails
408
+ if not text:
409
  try:
410
+ yt = YouTube(url)
411
+ captions = yt.captions
412
+ if 'en' in captions:
413
+ text = captions['en'].generate_srt_captions()
414
+ elif 'a.en' in captions:
415
+ text = captions['a.en'].generate_srt_captions()
416
+ except Exception as e:
417
+ error_messages.append(str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
 
419
+ # Method 3: Try auto-generated captions
420
+ if not text:
421
+ try:
422
+ transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
423
+ auto_transcript = transcript_list.find_generated_transcript(['en'])
424
+ text = " ".join([t['text'] for t in auto_transcript.fetch()])
425
  except Exception as e:
426
+ error_messages.append(str(e))
427
 
428
  if not text:
429
+ error_msg = "\n".join(error_messages)
430
+ return thumbnail, f"⚠️ Could not access video content. Please try another video with English subtitles.\nDetails: {error_msg}", sentiment_label, recommendations
431
 
432
  # Process valid transcript
433
  try:
 
446
  f"Subjectivity: {subjectivity:.2f}"
447
  )
448
 
449
+ # Generate summary using Gemini
450
  model = genai.GenerativeModel("gemini-pro")
451
+ prompt = f"""Provide a comprehensive summary of this content in clear points:
452
+ {cleaned_text[:4000]}
453
+ Include:
454
+ 1. Main topics
455
+ 2. Key points
456
+ 3. Important takeaways"""
457
+
458
+ summary = model.generate_content(prompt).text
459
 
460
  except Exception as e:
461
  return thumbnail, f"⚠️ Error processing content: {str(e)}", sentiment_label, recommendations
 
470
  print(f"Debug - Main Error: {str(e)}")
471
  return None, f"Error: {str(e)}", "N/A", ""
472
 
 
 
 
 
 
 
 
 
 
473
  def get_recommendations(keywords, max_results=5):
474
  if not keywords:
475
  return "Please provide search keywords"