Sayiqa committed on
Commit
226349c
·
verified ·
1 Parent(s): cccb609

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -14
app.py CHANGED
@@ -378,7 +378,6 @@ from googleapiclient.discovery import build
378
  # print(f"\nTotal Sentences Analyzed: {sentiment['total_sentences']}")
379
  #####################################################################################################
380
  from pytube import YouTube
381
- import os
382
  import re
383
  from textblob import TextBlob
384
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
@@ -400,18 +399,20 @@ def process_youtube_video(url="", keywords=""):
400
 
401
  thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
402
 
403
- # Multiple methods to get transcript
404
  text = ""
405
  error_messages = []
406
 
407
- # Method 1: YouTube Transcript API
408
  try:
409
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
410
  text = " ".join([t['text'] for t in transcript])
 
 
411
  except Exception as e:
412
- error_messages.append(str(e))
413
 
414
- # Method 2: PyTube if first method fails
415
  if not text:
416
  try:
417
  yt = YouTube(url)
@@ -421,29 +422,30 @@ def process_youtube_video(url="", keywords=""):
421
  elif 'a.en' in captions:
422
  text = captions['a.en'].generate_srt_captions()
423
  except Exception as e:
424
- error_messages.append(str(e))
425
 
426
- # Method 3: Try auto-generated captions
427
  if not text:
428
  try:
429
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
430
  auto_transcript = transcript_list.find_generated_transcript(['en'])
431
  text = " ".join([t['text'] for t in auto_transcript.fetch()])
432
  except Exception as e:
433
- error_messages.append(str(e))
434
 
 
435
  if not text:
436
  error_msg = "\n".join(error_messages)
437
- return thumbnail, f"⚠️ Could not access video content. Please try another video with English subtitles.\nDetails: {error_msg}", sentiment_label, recommendations
438
 
439
  # Process valid transcript
440
  try:
441
- # Clean text
442
  cleaned_text = re.sub(r'[^\w\s.]', '', text)
443
  cleaned_text = ' '.join(cleaned_text.split())
444
 
445
  # Sentiment Analysis
446
- blob = TextBlob(cleaned_text[:2000])
447
  polarity = blob.sentiment.polarity
448
  subjectivity = blob.sentiment.subjectivity
449
 
@@ -453,7 +455,8 @@ def process_youtube_video(url="", keywords=""):
453
  f"Subjectivity: {subjectivity:.2f}"
454
  )
455
 
456
- # Generate summary using Gemini
 
457
  model = genai.GenerativeModel("gemini-pro")
458
  prompt = f"""Provide a comprehensive summary of this content in clear points:
459
  {cleaned_text[:4000]}
@@ -467,16 +470,30 @@ def process_youtube_video(url="", keywords=""):
467
  except Exception as e:
468
  return thumbnail, f"⚠️ Error processing content: {str(e)}", sentiment_label, recommendations
469
 
470
- # Get recommendations
471
  if keywords.strip():
472
  recommendations = get_recommendations(keywords)
473
 
474
  return thumbnail, summary, sentiment_label, recommendations
475
 
476
  except Exception as e:
477
- print(f"Debug - Main Error: {str(e)}")
478
  return None, f"Error: {str(e)}", "N/A", ""
479
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
480
  def get_recommendations(keywords, max_results=5):
481
  if not keywords:
482
  return "Please provide search keywords"
 
378
  # print(f"\nTotal Sentences Analyzed: {sentiment['total_sentences']}")
379
  #####################################################################################################
380
  from pytube import YouTube
 
381
  import re
382
  from textblob import TextBlob
383
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 
399
 
400
  thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
401
 
402
+ # Initialize variables for transcript fetching
403
  text = ""
404
  error_messages = []
405
 
406
+ # Method 1: Using YouTube Transcript API
407
  try:
408
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
409
  text = " ".join([t['text'] for t in transcript])
410
+ except (TranscriptsDisabled, NoTranscriptFound) as e:
411
+ error_messages.append(f"Transcript API error: {str(e)}")
412
  except Exception as e:
413
+ error_messages.append(f"Transcript API general error: {str(e)}")
414
 
415
+ # Method 2: Using PyTube if the first method fails
416
  if not text:
417
  try:
418
  yt = YouTube(url)
 
422
  elif 'a.en' in captions:
423
  text = captions['a.en'].generate_srt_captions()
424
  except Exception as e:
425
+ error_messages.append(f"PyTube error: {str(e)}")
426
 
427
+ # Method 3: Using auto-generated captions via Transcript API
428
  if not text:
429
  try:
430
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
431
  auto_transcript = transcript_list.find_generated_transcript(['en'])
432
  text = " ".join([t['text'] for t in auto_transcript.fetch()])
433
  except Exception as e:
434
+ error_messages.append(f"Auto-generated captions error: {str(e)}")
435
 
436
+ # Check if transcript was successfully fetched
437
  if not text:
438
  error_msg = "\n".join(error_messages)
439
+ return thumbnail, f"⚠️ Could not access video content. Details: {error_msg}", sentiment_label, recommendations
440
 
441
  # Process valid transcript
442
  try:
443
+ # Clean text for analysis
444
  cleaned_text = re.sub(r'[^\w\s.]', '', text)
445
  cleaned_text = ' '.join(cleaned_text.split())
446
 
447
  # Sentiment Analysis
448
+ blob = TextBlob(cleaned_text[:2000]) # Analyze first 2000 characters for performance
449
  polarity = blob.sentiment.polarity
450
  subjectivity = blob.sentiment.subjectivity
451
 
 
455
  f"Subjectivity: {subjectivity:.2f}"
456
  )
457
 
458
+ # Generate summary using Gemini (Generative AI)
459
+ genai.configure(api_key="AIzaSyDw4LHOzdkRrU7GunTTC3_f6iS1OsAbmKA") # Replace with your actual API key
460
  model = genai.GenerativeModel("gemini-pro")
461
  prompt = f"""Provide a comprehensive summary of this content in clear points:
462
  {cleaned_text[:4000]}
 
470
  except Exception as e:
471
  return thumbnail, f"⚠️ Error processing content: {str(e)}", sentiment_label, recommendations
472
 
473
+ # Get recommendations based on keywords
474
  if keywords.strip():
475
  recommendations = get_recommendations(keywords)
476
 
477
  return thumbnail, summary, sentiment_label, recommendations
478
 
479
  except Exception as e:
 
480
  return None, f"Error: {str(e)}", "N/A", ""
481
 
482
+ def extract_video_id(url):
483
+ """
484
+ Extracts the video ID from a YouTube URL.
485
+ """
486
+ match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
487
+ return match.group(1) if match else None
488
+
489
+ def get_recommendations(keywords):
490
+ """
491
+ Fetches related video recommendations based on the provided keywords.
492
+ """
493
+ # Placeholder for fetching recommendations based on keywords
494
+ return f"Recommendations for: {keywords}" # Dummy return for now
495
+
496
+
497
  def get_recommendations(keywords, max_results=5):
498
  if not keywords:
499
  return "Please provide search keywords"