Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -378,7 +378,6 @@ from googleapiclient.discovery import build
|
|
| 378 |
# print(f"\nTotal Sentences Analyzed: {sentiment['total_sentences']}")
|
| 379 |
#####################################################################################################
|
| 380 |
from pytube import YouTube
|
| 381 |
-
import os
|
| 382 |
import re
|
| 383 |
from textblob import TextBlob
|
| 384 |
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
|
@@ -400,18 +399,20 @@ def process_youtube_video(url="", keywords=""):
|
|
| 400 |
|
| 401 |
thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
|
| 402 |
|
| 403 |
-
#
|
| 404 |
text = ""
|
| 405 |
error_messages = []
|
| 406 |
|
| 407 |
-
# Method 1: YouTube Transcript API
|
| 408 |
try:
|
| 409 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
| 410 |
text = " ".join([t['text'] for t in transcript])
|
|
|
|
|
|
|
| 411 |
except Exception as e:
|
| 412 |
-
error_messages.append(str(e))
|
| 413 |
|
| 414 |
-
# Method 2: PyTube if first method fails
|
| 415 |
if not text:
|
| 416 |
try:
|
| 417 |
yt = YouTube(url)
|
|
@@ -421,29 +422,30 @@ def process_youtube_video(url="", keywords=""):
|
|
| 421 |
elif 'a.en' in captions:
|
| 422 |
text = captions['a.en'].generate_srt_captions()
|
| 423 |
except Exception as e:
|
| 424 |
-
error_messages.append(str(e))
|
| 425 |
|
| 426 |
-
# Method 3:
|
| 427 |
if not text:
|
| 428 |
try:
|
| 429 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
| 430 |
auto_transcript = transcript_list.find_generated_transcript(['en'])
|
| 431 |
text = " ".join([t['text'] for t in auto_transcript.fetch()])
|
| 432 |
except Exception as e:
|
| 433 |
-
error_messages.append(str(e))
|
| 434 |
|
|
|
|
| 435 |
if not text:
|
| 436 |
error_msg = "\n".join(error_messages)
|
| 437 |
-
return thumbnail, f"⚠️ Could not access video content.
|
| 438 |
|
| 439 |
# Process valid transcript
|
| 440 |
try:
|
| 441 |
-
# Clean text
|
| 442 |
cleaned_text = re.sub(r'[^\w\s.]', '', text)
|
| 443 |
cleaned_text = ' '.join(cleaned_text.split())
|
| 444 |
|
| 445 |
# Sentiment Analysis
|
| 446 |
-
blob = TextBlob(cleaned_text[:2000])
|
| 447 |
polarity = blob.sentiment.polarity
|
| 448 |
subjectivity = blob.sentiment.subjectivity
|
| 449 |
|
|
@@ -453,7 +455,8 @@ def process_youtube_video(url="", keywords=""):
|
|
| 453 |
f"Subjectivity: {subjectivity:.2f}"
|
| 454 |
)
|
| 455 |
|
| 456 |
-
# Generate summary using Gemini
|
|
|
|
| 457 |
model = genai.GenerativeModel("gemini-pro")
|
| 458 |
prompt = f"""Provide a comprehensive summary of this content in clear points:
|
| 459 |
{cleaned_text[:4000]}
|
|
@@ -467,16 +470,30 @@ def process_youtube_video(url="", keywords=""):
|
|
| 467 |
except Exception as e:
|
| 468 |
return thumbnail, f"⚠️ Error processing content: {str(e)}", sentiment_label, recommendations
|
| 469 |
|
| 470 |
-
# Get recommendations
|
| 471 |
if keywords.strip():
|
| 472 |
recommendations = get_recommendations(keywords)
|
| 473 |
|
| 474 |
return thumbnail, summary, sentiment_label, recommendations
|
| 475 |
|
| 476 |
except Exception as e:
|
| 477 |
-
print(f"Debug - Main Error: {str(e)}")
|
| 478 |
return None, f"Error: {str(e)}", "N/A", ""
|
| 479 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
def get_recommendations(keywords, max_results=5):
|
| 481 |
if not keywords:
|
| 482 |
return "Please provide search keywords"
|
|
|
|
| 378 |
# print(f"\nTotal Sentences Analyzed: {sentiment['total_sentences']}")
|
| 379 |
#####################################################################################################
|
| 380 |
from pytube import YouTube
|
|
|
|
| 381 |
import re
|
| 382 |
from textblob import TextBlob
|
| 383 |
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
|
|
|
| 399 |
|
| 400 |
thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
|
| 401 |
|
| 402 |
+
# Initialize variables for transcript fetching
|
| 403 |
text = ""
|
| 404 |
error_messages = []
|
| 405 |
|
| 406 |
+
# Method 1: Using YouTube Transcript API
|
| 407 |
try:
|
| 408 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
| 409 |
text = " ".join([t['text'] for t in transcript])
|
| 410 |
+
except (TranscriptsDisabled, NoTranscriptFound) as e:
|
| 411 |
+
error_messages.append(f"Transcript API error: {str(e)}")
|
| 412 |
except Exception as e:
|
| 413 |
+
error_messages.append(f"Transcript API general error: {str(e)}")
|
| 414 |
|
| 415 |
+
# Method 2: Using PyTube if the first method fails
|
| 416 |
if not text:
|
| 417 |
try:
|
| 418 |
yt = YouTube(url)
|
|
|
|
| 422 |
elif 'a.en' in captions:
|
| 423 |
text = captions['a.en'].generate_srt_captions()
|
| 424 |
except Exception as e:
|
| 425 |
+
error_messages.append(f"PyTube error: {str(e)}")
|
| 426 |
|
| 427 |
+
# Method 3: Using auto-generated captions via Transcript API
|
| 428 |
if not text:
|
| 429 |
try:
|
| 430 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
| 431 |
auto_transcript = transcript_list.find_generated_transcript(['en'])
|
| 432 |
text = " ".join([t['text'] for t in auto_transcript.fetch()])
|
| 433 |
except Exception as e:
|
| 434 |
+
error_messages.append(f"Auto-generated captions error: {str(e)}")
|
| 435 |
|
| 436 |
+
# Check if transcript was successfully fetched
|
| 437 |
if not text:
|
| 438 |
error_msg = "\n".join(error_messages)
|
| 439 |
+
return thumbnail, f"⚠️ Could not access video content. Details: {error_msg}", sentiment_label, recommendations
|
| 440 |
|
| 441 |
# Process valid transcript
|
| 442 |
try:
|
| 443 |
+
# Clean text for analysis
|
| 444 |
cleaned_text = re.sub(r'[^\w\s.]', '', text)
|
| 445 |
cleaned_text = ' '.join(cleaned_text.split())
|
| 446 |
|
| 447 |
# Sentiment Analysis
|
| 448 |
+
blob = TextBlob(cleaned_text[:2000]) # Analyze first 2000 characters for performance
|
| 449 |
polarity = blob.sentiment.polarity
|
| 450 |
subjectivity = blob.sentiment.subjectivity
|
| 451 |
|
|
|
|
| 455 |
f"Subjectivity: {subjectivity:.2f}"
|
| 456 |
)
|
| 457 |
|
| 458 |
+
# Generate summary using Gemini (Generative AI)
|
| 459 |
+
# SECURITY: never commit API keys. Read the key from the environment (for
# example a Space secret) and revoke the key that was previously hard-coded here.
import os  # local import: the module-level `import os` is removed by this change
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
|
| 460 |
model = genai.GenerativeModel("gemini-pro")
|
| 461 |
prompt = f"""Provide a comprehensive summary of this content in clear points:
|
| 462 |
{cleaned_text[:4000]}
|
|
|
|
| 470 |
except Exception as e:
|
| 471 |
return thumbnail, f"⚠️ Error processing content: {str(e)}", sentiment_label, recommendations
|
| 472 |
|
| 473 |
+
# Get recommendations based on keywords
|
| 474 |
if keywords.strip():
|
| 475 |
recommendations = get_recommendations(keywords)
|
| 476 |
|
| 477 |
return thumbnail, summary, sentiment_label, recommendations
|
| 478 |
|
| 479 |
except Exception as e:
|
|
|
|
| 480 |
return None, f"Error: {str(e)}", "N/A", ""
|
| 481 |
|
| 482 |
+
def extract_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Args:
        url: A YouTube URL such as ``https://www.youtube.com/watch?v=<id>``
            or ``https://youtu.be/<id>``.

    Returns:
        The first 11-character run of ``[0-9A-Za-z_-]`` that directly follows
        ``v=`` or ``/`` and is not followed by another ID character, or
        ``None`` when *url* is empty, not a string, or contains no such run.
    """
    if not url or not isinstance(url, str):
        return None
    # The trailing negative lookahead keeps longer path segments (for example
    # ``/attribution_link`` or a 24-character channel ID) from being truncated
    # into a bogus 11-character "video ID".
    match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])", url)
    return match.group(1) if match else None
|
| 488 |
+
|
| 489 |
+
def get_recommendations(keywords, max_results=5):
    """Return a placeholder recommendation string for *keywords*.

    NOTE(review): a second ``get_recommendations(keywords, max_results=5)``
    is defined right after this stub; being the later module-level binding it
    appears to replace this one, which would make this version dead code.
    Confirm and remove the stub.

    Args:
        keywords: Search keywords echoed back in the placeholder text.
        max_results: Accepted and ignored, so the stub's signature matches
            the implementation that follows.

    Returns:
        The string ``"Recommendations for: <keywords>"``.
    """
    return f"Recommendations for: {keywords}"
|
| 495 |
+
|
| 496 |
+
|
| 497 |
def get_recommendations(keywords, max_results=5):
|
| 498 |
if not keywords:
|
| 499 |
return "Please provide search keywords"
|