Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -581,17 +581,24 @@ def get_recommendations(keywords, max_results=5):
|
|
| 581 |
|
| 582 |
def process_youtube_video(url):
|
| 583 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
thumbnail = None
|
| 585 |
summary = "No transcript available"
|
| 586 |
sentiment_label = "N/A"
|
| 587 |
|
|
|
|
| 588 |
video_id = extract_video_id(url)
|
| 589 |
if not video_id:
|
| 590 |
return None, "Invalid YouTube URL", "N/A"
|
| 591 |
|
|
|
|
| 592 |
thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
|
| 593 |
|
| 594 |
try:
|
|
|
|
| 595 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
| 596 |
transcript = None
|
| 597 |
try:
|
|
@@ -599,18 +606,21 @@ def process_youtube_video(url):
|
|
| 599 |
except:
|
| 600 |
transcript = transcript_list.find_generated_transcript(['en'])
|
| 601 |
|
|
|
|
| 602 |
text = " ".join([t['text'] for t in transcript.fetch()])
|
| 603 |
if not text.strip():
|
| 604 |
raise ValueError("Transcript is empty")
|
| 605 |
|
|
|
|
| 606 |
cleaned_text = clean_text_for_analysis(text)
|
| 607 |
-
|
| 608 |
sentiment = TextBlob(cleaned_text).sentiment
|
| 609 |
sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
|
| 610 |
|
|
|
|
| 611 |
summary = f"Summary: {cleaned_text[:400]}..."
|
| 612 |
|
| 613 |
except (TranscriptsDisabled, NoTranscriptFound):
|
|
|
|
| 614 |
metadata = get_video_metadata(video_id)
|
| 615 |
summary = metadata.get("description", "No subtitles available")
|
| 616 |
sentiment_label = "N/A"
|
|
@@ -619,6 +629,8 @@ def process_youtube_video(url):
|
|
| 619 |
|
| 620 |
except Exception as e:
|
| 621 |
return None, f"Error: {str(e)}", "N/A"
|
|
|
|
|
|
|
| 622 |
url = "https://www.youtube.com/watch?v=q1XFm21I-VQ"
|
| 623 |
thumbnail, summary, sentiment = process_youtube_video(url)
|
| 624 |
print(f"Thumbnail: {thumbnail}\n")
|
|
|
|
| 581 |
|
| 582 |
def process_youtube_video(url):
|
| 583 |
try:
|
| 584 |
+
import re
|
| 585 |
+
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
| 586 |
+
from textblob import TextBlob
|
| 587 |
+
|
| 588 |
thumbnail = None
|
| 589 |
summary = "No transcript available"
|
| 590 |
sentiment_label = "N/A"
|
| 591 |
|
| 592 |
+
# Extract video ID
|
| 593 |
video_id = extract_video_id(url)
|
| 594 |
if not video_id:
|
| 595 |
return None, "Invalid YouTube URL", "N/A"
|
| 596 |
|
| 597 |
+
# Generate thumbnail URL
|
| 598 |
thumbnail = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
|
| 599 |
|
| 600 |
try:
|
| 601 |
+
# Fetch transcript
|
| 602 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
| 603 |
transcript = None
|
| 604 |
try:
|
|
|
|
| 606 |
except:
|
| 607 |
transcript = transcript_list.find_generated_transcript(['en'])
|
| 608 |
|
| 609 |
+
# Combine transcript into text
|
| 610 |
text = " ".join([t['text'] for t in transcript.fetch()])
|
| 611 |
if not text.strip():
|
| 612 |
raise ValueError("Transcript is empty")
|
| 613 |
|
| 614 |
+
# Clean and analyze text
|
| 615 |
cleaned_text = clean_text_for_analysis(text)
|
|
|
|
| 616 |
sentiment = TextBlob(cleaned_text).sentiment
|
| 617 |
sentiment_label = f"{'Positive' if sentiment.polarity > 0 else 'Negative' if sentiment.polarity < 0 else 'Neutral'} ({sentiment.polarity:.2f})"
|
| 618 |
|
| 619 |
+
# Summarize text
|
| 620 |
summary = f"Summary: {cleaned_text[:400]}..."
|
| 621 |
|
| 622 |
except (TranscriptsDisabled, NoTranscriptFound):
|
| 623 |
+
# Fall back to metadata if no transcript
|
| 624 |
metadata = get_video_metadata(video_id)
|
| 625 |
summary = metadata.get("description", "No subtitles available")
|
| 626 |
sentiment_label = "N/A"
|
|
|
|
| 629 |
|
| 630 |
except Exception as e:
|
| 631 |
return None, f"Error: {str(e)}", "N/A"
|
| 632 |
+
|
| 633 |
+
# Test the function
|
| 634 |
url = "https://www.youtube.com/watch?v=q1XFm21I-VQ"
|
| 635 |
thumbnail, summary, sentiment = process_youtube_video(url)
|
| 636 |
print(f"Thumbnail: {thumbnail}\n")
|