"""Gradio app that classifies the sentiment of a YouTube video's comments.

Comments are downloaded with ``youtube_comment_downloader``, labelled
POSITIVE/NEGATIVE by a DistilBERT SST-2 text-classification pipeline, and
summarised as a short text report plus a pie chart.
"""

import re
from itertools import islice
from typing import Optional, Tuple

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import torch  # noqa: F401  (not referenced directly; kept from the original file)
from transformers import pipeline
from youtube_comment_downloader import YoutubeCommentDownloader

# Loaded once at import time so every request reuses the same model.
analyzer = pipeline(
    "text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

# Compiled once; each pattern captures the 11-character video ID in group 1.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # Standard watch URL; ``v`` may follow other query parameters.
        r'(?:https?://)?(?:www\.)?youtube\.com/watch\?(?:[^#\s]*?&)?v=([a-zA-Z0-9_-]{11})',
        # Embedded URL
        r'(?:https?://)?(?:www\.)?youtube\.com/embed/([a-zA-Z0-9_-]{11})',
        # Shorts URL
        r'(?:https?://)?(?:www\.)?youtube\.com/shorts/([a-zA-Z0-9_-]{11})',
        # Shortened URL
        r'(?:https?://)?youtu\.be/([a-zA-Z0-9_-]{11})',
    )
)


def extract_video_id(url: str) -> Optional[str]:
    """Return the 11-character video ID found in *url*, or ``None``.

    Recognises ``youtube.com/watch?...v=``, ``youtube.com/embed/``,
    ``youtube.com/shorts/`` and ``youtu.be/`` URLs. An empty or ``None``
    *url* yields ``None`` instead of raising.
    """
    if not url:
        return None
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None


def fetch_comments(
    url: str, max_comments: int = 100
) -> Tuple[Optional[pd.DataFrame], Optional[str]]:
    """Download up to *max_comments* comment texts for the video at *url*.

    Returns:
        A ``(dataframe, error)`` pair. On success ``dataframe`` has a single
        ``"comments"`` column and ``error`` is ``None``; on failure
        ``dataframe`` is ``None`` and ``error`` is a human-readable message.
        Both paths return a 2-tuple so callers can always unpack the result.
    """
    video_id = extract_video_id(url)
    if not video_id:
        return None, "Invalid YouTube URL"

    downloader = YoutubeCommentDownloader()
    limit = max(max_comments, 0)  # islice rejects negative stops

    # The download is a lazy, network-backed generator, so it can fail at any
    # point while iterating; report the failure to the UI instead of raising.
    try:
        comments = [
            comment["text"]
            for comment in islice(downloader.get_comments(video_id), limit)
        ]
    except Exception as e:
        return None, f"Error: {str(e)}"

    return pd.DataFrame(comments, columns=["comments"]), None


def analyze_comments(video_url: str) -> Tuple[str, Optional[plt.Figure]]:
    """Classify a video's comments and summarise the sentiment split.

    Returns:
        A ``(summary, figure)`` pair matching the two Gradio outputs. When
        the comments cannot be fetched or there are none, ``summary`` holds
        the explanatory message and ``figure`` is ``None``.
    """
    comments_df, error = fetch_comments(video_url)
    if error is not None:
        return error, None
    if comments_df is None or comments_df.empty:
        return "No comments found.", None

    # Classify all comments in one batched call. truncation=True keeps
    # comments longer than the model's maximum input length from failing.
    results = analyzer(comments_df["comments"].tolist(), truncation=True)
    comments_df["sentiment"] = [result["label"] for result in results]

    counts = comments_df["sentiment"].value_counts()
    positive_count = int(counts.get("POSITIVE", 0))
    negative_count = int(counts.get("NEGATIVE", 0))

    # "Average" sentiment is simply whichever label is more frequent.
    if positive_count > negative_count:
        avg_sentiment = "Positive"
    elif negative_count > positive_count:
        avg_sentiment = "Negative"
    else:
        avg_sentiment = "Neutral"

    # Create a pie chart
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(
        [positive_count, negative_count],
        labels=["Positive", "Negative"],
        autopct='%1.1f%%',
        colors=['#4CAF50', '#F44336'],
    )
    ax.set_title("Sentiment Distribution")

    sentiment_summary = f"Total Positive Comments: {positive_count}\nTotal Negative Comments: {negative_count}\nAverage Sentiment: {avg_sentiment}"
    return sentiment_summary, fig


# Gradio Interface
gr.close_all()
demo = gr.Interface(
    fn=analyze_comments,
    inputs=[gr.Textbox(label="YouTube Video URL", placeholder="Enter YouTube video URL")],
    outputs=[
        gr.Textbox(label="Sentiment Summary"),
        gr.Plot(label="Sentiment Pie Chart"),
    ],
    title="YouTube Comment Sentiment Analyzer",
    description="Analyze the sentiments of YouTube video comments and view detailed analysis.",
)

if __name__ == "__main__":
    demo.launch()