# Author: Mohit0199
# Commit: "Create app.py" (4b24268, verified)
import torch
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from transformers import pipeline
from youtube_comment_downloader import YoutubeCommentDownloader
import re
# Built once at import time so every request reuses the same pipeline object.
# The code below relies on this model emitting "POSITIVE" / "NEGATIVE" labels.
analyzer = pipeline(
    task="text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in ``url``, or ``None``.

    Args:
        url: Text that may contain a YouTube link. Non-string values
            (for example ``None``) are treated as "no ID found".

    Returns:
        The first 11-character ID matched by one of the known URL shapes,
        or ``None`` when no shape matches.
    """
    if not isinstance(url, str):
        return None

    video_id = r"([a-zA-Z0-9_-]{11})"
    patterns = (
        # The three original shapes are tried first so that every URL that
        # matched before still yields exactly the same ID.
        r"(?:https?://)?(?:www\.)?youtube\.com/watch\?v=" + video_id,  # Standard YouTube URL
        r"(?:https?://)?(?:www\.)?youtube\.com/embed/" + video_id,  # Embedded URL
        r"(?:https?://)?youtu\.be/" + video_id,  # Shortened URL
        # Watch URL where ``v`` is not the first query parameter,
        # e.g. ``watch?feature=share&v=<id>``.
        r"youtube\.com/watch\?(?:[^#\s]*&)?v=" + video_id,
        # Path-style links: Shorts, live, legacy /v/, and privacy-enhanced embeds.
        r"youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/" + video_id,
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
def fetch_comments(url, max_comments=100):
    """Download up to ``max_comments`` comment texts for a YouTube video.

    Args:
        url: YouTube video URL; the video ID is parsed with
            ``extract_video_id``.
        max_comments: Upper bound on the number of comments collected.
            Zero or a negative value yields an empty DataFrame.

    Returns:
        On success, a ``pandas.DataFrame`` with a single ``"comments"``
        column holding the comment texts.
        On failure (unrecognised URL or any error while downloading), the
        tuple ``(None, message)``.

        NOTE: the two shapes differ, so callers must check which one they
        received. The success shape is kept as a bare DataFrame for
        backward compatibility with existing callers.
    """
    from itertools import islice

    video_id = extract_video_id(url)
    if not video_id:
        return None, "Invalid YouTube URL"

    downloader = YoutubeCommentDownloader()
    try:
        # Clamp so that 0 / negative limits mean "no comments" instead of
        # collecting one comment before the limit is first checked.
        limit = max(int(max_comments), 0)
        # islice stops pulling from the generator as soon as the limit is
        # reached, so no comment beyond the limit is requested.
        texts = [
            comment["text"]
            for comment in islice(downloader.get_comments(video_id), limit)
        ]
    except Exception as e:
        # Deliberately broad: any failure is turned into a message that the
        # caller can show to the user.
        return None, f"Error: {str(e)}"

    return pd.DataFrame(texts, columns=["comments"])
def analyze_comments(video_url):
    """Classify the sentiment of a video's comments and chart the split.

    Args:
        video_url: YouTube video URL entered by the user.

    Returns:
        A ``(summary, figure)`` pair, one value per Gradio output:
        ``summary`` is the text report and ``figure`` is a matplotlib pie
        chart. When the comments cannot be fetched, or there are none,
        ``summary`` carries the message and ``figure`` is ``None``.
    """
    fetched = fetch_comments(video_url)
    # fetch_comments hands back a bare DataFrame on success but a
    # (None, message) tuple on failure; normalise both shapes here.
    if isinstance(fetched, tuple):
        comments_df, error = fetched
    else:
        comments_df, error = fetched, None

    if error:
        return error, None
    if comments_df is None or comments_df.empty:
        return "No comments found.", None

    # Analyze sentiments in one pipeline call. truncation=True cuts comments
    # that exceed the model's maximum input length instead of raising.
    predictions = analyzer(comments_df["comments"].tolist(), truncation=True)
    comments_df["sentiment"] = [prediction["label"] for prediction in predictions]

    label_counts = comments_df["sentiment"].value_counts()
    positive_count = int(label_counts.get("POSITIVE", 0))
    negative_count = int(label_counts.get("NEGATIVE", 0))

    # "Average" sentiment is simply whichever label dominates; a tie is Neutral.
    if positive_count > negative_count:
        avg_sentiment = "Positive"
    elif negative_count > positive_count:
        avg_sentiment = "Negative"
    else:
        avg_sentiment = "Neutral"

    # Create a pie chart
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(
        [positive_count, negative_count],
        labels=["Positive", "Negative"],
        autopct='%1.1f%%',
        colors=['#4CAF50', '#F44336'],
    )
    ax.set_title("Sentiment Distribution")
    # Remove the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object itself is still returned.
    plt.close(fig)

    sentiment_summary = f"Total Positive Comments: {positive_count}\nTotal Negative Comments: {negative_count}\nAverage Sentiment: {avg_sentiment}"
    return sentiment_summary, fig
# Gradio Interface
# Close any Gradio interfaces that are already running before building this one.
gr.close_all()

# Components are created up front and then wired into the interface below.
url_input = gr.Textbox(label="YouTube Video URL", placeholder="Enter YouTube video URL")
summary_output = gr.Textbox(label="Sentiment Summary")
chart_output = gr.Plot(label="Sentiment Pie Chart")

demo = gr.Interface(
    fn=analyze_comments,
    inputs=[url_input],
    outputs=[summary_output, chart_output],
    title="YouTube Comment Sentiment Analyzer",
    description="Analyze the sentiments of YouTube video comments and view detailed analysis.",
)

# Start the web app.
demo.launch()