# Hugging Face Space: Mohit0199/youtube_comments_sentiment_analyzer
# Space status: Sleeping | File size: 3,543 bytes | Revision: 4b24268

import re
from itertools import islice

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import torch
from matplotlib.figure import Figure
from transformers import pipeline
from youtube_comment_downloader import YoutubeCommentDownloader

# Checkpoint used to classify each comment; loaded once when the module is imported.
_SENTIMENT_MODEL = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"

analyzer = pipeline(task="text-classification", model=_SENTIMENT_MODEL)


# Patterns are tried in order and the first hit wins. They are applied with
# ``search`` (unanchored), so the scheme and any subdomain such as ``www.`` or
# ``m.`` need no explicit handling.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # Watch page. ``v`` may follow other query parameters; the lazy
        # repetition prefers the earliest ``v=`` in the query string.
        r'youtube\.com/watch\?(?:[^\s#&]+&)*?v=([a-zA-Z0-9_-]{11})',
        # Embedded player (including the privacy-enhanced domain), Shorts,
        # live streams and the legacy /v/ path.
        r'youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/([a-zA-Z0-9_-]{11})',
        # Shortened share link.
        r'youtu\.be/([a-zA-Z0-9_-]{11})',
    )
)


def extract_video_id(url):
    """Return the 11-character YouTube video ID found in ``url``.

    Recognised forms: ``youtube.com/watch?...v=ID`` (``v`` anywhere in the
    query string), ``youtube.com/embed|shorts|live|v/ID``,
    ``youtube-nocookie.com/embed/ID`` and ``youtu.be/ID``.

    Args:
        url: The text to inspect, normally a URL typed by the user.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is not a string or
        no supported pattern matches.
    """
    if not isinstance(url, str):
        return None

    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)

    return None


def fetch_comments(url, max_comments=100):
    """Download up to ``max_comments`` comment texts for a YouTube video.

    Args:
        url: YouTube video URL; the video ID is taken from it with
            ``extract_video_id``.
        max_comments: Upper bound on the number of comments collected.
            Zero or a negative value yields an empty result.

    Returns:
        A ``(df, error)`` pair, always of length two. On success ``df`` is a
        DataFrame with a single ``"comments"`` column and ``error`` is
        ``None``. On failure ``df`` is ``None`` and ``error`` is a message
        suitable for showing to the user.
    """
    video_id = extract_video_id(url)
    if not video_id:
        return None, "Invalid YouTube URL"

    downloader = YoutubeCommentDownloader()

    try:
        # islice stops consuming the downloader's iterable once the cap is
        # reached, so no more comments are requested than will be used.
        limit = max(0, max_comments)
        comments = [
            comment["text"]
            for comment in islice(downloader.get_comments(video_id), limit)
        ]
    except Exception as e:
        # Boundary handler: whatever goes wrong while downloading is reported
        # to the caller as a message instead of propagating into the UI.
        return None, f"Error: {str(e)}"

    return pd.DataFrame(comments, columns=["comments"]), None


def analyze_comments(video_url):
    """Classify a video's comments and summarise the sentiment split.

    Args:
        video_url: YouTube video URL entered by the user.

    Returns:
        A ``(summary, figure)`` pair matching the interface's two outputs.
        ``summary`` is a text report with the positive count, negative count
        and dominant sentiment; ``figure`` is a matplotlib pie chart. When the
        comments cannot be fetched or there are none, ``summary`` carries the
        explanation and ``figure`` is ``None``.
    """
    comments_df, error = fetch_comments(video_url)
    if error is not None:
        return error, None
    if comments_df.empty:
        return "No comments found.", None

    # Classify all comments in one pipeline call. truncation=True lets the
    # tokenizer cut comments that exceed the model's maximum input length
    # rather than failing on them.
    predictions = analyzer(comments_df["comments"].tolist(), truncation=True)
    comments_df["sentiment"] = [prediction["label"] for prediction in predictions]

    label_counts = comments_df["sentiment"].value_counts()
    positive_count = int(label_counts.get("POSITIVE", 0))
    negative_count = int(label_counts.get("NEGATIVE", 0))

    # The "average" sentiment is simply whichever label is more frequent;
    # a tie is reported as Neutral.
    if positive_count > negative_count:
        avg_sentiment = "Positive"
    elif negative_count > positive_count:
        avg_sentiment = "Negative"
    else:
        avg_sentiment = "Neutral"

    # Build the figure directly instead of via pyplot: pyplot keeps every
    # figure it creates alive until it is explicitly closed, which would
    # accumulate one figure per request in a long-running app.
    fig = Figure(figsize=(6, 6))
    ax = fig.subplots()
    ax.pie(
        [positive_count, negative_count],
        labels=["Positive", "Negative"],
        autopct='%1.1f%%',
        colors=['#4CAF50', '#F44336'],
    )
    ax.set_title("Sentiment Distribution")

    sentiment_summary = (
        f"Total Positive Comments: {positive_count}\n"
        f"Total Negative Comments: {negative_count}\n"
        f"Average Sentiment: {avg_sentiment}"
    )

    return sentiment_summary, fig


# Gradio Interface
gr.close_all()

# Components are named individually so the Interface call below reads as
# plain wiring: one URL in, a text summary and a chart out.
url_input = gr.Textbox(
    label="YouTube Video URL",
    placeholder="Enter YouTube video URL",
)
summary_output = gr.Textbox(label="Sentiment Summary")
chart_output = gr.Plot(label="Sentiment Pie Chart")

demo = gr.Interface(
    title="YouTube Comment Sentiment Analyzer",
    description="Analyze the sentiments of YouTube video comments and view detailed analysis.",
    fn=analyze_comments,
    inputs=[url_input],
    outputs=[summary_output, chart_output],
)

demo.launch()