# YouTube comment sentiment analysis -- Gradio app.
# Fetches top-level comments for a video via the YouTube Data API v3 and
# scores each with a 1-5 star multilingual BERT sentiment model.
# Third-party dependencies: Gradio for the web UI, Hugging Face transformers +
# torch for the sentiment model, pandas/numpy for tabulating results, and the
# official Google API client for the YouTube Data API v3.
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
import numpy as np
import googleapiclient.discovery
from dotenv import load_dotenv
import os
# Load variables from a local .env file (if present) and read the YouTube Data
# API key. NOTE(review): api_key is None when DEVELOPER_KEY is unset -- the
# API client built in analyze_video would then fail at request time.
load_dotenv()
api_key = os.getenv("DEVELOPER_KEY")
# Initialize the tokenizer and model
# nlptown's multilingual uncased BERT is a 5-class (1-5 star) sentiment
# classifier; weights are downloaded from the Hugging Face hub on first run.
tokenizer = AutoTokenizer.from_pretrained(
    "nlptown/bert-base-multilingual-uncased-sentiment"
)
model = AutoModelForSequenceClassification.from_pretrained(
    "nlptown/bert-base-multilingual-uncased-sentiment"
)
# Function to get comments from a YouTube video
def get_comments(youtube, **kwargs):
    """Collect the text of every top-level comment on a video.

    `youtube` is a built YouTube Data API client; `kwargs` are forwarded
    verbatim to commentThreads().list() (e.g. part, videoId, textFormat).
    Follows nextPageToken pagination until the last page.
    """
    collected = []
    page = youtube.commentThreads().list(**kwargs).execute()
    while page:
        collected.extend(
            item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for item in page["items"]
        )
        token = page.get("nextPageToken")
        if token is None:
            # Last page reached.
            break
        kwargs["pageToken"] = token
        page = youtube.commentThreads().list(**kwargs).execute()
    return collected
# Function to get sentiment score
def sentiment_score(comment):
    """Return the star rating (1-5) the model predicts for `comment`.

    The classifier emits 5 logits (class indices 0-4); argmax + 1 maps the
    winning class back onto the 1-5 star scale.
    """
    # Inference only: disable autograd so no computation graph is built and
    # no activations are retained for each scored comment.
    with torch.no_grad():
        tokens = tokenizer.encode(comment, return_tensors="pt")
        output = model(tokens)
    return torch.argmax(output.logits).item() + 1
# Gradio function to analyze video comments
def analyze_video(video_id):
    """Fetch a video's comments and tally predicted sentiment ratings.

    Accepts either a full watch URL or a bare video ID. Returns a dict
    mapping "Sentiment 1".."Sentiment 5" to comment counts, or a plain
    message string when the video has no comments.
    """
    # BUGFIX: the original split("v=")[1] raised IndexError for a bare video
    # ID (which the UI label asks for) and kept trailing "&..." query params
    # when a full URL was pasted. Handle both input forms.
    if "v=" in video_id:
        video_id = video_id.split("v=")[1].split("&")[0]
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    comments = get_comments(
        youtube, part="snippet", videoId=video_id, textFormat="plainText"
    )
    if not comments:
        return "No comments found."
    # Score at most the first 200 comments to bound runtime.
    df = pd.DataFrame(np.array(comments[:200]), columns=["comments"])
    print(df.head())
    # NOTE(review): the 512-character slice approximates the model's 512-token
    # input limit; the tokenizer's own truncation would be the robust fix.
    df["sentiment"] = df["comments"].apply(lambda x: sentiment_score(x[:512]))
    print(df.head())
    # Count of comments per star rating, ordered 1..5.
    sentiment_counts = df["sentiment"].value_counts().sort_index()
    sentiment_dict = {
        f"Sentiment {index}": count for index, count in sentiment_counts.items()
    }
    # Returning as a dictionary, where the key is the sentiment score (1-5) and value is the count
    return sentiment_dict
# Creating the Gradio Interface
# One textbox in, JSON out; the JSON maps "Sentiment 1".."Sentiment 5" to
# comment counts (or a plain string when no comments are found).
gr_interface = gr.Interface(
    fn=analyze_video,
    inputs=[gr.Textbox(label="YouTube Video ID")],
    outputs="json",
    title="YouTube Comment Sentiment Analysis",
    description="Input a YouTube video ID and your API key to analyze the sentiment of the comments.",
)
# Launch the Gradio app
# NOTE(review): launches at import time; there is no __main__ guard, which is
# the usual convention for Hugging Face Space entry points.
gr_interface.launch()