# app.py — Hugging Face Space (commit 225b53c, by rya23)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
import numpy as np
import googleapiclient.discovery
from dotenv import load_dotenv
import os
# Load environment variables from a local .env file so the YouTube Data API
# key is not hard-coded in the source.
load_dotenv()
# NOTE(review): assumes DEVELOPER_KEY is present in the environment / .env;
# os.getenv returns None otherwise, which will only surface later when the
# YouTube client is built — confirm the deployment sets it.
api_key = os.getenv("DEVELOPER_KEY")
# Initialize the tokenizer and model
# Multilingual BERT sentiment model from the Hugging Face hub; downloaded on
# first run. Presumably 5 output classes, mapped to a 1-5 star score by
# sentiment_score() below.
tokenizer = AutoTokenizer.from_pretrained(
    "nlptown/bert-base-multilingual-uncased-sentiment"
)
model = AutoModelForSequenceClassification.from_pretrained(
    "nlptown/bert-base-multilingual-uncased-sentiment"
)
def get_comments(youtube, **kwargs):
    """Collect the text of every top-level comment on a video.

    Follows the commentThreads pagination via nextPageToken until the API
    stops returning one. ``kwargs`` are passed straight to
    ``commentThreads().list`` (e.g. part, videoId, textFormat) and are
    mutated in place to carry the page token between requests.

    Returns a list of comment display strings.
    """
    collected = []
    page = youtube.commentThreads().list(**kwargs).execute()
    while page:
        collected.extend(
            thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for thread in page["items"]
        )
        # No token means this was the last page.
        if "nextPageToken" not in page:
            break
        kwargs["pageToken"] = page["nextPageToken"]
        page = youtube.commentThreads().list(**kwargs).execute()
    return collected
def sentiment_score(comment):
    """Score one comment with the module-level BERT model.

    Returns the 1-based index of the highest-scoring logit — for this
    5-class model, a 1-5 star sentiment rating.
    """
    token_ids = tokenizer.encode(comment, return_tensors="pt")
    output = model(token_ids)
    # argmax is 0-based; shift to the 1-5 star scale.
    return torch.argmax(output.logits).item() + 1
def _extract_video_id(raw):
    """Normalize user input into a bare YouTube video ID.

    Accepts either a plain video ID (what the textbox label asks for) or a
    full watch URL such as ``https://www.youtube.com/watch?v=ID&list=...``.
    The original code did ``split("v=")[1]`` unconditionally, which raised
    IndexError on a bare ID and kept trailing ``&param=...`` text from URLs.
    """
    raw = raw.strip()
    if "v=" in raw:
        # Take what follows "v=" and drop any further query parameters.
        raw = raw.split("v=")[1].split("&")[0]
    return raw


# Gradio function to analyze video comments
def analyze_video(video_id):
    """Fetch comments for a video and count them by sentiment score.

    Builds a YouTube Data API client with the environment-supplied key,
    pulls the top-level comments, scores the first 200 of them with the
    BERT model (each comment truncated to 512 characters before encoding),
    and returns a ``{"Sentiment 1": count, ..., "Sentiment 5": count}``
    dict for the JSON output. Returns the string "No comments found." when
    the video has no comments.
    """
    video_id = _extract_video_id(video_id)
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    comments = get_comments(
        youtube, part="snippet", videoId=video_id, textFormat="plainText"
    )
    if not comments:
        return "No comments found."
    # Cap the workload at 200 comments; no need for the np.array detour.
    df = pd.DataFrame(comments[:200], columns=["comments"])
    # Truncate to 512 chars so the tokenized input stays within BERT's limit.
    df["sentiment"] = df["comments"].apply(lambda x: sentiment_score(x[:512]))
    sentiment_counts = df["sentiment"].value_counts().sort_index()
    # Key is the sentiment score (1-5), value is how many comments got it.
    return {
        f"Sentiment {index}": count for index, count in sentiment_counts.items()
    }
# Creating the Gradio Interface.
# The API key comes from the environment (DEVELOPER_KEY, loaded above), so
# the UI only needs the video ID — the old description wrongly told users
# to input their API key here.
gr_interface = gr.Interface(
    fn=analyze_video,
    inputs=[gr.Textbox(label="YouTube Video ID")],
    outputs="json",
    title="YouTube Comment Sentiment Analysis",
    description="Input a YouTube video ID (or watch URL) to analyze the sentiment of its comments.",
)
# Launch the Gradio app (blocks until the server is stopped).
gr_interface.launch()