# YouTube comment sentiment analysis -- Gradio app.
# Fetches top-level comments for a video via the YouTube Data API v3 and
# scores each with a 1-5 star multilingual BERT sentiment model.
# Third-party dependencies: Gradio for the web UI, Hugging Face transformers +
# torch for the sentiment model, pandas/numpy for tabulating results, and the
# official Google API client for the YouTube Data API v3.
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
import numpy as np
import googleapiclient.discovery
from dotenv import load_dotenv
import os
# Load variables from a local .env file (if present) and read the YouTube Data
# API key. NOTE(review): api_key is None when DEVELOPER_KEY is unset -- the
# API client built in analyze_video would then fail at request time.
load_dotenv()
api_key = os.getenv("DEVELOPER_KEY")
# Initialize the tokenizer and model
# nlptown's multilingual uncased BERT is a 5-class (1-5 star) sentiment
# classifier; weights are downloaded from the Hugging Face hub on first run.
tokenizer = AutoTokenizer.from_pretrained(
    "nlptown/bert-base-multilingual-uncased-sentiment"
)
model = AutoModelForSequenceClassification.from_pretrained(
    "nlptown/bert-base-multilingual-uncased-sentiment"
)
# Function to get comments from a YouTube video
def get_comments(youtube, **kwargs):
    """Collect the text of every top-level comment on a video.

    `youtube` is a built YouTube Data API client; `kwargs` are forwarded
    verbatim to commentThreads().list() (e.g. part, videoId, textFormat).
    Follows nextPageToken pagination until the last page.
    """
    collected = []
    page = youtube.commentThreads().list(**kwargs).execute()
    while page:
        collected.extend(
            item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for item in page["items"]
        )
        token = page.get("nextPageToken")
        if token is None:
            # Last page reached.
            break
        kwargs["pageToken"] = token
        page = youtube.commentThreads().list(**kwargs).execute()
    return collected
# Function to get sentiment score
def sentiment_score(comment):
    """Return the star rating (1-5) the model predicts for `comment`.

    The classifier emits 5 logits (class indices 0-4); argmax + 1 maps the
    winning class back onto the 1-5 star scale.
    """
    # Inference only: disable autograd so no computation graph is built and
    # no activations are retained for each scored comment.
    with torch.no_grad():
        tokens = tokenizer.encode(comment, return_tensors="pt")
        output = model(tokens)
    return torch.argmax(output.logits).item() + 1
# Gradio function to analyze video comments
def analyze_video(video_id):
    """Fetch a video's comments and tally predicted sentiment ratings.

    Accepts either a full watch URL or a bare video ID. Returns a dict
    mapping "Sentiment 1".."Sentiment 5" to comment counts, or a plain
    message string when the video has no comments.
    """
    # BUGFIX: the original split("v=")[1] raised IndexError for a bare video
    # ID (which the UI label asks for) and kept trailing "&..." query params
    # when a full URL was pasted. Handle both input forms.
    if "v=" in video_id:
        video_id = video_id.split("v=")[1].split("&")[0]
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    comments = get_comments(
        youtube, part="snippet", videoId=video_id, textFormat="plainText"
    )
    if not comments:
        return "No comments found."
    # Score at most the first 200 comments to bound runtime.
    df = pd.DataFrame(np.array(comments[:200]), columns=["comments"])
    print(df.head())
    # NOTE(review): the 512-character slice approximates the model's 512-token
    # input limit; the tokenizer's own truncation would be the robust fix.
    df["sentiment"] = df["comments"].apply(lambda x: sentiment_score(x[:512]))
    print(df.head())
    # Count of comments per star rating, ordered 1..5.
    sentiment_counts = df["sentiment"].value_counts().sort_index()
    sentiment_dict = {
        f"Sentiment {index}": count for index, count in sentiment_counts.items()
    }
    # Returning as a dictionary, where the key is the sentiment score (1-5) and value is the count
    return sentiment_dict
# Creating the Gradio Interface
# One textbox in, JSON out; the JSON maps "Sentiment 1".."Sentiment 5" to
# comment counts (or a plain string when no comments are found).
gr_interface = gr.Interface(
    fn=analyze_video,
    inputs=[gr.Textbox(label="YouTube Video ID")],
    outputs="json",
    title="YouTube Comment Sentiment Analysis",
    description="Input a YouTube video ID and your API key to analyze the sentiment of the comments.",
)
# Launch the Gradio app
# NOTE(review): launches at import time; there is no __main__ guard, which is
# the usual convention for Hugging Face Space entry points.
gr_interface.launch()