Spaces:
Sleeping
Sleeping
File size: 2,296 Bytes
73dbe48 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
import numpy as np
import googleapiclient.discovery
# Initialize the tokenizer and model
# One checkpoint id feeds both loaders so the tokenizer vocabulary and the
# classification weights always come from the same pretrained model.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
# Function to get comments from a YouTube video
def get_comments(youtube, *, max_comments=None, **kwargs):
    """Collect the text of top-level comments returned by ``commentThreads().list``.

    Pages through the results by following ``nextPageToken`` until the API
    stops returning one, or until ``max_comments`` comments were gathered.

    Args:
        youtube: Client object exposing ``commentThreads().list(**kwargs).execute()``.
        max_comments: Optional upper bound on the number of comments returned.
            ``None`` (the default) fetches every page, as before. Paging stops
            as soon as the bound is reached, which saves API requests.
        **kwargs: Forwarded unchanged to ``commentThreads().list`` (for example
            ``part``, ``videoId``, ``textFormat``).

    Returns:
        A list of ``textDisplay`` strings, in the order the API returned them.
    """
    if max_comments is not None and max_comments <= 0:
        return []

    comments = []
    while True:
        response = youtube.commentThreads().list(**kwargs).execute()
        if not response:
            break

        # A page without an 'items' key is treated as an empty page instead of
        # raising KeyError.
        comments.extend(
            item['snippet']['topLevelComment']['snippet']['textDisplay']
            for item in response.get('items', [])
        )

        if max_comments is not None and len(comments) >= max_comments:
            # The last page may overshoot the bound; trim the surplus.
            return comments[:max_comments]

        next_token = response.get('nextPageToken')
        if not next_token:
            break
        # kwargs is this call's private dict, so updating it does not leak
        # the page token back to the caller.
        kwargs['pageToken'] = next_token

    return comments
# Function to get sentiment score
def sentiment_score(comment):
    """Classify one comment and return its sentiment score.

    Args:
        comment: Text to classify.

    Returns:
        The index of the largest logit plus one, as a Python int. The file
        describes these scores as ranging from 1 to 5.
    """
    # Truncate at the token level: BERT-base accepts at most 512 positions,
    # and a character-based cut by the caller can still exceed that once the
    # special tokens are added (e.g. text where every character is a token).
    input_ids = tokenizer.encode(
        comment,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )
    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        output = model(input_ids)
    return torch.argmax(output.logits).item() + 1
# Gradio function to analyze video comments
def analyze_video(video_id, api_key):
    """Score the comments of a YouTube video and count them per sentiment.

    Args:
        video_id: YouTube video ID as entered by the user.
        api_key: YouTube Data API key as entered by the user.

    Returns:
        A dict mapping each sentiment score to the number of analysed
        comments that received it, sorted by score. A plain message string is
        returned instead when an input is missing or no comments were found.

    Errors raised by the API client (for example for an invalid key) are not
    caught here and propagate to the caller.
    """
    max_comments = 100  # only this many comments are scored
    max_chars = 512     # each comment is cut to this many characters

    # Pasted values often carry stray whitespace; an empty value would only
    # produce an opaque API error, so reject it up front.
    video_id = (video_id or "").strip()
    api_key = (api_key or "").strip()
    if not video_id or not api_key:
        return "Please provide both a YouTube video ID and an API key."

    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    comments = get_comments(
        youtube,
        part="snippet",
        videoId=video_id,
        textFormat="plainText",
        # Largest page size the API allows (default is 20): fewer requests.
        maxResults=100,
    )
    if not comments:
        return "No comments found."

    df = pd.DataFrame({'comments': comments[:max_comments]})
    df['sentiment'] = df['comments'].apply(
        lambda text: sentiment_score(text[:max_chars])
    )
    # Key is the sentiment score (1-5), value is the number of comments.
    return df['sentiment'].value_counts().sort_index().to_dict()
# Creating the Gradio Interface
# Web form wiring: two text inputs are passed to analyze_video and its return
# value is rendered as JSON.
gr_interface = gr.Interface(
    fn=analyze_video,
    inputs=[
        gr.Textbox(label="YouTube Video ID"),
        # The API key is a secret: mask it so it is not shown on screen.
        gr.Textbox(label="YouTube API Key", type="password"),
    ],
    outputs="json",
    title="YouTube Comment Sentiment Analysis",
    description="Input a YouTube video ID and your API key to analyze the sentiment of the comments.",
)
# Launch the Gradio app
gr_interface.launch()
|