Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import pandas as pd | |
| import numpy as np | |
| import googleapiclient.discovery | |
| from dotenv import load_dotenv | |
| import os | |
# Load environment variables from a local .env file (expects DEVELOPER_KEY).
load_dotenv()
# YouTube Data API v3 key; None if DEVELOPER_KEY is missing from the environment.
api_key = os.getenv("DEVELOPER_KEY")

# Initialize the tokenizer and model.
# nlptown/bert-base-multilingual-uncased-sentiment classifies text into
# five classes (1-5 stars), which this app uses directly as sentiment scores.
tokenizer = AutoTokenizer.from_pretrained(
    "nlptown/bert-base-multilingual-uncased-sentiment"
)
model = AutoModelForSequenceClassification.from_pretrained(
    "nlptown/bert-base-multilingual-uncased-sentiment"
)
| # Function to get comments from a YouTube video | |
# Function to get comments from a YouTube video
def get_comments(youtube, **kwargs):
    """Collect the text of every top-level comment thread on a video.

    `youtube` is a googleapiclient Discovery resource; `kwargs` are passed
    straight through to commentThreads().list() (part, videoId, textFormat,
    ...).  Pages through the API until no nextPageToken is returned.
    """
    collected = []
    page = youtube.commentThreads().list(**kwargs).execute()
    while True:
        collected.extend(
            thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for thread in page["items"]
        )
        token = page.get("nextPageToken")
        if token is None:
            return collected
        # Request the next page of results with the continuation token.
        kwargs["pageToken"] = token
        page = youtube.commentThreads().list(**kwargs).execute()
| # Function to get sentiment score | |
# Function to get sentiment score
def sentiment_score(comment):
    """Return a 1-5 star sentiment score for *comment*.

    The model emits logits over five classes (index 0-4); +1 maps the
    argmax back to the 1-5 star scale.
    """
    # Truncate at the TOKEN level: the caller's character slicing is not a
    # reliable bound, since tokenization plus the [CLS]/[SEP] specials can
    # push the sequence past BERT's 512-token limit and raise at inference.
    tokens = tokenizer.encode(
        comment, return_tensors="pt", truncation=True, max_length=512
    )
    # Pure inference: skip autograd graph construction to save memory/time.
    with torch.no_grad():
        output = model(tokens)
    return torch.argmax(output.logits).item() + 1
| # Gradio function to analyze video comments | |
# Gradio function to analyze video comments
def analyze_video(video_id):
    """Fetch a video's comments and count sentiment scores.

    Parameters
    ----------
    video_id : str
        Either a bare YouTube video ID or a full watch URL containing "v=".

    Returns
    -------
    dict or str
        {"Sentiment 1": count, ..., "Sentiment 5": count} over up to the
        first 200 comments, or the string "No comments found." when the
        video has no comments.
    """
    # Accept a full URL ("...watch?v=ID&t=5s") as well as a bare ID.  The
    # original split("v=")[1] raised IndexError on bare IDs and kept any
    # trailing query parameters attached to the ID.
    if "v=" in video_id:
        video_id = video_id.split("v=", 1)[1].split("&", 1)[0]
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    comments = get_comments(
        youtube, part="snippet", videoId=video_id, textFormat="plainText"
    )
    if not comments:
        return "No comments found."
    # Cap the workload at 200 comments; each comment is character-sliced as a
    # cheap pre-filter before the model-side truncation.
    df = pd.DataFrame({"comments": comments[:200]})
    df["sentiment"] = df["comments"].apply(lambda text: sentiment_score(text[:512]))
    sentiment_counts = df["sentiment"].value_counts().sort_index()
    # Key is the sentiment score (1-5), value is how many comments got it.
    return {
        f"Sentiment {score}": count for score, count in sentiment_counts.items()
    }
| # Creating the Gradio Interface | |
# Creating the Gradio Interface: one textbox in, a JSON sentiment histogram out.
# The API key is read from the DEVELOPER_KEY environment variable at startup,
# so the description must not promise an API-key input (the original did).
gr_interface = gr.Interface(
    fn=analyze_video,
    inputs=[gr.Textbox(label="YouTube Video URL")],
    outputs="json",
    title="YouTube Comment Sentiment Analysis",
    description="Input a YouTube video URL to analyze the sentiment of its comments.",
)

# Launch the Gradio app (blocks until the server is stopped).
gr_interface.launch()