"""Gradio app that scores the sentiment of a YouTube video's comments.

Top-level comments are fetched through the YouTube Data API v3 (the API key
is read from the ``DEVELOPER_KEY`` environment variable, optionally via a
``.env`` file) and each one is rated from 1 to 5 by the
``nlptown/bert-base-multilingual-uncased-sentiment`` model.
"""

import os
import re

import googleapiclient.discovery
import gradio as gr
import numpy as np
import pandas as pd
import torch
from dotenv import load_dotenv
from transformers import AutoModelForSequenceClassification, AutoTokenizer

load_dotenv()
api_key = os.getenv("DEVELOPER_KEY")

MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
# Only this many comments are scored per request.
MAX_COMMENTS = 200
# Cheap character pre-trim applied to each comment before tokenization.
MAX_COMMENT_CHARS = 512
# Token budget passed to the tokenizer so an encoded comment never exceeds
# what the BERT checkpoint accepts.
MAX_TOKENS = 512

# Matches the video ID in watch URLs (?v=...), youtu.be short links and
# /shorts/, /embed/, /live/ paths.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:(?:^|[?&])v=|youtu\.be/|/(?:shorts|embed|live)/)([^?&#/\s]+)"
)

# Initialize the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)


def _extract_video_id(video_ref):
    """Return the video ID contained in *video_ref*.

    *video_ref* may be a full YouTube URL or an ID on its own; text that does
    not look like a URL is returned unchanged (after stripping whitespace).
    """
    text = video_ref.strip()
    match = _VIDEO_ID_PATTERN.search(text)
    return match.group(1) if match else text


def get_comments(youtube, *, max_comments=None, **kwargs):
    """Collect the text of the top-level comments of a video.

    Args:
        youtube: YouTube Data API client (as built by
            ``googleapiclient.discovery.build``).
        max_comments: Stop as soon as this many comments have been collected.
            ``None`` (the default) follows ``nextPageToken`` until the last
            page.
        **kwargs: Passed through to ``commentThreads().list``.

    Returns:
        A list of comment strings in the order the API returned them.
    """
    if max_comments is not None and max_comments <= 0:
        return []

    comments = []
    request_args = dict(kwargs)
    while True:
        results = youtube.commentThreads().list(**request_args).execute()
        for item in results.get("items", []):
            comments.append(
                item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            )
            # Stop paging early: every extra page costs API quota.
            if max_comments is not None and len(comments) >= max_comments:
                return comments

        next_token = results.get("nextPageToken")
        if not next_token:
            return comments
        request_args["pageToken"] = next_token


def sentiment_score(comment):
    """Return the model's sentiment rating (1-5) for *comment*."""
    # Truncate at the token level: a character slice alone can still encode
    # to more tokens than the model accepts once special tokens are added.
    input_ids = tokenizer.encode(
        comment,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_TOKENS,
    )
    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_ids).logits
    # Class indices are 0-4; shift to the 1-5 rating scale.
    return torch.argmax(logits).item() + 1


def analyze_video(video_id):
    """Count the sentiment ratings of a video's comments (Gradio callback).

    Args:
        video_id: A YouTube video URL or a video ID on its own.

    Returns:
        A dict mapping ``"Sentiment <rating>"`` to the number of scored
        comments with that rating, or ``{"message": "No comments found."}``
        when the video has no comments.

    Raises:
        gr.Error: If the input is empty.
    """
    video_id = _extract_video_id(video_id or "")
    if not video_id:
        raise gr.Error("Please enter a YouTube video URL or ID.")

    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    comments = get_comments(
        youtube,
        max_comments=MAX_COMMENTS,
        part="snippet",
        videoId=video_id,
        textFormat="plainText",
        maxResults=100,  # largest page size the API allows
    )

    if not comments:
        # The JSON output parses string values as JSON text, so the message
        # has to be wrapped in a dict to be displayed.
        return {"message": "No comments found."}

    df = pd.DataFrame({"comments": comments[:MAX_COMMENTS]})
    print(df.head())

    df["sentiment"] = df["comments"].apply(
        lambda text: sentiment_score(text[:MAX_COMMENT_CHARS])
    )
    print(df.head())

    sentiment_counts = df["sentiment"].value_counts().sort_index()
    # Key: sentiment rating (1-5); value: number of comments. Counts are cast
    # to plain ints so the result is JSON-serializable.
    return {
        f"Sentiment {rating}": int(count)
        for rating, count in sentiment_counts.items()
    }


# Creating the Gradio Interface
gr_interface = gr.Interface(
    fn=analyze_video,
    inputs=[gr.Textbox(label="YouTube Video ID")],
    outputs="json",
    title="YouTube Comment Sentiment Analysis",
    description="Input a YouTube video URL or ID to analyze the sentiment of the comments.",
)

# Launch the Gradio app
gr_interface.launch()