Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import pandas as pd | |
| import numpy as np | |
| import googleapiclient.discovery | |
| from dotenv import load_dotenv | |
| import os | |
# Load environment variables from a local .env file (expects DEVELOPER_KEY).
load_dotenv()
# YouTube Data API v3 key; None if DEVELOPER_KEY is missing from the environment.
api_key = os.getenv("DEVELOPER_KEY")

# Initialize the tokenizer and model.
# nlptown/bert-base-multilingual-uncased-sentiment classifies text into
# five classes (1-5 stars), which this app uses directly as sentiment scores.
tokenizer = AutoTokenizer.from_pretrained(
    "nlptown/bert-base-multilingual-uncased-sentiment"
)
model = AutoModelForSequenceClassification.from_pretrained(
    "nlptown/bert-base-multilingual-uncased-sentiment"
)
| # Function to get comments from a YouTube video | |
# Function to get comments from a YouTube video
def get_comments(youtube, **kwargs):
    """Collect the text of every top-level comment thread on a video.

    `youtube` is a googleapiclient Discovery resource; `kwargs` are passed
    straight through to commentThreads().list() (part, videoId, textFormat,
    ...).  Pages through the API until no nextPageToken is returned.
    """
    collected = []
    page = youtube.commentThreads().list(**kwargs).execute()
    while True:
        collected.extend(
            thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for thread in page["items"]
        )
        token = page.get("nextPageToken")
        if token is None:
            return collected
        # Request the next page of results with the continuation token.
        kwargs["pageToken"] = token
        page = youtube.commentThreads().list(**kwargs).execute()
| # Function to get sentiment score | |
# Function to get sentiment score
def sentiment_score(comment):
    """Return a 1-5 star sentiment score for *comment*.

    The model emits logits over five classes (index 0-4); +1 maps the
    argmax back to the 1-5 star scale.
    """
    # Truncate at the TOKEN level: the caller's character slicing is not a
    # reliable bound, since tokenization plus the [CLS]/[SEP] specials can
    # push the sequence past BERT's 512-token limit and raise at inference.
    tokens = tokenizer.encode(
        comment, return_tensors="pt", truncation=True, max_length=512
    )
    # Pure inference: skip autograd graph construction to save memory/time.
    with torch.no_grad():
        output = model(tokens)
    return torch.argmax(output.logits).item() + 1
| # Gradio function to analyze video comments | |
# Gradio function to analyze video comments
def analyze_video(video_id):
    """Fetch a video's comments and count sentiment scores.

    Parameters
    ----------
    video_id : str
        Either a bare YouTube video ID or a full watch URL containing "v=".

    Returns
    -------
    dict or str
        {"Sentiment 1": count, ..., "Sentiment 5": count} over up to the
        first 200 comments, or the string "No comments found." when the
        video has no comments.
    """
    # Accept a full URL ("...watch?v=ID&t=5s") as well as a bare ID.  The
    # original split("v=")[1] raised IndexError on bare IDs and kept any
    # trailing query parameters attached to the ID.
    if "v=" in video_id:
        video_id = video_id.split("v=", 1)[1].split("&", 1)[0]
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    comments = get_comments(
        youtube, part="snippet", videoId=video_id, textFormat="plainText"
    )
    if not comments:
        return "No comments found."
    # Cap the workload at 200 comments; each comment is character-sliced as a
    # cheap pre-filter before the model-side truncation.
    df = pd.DataFrame({"comments": comments[:200]})
    df["sentiment"] = df["comments"].apply(lambda text: sentiment_score(text[:512]))
    sentiment_counts = df["sentiment"].value_counts().sort_index()
    # Key is the sentiment score (1-5), value is how many comments got it.
    return {
        f"Sentiment {score}": count for score, count in sentiment_counts.items()
    }
| # Creating the Gradio Interface | |
# Creating the Gradio Interface: one textbox in, a JSON sentiment histogram out.
# The API key is read from the DEVELOPER_KEY environment variable at startup,
# so the description must not promise an API-key input (the original did).
gr_interface = gr.Interface(
    fn=analyze_video,
    inputs=[gr.Textbox(label="YouTube Video URL")],
    outputs="json",
    title="YouTube Comment Sentiment Analysis",
    description="Input a YouTube video URL to analyze the sentiment of its comments.",
)

# Launch the Gradio app (blocks until the server is stopped).
gr_interface.launch()