aarya committed on
Commit
73dbe48
·
1 Parent(s): befb114

Add application file

Browse files
Files changed (2) hide show
  1. app.py +62 -0
  2. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
import numpy as np
import googleapiclient.discovery

# Multilingual sentiment model: predicts a 1-5 star rating for input text.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

# Initialize the tokenizer and model once at import time.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
11
+
# Fetch every top-level comment of a video, following API pagination.
def get_comments(youtube, **kwargs):
    """Return the textDisplay of all top-level comments.

    youtube: an authenticated googleapiclient YouTube service object.
    kwargs:  forwarded verbatim to commentThreads().list()
             (part, videoId, textFormat, ...).
    """
    collected = []
    page = youtube.commentThreads().list(**kwargs).execute()

    while page:
        collected.extend(
            item['snippet']['topLevelComment']['snippet']['textDisplay']
            for item in page['items']
        )

        # Stop once the API reports no further pages.
        if 'nextPageToken' not in page:
            break
        kwargs['pageToken'] = page['nextPageToken']
        page = youtube.commentThreads().list(**kwargs).execute()

    return collected
30
+
31
# Score one comment's sentiment as a 1-5 star rating.
def sentiment_score(comment):
    """Return the predicted star rating (1-5) for *comment*.

    Tokenizes with truncation to the model's 512-token limit: the caller's
    512-*character* slice was not a safe guard, because 512 characters plus
    the [CLS]/[SEP] special tokens can still exceed 512 tokens and crash
    the model. Inference runs under no_grad since gradients are not needed.
    """
    tokens = tokenizer.encode(
        comment, return_tensors='pt', truncation=True, max_length=512
    )
    with torch.no_grad():  # inference only; skip autograd bookkeeping
        logits = model(tokens).logits
    # Model classes are 0-4; shift to the human-facing 1-5 star scale.
    return torch.argmax(logits).item() + 1
35
+
# Gradio callback: fetch a video's comments and tally sentiment scores.
def analyze_video(video_id, api_key):
    """Analyze the sentiment of a YouTube video's comments.

    video_id: the YouTube video ID (the part after `v=` in the URL).
    api_key:  a YouTube Data API v3 key.

    Returns a dict mapping sentiment score (1-5) to comment count, or a
    plain message string when the video has no comments.
    """
    youtube = googleapiclient.discovery.build(
        "youtube", "v3", developerKey=api_key
    )
    comments = get_comments(
        youtube, part="snippet", videoId=video_id, textFormat="plainText"
    )

    if not comments:
        return "No comments found."

    # Cap at 100 comments to keep inference time reasonable. Build the
    # DataFrame straight from the list of strings — the former np.array()
    # wrapper was a needless detour through a numpy string array.
    df = pd.DataFrame({'comments': comments[:100]})
    # Rough per-comment length guard before scoring.
    df['sentiment'] = df['comments'].apply(lambda text: sentiment_score(text[:512]))

    # {score (1-5): count}, keys sorted ascending.
    return df['sentiment'].value_counts().sort_index().to_dict()
51
+
# Build the Gradio UI: two text inputs (video ID, API key) -> JSON output.
gr_interface = gr.Interface(
    fn=analyze_video,
    inputs=[gr.Textbox(label="YouTube Video ID"), gr.Textbox(label="YouTube API Key")],
    outputs="json",
    title="YouTube Comment Sentiment Analysis",
    description="Input a YouTube video ID and your API key to analyze the sentiment of the comments.",
)

# Launch only when executed as a script, keeping the module importable
# (e.g. for tests) without starting a web server as a side effect.
if __name__ == "__main__":
    gr_interface.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ google-api-python-client
+ gradio
+ transformers
+ torch
+ pandas
+ numpy