aarya committed on
Commit
4dcc4fb
·
1 Parent(s): 49fe43c

Made changes

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +35 -16
  3. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
app.py CHANGED
@@ -4,10 +4,19 @@ import torch
4
  import pandas as pd
5
  import numpy as np
6
  import googleapiclient.discovery
 
 
7
 
 
 
8
  # Initialize the tokenizer and model
9
- tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
10
- model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
 
 
 
 
 
11
 
12
  # Function to get comments from a YouTube video
13
  def get_comments(youtube, **kwargs):
@@ -15,47 +24,57 @@ def get_comments(youtube, **kwargs):
15
  results = youtube.commentThreads().list(**kwargs).execute()
16
 
17
  while results:
18
- for item in results['items']:
19
- comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
20
  comments.append(comment)
21
 
22
  # Check if there are more comments
23
- if 'nextPageToken' in results:
24
- kwargs['pageToken'] = results['nextPageToken']
25
  results = youtube.commentThreads().list(**kwargs).execute()
26
  else:
27
  break
28
 
29
  return comments
30
 
 
31
  # Function to get sentiment score
32
  def sentiment_score(comment):
33
- sentiment = model(tokenizer.encode(comment, return_tensors='pt'))
34
  return torch.argmax(sentiment.logits).item() + 1
35
 
 
36
  # Gradio function to analyze video comments
37
- def analyze_video(video_id, api_key):
 
 
38
  youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
39
- comments = get_comments(youtube, part="snippet", videoId=video_id, textFormat="plainText")
40
-
 
 
41
  if not comments:
42
  return "No comments found."
43
 
44
- df = pd.DataFrame(np.array(comments[:100]), columns=['comments'])
45
- df['sentiment'] = df['comments'].apply(lambda x: sentiment_score(x[:512]))
46
 
47
- sentiment_counts = df['sentiment'].value_counts().sort_index().to_dict()
 
 
 
48
 
49
  # Returning as a dictionary, where the key is the sentiment score (1-5) and value is the count
50
- return sentiment_counts
 
51
 
52
  # Creating the Gradio Interface
53
  gr_interface = gr.Interface(
54
  fn=analyze_video,
55
- inputs=[gr.Textbox(label="YouTube Video ID"), gr.Textbox(label="YouTube API Key")],
56
  outputs="json",
57
  title="YouTube Comment Sentiment Analysis",
58
- description="Input a YouTube video ID and your API key to analyze the sentiment of the comments."
59
  )
60
 
61
  # Launch the Gradio app
 
4
  import pandas as pd
5
  import numpy as np
6
  import googleapiclient.discovery
7
+ from dotenv import load_dotenv
8
+ import os
9
 
10
+ load_dotenv()
11
+ api_key = os.getenv("DEVELOPER_KEY")
12
  # Initialize the tokenizer and model
13
+ tokenizer = AutoTokenizer.from_pretrained(
14
+ "nlptown/bert-base-multilingual-uncased-sentiment"
15
+ )
16
+ model = AutoModelForSequenceClassification.from_pretrained(
17
+ "nlptown/bert-base-multilingual-uncased-sentiment"
18
+ )
19
+
20
 
21
  # Function to get comments from a YouTube video
22
  def get_comments(youtube, **kwargs):
 
24
  results = youtube.commentThreads().list(**kwargs).execute()
25
 
26
  while results:
27
+ for item in results["items"]:
28
+ comment = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
29
  comments.append(comment)
30
 
31
  # Check if there are more comments
32
+ if "nextPageToken" in results:
33
+ kwargs["pageToken"] = results["nextPageToken"]
34
  results = youtube.commentThreads().list(**kwargs).execute()
35
  else:
36
  break
37
 
38
  return comments
39
 
40
+
41
  # Function to get sentiment score
42
  def sentiment_score(comment):
43
+ sentiment = model(tokenizer.encode(comment, return_tensors="pt"))
44
  return torch.argmax(sentiment.logits).item() + 1
45
 
46
+
47
  # Gradio function to analyze video comments
48
+ def analyze_video(video_id):
49
+
50
+ video_id = video_id.split("v=")[1]
51
  youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
52
+ comments = get_comments(
53
+ youtube, part="snippet", videoId=video_id, textFormat="plainText"
54
+ )
55
+
56
  if not comments:
57
  return "No comments found."
58
 
59
+ df = pd.DataFrame(np.array(comments[:200]), columns=["comments"])
60
+ df["sentiment"] = df["comments"].apply(lambda x: sentiment_score(x[:512]))
61
 
62
+ sentiment_counts = df["sentiment"].value_counts().sort_index()
63
+ sentiment_dict = {
64
+ f"Sentiment {index}": count for index, count in sentiment_counts.items()
65
+ }
66
 
67
  # Returning as a dictionary, where the key is the sentiment score (1-5) and value is the count
68
+ return sentiment_dict
69
+
70
 
71
  # Creating the Gradio Interface
72
  gr_interface = gr.Interface(
73
  fn=analyze_video,
74
+ inputs=[gr.Textbox(label="YouTube Video ID")],
75
  outputs="json",
76
  title="YouTube Comment Sentiment Analysis",
77
+ description="Input a YouTube video ID and your API key to analyze the sentiment of the comments.",
78
  )
79
 
80
  # Launch the Gradio app
requirements.txt CHANGED
@@ -3,3 +3,4 @@ transformers
3
  torch
4
  numpy
5
  pandas
 
 
3
  torch
4
  numpy
5
  pandas
6
+ python-dotenv