Spaces:

rya23
/

Sentiment_analysis

Sleeping

App Files Files Community

aarya committed on Aug 27, 2024

Commit

4dcc4fb

1 Parent(s): 49fe43c

Made changes

Browse files

Files changed (3) hide show

.gitignore +1 -0
app.py +35 -16
requirements.txt +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .env

app.py CHANGED Viewed

@@ -4,10 +4,19 @@ import torch
 import pandas as pd
 import numpy as np
 import googleapiclient.discovery
 # Initialize the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
-model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
 # Function to get comments from a YouTube video
 def get_comments(youtube, **kwargs):
@@ -15,47 +24,57 @@ def get_comments(youtube, **kwargs):
     results = youtube.commentThreads().list(**kwargs).execute()
     while results:
-        for item in results['items']:
-            comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
             comments.append(comment)
         # Check if there are more comments
-        if 'nextPageToken' in results:
-            kwargs['pageToken'] = results['nextPageToken']
             results = youtube.commentThreads().list(**kwargs).execute()
         else:
             break
     return comments
 # Function to get sentiment score
 def sentiment_score(comment):
-    sentiment = model(tokenizer.encode(comment, return_tensors='pt'))
     return torch.argmax(sentiment.logits).item() + 1
 # Gradio function to analyze video comments
-def analyze_video(video_id, api_key):
     youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
-    comments = get_comments(youtube, part="snippet", videoId=video_id, textFormat="plainText")
     if not comments:
         return "No comments found."
-    df = pd.DataFrame(np.array(comments[:100]), columns=['comments'])
-    df['sentiment'] = df['comments'].apply(lambda x: sentiment_score(x[:512]))
-    sentiment_counts = df['sentiment'].value_counts().sort_index().to_dict()
     # Returning as a dictionary, where the key is the sentiment score (1-5) and value is the count
-    return sentiment_counts
 # Creating the Gradio Interface
 gr_interface = gr.Interface(
     fn=analyze_video,
-    inputs=[gr.Textbox(label="YouTube Video ID"), gr.Textbox(label="YouTube API Key")],
     outputs="json",
     title="YouTube Comment Sentiment Analysis",
-    description="Input a YouTube video ID and your API key to analyze the sentiment of the comments."
 )
 # Launch the Gradio app

 import pandas as pd
 import numpy as np
 import googleapiclient.discovery
+from dotenv import load_dotenv
+import os
+load_dotenv()
+api_key = os.getenv("DEVELOPER_KEY")
 # Load the sentiment checkpoint once at import time; the tokenizer and the
 # classifier must come from the same checkpoint, so the id is named once.
 SENTIMENT_CHECKPOINT = "nlptown/bert-base-multilingual-uncased-sentiment"
 tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_CHECKPOINT)
 model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_CHECKPOINT)
 # Function to get comments from a YouTube video
 def get_comments(youtube, **kwargs):
     results = youtube.commentThreads().list(**kwargs).execute()
     while results:
+        for item in results["items"]:
+            comment = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
             comments.append(comment)
         # Check if there are more comments
+        if "nextPageToken" in results:
+            kwargs["pageToken"] = results["nextPageToken"]
             results = youtube.commentThreads().list(**kwargs).execute()
         else:
             break
     return comments
 # Function to get sentiment score
 def sentiment_score(comment):
     """Return the model's predicted sentiment class for one comment.

     The comment is tokenized, run through the module-level ``model``, and the
     index of the largest logit is shifted by one so the result starts at 1
     (the file's own comments describe the scale as 1-5).

     Args:
         comment: The comment text to classify.

     Returns:
         The predicted class as a plain ``int``, starting at 1.
     """
     # Truncate by tokens, not characters: a character-based slice in the
     # caller can still exceed the model's 512-position limit once the
     # special tokens are added, which makes the forward pass fail.
     encoded = tokenizer.encode(
         comment, return_tensors="pt", truncation=True, max_length=512
     )
     # Inference only, so skip autograd bookkeeping.
     with torch.no_grad():
         sentiment = model(encoded)
     return torch.argmax(sentiment.logits).item() + 1
 def _extract_video_id(text):
     """Return the bare video ID from a watch URL, a youtu.be link, or an ID."""
     from urllib.parse import parse_qs, urlparse

     text = text.strip()
     # urlparse only recognises the host/query split when a scheme is present.
     parts = urlparse(text if "://" in text else "https://" + text)
     from_query = parse_qs(parts.query).get("v")
     if from_query:
         # Using the parsed "v" parameter drops trailing ones such as "&t=10s".
         return from_query[0]
     if parts.netloc.endswith("youtu.be"):
         return parts.path.strip("/")
     if "v=" in text:
         # Legacy fallback for inputs the previous split("v=") logic accepted.
         return text.split("v=", 1)[1].split("&", 1)[0]
     # No URL structure found: treat the input as an already-bare video ID.
     return text


 # Gradio function to analyze video comments
 def analyze_video(video_id):
     """Count predicted sentiment classes over a video's top-level comments.

     Args:
         video_id: A YouTube watch URL, a ``youtu.be`` short link, or a bare
             video ID.

     Returns:
         A dict mapping ``"Sentiment <score>"`` to the number of comments with
         that score, ordered by score. At most the first 200 comments are
         scored, each cut to its first 512 characters. A message string is
         returned instead when the input is empty or no comments are found.
     """
     video_id = _extract_video_id(video_id or "")
     if not video_id:
         return "Please enter a YouTube video URL or ID."
     youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
     comments = get_comments(
         youtube, part="snippet", videoId=video_id, textFormat="plainText"
     )
     if not comments:
         return "No comments found."
     df = pd.DataFrame(np.array(comments[:200]), columns=["comments"])
     df["sentiment"] = df["comments"].apply(lambda x: sentiment_score(x[:512]))
     sentiment_counts = df["sentiment"].value_counts().sort_index()
     # Cast counts to plain ints so the JSON output never sees NumPy scalars.
     sentiment_dict = {
         f"Sentiment {index}": int(count)
         for index, count in sentiment_counts.items()
     }
     # Returning as a dictionary, where the key is the sentiment score (1-5) and value is the count
     return sentiment_dict
 # Creating the Gradio Interface
 gr_interface = gr.Interface(
     fn=analyze_video,
     # analyze_video reads the ID from the URL's "v=" parameter, so ask for the URL.
     inputs=[gr.Textbox(label="YouTube Video URL")],
     outputs="json",
     title="YouTube Comment Sentiment Analysis",
     # The API key comes from the DEVELOPER_KEY environment variable, so the
     # user is no longer asked to provide one.
     description="Input a YouTube video URL to analyze the sentiment of the comments.",
 )
 # Launch the Gradio app

requirements.txt CHANGED Viewed

@@ -3,3 +3,4 @@ transformers
 torch
 numpy
 pandas

 torch
 numpy
 pandas
+python-dotenv