# nextAnalytics/reddit/reddit_sentiment_analysis.py
# honeybansal23's picture
# updated file system
# 13e5718
import pandas as pd
import ast
from transformers import pipeline
# Emotion classifier used by SentimentAnalysis.process_comment. It is built
# once, when this module is imported, from the
# "SamLowe/roberta-base-go_emotions" checkpoint. top_k=None requests scores
# for all labels instead of only the best one (see the transformers
# text-classification pipeline documentation).
classifier = pipeline(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
# Empty DataFrame; nothing in the visible part of this file reads or writes it.
# NOTE(review): possibly a leftover — confirm no other module imports `output`.
output=pd.DataFrame()
class SentimentAnalysis:
    """Attach emotion scores to nested comment threads stored in a CSV file.

    Each comment is a dict with a ``'comment'`` text and an optional
    ``'replies'`` list holding comments of the same shape. Every comment in
    the tree is scored with a text-classification callable and the top
    results are stored next to the text.
    """

    # Only this many leading characters of a comment are sent to the model.
    # NOTE(review): this is a character cut, not a token cut; if the model's
    # limit is expressed in tokens, unusual text may still exceed it — confirm.
    MAX_CHARS = 512
    # Number of leading entries of the model output kept per comment.
    TOP_EMOTIONS = 3

    def __init__(self, classifier=None):
        """Create an analyser.

        Args:
            classifier: Optional callable taking a list of strings and
                returning, for each string, a list of ``{'label', 'score'}``
                dicts. When omitted, the module-level ``classifier`` pipeline
                is used, which is what the original code always did.
        """
        self._classifier = classifier

    def _classify(self, text: str) -> list:
        """Return the first ``TOP_EMOTIONS`` model results for *text*."""
        model = getattr(self, "_classifier", None)
        if model is None:
            # Resolved lazily so the module global is only required when no
            # classifier was injected.
            model = classifier
        model_outputs = model([text[:self.MAX_CHARS]])
        # One input sentence -> one result list; keep its leading entries.
        # The pipeline is presumably returning them sorted by descending
        # score, so these are the strongest emotions — verify in the docs.
        return model_outputs[0][:self.TOP_EMOTIONS]

    def process_comment(self, comment: dict) -> dict:
        """Score one comment and, recursively, all of its replies.

        Args:
            comment: Dict with a ``'comment'`` text and optionally a
                ``'replies'`` list of dicts of the same shape.

        Returns:
            A new dict with keys ``'comment'`` (the original text value),
            ``'emotion'`` (leading classifier results) and ``'replies'``
            (processed replies, an empty list when there are none).
        """
        raw_text = comment.get('comment')
        # A missing or non-string body (e.g. None) is scored as empty text
        # instead of crashing on the slice.
        text = raw_text if isinstance(raw_text, str) else ""
        # Dict literals evaluate in order, so the parent is classified before
        # its replies, exactly as before.
        return {
            "comment": raw_text,
            'emotion': self._classify(text),
            # `.get(...) or []` tolerates both a missing key and an
            # empty/None value for leaf comments.
            "replies": [
                self.process_comment(reply)
                for reply in (comment.get('replies') or [])
            ],
        }

    def generate_sentiment_and_emotion_from_data(self, fileName: str) -> None:
        """Rewrite *fileName* in place with emotion-annotated comments.

        The CSV must have a ``comments`` column whose cells are Python-literal
        strings of the form ``{'comments': [...]}``. Each cell is replaced by
        the same structure with every comment passed through
        :meth:`process_comment`. Cells that are not strings (for example NaN
        for an empty cell) are written back unchanged.

        Args:
            fileName: Path of the CSV file to read and overwrite.
        """
        df = pd.read_csv(fileName)

        annotated = []
        for cell in df['comments']:
            if not isinstance(cell, str):
                # Nothing to parse (empty cell); keep the value as it is.
                annotated.append(cell)
                continue
            # literal_eval only accepts Python literals, so it does not
            # execute code found in the file.
            thread_list = ast.literal_eval(cell)['comments']
            annotated.append({
                "comments": [self.process_comment(c) for c in thread_list]
            })

        # Replace the column in one step; the other columns are never touched,
        # so their dtypes cannot be disturbed by row-wise writes.
        df['comments'] = annotated

        df.to_csv(fileName, index=False)
        print("Sentiment Data saved to",fileName)