# Book-Recommender-System / sentiment_analysis.py
# github-actions[bot]
# Auto-deploy from GitHub Actions
# 779489a
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import pipeline
# Initialize the emotion classifier.
# Prefer Apple's Metal backend (the device this script originally targeted),
# then CUDA, and fall back to the CPU so the script also runs on machines
# that have no accelerator at all.
if torch.backends.mps.is_available():
    _device = "mps"
elif torch.cuda.is_available():
    _device = "cuda"
else:
    _device = "cpu"

classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,  # return a score for every label, not only the best one
    device=_device,
)

# Input table; the code further down reads its 'isbn13' and 'description'
# columns.
books = pd.read_csv("data/book_with_categories.csv")
# Emotion labels to report, in the order they appear in the result dicts.
emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']


def calculate_max_emotion_score(predictions):
    """Return the highest score observed for each emotion.

    Args:
        predictions: Iterable with one entry per classified text. Each entry
            is an iterable of ``{'label': str, 'score': float}`` dicts, in
            any order.

    Returns:
        A dict mapping every label in ``emotion_labels`` to the maximum score
        that label received across all entries. A label that never appears
        (including when ``predictions`` is empty) maps to ``0.0``.
    """
    per_emotion_scores = {label: [] for label in emotion_labels}
    for pred in predictions:
        for entry in pred:
            # Match scores to emotions by label name rather than by position
            # in a sorted list, so a missing, extra or reordered label cannot
            # shift scores onto the wrong emotion.
            scores = per_emotion_scores.get(entry['label'])
            if scores is not None:
                scores.append(entry['score'])
    # np.max raises on an empty sequence, so guard labels with no scores.
    return {
        label: np.max(scores) if scores else 0.0
        for label, scores in per_emotion_scores.items()
    }
# Process sentiment analysis for all books: score every sentence of each
# description and keep, per emotion, the highest score found in that book.
emotion_scores = {label: [] for label in emotion_labels}
isbn = []

# Walk the two needed columns in lockstep instead of indexing rows by
# position, which avoids materialising a full row for every book.
for book_isbn, description in tqdm(
    zip(books["isbn13"], books["description"]),
    total=len(books),
    desc="Processing books",
):
    isbn.append(book_isbn)

    # pandas reads an empty CSV cell as NaN, which has no ``split`` method;
    # treat any non-string description as empty text.
    text = description if isinstance(description, str) else ""

    # Drop blank fragments (such as the one after a trailing period) so they
    # are not scored as if they were sentences. If nothing is left, classify
    # the raw text so the classifier always receives at least one input.
    sentences = [part for part in text.split('.') if part.strip()] or [text]

    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_score(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbn

# Attach the scores to the original books DataFrame. The rows of emotions_df
# were built in the same order as books, so join them by position: a key-based
# merge on "isbn13" would multiply rows whenever an ISBN occurs more than once.
books = pd.concat(
    [books.reset_index(drop=True), emotions_df.drop(columns="isbn13")],
    axis=1,
)

books.to_csv('data/books_with_emotions.csv', index=False)
print("Sentiment analysis completed and saved to 'data/books_with_emotions.csv'")