File size: 1,680 Bytes
779489a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import pipeline

# Pick the best available accelerator instead of assuming Apple-silicon "mps":
# a hard-coded "mps" makes pipeline construction fail on machines without
# Metal support. On a machine where mps is available the behavior is unchanged.
_mps_backend = getattr(torch.backends, "mps", None)  # absent on older torch builds
if _mps_backend is not None and _mps_backend.is_available():
    inference_device = "mps"
elif torch.cuda.is_available():
    inference_device = "cuda"
else:
    inference_device = "cpu"

# Initialize the emotion classifier.
# top_k=None asks the pipeline for the score of every label rather than only
# the best one; the per-emotion aggregation further down relies on that.
classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    device=inference_device,
)

# Book table; the rest of the script reads its `isbn13` and `description` columns.
books = pd.read_csv("data/book_with_categories.csv")

# Define emotion labels (also fixes the key order of every per-emotion dict below)
emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']


def calculate_max_emotion_score(predictions):
    """Return the highest score seen for each emotion across all predictions.

    Args:
        predictions: Iterable with one entry per classified text. Each entry is
            an iterable of ``{'label': str, 'score': float}`` dicts.

    Returns:
        A dict keyed by every label in ``emotion_labels`` (in that order) whose
        value is the maximum score reported for that label. A label that never
        appears in ``predictions`` (including when ``predictions`` is empty)
        maps to NaN.
    """
    per_emotion_scores = {label: [] for label in emotion_labels}
    for pred in predictions:
        # Match scores by label name. Relying on the position of each label in
        # a sorted list silently mis-assigns scores (or raises IndexError) as
        # soon as a prediction does not contain exactly the expected labels.
        score_by_label = {item['label']: item['score'] for item in pred}
        for label in emotion_labels:
            if label in score_by_label:
                per_emotion_scores[label].append(score_by_label[label])
    # np.max raises on an empty sequence, so fall back to NaN when a label has
    # no scores at all.
    return {
        label: np.max(scores) if scores else np.nan
        for label, scores in per_emotion_scores.items()
    }

# Process sentiment analysis for all books.
# Each description is split into pieces on '.', every piece is classified, and
# the per-emotion maximum over the pieces is stored as the book's score.
emotion_scores = {label: [] for label in emotion_labels}
isbn = []

# Iterate the two needed columns directly: `books.iloc[i][...]` materializes a
# full row on every access, which is wasteful when only two fields are used.
for isbn13, description in tqdm(
    zip(books['isbn13'], books['description']),
    total=len(books),  # zip has no length, so tell tqdm how many rows to expect
    desc="Processing books",
):
    isbn.append(isbn13)
    if isinstance(description, str):
        sentences = description.split('.')
        predictions = classifier(sentences)
        max_scores = calculate_max_emotion_score(predictions)
    else:
        # A missing description is read from CSV as NaN (a float) and has no
        # .split(); record NaN scores so the row stays aligned instead of
        # aborting the whole run.
        max_scores = {label: np.nan for label in emotion_labels}
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])


# One row per processed book: the seven emotion columns plus the join key.
emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbn

# Merge with original books DataFrame
books = pd.merge(books, emotions_df, on="isbn13")

books.to_csv('data/books_with_emotions.csv', index=False)

print("Sentiment analysis completed and saved to 'data/books_with_emotions.csv'")