In [None]:
import pandas as pd

# Load the book table from the CSV in the working directory; later cells read
# its "description" and "isbn13" columns.
books = pd.read_csv("books_with_categories.csv")

In [None]:
!pip install torch transformers


In [None]:
import torch
import transformers
# Report the installed version of each library used by the notebook.
for library_name, library in (("PyTorch", torch), ("Transformers", transformers)):
    print(f"{library_name} version: {library.__version__}")

In [None]:
# Workaround kept from the original cell: expose the already-imported torch
# module as an attribute of the transformers namespace.
transformers.torch = torch

from transformers import pipeline

# top_k=None asks the pipeline for the score of every label. It replaces the
# deprecated return_all_scores=True flag and matches the pipeline built later
# in this notebook. Results come back ordered by score, so downstream code
# must look scores up by label rather than by position.
pipe = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
)

# Quick smoke test on a single sentence.
text = "I am so happy today!"
result = pipe(text)
print(result)

# NOTE: a `device` argument (e.g. "mps" or "cuda") can be passed to pipeline()
# to run on an accelerator; the original note suggests this also addresses
# device-related warnings -- confirm on the target machine.

In [None]:
# Classify the description stored at index label 0 as one single input.
pipe(books["description"][0])

In [None]:
# Split the same description on "." and classify each fragment separately.
# Note that str.split keeps empty and whitespace-only fragments.
pipe(books["description"][0].split("."))

In [None]:
# Keep the fragments and the classifier output in variables so individual
# entries can be inspected in the following cells.
sentences = books["description"][0].split(".")
predictions = pipe(sentences)

In [None]:
# Show the fragment at position 0.
sentences[0]

In [None]:
# Show the classifier output at position 0.
predictions[0]

In [None]:
# Show the fragment at position 4.
sentences[4]

In [None]:
# Show the classifier output at position 4.
predictions[4]

In [None]:
# Order the first prediction's entries alphabetically by their label.
sorted(
    predictions[0],
    key=lambda entry: entry['label'],
)

In [None]:
import numpy as np

# Emotion labels tracked by the notebook, in the order used for the output columns.
emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
# Accumulators filled by the processing loop: one ISBN and one score per label per book.
isbn = []
emotion_scores = {label: [] for label in emotion_labels}


def calculate_max_emotion_scores(predictions):
    """Return, for each emotion label, the highest score seen across all sentences.

    Parameters
    ----------
    predictions : iterable of iterables of dict
        One inner iterable per sentence; each dict has a 'label' and a
        'score' key.

    Returns
    -------
    dict
        Maps every entry of ``emotion_labels`` to its maximum score, or 0.0
        when ``predictions`` is empty.
    """
    per_emotion_scores = {label: [] for label in emotion_labels}
    for prediction in predictions:
        # Look scores up by label instead of by position: the classifier's
        # output order (and an alphabetical sort of it) does not match the
        # order of emotion_labels, so positional pairing mislabels scores.
        score_by_label = {entry["label"]: entry["score"] for entry in prediction}
        for label in emotion_labels:
            # A label missing from a prediction counts as 0.0.
            per_emotion_scores[label].append(score_by_label.get(label, 0.0))
    # np.max raises on an empty sequence, so fall back to 0.0 in that case.
    return {
        label: np.max(scores) if scores else 0.0
        for label, scores in per_emotion_scores.items()
    }

In [None]:
# Trial run on the first 10 books: record each ISBN and the per-emotion
# maximum score over the sentences of its description.
for _, row in books.head(10).iterrows():
    isbn.append(str(row["isbn13"]))

    # A missing description would otherwise be stringified to "nan" and
    # classified as if it were real text.
    description = str(row["description"]) if pd.notna(row["description"]) else ""

    # str.split(".") leaves empty / whitespace-only fragments (e.g. after the
    # final period); drop them so they do not influence the maxima.
    sentences = [part.strip() for part in description.split(".") if part.strip()]

    if sentences:
        predictions = pipe(sentences)
        max_scores = calculate_max_emotion_scores(predictions)
    else:
        # Nothing to classify: record neutral zeros for every label.
        max_scores = {label: 0.0 for label in emotion_labels}

    for label in emotion_labels:
        # force conversion to Python float
        emotion_scores[label].append(float(max_scores[label]))


In [None]:
# Rebuild the accumulator so every stored score is a built-in float.
emotion_scores = {
    emotion: list(map(float, values))
    for emotion, values in emotion_scores.items()
}


In [None]:
# Display the accumulated per-label score lists.
emotion_scores

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from transformers import pipeline

# Build the emotion classifier; top_k=None requests a score for every label.
pipe = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
)

# Read the book table (swap in your own file name if it differs).
books = pd.read_csv("books_with_categories.csv")

# Labels to report, in output-column order, plus fresh accumulators for the full run.
emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {emotion: [] for emotion in emotion_labels}


def calculate_max_emotion_scores(predictions):
    """Return the highest score per emotion label over all sentence predictions.

    Each element of ``predictions`` is an iterable of dicts carrying a
    'label' and a 'score'. A label absent from a sentence counts as 0.0 for
    that sentence, and an empty ``predictions`` yields 0.0 for every label.
    """
    # One label -> score lookup per sentence.
    lookups = [
        {item['label']: item['score'] for item in sentence_prediction}
        for sentence_prediction in predictions
    ]

    max_by_label = {}
    for label in emotion_labels:
        column = [lookup.get(label, 0.0) for lookup in lookups]
        # np.max cannot reduce an empty sequence, hence the explicit fallback.
        max_by_label[label] = np.max(column) if column else 0.0
    return max_by_label

print("Processing emotions for books...")
for i, row in tqdm(books.iterrows(), total=len(books)):
    isbn.append(str(row["isbn13"]))

    # Missing descriptions are treated as empty text.
    description = str(row["description"]) if pd.notna(row["description"]) else ""

    # Non-empty, stripped fragments of the description (none when there is no text).
    sentences = []
    if description and description != "nan":
        sentences = [piece.strip() for piece in description.split(".") if piece.strip()]

    # Start from all-zero scores; they are kept when there is nothing to
    # classify or when classification fails.
    max_scores = {label: 0.0 for label in emotion_labels}
    if sentences:
        try:
            predictions = pipe(sentences)
            max_scores = calculate_max_emotion_scores(predictions)
        except Exception as e:
            # Best effort: report the failure and keep the zero defaults.
            print(f"Error processing book {row['isbn13']}: {e}")

    # Record this book's score for every label.
    for label in emotion_labels:
        emotion_scores[label].append(float(max_scores[label]))

In [None]:
# One row per processed book: the per-label scores plus the matching ISBN.
emotion_df = pd.DataFrame(emotion_scores).assign(isbn13=isbn)

print("Emotion processing completed!", "Sample emotion scores:", sep="\n")
print(emotion_df.head(10))

In [None]:
# Give both frames the same string join key: cast isbn13 to str and remove the
# literal ".0" (presumably left over from float-typed ISBNs -- confirm).
for frame in (books, emotion_df):
    frame['isbn13'] = frame['isbn13'].astype(str).str.replace('.0', '', regex=False)

print("Data types before merge:")
print(f"Books isbn13 dtype: {books['isbn13'].dtype}")
print(f"Emotion isbn13 dtype: {emotion_df['isbn13'].dtype}")

# Left join keeps every row of books and attaches the emotion columns by isbn13.
books_with_emotions = pd.merge(books, emotion_df, on='isbn13', how='left')

# Persist the combined table without the index column.
books_with_emotions.to_csv("books_with_emotions.csv", index=False)

print("Saved books with emotions to 'books_with_emotions.csv'")
print(f"Total books processed: {len(books_with_emotions)}")
print("Available columns:", books_with_emotions.columns.tolist())