import pandas as pd
import numpy as np
from transformers import pipeline
from tqdm import tqdm

books = pd. read_csv("books_with_categories.csv")

# test the HF model with sample-text
classifier = pipeline("text-classification",
					  model="j-hartmann/emotion-english-distilroberta-base", 
					  top_k=None,
					  device='mps')

"""result = classifier("I love this!")

# as it returns a list of dictionaries
for item in result[0]:
	print(item)
    #another way, print(f"{item['label']:<10}: {item['score']}")"""

# break the description into sentences and give individual scores for each
"""sentences = books["description"][0].split(".")
predictions = classifier(sentences)

i = len(predictions)
while i > 0:
	for sentence in predictions[i-1]:
		print(sentence)
	print("--------------------")
	i -= 1"""

# create a dictionary with emotions having maximum probabilities from each sentence
emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

def calculate_max_emotion_scores(predictions):
	per_emotion_scores = {label: [] for label in emotion_labels}
	for prediction in predictions:
		sorted_predictions = sorted(prediction, key=lambda x: ["label"])
		for index, label in enumerate (emotion_labels):
			per_emotion_scores[label].append(sorted_predictions[index]["score"])
	return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

# for all the books in dataset
for i in tqdm(range(len(books))):
	isbn.append(books["isbn13"][i])
	sentences = books["description"][i].split(".")
	predictions = classifier(sentences)
	max_scores = calculate_max_emotion_scores(predictions)
	for label in emotion_labels:
		emotion_scores[label].append(max_scores[label])

# create a new dataframe from the results
emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbn
print(emotions_df)

books = pd.merge(books, emotions_df, on = "isbn13")
books.to_csv("books_with_emotions.csv", index = False)