Spaces:

nirmanpatel
/

semantic-book-recommender

Sleeping

App Files Files Community

semantic-book-recommender / sentiment_analysis.py

nirmanpatel

Upload 4 files

226e11e verified 7 months ago

raw

history blame contribute delete

2.02 kB

	import numpy as np
	import pandas as pd
	import torch
	from tqdm import tqdm
	from transformers import pipeline

	# Load the book table that the rest of this script scores.
	books = pd.read_csv("books_with_categories.csv")

	# Choose the inference device at runtime. The device used to be hard-coded
	# to Apple's "mps" backend, which makes pipeline construction fail on any
	# machine without Metal support; fall back to CUDA and then CPU instead.
	if torch.backends.mps.is_available():
	    device = "mps"
	elif torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Emotion classifier. top_k=None asks the pipeline for a score per label
	# rather than only the best one; the scoring code below needs all of them.
	classifier = pipeline(
	    "text-classification",
	    model="j-hartmann/emotion-english-distilroberta-base",
	    top_k=None,
	    device=device,
	)

	# Disabled smoke test, kept as a bare string literal so it never executes:
	# it classifies one sample sentence and prints each entry of the result.
	"""result = classifier("I love this!")

	# as it returns a list of dictionaries
	for item in result[0]:
	print(item)
	#another way, print(f"{item['label']:<10}: {item['score']}")"""

	# Disabled exploration snippet, kept as a bare string literal: Python
	# evaluates the string and discards it, so none of this code runs.
	# It breaks the first book's description into "."-separated pieces, gets
	# the classifier's predictions for each piece, and prints them walking
	# from the last piece back to the first, with a dashed separator line.
	# NOTE(review): the nesting inside the literal has been flattened, so it
	# would need re-indenting before it could be re-enabled.
	"""sentences = books["description"][0].split(".")
	predictions = classifier(sentences)

	i = len(predictions)
	while i > 0:
	for sentence in predictions[i-1]:
	print(sentence)
	print("--------------------")
	i -= 1"""

	# Labels looked up in every classifier prediction; each one becomes an
	# output column holding that emotion's strongest score per book.
	emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
	# Accumulators filled by the per-book loop further down the script.
	isbn = []
	emotion_scores = {label: [] for label in emotion_labels}


	def calculate_max_emotion_scores(predictions):
	    """Return the highest score seen for each emotion across predictions.

	    Args:
	        predictions: Iterable with one entry per classified text; each
	            entry is an iterable of dicts carrying a "label" and a "score".

	    Returns:
	        Dict mapping every label in ``emotion_labels`` to the maximum score
	        that label received in any entry of ``predictions``.

	    Raises:
	        KeyError: If an entry has no score for one of ``emotion_labels``.
	        ValueError: If ``predictions`` is empty (``np.max`` of no values).
	    """
	    per_emotion_scores = {label: [] for label in emotion_labels}
	    for prediction in predictions:
	        # Match scores to emotions by label name, never by position. The
	        # previous code sorted with a constant key (a no-op) and then
	        # indexed positionally, so scores landed under the wrong emotion;
	        # an alphabetical sort would not line up with emotion_labels
	        # either, because "neutral" is listed last there.
	        score_by_label = {item["label"]: item["score"] for item in prediction}
	        for label in emotion_labels:
	            per_emotion_scores[label].append(score_by_label[label])
	    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

	# Walk every book once: classify the "."-separated pieces of its
	# description and keep the strongest score per emotion next to its ISBN.
	book_rows = zip(books["isbn13"], books["description"])
	for book_isbn, description in tqdm(book_rows, total=len(books)):
	    isbn.append(book_isbn)
	    strongest = calculate_max_emotion_scores(classifier(description.split(".")))
	    for label, column in emotion_scores.items():
	        column.append(strongest[label])

	# Turn the accumulated scores into a table with the ISBN as the last column.
	emotions_df = pd.DataFrame(emotion_scores).assign(isbn13=isbn)
	print(emotions_df)

	# Attach the emotion columns to the book table and write the result out.
	books = books.merge(emotions_df, on="isbn13")
	books.to_csv("books_with_emotions.csv", index=False)