# book_recommendation_system / recommender.py
# rohitmsan's picture
# Create recommender.py
# 4ba61d6 verified
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
class RecommenderService:
    """Recommend books whose summaries are textually similar to a given book.

    On construction the service vectorizes the ``"summaries"`` column of
    ``data`` with TF-IDF and precomputes the pairwise cosine-similarity
    matrix. Row/column ``i`` of that matrix corresponds to the ``i``-th row
    of ``data`` *by position*, regardless of the DataFrame's index labels.
    """

    def __init__(self, data):
        """Build the TF-IDF and similarity matrices for ``data``.

        Args:
            data: Table-like object (the lookups below use the pandas
                DataFrame API) with a ``"summaries"`` column and a
                ``"book_name"`` column.
        """
        self.data = data
        self.tfidf_matrix = self._compute_tfidf(data["summaries"])
        self.similarity_matrix = self._compute_similarity()

    def _compute_tfidf(self, summaries):
        """Compute the TF-IDF matrix for book summaries.

        Args:
            summaries (pd.Series): Series of book summaries.

        Returns:
            sparse matrix: TF-IDF transformed summaries, one row per summary.
        """
        # English stop words are dropped so that common words do not
        # contribute to the similarity between summaries.
        vectorizer = TfidfVectorizer(stop_words="english")
        return vectorizer.fit_transform(summaries)

    def _compute_similarity(self):
        """Compute the pairwise cosine similarity of ``self.tfidf_matrix``.

        Returns:
            ndarray: Cosine similarity matrix.
        """
        return cosine_similarity(self.tfidf_matrix)

    def recommend_books(self, book_name, num_recommendations=5):
        """Recommend similar books based on a given book name.

        The book is looked up case-insensitively; the first matching row is
        used as the query. Candidates are ranked by descending similarity
        (ties keep dataset order). The query row itself, any other row whose
        title matches ``book_name`` case-insensitively, and repeated titles
        are skipped.

        Args:
            book_name (str): Name of the book to base recommendations on.
            num_recommendations (int): Maximum number of books to recommend.
                A value of zero or less yields an empty list.

        Returns:
            list: Recommended book names, most similar first. May be shorter
            than ``num_recommendations`` if the dataset has too few distinct
            titles.

        Raises:
            ValueError: If ``book_name`` is not present in the dataset.
        """
        wanted = book_name.lower()
        titles = self.data["book_name"]

        # Resolve the query to a *positional* row number. The similarity
        # matrix is ordered by row position, so an index label (which may be
        # arbitrary after filtering or re-indexing) must not be used here.
        is_match = (titles.str.lower() == wanted).to_numpy()
        positions = is_match.nonzero()[0]
        if positions.size == 0:
            raise ValueError(f"Book '{book_name}' not found in the dataset.")
        query_pos = int(positions[0])

        if num_recommendations <= 0:
            return []

        scores = self.similarity_matrix[query_pos]
        # sorted() is stable even with reverse=True, so equally similar books
        # keep their dataset order.
        ranked = sorted(
            range(len(scores)), key=lambda pos: scores[pos], reverse=True
        )

        recommended_books = []
        seen = set()
        for pos in ranked:
            # Exclude the query row explicitly instead of assuming it sorts
            # first: another row with an identical summary can tie with it.
            if pos == query_pos:
                continue
            title = titles.iloc[pos]
            # Same case-insensitive comparison as the lookup above, so that
            # duplicates of the queried book are never recommended.
            if str(title).lower() == wanted or title in seen:
                continue
            seen.add(title)
            recommended_books.append(title)
            if len(recommended_books) >= num_recommendations:
                break
        return recommended_books