| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn.metrics.pairwise import cosine_similarity |
class RecommenderService:
    """
    Recommend books whose summaries are textually similar to a given book.

    The service reads two columns from ``data``: ``"summaries"`` (the text
    that is vectorized) and ``"book_name"`` (the title used for lookup and
    returned to callers). ``data`` is indexed with boolean masks, ``.str``
    and ``.iloc``, so it is expected to behave like a pandas DataFrame.

    Attributes set by ``__init__``:
        data: The dataset passed in by the caller (stored as-is, not copied).
        tfidf_matrix: TF-IDF representation of ``data["summaries"]``.
        similarity_matrix: Pairwise cosine similarities between the rows of
            ``tfidf_matrix``; row/column ``i`` corresponds to the ``i``-th
            row (by position) of ``data``.
    """

    def __init__(self, data):
        """
        Build the TF-IDF and similarity matrices for ``data``.

        Args:
            data: Table with ``"book_name"`` and ``"summaries"`` columns.
        """
        self.data = data
        self.tfidf_matrix = self._compute_tfidf(data["summaries"])
        # NOTE: the pairwise matrix has one entry per pair of rows in
        # ``data``, so its size grows quadratically with the dataset.
        self.similarity_matrix = self._compute_similarity()

    def _compute_tfidf(self, summaries):
        """
        Compute the TF-IDF matrix for book summaries.

        Args:
            summaries: Iterable of summary strings, one per book
                (``data["summaries"]`` when called from ``__init__``).

        Returns:
            The matrix returned by ``TfidfVectorizer.fit_transform`` with
            English stop words removed; one row per summary.
        """
        vectorizer = TfidfVectorizer(stop_words="english")
        return vectorizer.fit_transform(summaries)

    def _compute_similarity(self):
        """
        Compute the pairwise cosine similarity of all TF-IDF rows.

        Returns:
            The result of ``cosine_similarity(self.tfidf_matrix)``.
        """
        return cosine_similarity(self.tfidf_matrix)

    def recommend_books(self, book_name, num_recommendations=5):
        """
        Recommend books similar to ``book_name``.

        The title lookup is case-insensitive. When several rows carry the
        requested title, the first one (by position) supplies the similarity
        scores, and every row with that title is excluded from the result.
        Candidates are ordered by descending similarity; ties keep their
        dataset order. Each title appears at most once in the result.

        Args:
            book_name (str): Name of the book to base recommendations on.
            num_recommendations (int): Maximum number of books to return.
                Values of zero or less yield an empty list.

        Returns:
            list: Up to ``num_recommendations`` recommended book names.

        Raises:
            ValueError: If no row of the dataset has the requested title.
        """
        titles = self.data["book_name"]

        # Boolean mask over row *positions*. Missing titles never match.
        is_query = (titles.str.lower() == book_name.lower()).to_numpy(
            dtype=bool, na_value=False
        )
        matches = is_query.nonzero()[0]
        if matches.size == 0:
            raise ValueError(f"Book '{book_name}' not found in the dataset.")

        if num_recommendations <= 0:
            return []

        # Use the positional offset, not the index label: the similarity
        # matrix is aligned with row positions, which differ from labels
        # whenever ``data`` does not have a default 0..n-1 index.
        query_position = int(matches[0])
        scores = self.similarity_matrix[query_position]

        # ``sorted`` is stable, so equal scores keep their dataset order.
        ranked_positions = sorted(
            range(len(scores)), key=lambda pos: scores[pos], reverse=True
        )

        recommended_books = []
        seen_titles = set()
        for position in ranked_positions:
            # Skip the query book itself (and same-title duplicates)
            # explicitly rather than assuming it is ranked first: another
            # book can tie with or outrank it.
            if is_query[position]:
                continue

            book_title = titles.iloc[position]
            if book_title in seen_titles:
                continue

            seen_titles.add(book_title)
            recommended_books.append(book_title)
            if len(recommended_books) >= num_recommendations:
                break

        return recommended_books
|
|