Update handler.py
Browse files- handler.py +29 -0
handler.py
CHANGED
|
@@ -4,6 +4,35 @@ import numpy as np
|
|
| 4 |
import pandas as pd
|
| 5 |
import os
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
class EndpointHandler:
|
| 8 |
def __init__(self, path=""):
|
| 9 |
model_path = os.path.join(path, "model.pkl")
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import os
|
| 6 |
|
| 7 |
+
class ContentBasedRecommender:
    """Recommend unseen books taken from the ``similar_books`` of books a user already has.

    ``train_data`` is indexed by the columns ``'user_id'``, ``'book_id'`` and
    ``'similar_books'``; each ``similar_books`` cell must be an iterable of
    book ids, because the cells are unioned into a set.
    """

    def __init__(self, train_data):
        """Store the interaction table used for every later lookup."""
        self.train_data = train_data

    def predict(self, user_id, k=10):
        """Return up to ``k`` recommended book ids for ``user_id``.

        Candidates are the union of ``similar_books`` over the user's books,
        minus the books the user already has. When at least ``k`` candidates
        exist, ``k`` of them are sampled uniformly without replacement and
        returned as a NumPy array; otherwise all candidates are returned as a
        list (empty for a user with no rows in ``train_data``).
        """
        data = self.train_data
        user_books = set(data.loc[data['user_id'] == user_id, 'book_id'])

        similar_books = set()
        for book_id in user_books:
            # A book may occur on several rows; the first row's list is used.
            first_row_similar = data.loc[
                data['book_id'] == book_id, 'similar_books'
            ].iloc[0]
            similar_books.update(first_row_similar)

        recommended_books = list(similar_books - user_books)
        if len(recommended_books) >= k:
            return np.random.choice(recommended_books, size=k, replace=False)
        return recommended_books

    def evaluate(self, test_data, k=10, max_users=100):
        """Compute mean hit rate and mean NDCG at ``k`` over users in ``test_data``.

        Args:
            test_data: table with ``'user_id'`` and ``'book_id'`` columns
                holding the held-out books of each user.
            k: number of recommendations requested per user; must be >= 1.
            max_users: evaluate only the first ``max_users`` distinct users
                (``None`` evaluates all of them).

        Returns:
            ``(mean_hit_rate, mean_ndcg)``; ``(0.0, 0.0)`` when there is no
            user to evaluate.

        Raises:
            ValueError: if ``k`` is smaller than 1.
        """
        # Local import: math.log2 is needed here and the module's visible
        # import list (numpy, pandas, os) does not provide it.
        import math

        if k < 1:
            raise ValueError("k must be a positive integer")

        user_ids = test_data['user_id'].unique()
        hit_rate, ndcg_scores = [], []

        for user_id in user_ids[:max_users]:
            true_books = set(test_data.loc[test_data['user_id'] == user_id, 'book_id'])
            # Keep the order returned by predict(): DCG discounts by rank, so
            # the ranking must not be scrambled by a set conversion.
            pred_books = list(self.predict(user_id, k))
            ideal_hits = min(k, len(true_books))

            hits = sum(1 for book in pred_books if book in true_books)
            hit_rate.append(hits / ideal_hits)

            dcg = sum(
                1 / math.log2(rank + 2)
                for rank, book in enumerate(pred_books)
                if book in true_books
            )
            idcg = sum(1 / math.log2(i + 2) for i in range(ideal_hits))
            ndcg_scores.append(dcg / idcg if idcg > 0 else 0)

        if not hit_rate:
            # np.mean([]) would return nan and emit a RuntimeWarning.
            return 0.0, 0.0
        return np.mean(hit_rate), np.mean(ndcg_scores)
|
| 35 |
+
|
| 36 |
class EndpointHandler:
|
| 37 |
def __init__(self, path=""):
|
| 38 |
model_path = os.path.join(path, "model.pkl")
|