import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from utils.helpers import load_data
class ContentBasedRecommender:
    """Recommend products using only catalogue metadata.

    Two strategies are offered:

    * ``"tfidf"`` - builds a TF-IDF vector per product from its category,
      subcategory, brand and name, averages the vectors of the products in
      the user's profile, and ranks the remaining products by cosine
      similarity to that average.
    * ``"feature_match"`` - additive rule-based score from preferred
      categories, favourite brands and a price budget.

    The products frame must provide the columns ``product_id``,
    ``category``, ``subcategory``, ``brand``, ``name`` and (for feature
    matching) ``price``. ``product_id`` values are passed through ``int()``
    when results are built, so they must be integer-convertible.
    """

    # Score contributions used by ``feature_match_recommend``.
    _CATEGORY_WEIGHT = 40
    _BRAND_WEIGHT = 30
    _BUDGET_WEIGHT = 20

    def __init__(self, products_df):
        """Fit the TF-IDF model on a private copy of ``products_df``.

        Args:
            products_df: DataFrame with one row per product. It is copied,
                so the caller's frame is not modified.
        """
        self.products = products_df.copy()
        # Missing metadata becomes an empty string so concatenation never
        # yields NaN for the whole row.
        self.products["text_features"] = (
            self.products["category"].fillna("") + " " +
            self.products["subcategory"].fillna("") + " " +
            self.products["brand"].fillna("") + " " +
            self.products["name"].fillna("")
        )
        self.vectorizer = TfidfVectorizer(stop_words="english")
        self.tfidf_matrix = self.vectorizer.fit_transform(
            self.products["text_features"]
        )
        self.product_ids = self.products["product_id"].values
        self._position_by_id = self._build_position_lookup(self.product_ids)

    @staticmethod
    def _build_position_lookup(product_ids):
        """Map each product id to the row position of its first occurrence."""
        lookup = {}
        for position, pid in enumerate(product_ids):
            lookup.setdefault(pid, position)
        return lookup

    def tfidf_recommend(self, user_profile_items, n_recommendations=10):
        """Rank products by TF-IDF similarity to the user's profile.

        Args:
            user_profile_items: Product ids the user has interacted with.
                Ids not present in the catalogue are ignored.
            n_recommendations: Maximum number of results.

        Returns:
            List of ``(product_id, similarity)`` tuples, most similar first,
            excluding the profile products themselves. Empty when the
            profile is empty or none of its ids are known.
        """
        if not user_profile_items:
            return []

        # Row *positions* (not index labels) are required here: they address
        # rows of the TF-IDF matrix, which knows nothing about the frame's
        # index.
        profile_positions = [
            self._position_by_id[pid]
            for pid in user_profile_items
            if pid in self._position_by_id
        ]
        if not profile_positions:
            return []

        # ``mean`` on a sparse matrix yields ``np.matrix``; convert so that
        # cosine_similarity receives a plain 2-D ndarray.
        profile_vector = np.asarray(
            self.tfidf_matrix[profile_positions].mean(axis=0)
        )
        sim_scores = cosine_similarity(profile_vector, self.tfidf_matrix).flatten()

        already_seen = set(profile_positions)
        # Stable sort on the negated scores: descending order with ties kept
        # in catalogue order.
        descending = np.argsort(-sim_scores, kind="stable")
        candidates = [int(pos) for pos in descending if int(pos) not in already_seen]

        return [
            (int(self.product_ids[pos]), float(sim_scores[pos]))
            for pos in candidates[:n_recommendations]
        ]

    def feature_match_recommend(self, preferences, n_recommendations=10):
        """Score every product against explicit user preferences.

        Args:
            preferences: Mapping with optional keys
                ``preferred_categories`` (container of category names),
                ``favorite_brands`` (container of brand names),
                ``budget_min`` (default 0) and ``budget_max`` (default: no
                upper limit; ``None`` is treated as unset).
            n_recommendations: Maximum number of results.

        Returns:
            List of ``(product_id, score)`` tuples sorted by descending
            score; ties keep catalogue order. Products that match nothing
            are still returned with score 0.
        """
        preferred_cats = preferences.get("preferred_categories", set())
        favorite_brands = preferences.get("favorite_brands", set())

        budget_min = preferences.get("budget_min")
        if budget_min is None:
            budget_min = 0
        budget_max = preferences.get("budget_max")
        if budget_max is None:
            # No ceiling: an arbitrary large constant would silently
            # penalise products priced above it.
            budget_max = float("inf")

        catalogue = self.products
        scores = []
        for pid, category, brand, price in zip(
            catalogue["product_id"],
            catalogue["category"],
            catalogue["brand"],
            catalogue["price"],
        ):
            score = 0
            if category in preferred_cats:
                score += self._CATEGORY_WEIGHT
            if brand in favorite_brands:
                score += self._BRAND_WEIGHT
            if budget_min <= price <= budget_max:
                score += self._BUDGET_WEIGHT
            # NOTE: a former +10 subcategory bonus tested membership in an
            # always-empty list and could never apply; it has been removed
            # rather than guessed at.
            scores.append((int(pid), score))

        # list.sort is stable, also with reverse=True.
        scores.sort(key=lambda item: item[1], reverse=True)
        return scores[:n_recommendations]

    def recommend(self, method, user_profile_items=None, preferences=None, n_recommendations=10):
        """Dispatch to one of the recommendation strategies.

        Args:
            method: ``"tfidf"`` or ``"feature_match"``.
            user_profile_items: Product ids for the ``"tfidf"`` method;
                ``None`` is treated as an empty profile.
            preferences: Preference mapping for ``"feature_match"``;
                ``None`` is treated as an empty mapping.
            n_recommendations: Maximum number of results.

        Returns:
            The list produced by the selected strategy.

        Raises:
            ValueError: If ``method`` is not a known strategy name.
        """
        if method == "tfidf":
            return self.tfidf_recommend(user_profile_items or [], n_recommendations)
        elif method == "feature_match":
            return self.feature_match_recommend(preferences or {}, n_recommendations)
        else:
            raise ValueError(f"Unknown method: {method}")
|