Spaces:
Sleeping
Sleeping
Initial implementation of TasteEngine: multi-approach recommender system with Flask web UI, evaluation suite, and explanation engine
133a630 | import numpy as np | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from utils.helpers import load_data | |
class ContentBasedRecommender:
    """Recommend products from their own attributes.

    Two strategies are available through :meth:`recommend`:

    * ``"tfidf"`` -- rank products by cosine similarity between their TF-IDF
      text vector (category, subcategory, brand and name joined together) and
      the mean vector of the products in a user's profile.
    * ``"feature_match"`` -- award fixed points for each stated preference
      (category, brand, budget, subcategory) a product satisfies.

    Both strategies return ``(product_id, score)`` tuples, best first.
    """

    # Points awarded by ``feature_match_recommend`` per satisfied criterion.
    _CATEGORY_POINTS = 40
    _BRAND_POINTS = 30
    _BUDGET_POINTS = 20
    _SUBCATEGORY_POINTS = 10
    # Budget bounds used when the caller does not supply them.
    _DEFAULT_BUDGET_MIN = 0
    _DEFAULT_BUDGET_MAX = 999999

    def __init__(self, products_df):
        """Fit the TF-IDF model on a copy of ``products_df``.

        Args:
            products_df: DataFrame with ``product_id``, ``category``,
                ``subcategory``, ``brand`` and ``name`` columns
                (``feature_match_recommend`` additionally reads ``price``).
                The frame is copied; the caller's object is not modified.
        """
        self.products = products_df.copy()
        # One text document per product; missing values become empty strings
        # so a single NaN does not blank out the whole document.
        self.products["text_features"] = (
            self.products["category"].fillna("") + " " +
            self.products["subcategory"].fillna("") + " " +
            self.products["brand"].fillna("") + " " +
            self.products["name"].fillna("")
        )
        self.vectorizer = TfidfVectorizer(stop_words="english")
        # Row i of the matrix corresponds to row *position* i of self.products.
        self.tfidf_matrix = self.vectorizer.fit_transform(
            self.products["text_features"]
        )
        self.product_ids = self.products["product_id"].values

    def tfidf_recommend(self, user_profile_items, n_recommendations=10):
        """Rank products by TF-IDF similarity to the user's profile items.

        Args:
            user_profile_items: Product ids describing the user's taste. Ids
                not present in the catalogue are ignored; an id listed more
                than once is weighted accordingly in the profile mean.
            n_recommendations: Maximum number of results.

        Returns:
            ``(product_id, similarity)`` tuples sorted by descending
            similarity, excluding the profile items themselves. Empty when
            the profile is empty or none of its ids are in the catalogue.
        """
        if not user_profile_items:
            return []

        # Resolve ids to row *positions*. The TF-IDF matrix is positional, so
        # DataFrame index labels must not be used here: they differ from
        # positions whenever the frame lacks a default 0..n-1 index.
        # First occurrence wins when an id is duplicated in the catalogue.
        position_of = {}
        for position, product_id in enumerate(self.product_ids):
            position_of.setdefault(product_id, position)

        profile_positions = [
            position_of[pid] for pid in user_profile_items if pid in position_of
        ]
        if not profile_positions:
            return []

        # Mean of the profile rows; reshape guarantees the 2-D (1, n_terms)
        # input that cosine_similarity expects.
        profile_vector = np.asarray(
            self.tfidf_matrix[profile_positions].mean(axis=0)
        ).reshape(1, -1)
        sim_scores = cosine_similarity(profile_vector, self.tfidf_matrix).flatten()

        excluded = set(profile_positions)
        # Stable sort on the negated scores: descending order, ties kept in
        # catalogue order.
        order = np.argsort(-sim_scores, kind="stable")
        ranked = [int(pos) for pos in order if int(pos) not in excluded]

        return [
            (int(self.product_ids[pos]), float(sim_scores[pos]))
            for pos in ranked[:n_recommendations]
        ]

    def feature_match_recommend(self, preferences, n_recommendations=10):
        """Score every product against explicit user preferences.

        Args:
            preferences: Mapping with any of these optional keys:

                * ``preferred_categories`` -- collection of categories.
                * ``favorite_brands`` -- collection of brands.
                * ``budget_min`` / ``budget_max`` -- inclusive price bounds.
                * ``preferred_subcategories`` -- mapping of category to a
                  collection of subcategories. The bonus applies only when
                  the product's category is also preferred. (Previously this
                  bonus could never be awarded; omitting the key keeps the
                  old scores.)

                Missing keys and explicit ``None`` values use the defaults.
            n_recommendations: Maximum number of results.

        Returns:
            ``(product_id, score)`` tuples sorted by descending score; ties
            keep catalogue order. Products scoring zero are included.
        """
        preferred_cats = preferences.get("preferred_categories") or set()
        favorite_brands = preferences.get("favorite_brands") or set()
        subcats_by_category = preferences.get("preferred_subcategories") or {}

        budget_min = preferences.get("budget_min")
        if budget_min is None:
            budget_min = self._DEFAULT_BUDGET_MIN
        budget_max = preferences.get("budget_max")
        if budget_max is None:
            budget_max = self._DEFAULT_BUDGET_MAX

        catalogue = self.products
        rows = zip(
            catalogue["product_id"],
            catalogue["category"],
            catalogue["subcategory"],
            catalogue["brand"],
            catalogue["price"],
        )

        scores = []
        for product_id, category, subcategory, brand, price in rows:
            score = 0
            category_preferred = category in preferred_cats
            if category_preferred:
                score += self._CATEGORY_POINTS
            if brand in favorite_brands:
                score += self._BRAND_POINTS
            if budget_min <= price <= budget_max:
                score += self._BUDGET_POINTS
            if category_preferred and subcategory in subcats_by_category.get(category, ()):
                score += self._SUBCATEGORY_POINTS
            scores.append((int(product_id), score))

        scores.sort(key=lambda pair: pair[1], reverse=True)
        return scores[:n_recommendations]

    def recommend(self, method, user_profile_items=None, preferences=None, n_recommendations=10):
        """Dispatch to the strategy named by ``method``.

        Args:
            method: ``"tfidf"`` or ``"feature_match"``.
            user_profile_items: Product ids for the ``tfidf`` strategy;
                ``None`` is treated as an empty profile.
            preferences: Preference mapping for the ``feature_match``
                strategy; ``None`` is treated as no preferences.
            n_recommendations: Maximum number of results.

        Returns:
            ``(product_id, score)`` tuples from the chosen strategy.

        Raises:
            ValueError: If ``method`` is not a known strategy.
        """
        if method == "tfidf":
            return self.tfidf_recommend(user_profile_items or [], n_recommendations)
        elif method == "feature_match":
            return self.feature_match_recommend(preferences or {}, n_recommendations)
        else:
            raise ValueError(f"Unknown method: {method}")