Spaces:
Running
Running
| import numpy as np | |
| import pandas as pd | |
| import pickle | |
| from pathlib import Path | |
class PopularityRecommender:
    """Baseline model that recommends the globally most popular items.

    For warm users (present in the training data) items they already
    interacted with are filtered out. For cold users the top-K popular
    items are returned with no filtering.

    Why this matters:
        Any collaborative filtering model must beat this baseline.
        If ALS barely outperforms popularity, the extra complexity
        is not worth it in production.
    """

    def __init__(self):
        self.popular_items = None  # item_idx values, most popular first
        self.user_history = {}  # user_idx -> set of seen item_idx
        self.is_fitted = False

    def fit(self, train_df: pd.DataFrame, item_feats: pd.DataFrame):
        """Rank items by popularity and record each user's seen items.

        Args:
            train_df: Interactions; the "user_idx" and "item_idx" columns
                are read.
            item_feats: Item features; the "item_idx" and
                "popularity_score" columns are read.

        Returns:
            This instance, so calls can be chained.
        """
        # Sort items by Bayesian popularity score (built in Phase 3)
        ranked = item_feats.sort_values("popularity_score", ascending=False)
        self.popular_items = ranked["item_idx"].values

        # Build user history for filtering
        self.user_history = (
            train_df.groupby("user_idx")["item_idx"].apply(set).to_dict()
        )
        self.is_fitted = True
        print(f"PopularityRecommender fitted — {len(self.popular_items):,} items ranked")
        return self

    def _empty_result(self) -> np.ndarray:
        """Return an empty array that keeps the dtype of the ranked items."""
        # A bare np.array([]) is float64 and cannot be used as an index array.
        return np.empty(0, dtype=np.asarray(self.popular_items).dtype)

    def recommend(self, user_idx: int, k: int = 10) -> np.ndarray:
        """Return up to ``k`` most popular items the user has not seen.

        Args:
            user_idx: User to recommend for. Users absent from the training
                history get the unfiltered popularity ranking.
            k: Maximum number of items to return; ``k <= 0`` yields an
                empty array.

        Returns:
            Array of item_idx values in descending popularity order.

        Raises:
            RuntimeError: If ``fit()`` has not been called.
        """
        if not self.is_fitted:
            raise RuntimeError("Call fit() first")
        if k <= 0:
            return self._empty_result()

        seen = self.user_history.get(user_idx, set())
        picked = []
        # Stop as soon as k items are collected instead of filtering the
        # whole catalog for every call.
        for item in self.popular_items:
            if item in seen:
                continue
            picked.append(item)
            if len(picked) == k:
                break

        if not picked:
            return self._empty_result()
        return np.array(picked)

    def recommend_batch(self, user_indices, k: int = 10) -> dict:
        """Return a dict mapping each user in ``user_indices`` to ``recommend(user, k)``."""
        return {u: self.recommend(u, k) for u in user_indices}

    def save(self, path):
        """Pickle this recommender to ``path``."""
        with open(path, "wb") as f:
            pickle.dump(self, f)
        print(f"Saved PopularityRecommender to {path}")

    @staticmethod
    def load(path):
        """Unpickle and return the object stored at ``path``.

        Declared static so it works both as ``PopularityRecommender.load(p)``
        and on an existing instance.
        """
        # SECURITY: pickle.load can execute arbitrary code. Only load files
        # that were produced by save() and come from a trusted location.
        with open(path, "rb") as f:
            return pickle.load(f)