import pickle
from itertools import islice
from pathlib import Path

import numpy as np
import pandas as pd


class PopularityRecommender:
    """
    Baseline model — recommends the most popular items globally.

    For warm users: filters out items they already interacted with.
    For cold users: returns the top-K popular items with no filtering.

    Why this matters:
        Any collaborative filtering model must beat this baseline.
        If ALS barely outperforms popularity, the extra complexity
        is not worth it in production.
    """

    def __init__(self):
        self.popular_items = None  # item_idx array sorted by popularity_score (desc)
        self.user_history = {}     # user_idx -> set of seen item_idx
        self.is_fitted = False

    def fit(self, train_df: pd.DataFrame, item_feats: pd.DataFrame):
        """Rank items by popularity and record each user's seen items.

        Args:
            train_df: Interactions; must contain ``user_idx`` and ``item_idx``
                columns.
            item_feats: Item table; must contain ``item_idx`` and
                ``popularity_score`` columns.

        Returns:
            ``self``, so calls can be chained.
        """
        # Sort items by Bayesian popularity score (built in Phase 3)
        self.popular_items = (
            item_feats
            .sort_values("popularity_score", ascending=False)["item_idx"]
            .values
        )

        # Build user history for filtering
        self.user_history = (
            train_df.groupby("user_idx")["item_idx"]
            .apply(set)
            .to_dict()
        )

        self.is_fitted = True
        print(f"PopularityRecommender fitted — {len(self.popular_items):,} items ranked")
        return self

    def recommend(self, user_idx: int, k: int = 10) -> np.ndarray:
        """Return up to ``k`` most popular items the user has not seen.

        Args:
            user_idx: User to recommend for. Users absent from the training
                history are treated as cold and get the unfiltered top-``k``.
            k: Maximum number of items to return. ``k <= 0`` yields an
                empty array.

        Returns:
            Array of item indices in descending popularity order. It always
            carries the dtype of ``popular_items`` (also when empty) and
            never aliases the model's internal ranking.

        Raises:
            RuntimeError: If ``fit()`` has not been called.
        """
        if not self.is_fitted:
            raise RuntimeError("Call fit() first")

        ranked = self.popular_items
        if k <= 0:
            # A negative k would otherwise slice from the end of the ranking.
            return ranked[:0].copy()

        seen = self.user_history.get(user_idx)
        if not seen:
            # Cold user (or empty history): nothing to filter, just slice.
            return ranked[:k].copy()

        # Stop scanning as soon as k unseen items are found instead of
        # filtering the whole catalogue on every call.
        unseen = (item for item in ranked if item not in seen)
        picks = list(islice(unseen, k))
        # Pin the dtype so an empty result is not silently float64.
        return np.array(picks, dtype=ranked.dtype)

    def recommend_batch(self, user_indices, k: int = 10) -> dict:
        """Return ``{user_idx: recommend(user_idx, k)}`` for each given user."""
        return {u: self.recommend(u, k) for u in user_indices}

    def save(self, path):
        """Pickle this recommender to ``path``."""
        with open(path, "wb") as f:
            pickle.dump(self, f)
        print(f"Saved PopularityRecommender to {path}")

    @staticmethod
    def load(path):
        """Load a recommender previously written by ``save``.

        NOTE: unpickling executes arbitrary code embedded in the file; only
        load files from a trusted source.
        """
        with open(path, "rb") as f:
            return pickle.load(f)