recsys-ecommerce / src /models /popularity.py
dscsdvdfsvs's picture
fix: upload src folder with model classes
80843b0 verified
import numpy as np
import pandas as pd
import pickle
from pathlib import Path
class PopularityRecommender:
    """Baseline model that recommends the globally most popular items.

    For warm users (present in the training data), items they already
    interacted with are filtered out. For cold users (unknown ``user_idx``),
    the top-K popular items are returned with no filtering.

    Why this matters:
        Any collaborative filtering model must beat this baseline.
        If ALS barely outperforms popularity, the extra complexity
        is not worth it in production.
    """

    def __init__(self):
        self.popular_items = None  # item_idx values sorted by popularity_score, best first
        self.user_history = {}  # user_idx -> set of seen item_idx
        self.is_fitted = False

    def fit(self, train_df: pd.DataFrame, item_feats: pd.DataFrame):
        """Rank items by popularity and record each user's seen items.

        Args:
            train_df: Interactions; the ``user_idx`` and ``item_idx`` columns
                are read.
            item_feats: Item features; the ``item_idx`` and
                ``popularity_score`` columns are read.

        Returns:
            ``self``, so calls can be chained.
        """
        # Sort items by Bayesian popularity score (built in Phase 3).
        ranked = item_feats.sort_values("popularity_score", ascending=False)
        # to_numpy() guarantees a plain ndarray even for extension dtypes,
        # which recommend() relies on for a stable result dtype.
        self.popular_items = ranked["item_idx"].to_numpy()

        # Build user history for filtering.
        self.user_history = (
            train_df.groupby("user_idx")["item_idx"]
            .apply(set)
            .to_dict()
        )

        self.is_fitted = True
        print(f"PopularityRecommender fitted — {len(self.popular_items):,} items ranked")
        return self

    def recommend(self, user_idx: int, k: int = 10) -> np.ndarray:
        """Return up to ``k`` of the most popular items the user has not seen.

        Args:
            user_idx: User to recommend for. Users without recorded history
                get the unfiltered popularity ranking.
            k: Maximum number of items to return; must be non-negative.

        Returns:
            Array of item indices in descending popularity order. It may hold
            fewer than ``k`` entries when the catalogue is exhausted, and it
            always has the dtype of the fitted item ranking (also when empty).

        Raises:
            RuntimeError: If the model has not been fitted.
            ValueError: If ``k`` is negative.
        """
        if not self.is_fitted:
            raise RuntimeError("Call fit() first")
        if k < 0:
            # A negative k would otherwise be taken as a slice offset and
            # silently drop items from the end of the ranking.
            raise ValueError(f"k must be non-negative, got {k}")

        seen = self.user_history.get(user_idx, set())
        recs = []
        if k > 0:
            for item in self.popular_items:
                if item in seen:
                    continue
                recs.append(item)
                # Stop early: no need to scan the whole catalogue.
                if len(recs) == k:
                    break

        # Pin the dtype so an empty result is not silently float64.
        return np.array(recs, dtype=self.popular_items.dtype)

    def recommend_batch(self, user_indices, k: int = 10) -> dict:
        """Return ``{user_idx: recommend(user_idx, k)}`` for every given user."""
        return {u: self.recommend(u, k) for u in user_indices}

    def save(self, path):
        """Pickle this recommender to ``path``."""
        with open(path, "wb") as f:
            pickle.dump(self, f)
        print(f"Saved PopularityRecommender to {path}")

    @staticmethod
    def load(path):
        """Load a recommender previously written by :meth:`save`.

        SECURITY: ``pickle.load`` can execute arbitrary code embedded in the
        file. Only load files from a trusted source.
        """
        with open(path, "rb") as f:
            return pickle.load(f)