Spaces:
Runtime error
Runtime error
# AI-assisted (Claude Code, claude.ai) -- https://claude.ai
"""Popularity baseline recommender — the floor every other model must beat."""
| import pandas as pd | |
| import numpy as np | |
| import pickle | |
| from pathlib import Path | |
class PopularityRecommender:
    """Recommend the top-N most purchased items, globally or per department.

    The model is purely count based: ``fit`` tallies purchases in the
    training transactions, and ``recommend`` slices the resulting rankings.
    """

    def __init__(self):
        # All three attributes are populated by fit(); None means "not fitted".
        # Article ids ordered from most to least purchased overall.
        self.global_ranking = None
        # Department name -> article ids ordered by purchases in that department.
        self.dept_ranking = None
        # Customer id -> the department that customer purchased from most.
        self.user_top_dept = None

    def fit(self, train: pd.DataFrame, articles: pd.DataFrame):
        """Build the popularity rankings from training transactions.

        Args:
            train: One row per purchase; must contain ``customer_id`` and
                ``article_id`` columns.
            articles: Article metadata; must contain ``article_id`` and
                ``department_name`` columns.
        """
        # Global popularity: number of transactions per article, descending.
        self.global_ranking = (
            train.groupby("article_id").size()
            .sort_values(ascending=False)
            .index.tolist()
        )

        # Attach each transaction's department. The left join keeps
        # transactions whose article is missing from `articles`; those rows
        # get a NaN department, which pandas' groupby drops by default.
        # NOTE(review): assumes `article_id` is unique in `articles`;
        # duplicates there would inflate the per-department counts.
        train_with_dept = train.merge(
            articles[["article_id", "department_name"]],
            on="article_id",
            how="left",
        )

        # Per-department popularity: sort by count within each department,
        # then collapse every department's articles into an ordered list.
        self.dept_ranking = (
            train_with_dept.groupby(["department_name", "article_id"])
            .size()
            .reset_index(name="count")
            .sort_values(["department_name", "count"], ascending=[True, False])
            .groupby("department_name")["article_id"]
            .apply(list)
            .to_dict()
        )

        # Each user's most-purchased department: count purchases per
        # (customer, department) and keep the row with the largest count.
        user_dept = (
            train_with_dept.groupby(["customer_id", "department_name"])
            .size()
            .reset_index(name="count")
        )
        self.user_top_dept = (
            user_dept.loc[user_dept.groupby("customer_id")["count"].idxmax()]
            .set_index("customer_id")["department_name"]
            .to_dict()
        )

        print(f"Fitted on {len(train):,} transactions")
        print(f"Global top 5: {self.global_ranking[:5]}")

    def recommend(self, customer_id: str, k: int = 12, mode: str = "global"):
        """Return up to ``k`` article ids for ``customer_id``.

        Args:
            customer_id: Customer to recommend for.
            k: Maximum number of articles to return.
            mode: ``"department"`` ranks within the customer's most-purchased
                department; any other value uses the global ranking.

        Returns:
            The first ``k`` entries of the chosen ranking. In department mode
            the list can be shorter than ``k`` when the department has fewer
            than ``k`` distinct purchased articles. Customers not seen during
            ``fit`` always receive the global ranking.
        """
        if mode == "department" and customer_id in self.user_top_dept:
            dept = self.user_top_dept[customer_id]
            # Fall back to the global list if the department has no ranking.
            candidates = self.dept_ranking.get(dept, self.global_ranking)
        else:
            candidates = self.global_ranking
        return candidates[:k]

    def save(self, path: str):
        """Pickle this recommender to ``path``."""
        with open(path, "wb") as f:
            pickle.dump(self, f)

    @classmethod
    def load(cls, path: str):
        """Load a recommender previously written by ``save``.

        Declared as a classmethod so it can be used as an alternate
        constructor: ``PopularityRecommender.load(path)``. Calling it on an
        instance (``model.load(path)``) keeps working as well.

        Args:
            path: Location of the pickle file.

        Returns:
            The unpickled object.
        """
        # SECURITY: unpickling can execute arbitrary code. Only load files
        # that this project produced itself; never load untrusted input.
        with open(path, "rb") as f:
            return pickle.load(f)
def train_baseline(
    data_dir: str = "data/processed",
    out_path: str = "models/baseline/popularity.pkl",
):
    """Fit the popularity baseline on the processed data and pickle it.

    Args:
        data_dir: Directory containing ``train.csv`` and
            ``articles_subset.csv``.
        out_path: Where the fitted model is pickled. Missing parent
            directories are created. Defaults to the location that was
            previously hard-coded.

    Returns:
        The fitted ``PopularityRecommender``.
    """
    data_dir = Path(data_dir)

    # Read identifiers as strings so pandas does not coerce them to integers
    # (which would drop any leading zeros). Customer ids get the same
    # treatment so the lookup keys match recommend()'s `customer_id: str`.
    train = pd.read_csv(
        data_dir / "train.csv",
        dtype={"article_id": str, "customer_id": str},
    )
    articles = pd.read_csv(
        data_dir / "articles_subset.csv",
        dtype={"article_id": str},
    )

    model = PopularityRecommender()
    model.fit(train, articles)

    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    model.save(str(out_path))
    print(f"Saved to {out_path}")
    return model
# Script entry point: train and persist the baseline with default paths
# when this file is executed directly rather than imported.
if __name__ == "__main__":
    train_baseline()