import json
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd

# Replace hardcoded path with Hugging Face-aware fallback
from src.utils.paths import get_processed_path, _hf_download

# NOTE(review): `_read_parquet` and `ITEM_KEY` are used below but are not
# defined in this section; they are expected to come from elsewhere in the
# module -- confirm they are in scope.

_log = logging.getLogger(__name__)


def _load_defaults(dataset: str) -> Dict[str, Dict[str, Any]]:
    """
    Load defaults.json for a dataset.

    Try the local path ``<processed>/index/defaults.json`` first; fall back
    to the ``json/defaults.json`` file fetched through ``_hf_download``.
    This is best-effort: any failure on either source is logged and an
    empty dict is returned instead of raising.

    Note that the fallback path does not depend on ``dataset``.
    """
    try:
        fp = get_processed_path(dataset) / "index" / "defaults.json"
        if fp.exists():
            return json.loads(fp.read_text(encoding="utf-8"))
    except Exception:
        # Deliberately non-fatal: fall through to the hub copy, but leave a
        # trace so a corrupt or unreadable local file can be diagnosed.
        _log.debug(
            "Could not load local defaults.json for dataset %r",
            dataset,
            exc_info=True,
        )

    try:
        # fallback (root-level for HF structure)
        hub_file = Path(_hf_download("json/defaults.json"))
        return json.loads(hub_file.read_text(encoding="utf-8"))
    except Exception:
        _log.warning(
            "Could not load defaults.json from the hub fallback; "
            "using empty defaults",
            exc_info=True,
        )
        return {}


def _read_parquet_with_fallback(
    proc: Path,
    filename: str,
    columns: Optional[List[str]] = None,
) -> pd.DataFrame:
    """
    Read ``proc / filename`` via ``_read_parquet``; if that raises
    ``FileNotFoundError``, read ``parquet/<filename>`` obtained through
    ``_hf_download`` with ``pd.read_parquet`` instead.

    ``columns`` is forwarded to whichever reader is used; ``None`` means
    "no column selection".
    """
    try:
        if columns is None:
            return _read_parquet(proc / filename)
        return _read_parquet(proc / filename, columns)
    except FileNotFoundError:
        return pd.read_parquet(_hf_download(f"parquet/{filename}"), columns=columns)


# Likewise for these load functions:
def _load_user_vec(proc: Path, user_id: str) -> np.ndarray:
    """
    Return the L2-normalised float32 vector stored for ``user_id`` in
    ``user_text_emb.parquet``.

    Raises:
        ValueError: if no row matches ``user_id``.
    """
    dfu = _read_parquet_with_fallback(
        proc, "user_text_emb.parquet", ["user_id", "vector"]
    )
    row = dfu[dfu["user_id"] == user_id]
    if row.empty:
        raise ValueError(f"user_id '{user_id}' not found. Run text embedding step.")
    v = np.asarray(row.iloc[0]["vector"], dtype=np.float32)
    # The small epsilon keeps an all-zero vector from dividing by zero.
    return v / (np.linalg.norm(v) + 1e-12)


def _load_items_table(proc: Path) -> pd.DataFrame:
    """
    Load ``items_with_meta.parquet`` and make sure ``ITEM_KEY`` is a column.

    If ``ITEM_KEY`` is the index name rather than a column, the index is
    reset so it becomes one.

    Raises:
        KeyError: if ``ITEM_KEY`` is neither a column nor the index name.
    """
    items = _read_parquet_with_fallback(proc, "items_with_meta.parquet")
    if ITEM_KEY in items.columns:
        return items
    if items.index.name == ITEM_KEY:
        return items.reset_index()
    raise KeyError(f"'{ITEM_KEY}' not found in items_with_meta.parquet")


def _user_seen_items(proc: Path, user_id: str) -> set:
    """
    Return the set of ``ITEM_KEY`` values that appear in ``reviews.parquet``
    on rows whose ``user_id`` equals the given one (empty set if none).
    """
    df = _read_parquet_with_fallback(proc, "reviews.parquet", ["user_id", ITEM_KEY])
    return set(df[df["user_id"] == user_id][ITEM_KEY].tolist())