Spaces:
Sleeping
Sleeping
| from pathlib import Path | |
| # Replace hardcoded path with Hugging Face-aware fallback | |
| from src.utils.paths import get_processed_path, _hf_download | |
| def _load_defaults(dataset: str) -> Dict[str, Dict[str, Any]]: | |
| """ | |
| Load defaults.json for a dataset. | |
| Try local path first; fall back to HF hub if needed. | |
| """ | |
| try: | |
| fp = get_processed_path(dataset) / "index" / "defaults.json" | |
| if fp.exists(): | |
| return json.loads(fp.read_text()) | |
| except Exception: | |
| pass | |
| try: | |
| # fallback (root-level for HF structure) | |
| return json.loads(_hf_download("json/defaults.json").read_text()) | |
| except Exception: | |
| return {} | |
# The load functions below follow the same pattern: local file first, Hugging Face fallback second.
def _load_user_vec(proc: Path, user_id: str) -> np.ndarray:
    """Return the stored vector for *user_id*, divided by its L2 norm.

    The ``user_id`` and ``vector`` columns of ``user_text_emb.parquet`` are
    read from *proc* through ``_read_parquet``. If that raises
    ``FileNotFoundError``, the file returned by
    ``_hf_download("parquet/user_text_emb.parquet")`` is read instead.

    Args:
        proc: Directory expected to contain ``user_text_emb.parquet``.
        user_id: Value matched against the ``user_id`` column.

    Returns:
        The first matching row's vector as ``float32``, normalised.

    Raises:
        ValueError: If no row matches *user_id*.
    """
    try:
        embeddings = _read_parquet(
            proc / "user_text_emb.parquet", ["user_id", "vector"]
        )
    except FileNotFoundError:
        remote_file = _hf_download("parquet/user_text_emb.parquet")
        embeddings = pd.read_parquet(remote_file, columns=["user_id", "vector"])

    matches = embeddings[embeddings["user_id"] == user_id]
    if matches.empty:
        raise ValueError(f"user_id '{user_id}' not found. Run text embedding step.")

    vec = np.asarray(matches.iloc[0]["vector"], dtype=np.float32)
    # The small epsilon keeps the division finite for an all-zero vector.
    denom = np.linalg.norm(vec) + 1e-12
    return vec / denom
def _load_items_table(proc: Path) -> pd.DataFrame:
    """Load ``items_with_meta.parquet`` with ``ITEM_KEY`` available as a column.

    The file is read from *proc* through ``_read_parquet``. If that raises
    ``FileNotFoundError``, the file returned by
    ``_hf_download("parquet/items_with_meta.parquet")`` is read instead.

    Args:
        proc: Directory expected to contain ``items_with_meta.parquet``.

    Returns:
        The table unchanged when ``ITEM_KEY`` is already a column; otherwise
        the table with its index reset, provided the index is named
        ``ITEM_KEY``.

    Raises:
        KeyError: If ``ITEM_KEY`` is neither a column nor the index name.
    """
    try:
        table = _read_parquet(proc / "items_with_meta.parquet")
    except FileNotFoundError:
        table = pd.read_parquet(_hf_download("parquet/items_with_meta.parquet"))

    # Common case: the key is already an ordinary column.
    if ITEM_KEY in table.columns:
        return table

    # Otherwise the key is only acceptable as the index name.
    if table.index.name != ITEM_KEY:
        raise KeyError(f"'{ITEM_KEY}' not found in items_with_meta.parquet")

    return table.reset_index()
def _user_seen_items(proc: Path, user_id: str) -> set:
    """Return the set of ``ITEM_KEY`` values found in *user_id*'s review rows.

    The ``user_id`` and ``ITEM_KEY`` columns of ``reviews.parquet`` are read
    from *proc* through ``_read_parquet``. If that raises
    ``FileNotFoundError``, the file returned by
    ``_hf_download("parquet/reviews.parquet")`` is read instead.

    Args:
        proc: Directory expected to contain ``reviews.parquet``.
        user_id: Value matched against the ``user_id`` column.

    Returns:
        The distinct ``ITEM_KEY`` values of the matching rows; empty when no
        row matches.
    """
    try:
        reviews = _read_parquet(proc / "reviews.parquet", ["user_id", ITEM_KEY])
    except FileNotFoundError:
        remote_file = _hf_download("parquet/reviews.parquet")
        reviews = pd.read_parquet(remote_file, columns=["user_id", ITEM_KEY])

    belongs_to_user = reviews["user_id"] == user_id
    seen_keys = reviews[belongs_to_user][ITEM_KEY].tolist()
    return set(seen_keys)