cove-api / src /service /recommender.py
MayankChoudhary76
✅ Final API changes
24a5fa2
from pathlib import Path
# Replace hardcoded path with Hugging Face-aware fallback
from src.utils.paths import get_processed_path, _hf_download
def _load_defaults(dataset: str) -> Dict[str, Dict[str, Any]]:
"""
Load defaults.json for a dataset.
Try local path first; fall back to HF hub if needed.
"""
try:
fp = get_processed_path(dataset) / "index" / "defaults.json"
if fp.exists():
return json.loads(fp.read_text())
except Exception:
pass
try:
# fallback (root-level for HF structure)
return json.loads(_hf_download("json/defaults.json").read_text())
except Exception:
return {}
# Likewise for these load functions:
def _load_user_vec(proc: Path, user_id: str) -> np.ndarray:
    """
    Return the normalised embedding vector stored for ``user_id``.

    Reads ``user_text_emb.parquet`` from ``proc``; if that raises
    ``FileNotFoundError`` the file is fetched via ``_hf_download`` instead.

    Args:
        proc: Directory expected to contain ``user_text_emb.parquet``.
        user_id: Value matched against the ``user_id`` column.

    Returns:
        The first matching row's ``vector`` as float32, divided by its
        L2 norm (plus a small epsilon to avoid division by zero).

    Raises:
        ValueError: If no row matches ``user_id``.
    """
    wanted_cols = ("user_id", "vector")
    try:
        table = _read_parquet(proc / "user_text_emb.parquet", list(wanted_cols))
    except FileNotFoundError:
        remote_file = _hf_download("parquet/user_text_emb.parquet")
        table = pd.read_parquet(remote_file, columns=list(wanted_cols))

    matches = table[table["user_id"] == user_id]
    if matches.empty:
        raise ValueError(f"user_id '{user_id}' not found. Run text embedding step.")

    # Only the first matching row is used.
    vec = np.asarray(matches.iloc[0]["vector"], dtype=np.float32)
    scale = np.linalg.norm(vec) + 1e-12
    return vec / scale
def _load_items_table(proc: Path) -> pd.DataFrame:
    """
    Load the items table and make sure ``ITEM_KEY`` is a regular column.

    Reads ``items_with_meta.parquet`` from ``proc``; if that raises
    ``FileNotFoundError`` the file is fetched via ``_hf_download`` instead.

    Args:
        proc: Directory expected to contain ``items_with_meta.parquet``.

    Returns:
        The items DataFrame, with the index reset when ``ITEM_KEY`` was
        the index name rather than a column.

    Raises:
        KeyError: If ``ITEM_KEY`` is neither a column nor the index name.
    """
    try:
        table = _read_parquet(proc / "items_with_meta.parquet")
    except FileNotFoundError:
        remote_file = _hf_download("parquet/items_with_meta.parquet")
        table = pd.read_parquet(remote_file)

    # Already a column: nothing to fix up.
    if ITEM_KEY in table.columns:
        return table

    # Not a column and not the index either: the key is missing.
    if not (table.index.name == ITEM_KEY):
        raise KeyError(f"'{ITEM_KEY}' not found in items_with_meta.parquet")

    # The key lives in the index; promote it to a column.
    return table.reset_index()
def _user_seen_items(proc: Path, user_id: str) -> set:
    """
    Collect the distinct ``ITEM_KEY`` values in reviews rows for ``user_id``.

    Reads ``reviews.parquet`` from ``proc``; if that raises
    ``FileNotFoundError`` the file is fetched via ``_hf_download`` instead.

    Args:
        proc: Directory expected to contain ``reviews.parquet``.
        user_id: Value matched against the ``user_id`` column.

    Returns:
        A set of item keys; empty when no row matches.
    """
    wanted_cols = ("user_id", ITEM_KEY)
    try:
        reviews = _read_parquet(proc / "reviews.parquet", list(wanted_cols))
    except FileNotFoundError:
        remote_file = _hf_download("parquet/reviews.parquet")
        reviews = pd.read_parquet(remote_file, columns=list(wanted_cols))

    is_user = reviews["user_id"] == user_id
    user_rows = reviews[is_user]
    item_keys = user_rows[ITEM_KEY].tolist()
    return set(item_keys)