# src/storage.py
"""Persist annotation reviews as JSON files in a Hugging Face dataset repo.

Each (annotator, branch) pair maps to one JSON file in ``DATASET_REPO`` that
holds a list of review records; ``save_review`` appends to that list and
re-uploads the file.
"""
import json
import os
from datetime import datetime
from pathlib import Path

from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.utils import EntryNotFoundError, LocalEntryNotFoundError

DATASET_REPO = "Usmansafder/engchain-annotations"

# Fail at import time so a missing token is noticed before any review is
# submitted.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError(
        "HF_TOKEN not set. Add it in Space Settings → Secrets with write access."
    )

api = HfApi(token=HF_TOKEN)


def _safe_slug(s: str) -> str:
    """Reduce *s* to a slug usable as a path component.

    Surrounding whitespace is stripped, every character that is not
    alphanumeric, a space, ``_`` or ``-`` is dropped, and the remaining
    spaces become underscores. Returns ``"unknown"`` when nothing is left.
    """
    kept = "".join(c for c in s.strip() if c.isalnum() or c in " _-")
    return kept.replace(" ", "_") or "unknown"


def _download_json_if_exists(path_in_repo: str, local_path: Path) -> list:
    """Fetch the existing review list stored at *path_in_repo*.

    The downloaded text is also written to *local_path*.

    Returns:
        The list of existing reviews, or an empty list if the file does not
        exist in the dataset repo yet.

    Raises:
        ValueError: if the file exists but its JSON payload is not a list
            (``json.JSONDecodeError``, raised for malformed JSON, is a
            subclass of ``ValueError``).
        Exception: any other download failure (network, authentication,
            missing repo) propagates unchanged.

    Only "file not found on the Hub" is treated as "no reviews yet". Every
    other failure is surfaced, because the caller uploads whatever list is
    returned here over the existing file; answering ``[]`` on a transient
    error would silently discard previously saved reviews.
    """
    try:
        downloaded = hf_hub_download(
            repo_id=DATASET_REPO,
            filename=path_in_repo,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except LocalEntryNotFoundError:
        # Per the huggingface_hub docs this signals that the Hub could not be
        # reached and no cached copy exists. It can subclass
        # EntryNotFoundError, so it is handled first: the file may well exist
        # remotely.
        raise
    except EntryNotFoundError:
        # The repo is reachable but has no such file: first review for this
        # annotator/branch pair.
        return []

    text = Path(downloaded).read_text(encoding="utf-8")
    local_path.write_text(text, encoding="utf-8")

    data = json.loads(text)
    if not isinstance(data, list):
        raise ValueError(
            f"Expected a JSON list in {path_in_repo}, "
            f"got {type(data).__name__}; refusing to overwrite it."
        )
    return data


def save_review(annotator_name, branch, area, template_name, scores, decision, feedback):
    """Append one review to the annotator's per-branch file and upload it.

    Args:
        annotator_name: Reviewer name; stored verbatim as ``annotator_id``
            and, in slug form, used as the directory name.
        branch: Branch label; stored verbatim and, in slug form, used as the
            file name.
        area: Stored verbatim under ``area``.
        template_name: Stored verbatim under ``template``.
        scores: Indexable with at least three entries, read in the order
            physical plausibility, mathematical correctness, pedagogical
            clarity.
        decision: Stored verbatim under ``decision``.
        feedback: Stored verbatim under ``feedback``.

    Returns:
        The ``hf://datasets/...`` URI of the uploaded file.

    Raises:
        Whatever the download or upload raises. A failed download aborts the
        save instead of replacing the remote file with a one-element list.
    """
    safe_name = _safe_slug(annotator_name)
    safe_branch = _safe_slug(branch)

    # Store per-reviewer, per-branch
    path_in_repo = f"reviews/{safe_name}/{safe_branch}.json"
    local_file = Path("/tmp") / f"{safe_name}_{safe_branch}.json"

    existing = _download_json_if_exists(path_in_repo, local_file)

    review_data = {
        # Naive local time: datetime.now() is called without a timezone.
        "timestamp": datetime.now().isoformat(),
        "annotator_id": annotator_name,
        "branch": branch,
        "area": area,
        "template": template_name,
        "scores": {
            "physical_plausibility": scores[0],
            "mathematical_correctness": scores[1],
            "pedagogical_clarity": scores[2],
        },
        "decision": decision,
        "feedback": feedback,
    }
    existing.append(review_data)

    serialized = json.dumps(existing, ensure_ascii=False, indent=2) + "\n"
    local_file.write_text(serialized, encoding="utf-8")

    api.upload_file(
        path_or_fileobj=str(local_file),
        path_in_repo=path_in_repo,
        repo_id=DATASET_REPO,
        repo_type="dataset",
        commit_message=f"Add review: {safe_name} ({safe_branch})",
    )

    return f"hf://datasets/{DATASET_REPO}/{path_in_repo}"