Engchain-annotator / src /storage.py
Usmansafder's picture
Update src/storage.py
28b66d4 verified
# src/storage.py
import json
import os
from datetime import datetime
from pathlib import Path
from huggingface_hub import HfApi, hf_hub_download
# Dataset repository on the Hugging Face Hub that receives the review files.
DATASET_REPO = "Usmansafder/engchain-annotations"

# Access token read from the environment. A missing or empty value aborts the
# import right here, so a misconfigured deployment fails at startup instead of
# on the first save.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None or HF_TOKEN == "":
    raise RuntimeError(
        "HF_TOKEN not set. Add it in Space Settings → Secrets with write access."
    )

# Module-wide Hub client authenticated with the token above.
api = HfApi(token=HF_TOKEN)
def _safe_slug(s: str) -> str:
s = s.strip()
s = "".join(c for c in s if c.isalnum() or c in (" ", "_", "-"))
s = s.replace(" ", "_")
return s or "unknown"
def _download_json_if_exists(path_in_repo: str, local_path: Path) -> list:
    """Fetch the list of reviews stored at ``path_in_repo`` in the dataset repo.

    The downloaded text is also copied to ``local_path``.

    Args:
        path_in_repo: Path of the JSON file inside ``DATASET_REPO``.
        local_path: Local file that receives a copy of the downloaded text.

    Returns:
        The parsed list of reviews. An empty list is returned when the file
        does not exist in the repo yet, or when its content cannot be decoded
        as a JSON list.

    Raises:
        Exception: Any download failure other than "file not found on the
            Hub" (network outage, authentication error, missing repo, ...)
            propagates. Returning ``[]`` in those cases would make the caller
            upload a file holding only the newest review, overwriting the
            reviews already stored remotely.
    """
    # Function-scope import keeps this block self-contained; both names ship
    # with the huggingface_hub package the module already depends on.
    from huggingface_hub.utils import EntryNotFoundError, LocalEntryNotFoundError

    try:
        downloaded = hf_hub_download(
            repo_id=DATASET_REPO,
            filename=path_in_repo,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except LocalEntryNotFoundError:
        # Subclass of EntryNotFoundError raised when the Hub is unreachable
        # and nothing is cached: the file may well exist, so do not pretend
        # it is absent.
        raise
    except EntryNotFoundError:
        # The repo was reached and the file is genuinely not there yet.
        return []

    try:
        text = Path(downloaded).read_text(encoding="utf-8")
        local_path.write_text(text, encoding="utf-8")
        data = json.loads(text)
    except ValueError:
        # UnicodeDecodeError and json.JSONDecodeError both derive from
        # ValueError; unreadable content is treated as "no reviews".
        return []
    return data if isinstance(data, list) else []
def save_review(annotator_name, branch, area, template_name, scores, decision, feedback):
    """Append one review to the annotator's per-branch JSON file on the Hub.

    The current file is downloaded (or treated as empty), the new review is
    appended, the result is written to a scratch file under ``/tmp`` and then
    uploaded back to ``DATASET_REPO``.

    Args:
        annotator_name: Reviewer name; stored verbatim and, in slug form,
            used in the file path.
        branch: Branch name; stored verbatim and, in slug form, used in the
            file path.
        area: Stored under the ``"area"`` key.
        template_name: Stored under the ``"template"`` key.
        scores: Indexable with at least three items, stored in order as
            physical plausibility, mathematical correctness and pedagogical
            clarity.
        decision: Stored under the ``"decision"`` key.
        feedback: Stored under the ``"feedback"`` key.

    Returns:
        The ``hf://datasets/...`` URI of the uploaded file.
    """
    reviewer_slug = _safe_slug(annotator_name)
    branch_slug = _safe_slug(branch)

    # One JSON file per reviewer and per branch.
    repo_path = f"reviews/{reviewer_slug}/{branch_slug}.json"
    scratch_file = Path("/tmp") / f"{reviewer_slug}_{branch_slug}.json"

    reviews = _download_json_if_exists(repo_path, scratch_file)
    reviews.append(
        {
            "timestamp": datetime.now().isoformat(),
            "annotator_id": annotator_name,
            "branch": branch,
            "area": area,
            "template": template_name,
            "scores": {
                "physical_plausibility": scores[0],
                "mathematical_correctness": scores[1],
                "pedagogical_clarity": scores[2],
            },
            "decision": decision,
            "feedback": feedback,
        }
    )

    serialized = json.dumps(reviews, ensure_ascii=False, indent=2) + "\n"
    scratch_file.write_text(serialized, encoding="utf-8")

    api.upload_file(
        path_or_fileobj=str(scratch_file),
        path_in_repo=repo_path,
        repo_id=DATASET_REPO,
        repo_type="dataset",
        commit_message=f"Add review: {reviewer_slug} ({branch_slug})",
    )
    return f"hf://datasets/{DATASET_REPO}/{repo_path}"