Usmansafdarktk committed on
Commit
aeb316b
·
2 Parent(s): f0a2335 28b66d4

Merge branch 'main' of https://huggingface.co/spaces/Usmansafder/Engchain-annotator

Browse files
Files changed (2) hide show
  1. requirements.txt +2 -0
  2. src/storage.py +79 -46
requirements.txt CHANGED
@@ -2,3 +2,5 @@ streamlit>=1.30.0
2
  pandas
3
  numpy
4
  scipy
 
 
 
2
  pandas
3
  numpy
4
  scipy
5
+ huggingface_hub
6
+
src/storage.py CHANGED
@@ -1,46 +1,79 @@
1
- # src/storage.py
2
- import json
3
- from datetime import datetime
4
- from pathlib import Path
5
-
6
- def _get_review_dir() -> Path:
7
- """
8
- If Hugging Face Persistent Storage is enabled, it is mounted at /data.
9
- Otherwise fall back to repo-local ./reviews (ephemeral on Spaces).
10
- """
11
- persistent_root = Path("/data")
12
- if persistent_root.exists() and persistent_root.is_dir():
13
- return persistent_root / "reviews"
14
- return Path(__file__).parent.parent / "reviews"
15
-
16
- REVIEW_DIR = _get_review_dir()
17
- REVIEW_DIR.mkdir(parents=True, exist_ok=True)
18
-
19
- def save_review(annotator_name, branch, area, template_name, scores, decision, feedback):
20
- """
21
- Appends a single review to an annotator-specific JSONL file.
22
- """
23
- safe_name = "".join([c for c in annotator_name if c.isalnum() or c in (" ", "_")]).strip().replace(" ", "_")
24
- safe_branch = branch.replace(" ", "_")
25
-
26
- filename = REVIEW_DIR / f"{safe_name}_{safe_branch}.jsonl"
27
-
28
- review_data = {
29
- "timestamp": datetime.now().isoformat(),
30
- "annotator_id": annotator_name,
31
- "branch": branch,
32
- "area": area,
33
- "template": template_name,
34
- "scores": {
35
- "physical_plausibility": scores[0],
36
- "mathematical_correctness": scores[1],
37
- "pedagogical_clarity": scores[2],
38
- },
39
- "decision": decision,
40
- "feedback": feedback,
41
- }
42
-
43
- with open(filename, "a", encoding="utf-8") as f:
44
- f.write(json.dumps(review_data) + "\n")
45
-
46
- return str(filename)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/storage.py
2
+ import json
3
+ import os
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+
7
+ from huggingface_hub import HfApi, hf_hub_download
8
+
9
# Hub dataset repository that every review file is uploaded to.
DATASET_REPO = "Usmansafder/engchain-annotations"

# Access token taken from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Fail at import time when the token is missing or empty, so a
# misconfigured deployment is noticed before any review is submitted.
if HF_TOKEN is None or HF_TOKEN == "":
    raise RuntimeError(
        "HF_TOKEN not set. Add it in Space Settings → Secrets with write access."
    )

# Shared Hub client, authenticated with the token above.
api = HfApi(token=HF_TOKEN)
18
+
19
+ def _safe_slug(s: str) -> str:
20
+ s = s.strip()
21
+ s = "".join(c for c in s if c.isalnum() or c in (" ", "_", "-"))
22
+ s = s.replace(" ", "_")
23
+ return s or "unknown"
24
+
25
def _download_json_if_exists(path_in_repo: str, local_path: Path) -> list:
    """
    Fetch the existing review list stored at ``path_in_repo`` in DATASET_REPO.

    The downloaded text is also copied to ``local_path``.

    Returns the parsed list of reviews, or an empty list when the file does
    not exist in the repository yet (or when its JSON payload is not a list).

    Any other failure (network or authentication errors, an unreadable file,
    invalid JSON) is raised to the caller instead of being reported as
    "no reviews yet". The caller re-uploads whatever this function returns
    plus one new entry, so returning [] on a transient error would overwrite
    the stored history.
    """
    # Function-scope import: only this helper needs the exception types.
    from huggingface_hub.utils import EntryNotFoundError, LocalEntryNotFoundError

    try:
        downloaded = hf_hub_download(
            repo_id=DATASET_REPO,
            filename=path_in_repo,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except LocalEntryNotFoundError:
        # Subclass of EntryNotFoundError raised when the Hub could not be
        # reached and no cached copy exists; that is not "file is missing".
        raise
    except EntryNotFoundError:
        # The repository exists but this file has not been created yet.
        return []

    text = Path(downloaded).read_text(encoding="utf-8")
    local_path.write_text(text, encoding="utf-8")
    data = json.loads(text)
    return data if isinstance(data, list) else []
41
+
42
def save_review(annotator_name, branch, area, template_name, scores, decision, feedback):
    """
    Append one review to the annotator's per-branch JSON file in DATASET_REPO.

    The current file is downloaded, the new review is appended to its list,
    and the whole list is uploaded again to
    ``reviews/<annotator>/<branch>.json``.

    Args:
        annotator_name: Reviewer name; stored verbatim as "annotator_id" and
            slugified for the file path.
        branch: Branch label; stored verbatim and slugified for the file path.
        area: Stored verbatim under "area".
        template_name: Stored verbatim under "template".
        scores: Indexable with at least three entries, read in the order
            physical plausibility, mathematical correctness, pedagogical
            clarity.
        decision: Stored verbatim under "decision".
        feedback: Stored verbatim under "feedback".

    Returns:
        The ``hf://datasets/...`` location of the uploaded file.

    Errors from indexing ``scores``, from the download helper, or from the
    upload are not caught here.
    """
    # Function-scope import: only needed for the scratch directory below.
    import tempfile

    safe_name = _safe_slug(annotator_name)
    safe_branch = _safe_slug(branch)

    # Store per-reviewer, per-branch
    path_in_repo = f"reviews/{safe_name}/{safe_branch}.json"

    review_data = {
        "timestamp": datetime.now().isoformat(),
        "annotator_id": annotator_name,
        "branch": branch,
        "area": area,
        "template": template_name,
        "scores": {
            "physical_plausibility": scores[0],
            "mathematical_correctness": scores[1],
            "pedagogical_clarity": scores[2],
        },
        "decision": decision,
        "feedback": feedback,
    }

    # A private scratch directory instead of a fixed /tmp/<name>_<branch>.json:
    # distinct name/branch pairs can produce the same joined file name, and
    # concurrent sessions would otherwise share (and clobber) one file. The
    # directory is removed once the upload call has returned.
    with tempfile.TemporaryDirectory() as scratch_dir:
        local_file = Path(scratch_dir) / f"{safe_name}_{safe_branch}.json"

        existing = _download_json_if_exists(path_in_repo, local_file)
        existing.append(review_data)

        local_file.write_text(
            json.dumps(existing, ensure_ascii=False, indent=2) + "\n",
            encoding="utf-8",
        )

        # NOTE(review): download-append-upload is not atomic; two saves to the
        # same repo file that overlap in time can drop one entry.
        api.upload_file(
            path_or_fileobj=str(local_file),
            path_in_repo=path_in_repo,
            repo_id=DATASET_REPO,
            repo_type="dataset",
            commit_message=f"Add review: {safe_name} ({safe_branch})",
        )

    return f"hf://datasets/{DATASET_REPO}/{path_in_repo}"