Usmansafder committed on
Commit
28b66d4
·
verified ·
1 Parent(s): 51c78c4

Update src/storage.py

Browse files
Files changed (1) hide show
  1. src/storage.py +32 -50
src/storage.py CHANGED
@@ -3,68 +3,51 @@ import json
3
  import os
4
  from datetime import datetime
5
  from pathlib import Path
 
6
  from huggingface_hub import HfApi, hf_hub_download
7
 
8
- # ==============================
9
- # CONFIG
10
- # ==============================
11
  DATASET_REPO = "Usmansafder/engchain-annotations"
12
- DATA_FILE = "reviews.jsonl"
13
-
14
- HF_TOKEN = os.environ.get("HF_TOKEN") # must be set as Space secret
15
 
16
- if HF_TOKEN is None:
17
  raise RuntimeError(
18
- "HF_TOKEN environment variable not set. "
19
- "Add it as a Space secret with write access."
20
  )
21
 
22
  api = HfApi(token=HF_TOKEN)
23
 
24
- # ==============================
25
- # HELPERS
26
- # ==============================
27
- def _load_existing_reviews(tmp_path: Path) -> list:
28
- if not tmp_path.exists():
29
- return []
30
- with open(tmp_path, "r", encoding="utf-8") as f:
31
- return f.readlines()
32
 
33
- # ==============================
34
- # MAIN API
35
- # ==============================
36
- def save_review(
37
- annotator_name,
38
- branch,
39
- area,
40
- template_name,
41
- scores,
42
- decision,
43
- feedback,
44
- ):
45
  """
46
- Appends a single review to a JSONL file stored
47
- in a Hugging Face Dataset repository.
48
  """
49
-
50
- # Temporary local path
51
- local_file = Path("/tmp") / DATA_FILE
52
-
53
- # Download existing dataset file if it exists
54
  try:
55
  downloaded = hf_hub_download(
56
  repo_id=DATASET_REPO,
57
- filename=DATA_FILE,
58
  repo_type="dataset",
59
  token=HF_TOKEN,
60
  )
61
- local_file.write_text(
62
- Path(downloaded).read_text(encoding="utf-8"),
63
- encoding="utf-8",
64
- )
65
  except Exception:
66
- # File does not exist yet — first write
67
- pass
 
 
 
 
 
 
 
 
 
68
 
69
  review_data = {
70
  "timestamp": datetime.now().isoformat(),
@@ -81,17 +64,16 @@ def save_review(
81
  "feedback": feedback,
82
  }
83
 
84
- # Append new record
85
- with open(local_file, "a", encoding="utf-8") as f:
86
- f.write(json.dumps(review_data) + "\n")
87
 
88
- # Upload back to dataset repo
89
  api.upload_file(
90
  path_or_fileobj=str(local_file),
91
- path_in_repo=DATA_FILE,
92
  repo_id=DATASET_REPO,
93
  repo_type="dataset",
94
- commit_message=f"Add review by {annotator_name}",
95
  )
96
 
97
- return f"hf://datasets/{DATASET_REPO}/{DATA_FILE}"
 
3
  import os
4
  from datetime import datetime
5
  from pathlib import Path
6
+
7
  from huggingface_hub import HfApi, hf_hub_download
8
 
 
 
 
9
  DATASET_REPO = "Usmansafder/engchain-annotations"
10
+ HF_TOKEN = os.environ.get("HF_TOKEN")
 
 
11
 
12
+ if not HF_TOKEN:
13
  raise RuntimeError(
14
+ "HF_TOKEN not set. Add it in Space Settings → Secrets with write access."
 
15
  )
16
 
17
  api = HfApi(token=HF_TOKEN)
18
 
19
+ def _safe_slug(s: str) -> str:
20
+ s = s.strip()
21
+ s = "".join(c for c in s if c.isalnum() or c in (" ", "_", "-"))
22
+ s = s.replace(" ", "_")
23
+ return s or "unknown"
 
 
 
24
 
25
+ def _download_json_if_exists(path_in_repo: str, local_path: Path) -> list:
 
 
 
 
 
 
 
 
 
 
 
26
  """
27
+ Returns list of existing reviews; empty list if file doesn't exist yet.
 
28
  """
 
 
 
 
 
29
  try:
30
  downloaded = hf_hub_download(
31
  repo_id=DATASET_REPO,
32
+ filename=path_in_repo,
33
  repo_type="dataset",
34
  token=HF_TOKEN,
35
  )
36
+ local_path.write_text(Path(downloaded).read_text(encoding="utf-8"), encoding="utf-8")
37
+ data = json.loads(local_path.read_text(encoding="utf-8"))
38
+ return data if isinstance(data, list) else []
 
39
  except Exception:
40
+ return []
41
+
42
+ def save_review(annotator_name, branch, area, template_name, scores, decision, feedback):
43
+ safe_name = _safe_slug(annotator_name)
44
+ safe_branch = _safe_slug(branch)
45
+
46
+ # Store per-reviewer, per-branch
47
+ path_in_repo = f"reviews/{safe_name}/{safe_branch}.json"
48
+ local_file = Path("/tmp") / f"{safe_name}_{safe_branch}.json"
49
+
50
+ existing = _download_json_if_exists(path_in_repo, local_file)
51
 
52
  review_data = {
53
  "timestamp": datetime.now().isoformat(),
 
64
  "feedback": feedback,
65
  }
66
 
67
+ existing.append(review_data)
68
+
69
+ local_file.write_text(json.dumps(existing, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
70
 
 
71
  api.upload_file(
72
  path_or_fileobj=str(local_file),
73
+ path_in_repo=path_in_repo,
74
  repo_id=DATASET_REPO,
75
  repo_type="dataset",
76
+ commit_message=f"Add review: {safe_name} ({safe_branch})",
77
  )
78
 
79
+ return f"hf://datasets/{DATASET_REPO}/{path_in_repo}"