fdaudens committed on
Commit
c7c2e28
·
verified ·
1 Parent(s): 4b7eae4

Persist Sonic Caucus votes to private dataset

Browse files
Files changed (3) hide show
  1. app.py +13 -30
  2. requirements.txt +1 -0
  3. storage.py +119 -0
app.py CHANGED
@@ -1,6 +1,5 @@
1
  from __future__ import annotations
2
 
3
- import csv
4
  import json
5
  import os
6
  import random
@@ -10,22 +9,12 @@ from pathlib import Path
10
 
11
  import gradio as gr
12
 
 
 
13
 
14
  ROOT = Path(__file__).resolve().parent
15
  MANIFEST_PATH = ROOT / "manifest.json"
16
  VOTES_PATH = ROOT / "votes.csv"
17
- VOTE_FIELDS = [
18
- "timestamp",
19
- "voter_id",
20
- "sample_id",
21
- "left_clip_id",
22
- "right_clip_id",
23
- "left_model_id",
24
- "right_model_id",
25
- "winner",
26
- "winner_model_id",
27
- "notes",
28
- ]
29
 
30
 
31
  def load_manifest() -> list[dict]:
@@ -81,8 +70,10 @@ def vote(
81
  else:
82
  winner_model_id = ""
83
 
 
84
  row = {
85
- "timestamp": datetime.now(timezone.utc).isoformat(timespec="seconds"),
 
86
  "voter_id": voter_id.strip(),
87
  "sample_id": left["sample_id"],
88
  "left_clip_id": left["clip_id"],
@@ -93,18 +84,12 @@ def vote(
93
  "winner_model_id": winner_model_id,
94
  "notes": notes.strip(),
95
  }
96
- append_vote(row)
97
  audio_a, audio_b, next_state, pair_status = choose_pair(sample_id)
98
- return audio_a, audio_b, next_state, f"Vote saved. {pair_status}", ""
99
-
100
-
101
- def append_vote(row: dict) -> None:
102
- exists = VOTES_PATH.exists()
103
- with VOTES_PATH.open("a", newline="") as handle:
104
- writer = csv.DictWriter(handle, fieldnames=VOTE_FIELDS)
105
- if not exists:
106
- writer.writeheader()
107
- writer.writerow(row)
108
 
109
 
110
  def load_results(admin_code: str) -> tuple[list[list], str | None]:
@@ -113,12 +98,10 @@ def load_results(admin_code: str) -> tuple[list[list], str | None]:
113
  raise gr.Error("Wrong admin code.")
114
  if not expected:
115
  raise gr.Error("Admin results are disabled until ARENA_ADMIN_CODE is set.")
116
- if not VOTES_PATH.exists():
 
117
  return [], None
118
 
119
- with VOTES_PATH.open() as handle:
120
- rows = list(csv.DictReader(handle))
121
-
122
  wins = Counter(row["winner_model_id"] for row in rows if row["winner_model_id"])
123
  appearances = defaultdict(int)
124
  labels = {clip["model_id"]: clip["label"] for clip in CLIPS}
@@ -136,7 +119,7 @@ def load_results(admin_code: str) -> tuple[list[list], str | None]:
136
  count,
137
  round(win_count / count, 3) if count else 0,
138
  ])
139
- return table, str(VOTES_PATH)
140
 
141
 
142
  with gr.Blocks(title="Sonic Caucus") as demo:
 
1
  from __future__ import annotations
2
 
 
3
  import json
4
  import os
5
  import random
 
9
 
10
  import gradio as gr
11
 
12
+ from storage import append_vote, load_vote_rows, new_vote_id
13
+
14
 
15
  ROOT = Path(__file__).resolve().parent
16
  MANIFEST_PATH = ROOT / "manifest.json"
17
  VOTES_PATH = ROOT / "votes.csv"
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  def load_manifest() -> list[dict]:
 
70
  else:
71
  winner_model_id = ""
72
 
73
+ now = datetime.now(timezone.utc)
74
  row = {
75
+ "timestamp": now.isoformat(timespec="seconds"),
76
+ "vote_id": new_vote_id(now),
77
  "voter_id": voter_id.strip(),
78
  "sample_id": left["sample_id"],
79
  "left_clip_id": left["clip_id"],
 
84
  "winner_model_id": winner_model_id,
85
  "notes": notes.strip(),
86
  }
87
+ persistence_error = append_vote(VOTES_PATH, row)
88
  audio_a, audio_b, next_state, pair_status = choose_pair(sample_id)
89
+ save_status = "Vote saved to persistent log."
90
+ if persistence_error:
91
+ save_status = f"Vote saved locally; persistent log unavailable ({persistence_error})."
92
+ return audio_a, audio_b, next_state, f"{save_status} {pair_status}", ""
 
 
 
 
 
 
93
 
94
 
95
  def load_results(admin_code: str) -> tuple[list[list], str | None]:
 
98
  raise gr.Error("Wrong admin code.")
99
  if not expected:
100
  raise gr.Error("Admin results are disabled until ARENA_ADMIN_CODE is set.")
101
+ rows, source = load_vote_rows(VOTES_PATH)
102
+ if not rows:
103
  return [], None
104
 
 
 
 
105
  wins = Counter(row["winner_model_id"] for row in rows if row["winner_model_id"])
106
  appearances = defaultdict(int)
107
  labels = {clip["model_id"]: clip["label"] for clip in CLIPS}
 
119
  count,
120
  round(win_count / count, 3) if count else 0,
121
  ])
122
+ return table, str(VOTES_PATH) if "local" in source else None
123
 
124
 
125
  with gr.Blocks(title="Sonic Caucus") as demo:
requirements.txt CHANGED
@@ -1 +1,2 @@
1
  gradio>=5.0,<7
 
 
1
  gradio>=5.0,<7
2
+ huggingface_hub>=0.25
storage.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import json
5
+ import os
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ from uuid import uuid4
9
+
10
+ from huggingface_hub import HfApi, hf_hub_download
11
+
12
+
13
# Column names, in order, used as the header of the local votes CSV.
VOTE_FIELDS = [
    # When and which vote.
    "timestamp",
    "vote_id",
    # Who voted and on what sample.
    "voter_id",
    "sample_id",
    # The two clips (and their models) that were compared.
    "left_clip_id",
    "right_clip_id",
    "left_model_id",
    "right_model_id",
    # Outcome and free-text remarks.
    "winner",
    "winner_model_id",
    "notes",
]
26
+
27
+
28
def new_vote_id(now: datetime | None = None) -> str:
    """Return a time-prefixed, collision-resistant identifier for one vote.

    The identifier has the form ``<YYYYmmddTHHMMSSZ>_<12 hex chars>``, e.g.
    ``20240102T030405Z_1a2b3c4d5e6f``.

    Args:
        now: Moment to stamp the identifier with. Defaults to the current
            UTC time. Timezone-aware values are converted to UTC so the
            trailing ``Z`` is truthful; naive values are used unchanged
            and are assumed to already be expressed in UTC.

    Returns:
        The identifier string.
    """
    if now is None:
        now = datetime.now(timezone.utc)
    elif now.utcoffset() is not None:
        # The format below hard-codes "Z", so normalise aware datetimes to UTC.
        now = now.astimezone(timezone.utc)
    return f"{now.strftime('%Y%m%dT%H%M%SZ')}_{uuid4().hex[:12]}"
31
+
32
+
33
def remote_vote_path(row: dict) -> str:
    """Build the in-repo path of the JSON file that stores ``row``.

    Votes are grouped into one folder per sample, one file per vote:
    ``votes/<sample_id>/<vote_id>.json``.

    Raises:
        KeyError: If ``row`` lacks ``sample_id`` or ``vote_id``.
    """
    sample_id = row["sample_id"]
    vote_id = row["vote_id"]
    return f"votes/{sample_id}/{vote_id}.json"
35
+
36
+
37
def remote_config() -> tuple[str, str | None]:
    """Read the remote dataset settings from the environment.

    Returns:
        A ``(repo_id, token)`` pair. ``repo_id`` comes from
        ``ARENA_VOTES_REPO`` (whitespace-stripped, with a built-in default
        when the variable is unset). ``token`` is the first non-blank value
        of ``ARENA_HF_TOKEN`` then ``HF_TOKEN``, whitespace-stripped, or
        ``None`` when neither holds a usable value.
    """
    repo_id = os.getenv("ARENA_VOTES_REPO", "fdaudens/sonic-caucus-votes").strip()
    for variable in ("ARENA_HF_TOKEN", "HF_TOKEN"):
        # Strip so a pasted secret with a trailing newline still authenticates,
        # and so a blank primary variable does not mask the fallback.
        token = os.getenv(variable, "").strip()
        if token:
            return repo_id, token
    return repo_id, None
41
+
42
+
43
def append_local_vote(votes_path: Path, row: dict) -> None:
    """Append one vote to the local CSV log, writing the header when needed.

    Args:
        votes_path: CSV file to append to; created if it does not exist.
        row: Vote record keyed by the names in ``VOTE_FIELDS``.

    Raises:
        ValueError: If ``row`` contains a key that is not in ``VOTE_FIELDS``
            (``csv.DictWriter``'s default ``extrasaction="raise"``).
        OSError: If the file cannot be opened or written.
    """
    with votes_path.open("a", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=VOTE_FIELDS)
        # Append mode starts at end-of-file, so offset 0 means the file is
        # new *or* pre-existing but empty; both need the header row.
        if handle.tell() == 0:
            writer.writeheader()
        writer.writerow(row)
50
+
51
+
52
def append_remote_vote(row: dict) -> str | None:
    """Upload one vote as a JSON file to the configured dataset repo.

    This is best-effort: failures are reported, never raised from the upload.

    Args:
        row: Vote record; must be JSON-serialisable.

    Returns:
        ``None`` on success, otherwise a short reason: a "not configured"
        message, or the class name of the exception raised during upload.
    """
    repo_id, token = remote_config()
    preconditions = (
        (repo_id, "repo not configured"),
        (token, "token not configured"),
    )
    for value, problem in preconditions:
        if not value:
            return problem

    # Serialised outside the try block: a non-serialisable row is a
    # programming error and is allowed to propagate.
    body = json.dumps(row, indent=2, sort_keys=True).encode("utf-8")
    try:
        HfApi().upload_file(
            path_or_fileobj=body,
            path_in_repo=remote_vote_path(row),
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
            commit_message=f"Add Sonic Caucus vote {row['vote_id']}",
        )
    except Exception as error:
        # Only the exception type is surfaced to the caller.
        return type(error).__name__
    else:
        return None
72
+
73
+
74
def append_vote(votes_path: Path, row: dict) -> str | None:
    """Record a vote locally, then attempt to persist it remotely.

    The local CSV write happens first, so the vote is kept on disk even when
    the remote upload fails.

    Returns:
        ``None`` if the remote upload succeeded, otherwise the short error
        description produced by ``append_remote_vote``.
    """
    append_local_vote(votes_path, row)
    remote_error = append_remote_vote(row)
    return remote_error
77
+
78
+
79
def read_local_votes(votes_path: Path) -> list[dict]:
    """Load every vote from the local CSV log.

    Args:
        votes_path: CSV file whose first row is the header.

    Returns:
        One dict per data row keyed by the header names, or an empty list
        when the file does not exist.
    """
    try:
        # newline="" lets the csv module do its own newline handling, so line
        # breaks embedded in quoted fields (e.g. free-text notes) round-trip
        # unchanged instead of being rewritten by universal-newline mode.
        with votes_path.open(newline="") as handle:
            return list(csv.DictReader(handle))
    except FileNotFoundError:
        return []
84
+
85
+
86
def read_remote_votes() -> tuple[list[dict], str | None]:
    """Fetch every stored vote from the configured dataset repo.

    Returns:
        ``(rows, error)``. On success ``rows`` holds one parsed JSON object
        per ``votes/**.json`` file and ``error`` is ``None``. On any failure
        ``rows`` is empty and ``error`` is a "not configured" message or the
        class name of the exception raised while listing or downloading.
    """
    repo_id, token = remote_config()
    if not repo_id:
        return [], "repo not configured"
    if not token:
        return [], "token not configured"

    client = HfApi()
    try:
        listing = client.list_repo_files(repo_id=repo_id, repo_type="dataset", token=token)
        rows = []
        for name in listing:
            # Only per-vote JSON files under votes/ are of interest.
            if not (name.startswith("votes/") and name.endswith(".json")):
                continue
            local_copy = hf_hub_download(repo_id=repo_id, filename=name, repo_type="dataset", token=token)
            rows.append(json.loads(Path(local_copy).read_text()))
    except Exception as error:
        # All-or-nothing: a failure on any file discards partial results.
        return [], type(error).__name__
    return rows, None
104
+
105
+
106
def load_vote_rows(votes_path: Path) -> tuple[list[dict], str]:
    """Load votes, preferring the remote dataset over the local CSV.

    Args:
        votes_path: Local CSV used as a fallback when the remote yields
            no rows.

    Returns:
        ``(rows, source)`` where ``source`` is a human-readable description
        of where the rows came from; it mentions the remote error, if any,
        whenever the remote rows were not used.
    """
    remote_rows, remote_error = read_remote_votes()
    if remote_rows:
        return remote_rows, "persistent Hugging Face dataset"

    # Shared tail for both fallback descriptions below.
    remote_note = f"; remote unavailable ({remote_error})" if remote_error else ""

    local_rows = read_local_votes(votes_path)
    if local_rows:
        return local_rows, f"local CSV{remote_note}"
    return [], f"no votes{remote_note}"