tung committed on
Commit
69061f2
·
1 Parent(s): 4bd9fc2

feat: switch to huggingface hub data

Browse files
Files changed (1) hide show
  1. app.py +146 -73
app.py CHANGED
@@ -1,66 +1,138 @@
1
  import os
 
2
  from datetime import datetime
 
3
 
4
  import gradio as gr
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # -----------------------------------------------------------------------------
8
- # Configuration – adjust these paths to point at your data location
9
  # -----------------------------------------------------------------------------
10
- DATA_PATH = "human_judgement/selected_samples.json" # CSV with columns: question, answer1, answer2
11
- RATINGS_PATH = (
12
- "human_judgement/human_judgement.csv" # File where user ratings will be appended
13
- )
 
 
 
14
 
15
  # -----------------------------------------------------------------------------
16
- # Helper functions
17
  # -----------------------------------------------------------------------------
18
 
19
 
20
  def load_data(path: str = DATA_PATH) -> pd.DataFrame:
21
- """Load the Q/A pairs once and cache them inside gradio runtime."""
22
  if not os.path.exists(path):
23
- raise FileNotFoundError(f"Could not find data file at {path}.")
 
 
24
  df = pd.read_json(path, lines=True)
25
- expected_cols = {"question", "response1", "response2"}
26
- if not expected_cols.issubset(df.columns):
27
- raise ValueError(f"CSV file must contain columns: {', '.join(expected_cols)}")
28
  return df
29
 
30
 
31
- def load_ratings(path: str = RATINGS_PATH) -> pd.DataFrame:
32
- """Load the ratings file (creates an empty one if absent)."""
33
- if os.path.exists(path):
34
- return pd.read_csv(path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  return pd.DataFrame(columns=["user_id", "row_index", "choice", "timestamp"])
36
 
37
 
38
- def save_rating(user_id: str, row_index: int, choice: int, path: str = RATINGS_PATH):
39
- """Append a single rating row to disk, avoiding accidental duplicates."""
40
- ratings = load_ratings(path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- # Prevent duplicate entries for the same user/question pair
 
 
43
  duplicate = (ratings.user_id == user_id) & (ratings.row_index == row_index)
44
  if duplicate.any():
45
- return # already stored, nothing to do
46
 
47
  new_entry = {
48
  "user_id": user_id,
49
  "row_index": row_index,
50
- "choice": choice, # 1 means answer1 preferred, 2 means answer2 preferred
51
  "timestamp": datetime.utcnow().isoformat(),
52
  }
53
  ratings = pd.concat([ratings, pd.DataFrame([new_entry])], ignore_index=True)
54
- ratings.to_csv(path, index=False)
55
 
56
 
57
  def get_next_unrated(df: pd.DataFrame, ratings: pd.DataFrame, user_id: str):
58
- """Return (row_index, question, answer1, answer2) or None if finished."""
59
- rated_indices = ratings.loc[ratings.user_id == user_id, "row_index"].tolist()
60
- unrated_df = df[~df.index.isin(rated_indices)]
61
- if unrated_df.empty:
62
  return None
63
- row = unrated_df.iloc[0]
64
  return row.name, row.question, row.response1, row.response2
65
 
66
 
@@ -70,25 +142,23 @@ def get_next_unrated(df: pd.DataFrame, ratings: pd.DataFrame, user_id: str):
70
 
71
 
72
  def start_or_resume(user_id: str, state_df):
73
- """Initialise or resume a session for a given user id."""
74
  if not user_id.strip():
75
  return (
76
- gr.update(visible=True),
77
- gr.update(visible=False),
78
- gr.update(visible=False),
79
  "",
80
  "",
81
  "",
82
- "",
83
- "Please enter a nonempty identifier to begin.",
84
  )
85
 
86
  ratings = load_ratings()
87
  record = get_next_unrated(state_df, ratings, user_id)
88
  if record is None:
89
- # Completed all tasks
90
  return (
91
- gr.update(visible=True),
92
  gr.update(visible=False),
93
  gr.update(visible=False),
94
  "",
@@ -100,10 +170,10 @@ def start_or_resume(user_id: str, state_df):
100
 
101
  idx, q, a1, a2 = record
102
  return (
103
- gr.update(visible=True), # keep user id input visible for reference
104
- gr.update(visible=True), # show evaluation section
105
- gr.update(visible=True), # enable submit button
106
- "**" + q + "**",
107
  a1,
108
  a2,
109
  str(idx),
@@ -112,10 +182,13 @@ def start_or_resume(user_id: str, state_df):
112
 
113
 
114
  def submit_preference(user_id: str, row_idx_str: str, choice: str, state_df):
115
- """Handle a single preference submission and load the next question."""
116
  if choice not in {"answer1", "answer2"}:
117
- return gr.update(
118
- value="Please choose either Answer 1 or Answer 2 before submitting."
 
 
 
 
119
  )
120
 
121
  row_idx = int(row_idx_str)
@@ -127,7 +200,7 @@ def submit_preference(user_id: str, row_idx_str: str, choice: str, state_df):
127
  return "", "", "", "", "🎉 You have evaluated every item – thank you!"
128
 
129
  idx, q, a1, a2 = record
130
- return "**" + q + "**", a1, a2, str(idx), ""
131
 
132
 
133
  # -----------------------------------------------------------------------------
@@ -138,44 +211,45 @@ def submit_preference(user_id: str, row_idx_str: str, choice: str, state_df):
138
  def build_demo():
139
  df = load_data()
140
 
 
 
 
 
 
 
 
 
 
 
 
141
  with gr.Blocks(title="Question/Answer Preference Rater") as demo:
 
 
142
  gr.Markdown(
143
- """# Q/A Preference Rater
144
- Enter your identifier below to start or resume your evaluation session. For every question, select which answer you prefer. Your progress is saved automatically so you can return at any time using the **same identifier**."""
145
  )
146
 
147
- state_df = gr.State(df) # keep dataset in memory for callbacks
148
  state_row_idx = gr.State("")
149
 
150
- # User identifier section
151
- id_input = gr.Textbox(
152
- label="User Identifier", placeholder="e.g. Alice", scale=3
153
- )
154
- start_btn = gr.Button("Start / Resume", scale=1)
155
 
156
- # Feedback / status message
157
- info_md = gr.Markdown("", visible=True)
158
 
159
- # Evaluation section (initially hidden)
160
  with gr.Column(visible=False) as eval_col:
161
- question_md = gr.Markdown("", label="Question")
162
  with gr.Row():
163
- # answer1_box = gr.Textbox(
164
- # label="Answer\u00a01", interactive=False, lines=10
165
- # )
166
- # answer2_box = gr.Textbox(
167
- # label="Answer\u00a02", interactive=False, lines=10
168
- # )
169
- answer1_box = gr.Markdown(label="Answer 1")
170
- answer2_box = gr.Markdown(label="Answer 2")
171
  choice_radio = gr.Radio(
172
- ["answer1", "answer2"],
173
- label="Which answer do you prefer?",
174
- interactive=True,
175
  )
176
  submit_btn = gr.Button("Submit Preference", visible=False)
177
 
178
- # Wire callbacks
179
  start_btn.click(
180
  fn=start_or_resume,
181
  inputs=[id_input, state_df],
@@ -184,8 +258,8 @@ def build_demo():
184
  eval_col,
185
  submit_btn,
186
  question_md,
187
- answer1_box,
188
- answer2_box,
189
  state_row_idx,
190
  info_md,
191
  ],
@@ -194,12 +268,11 @@ def build_demo():
194
  submit_btn.click(
195
  fn=submit_preference,
196
  inputs=[id_input, state_row_idx, choice_radio, state_df],
197
- outputs=[question_md, answer1_box, answer2_box, state_row_idx, info_md],
198
  )
199
 
200
  return demo
201
 
202
 
203
- # if __name__ == "__main__":
204
- # build_demo().launch()
205
- build_demo().launch()
 
import os
import tempfile
from datetime import datetime
from pathlib import Path

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download

try:
    from huggingface_hub import HfHubError
except ImportError:
    # `HfHubError` is not part of the documented huggingface_hub API; alias the
    # base class for Hub HTTP failures so the name stays available to the file.
    from huggingface_hub.utils import HfHubHTTPError as HfHubError
9
+
10
+ # ------------------------------------------------------------
11
+ # Cloud‑friendly Q/A preference rater for **Hugging Face Spaces**
12
+ # ------------------------------------------------------------
13
+ # This version swaps local CSV persistence for a tiny remote‑dataset
14
+ # workflow that works on Spaces:
15
+ # • Ratings are stored in (and loaded from) a lightweight **dataset
16
+ # repo** on the Hugging Face Hub – no local file system required.
17
+ # • The dataset repo is set via the `RATINGS_REPO` env‑var.
18
+ # • You must pass a write‑enabled token (env‑var `HF_TOKEN`) that has
19
+ # `write` permission on that dataset.
20
+ #
21
+ # Quick setup guide
22
+ # -----------------
23
+ # 1. Create a dataset repository to hold the ratings file, e.g.:
24
+ # https://huggingface.co/datasets/<org>/qa‑rater‑data
25
+ # 2. Inside **Space Settings ▸ Secrets**, add:
26
+ # • `RATINGS_REPO` → <org>/qa‑rater‑data
27
+ # • `HF_TOKEN` → a token with *Write* access to that repo
28
+ # 3. Add `huggingface-hub` to your `requirements.txt` or
29
+ # `pip install huggingface-hub` locally.
30
+ # 4. Deploy / push your updated Space – ratings will now persist in
31
+ # the dataset repo instead of the Space’s ephemeral storage.
32
+ # ------------------------------------------------------------
33
+
34
 
35
  # -----------------------------------------------------------------------------
36
+ # Configuration – constants & styling
37
  # -----------------------------------------------------------------------------
38
+ DATA_PATH = "human_judgement/selected_samples.json"
39
+ RATINGS_FILE = "human_judgement/human_judgement.csv" # Name *inside* the dataset repo
40
+ RATINGS_REPO = os.getenv("RATINGS_REPO") # e.g. "org/qa‑rater‑data"
41
+ HF_TOKEN = os.getenv("HF_TOKEN") # write token for that repo
42
+ MAX_HEIGHT_PX = 400 # Max visible height for answer Markdown blocks
43
+
44
+ api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
45
 
46
  # -----------------------------------------------------------------------------
47
+ # Helper functions – data I/O
48
  # -----------------------------------------------------------------------------
49
 
50
 
def load_data(path: str = DATA_PATH) -> pd.DataFrame:
    """Load the static Q/A pairs bundled with the Space repo.

    The file is parsed as JSON Lines (one JSON object per line), not CSV.

    Args:
        path: Location of the JSON Lines data file.

    Returns:
        DataFrame containing at least the columns ``question``,
        ``response1`` and ``response2``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If any of the required columns is missing.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(
            f"Could not find data file at {path} – did you upload it?"
        )
    df = pd.read_json(path, lines=True)
    required = {"question", "response1", "response2"}
    missing = required.difference(df.columns)
    if missing:
        # Sort so the message is deterministic (set iteration order is not).
        raise ValueError(
            f"Data file must contain columns: {', '.join(sorted(required))} "
            f"(missing: {', '.join(sorted(missing))})"
        )
    return df
62
 
63
 
64
+ # ---------- Rating persistence helpers ---------------------------------------
65
+
66
+
def _download_remote_ratings() -> Path | None:
    """Fetch the current ratings CSV from the Hub dataset repo.

    Returns:
        Local path of the downloaded file, or ``None`` when no ratings repo is
        configured or the repo / file does not exist yet.

    Raises:
        Any other Hub error (network outage, server error, ...) propagates.
        Treating such a failure as "no ratings yet" would make the caller
        start from an empty table and overwrite the stored ratings on the
        next save.
    """
    # Local import keeps this block independent of the module import list.
    from huggingface_hub.utils import (
        EntryNotFoundError,
        LocalEntryNotFoundError,
        RepositoryNotFoundError,
    )

    if not RATINGS_REPO:
        return None
    try:
        local_path = hf_hub_download(
            repo_id=RATINGS_REPO,
            filename=RATINGS_FILE,
            repo_type="dataset",
            token=HF_TOKEN,
            cache_dir=tempfile.gettempdir(),
        )
    except LocalEntryNotFoundError:
        # Hub unreachable and nothing cached: this is NOT "file missing".
        raise
    except (RepositoryNotFoundError, EntryNotFoundError):
        # File/repo may not exist yet – caller will create an empty DF.
        # NOTE(review): the Hub also reports a private repo without access as
        # "repository not found" – confirm the token has read access.
        return None
    return Path(local_path)
84
+
85
+
def load_ratings() -> pd.DataFrame:
    """Return all stored ratings, or an empty table if none exist yet.

    Ratings come from the remote dataset repo when it can be downloaded. When
    running in local dev mode (no repo and/or no API client configured) the
    file written by ``_upload_remote_ratings`` at ``RATINGS_FILE`` is read
    instead, so progress survives between saves.

    Returns:
        DataFrame with columns ``user_id``, ``row_index``, ``choice`` and
        ``timestamp``. ``user_id`` is always read as a string.
    """
    source = _download_remote_ratings()
    if source is None and not (RATINGS_REPO and api):
        # Mirror the local-dev branch of _upload_remote_ratings.
        source = Path(RATINGS_FILE)
    if source is not None and source.exists():
        # Force string ids: otherwise an all-digit id such as "123" is parsed
        # as an int and never matches the string typed into the UI.
        return pd.read_csv(source, dtype={"user_id": str})
    return pd.DataFrame(columns=["user_id", "row_index", "choice", "timestamp"])
92
 
93
 
def _upload_remote_ratings(df: pd.DataFrame):
    """Persist the full ratings table, one Hub commit per save.

    Args:
        df: Complete ratings table; it replaces the stored CSV entirely.

    When no ratings repo or API client is configured (local development) the
    CSV is written to ``RATINGS_FILE`` relative to the working directory.
    """
    if not (RATINGS_REPO and api):
        # Running locally (dev) – write next to the app for inspection.
        local_path = Path(RATINGS_FILE)
        # RATINGS_FILE contains a sub-directory; make sure it exists.
        local_path.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(local_path, index=False)
        return

    # Upload the CSV bytes directly. Writing to <tmpdir>/<RATINGS_FILE> first
    # failed because the nested directory does not exist inside the temp dir.
    api.upload_file(
        path_or_fileobj=df.to_csv(index=False).encode("utf-8"),
        path_in_repo=RATINGS_FILE,
        repo_id=RATINGS_REPO,
        repo_type="dataset",
        commit_message="Add/Update rating",
    )
111
+
112
 
def save_rating(user_id: str, row_index: int, choice: int):
    """Append one rating (deduplicated) and persist the updated table.

    Args:
        user_id: Identifier the rater typed into the UI.
        row_index: Index of the rated row in the Q/A dataset.
        choice: The preferred answer for that row.

    A rating that already exists for the same ``user_id`` / ``row_index``
    pair is left untouched and nothing is uploaded.
    """
    ratings = load_ratings()

    # Compare ids as strings: ids read back from CSV may have been parsed as
    # numbers, which would never equal the string coming from the UI.
    same_user = ratings.user_id.astype(str) == str(user_id)
    duplicate = same_user & (ratings.row_index == row_index)
    if duplicate.any():
        return  # already stored

    new_entry = pd.DataFrame(
        [
            {
                "user_id": user_id,
                "row_index": row_index,
                "choice": choice,
                "timestamp": datetime.utcnow().isoformat(),
            }
        ]
    )
    if ratings.empty:
        # Concatenating onto an empty frame is deprecated in recent pandas and
        # degrades column dtypes; the new row alone is the whole table.
        ratings = new_entry
    else:
        ratings = pd.concat([ratings, new_entry], ignore_index=True)
    _upload_remote_ratings(ratings)
128
 
129
 
def get_next_unrated(df: pd.DataFrame, ratings: pd.DataFrame, user_id: str):
    """Return the first item of ``df`` that ``user_id`` has not rated yet.

    Args:
        df: Q/A dataset with ``question``, ``response1`` and ``response2``.
        ratings: Ratings table with ``user_id`` and ``row_index`` columns.
        user_id: Identifier of the current rater.

    Returns:
        ``(row_index, question, response1, response2)`` for the next unrated
        row, or ``None`` when the user has rated every row.
    """
    # Compare ids as strings: an all-digit id read back from CSV may be stored
    # as an int and would otherwise never match the string from the UI.
    by_user = ratings.user_id.astype(str) == str(user_id)
    rated = ratings.loc[by_user, "row_index"].tolist()
    unrated = df[~df.index.isin(rated)]
    if unrated.empty:
        return None
    row = unrated.iloc[0]
    return row.name, row.question, row.response1, row.response2
137
 
138
 
 
142
 
143
 
144
  def start_or_resume(user_id: str, state_df):
 
145
  if not user_id.strip():
146
  return (
147
+ gr.update(value=user_id, visible=True),
148
+ gr.update(visible=False), # eval_col
149
+ gr.update(visible=False), # submit_btn
150
  "",
151
  "",
152
  "",
153
+ "", # q, a1, a2, idx
154
+ "Please enter a non-empty identifier to begin.",
155
  )
156
 
157
  ratings = load_ratings()
158
  record = get_next_unrated(state_df, ratings, user_id)
159
  if record is None:
 
160
  return (
161
+ gr.update(value=user_id, visible=True),
162
  gr.update(visible=False),
163
  gr.update(visible=False),
164
  "",
 
170
 
171
  idx, q, a1, a2 = record
172
  return (
173
+ gr.update(value=user_id, visible=True),
174
+ gr.update(visible=True), # eval_col
175
+ gr.update(visible=True), # submit_btn
176
+ q,
177
  a1,
178
  a2,
179
  str(idx),
 
182
 
183
 
184
  def submit_preference(user_id: str, row_idx_str: str, choice: str, state_df):
 
185
  if choice not in {"answer1", "answer2"}:
186
+ return (
187
+ "",
188
+ "",
189
+ "",
190
+ "",
191
+ "Please choose either Answer 1 or Answer 2 before submitting.",
192
  )
193
 
194
  row_idx = int(row_idx_str)
 
200
  return "", "", "", "", "🎉 You have evaluated every item – thank you!"
201
 
202
  idx, q, a1, a2 = record
203
+ return q, a1, a2, str(idx), ""
204
 
205
 
206
  # -----------------------------------------------------------------------------
 
211
  def build_demo():
212
  df = load_data()
213
 
214
+ # CSS to constrain very tall answers
215
+ overflow_css = f"""
216
+ <style>
217
+ .answerbox {{
218
+ max-height: {MAX_HEIGHT_PX}px;
219
+ overflow-y: auto;
220
+ white-space: pre-wrap;
221
+ }}
222
+ </style>
223
+ """
224
+
225
  with gr.Blocks(title="Question/Answer Preference Rater") as demo:
226
+ gr.HTML(overflow_css)
227
+
228
  gr.Markdown(
229
+ """# Q/A Preference Rater\nEnter your identifier below to start or resume. For every question, select which answer you prefer. Your progress is saved automatically so you can return at any time using the same identifier."""
 
230
  )
231
 
232
+ state_df = gr.State(df)
233
  state_row_idx = gr.State("")
234
 
235
+ # Identifier input
236
+ id_input = gr.Textbox(label="User Identifier", placeholder="e.g. alice")
237
+ start_btn = gr.Button("Start / Resume")
 
 
238
 
239
+ info_md = gr.Markdown("")
 
240
 
241
+ # Evaluation widgets
242
  with gr.Column(visible=False) as eval_col:
243
+ question_md = gr.Markdown("")
244
  with gr.Row():
245
+ answer1_md = gr.Markdown(label="Answer 1", elem_classes=["answerbox"])
246
+ answer2_md = gr.Markdown(label="Answer 2", elem_classes=["answerbox"])
 
 
 
 
 
 
247
  choice_radio = gr.Radio(
248
+ ["answer1", "answer2"], label="Which answer do you prefer?"
 
 
249
  )
250
  submit_btn = gr.Button("Submit Preference", visible=False)
251
 
252
+ # Callbacks wiring
253
  start_btn.click(
254
  fn=start_or_resume,
255
  inputs=[id_input, state_df],
 
258
  eval_col,
259
  submit_btn,
260
  question_md,
261
+ answer1_md,
262
+ answer2_md,
263
  state_row_idx,
264
  info_md,
265
  ],
 
268
  submit_btn.click(
269
  fn=submit_preference,
270
  inputs=[id_input, state_row_idx, choice_radio, state_df],
271
+ outputs=[question_md, answer1_md, answer2_md, state_row_idx, info_md],
272
  )
273
 
274
  return demo
275
 
276
 
277
+ if __name__ == "__main__":
278
+ build_demo().launch()