kaburia commited on
Commit
0aea417
·
1 Parent(s): 345476f

model justifications

Browse files
Files changed (1) hide show
  1. app.py +559 -159
app.py CHANGED
@@ -1,34 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import os
4
  from huggingface_hub import HfApi
5
- from datasets import load_dataset, Dataset
6
  import io
7
- # from dotenv import load_dotenv
8
-
9
- # # Load environment variables from a .env file (if present) and read HF token
10
- # load_dotenv()
11
- # HF_TOKEN = os.getenv("HF_TOKEN", "YOUR_HF_WRITE_TOKEN_HERE")
12
 
13
  # --- 1. CONFIGURATION ---
14
 
15
  # --- !!! NEW: DEBUG/TESTING MODE !!! ---
16
- # Set to True to use local CSV files instead of Hugging Face Hub
17
- # This will read from PREDICTIONS_CSV and read/write to LOCAL_DATASET_PATH
18
  DEBUG_TESTING = False
19
  LOCAL_DATASET_PATH = "policy_evaluations.csv"
20
- PREDICTIONS_CSV = "model_predictions.csv" # From batch_inference.py
21
- # --- End Debug Config ---
22
 
23
  HF = 'hf'
24
  token = 'pQQADyqfDNewBCejvPmyMGlzpdgqDFSAFE'
25
-
26
-
27
- HF_DATASET_REPO = "kaburia/policy-evaluations" # Your HF Dataset repo
28
  HF_TOKEN = HF + '_' + token
29
 
30
-
31
- # --- Email Authentication ---
32
  APPROVED_EMAILS = {
33
  "kaburiaaustin1@tahmo.org": "user1",
34
  "E.Ramos@tudelft.nl" : "user2",
@@ -41,43 +487,30 @@ APPROVED_EMAILS = {
41
  "H.F.Hagenaars@tudelft.nl" : "user9",
42
  }
43
 
44
- # --- Define Interaction Choices ---
45
  DRILL_DOWN_MAP = {
46
  "coherent": ["+3 Indivisible", "+2 Reinforcing", "+1 Enabling"],
47
  "neutral": ["0 Consistent"],
48
  "incoherent": ["-1 Constraining", "-2 Counteracting", "-3 Cancelling"]
49
  }
50
- ALL_DRILL_DOWN_CHOICES = DRILL_DOWN_MAP["coherent"] + DRILL_DOWN_MAP["neutral"] + DRILL_DOWN_MAP["incoherent"]
51
  VERIFY_CHOICES = ["neutral", "coherent", "incoherent"]
52
 
53
- # --- 2. DATA LOADING FUNCTIONS ---
54
 
55
  def load_data_from_hub(token):
56
- """
57
- (LIVE MODE) Loads the dataset from Hugging Face, converts to Pandas,
58
- and identifies pending rows.
59
- """
60
- if not token or token == "YOUR_HF_WRITE_TOKEN_HERE":
61
  return None, None, "Error: Hugging Face Token is not configured."
62
 
63
  try:
64
- # Load the dataset (which may be policy_evaluations.csv)
65
  ds = load_dataset(HF_DATASET_REPO, token=token, split="train", cache_dir="./cache")
66
  full_df = ds.to_pandas()
67
 
68
- # --- NEW LOGIC ---
69
- # Check for annotation columns and add them if they don't exist
70
  new_cols = ["UserVerifiedClass", "DrillDownInteraction", "AnnotatorUsername"]
71
  for col in new_cols:
72
  if col not in full_df.columns:
73
- print(f"Adding missing column to DataFrame: {col}")
74
  full_df[col] = pd.NA
75
- # --- END NEW LOGIC ---
76
 
77
- # Create a unique key
78
  full_df['key'] = full_df['PolicyA'].astype(str) + '||' + full_df['PolicyB'].astype(str)
79
-
80
- # Find rows that have NOT been annotated
81
  pending_df = full_df[full_df['UserVerifiedClass'].isnull()].reset_index(drop=True)
82
 
83
  status = f"Loaded {len(pending_df)} remaining items to annotate. ({len(full_df) - len(pending_df)} already complete) [LIVE: HF Hub]"
@@ -87,32 +520,23 @@ def load_data_from_hub(token):
87
  return None, None, f"Error loading dataset from Hub: {e}"
88
 
89
  def load_data_from_local():
90
- """
91
- (DEBUG MODE) Loads the dataset from a local CSV file.
92
- If it doesn't exist, it initializes it from 'model_predictions.csv'.
93
- """
94
  try:
95
  if not os.path.exists(LOCAL_DATASET_PATH):
96
- # First run: Initialize local file from predictions
97
  print(f"'{LOCAL_DATASET_PATH}' not found. Initializing from '{PREDICTIONS_CSV}'...")
98
  if not os.path.exists(PREDICTIONS_CSV):
99
  return None, None, f"Error: '{PREDICTIONS_CSV}' not found. Please run batch_inference.py first."
100
 
101
  df = pd.read_csv(PREDICTIONS_CSV)
102
- # --- FIX: Check for 'model_label' ---
103
  if "model_label" not in df.columns:
104
- return None, None, f"Error: '{PREDICTIONS_CSV}' is missing 'model_label' column. Please run batch_inference.py"
105
- # --- END FIX ---
106
  df["UserVerifiedClass"] = pd.NA
107
  df["DrillDownInteraction"] = pd.NA
108
  df["AnnotatorUsername"] = pd.NA
109
  df.to_csv(LOCAL_DATASET_PATH, index=False)
110
  print(f"Initialized '{LOCAL_DATASET_PATH}'.")
111
 
112
- # Load the (now existing) local file
113
  full_df = pd.read_csv(LOCAL_DATASET_PATH)
114
-
115
- # Ensure columns are present (for existing local files)
116
  new_cols = ["UserVerifiedClass", "DrillDownInteraction", "AnnotatorUsername"]
117
  for col in new_cols:
118
  if col not in full_df.columns:
@@ -127,134 +551,104 @@ def load_data_from_local():
127
  except Exception as e:
128
  return None, None, f"Error loading local dataset: {e}"
129
 
130
- # --- 3. DATA SAVING FUNCTIONS ---
131
 
132
  def save_annotation_to_hub(index, verified_class, drill_down, user_tag, token, full_df, pending_df):
133
- """
134
- (LIVE MODE) Updates the DataFrame and pushes the entire dataset back to the Hub.
135
- """
136
  if not drill_down:
137
  return {status_box: "Error: Please select a drill-down interaction."}
138
  if not user_tag:
139
  return {status_box: "Error: User tag is missing. Please re-login."}
140
 
141
  try:
142
- # 1. Get the unique key of the item we just annotated
143
  current_key = pending_df.loc[index, 'key']
144
-
145
- # 2. Update the *full* DataFrame with the annotation and user_tag
146
  full_df.loc[full_df['key'] == current_key, 'UserVerifiedClass'] = verified_class
147
  full_df.loc[full_df['key'] == current_key, 'DrillDownInteraction'] = drill_down
148
  full_df.loc[full_df['key'] == current_key, 'AnnotatorUsername'] = user_tag
149
 
150
- # --- NEW SAVE LOGIC ---
151
- # 3. Convert back to CSV format in memory
152
  csv_buffer = io.StringIO()
153
- # Drop the temporary 'key' column before saving
154
  full_df.drop(columns=['key']).to_csv(csv_buffer, index=False)
155
  csv_content_bytes = csv_buffer.getvalue().encode('utf-8')
156
 
157
- # 4. Upload using HfApi to overwrite the specific file
158
  api = HfApi()
159
  api.upload_file(
160
  path_or_fileobj=io.BytesIO(csv_content_bytes),
161
- path_in_repo="policy_evaluations.csv", # Explicitly overwrite this file
162
  repo_id=HF_DATASET_REPO,
163
  token=token,
164
  repo_type="dataset"
165
  )
166
- # --- END NEW SAVE LOGIC ---
167
 
168
  save_status = f"Saved to Hub: {verified_class} | {drill_down} by {user_tag}"
169
-
170
- # 5. Load the next item
171
- next_index = index + 1
172
- ui_updates = load_next_item(pending_df, next_index) # Pass pending_df
173
  ui_updates[status_box] = save_status
174
- ui_updates[full_df_state] = full_df # Store the updated full_df in state
175
  return ui_updates
176
 
177
  except Exception as e:
178
  return {status_box: f"Error saving to Hub: {e}"}
179
 
180
  def save_annotation_to_local(index, verified_class, drill_down, user_tag, full_df, pending_df):
181
- """
182
- (DEBUG MODE) Updates the DataFrame and saves it back to the local CSV.
183
- """
184
  if not drill_down:
185
  return {status_box: "Error: Please select a drill-down interaction."}
186
  if not user_tag:
187
  return {status_box: "Error: User tag is missing. Please re-login."}
188
 
189
  try:
190
- # 1. Get key
191
  current_key = pending_df.loc[index, 'key']
192
-
193
- # 2. Update full DataFrame
194
  full_df.loc[full_df['key'] == current_key, 'UserVerifiedClass'] = verified_class
195
  full_df.loc[full_df['key'] == current_key, 'DrillDownInteraction'] = drill_down
196
  full_df.loc[full_df['key'] == current_key, 'AnnotatorUsername'] = user_tag
197
 
198
- # 3. Save to local CSV (overwriting)
199
  full_df.drop(columns=['key']).to_csv(LOCAL_DATASET_PATH, index=False)
200
-
201
  save_status = f"Saved (Local): {verified_class} | {drill_down} by {user_tag}"
202
 
203
- # 4. Load next item
204
- next_index = index + 1
205
- ui_updates = load_next_item(pending_df, next_index)
206
  ui_updates[status_box] = save_status
207
- ui_updates[full_df_state] = full_df # Store updated df in state
208
  return ui_updates
209
 
210
  except Exception as e:
211
  return {status_box: f"Error saving locally: {e}"}
212
 
213
- # --- 4. GRADIO UI ---
214
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
215
  gr.Markdown("# Policy Coherence Annotation Tool")
216
  gr.Markdown(
217
  """
218
  Welcome! This tool is for human-in-the-loop annotation.
219
  1. Log in with your authorized email.
220
- 2. The model's prediction for two policies will be shown.
221
- 3. **Step 1:** Verify if the model's 3-class prediction (neutral, coherent, incoherent) is correct, or change it.
222
- 4. **Step 2:** Based on your verified choice, select a 7-class drill-down label.
223
- 5. Click 'Save & Next' to submit your annotation and load the next item.
224
-
225
- ---
226
- ### Drill-Down Definitions
227
- - **+3 Indivisible**: Inextricably linked to the achievement of another goal.
228
- - **+2 Reinforcing**: Aids the achievement of another goal.
229
- - **+1 Enabling**: Creates conditions that further another goal.
230
- - **0 Consistent**: No significant positive or negative interactions.
231
- - **-1 Constraining**: Limits options on another goal.
232
- - **-2 Counteracting**: Clashes with another goal.
233
- - **-3 Cancelling**: Makes it impossible to reach another goal.
234
  """
235
  )
236
 
237
- # --- State variables ---
238
  full_df_state = gr.State()
239
  pending_df_state = gr.State()
240
  current_index_state = gr.State(value=0)
241
  hf_token_state = gr.State()
242
  user_tag_state = gr.State()
243
 
244
- # --- Section 1: Login ---
245
  with gr.Group() as login_box:
246
  with gr.Row():
247
  email_box = gr.Textbox(label="Email", placeholder="Enter your authorized email...")
248
  login_btn = gr.Button("Login & Load Dataset", variant="primary")
249
  progress_bar = gr.Markdown(value="Waiting for login...")
250
 
251
- # --- Section 2: Annotation (hidden until loaded) ---
252
  with gr.Group(visible=False) as annotation_box:
253
- # --- MODIFIED: Use gr.Row for side-by-side table layout ---
254
  with gr.Row():
255
- policy_a_display = gr.Textbox(label="Policy / Objective A", interactive=False, lines=5, container=True)
256
- policy_b_display = gr.Textbox(label="Policy / Objective B", interactive=False, lines=5, container=True)
257
- # --- END MODIFICATION ---
 
 
 
 
 
 
 
 
258
 
259
  with gr.Row():
260
  model_confidence_label = gr.Label(label="Model Confidence")
@@ -264,36 +658,26 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
264
  info="The model's prediction is selected by default."
265
  )
266
 
267
- # --- UPDATED: Markdown instructions moved to top ---
268
-
269
  user_drill_down_dropdown = gr.Dropdown(
270
  label="Step 2: Drill-Down Interaction",
271
- choices=[], # Will be populated dynamically
272
  interactive=True
273
  )
274
 
275
- save_btn = gr.Button("Save & Next", variant="stop")
 
 
 
276
  status_box = gr.Textbox(label="Status", interactive=False)
277
 
278
- # --- 5. UI Event Handlers ---
279
 
280
  def update_drill_down_choices(verified_class):
281
- """
282
- Updates the drill-down dropdown based on the 3-class selection.
283
- """
284
  choices = DRILL_DOWN_MAP.get(verified_class, [])
285
- value = choices[0] if len(choices) == 1 else None # Auto-select "0 Consistent"
286
- # --- FIX: Return the constructor (Gradio 4.x syntax) ---
287
- return gr.Dropdown(
288
- choices=choices,
289
- value=value,
290
- interactive=len(choices) > 1 # Disable interaction if only one choice
291
- )
292
 
293
  def load_next_item(pending_df, index):
294
- """
295
- Loads the item at 'index' from the PENDING DataFrame into the UI.
296
- """
297
  if pending_df is None:
298
  return {status_box: "Data not loaded."}
299
 
@@ -303,75 +687,90 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
303
  progress_bar: gr.Markdown(f"**Annotation Complete! ({total_items} items total)**"),
304
  policy_a_display: "All items annotated.",
305
  policy_b_display: "",
 
306
  annotation_box: gr.Group(visible=False)
307
  }
308
 
309
  row = pending_df.iloc[index]
310
- # --- FIX: Use "model_label" from CSV ---
311
  model_pred = row["model_label"]
312
 
313
- # --- NEW: Build conf_dict conditionally ---
314
  if "model_confidence" in row:
315
- # New format: "model_label" + "model_confidence"
316
  confidence = row["model_confidence"]
317
  conf_dict = {}
318
-
319
- # Distribute probability
320
  remaining_prob = (1.0 - confidence) / 2.0
321
- for l in VERIFY_CHOICES: # ["neutral", "coherent", "incoherent"]
322
  if l == model_pred:
323
  conf_dict[l] = confidence
324
  else:
325
  conf_dict[l] = remaining_prob
326
  else:
327
- # Old format: "Confidence_Neutral", etc.
328
  conf_dict = {
329
  "neutral": row.get("Confidence_Neutral", 0.0),
330
  "coherent": row.get("Confidence_Coherent", 0.0),
331
  "incoherent": row.get("Confidence_Incoherent", 0.0)
332
  }
333
- # --- END NEW ---
334
-
335
- # --- NEW: Update drill-down based on model_pred ---
336
  drill_down_choices = DRILL_DOWN_MAP.get(model_pred, [])
337
  drill_down_value = drill_down_choices[0] if len(drill_down_choices) == 1 else None
338
  drill_down_interactive = len(drill_down_choices) > 1
339
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  return {
341
  progress_bar: gr.Markdown(f"**Annotating Item {index + 1} of {total_items}**"),
342
  policy_a_display: row["PolicyA"],
343
  policy_b_display: row["PolicyB"],
 
344
  model_confidence_label: conf_dict,
345
  user_verified_radio: model_pred,
346
- # --- FIX: Return the constructor (Gradio 4.x syntax) ---
347
- user_drill_down_dropdown: gr.Dropdown(
348
- choices=drill_down_choices,
349
- value=drill_down_value,
350
- interactive=drill_down_interactive
351
- ),
352
  current_index_state: index,
353
  annotation_box: gr.Group(visible=True)
354
  }
355
 
356
- # When 'Login' is clicked:
357
  def login_and_load(email):
358
- # --- Authentication Step ---
359
  if email not in APPROVED_EMAILS:
360
  return {
361
  progress_bar: gr.Markdown(f"<font color='red'>Error: Email '{email}' is not authorized.</font>"),
362
  login_box: gr.Group(visible=True)
363
  }
364
 
365
- user_tag = APPROVED_EMAILS[email] # Get the tag (e.g., "user1")
366
 
367
- # --- NEW: Branching Logic for Debug/Live ---
368
  if DEBUG_TESTING:
369
- print("--- DEBUG MODE: Loading from local CSV ---")
370
  full_df, pending_df, status = load_data_from_local()
371
- token_to_store = "debug_mode" # Placeholder
372
  else:
373
- print("--- LIVE MODE: Loading from Hugging Face Hub ---")
374
- if HF_TOKEN == "YOUR_HF_WRITE_TOKEN_HERE" or not HF_TOKEN:
375
  return {
376
  progress_bar: gr.Markdown(f"<font color='red'>Error: App is not configured. HF_TOKEN is missing.</font>"),
377
  login_box: gr.Group(visible=True)
@@ -379,65 +778,69 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
379
  full_df, pending_df, status = load_data_from_hub(HF_TOKEN)
380
  token_to_store = HF_TOKEN
381
 
382
- # --- Common Logic ---
383
  if full_df is None:
384
  return {
385
  progress_bar: gr.Markdown(f"<font color='red'>{status}</font>"),
386
  login_box: gr.Group(visible=True)
387
  }
388
 
389
- # --- Load the first item ---
390
  first_item_updates = load_next_item(pending_df, 0)
391
 
392
- # --- Save all data to state and update UI ---
393
  first_item_updates[full_df_state] = full_df
394
  first_item_updates[pending_df_state] = pending_df
395
  first_item_updates[progress_bar] = f"Login successful as **{user_tag}**. {status}"
396
- first_item_updates[hf_token_state] = token_to_store # Save token/debug_flag to state
397
  first_item_updates[user_tag_state] = user_tag
398
- first_item_updates[login_box] = gr.Group(visible=False) # Hide login box
399
- first_item_updates[annotation_box] = gr.Group(visible=True) # Show annotation box
400
  return first_item_updates
401
 
402
  login_btn.click(
403
  fn=login_and_load,
404
- inputs=[email_box], # Input is ONLY the email box
405
  outputs=[
406
- progress_bar, policy_a_display, policy_b_display,
407
  model_confidence_label, user_verified_radio, user_drill_down_dropdown,
408
  current_index_state, annotation_box, login_box,
409
  full_df_state, pending_df_state, hf_token_state, user_tag_state, status_box
410
  ]
411
  )
412
 
413
- # --- NEW: Wrapper for Save Button ---
414
  def save_wrapper(index, verified_class, drill_down, user_tag, token, full_df, pending_df):
415
  if DEBUG_TESTING:
416
  return save_annotation_to_local(index, verified_class, drill_down, user_tag, full_df, pending_df)
417
  else:
418
  return save_annotation_to_hub(index, verified_class, drill_down, user_tag, token, full_df, pending_df)
419
 
420
- # --- NEW: Event listener for dynamic drill-down ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
  user_verified_radio.change(
422
  fn=update_drill_down_choices,
423
  inputs=user_verified_radio,
424
  outputs=user_drill_down_dropdown
425
  )
426
 
427
- # When 'Save & Next' is clicked
428
  save_btn.click(
429
- fn=save_wrapper, # Call the new wrapper function
430
  inputs=[
431
- current_index_state,
432
- user_verified_radio,
433
- user_drill_down_dropdown,
434
- user_tag_state, # Pass the user tag from state
435
- hf_token_state, # Pass the token from state
436
- full_df_state,
437
- pending_df_state
438
  ],
439
  outputs=[
440
- progress_bar, policy_a_display, policy_b_display,
441
  model_confidence_label, user_verified_radio, user_drill_down_dropdown,
442
  current_index_state, annotation_box, status_box, full_df_state
443
  ]
@@ -449,8 +852,5 @@ if __name__ == "__main__":
449
  print("--- RUNNING IN DEBUG MODE ---")
450
  print(f"--- Data will be read/written to '{LOCAL_DATASET_PATH}' ---")
451
  print("="*30 + "\n")
452
- elif HF_TOKEN == "YOUR_HF_WRITE_TOKEN_HERE":
453
- print("\n--- WARNING: HF_TOKEN NOT SET ---")
454
- print("Please edit 'annotation_app.py' and add your HF_TOKEN to the top.")
455
 
456
  demo.launch(debug=True, share=True)
 
1
+ # import gradio as gr
2
+ # import pandas as pd
3
+ # import os
4
+ # from huggingface_hub import HfApi
5
+ # from datasets import load_dataset, Dataset
6
+ # import io
7
+ # # from dotenv import load_dotenv
8
+
9
+ # # # Load environment variables from a .env file (if present) and read HF token
10
+ # # load_dotenv()
11
+ # # HF_TOKEN = os.getenv("HF_TOKEN", "YOUR_HF_WRITE_TOKEN_HERE")
12
+
13
+ # # --- 1. CONFIGURATION ---
14
+
15
+ # # --- !!! NEW: DEBUG/TESTING MODE !!! ---
16
+ # # Set to True to use local CSV files instead of Hugging Face Hub
17
+ # # This will read from PREDICTIONS_CSV and read/write to LOCAL_DATASET_PATH
18
+ # DEBUG_TESTING = False
19
+ # LOCAL_DATASET_PATH = "policy_evaluations.csv"
20
+ # PREDICTIONS_CSV = "model_predictions.csv" # From batch_inference.py
21
+ # # --- End Debug Config ---
22
+
23
+ # HF = 'hf'
24
+ # token = 'pQQADyqfDNewBCejvPmyMGlzpdgqDFSAFE'
25
+
26
+
27
+ # HF_DATASET_REPO = "kaburia/policy-evaluations" # Your HF Dataset repo
28
+ # HF_TOKEN = HF + '_' + token
29
+
30
+
31
+ # # --- Email Authentication ---
32
+ # APPROVED_EMAILS = {
33
+ # "kaburiaaustin1@tahmo.org": "user1",
34
+ # "E.Ramos@tudelft.nl" : "user2",
35
+ # "eunice.pramos@gmail.com" : "user3",
36
+ # "E.Abraham@tudelft.nl" : "user4",
37
+ # "dene.abv@gmail.com" : "user5",
38
+ # "rafatoufofana.abv@gmail.com" : "user6",
39
+ # "annorfrank@tahmo.org" : "user7",
40
+ # "n.marley@tahmo.org" : "user8",
41
+ # "H.F.Hagenaars@tudelft.nl" : "user9",
42
+ # }
43
+
44
+ # # --- Define Interaction Choices ---
45
+ # DRILL_DOWN_MAP = {
46
+ # "coherent": ["+3 Indivisible", "+2 Reinforcing", "+1 Enabling"],
47
+ # "neutral": ["0 Consistent"],
48
+ # "incoherent": ["-1 Constraining", "-2 Counteracting", "-3 Cancelling"]
49
+ # }
50
+ # ALL_DRILL_DOWN_CHOICES = DRILL_DOWN_MAP["coherent"] + DRILL_DOWN_MAP["neutral"] + DRILL_DOWN_MAP["incoherent"]
51
+ # VERIFY_CHOICES = ["neutral", "coherent", "incoherent"]
52
+
53
+ # # --- 2. DATA LOADING FUNCTIONS ---
54
+
55
+ # def load_data_from_hub(token):
56
+ # """
57
+ # (LIVE MODE) Loads the dataset from Hugging Face, converts to Pandas,
58
+ # and identifies pending rows.
59
+ # """
60
+ # if not token or token == "YOUR_HF_WRITE_TOKEN_HERE":
61
+ # return None, None, "Error: Hugging Face Token is not configured."
62
+
63
+ # try:
64
+ # # Load the dataset (which may be policy_evaluations.csv)
65
+ # ds = load_dataset(HF_DATASET_REPO, token=token, split="train", cache_dir="./cache")
66
+ # full_df = ds.to_pandas()
67
+
68
+ # # --- NEW LOGIC ---
69
+ # # Check for annotation columns and add them if they don't exist
70
+ # new_cols = ["UserVerifiedClass", "DrillDownInteraction", "AnnotatorUsername"]
71
+ # for col in new_cols:
72
+ # if col not in full_df.columns:
73
+ # print(f"Adding missing column to DataFrame: {col}")
74
+ # full_df[col] = pd.NA
75
+ # # --- END NEW LOGIC ---
76
+
77
+ # # Create a unique key
78
+ # full_df['key'] = full_df['PolicyA'].astype(str) + '||' + full_df['PolicyB'].astype(str)
79
+
80
+ # # Find rows that have NOT been annotated
81
+ # pending_df = full_df[full_df['UserVerifiedClass'].isnull()].reset_index(drop=True)
82
+
83
+ # status = f"Loaded {len(pending_df)} remaining items to annotate. ({len(full_df) - len(pending_df)} already complete) [LIVE: HF Hub]"
84
+ # return full_df, pending_df, status
85
+
86
+ # except Exception as e:
87
+ # return None, None, f"Error loading dataset from Hub: {e}"
88
+
89
+ # def load_data_from_local():
90
+ # """
91
+ # (DEBUG MODE) Loads the dataset from a local CSV file.
92
+ # If it doesn't exist, it initializes it from 'model_predictions.csv'.
93
+ # """
94
+ # try:
95
+ # if not os.path.exists(LOCAL_DATASET_PATH):
96
+ # # First run: Initialize local file from predictions
97
+ # print(f"'{LOCAL_DATASET_PATH}' not found. Initializing from '{PREDICTIONS_CSV}'...")
98
+ # if not os.path.exists(PREDICTIONS_CSV):
99
+ # return None, None, f"Error: '{PREDICTIONS_CSV}' not found. Please run batch_inference.py first."
100
+
101
+ # df = pd.read_csv(PREDICTIONS_CSV)
102
+ # # --- FIX: Check for 'model_label' ---
103
+ # if "model_label" not in df.columns:
104
+ # return None, None, f"Error: '{PREDICTIONS_CSV}' is missing 'model_label' column. Please run batch_inference.py"
105
+ # # --- END FIX ---
106
+ # df["UserVerifiedClass"] = pd.NA
107
+ # df["DrillDownInteraction"] = pd.NA
108
+ # df["AnnotatorUsername"] = pd.NA
109
+ # df.to_csv(LOCAL_DATASET_PATH, index=False)
110
+ # print(f"Initialized '{LOCAL_DATASET_PATH}'.")
111
+
112
+ # # Load the (now existing) local file
113
+ # full_df = pd.read_csv(LOCAL_DATASET_PATH)
114
+
115
+ # # Ensure columns are present (for existing local files)
116
+ # new_cols = ["UserVerifiedClass", "DrillDownInteraction", "AnnotatorUsername"]
117
+ # for col in new_cols:
118
+ # if col not in full_df.columns:
119
+ # full_df[col] = pd.NA
120
+
121
+ # full_df['key'] = full_df['PolicyA'].astype(str) + '||' + full_df['PolicyB'].astype(str)
122
+ # pending_df = full_df[full_df['UserVerifiedClass'].isnull()].reset_index(drop=True)
123
+
124
+ # status = f"Loaded {len(pending_df)} remaining items to annotate. ({len(full_df) - len(pending_df)} complete) [DEBUG: Local CSV]"
125
+ # return full_df, pending_df, status
126
+
127
+ # except Exception as e:
128
+ # return None, None, f"Error loading local dataset: {e}"
129
+
130
+ # # --- 3. DATA SAVING FUNCTIONS ---
131
+
132
+ # def save_annotation_to_hub(index, verified_class, drill_down, user_tag, token, full_df, pending_df):
133
+ # """
134
+ # (LIVE MODE) Updates the DataFrame and pushes the entire dataset back to the Hub.
135
+ # """
136
+ # if not drill_down:
137
+ # return {status_box: "Error: Please select a drill-down interaction."}
138
+ # if not user_tag:
139
+ # return {status_box: "Error: User tag is missing. Please re-login."}
140
+
141
+ # try:
142
+ # # 1. Get the unique key of the item we just annotated
143
+ # current_key = pending_df.loc[index, 'key']
144
+
145
+ # # 2. Update the *full* DataFrame with the annotation and user_tag
146
+ # full_df.loc[full_df['key'] == current_key, 'UserVerifiedClass'] = verified_class
147
+ # full_df.loc[full_df['key'] == current_key, 'DrillDownInteraction'] = drill_down
148
+ # full_df.loc[full_df['key'] == current_key, 'AnnotatorUsername'] = user_tag
149
+
150
+ # # --- NEW SAVE LOGIC ---
151
+ # # 3. Convert back to CSV format in memory
152
+ # csv_buffer = io.StringIO()
153
+ # # Drop the temporary 'key' column before saving
154
+ # full_df.drop(columns=['key']).to_csv(csv_buffer, index=False)
155
+ # csv_content_bytes = csv_buffer.getvalue().encode('utf-8')
156
+
157
+ # # 4. Upload using HfApi to overwrite the specific file
158
+ # api = HfApi()
159
+ # api.upload_file(
160
+ # path_or_fileobj=io.BytesIO(csv_content_bytes),
161
+ # path_in_repo="policy_evaluations.csv", # Explicitly overwrite this file
162
+ # repo_id=HF_DATASET_REPO,
163
+ # token=token,
164
+ # repo_type="dataset"
165
+ # )
166
+ # # --- END NEW SAVE LOGIC ---
167
+
168
+ # save_status = f"Saved to Hub: {verified_class} | {drill_down} by {user_tag}"
169
+
170
+ # # 5. Load the next item
171
+ # next_index = index + 1
172
+ # ui_updates = load_next_item(pending_df, next_index) # Pass pending_df
173
+ # ui_updates[status_box] = save_status
174
+ # ui_updates[full_df_state] = full_df # Store the updated full_df in state
175
+ # return ui_updates
176
+
177
+ # except Exception as e:
178
+ # return {status_box: f"Error saving to Hub: {e}"}
179
+
180
+ # def save_annotation_to_local(index, verified_class, drill_down, user_tag, full_df, pending_df):
181
+ # """
182
+ # (DEBUG MODE) Updates the DataFrame and saves it back to the local CSV.
183
+ # """
184
+ # if not drill_down:
185
+ # return {status_box: "Error: Please select a drill-down interaction."}
186
+ # if not user_tag:
187
+ # return {status_box: "Error: User tag is missing. Please re-login."}
188
+
189
+ # try:
190
+ # # 1. Get key
191
+ # current_key = pending_df.loc[index, 'key']
192
+
193
+ # # 2. Update full DataFrame
194
+ # full_df.loc[full_df['key'] == current_key, 'UserVerifiedClass'] = verified_class
195
+ # full_df.loc[full_df['key'] == current_key, 'DrillDownInteraction'] = drill_down
196
+ # full_df.loc[full_df['key'] == current_key, 'AnnotatorUsername'] = user_tag
197
+
198
+ # # 3. Save to local CSV (overwriting)
199
+ # full_df.drop(columns=['key']).to_csv(LOCAL_DATASET_PATH, index=False)
200
+
201
+ # save_status = f"Saved (Local): {verified_class} | {drill_down} by {user_tag}"
202
+
203
+ # # 4. Load next item
204
+ # next_index = index + 1
205
+ # ui_updates = load_next_item(pending_df, next_index)
206
+ # ui_updates[status_box] = save_status
207
+ # ui_updates[full_df_state] = full_df # Store updated df in state
208
+ # return ui_updates
209
+
210
+ # except Exception as e:
211
+ # return {status_box: f"Error saving locally: {e}"}
212
+
213
+ # # --- 4. GRADIO UI ---
214
+ # with gr.Blocks(theme=gr.themes.Soft()) as demo:
215
+ # gr.Markdown("# Policy Coherence Annotation Tool")
216
+ # gr.Markdown(
217
+ # """
218
+ # Welcome! This tool is for human-in-the-loop annotation.
219
+ # 1. Log in with your authorized email.
220
+ # 2. The model's prediction for two policies will be shown.
221
+ # 3. **Step 1:** Verify if the model's 3-class prediction (neutral, coherent, incoherent) is correct, or change it.
222
+ # 4. **Step 2:** Based on your verified choice, select a 7-class drill-down label.
223
+ # 5. Click 'Save & Next' to submit your annotation and load the next item.
224
+
225
+ # ---
226
+ # ### Drill-Down Definitions
227
+ # - **+3 Indivisible**: Inextricably linked to the achievement of another goal.
228
+ # - **+2 Reinforcing**: Aids the achievement of another goal.
229
+ # - **+1 Enabling**: Creates conditions that further another goal.
230
+ # - **0 Consistent**: No significant positive or negative interactions.
231
+ # - **-1 Constraining**: Limits options on another goal.
232
+ # - **-2 Counteracting**: Clashes with another goal.
233
+ # - **-3 Cancelling**: Makes it impossible to reach another goal.
234
+ # """
235
+ # )
236
+
237
+ # # --- State variables ---
238
+ # full_df_state = gr.State()
239
+ # pending_df_state = gr.State()
240
+ # current_index_state = gr.State(value=0)
241
+ # hf_token_state = gr.State()
242
+ # user_tag_state = gr.State()
243
+
244
+ # # --- Section 1: Login ---
245
+ # with gr.Group() as login_box:
246
+ # with gr.Row():
247
+ # email_box = gr.Textbox(label="Email", placeholder="Enter your authorized email...")
248
+ # login_btn = gr.Button("Login & Load Dataset", variant="primary")
249
+ # progress_bar = gr.Markdown(value="Waiting for login...")
250
+
251
+ # # --- Section 2: Annotation (hidden until loaded) ---
252
+ # with gr.Group(visible=False) as annotation_box:
253
+ # # --- MODIFIED: Use gr.Row for side-by-side table layout ---
254
+ # with gr.Row():
255
+ # policy_a_display = gr.Textbox(label="Policy / Objective A", interactive=False, lines=5, container=True)
256
+ # policy_b_display = gr.Textbox(label="Policy / Objective B", interactive=False, lines=5, container=True)
257
+ # # --- END MODIFICATION ---
258
+
259
+ # with gr.Row():
260
+ # model_confidence_label = gr.Label(label="Model Confidence")
261
+ # user_verified_radio = gr.Radio(
262
+ # label="Step 1: Verify/Correct Classification",
263
+ # choices=VERIFY_CHOICES,
264
+ # info="The model's prediction is selected by default."
265
+ # )
266
+
267
+ # # --- UPDATED: Markdown instructions moved to top ---
268
+
269
+ # user_drill_down_dropdown = gr.Dropdown(
270
+ # label="Step 2: Drill-Down Interaction",
271
+ # choices=[], # Will be populated dynamically
272
+ # interactive=True
273
+ # )
274
+
275
+ # save_btn = gr.Button("Save & Next", variant="stop")
276
+ # status_box = gr.Textbox(label="Status", interactive=False)
277
+
278
+ # # --- 5. UI Event Handlers ---
279
+
280
+ # def update_drill_down_choices(verified_class):
281
+ # """
282
+ # Updates the drill-down dropdown based on the 3-class selection.
283
+ # """
284
+ # choices = DRILL_DOWN_MAP.get(verified_class, [])
285
+ # value = choices[0] if len(choices) == 1 else None # Auto-select "0 Consistent"
286
+ # # --- FIX: Return the constructor (Gradio 4.x syntax) ---
287
+ # return gr.Dropdown(
288
+ # choices=choices,
289
+ # value=value,
290
+ # interactive=len(choices) > 1 # Disable interaction if only one choice
291
+ # )
292
+
293
+ # def load_next_item(pending_df, index):
294
+ # """
295
+ # Loads the item at 'index' from the PENDING DataFrame into the UI.
296
+ # """
297
+ # if pending_df is None:
298
+ # return {status_box: "Data not loaded."}
299
+
300
+ # total_items = len(pending_df)
301
+ # if index >= total_items:
302
+ # return {
303
+ # progress_bar: gr.Markdown(f"**Annotation Complete! ({total_items} items total)**"),
304
+ # policy_a_display: "All items annotated.",
305
+ # policy_b_display: "",
306
+ # annotation_box: gr.Group(visible=False)
307
+ # }
308
+
309
+ # row = pending_df.iloc[index]
310
+ # # --- FIX: Use "model_label" from CSV ---
311
+ # model_pred = row["model_label"]
312
+
313
+ # # --- NEW: Build conf_dict conditionally ---
314
+ # if "model_confidence" in row:
315
+ # # New format: "model_label" + "model_confidence"
316
+ # confidence = row["model_confidence"]
317
+ # conf_dict = {}
318
+
319
+ # # Distribute probability
320
+ # remaining_prob = (1.0 - confidence) / 2.0
321
+ # for l in VERIFY_CHOICES: # ["neutral", "coherent", "incoherent"]
322
+ # if l == model_pred:
323
+ # conf_dict[l] = confidence
324
+ # else:
325
+ # conf_dict[l] = remaining_prob
326
+ # else:
327
+ # # Old format: "Confidence_Neutral", etc.
328
+ # conf_dict = {
329
+ # "neutral": row.get("Confidence_Neutral", 0.0),
330
+ # "coherent": row.get("Confidence_Coherent", 0.0),
331
+ # "incoherent": row.get("Confidence_Incoherent", 0.0)
332
+ # }
333
+ # # --- END NEW ---
334
+
335
+ # # --- NEW: Update drill-down based on model_pred ---
336
+ # drill_down_choices = DRILL_DOWN_MAP.get(model_pred, [])
337
+ # drill_down_value = drill_down_choices[0] if len(drill_down_choices) == 1 else None
338
+ # drill_down_interactive = len(drill_down_choices) > 1
339
+
340
+ # return {
341
+ # progress_bar: gr.Markdown(f"**Annotating Item {index + 1} of {total_items}**"),
342
+ # policy_a_display: row["PolicyA"],
343
+ # policy_b_display: row["PolicyB"],
344
+ # model_confidence_label: conf_dict,
345
+ # user_verified_radio: model_pred,
346
+ # # --- FIX: Return the constructor (Gradio 4.x syntax) ---
347
+ # user_drill_down_dropdown: gr.Dropdown(
348
+ # choices=drill_down_choices,
349
+ # value=drill_down_value,
350
+ # interactive=drill_down_interactive
351
+ # ),
352
+ # current_index_state: index,
353
+ # annotation_box: gr.Group(visible=True)
354
+ # }
355
+
356
+ # # When 'Login' is clicked:
357
+ # def login_and_load(email):
358
+ # # --- Authentication Step ---
359
+ # if email not in APPROVED_EMAILS:
360
+ # return {
361
+ # progress_bar: gr.Markdown(f"<font color='red'>Error: Email '{email}' is not authorized.</font>"),
362
+ # login_box: gr.Group(visible=True)
363
+ # }
364
+
365
+ # user_tag = APPROVED_EMAILS[email] # Get the tag (e.g., "user1")
366
+
367
+ # # --- NEW: Branching Logic for Debug/Live ---
368
+ # if DEBUG_TESTING:
369
+ # print("--- DEBUG MODE: Loading from local CSV ---")
370
+ # full_df, pending_df, status = load_data_from_local()
371
+ # token_to_store = "debug_mode" # Placeholder
372
+ # else:
373
+ # print("--- LIVE MODE: Loading from Hugging Face Hub ---")
374
+ # if HF_TOKEN == "YOUR_HF_WRITE_TOKEN_HERE" or not HF_TOKEN:
375
+ # return {
376
+ # progress_bar: gr.Markdown(f"<font color='red'>Error: App is not configured. HF_TOKEN is missing.</font>"),
377
+ # login_box: gr.Group(visible=True)
378
+ # }
379
+ # full_df, pending_df, status = load_data_from_hub(HF_TOKEN)
380
+ # token_to_store = HF_TOKEN
381
+
382
+ # # --- Common Logic ---
383
+ # if full_df is None:
384
+ # return {
385
+ # progress_bar: gr.Markdown(f"<font color='red'>{status}</font>"),
386
+ # login_box: gr.Group(visible=True)
387
+ # }
388
+
389
+ # # --- Load the first item ---
390
+ # first_item_updates = load_next_item(pending_df, 0)
391
+
392
+ # # --- Save all data to state and update UI ---
393
+ # first_item_updates[full_df_state] = full_df
394
+ # first_item_updates[pending_df_state] = pending_df
395
+ # first_item_updates[progress_bar] = f"Login successful as **{user_tag}**. {status}"
396
+ # first_item_updates[hf_token_state] = token_to_store # Save token/debug_flag to state
397
+ # first_item_updates[user_tag_state] = user_tag
398
+ # first_item_updates[login_box] = gr.Group(visible=False) # Hide login box
399
+ # first_item_updates[annotation_box] = gr.Group(visible=True) # Show annotation box
400
+ # return first_item_updates
401
+
402
+ # login_btn.click(
403
+ # fn=login_and_load,
404
+ # inputs=[email_box], # Input is ONLY the email box
405
+ # outputs=[
406
+ # progress_bar, policy_a_display, policy_b_display,
407
+ # model_confidence_label, user_verified_radio, user_drill_down_dropdown,
408
+ # current_index_state, annotation_box, login_box,
409
+ # full_df_state, pending_df_state, hf_token_state, user_tag_state, status_box
410
+ # ]
411
+ # )
412
+
413
+ # # --- NEW: Wrapper for Save Button ---
414
+ # def save_wrapper(index, verified_class, drill_down, user_tag, token, full_df, pending_df):
415
+ # if DEBUG_TESTING:
416
+ # return save_annotation_to_local(index, verified_class, drill_down, user_tag, full_df, pending_df)
417
+ # else:
418
+ # return save_annotation_to_hub(index, verified_class, drill_down, user_tag, token, full_df, pending_df)
419
+
420
+ # # --- NEW: Event listener for dynamic drill-down ---
421
+ # user_verified_radio.change(
422
+ # fn=update_drill_down_choices,
423
+ # inputs=user_verified_radio,
424
+ # outputs=user_drill_down_dropdown
425
+ # )
426
+
427
+ # # When 'Save & Next' is clicked
428
+ # save_btn.click(
429
+ # fn=save_wrapper, # Call the new wrapper function
430
+ # inputs=[
431
+ # current_index_state,
432
+ # user_verified_radio,
433
+ # user_drill_down_dropdown,
434
+ # user_tag_state, # Pass the user tag from state
435
+ # hf_token_state, # Pass the token from state
436
+ # full_df_state,
437
+ # pending_df_state
438
+ # ],
439
+ # outputs=[
440
+ # progress_bar, policy_a_display, policy_b_display,
441
+ # model_confidence_label, user_verified_radio, user_drill_down_dropdown,
442
+ # current_index_state, annotation_box, status_box, full_df_state
443
+ # ]
444
+ # )
445
+
446
+ # if __name__ == "__main__":
447
+ # if DEBUG_TESTING:
448
+ # print("\n" + "="*30)
449
+ # print("--- RUNNING IN DEBUG MODE ---")
450
+ # print(f"--- Data will be read/written to '{LOCAL_DATASET_PATH}' ---")
451
+ # print("="*30 + "\n")
452
+ # elif HF_TOKEN == "YOUR_HF_WRITE_TOKEN_HERE":
453
+ # print("\n--- WARNING: HF_TOKEN NOT SET ---")
454
+ # print("Please edit 'annotation_app.py' and add your HF_TOKEN to the top.")
455
+
456
+ # demo.launch(debug=True, share=True)
457
  import gradio as gr
458
  import pandas as pd
459
  import os
460
  from huggingface_hub import HfApi
461
+ from datasets import load_dataset
462
  import io
463
+ import ast # <-- CHANGED: Using ast for safe evaluation of stringified lists
 
 
 
 
464
 
465
  # --- 1. CONFIGURATION ---
466
 
467
  # --- !!! NEW: DEBUG/TESTING MODE !!! ---
 
 
468
  DEBUG_TESTING = False
469
  LOCAL_DATASET_PATH = "policy_evaluations.csv"
470
+ PREDICTIONS_CSV = "model_predictions.csv"
 
471
 
472
  HF = 'hf'
473
  token = 'pQQADyqfDNewBCejvPmyMGlzpdgqDFSAFE'
474
+ HF_DATASET_REPO = "kaburia/policy-evaluations"
 
 
475
  HF_TOKEN = HF + '_' + token
476
 
477
+ # Email Authentication
 
478
  APPROVED_EMAILS = {
479
  "kaburiaaustin1@tahmo.org": "user1",
480
  "E.Ramos@tudelft.nl" : "user2",
 
487
  "H.F.Hagenaars@tudelft.nl" : "user9",
488
  }
489
 
490
+ # Define Interaction Choices
491
  DRILL_DOWN_MAP = {
492
  "coherent": ["+3 Indivisible", "+2 Reinforcing", "+1 Enabling"],
493
  "neutral": ["0 Consistent"],
494
  "incoherent": ["-1 Constraining", "-2 Counteracting", "-3 Cancelling"]
495
  }
 
496
  VERIFY_CHOICES = ["neutral", "coherent", "incoherent"]
497
 
498
+ # DATA LOADING FUNCTIONS
499
 
500
  def load_data_from_hub(token):
501
+ if not token:
 
 
 
 
502
  return None, None, "Error: Hugging Face Token is not configured."
503
 
504
  try:
 
505
  ds = load_dataset(HF_DATASET_REPO, token=token, split="train", cache_dir="./cache")
506
  full_df = ds.to_pandas()
507
 
 
 
508
  new_cols = ["UserVerifiedClass", "DrillDownInteraction", "AnnotatorUsername"]
509
  for col in new_cols:
510
  if col not in full_df.columns:
 
511
  full_df[col] = pd.NA
 
512
 
 
513
  full_df['key'] = full_df['PolicyA'].astype(str) + '||' + full_df['PolicyB'].astype(str)
 
 
514
  pending_df = full_df[full_df['UserVerifiedClass'].isnull()].reset_index(drop=True)
515
 
516
  status = f"Loaded {len(pending_df)} remaining items to annotate. ({len(full_df) - len(pending_df)} already complete) [LIVE: HF Hub]"
 
520
  return None, None, f"Error loading dataset from Hub: {e}"
521
 
522
  def load_data_from_local():
 
 
 
 
523
  try:
524
  if not os.path.exists(LOCAL_DATASET_PATH):
 
525
  print(f"'{LOCAL_DATASET_PATH}' not found. Initializing from '{PREDICTIONS_CSV}'...")
526
  if not os.path.exists(PREDICTIONS_CSV):
527
  return None, None, f"Error: '{PREDICTIONS_CSV}' not found. Please run batch_inference.py first."
528
 
529
  df = pd.read_csv(PREDICTIONS_CSV)
 
530
  if "model_label" not in df.columns:
531
+ return None, None, f"Error: '{PREDICTIONS_CSV}' is missing 'model_label' column."
532
+
533
  df["UserVerifiedClass"] = pd.NA
534
  df["DrillDownInteraction"] = pd.NA
535
  df["AnnotatorUsername"] = pd.NA
536
  df.to_csv(LOCAL_DATASET_PATH, index=False)
537
  print(f"Initialized '{LOCAL_DATASET_PATH}'.")
538
 
 
539
  full_df = pd.read_csv(LOCAL_DATASET_PATH)
 
 
540
  new_cols = ["UserVerifiedClass", "DrillDownInteraction", "AnnotatorUsername"]
541
  for col in new_cols:
542
  if col not in full_df.columns:
 
551
  except Exception as e:
552
  return None, None, f"Error loading local dataset: {e}"
553
 
554
+ # DATA SAVING FUNCTIONS
555
 
556
  def save_annotation_to_hub(index, verified_class, drill_down, user_tag, token, full_df, pending_df):
 
 
 
557
  if not drill_down:
558
  return {status_box: "Error: Please select a drill-down interaction."}
559
  if not user_tag:
560
  return {status_box: "Error: User tag is missing. Please re-login."}
561
 
562
  try:
 
563
  current_key = pending_df.loc[index, 'key']
 
 
564
  full_df.loc[full_df['key'] == current_key, 'UserVerifiedClass'] = verified_class
565
  full_df.loc[full_df['key'] == current_key, 'DrillDownInteraction'] = drill_down
566
  full_df.loc[full_df['key'] == current_key, 'AnnotatorUsername'] = user_tag
567
 
 
 
568
  csv_buffer = io.StringIO()
 
569
  full_df.drop(columns=['key']).to_csv(csv_buffer, index=False)
570
  csv_content_bytes = csv_buffer.getvalue().encode('utf-8')
571
 
 
572
  api = HfApi()
573
  api.upload_file(
574
  path_or_fileobj=io.BytesIO(csv_content_bytes),
575
+ path_in_repo="policy_evaluations.csv",
576
  repo_id=HF_DATASET_REPO,
577
  token=token,
578
  repo_type="dataset"
579
  )
 
580
 
581
  save_status = f"Saved to Hub: {verified_class} | {drill_down} by {user_tag}"
582
+ ui_updates = load_next_item(pending_df, index + 1)
 
 
 
583
  ui_updates[status_box] = save_status
584
+ ui_updates[full_df_state] = full_df
585
  return ui_updates
586
 
587
  except Exception as e:
588
  return {status_box: f"Error saving to Hub: {e}"}
589
 
590
  def save_annotation_to_local(index, verified_class, drill_down, user_tag, full_df, pending_df):
 
 
 
591
  if not drill_down:
592
  return {status_box: "Error: Please select a drill-down interaction."}
593
  if not user_tag:
594
  return {status_box: "Error: User tag is missing. Please re-login."}
595
 
596
  try:
 
597
  current_key = pending_df.loc[index, 'key']
 
 
598
  full_df.loc[full_df['key'] == current_key, 'UserVerifiedClass'] = verified_class
599
  full_df.loc[full_df['key'] == current_key, 'DrillDownInteraction'] = drill_down
600
  full_df.loc[full_df['key'] == current_key, 'AnnotatorUsername'] = user_tag
601
 
 
602
  full_df.drop(columns=['key']).to_csv(LOCAL_DATASET_PATH, index=False)
 
603
  save_status = f"Saved (Local): {verified_class} | {drill_down} by {user_tag}"
604
 
605
+ ui_updates = load_next_item(pending_df, index + 1)
 
 
606
  ui_updates[status_box] = save_status
607
+ ui_updates[full_df_state] = full_df
608
  return ui_updates
609
 
610
  except Exception as e:
611
  return {status_box: f"Error saving locally: {e}"}
612
 
613
+ # GRADIO UI
614
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
615
  gr.Markdown("# Policy Coherence Annotation Tool")
616
  gr.Markdown(
617
  """
618
  Welcome! This tool is for human-in-the-loop annotation.
619
  1. Log in with your authorized email.
620
+ 2. The model's prediction for two policies will be shown, **along with its highlighted reasoning**.
621
+ 3. **Step 1:** Verify if the model's 3-class prediction is correct, or change it.
622
+ 4. **Step 2:** Select a 7-class drill-down label.
623
+ 5. Click 'Save & Next'. If you are unsure, you can click 'Skip & Next'.
 
 
 
 
 
 
 
 
 
 
624
  """
625
  )
626
 
 
627
  full_df_state = gr.State()
628
  pending_df_state = gr.State()
629
  current_index_state = gr.State(value=0)
630
  hf_token_state = gr.State()
631
  user_tag_state = gr.State()
632
 
 
633
  with gr.Group() as login_box:
634
  with gr.Row():
635
  email_box = gr.Textbox(label="Email", placeholder="Enter your authorized email...")
636
  login_btn = gr.Button("Login & Load Dataset", variant="primary")
637
  progress_bar = gr.Markdown(value="Waiting for login...")
638
 
 
639
  with gr.Group(visible=False) as annotation_box:
 
640
  with gr.Row():
641
+ policy_a_display = gr.Textbox(label="Policy / Objective A", interactive=False, lines=4, container=True)
642
+ policy_b_display = gr.Textbox(label="Policy / Objective B", interactive=False, lines=4, container=True)
643
+
644
+ gr.Markdown("### 🔍 Model Reasoning (Explainability)")
645
+ gr.Markdown("Highlights show which words influenced the model's prediction. **Green** means the word pushed *towards* the prediction, **Red** means it pushed *against* it.")
646
+ explanation_display = gr.HighlightedText(
647
+ label="Token Attributions",
648
+ color_map={"Supporting Evidence (+)": "#a7f3d0", "Contradicting Evidence (-)": "#fecaca"},
649
+ combine_adjacent=False
650
+ )
651
+ gr.Markdown("---")
652
 
653
  with gr.Row():
654
  model_confidence_label = gr.Label(label="Model Confidence")
 
658
  info="The model's prediction is selected by default."
659
  )
660
 
 
 
661
  user_drill_down_dropdown = gr.Dropdown(
662
  label="Step 2: Drill-Down Interaction",
663
+ choices=[],
664
  interactive=True
665
  )
666
 
667
+ with gr.Row():
668
+ skip_btn = gr.Button("Skip & Next (Unsure)")
669
+ save_btn = gr.Button("Save & Next", variant="primary")
670
+
671
  status_box = gr.Textbox(label="Status", interactive=False)
672
 
673
+ # UI Event Handlers
674
 
675
  def update_drill_down_choices(verified_class):
 
 
 
676
  choices = DRILL_DOWN_MAP.get(verified_class, [])
677
+ value = choices[0] if len(choices) == 1 else None
678
+ return gr.Dropdown(choices=choices, value=value, interactive=len(choices) > 1)
 
 
 
 
 
679
 
680
  def load_next_item(pending_df, index):
 
 
 
681
  if pending_df is None:
682
  return {status_box: "Data not loaded."}
683
 
 
687
  progress_bar: gr.Markdown(f"**Annotation Complete! ({total_items} items total)**"),
688
  policy_a_display: "All items annotated.",
689
  policy_b_display: "",
690
+ explanation_display: [],
691
  annotation_box: gr.Group(visible=False)
692
  }
693
 
694
  row = pending_df.iloc[index]
 
695
  model_pred = row["model_label"]
696
 
 
697
  if "model_confidence" in row:
 
698
  confidence = row["model_confidence"]
699
  conf_dict = {}
 
 
700
  remaining_prob = (1.0 - confidence) / 2.0
701
+ for l in VERIFY_CHOICES:
702
  if l == model_pred:
703
  conf_dict[l] = confidence
704
  else:
705
  conf_dict[l] = remaining_prob
706
  else:
 
707
  conf_dict = {
708
  "neutral": row.get("Confidence_Neutral", 0.0),
709
  "coherent": row.get("Confidence_Coherent", 0.0),
710
  "incoherent": row.get("Confidence_Incoherent", 0.0)
711
  }
712
+
 
 
713
  drill_down_choices = DRILL_DOWN_MAP.get(model_pred, [])
714
  drill_down_value = drill_down_choices[0] if len(drill_down_choices) == 1 else None
715
  drill_down_interactive = len(drill_down_choices) > 1
716
 
717
+
718
+ formatted_explanations = []
719
+ exp_str = row.get("explanation_data")
720
+
721
+ if pd.notna(exp_str) and isinstance(exp_str, str) and exp_str.strip() != "":
722
+ try:
723
+ # Safely evaluate the string into a Python list
724
+ raw_data = ast.literal_eval(exp_str)
725
+
726
+ # Iterate through the resulting list of lists/tuples
727
+ for item in raw_data:
728
+ # Ensure we are unpacking exactly two values
729
+ if len(item) == 2:
730
+ token, score = item
731
+
732
+ # Filter out tiny gradients to keep UI clean
733
+ if score > 0.05:
734
+ label = "Supporting Evidence (+)"
735
+ elif score < -0.05:
736
+ label = "Contradicting Evidence (-)"
737
+ else:
738
+ label = None
739
+
740
+ formatted_explanations.append((token, label))
741
+ except Exception as e:
742
+ print(f"Failed to parse explanation string using ast: {e}")
743
+ formatted_explanations = [("Error parsing explanation data for this row.", None)]
744
+ else:
745
+ formatted_explanations = [("No explainability data found for this row.", None)]
746
+
747
+
748
  return {
749
  progress_bar: gr.Markdown(f"**Annotating Item {index + 1} of {total_items}**"),
750
  policy_a_display: row["PolicyA"],
751
  policy_b_display: row["PolicyB"],
752
+ explanation_display: formatted_explanations,
753
  model_confidence_label: conf_dict,
754
  user_verified_radio: model_pred,
755
+ user_drill_down_dropdown: gr.Dropdown(choices=drill_down_choices, value=drill_down_value, interactive=drill_down_interactive),
 
 
 
 
 
756
  current_index_state: index,
757
  annotation_box: gr.Group(visible=True)
758
  }
759
 
 
760
  def login_and_load(email):
 
761
  if email not in APPROVED_EMAILS:
762
  return {
763
  progress_bar: gr.Markdown(f"<font color='red'>Error: Email '{email}' is not authorized.</font>"),
764
  login_box: gr.Group(visible=True)
765
  }
766
 
767
+ user_tag = APPROVED_EMAILS[email]
768
 
 
769
  if DEBUG_TESTING:
 
770
  full_df, pending_df, status = load_data_from_local()
771
+ token_to_store = "debug_mode"
772
  else:
773
+ if not HF_TOKEN:
 
774
  return {
775
  progress_bar: gr.Markdown(f"<font color='red'>Error: App is not configured. HF_TOKEN is missing.</font>"),
776
  login_box: gr.Group(visible=True)
 
778
  full_df, pending_df, status = load_data_from_hub(HF_TOKEN)
779
  token_to_store = HF_TOKEN
780
 
 
781
  if full_df is None:
782
  return {
783
  progress_bar: gr.Markdown(f"<font color='red'>{status}</font>"),
784
  login_box: gr.Group(visible=True)
785
  }
786
 
 
787
  first_item_updates = load_next_item(pending_df, 0)
788
 
 
789
  first_item_updates[full_df_state] = full_df
790
  first_item_updates[pending_df_state] = pending_df
791
  first_item_updates[progress_bar] = f"Login successful as **{user_tag}**. {status}"
792
+ first_item_updates[hf_token_state] = token_to_store
793
  first_item_updates[user_tag_state] = user_tag
794
+ first_item_updates[login_box] = gr.Group(visible=False)
795
+ first_item_updates[annotation_box] = gr.Group(visible=True)
796
  return first_item_updates
797
 
798
  login_btn.click(
799
  fn=login_and_load,
800
+ inputs=[email_box],
801
  outputs=[
802
+ progress_bar, policy_a_display, policy_b_display, explanation_display,
803
  model_confidence_label, user_verified_radio, user_drill_down_dropdown,
804
  current_index_state, annotation_box, login_box,
805
  full_df_state, pending_df_state, hf_token_state, user_tag_state, status_box
806
  ]
807
  )
808
 
 
809
  def save_wrapper(index, verified_class, drill_down, user_tag, token, full_df, pending_df):
810
  if DEBUG_TESTING:
811
  return save_annotation_to_local(index, verified_class, drill_down, user_tag, full_df, pending_df)
812
  else:
813
  return save_annotation_to_hub(index, verified_class, drill_down, user_tag, token, full_df, pending_df)
814
 
815
def skip_item(index, pending_df):
    """Advance to the next pending item without recording an annotation.

    Returns the component updates for item ``index + 1`` together with a
    status message naming the skipped item (1-based).
    """
    next_view = load_next_item(pending_df, index + 1)
    next_view[status_box] = f"Skipped item {index + 1}."
    return next_view
819
+
820
+ skip_btn.click(
821
+ fn=skip_item,
822
+ inputs=[current_index_state, pending_df_state],
823
+ outputs=[
824
+ progress_bar, policy_a_display, policy_b_display, explanation_display,
825
+ model_confidence_label, user_verified_radio, user_drill_down_dropdown,
826
+ current_index_state, annotation_box, status_box
827
+ ]
828
+ )
829
+
830
  user_verified_radio.change(
831
  fn=update_drill_down_choices,
832
  inputs=user_verified_radio,
833
  outputs=user_drill_down_dropdown
834
  )
835
 
 
836
  save_btn.click(
837
+ fn=save_wrapper,
838
  inputs=[
839
+ current_index_state, user_verified_radio, user_drill_down_dropdown,
840
+ user_tag_state, hf_token_state, full_df_state, pending_df_state
 
 
 
 
 
841
  ],
842
  outputs=[
843
+ progress_bar, policy_a_display, policy_b_display, explanation_display,
844
  model_confidence_label, user_verified_radio, user_drill_down_dropdown,
845
  current_index_state, annotation_box, status_box, full_df_state
846
  ]
 
852
  print("--- RUNNING IN DEBUG MODE ---")
853
  print(f"--- Data will be read/written to '{LOCAL_DATASET_PATH}' ---")
854
  print("="*30 + "\n")
 
 
 
855
 
856
  demo.launch(debug=True, share=True)