iyosha committed on
Commit
efb69be
·
verified ·
1 Parent(s): 30fc179

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -2
app.py CHANGED
@@ -9,7 +9,7 @@ from configs import configs
9
  from clients import backend, logger
10
  from backend.helpers import get_random_session_samples
11
 
12
- dataset = load_dataset("iyosha-huji/stressBench", token=configs.HF_API_TOKEN)["test"]
13
 
14
  INSTRUCTIONS = """<div align='center'>You are given an audio sample and a question with 2 answer options.\n\nListen to the audio and select the correct answer from the options below.\n\n<b>Note:</b> The question is the same for all samples, but the audio and the corresponding answers change.</div>"""
15
 
@@ -47,7 +47,7 @@ def human_eval_tab():
47
  if p == configs.USER_PASSWORD and usr.strip() != "":
48
  new_session_id = str(uuid4())
49
  sample_indices, stage = get_random_session_samples(
50
- backend, dataset, STAGE_SPLITS, usr, num_samples=20
51
  )
52
  logger.info(f"Session ID: {new_session_id}, Stage: {stage}")
53
  return (
@@ -428,10 +428,37 @@ def get_admin_tab():
428
  else:
429
  rand_acc_msg = "Random sampling failed (no data)."
430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
  # Final message (no indentation!)
432
  msg = f"""
433
  ## ✅ Accuracy Summary
434
 
 
 
 
 
 
435
  ### Majority Vote
436
  {agg_msg}
437
 
 
9
  from clients import backend, logger
10
  from backend.helpers import get_random_session_samples
11
 
12
+ dataset = load_dataset("iyosha-huji/stressEval", token=configs.HF_API_TOKEN)["test"]
13
 
14
  INSTRUCTIONS = """<div align='center'>You are given an audio sample and a question with 2 answer options.\n\nListen to the audio and select the correct answer from the options below.\n\n<b>Note:</b> The question is the same for all samples, but the audio and the corresponding answers change.</div>"""
15
 
 
47
  if p == configs.USER_PASSWORD and usr.strip() != "":
48
  new_session_id = str(uuid4())
49
  sample_indices, stage = get_random_session_samples(
50
+ backend, dataset, STAGE_SPLITS, usr, num_samples=15
51
  )
52
  logger.info(f"Session ID: {new_session_id}, Stage: {stage}")
53
  return (
 
428
  else:
429
  rand_acc_msg = "Random sampling failed (no data)."
430
 
431
+
432
+ correct = 0
433
+ total = 0
434
+
435
+ for _, row in df.iterrows():
436
+ idx = int(row["index_in_dataset"])
437
+ if idx >= len(dataset):
438
+ continue # skip out-of-range
439
+ sample = dataset[idx]
440
+ gt_answer = sample["possible_answers"][sample["label"]]
441
+ if row["answer"] == gt_answer:
442
+ correct += 1
443
+ total += 1
444
+
445
+ overall_acc = correct / total if total > 0 else None
446
+ if overall_acc is not None:
447
+ overall_acc_msg = (
448
+ f"Overall Accuracy: {overall_acc:.2%} ({correct}/{total})"
449
+ )
450
+ else:
451
+ overall_acc_msg = "No data available."
452
+
453
  # Final message (no indentation!)
454
  msg = f"""
455
  ## ✅ Accuracy Summary
456
 
457
+ ### Overall Accuracy
458
+ {overall_acc_msg}
459
+
460
+ ---
461
+
462
  ### Majority Vote
463
  {agg_msg}
464