Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ from configs import configs
|
|
| 9 |
from clients import backend, logger
|
| 10 |
from backend.helpers import get_random_session_samples
|
| 11 |
|
| 12 |
-
dataset = load_dataset("iyosha-huji/
|
| 13 |
|
| 14 |
INSTRUCTIONS = """<div align='center'>You are given an audio sample and a question with 2 answer options.\n\nListen to the audio and select the correct answer from the options below.\n\n<b>Note:</b> The question is the same for all samples, but the audio and the corresponding answers change.</div>"""
|
| 15 |
|
|
@@ -47,7 +47,7 @@ def human_eval_tab():
|
|
| 47 |
if p == configs.USER_PASSWORD and usr.strip() != "":
|
| 48 |
new_session_id = str(uuid4())
|
| 49 |
sample_indices, stage = get_random_session_samples(
|
| 50 |
-
backend, dataset, STAGE_SPLITS, usr, num_samples=
|
| 51 |
)
|
| 52 |
logger.info(f"Session ID: {new_session_id}, Stage: {stage}")
|
| 53 |
return (
|
|
@@ -428,10 +428,37 @@ def get_admin_tab():
|
|
| 428 |
else:
|
| 429 |
rand_acc_msg = "Random sampling failed (no data)."
|
| 430 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
# Final message (no indentation!)
|
| 432 |
msg = f"""
|
| 433 |
## ✅ Accuracy Summary
|
| 434 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
### Majority Vote
|
| 436 |
{agg_msg}
|
| 437 |
|
|
|
|
| 9 |
from clients import backend, logger
|
| 10 |
from backend.helpers import get_random_session_samples
|
| 11 |
|
| 12 |
+
dataset = load_dataset("iyosha-huji/stressEval", token=configs.HF_API_TOKEN)["test"]
|
| 13 |
|
| 14 |
INSTRUCTIONS = """<div align='center'>You are given an audio sample and a question with 2 answer options.\n\nListen to the audio and select the correct answer from the options below.\n\n<b>Note:</b> The question is the same for all samples, but the audio and the corresponding answers change.</div>"""
|
| 15 |
|
|
|
|
| 47 |
if p == configs.USER_PASSWORD and usr.strip() != "":
|
| 48 |
new_session_id = str(uuid4())
|
| 49 |
sample_indices, stage = get_random_session_samples(
|
| 50 |
+
backend, dataset, STAGE_SPLITS, usr, num_samples=15
|
| 51 |
)
|
| 52 |
logger.info(f"Session ID: {new_session_id}, Stage: {stage}")
|
| 53 |
return (
|
|
|
|
| 428 |
else:
|
| 429 |
rand_acc_msg = "Random sampling failed (no data)."
|
| 430 |
|
| 431 |
+
|
| 432 |
+
correct = 0
|
| 433 |
+
total = 0
|
| 434 |
+
|
| 435 |
+
for _, row in df.iterrows():
|
| 436 |
+
idx = int(row["index_in_dataset"])
|
| 437 |
+
if idx >= len(dataset):
|
| 438 |
+
continue # skip out-of-range
|
| 439 |
+
sample = dataset[idx]
|
| 440 |
+
gt_answer = sample["possible_answers"][sample["label"]]
|
| 441 |
+
if row["answer"] == gt_answer:
|
| 442 |
+
correct += 1
|
| 443 |
+
total += 1
|
| 444 |
+
|
| 445 |
+
overall_acc = correct / total if total > 0 else None
|
| 446 |
+
if overall_acc is not None:
|
| 447 |
+
overall_acc_msg = (
|
| 448 |
+
f"Overall Accuracy: {overall_acc:.2%} ({correct}/{total})"
|
| 449 |
+
)
|
| 450 |
+
else:
|
| 451 |
+
overall_acc_msg = "No data available."
|
| 452 |
+
|
| 453 |
# Final message (no indentation!)
|
| 454 |
msg = f"""
|
| 455 |
## ✅ Accuracy Summary
|
| 456 |
|
| 457 |
+
### Overall Accuracy
|
| 458 |
+
{overall_acc_msg}
|
| 459 |
+
|
| 460 |
+
---
|
| 461 |
+
|
| 462 |
### Majority Vote
|
| 463 |
{agg_msg}
|
| 464 |
|