Spaces:

HiTZ
/

Critical_Questions_Leaderboard

Running

Blanca committed on Jun 9, 2025

Commit

1d73000

verified ·

1 Parent(s): fe37352

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -70,7 +70,7 @@ eval_dataframe_test = get_dataframe_from_results(eval_results=eval_results, spli
 # Gold answers
 gold_results = {}
 gold_dataset = load_dataset(INTERNAL_DATA_DATASET, "test", token=TOKEN, trust_remote_code=True)
-gold_results = {"test": {row["intervention_id"]: row for row in gold_dataset["test"]}}
 def restart_space():
@@ -153,19 +153,20 @@ def add_new_eval(
     with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
         with open(file_path, 'r') as f:
             for ix, line in enumerate(f):
-                try:
-                    task = json.loads(line)
-                except Exception:
-                    return format_error(f"Line {ix} is incorrectly formatted. Please fix it and resubmit your file.")
-                if "model_answer" not in task:
-                    return format_error(f"Line {ix} missing 'model_answer'.")
-                answer = task["model_answer"]
-                task_id = task["task_id"]
-                if task_id not in gold_results[val_or_test]:
-                    return format_error(f"{task_id} not found in gold set.")
-                score = question_scorer(answer, gold_results[val_or_test][task_id]["Final answer"])
                 scored_file.write(
                     json.dumps({

 # Gold answers
 gold_results = {}
 gold_dataset = load_dataset(INTERNAL_DATA_DATASET, "test", token=TOKEN, trust_remote_code=True)
+gold_results = {"test": {row["cqs"]: row for row in gold_dataset["test"]}}
 def restart_space():
     with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
         with open(file_path, 'r') as f:
             for ix, line in enumerate(f):
+                #try:
+                #    task = json.loads(line)
+                #except Exception:
+                #    return format_error(f"Line {ix} is incorrectly formatted. Please fix it and resubmit your file.")
+                #if "model_answer" not in task:
+                #    return format_error(f"Line {ix} missing 'model_answer'.")
+                #answer = task["model_answer"]
+                #task_id = task["task_id"]
+                #if task_id not in gold_results[val_or_test]:
+                #    return format_error(f"{task_id} not found in gold set.")
+                #score = question_scorer(answer, gold_results[val_or_test][task_id]["Final answer"])
+                score = 1
                 scored_file.write(
                     json.dumps({