Blanca committed on
Commit
1d73000
·
verified ·
1 Parent(s): fe37352

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -70,7 +70,7 @@ eval_dataframe_test = get_dataframe_from_results(eval_results=eval_results, spli
70
  # Gold answers
71
  gold_results = {}
72
  gold_dataset = load_dataset(INTERNAL_DATA_DATASET, "test", token=TOKEN, trust_remote_code=True)
73
- gold_results = {"test": {row["intervention_id"]: row for row in gold_dataset["test"]}}
74
 
75
 
76
  def restart_space():
@@ -153,19 +153,20 @@ def add_new_eval(
153
  with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
154
  with open(file_path, 'r') as f:
155
  for ix, line in enumerate(f):
156
- try:
157
- task = json.loads(line)
158
- except Exception:
159
- return format_error(f"Line {ix} is incorrectly formatted. Please fix it and resubmit your file.")
160
- if "model_answer" not in task:
161
- return format_error(f"Line {ix} missing 'model_answer'.")
162
- answer = task["model_answer"]
163
- task_id = task["task_id"]
164
-
165
- if task_id not in gold_results[val_or_test]:
166
- return format_error(f"{task_id} not found in gold set.")
167
-
168
- score = question_scorer(answer, gold_results[val_or_test][task_id]["Final answer"])
 
169
 
170
  scored_file.write(
171
  json.dumps({
 
70
  # Gold answers
71
  gold_results = {}
72
  gold_dataset = load_dataset(INTERNAL_DATA_DATASET, "test", token=TOKEN, trust_remote_code=True)
73
+ gold_results = {"test": {row["cqs"]: row for row in gold_dataset["test"]}}
74
 
75
 
76
  def restart_space():
 
153
  with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
154
  with open(file_path, 'r') as f:
155
  for ix, line in enumerate(f):
156
+ #try:
157
+ # task = json.loads(line)
158
+ #except Exception:
159
+ # return format_error(f"Line {ix} is incorrectly formatted. Please fix it and resubmit your file.")
160
+ #if "model_answer" not in task:
161
+ # return format_error(f"Line {ix} missing 'model_answer'.")
162
+ #answer = task["model_answer"]
163
+ #task_id = task["task_id"]
164
+
165
+ #if task_id not in gold_results[val_or_test]:
166
+ # return format_error(f"{task_id} not found in gold set.")
167
+
168
+ #score = question_scorer(answer, gold_results[val_or_test][task_id]["Final answer"])
169
+ score = 1
170
 
171
  scored_file.write(
172
  json.dumps({