Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
|
|
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
from huggingface_hub import login
|
| 6 |
import re
|
|
@@ -9,6 +11,7 @@ from groq import Groq
|
|
| 9 |
# --- Constants ---
|
| 10 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 11 |
|
|
|
|
| 12 |
# --- Basic Agent Definition ---
|
| 13 |
class BasicAgent:
|
| 14 |
def __init__(self):
|
|
@@ -100,7 +103,71 @@ class BasicAgent:
|
|
| 100 |
return self.solve_riddle(question)
|
| 101 |
return self.query_groq(question)
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
|
|
|
|
|
|
| 104 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 105 |
space_id = os.getenv("SPACE_ID")
|
| 106 |
if profile:
|
|
@@ -143,10 +210,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 143 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 144 |
except Exception as e:
|
| 145 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
| 146 |
-
|
| 147 |
if not answers_payload:
|
| 148 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 149 |
-
|
| 150 |
submission_data = {
|
| 151 |
"username": username.strip(),
|
| 152 |
"agent_code": agent_code,
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
+
import string
|
| 5 |
+
import warnings
|
| 6 |
import pandas as pd
|
| 7 |
from huggingface_hub import login
|
| 8 |
import re
|
|
|
|
| 11 |
# --- Constants ---
|
| 12 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 13 |
|
| 14 |
+
|
| 15 |
# --- Basic Agent Definition ---
|
| 16 |
class BasicAgent:
|
| 17 |
def __init__(self):
|
|
|
|
| 103 |
return self.solve_riddle(question)
|
| 104 |
return self.query_groq(question)
|
| 105 |
|
| 106 |
+
def question_scorer(model_answer: str, ground_truth: str) -> bool:
|
| 107 |
+
def normalize_str(input_str, remove_punct=True) -> str:
|
| 108 |
+
no_spaces = re.sub(r"\s", "", input_str)
|
| 109 |
+
if remove_punct:
|
| 110 |
+
translator = str.maketrans("", "", string.punctuation)
|
| 111 |
+
return no_spaces.lower().translate(translator)
|
| 112 |
+
else:
|
| 113 |
+
return no_spaces.lower()
|
| 114 |
+
|
| 115 |
+
def normalize_number_str(number_str: str) -> float | None:
|
| 116 |
+
for char in ["$", "%", ","]:
|
| 117 |
+
number_str = number_str.replace(char, "")
|
| 118 |
+
try:
|
| 119 |
+
return float(number_str)
|
| 120 |
+
except ValueError:
|
| 121 |
+
print(f"String '{number_str}' cannot be normalized to number.")
|
| 122 |
+
return None
|
| 123 |
+
|
| 124 |
+
def split_string(s: str, char_list: list[str] = [",", ";"]) -> list[str]:
|
| 125 |
+
pattern = f"[{''.join(map(re.escape, char_list))}]"
|
| 126 |
+
return [elem.strip() for elem in re.split(pattern, s)]
|
| 127 |
+
|
| 128 |
+
def is_float(val) -> bool:
|
| 129 |
+
try:
|
| 130 |
+
float(val)
|
| 131 |
+
return True
|
| 132 |
+
except ValueError:
|
| 133 |
+
return False
|
| 134 |
+
|
| 135 |
+
if model_answer is None:
|
| 136 |
+
model_answer = "None"
|
| 137 |
+
|
| 138 |
+
# Case 1: Ground truth is numeric
|
| 139 |
+
if is_float(ground_truth):
|
| 140 |
+
print(f"Evaluating '{model_answer}' as a number.")
|
| 141 |
+
normalized = normalize_number_str(model_answer)
|
| 142 |
+
return normalized == float(ground_truth) if normalized is not None else False
|
| 143 |
+
|
| 144 |
+
# Case 2: Ground truth is a list
|
| 145 |
+
elif any(char in ground_truth for char in [",", ";"]):
|
| 146 |
+
print(f"Evaluating '{model_answer}' as a comma/semicolon-separated list.")
|
| 147 |
+
gt_elems = split_string(ground_truth)
|
| 148 |
+
ma_elems = split_string(model_answer)
|
| 149 |
+
|
| 150 |
+
if len(gt_elems) != len(ma_elems):
|
| 151 |
+
warnings.warn("Answer lists have different lengths, returning False.", UserWarning)
|
| 152 |
+
return False
|
| 153 |
+
|
| 154 |
+
for ma_elem, gt_elem in zip(ma_elems, gt_elems):
|
| 155 |
+
if is_float(gt_elem):
|
| 156 |
+
normalized = normalize_number_str(ma_elem)
|
| 157 |
+
if normalized != float(gt_elem):
|
| 158 |
+
return False
|
| 159 |
+
else:
|
| 160 |
+
if normalize_str(ma_elem, remove_punct=False) != normalize_str(gt_elem, remove_punct=False):
|
| 161 |
+
return False
|
| 162 |
+
return True
|
| 163 |
+
|
| 164 |
+
# Case 3: Ground truth is a plain string
|
| 165 |
+
else:
|
| 166 |
+
print(f"Evaluating '{model_answer}' as a string.")
|
| 167 |
+
return normalize_str(model_answer) == normalize_str(ground_truth)
|
| 168 |
|
| 169 |
+
# Smoke check of the scorer. question_scorer requires both model_answer and
# ground_truth; calling it with a single argument raises TypeError as soon as
# the module is loaded, so an explicit ground truth is passed here.
print(question_scorer("FINAL ANSWER: right", "right"))
|
| 170 |
+
|
| 171 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 172 |
space_id = os.getenv("SPACE_ID")
|
| 173 |
if profile:
|
|
|
|
| 210 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 211 |
except Exception as e:
|
| 212 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
| 213 |
+
|
| 214 |
if not answers_payload:
|
| 215 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 216 |
+
print(question_scorer("FINAL ANSWER: right",submitted_answer))
|
| 217 |
submission_data = {
|
| 218 |
"username": username.strip(),
|
| 219 |
"agent_code": agent_code,
|