File size: 4,523 Bytes
10e9b7d eccf8e4 3c4371f 10e9b7d 27b3bf4 3db6293 e80aab9 27b3bf4 31243f4 27b3bf4 31243f4 27b3bf4 3c4371f 7e4a06b 27b3bf4 7e4a06b 27b3bf4 3c4371f 7e4a06b 31243f4 e80aab9 31243f4 27b3bf4 31243f4 27b3bf4 36ed51a 3c4371f 27b3bf4 eccf8e4 31243f4 7d65c66 27b3bf4 7d65c66 27b3bf4 e80aab9 7d65c66 27b3bf4 31243f4 27b3bf4 31243f4 27b3bf4 31243f4 27b3bf4 31243f4 27b3bf4 31243f4 27b3bf4 e80aab9 27b3bf4 7d65c66 27b3bf4 e80aab9 27b3bf4 31243f4 e80aab9 27b3bf4 e80aab9 27b3bf4 7d65c66 27b3bf4 e80aab9 27b3bf4 e80aab9 27b3bf4 31243f4 27b3bf4 0ee0419 e514fd7 27b3bf4 e514fd7 27b3bf4 e80aab9 7e4a06b e80aab9 31243f4 e80aab9 27b3bf4 e80aab9 31243f4 e80aab9 7d65c66 27b3bf4 7d65c66 27b3bf4 3c4371f 27b3bf4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent
from smolagents import DuckDuckGoSearchTool
from smolagents import PythonInterpreterTool
from smolagents import InferenceClientModel
# Base URL of the scoring service; run_and_submit_all appends "/questions"
# and "/submit" to it to build the endpoints it calls.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# ---------------- AGENT ---------------- #
class SmartAgent:
    """Question-answering agent built on a smolagents ``CodeAgent``.

    The agent is equipped with a DuckDuckGo search tool and a Python
    interpreter tool and is driven by an ``InferenceClientModel``.
    Instances are callable: ``agent(question)`` returns the answer as text.
    """

    def __init__(
        self,
        model_id: str = "meta-llama/Meta-Llama-3-8B-Instruct",
        max_steps: int = 8,
    ):
        """Create the model and the tool-equipped agent.

        Args:
            model_id: Identifier handed to ``InferenceClientModel``.
            max_steps: Step limit handed to ``CodeAgent``.
        """
        print("Initializing SmartAgent")
        self.model = InferenceClientModel(model_id=model_id)
        self.agent = CodeAgent(
            tools=[DuckDuckGoSearchTool(), PythonInterpreterTool()],
            model=self.model,
            max_steps=max_steps,
        )

    def __call__(self, question: str) -> str:
        """Run the agent on *question* and return its answer as stripped text.

        Returns an empty string when the agent yields ``None`` or when any
        exception is raised while running it; the exception is printed, not
        propagated, so one failing question does not abort a whole batch.
        """
        print("Question received:", question)
        try:
            result = self.agent.run(question)
            return "" if result is None else str(result).strip()
        except Exception as err:
            # Deliberate best-effort: report the failure and fall back to "".
            print("Agent error:", err)
            return ""
# ---------------- RUN EVALUATION ---------------- #
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the evaluation questions, answer them, and submit the answers.

    Args:
        profile: OAuth profile of the current user. A falsy value (``None``)
            is treated as "not logged in" and aborts the run.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message.
        ``results`` is a ``pandas.DataFrame`` with one row per attempted
        question (columns ``Task ID``, ``Question``, ``Submitted Answer``),
        or ``None`` when the run stopped before any question was attempted.
    """
    if not profile:
        return "Please login first.", None
    username = profile.username

    # NOTE(review): if SPACE_ID is unset the link below contains "None";
    # left as-is because the scoring service's requirements are not visible here.
    space_id = os.getenv("SPACE_ID")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = SmartAgent()
    except Exception as e:
        return f"Agent initialization error: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # ---------------- GET QUESTIONS ---------------- #
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Guard against an empty or malformed payload instead of crashing in the
    # loop below or submitting an empty run.
    if not isinstance(questions, list) or not questions:
        return "Fetched questions list is empty or has an invalid format.", None

    answers_payload = []
    results_log = []

    # ---------------- RUN AGENT ---------------- #
    for item in questions:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue
        try:
            answer = agent(question)
            answers_payload.append(
                {"task_id": task_id, "submitted_answer": answer}
            )
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question,
                    "Submitted Answer": answer,
                }
            )
        except Exception as e:
            # Keep the failure visible in the results table; it is not submitted.
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question,
                    "Submitted Answer": f"ERROR: {e}",
                }
            )

    if not answers_payload:
        return (
            "Agent did not produce any answers to submit.",
            pd.DataFrame(results_log),
        )

    # ---------------- SUBMIT ---------------- #
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score')}%\n"
            f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)
# ---------------- UI ---------------- #
# Build the page: title, instructions, login button, run button and the two
# output widgets. Components are created in display order.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
Instructions:
1. Login to Hugging Face
2. Click **Run Evaluation & Submit All Answers**
3. The agent will answer 20 GAIA questions
4. Your score will appear when finished
"""
    )

    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only text area for the status message returned by the run.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Table showing each question alongside the agent's answer.
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs are wired to the handler.
    # NOTE(review): the handler's profile argument is presumably supplied by
    # Gradio based on its type annotation — confirm.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    print("Starting Agent Evaluation App")
    demo.launch(debug=True)