eigbney committed on
Commit
3bd7a68
·
verified ·
1 Parent(s): 26606ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -31
app.py CHANGED
@@ -2,79 +2,83 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- from smolagents import CodeAgent, HfApiModel
6
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
  class BasicAgent:
11
  def __init__(self):
12
- # HfApiModel is native and fast within the HF ecosystem
13
- # It uses your HF_TOKEN automatically if set in Secrets
14
- self.model = HfApiModel(model_id="Qwen/Qwen2.5-72B-Instruct")
15
 
16
- # We use CodeAgent WITHOUT external tools like Search.
17
- # add_base_tools=True provides the Python Interpreter for logic/math.
18
  self.agent = CodeAgent(
19
  tools=[],
20
  model=self.model,
21
  add_base_tools=True
22
  )
23
- print("Clean Agent initialized (Python Interpreter enabled).")
24
 
25
  def __call__(self, question: str) -> str:
26
- # Prompt specifically designed for GAIA Exact Match scoring
27
  clean_prompt = (
28
- f"Solve this task: {question}\n\n"
29
- "Final Answer Requirement: Provide ONLY the numeric or text value. "
30
- "Do not include units, symbols, or conversational filler. "
31
- "No 'The answer is...', no 'FINAL ANSWER' text. Just the raw value."
32
  )
33
 
34
  try:
35
- # The agent will write code to solve if the question is complex
36
  result = self.agent.run(clean_prompt)
37
  return str(result).strip()
38
  except Exception as e:
39
- print(f"Agent Error: {e}")
40
  return "Error"
41
 
42
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
43
  space_id = os.getenv("SPACE_ID")
44
 
45
  if not profile:
46
- return "Please Login to Hugging Face with the button.", None
47
 
48
  username = profile.username
49
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
50
 
51
- # 1. Fetch
52
  try:
53
  response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
 
54
  questions_data = response.json()
55
  except Exception as e:
56
- return f"Fetch Error: {e}", None
57
 
58
- # 2. Run
59
  agent = BasicAgent()
60
  answers_payload = []
61
  results_log = []
62
 
63
- print(f"Starting evaluation for {len(questions_data)} questions...")
64
-
65
  for item in questions_data:
66
  task_id = item.get("task_id")
67
  question_text = item.get("question")
68
 
69
  if not task_id: continue
70
 
71
- # Run agent
72
  submitted_answer = agent(question_text)
73
 
74
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
75
- results_log.append({"Task ID": task_id, "Question": question_text[:100], "Answer": submitted_answer})
 
 
 
 
76
 
77
- # 3. Submit
78
  submission_data = {
79
  "username": username,
80
  "agent_code": agent_code,
@@ -83,27 +87,29 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
83
 
84
  try:
85
  response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
 
86
  result_data = response.json()
87
 
88
  final_status = (
89
  f"Submission Successful!\n"
90
- f"Score: {result_data.get('score')}% "
91
- f"({result_data.get('correct_count')}/{result_data.get('total_attempted')} correct)"
 
92
  )
93
  return final_status, pd.DataFrame(results_log)
94
  except Exception as e:
95
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
96
 
97
- # --- Gradio Interface ---
98
  with gr.Blocks() as demo:
99
- gr.Markdown("# GAIA Agent Evaluation Runner (Clean Version)")
100
- gr.Markdown("Uses `smolagents` with a built-in Python Interpreter to solve tasks.")
101
 
102
  gr.LoginButton()
103
- run_button = gr.Button("Run Evaluation & Submit All", variant="primary")
104
 
105
- status_output = gr.Textbox(label="Status", lines=4)
106
- results_table = gr.DataFrame(label="Generated Answers", wrap=True)
107
 
108
  run_button.click(
109
  fn=run_and_submit_all,
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ from smolagents import CodeAgent, InferenceClientModel
6
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
  class BasicAgent:
11
  def __init__(self):
12
+ # InferenceClientModel is the latest standard for smolagents v1.7+
13
+ # It automatically uses the HF_TOKEN secret if added to your Space
14
+ self.model = InferenceClientModel(model_id="Qwen/Qwen2.5-72B-Instruct")
15
 
16
+ # add_base_tools=True enables the Python interpreter tool
 
17
  self.agent = CodeAgent(
18
  tools=[],
19
  model=self.model,
20
  add_base_tools=True
21
  )
22
+ print("Agent initialized with InferenceClientModel and Python Interpreter.")
23
 
24
  def __call__(self, question: str) -> str:
25
+ # Strict prompt to ensure "Exact Match" scoring works
26
  clean_prompt = (
27
+ f"Solve the following task: {question}\n\n"
28
+ "Final Answer Instructions: Provide ONLY the final result value. "
29
+ "No extra words, no units, no 'The answer is...', and no 'FINAL ANSWER' text."
 
30
  )
31
 
32
  try:
33
+ # The agent will use its Python tool if it needs to calculate or process data
34
  result = self.agent.run(clean_prompt)
35
  return str(result).strip()
36
  except Exception as e:
37
+ print(f"Agent execution error: {e}")
38
  return "Error"
39
 
40
  def run_and_submit_all(profile: gr.OAuthProfile | None):
41
+ # Determine the Space ID for the code link
42
  space_id = os.getenv("SPACE_ID")
43
 
44
  if not profile:
45
+ return "Please Login to Hugging Face first.", None
46
 
47
  username = profile.username
48
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
49
 
50
+ # 1. Fetch the evaluation questions
51
  try:
52
  response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
53
+ response.raise_for_status()
54
  questions_data = response.json()
55
  except Exception as e:
56
+ return f"Error fetching questions: {e}", None
57
 
58
+ # 2. Instantiate and run the agent
59
  agent = BasicAgent()
60
  answers_payload = []
61
  results_log = []
62
 
63
+ print(f"Processing {len(questions_data)} tasks...")
64
+
65
  for item in questions_data:
66
  task_id = item.get("task_id")
67
  question_text = item.get("question")
68
 
69
  if not task_id: continue
70
 
71
+ # Generate answer
72
  submitted_answer = agent(question_text)
73
 
74
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
75
+ results_log.append({
76
+ "Task ID": task_id,
77
+ "Question": question_text[:100] + "...",
78
+ "Agent Answer": submitted_answer
79
+ })
80
 
81
+ # 3. Submit to the leaderboard
82
  submission_data = {
83
  "username": username,
84
  "agent_code": agent_code,
 
87
 
88
  try:
89
  response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
90
+ response.raise_for_status()
91
  result_data = response.json()
92
 
93
  final_status = (
94
  f"Submission Successful!\n"
95
+ f"User: {result_data.get('username')}\n"
96
+ f"Score: {result_data.get('score', 0)}% "
97
+ f"({result_data.get('correct_count', 0)}/{result_data.get('total_attempted', 0)} correct)"
98
  )
99
  return final_status, pd.DataFrame(results_log)
100
  except Exception as e:
101
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
102
 
103
+ # --- Gradio UI ---
104
  with gr.Blocks() as demo:
105
+ gr.Markdown("# GAIA Solver (v2026 Optimized)")
106
+ gr.Markdown("Click Login, then Run to evaluate your agent on the GAIA dataset.")
107
 
108
  gr.LoginButton()
109
+ run_button = gr.Button("Run Evaluation & Submit", variant="primary")
110
 
111
+ status_output = gr.Textbox(label="Status/Score", lines=4)
112
+ results_table = gr.DataFrame(label="Task Log", wrap=True)
113
 
114
  run_button.click(
115
  fn=run_and_submit_all,