eigbney committed on
Commit
245cfdb
·
verified ·
1 Parent(s): 1ae22f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -79
app.py CHANGED
@@ -9,125 +9,110 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
  class BasicAgent:
11
  def __init__(self):
12
- # 1. Initialize the Model (the 'brain')
13
- # This wrapper is the most stable version for HF Inference API in 2026.
14
- # It will automatically use your HF_TOKEN secret if added to the Space.
15
  self.model = InferenceClientModel(model_id="Qwen/Qwen2.5-72B-Instruct")
16
 
17
- # 2. Initialize the CodeAgent (the 'body')
18
- # - tools=[]: We start with no external tools.
19
- # - add_base_tools=False: This prevents the 'ddgs' / DuckDuckGo error.
20
- # Note: CodeAgent still has a built-in Python interpreter to solve math/logic!
21
  self.agent = CodeAgent(
22
  tools=[],
23
  model=self.model,
24
  add_base_tools=False
25
  )
26
- print("Agent successfully initialized with Python Interpreter (No ddgs needed).")
27
 
28
  def __call__(self, question: str) -> str:
29
- # 3. Prompting for Exact Match scoring
30
- # We tell the agent to be as direct as possible.
31
- clean_prompt = (
32
- f"Question: {question}\n\n"
33
- "Final Answer Requirement: Provide ONLY the numeric or text value of the answer. "
34
- "Do not include any explanation, units, or 'The answer is' text. "
35
- "Do not include 'FINAL ANSWER' in your output."
 
36
  )
37
 
38
  try:
39
- # The agent will write and run Python code if the question requires it.
40
- result = self.agent.run(clean_prompt)
41
- return str(result).strip()
 
 
 
 
 
 
 
 
 
42
  except Exception as e:
43
- print(f"Error during agent execution: {e}")
44
- return "Error solving question"
45
 
46
  def run_and_submit_all(profile: gr.OAuthProfile | None):
47
- space_id = os.getenv("SPACE_ID")
48
-
49
  if not profile:
50
  return "Please Login to Hugging Face with the button.", None
51
 
52
- username = f"{profile.username}"
53
- api_url = DEFAULT_API_URL
54
- questions_url = f"{api_url}/questions"
55
- submit_url = f"{api_url}/submit"
56
-
57
- # URL to your code for verification
58
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "https://huggingface.co/spaces"
59
 
60
  # 1. Fetch Questions
61
  try:
62
- response = requests.get(questions_url, timeout=15)
63
- response.raise_for_status()
64
  questions_data = response.json()
65
  except Exception as e:
66
- return f"Error fetching questions: {e}", None
67
 
68
- # 2. Run Agent
69
- # Instantiate inside the function to ensure a fresh session
70
- try:
71
- agent = BasicAgent()
72
- except Exception as e:
73
- return f"Error initializing agent: {e}", None
74
-
75
- results_log = []
76
  answers_payload = []
77
-
 
78
  print(f"Starting evaluation for {len(questions_data)} questions...")
79
 
80
  for item in questions_data:
81
  task_id = item.get("task_id")
82
  question_text = item.get("question")
83
 
84
- if not task_id or question_text is None:
85
- continue
86
-
87
  try:
88
- submitted_answer = agent(question_text)
89
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
90
- results_log.append({"Task ID": task_id, "Question": question_text[:80], "Submitted Answer": submitted_answer})
91
- except Exception as e:
92
- results_log.append({"Task ID": task_id, "Question": question_text[:80], "Submitted Answer": f"ERROR: {e}"})
93
-
94
- # 3. Submit Results
95
- submission_data = {
96
- "username": username.strip(),
97
- "agent_code": agent_code,
98
- "answers": answers_payload
99
- }
 
 
100
 
 
101
  try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
- response.raise_for_status()
104
- result_data = response.json()
105
-
106
- final_status = (
107
- f"Submission Successful!\n"
108
- f"User: {result_data.get('username')}\n"
109
- f"Overall Score: {result_data.get('score', 0)}% "
110
- f"({result_data.get('correct_count', 0)}/{result_data.get('total_attempted', 0)} correct)"
111
- )
112
- return final_status, pd.DataFrame(results_log)
113
  except Exception as e:
114
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
115
 
116
  # --- Gradio UI ---
117
  with gr.Blocks() as demo:
118
- gr.Markdown("# GAIA Final Evaluation Solver")
119
- gr.Markdown("Click 'Login' then 'Run' to solve all questions and submit your score.")
120
-
121
  gr.LoginButton()
122
- run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
123
-
124
- status_output = gr.Textbox(label="Submission Status", lines=5)
125
- results_table = gr.DataFrame(label="Agent Answers Log", wrap=True)
126
-
127
- run_button.click(
128
- fn=run_and_submit_all,
129
- outputs=[status_output, results_table]
130
- )
131
 
132
  if __name__ == "__main__":
133
  demo.launch()
 
9
 
10
  class BasicAgent:
11
  def __init__(self):
12
+ # We use Qwen2.5-72B for its strong reasoning and code generation
 
 
13
  self.model = InferenceClientModel(model_id="Qwen/Qwen2.5-72B-Instruct")
14
 
15
+ # CodeAgent natively handles Python code execution
 
 
 
16
  self.agent = CodeAgent(
17
  tools=[],
18
  model=self.model,
19
  add_base_tools=False
20
  )
 
21
 
22
  def __call__(self, question: str) -> str:
23
+ # This prompt is the 'gold standard' for GAIA exact-match scoring
24
+ strict_prompt = (
25
+ f"You are a general AI assistant. Solve this task: {question}\n\n"
26
+ "Report your thoughts, and finish your answer with the following template: "
27
+ "FINAL ANSWER: [YOUR FINAL ANSWER]\n\n"
28
+ "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list. "
29
+ "If the answer is a number, do not use units ($, %, kg). "
30
+ "If the answer is a string, don't use articles (a, an, the) or abbreviations."
31
  )
32
 
33
  try:
34
+ # Run the agent
35
+ raw_result = self.agent.run(strict_prompt)
36
+
37
+ # --- Strict Post-Processing ---
38
+ # Extract only the content after 'FINAL ANSWER:' if it exists
39
+ result_str = str(raw_result)
40
+ if "FINAL ANSWER:" in result_str:
41
+ result_str = result_str.split("FINAL ANSWER:")[-1]
42
+
43
+ # Remove markdown, quotes, and trailing punctuation
44
+ clean_ans = result_str.replace("`", "").replace('"', "").replace("'", "").strip()
45
+ return clean_ans.rstrip('.')
46
  except Exception as e:
47
+ print(f"Agent Error: {e}")
48
+ return "Error"
49
 
50
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
51
  if not profile:
52
  return "Please Login to Hugging Face with the button.", None
53
 
54
+ username = profile.username
55
+ space_id = os.getenv("SPACE_ID")
56
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
 
 
 
57
 
58
  # 1. Fetch Questions
59
  try:
60
+ response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
 
61
  questions_data = response.json()
62
  except Exception as e:
63
+ return f"Fetch Error: {e}", None
64
 
65
+ # 2. Setup Agent
66
+ agent = BasicAgent()
 
 
 
 
 
 
67
  answers_payload = []
68
+ results_log = []
69
+
70
  print(f"Starting evaluation for {len(questions_data)} questions...")
71
 
72
  for item in questions_data:
73
  task_id = item.get("task_id")
74
  question_text = item.get("question")
75
 
76
+ # --- File Download Logic ---
77
+ # Some GAIA questions refer to a file. We download it so the Python tool can read it.
78
+ file_url = f"{DEFAULT_API_URL}/files/{task_id}"
79
  try:
80
+ file_res = requests.get(file_url, timeout=5)
81
+ if file_res.status_code == 200:
82
+ # Save the file with its task_id as the name
83
+ with open(task_id, "wb") as f:
84
+ f.write(file_res.content)
85
+ question_text += f"\n\n[System Note: A file for this task has been downloaded to your directory as '{task_id}'. Use Python to read/analyze it.]"
86
+ except:
87
+ pass # No file for this task
88
+
89
+ # 3. Generate Answer
90
+ ans = agent(question_text)
91
+
92
+ answers_payload.append({"task_id": task_id, "submitted_answer": ans})
93
+ results_log.append({"Task ID": task_id, "Answer": ans})
94
 
95
+ # 4. Submit
96
  try:
97
+ sub_res = requests.post(f"{DEFAULT_API_URL}/submit", json={
98
+ "username": username,
99
+ "agent_code": agent_code,
100
+ "answers": answers_payload
101
+ })
102
+ data = sub_res.json()
103
+ status = f"Success! Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})"
104
+ return status, pd.DataFrame(results_log)
 
 
 
105
  except Exception as e:
106
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
107
 
108
  # --- Gradio UI ---
109
  with gr.Blocks() as demo:
110
+ gr.Markdown("# GAIA Pro Solver")
 
 
111
  gr.LoginButton()
112
+ run_btn = gr.Button("Run & Submit", variant="primary")
113
+ status = gr.Textbox(label="Result")
114
+ table = gr.DataFrame(label="Log")
115
+ run_btn.click(fn=run_and_submit_all, outputs=[status, table])
 
 
 
 
 
116
 
117
  if __name__ == "__main__":
118
  demo.launch()