KaiserShultz commited on
Commit
eadec70
·
verified ·
1 Parent(s): cefdb3c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +338 -0
app.py ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Ankelodon Agent Adapter for the Hugging Face Agents Course evaluator.
2
+
3
+ This module exposes a simple Gradio-powered wrapper around the
4
+ `ankelodon_multiagent_system` project. It follows the same high-level flow
5
+ as the official GAIA template provided in the course materials: fetch
6
+ evaluation questions from the GAIA API, run your agent to produce
7
+ responses, and submit those responses back to the leaderboard.
8
+
9
+ The key differences between this adapter and the GAIA template are:
10
+
11
+ * It imports and uses your multi‑agent system defined in the `src`
12
+ package (see `src/agent.py`) via the `build_workflow` function. This
13
+ function returns a `langgraph` state machine capable of planning,
14
+ reasoning and executing tools. The adapter calls into this workflow
15
+ with a properly initialised `AgentState` and extracts the final
16
+ answer from the resulting state.
17
+ * It automatically downloads any file attachments associated with a
18
+ task (via the `/files/{task_id}` endpoint exposed by the evaluation
19
+ server) and saves them into a temporary directory. The local file
20
+ paths are passed into the agent through the `files` field of the
21
+ state. Your existing file handling logic (e.g. `preprocess_files`
22
+ in `src/tools/tools.py`) will detect the file type and suggest
23
+ appropriate tools.
24
* It strips any leading ``Final answer:`` prefix from the agent's
  response. The evaluation server performs an exact string match
  against the ground-truth answer, so it is important that the
  returned text contains only the answer and nothing else.
29
+
30
Before running this script yourself, make sure all dependencies in
`requirements.txt` are installed. To use the Gradio interface locally,
run `python app.py` from the project root. When deploying as a
Hugging Face Space for leaderboard submission, ensure the `SPACE_ID`
environment variable is set by the platform; it is used to construct
a link back to your code for verification.
36
+ """
37
+
38
+ from __future__ import annotations
39
+
40
+ import os
41
+ import tempfile
42
+ from typing import Optional, List, Dict, Any
43
+
44
+ import requests
45
+ import gradio as gr
46
+ import pandas as pd
47
+
48
+ try:
49
+ # Import the multi‑agent system components. When running as a script
50
+ # within the project root, Python's module search path should
51
+ # already include the `src` directory. If you get import errors,
52
+ # ensure that the working directory is the repository root or
53
+ # append `src` to `sys.path` manually before these imports.
54
+ from src.agent import build_workflow
55
+ from src.config import config as WORKFLOW_CONFIG
56
+ from src.state import AgentState
57
+ except Exception as import_err:
58
+ raise RuntimeError(
59
+ "Failed to import the Ankelodon multi-agent system. "
60
+ "Make sure you are running this script from the repository root "
61
+ "and that the project has been installed correctly."
62
+ ) from import_err
63
+
64
+ DEFAULT_API_URL: str = "https://agents-course-unit4-scoring.hf.space"
65
+
66
+
67
class AnkelodonAgent:
    """Callable wrapper around the Ankelodon multi-agent system.

    Instances are called with a natural-language question and an optional
    task identifier. The wrapper builds a ``langgraph`` workflow once via
    ``build_workflow()``, prepares an initial ``AgentState``-shaped dict,
    downloads any file attachment associated with the task, and invokes
    the workflow to produce a final answer string.
    """

    def __init__(self) -> None:
        # Compile the workflow once per agent; every call reuses the same
        # state machine, which is cheaper than rebuilding it per question.
        self.workflow = build_workflow()

    def _download_attachment(self, task_id: str) -> List[str]:
        """Download the file attachment (if any) for ``task_id``.

        The evaluation API exposes a ``/files/{task_id}`` endpoint. The
        response body is written to a temporary file whose extension is
        inferred from the HTTP ``Content-Type`` header, so the agent's
        file-handling tools can choose an appropriate processor.

        Returns a list with zero or one local file path, suitable for the
        ``files`` field of the agent state.
        """
        files: List[str] = []
        url = f"{DEFAULT_API_URL}/files/{task_id}"
        try:
            resp = requests.get(url, timeout=15, allow_redirects=True)
            if resp.status_code == 200 and resp.content:
                # Map common MIME substrings to file extensions understood
                # by the multi-agent system's file tools.
                ctype = resp.headers.get("content-type", "").lower()
                ext_map = {
                    "excel": ".xlsx",
                    "sheet": ".xlsx",
                    "csv": ".csv",
                    "python": ".py",
                    "audio": ".mp3",
                    "image": ".jpg",
                }
                extension = next(
                    (ext for key, ext in ext_map.items() if key in ctype), ""
                )
                tmp_dir = tempfile.mkdtemp(prefix="ankelodon_task_")
                path = os.path.join(tmp_dir, f"attachment{extension}")
                with open(path, "wb") as fh:
                    fh.write(resp.content)
                files.append(path)
        except Exception as e:
            # Best effort: a missing or unfetchable attachment should not
            # fail the whole task.
            print(f"[WARNING] Failed to fetch attachment for task {task_id}: {e}")
        return files

    @staticmethod
    def _strip_prefix(answer: str) -> str:
        """Remove a leading ``final answer:`` prefix, in any casing.

        The evaluation server scores by exact string match, so only the
        bare answer may be returned. Unlike a blanket ``str.replace``,
        this touches only a *leading* prefix and therefore cannot corrupt
        an answer that legitimately contains the phrase elsewhere; it
        also handles casings such as ``FINAL ANSWER:``.
        """
        cleaned = answer.strip()
        prefix = "final answer:"
        if cleaned.lower().startswith(prefix):
            cleaned = cleaned[len(prefix):].strip()
        return cleaned

    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
        """Run the multi-agent system to answer a question.

        Parameters
        ----------
        question: str
            The natural language query to answer.
        task_id: Optional[str]
            If provided, the ID used to fetch any associated file
            attachment from the evaluation API. Attachments are stored
            locally and passed into the agent via the ``files`` field.

        Returns
        -------
        str
            The final answer produced by the agent, with any leading
            "final answer:" prefix removed. If no answer is produced
            the empty string is returned.
        """
        # Build the initial agent state. AgentState defines many fields
        # that the workflow populates internally; only the essentials are
        # seeded here. Unrecognised keys are ignored by the state machine.
        state: Dict[str, Any] = {
            "query": question,
            "final_answer": "",
            "plan": None,
            "complexity_assessment": None,
            "current_step": 0,
            "reasoning_done": False,
            "messages": [],
            "files": [],
            "file_contents": {},
            "critique_feedback": None,
            "iteration_count": 0,
            "max_iterations": 3,
            "execution_report": None,
            "previous_tool_results": {},
        }

        # If a task ID is provided, attempt to download its attachment.
        if task_id:
            attachment_paths = self._download_attachment(task_id)
            if attachment_paths:
                state["files"] = attachment_paths

        # Invoke the workflow. `config` carries runtime options such as
        # recursion limits and thread identifiers (from src.config).
        try:
            result_state = self.workflow.invoke(state, config=WORKFLOW_CONFIG)
        except Exception as e:
            print(f"[ERROR] Failed to run workflow: {e}")
            return ""

        # Extract the final answer. Depending on the branch taken, either
        # the ``final_answer`` key or a generic ``answer`` key may be
        # present; use whichever exists, then strip any answer prefix so
        # exact-match scoring sees only the answer text.
        answer = ""
        if isinstance(result_state, dict):
            answer = result_state.get("final_answer") or result_state.get("answer") or ""
        if answer:
            answer = self._strip_prefix(answer)
        return answer
190
+
191
+
192
def run_and_submit_all(profile: Optional[gr.OAuthProfile]) -> tuple[str, pd.DataFrame | None]:
    """Fetch all questions, run the agent on each, and submit the answers.

    Replicates the behaviour of the GAIA template's ``run_and_submit_all``
    but drives the ``AnkelodonAgent`` defined above. Bound to a Gradio
    button in the UI.

    Parameters
    ----------
    profile: Optional[gr.OAuthProfile]
        The OAuth profile injected by Gradio's login button; its username
        attributes the submission on the leaderboard.

    Returns
    -------
    tuple[str, pd.DataFrame | None]
        A status message and a DataFrame of per-question results, or
        ``None`` for the DataFrame when nothing could be run at all.
    """
    # Require a logged-in user so the submission can be attributed.
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = getattr(profile, "username", "").strip()
    # Guard against a profile with an empty username: submitting it would
    # produce an unattributable (and likely rejected) leaderboard entry.
    if not username:
        return "Could not determine your Hugging Face username. Please log in again.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Instantiate the agent once; its compiled workflow is reused for
    # every question.
    try:
        agent = AnkelodonAgent()
        print("Ankelodon agent initialised successfully")
    except Exception as e:
        err_msg = f"Error initialising agent: {e}"
        print(err_msg)
        return err_msg, None

    # Fetch the evaluation questions from the API.
    try:
        print(f"Fetching questions from: {questions_url}")
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions_data = resp.json()
        if not questions_data:
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        err_msg = f"Error fetching questions: {e}"
        print(err_msg)
        return err_msg, None

    # Run the agent on each question, collecting both the submission
    # payload and a human-readable results log.
    results_log: List[Dict[str, Any]] = []
    answers_payload: List[Dict[str, str]] = []
    print(f"Running agent on {len(questions_data)} questions…")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            answer = agent(question_text, task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": answer,
            })
        except Exception as e:
            # A failure on one task must not abort the whole run; record
            # the error so it remains visible in the results table.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}",
            })

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # The leaderboard displays a link back to the code; it is constructed
    # from the SPACE_ID environment variable set by the HF platform.
    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        submission_resp = requests.post(submit_url, json=submission_data, timeout=60)
        submission_resp.raise_for_status()
        result_data = submission_resp.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        err_msg = f"Submission Failed: {e}"
        print(err_msg)
        return err_msg, pd.DataFrame(results_log)
291
+
292
+
293
# Build the Gradio interface. It mirrors the official GAIA template's UI
# but wires the run button to the Ankelodon agent's run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# Ankelodon Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions**

        1. Clone this repository or duplicate the associated Hugging Face Space.
        2. Log in to your Hugging Face account using the button below. Your HF
           username is used to attribute your submission on the leaderboard.
        3. Click **Run Evaluation & Submit All Answers** to fetch the questions,
           run the Ankelodon agent on each one, submit your answers, and display
           the resulting score and answers.

        ---
        This template is intentionally lightweight. Feel free to customise it –
        add caching, parallel execution or additional logging as you see fit.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # Status text is read-only: it only displays run/submission results.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # NOTE(review): no `inputs=` is given here; gradio is expected to inject
    # the OAuthProfile argument from the LoginButton session based on the
    # handler's type annotation — confirm with the installed gradio version.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
318
+
319
+
320
if __name__ == "__main__":
    # Running as a script: print a startup banner plus any Hugging Face
    # Space metadata before launching the Gradio app.
    banner = " Ankelodon Adapter Starting "
    print("\n" + "-" * 30 + banner + "-" * 30)
    host = os.getenv("SPACE_HOST")
    repo = os.getenv("SPACE_ID")
    if host:
        print(f"✅ SPACE_HOST found: {host}")
        print(f"   Runtime URL should be: https://{host}.hf.space")
    else:
        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
    if repo:
        print(f"✅ SPACE_ID found: {repo}")
        print(f"   Repo URL: https://huggingface.co/spaces/{repo}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{repo}/tree/main")
    else:
        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    print("-" * (60 + len(banner)) + "\n")
    # Launch the Gradio app.
    demo.launch(debug=True, share=False)