nehaMfiles committed on
Commit
46292d4
·
verified ·
1 Parent(s): 81917a3

Create neha

Browse files
Files changed (1) hide show
  1. neha +340 -0
neha ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Final Assignment — GAIA agent for the HF Agents Course (Unit 4).
3
+
4
+ Built on smolagents. It:
5
+ - fetches the filtered GAIA questions,
6
+ - downloads any attached file and extracts its text,
7
+ - runs a CodeAgent with web search / webpage / Wikipedia tools,
8
+ - prompts with the OFFICIAL GAIA system prompt and extracts the text
9
+ after "FINAL ANSWER:",
10
+ - submits the bare answer to the course /submit API,
11
+ - also writes an official GAIA json-lines file (model_answer +
12
+ reasoning_trace) you can download and upload to the real leaderboard.
13
+
14
+ Set these as *Space secrets* (Settings -> Variables and secrets):
15
+ - HF_TOKEN (always needed; raises your inference rate limit)
16
+ - MODEL_PROVIDER "hf" (default) or "litellm"
17
+ - MODEL_ID e.g. "Qwen/Qwen2.5-Coder-32B-Instruct" (hf)
18
+ or "gpt-4o" / "anthropic/claude-sonnet-4-5" (litellm)
19
+ - LITELLM_API_KEY only if MODEL_PROVIDER=litellm (OPENAI_API_KEY is used as a fallback)
20
+ A GPT-4-level model follows the format prompt easily and scores much higher.
21
+ """
22
+
23
+ import os
24
+ import io
25
+ import re
26
+ import json
27
+ import tempfile
28
+
29
+ import requests
30
+ import pandas as pd
31
+ import gradio as gr
32
+
33
+ from smolagents import (
34
+ CodeAgent,
35
+ InferenceClientModel,
36
+ LiteLLMModel,
37
+ DuckDuckGoSearchTool,
38
+ WikipediaSearchTool,
39
+ VisitWebpageTool,
40
+ )
41
+
42
# Base URL of the course scoring service (questions, files and submit endpoints).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Relative path of the json-lines file written after each evaluation run.
JSONL_PATH = "gaia_submission.jsonl"

# Official GAIA system prompt (from the paper / leaderboard).
# The literal is split one sentence per fragment; the concatenated text must
# stay unchanged because the model is prompted with it verbatim.
GAIA_SYSTEM_PROMPT = (
    "You are a general AI assistant. "
    "I will ask you a question. "
    "Report your thoughts, and finish your answer with the following template: "
    "FINAL ANSWER: [YOUR FINAL ANSWER]. "
    "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma "
    "separated list of numbers and/or strings. "
    "If you are asked for a number, don't use comma to write your number neither "
    "use units such as $ or percent sign unless specified otherwise. "
    "If you are asked for a string, don't use articles, neither abbreviations "
    "(e.g. for cities), and write the digits in plain text unless specified "
    "otherwise. "
    "If you are asked for a comma separated list, apply the above rules depending "
    "of whether the element to be put in the list is a number or a string."
)
58
+
59
+
60
+ # --------------------------------------------------------------------------- #
61
+ # Model selection
62
+ # --------------------------------------------------------------------------- #
63
def build_model():
    """Create the LLM backend selected through environment variables.

    ``MODEL_PROVIDER`` (default ``"hf"``, compared case-insensitively) chooses
    the backend and ``MODEL_ID`` (default ``"Qwen/Qwen2.5-Coder-32B-Instruct"``)
    chooses the model. Both backends are created with ``temperature=0.0``.

    Returns:
        A ``LiteLLMModel`` when the provider is ``"litellm"``; otherwise an
        ``InferenceClientModel``.
    """
    model_id = os.getenv("MODEL_ID", "Qwen/Qwen2.5-Coder-32B-Instruct")
    use_litellm = os.getenv("MODEL_PROVIDER", "hf").lower() == "litellm"

    if use_litellm:
        # LITELLM_API_KEY wins; OPENAI_API_KEY is only consulted as a fallback.
        api_key = os.getenv("LITELLM_API_KEY") or os.getenv("OPENAI_API_KEY")
        return LiteLLMModel(model_id=model_id, api_key=api_key, temperature=0.0)

    # Optional settings are forwarded only when the variable is set and non-empty.
    optional = {
        "provider": os.getenv("HF_INFERENCE_PROVIDER"),  # e.g. "together", "sambanova"
        "token": os.getenv("HF_TOKEN"),
    }
    client_kwargs = {"model_id": model_id, "temperature": 0.0}
    client_kwargs.update({key: value for key, value in optional.items() if value})
    return InferenceClientModel(**client_kwargs)
82
+
83
+
84
+ # --------------------------------------------------------------------------- #
85
+ # File handling: download a task's attachment and extract usable text
86
+ # --------------------------------------------------------------------------- #
87
def _safe_attachment_name(file_name: str) -> str:
    """Reduce an API-supplied file name to a bare name with no directory parts."""
    base = os.path.basename(file_name.replace("\\", "/"))
    # Names such as "", "." or ".." cannot be used as a file inside the temp dir.
    return base if base not in ("", ".", "..") else "attachment"


def fetch_file_text(api_url: str, task_id: str, file_name: str) -> str:
    """Download a task's attachment and return a text rendering for the prompt.

    The file is fetched from ``{api_url}/files/{task_id}``. Text-like, CSV/TSV,
    Excel, PDF and Word files are converted to text. Any other type is saved in
    the system temp directory and a note with the saved path is returned.

    Args:
        api_url: Base URL of the scoring API.
        task_id: Identifier of the task whose attachment is requested.
        file_name: Name reported for the attachment. Only its final path
            component is used for the extension and for the temp-file name, so
            a name containing directory parts cannot point outside the temp
            directory.

    Returns:
        The extracted text, or a bracketed note describing the saved file or
        the download/parse failure. This function never raises for download or
        parse errors.
    """
    url = f"{api_url}/files/{task_id}"
    try:
        r = requests.get(url, timeout=60)
        r.raise_for_status()
    except Exception as e:  # deliberate best-effort: the agent still runs without the file
        return f"[Could not download attached file '{file_name}': {e}]"

    data = r.content
    safe_name = _safe_attachment_name(file_name)
    ext = safe_name.lower().rsplit(".", 1)[-1] if "." in safe_name else ""

    try:
        if ext in ("txt", "py", "md", "json", "xml", "csv", "tsv"):
            text = data.decode("utf-8", errors="replace")
            if ext == "csv":
                df = pd.read_csv(io.StringIO(text))
                return f"CSV file '{file_name}' content:\n{df.to_string()}"
            if ext == "tsv":
                df = pd.read_csv(io.StringIO(text), sep="\t")
                return f"TSV file '{file_name}' content:\n{df.to_string()}"
            return f"File '{file_name}' content:\n{text}"

        if ext in ("xlsx", "xls"):
            # sheet_name=None loads every sheet as {sheet name: DataFrame}.
            sheets = pd.read_excel(io.BytesIO(data), sheet_name=None)
            parts = [f"Excel file '{file_name}':"]
            parts.extend(
                f"--- sheet: {name} ---\n{df.to_string()}" for name, df in sheets.items()
            )
            return "\n".join(parts)

        if ext == "pdf":
            import pdfplumber

            with pdfplumber.open(io.BytesIO(data)) as pdf:
                pages = [p.extract_text() or "" for p in pdf.pages]
            return f"PDF file '{file_name}' text:\n" + "\n".join(pages)

        if ext == "docx":
            import docx

            # python-docx accepts a file-like object, so no temp file is needed.
            doc = docx.Document(io.BytesIO(data))
            return f"Word file '{file_name}':\n" + "\n".join(
                p.text for p in doc.paragraphs
            )

        # Unknown type: keep the bytes on disk so the agent's tools can open them.
        tmp = os.path.join(tempfile.gettempdir(), safe_name)
        with open(tmp, "wb") as f:
            f.write(data)
        return (
            f"[A file named '{file_name}' is attached and saved locally at '{tmp}'. "
            f"Use your tools / Python to inspect it if the question needs it.]"
        )
    except Exception as e:  # deliberate best-effort: report instead of aborting the task
        return f"[Attached file '{file_name}' could not be parsed: {e}]"
141
+
142
+
143
+ # --------------------------------------------------------------------------- #
144
+ # Answer extraction / normalization
145
+ # --------------------------------------------------------------------------- #
146
def extract_answer(raw: str) -> str:
    """Reduce a model reply to the bare answer string to submit.

    Steps:
      1. Keep only the text after the last ``FINAL ANSWER:`` marker
         (case-insensitive), if there is one.
      2. Keep only the first line of what remains.
      3. When a marker was found, drop markdown emphasis asterisks left around
         the answer (e.g. from ``**FINAL ANSWER:** 42``).
      4. Repeatedly remove one pair of wrapping quotes or square brackets and a
         trailing sentence period until nothing changes.

    A wrapper is only removed when its delimiter does not also occur inside the
    text, so ``"a", "b"`` is left alone. A trailing period is kept only when
    the text is a bare integer followed by a dot (``"5."``); ``"3.14."``
    becomes ``"3.14"``.

    Args:
        raw: The agent's output; it is converted with ``str()`` first, so
            non-string values are accepted.

    Returns:
        The normalized answer, possibly an empty string.
    """
    closers = {'"': '"', "'": "'", "[": "]"}

    text = str(raw).strip()
    markers = list(re.finditer(r"final answer\s*:", text, flags=re.IGNORECASE))
    if markers:
        # Strip emphasis before picking the first line so "**\n42" still yields 42.
        text = text[markers[-1].end():].strip().strip("*").strip()

    # collapse to first line (the answer should be a single line)
    lines = text.splitlines()
    text = lines[0].strip() if lines else ""
    if markers:
        text = text.strip("*").strip()

    # A period can hide a wrapper and vice versa ('"x".'), so iterate until stable.
    previous = None
    while text != previous:
        previous = text
        closer = closers.get(text[:1])
        if len(text) >= 2 and closer == text[-1]:
            inner = text[1:-1]
            if text[0] not in inner and closer not in inner:
                text = inner.strip()
        if text.endswith(".") and not re.fullmatch(r"\d+\.", text):
            text = text[:-1].strip()
    return text
161
+
162
+
163
+ # --------------------------------------------------------------------------- #
164
+ # The agent
165
+ # --------------------------------------------------------------------------- #
166
class GaiaAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` for GAIA questions.

    Calling an instance builds the prompt for one question, runs the agent and
    returns ``(answer, reasoning_trace)``.
    """

    def __init__(self, api_url: str = DEFAULT_API_URL):
        """Build the model, the tool set and the underlying ``CodeAgent``.

        Args:
            api_url: Base URL of the scoring API; used to download attachments.
        """
        self.api_url = api_url
        llm = build_model()
        toolbox = [
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            WikipediaSearchTool(user_agent="GAIA-course-agent (student@example.com)"),
        ]
        allowed_imports = [
            "pandas", "numpy", "math", "statistics",
            "json", "re", "datetime", "itertools",
        ]
        self.agent = CodeAgent(
            tools=toolbox,
            model=llm,
            add_base_tools=True,  # python interpreter + transcriber
            additional_authorized_imports=allowed_imports,
            max_steps=10,
            verbosity_level=1,
        )
        print("GaiaAgent ready.")

    def _reasoning_trace(self) -> str:
        """Return a compact trace built from the agent's memory steps.

        For every step, the ``model_output`` is included in full and the
        ``observations`` are included with an ``Observation: `` prefix, cut to
        400 characters. The joined trace is cut to 6000 characters. Any error
        while reading the memory yields an empty string.
        """
        # (attribute name, line prefix, per-entry character limit; None = no limit)
        fields = (
            ("model_output", "", None),
            ("observations", "Observation: ", 400),
        )
        try:
            collected = []
            for step in getattr(self.agent.memory, "steps", []):
                for attr, prefix, limit in fields:
                    value = getattr(step, attr, None)
                    if value:
                        collected.append(prefix + str(value).strip()[:limit])
            return "\n".join(collected)[:6000]
        except Exception:
            return ""

    def __call__(self, question: str, task_id: str = "", file_name: str = ""):
        """Answer one question.

        Args:
            question: The question text.
            task_id: Task identifier, used to download the attachment and in
                the error log line.
            file_name: Name of the attached file; when empty no download is
                attempted.

        Returns:
            ``(answer, reasoning_trace)``. If the run fails, the answer is
            ``"unknown"`` and the trace is ``"error: <exception>"``.
        """
        sections = [
            GAIA_SYSTEM_PROMPT,
            "When you call final_answer, pass ONLY the value that should "
            "follow 'FINAL ANSWER:', formatted by the rules above.",
            "QUESTION:\n" + question,
        ]
        if file_name:
            sections.append(fetch_file_text(self.api_url, task_id, file_name))
        prompt = "\n\n".join(sections)

        try:
            raw_result = self.agent.run(prompt)
            answer = extract_answer(raw_result)
            trace = self._reasoning_trace()
        except Exception as err:
            print(f"Agent error on task {task_id}: {err}")
            return "unknown", f"error: {err}"
        return answer, trace
219
+
220
+
221
+ # --------------------------------------------------------------------------- #
222
+ # Fetch -> run -> submit
223
+ # --------------------------------------------------------------------------- #
224
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them with the agent, and submit the results.

    Besides posting to the course ``/submit`` endpoint, the answers and
    reasoning traces are written to ``JSONL_PATH`` as json-lines.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when the user
            has not logged in.

    Returns:
        ``(status_message, results_dataframe_or_None, jsonl_path_or_None)``,
        matching the three Gradio outputs wired to the run button.
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None, None

    username = profile.username
    print(f"User logged in: {username}")

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL

    try:
        agent = GaiaAgent(api_url)
    except Exception as err:
        return f"Error initializing agent: {err}", None, None

    # Link to this Space's code when running on the Hub, otherwise "local".
    if space_id:
        agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    else:
        agent_code = "local"

    try:
        response = requests.get(f"{api_url}/questions", timeout=30)
        response.raise_for_status()
        questions = response.json()
    except Exception as err:
        return f"Error fetching questions: {err}", None, None
    if not questions:
        return "Fetched questions list is empty.", None, None

    results_log = []
    answers_payload = []  # for the course /submit API
    jsonl_records = []  # for the official GAIA leaderboard file

    for entry in questions:
        task_id = entry.get("task_id")
        question = entry.get("question")
        file_name = entry.get("file_name", "") or ""
        if not task_id or question is None:
            continue  # skip malformed entries
        print(f"Running task {task_id} ...")
        answer, trace = agent(question, task_id, file_name)

        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        jsonl_records.append(
            {"task_id": task_id, "model_answer": answer, "reasoning_trace": trace}
        )
        results_log.append(
            {"Task ID": task_id, "Question": question, "Submitted Answer": answer}
        )

    # Write the official GAIA json-lines file for download.
    try:
        with open(JSONL_PATH, "w", encoding="utf-8") as out:
            out.writelines(
                json.dumps(record, ensure_ascii=False) + "\n" for record in jsonl_records
            )
        jsonl_file = JSONL_PATH
    except Exception as err:
        print(f"Could not write jsonl: {err}")
        jsonl_file = None

    results_table = pd.DataFrame(results_log)
    if not answers_payload:
        return "Agent produced no answers.", results_table, jsonl_file

    submission = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(f"{api_url}/submit", json=submission, timeout=120)
        response.raise_for_status()
        data = response.json()
        status = "\n".join(
            [
                "Submission Successful!",
                f"User: {data.get('username')}",
                f"Score: {data.get('score', 'N/A')}% "
                f"({data.get('correct_count', '?')}/"
                f"{data.get('total_attempted', '?')} correct)",
                f"Message: {data.get('message', '')}",
            ]
        )
    except Exception as err:
        return f"Submission Failed: {err}", results_table, jsonl_file
    return status, results_table, jsonl_file
312
+
313
+
314
+ # --------------------------------------------------------------------------- #
315
+ # Gradio UI
316
+ # --------------------------------------------------------------------------- #
317
# Page layout: title, instructions, login, run button and the three outputs
# (status text, results table, downloadable jsonl) filled by run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent — Final Assignment")
    gr.Markdown(
        "1. Log in with Hugging Face below.\n"
        "2. Click **Run Evaluation & Submit All Answers**.\n"
        "\n"
        "This submits to the course leaderboard AND produces a `gaia_submission.jsonl` "
        "file in the official GAIA format for download. "
        "Running all questions can take several minutes."
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        interactive=False,
        lines=5,
        label="Run Status / Submission Result",
    )
    results_table = gr.DataFrame(wrap=True, label="Questions and Agent Answers")
    jsonl_download = gr.File(label="Official GAIA submission (.jsonl)")
    # No explicit inputs are listed; the handler only declares the OAuth profile parameter.
    run_button.click(
        outputs=[status_output, results_table, jsonl_download],
        fn=run_and_submit_all,
    )


if __name__ == "__main__":
    demo.launch(share=False, debug=True)