Mouhamedamar committed on
Commit
eba54c1
·
verified ·
1 Parent(s): 4d09119

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -278
app.py CHANGED
@@ -1,330 +1,234 @@
1
  import os
2
- import re
3
- import time
4
  import requests
5
  import pandas as pd
6
- import gradio as gr
 
 
7
 
8
# Base URL of the scoring service; the /questions, /files/<task_id> and
# /submit endpoints used further down are all built from it.
DEFAULT_API_URL: str = "https://agents-course-unit4-scoring.hf.space"
9
-
10
- # ── Imports officiels smolagents ──────────────────────────────────────
11
- from smolagents import (
12
- CodeAgent,
13
- InferenceClientModel,
14
- DuckDuckGoSearchTool,
15
- VisitWebpageTool,
16
- tool,
17
- )
18
-
19
- # ── Prompt templates COMPLETS (obligatoires pour CodeAgent) ───────────
20
def get_prompt_templates():
    """Build the prompt-template mapping for the GAIA agent.

    Returns:
        A dict with the keys ``system_prompt``, ``planning``,
        ``managed_agent`` and ``final_answer`` (in that order), each mapped
        to its template text.
    """
    system_prompt = """You are an expert AI assistant solving GAIA benchmark tasks.
You have access to tools and must use them to find accurate answers.

RULES:
- Always use Thought: then Code: sequences
- Return ONLY the exact answer - no explanation
- For reversed text: reverse it back then answer
- For math/logic: write Python code to compute
- For files: use the download tools
- Answers are exact-match graded

{{authorized_imports}}
"""

    planning = """
Facts given in the task:
<<facts_given_in_task>>

Facts needed:
<<facts_needed>>

Plan:
<<plan>>

<end_plan>
"""

    managed_agent = """
You are a managed agent. Return your result via final_answer().
Task: {{task}}
"""

    final_answer = """
Return ONLY the final answer. No explanation. No punctuation unless required.
- Numbers: digits only (e.g. 42)
- Lists: comma-separated (e.g. apple, banana)
- Names: as-is
"""

    return dict(
        system_prompt=system_prompt,
        planning=planning,
        managed_agent=managed_agent,
        final_answer=final_answer,
    )
58
 
59
- # ── Tools custom ──────────────────────────────────────────────────────
60
 
61
  @tool
62
def wikipedia_search(query: str) -> str:
    """Search Wikipedia and return the intro of the top article.

    Two calls are made to the English Wikipedia API: a search for the best
    matching title, then a plain-text extract of that article's intro
    (truncated to 4000 characters).

    Args:
        query: The search terms to look up on Wikipedia.
    """
    base = "https://en.wikipedia.org/w/api.php"
    try:
        search = requests.get(base, params={
            "action": "query", "list": "search",
            "srsearch": query, "format": "json", "srlimit": 1,
        }, timeout=15)
        # Surface HTTP failures as such instead of as a JSON/KeyError later on.
        search.raise_for_status()
        hits = search.json().get("query", {}).get("search", [])
        if not hits:
            # Previously this case leaked out as "list index out of range".
            return f"Wikipedia error: no article found for '{query}'."
        title = hits[0]["title"]

        extract = requests.get(base, params={
            "action": "query", "prop": "extracts",
            "exintro": True, "explaintext": True,
            "titles": title, "format": "json",
        }, timeout=15)
        extract.raise_for_status()
        pages = extract.json()["query"]["pages"]
        # "pages" is keyed by page id; only one title was requested.
        text = next(iter(pages.values())).get("extract", "")[:4000]
        return f"# {title}\n{text}"
    except Exception as e:
        # Tool contract: always hand the agent a string, never raise.
        return f"Wikipedia error: {e}"
84
-
85
 
86
- @tool
87
def download_file_for_task(task_id: str) -> str:
    """Download and read any file attached to a GAIA task (PDF, Excel, audio, image, code).

    The payload kind is decided from the response content type and, where a
    signature is known, from the leading bytes. PDFs are text-extracted, audio
    is sent to a Whisper endpoint, spreadsheets are rendered as a table, images
    are sent to a vision chat endpoint, and anything else is decoded as UTF-8.

    Args:
        task_id: The GAIA task UUID string.
    """
    import base64
    import io

    def post_with_retry(url, **request_kwargs):
        # Up to three attempts; a 503 is retried after a 20 s pause. Returns the
        # decoded JSON of the first 200 response, or None if none arrived.
        auth = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}
        for _ in range(3):
            resp = requests.post(url, headers=auth, timeout=120, **request_kwargs)
            if resp.status_code == 503:
                time.sleep(20)
                continue
            if resp.status_code == 200:
                return resp.json()
        return None

    try:
        r = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
        if r.status_code == 404:
            return "No file attached to this task."
        if r.status_code != 200:
            # Server/rate-limit failures must not be reported as "no file".
            return f"File download error: HTTP {r.status_code}"
        data = r.content
        ct = r.headers.get("content-type", "").lower()

        # PDF
        if data[:4] == b"%PDF" or "pdf" in ct:
            try:
                from pypdf import PdfReader
                pages = PdfReader(io.BytesIO(data)).pages
                return "\n".join(p.extract_text() or "" for p in pages)[:6000]
            except Exception as e:
                return f"PDF error: {e}"

        # Audio -> Whisper
        if any(x in ct for x in ("audio", "mpeg", "mp3", "wav")) or data[:3] == b"ID3":
            result = post_with_retry(
                "https://api-inference.huggingface.co/models/openai/whisper-large-v3",
                data=data,
            )
            if result is None:
                return "Audio transcription failed."
            return result.get("text", "")

        # Excel / CSV ("PK" is the ZIP signature used by .xlsx containers)
        if any(x in ct for x in ("spreadsheet", "excel", "csv")) or data[:2] == b"PK":
            try:
                return pd.read_excel(io.BytesIO(data)).to_string(index=False)[:4000]
            except Exception:
                try:
                    return pd.read_csv(io.BytesIO(data)).to_string(index=False)[:4000]
                except Exception as e:
                    return f"Spreadsheet error: {e}"

        # Image -> vision model. Signatures are checked as well as the content
        # type so that images served as application/octet-stream are not
        # decoded as text by the fallback below.
        is_png = data[:4] == b"\x89PNG"
        is_jpeg = data[:3] == b"\xff\xd8\xff"
        if is_png or is_jpeg or any(x in ct for x in ("image", "png", "jpg", "jpeg")):
            mime = "image/png" if is_png else "image/jpeg"
            b64 = base64.b64encode(data).decode()
            payload = {
                "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
                "messages": [{"role": "user", "content": [
                    {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}},
                    {"type": "text", "text": "Describe everything in detail. If chess: name every piece and square. Transcribe any text/numbers exactly."},
                ]}],
                "max_tokens": 1024,
            }
            result = post_with_retry(
                "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions",
                json=payload,
            )
            if result is None:
                return "Image analysis failed."
            return result["choices"][0]["message"]["content"]

        # Text / code fallback
        return data.decode("utf-8", errors="replace")[:4000]

    except Exception as e:
        # Tool contract: always hand the agent a string, never raise.
        return f"File download error: {e}"
161
 
162
 
163
  @tool
164
def get_youtube_transcript(video_url: str) -> str:
    """Fetch the transcript/captions from a YouTube video URL.

    Watch (``v=``), ``youtu.be``, ``/shorts/``, ``/embed/`` and ``/live/`` links
    are recognised. The transcript text is joined with spaces and truncated to
    5000 characters.

    Args:
        video_url: The full YouTube URL e.g. https://www.youtube.com/watch?v=XXXXX
    """
    # Validate the URL first so a malformed link is reported as such even when
    # the transcript library is unavailable.
    m = re.search(
        r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([A-Za-z0-9_-]{11})",
        str(video_url or ""),
    )
    if not m:
        return "Could not extract video ID from URL."
    try:
        from youtube_transcript_api import YouTubeTranscriptApi
        transcript = YouTubeTranscriptApi.get_transcript(
            m.group(1), languages=["en", "en-US", "en-GB"]
        )
        return " ".join(t["text"] for t in transcript)[:5000]
    except Exception as e:
        # Tool contract: always hand the agent a string, never raise.
        return f"Transcript error: {e}"
178
 
179
-
180
- @tool
181
def run_python_code(code: str) -> str:
    """Execute Python code and return stdout. Use for math, logic, string ops, data processing.

    The code runs in a fresh interpreter with a 30 second timeout. When the
    script fails after already printing something, both the partial output and
    the error text are returned so the failure is not hidden.

    Args:
        code: Valid Python code to execute.
    """
    import subprocess
    import sys

    # NOTE(security): this executes arbitrary code with the privileges of the
    # current process; there is no sandbox beyond the separate interpreter.
    try:
        r = subprocess.run(
            [sys.executable, "-c", code],
            capture_output=True, text=True, timeout=30,
        )
    except subprocess.TimeoutExpired:
        return "Execution timed out."
    except Exception as e:
        return f"Execution error: {e}"

    out = r.stdout.strip()
    err = r.stderr.strip()
    if out and err and r.returncode != 0:
        # Previously the traceback was dropped whenever stdout was non-empty.
        return f"{out}\nstderr: {err}"
    if out:
        return out
    return f"stderr: {err}" if err else "(no output)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
 
196
- # ── Agent ─────────────────────────────────────────────────────────────
 
 
 
 
 
 
197
 
198
class GAIAAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` for GAIA questions.

    Calling an instance builds a task prompt, runs the agent and returns the
    answer with common lead-in phrases ("Answer:", "The answer is", ...) removed.
    """

    # Lead-in phrases removed from the start of the agent's reply (lower-case).
    _ANSWER_PREFIXES = ("the answer is", "answer:", "final answer:", "result:")

    def __init__(self):
        model = InferenceClientModel(
            model_id="meta-llama/Llama-3.3-70B-Instruct",
            token=os.environ.get("HF_TOKEN", ""),
        )
        self.agent = CodeAgent(
            tools=[
                DuckDuckGoSearchTool(),
                VisitWebpageTool(),
                wikipedia_search,
                download_file_for_task,
                get_youtube_transcript,
                run_python_code,
            ],
            model=model,
            add_base_tools=True,
            max_steps=10,
            verbosity_level=1,
            additional_authorized_imports=[
                "re", "json", "math", "unicodedata",
                "datetime", "collections", "itertools",
                "pandas", "requests", "os", "time",
            ],
        )
        print("GAIAAgent ready ✅")

    @classmethod
    def _strip_answer_prefixes(cls, answer: str) -> str:
        """Remove known lead-in phrases from the start of *answer*, repeatedly."""
        # Loop until a full pass strips nothing, so stacked prefixes such as
        # "Result: The answer is X" are removed regardless of their order.
        stripped = True
        while stripped:
            stripped = False
            for prefix in cls._ANSWER_PREFIXES:
                if answer.lower().startswith(prefix):
                    answer = answer[len(prefix):].strip().lstrip(":").strip()
                    stripped = True
        return answer

    def __call__(self, question: str, task_id: str = "") -> str:
        """Answer one question.

        Args:
            question: The question text.
            task_id: Optional task id; when given, the prompt tells the agent
                how to fetch the attached file.

        Returns:
            The cleaned answer, or "Unable to determine answer." if the agent
            raised.
        """
        print(f"\n{'='*60}\nQ: {question[:120]}")

        task_hint = ""
        if task_id:
            task_hint = f"\n\n[task_id='{task_id}' — call download_file_for_task('{task_id}') if a file/image/audio is needed]"

        prompt = (
            "Solve this GAIA benchmark question precisely.\n"
            "- Use tools to verify facts. Do NOT guess.\n"
            "- YouTube URL → call get_youtube_transcript\n"
            "- File/image/audio/excel/pdf → call download_file_for_task\n"
            "- Math/logic/strings → call run_python_code\n"
            "- Facts → wikipedia_search or DuckDuckGoSearchTool\n"
            "- Reversed text → decode first, then answer\n"
            "- Return ONLY the exact answer. No explanation.\n\n"
            f"Question: {question}{task_hint}"
        )

        try:
            result = self.agent.run(prompt)
            answer = self._strip_answer_prefixes(str(result).strip())
            print(f"→ Answer: {answer}")
            return answer
        except Exception as e:
            # Best effort: one failing question must not abort the whole run.
            print(f"Agent error: {e}")
            return "Unable to determine answer."
255
 
256
 
257
- # ── Gradio UI ─────────────────────────────────────────────────────────
258
 
259
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question and submit the answers for scoring.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        A ``(status_text, results)`` pair. ``results`` is a DataFrame of the
        per-question answers, or None when the run stopped before any
        question was processed.
    """
    if not profile:
        return "Please login to Hugging Face first.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    api_url = DEFAULT_API_URL
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local"

    try:
        agent = GAIAAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    try:
        response = requests.get(f"{api_url}/questions", timeout=15)
        # Without these checks an error payload (a dict) would be iterated as
        # if it were the question list and crash inside the loop below.
        response.raise_for_status()
        questions = response.json()
        if not isinstance(questions, list):
            raise ValueError(f"unexpected payload of type {type(questions).__name__}")
        print(f"Fetched {len(questions)} questions.")
    except Exception as e:
        return f"Error fetching questions: {e}", None

    results_log = []
    answers_payload = []

    for item in questions:
        task_id = item.get("task_id", "")
        question_text = item.get("question", "")
        if not task_id or not question_text:
            continue
        answer = agent(question_text, task_id=task_id)
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({"Task ID": task_id, "Question": question_text[:80], "Submitted Answer": answer})
        # Short pause between questions.
        time.sleep(2)

    if not answers_payload:
        return "No answers produced.", pd.DataFrame(results_log)

    try:
        resp = requests.post(
            f"{api_url}/submit",
            json={"username": username, "agent_code": agent_code, "answers": answers_payload},
            timeout=120,
        )
        resp.raise_for_status()
        d = resp.json()
        status = (
            f"✅ Submission Successful!\n"
            f"User: {d.get('username')}\n"
            f"Score: {d.get('score', 'N/A')}% "
            f"({d.get('correct_count', '?')}/{d.get('total_attempted', '?')} correct)\n"
            f"Message: {d.get('message', '')}"
        )
    except Exception as e:
        status = f"Submission error: {e}"

    return status, pd.DataFrame(results_log)
 
 
 
 
 
 
314
 
 
 
 
 
315
 
316
# Gradio front end: login button, a run button wired to run_and_submit_all,
# a status box and a table of per-question answers.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA Agent — smolagents + HF Inference")
    gr.Markdown("""
**Models:** Llama-3.3-70B · Llama-3.2-11B-Vision · Whisper large-v3
**Tools:** DuckDuckGo · Wikipedia · VisitWebpage · YouTube transcript · Python · File reader
**Setup:** Ajoute `HF_TOKEN` dans les secrets de ton Space HF.
""")
    gr.LoginButton()
    run_btn = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
    status_out = gr.Textbox(label="Status / Score", lines=6, interactive=False)
    results_tbl = gr.DataFrame(label="Questions & Answers", wrap=True)
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, results_tbl])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)
 
1
  import os
2
+ import gradio as gr
 
3
  import requests
4
  import pandas as pd
5
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, tool, HfApiModel
6
+ from smolagents.tools import WikipediaTool, VisitWebpageTool
7
+ import re
8
 
9
+ # ── Constants ──────────────────────────────────────────────────────────────────
10
# Root URL of the scoring service. DEFAULT_API_URL is bound to the same value.
API_BASE = DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ # ── Custom tools ───────────────────────────────────────────────────────────────
14
 
15
  @tool
16
def download_task_file(task_id: str) -> str:
    """
    Download a file associated with a GAIA task and return its content as text.
    For images, returns a description note. For CSVs/Excel, returns the raw text.
    Text output is capped at 8000 characters.

    Args:
        task_id: The GAIA task ID string.

    Returns:
        File content as a string, or an error message.
    """
    import io

    max_chars = 8000
    url = f"{API_BASE}/files/{task_id}"
    try:
        response = requests.get(url, timeout=30)
        if response.status_code == 404:
            return "No file attached to this task."
        response.raise_for_status()

        content_type = response.headers.get("content-type", "")
        # Bare media type without parameters such as "; charset=utf-8".
        media_type = content_type.split(";", 1)[0].strip().lower()
        payload = response.content

        # Excel. This must run before the text check: the .xlsx media type
        # ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
        # contains the substring "xml", which the old substring-based text
        # test matched, returning the raw ZIP bytes decoded as text.
        if "spreadsheet" in media_type or "excel" in media_type:
            df = pd.read_excel(io.BytesIO(payload))
            return df.to_string()[:max_chars]

        # Plain text / CSV / JSON / XML / code. Matched on the media type's
        # prefix or suffix rather than on arbitrary substrings.
        if media_type.startswith("text/") or media_type.endswith(("json", "xml", "csv")):
            return response.text[:max_chars]

        # PDF: extract text with pdfplumber if available
        if "pdf" in media_type:
            try:
                import pdfplumber
            except ImportError:
                return f"PDF file received ({len(payload)} bytes) but pdfplumber not installed."
            with pdfplumber.open(io.BytesIO(payload)) as pdf:
                text = "\n".join(p.extract_text() or "" for p in pdf.pages)
            return text[:max_chars] if text.strip() else "PDF has no extractable text."

        # Image
        if "image" in media_type:
            return (
                f"Image file received (type: {content_type}, size: {len(payload)} bytes). "
                "Use visual reasoning to answer the question."
            )

        # Fallback: try decoding as UTF-8
        try:
            return payload.decode("utf-8")[:max_chars]
        except UnicodeDecodeError:
            return f"Binary file received ({content_type}, {len(payload)} bytes). Cannot display."

    except Exception as e:
        # Tool contract: always hand the agent a string, never raise.
        return f"Error downloading file for task {task_id}: {e}"
71
 
72
 
73
  @tool
74
def calculator(expression: str) -> str:
    """
    Safely evaluate a mathematical expression and return the result.

    The expression is parsed into a syntax tree and only numeric literals,
    arithmetic operators, list/tuple literals and calls to public names of the
    ``math`` module are evaluated. Everything else (attribute access,
    subscripts, comparisons, lambdas, comprehensions, unknown names) is
    rejected instead of being handed to ``eval``.

    Args:
        expression: A Python-compatible math expression, e.g. '3.14 * 10**2'.

    Returns:
        The computed result as a string, or a message starting with
        "Calculation error:" when the expression is invalid or fails.
    """
    import ast
    import math
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Same namespace the eval-based version exposed: public names of math.
    math_names = {
        name: value for name, value in vars(math).items() if not name.startswith("_")
    }

    def evaluate(node):
        # Whitelist walk: any node type not handled below is an error. This
        # closes the attribute-access escape that eval() with empty
        # __builtins__ leaves open.
        if isinstance(node, ast.Constant):
            if isinstance(node.value, (int, float, complex)):
                return node.value
            raise ValueError(f"unsupported literal: {node.value!r}")
        if isinstance(node, ast.Name):
            if node.id in math_names:
                return math_names[node.id]
            raise NameError(f"name '{node.id}' is not defined")
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, (ast.List, ast.Tuple)):
            # Needed for calls such as fsum([1, 2, 3]).
            return [evaluate(item) for item in node.elts]
        if isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
            func = evaluate(node.func)
            if not callable(func):
                raise TypeError(f"'{node.func.id}' is not a function")
            args = [evaluate(arg) for arg in node.args]
            kwargs = {}
            for kw in node.keywords:
                if kw.arg is None:
                    raise ValueError("'**' argument unpacking is not supported")
                kwargs[kw.arg] = evaluate(kw.value)
            return func(*args, **kwargs)
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        tree = ast.parse(expression, mode="eval")
        return str(evaluate(tree.body))
    except Exception as e:
        # Tool contract: always hand the agent a string, never raise.
        return f"Calculation error: {e}"
92
+
93
+
94
+ # ── Agent factory ──────────────────────────────────────────────────────────────
95
+
96
def build_agent():
    """Create the CodeAgent used to answer GAIA questions.

    Returns:
        A ``CodeAgent`` driven by the Qwen2.5-72B-Instruct model and equipped
        with web search, page visiting, Wikipedia, task-file download and
        calculator tools.
    """
    # The token comes from the HF_TOKEN environment variable; it is None when
    # the variable is unset.
    llm = HfApiModel(
        model_id="Qwen/Qwen2.5-72B-Instruct",
        token=os.environ.get("HF_TOKEN"),
    )
    return CodeAgent(
        model=llm,
        tools=[
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            WikipediaTool(),
            download_task_file,
            calculator,
        ],
        additional_authorized_imports=["pandas", "re", "json", "math", "datetime"],
        max_steps=10,
    )
118
+
119
+
120
# Instruction text prepended to every question before it is sent to the agent.
# The dash and bullet characters are written as real Unicode: the previous
# text carried mis-decoded byte sequences in their place.
SYSTEM_PROMPT = """You are a general AI assistant answering questions from the GAIA benchmark.
Your goal is to provide a single, precise, final answer — nothing else.

Rules:
- Use tools (web search, Wikipedia, file download, calculator) as needed.
- Think step-by-step before answering.
- Your FINAL answer must be:
• As short as possible (a number, a name, a date, a list, etc.)
• Exactly matching the expected format described in the question.
• WITHOUT any prefix like "The answer is" or "FINAL ANSWER:".
- Never hallucinate. If unsure, search again.
"""
132
 
133
 
134
def run_agent_on_question(agent: "CodeAgent", question: str, task_id: str) -> str:
    """Run the agent on a single GAIA question.

    Any file attached to the task is downloaded first and up to 3000
    characters of its text are appended to the prompt.

    Args:
        agent: The agent to run; only its ``run(prompt)`` method is used.
        question: The question text.
        task_id: The GAIA task id used to look up an attached file.

    Returns:
        The agent's answer with a leading "final answer" label removed, or a
        string starting with "AGENT_ERROR:" if the agent raised.
    """
    # Status messages download_task_file returns when there is no usable file.
    # They are matched as prefixes: the previous substring test
    # ('"Error" not in ...') also threw away real files whose content merely
    # contained the word "Error" (logs, source code, ...).
    no_content_prefixes = ("No file attached", "Error downloading file")

    file_hint = ""
    if task_id:
        file_text = download_task_file(task_id)
        if file_text and not file_text.startswith(no_content_prefixes):
            file_hint = f"\n\n[Attached file content for task {task_id}]:\n{file_text[:3000]}"

    full_prompt = SYSTEM_PROMPT + f"\n\nQuestion: {question}{file_hint}"

    try:
        answer = agent.run(full_prompt)
        # Strip any accidental "FINAL ANSWER:" prefix the model might add
        answer = re.sub(r"(?i)^(final answer[:\s]*)", "", str(answer)).strip()
        return answer
    except Exception as e:
        # Best effort: one failing question must not abort the whole run.
        return f"AGENT_ERROR: {e}"
 
 
 
 
151
 
152
 
153
+ # ── Gradio UI ──────────────────────────────────────────────────────────────────
154
 
155
def run_and_submit(profile: gr.OAuthProfile | None):
    """Fetch questions, run agent, submit answers, return results table + score.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        A ``(status_markdown, results)`` pair. ``results`` is a DataFrame of
        the per-question answers, or None when the run stopped before any
        question was processed.
    """
    if profile is None:
        return "⚠️ Please log in with your HuggingFace account first.", None

    username = profile.username
    # Prefer the real Space id from the environment; fall back to the
    # template's default name only when the variable is not set.
    space_id = os.getenv("SPACE_ID") or f"{username}/Final_Assignment_Template"
    space_url = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # 1. Fetch questions
    try:
        resp = requests.get(f"{API_BASE}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"❌ Failed to fetch questions: {e}", None

    # 2. Build agent (reported in the UI instead of crashing the handler)
    try:
        agent = build_agent()
    except Exception as e:
        return f"❌ Failed to build agent: {e}", None

    # 3. Run agent on each question
    results_log = []
    answers_payload = []

    for item in questions:
        task_id = item.get("task_id", "")
        question = item.get("question", "")
        if not task_id or not question:
            # Nothing meaningful to run or submit for an incomplete item.
            continue

        print(f"[{task_id}] Running agent…")
        submitted_answer = run_agent_on_question(agent, question, task_id)
        print(f"[{task_id}] Answer: {submitted_answer}")

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question[:80] + "…" if len(question) > 80 else question,
            "Submitted Answer": submitted_answer,
        })

    df = pd.DataFrame(results_log)
    if not answers_payload:
        return "⚠️ No answers were produced; nothing was submitted.", df

    # 4. Submit to scoring API
    submission = {
        "username": username,
        "agent_code": space_url,
        "answers": answers_payload,
    }
    try:
        submit_resp = requests.post(f"{API_BASE}/submit", json=submission, timeout=60)
        submit_resp.raise_for_status()
        result = submit_resp.json()
        # Read "total_attempted" first and keep the old key as a fallback.
        total = result.get("total_attempted", result.get("total_questions", "?"))
        # Blank lines between parts: the status widget is Markdown, where a
        # single newline does not start a new line.
        score_msg = (
            f"✅ Submission successful!\n\n"
            f"**Score:** {result.get('score', 'N/A')}% "
            f"({result.get('correct_count','?')}/{total} correct)\n\n"
            f"**Message:** {result.get('message','')}"
        )
    except Exception as e:
        score_msg = f"⚠️ Agent ran but submission failed: {e}"

    return score_msg, df
214
+
215
+
216
+ # ── App layout ─────────────────────────────────────────────────────────────────
217
 
218
# Gradio front end: login button, a run button wired to run_and_submit,
# a Markdown status area and a table of per-question results.
with gr.Blocks(title="GAIA Agent – HF Certification") as demo:
    gr.Markdown(
        """
# 🤖 GAIA Agent — HuggingFace Agents Course Final Assignment
Log in with your HuggingFace account, then click **Run Agent & Submit** to evaluate your agent on the 20 GAIA Level-1 questions.
"""
    )

    login_btn = gr.LoginButton()
    run_btn = gr.Button("🚀 Run Agent & Submit", variant="primary")
    status = gr.Markdown("Status will appear here after submission.")
    results_table = gr.DataFrame(label="Per-question results", wrap=True)

    run_btn.click(fn=run_and_submit, outputs=[status, results_table])

if __name__ == "__main__":
    demo.launch()