Mouhamedamar committed on
Commit
0b1a31b
·
verified ·
1 Parent(s): 3339f5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -105
app.py CHANGED
@@ -7,57 +7,52 @@ import gradio as gr
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
- # ──────────────────────────────────────────────
11
- # smolagents β€” InferenceClientModel est dans smolagents directement
12
- # ──────────────────────────────────────────────
13
  from smolagents import (
14
- tool,
15
  CodeAgent,
16
  InferenceClientModel,
17
  DuckDuckGoSearchTool,
18
  VisitWebpageTool,
 
19
  )
20
 
21
- # ──────────────────────────────────────────────
22
- # Tools
23
- # ──────────────────────────────────────────────
24
 
25
  @tool
26
  def wikipedia_search(query: str) -> str:
27
  """Search Wikipedia and return the intro of the top article.
28
  Args:
29
- query: search terms
30
  """
31
  try:
32
- url = "https://en.wikipedia.org/w/api.php"
33
- r = requests.get(url, params={
34
  "action": "query", "list": "search",
35
  "srsearch": query, "format": "json", "srlimit": 1,
36
  }, timeout=15).json()
37
- title = r["query"]["search"][0]["title"]
38
- ex = requests.get(url, params={
39
  "action": "query", "prop": "extracts",
40
  "exintro": True, "explaintext": True,
41
  "titles": title, "format": "json",
42
  }, timeout=15).json()
43
- pages = ex["query"]["pages"]
44
- extract = next(iter(pages.values())).get("extract", "")[:4000]
45
- return f"# {title}\n{extract}"
46
  except Exception as e:
47
  return f"Wikipedia error: {e}"
48
 
49
 
50
  @tool
51
  def download_file_for_task(task_id: str) -> str:
52
- """Download and read a file attached to a GAIA task. Returns text content.
53
  Args:
54
- task_id: the GAIA task UUID
55
  """
56
  try:
57
- url = f"{DEFAULT_API_URL}/files/{task_id}"
58
- r = requests.get(url, timeout=30)
59
  if r.status_code != 200:
60
- return "No file found for this task."
61
  data = r.content
62
  ct = r.headers.get("content-type", "")
63
 
@@ -66,20 +61,20 @@ def download_file_for_task(task_id: str) -> str:
66
  try:
67
  import io
68
  from pypdf import PdfReader
69
- reader = PdfReader(io.BytesIO(data))
70
- text = "\n".join(p.extract_text() or "" for p in reader.pages)
71
  return text[:6000]
72
  except Exception as e:
73
  return f"PDF error: {e}"
74
 
75
- # Audio
76
  if any(x in ct for x in ["audio", "mpeg", "mp3", "wav"]) or data[:3] == b"ID3":
77
- hf_url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
78
- headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN','')}"}
79
  for _ in range(3):
80
- resp = requests.post(hf_url, headers=headers, data=data, timeout=120)
81
  if resp.status_code == 503:
82
- time.sleep(20); continue
 
83
  if resp.status_code == 200:
84
  return resp.json().get("text", "")
85
  return "Audio transcription failed."
@@ -88,66 +83,57 @@ def download_file_for_task(task_id: str) -> str:
88
  if any(x in ct for x in ["spreadsheet", "excel", "csv"]) or data[:2] == b"PK":
89
  try:
90
  import io
91
- df = pd.read_excel(io.BytesIO(data))
92
- return df.to_string(index=False)[:4000]
93
  except Exception:
94
  try:
95
  import io
96
- df = pd.read_csv(io.BytesIO(data))
97
- return df.to_string(index=False)[:4000]
98
  except Exception as e:
99
  return f"Spreadsheet error: {e}"
100
 
101
- # Image β†’ vision model
102
  if any(x in ct for x in ["image", "png", "jpg", "jpeg"]):
103
  import base64
104
- b64 = base64.b64encode(data).decode()
105
  mime = "image/png" if data[:4] == b"\x89PNG" else "image/jpeg"
106
- hf_url = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions"
107
- headers = {
108
- "Authorization": f"Bearer {os.environ.get('HF_TOKEN','')}",
109
- "Content-Type": "application/json",
110
- }
111
  payload = {
112
  "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
113
  "messages": [{"role": "user", "content": [
114
  {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}},
115
- {"type": "text", "text": "Describe this image in full detail. If chess: name every piece and square. If text/numbers: transcribe exactly."},
116
  ]}],
117
  "max_tokens": 1024,
118
  }
119
  for _ in range(3):
120
- resp = requests.post(hf_url, headers=headers, json=payload, timeout=120)
121
  if resp.status_code == 503:
122
- time.sleep(20); continue
 
123
  if resp.status_code == 200:
124
  return resp.json()["choices"][0]["message"]["content"]
125
  return "Image analysis failed."
126
 
127
  # Text / code fallback
128
- try:
129
- return data.decode("utf-8", errors="replace")[:4000]
130
- except Exception:
131
- return "Binary file, cannot read."
132
  except Exception as e:
133
  return f"File download error: {e}"
134
 
135
 
136
  @tool
137
  def get_youtube_transcript(video_url: str) -> str:
138
- """Get the transcript/captions from a YouTube video.
139
  Args:
140
- video_url: full YouTube URL like https://www.youtube.com/watch?v=XXXXX
141
  """
142
  try:
143
  from youtube_transcript_api import YouTubeTranscriptApi
144
- vid_match = re.search(r"v=([A-Za-z0-9_-]{11})", video_url)
145
- if not vid_match:
146
- vid_match = re.search(r"youtu\.be/([A-Za-z0-9_-]{11})", video_url)
147
- if not vid_match:
148
- return "Could not extract video ID."
149
- vid_id = vid_match.group(1)
150
- transcript = YouTubeTranscriptApi.get_transcript(vid_id, languages=["en", "en-US", "en-GB"])
151
  return " ".join(t["text"] for t in transcript)[:5000]
152
  except Exception as e:
153
  return f"Transcript error: {e}"
@@ -155,35 +141,28 @@ def get_youtube_transcript(video_url: str) -> str:
155
 
156
  @tool
157
  def run_python_code(code: str) -> str:
158
- """Execute Python code and return the output. Useful for math, logic, data processing.
159
  Args:
160
- code: Python code to execute
161
  """
162
  import subprocess, sys
163
  try:
164
- result = subprocess.run(
165
- [sys.executable, "-c", code],
166
- capture_output=True, text=True, timeout=30
167
- )
168
- out = result.stdout.strip()
169
- err = result.stderr.strip()
170
- return out if out else (f"Error: {err}" if err else "(no output)")
171
  except subprocess.TimeoutExpired:
172
- return "Timed out."
173
  except Exception as e:
174
- return f"Error: {e}"
175
 
176
 
177
- # ──────────────────────────────────────────────
178
- # Agent
179
- # ──────────────────────────────────────────────
180
 
181
  class GAIAAgent:
182
  def __init__(self):
 
183
  model = InferenceClientModel(
184
  model_id="meta-llama/Llama-3.3-70B-Instruct",
185
  token=os.environ.get("HF_TOKEN", ""),
186
- timeout=120,
187
  )
188
  self.agent = CodeAgent(
189
  tools=[
@@ -198,52 +177,59 @@ class GAIAAgent:
198
  max_steps=10,
199
  verbosity_level=1,
200
  additional_authorized_imports=[
201
- "re", "json", "math", "datetime",
202
- "collections", "itertools", "unicodedata",
203
  ],
204
  )
205
  print("GAIAAgent ready βœ…")
206
 
207
  def __call__(self, question: str, task_id: str = "") -> str:
208
- print(f"\n{'='*55}\nQ: {question[:120]}")
209
- full_question = question
210
- if task_id:
211
- full_question = (
212
- f"{question}\n\n"
213
- f"[If this question needs a file, call download_file_for_task with task_id='{task_id}']"
214
- )
 
 
 
 
 
215
  prompt = (
216
- "You are solving a GAIA benchmark question. Rules:\n"
217
- "- Think step by step and use tools when needed.\n"
218
- "- For YouTube links: call get_youtube_transcript.\n"
219
- "- For attached files (image/pdf/audio/excel): call download_file_for_task.\n"
220
- "- For math/logic: call run_python_code.\n"
221
- "- For facts: call wikipedia_search or DuckDuckGoSearchTool.\n"
222
- "- Give ONLY the final answer, nothing else. Exact match required.\n"
223
- "- For lists: comma-separated. For numbers: digits only.\n\n"
224
- f"Question: {full_question}"
 
 
225
  )
 
226
  try:
227
  result = self.agent.run(prompt)
228
  answer = str(result).strip()
229
- # Strip common LLM prefixes
230
- for prefix in ["The answer is", "Answer:", "ANSWER:", "Final answer:", "Result:"]:
231
- if answer.lower().startswith(prefix.lower()):
232
  answer = answer[len(prefix):].strip().lstrip(":").strip()
233
- print(f"β†’ {answer}")
234
  return answer
235
  except Exception as e:
236
  print(f"Agent error: {e}")
237
  return "Unable to determine answer."
238
 
239
 
240
- # ──────────────────────────────────────────────
241
- # Gradio UI
242
- # ──────────────────────────────────────────────
243
 
244
  def run_and_submit_all(profile: gr.OAuthProfile | None):
245
  if not profile:
246
  return "Please login to Hugging Face first.", None
 
247
  username = profile.username
248
  space_id = os.getenv("SPACE_ID", "")
249
  api_url = DEFAULT_API_URL
@@ -255,9 +241,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
255
  return f"Error initializing agent: {e}", None
256
 
257
  try:
258
- r = requests.get(f"{api_url}/questions", timeout=15)
259
- r.raise_for_status()
260
- questions = r.json()
261
  print(f"Fetched {len(questions)} questions.")
262
  except Exception as e:
263
  return f"Error fetching questions: {e}", None
@@ -272,11 +256,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
272
  continue
273
  answer = agent(question_text, task_id=task_id)
274
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
275
- results_log.append({
276
- "Task ID": task_id,
277
- "Question": question_text[:80],
278
- "Submitted Answer": answer,
279
- })
280
  time.sleep(1)
281
 
282
  if not answers_payload:
@@ -306,9 +286,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
306
  with gr.Blocks() as demo:
307
  gr.Markdown("# πŸ€– GAIA Agent β€” smolagents + HF Inference")
308
  gr.Markdown("""
309
- **Models:** Llama-3.3-70B (reasoning) Β· Llama-3.2-11B-Vision (images) Β· Whisper large-v3 (audio)
310
- **Tools:** DuckDuckGo Β· Wikipedia Β· Visit webpage Β· YouTube transcript Β· Python execution Β· File reader
311
- **Requires:** `HF_TOKEN` secret in your Space settings.
312
  """)
313
  gr.LoginButton()
314
  run_btn = gr.Button("πŸš€ Run Evaluation & Submit All Answers", variant="primary")
 
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
+ # ── Imports exacts depuis la doc officielle smolagents ──────────────────
 
 
11
  from smolagents import (
 
12
  CodeAgent,
13
  InferenceClientModel,
14
  DuckDuckGoSearchTool,
15
  VisitWebpageTool,
16
+ tool,
17
  )
18
 
19
+ # ── Tools custom ────────────────────────────────────────────────────────
 
 
20
 
21
  @tool
22
  def wikipedia_search(query: str) -> str:
23
  """Search Wikipedia and return the intro of the top article.
24
  Args:
25
+ query: The search terms to look up on Wikipedia.
26
  """
27
  try:
28
+ base = "https://en.wikipedia.org/w/api.php"
29
+ search = requests.get(base, params={
30
  "action": "query", "list": "search",
31
  "srsearch": query, "format": "json", "srlimit": 1,
32
  }, timeout=15).json()
33
+ title = search["query"]["search"][0]["title"]
34
+ extract = requests.get(base, params={
35
  "action": "query", "prop": "extracts",
36
  "exintro": True, "explaintext": True,
37
  "titles": title, "format": "json",
38
  }, timeout=15).json()
39
+ pages = extract["query"]["pages"]
40
+ text = next(iter(pages.values())).get("extract", "")[:4000]
41
+ return f"# {title}\n{text}"
42
  except Exception as e:
43
  return f"Wikipedia error: {e}"
44
 
45
 
46
  @tool
47
  def download_file_for_task(task_id: str) -> str:
48
+ """Download and read any file attached to a GAIA task (PDF, Excel, audio, image, code).
49
  Args:
50
+ task_id: The GAIA task UUID string.
51
  """
52
  try:
53
+ r = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
 
54
  if r.status_code != 200:
55
+ return "No file attached to this task."
56
  data = r.content
57
  ct = r.headers.get("content-type", "")
58
 
 
61
  try:
62
  import io
63
  from pypdf import PdfReader
64
+ text = "\n".join(p.extract_text() or "" for p in PdfReader(io.BytesIO(data)).pages)
 
65
  return text[:6000]
66
  except Exception as e:
67
  return f"PDF error: {e}"
68
 
69
+ # Audio β†’ Whisper
70
  if any(x in ct for x in ["audio", "mpeg", "mp3", "wav"]) or data[:3] == b"ID3":
71
+ token = os.environ.get("HF_TOKEN", "")
72
+ url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
73
  for _ in range(3):
74
+ resp = requests.post(url, headers={"Authorization": f"Bearer {token}"}, data=data, timeout=120)
75
  if resp.status_code == 503:
76
+ time.sleep(20)
77
+ continue
78
  if resp.status_code == 200:
79
  return resp.json().get("text", "")
80
  return "Audio transcription failed."
 
83
  if any(x in ct for x in ["spreadsheet", "excel", "csv"]) or data[:2] == b"PK":
84
  try:
85
  import io
86
+ return pd.read_excel(io.BytesIO(data)).to_string(index=False)[:4000]
 
87
  except Exception:
88
  try:
89
  import io
90
+ return pd.read_csv(io.BytesIO(data)).to_string(index=False)[:4000]
 
91
  except Exception as e:
92
  return f"Spreadsheet error: {e}"
93
 
94
+ # Image β†’ Llama Vision
95
  if any(x in ct for x in ["image", "png", "jpg", "jpeg"]):
96
  import base64
 
97
  mime = "image/png" if data[:4] == b"\x89PNG" else "image/jpeg"
98
+ b64 = base64.b64encode(data).decode()
99
+ token = os.environ.get("HF_TOKEN", "")
100
+ url = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions"
 
 
101
  payload = {
102
  "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
103
  "messages": [{"role": "user", "content": [
104
  {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}},
105
+ {"type": "text", "text": "Describe everything in this image in detail. If it's a chess board, name every piece and its exact square. If there is text or numbers, transcribe them exactly."},
106
  ]}],
107
  "max_tokens": 1024,
108
  }
109
  for _ in range(3):
110
+ resp = requests.post(url, headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"}, json=payload, timeout=120)
111
  if resp.status_code == 503:
112
+ time.sleep(20)
113
+ continue
114
  if resp.status_code == 200:
115
  return resp.json()["choices"][0]["message"]["content"]
116
  return "Image analysis failed."
117
 
118
  # Text / code fallback
119
+ return data.decode("utf-8", errors="replace")[:4000]
120
+
 
 
121
  except Exception as e:
122
  return f"File download error: {e}"
123
 
124
 
125
  @tool
126
  def get_youtube_transcript(video_url: str) -> str:
127
+ """Fetch the transcript/captions from a YouTube video URL.
128
  Args:
129
+ video_url: The full YouTube URL, e.g. https://www.youtube.com/watch?v=XXXXX
130
  """
131
  try:
132
  from youtube_transcript_api import YouTubeTranscriptApi
133
+ m = re.search(r"(?:v=|youtu\.be/)([A-Za-z0-9_-]{11})", video_url)
134
+ if not m:
135
+ return "Could not extract video ID from URL."
136
+ transcript = YouTubeTranscriptApi.get_transcript(m.group(1), languages=["en", "en-US", "en-GB"])
 
 
 
137
  return " ".join(t["text"] for t in transcript)[:5000]
138
  except Exception as e:
139
  return f"Transcript error: {e}"
 
141
 
142
  @tool
143
  def run_python_code(code: str) -> str:
144
+ """Execute Python code and return stdout. Use for math, logic, string manipulation, data processing.
145
  Args:
146
+ code: Valid Python code to execute.
147
  """
148
  import subprocess, sys
149
  try:
150
+ r = subprocess.run([sys.executable, "-c", code], capture_output=True, text=True, timeout=30)
151
+ return r.stdout.strip() or (f"stderr: {r.stderr.strip()}" if r.stderr.strip() else "(no output)")
 
 
 
 
 
152
  except subprocess.TimeoutExpired:
153
+ return "Execution timed out."
154
  except Exception as e:
155
+ return f"Execution error: {e}"
156
 
157
 
158
+ # ── Agent ───────────────────────────────────────────────────────────────
 
 
159
 
160
  class GAIAAgent:
161
  def __init__(self):
162
+ # Exactement comme dans la doc officielle smolagents
163
  model = InferenceClientModel(
164
  model_id="meta-llama/Llama-3.3-70B-Instruct",
165
  token=os.environ.get("HF_TOKEN", ""),
 
166
  )
167
  self.agent = CodeAgent(
168
  tools=[
 
177
  max_steps=10,
178
  verbosity_level=1,
179
  additional_authorized_imports=[
180
+ "re", "json", "math", "unicodedata",
181
+ "datetime", "collections", "itertools",
182
  ],
183
  )
184
  print("GAIAAgent ready βœ…")
185
 
186
  def __call__(self, question: str, task_id: str = "") -> str:
187
+ print(f"\n{'='*60}\nQ: {question[:120]}")
188
+
189
+ # DΓ©tection de fichier joint ou YouTube dans la question
190
+ has_yt = bool(re.search(r"youtube\.com|youtu\.be", question))
191
+ has_file_hint = any(w in question.lower() for w in ["attached", "file", "image", "audio", "excel", "spreadsheet", "pdf", "code"])
192
+
193
+ task_hint = ""
194
+ if task_id and (has_file_hint or has_yt):
195
+ task_hint = f"\n\nNote: task_id='{task_id}' β€” use download_file_for_task('{task_id}') if a file is needed."
196
+ elif task_id:
197
+ task_hint = f"\n\n[task_id: '{task_id}' β€” use download_file_for_task if a file is mentioned]"
198
+
199
  prompt = (
200
+ "Solve this GAIA benchmark question. Important rules:\n"
201
+ "- Use tools to find/verify information. Do NOT guess.\n"
202
+ "- For YouTube URLs β†’ call get_youtube_transcript.\n"
203
+ "- For attached files (pdf/image/audio/excel/code) β†’ call download_file_for_task.\n"
204
+ "- For math/logic/string manipulation β†’ call run_python_code.\n"
205
+ "- For factual lookups β†’ call wikipedia_search or DuckDuckGoSearchTool.\n"
206
+ "- Your final answer must be SHORT and EXACT (exact string match is used for grading).\n"
207
+ "- For reversed text: decode it first, then answer.\n"
208
+ "- For counts: give only the number.\n"
209
+ "- For lists: comma-separated values only.\n\n"
210
+ f"Question: {question}{task_hint}"
211
  )
212
+
213
  try:
214
  result = self.agent.run(prompt)
215
  answer = str(result).strip()
216
+ # Nettoyer les prΓ©fixes verbeux du LLM
217
+ for prefix in ["the answer is", "answer:", "final answer:", "result:"]:
218
+ if answer.lower().startswith(prefix):
219
  answer = answer[len(prefix):].strip().lstrip(":").strip()
220
+ print(f"β†’ Answer: {answer}")
221
  return answer
222
  except Exception as e:
223
  print(f"Agent error: {e}")
224
  return "Unable to determine answer."
225
 
226
 
227
+ # ── Gradio UI ────────────────────────────────────────────────────────────
 
 
228
 
229
  def run_and_submit_all(profile: gr.OAuthProfile | None):
230
  if not profile:
231
  return "Please login to Hugging Face first.", None
232
+
233
  username = profile.username
234
  space_id = os.getenv("SPACE_ID", "")
235
  api_url = DEFAULT_API_URL
 
241
  return f"Error initializing agent: {e}", None
242
 
243
  try:
244
+ questions = requests.get(f"{api_url}/questions", timeout=15).json()
 
 
245
  print(f"Fetched {len(questions)} questions.")
246
  except Exception as e:
247
  return f"Error fetching questions: {e}", None
 
256
  continue
257
  answer = agent(question_text, task_id=task_id)
258
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
259
+ results_log.append({"Task ID": task_id, "Question": question_text[:80], "Submitted Answer": answer})
 
 
 
 
260
  time.sleep(1)
261
 
262
  if not answers_payload:
 
286
  with gr.Blocks() as demo:
287
  gr.Markdown("# πŸ€– GAIA Agent β€” smolagents + HF Inference")
288
  gr.Markdown("""
289
+ **Models:** Llama-3.3-70B Β· Llama-3.2-11B-Vision Β· Whisper large-v3
290
+ **Tools:** DuckDuckGo Β· Wikipedia Β· VisitWebpage Β· YouTube transcript Β· Python Β· File reader (PDF/Excel/Audio/Image)
291
+ **Setup:** Ajoute `HF_TOKEN` dans les secrets de ton Space.
292
  """)
293
  gr.LoginButton()
294
  run_btn = gr.Button("πŸš€ Run Evaluation & Submit All Answers", variant="primary")