Mouhamedamar committed on
Commit
7b71a0f
·
verified ·
1 Parent(s): 1942536

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -36
app.py CHANGED
@@ -15,20 +15,34 @@ from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
15
  from langchain_community.utilities import WikipediaAPIWrapper
16
 
17
  # ── Constants ──────────────────────────────────────────────────────────────────
18
- API_BASE = "https://agents-course-unit4-scoring.hf.space"
19
- PRIMARY_MODEL = "llama-3.3-70b-versatile"
20
- FALLBACK_MODEL = "llama-3.1-70b-versatile" # or "gemma2-9b-it"
 
 
 
 
 
 
 
 
 
 
21
 
22
  SYSTEM_PROMPT = """You are a precise AI assistant solving GAIA benchmark questions.
23
 
24
- Rules:
25
- - Use tools (web_search, wikipedia_search, visit_webpage, download_task_file, calculator) as needed.
26
- - Reason step by step before giving your final answer.
27
- - Your FINAL answer must be:
28
- • As short as possible: a number, a name, a date, a list, etc.
29
- • Exactly matching the format described in the question (e.g. "+4.6", "White; 5876").
30
- • NO prefix like "The answer is" or "FINAL ANSWER:" — just the raw answer.
31
- - Never guess. If unsure, search again.
 
 
 
 
32
  """
33
 
34
  # ── Tools ──────────────────────────────────────────────────────────────────────
@@ -52,7 +66,9 @@ def wikipedia_search(query: str) -> str:
52
  query: The topic to look up on Wikipedia.
53
  """
54
  try:
55
- wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=4000))
 
 
56
  return wiki.run(query)
57
  except Exception as e:
58
  return f"Wikipedia error: {e}"
@@ -78,7 +94,7 @@ def visit_webpage(url: str) -> str:
78
  @tool
79
  def download_task_file(task_id: str) -> str:
80
  """Download the file attached to a GAIA task and return its content.
81
- Always call this when the question references a file or image.
82
  Args:
83
  task_id: The GAIA task ID string.
84
  """
@@ -88,11 +104,18 @@ def download_task_file(task_id: str) -> str:
88
  return "No file attached to this task."
89
  resp.raise_for_status()
90
  ct = resp.headers.get("content-type", "")
 
91
  if any(x in ct for x in ["text", "json", "xml", "csv"]):
92
  return resp.text[:6000]
 
93
  if "spreadsheet" in ct or "excel" in ct:
94
  import io
95
- return pd.read_excel(io.BytesIO(resp.content)).to_string()[:5000]
 
 
 
 
 
96
  if "pdf" in ct:
97
  try:
98
  import pdfplumber, io
@@ -101,12 +124,19 @@ def download_task_file(task_id: str) -> str:
101
  return text[:6000] or "PDF has no extractable text."
102
  except ImportError:
103
  return f"PDF received ({len(resp.content)} bytes). pdfplumber not installed."
 
104
  if "image" in ct:
105
- return f"Image file ({ct}, {len(resp.content)} bytes). Use visual reasoning."
 
 
 
 
 
106
  try:
107
  return resp.content.decode("utf-8")[:6000]
108
  except UnicodeDecodeError:
109
  return f"Binary file ({ct}, {len(resp.content)} bytes)."
 
110
  except Exception as e:
111
  return f"File download error: {e}"
112
 
@@ -115,12 +145,13 @@ def download_task_file(task_id: str) -> str:
115
  def calculator(expression: str) -> str:
116
  """Evaluate a mathematical Python expression safely.
117
  Args:
118
- expression: e.g. '(390/2) / (146*0.01)' or 'sqrt(144)'.
119
  """
120
  try:
121
  allowed = {k: v for k, v in vars(math).items() if not k.startswith("_")}
122
  allowed["__builtins__"] = {}
123
- return str(eval(expression, allowed))
 
124
  except Exception as e:
125
  return f"Calculation error: {e}"
126
 
@@ -134,6 +165,7 @@ class AgentState(TypedDict):
134
 
135
 
136
  def make_llm(model_id: str):
 
137
  return ChatGroq(
138
  model=model_id,
139
  temperature=0,
@@ -142,14 +174,26 @@ def make_llm(model_id: str):
142
 
143
 
144
  def agent_node(state: AgentState):
145
- try:
146
- response = make_llm(PRIMARY_MODEL).invoke(state["messages"])
147
- except Exception:
148
- response = make_llm(FALLBACK_MODEL).invoke(state["messages"])
149
- return {"messages": [response]}
 
 
 
 
 
 
 
 
 
 
 
150
 
151
 
152
  def should_continue(state: AgentState):
 
153
  last = state["messages"][-1]
154
  if hasattr(last, "tool_calls") and last.tool_calls:
155
  return "tools"
@@ -170,21 +214,44 @@ def build_graph():
170
  APP = build_graph()
171
 
172
 
 
 
173
  def run_agent(question: str, task_id: str) -> str:
174
- file_ctx = download_task_file.invoke({"task_id": task_id})
 
 
 
175
  file_hint = ""
176
- if file_ctx and "No file attached" not in file_ctx and "error" not in file_ctx.lower():
177
- file_hint = f"\n\n[Attached file for task {task_id}]:\n{file_ctx[:3000]}"
 
 
 
 
 
 
 
 
 
 
178
 
179
  messages = [
180
  SystemMessage(content=SYSTEM_PROMPT),
181
- HumanMessage(content=f"Question: {question}{file_hint}\n\nTask ID: {task_id}"),
182
  ]
 
183
  try:
184
- result = APP.invoke({"messages": messages}, config={"recursion_limit": 25})
185
- answer = result["messages"][-1].content
186
- answer = re.sub(r"(?i)^(final answer[:\s]*|answer[:\s]*)", "", str(answer)).strip()
 
 
 
 
 
 
187
  return answer
 
188
  except Exception as e:
189
  return f"AGENT_ERROR: {e}"
190
 
@@ -198,6 +265,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
198
  username = profile.username
199
  space_url = f"https://huggingface.co/spaces/{username}/Final_Assignment_Template/tree/main"
200
 
 
201
  try:
202
  resp = requests.get(f"{API_BASE}/questions", timeout=15)
203
  resp.raise_for_status()
@@ -210,9 +278,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
210
  for i, item in enumerate(questions):
211
  task_id = item.get("task_id", "")
212
  question = item.get("question", "")
213
- print(f"[{i+1}/{len(questions)}] {task_id}")
 
 
214
  answer = run_agent(question, task_id)
215
- print(f" → {answer}")
 
216
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
217
  results_log.append({
218
  "Task ID": task_id,
@@ -220,15 +291,20 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
220
  "Submitted Answer": answer,
221
  })
222
 
223
- payload = {"username": username, "agent_code": space_url, "answers": answers_payload}
 
 
 
 
 
224
  try:
225
  sub = requests.post(f"{API_BASE}/submit", json=payload, timeout=60)
226
  sub.raise_for_status()
227
  r = sub.json()
228
  msg = (
229
- f"✅ **Score : {r.get('score','?')}%** "
230
- f"({r.get('correct_count','?')}/{r.get('total_questions','?')} correctes)\n"
231
- f"{r.get('message','')}"
232
  )
233
  except Exception as e:
234
  msg = f"⚠️ Agent terminé mais soumission échouée : {e}"
@@ -236,10 +312,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
236
  return msg, pd.DataFrame(results_log)
237
 
238
 
 
 
239
  with gr.Blocks(title="GAIA Agent – LangGraph + Groq") as demo:
240
  gr.Markdown("""
241
  # 🤖 GAIA Agent — LangGraph + Groq
242
- **Modèles** : `llama-3.3-70b-versatile` (principal) · `mixtral-8x7b-32768` (fallback)
 
 
243
  **Tools** : Web Search · Wikipedia · Visit Webpage · File Download · Calculator
244
 
245
  Connectez-vous avec votre compte HuggingFace puis cliquez sur **Run & Submit**.
 
15
  from langchain_community.utilities import WikipediaAPIWrapper
16
 
17
  # ── Constants ──────────────────────────────────────────────────────────────────
18
+ API_BASE = "https://agents-course-unit4-scoring.hf.space"
19
+
20
+ # ✅ Modèles ACTIFS sur Groq en avril 2026
21
+ # Source : https://console.groq.com/docs/models
22
+ # ❌ DÉCOMMISSIONNÉS (ne pas utiliser) :
23
+ # - mixtral-8x7b-32768
24
+ # - llama-3.1-70b-versatile
25
+ # - gemma2-9b-it
26
+ MODEL_PRIORITY = [
27
+ "llama-3.3-70b-versatile", # ✅ Primaire — meilleur raisonnement
28
+ "openai/gpt-oss-20b", # ✅ Fallback 1 — très rapide
29
+ "llama-3.1-8b-instant", # ✅ Fallback 2 — léger et fiable
30
+ ]
31
 
32
  SYSTEM_PROMPT = """You are a precise AI assistant solving GAIA benchmark questions.
33
 
34
+ CRITICAL RULES:
35
+ 1. Answer ONLY the exact question asked. Never drift to another question.
36
+ 2. Use tools (web_search, wikipedia_search, visit_webpage, download_task_file, calculator) whenever you are not 100% certain of the answer.
37
+ 3. Reason step by step BEFORE giving your final answer.
38
+ 4. Your FINAL answer must be:
39
+ - As SHORT as possible: a number, a name, a date, a list, etc.
40
+ - Exactly matching the format described in the question (e.g. "+4.6", "White; 5876", "3").
41
+ - NO prefix like "The answer is" or "FINAL ANSWER:" — just the raw answer.
42
+ 5. Never hallucinate. If unsure, search again.
43
+ 6. If the question contains reversed or encoded text, decode it first, then answer what it asks.
44
+ 7. If a file is attached, read it carefully before answering.
45
+ 8. For math questions, always use the calculator tool.
46
  """
47
 
48
  # ── Tools ──────────────────────────────────────────────────────────────────────
 
66
  query: The topic to look up on Wikipedia.
67
  """
68
  try:
69
+ wiki = WikipediaQueryRun(
70
+ api_wrapper=WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=4000)
71
+ )
72
  return wiki.run(query)
73
  except Exception as e:
74
  return f"Wikipedia error: {e}"
 
94
  @tool
95
  def download_task_file(task_id: str) -> str:
96
  """Download the file attached to a GAIA task and return its content.
97
+ Always call this when the question references a file, image, spreadsheet, or document.
98
  Args:
99
  task_id: The GAIA task ID string.
100
  """
 
104
  return "No file attached to this task."
105
  resp.raise_for_status()
106
  ct = resp.headers.get("content-type", "")
107
+
108
  if any(x in ct for x in ["text", "json", "xml", "csv"]):
109
  return resp.text[:6000]
110
+
111
  if "spreadsheet" in ct or "excel" in ct:
112
  import io
113
+ try:
114
+ df = pd.read_excel(io.BytesIO(resp.content))
115
+ return df.to_string()[:5000]
116
+ except Exception as e:
117
+ return f"Excel read error: {e}"
118
+
119
  if "pdf" in ct:
120
  try:
121
  import pdfplumber, io
 
124
  return text[:6000] or "PDF has no extractable text."
125
  except ImportError:
126
  return f"PDF received ({len(resp.content)} bytes). pdfplumber not installed."
127
+
128
  if "image" in ct:
129
+ return (
130
+ f"Image file received ({ct}, {len(resp.content)} bytes). "
131
+ "Use the context of the question to reason about this image."
132
+ )
133
+
134
+ # Dernier recours : tenter le décodage UTF-8
135
  try:
136
  return resp.content.decode("utf-8")[:6000]
137
  except UnicodeDecodeError:
138
  return f"Binary file ({ct}, {len(resp.content)} bytes)."
139
+
140
  except Exception as e:
141
  return f"File download error: {e}"
142
 
 
145
  def calculator(expression: str) -> str:
146
  """Evaluate a mathematical Python expression safely.
147
  Args:
148
+ expression: e.g. '(390/2) / (146*0.01)' or 'sqrt(144)' or '2**10'.
149
  """
150
  try:
151
  allowed = {k: v for k, v in vars(math).items() if not k.startswith("_")}
152
  allowed["__builtins__"] = {}
153
+ result = eval(expression, allowed)
154
+ return str(result)
155
  except Exception as e:
156
  return f"Calculation error: {e}"
157
 
 
165
 
166
 
167
  def make_llm(model_id: str):
168
+ """Crée un LLM Groq avec les outils liés."""
169
  return ChatGroq(
170
  model=model_id,
171
  temperature=0,
 
174
 
175
 
176
  def agent_node(state: AgentState):
177
+ """
178
+ Essaie les modèles dans l'ordre MODEL_PRIORITY.
179
+ S'arrête dès qu'un modèle répond sans erreur.
180
+ """
181
+ last_error = None
182
+ for model_id in MODEL_PRIORITY:
183
+ try:
184
+ print(f" [agent] Essai modèle : {model_id}")
185
+ response = make_llm(model_id).invoke(state["messages"])
186
+ return {"messages": [response]}
187
+ except Exception as e:
188
+ print(f" [agent] Modèle {model_id} échoué : {e}")
189
+ last_error = e
190
+ continue
191
+
192
+ raise RuntimeError(f"Tous les modèles Groq ont échoué. Dernière erreur : {last_error}")
193
 
194
 
195
  def should_continue(state: AgentState):
196
+ """Décide si on appelle des outils ou si on termine."""
197
  last = state["messages"][-1]
198
  if hasattr(last, "tool_calls") and last.tool_calls:
199
  return "tools"
 
214
  APP = build_graph()
215
 
216
 
217
+ # ── Agent runner ───────────────────────────────────────────────────────────────
218
+
219
  def run_agent(question: str, task_id: str) -> str:
220
+ """
221
+ Exécute l'agent sur une question GAIA.
222
+ Injecte le contenu du fichier attaché uniquement s'il est réellement utile.
223
+ """
224
  file_hint = ""
225
+ try:
226
+ raw = download_task_file.invoke({"task_id": task_id})
227
+ if (
228
+ raw
229
+ and "No file attached" not in raw
230
+ and "error" not in raw.lower()
231
+ and "Binary file" not in raw
232
+ and len(raw.strip()) > 10
233
+ ):
234
+ file_hint = f"\n\n[Attached file content]:\n{raw[:3000]}"
235
+ except Exception as e:
236
+ print(f" [run_agent] Erreur téléchargement fichier : {e}")
237
 
238
  messages = [
239
  SystemMessage(content=SYSTEM_PROMPT),
240
+ HumanMessage(content=f"Question: {question}{file_hint}"),
241
  ]
242
+
243
  try:
244
+ result = APP.invoke({"messages": messages}, config={"recursion_limit": 30})
245
+ raw_answer = result["messages"][-1].content
246
+
247
+ # Nettoyage des préfixes parasites
248
+ answer = re.sub(
249
+ r"(?i)^(final\s+answer[:\s]*|answer[:\s]*|the\s+answer\s+is[:\s]*)",
250
+ "",
251
+ str(raw_answer),
252
+ ).strip()
253
  return answer
254
+
255
  except Exception as e:
256
  return f"AGENT_ERROR: {e}"
257
 
 
265
  username = profile.username
266
  space_url = f"https://huggingface.co/spaces/{username}/Final_Assignment_Template/tree/main"
267
 
268
+ # Récupération des questions
269
  try:
270
  resp = requests.get(f"{API_BASE}/questions", timeout=15)
271
  resp.raise_for_status()
 
278
  for i, item in enumerate(questions):
279
  task_id = item.get("task_id", "")
280
  question = item.get("question", "")
281
+ print(f"\n[{i+1}/{len(questions)}] Task: {task_id}")
282
+ print(f" Question: {question[:120]}")
283
+
284
  answer = run_agent(question, task_id)
285
+ print(f" ✅ Réponse : {answer}")
286
+
287
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
288
  results_log.append({
289
  "Task ID": task_id,
 
291
  "Submitted Answer": answer,
292
  })
293
 
294
+ # Soumission
295
+ payload = {
296
+ "username": username,
297
+ "agent_code": space_url,
298
+ "answers": answers_payload,
299
+ }
300
  try:
301
  sub = requests.post(f"{API_BASE}/submit", json=payload, timeout=60)
302
  sub.raise_for_status()
303
  r = sub.json()
304
  msg = (
305
+ f"✅ **Score : {r.get('score', '?')}%** "
306
+ f"({r.get('correct_count', '?')}/{r.get('total_questions', '?')} correctes)\n"
307
+ f"{r.get('message', '')}"
308
  )
309
  except Exception as e:
310
  msg = f"⚠️ Agent terminé mais soumission échouée : {e}"
 
312
  return msg, pd.DataFrame(results_log)
313
 
314
 
315
+ # ── Interface ──────────────────────────────────────────────────────────────────
316
+
317
  with gr.Blocks(title="GAIA Agent – LangGraph + Groq") as demo:
318
  gr.Markdown("""
319
  # 🤖 GAIA Agent — LangGraph + Groq
320
+ **Modèles actifs (avril 2026)** :
321
+ `llama-3.3-70b-versatile` → `openai/gpt-oss-20b` → `llama-3.1-8b-instant`
322
+
323
  **Tools** : Web Search · Wikipedia · Visit Webpage · File Download · Calculator
324
 
325
  Connectez-vous avec votre compte HuggingFace puis cliquez sur **Run & Submit**.