Mouhamedamar committed on
Commit
68fe09e
·
verified ·
1 Parent(s): 7b71a0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -14
app.py CHANGED
@@ -31,18 +31,37 @@ MODEL_PRIORITY = [
31
 
32
  SYSTEM_PROMPT = """You are a precise AI assistant solving GAIA benchmark questions.
33
 
 
 
 
 
 
 
 
34
  CRITICAL RULES:
35
- 1. Answer ONLY the exact question asked. Never drift to another question.
36
- 2. Use tools (web_search, wikipedia_search, visit_webpage, download_task_file, calculator) whenever you are not 100% certain of the answer.
37
- 3. Reason step by step BEFORE giving your final answer.
38
- 4. Your FINAL answer must be:
39
- - As SHORT as possible: a number, a name, a date, a list, etc.
40
- - Exactly matching the format described in the question (e.g. "+4.6", "White; 5876", "3").
41
- - NO prefix like "The answer is" or "FINAL ANSWER:" — just the raw answer.
42
- 5. Never hallucinate. If unsure, search again.
43
- 6. If the question contains reversed or encoded text, decode it first, then answer what it asks.
44
- 7. If a file is attached, read it carefully before answering.
45
- 8. For math questions, always use the calculator tool.
 
 
 
 
 
 
 
 
 
 
 
 
46
  """
47
 
48
  # ── Tools ──────────────────────────────────────────────────────────────────────
@@ -176,27 +195,79 @@ def make_llm(model_id: str):
176
  def agent_node(state: AgentState):
177
  """
178
  Essaie les modèles dans l'ordre MODEL_PRIORITY.
179
- S'arrête dès qu'un modèle répond sans erreur.
 
180
  """
181
  last_error = None
 
182
  for model_id in MODEL_PRIORITY:
183
  try:
184
  print(f" [agent] Essai modèle : {model_id}")
185
- response = make_llm(model_id).invoke(state["messages"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  return {"messages": [response]}
 
187
  except Exception as e:
188
  print(f" [agent] Modèle {model_id} échoué : {e}")
189
  last_error = e
190
  continue
191
 
 
192
  raise RuntimeError(f"Tous les modèles Groq ont échoué. Dernière erreur : {last_error}")
193
 
194
 
195
  def should_continue(state: AgentState):
196
- """Décide si on appelle des outils ou si on termine."""
197
  last = state["messages"][-1]
 
 
198
  if hasattr(last, "tool_calls") and last.tool_calls:
199
  return "tools"
 
 
 
 
 
 
 
 
 
 
200
  return END
201
 
202
 
 
31
 
32
# System prompt for the GAIA agent: whitelists the exact tool names the model
# may call and pins the answer-format rules (short, exact, no explanation).
# NOTE: this text is runtime data sent to the LLM — reproduced verbatim.
SYSTEM_PROMPT = """You are a precise AI assistant solving GAIA benchmark questions.

AVAILABLE TOOLS (USE ONLY THESE EXACT NAMES):
- web_search
- wikipedia_search
- visit_webpage
- download_task_file
- calculator

CRITICAL RULES:
1. NEVER call any tool outside this list.
2. DO NOT use brave_search or browser.search.
3. Always use the exact tool names provided.

4. Answer ONLY the exact question asked.
5. Use tools whenever you are not 100% certain.
6. Think step by step before answering.

7. Final answer must be:
- SHORT
- EXACT format
- NO explanation

8. If a file is mentioned → ALWAYS call download_task_file.
9. If file content is provided in the question, you MUST use it.
DO NOT ask for the file again.

10. Never hallucinate.
11. When using web_search, ALWAYS follow by visit_webpage on a relevant result to confirm the answer.

12. Prefer exact facts from webpages over search snippets.

13. When possible, verify the answer using at least two sources.
"""
66
 
67
  # ── Tools ──────────────────────────────────────────────────────────────────────
 
195
def agent_node(state: AgentState):
    """
    Try each model in MODEL_PRIORITY order and return the first usable reply.

    A quality filter rejects weak or overly verbose text answers so the next
    model in the priority list gets a chance.

    FIX: responses that carry tool calls are accepted immediately. A reply
    that requests a tool call legitimately has empty text content; the
    previous filter mistook it for a "weak" answer (`not content`) and
    retried, silently discarding every tool-calling response and breaking
    tool use entirely.

    Returns:
        dict: ``{"messages": [response]}`` — the LangGraph state update.

    Raises:
        RuntimeError: when every model in MODEL_PRIORITY fails or is filtered.
    """
    last_error = None

    for model_id in MODEL_PRIORITY:
        try:
            print(f" [agent] Essai modèle : {model_id}")

            llm = make_llm(model_id)
            response = llm.invoke(state["messages"])

            # Tool-call responses are valid even with empty text content:
            # return them untouched so the graph can route to the tools node.
            if getattr(response, "tool_calls", None):
                print(" [agent] Réponse acceptée ✅ (tool call)")
                return {"messages": [response]}

            content = str(response.content).strip()
            content_lower = content.lower()

            print(f" [agent] Réponse brute : {content[:120]}")

            # Quality filter: phrases that signal a non-answer
            # (important for GAIA, where "I don't know" scores zero).
            weak_patterns = [
                "unable",
                "not sure",
                "i don't know",
                "cannot find",
                "no information",
                "insufficient information",
                "not available",
                "i could not",
                "i cannot",
                "unknown",
            ]

            if not content or any(p in content_lower for p in weak_patterns):
                print(" [agent] Réponse faible détectée → retry modèle suivant")
                raise ValueError("Weak or uncertain answer")

            # Overly long answers are usually wrong for GAIA's
            # short-exact-format questions.
            if len(content.split()) > 50:
                print(" [agent] Réponse trop longue → probablement incorrecte")
                raise ValueError("Answer too verbose")

            print(" [agent] Réponse acceptée ✅")
            return {"messages": [response]}

        except Exception as e:
            # Any failure (API error or our own quality rejection) falls
            # through to the next model in the priority list.
            print(f" [agent] Modèle {model_id} échoué : {e}")
            last_error = e
            continue

    # Every model failed or was filtered out.
    raise RuntimeError(f"Tous les modèles Groq ont échoué. Dernière erreur : {last_error}")
253
 
254
 
255
def should_continue(state: AgentState):
    """Route the graph after the agent node.

    Returns "tools" when the last message requests tool calls, "agent" to
    force another model pass when no tool has been used yet in the whole
    conversation, and END once at least one tool has run and the model
    answered without requesting tools.

    NOTE(review): if the model keeps answering directly and never emits a
    tool call, the "agent" branch re-enters the agent node with an unchanged
    message list — termination then relies on the graph's recursion limit;
    confirm the graph is compiled with one.
    """
    last = state["messages"][-1]

    # Tool call requested → run the tools node.
    if hasattr(last, "tool_calls") and last.tool_calls:
        return "tools"

    # No tool used anywhere in the conversation yet → force another agent
    # pass so the model actually searches instead of guessing.
    used_tools = any(
        hasattr(m, "tool_calls") and m.tool_calls
        for m in state["messages"]
    )

    if not used_tools:
        return "agent"

    return END
 
273