Final_Assignment_Template

Running

App Files Files Community

Mouhamedamar committed 5 days ago

Commit

68fe09e

verified ·

1 Parent(s): 7b71a0f

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -14

app.py CHANGED Viewed

@@ -31,18 +31,37 @@ MODEL_PRIORITY = [
 # Prompt text telling the model how to answer GAIA benchmark questions:
 # stay on the asked question, use the listed tools when unsure, reason
 # first, and reply with a short raw answer in the requested format.
 SYSTEM_PROMPT = """You are a precise AI assistant solving GAIA benchmark questions.
 CRITICAL RULES:
-1. Answer ONLY the exact question asked. Never drift to another question.
-2. Use tools (web_search, wikipedia_search, visit_webpage, download_task_file, calculator) whenever you are not 100% certain of the answer.
-3. Reason step by step BEFORE giving your final answer.
-4. Your FINAL answer must be:
-   - As SHORT as possible: a number, a name, a date, a list, etc.
-   - Exactly matching the format described in the question (e.g. "+4.6", "White; 5876", "3").
-   - NO prefix like "The answer is" or "FINAL ANSWER:" — just the raw answer.
-5. Never hallucinate. If unsure, search again.
-6. If the question contains reversed or encoded text, decode it first, then answer what it asks.
-7. If a file is attached, read it carefully before answering.
-8. For math questions, always use the calculator tool.
 """
 # ── Tools ──────────────────────────────────────────────────────────────────────
@@ -176,27 +195,79 @@ def make_llm(model_id: str):
 def agent_node(state: AgentState):
     """
     Query the models listed in MODEL_PRIORITY, one after another.

     The first model whose call completes without raising wins and its
     response is returned as ``{"messages": [response]}``. If every model
     raises, a RuntimeError carrying the last error's text is raised.
     """
     failure = None
     for model_id in MODEL_PRIORITY:
         try:
             print(f"  [agent] Essai modèle : {model_id}")
             reply = make_llm(model_id).invoke(state["messages"])
         except Exception as exc:
             # Any failure just moves on to the next model in the list.
             print(f"  [agent] Modèle {model_id} échoué : {exc}")
             failure = exc
         else:
             return {"messages": [reply]}
     raise RuntimeError(f"Tous les modèles Groq ont échoué. Dernière erreur : {failure}")
 def should_continue(state: AgentState):
     """Pick the next step: "tools" when the latest message requests tool calls, otherwise END."""
     latest = state["messages"][-1]
     # A missing or empty tool_calls attribute both mean "nothing to run".
     return "tools" if getattr(latest, "tool_calls", None) else END

 # Prompt text telling the model how to answer GAIA benchmark questions:
 # it names the only tool identifiers the model may call, then lists the
 # rules for tool usage, file handling, source verification and the
 # short, exact-format final answer.
 SYSTEM_PROMPT = """You are a precise AI assistant solving GAIA benchmark questions.
+AVAILABLE TOOLS (USE ONLY THESE EXACT NAMES):
+- web_search
+- wikipedia_search
+- visit_webpage
+- download_task_file
+- calculator
 CRITICAL RULES:
+1. NEVER call any tool outside this list.
+2. DO NOT use brave_search or browser.search.
+3. Always use the exact tool names provided.
+4. Answer ONLY the exact question asked.
+5. Use tools whenever you are not 100% certain.
+6. Think step by step before answering.
+7. Final answer must be:
+   - SHORT
+   - EXACT format
+   - NO explanation
+8. If a file is mentioned → ALWAYS call download_task_file.
+9. If file content is provided in the question, you MUST use it.
+DO NOT ask for the file again.
+10. Never hallucinate.
+11. When using web_search, ALWAYS follow by visit_webpage on a relevant result to confirm the answer.
+12. Prefer exact facts from webpages over search snippets.
+13. When possible, verify the answer using at least two sources.
 """
 # ── Tools ──────────────────────────────────────────────────────────────────────
 def agent_node(state: AgentState):
     """
     Try each model in MODEL_PRIORITY order and return the first acceptable reply.

     A reply that requests tool calls is returned untouched. Such replies
     typically carry little or no text content, so running the answer-quality
     filter on them would reject every tool request and the tools could never
     run. Replies without tool calls are treated as candidate answers and are
     rejected (falling through to the next model) when they are empty, contain
     one of the uncertainty phrases below, or are longer than 50 words.

     Args:
         state: Graph state; ``state["messages"]`` is sent to the model.

     Returns:
         ``{"messages": [response]}`` holding the first accepted response.

     Raises:
         RuntimeError: If every model either raised or was rejected by the
             filter. The exception is chained to the last underlying error.
     """
     # Substrings (matched case-insensitively) that mark an answer as uncertain.
     # Built once, outside the model loop.
     weak_patterns = (
         "unable",
         "not sure",
         "i don't know",
         "cannot find",
         "no information",
         "insufficient information",
         "not available",
         "i could not",
         "i cannot",
         "unknown",
     )
     last_error = None
     for model_id in MODEL_PRIORITY:
         try:
             print(f"  [agent] Essai modèle : {model_id}")
             llm = make_llm(model_id)
             response = llm.invoke(state["messages"])
             # Tool requests are not final answers: hand them to the graph
             # without applying the quality filter.
             if getattr(response, "tool_calls", None):
                 print("  [agent] Appel d'outil détecté → filtre qualité ignoré")
                 return {"messages": [response]}
             content = str(response.content).strip()
             content_lower = content.lower()
             print(f"  [agent] Réponse brute : {content[:120]}")
             # Quality filter: empty or hedging answers trigger the next model.
             if (
                 not content
                 or any(p in content_lower for p in weak_patterns)
             ):
                 print(f"  [agent] Réponse faible détectée → retry modèle suivant")
                 raise ValueError("Weak or uncertain answer")
             # Overly long answers are rejected as well (expected answers are short).
             if len(content.split()) > 50:
                 print(f"  [agent] Réponse trop longue → probablement incorrecte")
                 raise ValueError("Answer too verbose")
             print(f"  [agent] Réponse acceptée ✅")
             return {"messages": [response]}
         except Exception as e:
             # Both provider errors and filter rejections move on to the next model.
             print(f"  [agent] Modèle {model_id} échoué : {e}")
             last_error = e
             continue
     # Every model failed or was rejected.
     raise RuntimeError(
         f"Tous les modèles Groq ont échoué. Dernière erreur : {last_error}"
     ) from last_error
 def should_continue(state: AgentState):
     """
     Decide the next step after the agent node has produced a message.

     Returns:
         "tools" when the latest message requests tool calls;
         "agent" when no tool has been used yet and the retry budget is not
         exhausted (the model gets another turn);
         END otherwise.

     The "agent" route is capped. Without a cap, a model that keeps answering
     without ever calling a tool would be re-invoked indefinitely, until the
     framework's recursion limit aborts the run and the answer is lost.
     """
     # Maximum number of agent turns allowed while no tool has been called.
     max_toolless_turns = 3
     messages = state["messages"]
     last = messages[-1]
     # A pending tool request always goes to the tool node.
     if hasattr(last, "tool_calls") and last.tool_calls:
         return "tools"
     used_tools = any(
         hasattr(m, "tool_calls") and m.tool_calls
         for m in messages
     )
     if used_tools:
         return END
     # Messages exposing a tool_calls attribute are counted as agent turns.
     # NOTE(review): assumes only model replies carry that attribute, and that
     # "agent" is a registered route of this conditional edge. Confirm both.
     agent_turns = sum(1 for m in messages if hasattr(m, "tool_calls"))
     if agent_turns < max_toolless_turns:
         return "agent"
     return END