Final_Assignment_Template

Running

App Files Community

Mouhamedamar committed 13 days ago

Commit

7b71a0f

verified ·

1 Parent(s): 1942536

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -36

app.py CHANGED Viewed

@@ -15,20 +15,34 @@ from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
 from langchain_community.utilities import WikipediaAPIWrapper
 # ── Constants ──────────────────────────────────────────────────────────────────
-API_BASE       = "https://agents-course-unit4-scoring.hf.space"
-PRIMARY_MODEL  = "llama-3.3-70b-versatile"
-FALLBACK_MODEL = "llama-3.1-70b-versatile"   # or "gemma2-9b-it"
 SYSTEM_PROMPT = """You are a precise AI assistant solving GAIA benchmark questions.
-Rules:
-- Use tools (web_search, wikipedia_search, visit_webpage, download_task_file, calculator) as needed.
-- Reason step by step before giving your final answer.
-- Your FINAL answer must be:
-  • As short as possible: a number, a name, a date, a list, etc.
-  • Exactly matching the format described in the question (e.g. "+4.6", "White; 5876").
-  • NO prefix like "The answer is" or "FINAL ANSWER:" — just the raw answer.
-- Never guess. If unsure, search again.
 """
 # ── Tools ──────────────────────────────────────────────────────────────────────
@@ -52,7 +66,9 @@ def wikipedia_search(query: str) -> str:
         query: The topic to look up on Wikipedia.
     """
     try:
-        wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=4000))
         return wiki.run(query)
     except Exception as e:
         return f"Wikipedia error: {e}"
@@ -78,7 +94,7 @@ def visit_webpage(url: str) -> str:
 @tool
 def download_task_file(task_id: str) -> str:
     """Download the file attached to a GAIA task and return its content.
-    Always call this when the question references a file or image.
     Args:
         task_id: The GAIA task ID string.
     """
@@ -88,11 +104,18 @@ def download_task_file(task_id: str) -> str:
             return "No file attached to this task."
         resp.raise_for_status()
         ct = resp.headers.get("content-type", "")
         if any(x in ct for x in ["text", "json", "xml", "csv"]):
             return resp.text[:6000]
         if "spreadsheet" in ct or "excel" in ct:
             import io
-            return pd.read_excel(io.BytesIO(resp.content)).to_string()[:5000]
         if "pdf" in ct:
             try:
                 import pdfplumber, io
@@ -101,12 +124,19 @@ def download_task_file(task_id: str) -> str:
                 return text[:6000] or "PDF has no extractable text."
             except ImportError:
                 return f"PDF received ({len(resp.content)} bytes). pdfplumber not installed."
         if "image" in ct:
-            return f"Image file ({ct}, {len(resp.content)} bytes). Use visual reasoning."
         try:
             return resp.content.decode("utf-8")[:6000]
         except UnicodeDecodeError:
             return f"Binary file ({ct}, {len(resp.content)} bytes)."
     except Exception as e:
         return f"File download error: {e}"
@@ -115,12 +145,13 @@ def download_task_file(task_id: str) -> str:
 def calculator(expression: str) -> str:
     """Evaluate a mathematical Python expression safely.
     Args:
-        expression: e.g. '(390/2) / (146*0.01)' or 'sqrt(144)'.
     """
     try:
         allowed = {k: v for k, v in vars(math).items() if not k.startswith("_")}
         allowed["__builtins__"] = {}
-        return str(eval(expression, allowed))
     except Exception as e:
         return f"Calculation error: {e}"
@@ -134,6 +165,7 @@ class AgentState(TypedDict):
 def make_llm(model_id: str):
     return ChatGroq(
         model=model_id,
         temperature=0,
@@ -142,14 +174,26 @@ def make_llm(model_id: str):
 def agent_node(state: AgentState):
-    try:
-        response = make_llm(PRIMARY_MODEL).invoke(state["messages"])
-    except Exception:
-        response = make_llm(FALLBACK_MODEL).invoke(state["messages"])
-    return {"messages": [response]}
 def should_continue(state: AgentState):
     last = state["messages"][-1]
     if hasattr(last, "tool_calls") and last.tool_calls:
         return "tools"
@@ -170,21 +214,44 @@ def build_graph():
 APP = build_graph()
 def run_agent(question: str, task_id: str) -> str:
-    file_ctx = download_task_file.invoke({"task_id": task_id})
     file_hint = ""
-    if file_ctx and "No file attached" not in file_ctx and "error" not in file_ctx.lower():
-        file_hint = f"\n\n[Attached file for task {task_id}]:\n{file_ctx[:3000]}"
     messages = [
         SystemMessage(content=SYSTEM_PROMPT),
-        HumanMessage(content=f"Question: {question}{file_hint}\n\nTask ID: {task_id}"),
     ]
     try:
-        result = APP.invoke({"messages": messages}, config={"recursion_limit": 25})
-        answer = result["messages"][-1].content
-        answer = re.sub(r"(?i)^(final answer[:\s]*|answer[:\s]*)", "", str(answer)).strip()
         return answer
     except Exception as e:
         return f"AGENT_ERROR: {e}"
@@ -198,6 +265,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     username  = profile.username
     space_url = f"https://huggingface.co/spaces/{username}/Final_Assignment_Template/tree/main"
     try:
         resp = requests.get(f"{API_BASE}/questions", timeout=15)
         resp.raise_for_status()
@@ -210,9 +278,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     for i, item in enumerate(questions):
         task_id  = item.get("task_id", "")
         question = item.get("question", "")
-        print(f"[{i+1}/{len(questions)}] {task_id}")
         answer = run_agent(question, task_id)
-        print(f"  → {answer}")
         answers_payload.append({"task_id": task_id, "submitted_answer": answer})
         results_log.append({
             "Task ID":          task_id,
@@ -220,15 +291,20 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             "Submitted Answer": answer,
         })
-    payload = {"username": username, "agent_code": space_url, "answers": answers_payload}
     try:
         sub = requests.post(f"{API_BASE}/submit", json=payload, timeout=60)
         sub.raise_for_status()
         r = sub.json()
         msg = (
-            f"✅ **Score : {r.get('score','?')}%** "
-            f"({r.get('correct_count','?')}/{r.get('total_questions','?')} correctes)\n"
-            f"{r.get('message','')}"
         )
     except Exception as e:
         msg = f"⚠️ Agent terminé mais soumission échouée : {e}"
@@ -236,10 +312,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     return msg, pd.DataFrame(results_log)
 with gr.Blocks(title="GAIA Agent – LangGraph + Groq") as demo:
     gr.Markdown("""
     # 🤖 GAIA Agent — LangGraph + Groq
-    **Modèles** : `llama-3.3-70b-versatile` (principal) · `mixtral-8x7b-32768` (fallback)
     **Tools** : Web Search · Wikipedia · Visit Webpage · File Download · Calculator
     Connectez-vous avec votre compte HuggingFace puis cliquez sur **Run & Submit**.

 from langchain_community.utilities import WikipediaAPIWrapper
 # ── Constants ──────────────────────────────────────────────────────────────────
+API_BASE = "https://agents-course-unit4-scoring.hf.space"
+# ✅ Modèles ACTIFS sur Groq en avril 2026
+# Source : https://console.groq.com/docs/models
+# ❌ DÉCOMMISSIONNÉS (ne pas utiliser) :
+#    - mixtral-8x7b-32768
+#    - llama-3.1-70b-versatile
+#    - gemma2-9b-it
+MODEL_PRIORITY = [
+    "llama-3.3-70b-versatile",  # ✅ Primaire — meilleur raisonnement
+    "openai/gpt-oss-20b",       # ✅ Fallback 1 — très rapide
+    "llama-3.1-8b-instant",     # ✅ Fallback 2 — léger et fiable
+]
 SYSTEM_PROMPT = """You are a precise AI assistant solving GAIA benchmark questions.
+CRITICAL RULES:
+1. Answer ONLY the exact question asked. Never drift to another question.
+2. Use tools (web_search, wikipedia_search, visit_webpage, download_task_file, calculator) whenever you are not 100% certain of the answer.
+3. Reason step by step BEFORE giving your final answer.
+4. Your FINAL answer must be:
+   - As SHORT as possible: a number, a name, a date, a list, etc.
+   - Exactly matching the format described in the question (e.g. "+4.6", "White; 5876", "3").
+   - NO prefix like "The answer is" or "FINAL ANSWER:" — just the raw answer.
+5. Never hallucinate. If unsure, search again.
+6. If the question contains reversed or encoded text, decode it first, then answer what it asks.
+7. If a file is attached, read it carefully before answering.
+8. For math questions, always use the calculator tool.
 """
 # ── Tools ──────────────────────────────────────────────────────────────────────
         query: The topic to look up on Wikipedia.
     """
     try:
+        wiki = WikipediaQueryRun(
+            api_wrapper=WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=4000)
+        )
         return wiki.run(query)
     except Exception as e:
         return f"Wikipedia error: {e}"
 @tool
 def download_task_file(task_id: str) -> str:
     """Download the file attached to a GAIA task and return its content.
+    Always call this when the question references a file, image, spreadsheet, or document.
     Args:
         task_id: The GAIA task ID string.
     """
             return "No file attached to this task."
         resp.raise_for_status()
         ct = resp.headers.get("content-type", "")
         if any(x in ct for x in ["text", "json", "xml", "csv"]):
             return resp.text[:6000]
         if "spreadsheet" in ct or "excel" in ct:
             import io
+            try:
+                df = pd.read_excel(io.BytesIO(resp.content))
+                return df.to_string()[:5000]
+            except Exception as e:
+                return f"Excel read error: {e}"
         if "pdf" in ct:
             try:
                 import pdfplumber, io
                 return text[:6000] or "PDF has no extractable text."
             except ImportError:
                 return f"PDF received ({len(resp.content)} bytes). pdfplumber not installed."
         if "image" in ct:
+            return (
+                f"Image file received ({ct}, {len(resp.content)} bytes). "
+                "Use the context of the question to reason about this image."
+            )
+        # Dernier recours : tenter le décodage UTF-8
         try:
             return resp.content.decode("utf-8")[:6000]
         except UnicodeDecodeError:
             return f"Binary file ({ct}, {len(resp.content)} bytes)."
     except Exception as e:
         return f"File download error: {e}"
 def calculator(expression: str) -> str:
     """Evaluate a mathematical Python expression safely.
     Args:
+        expression: e.g. '(390/2) / (146*0.01)' or 'sqrt(144)' or '2**10'.
     """
     try:
         allowed = {k: v for k, v in vars(math).items() if not k.startswith("_")}
         allowed["__builtins__"] = {}
+        result = eval(expression, allowed)
+        return str(result)
     except Exception as e:
         return f"Calculation error: {e}"
 def make_llm(model_id: str):
+    """Crée un LLM Groq avec les outils liés."""
     return ChatGroq(
         model=model_id,
         temperature=0,
 def agent_node(state: AgentState):
+    """
+    Essaie les modèles dans l'ordre MODEL_PRIORITY.
+    S'arrête dès qu'un modèle répond sans erreur.
+    """
+    last_error = None
+    for model_id in MODEL_PRIORITY:
+        try:
+            print(f"  [agent] Essai modèle : {model_id}")
+            response = make_llm(model_id).invoke(state["messages"])
+            return {"messages": [response]}
+        except Exception as e:
+            print(f"  [agent] Modèle {model_id} échoué : {e}")
+            last_error = e
+            continue
+    raise RuntimeError(f"Tous les modèles Groq ont échoué. Dernière erreur : {last_error}")
 def should_continue(state: AgentState):
+    """Décide si on appelle des outils ou si on termine."""
     last = state["messages"][-1]
     if hasattr(last, "tool_calls") and last.tool_calls:
         return "tools"
 APP = build_graph()
+# ── Agent runner ───────────────────────────────────────────────────────────────
 def run_agent(question: str, task_id: str) -> str:
+    """
+    Exécute l'agent sur une question GAIA.
+    Injecte le contenu du fichier attaché uniquement s'il est réellement utile.
+    """
     file_hint = ""
+    try:
+        raw = download_task_file.invoke({"task_id": task_id})
+        if (
+            raw
+            and "No file attached" not in raw
+            and "error" not in raw.lower()
+            and "Binary file" not in raw
+            and len(raw.strip()) > 10
+        ):
+            file_hint = f"\n\n[Attached file content]:\n{raw[:3000]}"
+    except Exception as e:
+        print(f"  [run_agent] Erreur téléchargement fichier : {e}")
     messages = [
         SystemMessage(content=SYSTEM_PROMPT),
+        HumanMessage(content=f"Question: {question}{file_hint}"),
     ]
     try:
+        result = APP.invoke({"messages": messages}, config={"recursion_limit": 30})
+        raw_answer = result["messages"][-1].content
+        # Nettoyage des préfixes parasites
+        answer = re.sub(
+            r"(?i)^(final\s+answer[:\s]*|answer[:\s]*|the\s+answer\s+is[:\s]*)",
+            "",
+            str(raw_answer),
+        ).strip()
         return answer
     except Exception as e:
         return f"AGENT_ERROR: {e}"
     username  = profile.username
     space_url = f"https://huggingface.co/spaces/{username}/Final_Assignment_Template/tree/main"
+    # Récupération des questions
     try:
         resp = requests.get(f"{API_BASE}/questions", timeout=15)
         resp.raise_for_status()
     for i, item in enumerate(questions):
         task_id  = item.get("task_id", "")
         question = item.get("question", "")
+        print(f"\n[{i+1}/{len(questions)}] Task: {task_id}")
+        print(f"  Question: {question[:120]}")
         answer = run_agent(question, task_id)
+        print(f"  ✅ Réponse : {answer}")
         answers_payload.append({"task_id": task_id, "submitted_answer": answer})
         results_log.append({
             "Task ID":          task_id,
             "Submitted Answer": answer,
         })
+    # Soumission
+    payload = {
+        "username":   username,
+        "agent_code": space_url,
+        "answers":    answers_payload,
+    }
     try:
         sub = requests.post(f"{API_BASE}/submit", json=payload, timeout=60)
         sub.raise_for_status()
         r = sub.json()
         msg = (
+            f"✅ **Score : {r.get('score', '?')}%** "
+            f"({r.get('correct_count', '?')}/{r.get('total_questions', '?')} correctes)\n"
+            f"{r.get('message', '')}"
         )
     except Exception as e:
         msg = f"⚠️ Agent terminé mais soumission échouée : {e}"
     return msg, pd.DataFrame(results_log)
+# ── Interface ──────────────────────────────────────────────────────────────────
 with gr.Blocks(title="GAIA Agent – LangGraph + Groq") as demo:
     gr.Markdown("""
     # 🤖 GAIA Agent — LangGraph + Groq
+    **Modèles actifs (avril 2026)** :
+    `llama-3.3-70b-versatile` → `openai/gpt-oss-20b` → `llama-3.1-8b-instant`
     **Tools** : Web Search · Wikipedia · Visit Webpage · File Download · Calculator
     Connectez-vous avec votre compte HuggingFace puis cliquez sur **Run & Submit**.