GilbertoEwaldFilho committed on
Commit
0f0f5ed
·
verified ·
1 Parent(s): 49ab2c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -71
app.py CHANGED
@@ -1,10 +1,12 @@
1
  import os
2
  import re
 
3
  import requests
4
  import pandas as pd
5
  import gradio as gr
6
 
7
  from huggingface_hub import InferenceClient
 
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -15,12 +17,11 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
  # =========================================================
16
  def clean_answer(text: str) -> str:
17
  """
18
- Limpa a resposta retornada pelo modelo:
19
  - remove quebras de linha
20
  - remove 'final answer', 'answer:', etc
21
  - remove aspas externas
22
  - normaliza espaços
23
- NÃO apaga o conteúdo útil.
24
  """
25
  if not text:
26
  return ""
@@ -38,96 +39,177 @@ def clean_answer(text: str) -> str:
38
 
39
  text = text.replace("\n", " ").strip()
40
 
41
- # aspas externas
42
- if len(text) >= 2 and text[0] == text[-1] and text[0] in ['"', "'"]:
 
43
  text = text[1:-1].strip()
44
 
45
  text = re.sub(r"\s+", " ", text)
46
-
47
  return text.strip()
48
 
49
 
50
  # =========================================================
51
- # Prompt base para o agente
52
  # =========================================================
53
- SYSTEM_PROMPT = (
54
- "You are an AI agent solving GAIA-style questions.\n"
55
- "You have access to a web search tool (DuckDuckGoSearchTool).\n"
56
- "For each question, you MUST search the web when needed to obtain accurate, "
57
- "up-to-date factual information before answering.\n"
58
- "Use the search tool, read the results, reason, and then produce ONLY the final answer.\n"
59
- "Do NOT output explanations, steps, reasoning, citations, links, or any extra words.\n"
60
- "Do NOT output labels like 'Final answer', 'Answer:', etc.\n"
61
- "If the answer is a number, output just the number. "
62
- "If it is a word or short phrase, output just that.\n"
63
- "Your output will be compared to the ground truth using EXACT MATCH."
64
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
67
  # =========================================================
68
- # Basic Agent Definition – usando smolagents
69
  # =========================================================
70
-
71
  class BasicAgent:
72
  """
73
- Agente simples usando InferenceClient.chat_completion
74
- para responder as questões do GAIA em modo conversacional.
 
 
 
75
  """
76
 
77
  def __init__(self):
78
- print("Initializing Simple GAIA Agent with chat_completion...")
79
 
80
  hf_token = os.getenv("HF_TOKEN")
81
  if not hf_token:
82
  raise ValueError(
83
- "HF_TOKEN not found! Crie um Secret chamado HF_TOKEN em Settings → Variables."
84
  )
85
 
86
- # Modelo que sabemos ser suportado como 'conversational'
87
  self.client = InferenceClient(
88
- model="Qwen/Qwen2.5-72B-Instruct", # o mesmo que a infra mostrou no log
89
  token=hf_token,
90
  )
91
 
92
  self.system_instructions = (
93
  "You are solving GAIA benchmark questions.\n"
94
- "Rules:\n"
 
 
95
  "- Answer ONLY with the final answer.\n"
96
- "- No explanations, no reasoning, no extra words.\n"
97
  "- Do NOT write 'Final answer', 'Answer:', etc.\n"
98
  "- If the answer is a number, output just the number.\n"
99
- "- Your output will be compared with EXACT MATCH.\n"
100
  )
101
 
102
- def __call__(self, question: str) -> str:
103
  print(f"\n=== NEW QUESTION ===\n{question}\n")
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  messages = [
106
  {"role": "system", "content": self.system_instructions},
107
- {
108
- "role": "user",
109
- "content": (
110
- question
111
- + "\n\nRemember: reply ONLY with the final answer, nothing else."
112
- ),
113
- },
114
  ]
115
 
116
  try:
117
  completion = self.client.chat_completion(
118
  messages=messages,
119
- max_tokens=64,
120
  temperature=0.1,
121
  top_p=0.9,
122
  )
123
 
124
- # compatível com os dois formatos (.message["content"] ou .message.content)
125
  choice = completion.choices[0]
126
- message = choice.message
127
- if isinstance(message, dict):
128
- raw = message.get("content", "")
129
  else:
130
- raw = getattr(message, "content", "")
131
 
132
  print("RAW MODEL OUTPUT:", repr(raw))
133
  final = clean_answer(raw)
@@ -138,16 +220,16 @@ class BasicAgent:
138
  print("ERROR calling InferenceClient.chat_completion:", e)
139
  return ""
140
 
 
141
  # =========================================================
142
- # Runner + submit (mantido do template, usando BasicAgent novo)
143
  # =========================================================
144
  def run_and_submit_all(profile: gr.OAuthProfile | None):
145
  """
146
- Fetches all questions, runs the BasicAgent on them, submits all answers,
147
- and displays the results.
148
  """
149
- # --- Determine HF Space Runtime URL and Repo URL ---
150
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
151
 
152
  if profile:
153
  username = f"{profile.username}"
@@ -160,21 +242,20 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
160
  questions_url = f"{api_url}/questions"
161
  submit_url = f"{api_url}/submit"
162
 
163
- # 1. Instantiate Agent
164
  try:
165
  agent = BasicAgent()
166
  except Exception as e:
167
  print(f"Error instantiating agent: {e}")
168
  return f"Error initializing agent: {e}", None
169
 
170
- # Link para o código do agente (Space público)
171
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
172
  print(f"Agent code URL: {agent_code}")
173
 
174
- # 2. Fetch Questions
175
  print(f"Fetching questions from: {questions_url}")
176
  try:
177
- response = requests.get(questions_url, timeout=60) # timeout maior
178
  response.raise_for_status()
179
  questions_data = response.json()
180
  if not questions_data:
@@ -192,18 +273,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
192
  print(f"An unexpected error occurred fetching questions: {e}")
193
  return f"An unexpected error occurred fetching questions: {e}", None
194
 
195
- # 3. Run your Agent
196
  results_log = []
197
  answers_payload = []
198
  print(f"Running agent on {len(questions_data)} questions...")
 
199
  for item in questions_data:
200
  task_id = item.get("task_id")
201
  question_text = item.get("question")
202
  if not task_id or question_text is None:
203
  print(f"Skipping item with missing task_id or question: {item}")
204
  continue
 
205
  try:
206
- submitted_answer = agent(question_text)
 
 
207
  answers_payload.append(
208
  {"task_id": task_id, "submitted_answer": submitted_answer}
209
  )
@@ -228,7 +313,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
228
  print("Agent did not produce any answers to submit.")
229
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
230
 
231
- # 4. Prepare Submission
232
  submission_data = {
233
  "username": username.strip(),
234
  "agent_code": agent_code,
@@ -239,10 +324,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
239
  )
240
  print(status_update)
241
 
242
- # 5. Submit
243
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
244
  try:
245
- response = requests.post(submit_url, json=submission_data, timeout=60)
246
  response.raise_for_status()
247
  result_data = response.json()
248
  final_status = (
@@ -283,22 +368,23 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
283
  return status_message, results_df
284
 
285
 
286
- # --- Build Gradio Interface using Blocks ---
 
 
287
  with gr.Blocks() as demo:
288
- gr.Markdown("# Basic Agent Evaluation Runner (smolagents)")
 
289
  gr.Markdown(
290
  """
291
- **Instructions:**
292
- 1. This space uses a simple agent built with `smolagents` + `InferenceClientModel`.
293
- 2. Log in to your Hugging Face account using the button below.
294
- 3. Click **'Run Evaluation & Submit All Answers'** to fetch questions,
295
- run the agent, submit answers, and see your score.
296
- ---
297
- **Notes:**
298
- - The correction on the server uses EXACT MATCH, so the agent is prompted
299
- to output only the final answer (sem 'FINAL ANSWER', sem explicações).
300
- - This template is intentionally simples; você pode adicionar tools,
301
- melhorar o prompt, etc., se quiser subir seu score.
302
  """
303
  )
304
 
@@ -319,6 +405,7 @@ with gr.Blocks() as demo:
319
  outputs=[status_output, results_table],
320
  )
321
 
 
322
  if __name__ == "__main__":
323
  print("\n" + "-" * 30 + " App Starting " + "-" * 30)
324
  space_host_startup = os.getenv("SPACE_HOST")
@@ -340,5 +427,6 @@ if __name__ == "__main__":
340
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
341
 
342
  print("-" * (60 + len(" App Starting ")) + "\n")
343
- print("Launching Gradio Interface for Basic Agent Evaluation...")
 
344
  demo.launch(debug=True, share=False)
 
1
  import os
2
  import re
3
+ import io
4
  import requests
5
  import pandas as pd
6
  import gradio as gr
7
 
8
  from huggingface_hub import InferenceClient
9
+ from duckduckgo_search import DDGS
10
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
17
  # =========================================================
18
  def clean_answer(text: str) -> str:
19
  """
20
+ Limpa a resposta do modelo para bater com EXACT MATCH:
21
  - remove quebras de linha
22
  - remove 'final answer', 'answer:', etc
23
  - remove aspas externas
24
  - normaliza espaços
 
25
  """
26
  if not text:
27
  return ""
 
39
 
40
  text = text.replace("\n", " ").strip()
41
 
42
+ if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
43
+ text = text[1:-1].strip()
44
+ if len(text) >= 2 and text.startswith("'") and text.endswith("'"):
45
  text = text[1:-1].strip()
46
 
47
  text = re.sub(r"\s+", " ", text)
 
48
  return text.strip()
49
 
50
 
51
  # =========================================================
52
+ # Tools auxiliares (search + arquivo)
53
  # =========================================================
54
def web_search(query: str, max_results: int = 6) -> str:
    """Run a DuckDuckGo text search and return formatted result snippets.

    Each hit is rendered as a "Title / Snippet / URL" entry; entries are
    joined with blank lines and the combined text is capped at 4000
    characters so it does not blow up the model's context window.

    Args:
        query: Free-text search query.
        max_results: Maximum number of hits to request.

    Returns:
        The formatted snippet text, or an empty string if the search
        fails for any reason (best-effort tool).
    """
    try:
        with DDGS() as session:
            formatted = [
                f"Title: {hit.get('title') or ''}\nSnippet: {hit.get('body') or ''}\nURL: {hit.get('href') or ''}"
                for hit in session.text(query, max_results=max_results)
            ]
        # Truncate so the snippets cannot dominate the prompt.
        return "\n\n".join(formatted)[:4000]
    except Exception as e:
        print(f"[SEARCH ERROR] {e}")
        return ""
71
+
72
+
73
def get_file_context(item: dict) -> str | None:
    """Download and extract text from a question's attached file, if any.

    The question JSON may reference an attachment under one of several
    keys ('file_url', 'file', 'attachment_url', 'attachment'). XLSX
    attachments are parsed with pandas and returned as CSV text; any
    other attachment is returned as raw text. Output is truncated to
    4000 characters.

    Args:
        item: One question record from the scoring API.

    Returns:
        Extracted text, or None when there is no attachment or any
        download/parse step fails (best-effort tool).
    """
    url = None
    for key in ("file_url", "file", "attachment_url", "attachment"):
        url = item.get(key)
        if url:
            break

    if not url:
        return None

    print(f"Trying to download attachment for task {item.get('task_id')} from: {url}")

    try:
        resp = requests.get(url, timeout=20)
        resp.raise_for_status()

        content_type = resp.headers.get("content-type", "")

        # Spreadsheet attachment: parse via pandas and hand back a CSV preview.
        if url.endswith(".xlsx") or "spreadsheetml.sheet" in content_type:
            try:
                frame = pd.read_excel(io.BytesIO(resp.content))
                return frame.to_csv(index=False)[:4000]
            except Exception as e:
                print(f"[FILE XLSX PARSE ERROR] {e}")
                return None

        # Anything else (CSV, plain text, ...): return the decoded body.
        try:
            return resp.text[:4000]
        except Exception as e:
            print(f"[FILE TEXT PARSE ERROR] {e}")
            return None

    except Exception as e:
        print(f"[FILE DOWNLOAD ERROR] {e}")
        return None
121
 
122
 
123
  # =========================================================
124
+ # Basic Agent Definition – sem smolagents, usando só InferenceClient
125
  # =========================================================
 
126
  class BasicAgent:
127
  """
128
+ Agente que:
129
+ - usa DuckDuckGo para buscar contexto
130
+ - tenta ler arquivo anexo (se o JSON tiver file_url)
131
+ - chama Qwen via chat_completion
132
+ - devolve apenas a resposta final (EXACT MATCH friendly)
133
  """
134
 
135
  def __init__(self):
136
+ print("Initializing GAIA agent with InferenceClient + DuckDuckGo...")
137
 
138
  hf_token = os.getenv("HF_TOKEN")
139
  if not hf_token:
140
  raise ValueError(
141
+ "HF_TOKEN not found! Configure um Secret chamado HF_TOKEN em Settings → Variables."
142
  )
143
 
144
+ # Modelo conversacional (suporta chat_completion)
145
  self.client = InferenceClient(
146
+ model="Qwen/Qwen2.5-72B-Instruct",
147
  token=hf_token,
148
  )
149
 
150
  self.system_instructions = (
151
  "You are solving GAIA benchmark questions.\n"
152
+ "You may receive web search snippets and/or file contents.\n"
153
+ "Use them to answer accurately.\n"
154
+ "RULES:\n"
155
  "- Answer ONLY with the final answer.\n"
156
+ "- No explanations, no reasoning steps, no justification.\n"
157
  "- Do NOT write 'Final answer', 'Answer:', etc.\n"
158
  "- If the answer is a number, output just the number.\n"
159
+ "- Your output will be compared using EXACT MATCH.\n"
160
  )
161
 
162
+ def __call__(self, question: str, file_context: str | None = None) -> str:
163
  print(f"\n=== NEW QUESTION ===\n{question}\n")
164
 
165
+ # 1) Busca na web
166
+ search_context = web_search(question)
167
+ print(f"[SEARCH LENGTH] {len(search_context)} chars")
168
+
169
+ # 2) Constrói contexto adicional
170
+ extra_parts = []
171
+ if search_context:
172
+ extra_parts.append("Web search results:\n" + search_context)
173
+ if file_context:
174
+ extra_parts.append("Relevant file content:\n" + file_context)
175
+
176
+ extra_context = "\n\n".join(extra_parts)
177
+ if len(extra_context) > 6000:
178
+ extra_context = extra_context[:6000]
179
+
180
+ user_content = question
181
+ if extra_context:
182
+ user_content += (
183
+ "\n\nHere is some external context (web and/or file):\n"
184
+ + extra_context
185
+ + "\n\nUsing ONLY the necessary information above, "
186
+ "answer the question. Remember: reply ONLY with the final answer."
187
+ )
188
+ else:
189
+ user_content += (
190
+ "\n\nAnswer the question using your knowledge. "
191
+ "Remember: reply ONLY with the final answer."
192
+ )
193
+
194
  messages = [
195
  {"role": "system", "content": self.system_instructions},
196
+ {"role": "user", "content": user_content},
 
 
 
 
 
 
197
  ]
198
 
199
  try:
200
  completion = self.client.chat_completion(
201
  messages=messages,
202
+ max_tokens=96,
203
  temperature=0.1,
204
  top_p=0.9,
205
  )
206
 
 
207
  choice = completion.choices[0]
208
+ msg = choice.message
209
+ if isinstance(msg, dict):
210
+ raw = msg.get("content", "")
211
  else:
212
+ raw = getattr(msg, "content", "")
213
 
214
  print("RAW MODEL OUTPUT:", repr(raw))
215
  final = clean_answer(raw)
 
220
  print("ERROR calling InferenceClient.chat_completion:", e)
221
  return ""
222
 
223
+
224
  # =========================================================
225
+ # Runner + submit (quase igual ao template original)
226
  # =========================================================
227
  def run_and_submit_all(profile: gr.OAuthProfile | None):
228
  """
229
+ Busca todas as questões, roda o agente em cada uma,
230
+ submete as respostas e mostra o resultado.
231
  """
232
+ space_id = os.getenv("SPACE_ID")
 
233
 
234
  if profile:
235
  username = f"{profile.username}"
 
242
  questions_url = f"{api_url}/questions"
243
  submit_url = f"{api_url}/submit"
244
 
245
+ # 1. Instancia o agente
246
  try:
247
  agent = BasicAgent()
248
  except Exception as e:
249
  print(f"Error instantiating agent: {e}")
250
  return f"Error initializing agent: {e}", None
251
 
 
252
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
253
  print(f"Agent code URL: {agent_code}")
254
 
255
+ # 2. Busca perguntas
256
  print(f"Fetching questions from: {questions_url}")
257
  try:
258
+ response = requests.get(questions_url, timeout=120)
259
  response.raise_for_status()
260
  questions_data = response.json()
261
  if not questions_data:
 
273
  print(f"An unexpected error occurred fetching questions: {e}")
274
  return f"An unexpected error occurred fetching questions: {e}", None
275
 
276
+ # 3. Roda o agente
277
  results_log = []
278
  answers_payload = []
279
  print(f"Running agent on {len(questions_data)} questions...")
280
+
281
  for item in questions_data:
282
  task_id = item.get("task_id")
283
  question_text = item.get("question")
284
  if not task_id or question_text is None:
285
  print(f"Skipping item with missing task_id or question: {item}")
286
  continue
287
+
288
  try:
289
+ file_context = get_file_context(item)
290
+ submitted_answer = agent(question_text, file_context=file_context)
291
+
292
  answers_payload.append(
293
  {"task_id": task_id, "submitted_answer": submitted_answer}
294
  )
 
313
  print("Agent did not produce any answers to submit.")
314
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
315
 
316
+ # 4. Monta submissão
317
  submission_data = {
318
  "username": username.strip(),
319
  "agent_code": agent_code,
 
324
  )
325
  print(status_update)
326
 
327
+ # 5. Submete
328
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
329
  try:
330
+ response = requests.post(submit_url, json=submission_data, timeout=120)
331
  response.raise_for_status()
332
  result_data = response.json()
333
  final_status = (
 
368
  return status_message, results_df
369
 
370
 
371
+ # =========================================================
372
+ # Interface Gradio (igual ao template, com texto atualizado)
373
+ # =========================================================
374
  with gr.Blocks() as demo:
375
+ gr.Markdown("# GAIA Agent Evaluation Runner (Custom Qwen + DuckDuckGo)")
376
+
377
  gr.Markdown(
378
  """
379
+ **How to use:**
380
+ 1. Log in to your Hugging Face account using the button below.
381
+ 2. Click **'Run Evaluation & Submit All Answers'**.
382
+ 3. The agent will:
383
+ - fetch all questions,
384
+ - optionally download attached files (if any),
385
+ - perform web search,
386
+ - answer each question with ONLY the final answer (EXACT MATCH friendly),
387
+ - submit the answers to the scoring API.
 
 
388
  """
389
  )
390
 
 
405
  outputs=[status_output, results_table],
406
  )
407
 
408
+
409
  if __name__ == "__main__":
410
  print("\n" + "-" * 30 + " App Starting " + "-" * 30)
411
  space_host_startup = os.getenv("SPACE_HOST")
 
427
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
428
 
429
  print("-" * (60 + len(" App Starting ")) + "\n")
430
+
431
+ print("Launching Gradio Interface for GAIA Agent Evaluation...")
432
  demo.launch(debug=True, share=False)