Final_Assignment_Template

Sleeping

App Files Files Community

GilbertoEwaldFilho committed on Nov 26, 2025

Commit

b6c0776

verified ·

1 Parent(s): 65d648f

Update app.py

Browse files

Files changed (1) hide show

app.py +154 -302

app.py CHANGED Viewed

@@ -1,128 +1,82 @@
 import os
 import re
 import requests
 import pandas as pd
 import gradio as gr
 from typing import Optional, List
-from ddgs import DDGS                # pip install ddgs
 from huggingface_hub import InferenceClient
-# ============================
 #   CONSTANTES DA AVALIAÇÃO
-# ============================
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# ============================
 #   FUNÇÕES AUXILIARES
-# ============================
 def clean_answer(text: str) -> str:
-    """
-    Limpa a resposta do modelo para bater em EXACT MATCH:
-    - remove quebras de linha
-    - remove 'final answer', 'answer:' etc
-    - remove aspas externas
-    - normaliza espaços
-    - remove ponto final se sobrar só isso no fim
-    """
-    if text is None:
         return ""
     text = str(text).strip()
-    # Remover prefixos tipo "Final answer:", "Answer is", etc.
     patterns_to_remove = [
         r"(?i)^final answer[:\- ]*",
         r"(?i)^answer[:\- ]*",
         r"(?i)^the answer is[:\- ]*",
         r"(?i)^my answer is[:\- ]*",
-        r"(?i)^resposta[:\- ]*",
     ]
     for p in patterns_to_remove:
         text = re.sub(p, "", text).strip()
-    # remover quebras de linha
     text = text.replace("\n", " ").replace("\r", " ").strip()
-    # aspas externas
-    if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
-        text = text[1:-1].strip()
-    if len(text) >= 2 and text.startswith("'") and text.endswith("'"):
-        text = text[1:-1].strip()
-    # múltiplos espaços
     text = re.sub(r"\s+", " ", text).strip()
-    # ponto final isolado no fim
     if text.endswith(".") and not re.search(r"[0-9A-Za-z][.!?]$", text[:-1]):
-        text = text[:-1].strip()
-    return text
 def enforce_numeric_format(question: str, answer: str) -> str:
-    """
-    Para questões que pedem número, casas decimais, etc,
-    tenta extrair só o número principal e formatar direito.
-    """
     q = question.lower()
-    # Se pedir duas casas decimais, ex: "two decimal places"
     if "two decimal places" in q or "2 decimal places" in q:
         match = re.search(r"[-+]?\d+(?:[.,]\d+)?", answer)
         if match:
-            num = match.group(0).replace(",", "")
             try:
-                value = float(num)
                 return f"{value:.2f}"
-            except ValueError:
                 pass
-    # Se parecer que é só um número inteiro (at bats, year, count etc.)
-    if any(
-        kw in q
-        for kw in [
-            "how many",
-            "at bats",
-            "number of",
-            "population",
-            "what year",
-            "in which year",
-        ]
-    ):
         match = re.search(r"-?\d+", answer.replace(",", ""))
         if match:
             return match.group(0)
-    # senão, devolve como veio
     return answer
 def web_search(question: str, max_results: int = 5) -> str:
-    """
-    Usa DuckDuckGo (ddgs) pra buscar contexto web.
-    Retorna um texto concatenando título + snippet.
-    """
-    snippets: List[str] = []
     try:
         with DDGS() as ddgs:
-            for r in ddgs.text(
-                question,
-                max_results=max_results,
-                safesearch="moderate",
-            ):
-                title = r.get("title") or ""
-                body = r.get("body") or ""
-                url = r.get("href") or ""
-                snippet = f"{title}\n{body}\nURL: {url}"
-                snippets.append(snippet)
     except Exception as e:
         print("[WEB SEARCH ERROR]", e)
         return ""
@@ -130,322 +84,220 @@ def web_search(question: str, max_results: int = 5) -> str:
     if not snippets:
         return ""
-    joined = "\n\n---\n\n".join(snippets)
-    # limitar pra não exagerar o contexto
-    return joined[:8000]
-# ============================
-#   AGENTE PRINCIPAL
-# ============================
 SYSTEM_INSTRUCTIONS = """
-You are a highly accurate AI assistant solving GAIA benchmark questions.
-You MUST provide answers suitable for EXACT MATCH evaluation.
-GENERAL RULES:
-- Think step by step, but DO NOT show your reasoning.
-- Output ONLY the final answer string.
-- Do NOT include explanations, reasoning, or extra words.
-- Do NOT write things like "Final answer:", "Answer is", etc.
-- If the answer is a number, output only the number (no units unless explicitly requested).
-- If the answer is a list, output it exactly as requested (e.g., comma-separated, alphabetical order, etc.).
-- Respect the requested formatting (e.g., two decimal places, upper/lowercase if clearly required).
 """
 class GaiaAgent:
-    """
-    Agente projetado para maximizar a taxa de acerto:
-    - usa modelo open-source via InferenceClient (rota gratuita)
-    - faz web search com ddgs em todas as questões
-    - aplica pós-processamento para números / duas casas decimais etc.
-    """
     def __init__(self):
-        print("Initializing GAIA Agent...")
-        hf_token = os.getenv("HF_TOKEN")
-        if not hf_token:
-            raise ValueError(
-                "HF_TOKEN não encontrado! "
-                "Crie um Secret chamado HF_TOKEN em Settings → Variables."
-            )
-        # Modelo forte open-source (pode trocar se quiser tentar outros)
         self.client = InferenceClient(
-            model="mistralai/Mistral-7B-Instruct-v0.2",
-            token=hf_token,
         )
-    def build_prompt(self, question: str, search_context: str) -> str:
-        """
-        Constrói o prompt completo para o modelo.
-        """
-        base = SYSTEM_INSTRUCTIONS.strip()
-        if search_context:
-            ctx = (
-                "Here are web search results that may be relevant. "
-                "They can be noisy, so you must reason carefully and ignore incorrect info.\n\n"
-                f"{search_context}"
-            )
-        else:
-            ctx = "No external web search results are available for this question."
-        prompt = (
-            f"{base}\n\n"
             f"QUESTION:\n{question}\n\n"
-            f"{ctx}\n\n"
-            "Now, based on all the above, provide ONLY the final answer.\n"
-            "Remember: no explanation, only the final answer string.\n"
-            "Answer:"
         )
-        return prompt
-    def __call__(self, question: str) -> str:
-        print("\n" + "=" * 60)
         print("NEW QUESTION:")
         print(question)
-        print("=" * 60 + "\n")
-        # 1. Web search
-        search_ctx = web_search(question, max_results=5)
-        print(f"[SEARCH CONTEXT LENGTH] {len(search_ctx)} chars")
-        # 2. Montar prompt
-        prompt = self.build_prompt(question, search_ctx)
-        # 3. Chamar modelo
         try:
-            raw = self.client.text_generation(
-                prompt,
-                max_new_tokens=160,
                 temperature=0.0,
-                top_p=0.9,
-                repetition_penalty=1.05,
             )
-            print("[RAW MODEL OUTPUT]", repr(raw))
         except Exception as e:
-            print("ERROR calling InferenceClient.text_generation:", e)
             return ""
-        # 4. Limpeza + pós-processamento
         answer = clean_answer(raw)
         answer = enforce_numeric_format(question, answer)
-        print("[FINAL CLEANED ANSWER]", repr(answer))
         return answer
-# ============================
-#   PIPELINE: RODAR E SUBMETER
-# ============================
 def run_and_submit_all(profile: Optional[gr.OAuthProfile]):
-    """
-    Busca todas as questões, roda o agente, submete e mostra resultado.
-    """
-    # --- usuário HF (pra leaderboard)
-    if profile:
-        username = profile.username
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
-    # --- URLs da API de scoring
-    space_id = os.getenv("SPACE_ID")
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # link do código na Space (precisa estar pública)
-    if space_id:
-        agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    else:
-        agent_code = ""
     print(f"Agent code URL: {agent_code}")
-    # 1) Instanciar agente
     try:
         agent = GaiaAgent()
     except Exception as e:
-        print("Error instantiating agent:", e)
         return f"Error initializing agent: {e}", None
-    # 2) Buscar questões
-    print(f"Fetching questions from: {questions_url}")
     try:
         resp = requests.get(questions_url, timeout=120)
         resp.raise_for_status()
-        questions_data = resp.json()
-        if not questions_data:
-            print("Fetched questions list is empty or invalid.")
-            return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
-        print("Error fetching questions:", e)
         return f"Error fetching questions: {e}", None
-    # 3) Rodar agente em cada questão
-    results_log = []
     answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
-            print("Skipping item with missing task_id or question:", item)
-            continue
-        try:
-            submitted_answer = agent(question_text)
-        except Exception as e:
-            print(f"Error running agent on task {task_id}:", e)
-            submitted_answer = ""
-        answers_payload.append(
-            {"task_id": task_id, "submitted_answer": submitted_answer}
-        )
-        results_log.append(
-            {
-                "Task ID": task_id,
-                "Question": question_text,
-                "Submitted Answer": submitted_answer,
-            }
-        )
-    if not answers_payload:
-        print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4) Preparar submissão
-    submission_data = {
-        "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload,
     }
-    print(
-        f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    )
-    print(f"Submitting to: {submit_url}")
-    # 5) Submeter (sem timeout pra não cortar o servidor)
     try:
-        resp = requests.post(submit_url, json=submission_data)
         resp.raise_for_status()
-        result_data = resp.json()
-        final_status = (
             f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/"
-            f"{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
         )
-        print("Submission successful.")
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except Exception:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-# ============================
 #   INTERFACE GRADIO
-# ============================
 with gr.Blocks() as demo:
-    gr.Markdown("# GAIA Agent Evaluation Runner (improved)")
-    gr.Markdown(
-        """
-        **Como usar**
-        1. Faça login com sua conta Hugging Face no botão abaixo.
-        2. Certifique-se de que este Space está público e tem um Secret `HF_TOKEN`
-           com permissão de Inference.
-        3. Clique em **"Run Evaluation & Submit All Answers"**.
-        4. Aguarde o agente responder às 20 questões e enviar ao servidor de scoring.
-        **Notas**
-        - O agente usa web search (DuckDuckGo) e um modelo open-source forte
-          via InferenceClient.
-        - A saída é cuidadosamente pós-processada para tentar maximizar o
-          acerto em EXACT MATCH (números, duas casas decimais, etc.).
-        """
-    )
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(
-        label="Run Status / Submission Result",
-        lines=5,
-        interactive=False,
-    )
-    results_table = gr.DataFrame(
-        label="Questions and Agent Answers",
-        wrap=True,
-    )
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table],
-    )
 if __name__ == "__main__":
-    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        print("ℹ️  SPACE_HOST not found (talvez rodando localmente).")
-    if space_id_startup:
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(
-            f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
-        )
-    else:
-        print("ℹ️  SPACE_ID not found. Repo URL cannot be determined.")
-    print("-" * (60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for GAIA Agent Evaluation...")
     demo.launch(debug=True, share=False)

 import os
 import re
+import io
 import requests
 import pandas as pd
 import gradio as gr
 from typing import Optional, List
+from ddgs import DDGS
 from huggingface_hub import InferenceClient
+# ================================
 #   CONSTANTES DA AVALIAÇÃO
+# ================================
 # Base URL of the scoring service; the "/questions", "/submit" and
 # "/files/<task_id>" endpoints used in this file are built from it.
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# ================================
 #   FUNÇÕES AUXILIARES
+# ================================
 def clean_answer(text: str) -> str:
     """Normalize a raw model reply into a bare answer string for exact-match scoring.

     Steps, in order: strip leading boilerplate such as "Final answer:" (repeated
     until none is left), collapse every whitespace run (including newlines) to a
     single space, remove one pair of *matching* outer quotes, and drop a
     trailing period.

     Args:
         text: Raw model output. ``None`` yields ``""``; any other value is
             converted with ``str()`` first, so falsy values such as ``0`` survive.

     Returns:
         The cleaned answer, possibly empty.
     """
     if text is None:
         return ""
     text = str(text).strip()
     # The word boundary keeps words that merely start with a prefix
     # ("Answers vary") intact instead of chopping them to "s vary".
     prefix_re = re.compile(
         r"^(?:final answer|the answer is|my answer is|answer)\b[:\- ]*",
         re.IGNORECASE,
     )
     # Loop so stacked prefixes ("The answer is: Final answer: 5") are all removed.
     previous = None
     while previous != text:
         previous = text
         text = prefix_re.sub("", text, count=1).strip()
     text = re.sub(r"\s+", " ", text).strip()
     # Only strip quotes when both ends carry the SAME quote character.
     if len(text) >= 2 and text[0] == text[-1] and text[0] in ("'", '"'):
         text = text[1:-1].strip()
     # The trailing period is kept only when the text before it already ends in
     # an alphanumeric followed by '.', '!' or '?' (for example "etc..").
     if text.endswith(".") and not re.search(r"[0-9A-Za-z][.!?]$", text[:-1]):
         text = text[:-1]
     return text.strip()
 def enforce_numeric_format(question: str, answer: str) -> str:
     """Coerce an answer into the numeric shape the question asks for.

     * If the question mentions "two decimal places" / "2 decimal places", the
       first number found in ``answer`` is returned formatted with two decimals.
       Commas inside the number are treated as thousands separators, so
       "1,234.567" becomes "1234.57".
     * Otherwise, if the question contains "how many", "number of", "what year"
       or "in which year", the first integer in ``answer`` (commas removed) is
       returned.
     * In every other case ``answer`` is returned unchanged.

     Args:
         question: The question text; ``None`` is treated as an empty question.
         answer: The cleaned model answer; empty or ``None`` is returned as is.

     Returns:
         The reformatted answer string, or ``answer`` itself when no rule applies.
     """
     if not answer:
         return answer
     q = (question or "").lower()
     if "two decimal places" in q or "2 decimal places" in q:
         # Match the whole number including thousands groups and the fraction;
         # also accept a bare fraction such as ".5".
         match = re.search(r"[-+]?(?:\d[\d,]*(?:\.\d+)?|\.\d+)", answer)
         if match:
             try:
                 value = float(match.group(0).replace(",", ""))
             except ValueError:
                 pass
             else:
                 return f"{value:.2f}"
     if any(kw in q for kw in ("how many", "number of", "what year", "in which year")):
         match = re.search(r"-?\d+", answer.replace(",", ""))
         if match:
             return match.group(0)
     return answer
 def web_search(question: str, max_results: int = 5) -> str:
     """Search the web with DDGS and return the hits as one text block.

     Each hit is rendered as "title, body, URL: href" on three lines; hits are
     joined with a "---" separator and the result is capped at 8000 characters.

     Args:
         question: The query string passed to the search.
         max_results: Maximum number of hits requested.

     Returns:
         The concatenated snippets, or ``""`` when the search fails or finds nothing.
     """
     snippets = []
     try:
         with DDGS() as ddgs:
             for r in ddgs.text(question, max_results=max_results, safesearch="moderate"):
                 # ``or ""`` (not a .get default) so a key that is present but
                 # None does not end up in the prompt as the text "None".
                 title = r.get("title") or ""
                 body = r.get("body") or ""
                 url = r.get("href") or ""
                 snippets.append(f"{title}\n{body}\nURL: {url}")
     except Exception as e:
         # Best effort: the agent still answers without search context.
         print("[WEB SEARCH ERROR]", e)
         return ""
     if not snippets:
         return ""
     return ("\n\n---\n\n".join(snippets))[:8000]
def get_file_context(api_url: str, task_id: str, item: dict) -> str:
    """Download the file attached to a task and render it as prompt text.

    A task is considered to have a file when ``item`` carries a non-empty
    "file_name", "filename" or "file" entry, or a truthy "has_file" flag.
    The file is fetched from ``{api_url}/files/{task_id}``.

    Args:
        api_url: Base URL of the scoring service.
        task_id: Identifier of the task whose file is requested.
        item: The question record as returned by the service.

    Returns:
        * ``""`` when the task has no file or the download fails;
        * "[FILE TXT]" plus up to 8000 characters for .txt/.csv/.tsv files;
        * "[FILE TABLE CSV]" plus up to 8000 characters of CSV for spreadsheets
          (or a fixed notice when the spreadsheet cannot be parsed);
        * a "[FILE BINARY: name] N bytes" placeholder for anything else.
    """
    file_name = (
        item.get("file_name")
        or item.get("filename")
        or item.get("file")
        or ""
    )
    if not file_name and not item.get("has_file"):
        return ""
    # Guard against a non-string value so the suffix checks below cannot raise.
    file_name = str(file_name)

    file_url = f"{api_url}/files/{task_id}"
    print(f"[FILE DOWNLOAD] {file_url}")
    try:
        resp = requests.get(file_url, timeout=60)
        resp.raise_for_status()
    except requests.RequestException as e:
        print("[FILE ERROR]", e)
        return ""

    data = resp.content
    name_lower = file_name.lower()

    # TXT / CSV
    if name_lower.endswith((".txt", ".csv", ".tsv")):
        try:
            # Strict decode so invalid UTF-8 actually reaches the fallback;
            # "utf-8-sig" also drops a leading BOM if one is present.
            text = data.decode("utf-8-sig")
        except UnicodeDecodeError:
            # latin-1 maps every byte, so this cannot fail.
            text = data.decode("latin-1")
        return f"[FILE TXT]\n{text[:8000]}"

    # XLS / XLSX
    if name_lower.endswith((".xlsx", ".xls", ".xlsm")):
        try:
            df = pd.read_excel(io.BytesIO(data))
            csv_text = df.to_csv(index=False)
            return f"[FILE TABLE CSV]\n{csv_text[:8000]}"
        except Exception as e:
            # Parsing depends on optional Excel engines; stay best-effort.
            print("[EXCEL PARSE ERROR]", e)
            return "[FILE] Spreadsheet exists but cannot parse."

    return f"[FILE BINARY: {file_name}] {len(data)} bytes"
+# ================================
+#   SISTEMA DE INSTRUÇÕES
+# ================================
 # System prompt for the agent: GaiaAgent.build_prompt places it at the top of
 # the user prompt, and GaiaAgent.__call__ also sends it as the "system" message.
 SYSTEM_INSTRUCTIONS = """
+You are a highly accurate GAIA benchmark agent.
+Always output ONLY the final answer (EXACT MATCH).
+No explanations. No reasoning. No extra words.
+Rules:
+- If the answer is a number → only the number.
+- If format requires 2 decimal places → enforce it.
+- If a list is required → output in exact requested form.
 """
+# ================================
+#   AGENTE PRINCIPAL
+# ================================
 class GaiaAgent:
     """Question-answering agent backed by a Hugging Face hosted chat model.

     Each call runs a web search, builds a prompt from the question, the optional
     file context and the search results, queries the model, and post-processes
     the reply with ``clean_answer`` and ``enforce_numeric_format``.
     """
     def __init__(self, model: str = "Qwen/Qwen3-Next-80B-A3B-Thinking", max_tokens: int = 200):
         """Create the inference client.

         Args:
             model: Model id passed to ``InferenceClient``.
             max_tokens: Generation budget for each chat completion.

         Raises:
             ValueError: If the ``HF_TOKEN`` environment variable is unset or empty.
         """
         print(f"Initializing GAIA Agent with {model}...")
         token = os.getenv("HF_TOKEN")
         if not token:
             raise ValueError("Missing HF_TOKEN in Space secrets.")
         # NOTE(review): a "Thinking" model may spend the whole default budget on
         # reasoning before emitting an answer; consider a larger max_tokens.
         self.max_tokens = max_tokens
         self.client = InferenceClient(
             model=model,
             token=token,
         )
     @staticmethod
     def _strip_reasoning(raw):
         """Return ``raw`` without any reasoning that precedes a closing ``</think>`` tag."""
         if not raw:
             return ""
         text = str(raw)
         marker = "</think>"
         cut = text.rfind(marker)
         if cut != -1:
             text = text[cut + len(marker):]
         return text.strip()
     def build_prompt(self, question, search_ctx, file_ctx):
         """Assemble the user prompt from instructions, question, file and search context.

         Empty ``file_ctx`` / ``search_ctx`` are replaced by fixed placeholder sentences.
         """
         return (
             f"{SYSTEM_INSTRUCTIONS}\n\n"
             f"QUESTION:\n{question}\n\n"
             f"FILE CONTEXT:\n{file_ctx or 'No file provided.'}\n\n"
             f"WEB SEARCH CONTEXT:\n{search_ctx or 'No search results.'}\n\n"
             "Now output ONLY the final answer:\n"
         )
     def __call__(self, question: str, file_context: str = "") -> str:
         """Answer one question.

         Args:
             question: The question text.
             file_context: Text rendering of the task's attached file, if any.

         Returns:
             The post-processed answer, or ``""`` when the model call fails.
         """
         file_context = file_context or ""  # tolerate None from callers
         print("\n====================================================")
         print("NEW QUESTION:")
         print(question)
         print("====================================================\n")
         search_ctx = web_search(question)
         print(f"[SEARCH LEN] {len(search_ctx)} | [FILE LEN] {len(file_context)}")
         prompt = self.build_prompt(question, search_ctx, file_context)
         try:
             response = self.client.chat_completion(
                 messages=[
                     {"role": "system", "content": SYSTEM_INSTRUCTIONS},
                     {"role": "user", "content": prompt},
                 ],
                 max_tokens=self.max_tokens,
                 temperature=0.0,
             )
             raw = response.choices[0].message.content
             print("[RAW OUTPUT]", raw)
         except Exception as e:
             # Best effort: a failed call yields an empty answer instead of aborting the run.
             print("ERROR calling chat_completion:", e)
             return ""
         # Drop leaked chain-of-thought before the usual answer clean-up.
         answer = clean_answer(self._strip_reasoning(raw))
         answer = enforce_numeric_format(question, answer)
         print("[FINAL ANSWER]", answer)
         return answer
+# ================================
+#   PIPELINE DE EXECUÇÃO
+# ================================
 def run_and_submit_all(profile: Optional[gr.OAuthProfile]):
     """Fetch all questions, answer each with ``GaiaAgent``, and submit the answers.

     Args:
         profile: The logged-in Hugging Face profile, or ``None`` when the user
             is not logged in.

     Returns:
         A ``(status_message, results)`` pair. ``results`` is a DataFrame with
         one row per answered question, or ``None`` when the run stopped before
         any question was processed.
     """
     if not profile:
         return "Please log in first.", None
     username = profile.username
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
     space_id = os.getenv("SPACE_ID")
     # Without SPACE_ID (e.g. a local run) send an empty link rather than a
     # bogus ".../spaces/None/tree/main" URL.
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
     print(f"User logged in: {username}")
     print(f"Agent code URL: {agent_code}")
     try:
         agent = GaiaAgent()
     except Exception as e:
         return f"Error initializing agent: {e}", None
     print("Fetching questions...")
     try:
         resp = requests.get(questions_url, timeout=120)
         resp.raise_for_status()
         questions = resp.json()
     except Exception as e:
         return f"Error fetching questions: {e}", None
     if not isinstance(questions, list) or not questions:
         return "Fetched questions list is empty or invalid format.", None
     print(f"Fetched {len(questions)} questions.")
     answers_payload = []
     results_log = []
     for item in questions:
         if not isinstance(item, dict):
             print("Skipping malformed item:", item)
             continue
         qid = item.get("task_id")
         qtext = item.get("question")
         if not qid or qtext is None:
             print("Skipping item with missing task_id or question:", item)
             continue
         try:
             file_context = get_file_context(api_url, qid, item)
             answer = agent(qtext, file_context)
         except Exception as e:
             # One bad question must not discard the answers already collected.
             print(f"Error running agent on task {qid}:", e)
             answer = ""
         answers_payload.append({"task_id": qid, "submitted_answer": answer})
         results_log.append({"Task ID": qid, "Question": qtext, "Submitted Answer": answer})
     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     submission = {
         "username": username,
         "agent_code": agent_code,
         "answers": answers_payload,
     }
     print("Submitting answers...")
     try:
         # NOTE(review): no timeout is set on this request, so a stalled server
         # blocks indefinitely - confirm whether that is intended.
         resp = requests.post(submit_url, json=submission)
         resp.raise_for_status()
         result = resp.json()
         status = (
             f"Submission Successful!\n"
             f"Score: {result.get('score')}% "
             f"({result.get('correct_count')}/{result.get('total_attempted')})\n"
             f"{result.get('message')}"
         )
         return status, pd.DataFrame(results_log)
     except Exception as e:
         return f"Submission failed: {e}", pd.DataFrame(results_log)
+# ================================
 #   INTERFACE GRADIO
+# ================================
 # Gradio front end: a title, the Hugging Face login button, one button that
 # triggers the full evaluation run, and two widgets showing its outcome.
 with gr.Blocks() as demo:
     gr.Markdown("## GAIA Agent Runner – Qwen 80B Enhanced Version")
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     out_status = gr.Textbox(label="Status", lines=4)
     out_table = gr.DataFrame(label="Answers")
     # No explicit inputs are wired; presumably Gradio supplies the `profile`
     # argument from the login state via its OAuthProfile annotation - confirm.
     run_button.click(
         fn=run_and_submit_all,
         outputs=[out_status, out_table],
     )
 # Start the app only when executed as a script, not on import.
 if __name__ == "__main__":
     demo.launch(debug=True, share=False)