Final_Assignment_Template

Sleeping

App Files Files Community

GilbertoEwaldFilho committed on Nov 26, 2025

Commit

f7efd53

verified ·

1 Parent(s): ee8b123

Update app.py

Browse files

Files changed (1) hide show

app.py +247 -228

app.py CHANGED Viewed

@@ -1,366 +1,377 @@
 import os
 import re
-import io
 import requests
 import pandas as pd
 import gradio as gr
 from huggingface_hub import InferenceClient
-from duckduckgo_search import DDGS
-# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# =========================================================
-#  Helper: limpeza de resposta para EXACT MATCH
-# =========================================================
 def clean_answer(text: str) -> str:
     """
-    Limpa a resposta do modelo para bater com EXACT MATCH:
     - remove quebras de linha
-    - remove 'final answer', 'answer:', etc
     - remove aspas externas
     - normaliza espaços
     """
-    if not text:
         return ""
     text = str(text).strip()
     patterns_to_remove = [
-        r"(?i)final answer[:\- ]*",
-        r"(?i)answer[:\- ]*",
-        r"(?i)the answer is[:\- ]*",
-        r"(?i)my answer is[:\- ]*",
     ]
     for p in patterns_to_remove:
         text = re.sub(p, "", text).strip()
-    text = text.replace("\n", " ").strip()
     if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
         text = text[1:-1].strip()
     if len(text) >= 2 and text.startswith("'") and text.endswith("'"):
         text = text[1:-1].strip()
-    text = re.sub(r"\s+", " ", text)
-    return text.strip()
-# =========================================================
-#  Tools auxiliares (search + arquivo)
-# =========================================================
-def web_search(query: str, max_results: int = 6) -> str:
     """
-    Busca no DuckDuckGo e retorna um texto com snippets.
-    Se der erro, retorna string vazia.
     """
     try:
-        snippets = []
         with DDGS() as ddgs:
-            for r in ddgs.text(query, max_results=max_results):
                 title = r.get("title") or ""
                 body = r.get("body") or ""
                 url = r.get("href") or ""
-                snippets.append(f"Title: {title}\nSnippet: {body}\nURL: {url}")
-        return "\n\n".join(snippets)[:4000]  # corta para não estourar contexto
     except Exception as e:
-        print(f"[SEARCH ERROR] {e}")
         return ""
-def get_file_context(item: dict) -> str | None:
-    """
-    Tenta baixar e ler um arquivo associado à questão.
-    Supõe que o JSON possa ter um campo 'file_url'.
-    Se não tiver ou der erro, retorna None.
-    """
-    url = (
-        item.get("file_url")
-        or item.get("file")
-        or item.get("attachment_url")
-        or item.get("attachment")
-    )
-    if not url:
-        return None
-    print(f"Trying to download attachment for task {item.get('task_id')} from: {url}")
-    try:
-        resp = requests.get(url, timeout=20)
-        resp.raise_for_status()
-        content_type = resp.headers.get("content-type", "")
-        data = resp.content
-        # XLSX
-        if url.endswith(".xlsx") or (
-            "spreadsheetml.sheet" in content_type
-        ):
-            try:
-                df = pd.read_excel(io.BytesIO(data))
-                csv_preview = df.to_csv(index=False)
-                return csv_preview[:4000]
-            except Exception as e:
-                print(f"[FILE XLSX PARSE ERROR] {e}")
-                return None
-        # CSV / texto
-        try:
-            text = resp.text
-            return text[:4000]
-        except Exception as e:
-            print(f"[FILE TEXT PARSE ERROR] {e}")
-            return None
-    except Exception as e:
-        print(f"[FILE DOWNLOAD ERROR] {e}")
-        return None
-# =========================================================
-#  Basic Agent Definition – sem smolagents, usando só InferenceClient
-# =========================================================
-class BasicAgent:
     """
-    Agente que:
-    - usa DuckDuckGo para buscar contexto
-    - tenta ler arquivo anexo (se o JSON tiver file_url)
-    - chama Qwen via chat_completion
-    - devolve apenas a resposta final (EXACT MATCH friendly)
     """
     def __init__(self):
-        print("Initializing GAIA agent with InferenceClient + DuckDuckGo...")
         hf_token = os.getenv("HF_TOKEN")
         if not hf_token:
             raise ValueError(
-                "HF_TOKEN not found! Configure um Secret chamado HF_TOKEN em Settings → Variables."
             )
-        # Modelo conversacional (suporta chat_completion)
         self.client = InferenceClient(
-            model="Qwen/Qwen2.5-72B-Instruct",
             token=hf_token,
         )
-        self.system_instructions = (
-            "You are solving GAIA benchmark questions.\n"
-            "You may receive web search snippets and/or file contents.\n"
-            "Use them to answer accurately.\n"
-            "RULES:\n"
-            "- Answer ONLY with the final answer.\n"
-            "- No explanations, no reasoning steps, no justification.\n"
-            "- Do NOT write 'Final answer', 'Answer:', etc.\n"
-            "- If the answer is a number, output just the number.\n"
-            "- Your output will be compared using EXACT MATCH.\n"
-        )
-    def __call__(self, question: str, file_context: str | None = None) -> str:
-        print(f"\n=== NEW QUESTION ===\n{question}\n")
-        # 1) Busca na web
-        search_context = web_search(question)
-        print(f"[SEARCH LENGTH] {len(search_context)} chars")
-        # 2) Constrói contexto adicional
-        extra_parts = []
         if search_context:
-            extra_parts.append("Web search results:\n" + search_context)
-        if file_context:
-            extra_parts.append("Relevant file content:\n" + file_context)
-        extra_context = "\n\n".join(extra_parts)
-        if len(extra_context) > 6000:
-            extra_context = extra_context[:6000]
-        user_content = question
-        if extra_context:
-            user_content += (
-                "\n\nHere is some external context (web and/or file):\n"
-                + extra_context
-                + "\n\nUsing ONLY the necessary information above, "
-                  "answer the question. Remember: reply ONLY with the final answer."
             )
         else:
-            user_content += (
-                "\n\nAnswer the question using your knowledge. "
-                "Remember: reply ONLY with the final answer."
-            )
-        messages = [
-            {"role": "system", "content": self.system_instructions},
-            {"role": "user", "content": user_content},
-        ]
         try:
-            completion = self.client.chat_completion(
-                messages=messages,
-                max_tokens=96,
-                temperature=0.1,
                 top_p=0.9,
             )
-            choice = completion.choices[0]
-            msg = choice.message
-            if isinstance(msg, dict):
-                raw = msg.get("content", "")
-            else:
-                raw = getattr(msg, "content", "")
-            print("RAW MODEL OUTPUT:", repr(raw))
-            final = clean_answer(raw)
-            print("CLEANED ANSWER:", repr(final))
-            return final
-        except Exception as e:
-            print("ERROR calling InferenceClient.chat_completion:", e)
-            return ""
-# =========================================================
-#  Runner + submit (quase igual ao template original)
-# =========================================================
-def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
-    Busca todas as questões, roda o agente em cada uma,
-    submete as respostas e mostra o resultado.
     """
-    space_id = os.getenv("SPACE_ID")
     if profile:
-        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instancia o agente
     try:
-        agent = BasicAgent()
     except Exception as e:
-        print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(f"Agent code URL: {agent_code}")
-    # 2. Busca perguntas
     print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=120)
-        response.raise_for_status()
-        questions_data = response.json()
         if not questions_data:
-            print("Fetched questions list is empty.")
             return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
-    except requests.exceptions.RequestException as e:
-        print(f"Error fetching questions: {e}")
-        return f"Error fetching questions: {e}", None
-    except requests.exceptions.JSONDecodeError as e:
-        print(f"Error decoding JSON response from questions endpoint: {e}")
-        print(f"Response text: {response.text[:500]}")
-        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
-        return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Roda o agente
     results_log = []
     answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            file_context = get_file_context(item)
-            submitted_answer = agent(question_text, file_context=file_context)
-            answers_payload.append(
-                {"task_id": task_id, "submitted_answer": submitted_answer}
-            )
-            results_log.append(
-                {
-                    "Task ID": task_id,
-                    "Question": question_text,
-                    "Submitted Answer": submitted_answer,
-                }
-            )
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append(
-                {
-                    "Task ID": task_id,
-                    "Question": question_text,
-                    "Submitted Answer": f"AGENT ERROR: {e}",
-                }
-            )
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Monta submissão
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload,
     }
-    status_update = (
         f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     )
-    print(status_update)
-    # 5. Submete
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=120)
-        response.raise_for_status()
-        result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
             f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
         print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
             error_json = e.response.json()
             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
         status_message = f"Submission Failed: {error_detail}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     except requests.exceptions.RequestException as e:
         status_message = f"Submission Failed: Network error - {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
         status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
@@ -368,23 +379,28 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         return status_message, results_df
-# =========================================================
-#  Interface Gradio (igual ao template, com texto atualizado)
-# =========================================================
-with gr.Blocks() as demo:
-    gr.Markdown("# GAIA Agent Evaluation Runner (Custom Qwen + DuckDuckGo)")
     gr.Markdown(
         """
-        **How to use:**
-        1. Log in to your Hugging Face account using the button below.
-        2. Click **'Run Evaluation & Submit All Answers'**.
-        3. The agent will:
-           - fetch all questions,
-           - optionally download attached files (if any),
-           - perform web search,
-           - answer each question with ONLY the final answer (EXACT MATCH friendly),
-           - submit the answers to the scoring API.
         """
     )
@@ -393,8 +409,11 @@ with gr.Blocks() as demo:
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(
-        label="Run Status / Submission Result", lines=5, interactive=False
     )
     results_table = gr.DataFrame(
         label="Questions and Agent Answers",
         wrap=True,
@@ -415,7 +434,7 @@ if __name__ == "__main__":
         print(f"✅ SPACE_HOST found: {space_host_startup}")
         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
     else:
-        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
     if space_id_startup:
         print(f"✅ SPACE_ID found: {space_id_startup}")
@@ -424,7 +443,7 @@ if __name__ == "__main__":
             f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
         )
     else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
     print("-" * (60 + len(" App Starting ")) + "\n")

 import os
 import re
 import requests
 import pandas as pd
 import gradio as gr
+from typing import Optional, List
+from ddgs import DDGS                # pip install ddgs
 from huggingface_hub import InferenceClient
+# ============================
+#   CONSTANTES DA AVALIAÇÃO
+# ============================
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# ============================
#   HELPER FUNCTIONS
# ============================
def clean_answer(text: str) -> str:
    """
    Normalise a raw model reply so it can be scored by EXACT MATCH.

    Processing steps:
    - collapse line breaks and runs of whitespace into single spaces
    - strip leading boilerplate such as "Final answer:", "The answer is",
      "Answer is" or "Resposta:" (case-insensitive)
    - strip a pair of matching outer quotes (double or single)
    - drop a lone trailing period

    Prefix and quote stripping are repeated until the text stops changing,
    so stacked prefixes ("Answer: Final answer: 42") and prefixes hidden
    inside quotes ('"Final answer: 42"') are both handled.

    Args:
        text: Raw model output. Non-string values are converted with str().

    Returns:
        The cleaned answer, or "" when ``text`` is None.
    """
    if text is None:
        return ""

    # Line breaks and repeated blanks carry no meaning for exact match.
    text = re.sub(r"\s+", " ", str(text)).strip()

    prefix_re = re.compile(
        r"^(?:final answer|the answer is|my answer is|answer is|answer|resposta)"
        # The keyword must not run straight into another letter; otherwise
        # "Answering machine" would be truncated to "ing machine".
        r"(?![^\W\d_])"
        # Separators: colons/blanks, plus a dash only when it is followed by
        # whitespace, so the sign of "Answer: -5" survives.
        r"[:\s]*(?:-\s+)?",
        re.IGNORECASE,
    )

    previous = None
    while text != previous:
        previous = text
        text = prefix_re.sub("", text).strip()
        for quote in ('"', "'"):
            if len(text) >= 2 and text[0] == quote and text[-1] == quote:
                text = text[1:-1].strip()

    # Drop a final period unless the text before it already ends in
    # alphanumeric + punctuation (e.g. "etc.." keeps its last dot).
    if text.endswith(".") and not re.search(r"[0-9A-Za-z][.!?]$", text[:-1]):
        text = text[:-1].strip()

    return text
def enforce_numeric_format(question: str, answer: str) -> str:
    """
    Post-process answers to questions that ask for a number.

    Two heuristics are applied, driven by keywords found in the question:

    1. "two decimal places" / "2 decimal places": the first number in the
       answer is parsed and re-rendered with exactly two decimals.
       Thousands separators ("1,234.5") and a decimal comma ("3,14") are
       both understood.
    2. Counting / year questions ("how many", "at bats", "number of",
       "population", "what year", "in which year"): the first number in the
       answer is returned on its own, with thousands separators removed and
       any decimal part preserved.

    Args:
        question: The question text; only used for keyword detection.
        answer: The already-cleaned model answer.

    Returns:
        The reformatted number as a string, or ``answer`` unchanged when no
        heuristic applies or no number is found.
    """
    q = question.lower()

    if "two decimal places" in q or "2 decimal places" in q:
        # sign | integer part (optionally comma-grouped) | optional fraction
        match = re.search(
            r"([-+]?)(\d{1,3}(?:,\d{3})+|\d+)(?:[.,](\d+))?", answer
        )
        if match:
            sign, whole, fraction = match.groups()
            whole = whole.replace(",", "")
            value = float(f"{sign}{whole}.{fraction or '0'}")
            return f"{value:.2f}"

    count_keywords = (
        "how many",
        "at bats",
        "number of",
        "population",
        "what year",
        "in which year",
    )
    if any(kw in q for kw in count_keywords):
        # Only commas that group exactly three digits are treated as
        # thousands separators, so a list such as "3,4,5" is not fused
        # into "345".
        match = re.search(r"-?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?", answer)
        if match:
            return match.group(0).replace(",", "")

    # No numeric hint in the question: return the answer untouched.
    return answer
def web_search(question: str, max_results: int = 5) -> str:
    """
    Query DuckDuckGo (via ddgs) and return the hits as one context string.

    Each hit is rendered as "<title>\\n<body>\\nURL: <href>"; hits are joined
    with a "---" separator line and the result is capped at 8000 characters.
    Any exception raised while searching is printed and yields "".
    """

    def render(hit) -> str:
        # Missing or None fields are rendered as empty strings.
        title = hit.get("title") or ""
        body = hit.get("body") or ""
        url = hit.get("href") or ""
        return f"{title}\n{body}\nURL: {url}"

    try:
        with DDGS() as ddgs:
            hits = ddgs.text(
                question,
                max_results=max_results,
                safesearch="moderate",
            )
            rendered = [render(hit) for hit in hits]
    except Exception as e:
        print("[WEB SEARCH ERROR]", e)
        return ""

    # Joining an empty list gives "", which covers the "no hits" case.
    # The slice keeps the prompt context bounded.
    return "\n\n---\n\n".join(rendered)[:8000]
# ============================
#   MAIN AGENT
# ============================
# Instruction preamble placed at the top of every prompt. The literal starts
# and ends with a newline; GaiaAgent.build_prompt strips it before use.
SYSTEM_INSTRUCTIONS = """
You are a highly accurate AI assistant solving GAIA benchmark questions.
You MUST provide answers suitable for EXACT MATCH evaluation.
GENERAL RULES:
- Think step by step, but DO NOT show your reasoning.
- Output ONLY the final answer string.
- Do NOT include explanations, reasoning, or extra words.
- Do NOT write things like "Final answer:", "Answer is", etc.
- If the answer is a number, output only the number (no units unless explicitly requested).
- If the answer is a list, output it exactly as requested (e.g., comma-separated, alphabetical order, etc.).
- Respect the requested formatting (e.g., two decimal places, upper/lowercase if clearly required).
"""
class GaiaAgent:
    """
    Question-answering agent for the GAIA evaluation.

    For every question it:
    - runs a web search (``web_search``) to gather context,
    - builds a single text prompt from ``SYSTEM_INSTRUCTIONS``, the question
      and the search results,
    - calls an open-source model through ``InferenceClient.text_generation``,
    - post-processes the reply with ``clean_answer`` and
      ``enforce_numeric_format``.
    """

    def __init__(self):
        """
        Create the inference client.

        Raises:
            ValueError: if the ``HF_TOKEN`` environment variable is unset
                or empty.
        """
        print("Initializing GAIA Agent...")
        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            raise ValueError(
                "HF_TOKEN não encontrado! "
                "Crie um Secret chamado HF_TOKEN em Settings → Variables."
            )
        # Open-source instruct model; swap the id to try other models.
        self.client = InferenceClient(
            model="mistralai/Mistral-7B-Instruct-v0.2",
            token=hf_token,
        )

    def build_prompt(self, question: str, search_context: str) -> str:
        """
        Assemble the full prompt sent to the model.

        Args:
            question: The question text.
            search_context: Web search snippets, or "" when none are
                available.

        Returns:
            Instructions, question and context, ending with "Answer:".
        """
        base = SYSTEM_INSTRUCTIONS.strip()
        if search_context:
            ctx = (
                "Here are web search results that may be relevant. "
                "They can be noisy, so you must reason carefully and ignore incorrect info.\n\n"
                f"{search_context}"
            )
        else:
            ctx = "No external web search results are available for this question."
        prompt = (
            f"{base}\n\n"
            f"QUESTION:\n{question}\n\n"
            f"{ctx}\n\n"
            "Now, based on all the above, provide ONLY the final answer.\n"
            "Remember: no explanation, only the final answer string.\n"
            "Answer:"
        )
        return prompt

    def __call__(self, question: str) -> str:
        """
        Answer one question.

        Returns:
            The cleaned answer, or "" when the model call fails.
        """
        print("\n" + "=" * 60)
        print("NEW QUESTION:")
        print(question)
        print("=" * 60 + "\n")

        # 1. Web search
        search_ctx = web_search(question, max_results=5)
        print(f"[SEARCH CONTEXT LENGTH] {len(search_ctx)} chars")

        # 2. Build the prompt
        prompt = self.build_prompt(question, search_ctx)

        # 3. Call the model
        try:
            # Deterministic (greedy) decoding is requested with
            # do_sample=False. The previous temperature=0.0 is reportedly
            # rejected by text-generation-inference backends, which require
            # a strictly positive temperature — every call then failed and
            # the agent answered "". top_p is dropped as well because it
            # only matters when sampling.
            raw = self.client.text_generation(
                prompt,
                max_new_tokens=160,
                do_sample=False,
                repetition_penalty=1.05,
            )
            print("[RAW MODEL OUTPUT]", repr(raw))
        except Exception as e:
            print("ERROR calling InferenceClient.text_generation:", e)
            return ""

        # 4. Clean-up and numeric post-processing
        answer = clean_answer(raw)
        answer = enforce_numeric_format(question, answer)
        print("[FINAL CLEANED ANSWER]", repr(answer))
        return answer
# ============================
#   PIPELINE: RUN AND SUBMIT
# ============================
def run_and_submit_all(profile: Optional[gr.OAuthProfile]):
    """
    Fetch all questions, run the agent on each, submit the answers and
    report the outcome.

    Args:
        profile: The logged-in Hugging Face profile, or None when the user
            has not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame of
        task id / question / submitted answer, or None when the run stops
        before any question is processed.
    """
    # --- HF user (used for the leaderboard)
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = profile.username
    print(f"User logged in: {username}")

    # --- Scoring API URLs
    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Link to the code in the Space (the Space must be public)
    if space_id:
        agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    else:
        agent_code = ""
    print(f"Agent code URL: {agent_code}")

    # 1) Instantiate the agent
    try:
        agent = GaiaAgent()
    except Exception as e:
        print("Error instantiating agent:", e)
        return f"Error initializing agent: {e}", None

    # 2) Fetch the questions
    print(f"Fetching questions from: {questions_url}")
    try:
        resp = requests.get(questions_url, timeout=120)
        resp.raise_for_status()
        questions_data = resp.json()
    except Exception as e:
        print("Error fetching questions:", e)
        return f"Error fetching questions: {e}", None
    # A non-list payload (e.g. an error object) would otherwise be iterated
    # key by key and crash on item.get below.
    if not isinstance(questions_data, list) or not questions_data:
        print("Fetched questions list is empty or invalid.")
        return "Fetched questions list is empty or invalid format.", None
    print(f"Fetched {len(questions_data)} questions.")

    # 3) Run the agent on every question
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        if isinstance(item, dict):
            task_id = item.get("task_id")
            question_text = item.get("question")
        else:
            task_id = question_text = None
        if not task_id or question_text is None:
            print("Skipping item with missing task_id or question:", item)
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # A failing question is still submitted, with an empty answer.
            print(f"Error running agent on task {task_id}:", e)
            submitted_answer = ""
        answers_payload.append(
            {"task_id": task_id, "submitted_answer": submitted_answer}
        )
        results_log.append(
            {
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": submitted_answer,
            }
        )

    # Built once so every exit path below, including the generic exception
    # handler, has the table available.
    results_df = pd.DataFrame(results_log)

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", results_df

    # 4) Prepare the submission
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    print(
        f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    )
    print(f"Submitting to: {submit_url}")

    # 5) Submit. No timeout is set on purpose so a slow scoring server is
    # not cut off. NOTE(review): this can block indefinitely if the server
    # never answers.
    try:
        resp = requests.post(submit_url, json=submission_data)
        resp.raise_for_status()
        result_data = resp.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/"
            f"{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except Exception:
            # Error body was not JSON (or not a JSON object): show raw text.
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        return status_message, results_df
+# ============================
+#   INTERFACE GRADIO
+# ============================
+with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent Evaluation Runner (improved)")
     gr.Markdown(
         """
+        **Como usar**
+        1. Faça login com sua conta Hugging Face no botão abaixo.
+        2. Certifique-se de que este Space está público e tem um Secret `HF_TOKEN`
+           com permissão de Inference.
+        3. Clique em **"Run Evaluation & Submit All Answers"**.
+        4. Aguarde o agente responder às 20 questões e enviar ao servidor de scoring.
+        **Notas**
+        - O agente usa web search (DuckDuckGo) e um modelo open-source forte
+          via InferenceClient.
+        - A saída é cuidadosamente pós-processada para tentar maximizar o
+          acerto em EXACT MATCH (números, duas casas decimais, etc.).
         """
     )
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(
+        label="Run Status / Submission Result",
+        lines=5,
+        interactive=False,
     )
     results_table = gr.DataFrame(
         label="Questions and Agent Answers",
         wrap=True,
         print(f"✅ SPACE_HOST found: {space_host_startup}")
         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
     else:
+        print("ℹ️  SPACE_HOST not found (talvez rodando localmente).")
     if space_id_startup:
         print(f"✅ SPACE_ID found: {space_id_startup}")
             f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
         )
     else:
+        print("ℹ️  SPACE_ID not found. Repo URL cannot be determined.")
     print("-" * (60 + len(" App Starting ")) + "\n")