# Scraped file-view header: author AlanRocha, commit "Update app.py" (26249f5, verified), file size 6.56 kB.
"""
app.py — GAIA Agent Evaluation Runner
Para executar no HuggingFace Spaces (ou local).
"""
import os
import gradio as gr
import requests
import pandas as pd
from agent import GAIAAgent
# Base URL of the scoring service; run_and_submit_all appends "/questions"
# and "/submit" to it to build the endpoints it calls.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
def _describe_http_error(err):
    """Return a short "HTTP <code> — <detail>" description of a failed request."""
    response = err.response
    if response is None:
        # HTTPError can be raised without an attached response object.
        return str(err)
    detail = f"HTTP {response.status_code}"
    try:
        detail += f" — {response.json().get('detail', response.text)}"
    except Exception:
        # Body is not JSON (or not a JSON object): fall back to a text excerpt.
        detail += f" — {response.text[:300]}"
    return detail


def _run_agent_on_questions(agent, questions):
    """Run *agent* on every valid question; return (answers_payload, results_log)."""
    answers_payload = []
    results_log = []
    total = len(questions)
    for position, item in enumerate(questions, start=1):
        # Malformed entries (non-dict, missing id or question) are skipped
        # instead of crashing the whole run.
        is_mapping = isinstance(item, dict)
        task_id = item.get("task_id") if is_mapping else None
        question_text = item.get("question") if is_mapping else None
        if not task_id or question_text is None:
            print(f"Pulando item inválido: {item}")
            continue

        print(f"\n[{position}/{total}] task_id={task_id}")
        try:
            answer = agent(question_text, task_id=task_id)
        except Exception as e:
            # A failing question must not abort the run; the error text is
            # submitted as the answer so the task still appears in the results.
            answer = f"AGENT ERROR: {e}"
            print(answer)

        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({
            "Task ID": task_id,
            # str() keeps the preview slice safe if the question is not a string.
            "Question": str(question_text)[:120],
            "Submitted Answer": answer,
        })
    return answers_payload, results_log


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch the questions, run the agent on each one, submit the answers and
    report the outcome.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per answered question, or None when the run
        stopped before the agent was executed (no login, agent failed to
        initialise, questions could not be fetched).
    """
    if not profile:
        return "⚠️ Faça login com o botão do Hugging Face antes de continuar.", None
    username = profile.username
    print(f"Usuário logado: {username}")

    space_id = os.getenv("SPACE_ID", "")
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # ── 1. Instantiate the agent ──────────────────────────────────────────────
    try:
        agent = GAIAAgent()
    except Exception as e:
        return f"❌ Erro ao inicializar o agente: {e}", None
    # NOTE(review): SPACE_ID is empty when running locally, which yields a
    # ".../spaces//tree/main" link — confirm whether the scoring API accepts it.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code link: {agent_code}")

    # ── 2. Fetch the questions ────────────────────────────────────────────────
    print(f"Buscando perguntas em: {questions_url}")
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions_data = resp.json()
    except Exception as e:
        return f"❌ Erro ao buscar perguntas: {e}", None
    if not questions_data:
        return "A lista de perguntas está vazia.", None
    if not isinstance(questions_data, list):
        # Anything but a JSON array cannot be iterated as a list of questions.
        return "❌ Erro ao buscar perguntas: formato de resposta inesperado.", None
    print(f"✅ {len(questions_data)} perguntas recebidas.")

    # ── 3. Run the agent ──────────────────────────────────────────────────────
    answers_payload, results_log = _run_agent_on_questions(agent, questions_data)
    if not answers_payload:
        return "O agente não produziu respostas.", pd.DataFrame(results_log)

    # ── 4. Submit the answers ─────────────────────────────────────────────────
    submission = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    print(f"\nEnviando {len(answers_payload)} respostas para {submit_url}...")
    try:
        resp = requests.post(submit_url, json=submission, timeout=120)
        resp.raise_for_status()
        result = resp.json()
        status = (
            f"✅ Submissão concluída!\n"
            f"Usuário: {result.get('username')}\n"
            f"Pontuação: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} corretas)\n"
            f"Mensagem: {result.get('message', '')}"
        )
        print(status)
        return status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        return f"❌ Submissão falhou: {_describe_http_error(e)}", pd.DataFrame(results_log)
    except Exception as e:
        # UI boundary: report any other failure instead of crashing the handler.
        return f"❌ Erro inesperado na submissão: {e}", pd.DataFrame(results_log)
# ── Gradio interface ──────────────────────────────────────────────────────────
# Builds the page: instructions, login button, run button and the two outputs
# (status text and results table) that run_and_submit_all fills in.
with gr.Blocks(title="GAIA Agent — Final Assignment") as demo:
    gr.Markdown("# 🤖 GAIA Agent — HuggingFace Agents Course Final Assignment")
    gr.Markdown(
        """
**Como usar:**
1. Faça login com sua conta Hugging Face usando o botão abaixo.
2. Clique em **Rodar Avaliação & Submeter** para executar o agente nas 20 perguntas.
3. Aguarde — o agente usa ferramentas (busca, Python, Wikipedia) para responder.
> ⏳ O processo pode levar alguns minutos. Cada pergunta é processada com até 25 passos de raciocínio.
"""
    )
    gr.LoginButton()
    run_btn = gr.Button("🚀 Rodar Avaliação & Submeter", variant="primary")
    status_out = gr.Textbox(label="Status / Resultado da Submissão", lines=6, interactive=False)
    results_df = gr.DataFrame(label="Perguntas e Respostas do Agente", wrap=True)
    # No explicit inputs: the handler's only parameter is the OAuth profile.
    # NOTE(review): presumably Gradio injects it from the type annotation — confirm.
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, results_df])
if __name__ == "__main__":
    # Startup banner: report the environment the app is running in, then
    # launch the Gradio UI.
    print("\n" + "─" * 50)
    print("Iniciando GAIA Agent App...")
    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")
    if space_host:
        print(f"✅ SPACE_HOST: {space_host}")
    if space_id:
        print(f"✅ SPACE_ID: {space_id}")
        print(f" Repo: https://huggingface.co/spaces/{space_id}")
    else:
        print("ℹ️ Rodando localmente (SPACE_ID não definido).")
    # Only warns when the key is missing; the launch below proceeds either way.
    # NOTE(review): presumably GAIAAgent reads this key — confirm in agent.py.
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    if anthropic_key:
        print("✅ ANTHROPIC_API_KEY encontrada.")
    else:
        print("⚠️ ANTHROPIC_API_KEY não encontrada! Configure-a antes de rodar.")
    print("─" * 50)
    demo.launch(debug=True, share=False)