"""
app.py — GAIA Agent Evaluation Runner
Para executar no HuggingFace Spaces (ou local).
"""
import os
import gradio as gr
import requests
import pandas as pd
from agent import GAIAAgent
# Base URL of the scoring API; the "/questions" and "/submit" endpoint URLs
# used by run_and_submit_all are built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
def run_and_submit_all(
    profile: gr.OAuthProfile | None,
) -> tuple[str, pd.DataFrame | None]:
    """Fetch the questions, run the agent on each one, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status, results)`` pair. ``status`` is the message to display.
        ``results`` is a DataFrame with one row per processed question
        (task id, truncated question, submitted answer), or ``None`` when the
        run stopped before the agent loop started (no login, agent failed to
        initialize, or the questions could not be fetched).
    """
    # Guard clause: nothing can be submitted without a username.
    if not profile:
        return "⚠️ Faça login com o botão do Hugging Face antes de continuar.", None
    username = profile.username
    print(f"Usuário logado: {username}")

    space_id = os.getenv("SPACE_ID", "")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # ── 1. Instantiate the agent ─────────────────────────────────────────────
    try:
        agent = GAIAAgent()
    except Exception as e:
        return f"❌ Erro ao inicializar o agente: {e}", None
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code link: {agent_code}")

    # ── 2. Fetch the questions ───────────────────────────────────────────────
    print(f"Buscando perguntas em: {questions_url}")
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions_data = resp.json()
        if not questions_data:
            return "A lista de perguntas está vazia.", None
        print(f"✅ {len(questions_data)} perguntas recebidas.")
    except Exception as e:
        return f"❌ Erro ao buscar perguntas: {e}", None

    # ── 3. Run the agent ─────────────────────────────────────────────────────
    results_log = []
    answers_payload = []
    total = len(questions_data)
    for position, item in enumerate(questions_data, start=1):
        # A malformed payload may contain non-dict entries; treat them like
        # any other invalid item instead of crashing on ``.get``.
        is_mapping = isinstance(item, dict)
        task_id = item.get("task_id") if is_mapping else None
        question_text = item.get("question") if is_mapping else None
        if not task_id or question_text is None:
            print(f"Pulando item inválido: {item}")
            continue

        print(f"\n[{position}/{total}] task_id={task_id}")
        try:
            answer = agent(question_text, task_id=task_id)
        except Exception as e:
            # An agent failure is recorded and submitted as the answer so the
            # remaining questions are still processed.
            answer = f"AGENT ERROR: {e}"
            print(answer)

        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({
            "Task ID": task_id,
            # Truncated to keep the results table readable.
            "Question": str(question_text)[:120],
            "Submitted Answer": answer,
        })

    if not answers_payload:
        return "O agente não produziu respostas.", pd.DataFrame(results_log)

    # ── 4. Submit the answers ────────────────────────────────────────────────
    submission = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    print(f"\nEnviando {len(answers_payload)} respostas para {submit_url}...")
    try:
        resp = requests.post(submit_url, json=submission, timeout=120)
        resp.raise_for_status()
        result = resp.json()
        status = (
            f"✅ Submissão concluída!\n"
            f"Usuário: {result.get('username')}\n"
            f"Pontuação: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} corretas)\n"
            f"Mensagem: {result.get('message', '')}"
        )
        print(status)
        return status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        detail = f"HTTP {e.response.status_code}"
        try:
            # Prefer the structured "detail" field of a JSON error body.
            detail += f" — {e.response.json().get('detail', e.response.text)}"
        except (ValueError, AttributeError):
            # Body is not JSON (or not a JSON object): fall back to raw text.
            detail += f" — {e.response.text[:300]}"
        return f"❌ Submissão falhou: {detail}", pd.DataFrame(results_log)
    except Exception as e:
        return f"❌ Erro inesperado na submissão: {e}", pd.DataFrame(results_log)
# ── Gradio interface ─────────────────────────────────────────────────────────
# Builds the UI: instructions, a login button, a run button, and two outputs
# (status text and a table of questions/answers) filled by run_and_submit_all.
with gr.Blocks(title="GAIA Agent — Final Assignment") as demo:
    gr.Markdown("# 🤖 GAIA Agent — HuggingFace Agents Course Final Assignment")
    # The markdown text is kept flush-left so no line is indented inside the
    # string literal.
    gr.Markdown(
        """
**Como usar:**
1. Faça login com sua conta Hugging Face usando o botão abaixo.
2. Clique em **Rodar Avaliação & Submeter** para executar o agente nas 20 perguntas.
3. Aguarde — o agente usa ferramentas (busca, Python, Wikipedia) para responder.
> ⏳ O processo pode levar alguns minutos. Cada pergunta é processada com até 25 passos de raciocínio.
"""
    )
    gr.LoginButton()
    run_btn = gr.Button("🚀 Rodar Avaliação & Submeter", variant="primary")
    status_out = gr.Textbox(
        label="Status / Resultado da Submissão", lines=6, interactive=False
    )
    results_df = gr.DataFrame(label="Perguntas e Respostas do Agente", wrap=True)
    # No explicit inputs: the handler's only parameter is the OAuth profile.
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, results_df])
if __name__ == "__main__":
    # Startup diagnostics: report the environment variables that are read
    # here, then launch the Gradio app.
    separator = "─" * 50
    print("\n" + separator)
    print("Iniciando GAIA Agent App...")

    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")
    if space_host:
        print(f"✅ SPACE_HOST: {space_host}")
    if space_id:
        print(f"✅ SPACE_ID: {space_id}")
        print(f" Repo: https://huggingface.co/spaces/{space_id}")
    else:
        print("ℹ️ Rodando localmente (SPACE_ID não definido).")

    # Only the presence of the key is reported; its value is never printed.
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    if anthropic_key:
        print("✅ ANTHROPIC_API_KEY encontrada.")
    else:
        print("⚠️ ANTHROPIC_API_KEY não encontrada! Configure-a antes de rodar.")

    print(separator)
    demo.launch(debug=True, share=False)