FD900's picture
Update app.py
d74dce3 verified
raw
history blame
416 Bytes
import gradio as gr
from agent import GaiaAgent
from run import run_and_submit_all # Adjusted if you moved out of `gaia_benchmark`
# Build the agent once at import time so that every button click reuses the
# same instance instead of constructing a new one per request.
agent = GaiaAgent()


def _run_evaluation():
    """Run the evaluation with the shared agent and return its result.

    The return value of ``run_and_submit_all`` is passed through unchanged;
    Gradio writes it into the "Submission Result" textbox.
    """
    return run_and_submit_all(agent)


# Components must be created inside the ``with`` block so they are attached
# to ``demo``.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation")
    btn = gr.Button("Run Evaluation & Submit All Answers")
    result = gr.Textbox(label="Submission Result")
    # No ``inputs``: the handler takes no arguments; its return value is
    # written to ``result``.
    btn.click(fn=_run_evaluation, outputs=result)

# Launch after the layout is fully defined (i.e. outside the ``with`` block).
demo.launch()