alisamak committed on
Commit
f70dfc8
·
verified ·
1 Parent(s): 888b56f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -0
app.py CHANGED
@@ -5,6 +5,7 @@ import inspect
5
  import pandas as pd
6
  # from basic_agent import BasicAgent
7
  from LG_agent import BasicAgent
 
8
 
9
  # (Keep Constants as is)
10
  # --- Constants ---
@@ -162,12 +163,19 @@ with gr.Blocks() as demo:
162
 
163
  gr.LoginButton()
164
 
 
 
165
  run_button = gr.Button("Run Evaluation & Submit All Answers")
166
 
167
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
168
  # Removed max_rows=10 from DataFrame constructor
169
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
170
 
 
 
 
 
 
171
  # run_button.click(
172
  # fn=run_and_submit_all,
173
  # outputs=[status_output, results_table]
 
5
  import pandas as pd
6
  # from basic_agent import BasicAgent
7
  from LG_agent import BasicAgent
8
+ from LG_agent import test_questions, evaluate_agent
9
 
10
  # (Keep Constants as is)
11
  # --- Constants ---
 
163
 
164
  gr.LoginButton()
165
 
166
+ test_eval_button = gr.Button("🔍 Evaluate on Local GAIA Test Set")
167
+
168
  run_button = gr.Button("Run Evaluation & Submit All Answers")
169
 
170
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
171
  # Removed max_rows=10 from DataFrame constructor
172
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
173
 
174
+ test_eval_button.click(
175
+ fn=lambda: evaluate_agent(BasicAgent(), test_questions) or "Done — check logs for results.",
176
+ outputs=[status_output]
177
+ )
178
+
179
  # run_button.click(
180
  # fn=run_and_submit_all,
181
  # outputs=[status_output, results_table]