agentbee

Sleeping

mangubee and Claude committed 24 days ago

Commit

c8247b8

1 Parent(s): 40db96d

ui: move Test & Debug tab after Full Evaluation

Test & Debug is rarely used, so Full Evaluation is now the first tab.

Tab order:
- Tab 1: Full Evaluation (primary functionality)
- Tab 2: Test & Debug (debugging/diagnostics)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show

app.py +53 -53

app.py CHANGED Viewed

@@ -696,59 +696,7 @@ with gr.Blocks() as demo:
     )
     with gr.Tabs():
-        # Tab 1: Test Single Question (NEW - for diagnostics)
-        with gr.Tab("🔍 Test & Debug"):
-            gr.Markdown("""
-            **Test Mode:** Run the agent on a single question and see detailed diagnostics.
-            This mode shows:
-            - API key status
-            - Execution plan
-            - Tools selected and executed
-            - Evidence collected
-            - Errors encountered
-            - Final answer
-            """)
-            test_question_input = gr.Textbox(
-                label="Enter Test Question",
-                placeholder="e.g., What is the capital of France?",
-                lines=3,
-            )
-            with gr.Row():
-                llm_provider_dropdown = gr.Dropdown(
-                    label="LLM Provider",
-                    choices=["Gemini", "HuggingFace", "Groq", "Claude"],
-                    value="HuggingFace",
-                    info="Select which LLM to use for this test",
-                )
-            test_button = gr.Button("Run Test", variant="primary")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    test_answer_output = gr.Textbox(
-                        label="Answer", lines=3, interactive=False
-                    )
-                    test_api_status = gr.Textbox(
-                        label="API Keys Status", lines=5, interactive=False
-                    )
-                with gr.Column(scale=2):
-                    test_diagnostics_output = gr.Textbox(
-                        label="Execution Diagnostics", lines=20, interactive=False
-                    )
-            test_button.click(
-                fn=test_single_question,
-                inputs=[
-                    test_question_input,
-                    llm_provider_dropdown,
-                ],
-                outputs=[test_answer_output, test_diagnostics_output, test_api_status],
-            )
-        # Tab 2: Full Evaluation (existing functionality)
         with gr.Tab("📊 Full Evaluation"):
             gr.Markdown(
                 """
@@ -812,6 +760,58 @@ with gr.Blocks() as demo:
                 outputs=[status_output, results_table, export_output],
             )
 if __name__ == "__main__":
     print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information

     )
     with gr.Tabs():
+        # Tab 1: Full Evaluation (primary functionality)
         with gr.Tab("📊 Full Evaluation"):
             gr.Markdown(
                 """
                 outputs=[status_output, results_table, export_output],
             )
+        # Tab 2: Test Single Question (debugging/diagnostics)
+        with gr.Tab("🔍 Test & Debug"):
+            gr.Markdown("""
+            **Test Mode:** Run the agent on a single question and see detailed diagnostics.
+            This mode shows:
+            - API key status
+            - Execution plan
+            - Tools selected and executed
+            - Evidence collected
+            - Errors encountered
+            - Final answer
+            """)
+            test_question_input = gr.Textbox(
+                label="Enter Test Question",
+                placeholder="e.g., What is the capital of France?",
+                lines=3,
+            )
+            with gr.Row():
+                llm_provider_dropdown = gr.Dropdown(
+                    label="LLM Provider",
+                    choices=["Gemini", "HuggingFace", "Groq", "Claude"],
+                    value="HuggingFace",
+                    info="Select which LLM to use for this test",
+                )
+            test_button = gr.Button("Run Test", variant="primary")
+            with gr.Row():
+                with gr.Column(scale=1):
+                    test_answer_output = gr.Textbox(
+                        label="Answer", lines=3, interactive=False
+                    )
+                    test_api_status = gr.Textbox(
+                        label="API Keys Status", lines=5, interactive=False
+                    )
+                with gr.Column(scale=2):
+                    test_diagnostics_output = gr.Textbox(
+                        label="Execution Diagnostics", lines=20, interactive=False
+                    )
+            test_button.click(
+                fn=test_single_question,
+                inputs=[
+                    test_question_input,
+                    llm_provider_dropdown,
+                ],
+                outputs=[test_answer_output, test_diagnostics_output, test_api_status],
+            )
 if __name__ == "__main__":
     print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information