mangubee Claude committed on
Commit
c8247b8
·
1 Parent(s): 40db96d

ui: move Test & Debug tab after Full Evaluation

Browse files

Test & Debug is rarely used, so Full Evaluation is now the first tab.

Tab order:
- Tab 1: Full Evaluation (primary functionality)
- Tab 2: Test & Debug (debugging/diagnostics)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +53 -53
app.py CHANGED
@@ -696,59 +696,7 @@ with gr.Blocks() as demo:
696
  )
697
 
698
  with gr.Tabs():
699
- # Tab 1: Test Single Question (NEW - for diagnostics)
700
- with gr.Tab("🔍 Test & Debug"):
701
- gr.Markdown("""
702
- **Test Mode:** Run the agent on a single question and see detailed diagnostics.
703
-
704
- This mode shows:
705
- - API key status
706
- - Execution plan
707
- - Tools selected and executed
708
- - Evidence collected
709
- - Errors encountered
710
- - Final answer
711
- """)
712
-
713
- test_question_input = gr.Textbox(
714
- label="Enter Test Question",
715
- placeholder="e.g., What is the capital of France?",
716
- lines=3,
717
- )
718
-
719
- with gr.Row():
720
- llm_provider_dropdown = gr.Dropdown(
721
- label="LLM Provider",
722
- choices=["Gemini", "HuggingFace", "Groq", "Claude"],
723
- value="HuggingFace",
724
- info="Select which LLM to use for this test",
725
- )
726
-
727
- test_button = gr.Button("Run Test", variant="primary")
728
-
729
- with gr.Row():
730
- with gr.Column(scale=1):
731
- test_answer_output = gr.Textbox(
732
- label="Answer", lines=3, interactive=False
733
- )
734
- test_api_status = gr.Textbox(
735
- label="API Keys Status", lines=5, interactive=False
736
- )
737
- with gr.Column(scale=2):
738
- test_diagnostics_output = gr.Textbox(
739
- label="Execution Diagnostics", lines=20, interactive=False
740
- )
741
-
742
- test_button.click(
743
- fn=test_single_question,
744
- inputs=[
745
- test_question_input,
746
- llm_provider_dropdown,
747
- ],
748
- outputs=[test_answer_output, test_diagnostics_output, test_api_status],
749
- )
750
-
751
- # Tab 2: Full Evaluation (existing functionality)
752
  with gr.Tab("📊 Full Evaluation"):
753
  gr.Markdown(
754
  """
@@ -812,6 +760,58 @@ with gr.Blocks() as demo:
812
  outputs=[status_output, results_table, export_output],
813
  )
814
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
815
  if __name__ == "__main__":
816
  print("\n" + "-" * 30 + " App Starting " + "-" * 30)
817
  # Check for SPACE_HOST and SPACE_ID at startup for information
 
696
  )
697
 
698
  with gr.Tabs():
699
+ # Tab 1: Full Evaluation (primary functionality)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
700
  with gr.Tab("📊 Full Evaluation"):
701
  gr.Markdown(
702
  """
 
760
  outputs=[status_output, results_table, export_output],
761
  )
762
 
763
+ # Tab 2: Test Single Question (debugging/diagnostics)
764
+ with gr.Tab("🔍 Test & Debug"):
765
+ gr.Markdown("""
766
+ **Test Mode:** Run the agent on a single question and see detailed diagnostics.
767
+
768
+ This mode shows:
769
+ - API key status
770
+ - Execution plan
771
+ - Tools selected and executed
772
+ - Evidence collected
773
+ - Errors encountered
774
+ - Final answer
775
+ """)
776
+
777
+ test_question_input = gr.Textbox(
778
+ label="Enter Test Question",
779
+ placeholder="e.g., What is the capital of France?",
780
+ lines=3,
781
+ )
782
+
783
+ with gr.Row():
784
+ llm_provider_dropdown = gr.Dropdown(
785
+ label="LLM Provider",
786
+ choices=["Gemini", "HuggingFace", "Groq", "Claude"],
787
+ value="HuggingFace",
788
+ info="Select which LLM to use for this test",
789
+ )
790
+
791
+ test_button = gr.Button("Run Test", variant="primary")
792
+
793
+ with gr.Row():
794
+ with gr.Column(scale=1):
795
+ test_answer_output = gr.Textbox(
796
+ label="Answer", lines=3, interactive=False
797
+ )
798
+ test_api_status = gr.Textbox(
799
+ label="API Keys Status", lines=5, interactive=False
800
+ )
801
+ with gr.Column(scale=2):
802
+ test_diagnostics_output = gr.Textbox(
803
+ label="Execution Diagnostics", lines=20, interactive=False
804
+ )
805
+
806
+ test_button.click(
807
+ fn=test_single_question,
808
+ inputs=[
809
+ test_question_input,
810
+ llm_provider_dropdown,
811
+ ],
812
+ outputs=[test_answer_output, test_diagnostics_output, test_api_status],
813
+ )
814
+
815
  if __name__ == "__main__":
816
  print("\n" + "-" * 30 + " App Starting " + "-" * 30)
817
  # Check for SPACE_HOST and SPACE_ID at startup for information