AgileAndy Claude committed on
Commit
a4da413
·
1 Parent(s): 7139c44

Add interactive testing interface with two-tab design

Browse files

- Remove automatic test execution on startup
- Add Interactive Testing tab for asking any question
- Keep GAIA Certification tab for official evaluation
- Improve local auth handling with helpful HF_TOKEN guidance
- Clean separation between testing and certification workflows

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +72 -48
app.py CHANGED
@@ -364,80 +364,104 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
364
 
365
  # --- Build Gradio Interface using Blocks ---
366
  with gr.Blocks() as demo:
367
- gr.Markdown("# Basic Agent Evaluation Runner")
368
  gr.Markdown(
369
  """
370
- **Instructions:**
371
-
372
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
373
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
374
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
375
 
 
 
 
 
 
 
 
 
376
  ---
377
- **Disclaimers:**
378
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
379
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
380
  """
381
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
 
383
- gr.LoginButton()
384
 
385
- run_button = gr.Button("Run Evaluation & Submit All Answers")
386
 
387
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
388
- # Removed max_rows=10 from DataFrame constructor
389
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
390
 
391
- run_button.click(
392
- fn=run_and_submit_all,
393
- outputs=[status_output, results_table]
394
- )
395
 
396
  if __name__ == "__main__":
397
  print("\n" + "-"*30 + " App Starting " + "-"*30)
398
 
399
- # Test the agent first if running locally
400
- space_host_startup = os.getenv("SPACE_HOST")
401
- if not space_host_startup:
402
- print("Running local test of SimpleAgent...")
403
- agent = BasicAgent()
404
-
405
- test_questions = [
406
- "What is 15 + 27?",
407
- "When was the Eiffel Tower built?",
408
- "Who was the first person to walk on the moon?",
409
- "What is the capital of France?"
410
- ]
411
-
412
- print("Testing Simple Direct Agent:")
413
- print("=" * 40)
414
-
415
- for i, question in enumerate(test_questions, 1):
416
- print(f"\n{i}. Question: {question}")
417
- answer = agent(question)
418
- print(f" Answer: {answer}")
419
- print("-" * 25)
420
-
421
- print("\n" + "=" * 40)
422
- print("Local test completed. Starting Gradio interface...\n")
423
-
424
  # Check for SPACE_HOST and SPACE_ID at startup for information
425
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
 
426
 
427
  if space_host_startup:
428
  print(f"✅ SPACE_HOST found: {space_host_startup}")
429
  print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
430
  else:
431
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
432
 
433
- if space_id_startup: # Print repo URLs if SPACE_ID is found
434
  print(f"✅ SPACE_ID found: {space_id_startup}")
435
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
436
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
437
  else:
438
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
439
 
440
  print("-"*(60 + len(" App Starting ")) + "\n")
441
 
442
- print("Launching Gradio Interface for Basic Agent Evaluation...")
 
 
 
 
 
443
  demo.launch(debug=True, share=False)
 
364
 
365
  # --- Build Gradio Interface using Blocks ---
366
  with gr.Blocks() as demo:
367
+ gr.Markdown("# Enhanced Agent for GAIA Level 1 Certification")
368
  gr.Markdown(
369
  """
370
+ **Test your agent interactively or run the full GAIA evaluation:**
 
 
 
 
371
 
372
+ **Option 1: Interactive Testing**
373
+ - Ask any question to test how the agent works
374
+ - See detailed logs of search, Wikipedia lookup, and reasoning
375
+
376
+ **Option 2: GAIA Certification**
377
+ 1. Log in to your Hugging Face account using the button below
378
+ 2. Click 'Run Evaluation & Submit All Answers' for official scoring
379
+
380
  ---
 
 
 
381
  """
382
  )
383
+
384
+ with gr.Tab("Interactive Testing"):
385
+ gr.Markdown("### Ask the agent any question")
386
+ question_input = gr.Textbox(
387
+ label="Your Question",
388
+ placeholder="e.g., What is 25 * 4? or Who invented the telephone?",
389
+ lines=2
390
+ )
391
+ ask_button = gr.Button("Ask Agent", variant="primary")
392
+ answer_output = gr.Textbox(
393
+ label="Agent's Answer",
394
+ lines=3,
395
+ interactive=False
396
+ )
397
+
398
+ def ask_agent(question):
399
+ if not question.strip():
400
+ return "Please enter a question."
401
+
402
+ agent = BasicAgent()
403
+ try:
404
+ answer = agent(question)
405
+ return answer
406
+ except Exception as e:
407
+ return f"Error: {e}"
408
+
409
+ ask_button.click(
410
+ fn=ask_agent,
411
+ inputs=[question_input],
412
+ outputs=[answer_output]
413
+ )
414
+
415
+ with gr.Tab("GAIA Certification"):
416
+ gr.Markdown("### Official GAIA Level 1 Evaluation")
417
+ gr.Markdown(
418
+ """
419
+ **Instructions:**
420
+ 1. Log in to your Hugging Face account using the button below
421
+ 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score
422
+
423
+ **Note:** This can take several minutes as the agent processes all questions.
424
+ """
425
+ )
426
 
427
+ gr.LoginButton()
428
 
429
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
430
 
431
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
432
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
433
 
434
+ run_button.click(
435
+ fn=run_and_submit_all,
436
+ outputs=[status_output, results_table]
437
+ )
438
 
439
  if __name__ == "__main__":
440
  print("\n" + "-"*30 + " App Starting " + "-"*30)
441
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  # Check for SPACE_HOST and SPACE_ID at startup for information
443
+ space_host_startup = os.getenv("SPACE_HOST")
444
+ space_id_startup = os.getenv("SPACE_ID")
445
 
446
  if space_host_startup:
447
  print(f"✅ SPACE_HOST found: {space_host_startup}")
448
  print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
449
  else:
450
+ print("ℹ️ SPACE_HOST environment variable not found (running locally).")
451
 
452
+ if space_id_startup:
453
  print(f"✅ SPACE_ID found: {space_id_startup}")
454
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
455
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
456
  else:
457
+ print("ℹ️ SPACE_ID environment variable not found (running locally). Repo URL cannot be determined.")
458
 
459
  print("-"*(60 + len(" App Starting ")) + "\n")
460
 
461
+ print("Launching Gradio Interface for Enhanced Agent...")
462
+ # When running locally without HF_TOKEN, print a hint on how to set it (the token itself is not set here)
463
+ if not space_host_startup and not os.getenv("HF_TOKEN"):
464
+ print("💡 For local testing: Set HF_TOKEN environment variable to bypass auth issues")
465
+ print(" Example: export HF_TOKEN=hf_your_token_here")
466
+
467
  demo.launch(debug=True, share=False)