"""
Test script for GAIA Agent

This script helps you test your agent implementation before submitting to the leaderboard.
Run this to verify your agent works correctly.
"""

import requests
import json
from typing import Dict, List
from agent_implementation import create_agent
from config import AGENT_TYPE

def test_api_connection(timeout: float = 30.0):
    """Check that the GAIA questions endpoint is reachable.

    Sends a GET request to the questions endpoint and reports the outcome
    on stdout.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block forever on a stalled connection.

    Returns:
        True if the endpoint answered with HTTP 200 and the body could be
        parsed as JSON and counted; False on any other status code or on
        any error (network failure, timeout, malformed body).
    """
    print("🔍 Testing API connection...")

    try:
        # Test questions endpoint
        response = requests.get(
            "https://gaia-benchmark.vercel.app/api/questions",
            timeout=timeout,
        )
        if response.status_code == 200:
            questions = response.json()
            print(f"✅ API connection successful! Found {len(questions)} questions")
            return True

        print(f"❌ API connection failed with status code: {response.status_code}")
        return False
    except Exception as e:
        # Deliberately broad: this is a diagnostic check, so every failure
        # mode is reported to the user rather than raised.
        print(f"❌ API connection error: {e}")
        return False

def test_random_question(timeout: float = 30.0):
    """Fetch one random question from the GAIA API and print a short preview.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block forever on a stalled connection.

    Returns:
        The decoded JSON payload of the question on success, or None when
        the request fails, returns a non-200 status, or cannot be decoded.
    """
    print("\n🎲 Testing random question fetch...")

    try:
        response = requests.get(
            "https://gaia-benchmark.vercel.app/api/random-question",
            timeout=timeout,
        )
        if response.status_code == 200:
            question = response.json()
            # Coerce to str before slicing: a null/non-string question text
            # would otherwise raise TypeError and be misreported as a fetch error.
            preview = str(question.get('question', 'N/A'))[:100]
            print("✅ Random question fetched successfully!")
            print(f"   Task ID: {question.get('task_id', 'N/A')}")
            print(f"   Question: {preview}...")
            return question

        print(f"❌ Failed to fetch random question: {response.status_code}")
        return None
    except Exception as e:
        # Deliberately broad: any failure is reported and signalled via None.
        print(f"❌ Error fetching random question: {e}")
        return None

def test_agent_on_question(agent, question: Dict):
    """Test the agent on a specific question"""
    print(f"\n🤖 Testing agent on question...")
    print(f"   Task ID: {question.get('task_id', 'N/A')}")
    print(f"   Question: {question.get('question', 'N/A')}")
    
    try:
        answer = agent.generate_answer(question)
        print(f"✅ Agent generated answer:")
        print(f"   Answer: {answer}")
        return answer
    except Exception as e:
        print(f"❌ Agent error: {e}")
        return None

def test_file_download(task_id: str, timeout: float = 30.0):
    """Try to download the file attached to a task and print a preview.

    Args:
        task_id: Identifier of the task whose file should be fetched.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block forever on a stalled connection.

    Returns:
        The response body as text on HTTP 200; an empty string when the
        server returns any other status or the request fails.
    """
    # Imported locally so this function does not depend on a file-level import.
    from urllib.parse import quote

    print(f"\n📁 Testing file download for task {task_id}...")

    try:
        # Percent-encode the id so characters such as "/" or "?" cannot
        # alter the request path or inject a query string.
        encoded_id = quote(str(task_id), safe="")
        response = requests.get(
            f"https://gaia-benchmark.vercel.app/api/files/{encoded_id}",
            timeout=timeout,
        )
        if response.status_code == 200:
            content = response.text
            print("✅ File downloaded successfully!")
            print(f"   Content length: {len(content)} characters")
            print(f"   Preview: {content[:100]}...")
            return content

        print(f"⚠️  No file found for task {task_id} (status: {response.status_code})")
        return ""
    except Exception as e:
        # Deliberately broad: a failed download is reported, not raised.
        print(f"❌ File download error: {e}")
        return ""

def run_comprehensive_test():
    """Run the end-to-end check: API, agent creation, question, file, answer.

    The steps run in order and the function stops at the first one that
    fails. A missing file for the task is not treated as a failure.

    Returns:
        True when every required step succeeded, False otherwise.
    """
    print("🚀 Starting comprehensive GAIA agent test...")
    print("=" * 60)

    # Test 1: API Connection
    if not test_api_connection():
        print("❌ Cannot proceed without API connection")
        return False

    # Test 2: Create agent
    print(f"\n🤖 Creating {AGENT_TYPE} agent...")
    try:
        agent = create_agent(AGENT_TYPE)
        print(f"✅ {AGENT_TYPE} agent created successfully!")
    except Exception as e:
        print(f"❌ Failed to create agent: {e}")
        return False

    # Test 3: Fetch random question
    question = test_random_question()
    if not question:
        print("❌ Cannot proceed without a test question")
        return False

    # Test 4: Test file download (informational only; result is not needed here)
    task_id = question.get('task_id', '')
    if task_id:
        test_file_download(task_id)

    # Test 5: Test agent on question
    answer = test_agent_on_question(agent, question)
    if not answer:
        print("❌ Agent failed to generate answer")
        return False

    # Test 6: Validate answer format
    # Validate the textual form so a non-string answer (e.g. a number) does
    # not raise TypeError on len() / substring checks.
    answer_text = str(answer)
    has_marker = 'FINAL ANSWER' in answer_text
    print("\n✅ Answer validation:")
    print(f"   Length: {len(answer_text)} characters")
    print(f"   Contains 'FINAL ANSWER': {has_marker}")
    if has_marker:
        print("   ⚠️  Warning: Answer contains 'FINAL ANSWER' - remove this for submission!")

    print("\n🎉 Comprehensive test completed successfully!")
    return True

def test_multiple_questions(num_questions: int = 3, timeout: float = 30.0):
    """Run the agent on several random questions and print a summary.

    Args:
        num_questions: How many random questions to fetch and answer.
        timeout: Seconds to wait for each question request before giving
            up. Without a timeout, ``requests`` can block forever.

    Returns:
        A list with one dict per question that was fetched. Each dict has
        ``task_id``, ``question`` and ``status`` (``'success'`` or
        ``'error'``), plus ``answer`` on success or ``error`` on failure.
        Questions that could not be fetched are not included.
    """
    print(f"\n🔄 Testing agent on {num_questions} random questions...")

    agent = create_agent(AGENT_TYPE)
    results = []

    for i in range(num_questions):
        print(f"\n--- Test {i+1}/{num_questions} ---")

        # Fetch random question. Network and decoding errors are handled
        # per iteration so one bad request does not abort the whole run.
        question = None
        try:
            response = requests.get(
                "https://gaia-benchmark.vercel.app/api/random-question",
                timeout=timeout,
            )
            if response.status_code == 200:
                question = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"❌ Failed to fetch question {i+1}: {e}")
            continue

        if not isinstance(question, dict):
            # Non-200 status or a payload that is not a JSON object.
            print(f"❌ Failed to fetch question {i+1}")
            continue

        # str() guards against a null/non-string question text.
        print(f"Question: {str(question.get('question', 'N/A'))[:80]}...")

        # Generate answer
        try:
            answer = agent.generate_answer(question)
            print(f"Answer: {str(answer)[:100]}...")
            results.append({
                'task_id': question.get('task_id'),
                'question': question.get('question'),
                'answer': answer,
                'status': 'success'
            })
        except Exception as e:
            print(f"❌ Error: {e}")
            results.append({
                'task_id': question.get('task_id'),
                'question': question.get('question'),
                'error': str(e),
                'status': 'error'
            })

    # Summary
    total = max(num_questions, 0)
    successful = sum(1 for r in results if r['status'] == 'success')
    # Avoid ZeroDivisionError when no questions were requested.
    success_rate = (successful / total) * 100 if total else 0.0
    print("\n📊 Test Summary:")
    print(f"   Total questions: {total}")
    print(f"   Successful: {successful}")
    print(f"   Failed: {total - successful}")
    print(f"   Success rate: {success_rate:.1f}%")

    return results

def _prompt(message: str):
    """Read one stripped line from stdin; return None if input is closed or interrupted."""
    try:
        return input(message).strip()
    except (EOFError, KeyboardInterrupt):
        return None


def main():
    """Run the interactive menu loop until the user chooses to exit.

    Options: 1 runs the comprehensive test, 2 tests several random
    questions, 3 tests a single random question, 4 exits. Closing stdin or
    pressing Ctrl-C at a prompt also exits cleanly.
    """
    print("🧪 GAIA Agent Test Suite")
    print("=" * 40)

    while True:
        print("\nChoose a test option:")
        print("1. Run comprehensive test")
        print("2. Test multiple questions")
        print("3. Test single random question")
        print("4. Exit")

        choice = _prompt("\nEnter your choice (1-4): ")
        if choice is None:
            # Input ended (EOF / Ctrl-C): finish the prompt line and exit.
            print()
            choice = "4"

        if choice == "1":
            run_comprehensive_test()
        elif choice == "2":
            raw = _prompt("How many questions to test? (default: 3): ")
            # Only the parsing is guarded, so a ValueError raised while the
            # tests run is not mistaken for bad user input.
            try:
                num = int(raw) if raw else 3
            except ValueError:
                num = 0
            if num < 1:
                # Covers non-numeric input as well as zero/negative counts.
                print("Invalid number, using default: 3")
                num = 3
            test_multiple_questions(num)
        elif choice == "3":
            if test_api_connection():
                try:
                    agent = create_agent(AGENT_TYPE)
                except Exception as e:
                    # Keep the menu alive if the agent cannot be built.
                    print(f"❌ Failed to create agent: {e}")
                    continue
                question = test_random_question()
                if question:
                    test_agent_on_question(agent, question)
        elif choice == "4":
            print("👋 Goodbye!")
            break
        else:
            print("Invalid choice. Please enter 1-4.")

# Start the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()