Spaces:

Godreign
/

Policy2Logic

Sleeping

File size: 9,671 Bytes

9c21115

"""
Automated test runner for Policy-to-Logic RL Environment.

This script:
1. Starts the server in the background
2. Waits for it to be healthy
3. Runs all endpoint tests
4. Generates a detailed report
5. Cleans up (stops the server)

Run it:
    uv run python test_all.py
"""

import subprocess
import time
import requests
import json
import sys
from typing import Any, Dict, List
from datetime import datetime

BASE_URL = "http://localhost:7860"
MAX_RETRIES = 30
RETRY_DELAY = 1

class TestRunner:
    def __init__(self):
        self.results: List[Dict[str, Any]] = []
        self.server_process = None
        self.passed = 0
        self.failed = 0
        self.start_time = datetime.now()
        
    def log(self, message: str, level: str = "INFO"):
        """Print formatted log message."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        prefix = {
            "INFO": "ℹ️ ",
            "SUCCESS": "✅",
            "ERROR": "❌",
            "WARNING": "⚠️ ",
            "TEST": "🧪"
        }
        print(f"[{timestamp}] {prefix.get(level, '')} {message}")
    
    def start_server(self) -> bool:
        """Start the FastAPI server in background."""
        self.log("Starting FastAPI server...", "INFO")
        try:
            self.server_process = subprocess.Popen(
                ["uv", "run", "python", "main.py"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            self.log(f"Server process started (PID: {self.server_process.pid})", "SUCCESS")
            return True
        except Exception as e:
            self.log(f"Failed to start server: {e}", "ERROR")
            return False
    
    def wait_for_server(self) -> bool:
        """Wait for server to be ready."""
        self.log(f"Waiting for server to be ready (max {MAX_RETRIES} attempts)...", "INFO")
        
        for attempt in range(MAX_RETRIES):
            try:
                response = requests.get(f"{BASE_URL}/health", timeout=2)
                if response.status_code == 200:
                    self.log("Server is ready!", "SUCCESS")
                    return True
            except requests.exceptions.ConnectionError:
                if attempt < MAX_RETRIES - 1:
                    time.sleep(RETRY_DELAY)
        
        self.log(f"Server failed to start after {MAX_RETRIES} attempts", "ERROR")
        return False
    
    def test_endpoint(self, method: str, endpoint: str, data: dict = None, 
                      description: str = "") -> bool:
        """Test an API endpoint and record result."""
        url = f"{BASE_URL}{endpoint}"
        display_name = description or endpoint
        self.log(f"Testing: {display_name}", "TEST")
        
        try:
            if method == "POST":
                response = requests.post(url, json=data, timeout=5)
            else:
                response = requests.get(url, timeout=5)
            
            success = response.status_code in [200, 201]
            
            result = {
                "test": display_name,
                "endpoint": endpoint,
                "method": method,
                "status_code": response.status_code,
                "success": success,
                "response": None
            }
            
            try:
                result["response"] = response.json()
            except:
                result["response"] = response.text[:200]
            
            self.results.append(result)
            
            if success:
                self.passed += 1
                self.log(f"  Status: {response.status_code} - PASSED", "SUCCESS")
            else:
                self.failed += 1
                self.log(f"  Status: {response.status_code} - FAILED", "ERROR")
            
            return success
            
        except requests.exceptions.Timeout:
            self.failed += 1
            self.log(f"  TIMEOUT", "ERROR")
            self.results.append({
                "test": display_name,
                "endpoint": endpoint,
                "method": method,
                "status_code": None,
                "success": False,
                "response": "Request timeout"
            })
            return False
        except Exception as e:
            self.failed += 1
            self.log(f"  ERROR: {e}", "ERROR")
            self.results.append({
                "test": display_name,
                "endpoint": endpoint,
                "method": method,
                "status_code": None,
                "success": False,
                "response": str(e)
            })
            return False
    
    def run_tests(self) -> bool:
        """Run all endpoint tests."""
        self.log("Starting test suite...", "INFO")
        print("\n" + "="*70)
        
        # Test 1: Health check
        self.test_endpoint("GET", "/health", description="Health Check")
        
        # Test 2: List tasks
        self.test_endpoint("GET", "/tasks", description="List Available Tasks")
        
        # Test 3: Reset environment
        reset_result = self.test_endpoint(
            "POST", 
            "/reset", 
            data={"task_name": None},
            description="Reset Environment (Start Episode)"
        )
        
        # Test 4: Get state
        self.test_endpoint("GET", "/state", description="Get Current State")
        
        # Test 5: Ask clarification
        if reset_result:
            self.test_endpoint(
                "POST",
                "/step",
                data={
                    "action_type": "ask_clarification",
                    "content": "What are the business hours?"
                },
                description="Step: Ask Clarification"
            )
        
        # Test 6: Get state after step
        self.test_endpoint("GET", "/state", description="Get State After Step")
        
        # Test 7: Propose rules
        if reset_result:
            self.test_endpoint(
                "POST",
                "/step",
                data={
                    "action_type": "propose_rules",
                    "content": {
                        "rules": [
                            {"condition": "user.role == 'admin'", "action": "ALLOW"}
                        ]
                    }
                },
                description="Step: Propose Rules"
            )
        
        # Test 8: Final state
        self.test_endpoint("GET", "/state", description="Get Final State")
        
        print("="*70 + "\n")
        return self.failed == 0
    
    def stop_server(self):
        """Stop the server process."""
        if self.server_process:
            self.log("Stopping server...", "INFO")
            self.server_process.terminate()
            try:
                self.server_process.wait(timeout=5)
                self.log("Server stopped", "SUCCESS")
            except subprocess.TimeoutExpired:
                self.server_process.kill()
                self.log("Server force-killed", "WARNING")
    
    def generate_report(self):
        """Generate and print test report."""
        duration = (datetime.now() - self.start_time).total_seconds()
        
        print("\n" + "="*70)
        print("📊 TEST REPORT")
        print("="*70)
        print(f"Timestamp: {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"Duration:  {duration:.2f}s")
        print(f"Total:     {self.passed + self.failed} tests")
        print(f"Passed:    {self.passed} ✅")
        print(f"Failed:    {self.failed} ❌")
        print(f"Success Rate: {100 * self.passed / (self.passed + self.failed):.1f}%")
        print("="*70)
        
        print("\n📋 DETAILED RESULTS:\n")
        for i, result in enumerate(self.results, 1):
            status_icon = "✅" if result["success"] else "❌"
            print(f"{i}. {status_icon} {result['test']}")
            print(f"   Endpoint: {result['method']} {result['endpoint']}")
            print(f"   Status: {result['status_code']}")
            if isinstance(result['response'], dict):
                print(f"   Response: {json.dumps(result['response'], indent=6)[:200]}...")
            else:
                print(f"   Response: {str(result['response'])[:100]}")
            print()
        
        print("="*70)
        if self.failed == 0:
            print("🎉 ALL TESTS PASSED!")
        else:
            print(f"⚠️  {self.failed} test(s) failed. Check details above.")
        print("="*70 + "\n")
        
        return self.failed == 0
    
    def run(self) -> bool:
        """Run the entire test suite."""
        print("\n🚀 Policy-to-Logic RL Environment - Automated Test Suite\n")
        
        # Start server
        if not self.start_server():
            return False
        
        # Wait for server
        time.sleep(2)  # Give server time to start
        if not self.wait_for_server():
            self.stop_server()
            return False
        
        # Run tests
        all_passed = self.run_tests()
        
        # Stop server
        self.stop_server()
        
        # Generate report
        self.generate_report()
        
        return all_passed

def main():
    runner = TestRunner()
    try:
        success = runner.run()
        sys.exit(0 if success else 1)
    except KeyboardInterrupt:
        print("\n\n⚠️  Test interrupted by user")
        runner.stop_server()
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Unexpected error: {e}")
        runner.stop_server()
        sys.exit(1)

if __name__ == "__main__":
    main()