#!/usr/bin/env python3
"""
Quick test script for retry system - minimal version.

Run this to quickly verify retry functionality is working.
Usage: python test_retry_quick.py
"""

import json
import sys

import requests

BASE_URL = "http://localhost:8000"
TENANT_ID = "quick_test"
TIMEOUT = 120  # Increased timeout for LLM calls (model loading can take time)

def check_server_health():
    """Check if the backend server is running."""
    try:
        response = requests.get(f"{BASE_URL}/health", timeout=5)
        return response.status_code == 200
    except requests.exceptions.RequestException:
        return False

def test_debug_endpoint():
    """Quick test using debug endpoint."""
    print("πŸ” Testing retry system via /agent/debug endpoint...\n")
    
    # First check if server is running
    print("πŸ“‘ Checking if backend server is running...")
    if not check_server_health():
        print(f"❌ Cannot connect to {BASE_URL}")
        print("   Make sure backend is running:")
        print("   - uvicorn backend.api.main:app --port 8000")
        print("   - Or use: python backend/mcp_server/server.py")
        return False
    print("βœ… Backend server is running\n")
    
    try:
        print(f"⏱️  Sending request (timeout: {TIMEOUT}s)...")
        print("   Note: First request may take longer if Ollama needs to load the model\n")
        
        response = requests.post(
            f"{BASE_URL}/agent/debug",
            json={
                "tenant_id": TENANT_ID,
                "message": "What is quantum computing?"
            },
            timeout=TIMEOUT
        )
        
        if response.status_code == 200:
            data = response.json()
            reasoning_trace = data.get("reasoning_trace", [])
            
            print(f"βœ… Connected to backend")
            print(f"πŸ“‹ Found {len(reasoning_trace)} reasoning steps\n")
            
            # Look for retry steps
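            # A step counts as retry-related if any of the keywords below
            # appears anywhere in its serialized JSON (field names or values).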
            retry_steps = []
            for step in reasoning_trace:
                step_str = json.dumps(step).lower()
                if any(keyword in step_str for keyword in ["retry", "repair", "threshold", "rewritten"]):
                    retry_steps.append(step)
            
            if retry_steps:
                print(f"⚑ Found {len(retry_steps)} retry-related steps:")
                for step in retry_steps[:3]:
                    print(f"   - {step.get('step', 'unknown')}")
                print("\nβœ… Retry system is active and working!")
                return True
            else:
                print("ℹ️  No retry steps found (system working optimally - no retries needed)")
                print("\nβœ… Retry system is integrated (retries only happen when needed)")
                return True
        else:
            print(f"❌ Request failed: {response.status_code}")
            try:
                error_data = response.json()
                print(f"   Error details: {error_data}")
            except ValueError:
                print(f"   Response: {response.text[:200]}")
            return False
            
    except requests.exceptions.Timeout:
        print(f"❌ Request timed out after {TIMEOUT} seconds")
        print("\n   Possible causes:")
        print("   - Ollama is not running or model is not loaded")
        print("   - MCP server is not running")
        print("   - LLM call is taking too long")
        print("\n   To fix:")
        print("   1. Check if Ollama is running: ollama serve")
        print("   2. Check if model is available: ollama list")
        print("   3. Pull the model if needed: ollama pull llama3.1:latest")
        print("   4. Check if MCP server is running")
        return False
    except requests.exceptions.ConnectionError:
        print(f"❌ Cannot connect to {BASE_URL}")
        print("   Make sure backend is running:")
        print("   - uvicorn backend.api.main:app --port 8000")
        print("   - Or use: python backend/mcp_server/server.py")
        return False
    except Exception as e:
        print(f"❌ Error: {e}")
        print(f"   Error type: {type(e).__name__}")
        return False


if __name__ == "__main__":
    print("=" * 60)
    print("  Quick Retry System Test")
    print("=" * 60 + "\n")
    
    success = test_debug_endpoint()
    
    if success:
        print("\n" + "=" * 60)
        print("βœ… Test completed successfully!")
        print("=" * 60)
        print("\nπŸ’‘ For comprehensive tests, run:")
        print("   - pytest backend/tests/test_retry_system.py -v")
        print("   - python test_retry_integration.py")
    else:
        print("\n" + "=" * 60)
        print("❌ Test failed - check errors above")
        print("=" * 60)
        sys.exit(1)