Spaces:

destinyebuka
/

AIDA

Running

File size: 17,383 Bytes

ec34ad8

#!/usr/bin/env python3
"""
RLM (Recursive Language Model) Test Suite for AIDA

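Tests:
1. Query Analyzer - Detect complex query types
2. Strategy Selector - Check that complex queries are routed to RLM strategies
3. RLM Search Service (live) - Execute recursive searches end-to-end (only with --live)
4. Pattern Detection - POI and French query detection
5. Distance Calculation - Haversine distance checks
6. OpenStreetMap POI Service - Geocoding and POI lookup integration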

Run with:
    python test_rlm.py
    python test_rlm.py --live  # Run with actual LLM calls

Author: AIDA Team
Date: 2026-02-09
"""

import asyncio
import sys
import json
from typing import List, Dict

# Add project root to path
sys.path.insert(0, ".")


# =============================================================================
# Color output for terminal
# =============================================================================

class Colors:
    """ANSI escape sequences used by the print helpers in this file."""

    # Color codes placed in front of the text they apply to.
    HEADER = "\x1b[95m"
    BLUE = "\x1b[94m"
    CYAN = "\x1b[96m"
    GREEN = "\x1b[92m"
    WARNING = "\x1b[93m"
    FAIL = "\x1b[91m"

    # Text attribute, and the sequence appended after styled text.
    BOLD = "\x1b[1m"
    ENDC = "\x1b[0m"


def print_header(text: str):
    """Print ``text`` as a bold banner between two 60-character ``=`` rules.

    A blank line is emitted before the first rule and after the last one.
    """
    style = f"{Colors.HEADER}{Colors.BOLD}"
    rule = "=" * 60
    print(f"\n{style}{rule}{Colors.ENDC}")
    print(f"{style}{text}{Colors.ENDC}")
    print(f"{style}{rule}{Colors.ENDC}\n")


def print_success(text: str):
    """Print ``text`` in the GREEN color with a leading check-mark emoji."""
    body = f"✅ {text}"
    print(f"{Colors.GREEN}{body}{Colors.ENDC}")


def print_fail(text: str):
    """Print ``text`` in the FAIL color with a leading cross-mark emoji."""
    body = f"❌ {text}"
    print(f"{Colors.FAIL}{body}{Colors.ENDC}")


def print_info(text: str):
    """Print ``text`` in the CYAN color with a leading information emoji."""
    body = f"ℹ️  {text}"
    print(f"{Colors.CYAN}{body}{Colors.ENDC}")


def print_warning(text: str):
    """Print ``text`` in the WARNING color with a leading warning emoji."""
    body = f"⚠️  {text}"
    print(f"{Colors.WARNING}{body}{Colors.ENDC}")


# =============================================================================
# Test 1: Query Analyzer
# =============================================================================

def test_query_analyzer():
    """Check that sample queries are classified with the expected complexity.

    Every sample query is passed to ``analyze_query_complexity`` and the
    ``complexity`` attribute of the returned analysis is compared with the
    expected ``QueryComplexity`` member. One line is printed per query
    (with expected/actual/reasoning details on a mismatch), followed by a
    tally.

    Returns:
        True when every query got the expected complexity, False otherwise.
    """
    print_header("Test 1: RLM Query Analyzer")

    from app.ai.services.rlm_query_analyzer import (
        QueryComplexity,
        analyze_query_complexity,
    )

    # Sample queries grouped by the complexity they are expected to produce.
    expectations = [
        (QueryComplexity.MULTI_HOP, [
            "3-bed apartment near international schools in Cotonou",
            "House close to the beach in Calavi",
            "Apartment within 2km of the airport",
            "Find something near the university",
        ]),
        (QueryComplexity.BOOLEAN_OR, [
            "Under 500k XOF or has a pool",
            "2-bedroom or 3-bedroom in Cotonou",
            "Either furnished or with parking",
        ]),
        (QueryComplexity.COMPARATIVE, [
            "Compare prices in Cotonou vs Calavi",
            "Which is cheaper: 2-bed in Cotonou or 3-bed in Calavi?",
            "Difference between rent in Porto-Novo and Cotonou",
        ]),
        (QueryComplexity.AGGREGATION, [
            "What is the average price in Cotonou?",
            "How many 3-bed apartments are available?",
            "Total listings in Calavi",
        ]),
        (QueryComplexity.MULTI_FACTOR, [
            "Best family apartment near schools and parks in safe area",
            "Top luxury modern apartments with good security",
            "Ideal quiet peaceful home for family",
        ]),
        # Simple queries (should NOT trigger RLM)
        (QueryComplexity.SIMPLE, [
            "3-bed apartment in Cotonou",
            "Houses under 500k",
            "Furnished apartment for rent",
        ]),
    ]

    # Flatten into (query, expected) pairs, keeping the order listed above.
    test_cases = [
        (query, expected)
        for expected, queries in expectations
        for query in queries
    ]

    mismatches = 0
    for query, expected in test_cases:
        analysis = analyze_query_complexity(query)
        label = f"'{query[:40]}...'"

        if analysis.complexity == expected:
            print_success(f"{label} → {analysis.complexity.value}")
            continue

        mismatches += 1
        print_fail(label)
        print(f"   Expected: {expected.value}")
        print(f"   Got: {analysis.complexity.value}")
        print(f"   Reasoning: {analysis.reasoning}")

    total = len(test_cases)
    print(f"\n{Colors.BOLD}Results: {total - mismatches}/{total} passed{Colors.ENDC}")
    return mismatches == 0


# =============================================================================
# Test 2: Strategy Selector Integration
# =============================================================================

async def test_strategy_selector():
    """Check which strategy ``select_search_strategy`` picks for sample queries.

    For each case the selector is awaited with the query and its params; the
    returned mapping's ``"strategy"`` entry and ``"use_rlm"`` flag (treated as
    False when absent) are compared with the expected values. A line is
    printed per case, followed by a tally.

    Returns:
        True when every case matched on both strategy and RLM flag,
        False otherwise.
    """
    print_header("Test 2: Strategy Selector RLM Routing")

    from app.ai.services.search_strategy_selector import (
        SearchStrategy,
        select_search_strategy,
    )

    # Cases expected to be routed to an RLM strategy.
    rlm_cases = [
        {
            "query": "3-bed near schools in Cotonou",
            "params": {"location": "Cotonou", "bedrooms": 3},
            "expected_rlm": True,
            "expected_strategy": SearchStrategy.RLM_MULTI_HOP,
        },
        {
            "query": "Under 500k or has pool",
            "params": {"max_price": 500000},
            "expected_rlm": True,
            "expected_strategy": SearchStrategy.RLM_BOOLEAN_OR,
        },
        {
            "query": "Compare Cotonou vs Calavi",
            "params": {},
            "expected_rlm": True,
            "expected_strategy": SearchStrategy.RLM_COMPARATIVE,
        },
    ]
    # Cases expected to stay on a traditional (non-RLM) strategy.
    traditional_cases = [
        {
            "query": "3-bed apartment in Cotonou under 500k",
            "params": {"location": "Cotonou", "bedrooms": 3, "max_price": 500000},
            "expected_rlm": False,
            "expected_strategy": SearchStrategy.MONGO_ONLY,
        },
    ]
    test_cases = rlm_cases + traditional_cases

    mismatches = 0
    for case in test_cases:
        query = case["query"]
        result = await select_search_strategy(query, case["params"])

        got_rlm = result.get("use_rlm", False)
        got_strategy = result["strategy"]
        checks = (
            got_rlm == case["expected_rlm"],
            got_strategy == case["expected_strategy"],
        )
        label = f"'{query[:40]}...'"

        if all(checks):
            print_success(label)
            print(f"   Strategy: {got_strategy.value}")
            print(f"   RLM: {got_rlm}")
            continue

        mismatches += 1
        print_fail(label)
        print(f"   Expected: {case['expected_strategy'].value}, RLM={case['expected_rlm']}")
        print(f"   Got: {got_strategy.value}, RLM={got_rlm}")

    total = len(test_cases)
    print(f"\n{Colors.BOLD}Results: {total - mismatches}/{total} passed{Colors.ENDC}")
    return mismatches == 0


# =============================================================================
# Test 3: RLM Search Service (LIVE)
# =============================================================================

async def test_rlm_search_live():
    """Run sample queries through ``rlm_search`` and print what comes back.

    This test performs real calls; the notice it prints says they go to the
    DeepSeek LLM and that ``DEEPSEEK_API_KEY`` must be set. For each query it
    prints the strategy used, the number of results, the LLM call count and,
    when present, up to three reasoning steps, the message (first 100
    characters) and a marker that comparison data is available.

    Returns:
        True when every query completed without raising, False when at least
        one query raised an exception.
    """
    print_header("Test 3: RLM Search Service (LIVE)")

    print_warning("This test makes actual API calls to DeepSeek LLM")
    print_info("Ensure DEEPSEEK_API_KEY is set in your environment\n")

    from app.ai.services.rlm_search_service import rlm_search

    test_queries = [
        {
            "query": "3-bed apartment near schools in Cotonou",
            "description": "Multi-hop proximity search"
        },
        {
            "query": "Under 300k or has pool",
            "description": "Boolean OR query"
        },
        {
            "query": "Compare average prices in Cotonou vs Calavi",
            "description": "Comparative analysis"
        },
        {
            "query": "Best family apartment near schools and parks",
            "description": "Multi-factor ranking"
        },
    ]

    failed = 0

    for i, test in enumerate(test_queries, 1):
        print(f"\n{Colors.CYAN}Test {i}: {test['description']}{Colors.ENDC}")
        print(f"Query: \"{test['query']}\"")

        # The whole per-query body stays inside the try so that one bad query
        # (or an unexpected result shape) does not abort the remaining ones.
        try:
            result = await rlm_search(test["query"])

            print_success(f"Strategy used: {result.get('strategy_used', 'Unknown')}")
            print(f"   Results: {len(result.get('results', []))} listings")
            print(f"   LLM calls: {result.get('call_count', 'N/A')}")

            if result.get("reasoning_steps"):
                print("   Reasoning steps:")
                for step in result["reasoning_steps"][:3]:
                    print(f"      - {step.get('step', 'unknown')}: {json.dumps(step, default=str)[:80]}...")

            if result.get("message"):
                print(f"   Message: {result['message'][:100]}...")

            if result.get("comparison_data"):
                print("   Comparison data available: Yes")

        except Exception as e:
            # Record the failure instead of silently reporting success.
            failed += 1
            print_fail(f"Error: {e}")

    total = len(test_queries)
    print(f"\n{Colors.BOLD}Results: {total - failed}/{total} passed{Colors.ENDC}")
    return failed == 0


# =============================================================================
# Test 4: Query Pattern Detection
# =============================================================================

def test_pattern_detection():
    """Check POI keyword detection and RLM routing for French queries.

    Two groups of checks are run through ``analyze_query_complexity``:

    * POI detection: the expected keyword must appear (case-insensitively)
      in at least one entry of ``analysis.detected_patterns``.
    * French queries: ``analysis.use_rlm`` must equal the expected flag.

    A line is printed per check, followed by a tally.

    Returns:
        True when every check passed, False otherwise.
    """
    print_header("Test 4: Pattern Detection")

    from app.ai.services.rlm_query_analyzer import analyze_query_complexity

    failed = 0

    # Test POI detection
    poi_queries = [
        ("apartment near the school", "school"),
        ("house close to beach", "beach"),
        ("near the university campus", "university"),
        ("walking distance from hospital", "hospital"),
        ("close to the market", "market"),
        ("near the airport", "airport"),
    ]

    print(f"{Colors.BOLD}POI (Point of Interest) Detection:{Colors.ENDC}")
    for query, expected_poi in poi_queries:
        analysis = analyze_query_complexity(query)
        poi_found = any(expected_poi in p.lower() for p in analysis.detected_patterns)
        if poi_found:
            print_success(f"'{query}' → Detected '{expected_poi}'")
        else:
            failed += 1
            print_fail(f"'{query}' → Expected '{expected_poi}', got {analysis.detected_patterns}")

    # Test French queries
    print(f"\n{Colors.BOLD}French Query Detection:{Colors.ENDC}")
    french_queries = [
        ("appartement près de l'école", True),  # Near school
        ("maison proche de la plage", True),    # Close to beach
        ("comparer les prix", True),            # Compare prices
        ("appartement 3 chambres à Cotonou", False),  # Simple query
    ]

    for query, expected_rlm in french_queries:
        analysis = analyze_query_complexity(query)
        if analysis.use_rlm == expected_rlm:
            print_success(f"'{query}' → RLM={analysis.use_rlm}")
        else:
            failed += 1
            print_fail(f"'{query}' → Expected RLM={expected_rlm}, got {analysis.use_rlm}")

    # Report the real outcome so the caller's overall pass/fail flag is accurate.
    total = len(poi_queries) + len(french_queries)
    print(f"\n{Colors.BOLD}Results: {total - failed}/{total} passed{Colors.ENDC}")
    return failed == 0


# =============================================================================
# Test 5: Distance Calculation
# =============================================================================

def test_distance_calculation():
    """Compare the RLM agent's distance helper against approximate distances.

    Each case supplies two coordinate pairs, an expected distance in km and a
    tolerance in km; the case passes when ``agent._calculate_distance`` is
    within the tolerance of the expected value. A line is printed per case,
    followed by a tally.

    Returns:
        True when every case was within tolerance, False otherwise.
    """
    print_header("Test 5: Distance Calculation (Haversine)")

    from app.ai.services.rlm_search_service import RLMSearchAgent

    agent = RLMSearchAgent()

    # Known distances (approximate):
    # (lat1, lon1, lat2, lon2, expected_km, tolerance_km)
    test_cases = [
        (6.3654, 2.4183, 6.3700, 2.4200, 0.5, 0.3),    # Nearby in Cotonou
        (6.3654, 2.4183, 6.4300, 2.3500, 10, 2),       # Cross-city
        (6.3654, 2.4183, 6.5000, 2.0000, 50, 10),      # Longer distance
    ]

    hits = 0
    for lat1, lon1, lat2, lon2, expected, tolerance in test_cases:
        distance = agent._calculate_distance(lat1, lon1, lat2, lon2)
        # The same summary line is used for both outcomes; only the marker differs.
        summary = f"({lat1}, {lon1}) → ({lat2}, {lon2}): {distance:.2f} km (expected ~{expected} km)"

        if abs(distance - expected) > tolerance:
            print_fail(summary)
        else:
            hits += 1
            print_success(summary)

    total = len(test_cases)
    print(f"\n{Colors.BOLD}Results: {hits}/{total} passed{Colors.ENDC}")
    return hits == total


# =============================================================================
# Test 6: OpenStreetMap POI Service
# =============================================================================

async def test_osm_poi_service():
    """Exercise the OSM POI helpers for Cotonou and print what they return.

    Steps, in order: geocode the city, look up schools, hospitals and
    beaches (using the French type name "plage"), compute the distance from
    the geocoded point to the first school, and finally ask the RLM agent
    for school locations. According to the notices printed below, these are
    real OpenStreetMap API calls.

    Returns:
        Always True; outcomes are only reported through printed output.
    """
    print_header("Test 6: OpenStreetMap POI Service")

    print_info("This test makes real API calls to OpenStreetMap (FREE)")
    print_info("Testing: Nominatim geocoding + Overpass POI search\n")

    from app.ai.services.osm_poi_service import (
        calculate_distance_km,
        find_pois,
        find_pois_overpass,
        geocode_location,
    )

    place = "Cotonou, Benin"

    def bold(title):
        # Wrap a section title in the bold/reset sequences.
        return f"{Colors.BOLD}{title}{Colors.ENDC}"

    def show_first_three(pois):
        # Print name and coordinates for at most three POIs.
        for poi in pois[:3]:
            print(f"    - {poi['name']} ({poi['lat']:.4f}, {poi['lon']:.4f})")

    # Step 1: geocoding
    print(bold("1. Geocoding Test:"))
    coords = await geocode_location(place)
    if not coords:
        print_fail("Failed to geocode 'Cotonou, Benin'")
    else:
        print_success(f"Geocoded 'Cotonou, Benin' → ({coords[0]:.4f}, {coords[1]:.4f})")

    # Step 2: schools
    print("\n" + bold("2. Find Schools in Cotonou:"))
    schools = await find_pois("school", place, radius_km=3, limit=5)
    print(f"  Found {len(schools)} schools:")
    show_first_three(schools)

    # Step 3: hospitals
    print("\n" + bold("3. Find Hospitals in Cotonou:"))
    hospitals = await find_pois("hospital", place, radius_km=5, limit=5)
    print(f"  Found {len(hospitals)} hospitals:")
    show_first_three(hospitals)

    # Step 4: French POI type
    print("\n" + bold("4. French POI Type 'plage' (beach):"))
    beaches = await find_pois("plage", place, radius_km=10, limit=5)
    print(f"  Found {len(beaches)} beaches")

    # Step 5: distance — needs both a geocoded point and at least one school
    print("\n" + bold("5. Distance Calculation:"))
    if coords and schools:
        nearest = schools[0]
        dist = calculate_distance_km(coords[0], coords[1], nearest["lat"], nearest["lon"])
        print_success(f"Distance from Cotonou center to {nearest['name']}: {dist:.2f} km")

    # Step 6: integration with the RLM agent
    print("\n" + bold("6. RLM Integration Test:"))
    from app.ai.services.rlm_search_service import RLMSearchAgent
    agent = RLMSearchAgent()

    pois = await agent._find_poi_locations("school", place)
    if not pois:
        print_warning("RLM agent found no schools (may be network issue)")
    else:
        print_success(f"RLM agent found {len(pois)} schools via OSM")
        print(f"    First result: {pois[0].get('name', 'Unknown')}")

    print("\n" + bold("OSM Integration Complete!"))
    return True


# =============================================================================
# Main
# =============================================================================

async def main():
    """Run the test suite, print a summary and report the overall outcome.

    The query analyzer, strategy selector, pattern detection and distance
    tests determine the overall result. The OpenStreetMap test always runs
    and the live RLM search test runs only when ``--live`` is present in
    ``sys.argv``; neither of their results is folded into the overall
    outcome.

    Returns:
        True when all tests that count toward the outcome passed,
        False otherwise.
    """
    print(f"\n{Colors.BOLD}{Colors.HEADER}")
    print("╔═══════════════════════════════════════════════════════════╗")
    print("║     RLM (Recursive Language Model) Test Suite for AIDA    ║")
    print("╚═══════════════════════════════════════════════════════════╝")
    print(f"{Colors.ENDC}\n")

    live_mode = "--live" in sys.argv

    all_passed = True

    # Test 1: Query Analyzer (no LLM calls)
    if not test_query_analyzer():
        all_passed = False

    # Test 2: Strategy Selector
    if not await test_strategy_selector():
        all_passed = False

    # Test 4: Pattern Detection
    if not test_pattern_detection():
        all_passed = False

    # Test 5: Distance Calculation
    if not test_distance_calculation():
        all_passed = False

    # Test 6: OpenStreetMap POI Service
    # Result is not counted (presumably because it depends on the network).
    await test_osm_poi_service()

    # Test 3: Live RLM Search (only if --live flag); result is not counted
    if live_mode:
        print_warning("\nRunning LIVE tests with actual LLM calls...")
        await test_rlm_search_live()
    else:
        print_info("\nSkipping live LLM tests. Run with --live flag to include them.")
        print_info("Example: python test_rlm.py --live")

    # Summary
    print_header("Test Summary")
    if all_passed:
        print_success("All offline tests passed!")
        print_info("RLM is ready to use in AIDA.")
    else:
        print_fail("Some tests failed. Check the output above.")

    # Usage examples
    print(f"\n{Colors.BOLD}Usage Examples:{Colors.ENDC}")
    print("""
    # In your code:
    from app.ai.services.rlm_search_service import rlm_search

    # Multi-hop search (near POI)
    results = await rlm_search("3-bed near schools in Cotonou")

    # Boolean OR
    results = await rlm_search("under 500k or has pool")

    # Comparative
    results = await rlm_search("compare Cotonou vs Calavi")

    # The brain.py automatically uses RLM when appropriate!
    """)

    return all_passed


if __name__ == "__main__":
    # Exit non-zero on failure so shells and CI can detect a failed run.
    sys.exit(0 if asyncio.run(main()) else 1)