#!/usr/bin/env python3
"""
RLM (Recursive Language Model) Test Suite for AIDA

Tests:
1. Query Analyzer - Detect complex query types
2. RLM Search Service - Execute recursive searches
3. Integration - End-to-end flow

Run with:
    python test_rlm.py
    python test_rlm.py --live  # Run with actual LLM calls

The process exits with status 0 when every offline test passes and with
status 1 otherwise.

Author: AIDA Team
Date: 2026-02-09
"""

import asyncio
import sys
import json
from typing import List, Dict

# Add project root to path
sys.path.insert(0, ".")


# =============================================================================
# Color output for terminal
# =============================================================================

class Colors:
    """ANSI escape sequences used to colorize terminal output."""

    HEADER = '\033[95m'
    BLUE = '\033[94m'
    CYAN = '\033[96m'
    GREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'


def print_header(text: str) -> None:
    """Print *text* as a bold section title framed by two 60-character rules."""
    print(f"\n{Colors.HEADER}{Colors.BOLD}{'='*60}{Colors.ENDC}")
    print(f"{Colors.HEADER}{Colors.BOLD}{text}{Colors.ENDC}")
    print(f"{Colors.HEADER}{Colors.BOLD}{'='*60}{Colors.ENDC}\n")


def print_success(text: str) -> None:
    """Print *text* in green, prefixed with a check mark."""
    print(f"{Colors.GREEN}✅ {text}{Colors.ENDC}")


def print_fail(text: str) -> None:
    """Print *text* in red, prefixed with a cross mark."""
    print(f"{Colors.FAIL}❌ {text}{Colors.ENDC}")


def print_info(text: str) -> None:
    """Print *text* in cyan, prefixed with an info symbol."""
    print(f"{Colors.CYAN}ℹ️ {text}{Colors.ENDC}")


def print_warning(text: str) -> None:
    """Print *text* in yellow, prefixed with a warning symbol."""
    print(f"{Colors.WARNING}⚠️ {text}{Colors.ENDC}")


def shorten_query(text: str, limit: int = 40) -> str:
    """Return *text* cut down to at most *limit* characters for display.

    An ellipsis is appended only when characters were actually dropped, so a
    short string is never shown as if it had been truncated.

    Args:
        text: The string to display.
        limit: Maximum number of characters of *text* to keep.

    Returns:
        *text* unchanged when it fits, otherwise its first *limit* characters
        followed by ``...``.
    """
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


# =============================================================================
# Test 1: Query Analyzer
# =============================================================================

def test_query_analyzer() -> bool:
    """Check that the query analyzer assigns the expected complexity class.

    Each sample query is passed to ``analyze_query_complexity`` and the
    ``complexity`` attribute of the result is compared with the expected
    ``QueryComplexity`` member.  Mismatches are printed with the analyzer's
    reasoning.

    Returns:
        True when every sample query matched, False otherwise.
    """
    print_header("Test 1: RLM Query Analyzer")

    # Imported lazily so that merely importing this script does not require
    # the application package to be importable.
    from app.ai.services.rlm_query_analyzer import (
        analyze_query_complexity,
        QueryComplexity,
    )

    test_cases = [
        # Multi-hop queries
        ("3-bed apartment near international schools in Cotonou", QueryComplexity.MULTI_HOP),
        ("House close to the beach in Calavi", QueryComplexity.MULTI_HOP),
        ("Apartment within 2km of the airport", QueryComplexity.MULTI_HOP),
        ("Find something near the university", QueryComplexity.MULTI_HOP),

        # Boolean OR queries
        ("Under 500k XOF or has a pool", QueryComplexity.BOOLEAN_OR),
        ("2-bedroom or 3-bedroom in Cotonou", QueryComplexity.BOOLEAN_OR),
        ("Either furnished or with parking", QueryComplexity.BOOLEAN_OR),

        # Comparative queries
        ("Compare prices in Cotonou vs Calavi", QueryComplexity.COMPARATIVE),
        ("Which is cheaper: 2-bed in Cotonou or 3-bed in Calavi?", QueryComplexity.COMPARATIVE),
        ("Difference between rent in Porto-Novo and Cotonou", QueryComplexity.COMPARATIVE),

        # Aggregation queries
        ("What is the average price in Cotonou?", QueryComplexity.AGGREGATION),
        ("How many 3-bed apartments are available?", QueryComplexity.AGGREGATION),
        ("Total listings in Calavi", QueryComplexity.AGGREGATION),

        # Multi-factor queries
        ("Best family apartment near schools and parks in safe area", QueryComplexity.MULTI_FACTOR),
        ("Top luxury modern apartments with good security", QueryComplexity.MULTI_FACTOR),
        ("Ideal quiet peaceful home for family", QueryComplexity.MULTI_FACTOR),

        # Simple queries (should NOT trigger RLM)
        ("3-bed apartment in Cotonou", QueryComplexity.SIMPLE),
        ("Houses under 500k", QueryComplexity.SIMPLE),
        ("Furnished apartment for rent", QueryComplexity.SIMPLE),
    ]

    passed = 0
    failed = 0

    for query, expected_complexity in test_cases:
        analysis = analyze_query_complexity(query)

        if analysis.complexity == expected_complexity:
            passed += 1
            print_success(f"'{shorten_query(query)}' → {analysis.complexity.value}")
        else:
            failed += 1
            print_fail(f"'{shorten_query(query)}'")
            print(f" Expected: {expected_complexity.value}")
            print(f" Got: {analysis.complexity.value}")
            print(f" Reasoning: {analysis.reasoning}")

    print(f"\n{Colors.BOLD}Results: {passed}/{len(test_cases)} passed{Colors.ENDC}")
    return failed == 0


# =============================================================================
# Test 2: Strategy Selector Integration
# =============================================================================

async def test_strategy_selector() -> bool:
    """Check that the strategy selector routes complex queries to RLM.

    For every case, ``select_search_strategy`` is awaited with the query and
    its parameters.  A case passes only when both the ``use_rlm`` flag
    (treated as False when absent) and the ``strategy`` entry of the result
    match the expectation.

    Returns:
        True when every case matched, False otherwise.
    """
    print_header("Test 2: Strategy Selector RLM Routing")

    from app.ai.services.search_strategy_selector import (
        select_search_strategy,
        SearchStrategy,
    )

    test_cases = [
        # RLM strategies
        {
            "query": "3-bed near schools in Cotonou",
            "params": {"location": "Cotonou", "bedrooms": 3},
            "expected_rlm": True,
            "expected_strategy": SearchStrategy.RLM_MULTI_HOP,
        },
        {
            "query": "Under 500k or has pool",
            "params": {"max_price": 500000},
            "expected_rlm": True,
            "expected_strategy": SearchStrategy.RLM_BOOLEAN_OR,
        },
        {
            "query": "Compare Cotonou vs Calavi",
            "params": {},
            "expected_rlm": True,
            "expected_strategy": SearchStrategy.RLM_COMPARATIVE,
        },
        # Traditional strategies (should NOT use RLM)
        {
            "query": "3-bed apartment in Cotonou under 500k",
            "params": {"location": "Cotonou", "bedrooms": 3, "max_price": 500000},
            "expected_rlm": False,
            "expected_strategy": SearchStrategy.MONGO_ONLY,
        },
    ]

    passed = 0
    failed = 0

    for case in test_cases:
        result = await select_search_strategy(case["query"], case["params"])

        rlm_match = result.get("use_rlm", False) == case["expected_rlm"]
        strategy_match = result["strategy"] == case["expected_strategy"]

        if rlm_match and strategy_match:
            passed += 1
            print_success(f"'{shorten_query(case['query'])}'")
            print(f" Strategy: {result['strategy'].value}")
            print(f" RLM: {result.get('use_rlm', False)}")
        else:
            failed += 1
            print_fail(f"'{shorten_query(case['query'])}'")
            print(f" Expected: {case['expected_strategy'].value}, RLM={case['expected_rlm']}")
            print(f" Got: {result['strategy'].value}, RLM={result.get('use_rlm', False)}")

    print(f"\n{Colors.BOLD}Results: {passed}/{len(test_cases)} passed{Colors.ENDC}")
    return failed == 0


# =============================================================================
# Test 3: RLM Search Service (LIVE)
# =============================================================================

async def test_rlm_search_live() -> bool:
    """Run a few sample queries through ``rlm_search`` and print the outcome.

    This is an informational run: an exception raised by a query is printed
    and the loop moves on to the next query.

    Returns:
        Always True; errors are reported on the terminal only.
    """
    print_header("Test 3: RLM Search Service (LIVE)")

    print_warning("This test makes actual API calls to DeepSeek LLM")
    print_info("Ensure DEEPSEEK_API_KEY is set in your environment\n")

    from app.ai.services.rlm_search_service import rlm_search

    test_queries = [
        {
            "query": "3-bed apartment near schools in Cotonou",
            "description": "Multi-hop proximity search",
        },
        {
            "query": "Under 300k or has pool",
            "description": "Boolean OR query",
        },
        {
            "query": "Compare average prices in Cotonou vs Calavi",
            "description": "Comparative analysis",
        },
        {
            "query": "Best family apartment near schools and parks",
            "description": "Multi-factor ranking",
        },
    ]

    for i, test in enumerate(test_queries, 1):
        print(f"\n{Colors.CYAN}Test {i}: {test['description']}{Colors.ENDC}")
        print(f"Query: \"{test['query']}\"")

        try:
            result = await rlm_search(test["query"])

            print_success(f"Strategy used: {result.get('strategy_used', 'Unknown')}")
            print(f" Results: {len(result.get('results', []))} listings")
            print(f" LLM calls: {result.get('call_count', 'N/A')}")

            if result.get("reasoning_steps"):
                print(" Reasoning steps:")
                # Only the first three steps are shown to keep the output short.
                for step in result["reasoning_steps"][:3]:
                    step_dump = json.dumps(step, default=str)
                    print(f" - {step.get('step', 'unknown')}: {shorten_query(step_dump, 80)}")

            if result.get("message"):
                print(f" Message: {shorten_query(result['message'], 100)}")

            if result.get("comparison_data"):
                print(" Comparison data available: Yes")

        except Exception as e:
            # Broad on purpose: one failing live query must not stop the
            # remaining queries from running.
            print_fail(f"Error: {str(e)}")

    return True


# =============================================================================
# Test 4: Query Pattern Detection
# =============================================================================

def test_pattern_detection() -> bool:
    """Check POI detection and French-language routing in the query analyzer.

    The first part expects the POI keyword to appear (case-insensitively) in
    one of ``analysis.detected_patterns``.  The second part compares
    ``analysis.use_rlm`` with the expected flag for French queries.

    Returns:
        True when every check matched, False otherwise.
    """
    print_header("Test 4: Pattern Detection")

    from app.ai.services.rlm_query_analyzer import analyze_query_complexity

    failed = 0

    # Test POI detection
    poi_queries = [
        ("apartment near the school", "school"),
        ("house close to beach", "beach"),
        ("near the university campus", "university"),
        ("walking distance from hospital", "hospital"),
        ("close to the market", "market"),
        ("near the airport", "airport"),
    ]

    print(f"{Colors.BOLD}POI (Point of Interest) Detection:{Colors.ENDC}")
    for query, expected_poi in poi_queries:
        analysis = analyze_query_complexity(query)
        poi_found = any(expected_poi in p.lower() for p in analysis.detected_patterns)

        if poi_found:
            print_success(f"'{query}' → Detected '{expected_poi}'")
        else:
            failed += 1
            print_fail(f"'{query}' → Expected '{expected_poi}', got {analysis.detected_patterns}")

    # Test French queries
    print(f"\n{Colors.BOLD}French Query Detection:{Colors.ENDC}")
    french_queries = [
        ("appartement près de l'école", True),  # Near school
        ("maison proche de la plage", True),  # Close to beach
        ("comparer les prix", True),  # Compare prices
        ("appartement 3 chambres à Cotonou", False),  # Simple query
    ]

    for query, expected_rlm in french_queries:
        analysis = analyze_query_complexity(query)
        if analysis.use_rlm == expected_rlm:
            print_success(f"'{query}' → RLM={analysis.use_rlm}")
        else:
            failed += 1
            print_fail(f"'{query}' → Expected RLM={expected_rlm}, got {analysis.use_rlm}")

    # Report the real outcome so that main() can include it in the summary.
    return failed == 0


# =============================================================================
# Test 5: Distance Calculation
# =============================================================================

def test_distance_calculation() -> bool:
    """Check ``RLMSearchAgent._calculate_distance`` against rough distances.

    Each case supplies two coordinate pairs, an approximate expected distance
    in kilometres and the tolerance allowed around it.

    Returns:
        True when every computed distance is within tolerance, False otherwise.
    """
    print_header("Test 5: Distance Calculation (Haversine)")

    from app.ai.services.rlm_search_service import RLMSearchAgent

    agent = RLMSearchAgent()

    # Known distances (approximate)
    test_cases = [
        # (lat1, lon1, lat2, lon2, expected_km, tolerance_km)
        (6.3654, 2.4183, 6.3700, 2.4200, 0.5, 0.3),  # Nearby in Cotonou
        (6.3654, 2.4183, 6.4300, 2.3500, 10, 2),  # Cross-city
        (6.3654, 2.4183, 6.5000, 2.0000, 50, 10),  # Longer distance
    ]

    passed = 0
    for lat1, lon1, lat2, lon2, expected, tolerance in test_cases:
        distance = agent._calculate_distance(lat1, lon1, lat2, lon2)
        within_tolerance = abs(distance - expected) <= tolerance

        if within_tolerance:
            passed += 1
            print_success(f"({lat1}, {lon1}) → ({lat2}, {lon2}): {distance:.2f} km (expected ~{expected} km)")
        else:
            print_fail(f"({lat1}, {lon1}) → ({lat2}, {lon2}): {distance:.2f} km (expected ~{expected} km)")

    print(f"\n{Colors.BOLD}Results: {passed}/{len(test_cases)} passed{Colors.ENDC}")
    return passed == len(test_cases)


# =============================================================================
# Test 6: OpenStreetMap POI Service
# =============================================================================

async def test_osm_poi_service() -> bool:
    """Exercise the OpenStreetMap POI helpers and print what they return.

    Covers geocoding, POI lookups for several types, a distance calculation
    and the RLM agent's POI lookup.  The run is informational: results are
    printed but none of them changes the return value.

    Returns:
        Always True.
    """
    print_header("Test 6: OpenStreetMap POI Service")

    print_info("This test makes real API calls to OpenStreetMap (FREE)")
    print_info("Testing: Nominatim geocoding + Overpass POI search\n")

    from app.ai.services.osm_poi_service import (
        geocode_location,
        find_pois,
        find_pois_overpass,
        calculate_distance_km,
    )

    # Test 1: Geocoding
    print(f"{Colors.BOLD}1. Geocoding Test:{Colors.ENDC}")
    coords = await geocode_location("Cotonou, Benin")
    if coords:
        print_success(f"Geocoded 'Cotonou, Benin' → ({coords[0]:.4f}, {coords[1]:.4f})")
    else:
        print_fail("Failed to geocode 'Cotonou, Benin'")

    # Test 2: Find Schools
    print(f"\n{Colors.BOLD}2. Find Schools in Cotonou:{Colors.ENDC}")
    schools = await find_pois("school", "Cotonou, Benin", radius_km=3, limit=5)
    print(f" Found {len(schools)} schools:")
    for school in schools[:3]:
        print(f" - {school['name']} ({school['lat']:.4f}, {school['lon']:.4f})")

    # Test 3: Find Hospitals
    print(f"\n{Colors.BOLD}3. Find Hospitals in Cotonou:{Colors.ENDC}")
    hospitals = await find_pois("hospital", "Cotonou, Benin", radius_km=5, limit=5)
    print(f" Found {len(hospitals)} hospitals:")
    for hospital in hospitals[:3]:
        print(f" - {hospital['name']} ({hospital['lat']:.4f}, {hospital['lon']:.4f})")

    # Test 4: French POI type
    print(f"\n{Colors.BOLD}4. French POI Type 'plage' (beach):{Colors.ENDC}")
    beaches = await find_pois("plage", "Cotonou, Benin", radius_km=10, limit=5)
    print(f" Found {len(beaches)} beaches")

    # Test 5: Distance calculation
    print(f"\n{Colors.BOLD}5. Distance Calculation:{Colors.ENDC}")
    # Needs both a geocoded centre and at least one school to measure to.
    if coords and schools:
        dist = calculate_distance_km(
            coords[0], coords[1],
            schools[0]["lat"], schools[0]["lon"]
        )
        print_success(f"Distance from Cotonou center to {schools[0]['name']}: {dist:.2f} km")

    # Test 6: Integration with RLM
    print(f"\n{Colors.BOLD}6. RLM Integration Test:{Colors.ENDC}")
    from app.ai.services.rlm_search_service import RLMSearchAgent

    agent = RLMSearchAgent()
    pois = await agent._find_poi_locations("school", "Cotonou, Benin")
    if pois:
        print_success(f"RLM agent found {len(pois)} schools via OSM")
        print(f" First result: {pois[0].get('name', 'Unknown')}")
    else:
        print_warning("RLM agent found no schools (may be network issue)")

    print(f"\n{Colors.BOLD}OSM Integration Complete!{Colors.ENDC}")
    return True


# The checks above report through their return value and printed output, not
# through assertions, and they are meant to be driven by main().  Their
# ``test_`` prefix would still make pytest collect them by name, where a
# returned False is not an assertion failure; opt them out of collection.
for _check in (
    test_query_analyzer,
    test_strategy_selector,
    test_rlm_search_live,
    test_pattern_detection,
    test_distance_calculation,
    test_osm_poi_service,
):
    _check.__test__ = False


# =============================================================================
# Main
# =============================================================================

async def main() -> bool:
    """Run all tests and print a summary.

    The query analyzer, strategy selector, pattern detection and distance
    tests decide the overall result.  The OpenStreetMap test always runs but
    is informational, and the live RLM search runs only when ``--live`` is
    present in ``sys.argv``.

    Returns:
        True when every gating test passed, False otherwise.
    """
    print(f"\n{Colors.BOLD}{Colors.HEADER}")
    print("╔═══════════════════════════════════════════════════════════╗")
    print("║ RLM (Recursive Language Model) Test Suite for AIDA ║")
    print("╚═══════════════════════════════════════════════════════════╝")
    print(f"{Colors.ENDC}\n")

    live_mode = "--live" in sys.argv

    all_passed = True

    # Test 1: Query Analyzer (no LLM calls)
    if not test_query_analyzer():
        all_passed = False

    # Test 2: Strategy Selector
    if not await test_strategy_selector():
        all_passed = False

    # Test 4: Pattern Detection
    if not test_pattern_detection():
        all_passed = False

    # Test 5: Distance Calculation
    if not test_distance_calculation():
        all_passed = False

    # Test 6: OpenStreetMap POI Service (informational, result not gating)
    await test_osm_poi_service()

    # Test 3: Live RLM Search (only if --live flag)
    if live_mode:
        print_warning("\nRunning LIVE tests with actual LLM calls...")
        await test_rlm_search_live()
    else:
        print_info("\nSkipping live LLM tests. Run with --live flag to include them.")
        print_info("Example: python test_rlm.py --live")

    # Summary
    print_header("Test Summary")
    if all_passed:
        print_success("All offline tests passed!")
        print_info("RLM is ready to use in AIDA.")
    else:
        print_fail("Some tests failed. Check the output above.")

    # Usage examples
    print(f"\n{Colors.BOLD}Usage Examples:{Colors.ENDC}")
    print("""
    # In your code:
    from app.ai.services.rlm_search_service import rlm_search

    # Multi-hop search (near POI)
    results = await rlm_search("3-bed near schools in Cotonou")

    # Boolean OR
    results = await rlm_search("under 500k or has pool")

    # Comparative
    results = await rlm_search("compare Cotonou vs Calavi")

    # The brain.py automatically uses RLM when appropriate!
    """)

    return all_passed


if __name__ == "__main__":
    # Propagate the outcome to the shell so CI and scripts can detect failure.
    sys.exit(0 if asyncio.run(main()) else 1)