Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| RLM (Recursive Language Model) Test Suite for AIDA | |
| Tests: | |
| 1. Query Analyzer - Detect complex query types | |
| 2. RLM Search Service - Execute recursive searches | |
| 3. Integration - End-to-end flow | |
| Run with: | |
| python test_rlm.py | |
| python test_rlm.py --live # Run with actual LLM calls | |
| Author: AIDA Team | |
| Date: 2026-02-09 | |
| """ | |
| import asyncio | |
| import sys | |
| import json | |
| from typing import List, Dict | |
| # Add project root to path | |
| sys.path.insert(0, ".") | |
| # ============================================================================= | |
| # Color output for terminal | |
| # ============================================================================= | |
class Colors:
    """ANSI SGR escape sequences used to style terminal output.

    Each attribute is a complete escape sequence; emit ``ENDC`` afterwards
    to reset the terminal to its default rendering.
    """

    # Bright foreground colors (SGR codes 91-96).
    HEADER = "\033[95m"
    BLUE = "\033[94m"
    CYAN = "\033[96m"
    GREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"

    # Attribute control: reset everything / switch bold on.
    ENDC = "\033[0m"
    BOLD = "\033[1m"
def print_header(text: str):
    """Print ``text`` as a section banner between two 60-character rules.

    The banner is preceded and followed by a blank line, and every line is
    rendered in the bold header color before the style is reset.
    """
    style = f"{Colors.HEADER}{Colors.BOLD}"
    rule = "=" * 60
    print(f"\n{style}{rule}{Colors.ENDC}")
    print(f"{style}{text}{Colors.ENDC}")
    print(f"{style}{rule}{Colors.ENDC}\n")
def print_success(text: str):
    """Print ``text`` in green, prefixed with a check-mark symbol."""
    # The marker is a real Unicode symbol; stdout must be able to encode it
    # (UTF-8 terminals do).
    print(f"{Colors.GREEN}✅ {text}{Colors.ENDC}")
def print_fail(text: str):
    """Print ``text`` in red, prefixed with a cross-mark symbol."""
    # The marker is a real Unicode symbol; stdout must be able to encode it
    # (UTF-8 terminals do).
    print(f"{Colors.FAIL}❌ {text}{Colors.ENDC}")
def print_info(text: str):
    """Print ``text`` in cyan, prefixed with an information symbol."""
    # The marker is a real Unicode symbol; stdout must be able to encode it
    # (UTF-8 terminals do).
    print(f"{Colors.CYAN}ℹ️ {text}{Colors.ENDC}")
def print_warning(text: str):
    """Print ``text`` in the warning color, prefixed with a warning symbol."""
    # The marker is a real Unicode symbol; stdout must be able to encode it
    # (UTF-8 terminals do).
    print(f"{Colors.WARNING}⚠️ {text}{Colors.ENDC}")
| # ============================================================================= | |
| # Test 1: Query Analyzer | |
| # ============================================================================= | |
def test_query_analyzer():
    """Check that sample queries get the expected complexity class.

    Every sample query is passed to ``analyze_query_complexity`` and the
    ``complexity`` attribute of the result is compared with the expected
    ``QueryComplexity`` member. A mismatch is printed together with the
    expected value, the actual value and the analyzer's ``reasoning``.

    Returns:
        bool: True when every sample query was classified as expected.
    """
    print_header("Test 1: RLM Query Analyzer")

    # Imported inside the function, so an import error surfaces when this
    # test runs rather than when the module is loaded.
    from app.ai.services.rlm_query_analyzer import (
        analyze_query_complexity,
        QueryComplexity,
    )

    test_cases = [
        # Multi-hop queries
        ("3-bed apartment near international schools in Cotonou", QueryComplexity.MULTI_HOP),
        ("House close to the beach in Calavi", QueryComplexity.MULTI_HOP),
        ("Apartment within 2km of the airport", QueryComplexity.MULTI_HOP),
        ("Find something near the university", QueryComplexity.MULTI_HOP),
        # Boolean OR queries
        ("Under 500k XOF or has a pool", QueryComplexity.BOOLEAN_OR),
        ("2-bedroom or 3-bedroom in Cotonou", QueryComplexity.BOOLEAN_OR),
        ("Either furnished or with parking", QueryComplexity.BOOLEAN_OR),
        # Comparative queries
        ("Compare prices in Cotonou vs Calavi", QueryComplexity.COMPARATIVE),
        ("Which is cheaper: 2-bed in Cotonou or 3-bed in Calavi?", QueryComplexity.COMPARATIVE),
        ("Difference between rent in Porto-Novo and Cotonou", QueryComplexity.COMPARATIVE),
        # Aggregation queries
        ("What is the average price in Cotonou?", QueryComplexity.AGGREGATION),
        ("How many 3-bed apartments are available?", QueryComplexity.AGGREGATION),
        ("Total listings in Calavi", QueryComplexity.AGGREGATION),
        # Multi-factor queries
        ("Best family apartment near schools and parks in safe area", QueryComplexity.MULTI_FACTOR),
        ("Top luxury modern apartments with good security", QueryComplexity.MULTI_FACTOR),
        ("Ideal quiet peaceful home for family", QueryComplexity.MULTI_FACTOR),
        # Simple queries (should NOT trigger RLM)
        ("3-bed apartment in Cotonou", QueryComplexity.SIMPLE),
        ("Houses under 500k", QueryComplexity.SIMPLE),
        ("Furnished apartment for rent", QueryComplexity.SIMPLE),
    ]

    passed = 0
    failed = 0
    for query, expected_complexity in test_cases:
        analysis = analyze_query_complexity(query)
        if analysis.complexity == expected_complexity:
            passed += 1
            print_success(f"'{query[:40]}...' → {analysis.complexity.value}")
        else:
            failed += 1
            print_fail(f"'{query[:40]}...'")
            print(f" Expected: {expected_complexity.value}")
            print(f" Got: {analysis.complexity.value}")
            print(f" Reasoning: {analysis.reasoning}")

    print(f"\n{Colors.BOLD}Results: {passed}/{len(test_cases)} passed{Colors.ENDC}")
    return failed == 0
| # ============================================================================= | |
| # Test 2: Strategy Selector Integration | |
| # ============================================================================= | |
async def test_strategy_selector():
    """Verify which search strategy the selector picks for sample queries.

    For each scenario ``select_search_strategy`` is awaited with the query
    and its pre-extracted parameters. The scenario passes when both the
    ``use_rlm`` flag (treated as False when absent) and the ``strategy``
    entry of the result equal the expected values.

    Returns:
        bool: True when no scenario failed.
    """
    print_header("Test 2: Strategy Selector RLM Routing")

    from app.ai.services.search_strategy_selector import (
        select_search_strategy,
        SearchStrategy,
    )

    # (query, extracted params, RLM expected?, expected strategy)
    scenarios = [
        # RLM strategies
        (
            "3-bed near schools in Cotonou",
            {"location": "Cotonou", "bedrooms": 3},
            True,
            SearchStrategy.RLM_MULTI_HOP,
        ),
        (
            "Under 500k or has pool",
            {"max_price": 500000},
            True,
            SearchStrategy.RLM_BOOLEAN_OR,
        ),
        (
            "Compare Cotonou vs Calavi",
            {},
            True,
            SearchStrategy.RLM_COMPARATIVE,
        ),
        # Traditional strategies (should NOT use RLM)
        (
            "3-bed apartment in Cotonou under 500k",
            {"location": "Cotonou", "bedrooms": 3, "max_price": 500000},
            False,
            SearchStrategy.MONGO_ONLY,
        ),
    ]

    ok_count = 0
    bad_count = 0
    for query, params, wants_rlm, wants_strategy in scenarios:
        result = await select_search_strategy(query, params)
        used_rlm = result.get("use_rlm", False)
        chosen = result["strategy"]
        rlm_ok = used_rlm == wants_rlm
        strategy_ok = chosen == wants_strategy
        label = f"'{query[:40]}...'"

        if not (rlm_ok and strategy_ok):
            bad_count += 1
            print_fail(label)
            print(f" Expected: {wants_strategy.value}, RLM={wants_rlm}")
            print(f" Got: {chosen.value}, RLM={used_rlm}")
            continue

        ok_count += 1
        print_success(label)
        print(f" Strategy: {chosen.value}")
        print(f" RLM: {used_rlm}")

    print(f"\n{Colors.BOLD}Results: {ok_count}/{len(scenarios)} passed{Colors.ENDC}")
    return bad_count == 0
| # ============================================================================= | |
| # Test 3: RLM Search Service (LIVE) | |
| # ============================================================================= | |
async def test_rlm_search_live():
    """Run sample queries through ``rlm_search`` and print what came back.

    Each query is awaited in turn. For a successful call the strategy used,
    the number of results, the LLM call count and, when present, up to three
    reasoning steps, the message and a comparison-data marker are printed.
    Any exception raised while running or reporting a query is printed and
    counted as a failure; the remaining queries still run.

    Returns:
        bool: True when no query raised an exception.
    """
    print_header("Test 3: RLM Search Service (LIVE)")
    print_warning("This test makes actual API calls to DeepSeek LLM")
    print_info("Ensure DEEPSEEK_API_KEY is set in your environment\n")

    from app.ai.services.rlm_search_service import rlm_search

    test_queries = [
        {
            "query": "3-bed apartment near schools in Cotonou",
            "description": "Multi-hop proximity search",
        },
        {
            "query": "Under 300k or has pool",
            "description": "Boolean OR query",
        },
        {
            "query": "Compare average prices in Cotonou vs Calavi",
            "description": "Comparative analysis",
        },
        {
            "query": "Best family apartment near schools and parks",
            "description": "Multi-factor ranking",
        },
    ]

    failed = 0
    for i, test in enumerate(test_queries, 1):
        print(f"\n{Colors.CYAN}Test {i}: {test['description']}{Colors.ENDC}")
        print(f"Query: \"{test['query']}\"")
        try:
            result = await rlm_search(test["query"])
            print_success(f"Strategy used: {result.get('strategy_used', 'Unknown')}")
            print(f" Results: {len(result.get('results', []))} listings")
            print(f" LLM calls: {result.get('call_count', 'N/A')}")
            if result.get("reasoning_steps"):
                print(" Reasoning steps:")
                # Only the first three steps, each cut to 80 characters,
                # to keep the report readable.
                for step in result["reasoning_steps"][:3]:
                    print(f" - {step.get('step', 'unknown')}: {json.dumps(step, default=str)[:80]}...")
            if result.get("message"):
                print(f" Message: {result['message'][:100]}...")
            if result.get("comparison_data"):
                print(" Comparison data available: Yes")
        except Exception as e:
            # Best-effort by design: one failing live call must not stop the
            # others, but it must be reflected in the return value.
            failed += 1
            print_fail(f"Error: {e}")

    passed = len(test_queries) - failed
    print(f"\n{Colors.BOLD}Results: {passed}/{len(test_queries)} passed{Colors.ENDC}")
    return failed == 0
| # ============================================================================= | |
| # Test 4: Query Pattern Detection | |
| # ============================================================================= | |
def test_pattern_detection():
    """Check POI keyword detection and RLM triggering for French queries.

    Two groups of checks run against ``analyze_query_complexity``:

    * POI detection: the expected keyword must occur (case-insensitively)
      in at least one entry of ``analysis.detected_patterns``.
    * French queries: ``analysis.use_rlm`` must equal the expected flag.

    Returns:
        bool: True when every check in both groups passed.
    """
    print_header("Test 4: Pattern Detection")

    from app.ai.services.rlm_query_analyzer import analyze_query_complexity

    failed = 0

    # Test POI detection
    poi_queries = [
        ("apartment near the school", "school"),
        ("house close to beach", "beach"),
        ("near the university campus", "university"),
        ("walking distance from hospital", "hospital"),
        ("close to the market", "market"),
        ("near the airport", "airport"),
    ]
    print(f"{Colors.BOLD}POI (Point of Interest) Detection:{Colors.ENDC}")
    for query, expected_poi in poi_queries:
        analysis = analyze_query_complexity(query)
        poi_found = any(expected_poi in p.lower() for p in analysis.detected_patterns)
        if poi_found:
            print_success(f"'{query}' → Detected '{expected_poi}'")
        else:
            failed += 1
            print_fail(f"'{query}' → Expected '{expected_poi}', got {analysis.detected_patterns}")

    # Test French queries
    print(f"\n{Colors.BOLD}French Query Detection:{Colors.ENDC}")
    french_queries = [
        ("appartement près de l'école", True),  # Near school
        ("maison proche de la plage", True),  # Close to beach
        ("comparer les prix", True),  # Compare prices
        ("appartement 3 chambres à Cotonou", False),  # Simple query
    ]
    for query, expected_rlm in french_queries:
        analysis = analyze_query_complexity(query)
        if analysis.use_rlm == expected_rlm:
            print_success(f"'{query}' → RLM={analysis.use_rlm}")
        else:
            failed += 1
            print_fail(f"'{query}' → Expected RLM={expected_rlm}, got {analysis.use_rlm}")

    # Report the real outcome so the caller can mark the run as failed.
    return failed == 0
| # ============================================================================= | |
| # Test 5: Distance Calculation | |
| # ============================================================================= | |
def test_distance_calculation():
    """Compare the agent's distance helper against approximate distances.

    ``RLMSearchAgent._calculate_distance`` is called with two latitude and
    longitude pairs; the result is treated as kilometres and must lie
    within the stated tolerance of the expected value.

    Returns:
        bool: True when every coordinate pair was within tolerance.
    """
    print_header("Test 5: Distance Calculation (Haversine)")

    from app.ai.services.rlm_search_service import RLMSearchAgent

    agent = RLMSearchAgent()

    # Known distances (approximate)
    test_cases = [
        # (lat1, lon1, lat2, lon2, expected_km, tolerance_km)
        (6.3654, 2.4183, 6.3700, 2.4200, 0.5, 0.3),  # Nearby in Cotonou
        (6.3654, 2.4183, 6.4300, 2.3500, 10, 2),  # Cross-city
        (6.3654, 2.4183, 6.5000, 2.0000, 50, 10),  # Longer distance
    ]

    passed = 0
    for lat1, lon1, lat2, lon2, expected, tolerance in test_cases:
        distance = agent._calculate_distance(lat1, lon1, lat2, lon2)
        within_tolerance = abs(distance - expected) <= tolerance
        if within_tolerance:
            passed += 1
            print_success(f"({lat1}, {lon1}) → ({lat2}, {lon2}): {distance:.2f} km (expected ~{expected} km)")
        else:
            print_fail(f"({lat1}, {lon1}) → ({lat2}, {lon2}): {distance:.2f} km (expected ~{expected} km)")

    print(f"\n{Colors.BOLD}Results: {passed}/{len(test_cases)} passed{Colors.ENDC}")
    return passed == len(test_cases)
| # ============================================================================= | |
| # Test 6: OpenStreetMap POI Service | |
| # ============================================================================= | |
async def test_osm_poi_service():
    """Exercise the OSM POI helpers and print what they return.

    Steps, in order: geocode "Cotonou, Benin"; look up schools, hospitals
    and beaches (via the French type "plage") with ``find_pois``; compute
    the distance from the geocoded point to the first school; finally ask
    ``RLMSearchAgent._find_poi_locations`` for schools.

    This is a reporting smoke test: findings are printed, not asserted.

    Returns:
        bool: Always True.
    """
    print_header("Test 6: OpenStreetMap POI Service")
    print_info("This test makes real API calls to OpenStreetMap (FREE)")
    print_info("Testing: Nominatim geocoding + Overpass POI search\n")

    # find_pois_overpass is imported but not called in this function.
    from app.ai.services.osm_poi_service import (
        geocode_location,
        find_pois,
        find_pois_overpass,
        calculate_distance_km,
    )

    # Test 1: Geocoding
    print(f"{Colors.BOLD}1. Geocoding Test:{Colors.ENDC}")
    coords = await geocode_location("Cotonou, Benin")
    if coords:
        print_success(f"Geocoded 'Cotonou, Benin' → ({coords[0]:.4f}, {coords[1]:.4f})")
    else:
        print_fail("Failed to geocode 'Cotonou, Benin'")

    # Test 2: Find Schools
    print(f"\n{Colors.BOLD}2. Find Schools in Cotonou:{Colors.ENDC}")
    schools = await find_pois("school", "Cotonou, Benin", radius_km=3, limit=5)
    print(f" Found {len(schools)} schools:")
    for school in schools[:3]:
        # A POI without a "name" entry is shown as 'Unknown' instead of
        # raising KeyError, matching the RLM integration step below.
        print(f" - {school.get('name', 'Unknown')} ({school['lat']:.4f}, {school['lon']:.4f})")

    # Test 3: Find Hospitals
    print(f"\n{Colors.BOLD}3. Find Hospitals in Cotonou:{Colors.ENDC}")
    hospitals = await find_pois("hospital", "Cotonou, Benin", radius_km=5, limit=5)
    print(f" Found {len(hospitals)} hospitals:")
    for hospital in hospitals[:3]:
        print(f" - {hospital.get('name', 'Unknown')} ({hospital['lat']:.4f}, {hospital['lon']:.4f})")

    # Test 4: French POI type
    print(f"\n{Colors.BOLD}4. French POI Type 'plage' (beach):{Colors.ENDC}")
    beaches = await find_pois("plage", "Cotonou, Benin", radius_km=10, limit=5)
    print(f" Found {len(beaches)} beaches")

    # Test 5: Distance calculation (needs both a geocode and a school)
    print(f"\n{Colors.BOLD}5. Distance Calculation:{Colors.ENDC}")
    if coords and schools:
        nearest = schools[0]
        dist = calculate_distance_km(
            coords[0], coords[1],
            nearest["lat"], nearest["lon"],
        )
        print_success(f"Distance from Cotonou center to {nearest.get('name', 'Unknown')}: {dist:.2f} km")

    # Test 6: Integration with RLM
    print(f"\n{Colors.BOLD}6. RLM Integration Test:{Colors.ENDC}")
    from app.ai.services.rlm_search_service import RLMSearchAgent

    agent = RLMSearchAgent()
    pois = await agent._find_poi_locations("school", "Cotonou, Benin")
    if pois:
        print_success(f"RLM agent found {len(pois)} schools via OSM")
        print(f" First result: {pois[0].get('name', 'Unknown')}")
    else:
        print_warning("RLM agent found no schools (may be network issue)")

    print(f"\n{Colors.BOLD}OSM Integration Complete!{Colors.ENDC}")
    return True
| # ============================================================================= | |
| # Main | |
| # ============================================================================= | |
async def main():
    """Run the test suite, print a summary and report the overall outcome.

    The query-analyzer, strategy-selector, pattern-detection and
    distance-calculation tests decide the "offline" result. The OSM test
    always runs but only reports; its return value is not used. The live
    RLM search test runs only when ``--live`` is present in ``sys.argv``.

    Returns:
        bool: True when the offline tests passed and, if it was run, the
        live test passed as well.
    """
    # Banner: the frame is sized from one width so it always lines up.
    title = "RLM (Recursive Language Model) Test Suite for AIDA"
    inner_width = 59
    print(f"\n{Colors.BOLD}{Colors.HEADER}")
    print("╔" + "═" * inner_width + "╗")
    print("║" + f"  {title}".ljust(inner_width) + "║")
    print("╚" + "═" * inner_width + "╝")
    print(f"{Colors.ENDC}\n")

    live_mode = "--live" in sys.argv
    all_passed = True
    live_passed = True

    # Test 1: Query Analyzer (no LLM calls)
    if not test_query_analyzer():
        all_passed = False

    # Test 2: Strategy Selector
    if not await test_strategy_selector():
        all_passed = False

    # Test 4: Pattern Detection
    if not test_pattern_detection():
        all_passed = False

    # Test 5: Distance Calculation
    if not test_distance_calculation():
        all_passed = False

    # Test 6: OpenStreetMap POI Service (network-dependent, report only)
    await test_osm_poi_service()

    # Test 3: Live RLM Search (only if --live flag)
    if live_mode:
        print_warning("\nRunning LIVE tests with actual LLM calls...")
        live_passed = bool(await test_rlm_search_live())
    else:
        print_info("\nSkipping live LLM tests. Run with --live flag to include them.")
        print_info("Example: python test_rlm.py --live")

    # Summary
    print_header("Test Summary")
    if all_passed:
        print_success("All offline tests passed!")
        print_info("RLM is ready to use in AIDA.")
    else:
        print_fail("Some tests failed. Check the output above.")
    if not live_passed:
        print_fail("Some live tests failed. Check the output above.")

    # Usage examples
    print(f"\n{Colors.BOLD}Usage Examples:{Colors.ENDC}")
    print("""
# In your code:
from app.ai.services.rlm_search_service import rlm_search
# Multi-hop search (near POI)
results = await rlm_search("3-bed near schools in Cotonou")
# Boolean OR
results = await rlm_search("under 500k or has pool")
# Comparative
results = await rlm_search("compare Cotonou vs Calavi")
# The brain.py automatically uses RLM when appropriate!
""")

    return all_passed and live_passed


if __name__ == "__main__":
    # Propagate the outcome as the exit status so shells and CI can see a
    # failing run (0 = success, 1 = failure).
    sys.exit(0 if asyncio.run(main()) else 1)