#!/usr/bin/env python3
# AIDA / test_rlm.py (destinyebuka, "fyp", bc0cd92)
"""
RLM (Recursive Language Model) Test Suite for AIDA
Tests:
1. Query Analyzer - Detect complex query types
2. RLM Search Service - Execute recursive searches
3. Integration - End-to-end flow
Run with:
python test_rlm.py
python test_rlm.py --live # Run with actual LLM calls
Author: AIDA Team
Date: 2026-02-09
"""
import asyncio
import sys
import json
from typing import List, Dict
# Add project root to path
sys.path.insert(0, ".")
# =============================================================================
# Color output for terminal
# =============================================================================
class Colors:
    """ANSI escape sequences used by the print helpers in this script."""

    # Reset and emphasis codes.
    ENDC = '\033[0m'
    BOLD = '\033[1m'

    # Foreground color codes, in ascending numeric order.
    FAIL = '\033[91m'
    GREEN = '\033[92m'
    WARNING = '\033[93m'
    BLUE = '\033[94m'
    HEADER = '\033[95m'
    CYAN = '\033[96m'
def print_header(text: str):
    """Print ``text`` between two 60-character ``=`` rules in header style.

    A blank line is emitted before the first rule and after the last one.
    """
    style = f"{Colors.HEADER}{Colors.BOLD}"
    rule = "=" * 60
    print(f"\n{style}{rule}{Colors.ENDC}")
    print(f"{style}{text}{Colors.ENDC}")
    print(f"{style}{rule}{Colors.ENDC}\n")
def print_success(text: str):
    """Print ``text`` as a success line, prefixed with a check mark.

    The prefix was stored as the mis-decoded byte sequence of the check-mark
    emoji; it is restored here so the terminal shows the intended symbol.
    """
    print(f"{Colors.GREEN}✅ {text}{Colors.ENDC}")
def print_fail(text: str):
    """Print ``text`` as a failure line in the FAIL color."""
    message = f"❌ {text}"
    print(Colors.FAIL + message + Colors.ENDC)
def print_info(text: str):
    """Print ``text`` as an informational line in the CYAN color."""
    message = f"ℹ️ {text}"
    print(Colors.CYAN + message + Colors.ENDC)
def print_warning(text: str):
    """Print ``text`` as a warning line in the WARNING color."""
    message = f"⚠️ {text}"
    print(Colors.WARNING + message + Colors.ENDC)
# =============================================================================
# Test 1: Query Analyzer
# =============================================================================
def test_query_analyzer():
    """Check that sample queries get the expected complexity class.

    Every sample query is passed to ``analyze_query_complexity`` and the
    ``complexity`` attribute of the result is compared with the expected
    ``QueryComplexity`` member. One line is printed per query, followed by
    a pass count.

    Returns:
        bool: True when every sample query was classified as expected.
    """
    print_header("Test 1: RLM Query Analyzer")

    # Imported lazily so that importing this module does not require the app.
    from app.ai.services.rlm_query_analyzer import (
        analyze_query_complexity,
        QueryComplexity,
    )

    test_cases = [
        # Multi-hop queries
        ("3-bed apartment near international schools in Cotonou", QueryComplexity.MULTI_HOP),
        ("House close to the beach in Calavi", QueryComplexity.MULTI_HOP),
        ("Apartment within 2km of the airport", QueryComplexity.MULTI_HOP),
        ("Find something near the university", QueryComplexity.MULTI_HOP),
        # Boolean OR queries
        ("Under 500k XOF or has a pool", QueryComplexity.BOOLEAN_OR),
        ("2-bedroom or 3-bedroom in Cotonou", QueryComplexity.BOOLEAN_OR),
        ("Either furnished or with parking", QueryComplexity.BOOLEAN_OR),
        # Comparative queries
        ("Compare prices in Cotonou vs Calavi", QueryComplexity.COMPARATIVE),
        ("Which is cheaper: 2-bed in Cotonou or 3-bed in Calavi?", QueryComplexity.COMPARATIVE),
        ("Difference between rent in Porto-Novo and Cotonou", QueryComplexity.COMPARATIVE),
        # Aggregation queries
        ("What is the average price in Cotonou?", QueryComplexity.AGGREGATION),
        ("How many 3-bed apartments are available?", QueryComplexity.AGGREGATION),
        ("Total listings in Calavi", QueryComplexity.AGGREGATION),
        # Multi-factor queries
        ("Best family apartment near schools and parks in safe area", QueryComplexity.MULTI_FACTOR),
        ("Top luxury modern apartments with good security", QueryComplexity.MULTI_FACTOR),
        ("Ideal quiet peaceful home for family", QueryComplexity.MULTI_FACTOR),
        # Simple queries (should NOT trigger RLM)
        ("3-bed apartment in Cotonou", QueryComplexity.SIMPLE),
        ("Houses under 500k", QueryComplexity.SIMPLE),
        ("Furnished apartment for rent", QueryComplexity.SIMPLE),
    ]

    passed = 0
    failed = 0
    for query, expected_complexity in test_cases:
        analysis = analyze_query_complexity(query)
        if analysis.complexity == expected_complexity:
            passed += 1
            # The arrow was mojibake in the original literal; restored here.
            print_success(f"'{query[:40]}...' → {analysis.complexity.value}")
        else:
            failed += 1
            print_fail(f"'{query[:40]}...'")
            print(f" Expected: {expected_complexity.value}")
            print(f" Got: {analysis.complexity.value}")
            print(f" Reasoning: {analysis.reasoning}")

    print(f"\n{Colors.BOLD}Results: {passed}/{len(test_cases)} passed{Colors.ENDC}")
    return failed == 0
# =============================================================================
# Test 2: Strategy Selector Integration
# =============================================================================
async def test_strategy_selector():
    """Verify the strategy chosen for each sample query.

    For every case, ``select_search_strategy`` is awaited with the query and
    its parameter dict; the returned ``strategy`` and ``use_rlm`` values are
    compared with the expectations. A per-case line and a pass count are
    printed.

    Returns:
        bool: True when every case matched both expectations.
    """
    print_header("Test 2: Strategy Selector RLM Routing")

    from app.ai.services.search_strategy_selector import (
        select_search_strategy,
        SearchStrategy,
    )

    # (query, params, expected use_rlm flag, expected strategy)
    scenarios = (
        # RLM strategies
        (
            "3-bed near schools in Cotonou",
            {"location": "Cotonou", "bedrooms": 3},
            True,
            SearchStrategy.RLM_MULTI_HOP,
        ),
        (
            "Under 500k or has pool",
            {"max_price": 500000},
            True,
            SearchStrategy.RLM_BOOLEAN_OR,
        ),
        (
            "Compare Cotonou vs Calavi",
            {},
            True,
            SearchStrategy.RLM_COMPARATIVE,
        ),
        # Traditional strategies (should NOT use RLM)
        (
            "3-bed apartment in Cotonou under 500k",
            {"location": "Cotonou", "bedrooms": 3, "max_price": 500000},
            False,
            SearchStrategy.MONGO_ONLY,
        ),
    )

    ok_count = 0
    for query, params, want_rlm, want_strategy in scenarios:
        outcome = await select_search_strategy(query, params)
        got_rlm = outcome.get("use_rlm", False)
        got_strategy = outcome["strategy"]
        label = f"'{query[:40]}...'"

        if got_rlm != want_rlm or got_strategy != want_strategy:
            print_fail(label)
            print(f" Expected: {want_strategy.value}, RLM={want_rlm}")
            print(f" Got: {got_strategy.value}, RLM={got_rlm}")
            continue

        ok_count += 1
        print_success(label)
        print(f" Strategy: {got_strategy.value}")
        print(f" RLM: {got_rlm}")

    print(f"\n{Colors.BOLD}Results: {ok_count}/{len(scenarios)} passed{Colors.ENDC}")
    return ok_count == len(scenarios)
# =============================================================================
# Test 3: RLM Search Service (LIVE)
# =============================================================================
async def test_rlm_search_live():
    """Run sample queries through ``rlm_search`` and print a result summary.

    For each query the strategy, result count, call count and (when present)
    the first three reasoning steps, the message and a comparison-data flag
    are printed. An exception raised for one query is reported and does not
    stop the remaining queries.

    Returns:
        bool: True when no query raised an exception. Previously the
        function returned True unconditionally, hiding failures.
    """
    print_header("Test 3: RLM Search Service (LIVE)")
    print_warning("This test makes actual API calls to DeepSeek LLM")
    print_info("Ensure DEEPSEEK_API_KEY is set in your environment\n")

    from app.ai.services.rlm_search_service import rlm_search

    test_queries = [
        {
            "query": "3-bed apartment near schools in Cotonou",
            "description": "Multi-hop proximity search",
        },
        {
            "query": "Under 300k or has pool",
            "description": "Boolean OR query",
        },
        {
            "query": "Compare average prices in Cotonou vs Calavi",
            "description": "Comparative analysis",
        },
        {
            "query": "Best family apartment near schools and parks",
            "description": "Multi-factor ranking",
        },
    ]

    failed = 0
    for i, test in enumerate(test_queries, 1):
        print(f"\n{Colors.CYAN}Test {i}: {test['description']}{Colors.ENDC}")
        print(f"Query: \"{test['query']}\"")
        try:
            result = await rlm_search(test["query"])
            print_success(f"Strategy used: {result.get('strategy_used', 'Unknown')}")
            print(f" Results: {len(result.get('results', []))} listings")
            print(f" LLM calls: {result.get('call_count', 'N/A')}")
            if result.get("reasoning_steps"):
                print(" Reasoning steps:")
                # Only the first three steps are shown, each cut to 80 chars.
                for step in result["reasoning_steps"][:3]:
                    print(f" - {step.get('step', 'unknown')}: {json.dumps(step, default=str)[:80]}...")
            if result.get("message"):
                print(f" Message: {result['message'][:100]}...")
            if result.get("comparison_data"):
                print(" Comparison data available: Yes")
        except Exception as e:
            # Broad catch is deliberate: one failing query must not abort the
            # others. The failure is counted so the return value reflects it.
            failed += 1
            print_fail(f"Error: {str(e)}")

    return failed == 0
# =============================================================================
# Test 4: Query Pattern Detection
# =============================================================================
def test_pattern_detection():
    """Check POI keyword detection and RLM routing of French queries.

    Part one passes when the expected POI keyword occurs (case-insensitively)
    in at least one entry of ``analysis.detected_patterns``. Part two passes
    when ``analysis.use_rlm`` equals the expected flag.

    Returns:
        bool: True when every check passed. Previously the function returned
        True unconditionally, so a failed check never reached the caller.
    """
    print_header("Test 4: Pattern Detection")

    from app.ai.services.rlm_query_analyzer import analyze_query_complexity

    failed = 0

    # Test POI detection
    poi_queries = [
        ("apartment near the school", "school"),
        ("house close to beach", "beach"),
        ("near the university campus", "university"),
        ("walking distance from hospital", "hospital"),
        ("close to the market", "market"),
        ("near the airport", "airport"),
    ]
    print(f"{Colors.BOLD}POI (Point of Interest) Detection:{Colors.ENDC}")
    for query, expected_poi in poi_queries:
        analysis = analyze_query_complexity(query)
        poi_found = any(expected_poi in p.lower() for p in analysis.detected_patterns)
        if poi_found:
            print_success(f"'{query}' → Detected '{expected_poi}'")
        else:
            failed += 1
            print_fail(f"'{query}' → Expected '{expected_poi}', got {analysis.detected_patterns}")

    # Test French queries
    print(f"\n{Colors.BOLD}French Query Detection:{Colors.ENDC}")
    french_queries = [
        ("appartement près de l'école", True),  # Near school
        ("maison proche de la plage", True),  # Close to beach
        ("comparer les prix", True),  # Compare prices
        # The accented "à" was mojibake in the original literal.
        ("appartement 3 chambres à Cotonou", False),  # Simple query
    ]
    for query, expected_rlm in french_queries:
        analysis = analyze_query_complexity(query)
        if analysis.use_rlm == expected_rlm:
            print_success(f"'{query}' → RLM={analysis.use_rlm}")
        else:
            failed += 1
            print_fail(f"'{query}' → Expected RLM={expected_rlm}, got {analysis.use_rlm}")

    return failed == 0
# =============================================================================
# Test 5: Distance Calculation
# =============================================================================
def test_distance_calculation():
    """Compare ``RLMSearchAgent._calculate_distance`` with rough expectations.

    Each case supplies two coordinate pairs, an approximate expected distance
    and a tolerance, both in kilometres; a case passes when the absolute
    difference is within the tolerance.

    Returns:
        bool: True when every case is within tolerance.
    """
    print_header("Test 5: Distance Calculation (Haversine)")

    from app.ai.services.rlm_search_service import RLMSearchAgent

    agent = RLMSearchAgent()

    # Known distances (approximate)
    test_cases = [
        # (lat1, lon1, lat2, lon2, expected_km, tolerance_km)
        (6.3654, 2.4183, 6.3700, 2.4200, 0.5, 0.3),  # Nearby in Cotonou
        (6.3654, 2.4183, 6.4300, 2.3500, 10, 2),  # Cross-city
        (6.3654, 2.4183, 6.5000, 2.0000, 50, 10),  # Longer distance
    ]

    passed = 0
    for lat1, lon1, lat2, lon2, expected, tolerance in test_cases:
        distance = agent._calculate_distance(lat1, lon1, lat2, lon2)
        # Same text for both outcomes; only the reporting helper differs.
        # The arrow was mojibake in the original literal.
        line = f"({lat1}, {lon1}) → ({lat2}, {lon2}): {distance:.2f} km (expected ~{expected} km)"
        if abs(distance - expected) <= tolerance:
            passed += 1
            print_success(line)
        else:
            print_fail(line)

    print(f"\n{Colors.BOLD}Results: {passed}/{len(test_cases)} passed{Colors.ENDC}")
    return passed == len(test_cases)
# =============================================================================
# Test 6: OpenStreetMap POI Service
# =============================================================================
async def test_osm_poi_service():
    """Exercise the OpenStreetMap POI helpers with real network calls.

    Steps: geocode "Cotonou, Benin"; look up schools, hospitals and beaches
    (the latter via the French type "plage") with ``find_pois``; compute one
    distance with ``calculate_distance_km``; finally look up schools through
    ``RLMSearchAgent._find_poi_locations``.

    Returns:
        bool: False when geocoding reported a failure, True otherwise. Empty
        POI lookups are only reported, not counted as failures. Previously
        the function returned True even after printing a geocoding failure.
    """
    print_header("Test 6: OpenStreetMap POI Service")
    print_info("This test makes real API calls to OpenStreetMap (FREE)")
    print_info("Testing: Nominatim geocoding + Overpass POI search\n")

    from app.ai.services.osm_poi_service import (
        geocode_location,
        find_pois,
        find_pois_overpass,  # not called below; the import still checks the name exists
        calculate_distance_km,
    )

    ok = True

    # Test 1: Geocoding
    print(f"{Colors.BOLD}1. Geocoding Test:{Colors.ENDC}")
    coords = await geocode_location("Cotonou, Benin")
    if coords:
        # The arrow was mojibake in the original literal.
        print_success(f"Geocoded 'Cotonou, Benin' → ({coords[0]:.4f}, {coords[1]:.4f})")
    else:
        ok = False
        print_fail("Failed to geocode 'Cotonou, Benin'")

    # Test 2: Find Schools
    print(f"\n{Colors.BOLD}2. Find Schools in Cotonou:{Colors.ENDC}")
    schools = await find_pois("school", "Cotonou, Benin", radius_km=3, limit=5)
    print(f" Found {len(schools)} schools:")
    for school in schools[:3]:
        print(f" - {school['name']} ({school['lat']:.4f}, {school['lon']:.4f})")

    # Test 3: Find Hospitals
    print(f"\n{Colors.BOLD}3. Find Hospitals in Cotonou:{Colors.ENDC}")
    hospitals = await find_pois("hospital", "Cotonou, Benin", radius_km=5, limit=5)
    print(f" Found {len(hospitals)} hospitals:")
    for hospital in hospitals[:3]:
        print(f" - {hospital['name']} ({hospital['lat']:.4f}, {hospital['lon']:.4f})")

    # Test 4: French POI type
    print(f"\n{Colors.BOLD}4. French POI Type 'plage' (beach):{Colors.ENDC}")
    beaches = await find_pois("plage", "Cotonou, Benin", radius_km=10, limit=5)
    print(f" Found {len(beaches)} beaches")

    # Test 5: Distance calculation
    print(f"\n{Colors.BOLD}5. Distance Calculation:{Colors.ENDC}")
    if coords and schools:
        dist = calculate_distance_km(
            coords[0], coords[1],
            schools[0]["lat"], schools[0]["lon"],
        )
        print_success(f"Distance from Cotonou center to {schools[0]['name']}: {dist:.2f} km")
    else:
        # Make the skip visible instead of printing an empty section.
        print_warning("Skipped: needs both geocoded coordinates and at least one school")

    # Test 6: Integration with RLM
    print(f"\n{Colors.BOLD}6. RLM Integration Test:{Colors.ENDC}")
    from app.ai.services.rlm_search_service import RLMSearchAgent

    agent = RLMSearchAgent()
    pois = await agent._find_poi_locations("school", "Cotonou, Benin")
    if pois:
        print_success(f"RLM agent found {len(pois)} schools via OSM")
        print(f" First result: {pois[0].get('name', 'Unknown')}")
    else:
        print_warning("RLM agent found no schools (may be network issue)")

    print(f"\n{Colors.BOLD}OSM Integration Complete!{Colors.ENDC}")
    return ok
# =============================================================================
# Main
# =============================================================================
async def main():
    """Run the test functions in order and print a summary.

    The query-analyzer, strategy-selector, pattern-detection and distance
    tests decide the overall result. The OpenStreetMap test always runs but
    its result is not counted; the live RLM search test runs only when
    ``--live`` is present in ``sys.argv`` and is not counted either.

    Returns:
        bool: True when all counted (offline) tests passed. The original
        returned None; the value lets the script set its exit status.
    """
    banner_width = 59
    banner_title = "RLM (Recursive Language Model) Test Suite for AIDA"
    print(f"\n{Colors.BOLD}{Colors.HEADER}")
    # The box is generated so all three lines share one width; the original
    # side and bottom border characters were mojibake.
    print("╔" + "═" * banner_width + "╗")
    print("║" + banner_title.center(banner_width) + "║")
    print("╚" + "═" * banner_width + "╝")
    print(f"{Colors.ENDC}\n")

    live_mode = "--live" in sys.argv
    all_passed = True

    # Test 1: Query Analyzer (no LLM calls)
    if not test_query_analyzer():
        all_passed = False

    # Test 2: Strategy Selector
    if not await test_strategy_selector():
        all_passed = False

    # Test 4: Pattern Detection
    if not test_pattern_detection():
        all_passed = False

    # Test 5: Distance Calculation
    if not test_distance_calculation():
        all_passed = False

    # Test 6: OpenStreetMap POI Service (network-dependent; result not counted)
    await test_osm_poi_service()

    # Test 3: Live RLM Search (only if --live flag; result not counted)
    if live_mode:
        print_warning("\nRunning LIVE tests with actual LLM calls...")
        await test_rlm_search_live()
    else:
        print_info("\nSkipping live LLM tests. Run with --live flag to include them.")
        print_info("Example: python test_rlm.py --live")

    # Summary
    print_header("Test Summary")
    if all_passed:
        print_success("All offline tests passed!")
        print_info("RLM is ready to use in AIDA.")
    else:
        print_fail("Some tests failed. Check the output above.")

    # Usage examples
    print(f"\n{Colors.BOLD}Usage Examples:{Colors.ENDC}")
    usage_lines = (
        "# In your code:",
        "from app.ai.services.rlm_search_service import rlm_search",
        "# Multi-hop search (near POI)",
        'results = await rlm_search("3-bed near schools in Cotonou")',
        "# Boolean OR",
        'results = await rlm_search("under 500k or has pool")',
        "# Comparative",
        'results = await rlm_search("compare Cotonou vs Calavi")',
        "# The brain.py automatically uses RLM when appropriate!",
    )
    # Leading and trailing newline reproduce the original triple-quoted text.
    print("\n" + "\n".join(usage_lines) + "\n")

    return all_passed


if __name__ == "__main__":
    # Exit non-zero when a counted test failed so shells and CI can detect it.
    sys.exit(0 if asyncio.run(main()) else 1)