Spaces:

destinyebuka
/

AIDA

Running

App Files Files Community

AIDA / test_rlm.py

destinyebuka

fyp

bc0cd92 10 days ago

raw

history blame contribute delete

17.4 kB

	#!/usr/bin/env python3
	"""
	RLM (Recursive Language Model) Test Suite for AIDA

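	Tests:
	1. Query Analyzer - Detect complex query types
	2. Strategy Selector - Route complex queries to the RLM strategies
	3. RLM Search Service - Execute recursive searches (live LLM calls, --live only)
	4. Pattern Detection - POI and French query detection
	5. Distance Calculation - Haversine distance check
	6. OpenStreetMap POI Service - Geocoding, POI lookup and RLM integration (end-to-end flow)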

	Run with:
	python test_rlm.py
	python test_rlm.py --live # Run with actual LLM calls

	Author: AIDA Team
	Date: 2026-02-09
	"""

	import asyncio
	import sys
	import json
	from typing import List, Dict

	# Add project root to path so the `app.*` imports inside the tests resolve.
	# NOTE: "." is relative to the current working directory, so the suite is
	# expected to be launched from the project root (presumably where `app/` lives).
	sys.path.insert(0, ".")


	# =============================================================================
	# Color output for terminal
	# =============================================================================

	class Colors:
	    """ANSI escape sequences used by the print helpers to style terminal output."""

	    # Foreground colors
	    HEADER = "\033[95m"
	    BLUE = "\033[94m"
	    CYAN = "\033[96m"
	    GREEN = "\033[92m"
	    WARNING = "\033[93m"
	    FAIL = "\033[91m"

	    # Attribute codes: ENDC is emitted after every styled message, BOLD before emphasized text
	    ENDC = "\033[0m"
	    BOLD = "\033[1m"


	def print_header(text: str):
	    """Print ``text`` as a bold banner framed by two 60-character ``=`` rules."""
	    style = f"{Colors.HEADER}{Colors.BOLD}"
	    rule = "=" * 60
	    print(f"\n{style}{rule}{Colors.ENDC}")
	    print(f"{style}{text}{Colors.ENDC}")
	    print(f"{style}{rule}{Colors.ENDC}\n")


	def print_success(text: str):
	    """Print ``text`` in the GREEN style, prefixed with a check-mark emoji."""
	    line = f"{Colors.GREEN}✅ {text}{Colors.ENDC}"
	    print(line)


	def print_fail(text: str):
	    """Print ``text`` in the FAIL style, prefixed with a cross-mark emoji."""
	    line = f"{Colors.FAIL}❌ {text}{Colors.ENDC}"
	    print(line)


	def print_info(text: str):
	    """Print ``text`` in the CYAN style, prefixed with an information emoji."""
	    line = f"{Colors.CYAN}ℹ️ {text}{Colors.ENDC}"
	    print(line)


	def print_warning(text: str):
	    """Print ``text`` in the WARNING style, prefixed with a warning emoji."""
	    line = f"{Colors.WARNING}⚠️ {text}{Colors.ENDC}"
	    print(line)


	# =============================================================================
	# Test 1: Query Analyzer
	# =============================================================================

	def test_query_analyzer():
	    """Check that ``analyze_query_complexity`` classifies each sample query as expected.

	    Every sample is run (no early exit); mismatches are printed together with
	    the analyzer's own reasoning.

	    Returns:
	        True when every sample query got the expected complexity, else False.
	    """
	    print_header("Test 1: RLM Query Analyzer")

	    from app.ai.services.rlm_query_analyzer import (
	        analyze_query_complexity,
	        QueryComplexity,
	    )

	    # Sample queries grouped by the complexity the analyzer should report.
	    expectations = [
	        # Multi-hop queries
	        (QueryComplexity.MULTI_HOP, [
	            "3-bed apartment near international schools in Cotonou",
	            "House close to the beach in Calavi",
	            "Apartment within 2km of the airport",
	            "Find something near the university",
	        ]),
	        # Boolean OR queries
	        (QueryComplexity.BOOLEAN_OR, [
	            "Under 500k XOF or has a pool",
	            "2-bedroom or 3-bedroom in Cotonou",
	            "Either furnished or with parking",
	        ]),
	        # Comparative queries
	        (QueryComplexity.COMPARATIVE, [
	            "Compare prices in Cotonou vs Calavi",
	            "Which is cheaper: 2-bed in Cotonou or 3-bed in Calavi?",
	            "Difference between rent in Porto-Novo and Cotonou",
	        ]),
	        # Aggregation queries
	        (QueryComplexity.AGGREGATION, [
	            "What is the average price in Cotonou?",
	            "How many 3-bed apartments are available?",
	            "Total listings in Calavi",
	        ]),
	        # Multi-factor queries
	        (QueryComplexity.MULTI_FACTOR, [
	            "Best family apartment near schools and parks in safe area",
	            "Top luxury modern apartments with good security",
	            "Ideal quiet peaceful home for family",
	        ]),
	        # Simple queries (should NOT trigger RLM)
	        (QueryComplexity.SIMPLE, [
	            "3-bed apartment in Cotonou",
	            "Houses under 500k",
	            "Furnished apartment for rent",
	        ]),
	    ]
	    test_cases = [
	        (query, complexity)
	        for complexity, queries in expectations
	        for query in queries
	    ]

	    passed = 0
	    for query, expected_complexity in test_cases:
	        analysis = analyze_query_complexity(query)
	        label = f"'{query[:40]}...'"
	        matched = analysis.complexity == expected_complexity

	        if not matched:
	            print_fail(label)
	            print(f" Expected: {expected_complexity.value}")
	            print(f" Got: {analysis.complexity.value}")
	            print(f" Reasoning: {analysis.reasoning}")
	            continue

	        passed += 1
	        print_success(f"{label} → {analysis.complexity.value}")

	    failed = len(test_cases) - passed
	    print(f"\n{Colors.BOLD}Results: {passed}/{len(test_cases)} passed{Colors.ENDC}")
	    return failed == 0


	# =============================================================================
	# Test 2: Strategy Selector Integration
	# =============================================================================

	async def test_strategy_selector():
	    """Check that ``select_search_strategy`` picks the expected strategy and RLM flag.

	    Returns:
	        True when, for every scenario, both the ``use_rlm`` flag (treated as
	        False when absent) and the ``strategy`` entry of the result match the
	        expectation; False otherwise.
	    """
	    print_header("Test 2: Strategy Selector RLM Routing")

	    from app.ai.services.search_strategy_selector import (
	        select_search_strategy,
	        SearchStrategy,
	    )

	    # (query, params passed to the selector, expected use_rlm, expected strategy)
	    scenarios = [
	        # RLM strategies
	        (
	            "3-bed near schools in Cotonou",
	            {"location": "Cotonou", "bedrooms": 3},
	            True,
	            SearchStrategy.RLM_MULTI_HOP,
	        ),
	        (
	            "Under 500k or has pool",
	            {"max_price": 500000},
	            True,
	            SearchStrategy.RLM_BOOLEAN_OR,
	        ),
	        (
	            "Compare Cotonou vs Calavi",
	            {},
	            True,
	            SearchStrategy.RLM_COMPARATIVE,
	        ),
	        # Traditional strategies (should NOT use RLM)
	        (
	            "3-bed apartment in Cotonou under 500k",
	            {"location": "Cotonou", "bedrooms": 3, "max_price": 500000},
	            False,
	            SearchStrategy.MONGO_ONLY,
	        ),
	    ]

	    passed = 0
	    for query, params, expected_rlm, expected_strategy in scenarios:
	        result = await select_search_strategy(query, params)

	        actual_rlm = result.get("use_rlm", False)
	        actual_strategy = result["strategy"]
	        rlm_ok = actual_rlm == expected_rlm
	        strategy_ok = actual_strategy == expected_strategy
	        label = f"'{query[:40]}...'"

	        if not (rlm_ok and strategy_ok):
	            print_fail(label)
	            print(f" Expected: {expected_strategy.value}, RLM={expected_rlm}")
	            print(f" Got: {actual_strategy.value}, RLM={actual_rlm}")
	            continue

	        passed += 1
	        print_success(label)
	        print(f" Strategy: {actual_strategy.value}")
	        print(f" RLM: {actual_rlm}")

	    failed = len(scenarios) - passed
	    print(f"\n{Colors.BOLD}Results: {passed}/{len(scenarios)} passed{Colors.ENDC}")
	    return failed == 0


	# =============================================================================
	# Test 3: RLM Search Service (LIVE)
	# =============================================================================

	async def test_rlm_search_live():
	    """Run sample queries through ``rlm_search`` and print a digest of each response.

	    According to the warning it prints, this makes real API calls to the
	    DeepSeek LLM, so it is meant to be run on demand only.

	    For each query the digest shows the strategy used, the number of results,
	    the LLM call count and, when present, up to three reasoning steps, a
	    truncated message and whether comparison data is available.

	    Returns:
	        True when every query completed without raising; False when at least
	        one query raised (previously the function returned True regardless).
	    """
	    print_header("Test 3: RLM Search Service (LIVE)")

	    print_warning("This test makes actual API calls to DeepSeek LLM")
	    print_info("Ensure DEEPSEEK_API_KEY is set in your environment\n")

	    from app.ai.services.rlm_search_service import rlm_search

	    test_queries = [
	        {
	            "query": "3-bed apartment near schools in Cotonou",
	            "description": "Multi-hop proximity search",
	        },
	        {
	            "query": "Under 300k or has pool",
	            "description": "Boolean OR query",
	        },
	        {
	            "query": "Compare average prices in Cotonou vs Calavi",
	            "description": "Comparative analysis",
	        },
	        {
	            "query": "Best family apartment near schools and parks",
	            "description": "Multi-factor ranking",
	        },
	    ]

	    errors = 0

	    for i, test in enumerate(test_queries, 1):
	        print(f"\n{Colors.CYAN}Test {i}: {test['description']}{Colors.ENDC}")
	        print(f"Query: \"{test['query']}\"")

	        try:
	            result = await rlm_search(test["query"])

	            print_success(f"Strategy used: {result.get('strategy_used', 'Unknown')}")
	            print(f" Results: {len(result.get('results', []))} listings")
	            print(f" LLM calls: {result.get('call_count', 'N/A')}")

	            if result.get("reasoning_steps"):
	                print(" Reasoning steps:")
	                # Only the first three steps, each truncated, to keep the output readable.
	                for step in result["reasoning_steps"][:3]:
	                    print(f" - {step.get('step', 'unknown')}: {json.dumps(step, default=str)[:80]}...")

	            if result.get("message"):
	                print(f" Message: {result['message'][:100]}...")

	            if result.get("comparison_data"):
	                print(" Comparison data available: Yes")

	        except Exception as e:
	            # Deliberately broad: one failing query must not abort the remaining
	            # ones. The failure is counted so the return value reflects it.
	            errors += 1
	            print_fail(f"Error: {str(e)}")

	    return errors == 0


	# =============================================================================
	# Test 4: Query Pattern Detection
	# =============================================================================

	def test_pattern_detection():
	    """Check POI keyword detection and RLM routing of French queries.

	    Two groups of checks are run against ``analyze_query_complexity``:

	    * POI detection: the expected keyword must appear (case-insensitively) in
	      at least one entry of ``analysis.detected_patterns``.
	    * French queries: ``analysis.use_rlm`` must equal the expected flag.

	    Returns:
	        True when every check passed, False otherwise. (The function used to
	        return True unconditionally, so failures printed here never reached
	        the suite summary.)
	    """
	    print_header("Test 4: Pattern Detection")

	    from app.ai.services.rlm_query_analyzer import analyze_query_complexity

	    failed = 0

	    # Test POI detection
	    poi_queries = [
	        ("apartment near the school", "school"),
	        ("house close to beach", "beach"),
	        ("near the university campus", "university"),
	        ("walking distance from hospital", "hospital"),
	        ("close to the market", "market"),
	        ("near the airport", "airport"),
	    ]

	    print(f"{Colors.BOLD}POI (Point of Interest) Detection:{Colors.ENDC}")
	    for query, expected_poi in poi_queries:
	        analysis = analyze_query_complexity(query)
	        poi_found = any(expected_poi in p.lower() for p in analysis.detected_patterns)
	        if poi_found:
	            print_success(f"'{query}' → Detected '{expected_poi}'")
	        else:
	            failed += 1
	            print_fail(f"'{query}' → Expected '{expected_poi}', got {analysis.detected_patterns}")

	    # Test French queries
	    print(f"\n{Colors.BOLD}French Query Detection:{Colors.ENDC}")
	    french_queries = [
	        ("appartement près de l'école", True),  # Near school
	        ("maison proche de la plage", True),  # Close to beach
	        ("comparer les prix", True),  # Compare prices
	        ("appartement 3 chambres à Cotonou", False),  # Simple query
	    ]

	    for query, expected_rlm in french_queries:
	        analysis = analyze_query_complexity(query)
	        if analysis.use_rlm == expected_rlm:
	            print_success(f"'{query}' → RLM={analysis.use_rlm}")
	        else:
	            failed += 1
	            print_fail(f"'{query}' → Expected RLM={expected_rlm}, got {analysis.use_rlm}")

	    # Same results line as the other tests in this file.
	    total = len(poi_queries) + len(french_queries)
	    print(f"\n{Colors.BOLD}Results: {total - failed}/{total} passed{Colors.ENDC}")
	    return failed == 0


	# =============================================================================
	# Test 5: Distance Calculation
	# =============================================================================

	def test_distance_calculation():
	    """Compare ``RLMSearchAgent._calculate_distance`` with approximate known distances.

	    Returns:
	        True when every computed distance is within its tolerance of the
	        expected value, False otherwise.
	    """
	    print_header("Test 5: Distance Calculation (Haversine)")

	    from app.ai.services.rlm_search_service import RLMSearchAgent

	    agent = RLMSearchAgent()

	    # Known distances (approximate)
	    test_cases = [
	        # (lat1, lon1, lat2, lon2, expected_km, tolerance_km)
	        (6.3654, 2.4183, 6.3700, 2.4200, 0.5, 0.3),  # Nearby in Cotonou
	        (6.3654, 2.4183, 6.4300, 2.3500, 10, 2),  # Cross-city
	        (6.3654, 2.4183, 6.5000, 2.0000, 50, 10),  # Longer distance
	    ]

	    passed = 0
	    for lat1, lon1, lat2, lon2, expected, tolerance in test_cases:
	        distance = agent._calculate_distance(lat1, lon1, lat2, lon2)
	        # The report line is the same for both outcomes; only the helper differs.
	        report = f"({lat1}, {lon1}) → ({lat2}, {lon2}): {distance:.2f} km (expected ~{expected} km)"

	        if abs(distance - expected) <= tolerance:
	            passed += 1
	            print_success(report)
	        else:
	            print_fail(report)

	    total = len(test_cases)
	    print(f"\n{Colors.BOLD}Results: {passed}/{total} passed{Colors.ENDC}")
	    return passed == total


	# =============================================================================
	# Test 6: OpenStreetMap POI Service
	# =============================================================================

	async def test_osm_poi_service():
	    """Exercise the OSM POI service helpers and print what they return.

	    Steps: geocode "Cotonou, Benin", look up schools, hospitals and beaches
	    (the latter via the French type name "plage"), compute a distance from
	    the geocoded point to the first school, and finally query schools through
	    ``RLMSearchAgent._find_poi_locations``. According to the messages it
	    prints, these are real OpenStreetMap API calls.

	    Returns:
	        Always True; the results are reported on the terminal only.
	    """
	    print_header("Test 6: OpenStreetMap POI Service")

	    print_info("This test makes real API calls to OpenStreetMap (FREE)")
	    print_info("Testing: Nominatim geocoding + Overpass POI search\n")

	    from app.ai.services.osm_poi_service import (
	        geocode_location,
	        find_pois,
	        find_pois_overpass,
	        calculate_distance_km,
	    )

	    def show_preview(pois):
	        # List at most the first three POIs with their coordinates.
	        for poi in pois[:3]:
	            print(f" - {poi['name']} ({poi['lat']:.4f}, {poi['lon']:.4f})")

	    # Test 1: Geocoding
	    print(f"{Colors.BOLD}1. Geocoding Test:{Colors.ENDC}")
	    coords = await geocode_location("Cotonou, Benin")
	    if not coords:
	        print_fail("Failed to geocode 'Cotonou, Benin'")
	    else:
	        print_success(f"Geocoded 'Cotonou, Benin' → ({coords[0]:.4f}, {coords[1]:.4f})")

	    # Test 2: Find Schools
	    print(f"\n{Colors.BOLD}2. Find Schools in Cotonou:{Colors.ENDC}")
	    schools = await find_pois("school", "Cotonou, Benin", radius_km=3, limit=5)
	    print(f" Found {len(schools)} schools:")
	    show_preview(schools)

	    # Test 3: Find Hospitals
	    print(f"\n{Colors.BOLD}3. Find Hospitals in Cotonou:{Colors.ENDC}")
	    hospitals = await find_pois("hospital", "Cotonou, Benin", radius_km=5, limit=5)
	    print(f" Found {len(hospitals)} hospitals:")
	    show_preview(hospitals)

	    # Test 4: French POI type
	    print(f"\n{Colors.BOLD}4. French POI Type 'plage' (beach):{Colors.ENDC}")
	    beaches = await find_pois("plage", "Cotonou, Benin", radius_km=10, limit=5)
	    print(f" Found {len(beaches)} beaches")

	    # Test 5: Distance calculation (only possible when both lookups succeeded)
	    print(f"\n{Colors.BOLD}5. Distance Calculation:{Colors.ENDC}")
	    if coords and schools:
	        first_school = schools[0]
	        dist = calculate_distance_km(
	            coords[0], coords[1],
	            first_school["lat"], first_school["lon"],
	        )
	        print_success(f"Distance from Cotonou center to {first_school['name']}: {dist:.2f} km")

	    # Test 6: Integration with RLM
	    print(f"\n{Colors.BOLD}6. RLM Integration Test:{Colors.ENDC}")
	    from app.ai.services.rlm_search_service import RLMSearchAgent
	    agent = RLMSearchAgent()

	    pois = await agent._find_poi_locations("school", "Cotonou, Benin")
	    if not pois:
	        print_warning("RLM agent found no schools (may be network issue)")
	    else:
	        print_success(f"RLM agent found {len(pois)} schools via OSM")
	        print(f" First result: {pois[0].get('name', 'Unknown')}")

	    print(f"\n{Colors.BOLD}OSM Integration Complete!{Colors.ENDC}")
	    return True


	# =============================================================================
	# Main
	# =============================================================================

	async def main():
	    """Run the whole suite, print a summary and report the overall outcome.

	    The four offline tests (query analyzer, strategy selector, pattern
	    detection, distance calculation) determine the outcome. The OSM test and
	    the optional live test (enabled by ``--live`` on the command line) are run
	    for their printed output only, exactly as before.

	    Returns:
	        True when all offline tests passed, False otherwise. The function
	        previously returned None, so the outcome could not be turned into a
	        process exit status.
	    """
	    print(f"\n{Colors.BOLD}{Colors.HEADER}")
	    print("╔═══════════════════════════════════════════════════════════╗")
	    print("║ RLM (Recursive Language Model) Test Suite for AIDA ║")
	    print("╚═══════════════════════════════════════════════════════════╝")
	    print(f"{Colors.ENDC}\n")

	    live_mode = "--live" in sys.argv

	    # Every offline test is executed even if an earlier one failed; the list
	    # literal evaluates its entries in order, so output order is unchanged.
	    offline_results = [
	        # Test 1: Query Analyzer (no LLM calls)
	        test_query_analyzer(),
	        # Test 2: Strategy Selector
	        await test_strategy_selector(),
	        # Test 4: Pattern Detection
	        test_pattern_detection(),
	        # Test 5: Distance Calculation
	        test_distance_calculation(),
	    ]
	    all_passed = all(offline_results)

	    # Test 6: OpenStreetMap POI Service (result intentionally not counted)
	    await test_osm_poi_service()

	    # Test 3: Live RLM Search (only if --live flag; result not counted)
	    if live_mode:
	        print_warning("\nRunning LIVE tests with actual LLM calls...")
	        await test_rlm_search_live()
	    else:
	        print_info("\nSkipping live LLM tests. Run with --live flag to include them.")
	        print_info("Example: python test_rlm.py --live")

	    # Summary
	    print_header("Test Summary")
	    if all_passed:
	        print_success("All offline tests passed!")
	        print_info("RLM is ready to use in AIDA.")
	    else:
	        print_fail("Some tests failed. Check the output above.")

	    # Usage examples
	    print(f"\n{Colors.BOLD}Usage Examples:{Colors.ENDC}")
	    print("""
	# In your code:
	from app.ai.services.rlm_search_service import rlm_search

	# Multi-hop search (near POI)
	results = await rlm_search("3-bed near schools in Cotonou")

	# Boolean OR
	results = await rlm_search("under 500k or has pool")

	# Comparative
	results = await rlm_search("compare Cotonou vs Calavi")

	# The brain.py automatically uses RLM when appropriate!
	""")

	    return all_passed


	if __name__ == "__main__":
	    # Exit non-zero when an offline test failed so shells and CI can detect it.
	    sys.exit(0 if asyncio.run(main()) else 1)