Spaces:
Sleeping
Sleeping
| """Test the GIS recommender with dimensionality reduction.""" | |
| import numpy as np | |
| from recommender.gis_recommender import ( | |
| EmbeddingReducer, | |
| HybridSemanticSpatialEncoder, | |
| generate_mock_clients, | |
| generate_seed_donor_profile, | |
| GISRecommender, | |
| ClientProfile, | |
| HousingType, | |
| ) | |
def test_embedding_reducer():
    """Run ``EmbeddingReducer.compute_sparse_projection`` on a sparse vector.

    Builds a 1024-element vector that is zero everywhere except at eight
    hand-picked positions, requests a projection with ``n_components=8`` and
    prints the input size, the output size, the output values and the
    Euclidean norm of the output. Nothing is asserted; the output is meant
    for visual inspection.
    """
    banner = "=" * 50
    print(banner)
    print("Testing EmbeddingReducer")
    print(banner)

    # Sparse sample embedding (like SEA-LION output): position -> value for
    # the only non-zero slots.
    nonzero_entries = {
        0: 0.206,
        1: -0.198,
        10: 0.178,
        50: -0.145,
        100: 0.234,
        200: -0.167,
        500: 0.189,
        800: -0.156,
    }
    sample_embedding = np.zeros(1024)
    for position, value in nonzero_entries.items():
        sample_embedding[position] = value

    # Sparse projection down to 8 components.
    projected = EmbeddingReducer.compute_sparse_projection(
        sample_embedding, n_components=8
    )

    print(f"Original dims: {len(sample_embedding)}")
    print(f"Reduced dims: {len(projected)}")
    print(f"Reduced values: {projected}")
    projected_norm = np.linalg.norm(projected)
    print(f"Reduced norm: {projected_norm:.4f}")
    print()
def test_hybrid_encoder():
    """Test the hybrid semantic-spatial encoder.

    Encodes 1024-element random embeddings together with a coordinate pair,
    verifies the size of the resulting hybrid vector and prints two
    similarity scores:

    * the same embedding encoded at two nearby coordinate pairs, and
    * two different embeddings encoded at the same coordinate pair.

    The embeddings are drawn from a locally seeded generator so that repeated
    runs print the same numbers.

    Raises:
        AssertionError: if the hybrid vector does not have the expected
            ``8 + 2 = 10`` elements.
    """
    print("=" * 50)
    print("Testing HybridSemanticSpatialEncoder")
    print("=" * 50)

    semantic_dims = 8
    # Expected size is 8 + 2 = 10; the extra 2 presumably corresponds to the
    # coordinate pair -- confirm against HybridSemanticSpatialEncoder.
    expected_dims = semantic_dims + 2
    encoder = HybridSemanticSpatialEncoder(semantic_dims=semantic_dims)

    # A local, seeded generator keeps the run reproducible without touching
    # NumPy's global random state.
    rng = np.random.default_rng(0)

    # Sample embedding and Singapore coordinates
    embedding = rng.standard_normal(1024)
    coords = (1.3521, 103.8198)  # Singapore center
    hybrid = encoder.encode(embedding, coords)
    print(f"Hybrid vector dims: {len(hybrid)}")
    print(f"Hybrid values: {hybrid}")
    # Explicit raise (not ``assert``) so the check survives ``python -O``.
    if len(hybrid) != expected_dims:
        raise AssertionError(
            f"Expected {expected_dims} hybrid dims, got {len(hybrid)}"
        )

    # Similarity between nearby points (same embedding)
    coords2 = (1.3525, 103.8195)  # Very close
    hybrid2 = encoder.encode(embedding, coords2)
    sim_same = encoder.compute_similarity(hybrid, hybrid2)
    print(f"Similarity (same embedding, close coords): {sim_same:.4f}")

    # Similarity with a different embedding at the same coordinates
    embedding3 = rng.standard_normal(1024)
    hybrid3 = encoder.encode(embedding3, coords)
    sim_diff = encoder.compute_similarity(hybrid, hybrid3)
    print(f"Similarity (diff embedding, same coords): {sim_diff:.4f}")
    print()
# Smoke test for the mock-data generators: builds one seed donor profile for
# the "education" cause and requests 10 mock clients, then prints ids, causes
# and embedding sizes. Nothing is asserted; output is for visual inspection.
| def test_mock_clients(): | |
| """Test mock client generation with embeddings.""" | |
| print("=" * 50) | |
| print("Testing Mock Client Generation") | |
| print("=" * 50) | |
# Seed profile: print its id, causes and the length of each of its three
# embedding attributes (full, reduced, hybrid).
| seed = generate_seed_donor_profile("education") | |
| print(f"Seed profile: {seed.user_id}") | |
| print(f" - Causes: {seed.causes}") | |
| print(f" - Full embedding dims: {len(seed.embedding)}") | |
| print(f" - Reduced embedding dims: {len(seed.embedding_reduced)}") | |
| print(f" - Hybrid embedding dims: {len(seed.hybrid_embedding)}") | |
| print() | |
# Mock clients: report how many came back, then show details for at most the
# first three (id, planning area, housing type, causes, and whether the full
# and reduced embeddings are set).
| clients = generate_mock_clients(10) | |
| print(f"Generated {len(clients)} mock clients") | |
| for i, c in enumerate(clients[:3]): | |
| print(f" Client {i}: {c.user_id}") | |
| print(f" - Area: {c.planning_area}, Housing: {c.housing_type.value}") | |
| print(f" - Causes: {c.causes}") | |
| print( | |
| f" - Has embeddings: full={c.embedding is not None}, reduced={c.embedding_reduced is not None}" | |
| ) | |
# NOTE(review): indentation was lost in this copy of the file, so it is not
# visible whether the print() below runs once per client or once after the
# loop -- confirm against the original source before reformatting.
| print() | |
# Smoke test for GISRecommender.find_lookalikes_hybrid: matches an "education"
# seed profile against 50 mock clients, first without filters and then with
# planning_area_filter="bishan", printing the results of both calls. Nothing
# is asserted; output is for visual inspection.
| def test_hybrid_lookalike(): | |
| """Test hybrid lookalike matching.""" | |
| print("=" * 50) | |
| print("Testing Hybrid Lookalike Matching") | |
| print("=" * 50) | |
| seed = generate_seed_donor_profile("education") | |
| candidates = generate_mock_clients(50) | |
| recommender = GISRecommender() | |
| # Find lookalikes without filters | |
| results = recommender.find_lookalikes_hybrid( | |
| seed_profile=seed, | |
| candidates=candidates, | |
| k=10, | |
| ) | |
| print(f"Found {len(results)} lookalikes") | |
| print("\nTop 5 matches:") | |
# For at most the first five results, print the client id, the final score
# with its three component scores (vector, spatial, proximity), the client's
# causes and the distance in km.
| for i, r in enumerate(results[:5]): | |
| print(f" {i+1}. {r.client.user_id}") | |
| print( | |
| f" Score: {r.final_score:.3f} (vector={r.vector_similarity_score:.3f}, spatial={r.spatial_proxy_score:.3f}, prox={r.proximity_score:.3f})" | |
| ) | |
| print(f" Causes: {r.client.causes}") | |
| print(f" Distance: {r.geo_distance_km:.2f} km") | |
# NOTE(review): indentation was lost in this copy of the file, so it is not
# visible whether the print() below runs once per match or once after the
# loop -- confirm against the original source before reformatting.
| print() | |
| # Test with planning area filter | |
| print("\nWith planning area filter (bishan):") | |
# Same seed and candidate pool as above; only the planning-area filter is added.
| results_filtered = recommender.find_lookalikes_hybrid( | |
| seed_profile=seed, | |
| candidates=candidates, | |
| k=10, | |
| planning_area_filter="bishan", | |
| ) | |
| print(f"Found {len(results_filtered)} matches in Bishan") | |
# Show id and final score for at most the first three filtered results.
| for r in results_filtered[:3]: | |
| print(f" - {r.client.user_id}: {r.final_score:.3f}") | |
def test_tiered_targeting():
    """Print tier sizes and score ranges produced by tiered targeting.

    Matches an "education" seed profile against 100 mock clients with
    ``k=30``, passes the matches to ``apply_tiered_targeting`` with
    ``min_score=0.0`` and prints how many clients landed in each of the
    ``tier_1`` / ``tier_2`` / ``tier_3`` buckets. For non-empty tier 1 and
    tier 3 buckets it also prints the ``final_score`` of the last and first
    entry. Nothing is asserted.
    """
    rule = "=" * 50
    print(rule)
    print("Testing Tiered Targeting")
    print(rule)

    seed_profile = generate_seed_donor_profile("education")
    pool = generate_mock_clients(100)
    recommender = GISRecommender()
    matches = recommender.find_lookalikes_hybrid(
        seed_profile=seed_profile,
        candidates=pool,
        k=30,
    )
    tiers = recommender.apply_tiered_targeting(matches, min_score=0.0)

    # One summary line per tier, in tier order.
    tier_labels = (
        ("Tier 1 (High Priority)", "tier_1"),
        ("Tier 2 (Medium Priority)", "tier_2"),
        ("Tier 3 (Lower Priority)", "tier_3"),
    )
    for label, key in tier_labels:
        print(f"{label}: {len(tiers[key])} clients")

    # Score range is reported as "<last entry> - <first entry>".
    top_tier = tiers["tier_1"]
    if top_tier:
        last_score = top_tier[-1].final_score
        first_score = top_tier[0].final_score
        print(f"\nTier 1 score range: {last_score:.3f} - {first_score:.3f}")

    bottom_tier = tiers["tier_3"]
    if bottom_tier:
        last_score = bottom_tier[-1].final_score
        first_score = bottom_tier[0].final_score
        print(f"Tier 3 score range: {last_score:.3f} - {first_score:.3f}")
def test_geojson_export():
    """Print a summary of the GeoJSON produced from lookalike results.

    Matches an "education" seed profile against 20 mock clients with
    ``k=10``, converts the matches with ``GISRecommender.to_geojson`` and
    prints the top-level ``type`` and the number of ``features``. When at
    least one feature exists, the first feature's geometry type, coordinates
    and property names are printed as well. Nothing is asserted.
    """
    rule = "=" * 50
    print(rule)
    print("Testing GeoJSON Export")
    print(rule)

    seed_profile = generate_seed_donor_profile("education")
    pool = generate_mock_clients(20)
    recommender = GISRecommender()
    matches = recommender.find_lookalikes_hybrid(seed_profile, pool, k=10)
    exported = recommender.to_geojson(matches)

    print(f"GeoJSON type: {exported['type']}")
    print(f"Number of features: {len(exported['features'])}")

    # Nothing more to show when the export contains no features.
    if not exported["features"]:
        return

    first_feature = exported["features"][0]
    print("\nSample feature:")
    geometry = first_feature["geometry"]
    print(f" Geometry: {geometry['type']} at {geometry['coordinates']}")
    property_names = list(first_feature["properties"].keys())
    print(f" Properties: {property_names}")
if __name__ == "__main__":
    # Run every smoke test in a fixed order; any exception aborts the run
    # before the closing banner is printed.
    smoke_tests = (
        test_embedding_reducer,
        test_hybrid_encoder,
        test_mock_clients,
        test_hybrid_lookalike,
        test_tiered_targeting,
        test_geojson_export,
    )
    for run_test in smoke_tests:
        run_test()

    rule = "=" * 50
    print("\n" + rule)
    print("All tests passed!")
    print(rule)