ollama-api-proxy / test_gis.py
GitHub Actions
Sync from GitHub
1d32142
"""Test the GIS recommender with dimensionality reduction."""
import numpy as np
from recommender.gis_recommender import (
EmbeddingReducer,
HybridSemanticSpatialEncoder,
generate_mock_clients,
generate_seed_donor_profile,
GISRecommender,
ClientProfile,
HousingType,
)
def test_embedding_reducer():
    """Smoke-test ``EmbeddingReducer.compute_sparse_projection``.

    Builds a 1024-element vector that is zero except for eight hand-picked
    entries, passes it to the projection with ``n_components=8`` and prints
    the input length, the output length, the projected values and their
    Euclidean norm.

    NOTE(review): nothing is asserted here; the check only fails if one of
    the calls raises.
    """
    banner = "=" * 50
    print(banner)
    print("Testing EmbeddingReducer")
    print(banner)

    # Mostly-zero input, mimicking a sparse embedding (like SEA-LION output).
    nonzero_entries = {
        0: 0.206,
        1: -0.198,
        10: 0.178,
        50: -0.145,
        100: 0.234,
        200: -0.167,
        500: 0.189,
        800: -0.156,
    }
    sample_embedding = np.zeros(1024)
    for position, weight in nonzero_entries.items():
        sample_embedding[position] = weight

    # Exercise the sparse projection, requesting eight components.
    projected = EmbeddingReducer.compute_sparse_projection(
        sample_embedding, n_components=8
    )

    print(f"Original dims: {len(sample_embedding)}")
    print(f"Reduced dims: {len(projected)}")
    print(f"Reduced values: {projected}")
    print(f"Reduced norm: {np.linalg.norm(projected):.4f}")
    print()
def test_hybrid_encoder():
    """Smoke-test ``HybridSemanticSpatialEncoder`` encoding and similarity.

    Encodes one 1024-element random vector at two nearby coordinate pairs
    and prints the similarity of the two results, then encodes a second
    random vector at the first coordinates and prints that similarity too.

    The random vectors are drawn from a local, fixed-seed generator so the
    inputs are identical on every run; the original unseeded draws made the
    printed similarities change from run to run.

    NOTE(review): nothing is asserted here; the check only fails if one of
    the calls raises.
    """
    print("=" * 50)
    print("Testing HybridSemanticSpatialEncoder")
    print("=" * 50)

    encoder = HybridSemanticSpatialEncoder(semantic_dims=8)

    # Local generator: reproducible inputs without touching NumPy's global
    # random state, which code outside this function may also rely on.
    rng = np.random.default_rng(42)

    # Test with sample embedding and Singapore coordinates
    embedding = rng.standard_normal(1024)
    coords = (1.3521, 103.8198)  # Singapore center
    hybrid = encoder.encode(embedding, coords)
    print(f"Hybrid vector dims: {len(hybrid)}")  # Should be 8 + 2 = 10
    print(f"Hybrid values: {hybrid}")

    # Test similarity between nearby points
    coords2 = (1.3525, 103.8195)  # Very close
    hybrid2 = encoder.encode(embedding, coords2)
    sim_same = encoder.compute_similarity(hybrid, hybrid2)
    print(f"Similarity (same embedding, close coords): {sim_same:.4f}")

    # Test with different embedding
    embedding3 = rng.standard_normal(1024)
    hybrid3 = encoder.encode(embedding3, coords)
    sim_diff = encoder.compute_similarity(hybrid, hybrid3)
    print(f"Similarity (diff embedding, same coords): {sim_diff:.4f}")
    print()
def test_mock_clients():
    """Smoke-test seed-profile and mock-client generation.

    Generates a seed donor profile for ``"education"`` and prints its id,
    causes and the lengths of its three embedding attributes. Then generates
    ten mock clients and, for the first three, prints the id, planning area,
    housing type, causes and whether the full and reduced embeddings are set.

    NOTE(review): nothing is asserted here; the check only fails if one of
    the calls or attribute lookups raises.
    """
    rule = "=" * 50
    print(rule)
    print("Testing Mock Client Generation")
    print(rule)

    seed_profile = generate_seed_donor_profile("education")
    print(f"Seed profile: {seed_profile.user_id}")
    print(f" - Causes: {seed_profile.causes}")
    print(f" - Full embedding dims: {len(seed_profile.embedding)}")
    print(f" - Reduced embedding dims: {len(seed_profile.embedding_reduced)}")
    print(f" - Hybrid embedding dims: {len(seed_profile.hybrid_embedding)}")
    print()

    mock_clients = generate_mock_clients(10)
    print(f"Generated {len(mock_clients)} mock clients")

    # Only the first three clients are shown to keep the output short.
    for index, client in enumerate(mock_clients[:3]):
        print(f" Client {index}: {client.user_id}")
        print(
            f" - Area: {client.planning_area}, "
            f"Housing: {client.housing_type.value}"
        )
        print(f" - Causes: {client.causes}")
        has_full = client.embedding is not None
        has_reduced = client.embedding_reduced is not None
        print(f" - Has embeddings: full={has_full}, reduced={has_reduced}")
    print()
def test_hybrid_lookalike():
    """Smoke-test ``GISRecommender.find_lookalikes_hybrid``.

    Runs the search twice over the same seed profile and 50 mock candidates
    with ``k=10``: once without a filter, printing the score breakdown,
    causes and distance of the first five results, and once with
    ``planning_area_filter="bishan"``, printing the first three results.

    NOTE(review): nothing is asserted here; the check only fails if one of
    the calls or attribute lookups raises.
    """
    divider = "=" * 50
    print(divider)
    print("Testing Hybrid Lookalike Matching")
    print(divider)

    # Arguments shared by the unfiltered and the filtered search.
    search_args = {
        "seed_profile": generate_seed_donor_profile("education"),
        "candidates": generate_mock_clients(50),
        "k": 10,
    }
    recommender = GISRecommender()

    # Find lookalikes without filters
    unfiltered = recommender.find_lookalikes_hybrid(**search_args)
    print(f"Found {len(unfiltered)} lookalikes")
    print("\nTop 5 matches:")
    for rank, match in enumerate(unfiltered[:5], start=1):
        print(f" {rank}. {match.client.user_id}")
        print(
            f" Score: {match.final_score:.3f} "
            f"(vector={match.vector_similarity_score:.3f}, "
            f"spatial={match.spatial_proxy_score:.3f}, "
            f"prox={match.proximity_score:.3f})"
        )
        print(f" Causes: {match.client.causes}")
        print(f" Distance: {match.geo_distance_km:.2f} km")
        print()

    # Test with planning area filter
    print("\nWith planning area filter (bishan):")
    bishan_matches = recommender.find_lookalikes_hybrid(
        **search_args, planning_area_filter="bishan"
    )
    print(f"Found {len(bishan_matches)} matches in Bishan")
    for match in bishan_matches[:3]:
        print(f" - {match.client.user_id}: {match.final_score:.3f}")
def test_tiered_targeting():
    """Smoke-test ``GISRecommender.apply_tiered_targeting``.

    Finds 30 lookalikes among 100 mock candidates, passes them to
    ``apply_tiered_targeting`` with ``min_score=0.0`` and prints how many
    results sit under each of the ``tier_1``/``tier_2``/``tier_3`` keys.
    For ``tier_1`` and ``tier_3``, when non-empty, it also prints the
    ``final_score`` of the last and first entries as a score range.

    NOTE(review): nothing is asserted here; the check only fails if one of
    the calls or lookups raises.
    """
    rule = "=" * 50
    print(rule)
    print("Testing Tiered Targeting")
    print(rule)

    seed_profile = generate_seed_donor_profile("education")
    pool = generate_mock_clients(100)
    recommender = GISRecommender()
    lookalikes = recommender.find_lookalikes_hybrid(
        seed_profile=seed_profile,
        candidates=pool,
        k=30,
    )
    tiered = recommender.apply_tiered_targeting(lookalikes, min_score=0.0)

    # One count line per tier, in priority order.
    tier_labels = (
        ("tier_1", "Tier 1 (High Priority)"),
        ("tier_2", "Tier 2 (Medium Priority)"),
        ("tier_3", "Tier 3 (Lower Priority)"),
    )
    for key, label in tier_labels:
        print(f"{label}: {len(tiered[key])} clients")

    # Score ranges are printed as last-entry score, then first-entry score.
    top_tier = tiered["tier_1"]
    if top_tier:
        print(
            "\nTier 1 score range: "
            f"{top_tier[-1].final_score:.3f} - {top_tier[0].final_score:.3f}"
        )

    bottom_tier = tiered["tier_3"]
    if bottom_tier:
        print(
            "Tier 3 score range: "
            f"{bottom_tier[-1].final_score:.3f} - "
            f"{bottom_tier[0].final_score:.3f}"
        )
def test_geojson_export():
    """Smoke-test ``GISRecommender.to_geojson``.

    Finds 10 lookalikes among 20 mock candidates, converts them with
    ``to_geojson`` and prints the top-level ``type`` and the number of
    ``features``. If there is at least one feature, it also prints the first
    feature's geometry type, its coordinates and its property names.

    NOTE(review): nothing is asserted here; the check only fails if one of
    the calls or key lookups raises.
    """
    print("=" * 50)
    print("Testing GeoJSON Export")
    print("=" * 50)

    seed = generate_seed_donor_profile("education")
    candidates = generate_mock_clients(20)
    recommender = GISRecommender()

    # Keyword arguments, as in the other find_lookalikes_hybrid calls in
    # this file, so the call does not depend on parameter order.
    results = recommender.find_lookalikes_hybrid(
        seed_profile=seed,
        candidates=candidates,
        k=10,
    )
    geojson = recommender.to_geojson(results)

    print(f"GeoJSON type: {geojson['type']}")
    print(f"Number of features: {len(geojson['features'])}")

    if geojson["features"]:
        feat = geojson["features"][0]
        # Plain string: the original f-prefix had no placeholders.
        print("\nSample feature:")
        print(
            f" Geometry: {feat['geometry']['type']} at {feat['geometry']['coordinates']}"
        )
        print(f" Properties: {list(feat['properties'].keys())}")
if __name__ == "__main__":
    # Run every smoke test in order. An exception from any of them aborts
    # the run before the closing banner is printed.
    for run_test in (
        test_embedding_reducer,
        test_hybrid_encoder,
        test_mock_clients,
        test_hybrid_lookalike,
        test_tiered_targeting,
        test_geojson_export,
    ):
        run_test()

    closing_rule = "=" * 50
    print("\n" + closing_rule)
    print("All tests passed!")
    print(closing_rule)