Spaces:

kevansoon
/

ollama-api-proxy

Sleeping

GitHub Actions

Sync from GitHub

1d32142 3 months ago

6.62 kB

	"""Test the GIS recommender with dimensionality reduction."""

	import numpy as np
	from recommender.gis_recommender import (
	EmbeddingReducer,
	HybridSemanticSpatialEncoder,
	generate_mock_clients,
	generate_seed_donor_profile,
	GISRecommender,
	ClientProfile,
	HousingType,
	)


	def test_embedding_reducer():
	    """Run ``EmbeddingReducer.compute_sparse_projection`` on a sparse vector.

	    Builds a 1024-element zero vector with eight hand-picked non-zero
	    entries, projects it with ``n_components=8`` and prints the original
	    and reduced lengths, the reduced values and their ``np.linalg.norm``.
	    Nothing is asserted; the output is meant for manual inspection.
	    """
	    banner = "=" * 50
	    print(banner)
	    print("Testing EmbeddingReducer")
	    print(banner)

	    # Sample sparse embedding (like SEA-LION output), described as
	    # position -> value; every other position stays at zero.
	    nonzero_entries = {
	        0: 0.206,
	        1: -0.198,
	        10: 0.178,
	        50: -0.145,
	        100: 0.234,
	        200: -0.167,
	        500: 0.189,
	        800: -0.156,
	    }
	    sample_embedding = np.zeros(1024)
	    for position, value in nonzero_entries.items():
	        sample_embedding[position] = value

	    # Sparse projection down to 8 components.
	    reduced = EmbeddingReducer.compute_sparse_projection(
	        sample_embedding,
	        n_components=8,
	    )

	    original_dims = len(sample_embedding)
	    print(f"Original dims: {original_dims}")
	    print(f"Reduced dims: {len(reduced)}")
	    print(f"Reduced values: {reduced}")
	    print(f"Reduced norm: {np.linalg.norm(reduced):.4f}")
	    print()


	def test_hybrid_encoder():
	    """Run ``HybridSemanticSpatialEncoder`` encoding and similarity.

	    Encodes one random 1024-element embedding at two nearby coordinate
	    pairs, and a second random embedding at the first coordinate pair,
	    then prints the first hybrid vector and both similarity scores.
	    Nothing is asserted; the output is meant for manual inspection.

	    The random embeddings are drawn from a locally seeded generator, so
	    the inputs handed to the encoder are identical on every run.
	    """
	    print("=" * 50)
	    print("Testing HybridSemanticSpatialEncoder")
	    print("=" * 50)

	    # A private seeded generator makes the inputs reproducible without
	    # reseeding or consuming NumPy's global random state.
	    rng = np.random.default_rng(42)

	    encoder = HybridSemanticSpatialEncoder(semantic_dims=8)

	    # Test with sample embedding and Singapore coordinates
	    embedding = rng.standard_normal(1024)
	    coords = (1.3521, 103.8198)  # Singapore center

	    hybrid = encoder.encode(embedding, coords)
	    # Expected by the original author to be 8 + 2 = 10 (not checked here).
	    print(f"Hybrid vector dims: {len(hybrid)}")
	    print(f"Hybrid values: {hybrid}")

	    # Test similarity between nearby points
	    coords2 = (1.3525, 103.8195)  # Very close
	    hybrid2 = encoder.encode(embedding, coords2)

	    sim_same = encoder.compute_similarity(hybrid, hybrid2)
	    print(f"Similarity (same embedding, close coords): {sim_same:.4f}")

	    # Test with different embedding
	    embedding3 = rng.standard_normal(1024)
	    hybrid3 = encoder.encode(embedding3, coords)

	    sim_diff = encoder.compute_similarity(hybrid, hybrid3)
	    print(f"Similarity (diff embedding, same coords): {sim_diff:.4f}")
	    print()


	def test_mock_clients():
	"""Print a summary of a generated seed donor profile and mock clients.

	Calls ``generate_seed_donor_profile("education")`` and
	``generate_mock_clients(10)``, then prints identifying fields together
	with the lengths (seed) or presence (clients) of their embeddings.
	Nothing is asserted; the output is meant for manual inspection.
	"""
	print("=" * 50)
	print("Testing Mock Client Generation")
	print("=" * 50)

	# Seed profile: report its id, causes and the length of each embedding.
	seed = generate_seed_donor_profile("education")
	print(f"Seed profile: {seed.user_id}")
	print(f" - Causes: {seed.causes}")
	print(f" - Full embedding dims: {len(seed.embedding)}")
	print(f" - Reduced embedding dims: {len(seed.embedding_reduced)}")
	print(f" - Hybrid embedding dims: {len(seed.hybrid_embedding)}")
	print()

	# Mock clients: generate 10 and show details for the first three only.
	clients = generate_mock_clients(10)
	print(f"Generated {len(clients)} mock clients")
	for i, c in enumerate(clients[:3]):
	print(f" Client {i}: {c.user_id}")
	print(f" - Area: {c.planning_area}, Housing: {c.housing_type.value}")
	print(f" - Causes: {c.causes}")
	print(
	f" - Has embeddings: full={c.embedding is not None}, reduced={c.embedding_reduced is not None}"
	)
	# NOTE(review): indentation was lost in this copy, so it is unclear whether
	# the blank-line print below belongs to the loop body or follows the loop;
	# confirm against the upstream file before re-indenting.
	print()


	def test_hybrid_lookalike():
	"""Print lookalike matches for a seed profile, unfiltered and filtered.

	Generates an "education" seed profile and 50 mock clients, then calls
	``GISRecommender.find_lookalikes_hybrid`` twice with ``k=10``: once
	without filters and once with ``planning_area_filter="bishan"``.
	Nothing is asserted; the output is meant for manual inspection.
	"""
	print("=" * 50)
	print("Testing Hybrid Lookalike Matching")
	print("=" * 50)

	seed = generate_seed_donor_profile("education")
	candidates = generate_mock_clients(50)

	recommender = GISRecommender()

	# Find lookalikes without filters
	results = recommender.find_lookalikes_hybrid(
	seed_profile=seed,
	candidates=candidates,
	k=10,
	)

	# Show at most the first five results with their score breakdown.
	print(f"Found {len(results)} lookalikes")
	print("\nTop 5 matches:")
	for i, r in enumerate(results[:5]):
	print(f" {i+1}. {r.client.user_id}")
	print(
	f" Score: {r.final_score:.3f} (vector={r.vector_similarity_score:.3f}, spatial={r.spatial_proxy_score:.3f}, prox={r.proximity_score:.3f})"
	)
	print(f" Causes: {r.client.causes}")
	print(f" Distance: {r.geo_distance_km:.2f} km")
	# NOTE(review): indentation was lost in this copy, so it is unclear whether
	# the blank-line print below belongs to the loop body or follows the loop;
	# confirm against the upstream file before re-indenting.
	print()

	# Test with planning area filter
	print("\nWith planning area filter (bishan):")
	results_filtered = recommender.find_lookalikes_hybrid(
	seed_profile=seed,
	candidates=candidates,
	k=10,
	planning_area_filter="bishan",
	)
	# Show at most the first three filtered results.
	print(f"Found {len(results_filtered)} matches in Bishan")
	for r in results_filtered[:3]:
	print(f" - {r.client.user_id}: {r.final_score:.3f}")


	def test_tiered_targeting():
	    """Print tier sizes and score ranges from ``apply_tiered_targeting``.

	    Generates an "education" seed profile and 100 mock clients, retrieves
	    ``k=30`` hybrid lookalikes and passes them to
	    ``GISRecommender.apply_tiered_targeting`` with ``min_score=0.0``.
	    The size of each of the three tiers is printed, followed by the score
	    of the last and first entry of tier 1 and of tier 3 when those tiers
	    are non-empty.  Nothing is asserted.
	    """
	    rule = "=" * 50
	    print(rule)
	    print("Testing Tiered Targeting")
	    print(rule)

	    seed = generate_seed_donor_profile("education")
	    candidates = generate_mock_clients(100)

	    recommender = GISRecommender()

	    lookalikes = recommender.find_lookalikes_hybrid(
	        seed_profile=seed,
	        candidates=candidates,
	        k=30,
	    )

	    tiered = recommender.apply_tiered_targeting(lookalikes, min_score=0.0)

	    # One summary line per tier, in tier order.
	    tier_labels = (
	        ("Tier 1 (High Priority)", "tier_1"),
	        ("Tier 2 (Medium Priority)", "tier_2"),
	        ("Tier 3 (Lower Priority)", "tier_3"),
	    )
	    for label, key in tier_labels:
	        print(f"{label}: {len(tiered[key])} clients")

	    # Score range is reported as "last entry - first entry" of the tier.
	    top_tier = tiered["tier_1"]
	    if top_tier:
	        last_score = top_tier[-1].final_score
	        first_score = top_tier[0].final_score
	        print(f"\nTier 1 score range: {last_score:.3f} - {first_score:.3f}")

	    bottom_tier = tiered["tier_3"]
	    if bottom_tier:
	        last_score = bottom_tier[-1].final_score
	        first_score = bottom_tier[0].final_score
	        print(f"Tier 3 score range: {last_score:.3f} - {first_score:.3f}")


	def test_geojson_export():
	    """Print a summary of the GeoJSON produced by ``GISRecommender.to_geojson``.

	    Generates an "education" seed profile and 20 mock clients, retrieves
	    ``k=10`` hybrid lookalikes and converts them with ``to_geojson``.
	    The top-level ``type`` and the feature count are printed; when at
	    least one feature exists, the first feature's geometry type,
	    coordinates and property keys are printed as well.  Nothing is
	    asserted; the output is meant for manual inspection.
	    """
	    print("=" * 50)
	    print("Testing GeoJSON Export")
	    print("=" * 50)

	    seed = generate_seed_donor_profile("education")
	    candidates = generate_mock_clients(20)

	    recommender = GISRecommender()
	    # Keyword arguments, matching how the sibling tests call this method.
	    results = recommender.find_lookalikes_hybrid(
	        seed_profile=seed,
	        candidates=candidates,
	        k=10,
	    )

	    geojson = recommender.to_geojson(results)

	    print(f"GeoJSON type: {geojson['type']}")
	    print(f"Number of features: {len(geojson['features'])}")

	    if geojson["features"]:
	        feat = geojson["features"][0]
	        # Plain literal: there is nothing to interpolate in this line.
	        print("\nSample feature:")
	        print(
	            f" Geometry: {feat['geometry']['type']} at {feat['geometry']['coordinates']}"
	        )
	        print(f" Properties: {list(feat['properties'].keys())}")


	if __name__ == "__main__":
	    # Run every check in its original order.  The checks only print, so
	    # reaching the closing banner means none of them raised.
	    for run_check in (
	        test_embedding_reducer,
	        test_hybrid_encoder,
	        test_mock_clients,
	        test_hybrid_lookalike,
	        test_tiered_targeting,
	        test_geojson_export,
	    ):
	        run_check()

	    closing_rule = "=" * 50
	    print("\n" + closing_rule)
	    print("All tests passed!")
	    print(closing_rule)