Upload test_model.py

0232e6f verified 29 days ago

13.1 kB

	"""
	End-to-end test for the Fin-MoE Latent Encoder.

	Tests:
	1. Model instantiation and parameter counting
	2. Forward pass with synthetic data
	3. Loss computation and backward pass
	4. Gradient flow through all components
	5. Individual component tests (MLOFI, Active Depth, Sentiment, FMHCA, MoE)
	6. Inference mode (get_latent)
	"""

	import torch
	import sys
	import traceback


	def test_model_instantiation():
	"""Test model creates with default config."""
	print("=" * 60)
	print("TEST 1: Model Instantiation")
	print("=" * 60)

	from fin_moe.model import FinMoELatentEncoder

	model = FinMoELatentEncoder()
	params = model.count_parameters()

	print(f"Model created successfully!")
	print(f"\nParameter counts:")
	for component, count in params.items():
	print(f" {component:20s}: {count:>10,d}")

	total = params["total"]
	print(f"\n Total: {total:,d} ({total / 1e6:.1f}M parameters)")
	print(f" Config: {model.config}")

	return model


	def test_forward_pass(model):
	"""Test full forward pass with synthetic data."""
	print("\n" + "=" * 60)
	print("TEST 2: Forward Pass")
	print("=" * 60)

	from fin_moe.data_pipeline import generate_synthetic_batch

	batch = generate_synthetic_batch(batch_size=4, micro_seq_len=50, macro_seq_len=16)

	print(f"Input shapes:")
	for k, v in batch.items():
	if isinstance(v, torch.Tensor):
	print(f" {k:20s}: {list(v.shape)}")

	# Forward pass
	model.train()
	outputs = model(**batch)

	print(f"\nOutput shapes:")
	for k, v in outputs.items():
	if isinstance(v, torch.Tensor):
	print(f" {k:20s}: {list(v.shape)} \| range: [{v.min().item():.4f}, {v.max().item():.4f}]")
	elif isinstance(v, dict):
	print(f" {k:20s}: dict with {len(v)} keys")

	print(f"\n Loss: {outputs['loss'].item():.4f}")
	print(f" Loss components:")
	for k, v in outputs['loss_dict'].items():
	print(f" {k:25s}: {v:.4f}")

	return outputs


	def test_backward_pass(model, outputs):
	"""Test gradient computation and flow."""
	print("\n" + "=" * 60)
	print("TEST 3: Backward Pass & Gradient Flow")
	print("=" * 60)

	loss = outputs["loss"]
	loss.backward()

	# Check gradients exist for all components
	components = {
	"micro_encoder": model.micro_encoder,
	"macro_encoder": model.macro_encoder,
	"fmhca": model.fmhca,
	"moe": model.moe,
	"heads": model.heads,
	"loss_fn": model.loss_fn,
	}

	all_good = True
	for name, component in components.items():
	has_grad = any(
	p.grad is not None and p.grad.abs().sum() > 0
	for p in component.parameters() if p.requires_grad
	)
	total_params = sum(1 for p in component.parameters() if p.requires_grad)
	grad_params = sum(
	1 for p in component.parameters()
	if p.requires_grad and p.grad is not None and p.grad.abs().sum() > 0
	)
	status = "✓" if has_grad else "✗"
	if not has_grad:
	all_good = False
	print(f" {status} {name:20s}: {grad_params}/{total_params} params have gradients")

	# Check specific learnable parameters
	print(f"\n Key learnable parameters:")
	print(f" FMHCA alpha (tanh gate): {model.fmhca.micro_to_macro.alpha.data.item():.4f} "
	f"(grad: {model.fmhca.micro_to_macro.alpha.grad.item():.6f})")
	print(f" Loss log_var_mse: {model.loss_fn.log_var_mse.data.item():.4f} "
	f"(grad: {model.loss_fn.log_var_mse.grad.item():.6f})")
	print(f" Loss log_var_direction: {model.loss_fn.log_var_direction.data.item():.4f} "
	f"(grad: {model.loss_fn.log_var_direction.grad.item():.6f})")
	print(f" Loss log_var_toxicity: {model.loss_fn.log_var_toxicity.data.item():.4f} "
	f"(grad: {model.loss_fn.log_var_toxicity.grad.item():.6f})")

	model.zero_grad()
	return all_good


	def test_mlofi_extractor():
	"""Test MLOFI feature extraction independently."""
	print("\n" + "=" * 60)
	print("TEST 4: MLOFI Feature Extractor")
	print("=" * 60)

	from fin_moe.feature_extractors import MLOFIExtractor

	extractor = MLOFIExtractor(n_levels=10, d_model=128)

	bid_vol = torch.abs(torch.randn(2, 50, 10)) * 100
	ask_vol = torch.abs(torch.randn(2, 50, 10)) * 100

	features = extractor(bid_vol, ask_vol)
	print(f" Input: bid_vol {list(bid_vol.shape)}, ask_vol {list(ask_vol.shape)}")
	print(f" Output: {list(features.shape)}")
	print(f" Depth weights (learned): {extractor.compute_depth_weights().detach().numpy().round(3)}")

	# Verify OFI computation
	ofi = extractor.compute_per_level_ofi(bid_vol, ask_vol)
	print(f" OFI shape: {list(ofi.shape)}, mean: {ofi.mean():.4f}, std: {ofi.std():.4f}")


	def test_active_depth():
	"""Test Active Depth feature extraction."""
	print("\n" + "=" * 60)
	print("TEST 5: Active Depth Features")
	print("=" * 60)

	from fin_moe.feature_extractors import ActiveDepthExtractor

	extractor = ActiveDepthExtractor(n_levels=10, d_model=128)

	bid_q = torch.abs(torch.randn(2, 50, 10)) * 100
	ask_q = torch.abs(torch.randn(2, 50, 10)) * 100

	features = extractor(bid_q, ask_q)
	print(f" Input: bid {list(bid_q.shape)}, ask {list(ask_q.shape)}")
	print(f" Output: {list(features.shape)}")

	# Test individual metrics
	ke = extractor.compute_kinetic_energy(bid_q + ask_q)
	mt = extractor.compute_market_temperature(bid_q + ask_q)
	print(f" Kinetic Energy: {list(ke.shape)}, mean: {ke.mean():.2f}")
	print(f" Market Temperature: {list(mt.shape)}, mean: {mt.mean():.4f}")


	def test_sentiment_tokenizer():
	"""Test Economic Sentiment Tokenizer."""
	print("\n" + "=" * 60)
	print("TEST 6: Economic Sentiment Tokenizer")
	print("=" * 60)

	from fin_moe.feature_extractors import EconomicSentimentTokenizer

	tokenizer = EconomicSentimentTokenizer(text_dim=768, d_model=128, n_tokens=4)

	text_emb = torch.randn(2, 32, 768)
	tokens, scores = tokenizer(text_emb)

	print(f" Input: {list(text_emb.shape)}")
	print(f" Output tokens: {list(tokens.shape)}")
	print(f" Sentiment dimensions:")
	for name, score in scores.items():
	print(f" {name:25s}: {score.detach().numpy().round(3)}")


	def test_cross_attention():
	"""Test FMHCA fusion module."""
	print("\n" + "=" * 60)
	print("TEST 7: Financial Multi-Head Cross-Attention")
	print("=" * 60)

	from fin_moe.cross_attention import FinancialMultiHeadCrossAttention

	fmhca = FinancialMultiHeadCrossAttention(
	d_model=128, n_heads=8, n_fusion_layers=2, n_bottleneck=4
	)

	micro = torch.randn(2, 50, 128)
	macro = torch.randn(2, 4, 128)

	latent, micro_e, macro_e = fmhca(micro, macro)

	print(f" Micro input: {list(micro.shape)}")
	print(f" Macro input: {list(macro.shape)}")
	print(f" Latent output: {list(latent.shape)}")
	print(f" Micro enriched: {list(micro_e.shape)}")
	print(f" Macro enriched: {list(macro_e.shape)}")
	print(f" Gate values: α={fmhca.micro_to_macro.alpha.item():.4f}, "
	f"β={fmhca.macro_to_micro.alpha.item():.4f}")


	def test_moe_layer():
	"""Test MoE routing and expert utilization."""
	print("\n" + "=" * 60)
	print("TEST 8: Sparse Mixture of Experts")
	print("=" * 60)

	from fin_moe.moe_layer import SparseMoELayer

	moe = SparseMoELayer(d_model=128, d_ff=512, n_experts=8, top_k=2)
	moe.train()

	x = torch.randn(4, 8, 128)
	out = moe(x)

	print(f" Input: {list(x.shape)}")
	print(f" Output: {list(out.shape)}")
	print(f" Aux loss: {moe.aux_loss.item():.4f}")

	# Run a few batches to accumulate utilization stats
	for _ in range(10):
	moe(torch.randn(4, 8, 128))

	util = moe.get_expert_utilization()
	print(f" Expert utilization: {[f'{u:.3f}' for u in util['utilization']]}")
	print(f" Utilization entropy: {util['entropy']:.4f}")


	def test_hybrid_loss():
	"""Test Sakuma DML hybrid loss."""
	print("\n" + "=" * 60)
	print("TEST 9: Sakuma DML Hybrid Loss")
	print("=" * 60)

	from fin_moe.hybrid_loss import SakumaDMLHybridLoss

	loss_fn = SakumaDMLHybridLoss()

	B = 8
	return_pred = torch.randn(B)
	direction_logits = torch.randn(B, 3)
	toxicity_pred = torch.cat([
	torch.sigmoid(torch.randn(B, 1)),
	torch.randn(B, 1)
	], dim=-1)
	true_returns = torch.randn(B) * 0.01
	true_toxicity = torch.rand(B)

	total_loss, loss_dict = loss_fn(
	return_pred, direction_logits, toxicity_pred,
	true_returns, true_toxicity,
	moe_aux_loss=torch.tensor(0.1)
	)

	print(f" Total loss: {total_loss.item():.4f}")
	for k, v in loss_dict.items():
	print(f" {k:25s}: {v:.4f}")

	# Check that loss weights are learnable
	total_loss.backward()
	print(f"\n Gradients on log-var parameters:")
	print(f" log_var_mse grad: {loss_fn.log_var_mse.grad.item():.6f}")
	print(f" log_var_direction grad: {loss_fn.log_var_direction.grad.item():.6f}")
	print(f" log_var_toxicity grad: {loss_fn.log_var_toxicity.grad.item():.6f}")


	def test_inference_mode():
	"""Test inference mode (get_latent)."""
	print("\n" + "=" * 60)
	print("TEST 10: Inference Mode (get_latent)")
	print("=" * 60)

	from fin_moe.model import FinMoELatentEncoder
	from fin_moe.data_pipeline import generate_synthetic_batch

	model = FinMoELatentEncoder()
	model.eval()

	batch = generate_synthetic_batch(batch_size=4, micro_seq_len=50, macro_seq_len=16)

	with torch.no_grad():
	latent = model.get_latent(
	bid_volumes=batch["bid_volumes"],
	ask_volumes=batch["ask_volumes"],
	text_embeddings=batch["text_embeddings"]
	)

	print(f" Latent shape: {list(latent.shape)}")
	print(f" Latent norm: {latent.norm(dim=-1).mean().item():.4f}")
	print(f" Latent std: {latent.std(dim=-1).mean().item():.4f}")


	def test_training_step():
	"""Test a complete training step."""
	print("\n" + "=" * 60)
	print("TEST 11: Complete Training Step")
	print("=" * 60)

	from fin_moe.model import FinMoELatentEncoder
	from fin_moe.data_pipeline import generate_synthetic_batch

	model = FinMoELatentEncoder({"d_model": 128, "n_experts": 4})
	optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

	model.train()

	losses = []
	for step in range(5):
	batch = generate_synthetic_batch(batch_size=4, micro_seq_len=50, macro_seq_len=16)

	outputs = model(**batch)
	loss = outputs["loss"]

	optimizer.zero_grad()
	loss.backward()

	# Gradient clipping
	grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

	optimizer.step()

	losses.append(loss.item())
	print(f" Step {step+1}: loss={loss.item():.4f}, grad_norm={grad_norm:.4f}")

	print(f"\n Loss trend: {losses[0]:.4f} → {losses[-1]:.4f}")
	decreasing = losses[-1] < losses[0]
	print(f" Loss decreasing: {'✓' if decreasing else '~ (expected with random data)'}")


	def main():
	"""Run all tests."""
	print("╔" + "═" * 58 + "╗")
	print("║ Fin-MoE Latent Encoder — End-to-End Test Suite ║")
	print("╚" + "═" * 58 + "╝")

	tests = [
	("Model Instantiation", test_model_instantiation),
	("Forward Pass", lambda: test_forward_pass(model)),
	("Backward Pass", lambda: test_backward_pass(model, outputs)),
	("MLOFI Extractor", test_mlofi_extractor),
	("Active Depth", test_active_depth),
	("Sentiment Tokenizer", test_sentiment_tokenizer),
	("Cross-Attention", test_cross_attention),
	("MoE Layer", test_moe_layer),
	("Hybrid Loss", test_hybrid_loss),
	("Inference Mode", test_inference_mode),
	("Training Step", test_training_step),
	]

	# Run test 1 first to get model
	model = test_model_instantiation()

	# Run test 2 with model
	outputs = test_forward_pass(model)

	# Run test 3 with model and outputs
	test_backward_pass(model, outputs)

	# Run remaining tests
	passed = 3
	failed = 0
	for name, test_fn in tests[3:]:
	try:
	test_fn()
	passed += 1
	except Exception as e:
	failed += 1
	print(f"\n ✗ FAILED: {name}")
	print(f" Error: {e}")
	traceback.print_exc()

	print("\n" + "=" * 60)
	print(f"RESULTS: {passed} passed, {failed} failed out of {len(tests)}")
	print("=" * 60)

	if failed > 0:
	sys.exit(1)
	else:
	print("\n✓ All tests passed! Fin-MoE Latent Encoder is working correctly.")
	sys.exit(0)


	if __name__ == "__main__":
	main()