tensegrity / tests /test_scoring_bench.py

Enhance README and scripts for cognitive architecture testing

be04d92 23 days ago

3.32 kB

	"""
	Integration tests: ScoringBridge on benchmark samples and energy causal arena.
	"""
	import numpy as np
	# Seed NumPy's global RNG once, at import time.
	# NOTE(review): presumably meant to make runs repeatable; that only holds if the
	# code under test draws from np.random's global state — confirm.
	np.random.seed(42)


	def test_scoring_bridge_on_tasks():
	    """Smoke-test ScoringBridge against a few benchmark tasks.

	    For each task, samples are loaded with ``max_samples=30``. The bridge is
	    reset before every sample, the index of the highest score is taken as the
	    prediction, and it is compared against the sample's ``gold`` value. The
	    per-task accuracy is printed. A task whose samples fail to load is
	    reported as SKIP and the loop moves on. Nothing is asserted: the test
	    passes as long as scoring raises no exception.
	    """
	    rule = "=" * 60
	    print(rule)
	    print("TEST: semantic field scoring on sample tasks")
	    print(rule)

	    # Imported lazily, after the banner, exactly as the banner/import order
	    # matters for what is visible when an import fails.
	    from tensegrity.engine.scoring import ScoringBridge
	    from tensegrity.bench.tasks import load_task_samples

	    scorer = ScoringBridge(obs_dim=128, hidden_dims=[64, 16])

	    for task_name in ("copa", "sciq", "arc_challenge"):
	        try:
	            batch = load_task_samples(task_name, max_samples=30)
	        except Exception as err:
	            # Loading problems skip the task instead of failing the test.
	            print(f"\n {task_name}: SKIP ({err})")
	            continue

	        hits = 0
	        seen = 0
	        for seen, item in enumerate(batch, start=1):
	            # Reset before each sample so every sample is scored from a
	            # freshly reset bridge.
	            scorer.reset()
	            choice_scores, _entropy = scorer.score_choices(item.prompt, item.choices)
	            if int(np.argmax(choice_scores)) == item.gold:
	                hits += 1

	        # An empty batch reports 0.0% rather than dividing by zero.
	        accuracy = hits / seen if seen else 0.0
	        print(f"\n {task_name}: {hits}/{seen} = {accuracy:.1%}")

	    print("\n ✓ ScoringBridge functional")


	def test_causal_energy_arena():
	    """Smoke-test the energy-based competition between two causal models.

	    Two StructuralCausalModel instances are built over variables X and Y:
	    "correct" declares X as a parent of Y, "wrong" declares Y without parents.
	    Both are updated from the same 100 samples drawn from the "correct" model,
	    registered in an EnergyCausalArena, and then compete on 20 further
	    samples, with the arena's models updated after each observation. The win
	    count, the arena tension, and the energies/posteriors of one final
	    competition are printed. Nothing is asserted.
	    """
	    print("\n" + "=" * 60)
	    print("TEST: energy-based causal arena")
	    print("=" * 60)

	    from tensegrity.causal.scm import StructuralCausalModel
	    from tensegrity.engine.causal_energy import EnergyCausalArena

	    # Candidate that declares the X -> Y dependency.
	    linked = StructuralCausalModel("correct")
	    linked.add_variable("X", n_values=3)
	    linked.add_variable("Y", n_values=3, parents=["X"])

	    # Rival with the same variables but no parent declared for Y.
	    unlinked = StructuralCausalModel("wrong")
	    unlinked.add_variable("X", n_values=3)
	    unlinked.add_variable("Y", n_values=3)

	    # Both models are updated from the same data, sampled from the linked model.
	    training = linked.sample(100)
	    for model in (linked, unlinked):
	        model.update_from_data(training)

	    arena = EnergyCausalArena(precision=1.0, beta=2.0)
	    for model in (linked, unlinked):
	        arena.register(model)

	    # Compete on 20 fresh observations; models are updated after each round,
	    # so the compete/update order inside the loop must stay as is.
	    held_out = linked.sample(20)
	    winners = []
	    for observation in held_out:
	        outcome = arena.compete(observation)
	        winners.append(outcome["winner"])
	        arena.update_models(observation)

	    n_correct = winners.count("correct")
	    print(f" Correct model wins: {n_correct}/{len(winners)}")
	    print(f" Final tension: {arena.tension:.3f}")

	    # One more competition on the last observation to show raw energies.
	    final = arena.compete(held_out[-1])
	    print(f" Last energies: {final['energies']}")
	    print(f" Last posteriors: {final['posteriors']}")

	    print(" ✓ Energy causal arena functional")


	if __name__ == "__main__":
	    # Script entry point: run every test in order, print a traceback for each
	    # one that raises, and exit with a non-zero status if any failed so that
	    # shells and CI jobs can detect the failure.
	    import traceback

	    tests = [
	        ("Scoring bridge", test_scoring_bridge_on_tasks),
	        ("Causal energy", test_causal_energy_arena),
	    ]

	    print("\n" + "█" * 60)
	    print(" Scoring + causal energy integration")
	    print("█" * 60)

	    failed = []
	    for name, fn in tests:
	        try:
	            fn()
	        except Exception as e:
	            # Keep going so one failing test does not hide the others.
	            print(f"\n ✗ {name} FAILED: {e}")
	            traceback.print_exc()
	            failed.append(name)

	    print()

	    # Previously the script always exited 0, even when a test had raised.
	    if failed:
	        raise SystemExit(1)