File size: 3,320 Bytes

55d63c9
defd8d1
55d63c9
 
 
 
 
defd8d1
 
55d63c9
defd8d1
55d63c9
 
defd8d1
55d63c9
 
defd8d1
55d63c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
defd8d1
55d63c9
 
defd8d1
 
55d63c9
defd8d1
55d63c9
 
 
defd8d1
55d63c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
defd8d1
55d63c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
defd8d1
55d63c9
 
 
 
defd8d1
 
55d63c9
 
 
defd8d1
55d63c9

"""
Integration tests: ScoringBridge on benchmark samples and energy causal arena.
"""
import numpy as np
np.random.seed(42)


def test_scoring_bridge_on_tasks():
    """ScoringBridge on a small slice of benchmark tasks."""
    print("=" * 60)
    print("TEST: semantic field scoring on sample tasks")
    print("=" * 60)
    
    from tensegrity.engine.scoring import ScoringBridge
    from tensegrity.bench.tasks import load_task_samples
    
    bridge = ScoringBridge(obs_dim=128, hidden_dims=[64, 16])
    
    tasks = ["copa", "sciq", "arc_challenge"]
    
    for task_name in tasks:
        try:
            samples = load_task_samples(task_name, max_samples=30)
        except Exception as e:
            print(f"\n  {task_name}: SKIP ({e})")
            continue
        
        correct = 0
        total = 0
        
        for sample in samples:
            bridge.reset()
            scores, entropy = bridge.score_choices(sample.prompt, sample.choices)
            pred = int(np.argmax(scores))
            if pred == sample.gold:
                correct += 1
            total += 1
        
        acc = correct / max(total, 1)
        print(f"\n  {task_name}: {correct}/{total} = {acc:.1%}")
    
    print(f"\n  ✓ ScoringBridge functional")


def test_causal_energy_arena():
    """Energy-based causal model competition."""
    print("\n" + "=" * 60)
    print("TEST: energy-based causal arena")
    print("=" * 60)
    
    from tensegrity.causal.scm import StructuralCausalModel
    from tensegrity.engine.causal_energy import EnergyCausalArena
    
    # Two competing models
    m_correct = StructuralCausalModel("correct")
    m_correct.add_variable("X", n_values=3)
    m_correct.add_variable("Y", n_values=3, parents=["X"])
    
    m_wrong = StructuralCausalModel("wrong")
    m_wrong.add_variable("X", n_values=3)
    m_wrong.add_variable("Y", n_values=3)  # No causal link
    
    # Train correct model on data where X causes Y
    data = m_correct.sample(100)
    m_correct.update_from_data(data)
    m_wrong.update_from_data(data)
    
    arena = EnergyCausalArena(precision=1.0, beta=2.0)
    arena.register(m_correct)
    arena.register(m_wrong)
    
    # Test on 20 observations
    test_data = m_correct.sample(20)
    winners = []
    for obs in test_data:
        result = arena.compete(obs)
        winners.append(result["winner"])
        arena.update_models(obs)
    
    correct_wins = sum(1 for w in winners if w == "correct")
    print(f"  Correct model wins: {correct_wins}/{len(winners)}")
    print(f"  Final tension: {arena.tension:.3f}")
    
    # Energy comparison
    last_result = arena.compete(test_data[-1])
    print(f"  Last energies: {last_result['energies']}")
    print(f"  Last posteriors: {last_result['posteriors']}")
    
    print(f"  ✓ Energy causal arena functional")


if __name__ == "__main__":
    tests = [
        ("Scoring bridge", test_scoring_bridge_on_tasks),
        ("Causal energy", test_causal_energy_arena),
    ]
    
    print("\n" + "█" * 60)
    print("  Scoring + causal energy integration")
    print("█" * 60)
    
    for name, fn in tests:
        try:
            fn()
        except Exception as e:
            print(f"\n  ✗ {name} FAILED: {e}")
            import traceback; traceback.print_exc()
    
    print()