"""
Integration tests: ScoringBridge on benchmark samples and energy causal arena.
"""
import numpy as np
# Seed NumPy's global RNG at import time so that runs are repeatable for any
# code that draws from it.
np.random.seed(42)


def test_scoring_bridge_on_tasks():
    """Smoke-test ScoringBridge against a handful of benchmark tasks.

    For each task name, samples are requested with ``max_samples=30``; every
    sample's choices are scored after resetting the bridge, and the argmax of
    the returned scores is compared with ``sample.gold``.  The per-task hit
    count and accuracy are printed.  Nothing is asserted: a task whose
    samples cannot be loaded is reported as SKIP and the run moves on.
    """
    banner = "=" * 60
    print(banner)
    print("TEST: semantic field scoring on sample tasks")
    print(banner)

    # Imported lazily so that importing this module does not require the
    # project packages to be importable.
    from tensegrity.engine.scoring import ScoringBridge
    from tensegrity.bench.tasks import load_task_samples

    bridge = ScoringBridge(obs_dim=128, hidden_dims=[64, 16])

    for task_name in ("copa", "sciq", "arc_challenge"):
        try:
            samples = load_task_samples(task_name, max_samples=30)
        except Exception as e:
            # Best effort: a task that fails to load must not abort the rest.
            print(f"\n  {task_name}: SKIP ({e})")
            continue

        hits = 0
        seen = 0
        for sample in samples:
            bridge.reset()
            # The second returned value is not used by this test.
            scores, _ = bridge.score_choices(sample.prompt, sample.choices)
            if int(np.argmax(scores)) == sample.gold:
                hits += 1
            seen += 1

        # max(..., 1) guards against division by zero when no samples came back.
        accuracy = hits / max(seen, 1)
        print(f"\n  {task_name}: {hits}/{seen} = {accuracy:.1%}")

    print("\n  ✓ ScoringBridge functional")


def test_causal_energy_arena():
    """Smoke-test EnergyCausalArena with two competing causal models.

    Two models over variables X and Y are built: "correct" declares X as a
    parent of Y, "wrong" declares Y without parents.  Both are updated from
    100 samples drawn from the "correct" model and registered in an arena.
    The arena then competes on 20 further samples from the "correct" model,
    updating its models after each observation.  Win counts, the arena's
    tension, and the energies/posteriors of one final competition are
    printed.  Nothing is asserted.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST: energy-based causal arena")
    print(rule)

    # Imported lazily so that importing this module does not require the
    # project packages to be importable.
    from tensegrity.causal.scm import StructuralCausalModel
    from tensegrity.engine.causal_energy import EnergyCausalArena

    # Model with the X -> Y dependency declared.
    linked = StructuralCausalModel("correct")
    linked.add_variable("X", n_values=3)
    linked.add_variable("Y", n_values=3, parents=["X"])

    # Competing model: same variables, but Y has no parents.
    unlinked = StructuralCausalModel("wrong")
    unlinked.add_variable("X", n_values=3)
    unlinked.add_variable("Y", n_values=3)

    # Both models are fit on the same data, generated by the linked model.
    training = linked.sample(100)
    for model in (linked, unlinked):
        model.update_from_data(training)

    arena = EnergyCausalArena(precision=1.0, beta=2.0)
    for model in (linked, unlinked):
        arena.register(model)

    # Compete on 20 fresh observations, updating the models after each one.
    held_out = linked.sample(20)
    winners = []
    for obs in held_out:
        winners.append(arena.compete(obs)["winner"])
        arena.update_models(obs)

    n_correct = len([w for w in winners if w == "correct"])
    print(f"  Correct model wins: {n_correct}/{len(winners)}")
    print(f"  Final tension: {arena.tension:.3f}")

    # One more competition on the last observation to show the raw numbers.
    final = arena.compete(held_out[-1])
    print(f"  Last energies: {final['energies']}")
    print(f"  Last posteriors: {final['posteriors']}")

    print("  ✓ Energy causal arena functional")


if __name__ == "__main__":
    # Script entry point: run every test, keep going after a failure so all
    # results are shown, then report the overall outcome through the process
    # exit status so shell / CI callers can detect a failed run.
    import sys
    import traceback

    tests = [
        ("Scoring bridge", test_scoring_bridge_on_tasks),
        ("Causal energy", test_causal_energy_arena),
    ]

    print("\n" + "█" * 60)
    print("  Scoring + causal energy integration")
    print("█" * 60)

    failed = []  # names of tests that raised
    for name, fn in tests:
        try:
            fn()
        except Exception as e:
            # Top-level boundary: report the failure with its traceback and
            # continue with the remaining tests.
            failed.append(name)
            print(f"\n  ✗ {name} FAILED: {e}")
            traceback.print_exc()

    print()

    # Exit non-zero if any test raised; a fully successful run falls through
    # and exits with status 0.
    if failed:
        sys.exit(1)