| """ |
| runner.py — Falsification Protocol Orchestrator |
| |
| Implements the complete falsification protocol from Section 7: |
| |
| 1. Load pinned corpus (25 signals across 5 categories) |
| 2. For each signal: |
| a. Extract commitments from original |
| b. Run 10 recursive compressions (BASELINE — no gate) |
| c. Run 10 recursive compressions (ENFORCED — with gate) |
| d. Record lineage chains for both |
| 3. Compute aggregate statistics |
| 4. Check attractor collapse (if all signals converge, result is invalid) |
| 5. Output JSON receipt |
| |
| Success criterion (paper): enforced stability > baseline by ≥20pp |
| """ |
|
|
| import json |
| import os |
| import sys |
| from typing import List, Dict, Optional, Set |
| from datetime import datetime, timezone |
| from dataclasses import dataclass |
|
|
| from .extraction import extract_commitment_texts |
| from .fidelity import fidelity_score, fidelity_breakdown |
| from .compression import CompressionBackend, get_backend |
| from .enforcement import CommitmentGate, baseline_compress |
| from .lineage import ( |
| LineageChain, LineageRecord, |
| _hash_text, _hash_commitment_set, |
| check_attractor_collapse |
| ) |
|
|
|
|
| |
| |
| |
|
|
# Defaults shared by run_recursion(), run_protocol() and the command line.
DEFAULT_DEPTH = 10           # compression passes per lineage chain
DEFAULT_THRESHOLD = 0.6      # minimum fidelity that counts as a pass
DEFAULT_TARGET_RATIO = 0.5   # target ratio forwarded to the compressor
DEFAULT_MAX_RETRIES = 3      # retry budget forwarded to the commitment gate

# corpus/canonical_corpus.json under the parent of this module's directory.
DEFAULT_CORPUS_PATH = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    'corpus',
    'canonical_corpus.json',
)
|
|
|
|
| |
| |
| |
|
|
def load_corpus(path: str = DEFAULT_CORPUS_PATH) -> List[Dict]:
    """Load the pinned test corpus.

    Args:
        path: Location of the corpus JSON file.

    Returns:
        The value stored under the top-level ``canonical_signals`` key of the
        parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the document has no ``canonical_signals`` key.
    """
    # Decode explicitly as UTF-8 so the pinned corpus reads identically on
    # every platform instead of depending on the locale's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data['canonical_signals']
|
|
|
|
| |
| |
| |
|
|
def run_recursion(
    signal: str,
    backend: CompressionBackend,
    depth: int = DEFAULT_DEPTH,
    enforce: bool = False,
    threshold: float = DEFAULT_THRESHOLD,
    target_ratio: float = DEFAULT_TARGET_RATIO,
    max_retries: int = DEFAULT_MAX_RETRIES,
) -> LineageChain:
    """
    Compress ``signal`` recursively and record one lineage entry per pass.

    The first pass compresses ``signal`` itself; every later pass compresses
    the previous pass's output. The commitments extracted from the original
    ``signal`` are the reference used for every pass.

    Args:
        signal: Text to compress.
        backend: Compression backend; handed to the gate (enforced runs) or
            to ``baseline_compress`` (baseline runs).
        depth: Number of compression passes.
        enforce: When true, each pass goes through a ``CommitmentGate``;
            otherwise ``baseline_compress`` is used.
        threshold: Fidelity threshold. Passed to the gate for enforced runs;
            for baseline runs a pass is marked as passed when its fidelity
            is at least this value.
        target_ratio: Compression target forwarded to the compressor.
        max_retries: Retry budget given to the gate (enforced runs only).

    Returns:
        A LineageChain with one LineageRecord per pass, numbered from 1.
    """
    reference_commitments = extract_commitment_texts(signal)

    lineage = LineageChain(
        signal_id=_hash_text(signal),
        signal_preview=signal[:100],
        original_commitment_hash=_hash_commitment_set(reference_commitments),
        original_commitment_count=len(reference_commitments),
        backend=backend.name,
        enforced=enforce,
        depth=depth,
    )

    # The gate only exists for enforced runs; baseline runs leave it as None.
    gate = None
    if enforce:
        gate = CommitmentGate(backend, threshold, max_retries)

    text = signal
    previous_output_hash = None  # the first record has no parent

    for step in range(1, depth + 1):
        hash_before = _hash_text(text)

        if not (enforce and gate):
            # Baseline: compress unconditionally, then score the output
            # against the original signal's commitments.
            compressed = baseline_compress(backend, text, target_ratio)
            found = extract_commitment_texts(compressed)
            breakdown = fidelity_breakdown(reference_commitments, found)
            fidelity = breakdown['min_aggregated']
            gate_ok = fidelity >= threshold
        else:
            # Enforced: the gate returns the output together with its own
            # fidelity measurements and pass/fail verdict.
            outcome = gate.compress(text, reference_commitments, target_ratio)
            compressed = outcome.output
            found = outcome.output_commitments
            breakdown = outcome.fidelity_detail
            fidelity = outcome.fidelity
            gate_ok = outcome.passed

        hash_after = _hash_text(compressed)

        lineage.add_record(
            LineageRecord(
                iteration=step,
                input_hash=hash_before,
                output_hash=hash_after,
                commitment_hash=_hash_commitment_set(found),
                commitments_found=len(found),
                fidelity=fidelity,
                fidelity_detail=breakdown,
                gate_passed=gate_ok,
                parent_hash=previous_output_hash,
                text_preview=compressed[:100],
            )
        )

        # This pass's output is the next pass's input and parent.
        text = compressed
        previous_output_hash = hash_after

    return lineage
|
|
|
|
| |
| |
| |
|
|
@dataclass
class ProtocolResult:
    """Outcome of one falsification-protocol run: settings, chains, aggregates."""
    # Run configuration
    corpus_size: int
    depth: int
    backend: str
    threshold: float

    # Per-signal lineage chains for the two arms of the experiment
    baseline_chains: List[LineageChain]
    enforced_chains: List[LineageChain]

    # Aggregate statistics (left at their defaults unless the caller fills them)
    baseline_avg_fidelity: float = 0.0
    enforced_avg_fidelity: float = 0.0
    baseline_stability_pct: float = 0.0
    enforced_stability_pct: float = 0.0
    improvement_pp: float = 0.0
    attractor_collapse: bool = False

    timestamp: str = ''

    def to_dict(self) -> dict:
        """Return a plain-dict view: a rounded ``summary`` plus both chain lists.

        Average fidelities are rounded to 4 decimals; percentages and the
        improvement are rounded to 1 decimal. Each chain is converted with
        its own ``to_dict()``.
        """
        def arm_summary(avg_fidelity: float, stability_pct: float) -> dict:
            # Both arms share the same two-field shape and rounding.
            return {
                'avg_fidelity': round(avg_fidelity, 4),
                'stability_pct': round(stability_pct, 1),
            }

        summary = {
            'corpus_size': self.corpus_size,
            'depth': self.depth,
            'backend': self.backend,
            'threshold': self.threshold,
            'baseline': arm_summary(
                self.baseline_avg_fidelity, self.baseline_stability_pct
            ),
            'enforced': arm_summary(
                self.enforced_avg_fidelity, self.enforced_stability_pct
            ),
            'improvement_pp': round(self.improvement_pp, 1),
            'attractor_collapse': self.attractor_collapse,
            'timestamp': self.timestamp,
        }

        baseline = [chain.to_dict() for chain in self.baseline_chains]
        enforced = [chain.to_dict() for chain in self.enforced_chains]

        return {
            'summary': summary,
            'baseline_chains': baseline,
            'enforced_chains': enforced,
        }

    def to_json(self, indent: int = 2) -> str:
        """Serialize ``to_dict()`` to a JSON string with the given indent."""
        payload = self.to_dict()
        return json.dumps(payload, indent=indent)
|
|
|
|
def _reset_backend(backend) -> None:
    """Call ``backend.reset()`` when the backend exposes a ``reset`` attribute."""
    if hasattr(backend, 'reset'):
        backend.reset()


def _print_final_fidelity(chain, threshold: float) -> None:
    """Print a chain's final fidelity with a pass/fail mark against ``threshold``."""
    print(f" Final fidelity: {chain.final_fidelity:.3f}"
          f" {'✓' if chain.final_fidelity >= threshold else '✗'}")


def _print_protocol_summary(result, collapse_base: bool, collapse_enf: bool) -> None:
    """Print the aggregate results table, collapse warnings and PASS/FAIL line."""
    b_avg = result.baseline_avg_fidelity
    e_avg = result.enforced_avg_fidelity
    b_stable = result.baseline_stability_pct
    e_stable = result.enforced_stability_pct

    print(f"\n{'='*70}")
    print("FALSIFICATION PROTOCOL RESULTS")
    print(f"{'='*70}")
    print(f"Corpus: {result.corpus_size} signals | Depth: {result.depth} | Backend: {result.backend}")
    print(f"Threshold: {result.threshold}")
    print(f"\n {'':20s} {'Baseline':>10s} {'Enforced':>10s} {'Δ':>8s}")
    print(f" {'Avg Fidelity':20s} {b_avg:10.3f} {e_avg:10.3f} {e_avg-b_avg:+8.3f}")
    print(f" {'Stability %':20s} {b_stable:9.1f}% {e_stable:9.1f}% {e_stable-b_stable:+7.1f}pp")

    # Collapse is reported as a warning only; the PASS/FAIL line below does
    # not take it into account.
    if collapse_base or collapse_enf:
        print("\n ⚠ ATTRACTOR COLLAPSE DETECTED — results may be invalid")
        if collapse_base:
            print(" Baseline chains converged to same output")
        if collapse_enf:
            print(" Enforced chains converged to same output")

    # Success criterion: enforced stability beats baseline by at least 20pp.
    success = result.improvement_pp >= 20.0
    print(f"\n {'✓ PASS' if success else '✗ FAIL'}: "
          f"Improvement = {result.improvement_pp:+.1f}pp "
          f"(threshold: ≥20pp)")
    print(f"{'='*70}")


def run_protocol(
    backend_name: str = 'extractive',
    enforced_backend_name: Optional[str] = None,
    depth: int = DEFAULT_DEPTH,
    threshold: float = DEFAULT_THRESHOLD,
    target_ratio: float = DEFAULT_TARGET_RATIO,
    max_retries: int = DEFAULT_MAX_RETRIES,
    corpus_path: str = DEFAULT_CORPUS_PATH,
    signals: Optional[List[str]] = None,
    verbose: bool = True,
) -> ProtocolResult:
    """
    Run the complete falsification protocol.

    For each signal in the corpus:
        1. Run baseline recursion (no enforcement)
        2. Run enforced recursion (with commitment gate)
        3. Compare stability

    Signals in which no commitments are detected are skipped and do not
    count toward ``corpus_size`` or any aggregate. After all signals are
    processed, attractor collapse is checked separately for the baseline
    and the enforced chains.

    Args:
        backend_name: Backend for baseline runs
        enforced_backend_name: Backend for enforced runs. Defaults to the
            baseline backend, except that a ``'lossy'`` baseline is paired
            with ``'lossy_enforced'``.
        depth: Recursion iterations
        threshold: Fidelity threshold for pass/fail
        target_ratio: Compression target
        max_retries: Gate retry attempts
        corpus_path: Path to corpus JSON
        signals: Override corpus with specific signals (a falsy value means
            "load the corpus from ``corpus_path``")
        verbose: Print progress

    Returns:
        ProtocolResult with both chain lists and the aggregate statistics.

    Raises:
        ValueError: If no signal with at least one commitment was found.
    """
    baseline_backend = get_backend(backend_name)

    # The lossy baseline has a dedicated enforced counterpart.
    if enforced_backend_name is None and backend_name == 'lossy':
        enforced_backend_name = 'lossy_enforced'
    enforced_backend = get_backend(enforced_backend_name or backend_name)

    if signals:
        corpus = [{'category': 'custom', 'signal': s} for s in signals]
    else:
        corpus = load_corpus(corpus_path)

    baseline_chains = []
    enforced_chains = []

    for position, entry in enumerate(corpus, start=1):
        signal = entry['signal']
        category = entry.get('category', 'unknown')

        # Extract once per signal: the result feeds both the progress line
        # and the skip check below.
        commitments = extract_commitment_texts(signal)

        if verbose:
            print(f"\n[{position}/{len(corpus)}] {category}: {signal[:60]}...")
            print(f" Commitments found: {len(commitments)}")

        if not commitments:
            if verbose:
                print(" ⚠ No commitments detected — skipping")
            continue

        # Start every signal from a clean backend state.
        _reset_backend(baseline_backend)
        _reset_backend(enforced_backend)

        if verbose:
            print(f" Running baseline (depth={depth})...")
        b_chain = run_recursion(
            signal, baseline_backend, depth,
            enforce=False, threshold=threshold, target_ratio=target_ratio,
        )
        baseline_chains.append(b_chain)
        if verbose:
            _print_final_fidelity(b_chain, threshold)

        # Reset again right before the enforced run, as the original does.
        # NOTE(review): presumably this covers both runs sharing one backend
        # object; confirm against get_backend.
        _reset_backend(enforced_backend)

        if verbose:
            print(f" Running enforced (depth={depth})...")
        e_chain = run_recursion(
            signal, enforced_backend, depth,
            enforce=True, threshold=threshold, target_ratio=target_ratio,
            max_retries=max_retries,
        )
        enforced_chains.append(e_chain)
        if verbose:
            _print_final_fidelity(e_chain, threshold)

            gap = e_chain.final_fidelity - b_chain.final_fidelity
            print(f" Δ = {gap:+.3f}")

    # Both lists grow in lockstep, so one count serves both arms.
    n = len(baseline_chains)
    if n == 0:
        raise ValueError("No signals with commitments found in corpus")

    b_avg = sum(c.final_fidelity for c in baseline_chains) / n
    e_avg = sum(c.final_fidelity for c in enforced_chains) / n
    # Stability = percentage of chains whose final fidelity meets the threshold.
    b_stable = sum(1 for c in baseline_chains if c.final_fidelity >= threshold) / n * 100
    e_stable = sum(1 for c in enforced_chains if c.final_fidelity >= threshold) / n * 100

    collapse_base = check_attractor_collapse(baseline_chains)
    collapse_enf = check_attractor_collapse(enforced_chains)

    result = ProtocolResult(
        corpus_size=n,
        depth=depth,
        backend=f"{baseline_backend.name} vs {enforced_backend.name}",
        threshold=threshold,
        baseline_chains=baseline_chains,
        enforced_chains=enforced_chains,
        baseline_avg_fidelity=b_avg,
        enforced_avg_fidelity=e_avg,
        baseline_stability_pct=b_stable,
        enforced_stability_pct=e_stable,
        improvement_pp=e_stable - b_stable,
        attractor_collapse=collapse_base or collapse_enf,
        timestamp=datetime.now(timezone.utc).isoformat(),
    )

    if verbose:
        _print_protocol_summary(result, collapse_base, collapse_enf)

    return result
|
|
|
|
| |
| |
| |
|
|
def main():
    """Command-line entry point.

    Parses the CLI options, runs the protocol via ``run_protocol`` and writes
    the JSON receipt to ``--output`` (creating its parent directory when
    needed). ``--quiet`` disables the protocol's progress output; the final
    "Receipt saved" line is always printed.
    """
    import argparse

    baseline_choices = ['extractive', 'bart', 'back_translation', 'lossy']

    parser = argparse.ArgumentParser(
        description="Commitment Conservation Falsification Protocol"
    )
    parser.add_argument('--backend', default='extractive',
                        choices=baseline_choices,
                        help='Compression backend for baseline')
    parser.add_argument('--enforced-backend', default=None,
                        choices=baseline_choices + ['lossy_enforced'],
                        help='Backend for enforced runs (default: same as --backend)')
    parser.add_argument('--depth', type=int, default=DEFAULT_DEPTH,
                        help='Recursion depth (default: 10)')
    parser.add_argument('--threshold', type=float, default=DEFAULT_THRESHOLD,
                        help='Fidelity threshold (default: 0.6)')
    # run_protocol accepts these two settings; expose them on the CLI as well,
    # with the same defaults the library uses.
    parser.add_argument('--target-ratio', type=float, default=DEFAULT_TARGET_RATIO,
                        help=f'Compression target ratio (default: {DEFAULT_TARGET_RATIO})')
    parser.add_argument('--max-retries', type=int, default=DEFAULT_MAX_RETRIES,
                        help=f'Gate retry attempts (default: {DEFAULT_MAX_RETRIES})')
    parser.add_argument('--signal', type=str, default=None,
                        help='Test a single signal instead of full corpus')
    parser.add_argument('--corpus', type=str, default=DEFAULT_CORPUS_PATH,
                        help='Path to corpus JSON')
    parser.add_argument('--output', type=str, default='outputs/protocol_result.json',
                        help='Output path for JSON receipt')
    parser.add_argument('--quiet', action='store_true',
                        help='Suppress verbose output')

    args = parser.parse_args()

    # A single --signal replaces the corpus; otherwise the corpus file is used.
    signals = [args.signal] if args.signal else None

    result = run_protocol(
        backend_name=args.backend,
        enforced_backend_name=args.enforced_backend,
        depth=args.depth,
        threshold=args.threshold,
        target_ratio=args.target_ratio,
        max_retries=args.max_retries,
        corpus_path=args.corpus,
        signals=signals,
        verbose=not args.quiet,
    )

    # dirname() is '' for a bare filename; fall back to the current directory.
    os.makedirs(os.path.dirname(args.output) or '.', exist_ok=True)
    with open(args.output, 'w', encoding='utf-8') as f:
        f.write(result.to_json())

    print(f"\n✓ Receipt saved: {args.output}")


if __name__ == '__main__':
    main()
|
|