File size: 5,807 Bytes

19d2058

"""
enforcement.py — Commitment Conservation Gate

The gate is an architectural component, not a post-hoc patch.
It sits between the compressor output and the pipeline output.

Protocol:
  1. Extract commitments from ORIGINAL signal (once, at entry)
  2. Compress the signal
  3. Extract commitments from compressed output
  4. Score fidelity
  5. IF fidelity >= threshold: PASS (output compressed)
  6. IF fidelity < threshold AND retries remain:
     - Re-inject missing commitments into input
     - Re-compress (retry)
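  7. IF retries exhausted: FALLBACK
     - Return best attempt seen so far
     - Flag the failure (GateResult.passed is False); this module does
       no logging itself, so logging is left to the caller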

This is NOT "append missing text to the end."
That was the v1 bug. Appended text gets stripped on the next
compression cycle because the summarizer treats it as low-salience.

Instead: re-inject commitments into the INPUT before re-compression,
structured as high-salience prefix. The compressor sees them as
the most important content on retry.
"""

from typing import Set, Optional, Tuple
from dataclasses import dataclass, field
from .extraction import extract_commitment_texts
from .fidelity import fidelity_score, fidelity_breakdown
from .compression import CompressionBackend


@dataclass
class GateResult:
    """
    Outcome of running one signal through the commitment gate.

    Attributes:
        output: The final compressed text.
        passed: Whether the fidelity threshold was met.
        fidelity: Final fidelity score.
        fidelity_detail: Component scores behind the final score.
        attempts: Number of compression attempts made.
        original_commitments: Commitments from the original signal.
        output_commitments: Commitments found in the final output.
        missing_commitments: Commitments that were lost.
    """

    output: str
    passed: bool
    fidelity: float
    fidelity_detail: dict
    attempts: int
    original_commitments: Set[str]
    output_commitments: Set[str]
    missing_commitments: Set[str]


class CommitmentGate:
    """
    Commitment conservation gate.

    Wraps a compression backend and enforces commitment preservation
    through a reject-and-retry loop with structured re-injection.
    """

    def __init__(
        self,
        backend: CompressionBackend,
        threshold: float = 0.6,
        max_retries: int = 3,
    ):
        """
        Args:
            backend: The compression backend to wrap
            threshold: Minimum fidelity score to pass (0.0 to 1.0)
            max_retries: Maximum number of compression attempts (the first
                         attempt included) before falling back. With a
                         value <= 0 no compression is attempted at all.
        """
        self.backend = backend
        self.threshold = threshold
        self.max_retries = max_retries

    def compress(
        self,
        text: str,
        original_commitments: Set[str],
        target_ratio: float = 0.5,
    ) -> GateResult:
        """
        Compress text through the commitment gate.

        Each attempt compresses the current input, extracts the commitments
        from the result and scores them against ``original_commitments``
        using the ``'min_aggregated'`` entry of ``fidelity_breakdown``.
        The first attempt whose score reaches ``self.threshold`` is returned
        immediately. Otherwise the missing commitments are prepended to the
        compressed output and that becomes the input of the next attempt.

        Args:
            text: Text to compress (may be original or already-processed)
            original_commitments: The commitments that MUST be preserved
                                  (extracted once from the original signal)
            target_ratio: Compression target, passed through to the backend

        Returns:
            GateResult with ``passed=True`` for the first attempt that meets
            the threshold. Otherwise ``passed=False`` with the
            highest-scoring attempt; if no attempt scored above 0.0 (or no
            attempt was made), the input ``text`` is returned unchanged with
            fidelity 0.0 and an empty ``fidelity_detail``.
        """
        best_output = text
        best_fidelity = 0.0
        best_detail = {}
        # Commitments belonging to best_output; stays None while best_output
        # is still the untouched input text.
        best_commitments = None

        current_input = text

        # Pre-bind the loop variable: when max_retries <= 0 the loop body
        # never runs and the fallback below would otherwise hit an
        # UnboundLocalError when reporting the attempt count.
        attempt = 0

        for attempt in range(1, self.max_retries + 1):
            # Compress
            compressed = self.backend.compress(current_input, target_ratio)

            # Extract and score
            output_commitments = extract_commitment_texts(compressed)
            detail = fidelity_breakdown(original_commitments, output_commitments)
            score = detail['min_aggregated']
            missing = original_commitments - output_commitments

            # Track best (strictly better only, so a 0.0 score never
            # displaces the initial "return the input" fallback)
            if score > best_fidelity:
                best_output = compressed
                best_fidelity = score
                best_detail = detail
                best_commitments = output_commitments

            # Check threshold
            if score >= self.threshold:
                return GateResult(
                    output=compressed,
                    passed=True,
                    fidelity=score,
                    fidelity_detail=detail,
                    attempts=attempt,
                    original_commitments=original_commitments,
                    output_commitments=output_commitments,
                    missing_commitments=missing,
                )

            # Nothing left to re-inject, or this was the last attempt:
            # another round cannot improve the result.
            if not missing or attempt >= self.max_retries:
                break

            # Re-inject: structure missing commitments as high-salience prefix.
            # Placing them FIRST makes them highest salience for the compressor;
            # sorting keeps the prefix deterministic across runs.
            constraint_block = '. '.join(sorted(missing)) + '. '
            current_input = constraint_block + compressed

        # Fallback: return best attempt. Reuse the commitments the best score
        # was computed from; only extract when the output is the raw input.
        if best_commitments is None:
            best_commitments = extract_commitment_texts(best_output)
        return GateResult(
            output=best_output,
            passed=False,
            fidelity=best_fidelity,
            fidelity_detail=best_detail,
            attempts=attempt,
            original_commitments=original_commitments,
            output_commitments=best_commitments,
            missing_commitments=original_commitments - best_commitments,
        )


def baseline_compress(
    backend: CompressionBackend,
    text: str,
    target_ratio: float = 0.5,
) -> str:
    """
    Ungated compression used as the baseline.

    Hands ``text`` and ``target_ratio`` straight to ``backend.compress``
    and returns its result untouched: no commitment extraction, no
    fidelity scoring, no retries.
    """
    raw_output = backend.compress(text, target_ratio)
    return raw_output