File size: 5,807 Bytes
19d2058
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
"""
enforcement.py — Commitment Conservation Gate

The gate is an architectural component, not a post-hoc patch.
It sits between the compressor output and the pipeline output.

Protocol:
  1. Extract commitments from ORIGINAL signal (once, at entry)
  2. Compress the signal
  3. Extract commitments from compressed output
  4. Score fidelity
  5. IF fidelity >= threshold: PASS (output compressed)
  6. IF fidelity < threshold AND retries remain:
     - Re-inject missing commitments into input
     - Re-compress (retry)
  7. IF retries exhausted: FALLBACK
     - Return best attempt seen so far
     - Log the failure

This is NOT "append missing text to the end."
That was the v1 bug. Appended text gets stripped on the next
compression cycle because the summarizer treats it as low-salience.

Instead: re-inject commitments into the INPUT before re-compression,
structured as high-salience prefix. The compressor sees them as
the most important content on retry.
"""

from typing import Set, Optional, Tuple
from dataclasses import dataclass, field
from .extraction import extract_commitment_texts
from .fidelity import fidelity_score, fidelity_breakdown
from .compression import CompressionBackend


@dataclass
class GateResult:
    """Outcome of running a signal through the commitment gate.

    Attributes:
        output: The final compressed text.
        passed: Whether the fidelity threshold was met.
        fidelity: Final fidelity score.
        fidelity_detail: Component scores behind ``fidelity``.
        attempts: Number of compression attempts performed.
        original_commitments: Commitments from the original signal.
        output_commitments: Commitments found in the final output.
        missing_commitments: Commitments that were lost.
    """

    output: str
    passed: bool
    fidelity: float
    fidelity_detail: dict
    attempts: int
    original_commitments: Set[str]
    output_commitments: Set[str]
    missing_commitments: Set[str]


class CommitmentGate:
    """
    Commitment conservation gate.

    Wraps a compression backend and enforces commitment preservation
    through a reject-and-retry loop with structured re-injection.
    """

    def __init__(
        self,
        backend: CompressionBackend,
        threshold: float = 0.6,
        max_retries: int = 3,
    ):
        """
        Args:
            backend: The compression backend to wrap
            threshold: Minimum fidelity score to pass (0.0 to 1.0)
            max_retries: Maximum number of compression attempts (the first
                attempt included) before fallback. Values below 1 are
                treated as 1 by compress().
        """
        self.backend = backend
        self.threshold = threshold
        self.max_retries = max_retries

    def compress(
        self,
        text: str,
        original_commitments: Set[str],
        target_ratio: float = 0.5,
    ) -> GateResult:
        """
        Compress text through the commitment gate.

        Each attempt compresses the current input, extracts the commitments
        of the result and scores them against ``original_commitments``
        using the ``'min_aggregated'`` entry of the fidelity breakdown.
        The first attempt whose score reaches ``self.threshold`` is returned
        with ``passed=True``. Otherwise the missing commitments are placed
        in front of the compressed text and the result is compressed again.
        When no attempt passes, the highest-scoring attempt is returned with
        ``passed=False`` (the earliest one wins ties).

        Args:
            text: Text to compress (may be original or already-processed)
            original_commitments: The commitments that MUST be preserved
                                  (extracted once from the original signal)
            target_ratio: Compression target

        Returns:
            GateResult with output text, pass/fail, fidelity scores
        """
        # Always compress at least once; a non-positive max_retries would
        # otherwise skip the loop and leave the fallback without any attempt.
        max_attempts = max(1, self.max_retries)

        # Best failing attempt so far. best_fidelity stays None until the
        # first attempt is scored, so a score of 0.0 is still recorded.
        best_output = text
        best_fidelity = None
        best_detail = {}
        best_commitments = set()

        attempts_made = 0
        current_input = text

        for attempt in range(1, max_attempts + 1):
            attempts_made = attempt

            # Compress
            compressed = self.backend.compress(current_input, target_ratio)

            # Extract and score
            output_commitments = extract_commitment_texts(compressed)
            detail = fidelity_breakdown(original_commitments, output_commitments)
            score = detail['min_aggregated']
            missing = original_commitments - output_commitments

            # Check threshold
            if score >= self.threshold:
                return GateResult(
                    output=compressed,
                    passed=True,
                    fidelity=score,
                    fidelity_detail=detail,
                    attempts=attempt,
                    original_commitments=original_commitments,
                    output_commitments=output_commitments,
                    missing_commitments=missing,
                )

            # Track best (strict '>' keeps the earliest attempt on ties)
            if best_fidelity is None or score > best_fidelity:
                best_output = compressed
                best_fidelity = score
                best_detail = detail
                best_commitments = output_commitments

            # Nothing to re-inject, or last attempt — can't improve
            if not missing or attempt >= max_attempts:
                break

            # Re-inject: format missing commitments as explicit constraints.
            # Placing them FIRST makes them highest salience for the compressor.
            # sorted() keeps the prefix deterministic across runs.
            constraint_block = '. '.join(sorted(missing)) + '. '
            current_input = constraint_block + compressed

        # Fallback: return best attempt, reusing the commitments already
        # extracted for it instead of extracting them a second time.
        return GateResult(
            output=best_output,
            passed=False,
            fidelity=best_fidelity,
            fidelity_detail=best_detail,
            attempts=attempts_made,
            original_commitments=original_commitments,
            output_commitments=best_commitments,
            missing_commitments=original_commitments - best_commitments,
        )


def baseline_compress(
    backend: CompressionBackend,
    text: str,
    target_ratio: float = 0.5,
) -> str:
    """
    Ungated baseline: run the backend once and hand back its output.

    No commitment extraction, scoring or retry takes place here.

    Args:
        backend: The compression backend to call
        text: Text to compress
        target_ratio: Compression target passed through to the backend

    Returns:
        Whatever backend.compress() returns for the given text and ratio
    """
    ungated_output = backend.compress(text, target_ratio)
    return ungated_output