burnmydays's picture
Deploy harness v2 to root for HuggingFace Space
19d2058
"""
enforcement.py — Commitment Conservation Gate
The gate is an architectural component, not a post-hoc patch.
It sits between the compressor output and the pipeline output.
Protocol:
    1. Extract commitments from ORIGINAL signal (once, at entry)
    2. Compress the signal
    3. Extract commitments from compressed output
    4. Score fidelity
    5. IF fidelity >= threshold: PASS (output compressed)
    6. IF fidelity < threshold AND retries remain:
        - Re-inject missing commitments into input
        - Re-compress (retry)
    7. IF retries exhausted: FALLBACK
        - Return best attempt seen so far
        - Log the failure

This is NOT "append missing text to the end."
That was the v1 bug. Appended text gets stripped on the next
compression cycle because the summarizer treats it as low-salience.
Instead: re-inject commitments into the INPUT before re-compression,
structured as high-salience prefix. The compressor sees them as
the most important content on retry.
"""
from typing import Set, Optional, Tuple
from dataclasses import dataclass, field
from .extraction import extract_commitment_texts
from .fidelity import fidelity_score, fidelity_breakdown
from .compression import CompressionBackend
@dataclass
class GateResult:
    """
    Outcome of running one signal through the commitment gate.

    Attributes:
        output: The final compressed text.
        passed: Whether the fidelity threshold was met.
        fidelity: Final fidelity score.
        fidelity_detail: Component scores behind the final fidelity.
        attempts: Number of compression attempts that were made.
        original_commitments: Commitments taken from the original signal.
        output_commitments: Commitments found in the final output.
        missing_commitments: Commitments that were lost.
    """

    # Text and verdict
    output: str
    passed: bool

    # Scoring
    fidelity: float
    fidelity_detail: dict
    attempts: int

    # Commitment bookkeeping
    original_commitments: Set[str]
    output_commitments: Set[str]
    missing_commitments: Set[str]
class CommitmentGate:
    """
    Commitment conservation gate.

    Wraps a compression backend and enforces commitment preservation
    through a reject-and-retry loop with structured re-injection: when an
    attempt scores below the threshold, the commitments missing from its
    output are placed in front of that output and the result is compressed
    again.
    """

    def __init__(
        self,
        backend: CompressionBackend,
        threshold: float = 0.6,
        max_retries: int = 3,
    ):
        """
        Args:
            backend: The compression backend to wrap
            threshold: Minimum fidelity score to pass (0.0 to 1.0)
            max_retries: Maximum number of compression attempts before
                fallback. The first attempt counts toward this limit, so
                a value of 3 allows at most two re-injections.
        """
        self.backend = backend
        self.threshold = threshold
        self.max_retries = max_retries

    def compress(
        self,
        text: str,
        original_commitments: Set[str],
        target_ratio: float = 0.5,
    ) -> GateResult:
        """
        Compress text through the commitment gate.

        Args:
            text: Text to compress (may be original or already-processed)
            original_commitments: The commitments that MUST be preserved
                (extracted once from the original signal)
            target_ratio: Compression target, passed through to the backend

        Returns:
            GateResult describing the accepted output. ``passed`` is True
            when an attempt reached the threshold; otherwise the result
            carries the highest-scoring attempt. If ``max_retries`` is
            below 1 no compression is attempted and the input text itself
            is scored and returned with ``attempts=0``.
        """
        # Pre-bind the loop counter so it is defined even when the range
        # below is empty (max_retries < 1).
        attempts_made = 0
        # (score, text, detail, commitments) of the best attempt so far.
        # None means "no attempt recorded yet", which guarantees the first
        # attempt is kept even when it scores 0.0.
        best: Optional[Tuple[float, str, dict, Set[str]]] = None
        current_input = text

        for attempts_made in range(1, self.max_retries + 1):
            compressed = self.backend.compress(current_input, target_ratio)
            score, detail, output_commitments = self._score(
                compressed, original_commitments
            )

            # Strict '>' keeps the earliest attempt on ties.
            if best is None or score > best[0]:
                best = (score, compressed, detail, output_commitments)

            missing = original_commitments - output_commitments

            if score >= self.threshold:
                return GateResult(
                    output=compressed,
                    passed=True,
                    fidelity=score,
                    fidelity_detail=detail,
                    attempts=attempts_made,
                    original_commitments=original_commitments,
                    output_commitments=output_commitments,
                    missing_commitments=missing,
                )

            # Nothing left to re-inject, or no attempts left: stop retrying.
            if not missing or attempts_made >= self.max_retries:
                break

            # Re-inject: missing commitments go FIRST so the compressor
            # sees them as the most salient content on the next attempt.
            # sorted() makes the prefix deterministic.
            # NOTE(review): the prefix is added to the previous compressed
            # output, not to the original `text`, so every retry compresses
            # already-compressed content — confirm this is intended.
            constraint_block = '. '.join(sorted(missing)) + '. '
            current_input = constraint_block + compressed

        if best is None:
            # No compression attempt ran. Return the input unchanged, but
            # score it so the reported fidelity matches the returned text.
            score, detail, output_commitments = self._score(
                text, original_commitments
            )
            return GateResult(
                output=text,
                passed=score >= self.threshold,
                fidelity=score,
                fidelity_detail=detail,
                attempts=0,
                original_commitments=original_commitments,
                output_commitments=output_commitments,
                missing_commitments=original_commitments - output_commitments,
            )

        # Fallback: threshold never reached, return the best attempt seen.
        best_fidelity, best_output, best_detail, best_commitments = best
        return GateResult(
            output=best_output,
            passed=False,
            fidelity=best_fidelity,
            fidelity_detail=best_detail,
            attempts=attempts_made,
            original_commitments=original_commitments,
            output_commitments=best_commitments,
            missing_commitments=original_commitments - best_commitments,
        )

    @staticmethod
    def _score(
        candidate: str,
        original_commitments: Set[str],
    ) -> Tuple[float, dict, Set[str]]:
        """Extract commitments from ``candidate`` and score them against the originals.

        Returns:
            (score, detail, commitments) where ``detail`` is the fidelity
            breakdown and ``score`` is its 'min_aggregated' entry.
        """
        commitments = extract_commitment_texts(candidate)
        detail = fidelity_breakdown(original_commitments, commitments)
        return detail['min_aggregated'], detail, commitments
def baseline_compress(
    backend: CompressionBackend,
    text: str,
    target_ratio: float = 0.5,
) -> str:
    """
    Ungated compression, used as the no-enforcement baseline.

    Hands ``text`` and ``target_ratio`` straight to ``backend.compress``
    and returns its result untouched: no commitment extraction, no
    scoring, no retries.
    """
    raw_output = backend.compress(text, target_ratio)
    return raw_output