| """ |
| PipelineContext — Shared state for a single /api/analyze request. |
| |
| INVARIANTS: |
| 1. original_text is IMMUTABLE after construction |
| 2. _offset_mappers is APPEND-ONLY — past mappers never mutated or removed |
| 3. map_to_original() is READ-ONLY and deterministic |
| 4. All coordinate transforms go through OffsetMapper public API |
| |
| TERMINOLOGY: |
| original_text = ORIGINAL_TEXT (user's raw input, immutable) |
| current_text = CURRENT_TEXT (pipeline working copy, mutated per stage) |
| |
| # FUTURE: If pipeline grows beyond 5 stages, the linear reverse_map_offset() |
| # chain in map_to_original() could be replaced with a precomputed cumulative |
| # mapping or segment-tree composition. Not needed for 3 stages. |
| """ |
| import logging |
| from nlp.correction_patch import PatchSet, CorrectionPatch, PRIORITY |
| from nlp.stage_locker import StageLocker, PIPELINE_DEBUG |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
class PipelineContext:
    """
    Per-request state holder for one /api/analyze call.

    Holds the untouched input (``original_text``), the working copy that
    stages rewrite (``current_text``), the collected patches, the chain of
    offset mappers recorded on every effective text mutation, and the stage
    locker. The documented stage order is Spelling → Grammar → Punctuation,
    and the object is meant to be thrown away once the response is sent.
    """

    def __init__(self, original_text: str):
        """Start a fresh context whose working copy equals the raw input."""
        # ORIGINAL_TEXT: the raw input; no method of this class reassigns it.
        self.original_text: str = original_text
        # CURRENT_TEXT: working copy, replaced only by mutate_text().
        self.current_text: str = original_text
        # Patches recorded through add_patch().
        self.patches: PatchSet = PatchSet()
        # One mapper per effective mutate_text() call, oldest first.
        self._offset_mappers: list = []
        # Receives lock() calls from add_patch() and mapper updates from
        # mutate_text().
        self.stage_locker: StageLocker = StageLocker()

    def map_to_original(self, start: int, end: int) -> tuple:
        """
        Translate a CURRENT_TEXT span into ORIGINAL_TEXT coordinates.

        The recorded mappers are visited newest-first. Each one converts the
        pair through ``reverse_map_offset()``: the start offset with
        ``is_end=False``, then the end offset with ``is_end=True``. When no
        mapper has been recorded the span comes back unchanged. This method
        assigns nothing on the context.

        Returns:
            A ``(start, end)`` pair in ORIGINAL_TEXT coordinates.
        """
        span_start = start
        span_end = end
        for stage_mapper in reversed(self._offset_mappers):
            # Tuple elements evaluate left to right: start first, then end.
            span_start, span_end = (
                stage_mapper.reverse_map_offset(span_start, is_end=False),
                stage_mapper.reverse_map_offset(span_end, is_end=True),
            )
        return span_start, span_end

    def add_patch(self, stage: str, start_current: int, end_current: int,
                  replacement: str, confidence: float = 1.0,
                  alternatives: list = None):
        """
        Record a correction, filling in both coordinate spaces.

        The CURRENT span is taken as given and is also what gets locked in
        the stage locker. The ORIGINAL span is derived with
        ``map_to_original()`` and then clamped to the bounds of
        ``original_text``.

        Args:
            stage: Name of the producing stage; also the key used to look up
                the patch priority in ``PRIORITY`` (0 when absent).
            start_current: Span start in CURRENT_TEXT coordinates.
            end_current: Span end in CURRENT_TEXT coordinates.
            replacement: Text proposed for the span.
            confidence: Confidence value stored on the patch.
            alternatives: Optional list of alternatives; a falsy value is
                stored as an empty list.

        Returns:
            The ``CorrectionPatch`` that was created and added.
        """
        raw_start, raw_end = self.map_to_original(start_current, end_current)

        # Keep the span inside [0, len(original_text)] and never let the end
        # fall before the start, so the slice below is always well-formed.
        source_len = len(self.original_text)
        start_orig = max(0, min(raw_start, source_len))
        end_orig = max(start_orig, min(raw_end, source_len))

        replaced_span = self.original_text[start_orig:end_orig]
        stage_priority = PRIORITY.get(stage, 0)

        patch = CorrectionPatch(
            stage=stage,
            start_original=start_orig,
            end_original=end_orig,
            start_current=start_current,
            end_current=end_current,
            original=replaced_span,
            replacement=replacement,
            priority=stage_priority,
            confidence=confidence,
            locked=True,
            alternatives=alternatives or [],
        )
        self.patches.add(patch)

        # The lock is expressed in CURRENT coordinates, not ORIGINAL ones.
        self.stage_locker.lock(start_current, end_current, stage)

        if PIPELINE_DEBUG:
            logger.debug(
                f"[PipelineContext] PATCH stage={stage} "
                f"CURRENT=[{start_current}:{end_current}] "
                f"ORIGINAL=[{start_orig}:{end_orig}] "
                f"'{patch.original}' → '{replacement}'"
            )
        return patch

    def mutate_text(self, text_after: str, OffsetMapperClass):
        """
        Replace CURRENT_TEXT with the output of a stage.

        Does nothing when ``text_after`` equals the current working copy.
        Otherwise it:

        1. builds ``OffsetMapperClass(current_text, text_after)``,
        2. appends that mapper to the end of the mapper chain,
        3. passes the mapper to ``stage_locker.update_via_mapper()``,
        4. stores ``text_after`` as the new ``current_text``.

        Args:
            text_after: The text produced by the stage.
            OffsetMapperClass: Callable invoked as
                ``OffsetMapperClass(text_before, text_after)`` to build the
                mapper for this mutation.
        """
        if text_after == self.current_text:
            return

        text_before = self.current_text
        stage_mapper = OffsetMapperClass(text_before, text_after)
        self._offset_mappers.append(stage_mapper)
        self.stage_locker.update_via_mapper(stage_mapper)

        # Log before the swap so the first length reported is the old text's.
        if PIPELINE_DEBUG:
            logger.debug(
                f"[PipelineContext] MUTATE len={len(self.current_text)} → {len(text_after)} "
                f"mappers={len(self._offset_mappers)}"
            )
        self.current_text = text_after
|
|