"""Risolve overlap tra livelli NER/regex/GLiNER. Priorità decrescente, score per parità.""" import logging from typing import List from presidio_analyzer import RecognizerResult logger = logging.getLogger(__name__) def _spans_overlap(a: RecognizerResult, b: RecognizerResult) -> bool: return a.start < b.end and b.start < a.end def resolve_overlapping_spans( results_by_priority: List[List[RecognizerResult]], ) -> List[RecognizerResult]: """ Merge senza overlap su lista di livelli ordinati per priorità decrescente. Intra-layer: vince score DESC + lunghezza DESC. Inter-layer: il livello più alto vince sempre. Ogni risultato accettato riceve recognition_metadata['source_priority']. """ accepted: list[RecognizerResult] = [] for priority, layer in enumerate(results_by_priority): if not layer: continue sorted_layer = sorted(layer, key=lambda r: (-r.score, -(r.end - r.start))) for r in sorted_layer: if any(_spans_overlap(r, a) for a in accepted): logger.debug("[resolver] scartato %s @%d (priorità %d)", r.entity_type, r.start, priority) continue if r.recognition_metadata is None: r.recognition_metadata = {} r.recognition_metadata["source_priority"] = priority accepted.append(r) return sorted(accepted, key=lambda x: x.start)