# AiAnonymize_2/core/span_resolver.py
# Alessandro Tomassini
# vs 2.0
# fa1e652
"""Risolve overlap tra livelli NER/regex/GLiNER. Priorità decrescente, score per parità."""
import logging
from typing import List
from presidio_analyzer import RecognizerResult
logger = logging.getLogger(__name__)
def _spans_overlap(a: RecognizerResult, b: RecognizerResult) -> bool:
    """Return True when the character ranges of *a* and *b* intersect.

    Both comparisons are strict, so two spans that merely touch (one ends
    exactly where the other starts) are not considered overlapping.
    """
    # Guard clause: *a* begins at or after the point where *b* ends.
    if not a.start < b.end:
        return False
    return b.start < a.end
def resolve_overlapping_spans(
    results_by_priority: List[List[RecognizerResult]],
) -> List[RecognizerResult]:
    """Merge prioritised layers of results into a single overlap-free list.

    Args:
        results_by_priority: Layers of results ordered by decreasing
            priority (index 0 is examined first). Empty or ``None`` layers
            are skipped.

    Returns:
        The accepted results, ordered by ``start``.

    Within a layer, candidates are tried by score (highest first) and then
    by span length (longest first). A candidate is dropped as soon as it
    overlaps any result that was already accepted, so results from earlier
    layers always win over later ones.

    Accepted results are modified in place: ``recognition_metadata`` is
    created if it is ``None`` and its ``'source_priority'`` key is set to
    the index of the layer the result came from.
    """
    kept: List[RecognizerResult] = []

    for level, candidates in enumerate(results_by_priority):
        if not candidates:
            continue

        # list.sort is stable, so equal (score, length) keep their input order.
        ranked = list(candidates)
        ranked.sort(key=lambda res: (-res.score, -(res.end - res.start)))

        for candidate in ranked:
            for winner in kept:
                if _spans_overlap(candidate, winner):
                    logger.debug(
                        "[resolver] scartato %s @%d (priorità %d)",
                        candidate.entity_type,
                        candidate.start,
                        level,
                    )
                    break
            else:
                # No clash with anything accepted so far: tag and keep it.
                if candidate.recognition_metadata is None:
                    candidate.recognition_metadata = {}
                candidate.recognition_metadata["source_priority"] = level
                kept.append(candidate)

    kept.sort(key=lambda res: res.start)
    return kept