Spaces:
Sleeping
Sleeping
"""Resolve overlaps between NER/regex/GLiNER layers. Decreasing priority, score breaks ties."""
import logging
from typing import List
from presidio_analyzer import RecognizerResult
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def _spans_overlap(a: RecognizerResult, b: RecognizerResult) -> bool:
    """Return True when spans ``a`` and ``b`` share at least one position.

    Both comparisons are strict, so two spans that merely touch
    (``a.end == b.start`` or ``b.end == a.start``) do not overlap.
    """
    # ``b`` finishes at or before ``a`` begins: nothing in common.
    if b.end <= a.start:
        return False
    # Otherwise they overlap exactly when ``b`` begins before ``a`` finishes.
    return b.start < a.end
def resolve_overlapping_spans(
    results_by_priority: List[List[RecognizerResult]],
) -> List[RecognizerResult]:
    """Merge prioritised layers of results into a single overlap-free list.

    ``results_by_priority`` holds one list of results per layer, ordered from
    highest to lowest priority. Layers are processed in that order, so a span
    accepted from an earlier layer always beats any overlapping span from a
    later layer. Within a layer, candidates are tried by descending score and,
    on equal score, by descending length.

    Every accepted result is tagged in place: the index of its layer is stored
    under ``recognition_metadata['source_priority']`` (the dict is created when
    the attribute is ``None``). Discarded results are reported at debug level.

    Returns the accepted results sorted by ascending ``start``.
    """

    def _rank(candidate):
        # Ascending sort on negated values: best score first, then longest span.
        return (-candidate.score, candidate.start - candidate.end)

    kept: list[RecognizerResult] = []
    for level, candidates in enumerate(results_by_priority):
        # Empty layers contribute nothing but still consume a priority index.
        for candidate in sorted(candidates or (), key=_rank):
            for winner in kept:
                if _spans_overlap(candidate, winner):
                    logger.debug(
                        "[resolver] scartato %s @%d (priorità %d)",
                        candidate.entity_type,
                        candidate.start,
                        level,
                    )
                    break
            else:
                # No clash with anything accepted so far: tag and keep it.
                metadata = candidate.recognition_metadata
                if metadata is None:
                    metadata = candidate.recognition_metadata = {}
                metadata["source_priority"] = level
                kept.append(candidate)

    kept.sort(key=lambda span: span.start)
    return kept