import re

# Words that must never survive as detected entities: any detection whose
# text contains one of them (case-insensitively) is dropped by the filter.
PROTECTED_WORDS = ["Gérant", "Directrice", "Financière", "Architecte"]


def simulate_filter(text, detected_entities):
    """Simulate the filtering logic of main.py.

    Args:
        text: The full source string the entity offsets refer to.
        detected_entities: Iterable of dicts with at least ``'start'`` and
            ``'end'`` keys, used as slice bounds into ``text``.

    Returns:
        A new list holding, in their original order, the entities whose
        text does not contain any entry of ``PROTECTED_WORDS``. Matching
        is a case-insensitive substring test.
    """
    # Lower-case the protected words once per call, not once per entity.
    protected = [word.lower() for word in PROTECTED_WORDS]

    kept = []
    for ent in detected_entities:
        span = text[ent['start']:ent['end']].lower()
        if any(word in span for word in protected):
            continue
        kept.append(ent)
    return kept


def test_filter_logic():
    """Check that detections covering protected words are filtered out."""
    text = "Jean-Pierre Moulin (Gérant) et Sophie Berthier (Directrice Financière)."

    # Simulated NER output (what spaCy would return).
    raw_detections = [
        {'start': 0, 'end': 18, 'type': 'PERSON'},
        {'start': 20, 'end': 26, 'type': 'ORG'},  # Gérant
        {'start': 31, 'end': 46, 'type': 'PERSON'},
        # End offset is 69 so the span stops before the closing parenthesis.
        {'start': 48, 'end': 69, 'type': 'ORG'},  # Directrice Financière
    ]

    filtered = simulate_filter(text, raw_detections)

    print("\n--- Filter Logic Test ---")
    print(f"Before: {len(raw_detections)} entities")
    print(f"After: {len(filtered)} entities")

    # Only the person names should remain.
    assert len(filtered) == 2
    assert text[filtered[0]['start']:filtered[0]['end']] == "Jean-Pierre Moulin"
    assert text[filtered[1]['start']:filtered[1]['end']] == "Sophie Berthier"
    print("✅ FILTER LOGIC SUCCESS: Protected words bypassed detections.")


if __name__ == "__main__":
    test_filter_logic()