File size: 1,337 Bytes
17ac484
 
6f25cc6
17ac484
6f25cc6
 
 
 
 
 
 
 
 
17ac484
6f25cc6
 
17ac484
6f25cc6
 
 
 
 
 
17ac484
6f25cc6
 
 
 
 
 
 
 
 
 
 
 
17ac484
 
6f25cc6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import re

# Terms that veto a detection: any detected span containing one of these
# (case-insensitively, as a substring) is dropped by simulate_filter.
PROTECTED_WORDS = ["Gérant", "Directrice", "Financière", "Architecte"]


def simulate_filter(text, detected_entities):
    """Simulate the filtering logic of main.py.

    Args:
        text: The string the entity offsets index into.
        detected_entities: Iterable of mappings, each with integer
            ``'start'`` and ``'end'`` keys delimiting ``text[start:end]``.

    Returns:
        A new list holding, in their original order, the entities whose
        span does not contain any protected word.
    """
    def mentions_protected_word(entity):
        # Lower-case both sides so the substring check ignores case.
        span = text[entity['start']:entity['end']].lower()
        for word in PROTECTED_WORDS:
            if word.lower() in span:
                return True
        return False

    return [
        entity
        for entity in detected_entities
        if not mentions_protected_word(entity)
    ]

def test_filter_logic():
    """Check that detections containing a protected word are filtered out.

    Passes ``simulate_filter`` a hand-written list of detections for a
    sentence with two personal names and two job titles, prints the entity
    counts before and after filtering, and asserts that exactly the two
    name detections remain.

    Raises:
        AssertionError: If the filtered result is not exactly the two names.
    """
    text = "Jean-Pierre Moulin (Gérant) et Sophie Berthier (Directrice Financière)."

    # Simulate what spaCy returns. Offsets are half-open [start, end)
    # character indices into ``text``.
    raw_detections = [
        {'start': 0, 'end': 18, 'type': 'PERSON'},   # Jean-Pierre Moulin
        {'start': 20, 'end': 26, 'type': 'ORG'},     # Gérant
        {'start': 31, 'end': 46, 'type': 'PERSON'},  # Sophie Berthier
        # End is 69, not 70: index 69 is the closing parenthesis.
        {'start': 48, 'end': 69, 'type': 'ORG'},     # Directrice Financière
    ]

    filtered = simulate_filter(text, raw_detections)

    print("\n--- Filter Logic Test ---")
    print(f"Before: {len(raw_detections)} entities")
    print(f"After: {len(filtered)} entities")

    # Only the names should remain; check every survivor, not just the first.
    assert len(filtered) == 2
    surviving_texts = [text[ent['start']:ent['end']] for ent in filtered]
    assert surviving_texts == ["Jean-Pierre Moulin", "Sophie Berthier"]

    print("✅ FILTER LOGIC SUCCESS: Protected words bypassed detections.")

# Run the check when this file is executed directly as a script.
if __name__ == "__main__":
    test_filter_logic()