J
File size: 7,930 Bytes
85fa7d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
"""
Tests básicos para Aliah-Plus
"""

import pytest
import numpy as np
import sys
from pathlib import Path

# Añadir el directorio raíz del proyecto al path para poder importar el paquete src
sys.path.insert(0, str(Path(__file__).parent.parent))

from src.face_processor import FaceProcessor
from src.embedding_engine import EmbeddingEngine
from src.comparator import FaceComparator
from src.ocr_extractor import OCRExtractor
from src.cross_referencer import CrossReferencer


class TestFaceProcessor:
    """Tests for the face processor."""

    def test_initialization(self):
        """A freshly constructed FaceProcessor exposes a non-None ``detector``."""
        assert FaceProcessor().detector is not None

    def test_align_face_no_face(self):
        """``align_face`` on random noise returns either None or an ndarray."""
        face_processor = FaceProcessor()
        # Random pixels: an image that contains no real face.
        noise = np.random.randint(0, 255, (200, 200, 3), dtype=np.uint8)
        aligned = face_processor.align_face(noise)
        # None is the expected outcome, but an image is tolerated too in case
        # the detector (MTCNN, per the original note) reports a spurious face.
        if aligned is not None:
            assert isinstance(aligned, np.ndarray)


class TestEmbeddingEngine:
    """Tests for the embedding engine."""

    def test_initialization(self):
        """Both a supported and an unsupported model name resolve to ArcFace."""
        # "InvalidModel" is not a supported model, so the engine is expected
        # to fall back to ArcFace for it as well.
        for requested_model in ("ArcFace", "InvalidModel"):
            engine = EmbeddingEngine(model=requested_model)
            assert engine.model_name == "ArcFace"

    def test_generate_embedding_shape(self):
        """Any embedding that is produced must be a non-empty NumPy array."""
        engine = EmbeddingEngine(model="ArcFace")

        # Synthetic 160x160 three-channel "face" made of random pixels.
        synthetic_face = np.random.randint(0, 255, (160, 160, 3), dtype=np.uint8)

        vector = engine.generate_embedding(synthetic_face)

        # When no embedding is returned there is nothing further to verify.
        if vector is None:
            return

        assert isinstance(vector, np.ndarray)
        assert len(vector) > 0


class TestComparator:
    """Tests for the embedding comparator."""

    def test_initialization(self):
        """The threshold passed to the constructor is stored on the instance."""
        assert FaceComparator(threshold=0.75).threshold == 0.75

    def test_calculate_similarity_identical(self):
        """Comparing an embedding with itself gives a similarity of about 1.0."""
        comparator = FaceComparator()

        vector = np.random.rand(512)
        score = comparator.calculate_similarity(vector, vector)

        # Allow a small tolerance around 1.0.
        deviation = abs(score - 1.0)
        assert deviation < 0.01

    def test_verify_identity_levels(self):
        """The confidence label returned matches the similarity band."""
        comparator = FaceComparator()

        first = np.random.rand(512)
        second = np.random.rand(512)

        label, score = comparator.verify_identity(first, second)

        assert isinstance(label, str)
        assert 0.0 <= score <= 1.0

        # Work out which keyword the label must contain for this score.
        if score > 0.85:
            expected_keyword = "Seguro"
        elif score > 0.72:
            expected_keyword = "Probable"
        else:
            expected_keyword = "Descartado"

        assert expected_keyword in label


class TestOCRExtractor:
    """Tests for the OCR extractor."""

    def test_initialization(self):
        """An extractor built with ``gpu=False`` has a non-None ``reader``."""
        # GPU is disabled for the test run.
        ocr = OCRExtractor(gpu=False)
        assert ocr.reader is not None

    def test_clean_text(self):
        """``_clean_text`` repairs a common OCR misread ("0" read for "o")."""
        ocr = OCRExtractor(gpu=False)

        # Text containing a typical OCR error.
        dirty = "example.c0m"
        clean = ocr._clean_text(dirty)

        assert clean == "example.com"

    def test_is_valid_domain(self):
        """``_is_valid_domain`` accepts well-formed domains and rejects others."""
        ocr = OCRExtractor(gpu=False)

        # Truthiness asserts replace the original ``== True`` / ``== False``
        # comparisons (PEP 8); the domain is the assertion message so a
        # failure names the offending input.
        for domain in ("example.com", "onlyfans.com"):
            assert ocr._is_valid_domain(domain), domain

        for domain in ("invalid", "no spaces.com"):
            assert not ocr._is_valid_domain(domain), domain

    def test_preprocess_image(self):
        """``preprocess_image`` yields seven two-dimensional ndarray variants."""
        ocr = OCRExtractor(gpu=False)

        # Random three-channel test image, 100 rows by 200 columns.
        test_img = np.random.randint(0, 255, (100, 200, 3), dtype=np.uint8)

        processed = ocr.preprocess_image(test_img)

        # Exactly seven preprocessed versions are expected.
        assert len(processed) == 7

        # Every version must be a valid single-channel (grayscale) image.
        for img in processed:
            assert isinstance(img, np.ndarray)
            assert img.ndim == 2


class TestCrossReferencer:
    """Tests for the cross-referencer."""

    def test_initialization(self):
        """The similarity threshold argument is stored as ``domain_threshold``."""
        assert CrossReferencer(domain_similarity_threshold=0.85).domain_threshold == 0.85

    def test_normalize_domain(self):
        """Different spellings of one domain all normalize to "example.com"."""
        xref = CrossReferencer()

        # "www." prefix, upper case, explicit port and "m." prefix variants.
        spellings = (
            "www.example.com",
            "EXAMPLE.COM",
            "example.com:8080",
            "m.example.com",
        )
        for spelling in spellings:
            assert xref.normalize_domain(spelling) == "example.com"

    def test_extract_domain_from_url(self):
        """The domain is extracted from a full URL with path and query."""
        xref = CrossReferencer()

        extracted = xref.extract_domain_from_url(
            "https://www.example.com/path/to/page.html?query=1"
        )

        assert extracted == "example.com"

    def test_calculate_domain_similarity(self):
        """Similarity is 1.0 for equal domains and drops as they diverge."""
        xref = CrossReferencer()

        # Identical domains.
        same = xref.calculate_domain_similarity("example.com", "example.com")
        assert same == 1.0

        # Near-identical domains: high, but strictly below 1.0.
        near = xref.calculate_domain_similarity("example.com", "examples.com")
        assert near > 0.7 and near < 1.0

        # Unrelated domains.
        far = xref.calculate_domain_similarity("example.com", "different.com")
        assert far < 0.7

    def test_deduplicate_results(self):
        """A repeated URL is collapsed, leaving two unique results."""
        xref = CrossReferencer()

        # The first URL appears twice on purpose.
        urls = (
            'https://example.com/1.jpg',
            'https://example.com/1.jpg',
            'https://example.com/2.jpg',
        )
        entries = [{'url': address} for address in urls]

        deduplicated = xref.deduplicate_results(entries)

        assert len(deduplicated) == 2


class TestIntegration:
    """Integration tests."""

    def test_full_pipeline_mock(self):
        """Smoke-test the pipeline components together using mock data.

        The face processor and OCR extractor are only constructed; the
        cross-referencer is then called with an empty first argument, one
        mock Yandex result and one OCR domain.
        """
        # Stages 1 and 2: check that the face processor and the OCR extractor
        # (GPU disabled) can be constructed. The instances are not used
        # further, so they are not bound to names.
        FaceProcessor()
        OCRExtractor(gpu=False)

        # Stage 3: cross-referencer.
        xref = CrossReferencer()

        # Mock data.
        yandex_results = [
            {'url': 'https://example.com/photo.jpg', 'source': 'yandex'}
        ]
        ocr_domains = ['example.com']

        matches = xref.match_pimeyes_with_search(
            [],
            yandex_results,
            ocr_domains,
        )

        # Only the return type is verified here; the contents of the matches
        # are not asserted.
        # NOTE(review): the original comment said the match must be found --
        # consider asserting on the contents once the result schema is fixed.
        assert isinstance(matches, list)


# Función para ejecutar tests
def run_tests():
    """Run every test in this file with pytest in verbose mode.

    Returns:
        The exit code reported by ``pytest.main`` (0 when all tests pass),
        so callers can propagate failures instead of losing them.
    """
    return pytest.main([__file__, '-v'])


if __name__ == "__main__":
    # Propagate pytest's exit code so a failing run is visible to the shell
    # or CI instead of always exiting with status 0.
    sys.exit(run_tests())