""" Tests básicos para Aliah-Plus """ import pytest import numpy as np import sys from pathlib import Path # Añadir src al path sys.path.insert(0, str(Path(__file__).parent.parent)) from src.face_processor import FaceProcessor from src.embedding_engine import EmbeddingEngine from src.comparator import FaceComparator from src.ocr_extractor import OCRExtractor from src.cross_referencer import CrossReferencer class TestFaceProcessor: """Tests para el procesador de rostros""" def test_initialization(self): """Verifica que FaceProcessor se inicializa correctamente""" processor = FaceProcessor() assert processor.detector is not None def test_align_face_no_face(self): """Verifica que retorna None cuando no hay rostro""" processor = FaceProcessor() # Imagen random sin rostro random_image = np.random.randint(0, 255, (200, 200, 3), dtype=np.uint8) result = processor.align_face(random_image) # Puede ser None o una imagen si MTCNN detecta algo por error assert result is None or isinstance(result, np.ndarray) class TestEmbeddingEngine: """Tests para el motor de embeddings""" def test_initialization(self): """Verifica inicialización con diferentes modelos""" engine = EmbeddingEngine(model="ArcFace") assert engine.model_name == "ArcFace" # Modelo no soportado debería usar ArcFace por defecto engine2 = EmbeddingEngine(model="InvalidModel") assert engine2.model_name == "ArcFace" def test_generate_embedding_shape(self): """Verifica que los embeddings tienen la forma correcta""" engine = EmbeddingEngine(model="ArcFace") # Crear rostro fake de 160x160 fake_face = np.random.randint(0, 255, (160, 160, 3), dtype=np.uint8) # Intentar generar embedding embedding = engine.generate_embedding(fake_face) # Si funciona, debería ser un array numpy if embedding is not None: assert isinstance(embedding, np.ndarray) assert len(embedding) > 0 class TestComparator: """Tests para el comparador de embeddings""" def test_initialization(self): """Verifica inicialización""" comparator = FaceComparator(threshold=0.75) assert comparator.threshold == 0.75 def test_calculate_similarity_identical(self): """Dos embeddings idénticos deben tener similitud 1.0""" comparator = FaceComparator() emb = np.random.rand(512) similarity = comparator.calculate_similarity(emb, emb) assert abs(similarity - 1.0) < 0.01 # Debe ser ~1.0 def test_verify_identity_levels(self): """Verifica los niveles de confianza""" comparator = FaceComparator() emb1 = np.random.rand(512) emb2 = np.random.rand(512) confidence, similarity = comparator.verify_identity(emb1, emb2) assert isinstance(confidence, str) assert 0.0 <= similarity <= 1.0 # Verificar categorías if similarity > 0.85: assert "Seguro" in confidence elif similarity > 0.72: assert "Probable" in confidence else: assert "Descartado" in confidence class TestOCRExtractor: """Tests para el extractor OCR""" def test_initialization(self): """Verifica inicialización""" # Sin GPU para tests ocr = OCRExtractor(gpu=False) assert ocr.reader is not None def test_clean_text(self): """Verifica limpieza de texto""" ocr = OCRExtractor(gpu=False) # Texto con errores comunes de OCR dirty = "example.c0m" clean = ocr._clean_text(dirty) assert clean == "example.com" def test_is_valid_domain(self): """Verifica validación de dominios""" ocr = OCRExtractor(gpu=False) assert ocr._is_valid_domain("example.com") == True assert ocr._is_valid_domain("onlyfans.com") == True assert ocr._is_valid_domain("invalid") == False assert ocr._is_valid_domain("no spaces.com") == False def test_preprocess_image(self): """Verifica que el preprocesamiento genera múltiples versiones""" ocr = OCRExtractor(gpu=False) # Imagen de prueba test_img = np.random.randint(0, 255, (100, 200, 3), dtype=np.uint8) processed = ocr.preprocess_image(test_img) # Debe generar 7 versiones assert len(processed) == 7 # Todas deben ser imágenes válidas for img in processed: assert isinstance(img, np.ndarray) assert len(img.shape) == 2 # Grayscale class TestCrossReferencer: """Tests para el cross-referencer""" def test_initialization(self): """Verifica inicialización""" xref = CrossReferencer(domain_similarity_threshold=0.85) assert xref.domain_threshold == 0.85 def test_normalize_domain(self): """Verifica normalización de dominios""" xref = CrossReferencer() # Diferentes formatos del mismo dominio assert xref.normalize_domain("www.example.com") == "example.com" assert xref.normalize_domain("EXAMPLE.COM") == "example.com" assert xref.normalize_domain("example.com:8080") == "example.com" assert xref.normalize_domain("m.example.com") == "example.com" def test_extract_domain_from_url(self): """Verifica extracción de dominio de URL""" xref = CrossReferencer() url = "https://www.example.com/path/to/page.html?query=1" domain = xref.extract_domain_from_url(url) assert domain == "example.com" def test_calculate_domain_similarity(self): """Verifica cálculo de similitud de dominios""" xref = CrossReferencer() # Dominios idénticos assert xref.calculate_domain_similarity("example.com", "example.com") == 1.0 # Dominios similares sim = xref.calculate_domain_similarity("example.com", "examples.com") assert 0.7 < sim < 1.0 # Dominios diferentes sim2 = xref.calculate_domain_similarity("example.com", "different.com") assert sim2 < 0.7 def test_deduplicate_results(self): """Verifica deduplicación de resultados""" xref = CrossReferencer() results = [ {'url': 'https://example.com/1.jpg'}, {'url': 'https://example.com/1.jpg'}, # Duplicado {'url': 'https://example.com/2.jpg'}, ] unique = xref.deduplicate_results(results) assert len(unique) == 2 class TestIntegration: """Tests de integración""" def test_full_pipeline_mock(self): """Test del pipeline completo con datos mock""" # 1. Procesar rostro processor = FaceProcessor() fake_image = np.random.randint(0, 255, (300, 300, 3), dtype=np.uint8) # 2. OCR ocr = OCRExtractor(gpu=False) # 3. Cross-referencer xref = CrossReferencer() # Datos mock yandex_results = [ {'url': 'https://example.com/photo.jpg', 'source': 'yandex'} ] ocr_domains = ['example.com'] # Cross-reference matches = xref.match_pimeyes_with_search( [], yandex_results, ocr_domains ) # Debe encontrar el match assert isinstance(matches, list) # Función para ejecutar tests def run_tests(): """Ejecuta todos los tests""" pytest.main([__file__, '-v']) if __name__ == "__main__": run_tests()