J / src /test_basic.py
Andro0s's picture
Upload 13 files
85fa7d2 verified
"""
Tests básicos para Aliah-Plus
"""
import pytest
import numpy as np
import sys
from pathlib import Path
# Añadir el directorio padre de src al path (permite importar src.*)
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.face_processor import FaceProcessor
from src.embedding_engine import EmbeddingEngine
from src.comparator import FaceComparator
from src.ocr_extractor import OCRExtractor
from src.cross_referencer import CrossReferencer
class TestFaceProcessor:
    """Tests for the face processor."""

    def test_initialization(self):
        """Check that a freshly built FaceProcessor exposes a detector."""
        processor = FaceProcessor()
        assert processor.detector is not None

    def test_align_face_no_face(self):
        """Check the result type of align_face on an image with no face."""
        processor = FaceProcessor()
        # A seeded generator keeps the noise image identical across runs; the
        # upper bound is exclusive, so 256 is needed to cover all uint8 values.
        rng = np.random.default_rng(0)
        noise_image = rng.integers(0, 256, size=(200, 200, 3), dtype=np.uint8)
        result = processor.align_face(noise_image)
        # The detector may report a false positive on noise, so an aligned
        # image is tolerated as well as None.
        assert result is None or isinstance(result, np.ndarray)
class TestEmbeddingEngine:
    """Tests for the embedding engine."""

    def test_initialization(self):
        """Check the model name kept for a supported and an unsupported model."""
        engine = EmbeddingEngine(model="ArcFace")
        assert engine.model_name == "ArcFace"
        # An unsupported model name is expected to fall back to ArcFace.
        fallback_engine = EmbeddingEngine(model="InvalidModel")
        assert fallback_engine.model_name == "ArcFace"

    def test_generate_embedding_shape(self):
        """Check that a produced embedding is a non-empty numpy array."""
        engine = EmbeddingEngine(model="ArcFace")
        # Fake 160x160 face built from a seeded generator so the input is
        # reproducible; the exclusive bound of 256 covers all uint8 values.
        rng = np.random.default_rng(0)
        fake_face = rng.integers(0, 256, size=(160, 160, 3), dtype=np.uint8)
        embedding = engine.generate_embedding(fake_face)
        # The engine may return None for a synthetic face; only a produced
        # embedding is inspected.
        if embedding is not None:
            assert isinstance(embedding, np.ndarray)
            assert len(embedding) > 0
class TestComparator:
    """Tests for the embedding comparator."""

    def test_initialization(self):
        """Check that the threshold passed to the constructor is stored."""
        comparator = FaceComparator(threshold=0.75)
        assert comparator.threshold == 0.75

    def test_calculate_similarity_identical(self):
        """Check that an embedding compared with itself scores about 1.0."""
        comparator = FaceComparator()
        # Seeded generator: the same 512-value vector on every run.
        emb = np.random.default_rng(0).random(512)
        similarity = comparator.calculate_similarity(emb, emb)
        assert similarity == pytest.approx(1.0, abs=0.01)

    def test_verify_identity_levels(self):
        """Check that the confidence label matches the similarity band."""
        comparator = FaceComparator()
        # Both vectors come from one seeded generator so the similarity, and
        # therefore the branch exercised below, is the same on every run.
        rng = np.random.default_rng(0)
        emb1 = rng.random(512)
        emb2 = rng.random(512)
        confidence, similarity = comparator.verify_identity(emb1, emb2)
        assert isinstance(confidence, str)
        assert 0.0 <= similarity <= 1.0
        # The label is expected to follow the similarity band.
        if similarity > 0.85:
            assert "Seguro" in confidence
        elif similarity > 0.72:
            assert "Probable" in confidence
        else:
            assert "Descartado" in confidence
class TestOCRExtractor:
    """Tests for the OCR extractor."""

    def test_initialization(self):
        """Check that the extractor exposes a reader after construction."""
        # GPU is disabled for tests.
        ocr = OCRExtractor(gpu=False)
        assert ocr.reader is not None

    def test_clean_text(self):
        """Check that a common OCR misread is corrected by _clean_text."""
        ocr = OCRExtractor(gpu=False)
        # Text containing a typical OCR error (zero in place of the letter o).
        dirty = "example.c0m"
        clean = ocr._clean_text(dirty)
        assert clean == "example.com"

    def test_is_valid_domain(self):
        """Check domain validation on valid and invalid inputs."""
        ocr = OCRExtractor(gpu=False)
        # Plain truthiness asserts instead of comparing against True/False.
        assert ocr._is_valid_domain("example.com")
        assert ocr._is_valid_domain("onlyfans.com")
        assert not ocr._is_valid_domain("invalid")
        assert not ocr._is_valid_domain("no spaces.com")

    def test_preprocess_image(self):
        """Check that preprocessing yields seven two-dimensional arrays."""
        ocr = OCRExtractor(gpu=False)
        # Seeded test image so the input is reproducible; the exclusive bound
        # of 256 covers all uint8 values.
        rng = np.random.default_rng(0)
        test_img = rng.integers(0, 256, size=(100, 200, 3), dtype=np.uint8)
        processed = ocr.preprocess_image(test_img)
        # Seven versions are expected.
        assert len(processed) == 7
        # Every version must be a two-dimensional (grayscale) array.
        for img in processed:
            assert isinstance(img, np.ndarray)
            assert img.ndim == 2
class TestCrossReferencer:
    """Tests for the cross-referencer."""

    def test_initialization(self):
        """The constructor threshold is exposed as domain_threshold."""
        referencer = CrossReferencer(domain_similarity_threshold=0.85)
        stored = referencer.domain_threshold
        assert stored == 0.85

    def test_normalize_domain(self):
        """Several spellings of one domain normalize to the same value."""
        referencer = CrossReferencer()
        # Different formats of the same domain, checked in order.
        variants = (
            "www.example.com",
            "EXAMPLE.COM",
            "example.com:8080",
            "m.example.com",
        )
        for raw in variants:
            assert referencer.normalize_domain(raw) == "example.com"

    def test_extract_domain_from_url(self):
        """The domain is extracted from a full URL."""
        referencer = CrossReferencer()
        extracted = referencer.extract_domain_from_url(
            "https://www.example.com/path/to/page.html?query=1"
        )
        assert extracted == "example.com"

    def test_calculate_domain_similarity(self):
        """Similarity is 1.0 for equal, high for close, low for unrelated domains."""
        referencer = CrossReferencer()
        base = "example.com"
        # Identical domains.
        identical_score = referencer.calculate_domain_similarity(base, "example.com")
        assert identical_score == 1.0
        # Similar domains.
        close_score = referencer.calculate_domain_similarity(base, "examples.com")
        assert close_score > 0.7 and close_score < 1.0
        # Different domains.
        far_score = referencer.calculate_domain_similarity(base, "different.com")
        assert far_score < 0.7

    def test_deduplicate_results(self):
        """A repeated URL is kept only once."""
        referencer = CrossReferencer()
        first = {'url': 'https://example.com/1.jpg'}
        duplicate = {'url': 'https://example.com/1.jpg'}
        second = {'url': 'https://example.com/2.jpg'}
        deduplicated = referencer.deduplicate_results([first, duplicate, second])
        assert len(deduplicated) == 2
class TestIntegration:
    """Integration tests."""

    def test_full_pipeline_mock(self):
        """Smoke-test component construction and cross-referencing with mock data."""
        # The face processor and OCR extractor are built only to confirm that
        # construction succeeds; neither is fed any data in this test.
        FaceProcessor()
        OCRExtractor(gpu=False)
        xref = CrossReferencer()
        # Mock search results and OCR output sharing the same domain.
        yandex_results = [
            {'url': 'https://example.com/photo.jpg', 'source': 'yandex'},
        ]
        ocr_domains = ['example.com']
        # Cross-reference with an empty first argument.
        matches = xref.match_pimeyes_with_search([], yandex_results, ocr_domains)
        # Only the return type is checked here; the content of the matches is
        # not asserted.
        assert isinstance(matches, list)
# Helper to run the tests in this file
def run_tests():
    """Run every test in this file with pytest in verbose mode.

    Returns:
        The exit code reported by ``pytest.main``.
    """
    return pytest.main([__file__, '-v'])


if __name__ == "__main__":
    # Propagate pytest's exit code so a failing run ends with a non-zero status.
    sys.exit(run_tests())