|
|
""" |
|
|
Tests básicos para Aliah-Plus |
|
|
""" |
|
|
|
|
|
import pytest |
|
|
import numpy as np |
|
|
import sys |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent)) |
|
|
|
|
|
from src.face_processor import FaceProcessor |
|
|
from src.embedding_engine import EmbeddingEngine |
|
|
from src.comparator import FaceComparator |
|
|
from src.ocr_extractor import OCRExtractor |
|
|
from src.cross_referencer import CrossReferencer |
|
|
|
|
|
|
|
|
class TestFaceProcessor:
    """Tests for the face processor."""

    def test_initialization(self):
        """A freshly built FaceProcessor exposes a non-None ``detector``."""
        face_processor = FaceProcessor()
        detector = face_processor.detector
        assert detector is not None

    def test_align_face_no_face(self):
        """``align_face`` on a random-noise image yields ``None`` or an ndarray."""
        face_processor = FaceProcessor()

        # 200x200 three-channel image filled with random uint8 values.
        noise = np.random.randint(0, 255, (200, 200, 3), dtype=np.uint8)
        aligned = face_processor.align_face(noise)

        # Both outcomes are accepted: no result at all, or an image array.
        if aligned is not None:
            assert isinstance(aligned, np.ndarray)
|
|
|
|
|
|
|
|
class TestEmbeddingEngine:
    """Tests for the embedding engine."""

    def test_initialization(self):
        """Check ``model_name`` after building with a known and an unknown model."""
        arcface_engine = EmbeddingEngine(model="ArcFace")
        assert arcface_engine.model_name == "ArcFace"

        # An unrecognized model name is expected to end up as "ArcFace".
        fallback_engine = EmbeddingEngine(model="InvalidModel")
        assert fallback_engine.model_name == "ArcFace"

    def test_generate_embedding_shape(self):
        """A non-None embedding must be a non-empty ndarray."""
        embedding_engine = EmbeddingEngine(model="ArcFace")

        # 160x160 three-channel image filled with random uint8 values.
        synthetic_face = np.random.randint(0, 255, (160, 160, 3), dtype=np.uint8)
        vector = embedding_engine.generate_embedding(synthetic_face)

        # A None result is tolerated; nothing is asserted in that case.
        if vector is None:
            return

        assert isinstance(vector, np.ndarray)
        assert len(vector) > 0
|
|
|
|
|
|
|
|
class TestComparator:
    """Tests for the embedding comparator."""

    def test_initialization(self):
        """The ``threshold`` given to the constructor is readable on the instance."""
        face_comparator = FaceComparator(threshold=0.75)
        assert face_comparator.threshold == 0.75

    def test_calculate_similarity_identical(self):
        """Comparing an embedding with itself gives a score within 0.01 of 1.0."""
        face_comparator = FaceComparator()
        vector = np.random.rand(512)

        score = face_comparator.calculate_similarity(vector, vector)
        deviation = abs(score - 1.0)

        assert deviation < 0.01

    def test_verify_identity_levels(self):
        """The label from ``verify_identity`` matches the band of the similarity."""
        face_comparator = FaceComparator()
        first_vector = np.random.rand(512)
        second_vector = np.random.rand(512)

        label, score = face_comparator.verify_identity(first_vector, second_vector)

        assert isinstance(label, str)
        assert 0.0 <= score <= 1.0

        # Bands checked here: above 0.85, above 0.72, and everything else.
        if score > 0.85:
            expected_fragment = "Seguro"
        elif score > 0.72:
            expected_fragment = "Probable"
        else:
            expected_fragment = "Descartado"

        assert expected_fragment in label
|
|
|
|
|
|
|
|
class TestOCRExtractor:
    """Tests for the OCR extractor."""

    def test_initialization(self):
        """An extractor built with ``gpu=False`` exposes a non-None ``reader``."""
        extractor = OCRExtractor(gpu=False)
        assert extractor.reader is not None

    def test_clean_text(self):
        """``_clean_text`` is expected to turn 'example.c0m' into 'example.com'."""
        extractor = OCRExtractor(gpu=False)
        assert extractor._clean_text("example.c0m") == "example.com"

    def test_is_valid_domain(self):
        """``_is_valid_domain`` accepts well-formed domains and rejects the rest."""
        extractor = OCRExtractor(gpu=False)

        # (candidate, expected verdict) pairs, checked in this order.
        expectations = (
            ("example.com", True),
            ("onlyfans.com", True),
            ("invalid", False),
            ("no spaces.com", False),
        )
        for candidate, expected in expectations:
            assert extractor._is_valid_domain(candidate) == expected

    def test_preprocess_image(self):
        """``preprocess_image`` returns seven two-dimensional ndarrays."""
        extractor = OCRExtractor(gpu=False)

        # 100x200 three-channel image filled with random uint8 values.
        sample = np.random.randint(0, 255, (100, 200, 3), dtype=np.uint8)
        variants = extractor.preprocess_image(sample)

        assert len(variants) == 7

        for variant in variants:
            assert isinstance(variant, np.ndarray)
            # Each variant must have exactly two axes.
            assert len(variant.shape) == 2
|
|
|
|
|
|
|
|
class TestCrossReferencer:
    """Tests for the cross-referencer."""

    def test_initialization(self):
        """``domain_similarity_threshold`` is readable as ``domain_threshold``."""
        referencer = CrossReferencer(domain_similarity_threshold=0.85)
        assert referencer.domain_threshold == 0.85

    def test_normalize_domain(self):
        """Several spellings of the same host all normalize to 'example.com'."""
        referencer = CrossReferencer()

        # "www." prefix, upper case, explicit port, and "m." prefix respectively.
        raw_domains = (
            "www.example.com",
            "EXAMPLE.COM",
            "example.com:8080",
            "m.example.com",
        )
        for raw in raw_domains:
            assert referencer.normalize_domain(raw) == "example.com"

    def test_extract_domain_from_url(self):
        """A full URL with path and query string is reduced to 'example.com'."""
        referencer = CrossReferencer()
        extracted = referencer.extract_domain_from_url(
            "https://www.example.com/path/to/page.html?query=1"
        )
        assert extracted == "example.com"

    def test_calculate_domain_similarity(self):
        """Similarity is 1.0 for equal domains and drops as they diverge."""
        referencer = CrossReferencer()

        identical = referencer.calculate_domain_similarity("example.com", "example.com")
        assert identical == 1.0

        # One extra character: expected strictly between 0.7 and 1.0.
        near = referencer.calculate_domain_similarity("example.com", "examples.com")
        assert 0.7 < near < 1.0

        # A different name: expected below 0.7.
        far = referencer.calculate_domain_similarity("example.com", "different.com")
        assert far < 0.7

    def test_deduplicate_results(self):
        """Three results with two distinct URLs deduplicate to two entries."""
        referencer = CrossReferencer()

        urls = (
            'https://example.com/1.jpg',
            'https://example.com/1.jpg',
            'https://example.com/2.jpg',
        )
        results = [{'url': url} for url in urls]

        unique = referencer.deduplicate_results(results)

        assert len(unique) == 2
|
|
|
|
|
|
|
|
class TestIntegration:
    """Integration tests."""

    def test_full_pipeline_mock(self):
        """Run the cross-referencing step on mock data and expect a list back."""
        # These three objects are built but not used further in this test.
        processor = FaceProcessor()
        fake_image = np.random.randint(0, 255, (300, 300, 3), dtype=np.uint8)
        ocr = OCRExtractor(gpu=False)

        referencer = CrossReferencer()

        # A single mock search hit and a single mock OCR domain.
        search_hits = [dict(url='https://example.com/photo.jpg', source='yandex')]
        domains_from_ocr = ['example.com']

        # The first positional argument is deliberately an empty list.
        outcome = referencer.match_pimeyes_with_search([], search_hits, domains_from_ocr)

        assert isinstance(outcome, list)
|
|
|
|
|
|
|
|
|
|
|
def run_tests() -> int:
    """Run every test in this file with pytest in verbose mode.

    Returns:
        The exit code reported by ``pytest.main`` (0 when the run succeeds),
        so callers can tell whether the run passed.
    """
    return pytest.main([__file__, '-v'])


if __name__ == "__main__":
    # Propagate pytest's exit code; otherwise a failing run would still make
    # the script exit with status 0.
    sys.exit(run_tests())
|
|
|