"""Tests for fraud detection module.""" import pytest import numpy as np import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) class TestSyntheticDataGeneration: """Test synthetic data generation.""" def test_generate_certificates(self): from app.data.generate_synthetic import generate_certificates_dataset df = generate_certificates_dataset(n_samples=1000) assert len(df) == 1000 assert "label" in df.columns assert set(df["label"].unique()) == {0, 1} def test_fraud_ratio(self): from app.data.generate_synthetic import generate_certificates_dataset df = generate_certificates_dataset(n_samples=10000) fraud_ratio = df["label"].mean() assert 0.05 <= fraud_ratio <= 0.12 # Should be ~8% def test_all_columns_present(self): from app.data.generate_synthetic import generate_certificates_dataset df = generate_certificates_dataset(n_samples=100) required_cols = [ "issuer_name", "recipient_name", "course_name", "issue_date", "expiry_date", "credential_hash", "issuer_reputation_score", "certificate_age_days", "metadata_completeness_score", "ocr_confidence_score", "template_match_score", "domain_verification_status", "previous_verification_count", "time_since_last_verification_days", "label", ] for col in required_cols: assert col in df.columns, f"Missing column: {col}" class TestFeatureEngineering: """Test feature engineering.""" def test_extract_date_features(self): from app.data.feature_engineering import extract_date_features import pandas as pd df = pd.DataFrame({ "issue_date": ["2024-01-15", "2024-06-20"], "expiry_date": ["2025-01-15", "2025-06-20"], }) result = extract_date_features(df) assert "issue_month" in result.columns assert "issue_year" in result.columns assert "days_to_expiry" in result.columns def test_compute_text_features(self): from app.data.feature_engineering import compute_text_features import pandas as pd df = pd.DataFrame({ "recipient_name": ["John Smith", "Jane Doe"], "course_name": ["ML Fundamentals", "Data Science"], "issuer_name": ["MIT", "Stanford"], }) result = compute_text_features(df) assert "name_length" in result.columns assert "course_word_count" in result.columns def test_hash_integrity(self): from app.data.feature_engineering import hash_integrity_check # Valid SHA-256 hash valid_hash = "a" * 64 result = hash_integrity_check(valid_hash) assert result["is_valid_length"] is True assert result["is_valid_format"] is True # Invalid hash result_invalid = hash_integrity_check("short") assert result_invalid["is_valid_length"] is False def test_compute_risk_score(self): from app.data.feature_engineering import compute_risk_score import pandas as pd df = pd.DataFrame({ "issuer_reputation_score": [0.9, 0.1], "metadata_completeness_score": [0.95, 0.2], "template_match_score": [0.85, 0.15], "domain_verification_status": [1, 0], }) result = compute_risk_score(df) assert "risk_score" in result.columns # Low rep should have higher risk assert result["risk_score"].iloc[1] > result["risk_score"].iloc[0] class TestFraudPrediction: """Test fraud prediction inference.""" def test_predict_returns_expected_keys(self): from app.models.fraud_detection.predict import predict_fraud cert = { "issuer_name": "MIT", "course_name": "Machine Learning", "issuer_reputation_score": 0.9, "template_match_score": 0.85, "metadata_completeness_score": 0.9, "domain_verification_status": 1, "previous_verification_count": 5, "ocr_confidence_score": 0.92, "certificate_age_days": 365, "time_since_last_verification_days": 30, } result = predict_fraud(cert) # Should at minimum return error or valid result if "error" not in result: assert "is_authentic" in result assert "fraud_probability" in result assert "risk_level" in result class TestMathUtils: """Test math utility functions.""" def test_cosine_similarity(self): from app.utils.math_utils import cosine_similarity_vectors a = np.array([1, 0, 0]) b = np.array([1, 0, 0]) assert cosine_similarity_vectors(a, b) == pytest.approx(1.0) c = np.array([0, 1, 0]) assert cosine_similarity_vectors(a, c) == pytest.approx(0.0) def test_softmax(self): from app.utils.math_utils import softmax result = softmax(np.array([1.0, 2.0, 3.0])) assert result.sum() == pytest.approx(1.0) assert all(r > 0 for r in result) def test_compute_entropy(self): from app.utils.math_utils import compute_entropy uniform = np.array([0.25, 0.25, 0.25, 0.25]) entropy = compute_entropy(uniform) assert entropy == pytest.approx(2.0, abs=0.01)