# SmartCertify-ML / tests / test_fraud_detection.py
# Harsh Yadav
# CREATE : Trained the model
# 6de2f28
"""Tests for fraud detection module."""
import pytest
import numpy as np
import sys
from pathlib import Path
# Prepend the directory two levels above this file (the parent of the folder
# containing this test module) to sys.path. The tests below import from the
# ``app`` package, which presumably lives there -- confirm against the repo layout.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
class TestSyntheticDataGeneration:
    """Checks on the output of the synthetic certificate generator."""

    @staticmethod
    def _generate(n_samples):
        """Lazily import the generator and call it with ``n_samples``.

        The import is kept inside the call so that an import failure is
        reported by the individual test rather than at collection time.
        """
        from app.data.generate_synthetic import generate_certificates_dataset

        return generate_certificates_dataset(n_samples=n_samples)

    def test_generate_certificates(self):
        """A 1000-sample request yields 1000 rows carrying both label values."""
        dataset = self._generate(1000)

        assert len(dataset) == 1000
        assert "label" in dataset.columns
        distinct_labels = set(dataset["label"].unique())
        assert distinct_labels == {0, 1}

    def test_fraud_ratio(self):
        """The mean of ``label`` over 10000 samples stays within [0.05, 0.12]."""
        dataset = self._generate(10000)

        positive_share = dataset["label"].mean()
        # The generator is expected to produce roughly 8% positive labels.
        assert 0.05 <= positive_share <= 0.12

    def test_all_columns_present(self):
        """Every required column name appears in the generated frame."""
        dataset = self._generate(100)

        required_cols = (
            "issuer_name",
            "recipient_name",
            "course_name",
            "issue_date",
            "expiry_date",
            "credential_hash",
            "issuer_reputation_score",
            "certificate_age_days",
            "metadata_completeness_score",
            "ocr_confidence_score",
            "template_match_score",
            "domain_verification_status",
            "previous_verification_count",
            "time_since_last_verification_days",
            "label",
        )
        available = dataset.columns
        for col in required_cols:
            assert col in available, f"Missing column: {col}"
class TestFeatureEngineering:
    """Checks on the feature-engineering helpers."""

    def test_extract_date_features(self):
        """Date extraction adds month, year and days-to-expiry columns."""
        from app.data.feature_engineering import extract_date_features
        import pandas as pd

        raw_dates = {
            "issue_date": ["2024-01-15", "2024-06-20"],
            "expiry_date": ["2025-01-15", "2025-06-20"],
        }
        result = extract_date_features(pd.DataFrame(raw_dates))

        for expected in ("issue_month", "issue_year", "days_to_expiry"):
            assert expected in result.columns

    def test_compute_text_features(self):
        """Text features include the name length and the course word count."""
        from app.data.feature_engineering import compute_text_features
        import pandas as pd

        raw_text = {
            "recipient_name": ["John Smith", "Jane Doe"],
            "course_name": ["ML Fundamentals", "Data Science"],
            "issuer_name": ["MIT", "Stanford"],
        }
        result = compute_text_features(pd.DataFrame(raw_text))

        for expected in ("name_length", "course_word_count"):
            assert expected in result.columns

    def test_hash_integrity(self):
        """A 64-character hex string passes both checks; a short one fails length."""
        from app.data.feature_engineering import hash_integrity_check

        # 64 repetitions of "a": the length of a SHA-256 hex digest.
        well_formed = "a" * 64
        accepted = hash_integrity_check(well_formed)
        assert accepted["is_valid_length"] is True
        assert accepted["is_valid_format"] is True

        # A string far shorter than 64 characters must fail the length check.
        rejected = hash_integrity_check("short")
        assert rejected["is_valid_length"] is False

    def test_compute_risk_score(self):
        """The row with low trust signals gets a higher risk score."""
        from app.data.feature_engineering import compute_risk_score
        import pandas as pd

        # Row 0 carries high trust signals, row 1 low ones.
        signals = {
            "issuer_reputation_score": [0.9, 0.1],
            "metadata_completeness_score": [0.95, 0.2],
            "template_match_score": [0.85, 0.15],
            "domain_verification_status": [1, 0],
        }
        result = compute_risk_score(pd.DataFrame(signals))

        assert "risk_score" in result.columns
        scores = result["risk_score"]
        assert scores.iloc[1] > scores.iloc[0]
class TestFraudPrediction:
    """Test fraud prediction inference."""

    def test_predict_returns_expected_keys(self):
        """A successful prediction exposes the three expected result keys.

        ``predict_fraud`` may return a payload containing ``"error"`` instead
        of a prediction (presumably when no trained model is available --
        confirm against ``predict_fraud``). That case is reported as a skip
        rather than a pass, so a run that verified nothing is visible in the
        test report.
        """
        from app.models.fraud_detection.predict import predict_fraud

        cert = {
            "issuer_name": "MIT",
            "course_name": "Machine Learning",
            "issuer_reputation_score": 0.9,
            "template_match_score": 0.85,
            "metadata_completeness_score": 0.9,
            "domain_verification_status": 1,
            "previous_verification_count": 5,
            "ocr_confidence_score": 0.92,
            "certificate_age_days": 365,
            "time_since_last_verification_days": 30,
        }
        result = predict_fraud(cert)

        # An error payload is an accepted outcome, but nothing can be checked
        # on it; skip explicitly instead of passing without any assertion.
        if "error" in result:
            pytest.skip(f"predict_fraud returned an error payload: {result!r}")

        for key in ("is_authentic", "fraud_probability", "risk_level"):
            assert key in result, f"Missing key in prediction result: {key}"
class TestMathUtils:
    """Checks on the numeric helper functions."""

    def test_cosine_similarity(self):
        """Identical vectors score 1.0 and orthogonal vectors score 0.0."""
        from app.utils.math_utils import cosine_similarity_vectors

        x_axis = np.array([1, 0, 0])
        x_axis_twin = np.array([1, 0, 0])
        y_axis = np.array([0, 1, 0])

        same_direction = cosine_similarity_vectors(x_axis, x_axis_twin)
        assert same_direction == pytest.approx(1.0)

        orthogonal = cosine_similarity_vectors(x_axis, y_axis)
        assert orthogonal == pytest.approx(0.0)

    def test_softmax(self):
        """Softmax output sums to 1 and every entry is strictly positive."""
        from app.utils.math_utils import softmax

        logits = np.array([1.0, 2.0, 3.0])
        probabilities = softmax(logits)

        assert probabilities.sum() == pytest.approx(1.0)
        assert all(value > 0 for value in probabilities)

    def test_compute_entropy(self):
        """A uniform distribution over four outcomes has entropy of about 2.0."""
        from app.utils.math_utils import compute_entropy

        four_way_uniform = np.array([0.25] * 4)
        measured = compute_entropy(four_way_uniform)

        assert measured == pytest.approx(2.0, abs=0.01)