# fraudshield / validate_enhancements.py
# DevikaJ2005's picture
# Docs: Comprehensive professional documentation and validation suite
# af4e958
#!/usr/bin/env python3
"""Validation script for enhanced FraudShield files.

This script verifies:
1. Python version requirements (3.10+)
2. All enhanced modules import correctly
3. Pydantic models validate correctly
4. Results file has the required structure and in-range scores
5. Data loader loads the bundle and returns cases for every task
6. Environment loads its data and resets to an initial observation
"""
import sys
import json
from pathlib import Path
def check_python_version():
    """Verify that the running interpreter is Python 3.10 or newer.

    Prints a one-line PASS/FAIL status followed by the required and the
    current ``major.minor`` version.

    Returns:
        bool: True when ``sys.version_info`` is at least ``(3, 10)``.
    """
    version = sys.version_info
    is_valid = version >= (3, 10)
    status = "✓ PASS" if is_valid else "✗ FAIL"
    print(f"1. Python Version Check: {status}")
    print(f" Required: 3.10+ | Current: {version.major}.{version.minor}")
    return is_valid
def check_imports(modules=None):
    """Verify that every enhanced module can be imported.

    Each module is imported by name; a failure is reported on stdout and
    remembered, but does not stop the remaining modules from being tried.

    Args:
        modules: Optional iterable of ``(module_name, file_name)`` pairs.
            ``module_name`` is what gets imported, ``file_name`` is only
            used in the printed messages. Defaults to the six FraudShield
            modules.

    Returns:
        bool: True when every module imported without raising.
    """
    # Local import keeps this block self-contained; the file's top-level
    # imports do not include importlib.
    import importlib

    print("\n2. Import Validation:")
    if modules is None:
        modules = [
            ("models", "models.py"),
            ("data_loader", "data_loader.py"),
            ("fraudshield_env", "fraudshield_env.py"),
            ("graders", "graders.py"),
            ("llm_agent", "llm_agent.py"),
            ("inference", "inference.py"),
        ]

    all_pass = True
    for module_name, file_name in modules:
        try:
            importlib.import_module(module_name)
        except Exception as e:
            # Deliberately broad: module-level code can raise anything at
            # import time, and every such failure must be reported here.
            print(f" ✗ {file_name} import failed: {e}")
            all_pass = False
        else:
            print(f" ✓ {file_name} imports successfully")
    return all_pass
def check_pydantic_models():
    """Verify that the models in ``models.py`` construct and validate.

    Steps performed:
      * import every model/enum name used by the project from ``models``;
      * build a valid ``FraudCheckAction``;
      * confirm that ``confidence=1.5`` is rejected;
      * confirm the string values of ``DecisionEnum`` and ``TaskDifficulty``.

    Any unexpected exception is reported on stdout and treated as failure.

    Returns:
        bool: True when all of the above succeed, False otherwise.
    """
    print("\n3. Pydantic Model Validation:")
    try:
        # Importing every name doubles as a check that each one is still
        # exported by models.py, even though only some are exercised below.
        from models import (  # noqa: F401
            FraudCheckAction, FraudCheckObservation, Reward,
            EpisodeState, StepResult, ResetResult, DecisionEnum,
            TaskDifficulty, TransactionData,
        )

        # A well-formed action must construct without error.
        action = FraudCheckAction(
            transaction_id="test_001",
            decision=DecisionEnum.FRAUD,
            confidence=0.95,
            reasoning="Test fraud decision based on seller account age",
        )

        # Confidence must lie in [0.0, 1.0]; 1.5 has to be rejected.
        # NOTE(review): assumes FraudCheckAction is a pydantic model, whose
        # ValidationError subclasses ValueError. Catching only ValueError
        # stops an unrelated error (e.g. a TypeError from a changed
        # signature) from being misread as a successful rejection; such
        # errors now reach the outer handler and fail the check.
        try:
            FraudCheckAction(
                transaction_id="test_002",
                decision=DecisionEnum.FRAUD,
                confidence=1.5,  # Invalid: > 1.0
                reasoning="This should fail validation",
            )
        except ValueError:
            pass  # Expected: the out-of-range confidence was rejected.
        else:
            print(" ✗ Confidence validation failed (should reject > 1.0)")
            return False

        print(" ✓ FraudCheckAction validation passed")
        print(f" - transaction_id: {action.transaction_id}")
        print(f" - decision: {action.decision.value}")
        print(f" - confidence: {action.confidence}")

        # Explicit comparisons instead of ``assert`` so the checks still run
        # under ``python -O``, which strips assert statements.
        enum_expectations = (
            ("DecisionEnum", DecisionEnum,
             {"FRAUD": "fraud", "LEGITIMATE": "legitimate"}),
            ("TaskDifficulty", TaskDifficulty,
             {"EASY": "easy", "MEDIUM": "medium", "HARD": "hard"}),
        )
        for label, enum_cls, expected in enum_expectations:
            for member_name, expected_value in expected.items():
                actual_value = getattr(enum_cls, member_name).value
                if actual_value != expected_value:
                    print(
                        f" ✗ {label}.{member_name} has value "
                        f"{actual_value!r}, expected {expected_value!r}"
                    )
                    return False
            print(f" ✓ {label} validation passed")
        return True
    except Exception as e:
        print(f" ✗ Pydantic model validation failed: {e}")
        return False
def check_results_file(results_path="fraudshield_baseline_results.json"):
    """Verify the structure and scores of the baseline results JSON file.

    The file must exist, contain the keys ``final_score``, ``easy``,
    ``medium``, ``hard`` and ``metadata``, and every score (the final one
    plus the ``score`` of each task) must lie in ``[0, 1]``. A final score
    that differs from the expected baseline (0.8660 +/- 0.001) only
    produces a warning; it does not fail the check.

    Args:
        results_path: Path of the results file. Defaults to
            ``fraudshield_baseline_results.json`` in the current directory.

    Returns:
        bool: True when the file passes every check, False when it is
        missing, malformed, or holds an out-of-range score.
    """
    print("\n4. Results File Validation:")
    try:
        results_file = Path(results_path)
        if not results_file.exists():
            print(" ✗ Results file not found")
            return False

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(results_file, encoding="utf-8") as f:
            results = json.load(f)

        # Validate structure
        required_keys = ["final_score", "easy", "medium", "hard", "metadata"]
        missing_keys = [k for k in required_keys if k not in results]
        if missing_keys:
            print(f" ✗ Missing keys: {missing_keys}")
            return False
        print(" ✓ Results file has required structure")

        # Validate scores
        scores = {
            "Final": results["final_score"],
            "Easy": results["easy"]["score"],
            "Medium": results["medium"]["score"],
            "Hard": results["hard"]["score"],
        }
        for name, score in scores.items():
            if not (0 <= score <= 1):
                print(f" ✗ {name} score out of range: {score}")
                return False
            print(f" ✓ {name} score: {score:.4f}")

        # Compare against the expected baseline; a mismatch is a warning only.
        expected_final = 0.8660
        actual_final = results["final_score"]
        tolerance = 0.001
        if abs(actual_final - expected_final) < tolerance:
            print(f" ✓ Baseline score matches expected value ({expected_final:.4f})")
        else:
            print(f" ⚠ Baseline score variance: expected {expected_final:.4f}, got {actual_final:.4f}")

        # Report task transaction counts (a missing key fails the check via
        # the handler below).
        print(" ✓ Transaction counts:")
        print(f" - Easy: {results['easy']['num_transactions']}")
        print(f" - Medium: {results['medium']['num_transactions']}")
        print(f" - Hard: {results['hard']['num_transactions']}")
        return True
    except Exception as e:
        # Covers unreadable files, invalid JSON, missing nested keys and
        # non-numeric scores alike.
        print(f" ✗ Results validation failed: {e}")
        return False
def check_data_loader():
    """Verify that ``FraudDataLoader`` loads its bundle and task cases.

    Builds a loader with ``data_path="data"`` and ``seed=42``, loads the
    bundle, prints selected fields of the bundle summary, and requires a
    non-empty case list for each of the ``easy``, ``medium`` and ``hard``
    tasks. Any exception is reported on stdout and treated as failure.

    Returns:
        bool: True when the bundle loads and every task has cases.
    """
    print("\n5. Data Loader Validation:")
    try:
        from data_loader import FraudDataLoader

        loader = FraudDataLoader(data_path="data", seed=42)
        if not loader.load_bundle():
            print(" ✗ Failed to load data bundle")
            return False
        print(" ✓ Data bundle loaded successfully")

        # Report the bundle summary; absent fields print as None via .get().
        summary = loader.get_bundle_summary()
        print(" ✓ Bundle summary:")
        print(f" - Snapshot ID: {summary.get('snapshot_id')}")
        print(f" - Schema version: {summary.get('schema_version')}")
        print(f" - Seed: {summary.get('seed')}")
        print(f" - Task sizes: {summary.get('task_sizes')}")

        # Every difficulty level must provide at least one case.
        for task_name in ["easy", "medium", "hard"]:
            cases = loader.get_task_cases(task_name)
            if not cases:
                print(f" ✗ No cases found for {task_name} task")
                return False
            print(f" ✓ {task_name.capitalize()} task: {len(cases)} cases")
        return True
    except Exception as e:
        print(f" ✗ Data loader validation failed: {e}")
        return False
def check_environment():
    """Verify that ``FraudShieldEnvironment`` loads data and resets.

    Builds an environment with ``data_path="data"`` and ``seed=42``, loads
    its data, resets it on the ``easy`` task and prints a few fields of the
    initial observation. Any exception is reported on stdout and treated as
    failure.

    Returns:
        bool: True when the data loads and the reset observation is readable.
    """
    print("\n6. Environment Validation:")
    try:
        from fraudshield_env import FraudShieldEnvironment

        env = FraudShieldEnvironment(data_path="data", seed=42)
        if not env.load_data():
            print(" ✗ Failed to load environment data")
            return False
        print(" ✓ Environment created and data loaded")

        # Test reset
        reset_result = env.reset("easy")
        print(f" ✓ Reset successful (episode_id: {env.episode_id})")

        # Reading these attributes is itself the check: a missing one raises
        # and fails the validation through the handler below.
        obs = reset_result.observation
        print(" ✓ Initial observation created")
        print(f" - Transaction ID: {obs.transaction_id}")
        print(f" - Task: {obs.task_name.value}")
        print(f" - Episode step: {obs.episode_step}")
        return True
    except Exception as e:
        print(f" ✗ Environment validation failed: {e}")
        return False
def main():
    """Run all validation checks and print a PASS/FAIL summary.

    Each check runs in order; a check that raises is reported and recorded
    as failed so the remaining checks still run.

    Returns:
        int: 0 when every check passed, 1 when at least one failed.
    """
    banner = "=" * 70
    print(banner)
    print("FraudShield Enhancement Validation Suite")
    print(banner)

    checks = [
        ("Python Version", check_python_version),
        ("Imports", check_imports),
        ("Pydantic Models", check_pydantic_models),
        ("Results File", check_results_file),
        ("Data Loader", check_data_loader),
        ("Environment", check_environment),
    ]

    results = {}
    for check_name, check_fn in checks:
        try:
            results[check_name] = check_fn()
        except Exception as e:
            # Safety net: the checks handle their own errors, but one that
            # still raises must not abort the rest of the suite.
            print(f"\n✗ {check_name} check failed with exception: {e}")
            results[check_name] = False

    print("\n" + banner)
    print("VALIDATION SUMMARY")
    print(banner)
    for check_name, passed in results.items():
        status = "✓ PASS" if passed else "✗ FAIL"
        print(f"{status}: {check_name}")

    all_passed = all(results.values())
    print("\n" + banner)
    if all_passed:
        print("✓ ALL VALIDATIONS PASSED")
        print(banner)
        return 0

    print("✗ SOME VALIDATIONS FAILED")
    print(banner)
    return 1
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())