Spaces:
Configuration error
Configuration error
#!/usr/bin/env python3
"""Validation script for enhanced FraudShield files.

This script verifies:
1. Python version requirements (3.10+)
2. All enhanced modules can be imported
3. Pydantic models validate correctly
4. Results file has the correct structure and in-range scores
5. Data loader loads the bundle and returns cases for every task
6. Environment loads its data and resets for the easy task
"""
import importlib
import json
import sys
from pathlib import Path
def check_python_version() -> bool:
    """Verify Python 3.10+ is being used.

    Prints the check outcome together with the required and the running
    interpreter version.

    Returns:
        True when ``sys.version_info`` is at least (3, 10), False otherwise.
    """
    version = sys.version_info
    # version_info compares like a tuple, so this covers 3.10+ and any 4.x.
    is_valid = version >= (3, 10)
    status = "✓ PASS" if is_valid else "✗ FAIL"
    print(f"1. Python Version Check: {status}")
    print(f" Required: 3.10+ | Current: {version.major}.{version.minor}")
    return is_valid
def check_imports(modules=None) -> bool:
    """Verify all enhanced modules can be imported.

    Args:
        modules: Optional iterable of ``(module_name, file_name)`` pairs.
            ``module_name`` is what gets imported; ``file_name`` is only used
            in the printed report. Defaults to the six FraudShield modules.

    Returns:
        True when every module imported without raising, False otherwise.
    """
    print("\n2. Import Validation:")
    if modules is None:
        modules = [
            ("models", "models.py"),
            ("data_loader", "data_loader.py"),
            ("fraudshield_env", "fraudshield_env.py"),
            ("graders", "graders.py"),
            ("llm_agent", "llm_agent.py"),
            ("inference", "inference.py"),
        ]

    all_pass = True
    for module_name, file_name in modules:
        try:
            importlib.import_module(module_name)
        except Exception as e:
            # Importing runs the module's top-level code, so anything it
            # raises (not just ImportError) counts as a failed import.
            print(f" ✗ {file_name} import failed: {e}")
            all_pass = False
        else:
            print(f" ✓ {file_name} imports successfully")
    return all_pass
def check_pydantic_models() -> bool:
    """Verify Pydantic models work correctly.

    Builds a valid ``FraudCheckAction``, confirms that a confidence above 1.0
    is rejected, and checks the string values of ``DecisionEnum`` and
    ``TaskDifficulty``.

    Returns:
        True when every check passes; False when a check fails or anything
        raises (including a failed import of ``models``).
    """
    print("\n3. Pydantic Model Validation:")
    try:
        # Names that are not exercised below are still imported on purpose:
        # a missing model makes this import (and therefore the check) fail.
        from models import (  # noqa: F401
            FraudCheckAction, FraudCheckObservation, Reward,
            EpisodeState, StepResult, ResetResult, DecisionEnum,
            TaskDifficulty, TransactionData,
        )

        # Test FraudCheckAction
        action = FraudCheckAction(
            transaction_id="test_001",
            decision=DecisionEnum.FRAUD,
            confidence=0.95,
            reasoning="Test fraud decision based on seller account age",
        )

        # Test validation: confidence must be [0.0, 1.0]
        try:
            FraudCheckAction(
                transaction_id="test_002",
                decision=DecisionEnum.FRAUD,
                confidence=1.5,  # Invalid: > 1.0
                reasoning="This should fail validation",
            )
        except ValueError:
            # Expected: pydantic's ValidationError is a ValueError subclass.
            # Any other exception type is not a validation rejection and is
            # reported by the outer handler instead of being swallowed here.
            pass
        else:
            print(" ✗ Confidence validation failed (should reject > 1.0)")
            return False

        print(" ✓ FraudCheckAction validation passed")
        print(f" - transaction_id: {action.transaction_id}")
        print(f" - decision: {action.decision.value}")
        print(f" - confidence: {action.confidence}")

        # Explicit comparisons instead of ``assert`` so the checks still run
        # under ``python -O`` and the failure message names the bad member.
        enum_expectations = [
            (
                "DecisionEnum",
                [
                    (DecisionEnum.FRAUD, "fraud"),
                    (DecisionEnum.LEGITIMATE, "legitimate"),
                ],
            ),
            (
                "TaskDifficulty",
                [
                    (TaskDifficulty.EASY, "easy"),
                    (TaskDifficulty.MEDIUM, "medium"),
                    (TaskDifficulty.HARD, "hard"),
                ],
            ),
        ]
        for enum_name, expectations in enum_expectations:
            for member, expected in expectations:
                if member.value != expected:
                    raise ValueError(
                        f"{member!r} has value {member.value!r}, "
                        f"expected {expected!r}"
                    )
            print(f" ✓ {enum_name} validation passed")

        return True
    except Exception as e:
        print(f" ✗ Pydantic model validation failed: {e}")
        return False
def check_results_file(
    results_path="fraudshield_baseline_results.json",
    expected_final=0.8660,
    tolerance=0.001,
) -> bool:
    """Verify results file structure and scores.

    Args:
        results_path: Path of the JSON results file to validate.
        expected_final: Baseline ``final_score`` the file is compared against.
        tolerance: Maximum absolute difference still reported as a match.

    Returns:
        True when the file exists, has every required key, and all four
        scores are numbers within [0, 1]. A ``final_score`` that differs from
        ``expected_final`` only produces a warning and does not fail the
        check. Returns False on any structural problem or exception.
    """
    print("\n4. Results File Validation:")
    try:
        results_file = Path(results_path)
        if not results_file.exists():
            print(" ✗ Results file not found")
            return False

        with open(results_file, encoding="utf-8") as f:
            results = json.load(f)

        # Validate structure
        required_keys = ["final_score", "easy", "medium", "hard", "metadata"]
        missing_keys = [k for k in required_keys if k not in results]
        if missing_keys:
            print(f" ✗ Missing keys: {missing_keys}")
            return False
        print(" ✓ Results file has required structure")

        # Validate scores
        scores = {
            "Final": results["final_score"],
            "Easy": results["easy"]["score"],
            "Medium": results["medium"]["score"],
            "Hard": results["hard"]["score"],
        }
        for name, score in scores.items():
            # bool is an int subclass; reject it along with non-numbers so a
            # malformed file yields a clear message instead of a TypeError.
            if isinstance(score, bool) or not isinstance(score, (int, float)):
                print(f" ✗ {name} score is not a number: {score!r}")
                return False
            if not (0 <= score <= 1):
                print(f" ✗ {name} score out of range: {score}")
                return False
            print(f" ✓ {name} score: {score:.4f}")

        # Check if baseline score matches expected value (warning only).
        actual_final = results["final_score"]
        if abs(actual_final - expected_final) < tolerance:
            print(f" ✓ Baseline score matches expected value ({expected_final:.4f})")
        else:
            print(f" ⚠ Baseline score variance: expected {expected_final:.4f}, got {actual_final:.4f}")

        # Report task transaction counts; a missing key raises KeyError and is
        # turned into a failed check by the handler below.
        print(" ✓ Transaction counts:")
        print(f" - Easy: {results['easy']['num_transactions']}")
        print(f" - Medium: {results['medium']['num_transactions']}")
        print(f" - Hard: {results['hard']['num_transactions']}")
        return True
    except Exception as e:
        print(f" ✗ Results validation failed: {e}")
        return False
def check_data_loader() -> bool:
    """Verify data loader works correctly.

    Creates a ``FraudDataLoader`` for the ``data`` directory with seed 42,
    loads its bundle, prints the bundle summary, and requires a non-empty
    case list for each of the easy, medium and hard tasks.

    Returns:
        True when the bundle loads and every task has cases; False when a
        step fails or anything raises (including a failed import).
    """
    print("\n5. Data Loader Validation:")
    try:
        from data_loader import FraudDataLoader

        loader = FraudDataLoader(data_path="data", seed=42)
        if not loader.load_bundle():
            print(" ✗ Failed to load data bundle")
            return False
        print(" ✓ Data bundle loaded successfully")

        # Check task sizes
        summary = loader.get_bundle_summary()
        print(" ✓ Bundle summary:")
        print(f" - Snapshot ID: {summary.get('snapshot_id')}")
        print(f" - Schema version: {summary.get('schema_version')}")
        print(f" - Seed: {summary.get('seed')}")
        print(f" - Task sizes: {summary.get('task_sizes')}")

        # Verify task cases
        for task_name in ["easy", "medium", "hard"]:
            cases = loader.get_task_cases(task_name)
            if not cases:
                print(f" ✗ No cases found for {task_name} task")
                return False
            print(f" ✓ {task_name.capitalize()} task: {len(cases)} cases")
        return True
    except Exception as e:
        print(f" ✗ Data loader validation failed: {e}")
        return False
def check_environment() -> bool:
    """Verify environment works correctly.

    Creates a ``FraudShieldEnvironment`` for the ``data`` directory with seed
    42, loads its data, resets it for the ``easy`` task, and prints fields of
    the initial observation.

    Returns:
        True when loading and reset succeed and the observation fields can be
        read; False when loading fails or anything raises (including a failed
        import).
    """
    print("\n6. Environment Validation:")
    try:
        from fraudshield_env import FraudShieldEnvironment

        env = FraudShieldEnvironment(data_path="data", seed=42)
        if not env.load_data():
            print(" ✗ Failed to load environment data")
            return False
        print(" ✓ Environment created and data loaded")

        # Test reset
        reset_result = env.reset("easy")
        print(f" ✓ Reset successful (episode_id: {env.episode_id})")

        # Test observation: reading these attributes is itself the check,
        # a missing field raises and is reported by the handler below.
        obs = reset_result.observation
        print(" ✓ Initial observation created")
        print(f" - Transaction ID: {obs.transaction_id}")
        print(f" - Task: {obs.task_name.value}")
        print(f" - Episode step: {obs.episode_step}")
        return True
    except Exception as e:
        print(f" ✗ Environment validation failed: {e}")
        return False
def main(checks=None) -> int:
    """Run all validation checks.

    Args:
        checks: Optional iterable of ``(check_name, check_fn)`` pairs, where
            ``check_fn`` takes no arguments and returns a truthy value on
            success. Defaults to the six built-in checks, in report order.

    Returns:
        0 when every check passed, 1 otherwise (suitable for ``sys.exit``).
    """
    rule = "=" * 70
    print(rule)
    print("FraudShield Enhancement Validation Suite")
    print(rule)

    if checks is None:
        checks = [
            ("Python Version", check_python_version),
            ("Imports", check_imports),
            ("Pydantic Models", check_pydantic_models),
            ("Results File", check_results_file),
            ("Data Loader", check_data_loader),
            ("Environment", check_environment),
        ]

    results = {}
    for check_name, check_fn in checks:
        try:
            results[check_name] = check_fn()
        except Exception as e:
            # One crashing check must not stop the remaining checks or the
            # summary; record it as a failure and keep going.
            print(f"\n✗ {check_name} check failed with exception: {e}")
            results[check_name] = False

    print("\n" + rule)
    print("VALIDATION SUMMARY")
    print(rule)
    for check_name, passed in results.items():
        status = "✓ PASS" if passed else "✗ FAIL"
        print(f"{status}: {check_name}")

    all_passed = all(results.values())
    print("\n" + rule)
    print("✓ ALL VALIDATIONS PASSED" if all_passed else "✗ SOME VALIDATIONS FAILED")
    print(rule)
    return 0 if all_passed else 1
# Script entry point: run the suite and use its result (0 = all checks passed,
# 1 = at least one failed) as the process exit status.
if __name__ == "__main__":
    sys.exit(main())