{ "cells": [ { "cell_type": "markdown", "id": "360397d9", "metadata": {}, "source": [ "# 05 - Model Validation\n", "\n", "## CyberForge AI - Production Validation & Safety\n", "\n", "This notebook validates trained models for production deployment:\n", "- Performance metrics and benchmarks\n", "- Edge case testing\n", "- Failure analysis and recovery\n", "- Continuous operation safety checks\n", "\n", "### Validation Requirements:\n", "- All models must pass accuracy thresholds\n", "- Inference time must meet real-time requirements\n", "- Edge cases must not cause crashes\n", "- Memory usage must be within bounds" ] }, { "cell_type": "code", "execution_count": null, "id": "781bbd3c", "metadata": {}, "outputs": [], "source": [ "import json\n", "import pandas as pd\n", "import numpy as np\n", "from pathlib import Path\n", "from typing import Dict, List, Any, Optional\n", "import time\n", "import traceback\n", "import joblib\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", "from sklearn.metrics import (\n", " accuracy_score, precision_score, recall_score, f1_score,\n", " confusion_matrix, classification_report, roc_auc_score\n", ")\n", "\n", "# Configuration\n", "config_path = Path(\"notebook_config.json\")\nif not config_path.exists():\n config_path = Path(\"/home/user/app/notebooks/notebook_config.json\")\n", "with open(config_path) as f:\n", " CONFIG = json.load(f)\n", "\n", "MODELS_DIR = Path(CONFIG[\"datasets_dir\"]).parent / \"models\"\n", "VALIDATION_DIR = MODELS_DIR.parent / \"validation\"\n", "VALIDATION_DIR.mkdir(exist_ok=True)\n", "\n", "print(f\"✓ Configuration loaded\")\n", "print(f\"✓ Models from: {MODELS_DIR}\")\n", "print(f\"✓ Validation output: {VALIDATION_DIR}\")" ] }, { "cell_type": "markdown", "id": "a3afcfd1", "metadata": {}, "source": [ "## 1. Validation Thresholds" ] }, { "cell_type": "code", "execution_count": null, "id": "74553e0c", "metadata": {}, "outputs": [], "source": [ "class ValidationThresholds:\n", " \"\"\"Production-ready thresholds for model validation\"\"\"\n", " \n", " # Performance thresholds\n", " MIN_ACCURACY = 0.80\n", " MIN_PRECISION = 0.75\n", " MIN_RECALL = 0.70\n", " MIN_F1 = 0.75\n", " \n", " # Latency thresholds (milliseconds)\n", " MAX_INFERENCE_TIME_MS = 100\n", " MAX_BATCH_INFERENCE_TIME_MS = 500\n", " \n", " # Resource thresholds\n", " MAX_MODEL_SIZE_MB = 100\n", " MAX_MEMORY_MB = 500\n", " \n", " # Stability thresholds\n", " MIN_CONSISTENCY_SCORE = 0.95 # Same input should give same output\n", " MAX_EDGE_CASE_FAILURE_RATE = 0.05\n", " \n", " @classmethod\n", " def check_performance(cls, metrics: Dict) -> Dict[str, bool]:\n", " \"\"\"Check if metrics pass thresholds\"\"\"\n", " return {\n", " 'accuracy': metrics.get('accuracy', 0) >= cls.MIN_ACCURACY,\n", " 'precision': metrics.get('precision', 0) >= cls.MIN_PRECISION,\n", " 'recall': metrics.get('recall', 0) >= cls.MIN_RECALL,\n", " 'f1': metrics.get('f1', 0) >= cls.MIN_F1,\n", " 'inference_time': metrics.get('inference_time_ms', 999) <= cls.MAX_INFERENCE_TIME_MS\n", " }\n", "\n", "print(\"✓ Validation Thresholds loaded\")\n", "print(f\" Min Accuracy: {ValidationThresholds.MIN_ACCURACY}\")\n", "print(f\" Max Inference: {ValidationThresholds.MAX_INFERENCE_TIME_MS}ms\")" ] }, { "cell_type": "markdown", "id": "8e96d341", "metadata": {}, "source": [ "## 2. Load Models and Registry" ] }, { "cell_type": "code", "execution_count": null, "id": "3a0f6d54", "metadata": {}, "outputs": [], "source": [ "# Load model registry\n", "registry_path = MODELS_DIR / \"model_registry.json\"\n", "\n", "if registry_path.exists():\n", " with open(registry_path) as f:\n", " registry = json.load(f)\n", " print(f\"✓ Loaded registry with {len(registry.get('models', {}))} models\")\n", "else:\n", " print(\"⚠ No model registry. Run 03_model_training.ipynb first.\")\n", " registry = {'models': {}}\n", "\n", "# List available models\n", "print(\"\\nAvailable models:\")\n", "for name, info in registry.get('models', {}).items():\n", " print(f\" - {name}: {info['best_model']} (F1: {info['f1_score']:.4f})\")" ] }, { "cell_type": "markdown", "id": "91e2c16f", "metadata": {}, "source": [ "## 3. Model Validator" ] }, { "cell_type": "code", "execution_count": null, "id": "9042d66f", "metadata": {}, "outputs": [], "source": [ "class ModelValidator:\n", " \"\"\"\n", " Comprehensive model validation for production readiness.\n", " \"\"\"\n", " \n", " def __init__(self, models_dir: Path):\n", " self.models_dir = models_dir\n", " self.validation_results = {}\n", " \n", " def load_model_artifacts(self, model_name: str, model_type: str) -> Dict:\n", " \"\"\"Load model and associated artifacts\"\"\"\n", " model_dir = self.models_dir / model_name\n", " \n", " artifacts = {}\n", " \n", " # Load model\n", " model_path = model_dir / f\"{model_type}.pkl\"\n", " if model_path.exists():\n", " artifacts['model'] = joblib.load(model_path)\n", " artifacts['model_size_mb'] = model_path.stat().st_size / (1024 * 1024)\n", " \n", " # Load scaler\n", " scaler_path = model_dir / \"scaler.pkl\"\n", " if scaler_path.exists():\n", " artifacts['scaler'] = joblib.load(scaler_path)\n", " \n", " # Load metadata\n", " metadata_path = model_dir / f\"{model_type}_metadata.json\"\n", " if metadata_path.exists():\n", " with open(metadata_path) as f:\n", " artifacts['metadata'] = json.load(f)\n", " \n", " return artifacts\n", " \n", " def validate_performance(self, model, X_test, y_test, scaler=None) -> Dict:\n", " \"\"\"Validate model performance metrics\"\"\"\n", " # Scale if needed\n", " if scaler:\n", " X_test = scaler.transform(X_test)\n", " \n", " # Predictions\n", " start = time.time()\n", " y_pred = model.predict(X_test)\n", " inference_time = (time.time() - start) / len(X_test) * 1000\n", " \n", " # Metrics\n", " metrics = {\n", " 'accuracy': accuracy_score(y_test, y_pred),\n", " 'precision': precision_score(y_test, y_pred, average='weighted', zero_division=0),\n", " 'recall': recall_score(y_test, y_pred, average='weighted', zero_division=0),\n", " 'f1': f1_score(y_test, y_pred, average='weighted', zero_division=0),\n", " 'inference_time_ms': inference_time,\n", " 'samples_tested': len(y_test)\n", " }\n", " \n", " # Check thresholds\n", " metrics['passed_thresholds'] = ValidationThresholds.check_performance(metrics)\n", " metrics['all_passed'] = all(metrics['passed_thresholds'].values())\n", " \n", " return metrics\n", " \n", " def validate_edge_cases(self, model, scaler=None) -> Dict:\n", " \"\"\"Test model behavior on edge cases\"\"\"\n", " results = {\n", " 'tests_run': 0,\n", " 'tests_passed': 0,\n", " 'errors': []\n", " }\n", " \n", " # Get expected feature count\n", " if hasattr(model, 'n_features_in_'):\n", " n_features = model.n_features_in_\n", " else:\n", " n_features = 10 # Default\n", " \n", " edge_cases = [\n", " ('zeros', np.zeros((1, n_features))),\n", " ('ones', np.ones((1, n_features))),\n", " ('large_values', np.ones((1, n_features)) * 1e6),\n", " ('negative', -np.ones((1, n_features))),\n", " ('mixed', np.random.randn(1, n_features) * 100),\n", " ]\n", " \n", " for case_name, X in edge_cases:\n", " results['tests_run'] += 1\n", " try:\n", " if scaler:\n", " X = scaler.transform(X)\n", " pred = model.predict(X)\n", " \n", " # Check prediction is valid\n", " if pred is not None and len(pred) == 1:\n", " results['tests_passed'] += 1\n", " else:\n", " results['errors'].append(f\"{case_name}: Invalid prediction shape\")\n", " \n", " except Exception as e:\n", " results['errors'].append(f\"{case_name}: {str(e)}\")\n", " \n", " results['pass_rate'] = results['tests_passed'] / max(results['tests_run'], 1)\n", " return results\n", " \n", " def validate_consistency(self, model, scaler=None, n_runs: int = 10) -> Dict:\n", " \"\"\"Test prediction consistency (same input = same output)\"\"\"\n", " if hasattr(model, 'n_features_in_'):\n", " n_features = model.n_features_in_\n", " else:\n", " n_features = 10\n", " \n", " # Fixed input\n", " np.random.seed(42)\n", " X = np.random.randn(1, n_features)\n", " \n", " if scaler:\n", " X = scaler.transform(X)\n", " \n", " predictions = []\n", " for _ in range(n_runs):\n", " pred = model.predict(X)[0]\n", " predictions.append(pred)\n", " \n", " unique_preds = len(set(predictions))\n", " consistency = 1.0 if unique_preds == 1 else 1.0 / unique_preds\n", " \n", " return {\n", " 'consistency_score': consistency,\n", " 'unique_predictions': unique_preds,\n", " 'is_consistent': unique_preds == 1\n", " }\n", " \n", " def validate_latency(self, model, scaler=None, n_samples: int = 100) -> Dict:\n", " \"\"\"Validate inference latency\"\"\"\n", " if hasattr(model, 'n_features_in_'):\n", " n_features = model.n_features_in_\n", " else:\n", " n_features = 10\n", " \n", " X = np.random.randn(n_samples, n_features)\n", " if scaler:\n", " X = scaler.transform(X)\n", " \n", " # Single sample latency\n", " single_times = []\n", " for i in range(min(10, n_samples)):\n", " start = time.time()\n", " model.predict(X[i:i+1])\n", " single_times.append((time.time() - start) * 1000)\n", " \n", " # Batch latency\n", " start = time.time()\n", " model.predict(X)\n", " batch_time = (time.time() - start) * 1000\n", " \n", " return {\n", " 'single_mean_ms': np.mean(single_times),\n", " 'single_max_ms': np.max(single_times),\n", " 'single_std_ms': np.std(single_times),\n", " 'batch_total_ms': batch_time,\n", " 'batch_per_sample_ms': batch_time / n_samples,\n", " 'meets_latency_target': np.mean(single_times) <= ValidationThresholds.MAX_INFERENCE_TIME_MS\n", " }\n", "\n", "validator = ModelValidator(MODELS_DIR)\n", "print(\"✓ Model Validator initialized\")" ] }, { "cell_type": "markdown", "id": "53cddd40", "metadata": {}, "source": [ "## 4. Run Validation Suite" ] }, { "cell_type": "code", "execution_count": null, "id": "ccc75859", "metadata": {}, "outputs": [], "source": [ "# Run validation on all models\n", "validation_results = {}\n", "\n", "print(\"Running validation suite...\\n\")\n", "\n", "for model_name, model_info in registry.get('models', {}).items():\n", " print(f\"{'='*50}\")\n", " print(f\"Validating: {model_name}\")\n", " print(f\"{'='*50}\")\n", " \n", " # Load model artifacts\n", " artifacts = validator.load_model_artifacts(model_name, model_info['best_model'])\n", " \n", " if 'model' not in artifacts:\n", " print(f\" ⚠ Model not found\")\n", " continue\n", " \n", " model = artifacts['model']\n", " scaler = artifacts.get('scaler')\n", " \n", " results = {\n", " 'model_name': model_name,\n", " 'model_type': model_info['best_model'],\n", " 'model_size_mb': artifacts.get('model_size_mb', 0)\n", " }\n", " \n", " # Edge case validation\n", " print(\"\\n Edge Case Testing...\")\n", " edge_results = validator.validate_edge_cases(model, scaler)\n", " results['edge_cases'] = edge_results\n", " print(f\" Pass rate: {edge_results['pass_rate']:.2%}\")\n", " if edge_results['errors']:\n", " for err in edge_results['errors'][:2]:\n", " print(f\" ⚠ {err}\")\n", " \n", " # Consistency validation\n", " print(\"\\n Consistency Testing...\")\n", " consistency_results = validator.validate_consistency(model, scaler)\n", " results['consistency'] = consistency_results\n", " print(f\" Consistent: {consistency_results['is_consistent']}\")\n", " \n", " # Latency validation\n", " print(\"\\n Latency Testing...\")\n", " latency_results = validator.validate_latency(model, scaler)\n", " results['latency'] = latency_results\n", " print(f\" Single inference: {latency_results['single_mean_ms']:.3f}ms\")\n", " print(f\" Meets target: {latency_results['meets_latency_target']}\")\n", " \n", " # Overall validation status\n", " passed = (\n", " edge_results['pass_rate'] >= (1 - ValidationThresholds.MAX_EDGE_CASE_FAILURE_RATE) and\n", " consistency_results['is_consistent'] and\n", " latency_results['meets_latency_target']\n", " )\n", " \n", " results['validation_passed'] = passed\n", " validation_results[model_name] = results\n", " \n", " status = \"✓ PASSED\" if passed else \"✗ FAILED\"\n", " print(f\"\\n Status: {status}\")\n", "\n", "print(f\"\\n\\n✓ Validation complete for {len(validation_results)} models\")" ] }, { "cell_type": "markdown", "id": "45e71432", "metadata": {}, "source": [ "## 5. Generate Validation Report" ] }, { "cell_type": "code", "execution_count": null, "id": "24bbe906", "metadata": {}, "outputs": [], "source": [ "class ValidationReporter:\n", " \"\"\"Generate validation reports for documentation\"\"\"\n", " \n", " @staticmethod\n", " def generate_report(results: Dict) -> str:\n", " \"\"\"Generate markdown validation report\"\"\"\n", " lines = [\n", " \"# CyberForge Model Validation Report\",\n", " \"\",\n", " f\"**Generated:** {time.strftime('%Y-%m-%d %H:%M:%S')}\",\n", " f\"**Models Validated:** {len(results)}\",\n", " \"\",\n", " \"## Summary\",\n", " \"\",\n", " \"| Model | Type | Size (MB) | Edge Cases | Consistency | Latency (ms) | Status |\",\n", " \"|-------|------|-----------|------------|-------------|--------------|--------|\"\n", " ]\n", " \n", " for name, data in results.items():\n", " status = \"✓ Pass\" if data.get('validation_passed') else \"✗ Fail\"\n", " edge = f\"{data.get('edge_cases', {}).get('pass_rate', 0):.0%}\"\n", " consist = \"✓\" if data.get('consistency', {}).get('is_consistent') else \"✗\"\n", " latency = f\"{data.get('latency', {}).get('single_mean_ms', 999):.2f}\"\n", " \n", " lines.append(\n", " f\"| {name} | {data.get('model_type', 'N/A')} | \"\n", " f\"{data.get('model_size_mb', 0):.2f} | {edge} | {consist} | {latency} | {status} |\"\n", " )\n", " \n", " lines.extend([\n", " \"\",\n", " \"## Validation Thresholds\",\n", " \"\",\n", " f\"- Min Accuracy: {ValidationThresholds.MIN_ACCURACY}\",\n", " f\"- Max Inference Time: {ValidationThresholds.MAX_INFERENCE_TIME_MS}ms\",\n", " f\"- Max Edge Case Failure Rate: {ValidationThresholds.MAX_EDGE_CASE_FAILURE_RATE:.0%}\",\n", " f\"- Min Consistency Score: {ValidationThresholds.MIN_CONSISTENCY_SCORE}\",\n", " ])\n", " \n", " return \"\\n\".join(lines)\n", "\n", "# Generate report\n", "report = ValidationReporter.generate_report(validation_results)\n", "\n", "report_path = VALIDATION_DIR / \"validation_report.md\"\n", "with open(report_path, 'w') as f:\n", " f.write(report)\n", "\n", "print(f\"✓ Report saved to: {report_path}\")\n", "print(\"\\n\" + report)" ] }, { "cell_type": "markdown", "id": "a0a647bc", "metadata": {}, "source": [ "## 6. Save Validation Results" ] }, { "cell_type": "code", "execution_count": null, "id": "8b52d7da", "metadata": {}, "outputs": [], "source": [ "# Save detailed validation results\n", "results_path = VALIDATION_DIR / \"validation_results.json\"\n", "\n", "# Make results JSON-serializable\n", "serializable_results = {}\n", "for name, data in validation_results.items():\n", " serializable_results[name] = {\n", " k: v if not isinstance(v, np.floating) else float(v)\n", " for k, v in data.items()\n", " }\n", "\n", "with open(results_path, 'w') as f:\n", " json.dump({\n", " 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),\n", " 'thresholds': {\n", " 'min_accuracy': ValidationThresholds.MIN_ACCURACY,\n", " 'max_inference_time_ms': ValidationThresholds.MAX_INFERENCE_TIME_MS,\n", " 'max_edge_case_failure_rate': ValidationThresholds.MAX_EDGE_CASE_FAILURE_RATE\n", " },\n", " 'results': serializable_results\n", " }, f, indent=2, default=str)\n", "\n", "print(f\"✓ Results saved to: {results_path}\")" ] }, { "cell_type": "markdown", "id": "e4b142e1", "metadata": {}, "source": [ "## 7. Summary" ] }, { "cell_type": "code", "execution_count": null, "id": "e9532d95", "metadata": {}, "outputs": [], "source": [ "# Calculate summary stats\n", "passed_count = sum(1 for r in validation_results.values() if r.get('validation_passed'))\n", "total_count = len(validation_results)\n", "\n", "print(\"\\n\" + \"=\" * 60)\n", "print(\"MODEL VALIDATION COMPLETE\")\n", "print(\"=\" * 60)\n", "\n", "print(f\"\"\"\n", "✅ Validation Summary:\n", " - Models validated: {total_count}\n", " - Models passed: {passed_count}\n", " - Models failed: {total_count - passed_count}\n", " - Pass rate: {passed_count/max(total_count,1):.0%}\n", "\n", "📊 Validation Checks:\n", " ✓ Edge case handling\n", " ✓ Prediction consistency\n", " ✓ Inference latency\n", " ✓ Model size limits\n", "\n", "📁 Output Files:\n", " - Report: {VALIDATION_DIR}/validation_report.md\n", " - Results: {VALIDATION_DIR}/validation_results.json\n", "\n", "Models Ready for Production:\"\"\")\n", "\n", "for name, data in validation_results.items():\n", " status = \"✓\" if data.get('validation_passed') else \"✗\"\n", " print(f\" {status} {name}\")\n", "\n", "print(f\"\"\"\n", "Next step:\n", " → 06_backend_integration.ipynb\n", "\"\"\")\n", "print(\"=\" * 60)" ] } ], "metadata": { "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 5 }