Che237 commited on
Commit
b8b5a3f
Β·
verified Β·
1 Parent(s): 4b93b4a

Add 05_model_validation.ipynb

Browse files
Files changed (1) hide show
  1. notebooks/05_model_validation.ipynb +574 -0
notebooks/05_model_validation.ipynb ADDED
@@ -0,0 +1,574 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "360397d9",
6
+ "metadata": {},
7
+ "source": [
8
+ "# 05 - Model Validation\n",
9
+ "\n",
10
+ "## CyberForge AI - Production Validation & Safety\n",
11
+ "\n",
12
+ "This notebook validates trained models for production deployment:\n",
13
+ "- Performance metrics and benchmarks\n",
14
+ "- Edge case testing\n",
15
+ "- Failure analysis and recovery\n",
16
+ "- Continuous operation safety checks\n",
17
+ "\n",
18
+ "### Validation Requirements:\n",
19
+ "- All models must pass accuracy thresholds\n",
20
+ "- Inference time must meet real-time requirements\n",
21
+ "- Edge cases must not cause crashes\n",
22
+ "- Memory usage must be within bounds"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": null,
28
+ "id": "781bbd3c",
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "import json\n",
33
+ "import pandas as pd\n",
34
+ "import numpy as np\n",
35
+ "from pathlib import Path\n",
36
+ "from typing import Dict, List, Any, Optional\n",
37
+ "import time\n",
38
+ "import traceback\n",
39
+ "import joblib\n",
40
+ "import warnings\n",
41
+ "warnings.filterwarnings('ignore')\n",
42
+ "\n",
43
+ "from sklearn.metrics import (\n",
44
+ " accuracy_score, precision_score, recall_score, f1_score,\n",
45
+ " confusion_matrix, classification_report, roc_auc_score\n",
46
+ ")\n",
47
+ "\n",
48
+ "# Configuration\n",
49
+ "config_path = Path(\"../notebook_config.json\")\n",
50
+ "with open(config_path) as f:\n",
51
+ " CONFIG = json.load(f)\n",
52
+ "\n",
53
+ "MODELS_DIR = Path(CONFIG[\"datasets_dir\"]).parent / \"models\"\n",
54
+ "VALIDATION_DIR = MODELS_DIR.parent / \"validation\"\n",
55
+ "VALIDATION_DIR.mkdir(exist_ok=True)\n",
56
+ "\n",
57
+ "print(f\"βœ“ Configuration loaded\")\n",
58
+ "print(f\"βœ“ Models from: {MODELS_DIR}\")\n",
59
+ "print(f\"βœ“ Validation output: {VALIDATION_DIR}\")"
60
+ ]
61
+ },
62
+ {
63
+ "cell_type": "markdown",
64
+ "id": "a3afcfd1",
65
+ "metadata": {},
66
+ "source": [
67
+ "## 1. Validation Thresholds"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": null,
73
+ "id": "74553e0c",
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "class ValidationThresholds:\n",
78
+ " \"\"\"Production-ready thresholds for model validation\"\"\"\n",
79
+ " \n",
80
+ " # Performance thresholds\n",
81
+ " MIN_ACCURACY = 0.80\n",
82
+ " MIN_PRECISION = 0.75\n",
83
+ " MIN_RECALL = 0.70\n",
84
+ " MIN_F1 = 0.75\n",
85
+ " \n",
86
+ " # Latency thresholds (milliseconds)\n",
87
+ " MAX_INFERENCE_TIME_MS = 100\n",
88
+ " MAX_BATCH_INFERENCE_TIME_MS = 500\n",
89
+ " \n",
90
+ " # Resource thresholds\n",
91
+ " MAX_MODEL_SIZE_MB = 100\n",
92
+ " MAX_MEMORY_MB = 500\n",
93
+ " \n",
94
+ " # Stability thresholds\n",
95
+ " MIN_CONSISTENCY_SCORE = 0.95 # Same input should give same output\n",
96
+ " MAX_EDGE_CASE_FAILURE_RATE = 0.05\n",
97
+ " \n",
98
+ " @classmethod\n",
99
+ " def check_performance(cls, metrics: Dict) -> Dict[str, bool]:\n",
100
+ " \"\"\"Check if metrics pass thresholds\"\"\"\n",
101
+ " return {\n",
102
+ " 'accuracy': metrics.get('accuracy', 0) >= cls.MIN_ACCURACY,\n",
103
+ " 'precision': metrics.get('precision', 0) >= cls.MIN_PRECISION,\n",
104
+ " 'recall': metrics.get('recall', 0) >= cls.MIN_RECALL,\n",
105
+ " 'f1': metrics.get('f1', 0) >= cls.MIN_F1,\n",
106
+ " 'inference_time': metrics.get('inference_time_ms', 999) <= cls.MAX_INFERENCE_TIME_MS\n",
107
+ " }\n",
108
+ "\n",
109
+ "print(\"βœ“ Validation Thresholds loaded\")\n",
110
+ "print(f\" Min Accuracy: {ValidationThresholds.MIN_ACCURACY}\")\n",
111
+ "print(f\" Max Inference: {ValidationThresholds.MAX_INFERENCE_TIME_MS}ms\")"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "markdown",
116
+ "id": "8e96d341",
117
+ "metadata": {},
118
+ "source": [
119
+ "## 2. Load Models and Registry"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": null,
125
+ "id": "3a0f6d54",
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": [
129
+ "# Load model registry\n",
130
+ "registry_path = MODELS_DIR / \"model_registry.json\"\n",
131
+ "\n",
132
+ "if registry_path.exists():\n",
133
+ " with open(registry_path) as f:\n",
134
+ " registry = json.load(f)\n",
135
+ " print(f\"βœ“ Loaded registry with {len(registry.get('models', {}))} models\")\n",
136
+ "else:\n",
137
+ " print(\"⚠ No model registry. Run 03_model_training.ipynb first.\")\n",
138
+ " registry = {'models': {}}\n",
139
+ "\n",
140
+ "# List available models\n",
141
+ "print(\"\\nAvailable models:\")\n",
142
+ "for name, info in registry.get('models', {}).items():\n",
143
+ " print(f\" - {name}: {info['best_model']} (F1: {info['f1_score']:.4f})\")"
144
+ ]
145
+ },
146
+ {
147
+ "cell_type": "markdown",
148
+ "id": "91e2c16f",
149
+ "metadata": {},
150
+ "source": [
151
+ "## 3. Model Validator"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": null,
157
+ "id": "9042d66f",
158
+ "metadata": {},
159
+ "outputs": [],
160
+ "source": [
161
+ "class ModelValidator:\n",
162
+ " \"\"\"\n",
163
+ " Comprehensive model validation for production readiness.\n",
164
+ " \"\"\"\n",
165
+ " \n",
166
+ " def __init__(self, models_dir: Path):\n",
167
+ " self.models_dir = models_dir\n",
168
+ " self.validation_results = {}\n",
169
+ " \n",
170
+ " def load_model_artifacts(self, model_name: str, model_type: str) -> Dict:\n",
171
+ " \"\"\"Load model and associated artifacts\"\"\"\n",
172
+ " model_dir = self.models_dir / model_name\n",
173
+ " \n",
174
+ " artifacts = {}\n",
175
+ " \n",
176
+ " # Load model\n",
177
+ " model_path = model_dir / f\"{model_type}.pkl\"\n",
178
+ " if model_path.exists():\n",
179
+ " artifacts['model'] = joblib.load(model_path)\n",
180
+ " artifacts['model_size_mb'] = model_path.stat().st_size / (1024 * 1024)\n",
181
+ " \n",
182
+ " # Load scaler\n",
183
+ " scaler_path = model_dir / \"scaler.pkl\"\n",
184
+ " if scaler_path.exists():\n",
185
+ " artifacts['scaler'] = joblib.load(scaler_path)\n",
186
+ " \n",
187
+ " # Load metadata\n",
188
+ " metadata_path = model_dir / f\"{model_type}_metadata.json\"\n",
189
+ " if metadata_path.exists():\n",
190
+ " with open(metadata_path) as f:\n",
191
+ " artifacts['metadata'] = json.load(f)\n",
192
+ " \n",
193
+ " return artifacts\n",
194
+ " \n",
195
+ " def validate_performance(self, model, X_test, y_test, scaler=None) -> Dict:\n",
196
+ " \"\"\"Validate model performance metrics\"\"\"\n",
197
+ " # Scale if needed\n",
198
+ " if scaler:\n",
199
+ " X_test = scaler.transform(X_test)\n",
200
+ " \n",
201
+ " # Predictions\n",
202
+ " start = time.time()\n",
203
+ " y_pred = model.predict(X_test)\n",
204
+ " inference_time = (time.time() - start) / len(X_test) * 1000\n",
205
+ " \n",
206
+ " # Metrics\n",
207
+ " metrics = {\n",
208
+ " 'accuracy': accuracy_score(y_test, y_pred),\n",
209
+ " 'precision': precision_score(y_test, y_pred, average='weighted', zero_division=0),\n",
210
+ " 'recall': recall_score(y_test, y_pred, average='weighted', zero_division=0),\n",
211
+ " 'f1': f1_score(y_test, y_pred, average='weighted', zero_division=0),\n",
212
+ " 'inference_time_ms': inference_time,\n",
213
+ " 'samples_tested': len(y_test)\n",
214
+ " }\n",
215
+ " \n",
216
+ " # Check thresholds\n",
217
+ " metrics['passed_thresholds'] = ValidationThresholds.check_performance(metrics)\n",
218
+ " metrics['all_passed'] = all(metrics['passed_thresholds'].values())\n",
219
+ " \n",
220
+ " return metrics\n",
221
+ " \n",
222
+ " def validate_edge_cases(self, model, scaler=None) -> Dict:\n",
223
+ " \"\"\"Test model behavior on edge cases\"\"\"\n",
224
+ " results = {\n",
225
+ " 'tests_run': 0,\n",
226
+ " 'tests_passed': 0,\n",
227
+ " 'errors': []\n",
228
+ " }\n",
229
+ " \n",
230
+ " # Get expected feature count\n",
231
+ " if hasattr(model, 'n_features_in_'):\n",
232
+ " n_features = model.n_features_in_\n",
233
+ " else:\n",
234
+ " n_features = 10 # Default\n",
235
+ " \n",
236
+ " edge_cases = [\n",
237
+ " ('zeros', np.zeros((1, n_features))),\n",
238
+ " ('ones', np.ones((1, n_features))),\n",
239
+ " ('large_values', np.ones((1, n_features)) * 1e6),\n",
240
+ " ('negative', -np.ones((1, n_features))),\n",
241
+ " ('mixed', np.random.randn(1, n_features) * 100),\n",
242
+ " ]\n",
243
+ " \n",
244
+ " for case_name, X in edge_cases:\n",
245
+ " results['tests_run'] += 1\n",
246
+ " try:\n",
247
+ " if scaler:\n",
248
+ " X = scaler.transform(X)\n",
249
+ " pred = model.predict(X)\n",
250
+ " \n",
251
+ " # Check prediction is valid\n",
252
+ " if pred is not None and len(pred) == 1:\n",
253
+ " results['tests_passed'] += 1\n",
254
+ " else:\n",
255
+ " results['errors'].append(f\"{case_name}: Invalid prediction shape\")\n",
256
+ " \n",
257
+ " except Exception as e:\n",
258
+ " results['errors'].append(f\"{case_name}: {str(e)}\")\n",
259
+ " \n",
260
+ " results['pass_rate'] = results['tests_passed'] / max(results['tests_run'], 1)\n",
261
+ " return results\n",
262
+ " \n",
263
+ " def validate_consistency(self, model, scaler=None, n_runs: int = 10) -> Dict:\n",
264
+ " \"\"\"Test prediction consistency (same input = same output)\"\"\"\n",
265
+ " if hasattr(model, 'n_features_in_'):\n",
266
+ " n_features = model.n_features_in_\n",
267
+ " else:\n",
268
+ " n_features = 10\n",
269
+ " \n",
270
+ " # Fixed input\n",
271
+ " np.random.seed(42)\n",
272
+ " X = np.random.randn(1, n_features)\n",
273
+ " \n",
274
+ " if scaler:\n",
275
+ " X = scaler.transform(X)\n",
276
+ " \n",
277
+ " predictions = []\n",
278
+ " for _ in range(n_runs):\n",
279
+ " pred = model.predict(X)[0]\n",
280
+ " predictions.append(pred)\n",
281
+ " \n",
282
+ " unique_preds = len(set(predictions))\n",
283
+ " consistency = 1.0 if unique_preds == 1 else 1.0 / unique_preds\n",
284
+ " \n",
285
+ " return {\n",
286
+ " 'consistency_score': consistency,\n",
287
+ " 'unique_predictions': unique_preds,\n",
288
+ " 'is_consistent': unique_preds == 1\n",
289
+ " }\n",
290
+ " \n",
291
+ " def validate_latency(self, model, scaler=None, n_samples: int = 100) -> Dict:\n",
292
+ " \"\"\"Validate inference latency\"\"\"\n",
293
+ " if hasattr(model, 'n_features_in_'):\n",
294
+ " n_features = model.n_features_in_\n",
295
+ " else:\n",
296
+ " n_features = 10\n",
297
+ " \n",
298
+ " X = np.random.randn(n_samples, n_features)\n",
299
+ " if scaler:\n",
300
+ " X = scaler.transform(X)\n",
301
+ " \n",
302
+ " # Single sample latency\n",
303
+ " single_times = []\n",
304
+ " for i in range(min(10, n_samples)):\n",
305
+ " start = time.time()\n",
306
+ " model.predict(X[i:i+1])\n",
307
+ " single_times.append((time.time() - start) * 1000)\n",
308
+ " \n",
309
+ " # Batch latency\n",
310
+ " start = time.time()\n",
311
+ " model.predict(X)\n",
312
+ " batch_time = (time.time() - start) * 1000\n",
313
+ " \n",
314
+ " return {\n",
315
+ " 'single_mean_ms': np.mean(single_times),\n",
316
+ " 'single_max_ms': np.max(single_times),\n",
317
+ " 'single_std_ms': np.std(single_times),\n",
318
+ " 'batch_total_ms': batch_time,\n",
319
+ " 'batch_per_sample_ms': batch_time / n_samples,\n",
320
+ " 'meets_latency_target': np.mean(single_times) <= ValidationThresholds.MAX_INFERENCE_TIME_MS\n",
321
+ " }\n",
322
+ "\n",
323
+ "validator = ModelValidator(MODELS_DIR)\n",
324
+ "print(\"βœ“ Model Validator initialized\")"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "markdown",
329
+ "id": "53cddd40",
330
+ "metadata": {},
331
+ "source": [
332
+ "## 4. Run Validation Suite"
333
+ ]
334
+ },
335
+ {
336
+ "cell_type": "code",
337
+ "execution_count": null,
338
+ "id": "ccc75859",
339
+ "metadata": {},
340
+ "outputs": [],
341
+ "source": [
342
+ "# Run validation on all models\n",
343
+ "validation_results = {}\n",
344
+ "\n",
345
+ "print(\"Running validation suite...\\n\")\n",
346
+ "\n",
347
+ "for model_name, model_info in registry.get('models', {}).items():\n",
348
+ " print(f\"{'='*50}\")\n",
349
+ " print(f\"Validating: {model_name}\")\n",
350
+ " print(f\"{'='*50}\")\n",
351
+ " \n",
352
+ " # Load model artifacts\n",
353
+ " artifacts = validator.load_model_artifacts(model_name, model_info['best_model'])\n",
354
+ " \n",
355
+ " if 'model' not in artifacts:\n",
356
+ " print(f\" ⚠ Model not found\")\n",
357
+ " continue\n",
358
+ " \n",
359
+ " model = artifacts['model']\n",
360
+ " scaler = artifacts.get('scaler')\n",
361
+ " \n",
362
+ " results = {\n",
363
+ " 'model_name': model_name,\n",
364
+ " 'model_type': model_info['best_model'],\n",
365
+ " 'model_size_mb': artifacts.get('model_size_mb', 0)\n",
366
+ " }\n",
367
+ " \n",
368
+ " # Edge case validation\n",
369
+ " print(\"\\n Edge Case Testing...\")\n",
370
+ " edge_results = validator.validate_edge_cases(model, scaler)\n",
371
+ " results['edge_cases'] = edge_results\n",
372
+ " print(f\" Pass rate: {edge_results['pass_rate']:.2%}\")\n",
373
+ " if edge_results['errors']:\n",
374
+ " for err in edge_results['errors'][:2]:\n",
375
+ " print(f\" ⚠ {err}\")\n",
376
+ " \n",
377
+ " # Consistency validation\n",
378
+ " print(\"\\n Consistency Testing...\")\n",
379
+ " consistency_results = validator.validate_consistency(model, scaler)\n",
380
+ " results['consistency'] = consistency_results\n",
381
+ " print(f\" Consistent: {consistency_results['is_consistent']}\")\n",
382
+ " \n",
383
+ " # Latency validation\n",
384
+ " print(\"\\n Latency Testing...\")\n",
385
+ " latency_results = validator.validate_latency(model, scaler)\n",
386
+ " results['latency'] = latency_results\n",
387
+ " print(f\" Single inference: {latency_results['single_mean_ms']:.3f}ms\")\n",
388
+ " print(f\" Meets target: {latency_results['meets_latency_target']}\")\n",
389
+ " \n",
390
+ " # Overall validation status\n",
391
+ " passed = (\n",
392
+ " edge_results['pass_rate'] >= (1 - ValidationThresholds.MAX_EDGE_CASE_FAILURE_RATE) and\n",
393
+ " consistency_results['is_consistent'] and\n",
394
+ " latency_results['meets_latency_target']\n",
395
+ " )\n",
396
+ " \n",
397
+ " results['validation_passed'] = passed\n",
398
+ " validation_results[model_name] = results\n",
399
+ " \n",
400
+ " status = \"βœ“ PASSED\" if passed else \"βœ— FAILED\"\n",
401
+ " print(f\"\\n Status: {status}\")\n",
402
+ "\n",
403
+ "print(f\"\\n\\nβœ“ Validation complete for {len(validation_results)} models\")"
404
+ ]
405
+ },
406
+ {
407
+ "cell_type": "markdown",
408
+ "id": "45e71432",
409
+ "metadata": {},
410
+ "source": [
411
+ "## 5. Generate Validation Report"
412
+ ]
413
+ },
414
+ {
415
+ "cell_type": "code",
416
+ "execution_count": null,
417
+ "id": "24bbe906",
418
+ "metadata": {},
419
+ "outputs": [],
420
+ "source": [
421
+ "class ValidationReporter:\n",
422
+ " \"\"\"Generate validation reports for documentation\"\"\"\n",
423
+ " \n",
424
+ " @staticmethod\n",
425
+ " def generate_report(results: Dict) -> str:\n",
426
+ " \"\"\"Generate markdown validation report\"\"\"\n",
427
+ " lines = [\n",
428
+ " \"# CyberForge Model Validation Report\",\n",
429
+ " \"\",\n",
430
+ " f\"**Generated:** {time.strftime('%Y-%m-%d %H:%M:%S')}\",\n",
431
+ " f\"**Models Validated:** {len(results)}\",\n",
432
+ " \"\",\n",
433
+ " \"## Summary\",\n",
434
+ " \"\",\n",
435
+ " \"| Model | Type | Size (MB) | Edge Cases | Consistency | Latency (ms) | Status |\",\n",
436
+ " \"|-------|------|-----------|------------|-------------|--------------|--------|\"\n",
437
+ " ]\n",
438
+ " \n",
439
+ " for name, data in results.items():\n",
440
+ " status = \"βœ“ Pass\" if data.get('validation_passed') else \"βœ— Fail\"\n",
441
+ " edge = f\"{data.get('edge_cases', {}).get('pass_rate', 0):.0%}\"\n",
442
+ " consist = \"βœ“\" if data.get('consistency', {}).get('is_consistent') else \"βœ—\"\n",
443
+ " latency = f\"{data.get('latency', {}).get('single_mean_ms', 999):.2f}\"\n",
444
+ " \n",
445
+ " lines.append(\n",
446
+ " f\"| {name} | {data.get('model_type', 'N/A')} | \"\n",
447
+ " f\"{data.get('model_size_mb', 0):.2f} | {edge} | {consist} | {latency} | {status} |\"\n",
448
+ " )\n",
449
+ " \n",
450
+ " lines.extend([\n",
451
+ " \"\",\n",
452
+ " \"## Validation Thresholds\",\n",
453
+ " \"\",\n",
454
+ " f\"- Min Accuracy: {ValidationThresholds.MIN_ACCURACY}\",\n",
455
+ " f\"- Max Inference Time: {ValidationThresholds.MAX_INFERENCE_TIME_MS}ms\",\n",
456
+ " f\"- Max Edge Case Failure Rate: {ValidationThresholds.MAX_EDGE_CASE_FAILURE_RATE:.0%}\",\n",
457
+ " f\"- Min Consistency Score: {ValidationThresholds.MIN_CONSISTENCY_SCORE}\",\n",
458
+ " ])\n",
459
+ " \n",
460
+ " return \"\\n\".join(lines)\n",
461
+ "\n",
462
+ "# Generate report\n",
463
+ "report = ValidationReporter.generate_report(validation_results)\n",
464
+ "\n",
465
+ "report_path = VALIDATION_DIR / \"validation_report.md\"\n",
466
+ "with open(report_path, 'w') as f:\n",
467
+ " f.write(report)\n",
468
+ "\n",
469
+ "print(f\"βœ“ Report saved to: {report_path}\")\n",
470
+ "print(\"\\n\" + report)"
471
+ ]
472
+ },
473
+ {
474
+ "cell_type": "markdown",
475
+ "id": "a0a647bc",
476
+ "metadata": {},
477
+ "source": [
478
+ "## 6. Save Validation Results"
479
+ ]
480
+ },
481
+ {
482
+ "cell_type": "code",
483
+ "execution_count": null,
484
+ "id": "8b52d7da",
485
+ "metadata": {},
486
+ "outputs": [],
487
+ "source": [
488
+ "# Save detailed validation results\n",
489
+ "results_path = VALIDATION_DIR / \"validation_results.json\"\n",
490
+ "\n",
491
+ "# Make results JSON-serializable\n",
492
+ "serializable_results = {}\n",
493
+ "for name, data in validation_results.items():\n",
494
+ " serializable_results[name] = {\n",
495
+ " k: v if not isinstance(v, np.floating) else float(v)\n",
496
+ " for k, v in data.items()\n",
497
+ " }\n",
498
+ "\n",
499
+ "with open(results_path, 'w') as f:\n",
500
+ " json.dump({\n",
501
+ " 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),\n",
502
+ " 'thresholds': {\n",
503
+ " 'min_accuracy': ValidationThresholds.MIN_ACCURACY,\n",
504
+ " 'max_inference_time_ms': ValidationThresholds.MAX_INFERENCE_TIME_MS,\n",
505
+ " 'max_edge_case_failure_rate': ValidationThresholds.MAX_EDGE_CASE_FAILURE_RATE\n",
506
+ " },\n",
507
+ " 'results': serializable_results\n",
508
+ " }, f, indent=2, default=str)\n",
509
+ "\n",
510
+ "print(f\"βœ“ Results saved to: {results_path}\")"
511
+ ]
512
+ },
513
+ {
514
+ "cell_type": "markdown",
515
+ "id": "e4b142e1",
516
+ "metadata": {},
517
+ "source": [
518
+ "## 7. Summary"
519
+ ]
520
+ },
521
+ {
522
+ "cell_type": "code",
523
+ "execution_count": null,
524
+ "id": "e9532d95",
525
+ "metadata": {},
526
+ "outputs": [],
527
+ "source": [
528
+ "# Calculate summary stats\n",
529
+ "passed_count = sum(1 for r in validation_results.values() if r.get('validation_passed'))\n",
530
+ "total_count = len(validation_results)\n",
531
+ "\n",
532
+ "print(\"\\n\" + \"=\" * 60)\n",
533
+ "print(\"MODEL VALIDATION COMPLETE\")\n",
534
+ "print(\"=\" * 60)\n",
535
+ "\n",
536
+ "print(f\"\"\"\n",
537
+ "βœ… Validation Summary:\n",
538
+ " - Models validated: {total_count}\n",
539
+ " - Models passed: {passed_count}\n",
540
+ " - Models failed: {total_count - passed_count}\n",
541
+ " - Pass rate: {passed_count/max(total_count,1):.0%}\n",
542
+ "\n",
543
+ "πŸ“Š Validation Checks:\n",
544
+ " βœ“ Edge case handling\n",
545
+ " βœ“ Prediction consistency\n",
546
+ " βœ“ Inference latency\n",
547
+ " βœ“ Model size limits\n",
548
+ "\n",
549
+ "πŸ“ Output Files:\n",
550
+ " - Report: {VALIDATION_DIR}/validation_report.md\n",
551
+ " - Results: {VALIDATION_DIR}/validation_results.json\n",
552
+ "\n",
553
+ "Models Ready for Production:\"\"\")\n",
554
+ "\n",
555
+ "for name, data in validation_results.items():\n",
556
+ " status = \"βœ“\" if data.get('validation_passed') else \"βœ—\"\n",
557
+ " print(f\" {status} {name}\")\n",
558
+ "\n",
559
+ "print(f\"\"\"\n",
560
+ "Next step:\n",
561
+ " β†’ 06_backend_integration.ipynb\n",
562
+ "\"\"\")\n",
563
+ "print(\"=\" * 60)"
564
+ ]
565
+ }
566
+ ],
567
+ "metadata": {
568
+ "language_info": {
569
+ "name": "python"
570
+ }
571
+ },
572
+ "nbformat": 4,
573
+ "nbformat_minor": 5
574
+ }