# %% [markdown]
# # 06 - Backend Integration
#
# ## CyberForge AI - API-Ready Model Packaging
#
# This notebook prepares models for backend integration:
# - Model serialization in API-friendly formats
# - Versioned model artifacts
# - Backend-compatible inference endpoints
# - Integration with mlService.js and ThreatService.js
#
# ### Backend Integration Points:
# - `mlService.js` - Primary ML model interface
# - `WebScraperAPIService.js` - Input data format
# - `threatService.js` - Threat analysis outputs

# %%
import json
import os
import time
import shutil
from pathlib import Path
from typing import Dict, List, Any
import joblib
import hashlib
import warnings
warnings.filterwarnings('ignore')

# Configuration
# Candidate locations for the notebook config, tried in order: the current
# working directory first, then the absolute fallback path.
CONFIG_CANDIDATES = [
    Path("notebook_config.json"),
    Path("/home/user/app/notebooks/notebook_config.json"),
]
config_path = next((p for p in CONFIG_CANDIDATES if p.exists()), None)
if config_path is None:
    # Fail with the full list of locations instead of an open() error that
    # only mentions the last fallback.
    raise FileNotFoundError(
        "notebook_config.json not found; looked in: "
        + ", ".join(str(p) for p in CONFIG_CANDIDATES)
    )
with open(config_path, encoding="utf-8") as f:
    CONFIG = json.load(f)

# All output directories are siblings of the configured datasets directory.
MODELS_DIR = Path(CONFIG["datasets_dir"]).parent / "models"
AGENT_DIR = MODELS_DIR.parent / "agent"
BACKEND_DIR = MODELS_DIR.parent / "backend_package"
# parents=True so a fresh checkout (no parent directory yet) does not fail here.
BACKEND_DIR.mkdir(parents=True, exist_ok=True)

print("✓ Configuration loaded")
print(f"✓ Backend package output: {BACKEND_DIR}")

# %% [markdown]
# ## 1.
# Define Backend API Contracts

# %%
from typing import Optional  # only name this cell needs beyond the setup cell's imports


class BackendAPIContracts:
    """
    Define API contracts matching backend services.
    Aligned with mlService.js and threatService.js

    The three dictionaries describe payload shapes (field name -> JSON type
    name); the classmethods build concrete payloads in those shapes.
    """

    # Request format from WebScraperAPIService
    SCRAPER_INPUT = {
        'url': 'string',
        'security_report': {
            'is_https': 'boolean',
            'mixed_content': 'boolean',
            'insecure_cookies': 'boolean',
            'security_headers': 'object'
        },
        'network_requests': 'array',
        'console_logs': 'array',
        'page_content': 'string'
    }

    # Response format for mlService.js
    ML_RESPONSE = {
        'prediction': 'number',       # 0 or 1
        'confidence': 'number',       # 0.0 to 1.0
        'risk_level': 'string',       # critical, high, medium, low, info
        'model_name': 'string',
        'model_version': 'string',
        'inference_time_ms': 'number',
        'details': 'object'
    }

    # Response format for threatService.js
    THREAT_RESPONSE = {
        'threat_detected': 'boolean',
        'threat_type': 'string',
        'risk_score': 'number',
        'indicators': 'array',
        'recommended_action': 'string',
        'reasoning': 'string'
    }

    # (minimum threat probability, label), checked from highest to lowest;
    # anything below the last threshold is 'info'.
    RISK_THRESHOLDS = (
        (0.9, 'critical'),
        (0.7, 'high'),
        (0.5, 'medium'),
        (0.3, 'low'),
    )

    @classmethod
    def risk_level_for(cls, prediction: int, confidence: float) -> str:
        """Map a prediction and its confidence to a risk label.

        ``confidence`` is the confidence in the *predicted* class, so for a
        benign prediction (0) the threat probability is ``1 - confidence``.
        Using the raw confidence would label a 95%-confident benign result
        as 'critical'.

        Assumes class 0 means "no threat" and any other class is a threat,
        per the ML_RESPONSE comment ('0 or 1') - confirm for multi-class models.
        """
        threat_probability = confidence if int(prediction) != 0 else 1.0 - confidence
        for threshold, label in cls.RISK_THRESHOLDS:
            if threat_probability >= threshold:
                return label
        return 'info'

    @classmethod
    def format_ml_response(cls, prediction: int, confidence: float,
                           model_name: str, inference_time: float,
                           details: Optional[Dict] = None,
                           model_version: str = '1.0.0') -> Dict:
        """Format response for mlService.js

        Args:
            prediction: Predicted class (0 or 1).
            confidence: Confidence in the predicted class, 0.0 to 1.0.
            model_name: Name of the model that produced the prediction.
            inference_time: Inference duration in milliseconds.
            details: Optional extra payload; defaults to an empty dict.
            model_version: Version string to report (default '1.0.0').

        Returns:
            Dict with the keys listed in ``ML_RESPONSE``.
        """
        return {
            'prediction': int(prediction),
            'confidence': float(confidence),
            'risk_level': cls.risk_level_for(prediction, float(confidence)),
            'model_name': model_name,
            'model_version': model_version,
            'inference_time_ms': float(inference_time),
            'details': details or {}
        }

    @classmethod
    def format_threat_response(cls, detected: bool, threat_type: str,
                               score: float, indicators: List,
                               action: str, reasoning: str) -> Dict:
        """Format response for threatService.js

        Returns:
            Dict with the keys listed in ``THREAT_RESPONSE``; ``score`` is
            coerced to float, everything else is passed through unchanged.
        """
        return {
            'threat_detected': detected,
            'threat_type': threat_type,
            'risk_score': float(score),
            'indicators': indicators,
            'recommended_action': action,
            'reasoning': reasoning
        }


print("✓ Backend API Contracts defined")

# %% [markdown]
# ## 2. Model Packager

# %%
class ModelPackager:
    """
    Package models for backend deployment.
    Creates versioned, self-contained model artifacts.

    Each packaged model gets its own sub-directory of ``output_dir`` holding
    ``model.pkl``, optionally ``scaler.pkl`` and ``metadata.json``, and a
    ``package_info.json``. ``package_manifest`` accumulates the per-model
    info and is written by ``save_manifest``.
    """

    # Size of the chunks read while hashing, so large model files are not
    # loaded into memory in one piece.
    _CHECKSUM_CHUNK_BYTES = 1024 * 1024

    def __init__(self, models_dir: Path, output_dir: Path):
        self.models_dir = models_dir
        self.output_dir = output_dir
        self.package_manifest = {'models': {}, 'version': '1.0.0'}

    def calculate_checksum(self, file_path: Path) -> str:
        """Calculate MD5 checksum for file integrity

        MD5 detects accidental corruption only; it is not a defence against
        deliberate tampering.
        """
        digest = hashlib.md5()
        with open(file_path, 'rb') as f:
            for chunk in iter(lambda: f.read(self._CHECKSUM_CHUNK_BYTES), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def _copy_artifact(self, source: Path, dest: Path) -> Optional[Dict]:
        """Copy one artifact and describe it; return None if the source is absent."""
        if not source.exists():
            return None
        shutil.copy(source, dest)
        return {
            'path': str(dest.relative_to(self.output_dir)),
            'checksum': self.calculate_checksum(dest),
            'size_bytes': dest.stat().st_size
        }

    def package_model(self, model_name: str, model_info: Dict) -> Dict:
        """Package a single model for backend

        Args:
            model_name: Sub-directory name under ``models_dir`` / ``output_dir``.
            model_info: Registry entry; reads ``best_model`` (default
                'random_forest'), ``accuracy``, ``f1_score`` and
                ``inference_time_ms`` (each defaulting to 0).

        Returns:
            The package info dict that is also written to
            ``package_info.json`` and stored in the manifest.
        """
        model_type = model_info.get('best_model', 'random_forest')
        source_dir = self.models_dir / model_name
        dest_dir = self.output_dir / model_name
        # parents=True: do not depend on the caller having created output_dir.
        dest_dir.mkdir(parents=True, exist_ok=True)

        # (manifest key, file name in the training output, file name in the package)
        artifacts = (
            ('model', f"{model_type}.pkl", "model.pkl"),
            ('scaler', "scaler.pkl", "scaler.pkl"),
            ('metadata', f"{model_type}_metadata.json", "metadata.json"),
        )
        packaged_files = {}
        for key, source_name, dest_name in artifacts:
            entry = self._copy_artifact(source_dir / source_name, dest_dir / dest_name)
            if entry is not None:
                packaged_files[key] = entry

        if 'model' not in packaged_files:
            # The package is still recorded (as before), but make it visible
            # that it cannot serve predictions.
            missing_model = source_dir / f"{model_type}.pkl"
            print(f"    ⚠ Model file not found: {missing_model}")

        # Create model info
        model_pkg_info = {
            'name': model_name,
            'type': model_type,
            'version': self.package_manifest['version'],
            'accuracy': model_info.get('accuracy', 0),
            'f1_score': model_info.get('f1_score', 0),
            'inference_time_ms': model_info.get('inference_time_ms', 0),
            'files': packaged_files,
            'packaged_at': time.strftime('%Y-%m-%d %H:%M:%S')
        }

        # Save model info
        info_path = dest_dir / "package_info.json"
        with open(info_path, 'w') as f:
            json.dump(model_pkg_info, f, indent=2)

        self.package_manifest['models'][model_name] = model_pkg_info

        return model_pkg_info

    def save_manifest(self):
        """Save package manifest

        Stamps ``created_at`` and writes the manifest to
        ``output_dir / "manifest.json"``; returns that path.
        """
        manifest_path = self.output_dir / "manifest.json"
        self.package_manifest['created_at'] = time.strftime('%Y-%m-%d %H:%M:%S')
        with open(manifest_path, 'w') as f:
            json.dump(self.package_manifest, f, indent=2)
        return manifest_path


packager = ModelPackager(MODELS_DIR, BACKEND_DIR)
print("✓ Model Packager initialized")

# %% [markdown]
# ## 3.
# Package Models

# %%
# Read the registry written by the training notebooks; fall back to an
# empty one so the rest of the cell still runs.
registry_path = MODELS_DIR / "model_registry.json"

registry = {'models': {}}
if not registry_path.exists():
    print("⚠ No registry found")
else:
    with open(registry_path) as f:
        registry = json.load(f)
    print(f"✓ Loaded {len(registry.get('models', {}))} models")

# Package every registered model; one failure must not stop the others.
print("\nPackaging models for backend...\n")

registered_models = registry.get('models', {})
for model_name, model_info in registered_models.items():
    print(f" Packaging: {model_name}")
    try:
        pkg_info = packager.package_model(model_name, model_info)
        print(f" ✓ Files: {len(pkg_info['files'])}")
        print(f" ✓ Version: {pkg_info['version']}")
    except Exception as e:
        print(f" ⚠ Error: {e}")

# Persist the manifest covering everything packaged above
manifest_path = packager.save_manifest()
print(f"\n✓ Manifest saved to: {manifest_path}")

# %% [markdown]
# ## 4.
# Generate Backend Integration Code

# %%
# Generate Python inference module for backend.
# The text below is written verbatim to backend_package/inference.py, next to
# manifest.json and the per-model directories produced by ModelPackager.
inference_module = '''
"""
CyberForge ML Inference Module
Backend integration for mlService.js
"""

import json
import os
import time
import joblib
import numpy as np
from pathlib import Path
from typing import Dict, List, Any, Optional

# (minimum threat probability, label), highest first; below the last -> "info"
RISK_THRESHOLDS = (
    (0.9, "critical"),
    (0.7, "high"),
    (0.5, "medium"),
    (0.3, "low"),
)


def risk_level_for(prediction: int, confidence: float) -> str:
    """Map a prediction and its confidence to a risk label.

    confidence refers to the predicted class, so for a benign prediction (0)
    the threat probability is 1 - confidence.
    """
    threat_probability = confidence if prediction != 0 else 1.0 - confidence
    for threshold, label in RISK_THRESHOLDS:
        if threat_probability >= threshold:
            return label
    return "info"


class CyberForgeInference:
    """
    ML inference service for CyberForge backend.
    Compatible with mlService.js API contract.
    """

    def __init__(self, models_dir: str):
        self.models_dir = Path(models_dir)
        self.loaded_models = {}
        self.manifest = self._load_manifest()

    def _load_manifest(self) -> Dict:
        manifest_path = self.models_dir / "manifest.json"
        if manifest_path.exists():
            with open(manifest_path) as f:
                return json.load(f)
        return {"models": {}}

    def load_model(self, model_name: str) -> bool:
        """Load a model into memory"""
        if model_name in self.loaded_models:
            return True

        # model_name can come straight from an HTTP request: accept only a
        # bare directory name so it cannot point outside models_dir.
        if (not model_name or model_name in (".", "..")
                or Path(model_name).name != model_name):
            return False

        model_dir = self.models_dir / model_name
        model_path = model_dir / "model.pkl"
        scaler_path = model_dir / "scaler.pkl"

        if not model_path.exists():
            return False

        # NOTE: joblib.load unpickles and can execute arbitrary code; only
        # serve model directories from a trusted source.
        self.loaded_models[model_name] = {
            "model": joblib.load(model_path),
            "scaler": joblib.load(scaler_path) if scaler_path.exists() else None
        }
        return True

    @staticmethod
    def _feature_row(features: Dict, estimators) -> List:
        """Order feature values as the estimator was fitted, when known.

        Falls back to the dict's own order when no estimator exposes
        feature_names_in_. Raises KeyError listing any missing features.
        """
        for estimator in estimators:
            names = getattr(estimator, "feature_names_in_", None)
            if names is not None:
                names = [str(n) for n in names]
                missing = [n for n in names if n not in features]
                if missing:
                    raise KeyError(f"Missing features: {missing}")
                return [features[n] for n in names]
        return list(features.values())

    def predict(self, model_name: str, features: Dict) -> Dict:
        """
        Make a prediction.

        Args:
            model_name: Name of the model to use
            features: Feature dictionary

        Returns:
            Response matching mlService.js contract, or {"error": ...}
        """
        if not self.load_model(model_name):
            return {"error": f"Model not found: {model_name}"}

        model_data = self.loaded_models[model_name]
        model = model_data["model"]
        scaler = model_data["scaler"]

        # Convert features to array
        try:
            row = self._feature_row(features, (scaler, model))
        except KeyError as exc:
            return {"error": f"Invalid features for {model_name}: {exc.args[0]}"}
        X = np.array([row])

        # Scale if scaler available
        if scaler is not None:
            X = scaler.transform(X)

        # Predict
        start_time = time.perf_counter()
        prediction = int(model.predict(X)[0])
        inference_time = (time.perf_counter() - start_time) * 1000

        # Get confidence (0.5 when the model cannot report probabilities)
        confidence = 0.5
        details = {}
        if hasattr(model, "predict_proba"):
            proba = model.predict_proba(X)[0]
            confidence = float(max(proba))
            details["probabilities"] = [float(p) for p in proba]

        return {
            "prediction": prediction,
            "confidence": confidence,
            "risk_level": risk_level_for(prediction, confidence),
            "model_name": model_name,
            "model_version": "1.0.0",
            "inference_time_ms": inference_time,
            "details": details
        }

    def batch_predict(self, model_name: str, features_list: List[Dict]) -> List[Dict]:
        """Batch predictions"""
        return [self.predict(model_name, f) for f in features_list]

    def list_models(self) -> List[str]:
        """List available models"""
        return list(self.manifest.get("models", {}).keys())

    def get_model_info(self, model_name: str) -> Dict:
        """Get model information"""
        return self.manifest.get("models", {}).get(model_name, {})


# FastAPI integration
def create_api(models_dir: Optional[str] = None):
    """Create FastAPI app for model serving

    models_dir defaults to $CYBERFORGE_MODELS_DIR, else the directory that
    contains this file, so the function can be used as a zero-argument
    factory: uvicorn inference:create_api --factory

    Returns None when FastAPI / pydantic are not installed.
    """
    try:
        from fastapi import FastAPI, HTTPException
        from pydantic import BaseModel
    except ImportError:
        return None

    if models_dir is None:
        models_dir = (os.environ.get("CYBERFORGE_MODELS_DIR")
                      or str(Path(__file__).resolve().parent))

    app = FastAPI(title="CyberForge ML API", version="1.0.0")
    inference = CyberForgeInference(models_dir)

    class PredictRequest(BaseModel):
        model_name: str
        features: Dict[str, Any]

    @app.post("/predict")
    async def predict(request: PredictRequest):
        result = inference.predict(request.model_name, request.features)
        if "error" in result:
            # Unknown model -> 404; anything else is a bad request body.
            not_found = result["error"].startswith("Model not found")
            raise HTTPException(status_code=404 if not_found else 422,
                                detail=result["error"])
        return result

    @app.get("/models")
    async def list_models():
        return {"models": inference.list_models()}

    @app.get("/models/{model_name}")
    async def get_model_info(model_name: str):
        info = inference.get_model_info(model_name)
        if not info:
            raise HTTPException(status_code=404, detail="Model not found")
        return info

    return app


if __name__ == "__main__":
    import sys
    models_dir = sys.argv[1] if len(sys.argv) > 1 else "."

    inference = CyberForgeInference(models_dir)
    print(f"Available models: {inference.list_models()}")
'''

inference_path = BACKEND_DIR / "inference.py"
with open(inference_path, 'w', encoding='utf-8') as f:
    f.write(inference_module)

print(f"✓ Inference module saved to: {inference_path}")

# %%
# Generate JavaScript client for backend.
# The text below is written verbatim to backend_package/ml_client.js.
js_client = '''
/**
 * CyberForge ML Client
 * Integration with mlService.js
 */

const axios = require('axios');

class CyberForgeMLClient {
  constructor(baseUrl = 'http://localhost:8001') {
    this.baseUrl = baseUrl;
    this.client = axios.create({
      baseURL: baseUrl,
      timeout: 5000,
      headers: { 'Content-Type': 'application/json' }
    });
  }

  /**
   * Get prediction from ML model
   * @param {string} modelName - Name of the model
   * @param {Object} features - Feature dictionary
   * @returns {Promise} Prediction result
   */
  async predict(modelName, features) {
    try {
      const response = await this.client.post('/predict', {
        model_name: modelName,
        features: features
      });
      return response.data;
    } catch (error) {
      console.error('ML prediction error:', error.message);
      throw error;
    }
  }

  /**
   * Analyze website for threats
   *
   * NOTE: requires the server to expose POST /analyze. The FastAPI app
   * created by inference.py (create_api) does not define that route.
   *
   * @param {string} url - URL to analyze
   * @param {Object} scrapedData - Data from WebScraperAPIService
   * @returns {Promise} Threat analysis result
   */
  async analyzeWebsite(url, scrapedData) {
    try {
      const response = await this.client.post('/analyze', {
        url: url,
        data: scrapedData
      });
      return response.data;
    } catch (error) {
      console.error('Website analysis error:', error.message);
      throw error;
    }
  }

  /**
   * List available models
   * @returns {Promise} List of model names
   */
  async listModels() {
    const response = await this.client.get('/models');
    return response.data.models;
  }

  /**
   * Get model information
   * @param {string} modelName - Name of the model
   * @returns {Promise} Model metadata
   */
  async getModelInfo(modelName) {
    // Encode so names with reserved characters stay a single path segment.
    const response = await this.client.get(`/models/${encodeURIComponent(modelName)}`);
    return response.data;
  }
}

module.exports = CyberForgeMLClient;
'''

js_client_path = BACKEND_DIR / "ml_client.js"
with open(js_client_path, 'w', encoding='utf-8') as f:
    f.write(js_client)

print(f"✓ JavaScript client saved to: {js_client_path}")

# %% [markdown]
# ## 5.
# Copy Agent Module

# %%
# Copy agent module to backend package.
# Both files are optional; report a missing one instead of skipping it
# silently, since the README lists cyberforge_agent.py among the contents.
agent_source = AGENT_DIR / "cyberforge_agent.py"
agent_config_source = AGENT_DIR / "agent_config.json"

if agent_source.exists():
    shutil.copy(agent_source, BACKEND_DIR / "cyberforge_agent.py")
    print("✓ Agent module copied")
else:
    print(f"⚠ Agent module not found: {agent_source}")

if agent_config_source.exists():
    shutil.copy(agent_config_source, BACKEND_DIR / "agent_config.json")
    print("✓ Agent config copied")
else:
    print(f"⚠ Agent config not found: {agent_config_source}")

# %% [markdown]
# ## 6. Generate README

# %%
# README for the backend package. This is an f-string: literal braces in the
# embedded code samples are doubled ({{ }}).
readme_content = f"""
# CyberForge ML Backend Package

Production-ready ML models for CyberForge backend integration.

## Contents

- `inference.py` - Python inference module
- `ml_client.js` - JavaScript client for Node.js backend
- `cyberforge_agent.py` - Agent intelligence module
- `manifest.json` - Model registry and metadata
- `*/model.pkl` - Trained model files
- `*/scaler.pkl` - Feature scalers
- `*/metadata.json` - Model metadata

## Quick Start

### Python

```python
from inference import CyberForgeInference

inference = CyberForgeInference('./backend_package')
result = inference.predict('phishing_detection', {{'url_length': 50, ...}})
print(result)
```

### Node.js

```javascript
const CyberForgeMLClient = require('./ml_client');

const client = new CyberForgeMLClient('http://localhost:8001');
const result = await client.predict('phishing_detection', {{url_length: 50}});
console.log(result);
```

### FastAPI Server

Run from the directory that contains this README (`create_api` needs the
path to the packaged models):

```bash
pip install fastapi uvicorn
python -c "import uvicorn, inference; uvicorn.run(inference.create_api('.'), host='0.0.0.0', port=8001)"
```

## API Contract

### Prediction Response

```json
{{
  "prediction": 1,
  "confidence": 0.95,
  "risk_level": "critical",
  "model_name": "phishing_detection",
  "model_version": "1.0.0",
  "inference_time_ms": 2.5
}}
```

## Models Included

| Model | Type | Accuracy | F1 Score |
|-------|------|----------|----------|
"""

# Add model table: one row per model packaged in this session
for model_name, model_info in packager.package_manifest.get('models', {}).items():
    readme_content += f"| {model_name} | {model_info.get('type', 'N/A')} | {model_info.get('accuracy', 0):.4f} | {model_info.get('f1_score', 0):.4f} |\n"

readme_content += f"""

## Version

- Package Version: 1.0.0
- Created: {time.strftime('%Y-%m-%d %H:%M:%S')}
"""

readme_path = BACKEND_DIR / "README.md"
with open(readme_path, 'w', encoding='utf-8') as f:
    f.write(readme_content)

print(f"✓ README saved to: {readme_path}")

# %% [markdown]
# ## 7.
# Summary

# %%
# Walk the whole package once; directories are kept in the listing but only
# regular files count towards the file total and the size.
packaged_files = list(BACKEND_DIR.rglob('*'))
regular_files = [path for path in packaged_files if path.is_file()]
total_size = sum(path.stat().st_size for path in regular_files)
total_size_mb = total_size / (1024*1024)

banner = "=" * 60
print("\n" + banner)
print("BACKEND INTEGRATION COMPLETE")
print(banner)

print(f"""
📦 Backend Package:
 - Location: {BACKEND_DIR}
 - Files: {len(regular_files)}
 - Total size: {total_size_mb:.2f} MB

📁 Package Contents:""")

# Top-level entries only; directories get a trailing slash
for entry in sorted(BACKEND_DIR.iterdir()):
    if entry.is_file():
        print(f" - {entry.name}")
    elif entry.is_dir():
        print(f" - {entry.name}/")

print("""
🔌 Integration Points:
 - Python: inference.py (CyberForgeInference class)
 - Node.js: ml_client.js (CyberForgeMLClient class)
 - FastAPI: inference.py (create_api function)

Next step:
 → 07_deployment_artifacts.ipynb
""")
print(banner)