Spaces:

Che237
/

cyberforge

Sleeping

File size: 20,747 Bytes

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "bc13dda0",
   "metadata": {},
   "source": [
    "# 07 - Deployment Artifacts\n",
    "\n",
    "## CyberForge AI - Final Deployment Package\n",
    "\n",
    "This notebook creates final deployment artifacts for:\n",
    "- Hugging Face Hub upload\n",
    "- Production deployment\n",
    "- Model versioning and documentation\n",
    "\n",
    "### Outputs:\n",
    "- Complete model package for HF Hub\n",
    "- Docker configuration for ML services\n",
    "- Environment configuration\n",
    "- Deployment documentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9263214e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import time\n",
    "import shutil\n",
    "from pathlib import Path\n",
    "from typing import Dict, List, Any\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Configuration\n",
    "config_path = Path(\"notebook_config.json\")\n",
    "if not config_path.exists():\n",
    "    config_path = Path(\"/home/user/app/notebooks/notebook_config.json\")\n",
    "with open(config_path) as f:\n",
    "    CONFIG = json.load(f)\n",
    "\n",
    "MODELS_DIR = Path(CONFIG[\"datasets_dir\"]).parent / \"models\"\n",
    "BACKEND_DIR = MODELS_DIR.parent / \"backend_package\"\n",
    "DEPLOY_DIR = MODELS_DIR.parent / \"deployment\"\n",
    "DEPLOY_DIR.mkdir(exist_ok=True)\n",
    "\n",
    "print(f\"✓ Configuration loaded\")\n",
    "print(f\"✓ Deployment output: {DEPLOY_DIR}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ea513b54",
   "metadata": {},
   "source": [
    "## 1. Hugging Face Hub Upload Preparation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0409ab79",
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    from huggingface_hub import HfApi, login\n",
    "    HF_AVAILABLE = True\n",
    "except ImportError:\n",
    "    import subprocess\n",
    "    subprocess.run(['pip', 'install', 'huggingface_hub', '-q'])\n",
    "    from huggingface_hub import HfApi, login\n",
    "    HF_AVAILABLE = True\n",
    "\n",
    "class HuggingFaceDeployer:\n",
    "    \"\"\"\n",
    "    Deploy models and artifacts to Hugging Face Hub.\n",
    "    \"\"\"\n",
    "    \n",
    "    def __init__(self, token: str = None):\n",
    "        self.token = token or CONFIG.get('hf_token') or os.environ.get('HF_TOKEN')\n",
    "        self.api = HfApi(token=self.token) if self.token else None\n",
    "        self.repo_id = CONFIG.get('hf_repo', 'Che237/cyberforge-models')\n",
    "    \n",
    "    def is_authenticated(self) -> bool:\n",
    "        \"\"\"Check if authenticated to Hugging Face\"\"\"\n",
    "        if not self.api:\n",
    "            return False\n",
    "        try:\n",
    "            self.api.whoami()\n",
    "            return True\n",
    "        except:\n",
    "            return False\n",
    "    \n",
    "    def create_model_card(self, models_info: Dict) -> str:\n",
    "        \"\"\"Create MODEL_CARD.md for Hugging Face\"\"\"\n",
    "        card = f\"\"\"\n",
    "---\n",
    "license: mit\n",
    "tags:\n",
    "  - cybersecurity\n",
    "  - threat-detection\n",
    "  - phishing\n",
    "  - malware\n",
    "  - security\n",
    "language:\n",
    "  - en\n",
    "---\n",
    "\n",
    "# CyberForge AI Models\n",
    "\n",
    "Production-ready machine learning models for cybersecurity threat detection.\n",
    "\n",
    "## Models Included\n",
    "\n",
    "| Model | Task | Accuracy | F1 Score | Inference Time |\n",
    "|-------|------|----------|----------|----------------|\n",
    "\"\"\"\n",
    "        \n",
    "        for name, info in models_info.items():\n",
    "            card += f\"| {name} | {info.get('type', 'classification')} | {info.get('accuracy', 0):.4f} | {info.get('f1_score', 0):.4f} | {info.get('inference_time_ms', 0):.2f}ms |\\n\"\n",
    "        \n",
    "        card += f\"\"\"\n",
    "\n",
    "## Usage\n",
    "\n",
    "### Python\n",
    "\n",
    "```python\n",
    "from inference import CyberForgeInference\n",
    "\n",
    "inference = CyberForgeInference('./models')\n",
    "result = inference.predict('phishing_detection', features)\n",
    "```\n",
    "\n",
    "### API\n",
    "\n",
    "```python\n",
    "import requests\n",
    "\n",
    "response = requests.post(\n",
    "    'https://huggingface.co/spaces/Che237/cyberforge/predict',\n",
    "    json={{'model_name': 'phishing_detection', 'features': features}}\n",
    ")\n",
    "```\n",
    "\n",
    "## Model Details\n",
    "\n",
    "- **Framework**: scikit-learn\n",
    "- **Python Version**: 3.11+\n",
    "- **License**: MIT\n",
    "\n",
    "## Citation\n",
    "\n",
    "If you use these models, please cite:\n",
    "\n",
    "```bibtex\n",
    "@software{{cyberforge2024,\n",
    "  title = {{CyberForge AI Security Models}},\n",
    "  year = {{2024}},\n",
    "  url = {{https://huggingface.co/Che237/cyberforge-models}}\n",
    "}}\n",
    "```\n",
    "\"\"\"\n",
    "        return card\n",
    "    \n",
    "    def prepare_upload_package(self, source_dir: Path, output_dir: Path) -> Path:\n",
    "        \"\"\"Prepare package for HF upload\"\"\"\n",
    "        hf_dir = output_dir / \"huggingface_upload\"\n",
    "        hf_dir.mkdir(exist_ok=True)\n",
    "        \n",
    "        # Copy all files from backend package\n",
    "        if source_dir.exists():\n",
    "            for item in source_dir.iterdir():\n",
    "                if item.is_file():\n",
    "                    shutil.copy(item, hf_dir / item.name)\n",
    "                elif item.is_dir():\n",
    "                    shutil.copytree(item, hf_dir / item.name, dirs_exist_ok=True)\n",
    "        \n",
    "        return hf_dir\n",
    "\n",
    "hf_deployer = HuggingFaceDeployer()\n",
    "print(f\"✓ HuggingFace Deployer initialized\")\n",
    "print(f\"   Authenticated: {hf_deployer.is_authenticated()}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2ca3a2e2",
   "metadata": {},
   "source": [
    "## 2. Create Deployment Package"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "057ab810",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create HF upload package\n",
    "print(\"Creating deployment package...\\n\")\n",
    "\n",
    "hf_package_dir = hf_deployer.prepare_upload_package(BACKEND_DIR, DEPLOY_DIR)\n",
    "print(f\"✓ Package prepared at: {hf_package_dir}\")\n",
    "\n",
    "# Load models info\n",
    "manifest_path = BACKEND_DIR / \"manifest.json\"\n",
    "if manifest_path.exists():\n",
    "    with open(manifest_path) as f:\n",
    "        manifest = json.load(f)\n",
    "    models_info = manifest.get('models', {})\n",
    "else:\n",
    "    models_info = {}\n",
    "\n",
    "# Create model card\n",
    "model_card = hf_deployer.create_model_card(models_info)\n",
    "model_card_path = hf_package_dir / \"README.md\"\n",
    "with open(model_card_path, 'w') as f:\n",
    "    f.write(model_card)\n",
    "\n",
    "print(f\"✓ Model card saved\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5168e806",
   "metadata": {},
   "source": [
    "## 3. Docker Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c756998d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate Dockerfile for ML services\n",
    "dockerfile_content = '''\n",
    "# CyberForge ML Services Dockerfile\n",
    "FROM python:3.11-slim\n",
    "\n",
    "WORKDIR /app\n",
    "\n",
    "# Install dependencies\n",
    "COPY requirements.txt .\n",
    "RUN pip install --no-cache-dir -r requirements.txt\n",
    "\n",
    "# Copy models and code\n",
    "COPY . .\n",
    "\n",
    "# Expose port\n",
    "EXPOSE 8001\n",
    "\n",
    "# Health check\n",
    "HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \\\n",
    "    CMD curl -f http://localhost:8001/health || exit 1\n",
    "\n",
    "# Run server\n",
    "CMD [\"uvicorn\", \"inference:app\", \"--host\", \"0.0.0.0\", \"--port\", \"8001\"]\n",
    "'''\n",
    "\n",
    "dockerfile_path = DEPLOY_DIR / \"Dockerfile\"\n",
    "with open(dockerfile_path, 'w') as f:\n",
    "    f.write(dockerfile_content)\n",
    "\n",
    "print(f\"✓ Dockerfile saved to: {dockerfile_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "afea07f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate requirements.txt for deployment\n",
    "requirements_content = '''\n",
    "# CyberForge ML Requirements\n",
    "fastapi>=0.104.0\n",
    "uvicorn>=0.24.0\n",
    "pydantic>=2.0.0\n",
    "numpy>=1.24.0\n",
    "pandas>=2.0.0\n",
    "scikit-learn>=1.3.0\n",
    "joblib>=1.3.0\n",
    "python-multipart>=0.0.6\n",
    "huggingface-hub>=0.19.0\n",
    "google-genai>=1.0.0\n",
    "requests>=2.31.0\n",
    "pyarrow>=14.0.0\n",
    "'''\n",
    "\n",
    "requirements_path = DEPLOY_DIR / \"requirements.txt\"\n",
    "with open(requirements_path, 'w') as f:\n",
    "    f.write(requirements_content)\n",
    "\n",
    "print(f\"✓ requirements.txt saved\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6586c940",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate docker-compose.yml\n",
    "docker_compose_content = '''\n",
    "version: \"3.8\"\n",
    "\n",
    "services:\n",
    "  ml-services:\n",
    "    build:\n",
    "      context: .\n",
    "      dockerfile: Dockerfile\n",
    "    ports:\n",
    "      - \"8001:8001\"\n",
    "    environment:\n",
    "      - GEMINI_API_KEY=${GEMINI_API_KEY}\n",
    "      - HF_TOKEN=${HF_TOKEN}\n",
    "    volumes:\n",
    "      - ./models:/app/models:ro\n",
    "    restart: unless-stopped\n",
    "    healthcheck:\n",
    "      test: [\"CMD\", \"curl\", \"-f\", \"http://localhost:8001/health\"]\n",
    "      interval: 30s\n",
    "      timeout: 10s\n",
    "      retries: 3\n",
    "'''\n",
    "\n",
    "compose_path = DEPLOY_DIR / \"docker-compose.yml\"\n",
    "with open(compose_path, 'w') as f:\n",
    "    f.write(docker_compose_content)\n",
    "\n",
    "print(f\"✓ docker-compose.yml saved\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "665ec48b",
   "metadata": {},
   "source": [
    "## 4. Environment Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61531f5c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate .env.example\n",
    "env_example = '''\n",
    "# CyberForge ML Services Environment Configuration\n",
    "\n",
    "# Hugging Face\n",
    "HF_TOKEN=your_huggingface_token_here\n",
    "HF_REPO=Che237/cyberforge-models\n",
    "\n",
    "# Gemini API\n",
    "GEMINI_API_KEY=your_gemini_api_key_here\n",
    "\n",
    "# WebScraper API\n",
    "WEBSCRAPER_API_KEY=your_webscraper_api_key_here\n",
    "WEBSCRAPER_API_URL=http://webscrapper.live/api/scrape\n",
    "\n",
    "# Server Configuration\n",
    "ML_SERVICE_PORT=8001\n",
    "ML_SERVICE_HOST=0.0.0.0\n",
    "\n",
    "# Model Configuration\n",
    "MODELS_DIR=./models\n",
    "MAX_INFERENCE_TIME_MS=100\n",
    "CONFIDENCE_THRESHOLD=0.7\n",
    "'''\n",
    "\n",
    "env_path = DEPLOY_DIR / \".env.example\"\n",
    "with open(env_path, 'w') as f:\n",
    "    f.write(env_example)\n",
    "\n",
    "print(f\"✓ .env.example saved\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "86c547c3",
   "metadata": {},
   "source": [
    "## 5. Upload Script"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21703867",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate HuggingFace upload script\n",
    "upload_script = '''\n",
    "#!/usr/bin/env python3\n",
    "\"\"\"\n",
    "CyberForge - Upload to Hugging Face Hub\n",
    "\"\"\"\n",
    "\n",
    "import os\n",
    "import sys\n",
    "from pathlib import Path\n",
    "from huggingface_hub import HfApi, login\n",
    "\n",
    "def main():\n",
    "    # Get token\n",
    "    token = os.environ.get('HF_TOKEN')\n",
    "    if not token:\n",
    "        print(\"Error: HF_TOKEN environment variable not set\")\n",
    "        sys.exit(1)\n",
    "    \n",
    "    # Login\n",
    "    login(token=token)\n",
    "    api = HfApi()\n",
    "    \n",
    "    # Configuration\n",
    "    repo_id = os.environ.get('HF_REPO', 'Che237/cyberforge-models')\n",
    "    upload_dir = Path('./huggingface_upload')\n",
    "    \n",
    "    if not upload_dir.exists():\n",
    "        print(f\"Error: Upload directory not found: {upload_dir}\")\n",
    "        sys.exit(1)\n",
    "    \n",
    "    print(f\"Uploading to: {repo_id}\")\n",
    "    \n",
    "    # Upload\n",
    "    try:\n",
    "        api.upload_folder(\n",
    "            folder_path=str(upload_dir),\n",
    "            repo_id=repo_id,\n",
    "            repo_type=\"model\",\n",
    "            commit_message=\"Update CyberForge ML models\"\n",
    "        )\n",
    "        print(f\"✓ Upload complete: https://huggingface.co/{repo_id}\")\n",
    "    except Exception as e:\n",
    "        print(f\"Error: {e}\")\n",
    "        sys.exit(1)\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    main()\n",
    "'''\n",
    "\n",
    "upload_script_path = DEPLOY_DIR / \"upload_to_hf.py\"\n",
    "with open(upload_script_path, 'w') as f:\n",
    "    f.write(upload_script)\n",
    "\n",
    "os.chmod(upload_script_path, 0o755)\n",
    "print(f\"✓ Upload script saved\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "289852a7",
   "metadata": {},
   "source": [
    "## 6. Deployment Documentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b823e84",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate deployment guide\n",
    "deployment_guide = f'''\n",
    "# CyberForge ML Deployment Guide\n",
    "\n",
    "## Quick Start\n",
    "\n",
    "### 1. Local Deployment\n",
    "\n",
    "```bash\n",
    "# Install dependencies\n",
    "pip install -r requirements.txt\n",
    "\n",
    "# Start server\n",
    "uvicorn inference:app --host 0.0.0.0 --port 8001\n",
    "```\n",
    "\n",
    "### 2. Docker Deployment\n",
    "\n",
    "```bash\n",
    "# Build and run\n",
    "docker-compose up -d\n",
    "\n",
    "# Check logs\n",
    "docker-compose logs -f\n",
    "```\n",
    "\n",
    "### 3. Hugging Face Deployment\n",
    "\n",
    "```bash\n",
    "# Set token\n",
    "export HF_TOKEN=your_token_here\n",
    "\n",
    "# Upload\n",
    "python upload_to_hf.py\n",
    "```\n",
    "\n",
    "## API Endpoints\n",
    "\n",
    "### Prediction\n",
    "\n",
    "```\n",
    "POST /predict\n",
    "Content-Type: application/json\n",
    "\n",
    "{{\n",
    "    \"model_name\": \"phishing_detection\",\n",
    "    \"features\": {{\n",
    "        \"url_length\": 50,\n",
    "        \"has_https\": true,\n",
    "        ...\n",
    "    }}\n",
    "}}\n",
    "```\n",
    "\n",
    "### List Models\n",
    "\n",
    "```\n",
    "GET /models\n",
    "```\n",
    "\n",
    "### Health Check\n",
    "\n",
    "```\n",
    "GET /health\n",
    "```\n",
    "\n",
    "## Environment Variables\n",
    "\n",
    "| Variable | Description | Required |\n",
    "|----------|-------------|----------|\n",
    "| HF_TOKEN | Hugging Face API token | For HF upload |\n",
    "| GEMINI_API_KEY | Gemini API key | For AI reasoning |\n",
    "| WEBSCRAPER_API_KEY | WebScraper API key | For data collection |\n",
    "\n",
    "## Monitoring\n",
    "\n",
    "- Health endpoint: `/health`\n",
    "- Metrics endpoint: `/metrics`\n",
    "- Logs: `docker-compose logs -f ml-services`\n",
    "\n",
    "## Troubleshooting\n",
    "\n",
    "### Model not found\n",
    "- Check model files exist in `models/` directory\n",
    "- Verify manifest.json includes the model\n",
    "\n",
    "### Slow inference\n",
    "- Check model size\n",
    "- Consider using lighter model variant\n",
    "- Verify no resource contention\n",
    "\n",
    "### API errors\n",
    "- Check logs for stack traces\n",
    "- Verify input format matches expected schema\n",
    "- Ensure all dependencies installed\n",
    "\n",
    "## Support\n",
    "\n",
    "For issues and feature requests, please open an issue on GitHub.\n",
    "'''\n",
    "\n",
    "guide_path = DEPLOY_DIR / \"DEPLOYMENT.md\"\n",
    "with open(guide_path, 'w') as f:\n",
    "    f.write(deployment_guide)\n",
    "\n",
    "print(f\"✓ Deployment guide saved\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "54cad563",
   "metadata": {},
   "source": [
    "## 7. Final Package Verification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "502eb389",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Verify deployment package\n",
    "required_files = [\n",
    "    'Dockerfile',\n",
    "    'docker-compose.yml',\n",
    "    'requirements.txt',\n",
    "    '.env.example',\n",
    "    'upload_to_hf.py',\n",
    "    'DEPLOYMENT.md',\n",
    "    'huggingface_upload/README.md'\n",
    "]\n",
    "\n",
    "print(\"Verifying deployment package...\\n\")\n",
    "\n",
    "all_present = True\n",
    "for file in required_files:\n",
    "    path = DEPLOY_DIR / file\n",
    "    exists = path.exists()\n",
    "    status = \"✓\" if exists else \"✗\"\n",
    "    print(f\"  {status} {file}\")\n",
    "    if not exists:\n",
    "        all_present = False\n",
    "\n",
    "print(f\"\\n{'✓ All files present' if all_present else '✗ Some files missing'}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4eea48b3",
   "metadata": {},
   "source": [
    "## 8. Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0371e31b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate package stats\n",
    "total_files = len(list(DEPLOY_DIR.rglob('*')))\n",
    "total_size = sum(f.stat().st_size for f in DEPLOY_DIR.rglob('*') if f.is_file())\n",
    "\n",
    "print(\"\\n\" + \"=\" * 60)\n",
    "print(\"DEPLOYMENT ARTIFACTS COMPLETE\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "print(f\"\"\"\n",
    "🚀 Deployment Package Ready:\n",
    "   - Location: {DEPLOY_DIR}\n",
    "   - Files: {total_files}\n",
    "   - Total size: {total_size / (1024*1024):.2f} MB\n",
    "\n",
    "📦 Package Contents:\n",
    "   - Dockerfile: Container configuration\n",
    "   - docker-compose.yml: Multi-service orchestration\n",
    "   - requirements.txt: Python dependencies\n",
    "   - .env.example: Environment template\n",
    "   - upload_to_hf.py: HuggingFace upload script\n",
    "   - DEPLOYMENT.md: Deployment guide\n",
    "   - huggingface_upload/: HF Hub package\n",
    "\n",
    "🔧 Deployment Options:\n",
    "   1. Local: uvicorn inference:app --port 8001\n",
    "   2. Docker: docker-compose up -d\n",
    "   3. HuggingFace: python upload_to_hf.py\n",
    "\n",
    "📋 Next Steps:\n",
    "   1. Set environment variables (copy .env.example to .env)\n",
    "   2. Choose deployment method\n",
    "   3. Verify health endpoint\n",
    "   4. Integrate with backend services\n",
    "\n",
    "🎉 CyberForge ML Pipeline Complete!\n",
    "\"\"\")\n",
    "print(\"=\" * 60)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2ee1fdf8",
   "metadata": {},
   "source": [
    "## 9. Optional: Upload to Hugging Face"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ce9155a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload trained models and artifacts to Hugging Face Hub\n",
    "if hf_deployer.is_authenticated():\n",
    "    print(\"Uploading to Hugging Face Hub...\")\n",
    "    try:\n",
    "        hf_deployer.api.upload_folder(\n",
    "            folder_path=str(hf_package_dir),\n",
    "            repo_id=hf_deployer.repo_id,\n",
    "            repo_type=\"model\",\n",
    "            commit_message=\"Update CyberForge ML models and deployment artifacts\"\n",
    "        )\n",
    "        print(f\"✓ Uploaded to: https://huggingface.co/{hf_deployer.repo_id}\")\n",
    "    except Exception as e:\n",
    "        print(f\"⚠ Upload error: {e}\")\n",
    "else:\n",
    "    print(\"⚠ Not authenticated to HuggingFace. Models saved locally only.\")\n",
    "    print(\"  Set HF_TOKEN environment variable or hf_token in notebook_config.json to upload.\")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}