File size: 10,550 Bytes

5b86813

#!/usr/bin/env python3
"""
Deployment & Optimization Utilities
Based on: Nature Scientific Reports (Nov 2025) - YOLOv11 + CNN-BiGRU

Features:
    • TensorRT FP16 export  (105 FPS on RTX 3090)
    • ONNX export           (cross-platform)
    • TFLite export         (Raspberry Pi / edge)
    • INT8 post-training quantization
    • Benchmark latency / throughput
"""

import os
import sys
import time
import logging
from pathlib import Path
from typing import Optional, Dict, Any

import numpy as np
import torch
from ultralytics import YOLO

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
)
logger = logging.getLogger("deploy")


# ═══════════════════════════════════════════════════════════════════════════
# Export helpers
# ═══════════════════════════════════════════════════════════════════════════

def export_tensorrt(
    weights: str = "best.pt",
    imgsz: int = 416,
    half: bool = True,
    int8: bool = False,
    workspace: int = 2,
) -> str:
    """
    Build a TensorRT engine (.engine) from YOLO weights.

    The defaults (imgsz=416, workspace=2) are the author's settings for an
    RTX 2050 with 4 GB of VRAM. The export is always requested on device 0.

    Args:
        weights: Checkpoint path handed to ``YOLO``.
        imgsz: Image size forwarded to the exporter.
        half: Forwarded as the exporter's ``half`` option.
        int8: Forwarded as the exporter's ``int8`` option.
        workspace: Forwarded as the exporter's ``workspace`` option.

    Returns:
        The path reported by the exporter, converted to ``str``.
    """
    export_options = {
        "format": "engine",
        "imgsz": imgsz,
        "half": half,
        "int8": int8,
        "workspace": workspace,
        "device": 0,
    }
    engine_file = YOLO(weights).export(**export_options)
    logger.info("TensorRT engine saved → %s", engine_file)
    return str(engine_file)


def export_onnx(
    weights: str = "best.pt",
    imgsz: int = 416,
    half: bool = False,
    simplify: bool = True,
) -> str:
    """
    Convert YOLO weights to an ONNX model for cross-platform inference.

    Args:
        weights: Checkpoint path handed to ``YOLO``.
        imgsz: Image size forwarded to the exporter.
        half: Forwarded as the exporter's ``half`` option.
        simplify: Forwarded as the exporter's ``simplify`` option.

    Returns:
        The path reported by the exporter, converted to ``str``.
    """
    detector = YOLO(weights)
    onnx_file = detector.export(format="onnx", imgsz=imgsz, half=half, simplify=simplify)
    logger.info("ONNX model saved → %s", onnx_file)
    return str(onnx_file)


def export_tflite(
    weights: str = "best.pt",
    imgsz: int = 416,
    int8: bool = False,
) -> str:
    """
    Convert YOLO weights to TFLite (Raspberry Pi / mobile targets).

    Args:
        weights: Checkpoint path handed to ``YOLO``.
        imgsz: Image size forwarded to the exporter.
        int8: Forwarded as the exporter's ``int8`` option.

    Returns:
        The path reported by the exporter, converted to ``str``.
    """
    export_options = {"format": "tflite", "imgsz": imgsz, "int8": int8}
    tflite_file = YOLO(weights).export(**export_options)
    logger.info("TFLite model saved → %s", tflite_file)
    return str(tflite_file)


def export_torchscript(
    weights: str = "best.pt",
    imgsz: int = 640,
) -> str:
    """
    Convert YOLO weights to a TorchScript model.

    Args:
        weights: Checkpoint path handed to ``YOLO``.
        imgsz: Image size forwarded to the exporter (defaults to 640 here).

    Returns:
        The path reported by the exporter, converted to ``str``.
    """
    script_file = YOLO(weights).export(format="torchscript", imgsz=imgsz)
    logger.info("TorchScript model saved → %s", script_file)
    return str(script_file)


# ═══════════════════════════════════════════════════════════════════════════
# INT8 Post-Training Quantization (PyTorch)
# ═══════════════════════════════════════════════════════════════════════════

def quantize_int8(
    weights: str = "best.pt",
    output_path: str = "best_quantized.pt",
) -> str:
    """
    Apply PyTorch dynamic INT8 quantization and save the resulting state dict.

    Important limitations:
        * PyTorch dynamic quantization only replaces layer types that have a
          dynamic-quantized counterpart (``nn.Linear`` and recurrent layers).
          ``nn.Conv2d`` is still requested below, but it has no such
          counterpart, so convolution weights stay in floating point. A
          convolution-only network is therefore saved essentially unchanged;
          a warning is logged when no layer was actually quantized.
        * Only ``state_dict()`` is written. To load it, rebuild the model,
          apply the same ``quantize_dynamic`` call, then ``load_state_dict``.

    For real INT8 deployment of the detector, use the exporter's ``int8``
    option (see ``export_tensorrt`` / ``export_tflite``) instead.

    Args:
        weights: Checkpoint path handed to ``YOLO``.
        output_path: Destination file for the quantized state dict. Missing
            parent directories are created.

    Returns:
        ``output_path`` unchanged.
    """
    from torch.quantization import quantize_dynamic

    # Quantization is an inference-time transform; make sure the network is
    # in eval mode before converting it.
    model = YOLO(weights).model.eval()
    quantized = quantize_dynamic(
        model,
        {torch.nn.Linear, torch.nn.Conv2d},
        dtype=torch.qint8,
    )

    # Quantized layer classes live under a "...quantized..." package
    # (torch.nn.quantized / torch.ao.nn.quantized depending on the version).
    swapped = sum(
        ".quantized." in type(module).__module__
        for module in quantized.modules()
    )
    if swapped == 0:
        logger.warning(
            "No layers were quantized for %s: dynamic quantization does not "
            "cover Conv2d, so the saved weights are still floating point.",
            weights,
        )

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    torch.save(quantized.state_dict(), output_path)
    logger.info("INT8 quantized model → %s", output_path)
    return output_path


# ═══════════════════════════════════════════════════════════════════════════
# Benchmark
# ═══════════════════════════════════════════════════════════════════════════

def benchmark(
    weights: str = "best.pt",
    imgsz: int = 416,
    warmup: int = 20,
    runs: int = 100,
    device: int = 0,
    half: bool = True,
) -> Dict[str, Any]:
    """
    Measure inference latency and throughput on a random dummy image.

    Args:
        weights: Model path handed to ``YOLO``.
        imgsz: Side length of the square dummy image and the ``imgsz``
            passed to ``predict``.
        warmup: Number of untimed predictions run first (may be 0).
        runs: Number of timed predictions; must be at least 1.
        device: Device passed to ``predict``.
        half: Passed to ``predict`` as its ``half`` option so the requested
            precision is what actually gets measured.

    Returns:
        Dict with: model, imgsz, device, half, runs, mean_ms, std_ms,
        min_ms, max_ms, fps.

    Raises:
        ValueError: If ``runs`` is smaller than 1 (the statistics would
            otherwise be NaN).
    """
    if runs < 1:
        raise ValueError(f"runs must be >= 1, got {runs}")

    model = YOLO(weights)
    predict_kwargs = {
        "imgsz": imgsz,
        "device": device,
        "half": half,
        "verbose": False,
    }

    # CUDA kernels are launched asynchronously; synchronising around the
    # timed region keeps queued GPU work from leaking out of (or into) a
    # measurement.
    sync_cuda = torch.cuda.is_available() and str(device).lower() != "cpu"

    # Warm-up (upper bound is exclusive, so 256 covers the full uint8 range)
    dummy = np.random.randint(0, 256, (imgsz, imgsz, 3), dtype=np.uint8)
    for _ in range(warmup):
        model.predict(dummy, **predict_kwargs)

    times = []
    for _ in range(runs):
        if sync_cuda:
            torch.cuda.synchronize()
        t0 = time.perf_counter()
        model.predict(dummy, **predict_kwargs)
        if sync_cuda:
            torch.cuda.synchronize()
        times.append(time.perf_counter() - t0)

    times_ms = np.array(times) * 1000
    mean_ms = float(times_ms.mean())
    results = {
        "model": weights,
        "imgsz": imgsz,
        "device": str(device),
        "half": half,
        "runs": runs,
        "mean_ms": mean_ms,
        "std_ms": float(times_ms.std()),
        "min_ms": float(times_ms.min()),
        "max_ms": float(times_ms.max()),
        "fps": float(1000.0 / mean_ms),
    }

    logger.info(
        "Benchmark: %.1f ± %.1f ms  (%.1f FPS)  [%s]",
        results["mean_ms"], results["std_ms"], results["fps"], weights,
    )
    return results


# ═══════════════════════════════════════════════════════════════════════════
# Raspberry Pi packaging helper
# ═══════════════════════════════════════════════════════════════════════════

def package_for_edge(
    yolo_weights: str = "best.pt",
    bigru_weights: Optional[str] = None,
    imgsz: int = 416,
    output_dir: str = "edge_package",
) -> str:
    """
    Create a deployment package directory for edge devices.

    Package contents:
        edge_package/
            model.tflite       – YOLO exported to TFLite with int8=True
            bigru.pth          – severity model (only if the file exists)
            inference.py       – inference script (only if found next to
                                 this module)
            config.yaml        – runtime config

    The ``bigru`` entry in ``config.yaml`` is set only when the BiGRU weights
    were actually copied, so the config never points at a missing file.

    Args:
        yolo_weights: YOLO checkpoint to export.
        bigru_weights: Optional path to BiGRU weights. If given but missing
            on disk, a warning is logged and the package is built without it.
        imgsz: Image size used for the export and written to the config.
        output_dir: Directory to create and fill (parents are created).

    Returns:
        The package directory as a string.
    """
    import shutil

    # PyYAML is only needed here; importing it first fails fast, before the
    # comparatively expensive export runs.
    import yaml

    out = Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)

    # Export TFLite
    tflite_path = export_tflite(yolo_weights, imgsz=imgsz, int8=True)
    shutil.copy2(tflite_path, out / "model.tflite")

    # Copy BiGRU weights, remembering whether the copy really happened
    bigru_packaged = False
    if bigru_weights:
        if Path(bigru_weights).exists():
            shutil.copy2(bigru_weights, out / "bigru.pth")
            bigru_packaged = True
        else:
            logger.warning(
                "BiGRU weights not found, packaging without them: %s",
                bigru_weights,
            )

    # Runtime config
    config = {
        "model": "model.tflite",
        "bigru": "bigru.pth" if bigru_packaged else None,
        "imgsz": imgsz,
        "conf_threshold": 0.5,
        "iou_threshold": 0.45,
        "classes": ["Alligator Crack", "Longitudinal Crack", "Pothole", "Transverse Crack"],
        "severity_labels": ["Minor", "Moderate", "Severe", "Critical"],
    }
    with open(out / "config.yaml", "w", encoding="utf-8") as f:
        yaml.dump(config, f)

    # Copy inference script
    inference_src = Path(__file__).parent / "inference.py"
    if inference_src.exists():
        shutil.copy2(inference_src, out / "inference.py")
    else:
        logger.warning(
            "inference.py not found at %s; package has no inference script",
            inference_src,
        )

    logger.info("Edge package created → %s", out)
    return str(out)


# ═══════════════════════════════════════════════════════════════════════════
# CLI
# ═══════════════════════════════════════════════════════════════════════════

def build_parser():
    """
    Build the command-line parser for this module.

    Sub-commands: ``export``, ``quantize``, ``benchmark``, ``package``.

    ``export`` keeps FP16 enabled by default; ``--no-half`` turns it off.
    (``--half`` alone was a ``store_true`` flag defaulting to True, so it
    could never be disabled.) ``--half`` is still accepted for
    compatibility with existing invocations.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Deploy & Optimise Road Anomaly Models")
    sub = parser.add_subparsers(dest="cmd", required=True)

    # export
    p_exp = sub.add_parser("export", help="Export model")
    p_exp.add_argument("--weights", default="best.pt")
    p_exp.add_argument("--format", default="engine",
                       choices=["engine", "onnx", "tflite", "torchscript"])
    p_exp.add_argument("--imgsz", type=int, default=416)
    p_exp.add_argument("--half", dest="half", action="store_true", default=True,
                       help="Use FP16 for the engine export (default)")
    p_exp.add_argument("--no-half", dest="half", action="store_false",
                       help="Disable FP16 for the engine export")
    p_exp.add_argument("--int8", action="store_true")

    # quantize
    p_q = sub.add_parser("quantize", help="INT8 quantization")
    p_q.add_argument("--weights", default="best.pt")
    p_q.add_argument("--output", default="best_quantized.pt")

    # benchmark
    p_b = sub.add_parser("benchmark", help="Measure speed")
    p_b.add_argument("--weights", default="best.pt")
    p_b.add_argument("--imgsz", type=int, default=416)
    p_b.add_argument("--runs", type=int, default=100)

    # package
    p_p = sub.add_parser("package", help="Package for edge deployment")
    p_p.add_argument("--weights", default="best.pt")
    p_p.add_argument("--bigru", default=None)
    p_p.add_argument("--imgsz", type=int, default=416)
    p_p.add_argument("--output-dir", default="edge_package")

    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()

    if args.cmd == "export":
        dispatch = {
            "engine": export_tensorrt,
            "onnx": export_onnx,
            "tflite": export_tflite,
            "torchscript": export_torchscript,
        }
        fn = dispatch[args.format]
        kwargs = {"weights": args.weights, "imgsz": args.imgsz}
        # Only forward the options each exporter actually accepts.
        if args.format == "engine":
            kwargs.update(half=args.half, int8=args.int8)
        elif args.format == "tflite":
            kwargs["int8"] = args.int8
        fn(**kwargs)

    elif args.cmd == "quantize":
        quantize_int8(args.weights, args.output)

    elif args.cmd == "benchmark":
        import json

        results = benchmark(args.weights, imgsz=args.imgsz, runs=args.runs)
        print(json.dumps(results, indent=2))

    elif args.cmd == "package":
        package_for_edge(
            yolo_weights=args.weights,
            bigru_weights=args.bigru,
            imgsz=args.imgsz,
            output_dir=args.output_dir,
        )