| |
| """ |
| Deployment & Optimization Utilities |
| Based on: Nature Scientific Reports (Nov 2025) - YOLOv11 + CNN-BiGRU |
| |
| Features: |
| β’ TensorRT FP16 export (105 FPS on RTX 3090) |
| β’ ONNX export (cross-platform) |
| β’ TFLite export (Raspberry Pi / edge) |
| β’ INT8 post-training quantization |
| β’ Benchmark latency / throughput |
| """ |
|
|
| import os |
| import sys |
| import time |
| import logging |
| from pathlib import Path |
| from typing import Optional, Dict, Any |
|
|
| import numpy as np |
| import torch |
| from ultralytics import YOLO |
|
|
# Module-wide logging: timestamped, leveled records for all CLI output.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("deploy")
|
|
|
|
| |
| |
| |
|
|
def export_tensorrt(
    weights: str = "best.pt",
    imgsz: int = 416,
    half: bool = True,
    int8: bool = False,
    workspace: int = 2,
) -> str:
    """Export a YOLOv11 checkpoint to a TensorRT engine (.engine).

    Defaults are tuned for an RTX 2050 (4 GB VRAM): 2 GB builder
    workspace and a 416-px square input.

    Args:
        weights: path to the .pt checkpoint to export.
        imgsz: square input size in pixels.
        half: build an FP16 engine.
        int8: build an INT8 engine (requires calibration data).
        workspace: TensorRT builder workspace limit in GB.

    Returns:
        Path of the generated .engine file as a string.
    """
    engine_path = YOLO(weights).export(
        format="engine",
        imgsz=imgsz,
        half=half,
        int8=int8,
        workspace=workspace,
        device=0,  # engine building requires a CUDA device
    )
    logger.info("TensorRT engine saved β %s", engine_path)
    return str(engine_path)
|
|
|
|
def export_onnx(
    weights: str = "best.pt",
    imgsz: int = 416,
    half: bool = False,
    simplify: bool = True,
) -> str:
    """Export a YOLO checkpoint to ONNX for cross-platform inference.

    Args:
        weights: path to the .pt checkpoint to export.
        imgsz: square input size in pixels.
        half: export FP16 weights.
        simplify: run onnx-simplifier on the exported graph.

    Returns:
        Path of the exported .onnx file as a string.
    """
    onnx_path = YOLO(weights).export(
        format="onnx",
        imgsz=imgsz,
        half=half,
        simplify=simplify,
    )
    logger.info("ONNX model saved β %s", onnx_path)
    return str(onnx_path)
|
|
|
|
def export_tflite(
    weights: str = "best.pt",
    imgsz: int = 416,
    int8: bool = False,
) -> str:
    """Export a YOLO checkpoint to TFLite for Raspberry Pi / mobile use.

    Args:
        weights: path to the .pt checkpoint to export.
        imgsz: square input size in pixels.
        int8: apply INT8 post-training quantization during export.

    Returns:
        Path of the exported .tflite file as a string.
    """
    tflite_path = YOLO(weights).export(format="tflite", imgsz=imgsz, int8=int8)
    logger.info("TFLite model saved β %s", tflite_path)
    return str(tflite_path)
|
|
|
|
def export_torchscript(
    weights: str = "best.pt",
    imgsz: int = 640,
) -> str:
    """Export a YOLO checkpoint to TorchScript.

    NOTE(review): default imgsz here is 640 while the sibling exporters
    default to 416 — confirm whether that asymmetry is intentional.

    Returns:
        Path of the exported TorchScript file as a string.
    """
    ts_path = YOLO(weights).export(format="torchscript", imgsz=imgsz)
    logger.info("TorchScript model saved β %s", ts_path)
    return str(ts_path)
|
|
|
|
| |
| |
| |
|
|
def quantize_int8(
    weights: str = "best.pt",
    output_path: str = "best_quantized.pt",
) -> str:
    """
    Apply dynamic INT8 quantization and save the quantized model.

    Notes:
        * PyTorch dynamic quantization only replaces Linear / RNN
          modules; Conv2d entries in the module set are silently left in
          float, so on a conv-heavy detector the size win comes from the
          head's Linear layers only.
        * The full quantized module is saved (not just its state_dict):
          a dynamically quantized state_dict contains packed params that
          cannot be loaded back into the original float architecture,
          which made the previous state_dict artifact unusable.

    Args:
        weights: path to the source .pt checkpoint.
        output_path: destination for the quantized model.

    Returns:
        output_path, unchanged, for chaining.
    """
    from torch.quantization import quantize_dynamic

    model = YOLO(weights).model
    # Conv2d is kept in the set for forward compatibility, but today it
    # is a no-op — only nn.Linear is actually swapped by quantize_dynamic.
    quantized = quantize_dynamic(
        model,
        {torch.nn.Linear, torch.nn.Conv2d},
        dtype=torch.qint8,
    )
    # Save the whole module so torch.load(output_path) restores a
    # runnable quantized model without rebuilding the architecture.
    torch.save(quantized, output_path)
    logger.info("INT8 quantized model β %s", output_path)
    return output_path
|
|
|
|
| |
| |
| |
|
|
def benchmark(
    weights: str = "best.pt",
    imgsz: int = 416,
    warmup: int = 20,
    runs: int = 100,
    device: int = 0,
    half: bool = True,
) -> Dict[str, Any]:
    """
    Measure single-image inference latency and throughput.

    Runs `warmup` untimed predictions (CUDA kernel compilation, cache
    warm-up), then times `runs` predictions on one random uint8 frame.

    Args:
        weights: checkpoint / engine to benchmark.
        imgsz: square input size in pixels.
        warmup: untimed iterations before measurement.
        runs: timed iterations.
        device: device selector passed through to YOLO.predict.
        half: run inference in FP16 (previously accepted but ignored —
            now forwarded to predict).

    Returns:
        dict with: mean_ms, std_ms, min_ms, max_ms, fps, plus run metadata.
    """
    model = YOLO(weights)

    # Fixed random frame: content doesn't matter for timing, only shape.
    dummy = np.random.randint(0, 255, (imgsz, imgsz, 3), dtype=np.uint8)
    for _ in range(warmup):
        model.predict(dummy, imgsz=imgsz, device=device, half=half, verbose=False)

    times = []
    for _ in range(runs):
        t0 = time.perf_counter()
        model.predict(dummy, imgsz=imgsz, device=device, half=half, verbose=False)
        times.append(time.perf_counter() - t0)

    times_ms = np.array(times) * 1000
    results = {
        "model": weights,
        "imgsz": imgsz,
        "device": str(device),
        "runs": runs,
        "mean_ms": float(times_ms.mean()),
        "std_ms": float(times_ms.std()),
        "min_ms": float(times_ms.min()),
        "max_ms": float(times_ms.max()),
        "fps": float(1000.0 / times_ms.mean()),
    }

    logger.info(
        "Benchmark: %.1f Β± %.1f ms (%.1f FPS) [%s]",
        results["mean_ms"], results["std_ms"], results["fps"], weights,
    )
    return results
|
|
|
|
| |
| |
| |
|
|
def package_for_edge(
    yolo_weights: str = "best.pt",
    bigru_weights: Optional[str] = None,
    imgsz: int = 416,
    output_dir: str = "edge_package",
) -> str:
    """
    Create a self-contained deployment package for edge devices.

    Package contents:
        edge_package/
            model.tflite   β quantised YOLO
            bigru.pth      β severity model (if available)
            inference.py   β standalone inference script
            config.yaml    β runtime config

    Args:
        yolo_weights: detector checkpoint to export to TFLite.
        bigru_weights: optional severity-model checkpoint to bundle.
        imgsz: square input size baked into the export and config.
        output_dir: destination directory (created if missing).

    Returns:
        output_dir as a string.
    """
    import shutil

    import yaml

    out = Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)

    # INT8 TFLite export of the detector.
    tflite_path = export_tflite(yolo_weights, imgsz=imgsz, int8=True)
    shutil.copy2(tflite_path, out / "model.tflite")

    # Only advertise the BiGRU in the config if it was actually bundled.
    # (Previously the config said "bigru.pth" whenever a path was given,
    # even when the file didn't exist and was never copied.)
    has_bigru = bool(bigru_weights) and Path(bigru_weights).exists()
    if has_bigru:
        shutil.copy2(bigru_weights, out / "bigru.pth")

    config = {
        "model": "model.tflite",
        "bigru": "bigru.pth" if has_bigru else None,
        "imgsz": imgsz,
        "conf_threshold": 0.5,
        "iou_threshold": 0.45,
        "classes": ["Alligator Crack", "Longitudinal Crack", "Pothole", "Transverse Crack"],
        "severity_labels": ["Minor", "Moderate", "Severe", "Critical"],
    }
    with open(out / "config.yaml", "w") as f:
        yaml.dump(config, f)

    # Ship the standalone inference script alongside, if present.
    inference_src = Path(__file__).parent / "inference.py"
    if inference_src.exists():
        shutil.copy2(inference_src, out / "inference.py")

    logger.info("Edge package created β %s", out)
    return str(out)
|
|
|
|
| |
| |
| |
|
|
| if __name__ == "__main__": |
| import argparse |
|
|
| parser = argparse.ArgumentParser(description="Deploy & Optimise Road Anomaly Models") |
| sub = parser.add_subparsers(dest="cmd", required=True) |
|
|
| |
| p_exp = sub.add_parser("export", help="Export model") |
| p_exp.add_argument("--weights", default="best.pt") |
| p_exp.add_argument("--format", default="engine", |
| choices=["engine", "onnx", "tflite", "torchscript"]) |
| p_exp.add_argument("--imgsz", type=int, default=416) |
| p_exp.add_argument("--half", action="store_true", default=True) |
| p_exp.add_argument("--int8", action="store_true") |
|
|
| |
| p_q = sub.add_parser("quantize", help="INT8 quantization") |
| p_q.add_argument("--weights", default="best.pt") |
| p_q.add_argument("--output", default="best_quantized.pt") |
|
|
| |
| p_b = sub.add_parser("benchmark", help="Measure speed") |
| p_b.add_argument("--weights", default="best.pt") |
| p_b.add_argument("--imgsz", type=int, default=416) |
| p_b.add_argument("--runs", type=int, default=100) |
|
|
| |
| p_p = sub.add_parser("package", help="Package for edge deployment") |
| p_p.add_argument("--weights", default="best.pt") |
| p_p.add_argument("--bigru", default=None) |
| p_p.add_argument("--imgsz", type=int, default=416) |
| p_p.add_argument("--output-dir", default="edge_package") |
|
|
| args = parser.parse_args() |
|
|
| if args.cmd == "export": |
| dispatch = { |
| "engine": export_tensorrt, |
| "onnx": export_onnx, |
| "tflite": export_tflite, |
| "torchscript": export_torchscript, |
| } |
| fn = dispatch[args.format] |
| kwargs = {"weights": args.weights, "imgsz": args.imgsz} |
| if args.format == "engine": |
| kwargs.update(half=args.half, int8=args.int8) |
| elif args.format == "tflite": |
| kwargs["int8"] = args.int8 |
| fn(**kwargs) |
|
|
| elif args.cmd == "quantize": |
| quantize_int8(args.weights, args.output) |
|
|
| elif args.cmd == "benchmark": |
| results = benchmark(args.weights, imgsz=args.imgsz, runs=args.runs) |
| import json |
| print(json.dumps(results, indent=2)) |
|
|
| elif args.cmd == "package": |
| package_for_edge( |
| yolo_weights=args.weights, |
| bigru_weights=args.bigru, |
| imgsz=args.imgsz, |
| output_dir=args.output_dir, |
| ) |
|
|