arm-model / model /deploy.py
pragadeeshv23's picture
Upload folder using huggingface_hub
5b86813 verified
#!/usr/bin/env python3
"""
Deployment & Optimization Utilities
Based on: Nature Scientific Reports (Nov 2025) - YOLOv11 + CNN-BiGRU
Features:
  • TensorRT FP16 export (105 FPS on RTX 3090)
  • ONNX export (cross-platform)
  • TFLite export (Raspberry Pi / edge)
  • INT8 post-training quantization
  • Benchmark latency / throughput
"""
import os
import sys
import time
import logging
from pathlib import Path
from typing import Optional, Dict, Any
import numpy as np
import torch
from ultralytics import YOLO
# Module-level logger used by every helper in this file.
logger = logging.getLogger("deploy")

# Configure root logging at import time: INFO level, with timestamp, level
# and logger name in front of each message.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
    level=logging.INFO,
)
# ═══════════════════════════════════════════════════════════════════════════
# Export helpers
# ═══════════════════════════════════════════════════════════════════════════
def export_tensorrt(
    weights: str = "best.pt",
    imgsz: int = 416,
    half: bool = True,
    int8: bool = False,
    workspace: int = 2,
    device: int = 0,
) -> str:
    """
    Export a YOLO checkpoint to a TensorRT engine (.engine).

    The defaults (workspace=2, imgsz=416) were chosen for an RTX 2050 with
    4 GB of VRAM.

    Args:
        weights: Path to the checkpoint handed to ``YOLO``.
        imgsz: Input image size passed to the exporter.
        half: Forwarded to the exporter as ``half`` (FP16 export).
        int8: Forwarded to the exporter as ``int8`` (INT8 export).
        workspace: Forwarded to the exporter as ``workspace``.
        device: Device passed to the exporter. Defaults to 0, the value that
            was previously hard-coded, so existing callers are unaffected.

    Returns:
        The exported engine path as a string.
    """
    model = YOLO(weights)
    engine_path = model.export(
        format="engine",
        imgsz=imgsz,
        half=half,
        int8=int8,
        workspace=workspace,
        device=device,
    )
    logger.info("TensorRT engine saved β†’ %s", engine_path)
    return str(engine_path)
def export_onnx(
    weights: str = "best.pt",
    imgsz: int = 416,
    half: bool = False,
    simplify: bool = True,
) -> str:
    """
    Export a YOLO checkpoint to ONNX for cross-platform inference.

    Args:
        weights: Path to the checkpoint handed to ``YOLO``.
        imgsz: Input image size passed to the exporter.
        half: Forwarded to the exporter as ``half``.
        simplify: Forwarded to the exporter as ``simplify``.

    Returns:
        The exported ONNX path as a string.
    """
    export_options = {
        "format": "onnx",
        "imgsz": imgsz,
        "half": half,
        "simplify": simplify,
    }
    onnx_file = YOLO(weights).export(**export_options)
    logger.info("ONNX model saved β†’ %s", onnx_file)
    return str(onnx_file)
def export_tflite(
    weights: str = "best.pt",
    imgsz: int = 416,
    int8: bool = False,
) -> str:
    """
    Export a YOLO checkpoint to TFLite (Raspberry Pi / mobile deployment).

    Args:
        weights: Path to the checkpoint handed to ``YOLO``.
        imgsz: Input image size passed to the exporter.
        int8: Forwarded to the exporter as ``int8``.

    Returns:
        The exported TFLite path as a string.
    """
    export_options = {"format": "tflite", "imgsz": imgsz, "int8": int8}
    tflite_file = YOLO(weights).export(**export_options)
    logger.info("TFLite model saved β†’ %s", tflite_file)
    return str(tflite_file)
def export_torchscript(
    weights: str = "best.pt",
    imgsz: int = 640,
) -> str:
    """
    Export a YOLO checkpoint to TorchScript.

    Args:
        weights: Path to the checkpoint handed to ``YOLO``.
        imgsz: Input image size passed to the exporter.

    Returns:
        The exported TorchScript path as a string.
    """
    # NOTE(review): imgsz defaults to 640 here but 416 in the other export
    # helpers in this file - confirm whether the difference is intentional.
    exporter = YOLO(weights)
    script_file = exporter.export(format="torchscript", imgsz=imgsz)
    logger.info("TorchScript model saved β†’ %s", script_file)
    return str(script_file)
# ═══════════════════════════════════════════════════════════════════════════
# INT8 Post-Training Quantization (PyTorch)
# ═══════════════════════════════════════════════════════════════════════════
def quantize_int8(
    weights: str = "best.pt",
    output_path: str = "best_quantized.pt",
) -> str:
    """
    Apply PyTorch dynamic INT8 quantization and save the resulting state dict.

    The underlying ``torch.nn.Module`` is taken from ``YOLO(weights).model``
    and passed to ``quantize_dynamic`` with ``{Linear, Conv2d}`` as the set of
    layer types to target. Only the quantized ``state_dict()`` is written to
    ``output_path`` - not the full module - so loading it back requires
    rebuilding an identically quantized model first.

    NOTE(review): PyTorch documents dynamic quantization for Linear and
    recurrent layers; Conv2d entries in the spec may be left unquantized,
    in which case the size reduction will be smaller than the "~4x" this
    function used to advertise - confirm on the target PyTorch version.

    Args:
        weights: Path to the checkpoint handed to ``YOLO``.
        output_path: Destination file for the quantized state dict. Missing
            parent directories are created.

    Returns:
        ``output_path`` unchanged.
    """
    # ``torch.quantization`` is the legacy namespace; prefer the current
    # ``torch.ao.quantization`` and fall back for older PyTorch releases.
    try:
        from torch.ao.quantization import quantize_dynamic
    except ImportError:
        from torch.quantization import quantize_dynamic

    model = YOLO(weights).model
    quantized = quantize_dynamic(
        model,
        {torch.nn.Linear, torch.nn.Conv2d},
        dtype=torch.qint8,
    )

    # torch.save does not create directories; make sure the target exists.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    torch.save(quantized.state_dict(), output_path)
    logger.info("INT8 quantized model β†’ %s", output_path)
    return output_path
# ═══════════════════════════════════════════════════════════════════════════
# Benchmark
# ═══════════════════════════════════════════════════════════════════════════
def benchmark(
    weights: str = "best.pt",
    imgsz: int = 416,
    warmup: int = 20,
    runs: int = 100,
    device: int = 0,
    half: bool = True,
) -> Dict[str, Any]:
    """
    Measure inference latency and throughput on a random dummy image.

    Args:
        weights: Path to the model handed to ``YOLO``.
        imgsz: Side length of the square dummy image and the ``imgsz``
            passed to ``predict``.
        warmup: Number of untimed warm-up predictions.
        runs: Number of timed predictions; must be at least 1.
        device: Device passed to ``predict``.
        half: Request FP16 inference. Only forwarded as ``half=True`` when
            CUDA is available and ``device`` is not "cpu"/"mps"; otherwise
            the benchmark runs without it.

    Returns:
        Dict with keys ``model``, ``imgsz``, ``device``, ``runs``, ``half``,
        ``mean_ms``, ``std_ms``, ``min_ms``, ``max_ms`` and ``fps``.

    Raises:
        ValueError: If ``runs`` is smaller than 1 (the statistics would
            otherwise be NaN).
    """
    if runs < 1:
        raise ValueError(f"runs must be >= 1, got {runs}")

    model = YOLO(weights)

    # CUDA work is asynchronous, so wall-clock timing is only meaningful if
    # we synchronize around each timed call. The same flag gates FP16.
    on_cuda = torch.cuda.is_available() and str(device).lower() not in ("cpu", "mps")
    use_half = bool(half) and on_cuda

    # High bound is exclusive: 256 yields the full uint8 range 0..255.
    dummy = np.random.randint(0, 256, (imgsz, imgsz, 3), dtype=np.uint8)

    def _predict_once() -> None:
        model.predict(dummy, imgsz=imgsz, device=device, half=use_half, verbose=False)

    # Warm-up
    for _ in range(warmup):
        _predict_once()

    times = []
    for _ in range(runs):
        if on_cuda:
            torch.cuda.synchronize()
        t0 = time.perf_counter()
        _predict_once()
        if on_cuda:
            torch.cuda.synchronize()
        times.append(time.perf_counter() - t0)

    times_ms = np.array(times) * 1000
    mean_ms = float(times_ms.mean())
    results = {
        "model": weights,
        "imgsz": imgsz,
        "device": str(device),
        "runs": runs,
        "half": use_half,
        "mean_ms": mean_ms,
        "std_ms": float(times_ms.std()),
        "min_ms": float(times_ms.min()),
        "max_ms": float(times_ms.max()),
        "fps": float(1000.0 / mean_ms),
    }
    logger.info(
        "Benchmark: %.1f Β± %.1f ms (%.1f FPS) [%s]",
        results["mean_ms"], results["std_ms"], results["fps"], weights,
    )
    return results
# ═══════════════════════════════════════════════════════════════════════════
# Raspberry Pi packaging helper
# ═══════════════════════════════════════════════════════════════════════════
def package_for_edge(
    yolo_weights: str = "best.pt",
    bigru_weights: Optional[str] = None,
    imgsz: int = 416,
    output_dir: str = "edge_package",
) -> str:
    """
    Create a deployment package directory for edge devices.

    Package contents:
        <output_dir>/
            model.tflite  - YOLO exported to TFLite with int8=True
            bigru.pth     - severity model (only if the file exists)
            inference.py  - copied from next to this module (only if present)
            config.yaml   - runtime config

    Args:
        yolo_weights: YOLO checkpoint passed to ``export_tflite``.
        bigru_weights: Optional path to BiGRU weights to bundle.
        imgsz: Export image size, also written to the config.
        output_dir: Directory to create/populate.

    Returns:
        The package directory as a string.
    """
    # Imported locally so the rest of the module works without PyYAML.
    import shutil
    import yaml

    out = Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)

    # Export TFLite
    tflite_path = export_tflite(yolo_weights, imgsz=imgsz, int8=True)
    shutil.copy2(tflite_path, out / "model.tflite")

    # Copy BiGRU weights; remember whether the file actually made it into the
    # package so the config never points at a file that is not there.
    bigru_packaged = False
    if bigru_weights:
        if Path(bigru_weights).exists():
            shutil.copy2(bigru_weights, out / "bigru.pth")
            bigru_packaged = True
        else:
            logger.warning("BiGRU weights not found, skipping: %s", bigru_weights)

    # Runtime config
    config = {
        "model": "model.tflite",
        "bigru": "bigru.pth" if bigru_packaged else None,
        "imgsz": imgsz,
        "conf_threshold": 0.5,
        "iou_threshold": 0.45,
        "classes": ["Alligator Crack", "Longitudinal Crack", "Pothole", "Transverse Crack"],
        "severity_labels": ["Minor", "Moderate", "Severe", "Critical"],
    }
    with open(out / "config.yaml", "w", encoding="utf-8") as f:
        yaml.dump(config, f)

    # Copy inference script
    inference_src = Path(__file__).parent / "inference.py"
    if inference_src.exists():
        shutil.copy2(inference_src, out / "inference.py")
    else:
        logger.warning("inference.py not found next to %s; package has no inference script", __file__)

    logger.info("Edge package created β†’ %s", out)
    return str(out)
# ═══════════════════════════════════════════════════════════════════════════
# CLI
# ═══════════════════════════════════════════════════════════════════════════
def build_arg_parser():
    """
    Build the command-line parser with the export / quantize / benchmark /
    package sub-commands.

    ``--half`` stays enabled by default; ``--no-half`` is provided because a
    ``store_true`` flag whose default is already True could never be turned
    off from the command line.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Deploy & Optimise Road Anomaly Models")
    sub = parser.add_subparsers(dest="cmd", required=True)

    # export
    p_exp = sub.add_parser("export", help="Export model")
    p_exp.add_argument("--weights", default="best.pt")
    p_exp.add_argument("--format", default="engine",
                       choices=["engine", "onnx", "tflite", "torchscript"])
    p_exp.add_argument("--imgsz", type=int, default=416)
    p_exp.add_argument("--half", dest="half", action="store_true", default=True,
                       help="FP16 export for the engine format (default)")
    p_exp.add_argument("--no-half", dest="half", action="store_false",
                       help="Disable FP16 export for the engine format")
    p_exp.add_argument("--int8", action="store_true")

    # quantize
    p_q = sub.add_parser("quantize", help="INT8 quantization")
    p_q.add_argument("--weights", default="best.pt")
    p_q.add_argument("--output", default="best_quantized.pt")

    # benchmark
    p_b = sub.add_parser("benchmark", help="Measure speed")
    p_b.add_argument("--weights", default="best.pt")
    p_b.add_argument("--imgsz", type=int, default=416)
    p_b.add_argument("--runs", type=int, default=100)

    # package
    p_p = sub.add_parser("package", help="Package for edge deployment")
    p_p.add_argument("--weights", default="best.pt")
    p_p.add_argument("--bigru", default=None)
    p_p.add_argument("--imgsz", type=int, default=416)
    p_p.add_argument("--output-dir", default="edge_package")

    return parser


def main(argv=None) -> None:
    """
    Parse ``argv`` (``sys.argv[1:]`` when None) and run the chosen sub-command.
    """
    args = build_arg_parser().parse_args(argv)

    if args.cmd == "export":
        dispatch = {
            "engine": export_tensorrt,
            "onnx": export_onnx,
            "tflite": export_tflite,
            "torchscript": export_torchscript,
        }
        fn = dispatch[args.format]
        kwargs = {"weights": args.weights, "imgsz": args.imgsz}
        # Only forward the precision flags to exporters that are given them
        # here: half/int8 for TensorRT, int8 for TFLite.
        if args.format == "engine":
            kwargs.update(half=args.half, int8=args.int8)
        elif args.format == "tflite":
            kwargs["int8"] = args.int8
        fn(**kwargs)
    elif args.cmd == "quantize":
        quantize_int8(args.weights, args.output)
    elif args.cmd == "benchmark":
        import json

        results = benchmark(args.weights, imgsz=args.imgsz, runs=args.runs)
        print(json.dumps(results, indent=2))
    elif args.cmd == "package":
        package_for_edge(
            yolo_weights=args.weights,
            bigru_weights=args.bigru,
            imgsz=args.imgsz,
            output_dir=args.output_dir,
        )


if __name__ == "__main__":
    main()