Upload folder using huggingface_hub

5b86813 verified 7 days ago

10.6 kB

	#!/usr/bin/env python3
	"""
	Deployment & Optimization Utilities
	Based on: Nature Scientific Reports (Nov 2025) - YOLOv11 + CNN-BiGRU

	Features:
	• TensorRT FP16 export (105 FPS on RTX 3090)
	• ONNX export (cross-platform)
	• TFLite export (Raspberry Pi / edge)
	• INT8 post-training quantization
	• Benchmark latency / throughput
	"""

	import os
	import sys
	import time
	import logging
	from pathlib import Path
	from typing import Optional, Dict, Any

	import numpy as np
	import torch
	from ultralytics import YOLO

	# Configure the root logger for this script: INFO level, with timestamp,
	# level name and logger name in front of every message.
	logging.basicConfig(
	    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
	    level=logging.INFO,
	)
	# Shared logger used by every helper in this file.
	logger = logging.getLogger("deploy")


	# ═══════════════════════════════════════════════════════════════════════════
	# Export helpers
	# ═══════════════════════════════════════════════════════════════════════════

	def export_tensorrt(
	    weights: str = "best.pt",
	    imgsz: int = 416,
	    half: bool = True,
	    int8: bool = False,
	    workspace: int = 2,
	    device: int = 0,
	) -> str:
	    """
	    Export YOLOv11 to TensorRT engine (.engine).
	    Tuned for RTX 2050 (4 GB VRAM) – workspace=2 GB, imgsz=416.

	    Args:
	        weights: Path of the checkpoint handed to ``YOLO(...)``.
	        imgsz: Image size forwarded to the exporter.
	        half: Forwarded as the exporter's ``half`` option.
	        int8: Forwarded as the exporter's ``int8`` option.
	        workspace: Forwarded as the exporter's ``workspace`` option.
	        device: Device index forwarded to the exporter. Defaults to 0,
	            which was previously hard-coded; expose it so multi-GPU hosts
	            can build the engine on another card.

	    Returns:
	        The exporter's return value (the engine path) converted to ``str``.
	    """
	    model = YOLO(weights)
	    path = model.export(
	        format="engine",
	        imgsz=imgsz,
	        half=half,
	        int8=int8,
	        workspace=workspace,
	        device=device,
	    )
	    logger.info("TensorRT engine saved → %s", path)
	    return str(path)


	def export_onnx(
	    weights: str = "best.pt",
	    imgsz: int = 416,
	    half: bool = False,
	    simplify: bool = True,
	) -> str:
	    """
	    Export the checkpoint to ONNX for cross-platform inference.

	    Args:
	        weights: Path of the checkpoint handed to ``YOLO(...)``.
	        imgsz: Image size forwarded to the exporter.
	        half: Forwarded as the exporter's ``half`` option.
	        simplify: Forwarded as the exporter's ``simplify`` option.

	    Returns:
	        The exporter's return value (the ONNX path) converted to ``str``.
	    """
	    # Collect the exporter options in one place, then run the export.
	    export_options = {
	        "format": "onnx",
	        "imgsz": imgsz,
	        "half": half,
	        "simplify": simplify,
	    }
	    exported = YOLO(weights).export(**export_options)
	    logger.info("ONNX model saved → %s", exported)
	    return str(exported)


	def export_tflite(
	    weights: str = "best.pt",
	    imgsz: int = 416,
	    int8: bool = False,
	) -> str:
	    """
	    Export the checkpoint to TFLite for Raspberry Pi / mobile deployment.

	    Args:
	        weights: Path of the checkpoint handed to ``YOLO(...)``.
	        imgsz: Image size forwarded to the exporter.
	        int8: Forwarded as the exporter's ``int8`` option.

	    Returns:
	        The exporter's return value (the TFLite path) converted to ``str``.
	    """
	    # Collect the exporter options in one place, then run the export.
	    export_options = {"format": "tflite", "imgsz": imgsz, "int8": int8}
	    exported = YOLO(weights).export(**export_options)
	    logger.info("TFLite model saved → %s", exported)
	    return str(exported)


	def export_torchscript(
	    weights: str = "best.pt",
	    imgsz: int = 640,
	) -> str:
	    """
	    Export the checkpoint to TorchScript.

	    Args:
	        weights: Path of the checkpoint handed to ``YOLO(...)``.
	        imgsz: Image size forwarded to the exporter.

	    Returns:
	        The exporter's return value (the TorchScript path) as ``str``.
	    """
	    exported = YOLO(weights).export(format="torchscript", imgsz=imgsz)
	    logger.info("TorchScript model saved → %s", exported)
	    return str(exported)


	# ═══════════════════════════════════════════════════════════════════════════
	# INT8 Post-Training Quantization (PyTorch)
	# ═══════════════════════════════════════════════════════════════════════════

	def quantize_int8(
	    weights: str = "best.pt",
	    output_path: str = "best_quantized.pt",
	) -> str:
	    """
	    Apply dynamic INT8 quantization to the model's ``nn.Linear`` layers.

	    PyTorch dynamic quantization does not convert convolution modules, so
	    listing ``nn.Conv2d`` in the spec has no effect: conv layers stay in
	    floating point. For a conv-dominated detector the size reduction is
	    therefore limited to whatever Linear layers exist. For INT8
	    convolutions use ``export_tensorrt(int8=True)`` or
	    ``export_tflite(int8=True)`` instead.

	    Args:
	        weights: Path of the checkpoint handed to ``YOLO(...)``.
	        output_path: Destination file; missing parent directories are
	            created.

	    Returns:
	        ``output_path`` unchanged. The file holds only the quantized
	        module's ``state_dict``, not a full checkpoint, so the caller has
	        to rebuild and re-quantize the architecture before loading it.
	    """
	    from torch.quantization import quantize_dynamic

	    model = YOLO(weights).model
	    # Quantization targets inference; make sure the module is in eval mode.
	    model.eval()

	    linear_count = sum(
	        1 for module in model.modules() if isinstance(module, torch.nn.Linear)
	    )
	    if linear_count == 0:
	        logger.warning(
	            "No nn.Linear layers found in %s; dynamic quantization will "
	            "leave the model unchanged",
	            weights,
	        )

	    # Only Linear is requested: Conv2d is not converted by dynamic
	    # quantization, so naming it here would be misleading.
	    quantized = quantize_dynamic(
	        model,
	        {torch.nn.Linear},
	        dtype=torch.qint8,
	    )

	    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
	    torch.save(quantized.state_dict(), output_path)
	    logger.info("INT8 quantized model → %s", output_path)
	    return output_path


	# ═══════════════════════════════════════════════════════════════════════════
	# Benchmark
	# ═══════════════════════════════════════════════════════════════════════════

	def benchmark(
	    weights: str = "best.pt",
	    imgsz: int = 416,
	    warmup: int = 20,
	    runs: int = 100,
	    device: int = 0,
	    half: bool = True,
	) -> Dict[str, Any]:
	    """
	    Measure inference latency and throughput.

	    Each timed sample wraps one ``model.predict`` call on a random image
	    with ``time.perf_counter``, so it covers everything ``predict`` does.

	    Args:
	        weights: Path of the model handed to ``YOLO(...)``.
	        imgsz: Side length of the square dummy image and the ``imgsz``
	            forwarded to ``predict``.
	        warmup: Number of untimed calls made before measuring.
	        runs: Number of timed calls; must be at least 1.
	        device: Device forwarded to ``predict``.
	        half: Forwarded to ``predict`` as its ``half`` option. This
	            parameter was previously accepted but ignored.

	    Returns:
	        Dict with ``model``, ``imgsz``, ``device``, ``half``, ``runs``,
	        ``mean_ms``, ``std_ms``, ``min_ms``, ``max_ms`` and ``fps``.

	    Raises:
	        ValueError: If ``runs`` is smaller than 1 (statistics over zero
	            samples would be NaN).
	    """
	    if runs < 1:
	        raise ValueError(f"runs must be >= 1, got {runs}")

	    model = YOLO(weights)
	    predict_kwargs = {
	        "imgsz": imgsz,
	        "device": device,
	        "half": half,
	        "verbose": False,
	    }

	    # randint's upper bound is exclusive; 256 covers the full uint8 range.
	    dummy = np.random.randint(0, 256, (imgsz, imgsz, 3), dtype=np.uint8)

	    # Warm-up: untimed calls so one-off initialisation is not measured.
	    for _ in range(warmup):
	        model.predict(dummy, **predict_kwargs)

	    times = []
	    for _ in range(runs):
	        t0 = time.perf_counter()
	        model.predict(dummy, **predict_kwargs)
	        times.append(time.perf_counter() - t0)

	    times_ms = np.array(times) * 1000
	    mean_ms = float(times_ms.mean())
	    results = {
	        "model": weights,
	        "imgsz": imgsz,
	        "device": str(device),
	        "half": half,
	        "runs": runs,
	        "mean_ms": mean_ms,
	        "std_ms": float(times_ms.std()),
	        "min_ms": float(times_ms.min()),
	        "max_ms": float(times_ms.max()),
	        "fps": float(1000.0 / mean_ms),
	    }

	    logger.info(
	        "Benchmark: %.1f ± %.1f ms (%.1f FPS) [%s]",
	        results["mean_ms"], results["std_ms"], results["fps"], weights,
	    )
	    return results


	# ═══════════════════════════════════════════════════════════════════════════
	# Raspberry Pi packaging helper
	# ═══════════════════════════════════════════════════════════════════════════

	def package_for_edge(
	    yolo_weights: str = "best.pt",
	    bigru_weights: Optional[str] = None,
	    imgsz: int = 416,
	    output_dir: str = "edge_package",
	) -> str:
	    """
	    Create a self-contained deployment package for edge devices.

	    Package contents:
	        edge_package/
	            model.tflite – YOLO exported via export_tflite(int8=True)
	            bigru.pth    – severity model (only if the file was found)
	            inference.py – standalone inference script (only if it sits
	                           next to this file)
	            config.yaml  – runtime config

	    Args:
	        yolo_weights: Checkpoint passed to ``export_tflite``.
	        bigru_weights: Optional path of the BiGRU weights to bundle.
	        imgsz: Image size used for the export and written to the config.
	        output_dir: Directory to create and fill.

	    Returns:
	        The output directory as ``str``.
	    """
	    # Imported up front so a missing PyYAML fails before the slow export.
	    import shutil
	    import yaml

	    out = Path(output_dir)
	    out.mkdir(parents=True, exist_ok=True)

	    # Export TFLite
	    tflite_path = export_tflite(yolo_weights, imgsz=imgsz, int8=True)
	    shutil.copy2(tflite_path, out / "model.tflite")

	    # Copy BiGRU weights; remember whether the file really made it into
	    # the package so the config never points at a missing bigru.pth.
	    bigru_packaged = False
	    if bigru_weights:
	        if Path(bigru_weights).is_file():
	            shutil.copy2(bigru_weights, out / "bigru.pth")
	            bigru_packaged = True
	        else:
	            logger.warning(
	                "BiGRU weights not found, skipping → %s", bigru_weights
	            )

	    # Runtime config
	    config = {
	        "model": "model.tflite",
	        "bigru": "bigru.pth" if bigru_packaged else None,
	        "imgsz": imgsz,
	        "conf_threshold": 0.5,
	        "iou_threshold": 0.45,
	        "classes": ["Alligator Crack", "Longitudinal Crack", "Pothole", "Transverse Crack"],
	        "severity_labels": ["Minor", "Moderate", "Severe", "Critical"],
	    }
	    with open(out / "config.yaml", "w", encoding="utf-8") as f:
	        yaml.dump(config, f)

	    # Copy inference script
	    inference_src = Path(__file__).parent / "inference.py"
	    if inference_src.exists():
	        shutil.copy2(inference_src, out / "inference.py")
	    else:
	        logger.warning(
	            "inference.py not found next to this script; package has no "
	            "inference script (looked for %s)",
	            inference_src,
	        )

	    logger.info("Edge package created → %s", out)
	    return str(out)


	# ═══════════════════════════════════════════════════════════════════════════
	# CLI
	# ═══════════════════════════════════════════════════════════════════════════

	if __name__ == "__main__":
	    # Command-line entry point: one sub-command per helper in this file.
	    import argparse
	    import json

	    parser = argparse.ArgumentParser(description="Deploy & Optimise Road Anomaly Models")
	    sub = parser.add_subparsers(dest="cmd", required=True)

	    # export
	    p_exp = sub.add_parser("export", help="Export model")
	    p_exp.add_argument("--weights", default="best.pt")
	    p_exp.add_argument("--format", default="engine",
	                       choices=["engine", "onnx", "tflite", "torchscript"])
	    p_exp.add_argument("--imgsz", type=int, default=416)
	    # FP16 stays on by default. A store_true flag that defaults to True can
	    # never be switched off, so --no-half writes False to the same dest.
	    p_exp.add_argument("--half", dest="half", action="store_true", default=True,
	                       help="FP16 export (engine format only; on by default)")
	    p_exp.add_argument("--no-half", dest="half", action="store_false",
	                       help="Disable FP16 export")
	    p_exp.add_argument("--int8", action="store_true")

	    # quantize
	    p_q = sub.add_parser("quantize", help="INT8 quantization")
	    p_q.add_argument("--weights", default="best.pt")
	    p_q.add_argument("--output", default="best_quantized.pt")

	    # benchmark
	    p_b = sub.add_parser("benchmark", help="Measure speed")
	    p_b.add_argument("--weights", default="best.pt")
	    p_b.add_argument("--imgsz", type=int, default=416)
	    p_b.add_argument("--runs", type=int, default=100)

	    # package
	    p_p = sub.add_parser("package", help="Package for edge deployment")
	    p_p.add_argument("--weights", default="best.pt")
	    p_p.add_argument("--bigru", default=None)
	    p_p.add_argument("--imgsz", type=int, default=416)
	    p_p.add_argument("--output-dir", default="edge_package")

	    args = parser.parse_args()

	    if args.cmd == "export":
	        dispatch = {
	            "engine": export_tensorrt,
	            "onnx": export_onnx,
	            "tflite": export_tflite,
	            "torchscript": export_torchscript,
	        }
	        fn = dispatch[args.format]
	        kwargs = {"weights": args.weights, "imgsz": args.imgsz}
	        # Only forward the precision flags an exporter is given here:
	        # half/int8 for TensorRT, int8 for TFLite.
	        if args.format == "engine":
	            kwargs.update(half=args.half, int8=args.int8)
	        elif args.format == "tflite":
	            kwargs["int8"] = args.int8
	        fn(**kwargs)

	    elif args.cmd == "quantize":
	        quantize_int8(args.weights, args.output)

	    elif args.cmd == "benchmark":
	        results = benchmark(args.weights, imgsz=args.imgsz, runs=args.runs)
	        print(json.dumps(results, indent=2))

	    elif args.cmd == "package":
	        package_for_edge(
	            yolo_weights=args.weights,
	            bigru_weights=args.bigru,
	            imgsz=args.imgsz,
	            output_dir=args.output_dir,
	        )