#!/usr/bin/env python3
"""
High-Accuracy Training Script for Road Anomaly Detection
=========================================================
Optimised for: RTX 2050 (4 GB), i5-12450H, 15 GB RAM
Model: YOLO11s — 9.4M params, 21.5 GFLOPs (3.6× more than 11n)

Usage:
    python train_high_accuracy.py              # Full training (300 epochs)
    python train_high_accuracy.py --dry-run    # Quick 2-epoch test run
"""

import os
import sys
import shutil
import logging
import argparse
from pathlib import Path
from datetime import datetime
from typing import Optional

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler(), logging.FileHandler("training_optimised.log")],
)
logger = logging.getLogger("train_optimised")


def _pick_batch_size(vram_gb):
    """Return the training batch size for a GPU with *vram_gb* GiB of memory.

    YOLO11s at 640px: ~2.2 GB at batch=2, ~3.2 GB at batch=4.
    A value of 0 (no GPU detected) falls through to the smallest batch.
    """
    if vram_gb >= 6:
        return 8
    if vram_gb >= 4:
        return 4
    return 2  # Safe for RTX 2050 (3.7 GB) with YOLO11s


def _select_device(cuda_available):
    """Return the device argument for train/val: GPU 0 if CUDA is usable, else "cpu".

    Passing a CUDA index on a machine without CUDA makes the run abort, so
    the CPU fallback is what lets the "very slow" no-GPU path actually run.
    """
    return 0 if cuda_available else "cpu"


def _f1_score(precision, recall):
    """Return the harmonic mean of *precision* and *recall* (0.0 if both are 0)."""
    total = precision + recall
    return 2 * precision * recall / total if total > 0 else 0.0


def _per_class_ap50(names, class_indices, ap_values):
    """Pair each AP value with the name of the class it belongs to.

    Args:
        names: Mapping (or sequence) from class id to class name.
        class_indices: Class id for each entry of *ap_values*.
        ap_values: AP values, one per class that has validation results.

    Returns:
        List of ``(class_name, ap)`` tuples in the order given.

    The AP array only covers classes that have validation results, so its
    position is not a class id; the explicit index keeps labels correct when
    a class is absent from the validation set.
    """
    return [(names[int(idx)], float(ap)) for idx, ap in zip(class_indices, ap_values)]


def _newest_best_weights(search_roots, exclude=()):
    """Find the most recently modified ``best.pt`` under the first root that has one.

    Args:
        search_roots: Directories to search, in priority order. Roots that do
            not exist are skipped.
        exclude: Paths that must never be returned (compared after resolving),
            e.g. the locations the weights are about to be copied to.

    Returns:
        Path of the newest match in the first root containing any match, or
        ``None`` if no root contains one.
    """
    excluded = {Path(p).resolve() for p in exclude}
    for root in search_roots:
        root = Path(root)
        if not root.exists():
            continue
        candidates = [p for p in root.rglob("best.pt") if p.resolve() not in excluded]
        if candidates:
            return max(candidates, key=lambda p: p.stat().st_mtime)
    return None


def _build_train_kwargs(data_yaml, epochs, batch, device, run_name, is_dry_run):
    """Return the keyword arguments passed to ``model.train``.

    Args:
        data_yaml: Path to the dataset YAML file.
        epochs: Number of training epochs.
        batch: Batch size.
        device: Device argument (CUDA index or "cpu").
        run_name: Name of the run directory inside the project directory.
        is_dry_run: When true, early stopping is disabled (patience=0).
    """
    return dict(
        # ── Data ──
        data=str(data_yaml),
        imgsz=640,                  # Match native 600×600 (padded to 640)
        # ── Training schedule ──
        epochs=epochs,
        patience=0 if is_dry_run else 50,  # No early stop in dry run
        batch=batch,                # Fit in 4 GB VRAM
        # ── Optimiser ──
        optimizer="AdamW",
        lr0=0.002,                  # Slightly higher LR for small dataset
        lrf=0.01,                   # Final LR = lr0 × lrf (cosine decay)
        momentum=0.937,
        weight_decay=0.0005,
        warmup_epochs=10,           # Longer warmup for stability
        warmup_momentum=0.5,
        warmup_bias_lr=0.01,
        # ── Augmentation (aggressive for small dataset) ──
        hsv_h=0.02,                 # Hue shift
        hsv_s=0.75,                 # Saturation shift
        hsv_v=0.5,                  # Value/brightness shift
        degrees=15.0,               # Rotation ±15°
        translate=0.2,              # Translation ±20%
        scale=0.5,                  # Scale ±50%
        shear=5.0,                  # Shear ±5°
        perspective=0.0001,         # Slight perspective warp
        flipud=0.1,                 # Vertical flip (road can be upside-down in data)
        fliplr=0.5,                 # Horizontal flip
        mosaic=1.0,                 # Full mosaic — critical for small datasets
        mixup=0.15,                 # Mix images to reduce overfitting
        copy_paste=0.1,             # Copy-paste augmentation
        erasing=0.2,                # Random erasing (dropout-like)
        close_mosaic=20,            # Disable mosaic last 20 epochs for fine-tuning
        # ── Performance ──
        device=device,
        workers=4,                  # 12 threads but limited RAM
        cache="disk",               # Don't eat RAM (only 15 GB)
        amp=True,                   # Mixed precision — saves VRAM
        # ── Saving ──
        project="road_anomaly",
        name=run_name,
        exist_ok=True,
        save=True,
        save_period=25,             # Checkpoint every 25 epochs
        val=True,
        plots=True,
        # ── Advanced ──
        cos_lr=True,                # Cosine learning rate schedule
        nbs=64,                     # Nominal batch size for LR scaling
    )


def main():
    """Train YOLO11s on the road-anomaly dataset, copy the weights, and validate.

    Reads ``--dry-run`` from the command line (2 epochs instead of 300).
    Exits with status 1 if a dependency is missing, the dataset YAML is not
    found, training raises, or no ``best.pt`` can be located afterwards.
    A failure during the final validation is logged but does not change the
    exit status, because the weights have already been copied.
    """
    parser = argparse.ArgumentParser(description="Train YOLO11s for road anomaly detection")
    parser.add_argument("--dry-run", action="store_true",
                        help="Quick 2-epoch test to verify everything works")
    args = parser.parse_args()

    # Imported lazily so a missing dependency yields an install hint, not a traceback.
    try:
        import torch
        from ultralytics import YOLO
    except ImportError as e:
        print(f"Missing dependency: {e}")
        print("Run: pip install ultralytics torch")
        sys.exit(1)

    is_dry_run = args.dry_run
    epochs = 2 if is_dry_run else 300
    run_name = "dry_run" if is_dry_run else "high_accuracy_s"

    print()
    print("=" * 60)
    if is_dry_run:
        print(" DRY RUN — 2 epochs to verify setup")
    else:
        print(" HIGH-ACCURACY ROAD ANOMALY DETECTION TRAINING")
        print(" YOLO11s • RTX 2050 (4 GB) optimised")
    print("=" * 60)
    print(f" Started: {datetime.now():%Y-%m-%d %H:%M:%S}")
    print()

    # ── GPU check ──
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        gpu = torch.cuda.get_device_properties(0)
        vram_gb = gpu.total_memory / (1024 ** 3)
        print(f" GPU: {gpu.name} ({vram_gb:.1f} GB)")
    else:
        vram_gb = 0
        print(" WARNING: No GPU — training will be very slow")
    device = _select_device(cuda_available)

    # ── Pick batch size based on VRAM ──
    batch = _pick_batch_size(vram_gb)

    # ── Dataset path (use absolute path to avoid any issues) ──
    script_dir = Path(__file__).resolve().parent
    data_yaml = script_dir / "dataset" / "data.yaml"
    if not data_yaml.exists():
        print(f" ERROR: Dataset not found: {data_yaml}")
        sys.exit(1)

    # Count images (informational only)
    train_imgs = list((script_dir / "dataset" / "train" / "images").glob("*.jpg"))
    valid_imgs = list((script_dir / "dataset" / "valid" / "images").glob("*.jpg"))
    print(f" Dataset: {len(train_imgs)} train / {len(valid_imgs)} val images")
    print(f" Batch size: {batch}")
    print(" Image size: 640 (native 600x600 — no downscaling)")
    print(f" Epochs: {epochs}")
    print()

    # ── Load model ──
    # YOLO11s: 9.4M params, 21.5 GFLOPs — 3.6× more capacity than 11n
    # Fits in 3.7 GB VRAM at batch=2 with AMP (~2.2 GB)
    model_name = "yolo11s.pt"
    print(f" Base model: {model_name}")
    model = YOLO(model_name)

    # ══════════════════════════════════════════════════════════════════
    # TRAINING — optimised hyperparameters
    # ══════════════════════════════════════════════════════════════════
    train_kwargs = _build_train_kwargs(
        data_yaml, epochs=epochs, batch=batch, device=device,
        run_name=run_name, is_dry_run=is_dry_run,
    )
    try:
        model.train(**train_kwargs)
    except Exception as e:
        # logger.exception records the traceback in the log file as well.
        logger.exception("Training failed: %s", e)
        sys.exit(1)

    # ══════════════════════════════════════════════════════════════════
    # Post-training — copy weights & validate
    # ══════════════════════════════════════════════════════════════════
    # Get save_dir from the trainer (NOT from the train() return value, which is metrics)
    save_dir = Path(model.trainer.save_dir)
    logger.info("Training save dir: %s", save_dir)

    dest_dir = script_dir / "runs"
    dest_best = dest_dir / "best.pt"
    root_best = script_dir / "best.pt"

    best_src = save_dir / "weights" / "best.pt"
    last_src = save_dir / "weights" / "last.pt"

    # Fallback: search if the expected path doesn't exist. The copy
    # destinations are excluded so a leftover copy from an earlier run is
    # never mistaken for this run's output (and never copied onto itself).
    if not best_src.exists():
        logger.warning("best.pt not at expected path: %s", best_src)
        logger.info("Searching for best.pt...")
        found = _newest_best_weights(
            [Path("road_anomaly"), Path("runs"), script_dir],
            exclude=[dest_best, root_best],
        )
        if found is not None:
            best_src = found
            last_src = found.parent / "last.pt"
            logger.info("Found best.pt at: %s", best_src)

    if not best_src.exists():
        logger.error("FATAL: best.pt not found anywhere after training!")
        logger.error("Check these directories manually:")
        logger.error(" %s", save_dir)
        for p in Path(".").rglob("best.pt"):
            logger.error(" Found: %s", p)
        sys.exit(1)

    # Copy to standard locations
    dest_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy2(best_src, dest_best)
    logger.info("Best model copied to: %s", dest_best)

    # Also copy to project root for convenience
    shutil.copy2(best_src, root_best)
    logger.info("Best model copied to: %s", root_best)

    if last_src.exists():
        shutil.copy2(last_src, dest_dir / "last.pt")
        logger.info("Last model copied to: %s", dest_dir / "last.pt")

    # ── Final validation ──
    print()
    print("=" * 60)
    print(" FINAL VALIDATION")
    print("=" * 60)

    try:
        best_model = YOLO(str(dest_best))
        metrics = best_model.val(data=str(data_yaml), imgsz=640, device=device)

        p = metrics.box.mp
        r = metrics.box.mr
        f1 = _f1_score(p, r)

        print(f" mAP@0.5: {metrics.box.map50*100:.1f}%")
        print(f" mAP@0.5:0.95: {metrics.box.map*100:.1f}%")
        print(f" Precision: {p*100:.1f}%")
        print(f" Recall: {r*100:.1f}%")
        print(f" F1-score: {f1*100:.1f}%")
        print(f" Inference: {metrics.speed['inference']:.1f} ms/image")
        print()

        # Per-class
        print(" Per-class mAP@0.5:")
        per_class = _per_class_ap50(
            best_model.names, metrics.box.ap_class_index, metrics.box.ap50
        )
        for class_name, ap in per_class:
            print(f" {class_name:>20s}: {ap*100:.1f}%")
        print("=" * 60)
    except Exception as e:
        # Best-effort: the weights are already saved, so report and carry on.
        logger.error("Validation failed: %s", e)
        print(" Validation failed but model was saved successfully.")
        print(f" Model at: {dest_best}")

    print()
    print(f" Finished: {datetime.now():%Y-%m-%d %H:%M:%S}")
    print(f" Model saved to: {dest_best}")
    print(" Run 'python evaluate.py' to re-check anytime.")
    print()


if __name__ == "__main__":
    main()