#!/usr/bin/env python3
"""
Final Training on H100 - 96GB VRAM Beast!
Merges ALL datasets and trains with maximum performance.
"""
from roboflow import Roboflow
from ultralytics import YOLO
import torch
import os
import shutil
import yaml
import glob
from pathlib import Path

print("=" * 70)
print("FINAL TRAINING ON H100 - BALANCED DATASET")
print("=" * 70)

# Check GPU
print(f"\nGPU Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.0f} GB")

# Step 1: Download all datasets from Roboflow
print("\n" + "=" * 70)
print("STEP 1: Downloading Datasets from Roboflow")
print("=" * 70)

# Read the API keys from the environment rather than hardcoding secrets:
# export ROBOFLOW_API_KEY_1 / ROBOFLOW_API_KEY_2 before running.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY_1"])

# Dataset 1: New 212 helmet images
print("\nDataset 1: New helmet images (212)...")
project1 = rf.workspace("team11s-workspace-man05").project("helmet-detection-ihomd")
ds1 = project1.version(1).download("yolov8", location=os.path.expanduser("~/helmet_212"))

# Dataset 2: Old no-helmet (499) from the first account
print("\nDataset 2: No-helmet images (499)...")
rf2 = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY_2"])
project2 = rf2.workspace("nyc-nleyq").project("indian-cctv-traffic-violations")
ds2 = project2.version(1).download("yolov8", location=os.path.expanduser("~/no_helmet_499"))

# Dataset 3: With-helmet (300) from the second account
print("\nDataset 3: With-helmet images (300)...")
project3 = rf2.workspace("vivekvarikuti").project("withhelmet")
ds3 = project3.version(1).download("yolov8", location=os.path.expanduser("~/with_helmet_300"))

# Dataset 4: Triple-riding from the original dataset (626)
print("\nDataset 4: Triple-riding (626)...")
project4 = rf2.workspace("triple-ride-rsysj").project("triple-riding-detection-pniom")
ds4 = project4.version(1).download("yolov8", location=os.path.expanduser("~/triple_riding_626"))

print("\n✅ All datasets downloaded!")

# Step 2: Merge all datasets
print("\n" + "=" * 70)
print("STEP 2: Merging ALL Datasets")
print("=" * 70)

MERGED_DIR = os.path.expanduser("~/final_merged_h100")
for split in ['train', 'valid', 'test']:
    os.makedirs(f"{MERGED_DIR}/{split}/images", exist_ok=True)
    os.makedirs(f"{MERGED_DIR}/{split}/labels", exist_ok=True)

# Collect the class names used across all four datasets
all_classes = set()
datasets = [
    (ds1.location, 'helmet212'),
    (ds2.location, 'nohelmet499'),
    (ds3.location, 'withhelmet300'),
    (ds4.location, 'triple626'),
]
class_configs = {}
for ds_path, ds_name in datasets:
    ds_yaml = f"{ds_path}/data.yaml"
    if os.path.exists(ds_yaml):
        with open(ds_yaml, 'r') as f:
            cfg = yaml.safe_load(f)
        class_configs[ds_name] = cfg
        if 'names' in cfg:
            all_classes.update(cfg['names'])

unified_classes = sorted(all_classes)
print(f"\nUnified classes ({len(unified_classes)}): {unified_classes}")

# Map each dataset's local class indices to the unified class list
class_maps = {}
for ds_name, cfg in class_configs.items():
    class_maps[ds_name] = {}
    if 'names' in cfg:
        for i, cls in enumerate(cfg['names']):
            class_maps[ds_name][i] = unified_classes.index(cls)

def copy_with_remap(src_dir, prefix, class_mapping):
    """Copy one dataset into MERGED_DIR, remapping label class IDs."""
    total = 0
    for split in ['train', 'valid', 'test']:
        src_img = f"{src_dir}/{split}/images"
        src_lbl = f"{src_dir}/{split}/labels"
        if not os.path.exists(src_img):
            continue
        imgs = glob.glob(f"{src_img}/*.jpg") + glob.glob(f"{src_img}/*.png")
        for img_path in imgs:
            img_name = os.path.basename(img_path)
            lbl_name = Path(img_path).stem + '.txt'
            lbl_path = f"{src_lbl}/{lbl_name}"

            # Copy the image, prefixing the filename to avoid collisions
            dst_img = f"{MERGED_DIR}/{split}/images/{prefix}_{img_name}"
            shutil.copy2(img_path, dst_img)
            total += 1  # count every copied image, labeled or not

            # Remap class IDs and write the label alongside it
            if os.path.exists(lbl_path):
                with open(lbl_path, 'r') as f:
                    lines = f.readlines()
                remapped = []
                for line in lines:
                    parts = line.strip().split()
                    if len(parts) >= 5:
                        old_cls = int(parts[0])
                        new_cls = class_mapping.get(old_cls, old_cls)
                        remapped.append(f"{new_cls} {' '.join(parts[1:])}\n")
                if remapped:
                    dst_lbl = f"{MERGED_DIR}/{split}/labels/{prefix}_{lbl_name}"
                    with open(dst_lbl, 'w') as f:
                        f.writelines(remapped)
    return total
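
# --- Optional sanity check (illustrative sketch, not part of the original
# pipeline; the helper name `verify_labels` is ours). After the copy loop
# below runs, every label line should carry a class ID within the unified
# range; a stray ID means a data.yaml was missing and that dataset fell
# back to its raw local IDs.
def verify_labels(merged_dir, num_classes):
    bad = []
    for lbl in glob.glob(f"{merged_dir}/*/labels/*.txt"):
        with open(lbl) as f:
            for line in f:
                parts = line.split()
                if parts and not (0 <= int(parts[0]) < num_classes):
                    bad.append(lbl)
                    break
    return bad
# Usage: verify_labels(MERGED_DIR, len(unified_classes)) after merging;
# an empty list means every class ID survived the remap.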
print("\nCopying datasets...")
for (ds_path, ds_name), prefix in zip(datasets, ['h212', 'nh499', 'wh300', 'tr626']):
    count = copy_with_remap(ds_path, prefix, class_maps.get(ds_name, {}))
    print(f"  {ds_name}: {count} images")

# Count the final merged splits
print("\nFinal merged dataset:")
for split in ['train', 'valid', 'test']:
    imgs = glob.glob(f"{MERGED_DIR}/{split}/images/*")
    print(f"  {split}: {len(imgs)} images")

# Write the merged data.yaml
merged_yaml = {
    'path': MERGED_DIR,
    'train': 'train/images',
    'val': 'valid/images',
    'test': 'test/images',
    'nc': len(unified_classes),
    'names': unified_classes,
}
yaml_path = f"{MERGED_DIR}/data.yaml"
with open(yaml_path, 'w') as f:
    yaml.dump(merged_yaml, f, default_flow_style=False)
print(f"\nConfig saved: {yaml_path}")

# Step 3: Train on H100 with OPTIMIZED settings
print("\n" + "=" * 70)
print("STEP 3: TRAINING ON H100 (96GB VRAM!)")
print("=" * 70)

model = YOLO('yolo26m.pt')

print("\nTraining config:")
print("  Model: YOLO26m")
print("  Epochs: 150 (faster with H100)")
print("  Batch: -1 (auto - H100 can handle 64-128!)")
print("  Image size: 640")
print(f"  Classes: {len(unified_classes)}")

print("\nStarting training...")
results = model.train(
    data=yaml_path,
    epochs=150,      # fewer epochs needed with a large batch on the H100
    imgsz=640,
    batch=-1,        # auto batch size; the H100 typically fits 64-128
    cache='ram',     # cache images in system RAM for faster data loading
    device=0,
    workers=8,
    patience=30,     # early stop after 30 epochs without improvement
    name='h100_final',
    project='outputs',
    # Augmentation
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10,
    translate=0.1,
    scale=0.5,
    fliplr=0.5,
    mosaic=1.0,
    mixup=0.1,
    lr0=0.01,
    lrf=0.01,
    amp=True,
    val=True,
    plots=True,
)

print("\n" + "=" * 70)
print("TRAINING COMPLETE!")
print("=" * 70)

# Validate
metrics = model.val()
print("\nFinal Metrics:")
print(f"  mAP50:     {metrics.box.map50:.4f} ({metrics.box.map50 * 100:.1f}%)")
print(f"  mAP50-95:  {metrics.box.map:.4f} ({metrics.box.map * 100:.1f}%)")
print(f"  Precision: {metrics.box.mp:.4f} ({metrics.box.mp * 100:.1f}%)")
print(f"  Recall:    {metrics.box.mr:.4f} ({metrics.box.mr * 100:.1f}%)")

# Export
print("\nExporting to ONNX...")
model.export(format='onnx', dynamic=True, simplify=True)

print("\n" + "=" * 70)
print("Model saved: outputs/h100_final/weights/best.pt")
print("=" * 70)
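
# --- Optional smoke test (illustrative sketch, not part of the original
# script): reload the best checkpoint and run a single prediction on a frame
# from the merged test split. The checkpoint path follows the project/name
# arguments above; conf=0.25 is an assumed threshold, adjust as needed.
test_imgs = glob.glob(f"{MERGED_DIR}/test/images/*")
if test_imgs:
    best = YOLO('outputs/h100_final/weights/best.pt')
    preds = best.predict(test_imgs[0], imgsz=640, conf=0.25)
    for box in preds[0].boxes:
        print(f"  {unified_classes[int(box.cls)]}: {float(box.conf):.2f}")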