"""
Final Training on H100 - 96GB VRAM Beast!
Merges ALL datasets and trains with maximum performance
"""
|
|
from roboflow import Roboflow
from ultralytics import YOLO
import torch
import os
import shutil
import yaml
import glob
from pathlib import Path

_RULE = "=" * 70

print(_RULE)
print("FINAL TRAINING ON H100 - BALANCED DATASET")
print(_RULE)

# Report GPU availability up front so a mis-provisioned machine fails loudly.
cuda_ok = torch.cuda.is_available()
print(f"\nGPU Available: {cuda_ok}")
if cuda_ok:
    device_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {device_props.total_memory / 1e9:.0f} GB")

print("\n" + _RULE)
print("STEP 1: Downloading Datasets from Roboflow")
print(_RULE)

# SECURITY NOTE(review): API key is hard-coded in source — move it to an
# environment variable before committing or sharing this script.
rf = Roboflow(api_key="cMpZOr1EizWFVrJ0Au4o")
|
|
| |
# Download the four source datasets in YOLOv8 format.
#
# BUG FIX: the download locations were passed as literal "~/..." strings.
# Neither Python nor the Roboflow client shell-expands "~", so a directory
# literally named "~" would be created in the CWD. Expand each path
# explicitly with os.path.expanduser.
print("\nDataset 1: New helmet images (212)...")
project1 = rf.workspace("team11s-workspace-man05").project("helmet-detection-ihomd")
ds1 = project1.version(1).download(
    "yolov8", location=os.path.expanduser("~/helmet_212"))

print("\nDataset 2: No-helmet images (499)...")
# Second account/key used for the remaining public datasets.
# SECURITY NOTE(review): hard-coded API key — move to an environment variable.
rf2 = Roboflow(api_key="qeQs9chVa3kU0XnpTZsd")
project2 = rf2.workspace("nyc-nleyq").project("indian-cctv-traffic-violations")
ds2 = project2.version(1).download(
    "yolov8", location=os.path.expanduser("~/no_helmet_499"))

print("\nDataset 3: With-helmet images (300)...")
project3 = rf2.workspace("vivekvarikuti").project("withhelmet")
ds3 = project3.version(1).download(
    "yolov8", location=os.path.expanduser("~/with_helmet_300"))

print("\nDataset 4: Triple-riding (626)...")
project4 = rf2.workspace("triple-ride-rsysj").project("triple-riding-detection-pniom")
ds4 = project4.version(1).download(
    "yolov8", location=os.path.expanduser("~/triple_riding_626"))

print("\n✅ All datasets downloaded!")
|
|
| |
print("\n" + "=" * 70)
print("STEP 2: Merging ALL Datasets")
print("=" * 70)

MERGED_DIR = os.path.expanduser("~/final_merged_h100")

# Lay out an empty YOLO directory tree: <split>/images and <split>/labels.
for _split in ('train', 'valid', 'test'):
    for _sub in ('images', 'labels'):
        os.makedirs(os.path.join(MERGED_DIR, _split, _sub), exist_ok=True)

# (download_location, short_name) pairs for every source dataset.
all_classes = set()
datasets = [
    (ds1.location, 'helmet212'),
    (ds2.location, 'nohelmet499'),
    (ds3.location, 'withhelmet300'),
    (ds4.location, 'triple626'),
]
|
|
# Read each dataset's data.yaml and pool every class name it declares.
class_configs = {}
for ds_path, ds_name in datasets:
    yaml_path = f"{ds_path}/data.yaml"
    if not os.path.exists(yaml_path):
        continue
    with open(yaml_path, 'r') as f:
        cfg = yaml.safe_load(f)
    class_configs[ds_name] = cfg
    # NOTE(review): assumes 'names' is a list of class strings; some YOLO
    # exports use an {id: name} dict instead — verify against the downloads.
    if 'names' in cfg:
        all_classes.update(cfg['names'])

# Sorted union of all class names becomes the unified label space.
unified_classes = sorted(all_classes)
print(f"\nUnified classes ({len(unified_classes)}): {unified_classes}")

# Per-dataset map: local class id -> index in the unified class list.
class_maps = {}
for ds_name, cfg in class_configs.items():
    names = cfg.get('names', [])
    class_maps[ds_name] = {
        local_id: unified_classes.index(cls_name)
        for local_id, cls_name in enumerate(names)
    }
|
|
| |
def copy_with_remap(src_dir, prefix, class_mapping, dest_dir=None):
    """Copy one dataset's images/labels into the merged tree, remapping class ids.

    Every image under <src_dir>/<split>/images is copied to
    <dest_dir>/<split>/images with `prefix` prepended to its filename, so
    files from different source datasets cannot collide.  Each matching YOLO
    label file has the leading class id of every annotation rewritten
    through `class_mapping` before being written alongside the image.

    Args:
        src_dir: root of a YOLO-format dataset (train/valid/test subdirs).
        prefix: short tag prepended to every copied filename.
        class_mapping: {local class id -> unified class id}; ids absent from
            the map are passed through unchanged.
        dest_dir: destination root. Defaults to the module-level MERGED_DIR
            (new keyword, backward compatible — makes the function reusable
            and testable without the global).

    Returns:
        Number of copied images that had a non-empty label file.
    """
    if dest_dir is None:
        dest_dir = MERGED_DIR
    total = 0
    # Also match *.jpeg, which the original patterns silently skipped.
    patterns = ('*.jpg', '*.jpeg', '*.png')
    for split in ('train', 'valid', 'test'):
        src_img = f"{src_dir}/{split}/images"
        src_lbl = f"{src_dir}/{split}/labels"
        if not os.path.exists(src_img):
            continue

        imgs = []
        for pat in patterns:
            imgs.extend(glob.glob(f"{src_img}/{pat}"))

        for img_path in imgs:
            img_name = os.path.basename(img_path)
            lbl_name = Path(img_path).stem + '.txt'
            lbl_path = f"{src_lbl}/{lbl_name}"

            shutil.copy2(img_path, f"{dest_dir}/{split}/images/{prefix}_{img_name}")

            if not os.path.exists(lbl_path):
                continue  # un-annotated image: copied as background only

            with open(lbl_path, 'r') as f:
                lines = f.readlines()

            remapped = []
            for line in lines:
                parts = line.strip().split()
                # YOLO rows are "class x y w h" (5+ fields); skip malformed rows.
                if len(parts) >= 5:
                    old_cls = int(parts[0])
                    new_cls = class_mapping.get(old_cls, old_cls)
                    remapped.append(f"{new_cls} {' '.join(parts[1:])}\n")

            if remapped:
                with open(f"{dest_dir}/{split}/labels/{prefix}_{lbl_name}", 'w') as f:
                    f.writelines(remapped)
                total += 1

    return total
|
|
print("\nCopying datasets...")
prefixes = ['h212', 'nh499', 'wh300', 'tr626']
for idx, (ds_path, ds_name) in enumerate(datasets):
    mapping = class_maps.get(ds_name, {})
    copied = copy_with_remap(ds_path, prefixes[idx], mapping)
    print(f" {ds_name}: {copied} images")

# Per-split image counts of the merged tree.
print("\nFinal merged dataset:")
for split in ('train', 'valid', 'test'):
    split_imgs = glob.glob(f"{MERGED_DIR}/{split}/images/*")
    print(f" {split}: {len(split_imgs)} images")

# Ultralytics-format dataset config for the merged set.
merged_yaml = {
    'path': MERGED_DIR,
    'train': 'train/images',
    'val': 'valid/images',
    'test': 'test/images',
    'nc': len(unified_classes),
    'names': unified_classes,
}

yaml_path = f"{MERGED_DIR}/data.yaml"
with open(yaml_path, 'w') as f:
    yaml.dump(merged_yaml, f, default_flow_style=False)

print(f"\nConfig saved: {yaml_path}")
|
|
| |
print("\n" + "=" * 70)
print("STEP 3: TRAINING ON H100 (96GB VRAM!)")
print("=" * 70)

# Start from pretrained medium weights.
model = YOLO('yolo26m.pt')

print(f"\nTraining config:")
print(f" Model: YOLO26m")
print(f" Epochs: 150 (faster with H100)")
print(f" Batch: -1 (auto - H100 can handle 64-128!)")
print(f" Image size: 640")
print(f" Classes: {len(unified_classes)}")

print("\nStarting training...")

# Collect all hyperparameters in one dict, then unpack into train().
train_args = dict(
    data=yaml_path,
    epochs=150,
    imgsz=640,
    batch=-1,        # auto batch size — let Ultralytics probe VRAM
    cache='ram',
    device=0,
    workers=8,
    patience=30,     # early stopping after 30 stagnant epochs
    name='h100_final',
    project='outputs',
    # --- augmentation ---
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10,
    translate=0.1,
    scale=0.5,
    fliplr=0.5,
    mosaic=1.0,
    mixup=0.1,
    # --- optimizer / misc ---
    lr0=0.01,
    lrf=0.01,
    amp=True,        # mixed precision
    val=True,
    plots=True,
)
results = model.train(**train_args)
|
|
print("\n" + "=" * 70)
print("TRAINING COMPLETE!")
print("=" * 70)

# Re-run validation on the trained model and report headline metrics.
metrics = model.val()
box = metrics.box
print(f"\nFinal Metrics:")
print(f" mAP50: {box.map50:.4f} ({box.map50*100:.1f}%)")
print(f" mAP50-95: {box.map:.4f} ({box.map*100:.1f}%)")
print(f" Precision: {box.mp:.4f} ({box.mp*100:.1f}%)")
print(f" Recall: {box.mr:.4f} ({box.mr*100:.1f}%)")

# Export a deployment artifact (dynamic-axis, graph-simplified ONNX).
print("\nExporting to ONNX...")
model.export(format='onnx', dynamic=True, simplify=True)

print("\n" + "=" * 70)
print("Model saved: outputs/h100_final/weights/best.pt")
print("=" * 70)
|
|