File size: 6,931 Bytes
a70eb3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/usr/bin/env python3
"""
Final Training on H100 - 96GB VRAM Beast!
Merges ALL datasets and trains with maximum performance
"""

from roboflow import Roboflow
from ultralytics import YOLO
import torch
import os
import shutil
import yaml
import glob
from pathlib import Path

print("=" * 70)
print("FINAL TRAINING ON H100 - BALANCED DATASET")
print("=" * 70)

# Check GPU: report CUDA availability and, when present, the name and total
# memory of device 0 (the device the training call later selects).
print(f"\nGPU Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.0f} GB")

# Step 1: Download all datasets from Roboflow
print("\n" + "=" * 70)
print("STEP 1: Downloading Datasets from Roboflow")
print("=" * 70)

# NOTE(review): these API keys are committed in source and should be treated
# as leaked -- rotate them and supply the replacements through the environment.
# The literals remain only as a fallback so the script keeps running unchanged.
PRIMARY_API_KEY = os.environ.get("ROBOFLOW_API_KEY_PRIMARY", "cMpZOr1EizWFVrJ0Au4o")
SECONDARY_API_KEY = os.environ.get("ROBOFLOW_API_KEY_SECONDARY", "qeQs9chVa3kU0XnpTZsd")

# Download locations are expanded explicitly: a bare "~/..." string is not
# expanded by Python itself, and (presumably -- verify against the installed
# roboflow version) not by Roboflow either, which would put the data in a
# literal "~" directory under the current working directory.

rf = Roboflow(api_key=PRIMARY_API_KEY)

# Dataset 1: New 212 helmet images (downloaded with the primary key)
print("\nDataset 1: New helmet images (212)...")
project1 = rf.workspace("team11s-workspace-man05").project("helmet-detection-ihomd")
ds1 = project1.version(1).download("yolov8", location=os.path.expanduser("~/helmet_212"))

# Dataset 2: Old no-helmet (499) (downloaded with the secondary key)
print("\nDataset 2: No-helmet images (499)...")
rf2 = Roboflow(api_key=SECONDARY_API_KEY)
project2 = rf2.workspace("nyc-nleyq").project("indian-cctv-traffic-violations")
ds2 = project2.version(1).download("yolov8", location=os.path.expanduser("~/no_helmet_499"))

# Dataset 3: With-helmet (300) (downloaded with the secondary key)
print("\nDataset 3: With-helmet images (300)...")
project3 = rf2.workspace("vivekvarikuti").project("withhelmet")
ds3 = project3.version(1).download("yolov8", location=os.path.expanduser("~/with_helmet_300"))

# Dataset 4: Triple-riding (626) (downloaded with the secondary key)
print("\nDataset 4: Triple-riding (626)...")
project4 = rf2.workspace("triple-ride-rsysj").project("triple-riding-detection-pniom")
ds4 = project4.version(1).download("yolov8", location=os.path.expanduser("~/triple_riding_626"))

print("\n✅ All datasets downloaded!")

# Step 2: Merge all datasets
print("\n" + "=" * 70)
print("STEP 2: Merging ALL Datasets")
print("=" * 70)

MERGED_DIR = os.path.expanduser("~/final_merged_h100")

# Pre-create the images/labels folders for every split of the merged dataset.
for split in ['train', 'valid', 'test']:
    os.makedirs(f"{MERGED_DIR}/{split}/images", exist_ok=True)
    os.makedirs(f"{MERGED_DIR}/{split}/labels", exist_ok=True)


def _names_by_id(cfg):
    """Return ``{class_id: class_name}`` for a parsed ``data.yaml``.

    Accepts ``names`` written either as a list (position is the class id) or
    as an ``{id: name}`` mapping. Returns an empty dict when ``cfg`` is not a
    mapping (e.g. an empty YAML file parses to ``None``) or has no usable
    ``names`` entry.
    """
    names = cfg.get('names') if isinstance(cfg, dict) else None
    if isinstance(names, dict):
        return {int(cls_id): str(name) for cls_id, name in names.items()}
    if isinstance(names, (list, tuple)):
        return {cls_id: str(name) for cls_id, name in enumerate(names)}
    return {}


# Collect all classes
all_classes = set()
datasets = [
    (ds1.location, 'helmet212'),
    (ds2.location, 'nohelmet499'),
    (ds3.location, 'withhelmet300'),
    (ds4.location, 'triple626')
]

class_configs = {}      # dataset name -> parsed data.yaml
dataset_names = {}      # dataset name -> {original class id: class name}
for ds_path, ds_name in datasets:
    yaml_path = f"{ds_path}/data.yaml"
    if not os.path.exists(yaml_path):
        # Without a config there is no mapping for this dataset, so its label
        # ids are later copied through unchanged -- make that visible.
        print(f"  WARNING: {yaml_path} not found; {ds_name} class ids will not be remapped")
        continue
    with open(yaml_path, 'r', encoding='utf-8') as f:
        cfg = yaml.safe_load(f)
    class_configs[ds_name] = cfg
    dataset_names[ds_name] = _names_by_id(cfg)
    all_classes.update(dataset_names[ds_name].values())

# Sorting gives every run the same class order, hence stable unified ids.
unified_classes = sorted(all_classes)
print(f"\nUnified classes ({len(unified_classes)}): {unified_classes}")

# Create class mappings: per dataset, original class id -> unified class id.
unified_index = {name: idx for idx, name in enumerate(unified_classes)}
class_maps = {
    ds_name: {cls_id: unified_index[name] for cls_id, name in names.items()}
    for ds_name, names in dataset_names.items()
}

# Copy and merge datasets
def copy_with_remap(src_dir, prefix, class_mapping, merged_dir=None):
    """Copy one YOLO-layout dataset into the merged dataset, remapping class ids.

    For each of the ``train``/``valid``/``test`` splits found under
    ``src_dir``, every ``.jpg``/``.jpeg``/``.png`` image (extension matched
    case-insensitively) is copied to ``<merged_dir>/<split>/images`` with
    ``prefix + "_"`` prepended to its file name. If a label file with the same
    stem exists in ``<split>/labels``, each line with at least five
    whitespace-separated fields is rewritten with its leading class id looked
    up in ``class_mapping``; other lines are dropped. A label file is written
    only when at least one line survives.

    Args:
        src_dir: Root of the source dataset (contains ``<split>/images`` and
            ``<split>/labels``).
        prefix: String prepended to copied file names so files from different
            datasets cannot collide.
        class_mapping: Dict of original class id -> new class id. Ids missing
            from the mapping are written through unchanged.
        merged_dir: Destination root. Defaults to the module-level
            ``MERGED_DIR``.

    Returns:
        Number of images for which a remapped label file was written. Images
        without usable labels are still copied but are not counted.

    Raises:
        ValueError: If a label line's first field is not an integer.
    """
    if merged_dir is None:
        merged_dir = MERGED_DIR

    image_suffixes = {'.jpg', '.jpeg', '.png'}
    total = 0
    for split in ['train', 'valid', 'test']:
        src_img = os.path.join(src_dir, split, 'images')
        src_lbl = os.path.join(src_dir, split, 'labels')

        if not os.path.isdir(src_img):
            continue

        dst_img_dir = os.path.join(merged_dir, split, 'images')
        dst_lbl_dir = os.path.join(merged_dir, split, 'labels')
        # Create destinations on demand so the function does not depend on
        # the caller having prepared the merged tree beforehand.
        os.makedirs(dst_img_dir, exist_ok=True)
        os.makedirs(dst_lbl_dir, exist_ok=True)

        # Listing the directory (instead of globbing) keeps paths that contain
        # glob metacharacters working; sorting makes the copy order stable.
        for img_name in sorted(os.listdir(src_img)):
            if img_name.startswith('.'):
                # Hidden files were never matched by the "*.ext" patterns.
                continue
            stem, suffix = os.path.splitext(img_name)
            img_path = os.path.join(src_img, img_name)
            if suffix.lower() not in image_suffixes or not os.path.isfile(img_path):
                continue

            lbl_name = stem + '.txt'
            lbl_path = os.path.join(src_lbl, lbl_name)

            # Copy image with prefix
            shutil.copy2(img_path, os.path.join(dst_img_dir, f"{prefix}_{img_name}"))

            # Remap and copy label
            if not os.path.exists(lbl_path):
                continue

            remapped = []
            with open(lbl_path, 'r', encoding='utf-8') as f:
                for line in f:
                    parts = line.split()
                    if len(parts) < 5:
                        continue  # blank or incomplete annotation line
                    old_cls = int(parts[0])
                    new_cls = class_mapping.get(old_cls, old_cls)
                    remapped.append(f"{new_cls} {' '.join(parts[1:])}\n")

            if remapped:
                dst_lbl = os.path.join(dst_lbl_dir, f"{prefix}_{lbl_name}")
                with open(dst_lbl, 'w', encoding='utf-8') as f:
                    f.writelines(remapped)
                total += 1

    return total

# Merge every downloaded dataset into MERGED_DIR; each dataset gets its own
# file-name prefix so identically named files cannot overwrite each other.
print("\nCopying datasets...")
file_prefixes = ['h212', 'nh499', 'wh300', 'tr626']
for prefix, (ds_path, ds_name) in zip(file_prefixes, datasets):
    ds_class_map = class_maps.get(ds_name, {})
    count = copy_with_remap(ds_path, prefix, ds_class_map)
    print(f"  {ds_name}: {count} images")

# Report how many files ended up in each split's images folder.
print("\nFinal merged dataset:")
for split in ['train', 'valid', 'test']:
    split_image_glob = f"{MERGED_DIR}/{split}/images/*"
    n_split_images = len(glob.glob(split_image_glob))
    print(f"  {split}: {n_split_images} images")

# Write the dataset config that the training step points at: split folders
# relative to 'path', plus the unified class list and its length.
merged_yaml = dict(
    path=MERGED_DIR,
    train='train/images',
    val='valid/images',
    test='test/images',
    nc=len(unified_classes),
    names=unified_classes,
)

yaml_path = f"{MERGED_DIR}/data.yaml"
with open(yaml_path, 'w') as yaml_file:
    yaml.dump(merged_yaml, yaml_file, default_flow_style=False)

print(f"\nConfig saved: {yaml_path}")

# Step 3: Train on H100 with OPTIMIZED settings
print("\n" + "=" * 70)
print("STEP 3: TRAINING ON H100 (96GB VRAM!)")
print("=" * 70)

# Single source of truth for the values that are both printed and passed to
# train(), so the printed summary cannot drift from the real configuration.
EPOCHS = 150
IMG_SIZE = 640
BATCH = -1  # -1 asks Ultralytics to pick the batch size automatically
RUN_NAME = 'h100_final'
PROJECT_DIR = 'outputs'

model = YOLO('yolo26m.pt')

print("\nTraining config:")
print("  Model: YOLO26m")
print(f"  Epochs: {EPOCHS} (faster with H100)")
print(f"  Batch: {BATCH} (auto - H100 can handle 64-128!)")
print(f"  Image size: {IMG_SIZE}")
print(f"  Classes: {len(unified_classes)}")

print("\nStarting training...")

results = model.train(
    data=yaml_path,
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH,
    cache='ram',  # cache the dataset in host memory (system RAM, not GPU VRAM)
    device=0,
    workers=8,
    patience=30,  # early-stopping patience, in epochs
    name=RUN_NAME,
    project=PROJECT_DIR,

    # Augmentation
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10,
    translate=0.1,
    scale=0.5,
    fliplr=0.5,
    mosaic=1.0,
    mixup=0.1,

    # Optimisation / reporting
    lr0=0.01,
    lrf=0.01,
    amp=True,
    val=True,
    plots=True,
)

print("\n" + "=" * 70)
print("TRAINING COMPLETE!")
print("=" * 70)

# Validate
metrics = model.val()
print("\nFinal Metrics:")
print(f"  mAP50:     {metrics.box.map50:.4f} ({metrics.box.map50*100:.1f}%)")
print(f"  mAP50-95:  {metrics.box.map:.4f} ({metrics.box.map*100:.1f}%)")
print(f"  Precision: {metrics.box.mp:.4f} ({metrics.box.mp*100:.1f}%)")
print(f"  Recall:    {metrics.box.mr:.4f} ({metrics.box.mr*100:.1f}%)")

# Export
print("\nExporting to ONNX...")
model.export(format='onnx', dynamic=True, simplify=True)

# Report where the weights really are. The run directory is not guaranteed to
# be exactly <project>/<name>: Ultralytics appends a numeric suffix when that
# directory already exists, so prefer the path recorded by the trainer and
# fall back to the nominal location only when it is unavailable.
# NOTE(review): relies on the trainer exposing `best` -- confirm against the
# installed ultralytics version.
trainer = getattr(model, 'trainer', None)
best_weights = getattr(trainer, 'best', None) or f"{PROJECT_DIR}/{RUN_NAME}/weights/best.pt"

print("\n" + "=" * 70)
print(f"Model saved: {best_weights}")
print("=" * 70)