"""Train EfficientNet-B0 head helmet classifier. Design choices: - EfficientNet-B0 pretrained on ImageNet — ~5M params, fast, strong features. - Input 224x224; aspect-preserving resize + pad so aspect isn't distorted. - Runtime augmentations: flips, rotation, color jitter, gaussian blur, perspective warp. This is critical for generalization on head crops which vary enormously in angle/lighting. - AdamW + cosine LR + warmup. - BCE with class weights if imbalanced. - Mixed precision (fp16) on H100. - Early stop patience=3 on val F1 (not accuracy — F1 is better for binary classification with potential class imbalance). - ImageFolder dataset — train and val are already separated into per-class dirs. """ from __future__ import annotations import os, random from pathlib import Path import torch import torch.nn as nn import torchvision.transforms as T import torchvision.transforms.functional as TF from torchvision.datasets import ImageFolder from torch.utils.data import DataLoader from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights DATA = Path('/home/azureuser/helmet_v5/data/head_helmet/imgs') OUT = Path('/home/azureuser/helmet_v5/models/helmet_head_v2.pt') OUT.parent.mkdir(parents=True, exist_ok=True) RUN = Path('/home/azureuser/helmet_v5/runs/head_helmet_v2') RUN.mkdir(parents=True, exist_ok=True) class PadResize: """Resize preserving aspect ratio with zero-padding to 224x224.""" def __init__(self, size=224): self.size = size def __call__(self, img): w, h = img.size scale = self.size / max(w, h) nw, nh = int(w*scale), int(h*scale) img = img.resize((nw, nh)) # Pad to 224x224 pad_l = (self.size - nw) // 2; pad_t = (self.size - nh) // 2 pad_r = self.size - nw - pad_l; pad_b = self.size - nh - pad_t img = TF.pad(img, (pad_l, pad_t, pad_r, pad_b), fill=0) return img NORM = T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) train_tf = T.Compose([ PadResize(224), T.RandomHorizontalFlip(0.5), T.RandomRotation(15, fill=0), T.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2), T.RandomApply([T.GaussianBlur(3)], 0.3), T.RandomPerspective(0.1, 0.3), T.ToTensor(), NORM, ]) val_tf = T.Compose([PadResize(224), T.ToTensor(), NORM]) def f1_score(tp, fp, fn): p = tp / max(tp + fp, 1) r = tp / max(tp + fn, 1) if p + r == 0: return 0.0, 0.0, 0.0 return 2*p*r/(p+r), p, r def main(): torch.manual_seed(42); random.seed(42) device = 'cuda' train_ds = ImageFolder(str(DATA / 'train'), transform=train_tf) val_ds = ImageFolder(str(DATA / 'val'), transform=val_tf) print(f'[data] train={len(train_ds)} val={len(val_ds)} classes={train_ds.classes}') # Compute class weights for imbalance class_counts = [0, 0] for _, c in train_ds.samples: class_counts[c] += 1 total = sum(class_counts) class_weights = torch.tensor([total/(2*c) if c else 1.0 for c in class_counts]).to(device) print(f'[data] class_counts={class_counts} weights={class_weights.tolist()}') bs = 64 train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=6, pin_memory=True, drop_last=True) val_dl = DataLoader(val_ds, batch_size=bs, shuffle=False, num_workers=4, pin_memory=True) model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT) # Replace classifier head: 1280 -> 2 model.classifier[1] = nn.Linear(1280, 2) model = model.to(device) crit = nn.CrossEntropyLoss(weight=class_weights) opt = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.01) epochs = 20 warmup = 2 sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs-warmup) scaler = torch.amp.GradScaler('cuda') best_f1 = 0.0; patience = 3; stale = 0 log = [] for ep in range(1, epochs+1): model.train() losses = [] for i, (x, y) in enumerate(train_dl): x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True) opt.zero_grad() with torch.amp.autocast('cuda'): logits = model(x) loss = crit(logits, y) scaler.scale(loss).backward() scaler.step(opt); scaler.update() losses.append(loss.item()) if i % 50 == 0: print(f' ep {ep} it {i}/{len(train_dl)} loss={loss.item():.3f}') if ep > warmup: sched.step() # Val model.eval() tp = fp = fn = tn = 0 with torch.no_grad(): for x, y in val_dl: x, y = x.to(device), y.to(device) with torch.amp.autocast('cuda'): pred = model(x).argmax(1) # class 1 = no_helmet (positive for our purpose) tp += int(((pred==1) & (y==1)).sum().item()) fp += int(((pred==1) & (y==0)).sum().item()) fn += int(((pred==0) & (y==1)).sum().item()) tn += int(((pred==0) & (y==0)).sum().item()) acc = (tp+tn) / max(tp+fp+fn+tn, 1) f1, P, R = f1_score(tp, fp, fn) log.append({'ep': ep, 'loss': sum(losses)/len(losses), 'val_acc': acc, 'val_f1': f1, 'val_P': P, 'val_R': R, 'tp': tp, 'fp': fp, 'fn': fn, 'tn': tn}) print(f'[ep {ep}] loss={sum(losses)/len(losses):.3f} val_acc={acc:.3f} ' f'F1={f1:.3f} P={P:.3f} R={R:.3f} (tp={tp} fp={fp} fn={fn} tn={tn})') if f1 > best_f1: best_f1 = f1; stale = 0 torch.save(model.state_dict(), str(OUT)) print(f' ★ new best, saved to {OUT}') else: stale += 1 if stale >= patience: print(f'[early-stop] no F1 improvement for {patience} epochs'); break import json (RUN / 'log.json').write_text(json.dumps(log, indent=2)) print(f'\n[done] best val F1 = {best_f1:.3f}, saved to {OUT}') if __name__ == '__main__': main()