Spaces:
Sleeping
Sleeping
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Train a YOLOv5 classifier model on a classification dataset.

Usage - Single-GPU training:
    $ python classify/train.py --model yolov5s-cls.pt --data imagenette160 --epochs 5 --img 224

Usage - Multi-GPU DDP training:
    $ python -m torch.distributed.run --nproc_per_node 4 --master_port 2022 classify/train.py --model yolov5s-cls.pt --data imagenet --epochs 5 --img 224 --device 0,1,2,3

Datasets:           --data mnist, fashion-mnist, cifar10, cifar100, imagenette, imagewoof, imagenet, or 'path/to/data'
YOLOv5-cls models:  --model yolov5n-cls.pt, yolov5s-cls.pt, yolov5m-cls.pt, yolov5l-cls.pt, yolov5x-cls.pt
Torchvision models: --model resnet50, efficientnet_b0, etc. See https://pytorch.org/vision/stable/models.html
"""
| import argparse | |
| import os | |
| import subprocess | |
| import sys | |
| import time | |
| from copy import deepcopy | |
| from datetime import datetime | |
| from pathlib import Path | |
| import torch | |
| import torch.distributed as dist | |
| import torch.hub as hub | |
| import torch.optim.lr_scheduler as lr_scheduler | |
| import torchvision | |
| from torch.cuda import amp | |
| from tqdm import tqdm | |
# Locate this script and the repository root (two directory levels above the file).
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory

# Make project-level packages importable when this file is run as a script.
if sys.path.count(str(ROOT)) == 0:
    sys.path.append(str(ROOT))

# From here on ROOT is expressed relative to the current working directory.
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))
| from classify import val as validate | |
| from models.experimental import attempt_load | |
| from models.yolo import ClassificationModel, DetectionModel | |
| from utils.dataloaders import create_classification_dataloader | |
| from utils.general import (DATASETS_DIR, LOGGER, TQDM_BAR_FORMAT, WorkingDirectory, check_git_info, check_git_status, | |
| check_requirements, colorstr, download, increment_path, init_seeds, print_args, yaml_save) | |
| from utils.loggers import GenericLogger | |
| from utils.plots import imshow_cls | |
| from utils.torch_utils import (ModelEMA, de_parallel, model_info, reshape_classifier_output, select_device, smart_DDP, | |
| smart_optimizer, smartCrossEntropyLoss, torch_distributed_zero_first) | |
# Distributed-training identifiers read from the environment, see https://pytorch.org/docs/stable/elastic/run.html
# The fallbacks (-1, -1, 1) are used when the corresponding variable is not set.
LOCAL_RANK = int(os.environ.get('LOCAL_RANK', -1))
RANK = int(os.environ.get('RANK', -1))
WORLD_SIZE = int(os.environ.get('WORLD_SIZE', 1))

# Git metadata of the working tree; written into every checkpoint under the 'git' key.
GIT_INFO = check_git_info()
def train(opt, device):
    """
    Train a classification model according to `opt` on `device`.

    The function seeds the run, resolves (and if missing, downloads) the dataset, builds the train/test dataloaders,
    loads or creates the model, then runs the epoch loop with AMP, gradient clipping and EMA. On rank -1/0 it validates
    with the EMA weights on the last batch of every epoch, logs metrics and writes `last.pt` / `best.pt` under
    `opt.save_dir / 'weights'`.

    Args:
        opt: Options namespace. Reads seed, save_dir, data, batch_size, epochs, workers, imgsz, pretrained, cache,
            model, cutoff, dropout, verbose, optimizer, lr0, decay, label_smoothing and nosave.
        device: torch.device to train on; any device whose type is not 'cpu' enables AMP and (when RANK != -1) DDP.

    Raises:
        ModuleNotFoundError: If `opt.model` is neither a '*.pt' file/path nor a torchvision model name.
        subprocess.CalledProcessError: If the ImageNet download script exits with a non-zero status.
    """
    init_seeds(opt.seed + 1 + RANK, deterministic=True)
    # os.cpu_count() returns None when the CPU count cannot be determined; treat that as a single CPU
    nw = min((os.cpu_count() or 1) - 1, opt.workers)
    save_dir, data, bs, epochs, imgsz = opt.save_dir, Path(opt.data), opt.batch_size, opt.epochs, opt.imgsz
    pretrained = str(opt.pretrained).lower() == 'true'  # --pretrained may arrive as a bool or a string
    cuda = device.type != 'cpu'

    # Directories
    wdir = save_dir / 'weights'
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last, best = wdir / 'last.pt', wdir / 'best.pt'

    # Save run settings
    yaml_save(save_dir / 'opt.yaml', vars(opt))

    # Logger
    logger = GenericLogger(opt=opt, console_logger=LOGGER) if RANK in {-1, 0} else None

    # Download Dataset
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        data_dir = data if data.is_dir() else (DATASETS_DIR / data)
        if not data_dir.is_dir():
            LOGGER.info(f'\nDataset not found ⚠️, missing path {data_dir}, attempting download...')
            t = time.time()
            if str(data) == 'imagenet':
                # Argument list WITHOUT shell=True: with shell=True on POSIX only the first list item is the command
                # and the script path would be handed to the shell as a positional argument, so it would never run
                subprocess.run(['bash', str(ROOT / 'data/scripts/get_imagenet.sh')], check=True)
            else:
                url = f'https://github.com/ultralytics/yolov5/releases/download/v1.0/{data}.zip'
                download(url, dir=data_dir.parent)
            s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
            LOGGER.info(s)

    # Dataloaders
    nc = len([x for x in (data_dir / 'train').glob('*') if x.is_dir()])  # number of classes
    trainloader = create_classification_dataloader(path=data_dir / 'train',
                                                   imgsz=imgsz,
                                                   batch_size=bs // WORLD_SIZE,
                                                   augment=True,
                                                   cache=opt.cache,
                                                   rank=LOCAL_RANK,
                                                   workers=nw)

    test_dir = data_dir / 'test' if (data_dir / 'test').exists() else data_dir / 'val'  # data/test or data/val
    if RANK in {-1, 0}:
        testloader = create_classification_dataloader(path=test_dir,
                                                      imgsz=imgsz,
                                                      batch_size=bs // WORLD_SIZE * 2,
                                                      augment=False,
                                                      cache=opt.cache,
                                                      rank=-1,
                                                      workers=nw)

    # Model
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        if Path(opt.model).is_file() or opt.model.endswith('.pt'):
            model = attempt_load(opt.model, device='cpu', fuse=False)
        elif opt.model in torchvision.models.__dict__:  # TorchVision models i.e. resnet50, efficientnet_b0
            model = torchvision.models.__dict__[opt.model](weights='IMAGENET1K_V1' if pretrained else None)
        else:
            m = hub.list('ultralytics/yolov5')  # names listed in the error message below
            raise ModuleNotFoundError(f'--model {opt.model} not found. Available models are: \n' + '\n'.join(m))
        if isinstance(model, DetectionModel):
            LOGGER.warning("WARNING ⚠️ pass YOLOv5 classifier model with '-cls' suffix, i.e. '--model yolov5s-cls.pt'")
            model = ClassificationModel(model=model, nc=nc, cutoff=opt.cutoff or 10)  # convert to classification model
        reshape_classifier_output(model, nc)  # update class count
    for m in model.modules():
        if not pretrained and hasattr(m, 'reset_parameters'):
            m.reset_parameters()
        if isinstance(m, torch.nn.Dropout) and opt.dropout is not None:
            m.p = opt.dropout  # set dropout
    for p in model.parameters():
        p.requires_grad = True  # for training
    model = model.to(device)

    # Info
    if RANK in {-1, 0}:
        model.names = trainloader.dataset.classes  # attach class names
        model.transforms = testloader.dataset.torch_transforms  # attach inference transforms
        model_info(model)
        if opt.verbose:
            LOGGER.info(model)
        images, labels = next(iter(trainloader))
        file = imshow_cls(images[:25], labels[:25], names=model.names, f=save_dir / 'train_images.jpg')
        logger.log_images(file, name='Train Examples')
        logger.log_graph(model, imgsz)  # log model

    # Optimizer
    optimizer = smart_optimizer(model, opt.optimizer, opt.lr0, momentum=0.9, decay=opt.decay)

    # Scheduler: linear decay of the lr multiplier from 1.0 at epoch 0 towards lrf at the final epoch
    lrf = 0.01  # final lr (fraction of lr0)
    lf = lambda x: (1 - x / epochs) * (1 - lrf) + lrf  # linear
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    # EMA
    ema = ModelEMA(model) if RANK in {-1, 0} else None

    # DDP mode
    if cuda and RANK != -1:
        model = smart_DDP(model)

    # Train
    t0 = time.time()
    criterion = smartCrossEntropyLoss(label_smoothing=opt.label_smoothing)  # loss function
    best_fitness = 0.0
    final_epoch = False  # stays False if the epoch loop never runs (epochs <= 0)
    scaler = amp.GradScaler(enabled=cuda)
    val = test_dir.stem  # 'val' or 'test'
    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} test\n'
                f'Using {nw * WORLD_SIZE} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting {opt.model} training on {data} dataset with {nc} classes for {epochs} epochs...\n\n'
                f"{'Epoch':>10}{'GPU_mem':>10}{'train_loss':>12}{f'{val}_loss':>12}{'top1_acc':>12}{'top5_acc':>12}")
    for epoch in range(epochs):  # loop over the dataset multiple times
        tloss, vloss, fitness = 0.0, 0.0, 0.0  # train loss, val loss, fitness
        model.train()
        if RANK != -1:
            trainloader.sampler.set_epoch(epoch)
        pbar = enumerate(trainloader)
        if RANK in {-1, 0}:
            pbar = tqdm(enumerate(trainloader), total=len(trainloader), bar_format=TQDM_BAR_FORMAT)
        for i, (images, labels) in pbar:  # progress bar
            images, labels = images.to(device, non_blocking=True), labels.to(device)

            # Forward
            with amp.autocast(enabled=cuda):  # stability issues when enabled
                loss = criterion(model(images), labels)

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            scaler.unscale_(optimizer)  # unscale gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if ema:
                ema.update(model)

            if RANK in {-1, 0}:
                # Print
                tloss = (tloss * i + loss.item()) / (i + 1)  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
                pbar.desc = f"{f'{epoch + 1}/{epochs}':>10}{mem:>10}{tloss:>12.3g}" + ' ' * 36

                # Test
                if i == len(pbar) - 1:  # last batch
                    top1, top5, vloss = validate.run(model=ema.ema,
                                                     dataloader=testloader,
                                                     criterion=criterion,
                                                     pbar=pbar)  # test accuracy, loss
                    fitness = top1  # define fitness as top1 accuracy

        # Scheduler
        scheduler.step()

        # Log metrics
        if RANK in {-1, 0}:
            # Best fitness
            if fitness > best_fitness:
                best_fitness = fitness

            # Log
            metrics = {
                'train/loss': tloss,
                f'{val}/loss': vloss,
                'metrics/accuracy_top1': top1,
                'metrics/accuracy_top5': top5,
                'lr/0': optimizer.param_groups[0]['lr']}  # learning rate
            logger.log_metrics(metrics, epoch)

            # Save model
            final_epoch = epoch + 1 == epochs
            if (not opt.nosave) or final_epoch:
                ckpt = {
                    'epoch': epoch,
                    'best_fitness': best_fitness,
                    'model': deepcopy(ema.ema).half(),  # the EMA weights are what gets saved as 'model'
                    'ema': None,  # not stored separately
                    'updates': ema.updates,
                    'optimizer': None,  # optimizer state is not saved
                    'opt': vars(opt),
                    'git': GIT_INFO,  # {remote, branch, commit} if a git repo
                    'date': datetime.now().isoformat()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fitness:
                    torch.save(ckpt, best)
                del ckpt

    # Train complete
    if RANK in {-1, 0} and final_epoch:
        LOGGER.info(f'\nTraining complete ({(time.time() - t0) / 3600:.3f} hours)'
                    f"\nResults saved to {colorstr('bold', save_dir)}"
                    f'\nPredict:         python classify/predict.py --weights {best} --source im.jpg'
                    f'\nValidate:        python classify/val.py --weights {best} --data {data_dir}'
                    f'\nExport:          python export.py --weights {best} --include onnx'
                    f"\nPyTorch Hub:     model = torch.hub.load('ultralytics/yolov5', 'custom', '{best}')"
                    f'\nVisualize:       https://netron.app\n')

        # Plot examples
        images, labels = (x[:25] for x in next(iter(testloader)))  # first 25 images and labels
        pred = torch.max(ema.ema(images.to(device)), 1)[1]
        file = imshow_cls(images, labels, pred, de_parallel(model).names, verbose=False, f=save_dir / 'test_images.jpg')

        # Log results
        meta = {'epochs': epochs, 'top1_acc': best_fitness, 'date': datetime.now().isoformat()}
        logger.log_images(file, name='Test Examples (true-predicted)', epoch=epoch)
        logger.log_model(best, epochs, metadata=meta)
def parse_opt(known=False):
    """
    Build the command-line parser for classification training and parse the process arguments.

    Args:
        known: When truthy, parsing uses `parse_known_args()` and only the recognised options are returned;
            otherwise `parse_args()` is used.

    Returns:
        argparse.Namespace holding the training options.
    """
    cli = argparse.ArgumentParser()
    arg = cli.add_argument  # shorthand; registration order is unchanged

    arg('--model', type=str, default='yolov5s-cls.pt', help='initial weights path')
    arg('--data', type=str, default='imagenette160', help='cifar10, cifar100, mnist, imagenet, ...')
    arg('--epochs', type=int, default=10, help='total training epochs')
    arg('--batch-size', type=int, default=64, help='total batch size for all GPUs')
    arg('--imgsz', '--img', '--img-size', type=int, default=224, help='train, val image size (pixels)')
    arg('--nosave', action='store_true', help='only save final checkpoint')
    arg('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    arg('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    arg('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    arg('--project', default=ROOT / 'runs/train-cls', help='save to project/name')
    arg('--name', default='exp', help='save to project/name')
    arg('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    arg('--pretrained', nargs='?', const=True, default=True, help='start from i.e. --pretrained False')
    arg('--optimizer', choices=['SGD', 'Adam', 'AdamW', 'RMSProp'], default='Adam', help='optimizer')
    arg('--lr0', type=float, default=0.001, help='initial learning rate')
    arg('--decay', type=float, default=5e-5, help='weight decay')
    arg('--label-smoothing', type=float, default=0.1, help='Label smoothing epsilon')
    arg('--cutoff', type=int, default=None, help='Model layer cutoff index for Classify() head')
    arg('--dropout', type=float, default=None, help='Dropout (fraction)')
    arg('--verbose', action='store_true', help='Verbose mode')
    arg('--seed', type=int, default=0, help='Global training seed')
    arg('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')

    if known:
        namespace, _unrecognised = cli.parse_known_args()
        return namespace
    return cli.parse_args()
def main(opt):
    """
    Run pre-flight checks, pick the device, set up DDP when requested, and start training.

    Args:
        opt: Options namespace; `opt.save_dir` is assigned here before `train` is called.
    """
    # Checks (only on the single-process run or the rank-0 process)
    is_primary = RANK in {-1, 0}
    if is_primary:
        print_args(vars(opt))
        check_git_status()
        check_requirements(ROOT / 'requirements.txt')

    # Device selection; in DDP mode the device is overridden with this process's own CUDA device
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        assert opt.batch_size != -1, 'AutoBatch is coming soon for classification, please pass a valid --batch-size'
        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device('cuda', LOCAL_RANK)
        backend = 'nccl' if dist.is_nccl_available() else 'gloo'
        dist.init_process_group(backend=backend)

    # Output directory for this run
    run_dir = Path(opt.project) / opt.name
    opt.save_dir = increment_path(run_dir, exist_ok=opt.exist_ok)  # increment run

    # Train
    train(opt, device)
def run(**kwargs):
    """
    Programmatic entry point: parse options with `parse_opt(True)`, override them with `kwargs`, then call `main`.

    Usage: from yolov5 import classify; classify.train.run(data='mnist', imgsz=320, model='yolov5m')

    Args:
        **kwargs: Option names and values set as attributes on the parsed options.

    Returns:
        The options namespace that was passed to `main`.
    """
    opt = parse_opt(True)
    for option_name, option_value in kwargs.items():
        setattr(opt, option_name, option_value)
    main(opt)
    return opt
if __name__ == '__main__':
    # Executed as a script: parse the command-line options, then launch training.
    opt = parse_opt()
    main(opt)