| |
| |
| |
|
|
| import argparse |
| import random |
| import warnings |
| from loguru import logger |
|
|
| import torch |
| import torch.backends.cudnn as cudnn |
|
|
| from yolox.core import launch |
| from yolox.exp import Exp, check_exp_value, get_exp |
| from yolox.utils import configure_module, configure_nccl, configure_omp, get_num_devices |
|
|
|
|
| def make_parser(): |
| parser = argparse.ArgumentParser("YOLOX train parser") |
| parser.add_argument("-expn", "--experiment-name", type=str, default=None) |
| parser.add_argument("-n", "--name", type=str, default=None, help="model name") |
|
|
| |
| parser.add_argument( |
| "--dist-backend", default="nccl", type=str, help="distributed backend" |
| ) |
| parser.add_argument( |
| "--dist-url", |
| default=None, |
| type=str, |
| help="url used to set up distributed training", |
| ) |
| parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size") |
| parser.add_argument( |
| "-d", "--devices", default=None, type=int, help="device for training" |
| ) |
| parser.add_argument( |
| "-f", |
| "--exp_file", |
| default=None, |
| type=str, |
| help="plz input your experiment description file", |
| ) |
| parser.add_argument( |
| "--resume", default=False, action="store_true", help="resume training" |
| ) |
| parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file") |
| parser.add_argument( |
| "-e", |
| "--start_epoch", |
| default=None, |
| type=int, |
| help="resume training start epoch", |
| ) |
| parser.add_argument( |
| "--num_machines", default=1, type=int, help="num of node for training" |
| ) |
| parser.add_argument( |
| "--machine_rank", default=0, type=int, help="node rank for multi-node training" |
| ) |
| parser.add_argument( |
| "--fp16", |
| dest="fp16", |
| default=False, |
| action="store_true", |
| help="Adopting mix precision training.", |
| ) |
| parser.add_argument( |
| "--cache", |
| type=str, |
| nargs="?", |
| const="ram", |
| help="Caching imgs to ram/disk for fast training.", |
| ) |
| parser.add_argument( |
| "-o", |
| "--occupy", |
| dest="occupy", |
| default=False, |
| action="store_true", |
| help="occupy GPU memory first for training.", |
| ) |
| parser.add_argument( |
| "-l", |
| "--logger", |
| type=str, |
| help="Logger to be used for metrics. \ |
| Implemented loggers include `tensorboard`, `mlflow` and `wandb`.", |
| default="tensorboard" |
| ) |
| parser.add_argument( |
| "opts", |
| help="Modify config options using the command-line", |
| default=None, |
| nargs=argparse.REMAINDER, |
| ) |
| return parser |
|
|
|
|
| @logger.catch |
| def main(exp: Exp, args): |
| if exp.seed is not None: |
| random.seed(exp.seed) |
| torch.manual_seed(exp.seed) |
| cudnn.deterministic = True |
| warnings.warn( |
| "You have chosen to seed training. This will turn on the CUDNN deterministic setting, " |
| "which can slow down your training considerably! You may see unexpected behavior " |
| "when restarting from checkpoints." |
| ) |
|
|
| |
| configure_nccl() |
| configure_omp() |
| cudnn.benchmark = True |
|
|
| trainer = exp.get_trainer(args) |
| trainer.train() |
|
|
|
|
| if __name__ == "__main__": |
| configure_module() |
| args = make_parser().parse_args() |
| exp = get_exp(args.exp_file, args.name) |
| exp.merge(args.opts) |
| check_exp_value(exp) |
|
|
| if not args.experiment_name: |
| args.experiment_name = exp.exp_name |
|
|
| num_gpu = get_num_devices() if args.devices is None else args.devices |
| assert num_gpu <= get_num_devices() |
|
|
| if args.cache is not None: |
| exp.dataset = exp.get_dataset(cache=True, cache_type=args.cache) |
|
|
| dist_url = "auto" if args.dist_url is None else args.dist_url |
| launch( |
| main, |
| num_gpu, |
| args.num_machines, |
| args.machine_rank, |
| backend=args.dist_backend, |
| dist_url=dist_url, |
| args=(exp, args), |
| ) |
|
|