try:
    import apex
except ImportError:
    print("apex is not installed")

from mmcv.runner import OptimizerHook, HOOKS


@HOOKS.register_module()
class DistOptimizerHook(OptimizerHook):
    """Optimizer hook for distributed training.

    Accumulates gradients over ``update_interval`` iterations before each
    optimizer step and optionally scales the loss with apex amp when
    ``use_fp16`` is enabled.
    """

    def __init__(self,
                 update_interval=1,
                 grad_clip=None,
                 coalesce=True,
                 bucket_size_mb=-1,
                 use_fp16=False):
        self.grad_clip = grad_clip
        self.coalesce = coalesce
        self.bucket_size_mb = bucket_size_mb
        self.update_interval = update_interval
        self.use_fp16 = use_fp16

    def before_run(self, runner):
        # Start from clean gradients so the first accumulation window is correct.
        runner.optimizer.zero_grad()

    def after_train_iter(self, runner):
        # Divide the loss so gradients accumulated over `update_interval`
        # iterations average to one effective optimizer step.
        runner.outputs["loss"] /= self.update_interval
        if self.use_fp16:
            # apex amp scales the loss before backward to avoid fp16 underflow.
            with apex.amp.scale_loss(runner.outputs["loss"],
                                     runner.optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            runner.outputs["loss"].backward()
        if self.every_n_iters(runner, self.update_interval):
            if self.grad_clip is not None:
                self.clip_grads(runner.model.parameters())
            runner.optimizer.step()
            runner.optimizer.zero_grad()
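# A minimal usage sketch (an assumption, not part of this module): with
# mmcv-style configs, an optimizer hook is usually selected through the
# ``optimizer_config`` entry. The exact keys accepted by a downstream
# codebase may differ.
#
# optimizer_config = dict(
#     type='DistOptimizerHook',
#     update_interval=2,                         # accumulate gradients over 2 iters
#     grad_clip=dict(max_norm=35, norm_type=2),  # passed to clip_grads
#     use_fp16=True,                             # requires apex with amp initialized
# )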