Spaces:
Runtime error
Runtime error
| # ------------------------------------------------------------------------ | |
| # HOTR official code : engine/trainer.py | |
| # Copyright (c) Kakao Brain, Inc. and its affiliates. All Rights Reserved | |
| # ------------------------------------------------------------------------ | |
| # Modified from DETR (https://github.com/facebookresearch/detr) | |
| # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved | |
| # ------------------------------------------------------------------------ | |
| import math | |
| import torch | |
| import sys | |
| import hotr.util.misc as utils | |
| import hotr.util.logger as loggers | |
| from hotr.util.ramp import * | |
| from typing import Iterable | |
| import wandb | |
def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module,
                    data_loader: Iterable, optimizer: torch.optim.Optimizer,
                    device: torch.device, epoch: int, max_epoch: int, ramp_up_epoch: int,
                    rampdown_epoch: int, max_consis_coef: float = 1.0, max_norm: float = 0,
                    dataset_file: str = 'coco', log: bool = False):
    """Run one training epoch and return the averaged meter values.

    For every ``(samples, targets)`` batch yielded by ``data_loader`` the model
    output is scored by ``criterion``, the per-term losses are combined using
    ``criterion.weight_dict`` and the optimizer takes one step.  Loss terms
    whose key contains ``'consistency'`` are additionally multiplied by a
    per-epoch coefficient.

    Args:
        model: Network to train; switched to train mode.
        criterion: Loss module; called as ``criterion(outputs, targets, log)``
            and expected to expose ``weight_dict``.
        data_loader: Sized iterable of ``(samples, targets)`` batches, where
            ``targets`` is a list of dicts whose values have ``.to(device)``.
        optimizer: Optimizer updated once per batch.
        device: Device the batch is moved to.
        epoch: Zero-based index of the current epoch.
        max_epoch: Total number of epochs (used for the log header and the
            ramp-down length).
        ramp_up_epoch: Forwarded to ``sigmoid_rampup`` as the ramp-up length.
        rampdown_epoch: Epoch after which ``cosine_rampdown`` is used instead
            of ``sigmoid_rampup``.
        max_consis_coef: Forwarded to the ramp functions as the maximum
            consistency coefficient.
        max_norm: If greater than 0, gradients are clipped to this norm.
        dataset_file: Not used by this function; kept for interface
            compatibility with callers.
        log: Forwarded to ``criterion``; when true, rank 0 also sends the last
            batch's weighted losses to ``wandb``.

    Returns:
        Dict mapping each meter name to its ``global_avg``.

    Raises:
        SystemExit: If the reduced, weighted loss of a batch is not finite.
    """
    model.train()
    criterion.train()

    metric_logger = loggers.MetricLogger(mode="train", delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    space_fmt = str(len(str(max_epoch)))
    header = 'Epoch [{start_epoch: >{fill}}/{end_epoch}]'.format(
        start_epoch=epoch + 1, end_epoch=max_epoch, fill=space_fmt)
    # Aim for roughly five progress lines per epoch, but never pass 0:
    # loaders with fewer than five batches would otherwise yield print_freq == 0
    # (presumably used as a modulus inside log_every -- verify against hotr.util.logger).
    print_freq = max(1, len(data_loader) // 5)

    # The consistency coefficient is fixed for the whole epoch.
    if epoch <= rampdown_epoch:
        consis_coef = sigmoid_rampup(epoch, ramp_up_epoch, max_consis_coef)
    else:
        consis_coef = cosine_rampdown(
            epoch - rampdown_epoch, max_epoch - rampdown_epoch, max_consis_coef)

    def _weighted(name, value, weight_dict):
        # Apply the criterion weight; consistency terms also get the epoch coefficient.
        scaled = value * weight_dict[name]
        return scaled * consis_coef if 'consistency' in name else scaled

    # Stays None when the loader yields no batch, so the wandb call below can be skipped.
    loss_dict_reduced_scaled = None

    print(f"\n>>> Epoch #{(epoch+1)}")
    for samples, targets in metric_logger.log_every(data_loader, print_freq, header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        outputs = model(samples)
        loss_dict = criterion(outputs, targets, log)
        weight_dict = criterion.weight_dict
        # Only terms that have an entry in weight_dict contribute to the objective.
        losses = sum(_weighted(k, v, weight_dict)
                     for k, v in loss_dict.items() if k in weight_dict)

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {k: _weighted(k, v, weight_dict)
                                    for k, v in loss_dict_reduced.items() if k in weight_dict}
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())
        loss_value = losses_reduced_scaled.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        if max_norm > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        optimizer.step()

        metric_logger.update(loss=loss_value, **loss_dict_reduced_scaled)
        if "obj_class_error" in loss_dict:
            metric_logger.update(obj_class_error=loss_dict_reduced['obj_class_error'])
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    # Only the last batch's weighted losses are sent to wandb; nothing to send if no batch ran.
    if utils.get_rank() == 0 and log and loss_dict_reduced_scaled is not None:
        wandb.log(loss_dict_reduced_scaled)
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}