diff --git "a/logs/exp_dc_f32c32_EqM.log" "b/logs/exp_dc_f32c32_EqM.log" new file mode 100644--- /dev/null +++ "b/logs/exp_dc_f32c32_EqM.log" @@ -0,0 +1,4392 @@ +nohup: ignoring input +The following values were not passed to `accelerate launch` and had defaults used instead: + `--num_processes` was set to a value of `8` + More than one GPU was found, enabling multi-GPU training. + If this was unintended please pass in `--num_processes=1`. + `--num_machines` was set to a value of `1` + `--mixed_precision` was set to a value of `'no'` + `--dynamo_backend` was set to a value of `'no'` +To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`. +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Using cached /workspace/DC_SSDAE/runs/cache +Using cached /workspace/DC_SSDAE/runs/cache +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Using cached /workspace/DC_SSDAE/runs/cache +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Using cached /workspace/DC_SSDAE/runs/cache +Using cached /workspace/DC_SSDAE/runs/cache +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Using cached /workspace/DC_SSDAE/runs/cache +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Using cached /workspace/DC_SSDAE/runs/cache +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Using cached /workspace/DC_SSDAE/runs/cache +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +[2025-10-26 11:19:20,467][main][INFO] - Will write tensorboard logs inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM/tensorboard_logs +[2025-10-26 11:19:20,470][main][INFO] - Runtime at /workspace/DC_SSDAE +[2025-10-26 11:19:20,472][main][INFO] - Running inside /workspace/DC_SSDAE/runs/jobs/train_enc_dc_f32c32_EqM +[2025-10-26 11:19:20,472][main][INFO] - Running args: ['main.py', 'run_name=train_enc_dc_f32c32_EqM', 'dataset.im_size=128', 'dataset.aug_scale=2', 'training.epochs=60', 'dc_ssdae.encoder_train=true'] +[2025-10-26 11:19:20,473][main][INFO] - Command: 'main.py' 'run_name=train_enc_dc_f32c32_EqM' 'dataset.im_size=128' 'dataset.aug_scale=2' 'training.epochs=60' 'dc_ssdae.encoder_train=true' +[2025-10-26 11:19:20,473][main][INFO] - Accelerator with 8 processes, running on cuda:0 +[2025-10-26 11:19:20,478][main][INFO] - Hydra configuration: +seed: 0 +task: train +runtime_path: ${hydra:runtime.cwd} +ckpt_dir: ${runtime_path}/runs +run_name: train_enc_dc_f32c32_EqM +cache_dir: ${ckpt_dir}/cache +run_dir: ${ckpt_dir}/jobs/${run_name} +checkpoint_path: ${run_dir}/checkpoints +dataset: + imagenet_root: imagenet_data + im_size: 128 + batch_size: 192 + aug_scale: 2 + limit: null +distill_teacher: false +dc_ssdae: + compile: false + checkpoint: null + encoder: f32c32 + encoder_checkpoint: null + encoder_train: true + decoder: S + trainer_type: FM + encoder_type: dc + sampler: + steps: 10 + ema: + decay: 0.999 + start_iter: 50000 +aux_losses: + compile: ${dc_ssdae.compile} + repa: + i_extract: 4 + n_layers: 2 + lpips: true +training: + sdpa_kernel: 2 + mixed_precision: bf16 + grad_accumulate: 1 + grad_clip: 0.1 + epochs: 60 + eval_freq: 1 + save_on_best: FID + log_freq: 100 + lr: 0.0003 + weight_decay: 0.001 +losses: + diffusion: 1 + repa: 0.25 + lpips: 0.5 + kl: 1.0e-06 +show_samples: 8 + + + +[2025-10-26 11:19:33,933][main][INFO] - Loaded ImageNet dataset: {'train': Dataset ImageNet + Number of datapoints: 1279867 + Root location: ../../../imagenet_data + Split: train + StandardTransform +Transform: Compose( + RandomResize(min_size=128, max_size=256, interpolation=InterpolationMode.LANCZOS, antialias=True) + RandomCrop(size=(128, 128), pad_if_needed=False, fill=0, padding_mode=constant) + RandomHorizontalFlip(p=0.5) + ToImage() + ToDtype(scale=True) + Normalize(mean=[0.5], std=[0.5], inplace=False) + ), 'test': Dataset ImageNet + Number of datapoints: 49950 + Root location: ../../../imagenet_data + Split: validation + StandardTransform +Transform: Compose( + Resize(size=[128], interpolation=InterpolationMode.BILINEAR, antialias=True) + CenterCrop(size=(128, 128)) + ToImage() + ToDtype(scale=True) + Normalize(mean=[0.5], std=[0.5], inplace=False) + )} +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +[2025-10-26 11:19:49,801][main][INFO] - ae parameters count: +[2025-10-26 11:19:49,807][main][INFO] - Total: #230.9M (trainable: #230.9M) +[2025-10-26 11:19:49,808][main][INFO] - - encoder: #217.4M (trainable: #217.4M) +[2025-10-26 11:19:49,809][main][INFO] - - project_in: #1.8K (trainable: #1.8K) +[2025-10-26 11:19:49,810][main][INFO] - - stages: #216.9M (trainable: #216.9M) +[2025-10-26 11:19:49,811][main][INFO] - - project_out: #576.1K (trainable: #576.1K) +[2025-10-26 11:19:49,813][main][INFO] - - decoder: #13.5M (trainable: #13.5M) +[2025-10-26 11:19:49,813][main][INFO] - - conv_in_img: #896 (trainable: #896) +[2025-10-26 11:19:49,814][main][INFO] - - conv_in_z: #9.0K (trainable: #9.0K) +[2025-10-26 11:19:49,814][main][INFO] - - conv_in: #36.1K (trainable: #36.1K) +[2025-10-26 11:19:49,815][main][INFO] - - batch_norm_z: #64 (trainable: #64) +[2025-10-26 11:19:49,815][main][INFO] - - time_proj: #0 (trainable: #0) +[2025-10-26 11:19:49,817][main][INFO] - - time_embedding: #80.5K (trainable: #80.5K) +[2025-10-26 11:19:49,818][main][INFO] - - ada_ctx_proj: #54.1K (trainable: #54.1K) +[2025-10-26 11:19:49,819][main][INFO] - - down_blocks: #3.0M (trainable: #3.0M) +[2025-10-26 11:19:49,820][main][INFO] - - mid_block: #3.4M (trainable: #3.4M) +[2025-10-26 11:19:49,820][main][INFO] - - up_blocks: #6.9M (trainable: #6.9M) +[2025-10-26 11:19:49,821][main][INFO] - - conv_norm_out: #128 (trainable: #128) +[2025-10-26 11:19:49,821][main][INFO] - - conv_out_act: #0 (trainable: #0) +[2025-10-26 11:19:49,822][main][INFO] - - conv_out: #1.7K (trainable: #1.7K) +[2025-10-26 11:19:49,825][main][INFO] - ae: EMAWrapper( + (model): DistributedDataParallel( + (module): DC_SSDAE( + (encoder): DCEncoder( + (project_in): ConvPixelUnshuffleDownSampleLayer( + (conv): ConvLayer( + (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (stages): ModuleList( + (0): OpSequential( + (op_list): ModuleList() + ) + (1): OpSequential( + (op_list): ModuleList( + (0-4): 5 x ResidualBlock( + (main): ResBlock( + (conv1): ConvLayer( + (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (act): SiLU() + ) + (conv2): ConvLayer( + (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + ) + ) + (shortcut): IdentityLayer() + ) + (5): ResidualBlock( + (main): ConvPixelUnshuffleDownSampleLayer( + (conv): ConvLayer( + (conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (shortcut): PixelUnshuffleChannelAveragingDownSampleLayer() + ) + ) + ) + (2): OpSequential( + (op_list): ModuleList( + (0-9): 10 x ResidualBlock( + (main): ResBlock( + (conv1): ConvLayer( + (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (act): SiLU() + ) + (conv2): ConvLayer( + (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + ) + ) + (shortcut): IdentityLayer() + ) + (10): ResidualBlock( + (main): ConvPixelUnshuffleDownSampleLayer( + (conv): ConvLayer( + (conv): Conv2d(512, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (shortcut): PixelUnshuffleChannelAveragingDownSampleLayer() + ) + ) + ) + (3): OpSequential( + (op_list): ModuleList( + (0-3): 4 x ResidualBlock( + (main): ResBlock( + (conv1): ConvLayer( + (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (act): SiLU() + ) + (conv2): ConvLayer( + (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + ) + ) + (shortcut): IdentityLayer() + ) + (4): ResidualBlock( + (main): ConvPixelUnshuffleDownSampleLayer( + (conv): ConvLayer( + (conv): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (shortcut): PixelUnshuffleChannelAveragingDownSampleLayer() + ) + ) + ) + (4): OpSequential( + (op_list): ModuleList( + (0-3): 4 x ResidualBlock( + (main): ResBlock( + (conv1): ConvLayer( + (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (act): SiLU() + ) + (conv2): ConvLayer( + (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + ) + ) + (shortcut): IdentityLayer() + ) + (4): ResidualBlock( + (main): ConvPixelUnshuffleDownSampleLayer( + (conv): ConvLayer( + (conv): Conv2d(1024, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (shortcut): PixelUnshuffleChannelAveragingDownSampleLayer() + ) + ) + ) + (5): OpSequential( + (op_list): ModuleList( + (0-3): 4 x ResidualBlock( + (main): ResBlock( + (conv1): ConvLayer( + (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (act): SiLU() + ) + (conv2): ConvLayer( + (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + ) + ) + (shortcut): IdentityLayer() + ) + ) + ) + ) + (project_out): OpSequential( + (op_list): ModuleList( + (0): ConvLayer( + (conv): Conv2d(1024, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + ) + ) + (decoder): UViTDecoder( + (conv_in_img): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv_in_z): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv_in): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (batch_norm_z): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (time_proj): Timesteps() + (time_embedding): TimestepEmbedding( + (linear_1): Linear(in_features=64, out_features=256, bias=True) + (act): SiLU() + (linear_2): Linear(in_features=256, out_features=256, bias=True) + ) + (ada_ctx_proj): Sequential( + (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (1): SiLU() + (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + (down_blocks): ModuleList( + (0): DownBlock2D( + (resnets): ModuleList( + (0-1): 2 x ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=128, bias=True) + (norm2): GroupNorm(32, 64, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + ) + ) + (downsamplers): ModuleList( + (0): Downsample2D( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + ) + ) + ) + (1): DownBlock2D( + (resnets): ModuleList( + (0): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=192, bias=True) + (norm2): GroupNorm(32, 96, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(64, 96, kernel_size=(1, 1), stride=(1, 1)) + ) + (1): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=192, bias=True) + (norm2): GroupNorm(32, 96, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + ) + ) + (downsamplers): ModuleList( + (0): Downsample2D( + (conv): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + ) + ) + ) + (2): DownBlock2D( + (resnets): ModuleList( + (0): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(96, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(96, 160, kernel_size=(1, 1), stride=(1, 1)) + ) + (1): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + ) + ) + (downsamplers): ModuleList( + (0): Downsample2D( + (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + ) + ) + ) + (3): DownBlock2D( + (resnets): ModuleList( + (0-1): 2 x ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + ) + ) + ) + ) + (mid_block): UViTMiddleTransformer( + (proj_in): Linear(in_features=160, out_features=160, bias=True) + (transformer_blocks): ModuleList( + (0-7): 8 x TransformerBlock( + (norm1): AdaLayerNorm( + (silu): SiLU() + (linear): Linear(in_features=64, out_features=320, bias=True) + (norm): LayerNorm((160,), eps=1e-05, elementwise_affine=False) + ) + (attn1): Attention( + (to_q): Linear(in_features=160, out_features=160, bias=False) + (to_k): Linear(in_features=160, out_features=160, bias=False) + (to_v): Linear(in_features=160, out_features=160, bias=False) + (out_proj): Linear(in_features=160, out_features=160, bias=True) + (out_drop): Dropout(p=0.0, inplace=False) + ) + (norm2): LayerNorm((160,), eps=1e-05, elementwise_affine=True) + (ff): FeedForward( + (proj_in_act): GEGLU( + (proj): Linear(in_features=160, out_features=1280, bias=True) + ) + (drop): Dropout(p=0.0, inplace=False) + (proj_out): Linear(in_features=640, out_features=160, bias=True) + ) + (relative_position_bias): RelativePositionBias() + ) + ) + (proj_out): Linear(in_features=160, out_features=160, bias=True) + (norm): GroupNorm(32, 160, eps=1e-06, affine=True) + ) + (up_blocks): ModuleList( + (0): UpBlock2D( + (resnets): ModuleList( + (0-2): 3 x ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 640, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(320, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(320, 160, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (upsamplers): ModuleList( + (0): Upsample2D( + (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + ) + (1): UpBlock2D( + (resnets): ModuleList( + (0-1): 2 x ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 640, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(320, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(320, 160, kernel_size=(1, 1), stride=(1, 1)) + ) + (2): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 512, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(256, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(256, 160, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (upsamplers): ModuleList( + (0): Upsample2D( + (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + ) + (2): UpBlock2D( + (resnets): ModuleList( + (0): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 512, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(256, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=192, bias=True) + (norm2): GroupNorm(32, 96, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(256, 96, kernel_size=(1, 1), stride=(1, 1)) + ) + (1): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(192, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=192, bias=True) + (norm2): GroupNorm(32, 96, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1)) + ) + (2): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(160, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=192, bias=True) + (norm2): GroupNorm(32, 96, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(160, 96, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (upsamplers): ModuleList( + (0): Upsample2D( + (conv): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + ) + (3): UpBlock2D( + (resnets): ModuleList( + (0): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(160, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=128, bias=True) + (norm2): GroupNorm(32, 64, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(160, 64, kernel_size=(1, 1), stride=(1, 1)) + ) + (1-2): 2 x ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=128, bias=True) + (norm2): GroupNorm(32, 64, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + ) + ) + (conv_norm_out): GroupNorm(32, 64, eps=1e-05, affine=True) + (conv_out_act): SiLU() + (conv_out): Conv2d(64, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + ) + (ema): EMA(ema_model=DC_SSDAE, decay=0.999, start_iter=50000) +) +[2025-10-26 11:19:49,825][main][INFO] - aux_losses parameters count: +[2025-10-26 11:19:49,826][main][INFO] - Total: #96.7M (trainable: #145.9K) +[2025-10-26 11:19:49,827][main][INFO] - - repa_loss: #82.7M (trainable: #145.9K) +[2025-10-26 11:19:49,828][main][INFO] - - lpips_loss: #14.0M (trainable: #0) +[2025-10-26 11:19:49,828][main][INFO] - aux_losses: DistributedDataParallel( + (module): SSDDLosses( + (repa_loss): REPALoss( + (features_extractor): Frozen(DinoEncoder/Dinov2Model) + (repa_mlp): Sequential( + (0): Linear(in_features=160, out_features=160, bias=True) + (1): SiLU() + (2): Linear(in_features=160, out_features=768, bias=True) + ) + (repa_loss): CosineSimilarity() + ) + (lpips_loss): Frozen(LPIPS) + ) +) +[2025-10-26 11:19:49,833][main][INFO] - Optimizer for autoencoder: RAdamScheduleFree ( +Parameter Group 0 + betas: (0.9, 0.999) + eps: 1e-08 + foreach: True + k: 0 + lr: 0.0003 + lr_max: -1.0 + r: 0.0 + scheduled_lr: 0.0 + silent_sgd_phase: True + train_mode: False + weight_decay: 0.001 + weight_lr_power: 2.0 + weight_sum: 0.0 + +Parameter Group 1 + betas: (0.9, 0.999) + eps: 1e-08 + foreach: True + k: 0 + lr: 0.0003 + lr_max: -1.0 + r: 0.0 + scheduled_lr: 0.0 + silent_sgd_phase: True + train_mode: False + weight_decay: 0.0 + weight_lr_power: 2.0 + weight_sum: 0.0 +) +[2025-10-26 11:19:49,843][main][INFO] - No training state found to resume from None +[2025-10-26 11:19:49,844][main][INFO] - ====================== RUNNING TASK train +[2025-10-26 11:19:49,844][main][INFO] - Starting training +[2025-10-26 11:19:49,845][main][INFO] - Batch size of 192 (24 per GPU, 1 acumulation step(s) 8 process(es)) +[2025-10-26 11:19:49,853][main][INFO] - --- + + +[2025-10-26 11:19:49,854][main][INFO] - [T_total=00:00:29 | T_train=00:00:00] Start epoch 0 +[T_total=00:00:38 | T_train=00:00:09 | T_epoch=00:00:09] Epoch 0, batch 1 / 6666 (step 0) loss=7.98227e+07 (avg=7.982e+07) [[all losses: diffusion=1.35239 ; kl=7.98227e+13 ; lpips=0.750356 ; repa=0.995288]] +[T_total=00:03:25 | T_train=00:02:56 | T_epoch=00:02:56] Epoch 0, batch 101 / 6666 (step 100) loss=4.43039e+06 (avg=4.43e+06) [[all losses: diffusion=1.20449 ; kl=4.43039e+12 ; lpips=0.702094 ; repa=0.949182 ; sum_loss=4.43039e+06]] +[T_total=00:06:12 | T_train=00:05:42 | T_epoch=00:05:42] Epoch 0, batch 201 / 6666 (step 200) loss=2.22622e+06 (avg=2.226e+06) [[all losses: diffusion=0.753804 ; kl=2.22622e+12 ; lpips=0.638019 ; repa=0.92357 ; sum_loss=2.22622e+06]] +[T_total=00:08:58 | T_train=00:08:28 | T_epoch=00:08:28] Epoch 0, batch 301 / 6666 (step 300) loss=1.48661e+06 (avg=1.487e+06) [[all losses: diffusion=0.558678 ; kl=1.48661e+12 ; lpips=0.588297 ; repa=0.897577 ; sum_loss=1.48661e+06]] +[T_total=00:11:45 | T_train=00:11:16 | T_epoch=00:11:16] Epoch 0, batch 401 / 6666 (step 400) loss=1.11588e+06 (avg=1.116e+06) [[all losses: diffusion=0.455064 ; kl=1.11588e+12 ; lpips=0.554513 ; repa=0.87424 ; sum_loss=1.11588e+06]] +[T_total=00:14:30 | T_train=00:14:01 | T_epoch=00:14:01] Epoch 0, batch 501 / 6666 (step 500) loss=893153 (avg=8.932e+05) [[all losses: diffusion=0.3904 ; kl=8.93153e+11 ; lpips=0.530706 ; repa=0.855295 ; sum_loss=893153]] +[T_total=00:17:16 | T_train=00:16:47 | T_epoch=00:16:47] Epoch 0, batch 601 / 6666 (step 600) loss=744550 (avg=7.446e+05) [[all losses: diffusion=0.346796 ; kl=7.4455e+11 ; lpips=0.514013 ; repa=0.83964 ; sum_loss=744550]] +[T_total=00:20:03 | T_train=00:19:33 | T_epoch=00:19:33] Epoch 0, batch 701 / 6666 (step 700) loss=638338 (avg=6.383e+05) [[all losses: diffusion=0.314614 ; kl=6.38337e+11 ; lpips=0.500275 ; repa=0.826242 ; sum_loss=638338]] +[T_total=00:22:49 | T_train=00:22:20 | T_epoch=00:22:20] Epoch 0, batch 801 / 6666 (step 800) loss=558645 (avg=5.586e+05) [[all losses: diffusion=0.290145 ; kl=5.58645e+11 ; lpips=0.489371 ; repa=0.814839 ; sum_loss=558645]] +[T_total=00:25:37 | T_train=00:25:08 | T_epoch=00:25:08] Epoch 0, batch 901 / 6666 (step 900) loss=496643 (avg=4.966e+05) [[all losses: diffusion=0.270795 ; kl=4.96642e+11 ; lpips=0.48002 ; repa=0.805043 ; sum_loss=496643]] +[T_total=00:28:25 | T_train=00:27:55 | T_epoch=00:27:55] Epoch 0, batch 1001 / 6666 (step 1000) loss=447028 (avg=4.47e+05) [[all losses: diffusion=0.255036 ; kl=4.47027e+11 ; lpips=0.472247 ; repa=0.796565 ; sum_loss=447028]] +[T_total=00:31:11 | T_train=00:30:42 | T_epoch=00:30:42] Epoch 0, batch 1101 / 6666 (step 1100) loss=406526 (avg=4.065e+05) [[all losses: diffusion=0.242515 ; kl=4.06525e+11 ; lpips=0.465703 ; repa=0.789135 ; sum_loss=406526]] +[T_total=00:33:57 | T_train=00:33:28 | T_epoch=00:33:28] Epoch 0, batch 1201 / 6666 (step 1200) loss=372677 (avg=3.727e+05) [[all losses: diffusion=0.231588 ; kl=3.72676e+11 ; lpips=0.459858 ; repa=0.782447 ; sum_loss=372677]] +[T_total=00:36:48 | T_train=00:36:19 | T_epoch=00:36:19] Epoch 0, batch 1301 / 6666 (step 1300) loss=344031 (avg=3.44e+05) [[all losses: diffusion=0.222323 ; kl=3.44031e+11 ; lpips=0.454401 ; repa=0.776351 ; sum_loss=344031]] +[T_total=00:39:36 | T_train=00:39:06 | T_epoch=00:39:06] Epoch 0, batch 1401 / 6666 (step 1400) loss=319475 (avg=3.195e+05) [[all losses: diffusion=0.214335 ; kl=3.19475e+11 ; lpips=0.449551 ; repa=0.770723 ; sum_loss=319475]] +[T_total=00:42:25 | T_train=00:41:55 | T_epoch=00:41:55] Epoch 0, batch 1501 / 6666 (step 1500) loss=298191 (avg=2.982e+05) [[all losses: diffusion=0.207336 ; kl=2.98191e+11 ; lpips=0.44511 ; repa=0.76551 ; sum_loss=298191]] +[T_total=00:45:11 | T_train=00:44:41 | T_epoch=00:44:41] Epoch 0, batch 1601 / 6666 (step 1600) loss=279567 (avg=2.796e+05) [[all losses: diffusion=0.201527 ; kl=2.79566e+11 ; lpips=0.44146 ; repa=0.760872 ; sum_loss=279567]] +[T_total=00:47:57 | T_train=00:47:28 | T_epoch=00:47:28] Epoch 0, batch 1701 / 6666 (step 1700) loss=263131 (avg=2.631e+05) [[all losses: diffusion=0.195944 ; kl=2.63131e+11 ; lpips=0.437847 ; repa=0.756408 ; sum_loss=263131]] +[T_total=00:50:45 | T_train=00:50:16 | T_epoch=00:50:16] Epoch 0, batch 1801 / 6666 (step 1800) loss=248521 (avg=2.485e+05) [[all losses: diffusion=0.191052 ; kl=2.48521e+11 ; lpips=0.434357 ; repa=0.752207 ; sum_loss=248521]] +[T_total=00:53:31 | T_train=00:53:01 | T_epoch=00:53:01] Epoch 0, batch 1901 / 6666 (step 1900) loss=235464 (avg=2.355e+05) [[all losses: diffusion=0.186678 ; kl=2.35463e+11 ; lpips=0.431319 ; repa=0.74834 ; sum_loss=235464]] +[T_total=00:56:20 | T_train=00:55:50 | T_epoch=00:55:50] Epoch 0, batch 2001 / 6666 (step 2000) loss=223696 (avg=2.237e+05) [[all losses: diffusion=0.182638 ; kl=2.23696e+11 ; lpips=0.4283 ; repa=0.744678 ; sum_loss=223696]] +[T_total=00:59:05 | T_train=00:58:36 | T_epoch=00:58:36] Epoch 0, batch 2101 / 6666 (step 2100) loss=213049 (avg=2.13e+05) [[all losses: diffusion=0.178958 ; kl=2.13049e+11 ; lpips=0.425383 ; repa=0.741197 ; sum_loss=213049]] +[T_total=01:01:50 | T_train=01:01:21 | T_epoch=01:01:21] Epoch 0, batch 2201 / 6666 (step 2200) loss=203370 (avg=2.034e+05) [[all losses: diffusion=0.175643 ; kl=2.03369e+11 ; lpips=0.422444 ; repa=0.737906 ; sum_loss=203370]] +[T_total=01:04:37 | T_train=01:04:08 | T_epoch=01:04:08] Epoch 0, batch 2301 / 6666 (step 2300) loss=194533 (avg=1.945e+05) [[all losses: diffusion=0.172703 ; kl=1.94532e+11 ; lpips=0.420254 ; repa=0.734866 ; sum_loss=194533]] +[T_total=01:07:24 | T_train=01:06:54 | T_epoch=01:06:54] Epoch 0, batch 2401 / 6666 (step 2400) loss=186431 (avg=1.864e+05) [[all losses: diffusion=0.169846 ; kl=1.8643e+11 ; lpips=0.41779 ; repa=0.731913 ; sum_loss=186431]] +[T_total=01:10:10 | T_train=01:09:40 | T_epoch=01:09:40] Epoch 0, batch 2501 / 6666 (step 2500) loss=178976 (avg=1.79e+05) [[all losses: diffusion=0.167195 ; kl=1.78976e+11 ; lpips=0.415442 ; repa=0.729082 ; sum_loss=178976]] +[T_total=01:12:57 | T_train=01:12:28 | T_epoch=01:12:28] Epoch 0, batch 2601 / 6666 (step 2600) loss=172095 (avg=1.721e+05) [[all losses: diffusion=0.164736 ; kl=1.72095e+11 ; lpips=0.412958 ; repa=0.726392 ; sum_loss=172095]] +[T_total=01:15:44 | T_train=01:15:15 | T_epoch=01:15:15] Epoch 0, batch 2701 / 6666 (step 2700) loss=165724 (avg=1.657e+05) [[all losses: diffusion=0.162423 ; kl=1.65723e+11 ; lpips=0.410746 ; repa=0.723795 ; sum_loss=165724]] +[T_total=01:18:31 | T_train=01:18:01 | T_epoch=01:18:01] Epoch 0, batch 2801 / 6666 (step 2800) loss=159807 (avg=1.598e+05) [[all losses: diffusion=0.160327 ; kl=1.59807e+11 ; lpips=0.409184 ; repa=0.721489 ; sum_loss=159807]] +[T_total=01:21:16 | T_train=01:20:46 | T_epoch=01:20:46] Epoch 0, batch 2901 / 6666 (step 2900) loss=154299 (avg=1.543e+05) [[all losses: diffusion=0.15832 ; kl=1.54298e+11 ; lpips=0.407189 ; repa=0.719126 ; sum_loss=154299]] +[T_total=01:24:03 | T_train=01:23:33 | T_epoch=01:23:33] Epoch 0, batch 3001 / 6666 (step 3000) loss=149157 (avg=1.492e+05) [[all losses: diffusion=0.156399 ; kl=1.49157e+11 ; lpips=0.405272 ; repa=0.716847 ; sum_loss=149157]] +[T_total=01:26:49 | T_train=01:26:20 | T_epoch=01:26:20] Epoch 0, batch 3101 / 6666 (step 3100) loss=144347 (avg=1.443e+05) [[all losses: diffusion=0.154716 ; kl=1.44347e+11 ; lpips=0.403593 ; repa=0.714736 ; sum_loss=144347]] +[T_total=01:29:34 | T_train=01:29:04 | T_epoch=01:29:04] Epoch 0, batch 3201 / 6666 (step 3200) loss=139838 (avg=1.398e+05) [[all losses: diffusion=0.153035 ; kl=1.39837e+11 ; lpips=0.401909 ; repa=0.712664 ; sum_loss=139838]] +[T_total=01:32:18 | T_train=01:31:49 | T_epoch=01:31:49] Epoch 0, batch 3301 / 6666 (step 3300) loss=135602 (avg=1.356e+05) [[all losses: diffusion=0.151482 ; kl=1.35601e+11 ; lpips=0.39997 ; repa=0.710614 ; sum_loss=135602]] +[T_total=01:35:07 | T_train=01:34:38 | T_epoch=01:34:38] Epoch 0, batch 3401 / 6666 (step 3400) loss=131615 (avg=1.316e+05) [[all losses: diffusion=0.14995 ; kl=1.31614e+11 ; lpips=0.398314 ; repa=0.708682 ; sum_loss=131615]] +[T_total=01:37:56 | T_train=01:37:27 | T_epoch=01:37:27] Epoch 0, batch 3501 / 6666 (step 3500) loss=127855 (avg=1.279e+05) [[all losses: diffusion=0.148505 ; kl=1.27855e+11 ; lpips=0.396696 ; repa=0.706807 ; sum_loss=127855]] +[T_total=01:40:42 | T_train=01:40:13 | T_epoch=01:40:13] Epoch 0, batch 3601 / 6666 (step 3600) loss=124305 (avg=1.243e+05) [[all losses: diffusion=0.147137 ; kl=1.24304e+11 ; lpips=0.395032 ; repa=0.704981 ; sum_loss=124305]] +[T_total=01:43:27 | T_train=01:42:58 | T_epoch=01:42:58] Epoch 0, batch 3701 / 6666 (step 3700) loss=120947 (avg=1.209e+05) [[all losses: diffusion=0.145954 ; kl=1.20946e+11 ; lpips=0.393755 ; repa=0.703369 ; sum_loss=120947]] +[T_total=01:46:14 | T_train=01:45:45 | T_epoch=01:45:45] Epoch 0, batch 3801 / 6666 (step 3800) loss=117765 (avg=1.178e+05) [[all losses: diffusion=0.144745 ; kl=1.17764e+11 ; lpips=0.392088 ; repa=0.701642 ; sum_loss=117765]] +[T_total=01:48:59 | T_train=01:48:30 | T_epoch=01:48:30] Epoch 0, batch 3901 / 6666 (step 3900) loss=114746 (avg=1.147e+05) [[all losses: diffusion=0.14356 ; kl=1.14746e+11 ; lpips=0.390599 ; repa=0.700015 ; sum_loss=114746]] +[T_total=01:51:48 | T_train=01:51:19 | T_epoch=01:51:19] Epoch 0, batch 4001 / 6666 (step 4000) loss=111878 (avg=1.119e+05) [[all losses: diffusion=0.142503 ; kl=1.11878e+11 ; lpips=0.38916 ; repa=0.698442 ; sum_loss=111878]] +[T_total=01:54:35 | T_train=01:54:06 | T_epoch=01:54:06] Epoch 0, batch 4101 / 6666 (step 4100) loss=109150 (avg=1.092e+05) [[all losses: diffusion=0.141428 ; kl=1.0915e+11 ; lpips=0.387725 ; repa=0.6969 ; sum_loss=109150]] +[T_total=01:57:20 | T_train=01:56:51 | T_epoch=01:56:51] Epoch 0, batch 4201 / 6666 (step 4200) loss=106552 (avg=1.066e+05) [[all losses: diffusion=0.1404 ; kl=1.06551e+11 ; lpips=0.38633 ; repa=0.695405 ; sum_loss=106552]] +[T_total=02:00:07 | T_train=01:59:37 | T_epoch=01:59:37] Epoch 0, batch 4301 / 6666 (step 4300) loss=104075 (avg=1.041e+05) [[all losses: diffusion=0.139434 ; kl=1.04074e+11 ; lpips=0.385047 ; repa=0.693982 ; sum_loss=104075]] +[T_total=02:02:53 | T_train=02:02:23 | T_epoch=02:02:23] Epoch 0, batch 4401 / 6666 (step 4400) loss=101710 (avg=1.017e+05) [[all losses: diffusion=0.138503 ; kl=1.01709e+11 ; lpips=0.383689 ; repa=0.692544 ; sum_loss=101710]] +[T_total=02:05:38 | T_train=02:05:09 | T_epoch=02:05:09] Epoch 0, batch 4501 / 6666 (step 4500) loss=99450.1 (avg=9.945e+04) [[all losses: diffusion=0.137602 ; kl=9.94496e+10 ; lpips=0.382355 ; repa=0.691146 ; sum_loss=99450.1]] +[T_total=02:08:24 | T_train=02:07:55 | T_epoch=02:07:55] Epoch 0, batch 4601 / 6666 (step 4600) loss=97288.6 (avg=9.729e+04) [[all losses: diffusion=0.136731 ; kl=9.72881e+10 ; lpips=0.381071 ; repa=0.689785 ; sum_loss=97288.6]] +[T_total=02:11:11 | T_train=02:10:41 | T_epoch=02:10:41] Epoch 0, batch 4701 / 6666 (step 4700) loss=95219.1 (avg=9.522e+04) [[all losses: diffusion=0.135916 ; kl=9.52186e+10 ; lpips=0.379756 ; repa=0.688458 ; sum_loss=95219.1]] +[T_total=02:13:57 | T_train=02:13:28 | T_epoch=02:13:28] Epoch 0, batch 4801 / 6666 (step 4800) loss=93235.8 (avg=9.324e+04) [[all losses: diffusion=0.135169 ; kl=9.32353e+10 ; lpips=0.378824 ; repa=0.687249 ; sum_loss=93235.8]] +[T_total=02:16:43 | T_train=02:16:14 | T_epoch=02:16:14] Epoch 0, batch 4901 / 6666 (step 4900) loss=91333.4 (avg=9.133e+04) [[all losses: diffusion=0.134396 ; kl=9.13329e+10 ; lpips=0.377635 ; repa=0.685994 ; sum_loss=91333.4]] +[T_total=02:19:31 | T_train=02:19:02 | T_epoch=02:19:02] Epoch 0, batch 5001 / 6666 (step 5000) loss=89507.4 (avg=8.951e+04) [[all losses: diffusion=0.133684 ; kl=8.95069e+10 ; lpips=0.376679 ; repa=0.684837 ; sum_loss=89507.4]] +[T_total=02:22:19 | T_train=02:21:49 | T_epoch=02:21:49] Epoch 0, batch 5101 / 6666 (step 5100) loss=87752.7 (avg=8.775e+04) [[all losses: diffusion=0.132985 ; kl=8.77522e+10 ; lpips=0.375455 ; repa=0.683631 ; sum_loss=87752.7]] +[T_total=02:25:03 | T_train=02:24:34 | T_epoch=02:24:34] Epoch 0, batch 5201 / 6666 (step 5200) loss=86065.5 (avg=8.607e+04) [[all losses: diffusion=0.132295 ; kl=8.6065e+10 ; lpips=0.374322 ; repa=0.68246 ; sum_loss=86065.5]] +[T_total=02:27:51 | T_train=02:27:21 | T_epoch=02:27:21] Epoch 0, batch 5301 / 6666 (step 5300) loss=84441.9 (avg=8.444e+04) [[all losses: diffusion=0.131623 ; kl=8.44414e+10 ; lpips=0.373146 ; repa=0.681308 ; sum_loss=84441.9]] +[T_total=02:30:37 | T_train=02:30:08 | T_epoch=02:30:08] Epoch 0, batch 5401 / 6666 (step 5400) loss=82878.5 (avg=8.288e+04) [[all losses: diffusion=0.13099 ; kl=8.2878e+10 ; lpips=0.372145 ; repa=0.680204 ; sum_loss=82878.5]] +[T_total=02:33:23 | T_train=02:32:53 | T_epoch=02:32:53] Epoch 0, batch 5501 / 6666 (step 5500) loss=81371.9 (avg=8.137e+04) [[all losses: diffusion=0.13035 ; kl=8.13714e+10 ; lpips=0.371115 ; repa=0.67912 ; sum_loss=81371.9]] +[T_total=02:36:11 | T_train=02:35:41 | T_epoch=02:35:41] Epoch 0, batch 5601 / 6666 (step 5600) loss=79919.1 (avg=7.992e+04) [[all losses: diffusion=0.129727 ; kl=7.99186e+10 ; lpips=0.370057 ; repa=0.678042 ; sum_loss=79919.1]] +[T_total=02:38:58 | T_train=02:38:29 | T_epoch=02:38:29] Epoch 0, batch 5701 / 6666 (step 5700) loss=78517.3 (avg=7.852e+04) [[all losses: diffusion=0.129136 ; kl=7.85168e+10 ; lpips=0.368968 ; repa=0.676967 ; sum_loss=78517.3]] +[T_total=02:41:43 | T_train=02:41:14 | T_epoch=02:41:14] Epoch 0, batch 5801 / 6666 (step 5800) loss=77163.7 (avg=7.716e+04) [[all losses: diffusion=0.128562 ; kl=7.71633e+10 ; lpips=0.367907 ; repa=0.675912 ; sum_loss=77163.7]] +[T_total=02:44:29 | T_train=02:44:00 | T_epoch=02:44:00] Epoch 0, batch 5901 / 6666 (step 5900) loss=75856.1 (avg=7.586e+04) [[all losses: diffusion=0.127989 ; kl=7.58556e+10 ; lpips=0.366987 ; repa=0.674895 ; sum_loss=75856.1]] +[T_total=02:47:15 | T_train=02:46:46 | T_epoch=02:46:46] Epoch 0, batch 6001 / 6666 (step 6000) loss=74592.2 (avg=7.459e+04) [[all losses: diffusion=0.127487 ; kl=7.45917e+10 ; lpips=0.366201 ; repa=0.673964 ; sum_loss=74592.2]] +[T_total=02:50:00 | T_train=02:49:31 | T_epoch=02:49:31] Epoch 0, batch 6101 / 6666 (step 6100) loss=73369.6 (avg=7.337e+04) [[all losses: diffusion=0.127008 ; kl=7.33691e+10 ; lpips=0.365298 ; repa=0.673019 ; sum_loss=73369.6]] +[T_total=02:52:46 | T_train=02:52:17 | T_epoch=02:52:17] Epoch 0, batch 6201 / 6666 (step 6200) loss=72186.4 (avg=7.219e+04) [[all losses: diffusion=0.126477 ; kl=7.21859e+10 ; lpips=0.364351 ; repa=0.672049 ; sum_loss=72186.4]] +[T_total=02:55:34 | T_train=02:55:04 | T_epoch=02:55:04] Epoch 0, batch 6301 / 6666 (step 6300) loss=71040.8 (avg=7.104e+04) [[all losses: diffusion=0.125966 ; kl=7.10403e+10 ; lpips=0.363403 ; repa=0.671096 ; sum_loss=71040.8]] +[T_total=02:58:20 | T_train=02:57:50 | T_epoch=02:57:50] Epoch 0, batch 6401 / 6666 (step 6400) loss=69931 (avg=6.993e+04) [[all losses: diffusion=0.125498 ; kl=6.99305e+10 ; lpips=0.362705 ; repa=0.670227 ; sum_loss=69931]] +[T_total=03:01:05 | T_train=03:00:36 | T_epoch=03:00:36] Epoch 0, batch 6501 / 6666 (step 6500) loss=68855.3 (avg=6.886e+04) [[all losses: diffusion=0.125028 ; kl=6.88549e+10 ; lpips=0.361784 ; repa=0.669303 ; sum_loss=68855.3]] +[T_total=03:03:53 | T_train=03:03:23 | T_epoch=03:03:23] Epoch 0, batch 6601 / 6666 (step 6600) loss=67812.2 (avg=6.781e+04) [[all losses: diffusion=0.124559 ; kl=6.78118e+10 ; lpips=0.360871 ; repa=0.668393 ; sum_loss=67812.2]] +[2025-10-26 14:25:01,522][main][INFO] - [T_total=03:05:41 | T_train=03:05:11 | T_epoch=03:05:11] End of epoch 0 (6666 steps) train loss 67151 +[2025-10-26 14:25:01,524][main][INFO] - [Epoch 0] All losses: [[diffusion=0.124278 ; kl=6.71505e+10 ; lpips=0.360362 ; repa=0.667823]] + Reconstructing from test set: 0%| | 0/261 [00:00 + sys.exit(main()) + ^^^^^^ + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/accelerate/commands/accelerate_cli.py", line 50, in main + args.func(args) + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/accelerate/commands/launch.py", line 1226, in launch_command + multi_gpu_launcher(args) + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/accelerate/commands/launch.py", line 853, in multi_gpu_launcher + distrib_run.run(args) + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/run.py", line 927, in run + elastic_launch( + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 156, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 284, in launch_agent + result = agent.run() + ^^^^^^^^^^^ + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper + result = f(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^ + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/elastic/agent/server/api.py", line 717, in run + result = self._invoke_run(role) + ^^^^^^^^^^^^^^^^^^^^^^ + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/elastic/agent/server/api.py", line 881, in _invoke_run + time.sleep(monitor_interval) + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/elastic/multiprocessing/api.py", line 85, in _terminate_process_handler + raise SignalException(f"Process {os.getpid()} got signal: {sigval}", sigval=sigval) +torch.distributed.elastic.multiprocessing.api.SignalException: Process 63668 got signal: 15