diff --git "a/logs/exp_vq_f8c4_FM.log" "b/logs/exp_vq_f8c4_FM.log" new file mode 100644--- /dev/null +++ "b/logs/exp_vq_f8c4_FM.log" @@ -0,0 +1,2031 @@ +nohup: ignoring input +The following values were not passed to `accelerate launch` and had defaults used instead: + `--num_processes` was set to a value of `8` + More than one GPU was found, enabling multi-GPU training. + If this was unintended please pass in `--num_processes=1`. + `--num_machines` was set to a value of `1` + `--mixed_precision` was set to a value of `'no'` + `--dynamo_backend` was set to a value of `'no'` +To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`. +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/__init__.py:1617: UserWarning: Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:80.) + _C._set_float32_matmul_precision(precision) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=AlexNet_Weights.IMAGENET1K_V1`. You can also use `weights=AlexNet_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Using cached /workspace/DC_SSDAE/runs/cache +Using cached /workspace/DC_SSDAE/runs/cache +Using cached /workspace/DC_SSDAE/runs/cache +Using cached /workspace/DC_SSDAE/runs/cache +Using cached /workspace/DC_SSDAE/runs/cache +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Using cached /workspace/DC_SSDAE/runs/cache +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Using cached /workspace/DC_SSDAE/runs/cache +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth +Using cached /workspace/DC_SSDAE/runs/cache +Using cache found in /workspace/DC_SSDAE/runs/cache/facebookresearch_dino_main +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/peft/tuners/tuners_utils.py:196: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing! + warnings.warn( +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +File already exists: /workspace/DC_SSDAE/runs/cache/weights-inception-2015-12-05-6726825d.pth +[2025-10-24 11:27:55,703][main][INFO] - Will write tensorboard logs inside /workspace/DC_SSDAE/runs/jobs/train_enc_vq_f8c4_FM/tensorboard_logs +[2025-10-24 11:27:55,722][main][INFO] - Runtime at /workspace/DC_SSDAE +[2025-10-24 11:27:55,723][main][INFO] - Running inside /workspace/DC_SSDAE/runs/jobs/train_enc_vq_f8c4_FM +[2025-10-24 11:27:55,724][main][INFO] - Running args: ['main.py', 'run_name=train_enc_vq_f8c4_FM', 'dataset.im_size=128', 'dataset.aug_scale=2', 'training.epochs=20', 'dc_ssdae.encoder_train=true'] +[2025-10-24 11:27:55,725][main][INFO] - Command: 'main.py' 'run_name=train_enc_vq_f8c4_FM' 'dataset.im_size=128' 'dataset.aug_scale=2' 'training.epochs=20' 'dc_ssdae.encoder_train=true' +[2025-10-24 11:27:55,726][main][INFO] - Accelerator with 8 processes, running on cuda:0 +[2025-10-24 11:27:55,729][main][INFO] - Hydra configuration: +seed: 0 +task: train +runtime_path: ${hydra:runtime.cwd} +ckpt_dir: ${runtime_path}/runs +run_name: train_enc_vq_f8c4_FM +cache_dir: ${ckpt_dir}/cache +run_dir: ${ckpt_dir}/jobs/${run_name} +checkpoint_path: ${run_dir}/checkpoints +dataset: + imagenet_root: imagenet_data + im_size: 128 + batch_size: 192 + aug_scale: 2 + limit: null +distill_teacher: false +dc_ssdae: + compile: false + checkpoint: null + encoder: f8c4 + encoder_checkpoint: null + encoder_train: true + decoder: S + trainer_type: FM + encoder_type: vq + sampler: + steps: 10 + ema: + decay: 0.999 + start_iter: 50000 +aux_losses: + compile: ${dc_ssdae.compile} + repa: + i_extract: 4 + n_layers: 2 + lpips: true +training: + sdpa_kernel: 2 + mixed_precision: bf16 + grad_accumulate: 1 + grad_clip: 0.1 + epochs: 20 + eval_freq: 1 + save_on_best: FID + log_freq: 100 + lr: 0.0003 + weight_decay: 0.001 +losses: + diffusion: 1 + repa: 0.25 + lpips: 0.5 + kl: 1.0e-06 +show_samples: 8 + + + +[2025-10-24 11:28:09,494][main][INFO] - Loaded ImageNet dataset: {'train': Dataset ImageNet + Number of datapoints: 1279867 + Root location: ../../../imagenet_data + Split: train + StandardTransform +Transform: Compose( + RandomResize(min_size=128, max_size=256, interpolation=InterpolationMode.LANCZOS, antialias=True) + RandomCrop(size=(128, 128), pad_if_needed=False, fill=0, padding_mode=constant) + RandomHorizontalFlip(p=0.5) + ToImage() + ToDtype(scale=True) + Normalize(mean=[0.5], std=[0.5], inplace=False) + ), 'test': Dataset ImageNet + Number of datapoints: 49950 + Root location: ../../../imagenet_data + Split: validation + StandardTransform +Transform: Compose( + Resize(size=[128], interpolation=InterpolationMode.BILINEAR, antialias=True) + CenterCrop(size=(128, 128)) + ToImage() + ToDtype(scale=True) + Normalize(mean=[0.5], std=[0.5], inplace=False) + )} +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_mean. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.running_var. Will default to 'ingore'. +[WARNING] Model buffers behavior should be defined using the '_ema' parameter. No _ema key for the buffer decoder.batch_norm_z.num_batches_tracked. Will default to 'ingore'. +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +Loading model from: /workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/lpips/weights/v0.1/vgg.pth +[2025-10-24 11:28:18,537][main][INFO] - ae parameters count: +[2025-10-24 11:28:18,540][main][INFO] - Total: #46.0M (trainable: #46.0M) +[2025-10-24 11:28:18,541][main][INFO] - - encoder: #32.6M (trainable: #32.6M) +[2025-10-24 11:28:18,542][main][INFO] - - conv_in: #3.5K (trainable: #3.5K) +[2025-10-24 11:28:18,543][main][INFO] - - down: #22.5M (trainable: #22.5M) +[2025-10-24 11:28:18,543][main][INFO] - - mid: #10.0M (trainable: #10.0M) +[2025-10-24 11:28:18,544][main][INFO] - - norm_out: #1.0K (trainable: #1.0K) +[2025-10-24 11:28:18,545][main][INFO] - - act_out: #0 (trainable: #0) +[2025-10-24 11:28:18,545][main][INFO] - - conv_out: #36.0K (trainable: #36.0K) +[2025-10-24 11:28:18,546][main][INFO] - - out_proj: #72 (trainable: #72) +[2025-10-24 11:28:18,547][main][INFO] - - decoder: #13.4M (trainable: #13.4M) +[2025-10-24 11:28:18,548][main][INFO] - - conv_in_img: #896 (trainable: #896) +[2025-10-24 11:28:18,548][main][INFO] - - conv_in_z: #1.2K (trainable: #1.2K) +[2025-10-24 11:28:18,549][main][INFO] - - conv_in: #36.1K (trainable: #36.1K) +[2025-10-24 11:28:18,550][main][INFO] - - batch_norm_z: #8 (trainable: #8) +[2025-10-24 11:28:18,550][main][INFO] - - time_proj: #0 (trainable: #0) +[2025-10-24 11:28:18,551][main][INFO] - - time_embedding: #80.5K (trainable: #80.5K) +[2025-10-24 11:28:18,551][main][INFO] - - ada_ctx_proj: #38.4K (trainable: #38.4K) +[2025-10-24 11:28:18,552][main][INFO] - - down_blocks: #3.0M (trainable: #3.0M) +[2025-10-24 11:28:18,553][main][INFO] - - mid_block: #3.4M (trainable: #3.4M) +[2025-10-24 11:28:18,554][main][INFO] - - up_blocks: #6.9M (trainable: #6.9M) +[2025-10-24 11:28:18,554][main][INFO] - - conv_norm_out: #128 (trainable: #128) +[2025-10-24 11:28:18,555][main][INFO] - - conv_out_act: #0 (trainable: #0) +[2025-10-24 11:28:18,555][main][INFO] - - conv_out: #1.7K (trainable: #1.7K) +[2025-10-24 11:28:18,557][main][INFO] - ae: EMAWrapper( + (model): DistributedDataParallel( + (module): DC_SSDAE( + (encoder): VQEncoder( + (conv_in): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (down): ModuleList( + (0): Module( + (block): ModuleList( + (0-1): 2 x VQGResnetBlock( + (norm1): GroupNorm(32, 128, eps=1e-06, affine=True) + (act1): SwishActivation() + (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (norm2): GroupNorm(32, 128, eps=1e-06, affine=True) + (act2): SwishActivation() + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (attn): ModuleList() + (downsample): VQGDownsample( + (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2)) + ) + ) + (1): Module( + (block): ModuleList( + (0): VQGResnetBlock( + (norm1): GroupNorm(32, 128, eps=1e-06, affine=True) + (act1): SwishActivation() + (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (norm2): GroupNorm(32, 256, eps=1e-06, affine=True) + (act2): SwishActivation() + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nin_shortcut): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1)) + ) + (1): VQGResnetBlock( + (norm1): GroupNorm(32, 256, eps=1e-06, affine=True) + (act1): SwishActivation() + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (norm2): GroupNorm(32, 256, eps=1e-06, affine=True) + (act2): SwishActivation() + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (attn): ModuleList() + (downsample): VQGDownsample( + (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2)) + ) + ) + (2): Module( + (block): ModuleList( + (0): VQGResnetBlock( + (norm1): GroupNorm(32, 256, eps=1e-06, affine=True) + (act1): SwishActivation() + (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (norm2): GroupNorm(32, 512, eps=1e-06, affine=True) + (act2): SwishActivation() + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nin_shortcut): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1)) + ) + (1): VQGResnetBlock( + (norm1): GroupNorm(32, 512, eps=1e-06, affine=True) + (act1): SwishActivation() + (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (norm2): GroupNorm(32, 512, eps=1e-06, affine=True) + (act2): SwishActivation() + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (attn): ModuleList() + (downsample): VQGDownsample( + (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2)) + ) + ) + (3): Module( + (block): ModuleList( + (0-1): 2 x VQGResnetBlock( + (norm1): GroupNorm(32, 512, eps=1e-06, affine=True) + (act1): SwishActivation() + (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (norm2): GroupNorm(32, 512, eps=1e-06, affine=True) + (act2): SwishActivation() + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (attn): ModuleList() + ) + ) + (mid): Module( + (block_1): VQGResnetBlock( + (norm1): GroupNorm(32, 512, eps=1e-06, affine=True) + (act1): SwishActivation() + (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (norm2): GroupNorm(32, 512, eps=1e-06, affine=True) + (act2): SwishActivation() + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + (attn_1): VQGAttnBlock( + (norm): GroupNorm(32, 512, eps=1e-06, affine=True) + (q): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1)) + (k): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1)) + (v): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1)) + (proj_out): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1)) + ) + (block_2): VQGResnetBlock( + (norm1): GroupNorm(32, 512, eps=1e-06, affine=True) + (act1): SwishActivation() + (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (norm2): GroupNorm(32, 512, eps=1e-06, affine=True) + (act2): SwishActivation() + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (norm_out): GroupNorm(32, 512, eps=1e-06, affine=True) + (act_out): SwishActivation() + (conv_out): Conv2d(512, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (out_proj): Conv2d(8, 8, kernel_size=(1, 1), stride=(1, 1)) + ) + (decoder): UViTDecoder( + (conv_in_img): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv_in_z): Conv2d(4, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv_in): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (batch_norm_z): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (time_proj): Timesteps() + (time_embedding): TimestepEmbedding( + (linear_1): Linear(in_features=64, out_features=256, bias=True) + (act): SiLU() + (linear_2): Linear(in_features=256, out_features=256, bias=True) + ) + (ada_ctx_proj): Sequential( + (0): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (1): SiLU() + (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + (down_blocks): ModuleList( + (0): DownBlock2D( + (resnets): ModuleList( + (0-1): 2 x ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=128, bias=True) + (norm2): GroupNorm(32, 64, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + ) + ) + (downsamplers): ModuleList( + (0): Downsample2D( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + ) + ) + ) + (1): DownBlock2D( + (resnets): ModuleList( + (0): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=192, bias=True) + (norm2): GroupNorm(32, 96, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(64, 96, kernel_size=(1, 1), stride=(1, 1)) + ) + (1): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=192, bias=True) + (norm2): GroupNorm(32, 96, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + ) + ) + (downsamplers): ModuleList( + (0): Downsample2D( + (conv): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + ) + ) + ) + (2): DownBlock2D( + (resnets): ModuleList( + (0): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(96, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(96, 160, kernel_size=(1, 1), stride=(1, 1)) + ) + (1): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + ) + ) + (downsamplers): ModuleList( + (0): Downsample2D( + (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + ) + ) + ) + (3): DownBlock2D( + (resnets): ModuleList( + (0-1): 2 x ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + ) + ) + ) + ) + (mid_block): UViTMiddleTransformer( + (proj_in): Linear(in_features=160, out_features=160, bias=True) + (transformer_blocks): ModuleList( + (0-7): 8 x TransformerBlock( + (norm1): AdaLayerNorm( + (silu): SiLU() + (linear): Linear(in_features=64, out_features=320, bias=True) + (norm): LayerNorm((160,), eps=1e-05, elementwise_affine=False) + ) + (attn1): Attention( + (to_q): Linear(in_features=160, out_features=160, bias=False) + (to_k): Linear(in_features=160, out_features=160, bias=False) + (to_v): Linear(in_features=160, out_features=160, bias=False) + (out_proj): Linear(in_features=160, out_features=160, bias=True) + (out_drop): Dropout(p=0.0, inplace=False) + ) + (norm2): LayerNorm((160,), eps=1e-05, elementwise_affine=True) + (ff): FeedForward( + (proj_in_act): GEGLU( + (proj): Linear(in_features=160, out_features=1280, bias=True) + ) + (drop): Dropout(p=0.0, inplace=False) + (proj_out): Linear(in_features=640, out_features=160, bias=True) + ) + (relative_position_bias): RelativePositionBias() + ) + ) + (proj_out): Linear(in_features=160, out_features=160, bias=True) + (norm): GroupNorm(32, 160, eps=1e-06, affine=True) + ) + (up_blocks): ModuleList( + (0): UpBlock2D( + (resnets): ModuleList( + (0-2): 3 x ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 640, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(320, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(320, 160, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (upsamplers): ModuleList( + (0): Upsample2D( + (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + ) + (1): UpBlock2D( + (resnets): ModuleList( + (0-1): 2 x ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 640, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(320, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(320, 160, kernel_size=(1, 1), stride=(1, 1)) + ) + (2): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 512, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(256, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=320, bias=True) + (norm2): GroupNorm(32, 160, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(256, 160, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (upsamplers): ModuleList( + (0): Upsample2D( + (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + ) + (2): UpBlock2D( + (resnets): ModuleList( + (0): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 512, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(256, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=192, bias=True) + (norm2): GroupNorm(32, 96, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(256, 96, kernel_size=(1, 1), stride=(1, 1)) + ) + (1): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(192, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=192, bias=True) + (norm2): GroupNorm(32, 96, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1)) + ) + (2): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(160, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=192, bias=True) + (norm2): GroupNorm(32, 96, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(160, 96, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (upsamplers): ModuleList( + (0): Upsample2D( + (conv): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + ) + (3): UpBlock2D( + (resnets): ModuleList( + (0): ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 320, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(160, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=128, bias=True) + (norm2): GroupNorm(32, 64, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(160, 64, kernel_size=(1, 1), stride=(1, 1)) + ) + (1-2): 2 x ResnetBlock2D( + (norm1): AdaGroupNorm2D( + (ctx_proj): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1)) + ) + (conv1): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (time_emb_proj): Linear(in_features=256, out_features=128, bias=True) + (norm2): GroupNorm(32, 64, eps=1e-05, affine=True) + (dropout): Dropout(p=0.0, inplace=False) + (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (nonlinearity): SiLU() + (conv_shortcut): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + ) + ) + (conv_norm_out): GroupNorm(32, 64, eps=1e-05, affine=True) + (conv_out_act): SiLU() + (conv_out): Conv2d(64, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + ) + (ema): EMA(ema_model=DC_SSDAE, decay=0.999, start_iter=50000) +) +[2025-10-24 11:28:18,558][main][INFO] - aux_losses parameters count: +[2025-10-24 11:28:18,559][main][INFO] - Total: #96.7M (trainable: #145.9K) +[2025-10-24 11:28:18,560][main][INFO] - - repa_loss: #82.7M (trainable: #145.9K) +[2025-10-24 11:28:18,561][main][INFO] - - lpips_loss: #14.0M (trainable: #0) +[2025-10-24 11:28:18,561][main][INFO] - aux_losses: DistributedDataParallel( + (module): SSDDLosses( + (repa_loss): REPALoss( + (features_extractor): Frozen(DinoEncoder/Dinov2Model) + (repa_mlp): Sequential( + (0): Linear(in_features=160, out_features=160, bias=True) + (1): SiLU() + (2): Linear(in_features=160, out_features=768, bias=True) + ) + (repa_loss): CosineSimilarity() + ) + (lpips_loss): Frozen(LPIPS) + ) +) +[2025-10-24 11:28:18,565][main][INFO] - Optimizer for autoencoder: RAdamScheduleFree ( +Parameter Group 0 + betas: (0.9, 0.999) + eps: 1e-08 + foreach: True + k: 0 + lr: 0.0003 + lr_max: -1.0 + r: 0.0 + scheduled_lr: 0.0 + silent_sgd_phase: True + train_mode: False + weight_decay: 0.001 + weight_lr_power: 2.0 + weight_sum: 0.0 + +Parameter Group 1 + betas: (0.9, 0.999) + eps: 1e-08 + foreach: True + k: 0 + lr: 0.0003 + lr_max: -1.0 + r: 0.0 + scheduled_lr: 0.0 + silent_sgd_phase: True + train_mode: False + weight_decay: 0.0 + weight_lr_power: 2.0 + weight_sum: 0.0 +) +[2025-10-24 11:28:18,570][main][INFO] - No training state found to resume from None +[2025-10-24 11:28:18,571][main][INFO] - ====================== RUNNING TASK train +[2025-10-24 11:28:18,572][main][INFO] - Starting training +[2025-10-24 11:28:18,572][main][INFO] - Batch size of 192 (24 per GPU, 1 acumulation step(s) 8 process(es)) +[2025-10-24 11:28:18,582][main][INFO] - --- + + +[2025-10-24 11:28:18,583][main][INFO] - [T_total=00:00:22 | T_train=00:00:00] Start epoch 0 +[T_total=00:00:31 | T_train=00:00:08 | T_epoch=00:00:08] Epoch 0, batch 1 / 6666 (step 0) loss=2.00639 (avg=2.006) [[all losses: diffusion=1.37496 ; kl=1.20887 ; lpips=0.760925 ; repa=1.00389]] +[T_total=00:01:28 | T_train=00:01:05 | T_epoch=00:01:05] Epoch 0, batch 101 / 6666 (step 100) loss=0.982711 (avg=0.9827) [[all losses: diffusion=0.45005 ; kl=187.768 ; lpips=0.599657 ; repa=0.930581 ; sum_loss=0.982711]] +[T_total=00:02:24 | T_train=00:02:01 | T_epoch=00:02:01] Epoch 0, batch 201 / 6666 (step 200) loss=0.820433 (avg=0.8204) [[all losses: diffusion=0.3185 ; kl=450.67 ; lpips=0.55166 ; repa=0.902608 ; sum_loss=0.820433]] +[T_total=00:03:20 | T_train=00:02:57 | T_epoch=00:02:57] Epoch 0, batch 301 / 6666 (step 300) loss=0.74484 (avg=0.7448) [[all losses: diffusion=0.263146 ; kl=743.669 ; lpips=0.5229 ; repa=0.878002 ; sum_loss=0.74484]] +[T_total=00:04:16 | T_train=00:03:53 | T_epoch=00:03:53] Epoch 0, batch 401 / 6666 (step 400) loss=0.697232 (avg=0.6972) [[all losses: diffusion=0.231263 ; kl=1168.44 ; lpips=0.500139 ; repa=0.858927 ; sum_loss=0.697232]] +[T_total=00:05:13 | T_train=00:04:50 | T_epoch=00:04:50] Epoch 0, batch 501 / 6666 (step 500) loss=0.66279 (avg=0.6628) [[all losses: diffusion=0.209766 ; kl=1558.48 ; lpips=0.481169 ; repa=0.843526 ; sum_loss=0.66279]] +[T_total=00:06:09 | T_train=00:05:46 | T_epoch=00:05:46] Epoch 0, batch 601 / 6666 (step 600) loss=0.63616 (avg=0.6362) [[all losses: diffusion=0.194012 ; kl=1861.2 ; lpips=0.465438 ; repa=0.830269 ; sum_loss=0.63616]] +[T_total=00:07:05 | T_train=00:06:42 | T_epoch=00:06:42] Epoch 0, batch 701 / 6666 (step 700) loss=0.614905 (avg=0.6149) [[all losses: diffusion=0.181861 ; kl=2074.35 ; lpips=0.452538 ; repa=0.818803 ; sum_loss=0.614905]] +[T_total=00:08:02 | T_train=00:07:39 | T_epoch=00:07:39] Epoch 0, batch 801 / 6666 (step 800) loss=0.597575 (avg=0.5976) [[all losses: diffusion=0.172305 ; kl=2230.87 ; lpips=0.441734 ; repa=0.808688 ; sum_loss=0.597575]] +[T_total=00:08:58 | T_train=00:08:35 | T_epoch=00:08:35] Epoch 0, batch 901 / 6666 (step 900) loss=0.582951 (avg=0.583) [[all losses: diffusion=0.164504 ; kl=2349.25 ; lpips=0.432249 ; repa=0.799892 ; sum_loss=0.582951]] +[T_total=00:09:54 | T_train=00:09:32 | T_epoch=00:09:32] Epoch 0, batch 1001 / 6666 (step 1000) loss=0.570332 (avg=0.5703) [[all losses: diffusion=0.157891 ; kl=2442.99 ; lpips=0.423963 ; repa=0.792065 ; sum_loss=0.570332]] +[T_total=00:10:52 | T_train=00:10:29 | T_epoch=00:10:29] Epoch 0, batch 1101 / 6666 (step 1100) loss=0.559321 (avg=0.5593) [[all losses: diffusion=0.152515 ; kl=2537.36 ; lpips=0.416044 ; repa=0.784987 ; sum_loss=0.559321]] +[T_total=00:11:48 | T_train=00:11:25 | T_epoch=00:11:25] Epoch 0, batch 1201 / 6666 (step 1200) loss=0.549218 (avg=0.5492) [[all losses: diffusion=0.147607 ; kl=2634.4 ; lpips=0.408684 ; repa=0.778538 ; sum_loss=0.549218]] +[T_total=00:12:45 | T_train=00:12:22 | T_epoch=00:12:22] Epoch 0, batch 1301 / 6666 (step 1300) loss=0.539921 (avg=0.5399) [[all losses: diffusion=0.143338 ; kl=2728.43 ; lpips=0.401431 ; repa=0.772559 ; sum_loss=0.539921]] +[T_total=00:13:41 | T_train=00:13:18 | T_epoch=00:13:18] Epoch 0, batch 1401 / 6666 (step 1400) loss=0.531344 (avg=0.5313) [[all losses: diffusion=0.139529 ; kl=2818.26 ; lpips=0.394478 ; repa=0.76703 ; sum_loss=0.531344]] +[T_total=00:14:37 | T_train=00:14:15 | T_epoch=00:14:15] Epoch 0, batch 1501 / 6666 (step 1500) loss=0.523272 (avg=0.5233) [[all losses: diffusion=0.136069 ; kl=2900.7 ; lpips=0.387695 ; repa=0.76182 ; sum_loss=0.523272]] +[T_total=00:15:34 | T_train=00:15:11 | T_epoch=00:15:11] Epoch 0, batch 1601 / 6666 (step 1600) loss=0.515761 (avg=0.5158) [[all losses: diffusion=0.133069 ; kl=2976.58 ; lpips=0.380968 ; repa=0.756927 ; sum_loss=0.515761]] +[T_total=00:16:30 | T_train=00:16:07 | T_epoch=00:16:07] Epoch 0, batch 1701 / 6666 (step 1700) loss=0.508649 (avg=0.5086) [[all losses: diffusion=0.130122 ; kl=3044.74 ; lpips=0.37482 ; repa=0.75229 ; sum_loss=0.508649]] +[T_total=00:17:26 | T_train=00:17:03 | T_epoch=00:17:03] Epoch 0, batch 1801 / 6666 (step 1800) loss=0.502034 (avg=0.502) [[all losses: diffusion=0.127541 ; kl=3105.58 ; lpips=0.368816 ; repa=0.747917 ; sum_loss=0.502034]] +[T_total=00:18:23 | T_train=00:18:00 | T_epoch=00:18:00] Epoch 0, batch 1901 / 6666 (step 1900) loss=0.495807 (avg=0.4958) [[all losses: diffusion=0.125148 ; kl=3158.68 ; lpips=0.363137 ; repa=0.743726 ; sum_loss=0.495807]] +[T_total=00:19:19 | T_train=00:18:56 | T_epoch=00:18:56] Epoch 0, batch 2001 / 6666 (step 2000) loss=0.49 (avg=0.49) [[all losses: diffusion=0.122931 ; kl=3204.61 ; lpips=0.357845 ; repa=0.739768 ; sum_loss=0.49]] +[T_total=00:20:15 | T_train=00:19:52 | T_epoch=00:19:52] Epoch 0, batch 2101 / 6666 (step 2100) loss=0.484527 (avg=0.4845) [[all losses: diffusion=0.120915 ; kl=3244.74 ; lpips=0.352743 ; repa=0.735983 ; sum_loss=0.484527]] +[T_total=00:21:11 | T_train=00:20:49 | T_epoch=00:20:49] Epoch 0, batch 2201 / 6666 (step 2200) loss=0.47943 (avg=0.4794) [[all losses: diffusion=0.119106 ; kl=3279.79 ; lpips=0.347893 ; repa=0.732387 ; sum_loss=0.47943]] +[T_total=00:22:08 | T_train=00:21:45 | T_epoch=00:21:45] Epoch 0, batch 2301 / 6666 (step 2300) loss=0.474568 (avg=0.4746) [[all losses: diffusion=0.117372 ; kl=3310.48 ; lpips=0.343335 ; repa=0.728871 ; sum_loss=0.474568]] +[T_total=00:23:04 | T_train=00:22:41 | T_epoch=00:22:41] Epoch 0, batch 2401 / 6666 (step 2400) loss=0.469974 (avg=0.47) [[all losses: diffusion=0.11575 ; kl=3337.3 ; lpips=0.338999 ; repa=0.725549 ; sum_loss=0.469974]] +[T_total=00:24:00 | T_train=00:23:38 | T_epoch=00:23:38] Epoch 0, batch 2501 / 6666 (step 2500) loss=0.465628 (avg=0.4656) [[all losses: diffusion=0.114223 ; kl=3360.59 ; lpips=0.334917 ; repa=0.722341 ; sum_loss=0.465628]] +[T_total=00:24:57 | T_train=00:24:34 | T_epoch=00:24:34] Epoch 0, batch 2601 / 6666 (step 2600) loss=0.461504 (avg=0.4615) [[all losses: diffusion=0.112836 ; kl=3381.18 ; lpips=0.330923 ; repa=0.719304 ; sum_loss=0.461504]] +[T_total=00:25:53 | T_train=00:25:30 | T_epoch=00:25:30] Epoch 0, batch 2701 / 6666 (step 2700) loss=0.457538 (avg=0.4575) [[all losses: diffusion=0.111475 ; kl=3399.33 ; lpips=0.327182 ; repa=0.716292 ; sum_loss=0.457538]] +[T_total=00:26:49 | T_train=00:26:26 | T_epoch=00:26:26] Epoch 0, batch 2801 / 6666 (step 2800) loss=0.453829 (avg=0.4538) [[all losses: diffusion=0.110163 ; kl=3415.29 ; lpips=0.323765 ; repa=0.713471 ; sum_loss=0.453829]] +[T_total=00:27:45 | T_train=00:27:23 | T_epoch=00:27:23] Epoch 0, batch 2901 / 6666 (step 2900) loss=0.450269 (avg=0.4503) [[all losses: diffusion=0.108964 ; kl=3429.59 ; lpips=0.320398 ; repa=0.710705 ; sum_loss=0.450269]] +[T_total=00:28:42 | T_train=00:28:19 | T_epoch=00:28:19] Epoch 0, batch 3001 / 6666 (step 3000) loss=0.446893 (avg=0.4469) [[all losses: diffusion=0.107824 ; kl=3442.41 ; lpips=0.317225 ; repa=0.708055 ; sum_loss=0.446893]] +[T_total=00:29:38 | T_train=00:29:15 | T_epoch=00:29:15] Epoch 0, batch 3101 / 6666 (step 3100) loss=0.443665 (avg=0.4437) [[all losses: diffusion=0.106783 ; kl=3454.02 ; lpips=0.314116 ; repa=0.705479 ; sum_loss=0.443665]] +[T_total=00:30:35 | T_train=00:30:12 | T_epoch=00:30:12] Epoch 0, batch 3201 / 6666 (step 3200) loss=0.440552 (avg=0.4406) [[all losses: diffusion=0.105741 ; kl=3464.54 ; lpips=0.311189 ; repa=0.703011 ; sum_loss=0.440552]] +[T_total=00:31:31 | T_train=00:31:08 | T_epoch=00:31:08] Epoch 0, batch 3301 / 6666 (step 3300) loss=0.437604 (avg=0.4376) [[all losses: diffusion=0.104839 ; kl=3474.21 ; lpips=0.308268 ; repa=0.700626 ; sum_loss=0.437604]] +[T_total=00:32:27 | T_train=00:32:04 | T_epoch=00:32:04] Epoch 0, batch 3401 / 6666 (step 3400) loss=0.434757 (avg=0.4348) [[all losses: diffusion=0.103897 ; kl=3483.14 ; lpips=0.305582 ; repa=0.698346 ; sum_loss=0.434757]] +[T_total=00:33:23 | T_train=00:33:01 | T_epoch=00:33:01] Epoch 0, batch 3501 / 6666 (step 3500) loss=0.43205 (avg=0.4321) [[all losses: diffusion=0.103008 ; kl=3491.6 ; lpips=0.303026 ; repa=0.696149 ; sum_loss=0.43205]] +[T_total=00:34:20 | T_train=00:33:57 | T_epoch=00:33:57] Epoch 0, batch 3601 / 6666 (step 3600) loss=0.429435 (avg=0.4294) [[all losses: diffusion=0.10218 ; kl=3499.29 ; lpips=0.300503 ; repa=0.694015 ; sum_loss=0.429435]] +[T_total=00:35:16 | T_train=00:34:53 | T_epoch=00:34:53] Epoch 0, batch 3701 / 6666 (step 3700) loss=0.426919 (avg=0.4269) [[all losses: diffusion=0.101412 ; kl=3506.53 ; lpips=0.298028 ; repa=0.691945 ; sum_loss=0.426919]] +[T_total=00:36:12 | T_train=00:35:49 | T_epoch=00:35:49] Epoch 0, batch 3801 / 6666 (step 3800) loss=0.424509 (avg=0.4245) [[all losses: diffusion=0.100691 ; kl=3513.3 ; lpips=0.295644 ; repa=0.689928 ; sum_loss=0.424509]] +[T_total=00:37:09 | T_train=00:36:46 | T_epoch=00:36:46] Epoch 0, batch 3901 / 6666 (step 3900) loss=0.422171 (avg=0.4222) [[all losses: diffusion=0.099949 ; kl=3519.71 ; lpips=0.293404 ; repa=0.688003 ; sum_loss=0.422171]] +[T_total=00:38:05 | T_train=00:37:42 | T_epoch=00:37:42] Epoch 0, batch 4001 / 6666 (step 4000) loss=0.419963 (avg=0.42) [[all losses: diffusion=0.0993082 ; kl=3525.71 ; lpips=0.291198 ; repa=0.686122 ; sum_loss=0.419963]] +[T_total=00:39:01 | T_train=00:38:39 | T_epoch=00:38:39] Epoch 0, batch 4101 / 6666 (step 4100) loss=0.41782 (avg=0.4178) [[all losses: diffusion=0.0986443 ; kl=3531.47 ; lpips=0.289135 ; repa=0.684307 ; sum_loss=0.41782]] +[T_total=00:39:58 | T_train=00:39:35 | T_epoch=00:39:35] Epoch 0, batch 4201 / 6666 (step 4200) loss=0.415725 (avg=0.4157) [[all losses: diffusion=0.0980084 ; kl=3536.88 ; lpips=0.287091 ; repa=0.682537 ; sum_loss=0.415725]] +[T_total=00:40:54 | T_train=00:40:31 | T_epoch=00:40:31] Epoch 0, batch 4301 / 6666 (step 4300) loss=0.41368 (avg=0.4137) [[all losses: diffusion=0.0973924 ; kl=3542.11 ; lpips=0.285092 ; repa=0.680797 ; sum_loss=0.41368]] +[T_total=00:41:50 | T_train=00:41:28 | T_epoch=00:41:28] Epoch 0, batch 4401 / 6666 (step 4400) loss=0.411734 (avg=0.4117) [[all losses: diffusion=0.0968228 ; kl=3546.92 ; lpips=0.283181 ; repa=0.679095 ; sum_loss=0.411734]] +[T_total=00:42:47 | T_train=00:42:24 | T_epoch=00:42:24] Epoch 0, batch 4501 / 6666 (step 4500) loss=0.409851 (avg=0.4099) [[all losses: diffusion=0.0962662 ; kl=3551.48 ; lpips=0.281339 ; repa=0.677455 ; sum_loss=0.409851]] +[T_total=00:43:43 | T_train=00:43:20 | T_epoch=00:43:20] Epoch 0, batch 4601 / 6666 (step 4600) loss=0.408028 (avg=0.408) [[all losses: diffusion=0.0957281 ; kl=3555.85 ; lpips=0.279561 ; repa=0.675855 ; sum_loss=0.408028]] +[T_total=00:44:39 | T_train=00:44:16 | T_epoch=00:44:16] Epoch 0, batch 4701 / 6666 (step 4700) loss=0.406272 (avg=0.4063) [[all losses: diffusion=0.095233 ; kl=3560.05 ; lpips=0.27781 ; repa=0.674295 ; sum_loss=0.406272]] +[T_total=00:45:36 | T_train=00:45:13 | T_epoch=00:45:13] Epoch 0, batch 4801 / 6666 (step 4800) loss=0.404548 (avg=0.4045) [[all losses: diffusion=0.0947084 ; kl=3564.01 ; lpips=0.276168 ; repa=0.672768 ; sum_loss=0.404548]] +[T_total=00:46:32 | T_train=00:46:09 | T_epoch=00:46:09] Epoch 0, batch 4901 / 6666 (step 4900) loss=0.402894 (avg=0.4029) [[all losses: diffusion=0.0942283 ; kl=3567.74 ; lpips=0.274555 ; repa=0.671282 ; sum_loss=0.402894]] +[T_total=00:47:28 | T_train=00:47:05 | T_epoch=00:47:05] Epoch 0, batch 5001 / 6666 (step 5000) loss=0.401258 (avg=0.4013) [[all losses: diffusion=0.0937469 ; kl=3571.27 ; lpips=0.272976 ; repa=0.669809 ; sum_loss=0.401258]] +[T_total=00:48:25 | T_train=00:48:02 | T_epoch=00:48:02] Epoch 0, batch 5101 / 6666 (step 5100) loss=0.399707 (avg=0.3997) [[all losses: diffusion=0.0933285 ; kl=3574.64 ; lpips=0.27141 ; repa=0.668394 ; sum_loss=0.399707]] +[T_total=00:49:21 | T_train=00:48:58 | T_epoch=00:48:58] Epoch 0, batch 5201 / 6666 (step 5200) loss=0.398184 (avg=0.3982) [[all losses: diffusion=0.0928949 ; kl=3577.82 ; lpips=0.269917 ; repa=0.66701 ; sum_loss=0.398184]] +[T_total=00:50:17 | T_train=00:49:54 | T_epoch=00:49:54] Epoch 0, batch 5301 / 6666 (step 5300) loss=0.396703 (avg=0.3967) [[all losses: diffusion=0.0924865 ; kl=3580.87 ; lpips=0.268439 ; repa=0.665665 ; sum_loss=0.396703]] +[T_total=00:51:13 | T_train=00:50:51 | T_epoch=00:50:51] Epoch 0, batch 5401 / 6666 (step 5400) loss=0.395267 (avg=0.3953) [[all losses: diffusion=0.0920777 ; kl=3583.87 ; lpips=0.267041 ; repa=0.664337 ; sum_loss=0.395267]] +[T_total=00:52:10 | T_train=00:51:47 | T_epoch=00:51:47] Epoch 0, batch 5501 / 6666 (step 5500) loss=0.393835 (avg=0.3938) [[all losses: diffusion=0.0916643 ; kl=3586.74 ; lpips=0.265655 ; repa=0.663026 ; sum_loss=0.393835]] +[T_total=00:53:06 | T_train=00:52:43 | T_epoch=00:52:43] Epoch 0, batch 5601 / 6666 (step 5600) loss=0.392469 (avg=0.3925) [[all losses: diffusion=0.0912773 ; kl=3589.46 ; lpips=0.264322 ; repa=0.661763 ; sum_loss=0.392469]] +[T_total=00:54:02 | T_train=00:53:39 | T_epoch=00:53:39] Epoch 0, batch 5701 / 6666 (step 5700) loss=0.391126 (avg=0.3911) [[all losses: diffusion=0.0909149 ; kl=3591.94 ; lpips=0.262987 ; repa=0.660502 ; sum_loss=0.391126]] +[T_total=00:54:59 | T_train=00:54:36 | T_epoch=00:54:36] Epoch 0, batch 5801 / 6666 (step 5800) loss=0.389827 (avg=0.3898) [[all losses: diffusion=0.0905627 ; kl=3594.27 ; lpips=0.261706 ; repa=0.65927 ; sum_loss=0.389827]] +[T_total=00:55:55 | T_train=00:55:32 | T_epoch=00:55:32] Epoch 0, batch 5901 / 6666 (step 5900) loss=0.388544 (avg=0.3885) [[all losses: diffusion=0.0901877 ; kl=3596.69 ; lpips=0.260487 ; repa=0.658063 ; sum_loss=0.388544]] +[T_total=00:56:51 | T_train=00:56:28 | T_epoch=00:56:28] Epoch 0, batch 6001 / 6666 (step 6000) loss=0.387302 (avg=0.3873) [[all losses: diffusion=0.08984 ; kl=3598.92 ; lpips=0.259284 ; repa=0.656885 ; sum_loss=0.387302]] +[T_total=00:57:47 | T_train=00:57:24 | T_epoch=00:57:24] Epoch 0, batch 6101 / 6666 (step 6100) loss=0.386124 (avg=0.3861) [[all losses: diffusion=0.0895414 ; kl=3601.03 ; lpips=0.25809 ; repa=0.655745 ; sum_loss=0.386124]] +[T_total=00:58:44 | T_train=00:58:21 | T_epoch=00:58:21] Epoch 0, batch 6201 / 6666 (step 6200) loss=0.384914 (avg=0.3849) [[all losses: diffusion=0.089197 ; kl=3603.1 ; lpips=0.256929 ; repa=0.6546 ; sum_loss=0.384914]] +[T_total=00:59:40 | T_train=00:59:17 | T_epoch=00:59:17] Epoch 0, batch 6301 / 6666 (step 6300) loss=0.383746 (avg=0.3837) [[all losses: diffusion=0.0888722 ; kl=3605.04 ; lpips=0.255792 ; repa=0.65349 ; sum_loss=0.383746]] +[T_total=01:00:36 | T_train=01:00:13 | T_epoch=01:00:13] Epoch 0, batch 6401 / 6666 (step 6400) loss=0.382618 (avg=0.3826) [[all losses: diffusion=0.0885434 ; kl=3606.91 ; lpips=0.254735 ; repa=0.6524 ; sum_loss=0.382618]] +[T_total=01:01:33 | T_train=01:01:10 | T_epoch=01:01:10] Epoch 0, batch 6501 / 6666 (step 6500) loss=0.381519 (avg=0.3815) [[all losses: diffusion=0.0882497 ; kl=3608.7 ; lpips=0.253659 ; repa=0.651325 ; sum_loss=0.381519]] +[T_total=01:02:29 | T_train=01:02:06 | T_epoch=01:02:06] Epoch 0, batch 6601 / 6666 (step 6600) loss=0.38043 (avg=0.3804) [[all losses: diffusion=0.0879526 ; kl=3610.53 ; lpips=0.2526 ; repa=0.650266 ; sum_loss=0.38043]] +[2025-10-24 12:31:01,697][main][INFO] - [T_total=01:03:06 | T_train=01:02:43 | T_epoch=01:02:43] End of epoch 0 (6666 steps) train loss 0.379739 +[2025-10-24 12:31:01,700][main][INFO] - [Epoch 0] All losses: [[diffusion=0.0877689 ; kl=3611.6 ; lpips=0.251927 ; repa=0.64958]] + Reconstructing from test set: 0%| | 0/261 [00:00 + sys.exit(main()) + ^^^^^^ + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/accelerate/commands/accelerate_cli.py", line 50, in main + args.func(args) + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/accelerate/commands/launch.py", line 1226, in launch_command + multi_gpu_launcher(args) + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/accelerate/commands/launch.py", line 853, in multi_gpu_launcher + distrib_run.run(args) + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/run.py", line 927, in run + elastic_launch( + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 156, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 284, in launch_agent + result = agent.run() + ^^^^^^^^^^^ + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper + result = f(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^ + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/elastic/agent/server/api.py", line 717, in run + result = self._invoke_run(role) + ^^^^^^^^^^^^^^^^^^^^^^ + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/elastic/agent/server/api.py", line 881, in _invoke_run + time.sleep(monitor_interval) + File "/workspace/miniconda3/envs/DC_SSDAE/lib/python3.12/site-packages/torch/distributed/elastic/multiprocessing/api.py", line 85, in _terminate_process_handler + raise SignalException(f"Process {os.getpid()} got signal: {sigval}", sigval=sigval) +torch.distributed.elastic.multiprocessing.api.SignalException: Process 48843 got signal: 15