Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- Rectified_Noise/GVP-Disp/README.md +92 -0
- Rectified_Noise/GVP-Disp/W_False.log +5 -0
- Rectified_Noise/GVP-Disp/evaluate_samples.sh +65 -0
- Rectified_Noise/GVP-Disp/evaluator.py +689 -0
- Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-000-SiT-XL-2-GVP-velocity-None/log.txt +11 -0
- Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-001-SiT-XL-2-GVP-velocity-None/log.txt +1 -0
- Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/log.txt +500 -0
- Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-003-SiT-XL-2-GVP-velocity-None/log.txt +6 -0
- Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/log.txt +863 -0
- Rectified_Noise/GVP-Disp/run.sh +14 -0
- Rectified_Noise/GVP-Disp/test.sh +78 -0
- Rectified_Noise/GVP-Disp/train_rectified_noise.py +429 -0
- Rectified_Noise/GVP-Disp/transport/__init__.py +71 -0
- Rectified_Noise/GVP-Disp/transport/__pycache__/__init__.cpython-312.pyc +0 -0
- Rectified_Noise/GVP-Disp/transport/__pycache__/__init__.cpython-38.pyc +0 -0
- Rectified_Noise/GVP-Disp/transport/__pycache__/integrators.cpython-312.pyc +0 -0
- Rectified_Noise/GVP-Disp/transport/__pycache__/integrators.cpython-38.pyc +0 -0
- Rectified_Noise/GVP-Disp/transport/__pycache__/path.cpython-312.pyc +0 -0
- Rectified_Noise/GVP-Disp/transport/__pycache__/path.cpython-38.pyc +0 -0
- Rectified_Noise/GVP-Disp/transport/__pycache__/transport.cpython-312.pyc +0 -0
- Rectified_Noise/GVP-Disp/transport/__pycache__/transport.cpython-38.pyc +0 -0
- Rectified_Noise/GVP-Disp/transport/__pycache__/utils.cpython-312.pyc +0 -0
- Rectified_Noise/GVP-Disp/transport/__pycache__/utils.cpython-38.pyc +0 -0
- Rectified_Noise/GVP-Disp/transport/integrators.py +117 -0
- Rectified_Noise/GVP-Disp/transport/path.py +192 -0
- Rectified_Noise/GVP-Disp/transport/transport.py +501 -0
- Rectified_Noise/GVP-Disp/transport/utils.py +29 -0
- Rectified_Noise/GVP-Disp/w_training1.log +927 -0
- Rectified_Noise/VP-Disp/README.md +92 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000059.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000169.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000286.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000545.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000606.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000769.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001050.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001099.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001346.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001475.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001518.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001644.png +0 -0
- Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001741.png +0 -0
- Rectified_Noise/VP-Disp/W_False.log +5 -0
- Rectified_Noise/VP-Disp/W_No.log +5 -0
- Rectified_Noise/VP-Disp/W_True_0.15.log +5 -0
- Rectified_Noise/VP-Disp/W_True_0.5.log +5 -0
- Rectified_Noise/VP-Disp/download.py +41 -0
- Rectified_Noise/VP-Disp/environment.yml +16 -0
- Rectified_Noise/VP-Disp/evaluate_samples.sh +65 -0
- Rectified_Noise/VP-Disp/evaluator.py +689 -0
Rectified_Noise/GVP-Disp/README.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# [AAAI 2026] Rectified Noise: A Generative Model Using Positive-incentive Noise
|
| 2 |
+
|
| 3 |
+

|
| 4 |
+
|
| 5 |
+
<br>
|
| 6 |
+
<a href="https://arxiv.org/pdf/2511.07911"><img src="https://img.shields.io/static/v1?label=Paper&message=2511.07911&color=red&logo=arxiv"></a>
|
| 7 |
+
<a href="https://huggingface.co/xiangzai/recitified_noise"><img src="https://img.shields.io/badge/🤗_HuggingFace-Model-ffbd45.svg" alt="HuggingFace"></a>
|
| 8 |
+
|
| 9 |
+
## Introduction
|
| 10 |
+
This is a [Pytorch](https://pytorch.org) implementation of **Rectified Noise**, a generative model using positive-incentive noise to enhance model's sampling.
|
| 11 |
+
|
| 12 |
+

|
| 13 |
+
|
| 14 |
+
## Setup
|
| 15 |
+
|
| 16 |
+
We provide an `environment.yml` file that can be used to create a Conda environment.
|
| 17 |
+
|
| 18 |
+
```bash
|
| 19 |
+
conda env create -f environment.yml
|
| 20 |
+
conda activate RN
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
## Usage
|
| 24 |
+
|
| 25 |
+
### Training
|
| 26 |
+
1. We provide a training script for RN in `train_rectified_noise.py`
|
| 27 |
+
|
| 28 |
+
Run:
|
| 29 |
+
|
| 30 |
+
```bash
|
| 31 |
+
torchrun --nnodes=1 --nproc_per_node=4 train_rectified_noise.py \
|
| 32 |
+
--data-path /path/to/data \
|
| 33 |
+
--num-classes 3 \
|
| 34 |
+
--path-type Linear \
|
| 35 |
+
--prediction velocity \
|
| 36 |
+
--ckpt /path/to/pretrained_model \
|
| 37 |
+
--model SiT-B/2
|
| 38 |
+
--learn-mu True \
|
| 39 |
+
--depth 1 \
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
You can find relevant checkpoint files from the previous Hugging Face link.
|
| 43 |
+
|
| 44 |
+
2. Parameters:
|
| 45 |
+
|
| 46 |
+
| Argument | Type | Default | Description |
|
| 47 |
+
|----------|------|---------|-------------|
|
| 48 |
+
| `--data-path ` | str | `-` | Path to the dataset. |
|
| 49 |
+
| `--num-classes` | int | `-` | Number of classes. |
|
| 50 |
+
| `--path-type` | str | `Linear` | Directory to save the generated images. |
|
| 51 |
+
| `--prediction` | str | `velocity` | Output type of network. |
|
| 52 |
+
| `--ckpt` | str | `-` | Path to pretrained model checkpoint. |
|
| 53 |
+
| `--model` | str | `SiT-B/2` | Model type, any option from the model list. |
|
| 54 |
+
| `--learn-mu` | bool | `True` | Whether to learn the mu parameter. |
|
| 55 |
+
| `--depth` | int | `1` | Depth parameter for the SiTF2 model(Extra SiT Block). |
|
| 56 |
+
|
| 57 |
+
**Sampling**
|
| 58 |
+
|
| 59 |
+
1. Using the trained RN model to enhance the pre-trained model
|
| 60 |
+
|
| 61 |
+
```bash
|
| 62 |
+
torchrun --nnodes=1 --nproc_per_node=4 train_rectified_noise.py \
|
| 63 |
+
--path-type Linear \
|
| 64 |
+
--prediction velocity \
|
| 65 |
+
--ckpt /path/to/pretrained_model \
|
| 66 |
+
--sitf2-ckpt /path/to/pretrained_RN \
|
| 67 |
+
--model SiT-B/2
|
| 68 |
+
--learn-mu True \
|
| 69 |
+
--depth 1 \
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
## Ackownledgement
|
| 73 |
+
This repo benefits from [SiT](https://github.com/willisma/SiT). Thanks for their excellent works.
|
| 74 |
+
|
| 75 |
+
## Contact
|
| 76 |
+
If you have any question about this project, please contact mguzhenyu@outlook.com.
|
| 77 |
+
|
| 78 |
+
## Citation
|
| 79 |
+
|
| 80 |
+
If you find the code useful for your research, please consider citing our work:
|
| 81 |
+
|
| 82 |
+
```
|
| 83 |
+
@misc{gu2025rectifiednoisegenerativemodel,
|
| 84 |
+
title={Rectified Noise: A Generative Model Using Positive-incentive Noise},
|
| 85 |
+
author={Zhenyu Gu and Yanchen Xu and Sida Huang and Yubin Guo and Hongyuan Zhang},
|
| 86 |
+
year={2025},
|
| 87 |
+
eprint={2511.07911},
|
| 88 |
+
archivePrefix={arXiv},
|
| 89 |
+
primaryClass={cs.LG},
|
| 90 |
+
url={https://arxiv.org/abs/2511.07911},
|
| 91 |
+
}
|
| 92 |
+
```
|
Rectified_Noise/GVP-Disp/W_False.log
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0%| | 0/47 [00:00<?, ?it/s]
|
| 1 |
2%|▏ | 1/47 [01:34<1:12:31, 94.60s/it]
|
| 2 |
4%|▍ | 2/47 [03:08<1:10:33, 94.07s/it]
|
| 3 |
6%|▋ | 3/47 [04:42<1:08:52, 93.93s/it]
|
| 4 |
9%|▊ | 4/47 [06:15<1:07:13, 93.79s/it]
|
| 5 |
11%|█ | 5/47 [07:49<1:05:36, 93.72s/it]
|
| 6 |
13%|█▎ | 6/47 [09:22<1:04:00, 93.67s/it]
|
| 7 |
15%|█▍ | 7/47 [10:56<1:02:26, 93.67s/it]
|
| 8 |
17%|█▋ | 8/47 [12:30<1:00:51, 93.62s/it]
|
| 9 |
19%|█▉ | 9/47 [14:03<59:16, 93.60s/it]
|
| 10 |
21%|██▏ | 10/47 [15:37<57:43, 93.62s/it]
|
| 11 |
23%|██▎ | 11/47 [17:10<56:10, 93.62s/it]
|
| 12 |
26%|██▌ | 12/47 [18:44<54:36, 93.63s/it]
|
| 13 |
28%|██▊ | 13/47 [20:18<53:02, 93.61s/it]
|
| 14 |
30%|██▉ | 14/47 [21:51<51:30, 93.64s/it]
|
| 15 |
32%|███▏ | 15/47 [23:25<49:57, 93.66s/it]
|
| 16 |
34%|███▍ | 16/47 [24:58<48:21, 93.61s/it]
|
| 17 |
36%|███▌ | 17/47 [26:32<46:50, 93.68s/it]
|
| 18 |
38%|███▊ | 18/47 [28:06<45:17, 93.71s/it]
|
| 19 |
40%|████ | 19/47 [29:40<43:43, 93.70s/it]
|
|
|
|
| 1 |
+
[NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
|
| 2 |
+
Starting rank=0, seed=0, world_size=1.
|
| 3 |
+
Saving .png samples at GVP_samples/depth-mu-2-threshold-1.0-0025000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04
|
| 4 |
+
Total number of images that will be sampled: 3008
|
| 5 |
+
|
| 6 |
0%| | 0/47 [00:00<?, ?it/s]
|
| 7 |
2%|▏ | 1/47 [01:34<1:12:31, 94.60s/it]
|
| 8 |
4%|▍ | 2/47 [03:08<1:10:33, 94.07s/it]
|
| 9 |
6%|▋ | 3/47 [04:42<1:08:52, 93.93s/it]
|
| 10 |
9%|▊ | 4/47 [06:15<1:07:13, 93.79s/it]
|
| 11 |
11%|█ | 5/47 [07:49<1:05:36, 93.72s/it]
|
| 12 |
13%|█▎ | 6/47 [09:22<1:04:00, 93.67s/it]
|
| 13 |
15%|█▍ | 7/47 [10:56<1:02:26, 93.67s/it]
|
| 14 |
17%|█▋ | 8/47 [12:30<1:00:51, 93.62s/it]
|
| 15 |
19%|█▉ | 9/47 [14:03<59:16, 93.60s/it]
|
| 16 |
21%|██▏ | 10/47 [15:37<57:43, 93.62s/it]
|
| 17 |
23%|██▎ | 11/47 [17:10<56:10, 93.62s/it]
|
| 18 |
26%|██▌ | 12/47 [18:44<54:36, 93.63s/it]
|
| 19 |
28%|██▊ | 13/47 [20:18<53:02, 93.61s/it]
|
| 20 |
30%|██▉ | 14/47 [21:51<51:30, 93.64s/it]
|
| 21 |
32%|███▏ | 15/47 [23:25<49:57, 93.66s/it]
|
| 22 |
34%|███▍ | 16/47 [24:58<48:21, 93.61s/it]
|
| 23 |
36%|███▌ | 17/47 [26:32<46:50, 93.68s/it]
|
| 24 |
38%|███▊ | 18/47 [28:06<45:17, 93.71s/it]
|
| 25 |
40%|████ | 19/47 [29:40<43:43, 93.70s/it]
|
Rectified_Noise/GVP-Disp/evaluate_samples.sh
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
# Execute all evaluation tasks in parallel
|
| 4 |
+
# Each command runs in the background using &
|
| 5 |
+
|
| 6 |
+
echo "Starting all evaluation tasks in parallel..."
|
| 7 |
+
|
| 8 |
+
# Reference batch path
|
| 9 |
+
REF_BATCH="/gemini/space/zhaozy/zhy/dataset/VIRTUAL_imagenet256_labeled.npz"
|
| 10 |
+
|
| 11 |
+
# Base directory for sample files
|
| 12 |
+
SAMPLE_DIR="/gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise/last_samples_depth_2_gvp_0.5"
|
| 13 |
+
|
| 14 |
+
# Change to the project root directory
|
| 15 |
+
cd /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching
|
| 16 |
+
|
| 17 |
+
# Evaluate threshold 0.0 on GPU 0
|
| 18 |
+
CUDA_VISIBLE_DEVICES=0 nohup python evaluator.py \
|
| 19 |
+
--ref_batch ${REF_BATCH} \
|
| 20 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.0-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 21 |
+
> eval_threshold_0.0.log 2>&1 &
|
| 22 |
+
|
| 23 |
+
# Evaluate threshold 0.15 on GPU 1
|
| 24 |
+
CUDA_VISIBLE_DEVICES=1 nohup python evaluator.py \
|
| 25 |
+
--ref_batch ${REF_BATCH} \
|
| 26 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.15-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 27 |
+
> eval_threshold_0.15.log 2>&1 &
|
| 28 |
+
|
| 29 |
+
# Evaluate threshold 0.25 on GPU 2
|
| 30 |
+
CUDA_VISIBLE_DEVICES=2 nohup python evaluator.py \
|
| 31 |
+
--ref_batch ${REF_BATCH} \
|
| 32 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.25-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 33 |
+
> eval_threshold_0.25.log 2>&1 &
|
| 34 |
+
|
| 35 |
+
# Evaluate threshold 0.5 on GPU 3
|
| 36 |
+
CUDA_VISIBLE_DEVICES=3 nohup python evaluator.py \
|
| 37 |
+
--ref_batch ${REF_BATCH} \
|
| 38 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.5-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 39 |
+
> eval_threshold_0.5.log 2>&1 &
|
| 40 |
+
|
| 41 |
+
# Evaluate threshold 0.75 on GPU 4
|
| 42 |
+
CUDA_VISIBLE_DEVICES=0 nohup python evaluator.py \
|
| 43 |
+
--ref_batch ${REF_BATCH} \
|
| 44 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.75-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 45 |
+
> eval_threshold_0.75.log 2>&1 &
|
| 46 |
+
|
| 47 |
+
# Evaluate threshold 1.0 on GPU 5
|
| 48 |
+
CUDA_VISIBLE_DEVICES=1 nohup python evaluator.py \
|
| 49 |
+
--ref_batch ${REF_BATCH} \
|
| 50 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-1.0-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 51 |
+
> eval_threshold_1.0.log 2>&1 &
|
| 52 |
+
|
| 53 |
+
# Wait for all background jobs to complete
|
| 54 |
+
echo "All evaluation tasks started. Waiting for completion..."
|
| 55 |
+
wait
|
| 56 |
+
|
| 57 |
+
echo "All evaluation tasks completed!"
|
| 58 |
+
echo ""
|
| 59 |
+
echo "Results saved in:"
|
| 60 |
+
echo " - eval_threshold_0.0.log"
|
| 61 |
+
echo " - eval_threshold_0.15.log"
|
| 62 |
+
echo " - eval_threshold_0.25.log"
|
| 63 |
+
echo " - eval_threshold_0.5.log"
|
| 64 |
+
echo " - eval_threshold_0.75.log"
|
| 65 |
+
echo " - eval_threshold_1.0.log"
|
Rectified_Noise/GVP-Disp/evaluator.py
ADDED
|
@@ -0,0 +1,689 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import io
|
| 3 |
+
import os
|
| 4 |
+
import random
|
| 5 |
+
import warnings
|
| 6 |
+
import zipfile
|
| 7 |
+
from abc import ABC, abstractmethod
|
| 8 |
+
from contextlib import contextmanager
|
| 9 |
+
from functools import partial
|
| 10 |
+
from multiprocessing import cpu_count
|
| 11 |
+
from multiprocessing.pool import ThreadPool
|
| 12 |
+
from typing import Iterable, Optional, Tuple, Union
|
| 13 |
+
|
| 14 |
+
import numpy as np
|
| 15 |
+
import requests
|
| 16 |
+
import tensorflow.compat.v1 as tf
|
| 17 |
+
from scipy import linalg
|
| 18 |
+
from tqdm.auto import tqdm
|
| 19 |
+
from datetime import timedelta
|
| 20 |
+
import torch
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
INCEPTION_V3_URL = "https://openaipublic.blob.core.windows.net/diffusion/jul-2021/ref_batches/classify_image_graph_def.pb"
|
| 25 |
+
INCEPTION_V3_PATH = "classify_image_graph_def.pb"
|
| 26 |
+
|
| 27 |
+
FID_POOL_NAME = "pool_3:0"
|
| 28 |
+
FID_SPATIAL_NAME = "mixed_6/conv:0"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def main():
|
| 32 |
+
parser = argparse.ArgumentParser()
|
| 33 |
+
parser.add_argument("--ref_batch", default='/gemini/space/zhaozy/zhy/dataset/VIRTUAL_imagenet256_labeled.npz',help="path to reference batch npz file")
|
| 34 |
+
parser.add_argument("--sample_batch", default='/gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise/last_samples_depth_2/depth-mu-28-0050000-2000000-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz', help="path to sample batch npz file")
|
| 35 |
+
args = parser.parse_args()
|
| 36 |
+
|
| 37 |
+
config = tf.ConfigProto(
|
| 38 |
+
allow_soft_placement=True # allows DecodeJpeg to run on CPU in Inception graph
|
| 39 |
+
)
|
| 40 |
+
config.gpu_options.allow_growth = True
|
| 41 |
+
evaluator = Evaluator(tf.Session(config=config))
|
| 42 |
+
|
| 43 |
+
print("warming up TensorFlow...")
|
| 44 |
+
# This will cause TF to print a bunch of verbose stuff now rather
|
| 45 |
+
# than after the next print(), to help prevent confusion.
|
| 46 |
+
evaluator.warmup()
|
| 47 |
+
|
| 48 |
+
print("computing reference batch activations...")
|
| 49 |
+
ref_acts = evaluator.read_activations(args.ref_batch)
|
| 50 |
+
print("computing/reading reference batch statistics...")
|
| 51 |
+
ref_stats, ref_stats_spatial = evaluator.read_statistics(args.ref_batch, ref_acts)
|
| 52 |
+
|
| 53 |
+
print("computing sample batch activations...")
|
| 54 |
+
sample_acts = evaluator.read_activations(args.sample_batch)
|
| 55 |
+
print("computing/reading sample batch statistics...")
|
| 56 |
+
sample_stats, sample_stats_spatial = evaluator.read_statistics(args.sample_batch, sample_acts)
|
| 57 |
+
|
| 58 |
+
print("Computing evaluations...")
|
| 59 |
+
#print("Inception Score:", evaluator.compute_inception_score(sample_acts[0]))
|
| 60 |
+
print("FID:", sample_stats.frechet_distance(ref_stats))
|
| 61 |
+
#print("sFID:", sample_stats_spatial.frechet_distance(ref_stats_spatial))
|
| 62 |
+
#prec, recall = evaluator.compute_prec_recall(ref_acts[0], sample_acts[0])
|
| 63 |
+
#print("Precision:", prec)
|
| 64 |
+
#print("Recall:", recall)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class InvalidFIDException(Exception):
|
| 68 |
+
pass
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class FIDStatistics:
|
| 72 |
+
def __init__(self, mu: np.ndarray, sigma: np.ndarray):
|
| 73 |
+
self.mu = mu
|
| 74 |
+
self.sigma = sigma
|
| 75 |
+
|
| 76 |
+
def frechet_distance(self, other, eps=1e-6):
|
| 77 |
+
"""
|
| 78 |
+
Compute the Frechet distance between two sets of statistics.
|
| 79 |
+
"""
|
| 80 |
+
# https://github.com/bioinf-jku/TTUR/blob/73ab375cdf952a12686d9aa7978567771084da42/fid.py#L132
|
| 81 |
+
mu1, sigma1 = self.mu, self.sigma
|
| 82 |
+
mu2, sigma2 = other.mu, other.sigma
|
| 83 |
+
|
| 84 |
+
mu1 = np.atleast_1d(mu1)
|
| 85 |
+
mu2 = np.atleast_1d(mu2)
|
| 86 |
+
|
| 87 |
+
sigma1 = np.atleast_2d(sigma1)
|
| 88 |
+
sigma2 = np.atleast_2d(sigma2)
|
| 89 |
+
|
| 90 |
+
assert (
|
| 91 |
+
mu1.shape == mu2.shape
|
| 92 |
+
), f"Training and test mean vectors have different lengths: {mu1.shape}, {mu2.shape}"
|
| 93 |
+
assert (
|
| 94 |
+
sigma1.shape == sigma2.shape
|
| 95 |
+
), f"Training and test covariances have different dimensions: {sigma1.shape}, {sigma2.shape}"
|
| 96 |
+
|
| 97 |
+
diff = mu1 - mu2
|
| 98 |
+
|
| 99 |
+
# product might be almost singular
|
| 100 |
+
covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
|
| 101 |
+
if not np.isfinite(covmean).all():
|
| 102 |
+
msg = (
|
| 103 |
+
"fid calculation produces singular product; adding %s to diagonal of cov estimates"
|
| 104 |
+
% eps
|
| 105 |
+
)
|
| 106 |
+
warnings.warn(msg)
|
| 107 |
+
offset = np.eye(sigma1.shape[0]) * eps
|
| 108 |
+
covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
|
| 109 |
+
|
| 110 |
+
# numerical error might give slight imaginary component
|
| 111 |
+
#虚部报错部分
|
| 112 |
+
if np.iscomplexobj(covmean):
|
| 113 |
+
if not np.allclose(np.diagonal(covmean).imag, 0, atol=1):
|
| 114 |
+
m = np.max(np.abs(covmean.imag))
|
| 115 |
+
print(f"Real component: {covmean.real}")
|
| 116 |
+
raise ValueError("Imaginary component {}".format(m))
|
| 117 |
+
covmean = covmean.real
|
| 118 |
+
|
| 119 |
+
tr_covmean = np.trace(covmean)
|
| 120 |
+
|
| 121 |
+
return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
class Evaluator:
|
| 125 |
+
def __init__(
|
| 126 |
+
self,
|
| 127 |
+
session,
|
| 128 |
+
batch_size=64,
|
| 129 |
+
softmax_batch_size=512,
|
| 130 |
+
):
|
| 131 |
+
self.sess = session
|
| 132 |
+
self.batch_size = batch_size
|
| 133 |
+
self.softmax_batch_size = softmax_batch_size
|
| 134 |
+
self.manifold_estimator = ManifoldEstimator(session)
|
| 135 |
+
with self.sess.graph.as_default():
|
| 136 |
+
self.image_input = tf.placeholder(tf.float32, shape=[None, None, None, 3])
|
| 137 |
+
self.softmax_input = tf.placeholder(tf.float32, shape=[None, 2048])
|
| 138 |
+
self.pool_features, self.spatial_features = _create_feature_graph(self.image_input)
|
| 139 |
+
self.softmax = _create_softmax_graph(self.softmax_input)
|
| 140 |
+
|
| 141 |
+
def warmup(self):
|
| 142 |
+
self.compute_activations(np.zeros([1, 8, 64, 64, 3]))
|
| 143 |
+
|
| 144 |
+
def read_activations(self, npz_path: Union[str, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
|
| 145 |
+
if isinstance(npz_path, str):
|
| 146 |
+
# If npz_path is a string, treat it as a file path and read the .npz file
|
| 147 |
+
with open_npz_array(npz_path, "arr_0") as reader:
|
| 148 |
+
return self.compute_activations(reader.read_batches(self.batch_size))
|
| 149 |
+
elif isinstance(npz_path, np.ndarray):
|
| 150 |
+
# If npz_path is a numpy array, split it into batches manually
|
| 151 |
+
print("--------line 140-----------")
|
| 152 |
+
batches = np.array_split(npz_path, range(self.batch_size, npz_path.shape[0], self.batch_size))
|
| 153 |
+
print("--------line 143-----------")
|
| 154 |
+
return self.compute_activations(batches)
|
| 155 |
+
else:
|
| 156 |
+
raise ValueError("npz_path must be either a file path (str) or a numpy array (np.ndarray)")
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def compute_activations(self, batches: Iterable[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
|
| 160 |
+
"""
|
| 161 |
+
Compute image features for downstream evals.
|
| 162 |
+
|
| 163 |
+
:param batches: a iterator over NHWC numpy arrays in [0, 255].
|
| 164 |
+
:return: a tuple of numpy arrays of shape [N x X], where X is a feature
|
| 165 |
+
dimension. The tuple is (pool_3, spatial).
|
| 166 |
+
"""
|
| 167 |
+
preds = []
|
| 168 |
+
spatial_preds = []
|
| 169 |
+
for batch in tqdm(batches):
|
| 170 |
+
# print("--------line 164-----------")
|
| 171 |
+
|
| 172 |
+
# # 识别当前进程信息
|
| 173 |
+
# if 'RANK' in os.environ:
|
| 174 |
+
# rank = int(os.environ['RANK'])
|
| 175 |
+
# local_rank = int(os.environ.get('LOCAL_RANK', rank % torch.cuda.device_count()))
|
| 176 |
+
# print(f"Distributed training - Global Rank: {rank}, Local Rank: {local_rank}")
|
| 177 |
+
# print(f"Current GPU device: {torch.cuda.current_device()}" if torch.cuda.is_available() else "No CUDA")
|
| 178 |
+
# else:
|
| 179 |
+
# print("Single process mode")
|
| 180 |
+
|
| 181 |
+
# print(f"Process PID: {os.getpid()}")
|
| 182 |
+
|
| 183 |
+
batch = batch.astype(np.float32)
|
| 184 |
+
pred, spatial_pred = self.sess.run(
|
| 185 |
+
[self.pool_features, self.spatial_features], {self.image_input: batch}
|
| 186 |
+
)
|
| 187 |
+
# print("--------line 169-----------")
|
| 188 |
+
preds.append(pred.reshape([pred.shape[0], -1]))
|
| 189 |
+
spatial_preds.append(spatial_pred.reshape([spatial_pred.shape[0], -1]))
|
| 190 |
+
return (
|
| 191 |
+
np.concatenate(preds, axis=0),
|
| 192 |
+
np.concatenate(spatial_preds, axis=0),
|
| 193 |
+
)
|
| 194 |
+
|
| 195 |
+
def read_statistics(
|
| 196 |
+
self, npz_path: Union[str, np.ndarray], activations: Tuple[np.ndarray, np.ndarray]
|
| 197 |
+
) -> Tuple[FIDStatistics, FIDStatistics]:
|
| 198 |
+
if isinstance(npz_path, str):
|
| 199 |
+
obj = np.load(npz_path)
|
| 200 |
+
if "mu" in list(obj.keys()):
|
| 201 |
+
return FIDStatistics(obj["mu"], obj["sigma"]), FIDStatistics(
|
| 202 |
+
obj["mu_s"], obj["sigma_s"]
|
| 203 |
+
)
|
| 204 |
+
elif isinstance(npz_path, np.ndarray):
|
| 205 |
+
obj = npz_path
|
| 206 |
+
else:
|
| 207 |
+
raise ValueError("npz_path must be either a file path (str) or a numpy array (np.ndarray)")
|
| 208 |
+
return tuple(self.compute_statistics(x) for x in activations)
|
| 209 |
+
|
| 210 |
+
def compute_statistics(self, activations: np.ndarray) -> FIDStatistics:
|
| 211 |
+
mu = np.mean(activations, axis=0)
|
| 212 |
+
sigma = np.cov(activations, rowvar=False)
|
| 213 |
+
return FIDStatistics(mu, sigma)
|
| 214 |
+
|
| 215 |
+
def compute_inception_score(self, activations: np.ndarray, split_size: int = 5000) -> float:
|
| 216 |
+
softmax_out = []
|
| 217 |
+
for i in range(0, len(activations), self.softmax_batch_size):
|
| 218 |
+
acts = activations[i : i + self.softmax_batch_size]
|
| 219 |
+
softmax_out.append(self.sess.run(self.softmax, feed_dict={self.softmax_input: acts}))
|
| 220 |
+
preds = np.concatenate(softmax_out, axis=0)
|
| 221 |
+
# https://github.com/openai/improved-gan/blob/4f5d1ec5c16a7eceb206f42bfc652693601e1d5c/inception_score/model.py#L46
|
| 222 |
+
scores = []
|
| 223 |
+
for i in range(0, len(preds), split_size):
|
| 224 |
+
part = preds[i : i + split_size]
|
| 225 |
+
kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
|
| 226 |
+
kl = np.mean(np.sum(kl, 1))
|
| 227 |
+
scores.append(np.exp(kl))
|
| 228 |
+
return float(np.mean(scores))
|
| 229 |
+
|
| 230 |
+
def compute_prec_recall(
|
| 231 |
+
self, activations_ref: np.ndarray, activations_sample: np.ndarray
|
| 232 |
+
) -> Tuple[float, float]:
|
| 233 |
+
radii_1 = self.manifold_estimator.manifold_radii(activations_ref)
|
| 234 |
+
radii_2 = self.manifold_estimator.manifold_radii(activations_sample)
|
| 235 |
+
pr = self.manifold_estimator.evaluate_pr(
|
| 236 |
+
activations_ref, radii_1, activations_sample, radii_2
|
| 237 |
+
)
|
| 238 |
+
return (float(pr[0][0]), float(pr[1][0]))
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
class ManifoldEstimator:
    """
    A helper for comparing manifolds of feature vectors.

    Adapted from https://github.com/kynkaat/improved-precision-and-recall-metric/blob/f60f25e5ad933a79135c783fcda53de30f42c9b9/precision_recall.py#L57
    """

    def __init__(
        self,
        session,
        row_batch_size=10000,
        col_batch_size=10000,
        nhood_sizes=(3,),
        clamp_to_percentile=None,
        eps=1e-5,
    ):
        """
        Estimate the manifold of given feature vectors.

        :param session: the TensorFlow session.
        :param row_batch_size: row batch size to compute pairwise distances
                               (parameter to trade-off between memory usage and performance).
        :param col_batch_size: column batch size to compute pairwise distances.
        :param nhood_sizes: number of neighbors used to estimate the manifold.
        :param clamp_to_percentile: prune hyperspheres that have radius larger than
                                    the given percentile.
        :param eps: small number for numerical stability.
        """
        self.distance_block = DistanceBlock(session)
        self.row_batch_size = row_batch_size
        self.col_batch_size = col_batch_size
        self.nhood_sizes = nhood_sizes
        self.num_nhoods = len(nhood_sizes)
        self.clamp_to_percentile = clamp_to_percentile
        self.eps = eps

    def warmup(self):
        """Run one tiny evaluation so that one-time TF setup cost is paid up front."""
        feats, radii = (
            np.zeros([1, 2048], dtype=np.float32),
            np.zeros([1, 1], dtype=np.float32),
        )
        self.evaluate_pr(feats, radii, feats, radii)

    def manifold_radii(self, features: np.ndarray) -> np.ndarray:
        """
        For every feature vector, compute its distance to each k-th nearest
        neighbor for every k in ``self.nhood_sizes``.

        :param features: [N x D] feature vectors.
        :return: [N x num_nhoods] float32 radii (optionally clamped to zero
                 above ``clamp_to_percentile``).
        """
        num_images = len(features)

        # Estimate manifold of features by calculating distances to k-NN of each sample.
        radii = np.zeros([num_images, self.num_nhoods], dtype=np.float32)
        distance_batch = np.zeros([self.row_batch_size, num_images], dtype=np.float32)
        seq = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32)

        for begin1 in range(0, num_images, self.row_batch_size):
            end1 = min(begin1 + self.row_batch_size, num_images)
            row_batch = features[begin1:end1]

            for begin2 in range(0, num_images, self.col_batch_size):
                end2 = min(begin2 + self.col_batch_size, num_images)
                col_batch = features[begin2:end2]

                # Compute distances between batches.
                distance_batch[
                    0 : end1 - begin1, begin2:end2
                ] = self.distance_block.pairwise_distances(row_batch, col_batch)

            # Find the k-nearest neighbor from the current batch.
            radii[begin1:end1, :] = np.concatenate(
                [
                    x[:, self.nhood_sizes]
                    for x in _numpy_partition(distance_batch[0 : end1 - begin1, :], seq, axis=1)
                ],
                axis=0,
            )

        if self.clamp_to_percentile is not None:
            # Zero out hyperspheres larger than the given percentile (likely outliers).
            max_distances = np.percentile(radii, self.clamp_to_percentile, axis=0)
            radii[radii > max_distances] = 0
        return radii

    def evaluate(self, features: np.ndarray, radii: np.ndarray, eval_features: np.ndarray):
        """
        Evaluate if new feature vectors are at the manifold.

        :param features: [N x D] reference feature vectors.
        :param radii: [N x num_nhoods] radii for the reference vectors.
        :param eval_features: [M x D] feature vectors to test for membership.
        :return: dict with the membership fraction, per-sample predictions,
                 max realism scores, and nearest reference indices.
        """
        num_eval_images = eval_features.shape[0]
        num_ref_images = radii.shape[0]
        distance_batch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float32)
        batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32)
        max_realism_score = np.zeros([num_eval_images], dtype=np.float32)
        nearest_indices = np.zeros([num_eval_images], dtype=np.int32)

        for begin1 in range(0, num_eval_images, self.row_batch_size):
            end1 = min(begin1 + self.row_batch_size, num_eval_images)
            feature_batch = eval_features[begin1:end1]

            for begin2 in range(0, num_ref_images, self.col_batch_size):
                end2 = min(begin2 + self.col_batch_size, num_ref_images)
                ref_batch = features[begin2:end2]

                distance_batch[
                    0 : end1 - begin1, begin2:end2
                ] = self.distance_block.pairwise_distances(feature_batch, ref_batch)

            # From the minibatch of new feature vectors, determine if they are in the estimated manifold.
            # If a feature vector is inside a hypersphere of some reference sample, then
            # the new sample lies at the estimated manifold.
            # The radii of the hyperspheres are determined from distances of neighborhood size k.
            samples_in_manifold = distance_batch[0 : end1 - begin1, :, None] <= radii
            batch_predictions[begin1:end1] = np.any(samples_in_manifold, axis=1).astype(np.int32)

            max_realism_score[begin1:end1] = np.max(
                radii[:, 0] / (distance_batch[0 : end1 - begin1, :] + self.eps), axis=1
            )
            nearest_indices[begin1:end1] = np.argmin(distance_batch[0 : end1 - begin1, :], axis=1)

        return {
            "fraction": float(np.mean(batch_predictions)),
            "batch_predictions": batch_predictions,
            # NOTE: the misspelled key is kept intentionally for backward
            # compatibility with existing callers.
            "max_realisim_score": max_realism_score,
            "nearest_indices": nearest_indices,
        }

    def evaluate_pr(
        self,
        features_1: np.ndarray,
        radii_1: np.ndarray,
        features_2: np.ndarray,
        radii_2: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Evaluate precision and recall efficiently.

        :param features_1: [N1 x D] feature vectors for reference batch.
        :param radii_1: [N1 x K1] radii for reference vectors.
        :param features_2: [N2 x D] feature vectors for the other batch.
        :param radii_2: [N x K2] radii for other vectors.
        :return: a tuple of arrays for (precision, recall):
                 - precision: an np.ndarray of length K1
                 - recall: an np.ndarray of length K2
        """
        # BUGFIX: `np.bool` was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin `bool` is the supported spelling of the same dtype.
        features_1_status = np.zeros([len(features_1), radii_2.shape[1]], dtype=bool)
        features_2_status = np.zeros([len(features_2), radii_1.shape[1]], dtype=bool)
        for begin_1 in range(0, len(features_1), self.row_batch_size):
            end_1 = begin_1 + self.row_batch_size
            batch_1 = features_1[begin_1:end_1]
            for begin_2 in range(0, len(features_2), self.col_batch_size):
                end_2 = begin_2 + self.col_batch_size
                batch_2 = features_2[begin_2:end_2]
                batch_1_in, batch_2_in = self.distance_block.less_thans(
                    batch_1, radii_1[begin_1:end_1], batch_2, radii_2[begin_2:end_2]
                )
                features_1_status[begin_1:end_1] |= batch_1_in
                features_2_status[begin_2:end_2] |= batch_2_in
        return (
            np.mean(features_2_status.astype(np.float64), axis=0),
            np.mean(features_1_status.astype(np.float64), axis=0),
        )
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
class DistanceBlock:
    """
    Calculate pairwise distances between vectors.

    Adapted from https://github.com/kynkaat/improved-precision-and-recall-metric/blob/f60f25e5ad933a79135c783fcda53de30f42c9b9/precision_recall.py#L34
    """

    def __init__(self, session):
        # TF1-style usage: the graph is built once here; all computation happens
        # later through session.run in pairwise_distances/less_thans.
        self.session = session

        # Initialize TF graph to calculate pairwise distances.
        with session.graph.as_default():
            self._features_batch1 = tf.placeholder(tf.float32, shape=[None, None])
            self._features_batch2 = tf.placeholder(tf.float32, shape=[None, None])
            # Try the computation in float16 first (faster / less memory); if any
            # entry overflows to inf/nan, redo the whole block in float32.
            distance_block_16 = _batch_pairwise_distances(
                tf.cast(self._features_batch1, tf.float16),
                tf.cast(self._features_batch2, tf.float16),
            )
            self.distance_block = tf.cond(
                tf.reduce_all(tf.math.is_finite(distance_block_16)),
                lambda: tf.cast(distance_block_16, tf.float32),
                lambda: _batch_pairwise_distances(self._features_batch1, self._features_batch2),
            )

            # Extra logic for less thans.
            self._radii1 = tf.placeholder(tf.float32, shape=[None, None])
            self._radii2 = tf.placeholder(tf.float32, shape=[None, None])
            dist32 = tf.cast(self.distance_block, tf.float32)[..., None]
            # A batch-1 sample is "in" if its distance to any batch-2 sample falls
            # within that sample's radius, and symmetrically for batch 2.
            self._batch_1_in = tf.math.reduce_any(dist32 <= self._radii2, axis=1)
            self._batch_2_in = tf.math.reduce_any(dist32 <= self._radii1[:, None], axis=0)

    def pairwise_distances(self, U, V):
        """
        Evaluate pairwise distances between two batches of feature vectors.
        """
        return self.session.run(
            self.distance_block,
            feed_dict={self._features_batch1: U, self._features_batch2: V},
        )

    def less_thans(self, batch_1, radii_1, batch_2, radii_2):
        # Returns the two boolean membership masks built in __init__:
        # (batch_1_in, batch_2_in).
        return self.session.run(
            [self._batch_1_in, self._batch_2_in],
            feed_dict={
                self._features_batch1: batch_1,
                self._features_batch2: batch_2,
                self._radii1: radii_1,
                self._radii2: radii_2,
            },
        )
|
| 448 |
+
|
| 449 |
+
|
| 450 |
+
def _batch_pairwise_distances(U, V):
    """
    Compute pairwise squared Euclidean distances between two batches of
    feature vectors.
    """
    with tf.variable_scope("pairwise_dist_block"):
        # Row-wise squared norms, shaped as a column for U and a row for V so
        # that broadcasting produces the full [|U| x |V|] matrix below.
        sq_norm_u = tf.reshape(tf.reduce_sum(tf.square(U), 1), [-1, 1])
        sq_norm_v = tf.reshape(tf.reduce_sum(tf.square(V), 1), [1, -1])

        # ||u - v||^2 = ||u||^2 - 2 u.v + ||v||^2; clamp at zero to absorb
        # tiny negative values from floating-point round-off.
        return tf.maximum(sq_norm_u - 2 * tf.matmul(U, V, False, True) + sq_norm_v, 0.0)
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
class NpzArrayReader(ABC):
    """Abstract reader that yields rows of an npz array in fixed-size batches."""

    @abstractmethod
    def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
        """Return up to ``batch_size`` rows, or None once exhausted."""

    @abstractmethod
    def remaining(self) -> int:
        """Return the number of rows not yet read."""

    def read_batches(self, batch_size: int) -> Iterable[np.ndarray]:
        """Return a length-aware iterable over all remaining batches."""

        def generate():
            while (batch := self.read_batch(batch_size)) is not None:
                yield batch

        total = self.remaining()
        # Ceiling division: a trailing partial batch still counts as one.
        return BatchIterator(generate, -(-total // batch_size))
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
class BatchIterator:
    """Pairs a generator factory with a precomputed length.

    This lets callers take ``len()`` (e.g. for progress reporting) of a stream
    that is otherwise produced lazily; each ``__iter__`` call starts a fresh
    generator.
    """

    def __init__(self, gen_fn, length):
        self.length = length
        self.gen_fn = gen_fn

    def __iter__(self):
        return self.gen_fn()

    def __len__(self):
        return self.length
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
class StreamingNpzArrayReader(NpzArrayReader):
    """Reads rows of an uncompressed .npy stream lazily, batch by batch."""

    def __init__(self, arr_f, shape, dtype):
        self.arr_f = arr_f
        self.shape = shape
        self.dtype = dtype
        self.idx = 0

    def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
        rows_left = self.shape[0] - self.idx
        if rows_left <= 0:
            return None

        bs = min(batch_size, rows_left)
        self.idx += bs

        batch_shape = [bs, *self.shape[1:]]
        if self.dtype.itemsize == 0:
            # Zero-size items carry no payload, so there is nothing to read
            # from the underlying stream.
            return np.ndarray(batch_shape, dtype=self.dtype)

        item_count = bs * np.prod(self.shape[1:])
        raw = _read_bytes(self.arr_f, int(item_count * self.dtype.itemsize), "array data")
        return np.frombuffer(raw, dtype=self.dtype).reshape(batch_shape)

    def remaining(self) -> int:
        return max(0, self.shape[0] - self.idx)
|
| 527 |
+
|
| 528 |
+
|
| 529 |
+
class MemoryNpzArrayReader(NpzArrayReader):
    """Serves batches from an array that has been fully loaded into memory."""

    def __init__(self, arr):
        self.arr = arr
        self.idx = 0

    @classmethod
    def load(cls, path: str, arr_name: str):
        """Load array ``arr_name`` from the npz file at ``path`` and wrap it."""
        with open(path, "rb") as f:
            data = np.load(f)[arr_name]
        return cls(data)

    def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
        if self.idx >= self.arr.shape[0]:
            return None

        start = self.idx
        self.idx = start + batch_size
        return self.arr[start : start + batch_size]

    def remaining(self) -> int:
        return max(0, self.arr.shape[0] - self.idx)
|
| 550 |
+
|
| 551 |
+
|
| 552 |
+
@contextmanager
def open_npz_array(path: str, arr_name: str) -> NpzArrayReader:
    """
    Open array ``arr_name`` inside the npz file at ``path`` as a reader.

    Streams the array when the .npy header is a known version and the data is
    C-ordered without object fields; otherwise falls back to loading the whole
    array into memory.
    """
    header_readers = {
        (1, 0): np.lib.format.read_array_header_1_0,
        (2, 0): np.lib.format.read_array_header_2_0,
    }
    with _open_npy_file(path, arr_name) as arr_f:
        version = np.lib.format.read_magic(arr_f)
        read_header = header_readers.get(version)
        if read_header is None:
            # Unknown format version: punt to numpy's own loader.
            yield MemoryNpzArrayReader.load(path, arr_name)
            return
        shape, fortran, dtype = read_header(arr_f)
        if fortran or dtype.hasobject:
            # Fortran order / object dtypes cannot be streamed row-by-row.
            yield MemoryNpzArrayReader.load(path, arr_name)
        else:
            yield StreamingNpzArrayReader(arr_f, shape, dtype)
|
| 568 |
+
|
| 569 |
+
|
| 570 |
+
def _read_bytes(fp, size, error_template="ran out of data"):
|
| 571 |
+
"""
|
| 572 |
+
Copied from: https://github.com/numpy/numpy/blob/fb215c76967739268de71aa4bda55dd1b062bc2e/numpy/lib/format.py#L788-L886
|
| 573 |
+
|
| 574 |
+
Read from file-like object until size bytes are read.
|
| 575 |
+
Raises ValueError if not EOF is encountered before size bytes are read.
|
| 576 |
+
Non-blocking objects only supported if they derive from io objects.
|
| 577 |
+
Required as e.g. ZipExtFile in python 2.6 can return less data than
|
| 578 |
+
requested.
|
| 579 |
+
"""
|
| 580 |
+
data = bytes()
|
| 581 |
+
while True:
|
| 582 |
+
# io files (default in python3) return None or raise on
|
| 583 |
+
# would-block, python2 file will truncate, probably nothing can be
|
| 584 |
+
# done about that. note that regular files can't be non-blocking
|
| 585 |
+
try:
|
| 586 |
+
r = fp.read(size - len(data))
|
| 587 |
+
data += r
|
| 588 |
+
if len(r) == 0 or len(data) == size:
|
| 589 |
+
break
|
| 590 |
+
except io.BlockingIOError:
|
| 591 |
+
pass
|
| 592 |
+
if len(data) != size:
|
| 593 |
+
msg = "EOF: reading %s, expected %d bytes got %d"
|
| 594 |
+
raise ValueError(msg % (error_template, size, len(data)))
|
| 595 |
+
else:
|
| 596 |
+
return data
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
@contextmanager
|
| 600 |
+
def _open_npy_file(path: str, arr_name: str):
|
| 601 |
+
with open(path, "rb") as f:
|
| 602 |
+
with zipfile.ZipFile(f, "r") as zip_f:
|
| 603 |
+
if f"{arr_name}.npy" not in zip_f.namelist():
|
| 604 |
+
raise ValueError(f"missing {arr_name} in npz file")
|
| 605 |
+
with zip_f.open(f"{arr_name}.npy", "r") as arr_f:
|
| 606 |
+
yield arr_f
|
| 607 |
+
|
| 608 |
+
|
| 609 |
+
def _download_inception_model():
    """Fetch the InceptionV3 graph file to INCEPTION_V3_PATH unless already cached."""
    if os.path.exists(INCEPTION_V3_PATH):
        return
    print("downloading InceptionV3 model...")
    tmp_path = INCEPTION_V3_PATH + ".tmp"
    with requests.get(INCEPTION_V3_URL, stream=True) as resp:
        resp.raise_for_status()
        with open(tmp_path, "wb") as out:
            for chunk in tqdm(resp.iter_content(chunk_size=8192)):
                out.write(chunk)
    # Rename only after the download completes, so a partial file never
    # occupies the final path.
    os.rename(tmp_path, INCEPTION_V3_PATH)
|
| 620 |
+
|
| 621 |
+
|
| 622 |
+
def _create_feature_graph(input_batch):
    """
    Import the InceptionV3 graph for *input_batch* and return the
    (pool3, spatial) feature tensors; spatial features keep only the first
    7 channels.
    """
    _download_inception_model()
    # Random prefix keeps repeated imports from colliding in one graph.
    prefix = f"{random.randrange(2**32)}_{random.randrange(2**32)}"
    with open(INCEPTION_V3_PATH, "rb") as f:
        serialized = f.read()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(serialized)
    pool3, spatial = tf.import_graph_def(
        graph_def,
        input_map={"ExpandDims:0": input_batch},
        return_elements=[FID_POOL_NAME, FID_SPATIAL_NAME],
        name=prefix,
    )
    _update_shapes(pool3)
    spatial = spatial[..., :7]
    return pool3, spatial
|
| 637 |
+
|
| 638 |
+
|
| 639 |
+
def _create_softmax_graph(input_batch):
    """
    Build a softmax over InceptionV3's final fully-connected weights applied
    to *input_batch* (used for Inception Score class probabilities).
    """
    _download_inception_model()
    # Random prefix keeps repeated imports from colliding in one graph.
    prefix = f"{random.randrange(2**32)}_{random.randrange(2**32)}"
    with open(INCEPTION_V3_PATH, "rb") as f:
        serialized = f.read()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(serialized)
    (matmul,) = tf.import_graph_def(
        graph_def, return_elements=["softmax/logits/MatMul"], name=prefix
    )
    # Reuse the pretrained FC weight matrix but feed our own activations.
    w = matmul.inputs[1]
    logits = tf.matmul(input_batch, w)
    return tf.nn.softmax(logits)
|
| 651 |
+
|
| 652 |
+
|
| 653 |
+
def _update_shapes(pool3):
    # Rewrite the static shape recorded on every op output in the imported
    # Inception graph so a leading batch dimension of 1 becomes None (variable).
    # https://github.com/bioinf-jku/TTUR/blob/73ab375cdf952a12686d9aa7978567771084da42/fid.py#L50-L63
    ops = pool3.graph.get_operations()
    for op in ops:
        for o in op.outputs:
            shape = o.get_shape()
            if shape._dims is not None:  # pylint: disable=protected-access
                # shape = [s.value for s in shape] TF 1.x
                shape = [s for s in shape]  # TF 2.x
                new_shape = []
                for j, s in enumerate(shape):
                    if s == 1 and j == 0:
                        # Replace the hard-coded batch dim with None so the
                        # graph accepts arbitrary batch sizes.
                        new_shape.append(None)
                    else:
                        new_shape.append(s)
                # HACK: writes the private _shape_val directly — TF exposes no
                # public API to overwrite an output's inferred shape in place.
                o.__dict__["_shape_val"] = tf.TensorShape(new_shape)
    return pool3
|
| 670 |
+
|
| 671 |
+
|
| 672 |
+
def _numpy_partition(arr, kth, **kwargs):
|
| 673 |
+
num_workers = min(cpu_count(), len(arr))
|
| 674 |
+
chunk_size = len(arr) // num_workers
|
| 675 |
+
extra = len(arr) % num_workers
|
| 676 |
+
|
| 677 |
+
start_idx = 0
|
| 678 |
+
batches = []
|
| 679 |
+
for i in range(num_workers):
|
| 680 |
+
size = chunk_size + (1 if i < extra else 0)
|
| 681 |
+
batches.append(arr[start_idx : start_idx + size])
|
| 682 |
+
start_idx += size
|
| 683 |
+
|
| 684 |
+
with ThreadPool(num_workers) as pool:
|
| 685 |
+
return list(pool.map(partial(np.partition, kth=kth, **kwargs), batches))
|
| 686 |
+
|
| 687 |
+
|
| 688 |
+
# Script entry point: run the evaluation pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|
Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-000-SiT-XL-2-GVP-velocity-None/log.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[[34m2026-02-03 06:38:01[0m] Experiment directory created at results_256_gvp_disp/depth-mu-2-000-SiT-XL-2-GVP-velocity-None
|
| 2 |
+
[[34m2026-02-03 06:38:35[0m] Combined_model Parameters: 729,629,632
|
| 3 |
+
[[34m2026-02-03 06:38:35[0m] Total trainable parameters: 53,910,176
|
| 4 |
+
[[34m2026-02-03 06:38:38[0m] Dataset contains 1,281,167 images (/gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/)
|
| 5 |
+
[[34m2026-02-03 06:38:38[0m] Training for 100000 epochs...
|
| 6 |
+
[[34m2026-02-03 06:38:38[0m] Beginning epoch 0...
|
| 7 |
+
[[34m2026-02-03 06:39:30[0m] (step=0000100) Train Loss: -1.8935, Train Steps/Sec: 1.91
|
| 8 |
+
[[34m2026-02-03 06:40:20[0m] (step=0000200) Train Loss: -2.2925, Train Steps/Sec: 2.04
|
| 9 |
+
[[34m2026-02-03 06:41:10[0m] (step=0000300) Train Loss: -2.2953, Train Steps/Sec: 1.99
|
| 10 |
+
[[34m2026-02-03 06:42:00[0m] (step=0000400) Train Loss: -2.2904, Train Steps/Sec: 1.99
|
| 11 |
+
[[34m2026-02-03 06:42:50[0m] (step=0000500) Train Loss: -2.2938, Train Steps/Sec: 2.00
|
Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-001-SiT-XL-2-GVP-velocity-None/log.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[[34m2026-02-03 06:44:16[0m] Experiment directory created at results_256_gvp_disp/depth-mu-2-001-SiT-XL-2-GVP-velocity-None
|
Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/log.txt
ADDED
|
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[[34m2026-02-03 06:45:00[0m] Experiment directory created at results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None
|
| 2 |
+
[[34m2026-02-03 06:45:32[0m] Combined_model Parameters: 729,629,632
|
| 3 |
+
[[34m2026-02-03 06:45:32[0m] Total trainable parameters: 53,910,176
|
| 4 |
+
[[34m2026-02-03 06:45:34[0m] Dataset contains 1,281,167 images (/gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/)
|
| 5 |
+
[[34m2026-02-03 06:45:34[0m] Training for 100000 epochs...
|
| 6 |
+
[[34m2026-02-03 06:45:34[0m] Beginning epoch 0...
|
| 7 |
+
[[34m2026-02-03 06:47:01[0m] (step=0000100) Train Loss: -3.1750, Train Steps/Sec: 1.15
|
| 8 |
+
[[34m2026-02-03 06:48:24[0m] (step=0000200) Train Loss: -3.6610, Train Steps/Sec: 1.20
|
| 9 |
+
[[34m2026-02-03 06:49:47[0m] (step=0000300) Train Loss: -3.6752, Train Steps/Sec: 1.20
|
| 10 |
+
[[34m2026-02-03 06:51:10[0m] (step=0000400) Train Loss: -3.6767, Train Steps/Sec: 1.20
|
| 11 |
+
[[34m2026-02-03 06:52:33[0m] (step=0000500) Train Loss: -3.6782, Train Steps/Sec: 1.20
|
| 12 |
+
[[34m2026-02-03 06:53:56[0m] (step=0000600) Train Loss: -3.6781, Train Steps/Sec: 1.20
|
| 13 |
+
[[34m2026-02-03 06:55:21[0m] (step=0000700) Train Loss: -3.6788, Train Steps/Sec: 1.18
|
| 14 |
+
[[34m2026-02-03 06:57:50[0m] (step=0000800) Train Loss: -3.6797, Train Steps/Sec: 0.67
|
| 15 |
+
[[34m2026-02-03 07:00:57[0m] (step=0000900) Train Loss: -3.6833, Train Steps/Sec: 0.54
|
| 16 |
+
[[34m2026-02-03 07:04:02[0m] (step=0001000) Train Loss: -3.6793, Train Steps/Sec: 0.54
|
| 17 |
+
[[34m2026-02-03 07:07:08[0m] (step=0001100) Train Loss: -3.6790, Train Steps/Sec: 0.54
|
| 18 |
+
[[34m2026-02-03 07:10:14[0m] (step=0001200) Train Loss: -3.6799, Train Steps/Sec: 0.54
|
| 19 |
+
[[34m2026-02-03 07:13:20[0m] (step=0001300) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 20 |
+
[[34m2026-02-03 07:19:05[0m] (step=0001400) Train Loss: -3.6833, Train Steps/Sec: 0.29
|
| 21 |
+
[[34m2026-02-03 07:22:12[0m] (step=0001500) Train Loss: -3.6796, Train Steps/Sec: 0.53
|
| 22 |
+
[[34m2026-02-03 07:25:19[0m] (step=0001600) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 23 |
+
[[34m2026-02-03 07:28:25[0m] (step=0001700) Train Loss: -3.6843, Train Steps/Sec: 0.54
|
| 24 |
+
[[34m2026-02-03 07:31:32[0m] (step=0001800) Train Loss: -3.6813, Train Steps/Sec: 0.53
|
| 25 |
+
[[34m2026-02-03 07:34:38[0m] (step=0001900) Train Loss: -3.6828, Train Steps/Sec: 0.54
|
| 26 |
+
[[34m2026-02-03 07:37:45[0m] (step=0002000) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 27 |
+
[[34m2026-02-03 07:40:51[0m] (step=0002100) Train Loss: -3.6799, Train Steps/Sec: 0.54
|
| 28 |
+
[[34m2026-02-03 07:43:58[0m] (step=0002200) Train Loss: -3.6784, Train Steps/Sec: 0.53
|
| 29 |
+
[[34m2026-02-03 07:47:06[0m] (step=0002300) Train Loss: -3.6824, Train Steps/Sec: 0.53
|
| 30 |
+
[[34m2026-02-03 07:50:12[0m] (step=0002400) Train Loss: -3.6787, Train Steps/Sec: 0.54
|
| 31 |
+
[[34m2026-02-03 07:53:19[0m] (step=0002500) Train Loss: -3.6771, Train Steps/Sec: 0.54
|
| 32 |
+
[[34m2026-02-03 07:53:23[0m] Beginning epoch 1...
|
| 33 |
+
[[34m2026-02-03 07:56:29[0m] (step=0002600) Train Loss: -3.6847, Train Steps/Sec: 0.53
|
| 34 |
+
[[34m2026-02-03 07:59:35[0m] (step=0002700) Train Loss: -3.6829, Train Steps/Sec: 0.54
|
| 35 |
+
[[34m2026-02-03 08:02:42[0m] (step=0002800) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 36 |
+
[[34m2026-02-03 08:05:49[0m] (step=0002900) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 37 |
+
[[34m2026-02-03 08:08:55[0m] (step=0003000) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 38 |
+
[[34m2026-02-03 08:12:01[0m] (step=0003100) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 39 |
+
[[34m2026-02-03 08:15:09[0m] (step=0003200) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 40 |
+
[[34m2026-02-03 08:18:16[0m] (step=0003300) Train Loss: -3.6800, Train Steps/Sec: 0.53
|
| 41 |
+
[[34m2026-02-03 08:21:20[0m] (step=0003400) Train Loss: -3.6797, Train Steps/Sec: 0.54
|
| 42 |
+
[[34m2026-02-03 08:24:27[0m] (step=0003500) Train Loss: -3.6802, Train Steps/Sec: 0.54
|
| 43 |
+
[[34m2026-02-03 08:27:34[0m] (step=0003600) Train Loss: -3.6834, Train Steps/Sec: 0.53
|
| 44 |
+
[[34m2026-02-03 08:30:40[0m] (step=0003700) Train Loss: -3.6810, Train Steps/Sec: 0.54
|
| 45 |
+
[[34m2026-02-03 08:33:48[0m] (step=0003800) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 46 |
+
[[34m2026-02-03 08:36:55[0m] (step=0003900) Train Loss: -3.6817, Train Steps/Sec: 0.53
|
| 47 |
+
[[34m2026-02-03 08:40:01[0m] (step=0004000) Train Loss: -3.6794, Train Steps/Sec: 0.54
|
| 48 |
+
[[34m2026-02-03 08:43:08[0m] (step=0004100) Train Loss: -3.6801, Train Steps/Sec: 0.54
|
| 49 |
+
[[34m2026-02-03 08:46:15[0m] (step=0004200) Train Loss: -3.6850, Train Steps/Sec: 0.54
|
| 50 |
+
[[34m2026-02-03 08:49:21[0m] (step=0004300) Train Loss: -3.6801, Train Steps/Sec: 0.54
|
| 51 |
+
[[34m2026-02-03 08:52:28[0m] (step=0004400) Train Loss: -3.6816, Train Steps/Sec: 0.54
|
| 52 |
+
[[34m2026-02-03 08:55:35[0m] (step=0004500) Train Loss: -3.6820, Train Steps/Sec: 0.53
|
| 53 |
+
[[34m2026-02-03 08:58:42[0m] (step=0004600) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 54 |
+
[[34m2026-02-03 09:01:49[0m] (step=0004700) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 55 |
+
[[34m2026-02-03 09:04:56[0m] (step=0004800) Train Loss: -3.6797, Train Steps/Sec: 0.53
|
| 56 |
+
[[34m2026-02-03 09:08:03[0m] (step=0004900) Train Loss: -3.6800, Train Steps/Sec: 0.53
|
| 57 |
+
[[34m2026-02-03 09:11:10[0m] (step=0005000) Train Loss: -3.6831, Train Steps/Sec: 0.54
|
| 58 |
+
[[34m2026-02-03 09:11:18[0m] Beginning epoch 2...
|
| 59 |
+
[[34m2026-02-03 09:14:20[0m] (step=0005100) Train Loss: -3.6803, Train Steps/Sec: 0.52
|
| 60 |
+
[[34m2026-02-03 09:17:27[0m] (step=0005200) Train Loss: -3.6804, Train Steps/Sec: 0.53
|
| 61 |
+
[[34m2026-02-03 09:20:34[0m] (step=0005300) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 62 |
+
[[34m2026-02-03 09:23:40[0m] (step=0005400) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 63 |
+
[[34m2026-02-03 09:26:47[0m] (step=0005500) Train Loss: -3.6819, Train Steps/Sec: 0.53
|
| 64 |
+
[[34m2026-02-03 09:29:54[0m] (step=0005600) Train Loss: -3.6834, Train Steps/Sec: 0.54
|
| 65 |
+
[[34m2026-02-03 09:33:01[0m] (step=0005700) Train Loss: -3.6805, Train Steps/Sec: 0.53
|
| 66 |
+
[[34m2026-02-03 09:36:08[0m] (step=0005800) Train Loss: -3.6827, Train Steps/Sec: 0.53
|
| 67 |
+
[[34m2026-02-03 09:39:15[0m] (step=0005900) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 68 |
+
[[34m2026-02-03 09:42:20[0m] (step=0006000) Train Loss: -3.6807, Train Steps/Sec: 0.54
|
| 69 |
+
[[34m2026-02-03 09:45:27[0m] (step=0006100) Train Loss: -3.6814, Train Steps/Sec: 0.53
|
| 70 |
+
[[34m2026-02-03 09:48:34[0m] (step=0006200) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 71 |
+
[[34m2026-02-03 09:51:40[0m] (step=0006300) Train Loss: -3.6799, Train Steps/Sec: 0.54
|
| 72 |
+
[[34m2026-02-03 09:54:46[0m] (step=0006400) Train Loss: -3.6797, Train Steps/Sec: 0.54
|
| 73 |
+
[[34m2026-02-03 09:57:54[0m] (step=0006500) Train Loss: -3.6820, Train Steps/Sec: 0.53
|
| 74 |
+
[[34m2026-02-03 10:01:01[0m] (step=0006600) Train Loss: -3.6789, Train Steps/Sec: 0.53
|
| 75 |
+
[[34m2026-02-03 10:04:08[0m] (step=0006700) Train Loss: -3.6804, Train Steps/Sec: 0.53
|
| 76 |
+
[[34m2026-02-03 10:07:15[0m] (step=0006800) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 77 |
+
[[34m2026-02-03 10:10:22[0m] (step=0006900) Train Loss: -3.6787, Train Steps/Sec: 0.54
|
| 78 |
+
[[34m2026-02-03 10:13:29[0m] (step=0007000) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 79 |
+
[[34m2026-02-03 10:16:35[0m] (step=0007100) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 80 |
+
[[34m2026-02-03 10:19:42[0m] (step=0007200) Train Loss: -3.6820, Train Steps/Sec: 0.54
|
| 81 |
+
[[34m2026-02-03 10:22:49[0m] (step=0007300) Train Loss: -3.6810, Train Steps/Sec: 0.53
|
| 82 |
+
[[34m2026-02-03 10:25:56[0m] (step=0007400) Train Loss: -3.6828, Train Steps/Sec: 0.53
|
| 83 |
+
[[34m2026-02-03 10:29:04[0m] (step=0007500) Train Loss: -3.6821, Train Steps/Sec: 0.53
|
| 84 |
+
[[34m2026-02-03 10:29:16[0m] Beginning epoch 3...
|
| 85 |
+
[[34m2026-02-03 10:32:13[0m] (step=0007600) Train Loss: -3.6794, Train Steps/Sec: 0.53
|
| 86 |
+
[[34m2026-02-03 10:35:20[0m] (step=0007700) Train Loss: -3.6809, Train Steps/Sec: 0.53
|
| 87 |
+
[[34m2026-02-03 10:38:27[0m] (step=0007800) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 88 |
+
[[34m2026-02-03 10:41:34[0m] (step=0007900) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 89 |
+
[[34m2026-02-03 10:44:41[0m] (step=0008000) Train Loss: -3.6852, Train Steps/Sec: 0.53
|
| 90 |
+
[[34m2026-02-03 10:47:47[0m] (step=0008100) Train Loss: -3.6820, Train Steps/Sec: 0.54
|
| 91 |
+
[[34m2026-02-03 10:50:54[0m] (step=0008200) Train Loss: -3.6798, Train Steps/Sec: 0.54
|
| 92 |
+
[[34m2026-02-03 10:54:01[0m] (step=0008300) Train Loss: -3.6772, Train Steps/Sec: 0.54
|
| 93 |
+
[[34m2026-02-03 10:57:07[0m] (step=0008400) Train Loss: -3.6800, Train Steps/Sec: 0.54
|
| 94 |
+
[[34m2026-02-03 11:00:13[0m] (step=0008500) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 95 |
+
[[34m2026-02-03 11:03:19[0m] (step=0008600) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 96 |
+
[[34m2026-02-03 11:06:23[0m] (step=0008700) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 97 |
+
[[34m2026-02-03 11:09:29[0m] (step=0008800) Train Loss: -3.6762, Train Steps/Sec: 0.54
|
| 98 |
+
[[34m2026-02-03 11:12:36[0m] (step=0008900) Train Loss: -3.6838, Train Steps/Sec: 0.54
|
| 99 |
+
[[34m2026-02-03 11:15:43[0m] (step=0009000) Train Loss: -3.6826, Train Steps/Sec: 0.53
|
| 100 |
+
[[34m2026-02-03 11:18:50[0m] (step=0009100) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 101 |
+
[[34m2026-02-03 11:21:57[0m] (step=0009200) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 102 |
+
[[34m2026-02-03 11:25:04[0m] (step=0009300) Train Loss: -3.6819, Train Steps/Sec: 0.54
|
| 103 |
+
[[34m2026-02-03 11:28:11[0m] (step=0009400) Train Loss: -3.6785, Train Steps/Sec: 0.53
|
| 104 |
+
[[34m2026-02-03 11:31:17[0m] (step=0009500) Train Loss: -3.6769, Train Steps/Sec: 0.54
|
| 105 |
+
[[34m2026-02-03 11:34:24[0m] (step=0009600) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 106 |
+
[[34m2026-02-03 11:37:31[0m] (step=0009700) Train Loss: -3.6856, Train Steps/Sec: 0.54
|
| 107 |
+
[[34m2026-02-03 11:40:38[0m] (step=0009800) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 108 |
+
[[34m2026-02-03 11:43:45[0m] (step=0009900) Train Loss: -3.6805, Train Steps/Sec: 0.54
|
| 109 |
+
[[34m2026-02-03 11:46:51[0m] (step=0010000) Train Loss: -3.6819, Train Steps/Sec: 0.54
|
| 110 |
+
[[34m2026-02-03 11:47:07[0m] Beginning epoch 4...
|
| 111 |
+
[[34m2026-02-03 11:50:01[0m] (step=0010100) Train Loss: -3.6850, Train Steps/Sec: 0.53
|
| 112 |
+
[[34m2026-02-03 11:53:08[0m] (step=0010200) Train Loss: -3.6816, Train Steps/Sec: 0.53
|
| 113 |
+
[[34m2026-02-03 11:56:15[0m] (step=0010300) Train Loss: -3.6836, Train Steps/Sec: 0.53
|
| 114 |
+
[[34m2026-02-03 11:59:22[0m] (step=0010400) Train Loss: -3.6789, Train Steps/Sec: 0.53
|
| 115 |
+
[[34m2026-02-03 12:02:29[0m] (step=0010500) Train Loss: -3.6793, Train Steps/Sec: 0.54
|
| 116 |
+
[[34m2026-02-03 12:05:36[0m] (step=0010600) Train Loss: -3.6834, Train Steps/Sec: 0.54
|
| 117 |
+
[[34m2026-02-03 12:08:42[0m] (step=0010700) Train Loss: -3.6842, Train Steps/Sec: 0.54
|
| 118 |
+
[[34m2026-02-03 12:11:49[0m] (step=0010800) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 119 |
+
[[34m2026-02-03 12:14:56[0m] (step=0010900) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 120 |
+
[[34m2026-02-03 12:18:03[0m] (step=0011000) Train Loss: -3.6843, Train Steps/Sec: 0.53
|
| 121 |
+
[[34m2026-02-03 12:21:09[0m] (step=0011100) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 122 |
+
[[34m2026-02-03 12:24:15[0m] (step=0011200) Train Loss: -3.6787, Train Steps/Sec: 0.54
|
| 123 |
+
[[34m2026-02-03 12:27:20[0m] (step=0011300) Train Loss: -3.6828, Train Steps/Sec: 0.54
|
| 124 |
+
[[34m2026-02-03 12:30:27[0m] (step=0011400) Train Loss: -3.6830, Train Steps/Sec: 0.53
|
| 125 |
+
[[34m2026-02-03 12:33:34[0m] (step=0011500) Train Loss: -3.6784, Train Steps/Sec: 0.53
|
| 126 |
+
[[34m2026-02-03 12:36:41[0m] (step=0011600) Train Loss: -3.6831, Train Steps/Sec: 0.53
|
| 127 |
+
[[34m2026-02-03 12:39:48[0m] (step=0011700) Train Loss: -3.6834, Train Steps/Sec: 0.53
|
| 128 |
+
[[34m2026-02-03 12:42:55[0m] (step=0011800) Train Loss: -3.6808, Train Steps/Sec: 0.53
|
| 129 |
+
[[34m2026-02-03 12:46:02[0m] (step=0011900) Train Loss: -3.6810, Train Steps/Sec: 0.54
|
| 130 |
+
[[34m2026-02-03 12:49:09[0m] (step=0012000) Train Loss: -3.6821, Train Steps/Sec: 0.53
|
| 131 |
+
[[34m2026-02-03 12:52:16[0m] (step=0012100) Train Loss: -3.6827, Train Steps/Sec: 0.53
|
| 132 |
+
[[34m2026-02-03 12:55:23[0m] (step=0012200) Train Loss: -3.6827, Train Steps/Sec: 0.54
|
| 133 |
+
[[34m2026-02-03 12:58:30[0m] (step=0012300) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 134 |
+
[[34m2026-02-03 13:01:37[0m] (step=0012400) Train Loss: -3.6818, Train Steps/Sec: 0.53
|
| 135 |
+
[[34m2026-02-03 13:04:44[0m] (step=0012500) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 136 |
+
[[34m2026-02-03 13:05:03[0m] Beginning epoch 5...
|
| 137 |
+
[[34m2026-02-03 13:07:54[0m] (step=0012600) Train Loss: -3.6814, Train Steps/Sec: 0.52
|
| 138 |
+
[[34m2026-02-03 13:11:01[0m] (step=0012700) Train Loss: -3.6842, Train Steps/Sec: 0.53
|
| 139 |
+
[[34m2026-02-03 13:14:08[0m] (step=0012800) Train Loss: -3.6816, Train Steps/Sec: 0.54
|
| 140 |
+
[[34m2026-02-03 13:17:15[0m] (step=0012900) Train Loss: -3.6790, Train Steps/Sec: 0.53
|
| 141 |
+
[[34m2026-02-03 13:20:22[0m] (step=0013000) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 142 |
+
[[34m2026-02-03 13:23:29[0m] (step=0013100) Train Loss: -3.6792, Train Steps/Sec: 0.53
|
| 143 |
+
[[34m2026-02-03 13:26:36[0m] (step=0013200) Train Loss: -3.6836, Train Steps/Sec: 0.53
|
| 144 |
+
[[34m2026-02-03 13:29:43[0m] (step=0013300) Train Loss: -3.6845, Train Steps/Sec: 0.54
|
| 145 |
+
[[34m2026-02-03 13:32:50[0m] (step=0013400) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 146 |
+
[[34m2026-02-03 13:35:57[0m] (step=0013500) Train Loss: -3.6798, Train Steps/Sec: 0.53
|
| 147 |
+
[[34m2026-02-03 13:39:04[0m] (step=0013600) Train Loss: -3.6828, Train Steps/Sec: 0.54
|
| 148 |
+
[[34m2026-02-03 13:42:11[0m] (step=0013700) Train Loss: -3.6799, Train Steps/Sec: 0.54
|
| 149 |
+
[[34m2026-02-03 13:45:18[0m] (step=0013800) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 150 |
+
[[34m2026-02-03 13:48:22[0m] (step=0013900) Train Loss: -3.6831, Train Steps/Sec: 0.54
|
| 151 |
+
[[34m2026-02-03 13:51:29[0m] (step=0014000) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 152 |
+
[[34m2026-02-03 13:54:36[0m] (step=0014100) Train Loss: -3.6823, Train Steps/Sec: 0.53
|
| 153 |
+
[[34m2026-02-03 13:57:43[0m] (step=0014200) Train Loss: -3.6795, Train Steps/Sec: 0.54
|
| 154 |
+
[[34m2026-02-03 14:00:50[0m] (step=0014300) Train Loss: -3.6795, Train Steps/Sec: 0.53
|
| 155 |
+
[[34m2026-02-03 14:03:57[0m] (step=0014400) Train Loss: -3.6838, Train Steps/Sec: 0.54
|
| 156 |
+
[[34m2026-02-03 14:07:04[0m] (step=0014500) Train Loss: -3.6832, Train Steps/Sec: 0.53
|
| 157 |
+
[[34m2026-02-03 14:10:11[0m] (step=0014600) Train Loss: -3.6832, Train Steps/Sec: 0.53
|
| 158 |
+
[[34m2026-02-03 14:13:18[0m] (step=0014700) Train Loss: -3.6784, Train Steps/Sec: 0.54
|
| 159 |
+
[[34m2026-02-03 14:16:24[0m] (step=0014800) Train Loss: -3.6824, Train Steps/Sec: 0.54
|
| 160 |
+
[[34m2026-02-03 14:19:31[0m] (step=0014900) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 161 |
+
[[34m2026-02-03 14:22:38[0m] (step=0015000) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 162 |
+
[[34m2026-02-03 14:23:01[0m] Beginning epoch 6...
|
| 163 |
+
[[34m2026-02-03 14:25:48[0m] (step=0015100) Train Loss: -3.6831, Train Steps/Sec: 0.53
|
| 164 |
+
[[34m2026-02-03 14:28:55[0m] (step=0015200) Train Loss: -3.6786, Train Steps/Sec: 0.53
|
| 165 |
+
[[34m2026-02-03 14:32:02[0m] (step=0015300) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 166 |
+
[[34m2026-02-03 14:35:08[0m] (step=0015400) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 167 |
+
[[34m2026-02-03 14:38:15[0m] (step=0015500) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 168 |
+
[[34m2026-02-03 14:41:21[0m] (step=0015600) Train Loss: -3.6796, Train Steps/Sec: 0.54
|
| 169 |
+
[[34m2026-02-03 14:44:28[0m] (step=0015700) Train Loss: -3.6839, Train Steps/Sec: 0.54
|
| 170 |
+
[[34m2026-02-03 14:47:36[0m] (step=0015800) Train Loss: -3.6846, Train Steps/Sec: 0.53
|
| 171 |
+
[[34m2026-02-03 14:50:43[0m] (step=0015900) Train Loss: -3.6828, Train Steps/Sec: 0.53
|
| 172 |
+
[[34m2026-02-03 14:53:50[0m] (step=0016000) Train Loss: -3.6828, Train Steps/Sec: 0.54
|
| 173 |
+
[[34m2026-02-03 14:56:57[0m] (step=0016100) Train Loss: -3.6789, Train Steps/Sec: 0.53
|
| 174 |
+
[[34m2026-02-03 15:00:04[0m] (step=0016200) Train Loss: -3.6810, Train Steps/Sec: 0.53
|
| 175 |
+
[[34m2026-02-03 15:03:11[0m] (step=0016300) Train Loss: -3.6799, Train Steps/Sec: 0.53
|
| 176 |
+
[[34m2026-02-03 15:06:19[0m] (step=0016400) Train Loss: -3.6806, Train Steps/Sec: 0.53
|
| 177 |
+
[[34m2026-02-03 15:09:24[0m] (step=0016500) Train Loss: -3.6828, Train Steps/Sec: 0.54
|
| 178 |
+
[[34m2026-02-03 15:12:31[0m] (step=0016600) Train Loss: -3.6781, Train Steps/Sec: 0.54
|
| 179 |
+
[[34m2026-02-03 15:15:37[0m] (step=0016700) Train Loss: -3.6830, Train Steps/Sec: 0.54
|
| 180 |
+
[[34m2026-02-03 15:18:44[0m] (step=0016800) Train Loss: -3.6756, Train Steps/Sec: 0.54
|
| 181 |
+
[[34m2026-02-03 15:21:51[0m] (step=0016900) Train Loss: -3.6798, Train Steps/Sec: 0.54
|
| 182 |
+
[[34m2026-02-03 15:24:58[0m] (step=0017000) Train Loss: -3.6813, Train Steps/Sec: 0.53
|
| 183 |
+
[[34m2026-02-03 15:28:04[0m] (step=0017100) Train Loss: -3.6807, Train Steps/Sec: 0.54
|
| 184 |
+
[[34m2026-02-03 15:31:11[0m] (step=0017200) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 185 |
+
[[34m2026-02-03 15:34:18[0m] (step=0017300) Train Loss: -3.6800, Train Steps/Sec: 0.54
|
| 186 |
+
[[34m2026-02-03 15:37:25[0m] (step=0017400) Train Loss: -3.6836, Train Steps/Sec: 0.53
|
| 187 |
+
[[34m2026-02-03 15:40:32[0m] (step=0017500) Train Loss: -3.6807, Train Steps/Sec: 0.53
|
| 188 |
+
[[34m2026-02-03 15:40:59[0m] Beginning epoch 7...
|
| 189 |
+
[[34m2026-02-03 15:43:42[0m] (step=0017600) Train Loss: -3.6829, Train Steps/Sec: 0.53
|
| 190 |
+
[[34m2026-02-03 15:46:49[0m] (step=0017700) Train Loss: -3.6790, Train Steps/Sec: 0.53
|
| 191 |
+
[[34m2026-02-03 15:49:56[0m] (step=0017800) Train Loss: -3.6850, Train Steps/Sec: 0.53
|
| 192 |
+
[[34m2026-02-03 15:53:04[0m] (step=0017900) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 193 |
+
[[34m2026-02-03 15:56:11[0m] (step=0018000) Train Loss: -3.6835, Train Steps/Sec: 0.53
|
| 194 |
+
[[34m2026-02-03 15:59:18[0m] (step=0018100) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 195 |
+
[[34m2026-02-03 16:02:25[0m] (step=0018200) Train Loss: -3.6788, Train Steps/Sec: 0.53
|
| 196 |
+
[[34m2026-02-03 16:05:31[0m] (step=0018300) Train Loss: -3.6786, Train Steps/Sec: 0.54
|
| 197 |
+
[[34m2026-02-03 16:08:39[0m] (step=0018400) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 198 |
+
[[34m2026-02-03 16:11:46[0m] (step=0018500) Train Loss: -3.6809, Train Steps/Sec: 0.53
|
| 199 |
+
[[34m2026-02-03 16:14:52[0m] (step=0018600) Train Loss: -3.6803, Train Steps/Sec: 0.54
|
| 200 |
+
[[34m2026-02-03 16:17:59[0m] (step=0018700) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 201 |
+
[[34m2026-02-03 16:21:06[0m] (step=0018800) Train Loss: -3.6819, Train Steps/Sec: 0.53
|
| 202 |
+
[[34m2026-02-03 16:24:12[0m] (step=0018900) Train Loss: -3.6834, Train Steps/Sec: 0.54
|
| 203 |
+
[[34m2026-02-03 16:27:19[0m] (step=0019000) Train Loss: -3.6824, Train Steps/Sec: 0.54
|
| 204 |
+
[[34m2026-02-03 16:30:24[0m] (step=0019100) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 205 |
+
[[34m2026-02-03 16:33:31[0m] (step=0019200) Train Loss: -3.6826, Train Steps/Sec: 0.53
|
| 206 |
+
[[34m2026-02-03 16:36:38[0m] (step=0019300) Train Loss: -3.6774, Train Steps/Sec: 0.53
|
| 207 |
+
[[34m2026-02-03 16:39:45[0m] (step=0019400) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 208 |
+
[[34m2026-02-03 16:42:51[0m] (step=0019500) Train Loss: -3.6837, Train Steps/Sec: 0.54
|
| 209 |
+
[[34m2026-02-03 16:45:59[0m] (step=0019600) Train Loss: -3.6828, Train Steps/Sec: 0.53
|
| 210 |
+
[[34m2026-02-03 16:49:06[0m] (step=0019700) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 211 |
+
[[34m2026-02-03 16:52:13[0m] (step=0019800) Train Loss: -3.6828, Train Steps/Sec: 0.53
|
| 212 |
+
[[34m2026-02-03 16:55:20[0m] (step=0019900) Train Loss: -3.6832, Train Steps/Sec: 0.53
|
| 213 |
+
[[34m2026-02-03 16:58:27[0m] (step=0020000) Train Loss: -3.6837, Train Steps/Sec: 0.54
|
| 214 |
+
[[34m2026-02-03 16:58:57[0m] Beginning epoch 8...
|
| 215 |
+
[[34m2026-02-03 17:01:37[0m] (step=0020100) Train Loss: -3.6820, Train Steps/Sec: 0.52
|
| 216 |
+
[[34m2026-02-03 17:04:45[0m] (step=0020200) Train Loss: -3.6798, Train Steps/Sec: 0.53
|
| 217 |
+
[[34m2026-02-03 17:07:52[0m] (step=0020300) Train Loss: -3.6807, Train Steps/Sec: 0.53
|
| 218 |
+
[[34m2026-02-03 17:10:59[0m] (step=0020400) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 219 |
+
[[34m2026-02-03 17:14:05[0m] (step=0020500) Train Loss: -3.6794, Train Steps/Sec: 0.54
|
| 220 |
+
[[34m2026-02-03 17:17:13[0m] (step=0020600) Train Loss: -3.6833, Train Steps/Sec: 0.53
|
| 221 |
+
[[34m2026-02-03 17:20:20[0m] (step=0020700) Train Loss: -3.6802, Train Steps/Sec: 0.53
|
| 222 |
+
[[34m2026-02-03 17:23:27[0m] (step=0020800) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 223 |
+
[[34m2026-02-03 17:26:34[0m] (step=0020900) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 224 |
+
[[34m2026-02-03 17:29:41[0m] (step=0021000) Train Loss: -3.6795, Train Steps/Sec: 0.53
|
| 225 |
+
[[34m2026-02-03 17:32:48[0m] (step=0021100) Train Loss: -3.6794, Train Steps/Sec: 0.53
|
| 226 |
+
[[34m2026-02-03 17:35:55[0m] (step=0021200) Train Loss: 3.9167, Train Steps/Sec: 0.53
|
| 227 |
+
[[34m2026-02-03 17:39:02[0m] (step=0021300) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 228 |
+
[[34m2026-02-03 17:42:09[0m] (step=0021400) Train Loss: -3.6805, Train Steps/Sec: 0.53
|
| 229 |
+
[[34m2026-02-03 17:45:16[0m] (step=0021500) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 230 |
+
[[34m2026-02-03 17:48:23[0m] (step=0021600) Train Loss: -3.6812, Train Steps/Sec: 0.54
|
| 231 |
+
[[34m2026-02-03 17:51:28[0m] (step=0021700) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 232 |
+
[[34m2026-02-03 17:54:34[0m] (step=0021800) Train Loss: -3.6846, Train Steps/Sec: 0.54
|
| 233 |
+
[[34m2026-02-03 17:57:41[0m] (step=0021900) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 234 |
+
[[34m2026-02-03 18:00:48[0m] (step=0022000) Train Loss: -3.6807, Train Steps/Sec: 0.54
|
| 235 |
+
[[34m2026-02-03 18:03:55[0m] (step=0022100) Train Loss: -3.6799, Train Steps/Sec: 0.53
|
| 236 |
+
[[34m2026-02-03 18:07:02[0m] (step=0022200) Train Loss: -3.6788, Train Steps/Sec: 0.53
|
| 237 |
+
[[34m2026-02-03 18:10:09[0m] (step=0022300) Train Loss: -3.6821, Train Steps/Sec: 0.53
|
| 238 |
+
[[34m2026-02-03 18:13:16[0m] (step=0022400) Train Loss: -3.6808, Train Steps/Sec: 0.53
|
| 239 |
+
[[34m2026-02-03 18:16:24[0m] (step=0022500) Train Loss: -3.6836, Train Steps/Sec: 0.53
|
| 240 |
+
[[34m2026-02-03 18:16:58[0m] Beginning epoch 9...
|
| 241 |
+
[[34m2026-02-03 18:19:34[0m] (step=0022600) Train Loss: -3.6835, Train Steps/Sec: 0.53
|
| 242 |
+
[[34m2026-02-03 18:22:40[0m] (step=0022700) Train Loss: -3.6848, Train Steps/Sec: 0.54
|
| 243 |
+
[[34m2026-02-03 18:25:47[0m] (step=0022800) Train Loss: -3.6778, Train Steps/Sec: 0.54
|
| 244 |
+
[[34m2026-02-03 18:28:53[0m] (step=0022900) Train Loss: -3.6829, Train Steps/Sec: 0.54
|
| 245 |
+
[[34m2026-02-03 18:32:00[0m] (step=0023000) Train Loss: -3.6807, Train Steps/Sec: 0.54
|
| 246 |
+
[[34m2026-02-03 18:35:07[0m] (step=0023100) Train Loss: -3.6846, Train Steps/Sec: 0.53
|
| 247 |
+
[[34m2026-02-03 18:38:14[0m] (step=0023200) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 248 |
+
[[34m2026-02-03 18:41:21[0m] (step=0023300) Train Loss: -3.6807, Train Steps/Sec: 0.53
|
| 249 |
+
[[34m2026-02-03 18:44:28[0m] (step=0023400) Train Loss: -3.6812, Train Steps/Sec: 0.54
|
| 250 |
+
[[34m2026-02-03 18:47:35[0m] (step=0023500) Train Loss: -3.6811, Train Steps/Sec: 0.53
|
| 251 |
+
[[34m2026-02-03 18:50:42[0m] (step=0023600) Train Loss: -3.6800, Train Steps/Sec: 0.53
|
| 252 |
+
[[34m2026-02-03 18:53:49[0m] (step=0023700) Train Loss: -3.6848, Train Steps/Sec: 0.53
|
| 253 |
+
[[34m2026-02-03 18:56:56[0m] (step=0023800) Train Loss: -3.6824, Train Steps/Sec: 0.54
|
| 254 |
+
[[34m2026-02-03 19:00:03[0m] (step=0023900) Train Loss: -3.6820, Train Steps/Sec: 0.54
|
| 255 |
+
[[34m2026-02-03 19:03:09[0m] (step=0024000) Train Loss: -3.6848, Train Steps/Sec: 0.54
|
| 256 |
+
[[34m2026-02-03 19:06:16[0m] (step=0024100) Train Loss: -3.6791, Train Steps/Sec: 0.54
|
| 257 |
+
[[34m2026-02-03 19:09:22[0m] (step=0024200) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 258 |
+
[[34m2026-02-03 19:12:30[0m] (step=0024300) Train Loss: -3.6800, Train Steps/Sec: 0.53
|
| 259 |
+
[[34m2026-02-03 19:15:35[0m] (step=0024400) Train Loss: -3.6792, Train Steps/Sec: 0.54
|
| 260 |
+
[[34m2026-02-03 19:18:42[0m] (step=0024500) Train Loss: -3.6807, Train Steps/Sec: 0.53
|
| 261 |
+
[[34m2026-02-03 19:21:49[0m] (step=0024600) Train Loss: -3.6796, Train Steps/Sec: 0.53
|
| 262 |
+
[[34m2026-02-03 19:24:56[0m] (step=0024700) Train Loss: -3.6814, Train Steps/Sec: 0.53
|
| 263 |
+
[[34m2026-02-03 19:28:03[0m] (step=0024800) Train Loss: -3.6832, Train Steps/Sec: 0.54
|
| 264 |
+
[[34m2026-02-03 19:31:10[0m] (step=0024900) Train Loss: -3.6832, Train Steps/Sec: 0.54
|
| 265 |
+
[[34m2026-02-03 19:34:18[0m] (step=0025000) Train Loss: -3.6782, Train Steps/Sec: 0.53
|
| 266 |
+
[[34m2026-02-03 19:34:18[0m] Saved checkpoint to results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt
|
| 267 |
+
[[34m2026-02-03 19:34:56[0m] Beginning epoch 10...
|
| 268 |
+
[[34m2026-02-03 19:37:29[0m] (step=0025100) Train Loss: -3.6836, Train Steps/Sec: 0.52
|
| 269 |
+
[[34m2026-02-03 19:40:21[0m] Generating EMA samples...
|
| 270 |
+
[[34m2026-02-03 19:40:36[0m] (step=0025200) Train Loss: -3.6796, Train Steps/Sec: 0.53
|
| 271 |
+
[[34m2026-02-03 19:43:43[0m] (step=0025300) Train Loss: -3.6818, Train Steps/Sec: 0.53
|
| 272 |
+
[[34m2026-02-03 19:46:50[0m] (step=0025400) Train Loss: -3.6789, Train Steps/Sec: 0.54
|
| 273 |
+
[[34m2026-02-03 19:49:58[0m] (step=0025500) Train Loss: -3.6817, Train Steps/Sec: 0.53
|
| 274 |
+
[[34m2026-02-03 19:53:05[0m] (step=0025600) Train Loss: -3.6804, Train Steps/Sec: 0.53
|
| 275 |
+
[[34m2026-02-03 19:56:11[0m] (step=0025700) Train Loss: -3.6800, Train Steps/Sec: 0.54
|
| 276 |
+
[[34m2026-02-03 19:59:19[0m] (step=0025800) Train Loss: -3.6832, Train Steps/Sec: 0.53
|
| 277 |
+
[[34m2026-02-03 20:02:25[0m] (step=0025900) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 278 |
+
[[34m2026-02-03 20:05:32[0m] (step=0026000) Train Loss: -3.6812, Train Steps/Sec: 0.54
|
| 279 |
+
[[34m2026-02-03 20:08:39[0m] (step=0026100) Train Loss: -3.6827, Train Steps/Sec: 0.54
|
| 280 |
+
[[34m2026-02-03 20:11:47[0m] (step=0026200) Train Loss: -3.6793, Train Steps/Sec: 0.53
|
| 281 |
+
[[34m2026-02-03 20:14:54[0m] (step=0026300) Train Loss: -3.6817, Train Steps/Sec: 0.53
|
| 282 |
+
[[34m2026-02-03 20:18:01[0m] (step=0026400) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 283 |
+
[[34m2026-02-03 20:21:07[0m] (step=0026500) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 284 |
+
[[34m2026-02-03 20:24:14[0m] (step=0026600) Train Loss: -3.6842, Train Steps/Sec: 0.54
|
| 285 |
+
[[34m2026-02-03 20:27:20[0m] (step=0026700) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 286 |
+
[[34m2026-02-03 20:30:27[0m] (step=0026800) Train Loss: -3.6849, Train Steps/Sec: 0.53
|
| 287 |
+
[[34m2026-02-03 20:33:34[0m] (step=0026900) Train Loss: -3.6802, Train Steps/Sec: 0.53
|
| 288 |
+
[[34m2026-02-03 20:36:39[0m] (step=0027000) Train Loss: -3.6792, Train Steps/Sec: 0.54
|
| 289 |
+
[[34m2026-02-03 20:39:46[0m] (step=0027100) Train Loss: -3.6843, Train Steps/Sec: 0.54
|
| 290 |
+
[[34m2026-02-03 20:42:52[0m] (step=0027200) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 291 |
+
[[34m2026-02-03 20:45:59[0m] (step=0027300) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 292 |
+
[[34m2026-02-03 20:49:06[0m] (step=0027400) Train Loss: -3.6775, Train Steps/Sec: 0.54
|
| 293 |
+
[[34m2026-02-03 20:52:12[0m] (step=0027500) Train Loss: -3.6800, Train Steps/Sec: 0.54
|
| 294 |
+
[[34m2026-02-03 20:52:54[0m] Beginning epoch 11...
|
| 295 |
+
[[34m2026-02-03 20:55:23[0m] (step=0027600) Train Loss: -3.6853, Train Steps/Sec: 0.53
|
| 296 |
+
[[34m2026-02-03 20:58:29[0m] (step=0027700) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 297 |
+
[[34m2026-02-03 21:01:37[0m] (step=0027800) Train Loss: -3.6811, Train Steps/Sec: 0.53
|
| 298 |
+
[[34m2026-02-03 21:04:43[0m] (step=0027900) Train Loss: -3.6810, Train Steps/Sec: 0.54
|
| 299 |
+
[[34m2026-02-03 21:07:50[0m] (step=0028000) Train Loss: -3.6827, Train Steps/Sec: 0.53
|
| 300 |
+
[[34m2026-02-03 21:10:57[0m] (step=0028100) Train Loss: -3.6839, Train Steps/Sec: 0.53
|
| 301 |
+
[[34m2026-02-03 21:14:04[0m] (step=0028200) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 302 |
+
[[34m2026-02-03 21:17:11[0m] (step=0028300) Train Loss: -3.6830, Train Steps/Sec: 0.53
|
| 303 |
+
[[34m2026-02-03 21:20:18[0m] (step=0028400) Train Loss: -3.6797, Train Steps/Sec: 0.53
|
| 304 |
+
[[34m2026-02-03 21:23:25[0m] (step=0028500) Train Loss: -3.6797, Train Steps/Sec: 0.53
|
| 305 |
+
[[34m2026-02-03 21:26:32[0m] (step=0028600) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 306 |
+
[[34m2026-02-03 21:29:39[0m] (step=0028700) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 307 |
+
[[34m2026-02-03 21:32:45[0m] (step=0028800) Train Loss: -3.6812, Train Steps/Sec: 0.54
|
| 308 |
+
[[34m2026-02-03 21:35:53[0m] (step=0028900) Train Loss: -3.6858, Train Steps/Sec: 0.53
|
| 309 |
+
[[34m2026-02-03 21:38:59[0m] (step=0029000) Train Loss: -3.6842, Train Steps/Sec: 0.54
|
| 310 |
+
[[34m2026-02-03 21:42:06[0m] (step=0029100) Train Loss: -3.6836, Train Steps/Sec: 0.54
|
| 311 |
+
[[34m2026-02-03 21:45:14[0m] (step=0029200) Train Loss: -3.6813, Train Steps/Sec: 0.53
|
| 312 |
+
[[34m2026-02-03 21:48:20[0m] (step=0029300) Train Loss: -3.6783, Train Steps/Sec: 0.54
|
| 313 |
+
[[34m2026-02-03 21:51:27[0m] (step=0029400) Train Loss: -3.6829, Train Steps/Sec: 0.53
|
| 314 |
+
[[34m2026-02-03 21:54:34[0m] (step=0029500) Train Loss: -3.6812, Train Steps/Sec: 0.54
|
| 315 |
+
[[34m2026-02-03 21:57:39[0m] (step=0029600) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 316 |
+
[[34m2026-02-03 22:00:46[0m] (step=0029700) Train Loss: -3.6828, Train Steps/Sec: 0.53
|
| 317 |
+
[[34m2026-02-03 22:03:53[0m] (step=0029800) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 318 |
+
[[34m2026-02-03 22:06:59[0m] (step=0029900) Train Loss: -3.6814, Train Steps/Sec: 0.54
|
| 319 |
+
[[34m2026-02-03 22:10:06[0m] (step=0030000) Train Loss: -3.6837, Train Steps/Sec: 0.54
|
| 320 |
+
[[34m2026-02-03 22:10:51[0m] Beginning epoch 12...
|
| 321 |
+
[[34m2026-02-03 22:13:16[0m] (step=0030100) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 322 |
+
[[34m2026-02-03 22:16:22[0m] (step=0030200) Train Loss: -3.6787, Train Steps/Sec: 0.54
|
| 323 |
+
[[34m2026-02-03 22:19:29[0m] (step=0030300) Train Loss: -3.6815, Train Steps/Sec: 0.53
|
| 324 |
+
[[34m2026-02-03 22:22:37[0m] (step=0030400) Train Loss: -3.6806, Train Steps/Sec: 0.53
|
| 325 |
+
[[34m2026-02-03 22:25:44[0m] (step=0030500) Train Loss: -3.6825, Train Steps/Sec: 0.53
|
| 326 |
+
[[34m2026-02-03 22:28:51[0m] (step=0030600) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 327 |
+
[[34m2026-02-03 22:31:58[0m] (step=0030700) Train Loss: -3.6838, Train Steps/Sec: 0.54
|
| 328 |
+
[[34m2026-02-03 22:35:05[0m] (step=0030800) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 329 |
+
[[34m2026-02-03 22:38:11[0m] (step=0030900) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 330 |
+
[[34m2026-02-03 22:41:18[0m] (step=0031000) Train Loss: -3.6815, Train Steps/Sec: 0.54
|
| 331 |
+
[[34m2026-02-03 22:44:25[0m] (step=0031100) Train Loss: -3.6796, Train Steps/Sec: 0.53
|
| 332 |
+
[[34m2026-02-03 22:47:32[0m] (step=0031200) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 333 |
+
[[34m2026-02-03 22:50:39[0m] (step=0031300) Train Loss: -3.6806, Train Steps/Sec: 0.53
|
| 334 |
+
[[34m2026-02-03 22:53:46[0m] (step=0031400) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 335 |
+
[[34m2026-02-03 22:56:53[0m] (step=0031500) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 336 |
+
[[34m2026-02-03 23:00:00[0m] (step=0031600) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 337 |
+
[[34m2026-02-03 23:03:07[0m] (step=0031700) Train Loss: -3.6843, Train Steps/Sec: 0.53
|
| 338 |
+
[[34m2026-02-03 23:06:14[0m] (step=0031800) Train Loss: -3.6832, Train Steps/Sec: 0.53
|
| 339 |
+
[[34m2026-02-03 23:09:21[0m] (step=0031900) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 340 |
+
[[34m2026-02-03 23:12:28[0m] (step=0032000) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 341 |
+
[[34m2026-02-03 23:15:34[0m] (step=0032100) Train Loss: -3.6786, Train Steps/Sec: 0.54
|
| 342 |
+
[[34m2026-02-03 23:18:39[0m] (step=0032200) Train Loss: -3.6814, Train Steps/Sec: 0.54
|
| 343 |
+
[[34m2026-02-03 23:21:46[0m] (step=0032300) Train Loss: -3.6839, Train Steps/Sec: 0.54
|
| 344 |
+
[[34m2026-02-03 23:24:52[0m] (step=0032400) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 345 |
+
[[34m2026-02-03 23:27:59[0m] (step=0032500) Train Loss: -3.6809, Train Steps/Sec: 0.53
|
| 346 |
+
[[34m2026-02-03 23:28:48[0m] Beginning epoch 13...
|
| 347 |
+
[[34m2026-02-03 23:31:09[0m] (step=0032600) Train Loss: -3.6846, Train Steps/Sec: 0.53
|
| 348 |
+
[[34m2026-02-03 23:34:16[0m] (step=0032700) Train Loss: -3.6841, Train Steps/Sec: 0.53
|
| 349 |
+
[[34m2026-02-03 23:37:24[0m] (step=0032800) Train Loss: -3.6813, Train Steps/Sec: 0.53
|
| 350 |
+
[[34m2026-02-03 23:40:31[0m] (step=0032900) Train Loss: -3.6792, Train Steps/Sec: 0.53
|
| 351 |
+
[[34m2026-02-03 23:43:38[0m] (step=0033000) Train Loss: -3.6782, Train Steps/Sec: 0.53
|
| 352 |
+
[[34m2026-02-03 23:46:45[0m] (step=0033100) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 353 |
+
[[34m2026-02-03 23:49:52[0m] (step=0033200) Train Loss: -3.6819, Train Steps/Sec: 0.53
|
| 354 |
+
[[34m2026-02-03 23:52:59[0m] (step=0033300) Train Loss: -3.6793, Train Steps/Sec: 0.54
|
| 355 |
+
[[34m2026-02-03 23:56:06[0m] (step=0033400) Train Loss: -3.6810, Train Steps/Sec: 0.54
|
| 356 |
+
[[34m2026-02-03 23:59:13[0m] (step=0033500) Train Loss: -3.6816, Train Steps/Sec: 0.53
|
| 357 |
+
[[34m2026-02-04 00:02:20[0m] (step=0033600) Train Loss: -3.6831, Train Steps/Sec: 0.54
|
| 358 |
+
[[34m2026-02-04 00:05:26[0m] (step=0033700) Train Loss: -3.6831, Train Steps/Sec: 0.54
|
| 359 |
+
[[34m2026-02-04 00:08:33[0m] (step=0033800) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 360 |
+
[[34m2026-02-04 00:11:40[0m] (step=0033900) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 361 |
+
[[34m2026-02-04 00:14:46[0m] (step=0034000) Train Loss: -3.6789, Train Steps/Sec: 0.54
|
| 362 |
+
[[34m2026-02-04 00:17:54[0m] (step=0034100) Train Loss: -3.6814, Train Steps/Sec: 0.53
|
| 363 |
+
[[34m2026-02-04 00:21:00[0m] (step=0034200) Train Loss: -3.6805, Train Steps/Sec: 0.54
|
| 364 |
+
[[34m2026-02-04 00:24:07[0m] (step=0034300) Train Loss: -3.6837, Train Steps/Sec: 0.53
|
| 365 |
+
[[34m2026-02-04 00:27:14[0m] (step=0034400) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 366 |
+
[[34m2026-02-04 00:30:20[0m] (step=0034500) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 367 |
+
[[34m2026-02-04 00:33:27[0m] (step=0034600) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 368 |
+
[[34m2026-02-04 00:36:34[0m] (step=0034700) Train Loss: -3.6799, Train Steps/Sec: 0.54
|
| 369 |
+
[[34m2026-02-04 00:39:38[0m] (step=0034800) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 370 |
+
[[34m2026-02-04 00:42:45[0m] (step=0034900) Train Loss: -3.6820, Train Steps/Sec: 0.54
|
| 371 |
+
[[34m2026-02-04 00:45:52[0m] (step=0035000) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 372 |
+
[[34m2026-02-04 00:46:45[0m] Beginning epoch 14...
|
| 373 |
+
[[34m2026-02-04 00:49:01[0m] (step=0035100) Train Loss: -3.6794, Train Steps/Sec: 0.53
|
| 374 |
+
[[34m2026-02-04 00:52:08[0m] (step=0035200) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 375 |
+
[[34m2026-02-04 00:55:15[0m] (step=0035300) Train Loss: -3.6825, Train Steps/Sec: 0.53
|
| 376 |
+
[[34m2026-02-04 00:58:22[0m] (step=0035400) Train Loss: -3.6817, Train Steps/Sec: 0.53
|
| 377 |
+
[[34m2026-02-04 01:01:29[0m] (step=0035500) Train Loss: -3.6840, Train Steps/Sec: 0.54
|
| 378 |
+
[[34m2026-02-04 01:04:35[0m] (step=0035600) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 379 |
+
[[34m2026-02-04 01:07:42[0m] (step=0035700) Train Loss: -3.6796, Train Steps/Sec: 0.53
|
| 380 |
+
[[34m2026-02-04 01:10:50[0m] (step=0035800) Train Loss: -3.6834, Train Steps/Sec: 0.53
|
| 381 |
+
[[34m2026-02-04 01:13:56[0m] (step=0035900) Train Loss: -3.6763, Train Steps/Sec: 0.54
|
| 382 |
+
[[34m2026-02-04 01:17:03[0m] (step=0036000) Train Loss: -3.6837, Train Steps/Sec: 0.53
|
| 383 |
+
[[34m2026-02-04 01:20:10[0m] (step=0036100) Train Loss: -3.6806, Train Steps/Sec: 0.53
|
| 384 |
+
[[34m2026-02-04 01:23:18[0m] (step=0036200) Train Loss: -3.6821, Train Steps/Sec: 0.53
|
| 385 |
+
[[34m2026-02-04 01:26:24[0m] (step=0036300) Train Loss: -3.6772, Train Steps/Sec: 0.54
|
| 386 |
+
[[34m2026-02-04 01:29:31[0m] (step=0036400) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 387 |
+
[[34m2026-02-04 01:32:38[0m] (step=0036500) Train Loss: -3.6816, Train Steps/Sec: 0.54
|
| 388 |
+
[[34m2026-02-04 01:35:45[0m] (step=0036600) Train Loss: -3.6792, Train Steps/Sec: 0.53
|
| 389 |
+
[[34m2026-02-04 01:38:51[0m] (step=0036700) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 390 |
+
[[34m2026-02-04 01:41:59[0m] (step=0036800) Train Loss: -3.6835, Train Steps/Sec: 0.53
|
| 391 |
+
[[34m2026-02-04 01:45:05[0m] (step=0036900) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 392 |
+
[[34m2026-02-04 01:48:12[0m] (step=0037000) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 393 |
+
[[34m2026-02-04 01:51:18[0m] (step=0037100) Train Loss: -3.6775, Train Steps/Sec: 0.54
|
| 394 |
+
[[34m2026-02-04 01:54:25[0m] (step=0037200) Train Loss: -3.6796, Train Steps/Sec: 0.54
|
| 395 |
+
[[34m2026-02-04 01:57:31[0m] (step=0037300) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 396 |
+
[[34m2026-02-04 02:00:38[0m] (step=0037400) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 397 |
+
[[34m2026-02-04 02:03:43[0m] (step=0037500) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 398 |
+
[[34m2026-02-04 02:04:39[0m] Beginning epoch 15...
|
| 399 |
+
[[34m2026-02-04 02:06:52[0m] (step=0037600) Train Loss: -3.6847, Train Steps/Sec: 0.53
|
| 400 |
+
[[34m2026-02-04 02:10:00[0m] (step=0037700) Train Loss: -3.6837, Train Steps/Sec: 0.53
|
| 401 |
+
[[34m2026-02-04 02:13:06[0m] (step=0037800) Train Loss: -3.6796, Train Steps/Sec: 0.54
|
| 402 |
+
[[34m2026-02-04 02:16:13[0m] (step=0037900) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 403 |
+
[[34m2026-02-04 02:19:20[0m] (step=0038000) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 404 |
+
[[34m2026-02-04 02:22:26[0m] (step=0038100) Train Loss: -3.6803, Train Steps/Sec: 0.54
|
| 405 |
+
[[34m2026-02-04 02:25:33[0m] (step=0038200) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 406 |
+
[[34m2026-02-04 02:28:40[0m] (step=0038300) Train Loss: -3.6798, Train Steps/Sec: 0.53
|
| 407 |
+
[[34m2026-02-04 02:31:47[0m] (step=0038400) Train Loss: -3.6797, Train Steps/Sec: 0.53
|
| 408 |
+
[[34m2026-02-04 02:34:54[0m] (step=0038500) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 409 |
+
[[34m2026-02-04 02:38:01[0m] (step=0038600) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 410 |
+
[[34m2026-02-04 02:41:08[0m] (step=0038700) Train Loss: -3.6824, Train Steps/Sec: 0.54
|
| 411 |
+
[[34m2026-02-04 02:44:14[0m] (step=0038800) Train Loss: -3.6800, Train Steps/Sec: 0.54
|
| 412 |
+
[[34m2026-02-04 02:47:22[0m] (step=0038900) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 413 |
+
[[34m2026-02-04 02:50:28[0m] (step=0039000) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 414 |
+
[[34m2026-02-04 02:53:35[0m] (step=0039100) Train Loss: -3.6807, Train Steps/Sec: 0.53
|
| 415 |
+
[[34m2026-02-04 02:56:42[0m] (step=0039200) Train Loss: -3.6831, Train Steps/Sec: 0.54
|
| 416 |
+
[[34m2026-02-04 02:59:48[0m] (step=0039300) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 417 |
+
[[34m2026-02-04 03:02:55[0m] (step=0039400) Train Loss: -3.6803, Train Steps/Sec: 0.54
|
| 418 |
+
[[34m2026-02-04 03:06:01[0m] (step=0039500) Train Loss: -3.6815, Train Steps/Sec: 0.54
|
| 419 |
+
[[34m2026-02-04 03:09:08[0m] (step=0039600) Train Loss: -3.6830, Train Steps/Sec: 0.53
|
| 420 |
+
[[34m2026-02-04 03:12:15[0m] (step=0039700) Train Loss: -3.6771, Train Steps/Sec: 0.54
|
| 421 |
+
[[34m2026-02-04 03:15:21[0m] (step=0039800) Train Loss: -3.6791, Train Steps/Sec: 0.54
|
| 422 |
+
[[34m2026-02-04 03:18:28[0m] (step=0039900) Train Loss: -3.6797, Train Steps/Sec: 0.54
|
| 423 |
+
[[34m2026-02-04 03:21:34[0m] (step=0040000) Train Loss: -3.6815, Train Steps/Sec: 0.54
|
| 424 |
+
[[34m2026-02-04 03:22:33[0m] Beginning epoch 16...
|
| 425 |
+
[[34m2026-02-04 03:24:43[0m] (step=0040100) Train Loss: -3.6799, Train Steps/Sec: 0.53
|
| 426 |
+
[[34m2026-02-04 03:27:50[0m] (step=0040200) Train Loss: -3.6823, Train Steps/Sec: 0.53
|
| 427 |
+
[[34m2026-02-04 03:30:57[0m] (step=0040300) Train Loss: -3.6805, Train Steps/Sec: 0.53
|
| 428 |
+
[[34m2026-02-04 03:34:04[0m] (step=0040400) Train Loss: -3.6829, Train Steps/Sec: 0.54
|
| 429 |
+
[[34m2026-02-04 03:37:11[0m] (step=0040500) Train Loss: -3.6786, Train Steps/Sec: 0.53
|
| 430 |
+
[[34m2026-02-04 03:40:18[0m] (step=0040600) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 431 |
+
[[34m2026-02-04 03:43:24[0m] (step=0040700) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 432 |
+
[[34m2026-02-04 03:46:32[0m] (step=0040800) Train Loss: -3.6860, Train Steps/Sec: 0.53
|
| 433 |
+
[[34m2026-02-04 03:49:38[0m] (step=0040900) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 434 |
+
[[34m2026-02-04 03:52:44[0m] (step=0041000) Train Loss: -3.6803, Train Steps/Sec: 0.54
|
| 435 |
+
[[34m2026-02-04 03:55:52[0m] (step=0041100) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 436 |
+
[[34m2026-02-04 03:58:59[0m] (step=0041200) Train Loss: -3.6801, Train Steps/Sec: 0.53
|
| 437 |
+
[[34m2026-02-04 04:02:06[0m] (step=0041300) Train Loss: -3.6794, Train Steps/Sec: 0.53
|
| 438 |
+
[[34m2026-02-04 04:05:14[0m] (step=0041400) Train Loss: -3.6816, Train Steps/Sec: 0.53
|
| 439 |
+
[[34m2026-02-04 04:08:20[0m] (step=0041500) Train Loss: -3.6858, Train Steps/Sec: 0.54
|
| 440 |
+
[[34m2026-02-04 04:11:27[0m] (step=0041600) Train Loss: -3.6811, Train Steps/Sec: 0.53
|
| 441 |
+
[[34m2026-02-04 04:14:34[0m] (step=0041700) Train Loss: -3.6859, Train Steps/Sec: 0.53
|
| 442 |
+
[[34m2026-02-04 04:17:41[0m] (step=0041800) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 443 |
+
[[34m2026-02-04 04:20:47[0m] (step=0041900) Train Loss: -3.6838, Train Steps/Sec: 0.54
|
| 444 |
+
[[34m2026-02-04 04:23:54[0m] (step=0042000) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 445 |
+
[[34m2026-02-04 04:27:00[0m] (step=0042100) Train Loss: -3.6781, Train Steps/Sec: 0.54
|
| 446 |
+
[[34m2026-02-04 04:30:07[0m] (step=0042200) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 447 |
+
[[34m2026-02-04 04:33:13[0m] (step=0042300) Train Loss: -3.6835, Train Steps/Sec: 0.54
|
| 448 |
+
[[34m2026-02-04 04:36:20[0m] (step=0042400) Train Loss: -3.6816, Train Steps/Sec: 0.54
|
| 449 |
+
[[34m2026-02-04 04:39:27[0m] (step=0042500) Train Loss: -3.6802, Train Steps/Sec: 0.53
|
| 450 |
+
[[34m2026-02-04 04:40:31[0m] Beginning epoch 17...
|
| 451 |
+
[[34m2026-02-04 04:42:37[0m] (step=0042600) Train Loss: -3.6831, Train Steps/Sec: 0.53
|
| 452 |
+
[[34m2026-02-04 04:45:42[0m] (step=0042700) Train Loss: -3.6778, Train Steps/Sec: 0.54
|
| 453 |
+
[[34m2026-02-04 04:48:48[0m] (step=0042800) Train Loss: -3.6846, Train Steps/Sec: 0.54
|
| 454 |
+
[[34m2026-02-04 04:51:55[0m] (step=0042900) Train Loss: -3.6827, Train Steps/Sec: 0.53
|
| 455 |
+
[[34m2026-02-04 04:55:02[0m] (step=0043000) Train Loss: -3.6820, Train Steps/Sec: 0.54
|
| 456 |
+
[[34m2026-02-04 04:58:08[0m] (step=0043100) Train Loss: -3.6803, Train Steps/Sec: 0.54
|
| 457 |
+
[[34m2026-02-04 05:01:15[0m] (step=0043200) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 458 |
+
[[34m2026-02-04 05:04:22[0m] (step=0043300) Train Loss: -3.6838, Train Steps/Sec: 0.53
|
| 459 |
+
[[34m2026-02-04 05:07:29[0m] (step=0043400) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 460 |
+
[[34m2026-02-04 05:10:36[0m] (step=0043500) Train Loss: -3.6757, Train Steps/Sec: 0.53
|
| 461 |
+
[[34m2026-02-04 05:13:43[0m] (step=0043600) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 462 |
+
[[34m2026-02-04 05:16:50[0m] (step=0043700) Train Loss: -3.6807, Train Steps/Sec: 0.54
|
| 463 |
+
[[34m2026-02-04 05:19:56[0m] (step=0043800) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 464 |
+
[[34m2026-02-04 05:23:03[0m] (step=0043900) Train Loss: -3.6811, Train Steps/Sec: 0.53
|
| 465 |
+
[[34m2026-02-04 05:26:10[0m] (step=0044000) Train Loss: -3.6819, Train Steps/Sec: 0.54
|
| 466 |
+
[[34m2026-02-04 05:29:17[0m] (step=0044100) Train Loss: -3.6801, Train Steps/Sec: 0.54
|
| 467 |
+
[[34m2026-02-04 05:32:24[0m] (step=0044200) Train Loss: -3.6785, Train Steps/Sec: 0.54
|
| 468 |
+
[[34m2026-02-04 05:35:31[0m] (step=0044300) Train Loss: -3.6841, Train Steps/Sec: 0.53
|
| 469 |
+
[[34m2026-02-04 05:38:38[0m] (step=0044400) Train Loss: -3.6841, Train Steps/Sec: 0.53
|
| 470 |
+
[[34m2026-02-04 05:41:01[0m] (step=0044500) Train Loss: -3.6791, Train Steps/Sec: 0.70
|
| 471 |
+
[[34m2026-02-04 05:42:24[0m] (step=0044600) Train Loss: -3.6843, Train Steps/Sec: 1.20
|
| 472 |
+
[[34m2026-02-04 05:43:47[0m] (step=0044700) Train Loss: -3.6815, Train Steps/Sec: 1.21
|
| 473 |
+
[[34m2026-02-04 05:45:10[0m] (step=0044800) Train Loss: -3.6785, Train Steps/Sec: 1.21
|
| 474 |
+
[[34m2026-02-04 05:46:33[0m] (step=0044900) Train Loss: -3.6820, Train Steps/Sec: 1.21
|
| 475 |
+
[[34m2026-02-04 05:47:56[0m] (step=0045000) Train Loss: -3.6847, Train Steps/Sec: 1.20
|
| 476 |
+
[[34m2026-02-04 05:48:26[0m] Beginning epoch 18...
|
| 477 |
+
[[34m2026-02-04 05:49:22[0m] (step=0045100) Train Loss: -3.6816, Train Steps/Sec: 1.16
|
| 478 |
+
[[34m2026-02-04 05:50:45[0m] (step=0045200) Train Loss: -3.6834, Train Steps/Sec: 1.20
|
| 479 |
+
[[34m2026-02-04 05:52:08[0m] (step=0045300) Train Loss: -3.6787, Train Steps/Sec: 1.21
|
| 480 |
+
[[34m2026-02-04 05:53:31[0m] (step=0045400) Train Loss: -3.6844, Train Steps/Sec: 1.20
|
| 481 |
+
[[34m2026-02-04 05:54:54[0m] (step=0045500) Train Loss: -3.6823, Train Steps/Sec: 1.20
|
| 482 |
+
[[34m2026-02-04 05:56:17[0m] (step=0045600) Train Loss: -3.6806, Train Steps/Sec: 1.20
|
| 483 |
+
[[34m2026-02-04 05:57:40[0m] (step=0045700) Train Loss: -3.6797, Train Steps/Sec: 1.21
|
| 484 |
+
[[34m2026-02-04 05:59:03[0m] (step=0045800) Train Loss: -3.6819, Train Steps/Sec: 1.20
|
| 485 |
+
[[34m2026-02-04 06:00:26[0m] (step=0045900) Train Loss: -3.6807, Train Steps/Sec: 1.20
|
| 486 |
+
[[34m2026-02-04 06:01:49[0m] (step=0046000) Train Loss: -3.6814, Train Steps/Sec: 1.21
|
| 487 |
+
[[34m2026-02-04 06:03:12[0m] (step=0046100) Train Loss: -3.6827, Train Steps/Sec: 1.21
|
| 488 |
+
[[34m2026-02-04 06:04:35[0m] (step=0046200) Train Loss: -3.6824, Train Steps/Sec: 1.20
|
| 489 |
+
[[34m2026-02-04 06:05:58[0m] (step=0046300) Train Loss: -3.6825, Train Steps/Sec: 1.20
|
| 490 |
+
[[34m2026-02-04 06:07:21[0m] (step=0046400) Train Loss: -3.6826, Train Steps/Sec: 1.20
|
| 491 |
+
[[34m2026-02-04 06:08:44[0m] (step=0046500) Train Loss: -3.6778, Train Steps/Sec: 1.20
|
| 492 |
+
[[34m2026-02-04 06:10:07[0m] (step=0046600) Train Loss: -3.6820, Train Steps/Sec: 1.20
|
| 493 |
+
[[34m2026-02-04 06:11:30[0m] (step=0046700) Train Loss: -3.6830, Train Steps/Sec: 1.21
|
| 494 |
+
[[34m2026-02-04 06:12:53[0m] (step=0046800) Train Loss: -3.6808, Train Steps/Sec: 1.20
|
| 495 |
+
[[34m2026-02-04 06:14:16[0m] (step=0046900) Train Loss: -3.6812, Train Steps/Sec: 1.20
|
| 496 |
+
[[34m2026-02-04 06:15:39[0m] (step=0047000) Train Loss: -3.6836, Train Steps/Sec: 1.20
|
| 497 |
+
[[34m2026-02-04 06:17:02[0m] (step=0047100) Train Loss: -3.6806, Train Steps/Sec: 1.20
|
| 498 |
+
[[34m2026-02-04 06:18:25[0m] (step=0047200) Train Loss: -3.6813, Train Steps/Sec: 1.20
|
| 499 |
+
[[34m2026-02-04 06:19:48[0m] (step=0047300) Train Loss: -3.6828, Train Steps/Sec: 1.20
|
| 500 |
+
[[34m2026-02-04 06:21:11[0m] (step=0047400) Train Loss: -3.6842, Train Steps/Sec: 1.21
|
Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-003-SiT-XL-2-GVP-velocity-None/log.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[[34m2026-02-03 06:53:41[0m] Experiment directory created at results_256_gvp_disp/depth-mu-2-003-SiT-XL-2-GVP-velocity-None
|
| 2 |
+
[[34m2026-02-03 06:54:17[0m] Combined_model Parameters: 729,629,632
|
| 3 |
+
[[34m2026-02-03 06:54:17[0m] Total trainable parameters: 53,910,176
|
| 4 |
+
[[34m2026-02-03 06:54:19[0m] Dataset contains 1,281,167 images (/gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/)
|
| 5 |
+
[[34m2026-02-03 06:54:19[0m] Training for 100000 epochs...
|
| 6 |
+
[[34m2026-02-03 06:54:19[0m] Beginning epoch 0...
|
Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/log.txt
ADDED
|
@@ -0,0 +1,863 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[[34m2026-02-03 06:55:12[0m] Experiment directory created at results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None
|
| 2 |
+
[[34m2026-02-03 06:55:47[0m] Combined_model Parameters: 729,629,632
|
| 3 |
+
[[34m2026-02-03 06:55:47[0m] Total trainable parameters: 53,910,176
|
| 4 |
+
[[34m2026-02-03 06:55:50[0m] Dataset contains 1,281,167 images (/gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/)
|
| 5 |
+
[[34m2026-02-03 06:55:50[0m] Training for 100000 epochs...
|
| 6 |
+
[[34m2026-02-03 06:55:50[0m] Beginning epoch 0...
|
| 7 |
+
[[34m2026-02-03 06:57:30[0m] (step=0000100) Train Loss: -2.4789, Train Steps/Sec: 1.00
|
| 8 |
+
[[34m2026-02-03 06:59:08[0m] (step=0000200) Train Loss: -2.9649, Train Steps/Sec: 1.02
|
| 9 |
+
[[34m2026-02-03 07:00:47[0m] (step=0000300) Train Loss: -2.9777, Train Steps/Sec: 1.01
|
| 10 |
+
[[34m2026-02-03 07:02:27[0m] (step=0000400) Train Loss: -2.9828, Train Steps/Sec: 1.00
|
| 11 |
+
[[34m2026-02-03 07:04:08[0m] (step=0000500) Train Loss: -2.9877, Train Steps/Sec: 0.99
|
| 12 |
+
[[34m2026-02-03 07:05:49[0m] (step=0000600) Train Loss: -2.9875, Train Steps/Sec: 0.99
|
| 13 |
+
[[34m2026-02-03 07:07:28[0m] (step=0000700) Train Loss: -2.9882, Train Steps/Sec: 1.01
|
| 14 |
+
[[34m2026-02-03 07:09:08[0m] (step=0000800) Train Loss: -2.9861, Train Steps/Sec: 1.00
|
| 15 |
+
[[34m2026-02-03 07:10:49[0m] (step=0000900) Train Loss: -2.9862, Train Steps/Sec: 0.99
|
| 16 |
+
[[34m2026-02-03 07:12:30[0m] (step=0001000) Train Loss: -2.9886, Train Steps/Sec: 0.99
|
| 17 |
+
[[34m2026-02-03 07:14:12[0m] (step=0001100) Train Loss: -2.9849, Train Steps/Sec: 0.98
|
| 18 |
+
[[34m2026-02-03 07:18:10[0m] (step=0001200) Train Loss: -2.9885, Train Steps/Sec: 0.42
|
| 19 |
+
[[34m2026-02-03 07:20:07[0m] (step=0001300) Train Loss: -2.9864, Train Steps/Sec: 0.85
|
| 20 |
+
[[34m2026-02-03 07:21:45[0m] (step=0001400) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 21 |
+
[[34m2026-02-03 07:23:22[0m] (step=0001500) Train Loss: -2.9863, Train Steps/Sec: 1.03
|
| 22 |
+
[[34m2026-02-03 07:25:00[0m] (step=0001600) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 23 |
+
[[34m2026-02-03 07:26:37[0m] (step=0001700) Train Loss: -2.9930, Train Steps/Sec: 1.03
|
| 24 |
+
[[34m2026-02-03 07:28:14[0m] (step=0001800) Train Loss: -2.9892, Train Steps/Sec: 1.03
|
| 25 |
+
[[34m2026-02-03 07:29:52[0m] (step=0001900) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 26 |
+
[[34m2026-02-03 07:31:30[0m] (step=0002000) Train Loss: -2.9857, Train Steps/Sec: 1.03
|
| 27 |
+
[[34m2026-02-03 07:33:07[0m] (step=0002100) Train Loss: -2.9902, Train Steps/Sec: 1.02
|
| 28 |
+
[[34m2026-02-03 07:34:45[0m] (step=0002200) Train Loss: -2.9829, Train Steps/Sec: 1.03
|
| 29 |
+
[[34m2026-02-03 07:36:23[0m] (step=0002300) Train Loss: -2.9862, Train Steps/Sec: 1.02
|
| 30 |
+
[[34m2026-02-03 07:38:00[0m] (step=0002400) Train Loss: -2.9895, Train Steps/Sec: 1.03
|
| 31 |
+
[[34m2026-02-03 07:39:37[0m] (step=0002500) Train Loss: -2.9878, Train Steps/Sec: 1.03
|
| 32 |
+
[[34m2026-02-03 07:41:15[0m] (step=0002600) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 33 |
+
[[34m2026-02-03 07:42:53[0m] (step=0002700) Train Loss: -2.9906, Train Steps/Sec: 1.02
|
| 34 |
+
[[34m2026-02-03 07:44:31[0m] (step=0002800) Train Loss: -2.9915, Train Steps/Sec: 1.02
|
| 35 |
+
[[34m2026-02-03 07:46:09[0m] (step=0002900) Train Loss: -2.9871, Train Steps/Sec: 1.02
|
| 36 |
+
[[34m2026-02-03 07:47:47[0m] (step=0003000) Train Loss: -2.9850, Train Steps/Sec: 1.03
|
| 37 |
+
[[34m2026-02-03 07:49:25[0m] (step=0003100) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 38 |
+
[[34m2026-02-03 07:51:03[0m] (step=0003200) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 39 |
+
[[34m2026-02-03 07:52:41[0m] (step=0003300) Train Loss: -2.9943, Train Steps/Sec: 1.02
|
| 40 |
+
[[34m2026-02-03 07:54:15[0m] (step=0003400) Train Loss: -2.9891, Train Steps/Sec: 1.06
|
| 41 |
+
[[34m2026-02-03 07:55:53[0m] (step=0003500) Train Loss: -2.9845, Train Steps/Sec: 1.03
|
| 42 |
+
[[34m2026-02-03 07:57:30[0m] (step=0003600) Train Loss: -2.9919, Train Steps/Sec: 1.02
|
| 43 |
+
[[34m2026-02-03 07:59:08[0m] (step=0003700) Train Loss: -2.9916, Train Steps/Sec: 1.03
|
| 44 |
+
[[34m2026-02-03 08:00:46[0m] (step=0003800) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 45 |
+
[[34m2026-02-03 08:02:23[0m] (step=0003900) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 46 |
+
[[34m2026-02-03 08:04:01[0m] (step=0004000) Train Loss: -2.9929, Train Steps/Sec: 1.02
|
| 47 |
+
[[34m2026-02-03 08:05:39[0m] (step=0004100) Train Loss: -2.9882, Train Steps/Sec: 1.02
|
| 48 |
+
[[34m2026-02-03 08:07:17[0m] (step=0004200) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 49 |
+
[[34m2026-02-03 08:08:54[0m] (step=0004300) Train Loss: -2.9849, Train Steps/Sec: 1.02
|
| 50 |
+
[[34m2026-02-03 08:10:32[0m] (step=0004400) Train Loss: -2.9854, Train Steps/Sec: 1.02
|
| 51 |
+
[[34m2026-02-03 08:12:10[0m] (step=0004500) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 52 |
+
[[34m2026-02-03 08:13:47[0m] (step=0004600) Train Loss: -2.9874, Train Steps/Sec: 1.03
|
| 53 |
+
[[34m2026-02-03 08:15:26[0m] (step=0004700) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 54 |
+
[[34m2026-02-03 08:17:04[0m] (step=0004800) Train Loss: -2.9844, Train Steps/Sec: 1.02
|
| 55 |
+
[[34m2026-02-03 08:18:41[0m] (step=0004900) Train Loss: -2.9825, Train Steps/Sec: 1.02
|
| 56 |
+
[[34m2026-02-03 08:20:19[0m] (step=0005000) Train Loss: -2.9846, Train Steps/Sec: 1.02
|
| 57 |
+
[[34m2026-02-03 08:20:24[0m] Beginning epoch 1...
|
| 58 |
+
[[34m2026-02-03 08:21:59[0m] (step=0005100) Train Loss: -2.9935, Train Steps/Sec: 1.00
|
| 59 |
+
[[34m2026-02-03 08:23:37[0m] (step=0005200) Train Loss: -2.9902, Train Steps/Sec: 1.02
|
| 60 |
+
[[34m2026-02-03 08:25:15[0m] (step=0005300) Train Loss: -2.9927, Train Steps/Sec: 1.02
|
| 61 |
+
[[34m2026-02-03 08:26:53[0m] (step=0005400) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 62 |
+
[[34m2026-02-03 08:28:31[0m] (step=0005500) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 63 |
+
[[34m2026-02-03 08:30:09[0m] (step=0005600) Train Loss: -2.9912, Train Steps/Sec: 1.02
|
| 64 |
+
[[34m2026-02-03 08:31:46[0m] (step=0005700) Train Loss: -2.9920, Train Steps/Sec: 1.03
|
| 65 |
+
[[34m2026-02-03 08:33:24[0m] (step=0005800) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 66 |
+
[[34m2026-02-03 08:35:02[0m] (step=0005900) Train Loss: -2.9884, Train Steps/Sec: 1.02
|
| 67 |
+
[[34m2026-02-03 08:36:39[0m] (step=0006000) Train Loss: -2.9900, Train Steps/Sec: 1.03
|
| 68 |
+
[[34m2026-02-03 08:38:17[0m] (step=0006100) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 69 |
+
[[34m2026-02-03 08:39:55[0m] (step=0006200) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 70 |
+
[[34m2026-02-03 08:41:33[0m] (step=0006300) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 71 |
+
[[34m2026-02-03 08:43:11[0m] (step=0006400) Train Loss: -2.9901, Train Steps/Sec: 1.02
|
| 72 |
+
[[34m2026-02-03 08:44:49[0m] (step=0006500) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 73 |
+
[[34m2026-02-03 08:46:27[0m] (step=0006600) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 74 |
+
[[34m2026-02-03 08:48:05[0m] (step=0006700) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 75 |
+
[[34m2026-02-03 08:49:43[0m] (step=0006800) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 76 |
+
[[34m2026-02-03 08:51:20[0m] (step=0006900) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 77 |
+
[[34m2026-02-03 08:52:58[0m] (step=0007000) Train Loss: -2.9848, Train Steps/Sec: 1.02
|
| 78 |
+
[[34m2026-02-03 08:54:36[0m] (step=0007100) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 79 |
+
[[34m2026-02-03 08:56:13[0m] (step=0007200) Train Loss: -2.9936, Train Steps/Sec: 1.02
|
| 80 |
+
[[34m2026-02-03 08:57:51[0m] (step=0007300) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 81 |
+
[[34m2026-02-03 08:59:29[0m] (step=0007400) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 82 |
+
[[34m2026-02-03 09:01:07[0m] (step=0007500) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 83 |
+
[[34m2026-02-03 09:02:45[0m] (step=0007600) Train Loss: -2.9875, Train Steps/Sec: 1.03
|
| 84 |
+
[[34m2026-02-03 09:04:22[0m] (step=0007700) Train Loss: -2.9918, Train Steps/Sec: 1.02
|
| 85 |
+
[[34m2026-02-03 09:06:01[0m] (step=0007800) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 86 |
+
[[34m2026-02-03 09:07:39[0m] (step=0007900) Train Loss: -2.9846, Train Steps/Sec: 1.02
|
| 87 |
+
[[34m2026-02-03 09:09:16[0m] (step=0008000) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 88 |
+
[[34m2026-02-03 09:10:54[0m] (step=0008100) Train Loss: -2.9913, Train Steps/Sec: 1.02
|
| 89 |
+
[[34m2026-02-03 09:12:28[0m] (step=0008200) Train Loss: -2.9826, Train Steps/Sec: 1.07
|
| 90 |
+
[[34m2026-02-03 09:14:06[0m] (step=0008300) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 91 |
+
[[34m2026-02-03 09:15:44[0m] (step=0008400) Train Loss: -2.9933, Train Steps/Sec: 1.03
|
| 92 |
+
[[34m2026-02-03 09:17:21[0m] (step=0008500) Train Loss: -2.9858, Train Steps/Sec: 1.02
|
| 93 |
+
[[34m2026-02-03 09:18:59[0m] (step=0008600) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 94 |
+
[[34m2026-02-03 09:20:37[0m] (step=0008700) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 95 |
+
[[34m2026-02-03 09:22:15[0m] (step=0008800) Train Loss: -2.9901, Train Steps/Sec: 1.02
|
| 96 |
+
[[34m2026-02-03 09:23:53[0m] (step=0008900) Train Loss: -2.9884, Train Steps/Sec: 1.02
|
| 97 |
+
[[34m2026-02-03 09:25:31[0m] (step=0009000) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 98 |
+
[[34m2026-02-03 09:27:09[0m] (step=0009100) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 99 |
+
[[34m2026-02-03 09:28:47[0m] (step=0009200) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 100 |
+
[[34m2026-02-03 09:30:24[0m] (step=0009300) Train Loss: -2.9888, Train Steps/Sec: 1.03
|
| 101 |
+
[[34m2026-02-03 09:32:02[0m] (step=0009400) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 102 |
+
[[34m2026-02-03 09:33:40[0m] (step=0009500) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 103 |
+
[[34m2026-02-03 09:35:18[0m] (step=0009600) Train Loss: -2.9820, Train Steps/Sec: 1.02
|
| 104 |
+
[[34m2026-02-03 09:36:56[0m] (step=0009700) Train Loss: -2.9845, Train Steps/Sec: 1.02
|
| 105 |
+
[[34m2026-02-03 09:38:34[0m] (step=0009800) Train Loss: -2.9893, Train Steps/Sec: 1.02
|
| 106 |
+
[[34m2026-02-03 09:40:12[0m] (step=0009900) Train Loss: -2.9918, Train Steps/Sec: 1.02
|
| 107 |
+
[[34m2026-02-03 09:41:50[0m] (step=0010000) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 108 |
+
[[34m2026-02-03 09:41:58[0m] Beginning epoch 2...
|
| 109 |
+
[[34m2026-02-03 09:43:30[0m] (step=0010100) Train Loss: -2.9883, Train Steps/Sec: 1.00
|
| 110 |
+
[[34m2026-02-03 09:45:08[0m] (step=0010200) Train Loss: -2.9871, Train Steps/Sec: 1.02
|
| 111 |
+
[[34m2026-02-03 09:46:45[0m] (step=0010300) Train Loss: -2.9880, Train Steps/Sec: 1.03
|
| 112 |
+
[[34m2026-02-03 09:48:22[0m] (step=0010400) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 113 |
+
[[34m2026-02-03 09:50:00[0m] (step=0010500) Train Loss: -2.9857, Train Steps/Sec: 1.02
|
| 114 |
+
[[34m2026-02-03 09:51:38[0m] (step=0010600) Train Loss: -2.9888, Train Steps/Sec: 1.02
|
| 115 |
+
[[34m2026-02-03 09:53:16[0m] (step=0010700) Train Loss: -2.9913, Train Steps/Sec: 1.02
|
| 116 |
+
[[34m2026-02-03 09:54:55[0m] (step=0010800) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 117 |
+
[[34m2026-02-03 09:56:33[0m] (step=0010900) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 118 |
+
[[34m2026-02-03 09:58:11[0m] (step=0011000) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 119 |
+
[[34m2026-02-03 09:59:49[0m] (step=0011100) Train Loss: -2.9910, Train Steps/Sec: 1.02
|
| 120 |
+
[[34m2026-02-03 10:01:27[0m] (step=0011200) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 121 |
+
[[34m2026-02-03 10:03:06[0m] (step=0011300) Train Loss: -2.9864, Train Steps/Sec: 1.01
|
| 122 |
+
[[34m2026-02-03 10:04:43[0m] (step=0011400) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 123 |
+
[[34m2026-02-03 10:06:21[0m] (step=0011500) Train Loss: -2.9933, Train Steps/Sec: 1.02
|
| 124 |
+
[[34m2026-02-03 10:08:00[0m] (step=0011600) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 125 |
+
[[34m2026-02-03 10:09:37[0m] (step=0011700) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 126 |
+
[[34m2026-02-03 10:11:16[0m] (step=0011800) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 127 |
+
[[34m2026-02-03 10:12:54[0m] (step=0011900) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 128 |
+
[[34m2026-02-03 10:14:32[0m] (step=0012000) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 129 |
+
[[34m2026-02-03 10:16:10[0m] (step=0012100) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 130 |
+
[[34m2026-02-03 10:17:48[0m] (step=0012200) Train Loss: -2.9893, Train Steps/Sec: 1.02
|
| 131 |
+
[[34m2026-02-03 10:19:26[0m] (step=0012300) Train Loss: -2.9856, Train Steps/Sec: 1.02
|
| 132 |
+
[[34m2026-02-03 10:21:04[0m] (step=0012400) Train Loss: -2.9944, Train Steps/Sec: 1.02
|
| 133 |
+
[[34m2026-02-03 10:22:41[0m] (step=0012500) Train Loss: -2.9854, Train Steps/Sec: 1.02
|
| 134 |
+
[[34m2026-02-03 10:24:20[0m] (step=0012600) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 135 |
+
[[34m2026-02-03 10:25:57[0m] (step=0012700) Train Loss: -2.9851, Train Steps/Sec: 1.02
|
| 136 |
+
[[34m2026-02-03 10:27:35[0m] (step=0012800) Train Loss: -2.9892, Train Steps/Sec: 1.02
|
| 137 |
+
[[34m2026-02-03 10:29:13[0m] (step=0012900) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 138 |
+
[[34m2026-02-03 10:30:47[0m] (step=0013000) Train Loss: -2.9892, Train Steps/Sec: 1.06
|
| 139 |
+
[[34m2026-02-03 10:32:25[0m] (step=0013100) Train Loss: -2.9854, Train Steps/Sec: 1.02
|
| 140 |
+
[[34m2026-02-03 10:34:02[0m] (step=0013200) Train Loss: -2.9860, Train Steps/Sec: 1.03
|
| 141 |
+
[[34m2026-02-03 10:35:40[0m] (step=0013300) Train Loss: -2.9888, Train Steps/Sec: 1.02
|
| 142 |
+
[[34m2026-02-03 10:37:18[0m] (step=0013400) Train Loss: -2.9860, Train Steps/Sec: 1.02
|
| 143 |
+
[[34m2026-02-03 10:38:56[0m] (step=0013500) Train Loss: -2.9910, Train Steps/Sec: 1.03
|
| 144 |
+
[[34m2026-02-03 10:40:33[0m] (step=0013600) Train Loss: -2.9834, Train Steps/Sec: 1.02
|
| 145 |
+
[[34m2026-02-03 10:42:11[0m] (step=0013700) Train Loss: -2.9847, Train Steps/Sec: 1.02
|
| 146 |
+
[[34m2026-02-03 10:43:49[0m] (step=0013800) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 147 |
+
[[34m2026-02-03 10:45:27[0m] (step=0013900) Train Loss: -2.9884, Train Steps/Sec: 1.02
|
| 148 |
+
[[34m2026-02-03 10:47:05[0m] (step=0014000) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 149 |
+
[[34m2026-02-03 10:48:42[0m] (step=0014100) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 150 |
+
[[34m2026-02-03 10:50:20[0m] (step=0014200) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 151 |
+
[[34m2026-02-03 10:51:58[0m] (step=0014300) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 152 |
+
[[34m2026-02-03 10:53:35[0m] (step=0014400) Train Loss: -2.9889, Train Steps/Sec: 1.03
|
| 153 |
+
[[34m2026-02-03 10:55:13[0m] (step=0014500) Train Loss: -2.9893, Train Steps/Sec: 1.02
|
| 154 |
+
[[34m2026-02-03 10:56:51[0m] (step=0014600) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 155 |
+
[[34m2026-02-03 10:58:28[0m] (step=0014700) Train Loss: -2.9864, Train Steps/Sec: 1.03
|
| 156 |
+
[[34m2026-02-03 11:00:06[0m] (step=0014800) Train Loss: -2.9927, Train Steps/Sec: 1.02
|
| 157 |
+
[[34m2026-02-03 11:01:43[0m] (step=0014900) Train Loss: -2.9881, Train Steps/Sec: 1.03
|
| 158 |
+
[[34m2026-02-03 11:03:20[0m] (step=0015000) Train Loss: -2.9892, Train Steps/Sec: 1.03
|
| 159 |
+
[[34m2026-02-03 11:03:33[0m] Beginning epoch 3...
|
| 160 |
+
[[34m2026-02-03 11:05:01[0m] (step=0015100) Train Loss: -2.9843, Train Steps/Sec: 1.00
|
| 161 |
+
[[34m2026-02-03 11:06:39[0m] (step=0015200) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 162 |
+
[[34m2026-02-03 11:08:16[0m] (step=0015300) Train Loss: -2.9872, Train Steps/Sec: 1.03
|
| 163 |
+
[[34m2026-02-03 11:09:54[0m] (step=0015400) Train Loss: -2.9896, Train Steps/Sec: 1.03
|
| 164 |
+
[[34m2026-02-03 11:11:32[0m] (step=0015500) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 165 |
+
[[34m2026-02-03 11:13:10[0m] (step=0015600) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 166 |
+
[[34m2026-02-03 11:14:48[0m] (step=0015700) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 167 |
+
[[34m2026-02-03 11:16:25[0m] (step=0015800) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 168 |
+
[[34m2026-02-03 11:18:04[0m] (step=0015900) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 169 |
+
[[34m2026-02-03 11:19:42[0m] (step=0016000) Train Loss: -2.9963, Train Steps/Sec: 1.02
|
| 170 |
+
[[34m2026-02-03 11:21:19[0m] (step=0016100) Train Loss: -2.9911, Train Steps/Sec: 1.02
|
| 171 |
+
[[34m2026-02-03 11:22:57[0m] (step=0016200) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 172 |
+
[[34m2026-02-03 11:24:35[0m] (step=0016300) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 173 |
+
[[34m2026-02-03 11:26:13[0m] (step=0016400) Train Loss: -2.9870, Train Steps/Sec: 1.02
|
| 174 |
+
[[34m2026-02-03 11:27:51[0m] (step=0016500) Train Loss: -2.9856, Train Steps/Sec: 1.02
|
| 175 |
+
[[34m2026-02-03 11:29:29[0m] (step=0016600) Train Loss: -2.9835, Train Steps/Sec: 1.02
|
| 176 |
+
[[34m2026-02-03 11:31:07[0m] (step=0016700) Train Loss: -2.9855, Train Steps/Sec: 1.02
|
| 177 |
+
[[34m2026-02-03 11:32:44[0m] (step=0016800) Train Loss: -2.9885, Train Steps/Sec: 1.03
|
| 178 |
+
[[34m2026-02-03 11:34:21[0m] (step=0016900) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 179 |
+
[[34m2026-02-03 11:35:59[0m] (step=0017000) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 180 |
+
[[34m2026-02-03 11:37:38[0m] (step=0017100) Train Loss: -2.9856, Train Steps/Sec: 1.02
|
| 181 |
+
[[34m2026-02-03 11:39:16[0m] (step=0017200) Train Loss: -2.9916, Train Steps/Sec: 1.02
|
| 182 |
+
[[34m2026-02-03 11:40:54[0m] (step=0017300) Train Loss: -2.9901, Train Steps/Sec: 1.02
|
| 183 |
+
[[34m2026-02-03 11:42:31[0m] (step=0017400) Train Loss: -2.9858, Train Steps/Sec: 1.02
|
| 184 |
+
[[34m2026-02-03 11:44:10[0m] (step=0017500) Train Loss: -2.9834, Train Steps/Sec: 1.02
|
| 185 |
+
[[34m2026-02-03 11:45:48[0m] (step=0017600) Train Loss: -2.9826, Train Steps/Sec: 1.02
|
| 186 |
+
[[34m2026-02-03 11:47:22[0m] (step=0017700) Train Loss: -2.9870, Train Steps/Sec: 1.06
|
| 187 |
+
[[34m2026-02-03 11:49:00[0m] (step=0017800) Train Loss: -2.9945, Train Steps/Sec: 1.02
|
| 188 |
+
[[34m2026-02-03 11:50:38[0m] (step=0017900) Train Loss: -2.9841, Train Steps/Sec: 1.02
|
| 189 |
+
[[34m2026-02-03 11:52:16[0m] (step=0018000) Train Loss: -2.9945, Train Steps/Sec: 1.02
|
| 190 |
+
[[34m2026-02-03 11:53:53[0m] (step=0018100) Train Loss: -2.9870, Train Steps/Sec: 1.02
|
| 191 |
+
[[34m2026-02-03 11:55:32[0m] (step=0018200) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 192 |
+
[[34m2026-02-03 11:57:10[0m] (step=0018300) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 193 |
+
[[34m2026-02-03 11:58:48[0m] (step=0018400) Train Loss: -2.9872, Train Steps/Sec: 1.02
|
| 194 |
+
[[34m2026-02-03 12:00:26[0m] (step=0018500) Train Loss: -2.9906, Train Steps/Sec: 1.02
|
| 195 |
+
[[34m2026-02-03 12:02:03[0m] (step=0018600) Train Loss: -2.9869, Train Steps/Sec: 1.03
|
| 196 |
+
[[34m2026-02-03 12:03:41[0m] (step=0018700) Train Loss: -2.9855, Train Steps/Sec: 1.03
|
| 197 |
+
[[34m2026-02-03 12:05:18[0m] (step=0018800) Train Loss: -2.9842, Train Steps/Sec: 1.03
|
| 198 |
+
[[34m2026-02-03 12:06:56[0m] (step=0018900) Train Loss: -2.9824, Train Steps/Sec: 1.03
|
| 199 |
+
[[34m2026-02-03 12:08:33[0m] (step=0019000) Train Loss: -2.9857, Train Steps/Sec: 1.03
|
| 200 |
+
[[34m2026-02-03 12:10:11[0m] (step=0019100) Train Loss: -2.9898, Train Steps/Sec: 1.02
|
| 201 |
+
[[34m2026-02-03 12:11:48[0m] (step=0019200) Train Loss: -2.9880, Train Steps/Sec: 1.03
|
| 202 |
+
[[34m2026-02-03 12:13:26[0m] (step=0019300) Train Loss: -2.9942, Train Steps/Sec: 1.03
|
| 203 |
+
[[34m2026-02-03 12:15:03[0m] (step=0019400) Train Loss: -2.9905, Train Steps/Sec: 1.02
|
| 204 |
+
[[34m2026-02-03 12:16:41[0m] (step=0019500) Train Loss: -2.9895, Train Steps/Sec: 1.02
|
| 205 |
+
[[34m2026-02-03 12:18:19[0m] (step=0019600) Train Loss: -2.9856, Train Steps/Sec: 1.02
|
| 206 |
+
[[34m2026-02-03 12:19:57[0m] (step=0019700) Train Loss: -2.9901, Train Steps/Sec: 1.02
|
| 207 |
+
[[34m2026-02-03 12:21:35[0m] (step=0019800) Train Loss: -2.9845, Train Steps/Sec: 1.02
|
| 208 |
+
[[34m2026-02-03 12:23:12[0m] (step=0019900) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 209 |
+
[[34m2026-02-03 12:24:50[0m] (step=0020000) Train Loss: -2.9927, Train Steps/Sec: 1.02
|
| 210 |
+
[[34m2026-02-03 12:25:06[0m] Beginning epoch 4...
|
| 211 |
+
[[34m2026-02-03 12:26:30[0m] (step=0020100) Train Loss: -2.9944, Train Steps/Sec: 1.00
|
| 212 |
+
[[34m2026-02-03 12:28:07[0m] (step=0020200) Train Loss: -2.9893, Train Steps/Sec: 1.02
|
| 213 |
+
[[34m2026-02-03 12:29:45[0m] (step=0020300) Train Loss: -2.9873, Train Steps/Sec: 1.03
|
| 214 |
+
[[34m2026-02-03 12:31:23[0m] (step=0020400) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 215 |
+
[[34m2026-02-03 12:33:00[0m] (step=0020500) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 216 |
+
[[34m2026-02-03 12:34:38[0m] (step=0020600) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 217 |
+
[[34m2026-02-03 12:36:16[0m] (step=0020700) Train Loss: -2.9836, Train Steps/Sec: 1.02
|
| 218 |
+
[[34m2026-02-03 12:37:54[0m] (step=0020800) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 219 |
+
[[34m2026-02-03 12:39:32[0m] (step=0020900) Train Loss: -2.9839, Train Steps/Sec: 1.02
|
| 220 |
+
[[34m2026-02-03 12:41:10[0m] (step=0021000) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 221 |
+
[[34m2026-02-03 12:42:48[0m] (step=0021100) Train Loss: -2.9918, Train Steps/Sec: 1.02
|
| 222 |
+
[[34m2026-02-03 12:44:25[0m] (step=0021200) Train Loss: -2.9904, Train Steps/Sec: 1.03
|
| 223 |
+
[[34m2026-02-03 12:46:03[0m] (step=0021300) Train Loss: -2.9917, Train Steps/Sec: 1.02
|
| 224 |
+
[[34m2026-02-03 12:47:41[0m] (step=0021400) Train Loss: -2.9911, Train Steps/Sec: 1.02
|
| 225 |
+
[[34m2026-02-03 12:49:19[0m] (step=0021500) Train Loss: -2.9888, Train Steps/Sec: 1.02
|
| 226 |
+
[[34m2026-02-03 12:50:57[0m] (step=0021600) Train Loss: -2.9900, Train Steps/Sec: 1.02
|
| 227 |
+
[[34m2026-02-03 12:52:35[0m] (step=0021700) Train Loss: -2.9857, Train Steps/Sec: 1.02
|
| 228 |
+
[[34m2026-02-03 12:54:13[0m] (step=0021800) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 229 |
+
[[34m2026-02-03 12:55:50[0m] (step=0021900) Train Loss: -2.9898, Train Steps/Sec: 1.03
|
| 230 |
+
[[34m2026-02-03 12:57:28[0m] (step=0022000) Train Loss: -2.9929, Train Steps/Sec: 1.02
|
| 231 |
+
[[34m2026-02-03 12:59:06[0m] (step=0022100) Train Loss: -2.9851, Train Steps/Sec: 1.03
|
| 232 |
+
[[34m2026-02-03 13:00:44[0m] (step=0022200) Train Loss: -2.9931, Train Steps/Sec: 1.02
|
| 233 |
+
[[34m2026-02-03 13:02:22[0m] (step=0022300) Train Loss: -2.9841, Train Steps/Sec: 1.02
|
| 234 |
+
[[34m2026-02-03 13:03:59[0m] (step=0022400) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 235 |
+
[[34m2026-02-03 13:05:34[0m] (step=0022500) Train Loss: -2.9891, Train Steps/Sec: 1.05
|
| 236 |
+
[[34m2026-02-03 13:07:12[0m] (step=0022600) Train Loss: -2.9902, Train Steps/Sec: 1.02
|
| 237 |
+
[[34m2026-02-03 13:08:50[0m] (step=0022700) Train Loss: -2.9920, Train Steps/Sec: 1.02
|
| 238 |
+
[[34m2026-02-03 13:10:28[0m] (step=0022800) Train Loss: -2.9864, Train Steps/Sec: 1.03
|
| 239 |
+
[[34m2026-02-03 13:12:05[0m] (step=0022900) Train Loss: -2.9827, Train Steps/Sec: 1.03
|
| 240 |
+
[[34m2026-02-03 13:13:43[0m] (step=0023000) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 241 |
+
[[34m2026-02-03 13:15:21[0m] (step=0023100) Train Loss: -2.9919, Train Steps/Sec: 1.02
|
| 242 |
+
[[34m2026-02-03 13:16:59[0m] (step=0023200) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 243 |
+
[[34m2026-02-03 13:18:37[0m] (step=0023300) Train Loss: -2.9883, Train Steps/Sec: 1.02
|
| 244 |
+
[[34m2026-02-03 13:20:15[0m] (step=0023400) Train Loss: 78736.1641, Train Steps/Sec: 1.02
|
| 245 |
+
[[34m2026-02-03 13:21:53[0m] (step=0023500) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 246 |
+
[[34m2026-02-03 13:23:31[0m] (step=0023600) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 247 |
+
[[34m2026-02-03 13:25:09[0m] (step=0023700) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 248 |
+
[[34m2026-02-03 13:26:47[0m] (step=0023800) Train Loss: -2.9883, Train Steps/Sec: 1.02
|
| 249 |
+
[[34m2026-02-03 13:28:25[0m] (step=0023900) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 250 |
+
[[34m2026-02-03 13:30:03[0m] (step=0024000) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 251 |
+
[[34m2026-02-03 13:31:41[0m] (step=0024100) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 252 |
+
[[34m2026-02-03 13:33:19[0m] (step=0024200) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 253 |
+
[[34m2026-02-03 13:34:56[0m] (step=0024300) Train Loss: -2.9898, Train Steps/Sec: 1.02
|
| 254 |
+
[[34m2026-02-03 13:36:34[0m] (step=0024400) Train Loss: -2.9893, Train Steps/Sec: 1.02
|
| 255 |
+
[[34m2026-02-03 13:38:12[0m] (step=0024500) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 256 |
+
[[34m2026-02-03 13:39:49[0m] (step=0024600) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 257 |
+
[[34m2026-02-03 13:41:27[0m] (step=0024700) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 258 |
+
[[34m2026-02-03 13:43:05[0m] (step=0024800) Train Loss: -2.9900, Train Steps/Sec: 1.02
|
| 259 |
+
[[34m2026-02-03 13:44:43[0m] (step=0024900) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 260 |
+
[[34m2026-02-03 13:46:21[0m] (step=0025000) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 261 |
+
[[34m2026-02-03 13:46:22[0m] Saved checkpoint to results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt
|
| 262 |
+
[[34m2026-02-03 13:46:42[0m] Beginning epoch 5...
|
| 263 |
+
[[34m2026-02-03 13:48:02[0m] (step=0025100) Train Loss: -2.9903, Train Steps/Sec: 0.99
|
| 264 |
+
[[34m2026-02-03 13:49:31[0m] Generating EMA samples...
|
| 265 |
+
[[34m2026-02-03 13:49:39[0m] (step=0025200) Train Loss: -2.9859, Train Steps/Sec: 1.03
|
| 266 |
+
[[34m2026-02-03 13:51:17[0m] (step=0025300) Train Loss: -2.9951, Train Steps/Sec: 1.02
|
| 267 |
+
[[34m2026-02-03 13:52:55[0m] (step=0025400) Train Loss: -2.9863, Train Steps/Sec: 1.02
|
| 268 |
+
[[34m2026-02-03 13:54:33[0m] (step=0025500) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 269 |
+
[[34m2026-02-03 13:56:10[0m] (step=0025600) Train Loss: -2.9899, Train Steps/Sec: 1.03
|
| 270 |
+
[[34m2026-02-03 13:57:48[0m] (step=0025700) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 271 |
+
[[34m2026-02-03 13:59:26[0m] (step=0025800) Train Loss: -2.9830, Train Steps/Sec: 1.02
|
| 272 |
+
[[34m2026-02-03 14:01:03[0m] (step=0025900) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 273 |
+
[[34m2026-02-03 14:02:40[0m] (step=0026000) Train Loss: -2.9889, Train Steps/Sec: 1.03
|
| 274 |
+
[[34m2026-02-03 14:04:18[0m] (step=0026100) Train Loss: -2.9845, Train Steps/Sec: 1.02
|
| 275 |
+
[[34m2026-02-03 14:05:56[0m] (step=0026200) Train Loss: -2.9882, Train Steps/Sec: 1.02
|
| 276 |
+
[[34m2026-02-03 14:07:34[0m] (step=0026300) Train Loss: -2.9906, Train Steps/Sec: 1.02
|
| 277 |
+
[[34m2026-02-03 14:09:12[0m] (step=0026400) Train Loss: -2.9909, Train Steps/Sec: 1.02
|
| 278 |
+
[[34m2026-02-03 14:10:51[0m] (step=0026500) Train Loss: -2.9932, Train Steps/Sec: 1.02
|
| 279 |
+
[[34m2026-02-03 14:12:28[0m] (step=0026600) Train Loss: -2.9909, Train Steps/Sec: 1.02
|
| 280 |
+
[[34m2026-02-03 14:14:06[0m] (step=0026700) Train Loss: -2.9931, Train Steps/Sec: 1.02
|
| 281 |
+
[[34m2026-02-03 14:15:43[0m] (step=0026800) Train Loss: -2.9852, Train Steps/Sec: 1.03
|
| 282 |
+
[[34m2026-02-03 14:17:21[0m] (step=0026900) Train Loss: -2.9819, Train Steps/Sec: 1.02
|
| 283 |
+
[[34m2026-02-03 14:18:59[0m] (step=0027000) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 284 |
+
[[34m2026-02-03 14:20:37[0m] (step=0027100) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 285 |
+
[[34m2026-02-03 14:22:15[0m] (step=0027200) Train Loss: -2.9917, Train Steps/Sec: 1.02
|
| 286 |
+
[[34m2026-02-03 14:23:50[0m] (step=0027300) Train Loss: -2.9884, Train Steps/Sec: 1.06
|
| 287 |
+
[[34m2026-02-03 14:25:27[0m] (step=0027400) Train Loss: -2.9851, Train Steps/Sec: 1.02
|
| 288 |
+
[[34m2026-02-03 14:27:05[0m] (step=0027500) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 289 |
+
[[34m2026-02-03 14:28:43[0m] (step=0027600) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 290 |
+
[[34m2026-02-03 14:30:21[0m] (step=0027700) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 291 |
+
[[34m2026-02-03 14:31:59[0m] (step=0027800) Train Loss: -2.9925, Train Steps/Sec: 1.02
|
| 292 |
+
[[34m2026-02-03 14:33:36[0m] (step=0027900) Train Loss: -2.9893, Train Steps/Sec: 1.03
|
| 293 |
+
[[34m2026-02-03 14:35:13[0m] (step=0028000) Train Loss: -2.9872, Train Steps/Sec: 1.02
|
| 294 |
+
[[34m2026-02-03 14:36:51[0m] (step=0028100) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 295 |
+
[[34m2026-02-03 14:38:29[0m] (step=0028200) Train Loss: -2.9917, Train Steps/Sec: 1.02
|
| 296 |
+
[[34m2026-02-03 14:40:07[0m] (step=0028300) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 297 |
+
[[34m2026-02-03 14:41:46[0m] (step=0028400) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 298 |
+
[[34m2026-02-03 14:43:23[0m] (step=0028500) Train Loss: -2.9831, Train Steps/Sec: 1.02
|
| 299 |
+
[[34m2026-02-03 14:45:02[0m] (step=0028600) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 300 |
+
[[34m2026-02-03 14:46:39[0m] (step=0028700) Train Loss: -2.9925, Train Steps/Sec: 1.02
|
| 301 |
+
[[34m2026-02-03 14:48:17[0m] (step=0028800) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 302 |
+
[[34m2026-02-03 14:49:55[0m] (step=0028900) Train Loss: -2.9915, Train Steps/Sec: 1.02
|
| 303 |
+
[[34m2026-02-03 14:51:33[0m] (step=0029000) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 304 |
+
[[34m2026-02-03 14:53:10[0m] (step=0029100) Train Loss: -2.9889, Train Steps/Sec: 1.03
|
| 305 |
+
[[34m2026-02-03 14:54:48[0m] (step=0029200) Train Loss: -2.9917, Train Steps/Sec: 1.03
|
| 306 |
+
[[34m2026-02-03 14:56:26[0m] (step=0029300) Train Loss: -2.9851, Train Steps/Sec: 1.02
|
| 307 |
+
[[34m2026-02-03 14:58:04[0m] (step=0029400) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 308 |
+
[[34m2026-02-03 14:59:42[0m] (step=0029500) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 309 |
+
[[34m2026-02-03 15:01:20[0m] (step=0029600) Train Loss: -2.9896, Train Steps/Sec: 1.03
|
| 310 |
+
[[34m2026-02-03 15:02:57[0m] (step=0029700) Train Loss: -2.9913, Train Steps/Sec: 1.02
|
| 311 |
+
[[34m2026-02-03 15:04:35[0m] (step=0029800) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 312 |
+
[[34m2026-02-03 15:06:13[0m] (step=0029900) Train Loss: -2.9887, Train Steps/Sec: 1.03
|
| 313 |
+
[[34m2026-02-03 15:07:51[0m] (step=0030000) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 314 |
+
[[34m2026-02-03 15:08:15[0m] Beginning epoch 6...
|
| 315 |
+
[[34m2026-02-03 15:09:31[0m] (step=0030100) Train Loss: -2.9889, Train Steps/Sec: 1.00
|
| 316 |
+
[[34m2026-02-03 15:11:09[0m] (step=0030200) Train Loss: -2.9905, Train Steps/Sec: 1.02
|
| 317 |
+
[[34m2026-02-03 15:12:47[0m] (step=0030300) Train Loss: -2.9833, Train Steps/Sec: 1.02
|
| 318 |
+
[[34m2026-02-03 15:14:24[0m] (step=0030400) Train Loss: -2.9880, Train Steps/Sec: 1.03
|
| 319 |
+
[[34m2026-02-03 15:16:02[0m] (step=0030500) Train Loss: -2.9881, Train Steps/Sec: 1.03
|
| 320 |
+
[[34m2026-02-03 15:17:39[0m] (step=0030600) Train Loss: -2.9924, Train Steps/Sec: 1.02
|
| 321 |
+
[[34m2026-02-03 15:19:17[0m] (step=0030700) Train Loss: -2.9888, Train Steps/Sec: 1.02
|
| 322 |
+
[[34m2026-02-03 15:20:56[0m] (step=0030800) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 323 |
+
[[34m2026-02-03 15:22:33[0m] (step=0030900) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 324 |
+
[[34m2026-02-03 15:24:11[0m] (step=0031000) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 325 |
+
[[34m2026-02-03 15:25:49[0m] (step=0031100) Train Loss: -2.9836, Train Steps/Sec: 1.02
|
| 326 |
+
[[34m2026-02-03 15:27:27[0m] (step=0031200) Train Loss: -2.9901, Train Steps/Sec: 1.02
|
| 327 |
+
[[34m2026-02-03 15:29:05[0m] (step=0031300) Train Loss: -2.9919, Train Steps/Sec: 1.02
|
| 328 |
+
[[34m2026-02-03 15:30:42[0m] (step=0031400) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 329 |
+
[[34m2026-02-03 15:32:21[0m] (step=0031500) Train Loss: -2.9930, Train Steps/Sec: 1.02
|
| 330 |
+
[[34m2026-02-03 15:33:59[0m] (step=0031600) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 331 |
+
[[34m2026-02-03 15:35:36[0m] (step=0031700) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 332 |
+
[[34m2026-02-03 15:37:14[0m] (step=0031800) Train Loss: -2.9910, Train Steps/Sec: 1.02
|
| 333 |
+
[[34m2026-02-03 15:38:52[0m] (step=0031900) Train Loss: -2.9891, Train Steps/Sec: 1.03
|
| 334 |
+
[[34m2026-02-03 15:40:29[0m] (step=0032000) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 335 |
+
[[34m2026-02-03 15:42:04[0m] (step=0032100) Train Loss: -2.9853, Train Steps/Sec: 1.06
|
| 336 |
+
[[34m2026-02-03 15:43:42[0m] (step=0032200) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 337 |
+
[[34m2026-02-03 15:45:20[0m] (step=0032300) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 338 |
+
[[34m2026-02-03 15:46:58[0m] (step=0032400) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 339 |
+
[[34m2026-02-03 15:48:36[0m] (step=0032500) Train Loss: -2.9862, Train Steps/Sec: 1.02
|
| 340 |
+
[[34m2026-02-03 15:50:14[0m] (step=0032600) Train Loss: -2.9863, Train Steps/Sec: 1.02
|
| 341 |
+
[[34m2026-02-03 15:51:52[0m] (step=0032700) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 342 |
+
[[34m2026-02-03 15:53:30[0m] (step=0032800) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 343 |
+
[[34m2026-02-03 15:55:08[0m] (step=0032900) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 344 |
+
[[34m2026-02-03 15:56:46[0m] (step=0033000) Train Loss: -2.9921, Train Steps/Sec: 1.02
|
| 345 |
+
[[34m2026-02-03 15:58:24[0m] (step=0033100) Train Loss: -2.9846, Train Steps/Sec: 1.02
|
| 346 |
+
[[34m2026-02-03 16:00:02[0m] (step=0033200) Train Loss: -2.9850, Train Steps/Sec: 1.02
|
| 347 |
+
[[34m2026-02-03 16:01:39[0m] (step=0033300) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 348 |
+
[[34m2026-02-03 16:03:17[0m] (step=0033400) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 349 |
+
[[34m2026-02-03 16:04:55[0m] (step=0033500) Train Loss: -2.9814, Train Steps/Sec: 1.02
|
| 350 |
+
[[34m2026-02-03 16:06:33[0m] (step=0033600) Train Loss: -2.9837, Train Steps/Sec: 1.02
|
| 351 |
+
[[34m2026-02-03 16:08:11[0m] (step=0033700) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 352 |
+
[[34m2026-02-03 16:09:49[0m] (step=0033800) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 353 |
+
[[34m2026-02-03 16:11:27[0m] (step=0033900) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 354 |
+
[[34m2026-02-03 16:13:05[0m] (step=0034000) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 355 |
+
[[34m2026-02-03 16:14:43[0m] (step=0034100) Train Loss: -2.9872, Train Steps/Sec: 1.02
|
| 356 |
+
[[34m2026-02-03 16:16:20[0m] (step=0034200) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 357 |
+
[[34m2026-02-03 16:17:58[0m] (step=0034300) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 358 |
+
[[34m2026-02-03 16:19:36[0m] (step=0034400) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 359 |
+
[[34m2026-02-03 16:21:14[0m] (step=0034500) Train Loss: -2.9865, Train Steps/Sec: 1.03
|
| 360 |
+
[[34m2026-02-03 16:22:51[0m] (step=0034600) Train Loss: -2.9868, Train Steps/Sec: 1.03
|
| 361 |
+
[[34m2026-02-03 16:24:29[0m] (step=0034700) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 362 |
+
[[34m2026-02-03 16:26:07[0m] (step=0034800) Train Loss: -2.9918, Train Steps/Sec: 1.02
|
| 363 |
+
[[34m2026-02-03 16:27:45[0m] (step=0034900) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 364 |
+
[[34m2026-02-03 16:29:22[0m] (step=0035000) Train Loss: -2.9874, Train Steps/Sec: 1.03
|
| 365 |
+
[[34m2026-02-03 16:29:50[0m] Beginning epoch 7...
|
| 366 |
+
[[34m2026-02-03 16:31:03[0m] (step=0035100) Train Loss: -2.9881, Train Steps/Sec: 1.00
|
| 367 |
+
[[34m2026-02-03 16:32:41[0m] (step=0035200) Train Loss: -2.9922, Train Steps/Sec: 1.02
|
| 368 |
+
[[34m2026-02-03 16:34:19[0m] (step=0035300) Train Loss: -2.9847, Train Steps/Sec: 1.02
|
| 369 |
+
[[34m2026-02-03 16:35:56[0m] (step=0035400) Train Loss: -2.9855, Train Steps/Sec: 1.03
|
| 370 |
+
[[34m2026-02-03 16:37:35[0m] (step=0035500) Train Loss: -2.9950, Train Steps/Sec: 1.02
|
| 371 |
+
[[34m2026-02-03 16:39:12[0m] (step=0035600) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 372 |
+
[[34m2026-02-03 16:40:50[0m] (step=0035700) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 373 |
+
[[34m2026-02-03 16:42:28[0m] (step=0035800) Train Loss: -2.9853, Train Steps/Sec: 1.02
|
| 374 |
+
[[34m2026-02-03 16:44:06[0m] (step=0035900) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 375 |
+
[[34m2026-02-03 16:45:44[0m] (step=0036000) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 376 |
+
[[34m2026-02-03 16:47:21[0m] (step=0036100) Train Loss: -2.9906, Train Steps/Sec: 1.02
|
| 377 |
+
[[34m2026-02-03 16:48:59[0m] (step=0036200) Train Loss: -2.9868, Train Steps/Sec: 1.03
|
| 378 |
+
[[34m2026-02-03 16:50:37[0m] (step=0036300) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 379 |
+
[[34m2026-02-03 16:52:14[0m] (step=0036400) Train Loss: -2.9840, Train Steps/Sec: 1.03
|
| 380 |
+
[[34m2026-02-03 16:53:52[0m] (step=0036500) Train Loss: -2.9848, Train Steps/Sec: 1.02
|
| 381 |
+
[[34m2026-02-03 16:55:30[0m] (step=0036600) Train Loss: -2.9852, Train Steps/Sec: 1.02
|
| 382 |
+
[[34m2026-02-03 16:57:08[0m] (step=0036700) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 383 |
+
[[34m2026-02-03 16:58:46[0m] (step=0036800) Train Loss: -2.9842, Train Steps/Sec: 1.02
|
| 384 |
+
[[34m2026-02-03 17:00:21[0m] (step=0036900) Train Loss: -2.9858, Train Steps/Sec: 1.05
|
| 385 |
+
[[34m2026-02-03 17:01:58[0m] (step=0037000) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 386 |
+
[[34m2026-02-03 17:03:36[0m] (step=0037100) Train Loss: -2.9892, Train Steps/Sec: 1.02
|
| 387 |
+
[[34m2026-02-03 17:05:14[0m] (step=0037200) Train Loss: -2.9854, Train Steps/Sec: 1.02
|
| 388 |
+
[[34m2026-02-03 17:06:52[0m] (step=0037300) Train Loss: -2.9902, Train Steps/Sec: 1.02
|
| 389 |
+
[[34m2026-02-03 17:08:30[0m] (step=0037400) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 390 |
+
[[34m2026-02-03 17:10:08[0m] (step=0037500) Train Loss: -2.9872, Train Steps/Sec: 1.02
|
| 391 |
+
[[34m2026-02-03 17:11:46[0m] (step=0037600) Train Loss: -2.9898, Train Steps/Sec: 1.02
|
| 392 |
+
[[34m2026-02-03 17:13:24[0m] (step=0037700) Train Loss: -2.9885, Train Steps/Sec: 1.03
|
| 393 |
+
[[34m2026-02-03 17:15:02[0m] (step=0037800) Train Loss: -2.9913, Train Steps/Sec: 1.02
|
| 394 |
+
[[34m2026-02-03 17:16:40[0m] (step=0037900) Train Loss: -2.9863, Train Steps/Sec: 1.02
|
| 395 |
+
[[34m2026-02-03 17:18:18[0m] (step=0038000) Train Loss: -2.9926, Train Steps/Sec: 1.02
|
| 396 |
+
[[34m2026-02-03 17:19:56[0m] (step=0038100) Train Loss: -2.9901, Train Steps/Sec: 1.02
|
| 397 |
+
[[34m2026-02-03 17:21:34[0m] (step=0038200) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 398 |
+
[[34m2026-02-03 17:23:12[0m] (step=0038300) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 399 |
+
[[34m2026-02-03 17:24:50[0m] (step=0038400) Train Loss: -2.9900, Train Steps/Sec: 1.02
|
| 400 |
+
[[34m2026-02-03 17:26:27[0m] (step=0038500) Train Loss: -2.9831, Train Steps/Sec: 1.02
|
| 401 |
+
[[34m2026-02-03 17:28:05[0m] (step=0038600) Train Loss: -2.9854, Train Steps/Sec: 1.03
|
| 402 |
+
[[34m2026-02-03 17:29:43[0m] (step=0038700) Train Loss: -2.9888, Train Steps/Sec: 1.02
|
| 403 |
+
[[34m2026-02-03 17:31:21[0m] (step=0038800) Train Loss: -2.9871, Train Steps/Sec: 1.02
|
| 404 |
+
[[34m2026-02-03 17:32:59[0m] (step=0038900) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 405 |
+
[[34m2026-02-03 17:34:37[0m] (step=0039000) Train Loss: -2.9915, Train Steps/Sec: 1.03
|
| 406 |
+
[[34m2026-02-03 17:36:15[0m] (step=0039100) Train Loss: -2.9872, Train Steps/Sec: 1.02
|
| 407 |
+
[[34m2026-02-03 17:37:53[0m] (step=0039200) Train Loss: -2.9919, Train Steps/Sec: 1.02
|
| 408 |
+
[[34m2026-02-03 17:39:30[0m] (step=0039300) Train Loss: -2.9894, Train Steps/Sec: 1.03
|
| 409 |
+
[[34m2026-02-03 17:41:08[0m] (step=0039400) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 410 |
+
[[34m2026-02-03 17:42:46[0m] (step=0039500) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 411 |
+
[[34m2026-02-03 17:44:24[0m] (step=0039600) Train Loss: -2.9895, Train Steps/Sec: 1.02
|
| 412 |
+
[[34m2026-02-03 17:46:02[0m] (step=0039700) Train Loss: -2.9927, Train Steps/Sec: 1.02
|
| 413 |
+
[[34m2026-02-03 17:47:39[0m] (step=0039800) Train Loss: -2.9872, Train Steps/Sec: 1.02
|
| 414 |
+
[[34m2026-02-03 17:49:17[0m] (step=0039900) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 415 |
+
[[34m2026-02-03 17:50:55[0m] (step=0040000) Train Loss: -2.9913, Train Steps/Sec: 1.02
|
| 416 |
+
[[34m2026-02-03 17:51:27[0m] Beginning epoch 8...
|
| 417 |
+
[[34m2026-02-03 17:52:35[0m] (step=0040100) Train Loss: -2.9853, Train Steps/Sec: 1.00
|
| 418 |
+
[[34m2026-02-03 17:54:13[0m] (step=0040200) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 419 |
+
[[34m2026-02-03 17:55:51[0m] (step=0040300) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 420 |
+
[[34m2026-02-03 17:57:29[0m] (step=0040400) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 421 |
+
[[34m2026-02-03 17:59:06[0m] (step=0040500) Train Loss: -2.9873, Train Steps/Sec: 1.03
|
| 422 |
+
[[34m2026-02-03 18:00:44[0m] (step=0040600) Train Loss: -2.9879, Train Steps/Sec: 1.03
|
| 423 |
+
[[34m2026-02-03 18:02:22[0m] (step=0040700) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 424 |
+
[[34m2026-02-03 18:04:00[0m] (step=0040800) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 425 |
+
[[34m2026-02-03 18:05:38[0m] (step=0040900) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 426 |
+
[[34m2026-02-03 18:07:16[0m] (step=0041000) Train Loss: -2.9832, Train Steps/Sec: 1.02
|
| 427 |
+
[[34m2026-02-03 18:08:54[0m] (step=0041100) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 428 |
+
[[34m2026-02-03 18:10:31[0m] (step=0041200) Train Loss: -2.9928, Train Steps/Sec: 1.02
|
| 429 |
+
[[34m2026-02-03 18:12:09[0m] (step=0041300) Train Loss: -2.9884, Train Steps/Sec: 1.02
|
| 430 |
+
[[34m2026-02-03 18:13:47[0m] (step=0041400) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 431 |
+
[[34m2026-02-03 18:15:25[0m] (step=0041500) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 432 |
+
[[34m2026-02-03 18:17:00[0m] (step=0041600) Train Loss: -2.9869, Train Steps/Sec: 1.06
|
| 433 |
+
[[34m2026-02-03 18:18:37[0m] (step=0041700) Train Loss: -2.9919, Train Steps/Sec: 1.03
|
| 434 |
+
[[34m2026-02-03 18:20:15[0m] (step=0041800) Train Loss: -2.9858, Train Steps/Sec: 1.02
|
| 435 |
+
[[34m2026-02-03 18:21:53[0m] (step=0041900) Train Loss: -2.9856, Train Steps/Sec: 1.02
|
| 436 |
+
[[34m2026-02-03 18:23:30[0m] (step=0042000) Train Loss: -2.9883, Train Steps/Sec: 1.02
|
| 437 |
+
[[34m2026-02-03 18:25:08[0m] (step=0042100) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 438 |
+
[[34m2026-02-03 18:26:46[0m] (step=0042200) Train Loss: -2.9826, Train Steps/Sec: 1.03
|
| 439 |
+
[[34m2026-02-03 18:28:23[0m] (step=0042300) Train Loss: -2.9954, Train Steps/Sec: 1.03
|
| 440 |
+
[[34m2026-02-03 18:30:00[0m] (step=0042400) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 441 |
+
[[34m2026-02-03 18:31:38[0m] (step=0042500) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 442 |
+
[[34m2026-02-03 18:33:16[0m] (step=0042600) Train Loss: -2.9924, Train Steps/Sec: 1.02
|
| 443 |
+
[[34m2026-02-03 18:34:54[0m] (step=0042700) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 444 |
+
[[34m2026-02-03 18:36:32[0m] (step=0042800) Train Loss: -2.9871, Train Steps/Sec: 1.02
|
| 445 |
+
[[34m2026-02-03 18:38:10[0m] (step=0042900) Train Loss: -2.9893, Train Steps/Sec: 1.02
|
| 446 |
+
[[34m2026-02-03 18:39:48[0m] (step=0043000) Train Loss: -2.9860, Train Steps/Sec: 1.02
|
| 447 |
+
[[34m2026-02-03 18:41:25[0m] (step=0043100) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 448 |
+
[[34m2026-02-03 18:43:03[0m] (step=0043200) Train Loss: -2.9875, Train Steps/Sec: 1.03
|
| 449 |
+
[[34m2026-02-03 18:44:41[0m] (step=0043300) Train Loss: -2.9882, Train Steps/Sec: 1.02
|
| 450 |
+
[[34m2026-02-03 18:46:19[0m] (step=0043400) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 451 |
+
[[34m2026-02-03 18:47:57[0m] (step=0043500) Train Loss: -2.9948, Train Steps/Sec: 1.02
|
| 452 |
+
[[34m2026-02-03 18:49:34[0m] (step=0043600) Train Loss: -2.9888, Train Steps/Sec: 1.02
|
| 453 |
+
[[34m2026-02-03 18:51:12[0m] (step=0043700) Train Loss: -2.9846, Train Steps/Sec: 1.02
|
| 454 |
+
[[34m2026-02-03 18:52:51[0m] (step=0043800) Train Loss: -2.9913, Train Steps/Sec: 1.01
|
| 455 |
+
[[34m2026-02-03 18:54:29[0m] (step=0043900) Train Loss: -2.9863, Train Steps/Sec: 1.02
|
| 456 |
+
[[34m2026-02-03 18:56:07[0m] (step=0044000) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 457 |
+
[[34m2026-02-03 18:57:45[0m] (step=0044100) Train Loss: -2.9862, Train Steps/Sec: 1.02
|
| 458 |
+
[[34m2026-02-03 18:59:23[0m] (step=0044200) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 459 |
+
[[34m2026-02-03 19:01:00[0m] (step=0044300) Train Loss: -2.9850, Train Steps/Sec: 1.03
|
| 460 |
+
[[34m2026-02-03 19:02:38[0m] (step=0044400) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 461 |
+
[[34m2026-02-03 19:04:16[0m] (step=0044500) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 462 |
+
[[34m2026-02-03 19:05:53[0m] (step=0044600) Train Loss: -2.9881, Train Steps/Sec: 1.03
|
| 463 |
+
[[34m2026-02-03 19:07:31[0m] (step=0044700) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 464 |
+
[[34m2026-02-03 19:09:09[0m] (step=0044800) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 465 |
+
[[34m2026-02-03 19:10:47[0m] (step=0044900) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 466 |
+
[[34m2026-02-03 19:12:25[0m] (step=0045000) Train Loss: -2.9902, Train Steps/Sec: 1.02
|
| 467 |
+
[[34m2026-02-03 19:13:00[0m] Beginning epoch 9...
|
| 468 |
+
[[34m2026-02-03 19:14:05[0m] (step=0045100) Train Loss: -2.9923, Train Steps/Sec: 1.00
|
| 469 |
+
[[34m2026-02-03 19:15:43[0m] (step=0045200) Train Loss: -2.9882, Train Steps/Sec: 1.02
|
| 470 |
+
[[34m2026-02-03 19:17:21[0m] (step=0045300) Train Loss: -2.9932, Train Steps/Sec: 1.02
|
| 471 |
+
[[34m2026-02-03 19:18:59[0m] (step=0045400) Train Loss: -2.9883, Train Steps/Sec: 1.02
|
| 472 |
+
[[34m2026-02-03 19:20:37[0m] (step=0045500) Train Loss: -2.9825, Train Steps/Sec: 1.02
|
| 473 |
+
[[34m2026-02-03 19:22:15[0m] (step=0045600) Train Loss: -2.9882, Train Steps/Sec: 1.02
|
| 474 |
+
[[34m2026-02-03 19:23:54[0m] (step=0045700) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 475 |
+
[[34m2026-02-03 19:25:31[0m] (step=0045800) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 476 |
+
[[34m2026-02-03 19:27:09[0m] (step=0045900) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 477 |
+
[[34m2026-02-03 19:28:47[0m] (step=0046000) Train Loss: -2.9868, Train Steps/Sec: 1.03
|
| 478 |
+
[[34m2026-02-03 19:30:25[0m] (step=0046100) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 479 |
+
[[34m2026-02-03 19:32:03[0m] (step=0046200) Train Loss: -2.9925, Train Steps/Sec: 1.02
|
| 480 |
+
[[34m2026-02-03 19:33:40[0m] (step=0046300) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 481 |
+
[[34m2026-02-03 19:35:15[0m] (step=0046400) Train Loss: -2.9863, Train Steps/Sec: 1.06
|
| 482 |
+
[[34m2026-02-03 19:36:53[0m] (step=0046500) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 483 |
+
[[34m2026-02-03 19:38:30[0m] (step=0046600) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 484 |
+
[[34m2026-02-03 19:40:08[0m] (step=0046700) Train Loss: -2.9853, Train Steps/Sec: 1.02
|
| 485 |
+
[[34m2026-02-03 19:41:47[0m] (step=0046800) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 486 |
+
[[34m2026-02-03 19:43:25[0m] (step=0046900) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 487 |
+
[[34m2026-02-03 19:45:03[0m] (step=0047000) Train Loss: -2.9863, Train Steps/Sec: 1.02
|
| 488 |
+
[[34m2026-02-03 19:46:40[0m] (step=0047100) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 489 |
+
[[34m2026-02-03 19:48:18[0m] (step=0047200) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 490 |
+
[[34m2026-02-03 19:49:56[0m] (step=0047300) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 491 |
+
[[34m2026-02-03 19:51:34[0m] (step=0047400) Train Loss: -2.9927, Train Steps/Sec: 1.02
|
| 492 |
+
[[34m2026-02-03 19:53:12[0m] (step=0047500) Train Loss: -2.9912, Train Steps/Sec: 1.02
|
| 493 |
+
[[34m2026-02-03 19:54:50[0m] (step=0047600) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 494 |
+
[[34m2026-02-03 19:56:28[0m] (step=0047700) Train Loss: -2.9892, Train Steps/Sec: 1.02
|
| 495 |
+
[[34m2026-02-03 19:58:06[0m] (step=0047800) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 496 |
+
[[34m2026-02-03 19:59:44[0m] (step=0047900) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 497 |
+
[[34m2026-02-03 20:01:22[0m] (step=0048000) Train Loss: -2.9915, Train Steps/Sec: 1.02
|
| 498 |
+
[[34m2026-02-03 20:03:00[0m] (step=0048100) Train Loss: -2.9858, Train Steps/Sec: 1.02
|
| 499 |
+
[[34m2026-02-03 20:04:38[0m] (step=0048200) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 500 |
+
[[34m2026-02-03 20:06:16[0m] (step=0048300) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 501 |
+
[[34m2026-02-03 20:07:54[0m] (step=0048400) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 502 |
+
[[34m2026-02-03 20:09:32[0m] (step=0048500) Train Loss: -2.9878, Train Steps/Sec: 1.02
|
| 503 |
+
[[34m2026-02-03 20:11:10[0m] (step=0048600) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 504 |
+
[[34m2026-02-03 20:12:48[0m] (step=0048700) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 505 |
+
[[34m2026-02-03 20:14:26[0m] (step=0048800) Train Loss: -2.9853, Train Steps/Sec: 1.02
|
| 506 |
+
[[34m2026-02-03 20:16:04[0m] (step=0048900) Train Loss: -2.9901, Train Steps/Sec: 1.02
|
| 507 |
+
[[34m2026-02-03 20:17:41[0m] (step=0049000) Train Loss: -2.9853, Train Steps/Sec: 1.02
|
| 508 |
+
[[34m2026-02-03 20:19:19[0m] (step=0049100) Train Loss: -2.9849, Train Steps/Sec: 1.02
|
| 509 |
+
[[34m2026-02-03 20:20:57[0m] (step=0049200) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 510 |
+
[[34m2026-02-03 20:22:35[0m] (step=0049300) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 511 |
+
[[34m2026-02-03 20:24:12[0m] (step=0049400) Train Loss: -2.9888, Train Steps/Sec: 1.03
|
| 512 |
+
[[34m2026-02-03 20:25:51[0m] (step=0049500) Train Loss: -2.9911, Train Steps/Sec: 1.02
|
| 513 |
+
[[34m2026-02-03 20:27:29[0m] (step=0049600) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 514 |
+
[[34m2026-02-03 20:29:07[0m] (step=0049700) Train Loss: -2.9921, Train Steps/Sec: 1.02
|
| 515 |
+
[[34m2026-02-03 20:30:45[0m] (step=0049800) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 516 |
+
[[34m2026-02-03 20:32:23[0m] (step=0049900) Train Loss: -2.9805, Train Steps/Sec: 1.02
|
| 517 |
+
[[34m2026-02-03 20:34:01[0m] (step=0050000) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 518 |
+
[[34m2026-02-03 20:34:02[0m] Saved checkpoint to results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/checkpoints/0050000.pt
|
| 519 |
+
[[34m2026-02-03 20:34:41[0m] Beginning epoch 10...
|
| 520 |
+
[[34m2026-02-03 20:35:42[0m] (step=0050100) Train Loss: -2.9896, Train Steps/Sec: 0.99
|
| 521 |
+
[[34m2026-02-03 20:37:20[0m] (step=0050200) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 522 |
+
[[34m2026-02-03 20:38:58[0m] (step=0050300) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 523 |
+
[[34m2026-02-03 20:40:20[0m] Generating EMA samples...
|
| 524 |
+
[[34m2026-02-03 20:40:35[0m] (step=0050400) Train Loss: -2.9846, Train Steps/Sec: 1.03
|
| 525 |
+
[[34m2026-02-03 20:42:13[0m] (step=0050500) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 526 |
+
[[34m2026-02-03 20:43:51[0m] (step=0050600) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 527 |
+
[[34m2026-02-03 20:45:29[0m] (step=0050700) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 528 |
+
[[34m2026-02-03 20:47:07[0m] (step=0050800) Train Loss: -2.9852, Train Steps/Sec: 1.02
|
| 529 |
+
[[34m2026-02-03 20:48:44[0m] (step=0050900) Train Loss: -2.9878, Train Steps/Sec: 1.03
|
| 530 |
+
[[34m2026-02-03 20:50:21[0m] (step=0051000) Train Loss: -2.9897, Train Steps/Sec: 1.03
|
| 531 |
+
[[34m2026-02-03 20:52:00[0m] (step=0051100) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 532 |
+
[[34m2026-02-03 20:53:34[0m] (step=0051200) Train Loss: -2.9882, Train Steps/Sec: 1.06
|
| 533 |
+
[[34m2026-02-03 20:55:12[0m] (step=0051300) Train Loss: -2.9856, Train Steps/Sec: 1.02
|
| 534 |
+
[[34m2026-02-03 20:56:50[0m] (step=0051400) Train Loss: -2.9870, Train Steps/Sec: 1.02
|
| 535 |
+
[[34m2026-02-03 20:58:28[0m] (step=0051500) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 536 |
+
[[34m2026-02-03 21:00:06[0m] (step=0051600) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 537 |
+
[[34m2026-02-03 21:01:44[0m] (step=0051700) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 538 |
+
[[34m2026-02-03 21:03:22[0m] (step=0051800) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 539 |
+
[[34m2026-02-03 21:04:59[0m] (step=0051900) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 540 |
+
[[34m2026-02-03 21:06:38[0m] (step=0052000) Train Loss: -2.9898, Train Steps/Sec: 1.02
|
| 541 |
+
[[34m2026-02-03 21:08:16[0m] (step=0052100) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 542 |
+
[[34m2026-02-03 21:09:54[0m] (step=0052200) Train Loss: -2.9888, Train Steps/Sec: 1.02
|
| 543 |
+
[[34m2026-02-03 21:11:31[0m] (step=0052300) Train Loss: -2.9868, Train Steps/Sec: 1.03
|
| 544 |
+
[[34m2026-02-03 21:13:09[0m] (step=0052400) Train Loss: -2.9857, Train Steps/Sec: 1.02
|
| 545 |
+
[[34m2026-02-03 21:14:47[0m] (step=0052500) Train Loss: -2.9898, Train Steps/Sec: 1.03
|
| 546 |
+
[[34m2026-02-03 21:16:25[0m] (step=0052600) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 547 |
+
[[34m2026-02-03 21:18:03[0m] (step=0052700) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 548 |
+
[[34m2026-02-03 21:19:40[0m] (step=0052800) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 549 |
+
[[34m2026-02-03 21:21:18[0m] (step=0052900) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 550 |
+
[[34m2026-02-03 21:22:56[0m] (step=0053000) Train Loss: -2.9871, Train Steps/Sec: 1.02
|
| 551 |
+
[[34m2026-02-03 21:24:34[0m] (step=0053100) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 552 |
+
[[34m2026-02-03 21:26:12[0m] (step=0053200) Train Loss: -2.9921, Train Steps/Sec: 1.02
|
| 553 |
+
[[34m2026-02-03 21:27:49[0m] (step=0053300) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 554 |
+
[[34m2026-02-03 21:29:27[0m] (step=0053400) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 555 |
+
[[34m2026-02-03 21:31:05[0m] (step=0053500) Train Loss: -2.9909, Train Steps/Sec: 1.02
|
| 556 |
+
[[34m2026-02-03 21:32:43[0m] (step=0053600) Train Loss: -2.9928, Train Steps/Sec: 1.02
|
| 557 |
+
[[34m2026-02-03 21:34:21[0m] (step=0053700) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 558 |
+
[[34m2026-02-03 21:35:59[0m] (step=0053800) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 559 |
+
[[34m2026-02-03 21:37:37[0m] (step=0053900) Train Loss: -2.9883, Train Steps/Sec: 1.02
|
| 560 |
+
[[34m2026-02-03 21:39:15[0m] (step=0054000) Train Loss: -2.9844, Train Steps/Sec: 1.02
|
| 561 |
+
[[34m2026-02-03 21:40:53[0m] (step=0054100) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 562 |
+
[[34m2026-02-03 21:42:31[0m] (step=0054200) Train Loss: -2.9911, Train Steps/Sec: 1.02
|
| 563 |
+
[[34m2026-02-03 21:44:09[0m] (step=0054300) Train Loss: -2.9915, Train Steps/Sec: 1.02
|
| 564 |
+
[[34m2026-02-03 21:45:47[0m] (step=0054400) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 565 |
+
[[34m2026-02-03 21:47:24[0m] (step=0054500) Train Loss: -2.9854, Train Steps/Sec: 1.03
|
| 566 |
+
[[34m2026-02-03 21:49:02[0m] (step=0054600) Train Loss: -2.9923, Train Steps/Sec: 1.02
|
| 567 |
+
[[34m2026-02-03 21:50:39[0m] (step=0054700) Train Loss: -2.9864, Train Steps/Sec: 1.03
|
| 568 |
+
[[34m2026-02-03 21:52:17[0m] (step=0054800) Train Loss: -2.9826, Train Steps/Sec: 1.02
|
| 569 |
+
[[34m2026-02-03 21:53:55[0m] (step=0054900) Train Loss: -2.9858, Train Steps/Sec: 1.02
|
| 570 |
+
[[34m2026-02-03 21:55:33[0m] (step=0055000) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 571 |
+
[[34m2026-02-03 21:56:16[0m] Beginning epoch 11...
|
| 572 |
+
[[34m2026-02-03 21:57:13[0m] (step=0055100) Train Loss: -2.9926, Train Steps/Sec: 1.00
|
| 573 |
+
[[34m2026-02-03 21:58:50[0m] (step=0055200) Train Loss: -2.9919, Train Steps/Sec: 1.02
|
| 574 |
+
[[34m2026-02-03 22:00:28[0m] (step=0055300) Train Loss: -2.9910, Train Steps/Sec: 1.02
|
| 575 |
+
[[34m2026-02-03 22:02:06[0m] (step=0055400) Train Loss: -2.9851, Train Steps/Sec: 1.02
|
| 576 |
+
[[34m2026-02-03 22:03:44[0m] (step=0055500) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 577 |
+
[[34m2026-02-03 22:05:22[0m] (step=0055600) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 578 |
+
[[34m2026-02-03 22:07:00[0m] (step=0055700) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 579 |
+
[[34m2026-02-03 22:08:37[0m] (step=0055800) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 580 |
+
[[34m2026-02-03 22:10:15[0m] (step=0055900) Train Loss: -2.9909, Train Steps/Sec: 1.02
|
| 581 |
+
[[34m2026-02-03 22:11:50[0m] (step=0056000) Train Loss: -2.9884, Train Steps/Sec: 1.06
|
| 582 |
+
[[34m2026-02-03 22:13:28[0m] (step=0056100) Train Loss: -2.9902, Train Steps/Sec: 1.02
|
| 583 |
+
[[34m2026-02-03 22:15:05[0m] (step=0056200) Train Loss: -2.9904, Train Steps/Sec: 1.03
|
| 584 |
+
[[34m2026-02-03 22:16:43[0m] (step=0056300) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 585 |
+
[[34m2026-02-03 22:18:21[0m] (step=0056400) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 586 |
+
[[34m2026-02-03 22:19:59[0m] (step=0056500) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 587 |
+
[[34m2026-02-03 22:21:37[0m] (step=0056600) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 588 |
+
[[34m2026-02-03 22:23:15[0m] (step=0056700) Train Loss: -2.9888, Train Steps/Sec: 1.02
|
| 589 |
+
[[34m2026-02-03 22:24:53[0m] (step=0056800) Train Loss: -2.9846, Train Steps/Sec: 1.02
|
| 590 |
+
[[34m2026-02-03 22:26:32[0m] (step=0056900) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 591 |
+
[[34m2026-02-03 22:28:10[0m] (step=0057000) Train Loss: -2.9846, Train Steps/Sec: 1.02
|
| 592 |
+
[[34m2026-02-03 22:29:48[0m] (step=0057100) Train Loss: -2.9884, Train Steps/Sec: 1.02
|
| 593 |
+
[[34m2026-02-03 22:31:26[0m] (step=0057200) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 594 |
+
[[34m2026-02-03 22:33:04[0m] (step=0057300) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 595 |
+
[[34m2026-02-03 22:34:41[0m] (step=0057400) Train Loss: -2.9913, Train Steps/Sec: 1.02
|
| 596 |
+
[[34m2026-02-03 22:36:19[0m] (step=0057500) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 597 |
+
[[34m2026-02-03 22:37:57[0m] (step=0057600) Train Loss: -2.9872, Train Steps/Sec: 1.03
|
| 598 |
+
[[34m2026-02-03 22:39:34[0m] (step=0057700) Train Loss: -2.9902, Train Steps/Sec: 1.03
|
| 599 |
+
[[34m2026-02-03 22:41:12[0m] (step=0057800) Train Loss: -2.9949, Train Steps/Sec: 1.02
|
| 600 |
+
[[34m2026-02-03 22:42:50[0m] (step=0057900) Train Loss: -2.9919, Train Steps/Sec: 1.02
|
| 601 |
+
[[34m2026-02-03 22:44:28[0m] (step=0058000) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 602 |
+
[[34m2026-02-03 22:46:06[0m] (step=0058100) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 603 |
+
[[34m2026-02-03 22:47:44[0m] (step=0058200) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 604 |
+
[[34m2026-02-03 22:49:22[0m] (step=0058300) Train Loss: -2.9900, Train Steps/Sec: 1.02
|
| 605 |
+
[[34m2026-02-03 22:50:59[0m] (step=0058400) Train Loss: -2.9865, Train Steps/Sec: 1.03
|
| 606 |
+
[[34m2026-02-03 22:52:37[0m] (step=0058500) Train Loss: -2.9851, Train Steps/Sec: 1.02
|
| 607 |
+
[[34m2026-02-03 22:54:15[0m] (step=0058600) Train Loss: -2.9861, Train Steps/Sec: 1.01
|
| 608 |
+
[[34m2026-02-03 22:55:53[0m] (step=0058700) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 609 |
+
[[34m2026-02-03 22:57:31[0m] (step=0058800) Train Loss: -2.9918, Train Steps/Sec: 1.02
|
| 610 |
+
[[34m2026-02-03 22:59:09[0m] (step=0058900) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 611 |
+
[[34m2026-02-03 23:00:47[0m] (step=0059000) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 612 |
+
[[34m2026-02-03 23:02:25[0m] (step=0059100) Train Loss: -2.9920, Train Steps/Sec: 1.02
|
| 613 |
+
[[34m2026-02-03 23:04:03[0m] (step=0059200) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 614 |
+
[[34m2026-02-03 23:05:41[0m] (step=0059300) Train Loss: -2.9895, Train Steps/Sec: 1.02
|
| 615 |
+
[[34m2026-02-03 23:07:19[0m] (step=0059400) Train Loss: -2.9911, Train Steps/Sec: 1.02
|
| 616 |
+
[[34m2026-02-03 23:08:57[0m] (step=0059500) Train Loss: -2.9857, Train Steps/Sec: 1.02
|
| 617 |
+
[[34m2026-02-03 23:10:34[0m] (step=0059600) Train Loss: -2.9925, Train Steps/Sec: 1.03
|
| 618 |
+
[[34m2026-02-03 23:12:12[0m] (step=0059700) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 619 |
+
[[34m2026-02-03 23:13:50[0m] (step=0059800) Train Loss: -2.9883, Train Steps/Sec: 1.02
|
| 620 |
+
[[34m2026-02-03 23:15:28[0m] (step=0059900) Train Loss: -2.9914, Train Steps/Sec: 1.02
|
| 621 |
+
[[34m2026-02-03 23:17:06[0m] (step=0060000) Train Loss: -2.9892, Train Steps/Sec: 1.02
|
| 622 |
+
[[34m2026-02-03 23:17:53[0m] Beginning epoch 12...
|
| 623 |
+
[[34m2026-02-03 23:18:45[0m] (step=0060100) Train Loss: -2.9931, Train Steps/Sec: 1.00
|
| 624 |
+
[[34m2026-02-03 23:20:23[0m] (step=0060200) Train Loss: -2.9852, Train Steps/Sec: 1.02
|
| 625 |
+
[[34m2026-02-03 23:22:01[0m] (step=0060300) Train Loss: -2.9839, Train Steps/Sec: 1.02
|
| 626 |
+
[[34m2026-02-03 23:23:39[0m] (step=0060400) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 627 |
+
[[34m2026-02-03 23:25:17[0m] (step=0060500) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 628 |
+
[[34m2026-02-03 23:26:55[0m] (step=0060600) Train Loss: -2.9869, Train Steps/Sec: 1.03
|
| 629 |
+
[[34m2026-02-03 23:28:33[0m] (step=0060700) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 630 |
+
[[34m2026-02-03 23:30:07[0m] (step=0060800) Train Loss: -2.9867, Train Steps/Sec: 1.06
|
| 631 |
+
[[34m2026-02-03 23:31:45[0m] (step=0060900) Train Loss: -2.9912, Train Steps/Sec: 1.02
|
| 632 |
+
[[34m2026-02-03 23:33:23[0m] (step=0061000) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 633 |
+
[[34m2026-02-03 23:35:01[0m] (step=0061100) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 634 |
+
[[34m2026-02-03 23:36:39[0m] (step=0061200) Train Loss: -2.9844, Train Steps/Sec: 1.02
|
| 635 |
+
[[34m2026-02-03 23:38:17[0m] (step=0061300) Train Loss: -2.9937, Train Steps/Sec: 1.02
|
| 636 |
+
[[34m2026-02-03 23:39:55[0m] (step=0061400) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 637 |
+
[[34m2026-02-03 23:41:33[0m] (step=0061500) Train Loss: -2.9898, Train Steps/Sec: 1.02
|
| 638 |
+
[[34m2026-02-03 23:43:10[0m] (step=0061600) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 639 |
+
[[34m2026-02-03 23:44:48[0m] (step=0061700) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 640 |
+
[[34m2026-02-03 23:46:25[0m] (step=0061800) Train Loss: -2.9888, Train Steps/Sec: 1.03
|
| 641 |
+
[[34m2026-02-03 23:48:03[0m] (step=0061900) Train Loss: -2.9867, Train Steps/Sec: 1.03
|
| 642 |
+
[[34m2026-02-03 23:49:41[0m] (step=0062000) Train Loss: -2.9901, Train Steps/Sec: 1.02
|
| 643 |
+
[[34m2026-02-03 23:51:19[0m] (step=0062100) Train Loss: -2.9850, Train Steps/Sec: 1.02
|
| 644 |
+
[[34m2026-02-03 23:52:56[0m] (step=0062200) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 645 |
+
[[34m2026-02-03 23:54:34[0m] (step=0062300) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 646 |
+
[[34m2026-02-03 23:56:12[0m] (step=0062400) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 647 |
+
[[34m2026-02-03 23:57:50[0m] (step=0062500) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 648 |
+
[[34m2026-02-03 23:59:28[0m] (step=0062600) Train Loss: -2.9854, Train Steps/Sec: 1.02
|
| 649 |
+
[[34m2026-02-04 00:01:06[0m] (step=0062700) Train Loss: -2.9918, Train Steps/Sec: 1.02
|
| 650 |
+
[[34m2026-02-04 00:02:44[0m] (step=0062800) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 651 |
+
[[34m2026-02-04 00:04:21[0m] (step=0062900) Train Loss: -2.9891, Train Steps/Sec: 1.03
|
| 652 |
+
[[34m2026-02-04 00:05:58[0m] (step=0063000) Train Loss: -2.9885, Train Steps/Sec: 1.03
|
| 653 |
+
[[34m2026-02-04 00:07:36[0m] (step=0063100) Train Loss: -2.9878, Train Steps/Sec: 1.02
|
| 654 |
+
[[34m2026-02-04 00:09:14[0m] (step=0063200) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 655 |
+
[[34m2026-02-04 00:10:52[0m] (step=0063300) Train Loss: -2.9942, Train Steps/Sec: 1.02
|
| 656 |
+
[[34m2026-02-04 00:12:30[0m] (step=0063400) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 657 |
+
[[34m2026-02-04 00:14:07[0m] (step=0063500) Train Loss: -2.9898, Train Steps/Sec: 1.03
|
| 658 |
+
[[34m2026-02-04 00:15:46[0m] (step=0063600) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 659 |
+
[[34m2026-02-04 00:17:24[0m] (step=0063700) Train Loss: -2.9892, Train Steps/Sec: 1.02
|
| 660 |
+
[[34m2026-02-04 00:19:02[0m] (step=0063800) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 661 |
+
[[34m2026-02-04 00:20:39[0m] (step=0063900) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 662 |
+
[[34m2026-02-04 00:22:18[0m] (step=0064000) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 663 |
+
[[34m2026-02-04 00:23:55[0m] (step=0064100) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 664 |
+
[[34m2026-02-04 00:25:33[0m] (step=0064200) Train Loss: -2.9838, Train Steps/Sec: 1.02
|
| 665 |
+
[[34m2026-02-04 00:27:11[0m] (step=0064300) Train Loss: -2.9857, Train Steps/Sec: 1.02
|
| 666 |
+
[[34m2026-02-04 00:28:49[0m] (step=0064400) Train Loss: -2.9905, Train Steps/Sec: 1.03
|
| 667 |
+
[[34m2026-02-04 00:30:26[0m] (step=0064500) Train Loss: -2.9910, Train Steps/Sec: 1.02
|
| 668 |
+
[[34m2026-02-04 00:32:05[0m] (step=0064600) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 669 |
+
[[34m2026-02-04 00:33:42[0m] (step=0064700) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 670 |
+
[[34m2026-02-04 00:35:20[0m] (step=0064800) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 671 |
+
[[34m2026-02-04 00:36:58[0m] (step=0064900) Train Loss: -2.9893, Train Steps/Sec: 1.02
|
| 672 |
+
[[34m2026-02-04 00:38:36[0m] (step=0065000) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 673 |
+
[[34m2026-02-04 00:39:28[0m] Beginning epoch 13...
|
| 674 |
+
[[34m2026-02-04 00:40:16[0m] (step=0065100) Train Loss: -2.9899, Train Steps/Sec: 1.00
|
| 675 |
+
[[34m2026-02-04 00:41:54[0m] (step=0065200) Train Loss: -2.9946, Train Steps/Sec: 1.02
|
| 676 |
+
[[34m2026-02-04 00:43:32[0m] (step=0065300) Train Loss: -2.9928, Train Steps/Sec: 1.02
|
| 677 |
+
[[34m2026-02-04 00:45:10[0m] (step=0065400) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 678 |
+
[[34m2026-02-04 00:46:46[0m] (step=0065500) Train Loss: -2.9877, Train Steps/Sec: 1.05
|
| 679 |
+
[[34m2026-02-04 00:48:22[0m] (step=0065600) Train Loss: -2.9892, Train Steps/Sec: 1.03
|
| 680 |
+
[[34m2026-02-04 00:50:00[0m] (step=0065700) Train Loss: -2.9847, Train Steps/Sec: 1.02
|
| 681 |
+
[[34m2026-02-04 00:51:38[0m] (step=0065800) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 682 |
+
[[34m2026-02-04 00:53:16[0m] (step=0065900) Train Loss: -2.9838, Train Steps/Sec: 1.03
|
| 683 |
+
[[34m2026-02-04 00:54:54[0m] (step=0066000) Train Loss: -2.9848, Train Steps/Sec: 1.02
|
| 684 |
+
[[34m2026-02-04 00:56:31[0m] (step=0066100) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 685 |
+
[[34m2026-02-04 00:58:08[0m] (step=0066200) Train Loss: -2.9903, Train Steps/Sec: 1.03
|
| 686 |
+
[[34m2026-02-04 00:59:46[0m] (step=0066300) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 687 |
+
[[34m2026-02-04 01:01:24[0m] (step=0066400) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 688 |
+
[[34m2026-02-04 01:03:02[0m] (step=0066500) Train Loss: -2.9850, Train Steps/Sec: 1.03
|
| 689 |
+
[[34m2026-02-04 01:04:40[0m] (step=0066600) Train Loss: -2.9870, Train Steps/Sec: 1.02
|
| 690 |
+
[[34m2026-02-04 01:06:18[0m] (step=0066700) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 691 |
+
[[34m2026-02-04 01:07:56[0m] (step=0066800) Train Loss: -2.9895, Train Steps/Sec: 1.02
|
| 692 |
+
[[34m2026-02-04 01:09:34[0m] (step=0066900) Train Loss: -2.9862, Train Steps/Sec: 1.02
|
| 693 |
+
[[34m2026-02-04 01:11:11[0m] (step=0067000) Train Loss: -2.9913, Train Steps/Sec: 1.03
|
| 694 |
+
[[34m2026-02-04 01:12:48[0m] (step=0067100) Train Loss: -2.9877, Train Steps/Sec: 1.03
|
| 695 |
+
[[34m2026-02-04 01:14:26[0m] (step=0067200) Train Loss: -2.9923, Train Steps/Sec: 1.03
|
| 696 |
+
[[34m2026-02-04 01:16:04[0m] (step=0067300) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 697 |
+
[[34m2026-02-04 01:17:42[0m] (step=0067400) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 698 |
+
[[34m2026-02-04 01:19:20[0m] (step=0067500) Train Loss: -2.9905, Train Steps/Sec: 1.02
|
| 699 |
+
[[34m2026-02-04 01:20:58[0m] (step=0067600) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 700 |
+
[[34m2026-02-04 01:22:36[0m] (step=0067700) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 701 |
+
[[34m2026-02-04 01:24:14[0m] (step=0067800) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 702 |
+
[[34m2026-02-04 01:25:52[0m] (step=0067900) Train Loss: -2.9875, Train Steps/Sec: 1.03
|
| 703 |
+
[[34m2026-02-04 01:27:29[0m] (step=0068000) Train Loss: -2.9834, Train Steps/Sec: 1.02
|
| 704 |
+
[[34m2026-02-04 01:29:07[0m] (step=0068100) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 705 |
+
[[34m2026-02-04 01:30:45[0m] (step=0068200) Train Loss: -2.9882, Train Steps/Sec: 1.02
|
| 706 |
+
[[34m2026-02-04 01:32:22[0m] (step=0068300) Train Loss: -2.9922, Train Steps/Sec: 1.03
|
| 707 |
+
[[34m2026-02-04 01:34:01[0m] (step=0068400) Train Loss: -2.9823, Train Steps/Sec: 1.02
|
| 708 |
+
[[34m2026-02-04 01:35:38[0m] (step=0068500) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 709 |
+
[[34m2026-02-04 01:37:15[0m] (step=0068600) Train Loss: -2.9938, Train Steps/Sec: 1.03
|
| 710 |
+
[[34m2026-02-04 01:38:53[0m] (step=0068700) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 711 |
+
[[34m2026-02-04 01:40:31[0m] (step=0068800) Train Loss: -2.9893, Train Steps/Sec: 1.02
|
| 712 |
+
[[34m2026-02-04 01:42:09[0m] (step=0068900) Train Loss: -2.9892, Train Steps/Sec: 1.02
|
| 713 |
+
[[34m2026-02-04 01:43:47[0m] (step=0069000) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 714 |
+
[[34m2026-02-04 01:45:25[0m] (step=0069100) Train Loss: -2.9871, Train Steps/Sec: 1.02
|
| 715 |
+
[[34m2026-02-04 01:47:02[0m] (step=0069200) Train Loss: -2.9910, Train Steps/Sec: 1.03
|
| 716 |
+
[[34m2026-02-04 01:48:40[0m] (step=0069300) Train Loss: -2.9894, Train Steps/Sec: 1.03
|
| 717 |
+
[[34m2026-02-04 01:50:17[0m] (step=0069400) Train Loss: -2.9837, Train Steps/Sec: 1.02
|
| 718 |
+
[[34m2026-02-04 01:51:55[0m] (step=0069500) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 719 |
+
[[34m2026-02-04 01:53:33[0m] (step=0069600) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 720 |
+
[[34m2026-02-04 01:55:11[0m] (step=0069700) Train Loss: -2.9852, Train Steps/Sec: 1.03
|
| 721 |
+
[[34m2026-02-04 01:56:49[0m] (step=0069800) Train Loss: -2.9926, Train Steps/Sec: 1.02
|
| 722 |
+
[[34m2026-02-04 01:58:27[0m] (step=0069900) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 723 |
+
[[34m2026-02-04 02:00:05[0m] (step=0070000) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 724 |
+
[[34m2026-02-04 02:01:01[0m] Beginning epoch 14...
|
| 725 |
+
[[34m2026-02-04 02:01:45[0m] (step=0070100) Train Loss: -2.9858, Train Steps/Sec: 1.00
|
| 726 |
+
[[34m2026-02-04 02:03:23[0m] (step=0070200) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 727 |
+
[[34m2026-02-04 02:04:57[0m] (step=0070300) Train Loss: -2.9891, Train Steps/Sec: 1.06
|
| 728 |
+
[[34m2026-02-04 02:06:35[0m] (step=0070400) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 729 |
+
[[34m2026-02-04 02:08:13[0m] (step=0070500) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 730 |
+
[[34m2026-02-04 02:09:51[0m] (step=0070600) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 731 |
+
[[34m2026-02-04 02:11:29[0m] (step=0070700) Train Loss: -2.9853, Train Steps/Sec: 1.02
|
| 732 |
+
[[34m2026-02-04 02:13:06[0m] (step=0070800) Train Loss: -2.9915, Train Steps/Sec: 1.02
|
| 733 |
+
[[34m2026-02-04 02:14:44[0m] (step=0070900) Train Loss: -2.9902, Train Steps/Sec: 1.02
|
| 734 |
+
[[34m2026-02-04 02:16:22[0m] (step=0071000) Train Loss: -2.9910, Train Steps/Sec: 1.02
|
| 735 |
+
[[34m2026-02-04 02:18:00[0m] (step=0071100) Train Loss: -2.9909, Train Steps/Sec: 1.02
|
| 736 |
+
[[34m2026-02-04 02:19:37[0m] (step=0071200) Train Loss: -2.9857, Train Steps/Sec: 1.03
|
| 737 |
+
[[34m2026-02-04 02:21:15[0m] (step=0071300) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 738 |
+
[[34m2026-02-04 02:22:52[0m] (step=0071400) Train Loss: -2.9858, Train Steps/Sec: 1.03
|
| 739 |
+
[[34m2026-02-04 02:24:30[0m] (step=0071500) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 740 |
+
[[34m2026-02-04 02:26:08[0m] (step=0071600) Train Loss: -2.9936, Train Steps/Sec: 1.02
|
| 741 |
+
[[34m2026-02-04 02:27:46[0m] (step=0071700) Train Loss: -2.9813, Train Steps/Sec: 1.02
|
| 742 |
+
[[34m2026-02-04 02:29:24[0m] (step=0071800) Train Loss: -2.9841, Train Steps/Sec: 1.02
|
| 743 |
+
[[34m2026-02-04 02:31:01[0m] (step=0071900) Train Loss: -2.9900, Train Steps/Sec: 1.03
|
| 744 |
+
[[34m2026-02-04 02:32:39[0m] (step=0072000) Train Loss: -2.9901, Train Steps/Sec: 1.03
|
| 745 |
+
[[34m2026-02-04 02:34:16[0m] (step=0072100) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 746 |
+
[[34m2026-02-04 02:35:54[0m] (step=0072200) Train Loss: -2.9852, Train Steps/Sec: 1.03
|
| 747 |
+
[[34m2026-02-04 02:37:32[0m] (step=0072300) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 748 |
+
[[34m2026-02-04 02:39:10[0m] (step=0072400) Train Loss: -2.9919, Train Steps/Sec: 1.02
|
| 749 |
+
[[34m2026-02-04 02:40:48[0m] (step=0072500) Train Loss: -2.9843, Train Steps/Sec: 1.02
|
| 750 |
+
[[34m2026-02-04 02:42:26[0m] (step=0072600) Train Loss: -2.9850, Train Steps/Sec: 1.02
|
| 751 |
+
[[34m2026-02-04 02:44:04[0m] (step=0072700) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 752 |
+
[[34m2026-02-04 02:45:42[0m] (step=0072800) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 753 |
+
[[34m2026-02-04 02:47:19[0m] (step=0072900) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 754 |
+
[[34m2026-02-04 02:48:57[0m] (step=0073000) Train Loss: -2.9901, Train Steps/Sec: 1.03
|
| 755 |
+
[[34m2026-02-04 02:50:35[0m] (step=0073100) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 756 |
+
[[34m2026-02-04 02:52:13[0m] (step=0073200) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 757 |
+
[[34m2026-02-04 02:53:50[0m] (step=0073300) Train Loss: -2.9875, Train Steps/Sec: 1.03
|
| 758 |
+
[[34m2026-02-04 02:55:28[0m] (step=0073400) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 759 |
+
[[34m2026-02-04 02:57:05[0m] (step=0073500) Train Loss: -2.9940, Train Steps/Sec: 1.03
|
| 760 |
+
[[34m2026-02-04 02:58:43[0m] (step=0073600) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 761 |
+
[[34m2026-02-04 03:00:21[0m] (step=0073700) Train Loss: -2.9883, Train Steps/Sec: 1.02
|
| 762 |
+
[[34m2026-02-04 03:01:58[0m] (step=0073800) Train Loss: -2.9895, Train Steps/Sec: 1.03
|
| 763 |
+
[[34m2026-02-04 03:03:36[0m] (step=0073900) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 764 |
+
[[34m2026-02-04 03:05:14[0m] (step=0074000) Train Loss: -2.9884, Train Steps/Sec: 1.02
|
| 765 |
+
[[34m2026-02-04 03:06:52[0m] (step=0074100) Train Loss: -2.9830, Train Steps/Sec: 1.02
|
| 766 |
+
[[34m2026-02-04 03:08:30[0m] (step=0074200) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 767 |
+
[[34m2026-02-04 03:10:08[0m] (step=0074300) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 768 |
+
[[34m2026-02-04 03:11:45[0m] (step=0074400) Train Loss: -2.9860, Train Steps/Sec: 1.03
|
| 769 |
+
[[34m2026-02-04 03:13:22[0m] (step=0074500) Train Loss: -2.9887, Train Steps/Sec: 1.03
|
| 770 |
+
[[34m2026-02-04 03:15:00[0m] (step=0074600) Train Loss: -2.9857, Train Steps/Sec: 1.03
|
| 771 |
+
[[34m2026-02-04 03:16:38[0m] (step=0074700) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 772 |
+
[[34m2026-02-04 03:18:15[0m] (step=0074800) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 773 |
+
[[34m2026-02-04 03:19:53[0m] (step=0074900) Train Loss: -2.9849, Train Steps/Sec: 1.02
|
| 774 |
+
[[34m2026-02-04 03:21:32[0m] (step=0075000) Train Loss: -2.9902, Train Steps/Sec: 1.02
|
| 775 |
+
[[34m2026-02-04 03:21:33[0m] Saved checkpoint to results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/checkpoints/0075000.pt
|
| 776 |
+
[[34m2026-02-04 03:22:32[0m] Beginning epoch 15...
|
| 777 |
+
[[34m2026-02-04 03:23:10[0m] (step=0075100) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 778 |
+
[[34m2026-02-04 03:24:48[0m] (step=0075200) Train Loss: -2.9917, Train Steps/Sec: 1.02
|
| 779 |
+
[[34m2026-02-04 03:26:26[0m] (step=0075300) Train Loss: -2.9913, Train Steps/Sec: 1.02
|
| 780 |
+
[[34m2026-02-04 03:28:04[0m] (step=0075400) Train Loss: -2.9900, Train Steps/Sec: 1.02
|
| 781 |
+
[[34m2026-02-04 03:29:42[0m] (step=0075500) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 782 |
+
[[34m2026-02-04 03:30:56[0m] Generating EMA samples...
|
| 783 |
+
[[34m2026-02-04 03:31:20[0m] (step=0075600) Train Loss: -2.9850, Train Steps/Sec: 1.02
|
| 784 |
+
[[34m2026-02-04 03:32:57[0m] (step=0075700) Train Loss: -2.9845, Train Steps/Sec: 1.03
|
| 785 |
+
[[34m2026-02-04 03:34:36[0m] (step=0075800) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 786 |
+
[[34m2026-02-04 03:36:14[0m] (step=0075900) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 787 |
+
[[34m2026-02-04 03:37:52[0m] (step=0076000) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 788 |
+
[[34m2026-02-04 03:39:30[0m] (step=0076100) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 789 |
+
[[34m2026-02-04 03:41:08[0m] (step=0076200) Train Loss: -2.9870, Train Steps/Sec: 1.02
|
| 790 |
+
[[34m2026-02-04 03:42:45[0m] (step=0076300) Train Loss: -2.9843, Train Steps/Sec: 1.03
|
| 791 |
+
[[34m2026-02-04 03:44:23[0m] (step=0076400) Train Loss: -2.9898, Train Steps/Sec: 1.02
|
| 792 |
+
[[34m2026-02-04 03:46:01[0m] (step=0076500) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 793 |
+
[[34m2026-02-04 03:47:39[0m] (step=0076600) Train Loss: -2.9848, Train Steps/Sec: 1.02
|
| 794 |
+
[[34m2026-02-04 03:49:16[0m] (step=0076700) Train Loss: -2.9864, Train Steps/Sec: 1.03
|
| 795 |
+
[[34m2026-02-04 03:50:54[0m] (step=0076800) Train Loss: -2.9876, Train Steps/Sec: 1.03
|
| 796 |
+
[[34m2026-02-04 03:52:32[0m] (step=0076900) Train Loss: -2.9862, Train Steps/Sec: 1.02
|
| 797 |
+
[[34m2026-02-04 03:54:10[0m] (step=0077000) Train Loss: -2.9906, Train Steps/Sec: 1.02
|
| 798 |
+
[[34m2026-02-04 03:55:48[0m] (step=0077100) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 799 |
+
[[34m2026-02-04 03:57:26[0m] (step=0077200) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 800 |
+
[[34m2026-02-04 03:59:04[0m] (step=0077300) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 801 |
+
[[34m2026-02-04 04:00:42[0m] (step=0077400) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 802 |
+
[[34m2026-02-04 04:02:20[0m] (step=0077500) Train Loss: -2.9870, Train Steps/Sec: 1.02
|
| 803 |
+
[[34m2026-02-04 04:03:58[0m] (step=0077600) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 804 |
+
[[34m2026-02-04 04:05:35[0m] (step=0077700) Train Loss: -2.9854, Train Steps/Sec: 1.02
|
| 805 |
+
[[34m2026-02-04 04:07:13[0m] (step=0077800) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 806 |
+
[[34m2026-02-04 04:08:51[0m] (step=0077900) Train Loss: -2.9850, Train Steps/Sec: 1.02
|
| 807 |
+
[[34m2026-02-04 04:10:29[0m] (step=0078000) Train Loss: -2.9941, Train Steps/Sec: 1.02
|
| 808 |
+
[[34m2026-02-04 04:12:07[0m] (step=0078100) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 809 |
+
[[34m2026-02-04 04:13:45[0m] (step=0078200) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 810 |
+
[[34m2026-02-04 04:15:22[0m] (step=0078300) Train Loss: -2.9915, Train Steps/Sec: 1.03
|
| 811 |
+
[[34m2026-02-04 04:17:00[0m] (step=0078400) Train Loss: -2.9876, Train Steps/Sec: 1.03
|
| 812 |
+
[[34m2026-02-04 04:18:37[0m] (step=0078500) Train Loss: -2.9893, Train Steps/Sec: 1.03
|
| 813 |
+
[[34m2026-02-04 04:20:15[0m] (step=0078600) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 814 |
+
[[34m2026-02-04 04:21:53[0m] (step=0078700) Train Loss: -2.9854, Train Steps/Sec: 1.02
|
| 815 |
+
[[34m2026-02-04 04:23:31[0m] (step=0078800) Train Loss: -2.9884, Train Steps/Sec: 1.03
|
| 816 |
+
[[34m2026-02-04 04:25:08[0m] (step=0078900) Train Loss: -2.9884, Train Steps/Sec: 1.03
|
| 817 |
+
[[34m2026-02-04 04:26:46[0m] (step=0079000) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 818 |
+
[[34m2026-02-04 04:28:24[0m] (step=0079100) Train Loss: -2.9918, Train Steps/Sec: 1.02
|
| 819 |
+
[[34m2026-02-04 04:30:01[0m] (step=0079200) Train Loss: -2.9873, Train Steps/Sec: 1.03
|
| 820 |
+
[[34m2026-02-04 04:31:39[0m] (step=0079300) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 821 |
+
[[34m2026-02-04 04:33:17[0m] (step=0079400) Train Loss: -2.9800, Train Steps/Sec: 1.02
|
| 822 |
+
[[34m2026-02-04 04:34:55[0m] (step=0079500) Train Loss: -2.9873, Train Steps/Sec: 1.03
|
| 823 |
+
[[34m2026-02-04 04:36:32[0m] (step=0079600) Train Loss: -2.9847, Train Steps/Sec: 1.02
|
| 824 |
+
[[34m2026-02-04 04:38:11[0m] (step=0079700) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 825 |
+
[[34m2026-02-04 04:39:48[0m] (step=0079800) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 826 |
+
[[34m2026-02-04 04:41:23[0m] (step=0079900) Train Loss: -2.9922, Train Steps/Sec: 1.06
|
| 827 |
+
[[34m2026-02-04 04:43:00[0m] (step=0080000) Train Loss: -2.9857, Train Steps/Sec: 1.03
|
| 828 |
+
[[34m2026-02-04 04:44:04[0m] Beginning epoch 16...
|
| 829 |
+
[[34m2026-02-04 04:44:40[0m] (step=0080100) Train Loss: -2.9882, Train Steps/Sec: 1.00
|
| 830 |
+
[[34m2026-02-04 04:46:18[0m] (step=0080200) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 831 |
+
[[34m2026-02-04 04:47:56[0m] (step=0080300) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 832 |
+
[[34m2026-02-04 04:49:34[0m] (step=0080400) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 833 |
+
[[34m2026-02-04 04:51:11[0m] (step=0080500) Train Loss: -2.9847, Train Steps/Sec: 1.03
|
| 834 |
+
[[34m2026-02-04 04:52:49[0m] (step=0080600) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 835 |
+
[[34m2026-02-04 04:54:27[0m] (step=0080700) Train Loss: -2.9888, Train Steps/Sec: 1.03
|
| 836 |
+
[[34m2026-02-04 04:56:04[0m] (step=0080800) Train Loss: -2.9902, Train Steps/Sec: 1.03
|
| 837 |
+
[[34m2026-02-04 04:57:42[0m] (step=0080900) Train Loss: -2.9849, Train Steps/Sec: 1.02
|
| 838 |
+
[[34m2026-02-04 04:59:20[0m] (step=0081000) Train Loss: -2.9865, Train Steps/Sec: 1.03
|
| 839 |
+
[[34m2026-02-04 05:00:58[0m] (step=0081100) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 840 |
+
[[34m2026-02-04 05:02:36[0m] (step=0081200) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 841 |
+
[[34m2026-02-04 05:04:14[0m] (step=0081300) Train Loss: -2.9845, Train Steps/Sec: 1.02
|
| 842 |
+
[[34m2026-02-04 05:05:52[0m] (step=0081400) Train Loss: -2.9906, Train Steps/Sec: 1.02
|
| 843 |
+
[[34m2026-02-04 05:07:29[0m] (step=0081500) Train Loss: -2.9916, Train Steps/Sec: 1.02
|
| 844 |
+
[[34m2026-02-04 05:09:08[0m] (step=0081600) Train Loss: -2.9953, Train Steps/Sec: 1.02
|
| 845 |
+
[[34m2026-02-04 05:10:46[0m] (step=0081700) Train Loss: -2.9884, Train Steps/Sec: 1.02
|
| 846 |
+
[[34m2026-02-04 05:12:24[0m] (step=0081800) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 847 |
+
[[34m2026-02-04 05:14:01[0m] (step=0081900) Train Loss: -2.9889, Train Steps/Sec: 1.03
|
| 848 |
+
[[34m2026-02-04 05:15:39[0m] (step=0082000) Train Loss: -2.9850, Train Steps/Sec: 1.02
|
| 849 |
+
[[34m2026-02-04 05:17:17[0m] (step=0082100) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 850 |
+
[[34m2026-02-04 05:18:55[0m] (step=0082200) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 851 |
+
[[34m2026-02-04 05:20:33[0m] (step=0082300) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 852 |
+
[[34m2026-02-04 05:22:10[0m] (step=0082400) Train Loss: -2.9872, Train Steps/Sec: 1.02
|
| 853 |
+
[[34m2026-02-04 05:23:49[0m] (step=0082500) Train Loss: -2.9838, Train Steps/Sec: 1.02
|
| 854 |
+
[[34m2026-02-04 05:25:27[0m] (step=0082600) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 855 |
+
[[34m2026-02-04 05:27:04[0m] (step=0082700) Train Loss: -2.9890, Train Steps/Sec: 1.03
|
| 856 |
+
[[34m2026-02-04 05:28:42[0m] (step=0082800) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 857 |
+
[[34m2026-02-04 05:30:19[0m] (step=0082900) Train Loss: -2.9903, Train Steps/Sec: 1.03
|
| 858 |
+
[[34m2026-02-04 05:31:58[0m] (step=0083000) Train Loss: -2.9946, Train Steps/Sec: 1.02
|
| 859 |
+
[[34m2026-02-04 05:33:36[0m] (step=0083100) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 860 |
+
[[34m2026-02-04 05:35:14[0m] (step=0083200) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 861 |
+
[[34m2026-02-04 05:36:52[0m] (step=0083300) Train Loss: -2.9939, Train Steps/Sec: 1.02
|
| 862 |
+
[[34m2026-02-04 05:38:30[0m] (step=0083400) Train Loss: -2.9914, Train Steps/Sec: 1.02
|
| 863 |
+
[[34m2026-02-04 05:40:07[0m] (step=0083500) Train Loss: -2.9888, Train Steps/Sec: 1.03
|
Rectified_Noise/GVP-Disp/run.sh
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launch DDP training of the depth-2 SiTF2 rectified-noise head on 4 local
# GPUs (single node), initializing from the pretrained GVP SiT-XL/2
# checkpoint and enabling the dispersive loss (--disp).
# Runs detached via nohup; stdout/stderr go to w_training1.log.
nohup torchrun \
    --nnodes=1 \
    --nproc_per_node=4 \
    --rdzv_endpoint=localhost:29739 \
    train_rectified_noise.py \
    --depth 2 \
    --results-dir results_256_gvp_disp \
    --data-path /gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/ \
    --ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/SiT_clean_256_GVP/base.pt \
    --num-classes 1000 \
    --path-type GVP \
    --prediction velocity \
    --disp \
    > w_training1.log 2>&1 &
|
Rectified_Noise/GVP-Disp/test.sh
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash

# Execute all four commands in parallel
# Each command runs in the background using &
#
# Each task pins one GPU via CUDA_VISIBLE_DEVICES and uses a distinct
# rendezvous port so the four single-process torchrun launches don't collide.
# All four sample from the same base checkpoint + SiTF2 checkpoint but vary
# the sitf2 gating flags, logging to W_*.log per configuration.
#
# NOTE(review): --use-sitf2 / --use-sitf2-before-t05 receive the literal
# strings "False"/"True". Confirm sample_rectified_noise.py's argparse parses
# these as booleans — a plain `type=bool` would treat ANY non-empty string
# (including "False") as True.

echo "Starting all four sampling tasks in parallel..."

CUDA_VISIBLE_DEVICES=0 nohup torchrun \
    --nnodes=1 \
    --nproc_per_node=1 \
    --rdzv_endpoint=localhost:29110 \
    sample_rectified_noise.py SDE \
    --depth 2 \
    --sample-dir GVP_samples \
    --model SiT-XL/2 \
    --num-fid-samples 3000 \
    --num-classes 1000 \
    --global-seed 0 \
    --use-sitf2 False \
    --use-sitf2-before-t05 False \
    --sitf2-threshold 0.0 \
    --ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/SiT_clean_256_GVP/base.pt \
    --sitf2-ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise-Dispersive-Loss/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt > W_No.log 2>&1 &

CUDA_VISIBLE_DEVICES=1 nohup torchrun \
    --nnodes=1 \
    --nproc_per_node=1 \
    --rdzv_endpoint=localhost:29150 \
    sample_rectified_noise.py SDE \
    --depth 2 \
    --sample-dir GVP_samples \
    --model SiT-XL/2 \
    --num-fid-samples 3000 \
    --num-classes 1000 \
    --global-seed 0 \
    --use-sitf2-before-t05 False \
    --sitf2-threshold 1.0 \
    --ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/SiT_clean_256_GVP/base.pt \
    --sitf2-ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise-Dispersive-Loss/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt > W_False.log 2>&1 &


CUDA_VISIBLE_DEVICES=2 nohup torchrun \
    --nnodes=1 \
    --nproc_per_node=1 \
    --rdzv_endpoint=localhost:29152 \
    sample_rectified_noise.py SDE \
    --depth 2 \
    --sample-dir GVP_samples \
    --model SiT-XL/2 \
    --num-fid-samples 3000 \
    --num-classes 1000 \
    --global-seed 0 \
    --use-sitf2-before-t05 True \
    --sitf2-threshold 0.5 \
    --ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/SiT_clean_256_GVP/base.pt \
    --sitf2-ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise-Dispersive-Loss/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt > W_True_0.5.log 2>&1 &

CUDA_VISIBLE_DEVICES=3 nohup torchrun \
    --nnodes=1 \
    --nproc_per_node=1 \
    --rdzv_endpoint=localhost:29121 \
    sample_rectified_noise.py SDE \
    --depth 2 \
    --sample-dir GVP_samples \
    --model SiT-XL/2 \
    --num-fid-samples 3000 \
    --num-classes 1000 \
    --global-seed 0 \
    --use-sitf2-before-t05 True \
    --sitf2-threshold 0.15 \
    --ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/SiT_clean_256_GVP/base.pt \
    --sitf2-ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise-Dispersive-Loss/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt > W_True_0.15.log 2>&1 &

# Wait for all background jobs to complete
echo "All tasks started. Waiting for completion..."
wait

echo "All tasks completed!"
|
Rectified_Noise/GVP-Disp/train_rectified_noise.py
ADDED
|
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This source code is licensed under the license found in the
|
| 2 |
+
# LICENSE file in the root directory of this source tree.
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
A minimal training script for SiT using PyTorch DDP.
|
| 6 |
+
"""
|
| 7 |
+
import torch
|
| 8 |
+
# the first flag below was False when we tested this script but True makes A100 training a lot faster:
|
| 9 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
| 10 |
+
torch.backends.cudnn.allow_tf32 = True
|
| 11 |
+
import torch.distributed as dist
|
| 12 |
+
from torch.nn.parallel import DistributedDataParallel as DDP
|
| 13 |
+
from torch.utils.data import DataLoader
|
| 14 |
+
from torch.utils.data.distributed import DistributedSampler
|
| 15 |
+
from torchvision.datasets import ImageFolder
|
| 16 |
+
from torchvision import transforms
|
| 17 |
+
import numpy as np
|
| 18 |
+
from collections import OrderedDict
|
| 19 |
+
from PIL import Image
|
| 20 |
+
from copy import deepcopy
|
| 21 |
+
from glob import glob
|
| 22 |
+
from time import time
|
| 23 |
+
import argparse
|
| 24 |
+
import logging
|
| 25 |
+
import os
|
| 26 |
+
|
| 27 |
+
from models import SiT, SiTF1, SiTF2, CombinedModel
|
| 28 |
+
from models import SiT_models
|
| 29 |
+
from download import find_model
|
| 30 |
+
from transport import create_transport, Sampler
|
| 31 |
+
from diffusers.models import AutoencoderKL
|
| 32 |
+
from train_utils import parse_transport_args
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
#################################################################################
|
| 37 |
+
# Training Helper Functions #
|
| 38 |
+
#################################################################################
|
| 39 |
+
|
| 40 |
+
@torch.no_grad()
def update_ema(ema_model, model, decay=0.9999):
    """
    Blend the EMA model towards the current model, in place:
    ema <- decay * ema + (1 - decay) * current.
    """
    target = dict(ema_model.named_parameters())
    # TODO: Consider applying only to params that require_grad to avoid
    # small numerical changes of pos_embed.
    for name, source in model.named_parameters():
        target[name].mul_(decay).add_(source.data, alpha=1 - decay)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def requires_grad(model, flag=True):
    """
    Enable (flag=True) or disable (flag=False) gradient tracking for every
    parameter of *model*.
    """
    for parameter in model.parameters():
        parameter.requires_grad = flag
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def cleanup():
    """
    End DDP training.

    Tears down the NCCL process group created in main(); call once per rank
    after the training loop finishes.
    """
    dist.destroy_process_group()
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def create_logger(logging_dir):
    """
    Build a process-local logger: rank 0 writes to stdout and to
    <logging_dir>/log.txt; every other rank gets a silent logger.
    """
    logger = logging.getLogger(__name__)
    if dist.get_rank() != 0:
        # Non-zero ranks log nothing.
        logger.addHandler(logging.NullHandler())
        return logger
    # Rank 0: colorized timestamp prefix, mirrored to file and console.
    logging.basicConfig(
        level=logging.INFO,
        format='[\033[34m%(asctime)s\033[0m] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        handlers=[logging.StreamHandler(), logging.FileHandler(f"{logging_dir}/log.txt")]
    )
    return logger
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def center_crop_arr(pil_image, image_size):
    """
    ADM-style center crop: progressively halve the image (BOX filter) while
    its short side is at least twice the target, resize so the short side
    equals image_size (BICUBIC), then take a centered square crop.
    https://github.com/openai/guided-diffusion/blob/8fb3ad9197f16bbc40620447b2742e13458d2831/guided_diffusion/image_datasets.py#L126
    """
    while min(*pil_image.size) >= 2 * image_size:
        halved = tuple(dim // 2 for dim in pil_image.size)
        pil_image = pil_image.resize(halved, resample=Image.BOX)

    ratio = image_size / min(*pil_image.size)
    resized = tuple(round(dim * ratio) for dim in pil_image.size)
    pil_image = pil_image.resize(resized, resample=Image.BICUBIC)

    pixels = np.array(pil_image)
    top = (pixels.shape[0] - image_size) // 2
    left = (pixels.shape[1] - image_size) // 2
    return Image.fromarray(pixels[top: top + image_size, left: left + image_size])
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
#################################################################################
|
| 108 |
+
# Training Loop #
|
| 109 |
+
#################################################################################
|
| 110 |
+
|
| 111 |
+
def main(args):
    """
    Train the SiTF2 rectified-noise head on top of frozen, pretrained
    SiT / SiTF1 backbones using PyTorch DDP.

    Only ``sitf2``'s parameters are optimized; ``sitf1`` and ``sit`` are
    frozen copies initialized from ``args.ckpt``.  Rank 0 owns the experiment
    folder, logging, and checkpointing; checkpoints contain both the raw and
    EMA weights of sitf2.
    """
    assert torch.cuda.is_available(), "Training currently requires at least one GPU."

    # ---- Distributed setup -------------------------------------------------
    dist.init_process_group("nccl")
    assert args.global_batch_size % dist.get_world_size() == 0, f"Batch size must be divisible by world size."
    rank = dist.get_rank()
    device = rank % torch.cuda.device_count()
    seed = args.global_seed * dist.get_world_size() + rank  # distinct seed per rank
    torch.manual_seed(seed)
    torch.cuda.set_device(device)
    print(f"Starting rank={rank}, seed={seed}, world_size={dist.get_world_size()}.")
    local_batch_size = int(args.global_batch_size // dist.get_world_size())
    learn_mu = args.learn_mu

    # ---- Experiment folder (rank 0 only) -----------------------------------
    if rank == 0:
        os.makedirs(args.results_dir, exist_ok=True)
        experiment_index = len(glob(f"{args.results_dir}/*"))
        model_string_name = args.model.replace("/", "-")
        # "mu" vs "sigma" in the folder name records whether SiTF2 learns mu.
        if learn_mu:
            experiment_name = f"depth-mu-{args.depth}-{experiment_index:03d}-{model_string_name}-" \
                              f"{args.path_type}-{args.prediction}-{args.loss_weight}"
        else:
            experiment_name = f"depth-sigma-{args.depth}-{experiment_index:03d}-{model_string_name}-" \
                              f"{args.path_type}-{args.prediction}-{args.loss_weight}"
        experiment_dir = f"{args.results_dir}/{experiment_name}"
        checkpoint_dir = f"{experiment_dir}/checkpoints"
        os.makedirs(checkpoint_dir, exist_ok=True)
        logger = create_logger(experiment_dir)
        logger.info(f"Experiment directory created at {experiment_dir}")
    else:
        logger = create_logger(None)

    # ---- Models ------------------------------------------------------------
    assert args.image_size % 8 == 0, "Image size must be divisible by 8 (for the VAE encoder)."
    latent_size = args.image_size // 8

    # Derive backbone hyper-parameters from the model name, e.g. 'SiT-XL/2'.
    model_name = args.model
    if 'XL' in model_name:
        hidden_size, depth, num_heads = 1152, 28, 16
    elif 'L' in model_name:
        hidden_size, depth, num_heads = 1024, 24, 16
    elif 'B' in model_name:
        hidden_size, depth, num_heads = 768, 12, 12
    elif 'S' in model_name:
        hidden_size, depth, num_heads = 384, 12, 6
    else:
        # Default fallback
        hidden_size, depth, num_heads = 768, 12, 12

    # Patch size comes after the slash: 'SiT-XL/2' -> 2.
    patch_size = int(model_name.split('/')[-1])

    sitf1 = SiTF1(
        input_size=latent_size,
        patch_size=patch_size,
        in_channels=4,
        hidden_size=hidden_size,
        depth=depth,
        num_heads=num_heads,
        mlp_ratio=4.0,
        class_dropout_prob=0.1,
        num_classes=args.num_classes,
        learn_sigma=False
    ).to(device)
    sit = SiT(
        input_size=latent_size,
        patch_size=patch_size,
        in_channels=4,
        hidden_size=hidden_size,
        depth=depth,
        num_heads=num_heads,
        mlp_ratio=4.0,
        class_dropout_prob=0.1,
        num_classes=args.num_classes,
        learn_sigma=False
    ).to(device)
    sitf2 = SiTF2(
        input_size=latent_size,
        hidden_size=hidden_size,
        out_channels=8,
        patch_size=patch_size,
        num_heads=num_heads,
        mlp_ratio=4.0,
        depth=args.depth,  # SiTF2 depth is set independently from the backbone
        learn_sigma=True,
        num_classes=args.num_classes,
        learn_mu=learn_mu
    ).to(device)
    sitf2_ema = deepcopy(sitf2).to(device)  # EMA shadow of the trainable head
    combined_model = CombinedModel(sitf1, sitf2).to(device)

    if args.ckpt is not None:
        state_dict = find_model(args.ckpt)
        # Checkpoints may wrap weights under a "model" key or be a raw state dict.
        try:
            sitf1.load_state_dict(state_dict["model"], strict=False)
            sit.load_state_dict(state_dict["model"], strict=False)
        except Exception:  # raw (unwrapped) state dict
            sitf1.load_state_dict(state_dict, strict=False)
            sit.load_state_dict(state_dict, strict=False)

    # Freeze the pretrained backbones; only sitf2 receives gradients.
    requires_grad(sitf1, False)
    requires_grad(sit, False)
    requires_grad(sitf2, True)

    opt = torch.optim.AdamW(sitf2.parameters(), lr=1e-4, weight_decay=0)
    # Do NOT wrap sitf2 separately in DDP (avoids double-wrapping submodules); wrap only the combined model.
    combined_model = DDP(combined_model, device_ids=[rank], find_unused_parameters=True)

    # Transport object: path_type ("Linear"/"GVP"/"VP") selects the loss form
    # used by transport.training_losses() below.
    transport = create_transport(
        args.path_type,
        args.prediction,
        args.loss_weight,
        args.train_eps,
        args.sample_eps,
        args.disp_loss_weight,
        args.temperature
    )
    transport_sampler = Sampler(transport)
    vae = AutoencoderKL.from_pretrained(f"stabilityai/sd-vae-ft-{args.vae}").to(device)
    logger.info(f"Combined_model Parameters: {sum(p.numel() for p in combined_model.parameters()):,}")

    grad_params = [(n, p.numel()) for n, p in combined_model.named_parameters() if p.requires_grad]
    logger.info(f"Total trainable parameters: {sum(cnt for _, cnt in grad_params):,}")

    # ---- Data --------------------------------------------------------------
    transform = transforms.Compose([
        transforms.Lambda(lambda pil_image: center_crop_arr(pil_image, args.image_size)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True)
    ])
    dataset = ImageFolder(args.data_path, transform=transform)
    sampler = DistributedSampler(
        dataset,
        num_replicas=dist.get_world_size(),
        rank=rank,
        shuffle=True,
        seed=args.global_seed
    )
    loader = DataLoader(
        dataset,
        batch_size=local_batch_size,
        shuffle=False,
        sampler=sampler,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True
    )
    logger.info(f"Dataset contains {len(dataset):,} images ({args.data_path})")

    # Ensure EMA updates target the correct base model (whether sitf2 is wrapped or not).
    base_sitf2 = sitf2.module if isinstance(sitf2, torch.nn.parallel.DistributedDataParallel) else sitf2
    update_ema(sitf2_ema, base_sitf2, decay=0)  # initialize EMA = current weights
    sitf1.eval()
    sit.eval()
    sitf2.train()
    sitf2_ema.eval()

    # ---- Training loop -----------------------------------------------------
    train_steps = 0
    log_steps = 0
    running_loss = 0
    start_time = time()
    # NOTE(review): ys/zs/model_fn below are vestiges of an in-loop sampling
    # pass that has been stripped; the random draws are deliberately kept so
    # the RNG stream (and thus training reproducibility) is unchanged.
    ys = torch.randint(1000, size=(local_batch_size,), device=device)
    use_cfg = args.cfg_scale > 1.0
    n = ys.size(0)
    zs = torch.randn(n, 4, latent_size, latent_size, device=device)
    if use_cfg:
        zs = torch.cat([zs, zs], 0)
        y_null = torch.tensor([1000] * n, device=device)
        ys = torch.cat([ys, y_null], 0)
        sample_model_kwargs = dict(y=ys, cfg_scale=args.cfg_scale)
        model_fn = sitf1.forward_with_cfg
    else:
        sample_model_kwargs = dict(y=ys)
        model_fn = sitf1.forward

    def combined_sampling_model(x, t, y=None, **kwargs):
        # Intended sampling model: frozen SiT velocity plus learned correction.
        with torch.no_grad():
            sit_out = sit.forward(x, t, y)
            combined_out = combined_model.forward(x, t, y)
            return sit_out + combined_out

    logger.info(f"Training for {args.epochs} epochs...")
    for epoch in range(args.epochs):
        sampler.set_epoch(epoch)  # reshuffle shards each epoch
        logger.info(f"Beginning epoch {epoch}...")
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)
            with torch.no_grad():
                # Encode to scaled VAE latents (Stable Diffusion factor 0.18215).
                x_latent = vae.encode(x).latent_dist.sample().mul_(0.18215)
            model_kwargs = dict(y=y, return_act=args.disp)
            # Loss form depends on args.path_type (Linear/GVP/VP).
            loss_dict = transport.training_losses(sit, x_latent, model_noise=combined_model, model_kwargs=model_kwargs)
            loss = loss_dict["loss"].mean()
            opt.zero_grad()
            loss.backward()
            opt.step()
            # Update EMA of the trainable sitf2 base model.
            update_ema(sitf2_ema, base_sitf2)
            running_loss += loss.item()
            log_steps += 1
            train_steps += 1
            if train_steps % args.log_every == 0:
                torch.cuda.synchronize()
                end_time = time()
                steps_per_sec = log_steps / (end_time - start_time)
                # Average the windowed loss across ranks before logging.
                avg_loss = torch.tensor(running_loss / log_steps, device=device)
                dist.all_reduce(avg_loss, op=dist.ReduceOp.SUM)
                avg_loss = avg_loss.item() / dist.get_world_size()
                logger.info(f"(step={train_steps:07d}) Train Loss: {avg_loss:.4f}, Train Steps/Sec: {steps_per_sec:.2f}")
                running_loss = 0
                log_steps = 0
                start_time = time()
            if train_steps % args.ckpt_every == 0 and train_steps > 0:
                print(train_steps)
                if rank == 0:
                    checkpoint = {
                        "model": sitf2.state_dict(),
                        # BUGFIX: previously saved sitf2's raw weights under
                        # "ema" too; store the EMA shadow as intended.
                        "ema": sitf2_ema.state_dict(),
                        "opt": opt.state_dict(),
                        "args": args
                    }
                    checkpoint_path = f"{checkpoint_dir}/{train_steps:07d}.pt"
                    torch.save(checkpoint, checkpoint_path)
                    logger.info(f"Saved checkpoint to {checkpoint_path}")
                dist.barrier()

            if (train_steps % args.sample_every == 0) and train_steps > 0:
                logger.info("Generating EMA samples...")

    sitf1.eval()
    sit.eval()
    sitf2.eval()
    logger.info("Final sampling done.")

    logger.info("Done!")
    cleanup()
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
def save_samples_grid(out_samples, epoch, experiment_index, args, experiment_name, rank):
    """
    On rank 0, tile a batch of decoded samples into a square grid and save it
    as pic/<experiment_name>/epoch_XXXX_combined.png next to the results dir.
    Other ranks return immediately without touching the filesystem.
    """
    if rank != 0:
        return
    import os
    import numpy as np
    from PIL import Image

    # pic/ lives alongside the results directory; one subfolder per experiment.
    parent_dir = os.path.dirname(args.results_dir)
    pic_dir = os.path.join(parent_dir, "pic")
    os.makedirs(pic_dir, exist_ok=True)
    experiment_pic_dir = os.path.join(pic_dir, experiment_name)
    os.makedirs(experiment_pic_dir, exist_ok=True)

    # [-1, 1] float -> uint8 HWC images on CPU.
    images = torch.clamp(127.5 * out_samples + 128.0, 0, 255).permute(0, 2, 3, 1).to("cpu", dtype=torch.uint8).numpy()
    count = images.shape[0]
    side = int(np.ceil(np.sqrt(count)))  # smallest square grid that fits all samples
    canvas = np.zeros((side * args.image_size, side * args.image_size, 3), dtype=np.uint8)
    for idx, tile in enumerate(images):
        r, c = divmod(idx, side)
        canvas[r * args.image_size:(r + 1) * args.image_size,
               c * args.image_size:(c + 1) * args.image_size] = tile
    Image.fromarray(canvas).save(os.path.join(experiment_pic_dir, f"epoch_{epoch:04d}_combined.png"))
|
| 387 |
+
|
| 388 |
+
|
| 389 |
+
if __name__ == "__main__":
    # CLI entry point: parse training + transport arguments, then launch DDP training.
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-path", type=str, required=True)
    parser.add_argument("--results-dir", type=str, default="results_256_linear")
    parser.add_argument("--model", type=str, choices=list(SiT_models.keys()), default="SiT-XL/2")
    parser.add_argument("--image-size", type=int, choices=[256, 512], default=256)
    # NOTE(review): default of 3 classes looks like a debugging leftover;
    # run.sh passes --num-classes 1000 for ImageNet — confirm before relying
    # on this default.
    parser.add_argument("--num-classes", type=int, default=3)
    parser.add_argument("--epochs", type=int, default=100000)
    parser.add_argument("--global-batch-size", type=int, default=256)
    parser.add_argument("--global-seed", type=int, default=0)
    parser.add_argument("--vae", type=str, choices=["ema", "mse"], default="ema")  # Choice doesn't affect training
    parser.add_argument("--num-workers", type=int, default=4)
    parser.add_argument("--log-every", type=int, default=100)
    parser.add_argument("--ckpt-every", type=int, default=25000)
    parser.add_argument("--sample-every", type=int, default=25192)
    parser.add_argument("--cfg-scale", type=float, default=4.0)
    parser.add_argument("--ckpt", type=str, default='/gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise/2000000.pt',
                        help="Optional path to a custom SiT checkpoint")
    # BooleanOptionalAction provides both --learn-mu and --no-learn-mu.
    parser.add_argument("--learn-mu", action=argparse.BooleanOptionalAction, default=True,
                        help="Whether to learn mu parameter")
    parser.add_argument("--depth", type=int, default=1,
                        help="Depth parameter for SiTF2 model")
    parser.add_argument("--disp", action="store_true",
                        help="Toggle to enable Dispersive Loss")
    parser.add_argument("--disp-loss-weight", type=float, default=0.5,
                        help="Weight λ for dispersive loss (default: 0.5)")
    parser.add_argument("--temperature", type=float, default=1.0,
                        help="Temperature τ for dispersive loss (default: 1.0)")

    # Transport arguments (added by parse_transport_args):
    # --path-type: Type of path for loss calculation (default: "GVP")
    #   Choices: "Linear" (linear interpolation), "GVP" (Geodesic Velocity Path), "VP" (Velocity Path)
    #   IMPORTANT: This parameter directly affects the loss form computed by transport.training_losses()
    #   The path_type determines how the transport loss is calculated during training.
    #   Make sure to use the correct path_type that matches your training objective.
    # --prediction: Type of prediction (default: "velocity")
    # --loss-weight: Loss weight type (default: None)
    # --sample-eps, --train-eps: Epsilon values for sampling and training
    parse_transport_args(parser)
    args = parser.parse_args()
    main(args)
|
Rectified_Noise/GVP-Disp/transport/__init__.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .transport import Transport, ModelType, WeightType, PathType, Sampler
|
| 2 |
+
|
| 3 |
+
def create_transport(
    path_type='Linear',
    prediction="velocity",
    loss_weight=None,
    train_eps=None,
    sample_eps=None,
    disp_loss_weight=0.5,
    temperature=1.0,
):
    """Factory for a Transport object.
    **Note**: model prediction defaults to velocity
    Args:
    - path_type: interpolation path, one of "Linear" (default), "GVP", "VP";
      any other value raises KeyError
    - prediction: model output parameterization — "noise", "score", or
      anything else (treated as velocity, the default)
    - loss_weight: optional loss weighting — "velocity", "likelihood", or
      None/other for unweighted
    - train_eps: small epsilon for avoiding instability during training;
      when None, a path/prediction-dependent default is chosen below
    - sample_eps: small epsilon for avoiding instability during sampling;
      when None, a path/prediction-dependent default is chosen below
    - disp_loss_weight: weight λ for dispersive loss (default: 0.5)
    - temperature: temperature τ for dispersive loss (default: 1.0)
    """

    if prediction == "noise":
        model_type = ModelType.NOISE
    elif prediction == "score":
        model_type = ModelType.SCORE
    else:
        model_type = ModelType.VELOCITY

    if loss_weight == "velocity":
        loss_type = WeightType.VELOCITY
    elif loss_weight == "likelihood":
        loss_type = WeightType.LIKELIHOOD
    else:
        loss_type = WeightType.NONE

    path_choice = {
        "Linear": PathType.LINEAR,
        "GVP": PathType.GVP,
        "VP": PathType.VP,
    }

    path_type = path_choice[path_type]

    # Epsilon defaults: VP always needs them; GVP/Linear only when the model
    # does not predict velocity.
    if (path_type in [PathType.VP]):
        train_eps_new = 1e-5 if train_eps is None else train_eps
        sample_eps_new = 1e-3 if sample_eps is None else sample_eps
        train_eps, sample_eps = train_eps_new, sample_eps_new
    elif (path_type in [PathType.GVP, PathType.LINEAR] and model_type != ModelType.VELOCITY):
        train_eps_new = 1e-3 if train_eps is None else train_eps
        sample_eps_new = 1e-3 if sample_eps is None else sample_eps
        train_eps, sample_eps = train_eps_new, sample_eps_new
    else:  # velocity & [GVP, LINEAR] is stable everywhere
        train_eps = 0
        sample_eps = 0

    # create flow state
    state = Transport(
        model_type=model_type,
        path_type=path_type,
        loss_type=loss_type,
        train_eps=train_eps,
        sample_eps=sample_eps,
        disp_loss_weight=disp_loss_weight,
        temperature=temperature,
    )

    return state
|
Rectified_Noise/GVP-Disp/transport/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (2.44 kB). View file
|
|
|
Rectified_Noise/GVP-Disp/transport/__pycache__/__init__.cpython-38.pyc
ADDED
|
Binary file (1.55 kB). View file
|
|
|
Rectified_Noise/GVP-Disp/transport/__pycache__/integrators.cpython-312.pyc
ADDED
|
Binary file (6.32 kB). View file
|
|
|
Rectified_Noise/GVP-Disp/transport/__pycache__/integrators.cpython-38.pyc
ADDED
|
Binary file (3.59 kB). View file
|
|
|
Rectified_Noise/GVP-Disp/transport/__pycache__/path.cpython-312.pyc
ADDED
|
Binary file (11.3 kB). View file
|
|
|
Rectified_Noise/GVP-Disp/transport/__pycache__/path.cpython-38.pyc
ADDED
|
Binary file (7.93 kB). View file
|
|
|
Rectified_Noise/GVP-Disp/transport/__pycache__/transport.cpython-312.pyc
ADDED
|
Binary file (22.8 kB). View file
|
|
|
Rectified_Noise/GVP-Disp/transport/__pycache__/transport.cpython-38.pyc
ADDED
|
Binary file (13.2 kB). View file
|
|
|
Rectified_Noise/GVP-Disp/transport/__pycache__/utils.cpython-312.pyc
ADDED
|
Binary file (1.9 kB). View file
|
|
|
Rectified_Noise/GVP-Disp/transport/__pycache__/utils.cpython-38.pyc
ADDED
|
Binary file (1.26 kB). View file
|
|
|
Rectified_Noise/GVP-Disp/transport/integrators.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import torch as th
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
from torchdiffeq import odeint
|
| 5 |
+
from functools import partial
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
|
| 8 |
+
class sde:
    """Euler--Maruyama / stochastic-Heun SDE solver over a fixed uniform grid.

    Args:
        drift: callable ``(x, t, model, **kwargs) -> tensor``; drift term.
        diffusion: callable ``(x, t) -> tensor``; diffusion coefficient.
        t0, t1: integration interval endpoints; must satisfy ``t0 < t1``.
        num_steps: number of grid points on ``[t0, t1]``.
        sampler_type: ``"Euler"`` or ``"Heun"``.
    """
    def __init__(
        self,
        drift,
        diffusion,
        *,
        t0,
        t1,
        num_steps,
        sampler_type,
    ):
        assert t0 < t1, "SDE sampler has to be in forward time"

        self.num_timesteps = num_steps
        self.t = th.linspace(t0, t1, num_steps)
        self.dt = self.t[1] - self.t[0]
        self.drift = drift
        self.diffusion = diffusion
        self.sampler_type = sampler_type

    def __Euler_Maruyama_step(self, x, mean_x, t, model, **model_kwargs):
        """One Euler--Maruyama step; returns (noisy sample, noise-free mean)."""
        w_cur = th.randn(x.size()).to(x)
        t = th.ones(x.size(0)).to(x) * t
        dw = w_cur * th.sqrt(self.dt)
        drift = self.drift(x, t, model, **model_kwargs)
        diffusion = self.diffusion(x, t)
        mean_x = x + drift * self.dt
        x = mean_x + th.sqrt(2 * diffusion) * dw
        return x, mean_x

    def __Heun_step(self, x, _, t, model, **model_kwargs):
        """One stochastic Heun (predictor-corrector) step."""
        w_cur = th.randn(x.size()).to(x)
        dw = w_cur * th.sqrt(self.dt)
        t_cur = th.ones(x.size(0)).to(x) * t
        diffusion = self.diffusion(x, t_cur)
        xhat = x + th.sqrt(2 * diffusion) * dw
        K1 = self.drift(xhat, t_cur, model, **model_kwargs)
        xp = xhat + self.dt * K1
        K2 = self.drift(xp, t_cur + self.dt, model, **model_kwargs)
        return xhat + 0.5 * self.dt * (K1 + K2), xhat  # at last time point we do not perform the heun step

    def __forward_fn(self):
        """Resolve the per-step update function from ``sampler_type``.

        TODO: generalize here by adding all private functions ending with steps to it.
        """
        sampler_dict = {
            "Euler": self.__Euler_Maruyama_step,
            "Heun": self.__Heun_step,
        }

        try:
            sampler = sampler_dict[self.sampler_type]
        except KeyError:
            # FIX: narrowed the bare `except:` to KeyError (a bare except would
            # also mask e.g. KeyboardInterrupt) and fixed the "Smapler" typo.
            raise NotImplementedError(
                f"Sampler type {self.sampler_type!r} not implemented."
            )

        return sampler

    def sample(self, init, model, **model_kwargs):
        """Forward loop of the SDE; returns the list of intermediate samples."""
        x = init
        mean_x = init
        samples = []
        sampler = self.__forward_fn()
        for ti in self.t[:-1]:
            with th.no_grad():
                x, mean_x = sampler(x, mean_x, ti, model, **model_kwargs)
                samples.append(x)

        return samples
|
| 76 |
+
|
| 77 |
+
class ode:
    """Wrapper around ``torchdiffeq.odeint`` for probability-flow ODE sampling."""

    def __init__(
        self,
        drift,
        *,
        t0,
        t1,
        sampler_type,
        num_steps,
        atol,
        rtol,
    ):
        assert t0 < t1, "ODE sampler has to be in forward time"

        self.drift = drift
        self.t = th.linspace(t0, t1, num_steps)
        self.atol = atol
        self.rtol = rtol
        self.sampler_type = sampler_type

    def sample(self, x, model, **model_kwargs):
        """Integrate the ODE from ``x``; the state may be a tensor or a tuple of tensors."""
        is_tuple = isinstance(x, tuple)
        device = x[0].device if is_tuple else x.device

        def _rhs(t, state):
            # Broadcast scalar solver time to a per-sample time vector.
            batch = state[0].size(0) if isinstance(state, tuple) else state.size(0)
            t_vec = th.ones(batch).to(device) * t
            return self.drift(state, t_vec, model, **model_kwargs)

        # Tuple states need one tolerance entry per component.
        n_states = len(x) if is_tuple else 1
        return odeint(
            _rhs,
            x,
            self.t.to(device),
            method=self.sampler_type,
            atol=[self.atol] * n_states,
            rtol=[self.rtol] * n_states,
        )
|
Rectified_Noise/GVP-Disp/transport/path.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch as th
|
| 2 |
+
import numpy as np
|
| 3 |
+
from functools import partial
|
| 4 |
+
|
| 5 |
+
def expand_t_like_x(t, x):
    """Reshape time t to a dimension broadcastable against x.

    Args:
        t: [batch_dim,], time vector
        x: [batch_dim, ...], data point
    Returns:
        t viewed as [batch_dim, 1, ..., 1] matching x's number of dimensions.
    """
    trailing = (1,) * (x.dim() - 1)
    return t.view(t.size(0), *trailing)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
#################### Coupling Plans ####################
|
| 17 |
+
|
| 18 |
+
class ICPlan:
    """Linear coupling plan: x_t = alpha_t * x1 + sigma_t * x0.

    Here ``alpha_t = t`` multiplies the data ``x1`` and ``sigma_t = 1 - t``
    multiplies the noise ``x0``.  Every ``compute_*_t`` method returns a
    ``(value, time_derivative)`` pair; subclasses override these three
    coefficient methods to define other paths.
    """
    def __init__(self, sigma=0.0):
        self.sigma = sigma

    def compute_alpha_t(self, t):
        """Compute the data coefficient along the path (alpha_t, d_alpha_t)."""
        return t, 1

    def compute_sigma_t(self, t):
        """Compute the noise coefficient along the path (sigma_t, d_sigma_t)."""
        return 1 - t, -1

    def compute_d_alpha_alpha_ratio_t(self, t):
        """Compute the ratio d_alpha_t / alpha_t."""
        return 1 / t

    def compute_drift(self, x, t):
        """We always output sde according to score parametrization;
        returns (drift, diffusion)."""
        t = expand_t_like_x(t, x)
        alpha_ratio = self.compute_d_alpha_alpha_ratio_t(t)
        sigma_t, d_sigma_t = self.compute_sigma_t(t)
        drift = alpha_ratio * x
        diffusion = alpha_ratio * (sigma_t ** 2) - sigma_t * d_sigma_t

        return -drift, diffusion

    def compute_diffusion(self, x, t, form="constant", norm=1.0):
        """Compute the diffusion term of the SDE
        Args:
          x: [batch_dim, ...], data point
          t: [batch_dim,], time vector
          form: str, form of the diffusion term
          norm: float, norm of the diffusion term
        """
        t = expand_t_like_x(t, x)
        increasing_decreasing = norm * th.sin(np.pi * t) ** 2
        choices = {
            "constant": norm,
            "SBDM": norm * self.compute_drift(x, t)[1],
            "sigma": norm * self.compute_sigma_t(t)[0],
            "linear": norm * (1 - t),
            "decreasing": 0.25 * (norm * th.cos(np.pi * t) + 1) ** 2,
            # FIX: the original key was misspelled "inccreasing-decreasing";
            # the correct spelling is accepted now, and the misspelled key is
            # kept as an alias so existing callers are not broken.
            "increasing-decreasing": increasing_decreasing,
            "inccreasing-decreasing": increasing_decreasing,
        }

        try:
            diffusion = choices[form]
        except KeyError:
            raise NotImplementedError(f"Diffusion form {form} not implemented")

        return diffusion

    def get_score_from_velocity(self, velocity, x, t):
        """Wrapper function: transform velocity prediction model to score
        Args:
            velocity: [batch_dim, ...] shaped tensor; velocity model output
            x: [batch_dim, ...] shaped tensor; x_t data point
            t: [batch_dim,] time tensor
        """
        t = expand_t_like_x(t, x)
        alpha_t, d_alpha_t = self.compute_alpha_t(t)
        sigma_t, d_sigma_t = self.compute_sigma_t(t)
        mean = x
        reverse_alpha_ratio = alpha_t / d_alpha_t
        var = sigma_t**2 - reverse_alpha_ratio * d_sigma_t * sigma_t
        score = (reverse_alpha_ratio * velocity - mean) / var
        return score

    def get_noise_from_velocity(self, velocity, x, t):
        """Wrapper function: transform velocity prediction model to denoiser
        Args:
            velocity: [batch_dim, ...] shaped tensor; velocity model output
            x: [batch_dim, ...] shaped tensor; x_t data point
            t: [batch_dim,] time tensor
        """
        t = expand_t_like_x(t, x)
        alpha_t, d_alpha_t = self.compute_alpha_t(t)
        sigma_t, d_sigma_t = self.compute_sigma_t(t)
        mean = x
        reverse_alpha_ratio = alpha_t / d_alpha_t
        var = reverse_alpha_ratio * d_sigma_t - sigma_t
        noise = (reverse_alpha_ratio * velocity - mean) / var
        return noise

    def get_velocity_from_score(self, score, x, t):
        """Wrapper function: transform score prediction model to velocity
        Args:
            score: [batch_dim, ...] shaped tensor; score model output
            x: [batch_dim, ...] shaped tensor; x_t data point
            t: [batch_dim,] time tensor
        """
        t = expand_t_like_x(t, x)
        drift, var = self.compute_drift(x, t)
        velocity = var * score - drift
        return velocity

    def compute_mu_t(self, t, x0, x1):
        """Compute the mean of time-dependent density p_t"""
        t = expand_t_like_x(t, x1)
        alpha_t, _ = self.compute_alpha_t(t)
        sigma_t, _ = self.compute_sigma_t(t)
        return alpha_t * x1 + sigma_t * x0

    def compute_xt(self, t, x0, x1):
        """Sample xt from time-dependent density p_t; rng is required"""
        xt = self.compute_mu_t(t, x0, x1)
        return xt

    def compute_ut(self, t, x0, x1, xt):
        """Compute the vector field corresponding to p_t"""
        t = expand_t_like_x(t, x1)
        _, d_alpha_t = self.compute_alpha_t(t)
        _, d_sigma_t = self.compute_sigma_t(t)
        return d_alpha_t * x1 + d_sigma_t * x0

    def plan(self, t, x0, x1):
        """Return (t, x_t, u_t): the point on the path at time t and its velocity."""
        xt = self.compute_xt(t, x0, x1)
        ut = self.compute_ut(t, x0, x1, xt)
        return t, xt, ut
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
class VPCPlan(ICPlan):
    """Variance-preserving (VP) path for flow matching."""

    def __init__(self, sigma_min=0.1, sigma_max=20.0):
        self.sigma_min = sigma_min
        self.sigma_max = sigma_max
        # log alpha_t and its time derivative, as closures over the sigma bounds.
        self.log_mean_coeff = lambda t: -0.25 * ((1 - t) ** 2) * (self.sigma_max - self.sigma_min) - 0.5 * (1 - t) * self.sigma_min
        self.d_log_mean_coeff = lambda t: 0.5 * (1 - t) * (self.sigma_max - self.sigma_min) + 0.5 * self.sigma_min

    def compute_alpha_t(self, t):
        """Coefficient of x1 (data) and its time derivative."""
        alpha_t = th.exp(self.log_mean_coeff(t))
        return alpha_t, alpha_t * self.d_log_mean_coeff(t)

    def compute_sigma_t(self, t):
        """Coefficient of x0 (noise) and its time derivative."""
        p_sigma_t = 2 * self.log_mean_coeff(t)
        sigma_t = th.sqrt(1 - th.exp(p_sigma_t))
        d_sigma_t = th.exp(p_sigma_t) * (2 * self.d_log_mean_coeff(t)) / (-2 * sigma_t)
        return sigma_t, d_sigma_t

    def compute_d_alpha_alpha_ratio_t(self, t):
        """Numerically stable d_alpha_t / alpha_t (the log-derivative)."""
        return self.d_log_mean_coeff(t)

    def compute_drift(self, x, t):
        """Drift term of the VP SDE: (-0.5 * beta_t * x, beta_t / 2)."""
        t = expand_t_like_x(t, x)
        beta_t = self.sigma_min + (1 - t) * (self.sigma_max - self.sigma_min)
        return -0.5 * beta_t * x, beta_t / 2
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
class GVPCPlan(ICPlan):
    """Generalized VP plan: alpha_t = sin(pi*t/2), sigma_t = cos(pi*t/2)."""

    def __init__(self, sigma=0.0):
        super().__init__(sigma)

    def compute_alpha_t(self, t):
        """Coefficient of x1 (data) and its time derivative."""
        return th.sin(t * np.pi / 2), np.pi / 2 * th.cos(t * np.pi / 2)

    def compute_sigma_t(self, t):
        """Coefficient of x0 (noise) and its time derivative."""
        return th.cos(t * np.pi / 2), -np.pi / 2 * th.sin(t * np.pi / 2)

    def compute_d_alpha_alpha_ratio_t(self, t):
        """Numerically stable d_alpha_t / alpha_t via cotangent."""
        return np.pi / (2 * th.tan(t * np.pi / 2))
|
Rectified_Noise/GVP-Disp/transport/transport.py
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch as th
|
| 2 |
+
import numpy as np
|
| 3 |
+
import logging
|
| 4 |
+
|
| 5 |
+
import enum
|
| 6 |
+
|
| 7 |
+
from . import path
|
| 8 |
+
from .utils import EasyDict, log_state, mean_flat
|
| 9 |
+
from .integrators import ode, sde
|
| 10 |
+
|
| 11 |
+
class ModelType(enum.Enum):
    """
    Which type of output the model predicts.

    Used by Transport.training_losses and the drift/score wrappers to select
    the appropriate loss target and parametrization conversion.
    """

    NOISE = enum.auto()  # the model predicts epsilon
    SCORE = enum.auto()  # the model predicts \nabla \log p(x)
    VELOCITY = enum.auto()  # the model predicts v(x)
|
| 19 |
+
|
| 20 |
+
class PathType(enum.Enum):
    """
    Which type of path to use.

    Mapped to a path.* coupling-plan class in Transport.__init__:
    LINEAR -> ICPlan, GVP -> GVPCPlan, VP -> VPCPlan.
    """

    LINEAR = enum.auto()
    GVP = enum.auto()
    VP = enum.auto()
|
| 28 |
+
|
| 29 |
+
class WeightType(enum.Enum):
    """
    Which type of weighting to use.

    Only consulted in Transport.training_losses for non-velocity models.
    """

    NONE = enum.auto()  # weight = 1
    VELOCITY = enum.auto()  # weight = (drift_var / sigma_t) ** 2
    LIKELIHOOD = enum.auto()  # weight = drift_var / sigma_t ** 2
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class Transport:
    """Stochastic-interpolant transport: training losses (including an optional
    dispersive regularizer) plus the drift/score wrappers consumed by the
    ODE/SDE samplers.
    """

    def __init__(
        self,
        *,
        model_type,
        path_type,
        loss_type,
        train_eps,
        sample_eps,
        disp_loss_weight=0.5,
        temperature=1.0,
    ):
        """
        Args:
            model_type: ModelType; what the network predicts (noise/score/velocity).
            path_type: PathType; which coupling plan to instantiate.
            loss_type: WeightType; loss weighting for non-velocity models.
            train_eps: interval-clipping epsilon used during training.
            sample_eps: interval-clipping epsilon used during sampling.
            disp_loss_weight: λ, weight of the dispersive loss term.
            temperature: τ, temperature of the dispersive loss.
        """
        path_options = {
            PathType.LINEAR: path.ICPlan,
            PathType.GVP: path.GVPCPlan,
            PathType.VP: path.VPCPlan,
        }

        self.loss_type = loss_type
        self.model_type = model_type
        self.path_sampler = path_options[path_type]()
        self.train_eps = train_eps
        self.sample_eps = sample_eps
        self.disp_loss_weight = disp_loss_weight  # λ: weight for dispersive loss
        self.temperature = temperature  # τ: temperature parameter

    def prior_logp(self, z):
        '''
        Standard multivariate normal prior
        Assume z is batched
        '''
        shape = th.tensor(z.size())
        N = th.prod(shape[1:])
        _fn = lambda x: -N / 2. * np.log(2 * np.pi) - th.sum(x ** 2) / 2.
        return th.vmap(_fn)(z)

    def check_interval(
        self,
        train_eps,
        sample_eps,
        *,
        diffusion_form="SBDM",
        sde=False,
        reverse=False,
        eval=False,
        last_step_size=0.0,
    ):
        """Clip the integration interval [0, 1] to avoid numerical issues.

        Returns (t0, t1); how much to clip depends on the path/model
        combination (the VP path and non-velocity models are singular at the
        interval endpoints).
        """
        t0 = 0
        t1 = 1
        eps = train_eps if not eval else sample_eps
        if (type(self.path_sampler) in [path.VPCPlan]):

            t1 = 1 - eps if (not sde or last_step_size == 0) else 1 - last_step_size

        elif (type(self.path_sampler) in [path.ICPlan, path.GVPCPlan]) \
            and (self.model_type != ModelType.VELOCITY or sde):  # avoid numerical issue by taking a first semi-implicit step

            t0 = eps if (diffusion_form == "SBDM" and sde) or self.model_type != ModelType.VELOCITY else 0
            t1 = 1 - eps if (not sde or last_step_size == 0) else 1 - last_step_size

        if reverse:
            t0, t1 = 1 - t0, 1 - t1

        return t0, t1

    def sample(self, x1):
        """Sampling x0 & t based on shape of x1 (if needed)
        Args:
          x1 - data point; [batch, *dim]
        """
        x0 = th.randn_like(x1)
        t0, t1 = self.check_interval(self.train_eps, self.sample_eps)
        t = th.rand((x1.shape[0],)) * (t1 - t0) + t0
        t = t.to(x1)
        return t, x0, x1

    def disp_loss(self, z):
        """Dispersive Loss implementation (InfoNCE-L2 variant)
        Args:
          z: activation tensor from model layers
        """
        z = z.reshape((z.shape[0], -1))  # flatten
        diff = th.nn.functional.pdist(z).pow(2) / z.shape[1]  # pairwise distance
        diff = th.cat((diff, diff, th.zeros(z.shape[0], device=z.device)))  # match JAX implementation of full BxB matrix
        # Apply temperature scaling: divide by temperature τ
        diff = diff / self.temperature
        return th.log(th.exp(-diff).mean())  # calculate loss

    def training_losses(
        self,
        model,
        x1,
        model_noise=None,
        model_kwargs=None
    ):
        """Loss for training the score model
        Args:
        - model: backbone model; could be score, noise, or velocity
        - x1: datapoint
        - model_noise: optional second network whose output is added to model's
        - model_kwargs: additional arguments for the model
        """
        if model_kwargs is None:  # FIX: identity comparison instead of `== None`
            model_kwargs = {}

        t, x0, x1 = self.sample(x1)
        t, xt, ut = self.path_sampler.plan(t, x0, x1)

        # Handle return_act for dispersive loss
        disp_loss = 0
        return_act = bool(model_kwargs.get("return_act"))
        if model_noise is None:  # FIX: identity comparison instead of `== None`
            model_output = model(xt, t, **model_kwargs)
            # Check if model returns activations (for dispersive loss)
            if return_act:
                model_output, act = model_output
                if act is not None and len(act) > 0:
                    # Calculate dispersive loss for all blocks
                    for block_act in act:
                        disp_loss = disp_loss + self.disp_loss(block_act)
        else:
            model_output_pre = model(xt, t, **model_kwargs)
            # Strip activations from the primary model; only model_noise's
            # activations feed the dispersive loss (original behaviour).
            if return_act and isinstance(model_output_pre, tuple):
                model_output_pre, _ = model_output_pre

            model_output_noise = model_noise(xt, t, **model_kwargs)
            # FIX: initialize act_noise so a falsy return_act cannot leave it
            # unbound (latent NameError in the original control flow).
            act_noise = None
            if return_act and isinstance(model_output_noise, tuple):
                model_output_noise, act_noise = model_output_noise
            # Calculate dispersive loss for all blocks in model_noise (sitf2)
            if act_noise is not None and len(act_noise) > 0:
                # Calculate dispersive loss for each block and sum them
                for block_act in act_noise:
                    disp_loss = disp_loss + self.disp_loss(block_act)
            model_output = model_output_pre + model_output_noise

        B, *_, C = xt.shape
        assert model_output.size() == (B, *xt.size()[1:-1], C)

        terms = {}
        terms['pred'] = model_output
        if self.model_type == ModelType.VELOCITY:
            terms['loss'] = mean_flat(((model_output - ut) ** 2))
        else:
            _, drift_var = self.path_sampler.compute_drift(xt, t)
            sigma_t, _ = self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, xt))
            if self.loss_type in [WeightType.VELOCITY]:
                weight = (drift_var / sigma_t) ** 2
            elif self.loss_type in [WeightType.LIKELIHOOD]:
                weight = drift_var / (sigma_t ** 2)
            elif self.loss_type in [WeightType.NONE]:
                weight = 1
            else:
                raise NotImplementedError()

            if self.model_type == ModelType.NOISE:
                terms['loss'] = mean_flat(weight * ((model_output - x0) ** 2))
            else:
                terms['loss'] = mean_flat(weight * ((model_output * sigma_t + x0) ** 2))

        # Add dispersive loss to the total loss with weight λ
        if disp_loss != 0:
            terms['loss'] = terms['loss'] + self.disp_loss_weight * disp_loss

        return terms

    def get_drift(
        self
    ):
        """member function for obtaining the drift of the probability flow ODE"""
        def score_ode(x, t, model, **model_kwargs):
            drift_mean, drift_var = self.path_sampler.compute_drift(x, t)
            model_output = model(x, t, **model_kwargs)
            return (-drift_mean + drift_var * model_output)  # by change of variable

        def noise_ode(x, t, model, **model_kwargs):
            drift_mean, drift_var = self.path_sampler.compute_drift(x, t)
            sigma_t, _ = self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, x))
            model_output = model(x, t, **model_kwargs)
            score = model_output / -sigma_t
            return (-drift_mean + drift_var * score)

        def velocity_ode(x, t, model, **model_kwargs):
            model_output = model(x, t, **model_kwargs)
            return model_output

        if self.model_type == ModelType.NOISE:
            drift_fn = noise_ode
        elif self.model_type == ModelType.SCORE:
            drift_fn = score_ode
        else:
            drift_fn = velocity_ode

        def body_fn(x, t, model, **model_kwargs):
            model_output = drift_fn(x, t, model, **model_kwargs)
            assert model_output.shape == x.shape, "Output shape from ODE solver must match input shape"
            return model_output

        return body_fn

    def get_score(
        self,
    ):
        """member function for obtaining score of
        x_t = alpha_t * x + sigma_t * eps"""
        if self.model_type == ModelType.NOISE:
            score_fn = lambda x, t, model, **kwargs: model(x, t, **kwargs) / -self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, x))[0]
        elif self.model_type == ModelType.SCORE:
            # FIX: corrected the `**kwagrs` typo (worked only by accident).
            score_fn = lambda x, t, model, **kwargs: model(x, t, **kwargs)
        elif self.model_type == ModelType.VELOCITY:
            score_fn = lambda x, t, model, **kwargs: self.path_sampler.get_score_from_velocity(model(x, t, **kwargs), x, t)
        else:
            raise NotImplementedError()

        return score_fn
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
class Sampler:
|
| 272 |
+
"""Sampler class for the transport model"""
|
| 273 |
+
def __init__(
|
| 274 |
+
self,
|
| 275 |
+
transport,
|
| 276 |
+
):
|
| 277 |
+
"""Constructor for a general sampler; supporting different sampling methods
|
| 278 |
+
Args:
|
| 279 |
+
- transport: an tranport object specify model prediction & interpolant type
|
| 280 |
+
"""
|
| 281 |
+
|
| 282 |
+
self.transport = transport
|
| 283 |
+
self.drift = self.transport.get_drift()
|
| 284 |
+
self.score = self.transport.get_score()
|
| 285 |
+
|
| 286 |
+
def __get_sde_diffusion_and_drift(
|
| 287 |
+
self,
|
| 288 |
+
*,
|
| 289 |
+
diffusion_form="SBDM",
|
| 290 |
+
diffusion_norm=1.0,
|
| 291 |
+
):
|
| 292 |
+
|
| 293 |
+
def diffusion_fn(x, t):
|
| 294 |
+
diffusion = self.transport.path_sampler.compute_diffusion(x, t, form=diffusion_form, norm=diffusion_norm)
|
| 295 |
+
return diffusion
|
| 296 |
+
|
| 297 |
+
sde_drift = \
|
| 298 |
+
lambda x, t, model, **kwargs: \
|
| 299 |
+
self.drift(x, t, model, **kwargs) + diffusion_fn(x, t) * self.score(x, t, model, **kwargs)
|
| 300 |
+
|
| 301 |
+
sde_diffusion = diffusion_fn
|
| 302 |
+
|
| 303 |
+
return sde_drift, sde_diffusion
|
| 304 |
+
|
| 305 |
+
def __get_last_step(
|
| 306 |
+
self,
|
| 307 |
+
sde_drift,
|
| 308 |
+
*,
|
| 309 |
+
last_step,
|
| 310 |
+
last_step_size,
|
| 311 |
+
):
|
| 312 |
+
"""Get the last step function of the SDE solver"""
|
| 313 |
+
|
| 314 |
+
if last_step is None:
|
| 315 |
+
last_step_fn = \
|
| 316 |
+
lambda x, t, model, **model_kwargs: \
|
| 317 |
+
x
|
| 318 |
+
elif last_step == "Mean":
|
| 319 |
+
last_step_fn = \
|
| 320 |
+
lambda x, t, model, **model_kwargs: \
|
| 321 |
+
x + sde_drift(x, t, model, **model_kwargs) * last_step_size
|
| 322 |
+
elif last_step == "Tweedie":
|
| 323 |
+
alpha = self.transport.path_sampler.compute_alpha_t # simple aliasing; the original name was too long
|
| 324 |
+
sigma = self.transport.path_sampler.compute_sigma_t
|
| 325 |
+
last_step_fn = \
|
| 326 |
+
lambda x, t, model, **model_kwargs: \
|
| 327 |
+
x / alpha(t)[0][0] + (sigma(t)[0][0] ** 2) / alpha(t)[0][0] * self.score(x, t, model, **model_kwargs)
|
| 328 |
+
elif last_step == "Euler":
|
| 329 |
+
last_step_fn = \
|
| 330 |
+
lambda x, t, model, **model_kwargs: \
|
| 331 |
+
x + self.drift(x, t, model, **model_kwargs) * last_step_size
|
| 332 |
+
else:
|
| 333 |
+
raise NotImplementedError()
|
| 334 |
+
|
| 335 |
+
return last_step_fn
|
| 336 |
+
|
| 337 |
+
def sample_sde(
|
| 338 |
+
self,
|
| 339 |
+
*,
|
| 340 |
+
sampling_method="Euler",
|
| 341 |
+
diffusion_form="SBDM",
|
| 342 |
+
diffusion_norm=1.0,
|
| 343 |
+
last_step="Mean",
|
| 344 |
+
last_step_size=0.04,
|
| 345 |
+
num_steps=250,
|
| 346 |
+
):
|
| 347 |
+
"""returns a sampling function with given SDE settings
|
| 348 |
+
Args:
|
| 349 |
+
- sampling_method: type of sampler used in solving the SDE; default to be Euler-Maruyama
|
| 350 |
+
- diffusion_form: function form of diffusion coefficient; default to be matching SBDM
|
| 351 |
+
- diffusion_norm: function magnitude of diffusion coefficient; default to 1
|
| 352 |
+
- last_step: type of the last step; default to identity
|
| 353 |
+
- last_step_size: size of the last step; default to match the stride of 250 steps over [0,1]
|
| 354 |
+
- num_steps: total integration step of SDE
|
| 355 |
+
"""
|
| 356 |
+
|
| 357 |
+
if last_step is None:
|
| 358 |
+
last_step_size = 0.0
|
| 359 |
+
|
| 360 |
+
sde_drift, sde_diffusion = self.__get_sde_diffusion_and_drift(
|
| 361 |
+
diffusion_form=diffusion_form,
|
| 362 |
+
diffusion_norm=diffusion_norm,
|
| 363 |
+
)
|
| 364 |
+
|
| 365 |
+
t0, t1 = self.transport.check_interval(
|
| 366 |
+
self.transport.train_eps,
|
| 367 |
+
self.transport.sample_eps,
|
| 368 |
+
diffusion_form=diffusion_form,
|
| 369 |
+
sde=True,
|
| 370 |
+
eval=True,
|
| 371 |
+
reverse=False,
|
| 372 |
+
last_step_size=last_step_size,
|
| 373 |
+
)
|
| 374 |
+
|
| 375 |
+
_sde = sde(
|
| 376 |
+
sde_drift,
|
| 377 |
+
sde_diffusion,
|
| 378 |
+
t0=t0,
|
| 379 |
+
t1=t1,
|
| 380 |
+
num_steps=num_steps,
|
| 381 |
+
sampler_type=sampling_method
|
| 382 |
+
)
|
| 383 |
+
|
| 384 |
+
last_step_fn = self.__get_last_step(sde_drift, last_step=last_step, last_step_size=last_step_size)
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def _sample(init, model, **model_kwargs):
|
| 388 |
+
xs = _sde.sample(init, model, **model_kwargs)
|
| 389 |
+
ts = th.ones(init.size(0), device=init.device) * t1
|
| 390 |
+
x = last_step_fn(xs[-1], ts, model, **model_kwargs)
|
| 391 |
+
xs.append(x)
|
| 392 |
+
|
| 393 |
+
assert len(xs) == num_steps, "Samples does not match the number of steps"
|
| 394 |
+
|
| 395 |
+
return xs
|
| 396 |
+
|
| 397 |
+
return _sample
|
| 398 |
+
|
| 399 |
+
def sample_ode(
    self,
    *,
    sampling_method="dopri5",
    num_steps=50,
    atol=1e-6,
    rtol=1e-3,
    reverse=False,
):
    """Build and return an ODE-based sampling function.

    Args:
        - sampling_method: type of solver used for the ODE; defaults to Dopri5
        - num_steps:
            - fixed solvers (Euler, Heun): the actual number of integration steps performed
            - adaptive solvers (Dopri5): the number of datapoints saved during integration; produced by interpolation
        - atol: absolute error tolerance for the solver
        - rtol: relative error tolerance for the solver
        - reverse: whether to solve the ODE in reverse (data to noise); defaults to False
    """
    # Reverse-time sampling flips the time variable handed to the drift;
    # forward sampling uses the drift as-is.
    if reverse:
        def drift(x, t, model, **kwargs):
            flipped_t = th.ones_like(t) * (1 - t)
            return self.drift(x, flipped_t, model, **kwargs)
    else:
        drift = self.drift

    # Integration interval depends on the transport's eps settings and direction.
    t0, t1 = self.transport.check_interval(
        self.transport.train_eps,
        self.transport.sample_eps,
        sde=False,
        eval=True,
        reverse=reverse,
        last_step_size=0.0,
    )

    solver = ode(
        drift=drift,
        t0=t0,
        t1=t1,
        sampler_type=sampling_method,
        num_steps=num_steps,
        atol=atol,
        rtol=rtol,
    )

    return solver.sample
|
| 443 |
+
|
| 444 |
+
def sample_ode_likelihood(
    self,
    *,
    sampling_method="dopri5",
    num_steps=50,
    atol=1e-6,
    rtol=1e-3,
):

    """returns a sampling function for calculating likelihood with given ODE settings
    Args:
    - sampling_method: type of sampler used in solving the ODE; default to be Dopri5
    - num_steps:
        - fixed solver (Euler, Heun): the actual number of integration steps performed
        - adaptive solver (Dopri5): the number of datapoints saved during integration; produced by interpolation
    - atol: absolute error tolerance for the solver
    - rtol: relative error tolerance for the solver
    """
    def _likelihood_drift(x, t, model, **model_kwargs):
        # The ODE state is a pair (sample, accumulated log-prob); only the
        # sample part drives the dynamics.
        x, _ = x
        # Rademacher probe (entries in {-1, +1}) for the Hutchinson
        # divergence estimator: randint(2, ...) yields {0, 1}, mapped to ±1.
        eps = th.randint(2, x.size(), dtype=th.float, device=x.device) * 2 - 1
        # Likelihood integration runs in flipped time relative to sampling.
        t = th.ones_like(t) * (1 - t)
        with th.enable_grad():
            x.requires_grad = True
            # vector-Jacobian product: d/dx <drift(x), eps>
            grad = th.autograd.grad(th.sum(self.drift(x, t, model, **model_kwargs) * eps), x)[0]
            # Hutchinson estimate of the divergence: eps^T J eps, summed over
            # all non-batch dimensions.
            # NOTE(review): indentation reconstructed from a mangled paste —
            # these two statements are assumed to sit inside enable_grad();
            # returned values are the same either way, but confirm against the
            # original file.
            logp_grad = th.sum(grad * eps, dim=tuple(range(1, len(x.size()))))
            drift = self.drift(x, t, model, **model_kwargs)
        # Negated drift: the likelihood ODE is integrated data -> noise.
        return (-drift, logp_grad)

    t0, t1 = self.transport.check_interval(
        self.transport.train_eps,
        self.transport.sample_eps,
        sde=False,
        eval=True,
        reverse=False,
        last_step_size=0.0,
    )

    _ode = ode(
        drift=_likelihood_drift,
        t0=t0,
        t1=t1,
        sampler_type=sampling_method,
        num_steps=num_steps,
        atol=atol,
        rtol=rtol,
    )

    def _sample_fn(x, model, **model_kwargs):
        # Start the log-prob accumulator at zero for each batch element.
        init_logp = th.zeros(x.size(0)).to(x)
        input = (x, init_logp)
        # Despite the name, `drift` here is the trajectory of states returned
        # by the solver; the final entry is the terminal (prior-space) sample.
        drift, delta_logp = _ode.sample(input, model, **model_kwargs)
        drift, delta_logp = drift[-1], delta_logp[-1]
        # Change-of-variables: log p(x) = log p_prior(z) - integral of divergence.
        prior_logp = self.transport.prior_logp(drift)
        logp = prior_logp - delta_logp
        return logp, drift

    return _sample_fn
|
Rectified_Noise/GVP-Disp/transport/utils.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch as th
|
| 2 |
+
|
| 3 |
+
class EasyDict:
    """Lightweight attribute-style view of a plain dict.

    Every key of the given mapping becomes an instance attribute, and
    subscript access (``obj[key]``) is forwarded to attribute lookup.
    """

    def __init__(self, sub_dict):
        for key, value in sub_dict.items():
            setattr(self, key, value)

    def __getitem__(self, key):
        # Subscript and attribute access are interchangeable.
        return getattr(self, key)
|
| 11 |
+
|
| 12 |
+
def mean_flat(x):
    """Average *x* over every dimension except the leading batch dimension."""
    non_batch_dims = list(range(1, len(x.size())))
    return th.mean(x, dim=non_batch_dims)
|
| 17 |
+
|
| 18 |
+
def log_state(state):
    """Render *state* as newline-joined ``key: value`` lines, sorted by key.

    Values whose ``str()`` looks like a default object repr
    (e.g. ``<Foo object at 0x...>``) are abbreviated to ``[ClassName]``.
    """
    lines = []
    for key, value in sorted(state.items()):
        rendered = str(value)
        # Default reprs are noisy (memory addresses); show just the class name.
        if "<object" in rendered or "object at" in rendered:
            lines.append(f"{key}: [{value.__class__.__name__}]")
        else:
            lines.append(f"{key}: {value}")
    return '\n'.join(lines)
|
Rectified_Noise/GVP-Disp/w_training1.log
ADDED
|
@@ -0,0 +1,927 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
W0203 06:54:55.773000 72184 site-packages/torch/distributed/run.py:793]
|
| 2 |
+
W0203 06:54:55.773000 72184 site-packages/torch/distributed/run.py:793] *****************************************
|
| 3 |
+
W0203 06:54:55.773000 72184 site-packages/torch/distributed/run.py:793] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 4 |
+
W0203 06:54:55.773000 72184 site-packages/torch/distributed/run.py:793] *****************************************
|
| 5 |
+
[NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
|
| 6 |
+
[NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
|
| 7 |
+
[NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
|
| 8 |
+
[NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
|
| 9 |
+
Starting rank=0, seed=0, world_size=4.
|
| 10 |
+
[[34m2026-02-03 06:55:12[0m] Experiment directory created at results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None
|
| 11 |
+
Starting rank=3, seed=3, world_size=4.
|
| 12 |
+
Starting rank=1, seed=1, world_size=4.
|
| 13 |
+
Starting rank=2, seed=2, world_size=4.
|
| 14 |
+
[[34m2026-02-03 06:55:47[0m] Combined_model Parameters: 729,629,632
|
| 15 |
+
[[34m2026-02-03 06:55:47[0m] Total trainable parameters: 53,910,176
|
| 16 |
+
[[34m2026-02-03 06:55:50[0m] Dataset contains 1,281,167 images (/gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/)
|
| 17 |
+
[[34m2026-02-03 06:55:50[0m] Training for 100000 epochs...
|
| 18 |
+
[[34m2026-02-03 06:55:50[0m] Beginning epoch 0...
|
| 19 |
+
[[34m2026-02-03 06:57:30[0m] (step=0000100) Train Loss: -2.4789, Train Steps/Sec: 1.00
|
| 20 |
+
[[34m2026-02-03 06:59:08[0m] (step=0000200) Train Loss: -2.9649, Train Steps/Sec: 1.02
|
| 21 |
+
[[34m2026-02-03 07:00:47[0m] (step=0000300) Train Loss: -2.9777, Train Steps/Sec: 1.01
|
| 22 |
+
[[34m2026-02-03 07:02:27[0m] (step=0000400) Train Loss: -2.9828, Train Steps/Sec: 1.00
|
| 23 |
+
[[34m2026-02-03 07:04:08[0m] (step=0000500) Train Loss: -2.9877, Train Steps/Sec: 0.99
|
| 24 |
+
[[34m2026-02-03 07:05:49[0m] (step=0000600) Train Loss: -2.9875, Train Steps/Sec: 0.99
|
| 25 |
+
[[34m2026-02-03 07:07:28[0m] (step=0000700) Train Loss: -2.9882, Train Steps/Sec: 1.01
|
| 26 |
+
[[34m2026-02-03 07:09:08[0m] (step=0000800) Train Loss: -2.9861, Train Steps/Sec: 1.00
|
| 27 |
+
[[34m2026-02-03 07:10:49[0m] (step=0000900) Train Loss: -2.9862, Train Steps/Sec: 0.99
|
| 28 |
+
[[34m2026-02-03 07:12:30[0m] (step=0001000) Train Loss: -2.9886, Train Steps/Sec: 0.99
|
| 29 |
+
[[34m2026-02-03 07:14:12[0m] (step=0001100) Train Loss: -2.9849, Train Steps/Sec: 0.98
|
| 30 |
+
[[34m2026-02-03 07:18:10[0m] (step=0001200) Train Loss: -2.9885, Train Steps/Sec: 0.42
|
| 31 |
+
[[34m2026-02-03 07:20:07[0m] (step=0001300) Train Loss: -2.9864, Train Steps/Sec: 0.85
|
| 32 |
+
[[34m2026-02-03 07:21:45[0m] (step=0001400) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 33 |
+
[[34m2026-02-03 07:23:22[0m] (step=0001500) Train Loss: -2.9863, Train Steps/Sec: 1.03
|
| 34 |
+
[[34m2026-02-03 07:25:19[0m] (step=0001600) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 35 |
+
[[34m2026-02-03 07:28:25[0m] (step=0001700) Train Loss: -3.6843, Train Steps/Sec: 0.54
|
| 36 |
+
[[34m2026-02-03 07:31:32[0m] (step=0001800) Train Loss: -3.6813, Train Steps/Sec: 0.53
|
| 37 |
+
[[34m2026-02-03 07:34:38[0m] (step=0001900) Train Loss: -3.6828, Train Steps/Sec: 0.54
|
| 38 |
+
[[34m2026-02-03 07:37:45[0m] (step=0002000) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 39 |
+
[[34m2026-02-03 07:40:51[0m] (step=0002100) Train Loss: -3.6799, Train Steps/Sec: 0.54
|
| 40 |
+
[[34m2026-02-03 07:43:58[0m] (step=0002200) Train Loss: -3.6784, Train Steps/Sec: 0.53
|
| 41 |
+
[[34m2026-02-03 07:47:06[0m] (step=0002300) Train Loss: -3.6824, Train Steps/Sec: 0.53
|
| 42 |
+
[[34m2026-02-03 07:50:12[0m] (step=0002400) Train Loss: -3.6787, Train Steps/Sec: 0.54
|
| 43 |
+
[[34m2026-02-03 07:53:19[0m] (step=0002500) Train Loss: -3.6771, Train Steps/Sec: 0.54
|
| 44 |
+
[[34m2026-02-03 07:53:23[0m] Beginning epoch 1...
|
| 45 |
+
[[34m2026-02-03 07:56:29[0m] (step=0002600) Train Loss: -3.6847, Train Steps/Sec: 0.53
|
| 46 |
+
[[34m2026-02-03 07:59:35[0m] (step=0002700) Train Loss: -3.6829, Train Steps/Sec: 0.54
|
| 47 |
+
[[34m2026-02-03 08:02:42[0m] (step=0002800) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 48 |
+
[[34m2026-02-03 08:05:49[0m] (step=0002900) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 49 |
+
[[34m2026-02-03 08:08:55[0m] (step=0003000) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 50 |
+
[[34m2026-02-03 08:12:01[0m] (step=0003100) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 51 |
+
[[34m2026-02-03 08:15:09[0m] (step=0003200) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 52 |
+
[[34m2026-02-03 08:18:16[0m] (step=0003300) Train Loss: -3.6800, Train Steps/Sec: 0.53
|
| 53 |
+
[[34m2026-02-03 08:21:20[0m] (step=0003400) Train Loss: -3.6797, Train Steps/Sec: 0.54
|
| 54 |
+
[[34m2026-02-03 08:24:27[0m] (step=0003500) Train Loss: -3.6802, Train Steps/Sec: 0.54
|
| 55 |
+
[[34m2026-02-03 08:27:34[0m] (step=0003600) Train Loss: -3.6834, Train Steps/Sec: 0.53
|
| 56 |
+
[[34m2026-02-03 08:30:40[0m] (step=0003700) Train Loss: -3.6810, Train Steps/Sec: 0.54
|
| 57 |
+
[[34m2026-02-03 08:33:48[0m] (step=0003800) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 58 |
+
[[34m2026-02-03 08:36:55[0m] (step=0003900) Train Loss: -3.6817, Train Steps/Sec: 0.53
|
| 59 |
+
[[34m2026-02-03 08:40:01[0m] (step=0004000) Train Loss: -3.6794, Train Steps/Sec: 0.54
|
| 60 |
+
[[34m2026-02-03 08:43:08[0m] (step=0004100) Train Loss: -3.6801, Train Steps/Sec: 0.54
|
| 61 |
+
[[34m2026-02-03 08:46:15[0m] (step=0004200) Train Loss: -3.6850, Train Steps/Sec: 0.54
|
| 62 |
+
[[34m2026-02-03 08:49:21[0m] (step=0004300) Train Loss: -3.6801, Train Steps/Sec: 0.54
|
| 63 |
+
[[34m2026-02-03 08:52:28[0m] (step=0004400) Train Loss: -3.6816, Train Steps/Sec: 0.54
|
| 64 |
+
[[34m2026-02-03 08:55:35[0m] (step=0004500) Train Loss: -3.6820, Train Steps/Sec: 0.53
|
| 65 |
+
[[34m2026-02-03 08:58:42[0m] (step=0004600) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 66 |
+
[[34m2026-02-03 09:01:49[0m] (step=0004700) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 67 |
+
[[34m2026-02-03 09:04:56[0m] (step=0004800) Train Loss: -3.6797, Train Steps/Sec: 0.53
|
| 68 |
+
[[34m2026-02-03 09:08:03[0m] (step=0004900) Train Loss: -3.6800, Train Steps/Sec: 0.53
|
| 69 |
+
[[34m2026-02-03 09:11:10[0m] (step=0005000) Train Loss: -3.6831, Train Steps/Sec: 0.54
|
| 70 |
+
[[34m2026-02-03 09:11:18[0m] Beginning epoch 2...
|
| 71 |
+
[[34m2026-02-03 09:14:20[0m] (step=0005100) Train Loss: -3.6803, Train Steps/Sec: 0.52
|
| 72 |
+
[[34m2026-02-03 09:17:27[0m] (step=0005200) Train Loss: -3.6804, Train Steps/Sec: 0.53
|
| 73 |
+
[[34m2026-02-03 09:20:34[0m] (step=0005300) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 74 |
+
[[34m2026-02-03 09:23:40[0m] (step=0005400) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 75 |
+
[[34m2026-02-03 09:26:47[0m] (step=0005500) Train Loss: -3.6819, Train Steps/Sec: 0.53
|
| 76 |
+
[[34m2026-02-03 09:29:54[0m] (step=0005600) Train Loss: -3.6834, Train Steps/Sec: 0.54
|
| 77 |
+
[[34m2026-02-03 09:33:01[0m] (step=0005700) Train Loss: -3.6805, Train Steps/Sec: 0.53
|
| 78 |
+
[[34m2026-02-03 09:36:08[0m] (step=0005800) Train Loss: -3.6827, Train Steps/Sec: 0.53
|
| 79 |
+
[[34m2026-02-03 09:39:15[0m] (step=0005900) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 80 |
+
[[34m2026-02-03 09:42:20[0m] (step=0006000) Train Loss: -3.6807, Train Steps/Sec: 0.54
|
| 81 |
+
[[34m2026-02-03 09:45:27[0m] (step=0006100) Train Loss: -3.6814, Train Steps/Sec: 0.53
|
| 82 |
+
[[34m2026-02-03 09:48:34[0m] (step=0006200) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 83 |
+
[[34m2026-02-03 09:51:40[0m] (step=0006300) Train Loss: -3.6799, Train Steps/Sec: 0.54
|
| 84 |
+
[[34m2026-02-03 09:54:46[0m] (step=0006400) Train Loss: -3.6797, Train Steps/Sec: 0.54
|
| 85 |
+
[[34m2026-02-03 09:57:54[0m] (step=0006500) Train Loss: -3.6820, Train Steps/Sec: 0.53
|
| 86 |
+
[[34m2026-02-03 10:01:01[0m] (step=0006600) Train Loss: -3.6789, Train Steps/Sec: 0.53
|
| 87 |
+
[[34m2026-02-03 10:04:08[0m] (step=0006700) Train Loss: -3.6804, Train Steps/Sec: 0.53
|
| 88 |
+
[[34m2026-02-03 10:07:15[0m] (step=0006800) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 89 |
+
[[34m2026-02-03 10:10:22[0m] (step=0006900) Train Loss: -3.6787, Train Steps/Sec: 0.54
|
| 90 |
+
[[34m2026-02-03 10:13:29[0m] (step=0007000) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 91 |
+
[[34m2026-02-03 10:16:35[0m] (step=0007100) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 92 |
+
[[34m2026-02-03 10:19:42[0m] (step=0007200) Train Loss: -3.6820, Train Steps/Sec: 0.54
|
| 93 |
+
[[34m2026-02-03 10:22:49[0m] (step=0007300) Train Loss: -3.6810, Train Steps/Sec: 0.53
|
| 94 |
+
[[34m2026-02-03 10:25:56[0m] (step=0007400) Train Loss: -3.6828, Train Steps/Sec: 0.53
|
| 95 |
+
[[34m2026-02-03 10:29:04[0m] (step=0007500) Train Loss: -3.6821, Train Steps/Sec: 0.53
|
| 96 |
+
[[34m2026-02-03 10:29:16[0m] Beginning epoch 3...
|
| 97 |
+
[[34m2026-02-03 10:32:13[0m] (step=0007600) Train Loss: -3.6794, Train Steps/Sec: 0.53
|
| 98 |
+
[[34m2026-02-03 10:35:20[0m] (step=0007700) Train Loss: -3.6809, Train Steps/Sec: 0.53
|
| 99 |
+
[[34m2026-02-03 10:38:27[0m] (step=0007800) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 100 |
+
[[34m2026-02-03 10:41:34[0m] (step=0007900) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 101 |
+
[[34m2026-02-03 10:44:41[0m] (step=0008000) Train Loss: -3.6852, Train Steps/Sec: 0.53
|
| 102 |
+
[[34m2026-02-03 10:47:47[0m] (step=0008100) Train Loss: -3.6820, Train Steps/Sec: 0.54
|
| 103 |
+
[[34m2026-02-03 10:50:54[0m] (step=0008200) Train Loss: -3.6798, Train Steps/Sec: 0.54
|
| 104 |
+
[[34m2026-02-03 10:54:01[0m] (step=0008300) Train Loss: -3.6772, Train Steps/Sec: 0.54
|
| 105 |
+
[[34m2026-02-03 10:57:07[0m] (step=0008400) Train Loss: -3.6800, Train Steps/Sec: 0.54
|
| 106 |
+
[[34m2026-02-03 11:00:13[0m] (step=0008500) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 107 |
+
[[34m2026-02-03 11:03:19[0m] (step=0008600) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 108 |
+
[[34m2026-02-03 11:06:23[0m] (step=0008700) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 109 |
+
[[34m2026-02-03 11:09:29[0m] (step=0008800) Train Loss: -3.6762, Train Steps/Sec: 0.54
|
| 110 |
+
[[34m2026-02-03 11:12:36[0m] (step=0008900) Train Loss: -3.6838, Train Steps/Sec: 0.54
|
| 111 |
+
[[34m2026-02-03 11:15:43[0m] (step=0009000) Train Loss: -3.6826, Train Steps/Sec: 0.53
|
| 112 |
+
[[34m2026-02-03 11:18:50[0m] (step=0009100) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 113 |
+
[[34m2026-02-03 11:21:57[0m] (step=0009200) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 114 |
+
[[34m2026-02-03 11:25:04[0m] (step=0009300) Train Loss: -3.6819, Train Steps/Sec: 0.54
|
| 115 |
+
[[34m2026-02-03 11:28:11[0m] (step=0009400) Train Loss: -3.6785, Train Steps/Sec: 0.53
|
| 116 |
+
[[34m2026-02-03 11:31:17[0m] (step=0009500) Train Loss: -3.6769, Train Steps/Sec: 0.54
|
| 117 |
+
[[34m2026-02-03 11:34:24[0m] (step=0009600) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 118 |
+
[[34m2026-02-03 11:37:31[0m] (step=0009700) Train Loss: -3.6856, Train Steps/Sec: 0.54
|
| 119 |
+
[[34m2026-02-03 11:40:38[0m] (step=0009800) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 120 |
+
[[34m2026-02-03 11:43:45[0m] (step=0009900) Train Loss: -3.6805, Train Steps/Sec: 0.54
|
| 121 |
+
[[34m2026-02-03 11:46:51[0m] (step=0010000) Train Loss: -3.6819, Train Steps/Sec: 0.54
|
| 122 |
+
[[34m2026-02-03 11:47:07[0m] Beginning epoch 4...
|
| 123 |
+
[[34m2026-02-03 11:50:01[0m] (step=0010100) Train Loss: -3.6850, Train Steps/Sec: 0.53
|
| 124 |
+
[[34m2026-02-03 11:53:08[0m] (step=0010200) Train Loss: -3.6816, Train Steps/Sec: 0.53
|
| 125 |
+
[[34m2026-02-03 11:56:15[0m] (step=0010300) Train Loss: -3.6836, Train Steps/Sec: 0.53
|
| 126 |
+
[[34m2026-02-03 11:59:22[0m] (step=0010400) Train Loss: -3.6789, Train Steps/Sec: 0.53
|
| 127 |
+
[[34m2026-02-03 12:02:29[0m] (step=0010500) Train Loss: -3.6793, Train Steps/Sec: 0.54
|
| 128 |
+
[[34m2026-02-03 12:05:36[0m] (step=0010600) Train Loss: -3.6834, Train Steps/Sec: 0.54
|
| 129 |
+
[[34m2026-02-03 12:08:42[0m] (step=0010700) Train Loss: -3.6842, Train Steps/Sec: 0.54
|
| 130 |
+
[[34m2026-02-03 12:11:49[0m] (step=0010800) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 131 |
+
[[34m2026-02-03 12:14:56[0m] (step=0010900) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 132 |
+
[[34m2026-02-03 12:18:03[0m] (step=0011000) Train Loss: -3.6843, Train Steps/Sec: 0.53
|
| 133 |
+
[[34m2026-02-03 12:21:09[0m] (step=0011100) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 134 |
+
[[34m2026-02-03 12:24:15[0m] (step=0011200) Train Loss: -3.6787, Train Steps/Sec: 0.54
|
| 135 |
+
[[34m2026-02-03 12:27:20[0m] (step=0011300) Train Loss: -3.6828, Train Steps/Sec: 0.54
|
| 136 |
+
[[34m2026-02-03 12:30:27[0m] (step=0011400) Train Loss: -3.6830, Train Steps/Sec: 0.53
|
| 137 |
+
[[34m2026-02-03 12:33:34[0m] (step=0011500) Train Loss: -3.6784, Train Steps/Sec: 0.53
|
| 138 |
+
[[34m2026-02-03 12:36:41[0m] (step=0011600) Train Loss: -3.6831, Train Steps/Sec: 0.53
|
| 139 |
+
[[34m2026-02-03 12:39:48[0m] (step=0011700) Train Loss: -3.6834, Train Steps/Sec: 0.53
|
| 140 |
+
[[34m2026-02-03 12:42:55[0m] (step=0011800) Train Loss: -3.6808, Train Steps/Sec: 0.53
|
| 141 |
+
[[34m2026-02-03 12:46:02[0m] (step=0011900) Train Loss: -3.6810, Train Steps/Sec: 0.54
|
| 142 |
+
[[34m2026-02-03 12:49:09[0m] (step=0012000) Train Loss: -3.6821, Train Steps/Sec: 0.53
|
| 143 |
+
[[34m2026-02-03 12:52:16[0m] (step=0012100) Train Loss: -3.6827, Train Steps/Sec: 0.53
|
| 144 |
+
[[34m2026-02-03 12:55:23[0m] (step=0012200) Train Loss: -3.6827, Train Steps/Sec: 0.54
|
| 145 |
+
[[34m2026-02-03 12:58:30[0m] (step=0012300) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 146 |
+
[[34m2026-02-03 13:01:37[0m] (step=0012400) Train Loss: -3.6818, Train Steps/Sec: 0.53
|
| 147 |
+
[[34m2026-02-03 13:04:44[0m] (step=0012500) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 148 |
+
[[34m2026-02-03 13:05:03[0m] Beginning epoch 5...
|
| 149 |
+
[[34m2026-02-03 13:07:54[0m] (step=0012600) Train Loss: -3.6814, Train Steps/Sec: 0.52
|
| 150 |
+
[[34m2026-02-03 13:11:01[0m] (step=0012700) Train Loss: -3.6842, Train Steps/Sec: 0.53
|
| 151 |
+
[[34m2026-02-03 13:14:08[0m] (step=0012800) Train Loss: -3.6816, Train Steps/Sec: 0.54
|
| 152 |
+
[[34m2026-02-03 13:17:15[0m] (step=0012900) Train Loss: -3.6790, Train Steps/Sec: 0.53
|
| 153 |
+
[[34m2026-02-03 13:20:22[0m] (step=0013000) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 154 |
+
[[34m2026-02-03 13:23:29[0m] (step=0013100) Train Loss: -3.6792, Train Steps/Sec: 0.53
|
| 155 |
+
[[34m2026-02-03 13:26:36[0m] (step=0013200) Train Loss: -3.6836, Train Steps/Sec: 0.53
|
| 156 |
+
[[34m2026-02-03 13:29:43[0m] (step=0013300) Train Loss: -3.6845, Train Steps/Sec: 0.54
|
| 157 |
+
[[34m2026-02-03 13:32:50[0m] (step=0013400) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 158 |
+
[[34m2026-02-03 13:35:57[0m] (step=0013500) Train Loss: -3.6798, Train Steps/Sec: 0.53
|
| 159 |
+
[[34m2026-02-03 13:39:04[0m] (step=0013600) Train Loss: -3.6828, Train Steps/Sec: 0.54
|
| 160 |
+
[[34m2026-02-03 13:42:11[0m] (step=0013700) Train Loss: -3.6799, Train Steps/Sec: 0.54
|
| 161 |
+
[[34m2026-02-03 13:45:18[0m] (step=0013800) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 162 |
+
[[34m2026-02-03 13:48:22[0m] (step=0013900) Train Loss: -3.6831, Train Steps/Sec: 0.54
|
| 163 |
+
[[34m2026-02-03 13:51:29[0m] (step=0014000) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 164 |
+
[[34m2026-02-03 13:54:36[0m] (step=0014100) Train Loss: -3.6823, Train Steps/Sec: 0.53
|
| 165 |
+
[[34m2026-02-03 13:57:43[0m] (step=0014200) Train Loss: -3.6795, Train Steps/Sec: 0.54
|
| 166 |
+
[[34m2026-02-03 14:00:50[0m] (step=0014300) Train Loss: -3.6795, Train Steps/Sec: 0.53
|
| 167 |
+
[[34m2026-02-03 14:03:57[0m] (step=0014400) Train Loss: -3.6838, Train Steps/Sec: 0.54
|
| 168 |
+
[[34m2026-02-03 14:07:04[0m] (step=0014500) Train Loss: -3.6832, Train Steps/Sec: 0.53
|
| 169 |
+
[[34m2026-02-03 14:10:11[0m] (step=0014600) Train Loss: -3.6832, Train Steps/Sec: 0.53
|
| 170 |
+
[[34m2026-02-03 14:13:18[0m] (step=0014700) Train Loss: -3.6784, Train Steps/Sec: 0.54
|
| 171 |
+
[[34m2026-02-03 14:16:24[0m] (step=0014800) Train Loss: -3.6824, Train Steps/Sec: 0.54
|
| 172 |
+
[[34m2026-02-03 14:19:31[0m] (step=0014900) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 173 |
+
[[34m2026-02-03 14:22:38[0m] (step=0015000) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 174 |
+
[[34m2026-02-03 14:23:01[0m] Beginning epoch 6...
|
| 175 |
+
[[34m2026-02-03 14:25:48[0m] (step=0015100) Train Loss: -3.6831, Train Steps/Sec: 0.53
|
| 176 |
+
[[34m2026-02-03 14:28:55[0m] (step=0015200) Train Loss: -3.6786, Train Steps/Sec: 0.53
|
| 177 |
+
[[34m2026-02-03 14:32:02[0m] (step=0015300) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 178 |
+
[[34m2026-02-03 14:35:08[0m] (step=0015400) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 179 |
+
[[34m2026-02-03 14:38:15[0m] (step=0015500) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 180 |
+
[[34m2026-02-03 14:41:21[0m] (step=0015600) Train Loss: -3.6796, Train Steps/Sec: 0.54
|
| 181 |
+
[[34m2026-02-03 14:44:28[0m] (step=0015700) Train Loss: -3.6839, Train Steps/Sec: 0.54
|
| 182 |
+
[[34m2026-02-03 14:47:36[0m] (step=0015800) Train Loss: -3.6846, Train Steps/Sec: 0.53
|
| 183 |
+
[[34m2026-02-03 14:50:43[0m] (step=0015900) Train Loss: -3.6828, Train Steps/Sec: 0.53
|
| 184 |
+
[[34m2026-02-03 14:53:50[0m] (step=0016000) Train Loss: -3.6828, Train Steps/Sec: 0.54
|
| 185 |
+
[[34m2026-02-03 14:56:57[0m] (step=0016100) Train Loss: -3.6789, Train Steps/Sec: 0.53
|
| 186 |
+
[[34m2026-02-03 15:00:04[0m] (step=0016200) Train Loss: -3.6810, Train Steps/Sec: 0.53
|
| 187 |
+
[[34m2026-02-03 15:03:11[0m] (step=0016300) Train Loss: -3.6799, Train Steps/Sec: 0.53
|
| 188 |
+
[[34m2026-02-03 15:06:19[0m] (step=0016400) Train Loss: -3.6806, Train Steps/Sec: 0.53
|
| 189 |
+
[[34m2026-02-03 15:09:24[0m] (step=0016500) Train Loss: -3.6828, Train Steps/Sec: 0.54
|
| 190 |
+
[[34m2026-02-03 15:12:31[0m] (step=0016600) Train Loss: -3.6781, Train Steps/Sec: 0.54
|
| 191 |
+
[[34m2026-02-03 15:15:37[0m] (step=0016700) Train Loss: -3.6830, Train Steps/Sec: 0.54
|
| 192 |
+
[[34m2026-02-03 15:18:44[0m] (step=0016800) Train Loss: -3.6756, Train Steps/Sec: 0.54
|
| 193 |
+
[[34m2026-02-03 15:21:51[0m] (step=0016900) Train Loss: -3.6798, Train Steps/Sec: 0.54
|
| 194 |
+
[[34m2026-02-03 15:24:58[0m] (step=0017000) Train Loss: -3.6813, Train Steps/Sec: 0.53
|
| 195 |
+
[[34m2026-02-03 15:28:04[0m] (step=0017100) Train Loss: -3.6807, Train Steps/Sec: 0.54
|
| 196 |
+
[[34m2026-02-03 15:31:11[0m] (step=0017200) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 197 |
+
[[34m2026-02-03 15:34:18[0m] (step=0017300) Train Loss: -3.6800, Train Steps/Sec: 0.54
|
| 198 |
+
[[34m2026-02-03 15:37:25[0m] (step=0017400) Train Loss: -3.6836, Train Steps/Sec: 0.53
|
| 199 |
+
[[34m2026-02-03 15:40:32[0m] (step=0017500) Train Loss: -3.6807, Train Steps/Sec: 0.53
|
| 200 |
+
[[34m2026-02-03 15:40:59[0m] Beginning epoch 7...
|
| 201 |
+
[[34m2026-02-03 15:43:42[0m] (step=0017600) Train Loss: -3.6829, Train Steps/Sec: 0.53
|
| 202 |
+
[[34m2026-02-03 15:46:49[0m] (step=0017700) Train Loss: -3.6790, Train Steps/Sec: 0.53
|
| 203 |
+
[[34m2026-02-03 15:49:56[0m] (step=0017800) Train Loss: -3.6850, Train Steps/Sec: 0.53
|
| 204 |
+
[[34m2026-02-03 15:53:04[0m] (step=0017900) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 205 |
+
[[34m2026-02-03 15:56:11[0m] (step=0018000) Train Loss: -3.6835, Train Steps/Sec: 0.53
|
| 206 |
+
[[34m2026-02-03 15:59:18[0m] (step=0018100) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 207 |
+
[[34m2026-02-03 16:02:25[0m] (step=0018200) Train Loss: -3.6788, Train Steps/Sec: 0.53
|
| 208 |
+
[[34m2026-02-03 16:05:31[0m] (step=0018300) Train Loss: -3.6786, Train Steps/Sec: 0.54
|
| 209 |
+
[[34m2026-02-03 16:08:39[0m] (step=0018400) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 210 |
+
[[34m2026-02-03 16:11:46[0m] (step=0018500) Train Loss: -3.6809, Train Steps/Sec: 0.53
|
| 211 |
+
[[34m2026-02-03 16:14:52[0m] (step=0018600) Train Loss: -3.6803, Train Steps/Sec: 0.54
|
| 212 |
+
[[34m2026-02-03 16:17:59[0m] (step=0018700) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 213 |
+
[[34m2026-02-03 16:21:06[0m] (step=0018800) Train Loss: -3.6819, Train Steps/Sec: 0.53
|
| 214 |
+
[[34m2026-02-03 16:24:12[0m] (step=0018900) Train Loss: -3.6834, Train Steps/Sec: 0.54
|
| 215 |
+
[[34m2026-02-03 16:27:19[0m] (step=0019000) Train Loss: -3.6824, Train Steps/Sec: 0.54
|
| 216 |
+
[[34m2026-02-03 16:30:24[0m] (step=0019100) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 217 |
+
[[34m2026-02-03 16:33:31[0m] (step=0019200) Train Loss: -3.6826, Train Steps/Sec: 0.53
|
| 218 |
+
[[34m2026-02-03 16:36:38[0m] (step=0019300) Train Loss: -3.6774, Train Steps/Sec: 0.53
|
| 219 |
+
[[34m2026-02-03 16:39:45[0m] (step=0019400) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 220 |
+
[[34m2026-02-03 16:42:51[0m] (step=0019500) Train Loss: -3.6837, Train Steps/Sec: 0.54
|
| 221 |
+
[[34m2026-02-03 16:45:59[0m] (step=0019600) Train Loss: -3.6828, Train Steps/Sec: 0.53
|
| 222 |
+
[[34m2026-02-03 16:49:06[0m] (step=0019700) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 223 |
+
[[34m2026-02-03 16:52:13[0m] (step=0019800) Train Loss: -3.6828, Train Steps/Sec: 0.53
|
| 224 |
+
[[34m2026-02-03 16:55:20[0m] (step=0019900) Train Loss: -3.6832, Train Steps/Sec: 0.53
|
| 225 |
+
[[34m2026-02-03 16:58:27[0m] (step=0020000) Train Loss: -3.6837, Train Steps/Sec: 0.54
|
| 226 |
+
[[34m2026-02-03 16:58:57[0m] Beginning epoch 8...
|
| 227 |
+
[[34m2026-02-03 17:01:37[0m] (step=0020100) Train Loss: -3.6820, Train Steps/Sec: 0.52
|
| 228 |
+
[[34m2026-02-03 17:04:45[0m] (step=0020200) Train Loss: -3.6798, Train Steps/Sec: 0.53
|
| 229 |
+
[[34m2026-02-03 17:07:52[0m] (step=0020300) Train Loss: -3.6807, Train Steps/Sec: 0.53
|
| 230 |
+
[[34m2026-02-03 17:10:59[0m] (step=0020400) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 231 |
+
[[34m2026-02-03 17:14:05[0m] (step=0020500) Train Loss: -3.6794, Train Steps/Sec: 0.54
|
| 232 |
+
[[34m2026-02-03 17:17:13[0m] (step=0020600) Train Loss: -3.6833, Train Steps/Sec: 0.53
|
| 233 |
+
[[34m2026-02-03 17:20:20[0m] (step=0020700) Train Loss: -3.6802, Train Steps/Sec: 0.53
|
| 234 |
+
[[34m2026-02-03 17:23:27[0m] (step=0020800) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 235 |
+
[[34m2026-02-03 17:26:34[0m] (step=0020900) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 236 |
+
[[34m2026-02-03 17:29:41[0m] (step=0021000) Train Loss: -3.6795, Train Steps/Sec: 0.53
|
| 237 |
+
[[34m2026-02-03 17:32:48[0m] (step=0021100) Train Loss: -3.6794, Train Steps/Sec: 0.53
|
| 238 |
+
[[34m2026-02-03 17:35:55[0m] (step=0021200) Train Loss: 3.9167, Train Steps/Sec: 0.53
|
| 239 |
+
[[34m2026-02-03 17:39:02[0m] (step=0021300) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 240 |
+
[[34m2026-02-03 17:42:09[0m] (step=0021400) Train Loss: -3.6805, Train Steps/Sec: 0.53
|
| 241 |
+
[[34m2026-02-03 17:45:16[0m] (step=0021500) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 242 |
+
[[34m2026-02-03 17:48:23[0m] (step=0021600) Train Loss: -3.6812, Train Steps/Sec: 0.54
|
| 243 |
+
[[34m2026-02-03 17:51:28[0m] (step=0021700) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 244 |
+
[[34m2026-02-03 17:54:34[0m] (step=0021800) Train Loss: -3.6846, Train Steps/Sec: 0.54
|
| 245 |
+
[[34m2026-02-03 17:57:41[0m] (step=0021900) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 246 |
+
[[34m2026-02-03 18:00:48[0m] (step=0022000) Train Loss: -3.6807, Train Steps/Sec: 0.54
|
| 247 |
+
[[34m2026-02-03 18:03:55[0m] (step=0022100) Train Loss: -3.6799, Train Steps/Sec: 0.53
|
| 248 |
+
[[34m2026-02-03 18:07:02[0m] (step=0022200) Train Loss: -3.6788, Train Steps/Sec: 0.53
|
| 249 |
+
[[34m2026-02-03 18:10:09[0m] (step=0022300) Train Loss: -3.6821, Train Steps/Sec: 0.53
|
| 250 |
+
[[34m2026-02-03 18:13:16[0m] (step=0022400) Train Loss: -3.6808, Train Steps/Sec: 0.53
|
| 251 |
+
[[34m2026-02-03 18:16:24[0m] (step=0022500) Train Loss: -3.6836, Train Steps/Sec: 0.53
|
| 252 |
+
[[34m2026-02-03 18:16:58[0m] Beginning epoch 9...
|
| 253 |
+
[[34m2026-02-03 18:19:34[0m] (step=0022600) Train Loss: -3.6835, Train Steps/Sec: 0.53
|
| 254 |
+
[[34m2026-02-03 18:22:40[0m] (step=0022700) Train Loss: -3.6848, Train Steps/Sec: 0.54
|
| 255 |
+
[[34m2026-02-03 18:25:47[0m] (step=0022800) Train Loss: -3.6778, Train Steps/Sec: 0.54
|
| 256 |
+
[[34m2026-02-03 18:28:53[0m] (step=0022900) Train Loss: -3.6829, Train Steps/Sec: 0.54
|
| 257 |
+
[[34m2026-02-03 18:32:00[0m] (step=0023000) Train Loss: -3.6807, Train Steps/Sec: 0.54
|
| 258 |
+
[[34m2026-02-03 18:35:07[0m] (step=0023100) Train Loss: -3.6846, Train Steps/Sec: 0.53
|
| 259 |
+
[[34m2026-02-03 18:38:14[0m] (step=0023200) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 260 |
+
[[34m2026-02-03 18:41:21[0m] (step=0023300) Train Loss: -3.6807, Train Steps/Sec: 0.53
|
| 261 |
+
[[34m2026-02-03 18:44:28[0m] (step=0023400) Train Loss: -3.6812, Train Steps/Sec: 0.54
|
| 262 |
+
[[34m2026-02-03 18:47:35[0m] (step=0023500) Train Loss: -3.6811, Train Steps/Sec: 0.53
|
| 263 |
+
[[34m2026-02-03 18:50:42[0m] (step=0023600) Train Loss: -3.6800, Train Steps/Sec: 0.53
|
| 264 |
+
[[34m2026-02-03 18:53:49[0m] (step=0023700) Train Loss: -3.6848, Train Steps/Sec: 0.53
|
| 265 |
+
[[34m2026-02-03 18:56:56[0m] (step=0023800) Train Loss: -3.6824, Train Steps/Sec: 0.54
|
| 266 |
+
[[34m2026-02-03 19:00:03[0m] (step=0023900) Train Loss: -3.6820, Train Steps/Sec: 0.54
|
| 267 |
+
[[34m2026-02-03 19:03:09[0m] (step=0024000) Train Loss: -3.6848, Train Steps/Sec: 0.54
|
| 268 |
+
[[34m2026-02-03 19:06:16[0m] (step=0024100) Train Loss: -3.6791, Train Steps/Sec: 0.54
|
| 269 |
+
[[34m2026-02-03 19:09:22[0m] (step=0024200) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 270 |
+
[[34m2026-02-03 19:12:30[0m] (step=0024300) Train Loss: -3.6800, Train Steps/Sec: 0.53
|
| 271 |
+
[[34m2026-02-03 19:15:35[0m] (step=0024400) Train Loss: -3.6792, Train Steps/Sec: 0.54
|
| 272 |
+
[[34m2026-02-03 19:18:42[0m] (step=0024500) Train Loss: -3.6807, Train Steps/Sec: 0.53
|
| 273 |
+
[[34m2026-02-03 19:21:49[0m] (step=0024600) Train Loss: -3.6796, Train Steps/Sec: 0.53
|
| 274 |
+
[[34m2026-02-03 19:24:56[0m] (step=0024700) Train Loss: -3.6814, Train Steps/Sec: 0.53
|
| 275 |
+
[[34m2026-02-03 19:28:03[0m] (step=0024800) Train Loss: -3.6832, Train Steps/Sec: 0.54
|
| 276 |
+
[[34m2026-02-03 19:31:10[0m] (step=0024900) Train Loss: -3.6832, Train Steps/Sec: 0.54
|
| 277 |
+
[[34m2026-02-03 19:34:18[0m] (step=0025000) Train Loss: -3.6782, Train Steps/Sec: 0.53
|
| 278 |
+
25000
|
| 279 |
+
25000
|
| 280 |
+
25000
|
| 281 |
+
25000
|
| 282 |
+
[[34m2026-02-03 19:34:18[0m] Saved checkpoint to results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt
|
| 283 |
+
[[34m2026-02-03 19:34:56[0m] Beginning epoch 10...
|
| 284 |
+
[[34m2026-02-03 19:37:29[0m] (step=0025100) Train Loss: -3.6836, Train Steps/Sec: 0.52
|
| 285 |
+
[[34m2026-02-03 19:40:21[0m] Generating EMA samples...
|
| 286 |
+
[[34m2026-02-03 19:40:36[0m] (step=0025200) Train Loss: -3.6796, Train Steps/Sec: 0.53
|
| 287 |
+
[[34m2026-02-03 19:43:43[0m] (step=0025300) Train Loss: -3.6818, Train Steps/Sec: 0.53
|
| 288 |
+
[[34m2026-02-03 19:46:50[0m] (step=0025400) Train Loss: -3.6789, Train Steps/Sec: 0.54
|
| 289 |
+
[[34m2026-02-03 19:49:58[0m] (step=0025500) Train Loss: -3.6817, Train Steps/Sec: 0.53
|
| 290 |
+
[[34m2026-02-03 19:53:05[0m] (step=0025600) Train Loss: -3.6804, Train Steps/Sec: 0.53
|
| 291 |
+
[[34m2026-02-03 19:56:11[0m] (step=0025700) Train Loss: -3.6800, Train Steps/Sec: 0.54
|
| 292 |
+
[[34m2026-02-03 19:59:19[0m] (step=0025800) Train Loss: -3.6832, Train Steps/Sec: 0.53
|
| 293 |
+
[[34m2026-02-03 20:02:25[0m] (step=0025900) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 294 |
+
[[34m2026-02-03 20:05:32[0m] (step=0026000) Train Loss: -3.6812, Train Steps/Sec: 0.54
|
| 295 |
+
[[34m2026-02-03 20:08:39[0m] (step=0026100) Train Loss: -3.6827, Train Steps/Sec: 0.54
|
| 296 |
+
[[34m2026-02-03 20:11:47[0m] (step=0026200) Train Loss: -3.6793, Train Steps/Sec: 0.53
|
| 297 |
+
[[34m2026-02-03 20:14:54[0m] (step=0026300) Train Loss: -3.6817, Train Steps/Sec: 0.53
|
| 298 |
+
[[34m2026-02-03 20:18:01[0m] (step=0026400) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 299 |
+
[[34m2026-02-03 20:21:07[0m] (step=0026500) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 300 |
+
[[34m2026-02-03 20:24:14[0m] (step=0026600) Train Loss: -3.6842, Train Steps/Sec: 0.54
|
| 301 |
+
[[34m2026-02-03 20:27:20[0m] (step=0026700) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 302 |
+
[[34m2026-02-03 20:30:27[0m] (step=0026800) Train Loss: -3.6849, Train Steps/Sec: 0.53
|
| 303 |
+
[[34m2026-02-03 20:33:34[0m] (step=0026900) Train Loss: -3.6802, Train Steps/Sec: 0.53
|
| 304 |
+
[[34m2026-02-03 20:36:39[0m] (step=0027000) Train Loss: -3.6792, Train Steps/Sec: 0.54
|
| 305 |
+
[[34m2026-02-03 20:39:46[0m] (step=0027100) Train Loss: -3.6843, Train Steps/Sec: 0.54
|
| 306 |
+
[[34m2026-02-03 20:42:52[0m] (step=0027200) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 307 |
+
[[34m2026-02-03 20:45:59[0m] (step=0027300) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 308 |
+
[[34m2026-02-03 20:49:06[0m] (step=0027400) Train Loss: -3.6775, Train Steps/Sec: 0.54
|
| 309 |
+
[[34m2026-02-03 20:52:12[0m] (step=0027500) Train Loss: -3.6800, Train Steps/Sec: 0.54
|
| 310 |
+
[[34m2026-02-03 20:52:54[0m] Beginning epoch 11...
|
| 311 |
+
[[34m2026-02-03 20:55:23[0m] (step=0027600) Train Loss: -3.6853, Train Steps/Sec: 0.53
|
| 312 |
+
[[34m2026-02-03 20:58:29[0m] (step=0027700) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 313 |
+
[[34m2026-02-03 21:01:37[0m] (step=0027800) Train Loss: -3.6811, Train Steps/Sec: 0.53
|
| 314 |
+
[[34m2026-02-03 21:04:43[0m] (step=0027900) Train Loss: -3.6810, Train Steps/Sec: 0.54
|
| 315 |
+
[[34m2026-02-03 21:07:50[0m] (step=0028000) Train Loss: -3.6827, Train Steps/Sec: 0.53
|
| 316 |
+
[[34m2026-02-03 21:10:57[0m] (step=0028100) Train Loss: -3.6839, Train Steps/Sec: 0.53
|
| 317 |
+
[[34m2026-02-03 21:14:04[0m] (step=0028200) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 318 |
+
[[34m2026-02-03 21:17:11[0m] (step=0028300) Train Loss: -3.6830, Train Steps/Sec: 0.53
|
| 319 |
+
[[34m2026-02-03 21:20:18[0m] (step=0028400) Train Loss: -3.6797, Train Steps/Sec: 0.53
|
| 320 |
+
[[34m2026-02-03 21:23:25[0m] (step=0028500) Train Loss: -3.6797, Train Steps/Sec: 0.53
|
| 321 |
+
[[34m2026-02-03 21:26:32[0m] (step=0028600) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 322 |
+
[[34m2026-02-03 21:29:39[0m] (step=0028700) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 323 |
+
[[34m2026-02-03 21:32:45[0m] (step=0028800) Train Loss: -3.6812, Train Steps/Sec: 0.54
|
| 324 |
+
[[34m2026-02-03 21:35:53[0m] (step=0028900) Train Loss: -3.6858, Train Steps/Sec: 0.53
|
| 325 |
+
[[34m2026-02-03 21:38:59[0m] (step=0029000) Train Loss: -3.6842, Train Steps/Sec: 0.54
|
| 326 |
+
[[34m2026-02-03 21:42:06[0m] (step=0029100) Train Loss: -3.6836, Train Steps/Sec: 0.54
|
| 327 |
+
[[34m2026-02-03 21:45:14[0m] (step=0029200) Train Loss: -3.6813, Train Steps/Sec: 0.53
|
| 328 |
+
[[34m2026-02-03 21:48:20[0m] (step=0029300) Train Loss: -3.6783, Train Steps/Sec: 0.54
|
| 329 |
+
[[34m2026-02-03 21:51:27[0m] (step=0029400) Train Loss: -3.6829, Train Steps/Sec: 0.53
|
| 330 |
+
[[34m2026-02-03 21:54:34[0m] (step=0029500) Train Loss: -3.6812, Train Steps/Sec: 0.54
|
| 331 |
+
[[34m2026-02-03 21:57:39[0m] (step=0029600) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 332 |
+
[[34m2026-02-03 22:00:46[0m] (step=0029700) Train Loss: -3.6828, Train Steps/Sec: 0.53
|
| 333 |
+
[[34m2026-02-03 22:03:53[0m] (step=0029800) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 334 |
+
[[34m2026-02-03 22:06:59[0m] (step=0029900) Train Loss: -3.6814, Train Steps/Sec: 0.54
|
| 335 |
+
[[34m2026-02-03 22:10:06[0m] (step=0030000) Train Loss: -3.6837, Train Steps/Sec: 0.54
|
| 336 |
+
[[34m2026-02-03 22:10:51[0m] Beginning epoch 12...
|
| 337 |
+
[[34m2026-02-03 22:13:16[0m] (step=0030100) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 338 |
+
[[34m2026-02-03 22:16:22[0m] (step=0030200) Train Loss: -3.6787, Train Steps/Sec: 0.54
|
| 339 |
+
[[34m2026-02-03 22:19:29[0m] (step=0030300) Train Loss: -3.6815, Train Steps/Sec: 0.53
|
| 340 |
+
[[34m2026-02-03 22:22:37[0m] (step=0030400) Train Loss: -3.6806, Train Steps/Sec: 0.53
|
| 341 |
+
[[34m2026-02-03 22:25:44[0m] (step=0030500) Train Loss: -3.6825, Train Steps/Sec: 0.53
|
| 342 |
+
[[34m2026-02-03 22:28:51[0m] (step=0030600) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 343 |
+
[[34m2026-02-03 22:31:58[0m] (step=0030700) Train Loss: -3.6838, Train Steps/Sec: 0.54
|
| 344 |
+
[[34m2026-02-03 22:35:05[0m] (step=0030800) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 345 |
+
[[34m2026-02-03 22:38:11[0m] (step=0030900) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 346 |
+
[[34m2026-02-03 22:41:18[0m] (step=0031000) Train Loss: -3.6815, Train Steps/Sec: 0.54
|
| 347 |
+
[[34m2026-02-03 22:44:25[0m] (step=0031100) Train Loss: -3.6796, Train Steps/Sec: 0.53
|
| 348 |
+
[[34m2026-02-03 22:47:32[0m] (step=0031200) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 349 |
+
[[34m2026-02-03 22:50:39[0m] (step=0031300) Train Loss: -3.6806, Train Steps/Sec: 0.53
|
| 350 |
+
[[34m2026-02-03 22:53:46[0m] (step=0031400) Train Loss: -3.6822, Train Steps/Sec: 0.53
|
| 351 |
+
[[34m2026-02-03 22:56:53[0m] (step=0031500) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 352 |
+
[[34m2026-02-03 23:00:00[0m] (step=0031600) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 353 |
+
[[34m2026-02-03 23:03:07[0m] (step=0031700) Train Loss: -3.6843, Train Steps/Sec: 0.53
|
| 354 |
+
[[34m2026-02-03 23:06:14[0m] (step=0031800) Train Loss: -3.6832, Train Steps/Sec: 0.53
|
| 355 |
+
[[34m2026-02-03 23:09:21[0m] (step=0031900) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 356 |
+
[[34m2026-02-03 23:12:28[0m] (step=0032000) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 357 |
+
[[34m2026-02-03 23:15:34[0m] (step=0032100) Train Loss: -3.6786, Train Steps/Sec: 0.54
|
| 358 |
+
[[34m2026-02-03 23:18:39[0m] (step=0032200) Train Loss: -3.6814, Train Steps/Sec: 0.54
|
| 359 |
+
[[34m2026-02-03 23:21:46[0m] (step=0032300) Train Loss: -3.6839, Train Steps/Sec: 0.54
|
| 360 |
+
[[34m2026-02-03 23:24:52[0m] (step=0032400) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 361 |
+
[[34m2026-02-03 23:27:59[0m] (step=0032500) Train Loss: -3.6809, Train Steps/Sec: 0.53
|
| 362 |
+
[[34m2026-02-03 23:28:48[0m] Beginning epoch 13...
|
| 363 |
+
[[34m2026-02-03 23:31:09[0m] (step=0032600) Train Loss: -3.6846, Train Steps/Sec: 0.53
|
| 364 |
+
[[34m2026-02-03 23:34:16[0m] (step=0032700) Train Loss: -3.6841, Train Steps/Sec: 0.53
|
| 365 |
+
[[34m2026-02-03 23:37:24[0m] (step=0032800) Train Loss: -3.6813, Train Steps/Sec: 0.53
|
| 366 |
+
[[34m2026-02-03 23:40:31[0m] (step=0032900) Train Loss: -3.6792, Train Steps/Sec: 0.53
|
| 367 |
+
[[34m2026-02-03 23:43:38[0m] (step=0033000) Train Loss: -3.6782, Train Steps/Sec: 0.53
|
| 368 |
+
[[34m2026-02-03 23:46:45[0m] (step=0033100) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 369 |
+
[[34m2026-02-03 23:49:52[0m] (step=0033200) Train Loss: -3.6819, Train Steps/Sec: 0.53
|
| 370 |
+
[[34m2026-02-03 23:52:59[0m] (step=0033300) Train Loss: -3.6793, Train Steps/Sec: 0.54
|
| 371 |
+
[[34m2026-02-03 23:56:06[0m] (step=0033400) Train Loss: -3.6810, Train Steps/Sec: 0.54
|
| 372 |
+
[[34m2026-02-03 23:59:13[0m] (step=0033500) Train Loss: -3.6816, Train Steps/Sec: 0.53
|
| 373 |
+
[[34m2026-02-04 00:02:20[0m] (step=0033600) Train Loss: -3.6831, Train Steps/Sec: 0.54
|
| 374 |
+
[[34m2026-02-04 00:05:26[0m] (step=0033700) Train Loss: -3.6831, Train Steps/Sec: 0.54
|
| 375 |
+
[[34m2026-02-04 00:08:33[0m] (step=0033800) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 376 |
+
[[34m2026-02-04 00:11:40[0m] (step=0033900) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 377 |
+
[[34m2026-02-04 00:14:46[0m] (step=0034000) Train Loss: -3.6789, Train Steps/Sec: 0.54
|
| 378 |
+
[[34m2026-02-04 00:17:54[0m] (step=0034100) Train Loss: -3.6814, Train Steps/Sec: 0.53
|
| 379 |
+
[[34m2026-02-04 00:21:00[0m] (step=0034200) Train Loss: -3.6805, Train Steps/Sec: 0.54
|
| 380 |
+
[[34m2026-02-04 00:24:07[0m] (step=0034300) Train Loss: -3.6837, Train Steps/Sec: 0.53
|
| 381 |
+
[[34m2026-02-04 00:27:14[0m] (step=0034400) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 382 |
+
[[34m2026-02-04 00:30:20[0m] (step=0034500) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 383 |
+
[[34m2026-02-04 00:33:27[0m] (step=0034600) Train Loss: -3.6821, Train Steps/Sec: 0.54
|
| 384 |
+
[[34m2026-02-04 00:36:34[0m] (step=0034700) Train Loss: -3.6799, Train Steps/Sec: 0.54
|
| 385 |
+
[[34m2026-02-04 00:39:38[0m] (step=0034800) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 386 |
+
[[34m2026-02-04 00:42:45[0m] (step=0034900) Train Loss: -3.6820, Train Steps/Sec: 0.54
|
| 387 |
+
[[34m2026-02-04 00:45:52[0m] (step=0035000) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 388 |
+
[[34m2026-02-04 00:46:45[0m] Beginning epoch 14...
|
| 389 |
+
[[34m2026-02-04 00:49:01[0m] (step=0035100) Train Loss: -3.6794, Train Steps/Sec: 0.53
|
| 390 |
+
[[34m2026-02-04 00:52:08[0m] (step=0035200) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 391 |
+
[[34m2026-02-04 00:55:15[0m] (step=0035300) Train Loss: -3.6825, Train Steps/Sec: 0.53
|
| 392 |
+
[[34m2026-02-04 00:58:22[0m] (step=0035400) Train Loss: -3.6817, Train Steps/Sec: 0.53
|
| 393 |
+
[[34m2026-02-04 01:01:29[0m] (step=0035500) Train Loss: -3.6840, Train Steps/Sec: 0.54
|
| 394 |
+
[[34m2026-02-04 01:04:35[0m] (step=0035600) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 395 |
+
[[34m2026-02-04 01:07:42[0m] (step=0035700) Train Loss: -3.6796, Train Steps/Sec: 0.53
|
| 396 |
+
[[34m2026-02-04 01:10:50[0m] (step=0035800) Train Loss: -3.6834, Train Steps/Sec: 0.53
|
| 397 |
+
[[34m2026-02-04 01:13:56[0m] (step=0035900) Train Loss: -3.6763, Train Steps/Sec: 0.54
|
| 398 |
+
[[34m2026-02-04 01:17:03[0m] (step=0036000) Train Loss: -3.6837, Train Steps/Sec: 0.53
|
| 399 |
+
[[34m2026-02-04 01:20:10[0m] (step=0036100) Train Loss: -3.6806, Train Steps/Sec: 0.53
|
| 400 |
+
[[34m2026-02-04 01:23:18[0m] (step=0036200) Train Loss: -3.6821, Train Steps/Sec: 0.53
|
| 401 |
+
[[34m2026-02-04 01:26:24[0m] (step=0036300) Train Loss: -3.6772, Train Steps/Sec: 0.54
|
| 402 |
+
[[34m2026-02-04 01:29:31[0m] (step=0036400) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 403 |
+
[[34m2026-02-04 01:32:38[0m] (step=0036500) Train Loss: -3.6816, Train Steps/Sec: 0.54
|
| 404 |
+
[[34m2026-02-04 01:35:45[0m] (step=0036600) Train Loss: -3.6792, Train Steps/Sec: 0.53
|
| 405 |
+
[[34m2026-02-04 01:38:51[0m] (step=0036700) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 406 |
+
[[34m2026-02-04 01:41:59[0m] (step=0036800) Train Loss: -3.6835, Train Steps/Sec: 0.53
|
| 407 |
+
[[34m2026-02-04 01:45:05[0m] (step=0036900) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 408 |
+
[[34m2026-02-04 01:48:12[0m] (step=0037000) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 409 |
+
[[34m2026-02-04 01:51:18[0m] (step=0037100) Train Loss: -3.6775, Train Steps/Sec: 0.54
|
| 410 |
+
[[34m2026-02-04 01:54:25[0m] (step=0037200) Train Loss: -3.6796, Train Steps/Sec: 0.54
|
| 411 |
+
[[34m2026-02-04 01:57:31[0m] (step=0037300) Train Loss: -3.6806, Train Steps/Sec: 0.54
|
| 412 |
+
[[34m2026-02-04 02:00:38[0m] (step=0037400) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 413 |
+
[[34m2026-02-04 02:03:43[0m] (step=0037500) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 414 |
+
[[34m2026-02-04 02:04:39[0m] Beginning epoch 15...
|
| 415 |
+
[[34m2026-02-04 02:06:52[0m] (step=0037600) Train Loss: -3.6847, Train Steps/Sec: 0.53
|
| 416 |
+
[[34m2026-02-04 02:10:00[0m] (step=0037700) Train Loss: -3.6837, Train Steps/Sec: 0.53
|
| 417 |
+
[[34m2026-02-04 02:13:06[0m] (step=0037800) Train Loss: -3.6796, Train Steps/Sec: 0.54
|
| 418 |
+
[[34m2026-02-04 02:16:13[0m] (step=0037900) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 419 |
+
[[34m2026-02-04 02:19:20[0m] (step=0038000) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 420 |
+
[[34m2026-02-04 02:22:26[0m] (step=0038100) Train Loss: -3.6803, Train Steps/Sec: 0.54
|
| 421 |
+
[[34m2026-02-04 02:25:33[0m] (step=0038200) Train Loss: -3.6813, Train Steps/Sec: 0.54
|
| 422 |
+
[[34m2026-02-04 02:28:40[0m] (step=0038300) Train Loss: -3.6798, Train Steps/Sec: 0.53
|
| 423 |
+
[[34m2026-02-04 02:31:47[0m] (step=0038400) Train Loss: -3.6797, Train Steps/Sec: 0.53
|
| 424 |
+
[[34m2026-02-04 02:34:54[0m] (step=0038500) Train Loss: -3.6817, Train Steps/Sec: 0.54
|
| 425 |
+
[[34m2026-02-04 02:38:01[0m] (step=0038600) Train Loss: -3.6818, Train Steps/Sec: 0.54
|
| 426 |
+
[[34m2026-02-04 02:41:08[0m] (step=0038700) Train Loss: -3.6824, Train Steps/Sec: 0.54
|
| 427 |
+
[[34m2026-02-04 02:44:14[0m] (step=0038800) Train Loss: -3.6800, Train Steps/Sec: 0.54
|
| 428 |
+
[[34m2026-02-04 02:47:22[0m] (step=0038900) Train Loss: -3.6812, Train Steps/Sec: 0.53
|
| 429 |
+
[[34m2026-02-04 02:50:28[0m] (step=0039000) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 430 |
+
[[34m2026-02-04 02:53:35[0m] (step=0039100) Train Loss: -3.6807, Train Steps/Sec: 0.53
|
| 431 |
+
[[34m2026-02-04 02:56:42[0m] (step=0039200) Train Loss: -3.6831, Train Steps/Sec: 0.54
|
| 432 |
+
[[34m2026-02-04 02:59:48[0m] (step=0039300) Train Loss: -3.6822, Train Steps/Sec: 0.54
|
| 433 |
+
[[34m2026-02-04 03:02:55[0m] (step=0039400) Train Loss: -3.6803, Train Steps/Sec: 0.54
|
| 434 |
+
[[34m2026-02-04 03:06:01[0m] (step=0039500) Train Loss: -3.6815, Train Steps/Sec: 0.54
|
| 435 |
+
[[34m2026-02-04 03:09:08[0m] (step=0039600) Train Loss: -3.6830, Train Steps/Sec: 0.53
|
| 436 |
+
[[34m2026-02-04 03:12:15[0m] (step=0039700) Train Loss: -3.6771, Train Steps/Sec: 0.54
|
| 437 |
+
[[34m2026-02-04 03:15:21[0m] (step=0039800) Train Loss: -3.6791, Train Steps/Sec: 0.54
|
| 438 |
+
[[34m2026-02-04 03:18:28[0m] (step=0039900) Train Loss: -3.6797, Train Steps/Sec: 0.54
|
| 439 |
+
[[34m2026-02-04 03:21:34[0m] (step=0040000) Train Loss: -3.6815, Train Steps/Sec: 0.54
|
| 440 |
+
[[34m2026-02-04 03:22:33[0m] Beginning epoch 16...
|
| 441 |
+
[[34m2026-02-04 03:24:43[0m] (step=0040100) Train Loss: -3.6799, Train Steps/Sec: 0.53
|
| 442 |
+
[[34m2026-02-04 03:27:50[0m] (step=0040200) Train Loss: -3.6823, Train Steps/Sec: 0.53
|
| 443 |
+
[[34m2026-02-04 03:30:57[0m] (step=0040300) Train Loss: -3.6805, Train Steps/Sec: 0.53
|
| 444 |
+
[[34m2026-02-04 03:34:04[0m] (step=0040400) Train Loss: -3.6829, Train Steps/Sec: 0.54
|
| 445 |
+
[[34m2026-02-04 03:37:11[0m] (step=0040500) Train Loss: -3.6786, Train Steps/Sec: 0.53
|
| 446 |
+
[[34m2026-02-04 03:40:18[0m] (step=0040600) Train Loss: -3.6811, Train Steps/Sec: 0.54
|
| 447 |
+
[[34m2026-02-04 03:43:24[0m] (step=0040700) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 448 |
+
[[34m2026-02-04 03:46:32[0m] (step=0040800) Train Loss: -3.6860, Train Steps/Sec: 0.53
|
| 449 |
+
[[34m2026-02-04 03:49:38[0m] (step=0040900) Train Loss: -3.6804, Train Steps/Sec: 0.54
|
| 450 |
+
[[34m2026-02-04 03:52:44[0m] (step=0041000) Train Loss: -3.6803, Train Steps/Sec: 0.54
|
| 451 |
+
[[34m2026-02-04 03:55:52[0m] (step=0041100) Train Loss: -3.6803, Train Steps/Sec: 0.53
|
| 452 |
+
[[34m2026-02-04 03:58:59[0m] (step=0041200) Train Loss: -3.6801, Train Steps/Sec: 0.53
|
| 453 |
+
[[34m2026-02-04 04:02:06[0m] (step=0041300) Train Loss: -3.6794, Train Steps/Sec: 0.53
|
| 454 |
+
[[34m2026-02-04 04:05:14[0m] (step=0041400) Train Loss: -3.6816, Train Steps/Sec: 0.53
|
| 455 |
+
[[34m2026-02-04 04:08:20[0m] (step=0041500) Train Loss: -3.6858, Train Steps/Sec: 0.54
|
| 456 |
+
[[34m2026-02-04 04:11:27[0m] (step=0041600) Train Loss: -3.6811, Train Steps/Sec: 0.53
|
| 457 |
+
[[34m2026-02-04 04:14:34[0m] (step=0041700) Train Loss: -3.6859, Train Steps/Sec: 0.53
|
| 458 |
+
[[34m2026-02-04 04:17:41[0m] (step=0041800) Train Loss: -3.6823, Train Steps/Sec: 0.54
|
| 459 |
+
[[34m2026-02-04 04:20:47[0m] (step=0041900) Train Loss: -3.6838, Train Steps/Sec: 0.54
|
| 460 |
+
[[34m2026-02-04 04:23:54[0m] (step=0042000) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 461 |
+
[[34m2026-02-04 04:27:00[0m] (step=0042100) Train Loss: -3.6781, Train Steps/Sec: 0.54
|
| 462 |
+
[[34m2026-02-04 04:30:07[0m] (step=0042200) Train Loss: -3.6826, Train Steps/Sec: 0.54
|
| 463 |
+
[[34m2026-02-04 04:33:13[0m] (step=0042300) Train Loss: -3.6835, Train Steps/Sec: 0.54
|
| 464 |
+
[[34m2026-02-04 04:36:20[0m] (step=0042400) Train Loss: -3.6816, Train Steps/Sec: 0.54
|
| 465 |
+
[[34m2026-02-04 04:39:27[0m] (step=0042500) Train Loss: -3.6802, Train Steps/Sec: 0.53
|
| 466 |
+
[[34m2026-02-04 04:40:31[0m] Beginning epoch 17...
|
| 467 |
+
[[34m2026-02-04 04:42:37[0m] (step=0042600) Train Loss: -3.6831, Train Steps/Sec: 0.53
|
| 468 |
+
[[34m2026-02-04 04:45:42[0m] (step=0042700) Train Loss: -3.6778, Train Steps/Sec: 0.54
|
| 469 |
+
[[34m2026-02-04 04:48:48[0m] (step=0042800) Train Loss: -3.6846, Train Steps/Sec: 0.54
|
| 470 |
+
[[34m2026-02-04 04:51:55[0m] (step=0042900) Train Loss: -3.6827, Train Steps/Sec: 0.53
|
| 471 |
+
[[34m2026-02-04 04:55:02[0m] (step=0043000) Train Loss: -3.6820, Train Steps/Sec: 0.54
|
| 472 |
+
[[34m2026-02-04 04:58:08[0m] (step=0043100) Train Loss: -3.6803, Train Steps/Sec: 0.54
|
| 473 |
+
[[34m2026-02-04 05:01:15[0m] (step=0043200) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 474 |
+
[[34m2026-02-04 05:04:22[0m] (step=0043300) Train Loss: -3.6838, Train Steps/Sec: 0.53
|
| 475 |
+
[[34m2026-02-04 05:07:29[0m] (step=0043400) Train Loss: -3.6809, Train Steps/Sec: 0.54
|
| 476 |
+
[[34m2026-02-04 05:10:36[0m] (step=0043500) Train Loss: -3.6757, Train Steps/Sec: 0.53
|
| 477 |
+
[[34m2026-02-04 05:13:43[0m] (step=0043600) Train Loss: -3.6808, Train Steps/Sec: 0.54
|
| 478 |
+
[[34m2026-02-04 05:16:50[0m] (step=0043700) Train Loss: -3.6807, Train Steps/Sec: 0.54
|
| 479 |
+
[[34m2026-02-04 05:19:56[0m] (step=0043800) Train Loss: -3.6825, Train Steps/Sec: 0.54
|
| 480 |
+
[[34m2026-02-04 05:23:03[0m] (step=0043900) Train Loss: -3.6811, Train Steps/Sec: 0.53
|
| 481 |
+
[[34m2026-02-04 05:26:10[0m] (step=0044000) Train Loss: -3.6819, Train Steps/Sec: 0.54
|
| 482 |
+
[[34m2026-02-04 05:29:17[0m] (step=0044100) Train Loss: -3.6801, Train Steps/Sec: 0.54
|
| 483 |
+
[[34m2026-02-04 05:32:24[0m] (step=0044200) Train Loss: -3.6785, Train Steps/Sec: 0.54
|
| 484 |
+
[[34m2026-02-04 05:35:31[0m] (step=0044300) Train Loss: -3.6841, Train Steps/Sec: 0.53
|
| 485 |
+
[[34m2026-02-04 05:38:38[0m] (step=0044400) Train Loss: -3.6841, Train Steps/Sec: 0.53
|
| 486 |
+
[[34m2026-02-04 05:41:01[0m] (step=0044500) Train Loss: -3.6791, Train Steps/Sec: 0.70
|
| 487 |
+
[[34m2026-02-04 05:42:24[0m] (step=0044600) Train Loss: -3.6843, Train Steps/Sec: 1.20
|
| 488 |
+
[[34m2026-02-04 05:43:47[0m] (step=0044700) Train Loss: -3.6815, Train Steps/Sec: 1.21
|
| 489 |
+
[[34m2026-02-04 05:45:10[0m] (step=0044800) Train Loss: -3.6785, Train Steps/Sec: 1.21
|
| 490 |
+
[[34m2026-02-04 05:46:33[0m] (step=0044900) Train Loss: -3.6820, Train Steps/Sec: 1.21
|
| 491 |
+
[[34m2026-02-04 05:47:56[0m] (step=0045000) Train Loss: -3.6847, Train Steps/Sec: 1.20
|
| 492 |
+
[[34m2026-02-04 05:48:26[0m] Beginning epoch 18...
|
| 493 |
+
[[34m2026-02-04 05:49:22[0m] (step=0045100) Train Loss: -3.6816, Train Steps/Sec: 1.16
|
| 494 |
+
[[34m2026-02-04 05:50:45[0m] (step=0045200) Train Loss: -3.6834, Train Steps/Sec: 1.20
|
| 495 |
+
[[34m2026-02-04 05:52:08[0m] (step=0045300) Train Loss: -3.6787, Train Steps/Sec: 1.21
|
| 496 |
+
[[34m2026-02-04 05:53:31[0m] (step=0045400) Train Loss: -3.6844, Train Steps/Sec: 1.20
|
| 497 |
+
[[34m2026-02-04 05:54:54[0m] (step=0045500) Train Loss: -3.6823, Train Steps/Sec: 1.20
|
| 498 |
+
[[34m2026-02-04 05:56:17[0m] (step=0045600) Train Loss: -3.6806, Train Steps/Sec: 1.20
|
| 499 |
+
[[34m2026-02-04 05:57:40[0m] (step=0045700) Train Loss: -3.6797, Train Steps/Sec: 1.21
|
| 500 |
+
[[34m2026-02-04 05:59:03[0m] (step=0045800) Train Loss: -3.6819, Train Steps/Sec: 1.20
|
| 501 |
+
[[34m2026-02-04 06:00:26[0m] (step=0045900) Train Loss: -3.6807, Train Steps/Sec: 1.20
|
| 502 |
+
[[34m2026-02-04 06:01:49[0m] (step=0046000) Train Loss: -3.6814, Train Steps/Sec: 1.21
|
| 503 |
+
[[34m2026-02-04 06:03:12[0m] (step=0046100) Train Loss: -3.6827, Train Steps/Sec: 1.21
|
| 504 |
+
[[34m2026-02-04 06:04:35[0m] (step=0046200) Train Loss: -3.6824, Train Steps/Sec: 1.20
|
| 505 |
+
[[34m2026-02-04 06:05:58[0m] (step=0046300) Train Loss: -3.6825, Train Steps/Sec: 1.20
|
| 506 |
+
[[34m2026-02-04 06:07:21[0m] (step=0046400) Train Loss: -3.6826, Train Steps/Sec: 1.20
|
| 507 |
+
[[34m2026-02-04 06:08:44[0m] (step=0046500) Train Loss: -3.6778, Train Steps/Sec: 1.20
|
| 508 |
+
[[34m2026-02-04 06:10:07[0m] (step=0046600) Train Loss: -3.6820, Train Steps/Sec: 1.20
|
| 509 |
+
[[34m2026-02-04 06:11:30[0m] (step=0046700) Train Loss: -3.6830, Train Steps/Sec: 1.21
|
| 510 |
+
[[34m2026-02-04 06:12:53[0m] (step=0046800) Train Loss: -3.6808, Train Steps/Sec: 1.20
|
| 511 |
+
[[34m2026-02-04 06:14:16[0m] (step=0046900) Train Loss: -3.6812, Train Steps/Sec: 1.20
|
| 512 |
+
[[34m2026-02-04 06:15:39[0m] (step=0047000) Train Loss: -3.6836, Train Steps/Sec: 1.20
|
| 513 |
+
[[34m2026-02-04 06:17:02[0m] (step=0047100) Train Loss: -3.6806, Train Steps/Sec: 1.20
|
| 514 |
+
[[34m2026-02-04 06:18:25[0m] (step=0047200) Train Loss: -3.6813, Train Steps/Sec: 1.20
|
| 515 |
+
[[34m2026-02-04 06:19:48[0m] (step=0047300) Train Loss: -3.6828, Train Steps/Sec: 1.20
|
| 516 |
+
[[34m2026-02-04 06:21:11[0m] (step=0047400) Train Loss: -3.6842, Train Steps/Sec: 1.21
|
| 517 |
+
m] (step=0048000) Train Loss: -2.9915, Train Steps/Sec: 1.02
|
| 518 |
+
[[34m2026-02-03 20:03:00[0m] (step=0048100) Train Loss: -2.9858, Train Steps/Sec: 1.02
|
| 519 |
+
[[34m2026-02-03 20:04:38[0m] (step=0048200) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 520 |
+
[[34m2026-02-03 20:06:16[0m] (step=0048300) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 521 |
+
[[34m2026-02-03 20:07:54[0m] (step=0048400) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 522 |
+
[[34m2026-02-03 20:09:32[0m] (step=0048500) Train Loss: -2.9878, Train Steps/Sec: 1.02
|
| 523 |
+
[[34m2026-02-03 20:11:10[0m] (step=0048600) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 524 |
+
[[34m2026-02-03 20:12:48[0m] (step=0048700) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 525 |
+
[[34m2026-02-03 20:14:26[0m] (step=0048800) Train Loss: -2.9853, Train Steps/Sec: 1.02
|
| 526 |
+
[[34m2026-02-03 20:16:04[0m] (step=0048900) Train Loss: -2.9901, Train Steps/Sec: 1.02
|
| 527 |
+
[[34m2026-02-03 20:17:41[0m] (step=0049000) Train Loss: -2.9853, Train Steps/Sec: 1.02
|
| 528 |
+
[[34m2026-02-03 20:19:19[0m] (step=0049100) Train Loss: -2.9849, Train Steps/Sec: 1.02
|
| 529 |
+
[[34m2026-02-03 20:20:57[0m] (step=0049200) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 530 |
+
[[34m2026-02-03 20:22:35[0m] (step=0049300) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 531 |
+
[[34m2026-02-03 20:24:12[0m] (step=0049400) Train Loss: -2.9888, Train Steps/Sec: 1.03
|
| 532 |
+
[[34m2026-02-03 20:25:51[0m] (step=0049500) Train Loss: -2.9911, Train Steps/Sec: 1.02
|
| 533 |
+
[[34m2026-02-03 20:27:29[0m] (step=0049600) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 534 |
+
[[34m2026-02-03 20:29:07[0m] (step=0049700) Train Loss: -2.9921, Train Steps/Sec: 1.02
|
| 535 |
+
[[34m2026-02-03 20:30:45[0m] (step=0049800) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 536 |
+
[[34m2026-02-03 20:32:23[0m] (step=0049900) Train Loss: -2.9805, Train Steps/Sec: 1.02
|
| 537 |
+
[[34m2026-02-03 20:34:01[0m] (step=0050000) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 538 |
+
50000
|
| 539 |
+
50000
|
| 540 |
+
50000
|
| 541 |
+
50000
|
| 542 |
+
[[34m2026-02-03 20:34:02[0m] Saved checkpoint to results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/checkpoints/0050000.pt
|
| 543 |
+
[[34m2026-02-03 20:34:41[0m] Beginning epoch 10...
|
| 544 |
+
[[34m2026-02-03 20:35:42[0m] (step=0050100) Train Loss: -2.9896, Train Steps/Sec: 0.99
|
| 545 |
+
[[34m2026-02-03 20:37:20[0m] (step=0050200) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 546 |
+
[[34m2026-02-03 20:38:58[0m] (step=0050300) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 547 |
+
[[34m2026-02-03 20:40:20[0m] Generating EMA samples...
|
| 548 |
+
[[34m2026-02-03 20:40:35[0m] (step=0050400) Train Loss: -2.9846, Train Steps/Sec: 1.03
|
| 549 |
+
[[34m2026-02-03 20:42:13[0m] (step=0050500) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 550 |
+
[[34m2026-02-03 20:43:51[0m] (step=0050600) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 551 |
+
[[34m2026-02-03 20:45:29[0m] (step=0050700) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 552 |
+
[[34m2026-02-03 20:47:07[0m] (step=0050800) Train Loss: -2.9852, Train Steps/Sec: 1.02
|
| 553 |
+
[[34m2026-02-03 20:48:44[0m] (step=0050900) Train Loss: -2.9878, Train Steps/Sec: 1.03
|
| 554 |
+
[[34m2026-02-03 20:50:21[0m] (step=0051000) Train Loss: -2.9897, Train Steps/Sec: 1.03
|
| 555 |
+
[[34m2026-02-03 20:52:00[0m] (step=0051100) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 556 |
+
[[34m2026-02-03 20:53:34[0m] (step=0051200) Train Loss: -2.9882, Train Steps/Sec: 1.06
|
| 557 |
+
[[34m2026-02-03 20:55:12[0m] (step=0051300) Train Loss: -2.9856, Train Steps/Sec: 1.02
|
| 558 |
+
[[34m2026-02-03 20:56:50[0m] (step=0051400) Train Loss: -2.9870, Train Steps/Sec: 1.02
|
| 559 |
+
[[34m2026-02-03 20:58:28[0m] (step=0051500) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 560 |
+
[[34m2026-02-03 21:00:06[0m] (step=0051600) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 561 |
+
[[34m2026-02-03 21:01:44[0m] (step=0051700) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 562 |
+
[[34m2026-02-03 21:03:22[0m] (step=0051800) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 563 |
+
[[34m2026-02-03 21:04:59[0m] (step=0051900) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 564 |
+
[[34m2026-02-03 21:06:38[0m] (step=0052000) Train Loss: -2.9898, Train Steps/Sec: 1.02
|
| 565 |
+
[[34m2026-02-03 21:08:16[0m] (step=0052100) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 566 |
+
[[34m2026-02-03 21:09:54[0m] (step=0052200) Train Loss: -2.9888, Train Steps/Sec: 1.02
|
| 567 |
+
[[34m2026-02-03 21:11:31[0m] (step=0052300) Train Loss: -2.9868, Train Steps/Sec: 1.03
|
| 568 |
+
[[34m2026-02-03 21:13:09[0m] (step=0052400) Train Loss: -2.9857, Train Steps/Sec: 1.02
|
| 569 |
+
[[34m2026-02-03 21:14:47[0m] (step=0052500) Train Loss: -2.9898, Train Steps/Sec: 1.03
|
| 570 |
+
[[34m2026-02-03 21:16:25[0m] (step=0052600) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 571 |
+
[[34m2026-02-03 21:18:03[0m] (step=0052700) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 572 |
+
[[34m2026-02-03 21:19:40[0m] (step=0052800) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 573 |
+
[[34m2026-02-03 21:21:18[0m] (step=0052900) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 574 |
+
[[34m2026-02-03 21:22:56[0m] (step=0053000) Train Loss: -2.9871, Train Steps/Sec: 1.02
|
| 575 |
+
[[34m2026-02-03 21:24:34[0m] (step=0053100) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 576 |
+
[[34m2026-02-03 21:26:12[0m] (step=0053200) Train Loss: -2.9921, Train Steps/Sec: 1.02
|
| 577 |
+
[[34m2026-02-03 21:27:49[0m] (step=0053300) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 578 |
+
[[34m2026-02-03 21:29:27[0m] (step=0053400) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 579 |
+
[[34m2026-02-03 21:31:05[0m] (step=0053500) Train Loss: -2.9909, Train Steps/Sec: 1.02
|
| 580 |
+
[[34m2026-02-03 21:32:43[0m] (step=0053600) Train Loss: -2.9928, Train Steps/Sec: 1.02
|
| 581 |
+
[[34m2026-02-03 21:34:21[0m] (step=0053700) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 582 |
+
[[34m2026-02-03 21:35:59[0m] (step=0053800) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 583 |
+
[[34m2026-02-03 21:37:37[0m] (step=0053900) Train Loss: -2.9883, Train Steps/Sec: 1.02
|
| 584 |
+
[[34m2026-02-03 21:39:15[0m] (step=0054000) Train Loss: -2.9844, Train Steps/Sec: 1.02
|
| 585 |
+
[[34m2026-02-03 21:40:53[0m] (step=0054100) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 586 |
+
[[34m2026-02-03 21:42:31[0m] (step=0054200) Train Loss: -2.9911, Train Steps/Sec: 1.02
|
| 587 |
+
[[34m2026-02-03 21:44:09[0m] (step=0054300) Train Loss: -2.9915, Train Steps/Sec: 1.02
|
| 588 |
+
[[34m2026-02-03 21:45:47[0m] (step=0054400) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 589 |
+
[[34m2026-02-03 21:47:24[0m] (step=0054500) Train Loss: -2.9854, Train Steps/Sec: 1.03
|
| 590 |
+
[[34m2026-02-03 21:49:02[0m] (step=0054600) Train Loss: -2.9923, Train Steps/Sec: 1.02
|
| 591 |
+
[[34m2026-02-03 21:50:39[0m] (step=0054700) Train Loss: -2.9864, Train Steps/Sec: 1.03
|
| 592 |
+
[[34m2026-02-03 21:52:17[0m] (step=0054800) Train Loss: -2.9826, Train Steps/Sec: 1.02
|
| 593 |
+
[[34m2026-02-03 21:53:55[0m] (step=0054900) Train Loss: -2.9858, Train Steps/Sec: 1.02
|
| 594 |
+
[[34m2026-02-03 21:55:33[0m] (step=0055000) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 595 |
+
[[34m2026-02-03 21:56:16[0m] Beginning epoch 11...
|
| 596 |
+
[[34m2026-02-03 21:57:13[0m] (step=0055100) Train Loss: -2.9926, Train Steps/Sec: 1.00
|
| 597 |
+
[[34m2026-02-03 21:58:50[0m] (step=0055200) Train Loss: -2.9919, Train Steps/Sec: 1.02
|
| 598 |
+
[[34m2026-02-03 22:00:28[0m] (step=0055300) Train Loss: -2.9910, Train Steps/Sec: 1.02
|
| 599 |
+
[[34m2026-02-03 22:02:06[0m] (step=0055400) Train Loss: -2.9851, Train Steps/Sec: 1.02
|
| 600 |
+
[[34m2026-02-03 22:03:44[0m] (step=0055500) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 601 |
+
[[34m2026-02-03 22:05:22[0m] (step=0055600) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 602 |
+
[[34m2026-02-03 22:07:00[0m] (step=0055700) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 603 |
+
[[34m2026-02-03 22:08:37[0m] (step=0055800) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 604 |
+
[[34m2026-02-03 22:10:15[0m] (step=0055900) Train Loss: -2.9909, Train Steps/Sec: 1.02
|
| 605 |
+
[[34m2026-02-03 22:11:50[0m] (step=0056000) Train Loss: -2.9884, Train Steps/Sec: 1.06
|
| 606 |
+
[[34m2026-02-03 22:13:28[0m] (step=0056100) Train Loss: -2.9902, Train Steps/Sec: 1.02
|
| 607 |
+
[[34m2026-02-03 22:15:05[0m] (step=0056200) Train Loss: -2.9904, Train Steps/Sec: 1.03
|
| 608 |
+
[[34m2026-02-03 22:16:43[0m] (step=0056300) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 609 |
+
[[34m2026-02-03 22:18:21[0m] (step=0056400) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 610 |
+
[[34m2026-02-03 22:19:59[0m] (step=0056500) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 611 |
+
[[34m2026-02-03 22:21:37[0m] (step=0056600) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 612 |
+
[[34m2026-02-03 22:23:15[0m] (step=0056700) Train Loss: -2.9888, Train Steps/Sec: 1.02
|
| 613 |
+
[[34m2026-02-03 22:24:53[0m] (step=0056800) Train Loss: -2.9846, Train Steps/Sec: 1.02
|
| 614 |
+
[[34m2026-02-03 22:26:32[0m] (step=0056900) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 615 |
+
[[34m2026-02-03 22:28:10[0m] (step=0057000) Train Loss: -2.9846, Train Steps/Sec: 1.02
|
| 616 |
+
[[34m2026-02-03 22:29:48[0m] (step=0057100) Train Loss: -2.9884, Train Steps/Sec: 1.02
|
| 617 |
+
[[34m2026-02-03 22:31:26[0m] (step=0057200) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 618 |
+
[[34m2026-02-03 22:33:04[0m] (step=0057300) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 619 |
+
[[34m2026-02-03 22:34:41[0m] (step=0057400) Train Loss: -2.9913, Train Steps/Sec: 1.02
|
| 620 |
+
[[34m2026-02-03 22:36:19[0m] (step=0057500) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 621 |
+
[[34m2026-02-03 22:37:57[0m] (step=0057600) Train Loss: -2.9872, Train Steps/Sec: 1.03
|
| 622 |
+
[[34m2026-02-03 22:39:34[0m] (step=0057700) Train Loss: -2.9902, Train Steps/Sec: 1.03
|
| 623 |
+
[[34m2026-02-03 22:41:12[0m] (step=0057800) Train Loss: -2.9949, Train Steps/Sec: 1.02
|
| 624 |
+
[[34m2026-02-03 22:42:50[0m] (step=0057900) Train Loss: -2.9919, Train Steps/Sec: 1.02
|
| 625 |
+
[[34m2026-02-03 22:44:28[0m] (step=0058000) Train Loss: -2.9903, Train Steps/Sec: 1.02
|
| 626 |
+
[[34m2026-02-03 22:46:06[0m] (step=0058100) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 627 |
+
[[34m2026-02-03 22:47:44[0m] (step=0058200) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 628 |
+
[[34m2026-02-03 22:49:22[0m] (step=0058300) Train Loss: -2.9900, Train Steps/Sec: 1.02
|
| 629 |
+
[[34m2026-02-03 22:50:59[0m] (step=0058400) Train Loss: -2.9865, Train Steps/Sec: 1.03
|
| 630 |
+
[[34m2026-02-03 22:52:37[0m] (step=0058500) Train Loss: -2.9851, Train Steps/Sec: 1.02
|
| 631 |
+
[[34m2026-02-03 22:54:15[0m] (step=0058600) Train Loss: -2.9861, Train Steps/Sec: 1.01
|
| 632 |
+
[[34m2026-02-03 22:55:53[0m] (step=0058700) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 633 |
+
[[34m2026-02-03 22:57:31[0m] (step=0058800) Train Loss: -2.9918, Train Steps/Sec: 1.02
|
| 634 |
+
[[34m2026-02-03 22:59:09[0m] (step=0058900) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 635 |
+
[[34m2026-02-03 23:00:47[0m] (step=0059000) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 636 |
+
[[34m2026-02-03 23:02:25[0m] (step=0059100) Train Loss: -2.9920, Train Steps/Sec: 1.02
|
| 637 |
+
[[34m2026-02-03 23:04:03[0m] (step=0059200) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 638 |
+
[[34m2026-02-03 23:05:41[0m] (step=0059300) Train Loss: -2.9895, Train Steps/Sec: 1.02
|
| 639 |
+
[[34m2026-02-03 23:07:19[0m] (step=0059400) Train Loss: -2.9911, Train Steps/Sec: 1.02
|
| 640 |
+
[[34m2026-02-03 23:08:57[0m] (step=0059500) Train Loss: -2.9857, Train Steps/Sec: 1.02
|
| 641 |
+
[[34m2026-02-03 23:10:34[0m] (step=0059600) Train Loss: -2.9925, Train Steps/Sec: 1.03
|
| 642 |
+
[[34m2026-02-03 23:12:12[0m] (step=0059700) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 643 |
+
[[34m2026-02-03 23:13:50[0m] (step=0059800) Train Loss: -2.9883, Train Steps/Sec: 1.02
|
| 644 |
+
[[34m2026-02-03 23:15:28[0m] (step=0059900) Train Loss: -2.9914, Train Steps/Sec: 1.02
|
| 645 |
+
[[34m2026-02-03 23:17:06[0m] (step=0060000) Train Loss: -2.9892, Train Steps/Sec: 1.02
|
| 646 |
+
[[34m2026-02-03 23:17:53[0m] Beginning epoch 12...
|
| 647 |
+
[[34m2026-02-03 23:18:45[0m] (step=0060100) Train Loss: -2.9931, Train Steps/Sec: 1.00
|
| 648 |
+
[[34m2026-02-03 23:20:23[0m] (step=0060200) Train Loss: -2.9852, Train Steps/Sec: 1.02
|
| 649 |
+
[[34m2026-02-03 23:22:01[0m] (step=0060300) Train Loss: -2.9839, Train Steps/Sec: 1.02
|
| 650 |
+
[[34m2026-02-03 23:23:39[0m] (step=0060400) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 651 |
+
[[34m2026-02-03 23:25:17[0m] (step=0060500) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 652 |
+
[[34m2026-02-03 23:26:55[0m] (step=0060600) Train Loss: -2.9869, Train Steps/Sec: 1.03
|
| 653 |
+
[[34m2026-02-03 23:28:33[0m] (step=0060700) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 654 |
+
[[34m2026-02-03 23:30:07[0m] (step=0060800) Train Loss: -2.9867, Train Steps/Sec: 1.06
|
| 655 |
+
[[34m2026-02-03 23:31:45[0m] (step=0060900) Train Loss: -2.9912, Train Steps/Sec: 1.02
|
| 656 |
+
[[34m2026-02-03 23:33:23[0m] (step=0061000) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 657 |
+
[[34m2026-02-03 23:35:01[0m] (step=0061100) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 658 |
+
[[34m2026-02-03 23:36:39[0m] (step=0061200) Train Loss: -2.9844, Train Steps/Sec: 1.02
|
| 659 |
+
[[34m2026-02-03 23:38:17[0m] (step=0061300) Train Loss: -2.9937, Train Steps/Sec: 1.02
|
| 660 |
+
[[34m2026-02-03 23:39:55[0m] (step=0061400) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 661 |
+
[[34m2026-02-03 23:41:33[0m] (step=0061500) Train Loss: -2.9898, Train Steps/Sec: 1.02
|
| 662 |
+
[[34m2026-02-03 23:43:10[0m] (step=0061600) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 663 |
+
[[34m2026-02-03 23:44:48[0m] (step=0061700) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 664 |
+
[[34m2026-02-03 23:46:25[0m] (step=0061800) Train Loss: -2.9888, Train Steps/Sec: 1.03
|
| 665 |
+
[[34m2026-02-03 23:48:03[0m] (step=0061900) Train Loss: -2.9867, Train Steps/Sec: 1.03
|
| 666 |
+
[[34m2026-02-03 23:49:41[0m] (step=0062000) Train Loss: -2.9901, Train Steps/Sec: 1.02
|
| 667 |
+
[[34m2026-02-03 23:51:19[0m] (step=0062100) Train Loss: -2.9850, Train Steps/Sec: 1.02
|
| 668 |
+
[[34m2026-02-03 23:52:56[0m] (step=0062200) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 669 |
+
[[34m2026-02-03 23:54:34[0m] (step=0062300) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 670 |
+
[[34m2026-02-03 23:56:12[0m] (step=0062400) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 671 |
+
[[34m2026-02-03 23:57:50[0m] (step=0062500) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 672 |
+
[[34m2026-02-03 23:59:28[0m] (step=0062600) Train Loss: -2.9854, Train Steps/Sec: 1.02
|
| 673 |
+
[[34m2026-02-04 00:01:06[0m] (step=0062700) Train Loss: -2.9918, Train Steps/Sec: 1.02
|
| 674 |
+
[[34m2026-02-04 00:02:44[0m] (step=0062800) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 675 |
+
[[34m2026-02-04 00:04:21[0m] (step=0062900) Train Loss: -2.9891, Train Steps/Sec: 1.03
|
| 676 |
+
[[34m2026-02-04 00:05:58[0m] (step=0063000) Train Loss: -2.9885, Train Steps/Sec: 1.03
|
| 677 |
+
[[34m2026-02-04 00:07:36[0m] (step=0063100) Train Loss: -2.9878, Train Steps/Sec: 1.02
|
| 678 |
+
[[34m2026-02-04 00:09:14[0m] (step=0063200) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 679 |
+
[[34m2026-02-04 00:10:52[0m] (step=0063300) Train Loss: -2.9942, Train Steps/Sec: 1.02
|
| 680 |
+
[[34m2026-02-04 00:12:30[0m] (step=0063400) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 681 |
+
[[34m2026-02-04 00:14:07[0m] (step=0063500) Train Loss: -2.9898, Train Steps/Sec: 1.03
|
| 682 |
+
[[34m2026-02-04 00:15:46[0m] (step=0063600) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 683 |
+
[[34m2026-02-04 00:17:24[0m] (step=0063700) Train Loss: -2.9892, Train Steps/Sec: 1.02
|
| 684 |
+
[[34m2026-02-04 00:19:02[0m] (step=0063800) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 685 |
+
[[34m2026-02-04 00:20:39[0m] (step=0063900) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 686 |
+
[[34m2026-02-04 00:22:18[0m] (step=0064000) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 687 |
+
[[34m2026-02-04 00:23:55[0m] (step=0064100) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 688 |
+
[[34m2026-02-04 00:25:33[0m] (step=0064200) Train Loss: -2.9838, Train Steps/Sec: 1.02
|
| 689 |
+
[[34m2026-02-04 00:27:11[0m] (step=0064300) Train Loss: -2.9857, Train Steps/Sec: 1.02
|
| 690 |
+
[[34m2026-02-04 00:28:49[0m] (step=0064400) Train Loss: -2.9905, Train Steps/Sec: 1.03
|
| 691 |
+
[[34m2026-02-04 00:30:26[0m] (step=0064500) Train Loss: -2.9910, Train Steps/Sec: 1.02
|
| 692 |
+
[[34m2026-02-04 00:32:05[0m] (step=0064600) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 693 |
+
[[34m2026-02-04 00:33:42[0m] (step=0064700) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 694 |
+
[[34m2026-02-04 00:35:20[0m] (step=0064800) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 695 |
+
[[34m2026-02-04 00:36:58[0m] (step=0064900) Train Loss: -2.9893, Train Steps/Sec: 1.02
|
| 696 |
+
[[34m2026-02-04 00:38:36[0m] (step=0065000) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 697 |
+
[[34m2026-02-04 00:39:28[0m] Beginning epoch 13...
|
| 698 |
+
[[34m2026-02-04 00:40:16[0m] (step=0065100) Train Loss: -2.9899, Train Steps/Sec: 1.00
|
| 699 |
+
[[34m2026-02-04 00:41:54[0m] (step=0065200) Train Loss: -2.9946, Train Steps/Sec: 1.02
|
| 700 |
+
[[34m2026-02-04 00:43:32[0m] (step=0065300) Train Loss: -2.9928, Train Steps/Sec: 1.02
|
| 701 |
+
[[34m2026-02-04 00:45:10[0m] (step=0065400) Train Loss: -2.9897, Train Steps/Sec: 1.02
|
| 702 |
+
[[34m2026-02-04 00:46:46[0m] (step=0065500) Train Loss: -2.9877, Train Steps/Sec: 1.05
|
| 703 |
+
[[34m2026-02-04 00:48:22[0m] (step=0065600) Train Loss: -2.9892, Train Steps/Sec: 1.03
|
| 704 |
+
[[34m2026-02-04 00:50:00[0m] (step=0065700) Train Loss: -2.9847, Train Steps/Sec: 1.02
|
| 705 |
+
[[34m2026-02-04 00:51:38[0m] (step=0065800) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 706 |
+
[[34m2026-02-04 00:53:16[0m] (step=0065900) Train Loss: -2.9838, Train Steps/Sec: 1.03
|
| 707 |
+
[[34m2026-02-04 00:54:54[0m] (step=0066000) Train Loss: -2.9848, Train Steps/Sec: 1.02
|
| 708 |
+
[[34m2026-02-04 00:56:31[0m] (step=0066100) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 709 |
+
[[34m2026-02-04 00:58:08[0m] (step=0066200) Train Loss: -2.9903, Train Steps/Sec: 1.03
|
| 710 |
+
[[34m2026-02-04 00:59:46[0m] (step=0066300) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 711 |
+
[[34m2026-02-04 01:01:24[0m] (step=0066400) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 712 |
+
[[34m2026-02-04 01:03:02[0m] (step=0066500) Train Loss: -2.9850, Train Steps/Sec: 1.03
|
| 713 |
+
[[34m2026-02-04 01:04:40[0m] (step=0066600) Train Loss: -2.9870, Train Steps/Sec: 1.02
|
| 714 |
+
[[34m2026-02-04 01:06:18[0m] (step=0066700) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 715 |
+
[[34m2026-02-04 01:07:56[0m] (step=0066800) Train Loss: -2.9895, Train Steps/Sec: 1.02
|
| 716 |
+
[[34m2026-02-04 01:09:34[0m] (step=0066900) Train Loss: -2.9862, Train Steps/Sec: 1.02
|
| 717 |
+
[[34m2026-02-04 01:11:11[0m] (step=0067000) Train Loss: -2.9913, Train Steps/Sec: 1.03
|
| 718 |
+
[[34m2026-02-04 01:12:48[0m] (step=0067100) Train Loss: -2.9877, Train Steps/Sec: 1.03
|
| 719 |
+
[[34m2026-02-04 01:14:26[0m] (step=0067200) Train Loss: -2.9923, Train Steps/Sec: 1.03
|
| 720 |
+
[[34m2026-02-04 01:16:04[0m] (step=0067300) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 721 |
+
[[34m2026-02-04 01:17:42[0m] (step=0067400) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 722 |
+
[[34m2026-02-04 01:19:20[0m] (step=0067500) Train Loss: -2.9905, Train Steps/Sec: 1.02
|
| 723 |
+
[[34m2026-02-04 01:20:58[0m] (step=0067600) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 724 |
+
[[34m2026-02-04 01:22:36[0m] (step=0067700) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 725 |
+
[[34m2026-02-04 01:24:14[0m] (step=0067800) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 726 |
+
[[34m2026-02-04 01:25:52[0m] (step=0067900) Train Loss: -2.9875, Train Steps/Sec: 1.03
|
| 727 |
+
[[34m2026-02-04 01:27:29[0m] (step=0068000) Train Loss: -2.9834, Train Steps/Sec: 1.02
|
| 728 |
+
[[34m2026-02-04 01:29:07[0m] (step=0068100) Train Loss: -2.9885, Train Steps/Sec: 1.02
|
| 729 |
+
[[34m2026-02-04 01:30:45[0m] (step=0068200) Train Loss: -2.9882, Train Steps/Sec: 1.02
|
| 730 |
+
[[34m2026-02-04 01:32:22[0m] (step=0068300) Train Loss: -2.9922, Train Steps/Sec: 1.03
|
| 731 |
+
[[34m2026-02-04 01:34:01[0m] (step=0068400) Train Loss: -2.9823, Train Steps/Sec: 1.02
|
| 732 |
+
[[34m2026-02-04 01:35:38[0m] (step=0068500) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 733 |
+
[[34m2026-02-04 01:37:15[0m] (step=0068600) Train Loss: -2.9938, Train Steps/Sec: 1.03
|
| 734 |
+
[[34m2026-02-04 01:38:53[0m] (step=0068700) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 735 |
+
[[34m2026-02-04 01:40:31[0m] (step=0068800) Train Loss: -2.9893, Train Steps/Sec: 1.02
|
| 736 |
+
[[34m2026-02-04 01:42:09[0m] (step=0068900) Train Loss: -2.9892, Train Steps/Sec: 1.02
|
| 737 |
+
[[34m2026-02-04 01:43:47[0m] (step=0069000) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 738 |
+
[[34m2026-02-04 01:45:25[0m] (step=0069100) Train Loss: -2.9871, Train Steps/Sec: 1.02
|
| 739 |
+
[[34m2026-02-04 01:47:02[0m] (step=0069200) Train Loss: -2.9910, Train Steps/Sec: 1.03
|
| 740 |
+
[[34m2026-02-04 01:48:40[0m] (step=0069300) Train Loss: -2.9894, Train Steps/Sec: 1.03
|
| 741 |
+
[[34m2026-02-04 01:50:17[0m] (step=0069400) Train Loss: -2.9837, Train Steps/Sec: 1.02
|
| 742 |
+
[[34m2026-02-04 01:51:55[0m] (step=0069500) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 743 |
+
[[34m2026-02-04 01:53:33[0m] (step=0069600) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 744 |
+
[[34m2026-02-04 01:55:11[0m] (step=0069700) Train Loss: -2.9852, Train Steps/Sec: 1.03
|
| 745 |
+
[[34m2026-02-04 01:56:49[0m] (step=0069800) Train Loss: -2.9926, Train Steps/Sec: 1.02
|
| 746 |
+
[[34m2026-02-04 01:58:27[0m] (step=0069900) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 747 |
+
[[34m2026-02-04 02:00:05[0m] (step=0070000) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 748 |
+
[[34m2026-02-04 02:01:01[0m] Beginning epoch 14...
|
| 749 |
+
[[34m2026-02-04 02:01:45[0m] (step=0070100) Train Loss: -2.9858, Train Steps/Sec: 1.00
|
| 750 |
+
[[34m2026-02-04 02:03:23[0m] (step=0070200) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 751 |
+
[[34m2026-02-04 02:04:57[0m] (step=0070300) Train Loss: -2.9891, Train Steps/Sec: 1.06
|
| 752 |
+
[[34m2026-02-04 02:06:35[0m] (step=0070400) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 753 |
+
[[34m2026-02-04 02:08:13[0m] (step=0070500) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 754 |
+
[[34m2026-02-04 02:09:51[0m] (step=0070600) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 755 |
+
[[34m2026-02-04 02:11:29[0m] (step=0070700) Train Loss: -2.9853, Train Steps/Sec: 1.02
|
| 756 |
+
[[34m2026-02-04 02:13:06[0m] (step=0070800) Train Loss: -2.9915, Train Steps/Sec: 1.02
|
| 757 |
+
[[34m2026-02-04 02:14:44[0m] (step=0070900) Train Loss: -2.9902, Train Steps/Sec: 1.02
|
| 758 |
+
[[34m2026-02-04 02:16:22[0m] (step=0071000) Train Loss: -2.9910, Train Steps/Sec: 1.02
|
| 759 |
+
[[34m2026-02-04 02:18:00[0m] (step=0071100) Train Loss: -2.9909, Train Steps/Sec: 1.02
|
| 760 |
+
[[34m2026-02-04 02:19:37[0m] (step=0071200) Train Loss: -2.9857, Train Steps/Sec: 1.03
|
| 761 |
+
[[34m2026-02-04 02:21:15[0m] (step=0071300) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 762 |
+
[[34m2026-02-04 02:22:52[0m] (step=0071400) Train Loss: -2.9858, Train Steps/Sec: 1.03
|
| 763 |
+
[[34m2026-02-04 02:24:30[0m] (step=0071500) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 764 |
+
[[34m2026-02-04 02:26:08[0m] (step=0071600) Train Loss: -2.9936, Train Steps/Sec: 1.02
|
| 765 |
+
[[34m2026-02-04 02:27:46[0m] (step=0071700) Train Loss: -2.9813, Train Steps/Sec: 1.02
|
| 766 |
+
[[34m2026-02-04 02:29:24[0m] (step=0071800) Train Loss: -2.9841, Train Steps/Sec: 1.02
|
| 767 |
+
[[34m2026-02-04 02:31:01[0m] (step=0071900) Train Loss: -2.9900, Train Steps/Sec: 1.03
|
| 768 |
+
[[34m2026-02-04 02:32:39[0m] (step=0072000) Train Loss: -2.9901, Train Steps/Sec: 1.03
|
| 769 |
+
[[34m2026-02-04 02:34:16[0m] (step=0072100) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 770 |
+
[[34m2026-02-04 02:35:54[0m] (step=0072200) Train Loss: -2.9852, Train Steps/Sec: 1.03
|
| 771 |
+
[[34m2026-02-04 02:37:32[0m] (step=0072300) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 772 |
+
[[34m2026-02-04 02:39:10[0m] (step=0072400) Train Loss: -2.9919, Train Steps/Sec: 1.02
|
| 773 |
+
[[34m2026-02-04 02:40:48[0m] (step=0072500) Train Loss: -2.9843, Train Steps/Sec: 1.02
|
| 774 |
+
[[34m2026-02-04 02:42:26[0m] (step=0072600) Train Loss: -2.9850, Train Steps/Sec: 1.02
|
| 775 |
+
[[34m2026-02-04 02:44:04[0m] (step=0072700) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 776 |
+
[[34m2026-02-04 02:45:42[0m] (step=0072800) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 777 |
+
[[34m2026-02-04 02:47:19[0m] (step=0072900) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 778 |
+
[[34m2026-02-04 02:48:57[0m] (step=0073000) Train Loss: -2.9901, Train Steps/Sec: 1.03
|
| 779 |
+
[[34m2026-02-04 02:50:35[0m] (step=0073100) Train Loss: -2.9859, Train Steps/Sec: 1.02
|
| 780 |
+
[[34m2026-02-04 02:52:13[0m] (step=0073200) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 781 |
+
[[34m2026-02-04 02:53:50[0m] (step=0073300) Train Loss: -2.9875, Train Steps/Sec: 1.03
|
| 782 |
+
[[34m2026-02-04 02:55:28[0m] (step=0073400) Train Loss: -2.9896, Train Steps/Sec: 1.02
|
| 783 |
+
[[34m2026-02-04 02:57:05[0m] (step=0073500) Train Loss: -2.9940, Train Steps/Sec: 1.03
|
| 784 |
+
[[34m2026-02-04 02:58:43[0m] (step=0073600) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 785 |
+
[[34m2026-02-04 03:00:21[0m] (step=0073700) Train Loss: -2.9883, Train Steps/Sec: 1.02
|
| 786 |
+
[[34m2026-02-04 03:01:58[0m] (step=0073800) Train Loss: -2.9895, Train Steps/Sec: 1.03
|
| 787 |
+
[[34m2026-02-04 03:03:36[0m] (step=0073900) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 788 |
+
[[34m2026-02-04 03:05:14[0m] (step=0074000) Train Loss: -2.9884, Train Steps/Sec: 1.02
|
| 789 |
+
[[34m2026-02-04 03:06:52[0m] (step=0074100) Train Loss: -2.9830, Train Steps/Sec: 1.02
|
| 790 |
+
[[34m2026-02-04 03:08:30[0m] (step=0074200) Train Loss: -2.9861, Train Steps/Sec: 1.02
|
| 791 |
+
[[34m2026-02-04 03:10:08[0m] (step=0074300) Train Loss: -2.9873, Train Steps/Sec: 1.02
|
| 792 |
+
[[34m2026-02-04 03:11:45[0m] (step=0074400) Train Loss: -2.9860, Train Steps/Sec: 1.03
|
| 793 |
+
[[34m2026-02-04 03:13:22[0m] (step=0074500) Train Loss: -2.9887, Train Steps/Sec: 1.03
|
| 794 |
+
[[34m2026-02-04 03:15:00[0m] (step=0074600) Train Loss: -2.9857, Train Steps/Sec: 1.03
|
| 795 |
+
[[34m2026-02-04 03:16:38[0m] (step=0074700) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 796 |
+
[[34m2026-02-04 03:18:15[0m] (step=0074800) Train Loss: -2.9874, Train Steps/Sec: 1.02
|
| 797 |
+
[[34m2026-02-04 03:19:53[0m] (step=0074900) Train Loss: -2.9849, Train Steps/Sec: 1.02
|
| 798 |
+
[[34m2026-02-04 03:21:32[0m] (step=0075000) Train Loss: -2.9902, Train Steps/Sec: 1.02
|
| 799 |
+
75000
|
| 800 |
+
75000
|
| 801 |
+
75000
|
| 802 |
+
75000
|
| 803 |
+
[[34m2026-02-04 03:21:33[0m] Saved checkpoint to results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/checkpoints/0075000.pt
|
| 804 |
+
[[34m2026-02-04 03:22:32[0m] Beginning epoch 15...
|
| 805 |
+
[[34m2026-02-04 03:23:10[0m] (step=0075100) Train Loss: -2.9908, Train Steps/Sec: 1.02
|
| 806 |
+
[[34m2026-02-04 03:24:48[0m] (step=0075200) Train Loss: -2.9917, Train Steps/Sec: 1.02
|
| 807 |
+
[[34m2026-02-04 03:26:26[0m] (step=0075300) Train Loss: -2.9913, Train Steps/Sec: 1.02
|
| 808 |
+
[[34m2026-02-04 03:28:04[0m] (step=0075400) Train Loss: -2.9900, Train Steps/Sec: 1.02
|
| 809 |
+
[[34m2026-02-04 03:29:42[0m] (step=0075500) Train Loss: -2.9866, Train Steps/Sec: 1.02
|
| 810 |
+
[[34m2026-02-04 03:30:56[0m] Generating EMA samples...
|
| 811 |
+
[[34m2026-02-04 03:31:20[0m] (step=0075600) Train Loss: -2.9850, Train Steps/Sec: 1.02
|
| 812 |
+
[[34m2026-02-04 03:32:57[0m] (step=0075700) Train Loss: -2.9845, Train Steps/Sec: 1.03
|
| 813 |
+
[[34m2026-02-04 03:34:36[0m] (step=0075800) Train Loss: -2.9907, Train Steps/Sec: 1.02
|
| 814 |
+
[[34m2026-02-04 03:36:14[0m] (step=0075900) Train Loss: -2.9899, Train Steps/Sec: 1.02
|
| 815 |
+
[[34m2026-02-04 03:37:52[0m] (step=0076000) Train Loss: -2.9894, Train Steps/Sec: 1.02
|
| 816 |
+
[[34m2026-02-04 03:39:30[0m] (step=0076100) Train Loss: -2.9877, Train Steps/Sec: 1.02
|
| 817 |
+
[[34m2026-02-04 03:41:08[0m] (step=0076200) Train Loss: -2.9870, Train Steps/Sec: 1.02
|
| 818 |
+
[[34m2026-02-04 03:42:45[0m] (step=0076300) Train Loss: -2.9843, Train Steps/Sec: 1.03
|
| 819 |
+
[[34m2026-02-04 03:44:23[0m] (step=0076400) Train Loss: -2.9898, Train Steps/Sec: 1.02
|
| 820 |
+
[[34m2026-02-04 03:46:01[0m] (step=0076500) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 821 |
+
[[34m2026-02-04 03:47:39[0m] (step=0076600) Train Loss: -2.9848, Train Steps/Sec: 1.02
|
| 822 |
+
[[34m2026-02-04 03:49:16[0m] (step=0076700) Train Loss: -2.9864, Train Steps/Sec: 1.03
|
| 823 |
+
[[34m2026-02-04 03:50:54[0m] (step=0076800) Train Loss: -2.9876, Train Steps/Sec: 1.03
|
| 824 |
+
[[34m2026-02-04 03:52:32[0m] (step=0076900) Train Loss: -2.9862, Train Steps/Sec: 1.02
|
| 825 |
+
[[34m2026-02-04 03:54:10[0m] (step=0077000) Train Loss: -2.9906, Train Steps/Sec: 1.02
|
| 826 |
+
[[34m2026-02-04 03:55:48[0m] (step=0077100) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 827 |
+
[[34m2026-02-04 03:57:26[0m] (step=0077200) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 828 |
+
[[34m2026-02-04 03:59:04[0m] (step=0077300) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 829 |
+
[[34m2026-02-04 04:00:42[0m] (step=0077400) Train Loss: -2.9886, Train Steps/Sec: 1.02
|
| 830 |
+
[[34m2026-02-04 04:02:20[0m] (step=0077500) Train Loss: -2.9870, Train Steps/Sec: 1.02
|
| 831 |
+
[[34m2026-02-04 04:03:58[0m] (step=0077600) Train Loss: -2.9864, Train Steps/Sec: 1.02
|
| 832 |
+
[[34m2026-02-04 04:05:35[0m] (step=0077700) Train Loss: -2.9854, Train Steps/Sec: 1.02
|
| 833 |
+
[[34m2026-02-04 04:07:13[0m] (step=0077800) Train Loss: -2.9904, Train Steps/Sec: 1.02
|
| 834 |
+
[[34m2026-02-04 04:08:51[0m] (step=0077900) Train Loss: -2.9850, Train Steps/Sec: 1.02
|
| 835 |
+
[[34m2026-02-04 04:10:29[0m] (step=0078000) Train Loss: -2.9941, Train Steps/Sec: 1.02
|
| 836 |
+
[[34m2026-02-04 04:12:07[0m] (step=0078100) Train Loss: -2.9890, Train Steps/Sec: 1.02
|
| 837 |
+
[[34m2026-02-04 04:13:45[0m] (step=0078200) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 838 |
+
[[34m2026-02-04 04:15:22[0m] (step=0078300) Train Loss: -2.9915, Train Steps/Sec: 1.03
|
| 839 |
+
[[34m2026-02-04 04:17:00[0m] (step=0078400) Train Loss: -2.9876, Train Steps/Sec: 1.03
|
| 840 |
+
[[34m2026-02-04 04:18:37[0m] (step=0078500) Train Loss: -2.9893, Train Steps/Sec: 1.03
|
| 841 |
+
[[34m2026-02-04 04:20:15[0m] (step=0078600) Train Loss: -2.9887, Train Steps/Sec: 1.02
|
| 842 |
+
[[34m2026-02-04 04:21:53[0m] (step=0078700) Train Loss: -2.9854, Train Steps/Sec: 1.02
|
| 843 |
+
[[34m2026-02-04 04:23:31[0m] (step=0078800) Train Loss: -2.9884, Train Steps/Sec: 1.03
|
| 844 |
+
[[34m2026-02-04 04:25:08[0m] (step=0078900) Train Loss: -2.9884, Train Steps/Sec: 1.03
|
| 845 |
+
[[34m2026-02-04 04:26:46[0m] (step=0079000) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 846 |
+
[[34m2026-02-04 04:28:24[0m] (step=0079100) Train Loss: -2.9918, Train Steps/Sec: 1.02
|
| 847 |
+
[[34m2026-02-04 04:30:01[0m] (step=0079200) Train Loss: -2.9873, Train Steps/Sec: 1.03
|
| 848 |
+
[[34m2026-02-04 04:31:39[0m] (step=0079300) Train Loss: -2.9867, Train Steps/Sec: 1.02
|
| 849 |
+
[[34m2026-02-04 04:33:17[0m] (step=0079400) Train Loss: -2.9800, Train Steps/Sec: 1.02
|
| 850 |
+
[[34m2026-02-04 04:34:55[0m] (step=0079500) Train Loss: -2.9873, Train Steps/Sec: 1.03
|
| 851 |
+
[[34m2026-02-04 04:36:32[0m] (step=0079600) Train Loss: -2.9847, Train Steps/Sec: 1.02
|
| 852 |
+
[[34m2026-02-04 04:38:11[0m] (step=0079700) Train Loss: -2.9876, Train Steps/Sec: 1.02
|
| 853 |
+
[[34m2026-02-04 04:39:48[0m] (step=0079800) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 854 |
+
[[34m2026-02-04 04:41:23[0m] (step=0079900) Train Loss: -2.9922, Train Steps/Sec: 1.06
|
| 855 |
+
[[34m2026-02-04 04:43:00[0m] (step=0080000) Train Loss: -2.9857, Train Steps/Sec: 1.03
|
| 856 |
+
[[34m2026-02-04 04:44:04[0m] Beginning epoch 16...
|
| 857 |
+
[[34m2026-02-04 04:44:40[0m] (step=0080100) Train Loss: -2.9882, Train Steps/Sec: 1.00
|
| 858 |
+
[[34m2026-02-04 04:46:18[0m] (step=0080200) Train Loss: -2.9875, Train Steps/Sec: 1.02
|
| 859 |
+
[[34m2026-02-04 04:47:56[0m] (step=0080300) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 860 |
+
[[34m2026-02-04 04:49:34[0m] (step=0080400) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 861 |
+
[[34m2026-02-04 04:51:11[0m] (step=0080500) Train Loss: -2.9847, Train Steps/Sec: 1.03
|
| 862 |
+
[[34m2026-02-04 04:52:49[0m] (step=0080600) Train Loss: -2.9891, Train Steps/Sec: 1.02
|
| 863 |
+
[[34m2026-02-04 04:54:27[0m] (step=0080700) Train Loss: -2.9888, Train Steps/Sec: 1.03
|
| 864 |
+
[[34m2026-02-04 04:56:04[0m] (step=0080800) Train Loss: -2.9902, Train Steps/Sec: 1.03
|
| 865 |
+
[[34m2026-02-04 04:57:42[0m] (step=0080900) Train Loss: -2.9849, Train Steps/Sec: 1.02
|
| 866 |
+
[[34m2026-02-04 04:59:20[0m] (step=0081000) Train Loss: -2.9865, Train Steps/Sec: 1.03
|
| 867 |
+
[[34m2026-02-04 05:00:58[0m] (step=0081100) Train Loss: -2.9868, Train Steps/Sec: 1.02
|
| 868 |
+
[[34m2026-02-04 05:02:36[0m] (step=0081200) Train Loss: -2.9889, Train Steps/Sec: 1.02
|
| 869 |
+
[[34m2026-02-04 05:04:14[0m] (step=0081300) Train Loss: -2.9845, Train Steps/Sec: 1.02
|
| 870 |
+
[[34m2026-02-04 05:05:52[0m] (step=0081400) Train Loss: -2.9906, Train Steps/Sec: 1.02
|
| 871 |
+
[[34m2026-02-04 05:07:29[0m] (step=0081500) Train Loss: -2.9916, Train Steps/Sec: 1.02
|
| 872 |
+
[[34m2026-02-04 05:09:08[0m] (step=0081600) Train Loss: -2.9953, Train Steps/Sec: 1.02
|
| 873 |
+
[[34m2026-02-04 05:10:46[0m] (step=0081700) Train Loss: -2.9884, Train Steps/Sec: 1.02
|
| 874 |
+
[[34m2026-02-04 05:12:24[0m] (step=0081800) Train Loss: -2.9865, Train Steps/Sec: 1.02
|
| 875 |
+
[[34m2026-02-04 05:14:01[0m] (step=0081900) Train Loss: -2.9889, Train Steps/Sec: 1.03
|
| 876 |
+
[[34m2026-02-04 05:15:39[0m] (step=0082000) Train Loss: -2.9850, Train Steps/Sec: 1.02
|
| 877 |
+
[[34m2026-02-04 05:17:17[0m] (step=0082100) Train Loss: -2.9880, Train Steps/Sec: 1.02
|
| 878 |
+
[[34m2026-02-04 05:18:55[0m] (step=0082200) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 879 |
+
[[34m2026-02-04 05:20:33[0m] (step=0082300) Train Loss: -2.9869, Train Steps/Sec: 1.02
|
| 880 |
+
[[34m2026-02-04 05:22:10[0m] (step=0082400) Train Loss: -2.9872, Train Steps/Sec: 1.02
|
| 881 |
+
[[34m2026-02-04 05:23:49[0m] (step=0082500) Train Loss: -2.9838, Train Steps/Sec: 1.02
|
| 882 |
+
[[34m2026-02-04 05:25:27[0m] (step=0082600) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 883 |
+
[[34m2026-02-04 05:27:04[0m] (step=0082700) Train Loss: -2.9890, Train Steps/Sec: 1.03
|
| 884 |
+
[[34m2026-02-04 05:28:42[0m] (step=0082800) Train Loss: -2.9881, Train Steps/Sec: 1.02
|
| 885 |
+
[[34m2026-02-04 05:30:19[0m] (step=0082900) Train Loss: -2.9903, Train Steps/Sec: 1.03
|
| 886 |
+
[[34m2026-02-04 05:31:58[0m] (step=0083000) Train Loss: -2.9946, Train Steps/Sec: 1.02
|
| 887 |
+
[[34m2026-02-04 05:33:36[0m] (step=0083100) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 888 |
+
[[34m2026-02-04 05:35:14[0m] (step=0083200) Train Loss: -2.9879, Train Steps/Sec: 1.02
|
| 889 |
+
[[34m2026-02-04 05:36:52[0m] (step=0083300) Train Loss: -2.9939, Train Steps/Sec: 1.02
|
| 890 |
+
[[34m2026-02-04 05:38:30[0m] (step=0083400) Train Loss: -2.9914, Train Steps/Sec: 1.02
|
| 891 |
+
[[34m2026-02-04 05:40:07[0m] (step=0083500) Train Loss: -2.9888, Train Steps/Sec: 1.03
|
| 892 |
+
W0204 05:40:25.828000 72184 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 72203 closing signal SIGTERM
|
| 893 |
+
W0204 05:40:25.830000 72184 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 72204 closing signal SIGTERM
|
| 894 |
+
W0204 05:40:25.831000 72184 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 72205 closing signal SIGTERM
|
| 895 |
+
E0204 05:40:25.834000 72184 site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: -9) local_rank: 0 (pid: 72202) of binary: /opt/conda/envs/SiT/bin/python
|
| 896 |
+
Traceback (most recent call last):
|
| 897 |
+
File "/opt/conda/envs/SiT/bin/torchrun", line 33, in <module>
|
| 898 |
+
sys.exit(load_entry_point('torch==2.5.1', 'console_scripts', 'torchrun')())
|
| 899 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 900 |
+
File "/opt/conda/envs/SiT/lib/python3.12/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
|
| 901 |
+
return f(*args, **kwargs)
|
| 902 |
+
^^^^^^^^^^^^^^^^^^
|
| 903 |
+
File "/opt/conda/envs/SiT/lib/python3.12/site-packages/torch/distributed/run.py", line 919, in main
|
| 904 |
+
run(args)
|
| 905 |
+
File "/opt/conda/envs/SiT/lib/python3.12/site-packages/torch/distributed/run.py", line 910, in run
|
| 906 |
+
elastic_launch(
|
| 907 |
+
File "/opt/conda/envs/SiT/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 138, in __call__
|
| 908 |
+
return launch_agent(self._config, self._entrypoint, list(args))
|
| 909 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 910 |
+
File "/opt/conda/envs/SiT/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent
|
| 911 |
+
raise ChildFailedError(
|
| 912 |
+
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
|
| 913 |
+
==========================================================
|
| 914 |
+
train_rectified_noise.py FAILED
|
| 915 |
+
----------------------------------------------------------
|
| 916 |
+
Failures:
|
| 917 |
+
<NO_OTHER_FAILURES>
|
| 918 |
+
----------------------------------------------------------
|
| 919 |
+
Root Cause (first observed failure):
|
| 920 |
+
[0]:
|
| 921 |
+
time : 2026-02-04_05:40:25
|
| 922 |
+
host : cabbd6562a3025dd000330e2d302e8fd-taskrole1-0
|
| 923 |
+
rank : 0 (local_rank: 0)
|
| 924 |
+
exitcode : -9 (pid: 72202)
|
| 925 |
+
error_file: <N/A>
|
| 926 |
+
traceback : Signal 9 (SIGKILL) received by PID 72202
|
| 927 |
+
==========================================================
|
Rectified_Noise/VP-Disp/README.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# [AAAI 2026] Rectified Noise: A Generative Model Using Positive-incentive Noise
|
| 2 |
+
|
| 3 |
+

|
| 4 |
+
|
| 5 |
+
<br>
|
| 6 |
+
<a href="https://arxiv.org/pdf/2511.07911"><img src="https://img.shields.io/static/v1?label=Paper&message=2511.07911&color=red&logo=arxiv"></a>
|
| 7 |
+
<a href="https://huggingface.co/xiangzai/recitified_noise"><img src="https://img.shields.io/badge/🤗_HuggingFace-Model-ffbd45.svg" alt="HuggingFace"></a>
|
| 8 |
+
|
| 9 |
+
## Introduction
|
| 10 |
+
This is a [Pytorch](https://pytorch.org) implementation of **Rectified Noise**, a generative model using positive-incentive noise to enhance model's sampling.
|
| 11 |
+
|
| 12 |
+

|
| 13 |
+
|
| 14 |
+
## Setup
|
| 15 |
+
|
| 16 |
+
We provide an `environment.yml` file that can be used to create a Conda environment.
|
| 17 |
+
|
| 18 |
+
```bash
|
| 19 |
+
conda env create -f environment.yml
|
| 20 |
+
conda activate RN
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
## Usage
|
| 24 |
+
|
| 25 |
+
### Training
|
| 26 |
+
1. We provide a training script for RN in `train_rectified_noise.py`
|
| 27 |
+
|
| 28 |
+
Run:
|
| 29 |
+
|
| 30 |
+
```bash
|
| 31 |
+
torchrun --nnodes=1 --nproc_per_node=4 train_rectified_noise.py \
|
| 32 |
+
--data-path /path/to/data \
|
| 33 |
+
--num-classes 3 \
|
| 34 |
+
--path-type Linear \
|
| 35 |
+
--prediction velocity \
|
| 36 |
+
--ckpt /path/to/pretrained_model \
|
| 37 |
+
    --model SiT-B/2 \
|
| 38 |
+
--learn-mu True \
|
| 39 |
+
--depth 1 \
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
You can find relevant checkpoint files from the previous Hugging Face link.
|
| 43 |
+
|
| 44 |
+
2. Parameters:
|
| 45 |
+
|
| 46 |
+
| Argument | Type | Default | Description |
|
| 47 |
+
|----------|------|---------|-------------|
|
| 48 |
+
| `--data-path ` | str | `-` | Path to the dataset. |
|
| 49 |
+
| `--num-classes` | int | `-` | Number of classes. |
|
| 50 |
+
| `--path-type` | str | `Linear` | Type of the interpolant path used by the transport (e.g. `Linear`). |
|
| 51 |
+
| `--prediction` | str | `velocity` | Output type of network. |
|
| 52 |
+
| `--ckpt` | str | `-` | Path to pretrained model checkpoint. |
|
| 53 |
+
| `--model` | str | `SiT-B/2` | Model type, any option from the model list. |
|
| 54 |
+
| `--learn-mu` | bool | `True` | Whether to learn the mu parameter. |
|
| 55 |
+
| `--depth` | int | `1` | Depth parameter for the SiTF2 model(Extra SiT Block). |
|
| 56 |
+
|
| 57 |
+
**Sampling**
|
| 58 |
+
|
| 59 |
+
1. Using the trained RN model to enhance the pre-trained model
|
| 60 |
+
|
| 61 |
+
```bash
|
| 62 |
+
torchrun --nnodes=1 --nproc_per_node=4 train_rectified_noise.py \
|
| 63 |
+
--path-type Linear \
|
| 64 |
+
--prediction velocity \
|
| 65 |
+
--ckpt /path/to/pretrained_model \
|
| 66 |
+
--sitf2-ckpt /path/to/pretrained_RN \
|
| 67 |
+
    --model SiT-B/2 \
|
| 68 |
+
--learn-mu True \
|
| 69 |
+
--depth 1 \
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
## Acknowledgement
|
| 73 |
+
This repo benefits from [SiT](https://github.com/willisma/SiT). Thanks for their excellent work.
|
| 74 |
+
|
| 75 |
+
## Contact
|
| 76 |
+
If you have any question about this project, please contact mguzhenyu@outlook.com.
|
| 77 |
+
|
| 78 |
+
## Citation
|
| 79 |
+
|
| 80 |
+
If you find the code useful for your research, please consider citing our work:
|
| 81 |
+
|
| 82 |
+
```
|
| 83 |
+
@misc{gu2025rectifiednoisegenerativemodel,
|
| 84 |
+
title={Rectified Noise: A Generative Model Using Positive-incentive Noise},
|
| 85 |
+
author={Zhenyu Gu and Yanchen Xu and Sida Huang and Yubin Guo and Hongyuan Zhang},
|
| 86 |
+
year={2025},
|
| 87 |
+
eprint={2511.07911},
|
| 88 |
+
archivePrefix={arXiv},
|
| 89 |
+
primaryClass={cs.LG},
|
| 90 |
+
url={https://arxiv.org/abs/2511.07911},
|
| 91 |
+
}
|
| 92 |
+
```
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000059.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000169.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000286.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000545.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000606.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000769.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001050.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001099.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001346.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001475.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001518.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001644.png
ADDED
|
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001741.png
ADDED
|
Rectified_Noise/VP-Disp/W_False.log
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0%| | 0/47 [00:00<?, ?it/s]
|
| 1 |
2%|▏ | 1/47 [00:44<34:16, 44.71s/it]
|
| 2 |
4%|▍ | 2/47 [01:28<33:13, 44.29s/it]
|
| 3 |
6%|▋ | 3/47 [02:12<32:23, 44.17s/it]
|
| 4 |
9%|▊ | 4/47 [02:56<31:36, 44.10s/it]
|
| 5 |
11%|█ | 5/47 [03:40<30:49, 44.05s/it]
|
| 6 |
13%|█▎ | 6/47 [04:24<30:04, 44.01s/it]
|
| 7 |
15%|█▍ | 7/47 [05:08<29:20, 44.02s/it]
|
| 8 |
17%|█▋ | 8/47 [05:52<28:36, 44.00s/it]
|
| 9 |
19%|█▉ | 9/47 [06:36<27:53, 44.03s/it]
|
| 10 |
21%|██▏ | 10/47 [07:56<34:01, 55.17s/it]
|
| 11 |
23%|██▎ | 11/47 [09:30<40:09, 66.94s/it]
|
| 12 |
26%|██▌ | 12/47 [11:04<43:46, 75.05s/it]
|
| 13 |
28%|██▊ | 13/47 [12:37<45:43, 80.70s/it]
|
| 14 |
30%|██▉ | 14/47 [14:11<46:33, 84.64s/it]
|
| 15 |
32%|███▏ | 15/47 [15:45<46:36, 87.39s/it]
|
| 16 |
34%|███▍ | 16/47 [17:18<46:06, 89.25s/it]
|
| 17 |
36%|███▌ | 17/47 [18:52<45:16, 90.55s/it]
|
| 18 |
38%|███▊ | 18/47 [20:26<44:13, 91.50s/it]
|
| 19 |
40%|████ | 19/47 [21:59<43:00, 92.17s/it]
|
| 20 |
43%|████▎ | 20/47 [23:33<41:41, 92.64s/it]
|
| 21 |
45%|████▍ | 21/47 [25:06<40:14, 92.87s/it]
|
| 22 |
47%|████▋ | 22/47 [26:40<38:47, 93.10s/it]
|
| 23 |
49%|████▉ | 23/47 [28:14<37:17, 93.21s/it]
|
| 24 |
51%|█████ | 24/47 [29:47<35:47, 93.37s/it]
|
| 25 |
53%|█████▎ | 25/47 [31:21<34:16, 93.48s/it]
|
| 26 |
55%|█████▌ | 26/47 [32:55<32:44, 93.55s/it]
|
| 27 |
57%|█████▋ | 27/47 [34:28<31:11, 93.56s/it]
|
| 28 |
60%|█████▉ | 28/47 [36:02<29:37, 93.56s/it]
|
| 29 |
62%|██████▏ | 29/47 [37:36<28:04, 93.60s/it]
|
|
|
|
| 1 |
+
[NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
|
| 2 |
+
Starting rank=0, seed=0, world_size=1.
|
| 3 |
+
Saving .png samples at VP_samples/depth-mu-2-threshold-1.0-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04
|
| 4 |
+
Total number of images that will be sampled: 3008
|
| 5 |
+
|
| 6 |
0%| | 0/47 [00:00<?, ?it/s]
|
| 7 |
2%|▏ | 1/47 [00:44<34:16, 44.71s/it]
|
| 8 |
4%|▍ | 2/47 [01:28<33:13, 44.29s/it]
|
| 9 |
6%|▋ | 3/47 [02:12<32:23, 44.17s/it]
|
| 10 |
9%|▊ | 4/47 [02:56<31:36, 44.10s/it]
|
| 11 |
11%|█ | 5/47 [03:40<30:49, 44.05s/it]
|
| 12 |
13%|█▎ | 6/47 [04:24<30:04, 44.01s/it]
|
| 13 |
15%|█▍ | 7/47 [05:08<29:20, 44.02s/it]
|
| 14 |
17%|█▋ | 8/47 [05:52<28:36, 44.00s/it]
|
| 15 |
19%|█▉ | 9/47 [06:36<27:53, 44.03s/it]
|
| 16 |
21%|██▏ | 10/47 [07:56<34:01, 55.17s/it]
|
| 17 |
23%|██▎ | 11/47 [09:30<40:09, 66.94s/it]
|
| 18 |
26%|██▌ | 12/47 [11:04<43:46, 75.05s/it]
|
| 19 |
28%|██▊ | 13/47 [12:37<45:43, 80.70s/it]
|
| 20 |
30%|██▉ | 14/47 [14:11<46:33, 84.64s/it]
|
| 21 |
32%|███▏ | 15/47 [15:45<46:36, 87.39s/it]
|
| 22 |
34%|███▍ | 16/47 [17:18<46:06, 89.25s/it]
|
| 23 |
36%|███▌ | 17/47 [18:52<45:16, 90.55s/it]
|
| 24 |
38%|███▊ | 18/47 [20:26<44:13, 91.50s/it]
|
| 25 |
40%|████ | 19/47 [21:59<43:00, 92.17s/it]
|
| 26 |
43%|████▎ | 20/47 [23:33<41:41, 92.64s/it]
|
| 27 |
45%|████▍ | 21/47 [25:06<40:14, 92.87s/it]
|
| 28 |
47%|████▋ | 22/47 [26:40<38:47, 93.10s/it]
|
| 29 |
49%|████▉ | 23/47 [28:14<37:17, 93.21s/it]
|
| 30 |
51%|█████ | 24/47 [29:47<35:47, 93.37s/it]
|
| 31 |
53%|█████▎ | 25/47 [31:21<34:16, 93.48s/it]
|
| 32 |
55%|█████▌ | 26/47 [32:55<32:44, 93.55s/it]
|
| 33 |
57%|█████▋ | 27/47 [34:28<31:11, 93.56s/it]
|
| 34 |
60%|█████▉ | 28/47 [36:02<29:37, 93.56s/it]
|
| 35 |
62%|██████▏ | 29/47 [37:36<28:04, 93.60s/it]
|
Rectified_Noise/VP-Disp/W_No.log
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0%| | 0/47 [00:00<?, ?it/s]
|
| 1 |
2%|▏ | 1/47 [01:21<1:02:31, 81.55s/it]
|
| 2 |
4%|▍ | 2/47 [02:41<1:00:38, 80.86s/it]
|
| 3 |
6%|▋ | 3/47 [04:02<59:03, 80.54s/it]
|
| 4 |
9%|▊ | 4/47 [05:21<57:29, 80.22s/it]
|
| 5 |
11%|█ | 5/47 [06:41<56:08, 80.19s/it]
|
| 6 |
13%|█▎ | 6/47 [08:46<1:05:05, 95.25s/it]
|
| 7 |
15%|█▍ | 7/47 [10:56<1:11:01, 106.54s/it]
|
| 8 |
17%|█▋ | 8/47 [13:05<1:14:00, 113.86s/it]
|
| 9 |
19%|█▉ | 9/47 [15:15<1:15:10, 118.69s/it]
|
| 10 |
21%|██▏ | 10/47 [17:24<1:15:14, 122.00s/it]
|
| 11 |
23%|██▎ | 11/47 [19:33<1:14:34, 124.29s/it]
|
| 12 |
26%|██▌ | 12/47 [21:43<1:13:25, 125.86s/it]
|
| 13 |
28%|██▊ | 13/47 [23:52<1:11:56, 126.94s/it]
|
| 14 |
30%|██▉ | 14/47 [25:56<1:09:12, 125.85s/it]
|
| 15 |
32%|███▏ | 15/47 [28:06<1:07:46, 127.09s/it]
|
| 16 |
34%|███▍ | 16/47 [30:16<1:06:06, 127.96s/it]
|
| 17 |
36%|███▌ | 17/47 [32:26<1:04:17, 128.58s/it]
|
| 18 |
38%|███▊ | 18/47 [34:36<1:02:21, 129.01s/it]
|
| 19 |
40%|████ | 19/47 [36:45<1:00:14, 129.10s/it]
|
|
|
|
| 1 |
+
[NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
|
| 2 |
+
Starting rank=0, seed=0, world_size=1.
|
| 3 |
+
Saving .png samples at VP_samples/depth-mu-2-threshold-0.0-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04
|
| 4 |
+
Total number of images that will be sampled: 3008
|
| 5 |
+
|
| 6 |
0%| | 0/47 [00:00<?, ?it/s]
|
| 7 |
2%|▏ | 1/47 [01:21<1:02:31, 81.55s/it]
|
| 8 |
4%|▍ | 2/47 [02:41<1:00:38, 80.86s/it]
|
| 9 |
6%|▋ | 3/47 [04:02<59:03, 80.54s/it]
|
| 10 |
9%|▊ | 4/47 [05:21<57:29, 80.22s/it]
|
| 11 |
11%|█ | 5/47 [06:41<56:08, 80.19s/it]
|
| 12 |
13%|█▎ | 6/47 [08:46<1:05:05, 95.25s/it]
|
| 13 |
15%|█▍ | 7/47 [10:56<1:11:01, 106.54s/it]
|
| 14 |
17%|█▋ | 8/47 [13:05<1:14:00, 113.86s/it]
|
| 15 |
19%|█▉ | 9/47 [15:15<1:15:10, 118.69s/it]
|
| 16 |
21%|██▏ | 10/47 [17:24<1:15:14, 122.00s/it]
|
| 17 |
23%|██▎ | 11/47 [19:33<1:14:34, 124.29s/it]
|
| 18 |
26%|██▌ | 12/47 [21:43<1:13:25, 125.86s/it]
|
| 19 |
28%|██▊ | 13/47 [23:52<1:11:56, 126.94s/it]
|
| 20 |
30%|██▉ | 14/47 [25:56<1:09:12, 125.85s/it]
|
| 21 |
32%|███▏ | 15/47 [28:06<1:07:46, 127.09s/it]
|
| 22 |
34%|███▍ | 16/47 [30:16<1:06:06, 127.96s/it]
|
| 23 |
36%|███▌ | 17/47 [32:26<1:04:17, 128.58s/it]
|
| 24 |
38%|███▊ | 18/47 [34:36<1:02:21, 129.01s/it]
|
| 25 |
40%|████ | 19/47 [36:45<1:00:14, 129.10s/it]
|
Rectified_Noise/VP-Disp/W_True_0.15.log
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0%| | 0/47 [00:00<?, ?it/s]
|
| 1 |
2%|▏ | 1/47 [00:44<34:16, 44.70s/it]
|
| 2 |
4%|▍ | 2/47 [01:28<33:11, 44.25s/it]
|
| 3 |
6%|▋ | 3/47 [02:12<32:22, 44.15s/it]
|
| 4 |
9%|▊ | 4/47 [02:56<31:35, 44.08s/it]
|
| 5 |
11%|█ | 5/47 [03:40<30:48, 44.02s/it]
|
| 6 |
13%|█▎ | 6/47 [04:24<30:04, 44.00s/it]
|
| 7 |
15%|█▍ | 7/47 [05:08<29:20, 44.01s/it]
|
| 8 |
17%|█▋ | 8/47 [05:52<28:35, 43.99s/it]
|
| 9 |
19%|█▉ | 9/47 [06:36<27:53, 44.03s/it]
|
| 10 |
21%|██▏ | 10/47 [07:57<34:09, 55.39s/it]
|
| 11 |
23%|██▎ | 11/47 [09:31<40:20, 67.22s/it]
|
| 12 |
26%|██▌ | 12/47 [11:05<43:56, 75.33s/it]
|
| 13 |
28%|██▊ | 13/47 [12:39<45:53, 80.99s/it]
|
| 14 |
30%|██▉ | 14/47 [14:13<46:42, 84.91s/it]
|
| 15 |
32%|███▏ | 15/47 [15:47<46:43, 87.62s/it]
|
| 16 |
34%|███▍ | 16/47 [17:20<46:12, 89.45s/it]
|
| 17 |
36%|███▌ | 17/47 [18:54<45:21, 90.73s/it]
|
| 18 |
38%|███▊ | 18/47 [20:28<44:18, 91.66s/it]
|
| 19 |
40%|████ | 19/47 [22:02<43:04, 92.30s/it]
|
| 20 |
43%|████▎ | 20/47 [23:36<41:44, 92.77s/it]
|
| 21 |
45%|████▍ | 21/47 [25:09<40:17, 92.99s/it]
|
| 22 |
47%|████▋ | 22/47 [26:43<38:52, 93.29s/it]
|
| 23 |
49%|████▉ | 23/47 [28:17<37:22, 93.44s/it]
|
| 24 |
51%|█████ | 24/47 [29:51<35:51, 93.55s/it]
|
| 25 |
53%|█████▎ | 25/47 [31:25<34:19, 93.62s/it]
|
| 26 |
55%|█████▌ | 26/47 [32:58<32:47, 93.69s/it]
|
| 27 |
57%|█████▋ | 27/47 [34:32<31:13, 93.68s/it]
|
| 28 |
60%|█████▉ | 28/47 [36:06<29:39, 93.65s/it]
|
| 29 |
62%|██████▏ | 29/47 [37:39<28:06, 93.69s/it]
|
|
|
|
| 1 |
+
[NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
|
| 2 |
+
Starting rank=0, seed=0, world_size=1.
|
| 3 |
+
Saving .png samples at VP_samples/depth-mu-2-threshold-0.15-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04
|
| 4 |
+
Total number of images that will be sampled: 3008
|
| 5 |
+
|
| 6 |
0%| | 0/47 [00:00<?, ?it/s]
|
| 7 |
2%|▏ | 1/47 [00:44<34:16, 44.70s/it]
|
| 8 |
4%|▍ | 2/47 [01:28<33:11, 44.25s/it]
|
| 9 |
6%|▋ | 3/47 [02:12<32:22, 44.15s/it]
|
| 10 |
9%|▊ | 4/47 [02:56<31:35, 44.08s/it]
|
| 11 |
11%|█ | 5/47 [03:40<30:48, 44.02s/it]
|
| 12 |
13%|█▎ | 6/47 [04:24<30:04, 44.00s/it]
|
| 13 |
15%|█▍ | 7/47 [05:08<29:20, 44.01s/it]
|
| 14 |
17%|█▋ | 8/47 [05:52<28:35, 43.99s/it]
|
| 15 |
19%|█▉ | 9/47 [06:36<27:53, 44.03s/it]
|
| 16 |
21%|██▏ | 10/47 [07:57<34:09, 55.39s/it]
|
| 17 |
23%|██▎ | 11/47 [09:31<40:20, 67.22s/it]
|
| 18 |
26%|██▌ | 12/47 [11:05<43:56, 75.33s/it]
|
| 19 |
28%|██▊ | 13/47 [12:39<45:53, 80.99s/it]
|
| 20 |
30%|██▉ | 14/47 [14:13<46:42, 84.91s/it]
|
| 21 |
32%|███▏ | 15/47 [15:47<46:43, 87.62s/it]
|
| 22 |
34%|███▍ | 16/47 [17:20<46:12, 89.45s/it]
|
| 23 |
36%|███▌ | 17/47 [18:54<45:21, 90.73s/it]
|
| 24 |
38%|███▊ | 18/47 [20:28<44:18, 91.66s/it]
|
| 25 |
40%|████ | 19/47 [22:02<43:04, 92.30s/it]
|
| 26 |
43%|████▎ | 20/47 [23:36<41:44, 92.77s/it]
|
| 27 |
45%|████▍ | 21/47 [25:09<40:17, 92.99s/it]
|
| 28 |
47%|████▋ | 22/47 [26:43<38:52, 93.29s/it]
|
| 29 |
49%|████▉ | 23/47 [28:17<37:22, 93.44s/it]
|
| 30 |
51%|█████ | 24/47 [29:51<35:51, 93.55s/it]
|
| 31 |
53%|█████▎ | 25/47 [31:25<34:19, 93.62s/it]
|
| 32 |
55%|█████▌ | 26/47 [32:58<32:47, 93.69s/it]
|
| 33 |
57%|█████▋ | 27/47 [34:32<31:13, 93.68s/it]
|
| 34 |
60%|█████▉ | 28/47 [36:06<29:39, 93.65s/it]
|
| 35 |
62%|██████▏ | 29/47 [37:39<28:06, 93.69s/it]
|
Rectified_Noise/VP-Disp/W_True_0.5.log
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0%| | 0/47 [00:00<?, ?it/s]
|
| 1 |
2%|▏ | 1/47 [00:44<34:26, 44.92s/it]
|
| 2 |
4%|▍ | 2/47 [01:29<33:22, 44.51s/it]
|
| 3 |
6%|▋ | 3/47 [02:13<32:32, 44.38s/it]
|
| 4 |
9%|▊ | 4/47 [02:57<31:45, 44.31s/it]
|
| 5 |
11%|█ | 5/47 [03:41<30:58, 44.25s/it]
|
| 6 |
13%|█▎ | 6/47 [04:25<30:13, 44.24s/it]
|
| 7 |
15%|█▍ | 7/47 [05:10<29:29, 44.24s/it]
|
| 8 |
17%|█▋ | 8/47 [05:54<28:44, 44.22s/it]
|
| 9 |
19%|█▉ | 9/47 [06:38<28:01, 44.26s/it]
|
| 10 |
21%|██▏ | 10/47 [07:57<33:50, 54.88s/it]
|
| 11 |
23%|██▎ | 11/47 [09:31<40:02, 66.75s/it]
|
| 12 |
26%|██▌ | 12/47 [11:04<43:42, 74.94s/it]
|
| 13 |
28%|██▊ | 13/47 [12:38<45:41, 80.64s/it]
|
| 14 |
30%|██▉ | 14/47 [14:12<46:32, 84.61s/it]
|
| 15 |
32%|███▏ | 15/47 [15:46<46:36, 87.39s/it]
|
| 16 |
34%|███▍ | 16/47 [17:19<46:08, 89.30s/it]
|
| 17 |
36%|███▌ | 17/47 [18:53<45:18, 90.61s/it]
|
| 18 |
38%|███▊ | 18/47 [20:27<44:15, 91.58s/it]
|
| 19 |
40%|████ | 19/47 [22:01<43:03, 92.25s/it]
|
| 20 |
43%|████▎ | 20/47 [23:34<41:43, 92.71s/it]
|
| 21 |
45%|████▍ | 21/47 [25:08<40:17, 92.97s/it]
|
| 22 |
47%|████▋ | 22/47 [26:42<38:49, 93.19s/it]
|
| 23 |
49%|████▉ | 23/47 [28:15<37:19, 93.32s/it]
|
| 24 |
51%|█████ | 24/47 [29:49<35:49, 93.48s/it]
|
| 25 |
53%|█████▎ | 25/47 [31:23<34:18, 93.58s/it]
|
| 26 |
55%|█████▌ | 26/47 [32:57<32:46, 93.65s/it]
|
| 27 |
57%|█████▋ | 27/47 [34:30<31:13, 93.67s/it]
|
| 28 |
60%|█████▉ | 28/47 [36:04<29:39, 93.65s/it]
|
| 29 |
62%|██████▏ | 29/47 [37:38<28:06, 93.69s/it]
|
|
|
|
| 1 |
+
[NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
|
| 2 |
+
Starting rank=0, seed=0, world_size=1.
|
| 3 |
+
Saving .png samples at VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04
|
| 4 |
+
Total number of images that will be sampled: 3008
|
| 5 |
+
|
| 6 |
0%| | 0/47 [00:00<?, ?it/s]
|
| 7 |
2%|▏ | 1/47 [00:44<34:26, 44.92s/it]
|
| 8 |
4%|▍ | 2/47 [01:29<33:22, 44.51s/it]
|
| 9 |
6%|▋ | 3/47 [02:13<32:32, 44.38s/it]
|
| 10 |
9%|▊ | 4/47 [02:57<31:45, 44.31s/it]
|
| 11 |
11%|█ | 5/47 [03:41<30:58, 44.25s/it]
|
| 12 |
13%|█▎ | 6/47 [04:25<30:13, 44.24s/it]
|
| 13 |
15%|█▍ | 7/47 [05:10<29:29, 44.24s/it]
|
| 14 |
17%|█▋ | 8/47 [05:54<28:44, 44.22s/it]
|
| 15 |
19%|█▉ | 9/47 [06:38<28:01, 44.26s/it]
|
| 16 |
21%|██▏ | 10/47 [07:57<33:50, 54.88s/it]
|
| 17 |
23%|██▎ | 11/47 [09:31<40:02, 66.75s/it]
|
| 18 |
26%|██▌ | 12/47 [11:04<43:42, 74.94s/it]
|
| 19 |
28%|██▊ | 13/47 [12:38<45:41, 80.64s/it]
|
| 20 |
30%|██▉ | 14/47 [14:12<46:32, 84.61s/it]
|
| 21 |
32%|███▏ | 15/47 [15:46<46:36, 87.39s/it]
|
| 22 |
34%|███▍ | 16/47 [17:19<46:08, 89.30s/it]
|
| 23 |
36%|███▌ | 17/47 [18:53<45:18, 90.61s/it]
|
| 24 |
38%|███▊ | 18/47 [20:27<44:15, 91.58s/it]
|
| 25 |
40%|████ | 19/47 [22:01<43:03, 92.25s/it]
|
| 26 |
43%|████▎ | 20/47 [23:34<41:43, 92.71s/it]
|
| 27 |
45%|████▍ | 21/47 [25:08<40:17, 92.97s/it]
|
| 28 |
47%|████▋ | 22/47 [26:42<38:49, 93.19s/it]
|
| 29 |
49%|████▉ | 23/47 [28:15<37:19, 93.32s/it]
|
| 30 |
51%|█████ | 24/47 [29:49<35:49, 93.48s/it]
|
| 31 |
53%|█████▎ | 25/47 [31:23<34:18, 93.58s/it]
|
| 32 |
55%|█████▌ | 26/47 [32:57<32:46, 93.65s/it]
|
| 33 |
57%|█████▋ | 27/47 [34:30<31:13, 93.67s/it]
|
| 34 |
60%|█████▉ | 28/47 [36:04<29:39, 93.65s/it]
|
| 35 |
62%|██████▏ | 29/47 [37:38<28:06, 93.69s/it]
|
Rectified_Noise/VP-Disp/download.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This source code is licensed under the license found in the
|
| 2 |
+
# LICENSE file in the root directory of this source tree.
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
Functions for downloading pre-trained SiT models
|
| 6 |
+
"""
|
| 7 |
+
from torchvision.datasets.utils import download_url
|
| 8 |
+
import torch
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
pretrained_models = {'SiT-XL-2-256x256.pt'}
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def find_model(model_name):
    """
    Resolve a SiT checkpoint by name.

    Names listed in ``pretrained_models`` are fetched from the web (and
    cached locally) via ``download_model``; any other value is treated as
    a filesystem path to a checkpoint file.
    """
    if model_name in pretrained_models:
        return download_model(model_name)
    # Otherwise the name must point at a local checkpoint file.
    assert os.path.isfile(model_name), f'Could not find SiT checkpoint at {model_name}'
    state = torch.load(model_name, map_location=lambda storage, loc: storage, weights_only=False)
    # Checkpoints written by train.py keep the EMA weights under "ema".
    if "ema" in state:
        state = state["ema"]
    return state
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def download_model(model_name):
    """
    Download a pre-trained SiT model from the web.

    The file is cached under ``pretrained_models/`` so repeated calls do
    not re-download; the loaded checkpoint object is returned.
    """
    assert model_name in pretrained_models
    cached = f'pretrained_models/{model_name}'
    if not os.path.isfile(cached):
        # First use: create the cache directory and fetch the weights.
        os.makedirs('pretrained_models', exist_ok=True)
        web_path = f'https://www.dl.dropboxusercontent.com/scl/fi/as9oeomcbub47de5g4be0/SiT-XL-2-256.pt?rlkey=uxzxmpicu46coq3msb17b9ofa&dl=0'
        download_url(web_path, 'pretrained_models', filename=model_name)
    return torch.load(cached, map_location=lambda storage, loc: storage, weights_only=False)
|
| 41 |
+
|
Rectified_Noise/VP-Disp/environment.yml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: RN
|
| 2 |
+
channels:
|
| 3 |
+
- pytorch
|
| 4 |
+
- nvidia
|
| 5 |
+
dependencies:
|
| 6 |
+
- python >= 3.8
|
| 7 |
+
- pytorch >= 1.13
|
| 8 |
+
- torchvision
|
| 9 |
+
- pytorch-cuda >=11.7
|
| 10 |
+
- pip
|
| 11 |
+
- pip:
|
| 12 |
+
- timm
|
| 13 |
+
- diffusers
|
| 14 |
+
- accelerate
|
| 15 |
+
- torchdiffeq
|
| 16 |
+
- wandb
|
Rectified_Noise/VP-Disp/evaluate_samples.sh
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
# Execute all evaluation tasks in parallel
|
| 4 |
+
# Each command runs in the background using &
|
| 5 |
+
|
| 6 |
+
echo "Starting all evaluation tasks in parallel..."
|
| 7 |
+
|
| 8 |
+
# Reference batch path
|
| 9 |
+
REF_BATCH="/gemini/space/zhaozy/zhy/dataset/VIRTUAL_imagenet256_labeled.npz"
|
| 10 |
+
|
| 11 |
+
# Base directory for sample files
|
| 12 |
+
SAMPLE_DIR="/gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise/last_samples_depth_2_gvp_0.5"
|
| 13 |
+
|
| 14 |
+
# Change to the project root directory
|
| 15 |
+
cd /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching
|
| 16 |
+
|
| 17 |
+
# Evaluate threshold 0.0 on GPU 0
|
| 18 |
+
CUDA_VISIBLE_DEVICES=0 nohup python evaluator.py \
|
| 19 |
+
--ref_batch ${REF_BATCH} \
|
| 20 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.0-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 21 |
+
> eval_threshold_0.0.log 2>&1 &
|
| 22 |
+
|
| 23 |
+
# Evaluate threshold 0.15 on GPU 1
|
| 24 |
+
CUDA_VISIBLE_DEVICES=1 nohup python evaluator.py \
|
| 25 |
+
--ref_batch ${REF_BATCH} \
|
| 26 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.15-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 27 |
+
> eval_threshold_0.15.log 2>&1 &
|
| 28 |
+
|
| 29 |
+
# Evaluate threshold 0.25 on GPU 2
|
| 30 |
+
CUDA_VISIBLE_DEVICES=2 nohup python evaluator.py \
|
| 31 |
+
--ref_batch ${REF_BATCH} \
|
| 32 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.25-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 33 |
+
> eval_threshold_0.25.log 2>&1 &
|
| 34 |
+
|
| 35 |
+
# Evaluate threshold 0.5 on GPU 3
|
| 36 |
+
CUDA_VISIBLE_DEVICES=3 nohup python evaluator.py \
|
| 37 |
+
--ref_batch ${REF_BATCH} \
|
| 38 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.5-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 39 |
+
> eval_threshold_0.5.log 2>&1 &
|
| 40 |
+
|
| 41 |
+
# Evaluate threshold 0.75 on GPU 4
|
| 42 |
+
CUDA_VISIBLE_DEVICES=0 nohup python evaluator.py \
|
| 43 |
+
--ref_batch ${REF_BATCH} \
|
| 44 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.75-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 45 |
+
> eval_threshold_0.75.log 2>&1 &
|
| 46 |
+
|
| 47 |
+
# Evaluate threshold 1.0 on GPU 5
|
| 48 |
+
CUDA_VISIBLE_DEVICES=1 nohup python evaluator.py \
|
| 49 |
+
--ref_batch ${REF_BATCH} \
|
| 50 |
+
--sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-1.0-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
|
| 51 |
+
> eval_threshold_1.0.log 2>&1 &
|
| 52 |
+
|
| 53 |
+
# Wait for all background jobs to complete
|
| 54 |
+
echo "All evaluation tasks started. Waiting for completion..."
|
| 55 |
+
wait
|
| 56 |
+
|
| 57 |
+
echo "All evaluation tasks completed!"
|
| 58 |
+
echo ""
|
| 59 |
+
echo "Results saved in:"
|
| 60 |
+
echo " - eval_threshold_0.0.log"
|
| 61 |
+
echo " - eval_threshold_0.15.log"
|
| 62 |
+
echo " - eval_threshold_0.25.log"
|
| 63 |
+
echo " - eval_threshold_0.5.log"
|
| 64 |
+
echo " - eval_threshold_0.75.log"
|
| 65 |
+
echo " - eval_threshold_1.0.log"
|
Rectified_Noise/VP-Disp/evaluator.py
ADDED
|
@@ -0,0 +1,689 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import io
|
| 3 |
+
import os
|
| 4 |
+
import random
|
| 5 |
+
import warnings
|
| 6 |
+
import zipfile
|
| 7 |
+
from abc import ABC, abstractmethod
|
| 8 |
+
from contextlib import contextmanager
|
| 9 |
+
from functools import partial
|
| 10 |
+
from multiprocessing import cpu_count
|
| 11 |
+
from multiprocessing.pool import ThreadPool
|
| 12 |
+
from typing import Iterable, Optional, Tuple, Union
|
| 13 |
+
|
| 14 |
+
import numpy as np
|
| 15 |
+
import requests
|
| 16 |
+
import tensorflow.compat.v1 as tf
|
| 17 |
+
from scipy import linalg
|
| 18 |
+
from tqdm.auto import tqdm
|
| 19 |
+
from datetime import timedelta
|
| 20 |
+
import torch
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
INCEPTION_V3_URL = "https://openaipublic.blob.core.windows.net/diffusion/jul-2021/ref_batches/classify_image_graph_def.pb"
|
| 25 |
+
INCEPTION_V3_PATH = "classify_image_graph_def.pb"
|
| 26 |
+
|
| 27 |
+
FID_POOL_NAME = "pool_3:0"
|
| 28 |
+
FID_SPATIAL_NAME = "mixed_6/conv:0"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def main():
    """
    Entry point: compute FID between a reference batch and a sample batch.

    Both batches are .npz files; activations and statistics are produced
    by the (project-local) Evaluator wrapping a TF1 Inception graph.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--ref_batch", default='/gemini/space/zhaozy/zhy/dataset/VIRTUAL_imagenet256_labeled.npz',help="path to reference batch npz file")
    arg_parser.add_argument("--sample_batch", default='/gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise/last_samples_depth_2/depth-mu-28-0050000-2000000-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz', help="path to sample batch npz file")
    opts = arg_parser.parse_args()

    # allow_soft_placement lets DecodeJpeg fall back to CPU inside the
    # Inception graph; allow_growth avoids grabbing all GPU memory upfront.
    session_config = tf.ConfigProto(allow_soft_placement=True)
    session_config.gpu_options.allow_growth = True
    fid_evaluator = Evaluator(tf.Session(config=session_config))

    print("warming up TensorFlow...")
    # Forces TF's verbose startup output to appear now rather than
    # interleaved with the next print(), to help prevent confusion.
    fid_evaluator.warmup()

    print("computing reference batch activations...")
    ref_acts = fid_evaluator.read_activations(opts.ref_batch)
    print("computing/reading reference batch statistics...")
    ref_stats, ref_stats_spatial = fid_evaluator.read_statistics(opts.ref_batch, ref_acts)

    print("computing sample batch activations...")
    sample_acts = fid_evaluator.read_activations(opts.sample_batch)
    print("computing/reading sample batch statistics...")
    sample_stats, sample_stats_spatial = fid_evaluator.read_statistics(opts.sample_batch, sample_acts)

    print("Computing evaluations...")
    # Only FID is reported; the remaining metrics are intentionally disabled.
    #print("Inception Score:", fid_evaluator.compute_inception_score(sample_acts[0]))
    print("FID:", sample_stats.frechet_distance(ref_stats))
    #print("sFID:", sample_stats_spatial.frechet_distance(ref_stats_spatial))
    #prec, recall = fid_evaluator.compute_prec_recall(ref_acts[0], sample_acts[0])
    #print("Precision:", prec)
    #print("Recall:", recall)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class InvalidFIDException(Exception):
    """Error type reserved for FID computation failures."""
    # NOTE(review): defined but not raised anywhere in this file chunk.
    pass
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class FIDStatistics:
    """Gaussian summary (mean vector and covariance matrix) of a batch of
    Inception activations, with a method to compute FID against another
    such summary."""

    def __init__(self, mu: np.ndarray, sigma: np.ndarray):
        # mu: mean of the activations; sigma: their covariance matrix.
        self.mu = mu
        self.sigma = sigma

    def frechet_distance(self, other, eps=1e-6):
        """
        Compute the Frechet distance between two sets of statistics.
        """
        # https://github.com/bioinf-jku/TTUR/blob/73ab375cdf952a12686d9aa7978567771084da42/fid.py#L132
        mean_a = np.atleast_1d(self.mu)
        mean_b = np.atleast_1d(other.mu)
        cov_a = np.atleast_2d(self.sigma)
        cov_b = np.atleast_2d(other.sigma)

        assert (
            mean_a.shape == mean_b.shape
        ), f"Training and test mean vectors have different lengths: {mean_a.shape}, {mean_b.shape}"
        assert (
            cov_a.shape == cov_b.shape
        ), f"Training and test covariances have different dimensions: {cov_a.shape}, {cov_b.shape}"

        delta = mean_a - mean_b

        # The product of the covariances can be (almost) singular, in which
        # case sqrtm yields non-finite values; retry with jittered diagonals.
        covmean, _ = linalg.sqrtm(cov_a.dot(cov_b), disp=False)
        if not np.isfinite(covmean).all():
            warnings.warn(
                "fid calculation produces singular product; adding %s to diagonal of cov estimates"
                % eps
            )
            jitter = np.eye(cov_a.shape[0]) * eps
            covmean = linalg.sqrtm((cov_a + jitter).dot(cov_b + jitter))

        # Numerical error may introduce a small imaginary component; strip it,
        # but fail loudly when it is too large to be round-off noise.
        if np.iscomplexobj(covmean):
            if not np.allclose(np.diagonal(covmean).imag, 0, atol=1):
                m = np.max(np.abs(covmean.imag))
                print(f"Real component: {covmean.real}")
                raise ValueError("Imaginary component {}".format(m))
            covmean = covmean.real

        return delta.dot(delta) + np.trace(cov_a) + np.trace(cov_b) - 2 * np.trace(covmean)
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
class Evaluator:
    """Runs InceptionV3 inside a TF1 session to produce activations, FID
    statistics, Inception Score, and precision/recall for image batches."""

    def __init__(
        self,
        session,
        batch_size=64,
        softmax_batch_size=512,
    ):
        # :param session: a tf.Session whose graph receives the Inception subgraphs.
        # :param batch_size: images per feature-extraction step.
        # :param softmax_batch_size: activations per softmax step (for IS).
        self.sess = session
        self.batch_size = batch_size
        self.softmax_batch_size = softmax_batch_size
        self.manifold_estimator = ManifoldEstimator(session)
        with self.sess.graph.as_default():
            self.image_input = tf.placeholder(tf.float32, shape=[None, None, None, 3])
            self.softmax_input = tf.placeholder(tf.float32, shape=[None, 2048])
            self.pool_features, self.spatial_features = _create_feature_graph(self.image_input)
            self.softmax = _create_softmax_graph(self.softmax_input)

    def warmup(self):
        """Run one dummy batch through the graph so TF does its one-time
        initialization (and verbose logging) up front."""
        self.compute_activations(np.zeros([1, 8, 64, 64, 3]))

    def read_activations(self, npz_path: Union[str, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
        """Compute (pool_3, spatial) activations for a batch given either an
        .npz file path or an in-memory NHWC image array."""
        if isinstance(npz_path, str):
            # If npz_path is a string, treat it as a file path and read the .npz file
            with open_npz_array(npz_path, "arr_0") as reader:
                return self.compute_activations(reader.read_batches(self.batch_size))
        elif isinstance(npz_path, np.ndarray):
            # If npz_path is a numpy array, split it into batches manually
            print("--------line 140-----------")
            batches = np.array_split(npz_path, range(self.batch_size, npz_path.shape[0], self.batch_size))
            print("--------line 143-----------")
            return self.compute_activations(batches)
        else:
            raise ValueError("npz_path must be either a file path (str) or a numpy array (np.ndarray)")


    def compute_activations(self, batches: Iterable[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute image features for downstream evals.

        :param batches: a iterator over NHWC numpy arrays in [0, 255].
        :return: a tuple of numpy arrays of shape [N x X], where X is a feature
                 dimension. The tuple is (pool_3, spatial).
        """
        preds = []
        spatial_preds = []
        for batch in tqdm(batches):
            # print("--------line 164-----------")

            # # Identify information about the current process
            # if 'RANK' in os.environ:
            #     rank = int(os.environ['RANK'])
            #     local_rank = int(os.environ.get('LOCAL_RANK', rank % torch.cuda.device_count()))
            #     print(f"Distributed training - Global Rank: {rank}, Local Rank: {local_rank}")
            #     print(f"Current GPU device: {torch.cuda.current_device()}" if torch.cuda.is_available() else "No CUDA")
            # else:
            #     print("Single process mode")

            # print(f"Process PID: {os.getpid()}")

            batch = batch.astype(np.float32)
            pred, spatial_pred = self.sess.run(
                [self.pool_features, self.spatial_features], {self.image_input: batch}
            )
            # print("--------line 169-----------")
            # Flatten per-image features to [batch, feature_dim].
            preds.append(pred.reshape([pred.shape[0], -1]))
            spatial_preds.append(spatial_pred.reshape([spatial_pred.shape[0], -1]))
        return (
            np.concatenate(preds, axis=0),
            np.concatenate(spatial_preds, axis=0),
        )

    def read_statistics(
        self, npz_path: Union[str, np.ndarray], activations: Tuple[np.ndarray, np.ndarray]
    ) -> Tuple[FIDStatistics, FIDStatistics]:
        """Return (pool, spatial) FID statistics.  Precomputed mu/sigma are
        used when the npz file provides them; otherwise statistics are
        computed from the given activations."""
        if isinstance(npz_path, str):
            obj = np.load(npz_path)
            if "mu" in list(obj.keys()):
                return FIDStatistics(obj["mu"], obj["sigma"]), FIDStatistics(
                    obj["mu_s"], obj["sigma_s"]
                )
        elif isinstance(npz_path, np.ndarray):
            obj = npz_path
        else:
            raise ValueError("npz_path must be either a file path (str) or a numpy array (np.ndarray)")
        # Fall through: compute statistics from the activations themselves.
        return tuple(self.compute_statistics(x) for x in activations)

    def compute_statistics(self, activations: np.ndarray) -> FIDStatistics:
        """Fit a Gaussian (mean, covariance) to a [N x D] activation matrix."""
        mu = np.mean(activations, axis=0)
        sigma = np.cov(activations, rowvar=False)
        return FIDStatistics(mu, sigma)

    def compute_inception_score(self, activations: np.ndarray, split_size: int = 5000) -> float:
        """Compute the Inception Score from pool activations, averaging the
        exp-KL over splits of ``split_size`` samples."""
        softmax_out = []
        for i in range(0, len(activations), self.softmax_batch_size):
            acts = activations[i : i + self.softmax_batch_size]
            softmax_out.append(self.sess.run(self.softmax, feed_dict={self.softmax_input: acts}))
        preds = np.concatenate(softmax_out, axis=0)
        # https://github.com/openai/improved-gan/blob/4f5d1ec5c16a7eceb206f42bfc652693601e1d5c/inception_score/model.py#L46
        scores = []
        for i in range(0, len(preds), split_size):
            part = preds[i : i + split_size]
            kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
            kl = np.mean(np.sum(kl, 1))
            scores.append(np.exp(kl))
        return float(np.mean(scores))

    def compute_prec_recall(
        self, activations_ref: np.ndarray, activations_sample: np.ndarray
    ) -> Tuple[float, float]:
        """Compute improved precision/recall between reference and sample
        activations via the manifold estimator."""
        radii_1 = self.manifold_estimator.manifold_radii(activations_ref)
        radii_2 = self.manifold_estimator.manifold_radii(activations_sample)
        pr = self.manifold_estimator.evaluate_pr(
            activations_ref, radii_1, activations_sample, radii_2
        )
        return (float(pr[0][0]), float(pr[1][0]))
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
class ManifoldEstimator:
    """
    A helper for comparing manifolds of feature vectors.

    Adapted from https://github.com/kynkaat/improved-precision-and-recall-metric/blob/f60f25e5ad933a79135c783fcda53de30f42c9b9/precision_recall.py#L57
    """

    def __init__(
        self,
        session,
        row_batch_size=10000,
        col_batch_size=10000,
        nhood_sizes=(3,),
        clamp_to_percentile=None,
        eps=1e-5,
    ):
        """
        Estimate the manifold of given feature vectors.

        :param session: the TensorFlow session.
        :param row_batch_size: row batch size to compute pairwise distances
                               (parameter to trade-off between memory usage and performance).
        :param col_batch_size: column batch size to compute pairwise distances.
        :param nhood_sizes: number of neighbors used to estimate the manifold.
        :param clamp_to_percentile: prune hyperspheres that have radius larger than
                                    the given percentile.
        :param eps: small number for numerical stability.
        """
        self.distance_block = DistanceBlock(session)
        self.row_batch_size = row_batch_size
        self.col_batch_size = col_batch_size
        self.nhood_sizes = nhood_sizes
        self.num_nhoods = len(nhood_sizes)
        self.clamp_to_percentile = clamp_to_percentile
        self.eps = eps

    def warmup(self):
        """Run a trivial precision/recall pass to trigger one-time TF setup."""
        feats, radii = (
            np.zeros([1, 2048], dtype=np.float32),
            np.zeros([1, 1], dtype=np.float32),
        )
        self.evaluate_pr(feats, radii, feats, radii)

    def manifold_radii(self, features: np.ndarray) -> np.ndarray:
        """Return per-sample hypersphere radii: the distance from each sample
        to its k-th nearest neighbor for each k in ``nhood_sizes``."""
        num_images = len(features)

        # Estimate manifold of features by calculating distances to k-NN of each sample.
        radii = np.zeros([num_images, self.num_nhoods], dtype=np.float32)
        distance_batch = np.zeros([self.row_batch_size, num_images], dtype=np.float32)
        seq = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32)

        for begin1 in range(0, num_images, self.row_batch_size):
            end1 = min(begin1 + self.row_batch_size, num_images)
            row_batch = features[begin1:end1]

            for begin2 in range(0, num_images, self.col_batch_size):
                end2 = min(begin2 + self.col_batch_size, num_images)
                col_batch = features[begin2:end2]

                # Compute distances between batches.
                distance_batch[
                    0 : end1 - begin1, begin2:end2
                ] = self.distance_block.pairwise_distances(row_batch, col_batch)

            # Find the k-nearest neighbor from the current batch.
            radii[begin1:end1, :] = np.concatenate(
                [
                    x[:, self.nhood_sizes]
                    for x in _numpy_partition(distance_batch[0 : end1 - begin1, :], seq, axis=1)
                ],
                axis=0,
            )

        if self.clamp_to_percentile is not None:
            # Zero out (prune) hyperspheres larger than the requested percentile.
            max_distances = np.percentile(radii, self.clamp_to_percentile, axis=0)
            radii[radii > max_distances] = 0
        return radii

    def evaluate(self, features: np.ndarray, radii: np.ndarray, eval_features: np.ndarray):
        """
        Evaluate if new feature vectors are at the manifold.
        """
        num_eval_images = eval_features.shape[0]
        num_ref_images = radii.shape[0]
        distance_batch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float32)
        batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32)
        max_realism_score = np.zeros([num_eval_images], dtype=np.float32)
        nearest_indices = np.zeros([num_eval_images], dtype=np.int32)

        for begin1 in range(0, num_eval_images, self.row_batch_size):
            end1 = min(begin1 + self.row_batch_size, num_eval_images)
            feature_batch = eval_features[begin1:end1]

            for begin2 in range(0, num_ref_images, self.col_batch_size):
                end2 = min(begin2 + self.col_batch_size, num_ref_images)
                ref_batch = features[begin2:end2]

                distance_batch[
                    0 : end1 - begin1, begin2:end2
                ] = self.distance_block.pairwise_distances(feature_batch, ref_batch)

            # From the minibatch of new feature vectors, determine if they are in the estimated manifold.
            # If a feature vector is inside a hypersphere of some reference sample, then
            # the new sample lies at the estimated manifold.
            # The radii of the hyperspheres are determined from distances of neighborhood size k.
            samples_in_manifold = distance_batch[0 : end1 - begin1, :, None] <= radii
            batch_predictions[begin1:end1] = np.any(samples_in_manifold, axis=1).astype(np.int32)

            max_realism_score[begin1:end1] = np.max(
                radii[:, 0] / (distance_batch[0 : end1 - begin1, :] + self.eps), axis=1
            )
            nearest_indices[begin1:end1] = np.argmin(distance_batch[0 : end1 - begin1, :], axis=1)

        return {
            "fraction": float(np.mean(batch_predictions)),
            "batch_predictions": batch_predictions,
            # NOTE: key name keeps the upstream "realisim" typo for compatibility.
            "max_realisim_score": max_realism_score,
            "nearest_indices": nearest_indices,
        }

    def evaluate_pr(
        self,
        features_1: np.ndarray,
        radii_1: np.ndarray,
        features_2: np.ndarray,
        radii_2: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Evaluate precision and recall efficiently.

        :param features_1: [N1 x D] feature vectors for reference batch.
        :param radii_1: [N1 x K1] radii for reference vectors.
        :param features_2: [N2 x D] feature vectors for the other batch.
        :param radii_2: [N x K2] radii for other vectors.
        :return: a tuple of arrays for (precision, recall):
                 - precision: an np.ndarray of length K1
                 - recall: an np.ndarray of length K2
        """
        # FIX: np.bool was removed in NumPy 1.24; the builtin bool is the
        # documented replacement and is what the alias always meant.
        features_1_status = np.zeros([len(features_1), radii_2.shape[1]], dtype=bool)
        features_2_status = np.zeros([len(features_2), radii_1.shape[1]], dtype=bool)
        for begin_1 in range(0, len(features_1), self.row_batch_size):
            end_1 = begin_1 + self.row_batch_size
            batch_1 = features_1[begin_1:end_1]
            for begin_2 in range(0, len(features_2), self.col_batch_size):
                end_2 = begin_2 + self.col_batch_size
                batch_2 = features_2[begin_2:end_2]
                batch_1_in, batch_2_in = self.distance_block.less_thans(
                    batch_1, radii_1[begin_1:end_1], batch_2, radii_2[begin_2:end_2]
                )
                # Accumulate "is inside some hypersphere" flags across column batches.
                features_1_status[begin_1:end_1] |= batch_1_in
                features_2_status[begin_2:end_2] |= batch_2_in
        return (
            np.mean(features_2_status.astype(np.float64), axis=0),
            np.mean(features_1_status.astype(np.float64), axis=0),
        )
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
class DistanceBlock:
    """
    Calculate pairwise distances between vectors.

    Adapted from https://github.com/kynkaat/improved-precision-and-recall-metric/blob/f60f25e5ad933a79135c783fcda53de30f42c9b9/precision_recall.py#L34
    """

    def __init__(self, session):
        self.session = session

        # Initialize TF graph to calculate pairwise distances.
        with session.graph.as_default():
            self._features_batch1 = tf.placeholder(tf.float32, shape=[None, None])
            self._features_batch2 = tf.placeholder(tf.float32, shape=[None, None])
            # Try float16 first for speed/memory; fall back to float32 if the
            # half-precision result overflowed to non-finite values.
            distance_block_16 = _batch_pairwise_distances(
                tf.cast(self._features_batch1, tf.float16),
                tf.cast(self._features_batch2, tf.float16),
            )
            self.distance_block = tf.cond(
                tf.reduce_all(tf.math.is_finite(distance_block_16)),
                lambda: tf.cast(distance_block_16, tf.float32),
                lambda: _batch_pairwise_distances(self._features_batch1, self._features_batch2),
            )

            # Extra logic for less thans: which rows/cols fall within the
            # per-sample hypersphere radii (used by precision/recall).
            self._radii1 = tf.placeholder(tf.float32, shape=[None, None])
            self._radii2 = tf.placeholder(tf.float32, shape=[None, None])
            dist32 = tf.cast(self.distance_block, tf.float32)[..., None]
            self._batch_1_in = tf.math.reduce_any(dist32 <= self._radii2, axis=1)
            self._batch_2_in = tf.math.reduce_any(dist32 <= self._radii1[:, None], axis=0)

    def pairwise_distances(self, U, V):
        """
        Evaluate pairwise distances between two batches of feature vectors.
        """
        return self.session.run(
            self.distance_block,
            feed_dict={self._features_batch1: U, self._features_batch2: V},
        )

    def less_thans(self, batch_1, radii_1, batch_2, radii_2):
        """Return boolean membership arrays: (batch_1 within radii_2 spheres,
        batch_2 within radii_1 spheres)."""
        return self.session.run(
            [self._batch_1_in, self._batch_2_in],
            feed_dict={
                self._features_batch1: batch_1,
                self._features_batch2: batch_2,
                self._radii1: radii_1,
                self._radii2: radii_2,
            },
        )
|
| 448 |
+
|
| 449 |
+
|
| 450 |
+
def _batch_pairwise_distances(U, V):
    """
    Compute pairwise distances between two batches of feature vectors.

    Returns the [len(U) x len(V)] matrix of *squared* Euclidean distances,
    computed via the ||u||^2 - 2 u.v + ||v||^2 expansion.
    """
    with tf.variable_scope("pairwise_dist_block"):
        # Squared norms of each row in U and V.
        norm_u = tf.reduce_sum(tf.square(U), 1)
        norm_v = tf.reduce_sum(tf.square(V), 1)

        # norm_u as a column and norm_v as a row vectors.
        norm_u = tf.reshape(norm_u, [-1, 1])
        norm_v = tf.reshape(norm_v, [1, -1])

        # Pairwise squared Euclidean distances; clamp at 0 because round-off
        # can make the expansion slightly negative.
        D = tf.maximum(norm_u - 2 * tf.matmul(U, V, False, True) + norm_v, 0.0)

        return D
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
class NpzArrayReader(ABC):
    """Abstract interface for reading an npz array in row batches."""

    @abstractmethod
    def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
        """Return the next batch of at most ``batch_size`` rows, or None when
        the array is exhausted."""

    @abstractmethod
    def remaining(self) -> int:
        """Return the number of rows not yet read."""

    def read_batches(self, batch_size: int) -> Iterable[np.ndarray]:
        """Return a length-aware iterable over all remaining batches."""

        def batch_stream():
            # iter(callable, sentinel): keep calling read_batch until it
            # signals exhaustion by returning None.
            yield from iter(lambda: self.read_batch(batch_size), None)

        total = self.remaining()
        # Ceiling division: a trailing partial batch still counts as one.
        return BatchIterator(batch_stream, -(-total // batch_size))
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
class BatchIterator:
    """Wraps a generator factory so the resulting iterable also reports its
    length (needed e.g. for tqdm progress bars)."""

    def __init__(self, gen_fn, length):
        self.gen_fn = gen_fn  # zero-argument callable producing a fresh iterator
        self.length = length  # number of items that iterator will yield

    def __len__(self):
        return self.length

    def __iter__(self):
        yield from self.gen_fn()
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
class StreamingNpzArrayReader(NpzArrayReader):
    """Streams batches of rows out of an uncompressed .npy payload without
    loading the whole array into memory."""

    def __init__(self, arr_f, shape, dtype):
        self.arr_f = arr_f  # file-like object positioned at the raw array data
        self.shape = shape  # full array shape; shape[0] is the row count
        self.dtype = dtype
        self.idx = 0        # rows consumed so far

    def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
        total_rows = self.shape[0]
        if self.idx >= total_rows:
            return None

        bs = min(batch_size, total_rows - self.idx)
        self.idx += bs

        # Zero-itemsize dtypes carry no bytes on disk; fabricate the array.
        if self.dtype.itemsize == 0:
            return np.ndarray([bs, *self.shape[1:]], dtype=self.dtype)

        n_items = bs * np.prod(self.shape[1:])
        raw = _read_bytes(self.arr_f, int(n_items * self.dtype.itemsize), "array data")
        return np.frombuffer(raw, dtype=self.dtype).reshape([bs, *self.shape[1:]])

    def remaining(self) -> int:
        return max(0, self.shape[0] - self.idx)
|
| 527 |
+
|
| 528 |
+
|
| 529 |
+
class MemoryNpzArrayReader(NpzArrayReader):
    """Serves row batches out of an array already held in memory."""

    def __init__(self, arr):
        self.arr = arr
        self.idx = 0  # row cursor into self.arr

    @classmethod
    def load(cls, path: str, arr_name: str):
        """Load array ``arr_name`` from the npz file at ``path`` into memory."""
        with open(path, "rb") as f:
            arr = np.load(f)[arr_name]
        return cls(arr)

    def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
        if self.idx >= self.arr.shape[0]:
            return None
        # Slicing past the end is safe: the final batch is simply shorter.
        batch = self.arr[self.idx : self.idx + batch_size]
        self.idx += batch_size
        return batch

    def remaining(self) -> int:
        return max(0, self.arr.shape[0] - self.idx)
|
| 550 |
+
|
| 551 |
+
|
| 552 |
+
@contextmanager
def open_npz_array(path: str, arr_name: str) -> NpzArrayReader:
    """Context manager yielding an NpzArrayReader for ``arr_name`` in the npz
    file at ``path``.

    Streams directly from the file when the member is a plain v1/v2 .npy in
    C order with a fixed-size dtype; otherwise falls back to loading the
    whole array into memory.
    """
    with _open_npy_file(path, arr_name) as arr_f:
        version = np.lib.format.read_magic(arr_f)
        if version == (1, 0):
            header = np.lib.format.read_array_header_1_0(arr_f)
        elif version == (2, 0):
            header = np.lib.format.read_array_header_2_0(arr_f)
        else:
            # Unknown .npy format version: let np.load handle it in memory.
            yield MemoryNpzArrayReader.load(path, arr_name)
            return
        shape, fortran, dtype = header
        if fortran or dtype.hasobject:
            # Streaming assumes contiguous C-order fixed-size records.
            yield MemoryNpzArrayReader.load(path, arr_name)
        else:
            yield StreamingNpzArrayReader(arr_f, shape, dtype)
|
| 568 |
+
|
| 569 |
+
|
| 570 |
+
def _read_bytes(fp, size, error_template="ran out of data"):
|
| 571 |
+
"""
|
| 572 |
+
Copied from: https://github.com/numpy/numpy/blob/fb215c76967739268de71aa4bda55dd1b062bc2e/numpy/lib/format.py#L788-L886
|
| 573 |
+
|
| 574 |
+
Read from file-like object until size bytes are read.
|
| 575 |
+
Raises ValueError if not EOF is encountered before size bytes are read.
|
| 576 |
+
Non-blocking objects only supported if they derive from io objects.
|
| 577 |
+
Required as e.g. ZipExtFile in python 2.6 can return less data than
|
| 578 |
+
requested.
|
| 579 |
+
"""
|
| 580 |
+
data = bytes()
|
| 581 |
+
while True:
|
| 582 |
+
# io files (default in python3) return None or raise on
|
| 583 |
+
# would-block, python2 file will truncate, probably nothing can be
|
| 584 |
+
# done about that. note that regular files can't be non-blocking
|
| 585 |
+
try:
|
| 586 |
+
r = fp.read(size - len(data))
|
| 587 |
+
data += r
|
| 588 |
+
if len(r) == 0 or len(data) == size:
|
| 589 |
+
break
|
| 590 |
+
except io.BlockingIOError:
|
| 591 |
+
pass
|
| 592 |
+
if len(data) != size:
|
| 593 |
+
msg = "EOF: reading %s, expected %d bytes got %d"
|
| 594 |
+
raise ValueError(msg % (error_template, size, len(data)))
|
| 595 |
+
else:
|
| 596 |
+
return data
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
@contextmanager
def _open_npy_file(path: str, arr_name: str):
    """Context manager yielding a file object for the ``{arr_name}.npy``
    member inside the zip (npz) archive at ``path``.

    :raises ValueError: if the archive has no such member.
    """
    with open(path, "rb") as f:
        with zipfile.ZipFile(f, "r") as zip_f:
            if f"{arr_name}.npy" not in zip_f.namelist():
                raise ValueError(f"missing {arr_name} in npz file")
            with zip_f.open(f"{arr_name}.npy", "r") as arr_f:
                yield arr_f
|
| 607 |
+
|
| 608 |
+
|
| 609 |
+
def _download_inception_model():
    """Download the InceptionV3 frozen graph to INCEPTION_V3_PATH if it is
    not already present.  Writes to a .tmp file first so a partial download
    never masquerades as a complete model."""
    if os.path.exists(INCEPTION_V3_PATH):
        return
    print("downloading InceptionV3 model...")
    with requests.get(INCEPTION_V3_URL, stream=True) as r:
        r.raise_for_status()
        tmp_path = INCEPTION_V3_PATH + ".tmp"
        with open(tmp_path, "wb") as f:
            for chunk in tqdm(r.iter_content(chunk_size=8192)):
                f.write(chunk)
        os.rename(tmp_path, INCEPTION_V3_PATH)
|
| 620 |
+
|
| 621 |
+
|
| 622 |
+
def _create_feature_graph(input_batch):
    """Import the Inception graph wired to ``input_batch`` and return the
    (pool_3, spatial) feature tensors used for FID / sFID.

    A random name prefix keeps repeated imports in one graph distinct.
    """
    _download_inception_model()
    prefix = f"{random.randrange(2**32)}_{random.randrange(2**32)}"
    with open(INCEPTION_V3_PATH, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    pool3, spatial = tf.import_graph_def(
        graph_def,
        input_map={f"ExpandDims:0": input_batch},
        return_elements=[FID_POOL_NAME, FID_SPATIAL_NAME],
        name=prefix,
    )
    _update_shapes(pool3)
    # Only the first 7 channels of the spatial map are used (sFID convention).
    spatial = spatial[..., :7]
    return pool3, spatial
|
| 637 |
+
|
| 638 |
+
|
| 639 |
+
def _create_softmax_graph(input_batch):
    """Build a softmax head over pool features by reusing the weight matrix
    of the Inception graph's final MatMul, for Inception Score."""
    _download_inception_model()
    prefix = f"{random.randrange(2**32)}_{random.randrange(2**32)}"
    with open(INCEPTION_V3_PATH, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    (matmul,) = tf.import_graph_def(
        graph_def, return_elements=[f"softmax/logits/MatMul"], name=prefix
    )
    # Reuse the imported classifier weights against our own activations.
    w = matmul.inputs[1]
    logits = tf.matmul(input_batch, w)
    return tf.nn.softmax(logits)
|
| 651 |
+
|
| 652 |
+
|
| 653 |
+
def _update_shapes(pool3):
    """Rewrite static shapes in the imported graph so the leading (batch)
    dimension is unknown instead of the frozen graph's hard-coded 1.

    Mutates TF's private ``_shape_val`` on every op output in place.
    """
    # https://github.com/bioinf-jku/TTUR/blob/73ab375cdf952a12686d9aa7978567771084da42/fid.py#L50-L63
    ops = pool3.graph.get_operations()
    for op in ops:
        for o in op.outputs:
            shape = o.get_shape()
            if shape._dims is not None:  # pylint: disable=protected-access
                # shape = [s.value for s in shape]  TF 1.x
                shape = [s for s in shape]  # TF 2.x
                new_shape = []
                for j, s in enumerate(shape):
                    # Free only the batch dimension (axis 0) when it is fixed at 1.
                    if s == 1 and j == 0:
                        new_shape.append(None)
                    else:
                        new_shape.append(s)
                o.__dict__["_shape_val"] = tf.TensorShape(new_shape)
    return pool3
|
| 670 |
+
|
| 671 |
+
|
| 672 |
+
def _numpy_partition(arr, kth, **kwargs):
|
| 673 |
+
num_workers = min(cpu_count(), len(arr))
|
| 674 |
+
chunk_size = len(arr) // num_workers
|
| 675 |
+
extra = len(arr) % num_workers
|
| 676 |
+
|
| 677 |
+
start_idx = 0
|
| 678 |
+
batches = []
|
| 679 |
+
for i in range(num_workers):
|
| 680 |
+
size = chunk_size + (1 if i < extra else 0)
|
| 681 |
+
batches.append(arr[start_idx : start_idx + size])
|
| 682 |
+
start_idx += size
|
| 683 |
+
|
| 684 |
+
with ThreadPool(num_workers) as pool:
|
| 685 |
+
return list(pool.map(partial(np.partition, kth=kth, **kwargs), batches))
|
| 686 |
+
|
| 687 |
+
|
| 688 |
+
# Script entry point: compute FID for the configured ref/sample batches.
if __name__ == "__main__":
    main()
|