xiangzai committed on
Commit 0389e9b · verified · 1 Parent(s): d31b843

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. Rectified_Noise/GVP-Disp/README.md +92 -0
  2. Rectified_Noise/GVP-Disp/W_False.log +5 -0
  3. Rectified_Noise/GVP-Disp/evaluate_samples.sh +65 -0
  4. Rectified_Noise/GVP-Disp/evaluator.py +689 -0
  5. Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-000-SiT-XL-2-GVP-velocity-None/log.txt +11 -0
  6. Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-001-SiT-XL-2-GVP-velocity-None/log.txt +1 -0
  7. Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/log.txt +500 -0
  8. Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-003-SiT-XL-2-GVP-velocity-None/log.txt +6 -0
  9. Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/log.txt +863 -0
  10. Rectified_Noise/GVP-Disp/run.sh +14 -0
  11. Rectified_Noise/GVP-Disp/test.sh +78 -0
  12. Rectified_Noise/GVP-Disp/train_rectified_noise.py +429 -0
  13. Rectified_Noise/GVP-Disp/transport/__init__.py +71 -0
  14. Rectified_Noise/GVP-Disp/transport/__pycache__/__init__.cpython-312.pyc +0 -0
  15. Rectified_Noise/GVP-Disp/transport/__pycache__/__init__.cpython-38.pyc +0 -0
  16. Rectified_Noise/GVP-Disp/transport/__pycache__/integrators.cpython-312.pyc +0 -0
  17. Rectified_Noise/GVP-Disp/transport/__pycache__/integrators.cpython-38.pyc +0 -0
  18. Rectified_Noise/GVP-Disp/transport/__pycache__/path.cpython-312.pyc +0 -0
  19. Rectified_Noise/GVP-Disp/transport/__pycache__/path.cpython-38.pyc +0 -0
  20. Rectified_Noise/GVP-Disp/transport/__pycache__/transport.cpython-312.pyc +0 -0
  21. Rectified_Noise/GVP-Disp/transport/__pycache__/transport.cpython-38.pyc +0 -0
  22. Rectified_Noise/GVP-Disp/transport/__pycache__/utils.cpython-312.pyc +0 -0
  23. Rectified_Noise/GVP-Disp/transport/__pycache__/utils.cpython-38.pyc +0 -0
  24. Rectified_Noise/GVP-Disp/transport/integrators.py +117 -0
  25. Rectified_Noise/GVP-Disp/transport/path.py +192 -0
  26. Rectified_Noise/GVP-Disp/transport/transport.py +501 -0
  27. Rectified_Noise/GVP-Disp/transport/utils.py +29 -0
  28. Rectified_Noise/GVP-Disp/w_training1.log +927 -0
  29. Rectified_Noise/VP-Disp/README.md +92 -0
  30. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000059.png +0 -0
  31. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000169.png +0 -0
  32. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000286.png +0 -0
  33. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000545.png +0 -0
  34. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000606.png +0 -0
  35. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000769.png +0 -0
  36. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001050.png +0 -0
  37. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001099.png +0 -0
  38. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001346.png +0 -0
  39. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001475.png +0 -0
  40. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001518.png +0 -0
  41. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001644.png +0 -0
  42. Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001741.png +0 -0
  43. Rectified_Noise/VP-Disp/W_False.log +5 -0
  44. Rectified_Noise/VP-Disp/W_No.log +5 -0
  45. Rectified_Noise/VP-Disp/W_True_0.15.log +5 -0
  46. Rectified_Noise/VP-Disp/W_True_0.5.log +5 -0
  47. Rectified_Noise/VP-Disp/download.py +41 -0
  48. Rectified_Noise/VP-Disp/environment.yml +16 -0
  49. Rectified_Noise/VP-Disp/evaluate_samples.sh +65 -0
  50. Rectified_Noise/VP-Disp/evaluator.py +689 -0
Rectified_Noise/GVP-Disp/README.md ADDED
@@ -0,0 +1,92 @@
+ # [AAAI 2026] Rectified Noise: A Generative Model Using Positive-incentive Noise
+
+ ![Visualization of the $\pi$-noise by $\Delta$RN.](assests/visual.png)
+
+ <br>
+ <a href="https://arxiv.org/pdf/2511.07911"><img src="https://img.shields.io/static/v1?label=Paper&message=2511.07911&color=red&logo=arxiv"></a>
+ <a href="https://huggingface.co/xiangzai/recitified_noise"><img src="https://img.shields.io/badge/🤗_HuggingFace-Model-ffbd45.svg" alt="HuggingFace"></a>
+
+ ## Introduction
+ This is a [PyTorch](https://pytorch.org) implementation of **Rectified Noise**, a generative model that uses positive-incentive noise to enhance a pre-trained model's sampling.
+
+ ![Overview of Rectified Noise](assests/pipeline.png)
+
+ ## Setup
+
+ We provide an `environment.yml` file that can be used to create a Conda environment:
+
+ ```bash
+ conda env create -f environment.yml
+ conda activate RN
+ ```
+
+ ## Usage
+
+ ### Training
+ 1. We provide a training script for RN in `train_rectified_noise.py`.
+
+ Run:
+
+ ```bash
+ torchrun --nnodes=1 --nproc_per_node=4 train_rectified_noise.py \
+     --data-path /path/to/data \
+     --num-classes 3 \
+     --path-type Linear \
+     --prediction velocity \
+     --ckpt /path/to/pretrained_model \
+     --model SiT-B/2 \
+     --learn-mu True \
+     --depth 1
+ ```
+
+ You can find the relevant checkpoint files at the Hugging Face link above.
+
+ 2. Parameters (a minimal `argparse` sketch follows the table):
+
+ | Argument | Type | Default | Description |
+ |----------|------|---------|-------------|
+ | `--data-path` | str | `-` | Path to the dataset. |
+ | `--num-classes` | int | `-` | Number of classes. |
+ | `--path-type` | str | `Linear` | Type of interpolant path (e.g., `Linear`, `GVP`, `VP`). |
+ | `--prediction` | str | `velocity` | Output type of the network. |
+ | `--ckpt` | str | `-` | Path to the pre-trained model checkpoint. |
+ | `--model` | str | `SiT-B/2` | Model type; any option from the model list. |
+ | `--learn-mu` | bool | `True` | Whether to learn the mu parameter. |
+ | `--depth` | int | `1` | Depth of the SiTF2 model (number of extra SiT blocks). |
+
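For orientation, here is a minimal sketch of how these flags might be declared with `argparse`. Names and defaults mirror the table above, but the authoritative definitions live in `train_rectified_noise.py` and may differ; in particular, how the boolean `--learn-mu` flag is parsed is an assumption here.

```python
# Hypothetical CLI sketch mirroring the parameter table above;
# not the actual definitions from train_rectified_noise.py.
import argparse

def str2bool(v):
    # argparse does not convert the string "False" to a boolean by itself
    return str(v).lower() in ("1", "true", "yes")

parser = argparse.ArgumentParser()
parser.add_argument("--data-path", type=str, required=True, help="Path to the dataset.")
parser.add_argument("--num-classes", type=int, required=True, help="Number of classes.")
parser.add_argument("--path-type", type=str, default="Linear", help="Interpolant path type.")
parser.add_argument("--prediction", type=str, default="velocity", help="Output type of the network.")
parser.add_argument("--ckpt", type=str, default=None, help="Path to the pre-trained model checkpoint.")
parser.add_argument("--model", type=str, default="SiT-B/2", help="Model type.")
parser.add_argument("--learn-mu", type=str2bool, default=True, help="Whether to learn the mu parameter.")
parser.add_argument("--depth", type=int, default=1, help="Number of extra SiT blocks (SiTF2 depth).")
args = parser.parse_args()
```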
+ ### Sampling
+
+ 1. Use the trained RN model to enhance the pre-trained model:
+
+ ```bash
+ torchrun --nnodes=1 --nproc_per_node=4 train_rectified_noise.py \
+     --path-type Linear \
+     --prediction velocity \
+     --ckpt /path/to/pretrained_model \
+     --sitf2-ckpt /path/to/pretrained_RN \
+     --model SiT-B/2 \
+     --learn-mu True \
+     --depth 1
+ ```
+
+ ## Acknowledgement
+ This repo benefits from [SiT](https://github.com/willisma/SiT). Thanks for their excellent work.
+
+ ## Contact
+ If you have any questions about this project, please contact mguzhenyu@outlook.com.
+
+ ## Citation
+
+ If you find the code useful for your research, please consider citing our work:
+
+ ```
+ @misc{gu2025rectifiednoisegenerativemodel,
+     title={Rectified Noise: A Generative Model Using Positive-incentive Noise},
+     author={Zhenyu Gu and Yanchen Xu and Sida Huang and Yubin Guo and Hongyuan Zhang},
+     year={2025},
+     eprint={2511.07911},
+     archivePrefix={arXiv},
+     primaryClass={cs.LG},
+     url={https://arxiv.org/abs/2511.07911},
+ }
+ ```
Rectified_Noise/GVP-Disp/W_False.log ADDED
@@ -0,0 +1,5 @@
+ [NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
+ Starting rank=0, seed=0, world_size=1.
+ Saving .png samples at GVP_samples/depth-mu-2-threshold-1.0-0025000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04
+ Total number of images that will be sampled: 3008
+
  0%| | 0/47 [00:00<?, ?it/s]
  2%|▏ | 1/47 [01:34<1:12:31, 94.60s/it]
  4%|▍ | 2/47 [03:08<1:10:33, 94.07s/it]
  6%|▋ | 3/47 [04:42<1:08:52, 93.93s/it]
  9%|▊ | 4/47 [06:15<1:07:13, 93.79s/it]
  11%|█ | 5/47 [07:49<1:05:36, 93.72s/it]
  13%|█▎ | 6/47 [09:22<1:04:00, 93.67s/it]
  15%|█▍ | 7/47 [10:56<1:02:26, 93.67s/it]
  17%|█▋ | 8/47 [12:30<1:00:51, 93.62s/it]
  19%|█▉ | 9/47 [14:03<59:16, 93.60s/it]
  21%|██▏ | 10/47 [15:37<57:43, 93.62s/it]
  23%|██▎ | 11/47 [17:10<56:10, 93.62s/it]
  26%|██▌ | 12/47 [18:44<54:36, 93.63s/it]
  28%|██▊ | 13/47 [20:18<53:02, 93.61s/it]
  30%|██▉ | 14/47 [21:51<51:30, 93.64s/it]
  32%|███▏ | 15/47 [23:25<49:57, 93.66s/it]
  34%|███▍ | 16/47 [24:58<48:21, 93.61s/it]
  36%|███▌ | 17/47 [26:32<46:50, 93.68s/it]
  38%|███▊ | 18/47 [28:06<45:17, 93.71s/it]
  40%|████ | 19/47 [29:40<43:43, 93.70s/it]
Rectified_Noise/GVP-Disp/evaluate_samples.sh ADDED
@@ -0,0 +1,65 @@
+ #!/bin/bash
+
+ # Execute all evaluation tasks in parallel
+ # Each command runs in the background using &
+
+ echo "Starting all evaluation tasks in parallel..."
+
+ # Reference batch path
+ REF_BATCH="/gemini/space/zhaozy/zhy/dataset/VIRTUAL_imagenet256_labeled.npz"
+
+ # Base directory for sample files
+ SAMPLE_DIR="/gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise/last_samples_depth_2_gvp_0.5"
+
+ # Change to the project root directory
+ cd /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching
+
+ # Evaluate threshold 0.0 on GPU 0
+ CUDA_VISIBLE_DEVICES=0 nohup python evaluator.py \
+     --ref_batch ${REF_BATCH} \
+     --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.0-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
+     > eval_threshold_0.0.log 2>&1 &
+
+ # Evaluate threshold 0.15 on GPU 1
+ CUDA_VISIBLE_DEVICES=1 nohup python evaluator.py \
+     --ref_batch ${REF_BATCH} \
+     --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.15-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
+     > eval_threshold_0.15.log 2>&1 &
+
+ # Evaluate threshold 0.25 on GPU 2
+ CUDA_VISIBLE_DEVICES=2 nohup python evaluator.py \
+     --ref_batch ${REF_BATCH} \
+     --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.25-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
+     > eval_threshold_0.25.log 2>&1 &
+
+ # Evaluate threshold 0.5 on GPU 3
+ CUDA_VISIBLE_DEVICES=3 nohup python evaluator.py \
+     --ref_batch ${REF_BATCH} \
+     --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.5-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
+     > eval_threshold_0.5.log 2>&1 &
+
+ # Evaluate threshold 0.75 on GPU 0 (wraps around to the first GPU)
+ CUDA_VISIBLE_DEVICES=0 nohup python evaluator.py \
+     --ref_batch ${REF_BATCH} \
+     --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.75-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
+     > eval_threshold_0.75.log 2>&1 &
+
+ # Evaluate threshold 1.0 on GPU 1 (wraps around to the second GPU)
+ CUDA_VISIBLE_DEVICES=1 nohup python evaluator.py \
+     --ref_batch ${REF_BATCH} \
+     --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-1.0-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
+     > eval_threshold_1.0.log 2>&1 &
+
+ # Wait for all background jobs to complete
+ echo "All evaluation tasks started. Waiting for completion..."
+ wait
+
+ echo "All evaluation tasks completed!"
+ echo ""
+ echo "Results saved in:"
+ echo " - eval_threshold_0.0.log"
+ echo " - eval_threshold_0.15.log"
+ echo " - eval_threshold_0.25.log"
+ echo " - eval_threshold_0.5.log"
+ echo " - eval_threshold_0.75.log"
+ echo " - eval_threshold_1.0.log"
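The same fan-out can be sketched in Python for readers who prefer it over shell; this is a hypothetical equivalent of the script above (same assumed paths, same round-robin over four GPUs), not part of the repo.

```python
# Hypothetical Python equivalent of evaluate_samples.sh: launch one
# evaluator.py run per threshold, round-robin across four GPUs.
import os
import subprocess

REF_BATCH = "/gemini/space/zhaozy/zhy/dataset/VIRTUAL_imagenet256_labeled.npz"  # assumed path
SAMPLE_DIR = "/gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise/last_samples_depth_2_gvp_0.5"
THRESHOLDS = ["0.0", "0.15", "0.25", "0.5", "0.75", "1.0"]
NUM_GPUS = 4  # devices 0-3 with wrap-around, as in the shell script

procs = []
for i, t in enumerate(THRESHOLDS):
    sample = f"{SAMPLE_DIR}/depth-mu-2-threshold-{t}-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz"
    env = dict(os.environ, CUDA_VISIBLE_DEVICES=str(i % NUM_GPUS))
    with open(f"eval_threshold_{t}.log", "w") as log:
        # the child inherits a duplicate of the log fd, so closing it here is safe
        procs.append(subprocess.Popen(
            ["python", "evaluator.py", "--ref_batch", REF_BATCH, "--sample_batch", sample],
            env=env, stdout=log, stderr=subprocess.STDOUT))

for p in procs:  # equivalent of the shell `wait`
    p.wait()
print("All evaluation tasks completed!")
```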
Rectified_Noise/GVP-Disp/evaluator.py ADDED
@@ -0,0 +1,689 @@
+ import argparse
+ import io
+ import os
+ import random
+ import warnings
+ import zipfile
+ from abc import ABC, abstractmethod
+ from contextlib import contextmanager
+ from functools import partial
+ from multiprocessing import cpu_count
+ from multiprocessing.pool import ThreadPool
+ from typing import Iterable, Optional, Tuple, Union
+
+ import numpy as np
+ import requests
+ import tensorflow.compat.v1 as tf
+ from scipy import linalg
+ from tqdm.auto import tqdm
+
+ INCEPTION_V3_URL = "https://openaipublic.blob.core.windows.net/diffusion/jul-2021/ref_batches/classify_image_graph_def.pb"
+ INCEPTION_V3_PATH = "classify_image_graph_def.pb"
+
+ FID_POOL_NAME = "pool_3:0"
+ FID_SPATIAL_NAME = "mixed_6/conv:0"
+
+
+ def main():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--ref_batch", default='/gemini/space/zhaozy/zhy/dataset/VIRTUAL_imagenet256_labeled.npz', help="path to reference batch npz file")
+     parser.add_argument("--sample_batch", default='/gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise/last_samples_depth_2/depth-mu-28-0050000-2000000-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz', help="path to sample batch npz file")
+     args = parser.parse_args()
+
+     config = tf.ConfigProto(
+         allow_soft_placement=True  # allows DecodeJpeg to run on CPU in Inception graph
+     )
+     config.gpu_options.allow_growth = True
+     evaluator = Evaluator(tf.Session(config=config))
+
+     print("warming up TensorFlow...")
+     # This will cause TF to print a bunch of verbose stuff now rather
+     # than after the next print(), to help prevent confusion.
+     evaluator.warmup()
+
+     print("computing reference batch activations...")
+     ref_acts = evaluator.read_activations(args.ref_batch)
+     print("computing/reading reference batch statistics...")
+     ref_stats, ref_stats_spatial = evaluator.read_statistics(args.ref_batch, ref_acts)
+
+     print("computing sample batch activations...")
+     sample_acts = evaluator.read_activations(args.sample_batch)
+     print("computing/reading sample batch statistics...")
+     sample_stats, sample_stats_spatial = evaluator.read_statistics(args.sample_batch, sample_acts)
+
+     print("Computing evaluations...")
+     # print("Inception Score:", evaluator.compute_inception_score(sample_acts[0]))
+     print("FID:", sample_stats.frechet_distance(ref_stats))
+     # print("sFID:", sample_stats_spatial.frechet_distance(ref_stats_spatial))
+     # prec, recall = evaluator.compute_prec_recall(ref_acts[0], sample_acts[0])
+     # print("Precision:", prec)
+     # print("Recall:", recall)
+
+
+ class InvalidFIDException(Exception):
+     pass
+
+
+ class FIDStatistics:
+     def __init__(self, mu: np.ndarray, sigma: np.ndarray):
+         self.mu = mu
+         self.sigma = sigma
+
+     def frechet_distance(self, other, eps=1e-6):
+         """
+         Compute the Frechet distance between two sets of statistics.
+         """
+         # https://github.com/bioinf-jku/TTUR/blob/73ab375cdf952a12686d9aa7978567771084da42/fid.py#L132
+         mu1, sigma1 = self.mu, self.sigma
+         mu2, sigma2 = other.mu, other.sigma
+
+         mu1 = np.atleast_1d(mu1)
+         mu2 = np.atleast_1d(mu2)
+
+         sigma1 = np.atleast_2d(sigma1)
+         sigma2 = np.atleast_2d(sigma2)
+
+         assert (
+             mu1.shape == mu2.shape
+         ), f"Training and test mean vectors have different lengths: {mu1.shape}, {mu2.shape}"
+         assert (
+             sigma1.shape == sigma2.shape
+         ), f"Training and test covariances have different dimensions: {sigma1.shape}, {sigma2.shape}"
+
+         diff = mu1 - mu2
+
+         # product might be almost singular
+         covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
+         if not np.isfinite(covmean).all():
+             msg = (
+                 "fid calculation produces singular product; adding %s to diagonal of cov estimates"
+                 % eps
+             )
+             warnings.warn(msg)
+             offset = np.eye(sigma1.shape[0]) * eps
+             covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
+
+         # numerical error might give a slight imaginary component;
+         # raise if the imaginary part is too large to be rounding error
+         if np.iscomplexobj(covmean):
+             if not np.allclose(np.diagonal(covmean).imag, 0, atol=1):
+                 m = np.max(np.abs(covmean.imag))
+                 print(f"Real component: {covmean.real}")
+                 raise ValueError("Imaginary component {}".format(m))
+             covmean = covmean.real
+
+         tr_covmean = np.trace(covmean)
+
+         return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean
+
+
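`frechet_distance` evaluates the closed form $d^2(\mathcal{N}(\mu_1,\Sigma_1),\mathcal{N}(\mu_2,\Sigma_2)) = \lVert\mu_1-\mu_2\rVert^2 + \operatorname{Tr}\big(\Sigma_1+\Sigma_2-2(\Sigma_1\Sigma_2)^{1/2}\big)$. A small standalone numpy check of that formula on synthetic Gaussian features (independent of this file; the shapes and seed are illustrative):

```python
# Standalone sanity check of the FID closed form on two synthetic Gaussians,
# using the same formula as FIDStatistics.frechet_distance.
import numpy as np
from scipy import linalg

rng = np.random.default_rng(0)
x = rng.normal(0.0, 1.0, size=(10000, 8))  # "reference" features
y = rng.normal(0.5, 1.2, size=(10000, 8))  # "sample" features

mu1, sigma1 = x.mean(axis=0), np.cov(x, rowvar=False)
mu2, sigma2 = y.mean(axis=0), np.cov(y, rowvar=False)

covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
diff = mu1 - mu2
fid = diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * np.trace(covmean.real)
print(f"FID between the two Gaussians: {fid:.4f}")  # approx 8 * (0.5**2 + (1.2 - 1.0)**2) = 2.32
```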
+ class Evaluator:
+     def __init__(
+         self,
+         session,
+         batch_size=64,
+         softmax_batch_size=512,
+     ):
+         self.sess = session
+         self.batch_size = batch_size
+         self.softmax_batch_size = softmax_batch_size
+         self.manifold_estimator = ManifoldEstimator(session)
+         with self.sess.graph.as_default():
+             self.image_input = tf.placeholder(tf.float32, shape=[None, None, None, 3])
+             self.softmax_input = tf.placeholder(tf.float32, shape=[None, 2048])
+             self.pool_features, self.spatial_features = _create_feature_graph(self.image_input)
+             self.softmax = _create_softmax_graph(self.softmax_input)
+
+     def warmup(self):
+         self.compute_activations(np.zeros([1, 8, 64, 64, 3]))
+
+     def read_activations(self, npz_path: Union[str, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
+         if isinstance(npz_path, str):
+             # If npz_path is a string, treat it as a file path and read the .npz file
+             with open_npz_array(npz_path, "arr_0") as reader:
+                 return self.compute_activations(reader.read_batches(self.batch_size))
+         elif isinstance(npz_path, np.ndarray):
+             # If npz_path is a numpy array, split it into batches manually
+             batches = np.array_split(npz_path, range(self.batch_size, npz_path.shape[0], self.batch_size))
+             return self.compute_activations(batches)
+         else:
+             raise ValueError("npz_path must be either a file path (str) or a numpy array (np.ndarray)")
+
+     def compute_activations(self, batches: Iterable[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
+         """
+         Compute image features for downstream evals.
+
+         :param batches: an iterator over NHWC numpy arrays in [0, 255].
+         :return: a tuple of numpy arrays of shape [N x X], where X is a feature
+                  dimension. The tuple is (pool_3, spatial).
+         """
+         preds = []
+         spatial_preds = []
+         for batch in tqdm(batches):
+             batch = batch.astype(np.float32)
+             pred, spatial_pred = self.sess.run(
+                 [self.pool_features, self.spatial_features], {self.image_input: batch}
+             )
+             preds.append(pred.reshape([pred.shape[0], -1]))
+             spatial_preds.append(spatial_pred.reshape([spatial_pred.shape[0], -1]))
+         return (
+             np.concatenate(preds, axis=0),
+             np.concatenate(spatial_preds, axis=0),
+         )
+
+     def read_statistics(
+         self, npz_path: Union[str, np.ndarray], activations: Tuple[np.ndarray, np.ndarray]
+     ) -> Tuple[FIDStatistics, FIDStatistics]:
+         if isinstance(npz_path, str):
+             obj = np.load(npz_path)
+             if "mu" in list(obj.keys()):
+                 return FIDStatistics(obj["mu"], obj["sigma"]), FIDStatistics(
+                     obj["mu_s"], obj["sigma_s"]
+                 )
+         elif isinstance(npz_path, np.ndarray):
+             obj = npz_path
+         else:
+             raise ValueError("npz_path must be either a file path (str) or a numpy array (np.ndarray)")
+         return tuple(self.compute_statistics(x) for x in activations)
+
+     def compute_statistics(self, activations: np.ndarray) -> FIDStatistics:
+         mu = np.mean(activations, axis=0)
+         sigma = np.cov(activations, rowvar=False)
+         return FIDStatistics(mu, sigma)
+
+     def compute_inception_score(self, activations: np.ndarray, split_size: int = 5000) -> float:
+         softmax_out = []
+         for i in range(0, len(activations), self.softmax_batch_size):
+             acts = activations[i : i + self.softmax_batch_size]
+             softmax_out.append(self.sess.run(self.softmax, feed_dict={self.softmax_input: acts}))
+         preds = np.concatenate(softmax_out, axis=0)
+         # https://github.com/openai/improved-gan/blob/4f5d1ec5c16a7eceb206f42bfc652693601e1d5c/inception_score/model.py#L46
+         scores = []
+         for i in range(0, len(preds), split_size):
+             part = preds[i : i + split_size]
+             kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
+             kl = np.mean(np.sum(kl, 1))
+             scores.append(np.exp(kl))
+         return float(np.mean(scores))
+
+     def compute_prec_recall(
+         self, activations_ref: np.ndarray, activations_sample: np.ndarray
+     ) -> Tuple[float, float]:
+         radii_1 = self.manifold_estimator.manifold_radii(activations_ref)
+         radii_2 = self.manifold_estimator.manifold_radii(activations_sample)
+         pr = self.manifold_estimator.evaluate_pr(
+             activations_ref, radii_1, activations_sample, radii_2
+         )
+         return (float(pr[0][0]), float(pr[1][0]))
+
+
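`compute_inception_score` implements the standard definition $\mathrm{IS} = \exp\big(\mathbb{E}_x\,\mathrm{KL}(p(y\mid x)\,\Vert\,p(y))\big)$. A tiny standalone numpy illustration of the same computation, without the TensorFlow graph (the random logits are placeholders, not model outputs):

```python
# Minimal standalone illustration of the Inception Score formula used in
# compute_inception_score: IS = exp(mean_x KL(p(y|x) || p(y))).
import numpy as np

rng = np.random.default_rng(0)
logits = rng.normal(size=(1000, 10))
preds = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)  # softmax rows p(y|x)

marginal = preds.mean(axis=0, keepdims=True)                        # p(y)
kl = (preds * (np.log(preds) - np.log(marginal))).sum(axis=1)       # KL per sample
print("IS:", float(np.exp(kl.mean())))  # bounded by 1 <= IS <= number of classes
```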
+ class ManifoldEstimator:
+     """
+     A helper for comparing manifolds of feature vectors.
+
+     Adapted from https://github.com/kynkaat/improved-precision-and-recall-metric/blob/f60f25e5ad933a79135c783fcda53de30f42c9b9/precision_recall.py#L57
+     """
+
+     def __init__(
+         self,
+         session,
+         row_batch_size=10000,
+         col_batch_size=10000,
+         nhood_sizes=(3,),
+         clamp_to_percentile=None,
+         eps=1e-5,
+     ):
+         """
+         Estimate the manifold of given feature vectors.
+
+         :param session: the TensorFlow session.
+         :param row_batch_size: row batch size to compute pairwise distances
+             (parameter to trade off between memory usage and performance).
+         :param col_batch_size: column batch size to compute pairwise distances.
+         :param nhood_sizes: number of neighbors used to estimate the manifold.
+         :param clamp_to_percentile: prune hyperspheres that have radius larger than
+             the given percentile.
+         :param eps: small number for numerical stability.
+         """
+         self.distance_block = DistanceBlock(session)
+         self.row_batch_size = row_batch_size
+         self.col_batch_size = col_batch_size
+         self.nhood_sizes = nhood_sizes
+         self.num_nhoods = len(nhood_sizes)
+         self.clamp_to_percentile = clamp_to_percentile
+         self.eps = eps
+
+     def warmup(self):
+         feats, radii = (
+             np.zeros([1, 2048], dtype=np.float32),
+             np.zeros([1, 1], dtype=np.float32),
+         )
+         self.evaluate_pr(feats, radii, feats, radii)
+
+     def manifold_radii(self, features: np.ndarray) -> np.ndarray:
+         num_images = len(features)
+
+         # Estimate manifold of features by calculating distances to k-NN of each sample.
+         radii = np.zeros([num_images, self.num_nhoods], dtype=np.float32)
+         distance_batch = np.zeros([self.row_batch_size, num_images], dtype=np.float32)
+         seq = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32)
+
+         for begin1 in range(0, num_images, self.row_batch_size):
+             end1 = min(begin1 + self.row_batch_size, num_images)
+             row_batch = features[begin1:end1]
+
+             for begin2 in range(0, num_images, self.col_batch_size):
+                 end2 = min(begin2 + self.col_batch_size, num_images)
+                 col_batch = features[begin2:end2]
+
+                 # Compute distances between batches.
+                 distance_batch[
+                     0 : end1 - begin1, begin2:end2
+                 ] = self.distance_block.pairwise_distances(row_batch, col_batch)
+
+             # Find the k-nearest neighbor from the current batch.
+             radii[begin1:end1, :] = np.concatenate(
+                 [
+                     x[:, self.nhood_sizes]
+                     for x in _numpy_partition(distance_batch[0 : end1 - begin1, :], seq, axis=1)
+                 ],
+                 axis=0,
+             )
+
+         if self.clamp_to_percentile is not None:
+             max_distances = np.percentile(radii, self.clamp_to_percentile, axis=0)
+             radii[radii > max_distances] = 0
+         return radii
+
+     def evaluate(self, features: np.ndarray, radii: np.ndarray, eval_features: np.ndarray):
+         """
+         Evaluate if new feature vectors are on the estimated manifold.
+         """
+         num_eval_images = eval_features.shape[0]
+         num_ref_images = radii.shape[0]
+         distance_batch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float32)
+         batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32)
+         max_realism_score = np.zeros([num_eval_images], dtype=np.float32)
+         nearest_indices = np.zeros([num_eval_images], dtype=np.int32)
+
+         for begin1 in range(0, num_eval_images, self.row_batch_size):
+             end1 = min(begin1 + self.row_batch_size, num_eval_images)
+             feature_batch = eval_features[begin1:end1]
+
+             for begin2 in range(0, num_ref_images, self.col_batch_size):
+                 end2 = min(begin2 + self.col_batch_size, num_ref_images)
+                 ref_batch = features[begin2:end2]
+
+                 distance_batch[
+                     0 : end1 - begin1, begin2:end2
+                 ] = self.distance_block.pairwise_distances(feature_batch, ref_batch)
+
+             # From the minibatch of new feature vectors, determine if they are in the estimated manifold.
+             # If a feature vector is inside a hypersphere of some reference sample, then
+             # the new sample lies on the estimated manifold.
+             # The radii of the hyperspheres are determined from distances of neighborhood size k.
+             samples_in_manifold = distance_batch[0 : end1 - begin1, :, None] <= radii
+             batch_predictions[begin1:end1] = np.any(samples_in_manifold, axis=1).astype(np.int32)
+
+             max_realism_score[begin1:end1] = np.max(
+                 radii[:, 0] / (distance_batch[0 : end1 - begin1, :] + self.eps), axis=1
+             )
+             nearest_indices[begin1:end1] = np.argmin(distance_batch[0 : end1 - begin1, :], axis=1)
+
+         return {
+             "fraction": float(np.mean(batch_predictions)),
+             "batch_predictions": batch_predictions,
+             "max_realism_score": max_realism_score,
+             "nearest_indices": nearest_indices,
+         }
+
+     def evaluate_pr(
+         self,
+         features_1: np.ndarray,
+         radii_1: np.ndarray,
+         features_2: np.ndarray,
+         radii_2: np.ndarray,
+     ) -> Tuple[np.ndarray, np.ndarray]:
+         """
+         Evaluate precision and recall efficiently.
+
+         :param features_1: [N1 x D] feature vectors for reference batch.
+         :param radii_1: [N1 x K1] radii for reference vectors.
+         :param features_2: [N2 x D] feature vectors for the other batch.
+         :param radii_2: [N2 x K2] radii for other vectors.
+         :return: a tuple of arrays for (precision, recall):
+                  - precision: an np.ndarray of length K1
+                  - recall: an np.ndarray of length K2
+         """
+         features_1_status = np.zeros([len(features_1), radii_2.shape[1]], dtype=bool)
+         features_2_status = np.zeros([len(features_2), radii_1.shape[1]], dtype=bool)
+         for begin_1 in range(0, len(features_1), self.row_batch_size):
+             end_1 = begin_1 + self.row_batch_size
+             batch_1 = features_1[begin_1:end_1]
+             for begin_2 in range(0, len(features_2), self.col_batch_size):
+                 end_2 = begin_2 + self.col_batch_size
+                 batch_2 = features_2[begin_2:end_2]
+                 batch_1_in, batch_2_in = self.distance_block.less_thans(
+                     batch_1, radii_1[begin_1:end_1], batch_2, radii_2[begin_2:end_2]
+                 )
+                 features_1_status[begin_1:end_1] |= batch_1_in
+                 features_2_status[begin_2:end_2] |= batch_2_in
+         return (
+             np.mean(features_2_status.astype(np.float64), axis=0),
+             np.mean(features_1_status.astype(np.float64), axis=0),
+         )
+
+
+ class DistanceBlock:
+     """
+     Calculate pairwise distances between vectors.
+
+     Adapted from https://github.com/kynkaat/improved-precision-and-recall-metric/blob/f60f25e5ad933a79135c783fcda53de30f42c9b9/precision_recall.py#L34
+     """
+
+     def __init__(self, session):
+         self.session = session
+
+         # Initialize TF graph to calculate pairwise distances.
+         with session.graph.as_default():
+             self._features_batch1 = tf.placeholder(tf.float32, shape=[None, None])
+             self._features_batch2 = tf.placeholder(tf.float32, shape=[None, None])
+             distance_block_16 = _batch_pairwise_distances(
+                 tf.cast(self._features_batch1, tf.float16),
+                 tf.cast(self._features_batch2, tf.float16),
+             )
+             self.distance_block = tf.cond(
+                 tf.reduce_all(tf.math.is_finite(distance_block_16)),
+                 lambda: tf.cast(distance_block_16, tf.float32),
+                 lambda: _batch_pairwise_distances(self._features_batch1, self._features_batch2),
+             )
+
+             # Extra logic for less thans.
+             self._radii1 = tf.placeholder(tf.float32, shape=[None, None])
+             self._radii2 = tf.placeholder(tf.float32, shape=[None, None])
+             dist32 = tf.cast(self.distance_block, tf.float32)[..., None]
+             self._batch_1_in = tf.math.reduce_any(dist32 <= self._radii2, axis=1)
+             self._batch_2_in = tf.math.reduce_any(dist32 <= self._radii1[:, None], axis=0)
+
+     def pairwise_distances(self, U, V):
+         """
+         Evaluate pairwise distances between two batches of feature vectors.
+         """
+         return self.session.run(
+             self.distance_block,
+             feed_dict={self._features_batch1: U, self._features_batch2: V},
+         )
+
+     def less_thans(self, batch_1, radii_1, batch_2, radii_2):
+         return self.session.run(
+             [self._batch_1_in, self._batch_2_in],
+             feed_dict={
+                 self._features_batch1: batch_1,
+                 self._features_batch2: batch_2,
+                 self._radii1: radii_1,
+                 self._radii2: radii_2,
+             },
+         )
+
+
+ def _batch_pairwise_distances(U, V):
+     """
+     Compute pairwise distances between two batches of feature vectors.
+     """
+     with tf.variable_scope("pairwise_dist_block"):
+         # Squared norms of each row in U and V.
+         norm_u = tf.reduce_sum(tf.square(U), 1)
+         norm_v = tf.reduce_sum(tf.square(V), 1)
+
+         # norm_u as a column and norm_v as a row vector.
+         norm_u = tf.reshape(norm_u, [-1, 1])
+         norm_v = tf.reshape(norm_v, [1, -1])
+
+         # Pairwise squared Euclidean distances.
+         D = tf.maximum(norm_u - 2 * tf.matmul(U, V, False, True) + norm_v, 0.0)
+
+         return D
+
+
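`_batch_pairwise_distances` uses the expansion $\lVert u-v\rVert^2 = \lVert u\rVert^2 - 2u^\top v + \lVert v\rVert^2$; the `tf.maximum(..., 0.0)` only clamps tiny negative values produced by floating-point error. A quick standalone numpy check of the same identity:

```python
# Verify the squared-distance expansion used by _batch_pairwise_distances.
import numpy as np

rng = np.random.default_rng(0)
U = rng.normal(size=(4, 16))
V = rng.normal(size=(5, 16))

expanded = (U**2).sum(1)[:, None] - 2 * U @ V.T + (V**2).sum(1)[None, :]
direct = ((U[:, None, :] - V[None, :, :]) ** 2).sum(-1)
assert np.allclose(expanded, direct)
print("pairwise squared distances match:", expanded.shape)  # (4, 5)
```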
+ class NpzArrayReader(ABC):
+     @abstractmethod
+     def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
+         pass
+
+     @abstractmethod
+     def remaining(self) -> int:
+         pass
+
+     def read_batches(self, batch_size: int) -> Iterable[np.ndarray]:
+         def gen_fn():
+             while True:
+                 batch = self.read_batch(batch_size)
+                 if batch is None:
+                     break
+                 yield batch
+
+         rem = self.remaining()
+         num_batches = rem // batch_size + int(rem % batch_size != 0)
+         return BatchIterator(gen_fn, num_batches)
+
+
+ class BatchIterator:
+     def __init__(self, gen_fn, length):
+         self.gen_fn = gen_fn
+         self.length = length
+
+     def __len__(self):
+         return self.length
+
+     def __iter__(self):
+         return self.gen_fn()
+
+
+ class StreamingNpzArrayReader(NpzArrayReader):
+     def __init__(self, arr_f, shape, dtype):
+         self.arr_f = arr_f
+         self.shape = shape
+         self.dtype = dtype
+         self.idx = 0
+
+     def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
+         if self.idx >= self.shape[0]:
+             return None
+
+         bs = min(batch_size, self.shape[0] - self.idx)
+         self.idx += bs
+
+         if self.dtype.itemsize == 0:
+             return np.ndarray([bs, *self.shape[1:]], dtype=self.dtype)
+
+         read_count = bs * np.prod(self.shape[1:])
+         read_size = int(read_count * self.dtype.itemsize)
+         data = _read_bytes(self.arr_f, read_size, "array data")
+         return np.frombuffer(data, dtype=self.dtype).reshape([bs, *self.shape[1:]])
+
+     def remaining(self) -> int:
+         return max(0, self.shape[0] - self.idx)
+
+
+ class MemoryNpzArrayReader(NpzArrayReader):
+     def __init__(self, arr):
+         self.arr = arr
+         self.idx = 0
+
+     @classmethod
+     def load(cls, path: str, arr_name: str):
+         with open(path, "rb") as f:
+             arr = np.load(f)[arr_name]
+         return cls(arr)
+
+     def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
+         if self.idx >= self.arr.shape[0]:
+             return None
+
+         res = self.arr[self.idx : self.idx + batch_size]
+         self.idx += batch_size
+         return res
+
+     def remaining(self) -> int:
+         return max(0, self.arr.shape[0] - self.idx)
+
+
+ @contextmanager
+ def open_npz_array(path: str, arr_name: str) -> NpzArrayReader:
+     with _open_npy_file(path, arr_name) as arr_f:
+         version = np.lib.format.read_magic(arr_f)
+         if version == (1, 0):
+             header = np.lib.format.read_array_header_1_0(arr_f)
+         elif version == (2, 0):
+             header = np.lib.format.read_array_header_2_0(arr_f)
+         else:
+             yield MemoryNpzArrayReader.load(path, arr_name)
+             return
+         shape, fortran, dtype = header
+         if fortran or dtype.hasobject:
+             yield MemoryNpzArrayReader.load(path, arr_name)
+         else:
+             yield StreamingNpzArrayReader(arr_f, shape, dtype)
+
+
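A usage sketch for the reader classes above: `open_npz_array` streams the array in fixed-size batches without loading the whole file into memory, which is the same pattern `Evaluator.read_activations` uses. Here `samples.npz` is a hypothetical file storing an NHWC `uint8` array under the key `arr_0`.

```python
# Sketch: stream an npz array in fixed-size batches using open_npz_array
# (defined above). "samples.npz" is a hypothetical file with key "arr_0".
with open_npz_array("samples.npz", "arr_0") as reader:
    print("images remaining:", reader.remaining())
    for batch in reader.read_batches(batch_size=64):
        print(batch.shape, batch.dtype)  # e.g. (64, 256, 256, 3) uint8
```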
+ def _read_bytes(fp, size, error_template="ran out of data"):
+     """
+     Copied from: https://github.com/numpy/numpy/blob/fb215c76967739268de71aa4bda55dd1b062bc2e/numpy/lib/format.py#L788-L886
+
+     Read from file-like object until size bytes are read.
+     Raises ValueError if EOF is encountered before size bytes are read.
+     Non-blocking objects only supported if they derive from io objects.
+     Required as e.g. ZipExtFile in python 2.6 can return less data than
+     requested.
+     """
+     data = bytes()
+     while True:
+         # io files (default in python3) return None or raise on
+         # would-block, python2 file will truncate, probably nothing can be
+         # done about that. note that regular files can't be non-blocking
+         try:
+             r = fp.read(size - len(data))
+             data += r
+             if len(r) == 0 or len(data) == size:
+                 break
+         except io.BlockingIOError:
+             pass
+     if len(data) != size:
+         msg = "EOF: reading %s, expected %d bytes got %d"
+         raise ValueError(msg % (error_template, size, len(data)))
+     else:
+         return data
+
+
+ @contextmanager
+ def _open_npy_file(path: str, arr_name: str):
+     with open(path, "rb") as f:
+         with zipfile.ZipFile(f, "r") as zip_f:
+             if f"{arr_name}.npy" not in zip_f.namelist():
+                 raise ValueError(f"missing {arr_name} in npz file")
+             with zip_f.open(f"{arr_name}.npy", "r") as arr_f:
+                 yield arr_f
+
+
+ def _download_inception_model():
+     if os.path.exists(INCEPTION_V3_PATH):
+         return
+     print("downloading InceptionV3 model...")
+     with requests.get(INCEPTION_V3_URL, stream=True) as r:
+         r.raise_for_status()
+         tmp_path = INCEPTION_V3_PATH + ".tmp"
+         with open(tmp_path, "wb") as f:
+             for chunk in tqdm(r.iter_content(chunk_size=8192)):
+                 f.write(chunk)
+     os.rename(tmp_path, INCEPTION_V3_PATH)
+
+
+ def _create_feature_graph(input_batch):
+     _download_inception_model()
+     prefix = f"{random.randrange(2**32)}_{random.randrange(2**32)}"
+     with open(INCEPTION_V3_PATH, "rb") as f:
+         graph_def = tf.GraphDef()
+         graph_def.ParseFromString(f.read())
+     pool3, spatial = tf.import_graph_def(
+         graph_def,
+         input_map={"ExpandDims:0": input_batch},
+         return_elements=[FID_POOL_NAME, FID_SPATIAL_NAME],
+         name=prefix,
+     )
+     _update_shapes(pool3)
+     spatial = spatial[..., :7]
+     return pool3, spatial
+
+
+ def _create_softmax_graph(input_batch):
+     _download_inception_model()
+     prefix = f"{random.randrange(2**32)}_{random.randrange(2**32)}"
+     with open(INCEPTION_V3_PATH, "rb") as f:
+         graph_def = tf.GraphDef()
+         graph_def.ParseFromString(f.read())
+     (matmul,) = tf.import_graph_def(
+         graph_def, return_elements=["softmax/logits/MatMul"], name=prefix
+     )
+     w = matmul.inputs[1]
+     logits = tf.matmul(input_batch, w)
+     return tf.nn.softmax(logits)
+
+
+ def _update_shapes(pool3):
+     # https://github.com/bioinf-jku/TTUR/blob/73ab375cdf952a12686d9aa7978567771084da42/fid.py#L50-L63
+     ops = pool3.graph.get_operations()
+     for op in ops:
+         for o in op.outputs:
+             shape = o.get_shape()
+             if shape._dims is not None:  # pylint: disable=protected-access
+                 # shape = [s.value for s in shape]  # TF 1.x
+                 shape = [s for s in shape]  # TF 2.x
+                 new_shape = []
+                 for j, s in enumerate(shape):
+                     if s == 1 and j == 0:
+                         new_shape.append(None)
+                     else:
+                         new_shape.append(s)
+                 o.__dict__["_shape_val"] = tf.TensorShape(new_shape)
+     return pool3
+
+
+ def _numpy_partition(arr, kth, **kwargs):
+     num_workers = min(cpu_count(), len(arr))
+     chunk_size = len(arr) // num_workers
+     extra = len(arr) % num_workers
+
+     start_idx = 0
+     batches = []
+     for i in range(num_workers):
+         size = chunk_size + (1 if i < extra else 0)
+         batches.append(arr[start_idx : start_idx + size])
+         start_idx += size
+
+     with ThreadPool(num_workers) as pool:
+         return list(pool.map(partial(np.partition, kth=kth, **kwargs), batches))
+
+
+ if __name__ == "__main__":
+     main()
Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-000-SiT-XL-2-GVP-velocity-None/log.txt ADDED
@@ -0,0 +1,11 @@
+ [2026-02-03 06:38:01] Experiment directory created at results_256_gvp_disp/depth-mu-2-000-SiT-XL-2-GVP-velocity-None
+ [2026-02-03 06:38:35] Combined_model Parameters: 729,629,632
+ [2026-02-03 06:38:35] Total trainable parameters: 53,910,176
+ [2026-02-03 06:38:38] Dataset contains 1,281,167 images (/gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/)
+ [2026-02-03 06:38:38] Training for 100000 epochs...
+ [2026-02-03 06:38:38] Beginning epoch 0...
+ [2026-02-03 06:39:30] (step=0000100) Train Loss: -1.8935, Train Steps/Sec: 1.91
+ [2026-02-03 06:40:20] (step=0000200) Train Loss: -2.2925, Train Steps/Sec: 2.04
+ [2026-02-03 06:41:10] (step=0000300) Train Loss: -2.2953, Train Steps/Sec: 1.99
+ [2026-02-03 06:42:00] (step=0000400) Train Loss: -2.2904, Train Steps/Sec: 1.99
+ [2026-02-03 06:42:50] (step=0000500) Train Loss: -2.2938, Train Steps/Sec: 2.00
Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-001-SiT-XL-2-GVP-velocity-None/log.txt ADDED
@@ -0,0 +1 @@
+ [2026-02-03 06:44:16] Experiment directory created at results_256_gvp_disp/depth-mu-2-001-SiT-XL-2-GVP-velocity-None
Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/log.txt ADDED
@@ -0,0 +1,500 @@
+ [2026-02-03 06:45:00] Experiment directory created at results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None
+ [2026-02-03 06:45:32] Combined_model Parameters: 729,629,632
+ [2026-02-03 06:45:32] Total trainable parameters: 53,910,176
+ [2026-02-03 06:45:34] Dataset contains 1,281,167 images (/gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/)
+ [2026-02-03 06:45:34] Training for 100000 epochs...
+ [2026-02-03 06:45:34] Beginning epoch 0...
+ [2026-02-03 06:47:01] (step=0000100) Train Loss: -3.1750, Train Steps/Sec: 1.15
+ [2026-02-03 06:48:24] (step=0000200) Train Loss: -3.6610, Train Steps/Sec: 1.20
+ [2026-02-03 06:49:47] (step=0000300) Train Loss: -3.6752, Train Steps/Sec: 1.20
+ [2026-02-03 06:51:10] (step=0000400) Train Loss: -3.6767, Train Steps/Sec: 1.20
+ [2026-02-03 06:52:33] (step=0000500) Train Loss: -3.6782, Train Steps/Sec: 1.20
+ [2026-02-03 06:53:56] (step=0000600) Train Loss: -3.6781, Train Steps/Sec: 1.20
+ [2026-02-03 06:55:21] (step=0000700) Train Loss: -3.6788, Train Steps/Sec: 1.18
+ [2026-02-03 06:57:50] (step=0000800) Train Loss: -3.6797, Train Steps/Sec: 0.67
+ [2026-02-03 07:00:57] (step=0000900) Train Loss: -3.6833, Train Steps/Sec: 0.54
+ [2026-02-03 07:04:02] (step=0001000) Train Loss: -3.6793, Train Steps/Sec: 0.54
+ [2026-02-03 07:07:08] (step=0001100) Train Loss: -3.6790, Train Steps/Sec: 0.54
+ [2026-02-03 07:10:14] (step=0001200) Train Loss: -3.6799, Train Steps/Sec: 0.54
+ [2026-02-03 07:13:20] (step=0001300) Train Loss: -3.6818, Train Steps/Sec: 0.54
+ [2026-02-03 07:19:05] (step=0001400) Train Loss: -3.6833, Train Steps/Sec: 0.29
+ [2026-02-03 07:22:12] (step=0001500) Train Loss: -3.6796, Train Steps/Sec: 0.53
+ [2026-02-03 07:25:19] (step=0001600) Train Loss: -3.6813, Train Steps/Sec: 0.54
+ [2026-02-03 07:28:25] (step=0001700) Train Loss: -3.6843, Train Steps/Sec: 0.54
+ [2026-02-03 07:31:32] (step=0001800) Train Loss: -3.6813, Train Steps/Sec: 0.53
+ [2026-02-03 07:34:38] (step=0001900) Train Loss: -3.6828, Train Steps/Sec: 0.54
+ [2026-02-03 07:37:45] (step=0002000) Train Loss: -3.6826, Train Steps/Sec: 0.54
+ [2026-02-03 07:40:51] (step=0002100) Train Loss: -3.6799, Train Steps/Sec: 0.54
+ [2026-02-03 07:43:58] (step=0002200) Train Loss: -3.6784, Train Steps/Sec: 0.53
+ [2026-02-03 07:47:06] (step=0002300) Train Loss: -3.6824, Train Steps/Sec: 0.53
+ [2026-02-03 07:50:12] (step=0002400) Train Loss: -3.6787, Train Steps/Sec: 0.54
+ [2026-02-03 07:53:19] (step=0002500) Train Loss: -3.6771, Train Steps/Sec: 0.54
+ [2026-02-03 07:53:23] Beginning epoch 1...
+ [2026-02-03 07:56:29] (step=0002600) Train Loss: -3.6847, Train Steps/Sec: 0.53
+ [2026-02-03 07:59:35] (step=0002700) Train Loss: -3.6829, Train Steps/Sec: 0.54
+ [2026-02-03 08:02:42] (step=0002800) Train Loss: -3.6825, Train Steps/Sec: 0.54
+ [2026-02-03 08:05:49] (step=0002900) Train Loss: -3.6818, Train Steps/Sec: 0.54
+ [2026-02-03 08:08:55] (step=0003000) Train Loss: -3.6823, Train Steps/Sec: 0.54
+ [2026-02-03 08:12:01] (step=0003100) Train Loss: -3.6821, Train Steps/Sec: 0.54
+ [2026-02-03 08:15:09] (step=0003200) Train Loss: -3.6812, Train Steps/Sec: 0.53
+ [2026-02-03 08:18:16] (step=0003300) Train Loss: -3.6800, Train Steps/Sec: 0.53
+ [2026-02-03 08:21:20] (step=0003400) Train Loss: -3.6797, Train Steps/Sec: 0.54
+ [2026-02-03 08:24:27] (step=0003500) Train Loss: -3.6802, Train Steps/Sec: 0.54
+ [2026-02-03 08:27:34] (step=0003600) Train Loss: -3.6834, Train Steps/Sec: 0.53
+ [2026-02-03 08:30:40] (step=0003700) Train Loss: -3.6810, Train Steps/Sec: 0.54
+ [2026-02-03 08:33:48] (step=0003800) Train Loss: -3.6822, Train Steps/Sec: 0.53
+ [2026-02-03 08:36:55] (step=0003900) Train Loss: -3.6817, Train Steps/Sec: 0.53
+ [2026-02-03 08:40:01] (step=0004000) Train Loss: -3.6794, Train Steps/Sec: 0.54
+ [2026-02-03 08:43:08] (step=0004100) Train Loss: -3.6801, Train Steps/Sec: 0.54
+ [2026-02-03 08:46:15] (step=0004200) Train Loss: -3.6850, Train Steps/Sec: 0.54
+ [2026-02-03 08:49:21] (step=0004300) Train Loss: -3.6801, Train Steps/Sec: 0.54
+ [2026-02-03 08:52:28] (step=0004400) Train Loss: -3.6816, Train Steps/Sec: 0.54
+ [2026-02-03 08:55:35] (step=0004500) Train Loss: -3.6820, Train Steps/Sec: 0.53
+ [2026-02-03 08:58:42] (step=0004600) Train Loss: -3.6817, Train Steps/Sec: 0.54
+ [2026-02-03 09:01:49] (step=0004700) Train Loss: -3.6806, Train Steps/Sec: 0.54
+ [2026-02-03 09:04:56] (step=0004800) Train Loss: -3.6797, Train Steps/Sec: 0.53
+ [2026-02-03 09:08:03] (step=0004900) Train Loss: -3.6800, Train Steps/Sec: 0.53
+ [2026-02-03 09:11:10] (step=0005000) Train Loss: -3.6831, Train Steps/Sec: 0.54
+ [2026-02-03 09:11:18] Beginning epoch 2...
+ [2026-02-03 09:14:20] (step=0005100) Train Loss: -3.6803, Train Steps/Sec: 0.52
+ [2026-02-03 09:17:27] (step=0005200) Train Loss: -3.6804, Train Steps/Sec: 0.53
+ [2026-02-03 09:20:34] (step=0005300) Train Loss: -3.6804, Train Steps/Sec: 0.54
+ [2026-02-03 09:23:40] (step=0005400) Train Loss: -3.6823, Train Steps/Sec: 0.54
+ [2026-02-03 09:26:47] (step=0005500) Train Loss: -3.6819, Train Steps/Sec: 0.53
+ [2026-02-03 09:29:54] (step=0005600) Train Loss: -3.6834, Train Steps/Sec: 0.54
+ [2026-02-03 09:33:01] (step=0005700) Train Loss: -3.6805, Train Steps/Sec: 0.53
+ [2026-02-03 09:36:08] (step=0005800) Train Loss: -3.6827, Train Steps/Sec: 0.53
+ [2026-02-03 09:39:15] (step=0005900) Train Loss: -3.6821, Train Steps/Sec: 0.54
+ [2026-02-03 09:42:20] (step=0006000) Train Loss: -3.6807, Train Steps/Sec: 0.54
+ [2026-02-03 09:45:27] (step=0006100) Train Loss: -3.6814, Train Steps/Sec: 0.53
+ [2026-02-03 09:48:34] (step=0006200) Train Loss: -3.6825, Train Steps/Sec: 0.54
+ [2026-02-03 09:51:40] (step=0006300) Train Loss: -3.6799, Train Steps/Sec: 0.54
+ [2026-02-03 09:54:46] (step=0006400) Train Loss: -3.6797, Train Steps/Sec: 0.54
+ [2026-02-03 09:57:54] (step=0006500) Train Loss: -3.6820, Train Steps/Sec: 0.53
+ [2026-02-03 10:01:01] (step=0006600) Train Loss: -3.6789, Train Steps/Sec: 0.53
+ [2026-02-03 10:04:08] (step=0006700) Train Loss: -3.6804, Train Steps/Sec: 0.53
+ [2026-02-03 10:07:15] (step=0006800) Train Loss: -3.6803, Train Steps/Sec: 0.53
+ [2026-02-03 10:10:22] (step=0006900) Train Loss: -3.6787, Train Steps/Sec: 0.54
+ [2026-02-03 10:13:29] (step=0007000) Train Loss: -3.6818, Train Steps/Sec: 0.54
+ [2026-02-03 10:16:35] (step=0007100) Train Loss: -3.6813, Train Steps/Sec: 0.54
+ [2026-02-03 10:19:42] (step=0007200) Train Loss: -3.6820, Train Steps/Sec: 0.54
+ [2026-02-03 10:22:49] (step=0007300) Train Loss: -3.6810, Train Steps/Sec: 0.53
+ [2026-02-03 10:25:56] (step=0007400) Train Loss: -3.6828, Train Steps/Sec: 0.53
+ [2026-02-03 10:29:04] (step=0007500) Train Loss: -3.6821, Train Steps/Sec: 0.53
+ [2026-02-03 10:29:16] Beginning epoch 3...
+ [2026-02-03 10:32:13] (step=0007600) Train Loss: -3.6794, Train Steps/Sec: 0.53
+ [2026-02-03 10:35:20] (step=0007700) Train Loss: -3.6809, Train Steps/Sec: 0.53
+ [2026-02-03 10:38:27] (step=0007800) Train Loss: -3.6823, Train Steps/Sec: 0.54
+ [2026-02-03 10:41:34] (step=0007900) Train Loss: -3.6813, Train Steps/Sec: 0.54
+ [2026-02-03 10:44:41] (step=0008000) Train Loss: -3.6852, Train Steps/Sec: 0.53
+ [2026-02-03 10:47:47] (step=0008100) Train Loss: -3.6820, Train Steps/Sec: 0.54
+ [2026-02-03 10:50:54] (step=0008200) Train Loss: -3.6798, Train Steps/Sec: 0.54
+ [2026-02-03 10:54:01] (step=0008300) Train Loss: -3.6772, Train Steps/Sec: 0.54
+ [2026-02-03 10:57:07] (step=0008400) Train Loss: -3.6800, Train Steps/Sec: 0.54
+ [2026-02-03 11:00:13] (step=0008500) Train Loss: -3.6818, Train Steps/Sec: 0.54
+ [2026-02-03 11:03:19] (step=0008600) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-03 11:06:23] (step=0008700) Train Loss: -3.6806, Train Steps/Sec: 0.54
+ [2026-02-03 11:09:29] (step=0008800) Train Loss: -3.6762, Train Steps/Sec: 0.54
+ [2026-02-03 11:12:36] (step=0008900) Train Loss: -3.6838, Train Steps/Sec: 0.54
+ [2026-02-03 11:15:43] (step=0009000) Train Loss: -3.6826, Train Steps/Sec: 0.53
+ [2026-02-03 11:18:50] (step=0009100) Train Loss: -3.6806, Train Steps/Sec: 0.54
+ [2026-02-03 11:21:57] (step=0009200) Train Loss: -3.6806, Train Steps/Sec: 0.54
+ [2026-02-03 11:25:04] (step=0009300) Train Loss: -3.6819, Train Steps/Sec: 0.54
+ [2026-02-03 11:28:11] (step=0009400) Train Loss: -3.6785, Train Steps/Sec: 0.53
+ [2026-02-03 11:31:17] (step=0009500) Train Loss: -3.6769, Train Steps/Sec: 0.54
+ [2026-02-03 11:34:24] (step=0009600) Train Loss: -3.6822, Train Steps/Sec: 0.54
+ [2026-02-03 11:37:31] (step=0009700) Train Loss: -3.6856, Train Steps/Sec: 0.54
+ [2026-02-03 11:40:38] (step=0009800) Train Loss: -3.6803, Train Steps/Sec: 0.53
+ [2026-02-03 11:43:45] (step=0009900) Train Loss: -3.6805, Train Steps/Sec: 0.54
+ [2026-02-03 11:46:51] (step=0010000) Train Loss: -3.6819, Train Steps/Sec: 0.54
+ [2026-02-03 11:47:07] Beginning epoch 4...
+ [2026-02-03 11:50:01] (step=0010100) Train Loss: -3.6850, Train Steps/Sec: 0.53
+ [2026-02-03 11:53:08] (step=0010200) Train Loss: -3.6816, Train Steps/Sec: 0.53
+ [2026-02-03 11:56:15] (step=0010300) Train Loss: -3.6836, Train Steps/Sec: 0.53
+ [2026-02-03 11:59:22] (step=0010400) Train Loss: -3.6789, Train Steps/Sec: 0.53
+ [2026-02-03 12:02:29] (step=0010500) Train Loss: -3.6793, Train Steps/Sec: 0.54
+ [2026-02-03 12:05:36] (step=0010600) Train Loss: -3.6834, Train Steps/Sec: 0.54
+ [2026-02-03 12:08:42] (step=0010700) Train Loss: -3.6842, Train Steps/Sec: 0.54
+ [2026-02-03 12:11:49] (step=0010800) Train Loss: -3.6822, Train Steps/Sec: 0.54
+ [2026-02-03 12:14:56] (step=0010900) Train Loss: -3.6813, Train Steps/Sec: 0.54
+ [2026-02-03 12:18:03] (step=0011000) Train Loss: -3.6843, Train Steps/Sec: 0.53
+ [2026-02-03 12:21:09] (step=0011100) Train Loss: -3.6821, Train Steps/Sec: 0.54
+ [2026-02-03 12:24:15] (step=0011200) Train Loss: -3.6787, Train Steps/Sec: 0.54
+ [2026-02-03 12:27:20] (step=0011300) Train Loss: -3.6828, Train Steps/Sec: 0.54
+ [2026-02-03 12:30:27] (step=0011400) Train Loss: -3.6830, Train Steps/Sec: 0.53
+ [2026-02-03 12:33:34] (step=0011500) Train Loss: -3.6784, Train Steps/Sec: 0.53
+ [2026-02-03 12:36:41] (step=0011600) Train Loss: -3.6831, Train Steps/Sec: 0.53
+ [2026-02-03 12:39:48] (step=0011700) Train Loss: -3.6834, Train Steps/Sec: 0.53
+ [2026-02-03 12:42:55] (step=0011800) Train Loss: -3.6808, Train Steps/Sec: 0.53
+ [2026-02-03 12:46:02] (step=0011900) Train Loss: -3.6810, Train Steps/Sec: 0.54
+ [2026-02-03 12:49:09] (step=0012000) Train Loss: -3.6821, Train Steps/Sec: 0.53
+ [2026-02-03 12:52:16] (step=0012100) Train Loss: -3.6827, Train Steps/Sec: 0.53
+ [2026-02-03 12:55:23] (step=0012200) Train Loss: -3.6827, Train Steps/Sec: 0.54
+ [2026-02-03 12:58:30] (step=0012300) Train Loss: -3.6808, Train Steps/Sec: 0.54
+ [2026-02-03 13:01:37] (step=0012400) Train Loss: -3.6818, Train Steps/Sec: 0.53
+ [2026-02-03 13:04:44] (step=0012500) Train Loss: -3.6809, Train Steps/Sec: 0.54
+ [2026-02-03 13:05:03] Beginning epoch 5...
+ [2026-02-03 13:07:54] (step=0012600) Train Loss: -3.6814, Train Steps/Sec: 0.52
+ [2026-02-03 13:11:01] (step=0012700) Train Loss: -3.6842, Train Steps/Sec: 0.53
+ [2026-02-03 13:14:08] (step=0012800) Train Loss: -3.6816, Train Steps/Sec: 0.54
+ [2026-02-03 13:17:15] (step=0012900) Train Loss: -3.6790, Train Steps/Sec: 0.53
+ [2026-02-03 13:20:22] (step=0013000) Train Loss: -3.6812, Train Steps/Sec: 0.53
+ [2026-02-03 13:23:29] (step=0013100) Train Loss: -3.6792, Train Steps/Sec: 0.53
+ [2026-02-03 13:26:36] (step=0013200) Train Loss: -3.6836, Train Steps/Sec: 0.53
+ [2026-02-03 13:29:43] (step=0013300) Train Loss: -3.6845, Train Steps/Sec: 0.54
+ [2026-02-03 13:32:50] (step=0013400) Train Loss: -3.6822, Train Steps/Sec: 0.53
+ [2026-02-03 13:35:57] (step=0013500) Train Loss: -3.6798, Train Steps/Sec: 0.53
+ [2026-02-03 13:39:04] (step=0013600) Train Loss: -3.6828, Train Steps/Sec: 0.54
+ [2026-02-03 13:42:11] (step=0013700) Train Loss: -3.6799, Train Steps/Sec: 0.54
+ [2026-02-03 13:45:18] (step=0013800) Train Loss: -3.6812, Train Steps/Sec: 0.53
+ [2026-02-03 13:48:22] (step=0013900) Train Loss: -3.6831, Train Steps/Sec: 0.54
+ [2026-02-03 13:51:29] (step=0014000) Train Loss: -3.6808, Train Steps/Sec: 0.54
+ [2026-02-03 13:54:36] (step=0014100) Train Loss: -3.6823, Train Steps/Sec: 0.53
+ [2026-02-03 13:57:43] (step=0014200) Train Loss: -3.6795, Train Steps/Sec: 0.54
+ [2026-02-03 14:00:50] (step=0014300) Train Loss: -3.6795, Train Steps/Sec: 0.53
+ [2026-02-03 14:03:57] (step=0014400) Train Loss: -3.6838, Train Steps/Sec: 0.54
+ [2026-02-03 14:07:04] (step=0014500) Train Loss: -3.6832, Train Steps/Sec: 0.53
+ [2026-02-03 14:10:11] (step=0014600) Train Loss: -3.6832, Train Steps/Sec: 0.53
+ [2026-02-03 14:13:18] (step=0014700) Train Loss: -3.6784, Train Steps/Sec: 0.54
+ [2026-02-03 14:16:24] (step=0014800) Train Loss: -3.6824, Train Steps/Sec: 0.54
+ [2026-02-03 14:19:31] (step=0014900) Train Loss: -3.6825, Train Steps/Sec: 0.54
+ [2026-02-03 14:22:38] (step=0015000) Train Loss: -3.6822, Train Steps/Sec: 0.53
+ [2026-02-03 14:23:01] Beginning epoch 6...
+ [2026-02-03 14:25:48] (step=0015100) Train Loss: -3.6831, Train Steps/Sec: 0.53
+ [2026-02-03 14:28:55] (step=0015200) Train Loss: -3.6786, Train Steps/Sec: 0.53
+ [2026-02-03 14:32:02] (step=0015300) Train Loss: -3.6826, Train Steps/Sec: 0.54
+ [2026-02-03 14:35:08] (step=0015400) Train Loss: -3.6817, Train Steps/Sec: 0.54
+ [2026-02-03 14:38:15] (step=0015500) Train Loss: -3.6806, Train Steps/Sec: 0.54
+ [2026-02-03 14:41:21] (step=0015600) Train Loss: -3.6796, Train Steps/Sec: 0.54
+ [2026-02-03 14:44:28] (step=0015700) Train Loss: -3.6839, Train Steps/Sec: 0.54
+ [2026-02-03 14:47:36] (step=0015800) Train Loss: -3.6846, Train Steps/Sec: 0.53
+ [2026-02-03 14:50:43] (step=0015900) Train Loss: -3.6828, Train Steps/Sec: 0.53
+ [2026-02-03 14:53:50] (step=0016000) Train Loss: -3.6828, Train Steps/Sec: 0.54
+ [2026-02-03 14:56:57] (step=0016100) Train Loss: -3.6789, Train Steps/Sec: 0.53
+ [2026-02-03 15:00:04] (step=0016200) Train Loss: -3.6810, Train Steps/Sec: 0.53
+ [2026-02-03 15:03:11] (step=0016300) Train Loss: -3.6799, Train Steps/Sec: 0.53
+ [2026-02-03 15:06:19] (step=0016400) Train Loss: -3.6806, Train Steps/Sec: 0.53
+ [2026-02-03 15:09:24] (step=0016500) Train Loss: -3.6828, Train Steps/Sec: 0.54
+ [2026-02-03 15:12:31] (step=0016600) Train Loss: -3.6781, Train Steps/Sec: 0.54
+ [2026-02-03 15:15:37] (step=0016700) Train Loss: -3.6830, Train Steps/Sec: 0.54
+ [2026-02-03 15:18:44] (step=0016800) Train Loss: -3.6756, Train Steps/Sec: 0.54
+ [2026-02-03 15:21:51] (step=0016900) Train Loss: -3.6798, Train Steps/Sec: 0.54
+ [2026-02-03 15:24:58] (step=0017000) Train Loss: -3.6813, Train Steps/Sec: 0.53
+ [2026-02-03 15:28:04] (step=0017100) Train Loss: -3.6807, Train Steps/Sec: 0.54
+ [2026-02-03 15:31:11] (step=0017200) Train Loss: -3.6818, Train Steps/Sec: 0.54
+ [2026-02-03 15:34:18] (step=0017300) Train Loss: -3.6800, Train Steps/Sec: 0.54
+ [2026-02-03 15:37:25] (step=0017400) Train Loss: -3.6836, Train Steps/Sec: 0.53
+ [2026-02-03 15:40:32] (step=0017500) Train Loss: -3.6807, Train Steps/Sec: 0.53
+ [2026-02-03 15:40:59] Beginning epoch 7...
+ [2026-02-03 15:43:42] (step=0017600) Train Loss: -3.6829, Train Steps/Sec: 0.53
+ [2026-02-03 15:46:49] (step=0017700) Train Loss: -3.6790, Train Steps/Sec: 0.53
+ [2026-02-03 15:49:56] (step=0017800) Train Loss: -3.6850, Train Steps/Sec: 0.53
+ [2026-02-03 15:53:04] (step=0017900) Train Loss: -3.6803, Train Steps/Sec: 0.53
+ [2026-02-03 15:56:11] (step=0018000) Train Loss: -3.6835, Train Steps/Sec: 0.53
+ [2026-02-03 15:59:18] (step=0018100) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-03 16:02:25] (step=0018200) Train Loss: -3.6788, Train Steps/Sec: 0.53
+ [2026-02-03 16:05:31] (step=0018300) Train Loss: -3.6786, Train Steps/Sec: 0.54
+ [2026-02-03 16:08:39] (step=0018400) Train Loss: -3.6812, Train Steps/Sec: 0.53
+ [2026-02-03 16:11:46] (step=0018500) Train Loss: -3.6809, Train Steps/Sec: 0.53
+ [2026-02-03 16:14:52] (step=0018600) Train Loss: -3.6803, Train Steps/Sec: 0.54
+ [2026-02-03 16:17:59] (step=0018700) Train Loss: -3.6822, Train Steps/Sec: 0.54
+ [2026-02-03 16:21:06] (step=0018800) Train Loss: -3.6819, Train Steps/Sec: 0.53
+ [2026-02-03 16:24:12] (step=0018900) Train Loss: -3.6834, Train Steps/Sec: 0.54
+ [2026-02-03 16:27:19] (step=0019000) Train Loss: -3.6824, Train Steps/Sec: 0.54
+ [2026-02-03 16:30:24] (step=0019100) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-03 16:33:31] (step=0019200) Train Loss: -3.6826, Train Steps/Sec: 0.53
+ [2026-02-03 16:36:38] (step=0019300) Train Loss: -3.6774, Train Steps/Sec: 0.53
207
+ [2026-02-03 16:39:45] (step=0019400) Train Loss: -3.6809, Train Steps/Sec: 0.54
208
+ [2026-02-03 16:42:51] (step=0019500) Train Loss: -3.6837, Train Steps/Sec: 0.54
209
+ [2026-02-03 16:45:59] (step=0019600) Train Loss: -3.6828, Train Steps/Sec: 0.53
210
+ [2026-02-03 16:49:06] (step=0019700) Train Loss: -3.6803, Train Steps/Sec: 0.53
211
+ [2026-02-03 16:52:13] (step=0019800) Train Loss: -3.6828, Train Steps/Sec: 0.53
212
+ [2026-02-03 16:55:20] (step=0019900) Train Loss: -3.6832, Train Steps/Sec: 0.53
213
+ [2026-02-03 16:58:27] (step=0020000) Train Loss: -3.6837, Train Steps/Sec: 0.54
214
+ [2026-02-03 16:58:57] Beginning epoch 8...
215
+ [2026-02-03 17:01:37] (step=0020100) Train Loss: -3.6820, Train Steps/Sec: 0.52
216
+ [2026-02-03 17:04:45] (step=0020200) Train Loss: -3.6798, Train Steps/Sec: 0.53
217
+ [2026-02-03 17:07:52] (step=0020300) Train Loss: -3.6807, Train Steps/Sec: 0.53
218
+ [2026-02-03 17:10:59] (step=0020400) Train Loss: -3.6811, Train Steps/Sec: 0.54
219
+ [2026-02-03 17:14:05] (step=0020500) Train Loss: -3.6794, Train Steps/Sec: 0.54
220
+ [2026-02-03 17:17:13] (step=0020600) Train Loss: -3.6833, Train Steps/Sec: 0.53
221
+ [2026-02-03 17:20:20] (step=0020700) Train Loss: -3.6802, Train Steps/Sec: 0.53
222
+ [2026-02-03 17:23:27] (step=0020800) Train Loss: -3.6812, Train Steps/Sec: 0.53
223
+ [2026-02-03 17:26:34] (step=0020900) Train Loss: -3.6822, Train Steps/Sec: 0.54
224
+ [2026-02-03 17:29:41] (step=0021000) Train Loss: -3.6795, Train Steps/Sec: 0.53
225
+ [2026-02-03 17:32:48] (step=0021100) Train Loss: -3.6794, Train Steps/Sec: 0.53
226
+ [2026-02-03 17:35:55] (step=0021200) Train Loss: 3.9167, Train Steps/Sec: 0.53
227
+ [2026-02-03 17:39:02] (step=0021300) Train Loss: -3.6821, Train Steps/Sec: 0.54
228
+ [2026-02-03 17:42:09] (step=0021400) Train Loss: -3.6805, Train Steps/Sec: 0.53
229
+ [2026-02-03 17:45:16] (step=0021500) Train Loss: -3.6808, Train Steps/Sec: 0.54
230
+ [2026-02-03 17:48:23] (step=0021600) Train Loss: -3.6812, Train Steps/Sec: 0.54
231
+ [2026-02-03 17:51:28] (step=0021700) Train Loss: -3.6817, Train Steps/Sec: 0.54
232
+ [2026-02-03 17:54:34] (step=0021800) Train Loss: -3.6846, Train Steps/Sec: 0.54
233
+ [2026-02-03 17:57:41] (step=0021900) Train Loss: -3.6811, Train Steps/Sec: 0.54
234
+ [2026-02-03 18:00:48] (step=0022000) Train Loss: -3.6807, Train Steps/Sec: 0.54
235
+ [2026-02-03 18:03:55] (step=0022100) Train Loss: -3.6799, Train Steps/Sec: 0.53
236
+ [2026-02-03 18:07:02] (step=0022200) Train Loss: -3.6788, Train Steps/Sec: 0.53
237
+ [2026-02-03 18:10:09] (step=0022300) Train Loss: -3.6821, Train Steps/Sec: 0.53
238
+ [2026-02-03 18:13:16] (step=0022400) Train Loss: -3.6808, Train Steps/Sec: 0.53
239
+ [2026-02-03 18:16:24] (step=0022500) Train Loss: -3.6836, Train Steps/Sec: 0.53
240
+ [2026-02-03 18:16:58] Beginning epoch 9...
241
+ [2026-02-03 18:19:34] (step=0022600) Train Loss: -3.6835, Train Steps/Sec: 0.53
242
+ [2026-02-03 18:22:40] (step=0022700) Train Loss: -3.6848, Train Steps/Sec: 0.54
243
+ [2026-02-03 18:25:47] (step=0022800) Train Loss: -3.6778, Train Steps/Sec: 0.54
244
+ [2026-02-03 18:28:53] (step=0022900) Train Loss: -3.6829, Train Steps/Sec: 0.54
245
+ [2026-02-03 18:32:00] (step=0023000) Train Loss: -3.6807, Train Steps/Sec: 0.54
246
+ [2026-02-03 18:35:07] (step=0023100) Train Loss: -3.6846, Train Steps/Sec: 0.53
247
+ [2026-02-03 18:38:14] (step=0023200) Train Loss: -3.6809, Train Steps/Sec: 0.54
248
+ [2026-02-03 18:41:21] (step=0023300) Train Loss: -3.6807, Train Steps/Sec: 0.53
249
+ [2026-02-03 18:44:28] (step=0023400) Train Loss: -3.6812, Train Steps/Sec: 0.54
250
+ [2026-02-03 18:47:35] (step=0023500) Train Loss: -3.6811, Train Steps/Sec: 0.53
251
+ [2026-02-03 18:50:42] (step=0023600) Train Loss: -3.6800, Train Steps/Sec: 0.53
252
+ [2026-02-03 18:53:49] (step=0023700) Train Loss: -3.6848, Train Steps/Sec: 0.53
253
+ [2026-02-03 18:56:56] (step=0023800) Train Loss: -3.6824, Train Steps/Sec: 0.54
254
+ [2026-02-03 19:00:03] (step=0023900) Train Loss: -3.6820, Train Steps/Sec: 0.54
255
+ [2026-02-03 19:03:09] (step=0024000) Train Loss: -3.6848, Train Steps/Sec: 0.54
256
+ [2026-02-03 19:06:16] (step=0024100) Train Loss: -3.6791, Train Steps/Sec: 0.54
257
+ [2026-02-03 19:09:22] (step=0024200) Train Loss: -3.6825, Train Steps/Sec: 0.54
258
+ [2026-02-03 19:12:30] (step=0024300) Train Loss: -3.6800, Train Steps/Sec: 0.53
259
+ [2026-02-03 19:15:35] (step=0024400) Train Loss: -3.6792, Train Steps/Sec: 0.54
260
+ [2026-02-03 19:18:42] (step=0024500) Train Loss: -3.6807, Train Steps/Sec: 0.53
261
+ [2026-02-03 19:21:49] (step=0024600) Train Loss: -3.6796, Train Steps/Sec: 0.53
262
+ [2026-02-03 19:24:56] (step=0024700) Train Loss: -3.6814, Train Steps/Sec: 0.53
263
+ [2026-02-03 19:28:03] (step=0024800) Train Loss: -3.6832, Train Steps/Sec: 0.54
264
+ [2026-02-03 19:31:10] (step=0024900) Train Loss: -3.6832, Train Steps/Sec: 0.54
265
+ [2026-02-03 19:34:18] (step=0025000) Train Loss: -3.6782, Train Steps/Sec: 0.53
266
+ [2026-02-03 19:34:18] Saved checkpoint to results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt
267
+ [2026-02-03 19:34:56] Beginning epoch 10...
268
+ [2026-02-03 19:37:29] (step=0025100) Train Loss: -3.6836, Train Steps/Sec: 0.52
269
+ [2026-02-03 19:40:21] Generating EMA samples...
270
+ [2026-02-03 19:40:36] (step=0025200) Train Loss: -3.6796, Train Steps/Sec: 0.53
271
+ [2026-02-03 19:43:43] (step=0025300) Train Loss: -3.6818, Train Steps/Sec: 0.53
272
+ [2026-02-03 19:46:50] (step=0025400) Train Loss: -3.6789, Train Steps/Sec: 0.54
273
+ [2026-02-03 19:49:58] (step=0025500) Train Loss: -3.6817, Train Steps/Sec: 0.53
274
+ [2026-02-03 19:53:05] (step=0025600) Train Loss: -3.6804, Train Steps/Sec: 0.53
275
+ [2026-02-03 19:56:11] (step=0025700) Train Loss: -3.6800, Train Steps/Sec: 0.54
276
+ [2026-02-03 19:59:19] (step=0025800) Train Loss: -3.6832, Train Steps/Sec: 0.53
277
+ [2026-02-03 20:02:25] (step=0025900) Train Loss: -3.6825, Train Steps/Sec: 0.54
278
+ [2026-02-03 20:05:32] (step=0026000) Train Loss: -3.6812, Train Steps/Sec: 0.54
279
+ [2026-02-03 20:08:39] (step=0026100) Train Loss: -3.6827, Train Steps/Sec: 0.54
280
+ [2026-02-03 20:11:47] (step=0026200) Train Loss: -3.6793, Train Steps/Sec: 0.53
281
+ [2026-02-03 20:14:54] (step=0026300) Train Loss: -3.6817, Train Steps/Sec: 0.53
282
+ [2026-02-03 20:18:01] (step=0026400) Train Loss: -3.6813, Train Steps/Sec: 0.54
283
+ [2026-02-03 20:21:07] (step=0026500) Train Loss: -3.6806, Train Steps/Sec: 0.54
284
+ [2026-02-03 20:24:14] (step=0026600) Train Loss: -3.6842, Train Steps/Sec: 0.54
285
+ [2026-02-03 20:27:20] (step=0026700) Train Loss: -3.6809, Train Steps/Sec: 0.54
286
+ [2026-02-03 20:30:27] (step=0026800) Train Loss: -3.6849, Train Steps/Sec: 0.53
287
+ [2026-02-03 20:33:34] (step=0026900) Train Loss: -3.6802, Train Steps/Sec: 0.53
288
+ [2026-02-03 20:36:39] (step=0027000) Train Loss: -3.6792, Train Steps/Sec: 0.54
289
+ [2026-02-03 20:39:46] (step=0027100) Train Loss: -3.6843, Train Steps/Sec: 0.54
290
+ [2026-02-03 20:42:52] (step=0027200) Train Loss: -3.6821, Train Steps/Sec: 0.54
291
+ [2026-02-03 20:45:59] (step=0027300) Train Loss: -3.6825, Train Steps/Sec: 0.54
292
+ [2026-02-03 20:49:06] (step=0027400) Train Loss: -3.6775, Train Steps/Sec: 0.54
293
+ [2026-02-03 20:52:12] (step=0027500) Train Loss: -3.6800, Train Steps/Sec: 0.54
294
+ [2026-02-03 20:52:54] Beginning epoch 11...
295
+ [2026-02-03 20:55:23] (step=0027600) Train Loss: -3.6853, Train Steps/Sec: 0.53
296
+ [2026-02-03 20:58:29] (step=0027700) Train Loss: -3.6817, Train Steps/Sec: 0.54
297
+ [2026-02-03 21:01:37] (step=0027800) Train Loss: -3.6811, Train Steps/Sec: 0.53
298
+ [2026-02-03 21:04:43] (step=0027900) Train Loss: -3.6810, Train Steps/Sec: 0.54
299
+ [2026-02-03 21:07:50] (step=0028000) Train Loss: -3.6827, Train Steps/Sec: 0.53
300
+ [2026-02-03 21:10:57] (step=0028100) Train Loss: -3.6839, Train Steps/Sec: 0.53
301
+ [2026-02-03 21:14:04] (step=0028200) Train Loss: -3.6817, Train Steps/Sec: 0.54
302
+ [2026-02-03 21:17:11] (step=0028300) Train Loss: -3.6830, Train Steps/Sec: 0.53
303
+ [2026-02-03 21:20:18] (step=0028400) Train Loss: -3.6797, Train Steps/Sec: 0.53
304
+ [2026-02-03 21:23:25] (step=0028500) Train Loss: -3.6797, Train Steps/Sec: 0.53
305
+ [2026-02-03 21:26:32] (step=0028600) Train Loss: -3.6821, Train Steps/Sec: 0.54
306
+ [2026-02-03 21:29:39] (step=0028700) Train Loss: -3.6823, Train Steps/Sec: 0.54
307
+ [2026-02-03 21:32:45] (step=0028800) Train Loss: -3.6812, Train Steps/Sec: 0.54
308
+ [2026-02-03 21:35:53] (step=0028900) Train Loss: -3.6858, Train Steps/Sec: 0.53
309
+ [2026-02-03 21:38:59] (step=0029000) Train Loss: -3.6842, Train Steps/Sec: 0.54
310
+ [2026-02-03 21:42:06] (step=0029100) Train Loss: -3.6836, Train Steps/Sec: 0.54
311
+ [2026-02-03 21:45:14] (step=0029200) Train Loss: -3.6813, Train Steps/Sec: 0.53
312
+ [2026-02-03 21:48:20] (step=0029300) Train Loss: -3.6783, Train Steps/Sec: 0.54
313
+ [2026-02-03 21:51:27] (step=0029400) Train Loss: -3.6829, Train Steps/Sec: 0.53
314
+ [2026-02-03 21:54:34] (step=0029500) Train Loss: -3.6812, Train Steps/Sec: 0.54
315
+ [2026-02-03 21:57:39] (step=0029600) Train Loss: -3.6823, Train Steps/Sec: 0.54
316
+ [2026-02-03 22:00:46] (step=0029700) Train Loss: -3.6828, Train Steps/Sec: 0.53
317
+ [2026-02-03 22:03:53] (step=0029800) Train Loss: -3.6826, Train Steps/Sec: 0.54
318
+ [2026-02-03 22:06:59] (step=0029900) Train Loss: -3.6814, Train Steps/Sec: 0.54
319
+ [2026-02-03 22:10:06] (step=0030000) Train Loss: -3.6837, Train Steps/Sec: 0.54
320
+ [2026-02-03 22:10:51] Beginning epoch 12...
321
+ [2026-02-03 22:13:16] (step=0030100) Train Loss: -3.6822, Train Steps/Sec: 0.53
322
+ [2026-02-03 22:16:22] (step=0030200) Train Loss: -3.6787, Train Steps/Sec: 0.54
323
+ [2026-02-03 22:19:29] (step=0030300) Train Loss: -3.6815, Train Steps/Sec: 0.53
324
+ [2026-02-03 22:22:37] (step=0030400) Train Loss: -3.6806, Train Steps/Sec: 0.53
325
+ [2026-02-03 22:25:44] (step=0030500) Train Loss: -3.6825, Train Steps/Sec: 0.53
326
+ [2026-02-03 22:28:51] (step=0030600) Train Loss: -3.6811, Train Steps/Sec: 0.54
327
+ [2026-02-03 22:31:58] (step=0030700) Train Loss: -3.6838, Train Steps/Sec: 0.54
328
+ [2026-02-03 22:35:05] (step=0030800) Train Loss: -3.6822, Train Steps/Sec: 0.53
329
+ [2026-02-03 22:38:11] (step=0030900) Train Loss: -3.6823, Train Steps/Sec: 0.54
330
+ [2026-02-03 22:41:18] (step=0031000) Train Loss: -3.6815, Train Steps/Sec: 0.54
331
+ [2026-02-03 22:44:25] (step=0031100) Train Loss: -3.6796, Train Steps/Sec: 0.53
332
+ [2026-02-03 22:47:32] (step=0031200) Train Loss: -3.6812, Train Steps/Sec: 0.53
333
+ [2026-02-03 22:50:39] (step=0031300) Train Loss: -3.6806, Train Steps/Sec: 0.53
334
+ [2026-02-03 22:53:46] (step=0031400) Train Loss: -3.6822, Train Steps/Sec: 0.53
335
+ [2026-02-03 22:56:53] (step=0031500) Train Loss: -3.6821, Train Steps/Sec: 0.54
336
+ [2026-02-03 23:00:00] (step=0031600) Train Loss: -3.6803, Train Steps/Sec: 0.53
337
+ [2026-02-03 23:03:07] (step=0031700) Train Loss: -3.6843, Train Steps/Sec: 0.53
338
+ [2026-02-03 23:06:14] (step=0031800) Train Loss: -3.6832, Train Steps/Sec: 0.53
339
+ [2026-02-03 23:09:21] (step=0031900) Train Loss: -3.6809, Train Steps/Sec: 0.54
340
+ [2026-02-03 23:12:28] (step=0032000) Train Loss: -3.6822, Train Steps/Sec: 0.54
341
+ [2026-02-03 23:15:34] (step=0032100) Train Loss: -3.6786, Train Steps/Sec: 0.54
342
+ [2026-02-03 23:18:39] (step=0032200) Train Loss: -3.6814, Train Steps/Sec: 0.54
343
+ [2026-02-03 23:21:46] (step=0032300) Train Loss: -3.6839, Train Steps/Sec: 0.54
344
+ [2026-02-03 23:24:52] (step=0032400) Train Loss: -3.6822, Train Steps/Sec: 0.54
345
+ [2026-02-03 23:27:59] (step=0032500) Train Loss: -3.6809, Train Steps/Sec: 0.53
346
+ [2026-02-03 23:28:48] Beginning epoch 13...
347
+ [2026-02-03 23:31:09] (step=0032600) Train Loss: -3.6846, Train Steps/Sec: 0.53
348
+ [2026-02-03 23:34:16] (step=0032700) Train Loss: -3.6841, Train Steps/Sec: 0.53
349
+ [2026-02-03 23:37:24] (step=0032800) Train Loss: -3.6813, Train Steps/Sec: 0.53
350
+ [2026-02-03 23:40:31] (step=0032900) Train Loss: -3.6792, Train Steps/Sec: 0.53
351
+ [2026-02-03 23:43:38] (step=0033000) Train Loss: -3.6782, Train Steps/Sec: 0.53
352
+ [2026-02-03 23:46:45] (step=0033100) Train Loss: -3.6821, Train Steps/Sec: 0.54
353
+ [2026-02-03 23:49:52] (step=0033200) Train Loss: -3.6819, Train Steps/Sec: 0.53
354
+ [2026-02-03 23:52:59] (step=0033300) Train Loss: -3.6793, Train Steps/Sec: 0.54
355
+ [2026-02-03 23:56:06] (step=0033400) Train Loss: -3.6810, Train Steps/Sec: 0.54
356
+ [2026-02-03 23:59:13] (step=0033500) Train Loss: -3.6816, Train Steps/Sec: 0.53
357
+ [2026-02-04 00:02:20] (step=0033600) Train Loss: -3.6831, Train Steps/Sec: 0.54
358
+ [2026-02-04 00:05:26] (step=0033700) Train Loss: -3.6831, Train Steps/Sec: 0.54
359
+ [2026-02-04 00:08:33] (step=0033800) Train Loss: -3.6826, Train Steps/Sec: 0.54
360
+ [2026-02-04 00:11:40] (step=0033900) Train Loss: -3.6804, Train Steps/Sec: 0.54
361
+ [2026-02-04 00:14:46] (step=0034000) Train Loss: -3.6789, Train Steps/Sec: 0.54
362
+ [2026-02-04 00:17:54] (step=0034100) Train Loss: -3.6814, Train Steps/Sec: 0.53
363
+ [2026-02-04 00:21:00] (step=0034200) Train Loss: -3.6805, Train Steps/Sec: 0.54
364
+ [2026-02-04 00:24:07] (step=0034300) Train Loss: -3.6837, Train Steps/Sec: 0.53
365
+ [2026-02-04 00:27:14] (step=0034400) Train Loss: -3.6817, Train Steps/Sec: 0.54
366
+ [2026-02-04 00:30:20] (step=0034500) Train Loss: -3.6811, Train Steps/Sec: 0.54
367
+ [2026-02-04 00:33:27] (step=0034600) Train Loss: -3.6821, Train Steps/Sec: 0.54
368
+ [2026-02-04 00:36:34] (step=0034700) Train Loss: -3.6799, Train Steps/Sec: 0.54
369
+ [2026-02-04 00:39:38] (step=0034800) Train Loss: -3.6823, Train Steps/Sec: 0.54
370
+ [2026-02-04 00:42:45] (step=0034900) Train Loss: -3.6820, Train Steps/Sec: 0.54
371
+ [2026-02-04 00:45:52] (step=0035000) Train Loss: -3.6818, Train Steps/Sec: 0.54
372
+ [2026-02-04 00:46:45] Beginning epoch 14...
373
+ [2026-02-04 00:49:01] (step=0035100) Train Loss: -3.6794, Train Steps/Sec: 0.53
374
+ [2026-02-04 00:52:08] (step=0035200) Train Loss: -3.6804, Train Steps/Sec: 0.54
375
+ [2026-02-04 00:55:15] (step=0035300) Train Loss: -3.6825, Train Steps/Sec: 0.53
376
+ [2026-02-04 00:58:22] (step=0035400) Train Loss: -3.6817, Train Steps/Sec: 0.53
377
+ [2026-02-04 01:01:29] (step=0035500) Train Loss: -3.6840, Train Steps/Sec: 0.54
378
+ [2026-02-04 01:04:35] (step=0035600) Train Loss: -3.6811, Train Steps/Sec: 0.54
379
+ [2026-02-04 01:07:42] (step=0035700) Train Loss: -3.6796, Train Steps/Sec: 0.53
380
+ [2026-02-04 01:10:50] (step=0035800) Train Loss: -3.6834, Train Steps/Sec: 0.53
381
+ [2026-02-04 01:13:56] (step=0035900) Train Loss: -3.6763, Train Steps/Sec: 0.54
382
+ [2026-02-04 01:17:03] (step=0036000) Train Loss: -3.6837, Train Steps/Sec: 0.53
383
+ [2026-02-04 01:20:10] (step=0036100) Train Loss: -3.6806, Train Steps/Sec: 0.53
384
+ [2026-02-04 01:23:18] (step=0036200) Train Loss: -3.6821, Train Steps/Sec: 0.53
385
+ [2026-02-04 01:26:24] (step=0036300) Train Loss: -3.6772, Train Steps/Sec: 0.54
386
+ [2026-02-04 01:29:31] (step=0036400) Train Loss: -3.6822, Train Steps/Sec: 0.54
387
+ [2026-02-04 01:32:38] (step=0036500) Train Loss: -3.6816, Train Steps/Sec: 0.54
388
+ [2026-02-04 01:35:45] (step=0036600) Train Loss: -3.6792, Train Steps/Sec: 0.53
389
+ [2026-02-04 01:38:51] (step=0036700) Train Loss: -3.6817, Train Steps/Sec: 0.54
390
+ [2026-02-04 01:41:59] (step=0036800) Train Loss: -3.6835, Train Steps/Sec: 0.53
391
+ [2026-02-04 01:45:05] (step=0036900) Train Loss: -3.6823, Train Steps/Sec: 0.54
392
+ [2026-02-04 01:48:12] (step=0037000) Train Loss: -3.6818, Train Steps/Sec: 0.54
393
+ [2026-02-04 01:51:18] (step=0037100) Train Loss: -3.6775, Train Steps/Sec: 0.54
394
+ [2026-02-04 01:54:25] (step=0037200) Train Loss: -3.6796, Train Steps/Sec: 0.54
395
+ [2026-02-04 01:57:31] (step=0037300) Train Loss: -3.6806, Train Steps/Sec: 0.54
396
+ [2026-02-04 02:00:38] (step=0037400) Train Loss: -3.6811, Train Steps/Sec: 0.54
397
+ [2026-02-04 02:03:43] (step=0037500) Train Loss: -3.6808, Train Steps/Sec: 0.54
398
+ [2026-02-04 02:04:39] Beginning epoch 15...
399
+ [2026-02-04 02:06:52] (step=0037600) Train Loss: -3.6847, Train Steps/Sec: 0.53
400
+ [2026-02-04 02:10:00] (step=0037700) Train Loss: -3.6837, Train Steps/Sec: 0.53
401
+ [2026-02-04 02:13:06] (step=0037800) Train Loss: -3.6796, Train Steps/Sec: 0.54
402
+ [2026-02-04 02:16:13] (step=0037900) Train Loss: -3.6804, Train Steps/Sec: 0.54
403
+ [2026-02-04 02:19:20] (step=0038000) Train Loss: -3.6825, Train Steps/Sec: 0.54
404
+ [2026-02-04 02:22:26] (step=0038100) Train Loss: -3.6803, Train Steps/Sec: 0.54
405
+ [2026-02-04 02:25:33] (step=0038200) Train Loss: -3.6813, Train Steps/Sec: 0.54
406
+ [2026-02-04 02:28:40] (step=0038300) Train Loss: -3.6798, Train Steps/Sec: 0.53
407
+ [2026-02-04 02:31:47] (step=0038400) Train Loss: -3.6797, Train Steps/Sec: 0.53
408
+ [2026-02-04 02:34:54] (step=0038500) Train Loss: -3.6817, Train Steps/Sec: 0.54
409
+ [2026-02-04 02:38:01] (step=0038600) Train Loss: -3.6818, Train Steps/Sec: 0.54
410
+ [2026-02-04 02:41:08] (step=0038700) Train Loss: -3.6824, Train Steps/Sec: 0.54
411
+ [2026-02-04 02:44:14] (step=0038800) Train Loss: -3.6800, Train Steps/Sec: 0.54
412
+ [2026-02-04 02:47:22] (step=0038900) Train Loss: -3.6812, Train Steps/Sec: 0.53
413
+ [2026-02-04 02:50:28] (step=0039000) Train Loss: -3.6826, Train Steps/Sec: 0.54
414
+ [2026-02-04 02:53:35] (step=0039100) Train Loss: -3.6807, Train Steps/Sec: 0.53
415
+ [2026-02-04 02:56:42] (step=0039200) Train Loss: -3.6831, Train Steps/Sec: 0.54
416
+ [2026-02-04 02:59:48] (step=0039300) Train Loss: -3.6822, Train Steps/Sec: 0.54
417
+ [2026-02-04 03:02:55] (step=0039400) Train Loss: -3.6803, Train Steps/Sec: 0.54
418
+ [2026-02-04 03:06:01] (step=0039500) Train Loss: -3.6815, Train Steps/Sec: 0.54
419
+ [2026-02-04 03:09:08] (step=0039600) Train Loss: -3.6830, Train Steps/Sec: 0.53
420
+ [2026-02-04 03:12:15] (step=0039700) Train Loss: -3.6771, Train Steps/Sec: 0.54
421
+ [2026-02-04 03:15:21] (step=0039800) Train Loss: -3.6791, Train Steps/Sec: 0.54
422
+ [2026-02-04 03:18:28] (step=0039900) Train Loss: -3.6797, Train Steps/Sec: 0.54
423
+ [2026-02-04 03:21:34] (step=0040000) Train Loss: -3.6815, Train Steps/Sec: 0.54
424
+ [2026-02-04 03:22:33] Beginning epoch 16...
425
+ [2026-02-04 03:24:43] (step=0040100) Train Loss: -3.6799, Train Steps/Sec: 0.53
426
+ [2026-02-04 03:27:50] (step=0040200) Train Loss: -3.6823, Train Steps/Sec: 0.53
427
+ [2026-02-04 03:30:57] (step=0040300) Train Loss: -3.6805, Train Steps/Sec: 0.53
428
+ [2026-02-04 03:34:04] (step=0040400) Train Loss: -3.6829, Train Steps/Sec: 0.54
429
+ [2026-02-04 03:37:11] (step=0040500) Train Loss: -3.6786, Train Steps/Sec: 0.53
430
+ [2026-02-04 03:40:18] (step=0040600) Train Loss: -3.6811, Train Steps/Sec: 0.54
431
+ [2026-02-04 03:43:24] (step=0040700) Train Loss: -3.6804, Train Steps/Sec: 0.54
432
+ [2026-02-04 03:46:32] (step=0040800) Train Loss: -3.6860, Train Steps/Sec: 0.53
433
+ [2026-02-04 03:49:38] (step=0040900) Train Loss: -3.6804, Train Steps/Sec: 0.54
434
+ [2026-02-04 03:52:44] (step=0041000) Train Loss: -3.6803, Train Steps/Sec: 0.54
435
+ [2026-02-04 03:55:52] (step=0041100) Train Loss: -3.6803, Train Steps/Sec: 0.53
436
+ [2026-02-04 03:58:59] (step=0041200) Train Loss: -3.6801, Train Steps/Sec: 0.53
437
+ [2026-02-04 04:02:06] (step=0041300) Train Loss: -3.6794, Train Steps/Sec: 0.53
438
+ [2026-02-04 04:05:14] (step=0041400) Train Loss: -3.6816, Train Steps/Sec: 0.53
439
+ [2026-02-04 04:08:20] (step=0041500) Train Loss: -3.6858, Train Steps/Sec: 0.54
440
+ [2026-02-04 04:11:27] (step=0041600) Train Loss: -3.6811, Train Steps/Sec: 0.53
441
+ [2026-02-04 04:14:34] (step=0041700) Train Loss: -3.6859, Train Steps/Sec: 0.53
442
+ [2026-02-04 04:17:41] (step=0041800) Train Loss: -3.6823, Train Steps/Sec: 0.54
443
+ [2026-02-04 04:20:47] (step=0041900) Train Loss: -3.6838, Train Steps/Sec: 0.54
444
+ [2026-02-04 04:23:54] (step=0042000) Train Loss: -3.6809, Train Steps/Sec: 0.54
445
+ [2026-02-04 04:27:00] (step=0042100) Train Loss: -3.6781, Train Steps/Sec: 0.54
446
+ [2026-02-04 04:30:07] (step=0042200) Train Loss: -3.6826, Train Steps/Sec: 0.54
447
+ [2026-02-04 04:33:13] (step=0042300) Train Loss: -3.6835, Train Steps/Sec: 0.54
448
+ [2026-02-04 04:36:20] (step=0042400) Train Loss: -3.6816, Train Steps/Sec: 0.54
449
+ [2026-02-04 04:39:27] (step=0042500) Train Loss: -3.6802, Train Steps/Sec: 0.53
450
+ [2026-02-04 04:40:31] Beginning epoch 17...
451
+ [2026-02-04 04:42:37] (step=0042600) Train Loss: -3.6831, Train Steps/Sec: 0.53
452
+ [2026-02-04 04:45:42] (step=0042700) Train Loss: -3.6778, Train Steps/Sec: 0.54
453
+ [2026-02-04 04:48:48] (step=0042800) Train Loss: -3.6846, Train Steps/Sec: 0.54
454
+ [2026-02-04 04:51:55] (step=0042900) Train Loss: -3.6827, Train Steps/Sec: 0.53
455
+ [2026-02-04 04:55:02] (step=0043000) Train Loss: -3.6820, Train Steps/Sec: 0.54
456
+ [2026-02-04 04:58:08] (step=0043100) Train Loss: -3.6803, Train Steps/Sec: 0.54
457
+ [2026-02-04 05:01:15] (step=0043200) Train Loss: -3.6808, Train Steps/Sec: 0.54
458
+ [2026-02-04 05:04:22] (step=0043300) Train Loss: -3.6838, Train Steps/Sec: 0.53
459
+ [2026-02-04 05:07:29] (step=0043400) Train Loss: -3.6809, Train Steps/Sec: 0.54
460
+ [2026-02-04 05:10:36] (step=0043500) Train Loss: -3.6757, Train Steps/Sec: 0.53
461
+ [2026-02-04 05:13:43] (step=0043600) Train Loss: -3.6808, Train Steps/Sec: 0.54
462
+ [2026-02-04 05:16:50] (step=0043700) Train Loss: -3.6807, Train Steps/Sec: 0.54
463
+ [2026-02-04 05:19:56] (step=0043800) Train Loss: -3.6825, Train Steps/Sec: 0.54
464
+ [2026-02-04 05:23:03] (step=0043900) Train Loss: -3.6811, Train Steps/Sec: 0.53
465
+ [2026-02-04 05:26:10] (step=0044000) Train Loss: -3.6819, Train Steps/Sec: 0.54
466
+ [2026-02-04 05:29:17] (step=0044100) Train Loss: -3.6801, Train Steps/Sec: 0.54
467
+ [2026-02-04 05:32:24] (step=0044200) Train Loss: -3.6785, Train Steps/Sec: 0.54
468
+ [2026-02-04 05:35:31] (step=0044300) Train Loss: -3.6841, Train Steps/Sec: 0.53
469
+ [2026-02-04 05:38:38] (step=0044400) Train Loss: -3.6841, Train Steps/Sec: 0.53
470
+ [2026-02-04 05:41:01] (step=0044500) Train Loss: -3.6791, Train Steps/Sec: 0.70
471
+ [2026-02-04 05:42:24] (step=0044600) Train Loss: -3.6843, Train Steps/Sec: 1.20
472
+ [2026-02-04 05:43:47] (step=0044700) Train Loss: -3.6815, Train Steps/Sec: 1.21
473
+ [2026-02-04 05:45:10] (step=0044800) Train Loss: -3.6785, Train Steps/Sec: 1.21
474
+ [2026-02-04 05:46:33] (step=0044900) Train Loss: -3.6820, Train Steps/Sec: 1.21
475
+ [2026-02-04 05:47:56] (step=0045000) Train Loss: -3.6847, Train Steps/Sec: 1.20
476
+ [2026-02-04 05:48:26] Beginning epoch 18...
477
+ [2026-02-04 05:49:22] (step=0045100) Train Loss: -3.6816, Train Steps/Sec: 1.16
478
+ [2026-02-04 05:50:45] (step=0045200) Train Loss: -3.6834, Train Steps/Sec: 1.20
479
+ [2026-02-04 05:52:08] (step=0045300) Train Loss: -3.6787, Train Steps/Sec: 1.21
480
+ [2026-02-04 05:53:31] (step=0045400) Train Loss: -3.6844, Train Steps/Sec: 1.20
481
+ [2026-02-04 05:54:54] (step=0045500) Train Loss: -3.6823, Train Steps/Sec: 1.20
482
+ [2026-02-04 05:56:17] (step=0045600) Train Loss: -3.6806, Train Steps/Sec: 1.20
483
+ [2026-02-04 05:57:40] (step=0045700) Train Loss: -3.6797, Train Steps/Sec: 1.21
484
+ [2026-02-04 05:59:03] (step=0045800) Train Loss: -3.6819, Train Steps/Sec: 1.20
485
+ [2026-02-04 06:00:26] (step=0045900) Train Loss: -3.6807, Train Steps/Sec: 1.20
486
+ [2026-02-04 06:01:49] (step=0046000) Train Loss: -3.6814, Train Steps/Sec: 1.21
487
+ [2026-02-04 06:03:12] (step=0046100) Train Loss: -3.6827, Train Steps/Sec: 1.21
488
+ [2026-02-04 06:04:35] (step=0046200) Train Loss: -3.6824, Train Steps/Sec: 1.20
489
+ [2026-02-04 06:05:58] (step=0046300) Train Loss: -3.6825, Train Steps/Sec: 1.20
490
+ [2026-02-04 06:07:21] (step=0046400) Train Loss: -3.6826, Train Steps/Sec: 1.20
491
+ [2026-02-04 06:08:44] (step=0046500) Train Loss: -3.6778, Train Steps/Sec: 1.20
492
+ [2026-02-04 06:10:07] (step=0046600) Train Loss: -3.6820, Train Steps/Sec: 1.20
493
+ [2026-02-04 06:11:30] (step=0046700) Train Loss: -3.6830, Train Steps/Sec: 1.21
494
+ [2026-02-04 06:12:53] (step=0046800) Train Loss: -3.6808, Train Steps/Sec: 1.20
495
+ [2026-02-04 06:14:16] (step=0046900) Train Loss: -3.6812, Train Steps/Sec: 1.20
496
+ [2026-02-04 06:15:39] (step=0047000) Train Loss: -3.6836, Train Steps/Sec: 1.20
497
+ [2026-02-04 06:17:02] (step=0047100) Train Loss: -3.6806, Train Steps/Sec: 1.20
498
+ [2026-02-04 06:18:25] (step=0047200) Train Loss: -3.6813, Train Steps/Sec: 1.20
499
+ [2026-02-04 06:19:48] (step=0047300) Train Loss: -3.6828, Train Steps/Sec: 1.20
500
+ [2026-02-04 06:21:11] (step=0047400) Train Loss: -3.6842, Train Steps/Sec: 1.21
Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-003-SiT-XL-2-GVP-velocity-None/log.txt ADDED
@@ -0,0 +1,6 @@
+ [2026-02-03 06:53:41] Experiment directory created at results_256_gvp_disp/depth-mu-2-003-SiT-XL-2-GVP-velocity-None
+ [2026-02-03 06:54:17] Combined_model Parameters: 729,629,632
+ [2026-02-03 06:54:17] Total trainable parameters: 53,910,176
+ [2026-02-03 06:54:19] Dataset contains 1,281,167 images (/gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/)
+ [2026-02-03 06:54:19] Training for 100000 epochs...
+ [2026-02-03 06:54:19] Beginning epoch 0...
Rectified_Noise/GVP-Disp/results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/log.txt ADDED
@@ -0,0 +1,863 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2026-02-03 06:55:12] Experiment directory created at results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None
2
+ [2026-02-03 06:55:47] Combined_model Parameters: 729,629,632
3
+ [2026-02-03 06:55:47] Total trainable parameters: 53,910,176
4
+ [2026-02-03 06:55:50] Dataset contains 1,281,167 images (/gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/)
5
+ [2026-02-03 06:55:50] Training for 100000 epochs...
6
+ [2026-02-03 06:55:50] Beginning epoch 0...
7
+ [2026-02-03 06:57:30] (step=0000100) Train Loss: -2.4789, Train Steps/Sec: 1.00
8
+ [2026-02-03 06:59:08] (step=0000200) Train Loss: -2.9649, Train Steps/Sec: 1.02
9
+ [2026-02-03 07:00:47] (step=0000300) Train Loss: -2.9777, Train Steps/Sec: 1.01
10
+ [2026-02-03 07:02:27] (step=0000400) Train Loss: -2.9828, Train Steps/Sec: 1.00
11
+ [2026-02-03 07:04:08] (step=0000500) Train Loss: -2.9877, Train Steps/Sec: 0.99
12
+ [2026-02-03 07:05:49] (step=0000600) Train Loss: -2.9875, Train Steps/Sec: 0.99
13
+ [2026-02-03 07:07:28] (step=0000700) Train Loss: -2.9882, Train Steps/Sec: 1.01
14
+ [2026-02-03 07:09:08] (step=0000800) Train Loss: -2.9861, Train Steps/Sec: 1.00
15
+ [2026-02-03 07:10:49] (step=0000900) Train Loss: -2.9862, Train Steps/Sec: 0.99
16
+ [2026-02-03 07:12:30] (step=0001000) Train Loss: -2.9886, Train Steps/Sec: 0.99
17
+ [2026-02-03 07:14:12] (step=0001100) Train Loss: -2.9849, Train Steps/Sec: 0.98
18
+ [2026-02-03 07:18:10] (step=0001200) Train Loss: -2.9885, Train Steps/Sec: 0.42
19
+ [2026-02-03 07:20:07] (step=0001300) Train Loss: -2.9864, Train Steps/Sec: 0.85
20
+ [2026-02-03 07:21:45] (step=0001400) Train Loss: -2.9867, Train Steps/Sec: 1.02
21
+ [2026-02-03 07:23:22] (step=0001500) Train Loss: -2.9863, Train Steps/Sec: 1.03
22
+ [2026-02-03 07:25:00] (step=0001600) Train Loss: -2.9879, Train Steps/Sec: 1.02
23
+ [2026-02-03 07:26:37] (step=0001700) Train Loss: -2.9930, Train Steps/Sec: 1.03
24
+ [2026-02-03 07:28:14] (step=0001800) Train Loss: -2.9892, Train Steps/Sec: 1.03
25
+ [2026-02-03 07:29:52] (step=0001900) Train Loss: -2.9881, Train Steps/Sec: 1.02
26
+ [2026-02-03 07:31:30] (step=0002000) Train Loss: -2.9857, Train Steps/Sec: 1.03
27
+ [2026-02-03 07:33:07] (step=0002100) Train Loss: -2.9902, Train Steps/Sec: 1.02
28
+ [2026-02-03 07:34:45] (step=0002200) Train Loss: -2.9829, Train Steps/Sec: 1.03
29
+ [2026-02-03 07:36:23] (step=0002300) Train Loss: -2.9862, Train Steps/Sec: 1.02
30
+ [2026-02-03 07:38:00] (step=0002400) Train Loss: -2.9895, Train Steps/Sec: 1.03
31
+ [2026-02-03 07:39:37] (step=0002500) Train Loss: -2.9878, Train Steps/Sec: 1.03
32
+ [2026-02-03 07:41:15] (step=0002600) Train Loss: -2.9899, Train Steps/Sec: 1.02
33
+ [2026-02-03 07:42:53] (step=0002700) Train Loss: -2.9906, Train Steps/Sec: 1.02
34
+ [2026-02-03 07:44:31] (step=0002800) Train Loss: -2.9915, Train Steps/Sec: 1.02
35
+ [2026-02-03 07:46:09] (step=0002900) Train Loss: -2.9871, Train Steps/Sec: 1.02
36
+ [2026-02-03 07:47:47] (step=0003000) Train Loss: -2.9850, Train Steps/Sec: 1.03
37
+ [2026-02-03 07:49:25] (step=0003100) Train Loss: -2.9879, Train Steps/Sec: 1.02
38
+ [2026-02-03 07:51:03] (step=0003200) Train Loss: -2.9903, Train Steps/Sec: 1.02
39
+ [2026-02-03 07:52:41] (step=0003300) Train Loss: -2.9943, Train Steps/Sec: 1.02
40
+ [2026-02-03 07:54:15] (step=0003400) Train Loss: -2.9891, Train Steps/Sec: 1.06
41
+ [2026-02-03 07:55:53] (step=0003500) Train Loss: -2.9845, Train Steps/Sec: 1.03
42
+ [2026-02-03 07:57:30] (step=0003600) Train Loss: -2.9919, Train Steps/Sec: 1.02
43
+ [2026-02-03 07:59:08] (step=0003700) Train Loss: -2.9916, Train Steps/Sec: 1.03
44
+ [2026-02-03 08:00:46] (step=0003800) Train Loss: -2.9894, Train Steps/Sec: 1.02
45
+ [2026-02-03 08:02:23] (step=0003900) Train Loss: -2.9864, Train Steps/Sec: 1.02
46
+ [2026-02-03 08:04:01] (step=0004000) Train Loss: -2.9929, Train Steps/Sec: 1.02
47
+ [2026-02-03 08:05:39] (step=0004100) Train Loss: -2.9882, Train Steps/Sec: 1.02
48
+ [2026-02-03 08:07:17] (step=0004200) Train Loss: -2.9859, Train Steps/Sec: 1.02
49
+ [2026-02-03 08:08:54] (step=0004300) Train Loss: -2.9849, Train Steps/Sec: 1.02
50
+ [2026-02-03 08:10:32] (step=0004400) Train Loss: -2.9854, Train Steps/Sec: 1.02
51
+ [2026-02-03 08:12:10] (step=0004500) Train Loss: -2.9904, Train Steps/Sec: 1.02
52
+ [2026-02-03 08:13:47] (step=0004600) Train Loss: -2.9874, Train Steps/Sec: 1.03
53
+ [2026-02-03 08:15:26] (step=0004700) Train Loss: -2.9861, Train Steps/Sec: 1.02
54
+ [2026-02-03 08:17:04] (step=0004800) Train Loss: -2.9844, Train Steps/Sec: 1.02
55
+ [2026-02-03 08:18:41] (step=0004900) Train Loss: -2.9825, Train Steps/Sec: 1.02
56
+ [2026-02-03 08:20:19] (step=0005000) Train Loss: -2.9846, Train Steps/Sec: 1.02
57
+ [2026-02-03 08:20:24] Beginning epoch 1...
58
+ [2026-02-03 08:21:59] (step=0005100) Train Loss: -2.9935, Train Steps/Sec: 1.00
59
+ [2026-02-03 08:23:37] (step=0005200) Train Loss: -2.9902, Train Steps/Sec: 1.02
60
+ [2026-02-03 08:25:15] (step=0005300) Train Loss: -2.9927, Train Steps/Sec: 1.02
61
+ [2026-02-03 08:26:53] (step=0005400) Train Loss: -2.9865, Train Steps/Sec: 1.02
62
+ [2026-02-03 08:28:31] (step=0005500) Train Loss: -2.9877, Train Steps/Sec: 1.02
63
+ [2026-02-03 08:30:09] (step=0005600) Train Loss: -2.9912, Train Steps/Sec: 1.02
64
+ [2026-02-03 08:31:46] (step=0005700) Train Loss: -2.9920, Train Steps/Sec: 1.03
65
+ [2026-02-03 08:33:24] (step=0005800) Train Loss: -2.9866, Train Steps/Sec: 1.02
66
+ [2026-02-03 08:35:02] (step=0005900) Train Loss: -2.9884, Train Steps/Sec: 1.02
67
+ [2026-02-03 08:36:39] (step=0006000) Train Loss: -2.9900, Train Steps/Sec: 1.03
68
+ [2026-02-03 08:38:17] (step=0006100) Train Loss: -2.9876, Train Steps/Sec: 1.02
69
+ [2026-02-03 08:39:55] (step=0006200) Train Loss: -2.9904, Train Steps/Sec: 1.02
70
+ [2026-02-03 08:41:33] (step=0006300) Train Loss: -2.9869, Train Steps/Sec: 1.02
71
+ [2026-02-03 08:43:11] (step=0006400) Train Loss: -2.9901, Train Steps/Sec: 1.02
72
+ [2026-02-03 08:44:49] (step=0006500) Train Loss: -2.9875, Train Steps/Sec: 1.02
73
+ [2026-02-03 08:46:27] (step=0006600) Train Loss: -2.9861, Train Steps/Sec: 1.02
74
+ [2026-02-03 08:48:05] (step=0006700) Train Loss: -2.9869, Train Steps/Sec: 1.02
75
+ [2026-02-03 08:49:43] (step=0006800) Train Loss: -2.9873, Train Steps/Sec: 1.02
76
+ [2026-02-03 08:51:20] (step=0006900) Train Loss: -2.9890, Train Steps/Sec: 1.02
77
+ [2026-02-03 08:52:58] (step=0007000) Train Loss: -2.9848, Train Steps/Sec: 1.02
78
+ [2026-02-03 08:54:36] (step=0007100) Train Loss: -2.9867, Train Steps/Sec: 1.02
79
+ [2026-02-03 08:56:13] (step=0007200) Train Loss: -2.9936, Train Steps/Sec: 1.02
80
+ [2026-02-03 08:57:51] (step=0007300) Train Loss: -2.9875, Train Steps/Sec: 1.02
81
+ [2026-02-03 08:59:29] (step=0007400) Train Loss: -2.9889, Train Steps/Sec: 1.02
82
+ [2026-02-03 09:01:07] (step=0007500) Train Loss: -2.9907, Train Steps/Sec: 1.02
83
+ [2026-02-03 09:02:45] (step=0007600) Train Loss: -2.9875, Train Steps/Sec: 1.03
84
+ [2026-02-03 09:04:22] (step=0007700) Train Loss: -2.9918, Train Steps/Sec: 1.02
85
+ [2026-02-03 09:06:01] (step=0007800) Train Loss: -2.9859, Train Steps/Sec: 1.02
86
+ [2026-02-03 09:07:39] (step=0007900) Train Loss: -2.9846, Train Steps/Sec: 1.02
87
+ [2026-02-03 09:09:16] (step=0008000) Train Loss: -2.9873, Train Steps/Sec: 1.02
88
+ [2026-02-03 09:10:54] (step=0008100) Train Loss: -2.9913, Train Steps/Sec: 1.02
89
+ [2026-02-03 09:12:28] (step=0008200) Train Loss: -2.9826, Train Steps/Sec: 1.07
90
+ [2026-02-03 09:14:06] (step=0008300) Train Loss: -2.9896, Train Steps/Sec: 1.02
91
+ [2026-02-03 09:15:44] (step=0008400) Train Loss: -2.9933, Train Steps/Sec: 1.03
92
+ [2026-02-03 09:17:21] (step=0008500) Train Loss: -2.9858, Train Steps/Sec: 1.02
93
+ [2026-02-03 09:18:59] (step=0008600) Train Loss: -2.9896, Train Steps/Sec: 1.02
94
+ [2026-02-03 09:20:37] (step=0008700) Train Loss: -2.9877, Train Steps/Sec: 1.02
95
+ [2026-02-03 09:22:15] (step=0008800) Train Loss: -2.9901, Train Steps/Sec: 1.02
96
+ [2026-02-03 09:23:53] (step=0008900) Train Loss: -2.9884, Train Steps/Sec: 1.02
97
+ [2026-02-03 09:25:31] (step=0009000) Train Loss: -2.9896, Train Steps/Sec: 1.02
98
+ [2026-02-03 09:27:09] (step=0009100) Train Loss: -2.9875, Train Steps/Sec: 1.02
99
+ [2026-02-03 09:28:47] (step=0009200) Train Loss: -2.9890, Train Steps/Sec: 1.02
100
+ [2026-02-03 09:30:24] (step=0009300) Train Loss: -2.9888, Train Steps/Sec: 1.03
101
+ [2026-02-03 09:32:02] (step=0009400) Train Loss: -2.9868, Train Steps/Sec: 1.02
102
+ [2026-02-03 09:33:40] (step=0009500) Train Loss: -2.9907, Train Steps/Sec: 1.02
103
+ [2026-02-03 09:35:18] (step=0009600) Train Loss: -2.9820, Train Steps/Sec: 1.02
104
+ [2026-02-03 09:36:56] (step=0009700) Train Loss: -2.9845, Train Steps/Sec: 1.02
105
+ [2026-02-03 09:38:34] (step=0009800) Train Loss: -2.9893, Train Steps/Sec: 1.02
106
+ [2026-02-03 09:40:12] (step=0009900) Train Loss: -2.9918, Train Steps/Sec: 1.02
107
+ [2026-02-03 09:41:50] (step=0010000) Train Loss: -2.9891, Train Steps/Sec: 1.02
108
+ [2026-02-03 09:41:58] Beginning epoch 2...
109
+ [2026-02-03 09:43:30] (step=0010100) Train Loss: -2.9883, Train Steps/Sec: 1.00
110
+ [2026-02-03 09:45:08] (step=0010200) Train Loss: -2.9871, Train Steps/Sec: 1.02
111
+ [2026-02-03 09:46:45] (step=0010300) Train Loss: -2.9880, Train Steps/Sec: 1.03
112
+ [2026-02-03 09:48:22] (step=0010400) Train Loss: -2.9866, Train Steps/Sec: 1.02
113
+ [2026-02-03 09:50:00] (step=0010500) Train Loss: -2.9857, Train Steps/Sec: 1.02
114
+ [2026-02-03 09:51:38] (step=0010600) Train Loss: -2.9888, Train Steps/Sec: 1.02
115
+ [2026-02-03 09:53:16] (step=0010700) Train Loss: -2.9913, Train Steps/Sec: 1.02
116
+ [2026-02-03 09:54:55] (step=0010800) Train Loss: -2.9880, Train Steps/Sec: 1.02
117
+ [2026-02-03 09:56:33] (step=0010900) Train Loss: -2.9904, Train Steps/Sec: 1.02
118
+ [2026-02-03 09:58:11] (step=0011000) Train Loss: -2.9880, Train Steps/Sec: 1.02
119
+ [2026-02-03 09:59:49] (step=0011100) Train Loss: -2.9910, Train Steps/Sec: 1.02
120
+ [2026-02-03 10:01:27] (step=0011200) Train Loss: -2.9890, Train Steps/Sec: 1.02
121
+ [2026-02-03 10:03:06] (step=0011300) Train Loss: -2.9864, Train Steps/Sec: 1.01
122
+ [2026-02-03 10:04:43] (step=0011400) Train Loss: -2.9879, Train Steps/Sec: 1.02
123
+ [2026-02-03 10:06:21] (step=0011500) Train Loss: -2.9933, Train Steps/Sec: 1.02
124
+ [2026-02-03 10:08:00] (step=0011600) Train Loss: -2.9864, Train Steps/Sec: 1.02
125
+ [2026-02-03 10:09:37] (step=0011700) Train Loss: -2.9908, Train Steps/Sec: 1.02
126
+ [2026-02-03 10:11:16] (step=0011800) Train Loss: -2.9864, Train Steps/Sec: 1.02
127
+ [2026-02-03 10:12:54] (step=0011900) Train Loss: -2.9866, Train Steps/Sec: 1.02
128
+ [2026-02-03 10:14:32] (step=0012000) Train Loss: -2.9896, Train Steps/Sec: 1.02
129
+ [2026-02-03 10:16:10] (step=0012100) Train Loss: -2.9866, Train Steps/Sec: 1.02
130
+ [2026-02-03 10:17:48] (step=0012200) Train Loss: -2.9893, Train Steps/Sec: 1.02
131
+ [2026-02-03 10:19:26] (step=0012300) Train Loss: -2.9856, Train Steps/Sec: 1.02
132
+ [2026-02-03 10:21:04] (step=0012400) Train Loss: -2.9944, Train Steps/Sec: 1.02
133
+ [2026-02-03 10:22:41] (step=0012500) Train Loss: -2.9854, Train Steps/Sec: 1.02
134
+ [2026-02-03 10:24:20] (step=0012600) Train Loss: -2.9891, Train Steps/Sec: 1.02
135
+ [2026-02-03 10:25:57] (step=0012700) Train Loss: -2.9851, Train Steps/Sec: 1.02
136
+ [2026-02-03 10:27:35] (step=0012800) Train Loss: -2.9892, Train Steps/Sec: 1.02
137
+ [2026-02-03 10:29:13] (step=0012900) Train Loss: -2.9890, Train Steps/Sec: 1.02
138
+ [2026-02-03 10:30:47] (step=0013000) Train Loss: -2.9892, Train Steps/Sec: 1.06
139
+ [2026-02-03 10:32:25] (step=0013100) Train Loss: -2.9854, Train Steps/Sec: 1.02
140
+ [2026-02-03 10:34:02] (step=0013200) Train Loss: -2.9860, Train Steps/Sec: 1.03
141
+ [2026-02-03 10:35:40] (step=0013300) Train Loss: -2.9888, Train Steps/Sec: 1.02
142
+ [2026-02-03 10:37:18] (step=0013400) Train Loss: -2.9860, Train Steps/Sec: 1.02
143
+ [2026-02-03 10:38:56] (step=0013500) Train Loss: -2.9910, Train Steps/Sec: 1.03
144
+ [2026-02-03 10:40:33] (step=0013600) Train Loss: -2.9834, Train Steps/Sec: 1.02
145
+ [2026-02-03 10:42:11] (step=0013700) Train Loss: -2.9847, Train Steps/Sec: 1.02
146
+ [2026-02-03 10:43:49] (step=0013800) Train Loss: -2.9864, Train Steps/Sec: 1.02
147
+ [2026-02-03 10:45:27] (step=0013900) Train Loss: -2.9884, Train Steps/Sec: 1.02
148
+ [2026-02-03 10:47:05] (step=0014000) Train Loss: -2.9889, Train Steps/Sec: 1.02
149
+ [2026-02-03 10:48:42] (step=0014100) Train Loss: -2.9875, Train Steps/Sec: 1.02
150
+ [2026-02-03 10:50:20] (step=0014200) Train Loss: -2.9885, Train Steps/Sec: 1.02
151
+ [2026-02-03 10:51:58] (step=0014300) Train Loss: -2.9891, Train Steps/Sec: 1.02
152
+ [2026-02-03 10:53:35] (step=0014400) Train Loss: -2.9889, Train Steps/Sec: 1.03
153
+ [2026-02-03 10:55:13] (step=0014500) Train Loss: -2.9893, Train Steps/Sec: 1.02
154
+ [2026-02-03 10:56:51] (step=0014600) Train Loss: -2.9867, Train Steps/Sec: 1.02
155
+ [2026-02-03 10:58:28] (step=0014700) Train Loss: -2.9864, Train Steps/Sec: 1.03
156
+ [2026-02-03 11:00:06] (step=0014800) Train Loss: -2.9927, Train Steps/Sec: 1.02
157
+ [2026-02-03 11:01:43] (step=0014900) Train Loss: -2.9881, Train Steps/Sec: 1.03
158
+ [2026-02-03 11:03:20] (step=0015000) Train Loss: -2.9892, Train Steps/Sec: 1.03
159
+ [2026-02-03 11:03:33] Beginning epoch 3...
160
+ [2026-02-03 11:05:01] (step=0015100) Train Loss: -2.9843, Train Steps/Sec: 1.00
161
+ [2026-02-03 11:06:39] (step=0015200) Train Loss: -2.9891, Train Steps/Sec: 1.02
162
+ [2026-02-03 11:08:16] (step=0015300) Train Loss: -2.9872, Train Steps/Sec: 1.03
163
+ [2026-02-03 11:09:54] (step=0015400) Train Loss: -2.9896, Train Steps/Sec: 1.03
164
+ [2026-02-03 11:11:32] (step=0015500) Train Loss: -2.9881, Train Steps/Sec: 1.02
165
+ [2026-02-03 11:13:10] (step=0015600) Train Loss: -2.9899, Train Steps/Sec: 1.02
166
+ [2026-02-03 11:14:48] (step=0015700) Train Loss: -2.9887, Train Steps/Sec: 1.02
167
+ [2026-02-03 11:16:25] (step=0015800) Train Loss: -2.9880, Train Steps/Sec: 1.02
168
+ [2026-02-03 11:18:04] (step=0015900) Train Loss: -2.9894, Train Steps/Sec: 1.02
169
+ [2026-02-03 11:19:42] (step=0016000) Train Loss: -2.9963, Train Steps/Sec: 1.02
170
+ [2026-02-03 11:21:19] (step=0016100) Train Loss: -2.9911, Train Steps/Sec: 1.02
171
+ [2026-02-03 11:22:57] (step=0016200) Train Loss: -2.9873, Train Steps/Sec: 1.02
172
+ [2026-02-03 11:24:35] (step=0016300) Train Loss: -2.9868, Train Steps/Sec: 1.02
173
+ [2026-02-03 11:26:13] (step=0016400) Train Loss: -2.9870, Train Steps/Sec: 1.02
174
+ [2026-02-03 11:27:51] (step=0016500) Train Loss: -2.9856, Train Steps/Sec: 1.02
175
+ [2026-02-03 11:29:29] (step=0016600) Train Loss: -2.9835, Train Steps/Sec: 1.02
176
+ [2026-02-03 11:31:07] (step=0016700) Train Loss: -2.9855, Train Steps/Sec: 1.02
177
+ [2026-02-03 11:32:44] (step=0016800) Train Loss: -2.9885, Train Steps/Sec: 1.03
178
+ [2026-02-03 11:34:21] (step=0016900) Train Loss: -2.9889, Train Steps/Sec: 1.02
179
+ [2026-02-03 11:35:59] (step=0017000) Train Loss: -2.9889, Train Steps/Sec: 1.02
180
+ [2026-02-03 11:37:38] (step=0017100) Train Loss: -2.9856, Train Steps/Sec: 1.02
181
+ [2026-02-03 11:39:16] (step=0017200) Train Loss: -2.9916, Train Steps/Sec: 1.02
182
+ [2026-02-03 11:40:54] (step=0017300) Train Loss: -2.9901, Train Steps/Sec: 1.02
183
+ [2026-02-03 11:42:31] (step=0017400) Train Loss: -2.9858, Train Steps/Sec: 1.02
184
+ [2026-02-03 11:44:10] (step=0017500) Train Loss: -2.9834, Train Steps/Sec: 1.02
185
+ [2026-02-03 11:45:48] (step=0017600) Train Loss: -2.9826, Train Steps/Sec: 1.02
186
+ [2026-02-03 11:47:22] (step=0017700) Train Loss: -2.9870, Train Steps/Sec: 1.06
187
+ [2026-02-03 11:49:00] (step=0017800) Train Loss: -2.9945, Train Steps/Sec: 1.02
188
+ [2026-02-03 11:50:38] (step=0017900) Train Loss: -2.9841, Train Steps/Sec: 1.02
189
+ [2026-02-03 11:52:16] (step=0018000) Train Loss: -2.9945, Train Steps/Sec: 1.02
190
+ [2026-02-03 11:53:53] (step=0018100) Train Loss: -2.9870, Train Steps/Sec: 1.02
191
+ [2026-02-03 11:55:32] (step=0018200) Train Loss: -2.9886, Train Steps/Sec: 1.02
192
+ [2026-02-03 11:57:10] (step=0018300) Train Loss: -2.9877, Train Steps/Sec: 1.02
193
+ [2026-02-03 11:58:48] (step=0018400) Train Loss: -2.9872, Train Steps/Sec: 1.02
194
+ [2026-02-03 12:00:26] (step=0018500) Train Loss: -2.9906, Train Steps/Sec: 1.02
195
+ [2026-02-03 12:02:03] (step=0018600) Train Loss: -2.9869, Train Steps/Sec: 1.03
196
+ [2026-02-03 12:03:41] (step=0018700) Train Loss: -2.9855, Train Steps/Sec: 1.03
197
+ [2026-02-03 12:05:18] (step=0018800) Train Loss: -2.9842, Train Steps/Sec: 1.03
198
+ [2026-02-03 12:06:56] (step=0018900) Train Loss: -2.9824, Train Steps/Sec: 1.03
199
+ [2026-02-03 12:08:33] (step=0019000) Train Loss: -2.9857, Train Steps/Sec: 1.03
200
+ [2026-02-03 12:10:11] (step=0019100) Train Loss: -2.9898, Train Steps/Sec: 1.02
201
+ [2026-02-03 12:11:48] (step=0019200) Train Loss: -2.9880, Train Steps/Sec: 1.03
202
+ [2026-02-03 12:13:26] (step=0019300) Train Loss: -2.9942, Train Steps/Sec: 1.03
203
+ [2026-02-03 12:15:03] (step=0019400) Train Loss: -2.9905, Train Steps/Sec: 1.02
204
+ [2026-02-03 12:16:41] (step=0019500) Train Loss: -2.9895, Train Steps/Sec: 1.02
205
+ [2026-02-03 12:18:19] (step=0019600) Train Loss: -2.9856, Train Steps/Sec: 1.02
206
+ [2026-02-03 12:19:57] (step=0019700) Train Loss: -2.9901, Train Steps/Sec: 1.02
207
+ [2026-02-03 12:21:35] (step=0019800) Train Loss: -2.9845, Train Steps/Sec: 1.02
208
+ [2026-02-03 12:23:12] (step=0019900) Train Loss: -2.9859, Train Steps/Sec: 1.02
209
+ [2026-02-03 12:24:50] (step=0020000) Train Loss: -2.9927, Train Steps/Sec: 1.02
210
+ [2026-02-03 12:25:06] Beginning epoch 4...
211
+ [2026-02-03 12:26:30] (step=0020100) Train Loss: -2.9944, Train Steps/Sec: 1.00
212
+ [2026-02-03 12:28:07] (step=0020200) Train Loss: -2.9893, Train Steps/Sec: 1.02
213
+ [2026-02-03 12:29:45] (step=0020300) Train Loss: -2.9873, Train Steps/Sec: 1.03
214
+ [2026-02-03 12:31:23] (step=0020400) Train Loss: -2.9886, Train Steps/Sec: 1.02
215
+ [2026-02-03 12:33:00] (step=0020500) Train Loss: -2.9894, Train Steps/Sec: 1.02
216
+ [2026-02-03 12:34:38] (step=0020600) Train Loss: -2.9908, Train Steps/Sec: 1.02
217
+ [2026-02-03 12:36:16] (step=0020700) Train Loss: -2.9836, Train Steps/Sec: 1.02
218
+ [2026-02-03 12:37:54] (step=0020800) Train Loss: -2.9885, Train Steps/Sec: 1.02
219
+ [2026-02-03 12:39:32] (step=0020900) Train Loss: -2.9839, Train Steps/Sec: 1.02
220
+ [2026-02-03 12:41:10] (step=0021000) Train Loss: -2.9874, Train Steps/Sec: 1.02
221
+ [2026-02-03 12:42:48] (step=0021100) Train Loss: -2.9918, Train Steps/Sec: 1.02
222
+ [2026-02-03 12:44:25] (step=0021200) Train Loss: -2.9904, Train Steps/Sec: 1.03
223
+ [2026-02-03 12:46:03] (step=0021300) Train Loss: -2.9917, Train Steps/Sec: 1.02
224
+ [2026-02-03 12:47:41] (step=0021400) Train Loss: -2.9911, Train Steps/Sec: 1.02
225
+ [2026-02-03 12:49:19] (step=0021500) Train Loss: -2.9888, Train Steps/Sec: 1.02
226
+ [2026-02-03 12:50:57] (step=0021600) Train Loss: -2.9900, Train Steps/Sec: 1.02
227
+ [2026-02-03 12:52:35] (step=0021700) Train Loss: -2.9857, Train Steps/Sec: 1.02
228
+ [2026-02-03 12:54:13] (step=0021800) Train Loss: -2.9907, Train Steps/Sec: 1.02
229
+ [2026-02-03 12:55:50] (step=0021900) Train Loss: -2.9898, Train Steps/Sec: 1.03
230
+ [2026-02-03 12:57:28] (step=0022000) Train Loss: -2.9929, Train Steps/Sec: 1.02
231
+ [2026-02-03 12:59:06] (step=0022100) Train Loss: -2.9851, Train Steps/Sec: 1.03
232
+ [2026-02-03 13:00:44] (step=0022200) Train Loss: -2.9931, Train Steps/Sec: 1.02
233
+ [2026-02-03 13:02:22] (step=0022300) Train Loss: -2.9841, Train Steps/Sec: 1.02
234
+ [2026-02-03 13:03:59] (step=0022400) Train Loss: -2.9867, Train Steps/Sec: 1.02
235
+ [2026-02-03 13:05:34] (step=0022500) Train Loss: -2.9891, Train Steps/Sec: 1.05
236
+ [2026-02-03 13:07:12] (step=0022600) Train Loss: -2.9902, Train Steps/Sec: 1.02
237
+ [2026-02-03 13:08:50] (step=0022700) Train Loss: -2.9920, Train Steps/Sec: 1.02
238
+ [2026-02-03 13:10:28] (step=0022800) Train Loss: -2.9864, Train Steps/Sec: 1.03
239
+ [2026-02-03 13:12:05] (step=0022900) Train Loss: -2.9827, Train Steps/Sec: 1.03
240
+ [2026-02-03 13:13:43] (step=0023000) Train Loss: -2.9879, Train Steps/Sec: 1.02
241
+ [2026-02-03 13:15:21] (step=0023100) Train Loss: -2.9919, Train Steps/Sec: 1.02
242
+ [2026-02-03 13:16:59] (step=0023200) Train Loss: -2.9879, Train Steps/Sec: 1.02
243
+ [2026-02-03 13:18:37] (step=0023300) Train Loss: -2.9883, Train Steps/Sec: 1.02
244
+ [2026-02-03 13:20:15] (step=0023400) Train Loss: 78736.1641, Train Steps/Sec: 1.02
245
+ [2026-02-03 13:21:53] (step=0023500) Train Loss: -2.9891, Train Steps/Sec: 1.02
246
+ [2026-02-03 13:23:31] (step=0023600) Train Loss: -2.9864, Train Steps/Sec: 1.02
247
+ [2026-02-03 13:25:09] (step=0023700) Train Loss: -2.9874, Train Steps/Sec: 1.02
248
+ [2026-02-03 13:26:47] (step=0023800) Train Loss: -2.9883, Train Steps/Sec: 1.02
249
+ [2026-02-03 13:28:25] (step=0023900) Train Loss: -2.9876, Train Steps/Sec: 1.02
250
+ [2026-02-03 13:30:03] (step=0024000) Train Loss: -2.9908, Train Steps/Sec: 1.02
251
+ [2026-02-03 13:31:41] (step=0024100) Train Loss: -2.9903, Train Steps/Sec: 1.02
252
+ [2026-02-03 13:33:19] (step=0024200) Train Loss: -2.9885, Train Steps/Sec: 1.02
253
+ [2026-02-03 13:34:56] (step=0024300) Train Loss: -2.9898, Train Steps/Sec: 1.02
254
+ [2026-02-03 13:36:34] (step=0024400) Train Loss: -2.9893, Train Steps/Sec: 1.02
255
+ [2026-02-03 13:38:12] (step=0024500) Train Loss: -2.9889, Train Steps/Sec: 1.02
256
+ [2026-02-03 13:39:49] (step=0024600) Train Loss: -2.9859, Train Steps/Sec: 1.02
257
+ [2026-02-03 13:41:27] (step=0024700) Train Loss: -2.9864, Train Steps/Sec: 1.02
258
+ [2026-02-03 13:43:05] (step=0024800) Train Loss: -2.9900, Train Steps/Sec: 1.02
259
+ [2026-02-03 13:44:43] (step=0024900) Train Loss: -2.9876, Train Steps/Sec: 1.02
260
+ [2026-02-03 13:46:21] (step=0025000) Train Loss: -2.9890, Train Steps/Sec: 1.02
261
+ [2026-02-03 13:46:22] Saved checkpoint to results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt
262
+ [2026-02-03 13:46:42] Beginning epoch 5...
263
+ [2026-02-03 13:48:02] (step=0025100) Train Loss: -2.9903, Train Steps/Sec: 0.99
264
+ [2026-02-03 13:49:31] Generating EMA samples...
265
+ [2026-02-03 13:49:39] (step=0025200) Train Loss: -2.9859, Train Steps/Sec: 1.03
+ [2026-02-03 13:51:17] (step=0025300) Train Loss: -2.9951, Train Steps/Sec: 1.02
+ [2026-02-03 13:52:55] (step=0025400) Train Loss: -2.9863, Train Steps/Sec: 1.02
+ [2026-02-03 13:54:33] (step=0025500) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 13:56:10] (step=0025600) Train Loss: -2.9899, Train Steps/Sec: 1.03
+ [2026-02-03 13:57:48] (step=0025700) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-03 13:59:26] (step=0025800) Train Loss: -2.9830, Train Steps/Sec: 1.02
+ [2026-02-03 14:01:03] (step=0025900) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-03 14:02:40] (step=0026000) Train Loss: -2.9889, Train Steps/Sec: 1.03
+ [2026-02-03 14:04:18] (step=0026100) Train Loss: -2.9845, Train Steps/Sec: 1.02
+ [2026-02-03 14:05:56] (step=0026200) Train Loss: -2.9882, Train Steps/Sec: 1.02
+ [2026-02-03 14:07:34] (step=0026300) Train Loss: -2.9906, Train Steps/Sec: 1.02
+ [2026-02-03 14:09:12] (step=0026400) Train Loss: -2.9909, Train Steps/Sec: 1.02
+ [2026-02-03 14:10:51] (step=0026500) Train Loss: -2.9932, Train Steps/Sec: 1.02
+ [2026-02-03 14:12:28] (step=0026600) Train Loss: -2.9909, Train Steps/Sec: 1.02
+ [2026-02-03 14:14:06] (step=0026700) Train Loss: -2.9931, Train Steps/Sec: 1.02
+ [2026-02-03 14:15:43] (step=0026800) Train Loss: -2.9852, Train Steps/Sec: 1.03
+ [2026-02-03 14:17:21] (step=0026900) Train Loss: -2.9819, Train Steps/Sec: 1.02
+ [2026-02-03 14:18:59] (step=0027000) Train Loss: -2.9908, Train Steps/Sec: 1.02
+ [2026-02-03 14:20:37] (step=0027100) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-03 14:22:15] (step=0027200) Train Loss: -2.9917, Train Steps/Sec: 1.02
+ [2026-02-03 14:23:50] (step=0027300) Train Loss: -2.9884, Train Steps/Sec: 1.06
+ [2026-02-03 14:25:27] (step=0027400) Train Loss: -2.9851, Train Steps/Sec: 1.02
+ [2026-02-03 14:27:05] (step=0027500) Train Loss: -2.9896, Train Steps/Sec: 1.02
+ [2026-02-03 14:28:43] (step=0027600) Train Loss: -2.9866, Train Steps/Sec: 1.02
+ [2026-02-03 14:30:21] (step=0027700) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-03 14:31:59] (step=0027800) Train Loss: -2.9925, Train Steps/Sec: 1.02
+ [2026-02-03 14:33:36] (step=0027900) Train Loss: -2.9893, Train Steps/Sec: 1.03
+ [2026-02-03 14:35:13] (step=0028000) Train Loss: -2.9872, Train Steps/Sec: 1.02
+ [2026-02-03 14:36:51] (step=0028100) Train Loss: -2.9859, Train Steps/Sec: 1.02
+ [2026-02-03 14:38:29] (step=0028200) Train Loss: -2.9917, Train Steps/Sec: 1.02
+ [2026-02-03 14:40:07] (step=0028300) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-03 14:41:46] (step=0028400) Train Loss: -2.9881, Train Steps/Sec: 1.02
+ [2026-02-03 14:43:23] (step=0028500) Train Loss: -2.9831, Train Steps/Sec: 1.02
+ [2026-02-03 14:45:02] (step=0028600) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 14:46:39] (step=0028700) Train Loss: -2.9925, Train Steps/Sec: 1.02
+ [2026-02-03 14:48:17] (step=0028800) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-03 14:49:55] (step=0028900) Train Loss: -2.9915, Train Steps/Sec: 1.02
+ [2026-02-03 14:51:33] (step=0029000) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 14:53:10] (step=0029100) Train Loss: -2.9889, Train Steps/Sec: 1.03
+ [2026-02-03 14:54:48] (step=0029200) Train Loss: -2.9917, Train Steps/Sec: 1.03
+ [2026-02-03 14:56:26] (step=0029300) Train Loss: -2.9851, Train Steps/Sec: 1.02
+ [2026-02-03 14:58:04] (step=0029400) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-03 14:59:42] (step=0029500) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-03 15:01:20] (step=0029600) Train Loss: -2.9896, Train Steps/Sec: 1.03
+ [2026-02-03 15:02:57] (step=0029700) Train Loss: -2.9913, Train Steps/Sec: 1.02
+ [2026-02-03 15:04:35] (step=0029800) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-03 15:06:13] (step=0029900) Train Loss: -2.9887, Train Steps/Sec: 1.03
+ [2026-02-03 15:07:51] (step=0030000) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-03 15:08:15] Beginning epoch 6...
+ [2026-02-03 15:09:31] (step=0030100) Train Loss: -2.9889, Train Steps/Sec: 1.00
+ [2026-02-03 15:11:09] (step=0030200) Train Loss: -2.9905, Train Steps/Sec: 1.02
+ [2026-02-03 15:12:47] (step=0030300) Train Loss: -2.9833, Train Steps/Sec: 1.02
+ [2026-02-03 15:14:24] (step=0030400) Train Loss: -2.9880, Train Steps/Sec: 1.03
+ [2026-02-03 15:16:02] (step=0030500) Train Loss: -2.9881, Train Steps/Sec: 1.03
+ [2026-02-03 15:17:39] (step=0030600) Train Loss: -2.9924, Train Steps/Sec: 1.02
+ [2026-02-03 15:19:17] (step=0030700) Train Loss: -2.9888, Train Steps/Sec: 1.02
+ [2026-02-03 15:20:56] (step=0030800) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 15:22:33] (step=0030900) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 15:24:11] (step=0031000) Train Loss: -2.9885, Train Steps/Sec: 1.02
+ [2026-02-03 15:25:49] (step=0031100) Train Loss: -2.9836, Train Steps/Sec: 1.02
+ [2026-02-03 15:27:27] (step=0031200) Train Loss: -2.9901, Train Steps/Sec: 1.02
+ [2026-02-03 15:29:05] (step=0031300) Train Loss: -2.9919, Train Steps/Sec: 1.02
+ [2026-02-03 15:30:42] (step=0031400) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-03 15:32:21] (step=0031500) Train Loss: -2.9930, Train Steps/Sec: 1.02
+ [2026-02-03 15:33:59] (step=0031600) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-03 15:35:36] (step=0031700) Train Loss: -2.9879, Train Steps/Sec: 1.02
+ [2026-02-03 15:37:14] (step=0031800) Train Loss: -2.9910, Train Steps/Sec: 1.02
+ [2026-02-03 15:38:52] (step=0031900) Train Loss: -2.9891, Train Steps/Sec: 1.03
+ [2026-02-03 15:40:29] (step=0032000) Train Loss: -2.9904, Train Steps/Sec: 1.02
+ [2026-02-03 15:42:04] (step=0032100) Train Loss: -2.9853, Train Steps/Sec: 1.06
+ [2026-02-03 15:43:42] (step=0032200) Train Loss: -2.9875, Train Steps/Sec: 1.02
+ [2026-02-03 15:45:20] (step=0032300) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-03 15:46:58] (step=0032400) Train Loss: -2.9881, Train Steps/Sec: 1.02
+ [2026-02-03 15:48:36] (step=0032500) Train Loss: -2.9862, Train Steps/Sec: 1.02
+ [2026-02-03 15:50:14] (step=0032600) Train Loss: -2.9863, Train Steps/Sec: 1.02
+ [2026-02-03 15:51:52] (step=0032700) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-03 15:53:30] (step=0032800) Train Loss: -2.9881, Train Steps/Sec: 1.02
+ [2026-02-03 15:55:08] (step=0032900) Train Loss: -2.9877, Train Steps/Sec: 1.02
+ [2026-02-03 15:56:46] (step=0033000) Train Loss: -2.9921, Train Steps/Sec: 1.02
+ [2026-02-03 15:58:24] (step=0033100) Train Loss: -2.9846, Train Steps/Sec: 1.02
+ [2026-02-03 16:00:02] (step=0033200) Train Loss: -2.9850, Train Steps/Sec: 1.02
+ [2026-02-03 16:01:39] (step=0033300) Train Loss: -2.9908, Train Steps/Sec: 1.02
+ [2026-02-03 16:03:17] (step=0033400) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-03 16:04:55] (step=0033500) Train Loss: -2.9814, Train Steps/Sec: 1.02
+ [2026-02-03 16:06:33] (step=0033600) Train Loss: -2.9837, Train Steps/Sec: 1.02
+ [2026-02-03 16:08:11] (step=0033700) Train Loss: -2.9859, Train Steps/Sec: 1.02
+ [2026-02-03 16:09:49] (step=0033800) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-03 16:11:27] (step=0033900) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-03 16:13:05] (step=0034000) Train Loss: -2.9859, Train Steps/Sec: 1.02
+ [2026-02-03 16:14:43] (step=0034100) Train Loss: -2.9872, Train Steps/Sec: 1.02
+ [2026-02-03 16:16:20] (step=0034200) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-03 16:17:58] (step=0034300) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-03 16:19:36] (step=0034400) Train Loss: -2.9885, Train Steps/Sec: 1.02
+ [2026-02-03 16:21:14] (step=0034500) Train Loss: -2.9865, Train Steps/Sec: 1.03
+ [2026-02-03 16:22:51] (step=0034600) Train Loss: -2.9868, Train Steps/Sec: 1.03
+ [2026-02-03 16:24:29] (step=0034700) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-03 16:26:07] (step=0034800) Train Loss: -2.9918, Train Steps/Sec: 1.02
+ [2026-02-03 16:27:45] (step=0034900) Train Loss: -2.9873, Train Steps/Sec: 1.02
+ [2026-02-03 16:29:22] (step=0035000) Train Loss: -2.9874, Train Steps/Sec: 1.03
+ [2026-02-03 16:29:50] Beginning epoch 7...
+ [2026-02-03 16:31:03] (step=0035100) Train Loss: -2.9881, Train Steps/Sec: 1.00
+ [2026-02-03 16:32:41] (step=0035200) Train Loss: -2.9922, Train Steps/Sec: 1.02
+ [2026-02-03 16:34:19] (step=0035300) Train Loss: -2.9847, Train Steps/Sec: 1.02
+ [2026-02-03 16:35:56] (step=0035400) Train Loss: -2.9855, Train Steps/Sec: 1.03
+ [2026-02-03 16:37:35] (step=0035500) Train Loss: -2.9950, Train Steps/Sec: 1.02
+ [2026-02-03 16:39:12] (step=0035600) Train Loss: -2.9896, Train Steps/Sec: 1.02
+ [2026-02-03 16:40:50] (step=0035700) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 16:42:28] (step=0035800) Train Loss: -2.9853, Train Steps/Sec: 1.02
+ [2026-02-03 16:44:06] (step=0035900) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-03 16:45:44] (step=0036000) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 16:47:21] (step=0036100) Train Loss: -2.9906, Train Steps/Sec: 1.02
+ [2026-02-03 16:48:59] (step=0036200) Train Loss: -2.9868, Train Steps/Sec: 1.03
+ [2026-02-03 16:50:37] (step=0036300) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-03 16:52:14] (step=0036400) Train Loss: -2.9840, Train Steps/Sec: 1.03
+ [2026-02-03 16:53:52] (step=0036500) Train Loss: -2.9848, Train Steps/Sec: 1.02
+ [2026-02-03 16:55:30] (step=0036600) Train Loss: -2.9852, Train Steps/Sec: 1.02
+ [2026-02-03 16:57:08] (step=0036700) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-03 16:58:46] (step=0036800) Train Loss: -2.9842, Train Steps/Sec: 1.02
+ [2026-02-03 17:00:21] (step=0036900) Train Loss: -2.9858, Train Steps/Sec: 1.05
+ [2026-02-03 17:01:58] (step=0037000) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 17:03:36] (step=0037100) Train Loss: -2.9892, Train Steps/Sec: 1.02
+ [2026-02-03 17:05:14] (step=0037200) Train Loss: -2.9854, Train Steps/Sec: 1.02
+ [2026-02-03 17:06:52] (step=0037300) Train Loss: -2.9902, Train Steps/Sec: 1.02
+ [2026-02-03 17:08:30] (step=0037400) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-03 17:10:08] (step=0037500) Train Loss: -2.9872, Train Steps/Sec: 1.02
+ [2026-02-03 17:11:46] (step=0037600) Train Loss: -2.9898, Train Steps/Sec: 1.02
+ [2026-02-03 17:13:24] (step=0037700) Train Loss: -2.9885, Train Steps/Sec: 1.03
+ [2026-02-03 17:15:02] (step=0037800) Train Loss: -2.9913, Train Steps/Sec: 1.02
+ [2026-02-03 17:16:40] (step=0037900) Train Loss: -2.9863, Train Steps/Sec: 1.02
+ [2026-02-03 17:18:18] (step=0038000) Train Loss: -2.9926, Train Steps/Sec: 1.02
+ [2026-02-03 17:19:56] (step=0038100) Train Loss: -2.9901, Train Steps/Sec: 1.02
+ [2026-02-03 17:21:34] (step=0038200) Train Loss: -2.9866, Train Steps/Sec: 1.02
+ [2026-02-03 17:23:12] (step=0038300) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-03 17:24:50] (step=0038400) Train Loss: -2.9900, Train Steps/Sec: 1.02
+ [2026-02-03 17:26:27] (step=0038500) Train Loss: -2.9831, Train Steps/Sec: 1.02
+ [2026-02-03 17:28:05] (step=0038600) Train Loss: -2.9854, Train Steps/Sec: 1.03
+ [2026-02-03 17:29:43] (step=0038700) Train Loss: -2.9888, Train Steps/Sec: 1.02
+ [2026-02-03 17:31:21] (step=0038800) Train Loss: -2.9871, Train Steps/Sec: 1.02
+ [2026-02-03 17:32:59] (step=0038900) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 17:34:37] (step=0039000) Train Loss: -2.9915, Train Steps/Sec: 1.03
+ [2026-02-03 17:36:15] (step=0039100) Train Loss: -2.9872, Train Steps/Sec: 1.02
+ [2026-02-03 17:37:53] (step=0039200) Train Loss: -2.9919, Train Steps/Sec: 1.02
+ [2026-02-03 17:39:30] (step=0039300) Train Loss: -2.9894, Train Steps/Sec: 1.03
+ [2026-02-03 17:41:08] (step=0039400) Train Loss: -2.9859, Train Steps/Sec: 1.02
+ [2026-02-03 17:42:46] (step=0039500) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-03 17:44:24] (step=0039600) Train Loss: -2.9895, Train Steps/Sec: 1.02
+ [2026-02-03 17:46:02] (step=0039700) Train Loss: -2.9927, Train Steps/Sec: 1.02
+ [2026-02-03 17:47:39] (step=0039800) Train Loss: -2.9872, Train Steps/Sec: 1.02
+ [2026-02-03 17:49:17] (step=0039900) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-03 17:50:55] (step=0040000) Train Loss: -2.9913, Train Steps/Sec: 1.02
+ [2026-02-03 17:51:27] Beginning epoch 8...
+ [2026-02-03 17:52:35] (step=0040100) Train Loss: -2.9853, Train Steps/Sec: 1.00
+ [2026-02-03 17:54:13] (step=0040200) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-03 17:55:51] (step=0040300) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-03 17:57:29] (step=0040400) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-03 17:59:06] (step=0040500) Train Loss: -2.9873, Train Steps/Sec: 1.03
+ [2026-02-03 18:00:44] (step=0040600) Train Loss: -2.9879, Train Steps/Sec: 1.03
+ [2026-02-03 18:02:22] (step=0040700) Train Loss: -2.9881, Train Steps/Sec: 1.02
+ [2026-02-03 18:04:00] (step=0040800) Train Loss: -2.9875, Train Steps/Sec: 1.02
+ [2026-02-03 18:05:38] (step=0040900) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 18:07:16] (step=0041000) Train Loss: -2.9832, Train Steps/Sec: 1.02
+ [2026-02-03 18:08:54] (step=0041100) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-03 18:10:31] (step=0041200) Train Loss: -2.9928, Train Steps/Sec: 1.02
+ [2026-02-03 18:12:09] (step=0041300) Train Loss: -2.9884, Train Steps/Sec: 1.02
+ [2026-02-03 18:13:47] (step=0041400) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-03 18:15:25] (step=0041500) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 18:17:00] (step=0041600) Train Loss: -2.9869, Train Steps/Sec: 1.06
+ [2026-02-03 18:18:37] (step=0041700) Train Loss: -2.9919, Train Steps/Sec: 1.03
+ [2026-02-03 18:20:15] (step=0041800) Train Loss: -2.9858, Train Steps/Sec: 1.02
+ [2026-02-03 18:21:53] (step=0041900) Train Loss: -2.9856, Train Steps/Sec: 1.02
+ [2026-02-03 18:23:30] (step=0042000) Train Loss: -2.9883, Train Steps/Sec: 1.02
+ [2026-02-03 18:25:08] (step=0042100) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-03 18:26:46] (step=0042200) Train Loss: -2.9826, Train Steps/Sec: 1.03
+ [2026-02-03 18:28:23] (step=0042300) Train Loss: -2.9954, Train Steps/Sec: 1.03
+ [2026-02-03 18:30:00] (step=0042400) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-03 18:31:38] (step=0042500) Train Loss: -2.9865, Train Steps/Sec: 1.02
+ [2026-02-03 18:33:16] (step=0042600) Train Loss: -2.9924, Train Steps/Sec: 1.02
+ [2026-02-03 18:34:54] (step=0042700) Train Loss: -2.9881, Train Steps/Sec: 1.02
+ [2026-02-03 18:36:32] (step=0042800) Train Loss: -2.9871, Train Steps/Sec: 1.02
+ [2026-02-03 18:38:10] (step=0042900) Train Loss: -2.9893, Train Steps/Sec: 1.02
+ [2026-02-03 18:39:48] (step=0043000) Train Loss: -2.9860, Train Steps/Sec: 1.02
+ [2026-02-03 18:41:25] (step=0043100) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-03 18:43:03] (step=0043200) Train Loss: -2.9875, Train Steps/Sec: 1.03
+ [2026-02-03 18:44:41] (step=0043300) Train Loss: -2.9882, Train Steps/Sec: 1.02
+ [2026-02-03 18:46:19] (step=0043400) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-03 18:47:57] (step=0043500) Train Loss: -2.9948, Train Steps/Sec: 1.02
+ [2026-02-03 18:49:34] (step=0043600) Train Loss: -2.9888, Train Steps/Sec: 1.02
+ [2026-02-03 18:51:12] (step=0043700) Train Loss: -2.9846, Train Steps/Sec: 1.02
+ [2026-02-03 18:52:51] (step=0043800) Train Loss: -2.9913, Train Steps/Sec: 1.01
+ [2026-02-03 18:54:29] (step=0043900) Train Loss: -2.9863, Train Steps/Sec: 1.02
+ [2026-02-03 18:56:07] (step=0044000) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 18:57:45] (step=0044100) Train Loss: -2.9862, Train Steps/Sec: 1.02
+ [2026-02-03 18:59:23] (step=0044200) Train Loss: -2.9875, Train Steps/Sec: 1.02
+ [2026-02-03 19:01:00] (step=0044300) Train Loss: -2.9850, Train Steps/Sec: 1.03
+ [2026-02-03 19:02:38] (step=0044400) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-03 19:04:16] (step=0044500) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-03 19:05:53] (step=0044600) Train Loss: -2.9881, Train Steps/Sec: 1.03
+ [2026-02-03 19:07:31] (step=0044700) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-03 19:09:09] (step=0044800) Train Loss: -2.9875, Train Steps/Sec: 1.02
+ [2026-02-03 19:10:47] (step=0044900) Train Loss: -2.9908, Train Steps/Sec: 1.02
+ [2026-02-03 19:12:25] (step=0045000) Train Loss: -2.9902, Train Steps/Sec: 1.02
+ [2026-02-03 19:13:00] Beginning epoch 9...
+ [2026-02-03 19:14:05] (step=0045100) Train Loss: -2.9923, Train Steps/Sec: 1.00
+ [2026-02-03 19:15:43] (step=0045200) Train Loss: -2.9882, Train Steps/Sec: 1.02
+ [2026-02-03 19:17:21] (step=0045300) Train Loss: -2.9932, Train Steps/Sec: 1.02
+ [2026-02-03 19:18:59] (step=0045400) Train Loss: -2.9883, Train Steps/Sec: 1.02
+ [2026-02-03 19:20:37] (step=0045500) Train Loss: -2.9825, Train Steps/Sec: 1.02
+ [2026-02-03 19:22:15] (step=0045600) Train Loss: -2.9882, Train Steps/Sec: 1.02
+ [2026-02-03 19:23:54] (step=0045700) Train Loss: -2.9896, Train Steps/Sec: 1.02
+ [2026-02-03 19:25:31] (step=0045800) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-03 19:27:09] (step=0045900) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-03 19:28:47] (step=0046000) Train Loss: -2.9868, Train Steps/Sec: 1.03
+ [2026-02-03 19:30:25] (step=0046100) Train Loss: -2.9908, Train Steps/Sec: 1.02
+ [2026-02-03 19:32:03] (step=0046200) Train Loss: -2.9925, Train Steps/Sec: 1.02
+ [2026-02-03 19:33:40] (step=0046300) Train Loss: -2.9885, Train Steps/Sec: 1.02
+ [2026-02-03 19:35:15] (step=0046400) Train Loss: -2.9863, Train Steps/Sec: 1.06
+ [2026-02-03 19:36:53] (step=0046500) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-03 19:38:30] (step=0046600) Train Loss: -2.9879, Train Steps/Sec: 1.02
+ [2026-02-03 19:40:08] (step=0046700) Train Loss: -2.9853, Train Steps/Sec: 1.02
+ [2026-02-03 19:41:47] (step=0046800) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-03 19:43:25] (step=0046900) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-03 19:45:03] (step=0047000) Train Loss: -2.9863, Train Steps/Sec: 1.02
+ [2026-02-03 19:46:40] (step=0047100) Train Loss: -2.9877, Train Steps/Sec: 1.02
+ [2026-02-03 19:48:18] (step=0047200) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-03 19:49:56] (step=0047300) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-03 19:51:34] (step=0047400) Train Loss: -2.9927, Train Steps/Sec: 1.02
+ [2026-02-03 19:53:12] (step=0047500) Train Loss: -2.9912, Train Steps/Sec: 1.02
+ [2026-02-03 19:54:50] (step=0047600) Train Loss: -2.9877, Train Steps/Sec: 1.02
+ [2026-02-03 19:56:28] (step=0047700) Train Loss: -2.9892, Train Steps/Sec: 1.02
+ [2026-02-03 19:58:06] (step=0047800) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 19:59:44] (step=0047900) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-03 20:01:22] (step=0048000) Train Loss: -2.9915, Train Steps/Sec: 1.02
+ [2026-02-03 20:03:00] (step=0048100) Train Loss: -2.9858, Train Steps/Sec: 1.02
+ [2026-02-03 20:04:38] (step=0048200) Train Loss: -2.9865, Train Steps/Sec: 1.02
+ [2026-02-03 20:06:16] (step=0048300) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-03 20:07:54] (step=0048400) Train Loss: -2.9904, Train Steps/Sec: 1.02
+ [2026-02-03 20:09:32] (step=0048500) Train Loss: -2.9878, Train Steps/Sec: 1.02
+ [2026-02-03 20:11:10] (step=0048600) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 20:12:48] (step=0048700) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 20:14:26] (step=0048800) Train Loss: -2.9853, Train Steps/Sec: 1.02
+ [2026-02-03 20:16:04] (step=0048900) Train Loss: -2.9901, Train Steps/Sec: 1.02
+ [2026-02-03 20:17:41] (step=0049000) Train Loss: -2.9853, Train Steps/Sec: 1.02
+ [2026-02-03 20:19:19] (step=0049100) Train Loss: -2.9849, Train Steps/Sec: 1.02
+ [2026-02-03 20:20:57] (step=0049200) Train Loss: -2.9873, Train Steps/Sec: 1.02
+ [2026-02-03 20:22:35] (step=0049300) Train Loss: -2.9865, Train Steps/Sec: 1.02
+ [2026-02-03 20:24:12] (step=0049400) Train Loss: -2.9888, Train Steps/Sec: 1.03
+ [2026-02-03 20:25:51] (step=0049500) Train Loss: -2.9911, Train Steps/Sec: 1.02
+ [2026-02-03 20:27:29] (step=0049600) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-03 20:29:07] (step=0049700) Train Loss: -2.9921, Train Steps/Sec: 1.02
+ [2026-02-03 20:30:45] (step=0049800) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 20:32:23] (step=0049900) Train Loss: -2.9805, Train Steps/Sec: 1.02
+ [2026-02-03 20:34:01] (step=0050000) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 20:34:02] Saved checkpoint to results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/checkpoints/0050000.pt
+ [2026-02-03 20:34:41] Beginning epoch 10...
+ [2026-02-03 20:35:42] (step=0050100) Train Loss: -2.9896, Train Steps/Sec: 0.99
+ [2026-02-03 20:37:20] (step=0050200) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 20:38:58] (step=0050300) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-03 20:40:20] Generating EMA samples...
+ [2026-02-03 20:40:35] (step=0050400) Train Loss: -2.9846, Train Steps/Sec: 1.03
+ [2026-02-03 20:42:13] (step=0050500) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-03 20:43:51] (step=0050600) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-03 20:45:29] (step=0050700) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-03 20:47:07] (step=0050800) Train Loss: -2.9852, Train Steps/Sec: 1.02
+ [2026-02-03 20:48:44] (step=0050900) Train Loss: -2.9878, Train Steps/Sec: 1.03
+ [2026-02-03 20:50:21] (step=0051000) Train Loss: -2.9897, Train Steps/Sec: 1.03
+ [2026-02-03 20:52:00] (step=0051100) Train Loss: -2.9875, Train Steps/Sec: 1.02
+ [2026-02-03 20:53:34] (step=0051200) Train Loss: -2.9882, Train Steps/Sec: 1.06
+ [2026-02-03 20:55:12] (step=0051300) Train Loss: -2.9856, Train Steps/Sec: 1.02
+ [2026-02-03 20:56:50] (step=0051400) Train Loss: -2.9870, Train Steps/Sec: 1.02
+ [2026-02-03 20:58:28] (step=0051500) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-03 21:00:06] (step=0051600) Train Loss: -2.9896, Train Steps/Sec: 1.02
+ [2026-02-03 21:01:44] (step=0051700) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 21:03:22] (step=0051800) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-03 21:04:59] (step=0051900) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 21:06:38] (step=0052000) Train Loss: -2.9898, Train Steps/Sec: 1.02
+ [2026-02-03 21:08:16] (step=0052100) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-03 21:09:54] (step=0052200) Train Loss: -2.9888, Train Steps/Sec: 1.02
+ [2026-02-03 21:11:31] (step=0052300) Train Loss: -2.9868, Train Steps/Sec: 1.03
+ [2026-02-03 21:13:09] (step=0052400) Train Loss: -2.9857, Train Steps/Sec: 1.02
+ [2026-02-03 21:14:47] (step=0052500) Train Loss: -2.9898, Train Steps/Sec: 1.03
+ [2026-02-03 21:16:25] (step=0052600) Train Loss: -2.9875, Train Steps/Sec: 1.02
+ [2026-02-03 21:18:03] (step=0052700) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-03 21:19:40] (step=0052800) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 21:21:18] (step=0052900) Train Loss: -2.9866, Train Steps/Sec: 1.02
+ [2026-02-03 21:22:56] (step=0053000) Train Loss: -2.9871, Train Steps/Sec: 1.02
+ [2026-02-03 21:24:34] (step=0053100) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-03 21:26:12] (step=0053200) Train Loss: -2.9921, Train Steps/Sec: 1.02
+ [2026-02-03 21:27:49] (step=0053300) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-03 21:29:27] (step=0053400) Train Loss: -2.9859, Train Steps/Sec: 1.02
+ [2026-02-03 21:31:05] (step=0053500) Train Loss: -2.9909, Train Steps/Sec: 1.02
+ [2026-02-03 21:32:43] (step=0053600) Train Loss: -2.9928, Train Steps/Sec: 1.02
+ [2026-02-03 21:34:21] (step=0053700) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-03 21:35:59] (step=0053800) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 21:37:37] (step=0053900) Train Loss: -2.9883, Train Steps/Sec: 1.02
+ [2026-02-03 21:39:15] (step=0054000) Train Loss: -2.9844, Train Steps/Sec: 1.02
+ [2026-02-03 21:40:53] (step=0054100) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 21:42:31] (step=0054200) Train Loss: -2.9911, Train Steps/Sec: 1.02
+ [2026-02-03 21:44:09] (step=0054300) Train Loss: -2.9915, Train Steps/Sec: 1.02
+ [2026-02-03 21:45:47] (step=0054400) Train Loss: -2.9865, Train Steps/Sec: 1.02
+ [2026-02-03 21:47:24] (step=0054500) Train Loss: -2.9854, Train Steps/Sec: 1.03
+ [2026-02-03 21:49:02] (step=0054600) Train Loss: -2.9923, Train Steps/Sec: 1.02
+ [2026-02-03 21:50:39] (step=0054700) Train Loss: -2.9864, Train Steps/Sec: 1.03
+ [2026-02-03 21:52:17] (step=0054800) Train Loss: -2.9826, Train Steps/Sec: 1.02
+ [2026-02-03 21:53:55] (step=0054900) Train Loss: -2.9858, Train Steps/Sec: 1.02
+ [2026-02-03 21:55:33] (step=0055000) Train Loss: -2.9875, Train Steps/Sec: 1.02
+ [2026-02-03 21:56:16] Beginning epoch 11...
+ [2026-02-03 21:57:13] (step=0055100) Train Loss: -2.9926, Train Steps/Sec: 1.00
+ [2026-02-03 21:58:50] (step=0055200) Train Loss: -2.9919, Train Steps/Sec: 1.02
+ [2026-02-03 22:00:28] (step=0055300) Train Loss: -2.9910, Train Steps/Sec: 1.02
+ [2026-02-03 22:02:06] (step=0055400) Train Loss: -2.9851, Train Steps/Sec: 1.02
+ [2026-02-03 22:03:44] (step=0055500) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-03 22:05:22] (step=0055600) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-03 22:07:00] (step=0055700) Train Loss: -2.9873, Train Steps/Sec: 1.02
+ [2026-02-03 22:08:37] (step=0055800) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-03 22:10:15] (step=0055900) Train Loss: -2.9909, Train Steps/Sec: 1.02
+ [2026-02-03 22:11:50] (step=0056000) Train Loss: -2.9884, Train Steps/Sec: 1.06
+ [2026-02-03 22:13:28] (step=0056100) Train Loss: -2.9902, Train Steps/Sec: 1.02
+ [2026-02-03 22:15:05] (step=0056200) Train Loss: -2.9904, Train Steps/Sec: 1.03
+ [2026-02-03 22:16:43] (step=0056300) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 22:18:21] (step=0056400) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-03 22:19:59] (step=0056500) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 22:21:37] (step=0056600) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 22:23:15] (step=0056700) Train Loss: -2.9888, Train Steps/Sec: 1.02
+ [2026-02-03 22:24:53] (step=0056800) Train Loss: -2.9846, Train Steps/Sec: 1.02
+ [2026-02-03 22:26:32] (step=0056900) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 22:28:10] (step=0057000) Train Loss: -2.9846, Train Steps/Sec: 1.02
+ [2026-02-03 22:29:48] (step=0057100) Train Loss: -2.9884, Train Steps/Sec: 1.02
+ [2026-02-03 22:31:26] (step=0057200) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 22:33:04] (step=0057300) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 22:34:41] (step=0057400) Train Loss: -2.9913, Train Steps/Sec: 1.02
+ [2026-02-03 22:36:19] (step=0057500) Train Loss: -2.9885, Train Steps/Sec: 1.02
+ [2026-02-03 22:37:57] (step=0057600) Train Loss: -2.9872, Train Steps/Sec: 1.03
+ [2026-02-03 22:39:34] (step=0057700) Train Loss: -2.9902, Train Steps/Sec: 1.03
+ [2026-02-03 22:41:12] (step=0057800) Train Loss: -2.9949, Train Steps/Sec: 1.02
+ [2026-02-03 22:42:50] (step=0057900) Train Loss: -2.9919, Train Steps/Sec: 1.02
+ [2026-02-03 22:44:28] (step=0058000) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 22:46:06] (step=0058100) Train Loss: -2.9908, Train Steps/Sec: 1.02
+ [2026-02-03 22:47:44] (step=0058200) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-03 22:49:22] (step=0058300) Train Loss: -2.9900, Train Steps/Sec: 1.02
+ [2026-02-03 22:50:59] (step=0058400) Train Loss: -2.9865, Train Steps/Sec: 1.03
+ [2026-02-03 22:52:37] (step=0058500) Train Loss: -2.9851, Train Steps/Sec: 1.02
+ [2026-02-03 22:54:15] (step=0058600) Train Loss: -2.9861, Train Steps/Sec: 1.01
+ [2026-02-03 22:55:53] (step=0058700) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-03 22:57:31] (step=0058800) Train Loss: -2.9918, Train Steps/Sec: 1.02
+ [2026-02-03 22:59:09] (step=0058900) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 23:00:47] (step=0059000) Train Loss: -2.9864, Train Steps/Sec: 1.02
+ [2026-02-03 23:02:25] (step=0059100) Train Loss: -2.9920, Train Steps/Sec: 1.02
+ [2026-02-03 23:04:03] (step=0059200) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-03 23:05:41] (step=0059300) Train Loss: -2.9895, Train Steps/Sec: 1.02
+ [2026-02-03 23:07:19] (step=0059400) Train Loss: -2.9911, Train Steps/Sec: 1.02
+ [2026-02-03 23:08:57] (step=0059500) Train Loss: -2.9857, Train Steps/Sec: 1.02
+ [2026-02-03 23:10:34] (step=0059600) Train Loss: -2.9925, Train Steps/Sec: 1.03
+ [2026-02-03 23:12:12] (step=0059700) Train Loss: -2.9885, Train Steps/Sec: 1.02
+ [2026-02-03 23:13:50] (step=0059800) Train Loss: -2.9883, Train Steps/Sec: 1.02
+ [2026-02-03 23:15:28] (step=0059900) Train Loss: -2.9914, Train Steps/Sec: 1.02
+ [2026-02-03 23:17:06] (step=0060000) Train Loss: -2.9892, Train Steps/Sec: 1.02
+ [2026-02-03 23:17:53] Beginning epoch 12...
+ [2026-02-03 23:18:45] (step=0060100) Train Loss: -2.9931, Train Steps/Sec: 1.00
+ [2026-02-03 23:20:23] (step=0060200) Train Loss: -2.9852, Train Steps/Sec: 1.02
+ [2026-02-03 23:22:01] (step=0060300) Train Loss: -2.9839, Train Steps/Sec: 1.02
+ [2026-02-03 23:23:39] (step=0060400) Train Loss: -2.9866, Train Steps/Sec: 1.02
+ [2026-02-03 23:25:17] (step=0060500) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-03 23:26:55] (step=0060600) Train Loss: -2.9869, Train Steps/Sec: 1.03
+ [2026-02-03 23:28:33] (step=0060700) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-03 23:30:07] (step=0060800) Train Loss: -2.9867, Train Steps/Sec: 1.06
+ [2026-02-03 23:31:45] (step=0060900) Train Loss: -2.9912, Train Steps/Sec: 1.02
+ [2026-02-03 23:33:23] (step=0061000) Train Loss: -2.9864, Train Steps/Sec: 1.02
+ [2026-02-03 23:35:01] (step=0061100) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-03 23:36:39] (step=0061200) Train Loss: -2.9844, Train Steps/Sec: 1.02
+ [2026-02-03 23:38:17] (step=0061300) Train Loss: -2.9937, Train Steps/Sec: 1.02
+ [2026-02-03 23:39:55] (step=0061400) Train Loss: -2.9877, Train Steps/Sec: 1.02
+ [2026-02-03 23:41:33] (step=0061500) Train Loss: -2.9898, Train Steps/Sec: 1.02
+ [2026-02-03 23:43:10] (step=0061600) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-03 23:44:48] (step=0061700) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-03 23:46:25] (step=0061800) Train Loss: -2.9888, Train Steps/Sec: 1.03
+ [2026-02-03 23:48:03] (step=0061900) Train Loss: -2.9867, Train Steps/Sec: 1.03
+ [2026-02-03 23:49:41] (step=0062000) Train Loss: -2.9901, Train Steps/Sec: 1.02
+ [2026-02-03 23:51:19] (step=0062100) Train Loss: -2.9850, Train Steps/Sec: 1.02
+ [2026-02-03 23:52:56] (step=0062200) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-03 23:54:34] (step=0062300) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-03 23:56:12] (step=0062400) Train Loss: -2.9879, Train Steps/Sec: 1.02
+ [2026-02-03 23:57:50] (step=0062500) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 23:59:28] (step=0062600) Train Loss: -2.9854, Train Steps/Sec: 1.02
+ [2026-02-04 00:01:06] (step=0062700) Train Loss: -2.9918, Train Steps/Sec: 1.02
+ [2026-02-04 00:02:44] (step=0062800) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-04 00:04:21] (step=0062900) Train Loss: -2.9891, Train Steps/Sec: 1.03
+ [2026-02-04 00:05:58] (step=0063000) Train Loss: -2.9885, Train Steps/Sec: 1.03
+ [2026-02-04 00:07:36] (step=0063100) Train Loss: -2.9878, Train Steps/Sec: 1.02
+ [2026-02-04 00:09:14] (step=0063200) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-04 00:10:52] (step=0063300) Train Loss: -2.9942, Train Steps/Sec: 1.02
+ [2026-02-04 00:12:30] (step=0063400) Train Loss: -2.9877, Train Steps/Sec: 1.02
+ [2026-02-04 00:14:07] (step=0063500) Train Loss: -2.9898, Train Steps/Sec: 1.03
+ [2026-02-04 00:15:46] (step=0063600) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-04 00:17:24] (step=0063700) Train Loss: -2.9892, Train Steps/Sec: 1.02
+ [2026-02-04 00:19:02] (step=0063800) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-04 00:20:39] (step=0063900) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-04 00:22:18] (step=0064000) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-04 00:23:55] (step=0064100) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-04 00:25:33] (step=0064200) Train Loss: -2.9838, Train Steps/Sec: 1.02
+ [2026-02-04 00:27:11] (step=0064300) Train Loss: -2.9857, Train Steps/Sec: 1.02
+ [2026-02-04 00:28:49] (step=0064400) Train Loss: -2.9905, Train Steps/Sec: 1.03
+ [2026-02-04 00:30:26] (step=0064500) Train Loss: -2.9910, Train Steps/Sec: 1.02
+ [2026-02-04 00:32:05] (step=0064600) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-04 00:33:42] (step=0064700) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-04 00:35:20] (step=0064800) Train Loss: -2.9896, Train Steps/Sec: 1.02
+ [2026-02-04 00:36:58] (step=0064900) Train Loss: -2.9893, Train Steps/Sec: 1.02
+ [2026-02-04 00:38:36] (step=0065000) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-04 00:39:28] Beginning epoch 13...
+ [2026-02-04 00:40:16] (step=0065100) Train Loss: -2.9899, Train Steps/Sec: 1.00
+ [2026-02-04 00:41:54] (step=0065200) Train Loss: -2.9946, Train Steps/Sec: 1.02
+ [2026-02-04 00:43:32] (step=0065300) Train Loss: -2.9928, Train Steps/Sec: 1.02
+ [2026-02-04 00:45:10] (step=0065400) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-04 00:46:46] (step=0065500) Train Loss: -2.9877, Train Steps/Sec: 1.05
+ [2026-02-04 00:48:22] (step=0065600) Train Loss: -2.9892, Train Steps/Sec: 1.03
+ [2026-02-04 00:50:00] (step=0065700) Train Loss: -2.9847, Train Steps/Sec: 1.02
+ [2026-02-04 00:51:38] (step=0065800) Train Loss: -2.9859, Train Steps/Sec: 1.02
+ [2026-02-04 00:53:16] (step=0065900) Train Loss: -2.9838, Train Steps/Sec: 1.03
+ [2026-02-04 00:54:54] (step=0066000) Train Loss: -2.9848, Train Steps/Sec: 1.02
+ [2026-02-04 00:56:31] (step=0066100) Train Loss: -2.9864, Train Steps/Sec: 1.02
+ [2026-02-04 00:58:08] (step=0066200) Train Loss: -2.9903, Train Steps/Sec: 1.03
+ [2026-02-04 00:59:46] (step=0066300) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-04 01:01:24] (step=0066400) Train Loss: -2.9881, Train Steps/Sec: 1.02
+ [2026-02-04 01:03:02] (step=0066500) Train Loss: -2.9850, Train Steps/Sec: 1.03
+ [2026-02-04 01:04:40] (step=0066600) Train Loss: -2.9870, Train Steps/Sec: 1.02
+ [2026-02-04 01:06:18] (step=0066700) Train Loss: -2.9866, Train Steps/Sec: 1.02
+ [2026-02-04 01:07:56] (step=0066800) Train Loss: -2.9895, Train Steps/Sec: 1.02
+ [2026-02-04 01:09:34] (step=0066900) Train Loss: -2.9862, Train Steps/Sec: 1.02
+ [2026-02-04 01:11:11] (step=0067000) Train Loss: -2.9913, Train Steps/Sec: 1.03
+ [2026-02-04 01:12:48] (step=0067100) Train Loss: -2.9877, Train Steps/Sec: 1.03
+ [2026-02-04 01:14:26] (step=0067200) Train Loss: -2.9923, Train Steps/Sec: 1.03
+ [2026-02-04 01:16:04] (step=0067300) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-04 01:17:42] (step=0067400) Train Loss: -2.9904, Train Steps/Sec: 1.02
+ [2026-02-04 01:19:20] (step=0067500) Train Loss: -2.9905, Train Steps/Sec: 1.02
+ [2026-02-04 01:20:58] (step=0067600) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-04 01:22:36] (step=0067700) Train Loss: -2.9877, Train Steps/Sec: 1.02
+ [2026-02-04 01:24:14] (step=0067800) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-04 01:25:52] (step=0067900) Train Loss: -2.9875, Train Steps/Sec: 1.03
+ [2026-02-04 01:27:29] (step=0068000) Train Loss: -2.9834, Train Steps/Sec: 1.02
+ [2026-02-04 01:29:07] (step=0068100) Train Loss: -2.9885, Train Steps/Sec: 1.02
+ [2026-02-04 01:30:45] (step=0068200) Train Loss: -2.9882, Train Steps/Sec: 1.02
+ [2026-02-04 01:32:22] (step=0068300) Train Loss: -2.9922, Train Steps/Sec: 1.03
+ [2026-02-04 01:34:01] (step=0068400) Train Loss: -2.9823, Train Steps/Sec: 1.02
+ [2026-02-04 01:35:38] (step=0068500) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-04 01:37:15] (step=0068600) Train Loss: -2.9938, Train Steps/Sec: 1.03
+ [2026-02-04 01:38:53] (step=0068700) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-04 01:40:31] (step=0068800) Train Loss: -2.9893, Train Steps/Sec: 1.02
+ [2026-02-04 01:42:09] (step=0068900) Train Loss: -2.9892, Train Steps/Sec: 1.02
+ [2026-02-04 01:43:47] (step=0069000) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-04 01:45:25] (step=0069100) Train Loss: -2.9871, Train Steps/Sec: 1.02
+ [2026-02-04 01:47:02] (step=0069200) Train Loss: -2.9910, Train Steps/Sec: 1.03
+ [2026-02-04 01:48:40] (step=0069300) Train Loss: -2.9894, Train Steps/Sec: 1.03
+ [2026-02-04 01:50:17] (step=0069400) Train Loss: -2.9837, Train Steps/Sec: 1.02
+ [2026-02-04 01:51:55] (step=0069500) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-04 01:53:33] (step=0069600) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-04 01:55:11] (step=0069700) Train Loss: -2.9852, Train Steps/Sec: 1.03
+ [2026-02-04 01:56:49] (step=0069800) Train Loss: -2.9926, Train Steps/Sec: 1.02
+ [2026-02-04 01:58:27] (step=0069900) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-04 02:00:05] (step=0070000) Train Loss: -2.9908, Train Steps/Sec: 1.02
+ [2026-02-04 02:01:01] Beginning epoch 14...
+ [2026-02-04 02:01:45] (step=0070100) Train Loss: -2.9858, Train Steps/Sec: 1.00
+ [2026-02-04 02:03:23] (step=0070200) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-04 02:04:57] (step=0070300) Train Loss: -2.9891, Train Steps/Sec: 1.06
+ [2026-02-04 02:06:35] (step=0070400) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-04 02:08:13] (step=0070500) Train Loss: -2.9873, Train Steps/Sec: 1.02
+ [2026-02-04 02:09:51] (step=0070600) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-04 02:11:29] (step=0070700) Train Loss: -2.9853, Train Steps/Sec: 1.02
+ [2026-02-04 02:13:06] (step=0070800) Train Loss: -2.9915, Train Steps/Sec: 1.02
+ [2026-02-04 02:14:44] (step=0070900) Train Loss: -2.9902, Train Steps/Sec: 1.02
+ [2026-02-04 02:16:22] (step=0071000) Train Loss: -2.9910, Train Steps/Sec: 1.02
+ [2026-02-04 02:18:00] (step=0071100) Train Loss: -2.9909, Train Steps/Sec: 1.02
+ [2026-02-04 02:19:37] (step=0071200) Train Loss: -2.9857, Train Steps/Sec: 1.03
+ [2026-02-04 02:21:15] (step=0071300) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-04 02:22:52] (step=0071400) Train Loss: -2.9858, Train Steps/Sec: 1.03
+ [2026-02-04 02:24:30] (step=0071500) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-04 02:26:08] (step=0071600) Train Loss: -2.9936, Train Steps/Sec: 1.02
+ [2026-02-04 02:27:46] (step=0071700) Train Loss: -2.9813, Train Steps/Sec: 1.02
+ [2026-02-04 02:29:24] (step=0071800) Train Loss: -2.9841, Train Steps/Sec: 1.02
+ [2026-02-04 02:31:01] (step=0071900) Train Loss: -2.9900, Train Steps/Sec: 1.03
+ [2026-02-04 02:32:39] (step=0072000) Train Loss: -2.9901, Train Steps/Sec: 1.03
+ [2026-02-04 02:34:16] (step=0072100) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-04 02:35:54] (step=0072200) Train Loss: -2.9852, Train Steps/Sec: 1.03
+ [2026-02-04 02:37:32] (step=0072300) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-04 02:39:10] (step=0072400) Train Loss: -2.9919, Train Steps/Sec: 1.02
+ [2026-02-04 02:40:48] (step=0072500) Train Loss: -2.9843, Train Steps/Sec: 1.02
+ [2026-02-04 02:42:26] (step=0072600) Train Loss: -2.9850, Train Steps/Sec: 1.02
+ [2026-02-04 02:44:04] (step=0072700) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-04 02:45:42] (step=0072800) Train Loss: -2.9904, Train Steps/Sec: 1.02
+ [2026-02-04 02:47:19] (step=0072900) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-04 02:48:57] (step=0073000) Train Loss: -2.9901, Train Steps/Sec: 1.03
+ [2026-02-04 02:50:35] (step=0073100) Train Loss: -2.9859, Train Steps/Sec: 1.02
+ [2026-02-04 02:52:13] (step=0073200) Train Loss: -2.9864, Train Steps/Sec: 1.02
+ [2026-02-04 02:53:50] (step=0073300) Train Loss: -2.9875, Train Steps/Sec: 1.03
+ [2026-02-04 02:55:28] (step=0073400) Train Loss: -2.9896, Train Steps/Sec: 1.02
+ [2026-02-04 02:57:05] (step=0073500) Train Loss: -2.9940, Train Steps/Sec: 1.03
+ [2026-02-04 02:58:43] (step=0073600) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-04 03:00:21] (step=0073700) Train Loss: -2.9883, Train Steps/Sec: 1.02
+ [2026-02-04 03:01:58] (step=0073800) Train Loss: -2.9895, Train Steps/Sec: 1.03
+ [2026-02-04 03:03:36] (step=0073900) Train Loss: -2.9879, Train Steps/Sec: 1.02
+ [2026-02-04 03:05:14] (step=0074000) Train Loss: -2.9884, Train Steps/Sec: 1.02
+ [2026-02-04 03:06:52] (step=0074100) Train Loss: -2.9830, Train Steps/Sec: 1.02
+ [2026-02-04 03:08:30] (step=0074200) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-04 03:10:08] (step=0074300) Train Loss: -2.9873, Train Steps/Sec: 1.02
+ [2026-02-04 03:11:45] (step=0074400) Train Loss: -2.9860, Train Steps/Sec: 1.03
+ [2026-02-04 03:13:22] (step=0074500) Train Loss: -2.9887, Train Steps/Sec: 1.03
+ [2026-02-04 03:15:00] (step=0074600) Train Loss: -2.9857, Train Steps/Sec: 1.03
+ [2026-02-04 03:16:38] (step=0074700) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-04 03:18:15] (step=0074800) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-04 03:19:53] (step=0074900) Train Loss: -2.9849, Train Steps/Sec: 1.02
+ [2026-02-04 03:21:32] (step=0075000) Train Loss: -2.9902, Train Steps/Sec: 1.02
+ [2026-02-04 03:21:33] Saved checkpoint to results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/checkpoints/0075000.pt
+ [2026-02-04 03:22:32] Beginning epoch 15...
+ [2026-02-04 03:23:10] (step=0075100) Train Loss: -2.9908, Train Steps/Sec: 1.02
+ [2026-02-04 03:24:48] (step=0075200) Train Loss: -2.9917, Train Steps/Sec: 1.02
+ [2026-02-04 03:26:26] (step=0075300) Train Loss: -2.9913, Train Steps/Sec: 1.02
+ [2026-02-04 03:28:04] (step=0075400) Train Loss: -2.9900, Train Steps/Sec: 1.02
+ [2026-02-04 03:29:42] (step=0075500) Train Loss: -2.9866, Train Steps/Sec: 1.02
+ [2026-02-04 03:30:56] Generating EMA samples...
+ [2026-02-04 03:31:20] (step=0075600) Train Loss: -2.9850, Train Steps/Sec: 1.02
+ [2026-02-04 03:32:57] (step=0075700) Train Loss: -2.9845, Train Steps/Sec: 1.03
+ [2026-02-04 03:34:36] (step=0075800) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-04 03:36:14] (step=0075900) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-04 03:37:52] (step=0076000) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-04 03:39:30] (step=0076100) Train Loss: -2.9877, Train Steps/Sec: 1.02
+ [2026-02-04 03:41:08] (step=0076200) Train Loss: -2.9870, Train Steps/Sec: 1.02
+ [2026-02-04 03:42:45] (step=0076300) Train Loss: -2.9843, Train Steps/Sec: 1.03
+ [2026-02-04 03:44:23] (step=0076400) Train Loss: -2.9898, Train Steps/Sec: 1.02
+ [2026-02-04 03:46:01] (step=0076500) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-04 03:47:39] (step=0076600) Train Loss: -2.9848, Train Steps/Sec: 1.02
+ [2026-02-04 03:49:16] (step=0076700) Train Loss: -2.9864, Train Steps/Sec: 1.03
+ [2026-02-04 03:50:54] (step=0076800) Train Loss: -2.9876, Train Steps/Sec: 1.03
+ [2026-02-04 03:52:32] (step=0076900) Train Loss: -2.9862, Train Steps/Sec: 1.02
+ [2026-02-04 03:54:10] (step=0077000) Train Loss: -2.9906, Train Steps/Sec: 1.02
+ [2026-02-04 03:55:48] (step=0077100) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-04 03:57:26] (step=0077200) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-04 03:59:04] (step=0077300) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-04 04:00:42] (step=0077400) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-04 04:02:20] (step=0077500) Train Loss: -2.9870, Train Steps/Sec: 1.02
+ [2026-02-04 04:03:58] (step=0077600) Train Loss: -2.9864, Train Steps/Sec: 1.02
+ [2026-02-04 04:05:35] (step=0077700) Train Loss: -2.9854, Train Steps/Sec: 1.02
+ [2026-02-04 04:07:13] (step=0077800) Train Loss: -2.9904, Train Steps/Sec: 1.02
+ [2026-02-04 04:08:51] (step=0077900) Train Loss: -2.9850, Train Steps/Sec: 1.02
+ [2026-02-04 04:10:29] (step=0078000) Train Loss: -2.9941, Train Steps/Sec: 1.02
+ [2026-02-04 04:12:07] (step=0078100) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-04 04:13:45] (step=0078200) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-04 04:15:22] (step=0078300) Train Loss: -2.9915, Train Steps/Sec: 1.03
+ [2026-02-04 04:17:00] (step=0078400) Train Loss: -2.9876, Train Steps/Sec: 1.03
+ [2026-02-04 04:18:37] (step=0078500) Train Loss: -2.9893, Train Steps/Sec: 1.03
+ [2026-02-04 04:20:15] (step=0078600) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-04 04:21:53] (step=0078700) Train Loss: -2.9854, Train Steps/Sec: 1.02
+ [2026-02-04 04:23:31] (step=0078800) Train Loss: -2.9884, Train Steps/Sec: 1.03
+ [2026-02-04 04:25:08] (step=0078900) Train Loss: -2.9884, Train Steps/Sec: 1.03
+ [2026-02-04 04:26:46] (step=0079000) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-04 04:28:24] (step=0079100) Train Loss: -2.9918, Train Steps/Sec: 1.02
+ [2026-02-04 04:30:01] (step=0079200) Train Loss: -2.9873, Train Steps/Sec: 1.03
+ [2026-02-04 04:31:39] (step=0079300) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-04 04:33:17] (step=0079400) Train Loss: -2.9800, Train Steps/Sec: 1.02
+ [2026-02-04 04:34:55] (step=0079500) Train Loss: -2.9873, Train Steps/Sec: 1.03
+ [2026-02-04 04:36:32] (step=0079600) Train Loss: -2.9847, Train Steps/Sec: 1.02
+ [2026-02-04 04:38:11] (step=0079700) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-04 04:39:48] (step=0079800) Train Loss: -2.9865, Train Steps/Sec: 1.02
+ [2026-02-04 04:41:23] (step=0079900) Train Loss: -2.9922, Train Steps/Sec: 1.06
+ [2026-02-04 04:43:00] (step=0080000) Train Loss: -2.9857, Train Steps/Sec: 1.03
+ [2026-02-04 04:44:04] Beginning epoch 16...
+ [2026-02-04 04:44:40] (step=0080100) Train Loss: -2.9882, Train Steps/Sec: 1.00
+ [2026-02-04 04:46:18] (step=0080200) Train Loss: -2.9875, Train Steps/Sec: 1.02
+ [2026-02-04 04:47:56] (step=0080300) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-04 04:49:34] (step=0080400) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-04 04:51:11] (step=0080500) Train Loss: -2.9847, Train Steps/Sec: 1.03
+ [2026-02-04 04:52:49] (step=0080600) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-04 04:54:27] (step=0080700) Train Loss: -2.9888, Train Steps/Sec: 1.03
+ [2026-02-04 04:56:04] (step=0080800) Train Loss: -2.9902, Train Steps/Sec: 1.03
+ [2026-02-04 04:57:42] (step=0080900) Train Loss: -2.9849, Train Steps/Sec: 1.02
+ [2026-02-04 04:59:20] (step=0081000) Train Loss: -2.9865, Train Steps/Sec: 1.03
+ [2026-02-04 05:00:58] (step=0081100) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-04 05:02:36] (step=0081200) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-04 05:04:14] (step=0081300) Train Loss: -2.9845, Train Steps/Sec: 1.02
+ [2026-02-04 05:05:52] (step=0081400) Train Loss: -2.9906, Train Steps/Sec: 1.02
+ [2026-02-04 05:07:29] (step=0081500) Train Loss: -2.9916, Train Steps/Sec: 1.02
+ [2026-02-04 05:09:08] (step=0081600) Train Loss: -2.9953, Train Steps/Sec: 1.02
+ [2026-02-04 05:10:46] (step=0081700) Train Loss: -2.9884, Train Steps/Sec: 1.02
+ [2026-02-04 05:12:24] (step=0081800) Train Loss: -2.9865, Train Steps/Sec: 1.02
+ [2026-02-04 05:14:01] (step=0081900) Train Loss: -2.9889, Train Steps/Sec: 1.03
+ [2026-02-04 05:15:39] (step=0082000) Train Loss: -2.9850, Train Steps/Sec: 1.02
+ [2026-02-04 05:17:17] (step=0082100) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-04 05:18:55] (step=0082200) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-04 05:20:33] (step=0082300) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-04 05:22:10] (step=0082400) Train Loss: -2.9872, Train Steps/Sec: 1.02
+ [2026-02-04 05:23:49] (step=0082500) Train Loss: -2.9838, Train Steps/Sec: 1.02
+ [2026-02-04 05:25:27] (step=0082600) Train Loss: -2.9881, Train Steps/Sec: 1.02
+ [2026-02-04 05:27:04] (step=0082700) Train Loss: -2.9890, Train Steps/Sec: 1.03
+ [2026-02-04 05:28:42] (step=0082800) Train Loss: -2.9881, Train Steps/Sec: 1.02
+ [2026-02-04 05:30:19] (step=0082900) Train Loss: -2.9903, Train Steps/Sec: 1.03
+ [2026-02-04 05:31:58] (step=0083000) Train Loss: -2.9946, Train Steps/Sec: 1.02
+ [2026-02-04 05:33:36] (step=0083100) Train Loss: -2.9879, Train Steps/Sec: 1.02
+ [2026-02-04 05:35:14] (step=0083200) Train Loss: -2.9879, Train Steps/Sec: 1.02
+ [2026-02-04 05:36:52] (step=0083300) Train Loss: -2.9939, Train Steps/Sec: 1.02
+ [2026-02-04 05:38:30] (step=0083400) Train Loss: -2.9914, Train Steps/Sec: 1.02
+ [2026-02-04 05:40:07] (step=0083500) Train Loss: -2.9888, Train Steps/Sec: 1.03
Rectified_Noise/GVP-Disp/run.sh ADDED
@@ -0,0 +1,14 @@
+ nohup torchrun \
+     --nnodes=1 \
+     --nproc_per_node=4 \
+     --rdzv_endpoint=localhost:29739 \
+     train_rectified_noise.py \
+     --depth 2 \
+     --results-dir results_256_gvp_disp \
+     --data-path /gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/ \
+     --ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/SiT_clean_256_GVP/base.pt \
+     --num-classes 1000 \
+     --path-type GVP \
+     --prediction velocity \
+     --disp \
+     > w_training1.log 2>&1 &
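Note (editorial, not part of the uploaded script): the job is detached via nohup, so progress is typically followed from the redirected log, e.g. `tail -f w_training1.log`; `--nproc_per_node=4` spawns one training rank per GPU on the single node.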
Rectified_Noise/GVP-Disp/test.sh ADDED
@@ -0,0 +1,78 @@
+ #!/bin/bash
+
+ # Execute all four commands in parallel
+ # Each command runs in the background using &
+
+ echo "Starting all four sampling tasks in parallel..."
+
+ CUDA_VISIBLE_DEVICES=0 nohup torchrun \
+     --nnodes=1 \
+     --nproc_per_node=1 \
+     --rdzv_endpoint=localhost:29110 \
+     sample_rectified_noise.py SDE \
+     --depth 2 \
+     --sample-dir GVP_samples \
+     --model SiT-XL/2 \
+     --num-fid-samples 3000 \
+     --num-classes 1000 \
+     --global-seed 0 \
+     --use-sitf2 False \
+     --use-sitf2-before-t05 False \
+     --sitf2-threshold 0.0 \
+     --ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/SiT_clean_256_GVP/base.pt \
+     --sitf2-ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise-Dispersive-Loss/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt > W_No.log 2>&1 &
+
+ CUDA_VISIBLE_DEVICES=1 nohup torchrun \
+     --nnodes=1 \
+     --nproc_per_node=1 \
+     --rdzv_endpoint=localhost:29150 \
+     sample_rectified_noise.py SDE \
+     --depth 2 \
+     --sample-dir GVP_samples \
+     --model SiT-XL/2 \
+     --num-fid-samples 3000 \
+     --num-classes 1000 \
+     --global-seed 0 \
+     --use-sitf2-before-t05 False \
+     --sitf2-threshold 1.0 \
+     --ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/SiT_clean_256_GVP/base.pt \
+     --sitf2-ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise-Dispersive-Loss/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt > W_False.log 2>&1 &
+
+
+ CUDA_VISIBLE_DEVICES=2 nohup torchrun \
+     --nnodes=1 \
+     --nproc_per_node=1 \
+     --rdzv_endpoint=localhost:29152 \
+     sample_rectified_noise.py SDE \
+     --depth 2 \
+     --sample-dir GVP_samples \
+     --model SiT-XL/2 \
+     --num-fid-samples 3000 \
+     --num-classes 1000 \
+     --global-seed 0 \
+     --use-sitf2-before-t05 True \
+     --sitf2-threshold 0.5 \
+     --ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/SiT_clean_256_GVP/base.pt \
+     --sitf2-ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise-Dispersive-Loss/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt > W_True_0.5.log 2>&1 &
+
+ CUDA_VISIBLE_DEVICES=3 nohup torchrun \
+     --nnodes=1 \
+     --nproc_per_node=1 \
+     --rdzv_endpoint=localhost:29121 \
+     sample_rectified_noise.py SDE \
+     --depth 2 \
+     --sample-dir GVP_samples \
+     --model SiT-XL/2 \
+     --num-fid-samples 3000 \
+     --num-classes 1000 \
+     --global-seed 0 \
+     --use-sitf2-before-t05 True \
+     --sitf2-threshold 0.15 \
+     --ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/SiT_clean_256_GVP/base.pt \
+     --sitf2-ckpt /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise-Dispersive-Loss/results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt > W_True_0.15.log 2>&1 &
+
+ # Wait for all background jobs to complete
+ echo "All tasks started. Waiting for completion..."
+ wait
+
+ echo "All tasks completed!"
Rectified_Noise/GVP-Disp/train_rectified_noise.py ADDED
@@ -0,0 +1,429 @@
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ """
+ A minimal training script for SiT using PyTorch DDP.
+ """
+ import torch
+ # the first flag below was False when we tested this script but True makes A100 training a lot faster:
+ torch.backends.cuda.matmul.allow_tf32 = True
+ torch.backends.cudnn.allow_tf32 = True
+ import torch.distributed as dist
+ from torch.nn.parallel import DistributedDataParallel as DDP
+ from torch.utils.data import DataLoader
+ from torch.utils.data.distributed import DistributedSampler
+ from torchvision.datasets import ImageFolder
+ from torchvision import transforms
+ import numpy as np
+ from collections import OrderedDict
+ from PIL import Image
+ from copy import deepcopy
+ from glob import glob
+ from time import time
+ import argparse
+ import logging
+ import os
+
+ from models import SiT, SiTF1, SiTF2, CombinedModel
+ from models import SiT_models
+ from download import find_model
+ from transport import create_transport, Sampler
+ from diffusers.models import AutoencoderKL
+ from train_utils import parse_transport_args
+
+
+
+ #################################################################################
+ #                          Training Helper Functions                            #
+ #################################################################################
+
+ @torch.no_grad()
+ def update_ema(ema_model, model, decay=0.9999):
+     """
+     Step the EMA model towards the current model.
+     """
+     ema_params = OrderedDict(ema_model.named_parameters())
+     model_params = OrderedDict(model.named_parameters())
+
+     for name, param in model_params.items():
+         # TODO: Consider applying only to params that require_grad to avoid small numerical changes of pos_embed
+         ema_params[name].mul_(decay).add_(param.data, alpha=1 - decay)
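+     # Editorial note (illustrative, not from the original script): each call applies
+     # ema <- decay * ema + (1 - decay) * param, i.e. an exponential moving average
+     # with an effective window of roughly 1 / (1 - decay) = 10,000 steps at the
+     # default decay. A typical usage sketch right after an optimizer step
+     # (names illustrative):
+     #     opt.step(); opt.zero_grad()
+     #     update_ema(sitf2_ema, sitf2)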
+
+
+ def requires_grad(model, flag=True):
+     """
+     Set requires_grad flag for all parameters in a model.
+     """
+     for p in model.parameters():
+         p.requires_grad = flag
+
+
+ def cleanup():
+     """
+     End DDP training.
+     """
+     dist.destroy_process_group()
+
+
+ def create_logger(logging_dir):
+     """
+     Create a logger that writes to a log file and stdout.
+     """
+     if dist.get_rank() == 0:  # real logger
+         logging.basicConfig(
+             level=logging.INFO,
+             format='[\033[34m%(asctime)s\033[0m] %(message)s',
+             datefmt='%Y-%m-%d %H:%M:%S',
+             handlers=[logging.StreamHandler(), logging.FileHandler(f"{logging_dir}/log.txt")]
+         )
+         logger = logging.getLogger(__name__)
+     else:  # dummy logger (does nothing)
+         logger = logging.getLogger(__name__)
+         logger.addHandler(logging.NullHandler())
+     return logger
+
+
+ def center_crop_arr(pil_image, image_size):
+     """
+     Center cropping implementation from ADM.
+     https://github.com/openai/guided-diffusion/blob/8fb3ad9197f16bbc40620447b2742e13458d2831/guided_diffusion/image_datasets.py#L126
+     """
+     while min(*pil_image.size) >= 2 * image_size:
+         pil_image = pil_image.resize(
+             tuple(x // 2 for x in pil_image.size), resample=Image.BOX
+         )
+
+     scale = image_size / min(*pil_image.size)
+     pil_image = pil_image.resize(
+         tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC
+     )
+
+     arr = np.array(pil_image)
+     crop_y = (arr.shape[0] - image_size) // 2
+     crop_x = (arr.shape[1] - image_size) // 2
+     return Image.fromarray(arr[crop_y: crop_y + image_size, crop_x: crop_x + image_size])
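+     # Worked example (editorial, assuming image_size=256): a 1024x768 input is
+     # box-downsampled once to 512x384 (its short side then drops below 2*256),
+     # bicubic-resized by 256/384 to 341x256, and center-cropped to 256x256.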
+
+
+ #################################################################################
+ #                                 Training Loop                                 #
+ #################################################################################
+
+ def main(args):
+     """
+     Trains a new SiT model.
+     """
+     assert torch.cuda.is_available(), "Training currently requires at least one GPU."
+
+     dist.init_process_group("nccl")
+     assert args.global_batch_size % dist.get_world_size() == 0, "Batch size must be divisible by world size."
+     rank = dist.get_rank()
+     device = rank % torch.cuda.device_count()
+     seed = args.global_seed * dist.get_world_size() + rank
+     torch.manual_seed(seed)
+     torch.cuda.set_device(device)
+     print(f"Starting rank={rank}, seed={seed}, world_size={dist.get_world_size()}.")
+     local_batch_size = int(args.global_batch_size // dist.get_world_size())
+     learn_mu = args.learn_mu
+     depth = args.depth  # NOTE: shadowed by the model-size table below; SiTF2 reads args.depth directly
+     # Setup an experiment folder:
+     if rank == 0:
+         os.makedirs(args.results_dir, exist_ok=True)
+         experiment_index = len(glob(f"{args.results_dir}/*"))
+         model_string_name = args.model.replace("/", "-")
+         if learn_mu:
+             experiment_name = f"depth-mu-{args.depth}-{experiment_index:03d}-{model_string_name}-" \
+                               f"{args.path_type}-{args.prediction}-{args.loss_weight}"
+         else:
+             experiment_name = f"depth-sigma-{args.depth}-{experiment_index:03d}-{model_string_name}-" \
+                               f"{args.path_type}-{args.prediction}-{args.loss_weight}"
+         experiment_dir = f"{args.results_dir}/{experiment_name}"
+         checkpoint_dir = f"{experiment_dir}/checkpoints"
+         os.makedirs(checkpoint_dir, exist_ok=True)
+         logger = create_logger(experiment_dir)
+         logger.info(f"Experiment directory created at {experiment_dir}")
144
+
145
+ else:
146
+ logger = create_logger(None)
147
+
148
+ # Create models:
149
+ assert args.image_size % 8 == 0, "Image size must be divisible by 8 (for the VAE encoder)."
150
+ latent_size = args.image_size // 8
151
+
152
+ # Get model configuration based on args.model
153
+ model_config = SiT_models[args.model]
154
+ model_kwargs = model_config().__dict__ # Get the default parameters for this model
155
+
156
+ # Extract parameters from the model configuration based on the model name
157
+ # Model names follow the format like 'SiT-XL/2', 'SiT-B/4', etc.
158
+ model_name = args.model
159
+ if 'XL' in model_name:
160
+ hidden_size, depth, num_heads = 1152, 28, 16
161
+ elif 'L' in model_name:
162
+ hidden_size, depth, num_heads = 1024, 24, 16
163
+ elif 'B' in model_name:
164
+ hidden_size, depth, num_heads = 768, 12, 12
165
+ elif 'S' in model_name:
166
+ hidden_size, depth, num_heads = 384, 12, 6
167
+ else:
168
+ # Default fallback
169
+ hidden_size, depth, num_heads = 768, 12, 12
170
+
171
+ # Extract patch size from model name like 'SiT-XL/2' -> patch_size = 2
172
+ patch_size = int(model_name.split('/')[-1])
173
+
174
+ sitf1 = SiTF1(
175
+ input_size=latent_size,
176
+ patch_size=patch_size,
177
+ in_channels=4,
178
+ hidden_size=hidden_size,
179
+ depth=depth,
180
+ num_heads=num_heads,
181
+ mlp_ratio=4.0,
182
+ class_dropout_prob=0.1,
183
+ num_classes=args.num_classes,
184
+ learn_sigma=False
185
+ ).to(device)
186
+ sit = SiT(
187
+ input_size=latent_size,
188
+ patch_size=patch_size,
189
+ in_channels=4,
190
+ hidden_size=hidden_size,
191
+ depth=depth,
192
+ num_heads=num_heads,
193
+ mlp_ratio=4.0,
194
+ class_dropout_prob=0.1,
195
+ num_classes=args.num_classes,
196
+ learn_sigma=False
197
+ ).to(device)
198
+ sitf2 = SiTF2(
199
+ input_size=latent_size,
200
+ hidden_size=hidden_size,
201
+ out_channels=8,
202
+ patch_size=patch_size,
203
+ num_heads=num_heads,
204
+ mlp_ratio=4.0,
205
+ depth=args.depth, # Use the depth for sitf2 as specified by command line
206
+ learn_sigma=True,
207
+ num_classes=args.num_classes,
208
+ learn_mu=learn_mu
209
+ ).to(device)
210
+ sitf2_ema = deepcopy(sitf2).to(device)
211
+ combined_model = CombinedModel(sitf1, sitf2).to(device)
212
+
213
+ if args.ckpt is not None:
214
+ ckpt_path = args.ckpt
215
+ state_dict = find_model(ckpt_path)
216
+ try:
217
+ sitf1.load_state_dict(state_dict["model"], strict=False)
218
+ sit.load_state_dict(state_dict["model"], strict=False)
219
+ except KeyError: # checkpoint may be a raw state dict rather than {"model": ...}
220
+ sitf1.load_state_dict(state_dict, strict=False)
221
+ sit.load_state_dict(state_dict, strict=False)
222
+
223
+
224
+ requires_grad(sitf1, False)
225
+ requires_grad(sit, False)
226
+ requires_grad(sitf2, True)
227
+
228
+ opt = torch.optim.AdamW(sitf2.parameters(), lr=1e-4, weight_decay=0)
229
+ # Do NOT wrap sitf2 separately in DDP (avoids double-wrapping submodules); wrap only the combined model.
230
+ combined_model = DDP(combined_model, device_ids=[rank], find_unused_parameters=True)
231
+
232
+ # Create transport object: path_type determines the loss form used in training_losses()
233
+ # path_type options: "Linear", "GVP", "VP" - each corresponds to a different loss calculation method
234
+ transport = create_transport(
235
+ args.path_type, # This directly affects how loss is computed in training_losses()
236
+ args.prediction,
237
+ args.loss_weight,
238
+ args.train_eps,
239
+ args.sample_eps,
240
+ args.disp_loss_weight,
241
+ args.temperature
242
+ )
243
+ transport_sampler = Sampler(transport)
244
+ vae = AutoencoderKL.from_pretrained(f"stabilityai/sd-vae-ft-{args.vae}").to(device)
245
+ logger.info(f"Combined_model Parameters: {sum(p.numel() for p in combined_model.parameters()):,}")
246
+
247
+ grad_params = [(n, p.numel()) for n, p in combined_model.named_parameters() if p.requires_grad]
248
+ logger.info(f"Total trainable parameters: {sum(cnt for _, cnt in grad_params):,}")
249
+
250
+ # Setup data:
251
+ transform = transforms.Compose([
252
+ transforms.Lambda(lambda pil_image: center_crop_arr(pil_image, args.image_size)),
253
+ transforms.RandomHorizontalFlip(),
254
+ transforms.ToTensor(),
255
+ transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True)
256
+ ])
257
+ dataset = ImageFolder(args.data_path, transform=transform)
258
+ sampler = DistributedSampler(
259
+ dataset,
260
+ num_replicas=dist.get_world_size(),
261
+ rank=rank,
262
+ shuffle=True,
263
+ seed=args.global_seed
264
+ )
265
+ loader = DataLoader(
266
+ dataset,
267
+ batch_size=local_batch_size,
268
+ shuffle=False,
269
+ sampler=sampler,
270
+ num_workers=args.num_workers,
271
+ pin_memory=True,
272
+ drop_last=True
273
+ )
274
+ logger.info(f"Dataset contains {len(dataset):,} images ({args.data_path})")
275
+ # Ensure EMA updates target the correct base model (whether sitf2 is wrapped or not)
276
+ base_sitf2 = sitf2.module if isinstance(sitf2, torch.nn.parallel.DistributedDataParallel) else sitf2
277
+ update_ema(sitf2_ema, base_sitf2, decay=0)
278
+ sitf1.eval()
279
+ sit.eval()
280
+ sitf2.train()
281
+ sitf2_ema.eval()
282
+
283
+ train_steps = 0
284
+ log_steps = 0
285
+ running_loss = 0
286
+ start_time = time()
287
+ ys = torch.randint(1000, size=(local_batch_size,), device=device)
288
+ use_cfg = args.cfg_scale > 1.0
289
+ n = ys.size(0)
290
+ zs = torch.randn(n, 4, latent_size, latent_size, device=device)
291
+ if use_cfg:
292
+ zs = torch.cat([zs, zs], 0)
293
+ y_null = torch.tensor([1000] * n, device=device)
294
+ ys = torch.cat([ys, y_null], 0)
295
+ sample_model_kwargs = dict(y=ys, cfg_scale=args.cfg_scale)
296
+ model_fn = sitf1.forward_with_cfg
297
+ else:
298
+ sample_model_kwargs = dict(y=ys)
299
+ model_fn = sitf1.forward
300
+ def combined_sampling_model(x, t, y=None, **kwargs):
301
+ with torch.no_grad():
302
+ sit_out = sit.forward(x, t, y)
303
+ combined_out = combined_model.forward(x, t, y)
304
+ return sit_out + combined_out
305
+ logger.info(f"Training for {args.epochs} epochs...")
306
+ for epoch in range(args.epochs):
307
+ sampler.set_epoch(epoch)
308
+ logger.info(f"Beginning epoch {epoch}...")
309
+ for x, y in loader:
310
+ x = x.to(device)
311
+ y = y.to(device)
312
+ with torch.no_grad():
313
+ x_latent = vae.encode(x).latent_dist.sample().mul_(0.18215)
314
+ model_kwargs = dict(y=y, return_act=args.disp)
315
+ # Compute training loss: the loss form depends on args.path_type (Linear/GVP/VP)
316
+ # Each path_type uses a different mathematical formulation for the transport loss
317
+ loss_dict = transport.training_losses(sit, x_latent, model_noise=combined_model, model_kwargs=model_kwargs)
318
+ loss = loss_dict["loss"].mean()
319
+ opt.zero_grad()
320
+ loss.backward()
321
+ opt.step()
322
+ # Update EMA of the trainable sitf2 base model
323
+ update_ema(sitf2_ema, base_sitf2)
324
+ running_loss += loss.item()
325
+ log_steps += 1
326
+ train_steps += 1
327
+ if train_steps % args.log_every == 0:
328
+ torch.cuda.synchronize()
329
+ end_time = time()
330
+ steps_per_sec = log_steps / (end_time - start_time)
331
+ avg_loss = torch.tensor(running_loss / log_steps, device=device)
332
+ dist.all_reduce(avg_loss, op=dist.ReduceOp.SUM)
333
+ avg_loss = avg_loss.item() / dist.get_world_size()
334
+ logger.info(f"(step={train_steps:07d}) Train Loss: {avg_loss:.4f}, Train Steps/Sec: {steps_per_sec:.2f}")
335
+ running_loss = 0
336
+ log_steps = 0
337
+ start_time = time()
338
+ if train_steps % args.ckpt_every == 0 and train_steps > 0:
339
+ print(train_steps)
340
+ if rank == 0:
341
+ checkpoint = {
342
+ "model": sitf2.state_dict(),
343
+ "ema": sitf2.state_dict(),
344
+ "opt": opt.state_dict(),
345
+ "args": args
346
+ }
347
+ checkpoint_path = f"{checkpoint_dir}/{train_steps:07d}.pt"
348
+ torch.save(checkpoint, checkpoint_path)
349
+ logger.info(f"Saved checkpoint to {checkpoint_path}")
350
+ dist.barrier()
351
+
352
+ if train_steps % args.sample_every == 0 and train_steps > 0:
353
+ logger.info("Generating EMA samples...")
354
+ if epoch == args.epochs:
355
+ break
356
+
357
+ sitf1.eval()
358
+ sit.eval()
359
+ sitf2.eval()
360
+ logger.info("Final sampling done.")
361
+
362
+ logger.info("Done!")
363
+ cleanup()
364
+
365
+
366
+ def save_samples_grid(out_samples, epoch, experiment_index, args, experiment_name, rank):
367
+ if rank == 0:
368
+ import os
369
+ import numpy as np
370
+ from PIL import Image
371
+ parent_dir = os.path.dirname(args.results_dir)
372
+ pic_dir = os.path.join(parent_dir, "pic")
373
+ os.makedirs(pic_dir, exist_ok=True)
374
+ experiment_pic_dir = os.path.join(pic_dir, experiment_name)
375
+ os.makedirs(experiment_pic_dir, exist_ok=True)
376
+ samples_np = torch.clamp(127.5 * out_samples + 128.0, 0, 255).permute(0, 2, 3, 1).to("cpu", dtype=torch.uint8).numpy()
377
+ n_samples = samples_np.shape[0]
378
+ grid_size = int(np.ceil(np.sqrt(n_samples)))
379
+ canvas_size = grid_size * args.image_size
380
+ canvas = np.zeros((canvas_size, canvas_size, 3), dtype=np.uint8)
381
+ for i, sample in enumerate(samples_np):
382
+ row = i // grid_size
383
+ col = i % grid_size
384
+ canvas[row*args.image_size:(row+1)*args.image_size, col*args.image_size:(col+1)*args.image_size] = sample
385
+ combined_image = Image.fromarray(canvas)
386
+ combined_image.save(os.path.join(experiment_pic_dir, f"epoch_{epoch:04d}_combined.png"))
387
+
388
+
389
+ if __name__ == "__main__":
390
+ parser = argparse.ArgumentParser()
391
+ parser.add_argument("--data-path", type=str, required=True)
392
+ parser.add_argument("--results-dir", type=str, default="results_256_linear")
393
+ parser.add_argument("--model", type=str, choices=list(SiT_models.keys()), default="SiT-XL/2")
394
+ parser.add_argument("--image-size", type=int, choices=[256, 512], default=256)
395
+ parser.add_argument("--num-classes", type=int, default=3)
396
+ parser.add_argument("--epochs", type=int, default=100000)
397
+ parser.add_argument("--global-batch-size", type=int, default=256)
398
+ parser.add_argument("--global-seed", type=int, default=0)
399
+ parser.add_argument("--vae", type=str, choices=["ema", "mse"], default="ema") # Choice doesn't affect training
400
+ parser.add_argument("--num-workers", type=int, default=4)
401
+ parser.add_argument("--log-every", type=int, default=100)
402
+ parser.add_argument("--ckpt-every", type=int, default=25000)
403
+ parser.add_argument("--sample-every", type=int, default=25192)
404
+ parser.add_argument("--cfg-scale", type=float, default=4.0)
405
+ parser.add_argument("--ckpt", type=str, default='/gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise/2000000.pt',
406
+ help="Optional path to a custom SiT checkpoint")
407
+ parser.add_argument("--learn-mu", action=argparse.BooleanOptionalAction, default=True,
408
+ help="Whether to learn mu parameter")
409
+ parser.add_argument("--depth", type=int, default=1,
410
+ help="Depth parameter for SiTF2 model")
411
+ parser.add_argument("--disp", action="store_true",
412
+ help="Toggle to enable Dispersive Loss")
413
+ parser.add_argument("--disp-loss-weight", type=float, default=0.5,
414
+ help="Weight λ for dispersive loss (default: 0.5)")
415
+ parser.add_argument("--temperature", type=float, default=1.0,
416
+ help="Temperature τ for dispersive loss (default: 1.0)")
417
+
418
+ # Transport arguments (added by parse_transport_args):
419
+ # --path-type: Type of path for loss calculation (default: "GVP")
420
+ # Choices: "Linear" (linear interpolation), "GVP" (Geodesic Velocity Path), "VP" (Velocity Path)
421
+ # IMPORTANT: This parameter directly affects the loss form computed by transport.training_losses()
422
+ # The path_type determines how the transport loss is calculated during training.
423
+ # Make sure to use the correct path_type that matches your training objective.
424
+ # --prediction: Type of prediction (default: "velocity")
425
+ # --loss-weight: Loss weight type (default: None)
426
+ # --sample-eps, --train-eps: Epsilon values for sampling and training
427
+ parse_transport_args(parser)
428
+ args = parser.parse_args()
429
+ main(args)
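
The `update_ema` helper above implements the standard exponential moving average, `ema <- decay * ema + (1 - decay) * param`. A tiny self-contained check of that arithmetic (illustrative only, not part of the repo):

```python
import torch
from collections import OrderedDict

# Same in-place rule as update_ema(): ema <- decay * ema + (1 - decay) * param
ema = OrderedDict(w=torch.zeros(3))
cur = OrderedDict(w=torch.ones(3))
decay = 0.9
for _ in range(10):
    ema["w"].mul_(decay).add_(cur["w"], alpha=1 - decay)
print(ema["w"])  # each entry is 1 - 0.9**10 ~= 0.6513
```

With the default decay of 0.9999 the EMA weights average over roughly the last 10,000 steps, which is why the `"ema"` entry of the checkpoint is the one normally loaded for sampling.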
Rectified_Noise/GVP-Disp/transport/__init__.py ADDED
@@ -0,0 +1,71 @@
1
+ from .transport import Transport, ModelType, WeightType, PathType, Sampler
2
+
3
+ def create_transport(
4
+ path_type='Linear',
5
+ prediction="velocity",
6
+ loss_weight=None,
7
+ train_eps=None,
8
+ sample_eps=None,
9
+ disp_loss_weight=0.5,
10
+ temperature=1.0,
11
+ ):
12
+ """function for creating Transport object
13
+ **Note**: model prediction defaults to velocity
14
+ Args:
15
+ - path_type: type of path to use; defaults to "Linear"
16
+ - prediction: model prediction type; one of "velocity" (default), "noise", "score"
17
+ - loss_weight: optional loss weighting for noise/score predictions;
18
+ "velocity" weights the loss by the velocity term, "likelihood" by the
19
+ likelihood term, and None (the default) applies no weighting
20
+ - train_eps: small epsilon for avoiding instability during training
21
+ - sample_eps: small epsilon for avoiding instability during sampling
22
+ - disp_loss_weight: weight λ for dispersive loss (default: 0.5)
23
+ - temperature: temperature τ for dispersive loss (default: 1.0)
24
+ """
25
+
26
+ if prediction == "noise":
27
+ model_type = ModelType.NOISE
28
+ elif prediction == "score":
29
+ model_type = ModelType.SCORE
30
+ else:
31
+ model_type = ModelType.VELOCITY
32
+
33
+ if loss_weight == "velocity":
34
+ loss_type = WeightType.VELOCITY
35
+ elif loss_weight == "likelihood":
36
+ loss_type = WeightType.LIKELIHOOD
37
+ else:
38
+ loss_type = WeightType.NONE
39
+
40
+ path_choice = {
41
+ "Linear": PathType.LINEAR,
42
+ "GVP": PathType.GVP,
43
+ "VP": PathType.VP,
44
+ }
45
+
46
+ path_type = path_choice[path_type]
47
+
48
+ if (path_type in [PathType.VP]):
49
+ train_eps_new = 1e-5 if train_eps is None else train_eps
50
+ sample_eps_new = 1e-3 if sample_eps is None else sample_eps
51
+ train_eps, sample_eps = train_eps_new, sample_eps_new
52
+ elif (path_type in [PathType.GVP, PathType.LINEAR] and model_type != ModelType.VELOCITY):
53
+ train_eps_new = 1e-3 if train_eps is None else train_eps
54
+ sample_eps_new = 1e-3 if sample_eps is None else sample_eps
55
+ train_eps, sample_eps = train_eps_new, sample_eps_new
56
+ else: # velocity & [GVP, LINEAR] is stable everywhere
57
+ train_eps = 0
58
+ sample_eps = 0
59
+
60
+ # create flow state
61
+ state = Transport(
62
+ model_type=model_type,
63
+ path_type=path_type,
64
+ loss_type=loss_type,
65
+ train_eps=train_eps,
66
+ sample_eps=sample_eps,
67
+ disp_loss_weight=disp_loss_weight,
68
+ temperature=temperature,
69
+ )
70
+
71
+ return state
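
A minimal usage sketch for this factory (illustrative only; `model`, `z`, and `y` are assumed to come from a setup like the training script above):

```python
import torch
from transport import create_transport, Sampler

transport = create_transport(
    path_type="GVP",        # selects path.GVPCPlan, and hence the GVP loss form
    prediction="velocity",  # ModelType.VELOCITY, so train_eps = sample_eps = 0
    loss_weight=None,       # WeightType.NONE
    disp_loss_weight=0.5,   # weight for the dispersive loss
    temperature=1.0,        # temperature for the dispersive loss
)
sampler = Sampler(transport)
sample_fn = sampler.sample_ode(sampling_method="dopri5", num_steps=50)
# z = torch.randn(n, 4, 32, 32); samples = sample_fn(z, model, y=y)[-1]
```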
Rectified_Noise/GVP-Disp/transport/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (2.44 kB). View file
 
Rectified_Noise/GVP-Disp/transport/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (1.55 kB). View file
 
Rectified_Noise/GVP-Disp/transport/__pycache__/integrators.cpython-312.pyc ADDED
Binary file (6.32 kB). View file
 
Rectified_Noise/GVP-Disp/transport/__pycache__/integrators.cpython-38.pyc ADDED
Binary file (3.59 kB). View file
 
Rectified_Noise/GVP-Disp/transport/__pycache__/path.cpython-312.pyc ADDED
Binary file (11.3 kB). View file
 
Rectified_Noise/GVP-Disp/transport/__pycache__/path.cpython-38.pyc ADDED
Binary file (7.93 kB). View file
 
Rectified_Noise/GVP-Disp/transport/__pycache__/transport.cpython-312.pyc ADDED
Binary file (22.8 kB). View file
 
Rectified_Noise/GVP-Disp/transport/__pycache__/transport.cpython-38.pyc ADDED
Binary file (13.2 kB). View file
 
Rectified_Noise/GVP-Disp/transport/__pycache__/utils.cpython-312.pyc ADDED
Binary file (1.9 kB). View file
 
Rectified_Noise/GVP-Disp/transport/__pycache__/utils.cpython-38.pyc ADDED
Binary file (1.26 kB). View file
 
Rectified_Noise/GVP-Disp/transport/integrators.py ADDED
@@ -0,0 +1,117 @@
1
+ import numpy as np
2
+ import torch as th
3
+ import torch.nn as nn
4
+ from torchdiffeq import odeint
5
+ from functools import partial
6
+ from tqdm import tqdm
7
+
8
+ class sde:
9
+ """SDE solver class"""
10
+ def __init__(
11
+ self,
12
+ drift,
13
+ diffusion,
14
+ *,
15
+ t0,
16
+ t1,
17
+ num_steps,
18
+ sampler_type,
19
+ ):
20
+ assert t0 < t1, "SDE sampler has to be in forward time"
21
+
22
+ self.num_timesteps = num_steps
23
+ self.t = th.linspace(t0, t1, num_steps)
24
+ self.dt = self.t[1] - self.t[0]
25
+ self.drift = drift
26
+ self.diffusion = diffusion
27
+ self.sampler_type = sampler_type
28
+
29
+ def __Euler_Maruyama_step(self, x, mean_x, t, model, **model_kwargs):
30
+ w_cur = th.randn(x.size()).to(x)
31
+ t = th.ones(x.size(0)).to(x) * t
32
+ dw = w_cur * th.sqrt(self.dt)
33
+ drift = self.drift(x, t, model, **model_kwargs)
34
+ diffusion = self.diffusion(x, t)
35
+ mean_x = x + drift * self.dt
36
+ x = mean_x + th.sqrt(2 * diffusion) * dw
37
+ return x, mean_x
38
+
39
+ def __Heun_step(self, x, _, t, model, **model_kwargs):
40
+ w_cur = th.randn(x.size()).to(x)
41
+ dw = w_cur * th.sqrt(self.dt)
42
+ t_cur = th.ones(x.size(0)).to(x) * t
43
+ diffusion = self.diffusion(x, t_cur)
44
+ xhat = x + th.sqrt(2 * diffusion) * dw
45
+ K1 = self.drift(xhat, t_cur, model, **model_kwargs)
46
+ xp = xhat + self.dt * K1
47
+ K2 = self.drift(xp, t_cur + self.dt, model, **model_kwargs)
48
+ return xhat + 0.5 * self.dt * (K1 + K2), xhat # at last time point we do not perform the heun step
49
+
50
+ def __forward_fn(self):
51
+ """TODO: generalize here by adding all private functions ending with steps to it"""
52
+ sampler_dict = {
53
+ "Euler": self.__Euler_Maruyama_step,
54
+ "Heun": self.__Heun_step,
55
+ }
56
+
57
+ try:
58
+ sampler = sampler_dict[self.sampler_type]
59
+ except KeyError:
60
+ raise NotImplementedError("Sampler type not implemented.")
61
+
62
+ return sampler
63
+
64
+ def sample(self, init, model, **model_kwargs):
65
+ """forward loop of sde"""
66
+ x = init
67
+ mean_x = init
68
+ samples = []
69
+ sampler = self.__forward_fn()
70
+ for ti in self.t[:-1]:
71
+ with th.no_grad():
72
+ x, mean_x = sampler(x, mean_x, ti, model, **model_kwargs)
73
+ samples.append(x)
74
+
75
+ return samples
76
+
77
+ class ode:
78
+ """ODE solver class"""
79
+ def __init__(
80
+ self,
81
+ drift,
82
+ *,
83
+ t0,
84
+ t1,
85
+ sampler_type,
86
+ num_steps,
87
+ atol,
88
+ rtol,
89
+ ):
90
+ assert t0 < t1, "ODE sampler has to be in forward time"
91
+
92
+ self.drift = drift
93
+ self.t = th.linspace(t0, t1, num_steps)
94
+ self.atol = atol
95
+ self.rtol = rtol
96
+ self.sampler_type = sampler_type
97
+
98
+ def sample(self, x, model, **model_kwargs):
99
+
100
+ device = x[0].device if isinstance(x, tuple) else x.device
101
+ def _fn(t, x):
102
+ t = th.ones(x[0].size(0)).to(device) * t if isinstance(x, tuple) else th.ones(x.size(0)).to(device) * t
103
+ model_output = self.drift(x, t, model, **model_kwargs)
104
+ return model_output
105
+
106
+ t = self.t.to(device)
107
+ atol = [self.atol] * len(x) if isinstance(x, tuple) else [self.atol]
108
+ rtol = [self.rtol] * len(x) if isinstance(x, tuple) else [self.rtol]
109
+ samples = odeint(
110
+ _fn,
111
+ x,
112
+ t,
113
+ method=self.sampler_type,
114
+ atol=atol,
115
+ rtol=rtol
116
+ )
117
+ return samples
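
A quick smoke test of the Euler-Maruyama solver above on an Ornstein-Uhlenbeck toy SDE; the `model` argument is unused by this drift, so `None` is passed (illustrative sketch, not part of the repo):

```python
import torch as th
from transport.integrators import sde  # assumes this repo's package layout

drift = lambda x, t, model: -x                 # dX = -X dt + sqrt(2 * 0.1) dW
diffusion = lambda x, t: th.full_like(x, 0.1)  # constant diffusion coefficient
solver = sde(drift, diffusion, t0=0.0, t1=1.0, num_steps=100, sampler_type="Euler")
xs = solver.sample(th.randn(8, 2), model=None)
print(len(xs), xs[-1].shape)                   # 99 Euler steps -> final state (8, 2)
```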
Rectified_Noise/GVP-Disp/transport/path.py ADDED
@@ -0,0 +1,192 @@
1
+ import torch as th
2
+ import numpy as np
3
+ from functools import partial
4
+
5
+ def expand_t_like_x(t, x):
6
+ """Function to reshape time t to broadcastable dimension of x
7
+ Args:
8
+ t: [batch_dim,], time vector
9
+ x: [batch_dim,...], data point
10
+ """
11
+ dims = [1] * (len(x.size()) - 1)
12
+ t = t.view(t.size(0), *dims)
13
+ return t
14
+
15
+
16
+ #################### Coupling Plans ####################
17
+
18
+ class ICPlan:
19
+ """Linear Coupling Plan"""
20
+ def __init__(self, sigma=0.0):
21
+ self.sigma = sigma
22
+
23
+ def compute_alpha_t(self, t):
24
+ """Compute the data coefficient along the path"""
25
+ return t, 1
26
+
27
+ def compute_sigma_t(self, t):
28
+ """Compute the noise coefficient along the path"""
29
+ return 1 - t, -1
30
+
31
+ def compute_d_alpha_alpha_ratio_t(self, t):
32
+ """Compute the ratio between d_alpha and alpha"""
33
+ return 1 / t
34
+
35
+ def compute_drift(self, x, t):
36
+ """We always output sde according to score parametrization; """
37
+ t = expand_t_like_x(t, x)
38
+ alpha_ratio = self.compute_d_alpha_alpha_ratio_t(t)
39
+ sigma_t, d_sigma_t = self.compute_sigma_t(t)
40
+ drift = alpha_ratio * x
41
+ diffusion = alpha_ratio * (sigma_t ** 2) - sigma_t * d_sigma_t
42
+
43
+ return -drift, diffusion
44
+
45
+ def compute_diffusion(self, x, t, form="constant", norm=1.0):
46
+ """Compute the diffusion term of the SDE
47
+ Args:
48
+ x: [batch_dim, ...], data point
49
+ t: [batch_dim,], time vector
50
+ form: str, form of the diffusion term
51
+ norm: float, norm of the diffusion term
52
+ """
53
+ t = expand_t_like_x(t, x)
54
+ choices = {
55
+ "constant": norm,
56
+ "SBDM": norm * self.compute_drift(x, t)[1],
57
+ "sigma": norm * self.compute_sigma_t(t)[0],
58
+ "linear": norm * (1 - t),
59
+ "decreasing": 0.25 * (norm * th.cos(np.pi * t) + 1) ** 2,
60
+ "inccreasing-decreasing": norm * th.sin(np.pi * t) ** 2,
61
+ }
62
+
63
+ try:
64
+ diffusion = choices[form]
65
+ except KeyError:
66
+ raise NotImplementedError(f"Diffusion form {form} not implemented")
67
+
68
+ return diffusion
69
+
70
+ def get_score_from_velocity(self, velocity, x, t):
71
+ """Wrapper function: transfrom velocity prediction model to score
72
+ Args:
73
+ velocity: [batch_dim, ...] shaped tensor; velocity model output
74
+ x: [batch_dim, ...] shaped tensor; x_t data point
75
+ t: [batch_dim,] time tensor
76
+ """
77
+ t = expand_t_like_x(t, x)
78
+ alpha_t, d_alpha_t = self.compute_alpha_t(t)
79
+ sigma_t, d_sigma_t = self.compute_sigma_t(t)
80
+ mean = x
81
+ reverse_alpha_ratio = alpha_t / d_alpha_t
82
+ var = sigma_t**2 - reverse_alpha_ratio * d_sigma_t * sigma_t
83
+ score = (reverse_alpha_ratio * velocity - mean) / var
84
+ return score
85
+
86
+ def get_noise_from_velocity(self, velocity, x, t):
87
+ """Wrapper function: transfrom velocity prediction model to denoiser
88
+ Args:
89
+ velocity: [batch_dim, ...] shaped tensor; velocity model output
90
+ x: [batch_dim, ...] shaped tensor; x_t data point
91
+ t: [batch_dim,] time tensor
92
+ """
93
+ t = expand_t_like_x(t, x)
94
+ alpha_t, d_alpha_t = self.compute_alpha_t(t)
95
+ sigma_t, d_sigma_t = self.compute_sigma_t(t)
96
+ mean = x
97
+ reverse_alpha_ratio = alpha_t / d_alpha_t
98
+ var = reverse_alpha_ratio * d_sigma_t - sigma_t
99
+ noise = (reverse_alpha_ratio * velocity - mean) / var
100
+ return noise
101
+
102
+ def get_velocity_from_score(self, score, x, t):
103
+ """Wrapper function: transfrom score prediction model to velocity
104
+ Args:
105
+ score: [batch_dim, ...] shaped tensor; score model output
106
+ x: [batch_dim, ...] shaped tensor; x_t data point
107
+ t: [batch_dim,] time tensor
108
+ """
109
+ t = expand_t_like_x(t, x)
110
+ drift, var = self.compute_drift(x, t)
111
+ velocity = var * score - drift
112
+ return velocity
113
+
114
+ def compute_mu_t(self, t, x0, x1):
115
+ """Compute the mean of time-dependent density p_t"""
116
+ t = expand_t_like_x(t, x1)
117
+ alpha_t, _ = self.compute_alpha_t(t)
118
+ sigma_t, _ = self.compute_sigma_t(t)
119
+ return alpha_t * x1 + sigma_t * x0
120
+
121
+ def compute_xt(self, t, x0, x1):
122
+ """Sample xt from time-dependent density p_t; rng is required"""
123
+ xt = self.compute_mu_t(t, x0, x1)
124
+ return xt
125
+
126
+ def compute_ut(self, t, x0, x1, xt):
127
+ """Compute the vector field corresponding to p_t"""
128
+ t = expand_t_like_x(t, x1)
129
+ _, d_alpha_t = self.compute_alpha_t(t)
130
+ _, d_sigma_t = self.compute_sigma_t(t)
131
+ return d_alpha_t * x1 + d_sigma_t * x0
132
+
133
+ def plan(self, t, x0, x1):
134
+ xt = self.compute_xt(t, x0, x1)
135
+ ut = self.compute_ut(t, x0, x1, xt)
136
+ return t, xt, ut
137
+
138
+
139
+ class VPCPlan(ICPlan):
140
+ """class for VP path flow matching"""
141
+
142
+ def __init__(self, sigma_min=0.1, sigma_max=20.0):
143
+ self.sigma_min = sigma_min
144
+ self.sigma_max = sigma_max
145
+ self.log_mean_coeff = lambda t: -0.25 * ((1 - t) ** 2) * (self.sigma_max - self.sigma_min) - 0.5 * (1 - t) * self.sigma_min
146
+ self.d_log_mean_coeff = lambda t: 0.5 * (1 - t) * (self.sigma_max - self.sigma_min) + 0.5 * self.sigma_min
147
+
148
+
149
+ def compute_alpha_t(self, t):
150
+ """Compute coefficient of x1"""
151
+ alpha_t = self.log_mean_coeff(t)
152
+ alpha_t = th.exp(alpha_t)
153
+ d_alpha_t = alpha_t * self.d_log_mean_coeff(t)
154
+ return alpha_t, d_alpha_t
155
+
156
+ def compute_sigma_t(self, t):
157
+ """Compute coefficient of x0"""
158
+ p_sigma_t = 2 * self.log_mean_coeff(t)
159
+ sigma_t = th.sqrt(1 - th.exp(p_sigma_t))
160
+ d_sigma_t = th.exp(p_sigma_t) * (2 * self.d_log_mean_coeff(t)) / (-2 * sigma_t)
161
+ return sigma_t, d_sigma_t
162
+
163
+ def compute_d_alpha_alpha_ratio_t(self, t):
164
+ """Special purposed function for computing numerical stabled d_alpha_t / alpha_t"""
165
+ return self.d_log_mean_coeff(t)
166
+
167
+ def compute_drift(self, x, t):
168
+ """Compute the drift term of the SDE"""
169
+ t = expand_t_like_x(t, x)
170
+ beta_t = self.sigma_min + (1 - t) * (self.sigma_max - self.sigma_min)
171
+ return -0.5 * beta_t * x, beta_t / 2
172
+
173
+
174
+ class GVPCPlan(ICPlan):
175
+ def __init__(self, sigma=0.0):
176
+ super().__init__(sigma)
177
+
178
+ def compute_alpha_t(self, t):
179
+ """Compute coefficient of x1"""
180
+ alpha_t = th.sin(t * np.pi / 2)
181
+ d_alpha_t = np.pi / 2 * th.cos(t * np.pi / 2)
182
+ return alpha_t, d_alpha_t
183
+
184
+ def compute_sigma_t(self, t):
185
+ """Compute coefficient of x0"""
186
+ sigma_t = th.cos(t * np.pi / 2)
187
+ d_sigma_t = -np.pi / 2 * th.sin(t * np.pi / 2)
188
+ return sigma_t, d_sigma_t
189
+
190
+ def compute_d_alpha_alpha_ratio_t(self, t):
191
+ """Special purposed function for computing numerical stabled d_alpha_t / alpha_t"""
192
+ return np.pi / (2 * th.tan(t * np.pi / 2))
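
Two quick numerical checks of the plans above (illustrative, not part of the repo): the GVP coefficients satisfy alpha_t^2 + sigma_t^2 = 1 for every t, which is what makes the path variance-preserving, and `plan()` returns the target velocity d_alpha_t * x1 + d_sigma_t * x0:

```python
import torch as th
from transport.path import GVPCPlan  # assumes this repo's package layout

plan = GVPCPlan()
t = th.rand(4)
alpha_t, d_alpha_t = plan.compute_alpha_t(t)
sigma_t, d_sigma_t = plan.compute_sigma_t(t)
assert th.allclose(alpha_t ** 2 + sigma_t ** 2, th.ones_like(t))  # sin^2 + cos^2 = 1

x0, x1 = th.randn(4, 3), th.randn(4, 3)
_, xt, ut = plan.plan(t, x0, x1)
assert th.allclose(xt, alpha_t.view(-1, 1) * x1 + sigma_t.view(-1, 1) * x0)
assert th.allclose(ut, d_alpha_t.view(-1, 1) * x1 + d_sigma_t.view(-1, 1) * x0)
```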
Rectified_Noise/GVP-Disp/transport/transport.py ADDED
@@ -0,0 +1,501 @@
1
+ import torch as th
2
+ import numpy as np
3
+ import logging
4
+
5
+ import enum
6
+
7
+ from . import path
8
+ from .utils import EasyDict, log_state, mean_flat
9
+ from .integrators import ode, sde
10
+
11
+ class ModelType(enum.Enum):
12
+ """
13
+ Which type of output the model predicts.
14
+ """
15
+
16
+ NOISE = enum.auto() # the model predicts epsilon
17
+ SCORE = enum.auto() # the model predicts \nabla \log p(x)
18
+ VELOCITY = enum.auto() # the model predicts v(x)
19
+
20
+ class PathType(enum.Enum):
21
+ """
22
+ Which type of path to use.
23
+ """
24
+
25
+ LINEAR = enum.auto()
26
+ GVP = enum.auto()
27
+ VP = enum.auto()
28
+
29
+ class WeightType(enum.Enum):
30
+ """
31
+ Which type of weighting to use.
32
+ """
33
+
34
+ NONE = enum.auto()
35
+ VELOCITY = enum.auto()
36
+ LIKELIHOOD = enum.auto()
37
+
38
+
39
+ class Transport:
40
+
41
+ def __init__(
42
+ self,
43
+ *,
44
+ model_type,
45
+ path_type,
46
+ loss_type,
47
+ train_eps,
48
+ sample_eps,
49
+ disp_loss_weight=0.5,
50
+ temperature=1.0,
51
+ ):
52
+ path_options = {
53
+ PathType.LINEAR: path.ICPlan,
54
+ PathType.GVP: path.GVPCPlan,
55
+ PathType.VP: path.VPCPlan,
56
+ }
57
+
58
+ self.loss_type = loss_type
59
+ self.model_type = model_type
60
+ self.path_sampler = path_options[path_type]()
61
+ self.train_eps = train_eps
62
+ self.sample_eps = sample_eps
63
+ self.disp_loss_weight = disp_loss_weight # λ: weight for dispersive loss
64
+ self.temperature = temperature # τ: temperature parameter
65
+
66
+ def prior_logp(self, z):
67
+ '''
68
+ Standard multivariate normal prior
69
+ Assume z is batched
70
+ '''
71
+ shape = th.tensor(z.size())
72
+ N = th.prod(shape[1:])
73
+ _fn = lambda x: -N / 2. * np.log(2 * np.pi) - th.sum(x ** 2) / 2.
74
+ return th.vmap(_fn)(z)
75
+
76
+
77
+ def check_interval(
78
+ self,
79
+ train_eps,
80
+ sample_eps,
81
+ *,
82
+ diffusion_form="SBDM",
83
+ sde=False,
84
+ reverse=False,
85
+ eval=False,
86
+ last_step_size=0.0,
87
+ ):
88
+ t0 = 0
89
+ t1 = 1
90
+ eps = train_eps if not eval else sample_eps
91
+ if (type(self.path_sampler) in [path.VPCPlan]):
92
+
93
+ t1 = 1 - eps if (not sde or last_step_size == 0) else 1 - last_step_size
94
+
95
+ elif (type(self.path_sampler) in [path.ICPlan, path.GVPCPlan]) \
96
+ and (self.model_type != ModelType.VELOCITY or sde): # avoid numerical issue by taking a first semi-implicit step
97
+
98
+ t0 = eps if (diffusion_form == "SBDM" and sde) or self.model_type != ModelType.VELOCITY else 0
99
+ t1 = 1 - eps if (not sde or last_step_size == 0) else 1 - last_step_size
100
+
101
+ if reverse:
102
+ t0, t1 = 1 - t0, 1 - t1
103
+
104
+ return t0, t1
105
+
106
+
107
+ def sample(self, x1):
108
+ """Sampling x0 & t based on shape of x1 (if needed)
109
+ Args:
110
+ x1 - data point; [batch, *dim]
111
+ """
112
+
113
+ x0 = th.randn_like(x1)
114
+ t0, t1 = self.check_interval(self.train_eps, self.sample_eps)
115
+ t = th.rand((x1.shape[0],)) * (t1 - t0) + t0
116
+ t = t.to(x1)
117
+ return t, x0, x1
118
+
119
+ def disp_loss(self, z):
120
+ """Dispersive Loss implementation (InfoNCE-L2 variant)
121
+ Args:
122
+ z: activation tensor from model layers
123
+ """
124
+ z = z.reshape((z.shape[0], -1)) # flatten
125
+ diff = th.nn.functional.pdist(z).pow(2) / z.shape[1] # pairwise distance
126
+ diff = th.cat((diff, diff, th.zeros(z.shape[0], device=z.device))) # match JAX implementation of full BxB matrix
127
+ # Apply temperature scaling: divide by temperature τ
128
+ diff = diff / self.temperature
129
+ return th.log(th.exp(-diff).mean()) # calculate loss
130
+
131
+ def training_losses(
132
+ self,
133
+ model,
134
+ x1,
135
+ model_noise=None,
136
+ model_kwargs=None
137
+ ):
138
+ """Loss for training the score model
139
+ Args:
140
+ - model: backbone model; could be score, noise, or velocity
141
+ - x1: datapoint
142
+ - model_noise: optional auxiliary model whose output is added to the backbone prediction
+ - model_kwargs: additional arguments for the model
143
+ """
144
+
145
+
146
+ if model_kwargs is None:
147
+ model_kwargs = {}
148
+
149
+ t, x0, x1 = self.sample(x1)
150
+ t, xt, ut = self.path_sampler.plan(t, x0, x1)
151
+
152
+ # Handle return_act for dispersive loss
153
+ disp_loss = 0
154
+ if model_noise is None:
155
+ model_output = model(xt, t, **model_kwargs)
156
+ # Check if model returns activations (for dispersive loss)
157
+ if "return_act" in model_kwargs and model_kwargs['return_act']:
158
+ model_output, act = model_output
159
+ if act is not None and len(act) > 0:
160
+ # Calculate dispersive loss for all blocks
161
+ for block_act in act:
162
+ disp_loss = disp_loss + self.disp_loss(block_act)
163
+ else:
164
+ model_output_pre = model(xt, t, **model_kwargs)
165
+ # Handle return_act for model_noise
166
+ if "return_act" in model_kwargs and model_kwargs['return_act']:
167
+ if isinstance(model_output_pre, tuple):
168
+ model_output_pre, act_pre = model_output_pre
169
+ else:
170
+ act_pre = None
171
+ else:
172
+ act_pre = None
173
+
174
+ model_output_noise = model_noise(xt, t, **model_kwargs)
175
+ # Handle return_act for model_noise
176
+ if "return_act" in model_kwargs and model_kwargs['return_act']:
177
+ if isinstance(model_output_noise, tuple):
178
+ model_output_noise, act_noise = model_output_noise
179
+ else:
180
+ act_noise = None
181
+ # Calculate dispersive loss for all blocks in model_noise (sitf2)
182
+ if act_noise is not None and len(act_noise) > 0:
183
+ # Calculate dispersive loss for each block and sum them
184
+ for block_act in act_noise:
185
+ disp_loss = disp_loss + self.disp_loss(block_act)
186
+ model_output = model_output_pre + model_output_noise
187
+
188
+ B, *_, C = xt.shape
189
+ assert model_output.size() == (B, *xt.size()[1:-1], C)
190
+
191
+ terms = {}
192
+ terms['pred'] = model_output
193
+ if self.model_type == ModelType.VELOCITY:
194
+ terms['loss'] = mean_flat(((model_output - ut) ** 2))
195
+ else:
196
+ _, drift_var = self.path_sampler.compute_drift(xt, t)
197
+ sigma_t, _ = self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, xt))
198
+ if self.loss_type in [WeightType.VELOCITY]:
199
+ weight = (drift_var / sigma_t) ** 2
200
+ elif self.loss_type in [WeightType.LIKELIHOOD]:
201
+ weight = drift_var / (sigma_t ** 2)
202
+ elif self.loss_type in [WeightType.NONE]:
203
+ weight = 1
204
+ else:
205
+ raise NotImplementedError()
206
+
207
+ if self.model_type == ModelType.NOISE:
208
+ terms['loss'] = mean_flat(weight * ((model_output - x0) ** 2))
209
+ else:
210
+ terms['loss'] = mean_flat(weight * ((model_output * sigma_t + x0) ** 2))
211
+
212
+ # Add dispersive loss to the total loss with weight λ
213
+ if disp_loss != 0:
214
+ terms['loss'] = terms['loss'] + self.disp_loss_weight * disp_loss
215
+
216
+ return terms
217
+
218
+
219
+ def get_drift(
220
+ self
221
+ ):
222
+ """member function for obtaining the drift of the probability flow ODE"""
223
+ def score_ode(x, t, model, **model_kwargs):
224
+ drift_mean, drift_var = self.path_sampler.compute_drift(x, t)
225
+ model_output = model(x, t, **model_kwargs)
226
+ return (-drift_mean + drift_var * model_output) # by change of variable
227
+
228
+ def noise_ode(x, t, model, **model_kwargs):
229
+ drift_mean, drift_var = self.path_sampler.compute_drift(x, t)
230
+ sigma_t, _ = self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, x))
231
+ model_output = model(x, t, **model_kwargs)
232
+ score = model_output / -sigma_t
233
+ return (-drift_mean + drift_var * score)
234
+
235
+ def velocity_ode(x, t, model, **model_kwargs):
236
+ model_output = model(x, t, **model_kwargs)
237
+ return model_output
238
+
239
+ if self.model_type == ModelType.NOISE:
240
+ drift_fn = noise_ode
241
+ elif self.model_type == ModelType.SCORE:
242
+ drift_fn = score_ode
243
+ else:
244
+ drift_fn = velocity_ode
245
+
246
+ def body_fn(x, t, model, **model_kwargs):
247
+ model_output = drift_fn(x, t, model, **model_kwargs)
248
+ assert model_output.shape == x.shape, "Output shape from ODE solver must match input shape"
249
+ return model_output
250
+
251
+ return body_fn
252
+
253
+
254
+ def get_score(
255
+ self,
256
+ ):
257
+ """member function for obtaining score of
258
+ x_t = alpha_t * x + sigma_t * eps"""
259
+ if self.model_type == ModelType.NOISE:
260
+ score_fn = lambda x, t, model, **kwargs: model(x, t, **kwargs) / -self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, x))[0]
261
+ elif self.model_type == ModelType.SCORE:
262
+ score_fn = lambda x, t, model, **kwargs: model(x, t, **kwargs)
263
+ elif self.model_type == ModelType.VELOCITY:
264
+ score_fn = lambda x, t, model, **kwargs: self.path_sampler.get_score_from_velocity(model(x, t, **kwargs), x, t)
265
+ else:
266
+ raise NotImplementedError()
267
+
268
+ return score_fn
269
+
270
+
271
+ class Sampler:
272
+ """Sampler class for the transport model"""
273
+ def __init__(
274
+ self,
275
+ transport,
276
+ ):
277
+ """Constructor for a general sampler; supporting different sampling methods
278
+ Args:
279
+ - transport: a transport object specifying model prediction & interpolant type
280
+ """
281
+
282
+ self.transport = transport
283
+ self.drift = self.transport.get_drift()
284
+ self.score = self.transport.get_score()
285
+
286
+ def __get_sde_diffusion_and_drift(
287
+ self,
288
+ *,
289
+ diffusion_form="SBDM",
290
+ diffusion_norm=1.0,
291
+ ):
292
+
293
+ def diffusion_fn(x, t):
294
+ diffusion = self.transport.path_sampler.compute_diffusion(x, t, form=diffusion_form, norm=diffusion_norm)
295
+ return diffusion
296
+
297
+ sde_drift = \
298
+ lambda x, t, model, **kwargs: \
299
+ self.drift(x, t, model, **kwargs) + diffusion_fn(x, t) * self.score(x, t, model, **kwargs)
300
+
301
+ sde_diffusion = diffusion_fn
302
+
303
+ return sde_drift, sde_diffusion
304
+
305
+ def __get_last_step(
306
+ self,
307
+ sde_drift,
308
+ *,
309
+ last_step,
310
+ last_step_size,
311
+ ):
312
+ """Get the last step function of the SDE solver"""
313
+
314
+ if last_step is None:
315
+ last_step_fn = \
316
+ lambda x, t, model, **model_kwargs: \
317
+ x
318
+ elif last_step == "Mean":
319
+ last_step_fn = \
320
+ lambda x, t, model, **model_kwargs: \
321
+ x + sde_drift(x, t, model, **model_kwargs) * last_step_size
322
+ elif last_step == "Tweedie":
323
+ alpha = self.transport.path_sampler.compute_alpha_t # simple aliasing; the original name was too long
324
+ sigma = self.transport.path_sampler.compute_sigma_t
325
+ last_step_fn = \
326
+ lambda x, t, model, **model_kwargs: \
327
+ x / alpha(t)[0][0] + (sigma(t)[0][0] ** 2) / alpha(t)[0][0] * self.score(x, t, model, **model_kwargs)
328
+ elif last_step == "Euler":
329
+ last_step_fn = \
330
+ lambda x, t, model, **model_kwargs: \
331
+ x + self.drift(x, t, model, **model_kwargs) * last_step_size
332
+ else:
333
+ raise NotImplementedError()
334
+
335
+ return last_step_fn
336
+
337
+ def sample_sde(
338
+ self,
339
+ *,
340
+ sampling_method="Euler",
341
+ diffusion_form="SBDM",
342
+ diffusion_norm=1.0,
343
+ last_step="Mean",
344
+ last_step_size=0.04,
345
+ num_steps=250,
346
+ ):
347
+ """returns a sampling function with given SDE settings
348
+ Args:
349
+ - sampling_method: type of sampler used in solving the SDE; defaults to Euler-Maruyama
350
+ - diffusion_form: functional form of the diffusion coefficient; defaults to the SBDM-matching form
351
+ - diffusion_norm: magnitude of the diffusion coefficient; defaults to 1
352
+ - last_step: type of the last step; defaults to "Mean" (None applies no extra last step)
353
+ - last_step_size: size of the last step; defaults to match the stride of 250 steps over [0,1]
354
+ - num_steps: total number of SDE integration steps
355
+ """
356
+
357
+ if last_step is None:
358
+ last_step_size = 0.0
359
+
360
+ sde_drift, sde_diffusion = self.__get_sde_diffusion_and_drift(
361
+ diffusion_form=diffusion_form,
362
+ diffusion_norm=diffusion_norm,
363
+ )
364
+
365
+ t0, t1 = self.transport.check_interval(
366
+ self.transport.train_eps,
367
+ self.transport.sample_eps,
368
+ diffusion_form=diffusion_form,
369
+ sde=True,
370
+ eval=True,
371
+ reverse=False,
372
+ last_step_size=last_step_size,
373
+ )
374
+
375
+ _sde = sde(
376
+ sde_drift,
377
+ sde_diffusion,
378
+ t0=t0,
379
+ t1=t1,
380
+ num_steps=num_steps,
381
+ sampler_type=sampling_method
382
+ )
383
+
384
+ last_step_fn = self.__get_last_step(sde_drift, last_step=last_step, last_step_size=last_step_size)
385
+
386
+
387
+ def _sample(init, model, **model_kwargs):
388
+ xs = _sde.sample(init, model, **model_kwargs)
389
+ ts = th.ones(init.size(0), device=init.device) * t1
390
+ x = last_step_fn(xs[-1], ts, model, **model_kwargs)
391
+ xs.append(x)
392
+
393
+ assert len(xs) == num_steps, "Number of samples does not match the number of steps"
394
+
395
+ return xs
396
+
397
+ return _sample
398
+
399
+ def sample_ode(
400
+ self,
401
+ *,
402
+ sampling_method="dopri5",
403
+ num_steps=50,
404
+ atol=1e-6,
405
+ rtol=1e-3,
406
+ reverse=False,
407
+ ):
408
+ """returns a sampling function with given ODE settings
409
+ Args:
410
+ - sampling_method: type of sampler used in solving the ODE; defaults to Dopri5
411
+ - num_steps:
412
+ - fixed solver (Euler, Heun): the actual number of integration steps performed
413
+ - adaptive solver (Dopri5): the number of datapoints saved during integration; produced by interpolation
414
+ - atol: absolute error tolerance for the solver
415
+ - rtol: relative error tolerance for the solver
416
+ - reverse: whether to solve the ODE in reverse (data to noise); defaults to False
417
+ """
418
+ if reverse:
419
+ drift = lambda x, t, model, **kwargs: self.drift(x, th.ones_like(t) * (1 - t), model, **kwargs)
420
+ else:
421
+ drift = self.drift
422
+
423
+ t0, t1 = self.transport.check_interval(
424
+ self.transport.train_eps,
425
+ self.transport.sample_eps,
426
+ sde=False,
427
+ eval=True,
428
+ reverse=reverse,
429
+ last_step_size=0.0,
430
+ )
431
+
432
+ _ode = ode(
433
+ drift=drift,
434
+ t0=t0,
435
+ t1=t1,
436
+ sampler_type=sampling_method,
437
+ num_steps=num_steps,
438
+ atol=atol,
439
+ rtol=rtol,
440
+ )
441
+
442
+ return _ode.sample
443
+
444
+ def sample_ode_likelihood(
445
+ self,
446
+ *,
447
+ sampling_method="dopri5",
448
+ num_steps=50,
449
+ atol=1e-6,
450
+ rtol=1e-3,
451
+ ):
452
+
453
+ """returns a sampling function for calculating likelihood with given ODE settings
454
+ Args:
455
+ - sampling_method: type of sampler used in solving the ODE; defaults to Dopri5
456
+ - num_steps:
457
+ - fixed solver (Euler, Heun): the actual number of integration steps performed
458
+ - adaptive solver (Dopri5): the number of datapoints saved during integration; produced by interpolation
459
+ - atol: absolute error tolerance for the solver
460
+ - rtol: relative error tolerance for the solver
461
+ """
462
+ def _likelihood_drift(x, t, model, **model_kwargs):
463
+ x, _ = x
464
+ eps = th.randint(2, x.size(), dtype=th.float, device=x.device) * 2 - 1
465
+ t = th.ones_like(t) * (1 - t)
466
+ with th.enable_grad():
467
+ x.requires_grad = True
468
+ grad = th.autograd.grad(th.sum(self.drift(x, t, model, **model_kwargs) * eps), x)[0]
469
+ logp_grad = th.sum(grad * eps, dim=tuple(range(1, len(x.size()))))
470
+ drift = self.drift(x, t, model, **model_kwargs)
471
+ return (-drift, logp_grad)
472
+
473
+ t0, t1 = self.transport.check_interval(
474
+ self.transport.train_eps,
475
+ self.transport.sample_eps,
476
+ sde=False,
477
+ eval=True,
478
+ reverse=False,
479
+ last_step_size=0.0,
480
+ )
481
+
482
+ _ode = ode(
483
+ drift=_likelihood_drift,
484
+ t0=t0,
485
+ t1=t1,
486
+ sampler_type=sampling_method,
487
+ num_steps=num_steps,
488
+ atol=atol,
489
+ rtol=rtol,
490
+ )
491
+
492
+ def _sample_fn(x, model, **model_kwargs):
493
+ init_logp = th.zeros(x.size(0)).to(x)
494
+ input = (x, init_logp)
495
+ drift, delta_logp = _ode.sample(input, model, **model_kwargs)
496
+ drift, delta_logp = drift[-1], delta_logp[-1]
497
+ prior_logp = self.transport.prior_logp(drift)
498
+ logp = prior_logp - delta_logp
499
+ return logp, drift
500
+
501
+ return _sample_fn
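
For intuition, the dispersive loss used in `training_losses` above can be exercised standalone on random activations (illustrative sketch): more dispersed activations yield a lower, more negative value, which is why the term is added to the task loss with weight lambda.

```python
import torch as th

def disp_loss(z, temperature=1.0):
    """InfoNCE-L2 dispersive loss, mirroring Transport.disp_loss above."""
    z = z.reshape(z.shape[0], -1)                         # flatten per sample
    diff = th.nn.functional.pdist(z).pow(2) / z.shape[1]  # squared L2 / dim
    diff = th.cat((diff, diff, th.zeros(z.shape[0])))     # full BxB matrix incl. diagonal
    return th.log(th.exp(-diff / temperature).mean())

tight = th.randn(16, 128) * 0.01   # nearly collapsed representations
spread = th.randn(16, 128) * 10.0  # well-dispersed representations
print(disp_loss(tight), disp_loss(spread))  # the dispersed batch scores lower
```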
Rectified_Noise/GVP-Disp/transport/utils.py ADDED
@@ -0,0 +1,29 @@
1
+ import torch as th
2
+
3
+ class EasyDict:
4
+
5
+ def __init__(self, sub_dict):
6
+ for k, v in sub_dict.items():
7
+ setattr(self, k, v)
8
+
9
+ def __getitem__(self, key):
10
+ return getattr(self, key)
11
+
12
+ def mean_flat(x):
13
+ """
14
+ Take the mean over all non-batch dimensions.
15
+ """
16
+ return th.mean(x, dim=list(range(1, len(x.size()))))
17
+
18
+ def log_state(state):
19
+ result = []
20
+
21
+ sorted_state = dict(sorted(state.items()))
22
+ for key, value in sorted_state.items():
23
+ # Check if the value is an instance of a class
24
+ if "<object" in str(value) or "object at" in str(value):
25
+ result.append(f"{key}: [{value.__class__.__name__}]")
26
+ else:
27
+ result.append(f"{key}: {value}")
28
+
29
+ return '\n'.join(result)
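
Tiny illustrative checks of the two helpers above (not part of the repo): `mean_flat` reduces every non-batch dimension, producing one loss value per sample, and `EasyDict` supports both attribute and item access.

```python
import torch as th
from transport.utils import EasyDict, mean_flat  # assumes this repo's package layout

x = th.arange(24, dtype=th.float32).reshape(2, 3, 4)
print(mean_flat(x).shape)  # torch.Size([2]): one scalar per batch element

d = EasyDict({"path_type": "GVP", "lr": 1e-4})
print(d.path_type, d["lr"])  # attribute and item access both work
```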
Rectified_Noise/GVP-Disp/w_training1.log ADDED
@@ -0,0 +1,927 @@
1
+ W0203 06:54:55.773000 72184 site-packages/torch/distributed/run.py:793]
2
+ W0203 06:54:55.773000 72184 site-packages/torch/distributed/run.py:793] *****************************************
3
+ W0203 06:54:55.773000 72184 site-packages/torch/distributed/run.py:793] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
4
+ W0203 06:54:55.773000 72184 site-packages/torch/distributed/run.py:793] *****************************************
5
+ [NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
6
+ [NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
7
+ [NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
8
+ [NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
9
+ Starting rank=0, seed=0, world_size=4.
10
+ [2026-02-03 06:55:12] Experiment directory created at results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None
11
+ Starting rank=3, seed=3, world_size=4.
12
+ Starting rank=1, seed=1, world_size=4.
13
+ Starting rank=2, seed=2, world_size=4.
14
+ [2026-02-03 06:55:47] Combined_model Parameters: 729,629,632
15
+ [2026-02-03 06:55:47] Total trainable parameters: 53,910,176
16
+ [2026-02-03 06:55:50] Dataset contains 1,281,167 images (/gemini/platform/public/zhaozy/hzh/datasets/Imagenet/train/)
17
+ [2026-02-03 06:55:50] Training for 100000 epochs...
18
+ [2026-02-03 06:55:50] Beginning epoch 0...
19
+ [2026-02-03 06:57:30] (step=0000100) Train Loss: -2.4789, Train Steps/Sec: 1.00
20
+ [2026-02-03 06:59:08] (step=0000200) Train Loss: -2.9649, Train Steps/Sec: 1.02
21
+ [2026-02-03 07:00:47] (step=0000300) Train Loss: -2.9777, Train Steps/Sec: 1.01
22
+ [2026-02-03 07:02:27] (step=0000400) Train Loss: -2.9828, Train Steps/Sec: 1.00
23
+ [2026-02-03 07:04:08] (step=0000500) Train Loss: -2.9877, Train Steps/Sec: 0.99
24
+ [2026-02-03 07:05:49] (step=0000600) Train Loss: -2.9875, Train Steps/Sec: 0.99
25
+ [2026-02-03 07:07:28] (step=0000700) Train Loss: -2.9882, Train Steps/Sec: 1.01
26
+ [2026-02-03 07:09:08] (step=0000800) Train Loss: -2.9861, Train Steps/Sec: 1.00
27
+ [2026-02-03 07:10:49] (step=0000900) Train Loss: -2.9862, Train Steps/Sec: 0.99
28
+ [2026-02-03 07:12:30] (step=0001000) Train Loss: -2.9886, Train Steps/Sec: 0.99
29
+ [2026-02-03 07:14:12] (step=0001100) Train Loss: -2.9849, Train Steps/Sec: 0.98
30
+ [2026-02-03 07:18:10] (step=0001200) Train Loss: -2.9885, Train Steps/Sec: 0.42
31
+ [2026-02-03 07:20:07] (step=0001300) Train Loss: -2.9864, Train Steps/Sec: 0.85
32
+ [2026-02-03 07:21:45] (step=0001400) Train Loss: -2.9867, Train Steps/Sec: 1.02
33
+ [2026-02-03 07:23:22] (step=0001500) Train Loss: -2.9863, Train Steps/Sec: 1.03
34
+ [2026-02-03 07:25:19] (step=0001600) Train Loss: -3.6813, Train Steps/Sec: 0.54
35
+ [2026-02-03 07:28:25] (step=0001700) Train Loss: -3.6843, Train Steps/Sec: 0.54
36
+ [2026-02-03 07:31:32] (step=0001800) Train Loss: -3.6813, Train Steps/Sec: 0.53
37
+ [2026-02-03 07:34:38] (step=0001900) Train Loss: -3.6828, Train Steps/Sec: 0.54
38
+ [2026-02-03 07:37:45] (step=0002000) Train Loss: -3.6826, Train Steps/Sec: 0.54
39
+ [2026-02-03 07:40:51] (step=0002100) Train Loss: -3.6799, Train Steps/Sec: 0.54
40
+ [2026-02-03 07:43:58] (step=0002200) Train Loss: -3.6784, Train Steps/Sec: 0.53
41
+ [2026-02-03 07:47:06] (step=0002300) Train Loss: -3.6824, Train Steps/Sec: 0.53
42
+ [2026-02-03 07:50:12] (step=0002400) Train Loss: -3.6787, Train Steps/Sec: 0.54
43
+ [2026-02-03 07:53:19] (step=0002500) Train Loss: -3.6771, Train Steps/Sec: 0.54
44
+ [2026-02-03 07:53:23] Beginning epoch 1...
45
+ [2026-02-03 07:56:29] (step=0002600) Train Loss: -3.6847, Train Steps/Sec: 0.53
46
+ [2026-02-03 07:59:35] (step=0002700) Train Loss: -3.6829, Train Steps/Sec: 0.54
47
+ [2026-02-03 08:02:42] (step=0002800) Train Loss: -3.6825, Train Steps/Sec: 0.54
48
+ [2026-02-03 08:05:49] (step=0002900) Train Loss: -3.6818, Train Steps/Sec: 0.54
49
+ [2026-02-03 08:08:55] (step=0003000) Train Loss: -3.6823, Train Steps/Sec: 0.54
50
+ [2026-02-03 08:12:01] (step=0003100) Train Loss: -3.6821, Train Steps/Sec: 0.54
51
+ [2026-02-03 08:15:09] (step=0003200) Train Loss: -3.6812, Train Steps/Sec: 0.53
52
+ [2026-02-03 08:18:16] (step=0003300) Train Loss: -3.6800, Train Steps/Sec: 0.53
53
+ [2026-02-03 08:21:20] (step=0003400) Train Loss: -3.6797, Train Steps/Sec: 0.54
54
+ [2026-02-03 08:24:27] (step=0003500) Train Loss: -3.6802, Train Steps/Sec: 0.54
55
+ [2026-02-03 08:27:34] (step=0003600) Train Loss: -3.6834, Train Steps/Sec: 0.53
56
+ [2026-02-03 08:30:40] (step=0003700) Train Loss: -3.6810, Train Steps/Sec: 0.54
57
+ [2026-02-03 08:33:48] (step=0003800) Train Loss: -3.6822, Train Steps/Sec: 0.53
58
+ [2026-02-03 08:36:55] (step=0003900) Train Loss: -3.6817, Train Steps/Sec: 0.53
59
+ [2026-02-03 08:40:01] (step=0004000) Train Loss: -3.6794, Train Steps/Sec: 0.54
60
+ [2026-02-03 08:43:08] (step=0004100) Train Loss: -3.6801, Train Steps/Sec: 0.54
61
+ [2026-02-03 08:46:15] (step=0004200) Train Loss: -3.6850, Train Steps/Sec: 0.54
62
+ [2026-02-03 08:49:21] (step=0004300) Train Loss: -3.6801, Train Steps/Sec: 0.54
63
+ [2026-02-03 08:52:28] (step=0004400) Train Loss: -3.6816, Train Steps/Sec: 0.54
64
+ [2026-02-03 08:55:35] (step=0004500) Train Loss: -3.6820, Train Steps/Sec: 0.53
65
+ [2026-02-03 08:58:42] (step=0004600) Train Loss: -3.6817, Train Steps/Sec: 0.54
66
+ [2026-02-03 09:01:49] (step=0004700) Train Loss: -3.6806, Train Steps/Sec: 0.54
67
+ [2026-02-03 09:04:56] (step=0004800) Train Loss: -3.6797, Train Steps/Sec: 0.53
68
+ [2026-02-03 09:08:03] (step=0004900) Train Loss: -3.6800, Train Steps/Sec: 0.53
69
+ [2026-02-03 09:11:10] (step=0005000) Train Loss: -3.6831, Train Steps/Sec: 0.54
+ [2026-02-03 09:11:18] Beginning epoch 2...
+ [2026-02-03 09:14:20] (step=0005100) Train Loss: -3.6803, Train Steps/Sec: 0.52
+ [2026-02-03 09:17:27] (step=0005200) Train Loss: -3.6804, Train Steps/Sec: 0.53
+ [2026-02-03 09:20:34] (step=0005300) Train Loss: -3.6804, Train Steps/Sec: 0.54
+ [2026-02-03 09:23:40] (step=0005400) Train Loss: -3.6823, Train Steps/Sec: 0.54
+ [2026-02-03 09:26:47] (step=0005500) Train Loss: -3.6819, Train Steps/Sec: 0.53
+ [2026-02-03 09:29:54] (step=0005600) Train Loss: -3.6834, Train Steps/Sec: 0.54
+ [2026-02-03 09:33:01] (step=0005700) Train Loss: -3.6805, Train Steps/Sec: 0.53
+ [2026-02-03 09:36:08] (step=0005800) Train Loss: -3.6827, Train Steps/Sec: 0.53
+ [2026-02-03 09:39:15] (step=0005900) Train Loss: -3.6821, Train Steps/Sec: 0.54
+ [2026-02-03 09:42:20] (step=0006000) Train Loss: -3.6807, Train Steps/Sec: 0.54
+ [2026-02-03 09:45:27] (step=0006100) Train Loss: -3.6814, Train Steps/Sec: 0.53
+ [2026-02-03 09:48:34] (step=0006200) Train Loss: -3.6825, Train Steps/Sec: 0.54
+ [2026-02-03 09:51:40] (step=0006300) Train Loss: -3.6799, Train Steps/Sec: 0.54
+ [2026-02-03 09:54:46] (step=0006400) Train Loss: -3.6797, Train Steps/Sec: 0.54
+ [2026-02-03 09:57:54] (step=0006500) Train Loss: -3.6820, Train Steps/Sec: 0.53
+ [2026-02-03 10:01:01] (step=0006600) Train Loss: -3.6789, Train Steps/Sec: 0.53
+ [2026-02-03 10:04:08] (step=0006700) Train Loss: -3.6804, Train Steps/Sec: 0.53
+ [2026-02-03 10:07:15] (step=0006800) Train Loss: -3.6803, Train Steps/Sec: 0.53
+ [2026-02-03 10:10:22] (step=0006900) Train Loss: -3.6787, Train Steps/Sec: 0.54
+ [2026-02-03 10:13:29] (step=0007000) Train Loss: -3.6818, Train Steps/Sec: 0.54
+ [2026-02-03 10:16:35] (step=0007100) Train Loss: -3.6813, Train Steps/Sec: 0.54
+ [2026-02-03 10:19:42] (step=0007200) Train Loss: -3.6820, Train Steps/Sec: 0.54
+ [2026-02-03 10:22:49] (step=0007300) Train Loss: -3.6810, Train Steps/Sec: 0.53
+ [2026-02-03 10:25:56] (step=0007400) Train Loss: -3.6828, Train Steps/Sec: 0.53
+ [2026-02-03 10:29:04] (step=0007500) Train Loss: -3.6821, Train Steps/Sec: 0.53
+ [2026-02-03 10:29:16] Beginning epoch 3...
+ [2026-02-03 10:32:13] (step=0007600) Train Loss: -3.6794, Train Steps/Sec: 0.53
+ [2026-02-03 10:35:20] (step=0007700) Train Loss: -3.6809, Train Steps/Sec: 0.53
+ [2026-02-03 10:38:27] (step=0007800) Train Loss: -3.6823, Train Steps/Sec: 0.54
+ [2026-02-03 10:41:34] (step=0007900) Train Loss: -3.6813, Train Steps/Sec: 0.54
+ [2026-02-03 10:44:41] (step=0008000) Train Loss: -3.6852, Train Steps/Sec: 0.53
+ [2026-02-03 10:47:47] (step=0008100) Train Loss: -3.6820, Train Steps/Sec: 0.54
+ [2026-02-03 10:50:54] (step=0008200) Train Loss: -3.6798, Train Steps/Sec: 0.54
+ [2026-02-03 10:54:01] (step=0008300) Train Loss: -3.6772, Train Steps/Sec: 0.54
+ [2026-02-03 10:57:07] (step=0008400) Train Loss: -3.6800, Train Steps/Sec: 0.54
+ [2026-02-03 11:00:13] (step=0008500) Train Loss: -3.6818, Train Steps/Sec: 0.54
+ [2026-02-03 11:03:19] (step=0008600) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-03 11:06:23] (step=0008700) Train Loss: -3.6806, Train Steps/Sec: 0.54
+ [2026-02-03 11:09:29] (step=0008800) Train Loss: -3.6762, Train Steps/Sec: 0.54
+ [2026-02-03 11:12:36] (step=0008900) Train Loss: -3.6838, Train Steps/Sec: 0.54
+ [2026-02-03 11:15:43] (step=0009000) Train Loss: -3.6826, Train Steps/Sec: 0.53
+ [2026-02-03 11:18:50] (step=0009100) Train Loss: -3.6806, Train Steps/Sec: 0.54
+ [2026-02-03 11:21:57] (step=0009200) Train Loss: -3.6806, Train Steps/Sec: 0.54
+ [2026-02-03 11:25:04] (step=0009300) Train Loss: -3.6819, Train Steps/Sec: 0.54
+ [2026-02-03 11:28:11] (step=0009400) Train Loss: -3.6785, Train Steps/Sec: 0.53
+ [2026-02-03 11:31:17] (step=0009500) Train Loss: -3.6769, Train Steps/Sec: 0.54
+ [2026-02-03 11:34:24] (step=0009600) Train Loss: -3.6822, Train Steps/Sec: 0.54
+ [2026-02-03 11:37:31] (step=0009700) Train Loss: -3.6856, Train Steps/Sec: 0.54
+ [2026-02-03 11:40:38] (step=0009800) Train Loss: -3.6803, Train Steps/Sec: 0.53
+ [2026-02-03 11:43:45] (step=0009900) Train Loss: -3.6805, Train Steps/Sec: 0.54
+ [2026-02-03 11:46:51] (step=0010000) Train Loss: -3.6819, Train Steps/Sec: 0.54
+ [2026-02-03 11:47:07] Beginning epoch 4...
+ [2026-02-03 11:50:01] (step=0010100) Train Loss: -3.6850, Train Steps/Sec: 0.53
+ [2026-02-03 11:53:08] (step=0010200) Train Loss: -3.6816, Train Steps/Sec: 0.53
+ [2026-02-03 11:56:15] (step=0010300) Train Loss: -3.6836, Train Steps/Sec: 0.53
+ [2026-02-03 11:59:22] (step=0010400) Train Loss: -3.6789, Train Steps/Sec: 0.53
+ [2026-02-03 12:02:29] (step=0010500) Train Loss: -3.6793, Train Steps/Sec: 0.54
+ [2026-02-03 12:05:36] (step=0010600) Train Loss: -3.6834, Train Steps/Sec: 0.54
+ [2026-02-03 12:08:42] (step=0010700) Train Loss: -3.6842, Train Steps/Sec: 0.54
+ [2026-02-03 12:11:49] (step=0010800) Train Loss: -3.6822, Train Steps/Sec: 0.54
+ [2026-02-03 12:14:56] (step=0010900) Train Loss: -3.6813, Train Steps/Sec: 0.54
+ [2026-02-03 12:18:03] (step=0011000) Train Loss: -3.6843, Train Steps/Sec: 0.53
+ [2026-02-03 12:21:09] (step=0011100) Train Loss: -3.6821, Train Steps/Sec: 0.54
+ [2026-02-03 12:24:15] (step=0011200) Train Loss: -3.6787, Train Steps/Sec: 0.54
+ [2026-02-03 12:27:20] (step=0011300) Train Loss: -3.6828, Train Steps/Sec: 0.54
+ [2026-02-03 12:30:27] (step=0011400) Train Loss: -3.6830, Train Steps/Sec: 0.53
+ [2026-02-03 12:33:34] (step=0011500) Train Loss: -3.6784, Train Steps/Sec: 0.53
+ [2026-02-03 12:36:41] (step=0011600) Train Loss: -3.6831, Train Steps/Sec: 0.53
+ [2026-02-03 12:39:48] (step=0011700) Train Loss: -3.6834, Train Steps/Sec: 0.53
+ [2026-02-03 12:42:55] (step=0011800) Train Loss: -3.6808, Train Steps/Sec: 0.53
+ [2026-02-03 12:46:02] (step=0011900) Train Loss: -3.6810, Train Steps/Sec: 0.54
+ [2026-02-03 12:49:09] (step=0012000) Train Loss: -3.6821, Train Steps/Sec: 0.53
+ [2026-02-03 12:52:16] (step=0012100) Train Loss: -3.6827, Train Steps/Sec: 0.53
+ [2026-02-03 12:55:23] (step=0012200) Train Loss: -3.6827, Train Steps/Sec: 0.54
+ [2026-02-03 12:58:30] (step=0012300) Train Loss: -3.6808, Train Steps/Sec: 0.54
+ [2026-02-03 13:01:37] (step=0012400) Train Loss: -3.6818, Train Steps/Sec: 0.53
+ [2026-02-03 13:04:44] (step=0012500) Train Loss: -3.6809, Train Steps/Sec: 0.54
+ [2026-02-03 13:05:03] Beginning epoch 5...
+ [2026-02-03 13:07:54] (step=0012600) Train Loss: -3.6814, Train Steps/Sec: 0.52
+ [2026-02-03 13:11:01] (step=0012700) Train Loss: -3.6842, Train Steps/Sec: 0.53
+ [2026-02-03 13:14:08] (step=0012800) Train Loss: -3.6816, Train Steps/Sec: 0.54
+ [2026-02-03 13:17:15] (step=0012900) Train Loss: -3.6790, Train Steps/Sec: 0.53
+ [2026-02-03 13:20:22] (step=0013000) Train Loss: -3.6812, Train Steps/Sec: 0.53
+ [2026-02-03 13:23:29] (step=0013100) Train Loss: -3.6792, Train Steps/Sec: 0.53
+ [2026-02-03 13:26:36] (step=0013200) Train Loss: -3.6836, Train Steps/Sec: 0.53
+ [2026-02-03 13:29:43] (step=0013300) Train Loss: -3.6845, Train Steps/Sec: 0.54
+ [2026-02-03 13:32:50] (step=0013400) Train Loss: -3.6822, Train Steps/Sec: 0.53
+ [2026-02-03 13:35:57] (step=0013500) Train Loss: -3.6798, Train Steps/Sec: 0.53
+ [2026-02-03 13:39:04] (step=0013600) Train Loss: -3.6828, Train Steps/Sec: 0.54
+ [2026-02-03 13:42:11] (step=0013700) Train Loss: -3.6799, Train Steps/Sec: 0.54
+ [2026-02-03 13:45:18] (step=0013800) Train Loss: -3.6812, Train Steps/Sec: 0.53
+ [2026-02-03 13:48:22] (step=0013900) Train Loss: -3.6831, Train Steps/Sec: 0.54
+ [2026-02-03 13:51:29] (step=0014000) Train Loss: -3.6808, Train Steps/Sec: 0.54
+ [2026-02-03 13:54:36] (step=0014100) Train Loss: -3.6823, Train Steps/Sec: 0.53
+ [2026-02-03 13:57:43] (step=0014200) Train Loss: -3.6795, Train Steps/Sec: 0.54
+ [2026-02-03 14:00:50] (step=0014300) Train Loss: -3.6795, Train Steps/Sec: 0.53
+ [2026-02-03 14:03:57] (step=0014400) Train Loss: -3.6838, Train Steps/Sec: 0.54
+ [2026-02-03 14:07:04] (step=0014500) Train Loss: -3.6832, Train Steps/Sec: 0.53
+ [2026-02-03 14:10:11] (step=0014600) Train Loss: -3.6832, Train Steps/Sec: 0.53
+ [2026-02-03 14:13:18] (step=0014700) Train Loss: -3.6784, Train Steps/Sec: 0.54
+ [2026-02-03 14:16:24] (step=0014800) Train Loss: -3.6824, Train Steps/Sec: 0.54
+ [2026-02-03 14:19:31] (step=0014900) Train Loss: -3.6825, Train Steps/Sec: 0.54
+ [2026-02-03 14:22:38] (step=0015000) Train Loss: -3.6822, Train Steps/Sec: 0.53
+ [2026-02-03 14:23:01] Beginning epoch 6...
+ [2026-02-03 14:25:48] (step=0015100) Train Loss: -3.6831, Train Steps/Sec: 0.53
+ [2026-02-03 14:28:55] (step=0015200) Train Loss: -3.6786, Train Steps/Sec: 0.53
+ [2026-02-03 14:32:02] (step=0015300) Train Loss: -3.6826, Train Steps/Sec: 0.54
+ [2026-02-03 14:35:08] (step=0015400) Train Loss: -3.6817, Train Steps/Sec: 0.54
+ [2026-02-03 14:38:15] (step=0015500) Train Loss: -3.6806, Train Steps/Sec: 0.54
+ [2026-02-03 14:41:21] (step=0015600) Train Loss: -3.6796, Train Steps/Sec: 0.54
+ [2026-02-03 14:44:28] (step=0015700) Train Loss: -3.6839, Train Steps/Sec: 0.54
+ [2026-02-03 14:47:36] (step=0015800) Train Loss: -3.6846, Train Steps/Sec: 0.53
+ [2026-02-03 14:50:43] (step=0015900) Train Loss: -3.6828, Train Steps/Sec: 0.53
+ [2026-02-03 14:53:50] (step=0016000) Train Loss: -3.6828, Train Steps/Sec: 0.54
+ [2026-02-03 14:56:57] (step=0016100) Train Loss: -3.6789, Train Steps/Sec: 0.53
+ [2026-02-03 15:00:04] (step=0016200) Train Loss: -3.6810, Train Steps/Sec: 0.53
+ [2026-02-03 15:03:11] (step=0016300) Train Loss: -3.6799, Train Steps/Sec: 0.53
+ [2026-02-03 15:06:19] (step=0016400) Train Loss: -3.6806, Train Steps/Sec: 0.53
+ [2026-02-03 15:09:24] (step=0016500) Train Loss: -3.6828, Train Steps/Sec: 0.54
+ [2026-02-03 15:12:31] (step=0016600) Train Loss: -3.6781, Train Steps/Sec: 0.54
+ [2026-02-03 15:15:37] (step=0016700) Train Loss: -3.6830, Train Steps/Sec: 0.54
+ [2026-02-03 15:18:44] (step=0016800) Train Loss: -3.6756, Train Steps/Sec: 0.54
+ [2026-02-03 15:21:51] (step=0016900) Train Loss: -3.6798, Train Steps/Sec: 0.54
+ [2026-02-03 15:24:58] (step=0017000) Train Loss: -3.6813, Train Steps/Sec: 0.53
+ [2026-02-03 15:28:04] (step=0017100) Train Loss: -3.6807, Train Steps/Sec: 0.54
+ [2026-02-03 15:31:11] (step=0017200) Train Loss: -3.6818, Train Steps/Sec: 0.54
+ [2026-02-03 15:34:18] (step=0017300) Train Loss: -3.6800, Train Steps/Sec: 0.54
+ [2026-02-03 15:37:25] (step=0017400) Train Loss: -3.6836, Train Steps/Sec: 0.53
+ [2026-02-03 15:40:32] (step=0017500) Train Loss: -3.6807, Train Steps/Sec: 0.53
+ [2026-02-03 15:40:59] Beginning epoch 7...
+ [2026-02-03 15:43:42] (step=0017600) Train Loss: -3.6829, Train Steps/Sec: 0.53
+ [2026-02-03 15:46:49] (step=0017700) Train Loss: -3.6790, Train Steps/Sec: 0.53
+ [2026-02-03 15:49:56] (step=0017800) Train Loss: -3.6850, Train Steps/Sec: 0.53
+ [2026-02-03 15:53:04] (step=0017900) Train Loss: -3.6803, Train Steps/Sec: 0.53
+ [2026-02-03 15:56:11] (step=0018000) Train Loss: -3.6835, Train Steps/Sec: 0.53
+ [2026-02-03 15:59:18] (step=0018100) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-03 16:02:25] (step=0018200) Train Loss: -3.6788, Train Steps/Sec: 0.53
+ [2026-02-03 16:05:31] (step=0018300) Train Loss: -3.6786, Train Steps/Sec: 0.54
+ [2026-02-03 16:08:39] (step=0018400) Train Loss: -3.6812, Train Steps/Sec: 0.53
+ [2026-02-03 16:11:46] (step=0018500) Train Loss: -3.6809, Train Steps/Sec: 0.53
+ [2026-02-03 16:14:52] (step=0018600) Train Loss: -3.6803, Train Steps/Sec: 0.54
+ [2026-02-03 16:17:59] (step=0018700) Train Loss: -3.6822, Train Steps/Sec: 0.54
+ [2026-02-03 16:21:06] (step=0018800) Train Loss: -3.6819, Train Steps/Sec: 0.53
+ [2026-02-03 16:24:12] (step=0018900) Train Loss: -3.6834, Train Steps/Sec: 0.54
+ [2026-02-03 16:27:19] (step=0019000) Train Loss: -3.6824, Train Steps/Sec: 0.54
+ [2026-02-03 16:30:24] (step=0019100) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-03 16:33:31] (step=0019200) Train Loss: -3.6826, Train Steps/Sec: 0.53
+ [2026-02-03 16:36:38] (step=0019300) Train Loss: -3.6774, Train Steps/Sec: 0.53
+ [2026-02-03 16:39:45] (step=0019400) Train Loss: -3.6809, Train Steps/Sec: 0.54
+ [2026-02-03 16:42:51] (step=0019500) Train Loss: -3.6837, Train Steps/Sec: 0.54
+ [2026-02-03 16:45:59] (step=0019600) Train Loss: -3.6828, Train Steps/Sec: 0.53
+ [2026-02-03 16:49:06] (step=0019700) Train Loss: -3.6803, Train Steps/Sec: 0.53
+ [2026-02-03 16:52:13] (step=0019800) Train Loss: -3.6828, Train Steps/Sec: 0.53
+ [2026-02-03 16:55:20] (step=0019900) Train Loss: -3.6832, Train Steps/Sec: 0.53
+ [2026-02-03 16:58:27] (step=0020000) Train Loss: -3.6837, Train Steps/Sec: 0.54
+ [2026-02-03 16:58:57] Beginning epoch 8...
+ [2026-02-03 17:01:37] (step=0020100) Train Loss: -3.6820, Train Steps/Sec: 0.52
+ [2026-02-03 17:04:45] (step=0020200) Train Loss: -3.6798, Train Steps/Sec: 0.53
+ [2026-02-03 17:07:52] (step=0020300) Train Loss: -3.6807, Train Steps/Sec: 0.53
+ [2026-02-03 17:10:59] (step=0020400) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-03 17:14:05] (step=0020500) Train Loss: -3.6794, Train Steps/Sec: 0.54
+ [2026-02-03 17:17:13] (step=0020600) Train Loss: -3.6833, Train Steps/Sec: 0.53
+ [2026-02-03 17:20:20] (step=0020700) Train Loss: -3.6802, Train Steps/Sec: 0.53
+ [2026-02-03 17:23:27] (step=0020800) Train Loss: -3.6812, Train Steps/Sec: 0.53
+ [2026-02-03 17:26:34] (step=0020900) Train Loss: -3.6822, Train Steps/Sec: 0.54
+ [2026-02-03 17:29:41] (step=0021000) Train Loss: -3.6795, Train Steps/Sec: 0.53
+ [2026-02-03 17:32:48] (step=0021100) Train Loss: -3.6794, Train Steps/Sec: 0.53
+ [2026-02-03 17:35:55] (step=0021200) Train Loss: 3.9167, Train Steps/Sec: 0.53
+ [2026-02-03 17:39:02] (step=0021300) Train Loss: -3.6821, Train Steps/Sec: 0.54
+ [2026-02-03 17:42:09] (step=0021400) Train Loss: -3.6805, Train Steps/Sec: 0.53
+ [2026-02-03 17:45:16] (step=0021500) Train Loss: -3.6808, Train Steps/Sec: 0.54
+ [2026-02-03 17:48:23] (step=0021600) Train Loss: -3.6812, Train Steps/Sec: 0.54
+ [2026-02-03 17:51:28] (step=0021700) Train Loss: -3.6817, Train Steps/Sec: 0.54
+ [2026-02-03 17:54:34] (step=0021800) Train Loss: -3.6846, Train Steps/Sec: 0.54
+ [2026-02-03 17:57:41] (step=0021900) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-03 18:00:48] (step=0022000) Train Loss: -3.6807, Train Steps/Sec: 0.54
+ [2026-02-03 18:03:55] (step=0022100) Train Loss: -3.6799, Train Steps/Sec: 0.53
+ [2026-02-03 18:07:02] (step=0022200) Train Loss: -3.6788, Train Steps/Sec: 0.53
+ [2026-02-03 18:10:09] (step=0022300) Train Loss: -3.6821, Train Steps/Sec: 0.53
+ [2026-02-03 18:13:16] (step=0022400) Train Loss: -3.6808, Train Steps/Sec: 0.53
+ [2026-02-03 18:16:24] (step=0022500) Train Loss: -3.6836, Train Steps/Sec: 0.53
+ [2026-02-03 18:16:58] Beginning epoch 9...
+ [2026-02-03 18:19:34] (step=0022600) Train Loss: -3.6835, Train Steps/Sec: 0.53
+ [2026-02-03 18:22:40] (step=0022700) Train Loss: -3.6848, Train Steps/Sec: 0.54
+ [2026-02-03 18:25:47] (step=0022800) Train Loss: -3.6778, Train Steps/Sec: 0.54
+ [2026-02-03 18:28:53] (step=0022900) Train Loss: -3.6829, Train Steps/Sec: 0.54
+ [2026-02-03 18:32:00] (step=0023000) Train Loss: -3.6807, Train Steps/Sec: 0.54
+ [2026-02-03 18:35:07] (step=0023100) Train Loss: -3.6846, Train Steps/Sec: 0.53
+ [2026-02-03 18:38:14] (step=0023200) Train Loss: -3.6809, Train Steps/Sec: 0.54
+ [2026-02-03 18:41:21] (step=0023300) Train Loss: -3.6807, Train Steps/Sec: 0.53
+ [2026-02-03 18:44:28] (step=0023400) Train Loss: -3.6812, Train Steps/Sec: 0.54
+ [2026-02-03 18:47:35] (step=0023500) Train Loss: -3.6811, Train Steps/Sec: 0.53
+ [2026-02-03 18:50:42] (step=0023600) Train Loss: -3.6800, Train Steps/Sec: 0.53
+ [2026-02-03 18:53:49] (step=0023700) Train Loss: -3.6848, Train Steps/Sec: 0.53
+ [2026-02-03 18:56:56] (step=0023800) Train Loss: -3.6824, Train Steps/Sec: 0.54
+ [2026-02-03 19:00:03] (step=0023900) Train Loss: -3.6820, Train Steps/Sec: 0.54
+ [2026-02-03 19:03:09] (step=0024000) Train Loss: -3.6848, Train Steps/Sec: 0.54
+ [2026-02-03 19:06:16] (step=0024100) Train Loss: -3.6791, Train Steps/Sec: 0.54
+ [2026-02-03 19:09:22] (step=0024200) Train Loss: -3.6825, Train Steps/Sec: 0.54
+ [2026-02-03 19:12:30] (step=0024300) Train Loss: -3.6800, Train Steps/Sec: 0.53
+ [2026-02-03 19:15:35] (step=0024400) Train Loss: -3.6792, Train Steps/Sec: 0.54
+ [2026-02-03 19:18:42] (step=0024500) Train Loss: -3.6807, Train Steps/Sec: 0.53
+ [2026-02-03 19:21:49] (step=0024600) Train Loss: -3.6796, Train Steps/Sec: 0.53
+ [2026-02-03 19:24:56] (step=0024700) Train Loss: -3.6814, Train Steps/Sec: 0.53
+ [2026-02-03 19:28:03] (step=0024800) Train Loss: -3.6832, Train Steps/Sec: 0.54
+ [2026-02-03 19:31:10] (step=0024900) Train Loss: -3.6832, Train Steps/Sec: 0.54
+ [2026-02-03 19:34:18] (step=0025000) Train Loss: -3.6782, Train Steps/Sec: 0.53
+ 25000
+ 25000
+ 25000
+ 25000
+ [2026-02-03 19:34:18] Saved checkpoint to results_256_gvp_disp/depth-mu-2-002-SiT-XL-2-GVP-velocity-None/checkpoints/0025000.pt
+ [2026-02-03 19:34:56] Beginning epoch 10...
+ [2026-02-03 19:37:29] (step=0025100) Train Loss: -3.6836, Train Steps/Sec: 0.52
+ [2026-02-03 19:40:21] Generating EMA samples...
+ [2026-02-03 19:40:36] (step=0025200) Train Loss: -3.6796, Train Steps/Sec: 0.53
+ [2026-02-03 19:43:43] (step=0025300) Train Loss: -3.6818, Train Steps/Sec: 0.53
+ [2026-02-03 19:46:50] (step=0025400) Train Loss: -3.6789, Train Steps/Sec: 0.54
+ [2026-02-03 19:49:58] (step=0025500) Train Loss: -3.6817, Train Steps/Sec: 0.53
+ [2026-02-03 19:53:05] (step=0025600) Train Loss: -3.6804, Train Steps/Sec: 0.53
+ [2026-02-03 19:56:11] (step=0025700) Train Loss: -3.6800, Train Steps/Sec: 0.54
+ [2026-02-03 19:59:19] (step=0025800) Train Loss: -3.6832, Train Steps/Sec: 0.53
+ [2026-02-03 20:02:25] (step=0025900) Train Loss: -3.6825, Train Steps/Sec: 0.54
+ [2026-02-03 20:05:32] (step=0026000) Train Loss: -3.6812, Train Steps/Sec: 0.54
+ [2026-02-03 20:08:39] (step=0026100) Train Loss: -3.6827, Train Steps/Sec: 0.54
+ [2026-02-03 20:11:47] (step=0026200) Train Loss: -3.6793, Train Steps/Sec: 0.53
+ [2026-02-03 20:14:54] (step=0026300) Train Loss: -3.6817, Train Steps/Sec: 0.53
+ [2026-02-03 20:18:01] (step=0026400) Train Loss: -3.6813, Train Steps/Sec: 0.54
+ [2026-02-03 20:21:07] (step=0026500) Train Loss: -3.6806, Train Steps/Sec: 0.54
+ [2026-02-03 20:24:14] (step=0026600) Train Loss: -3.6842, Train Steps/Sec: 0.54
+ [2026-02-03 20:27:20] (step=0026700) Train Loss: -3.6809, Train Steps/Sec: 0.54
+ [2026-02-03 20:30:27] (step=0026800) Train Loss: -3.6849, Train Steps/Sec: 0.53
+ [2026-02-03 20:33:34] (step=0026900) Train Loss: -3.6802, Train Steps/Sec: 0.53
+ [2026-02-03 20:36:39] (step=0027000) Train Loss: -3.6792, Train Steps/Sec: 0.54
+ [2026-02-03 20:39:46] (step=0027100) Train Loss: -3.6843, Train Steps/Sec: 0.54
+ [2026-02-03 20:42:52] (step=0027200) Train Loss: -3.6821, Train Steps/Sec: 0.54
+ [2026-02-03 20:45:59] (step=0027300) Train Loss: -3.6825, Train Steps/Sec: 0.54
+ [2026-02-03 20:49:06] (step=0027400) Train Loss: -3.6775, Train Steps/Sec: 0.54
+ [2026-02-03 20:52:12] (step=0027500) Train Loss: -3.6800, Train Steps/Sec: 0.54
+ [2026-02-03 20:52:54] Beginning epoch 11...
+ [2026-02-03 20:55:23] (step=0027600) Train Loss: -3.6853, Train Steps/Sec: 0.53
+ [2026-02-03 20:58:29] (step=0027700) Train Loss: -3.6817, Train Steps/Sec: 0.54
+ [2026-02-03 21:01:37] (step=0027800) Train Loss: -3.6811, Train Steps/Sec: 0.53
+ [2026-02-03 21:04:43] (step=0027900) Train Loss: -3.6810, Train Steps/Sec: 0.54
+ [2026-02-03 21:07:50] (step=0028000) Train Loss: -3.6827, Train Steps/Sec: 0.53
+ [2026-02-03 21:10:57] (step=0028100) Train Loss: -3.6839, Train Steps/Sec: 0.53
+ [2026-02-03 21:14:04] (step=0028200) Train Loss: -3.6817, Train Steps/Sec: 0.54
+ [2026-02-03 21:17:11] (step=0028300) Train Loss: -3.6830, Train Steps/Sec: 0.53
+ [2026-02-03 21:20:18] (step=0028400) Train Loss: -3.6797, Train Steps/Sec: 0.53
+ [2026-02-03 21:23:25] (step=0028500) Train Loss: -3.6797, Train Steps/Sec: 0.53
+ [2026-02-03 21:26:32] (step=0028600) Train Loss: -3.6821, Train Steps/Sec: 0.54
+ [2026-02-03 21:29:39] (step=0028700) Train Loss: -3.6823, Train Steps/Sec: 0.54
+ [2026-02-03 21:32:45] (step=0028800) Train Loss: -3.6812, Train Steps/Sec: 0.54
+ [2026-02-03 21:35:53] (step=0028900) Train Loss: -3.6858, Train Steps/Sec: 0.53
+ [2026-02-03 21:38:59] (step=0029000) Train Loss: -3.6842, Train Steps/Sec: 0.54
+ [2026-02-03 21:42:06] (step=0029100) Train Loss: -3.6836, Train Steps/Sec: 0.54
+ [2026-02-03 21:45:14] (step=0029200) Train Loss: -3.6813, Train Steps/Sec: 0.53
+ [2026-02-03 21:48:20] (step=0029300) Train Loss: -3.6783, Train Steps/Sec: 0.54
+ [2026-02-03 21:51:27] (step=0029400) Train Loss: -3.6829, Train Steps/Sec: 0.53
+ [2026-02-03 21:54:34] (step=0029500) Train Loss: -3.6812, Train Steps/Sec: 0.54
+ [2026-02-03 21:57:39] (step=0029600) Train Loss: -3.6823, Train Steps/Sec: 0.54
+ [2026-02-03 22:00:46] (step=0029700) Train Loss: -3.6828, Train Steps/Sec: 0.53
+ [2026-02-03 22:03:53] (step=0029800) Train Loss: -3.6826, Train Steps/Sec: 0.54
+ [2026-02-03 22:06:59] (step=0029900) Train Loss: -3.6814, Train Steps/Sec: 0.54
+ [2026-02-03 22:10:06] (step=0030000) Train Loss: -3.6837, Train Steps/Sec: 0.54
+ [2026-02-03 22:10:51] Beginning epoch 12...
+ [2026-02-03 22:13:16] (step=0030100) Train Loss: -3.6822, Train Steps/Sec: 0.53
+ [2026-02-03 22:16:22] (step=0030200) Train Loss: -3.6787, Train Steps/Sec: 0.54
+ [2026-02-03 22:19:29] (step=0030300) Train Loss: -3.6815, Train Steps/Sec: 0.53
+ [2026-02-03 22:22:37] (step=0030400) Train Loss: -3.6806, Train Steps/Sec: 0.53
+ [2026-02-03 22:25:44] (step=0030500) Train Loss: -3.6825, Train Steps/Sec: 0.53
+ [2026-02-03 22:28:51] (step=0030600) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-03 22:31:58] (step=0030700) Train Loss: -3.6838, Train Steps/Sec: 0.54
+ [2026-02-03 22:35:05] (step=0030800) Train Loss: -3.6822, Train Steps/Sec: 0.53
+ [2026-02-03 22:38:11] (step=0030900) Train Loss: -3.6823, Train Steps/Sec: 0.54
+ [2026-02-03 22:41:18] (step=0031000) Train Loss: -3.6815, Train Steps/Sec: 0.54
+ [2026-02-03 22:44:25] (step=0031100) Train Loss: -3.6796, Train Steps/Sec: 0.53
+ [2026-02-03 22:47:32] (step=0031200) Train Loss: -3.6812, Train Steps/Sec: 0.53
+ [2026-02-03 22:50:39] (step=0031300) Train Loss: -3.6806, Train Steps/Sec: 0.53
+ [2026-02-03 22:53:46] (step=0031400) Train Loss: -3.6822, Train Steps/Sec: 0.53
+ [2026-02-03 22:56:53] (step=0031500) Train Loss: -3.6821, Train Steps/Sec: 0.54
+ [2026-02-03 23:00:00] (step=0031600) Train Loss: -3.6803, Train Steps/Sec: 0.53
+ [2026-02-03 23:03:07] (step=0031700) Train Loss: -3.6843, Train Steps/Sec: 0.53
+ [2026-02-03 23:06:14] (step=0031800) Train Loss: -3.6832, Train Steps/Sec: 0.53
+ [2026-02-03 23:09:21] (step=0031900) Train Loss: -3.6809, Train Steps/Sec: 0.54
+ [2026-02-03 23:12:28] (step=0032000) Train Loss: -3.6822, Train Steps/Sec: 0.54
+ [2026-02-03 23:15:34] (step=0032100) Train Loss: -3.6786, Train Steps/Sec: 0.54
+ [2026-02-03 23:18:39] (step=0032200) Train Loss: -3.6814, Train Steps/Sec: 0.54
+ [2026-02-03 23:21:46] (step=0032300) Train Loss: -3.6839, Train Steps/Sec: 0.54
+ [2026-02-03 23:24:52] (step=0032400) Train Loss: -3.6822, Train Steps/Sec: 0.54
+ [2026-02-03 23:27:59] (step=0032500) Train Loss: -3.6809, Train Steps/Sec: 0.53
+ [2026-02-03 23:28:48] Beginning epoch 13...
+ [2026-02-03 23:31:09] (step=0032600) Train Loss: -3.6846, Train Steps/Sec: 0.53
+ [2026-02-03 23:34:16] (step=0032700) Train Loss: -3.6841, Train Steps/Sec: 0.53
+ [2026-02-03 23:37:24] (step=0032800) Train Loss: -3.6813, Train Steps/Sec: 0.53
+ [2026-02-03 23:40:31] (step=0032900) Train Loss: -3.6792, Train Steps/Sec: 0.53
+ [2026-02-03 23:43:38] (step=0033000) Train Loss: -3.6782, Train Steps/Sec: 0.53
+ [2026-02-03 23:46:45] (step=0033100) Train Loss: -3.6821, Train Steps/Sec: 0.54
+ [2026-02-03 23:49:52] (step=0033200) Train Loss: -3.6819, Train Steps/Sec: 0.53
+ [2026-02-03 23:52:59] (step=0033300) Train Loss: -3.6793, Train Steps/Sec: 0.54
+ [2026-02-03 23:56:06] (step=0033400) Train Loss: -3.6810, Train Steps/Sec: 0.54
+ [2026-02-03 23:59:13] (step=0033500) Train Loss: -3.6816, Train Steps/Sec: 0.53
+ [2026-02-04 00:02:20] (step=0033600) Train Loss: -3.6831, Train Steps/Sec: 0.54
+ [2026-02-04 00:05:26] (step=0033700) Train Loss: -3.6831, Train Steps/Sec: 0.54
+ [2026-02-04 00:08:33] (step=0033800) Train Loss: -3.6826, Train Steps/Sec: 0.54
+ [2026-02-04 00:11:40] (step=0033900) Train Loss: -3.6804, Train Steps/Sec: 0.54
+ [2026-02-04 00:14:46] (step=0034000) Train Loss: -3.6789, Train Steps/Sec: 0.54
+ [2026-02-04 00:17:54] (step=0034100) Train Loss: -3.6814, Train Steps/Sec: 0.53
+ [2026-02-04 00:21:00] (step=0034200) Train Loss: -3.6805, Train Steps/Sec: 0.54
+ [2026-02-04 00:24:07] (step=0034300) Train Loss: -3.6837, Train Steps/Sec: 0.53
+ [2026-02-04 00:27:14] (step=0034400) Train Loss: -3.6817, Train Steps/Sec: 0.54
+ [2026-02-04 00:30:20] (step=0034500) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-04 00:33:27] (step=0034600) Train Loss: -3.6821, Train Steps/Sec: 0.54
+ [2026-02-04 00:36:34] (step=0034700) Train Loss: -3.6799, Train Steps/Sec: 0.54
+ [2026-02-04 00:39:38] (step=0034800) Train Loss: -3.6823, Train Steps/Sec: 0.54
+ [2026-02-04 00:42:45] (step=0034900) Train Loss: -3.6820, Train Steps/Sec: 0.54
+ [2026-02-04 00:45:52] (step=0035000) Train Loss: -3.6818, Train Steps/Sec: 0.54
+ [2026-02-04 00:46:45] Beginning epoch 14...
+ [2026-02-04 00:49:01] (step=0035100) Train Loss: -3.6794, Train Steps/Sec: 0.53
+ [2026-02-04 00:52:08] (step=0035200) Train Loss: -3.6804, Train Steps/Sec: 0.54
+ [2026-02-04 00:55:15] (step=0035300) Train Loss: -3.6825, Train Steps/Sec: 0.53
+ [2026-02-04 00:58:22] (step=0035400) Train Loss: -3.6817, Train Steps/Sec: 0.53
+ [2026-02-04 01:01:29] (step=0035500) Train Loss: -3.6840, Train Steps/Sec: 0.54
+ [2026-02-04 01:04:35] (step=0035600) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-04 01:07:42] (step=0035700) Train Loss: -3.6796, Train Steps/Sec: 0.53
+ [2026-02-04 01:10:50] (step=0035800) Train Loss: -3.6834, Train Steps/Sec: 0.53
+ [2026-02-04 01:13:56] (step=0035900) Train Loss: -3.6763, Train Steps/Sec: 0.54
+ [2026-02-04 01:17:03] (step=0036000) Train Loss: -3.6837, Train Steps/Sec: 0.53
+ [2026-02-04 01:20:10] (step=0036100) Train Loss: -3.6806, Train Steps/Sec: 0.53
+ [2026-02-04 01:23:18] (step=0036200) Train Loss: -3.6821, Train Steps/Sec: 0.53
+ [2026-02-04 01:26:24] (step=0036300) Train Loss: -3.6772, Train Steps/Sec: 0.54
+ [2026-02-04 01:29:31] (step=0036400) Train Loss: -3.6822, Train Steps/Sec: 0.54
+ [2026-02-04 01:32:38] (step=0036500) Train Loss: -3.6816, Train Steps/Sec: 0.54
+ [2026-02-04 01:35:45] (step=0036600) Train Loss: -3.6792, Train Steps/Sec: 0.53
+ [2026-02-04 01:38:51] (step=0036700) Train Loss: -3.6817, Train Steps/Sec: 0.54
+ [2026-02-04 01:41:59] (step=0036800) Train Loss: -3.6835, Train Steps/Sec: 0.53
+ [2026-02-04 01:45:05] (step=0036900) Train Loss: -3.6823, Train Steps/Sec: 0.54
+ [2026-02-04 01:48:12] (step=0037000) Train Loss: -3.6818, Train Steps/Sec: 0.54
+ [2026-02-04 01:51:18] (step=0037100) Train Loss: -3.6775, Train Steps/Sec: 0.54
+ [2026-02-04 01:54:25] (step=0037200) Train Loss: -3.6796, Train Steps/Sec: 0.54
+ [2026-02-04 01:57:31] (step=0037300) Train Loss: -3.6806, Train Steps/Sec: 0.54
+ [2026-02-04 02:00:38] (step=0037400) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-04 02:03:43] (step=0037500) Train Loss: -3.6808, Train Steps/Sec: 0.54
+ [2026-02-04 02:04:39] Beginning epoch 15...
+ [2026-02-04 02:06:52] (step=0037600) Train Loss: -3.6847, Train Steps/Sec: 0.53
+ [2026-02-04 02:10:00] (step=0037700) Train Loss: -3.6837, Train Steps/Sec: 0.53
+ [2026-02-04 02:13:06] (step=0037800) Train Loss: -3.6796, Train Steps/Sec: 0.54
+ [2026-02-04 02:16:13] (step=0037900) Train Loss: -3.6804, Train Steps/Sec: 0.54
+ [2026-02-04 02:19:20] (step=0038000) Train Loss: -3.6825, Train Steps/Sec: 0.54
+ [2026-02-04 02:22:26] (step=0038100) Train Loss: -3.6803, Train Steps/Sec: 0.54
+ [2026-02-04 02:25:33] (step=0038200) Train Loss: -3.6813, Train Steps/Sec: 0.54
+ [2026-02-04 02:28:40] (step=0038300) Train Loss: -3.6798, Train Steps/Sec: 0.53
+ [2026-02-04 02:31:47] (step=0038400) Train Loss: -3.6797, Train Steps/Sec: 0.53
+ [2026-02-04 02:34:54] (step=0038500) Train Loss: -3.6817, Train Steps/Sec: 0.54
+ [2026-02-04 02:38:01] (step=0038600) Train Loss: -3.6818, Train Steps/Sec: 0.54
+ [2026-02-04 02:41:08] (step=0038700) Train Loss: -3.6824, Train Steps/Sec: 0.54
+ [2026-02-04 02:44:14] (step=0038800) Train Loss: -3.6800, Train Steps/Sec: 0.54
+ [2026-02-04 02:47:22] (step=0038900) Train Loss: -3.6812, Train Steps/Sec: 0.53
+ [2026-02-04 02:50:28] (step=0039000) Train Loss: -3.6826, Train Steps/Sec: 0.54
+ [2026-02-04 02:53:35] (step=0039100) Train Loss: -3.6807, Train Steps/Sec: 0.53
+ [2026-02-04 02:56:42] (step=0039200) Train Loss: -3.6831, Train Steps/Sec: 0.54
+ [2026-02-04 02:59:48] (step=0039300) Train Loss: -3.6822, Train Steps/Sec: 0.54
+ [2026-02-04 03:02:55] (step=0039400) Train Loss: -3.6803, Train Steps/Sec: 0.54
+ [2026-02-04 03:06:01] (step=0039500) Train Loss: -3.6815, Train Steps/Sec: 0.54
+ [2026-02-04 03:09:08] (step=0039600) Train Loss: -3.6830, Train Steps/Sec: 0.53
+ [2026-02-04 03:12:15] (step=0039700) Train Loss: -3.6771, Train Steps/Sec: 0.54
+ [2026-02-04 03:15:21] (step=0039800) Train Loss: -3.6791, Train Steps/Sec: 0.54
+ [2026-02-04 03:18:28] (step=0039900) Train Loss: -3.6797, Train Steps/Sec: 0.54
+ [2026-02-04 03:21:34] (step=0040000) Train Loss: -3.6815, Train Steps/Sec: 0.54
+ [2026-02-04 03:22:33] Beginning epoch 16...
+ [2026-02-04 03:24:43] (step=0040100) Train Loss: -3.6799, Train Steps/Sec: 0.53
+ [2026-02-04 03:27:50] (step=0040200) Train Loss: -3.6823, Train Steps/Sec: 0.53
+ [2026-02-04 03:30:57] (step=0040300) Train Loss: -3.6805, Train Steps/Sec: 0.53
+ [2026-02-04 03:34:04] (step=0040400) Train Loss: -3.6829, Train Steps/Sec: 0.54
+ [2026-02-04 03:37:11] (step=0040500) Train Loss: -3.6786, Train Steps/Sec: 0.53
+ [2026-02-04 03:40:18] (step=0040600) Train Loss: -3.6811, Train Steps/Sec: 0.54
+ [2026-02-04 03:43:24] (step=0040700) Train Loss: -3.6804, Train Steps/Sec: 0.54
+ [2026-02-04 03:46:32] (step=0040800) Train Loss: -3.6860, Train Steps/Sec: 0.53
+ [2026-02-04 03:49:38] (step=0040900) Train Loss: -3.6804, Train Steps/Sec: 0.54
+ [2026-02-04 03:52:44] (step=0041000) Train Loss: -3.6803, Train Steps/Sec: 0.54
+ [2026-02-04 03:55:52] (step=0041100) Train Loss: -3.6803, Train Steps/Sec: 0.53
+ [2026-02-04 03:58:59] (step=0041200) Train Loss: -3.6801, Train Steps/Sec: 0.53
+ [2026-02-04 04:02:06] (step=0041300) Train Loss: -3.6794, Train Steps/Sec: 0.53
+ [2026-02-04 04:05:14] (step=0041400) Train Loss: -3.6816, Train Steps/Sec: 0.53
+ [2026-02-04 04:08:20] (step=0041500) Train Loss: -3.6858, Train Steps/Sec: 0.54
+ [2026-02-04 04:11:27] (step=0041600) Train Loss: -3.6811, Train Steps/Sec: 0.53
+ [2026-02-04 04:14:34] (step=0041700) Train Loss: -3.6859, Train Steps/Sec: 0.53
+ [2026-02-04 04:17:41] (step=0041800) Train Loss: -3.6823, Train Steps/Sec: 0.54
+ [2026-02-04 04:20:47] (step=0041900) Train Loss: -3.6838, Train Steps/Sec: 0.54
+ [2026-02-04 04:23:54] (step=0042000) Train Loss: -3.6809, Train Steps/Sec: 0.54
+ [2026-02-04 04:27:00] (step=0042100) Train Loss: -3.6781, Train Steps/Sec: 0.54
+ [2026-02-04 04:30:07] (step=0042200) Train Loss: -3.6826, Train Steps/Sec: 0.54
+ [2026-02-04 04:33:13] (step=0042300) Train Loss: -3.6835, Train Steps/Sec: 0.54
+ [2026-02-04 04:36:20] (step=0042400) Train Loss: -3.6816, Train Steps/Sec: 0.54
+ [2026-02-04 04:39:27] (step=0042500) Train Loss: -3.6802, Train Steps/Sec: 0.53
+ [2026-02-04 04:40:31] Beginning epoch 17...
+ [2026-02-04 04:42:37] (step=0042600) Train Loss: -3.6831, Train Steps/Sec: 0.53
+ [2026-02-04 04:45:42] (step=0042700) Train Loss: -3.6778, Train Steps/Sec: 0.54
+ [2026-02-04 04:48:48] (step=0042800) Train Loss: -3.6846, Train Steps/Sec: 0.54
+ [2026-02-04 04:51:55] (step=0042900) Train Loss: -3.6827, Train Steps/Sec: 0.53
+ [2026-02-04 04:55:02] (step=0043000) Train Loss: -3.6820, Train Steps/Sec: 0.54
+ [2026-02-04 04:58:08] (step=0043100) Train Loss: -3.6803, Train Steps/Sec: 0.54
+ [2026-02-04 05:01:15] (step=0043200) Train Loss: -3.6808, Train Steps/Sec: 0.54
+ [2026-02-04 05:04:22] (step=0043300) Train Loss: -3.6838, Train Steps/Sec: 0.53
+ [2026-02-04 05:07:29] (step=0043400) Train Loss: -3.6809, Train Steps/Sec: 0.54
+ [2026-02-04 05:10:36] (step=0043500) Train Loss: -3.6757, Train Steps/Sec: 0.53
+ [2026-02-04 05:13:43] (step=0043600) Train Loss: -3.6808, Train Steps/Sec: 0.54
+ [2026-02-04 05:16:50] (step=0043700) Train Loss: -3.6807, Train Steps/Sec: 0.54
+ [2026-02-04 05:19:56] (step=0043800) Train Loss: -3.6825, Train Steps/Sec: 0.54
+ [2026-02-04 05:23:03] (step=0043900) Train Loss: -3.6811, Train Steps/Sec: 0.53
+ [2026-02-04 05:26:10] (step=0044000) Train Loss: -3.6819, Train Steps/Sec: 0.54
+ [2026-02-04 05:29:17] (step=0044100) Train Loss: -3.6801, Train Steps/Sec: 0.54
+ [2026-02-04 05:32:24] (step=0044200) Train Loss: -3.6785, Train Steps/Sec: 0.54
+ [2026-02-04 05:35:31] (step=0044300) Train Loss: -3.6841, Train Steps/Sec: 0.53
+ [2026-02-04 05:38:38] (step=0044400) Train Loss: -3.6841, Train Steps/Sec: 0.53
+ [2026-02-04 05:41:01] (step=0044500) Train Loss: -3.6791, Train Steps/Sec: 0.70
+ [2026-02-04 05:42:24] (step=0044600) Train Loss: -3.6843, Train Steps/Sec: 1.20
+ [2026-02-04 05:43:47] (step=0044700) Train Loss: -3.6815, Train Steps/Sec: 1.21
+ [2026-02-04 05:45:10] (step=0044800) Train Loss: -3.6785, Train Steps/Sec: 1.21
+ [2026-02-04 05:46:33] (step=0044900) Train Loss: -3.6820, Train Steps/Sec: 1.21
+ [2026-02-04 05:47:56] (step=0045000) Train Loss: -3.6847, Train Steps/Sec: 1.20
+ [2026-02-04 05:48:26] Beginning epoch 18...
+ [2026-02-04 05:49:22] (step=0045100) Train Loss: -3.6816, Train Steps/Sec: 1.16
+ [2026-02-04 05:50:45] (step=0045200) Train Loss: -3.6834, Train Steps/Sec: 1.20
+ [2026-02-04 05:52:08] (step=0045300) Train Loss: -3.6787, Train Steps/Sec: 1.21
+ [2026-02-04 05:53:31] (step=0045400) Train Loss: -3.6844, Train Steps/Sec: 1.20
+ [2026-02-04 05:54:54] (step=0045500) Train Loss: -3.6823, Train Steps/Sec: 1.20
+ [2026-02-04 05:56:17] (step=0045600) Train Loss: -3.6806, Train Steps/Sec: 1.20
+ [2026-02-04 05:57:40] (step=0045700) Train Loss: -3.6797, Train Steps/Sec: 1.21
+ [2026-02-04 05:59:03] (step=0045800) Train Loss: -3.6819, Train Steps/Sec: 1.20
+ [2026-02-04 06:00:26] (step=0045900) Train Loss: -3.6807, Train Steps/Sec: 1.20
+ [2026-02-04 06:01:49] (step=0046000) Train Loss: -3.6814, Train Steps/Sec: 1.21
+ [2026-02-04 06:03:12] (step=0046100) Train Loss: -3.6827, Train Steps/Sec: 1.21
+ [2026-02-04 06:04:35] (step=0046200) Train Loss: -3.6824, Train Steps/Sec: 1.20
+ [2026-02-04 06:05:58] (step=0046300) Train Loss: -3.6825, Train Steps/Sec: 1.20
+ [2026-02-04 06:07:21] (step=0046400) Train Loss: -3.6826, Train Steps/Sec: 1.20
+ [2026-02-04 06:08:44] (step=0046500) Train Loss: -3.6778, Train Steps/Sec: 1.20
+ [2026-02-04 06:10:07] (step=0046600) Train Loss: -3.6820, Train Steps/Sec: 1.20
+ [2026-02-04 06:11:30] (step=0046700) Train Loss: -3.6830, Train Steps/Sec: 1.21
+ [2026-02-04 06:12:53] (step=0046800) Train Loss: -3.6808, Train Steps/Sec: 1.20
+ [2026-02-04 06:14:16] (step=0046900) Train Loss: -3.6812, Train Steps/Sec: 1.20
+ [2026-02-04 06:15:39] (step=0047000) Train Loss: -3.6836, Train Steps/Sec: 1.20
+ [2026-02-04 06:17:02] (step=0047100) Train Loss: -3.6806, Train Steps/Sec: 1.20
+ [2026-02-04 06:18:25] (step=0047200) Train Loss: -3.6813, Train Steps/Sec: 1.20
+ [2026-02-04 06:19:48] (step=0047300) Train Loss: -3.6828, Train Steps/Sec: 1.20
+ [2026-02-04 06:21:11] (step=0047400) Train Loss: -3.6842, Train Steps/Sec: 1.21
+ (step=0048000) Train Loss: -2.9915, Train Steps/Sec: 1.02
+ [2026-02-03 20:03:00] (step=0048100) Train Loss: -2.9858, Train Steps/Sec: 1.02
+ [2026-02-03 20:04:38] (step=0048200) Train Loss: -2.9865, Train Steps/Sec: 1.02
+ [2026-02-03 20:06:16] (step=0048300) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-03 20:07:54] (step=0048400) Train Loss: -2.9904, Train Steps/Sec: 1.02
+ [2026-02-03 20:09:32] (step=0048500) Train Loss: -2.9878, Train Steps/Sec: 1.02
+ [2026-02-03 20:11:10] (step=0048600) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 20:12:48] (step=0048700) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 20:14:26] (step=0048800) Train Loss: -2.9853, Train Steps/Sec: 1.02
+ [2026-02-03 20:16:04] (step=0048900) Train Loss: -2.9901, Train Steps/Sec: 1.02
+ [2026-02-03 20:17:41] (step=0049000) Train Loss: -2.9853, Train Steps/Sec: 1.02
+ [2026-02-03 20:19:19] (step=0049100) Train Loss: -2.9849, Train Steps/Sec: 1.02
+ [2026-02-03 20:20:57] (step=0049200) Train Loss: -2.9873, Train Steps/Sec: 1.02
+ [2026-02-03 20:22:35] (step=0049300) Train Loss: -2.9865, Train Steps/Sec: 1.02
+ [2026-02-03 20:24:12] (step=0049400) Train Loss: -2.9888, Train Steps/Sec: 1.03
+ [2026-02-03 20:25:51] (step=0049500) Train Loss: -2.9911, Train Steps/Sec: 1.02
+ [2026-02-03 20:27:29] (step=0049600) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-03 20:29:07] (step=0049700) Train Loss: -2.9921, Train Steps/Sec: 1.02
+ [2026-02-03 20:30:45] (step=0049800) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 20:32:23] (step=0049900) Train Loss: -2.9805, Train Steps/Sec: 1.02
+ [2026-02-03 20:34:01] (step=0050000) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ 50000
+ 50000
+ 50000
+ 50000
+ [2026-02-03 20:34:02] Saved checkpoint to results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/checkpoints/0050000.pt
+ [2026-02-03 20:34:41] Beginning epoch 10...
+ [2026-02-03 20:35:42] (step=0050100) Train Loss: -2.9896, Train Steps/Sec: 0.99
+ [2026-02-03 20:37:20] (step=0050200) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 20:38:58] (step=0050300) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-03 20:40:20] Generating EMA samples...
+ [2026-02-03 20:40:35] (step=0050400) Train Loss: -2.9846, Train Steps/Sec: 1.03
+ [2026-02-03 20:42:13] (step=0050500) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-03 20:43:51] (step=0050600) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-03 20:45:29] (step=0050700) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-03 20:47:07] (step=0050800) Train Loss: -2.9852, Train Steps/Sec: 1.02
+ [2026-02-03 20:48:44] (step=0050900) Train Loss: -2.9878, Train Steps/Sec: 1.03
+ [2026-02-03 20:50:21] (step=0051000) Train Loss: -2.9897, Train Steps/Sec: 1.03
+ [2026-02-03 20:52:00] (step=0051100) Train Loss: -2.9875, Train Steps/Sec: 1.02
+ [2026-02-03 20:53:34] (step=0051200) Train Loss: -2.9882, Train Steps/Sec: 1.06
+ [2026-02-03 20:55:12] (step=0051300) Train Loss: -2.9856, Train Steps/Sec: 1.02
+ [2026-02-03 20:56:50] (step=0051400) Train Loss: -2.9870, Train Steps/Sec: 1.02
+ [2026-02-03 20:58:28] (step=0051500) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-03 21:00:06] (step=0051600) Train Loss: -2.9896, Train Steps/Sec: 1.02
+ [2026-02-03 21:01:44] (step=0051700) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 21:03:22] (step=0051800) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-03 21:04:59] (step=0051900) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 21:06:38] (step=0052000) Train Loss: -2.9898, Train Steps/Sec: 1.02
+ [2026-02-03 21:08:16] (step=0052100) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-03 21:09:54] (step=0052200) Train Loss: -2.9888, Train Steps/Sec: 1.02
+ [2026-02-03 21:11:31] (step=0052300) Train Loss: -2.9868, Train Steps/Sec: 1.03
+ [2026-02-03 21:13:09] (step=0052400) Train Loss: -2.9857, Train Steps/Sec: 1.02
+ [2026-02-03 21:14:47] (step=0052500) Train Loss: -2.9898, Train Steps/Sec: 1.03
+ [2026-02-03 21:16:25] (step=0052600) Train Loss: -2.9875, Train Steps/Sec: 1.02
+ [2026-02-03 21:18:03] (step=0052700) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-03 21:19:40] (step=0052800) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 21:21:18] (step=0052900) Train Loss: -2.9866, Train Steps/Sec: 1.02
+ [2026-02-03 21:22:56] (step=0053000) Train Loss: -2.9871, Train Steps/Sec: 1.02
+ [2026-02-03 21:24:34] (step=0053100) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-03 21:26:12] (step=0053200) Train Loss: -2.9921, Train Steps/Sec: 1.02
+ [2026-02-03 21:27:49] (step=0053300) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-03 21:29:27] (step=0053400) Train Loss: -2.9859, Train Steps/Sec: 1.02
+ [2026-02-03 21:31:05] (step=0053500) Train Loss: -2.9909, Train Steps/Sec: 1.02
+ [2026-02-03 21:32:43] (step=0053600) Train Loss: -2.9928, Train Steps/Sec: 1.02
+ [2026-02-03 21:34:21] (step=0053700) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-03 21:35:59] (step=0053800) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 21:37:37] (step=0053900) Train Loss: -2.9883, Train Steps/Sec: 1.02
+ [2026-02-03 21:39:15] (step=0054000) Train Loss: -2.9844, Train Steps/Sec: 1.02
+ [2026-02-03 21:40:53] (step=0054100) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 21:42:31] (step=0054200) Train Loss: -2.9911, Train Steps/Sec: 1.02
+ [2026-02-03 21:44:09] (step=0054300) Train Loss: -2.9915, Train Steps/Sec: 1.02
+ [2026-02-03 21:45:47] (step=0054400) Train Loss: -2.9865, Train Steps/Sec: 1.02
+ [2026-02-03 21:47:24] (step=0054500) Train Loss: -2.9854, Train Steps/Sec: 1.03
+ [2026-02-03 21:49:02] (step=0054600) Train Loss: -2.9923, Train Steps/Sec: 1.02
+ [2026-02-03 21:50:39] (step=0054700) Train Loss: -2.9864, Train Steps/Sec: 1.03
+ [2026-02-03 21:52:17] (step=0054800) Train Loss: -2.9826, Train Steps/Sec: 1.02
+ [2026-02-03 21:53:55] (step=0054900) Train Loss: -2.9858, Train Steps/Sec: 1.02
+ [2026-02-03 21:55:33] (step=0055000) Train Loss: -2.9875, Train Steps/Sec: 1.02
+ [2026-02-03 21:56:16] Beginning epoch 11...
+ [2026-02-03 21:57:13] (step=0055100) Train Loss: -2.9926, Train Steps/Sec: 1.00
+ [2026-02-03 21:58:50] (step=0055200) Train Loss: -2.9919, Train Steps/Sec: 1.02
+ [2026-02-03 22:00:28] (step=0055300) Train Loss: -2.9910, Train Steps/Sec: 1.02
+ [2026-02-03 22:02:06] (step=0055400) Train Loss: -2.9851, Train Steps/Sec: 1.02
+ [2026-02-03 22:03:44] (step=0055500) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-03 22:05:22] (step=0055600) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-03 22:07:00] (step=0055700) Train Loss: -2.9873, Train Steps/Sec: 1.02
+ [2026-02-03 22:08:37] (step=0055800) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-03 22:10:15] (step=0055900) Train Loss: -2.9909, Train Steps/Sec: 1.02
+ [2026-02-03 22:11:50] (step=0056000) Train Loss: -2.9884, Train Steps/Sec: 1.06
+ [2026-02-03 22:13:28] (step=0056100) Train Loss: -2.9902, Train Steps/Sec: 1.02
+ [2026-02-03 22:15:05] (step=0056200) Train Loss: -2.9904, Train Steps/Sec: 1.03
+ [2026-02-03 22:16:43] (step=0056300) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 22:18:21] (step=0056400) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-03 22:19:59] (step=0056500) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 22:21:37] (step=0056600) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 22:23:15] (step=0056700) Train Loss: -2.9888, Train Steps/Sec: 1.02
+ [2026-02-03 22:24:53] (step=0056800) Train Loss: -2.9846, Train Steps/Sec: 1.02
+ [2026-02-03 22:26:32] (step=0056900) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 22:28:10] (step=0057000) Train Loss: -2.9846, Train Steps/Sec: 1.02
+ [2026-02-03 22:29:48] (step=0057100) Train Loss: -2.9884, Train Steps/Sec: 1.02
+ [2026-02-03 22:31:26] (step=0057200) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-03 22:33:04] (step=0057300) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-03 22:34:41] (step=0057400) Train Loss: -2.9913, Train Steps/Sec: 1.02
+ [2026-02-03 22:36:19] (step=0057500) Train Loss: -2.9885, Train Steps/Sec: 1.02
+ [2026-02-03 22:37:57] (step=0057600) Train Loss: -2.9872, Train Steps/Sec: 1.03
+ [2026-02-03 22:39:34] (step=0057700) Train Loss: -2.9902, Train Steps/Sec: 1.03
+ [2026-02-03 22:41:12] (step=0057800) Train Loss: -2.9949, Train Steps/Sec: 1.02
+ [2026-02-03 22:42:50] (step=0057900) Train Loss: -2.9919, Train Steps/Sec: 1.02
+ [2026-02-03 22:44:28] (step=0058000) Train Loss: -2.9903, Train Steps/Sec: 1.02
+ [2026-02-03 22:46:06] (step=0058100) Train Loss: -2.9908, Train Steps/Sec: 1.02
+ [2026-02-03 22:47:44] (step=0058200) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-03 22:49:22] (step=0058300) Train Loss: -2.9900, Train Steps/Sec: 1.02
+ [2026-02-03 22:50:59] (step=0058400) Train Loss: -2.9865, Train Steps/Sec: 1.03
+ [2026-02-03 22:52:37] (step=0058500) Train Loss: -2.9851, Train Steps/Sec: 1.02
+ [2026-02-03 22:54:15] (step=0058600) Train Loss: -2.9861, Train Steps/Sec: 1.01
+ [2026-02-03 22:55:53] (step=0058700) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-03 22:57:31] (step=0058800) Train Loss: -2.9918, Train Steps/Sec: 1.02
+ [2026-02-03 22:59:09] (step=0058900) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 23:00:47] (step=0059000) Train Loss: -2.9864, Train Steps/Sec: 1.02
+ [2026-02-03 23:02:25] (step=0059100) Train Loss: -2.9920, Train Steps/Sec: 1.02
+ [2026-02-03 23:04:03] (step=0059200) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-03 23:05:41] (step=0059300) Train Loss: -2.9895, Train Steps/Sec: 1.02
+ [2026-02-03 23:07:19] (step=0059400) Train Loss: -2.9911, Train Steps/Sec: 1.02
+ [2026-02-03 23:08:57] (step=0059500) Train Loss: -2.9857, Train Steps/Sec: 1.02
+ [2026-02-03 23:10:34] (step=0059600) Train Loss: -2.9925, Train Steps/Sec: 1.03
+ [2026-02-03 23:12:12] (step=0059700) Train Loss: -2.9885, Train Steps/Sec: 1.02
+ [2026-02-03 23:13:50] (step=0059800) Train Loss: -2.9883, Train Steps/Sec: 1.02
+ [2026-02-03 23:15:28] (step=0059900) Train Loss: -2.9914, Train Steps/Sec: 1.02
+ [2026-02-03 23:17:06] (step=0060000) Train Loss: -2.9892, Train Steps/Sec: 1.02
+ [2026-02-03 23:17:53] Beginning epoch 12...
+ [2026-02-03 23:18:45] (step=0060100) Train Loss: -2.9931, Train Steps/Sec: 1.00
+ [2026-02-03 23:20:23] (step=0060200) Train Loss: -2.9852, Train Steps/Sec: 1.02
+ [2026-02-03 23:22:01] (step=0060300) Train Loss: -2.9839, Train Steps/Sec: 1.02
+ [2026-02-03 23:23:39] (step=0060400) Train Loss: -2.9866, Train Steps/Sec: 1.02
+ [2026-02-03 23:25:17] (step=0060500) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-03 23:26:55] (step=0060600) Train Loss: -2.9869, Train Steps/Sec: 1.03
+ [2026-02-03 23:28:33] (step=0060700) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-03 23:30:07] (step=0060800) Train Loss: -2.9867, Train Steps/Sec: 1.06
+ [2026-02-03 23:31:45] (step=0060900) Train Loss: -2.9912, Train Steps/Sec: 1.02
+ [2026-02-03 23:33:23] (step=0061000) Train Loss: -2.9864, Train Steps/Sec: 1.02
+ [2026-02-03 23:35:01] (step=0061100) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-03 23:36:39] (step=0061200) Train Loss: -2.9844, Train Steps/Sec: 1.02
+ [2026-02-03 23:38:17] (step=0061300) Train Loss: -2.9937, Train Steps/Sec: 1.02
+ [2026-02-03 23:39:55] (step=0061400) Train Loss: -2.9877, Train Steps/Sec: 1.02
+ [2026-02-03 23:41:33] (step=0061500) Train Loss: -2.9898, Train Steps/Sec: 1.02
+ [2026-02-03 23:43:10] (step=0061600) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-03 23:44:48] (step=0061700) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-03 23:46:25] (step=0061800) Train Loss: -2.9888, Train Steps/Sec: 1.03
+ [2026-02-03 23:48:03] (step=0061900) Train Loss: -2.9867, Train Steps/Sec: 1.03
+ [2026-02-03 23:49:41] (step=0062000) Train Loss: -2.9901, Train Steps/Sec: 1.02
+ [2026-02-03 23:51:19] (step=0062100) Train Loss: -2.9850, Train Steps/Sec: 1.02
+ [2026-02-03 23:52:56] (step=0062200) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-03 23:54:34] (step=0062300) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-03 23:56:12] (step=0062400) Train Loss: -2.9879, Train Steps/Sec: 1.02
+ [2026-02-03 23:57:50] (step=0062500) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-03 23:59:28] (step=0062600) Train Loss: -2.9854, Train Steps/Sec: 1.02
+ [2026-02-04 00:01:06] (step=0062700) Train Loss: -2.9918, Train Steps/Sec: 1.02
+ [2026-02-04 00:02:44] (step=0062800) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-04 00:04:21] (step=0062900) Train Loss: -2.9891, Train Steps/Sec: 1.03
+ [2026-02-04 00:05:58] (step=0063000) Train Loss: -2.9885, Train Steps/Sec: 1.03
+ [2026-02-04 00:07:36] (step=0063100) Train Loss: -2.9878, Train Steps/Sec: 1.02
+ [2026-02-04 00:09:14] (step=0063200) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-04 00:10:52] (step=0063300) Train Loss: -2.9942, Train Steps/Sec: 1.02
+ [2026-02-04 00:12:30] (step=0063400) Train Loss: -2.9877, Train Steps/Sec: 1.02
+ [2026-02-04 00:14:07] (step=0063500) Train Loss: -2.9898, Train Steps/Sec: 1.03
+ [2026-02-04 00:15:46] (step=0063600) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-04 00:17:24] (step=0063700) Train Loss: -2.9892, Train Steps/Sec: 1.02
+ [2026-02-04 00:19:02] (step=0063800) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-04 00:20:39] (step=0063900) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-04 00:22:18] (step=0064000) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-04 00:23:55] (step=0064100) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-04 00:25:33] (step=0064200) Train Loss: -2.9838, Train Steps/Sec: 1.02
+ [2026-02-04 00:27:11] (step=0064300) Train Loss: -2.9857, Train Steps/Sec: 1.02
+ [2026-02-04 00:28:49] (step=0064400) Train Loss: -2.9905, Train Steps/Sec: 1.03
+ [2026-02-04 00:30:26] (step=0064500) Train Loss: -2.9910, Train Steps/Sec: 1.02
+ [2026-02-04 00:32:05] (step=0064600) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-04 00:33:42] (step=0064700) Train Loss: -2.9887, Train Steps/Sec: 1.02
+ [2026-02-04 00:35:20] (step=0064800) Train Loss: -2.9896, Train Steps/Sec: 1.02
+ [2026-02-04 00:36:58] (step=0064900) Train Loss: -2.9893, Train Steps/Sec: 1.02
+ [2026-02-04 00:38:36] (step=0065000) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-04 00:39:28] Beginning epoch 13...
+ [2026-02-04 00:40:16] (step=0065100) Train Loss: -2.9899, Train Steps/Sec: 1.00
+ [2026-02-04 00:41:54] (step=0065200) Train Loss: -2.9946, Train Steps/Sec: 1.02
+ [2026-02-04 00:43:32] (step=0065300) Train Loss: -2.9928, Train Steps/Sec: 1.02
+ [2026-02-04 00:45:10] (step=0065400) Train Loss: -2.9897, Train Steps/Sec: 1.02
+ [2026-02-04 00:46:46] (step=0065500) Train Loss: -2.9877, Train Steps/Sec: 1.05
+ [2026-02-04 00:48:22] (step=0065600) Train Loss: -2.9892, Train Steps/Sec: 1.03
+ [2026-02-04 00:50:00] (step=0065700) Train Loss: -2.9847, Train Steps/Sec: 1.02
+ [2026-02-04 00:51:38] (step=0065800) Train Loss: -2.9859, Train Steps/Sec: 1.02
+ [2026-02-04 00:53:16] (step=0065900) Train Loss: -2.9838, Train Steps/Sec: 1.03
+ [2026-02-04 00:54:54] (step=0066000) Train Loss: -2.9848, Train Steps/Sec: 1.02
+ [2026-02-04 00:56:31] (step=0066100) Train Loss: -2.9864, Train Steps/Sec: 1.02
+ [2026-02-04 00:58:08] (step=0066200) Train Loss: -2.9903, Train Steps/Sec: 1.03
+ [2026-02-04 00:59:46] (step=0066300) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-04 01:01:24] (step=0066400) Train Loss: -2.9881, Train Steps/Sec: 1.02
+ [2026-02-04 01:03:02] (step=0066500) Train Loss: -2.9850, Train Steps/Sec: 1.03
+ [2026-02-04 01:04:40] (step=0066600) Train Loss: -2.9870, Train Steps/Sec: 1.02
+ [2026-02-04 01:06:18] (step=0066700) Train Loss: -2.9866, Train Steps/Sec: 1.02
+ [2026-02-04 01:07:56] (step=0066800) Train Loss: -2.9895, Train Steps/Sec: 1.02
+ [2026-02-04 01:09:34] (step=0066900) Train Loss: -2.9862, Train Steps/Sec: 1.02
+ [2026-02-04 01:11:11] (step=0067000) Train Loss: -2.9913, Train Steps/Sec: 1.03
+ [2026-02-04 01:12:48] (step=0067100) Train Loss: -2.9877, Train Steps/Sec: 1.03
+ [2026-02-04 01:14:26] (step=0067200) Train Loss: -2.9923, Train Steps/Sec: 1.03
+ [2026-02-04 01:16:04] (step=0067300) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-04 01:17:42] (step=0067400) Train Loss: -2.9904, Train Steps/Sec: 1.02
+ [2026-02-04 01:19:20] (step=0067500) Train Loss: -2.9905, Train Steps/Sec: 1.02
+ [2026-02-04 01:20:58] (step=0067600) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-04 01:22:36] (step=0067700) Train Loss: -2.9877, Train Steps/Sec: 1.02
+ [2026-02-04 01:24:14] (step=0067800) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-04 01:25:52] (step=0067900) Train Loss: -2.9875, Train Steps/Sec: 1.03
+ [2026-02-04 01:27:29] (step=0068000) Train Loss: -2.9834, Train Steps/Sec: 1.02
+ [2026-02-04 01:29:07] (step=0068100) Train Loss: -2.9885, Train Steps/Sec: 1.02
+ [2026-02-04 01:30:45] (step=0068200) Train Loss: -2.9882, Train Steps/Sec: 1.02
+ [2026-02-04 01:32:22] (step=0068300) Train Loss: -2.9922, Train Steps/Sec: 1.03
+ [2026-02-04 01:34:01] (step=0068400) Train Loss: -2.9823, Train Steps/Sec: 1.02
+ [2026-02-04 01:35:38] (step=0068500) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-04 01:37:15] (step=0068600) Train Loss: -2.9938, Train Steps/Sec: 1.03
+ [2026-02-04 01:38:53] (step=0068700) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-04 01:40:31] (step=0068800) Train Loss: -2.9893, Train Steps/Sec: 1.02
+ [2026-02-04 01:42:09] (step=0068900) Train Loss: -2.9892, Train Steps/Sec: 1.02
+ [2026-02-04 01:43:47] (step=0069000) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-04 01:45:25] (step=0069100) Train Loss: -2.9871, Train Steps/Sec: 1.02
+ [2026-02-04 01:47:02] (step=0069200) Train Loss: -2.9910, Train Steps/Sec: 1.03
+ [2026-02-04 01:48:40] (step=0069300) Train Loss: -2.9894, Train Steps/Sec: 1.03
+ [2026-02-04 01:50:17] (step=0069400) Train Loss: -2.9837, Train Steps/Sec: 1.02
+ [2026-02-04 01:51:55] (step=0069500) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-04 01:53:33] (step=0069600) Train Loss: -2.9889, Train Steps/Sec: 1.02
+ [2026-02-04 01:55:11] (step=0069700) Train Loss: -2.9852, Train Steps/Sec: 1.03
+ [2026-02-04 01:56:49] (step=0069800) Train Loss: -2.9926, Train Steps/Sec: 1.02
+ [2026-02-04 01:58:27] (step=0069900) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-04 02:00:05] (step=0070000) Train Loss: -2.9908, Train Steps/Sec: 1.02
+ [2026-02-04 02:01:01] Beginning epoch 14...
+ [2026-02-04 02:01:45] (step=0070100) Train Loss: -2.9858, Train Steps/Sec: 1.00
+ [2026-02-04 02:03:23] (step=0070200) Train Loss: -2.9869, Train Steps/Sec: 1.02
+ [2026-02-04 02:04:57] (step=0070300) Train Loss: -2.9891, Train Steps/Sec: 1.06
+ [2026-02-04 02:06:35] (step=0070400) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-04 02:08:13] (step=0070500) Train Loss: -2.9873, Train Steps/Sec: 1.02
+ [2026-02-04 02:09:51] (step=0070600) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-04 02:11:29] (step=0070700) Train Loss: -2.9853, Train Steps/Sec: 1.02
+ [2026-02-04 02:13:06] (step=0070800) Train Loss: -2.9915, Train Steps/Sec: 1.02
+ [2026-02-04 02:14:44] (step=0070900) Train Loss: -2.9902, Train Steps/Sec: 1.02
+ [2026-02-04 02:16:22] (step=0071000) Train Loss: -2.9910, Train Steps/Sec: 1.02
+ [2026-02-04 02:18:00] (step=0071100) Train Loss: -2.9909, Train Steps/Sec: 1.02
+ [2026-02-04 02:19:37] (step=0071200) Train Loss: -2.9857, Train Steps/Sec: 1.03
+ [2026-02-04 02:21:15] (step=0071300) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-04 02:22:52] (step=0071400) Train Loss: -2.9858, Train Steps/Sec: 1.03
+ [2026-02-04 02:24:30] (step=0071500) Train Loss: -2.9876, Train Steps/Sec: 1.02
+ [2026-02-04 02:26:08] (step=0071600) Train Loss: -2.9936, Train Steps/Sec: 1.02
+ [2026-02-04 02:27:46] (step=0071700) Train Loss: -2.9813, Train Steps/Sec: 1.02
+ [2026-02-04 02:29:24] (step=0071800) Train Loss: -2.9841, Train Steps/Sec: 1.02
+ [2026-02-04 02:31:01] (step=0071900) Train Loss: -2.9900, Train Steps/Sec: 1.03
+ [2026-02-04 02:32:39] (step=0072000) Train Loss: -2.9901, Train Steps/Sec: 1.03
+ [2026-02-04 02:34:16] (step=0072100) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-04 02:35:54] (step=0072200) Train Loss: -2.9852, Train Steps/Sec: 1.03
+ [2026-02-04 02:37:32] (step=0072300) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-04 02:39:10] (step=0072400) Train Loss: -2.9919, Train Steps/Sec: 1.02
+ [2026-02-04 02:40:48] (step=0072500) Train Loss: -2.9843, Train Steps/Sec: 1.02
+ [2026-02-04 02:42:26] (step=0072600) Train Loss: -2.9850, Train Steps/Sec: 1.02
+ [2026-02-04 02:44:04] (step=0072700) Train Loss: -2.9867, Train Steps/Sec: 1.02
+ [2026-02-04 02:45:42] (step=0072800) Train Loss: -2.9904, Train Steps/Sec: 1.02
+ [2026-02-04 02:47:19] (step=0072900) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-04 02:48:57] (step=0073000) Train Loss: -2.9901, Train Steps/Sec: 1.03
+ [2026-02-04 02:50:35] (step=0073100) Train Loss: -2.9859, Train Steps/Sec: 1.02
+ [2026-02-04 02:52:13] (step=0073200) Train Loss: -2.9864, Train Steps/Sec: 1.02
+ [2026-02-04 02:53:50] (step=0073300) Train Loss: -2.9875, Train Steps/Sec: 1.03
+ [2026-02-04 02:55:28] (step=0073400) Train Loss: -2.9896, Train Steps/Sec: 1.02
+ [2026-02-04 02:57:05] (step=0073500) Train Loss: -2.9940, Train Steps/Sec: 1.03
+ [2026-02-04 02:58:43] (step=0073600) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-04 03:00:21] (step=0073700) Train Loss: -2.9883, Train Steps/Sec: 1.02
+ [2026-02-04 03:01:58] (step=0073800) Train Loss: -2.9895, Train Steps/Sec: 1.03
+ [2026-02-04 03:03:36] (step=0073900) Train Loss: -2.9879, Train Steps/Sec: 1.02
+ [2026-02-04 03:05:14] (step=0074000) Train Loss: -2.9884, Train Steps/Sec: 1.02
+ [2026-02-04 03:06:52] (step=0074100) Train Loss: -2.9830, Train Steps/Sec: 1.02
+ [2026-02-04 03:08:30] (step=0074200) Train Loss: -2.9861, Train Steps/Sec: 1.02
+ [2026-02-04 03:10:08] (step=0074300) Train Loss: -2.9873, Train Steps/Sec: 1.02
+ [2026-02-04 03:11:45] (step=0074400) Train Loss: -2.9860, Train Steps/Sec: 1.03
+ [2026-02-04 03:13:22] (step=0074500) Train Loss: -2.9887, Train Steps/Sec: 1.03
+ [2026-02-04 03:15:00] (step=0074600) Train Loss: -2.9857, Train Steps/Sec: 1.03
+ [2026-02-04 03:16:38] (step=0074700) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-04 03:18:15] (step=0074800) Train Loss: -2.9874, Train Steps/Sec: 1.02
+ [2026-02-04 03:19:53] (step=0074900) Train Loss: -2.9849, Train Steps/Sec: 1.02
+ [2026-02-04 03:21:32] (step=0075000) Train Loss: -2.9902, Train Steps/Sec: 1.02
+ 75000
+ 75000
+ 75000
+ 75000
+ [2026-02-04 03:21:33] Saved checkpoint to results_256_gvp_disp/depth-mu-2-004-SiT-XL-2-GVP-velocity-None/checkpoints/0075000.pt
+ [2026-02-04 03:22:32] Beginning epoch 15...
+ [2026-02-04 03:23:10] (step=0075100) Train Loss: -2.9908, Train Steps/Sec: 1.02
+ [2026-02-04 03:24:48] (step=0075200) Train Loss: -2.9917, Train Steps/Sec: 1.02
+ [2026-02-04 03:26:26] (step=0075300) Train Loss: -2.9913, Train Steps/Sec: 1.02
+ [2026-02-04 03:28:04] (step=0075400) Train Loss: -2.9900, Train Steps/Sec: 1.02
+ [2026-02-04 03:29:42] (step=0075500) Train Loss: -2.9866, Train Steps/Sec: 1.02
+ [2026-02-04 03:30:56] Generating EMA samples...
+ [2026-02-04 03:31:20] (step=0075600) Train Loss: -2.9850, Train Steps/Sec: 1.02
+ [2026-02-04 03:32:57] (step=0075700) Train Loss: -2.9845, Train Steps/Sec: 1.03
+ [2026-02-04 03:34:36] (step=0075800) Train Loss: -2.9907, Train Steps/Sec: 1.02
+ [2026-02-04 03:36:14] (step=0075900) Train Loss: -2.9899, Train Steps/Sec: 1.02
+ [2026-02-04 03:37:52] (step=0076000) Train Loss: -2.9894, Train Steps/Sec: 1.02
+ [2026-02-04 03:39:30] (step=0076100) Train Loss: -2.9877, Train Steps/Sec: 1.02
+ [2026-02-04 03:41:08] (step=0076200) Train Loss: -2.9870, Train Steps/Sec: 1.02
+ [2026-02-04 03:42:45] (step=0076300) Train Loss: -2.9843, Train Steps/Sec: 1.03
+ [2026-02-04 03:44:23] (step=0076400) Train Loss: -2.9898, Train Steps/Sec: 1.02
+ [2026-02-04 03:46:01] (step=0076500) Train Loss: -2.9868, Train Steps/Sec: 1.02
+ [2026-02-04 03:47:39] (step=0076600) Train Loss: -2.9848, Train Steps/Sec: 1.02
+ [2026-02-04 03:49:16] (step=0076700) Train Loss: -2.9864, Train Steps/Sec: 1.03
+ [2026-02-04 03:50:54] (step=0076800) Train Loss: -2.9876, Train Steps/Sec: 1.03
+ [2026-02-04 03:52:32] (step=0076900) Train Loss: -2.9862, Train Steps/Sec: 1.02
+ [2026-02-04 03:54:10] (step=0077000) Train Loss: -2.9906, Train Steps/Sec: 1.02
+ [2026-02-04 03:55:48] (step=0077100) Train Loss: -2.9880, Train Steps/Sec: 1.02
+ [2026-02-04 03:57:26] (step=0077200) Train Loss: -2.9890, Train Steps/Sec: 1.02
+ [2026-02-04 03:59:04] (step=0077300) Train Loss: -2.9891, Train Steps/Sec: 1.02
+ [2026-02-04 04:00:42] (step=0077400) Train Loss: -2.9886, Train Steps/Sec: 1.02
+ [2026-02-04 04:02:20] (step=0077500) Train Loss: -2.9870, Train Steps/Sec: 1.02
+ [2026-02-04 04:03:58] (step=0077600) Train Loss: -2.9864, Train Steps/Sec: 1.02
+ [2026-02-04 04:05:35] (step=0077700) Train Loss: -2.9854, Train Steps/Sec: 1.02
+ [2026-02-04 04:07:13] (step=0077800) Train Loss: -2.9904, Train Steps/Sec: 1.02
+ [2026-02-04 04:08:51] (step=0077900) Train Loss: -2.9850, Train Steps/Sec: 1.02
835
+ [2026-02-04 04:10:29] (step=0078000) Train Loss: -2.9941, Train Steps/Sec: 1.02
836
+ [2026-02-04 04:12:07] (step=0078100) Train Loss: -2.9890, Train Steps/Sec: 1.02
837
+ [2026-02-04 04:13:45] (step=0078200) Train Loss: -2.9867, Train Steps/Sec: 1.02
838
+ [2026-02-04 04:15:22] (step=0078300) Train Loss: -2.9915, Train Steps/Sec: 1.03
839
+ [2026-02-04 04:17:00] (step=0078400) Train Loss: -2.9876, Train Steps/Sec: 1.03
840
+ [2026-02-04 04:18:37] (step=0078500) Train Loss: -2.9893, Train Steps/Sec: 1.03
841
+ [2026-02-04 04:20:15] (step=0078600) Train Loss: -2.9887, Train Steps/Sec: 1.02
842
+ [2026-02-04 04:21:53] (step=0078700) Train Loss: -2.9854, Train Steps/Sec: 1.02
843
+ [2026-02-04 04:23:31] (step=0078800) Train Loss: -2.9884, Train Steps/Sec: 1.03
844
+ [2026-02-04 04:25:08] (step=0078900) Train Loss: -2.9884, Train Steps/Sec: 1.03
845
+ [2026-02-04 04:26:46] (step=0079000) Train Loss: -2.9889, Train Steps/Sec: 1.02
846
+ [2026-02-04 04:28:24] (step=0079100) Train Loss: -2.9918, Train Steps/Sec: 1.02
847
+ [2026-02-04 04:30:01] (step=0079200) Train Loss: -2.9873, Train Steps/Sec: 1.03
848
+ [2026-02-04 04:31:39] (step=0079300) Train Loss: -2.9867, Train Steps/Sec: 1.02
849
+ [2026-02-04 04:33:17] (step=0079400) Train Loss: -2.9800, Train Steps/Sec: 1.02
850
+ [2026-02-04 04:34:55] (step=0079500) Train Loss: -2.9873, Train Steps/Sec: 1.03
851
+ [2026-02-04 04:36:32] (step=0079600) Train Loss: -2.9847, Train Steps/Sec: 1.02
852
+ [2026-02-04 04:38:11] (step=0079700) Train Loss: -2.9876, Train Steps/Sec: 1.02
853
+ [2026-02-04 04:39:48] (step=0079800) Train Loss: -2.9865, Train Steps/Sec: 1.02
854
+ [2026-02-04 04:41:23] (step=0079900) Train Loss: -2.9922, Train Steps/Sec: 1.06
855
+ [2026-02-04 04:43:00] (step=0080000) Train Loss: -2.9857, Train Steps/Sec: 1.03
856
+ [2026-02-04 04:44:04] Beginning epoch 16...
857
+ [2026-02-04 04:44:40] (step=0080100) Train Loss: -2.9882, Train Steps/Sec: 1.00
858
+ [2026-02-04 04:46:18] (step=0080200) Train Loss: -2.9875, Train Steps/Sec: 1.02
859
+ [2026-02-04 04:47:56] (step=0080300) Train Loss: -2.9889, Train Steps/Sec: 1.02
860
+ [2026-02-04 04:49:34] (step=0080400) Train Loss: -2.9889, Train Steps/Sec: 1.02
861
+ [2026-02-04 04:51:11] (step=0080500) Train Loss: -2.9847, Train Steps/Sec: 1.03
862
+ [2026-02-04 04:52:49] (step=0080600) Train Loss: -2.9891, Train Steps/Sec: 1.02
863
+ [2026-02-04 04:54:27] (step=0080700) Train Loss: -2.9888, Train Steps/Sec: 1.03
864
+ [2026-02-04 04:56:04] (step=0080800) Train Loss: -2.9902, Train Steps/Sec: 1.03
865
+ [2026-02-04 04:57:42] (step=0080900) Train Loss: -2.9849, Train Steps/Sec: 1.02
866
+ [2026-02-04 04:59:20] (step=0081000) Train Loss: -2.9865, Train Steps/Sec: 1.03
867
+ [2026-02-04 05:00:58] (step=0081100) Train Loss: -2.9868, Train Steps/Sec: 1.02
868
+ [2026-02-04 05:02:36] (step=0081200) Train Loss: -2.9889, Train Steps/Sec: 1.02
869
+ [2026-02-04 05:04:14] (step=0081300) Train Loss: -2.9845, Train Steps/Sec: 1.02
870
+ [2026-02-04 05:05:52] (step=0081400) Train Loss: -2.9906, Train Steps/Sec: 1.02
871
+ [2026-02-04 05:07:29] (step=0081500) Train Loss: -2.9916, Train Steps/Sec: 1.02
872
+ [2026-02-04 05:09:08] (step=0081600) Train Loss: -2.9953, Train Steps/Sec: 1.02
873
+ [2026-02-04 05:10:46] (step=0081700) Train Loss: -2.9884, Train Steps/Sec: 1.02
874
+ [2026-02-04 05:12:24] (step=0081800) Train Loss: -2.9865, Train Steps/Sec: 1.02
875
+ [2026-02-04 05:14:01] (step=0081900) Train Loss: -2.9889, Train Steps/Sec: 1.03
876
+ [2026-02-04 05:15:39] (step=0082000) Train Loss: -2.9850, Train Steps/Sec: 1.02
877
+ [2026-02-04 05:17:17] (step=0082100) Train Loss: -2.9880, Train Steps/Sec: 1.02
878
+ [2026-02-04 05:18:55] (step=0082200) Train Loss: -2.9869, Train Steps/Sec: 1.02
879
+ [2026-02-04 05:20:33] (step=0082300) Train Loss: -2.9869, Train Steps/Sec: 1.02
880
+ [2026-02-04 05:22:10] (step=0082400) Train Loss: -2.9872, Train Steps/Sec: 1.02
881
+ [2026-02-04 05:23:49] (step=0082500) Train Loss: -2.9838, Train Steps/Sec: 1.02
882
+ [2026-02-04 05:25:27] (step=0082600) Train Loss: -2.9881, Train Steps/Sec: 1.02
883
+ [2026-02-04 05:27:04] (step=0082700) Train Loss: -2.9890, Train Steps/Sec: 1.03
884
+ [2026-02-04 05:28:42] (step=0082800) Train Loss: -2.9881, Train Steps/Sec: 1.02
885
+ [2026-02-04 05:30:19] (step=0082900) Train Loss: -2.9903, Train Steps/Sec: 1.03
886
+ [2026-02-04 05:31:58] (step=0083000) Train Loss: -2.9946, Train Steps/Sec: 1.02
887
+ [2026-02-04 05:33:36] (step=0083100) Train Loss: -2.9879, Train Steps/Sec: 1.02
888
+ [2026-02-04 05:35:14] (step=0083200) Train Loss: -2.9879, Train Steps/Sec: 1.02
889
+ [2026-02-04 05:36:52] (step=0083300) Train Loss: -2.9939, Train Steps/Sec: 1.02
890
+ [2026-02-04 05:38:30] (step=0083400) Train Loss: -2.9914, Train Steps/Sec: 1.02
891
+ [2026-02-04 05:40:07] (step=0083500) Train Loss: -2.9888, Train Steps/Sec: 1.03
892
+ W0204 05:40:25.828000 72184 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 72203 closing signal SIGTERM
893
+ W0204 05:40:25.830000 72184 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 72204 closing signal SIGTERM
894
+ W0204 05:40:25.831000 72184 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 72205 closing signal SIGTERM
895
+ E0204 05:40:25.834000 72184 site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: -9) local_rank: 0 (pid: 72202) of binary: /opt/conda/envs/SiT/bin/python
896
+ Traceback (most recent call last):
897
+ File "/opt/conda/envs/SiT/bin/torchrun", line 33, in <module>
898
+ sys.exit(load_entry_point('torch==2.5.1', 'console_scripts', 'torchrun')())
899
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
900
+ File "/opt/conda/envs/SiT/lib/python3.12/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
901
+ return f(*args, **kwargs)
902
+ ^^^^^^^^^^^^^^^^^^
903
+ File "/opt/conda/envs/SiT/lib/python3.12/site-packages/torch/distributed/run.py", line 919, in main
904
+ run(args)
905
+ File "/opt/conda/envs/SiT/lib/python3.12/site-packages/torch/distributed/run.py", line 910, in run
906
+ elastic_launch(
907
+ File "/opt/conda/envs/SiT/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 138, in __call__
908
+ return launch_agent(self._config, self._entrypoint, list(args))
909
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
910
+ File "/opt/conda/envs/SiT/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent
911
+ raise ChildFailedError(
912
+ torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
913
+ ==========================================================
914
+ train_rectified_noise.py FAILED
915
+ ----------------------------------------------------------
916
+ Failures:
917
+ <NO_OTHER_FAILURES>
918
+ ----------------------------------------------------------
919
+ Root Cause (first observed failure):
920
+ [0]:
921
+ time : 2026-02-04_05:40:25
922
+ host : cabbd6562a3025dd000330e2d302e8fd-taskrole1-0
923
+ rank : 0 (local_rank: 0)
924
+ exitcode : -9 (pid: 72202)
925
+ error_file: <N/A>
926
+ traceback : Signal 9 (SIGKILL) received by PID 72202
927
+ ==========================================================
Rectified_Noise/VP-Disp/README.md ADDED
@@ -0,0 +1,92 @@
1
+ # [AAAI 2026] Rectified Noise: A Generative Model Using Positive-incentive Noise
2
+
3
+ ![Visualization of the $\pi$-noise by $\Delta$RN.](assests/visual.png)
4
+
5
+ <br>
6
+ <a href="https://arxiv.org/pdf/2511.07911"><img src="https://img.shields.io/static/v1?label=Paper&message=2511.07911&color=red&logo=arxiv"></a>
7
+ <a href="https://huggingface.co/xiangzai/recitified_noise"><img src="https://img.shields.io/badge/🤗_HuggingFace-Model-ffbd45.svg" alt="HuggingFace"></a>
8
+
9
+ ## Introduction
10
+ This is a [PyTorch](https://pytorch.org) implementation of **Rectified Noise**, a generative model that uses positive-incentive noise to enhance a model's sampling.
11
+
12
+ ![Overview of Rectified Noise](assests/pipeline.png)
13
+
14
+ ## Setup
15
+
16
+ We provide an `environment.yml` file that can be used to create a Conda environment.
17
+
18
+ ```bash
19
+ conda env create -f environment.yml
20
+ conda activate RN
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ### Training
26
+ 1. We provide a training script for RN in `train_rectified_noise.py`.
27
+
28
+ Run:
29
+
30
+ ```bash
31
+ torchrun --nnodes=1 --nproc_per_node=4 train_rectified_noise.py \
32
+ --data-path /path/to/data \
33
+ --num-classes 3 \
34
+ --path-type Linear \
35
+ --prediction velocity \
36
+ --ckpt /path/to/pretrained_model \
37
+ --model SiT-B/2 \
38
+ --learn-mu True \
39
+ --depth 1
40
+ ```
41
+
42
+ You can find the relevant checkpoint files at the Hugging Face link above; a minimal loading sketch follows.
43
+
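+ As a reference, checkpoints can be loaded with the `find_model` helper in `download.py` (a minimal sketch; the checkpoint name below is illustrative):
+
+ ```python
+ # Minimal sketch: load a checkpoint via download.py's find_model helper.
+ # "SiT-XL-2-256x256.pt" is downloaded automatically if missing;
+ # any other string is treated as a local checkpoint path.
+ from download import find_model
+
+ checkpoint = find_model("SiT-XL-2-256x256.pt")
+ ```
+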
44
+ 2. Parameters:
45
+
46
+ | Argument | Type | Default | Description |
47
+ |----------|------|---------|-------------|
48
+ | `--data-path` | str | `-` | Path to the dataset. |
49
+ | `--num-classes` | int | `-` | Number of classes. |
50
+ | `--path-type` | str | `Linear` | Interpolant path type (e.g., `Linear`, `GVP`, `VP`). |
51
+ | `--prediction` | str | `velocity` | Output type of network. |
52
+ | `--ckpt` | str | `-` | Path to pretrained model checkpoint. |
53
+ | `--model` | str | `SiT-B/2` | Model type, any option from the model list. |
54
+ | `--learn-mu` | bool | `True` | Whether to learn the mu parameter. |
55
+ | `--depth` | int | `1` | Depth of the SiTF2 model (number of extra SiT blocks). |
56
+
57
+ ### Sampling
58
+
59
+ 1. Use the trained RN model to enhance the pre-trained model:
60
+
61
+ ```bash
62
+ torchrun --nnodes=1 --nproc_per_node=4 train_rectified_noise.py \
63
+ --path-type Linear \
64
+ --prediction velocity \
65
+ --ckpt /path/to/pretrained_model \
66
+ --sitf2-ckpt /path/to/pretrained_RN \
67
+ --model SiT-B/2 \
68
+ --learn-mu True \
69
+ --depth 1
70
+ ```
71
+
72
+ ## Acknowledgement
73
+ This repo benefits from [SiT](https://github.com/willisma/SiT). Thanks for their excellent work.
74
+
75
+ ## Contact
76
+ If you have any questions about this project, please contact mguzhenyu@outlook.com.
77
+
78
+ ## Citation
79
+
80
+ If you find the code useful for your research, please consider citing our work:
81
+
82
+ ```
83
+ @misc{gu2025rectifiednoisegenerativemodel,
84
+ title={Rectified Noise: A Generative Model Using Positive-incentive Noise},
85
+ author={Zhenyu Gu and Yanchen Xu and Sida Huang and Yubin Guo and Hongyuan Zhang},
86
+ year={2025},
87
+ eprint={2511.07911},
88
+ archivePrefix={arXiv},
89
+ primaryClass={cs.LG},
90
+ url={https://arxiv.org/abs/2511.07911},
91
+ }
92
+ ```
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000059.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000169.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000286.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000545.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000606.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/000769.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001050.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001099.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001346.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001475.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001518.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001644.png ADDED
Rectified_Noise/VP-Disp/VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04/001741.png ADDED
Rectified_Noise/VP-Disp/W_False.log ADDED
@@ -0,0 +1,5 @@
1
+ [NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
2
+ Starting rank=0, seed=0, world_size=1.
3
+ Saving .png samples at VP_samples/depth-mu-2-threshold-1.0-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04
4
+ Total number of images that will be sampled: 3008
5
+
6
  0%| | 0/47 [00:00<?, ?it/s]
7
  2%|▏ | 1/47 [00:44<34:16, 44.71s/it]
8
  4%|▍ | 2/47 [01:28<33:13, 44.29s/it]
9
  6%|▋ | 3/47 [02:12<32:23, 44.17s/it]
10
  9%|▊ | 4/47 [02:56<31:36, 44.10s/it]
11
  11%|█ | 5/47 [03:40<30:49, 44.05s/it]
12
  13%|█▎ | 6/47 [04:24<30:04, 44.01s/it]
13
  15%|█▍ | 7/47 [05:08<29:20, 44.02s/it]
14
  17%|█▋ | 8/47 [05:52<28:36, 44.00s/it]
15
  19%|█▉ | 9/47 [06:36<27:53, 44.03s/it]
16
  21%|██▏ | 10/47 [07:56<34:01, 55.17s/it]
17
  23%|██▎ | 11/47 [09:30<40:09, 66.94s/it]
18
  26%|██▌ | 12/47 [11:04<43:46, 75.05s/it]
19
  28%|██▊ | 13/47 [12:37<45:43, 80.70s/it]
20
  30%|██▉ | 14/47 [14:11<46:33, 84.64s/it]
21
  32%|███▏ | 15/47 [15:45<46:36, 87.39s/it]
22
  34%|███▍ | 16/47 [17:18<46:06, 89.25s/it]
23
  36%|███▌ | 17/47 [18:52<45:16, 90.55s/it]
24
  38%|███▊ | 18/47 [20:26<44:13, 91.50s/it]
25
  40%|████ | 19/47 [21:59<43:00, 92.17s/it]
26
  43%|████▎ | 20/47 [23:33<41:41, 92.64s/it]
27
  45%|████▍ | 21/47 [25:06<40:14, 92.87s/it]
28
  47%|████▋ | 22/47 [26:40<38:47, 93.10s/it]
29
  49%|████▉ | 23/47 [28:14<37:17, 93.21s/it]
30
  51%|█████ | 24/47 [29:47<35:47, 93.37s/it]
31
  53%|█████▎ | 25/47 [31:21<34:16, 93.48s/it]
32
  55%|█████▌ | 26/47 [32:55<32:44, 93.55s/it]
33
  57%|█████▋ | 27/47 [34:28<31:11, 93.56s/it]
34
  60%|█████▉ | 28/47 [36:02<29:37, 93.56s/it]
35
  62%|██████▏ | 29/47 [37:36<28:04, 93.60s/it]
Rectified_Noise/VP-Disp/W_No.log ADDED
@@ -0,0 +1,5 @@
1
+ [NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
2
+ Starting rank=0, seed=0, world_size=1.
3
+ Saving .png samples at VP_samples/depth-mu-2-threshold-0.0-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04
4
+ Total number of images that will be sampled: 3008
5
+
6
  0%| | 0/47 [00:00<?, ?it/s]
7
  2%|▏ | 1/47 [01:21<1:02:31, 81.55s/it]
8
  4%|▍ | 2/47 [02:41<1:00:38, 80.86s/it]
9
  6%|▋ | 3/47 [04:02<59:03, 80.54s/it]
10
  9%|▊ | 4/47 [05:21<57:29, 80.22s/it]
11
  11%|█ | 5/47 [06:41<56:08, 80.19s/it]
12
  13%|█▎ | 6/47 [08:46<1:05:05, 95.25s/it]
13
  15%|█▍ | 7/47 [10:56<1:11:01, 106.54s/it]
14
  17%|█▋ | 8/47 [13:05<1:14:00, 113.86s/it]
15
  19%|█▉ | 9/47 [15:15<1:15:10, 118.69s/it]
16
  21%|██▏ | 10/47 [17:24<1:15:14, 122.00s/it]
17
  23%|██▎ | 11/47 [19:33<1:14:34, 124.29s/it]
18
  26%|██▌ | 12/47 [21:43<1:13:25, 125.86s/it]
19
  28%|██▊ | 13/47 [23:52<1:11:56, 126.94s/it]
20
  30%|██▉ | 14/47 [25:56<1:09:12, 125.85s/it]
21
  32%|███▏ | 15/47 [28:06<1:07:46, 127.09s/it]
22
  34%|███▍ | 16/47 [30:16<1:06:06, 127.96s/it]
23
  36%|███▌ | 17/47 [32:26<1:04:17, 128.58s/it]
24
  38%|███▊ | 18/47 [34:36<1:02:21, 129.01s/it]
25
  40%|████ | 19/47 [36:45<1:00:14, 129.10s/it]
Rectified_Noise/VP-Disp/W_True_0.15.log ADDED
@@ -0,0 +1,5 @@
1
+ [NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
2
+ Starting rank=0, seed=0, world_size=1.
3
+ Saving .png samples at VP_samples/depth-mu-2-threshold-0.15-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04
4
+ Total number of images that will be sampled: 3008
5
+
6
  0%| | 0/47 [00:00<?, ?it/s]
7
  2%|▏ | 1/47 [00:44<34:16, 44.70s/it]
8
  4%|▍ | 2/47 [01:28<33:11, 44.25s/it]
9
  6%|▋ | 3/47 [02:12<32:22, 44.15s/it]
10
  9%|▊ | 4/47 [02:56<31:35, 44.08s/it]
11
  11%|█ | 5/47 [03:40<30:48, 44.02s/it]
12
  13%|█▎ | 6/47 [04:24<30:04, 44.00s/it]
13
  15%|█▍ | 7/47 [05:08<29:20, 44.01s/it]
14
  17%|█▋ | 8/47 [05:52<28:35, 43.99s/it]
15
  19%|█▉ | 9/47 [06:36<27:53, 44.03s/it]
16
  21%|██▏ | 10/47 [07:57<34:09, 55.39s/it]
17
  23%|██▎ | 11/47 [09:31<40:20, 67.22s/it]
18
  26%|██▌ | 12/47 [11:05<43:56, 75.33s/it]
19
  28%|██▊ | 13/47 [12:39<45:53, 80.99s/it]
20
  30%|██▉ | 14/47 [14:13<46:42, 84.91s/it]
21
  32%|███▏ | 15/47 [15:47<46:43, 87.62s/it]
22
  34%|███▍ | 16/47 [17:20<46:12, 89.45s/it]
23
  36%|███▌ | 17/47 [18:54<45:21, 90.73s/it]
24
  38%|███▊ | 18/47 [20:28<44:18, 91.66s/it]
25
  40%|████ | 19/47 [22:02<43:04, 92.30s/it]
26
  43%|████▎ | 20/47 [23:36<41:44, 92.77s/it]
27
  45%|████▍ | 21/47 [25:09<40:17, 92.99s/it]
28
  47%|████▋ | 22/47 [26:43<38:52, 93.29s/it]
29
  49%|████▉ | 23/47 [28:17<37:22, 93.44s/it]
30
  51%|█████ | 24/47 [29:51<35:51, 93.55s/it]
31
  53%|█████▎ | 25/47 [31:25<34:19, 93.62s/it]
32
  55%|█████▌ | 26/47 [32:58<32:47, 93.69s/it]
33
  57%|█████▋ | 27/47 [34:32<31:13, 93.68s/it]
34
  60%|█████▉ | 28/47 [36:06<29:39, 93.65s/it]
35
  62%|██████▏ | 29/47 [37:39<28:06, 93.69s/it]
Rectified_Noise/VP-Disp/W_True_0.5.log ADDED
@@ -0,0 +1,5 @@
1
+ [NOTICE] The application is pending for GPU resource in asynchronous queue. The longest waiting time in queue is 1800 seconds.
2
+ Starting rank=0, seed=0, world_size=1.
3
+ Saving .png samples at VP_samples/depth-mu-2-threshold-0.5-0175000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04
4
+ Total number of images that will be sampled: 3008
5
+
6
  0%| | 0/47 [00:00<?, ?it/s]
7
  2%|▏ | 1/47 [00:44<34:26, 44.92s/it]
8
  4%|▍ | 2/47 [01:29<33:22, 44.51s/it]
9
  6%|▋ | 3/47 [02:13<32:32, 44.38s/it]
10
  9%|▊ | 4/47 [02:57<31:45, 44.31s/it]
11
  11%|█ | 5/47 [03:41<30:58, 44.25s/it]
12
  13%|█▎ | 6/47 [04:25<30:13, 44.24s/it]
13
  15%|█▍ | 7/47 [05:10<29:29, 44.24s/it]
14
  17%|█▋ | 8/47 [05:54<28:44, 44.22s/it]
15
  19%|█▉ | 9/47 [06:38<28:01, 44.26s/it]
16
  21%|██▏ | 10/47 [07:57<33:50, 54.88s/it]
17
  23%|██▎ | 11/47 [09:31<40:02, 66.75s/it]
18
  26%|██▌ | 12/47 [11:04<43:42, 74.94s/it]
19
  28%|██▊ | 13/47 [12:38<45:41, 80.64s/it]
20
  30%|██▉ | 14/47 [14:12<46:32, 84.61s/it]
21
  32%|███▏ | 15/47 [15:46<46:36, 87.39s/it]
22
  34%|███▍ | 16/47 [17:19<46:08, 89.30s/it]
23
  36%|███▌ | 17/47 [18:53<45:18, 90.61s/it]
24
  38%|███▊ | 18/47 [20:27<44:15, 91.58s/it]
25
  40%|████ | 19/47 [22:01<43:03, 92.25s/it]
26
  43%|████▎ | 20/47 [23:34<41:43, 92.71s/it]
27
  45%|████▍ | 21/47 [25:08<40:17, 92.97s/it]
28
  47%|████▋ | 22/47 [26:42<38:49, 93.19s/it]
29
  49%|████▉ | 23/47 [28:15<37:19, 93.32s/it]
30
  51%|█████ | 24/47 [29:49<35:49, 93.48s/it]
31
  53%|█████▎ | 25/47 [31:23<34:18, 93.58s/it]
32
  55%|█████▌ | 26/47 [32:57<32:46, 93.65s/it]
33
  57%|█████▋ | 27/47 [34:30<31:13, 93.67s/it]
34
  60%|█████▉ | 28/47 [36:04<29:39, 93.65s/it]
35
  62%|██████▏ | 29/47 [37:38<28:06, 93.69s/it]
Rectified_Noise/VP-Disp/download.py ADDED
@@ -0,0 +1,41 @@
1
+ # This source code is licensed under the license found in the
2
+ # LICENSE file in the root directory of this source tree.
3
+
4
+ """
5
+ Functions for downloading pre-trained SiT models
6
+ """
7
+ from torchvision.datasets.utils import download_url
8
+ import torch
9
+ import os
10
+
11
+
12
+ pretrained_models = {'SiT-XL-2-256x256.pt'}
13
+
14
+
15
+ def find_model(model_name):
16
+ """
17
+ Finds a pre-trained SiT model, downloading it if necessary. Alternatively, loads a model from a local path.
18
+ """
19
+ if model_name in pretrained_models:
20
+ return download_model(model_name)
21
+ else:
22
+ assert os.path.isfile(model_name), f'Could not find SiT checkpoint at {model_name}'
23
+ checkpoint = torch.load(model_name, map_location=lambda storage, loc: storage, weights_only=False)
24
+ if "ema" in checkpoint: # supports checkpoints from train.py
25
+ checkpoint = checkpoint["ema"]
26
+ return checkpoint
27
+
28
+
29
+ def download_model(model_name):
30
+ """
31
+ Downloads a pre-trained SiT model from the web.
32
+ """
33
+ assert model_name in pretrained_models
34
+ local_path = f'pretrained_models/{model_name}'
35
+ if not os.path.isfile(local_path):
36
+ os.makedirs('pretrained_models', exist_ok=True)
37
+ web_path = f'https://www.dl.dropboxusercontent.com/scl/fi/as9oeomcbub47de5g4be0/SiT-XL-2-256.pt?rlkey=uxzxmpicu46coq3msb17b9ofa&dl=0'
38
+ download_url(web_path, 'pretrained_models', filename=model_name)
39
+ model = torch.load(local_path, map_location=lambda storage, loc: storage, weights_only=False)
40
+ return model
41
+
Rectified_Noise/VP-Disp/environment.yml ADDED
@@ -0,0 +1,16 @@
1
+ name: RN
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ dependencies:
6
+ - python >= 3.8
7
+ - pytorch >= 1.13
8
+ - torchvision
9
+ - pytorch-cuda >=11.7
10
+ - pip
11
+ - pip:
12
+ - timm
13
+ - diffusers
14
+ - accelerate
15
+ - torchdiffeq
16
+ - wandb
Rectified_Noise/VP-Disp/evaluate_samples.sh ADDED
@@ -0,0 +1,65 @@
1
+ #!/bin/bash
2
+
3
+ # Execute all evaluation tasks in parallel
4
+ # Each command runs in the background using &
5
+
6
+ echo "Starting all evaluation tasks in parallel..."
7
+
8
+ # Reference batch path
9
+ REF_BATCH="/gemini/space/zhaozy/zhy/dataset/VIRTUAL_imagenet256_labeled.npz"
10
+
11
+ # Base directory for sample files
12
+ SAMPLE_DIR="/gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise/last_samples_depth_2_gvp_0.5"
13
+
14
+ # Change to the project root directory
15
+ cd /gemini/space/zhaozy/zhy/gzy_new/Noise_Matching
16
+
17
+ # Evaluate threshold 0.0 on GPU 0
18
+ CUDA_VISIBLE_DEVICES=0 nohup python evaluator.py \
19
+ --ref_batch ${REF_BATCH} \
20
+ --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.0-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
21
+ > eval_threshold_0.0.log 2>&1 &
22
+
23
+ # Evaluate threshold 0.15 on GPU 1
24
+ CUDA_VISIBLE_DEVICES=1 nohup python evaluator.py \
25
+ --ref_batch ${REF_BATCH} \
26
+ --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.15-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
27
+ > eval_threshold_0.15.log 2>&1 &
28
+
29
+ # Evaluate threshold 0.25 on GPU 2
30
+ CUDA_VISIBLE_DEVICES=2 nohup python evaluator.py \
31
+ --ref_batch ${REF_BATCH} \
32
+ --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.25-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
33
+ > eval_threshold_0.25.log 2>&1 &
34
+
35
+ # Evaluate threshold 0.5 on GPU 3
36
+ CUDA_VISIBLE_DEVICES=3 nohup python evaluator.py \
37
+ --ref_batch ${REF_BATCH} \
38
+ --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.5-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
39
+ > eval_threshold_0.5.log 2>&1 &
40
+
41
+ # Evaluate threshold 0.75 on GPU 0 (reused)
42
+ CUDA_VISIBLE_DEVICES=0 nohup python evaluator.py \
43
+ --ref_batch ${REF_BATCH} \
44
+ --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-0.75-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
45
+ > eval_threshold_0.75.log 2>&1 &
46
+
47
+ # Evaluate threshold 1.0 on GPU 1 (reused)
48
+ CUDA_VISIBLE_DEVICES=1 nohup python evaluator.py \
49
+ --ref_batch ${REF_BATCH} \
50
+ --sample_batch ${SAMPLE_DIR}/depth-mu-2-threshold-1.0-0550000-base-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz \
51
+ > eval_threshold_1.0.log 2>&1 &
52
+
53
+ # Wait for all background jobs to complete
54
+ echo "All evaluation tasks started. Waiting for completion..."
55
+ wait
56
+
57
+ echo "All evaluation tasks completed!"
58
+ echo ""
59
+ echo "Results saved in:"
60
+ echo " - eval_threshold_0.0.log"
61
+ echo " - eval_threshold_0.15.log"
62
+ echo " - eval_threshold_0.25.log"
63
+ echo " - eval_threshold_0.5.log"
64
+ echo " - eval_threshold_0.75.log"
65
+ echo " - eval_threshold_1.0.log"
Rectified_Noise/VP-Disp/evaluator.py ADDED
@@ -0,0 +1,689 @@
1
+ import argparse
2
+ import io
3
+ import os
4
+ import random
5
+ import warnings
6
+ import zipfile
7
+ from abc import ABC, abstractmethod
8
+ from contextlib import contextmanager
9
+ from functools import partial
10
+ from multiprocessing import cpu_count
11
+ from multiprocessing.pool import ThreadPool
12
+ from typing import Iterable, Optional, Tuple, Union
13
+
14
+ import numpy as np
15
+ import requests
16
+ import tensorflow.compat.v1 as tf
17
+ from scipy import linalg
18
+ from tqdm.auto import tqdm
19
+ from datetime import timedelta
20
+ import torch
21
+
22
+
23
+
24
+ INCEPTION_V3_URL = "https://openaipublic.blob.core.windows.net/diffusion/jul-2021/ref_batches/classify_image_graph_def.pb"
25
+ INCEPTION_V3_PATH = "classify_image_graph_def.pb"
26
+
27
+ FID_POOL_NAME = "pool_3:0"
28
+ FID_SPATIAL_NAME = "mixed_6/conv:0"
29
+
30
+
31
+ def main():
32
+ parser = argparse.ArgumentParser()
33
+ parser.add_argument("--ref_batch", default='/gemini/space/zhaozy/zhy/dataset/VIRTUAL_imagenet256_labeled.npz',help="path to reference batch npz file")
34
+ parser.add_argument("--sample_batch", default='/gemini/space/zhaozy/zhy/gzy_new/Noise_Matching/Rectified-Noise/last_samples_depth_2/depth-mu-28-0050000-2000000-cfg-1.0-64-SDE-100-Euler-sigma-Mean-0.04.npz', help="path to sample batch npz file")
35
+ args = parser.parse_args()
36
+
37
+ config = tf.ConfigProto(
38
+ allow_soft_placement=True # allows DecodeJpeg to run on CPU in Inception graph
39
+ )
40
+ config.gpu_options.allow_growth = True
41
+ evaluator = Evaluator(tf.Session(config=config))
42
+
43
+ print("warming up TensorFlow...")
44
+ # This will cause TF to print a bunch of verbose stuff now rather
45
+ # than after the next print(), to help prevent confusion.
46
+ evaluator.warmup()
47
+
48
+ print("computing reference batch activations...")
49
+ ref_acts = evaluator.read_activations(args.ref_batch)
50
+ print("computing/reading reference batch statistics...")
51
+ ref_stats, ref_stats_spatial = evaluator.read_statistics(args.ref_batch, ref_acts)
52
+
53
+ print("computing sample batch activations...")
54
+ sample_acts = evaluator.read_activations(args.sample_batch)
55
+ print("computing/reading sample batch statistics...")
56
+ sample_stats, sample_stats_spatial = evaluator.read_statistics(args.sample_batch, sample_acts)
57
+
58
+ print("Computing evaluations...")
59
+ #print("Inception Score:", evaluator.compute_inception_score(sample_acts[0]))
60
+ print("FID:", sample_stats.frechet_distance(ref_stats))
61
+ #print("sFID:", sample_stats_spatial.frechet_distance(ref_stats_spatial))
62
+ #prec, recall = evaluator.compute_prec_recall(ref_acts[0], sample_acts[0])
63
+ #print("Precision:", prec)
64
+ #print("Recall:", recall)
65
+
66
+
67
+ class InvalidFIDException(Exception):
68
+ pass
69
+
70
+
71
+ class FIDStatistics:
72
+ def __init__(self, mu: np.ndarray, sigma: np.ndarray):
73
+ self.mu = mu
74
+ self.sigma = sigma
75
+
76
+ def frechet_distance(self, other, eps=1e-6):
77
+ """
78
+ Compute the Frechet distance between two sets of statistics.
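+
+ FID(N(mu1, sigma1), N(mu2, sigma2)) = ||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 (sigma1 @ sigma2)^(1/2))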
79
+ """
80
+ # https://github.com/bioinf-jku/TTUR/blob/73ab375cdf952a12686d9aa7978567771084da42/fid.py#L132
81
+ mu1, sigma1 = self.mu, self.sigma
82
+ mu2, sigma2 = other.mu, other.sigma
83
+
84
+ mu1 = np.atleast_1d(mu1)
85
+ mu2 = np.atleast_1d(mu2)
86
+
87
+ sigma1 = np.atleast_2d(sigma1)
88
+ sigma2 = np.atleast_2d(sigma2)
89
+
90
+ assert (
91
+ mu1.shape == mu2.shape
92
+ ), f"Training and test mean vectors have different lengths: {mu1.shape}, {mu2.shape}"
93
+ assert (
94
+ sigma1.shape == sigma2.shape
95
+ ), f"Training and test covariances have different dimensions: {sigma1.shape}, {sigma2.shape}"
96
+
97
+ diff = mu1 - mu2
98
+
99
+ # product might be almost singular
100
+ covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
101
+ if not np.isfinite(covmean).all():
102
+ msg = (
103
+ "fid calculation produces singular product; adding %s to diagonal of cov estimates"
104
+ % eps
105
+ )
106
+ warnings.warn(msg)
107
+ offset = np.eye(sigma1.shape[0]) * eps
108
+ covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
109
+
110
+ # numerical error might give slight imaginary component
111
+ # Imaginary-component error handling
112
+ if np.iscomplexobj(covmean):
113
+ if not np.allclose(np.diagonal(covmean).imag, 0, atol=1):
114
+ m = np.max(np.abs(covmean.imag))
115
+ print(f"Real component: {covmean.real}")
116
+ raise ValueError("Imaginary component {}".format(m))
117
+ covmean = covmean.real
118
+
119
+ tr_covmean = np.trace(covmean)
120
+
121
+ return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean
122
+
123
+
124
+ class Evaluator:
125
+ def __init__(
126
+ self,
127
+ session,
128
+ batch_size=64,
129
+ softmax_batch_size=512,
130
+ ):
131
+ self.sess = session
132
+ self.batch_size = batch_size
133
+ self.softmax_batch_size = softmax_batch_size
134
+ self.manifold_estimator = ManifoldEstimator(session)
135
+ with self.sess.graph.as_default():
136
+ self.image_input = tf.placeholder(tf.float32, shape=[None, None, None, 3])
137
+ self.softmax_input = tf.placeholder(tf.float32, shape=[None, 2048])
138
+ self.pool_features, self.spatial_features = _create_feature_graph(self.image_input)
139
+ self.softmax = _create_softmax_graph(self.softmax_input)
140
+
141
+ def warmup(self):
142
+ self.compute_activations(np.zeros([1, 8, 64, 64, 3]))
143
+
144
+ def read_activations(self, npz_path: Union[str, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
145
+ if isinstance(npz_path, str):
146
+ # If npz_path is a string, treat it as a file path and read the .npz file
147
+ with open_npz_array(npz_path, "arr_0") as reader:
148
+ return self.compute_activations(reader.read_batches(self.batch_size))
149
+ elif isinstance(npz_path, np.ndarray):
150
+ # If npz_path is a numpy array, split it into batches manually
151
+ print("--------line 140-----------")
152
+ batches = np.array_split(npz_path, range(self.batch_size, npz_path.shape[0], self.batch_size))
153
+ print("--------line 143-----------")
154
+ return self.compute_activations(batches)
155
+ else:
156
+ raise ValueError("npz_path must be either a file path (str) or a numpy array (np.ndarray)")
157
+
158
+
159
+ def compute_activations(self, batches: Iterable[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
160
+ """
161
+ Compute image features for downstream evals.
162
+
163
+ :param batches: an iterator over NHWC numpy arrays in [0, 255].
164
+ :return: a tuple of numpy arrays of shape [N x X], where X is a feature
165
+ dimension. The tuple is (pool_3, spatial).
166
+ """
167
+ preds = []
168
+ spatial_preds = []
169
+ for batch in tqdm(batches):
170
+ # print("--------line 164-----------")
171
+
172
+ # # Identify the current process (debug aid)
173
+ # if 'RANK' in os.environ:
174
+ # rank = int(os.environ['RANK'])
175
+ # local_rank = int(os.environ.get('LOCAL_RANK', rank % torch.cuda.device_count()))
176
+ # print(f"Distributed training - Global Rank: {rank}, Local Rank: {local_rank}")
177
+ # print(f"Current GPU device: {torch.cuda.current_device()}" if torch.cuda.is_available() else "No CUDA")
178
+ # else:
179
+ # print("Single process mode")
180
+
181
+ # print(f"Process PID: {os.getpid()}")
182
+
183
+ batch = batch.astype(np.float32)
184
+ pred, spatial_pred = self.sess.run(
185
+ [self.pool_features, self.spatial_features], {self.image_input: batch}
186
+ )
187
+ # print("--------line 169-----------")
188
+ preds.append(pred.reshape([pred.shape[0], -1]))
189
+ spatial_preds.append(spatial_pred.reshape([spatial_pred.shape[0], -1]))
190
+ return (
191
+ np.concatenate(preds, axis=0),
192
+ np.concatenate(spatial_preds, axis=0),
193
+ )
194
+
195
+ def read_statistics(
196
+ self, npz_path: Union[str, np.ndarray], activations: Tuple[np.ndarray, np.ndarray]
197
+ ) -> Tuple[FIDStatistics, FIDStatistics]:
198
+ if isinstance(npz_path, str):
199
+ obj = np.load(npz_path)
200
+ if "mu" in list(obj.keys()):
201
+ return FIDStatistics(obj["mu"], obj["sigma"]), FIDStatistics(
202
+ obj["mu_s"], obj["sigma_s"]
203
+ )
204
+ elif isinstance(npz_path, np.ndarray):
205
+ obj = npz_path
206
+ else:
207
+ raise ValueError("npz_path must be either a file path (str) or a numpy array (np.ndarray)")
208
+ return tuple(self.compute_statistics(x) for x in activations)
209
+
210
+ def compute_statistics(self, activations: np.ndarray) -> FIDStatistics:
211
+ mu = np.mean(activations, axis=0)
212
+ sigma = np.cov(activations, rowvar=False)
213
+ return FIDStatistics(mu, sigma)
214
+
215
+ def compute_inception_score(self, activations: np.ndarray, split_size: int = 5000) -> float:
216
+ softmax_out = []
217
+ for i in range(0, len(activations), self.softmax_batch_size):
218
+ acts = activations[i : i + self.softmax_batch_size]
219
+ softmax_out.append(self.sess.run(self.softmax, feed_dict={self.softmax_input: acts}))
220
+ preds = np.concatenate(softmax_out, axis=0)
221
+ # https://github.com/openai/improved-gan/blob/4f5d1ec5c16a7eceb206f42bfc652693601e1d5c/inception_score/model.py#L46
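+ # Inception Score = exp( E_x[ KL(p(y|x) || p(y)) ] ), computed per split and averaged over splits.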
222
+ scores = []
223
+ for i in range(0, len(preds), split_size):
224
+ part = preds[i : i + split_size]
225
+ kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
226
+ kl = np.mean(np.sum(kl, 1))
227
+ scores.append(np.exp(kl))
228
+ return float(np.mean(scores))
229
+
230
+ def compute_prec_recall(
231
+ self, activations_ref: np.ndarray, activations_sample: np.ndarray
232
+ ) -> Tuple[float, float]:
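+ # Precision = fraction of sample features inside the reference manifold;
+ # recall = fraction of reference features inside the sample manifold
+ # (improved precision/recall metric, Kynkäänniemi et al., 2019).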
233
+ radii_1 = self.manifold_estimator.manifold_radii(activations_ref)
234
+ radii_2 = self.manifold_estimator.manifold_radii(activations_sample)
235
+ pr = self.manifold_estimator.evaluate_pr(
236
+ activations_ref, radii_1, activations_sample, radii_2
237
+ )
238
+ return (float(pr[0][0]), float(pr[1][0]))
239
+
240
+
241
+ class ManifoldEstimator:
242
+ """
243
+ A helper for comparing manifolds of feature vectors.
244
+
245
+ Adapted from https://github.com/kynkaat/improved-precision-and-recall-metric/blob/f60f25e5ad933a79135c783fcda53de30f42c9b9/precision_recall.py#L57
246
+ """
247
+
248
+ def __init__(
249
+ self,
250
+ session,
251
+ row_batch_size=10000,
252
+ col_batch_size=10000,
253
+ nhood_sizes=(3,),
254
+ clamp_to_percentile=None,
255
+ eps=1e-5,
256
+ ):
257
+ """
258
+ Estimate the manifold of given feature vectors.
259
+
260
+ :param session: the TensorFlow session.
261
+ :param row_batch_size: row batch size to compute pairwise distances
262
+ (parameter to trade-off between memory usage and performance).
263
+ :param col_batch_size: column batch size to compute pairwise distances.
264
+ :param nhood_sizes: number of neighbors used to estimate the manifold.
265
+ :param clamp_to_percentile: prune hyperspheres that have radius larger than
266
+ the given percentile.
267
+ :param eps: small number for numerical stability.
268
+ """
269
+ self.distance_block = DistanceBlock(session)
270
+ self.row_batch_size = row_batch_size
271
+ self.col_batch_size = col_batch_size
272
+ self.nhood_sizes = nhood_sizes
273
+ self.num_nhoods = len(nhood_sizes)
274
+ self.clamp_to_percentile = clamp_to_percentile
275
+ self.eps = eps
276
+
277
+ def warmup(self):
278
+ feats, radii = (
279
+ np.zeros([1, 2048], dtype=np.float32),
280
+ np.zeros([1, 1], dtype=np.float32),
281
+ )
282
+ self.evaluate_pr(feats, radii, feats, radii)
283
+
284
+ def manifold_radii(self, features: np.ndarray) -> np.ndarray:
285
+ num_images = len(features)
286
+
287
+ # Estimate manifold of features by calculating distances to k-NN of each sample.
288
+ radii = np.zeros([num_images, self.num_nhoods], dtype=np.float32)
289
+ distance_batch = np.zeros([self.row_batch_size, num_images], dtype=np.float32)
290
+ seq = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32)
291
+
292
+ for begin1 in range(0, num_images, self.row_batch_size):
293
+ end1 = min(begin1 + self.row_batch_size, num_images)
294
+ row_batch = features[begin1:end1]
295
+
296
+ for begin2 in range(0, num_images, self.col_batch_size):
297
+ end2 = min(begin2 + self.col_batch_size, num_images)
298
+ col_batch = features[begin2:end2]
299
+
300
+ # Compute distances between batches.
301
+ distance_batch[
302
+ 0 : end1 - begin1, begin2:end2
303
+ ] = self.distance_block.pairwise_distances(row_batch, col_batch)
304
+
305
+ # Find the k-nearest neighbor from the current batch.
306
+ radii[begin1:end1, :] = np.concatenate(
307
+ [
308
+ x[:, self.nhood_sizes]
309
+ for x in _numpy_partition(distance_batch[0 : end1 - begin1, :], seq, axis=1)
310
+ ],
311
+ axis=0,
312
+ )
313
+
314
+ if self.clamp_to_percentile is not None:
315
+ max_distances = np.percentile(radii, self.clamp_to_percentile, axis=0)
316
+ radii[radii > max_distances] = 0
317
+ return radii
318
+
319
+ def evaluate(self, features: np.ndarray, radii: np.ndarray, eval_features: np.ndarray):
320
+ """
321
+ Evaluate whether new feature vectors lie on the estimated manifold.
322
+ """
323
+ num_eval_images = eval_features.shape[0]
324
+ num_ref_images = radii.shape[0]
325
+ distance_batch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float32)
326
+ batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32)
327
+ max_realism_score = np.zeros([num_eval_images], dtype=np.float32)
328
+ nearest_indices = np.zeros([num_eval_images], dtype=np.int32)
329
+
330
+ for begin1 in range(0, num_eval_images, self.row_batch_size):
331
+ end1 = min(begin1 + self.row_batch_size, num_eval_images)
332
+ feature_batch = eval_features[begin1:end1]
333
+
334
+ for begin2 in range(0, num_ref_images, self.col_batch_size):
335
+ end2 = min(begin2 + self.col_batch_size, num_ref_images)
336
+ ref_batch = features[begin2:end2]
337
+
338
+ distance_batch[
339
+ 0 : end1 - begin1, begin2:end2
340
+ ] = self.distance_block.pairwise_distances(feature_batch, ref_batch)
341
+
342
+ # From the minibatch of new feature vectors, determine if they are in the estimated manifold.
343
+ # If a feature vector is inside a hypersphere of some reference sample, then
344
+ # the new sample lies at the estimated manifold.
345
+ # The radii of the hyperspheres are determined from distances of neighborhood size k.
346
+ samples_in_manifold = distance_batch[0 : end1 - begin1, :, None] <= radii
347
+ batch_predictions[begin1:end1] = np.any(samples_in_manifold, axis=1).astype(np.int32)
348
+
349
+ max_realism_score[begin1:end1] = np.max(
350
+ radii[:, 0] / (distance_batch[0 : end1 - begin1, :] + self.eps), axis=1
351
+ )
352
+ nearest_indices[begin1:end1] = np.argmin(distance_batch[0 : end1 - begin1, :], axis=1)
353
+
354
+ return {
355
+ "fraction": float(np.mean(batch_predictions)),
356
+ "batch_predictions": batch_predictions,
357
+ "max_realisim_score": max_realism_score,
358
+ "nearest_indices": nearest_indices,
359
+ }
360
+
361
+ def evaluate_pr(
362
+ self,
363
+ features_1: np.ndarray,
364
+ radii_1: np.ndarray,
365
+ features_2: np.ndarray,
366
+ radii_2: np.ndarray,
367
+ ) -> Tuple[np.ndarray, np.ndarray]:
368
+ """
369
+ Evaluate precision and recall efficiently.
370
+
371
+ :param features_1: [N1 x D] feature vectors for reference batch.
372
+ :param radii_1: [N1 x K1] radii for reference vectors.
373
+ :param features_2: [N2 x D] feature vectors for the other batch.
374
+ :param radii_2: [N x K2] radii for other vectors.
375
+ :return: a tuple of arrays for (precision, recall):
376
+ - precision: an np.ndarray of length K1
377
+ - recall: an np.ndarray of length K2
378
+ """
379
+ features_1_status = np.zeros([len(features_1), radii_2.shape[1]], dtype=bool)  # np.bool was removed in NumPy 1.24+
380
+ features_2_status = np.zeros([len(features_2), radii_1.shape[1]], dtype=bool)
381
+ for begin_1 in range(0, len(features_1), self.row_batch_size):
382
+ end_1 = begin_1 + self.row_batch_size
383
+ batch_1 = features_1[begin_1:end_1]
384
+ for begin_2 in range(0, len(features_2), self.col_batch_size):
385
+ end_2 = begin_2 + self.col_batch_size
386
+ batch_2 = features_2[begin_2:end_2]
387
+ batch_1_in, batch_2_in = self.distance_block.less_thans(
388
+ batch_1, radii_1[begin_1:end_1], batch_2, radii_2[begin_2:end_2]
389
+ )
390
+ features_1_status[begin_1:end_1] |= batch_1_in
391
+ features_2_status[begin_2:end_2] |= batch_2_in
392
+ return (
393
+ np.mean(features_2_status.astype(np.float64), axis=0),
394
+ np.mean(features_1_status.astype(np.float64), axis=0),
395
+ )
396
+
397
+
398
+ class DistanceBlock:
399
+ """
400
+ Calculate pairwise distances between vectors.
401
+
402
+ Adapted from https://github.com/kynkaat/improved-precision-and-recall-metric/blob/f60f25e5ad933a79135c783fcda53de30f42c9b9/precision_recall.py#L34
403
+ """
404
+
405
+ def __init__(self, session):
406
+ self.session = session
407
+
408
+ # Initialize TF graph to calculate pairwise distances.
409
+ with session.graph.as_default():
410
+ self._features_batch1 = tf.placeholder(tf.float32, shape=[None, None])
411
+ self._features_batch2 = tf.placeholder(tf.float32, shape=[None, None])
412
+ distance_block_16 = _batch_pairwise_distances(
413
+ tf.cast(self._features_batch1, tf.float16),
414
+ tf.cast(self._features_batch2, tf.float16),
415
+ )
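+ # Compute distances in float16 for speed/memory; fall back to float32
+ # whenever any fp16 distance overflows to a non-finite value.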
416
+ self.distance_block = tf.cond(
417
+ tf.reduce_all(tf.math.is_finite(distance_block_16)),
418
+ lambda: tf.cast(distance_block_16, tf.float32),
419
+ lambda: _batch_pairwise_distances(self._features_batch1, self._features_batch2),
420
+ )
421
+
422
+ # Extra logic for less thans.
423
+ self._radii1 = tf.placeholder(tf.float32, shape=[None, None])
424
+ self._radii2 = tf.placeholder(tf.float32, shape=[None, None])
425
+ dist32 = tf.cast(self.distance_block, tf.float32)[..., None]
426
+ self._batch_1_in = tf.math.reduce_any(dist32 <= self._radii2, axis=1)
427
+ self._batch_2_in = tf.math.reduce_any(dist32 <= self._radii1[:, None], axis=0)
428
+
429
+ def pairwise_distances(self, U, V):
430
+ """
431
+ Evaluate pairwise distances between two batches of feature vectors.
432
+ """
433
+ return self.session.run(
434
+ self.distance_block,
435
+ feed_dict={self._features_batch1: U, self._features_batch2: V},
436
+ )
437
+
438
+ def less_thans(self, batch_1, radii_1, batch_2, radii_2):
439
+ return self.session.run(
440
+ [self._batch_1_in, self._batch_2_in],
441
+ feed_dict={
442
+ self._features_batch1: batch_1,
443
+ self._features_batch2: batch_2,
444
+ self._radii1: radii_1,
445
+ self._radii2: radii_2,
446
+ },
447
+ )
448
+
449
+
450
+ def _batch_pairwise_distances(U, V):
451
+ """
452
+ Compute pairwise distances between two batches of feature vectors.
453
+ """
454
+ with tf.variable_scope("pairwise_dist_block"):
455
+ # Squared norms of each row in U and V.
456
+ norm_u = tf.reduce_sum(tf.square(U), 1)
457
+ norm_v = tf.reduce_sum(tf.square(V), 1)
458
+
459
+ # norm_u as a column and norm_v as a row vectors.
460
+ norm_u = tf.reshape(norm_u, [-1, 1])
461
+ norm_v = tf.reshape(norm_v, [1, -1])
462
+
463
+ # Pairwise squared Euclidean distances.
464
+ D = tf.maximum(norm_u - 2 * tf.matmul(U, V, False, True) + norm_v, 0.0)
465
+
466
+ return D
467
+
468
+
469
+ class NpzArrayReader(ABC):
470
+ @abstractmethod
471
+ def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
472
+ pass
473
+
474
+ @abstractmethod
475
+ def remaining(self) -> int:
476
+ pass
477
+
478
+ def read_batches(self, batch_size: int) -> Iterable[np.ndarray]:
479
+ def gen_fn():
480
+ while True:
481
+ batch = self.read_batch(batch_size)
482
+ if batch is None:
483
+ break
484
+ yield batch
485
+
486
+ rem = self.remaining()
487
+ num_batches = rem // batch_size + int(rem % batch_size != 0)
488
+ return BatchIterator(gen_fn, num_batches)
489
+
490
+
491
+ class BatchIterator:
492
+ def __init__(self, gen_fn, length):
493
+ self.gen_fn = gen_fn
494
+ self.length = length
495
+
496
+ def __len__(self):
497
+ return self.length
498
+
499
+ def __iter__(self):
500
+ return self.gen_fn()
501
+
502
+
503
+ class StreamingNpzArrayReader(NpzArrayReader):
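+ # Streams batches straight from the open .npy member of the .npz archive,
+ # so the full array never has to be resident in memory.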
504
+ def __init__(self, arr_f, shape, dtype):
505
+ self.arr_f = arr_f
506
+ self.shape = shape
507
+ self.dtype = dtype
508
+ self.idx = 0
509
+
510
+ def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
511
+ if self.idx >= self.shape[0]:
512
+ return None
513
+
514
+ bs = min(batch_size, self.shape[0] - self.idx)
515
+ self.idx += bs
516
+
517
+ if self.dtype.itemsize == 0:
518
+ return np.ndarray([bs, *self.shape[1:]], dtype=self.dtype)
519
+
520
+ read_count = bs * np.prod(self.shape[1:])
521
+ read_size = int(read_count * self.dtype.itemsize)
522
+ data = _read_bytes(self.arr_f, read_size, "array data")
523
+ return np.frombuffer(data, dtype=self.dtype).reshape([bs, *self.shape[1:]])
524
+
525
+ def remaining(self) -> int:
526
+ return max(0, self.shape[0] - self.idx)
527
+
528
+
529
+ class MemoryNpzArrayReader(NpzArrayReader):
530
+ def __init__(self, arr):
531
+ self.arr = arr
532
+ self.idx = 0
533
+
534
+ @classmethod
535
+ def load(cls, path: str, arr_name: str):
536
+ with open(path, "rb") as f:
537
+ arr = np.load(f)[arr_name]
538
+ return cls(arr)
539
+
540
+ def read_batch(self, batch_size: int) -> Optional[np.ndarray]:
541
+ if self.idx >= self.arr.shape[0]:
542
+ return None
543
+
544
+ res = self.arr[self.idx : self.idx + batch_size]
545
+ self.idx += batch_size
546
+ return res
547
+
548
+ def remaining(self) -> int:
549
+ return max(0, self.arr.shape[0] - self.idx)
550
+
551
+
552
+ @contextmanager
553
+ def open_npz_array(path: str, arr_name: str) -> NpzArrayReader:
554
+ with _open_npy_file(path, arr_name) as arr_f:
555
+ version = np.lib.format.read_magic(arr_f)
556
+ if version == (1, 0):
557
+ header = np.lib.format.read_array_header_1_0(arr_f)
558
+ elif version == (2, 0):
559
+ header = np.lib.format.read_array_header_2_0(arr_f)
560
+ else:
561
+ yield MemoryNpzArrayReader.load(path, arr_name)
562
+ return
563
+ shape, fortran, dtype = header
564
+ if fortran or dtype.hasobject:
565
+ yield MemoryNpzArrayReader.load(path, arr_name)
566
+ else:
567
+ yield StreamingNpzArrayReader(arr_f, shape, dtype)
568
+
569
+
570
+ def _read_bytes(fp, size, error_template="ran out of data"):
571
+ """
572
+ Copied from: https://github.com/numpy/numpy/blob/fb215c76967739268de71aa4bda55dd1b062bc2e/numpy/lib/format.py#L788-L886
573
+
574
+ Read from file-like object until size bytes are read.
575
+ Raises ValueError if EOF is encountered before size bytes are read.
576
+ Non-blocking objects only supported if they derive from io objects.
577
+ Required as e.g. ZipExtFile in python 2.6 can return less data than
578
+ requested.
579
+ """
580
+ data = bytes()
581
+ while True:
582
+ # io files (default in python3) return None or raise on
583
+ # would-block, python2 file will truncate, probably nothing can be
584
+ # done about that. note that regular files can't be non-blocking
585
+ try:
586
+ r = fp.read(size - len(data))
587
+ data += r
588
+ if len(r) == 0 or len(data) == size:
589
+ break
590
+ except io.BlockingIOError:
591
+ pass
592
+ if len(data) != size:
593
+ msg = "EOF: reading %s, expected %d bytes got %d"
594
+ raise ValueError(msg % (error_template, size, len(data)))
595
+ else:
596
+ return data
597
+
598
+
599
+ @contextmanager
600
+ def _open_npy_file(path: str, arr_name: str):
601
+ with open(path, "rb") as f:
602
+ with zipfile.ZipFile(f, "r") as zip_f:
603
+ if f"{arr_name}.npy" not in zip_f.namelist():
604
+ raise ValueError(f"missing {arr_name} in npz file")
605
+ with zip_f.open(f"{arr_name}.npy", "r") as arr_f:
606
+ yield arr_f
607
+
608
+
609
+ def _download_inception_model():
610
+ if os.path.exists(INCEPTION_V3_PATH):
611
+ return
612
+ print("downloading InceptionV3 model...")
613
+ with requests.get(INCEPTION_V3_URL, stream=True) as r:
614
+ r.raise_for_status()
615
+ tmp_path = INCEPTION_V3_PATH + ".tmp"
616
+ with open(tmp_path, "wb") as f:
617
+ for chunk in tqdm(r.iter_content(chunk_size=8192)):
618
+ f.write(chunk)
619
+ os.rename(tmp_path, INCEPTION_V3_PATH)
620
+
621
+
622
+ def _create_feature_graph(input_batch):
623
+ _download_inception_model()
624
+ prefix = f"{random.randrange(2**32)}_{random.randrange(2**32)}"
625
+ with open(INCEPTION_V3_PATH, "rb") as f:
626
+ graph_def = tf.GraphDef()
627
+ graph_def.ParseFromString(f.read())
628
+ pool3, spatial = tf.import_graph_def(
629
+ graph_def,
630
+ input_map={f"ExpandDims:0": input_batch},
631
+ return_elements=[FID_POOL_NAME, FID_SPATIAL_NAME],
632
+ name=prefix,
633
+ )
634
+ _update_shapes(pool3)
635
+ spatial = spatial[..., :7]
636
+ return pool3, spatial
637
+
638
+
639
+ def _create_softmax_graph(input_batch):
640
+ _download_inception_model()
641
+ prefix = f"{random.randrange(2**32)}_{random.randrange(2**32)}"
642
+ with open(INCEPTION_V3_PATH, "rb") as f:
643
+ graph_def = tf.GraphDef()
644
+ graph_def.ParseFromString(f.read())
645
+ (matmul,) = tf.import_graph_def(
646
+ graph_def, return_elements=[f"softmax/logits/MatMul"], name=prefix
647
+ )
648
+ w = matmul.inputs[1]
649
+ logits = tf.matmul(input_batch, w)
650
+ return tf.nn.softmax(logits)
651
+
652
+
653
+ def _update_shapes(pool3):
654
+ # https://github.com/bioinf-jku/TTUR/blob/73ab375cdf952a12686d9aa7978567771084da42/fid.py#L50-L63
655
+ ops = pool3.graph.get_operations()
656
+ for op in ops:
657
+ for o in op.outputs:
658
+ shape = o.get_shape()
659
+ if shape._dims is not None: # pylint: disable=protected-access
660
+ # shape = [s.value for s in shape] TF 1.x
661
+ shape = [s for s in shape] # TF 2.x
662
+ new_shape = []
663
+ for j, s in enumerate(shape):
664
+ if s == 1 and j == 0:
665
+ new_shape.append(None)
666
+ else:
667
+ new_shape.append(s)
668
+ o.__dict__["_shape_val"] = tf.TensorShape(new_shape)
669
+ return pool3
670
+
671
+
672
+ def _numpy_partition(arr, kth, **kwargs):
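+ # Run np.partition on row-chunks of arr in parallel threads; returns the list of partitioned chunks.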
673
+ num_workers = min(cpu_count(), len(arr))
674
+ chunk_size = len(arr) // num_workers
675
+ extra = len(arr) % num_workers
676
+
677
+ start_idx = 0
678
+ batches = []
679
+ for i in range(num_workers):
680
+ size = chunk_size + (1 if i < extra else 0)
681
+ batches.append(arr[start_idx : start_idx + size])
682
+ start_idx += size
683
+
684
+ with ThreadPool(num_workers) as pool:
685
+ return list(pool.map(partial(np.partition, kth=kth, **kwargs), batches))
686
+
687
+
688
+ if __name__ == "__main__":
689
+ main()