studyOverflow commited on
Commit
efa8828
·
verified ·
1 Parent(s): 6c955ef

Add files using upload-large-folder tool

Browse files
wandb/run-20260124_022454-nlxwwxfq/files/output.log ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ I0124 02:24:55.881326 140556193482560 train_g2rpo_sd_merge.py:465]
2
+ allow_tf32: true
3
+ logdir: logs
4
+ mixed_precision: bf16
5
+ num_checkpoint_limit: 5
6
+ num_epochs: 300
7
+ pretrained:
8
+ model: ./data/StableDiffusion
9
+ revision: main
10
+ prompt_fn: imagenet_animals
11
+ prompt_fn_kwargs: {}
12
+ resume_from: ''
13
+ reward_fn: hpsv2
14
+ run_name: 2026.01.24_02.24.53
15
+ sample:
16
+ batch_size: 1
17
+ eta: 1.0
18
+ guidance_scale: 5.0
19
+ num_batches_per_epoch: 2
20
+ num_steps: 50
21
+ save_freq: 20
22
+ seed: 42
23
+ train:
24
+ adam_beta1: 0.9
25
+ adam_beta2: 0.999
26
+ adam_epsilon: 1.0e-08
27
+ adam_weight_decay: 0.0001
28
+ adv_clip_max: 5
29
+ batch_size: 1
30
+ cfg: true
31
+ clip_range: 0.0001
32
+ gradient_accumulation_steps: 1
33
+ learning_rate: 1.0e-05
34
+ max_grad_norm: 1.0
35
+ num_inner_epochs: 1
36
+ timestep_fraction: 1.0
37
+ use_8bit_adam: false
38
+ use_lora: false
39
+
40
+ Loading pipeline components...: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:02<00:00, 2.67it/s]
41
+ /home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers
42
+ warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning)
43
+ I0124 02:24:59.144597 140556193482560 factory.py:159] Loaded ViT-H-14 model config.
44
+ I0124 02:25:04.967288 140556193482560 factory.py:207] Loading pretrained ViT-H-14 weights (./data/hps/open_clip_pytorch_model.bin).
wandb/run-20260124_022454-nlxwwxfq/files/requirements.txt ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scipy==1.13.0
2
+ regex==2024.9.11
3
+ sentencepiece==0.2.0
4
+ six==1.16.0
5
+ anyio==4.11.0
6
+ nvidia-cuda-nvrtc-cu12==12.6.77
7
+ scikit-video==1.1.11
8
+ platformdirs==4.5.0
9
+ mypy==1.11.1
10
+ ruff==0.6.5
11
+ charset-normalizer==3.4.4
12
+ torch==2.9.0+cu126
13
+ av==13.1.0
14
+ pillow==10.2.0
15
+ gpustat==1.1.1
16
+ torchvision==0.24.0+cu126
17
+ multidict==6.7.0
18
+ torchmetrics==1.5.1
19
+ aiohttp==3.13.1
20
+ transformers==4.46.1
21
+ decord==0.6.0
22
+ wcwidth==0.2.14
23
+ sphinx-lint==1.0.0
24
+ nvidia-cuda-runtime-cu12==12.6.77
25
+ pytz==2025.2
26
+ codespell==2.3.0
27
+ hpsv2==1.2.0
28
+ mypy_extensions==1.1.0
29
+ numpy==1.26.3
30
+ omegaconf==2.3.0
31
+ Markdown==3.9
32
+ tzdata==2025.2
33
+ pandas==2.2.3
34
+ pytorch-lightning==2.4.0
35
+ aiosignal==1.4.0
36
+ aiohappyeyeballs==2.6.1
37
+ python-dateutil==2.9.0.post0
38
+ seaborn==0.13.2
39
+ beautifulsoup4==4.12.3
40
+ isort==5.13.2
41
+ httpx==0.28.1
42
+ certifi==2025.10.5
43
+ ml_collections==1.1.0
44
+ nvidia-cudnn-cu12==9.10.2.21
45
+ hf-xet==1.2.0
46
+ requests==2.31.0
47
+ inflect==6.0.4
48
+ iniconfig==2.1.0
49
+ braceexpand==0.1.7
50
+ h5py==3.12.1
51
+ wandb==0.18.5
52
+ protobuf==3.20.3
53
+ ninja==1.13.0
54
+ kiwisolver==1.4.9
55
+ networkx==3.3
56
+ packaging==25.0
57
+ fvcore==0.1.5.post20221221
58
+ pyparsing==3.2.5
59
+ starlette==0.41.3
60
+ frozenlist==1.8.0
61
+ docker-pycreds==0.4.0
62
+ Werkzeug==3.1.3
63
+ MarkupSafe==2.1.5
64
+ einops==0.8.0
65
+ sentry-sdk==2.42.0
66
+ PyYAML==6.0.1
67
+ nvidia-nccl-cu12==2.27.5
68
+ datasets==4.3.0
69
+ polib==1.2.0
70
+ safetensors==0.6.2
71
+ async-timeout==5.0.1
72
+ setproctitle==1.3.7
73
+ clint==0.5.1
74
+ matplotlib==3.9.2
75
+ propcache==0.4.1
76
+ termcolor==3.1.0
77
+ antlr4-python3-runtime==4.9.3
78
+ cycler==0.12.1
79
+ fastvideo==1.2.0
80
+ toml==0.10.2
81
+ xxhash==3.6.0
82
+ wheel==0.44.0
83
+ albumentations==1.4.20
84
+ fastapi==0.115.3
85
+ nvidia-cufft-cu12==11.3.0.4
86
+ yarl==1.22.0
87
+ psutil==7.1.0
88
+ tensorboard-data-server==0.7.2
89
+ pydantic==2.9.2
90
+ nvidia-nvtx-cu12==12.6.77
91
+ portalocker==3.2.0
92
+ triton==3.5.0
93
+ annotated-types==0.7.0
94
+ proglog==0.1.12
95
+ nvidia-cusparselt-cu12==0.7.1
96
+ yapf==0.32.0
97
+ Jinja2==3.1.6
98
+ types-requests==2.32.4.20250913
99
+ lightning-utilities==0.15.2
100
+ grpcio==1.75.1
101
+ uvicorn==0.32.0
102
+ typing_extensions==4.15.0
103
+ nvidia-nvjitlink-cu12==12.6.85
104
+ watch==0.2.7
105
+ moviepy==1.0.3
106
+ timm==1.0.11
107
+ pytest-split==0.8.0
108
+ gdown==5.2.0
109
+ types-setuptools==80.9.0.20250822
110
+ nvidia-cusolver-cu12==11.7.1.2
111
+ types-PyYAML==6.0.12.20250915
112
+ pip==25.2
113
+ qwen-vl-utils==0.0.14
114
+ soupsieve==2.8
115
+ zipp==3.23.0
116
+ flash_attn==2.8.3
117
+ yacs==0.1.8
118
+ diffusers==0.32.0
119
+ pluggy==1.6.0
120
+ opencv-python-headless==4.11.0.86
121
+ mpmath==1.3.0
122
+ test_tube==0.7.5
123
+ stringzilla==4.2.1
124
+ fonttools==4.60.1
125
+ nvidia-ml-py==13.580.82
126
+ parameterized==0.9.0
127
+ loguru==0.7.3
128
+ tabulate==0.9.0
129
+ idna==3.6
130
+ iopath==0.1.10
131
+ decorator==4.4.2
132
+ nvidia-cufile-cu12==1.11.1.6
133
+ threadpoolctl==3.6.0
134
+ pyarrow==21.0.0
135
+ httpcore==1.0.9
136
+ hydra-core==1.3.2
137
+ multiprocess==0.70.16
138
+ contourpy==1.3.2
139
+ clip==1.0
140
+ tqdm==4.66.5
141
+ open_clip_torch==3.2.0
142
+ accelerate==1.0.1
143
+ gitdb==4.0.12
144
+ importlib_metadata==8.7.0
145
+ nvidia-cublas-cu12==12.6.4.1
146
+ h11==0.16.0
147
+ filelock==3.19.1
148
+ liger_kernel==0.4.1
149
+ click==8.3.0
150
+ urllib3==2.2.0
151
+ imageio-ffmpeg==0.5.1
152
+ setuptools==80.9.0
153
+ joblib==1.5.2
154
+ tensorboard==2.20.0
155
+ attrs==25.4.0
156
+ future==1.0.0
157
+ albucore==0.0.19
158
+ fsspec==2025.9.0
159
+ sympy==1.14.0
160
+ eval_type_backport==0.2.2
161
+ pydantic_core==2.23.4
162
+ sniffio==1.3.1
163
+ nvidia-nvshmem-cu12==3.3.20
164
+ exceptiongroup==1.3.0
165
+ smmap==5.0.2
166
+ tomli==2.0.2
167
+ ftfy==6.3.0
168
+ dill==0.4.0
169
+ pytest==7.2.0
170
+ PySocks==1.7.1
171
+ nvidia-curand-cu12==10.3.7.77
172
+ tokenizers==0.20.1
173
+ args==0.1.0
174
+ fairscale==0.4.13
175
+ peft==0.13.2
176
+ webdataset==1.0.2
177
+ huggingface-hub==0.26.1
178
+ GitPython==3.1.45
179
+ pytorchvideo==0.1.5
180
+ scikit-learn==1.5.2
181
+ bitsandbytes==0.48.1
182
+ nvidia-cusparse-cu12==12.5.4.2
183
+ nvidia-cuda-cupti-cu12==12.6.80
184
+ imageio==2.36.0
185
+ pydub==0.25.1
186
+ image-reward==1.5
187
+ absl-py==2.3.1
188
+ blessed==1.22.0
189
+ torchdiffeq==0.2.4
wandb/run-20260124_023610-n1ooz9bl/logs/debug-core.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-24T02:36:09.151602379+08:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpaso2kes5/port-612439.txt","pid":612439,"debug":false,"disable-analytics":false}
2
+ {"time":"2026-01-24T02:36:09.151632657+08:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2026-01-24T02:36:09.15256438+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":42423,"Zone":""}}
4
+ {"time":"2026-01-24T02:36:09.152689129+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":612439}
5
+ {"time":"2026-01-24T02:36:09.342363565+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:38120"}
6
+ {"time":"2026-01-24T02:36:10.159795478+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"n1ooz9bl","id":"127.0.0.1:38120"}
7
+ {"time":"2026-01-24T02:36:10.27267688+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"n1ooz9bl","id":"127.0.0.1:38120"}
8
+ {"time":"2026-01-24T02:37:22.867050941+08:00","level":"INFO","msg":"Parent process exited, terminating service process."}
wandb/run-20260124_023610-n1ooz9bl/logs/debug-internal.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-24T02:36:10.160103717+08:00","level":"INFO","msg":"using version","core version":"0.18.5"}
2
+ {"time":"2026-01-24T02:36:10.160137584+08:00","level":"INFO","msg":"created symlink","path":"/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/wandb/run-20260124_023610-n1ooz9bl/logs/debug-core.log"}
3
+ {"time":"2026-01-24T02:36:10.272633973+08:00","level":"INFO","msg":"created new stream","id":"n1ooz9bl"}
4
+ {"time":"2026-01-24T02:36:10.272673149+08:00","level":"INFO","msg":"stream: started","id":"n1ooz9bl"}
5
+ {"time":"2026-01-24T02:36:10.272719319+08:00","level":"INFO","msg":"sender: started","stream_id":"n1ooz9bl"}
6
+ {"time":"2026-01-24T02:36:10.27275806+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"n1ooz9bl"}}
7
+ {"time":"2026-01-24T02:36:10.272722646+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"n1ooz9bl"}}
8
+ {"time":"2026-01-24T02:36:11.031290755+08:00","level":"INFO","msg":"Starting system monitor"}
wandb/run-20260124_110518-tfmkls9a/files/config.yaml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.5
4
+ m: []
5
+ python_version: 3.10.19
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 11
10
+ - 41
11
+ - 49
12
+ - 55
13
+ - 71
14
+ - 83
15
+ - 98
16
+ "2":
17
+ - 1
18
+ - 11
19
+ - 41
20
+ - 49
21
+ - 55
22
+ - 63
23
+ - 71
24
+ - 83
25
+ - 98
26
+ "3":
27
+ - 13
28
+ - 23
29
+ - 55
30
+ "4": 3.10.19
31
+ "5": 0.18.5
32
+ "6": 4.46.1
33
+ "8":
34
+ - 5
35
+ "12": 0.18.5
36
+ "13": linux-x86_64
37
+ allow_tf32:
38
+ value: true
39
+ logdir:
40
+ value: logs
41
+ mixed_precision:
42
+ value: bf16
43
+ num_checkpoint_limit:
44
+ value: 5
45
+ num_epochs:
46
+ value: 300
47
+ pretrained:
48
+ value:
49
+ model: ./data/StableDiffusion
50
+ revision: main
51
+ prompt_fn:
52
+ value: imagenet_animals
53
+ resume_from:
54
+ value: ""
55
+ reward_fn:
56
+ value: hpsv2
57
+ run_name:
58
+ value: 2026.01.24_11.05.16
59
+ sample:
60
+ value:
61
+ batch_size: 1
62
+ eta: 1
63
+ guidance_scale: 5
64
+ num_batches_per_epoch: 2
65
+ num_steps: 50
66
+ save_freq:
67
+ value: 20
68
+ seed:
69
+ value: 42
70
+ train:
71
+ value:
72
+ adam_beta1: 0.9
73
+ adam_beta2: 0.999
74
+ adam_epsilon: 1e-08
75
+ adam_weight_decay: 0.0001
76
+ adv_clip_max: 5
77
+ batch_size: 1
78
+ cfg: true
79
+ clip_range: 0.0001
80
+ gradient_accumulation_steps: 1
81
+ learning_rate: 1e-05
82
+ max_grad_norm: 1
83
+ num_inner_epochs: 1
84
+ timestep_fraction: 1
85
+ use_8bit_adam: false
86
+ use_lora:
87
+ value: false
wandb/run-20260124_110518-tfmkls9a/files/output.log ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ I0124 11:05:19.230831 135505156663104 train_g2rpo_sd_merge.py:478]
2
+ allow_tf32: true
3
+ logdir: logs
4
+ mixed_precision: bf16
5
+ num_checkpoint_limit: 5
6
+ num_epochs: 300
7
+ pretrained:
8
+ model: ./data/StableDiffusion
9
+ revision: main
10
+ prompt_fn: imagenet_animals
11
+ prompt_fn_kwargs: {}
12
+ resume_from: ''
13
+ reward_fn: hpsv2
14
+ run_name: 2026.01.24_11.05.16
15
+ sample:
16
+ batch_size: 1
17
+ eta: 1.0
18
+ guidance_scale: 5.0
19
+ num_batches_per_epoch: 2
20
+ num_steps: 50
21
+ save_freq: 20
22
+ seed: 42
23
+ train:
24
+ adam_beta1: 0.9
25
+ adam_beta2: 0.999
26
+ adam_epsilon: 1.0e-08
27
+ adam_weight_decay: 0.0001
28
+ adv_clip_max: 5
29
+ batch_size: 1
30
+ cfg: true
31
+ clip_range: 0.0001
32
+ gradient_accumulation_steps: 1
33
+ learning_rate: 1.0e-05
34
+ max_grad_norm: 1.0
35
+ num_inner_epochs: 1
36
+ timestep_fraction: 1.0
37
+ use_8bit_adam: false
38
+ use_lora: false
39
+
40
+ Loading pipeline components...: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:02<00:00, 2.65it/s]
41
+ /home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers
42
+ warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning)
43
+ I0124 11:05:22.495319 135505156663104 factory.py:159] Loaded ViT-H-14 model config.
44
+ I0124 11:05:27.175477 135505156663104 factory.py:207] Loading pretrained ViT-H-14 weights (./data/hps/open_clip_pytorch_model.bin).
45
+ I0124 11:05:32.379540 135505156663104 train_g2rpo_sd_merge.py:670] ***** Running E-GRPO (G2RPO) Training for Stable Diffusion *****
46
+ I0124 11:05:32.380349 135505156663104 train_g2rpo_sd_merge.py:671] Num Epochs = 300
47
+ I0124 11:05:32.380448 135505156663104 train_g2rpo_sd_merge.py:672] Num generations per prompt = 4
48
+ I0124 11:05:32.380522 135505156663104 train_g2rpo_sd_merge.py:673] Eta step list = [0, 1, 2, 3, 4, 5, 6, 7]
49
+ I0124 11:05:32.380584 135505156663104 train_g2rpo_sd_merge.py:674] Eta step merge list = [1, 1, 1, 2, 2, 2, 3, 3]
50
+ I0124 11:05:32.380647 135505156663104 train_g2rpo_sd_merge.py:675] Granular list = [1]
51
+ Traceback (most recent call last):
52
+ File "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_sd_merge.py", line 1001, in <module>
53
+ app.run(main)
54
+ File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/absl/app.py", line 316, in run
55
+ _run_main(main, args)
56
+ File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/absl/app.py", line 261, in _run_main
57
+ sys.exit(main(argv))
58
+ File "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_sd_merge.py", line 786, in main
59
+ (eval_image[0].cpu().permute(1, 2, 0).numpy() * 255).astype(np.uint8)
60
+ TypeError: Got unsupported ScalarType BFloat16
61
+ [rank0]: Traceback (most recent call last):
62
+ [rank0]: File "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_sd_merge.py", line 1001, in <module>
63
+ [rank0]: app.run(main)
64
+ [rank0]: File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/absl/app.py", line 316, in run
65
+ [rank0]: _run_main(main, args)
66
+ [rank0]: File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/absl/app.py", line 261, in _run_main
67
+ [rank0]: sys.exit(main(argv))
68
+ [rank0]: File "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_sd_merge.py", line 786, in main
69
+ [rank0]: (eval_image[0].cpu().permute(1, 2, 0).numpy() * 255).astype(np.uint8)
70
+ [rank0]: TypeError: Got unsupported ScalarType BFloat16
wandb/run-20260124_110518-tfmkls9a/files/requirements.txt ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scipy==1.13.0
2
+ regex==2024.9.11
3
+ sentencepiece==0.2.0
4
+ six==1.16.0
5
+ anyio==4.11.0
6
+ nvidia-cuda-nvrtc-cu12==12.6.77
7
+ scikit-video==1.1.11
8
+ platformdirs==4.5.0
9
+ mypy==1.11.1
10
+ ruff==0.6.5
11
+ charset-normalizer==3.4.4
12
+ torch==2.9.0+cu126
13
+ av==13.1.0
14
+ pillow==10.2.0
15
+ gpustat==1.1.1
16
+ torchvision==0.24.0+cu126
17
+ multidict==6.7.0
18
+ torchmetrics==1.5.1
19
+ aiohttp==3.13.1
20
+ transformers==4.46.1
21
+ decord==0.6.0
22
+ wcwidth==0.2.14
23
+ sphinx-lint==1.0.0
24
+ nvidia-cuda-runtime-cu12==12.6.77
25
+ pytz==2025.2
26
+ codespell==2.3.0
27
+ hpsv2==1.2.0
28
+ mypy_extensions==1.1.0
29
+ numpy==1.26.3
30
+ omegaconf==2.3.0
31
+ Markdown==3.9
32
+ tzdata==2025.2
33
+ pandas==2.2.3
34
+ pytorch-lightning==2.4.0
35
+ aiosignal==1.4.0
36
+ aiohappyeyeballs==2.6.1
37
+ python-dateutil==2.9.0.post0
38
+ seaborn==0.13.2
39
+ beautifulsoup4==4.12.3
40
+ isort==5.13.2
41
+ httpx==0.28.1
42
+ certifi==2025.10.5
43
+ ml_collections==1.1.0
44
+ nvidia-cudnn-cu12==9.10.2.21
45
+ hf-xet==1.2.0
46
+ requests==2.31.0
47
+ inflect==6.0.4
48
+ iniconfig==2.1.0
49
+ braceexpand==0.1.7
50
+ h5py==3.12.1
51
+ wandb==0.18.5
52
+ protobuf==3.20.3
53
+ ninja==1.13.0
54
+ kiwisolver==1.4.9
55
+ networkx==3.3
56
+ packaging==25.0
57
+ fvcore==0.1.5.post20221221
58
+ pyparsing==3.2.5
59
+ starlette==0.41.3
60
+ frozenlist==1.8.0
61
+ docker-pycreds==0.4.0
62
+ Werkzeug==3.1.3
63
+ MarkupSafe==2.1.5
64
+ einops==0.8.0
65
+ sentry-sdk==2.42.0
66
+ PyYAML==6.0.1
67
+ nvidia-nccl-cu12==2.27.5
68
+ datasets==4.3.0
69
+ polib==1.2.0
70
+ safetensors==0.6.2
71
+ async-timeout==5.0.1
72
+ setproctitle==1.3.7
73
+ clint==0.5.1
74
+ matplotlib==3.9.2
75
+ propcache==0.4.1
76
+ termcolor==3.1.0
77
+ antlr4-python3-runtime==4.9.3
78
+ cycler==0.12.1
79
+ fastvideo==1.2.0
80
+ toml==0.10.2
81
+ xxhash==3.6.0
82
+ wheel==0.44.0
83
+ albumentations==1.4.20
84
+ fastapi==0.115.3
85
+ nvidia-cufft-cu12==11.3.0.4
86
+ yarl==1.22.0
87
+ psutil==7.1.0
88
+ tensorboard-data-server==0.7.2
89
+ pydantic==2.9.2
90
+ nvidia-nvtx-cu12==12.6.77
91
+ portalocker==3.2.0
92
+ triton==3.5.0
93
+ annotated-types==0.7.0
94
+ proglog==0.1.12
95
+ nvidia-cusparselt-cu12==0.7.1
96
+ yapf==0.32.0
97
+ Jinja2==3.1.6
98
+ types-requests==2.32.4.20250913
99
+ lightning-utilities==0.15.2
100
+ grpcio==1.75.1
101
+ uvicorn==0.32.0
102
+ typing_extensions==4.15.0
103
+ nvidia-nvjitlink-cu12==12.6.85
104
+ watch==0.2.7
105
+ moviepy==1.0.3
106
+ timm==1.0.11
107
+ pytest-split==0.8.0
108
+ gdown==5.2.0
109
+ types-setuptools==80.9.0.20250822
110
+ nvidia-cusolver-cu12==11.7.1.2
111
+ types-PyYAML==6.0.12.20250915
112
+ pip==25.2
113
+ qwen-vl-utils==0.0.14
114
+ soupsieve==2.8
115
+ zipp==3.23.0
116
+ flash_attn==2.8.3
117
+ yacs==0.1.8
118
+ diffusers==0.32.0
119
+ pluggy==1.6.0
120
+ opencv-python-headless==4.11.0.86
121
+ mpmath==1.3.0
122
+ test_tube==0.7.5
123
+ stringzilla==4.2.1
124
+ fonttools==4.60.1
125
+ nvidia-ml-py==13.580.82
126
+ parameterized==0.9.0
127
+ loguru==0.7.3
128
+ tabulate==0.9.0
129
+ idna==3.6
130
+ iopath==0.1.10
131
+ decorator==4.4.2
132
+ nvidia-cufile-cu12==1.11.1.6
133
+ threadpoolctl==3.6.0
134
+ pyarrow==21.0.0
135
+ httpcore==1.0.9
136
+ hydra-core==1.3.2
137
+ multiprocess==0.70.16
138
+ contourpy==1.3.2
139
+ clip==1.0
140
+ tqdm==4.66.5
141
+ open_clip_torch==3.2.0
142
+ accelerate==1.0.1
143
+ gitdb==4.0.12
144
+ importlib_metadata==8.7.0
145
+ nvidia-cublas-cu12==12.6.4.1
146
+ h11==0.16.0
147
+ filelock==3.19.1
148
+ liger_kernel==0.4.1
149
+ click==8.3.0
150
+ urllib3==2.2.0
151
+ imageio-ffmpeg==0.5.1
152
+ setuptools==80.9.0
153
+ joblib==1.5.2
154
+ tensorboard==2.20.0
155
+ attrs==25.4.0
156
+ future==1.0.0
157
+ albucore==0.0.19
158
+ fsspec==2025.9.0
159
+ sympy==1.14.0
160
+ eval_type_backport==0.2.2
161
+ pydantic_core==2.23.4
162
+ sniffio==1.3.1
163
+ nvidia-nvshmem-cu12==3.3.20
164
+ exceptiongroup==1.3.0
165
+ smmap==5.0.2
166
+ tomli==2.0.2
167
+ ftfy==6.3.0
168
+ dill==0.4.0
169
+ pytest==7.2.0
170
+ PySocks==1.7.1
171
+ nvidia-curand-cu12==10.3.7.77
172
+ tokenizers==0.20.1
173
+ args==0.1.0
174
+ fairscale==0.4.13
175
+ peft==0.13.2
176
+ webdataset==1.0.2
177
+ huggingface-hub==0.26.1
178
+ GitPython==3.1.45
179
+ pytorchvideo==0.1.5
180
+ scikit-learn==1.5.2
181
+ bitsandbytes==0.48.1
182
+ nvidia-cusparse-cu12==12.5.4.2
183
+ nvidia-cuda-cupti-cu12==12.6.80
184
+ imageio==2.36.0
185
+ pydub==0.25.1
186
+ image-reward==1.5
187
+ absl-py==2.3.1
188
+ blessed==1.22.0
189
+ torchdiffeq==0.2.4
wandb/run-20260124_110518-tfmkls9a/files/wandb-metadata.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-85-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.19",
4
+ "startedAt": "2026-01-24T03:05:18.186036Z",
5
+ "args": [
6
+ "--config",
7
+ "fastvideo/config_sd/base.py",
8
+ "--eta_step_list",
9
+ "0,1,2,3,4,5,6,7",
10
+ "--eta_step_merge_list",
11
+ "1,1,1,2,2,2,3,3",
12
+ "--granular_list",
13
+ "1",
14
+ "--num_generations",
15
+ "4",
16
+ "--eta",
17
+ "1.0",
18
+ "--init_same_noise"
19
+ ],
20
+ "program": "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_sd_merge.py",
21
+ "codePath": "fastvideo/train_g2rpo_sd_merge.py",
22
+ "email": "zhangemail1428@163.com",
23
+ "root": "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code",
24
+ "host": "abc",
25
+ "username": "zsj",
26
+ "executable": "/home/zsj/anaconda3/envs/g2rpo/bin/python",
27
+ "codePathLocal": "fastvideo/train_g2rpo_sd_merge.py",
28
+ "cpu_count": 48,
29
+ "cpu_count_logical": 96,
30
+ "gpu": "NVIDIA RTX 5880 Ada Generation",
31
+ "gpu_count": 8,
32
+ "disk": {
33
+ "/": {
34
+ "total": "1006773899264",
35
+ "used": "812154195968"
36
+ }
37
+ },
38
+ "memory": {
39
+ "total": "540697260032"
40
+ },
41
+ "cpu": {
42
+ "count": 48,
43
+ "countLogical": 96
44
+ },
45
+ "gpu_nvidia": [
46
+ {
47
+ "name": "NVIDIA RTX 5880 Ada Generation",
48
+ "memoryTotal": "51527024640",
49
+ "cudaCores": 14080,
50
+ "architecture": "Ada"
51
+ },
52
+ {
53
+ "name": "NVIDIA RTX 5880 Ada Generation",
54
+ "memoryTotal": "51527024640",
55
+ "cudaCores": 14080,
56
+ "architecture": "Ada"
57
+ },
58
+ {
59
+ "name": "NVIDIA RTX 5880 Ada Generation",
60
+ "memoryTotal": "51527024640",
61
+ "cudaCores": 14080,
62
+ "architecture": "Ada"
63
+ },
64
+ {
65
+ "name": "NVIDIA RTX 5880 Ada Generation",
66
+ "memoryTotal": "51527024640",
67
+ "cudaCores": 14080,
68
+ "architecture": "Ada"
69
+ },
70
+ {
71
+ "name": "NVIDIA RTX 5880 Ada Generation",
72
+ "memoryTotal": "51527024640",
73
+ "cudaCores": 14080,
74
+ "architecture": "Ada"
75
+ },
76
+ {
77
+ "name": "NVIDIA RTX 5880 Ada Generation",
78
+ "memoryTotal": "51527024640",
79
+ "cudaCores": 14080,
80
+ "architecture": "Ada"
81
+ },
82
+ {
83
+ "name": "NVIDIA RTX 5880 Ada Generation",
84
+ "memoryTotal": "51527024640",
85
+ "cudaCores": 14080,
86
+ "architecture": "Ada"
87
+ },
88
+ {
89
+ "name": "NVIDIA RTX 5880 Ada Generation",
90
+ "memoryTotal": "51527024640",
91
+ "cudaCores": 14080,
92
+ "architecture": "Ada"
93
+ }
94
+ ],
95
+ "cudaVersion": "12.9"
96
+ }
wandb/run-20260124_110518-tfmkls9a/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":15}}
wandb/run-20260124_110518-tfmkls9a/logs/debug-core.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-24T11:05:16.967516588+08:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpfnk0hjfx/port-697635.txt","pid":697635,"debug":false,"disable-analytics":false}
2
+ {"time":"2026-01-24T11:05:16.967548024+08:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2026-01-24T11:05:16.96847478+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41747,"Zone":""}}
4
+ {"time":"2026-01-24T11:05:16.968614144+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":697635}
5
+ {"time":"2026-01-24T11:05:17.157369334+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:39640"}
6
+ {"time":"2026-01-24T11:05:18.189748725+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"tfmkls9a","id":"127.0.0.1:39640"}
7
+ {"time":"2026-01-24T11:05:18.307722037+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"tfmkls9a","id":"127.0.0.1:39640"}
8
+ {"time":"2026-01-24T11:05:34.023368216+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:39640"}
9
+ {"time":"2026-01-24T11:05:34.023526749+08:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:39640"}
10
+ {"time":"2026-01-24T11:05:34.023604687+08:00","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2026-01-24T11:05:34.023755657+08:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:39640"}
12
+ {"time":"2026-01-24T11:05:35.735660702+08:00","level":"INFO","msg":"Parent process exited, terminating service process."}
wandb/run-20260124_110518-tfmkls9a/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-24T11:05:18.189933418+08:00","level":"INFO","msg":"using version","core version":"0.18.5"}
2
+ {"time":"2026-01-24T11:05:18.189953315+08:00","level":"INFO","msg":"created symlink","path":"/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/wandb/run-20260124_110518-tfmkls9a/logs/debug-core.log"}
3
+ {"time":"2026-01-24T11:05:18.307674596+08:00","level":"INFO","msg":"created new stream","id":"tfmkls9a"}
4
+ {"time":"2026-01-24T11:05:18.307717317+08:00","level":"INFO","msg":"stream: started","id":"tfmkls9a"}
5
+ {"time":"2026-01-24T11:05:18.307888685+08:00","level":"INFO","msg":"sender: started","stream_id":"tfmkls9a"}
6
+ {"time":"2026-01-24T11:05:18.30782203+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"tfmkls9a"}}
7
+ {"time":"2026-01-24T11:05:18.307910699+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"tfmkls9a"}}
8
+ {"time":"2026-01-24T11:05:19.08652137+08:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2026-01-24T11:05:34.023538308+08:00","level":"INFO","msg":"stream: closing","id":"tfmkls9a"}
10
+ {"time":"2026-01-24T11:05:34.023603272+08:00","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2026-01-24T11:05:34.025002208+08:00","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2026-01-24T11:05:34.69650131+08:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
+ {"time":"2026-01-24T11:05:34.696532166+08:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
+ {"time":"2026-01-24T11:05:34.696542852+08:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
+ {"time":"2026-01-24T11:05:35.716432442+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
wandb/run-20260124_110518-tfmkls9a/logs/debug.log ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5
2
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_setup.py:_flush():79] Configure stats pid to 697635
3
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_setup.py:_flush():79] Loading settings from /home/zsj/.config/wandb/settings
4
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_setup.py:_flush():79] Loading settings from /data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/wandb/settings
5
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
7
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'fastvideo/train_g2rpo_sd_merge.py', 'program_abspath': '/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_sd_merge.py', 'program': '/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_sd_merge.py'}
8
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_init.py:_log_setup():534] Logging user logs to /data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/wandb/run-20260124_110518-tfmkls9a/logs/debug.log
10
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_init.py:_log_setup():535] Logging internal logs to /data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/wandb/run-20260124_110518-tfmkls9a/logs/debug-internal.log
11
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_init.py:init():621] calling init triggers
12
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_init.py:init():628] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_init.py:init():671] starting backend
15
+ 2026-01-24 11:05:18,182 INFO MainThread:697635 [wandb_init.py:init():675] sending inform_init request
16
+ 2026-01-24 11:05:18,185 INFO MainThread:697635 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2026-01-24 11:05:18,185 INFO MainThread:697635 [wandb_init.py:init():688] backend started and connected
18
+ 2026-01-24 11:05:18,190 INFO MainThread:697635 [wandb_init.py:init():783] updated telemetry
19
+ 2026-01-24 11:05:18,190 INFO MainThread:697635 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout
20
+ 2026-01-24 11:05:19,077 INFO MainThread:697635 [wandb_init.py:init():867] starting run threads in backend
21
+ 2026-01-24 11:05:19,226 INFO MainThread:697635 [wandb_run.py:_console_start():2463] atexit reg
22
+ 2026-01-24 11:05:19,226 INFO MainThread:697635 [wandb_run.py:_redirect():2311] redirect: wrap_raw
23
+ 2026-01-24 11:05:19,227 INFO MainThread:697635 [wandb_run.py:_redirect():2376] Wrapping output streams.
24
+ 2026-01-24 11:05:19,227 INFO MainThread:697635 [wandb_run.py:_redirect():2401] Redirects installed.
25
+ 2026-01-24 11:05:19,228 INFO MainThread:697635 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2026-01-24 11:05:19,229 INFO MainThread:697635 [wandb_run.py:_config_callback():1390] config_cb None None {'allow_tf32': True, 'logdir': 'logs', 'mixed_precision': 'bf16', 'num_checkpoint_limit': 5, 'num_epochs': 300, 'pretrained': {'model': './data/StableDiffusion', 'revision': 'main'}, 'prompt_fn': 'imagenet_animals', 'prompt_fn_kwargs': {}, 'resume_from': '', 'reward_fn': 'hpsv2', 'run_name': '2026.01.24_11.05.16', 'sample': {'batch_size': 1, 'eta': 1.0, 'guidance_scale': 5.0, 'num_batches_per_epoch': 2, 'num_steps': 50}, 'save_freq': 20, 'seed': 42, 'train': {'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'adam_weight_decay': 0.0001, 'adv_clip_max': 5, 'batch_size': 1, 'cfg': True, 'clip_range': 0.0001, 'gradient_accumulation_steps': 1, 'learning_rate': 1e-05, 'max_grad_norm': 1.0, 'num_inner_epochs': 1, 'timestep_fraction': 1.0, 'use_8bit_adam': False}, 'use_lora': False}
27
+ 2026-01-24 11:05:34,023 WARNING MsgRouterThr:697635 [router.py:message_loop():77] message_loop has been closed
wandb/run-20260124_141237-3a51ocuw/files/config.yaml ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.5
4
+ m: []
5
+ python_version: 3.10.19
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 11
10
+ - 41
11
+ - 49
12
+ - 55
13
+ - 63
14
+ - 71
15
+ - 83
16
+ - 98
17
+ "2":
18
+ - 1
19
+ - 11
20
+ - 41
21
+ - 49
22
+ - 55
23
+ - 63
24
+ - 71
25
+ - 83
26
+ - 98
27
+ "3":
28
+ - 16
29
+ - 23
30
+ - 55
31
+ "4": 3.10.19
32
+ "5": 0.18.5
33
+ "6": 4.46.1
34
+ "8":
35
+ - 5
36
+ "12": 0.18.5
37
+ "13": linux-x86_64
38
+ adv_clip_max:
39
+ value: 5
40
+ cfg:
41
+ value: 0
42
+ checkpointing_steps:
43
+ value: 50
44
+ clip_range:
45
+ value: 0.0001
46
+ data_json_path:
47
+ value: ./data/rl_embeddings/videos2caption.json
48
+ dataloader_num_workers:
49
+ value: 4
50
+ eta:
51
+ value: 0.7
52
+ eta_step_list:
53
+ value:
54
+ - 0
55
+ - 1
56
+ - 2
57
+ - 3
58
+ - 4
59
+ - 5
60
+ - 6
61
+ - 7
62
+ eta_step_merge_list:
63
+ value:
64
+ - 1
65
+ - 1
66
+ - 1
67
+ - 2
68
+ - 2
69
+ - 2
70
+ - 3
71
+ - 3
72
+ fsdp_sharding_startegy:
73
+ value: full
74
+ gradient_checkpointing:
75
+ value: false
76
+ granular_list:
77
+ value:
78
+ - 1
79
+ h:
80
+ value: 1024
81
+ hps_clip_path:
82
+ value: ./data/hps/open_clip_pytorch_model.bin
83
+ hps_path:
84
+ value: ./data/hps/HPS_v2.1_compressed.pt
85
+ init_same_noise:
86
+ value: true
87
+ learning_rate:
88
+ value: 2e-06
89
+ lr_num_cycles:
90
+ value: 1
91
+ lr_power:
92
+ value: 1
93
+ lr_scheduler:
94
+ value: constant_with_warmup
95
+ lr_warmup_steps:
96
+ value: 0
97
+ max_grad_norm:
98
+ value: 1
99
+ max_train_steps:
100
+ value: 301
101
+ mixed_precision:
102
+ value: bf16
103
+ num_generations:
104
+ value: 12
105
+ num_latent_t:
106
+ value: 1
107
+ output_dir:
108
+ value: ./output/g2rpo_qwenimage
109
+ pretrained_model_name_or_path:
110
+ value: ./data/QwenImage
111
+ sampler_seed:
112
+ value: 1223627
113
+ sampling_steps:
114
+ value: 16
115
+ seed:
116
+ value: 42
117
+ selective_checkpointing:
118
+ value: 1
119
+ shift:
120
+ value: 3
121
+ sp_size:
122
+ value: 1
123
+ t:
124
+ value: 1
125
+ train_batch_size:
126
+ value: 1
127
+ train_sp_batch_size:
128
+ value: 1
129
+ use_cpu_offload:
130
+ value: false
131
+ use_hpsv2:
132
+ value: true
133
+ use_hpsv3:
134
+ value: false
135
+ w:
136
+ value: 1024
137
+ weight_decay:
138
+ value: 0.0001
wandb/run-20260124_141237-3a51ocuw/files/output.log ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ***** Running E-GRPO (G2RPO) Training for QwenImage *****
2
+ Num examples = 50000
3
+ Dataloader size = 8334
4
+ Eta step list = [0, 1, 2, 3, 4, 5, 6, 7]
5
+ Eta step merge list = [1, 1, 1, 2, 2, 2, 3, 3]
6
+ Granular list = [1]
7
+ Total training parameters per FSDP shard = 3.405066888 B
8
+ Steps: 0%| | 0/100000 [00:00<?, ?it/s]Traceback (most recent call last):
9
+ File "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_qwenimage_merge.py", line 952, in <module>
10
+ main(args)
11
+ File "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_qwenimage_merge.py", line 834, in main
12
+ for step, (prompt_embeds, prompt_attention_masks, caption, original_length) in enumerate(train_dataloader):
13
+ File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 732, in __next__
14
+ data = self._next_data()
15
+ File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1506, in _next_data
16
+ return self._process_data(data, worker_id)
17
+ File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1541, in _process_data
18
+ data.reraise()
19
+ File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/_utils.py", line 769, in reraise
20
+ raise exception
21
+ KeyError: Caught KeyError in DataLoader worker process 0.
22
+ Original Traceback (most recent call last):
23
+ File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
24
+ data = fetcher.fetch(index) # type: ignore[possibly-undefined]
25
+ File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
26
+ data = [self.dataset[idx] for idx in possibly_batched_index]
27
+ File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
28
+ data = [self.dataset[idx] for idx in possibly_batched_index]
29
+ File "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/dataset/latent_qwenimage_rl_datasets.py", line 50, in __getitem__
30
+ prompt_attention_mask_file = self.data_anno[idx]["prompt_attention_mask"]
31
+ KeyError: 'prompt_attention_mask'
32
+
33
+ [rank0]: Traceback (most recent call last):
34
+ [rank0]: File "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_qwenimage_merge.py", line 952, in <module>
35
+ [rank0]: main(args)
36
+ [rank0]: File "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_qwenimage_merge.py", line 834, in main
37
+ [rank0]: for step, (prompt_embeds, prompt_attention_masks, caption, original_length) in enumerate(train_dataloader):
38
+ [rank0]: File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 732, in __next__
39
+ [rank0]: data = self._next_data()
40
+ [rank0]: File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1506, in _next_data
41
+ [rank0]: return self._process_data(data, worker_id)
42
+ [rank0]: File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1541, in _process_data
43
+ [rank0]: data.reraise()
44
+ [rank0]: File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/_utils.py", line 769, in reraise
45
+ [rank0]: raise exception
46
+ [rank0]: KeyError: Caught KeyError in DataLoader worker process 0.
47
+ [rank0]: Original Traceback (most recent call last):
48
+ [rank0]: File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
49
+ [rank0]: data = fetcher.fetch(index) # type: ignore[possibly-undefined]
50
+ [rank0]: File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
51
+ [rank0]: data = [self.dataset[idx] for idx in possibly_batched_index]
52
+ [rank0]: File "/home/zsj/anaconda3/envs/g2rpo/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
53
+ [rank0]: data = [self.dataset[idx] for idx in possibly_batched_index]
54
+ [rank0]: File "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/dataset/latent_qwenimage_rl_datasets.py", line 50, in __getitem__
55
+ [rank0]: prompt_attention_mask_file = self.data_anno[idx]["prompt_attention_mask"]
56
+ [rank0]: KeyError: 'prompt_attention_mask'
wandb/run-20260124_141237-3a51ocuw/files/requirements.txt ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scipy==1.13.0
2
+ regex==2024.9.11
3
+ sentencepiece==0.2.0
4
+ six==1.16.0
5
+ anyio==4.11.0
6
+ nvidia-cuda-nvrtc-cu12==12.6.77
7
+ scikit-video==1.1.11
8
+ platformdirs==4.5.0
9
+ mypy==1.11.1
10
+ ruff==0.6.5
11
+ charset-normalizer==3.4.4
12
+ torch==2.9.0+cu126
13
+ av==13.1.0
14
+ pillow==10.2.0
15
+ gpustat==1.1.1
16
+ torchvision==0.24.0+cu126
17
+ multidict==6.7.0
18
+ torchmetrics==1.5.1
19
+ aiohttp==3.13.1
20
+ transformers==4.46.1
21
+ decord==0.6.0
22
+ wcwidth==0.2.14
23
+ sphinx-lint==1.0.0
24
+ nvidia-cuda-runtime-cu12==12.6.77
25
+ pytz==2025.2
26
+ codespell==2.3.0
27
+ hpsv2==1.2.0
28
+ mypy_extensions==1.1.0
29
+ numpy==1.26.3
30
+ omegaconf==2.3.0
31
+ Markdown==3.9
32
+ tzdata==2025.2
33
+ pandas==2.2.3
34
+ pytorch-lightning==2.4.0
35
+ aiosignal==1.4.0
36
+ aiohappyeyeballs==2.6.1
37
+ python-dateutil==2.9.0.post0
38
+ seaborn==0.13.2
39
+ beautifulsoup4==4.12.3
40
+ isort==5.13.2
41
+ httpx==0.28.1
42
+ certifi==2025.10.5
43
+ ml_collections==1.1.0
44
+ nvidia-cudnn-cu12==9.10.2.21
45
+ peft==0.18.1
46
+ hf-xet==1.2.0
47
+ requests==2.31.0
48
+ inflect==6.0.4
49
+ iniconfig==2.1.0
50
+ braceexpand==0.1.7
51
+ h5py==3.12.1
52
+ wandb==0.18.5
53
+ protobuf==3.20.3
54
+ ninja==1.13.0
55
+ kiwisolver==1.4.9
56
+ networkx==3.3
57
+ packaging==25.0
58
+ fvcore==0.1.5.post20221221
59
+ pyparsing==3.2.5
60
+ starlette==0.41.3
61
+ frozenlist==1.8.0
62
+ docker-pycreds==0.4.0
63
+ Werkzeug==3.1.3
64
+ MarkupSafe==2.1.5
65
+ shellingham==1.5.4
66
+ einops==0.8.0
67
+ sentry-sdk==2.42.0
68
+ PyYAML==6.0.1
69
+ nvidia-nccl-cu12==2.27.5
70
+ datasets==4.3.0
71
+ polib==1.2.0
72
+ safetensors==0.6.2
73
+ async-timeout==5.0.1
74
+ setproctitle==1.3.7
75
+ clint==0.5.1
76
+ matplotlib==3.9.2
77
+ propcache==0.4.1
78
+ termcolor==3.1.0
79
+ antlr4-python3-runtime==4.9.3
80
+ cycler==0.12.1
81
+ fastvideo==1.2.0
82
+ toml==0.10.2
83
+ xxhash==3.6.0
84
+ wheel==0.44.0
85
+ albumentations==1.4.20
86
+ fastapi==0.115.3
87
+ nvidia-cufft-cu12==11.3.0.4
88
+ yarl==1.22.0
89
+ psutil==7.1.0
90
+ tensorboard-data-server==0.7.2
91
+ huggingface-hub==0.36.0
92
+ pydantic==2.9.2
93
+ nvidia-nvtx-cu12==12.6.77
94
+ portalocker==3.2.0
95
+ triton==3.5.0
96
+ annotated-types==0.7.0
97
+ proglog==0.1.12
98
+ nvidia-cusparselt-cu12==0.7.1
99
+ yapf==0.32.0
100
+ Jinja2==3.1.6
101
+ types-requests==2.32.4.20250913
102
+ lightning-utilities==0.15.2
103
+ grpcio==1.75.1
104
+ uvicorn==0.32.0
105
+ typing_extensions==4.15.0
106
+ nvidia-nvjitlink-cu12==12.6.85
107
+ watch==0.2.7
108
+ moviepy==1.0.3
109
+ timm==1.0.11
110
+ pytest-split==0.8.0
111
+ gdown==5.2.0
112
+ types-setuptools==80.9.0.20250822
113
+ nvidia-cusolver-cu12==11.7.1.2
114
+ types-PyYAML==6.0.12.20250915
115
+ pip==25.2
116
+ typer-slim==0.21.1
117
+ qwen-vl-utils==0.0.14
118
+ soupsieve==2.8
119
+ zipp==3.23.0
120
+ flash_attn==2.8.3
121
+ yacs==0.1.8
122
+ pluggy==1.6.0
123
+ opencv-python-headless==4.11.0.86
124
+ mpmath==1.3.0
125
+ test_tube==0.7.5
126
+ stringzilla==4.2.1
127
+ fonttools==4.60.1
128
+ nvidia-ml-py==13.580.82
129
+ parameterized==0.9.0
130
+ loguru==0.7.3
131
+ diffusers==0.36.0
132
+ tabulate==0.9.0
133
+ idna==3.6
134
+ iopath==0.1.10
135
+ decorator==4.4.2
136
+ nvidia-cufile-cu12==1.11.1.6
137
+ threadpoolctl==3.6.0
138
+ pyarrow==21.0.0
139
+ httpcore==1.0.9
140
+ hydra-core==1.3.2
141
+ multiprocess==0.70.16
142
+ contourpy==1.3.2
143
+ clip==1.0
144
+ tqdm==4.66.5
145
+ open_clip_torch==3.2.0
146
+ accelerate==1.0.1
147
+ gitdb==4.0.12
148
+ importlib_metadata==8.7.0
149
+ nvidia-cublas-cu12==12.6.4.1
150
+ h11==0.16.0
151
+ filelock==3.19.1
152
+ liger_kernel==0.4.1
153
+ click==8.3.0
154
+ urllib3==2.2.0
155
+ imageio-ffmpeg==0.5.1
156
+ setuptools==80.9.0
157
+ joblib==1.5.2
158
+ tensorboard==2.20.0
159
+ attrs==25.4.0
160
+ future==1.0.0
161
+ albucore==0.0.19
162
+ fsspec==2025.9.0
163
+ sympy==1.14.0
164
+ eval_type_backport==0.2.2
165
+ pydantic_core==2.23.4
166
+ sniffio==1.3.1
167
+ nvidia-nvshmem-cu12==3.3.20
168
+ exceptiongroup==1.3.0
169
+ smmap==5.0.2
170
+ tomli==2.0.2
171
+ ftfy==6.3.0
172
+ dill==0.4.0
173
+ pytest==7.2.0
174
+ PySocks==1.7.1
175
+ nvidia-curand-cu12==10.3.7.77
176
+ tokenizers==0.20.1
177
+ args==0.1.0
178
+ fairscale==0.4.13
179
+ webdataset==1.0.2
180
+ GitPython==3.1.45
181
+ pytorchvideo==0.1.5
182
+ scikit-learn==1.5.2
183
+ bitsandbytes==0.48.1
184
+ nvidia-cusparse-cu12==12.5.4.2
185
+ nvidia-cuda-cupti-cu12==12.6.80
186
+ imageio==2.36.0
187
+ pydub==0.25.1
188
+ image-reward==1.5
189
+ absl-py==2.3.1
190
+ blessed==1.22.0
191
+ torchdiffeq==0.2.4
wandb/run-20260124_141237-3a51ocuw/files/wandb-metadata.json ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-86-generic-x86_64-with-glibc2.35",
3
+ "python": "3.10.19",
4
+ "startedAt": "2026-01-24T06:12:37.513847Z",
5
+ "args": [
6
+ "--pretrained_model_name_or_path",
7
+ "./data/QwenImage",
8
+ "--data_json_path",
9
+ "./data/rl_embeddings/videos2caption.json",
10
+ "--output_dir",
11
+ "./output/g2rpo_qwenimage",
12
+ "--hps_path",
13
+ "./data/hps/HPS_v2.1_compressed.pt",
14
+ "--hps_clip_path",
15
+ "./data/hps/open_clip_pytorch_model.bin",
16
+ "--h",
17
+ "1024",
18
+ "--w",
19
+ "1024",
20
+ "--sampling_steps",
21
+ "16",
22
+ "--eta",
23
+ "0.7",
24
+ "--shift",
25
+ "3.0",
26
+ "--num_generations",
27
+ "12",
28
+ "--learning_rate",
29
+ "2e-6",
30
+ "--max_train_steps",
31
+ "301",
32
+ "--checkpointing_steps",
33
+ "50",
34
+ "--eta_step_list",
35
+ "0",
36
+ "1",
37
+ "2",
38
+ "3",
39
+ "4",
40
+ "5",
41
+ "6",
42
+ "7",
43
+ "--eta_step_merge_list",
44
+ "1",
45
+ "1",
46
+ "1",
47
+ "2",
48
+ "2",
49
+ "2",
50
+ "3",
51
+ "3",
52
+ "--granular_list",
53
+ "1",
54
+ "--init_same_noise",
55
+ "--clip_range",
56
+ "1e-4",
57
+ "--adv_clip_max",
58
+ "5.0",
59
+ "--use_hpsv2"
60
+ ],
61
+ "program": "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_qwenimage_merge.py",
62
+ "codePath": "fastvideo/train_g2rpo_qwenimage_merge.py",
63
+ "email": "zhangemail1428@163.com",
64
+ "root": "/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code",
65
+ "host": "abc",
66
+ "username": "zsj",
67
+ "executable": "/home/zsj/anaconda3/envs/g2rpo/bin/python",
68
+ "codePathLocal": "fastvideo/train_g2rpo_qwenimage_merge.py",
69
+ "cpu_count": 48,
70
+ "cpu_count_logical": 96,
71
+ "gpu": "NVIDIA RTX 5880 Ada Generation",
72
+ "gpu_count": 8,
73
+ "disk": {
74
+ "/": {
75
+ "total": "1006773899264",
76
+ "used": "803118862336"
77
+ }
78
+ },
79
+ "memory": {
80
+ "total": "540697153536"
81
+ },
82
+ "cpu": {
83
+ "count": 48,
84
+ "countLogical": 96
85
+ },
86
+ "gpu_nvidia": [
87
+ {
88
+ "name": "NVIDIA RTX 5880 Ada Generation",
89
+ "memoryTotal": "51527024640",
90
+ "cudaCores": 14080,
91
+ "architecture": "Ada"
92
+ },
93
+ {
94
+ "name": "NVIDIA RTX 5880 Ada Generation",
95
+ "memoryTotal": "51527024640",
96
+ "cudaCores": 14080,
97
+ "architecture": "Ada"
98
+ },
99
+ {
100
+ "name": "NVIDIA RTX 5880 Ada Generation",
101
+ "memoryTotal": "51527024640",
102
+ "cudaCores": 14080,
103
+ "architecture": "Ada"
104
+ },
105
+ {
106
+ "name": "NVIDIA RTX 5880 Ada Generation",
107
+ "memoryTotal": "51527024640",
108
+ "cudaCores": 14080,
109
+ "architecture": "Ada"
110
+ },
111
+ {
112
+ "name": "NVIDIA RTX 5880 Ada Generation",
113
+ "memoryTotal": "51527024640",
114
+ "cudaCores": 14080,
115
+ "architecture": "Ada"
116
+ },
117
+ {
118
+ "name": "NVIDIA RTX 5880 Ada Generation",
119
+ "memoryTotal": "51527024640",
120
+ "cudaCores": 14080,
121
+ "architecture": "Ada"
122
+ },
123
+ {
124
+ "name": "NVIDIA RTX 5880 Ada Generation",
125
+ "memoryTotal": "51527024640",
126
+ "cudaCores": 14080,
127
+ "architecture": "Ada"
128
+ },
129
+ {
130
+ "name": "NVIDIA RTX 5880 Ada Generation",
131
+ "memoryTotal": "51527024640",
132
+ "cudaCores": 14080,
133
+ "architecture": "Ada"
134
+ }
135
+ ],
136
+ "cudaVersion": "12.9"
137
+ }
wandb/run-20260124_141237-3a51ocuw/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":1}}
wandb/run-20260124_141237-3a51ocuw/logs/debug-core.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-24T14:12:36.403578289+08:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp_5qzahaq/port-28129.txt","pid":28129,"debug":false,"disable-analytics":false}
2
+ {"time":"2026-01-24T14:12:36.403616892+08:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2026-01-24T14:12:36.404894071+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":28129}
4
+ {"time":"2026-01-24T14:12:36.404999637+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38405,"Zone":""}}
5
+ {"time":"2026-01-24T14:12:36.572515476+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:41386"}
6
+ {"time":"2026-01-24T14:12:37.508533932+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"3a51ocuw","id":"127.0.0.1:41386"}
7
+ {"time":"2026-01-24T14:12:37.63058458+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"3a51ocuw","id":"127.0.0.1:41386"}
8
+ {"time":"2026-01-24T14:12:38.683918448+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:41386"}
9
+ {"time":"2026-01-24T14:12:38.684070643+08:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:41386"}
10
+ {"time":"2026-01-24T14:12:38.684177673+08:00","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2026-01-24T14:12:38.684241348+08:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:41386"}
12
+ {"time":"2026-01-24T14:12:38.772325053+08:00","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:38405->127.0.0.1:41386: use of closed network connection","id":"127.0.0.1:41386"}
13
+ {"time":"2026-01-24T14:12:40.353470819+08:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:41386"}
14
+ {"time":"2026-01-24T14:12:40.353514215+08:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:41386"}
15
+ {"time":"2026-01-24T14:12:40.353538791+08:00","level":"INFO","msg":"server is closed"}
wandb/run-20260124_141237-3a51ocuw/logs/debug-internal.log ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-24T14:12:37.509015822+08:00","level":"INFO","msg":"using version","core version":"0.18.5"}
2
+ {"time":"2026-01-24T14:12:37.509057782+08:00","level":"INFO","msg":"created symlink","path":"/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/wandb/run-20260124_141237-3a51ocuw/logs/debug-core.log"}
3
+ {"time":"2026-01-24T14:12:37.630545922+08:00","level":"INFO","msg":"created new stream","id":"3a51ocuw"}
4
+ {"time":"2026-01-24T14:12:37.630581136+08:00","level":"INFO","msg":"stream: started","id":"3a51ocuw"}
5
+ {"time":"2026-01-24T14:12:37.630856728+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"3a51ocuw"}}
6
+ {"time":"2026-01-24T14:12:37.630945854+08:00","level":"INFO","msg":"sender: started","stream_id":"3a51ocuw"}
7
+ {"time":"2026-01-24T14:12:37.630815002+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"3a51ocuw"}}
8
+ {"time":"2026-01-24T14:12:38.30268329+08:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2026-01-24T14:12:38.684097398+08:00","level":"INFO","msg":"stream: closing","id":"3a51ocuw"}
10
+ {"time":"2026-01-24T14:12:38.684210233+08:00","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2026-01-24T14:12:38.685194374+08:00","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2026-01-24T14:12:39.357336767+08:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
+ {"time":"2026-01-24T14:12:39.357370894+08:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
+ {"time":"2026-01-24T14:12:39.357389716+08:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
+ {"time":"2026-01-24T14:12:40.006428963+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
16
+ {"time":"2026-01-24T14:12:40.353157928+08:00","level":"INFO","msg":"handler: closed","stream_id":{"value":"3a51ocuw"}}
17
+ {"time":"2026-01-24T14:12:40.353244646+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"3a51ocuw"}}
18
+ {"time":"2026-01-24T14:12:40.353290403+08:00","level":"INFO","msg":"sender: closed","stream_id":"3a51ocuw"}
19
+ {"time":"2026-01-24T14:12:40.353339352+08:00","level":"INFO","msg":"stream: closed","id":"3a51ocuw"}
wandb/run-20260124_141237-3a51ocuw/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-24 14:12:37,499 INFO MainThread:28129 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5
2
+ 2026-01-24 14:12:37,500 INFO MainThread:28129 [wandb_setup.py:_flush():79] Configure stats pid to 28129
3
+ 2026-01-24 14:12:37,500 INFO MainThread:28129 [wandb_setup.py:_flush():79] Loading settings from /home/zsj/.config/wandb/settings
4
+ 2026-01-24 14:12:37,500 INFO MainThread:28129 [wandb_setup.py:_flush():79] Loading settings from /data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/wandb/settings
5
+ 2026-01-24 14:12:37,500 INFO MainThread:28129 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2026-01-24 14:12:37,500 INFO MainThread:28129 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
7
+ 2026-01-24 14:12:37,500 INFO MainThread:28129 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'fastvideo/train_g2rpo_qwenimage_merge.py', 'program_abspath': '/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_qwenimage_merge.py', 'program': '/data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/fastvideo/train_g2rpo_qwenimage_merge.py'}
8
+ 2026-01-24 14:12:37,500 INFO MainThread:28129 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2026-01-24 14:12:37,500 INFO MainThread:28129 [wandb_init.py:_log_setup():534] Logging user logs to /data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/wandb/run-20260124_141237-3a51ocuw/logs/debug.log
10
+ 2026-01-24 14:12:37,501 INFO MainThread:28129 [wandb_init.py:_log_setup():535] Logging internal logs to /data1/zsj/SceneDPO/Rebuttal/E-GRPO/scoure_code/wandb/run-20260124_141237-3a51ocuw/logs/debug-internal.log
11
+ 2026-01-24 14:12:37,501 INFO MainThread:28129 [wandb_init.py:init():621] calling init triggers
12
+ 2026-01-24 14:12:37,501 INFO MainThread:28129 [wandb_init.py:init():628] wandb.init called with sweep_config: {}
13
+ config: {'data_json_path': './data/rl_embeddings/videos2caption.json', 'dataloader_num_workers': 4, 'train_batch_size': 1, 'num_latent_t': 1, 'pretrained_model_name_or_path': './data/QwenImage', 'hps_path': './data/hps/HPS_v2.1_compressed.pt', 'hps_clip_path': './data/hps/open_clip_pytorch_model.bin', 'cfg': 0.0, 'seed': 42, 'output_dir': './output/g2rpo_qwenimage', 'checkpointing_steps': 50, 'max_train_steps': 301, 'learning_rate': 2e-06, 'lr_warmup_steps': 0, 'max_grad_norm': 1.0, 'gradient_checkpointing': False, 'selective_checkpointing': 1.0, 'mixed_precision': 'bf16', 'use_cpu_offload': False, 'sp_size': 1, 'train_sp_batch_size': 1, 'fsdp_sharding_startegy': 'full', 'lr_scheduler': 'constant_with_warmup', 'lr_num_cycles': 1, 'lr_power': 1.0, 'weight_decay': 0.0001, 'h': 1024, 'w': 1024, 't': 1, 'sampling_steps': 16, 'eta': 0.7, 'sampler_seed': 1223627, 'num_generations': 12, 'shift': 3.0, 'init_same_noise': True, 'clip_range': 0.0001, 'adv_clip_max': 5.0, 'eta_step_list': [0, 1, 2, 3, 4, 5, 6, 7], 'eta_step_merge_list': [1, 1, 1, 2, 2, 2, 3, 3], 'granular_list': [1], 'use_hpsv2': True, 'use_hpsv3': False}
14
+ 2026-01-24 14:12:37,501 INFO MainThread:28129 [wandb_init.py:init():671] starting backend
15
+ 2026-01-24 14:12:37,501 INFO MainThread:28129 [wandb_init.py:init():675] sending inform_init request
16
+ 2026-01-24 14:12:37,506 INFO MainThread:28129 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2026-01-24 14:12:37,509 INFO MainThread:28129 [wandb_init.py:init():688] backend started and connected
18
+ 2026-01-24 14:12:37,676 INFO MainThread:28129 [wandb_init.py:init():783] updated telemetry
19
+ 2026-01-24 14:12:37,678 INFO MainThread:28129 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout
20
+ 2026-01-24 14:12:38,296 INFO MainThread:28129 [wandb_init.py:init():867] starting run threads in backend
21
+ 2026-01-24 14:12:38,502 INFO MainThread:28129 [wandb_run.py:_console_start():2463] atexit reg
22
+ 2026-01-24 14:12:38,503 INFO MainThread:28129 [wandb_run.py:_redirect():2311] redirect: wrap_raw
23
+ 2026-01-24 14:12:38,504 INFO MainThread:28129 [wandb_run.py:_redirect():2376] Wrapping output streams.
24
+ 2026-01-24 14:12:38,505 INFO MainThread:28129 [wandb_run.py:_redirect():2401] Redirects installed.
25
+ 2026-01-24 14:12:38,512 INFO MainThread:28129 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2026-01-24 14:12:38,684 WARNING MsgRouterThr:28129 [router.py:message_loop():77] message_loop has been closed