guytevet committed on
Commit
5f76523
·
1 Parent(s): 258444e

added DiP checkpoints

Browse files
checkpoints/dip/DiP_10steps_context20_predict40/args.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "adam_beta2": 0.999,
3
+ "arch": "trans_dec",
4
+ "autoregressive": true,
5
+ "autoregressive_include_prefix": false,
6
+ "autoregressive_init": "data",
7
+ "avg_model_beta": 0.9999,
8
+ "batch_size": 64,
9
+ "cond_mask_prob": 0.1,
10
+ "context_len": 20,
11
+ "cuda": true,
12
+ "data_dir": "",
13
+ "dataset": "humanml",
14
+ "device": 0,
15
+ "diffusion_steps": 10,
16
+ "emb_before_mask": false,
17
+ "emb_policy": "add",
18
+ "emb_trans_dec": false,
19
+ "eval_batch_size": 32,
20
+ "eval_during_training": true,
21
+ "eval_num_samples": 1000,
22
+ "eval_rep_times": 3,
23
+ "eval_split": "test",
24
+ "external_mode": false,
25
+ "gen_during_training": true,
26
+ "gen_guidance_param": 2.5,
27
+ "gen_num_repetitions": 2,
28
+ "gen_num_samples": 3,
29
+ "hml_type": null,
30
+ "keyframe_cond_prob": 0.5,
31
+ "keyframe_cond_type": "",
32
+ "lambda_fc": 0.0,
33
+ "lambda_rcxyz": 0.0,
34
+ "lambda_target_loc": 0.0,
35
+ "lambda_vel": 0.0,
36
+ "latent_dim": 512,
37
+ "layers": 8,
38
+ "log_interval": 1000,
39
+ "lr": 0.0001,
40
+ "lr_anneal_steps": 0,
41
+ "mask_frames": true,
42
+ "multi_encoder_type": "multi",
43
+ "multi_target_cond": false,
44
+ "noise_schedule": "cosine",
45
+ "num_frames": 60,
46
+ "num_steps": 600000,
47
+ "overwrite": true,
48
+ "pos_embed_max_len": 5000,
49
+ "pred_len": 40,
50
+ "resume_checkpoint": "",
51
+ "sampling_mode": "none",
52
+ "save_dir": "save/DiP_10steps_context20_predict40",
53
+ "save_interval": 50000,
54
+ "seed": 10,
55
+ "sigma_small": true,
56
+ "spatial_condition": null,
57
+ "target_enc_layers": 1,
58
+ "target_joint_names": null,
59
+ "text_encoder_type": "bert",
60
+ "train_platform_type": "WandBPlatform",
61
+ "unconstrained": false,
62
+ "use_ema": true,
63
+ "use_inpainting": false,
64
+ "use_recon_guidance": false,
65
+ "weight_decay": 0.0
66
+ }
checkpoints/dip/DiP_10steps_context20_predict40/eval_humanml_PreComHorRerAut_DifSte_10_ConLen_20_PreLen_40_000600343_gscale7.5_wo_mm.log ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ==================== Replication 0 ====================
2
+ Time: 2024-10-30 13:21:39.216134
3
+ ---> [ground truth] Matching Score: 3.2365
4
+ ---> [ground truth] R_precision: (top 1): 0.4543 (top 2): 0.6526 (top 3): 0.7612
5
+ ---> [vald] Matching Score: 3.1560
6
+ ---> [vald] R_precision: (top 1): 0.4688 (top 2): 0.6592 (top 3): 0.7764
7
+ Time: 2024-10-30 13:21:43.823692
8
+ ---> [ground truth] FID: 0.0010
9
+ ---> [vald] FID: 0.3887
10
+ Time: 2024-10-30 13:21:47.421551
11
+ ---> [ground truth] Diversity: 9.3780
12
+ ---> [vald] Diversity: 8.9154
13
+ AUX METRICS:
14
+ ---> [vald] skate_ratio: 0.0721
15
+ ---> [vald] mean_penetration: 6.5568
16
+ ---> [vald] penetration: 0.1228
17
+ ---> [vald] floating: 24.3247
18
+ ---> [vald] skating: 0.5522
19
+ AUX METRICS:
20
+ ---> [vald] fps: 2594.5557
21
+ ---> [vald] time_per_sample_ms: 15.4169
22
+ !!! DONE !!!
23
+ ==================== Replication 1 ====================
24
+ Time: 2024-10-30 13:23:16.167503
25
+ ---> [ground truth] Matching Score: 3.2482
26
+ ---> [ground truth] R_precision: (top 1): 0.4603 (top 2): 0.6550 (top 3): 0.7675
27
+ ---> [vald] Matching Score: 3.1117
28
+ ---> [vald] R_precision: (top 1): 0.4658 (top 2): 0.6758 (top 3): 0.7715
29
+ Time: 2024-10-30 13:23:20.446822
30
+ ---> [ground truth] FID: 0.0012
31
+ ---> [vald] FID: 0.2311
32
+ Time: 2024-10-30 13:23:24.204251
33
+ ---> [ground truth] Diversity: 9.1760
34
+ ---> [vald] Diversity: 9.2977
35
+ AUX METRICS:
36
+ ---> [vald] skate_ratio: 0.0777
37
+ ---> [vald] mean_penetration: 6.1410
38
+ ---> [vald] penetration: 0.0525
39
+ ---> [vald] floating: 23.3839
40
+ ---> [vald] skating: 0.6070
41
+ AUX METRICS:
42
+ ---> [vald] fps: 3102.4554
43
+ ---> [vald] time_per_sample_ms: 12.8930
44
+ !!! DONE !!!
45
+ ==================== Replication 2 ====================
46
+ Time: 2024-10-30 13:24:43.358279
47
+ ---> [ground truth] Matching Score: 3.2179
48
+ ---> [ground truth] R_precision: (top 1): 0.4612 (top 2): 0.6675 (top 3): 0.7700
49
+ ---> [vald] Matching Score: 3.1865
50
+ ---> [vald] R_precision: (top 1): 0.4746 (top 2): 0.6680 (top 3): 0.7744
51
+ Time: 2024-10-30 13:24:47.581610
52
+ ---> [ground truth] FID: 0.0011
53
+ ---> [vald] FID: 0.3562
54
+ Time: 2024-10-30 13:24:51.264851
55
+ ---> [ground truth] Diversity: 9.5500
56
+ ---> [vald] Diversity: 9.1119
57
+ AUX METRICS:
58
+ ---> [vald] skate_ratio: 0.0770
59
+ ---> [vald] mean_penetration: 6.2946
60
+ ---> [vald] penetration: 0.0656
61
+ ---> [vald] floating: 25.7569
62
+ ---> [vald] skating: 0.6014
63
+ AUX METRICS:
64
+ ---> [vald] fps: 3148.2694
65
+ ---> [vald] time_per_sample_ms: 12.7054
66
+ !!! DONE !!!
67
+ ==================== Replication 3 ====================
68
+ Time: 2024-10-30 13:26:13.869897
69
+ ---> [ground truth] Matching Score: 3.2363
70
+ ---> [ground truth] R_precision: (top 1): 0.4550 (top 2): 0.6558 (top 3): 0.7666
71
+ ---> [vald] Matching Score: 3.0945
72
+ ---> [vald] R_precision: (top 1): 0.4609 (top 2): 0.6738 (top 3): 0.7803
73
+ Time: 2024-10-30 13:26:18.041856
74
+ ---> [ground truth] FID: 0.0010
75
+ ---> [vald] FID: 0.2497
76
+ Time: 2024-10-30 13:26:21.701860
77
+ ---> [ground truth] Diversity: 9.2328
78
+ ---> [vald] Diversity: 9.1492
79
+ AUX METRICS:
80
+ ---> [vald] skate_ratio: 0.0744
81
+ ---> [vald] mean_penetration: 8.1887
82
+ ---> [vald] penetration: 0.1200
83
+ ---> [vald] floating: 25.1052
84
+ ---> [vald] skating: 0.8081
85
+ AUX METRICS:
86
+ ---> [vald] fps: 2795.6479
87
+ ---> [vald] time_per_sample_ms: 14.3080
88
+ !!! DONE !!!
89
+ ==================== Replication 4 ====================
90
+ Time: 2024-10-30 13:27:42.079014
91
+ ---> [ground truth] Matching Score: 3.2482
92
+ ---> [ground truth] R_precision: (top 1): 0.4418 (top 2): 0.6453 (top 3): 0.7545
93
+ ---> [vald] Matching Score: 3.1482
94
+ ---> [vald] R_precision: (top 1): 0.4648 (top 2): 0.6475 (top 3): 0.7637
95
+ Time: 2024-10-30 13:27:46.498248
96
+ ---> [ground truth] FID: 0.0011
97
+ ---> [vald] FID: 0.3483
98
+ Time: 2024-10-30 13:27:50.715379
99
+ ---> [ground truth] Diversity: 9.2800
100
+ ---> [vald] Diversity: 9.0324
101
+ AUX METRICS:
102
+ ---> [vald] skate_ratio: 0.0786
103
+ ---> [vald] mean_penetration: 6.1403
104
+ ---> [vald] penetration: 0.0881
105
+ ---> [vald] floating: 23.3769
106
+ ---> [vald] skating: 0.6603
107
+ AUX METRICS:
108
+ ---> [vald] fps: 2634.4592
109
+ ---> [vald] time_per_sample_ms: 15.1834
110
+ !!! DONE !!!
111
+ ==================== Replication 5 ====================
112
+ Time: 2024-10-30 13:29:11.587206
113
+ ---> [ground truth] Matching Score: 3.2448
114
+ ---> [ground truth] R_precision: (top 1): 0.4573 (top 2): 0.6578 (top 3): 0.7636
115
+ ---> [vald] Matching Score: 3.0923
116
+ ---> [vald] R_precision: (top 1): 0.4658 (top 2): 0.6729 (top 3): 0.7861
117
+ Time: 2024-10-30 13:29:15.778495
118
+ ---> [ground truth] FID: 0.0011
119
+ ---> [vald] FID: 0.1868
120
+ Time: 2024-10-30 13:29:19.420641
121
+ ---> [ground truth] Diversity: 9.6603
122
+ ---> [vald] Diversity: 8.9310
123
+ AUX METRICS:
124
+ ---> [vald] skate_ratio: 0.0734
125
+ ---> [vald] mean_penetration: 6.2195
126
+ ---> [vald] penetration: 0.0743
127
+ ---> [vald] floating: 20.6116
128
+ ---> [vald] skating: 0.6368
129
+ AUX METRICS:
130
+ ---> [vald] fps: 2977.0590
131
+ ---> [vald] time_per_sample_ms: 13.4361
132
+ !!! DONE !!!
133
+ ==================== Replication 6 ====================
134
+ Time: 2024-10-30 13:30:41.583496
135
+ ---> [ground truth] Matching Score: 3.2623
136
+ ---> [ground truth] R_precision: (top 1): 0.4487 (top 2): 0.6532 (top 3): 0.7601
137
+ ---> [vald] Matching Score: 3.1806
138
+ ---> [vald] R_precision: (top 1): 0.4561 (top 2): 0.6680 (top 3): 0.7646
139
+ Time: 2024-10-30 13:30:45.795327
140
+ ---> [ground truth] FID: 0.0011
141
+ ---> [vald] FID: 0.3755
142
+ Time: 2024-10-30 13:30:49.698681
143
+ ---> [ground truth] Diversity: 9.3130
144
+ ---> [vald] Diversity: 9.3665
145
+ AUX METRICS:
146
+ ---> [vald] skate_ratio: 0.0777
147
+ ---> [vald] mean_penetration: 5.3331
148
+ ---> [vald] penetration: 0.0656
149
+ ---> [vald] floating: 23.0651
150
+ ---> [vald] skating: 0.5678
151
+ AUX METRICS:
152
+ ---> [vald] fps: 3191.6156
153
+ ---> [vald] time_per_sample_ms: 12.5328
154
+ !!! DONE !!!
155
+ ==================== Replication 7 ====================
156
+ Time: 2024-10-30 13:32:10.410233
157
+ ---> [ground truth] Matching Score: 3.2375
158
+ ---> [ground truth] R_precision: (top 1): 0.4541 (top 2): 0.6524 (top 3): 0.7614
159
+ ---> [vald] Matching Score: 3.2039
160
+ ---> [vald] R_precision: (top 1): 0.4600 (top 2): 0.6582 (top 3): 0.7822
161
+ Time: 2024-10-30 13:32:15.110242
162
+ ---> [ground truth] FID: 0.0010
163
+ ---> [vald] FID: 0.2474
164
+ Time: 2024-10-30 13:32:19.088898
165
+ ---> [ground truth] Diversity: 9.3622
166
+ ---> [vald] Diversity: 9.1560
167
+ AUX METRICS:
168
+ ---> [vald] skate_ratio: 0.0726
169
+ ---> [vald] mean_penetration: 6.5664
170
+ ---> [vald] penetration: 0.1203
171
+ ---> [vald] floating: 24.3337
172
+ ---> [vald] skating: 0.6471
173
+ AUX METRICS:
174
+ ---> [vald] fps: 3069.7370
175
+ ---> [vald] time_per_sample_ms: 13.0304
176
+ !!! DONE !!!
177
+ ==================== Replication 8 ====================
178
+ Time: 2024-10-30 13:33:38.730361
179
+ ---> [ground truth] Matching Score: 3.2570
180
+ ---> [ground truth] R_precision: (top 1): 0.4444 (top 2): 0.6530 (top 3): 0.7672
181
+ ---> [vald] Matching Score: 3.1620
182
+ ---> [vald] R_precision: (top 1): 0.4531 (top 2): 0.6650 (top 3): 0.7832
183
+ Time: 2024-10-30 13:33:43.168566
184
+ ---> [ground truth] FID: 0.0010
185
+ ---> [vald] FID: 0.2327
186
+ Time: 2024-10-30 13:33:47.068348
187
+ ---> [ground truth] Diversity: 9.0245
188
+ ---> [vald] Diversity: 9.6330
189
+ AUX METRICS:
190
+ ---> [vald] skate_ratio: 0.0693
191
+ ---> [vald] mean_penetration: 6.0143
192
+ ---> [vald] penetration: 0.0626
193
+ ---> [vald] floating: 24.9363
194
+ ---> [vald] skating: 0.6472
195
+ AUX METRICS:
196
+ ---> [vald] fps: 3125.6639
197
+ ---> [vald] time_per_sample_ms: 12.7973
198
+ !!! DONE !!!
199
+ ==================== Replication 9 ====================
200
+ Time: 2024-10-30 13:35:09.757800
201
+ ---> [ground truth] Matching Score: 3.2334
202
+ ---> [ground truth] R_precision: (top 1): 0.4578 (top 2): 0.6619 (top 3): 0.7716
203
+ ---> [vald] Matching Score: 3.1542
204
+ ---> [vald] R_precision: (top 1): 0.4736 (top 2): 0.6875 (top 3): 0.7871
205
+ Time: 2024-10-30 13:35:13.991154
206
+ ---> [ground truth] FID: 0.0010
207
+ ---> [vald] FID: 0.2097
208
+ Time: 2024-10-30 13:35:17.637110
209
+ ---> [ground truth] Diversity: 9.2863
210
+ ---> [vald] Diversity: 9.5178
211
+ AUX METRICS:
212
+ ---> [vald] skate_ratio: 0.0729
213
+ ---> [vald] mean_penetration: 5.4497
214
+ ---> [vald] penetration: 0.0621
215
+ ---> [vald] floating: 21.2150
216
+ ---> [vald] skating: 0.5615
217
+ AUX METRICS:
218
+ ---> [vald] fps: 3223.9078
219
+ ---> [vald] time_per_sample_ms: 12.4073
220
+ !!! DONE !!!
221
+ ========== Matching Score Summary ==========
222
+ ---> [ground truth] Mean: 3.2422 CInterval: 0.0075
223
+ ---> [vald] Mean: 3.1490 CInterval: 0.0226
224
+ ========== R_precision Summary ==========
225
+ ---> [ground truth](top 1) Mean: 0.4535 CInt: 0.0038;(top 2) Mean: 0.6554 CInt: 0.0035;(top 3) Mean: 0.7644 CInt: 0.0030;
226
+ ---> [vald](top 1) Mean: 0.4644 CInt: 0.0041;(top 2) Mean: 0.6676 CInt: 0.0065;(top 3) Mean: 0.7770 CInt: 0.0049;
227
+ ========== FID Summary ==========
228
+ ---> [ground truth] Mean: 0.0011 CInterval: 0.0000
229
+ ---> [vald] Mean: 0.2826 CInterval: 0.0445
230
+ ========== Diversity Summary ==========
231
+ ---> [ground truth] Mean: 9.3263 CInterval: 0.1059
232
+ ---> [vald] Mean: 9.2111 CInterval: 0.1411
233
+ ========== MultiModality Summary ==========
234
+ ========== skate_ratio Summary ==========
235
+ ---> [vald] Mean: 0.0746 CInterval: 0.0018
236
+ ========== mean_penetration Summary ==========
237
+ ---> [vald] Mean: 6.2904 CInterval: 0.4597
238
+ ========== penetration Summary ==========
239
+ ---> [vald] Mean: 0.0834 CInterval: 0.0162
240
+ ========== floating Summary ==========
241
+ ---> [vald] Mean: 23.6109 CInterval: 0.9754
242
+ ========== skating Summary ==========
243
+ ---> [vald] Mean: 0.6289 CInterval: 0.0435
244
+ ========== fps Summary ==========
245
+ ---> [vald] Mean: 2986.3371 CInterval: 135.5679
246
+ ========== time_per_sample_ms Summary ==========
247
+ ---> [vald] Mean: 13.4711 CInterval: 0.6501
checkpoints/dip/DiP_10steps_context20_predict40/model000200000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d667b9124e57a5906a3fd3ce45934a2df0e475bc57395dd63bb76e3641491b3
3
+ size 232011477
checkpoints/dip/DiP_10steps_context20_predict40/model000600343.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcf3e482a5236a21731af571c66bd7d0274f5ddf0b6974825fa69333c77f3144
3
+ size 232011477
checkpoints/dip/DiP_10steps_context20_predict40/opt000200000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c93be3d949cb6a9b0cd0eb93408b171533354d43eef9d6cf4877b0a0a54656c
3
+ size 211523685
checkpoints/dip/DiP_10steps_context20_predict40/opt000600343.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f18fb1cafffe861a3197f0d612b2e2d19efd8349846901bd60b7463e9af77e
3
+ size 211523685