File size: 7,699 Bytes
09154c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
{
  "best_fid": {
    "checkpoint": "/iridisfs/scratch/pf2m24/projects/Umdd/momask-codes/checkpoints/t2m/codeflow_part_structured_newvqtop3_w150_p192_h1152_d6s12_drop005_w0_b64_lr1e4_e600_eval10from0_seed42_test_h200_blossom_g1_20260601/model/best_fid.pt",
    "epoch": 300,
    "fid": 0.05680781579934546,
    "selection_config": {
      "best_checkpoint_limit": 3,
      "cond_scale": 6.0,
      "dataset_name": "t2m",
      "decode_mode": "nearest",
      "eval_steps": 96,
      "full_eval_seed": 42,
      "latent_norm_mode": "codebook",
      "latent_offset": 0.0,
      "repeat_times": 1,
      "sampling_method": "ode",
      "sampling_schedule": "uniform",
      "sde_gamma": 0.0,
      "split": "test",
      "terminal_mode": "tied_logits",
      "unit_length": 4,
      "weight_source": "ema"
    },
    "step": 114900,
    "top3": 0.8678879310344828
  },
  "best_fid_top3": [
    {
      "checkpoint": "/iridisfs/scratch/pf2m24/projects/Umdd/momask-codes/checkpoints/t2m/codeflow_part_structured_newvqtop3_w150_p192_h1152_d6s12_drop005_w0_b64_lr1e4_e600_eval10from0_seed42_test_h200_blossom_g1_20260601/model/best_fid.pt",
      "epoch": 300,
      "fid": 0.05680781579934546,
      "selection_config": {
        "best_checkpoint_limit": 3,
        "cond_scale": 6.0,
        "dataset_name": "t2m",
        "decode_mode": "nearest",
        "eval_steps": 96,
        "full_eval_seed": 42,
        "latent_norm_mode": "codebook",
        "latent_offset": 0.0,
        "repeat_times": 1,
        "sampling_method": "ode",
        "sampling_schedule": "uniform",
        "sde_gamma": 0.0,
        "split": "test",
        "terminal_mode": "tied_logits",
        "unit_length": 4,
        "weight_source": "ema"
      },
      "step": 114900,
      "top3": 0.8678879310344828
    },
    {
      "checkpoint": "/iridisfs/scratch/pf2m24/projects/Umdd/momask-codes/checkpoints/t2m/codeflow_part_structured_newvqtop3_w150_p192_h1152_d6s12_drop005_w0_b64_lr1e4_e600_eval10from0_seed42_test_h200_blossom_g1_20260601/model/best_fid_rank2.pt",
      "epoch": 290,
      "fid": 0.058189920626574576,
      "selection_config": {
        "best_checkpoint_limit": 3,
        "cond_scale": 6.0,
        "dataset_name": "t2m",
        "decode_mode": "nearest",
        "eval_steps": 96,
        "full_eval_seed": 42,
        "latent_norm_mode": "codebook",
        "latent_offset": 0.0,
        "repeat_times": 1,
        "sampling_method": "ode",
        "sampling_schedule": "uniform",
        "sde_gamma": 0.0,
        "split": "test",
        "terminal_mode": "tied_logits",
        "unit_length": 4,
        "weight_source": "ema"
      },
      "step": 111070,
      "top3": 0.8730603448275862
    },
    {
      "checkpoint": "/iridisfs/scratch/pf2m24/projects/Umdd/momask-codes/checkpoints/t2m/codeflow_part_structured_newvqtop3_w150_p192_h1152_d6s12_drop005_w0_b64_lr1e4_e600_eval10from0_seed42_test_h200_blossom_g1_20260601/model/best_fid_rank3.pt",
      "epoch": 320,
      "fid": 0.06242718631276034,
      "selection_config": {
        "best_checkpoint_limit": 3,
        "cond_scale": 6.0,
        "dataset_name": "t2m",
        "decode_mode": "nearest",
        "eval_steps": 96,
        "full_eval_seed": 42,
        "latent_norm_mode": "codebook",
        "latent_offset": 0.0,
        "repeat_times": 1,
        "sampling_method": "ode",
        "sampling_schedule": "uniform",
        "sde_gamma": 0.0,
        "split": "test",
        "terminal_mode": "tied_logits",
        "unit_length": 4,
        "weight_source": "ema"
      },
      "step": 122560,
      "top3": 0.8633620689655173
    }
  ],
  "best_top3": {
    "checkpoint": "/iridisfs/scratch/pf2m24/projects/Umdd/momask-codes/checkpoints/t2m/codeflow_part_structured_newvqtop3_w150_p192_h1152_d6s12_drop005_w0_b64_lr1e4_e600_eval10from0_seed42_test_h200_blossom_g1_20260601/model/best_top3.pt",
    "epoch": 290,
    "fid": 0.058189920626574576,
    "selection_config": {
      "best_checkpoint_limit": 3,
      "cond_scale": 6.0,
      "dataset_name": "t2m",
      "decode_mode": "nearest",
      "eval_steps": 96,
      "full_eval_seed": 42,
      "latent_norm_mode": "codebook",
      "latent_offset": 0.0,
      "repeat_times": 1,
      "sampling_method": "ode",
      "sampling_schedule": "uniform",
      "sde_gamma": 0.0,
      "split": "test",
      "terminal_mode": "tied_logits",
      "unit_length": 4,
      "weight_source": "ema"
    },
    "step": 111070,
    "top3": 0.8730603448275862
  },
  "best_top3_top3": [
    {
      "checkpoint": "/iridisfs/scratch/pf2m24/projects/Umdd/momask-codes/checkpoints/t2m/codeflow_part_structured_newvqtop3_w150_p192_h1152_d6s12_drop005_w0_b64_lr1e4_e600_eval10from0_seed42_test_h200_blossom_g1_20260601/model/best_top3.pt",
      "epoch": 290,
      "fid": 0.058189920626574576,
      "selection_config": {
        "best_checkpoint_limit": 3,
        "cond_scale": 6.0,
        "dataset_name": "t2m",
        "decode_mode": "nearest",
        "eval_steps": 96,
        "full_eval_seed": 42,
        "latent_norm_mode": "codebook",
        "latent_offset": 0.0,
        "repeat_times": 1,
        "sampling_method": "ode",
        "sampling_schedule": "uniform",
        "sde_gamma": 0.0,
        "split": "test",
        "terminal_mode": "tied_logits",
        "unit_length": 4,
        "weight_source": "ema"
      },
      "step": 111070,
      "top3": 0.8730603448275862
    },
    {
      "checkpoint": "/iridisfs/scratch/pf2m24/projects/Umdd/momask-codes/checkpoints/t2m/codeflow_part_structured_newvqtop3_w150_p192_h1152_d6s12_drop005_w0_b64_lr1e4_e600_eval10from0_seed42_test_h200_blossom_g1_20260601/model/best_top3_rank2.pt",
      "epoch": 140,
      "fid": 0.1756792861862806,
      "selection_config": {
        "best_checkpoint_limit": 3,
        "cond_scale": 6.0,
        "dataset_name": "t2m",
        "decode_mode": "nearest",
        "eval_steps": 96,
        "full_eval_seed": 42,
        "latent_norm_mode": "codebook",
        "latent_offset": 0.0,
        "repeat_times": 1,
        "sampling_method": "ode",
        "sampling_schedule": "uniform",
        "sde_gamma": 0.0,
        "split": "test",
        "terminal_mode": "tied_logits",
        "unit_length": 4,
        "weight_source": "ema"
      },
      "step": 53620,
      "top3": 0.8726293103448276
    },
    {
      "checkpoint": "/iridisfs/scratch/pf2m24/projects/Umdd/momask-codes/checkpoints/t2m/codeflow_part_structured_newvqtop3_w150_p192_h1152_d6s12_drop005_w0_b64_lr1e4_e600_eval10from0_seed42_test_h200_blossom_g1_20260601/model/best_top3_rank3.pt",
      "epoch": 120,
      "fid": 0.20938363776417646,
      "selection_config": {
        "best_checkpoint_limit": 3,
        "cond_scale": 6.0,
        "dataset_name": "t2m",
        "decode_mode": "nearest",
        "eval_steps": 96,
        "full_eval_seed": 42,
        "latent_norm_mode": "codebook",
        "latent_offset": 0.0,
        "repeat_times": 1,
        "sampling_method": "ode",
        "sampling_schedule": "uniform",
        "sde_gamma": 0.0,
        "split": "test",
        "terminal_mode": "tied_logits",
        "unit_length": 4,
        "weight_source": "ema"
      },
      "step": 45960,
      "top3": 0.8726293103448276
    }
  ],
  "selection_config": {
    "best_checkpoint_limit": 3,
    "cond_scale": 6.0,
    "dataset_name": "t2m",
    "decode_mode": "nearest",
    "eval_steps": 96,
    "full_eval_seed": 42,
    "latent_norm_mode": "codebook",
    "latent_offset": 0.0,
    "repeat_times": 1,
    "sampling_method": "ode",
    "sampling_schedule": "uniform",
    "sde_gamma": 0.0,
    "split": "test",
    "terminal_mode": "tied_logits",
    "unit_length": 4,
    "weight_source": "ema"
  }
}