tradingtrading committed on
Commit
0be5ad6
·
verified ·
1 Parent(s): 1dd45c3

checkpoint step 500

Browse files
Files changed (2) hide show
  1. config.json +297 -0
  2. model.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "encoder": {
3
+ "_target_": "flextok.model.utils.wrappers.SequentialModuleDictWrapper",
4
+ "module_dict": {
5
+ "enc_channels_to_last": {
6
+ "_target_": "flextok.model.utils.dict_ops.PerSampleOp",
7
+ "read_key": "vae_latents",
8
+ "write_key": "vae_latents_bhwc",
9
+ "per_sample_op": {
10
+ "_target_": "flextok.model.utils.dict_ops.channels_first_to_last",
11
+ "_partial_": true
12
+ }
13
+ },
14
+ "enc_patch_emb": {
15
+ "_target_": "flextok.model.preprocessors.patching.PatchEmbedder",
16
+ "input_tensor_list_read_key": "vae_latents_bhwc",
17
+ "patches_list_write_key": "enc_vae_latents_patched",
18
+ "n_patches_write_key": "enc_n_patches",
19
+ "channels_in": 16,
20
+ "dim": 1152,
21
+ "patch_sizes": [
22
+ 2,
23
+ 2
24
+ ],
25
+ "flatten_patches": false
26
+ },
27
+ "enc_posemb_module": {
28
+ "_target_": "flextok.model.utils.posembs.PositionalEmbeddingAdder",
29
+ "read_key": "enc_vae_latents_patched",
30
+ "write_key": "enc_vae_latents_patched",
31
+ "dim": 1152,
32
+ "max_sizes": [
33
+ 16,
34
+ 16
35
+ ],
36
+ "posemb_type": "sincos",
37
+ "posemb_scaling": "absolute"
38
+ },
39
+ "enc_register_module": {
40
+ "_target_": "flextok.model.preprocessors.registers.Registers1D",
41
+ "input_tensor_list_read_key": "enc_vae_latents_patched",
42
+ "register_sizes_read_write_key": "register_sizes",
43
+ "registers_write_key": "enc_registers",
44
+ "dim": 1152,
45
+ "n_min": 256,
46
+ "n_max": 256,
47
+ "size_sampling_mode": "uniform",
48
+ "ordering_mode": "nested"
49
+ },
50
+ "enc_seq_packer": {
51
+ "_target_": "flextok.model.preprocessors.flex_seq_packing.BlockWiseSequencePacker",
52
+ "input_list_read_keys": [
53
+ "enc_vae_latents_patched",
54
+ "enc_registers"
55
+ ],
56
+ "packed_seq_write_key": "enc_packed_seq",
57
+ "block_mask_write_key": "enc_block_mask",
58
+ "inner_packed_shapes_write_key": "enc_ps_inner",
59
+ "outer_packed_shapes_write_key": "enc_ps_outer",
60
+ "mask_mode": "causal_last",
61
+ "pad_to_multiple": 128
62
+ },
63
+ "enc_transformer": {
64
+ "_target_": "flextok.model.trunks.transformers.FlexTransformer",
65
+ "input_seq_read_key": "enc_packed_seq",
66
+ "output_seq_write_key": "enc_packed_seq",
67
+ "dim": 1152,
68
+ "depth": 18,
69
+ "block_mask_read_key": "enc_block_mask",
70
+ "use_act_checkpoint": true
71
+ },
72
+ "enc_unpacker": {
73
+ "_target_": "flextok.model.postprocessors.seq_unpacking.SequenceUnpacker",
74
+ "packed_seq_read_key": "enc_packed_seq",
75
+ "inner_seq_write_keys": [
76
+ "enc_vae_latents_patched",
77
+ "enc_registers"
78
+ ],
79
+ "inner_packed_shapes_read_key": "enc_ps_inner",
80
+ "outer_packed_shapes_read_key": "enc_ps_outer"
81
+ },
82
+ "enc_to_latents": {
83
+ "_target_": "flextok.model.postprocessors.heads.LinearHead",
84
+ "read_key": "enc_registers",
85
+ "write_key": "enc_registers",
86
+ "dim": 1152,
87
+ "dim_out": 6,
88
+ "use_mup_readout": false,
89
+ "weight_init_style": "zero",
90
+ "dtype_override": null
91
+ }
92
+ }
93
+ },
94
+ "decoder": {
95
+ "_target_": "flextok.model.utils.wrappers.SequentialModuleDictWrapper",
96
+ "module_dict": {
97
+ "dec_from_latents": {
98
+ "_target_": "flextok.model.preprocessors.linear.LinearLayer",
99
+ "read_key": "enc_registers_quant",
100
+ "write_key": "dec_registers_proj",
101
+ "dim_in": 6,
102
+ "dim": 1792
103
+ },
104
+ "dec_registers_posemb_module": {
105
+ "_target_": "flextok.model.utils.posembs.PositionalEmbeddingAdder",
106
+ "read_key": "dec_registers_proj",
107
+ "write_key": "dec_registers_proj",
108
+ "dim": 1792,
109
+ "max_sizes": [
110
+ 256
111
+ ],
112
+ "posemb_type": "learnable_sum",
113
+ "posemb_scaling": "absolute"
114
+ },
115
+ "dec_nested_dropout": {
116
+ "_target_": "flextok.model.preprocessors.token_dropout.MaskedNestedDropout",
117
+ "read_write_key": "dec_registers_proj",
118
+ "dim": 1792,
119
+ "size_sampling_mode": "pow2"
120
+ },
121
+ "dec_latent_dropout": {
122
+ "_target_": "flextok.model.preprocessors.nullcond.LearnedNullCond",
123
+ "read_write_key": "dec_registers_proj",
124
+ "dim": 1792,
125
+ "dropout_prob": 0.2
126
+ },
127
+ "dec_noise_channels_to_last": {
128
+ "_target_": "flextok.model.utils.dict_ops.PerSampleOp",
129
+ "read_key": "vae_latents_noised",
130
+ "write_key": "vae_latents_noised_bhwc",
131
+ "per_sample_op": {
132
+ "_target_": "flextok.model.utils.dict_ops.channels_first_to_last",
133
+ "_partial_": true
134
+ }
135
+ },
136
+ "dec_noise_patch_emb": {
137
+ "_target_": "flextok.model.preprocessors.patching.PatchEmbedder",
138
+ "input_tensor_list_read_key": "vae_latents_noised_bhwc",
139
+ "patches_list_write_key": "vae_latents_noised_patched",
140
+ "n_patches_write_key": "dec_n_patches",
141
+ "channels_in": 16,
142
+ "dim": 1792,
143
+ "patch_sizes": [
144
+ 2,
145
+ 2
146
+ ],
147
+ "flatten_patches": false
148
+ },
149
+ "dec_patches_posemb_module": {
150
+ "_target_": "flextok.model.utils.posembs.PositionalEmbeddingAdder",
151
+ "read_key": "vae_latents_noised_patched",
152
+ "write_key": "dec_patches",
153
+ "dim": 1792,
154
+ "max_sizes": [
155
+ 16,
156
+ 16
157
+ ],
158
+ "posemb_type": "sincos",
159
+ "posemb_scaling": "absolute"
160
+ },
161
+ "dec_seq_packer": {
162
+ "_target_": "flextok.model.preprocessors.flex_seq_packing.BlockWiseSequencePacker",
163
+ "input_list_read_keys": [
164
+ "dec_patches",
165
+ "dec_registers_proj"
166
+ ],
167
+ "packed_seq_write_key": "dec_packed_seq",
168
+ "block_mask_write_key": "dec_block_mask",
169
+ "inner_packed_shapes_write_key": "dec_ps_inner",
170
+ "outer_packed_shapes_write_key": "dec_ps_outer",
171
+ "emb_packing_fn_write_key": "emb_packing_fn",
172
+ "mask_mode": "full",
173
+ "pad_to_multiple": 128,
174
+ "per_subseq_embs": true
175
+ },
176
+ "dec_time_embedder": {
177
+ "_target_": "flextok.model.preprocessors.time_embedding.TimestepEmbedder",
178
+ "timesteps_read_key": "timesteps",
179
+ "time_embedding_write_key": "dec_temb",
180
+ "dim": 1792,
181
+ "frequency_embedding_size": 256,
182
+ "max_timestep": 1000.0
183
+ },
184
+ "dec_transformer": {
185
+ "_target_": "flextok.model.trunks.transformers.FlexTransformer",
186
+ "input_seq_read_key": "dec_packed_seq",
187
+ "output_seq_write_key": "dec_packed_seq",
188
+ "dim": 1792,
189
+ "depth": 28,
190
+ "block_mask_read_key": "dec_block_mask",
191
+ "adaLN_emb_read_key": "dec_temb",
192
+ "adaLN_packing_fn_read_key": "emb_packing_fn",
193
+ "adaLN_expansion": 2,
194
+ "intermediate_layer_write_key": "dec_packed_seq_repa_layer",
195
+ "intermediate_layers": [
196
+ 1
197
+ ],
198
+ "use_act_checkpoint": true
199
+ },
200
+ "dec_unpacker": {
201
+ "_target_": "flextok.model.postprocessors.seq_unpacking.SequenceUnpacker",
202
+ "packed_seq_read_key": "dec_packed_seq",
203
+ "inner_seq_write_keys": [
204
+ "dec_patches",
205
+ "dec_registers_proj"
206
+ ],
207
+ "inner_packed_shapes_read_key": "dec_ps_inner",
208
+ "outer_packed_shapes_read_key": "dec_ps_outer"
209
+ },
210
+ "dec_repa_unpacker": {
211
+ "_target_": "flextok.model.postprocessors.seq_unpacking.SequenceUnpacker",
212
+ "packed_seq_read_key": "dec_packed_seq_repa_layer",
213
+ "inner_seq_write_keys": [
214
+ "dec_patches_repa_layer",
215
+ "dec_registers_repa_layer"
216
+ ],
217
+ "inner_packed_shapes_read_key": "dec_ps_inner",
218
+ "outer_packed_shapes_read_key": "dec_ps_outer"
219
+ },
220
+ "dec_to_patches": {
221
+ "_target_": "flextok.model.postprocessors.heads.ToPatchesLinearHead",
222
+ "read_key": "dec_patches",
223
+ "write_key": "dec_patches",
224
+ "dim": 1792,
225
+ "channels_out": 16,
226
+ "patch_sizes": [
227
+ 2,
228
+ 2
229
+ ],
230
+ "use_mup_readout": false,
231
+ "weight_init_style": "zero",
232
+ "adaLN_emb_read_key": "dec_temb"
233
+ },
234
+ "dec_channels_to_first": {
235
+ "_target_": "flextok.model.utils.dict_ops.PerSampleOp",
236
+ "read_key": "dec_patches",
237
+ "write_key": "vae_latents_reconst",
238
+ "per_sample_op": {
239
+ "_target_": "flextok.model.utils.dict_ops.channels_last_to_first",
240
+ "_partial_": true
241
+ }
242
+ }
243
+ }
244
+ },
245
+ "pipeline": {
246
+ "_target_": "flextok.flow_matching.pipelines.MinRFPipeline",
247
+ "_partial_": true,
248
+ "target_sizes_read_key": null,
249
+ "latents_read_key": "enc_registers_quant",
250
+ "timesteps_read_key": "timesteps",
251
+ "noised_images_read_key": "vae_latents_noised",
252
+ "reconst_write_key": "vae_latents_reconst",
253
+ "out_channels": 16
254
+ },
255
+ "flow_matching_noise_module": {
256
+ "_target_": "flextok.flow_matching.noise_modules.MinRFNoiseModule",
257
+ "clean_images_read_key": "vae_latents",
258
+ "noised_images_write_key": "vae_latents_noised",
259
+ "noise_write_key": "flow_noise",
260
+ "timesteps_write_key": "timesteps",
261
+ "sigmas_write_key": "sigmas",
262
+ "ln": false,
263
+ "stratisfied": false,
264
+ "mode_scale": 0.25
265
+ },
266
+ "vae": {
267
+ "_target_": "flextok.vae_wrapper.StableDiffusionVAE",
268
+ "images_read_key": "rgb",
269
+ "vae_latents_read_key": "vae_latents_reconst",
270
+ "vae_latents_write_key": "vae_latents",
271
+ "images_reconst_write_key": "rgb_reconst",
272
+ "vae_kl_loss_write_key": "kl_loss",
273
+ "dtype_override": null,
274
+ "sample_posterior": true,
275
+ "compile_encode_fn": false,
276
+ "force_vae_encode": true,
277
+ "latent_channels": 16,
278
+ "scaling_factor": 0.88
279
+ },
280
+ "_target_": "flextok.flextok_wrapper.FlexTok",
281
+ "regularizer": {
282
+ "_target_": "flextok.regularizers.quantize_fsq.FSQ",
283
+ "latents_read_key": "enc_registers",
284
+ "quants_write_key": "enc_registers_quant",
285
+ "tokens_write_key": "tokens",
286
+ "levels": [
287
+ 8,
288
+ 8,
289
+ 8,
290
+ 5,
291
+ 5,
292
+ 5
293
+ ],
294
+ "drop_quant_p": 0.0,
295
+ "packed_call": false
296
+ }
297
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd43d7309d29257c19cab0b5959cf1fabfb3556405a534200616c36994bff8e0
3
+ size 10163617324