{
  "architectures": ["FMNISTT2IModel"],
  "model_type": "fmnist_t2i_diffusion",
  "patch_size": 1,
  "image_size": 8,
  "num_channels": 4,
  "embed_dim": 384,
  "mlp_multiplier": 4,
  "n_layers": 12,
  "n_heads": 6,
  "cross_heads": 4,
  "noise_embed_dims": 128,
  "text_emb_size": 10,
  "dropout_prob": 0.1,
  "use_flash_attention": true,
  "text_encoding_type": "one_hot",
  "text_dropout_prob": 0.1,
  "num_classes": 10,
  "text_embed_dim": 384,
  "text_projection_layers": 1,
  "diffusion_config": {
    "timesteps": "continuous",
    "beta_a": 1.0,
    "beta_b": 2.5,
    "noise_start": 0.99,
    "noise_end": 0.01
  },
  "vae_config": {
    "vae_path": "stabilityai/sd-vae-ft-mse",
    "vae_latent_scale": 0.18215
  },
  "training_config": {
    "lr": 0.001,
    "weight_decay": 0.01,
    "betas": [0.9, 0.999],
    "eps": 1e-8,
    "max_grad_norm": 1.0,
    "ema_decay": 0.999
  },
  "hardware_config": {
    "use_amp": true,
    "amp_dtype": "float16",
    "compile_model": false
  },
  "init_cfg": {
    "type": "trunc_normal",
    "std": 0.02,
    "a": -2,
    "b": 2
  }
}