xLSTM-tiny-stories / config.json
{
  "mlstm_block": {
    "mlstm": {
      "proj_factor": 2.0,
      "round_proj_up_dim_up": true,
      "round_proj_up_to_multiple_of": 64,
      "_proj_up_dim": 768,
      "conv1d_kernel_size": 4,
      "qkv_proj_blocksize": 32,
      "num_heads": 4,
      "embedding_dim": 384,
      "bias": false,
      "dropout": 0.0,
      "context_length": 256,
      "_num_blocks": 6,
      "_inner_embedding_dim": 768
    },
    "_num_blocks": 6,
    "_block_idx": null
  },
  "slstm_block": {
    "slstm": {
      "hidden_size": 384,
      "num_heads": 4,
      "num_states": 4,
      "backend": "vanilla",
      "function": "slstm",
      "bias_init": "powerlaw_blockdependent",
      "recurrent_weight_init": "zeros",
      "_block_idx": null,
      "_num_blocks": 6,
      "num_gates": 4,
      "gradient_recurrent_clipval": null,
      "forward_clipval": null,
      "batch_size": 8,
      "input_shape": "BSGNH",
      "internal_input_shape": "SBNGH",
      "output_shape": "BNSH",
      "dtype": "bfloat16",
      "dtype_b": "float32",
      "dtype_r": "bfloat16",
      "dtype_w": "bfloat16",
      "dtype_g": "bfloat16",
      "dtype_s": "bfloat16",
      "dtype_a": "float32",
      "initial_val": 0.0,
      "enable_automatic_mixed_precision": true,
      "embedding_dim": 384,
      "conv1d_kernel_size": 4,
      "group_norm_weight": true,
      "dropout": 0.0
    },
    "feedforward": {
      "proj_factor": 1.3,
      "round_proj_up_dim_up": true,
      "round_proj_up_to_multiple_of": 64,
      "_proj_up_dim": 0,
      "act_fn": "swish",
      "embedding_dim": -1,
      "dropout": 0.0,
      "bias": false,
      "ff_type": "ffn_gated",
      "_num_blocks": 1
    },
    "_num_blocks": 6,
    "_block_idx": null
  },
  "context_length": 256,
  "num_blocks": 6,
  "embedding_dim": 384,
  "add_post_blocks_norm": true,
  "bias": false,
  "dropout": 0.0,
  "slstm_at": [],
  "_block_map": "0,0,0,0,0,0",
  "vocab_size": 49152,
  "tie_weights": false,
  "weight_decay_on_embedding": false,
  "add_embedding_dropout": false,
  "pad_token_id": 0
}
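
The field names above mirror the config dataclasses of the upstream NX-AI xlstm package. As a rough illustration only (not this repository's own loading code), the sketch below rebuilds an equivalent architecture config and instantiates an xLSTMLMModel from it; the class and field names are taken from the public xlstm library, and anything not present in the JSON above is an assumption.

    # Minimal sketch, assuming the upstream NX-AI `xlstm` package (pip install xlstm)
    # and PyTorch are available. This is NOT this repository's own loading code.
    from xlstm import (
        xLSTMLMModel,
        xLSTMLMModelConfig,
        mLSTMBlockConfig,
        mLSTMLayerConfig,
        sLSTMBlockConfig,
        sLSTMLayerConfig,
        FeedForwardConfig,
    )

    # Architecture described by config.json: 6 blocks, embedding_dim 384,
    # context_length 256, vocab_size 49152. Because "slstm_at" is [] and
    # "_block_map" is "0,0,0,0,0,0", every block is an mLSTM block.
    cfg = xLSTMLMModelConfig(
        mlstm_block=mLSTMBlockConfig(
            mlstm=mLSTMLayerConfig(
                proj_factor=2.0,
                conv1d_kernel_size=4,
                qkv_proj_blocksize=32,
                num_heads=4,
            )
        ),
        slstm_block=sLSTMBlockConfig(
            slstm=sLSTMLayerConfig(
                backend="vanilla",
                num_heads=4,
                conv1d_kernel_size=4,
                bias_init="powerlaw_blockdependent",
            ),
            feedforward=FeedForwardConfig(proj_factor=1.3, act_fn="swish"),
        ),
        context_length=256,
        num_blocks=6,
        embedding_dim=384,
        vocab_size=49152,
        slstm_at=[],        # no sLSTM blocks are actually placed
        tie_weights=False,
    )

    model = xLSTMLMModel(cfg)

Building the config this way only reproduces the architecture; the trained parameters would still have to be loaded from the repository's own checkpoint files, which this sketch does not cover.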