ours_4 / lam /config.json
ramu0e's picture
Upload folder using huggingface_hub
d2c64a0 verified
{
"action_depth": 5,
"action_dropout": 0.0,
"action_hidden_dim": 96,
"action_obs_dim": 0,
"action_prev_dim": 10,
"action_state_dim": 5,
"action_target_dim": 10,
"action_wide_dim": 512,
"architectures": [
"LAMModel"
],
"decoder_attention_head_dim": 64,
"decoder_attn_implementation": "flash_attention_2",
"decoder_encoder_hidden_dim": 5,
"decoder_eps": 1e-06,
"decoder_ffn_dim": 768,
"decoder_freq_dim": 64,
"decoder_in_channels": 3,
"decoder_num_attention_heads": 3,
"decoder_num_layers": 12,
"decoder_out_channels": 3,
"decoder_patch_size": [
4,
4
],
"decoder_pos_embed_seq_len": null,
"decoder_rope_max_seq_len": 1024,
"dtype": "bfloat16",
"encoder_height": 64,
"encoder_width": 64,
"fsq_levels": [
7,
5,
5,
5,
5
],
"initializer_range": 0.02,
"is_action_discrete": false,
"is_diffusion": true,
"latent_channels": 5,
"max_tokens": 128,
"min_tokens": 1,
"model_type": "lam",
"null_latent": 0,
"transformers_version": "4.57.1",
"use_tail_drop": true,
"videomae_config": {
"attn_drop_rate": 0.0,
"cos_attn": false,
"depth": 8,
"drop_path_rate": 0.0,
"drop_rate": 0.0,
"embed_dim": 192,
"img_size": [
64,
64
],
"in_chans": 3,
"init_values": 0.0,
"layer_norm_eps": 1e-06,
"mlp_ratio": 4,
"norm_layer": "nn.LayerNorm",
"num_classes": 0,
"num_frames": 2,
"num_heads": 3,
"patch_size": 4,
"qk_scale": null,
"qkv_bias": true,
"tubelet_size": 2,
"use_learnable_pos_emb": false,
"use_mean_pooling": false,
"with_cp": false
},
"videomae_from_pretrained": null,
"vocab_size": 4375
}