ActionCodec-Base-RVQft / config.json
ZibinDong's picture
Upload folder using huggingface_hub
1bc40b2 verified
{
"architectures": [
"ActionCodec"
],
"decoder_add_causal_mask": false,
"decoder_add_self_attn": false,
"decoder_cls_size": 1,
"decoder_dim": 384,
"decoder_n_heads": 6,
"decoder_n_layers": 12,
"decoder_pos_encoding_type": "fourier",
"dtype": "float32",
"embodiment_config": {
"a_franka_libero_20hz": {
"action_dim": 7,
"description": "20Hz 7-dim action for 1s. Delta eef position (xyz), orientation (rpy), and gripper position (1 open/0 close).",
"duration": 1,
"freq": 20
},
"b_widowx_bridge_5hz": {
"action_dim": 7,
"description": "5Hz 7-dim action for 3s. Delta eef position (xyz), orientation (rpy), and gripper position (1 open/0 close).",
"duration": 3,
"freq": 5
},
"c_franka_droid_15hz": {
"action_dim": 7,
"description": "15Hz 7-dim action for 1s. Delta eef position (xyz), orientation (rpy), and gripper position (1 open/0 close).",
"duration": 1,
"freq": 15
}
},
"encoder_add_causal_mask": false,
"encoder_add_self_attn": false,
"encoder_dim": 384,
"encoder_n_heads": 6,
"encoder_n_layers": 12,
"encoder_pos_encoding_type": "fourier",
"model_type": "action_codec",
"n_quantizers": 3,
"n_tokens": 48,
"transformers_version": "4.57.1",
"vq_codebook_size": 2048,
"vq_commitment_weight": 0.25,
"vq_decay": 0.99,
"vq_kmeans_init": true,
"vq_quantizer_dropout": 0.25,
"vq_threshold_ema_dead_code": 2,
"vq_type": "rvq",
"z_dim": 512
}