{
  "audio_drop_path": 0.1,
  "audio_embed_dim": 768,
  "audio_kernel_size": 16,
  "audio_num_blocks": 12,
  "audio_num_heads": 12,
  "audio_num_mel_bins": 128,
  "audio_stride": 10,
  "audio_target_len": 204,
  "depth_drop_path": 0.0,
  "depth_embed_dim": 384,
  "depth_kernel_size": 16,
  "depth_num_blocks": 12,
  "depth_num_heads": 8,
  "imu_drop_path": 0.7,
  "imu_embed_dim": 512,
  "imu_kernel_size": 8,
  "imu_num_blocks": 6,
  "imu_num_heads": 8,
  "kernel_size": [
    2,
    14,
    14
  ],
  "out_embed_dim": 1024,
  "text_embed_dim": 1024,
  "text_num_blocks": 24,
  "text_num_heads": 16,
  "thermal_drop_path": 0.0,
  "thermal_embed_dim": 768,
  "thermal_kernel_size": 16,
  "thermal_num_blocks": 12,
  "thermal_num_heads": 12,
  "video_frames": 2,
  "vision_embed_dim": 1280,
  "vision_num_blocks": 32,
  "vision_num_heads": 16
}