{
  "channel_size": 3,
  "coord_dec_dim": 8192,
  "coord_enc_dim": 512,
  "coord_out_dim": 2048,
  "coord_token_id": 240,
  "dim": 768,
  "eos_id": 11,
  "ffn_dim": 2304,
  "head_dim": 64,
  "image_cls_token_id": 244,
  "image_reg_1_token_id": 245,
  "image_reg_2_token_id": 246,
  "image_reg_3_token_id": 247,
  "image_reg_4_token_id": 248,
  "img_end_id": 230,
  "img_id": 227,
  "img_row_sep_id": 228,
  "img_start_id": 229,
  "max_seq_len": 8192,
  "n_heads": 16,
  "n_kv_heads": 8,
  "n_layers": 22,
  "norm_eps": 1e-05,
  "num_segm_layers": 3,
  "perception_heads": false,
  "rope_theta": 10000,
  "seg_token_id": 262,
  "segm_out_dim": 256,
  "size_dec_dim": 8192,
  "size_enc_dim": 512,
  "size_out_dim": 2048,
  "size_token_id": 241,
  "spatial_patch_size": 16,
  "temporal_patch_size": 1,
  "vocab_size": 65536
}