{
"vit_hidden_dim": 768,
"vit_inter_dim": 3072,
"vit_patch_size": 16,
"vit_img_size": 512,
"vit_n_heads": 12,
"vit_dropout": 0.0,
"vit_n_blocks": 12,
"vit_ln_eps": 1e-06,
"vit_cls_flag": false,
"vit_model_type": "google/siglip2-base-patch16-512",
"lm_hidden_dim": 960,
"lm_inter_dim": 2560,
"lm_rms_eps": 1e-05,
"lm_re_base": 100000,
"lm_max_position_embeddings": 8192,
"lm_base_vocab_size": 49152,
"extra_token_amount": 66,
"lm_vocab_size": 49218,
"lm_n_heads": 15,
"lm_n_kv_heads": 5,
"lm_dropout": 0.0,
"lm_n_blocks": 32,
"lm_attn_scaling": 1.0,
"lm_max_length": 8192,
"lm_use_tokens": false,
"lm_tie_weights": true,
"lm_model_type": "HuggingFaceTB/SmolLM2-360M-Instruct",
"lm_tokenizer": "HuggingFaceTB/SmolLM2-360M-Instruct",
"lm_chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"mp_pixel_shuffle_factor": 4,
"mp_image_token_length": 64,
"max_img_size": 2048,
"resize_to_max_side_len": true,
"vlm_extra_tokens": {
"image_token": "<|image|>",
"global_image_token": "<|global_image|>",
"r1c1": "<row_1_col_1>",
"r1c2": "<row_1_col_2>",
"r1c3": "<row_1_col_3>",
"r1c4": "<row_1_col_4>",
"r1c5": "<row_1_col_5>",
"r1c6": "<row_1_col_6>",
"r1c7": "<row_1_col_7>",
"r1c8": "<row_1_col_8>",
"r2c1": "<row_2_col_1>",
"r2c2": "<row_2_col_2>",
"r2c3": "<row_2_col_3>",
"r2c4": "<row_2_col_4>",
"r2c5": "<row_2_col_5>",
"r2c6": "<row_2_col_6>",
"r2c7": "<row_2_col_7>",
"r2c8": "<row_2_col_8>",
"r3c1": "<row_3_col_1>",
"r3c2": "<row_3_col_2>",
"r3c3": "<row_3_col_3>",
"r3c4": "<row_3_col_4>",
"r3c5": "<row_3_col_5>",
"r3c6": "<row_3_col_6>",
"r3c7": "<row_3_col_7>",
"r3c8": "<row_3_col_8>",
"r4c1": "<row_4_col_1>",
"r4c2": "<row_4_col_2>",
"r4c3": "<row_4_col_3>",
"r4c4": "<row_4_col_4>",
"r4c5": "<row_4_col_5>",
"r4c6": "<row_4_col_6>",
"r4c7": "<row_4_col_7>",
"r4c8": "<row_4_col_8>",
"r5c1": "<row_5_col_1>",
"r5c2": "<row_5_col_2>",
"r5c3": "<row_5_col_3>",
"r5c4": "<row_5_col_4>",
"r5c5": "<row_5_col_5>",
"r5c6": "<row_5_col_6>",
"r5c7": "<row_5_col_7>",
"r5c8": "<row_5_col_8>",
"r6c1": "<row_6_col_1>",
"r6c2": "<row_6_col_2>",
"r6c3": "<row_6_col_3>",
"r6c4": "<row_6_col_4>",
"r6c5": "<row_6_col_5>",
"r6c6": "<row_6_col_6>",
"r6c7": "<row_6_col_7>",
"r6c8": "<row_6_col_8>",
"r7c1": "<row_7_col_1>",
"r7c2": "<row_7_col_2>",
"r7c3": "<row_7_col_3>",
"r7c4": "<row_7_col_4>",
"r7c5": "<row_7_col_5>",
"r7c6": "<row_7_col_6>",
"r7c7": "<row_7_col_7>",
"r7c8": "<row_7_col_8>",
"r8c1": "<row_8_col_1>",
"r8c2": "<row_8_col_2>",
"r8c3": "<row_8_col_3>",
"r8c4": "<row_8_col_4>",
"r8c5": "<row_8_col_5>",
"r8c6": "<row_8_col_6>",
"r8c7": "<row_8_col_7>",
"r8c8": "<row_8_col_8>"
},
"vlm_load_backbone_weights": true,
"vlm_checkpoint_path": "checkpoints",
"hf_repo_name": "nanoVLM"
}