{
    "vit_hidden_dim": 768,
    "vit_inter_dim": 3072,
    "vit_patch_size": 16,
    "vit_img_size": 256,
    "vit_n_heads": 12,
    "vit_dropout": 0.0,
    "vit_n_blocks": 12,
    "vit_ln_eps": 1e-06,
    "vit_cls_flag": false,
    "vit_model_type": "google/siglip2-base-patch16-256",
    "lm_hidden_dim": 960,
    "lm_inter_dim": 2560,
    "lm_rms_eps": 1e-05,
    "lm_re_base": 100000,
    "lm_max_position_embeddings": 8192,
    "lm_base_vocab_size": 49152,
    "extra_token_amount": 1,
    "lm_vocab_size": 49153,
    "lm_n_heads": 15,
    "lm_n_kv_heads": 5,
    "lm_dropout": 0.0,
    "lm_n_blocks": 32,
    "lm_attn_scaling": 1.0,
    "lm_max_length": 512,
    "lm_use_tokens": false,
    "lm_tie_weights": true,
    "lm_model_type": "HuggingFaceTB/SmolLM2-360M-Instruct",
    "lm_tokenizer": "HuggingFaceTB/SmolLM2-360M-Instruct",
    "lm_eos_token_id": 0,
    "mp_pixel_shuffle_factor": 2,
    "mp_image_token_length": 64,
    "vlm_extra_tokens": {
        "image_token": "<|image|>"
    },
    "vlm_load_backbone_weights": true,
    "vlm_checkpoint_path": "checkpoints",
    "hf_repo_name": "nanoVLM"
}
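
For reference, a minimal sketch of how the derived values in this config fit together, assuming the JSON above is saved as config.json. The VLMConfig dataclass here is a stand-in whose field names are copied from the JSON keys; it is not necessarily nanoVLM's actual config class, just a way to sanity-check the arithmetic.

import json
from dataclasses import dataclass

# Stand-in config class: field names mirror the JSON keys above.
# A sketch for checking the values, not nanoVLM's actual API.
@dataclass
class VLMConfig:
    vit_img_size: int
    vit_patch_size: int
    mp_pixel_shuffle_factor: int
    mp_image_token_length: int
    lm_hidden_dim: int
    lm_n_heads: int
    lm_n_kv_heads: int
    lm_base_vocab_size: int
    extra_token_amount: int
    lm_vocab_size: int

with open("config.json") as f:  # assumed filename
    raw = json.load(f)
cfg = VLMConfig(**{k: raw[k] for k in VLMConfig.__dataclass_fields__})

# 256 / 16 = 16 patches per side -> 256 patches; pixel shuffle with
# factor 2 merges 2x2 patch groups, so 256 / 2**2 = 64 image tokens.
n_patches = (cfg.vit_img_size // cfg.vit_patch_size) ** 2
assert n_patches // cfg.mp_pixel_shuffle_factor ** 2 == cfg.mp_image_token_length

# 960 hidden dims over 15 heads -> 64-dim heads; the 15 query heads
# share 5 KV heads (grouped-query attention, 3 queries per KV head).
assert cfg.lm_hidden_dim % cfg.lm_n_heads == 0
assert cfg.lm_n_heads % cfg.lm_n_kv_heads == 0

# Base vocabulary plus one extra token (<|image|>): 49152 + 1 = 49153.
assert cfg.lm_base_vocab_size + cfg.extra_token_amount == cfg.lm_vocab_size

print("image tokens:", cfg.mp_image_token_length,
      "| head dim:", cfg.lm_hidden_dim // cfg.lm_n_heads)

These relationships explain the otherwise opaque constants: mp_image_token_length is 64 because a 256x256 image at patch size 16 yields 256 patches, reduced 4x by the pixel-shuffle projector, and lm_vocab_size is 49153 because exactly one token (the <|image|> placeholder in vlm_extra_tokens) is appended to SmolLM2's 49152-token base vocabulary.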