File size: 1,948 Bytes
76e6861
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
{
  "model_type": "prefix_smolvlm_distill",
  "distill_cfg": {
    "vit_hidden_dim": 1152,
    "vit_inter_dim": 4304,
    "vit_patch_size": 14,
    "vit_img_size": 384,
    "vit_n_heads": 16,
    "vit_dropout": 0.0,
    "vit_n_blocks": 27,
    "vit_ln_eps": 1e-06,
    "vit_cls_flag": false,
    "vit_model_type": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    "lm_hidden_dim": 2048,
    "lm_inter_dim": 8192,
    "lm_rms_eps": 1e-05,
    "lm_re_base": 130000,
    "lm_max_position_embeddings": 8192,
    "lm_base_vocab_size": 49280,
    "extra_token_amount": 0,
    "lm_vocab_size": 49280,
    "lm_n_heads": 32,
    "lm_n_kv_heads": 32,
    "lm_dropout": 0.0,
    "lm_n_blocks": 24,
    "lm_attn_scaling": 1.0,
    "lm_pad_aware_rope": true,
    "lm_max_length": 2048,
    "lm_use_tokens": false,
    "lm_tie_weights": true,
    "lm_model_type": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
    "lm_tokenizer": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    "lm_chat_template": null,
    "mp_pixel_shuffle_factor": 3,
    "mp_image_token_length": 81,
    "max_img_size": 384,
    "resize_to_max_side_len": false,
    "vlm_extra_tokens": null,
    "vlm_load_backbone_weights": true,
    "vlm_checkpoint_path": null,
    "smolvlm_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    "processor_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    "teacher_lm_model_id": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
    "resume_student_from_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    "transport_mode": "full",
    "use_kv_bridge": false,
    "kv_bridge_mode": "affine",
    "kv_bridge_affine_stack_depth": 2,
    "kv_bridge_adapter_expansion_factor": 1.0,
    "kv_bridge_use_gate": false,
    "distill_temperature": 2.0,
    "distill_alpha": 0.5,
    "distill_skip_sources": [
      "chart2text",
      "chartqa",
      "docvqa",
      "infographic_vqa",
      "ocrvqa",
      "textcaps",
      "textvqa",
      "vistext",
      "visualmrc"
    ]
  }
}