{
"model_type": "prefix_smolvlm_distill",
"distill_cfg": {
"vit_hidden_dim": 1152,
"vit_inter_dim": 4304,
"vit_patch_size": 14,
"vit_img_size": 384,
"vit_n_heads": 16,
"vit_dropout": 0.0,
"vit_n_blocks": 27,
"vit_ln_eps": 1e-06,
"vit_cls_flag": false,
"vit_model_type": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
"lm_hidden_dim": 2048,
"lm_inter_dim": 8192,
"lm_rms_eps": 1e-05,
"lm_re_base": 130000,
"lm_max_position_embeddings": 8192,
"lm_base_vocab_size": 49280,
"extra_token_amount": 0,
"lm_vocab_size": 49280,
"lm_n_heads": 32,
"lm_n_kv_heads": 32,
"lm_dropout": 0.0,
"lm_n_blocks": 24,
"lm_attn_scaling": 1.0,
"lm_pad_aware_rope": true,
"lm_max_length": 2048,
"lm_use_tokens": false,
"lm_tie_weights": true,
"lm_model_type": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
"lm_tokenizer": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
"lm_chat_template": null,
"mp_pixel_shuffle_factor": 3,
"mp_image_token_length": 81,
"max_img_size": 384,
"resize_to_max_side_len": false,
"vlm_extra_tokens": null,
"vlm_load_backbone_weights": true,
"vlm_checkpoint_path": null,
"smolvlm_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
"processor_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
"teacher_lm_model_id": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
"resume_student_from_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
"transport_mode": "full",
"use_kv_bridge": false,
"kv_bridge_mode": "affine",
"kv_bridge_affine_stack_depth": 2,
"kv_bridge_adapter_expansion_factor": 1.0,
"kv_bridge_use_gate": false,
"distill_temperature": 2.0,
"distill_alpha": 0.5,
"distill_skip_sources": [
"chart2text",
"chartqa",
"docvqa",
"infographic_vqa",
"ocrvqa",
"textcaps",
"textvqa",
"vistext",
"visualmrc"
]
}
}