{
  "model_type": "prefix_smolvlm_distill",
  "distill_cfg": {
    "vit_hidden_dim": 1152,
    "vit_inter_dim": 4304,
    "vit_patch_size": 14,
    "vit_img_size": 384,
    "vit_n_heads": 16,
    "vit_dropout": 0.0,
    "vit_n_blocks": 27,
    "vit_ln_eps": 1e-06,
    "vit_cls_flag": false,
    "vit_model_type": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    "lm_hidden_dim": 2048,
    "lm_inter_dim": 8192,
    "lm_rms_eps": 1e-05,
    "lm_re_base": 130000,
    "lm_max_position_embeddings": 8192,
    "lm_base_vocab_size": 49280,
    "extra_token_amount": 0,
    "lm_vocab_size": 49280,
    "lm_n_heads": 32,
    "lm_n_kv_heads": 32,
    "lm_dropout": 0.0,
    "lm_n_blocks": 24,
    "lm_attn_scaling": 1.0,
    "lm_pad_aware_rope": true,
    "lm_max_length": 2048,
    "lm_use_tokens": false,
    "lm_tie_weights": true,
    "lm_model_type": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
    "lm_tokenizer": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    "lm_chat_template": null,
    "mp_pixel_shuffle_factor": 3,
    "mp_image_token_length": 81,
    "max_img_size": 384,
    "resize_to_max_side_len": false,
    "vlm_extra_tokens": null,
    "vlm_load_backbone_weights": true,
    "vlm_checkpoint_path": null,
    "smolvlm_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    "processor_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    "teacher_lm_model_id": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
    "resume_student_from_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    "transport_mode": "full",
    "use_kv_bridge": false,
    "kv_bridge_mode": "affine",
    "kv_bridge_affine_stack_depth": 2,
    "kv_bridge_adapter_expansion_factor": 1.0,
    "kv_bridge_use_gate": false,
    "distill_temperature": 2.0,
    "distill_alpha": 0.5,
    "distill_skip_sources": [
      "chart2text",
      "chartqa",
      "docvqa",
      "infographic_vqa",
      "ocrvqa",
      "textcaps",
      "textvqa",
      "vistext",
      "visualmrc"
    ]
  }
}