{ "model_type": "prefix_smolvlm_distill", "distill_cfg": { "vit_hidden_dim": 1152, "vit_inter_dim": 4304, "vit_patch_size": 14, "vit_img_size": 384, "vit_n_heads": 16, "vit_dropout": 0.0, "vit_n_blocks": 27, "vit_ln_eps": 1e-06, "vit_cls_flag": false, "vit_model_type": "HuggingFaceTB/SmolVLM2-2.2B-Instruct", "lm_hidden_dim": 2048, "lm_inter_dim": 8192, "lm_rms_eps": 1e-05, "lm_re_base": 130000, "lm_max_position_embeddings": 8192, "lm_base_vocab_size": 49280, "extra_token_amount": 0, "lm_vocab_size": 49280, "lm_n_heads": 32, "lm_n_kv_heads": 32, "lm_dropout": 0.0, "lm_n_blocks": 24, "lm_attn_scaling": 1.0, "lm_pad_aware_rope": true, "lm_max_length": 2048, "lm_use_tokens": false, "lm_tie_weights": true, "lm_model_type": "HuggingFaceTB/SmolLM2-1.7B-Instruct", "lm_tokenizer": "HuggingFaceTB/SmolVLM2-2.2B-Instruct", "lm_chat_template": null, "mp_pixel_shuffle_factor": 3, "mp_image_token_length": 81, "max_img_size": 384, "resize_to_max_side_len": false, "vlm_extra_tokens": null, "vlm_load_backbone_weights": true, "vlm_checkpoint_path": null, "smolvlm_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct", "processor_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct", "teacher_lm_model_id": "HuggingFaceTB/SmolLM2-1.7B-Instruct", "resume_student_from_model_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct", "transport_mode": "full", "use_kv_bridge": false, "kv_bridge_mode": "affine", "kv_bridge_affine_stack_depth": 2, "kv_bridge_adapter_expansion_factor": 1.0, "kv_bridge_use_gate": false, "distill_temperature": 2.0, "distill_alpha": 0.5, "distill_skip_sources": [ "chart2text", "chartqa", "docvqa", "infographic_vqa", "ocrvqa", "textcaps", "textvqa", "vistext", "visualmrc" ] } }