{
  "source_model": "moonshotai/Kimi-K2.5",
  "component": "vision_tower + mm_projector",
  "description": "Vision-only weights extracted from Kimi-K2.5 for use with MLX.",
  "vision_config": {
    "_attn_implementation": "flash_attention_2",
    "init_pos_emb_height": 64,
    "init_pos_emb_time": 4,
    "init_pos_emb_width": 64,
    "merge_kernel_size": [
      2,
      2
    ],
    "merge_type": "sd2_tpool",
    "mm_hidden_size": 1152,
    "mm_projector_type": "patchmerger",
    "patch_size": 14,
    "pos_emb_type": "divided_fixed",
    "projector_hidden_act": "gelu",
    "projector_ln_eps": 1e-05,
    "text_hidden_size": 7168,
    "video_attn_type": "spatial_temporal",
    "vt_hidden_size": 1152,
    "vt_intermediate_size": 4304,
    "vt_num_attention_heads": 16,
    "vt_num_hidden_layers": 27
  },
  "projector": {
    "type": "PatchMergerMLP",
    "input_dim": 4608,
    "hidden_dim": 4608,
    "output_dim": 7168,
    "pre_norm_eps": 1e-05
  },
  "num_tensors": 335,
  "original_dtype": "bfloat16",
  "media_placeholder_token_id": 163605
}