Duplicate from aidiffuser/Kimi-K2.7-Code-vision

Browse files

Co-authored-by: aidiffuser <aidiffuser@users.noreply.huggingface.co>

Files changed (5) hide show

.gitattributes +35 -0
README.md +50 -0
config.json +38 -0
extract_vision_weights.py +60 -0
kimi_k27_vision.safetensors +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,50 @@

+---
+license: other
+license_name: modified-mit
+license_link: https://huggingface.co/moonshotai/Kimi-K2.7-Code/blob/main/LICENSE
+base_model: moonshotai/Kimi-K2.7-Code
+tags:
+- mlx
+- vision
+- kimi
+- exo
+---
+# Kimi-K2.7-Code-vision
+Vision-only weights (MoonViT tower + multimodal projector) extracted from
+[moonshotai/Kimi-K2.7-Code](https://huggingface.co/moonshotai/Kimi-K2.7-Code)
+for use with MLX-based inference stacks such as [exo](https://github.com/exo-explore/exo),
+in the same format as [exolabs/Kimi-K2.6-vision](https://huggingface.co/exolabs/Kimi-K2.6-vision).
+## Contents
+- `kimi_k27_vision.safetensors` — all 335 `vision_tower.*` and `mm_projector.*`
+  tensors from the official repo (shards 63–64), original bfloat16, unmodified.
+- `config.json` — adapted from the `config.json` of `exolabs/Kimi-K2.6-vision`;
+  its vision config was verified byte-identical to the one in the official
+  Kimi-K2.7-Code `config.json` (27-layer MoonViT, hidden 1152, patch 14,
+  `sd2_tpool` merger, projector to 7168).
+- `extract_vision_weights.py` — the script used to produce this repo,
+  for reproducibility.
+## Usage with exo
+Add a model card for `moonshotai/Kimi-K2.7-Code` with:
+```toml
+capabilities = ["text", "thinking", "thinking_toggle", "vision"]
+[vision]
+image_token_id = 163605
+model_type = "kimi_vl"
+weights_repo = "aidiffuser/Kimi-K2.7-Code-vision"
+processor_repo = "moonshotai/Kimi-K2.7-Code"
+```
+Tested working: distributed (2× Mac Studio M3 Ultra, tensor parallelism) with
+the official INT4 text weights, image understanding confirmed.
+## License
+Same Modified MIT license as the source model; these are a subset of the
+original weights, unmodified. All credit to Moonshot AI.

config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+ "source_model": "moonshotai/Kimi-K2.7-Code",
+ "component": "vision_tower + mm_projector",
+ "description": "Vision-only weights extracted locally from Kimi-K2.7-Code for use with MLX/exo.",
+ "vision_config": {
+  "_attn_implementation": "flash_attention_2",
+  "init_pos_emb_height": 64,
+  "init_pos_emb_time": 4,
+  "init_pos_emb_width": 64,
+  "merge_kernel_size": [
+   2,
+   2
+  ],
+  "merge_type": "sd2_tpool",
+  "mm_hidden_size": 1152,
+  "mm_projector_type": "patchmerger",
+  "patch_size": 14,
+  "pos_emb_type": "divided_fixed",
+  "projector_hidden_act": "gelu",
+  "projector_ln_eps": 1e-05,
+  "text_hidden_size": 7168,
+  "video_attn_type": "spatial_temporal",
+  "vt_hidden_size": 1152,
+  "vt_intermediate_size": 4304,
+  "vt_num_attention_heads": 16,
+  "vt_num_hidden_layers": 27
+ },
+ "projector": {
+  "type": "PatchMergerMLP",
+  "input_dim": 4608,
+  "hidden_dim": 4608,
+  "output_dim": 7168,
+  "pre_norm_eps": 1e-05
+ },
+ "num_tensors": 335,
+ "original_dtype": "bfloat16",
+ "media_placeholder_token_id": 163605
+}

extract_vision_weights.py ADDED Viewed

	@@ -0,0 +1,60 @@

+#!/usr/bin/env python3
+"""Build a local Kimi-K2.7-Code vision tower for exo, mirroring exolabs--Kimi-K2.6-vision.
+Copies the 335 vision_tower.* / mm_projector.* tensors out of the official
+moonshotai/Kimi-K2.7-Code download (shards 63+64) into a single safetensors
+file, with a config.json adapted from the K2.6 tower. exo's downloader falls
+back to local files when the repo doesn't exist on HF, so no upload is needed.
+Run with exo's venv python (has safetensors):
+  ~/exo-next/.venv/bin/python ~/.exo/extract-k27-vision-tower.py
+"""
+import json
+from pathlib import Path
+from safetensors import safe_open
+from safetensors.numpy import save_file
+MODELS = Path("/Volumes/LLM/exo-models")
+SRC = MODELS / "moonshotai--Kimi-K2.7-Code"
+K26_TOWER = MODELS / "exolabs--Kimi-K2.6-vision"
+DST = MODELS / "aidiffuser--Kimi-K2.7-Code-vision"
+SHARDS = ["model-00063-of-000064.safetensors", "model-00064-of-000064.safetensors"]
+PREFIXES = ("vision_tower.", "mm_projector.")
+for shard in SHARDS:
+    if not (SRC / shard).exists():
+        raise SystemExit(f"missing {shard} — download not finished yet")
+tensors = {}
+dtypes = set()
+for shard in SHARDS:
+    # framework="np" keeps bf16 unsupported; use torch-free path via mlx instead
+    with safe_open(str(SRC / shard), framework="pt") as f:
+        for key in f.keys():
+            if key.startswith(PREFIXES):
+                t = f.get_tensor(key)
+                dtypes.add(str(t.dtype))
+                tensors[key] = t
+print(f"extracted {len(tensors)} tensors, dtypes: {dtypes}")
+assert len(tensors) == 335, f"expected 335 tensors, got {len(tensors)}"
+DST.mkdir(exist_ok=True)
+# safetensors.numpy can't write bf16; go through torch's save_file instead
+from safetensors.torch import save_file as save_pt
+save_pt(tensors, str(DST / "kimi_k27_vision.safetensors"))
+cfg = json.loads((K26_TOWER / "config.json").read_text())
+cfg["source_model"] = "moonshotai/Kimi-K2.7-Code"
+cfg["description"] = (
+    "Vision-only weights extracted locally from Kimi-K2.7-Code for use with MLX/exo."
+)
+(DST / "config.json").write_text(json.dumps(cfg, indent=1) + "\n")
+size = (DST / "kimi_k27_vision.safetensors").stat().st_size
+print(f"wrote {DST} ({size / 1e9:.2f} GB) — done")

kimi_k27_vision.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aae39a3d166a65795fb62ad14c20f4b7fd840db209a9a016960afe1db02520bc
+size 942326328