Upload patch_vllm.py with huggingface_hub
Browse files — patch_vllm.py: +58 −0
patch_vllm.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Patch 1: speculative.py — register "kimi_k25" in vllm's list of
# architectures eligible for MTP speculative decoding.
spec_path = "/usr/local/lib/python3.12/dist-packages/vllm/config/speculative.py"
with open(spec_path) as f:
    c = f.read()

old = '"deepseek_v3", "deepseek_v32", "glm_moe_dsa"'
new = '"deepseek_v3", "deepseek_v32", "glm_moe_dsa", "kimi_k25"'
if "kimi_k25" in c:
    print("speculative.py already patched")
elif old in c:
    c = c.replace(old, new)
    with open(spec_path, "w") as f:
        f.write(c)
    print("speculative.py PATCHED")
else:
    # Anchor missing and "kimi_k25" absent: almost certainly a different
    # vllm version. The original code collapsed this case into
    # "already patched", silently masking a failed patch — report it instead.
    print("speculative.py: anchor not found (vllm version mismatch?) — NOT patched")
# Patch 2: kimi_k25.py — add a weight-name mapping so checkpoint keys under
# "model.layers." load into the wrapped "language_model.model.layers.".
k25_path = "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/kimi_k25.py"
with open(k25_path) as f:
    c = f.read()

if '"model.layers.": "language_model.model.layers."' in c:
    print("kimi_k25.py already patched")
else:
    anchor = '"language_model.layers.": "language_model.model.layers.",'
    # NOTE(review): the leading spaces on the inserted line must match the
    # mapping dict's indentation in the installed vllm source — verify.
    patched = c.replace(
        anchor,
        anchor + '\n        "model.layers.": "language_model.model.layers.",')
    if patched == c:
        # str.replace() was a no-op: the anchor is missing (different vllm
        # version). The original rewrote the file unchanged and still
        # printed "PATCHED" — report the failure instead of a false success.
        print("kimi_k25.py: anchor not found — NOT patched")
    else:
        with open(k25_path, "w") as f:
            f.write(patched)
        print("kimi_k25.py PATCHED")
# Patch 3: deepseek_mtp.py — unwrap `text_config` everywhere hf_config is
# read, so composite VLM configs (e.g. KimiK25Config) work with the DeepSeek
# MTP draft model.
mtp_path = "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/deepseek_mtp.py"
with open(mtp_path) as f:
    c = f.read()

# NOTE(review): this guard assumes an unpatched deepseek_mtp.py never
# mentions "text_config" for unrelated reasons — verify per vllm version.
if "text_config" in c:
    print("deepseek_mtp.py already patched")
else:
    # Inject a helper at the existing import so it is in scope for every
    # rewritten config read below.
    import_marker = "from .utils import maybe_prefix"
    helper = '''from .utils import maybe_prefix


def _get_text_config(hf_config):
    """Extract text_config from VLM configs (e.g. KimiK25Config)."""
    return getattr(hf_config, 'text_config', hf_config)'''
    # NOTE(review): the "\n        " indentation inside the multi-line
    # anchors must match the installed vllm source exactly — verify.
    replacements = [
        (import_marker, helper),
        ("config = vllm_config.model_config.hf_config\n        self.mtp_start_layer_idx",
         "config = _get_text_config(vllm_config.model_config.hf_config)\n        self.mtp_start_layer_idx"),
        ("self.config = vllm_config.model_config.hf_config\n        self.model = DeepSeekMultiTokenPredictor",
         "self.config = _get_text_config(vllm_config.model_config.hf_config)\n        self.model = DeepSeekMultiTokenPredictor"),
        ("config = vllm_config.speculative_config.draft_model_config.hf_config\n        self.config = config",
         "config = _get_text_config(vllm_config.speculative_config.draft_model_config.hf_config)\n        self.config = config"),
    ]

    # Apply all-or-nothing. The original wrote the file even when every
    # replace no-opped (false "PATCHED"), and — worse — a partial match
    # could emit a module that calls _get_text_config without defining it
    # (NameError on import if only import_marker failed to match).
    missed = [old.splitlines()[0] for old, _ in replacements if old not in c]
    if missed:
        print("deepseek_mtp.py: anchors not found — NOT patched:", missed)
    else:
        for old, new in replacements:
            c = c.replace(old, new)
        with open(mtp_path, "w") as f:
            f.write(c)
        print("deepseek_mtp.py PATCHED (all config reads)")