inference-optimization
/

test_qwen3_next_mtp

RelaxingSnorlax committed 4 days ago

Commit

a79f558

verified ·

1 Parent(s): f9a674a

Upload config.json with huggingface_hub

Files changed (1) hide show

config.json CHANGED Viewed

@@ -1,9 +1,8 @@
 {
   "speculators_model_type": "mtp",
   "architectures": [
-    "FastMTPSpeculator"
   ],
-  "checkpoint_format": "qwen3_next",
   "num_speculative_steps": 3,
   "num_nextn_predict_layers": 1,
   "mtp_loss_step_weights": [
@@ -31,5 +30,21 @@
     "rope_theta": 10000000,
     "tie_word_embeddings": false,
     "attention_bias": true
   }
-}

 {
   "speculators_model_type": "mtp",
   "architectures": [
+    "MTPSpeculator"
   ],
   "num_speculative_steps": 3,
   "num_nextn_predict_layers": 1,
   "mtp_loss_step_weights": [
     "rope_theta": 10000000,
     "tie_word_embeddings": false,
     "attention_bias": true
+  },
+  "speculators_config": {
+    "algorithm": "mtp",
+    "proposal_methods": [
+      {
+        "proposal_type": "greedy",
+        "speculative_tokens": 3,
+        "verifier_accept_k": 1,
+        "accept_tolerance": 0.0
+      }
+    ],
+    "default_proposal_method": "greedy",
+    "verifier": {
+      "name_or_path": "Qwen/Qwen3-Next-80B-A3B-Instruct",
+      "architectures": ["Qwen3MoeForCausalLM"]
+    }
   }
+}