Update README.md
Browse files
README.md
CHANGED
|
@@ -49,9 +49,9 @@ quant_scheme = "mxfp4"
|
|
| 49 |
exclude_layers = ["lm_head", "*linear_attn.in_proj_ba", "*linear_attn.in_proj_qkvz", "*mlp.gate", "*mlp.shared_expert_gate", "*self_attn.k_proj", "*self_attn.q_proj", "*self_attn.v_proj"]
|
| 50 |
|
| 51 |
# Load model
|
|
|
|
| 52 |
model = AutoModelForCausalLM.from_pretrained(ckpt_path, torch_dtype="auto", device_map="auto")
|
| 53 |
model.eval()
|
| 54 |
-
tokenizer = AutoTokenizer.from_pretrained(ckpt_path, trust_remote_code=True)
|
| 55 |
|
| 56 |
# Get quant config from template
|
| 57 |
template = LLMTemplate.get(model.config.model_type)
|
|
|
|
| 49 |
exclude_layers = ["lm_head", "*linear_attn.in_proj_ba", "*linear_attn.in_proj_qkvz", "*mlp.gate", "*mlp.shared_expert_gate", "*self_attn.k_proj", "*self_attn.q_proj", "*self_attn.v_proj"]
|
| 50 |
|
| 51 |
# Load model
|
| 52 |
+
tokenizer = AutoTokenizer.from_pretrained(ckpt_path, trust_remote_code=True)
|
| 53 |
model = AutoModelForCausalLM.from_pretrained(ckpt_path, torch_dtype="auto", device_map="auto")
|
| 54 |
model.eval()
|
|
|
|
| 55 |
|
| 56 |
# Get quant config from template
|
| 57 |
template = LLMTemplate.get(model.config.model_type)
|