amd
/

Qwen3-Coder-Next-MXFP4

8-bit precision

Model card Files Files and versions

jiaxwang committed on Feb 3

Commit

c2b793b

·

verified ·

1 Parent(s): 8907e49

Update README.md

Files changed (1) hide show

README.md +0 -2

README.md CHANGED Viewed

@@ -52,7 +52,6 @@ exclude_layers = ["lm_head", "*linear_attn.in_proj_ba", "*linear_attn.in_proj_qk
 model = AutoModelForCausalLM.from_pretrained(ckpt_path, torch_dtype="auto", device_map="auto")
 model.eval()
 tokenizer = AutoTokenizer.from_pretrained(ckpt_path, trust_remote_code=True)
-processor = AutoProcessor.from_pretrained(ckpt_path, trust_remote_code=True)
 # Get quant config from template
 template = LLMTemplate.get(model.config.model_type)
@@ -66,7 +65,6 @@ model = quantizer.freeze(model)
 # Export hf_format
 export_safetensors(model, output_dir, custom_mode="quark")
 tokenizer.save_pretrained(output_dir)
-processor.save_pretrained(output_dir)
 # Evaluate PPL (optional)
 testdata = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")

 model = AutoModelForCausalLM.from_pretrained(ckpt_path, torch_dtype="auto", device_map="auto")
 model.eval()
 tokenizer = AutoTokenizer.from_pretrained(ckpt_path, trust_remote_code=True)
 # Get quant config from template
 template = LLMTemplate.get(model.config.model_type)
 # Export hf_format
 export_safetensors(model, output_dir, custom_mode="quark")
 tokenizer.save_pretrained(output_dir)
 # Evaluate PPL (optional)
 testdata = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")