Update README.md
Browse files
README.md
CHANGED
|
@@ -49,9 +49,9 @@ quant_scheme = "mxfp4"
|
|
| 49 |
exclude_layers = ["lm_head", "*linear_attn.in_proj_ba", "*linear_attn.in_proj_qkvz", "*mlp.gate", "*mlp.shared_expert_gate", "*self_attn.k_proj", "*self_attn.q_proj", "*self_attn.v_proj"]
|
| 50 |
|
| 51 |
# Load model
|
|
|
|
| 52 |
model = AutoModelForCausalLM.from_pretrained(ckpt_path, torch_dtype="auto", device_map="auto")
|
| 53 |
model.eval()
|
| 54 |
-
tokenizer = AutoTokenizer.from_pretrained(ckpt_path, trust_remote_code=True)
|
| 55 |
|
| 56 |
# Get quant config from template
|
| 57 |
template = LLMTemplate.get(model.config.model_type)
|
|
|
|
| 49 |
exclude_layers = ["lm_head", "*linear_attn.in_proj_ba", "*linear_attn.in_proj_qkvz", "*mlp.gate", "*mlp.shared_expert_gate", "*self_attn.k_proj", "*self_attn.q_proj", "*self_attn.v_proj"]
|
| 50 |
|
| 51 |
# Load model
|
| 52 |
+
tokenizer = AutoTokenizer.from_pretrained(ckpt_path, trust_remote_code=True)
|
| 53 |
model = AutoModelForCausalLM.from_pretrained(ckpt_path, torch_dtype="auto", device_map="auto")
|
| 54 |
model.eval()
|
|
|
|
| 55 |
|
| 56 |
# Get quant config from template
|
| 57 |
template = LLMTemplate.get(model.config.model_type)
|