Update README.md
Browse files
README.md
CHANGED
|
@@ -52,7 +52,6 @@ exclude_layers = ["lm_head", "*linear_attn.in_proj_ba", "*linear_attn.in_proj_qk
|
|
| 52 |
model = AutoModelForCausalLM.from_pretrained(ckpt_path, torch_dtype="auto", device_map="auto")
|
| 53 |
model.eval()
|
| 54 |
tokenizer = AutoTokenizer.from_pretrained(ckpt_path, trust_remote_code=True)
|
| 55 |
-
processor = AutoProcessor.from_pretrained(ckpt_path, trust_remote_code=True)
|
| 56 |
|
| 57 |
# Get quant config from template
|
| 58 |
template = LLMTemplate.get(model.config.model_type)
|
|
@@ -66,7 +65,6 @@ model = quantizer.freeze(model)
|
|
| 66 |
# Export hf_format
|
| 67 |
export_safetensors(model, output_dir, custom_mode="quark")
|
| 68 |
tokenizer.save_pretrained(output_dir)
|
| 69 |
-
processor.save_pretrained(output_dir)
|
| 70 |
|
| 71 |
# Evaluate PPL (optional)
|
| 72 |
testdata = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
|
|
|
|
| 52 |
model = AutoModelForCausalLM.from_pretrained(ckpt_path, torch_dtype="auto", device_map="auto")
|
| 53 |
model.eval()
|
| 54 |
tokenizer = AutoTokenizer.from_pretrained(ckpt_path, trust_remote_code=True)
|
|
|
|
| 55 |
|
| 56 |
# Get quant config from template
|
| 57 |
template = LLMTemplate.get(model.config.model_type)
|
|
|
|
| 65 |
# Export hf_format
|
| 66 |
export_safetensors(model, output_dir, custom_mode="quark")
|
| 67 |
tokenizer.save_pretrained(output_dir)
|
|
|
|
| 68 |
|
| 69 |
# Evaluate PPL (optional)
|
| 70 |
testdata = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
|