emilss commited on
Commit
cb1490c
·
1 Parent(s): f59fbaa

Add readme

Browse files
Files changed (1) hide show
  1. README.md +50 -0
README.md CHANGED
@@ -1,3 +1,53 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+
5
+ # Magistral-Small-2506-FP8-dynamic
6
+
7
+ FP8-dynamic quantized version of [Magistral-Small-2506](https://huggingface.co/mistralai/Magistral-Small-2506).
8
+
9
+ ## Creation
10
+
11
+ This model was created with [llm-compressor](https://github.com/vllm-project/llm-compressor) by running the code snippet
12
+ below.
13
+
14
+ ```python
import argparse
import os

from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.transformers import oneshot
from transformers import AutoModelForCausalLM, AutoTokenizer


def main():
    """Quantize a HuggingFace causal LM to FP8 and save it to disk.

    Command-line args:
        --model_id:  required HuggingFace model ID (e.g. "org/model-name").
        --save_path: directory in which to create "<model_name>-FP8-dynamic"
                     (defaults to the current directory).
    """
    parser = argparse.ArgumentParser(description='Quantize a transformer model to FP8')
    parser.add_argument('--model_id', type=str, required=True,
                        help='The model ID from HuggingFace (e.g., "meta-llama/Meta-Llama-3-8B-Instruct")')
    parser.add_argument('--save_path', type=str, default='.',
                        help='Custom path to save the quantized model. If not provided, will use model_name-FP8-dynamic')
    args = parser.parse_args()

    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        args.model_id, device_map="auto", torch_dtype="auto", trust_remote_code=True,
    )
    # Load the tokenizer too, so the saved directory is a complete,
    # directly-loadable checkpoint (the final message promises it is saved).
    tokenizer = AutoTokenizer.from_pretrained(args.model_id, trust_remote_code=True)

    # Configure the quantization algorithm and scheme.
    # Per llm-compressor's docs, FP8_DYNAMIC uses static weight scales and
    # dynamic per-token activation scales; lm_head is kept unquantized.
    recipe = QuantizationModifier(
        targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head"]
    )

    # Apply quantization
    oneshot(model=model, recipe=recipe)

    # Use the last path component: split("/")[1] raised IndexError for IDs
    # without a namespace (no "/") and broke on trailing slashes.
    model_name = args.model_id.rstrip("/").split("/")[-1]
    save_path = os.path.join(args.save_path, model_name + "-FP8-dynamic")
    os.makedirs(save_path, exist_ok=True)

    # Save to disk in compressed-tensors format
    model.save_pretrained(save_path)
    tokenizer.save_pretrained(save_path)
    print(f"Model and tokenizer saved to: {save_path}")


if __name__ == "__main__":
    main()
+ ```