Add files using upload-large-folder tool

Files changed (2) hide show

llm-compressor/r1_1776_moe_w8a8_fp8dyn.py ADDED Viewed

+from transformers import AutoTokenizer, AutoModelForCausalLM
+from llmcompressor.transformers import oneshot
+from llmcompressor.modifiers.quantization import QuantizationModifier
+MODEL_ID = "perplexity-ai/r1-1776"
+model = AutoModelForCausalLM.from_pretrained(
+  MODEL_ID, device_map="auto", torch_dtype="auto", trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
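+# Configure simple post-training quantization (PTQ): apply the FP8_DYNAMIC scheme to
+# every Linear layer, skipping lm_head and any module whose name matches .*mlp.gate$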
+recipe = QuantizationModifier(
+  targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head","re:.*mlp.gate$"])
+# Apply the quantization algorithm.
+oneshot(model=model, recipe=recipe, trust_remote_code_model=True)
+# Save the quantized model and its tokenizer to SAVE_DIR.
+SAVE_DIR = "output/" + MODEL_ID.split("/")[1] + "-FP8-Dynamic"
+model.save_pretrained(SAVE_DIR)
+tokenizer.save_pretrained(SAVE_DIR)

recipe.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 DEFAULT_stage:
   DEFAULT_modifiers:
     QuantizationModifier:
-      ignore: [lm_head]
       targets: [Linear]
       scheme: FP8_DYNAMIC

 DEFAULT_stage:
   DEFAULT_modifiers:
     QuantizationModifier:
+      ignore: [lm_head, 're:.*mlp.gate$']
       targets: [Linear]
       scheme: FP8_DYNAMIC