linzhao-amd committed on
Commit
e24005b
·
verified ·
1 Parent(s): ff33cb4

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -0
README.md CHANGED
@@ -36,6 +36,7 @@ cd Quark/examples/torch/language_modeling/llm_ptq/
36
  python3 quantize_quark.py --model_dir $MODEL_DIR \
37
  --quant_scheme w_mxfp4_a_mxfp4 \
38
  --group_size 32 \
 
39
  --num_calib_data 128 \
40
  --exclude_layers "lm_head" \
41
  --multi_device \
 
36
  python3 quantize_quark.py --model_dir $MODEL_DIR \
37
  --quant_scheme w_mxfp4_a_mxfp4 \
38
  --group_size 32 \
39
+ --kv_cache_dtype fp8 \
40
  --num_calib_data 128 \
41
  --exclude_layers "lm_head" \
42
  --multi_device \