Update README.md
Browse files
README.md
CHANGED
|
@@ -68,7 +68,7 @@ This model was created with [llm-compressor](https://github.com/vllm-project/llm
|
|
| 68 |
|
| 69 |
|
| 70 |
```bash
|
| 71 |
-
python quantize.py --model_path ibm-granite/granite-3.1-2b-instruct --quant_path "output_dir/granite-3.1-2b-instruct-quantized.w8a8" --calib_size
|
| 72 |
```
|
| 73 |
|
| 74 |
|
|
@@ -130,7 +130,7 @@ mappings=[
|
|
| 130 |
]
|
| 131 |
|
| 132 |
recipe = [
|
| 133 |
-
SmoothQuantModifier(smoothing_strength=0.
|
| 134 |
GPTQModifier(
|
| 135 |
targets=["Linear"],
|
| 136 |
ignore=["lm_head"],
|
|
|
|
| 68 |
|
| 69 |
|
| 70 |
```bash
|
| 71 |
+
python quantize.py --model_path ibm-granite/granite-3.1-2b-instruct --quant_path "output_dir/granite-3.1-2b-instruct-quantized.w8a8" --calib_size 2048 --dampening_frac 0.01 --observer mse
|
| 72 |
```
|
| 73 |
|
| 74 |
|
|
|
|
| 130 |
]
|
| 131 |
|
| 132 |
recipe = [
|
| 133 |
+
SmoothQuantModifier(smoothing_strength=0.7, ignore=ignore, mappings=mappings),
|
| 134 |
GPTQModifier(
|
| 135 |
targets=["Linear"],
|
| 136 |
ignore=["lm_head"],
|