Update README.md
Browse files
README.md
CHANGED
|
@@ -68,7 +68,7 @@ This model was created with [llm-compressor](https://github.com/vllm-project/llm
|
|
| 68 |
|
| 69 |
|
| 70 |
```bash
|
| 71 |
-
python quantize.py --model_path ibm-granite/granite-3.1-8b-base --quant_path "output_dir/granite-3.1-8b-base-quantized.w4a16" --calib_size 3072 --dampening_frac 0.1 --observer mse
|
| 72 |
```
|
| 73 |
|
| 74 |
|
|
@@ -87,7 +87,7 @@ parser.add_argument('--quant_path', type=str)
|
|
| 87 |
parser.add_argument('--calib_size', type=int, default=256)
|
| 88 |
parser.add_argument('--dampening_frac', type=float, default=0.1)
|
| 89 |
parser.add_argument('--observer', type=str, default="minmax")
|
| 90 |
-
parser.add_argument('--actorder', type=str, default="
|
| 91 |
|
| 92 |
args = parser.parse_args()
|
| 93 |
|
|
@@ -131,6 +131,7 @@ recipe = [
|
|
| 131 |
scheme="w4a16",
|
| 132 |
dampening_frac=args.dampening_frac,
|
| 133 |
observer=args.observer,
|
|
|
|
| 134 |
)
|
| 135 |
]
|
| 136 |
oneshot(
|
|
@@ -195,14 +196,14 @@ Here is the updated table where the column for the quantized model is kept, but
|
|
| 195 |
|
| 196 |
| Metric | ibm-granite/granite-3.1-8b-base | neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16 |
|
| 197 |
|-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
|
| 198 |
-
| ARC-Challenge (Acc-Norm, 25-shot) | 64.68 |
|
| 199 |
-
| GSM8K (Strict-Match, 5-shot) | 60.88 |
|
| 200 |
-
| HellaSwag (Acc-Norm, 10-shot) | 83.52 |
|
| 201 |
-
| MMLU (Acc, 5-shot) | 63.33 |
|
| 202 |
-
| TruthfulQA (MC2, 0-shot) | 51.33 |
|
| 203 |
-
| Winogrande (Acc, 5-shot) | 80.90 |
|
| 204 |
-
| **Average Score** | **67.44** | |
|
| 205 |
-
| **Recovery** | **100.00** | |
|
| 206 |
|
| 207 |
---
|
| 208 |
|
|
@@ -225,7 +226,7 @@ Here is the updated table where the column for the quantized model is kept, but
|
|
| 225 |
|
| 226 |
| Metric | ibm-granite/granite-3.1-8b-base | neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16 |
|
| 227 |
|-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
|
| 228 |
-
| HumanEval Pass@1 | 44.10 |
|
| 229 |
|
| 230 |
---
|
| 231 |
|
|
|
|
| 68 |
|
| 69 |
|
| 70 |
```bash
|
| 71 |
+
python quantize.py --model_path ibm-granite/granite-3.1-8b-base --quant_path "output_dir/granite-3.1-8b-base-quantized.w4a16" --calib_size 3072 --dampening_frac 0.1 --observer mse --actorder static
|
| 72 |
```
|
| 73 |
|
| 74 |
|
|
|
|
| 87 |
parser.add_argument('--calib_size', type=int, default=256)
|
| 88 |
parser.add_argument('--dampening_frac', type=float, default=0.1)
|
| 89 |
parser.add_argument('--observer', type=str, default="minmax")
|
| 90 |
+
parser.add_argument('--actorder', type=str, default="dynamic")
|
| 91 |
|
| 92 |
args = parser.parse_args()
|
| 93 |
|
|
|
|
| 131 |
scheme="w4a16",
|
| 132 |
dampening_frac=args.dampening_frac,
|
| 133 |
observer=args.observer,
|
| 134 |
+
actorder=args.actorder,
|
| 135 |
)
|
| 136 |
]
|
| 137 |
oneshot(
|
|
|
|
| 196 |
|
| 197 |
| Metric | ibm-granite/granite-3.1-8b-base | neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16 |
|
| 198 |
|-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
|
| 199 |
+
| ARC-Challenge (Acc-Norm, 25-shot) | 64.68 | 62.37 |
|
| 200 |
+
| GSM8K (Strict-Match, 5-shot) | 60.88 | 54.89 |
|
| 201 |
+
| HellaSwag (Acc-Norm, 10-shot) | 83.52 | 82.53 |
|
| 202 |
+
| MMLU (Acc, 5-shot) | 63.33 | 62.78 |
|
| 203 |
+
| TruthfulQA (MC2, 0-shot) | 51.33 | 51.30 |
|
| 204 |
+
| Winogrande (Acc, 5-shot) | 80.90 | 79.24 |
|
| 205 |
+
| **Average Score** | **67.44** | **65.52** |
|
| 206 |
+
| **Recovery** | **100.00** | **97.15** |
|
| 207 |
|
| 208 |
---
|
| 209 |
|
|
|
|
| 226 |
|
| 227 |
| Metric | ibm-granite/granite-3.1-8b-base | neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16 |
|
| 228 |
|-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
|
| 229 |
+
| HumanEval Pass@1 | 44.10 | 40.70 |
|
| 230 |
|
| 231 |
---
|
| 232 |
|