Update README.md
Browse files
README.md
CHANGED
|
@@ -86,7 +86,7 @@ vLLM also supports OpenAI-compatible serving. See the [documentation](https://do
|
|
| 86 |
|
| 87 |
```python
|
| 88 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 89 |
-
from llmcompressor.modifiers.quantization import
|
| 90 |
from llmcompressor.transformers import oneshot
|
| 91 |
|
| 92 |
# Load model
|
|
@@ -111,7 +111,7 @@ vLLM also supports OpenAI-compatible serving. See the [documentation](https://do
|
|
| 111 |
ds = ds.map(preprocess_fn)
|
| 112 |
|
| 113 |
# Configure the quantization algorithm and scheme
|
| 114 |
-
recipe =
|
| 115 |
targets="Linear",
|
| 116 |
scheme="W4A16",
|
| 117 |
ignore=["lm_head"],
|
|
|
|
| 86 |
|
| 87 |
```python
|
| 88 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 89 |
+
from llmcompressor.modifiers.quantization import GPTQModifier
|
| 90 |
from llmcompressor.transformers import oneshot
|
| 91 |
|
| 92 |
# Load model
|
|
|
|
| 111 |
ds = ds.map(preprocess_fn)
|
| 112 |
|
| 113 |
# Configure the quantization algorithm and scheme
|
| 114 |
+
recipe = GPTQModifier(
|
| 115 |
targets="Linear",
|
| 116 |
scheme="W4A16",
|
| 117 |
ignore=["lm_head"],
|