Update README.md
Browse files
README.md
CHANGED
|
@@ -135,7 +135,6 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
| 135 |
patch_linearlayers(model, patch_add_quant_config,
|
| 136 |
BaseQuantizeConfig(nbits=4, group_size=64, quant_scale=False, quant_zero=False, axis=1))
|
| 137 |
HQQLinear.set_backend(HQQBackend.PYTORCH)
|
| 138 |
-
model.eval();
|
| 139 |
|
| 140 |
from hqq.utils.patching import prepare_for_inference
|
| 141 |
prepare_for_inference(model, backend="torchao_int4")
|
|
|
|
| 135 |
patch_linearlayers(model, patch_add_quant_config,
|
| 136 |
BaseQuantizeConfig(nbits=4, group_size=64, quant_scale=False, quant_zero=False, axis=1))
|
| 137 |
HQQLinear.set_backend(HQQBackend.PYTORCH)
|
|
|
|
| 138 |
|
| 139 |
from hqq.utils.patching import prepare_for_inference
|
| 140 |
prepare_for_inference(model, backend="torchao_int4")
|