Update README.md
Browse files
README.md
CHANGED
|
@@ -30,9 +30,8 @@ pip install -vvv --no-build-isolation -e .
|
|
| 30 |
|
| 31 |
### Sample code
|
| 32 |
```
|
| 33 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
|
| 34 |
-
from optimum.gptq import GPTQQuantizer, load_quantized_model
|
| 35 |
import torch
|
|
|
|
| 36 |
model_name = "webbigdata/C3TR-Adapter_gptq"
|
| 37 |
|
| 38 |
# thanks to tk-master
|
|
@@ -41,9 +40,11 @@ config = AutoConfig.from_pretrained(model_name)
|
|
| 41 |
config.quantization_config["use_exllama"] = False
|
| 42 |
config.quantization_config["exllama_config"] = {"version":2}
|
| 43 |
|
|
|
|
| 44 |
max_memory={0: "12GiB", "cpu": "10GiB"}
|
|
|
|
| 45 |
quantized_model = AutoModelForCausalLM.from_pretrained(model_name
|
| 46 |
-
, torch_dtype=torch.bfloat16 #
|
| 47 |
, device_map="auto", max_memory=max_memory
|
| 48 |
, config=config)
|
| 49 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
|
| 30 |
|
| 31 |
### Sample code
|
| 32 |
```
|
|
|
|
|
|
|
| 33 |
import torch
|
| 34 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
|
| 35 |
model_name = "webbigdata/C3TR-Adapter_gptq"
|
| 36 |
|
| 37 |
# thanks to tk-master
|
|
|
|
| 40 |
config.quantization_config["use_exllama"] = False
|
| 41 |
config.quantization_config["exllama_config"] = {"version":2}
|
| 42 |
|
| 43 |
+
# adjust your GPU memory size. 0 means the first GPU.
|
| 44 |
max_memory={0: "12GiB", "cpu": "10GiB"}
|
| 45 |
+
|
| 46 |
quantized_model = AutoModelForCausalLM.from_pretrained(model_name
|
| 47 |
+
, torch_dtype=torch.bfloat16 # change to torch.float16 if you use free Colab or another environment that does not support bfloat16.
|
| 48 |
, device_map="auto", max_memory=max_memory
|
| 49 |
, config=config)
|
| 50 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|