fix: load 8-bit model
- app.py +4 -6
- requirements.txt +1 -0
app.py CHANGED

@@ -13,16 +13,14 @@ tokenizer = AutoTokenizer.from_pretrained(
     use_auth_token=auth_token if auth_token else True,
 )
 model = AutoModelForCausalLM.from_pretrained(
-    "CarperAI/vicuna-13b-fine-tuned-rlhf-
-    torch_dtype=torch.float16,
-    device_map="auto",
-    offload_folder="./offload",
-    low_cpu_mem_usage=True,  # Not required for demo but leave for now
+    "CarperAI/vicuna-13b-fine-tuned-rlhf-8bit",
     use_auth_token=auth_token if auth_token else True,
 )
 model.cuda()
+
+
 max_context_length = model.config.max_position_embeddings
-max_new_tokens =
+max_new_tokens = 512


 prompt_template = Template("""\
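For context, the more common way to load weights in 8-bit with this stack is to pass load_in_8bit=True to from_pretrained, which runs the int8 kernels from bitsandbytes and hands layer placement to accelerate. A minimal sketch under that assumption; the load_in_8bit and device_map arguments below are illustrative and not part of this commit, which instead points at a pre-quantized checkpoint:

    from transformers import AutoModelForCausalLM

    # Sketch only: explicit 8-bit loading via bitsandbytes.
    # These arguments are assumptions, not the committed code above.
    model = AutoModelForCausalLM.from_pretrained(
        "CarperAI/vicuna-13b-fine-tuned-rlhf-8bit",
        load_in_8bit=True,   # quantize linear layers to int8 on load
        device_map="auto",   # let accelerate place layers on the GPU
    )

A model loaded this way is already placed on device by accelerate, so the explicit model.cuda() kept in the diff would be redundant in that case (and some transformers versions reject .to()/.cuda() on int8 models).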
requirements.txt CHANGED

@@ -1,3 +1,4 @@
 accelerate
 torch
+bitsandbytes
 transformers>=4.28.0,<4.29.0
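The added bitsandbytes dependency provides the int8 kernels that transformers uses for 8-bit inference; presumably the 8-bit checkpoint needs it at load time. A quick sanity check after installing (assumes a CUDA machine):

    # Confirm the 8-bit stack imports cleanly.
    import bitsandbytes  # noqa: F401  (used by transformers for int8)
    import transformers

    print(transformers.__version__)  # pinned to 4.28.x in requirements.txt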