Update app.py
app.py CHANGED
@@ -7,7 +7,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 import os
 import bitnet
-
+os.system('pip install mamba-ssm causal-conv1d>=1.2.0')
 key = os.environ.get("key")
 from huggingface_hub import login
 login(key)
@@ -28,7 +28,8 @@ model = AutoModelForCausalLM.from_pretrained(model_id,
     quantization_config=nf4_config,
     # attn_implementation="flash_attention_2",
     # torch_dtype = torch.bfloat16,
-    device_map="auto"
+    device_map="auto",
+    trust_remote_code=True
 )
 
 # replace_linears_in_hf(model)
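For readers following along, a minimal sketch of app.py's load path after this commit. The names model_id and nf4_config come from the hunk headers; the exact BitsAndBytesConfig fields and the placeholder model id are assumptions, not taken from the repo. One caveat on the committed install line: because the version specifier is unquoted, the shell parses ">=1.2.0" as an output redirect, so pip actually installs an unpinned causal-conv1d; the sketch quotes the specifier.

import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Runtime install of the Mamba CUDA kernels, as in the commit (a common
# workaround on hosted Spaces). Quoted so the shell does not treat ">=" as
# a redirect and silently drop the version pin.
os.system('pip install mamba-ssm "causal-conv1d>=1.2.0"')

model_id = "org/mamba-model"  # hypothetical placeholder; the real id is set earlier in app.py

# Assumed NF4 setup; the diff only shows that a variable named nf4_config
# is passed as quantization_config.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=nf4_config,
    device_map="auto",        # spread layers across available devices
    trust_remote_code=True,   # allow the checkpoint's custom modeling code
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

Presumably trust_remote_code=True was added alongside the mamba-ssm install because the checkpoint ships custom modeling code that transformers will only execute with that flag set.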