Spaces:
Paused
Paused
Tao Wu committed on
Commit ·
28a9b71
1
Parent(s): 032427b
quantization
Browse files
- app/embedding_setup.py +2 -1
app/embedding_setup.py
CHANGED
|
@@ -40,7 +40,7 @@ quantization_config = BitsAndBytesConfig(
|
|
| 40 |
bnb_4bit_quant_type="nf4"
|
| 41 |
)
|
| 42 |
|
| 43 |
-
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL,
|
| 44 |
|
| 45 |
|
| 46 |
first_token = 'First'
|
|
@@ -50,6 +50,7 @@ first_id = tokenizer.convert_tokens_to_ids(first_token)
|
|
| 50 |
second_id = tokenizer.convert_tokens_to_ids(second_token)
|
| 51 |
model = AutoModelForCausalLM.from_pretrained(
|
| 52 |
LLM_MODEL,
|
|
|
|
| 53 |
torch_dtype=torch.float16,
|
| 54 |
device_map="auto",
|
| 55 |
token=hf_auth,
|
|
|
|
| 40 |
bnb_4bit_quant_type="nf4"
|
| 41 |
)
|
| 42 |
|
| 43 |
+
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, token=hf_auth)
|
| 44 |
|
| 45 |
|
| 46 |
first_token = 'First'
|
|
|
|
| 50 |
second_id = tokenizer.convert_tokens_to_ids(second_token)
|
| 51 |
model = AutoModelForCausalLM.from_pretrained(
|
| 52 |
LLM_MODEL,
|
| 53 |
+
quantization_config=quantization_config,
|
| 54 |
torch_dtype=torch.float16,
|
| 55 |
device_map="auto",
|
| 56 |
token=hf_auth,
|