Add quantize
Browse files
- app.py +11 -3
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import torch
|
|
| 4 |
import os
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
from huggingface_hub import login
|
|
|
|
| 7 |
|
| 8 |
load_dotenv()
|
| 9 |
|
|
@@ -13,14 +14,21 @@ login(hf_token)
|
|
| 13 |
|
| 14 |
# Configuration du modèle
|
| 15 |
model_path = "mistralai/Mistral-Large-Instruct-2411"
|
| 16 |
-
dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
|
| 17 |
|
| 18 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 20 |
model = AutoModelForCausalLM.from_pretrained(
|
| 21 |
model_path,
|
| 22 |
device_map="auto",
|
| 23 |
-
|
| 24 |
)
|
| 25 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 26 |
|
|
|
|
| 4 |
import os
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
from huggingface_hub import login
|
| 7 |
+
from transformers import BitsAndBytesConfig
|
| 8 |
|
| 9 |
load_dotenv()
|
| 10 |
|
|
|
|
| 14 |
|
| 15 |
# Configuration du modèle
|
| 16 |
model_path = "mistralai/Mistral-Large-Instruct-2411"
|
|
|
|
| 17 |
|
| 18 |
+
# Configuration de la quantification 4-bits
|
| 19 |
+
quantization_config = BitsAndBytesConfig(
|
| 20 |
+
load_in_4bit=True,
|
| 21 |
+
bnb_4bit_compute_dtype=torch.float16,
|
| 22 |
+
bnb_4bit_quant_type="nf4",
|
| 23 |
+
bnb_4bit_use_double_quant=True
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
# Initialisation du modèle avec quantification
|
| 27 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 28 |
model = AutoModelForCausalLM.from_pretrained(
|
| 29 |
model_path,
|
| 30 |
device_map="auto",
|
| 31 |
+
quantization_config=quantization_config
|
| 32 |
)
|
| 33 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 34 |
|
requirements.txt
CHANGED
|
@@ -5,7 +5,7 @@ datasets
|
|
| 5 |
sentencepiece
|
| 6 |
tokenizers
|
| 7 |
gradio
|
| 8 |
-
bitsandbytes
|
| 9 |
openai
|
| 10 |
langchain
|
| 11 |
python-dotenv
|
|
|
|
| 5 |
sentencepiece
|
| 6 |
tokenizers
|
| 7 |
gradio
|
| 8 |
+
bitsandbytes>=0.41.1
|
| 9 |
openai
|
| 10 |
langchain
|
| 11 |
python-dotenv
|