Spaces:
Sleeping
Sleeping
Sahil Seemant committed on
Commit ·
309cfde
1
Parent(s): 402e3e2
Fix TypeError by using explicit BitsAndBytesConfig for quantization
Browse files - chat_gui.py +12 -3
chat_gui.py
CHANGED
|
@@ -19,6 +19,7 @@ except (ImportError, ModuleNotFoundError):
|
|
| 19 |
AutoModelForImageTextToText,
|
| 20 |
AutoTokenizer,
|
| 21 |
AutoProcessor,
|
|
|
|
| 22 |
TextIteratorStreamer
|
| 23 |
)
|
| 24 |
from peft import PeftModel
|
|
@@ -310,12 +311,20 @@ if st.session_state.messages and st.session_state.messages[-1]["role"] == "user"
|
|
| 310 |
trust_remote_code=True,
|
| 311 |
use_fast=False
|
| 312 |
)
|
| 313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
model = model_class.from_pretrained(
|
| 315 |
conf["path"],
|
| 316 |
-
torch_dtype=torch.float16,
|
| 317 |
device_map="auto",
|
| 318 |
-
|
| 319 |
token=hf_token,
|
| 320 |
trust_remote_code=True
|
| 321 |
)
|
|
|
|
| 19 |
AutoModelForImageTextToText,
|
| 20 |
AutoTokenizer,
|
| 21 |
AutoProcessor,
|
| 22 |
+
BitsAndBytesConfig,
|
| 23 |
TextIteratorStreamer
|
| 24 |
)
|
| 25 |
from peft import PeftModel
|
|
|
|
| 311 |
trust_remote_code=True,
|
| 312 |
use_fast=False
|
| 313 |
)
|
| 314 |
+
|
| 315 |
+
# Use 4-bit quantization config (more stable than passing load_in_4bit directly)
|
| 316 |
+
quantization_config = BitsAndBytesConfig(
|
| 317 |
+
load_in_4bit=True,
|
| 318 |
+
bnb_4bit_compute_dtype=torch.float16,
|
| 319 |
+
bnb_4bit_quant_type="nf4",
|
| 320 |
+
bnb_4bit_use_double_quant=True,
|
| 321 |
+
)
|
| 322 |
+
|
| 323 |
+
# Load model with explicit quantization config
|
| 324 |
model = model_class.from_pretrained(
|
| 325 |
conf["path"],
|
|
|
|
| 326 |
device_map="auto",
|
| 327 |
+
quantization_config=quantization_config,
|
| 328 |
token=hf_token,
|
| 329 |
trust_remote_code=True
|
| 330 |
)
|