Sahil Seemant committed on
Commit
309cfde
·
1 Parent(s): 402e3e2

Fix TypeError by using explicit BitsAndBytesConfig for quantization

Browse files
Files changed (1) hide show
  1. chat_gui.py +12 -3
chat_gui.py CHANGED
@@ -19,6 +19,7 @@ except (ImportError, ModuleNotFoundError):
19
  AutoModelForImageTextToText,
20
  AutoTokenizer,
21
  AutoProcessor,
 
22
  TextIteratorStreamer
23
  )
24
  from peft import PeftModel
@@ -310,12 +311,20 @@ if st.session_state.messages and st.session_state.messages[-1]["role"] == "user"
310
  trust_remote_code=True,
311
  use_fast=False
312
  )
313
- # Use 4-bit quantization if on low-memory cloud
 
 
 
 
 
 
 
 
 
314
  model = model_class.from_pretrained(
315
  conf["path"],
316
- torch_dtype=torch.float16,
317
  device_map="auto",
318
- load_in_4bit=True,
319
  token=hf_token,
320
  trust_remote_code=True
321
  )
 
19
  AutoModelForImageTextToText,
20
  AutoTokenizer,
21
  AutoProcessor,
22
+ BitsAndBytesConfig,
23
  TextIteratorStreamer
24
  )
25
  from peft import PeftModel
 
311
  trust_remote_code=True,
312
  use_fast=False
313
  )
314
+
315
+ # Use 4-bit quantization config (more stable than passing load_in_4bit directly)
316
+ quantization_config = BitsAndBytesConfig(
317
+ load_in_4bit=True,
318
+ bnb_4bit_compute_dtype=torch.float16,
319
+ bnb_4bit_quant_type="nf4",
320
+ bnb_4bit_use_double_quant=True,
321
+ )
322
+
323
+ # Load model with explicit quantization config
324
  model = model_class.from_pretrained(
325
  conf["path"],
 
326
  device_map="auto",
327
+ quantization_config=quantization_config,
328
  token=hf_token,
329
  trust_remote_code=True
330
  )