jdesiree committed on
Commit
1858424
·
verified ·
1 Parent(s): 05b0c1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -10
app.py CHANGED
@@ -219,17 +219,12 @@ class Qwen25SmallLLM:
219
  if use_4bit:
220
  quant_config = BitsAndBytesConfig(
221
  load_in_4bit=True,
222
- bnb_4bit_compute_dtype=torch.bfloat16,
223
  bnb_4bit_use_double_quant=True,
224
- bnb_4bit_quant_type="nf4"
225
- )
226
- logger.info("Using 4-bit quantization with BitsAndBytes")
227
- else:
228
- quant_config = BitsAndBytesConfig(
229
- load_in_8bit=True,
230
- llm_int8_enable_fp32_cpu_offload=True
231
- )
232
- logger.info("Using 8-bit quantization with BitsAndBytes")
233
 
234
  # Try quantized load
235
  self.model = AutoModelForCausalLM.from_pretrained(
 
219
  if use_4bit:
220
  quant_config = BitsAndBytesConfig(
221
  load_in_4bit=True,
222
+ bnb_4bit_compute_dtype=torch.float16,
223
  bnb_4bit_use_double_quant=True,
224
+ bnb_4bit_quant_type="nf4",
225
+ llm_int8_threshold=0.0,
226
+ llm_int8_skip_modules=["lm_head"]
227
+ )
 
 
 
 
 
228
 
229
  # Try quantized load
230
  self.model = AutoModelForCausalLM.from_pretrained(