Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -219,17 +219,12 @@ class Qwen25SmallLLM:
|
|
| 219 |
if use_4bit:
|
| 220 |
quant_config = BitsAndBytesConfig(
|
| 221 |
load_in_4bit=True,
|
| 222 |
-
bnb_4bit_compute_dtype=torch.
|
| 223 |
bnb_4bit_use_double_quant=True,
|
| 224 |
-
bnb_4bit_quant_type="nf4"
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
quant_config = BitsAndBytesConfig(
|
| 229 |
-
load_in_8bit=True,
|
| 230 |
-
llm_int8_enable_fp32_cpu_offload=True
|
| 231 |
-
)
|
| 232 |
-
logger.info("Using 8-bit quantization with BitsAndBytes")
|
| 233 |
|
| 234 |
# Try quantized load
|
| 235 |
self.model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 219 |
if use_4bit:
|
| 220 |
quant_config = BitsAndBytesConfig(
|
| 221 |
load_in_4bit=True,
|
| 222 |
+
bnb_4bit_compute_dtype=torch.float16,
|
| 223 |
bnb_4bit_use_double_quant=True,
|
| 224 |
+
bnb_4bit_quant_type="nf4",
|
| 225 |
+
llm_int8_threshold=0.0,
|
| 226 |
+
llm_int8_skip_modules=["lm_head"]
|
| 227 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
# Try quantized load
|
| 230 |
self.model = AutoModelForCausalLM.from_pretrained(
|