Spaces:
Runtime error
Runtime error
changes according to community
Browse files
app.py
CHANGED
|
@@ -25,10 +25,13 @@ LICENSE = """
|
|
| 25 |
if not torch.cuda.is_available():
|
| 26 |
DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
|
| 27 |
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
if torch.cuda.is_available():
|
| 30 |
model_id = "mistral-community/Mixtral-8x22B-v0.1-4bit"
|
| 31 |
-
model = AutoModelForCausalLM.from_pretrained(model_id,quantization_config =
|
| 32 |
device_map="cuda",
|
| 33 |
# torch_dtype=torch.float16,
|
| 34 |
# load_in_8bit=True,
|
|
|
|
| 25 |
if not torch.cuda.is_available():
|
| 26 |
DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
|
| 27 |
|
| 28 |
+
quantization_config = BitsAndBytesConfig(
|
| 29 |
+
load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16, llm_int8_enable_fp32_cpu_offload=True
|
| 30 |
+
)
|
| 31 |
|
| 32 |
if torch.cuda.is_available():
|
| 33 |
model_id = "mistral-community/Mixtral-8x22B-v0.1-4bit"
|
| 34 |
+
model = AutoModelForCausalLM.from_pretrained(model_id,quantization_config = quantization_config,
|
| 35 |
device_map="cuda",
|
| 36 |
# torch_dtype=torch.float16,
|
| 37 |
# load_in_8bit=True,
|