Commit · 6f71cef
Parent(s): 72779a2

remove quant, change to bfloat16

Files changed: milkless_gradio.py (+3 -2)
milkless_gradio.py CHANGED

@@ -9,13 +9,14 @@ import gradio as gr
 # quantization_config = BitsAndBytesConfig(load_in_4bit=True)
 torch_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.mps.is_available() else "cpu")
 
-torch_dtype = torch.
+torch_dtype = torch.bfloat16 if torch_device in ["cuda", "mps"] else torch.float32
 
 llama_model=AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct",
 # quantization_config=quantization_config,
 torch_dtype=torch_dtype,
 device_map=torch_device,
-load_in_4bit=True
+# load_in_4bit=True #for puny devices like mine.
+)
 
 llama_tokenizer=AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
 
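For reference, the post-commit load path amounts to the following self-contained sketch. It is not the Space's full file: the imports are assumed (the hunk header shows `import gradio as gr` nearby), and the MPS check is written as `torch.backends.mps.is_available()`, the long-standing spelling; the diff's `torch.mps.is_available()` may require a newer PyTorch release.

    # Sketch of the load path after this commit; assumes torch and transformers
    # are installed and the gated Llama checkpoint is accessible.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Prefer CUDA, then Apple MPS, then CPU.
    torch_device = (
        "cuda"
        if torch.cuda.is_available()
        else ("mps" if torch.backends.mps.is_available() else "cpu")
    )

    # bfloat16 on accelerators, float32 on CPU: the dtype switch this commit adds.
    torch_dtype = torch.bfloat16 if torch_device in ["cuda", "mps"] else torch.float32

    llama_model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-3.2-3B-Instruct",
        torch_dtype=torch_dtype,
        device_map=torch_device,
        # load_in_4bit=True  # for puny devices like mine.
    )
    llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")

If 4-bit loading were ever needed again (the "remove quant" of the commit message), the commented-out line at the top of the hunk shows the intended route: construct `BitsAndBytesConfig(load_in_4bit=True)` and pass it as `quantization_config=` to `from_pretrained`, rather than the bare `load_in_4bit=True` kwarg removed here, which recent transformers releases deprecate in favor of `quantization_config`.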