Commit · 6f71cef
Parent(s): 72779a2

remove quant, change to bfloat16

Files changed: milkless_gradio.py (+3 -2)
milkless_gradio.py CHANGED

@@ -9,13 +9,14 @@ import gradio as gr
 # quantization_config = BitsAndBytesConfig(load_in_4bit=True)
 torch_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.mps.is_available() else "cpu")
 
-torch_dtype = torch.
+torch_dtype = torch.bfloat16 if torch_device in ["cuda", "mps"] else torch.float32
 
 llama_model=AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct",
 # quantization_config=quantization_config,
 torch_dtype=torch_dtype,
 device_map=torch_device,
-load_in_4bit=True
+# load_in_4bit=True #for puny devices like mine.
+)
 
 llama_tokenizer=AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
 
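For reference, the post-commit load path amounts to the following self-contained sketch. It is not the Space's full file: the imports are assumed (the hunk header shows `import gradio as gr` nearby), and the MPS check is written as `torch.backends.mps.is_available()`, the long-standing spelling; the diff's `torch.mps.is_available()` may require a newer PyTorch release.

    # Sketch of the load path after this commit; assumes torch and transformers
    # are installed and the gated Llama checkpoint is accessible.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Prefer CUDA, then Apple MPS, then CPU.
    torch_device = (
        "cuda"
        if torch.cuda.is_available()
        else ("mps" if torch.backends.mps.is_available() else "cpu")
    )

    # bfloat16 on accelerators, float32 on CPU: the dtype switch this commit adds.
    torch_dtype = torch.bfloat16 if torch_device in ["cuda", "mps"] else torch.float32

    llama_model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-3.2-3B-Instruct",
        torch_dtype=torch_dtype,
        device_map=torch_device,
        # load_in_4bit=True  # for puny devices like mine.
    )
    llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")

If 4-bit loading were ever needed again (the "remove quant" of the commit message), the commented-out line at the top of the hunk shows the intended route: construct `BitsAndBytesConfig(load_in_4bit=True)` and pass it as `quantization_config=` to `from_pretrained`, rather than the bare `load_in_4bit=True` kwarg removed here, which recent transformers releases deprecate in favor of `quantization_config`.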