Spaces:

sksameermujahid
/

testingnewcode

Runtime error

sksameermujahid committed on Mar 25, 2025

Commit

15a0540

verified ·

1 Parent(s): e341d1c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -196,11 +196,13 @@ def load_tokenizer_and_model():
         print("Tokenizer loaded successfully.")
         print("Loading LLM model...")
-        # Load the base model
         base_model = AutoModelForCausalLM.from_pretrained(
             base_model_name,
             trust_remote_code=True,
-            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
             device_map="auto"
         )
@@ -208,9 +210,9 @@ def load_tokenizer_and_model():
         model_llm = PeftModel.from_pretrained(
             base_model,
             model_dir,
-            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-            device_map="auto"
-        ).to(device)
         print("LLM model loaded successfully.")
         return tokenizer, model_llm

         print("Tokenizer loaded successfully.")
         print("Loading LLM model...")
+        # Load the base model with 4-bit quantization
         base_model = AutoModelForCausalLM.from_pretrained(
             base_model_name,
             trust_remote_code=True,
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16,
             device_map="auto"
         )
         model_llm = PeftModel.from_pretrained(
             base_model,
             model_dir,
+            device_map="auto",
+            is_trainable=False
+        )
         print("LLM model loaded successfully.")
         return tokenizer, model_llm