Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -196,11 +196,13 @@ def load_tokenizer_and_model():
|
|
| 196 |
print("Tokenizer loaded successfully.")
|
| 197 |
|
| 198 |
print("Loading LLM model...")
|
| 199 |
-
# Load the base model
|
| 200 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 201 |
base_model_name,
|
| 202 |
trust_remote_code=True,
|
| 203 |
-
|
|
|
|
|
|
|
| 204 |
device_map="auto"
|
| 205 |
)
|
| 206 |
|
|
@@ -208,9 +210,9 @@ def load_tokenizer_and_model():
|
|
| 208 |
model_llm = PeftModel.from_pretrained(
|
| 209 |
base_model,
|
| 210 |
model_dir,
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
)
|
| 214 |
|
| 215 |
print("LLM model loaded successfully.")
|
| 216 |
return tokenizer, model_llm
|
|
|
|
| 196 |
print("Tokenizer loaded successfully.")
|
| 197 |
|
| 198 |
print("Loading LLM model...")
|
| 199 |
+
# Load the base model with 4-bit quantization
|
| 200 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 201 |
base_model_name,
|
| 202 |
trust_remote_code=True,
|
| 203 |
+
load_in_4bit=True,
|
| 204 |
+
bnb_4bit_quant_type="nf4",
|
| 205 |
+
bnb_4bit_compute_dtype=torch.float16,
|
| 206 |
device_map="auto"
|
| 207 |
)
|
| 208 |
|
|
|
|
| 210 |
model_llm = PeftModel.from_pretrained(
|
| 211 |
base_model,
|
| 212 |
model_dir,
|
| 213 |
+
device_map="auto",
|
| 214 |
+
is_trainable=False
|
| 215 |
+
)
|
| 216 |
|
| 217 |
print("LLM model loaded successfully.")
|
| 218 |
return tokenizer, model_llm
|