Update app.py
Browse files
app.py
CHANGED
|
@@ -21,6 +21,7 @@ base = AutoModelForCausalLM.from_pretrained(
|
|
| 21 |
base_model,
|
| 22 |
load_in_8bit=True,
|
| 23 |
device_map="auto",
|
|
|
|
| 24 |
use_auth_token="hf_" +hf_token
|
| 25 |
)
|
| 26 |
|
|
|
|
| 21 |
base_model,
|
| 22 |
load_in_8bit=True,
|
| 23 |
device_map="auto",
|
| 24 |
+
llm_int8_enable_fp32_cpu_offload=True, # offload layers to CPU if GPU full
|
| 25 |
use_auth_token="hf_" +hf_token
|
| 26 |
)
|
| 27 |
|