Upload train_glm_qlora_v4.py with huggingface_hub
Browse files
- train_glm_qlora_v4.py +5 -0
train_glm_qlora_v4.py
CHANGED
|
@@ -36,6 +36,10 @@ bnb_config = BitsAndBytesConfig(
|
|
| 36 |
llm_int8_enable_fp32_cpu_offload=True,
|
| 37 |
)
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
print("Loading model in 4-bit with CPU offload...")
|
| 40 |
model = AutoModelForCausalLM.from_pretrained(
|
| 41 |
"zai-org/GLM-4.7-Flash",
|
|
@@ -43,6 +47,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 43 |
trust_remote_code=True,
|
| 44 |
device_map="auto",
|
| 45 |
max_memory={0: "20GiB", "cpu": "30GiB"},
|
|
|
|
| 46 |
)
|
| 47 |
tokenizer = AutoTokenizer.from_pretrained("zai-org/GLM-4.7-Flash", trust_remote_code=True)
|
| 48 |
print("Model loaded.")
|
|
|
|
| 36 |
llm_int8_enable_fp32_cpu_offload=True,
|
| 37 |
)
|
| 38 |
|
| 39 |
+
import os
|
| 40 |
+
offload_dir = "/tmp/offload"
|
| 41 |
+
os.makedirs(offload_dir, exist_ok=True)
|
| 42 |
+
|
| 43 |
print("Loading model in 4-bit with CPU offload...")
|
| 44 |
model = AutoModelForCausalLM.from_pretrained(
|
| 45 |
"zai-org/GLM-4.7-Flash",
|
|
|
|
| 47 |
trust_remote_code=True,
|
| 48 |
device_map="auto",
|
| 49 |
max_memory={0: "20GiB", "cpu": "30GiB"},
|
| 50 |
+
offload_folder=offload_dir,
|
| 51 |
)
|
| 52 |
tokenizer = AutoTokenizer.from_pretrained("zai-org/GLM-4.7-Flash", trust_remote_code=True)
|
| 53 |
print("Model loaded.")
|