Spaces:
Sleeping
Sleeping
Commit ·
253ae18
1
Parent(s): c4ac4dc
update
Browse files
examples/tutorials/dpo/ultrafeedback-dpo/requirements.txt
CHANGED
|
@@ -5,3 +5,4 @@ modelscope
|
|
| 5 |
datasets
|
| 6 |
trl
|
| 7 |
deepspeed
|
|
|
|
|
|
| 5 |
datasets
|
| 6 |
trl
|
| 7 |
deepspeed
|
| 8 |
+
bitsandbytes
|
examples/tutorials/dpo/ultrafeedback-dpo/step_2_train_dpo_model_single_gpu.py
CHANGED
|
@@ -118,14 +118,12 @@ def main():
|
|
| 118 |
|
| 119 |
os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir
|
| 120 |
|
| 121 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 122 |
-
|
| 123 |
bnb_config = BitsAndBytesConfig(
|
| 124 |
-
load_in_4bit=True,
|
| 125 |
-
bnb_4bit_quant_type="nf4",
|
| 126 |
-
bnb_4bit_compute_dtype=torch.float16,
|
| 127 |
-
bnb_4bit_use_double_quant=True,
|
| 128 |
-
bnb_4bit_quant_storage=torch.uint8,
|
| 129 |
)
|
| 130 |
|
| 131 |
model = AutoModelForCausalLM.from_pretrained(
|
|
@@ -148,8 +146,8 @@ def main():
|
|
| 148 |
ref_model = prepare_model_for_kbit_training(ref_model)
|
| 149 |
|
| 150 |
lora_config = LoraConfig(
|
| 151 |
-
r=16,
|
| 152 |
-
lora_alpha=32,
|
| 153 |
target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
|
| 154 |
lora_dropout=0.1,
|
| 155 |
bias="none",
|
|
@@ -200,7 +198,7 @@ def main():
|
|
| 200 |
warmup_steps=100,
|
| 201 |
lr_scheduler_type="cosine",
|
| 202 |
fp16=True,
|
| 203 |
-
gradient_checkpointing=
|
| 204 |
optim="adamw_torch",
|
| 205 |
report_to="none",
|
| 206 |
max_length=1024 if debug_mode else 2048, # prompt + chosen 的最大长度
|
|
|
|
| 118 |
|
| 119 |
os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir
|
| 120 |
|
|
|
|
|
|
|
| 121 |
bnb_config = BitsAndBytesConfig(
|
| 122 |
+
load_in_4bit=True,
|
| 123 |
+
bnb_4bit_quant_type="nf4",
|
| 124 |
+
bnb_4bit_compute_dtype=torch.float16,
|
| 125 |
+
bnb_4bit_use_double_quant=True,
|
| 126 |
+
bnb_4bit_quant_storage=torch.uint8,
|
| 127 |
)
|
| 128 |
|
| 129 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 146 |
ref_model = prepare_model_for_kbit_training(ref_model)
|
| 147 |
|
| 148 |
lora_config = LoraConfig(
|
| 149 |
+
r=16,
|
| 150 |
+
lora_alpha=32,
|
| 151 |
target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
|
| 152 |
lora_dropout=0.1,
|
| 153 |
bias="none",
|
|
|
|
| 198 |
warmup_steps=100,
|
| 199 |
lr_scheduler_type="cosine",
|
| 200 |
fp16=True,
|
| 201 |
+
gradient_checkpointing=False, # 如果内存紧张,可以设为 True
|
| 202 |
optim="adamw_torch",
|
| 203 |
report_to="none",
|
| 204 |
max_length=1024 if debug_mode else 2048, # prompt + chosen 的最大长度
|