#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Supervised fine-tuning of Qwen3-8B with DeepSpeed ZeRO stage 3.

Launch:
    deepspeed --num_gpus=4 step_2_train_model.py
"""
import argparse
import os
import platform
from pathlib import Path

# Must be set before any huggingface/modelscope import so model and dataset
# downloads go through the mirror endpoint.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

if platform.system() in ("Windows", "Darwin"):
    # Local development machine: resolve paths from project settings.
    from project_settings import project_path, temp_directory
else:
    # Remote GPU box (autodl): hard-coded directory layout.
    project_path = os.path.abspath("../../../")
    project_path = Path(project_path)
    temp_directory = Path("/root/autodl-tmp/OpenMiniMind/temp")

# modelscope mirrors the transformers API; swap the import back to
# `transformers` if modelscope is unavailable.
# from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from modelscope import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset
import torch
def get_args(argv=None):
    """
    Parse command-line options for the SFT training run.

    :param argv: optional list of argument strings; defaults to ``sys.argv[1:]``
        (backward compatible — existing ``get_args()`` callers are unaffected).
    :return: ``argparse.Namespace`` with the parsed options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="Local rank passed from distributed launcher")
    parser.add_argument("--model_name", default="Qwen/Qwen3-8B", type=str)
    parser.add_argument("--dataset_path", default="miyuki2026/tutorials", type=str)
    parser.add_argument("--dataset_name", default=None, type=str)
    parser.add_argument("--dataset_split", default=None, type=str)
    parser.add_argument(
        "--dataset_cache_dir",
        default=(temp_directory / "hub_datasets").as_posix(),
        type=str
    )
    parser.add_argument(
        "--model_cache_dir",
        default=(temp_directory / "hub_models").as_posix(),
        type=str
    )
    # NOTE(review): forwarded straight to `datasets.load_dataset(streaming=...)`
    # and truth-tested in main(); any non-empty string (even "false") enables
    # streaming — confirm intended CLI contract.
    parser.add_argument("--dataset_streaming", default=None, type=str)
    # These two were declared `type=str` although their defaults are ints and
    # they are consumed as ints (dataset.take/skip/train_test_split/shuffle);
    # declare them as int so command-line overrides do not crash downstream.
    parser.add_argument("--valid_dataset_size", default=1000, type=int)
    parser.add_argument("--shuffle_buffer_size", default=5000, type=int)
    parser.add_argument(
        "--num_workers",
        # No multiprocessing workers on dev machines; half the cores otherwise.
        default=None if platform.system() in ("Windows", "Darwin") else os.cpu_count() // 2,
        type=int
    )
    args = parser.parse_args(argv)
    return args
def main():
    """
    SFT entry point: load model/tokenizer, prepare the dataset, train with
    TRL's SFTTrainer under DeepSpeed ZeRO stage 3, report GPU memory/time
    statistics, and save the final model/tokenizer on the main process.
    """
    args = get_args()

    os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir

    model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=args.model_name,
        quantization_config=None,
        device_map="auto",  # enable multi-GPU model sharding
        trust_remote_code=True,
        cache_dir=args.model_cache_dir,
    )
    if args.local_rank in (-1, 0):
        print(model)

    tokenizer = AutoTokenizer.from_pretrained(
        pretrained_model_name_or_path=args.model_name,
        trust_remote_code=True,
        cache_dir=args.model_cache_dir,
    )
    if args.local_rank in (-1, 0):
        print(tokenizer)

    def format_func(example):
        # Render one conversation into plain text via the model's chat template.
        formated_text = tokenizer.apply_chat_template(
            example["conversations"],
            tokenize=False,               # return text (True would return tensors)
            add_generation_prompt=False,  # must be off for training; True only at inference
        )
        return {"formated_text": formated_text}

    dataset_dict = load_dataset(
        path=args.dataset_path,
        name=args.dataset_name,
        data_dir="keywords",
        # data_dir="psychology",
        split=args.dataset_split,
        cache_dir=args.dataset_cache_dir,
        # num_proc=args.num_workers if not args.dataset_streaming else None,
        streaming=args.dataset_streaming,
    )
    dataset = dataset_dict["train"]
    if args.local_rank in (-1, 0):
        print(dataset)

    if args.dataset_streaming:
        # IterableDataset: carve off a validation prefix, shuffle the rest.
        valid_dataset = dataset.take(args.valid_dataset_size)
        train_dataset = dataset.skip(args.valid_dataset_size)
        train_dataset = train_dataset.shuffle(buffer_size=args.shuffle_buffer_size, seed=None)
    else:
        dataset = dataset.train_test_split(test_size=args.valid_dataset_size, seed=None)
        train_dataset = dataset["train"]
        valid_dataset = dataset["test"]
    # train_dataset = valid_dataset

    train_dataset = train_dataset.map(
        format_func,
        batched=False,
        remove_columns=train_dataset.column_names,
    )

    trainer = SFTTrainer(
        model=model,
        processing_class=tokenizer,  # new-style replacement for `tokenizer=`
        train_dataset=train_dataset,
        eval_dataset=None,  # Can set up evaluation!
        args=SFTConfig(
            # `output_dir` expects a string path; use .as_posix() for
            # consistency with the other path arguments in this file.
            output_dir=(temp_directory / "trainer_output/Qwen3-8B-sft-deepspeed").as_posix(),
            dataset_text_field="formated_text",
            deepspeed="./ds_config/deepspeed_stage_3_config.json",  # DeepSpeed stage-3 config
            per_device_train_batch_size=2,
            gradient_accumulation_steps=8,
            warmup_steps=100,
            num_train_epochs=1,  # Set this for 1 full training run.
            # max_steps = 30,
            learning_rate=3e-5,  # Reduce to 2e-5 for long training runs
            logging_steps=1,
            save_steps=10,  # save a checkpoint every 10 steps
            save_total_limit=2,  # keep only the 2 newest checkpoints, older ones are deleted
            optim="adamw_8bit",
            weight_decay=0,
            lr_scheduler_type="constant_with_warmup",
            seed=3407,
            report_to="none",  # Use this for WandB etc
        ),
    )

    # Show current GPU memory statistics (GiB, rounded to 3 decimals).
    gpu_stats = torch.cuda.get_device_properties(0)
    start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
    max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
    if args.local_rank in (-1, 0):
        print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
        print(f"{start_gpu_memory} GB of memory reserved.")

    trainer_stats = trainer.train()

    # Show final memory and time statistics.
    used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
    used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
    used_percentage = round(used_memory / max_memory * 100, 3)
    lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
    if args.local_rank in (-1, 0):
        print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
        print(
            f"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training."
        )
        print(f"Peak reserved memory = {used_memory} GB.")
        print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
        print(f"Peak reserved memory % of max memory = {used_percentage} %.")
        print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

    if args.local_rank in (-1, 0):
        # Persist the fine-tuned model and tokenizer on the main process only.
        trained_models_dir = temp_directory / "trained_models" / "Qwen3-8B-sft-deepspeed"
        trained_models_dir.mkdir(parents=True, exist_ok=True)
        trainer.model.save_pretrained(trained_models_dir.as_posix())
        tokenizer.save_pretrained(trained_models_dir.as_posix())
    return
if __name__ == "__main__":
    main()