miyuki2026 committed
Commit b3a554a · Parent: d5a16ff
examples/tutorials/lora_transformers/requirements.txt ADDED
@@ -0,0 +1,4 @@
+ datasets
+ unsloth
+ modelscope
+
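Note: the training script added below also imports peft, trl, and torch directly; installing unsloth usually pulls these in transitively, but listing them explicitly makes the environment more reproducible. A hedged sketch of a more explicit requirements list (unpinned; the exact set is an assumption, not part of this commit):

    datasets
    unsloth
    modelscope
    peft
    trl
    torch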
examples/tutorials/lora_transformers/step_2_train_model.py ADDED
@@ -0,0 +1,188 @@
+ #!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ import argparse
+ import os
+ from pathlib import Path
+ import platform
+
+ # os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
+
+ if platform.system() in ("Windows", "Darwin"):
+     from project_settings import project_path
+ else:
+     project_path = os.path.abspath("../../../")
+     project_path = Path(project_path)
+
+ from peft import LoraConfig
+ # from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from modelscope import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from trl import SFTTrainer, SFTConfig
+ from datasets import load_dataset
+ import torch
+
+
+ def get_args():
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         "--model_name",
+         default="unsloth/Qwen3-8B-unsloth-bnb-4bit",
+         type=str
+     )
+     parser.add_argument(
+         "--dataset_path",
+         default="miyuki2026/tutorials",
+         type=str
+     )
+     parser.add_argument("--dataset_name", default=None, type=str)
+     parser.add_argument("--dataset_split", default=None, type=str)
+     parser.add_argument(
+         "--dataset_cache_dir",
+         default=(project_path / "hub_datasets").as_posix(),
+         type=str
+     )
+     parser.add_argument("--dataset_streaming", default=None, type=str)
+     parser.add_argument("--valid_dataset_size", default=1000, type=int)  # int: used by take()/skip()/train_test_split() below
+     parser.add_argument("--shuffle_buffer_size", default=5000, type=int)  # int: used by shuffle(buffer_size=...)
+
+     parser.add_argument(
+         "--num_workers",
+         default=None if platform.system() == "Windows" else os.cpu_count() // 2,
+         type=int
+     )
+     args = parser.parse_args()
+     return args
+
+
+ def main():
+     args = get_args()
+
+     bnb_config = BitsAndBytesConfig(
+         load_in_4bit=True,  # enable 4-bit quantization
+         bnb_4bit_quant_type="nf4",  # quantization type
+         bnb_4bit_compute_dtype=torch.float16,
+         bnb_4bit_use_double_quant=True  # nested quantization saves additional memory
+     )
+     model = AutoModelForCausalLM.from_pretrained(
+         pretrained_model_name_or_path=args.model_name,
+         quantization_config=bnb_config,
+         device_map="auto",
+         trust_remote_code=True
+     )
+     tokenizer = AutoTokenizer.from_pretrained(
+         pretrained_model_name_or_path=args.model_name,
+         trust_remote_code=True
+     )
+     peft_config = LoraConfig(
+         r=32,  # LoRA rank
+         lora_alpha=32,  # scaling factor
+         target_modules=[
+             "q_proj", "k_proj", "v_proj", "o_proj",
+             "gate_proj", "up_proj", "down_proj"
+         ],
+         lora_dropout=0.,  # dropout rate
+         bias="none",  # how biases are handled
+         task_type="CAUSAL_LM"  # task type
+     )
+     print(model)
+
+     def format_func(example):
+         text = tokenizer.apply_chat_template(
+             example["conversation"],
+             tokenize=False,  # keep plain text for training; True would return token ids
+             add_generation_prompt=False,  # off during training; set True for inference
+         )
+         return {"text": text}  # key must match dataset_text_field below
96
+ dataset_dict = load_dataset(
97
+ path=args.dataset_path,
98
+ name=args.dataset_name,
99
+ data_dir="keywords",
100
+ # data_dir="psychology",
101
+ split=args.dataset_split,
102
+ cache_dir=args.dataset_cache_dir,
103
+ # num_proc=args.num_workers if not args.dataset_streaming else None,
104
+ streaming=args.dataset_streaming,
105
+ )
106
+ dataset = dataset_dict["train"]
107
+
108
+ if args.dataset_streaming:
109
+ valid_dataset = dataset.take(args.valid_dataset_size)
110
+ train_dataset = dataset.skip(args.valid_dataset_size)
111
+ train_dataset = train_dataset.shuffle(buffer_size=args.shuffle_buffer_size, seed=None)
112
+ else:
113
+ dataset = dataset.train_test_split(test_size=args.valid_dataset_size, seed=None)
114
+ train_dataset = dataset["train"]
115
+ valid_dataset = dataset["test"]
116
+
117
+ train_dataset = train_dataset.map(
118
+ format_func,
119
+ batched=False,
120
+ remove_columns=train_dataset.column_names,
121
+ )
122
+ print(train_dataset)
+
+     trainer = SFTTrainer(
+         model=model,
+         processing_class=tokenizer,
+         # tokenizer=tokenizer,
+         peft_config=peft_config,
+         train_dataset=train_dataset,
+         eval_dataset=None,  # valid_dataset could be wired in here (after the same map as train_dataset)
+         args=SFTConfig(
+             dataset_text_field="text",
+             per_device_train_batch_size=1,
+             gradient_accumulation_steps=2,  # use gradient accumulation to mimic a larger batch size
+             warmup_steps=5,
+             num_train_epochs=1,  # set this for one full training run
+             # max_steps=30,
+             learning_rate=2e-4,  # reduce to 2e-5 for long training runs
+             logging_steps=1,
+             optim="adamw_8bit",
+             weight_decay=0.01,
+             lr_scheduler_type="linear",
+             seed=3407,
+             report_to="none",  # set to "wandb" etc. to enable logging
+         ),
+     )
+
+     # show current GPU memory stats
+     gpu_stats = torch.cuda.get_device_properties(0)
+     start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+     max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+     print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+     print(f"{start_gpu_memory} GB of memory reserved.")
+
+     trainer_stats = trainer.train()
+
+     # show final memory and time stats
+     used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+     used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
+     used_percentage = round(used_memory / max_memory * 100, 3)
+     lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
+     print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
+     print(
+         f"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training."
+     )
+     print(f"Peak reserved memory = {used_memory} GB.")
+     print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
+     print(f"Peak reserved memory % of max memory = {used_percentage} %.")
+     print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
171
+ # ==================== 4.保存训练结果 ====================================
172
+ # 只保存lora适配器参数
173
+ trained_models_dir = project_path / "trained_models" / "Qwen3-8B-sft-lora-adapter-unsloth"
174
+ trained_models_dir.mkdir(parents=True, exist_ok=True)
175
+ model.save_pretrained(trained_models_dir.as_posix())
176
+ tokenizer.save_pretrained(trained_models_dir.as_posix())
177
+
178
+ # trained_models_dir = project_path / "trained_models" / "Qwen3-8B-sft-fp16"
179
+ # trained_models_dir.mkdir(parents=True, exist_ok=True)
180
+ # model.save_pretrained_merged(trained_models_dir.as_posix(), tokenizer, save_method="merged_16bit",)
181
+ # trained_models_dir = project_path / "trained_models" / "Qwen3-8B-sft-int4"
182
+ # trained_models_dir.mkdir(parents=True, exist_ok=True)
183
+ # model.save_pretrained_merged(trained_models_dir.as_posix(), tokenizer, save_method="merged_4bit",)
184
+ return
185
+
186
+
187
+ if __name__ == "__main__":
188
+ main()
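For completeness, a minimal inference sketch (not part of this commit) showing how the adapter saved above could be loaded back with peft's standard PeftModel.from_pretrained. The base model name and adapter directory mirror the script; the prompt and generation settings are illustrative assumptions:

    #!/usr/bin/python3
    # hedged sketch: load the LoRA adapter saved by step_2_train_model.py
    import torch
    from modelscope import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
    from peft import PeftModel

    adapter_dir = "trained_models/Qwen3-8B-sft-lora-adapter-unsloth"

    base = AutoModelForCausalLM.from_pretrained(
        "unsloth/Qwen3-8B-unsloth-bnb-4bit",
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
        ),
        device_map="auto",
        trust_remote_code=True,
    )
    model = PeftModel.from_pretrained(base, adapter_dir)  # attach the trained adapter
    tokenizer = AutoTokenizer.from_pretrained(adapter_dir, trust_remote_code=True)

    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": "Hello"}],
        tokenize=False,
        add_generation_prompt=True,  # True for inference, unlike training
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=64)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))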
examples/tutorials/lora_unsloth/step_2_train_model.py CHANGED
@@ -40,9 +40,8 @@ def get_args():
         type=str
     ),
     parser.add_argument("--dataset_streaming", default=None, type=str),
-
-    parser.add_argument("--valid_dataset_size", default=None, type=str),
-    parser.add_argument("--shuffle_buffer_size", default=None, type=str),
+    parser.add_argument("--valid_dataset_size", default=1000, type=str),
+    parser.add_argument("--shuffle_buffer_size", default=5000, type=str),
 
     parser.add_argument(
         "--num_workers",
examples/tutorials/lora_unsloth/step_4_evaluation.py CHANGED
@@ -46,8 +46,8 @@ def get_args():
         type=str
     ),
     parser.add_argument("--dataset_streaming", default=None, type=str),
-    parser.add_argument("--valid_dataset_size", default=None, type=str),
-    parser.add_argument("--shuffle_buffer_size", default=None, type=str),
+    parser.add_argument("--valid_dataset_size", default=1000, type=str),
+    parser.add_argument("--shuffle_buffer_size", default=5000, type=str),
 
     parser.add_argument(
         "--max_new_tokens",