miyuki2026 committed on
Commit
4b924c5
·
1 Parent(s): 3b275e4
examples/tutorials/mix_lora_unsloth/step_2_train_model.py CHANGED
@@ -5,14 +5,14 @@ import os
5
  from pathlib import Path
6
  import platform
7
 
8
- # os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
9
  os.environ["UNSLOTH_USE_MODELSCOPE"] = "1"
10
 
11
  if platform.system() in ("Windows", "Darwin"):
12
- from project_settings import project_path
13
  else:
14
  project_path = os.path.abspath("../../../")
15
  project_path = Path(project_path)
 
16
 
17
  from unsloth import FastLanguageModel
18
  from trl import SFTTrainer, SFTConfig
@@ -36,14 +36,12 @@ def get_args():
36
  parser.add_argument("--dataset_split", default=None, type=str),
37
  parser.add_argument(
38
  "--dataset_cache_dir",
39
- # default=(project_path / "hub_datasets").as_posix(),
40
- default="/root/autodl-tmp/OpenMiniMind/hub_datasets",
41
  type=str
42
  ),
43
  parser.add_argument(
44
  "--model_cache_dir",
45
- # default=(project_path / "hub_models").as_posix(),
46
- default="/root/autodl-tmp/OpenMiniMind/hub_models",
47
  type=str
48
  ),
49
  parser.add_argument("--dataset_streaming", default=None, type=str),
@@ -179,15 +177,15 @@ def main():
179
  print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
180
 
181
  # 只保存lora适配器参数
182
- trained_models_dir = project_path / "trained_models" / "Qwen3-8B-sft-lora-adapter-unsloth"
183
  trained_models_dir.mkdir(parents=True, exist_ok=True)
184
  model.save_pretrained(trained_models_dir.as_posix())
185
  tokenizer.save_pretrained(trained_models_dir.as_posix())
186
 
187
- # trained_models_dir = project_path / "trained_models" / "Qwen3-8B-sft-fp16"
188
  # trained_models_dir.mkdir(parents=True, exist_ok=True)
189
  # model.save_pretrained_merged(trained_models_dir.as_posix(), tokenizer, save_method="merged_16bit",)
190
- # trained_models_dir = project_path / "trained_models" / "Qwen3-8B-sft-int4"
191
  # trained_models_dir.mkdir(parents=True, exist_ok=True)
192
  # model.save_pretrained_merged(trained_models_dir.as_posix(), tokenizer, save_method="merged_4bit",)
193
  return
 
5
  from pathlib import Path
6
  import platform
7
 
 
8
  os.environ["UNSLOTH_USE_MODELSCOPE"] = "1"
9
 
10
  if platform.system() in ("Windows", "Darwin"):
11
+ from project_settings import project_path, temp_directory
12
  else:
13
  project_path = os.path.abspath("../../../")
14
  project_path = Path(project_path)
15
+ temp_directory = Path("/root/autodl-tmp/OpenMiniMind/temp")
16
 
17
  from unsloth import FastLanguageModel
18
  from trl import SFTTrainer, SFTConfig
 
36
  parser.add_argument("--dataset_split", default=None, type=str),
37
  parser.add_argument(
38
  "--dataset_cache_dir",
39
+ default=(temp_directory / "hub_datasets").as_posix(),
 
40
  type=str
41
  ),
42
  parser.add_argument(
43
  "--model_cache_dir",
44
+ default=(temp_directory / "hub_models").as_posix(),
 
45
  type=str
46
  ),
47
  parser.add_argument("--dataset_streaming", default=None, type=str),
 
177
  print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
178
 
179
  # 只保存lora适配器参数
180
+ trained_models_dir = temp_directory / "trained_models/Qwen3-8B-sft-lora-adapter-unsloth"
181
  trained_models_dir.mkdir(parents=True, exist_ok=True)
182
  model.save_pretrained(trained_models_dir.as_posix())
183
  tokenizer.save_pretrained(trained_models_dir.as_posix())
184
 
185
+ # trained_models_dir = temp_directory / "trained_models/Qwen3-8B-sft-fp16"
186
  # trained_models_dir.mkdir(parents=True, exist_ok=True)
187
  # model.save_pretrained_merged(trained_models_dir.as_posix(), tokenizer, save_method="merged_16bit",)
188
+ # trained_models_dir = temp_directory / "trained_models/Qwen3-8B-sft-int4"
189
  # trained_models_dir.mkdir(parents=True, exist_ok=True)
190
  # model.save_pretrained_merged(trained_models_dir.as_posix(), tokenizer, save_method="merged_4bit",)
191
  return
examples/tutorials/mix_lora_unsloth/step_3_inter_model.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import os
from pathlib import Path
import platform

# Tell unsloth to download models via ModelScope instead of the HuggingFace Hub.
# Must be set before `unsloth` is imported below.
os.environ["UNSLOTH_USE_MODELSCOPE"] = "1"

# On developer machines (Windows/macOS) the paths come from the project's
# settings module; on the Linux training host they are hard-coded.
if platform.system() in ("Windows", "Darwin"):
    from project_settings import project_path, temp_directory
else:
    project_path = os.path.abspath("../../../")
    project_path = Path(project_path)
    # NOTE(review): host-specific scratch path (autodl instance) — confirm it
    # exists on the target machine before running.
    temp_directory = Path("/root/autodl-tmp/OpenMiniMind/temp")

from unsloth import FastLanguageModel
from transformers import TextStreamer

21
def get_args():
    """Parse command-line arguments for LoRA-adapter inference.

    Returns:
        argparse.Namespace with the base-model name, cache/adapter paths,
        sampling settings (top_p, temperature, max_new_tokens) and a
        worker-count hint.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name",
        default="unsloth/Qwen3-8B-unsloth-bnb-4bit",
        type=str,
    )
    parser.add_argument(
        "--model_cache_dir",
        default=(temp_directory / "hub_models").as_posix(),
        type=str,
    )
    parser.add_argument(
        "--lora_adapter_path",
        default=(temp_directory / "trained_models/Qwen3-8B-sft-lora-adapter-unsloth").as_posix(),
        type=str,
    )
    parser.add_argument(
        "--max_new_tokens",
        default=1024,  # 8192, 128
        type=int, help="最大生成长度(注意:并非模型实际长文本能力)",
    )
    parser.add_argument("--top_p", default=0.85, type=float, help="nucleus采样阈值(0-1)")
    parser.add_argument("--temperature", default=0.85, type=float, help="生成温度,控制随机性(0-1,越大越随机)")

    parser.add_argument(
        "--num_workers",
        # `or 1` guards against os.cpu_count() returning None on exotic platforms.
        default=None if platform.system() == "Windows" else (os.cpu_count() or 1) // 2,
        # BUG FIX: was `type=str`, which silently converted any value passed on
        # the command line to a string while the default stayed an int.
        type=int,
    )
    args = parser.parse_args()
    return args
53
+
54
+
55
def main():
    """Load the 4-bit base model, attach the trained LoRA adapter, and run a
    single streaming chat-completion for a fixed keyword-extraction prompt."""
    args = get_args()

    # Route ModelScope downloads into the same cache directory.
    os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=args.model_name,
        max_seq_length=2048,  # supports 32K+ long context
        device_map="auto",
        dtype=None,  # automatically choose the best precision
        load_in_4bit=True,  # 4-bit quantization saves ~70% VRAM
        cache_dir=args.model_cache_dir,
    )

    # 2. Inject the LoRA adapter trained in step 2.
    model.load_adapter(args.lora_adapter_path)

    # Enable unsloth's inference acceleration.
    FastLanguageModel.for_inference(model)
    model.eval()

    messages = [
        {
            "role": "user",
            "content": "关键词识别:\n梯度功能材料是基于一种全新的材料设计概念而开发的新型功能材料.陶瓷-金属FGM的主要结构特点是各梯度层由不同体积浓度的陶瓷和金属组成,材料在升温和降温过程中宏观梯度层间产生热应力,每一梯度层中细观增强相和基体的热物性失配将产生单层热应力,从而导致材料整体的破坏.采用云纹干涉法,对具有四个梯度层的SiC/A1梯度功能材料分别在机载、热载及两者共同作用下进行了应变测试,分别得到了这三种情况下每梯度层同一位置的纵向应变,横向应变和剪应变值."
        }
    ]
    format_messages = tokenizer.apply_chat_template(
        messages,
        tokenize=False,  # return a string, not token tensors
        add_generation_prompt=True,  # must be off during training; True for inference
    )

    # 4. Tokenize the formatted prompt into model inputs.
    inputs = tokenizer(format_messages, return_tensors="pt").to(model.device)

    # 5. Generate; TextStreamer prints tokens to stdout as they are produced.
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=args.max_new_tokens, do_sample=True,
        streamer=TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True),
        pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id,
        top_p=args.top_p, temperature=args.temperature, repetition_penalty=1.0,
    )

    # Decode only the newly generated tokens (strip the echoed prompt).
    response = tokenizer.decode(generated_ids[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
    print(f"response: {response}")
    return
103
+
104
+
105
# Script entry point.
if __name__ == "__main__":
    main()