miyuki2026 committed on
Commit
6c8daa9
·
1 Parent(s): 04a1180
examples/tutorials/by_deepspeed/requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  datasets
2
  unsloth
3
  modelscope
4
-
 
1
  datasets
2
  unsloth
3
  modelscope
4
+ transformers==4.57.1
examples/tutorials/by_deepspeed/step_2_train_model.py CHANGED
@@ -14,8 +14,8 @@ else:
14
  project_path = Path(project_path)
15
 
16
  from peft import LoraConfig
17
- from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
18
- # from modelscope import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
19
  from trl import SFTTrainer, SFTConfig
20
  from datasets import load_dataset
21
  import torch
@@ -63,11 +63,11 @@ def get_args():
63
  def main():
64
  args = get_args()
65
 
66
- # os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir
67
 
68
  model = AutoModelForCausalLM.from_pretrained(
69
- pretrained_model_name_or_path="/root/autodl-tmp/OpenMiniMind/hub_models/models/Qwen/Qwen3-8B",
70
- # pretrained_model_name_or_path=args.model_name,
71
  quantization_config=None,
72
  # device_map="auto",
73
  trust_remote_code=True,
@@ -75,8 +75,8 @@ def main():
75
  )
76
  print(model)
77
  tokenizer = AutoTokenizer.from_pretrained(
78
- pretrained_model_name_or_path="/root/autodl-tmp/OpenMiniMind/hub_models/models/Qwen/Qwen3-8B",
79
- # pretrained_model_name_or_path=args.model_name,
80
  trust_remote_code=True,
81
  # cache_dir=args.model_cache_dir,
82
  )
 
14
  project_path = Path(project_path)
15
 
16
  from peft import LoraConfig
17
+ # from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
18
+ from modelscope import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
19
  from trl import SFTTrainer, SFTConfig
20
  from datasets import load_dataset
21
  import torch
 
63
  def main():
64
  args = get_args()
65
 
66
+ os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir
67
 
68
  model = AutoModelForCausalLM.from_pretrained(
69
+ # pretrained_model_name_or_path="/root/autodl-tmp/OpenMiniMind/hub_models/models/Qwen/Qwen3-8B",
70
+ pretrained_model_name_or_path=args.model_name,
71
  quantization_config=None,
72
  # device_map="auto",
73
  trust_remote_code=True,
 
75
  )
76
  print(model)
77
  tokenizer = AutoTokenizer.from_pretrained(
78
+ # pretrained_model_name_or_path="/root/autodl-tmp/OpenMiniMind/hub_models/models/Qwen/Qwen3-8B",
79
+ pretrained_model_name_or_path=args.model_name,
80
  trust_remote_code=True,
81
  # cache_dir=args.model_cache_dir,
82
  )