miyuki2026 committed on
Commit
6c8daa9
·
1 Parent(s): 04a1180
examples/tutorials/by_deepspeed/requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  datasets
2
  unsloth
3
  modelscope
4
-
 
1
  datasets
2
  unsloth
3
  modelscope
4
+ transformers==4.57.1
examples/tutorials/by_deepspeed/step_2_train_model.py CHANGED
@@ -14,8 +14,8 @@ else:
14
  project_path = Path(project_path)
15
 
16
  from peft import LoraConfig
17
- from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
18
- # from modelscope import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
19
  from trl import SFTTrainer, SFTConfig
20
  from datasets import load_dataset
21
  import torch
@@ -63,11 +63,11 @@ def get_args():
63
  def main():
64
  args = get_args()
65
 
66
- # os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir
67
 
68
  model = AutoModelForCausalLM.from_pretrained(
69
- pretrained_model_name_or_path="/root/autodl-tmp/OpenMiniMind/hub_models/models/Qwen/Qwen3-8B",
70
- # pretrained_model_name_or_path=args.model_name,
71
  quantization_config=None,
72
  # device_map="auto",
73
  trust_remote_code=True,
@@ -75,8 +75,8 @@ def main():
75
  )
76
  print(model)
77
  tokenizer = AutoTokenizer.from_pretrained(
78
- pretrained_model_name_or_path="/root/autodl-tmp/OpenMiniMind/hub_models/models/Qwen/Qwen3-8B",
79
- # pretrained_model_name_or_path=args.model_name,
80
  trust_remote_code=True,
81
  # cache_dir=args.model_cache_dir,
82
  )
 
14
  project_path = Path(project_path)
15
 
16
  from peft import LoraConfig
17
+ # from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
18
+ from modelscope import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
19
  from trl import SFTTrainer, SFTConfig
20
  from datasets import load_dataset
21
  import torch
 
63
  def main():
64
  args = get_args()
65
 
66
+ os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir
67
 
68
  model = AutoModelForCausalLM.from_pretrained(
69
+ # pretrained_model_name_or_path="/root/autodl-tmp/OpenMiniMind/hub_models/models/Qwen/Qwen3-8B",
70
+ pretrained_model_name_or_path=args.model_name,
71
  quantization_config=None,
72
  # device_map="auto",
73
  trust_remote_code=True,
 
75
  )
76
  print(model)
77
  tokenizer = AutoTokenizer.from_pretrained(
78
+ # pretrained_model_name_or_path="/root/autodl-tmp/OpenMiniMind/hub_models/models/Qwen/Qwen3-8B",
79
+ pretrained_model_name_or_path=args.model_name,
80
  trust_remote_code=True,
81
  # cache_dir=args.model_cache_dir,
82
  )