miyuki2026 committed
Commit ee55dd5 · 1 Parent(s): 4ea4da5
examples/tutorials/by_deepspeed/step_2_train_model.py CHANGED
@@ -40,6 +40,11 @@ def get_args():
         default=(project_path / "hub_datasets").as_posix(),
         type=str
     ),
+    parser.add_argument(
+        "--model_cache_dir",
+        default=(project_path / "hub_models").as_posix(),
+        type=str
+    ),
     parser.add_argument("--dataset_streaming", default=None, type=str),
     parser.add_argument("--valid_dataset_size", default=100, type=str),
     parser.add_argument("--shuffle_buffer_size", default=5000, type=str),
@@ -60,11 +65,13 @@ def main():
         pretrained_model_name_or_path=args.model_name,
         quantization_config=None,
         # device_map="auto",
-        trust_remote_code=True
+        trust_remote_code=True,
+        cache_dir=args.model_cache_dir,
     )
     tokenizer = AutoTokenizer.from_pretrained(
         pretrained_model_name_or_path=args.model_name,
-        trust_remote_code=True
+        trust_remote_code=True,
+        cache_dir=args.model_cache_dir,
     )
     print(model)
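For reference, a minimal standalone sketch of what the new flag changes (the model ID below is a placeholder, not from this commit; the scripts receive it via --model_name). Passing cache_dir makes transformers download and re-read weights from that directory instead of the default ~/.cache/huggingface/hub:

from transformers import AutoModelForCausalLM, AutoTokenizer

cache_dir = "hub_models"  # same default the commit wires in via --model_cache_dir

# With cache_dir set, Hub files for this call are stored under hub_models/
# rather than the global ~/.cache/huggingface/hub (or $HF_HOME) cache.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",  # placeholder model ID
    trust_remote_code=True,
    cache_dir=cache_dir,
)
tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",  # placeholder model ID
    trust_remote_code=True,
    cache_dir=cache_dir,
)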
examples/tutorials/lora_transformers/step_2_train_model.py CHANGED
@@ -66,11 +66,13 @@ def main():
         pretrained_model_name_or_path=args.model_name,
         quantization_config=bnb_config,
         device_map="auto",
-        trust_remote_code=True
+        trust_remote_code=True,
+        cache_dir=args.model_cache_dir,
     )
     tokenizer = AutoTokenizer.from_pretrained(
         pretrained_model_name_or_path=args.model_name,
-        trust_remote_code=True
+        trust_remote_code=True,
+        cache_dir=args.model_cache_dir,
     )
     peft_config = LoraConfig(
         r=32,  # LoRA rank
examples/tutorials/lora_unsloth/step_2_train_model.py CHANGED
@@ -62,7 +62,8 @@ def main():
         dtype=None,
         load_in_4bit=True,
         load_in_8bit=False,
-        full_finetuning=False
+        full_finetuning=False,
+        cache_dir=args.model_cache_dir,
     )

     model = FastLanguageModel.get_peft_model(
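Note: unsloth's FastLanguageModel.from_pretrained passes extra keyword arguments through to the underlying Hugging Face loader, so cache_dir should behave the same here as in the transformers-based scripts above.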
examples/tutorials/lora_unsloth/step_3_inter_model.py CHANGED
@@ -55,6 +55,7 @@ def main():
         device_map="auto",
         dtype=None,  # automatically pick the optimal precision
         load_in_4bit=True,  # 4-bit quantization saves 70% of GPU memory
+        cache_dir=args.model_cache_dir,
     )

     # 2. Inject the LoRA adapter
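A quick way to confirm the cache redirection took effect (assuming the hub_models default added in this commit; the Hub cache stores each model under a models--&lt;org&gt;--&lt;name&gt; directory):

from pathlib import Path

# List cached model snapshots after a training or inference run.
for entry in sorted(Path("hub_models").glob("models--*")):
    print(entry.name)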