LeroyDyer committed on
Commit da41efe · verified · 1 Parent(s): 2ac7daf

Update Model_LOADER.py

Files changed (1)
  1. Model_LOADER.py +195 -209
Model_LOADER.py CHANGED
@@ -1,210 +1,196 @@
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline, AutoConfig, BitsAndBytesConfig,AutoConfig
- import time
- import torch
- torch.backends.cuda.matmul.allow_tf32 = True
- import random
- from datasets import load_dataset
- from transformers import TrainingArguments
- from trl import SFTTrainer
- from peft import LoraConfig
- # from accelerate import infer_auto_device_map, init_empty_weights, dispatch_model
- from torch.nn import CrossEntropyLoss
- torch.autograd.set_detect_anomaly(True)
- random_seed = 42
- torch.manual_seed(random_seed)
- random.seed(random_seed)
- # Set the device for each process
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- # torch.cuda.set_device(device)
-
-
- n_ahead_talk_global = 4
- n_passes_global = 2
- n_ahead_global = 8
- n_examples = 0
-
- def model_init(params):
-     original = False
-     if params is None:
-         params = {}
-     else:
-         params = params.params
-     # save params to file
-     n_ahead = params.get("n_ahead", n_ahead_global if not original else 1)
-     n_ahead_talk = params.get("n_ahead_talk", n_ahead_talk_global if not original else 1)
-     n_passes = params.get("n_passes", n_passes_global if not original else 1)
-     gumbel_temperature = params.get("gumbel_temperature", 1)
-     use_start_thought_token = params.get("use_start_thought_token", True)
-     use_end_thought_token = params.get("use_end_thought_token", True)
-     include_policy_loss = params.get("include_policy_loss", True)
-     gumbel_detach = params.get("gumbel_detach", True)
-     merged_talk_heads = params.get("merged_talk_heads", True)
-     residual_think_head = params.get("residual_think_head", False)
-     optimize_lm_head_only_at_start = params.get("optimize_lm_head_only_at_start", False)
-
-     model_id = "LeroyDyer/_Spydaz_Web_AI_V2_Aligned"
-     tokenizer_id = model_id
-     print("Loading model")
-
-     model = AutoModelForCausalLM.from_pretrained(
-         model_id,
-         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-         max_thoughts=n_ahead + n_ahead_talk + 1,
-         merged_talk_heads=merged_talk_heads,
-         merged_lm_and_talk_heads=False,
-         merged_lm_and_think_heads=True,
-         use_concat_talk_head=True,
-         use_shallow_think=True,
-         use_shallow_talk=False,
-         use_complex_think_head=False,
-         use_complex_talk_head=True,
-         use_weighted_talk_head=True,
-         trust_remote_code=True,
-         device_map="auto",
-     )
-     print("Loaded model")
-
-     tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, truncation=True, padding_side="right")
-     tokenizer.pad_token_id = tokenizer.eos_token_id
-
-     special_tokens_to_add = []
-     if model.use_start_thought_token:
-         special_tokens_to_add.append("<|startthought|>")
-     if model.use_end_thought_token:
-         special_tokens_to_add.append("<|endthought|>")
-     if special_tokens_to_add:
-         tokenizer.add_special_tokens({"additional_special_tokens": special_tokens_to_add})
-         model.resize_token_embeddings(len(tokenizer))
-     model.tokenizer = tokenizer
-     for name, module in model.named_modules():
-         if "embed" in name:
-             print(module, flush=True)
-
-     model.gumbel_detach = gumbel_detach
-     model.include_policy_loss = include_policy_loss
-     model.use_end_thought_token = use_end_thought_token
-     model.use_start_thought_token = use_start_thought_token
-     model.n_ahead = n_ahead
-     model.n_ahead_talk = n_ahead_talk
-     model.n_passes = n_passes
-     model.residual_think_head = residual_think_head
-     model.optimize_lm_head_only_at_start = optimize_lm_head_only_at_start
-     model.gumbel_temperature = gumbel_temperature
-     model.original_mode = original
-     model.config_params = params
-     return model
- model,tokenizer = model_init(None)
- tokenizer.save_pretrained("IpretrainedModel")
- model.save_pretrained("IpretrainedModel")
-
-
- ## TRAINING :
-
- peft_config = LoraConfig(
-     r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
-     target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
-                       "gate_proj", "up_proj", "down_proj","lm_head", "embed_tokens"],
-     lora_alpha = 32,
-     lora_dropout = 0, # Supports any, but = 0 is optimized
-     bias = "none",
-     use_dora=True,
- )
-
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline, AutoConfig
- from datasets import load_dataset
- from transformers import TrainingArguments
- from trl import SFTTrainer
- from peft import LoraConfig
-
- ## DATA
- alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
-
- ### Instruction:
- {}
-
- ### Input:
- {}
-
- ### Response:
- {}"""
- EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
- def formatting_prompts_func(examples):
-     instructions = examples["instruction"]
-     inputs = examples["input"]
-     outputs = examples["output"]
-     texts = []
-     for instruction, input, output in zip(instructions, inputs, outputs):
-         # Must add EOS_TOKEN, otherwise your generation will go on forever!
-         text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
-         texts.append(text)
-     return { "text" : texts, }
- pass
- dataset = load_dataset("gate369/Alpaca-Star", split = "train[:2000]")
- dataset = dataset.shuffle(seed=3704)
- dataset = dataset.map(formatting_prompts_func, batched = True,)
- ## TRAIN
- model
- model.train
- max_seq_length = 32000
- training_args = TrainingArguments(
-     output_dir="./out",
-     num_train_epochs=3,
-     per_device_train_batch_size=1,
-     gradient_checkpointing=False,
-     gradient_accumulation_steps=8,
-     optim="lion_32bit",
-     logging_steps=1,
-     save_strategy="steps",
-     save_steps=300,
-     max_steps=1000,
-     bf16=True,
-     tf32=False,
-     learning_rate=6e-05,
-     max_grad_norm=0.3,
-     warmup_ratio=0.06,
-     lr_scheduler_type="cosine",
-     push_to_hub=False,
- )
- trainer = SFTTrainer(
-     args=training_args,
-     train_dataset=dataset,
-     model=model,
-     tokenizer=tokenizer,
-     max_seq_length=max_seq_length,
-     dataset_text_field="text",
-     peft_config=peft_config,
- )
- trainer.train()
-
- ## SAVE
- tokenizer.save_pretrained("SFTTrainerModel")
- model.save_pretrained("SFTTrainerModel")
-
-
- import os
- import huggingface_hub
- from huggingface_hub import notebook_login
- from huggingface_hub import create_repo, HfApi
- from huggingface_hub import hf_hub_download
- from huggingface_hub import create_repo, HfApi
- from huggingface_hub import snapshot_download
-
- MODEL_NAME = "_Spydaz_Web_AI_MistralStar"
- Folderinput = "SFTTrainerModel"
- WRITE_TOKEN = ""
- username = "LeroyDyer"
- huggingface_hub.login(WRITE_TOKEN)
- api = HfApi(token=WRITE_TOKEN)
- # Create empty repo
- api.create_repo(
-     repo_id = f"{username}/{MODEL_NAME}",
-     repo_type="model",
-     exist_ok=True,
- )
-
- api.upload_folder(
-     repo_id = f"{username}/{MODEL_NAME}",
-     folder_path = Folderinput
- )
 
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline, AutoConfig, BitsAndBytesConfig
+ import time
+ import torch
+ torch.backends.cuda.matmul.allow_tf32 = True
+ import random
+ from datasets import load_dataset
+ from transformers import TrainingArguments
+ from trl import SFTTrainer
+ from peft import LoraConfig
+ # from accelerate import infer_auto_device_map, init_empty_weights, dispatch_model
+ from torch.nn import CrossEntropyLoss
+ torch.autograd.set_detect_anomaly(True)
+ random_seed = 42
+ torch.manual_seed(random_seed)
+ random.seed(random_seed)
+ # Set the device for each process
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ # torch.cuda.set_device(device)
+
+
+ n_ahead_talk_global = 4
+ n_passes_global = 2
+ n_ahead_global = 8
+ n_examples = 0
+
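+ # model_init: load the base model with its thought/talk-head settings, set up
+ # the tokenizer, and copy the chosen hyperparameters onto the model instance.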
+ def model_init(params):
+     original = False
+     if params is None:
+         params = {}
+     else:
+         params = params.params
+     # save params to file
+     n_ahead = params.get("n_ahead", n_ahead_global if not original else 1)
+     n_ahead_talk = params.get("n_ahead_talk", n_ahead_talk_global if not original else 1)
+     n_passes = params.get("n_passes", n_passes_global if not original else 1)
+     gumbel_temperature = params.get("gumbel_temperature", 1)
+     use_start_thought_token = params.get("use_start_thought_token", True)
+     use_end_thought_token = params.get("use_end_thought_token", True)
+     include_policy_loss = params.get("include_policy_loss", True)
+     gumbel_detach = params.get("gumbel_detach", True)
+     merged_talk_heads = params.get("merged_talk_heads", True)
+     residual_think_head = params.get("residual_think_head", False)
+     optimize_lm_head_only_at_start = params.get("optimize_lm_head_only_at_start", False)
+
+     model_id = "LeroyDyer/SpydazWeb_AGI_MistralStar"
+     tokenizer_id = model_id
+     print("Loading model")
+
+     model = AutoModelForCausalLM.from_pretrained(
+         model_id,
+         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+         max_thoughts=n_ahead + n_ahead_talk + 1,
+         merged_talk_heads=merged_talk_heads,
+         merged_lm_and_talk_heads=False,
+         merged_lm_and_think_heads=True,
+         use_concat_talk_head=True,
+         use_shallow_think=True,
+         use_shallow_talk=False,
+         use_complex_think_head=False,
+         use_complex_talk_head=True,
+         use_weighted_talk_head=True,
+         trust_remote_code=True,
+         device_map="auto",
+     )
+     print("Loaded model")
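+     # The non-standard kwargs above (max_thoughts, merged_talk_heads, ...) are
+     # consumed by the model's custom code, hence trust_remote_code=True.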
+
+     tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, truncation=True, padding_side="right")
+     tokenizer.pad_token_id = tokenizer.eos_token_id
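+     # Unlike the previous revision, no <|startthought|>/<|endthought|> special
+     # tokens are added here, so the embedding matrix keeps its original size.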
+
+     model.gumbel_detach = gumbel_detach
+     model.include_policy_loss = include_policy_loss
+     model.use_end_thought_token = use_end_thought_token
+     model.use_start_thought_token = use_start_thought_token
+     model.n_ahead = n_ahead
+     model.n_ahead_talk = n_ahead_talk
+     model.n_passes = n_passes
+     model.residual_think_head = residual_think_head
+     model.optimize_lm_head_only_at_start = optimize_lm_head_only_at_start
+     model.gumbel_temperature = gumbel_temperature
+     model.original_mode = original
+     model.config_params = params
+     return model, tokenizer  # return both; the call below unpacks two values
+
+ model, tokenizer = model_init(None)
+
+
+ ## TRAINING :
+
+ peft_config = LoraConfig(
+     r=128,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+     target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
+                     "gate_proj", "up_proj", "down_proj", "lm_head", "embed_tokens"],
+     lora_alpha=32,
+     lora_dropout=0,  # Supports any, but = 0 is optimized
+     bias="none",
+     use_dora=True,
+ )
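+ # use_dora=True enables DoRA (weight-decomposed LoRA) in PEFT; listing lm_head
+ # and embed_tokens in target_modules also adapts the output head and embeddings.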
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline, AutoConfig
+ from datasets import load_dataset
+ from transformers import TrainingArguments
+ from trl import SFTTrainer
+ from peft import LoraConfig
+
+ ## DATA
+ alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+ ### Instruction:
+ {}
+
+ ### Input:
+ {}
+
+ ### Response:
+ {}"""
+ EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN
+ def formatting_prompts_func(examples):
+     instructions = examples["instruction"]
+     inputs = examples["input"]
+     outputs = examples["output"]
+     texts = []
+     for instruction, input, output in zip(instructions, inputs, outputs):
+         # Must add EOS_TOKEN, otherwise your generation will go on forever!
+         text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
+         texts.append(text)
+     return {"text": texts}
+
+ dataset = load_dataset("gate369/Alpaca-Star", split="train[:2000]")
+ dataset = dataset.shuffle(seed=3704)
+ dataset = dataset.map(formatting_prompts_func, batched=True)
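+ # map() adds a "text" column, which SFTTrainer reads via dataset_text_field below.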
+ ## TRAIN
+
+ max_seq_length = 1024
+ training_args = TrainingArguments(
+     output_dir="./out",
+     num_train_epochs=3,
+     per_device_train_batch_size=1,
+     gradient_checkpointing=False,
+     gradient_accumulation_steps=8,
+     optim="lion_32bit",
+     logging_steps=1,
+     save_strategy="steps",
+     max_steps=1000,
+     bf16=True,
+     tf32=False,
+     learning_rate=6e-05,
+     max_grad_norm=0.3,
+     warmup_ratio=0.06,
+     lr_scheduler_type="cosine",
+     push_to_hub=False,
+ )
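+ # optim="lion_32bit" selects the bitsandbytes Lion optimizer. With
+ # save_strategy="steps" and no save_steps, checkpoints default to every 500
+ # steps, and max_steps=1000 stops training regardless of num_train_epochs.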
+ trainer = SFTTrainer(
+     args=training_args,
+     train_dataset=dataset,
+     model=model,
+     tokenizer=tokenizer,
+     max_seq_length=max_seq_length,
+     dataset_text_field="text",
+     peft_config=peft_config,
+ )
+ trainer.train()
+
+ ## SAVE
+ tokenizer.save_pretrained("SFTTrainerModel")
+ model.save_pretrained("SFTTrainerModel")
+
+
+ import os
+ import huggingface_hub
+ from huggingface_hub import notebook_login
+ from huggingface_hub import create_repo, HfApi
+ from huggingface_hub import hf_hub_download
+ from huggingface_hub import snapshot_download
+
+ MODEL_NAME = "_Spydaz_Web_AI_MistralStar"
+ Folderinput = "SFTTrainerModel"
+ WRITE_TOKEN = ""  # supply a Hugging Face write token before running
+ username = "LeroyDyer"
+ huggingface_hub.login(WRITE_TOKEN)
+ api = HfApi(token=WRITE_TOKEN)
+ # Create empty repo
+ api.create_repo(
+     repo_id=f"{username}/{MODEL_NAME}",
+     repo_type="model",
+     exist_ok=True,
+ )
+
+ api.upload_folder(
+     repo_id=f"{username}/{MODEL_NAME}",
+     folder_path=Folderinput,
+ )
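A minimal reload sketch for the script above, assuming the "SFTTrainerModel" folder contains a loadable checkpoint and that the model's custom code is available via trust_remote_code; the prompt string mirrors the alpaca template used for training:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Reload the artifacts written by save_pretrained("SFTTrainerModel") above.
tokenizer = AutoTokenizer.from_pretrained("SFTTrainerModel")
model = AutoModelForCausalLM.from_pretrained(
    "SFTTrainerModel",
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    trust_remote_code=True,  # the thought/talk-head code ships with the model
    device_map="auto",
)

# Prompt in the same alpaca format used for training (empty Input section).
prompt = ("Below is an instruction that describes a task, paired with an input "
          "that provides further context. Write a response that appropriately "
          "completes the request.\n\n### Instruction:\nSummarise LoRA in one "
          "sentence.\n\n### Input:\n\n\n### Response:\n")
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(out[0], skip_special_tokens=True))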