Erland commited on
Commit
7361125
·
verified ·
1 Parent(s): 6090796

Training in progress, step 30, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
- "q_proj",
25
  "up_proj",
26
- "v_proj",
27
  "k_proj",
28
- "gate_proj",
29
- "down_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "gate_proj",
 
24
  "up_proj",
 
25
  "k_proj",
26
+ "down_proj",
27
+ "q_proj",
28
+ "v_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:388de8b1fd2d9b827937b3a78d2f0c4f9e28ffa087987c50cf8c48941c605e27
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56a595ab8d0501db585f02e5044fa57e61fcac8cca3667878ad850a07e524b4e
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37278b058da42c070ece918b5474e74f3a0c469f409efa5127d07d2cc5355219
3
- size 85723284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f21feea2e2f7292f02c7822951aa623f6cf71bda055968e00277d00429c41ed
3
+ size 86889042
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcc550cfbb47ad4cbc37125ea640e4b6df0c324dad2c713e9b18c9c4eb2ecb33
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2f0c5f75e18f8cb763d8ea5434e79bf35d98af96676e76ac16c35aac4009f48
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0016,
5
  "eval_steps": 500,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -147,6 +147,76 @@
147
  "learning_rate": 0.00018461538461538463,
148
  "loss": 0.4766,
149
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  }
151
  ],
152
  "logging_steps": 1,
@@ -166,7 +236,7 @@
166
  "attributes": {}
167
  }
168
  },
169
- "total_flos": 5377867744100352.0,
170
  "train_batch_size": 2,
171
  "trial_name": null,
172
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0024,
5
  "eval_steps": 500,
6
+ "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
147
  "learning_rate": 0.00018461538461538463,
148
  "loss": 0.4766,
149
  "step": 20
150
+ },
151
+ {
152
+ "epoch": 0.00168,
153
+ "grad_norm": 0.4504310190677643,
154
+ "learning_rate": 0.00018358974358974358,
155
+ "loss": 0.5895,
156
+ "step": 21
157
+ },
158
+ {
159
+ "epoch": 0.00176,
160
+ "grad_norm": 0.4786751866340637,
161
+ "learning_rate": 0.00018256410256410258,
162
+ "loss": 0.4999,
163
+ "step": 22
164
+ },
165
+ {
166
+ "epoch": 0.00184,
167
+ "grad_norm": 0.5530946850776672,
168
+ "learning_rate": 0.00018153846153846155,
169
+ "loss": 0.8919,
170
+ "step": 23
171
+ },
172
+ {
173
+ "epoch": 0.00192,
174
+ "grad_norm": 0.6729783415794373,
175
+ "learning_rate": 0.00018051282051282052,
176
+ "loss": 0.4719,
177
+ "step": 24
178
+ },
179
+ {
180
+ "epoch": 0.002,
181
+ "grad_norm": 0.5149514675140381,
182
+ "learning_rate": 0.0001794871794871795,
183
+ "loss": 0.3778,
184
+ "step": 25
185
+ },
186
+ {
187
+ "epoch": 0.00208,
188
+ "grad_norm": 0.6156336069107056,
189
+ "learning_rate": 0.00017846153846153847,
190
+ "loss": 0.4884,
191
+ "step": 26
192
+ },
193
+ {
194
+ "epoch": 0.00216,
195
+ "grad_norm": 0.6409617066383362,
196
+ "learning_rate": 0.00017743589743589744,
197
+ "loss": 0.4507,
198
+ "step": 27
199
+ },
200
+ {
201
+ "epoch": 0.00224,
202
+ "grad_norm": 0.6406662464141846,
203
+ "learning_rate": 0.00017641025641025642,
204
+ "loss": 0.4805,
205
+ "step": 28
206
+ },
207
+ {
208
+ "epoch": 0.00232,
209
+ "grad_norm": 0.6323011517524719,
210
+ "learning_rate": 0.0001753846153846154,
211
+ "loss": 0.7445,
212
+ "step": 29
213
+ },
214
+ {
215
+ "epoch": 0.0024,
216
+ "grad_norm": 0.5450726747512817,
217
+ "learning_rate": 0.00017435897435897436,
218
+ "loss": 0.7906,
219
+ "step": 30
220
  }
221
  ],
222
  "logging_steps": 1,
 
236
  "attributes": {}
237
  }
238
  },
239
+ "total_flos": 8053805909950464.0,
240
  "train_batch_size": 2,
241
  "trial_name": null,
242
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee3d2f856f997c674f6bb7fe372a2487f94f510b9888e9ae1ace34a8f9c4424b
3
- size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f97f8f21692b92eba2621575ec0a1f5b1dbf41c8aca4fd8165bf5c79e0de6d06
3
+ size 5624