Thunderbolts123 commited on
Commit
0a52b7d
·
verified ·
1 Parent(s): 887d292

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b32b5959735aa57b511b7726bfbc2ef5de45ff1ebf9d62c5f60891e02815697b
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09c982744a2e391f73af0e352e0a17f8fbe631dce3317e7eceafc8381ba0953f
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7bb1fceeac45ca2476246b3d206978cdd4cb987a38fb6a0552e774331666a57
3
- size 243807941
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:641da55da61aa12bcdb51eded80a666515608d903c7ddf5cd1bdf125653afb89
3
+ size 243808453
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:317b5a305b2a9e21e527e7f85fdb3c6126a0ca02234bcb93021996746c86138a
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b02df20a55610bece1d11ccda94ef741d9490de46861191025bd6f932e04e77c
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8bf6871cccebbd8019e51a8751deebfdc1a27237b371091ed859a0e2e1ce5c9
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b89d6b7337c8b71ddbff9a2bc80a7133f2055b441080e4bb499fae131f0d3301
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.16,
6
  "eval_steps": 500,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -148,6 +148,146 @@
148
  "learning_rate": 0.00018841521819342236,
149
  "loss": 0.8466087341308594,
150
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  }
152
  ],
153
  "logging_steps": 10,
@@ -167,7 +307,7 @@
167
  "attributes": {}
168
  }
169
  },
170
- "total_flos": 2.8420601253617664e+16,
171
  "train_batch_size": 1,
172
  "trial_name": null,
173
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.32,
6
  "eval_steps": 500,
7
+ "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
148
  "learning_rate": 0.00018841521819342236,
149
  "loss": 0.8466087341308594,
150
  "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.168,
154
+ "grad_norm": 0.33928611874580383,
155
+ "learning_rate": 0.00018682216046570475,
156
+ "loss": 0.8981865882873535,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.176,
161
+ "grad_norm": 0.3191171884536743,
162
+ "learning_rate": 0.00018513416398022802,
163
+ "loss": 0.8822257995605469,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.184,
168
+ "grad_norm": 0.3258090615272522,
169
+ "learning_rate": 0.0001833530745363661,
170
+ "loss": 0.8913363456726074,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.192,
175
+ "grad_norm": 0.3579511046409607,
176
+ "learning_rate": 0.00018148083972927616,
177
+ "loss": 0.9036433219909668,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.2,
182
+ "grad_norm": 0.322074830532074,
183
+ "learning_rate": 0.00017951950682023191,
184
+ "loss": 0.8622542381286621,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.208,
189
+ "grad_norm": 0.3411216735839844,
190
+ "learning_rate": 0.00017747122049797335,
191
+ "loss": 0.9023735046386718,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.216,
196
+ "grad_norm": 0.35992440581321716,
197
+ "learning_rate": 0.00017533822053352128,
198
+ "loss": 0.9113937377929687,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 0.224,
203
+ "grad_norm": 0.36170879006385803,
204
+ "learning_rate": 0.00017312283933102038,
205
+ "loss": 0.8803692817687988,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 0.232,
210
+ "grad_norm": 0.3963969349861145,
211
+ "learning_rate": 0.00017082749937728973,
212
+ "loss": 0.901433277130127,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 0.24,
217
+ "grad_norm": 0.3751416802406311,
218
+ "learning_rate": 0.00016845471059286887,
219
+ "loss": 0.859034538269043,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 0.248,
224
+ "grad_norm": 0.4061877727508545,
225
+ "learning_rate": 0.00016600706758745668,
226
+ "loss": 0.8167524337768555,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 0.256,
231
+ "grad_norm": 0.30562642216682434,
232
+ "learning_rate": 0.00016348724682274353,
233
+ "loss": 0.8657124519348145,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 0.264,
238
+ "grad_norm": 0.3281916677951813,
239
+ "learning_rate": 0.00016089800368574014,
240
+ "loss": 0.8574649810791015,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 0.272,
245
+ "grad_norm": 0.3317168951034546,
246
+ "learning_rate": 0.00015824216947580183,
247
+ "loss": 0.8774423599243164,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 0.28,
252
+ "grad_norm": 0.3389717638492584,
253
+ "learning_rate": 0.00015552264830864468,
254
+ "loss": 0.9006638526916504,
255
+ "step": 350
256
+ },
257
+ {
258
+ "epoch": 0.288,
259
+ "grad_norm": 0.4187377691268921,
260
+ "learning_rate": 0.00015274241394073733,
261
+ "loss": 0.865053367614746,
262
+ "step": 360
263
+ },
264
+ {
265
+ "epoch": 0.296,
266
+ "grad_norm": 0.33337095379829407,
267
+ "learning_rate": 0.00014990450651754207,
268
+ "loss": 0.8772675514221191,
269
+ "step": 370
270
+ },
271
+ {
272
+ "epoch": 0.304,
273
+ "grad_norm": 0.3234522044658661,
274
+ "learning_rate": 0.0001470120292491605,
275
+ "loss": 0.8527763366699219,
276
+ "step": 380
277
+ },
278
+ {
279
+ "epoch": 0.312,
280
+ "grad_norm": 0.39457225799560547,
281
+ "learning_rate": 0.00014406814501701857,
282
+ "loss": 0.8379721641540527,
283
+ "step": 390
284
+ },
285
+ {
286
+ "epoch": 0.32,
287
+ "grad_norm": 0.317992240190506,
288
+ "learning_rate": 0.00014107607291530256,
289
+ "loss": 0.9060126304626465,
290
+ "step": 400
291
  }
292
  ],
293
  "logging_steps": 10,
 
307
  "attributes": {}
308
  }
309
  },
310
+ "total_flos": 5.682222032356147e+16,
311
  "train_batch_size": 1,
312
  "trial_name": null,
313
  "trial_params": null