madhuHuggingface commited on
Commit
fbca40b
·
verified ·
1 Parent(s): c06f408

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:342b5b54eeabad3537367e1e8ec3c7d8f5384023782c14c1a4aef737f23bde89
3
  size 60785144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d27e9476dc918efebb61651f7e9f759934ffdffe549c6fc2165cc9ced32d93a6
3
  size 60785144
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7042815b336b5beda88f3e7c0fa8756367e0e644ed3b573dc2a2619ccc065410
3
  size 31149205
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6ce41753fb994b8f796034d662e460bea6de0413caeaa2d0ec5be27d28aba58
3
  size 31149205
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fd1ec6747143d481855590b0ce95939b6941b3b5048b94e65615074cacfdb81
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35a14be938754e2a3aa3ebe18bdde4e86b890e4e0ff3c1f2a56a75942036606
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.6,
6
  "eval_steps": 500,
7
- "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -288,6 +288,76 @@
288
  "learning_rate": 9.39786722634207e-05,
289
  "loss": 0.0157,
290
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  }
292
  ],
293
  "logging_steps": 10,
@@ -307,7 +377,7 @@
307
  "attributes": {}
308
  }
309
  },
310
- "total_flos": 1277212245955584.0,
311
  "train_batch_size": 2,
312
  "trial_name": null,
313
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
  "eval_steps": 500,
7
+ "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
288
  "learning_rate": 9.39786722634207e-05,
289
  "loss": 0.0157,
290
  "step": 400
291
+ },
292
+ {
293
+ "epoch": 1.6400000000000001,
294
+ "grad_norm": 0.3347119688987732,
295
+ "learning_rate": 8.968983025525654e-05,
296
+ "loss": 0.019,
297
+ "step": 410
298
+ },
299
+ {
300
+ "epoch": 1.6800000000000002,
301
+ "grad_norm": 0.4677681028842926,
302
+ "learning_rate": 8.542008030801254e-05,
303
+ "loss": 0.0119,
304
+ "step": 420
305
+ },
306
+ {
307
+ "epoch": 1.72,
308
+ "grad_norm": 0.2853691577911377,
309
+ "learning_rate": 8.11773290156756e-05,
310
+ "loss": 0.0327,
311
+ "step": 430
312
+ },
313
+ {
314
+ "epoch": 1.76,
315
+ "grad_norm": 0.13519690930843353,
316
+ "learning_rate": 7.696943297693878e-05,
317
+ "loss": 0.0133,
318
+ "step": 440
319
+ },
320
+ {
321
+ "epoch": 1.8,
322
+ "grad_norm": 0.15065522491931915,
323
+ "learning_rate": 7.280418424658946e-05,
324
+ "loss": 0.0193,
325
+ "step": 450
326
+ },
327
+ {
328
+ "epoch": 1.8399999999999999,
329
+ "grad_norm": 0.2511173188686371,
330
+ "learning_rate": 6.868929590641735e-05,
331
+ "loss": 0.013,
332
+ "step": 460
333
+ },
334
+ {
335
+ "epoch": 1.88,
336
+ "grad_norm": 0.2314622700214386,
337
+ "learning_rate": 6.463238778236288e-05,
338
+ "loss": 0.0137,
339
+ "step": 470
340
+ },
341
+ {
342
+ "epoch": 1.92,
343
+ "grad_norm": 0.002136136870831251,
344
+ "learning_rate": 6.064097233435333e-05,
345
+ "loss": 0.0203,
346
+ "step": 480
347
+ },
348
+ {
349
+ "epoch": 1.96,
350
+ "grad_norm": 0.5252191424369812,
351
+ "learning_rate": 5.672244074495689e-05,
352
+ "loss": 0.0097,
353
+ "step": 490
354
+ },
355
+ {
356
+ "epoch": 2.0,
357
+ "grad_norm": 0.46642419695854187,
358
+ "learning_rate": 5.288404923261361e-05,
359
+ "loss": 0.0199,
360
+ "step": 500
361
  }
362
  ],
363
  "logging_steps": 10,
 
377
  "attributes": {}
378
  }
379
  },
380
+ "total_flos": 1597906326839808.0,
381
  "train_batch_size": 2,
382
  "trial_name": null,
383
  "trial_params": null