ljcamargo commited on
Commit
6068e14
·
verified ·
1 Parent(s): 4b53399

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b7bdfa9432865811566d801973f8536379093191662a7081831a5e5a10ab7ea
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8887655314c6a7fc55cd02e4ec047c0048e2272c7fd352777bb7dacb970f435
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98b8806bdda72b37431cd7a1864b5e940d2adca924c0255d88519f24424d35ce
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4544c40f9c86f284c54a215c2c1aec6b854bdd8ff99351fe18c83fabae909708
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ff58b41c3672e659a0eb46d9ed11a0ca17415e7a2643a3ddfbaebb9f4e67f8f
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e35963fbe17703d43e57c264c8bf401c049828d6ea5abe6c269f936eebec007
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04a7344de2b10f0c83d4bc41d4ac3ff0903cfb5c2b6ee1654e8dd79d5d353dd0
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dacf7348dc62d0b10f0ec94c4cde6e6d272f324c59483c8d9d8548880fb6b96
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.2,
6
  "eval_steps": 500,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -358,6 +358,76 @@
358
  "learning_rate": 4.027331189710611e-05,
359
  "loss": 1.0562,
360
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  }
362
  ],
363
  "logging_steps": 10,
@@ -377,7 +447,7 @@
377
  "attributes": {}
378
  }
379
  },
380
- "total_flos": 9020891125518336.0,
381
  "train_batch_size": 2,
382
  "trial_name": null,
383
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.24,
6
  "eval_steps": 500,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
358
  "learning_rate": 4.027331189710611e-05,
359
  "loss": 1.0562,
360
  "step": 500
361
+ },
362
+ {
363
+ "epoch": 0.204,
364
+ "grad_norm": 13.945029258728027,
365
+ "learning_rate": 4.0072347266881035e-05,
366
+ "loss": 0.9637,
367
+ "step": 510
368
+ },
369
+ {
370
+ "epoch": 0.208,
371
+ "grad_norm": 16.55429458618164,
372
+ "learning_rate": 3.9871382636655953e-05,
373
+ "loss": 0.8104,
374
+ "step": 520
375
+ },
376
+ {
377
+ "epoch": 0.212,
378
+ "grad_norm": 24.434778213500977,
379
+ "learning_rate": 3.967041800643087e-05,
380
+ "loss": 1.1942,
381
+ "step": 530
382
+ },
383
+ {
384
+ "epoch": 0.216,
385
+ "grad_norm": 20.01283836364746,
386
+ "learning_rate": 3.946945337620579e-05,
387
+ "loss": 0.9209,
388
+ "step": 540
389
+ },
390
+ {
391
+ "epoch": 0.22,
392
+ "grad_norm": 18.98524284362793,
393
+ "learning_rate": 3.926848874598071e-05,
394
+ "loss": 0.7445,
395
+ "step": 550
396
+ },
397
+ {
398
+ "epoch": 0.224,
399
+ "grad_norm": 22.44414710998535,
400
+ "learning_rate": 3.906752411575563e-05,
401
+ "loss": 0.9528,
402
+ "step": 560
403
+ },
404
+ {
405
+ "epoch": 0.228,
406
+ "grad_norm": 19.79057502746582,
407
+ "learning_rate": 3.886655948553055e-05,
408
+ "loss": 0.9216,
409
+ "step": 570
410
+ },
411
+ {
412
+ "epoch": 0.232,
413
+ "grad_norm": 17.453460693359375,
414
+ "learning_rate": 3.866559485530547e-05,
415
+ "loss": 0.9834,
416
+ "step": 580
417
+ },
418
+ {
419
+ "epoch": 0.236,
420
+ "grad_norm": 29.218969345092773,
421
+ "learning_rate": 3.846463022508039e-05,
422
+ "loss": 0.9945,
423
+ "step": 590
424
+ },
425
+ {
426
+ "epoch": 0.24,
427
+ "grad_norm": 17.652963638305664,
428
+ "learning_rate": 3.826366559485531e-05,
429
+ "loss": 0.9308,
430
+ "step": 600
431
  }
432
  ],
433
  "logging_steps": 10,
 
447
  "attributes": {}
448
  }
449
  },
450
+ "total_flos": 1.081989562033152e+16,
451
  "train_batch_size": 2,
452
  "trial_name": null,
453
  "trial_params": null