kavanmevada commited on
Commit
f9c4b48
·
verified ·
1 Parent(s): c1dfa0b

Training in progress, step 70, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8c439be4803744433b47ca728c7a63d53901013c38bf5d7cf39d918f132b1ee
3
  size 3299862016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbb7a2a016a29e85a73a1b83fe04b79c45ab830ca66b55dcd4e6288bc339313c
3
  size 3299862016
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8a46f192c7007ad6ce5da6988c91bee9948ab2738cddfc8019e1fb41aa163d0
3
  size 3300016907
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5070700dbd67e0d35545a8dd8a734e90f49b9574f8e0397bae234b11da431263
3
  size 3300016907
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91ca5a4bc45dfb99eea6c6cd6eb5d8a912a32e73758c41c0b0f334f0ec0c5d35
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a135c944323c853219eae2b546147a5feb5d615450aab3e93244e542a70e149
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0002667265319549499,
6
  "eval_steps": 500,
7
- "global_step": 60,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -428,6 +428,76 @@
428
  "learning_rate": 1.311417108436413e-08,
429
  "loss": 11.2566,
430
  "step": 60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
  }
432
  ],
433
  "logging_steps": 1,
@@ -447,7 +517,7 @@
447
  "attributes": {}
448
  }
449
  },
450
- "total_flos": 4674673575198720.0,
451
  "train_batch_size": 1,
452
  "trial_name": null,
453
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.00031118095394744153,
6
  "eval_steps": 500,
7
+ "global_step": 70,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
428
  "learning_rate": 1.311417108436413e-08,
429
  "loss": 11.2566,
430
  "step": 60
431
+ },
432
+ {
433
+ "epoch": 0.00027117197415419905,
434
+ "grad_norm": 13.6875,
435
+ "learning_rate": 1.3336445170539793e-08,
436
+ "loss": 11.2566,
437
+ "step": 61
438
+ },
439
+ {
440
+ "epoch": 0.0002756174163534482,
441
+ "grad_norm": 15.0625,
442
+ "learning_rate": 1.3558719256715456e-08,
443
+ "loss": 11.2535,
444
+ "step": 62
445
+ },
446
+ {
447
+ "epoch": 0.0002800628585526974,
448
+ "grad_norm": 18.625,
449
+ "learning_rate": 1.3780993342891119e-08,
450
+ "loss": 11.2911,
451
+ "step": 63
452
+ },
453
+ {
454
+ "epoch": 0.00028450830075194656,
455
+ "grad_norm": 18.25,
456
+ "learning_rate": 1.4003267429066783e-08,
457
+ "loss": 11.2483,
458
+ "step": 64
459
+ },
460
+ {
461
+ "epoch": 0.00028895374295119573,
462
+ "grad_norm": 18.125,
463
+ "learning_rate": 1.4225541515242446e-08,
464
+ "loss": 11.2932,
465
+ "step": 65
466
+ },
467
+ {
468
+ "epoch": 0.0002933991851504449,
469
+ "grad_norm": 19.875,
470
+ "learning_rate": 1.4447815601418109e-08,
471
+ "loss": 11.228,
472
+ "step": 66
473
+ },
474
+ {
475
+ "epoch": 0.000297844627349694,
476
+ "grad_norm": 14.9375,
477
+ "learning_rate": 1.4670089687593772e-08,
478
+ "loss": 11.2736,
479
+ "step": 67
480
+ },
481
+ {
482
+ "epoch": 0.0003022900695489432,
483
+ "grad_norm": 11.8125,
484
+ "learning_rate": 1.4892363773769438e-08,
485
+ "loss": 11.2563,
486
+ "step": 68
487
+ },
488
+ {
489
+ "epoch": 0.00030673551174819236,
490
+ "grad_norm": 24.875,
491
+ "learning_rate": 1.51146378599451e-08,
492
+ "loss": 11.2704,
493
+ "step": 69
494
+ },
495
+ {
496
+ "epoch": 0.00031118095394744153,
497
+ "grad_norm": 15.25,
498
+ "learning_rate": 1.5336911946120762e-08,
499
+ "loss": 11.2611,
500
+ "step": 70
501
  }
502
  ],
503
  "logging_steps": 1,
 
517
  "attributes": {}
518
  }
519
  },
520
+ "total_flos": 5453785837731840.0,
521
  "train_batch_size": 1,
522
  "trial_name": null,
523
  "trial_params": null