guyhadad01 committed on
Commit
c49e38c
·
verified ·
1 Parent(s): ef407f2

Training in progress, step 3400, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b79f31453e9489a3b25473dd16de181bf069ee2771bf0473ece225eab902297d
3
  size 471641972
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48393895744f0281dcc35ce037f939cbf0fcb9343f0e49a9b5a5800b72bf3aec
3
  size 471641972
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39a3235f25c4792928cfbbcc63e700e273fa3766c9b71ff7b09b54d36a41d5f5
3
  size 943405434
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5927d7bb5b738cb043c868545306c06435eb54da996000898577927f1d970803
3
  size 943405434
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fa4bd9692d07369beb5f2f11061992a323c35c7234002085db824618be90174
3
- size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81805f5d9be60e795c66f48836696944f5979ee35820fd897c90082596563348
3
+ size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:300da17205244156f64f5ed42fd5c64dd46e3af5e1f414de2c4903ba75da856f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7fa458e54f1f79beba7dd8cf425c5f35ff1c5b914a484c940ab3f4fb17abf3b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.08059641345960104,
6
  "eval_steps": 1000,
7
- "global_step": 3200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -456,6 +456,34 @@
456
  "learning_rate": 4.597143864598026e-05,
457
  "loss": 3.8954,
458
  "step": 3200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
459
  }
460
  ],
461
  "logging_steps": 50,
@@ -475,7 +503,7 @@
475
  "attributes": {}
476
  }
477
  },
478
- "total_flos": 1316360630797248.0,
479
  "train_batch_size": 32,
480
  "trial_name": null,
481
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.08563368930082611,
6
  "eval_steps": 1000,
7
+ "global_step": 3400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
456
  "learning_rate": 4.597143864598026e-05,
457
  "loss": 3.8954,
458
  "step": 3200
459
+ },
460
+ {
461
+ "epoch": 0.08185573241990732,
462
+ "grad_norm": 7.927274227142334,
463
+ "learning_rate": 4.5908472697964946e-05,
464
+ "loss": 3.9952,
465
+ "step": 3250
466
+ },
467
+ {
468
+ "epoch": 0.08311505138021358,
469
+ "grad_norm": 9.124794006347656,
470
+ "learning_rate": 4.584550674994963e-05,
471
+ "loss": 3.8936,
472
+ "step": 3300
473
+ },
474
+ {
475
+ "epoch": 0.08437437034051985,
476
+ "grad_norm": 8.415815353393555,
477
+ "learning_rate": 4.578254080193432e-05,
478
+ "loss": 3.9416,
479
+ "step": 3350
480
+ },
481
+ {
482
+ "epoch": 0.08563368930082611,
483
+ "grad_norm": 7.427456378936768,
484
+ "learning_rate": 4.5719574853919e-05,
485
+ "loss": 3.8502,
486
+ "step": 3400
487
  }
488
  ],
489
  "logging_steps": 50,
 
503
  "attributes": {}
504
  }
505
  },
506
+ "total_flos": 1399136881240896.0,
507
  "train_batch_size": 32,
508
  "trial_name": null,
509
  "trial_params": null