Sabbir772 commited on
Commit
cee8406
·
verified ·
1 Parent(s): dba0bc4

Training in progress, step 10180, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d87304ba9339f77e6175e6965b703a52428af51cedafc31a0bbfddf79d7965a6
3
  size 990185320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f54d77e7daba182457f5b934b683bb0c366abc8b22b61e6bcd269621abc92e9f
3
  size 990185320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12036c4d1c532ea86bb8a9c360dfbe0b8fe32c9677625d889c487420ab810ff9
3
  size 1980545291
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:135ed32c18796919d025ee0d031964f792393b973ea4bbaf40df292142688daf
3
  size 1980545291
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97f497ef69b8dbe5c6bba152f4cd98b501ac4e5a09248cff1e802c21fa6c2d4f
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8552f27df7abe09a972ce07245d6ac928db4275e959c227e9dc77f79689c125d
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a94f04e86ab58289e37d74269e24f1195144c7ee097f576e9404678cea6926f7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94d07f0b1a695f119065fa87f9ec7a10c06b521dc7b9d758333f96e37c1da2d9
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 4.911591355599214,
6
  "eval_steps": 400,
7
- "global_step": 10000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -958,6 +958,13 @@
958
  "eval_samples_per_second": 9.254,
959
  "eval_steps_per_second": 1.157,
960
  "step": 10000
 
 
 
 
 
 
 
961
  }
962
  ],
963
  "logging_steps": 100,
@@ -972,12 +979,12 @@
972
  "should_evaluate": false,
973
  "should_log": false,
974
  "should_save": true,
975
- "should_training_stop": false
976
  },
977
  "attributes": {}
978
  }
979
  },
980
- "total_flos": 2.738510164603699e+16,
981
  "train_batch_size": 8,
982
  "trial_name": null,
983
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 5.0,
6
  "eval_steps": 400,
7
+ "global_step": 10180,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
958
  "eval_samples_per_second": 9.254,
959
  "eval_steps_per_second": 1.157,
960
  "step": 10000
961
+ },
962
+ {
963
+ "epoch": 4.960707269155206,
964
+ "grad_norm": 4.8344407081604,
965
+ "learning_rate": 2.784872298624754e-07,
966
+ "loss": 1.0911,
967
+ "step": 10100
968
  }
969
  ],
970
  "logging_steps": 100,
 
979
  "should_evaluate": false,
980
  "should_log": false,
981
  "should_save": true,
982
+ "should_training_stop": true
983
  },
984
  "attributes": {}
985
  }
986
  },
987
+ "total_flos": 2.787739807776768e+16,
988
  "train_batch_size": 8,
989
  "trial_name": null,
990
  "trial_params": null