SystemAdmin123 committed on
Commit
ae1ed32
·
verified ·
1 Parent(s): cd2d728

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac3c5cb2e77aa09de86102669fab0a58c5fbb2fac38941de01eaa06680ecd6b5
3
  size 2066752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40c2ecd92e57b2e66ca22969473f53cb6e090828806462e132c671e6e2d3f419
3
  size 2066752
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de1afa08e042afe2b7724794e5abc6b3c2844ee734ce29154ef19d4db24a536
3
  size 2162798
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5b74b9dc7e89f78c99f07af87ab50ab5df7a6480691241092ee5f8486f93d4d
3
  size 2162798
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4789394f5e9bf3695e87ba89acf1261237e8f92d8ca463a31a9d39185674969
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bddb8fcdae8dea7e10388731f63246ec0494e51d6d509273113166b14d83f0e3
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95830d8f799e5857092ffa30bec2dad56d615ec4ba20eae91e7157878213510f
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24238ca2ad5e52c7a2d983c253be8a0ab25f258c2785c7ba56c4bfa08fb89d84
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5258a10d326b43bb322f9f7412c08a5d462c4dd53d2018b15d95caea3824d34f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d429d3d3635edcf38935f000b0d1f4e5db465042c289fb4623c33dce588231ab
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.153846153846154,
5
  "eval_steps": 40,
6
- "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -87,6 +87,20 @@
87
  "eval_samples_per_second": 285.294,
88
  "eval_steps_per_second": 4.562,
89
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  }
91
  ],
92
  "logging_steps": 10,
@@ -106,7 +120,7 @@
106
  "attributes": {}
107
  }
108
  },
109
- "total_flos": 32732683960320.0,
110
  "train_batch_size": 32,
111
  "trial_name": null,
112
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.6923076923076925,
5
  "eval_steps": 40,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
87
  "eval_samples_per_second": 285.294,
88
  "eval_steps_per_second": 4.562,
89
  "step": 80
90
+ },
91
+ {
92
+ "epoch": 6.923076923076923,
93
+ "grad_norm": 0.33203125,
94
+ "learning_rate": 0.00019458172417006347,
95
+ "loss": 10.0565,
96
+ "step": 90
97
+ },
98
+ {
99
+ "epoch": 7.6923076923076925,
100
+ "grad_norm": 0.341796875,
101
+ "learning_rate": 0.00019264940672148018,
102
+ "loss": 9.998,
103
+ "step": 100
104
  }
105
  ],
106
  "logging_steps": 10,
 
120
  "attributes": {}
121
  }
122
  },
123
+ "total_flos": 40915854950400.0,
124
  "train_batch_size": 32,
125
  "trial_name": null,
126
  "trial_params": null