Nadav committed on
Commit
0b01633
·
1 Parent(s): df68b64

Training in progress, step 10000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4d1ae83eac6be8f8c2b954c370c8aeadc7267bda680236a7a24a7649af5362c
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1d7732285dbc0e8407195393ce8d0f6d70acf3e85a7b4ef3f39c8609146bc0b
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0baba8b4569c45a203ea379a2bd3967765a89b7b77984726e0ce3ce542327d3
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2b9950baa41cad72de72ff958a3f0b3caf4baaab2ba0b7beec4ae2bb74a2a62
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b67640377e5e9bee3b80f7da077f845affe0a102a4d4e09d30cfa4a924a22956
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5d6b04968c3393a8d3fcd8486073945737f454dd868c26cd51d9e51b95e0e1f
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dba64a6270ae61fb92642c233ac6acc0599e93af9e8dc843c1040d701d07852c
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eec59b704dede5fd2cb3547af4da160bc4d245b31d03184f1306b83b75cd8e19
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f84d441103b80fa19700bab6a191373aa187c4bff502da3579fb8fa36762b6ca
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68906a5e3db4424f18aed3a659657b33e5a4636dccdcb491f81ba7b3cc4c516d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4253147329023477,
5
- "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -106,11 +106,111 @@
106
  "eval_samples_per_second": 24.61,
107
  "eval_steps_per_second": 0.788,
108
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  }
110
  ],
111
  "max_steps": 1000000,
112
  "num_train_epochs": 86,
113
- "total_flos": 2.300164844027904e+20,
114
  "trial_name": null,
115
  "trial_params": null
116
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8506294658046955,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
106
  "eval_samples_per_second": 24.61,
107
  "eval_steps_per_second": 0.788,
108
  "step": 5000
109
+ },
110
+ {
111
+ "epoch": 0.47,
112
+ "learning_rate": 9.999999999999999e-06,
113
+ "loss": 0.4588,
114
+ "step": 5500
115
+ },
116
+ {
117
+ "epoch": 0.51,
118
+ "learning_rate": 9.999999999999999e-06,
119
+ "loss": 0.4567,
120
+ "step": 6000
121
+ },
122
+ {
123
+ "epoch": 0.51,
124
+ "eval_loss": 0.4276145100593567,
125
+ "eval_runtime": 16.3756,
126
+ "eval_samples_per_second": 30.533,
127
+ "eval_steps_per_second": 0.977,
128
+ "step": 6000
129
+ },
130
+ {
131
+ "epoch": 0.55,
132
+ "learning_rate": 9.999999999999999e-06,
133
+ "loss": 0.4549,
134
+ "step": 6500
135
+ },
136
+ {
137
+ "epoch": 0.6,
138
+ "learning_rate": 9.999999999999999e-06,
139
+ "loss": 0.4527,
140
+ "step": 7000
141
+ },
142
+ {
143
+ "epoch": 0.6,
144
+ "eval_loss": 0.42289844155311584,
145
+ "eval_runtime": 15.9391,
146
+ "eval_samples_per_second": 31.369,
147
+ "eval_steps_per_second": 1.004,
148
+ "step": 7000
149
+ },
150
+ {
151
+ "epoch": 0.64,
152
+ "learning_rate": 9.999999999999999e-06,
153
+ "loss": 0.4523,
154
+ "step": 7500
155
+ },
156
+ {
157
+ "epoch": 0.68,
158
+ "learning_rate": 9.999999999999999e-06,
159
+ "loss": 0.4504,
160
+ "step": 8000
161
+ },
162
+ {
163
+ "epoch": 0.68,
164
+ "eval_loss": 0.4213045537471771,
165
+ "eval_runtime": 15.5457,
166
+ "eval_samples_per_second": 32.163,
167
+ "eval_steps_per_second": 1.029,
168
+ "step": 8000
169
+ },
170
+ {
171
+ "epoch": 0.72,
172
+ "learning_rate": 9.999999999999999e-06,
173
+ "loss": 0.449,
174
+ "step": 8500
175
+ },
176
+ {
177
+ "epoch": 0.77,
178
+ "learning_rate": 9.999999999999999e-06,
179
+ "loss": 0.4473,
180
+ "step": 9000
181
+ },
182
+ {
183
+ "epoch": 0.77,
184
+ "eval_loss": 0.41637736558914185,
185
+ "eval_runtime": 15.7487,
186
+ "eval_samples_per_second": 31.749,
187
+ "eval_steps_per_second": 1.016,
188
+ "step": 9000
189
+ },
190
+ {
191
+ "epoch": 0.81,
192
+ "learning_rate": 9.999999999999999e-06,
193
+ "loss": 0.4468,
194
+ "step": 9500
195
+ },
196
+ {
197
+ "epoch": 0.85,
198
+ "learning_rate": 9.999999999999999e-06,
199
+ "loss": 0.4447,
200
+ "step": 10000
201
+ },
202
+ {
203
+ "epoch": 0.85,
204
+ "eval_loss": 0.4148881733417511,
205
+ "eval_runtime": 15.3622,
206
+ "eval_samples_per_second": 32.547,
207
+ "eval_steps_per_second": 1.042,
208
+ "step": 10000
209
  }
210
  ],
211
  "max_steps": 1000000,
212
  "num_train_epochs": 86,
213
+ "total_flos": 4.600329688055808e+20,
214
  "trial_name": null,
215
  "trial_params": null
216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0baba8b4569c45a203ea379a2bd3967765a89b7b77984726e0ce3ce542327d3
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2b9950baa41cad72de72ff958a3f0b3caf4baaab2ba0b7beec4ae2bb74a2a62
3
  size 449471589