Media1129 committed on
Commit
1e284f1
·
1 Parent(s): 833c717
Files changed (5) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +123 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9db7b16c71061c51c95abdd9a98926418625727c317ce63822699d83286752f0
3
  size 871313389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:579babb9ce23573912a301bc1751de67cf9825ec3ea9317a2e01ffe73fe79455
3
  size 871313389
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77a3836119cefac1ae6ae48708f4c04ffeb0e154f5e7affc3391210581fb2472
3
  size 435682807
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:066ce09e712752766615c1b53e42e41f5d069d3b600761e41b63ce9cebbdf3f0
3
  size 435682807
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20f8ab5fc2c073e108f2126ff07a14c70c8351f6b3e50e7f66ea48efcf4b2af2
3
  size 14657
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22ef52bddeff60306a441bc1a3df088a0a8624db9d38fee3f8b4d84b0ab49949
3
  size 14657
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8280222b67d76838a5f5a8e175e89c9fa85060a19ae5e227aea995ff67e7b224
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:042af15c79784565ba53d294ccda64499ce1432cf6644e625e4cc137910bd1d8
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.61271676300578,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -126,11 +126,131 @@
126
  "learning_rate": 3.19364161849711e-05,
127
  "loss": 0.0033,
128
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  }
130
  ],
131
  "max_steps": 27680,
132
  "num_train_epochs": 10,
133
- "total_flos": 1550396267341344.0,
134
  "trial_name": null,
135
  "trial_params": null
136
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.22543352601156,
5
+ "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
126
  "learning_rate": 3.19364161849711e-05,
127
  "loss": 0.0033,
128
  "step": 10000
129
+ },
130
+ {
131
+ "epoch": 3.79,
132
+ "learning_rate": 3.103323699421966e-05,
133
+ "loss": 0.0009,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 3.97,
138
+ "learning_rate": 3.013005780346821e-05,
139
+ "loss": 0.0012,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 4.15,
144
+ "learning_rate": 2.9226878612716762e-05,
145
+ "loss": 0.0007,
146
+ "step": 11500
147
+ },
148
+ {
149
+ "epoch": 4.34,
150
+ "learning_rate": 2.832369942196532e-05,
151
+ "loss": 0.0019,
152
+ "step": 12000
153
+ },
154
+ {
155
+ "epoch": 4.52,
156
+ "learning_rate": 2.7420520231213876e-05,
157
+ "loss": 0.0009,
158
+ "step": 12500
159
+ },
160
+ {
161
+ "epoch": 4.7,
162
+ "learning_rate": 2.651734104046243e-05,
163
+ "loss": 0.0006,
164
+ "step": 13000
165
+ },
166
+ {
167
+ "epoch": 4.88,
168
+ "learning_rate": 2.5614161849710984e-05,
169
+ "loss": 0.0005,
170
+ "step": 13500
171
+ },
172
+ {
173
+ "epoch": 5.06,
174
+ "learning_rate": 2.471098265895954e-05,
175
+ "loss": 0.0021,
176
+ "step": 14000
177
+ },
178
+ {
179
+ "epoch": 5.24,
180
+ "learning_rate": 2.380780346820809e-05,
181
+ "loss": 0.0023,
182
+ "step": 14500
183
+ },
184
+ {
185
+ "epoch": 5.42,
186
+ "learning_rate": 2.290462427745665e-05,
187
+ "loss": 0.0004,
188
+ "step": 15000
189
+ },
190
+ {
191
+ "epoch": 5.6,
192
+ "learning_rate": 2.2001445086705202e-05,
193
+ "loss": 0.0008,
194
+ "step": 15500
195
+ },
196
+ {
197
+ "epoch": 5.78,
198
+ "learning_rate": 2.1098265895953757e-05,
199
+ "loss": 0.0001,
200
+ "step": 16000
201
+ },
202
+ {
203
+ "epoch": 5.96,
204
+ "learning_rate": 2.0195086705202312e-05,
205
+ "loss": 0.0011,
206
+ "step": 16500
207
+ },
208
+ {
209
+ "epoch": 6.14,
210
+ "learning_rate": 1.9291907514450868e-05,
211
+ "loss": 0.0015,
212
+ "step": 17000
213
+ },
214
+ {
215
+ "epoch": 6.32,
216
+ "learning_rate": 1.8388728323699423e-05,
217
+ "loss": 0.0004,
218
+ "step": 17500
219
+ },
220
+ {
221
+ "epoch": 6.5,
222
+ "learning_rate": 1.748554913294798e-05,
223
+ "loss": 0.0003,
224
+ "step": 18000
225
+ },
226
+ {
227
+ "epoch": 6.68,
228
+ "learning_rate": 1.6582369942196534e-05,
229
+ "loss": 0.0,
230
+ "step": 18500
231
+ },
232
+ {
233
+ "epoch": 6.86,
234
+ "learning_rate": 1.567919075144509e-05,
235
+ "loss": 0.0002,
236
+ "step": 19000
237
+ },
238
+ {
239
+ "epoch": 7.04,
240
+ "learning_rate": 1.4776011560693643e-05,
241
+ "loss": 0.0005,
242
+ "step": 19500
243
+ },
244
+ {
245
+ "epoch": 7.23,
246
+ "learning_rate": 1.3872832369942197e-05,
247
+ "loss": 0.0001,
248
+ "step": 20000
249
  }
250
  ],
251
  "max_steps": 27680,
252
  "num_train_epochs": 10,
253
+ "total_flos": 3107617302960360.0,
254
  "trial_name": null,
255
  "trial_params": null
256
  }