manbeast3b commited on
Commit
a5b1385
·
verified ·
1 Parent(s): 13747c9

Delete trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -313
trainer_state.json DELETED
@@ -1,313 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.12358882947118241,
5
- "eval_steps": 62,
6
- "global_step": 40,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.0030897207367795603,
13
- "grad_norm": 5275.14208984375,
14
- "learning_rate": 4.0000000000000004e-11,
15
- "loss": 11.2955,
16
- "step": 1
17
- },
18
- {
19
- "epoch": 0.006179441473559121,
20
- "grad_norm": 5153.88330078125,
21
- "learning_rate": 8.000000000000001e-11,
22
- "loss": 11.2979,
23
- "step": 2
24
- },
25
- {
26
- "epoch": 0.009269162210338681,
27
- "grad_norm": 5132.99609375,
28
- "learning_rate": 1.2e-10,
29
- "loss": 11.1883,
30
- "step": 3
31
- },
32
- {
33
- "epoch": 0.012358882947118241,
34
- "grad_norm": 5524.13623046875,
35
- "learning_rate": 1.6000000000000002e-10,
36
- "loss": 11.5185,
37
- "step": 4
38
- },
39
- {
40
- "epoch": 0.015448603683897801,
41
- "grad_norm": 5275.44970703125,
42
- "learning_rate": 2e-10,
43
- "loss": 11.202,
44
- "step": 5
45
- },
46
- {
47
- "epoch": 0.018538324420677363,
48
- "grad_norm": 4898.6171875,
49
- "learning_rate": 1.9984815164333162e-10,
50
- "loss": 11.0454,
51
- "step": 6
52
- },
53
- {
54
- "epoch": 0.02162804515745692,
55
- "grad_norm": 5124.095703125,
56
- "learning_rate": 1.9939306773179497e-10,
57
- "loss": 11.1537,
58
- "step": 7
59
- },
60
- {
61
- "epoch": 0.024717765894236483,
62
- "grad_norm": 5025.26025390625,
63
- "learning_rate": 1.9863613034027225e-10,
64
- "loss": 11.1345,
65
- "step": 8
66
- },
67
- {
68
- "epoch": 0.027807486631016044,
69
- "grad_norm": 4950.40576171875,
70
- "learning_rate": 1.9757963826274357e-10,
71
- "loss": 11.1104,
72
- "step": 9
73
- },
74
- {
75
- "epoch": 0.030897207367795602,
76
- "grad_norm": 4390.81787109375,
77
- "learning_rate": 1.9622680003092504e-10,
78
- "loss": 10.546,
79
- "step": 10
80
- },
81
- {
82
- "epoch": 0.03398692810457516,
83
- "grad_norm": 4808.67626953125,
84
- "learning_rate": 1.9458172417006345e-10,
85
- "loss": 10.8947,
86
- "step": 11
87
- },
88
- {
89
- "epoch": 0.037076648841354726,
90
- "grad_norm": 5642.1826171875,
91
- "learning_rate": 1.9264940672148017e-10,
92
- "loss": 11.7941,
93
- "step": 12
94
- },
95
- {
96
- "epoch": 0.040166369578134284,
97
- "grad_norm": 4708.16650390625,
98
- "learning_rate": 1.9043571606975777e-10,
99
- "loss": 10.8668,
100
- "step": 13
101
- },
102
- {
103
- "epoch": 0.04325609031491384,
104
- "grad_norm": 5043.15966796875,
105
- "learning_rate": 1.879473751206489e-10,
106
- "loss": 11.0351,
107
- "step": 14
108
- },
109
- {
110
- "epoch": 0.04634581105169341,
111
- "grad_norm": 5204.0048828125,
112
- "learning_rate": 1.851919408838327e-10,
113
- "loss": 11.2322,
114
- "step": 15
115
- },
116
- {
117
- "epoch": 0.049435531788472965,
118
- "grad_norm": 4756.47705078125,
119
- "learning_rate": 1.821777815225245e-10,
120
- "loss": 10.8011,
121
- "step": 16
122
- },
123
- {
124
- "epoch": 0.052525252525252523,
125
- "grad_norm": 4809.21533203125,
126
- "learning_rate": 1.7891405093963936e-10,
127
- "loss": 10.8063,
128
- "step": 17
129
- },
130
- {
131
- "epoch": 0.05561497326203209,
132
- "grad_norm": 5400.25927734375,
133
- "learning_rate": 1.7541066097768963e-10,
134
- "loss": 11.4944,
135
- "step": 18
136
- },
137
- {
138
- "epoch": 0.05870469399881165,
139
- "grad_norm": 5103.818359375,
140
- "learning_rate": 1.7167825131684515e-10,
141
- "loss": 11.2059,
142
- "step": 19
143
- },
144
- {
145
- "epoch": 0.061794414735591205,
146
- "grad_norm": 4988.18798828125,
147
- "learning_rate": 1.6772815716257412e-10,
148
- "loss": 11.0932,
149
- "step": 20
150
- },
151
- {
152
- "epoch": 0.06488413547237076,
153
- "grad_norm": 5250.4130859375,
154
- "learning_rate": 1.6357237482099684e-10,
155
- "loss": 11.3196,
156
- "step": 21
157
- },
158
- {
159
- "epoch": 0.06797385620915032,
160
- "grad_norm": 5046.6142578125,
161
- "learning_rate": 1.59223525266498e-10,
162
- "loss": 11.1777,
163
- "step": 22
164
- },
165
- {
166
- "epoch": 0.0710635769459299,
167
- "grad_norm": 4959.35400390625,
168
- "learning_rate": 1.5469481581224272e-10,
169
- "loss": 11.2175,
170
- "step": 23
171
- },
172
- {
173
- "epoch": 0.07415329768270945,
174
- "grad_norm": 5114.306640625,
175
- "learning_rate": 1.5e-10,
176
- "loss": 11.0837,
177
- "step": 24
178
- },
179
- {
180
- "epoch": 0.07724301841948901,
181
- "grad_norm": 4886.6083984375,
182
- "learning_rate": 1.4515333583108894e-10,
183
- "loss": 10.9479,
184
- "step": 25
185
- },
186
- {
187
- "epoch": 0.08033273915626857,
188
- "grad_norm": 5151.87548828125,
189
- "learning_rate": 1.4016954246529695e-10,
190
- "loss": 11.1946,
191
- "step": 26
192
- },
193
- {
194
- "epoch": 0.08342245989304813,
195
- "grad_norm": 4926.22314453125,
196
- "learning_rate": 1.3506375551927545e-10,
197
- "loss": 11.1436,
198
- "step": 27
199
- },
200
- {
201
- "epoch": 0.08651218062982768,
202
- "grad_norm": 5186.583984375,
203
- "learning_rate": 1.2985148110016947e-10,
204
- "loss": 11.3389,
205
- "step": 28
206
- },
207
- {
208
- "epoch": 0.08960190136660724,
209
- "grad_norm": 4836.59130859375,
210
- "learning_rate": 1.2454854871407994e-10,
211
- "loss": 11.131,
212
- "step": 29
213
- },
214
- {
215
- "epoch": 0.09269162210338681,
216
- "grad_norm": 5271.8818359375,
217
- "learning_rate": 1.1917106319237384e-10,
218
- "loss": 11.4785,
219
- "step": 30
220
- },
221
- {
222
- "epoch": 0.09578134284016637,
223
- "grad_norm": 4995.57373046875,
224
- "learning_rate": 1.1373535578184083e-10,
225
- "loss": 11.1493,
226
- "step": 31
227
- },
228
- {
229
- "epoch": 0.09887106357694593,
230
- "grad_norm": 4872.11376953125,
231
- "learning_rate": 1.0825793454723326e-10,
232
- "loss": 10.97,
233
- "step": 32
234
- },
235
- {
236
- "epoch": 0.10196078431372549,
237
- "grad_norm": 5491.39013671875,
238
- "learning_rate": 1.0275543423681622e-10,
239
- "loss": 11.5456,
240
- "step": 33
241
- },
242
- {
243
- "epoch": 0.10505050505050505,
244
- "grad_norm": 5149.216796875,
245
- "learning_rate": 9.724456576318382e-11,
246
- "loss": 11.257,
247
- "step": 34
248
- },
249
- {
250
- "epoch": 0.1081402257872846,
251
- "grad_norm": 5288.52294921875,
252
- "learning_rate": 9.174206545276678e-11,
253
- "loss": 11.4861,
254
- "step": 35
255
- },
256
- {
257
- "epoch": 0.11122994652406418,
258
- "grad_norm": 5499.283203125,
259
- "learning_rate": 8.626464421815918e-11,
260
- "loss": 11.4375,
261
- "step": 36
262
- },
263
- {
264
- "epoch": 0.11431966726084374,
265
- "grad_norm": 4957.93310546875,
266
- "learning_rate": 8.082893680762619e-11,
267
- "loss": 11.0605,
268
- "step": 37
269
- },
270
- {
271
- "epoch": 0.1174093879976233,
272
- "grad_norm": 4736.22705078125,
273
- "learning_rate": 7.54514512859201e-11,
274
- "loss": 10.8943,
275
- "step": 38
276
- },
277
- {
278
- "epoch": 0.12049910873440285,
279
- "grad_norm": 5041.0615234375,
280
- "learning_rate": 7.014851889983058e-11,
281
- "loss": 11.2565,
282
- "step": 39
283
- },
284
- {
285
- "epoch": 0.12358882947118241,
286
- "grad_norm": 5379.4970703125,
287
- "learning_rate": 6.493624448072457e-11,
288
- "loss": 11.4698,
289
- "step": 40
290
- }
291
- ],
292
- "logging_steps": 1,
293
- "max_steps": 62,
294
- "num_input_tokens_seen": 0,
295
- "num_train_epochs": 1,
296
- "save_steps": 10,
297
- "stateful_callbacks": {
298
- "TrainerControl": {
299
- "args": {
300
- "should_epoch_stop": false,
301
- "should_evaluate": false,
302
- "should_log": false,
303
- "should_save": true,
304
- "should_training_stop": false
305
- },
306
- "attributes": {}
307
- }
308
- },
309
- "total_flos": 6.637517228998656e+16,
310
- "train_batch_size": 2,
311
- "trial_name": null,
312
- "trial_params": null
313
- }