Tnt3o5 commited on
Commit
167ed37
·
verified ·
1 Parent(s): 7ef4397

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -342
trainer_state.json DELETED
@@ -1,342 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 1.3249907986750091,
6
- "eval_steps": 500,
7
- "global_step": 3600,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.0368052999631947,
14
- "grad_norm": 6.317627906799316,
15
- "learning_rate": 3.991842559741558e-05,
16
- "loss": 1.3232,
17
- "step": 100
18
- },
19
- {
20
- "epoch": 0.0736105999263894,
21
- "grad_norm": 6.4019927978515625,
22
- "learning_rate": 3.9770967337294366e-05,
23
- "loss": 1.1983,
24
- "step": 200
25
- },
26
- {
27
- "epoch": 0.1104158998895841,
28
- "grad_norm": 5.58203649520874,
29
- "learning_rate": 3.962350907717314e-05,
30
- "loss": 1.1226,
31
- "step": 300
32
- },
33
- {
34
- "epoch": 0.1472211998527788,
35
- "grad_norm": 4.505954742431641,
36
- "learning_rate": 3.947605081705192e-05,
37
- "loss": 0.7575,
38
- "step": 400
39
- },
40
- {
41
- "epoch": 0.1840264998159735,
42
- "grad_norm": 5.4588165283203125,
43
- "learning_rate": 3.93285925569307e-05,
44
- "loss": 0.684,
45
- "step": 500
46
- },
47
- {
48
- "epoch": 0.1840264998159735,
49
- "eval_loss": 2.1012208461761475,
50
- "eval_runtime": 663.3742,
51
- "eval_samples_per_second": 8.193,
52
- "eval_steps_per_second": 1.025,
53
- "step": 500
54
- },
55
- {
56
- "epoch": 0.2208317997791682,
57
- "grad_norm": 4.757272720336914,
58
- "learning_rate": 3.918113429680948e-05,
59
- "loss": 0.6552,
60
- "step": 600
61
- },
62
- {
63
- "epoch": 0.2576370997423629,
64
- "grad_norm": 4.794644355773926,
65
- "learning_rate": 3.903367603668826e-05,
66
- "loss": 0.6429,
67
- "step": 700
68
- },
69
- {
70
- "epoch": 0.2944423997055576,
71
- "grad_norm": 5.151235103607178,
72
- "learning_rate": 3.888621777656704e-05,
73
- "loss": 0.6567,
74
- "step": 800
75
- },
76
- {
77
- "epoch": 0.3312476996687523,
78
- "grad_norm": 5.006649971008301,
79
- "learning_rate": 3.8738759516445824e-05,
80
- "loss": 0.6278,
81
- "step": 900
82
- },
83
- {
84
- "epoch": 0.368052999631947,
85
- "grad_norm": 6.711447715759277,
86
- "learning_rate": 3.85913012563246e-05,
87
- "loss": 0.637,
88
- "step": 1000
89
- },
90
- {
91
- "epoch": 0.368052999631947,
92
- "eval_loss": 2.1562345027923584,
93
- "eval_runtime": 663.0039,
94
- "eval_samples_per_second": 8.198,
95
- "eval_steps_per_second": 1.026,
96
- "step": 1000
97
- },
98
- {
99
- "epoch": 0.4048582995951417,
100
- "grad_norm": 5.702431678771973,
101
- "learning_rate": 3.844384299620338e-05,
102
- "loss": 0.5867,
103
- "step": 1100
104
- },
105
- {
106
- "epoch": 0.4416635995583364,
107
- "grad_norm": 4.582945346832275,
108
- "learning_rate": 3.829638473608216e-05,
109
- "loss": 0.6156,
110
- "step": 1200
111
- },
112
- {
113
- "epoch": 0.4784688995215311,
114
- "grad_norm": 4.968910217285156,
115
- "learning_rate": 3.814892647596094e-05,
116
- "loss": 0.6213,
117
- "step": 1300
118
- },
119
- {
120
- "epoch": 0.5152741994847257,
121
- "grad_norm": 5.8628950119018555,
122
- "learning_rate": 3.800146821583972e-05,
123
- "loss": 0.6036,
124
- "step": 1400
125
- },
126
- {
127
- "epoch": 0.5520794994479205,
128
- "grad_norm": 4.60435676574707,
129
- "learning_rate": 3.7854009955718494e-05,
130
- "loss": 0.5947,
131
- "step": 1500
132
- },
133
- {
134
- "epoch": 0.5520794994479205,
135
- "eval_loss": 2.2223360538482666,
136
- "eval_runtime": 662.8599,
137
- "eval_samples_per_second": 8.199,
138
- "eval_steps_per_second": 1.026,
139
- "step": 1500
140
- },
141
- {
142
- "epoch": 0.5888847994111152,
143
- "grad_norm": 4.219241142272949,
144
- "learning_rate": 3.7706551695597275e-05,
145
- "loss": 0.61,
146
- "step": 1600
147
- },
148
- {
149
- "epoch": 0.6256900993743099,
150
- "grad_norm": 4.9983344078063965,
151
- "learning_rate": 3.7559093435476055e-05,
152
- "loss": 0.5801,
153
- "step": 1700
154
- },
155
- {
156
- "epoch": 0.6624953993375046,
157
- "grad_norm": 5.795677185058594,
158
- "learning_rate": 3.7411635175354836e-05,
159
- "loss": 0.6016,
160
- "step": 1800
161
- },
162
- {
163
- "epoch": 0.6993006993006993,
164
- "grad_norm": 4.981507778167725,
165
- "learning_rate": 3.726417691523362e-05,
166
- "loss": 0.5839,
167
- "step": 1900
168
- },
169
- {
170
- "epoch": 0.736105999263894,
171
- "grad_norm": 5.115480899810791,
172
- "learning_rate": 3.71167186551124e-05,
173
- "loss": 0.5946,
174
- "step": 2000
175
- },
176
- {
177
- "epoch": 0.736105999263894,
178
- "eval_loss": 2.1914401054382324,
179
- "eval_runtime": 663.2825,
180
- "eval_samples_per_second": 8.194,
181
- "eval_steps_per_second": 1.025,
182
- "step": 2000
183
- },
184
- {
185
- "epoch": 0.7729112992270887,
186
- "grad_norm": 4.6113176345825195,
187
- "learning_rate": 3.696926039499117e-05,
188
- "loss": 0.5763,
189
- "step": 2100
190
- },
191
- {
192
- "epoch": 0.8097165991902834,
193
- "grad_norm": 4.699350833892822,
194
- "learning_rate": 3.682180213486995e-05,
195
- "loss": 0.6007,
196
- "step": 2200
197
- },
198
- {
199
- "epoch": 0.8465218991534781,
200
- "grad_norm": 4.8883233070373535,
201
- "learning_rate": 3.667434387474873e-05,
202
- "loss": 0.5741,
203
- "step": 2300
204
- },
205
- {
206
- "epoch": 0.8833271991166728,
207
- "grad_norm": 5.460277557373047,
208
- "learning_rate": 3.652688561462751e-05,
209
- "loss": 0.5596,
210
- "step": 2400
211
- },
212
- {
213
- "epoch": 0.9201324990798675,
214
- "grad_norm": 4.431008338928223,
215
- "learning_rate": 3.6379427354506294e-05,
216
- "loss": 0.5831,
217
- "step": 2500
218
- },
219
- {
220
- "epoch": 0.9201324990798675,
221
- "eval_loss": 2.2241196632385254,
222
- "eval_runtime": 662.9801,
223
- "eval_samples_per_second": 8.198,
224
- "eval_steps_per_second": 1.026,
225
- "step": 2500
226
- },
227
- {
228
- "epoch": 0.9569377990430622,
229
- "grad_norm": 4.917581081390381,
230
- "learning_rate": 3.6231969094385074e-05,
231
- "loss": 0.5956,
232
- "step": 2600
233
- },
234
- {
235
- "epoch": 0.9937430990062569,
236
- "grad_norm": 5.325926780700684,
237
- "learning_rate": 3.6084510834263855e-05,
238
- "loss": 0.6783,
239
- "step": 2700
240
- },
241
- {
242
- "epoch": 1.0305483989694515,
243
- "grad_norm": 3.779780149459839,
244
- "learning_rate": 3.593705257414263e-05,
245
- "loss": 0.356,
246
- "step": 2800
247
- },
248
- {
249
- "epoch": 1.0673536989326462,
250
- "grad_norm": 7.602641582489014,
251
- "learning_rate": 3.5789594314021416e-05,
252
- "loss": 0.2066,
253
- "step": 2900
254
- },
255
- {
256
- "epoch": 1.104158998895841,
257
- "grad_norm": 5.734857082366943,
258
- "learning_rate": 3.564213605390019e-05,
259
- "loss": 0.2202,
260
- "step": 3000
261
- },
262
- {
263
- "epoch": 1.104158998895841,
264
- "eval_loss": 2.732856512069702,
265
- "eval_runtime": 663.1541,
266
- "eval_samples_per_second": 8.196,
267
- "eval_steps_per_second": 1.025,
268
- "step": 3000
269
- },
270
- {
271
- "epoch": 1.1409642988590356,
272
- "grad_norm": 5.713295936584473,
273
- "learning_rate": 3.549467779377897e-05,
274
- "loss": 0.6849,
275
- "step": 3100
276
- },
277
- {
278
- "epoch": 1.1777695988222303,
279
- "grad_norm": 8.02027702331543,
280
- "learning_rate": 3.534721953365775e-05,
281
- "loss": 0.6996,
282
- "step": 3200
283
- },
284
- {
285
- "epoch": 1.214574898785425,
286
- "grad_norm": 7.312982082366943,
287
- "learning_rate": 3.5199761273536525e-05,
288
- "loss": 0.6721,
289
- "step": 3300
290
- },
291
- {
292
- "epoch": 1.2513801987486197,
293
- "grad_norm": 6.537501811981201,
294
- "learning_rate": 3.505230301341531e-05,
295
- "loss": 0.6689,
296
- "step": 3400
297
- },
298
- {
299
- "epoch": 1.2881854987118144,
300
- "grad_norm": 6.13110876083374,
301
- "learning_rate": 3.4904844753294086e-05,
302
- "loss": 0.6943,
303
- "step": 3500
304
- },
305
- {
306
- "epoch": 1.2881854987118144,
307
- "eval_loss": 2.2627553939819336,
308
- "eval_runtime": 662.7328,
309
- "eval_samples_per_second": 8.201,
310
- "eval_steps_per_second": 1.026,
311
- "step": 3500
312
- },
313
- {
314
- "epoch": 1.3249907986750091,
315
- "grad_norm": 7.125514984130859,
316
- "learning_rate": 3.4757386493172874e-05,
317
- "loss": 0.6925,
318
- "step": 3600
319
- }
320
- ],
321
- "logging_steps": 100,
322
- "max_steps": 27170,
323
- "num_input_tokens_seen": 0,
324
- "num_train_epochs": 10,
325
- "save_steps": 100,
326
- "stateful_callbacks": {
327
- "TrainerControl": {
328
- "args": {
329
- "should_epoch_stop": false,
330
- "should_evaluate": false,
331
- "should_log": false,
332
- "should_save": true,
333
- "should_training_stop": false
334
- },
335
- "attributes": {}
336
- }
337
- },
338
- "total_flos": 1.001052086206464e+17,
339
- "train_batch_size": 8,
340
- "trial_name": null,
341
- "trial_params": null
342
- }