Nessii013 commited on
Commit
9466ccf
·
verified ·
1 Parent(s): 019872b

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -315
trainer_state.json DELETED
@@ -1,315 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
- "eval_steps": 500,
6
- "global_step": 391,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.02557544757033248,
13
- "grad_norm": 1.875,
14
- "learning_rate": 2.5e-05,
15
- "loss": 0.8604,
16
- "step": 10
17
- },
18
- {
19
- "epoch": 0.05115089514066496,
20
- "grad_norm": 1.03125,
21
- "learning_rate": 5e-05,
22
- "loss": 0.6219,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.07672634271099744,
27
- "grad_norm": 0.80078125,
28
- "learning_rate": 7.500000000000001e-05,
29
- "loss": 0.5142,
30
- "step": 30
31
- },
32
- {
33
- "epoch": 0.10230179028132992,
34
- "grad_norm": 0.71484375,
35
- "learning_rate": 0.0001,
36
- "loss": 0.4471,
37
- "step": 40
38
- },
39
- {
40
- "epoch": 0.1278772378516624,
41
- "grad_norm": 0.6640625,
42
- "learning_rate": 9.715099715099715e-05,
43
- "loss": 0.4005,
44
- "step": 50
45
- },
46
- {
47
- "epoch": 0.1534526854219949,
48
- "grad_norm": 0.58984375,
49
- "learning_rate": 9.430199430199431e-05,
50
- "loss": 0.3637,
51
- "step": 60
52
- },
53
- {
54
- "epoch": 0.17902813299232737,
55
- "grad_norm": 0.57421875,
56
- "learning_rate": 9.145299145299146e-05,
57
- "loss": 0.35,
58
- "step": 70
59
- },
60
- {
61
- "epoch": 0.20460358056265984,
62
- "grad_norm": 0.5078125,
63
- "learning_rate": 8.860398860398861e-05,
64
- "loss": 0.3432,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.23017902813299232,
69
- "grad_norm": 0.50390625,
70
- "learning_rate": 8.575498575498576e-05,
71
- "loss": 0.3338,
72
- "step": 90
73
- },
74
- {
75
- "epoch": 0.2557544757033248,
76
- "grad_norm": 0.58203125,
77
- "learning_rate": 8.290598290598292e-05,
78
- "loss": 0.3331,
79
- "step": 100
80
- },
81
- {
82
- "epoch": 0.2813299232736573,
83
- "grad_norm": 0.55859375,
84
- "learning_rate": 8.005698005698006e-05,
85
- "loss": 0.3215,
86
- "step": 110
87
- },
88
- {
89
- "epoch": 0.3069053708439898,
90
- "grad_norm": 0.546875,
91
- "learning_rate": 7.720797720797721e-05,
92
- "loss": 0.3185,
93
- "step": 120
94
- },
95
- {
96
- "epoch": 0.33248081841432225,
97
- "grad_norm": 0.51953125,
98
- "learning_rate": 7.435897435897436e-05,
99
- "loss": 0.3186,
100
- "step": 130
101
- },
102
- {
103
- "epoch": 0.35805626598465473,
104
- "grad_norm": 0.46484375,
105
- "learning_rate": 7.150997150997152e-05,
106
- "loss": 0.3156,
107
- "step": 140
108
- },
109
- {
110
- "epoch": 0.3836317135549872,
111
- "grad_norm": 0.5859375,
112
- "learning_rate": 6.866096866096867e-05,
113
- "loss": 0.3106,
114
- "step": 150
115
- },
116
- {
117
- "epoch": 0.4092071611253197,
118
- "grad_norm": 0.48046875,
119
- "learning_rate": 6.581196581196581e-05,
120
- "loss": 0.3146,
121
- "step": 160
122
- },
123
- {
124
- "epoch": 0.43478260869565216,
125
- "grad_norm": 0.53515625,
126
- "learning_rate": 6.296296296296296e-05,
127
- "loss": 0.3135,
128
- "step": 170
129
- },
130
- {
131
- "epoch": 0.46035805626598464,
132
- "grad_norm": 0.490234375,
133
- "learning_rate": 6.011396011396012e-05,
134
- "loss": 0.3059,
135
- "step": 180
136
- },
137
- {
138
- "epoch": 0.4859335038363171,
139
- "grad_norm": 0.609375,
140
- "learning_rate": 5.726495726495726e-05,
141
- "loss": 0.3049,
142
- "step": 190
143
- },
144
- {
145
- "epoch": 0.5115089514066496,
146
- "grad_norm": 0.53125,
147
- "learning_rate": 5.441595441595442e-05,
148
- "loss": 0.3032,
149
- "step": 200
150
- },
151
- {
152
- "epoch": 0.5370843989769821,
153
- "grad_norm": 0.53125,
154
- "learning_rate": 5.156695156695157e-05,
155
- "loss": 0.2889,
156
- "step": 210
157
- },
158
- {
159
- "epoch": 0.5626598465473146,
160
- "grad_norm": 0.50390625,
161
- "learning_rate": 4.871794871794872e-05,
162
- "loss": 0.2971,
163
- "step": 220
164
- },
165
- {
166
- "epoch": 0.5882352941176471,
167
- "grad_norm": 0.546875,
168
- "learning_rate": 4.586894586894587e-05,
169
- "loss": 0.3088,
170
- "step": 230
171
- },
172
- {
173
- "epoch": 0.6138107416879796,
174
- "grad_norm": 0.57421875,
175
- "learning_rate": 4.301994301994302e-05,
176
- "loss": 0.2977,
177
- "step": 240
178
- },
179
- {
180
- "epoch": 0.639386189258312,
181
- "grad_norm": 0.578125,
182
- "learning_rate": 4.0170940170940174e-05,
183
- "loss": 0.2956,
184
- "step": 250
185
- },
186
- {
187
- "epoch": 0.6649616368286445,
188
- "grad_norm": 0.546875,
189
- "learning_rate": 3.732193732193732e-05,
190
- "loss": 0.2953,
191
- "step": 260
192
- },
193
- {
194
- "epoch": 0.690537084398977,
195
- "grad_norm": 0.49609375,
196
- "learning_rate": 3.4472934472934476e-05,
197
- "loss": 0.2955,
198
- "step": 270
199
- },
200
- {
201
- "epoch": 0.7161125319693095,
202
- "grad_norm": 0.49609375,
203
- "learning_rate": 3.162393162393162e-05,
204
- "loss": 0.2892,
205
- "step": 280
206
- },
207
- {
208
- "epoch": 0.7416879795396419,
209
- "grad_norm": 0.486328125,
210
- "learning_rate": 2.8774928774928778e-05,
211
- "loss": 0.281,
212
- "step": 290
213
- },
214
- {
215
- "epoch": 0.7672634271099744,
216
- "grad_norm": 0.4921875,
217
- "learning_rate": 2.5925925925925925e-05,
218
- "loss": 0.2911,
219
- "step": 300
220
- },
221
- {
222
- "epoch": 0.7928388746803069,
223
- "grad_norm": 0.61328125,
224
- "learning_rate": 2.307692307692308e-05,
225
- "loss": 0.2943,
226
- "step": 310
227
- },
228
- {
229
- "epoch": 0.8184143222506394,
230
- "grad_norm": 0.53125,
231
- "learning_rate": 2.022792022792023e-05,
232
- "loss": 0.293,
233
- "step": 320
234
- },
235
- {
236
- "epoch": 0.8439897698209718,
237
- "grad_norm": 0.4609375,
238
- "learning_rate": 1.737891737891738e-05,
239
- "loss": 0.2815,
240
- "step": 330
241
- },
242
- {
243
- "epoch": 0.8695652173913043,
244
- "grad_norm": 0.4609375,
245
- "learning_rate": 1.4529914529914531e-05,
246
- "loss": 0.2871,
247
- "step": 340
248
- },
249
- {
250
- "epoch": 0.8951406649616368,
251
- "grad_norm": 0.55078125,
252
- "learning_rate": 1.168091168091168e-05,
253
- "loss": 0.2832,
254
- "step": 350
255
- },
256
- {
257
- "epoch": 0.9207161125319693,
258
- "grad_norm": 0.55859375,
259
- "learning_rate": 8.831908831908831e-06,
260
- "loss": 0.289,
261
- "step": 360
262
- },
263
- {
264
- "epoch": 0.9462915601023018,
265
- "grad_norm": 0.53125,
266
- "learning_rate": 5.982905982905984e-06,
267
- "loss": 0.2936,
268
- "step": 370
269
- },
270
- {
271
- "epoch": 0.9718670076726342,
272
- "grad_norm": 0.484375,
273
- "learning_rate": 3.133903133903134e-06,
274
- "loss": 0.2828,
275
- "step": 380
276
- },
277
- {
278
- "epoch": 0.9974424552429667,
279
- "grad_norm": 0.53515625,
280
- "learning_rate": 2.8490028490028494e-07,
281
- "loss": 0.2916,
282
- "step": 390
283
- },
284
- {
285
- "epoch": 1.0,
286
- "step": 391,
287
- "total_flos": 1.382893920190464e+16,
288
- "train_loss": 0.3390924896273162,
289
- "train_runtime": 504.4341,
290
- "train_samples_per_second": 49.56,
291
- "train_steps_per_second": 0.775
292
- }
293
- ],
294
- "logging_steps": 10,
295
- "max_steps": 391,
296
- "num_input_tokens_seen": 0,
297
- "num_train_epochs": 1,
298
- "save_steps": 1000,
299
- "stateful_callbacks": {
300
- "TrainerControl": {
301
- "args": {
302
- "should_epoch_stop": false,
303
- "should_evaluate": false,
304
- "should_log": false,
305
- "should_save": true,
306
- "should_training_stop": true
307
- },
308
- "attributes": {}
309
- }
310
- },
311
- "total_flos": 1.382893920190464e+16,
312
- "train_batch_size": 4,
313
- "trial_name": null,
314
- "trial_params": null
315
- }