Tnt3o5 commited on
Commit
4599bfa
·
verified ·
1 Parent(s): 6a63c9a

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -304
trainer_state.json DELETED
@@ -1,304 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 2.3529411764705883,
6
- "eval_steps": 100,
7
- "global_step": 1800,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.13071895424836602,
14
- "grad_norm": 2.612921714782715,
15
- "learning_rate": 2.8515e-05,
16
- "loss": 1.5786,
17
- "step": 100
18
- },
19
- {
20
- "epoch": 0.13071895424836602,
21
- "eval_loss": 1.2468026876449585,
22
- "eval_runtime": 49.2181,
23
- "eval_samples_per_second": 55.224,
24
- "eval_steps_per_second": 1.727,
25
- "step": 100
26
- },
27
- {
28
- "epoch": 0.26143790849673204,
29
- "grad_norm": 2.842139482498169,
30
- "learning_rate": 2.7015e-05,
31
- "loss": 1.4427,
32
- "step": 200
33
- },
34
- {
35
- "epoch": 0.26143790849673204,
36
- "eval_loss": 1.200204849243164,
37
- "eval_runtime": 49.3689,
38
- "eval_samples_per_second": 55.055,
39
- "eval_steps_per_second": 1.722,
40
- "step": 200
41
- },
42
- {
43
- "epoch": 0.39215686274509803,
44
- "grad_norm": 2.886702060699463,
45
- "learning_rate": 2.5515000000000002e-05,
46
- "loss": 1.4714,
47
- "step": 300
48
- },
49
- {
50
- "epoch": 0.39215686274509803,
51
- "eval_loss": 1.173571228981018,
52
- "eval_runtime": 49.4135,
53
- "eval_samples_per_second": 55.005,
54
- "eval_steps_per_second": 1.72,
55
- "step": 300
56
- },
57
- {
58
- "epoch": 0.5228758169934641,
59
- "grad_norm": 2.9086837768554688,
60
- "learning_rate": 2.4015e-05,
61
- "loss": 1.3829,
62
- "step": 400
63
- },
64
- {
65
- "epoch": 0.5228758169934641,
66
- "eval_loss": 1.1620103120803833,
67
- "eval_runtime": 49.407,
68
- "eval_samples_per_second": 55.012,
69
- "eval_steps_per_second": 1.72,
70
- "step": 400
71
- },
72
- {
73
- "epoch": 0.6535947712418301,
74
- "grad_norm": 2.680055856704712,
75
- "learning_rate": 2.2515e-05,
76
- "loss": 1.4034,
77
- "step": 500
78
- },
79
- {
80
- "epoch": 0.6535947712418301,
81
- "eval_loss": 1.1487430334091187,
82
- "eval_runtime": 49.4341,
83
- "eval_samples_per_second": 54.982,
84
- "eval_steps_per_second": 1.719,
85
- "step": 500
86
- },
87
- {
88
- "epoch": 0.7843137254901961,
89
- "grad_norm": 2.7972700595855713,
90
- "learning_rate": 2.1015e-05,
91
- "loss": 1.3393,
92
- "step": 600
93
- },
94
- {
95
- "epoch": 0.7843137254901961,
96
- "eval_loss": 1.1399074792861938,
97
- "eval_runtime": 49.4463,
98
- "eval_samples_per_second": 54.969,
99
- "eval_steps_per_second": 1.719,
100
- "step": 600
101
- },
102
- {
103
- "epoch": 0.9150326797385621,
104
- "grad_norm": 2.4504594802856445,
105
- "learning_rate": 1.9515e-05,
106
- "loss": 1.3386,
107
- "step": 700
108
- },
109
- {
110
- "epoch": 0.9150326797385621,
111
- "eval_loss": 1.1321361064910889,
112
- "eval_runtime": 49.4141,
113
- "eval_samples_per_second": 55.004,
114
- "eval_steps_per_second": 1.72,
115
- "step": 700
116
- },
117
- {
118
- "epoch": 1.0457516339869282,
119
- "grad_norm": 2.388720989227295,
120
- "learning_rate": 1.8015000000000003e-05,
121
- "loss": 1.3639,
122
- "step": 800
123
- },
124
- {
125
- "epoch": 1.0457516339869282,
126
- "eval_loss": 1.1278705596923828,
127
- "eval_runtime": 49.4078,
128
- "eval_samples_per_second": 55.012,
129
- "eval_steps_per_second": 1.72,
130
- "step": 800
131
- },
132
- {
133
- "epoch": 1.1764705882352942,
134
- "grad_norm": 2.8180274963378906,
135
- "learning_rate": 1.6515e-05,
136
- "loss": 1.2569,
137
- "step": 900
138
- },
139
- {
140
- "epoch": 1.1764705882352942,
141
- "eval_loss": 1.12681245803833,
142
- "eval_runtime": 49.4505,
143
- "eval_samples_per_second": 54.964,
144
- "eval_steps_per_second": 1.719,
145
- "step": 900
146
- },
147
- {
148
- "epoch": 1.3071895424836601,
149
- "grad_norm": 2.588479518890381,
150
- "learning_rate": 1.5015e-05,
151
- "loss": 1.2463,
152
- "step": 1000
153
- },
154
- {
155
- "epoch": 1.3071895424836601,
156
- "eval_loss": 1.1237956285476685,
157
- "eval_runtime": 49.44,
158
- "eval_samples_per_second": 54.976,
159
- "eval_steps_per_second": 1.719,
160
- "step": 1000
161
- },
162
- {
163
- "epoch": 1.4379084967320261,
164
- "grad_norm": 2.7836344242095947,
165
- "learning_rate": 1.3515e-05,
166
- "loss": 1.1957,
167
- "step": 1100
168
- },
169
- {
170
- "epoch": 1.4379084967320261,
171
- "eval_loss": 1.1178600788116455,
172
- "eval_runtime": 49.4187,
173
- "eval_samples_per_second": 54.999,
174
- "eval_steps_per_second": 1.72,
175
- "step": 1100
176
- },
177
- {
178
- "epoch": 1.5686274509803921,
179
- "grad_norm": 3.272765874862671,
180
- "learning_rate": 1.2015000000000001e-05,
181
- "loss": 1.2142,
182
- "step": 1200
183
- },
184
- {
185
- "epoch": 1.5686274509803921,
186
- "eval_loss": 1.116894006729126,
187
- "eval_runtime": 49.4336,
188
- "eval_samples_per_second": 54.983,
189
- "eval_steps_per_second": 1.719,
190
- "step": 1200
191
- },
192
- {
193
- "epoch": 1.6993464052287581,
194
- "grad_norm": 2.6856162548065186,
195
- "learning_rate": 1.0515e-05,
196
- "loss": 1.2177,
197
- "step": 1300
198
- },
199
- {
200
- "epoch": 1.6993464052287581,
201
- "eval_loss": 1.1120049953460693,
202
- "eval_runtime": 49.446,
203
- "eval_samples_per_second": 54.969,
204
- "eval_steps_per_second": 1.719,
205
- "step": 1300
206
- },
207
- {
208
- "epoch": 1.8300653594771243,
209
- "grad_norm": 2.9578452110290527,
210
- "learning_rate": 9.015e-06,
211
- "loss": 1.2407,
212
- "step": 1400
213
- },
214
- {
215
- "epoch": 1.8300653594771243,
216
- "eval_loss": 1.1107261180877686,
217
- "eval_runtime": 49.4097,
218
- "eval_samples_per_second": 55.009,
219
- "eval_steps_per_second": 1.72,
220
- "step": 1400
221
- },
222
- {
223
- "epoch": 1.9607843137254903,
224
- "grad_norm": 2.797602653503418,
225
- "learning_rate": 7.515e-06,
226
- "loss": 1.2679,
227
- "step": 1500
228
- },
229
- {
230
- "epoch": 1.9607843137254903,
231
- "eval_loss": 1.1109682321548462,
232
- "eval_runtime": 49.422,
233
- "eval_samples_per_second": 54.996,
234
- "eval_steps_per_second": 1.72,
235
- "step": 1500
236
- },
237
- {
238
- "epoch": 2.0915032679738563,
239
- "grad_norm": 2.613773822784424,
240
- "learning_rate": 6.015000000000001e-06,
241
- "loss": 1.1959,
242
- "step": 1600
243
- },
244
- {
245
- "epoch": 2.0915032679738563,
246
- "eval_loss": 1.108609914779663,
247
- "eval_runtime": 49.0734,
248
- "eval_samples_per_second": 55.386,
249
- "eval_steps_per_second": 1.732,
250
- "step": 1600
251
- },
252
- {
253
- "epoch": 2.2222222222222223,
254
- "grad_norm": 2.685255765914917,
255
- "learning_rate": 4.515e-06,
256
- "loss": 1.1868,
257
- "step": 1700
258
- },
259
- {
260
- "epoch": 2.2222222222222223,
261
- "eval_loss": 1.1089801788330078,
262
- "eval_runtime": 49.4346,
263
- "eval_samples_per_second": 54.982,
264
- "eval_steps_per_second": 1.719,
265
- "step": 1700
266
- },
267
- {
268
- "epoch": 2.3529411764705883,
269
- "grad_norm": 2.5635337829589844,
270
- "learning_rate": 3.0150000000000004e-06,
271
- "loss": 1.1443,
272
- "step": 1800
273
- },
274
- {
275
- "epoch": 2.3529411764705883,
276
- "eval_loss": 1.1081539392471313,
277
- "eval_runtime": 49.3845,
278
- "eval_samples_per_second": 55.038,
279
- "eval_steps_per_second": 1.721,
280
- "step": 1800
281
- }
282
- ],
283
- "logging_steps": 100,
284
- "max_steps": 2000,
285
- "num_input_tokens_seen": 0,
286
- "num_train_epochs": 3,
287
- "save_steps": 100,
288
- "stateful_callbacks": {
289
- "TrainerControl": {
290
- "args": {
291
- "should_epoch_stop": false,
292
- "should_evaluate": false,
293
- "should_log": false,
294
- "should_save": true,
295
- "should_training_stop": false
296
- },
297
- "attributes": {}
298
- }
299
- },
300
- "total_flos": 3.504309077016576e+16,
301
- "train_batch_size": 32,
302
- "trial_name": null,
303
- "trial_params": null
304
- }