SatyaSriVelpuri commited on
Commit
700436e
·
verified ·
1 Parent(s): c525c1e

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +314 -0
trainer_state.json ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.6835016835016834,
6
+ "eval_steps": 500,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.04208754208754209,
14
+ "grad_norm": 44.5,
15
+ "learning_rate": 2.222222222222222e-06,
16
+ "loss": 9.708373413085937,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.08417508417508418,
21
+ "grad_norm": 20.5,
22
+ "learning_rate": 4.537037037037038e-06,
23
+ "loss": 7.022988891601562,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.12626262626262627,
28
+ "grad_norm": 13.375,
29
+ "learning_rate": 4.94212962962963e-06,
30
+ "loss": 4.4798516845703125,
31
+ "step": 75
32
+ },
33
+ {
34
+ "epoch": 0.16835016835016836,
35
+ "grad_norm": 35.75,
36
+ "learning_rate": 4.869791666666667e-06,
37
+ "loss": 3.579981384277344,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.21043771043771045,
42
+ "grad_norm": 33.0,
43
+ "learning_rate": 4.797453703703704e-06,
44
+ "loss": 2.9001919555664064,
45
+ "step": 125
46
+ },
47
+ {
48
+ "epoch": 0.25252525252525254,
49
+ "grad_norm": 38.5,
50
+ "learning_rate": 4.725115740740741e-06,
51
+ "loss": 2.722982177734375,
52
+ "step": 150
53
+ },
54
+ {
55
+ "epoch": 0.2946127946127946,
56
+ "grad_norm": 98.0,
57
+ "learning_rate": 4.652777777777779e-06,
58
+ "loss": 2.658799133300781,
59
+ "step": 175
60
+ },
61
+ {
62
+ "epoch": 0.3367003367003367,
63
+ "grad_norm": 37.5,
64
+ "learning_rate": 4.580439814814815e-06,
65
+ "loss": 2.5711898803710938,
66
+ "step": 200
67
+ },
68
+ {
69
+ "epoch": 0.3787878787878788,
70
+ "grad_norm": 148.0,
71
+ "learning_rate": 4.508101851851852e-06,
72
+ "loss": 2.5608544921875,
73
+ "step": 225
74
+ },
75
+ {
76
+ "epoch": 0.4208754208754209,
77
+ "grad_norm": 100.0,
78
+ "learning_rate": 4.435763888888889e-06,
79
+ "loss": 2.5246621704101564,
80
+ "step": 250
81
+ },
82
+ {
83
+ "epoch": 0.46296296296296297,
84
+ "grad_norm": 111.0,
85
+ "learning_rate": 4.363425925925927e-06,
86
+ "loss": 2.504690399169922,
87
+ "step": 275
88
+ },
89
+ {
90
+ "epoch": 0.5050505050505051,
91
+ "grad_norm": 74.5,
92
+ "learning_rate": 4.2910879629629634e-06,
93
+ "loss": 2.456527099609375,
94
+ "step": 300
95
+ },
96
+ {
97
+ "epoch": 0.5471380471380471,
98
+ "grad_norm": 84.0,
99
+ "learning_rate": 4.21875e-06,
100
+ "loss": 2.476842498779297,
101
+ "step": 325
102
+ },
103
+ {
104
+ "epoch": 0.5892255892255892,
105
+ "grad_norm": 26.125,
106
+ "learning_rate": 4.1464120370370375e-06,
107
+ "loss": 2.447265930175781,
108
+ "step": 350
109
+ },
110
+ {
111
+ "epoch": 0.6313131313131313,
112
+ "grad_norm": 38.5,
113
+ "learning_rate": 4.074074074074074e-06,
114
+ "loss": 2.3707008361816406,
115
+ "step": 375
116
+ },
117
+ {
118
+ "epoch": 0.6734006734006734,
119
+ "grad_norm": 23.375,
120
+ "learning_rate": 4.001736111111112e-06,
121
+ "loss": 2.374063720703125,
122
+ "step": 400
123
+ },
124
+ {
125
+ "epoch": 0.7154882154882155,
126
+ "grad_norm": 21.875,
127
+ "learning_rate": 3.929398148148148e-06,
128
+ "loss": 2.3581019592285157,
129
+ "step": 425
130
+ },
131
+ {
132
+ "epoch": 0.7575757575757576,
133
+ "grad_norm": 21.25,
134
+ "learning_rate": 3.857060185185186e-06,
135
+ "loss": 2.3752615356445315,
136
+ "step": 450
137
+ },
138
+ {
139
+ "epoch": 0.7996632996632996,
140
+ "grad_norm": 48.0,
141
+ "learning_rate": 3.7847222222222224e-06,
142
+ "loss": 2.309081726074219,
143
+ "step": 475
144
+ },
145
+ {
146
+ "epoch": 0.8417508417508418,
147
+ "grad_norm": 17.375,
148
+ "learning_rate": 3.7123842592592594e-06,
149
+ "loss": 2.265882568359375,
150
+ "step": 500
151
+ },
152
+ {
153
+ "epoch": 0.8838383838383839,
154
+ "grad_norm": 38.5,
155
+ "learning_rate": 3.640046296296297e-06,
156
+ "loss": 2.245345458984375,
157
+ "step": 525
158
+ },
159
+ {
160
+ "epoch": 0.9259259259259259,
161
+ "grad_norm": 26.875,
162
+ "learning_rate": 3.5677083333333335e-06,
163
+ "loss": 2.2155528259277344,
164
+ "step": 550
165
+ },
166
+ {
167
+ "epoch": 0.968013468013468,
168
+ "grad_norm": 40.0,
169
+ "learning_rate": 3.4953703703703706e-06,
170
+ "loss": 2.2166319274902344,
171
+ "step": 575
172
+ },
173
+ {
174
+ "epoch": 1.0101010101010102,
175
+ "grad_norm": 141.0,
176
+ "learning_rate": 3.423032407407408e-06,
177
+ "loss": 2.2579159545898437,
178
+ "step": 600
179
+ },
180
+ {
181
+ "epoch": 1.0521885521885521,
182
+ "grad_norm": 19.75,
183
+ "learning_rate": 3.3506944444444447e-06,
184
+ "loss": 2.1792347717285154,
185
+ "step": 625
186
+ },
187
+ {
188
+ "epoch": 1.0942760942760943,
189
+ "grad_norm": 159.0,
190
+ "learning_rate": 3.2783564814814817e-06,
191
+ "loss": 2.1700653076171874,
192
+ "step": 650
193
+ },
194
+ {
195
+ "epoch": 1.1363636363636362,
196
+ "grad_norm": 26.125,
197
+ "learning_rate": 3.2060185185185188e-06,
198
+ "loss": 2.1729795837402346,
199
+ "step": 675
200
+ },
201
+ {
202
+ "epoch": 1.1784511784511784,
203
+ "grad_norm": 112.0,
204
+ "learning_rate": 3.1336805555555562e-06,
205
+ "loss": 2.1509564208984373,
206
+ "step": 700
207
+ },
208
+ {
209
+ "epoch": 1.2205387205387206,
210
+ "grad_norm": 18.5,
211
+ "learning_rate": 3.061342592592593e-06,
212
+ "loss": 2.1331724548339843,
213
+ "step": 725
214
+ },
215
+ {
216
+ "epoch": 1.2626262626262625,
217
+ "grad_norm": 72.0,
218
+ "learning_rate": 2.98900462962963e-06,
219
+ "loss": 2.0941813659667967,
220
+ "step": 750
221
+ },
222
+ {
223
+ "epoch": 1.3047138047138047,
224
+ "grad_norm": 56.0,
225
+ "learning_rate": 2.916666666666667e-06,
226
+ "loss": 2.122632598876953,
227
+ "step": 775
228
+ },
229
+ {
230
+ "epoch": 1.3468013468013469,
231
+ "grad_norm": 22.75,
232
+ "learning_rate": 2.8443287037037036e-06,
233
+ "loss": 2.120499572753906,
234
+ "step": 800
235
+ },
236
+ {
237
+ "epoch": 1.3888888888888888,
238
+ "grad_norm": 27.5,
239
+ "learning_rate": 2.771990740740741e-06,
240
+ "loss": 2.1208335876464846,
241
+ "step": 825
242
+ },
243
+ {
244
+ "epoch": 1.430976430976431,
245
+ "grad_norm": 19.125,
246
+ "learning_rate": 2.699652777777778e-06,
247
+ "loss": 2.1359043884277344,
248
+ "step": 850
249
+ },
250
+ {
251
+ "epoch": 1.4730639730639732,
252
+ "grad_norm": 18.5,
253
+ "learning_rate": 2.6273148148148148e-06,
254
+ "loss": 2.1154779052734374,
255
+ "step": 875
256
+ },
257
+ {
258
+ "epoch": 1.5151515151515151,
259
+ "grad_norm": 25.625,
260
+ "learning_rate": 2.554976851851852e-06,
261
+ "loss": 2.107982635498047,
262
+ "step": 900
263
+ },
264
+ {
265
+ "epoch": 1.557239057239057,
266
+ "grad_norm": 17.375,
267
+ "learning_rate": 2.4826388888888893e-06,
268
+ "loss": 2.086489410400391,
269
+ "step": 925
270
+ },
271
+ {
272
+ "epoch": 1.5993265993265995,
273
+ "grad_norm": 23.125,
274
+ "learning_rate": 2.410300925925926e-06,
275
+ "loss": 2.1238157653808596,
276
+ "step": 950
277
+ },
278
+ {
279
+ "epoch": 1.6414141414141414,
280
+ "grad_norm": 17.25,
281
+ "learning_rate": 2.3379629629629634e-06,
282
+ "loss": 2.0812089538574217,
283
+ "step": 975
284
+ },
285
+ {
286
+ "epoch": 1.6835016835016834,
287
+ "grad_norm": 19.5,
288
+ "learning_rate": 2.265625e-06,
289
+ "loss": 2.041307220458984,
290
+ "step": 1000
291
+ }
292
+ ],
293
+ "logging_steps": 25,
294
+ "max_steps": 1782,
295
+ "num_input_tokens_seen": 0,
296
+ "num_train_epochs": 3,
297
+ "save_steps": 500,
298
+ "stateful_callbacks": {
299
+ "TrainerControl": {
300
+ "args": {
301
+ "should_epoch_stop": false,
302
+ "should_evaluate": false,
303
+ "should_log": false,
304
+ "should_save": true,
305
+ "should_training_stop": false
306
+ },
307
+ "attributes": {}
308
+ }
309
+ },
310
+ "total_flos": 9.185883303947796e+17,
311
+ "train_batch_size": 8,
312
+ "trial_name": null,
313
+ "trial_params": null
314
+ }