File size: 107,488 Bytes
877477d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
{"loss": 1.70305681, "token_acc": 0.69298079, "grad_norm": 4.9375, "learning_rate": 5e-08, "memory(GiB)": 30.37, "train_speed(iter/s)": 0.361434, "epoch": 4.637e-05, "global_step/max_steps": "1/43134", "percentage": "0.00%", "elapsed_time": "2s", "remaining_time": "1d 6h 47m 0s"}
{"loss": 2.40781256, "token_acc": 0.57170931, "grad_norm": 4.875, "learning_rate": 4.64e-06, "memory(GiB)": 63.1, "train_speed(iter/s)": 0.592803, "epoch": 0.00463669, "global_step/max_steps": "100/43134", "percentage": "0.23%", "elapsed_time": "2m 48s", "remaining_time": "20h 8m 20s"}
{"loss": 2.21514603, "token_acc": 0.59194125, "grad_norm": 4.03125, "learning_rate": 9.27e-06, "memory(GiB)": 75.43, "train_speed(iter/s)": 0.592983, "epoch": 0.00927337, "global_step/max_steps": "200/43134", "percentage": "0.46%", "elapsed_time": "5m 37s", "remaining_time": "20h 5m 58s"}
{"loss": 1.90964798, "token_acc": 0.63171472, "grad_norm": 4.21875, "learning_rate": 1.391e-05, "memory(GiB)": 75.43, "train_speed(iter/s)": 0.592888, "epoch": 0.01391006, "global_step/max_steps": "300/43134", "percentage": "0.70%", "elapsed_time": "8m 25s", "remaining_time": "20h 3m 37s"}
{"loss": 1.66375946, "token_acc": 0.65747111, "grad_norm": 4.40625, "learning_rate": 1.854e-05, "memory(GiB)": 75.43, "train_speed(iter/s)": 0.592944, "epoch": 0.01854675, "global_step/max_steps": "400/43134", "percentage": "0.93%", "elapsed_time": "11m 14s", "remaining_time": "20h 0m 47s"}
{"loss": 1.46813171, "token_acc": 0.68721029, "grad_norm": 4.1875, "learning_rate": 2.318e-05, "memory(GiB)": 75.43, "train_speed(iter/s)": 0.594145, "epoch": 0.02318343, "global_step/max_steps": "500/43134", "percentage": "1.16%", "elapsed_time": "14m 1s", "remaining_time": "19h 55m 38s"}
{"loss": 1.39030716, "token_acc": 0.69330558, "grad_norm": 3.796875, "learning_rate": 2.782e-05, "memory(GiB)": 75.43, "train_speed(iter/s)": 0.59521, "epoch": 0.02782012, "global_step/max_steps": "600/43134", "percentage": "1.39%", "elapsed_time": "16m 47s", "remaining_time": "19h 50m 45s"}
{"loss": 1.28300049, "token_acc": 0.71258477, "grad_norm": 3.8125, "learning_rate": 3.245e-05, "memory(GiB)": 75.43, "train_speed(iter/s)": 0.59662, "epoch": 0.03245681, "global_step/max_steps": "700/43134", "percentage": "1.62%", "elapsed_time": "19m 33s", "remaining_time": "19h 45m 11s"}
{"loss": 1.21706207, "token_acc": 0.7225112, "grad_norm": 4.46875, "learning_rate": 3.709e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.598358, "epoch": 0.03709349, "global_step/max_steps": "800/43134", "percentage": "1.85%", "elapsed_time": "22m 16s", "remaining_time": "19h 38m 59s"}
{"loss": 1.12478455, "token_acc": 0.7366296, "grad_norm": 3.0, "learning_rate": 4.172e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.59978, "epoch": 0.04173018, "global_step/max_steps": "900/43134", "percentage": "2.09%", "elapsed_time": "25m 0s", "remaining_time": "19h 33m 25s"}
{"loss": 1.11833473, "token_acc": 0.73576704, "grad_norm": 5.4375, "learning_rate": 4.636e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600727, "epoch": 0.04636687, "global_step/max_steps": "1000/43134", "percentage": "2.32%", "elapsed_time": "27m 44s", "remaining_time": "19h 28m 48s"}
{"loss": 1.08743713, "token_acc": 0.74106109, "grad_norm": 4.71875, "learning_rate": 5.1e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601775, "epoch": 0.05100355, "global_step/max_steps": "1100/43134", "percentage": "2.55%", "elapsed_time": "30m 27s", "remaining_time": "19h 24m 2s"}
{"loss": 1.04385033, "token_acc": 0.7477399, "grad_norm": 4.3125, "learning_rate": 5.563e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602817, "epoch": 0.05564024, "global_step/max_steps": "1200/43134", "percentage": "2.78%", "elapsed_time": "33m 10s", "remaining_time": "19h 19m 15s"}
{"loss": 1.03257431, "token_acc": 0.75005819, "grad_norm": 4.21875, "learning_rate": 6.027e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603542, "epoch": 0.06027693, "global_step/max_steps": "1300/43134", "percentage": "3.01%", "elapsed_time": "35m 53s", "remaining_time": "19h 15m 7s"}
{"loss": 0.98755402, "token_acc": 0.75940324, "grad_norm": 3.78125, "learning_rate": 6.49e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60401, "epoch": 0.06491361, "global_step/max_steps": "1400/43134", "percentage": "3.25%", "elapsed_time": "38m 37s", "remaining_time": "19h 11m 28s"}
{"loss": 1.01085098, "token_acc": 0.75283316, "grad_norm": 3.09375, "learning_rate": 6.954e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60375, "epoch": 0.0695503, "global_step/max_steps": "1500/43134", "percentage": "3.48%", "elapsed_time": "41m 24s", "remaining_time": "19h 9m 13s"}
{"loss": 1.03658607, "token_acc": 0.74412432, "grad_norm": 2.875, "learning_rate": 7.418e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603922, "epoch": 0.07418699, "global_step/max_steps": "1600/43134", "percentage": "3.71%", "elapsed_time": "44m 9s", "remaining_time": "19h 6m 7s"}
{"loss": 1.01009842, "token_acc": 0.74942246, "grad_norm": 2.71875, "learning_rate": 7.881e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604365, "epoch": 0.07882367, "global_step/max_steps": "1700/43134", "percentage": "3.94%", "elapsed_time": "46m 52s", "remaining_time": "19h 2m 32s"}
{"loss": 1.02152382, "token_acc": 0.7474721, "grad_norm": 3.59375, "learning_rate": 8.345e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604914, "epoch": 0.08346036, "global_step/max_steps": "1800/43134", "percentage": "4.17%", "elapsed_time": "49m 35s", "remaining_time": "18h 58m 45s"}
{"loss": 0.96835106, "token_acc": 0.76201064, "grad_norm": 2.359375, "learning_rate": 8.809e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604621, "epoch": 0.08809705, "global_step/max_steps": "1900/43134", "percentage": "4.40%", "elapsed_time": "52m 22s", "remaining_time": "18h 56m 33s"}
{"loss": 0.99693657, "token_acc": 0.75189409, "grad_norm": 2.984375, "learning_rate": 9.272e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.605563, "epoch": 0.09273373, "global_step/max_steps": "2000/43134", "percentage": "4.64%", "elapsed_time": "55m 2s", "remaining_time": "18h 52m 2s"}
{"loss": 0.96875748, "token_acc": 0.75781623, "grad_norm": 3.109375, "learning_rate": 9.736e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.605642, "epoch": 0.09737042, "global_step/max_steps": "2100/43134", "percentage": "4.87%", "elapsed_time": "57m 47s", "remaining_time": "18h 49m 8s"}
{"loss": 1.01544777, "token_acc": 0.74925344, "grad_norm": 2.96875, "learning_rate": 0.0001, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.606223, "epoch": 0.10200711, "global_step/max_steps": "2200/43134", "percentage": "5.10%", "elapsed_time": "1h 0m 28s", "remaining_time": "18h 45m 18s"}
{"loss": 0.99702782, "token_acc": 0.7554657, "grad_norm": 2.234375, "learning_rate": 0.0001, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.6065, "epoch": 0.10664379, "global_step/max_steps": "2300/43134", "percentage": "5.33%", "elapsed_time": "1h 3m 12s", "remaining_time": "18h 42m 3s"}
{"loss": 0.97473412, "token_acc": 0.75668982, "grad_norm": 2.5625, "learning_rate": 9.999e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.606776, "epoch": 0.11128048, "global_step/max_steps": "2400/43134", "percentage": "5.56%", "elapsed_time": "1h 5m 55s", "remaining_time": "18h 38m 48s"}
{"loss": 0.95360138, "token_acc": 0.76059182, "grad_norm": 2.28125, "learning_rate": 9.998e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.606841, "epoch": 0.11591717, "global_step/max_steps": "2500/43134", "percentage": "5.80%", "elapsed_time": "1h 8m 39s", "remaining_time": "18h 35m 56s"}
{"loss": 0.94902199, "token_acc": 0.76204764, "grad_norm": 3.453125, "learning_rate": 9.997e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.607019, "epoch": 0.12055385, "global_step/max_steps": "2600/43134", "percentage": "6.03%", "elapsed_time": "1h 11m 23s", "remaining_time": "18h 32m 52s"}
{"loss": 0.96022026, "token_acc": 0.75774331, "grad_norm": 2.71875, "learning_rate": 9.996e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.607312, "epoch": 0.12519054, "global_step/max_steps": "2700/43134", "percentage": "6.26%", "elapsed_time": "1h 14m 5s", "remaining_time": "18h 29m 35s"}
{"loss": 0.97631668, "token_acc": 0.7567433, "grad_norm": 3.765625, "learning_rate": 9.994e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60751, "epoch": 0.12982723, "global_step/max_steps": "2800/43134", "percentage": "6.49%", "elapsed_time": "1h 16m 48s", "remaining_time": "18h 26m 29s"}
{"loss": 0.97035271, "token_acc": 0.75651442, "grad_norm": 3.390625, "learning_rate": 9.992e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.607675, "epoch": 0.13446391, "global_step/max_steps": "2900/43134", "percentage": "6.72%", "elapsed_time": "1h 19m 32s", "remaining_time": "18h 23m 26s"}
{"loss": 0.96049545, "token_acc": 0.75812567, "grad_norm": 1.9140625, "learning_rate": 9.99e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.607919, "epoch": 0.1391006, "global_step/max_steps": "3000/43134", "percentage": "6.96%", "elapsed_time": "1h 22m 14s", "remaining_time": "18h 20m 15s"}
{"loss": 0.95818245, "token_acc": 0.76187009, "grad_norm": 2.046875, "learning_rate": 9.987e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.608155, "epoch": 0.14373729, "global_step/max_steps": "3100/43134", "percentage": "7.19%", "elapsed_time": "1h 24m 57s", "remaining_time": "18h 17m 5s"}
{"loss": 0.95937515, "token_acc": 0.76011257, "grad_norm": 2.203125, "learning_rate": 9.984e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60814, "epoch": 0.14837397, "global_step/max_steps": "3200/43134", "percentage": "7.42%", "elapsed_time": "1h 27m 41s", "remaining_time": "18h 14m 23s"}
{"loss": 0.94899986, "token_acc": 0.76196518, "grad_norm": 2.71875, "learning_rate": 9.981e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.607968, "epoch": 0.15301066, "global_step/max_steps": "3300/43134", "percentage": "7.65%", "elapsed_time": "1h 30m 27s", "remaining_time": "18h 11m 57s"}
{"loss": 0.94022324, "token_acc": 0.7628479, "grad_norm": 1.953125, "learning_rate": 9.977e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.607859, "epoch": 0.15764735, "global_step/max_steps": "3400/43134", "percentage": "7.88%", "elapsed_time": "1h 33m 13s", "remaining_time": "18h 9m 24s"}
{"loss": 0.93512207, "token_acc": 0.76327397, "grad_norm": 2.59375, "learning_rate": 9.974e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.607692, "epoch": 0.16228403, "global_step/max_steps": "3500/43134", "percentage": "8.11%", "elapsed_time": "1h 35m 59s", "remaining_time": "18h 6m 58s"}
{"loss": 0.94038048, "token_acc": 0.76092882, "grad_norm": 1.8828125, "learning_rate": 9.969e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.607903, "epoch": 0.16692072, "global_step/max_steps": "3600/43134", "percentage": "8.35%", "elapsed_time": "1h 38m 41s", "remaining_time": "18h 3m 50s"}
{"loss": 0.94078835, "token_acc": 0.764041, "grad_norm": 2.0625, "learning_rate": 9.965e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.608253, "epoch": 0.17155741, "global_step/max_steps": "3700/43134", "percentage": "8.58%", "elapsed_time": "1h 41m 22s", "remaining_time": "18h 0m 29s"}
{"loss": 0.92240067, "token_acc": 0.76621977, "grad_norm": 2.65625, "learning_rate": 9.96e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.608304, "epoch": 0.17619409, "global_step/max_steps": "3800/43134", "percentage": "8.81%", "elapsed_time": "1h 44m 6s", "remaining_time": "17h 57m 39s"}
{"loss": 0.93344322, "token_acc": 0.76367142, "grad_norm": 2.296875, "learning_rate": 9.955e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.608717, "epoch": 0.18083078, "global_step/max_steps": "3900/43134", "percentage": "9.04%", "elapsed_time": "1h 46m 46s", "remaining_time": "17h 54m 11s"}
{"loss": 0.91280495, "token_acc": 0.76961809, "grad_norm": 2.28125, "learning_rate": 9.95e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.608805, "epoch": 0.18546746, "global_step/max_steps": "4000/43134", "percentage": "9.27%", "elapsed_time": "1h 49m 30s", "remaining_time": "17h 51m 17s"}
{"loss": 0.88127403, "token_acc": 0.77400661, "grad_norm": 2.1875, "learning_rate": 9.945e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600164, "epoch": 0.19010415, "global_step/max_steps": "4100/43134", "percentage": "9.51%", "elapsed_time": "1h 53m 51s", "remaining_time": "18h 3m 56s"}
{"loss": 0.89884361, "token_acc": 0.76853585, "grad_norm": 3.0625, "learning_rate": 9.939e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600416, "epoch": 0.19474084, "global_step/max_steps": "4200/43134", "percentage": "9.74%", "elapsed_time": "1h 56m 34s", "remaining_time": "18h 0m 42s"}
{"loss": 0.9313044, "token_acc": 0.76455987, "grad_norm": 2.78125, "learning_rate": 9.933e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60057, "epoch": 0.19937752, "global_step/max_steps": "4300/43134", "percentage": "9.97%", "elapsed_time": "1h 59m 19s", "remaining_time": "17h 57m 39s"}
{"loss": 0.91635086, "token_acc": 0.76676652, "grad_norm": 2.125, "learning_rate": 9.926e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600781, "epoch": 0.20401421, "global_step/max_steps": "4400/43134", "percentage": "10.20%", "elapsed_time": "2h 2m 3s", "remaining_time": "17h 54m 30s"}
{"loss": 0.90197624, "token_acc": 0.76940935, "grad_norm": 2.34375, "learning_rate": 9.92e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60084, "epoch": 0.2086509, "global_step/max_steps": "4500/43134", "percentage": "10.43%", "elapsed_time": "2h 4m 49s", "remaining_time": "17h 51m 38s"}
{"loss": 0.89389282, "token_acc": 0.77215099, "grad_norm": 2.828125, "learning_rate": 9.913e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600955, "epoch": 0.21328758, "global_step/max_steps": "4600/43134", "percentage": "10.66%", "elapsed_time": "2h 7m 34s", "remaining_time": "17h 48m 39s"}
{"loss": 0.92162666, "token_acc": 0.76461691, "grad_norm": 2.0625, "learning_rate": 9.905e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601193, "epoch": 0.21792427, "global_step/max_steps": "4700/43134", "percentage": "10.90%", "elapsed_time": "2h 10m 17s", "remaining_time": "17h 45m 27s"}
{"loss": 0.90684639, "token_acc": 0.7681336, "grad_norm": 2.765625, "learning_rate": 9.898e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601249, "epoch": 0.22256096, "global_step/max_steps": "4800/43134", "percentage": "11.13%", "elapsed_time": "2h 13m 3s", "remaining_time": "17h 42m 35s"}
{"loss": 0.90473015, "token_acc": 0.77060688, "grad_norm": 2.15625, "learning_rate": 9.89e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601339, "epoch": 0.22719764, "global_step/max_steps": "4900/43134", "percentage": "11.36%", "elapsed_time": "2h 15m 48s", "remaining_time": "17h 39m 39s"}
{"loss": 0.88898643, "token_acc": 0.77295339, "grad_norm": 1.796875, "learning_rate": 9.882e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601454, "epoch": 0.23183433, "global_step/max_steps": "5000/43134", "percentage": "11.59%", "elapsed_time": "2h 18m 32s", "remaining_time": "17h 36m 41s"}
{"loss": 0.90144432, "token_acc": 0.7702861, "grad_norm": 1.8125, "learning_rate": 9.873e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60169, "epoch": 0.23647102, "global_step/max_steps": "5100/43134", "percentage": "11.82%", "elapsed_time": "2h 21m 15s", "remaining_time": "17h 33m 30s"}
{"loss": 0.88884544, "token_acc": 0.77097209, "grad_norm": 2.671875, "learning_rate": 9.865e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601823, "epoch": 0.2411077, "global_step/max_steps": "5200/43134", "percentage": "12.06%", "elapsed_time": "2h 24m 0s", "remaining_time": "17h 30m 30s"}
{"loss": 0.92380615, "token_acc": 0.76578364, "grad_norm": 2.421875, "learning_rate": 9.856e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601986, "epoch": 0.24574439, "global_step/max_steps": "5300/43134", "percentage": "12.29%", "elapsed_time": "2h 26m 43s", "remaining_time": "17h 27m 27s"}
{"loss": 0.8783786, "token_acc": 0.77544085, "grad_norm": 2.484375, "learning_rate": 9.846e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602139, "epoch": 0.25038108, "global_step/max_steps": "5400/43134", "percentage": "12.52%", "elapsed_time": "2h 29m 27s", "remaining_time": "17h 24m 25s"}
{"loss": 0.90122681, "token_acc": 0.770996, "grad_norm": 3.015625, "learning_rate": 9.837e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602162, "epoch": 0.25501776, "global_step/max_steps": "5500/43134", "percentage": "12.75%", "elapsed_time": "2h 32m 13s", "remaining_time": "17h 21m 36s"}
{"loss": 0.87117256, "token_acc": 0.77772013, "grad_norm": 1.578125, "learning_rate": 9.827e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602204, "epoch": 0.25965445, "global_step/max_steps": "5600/43134", "percentage": "12.98%", "elapsed_time": "2h 34m 58s", "remaining_time": "17h 18m 46s"}
{"loss": 0.88681183, "token_acc": 0.77226293, "grad_norm": 2.203125, "learning_rate": 9.817e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602275, "epoch": 0.26429114, "global_step/max_steps": "5700/43134", "percentage": "13.21%", "elapsed_time": "2h 37m 43s", "remaining_time": "17h 15m 52s"}
{"loss": 0.8799234, "token_acc": 0.77457571, "grad_norm": 1.2734375, "learning_rate": 9.806e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602381, "epoch": 0.26892782, "global_step/max_steps": "5800/43134", "percentage": "13.45%", "elapsed_time": "2h 40m 28s", "remaining_time": "17h 12m 55s"}
{"loss": 0.86950027, "token_acc": 0.77643216, "grad_norm": 1.78125, "learning_rate": 9.796e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602579, "epoch": 0.27356451, "global_step/max_steps": "5900/43134", "percentage": "13.68%", "elapsed_time": "2h 43m 11s", "remaining_time": "17h 9m 49s"}
{"loss": 0.89389915, "token_acc": 0.76921509, "grad_norm": 1.5546875, "learning_rate": 9.785e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602776, "epoch": 0.2782012, "global_step/max_steps": "6000/43134", "percentage": "13.91%", "elapsed_time": "2h 45m 53s", "remaining_time": "17h 6m 43s"}
{"loss": 0.86930153, "token_acc": 0.77759636, "grad_norm": 2.03125, "learning_rate": 9.773e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602828, "epoch": 0.28283788, "global_step/max_steps": "6100/43134", "percentage": "14.14%", "elapsed_time": "2h 48m 38s", "remaining_time": "17h 3m 52s"}
{"loss": 0.8892514, "token_acc": 0.77378792, "grad_norm": 2.234375, "learning_rate": 9.762e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602949, "epoch": 0.28747457, "global_step/max_steps": "6200/43134", "percentage": "14.37%", "elapsed_time": "2h 51m 22s", "remaining_time": "17h 0m 54s"}
{"loss": 0.87281944, "token_acc": 0.77810627, "grad_norm": 1.9296875, "learning_rate": 9.75e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603181, "epoch": 0.29211126, "global_step/max_steps": "6300/43134", "percentage": "14.61%", "elapsed_time": "2h 54m 4s", "remaining_time": "16h 57m 44s"}
{"loss": 0.91376938, "token_acc": 0.76946089, "grad_norm": 3.625, "learning_rate": 9.738e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603402, "epoch": 0.29674794, "global_step/max_steps": "6400/43134", "percentage": "14.84%", "elapsed_time": "2h 56m 46s", "remaining_time": "16h 54m 36s"}
{"loss": 0.88021217, "token_acc": 0.77508311, "grad_norm": 2.234375, "learning_rate": 9.725e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603626, "epoch": 0.30138463, "global_step/max_steps": "6500/43134", "percentage": "15.07%", "elapsed_time": "2h 59m 28s", "remaining_time": "16h 51m 28s"}
{"loss": 0.8886441, "token_acc": 0.77179133, "grad_norm": 1.859375, "learning_rate": 9.713e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60376, "epoch": 0.30602132, "global_step/max_steps": "6600/43134", "percentage": "15.30%", "elapsed_time": "3h 2m 11s", "remaining_time": "16h 48m 29s"}
{"loss": 0.90819771, "token_acc": 0.7693904, "grad_norm": 1.6953125, "learning_rate": 9.7e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603851, "epoch": 0.310658, "global_step/max_steps": "6700/43134", "percentage": "15.53%", "elapsed_time": "3h 4m 55s", "remaining_time": "16h 45m 34s"}
{"loss": 0.87641556, "token_acc": 0.77558301, "grad_norm": 2.6875, "learning_rate": 9.687e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603984, "epoch": 0.31529469, "global_step/max_steps": "6800/43134", "percentage": "15.76%", "elapsed_time": "3h 7m 38s", "remaining_time": "16h 42m 36s"}
{"loss": 0.83785927, "token_acc": 0.78483004, "grad_norm": 1.7265625, "learning_rate": 9.673e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604198, "epoch": 0.31993138, "global_step/max_steps": "6900/43134", "percentage": "16.00%", "elapsed_time": "3h 10m 19s", "remaining_time": "16h 39m 29s"}
{"loss": 0.88464417, "token_acc": 0.77580015, "grad_norm": 3.015625, "learning_rate": 9.659e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604052, "epoch": 0.32456806, "global_step/max_steps": "7000/43134", "percentage": "16.23%", "elapsed_time": "3h 13m 8s", "remaining_time": "16h 36m 58s"}
{"loss": 0.86187279, "token_acc": 0.77800119, "grad_norm": 1.984375, "learning_rate": 9.645e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604077, "epoch": 0.32920475, "global_step/max_steps": "7100/43134", "percentage": "16.46%", "elapsed_time": "3h 15m 53s", "remaining_time": "16h 34m 10s"}
{"loss": 0.86018356, "token_acc": 0.77774877, "grad_norm": 1.8125, "learning_rate": 9.631e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604177, "epoch": 0.33384144, "global_step/max_steps": "7200/43134", "percentage": "16.69%", "elapsed_time": "3h 18m 36s", "remaining_time": "16h 31m 14s"}
{"loss": 0.86967415, "token_acc": 0.77430538, "grad_norm": 1.734375, "learning_rate": 9.616e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60419, "epoch": 0.33847812, "global_step/max_steps": "7300/43134", "percentage": "16.92%", "elapsed_time": "3h 21m 22s", "remaining_time": "16h 28m 28s"}
{"loss": 0.87444847, "token_acc": 0.77591232, "grad_norm": 1.9609375, "learning_rate": 9.601e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.6043, "epoch": 0.34311481, "global_step/max_steps": "7400/43134", "percentage": "17.16%", "elapsed_time": "3h 24m 5s", "remaining_time": "16h 25m 31s"}
{"loss": 0.84919785, "token_acc": 0.77791003, "grad_norm": 2.84375, "learning_rate": 9.586e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604461, "epoch": 0.3477515, "global_step/max_steps": "7500/43134", "percentage": "17.39%", "elapsed_time": "3h 26m 47s", "remaining_time": "16h 22m 30s"}
{"loss": 0.85965591, "token_acc": 0.7800337, "grad_norm": 2.484375, "learning_rate": 9.571e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604534, "epoch": 0.35238818, "global_step/max_steps": "7600/43134", "percentage": "17.62%", "elapsed_time": "3h 29m 31s", "remaining_time": "16h 19m 38s"}
{"loss": 0.8509285, "token_acc": 0.78146567, "grad_norm": 2.875, "learning_rate": 9.555e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604481, "epoch": 0.35702487, "global_step/max_steps": "7700/43134", "percentage": "17.85%", "elapsed_time": "3h 32m 17s", "remaining_time": "16h 16m 57s"}
{"loss": 0.86479988, "token_acc": 0.77471605, "grad_norm": 1.6015625, "learning_rate": 9.539e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604619, "epoch": 0.36166156, "global_step/max_steps": "7800/43134", "percentage": "18.08%", "elapsed_time": "3h 35m 0s", "remaining_time": "16h 13m 59s"}
{"loss": 0.85470345, "token_acc": 0.78059288, "grad_norm": 1.8828125, "learning_rate": 9.523e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604595, "epoch": 0.36629824, "global_step/max_steps": "7900/43134", "percentage": "18.32%", "elapsed_time": "3h 37m 46s", "remaining_time": "16h 11m 16s"}
{"loss": 0.87300674, "token_acc": 0.77680631, "grad_norm": 1.640625, "learning_rate": 9.507e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.604366, "epoch": 0.37093493, "global_step/max_steps": "8000/43134", "percentage": "18.55%", "elapsed_time": "3h 40m 36s", "remaining_time": "16h 8m 52s"}
{"loss": 0.83662537, "token_acc": 0.78312909, "grad_norm": 2.21875, "learning_rate": 9.49e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.599886, "epoch": 0.37557162, "global_step/max_steps": "8100/43134", "percentage": "18.78%", "elapsed_time": "3h 45m 2s", "remaining_time": "16h 13m 20s"}
{"loss": 0.8401812, "token_acc": 0.78329562, "grad_norm": 2.140625, "learning_rate": 9.473e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600063, "epoch": 0.3802083, "global_step/max_steps": "8200/43134", "percentage": "19.01%", "elapsed_time": "3h 47m 45s", "remaining_time": "16h 10m 16s"}
{"loss": 0.84851234, "token_acc": 0.78100318, "grad_norm": 2.53125, "learning_rate": 9.456e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600187, "epoch": 0.38484499, "global_step/max_steps": "8300/43134", "percentage": "19.24%", "elapsed_time": "3h 50m 28s", "remaining_time": "16h 7m 17s"}
{"loss": 0.83170082, "token_acc": 0.78439251, "grad_norm": 2.109375, "learning_rate": 9.438e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600346, "epoch": 0.38948168, "global_step/max_steps": "8400/43134", "percentage": "19.47%", "elapsed_time": "3h 53m 11s", "remaining_time": "16h 4m 15s"}
{"loss": 0.86422783, "token_acc": 0.77697908, "grad_norm": 1.828125, "learning_rate": 9.42e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600313, "epoch": 0.39411836, "global_step/max_steps": "8500/43134", "percentage": "19.71%", "elapsed_time": "3h 55m 59s", "remaining_time": "16h 1m 32s"}
{"loss": 0.83783272, "token_acc": 0.78262072, "grad_norm": 2.0625, "learning_rate": 9.402e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60035, "epoch": 0.39875505, "global_step/max_steps": "8600/43134", "percentage": "19.94%", "elapsed_time": "3h 58m 44s", "remaining_time": "15h 58m 42s"}
{"loss": 0.88829857, "token_acc": 0.77437119, "grad_norm": 1.734375, "learning_rate": 9.384e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600511, "epoch": 0.40339174, "global_step/max_steps": "8700/43134", "percentage": "20.17%", "elapsed_time": "4h 1m 27s", "remaining_time": "15h 55m 40s"}
{"loss": 0.84674408, "token_acc": 0.78262271, "grad_norm": 1.7734375, "learning_rate": 9.365e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600596, "epoch": 0.40802842, "global_step/max_steps": "8800/43134", "percentage": "20.40%", "elapsed_time": "4h 4m 11s", "remaining_time": "15h 52m 45s"}
{"loss": 0.83167953, "token_acc": 0.78603807, "grad_norm": 1.8515625, "learning_rate": 9.347e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600731, "epoch": 0.41266511, "global_step/max_steps": "8900/43134", "percentage": "20.63%", "elapsed_time": "4h 6m 55s", "remaining_time": "15h 49m 46s"}
{"loss": 0.85422028, "token_acc": 0.78128141, "grad_norm": 2.078125, "learning_rate": 9.328e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600778, "epoch": 0.4173018, "global_step/max_steps": "9000/43134", "percentage": "20.87%", "elapsed_time": "4h 9m 40s", "remaining_time": "15h 46m 55s"}
{"loss": 0.83795662, "token_acc": 0.7835171, "grad_norm": 1.609375, "learning_rate": 9.308e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600743, "epoch": 0.42193848, "global_step/max_steps": "9100/43134", "percentage": "21.10%", "elapsed_time": "4h 12m 27s", "remaining_time": "15h 44m 12s"}
{"loss": 0.80908737, "token_acc": 0.7909767, "grad_norm": 1.6328125, "learning_rate": 9.289e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600842, "epoch": 0.42657517, "global_step/max_steps": "9200/43134", "percentage": "21.33%", "elapsed_time": "4h 15m 11s", "remaining_time": "15h 41m 16s"}
{"loss": 0.82845955, "token_acc": 0.78431486, "grad_norm": 1.6328125, "learning_rate": 9.269e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600987, "epoch": 0.43121186, "global_step/max_steps": "9300/43134", "percentage": "21.56%", "elapsed_time": "4h 17m 54s", "remaining_time": "15h 38m 16s"}
{"loss": 0.82603355, "token_acc": 0.7861017, "grad_norm": 1.671875, "learning_rate": 9.249e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60105, "epoch": 0.43584854, "global_step/max_steps": "9400/43134", "percentage": "21.79%", "elapsed_time": "4h 20m 39s", "remaining_time": "15h 35m 24s"}
{"loss": 0.82378151, "token_acc": 0.786054, "grad_norm": 2.609375, "learning_rate": 9.228e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601161, "epoch": 0.44048523, "global_step/max_steps": "9500/43134", "percentage": "22.02%", "elapsed_time": "4h 23m 22s", "remaining_time": "15h 32m 27s"}
{"loss": 0.83084793, "token_acc": 0.78551821, "grad_norm": 2.265625, "learning_rate": 9.208e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601287, "epoch": 0.44512192, "global_step/max_steps": "9600/43134", "percentage": "22.26%", "elapsed_time": "4h 26m 5s", "remaining_time": "15h 29m 29s"}
{"loss": 0.85569878, "token_acc": 0.78089508, "grad_norm": 1.6484375, "learning_rate": 9.187e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601399, "epoch": 0.4497586, "global_step/max_steps": "9700/43134", "percentage": "22.49%", "elapsed_time": "4h 28m 48s", "remaining_time": "15h 26m 32s"}
{"loss": 0.83468437, "token_acc": 0.78264225, "grad_norm": 1.6875, "learning_rate": 9.166e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601494, "epoch": 0.45439529, "global_step/max_steps": "9800/43134", "percentage": "22.72%", "elapsed_time": "4h 31m 32s", "remaining_time": "15h 23m 37s"}
{"loss": 0.83271774, "token_acc": 0.78291104, "grad_norm": 2.265625, "learning_rate": 9.145e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601595, "epoch": 0.45903198, "global_step/max_steps": "9900/43134", "percentage": "22.95%", "elapsed_time": "4h 34m 16s", "remaining_time": "15h 20m 42s"}
{"loss": 0.84802261, "token_acc": 0.78155766, "grad_norm": 2.671875, "learning_rate": 9.123e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60176, "epoch": 0.46366866, "global_step/max_steps": "10000/43134", "percentage": "23.18%", "elapsed_time": "4h 36m 57s", "remaining_time": "15h 17m 41s"}
{"loss": 0.83296875, "token_acc": 0.78166521, "grad_norm": 1.7421875, "learning_rate": 9.101e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601865, "epoch": 0.46830535, "global_step/max_steps": "10100/43134", "percentage": "23.42%", "elapsed_time": "4h 39m 40s", "remaining_time": "15h 14m 45s"}
{"loss": 0.83994598, "token_acc": 0.78364869, "grad_norm": 1.6015625, "learning_rate": 9.079e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602026, "epoch": 0.47294204, "global_step/max_steps": "10200/43134", "percentage": "23.65%", "elapsed_time": "4h 42m 22s", "remaining_time": "15h 11m 44s"}
{"loss": 0.84033173, "token_acc": 0.77913862, "grad_norm": 2.203125, "learning_rate": 9.057e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602101, "epoch": 0.47757872, "global_step/max_steps": "10300/43134", "percentage": "23.88%", "elapsed_time": "4h 45m 6s", "remaining_time": "15h 8m 51s"}
{"loss": 0.85349327, "token_acc": 0.78027704, "grad_norm": 1.703125, "learning_rate": 9.034e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602217, "epoch": 0.48221541, "global_step/max_steps": "10400/43134", "percentage": "24.11%", "elapsed_time": "4h 47m 49s", "remaining_time": "15h 5m 55s"}
{"loss": 0.83425529, "token_acc": 0.78382602, "grad_norm": 1.9140625, "learning_rate": 9.012e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602263, "epoch": 0.4868521, "global_step/max_steps": "10500/43134", "percentage": "24.34%", "elapsed_time": "4h 50m 34s", "remaining_time": "15h 3m 4s"}
{"loss": 0.8271167, "token_acc": 0.78267483, "grad_norm": 2.265625, "learning_rate": 8.989e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602304, "epoch": 0.49148878, "global_step/max_steps": "10600/43134", "percentage": "24.57%", "elapsed_time": "4h 53m 18s", "remaining_time": "15h 0m 15s"}
{"loss": 0.83736702, "token_acc": 0.78289321, "grad_norm": 2.015625, "learning_rate": 8.965e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602432, "epoch": 0.49612547, "global_step/max_steps": "10700/43134", "percentage": "24.81%", "elapsed_time": "4h 56m 1s", "remaining_time": "14h 57m 17s"}
{"loss": 0.83074944, "token_acc": 0.78623373, "grad_norm": 2.65625, "learning_rate": 8.942e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602458, "epoch": 0.50076216, "global_step/max_steps": "10800/43134", "percentage": "25.04%", "elapsed_time": "4h 58m 46s", "remaining_time": "14h 54m 29s"}
{"loss": 0.82065857, "token_acc": 0.78566266, "grad_norm": 1.796875, "learning_rate": 8.918e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602522, "epoch": 0.50539884, "global_step/max_steps": "10900/43134", "percentage": "25.27%", "elapsed_time": "5h 1m 30s", "remaining_time": "14h 51m 37s"}
{"loss": 0.81969635, "token_acc": 0.78700443, "grad_norm": 1.984375, "learning_rate": 8.894e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602642, "epoch": 0.51003553, "global_step/max_steps": "11000/43134", "percentage": "25.50%", "elapsed_time": "5h 4m 12s", "remaining_time": "14h 48m 41s"}
{"loss": 0.82801834, "token_acc": 0.78521232, "grad_norm": 2.953125, "learning_rate": 8.87e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602733, "epoch": 0.51467222, "global_step/max_steps": "11100/43134", "percentage": "25.73%", "elapsed_time": "5h 6m 55s", "remaining_time": "14h 45m 47s"}
{"loss": 0.80775673, "token_acc": 0.78602843, "grad_norm": 2.265625, "learning_rate": 8.846e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602771, "epoch": 0.5193089, "global_step/max_steps": "11200/43134", "percentage": "25.97%", "elapsed_time": "5h 9m 40s", "remaining_time": "14h 42m 58s"}
{"loss": 0.80457855, "token_acc": 0.79207415, "grad_norm": 1.6484375, "learning_rate": 8.821e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602861, "epoch": 0.52394559, "global_step/max_steps": "11300/43134", "percentage": "26.20%", "elapsed_time": "5h 12m 23s", "remaining_time": "14h 40m 4s"}
{"loss": 0.80528114, "token_acc": 0.78910797, "grad_norm": 1.4375, "learning_rate": 8.796e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602967, "epoch": 0.52858228, "global_step/max_steps": "11400/43134", "percentage": "26.43%", "elapsed_time": "5h 15m 6s", "remaining_time": "14h 37m 9s"}
{"loss": 0.82447235, "token_acc": 0.78753651, "grad_norm": 1.4921875, "learning_rate": 8.771e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603003, "epoch": 0.53321896, "global_step/max_steps": "11500/43134", "percentage": "26.66%", "elapsed_time": "5h 17m 50s", "remaining_time": "14h 34m 20s"}
{"loss": 0.82421928, "token_acc": 0.78478847, "grad_norm": 1.5546875, "learning_rate": 8.746e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603001, "epoch": 0.53785565, "global_step/max_steps": "11600/43134", "percentage": "26.89%", "elapsed_time": "5h 20m 36s", "remaining_time": "14h 31m 34s"}
{"loss": 0.79770607, "token_acc": 0.79001832, "grad_norm": 1.5, "learning_rate": 8.72e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603061, "epoch": 0.54249233, "global_step/max_steps": "11700/43134", "percentage": "27.12%", "elapsed_time": "5h 23m 20s", "remaining_time": "14h 28m 43s"}
{"loss": 0.83195732, "token_acc": 0.78248764, "grad_norm": 1.75, "learning_rate": 8.695e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603055, "epoch": 0.54712902, "global_step/max_steps": "11800/43134", "percentage": "27.36%", "elapsed_time": "5h 26m 6s", "remaining_time": "14h 25m 58s"}
{"loss": 0.7738533, "token_acc": 0.79481549, "grad_norm": 1.859375, "learning_rate": 8.669e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60318, "epoch": 0.55176571, "global_step/max_steps": "11900/43134", "percentage": "27.59%", "elapsed_time": "5h 28m 48s", "remaining_time": "14h 23m 1s"}
{"loss": 0.81530708, "token_acc": 0.78683046, "grad_norm": 1.2734375, "learning_rate": 8.643e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603308, "epoch": 0.55640239, "global_step/max_steps": "12000/43134", "percentage": "27.82%", "elapsed_time": "5h 31m 30s", "remaining_time": "14h 20m 4s"}
{"loss": 0.80773407, "token_acc": 0.78806757, "grad_norm": 1.5703125, "learning_rate": 8.616e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600572, "epoch": 0.56103908, "global_step/max_steps": "12100/43134", "percentage": "28.05%", "elapsed_time": "5h 35m 47s", "remaining_time": "14h 21m 13s"}
{"loss": 0.82588585, "token_acc": 0.78490123, "grad_norm": 1.5390625, "learning_rate": 8.59e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600646, "epoch": 0.56567577, "global_step/max_steps": "12200/43134", "percentage": "28.28%", "elapsed_time": "5h 38m 31s", "remaining_time": "14h 18m 20s"}
{"loss": 0.78970413, "token_acc": 0.79219146, "grad_norm": 1.8515625, "learning_rate": 8.563e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600685, "epoch": 0.57031245, "global_step/max_steps": "12300/43134", "percentage": "28.52%", "elapsed_time": "5h 41m 16s", "remaining_time": "14h 15m 30s"}
{"loss": 0.8045031, "token_acc": 0.78790333, "grad_norm": 1.5859375, "learning_rate": 8.536e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600802, "epoch": 0.57494914, "global_step/max_steps": "12400/43134", "percentage": "28.75%", "elapsed_time": "5h 43m 58s", "remaining_time": "14h 12m 34s"}
{"loss": 0.81390541, "token_acc": 0.78621168, "grad_norm": 1.8359375, "learning_rate": 8.509e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600805, "epoch": 0.57958583, "global_step/max_steps": "12500/43134", "percentage": "28.98%", "elapsed_time": "5h 46m 45s", "remaining_time": "14h 9m 47s"}
{"loss": 0.77846947, "token_acc": 0.79342394, "grad_norm": 1.390625, "learning_rate": 8.481e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600893, "epoch": 0.58422251, "global_step/max_steps": "12600/43134", "percentage": "29.21%", "elapsed_time": "5h 49m 28s", "remaining_time": "14h 6m 53s"}
{"loss": 0.80872391, "token_acc": 0.78864306, "grad_norm": 2.015625, "learning_rate": 8.454e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601008, "epoch": 0.5888592, "global_step/max_steps": "12700/43134", "percentage": "29.44%", "elapsed_time": "5h 52m 10s", "remaining_time": "14h 3m 57s"}
{"loss": 0.81915741, "token_acc": 0.78521705, "grad_norm": 1.828125, "learning_rate": 8.426e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601014, "epoch": 0.59349589, "global_step/max_steps": "12800/43134", "percentage": "29.67%", "elapsed_time": "5h 54m 57s", "remaining_time": "14h 1m 10s"}
{"loss": 0.80338135, "token_acc": 0.78852314, "grad_norm": 1.5546875, "learning_rate": 8.398e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601076, "epoch": 0.59813257, "global_step/max_steps": "12900/43134", "percentage": "29.91%", "elapsed_time": "5h 57m 41s", "remaining_time": "13h 58m 19s"}
{"loss": 0.78896217, "token_acc": 0.79085017, "grad_norm": 2.078125, "learning_rate": 8.37e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601157, "epoch": 0.60276926, "global_step/max_steps": "13000/43134", "percentage": "30.14%", "elapsed_time": "6h 0m 24s", "remaining_time": "13h 55m 26s"}
{"loss": 0.80992455, "token_acc": 0.78718045, "grad_norm": 1.796875, "learning_rate": 8.341e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60122, "epoch": 0.60740595, "global_step/max_steps": "13100/43134", "percentage": "30.37%", "elapsed_time": "6h 3m 8s", "remaining_time": "13h 52m 34s"}
{"loss": 0.79954536, "token_acc": 0.78963438, "grad_norm": 3.34375, "learning_rate": 8.313e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601281, "epoch": 0.61204263, "global_step/max_steps": "13200/43134", "percentage": "30.60%", "elapsed_time": "6h 5m 52s", "remaining_time": "13h 49m 43s"}
{"loss": 0.81707436, "token_acc": 0.78539706, "grad_norm": 2.796875, "learning_rate": 8.284e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601324, "epoch": 0.61667932, "global_step/max_steps": "13300/43134", "percentage": "30.83%", "elapsed_time": "6h 8m 37s", "remaining_time": "13h 46m 53s"}
{"loss": 0.77795685, "token_acc": 0.79385061, "grad_norm": 1.4609375, "learning_rate": 8.255e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601424, "epoch": 0.62131601, "global_step/max_steps": "13400/43134", "percentage": "31.07%", "elapsed_time": "6h 11m 20s", "remaining_time": "13h 43m 58s"}
{"loss": 0.8076413, "token_acc": 0.78768371, "grad_norm": 1.71875, "learning_rate": 8.226e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601511, "epoch": 0.62595269, "global_step/max_steps": "13500/43134", "percentage": "31.30%", "elapsed_time": "6h 14m 3s", "remaining_time": "13h 41m 5s"}
{"loss": 0.76674881, "token_acc": 0.79556177, "grad_norm": 2.03125, "learning_rate": 8.196e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601583, "epoch": 0.63058938, "global_step/max_steps": "13600/43134", "percentage": "31.53%", "elapsed_time": "6h 16m 46s", "remaining_time": "13h 38m 13s"}
{"loss": 0.8046331, "token_acc": 0.79110756, "grad_norm": 1.7890625, "learning_rate": 8.167e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601638, "epoch": 0.63522607, "global_step/max_steps": "13700/43134", "percentage": "31.76%", "elapsed_time": "6h 19m 30s", "remaining_time": "13h 35m 22s"}
{"loss": 0.80590523, "token_acc": 0.7881224, "grad_norm": 1.640625, "learning_rate": 8.137e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601741, "epoch": 0.63986275, "global_step/max_steps": "13800/43134", "percentage": "31.99%", "elapsed_time": "6h 22m 13s", "remaining_time": "13h 32m 28s"}
{"loss": 0.78765587, "token_acc": 0.79482178, "grad_norm": 2.4375, "learning_rate": 8.107e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601841, "epoch": 0.64449944, "global_step/max_steps": "13900/43134", "percentage": "32.23%", "elapsed_time": "6h 24m 55s", "remaining_time": "13h 29m 33s"}
{"loss": 0.78234413, "token_acc": 0.79466705, "grad_norm": 1.875, "learning_rate": 8.077e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601859, "epoch": 0.64913613, "global_step/max_steps": "14000/43134", "percentage": "32.46%", "elapsed_time": "6h 27m 41s", "remaining_time": "13h 26m 46s"}
{"loss": 0.78542572, "token_acc": 0.79365157, "grad_norm": 2.046875, "learning_rate": 8.046e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601946, "epoch": 0.65377281, "global_step/max_steps": "14100/43134", "percentage": "32.69%", "elapsed_time": "6h 30m 23s", "remaining_time": "13h 23m 53s"}
{"loss": 0.8044397, "token_acc": 0.78659801, "grad_norm": 1.65625, "learning_rate": 8.016e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602065, "epoch": 0.6584095, "global_step/max_steps": "14200/43134", "percentage": "32.92%", "elapsed_time": "6h 33m 5s", "remaining_time": "13h 20m 57s"}
{"loss": 0.78139862, "token_acc": 0.79353879, "grad_norm": 1.796875, "learning_rate": 7.985e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602137, "epoch": 0.66304619, "global_step/max_steps": "14300/43134", "percentage": "33.15%", "elapsed_time": "6h 35m 48s", "remaining_time": "13h 18m 5s"}
{"loss": 0.77669716, "token_acc": 0.79577172, "grad_norm": 1.9375, "learning_rate": 7.954e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602247, "epoch": 0.66768287, "global_step/max_steps": "14400/43134", "percentage": "33.38%", "elapsed_time": "6h 38m 30s", "remaining_time": "13h 15m 10s"}
{"loss": 0.77213577, "token_acc": 0.79490334, "grad_norm": 1.765625, "learning_rate": 7.923e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602329, "epoch": 0.67231956, "global_step/max_steps": "14500/43134", "percentage": "33.62%", "elapsed_time": "6h 41m 13s", "remaining_time": "13h 12m 18s"}
{"loss": 0.81960541, "token_acc": 0.78795618, "grad_norm": 1.8125, "learning_rate": 7.892e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602406, "epoch": 0.67695625, "global_step/max_steps": "14600/43134", "percentage": "33.85%", "elapsed_time": "6h 43m 55s", "remaining_time": "13h 9m 26s"}
{"loss": 0.78769165, "token_acc": 0.79149487, "grad_norm": 2.109375, "learning_rate": 7.861e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602504, "epoch": 0.68159293, "global_step/max_steps": "14700/43134", "percentage": "34.08%", "elapsed_time": "6h 46m 37s", "remaining_time": "13h 6m 32s"}
{"loss": 0.79572411, "token_acc": 0.78973659, "grad_norm": 1.6328125, "learning_rate": 7.829e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602519, "epoch": 0.68622962, "global_step/max_steps": "14800/43134", "percentage": "34.31%", "elapsed_time": "6h 49m 23s", "remaining_time": "13h 3m 45s"}
{"loss": 0.80944313, "token_acc": 0.78889959, "grad_norm": 1.6328125, "learning_rate": 7.798e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602553, "epoch": 0.69086631, "global_step/max_steps": "14900/43134", "percentage": "34.54%", "elapsed_time": "6h 52m 7s", "remaining_time": "13h 0m 56s"}
{"loss": 0.75376472, "token_acc": 0.8018266, "grad_norm": 2.140625, "learning_rate": 7.766e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602614, "epoch": 0.69550299, "global_step/max_steps": "15000/43134", "percentage": "34.78%", "elapsed_time": "6h 54m 51s", "remaining_time": "12h 58m 6s"}
{"loss": 0.77548431, "token_acc": 0.79579452, "grad_norm": 2.71875, "learning_rate": 7.734e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602719, "epoch": 0.70013968, "global_step/max_steps": "15100/43134", "percentage": "35.01%", "elapsed_time": "6h 57m 32s", "remaining_time": "12h 55m 12s"}
{"loss": 0.75227928, "token_acc": 0.79840355, "grad_norm": 1.5078125, "learning_rate": 7.702e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602829, "epoch": 0.70477637, "global_step/max_steps": "15200/43134", "percentage": "35.24%", "elapsed_time": "7h 0m 14s", "remaining_time": "12h 52m 17s"}
{"loss": 0.79488541, "token_acc": 0.79097876, "grad_norm": 1.6875, "learning_rate": 7.669e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602895, "epoch": 0.70941305, "global_step/max_steps": "15300/43134", "percentage": "35.47%", "elapsed_time": "7h 2m 57s", "remaining_time": "12h 49m 26s"}
{"loss": 0.80162743, "token_acc": 0.78821814, "grad_norm": 2.125, "learning_rate": 7.637e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602875, "epoch": 0.71404974, "global_step/max_steps": "15400/43134", "percentage": "35.70%", "elapsed_time": "7h 5m 44s", "remaining_time": "12h 46m 42s"}
{"loss": 0.77597542, "token_acc": 0.79305637, "grad_norm": 2.078125, "learning_rate": 7.604e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60291, "epoch": 0.71868643, "global_step/max_steps": "15500/43134", "percentage": "35.93%", "elapsed_time": "7h 8m 28s", "remaining_time": "12h 43m 54s"}
{"loss": 0.76020851, "token_acc": 0.79807588, "grad_norm": 1.25, "learning_rate": 7.571e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602975, "epoch": 0.72332311, "global_step/max_steps": "15600/43134", "percentage": "36.17%", "elapsed_time": "7h 11m 11s", "remaining_time": "12h 41m 3s"}
{"loss": 0.76953094, "token_acc": 0.79386011, "grad_norm": 1.7109375, "learning_rate": 7.538e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603055, "epoch": 0.7279598, "global_step/max_steps": "15700/43134", "percentage": "36.40%", "elapsed_time": "7h 13m 53s", "remaining_time": "12h 38m 11s"}
{"loss": 0.76387741, "token_acc": 0.79607853, "grad_norm": 1.8984375, "learning_rate": 7.505e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603109, "epoch": 0.73259649, "global_step/max_steps": "15800/43134", "percentage": "36.63%", "elapsed_time": "7h 16m 37s", "remaining_time": "12h 35m 21s"}
{"loss": 0.75049355, "token_acc": 0.80048954, "grad_norm": 1.7109375, "learning_rate": 7.472e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603162, "epoch": 0.73723317, "global_step/max_steps": "15900/43134", "percentage": "36.86%", "elapsed_time": "7h 19m 20s", "remaining_time": "12h 32m 31s"}
{"loss": 0.75230415, "token_acc": 0.80177725, "grad_norm": 3.0625, "learning_rate": 7.439e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603229, "epoch": 0.74186986, "global_step/max_steps": "16000/43134", "percentage": "37.09%", "elapsed_time": "7h 22m 3s", "remaining_time": "12h 29m 40s"}
{"loss": 0.75798561, "token_acc": 0.79982812, "grad_norm": 2.0, "learning_rate": 7.405e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601269, "epoch": 0.74650655, "global_step/max_steps": "16100/43134", "percentage": "37.33%", "elapsed_time": "7h 26m 16s", "remaining_time": "12h 29m 21s"}
{"loss": 0.7515583, "token_acc": 0.80210319, "grad_norm": 1.8671875, "learning_rate": 7.371e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601305, "epoch": 0.75114323, "global_step/max_steps": "16200/43134", "percentage": "37.56%", "elapsed_time": "7h 29m 1s", "remaining_time": "12h 26m 32s"}
{"loss": 0.77854271, "token_acc": 0.7930067, "grad_norm": 1.7890625, "learning_rate": 7.338e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601353, "epoch": 0.75577992, "global_step/max_steps": "16300/43134", "percentage": "37.79%", "elapsed_time": "7h 31m 45s", "remaining_time": "12h 23m 42s"}
{"loss": 0.75831627, "token_acc": 0.80023528, "grad_norm": 1.6015625, "learning_rate": 7.304e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601404, "epoch": 0.76041661, "global_step/max_steps": "16400/43134", "percentage": "38.02%", "elapsed_time": "7h 34m 29s", "remaining_time": "12h 20m 52s"}
{"loss": 0.75896538, "token_acc": 0.79856849, "grad_norm": 1.6640625, "learning_rate": 7.27e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601458, "epoch": 0.76505329, "global_step/max_steps": "16500/43134", "percentage": "38.25%", "elapsed_time": "7h 37m 13s", "remaining_time": "12h 18m 2s"}
{"loss": 0.76656403, "token_acc": 0.79915531, "grad_norm": 2.53125, "learning_rate": 7.235e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.6015, "epoch": 0.76968998, "global_step/max_steps": "16600/43134", "percentage": "38.48%", "elapsed_time": "7h 39m 57s", "remaining_time": "12h 15m 12s"}
{"loss": 0.75409302, "token_acc": 0.79969176, "grad_norm": 1.2421875, "learning_rate": 7.201e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601532, "epoch": 0.77432667, "global_step/max_steps": "16700/43134", "percentage": "38.72%", "elapsed_time": "7h 42m 42s", "remaining_time": "12h 12m 24s"}
{"loss": 0.76336823, "token_acc": 0.79764966, "grad_norm": 1.875, "learning_rate": 7.167e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60161, "epoch": 0.77896335, "global_step/max_steps": "16800/43134", "percentage": "38.95%", "elapsed_time": "7h 45m 24s", "remaining_time": "12h 9m 32s"}
{"loss": 0.76153183, "token_acc": 0.79867956, "grad_norm": 2.25, "learning_rate": 7.132e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601667, "epoch": 0.78360004, "global_step/max_steps": "16900/43134", "percentage": "39.18%", "elapsed_time": "7h 48m 8s", "remaining_time": "12h 6m 41s"}
{"loss": 0.75003342, "token_acc": 0.80163609, "grad_norm": 1.8671875, "learning_rate": 7.097e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60171, "epoch": 0.78823673, "global_step/max_steps": "17000/43134", "percentage": "39.41%", "elapsed_time": "7h 50m 52s", "remaining_time": "12h 3m 52s"}
{"loss": 0.76277763, "token_acc": 0.79705335, "grad_norm": 1.65625, "learning_rate": 7.062e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601777, "epoch": 0.79287341, "global_step/max_steps": "17100/43134", "percentage": "39.64%", "elapsed_time": "7h 53m 35s", "remaining_time": "12h 1m 1s"}
{"loss": 0.76455421, "token_acc": 0.79482104, "grad_norm": 2.09375, "learning_rate": 7.027e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601837, "epoch": 0.7975101, "global_step/max_steps": "17200/43134", "percentage": "39.88%", "elapsed_time": "7h 56m 18s", "remaining_time": "11h 58m 11s"}
{"loss": 0.7586161, "token_acc": 0.79716541, "grad_norm": 1.3828125, "learning_rate": 6.992e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601927, "epoch": 0.80214679, "global_step/max_steps": "17300/43134", "percentage": "40.11%", "elapsed_time": "7h 59m 0s", "remaining_time": "11h 55m 18s"}
{"loss": 0.74415985, "token_acc": 0.80012987, "grad_norm": 1.9375, "learning_rate": 6.957e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602001, "epoch": 0.80678347, "global_step/max_steps": "17400/43134", "percentage": "40.34%", "elapsed_time": "8h 1m 43s", "remaining_time": "11h 52m 27s"}
{"loss": 0.72422462, "token_acc": 0.80653535, "grad_norm": 1.7265625, "learning_rate": 6.922e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602079, "epoch": 0.81142016, "global_step/max_steps": "17500/43134", "percentage": "40.57%", "elapsed_time": "8h 4m 25s", "remaining_time": "11h 49m 35s"}
{"loss": 0.76713837, "token_acc": 0.7957436, "grad_norm": 1.8515625, "learning_rate": 6.886e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602124, "epoch": 0.81605685, "global_step/max_steps": "17600/43134", "percentage": "40.80%", "elapsed_time": "8h 7m 9s", "remaining_time": "11h 46m 46s"}
{"loss": 0.73114258, "token_acc": 0.80277144, "grad_norm": 2.71875, "learning_rate": 6.851e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602234, "epoch": 0.82069353, "global_step/max_steps": "17700/43134", "percentage": "41.03%", "elapsed_time": "8h 9m 50s", "remaining_time": "11h 43m 52s"}
{"loss": 0.73019455, "token_acc": 0.8010799, "grad_norm": 1.7578125, "learning_rate": 6.815e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602317, "epoch": 0.82533022, "global_step/max_steps": "17800/43134", "percentage": "41.27%", "elapsed_time": "8h 12m 32s", "remaining_time": "11h 41m 0s"}
{"loss": 0.73838051, "token_acc": 0.80020004, "grad_norm": 1.984375, "learning_rate": 6.779e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602372, "epoch": 0.82996691, "global_step/max_steps": "17900/43134", "percentage": "41.50%", "elapsed_time": "8h 15m 15s", "remaining_time": "11h 38m 10s"}
{"loss": 0.70706902, "token_acc": 0.80786046, "grad_norm": 1.6640625, "learning_rate": 6.743e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602469, "epoch": 0.83460359, "global_step/max_steps": "18000/43134", "percentage": "41.73%", "elapsed_time": "8h 17m 56s", "remaining_time": "11h 35m 18s"}
{"loss": 0.73353973, "token_acc": 0.8042052, "grad_norm": 1.6171875, "learning_rate": 6.707e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602494, "epoch": 0.83924028, "global_step/max_steps": "18100/43134", "percentage": "41.96%", "elapsed_time": "8h 20m 41s", "remaining_time": "11h 32m 30s"}
{"loss": 0.74904579, "token_acc": 0.80016297, "grad_norm": 1.375, "learning_rate": 6.671e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602464, "epoch": 0.84387697, "global_step/max_steps": "18200/43134", "percentage": "42.19%", "elapsed_time": "8h 23m 29s", "remaining_time": "11h 29m 46s"}
{"loss": 0.75404259, "token_acc": 0.79967818, "grad_norm": 2.015625, "learning_rate": 6.635e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602486, "epoch": 0.84851365, "global_step/max_steps": "18300/43134", "percentage": "42.43%", "elapsed_time": "8h 26m 13s", "remaining_time": "11h 26m 58s"}
{"loss": 0.74473526, "token_acc": 0.80426221, "grad_norm": 1.7265625, "learning_rate": 6.599e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602569, "epoch": 0.85315034, "global_step/max_steps": "18400/43134", "percentage": "42.66%", "elapsed_time": "8h 28m 55s", "remaining_time": "11h 24m 7s"}
{"loss": 0.76895546, "token_acc": 0.79603841, "grad_norm": 1.5859375, "learning_rate": 6.563e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602548, "epoch": 0.85778703, "global_step/max_steps": "18500/43134", "percentage": "42.89%", "elapsed_time": "8h 31m 42s", "remaining_time": "11h 21m 22s"}
{"loss": 0.73571098, "token_acc": 0.80309751, "grad_norm": 3.109375, "learning_rate": 6.526e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602632, "epoch": 0.86242371, "global_step/max_steps": "18600/43134", "percentage": "43.12%", "elapsed_time": "8h 34m 24s", "remaining_time": "11h 18m 31s"}
{"loss": 0.75514694, "token_acc": 0.79757345, "grad_norm": 1.546875, "learning_rate": 6.489e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602707, "epoch": 0.8670604, "global_step/max_steps": "18700/43134", "percentage": "43.35%", "elapsed_time": "8h 37m 6s", "remaining_time": "11h 15m 40s"}
{"loss": 0.73793694, "token_acc": 0.80325753, "grad_norm": 1.4921875, "learning_rate": 6.453e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602748, "epoch": 0.87169709, "global_step/max_steps": "18800/43134", "percentage": "43.59%", "elapsed_time": "8h 39m 50s", "remaining_time": "11h 12m 51s"}
{"loss": 0.73277565, "token_acc": 0.80309863, "grad_norm": 1.59375, "learning_rate": 6.416e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602805, "epoch": 0.87633377, "global_step/max_steps": "18900/43134", "percentage": "43.82%", "elapsed_time": "8h 42m 33s", "remaining_time": "11h 10m 1s"}
{"loss": 0.76496941, "token_acc": 0.79669999, "grad_norm": 2.109375, "learning_rate": 6.379e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602864, "epoch": 0.88097046, "global_step/max_steps": "19000/43134", "percentage": "44.05%", "elapsed_time": "8h 45m 15s", "remaining_time": "11h 7m 11s"}
{"loss": 0.74721298, "token_acc": 0.79786825, "grad_norm": 1.859375, "learning_rate": 6.342e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602896, "epoch": 0.88560715, "global_step/max_steps": "19100/43134", "percentage": "44.28%", "elapsed_time": "8h 48m 0s", "remaining_time": "11h 4m 23s"}
{"loss": 0.71726028, "token_acc": 0.80738199, "grad_norm": 1.34375, "learning_rate": 6.305e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60289, "epoch": 0.89024383, "global_step/max_steps": "19200/43134", "percentage": "44.51%", "elapsed_time": "8h 50m 46s", "remaining_time": "11h 1m 38s"}
{"loss": 0.72047112, "token_acc": 0.80324518, "grad_norm": 1.484375, "learning_rate": 6.268e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60294, "epoch": 0.89488052, "global_step/max_steps": "19300/43134", "percentage": "44.74%", "elapsed_time": "8h 53m 29s", "remaining_time": "10h 58m 49s"}
{"loss": 0.71464966, "token_acc": 0.80717657, "grad_norm": 1.546875, "learning_rate": 6.231e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602985, "epoch": 0.89951721, "global_step/max_steps": "19400/43134", "percentage": "44.98%", "elapsed_time": "8h 56m 13s", "remaining_time": "10h 56m 0s"}
{"loss": 0.72219612, "token_acc": 0.80610638, "grad_norm": 1.2890625, "learning_rate": 6.194e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603031, "epoch": 0.90415389, "global_step/max_steps": "19500/43134", "percentage": "45.21%", "elapsed_time": "8h 58m 56s", "remaining_time": "10h 53m 11s"}
{"loss": 0.73958847, "token_acc": 0.80256634, "grad_norm": 1.9921875, "learning_rate": 6.157e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60303, "epoch": 0.90879058, "global_step/max_steps": "19600/43134", "percentage": "45.44%", "elapsed_time": "9h 1m 42s", "remaining_time": "10h 50m 26s"}
{"loss": 0.72860046, "token_acc": 0.80582072, "grad_norm": 1.421875, "learning_rate": 6.12e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603093, "epoch": 0.91342726, "global_step/max_steps": "19700/43134", "percentage": "45.67%", "elapsed_time": "9h 4m 24s", "remaining_time": "10h 47m 36s"}
{"loss": 0.70783257, "token_acc": 0.81078825, "grad_norm": 1.53125, "learning_rate": 6.082e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603094, "epoch": 0.91806395, "global_step/max_steps": "19800/43134", "percentage": "45.90%", "elapsed_time": "9h 7m 10s", "remaining_time": "10h 44m 50s"}
{"loss": 0.71239876, "token_acc": 0.80974907, "grad_norm": 1.921875, "learning_rate": 6.045e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603154, "epoch": 0.92270064, "global_step/max_steps": "19900/43134", "percentage": "46.14%", "elapsed_time": "9h 9m 53s", "remaining_time": "10h 42m 0s"}
{"loss": 0.72070625, "token_acc": 0.8044054, "grad_norm": 1.3203125, "learning_rate": 6.007e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.603202, "epoch": 0.92733732, "global_step/max_steps": "20000/43134", "percentage": "46.37%", "elapsed_time": "9h 12m 36s", "remaining_time": "10h 39m 11s"}
{"loss": 0.73514961, "token_acc": 0.80294665, "grad_norm": 2.0625, "learning_rate": 5.97e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601562, "epoch": 0.93197401, "global_step/max_steps": "20100/43134", "percentage": "46.60%", "elapsed_time": "9h 16m 52s", "remaining_time": "10h 38m 10s"}
{"loss": 0.71380585, "token_acc": 0.80671501, "grad_norm": 2.0625, "learning_rate": 5.932e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601603, "epoch": 0.9366107, "global_step/max_steps": "20200/43134", "percentage": "46.83%", "elapsed_time": "9h 19m 36s", "remaining_time": "10h 35m 21s"}
{"loss": 0.72715546, "token_acc": 0.80604726, "grad_norm": 1.5546875, "learning_rate": 5.894e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601661, "epoch": 0.94124738, "global_step/max_steps": "20300/43134", "percentage": "47.06%", "elapsed_time": "9h 22m 19s", "remaining_time": "10h 32m 31s"}
{"loss": 0.71372391, "token_acc": 0.80782361, "grad_norm": 2.0625, "learning_rate": 5.857e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601744, "epoch": 0.94588407, "global_step/max_steps": "20400/43134", "percentage": "47.29%", "elapsed_time": "9h 25m 1s", "remaining_time": "10h 29m 39s"}
{"loss": 0.72830193, "token_acc": 0.80361642, "grad_norm": 1.71875, "learning_rate": 5.819e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601811, "epoch": 0.95052076, "global_step/max_steps": "20500/43134", "percentage": "47.53%", "elapsed_time": "9h 27m 43s", "remaining_time": "10h 26m 49s"}
{"loss": 0.72133324, "token_acc": 0.80683148, "grad_norm": 1.9375, "learning_rate": 5.781e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601863, "epoch": 0.95515744, "global_step/max_steps": "20600/43134", "percentage": "47.76%", "elapsed_time": "9h 30m 26s", "remaining_time": "10h 24m 0s"}
{"loss": 0.70327866, "token_acc": 0.80979079, "grad_norm": 2.15625, "learning_rate": 5.743e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601951, "epoch": 0.95979413, "global_step/max_steps": "20700/43134", "percentage": "47.99%", "elapsed_time": "9h 33m 7s", "remaining_time": "10h 21m 8s"}
{"loss": 0.73309807, "token_acc": 0.80221182, "grad_norm": 1.484375, "learning_rate": 5.705e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601953, "epoch": 0.96443082, "global_step/max_steps": "20800/43134", "percentage": "48.22%", "elapsed_time": "9h 35m 53s", "remaining_time": "10h 18m 22s"}
{"loss": 0.70552612, "token_acc": 0.81170957, "grad_norm": 2.40625, "learning_rate": 5.667e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602002, "epoch": 0.9690675, "global_step/max_steps": "20900/43134", "percentage": "48.45%", "elapsed_time": "9h 38m 37s", "remaining_time": "10h 15m 33s"}
{"loss": 0.73332039, "token_acc": 0.8026941, "grad_norm": 2.3125, "learning_rate": 5.629e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602075, "epoch": 0.97370419, "global_step/max_steps": "21000/43134", "percentage": "48.69%", "elapsed_time": "9h 41m 19s", "remaining_time": "10h 12m 42s"}
{"loss": 0.73443573, "token_acc": 0.80175166, "grad_norm": 1.3671875, "learning_rate": 5.591e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602084, "epoch": 0.97834088, "global_step/max_steps": "21100/43134", "percentage": "48.92%", "elapsed_time": "9h 44m 4s", "remaining_time": "10h 9m 56s"}
{"loss": 0.73433189, "token_acc": 0.80364415, "grad_norm": 2.09375, "learning_rate": 5.553e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602141, "epoch": 0.98297756, "global_step/max_steps": "21200/43134", "percentage": "49.15%", "elapsed_time": "9h 46m 47s", "remaining_time": "10h 7m 6s"}
{"loss": 0.6957621, "token_acc": 0.81155914, "grad_norm": 1.7890625, "learning_rate": 5.515e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602139, "epoch": 0.98761425, "global_step/max_steps": "21300/43134", "percentage": "49.38%", "elapsed_time": "9h 49m 33s", "remaining_time": "10h 4m 20s"}
{"loss": 0.69710777, "token_acc": 0.81301358, "grad_norm": 1.1953125, "learning_rate": 5.477e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602176, "epoch": 0.99225094, "global_step/max_steps": "21400/43134", "percentage": "49.61%", "elapsed_time": "9h 52m 17s", "remaining_time": "10h 1m 32s"}
{"loss": 0.73763504, "token_acc": 0.80011747, "grad_norm": 2.078125, "learning_rate": 5.439e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602192, "epoch": 0.99688762, "global_step/max_steps": "21500/43134", "percentage": "49.84%", "elapsed_time": "9h 55m 2s", "remaining_time": "9h 58m 45s"}
{"loss": 0.6758699, "token_acc": 0.81635754, "grad_norm": 1.2421875, "learning_rate": 5.4e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60224, "epoch": 1.00148374, "global_step/max_steps": "21600/43134", "percentage": "50.08%", "elapsed_time": "9h 57m 45s", "remaining_time": "9h 55m 56s"}
{"loss": 0.52570343, "token_acc": 0.84904989, "grad_norm": 1.8671875, "learning_rate": 5.362e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602287, "epoch": 1.00612043, "global_step/max_steps": "21700/43134", "percentage": "50.31%", "elapsed_time": "10h 0m 29s", "remaining_time": "9h 53m 7s"}
{"loss": 0.51916256, "token_acc": 0.84708268, "grad_norm": 1.640625, "learning_rate": 5.324e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602335, "epoch": 1.01075711, "global_step/max_steps": "21800/43134", "percentage": "50.54%", "elapsed_time": "10h 3m 12s", "remaining_time": "9h 50m 18s"}
{"loss": 0.53419849, "token_acc": 0.84584728, "grad_norm": 1.6015625, "learning_rate": 5.286e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602339, "epoch": 1.0153938, "global_step/max_steps": "21900/43134", "percentage": "50.77%", "elapsed_time": "10h 5m 58s", "remaining_time": "9h 47m 32s"}
{"loss": 0.51064098, "token_acc": 0.85322857, "grad_norm": 1.5, "learning_rate": 5.247e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602388, "epoch": 1.02003049, "global_step/max_steps": "22000/43134", "percentage": "51.00%", "elapsed_time": "10h 8m 41s", "remaining_time": "9h 44m 43s"}
{"loss": 0.53206593, "token_acc": 0.84824592, "grad_norm": 1.3984375, "learning_rate": 5.209e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602426, "epoch": 1.02466717, "global_step/max_steps": "22100/43134", "percentage": "51.24%", "elapsed_time": "10h 11m 24s", "remaining_time": "9h 41m 55s"}
{"loss": 0.5308075, "token_acc": 0.84985141, "grad_norm": 1.765625, "learning_rate": 5.171e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602431, "epoch": 1.02930386, "global_step/max_steps": "22200/43134", "percentage": "51.47%", "elapsed_time": "10h 14m 10s", "remaining_time": "9h 39m 9s"}
{"loss": 0.52218758, "token_acc": 0.85024347, "grad_norm": 1.6015625, "learning_rate": 5.132e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602502, "epoch": 1.03394055, "global_step/max_steps": "22300/43134", "percentage": "51.70%", "elapsed_time": "10h 16m 52s", "remaining_time": "9h 36m 18s"}
{"loss": 0.52411591, "token_acc": 0.84941333, "grad_norm": 2.46875, "learning_rate": 5.094e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602545, "epoch": 1.03857723, "global_step/max_steps": "22400/43134", "percentage": "51.93%", "elapsed_time": "10h 19m 35s", "remaining_time": "9h 33m 30s"}
{"loss": 0.53722252, "token_acc": 0.84714747, "grad_norm": 2.171875, "learning_rate": 5.056e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602572, "epoch": 1.04321392, "global_step/max_steps": "22500/43134", "percentage": "52.16%", "elapsed_time": "10h 22m 19s", "remaining_time": "9h 30m 43s"}
{"loss": 0.51906464, "token_acc": 0.85150835, "grad_norm": 1.375, "learning_rate": 5.017e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60259, "epoch": 1.04785061, "global_step/max_steps": "22600/43134", "percentage": "52.39%", "elapsed_time": "10h 25m 4s", "remaining_time": "9h 27m 56s"}
{"loss": 0.54453213, "token_acc": 0.84335873, "grad_norm": 2.0625, "learning_rate": 4.979e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602599, "epoch": 1.05248729, "global_step/max_steps": "22700/43134", "percentage": "52.63%", "elapsed_time": "10h 27m 49s", "remaining_time": "9h 25m 9s"}
{"loss": 0.54128559, "token_acc": 0.84513293, "grad_norm": 2.03125, "learning_rate": 4.941e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602617, "epoch": 1.05712398, "global_step/max_steps": "22800/43134", "percentage": "52.86%", "elapsed_time": "10h 30m 34s", "remaining_time": "9h 22m 22s"}
{"loss": 0.54286972, "token_acc": 0.8449446, "grad_norm": 1.484375, "learning_rate": 4.902e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602609, "epoch": 1.06176067, "global_step/max_steps": "22900/43134", "percentage": "53.09%", "elapsed_time": "10h 33m 21s", "remaining_time": "9h 19m 37s"}
{"loss": 0.51491539, "token_acc": 0.85242751, "grad_norm": 2.140625, "learning_rate": 4.864e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602599, "epoch": 1.06639735, "global_step/max_steps": "23000/43134", "percentage": "53.32%", "elapsed_time": "10h 36m 7s", "remaining_time": "9h 16m 51s"}
{"loss": 0.52914497, "token_acc": 0.84962237, "grad_norm": 1.7421875, "learning_rate": 4.826e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602632, "epoch": 1.07103404, "global_step/max_steps": "23100/43134", "percentage": "53.55%", "elapsed_time": "10h 38m 51s", "remaining_time": "9h 14m 3s"}
{"loss": 0.513829, "token_acc": 0.85429832, "grad_norm": 2.0625, "learning_rate": 4.788e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602691, "epoch": 1.07567073, "global_step/max_steps": "23200/43134", "percentage": "53.79%", "elapsed_time": "10h 41m 33s", "remaining_time": "9h 11m 14s"}
{"loss": 0.52953236, "token_acc": 0.84754979, "grad_norm": 1.53125, "learning_rate": 4.749e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602703, "epoch": 1.08030741, "global_step/max_steps": "23300/43134", "percentage": "54.02%", "elapsed_time": "10h 44m 18s", "remaining_time": "9h 8m 28s"}
{"loss": 0.52894108, "token_acc": 0.8484757, "grad_norm": 2.046875, "learning_rate": 4.711e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602726, "epoch": 1.0849441, "global_step/max_steps": "23400/43134", "percentage": "54.25%", "elapsed_time": "10h 47m 3s", "remaining_time": "9h 5m 41s"}
{"loss": 0.53897778, "token_acc": 0.8457776, "grad_norm": 1.921875, "learning_rate": 4.673e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60278, "epoch": 1.08958079, "global_step/max_steps": "23500/43134", "percentage": "54.48%", "elapsed_time": "10h 49m 45s", "remaining_time": "9h 2m 52s"}
{"loss": 0.50856117, "token_acc": 0.85262072, "grad_norm": 1.7734375, "learning_rate": 4.634e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602832, "epoch": 1.09421747, "global_step/max_steps": "23600/43134", "percentage": "54.71%", "elapsed_time": "10h 52m 28s", "remaining_time": "9h 0m 3s"}
{"loss": 0.50407707, "token_acc": 0.85451962, "grad_norm": 1.5234375, "learning_rate": 4.596e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602846, "epoch": 1.09885416, "global_step/max_steps": "23700/43134", "percentage": "54.95%", "elapsed_time": "10h 55m 13s", "remaining_time": "8h 57m 16s"}
{"loss": 0.52156319, "token_acc": 0.84936531, "grad_norm": 1.4765625, "learning_rate": 4.558e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602843, "epoch": 1.10349085, "global_step/max_steps": "23800/43134", "percentage": "55.18%", "elapsed_time": "10h 57m 59s", "remaining_time": "8h 54m 31s"}
{"loss": 0.53133461, "token_acc": 0.84838873, "grad_norm": 1.734375, "learning_rate": 4.52e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602841, "epoch": 1.10812753, "global_step/max_steps": "23900/43134", "percentage": "55.41%", "elapsed_time": "11h 0m 45s", "remaining_time": "8h 51m 45s"}
{"loss": 0.55353996, "token_acc": 0.84455936, "grad_norm": 1.859375, "learning_rate": 4.482e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602865, "epoch": 1.11276422, "global_step/max_steps": "24000/43134", "percentage": "55.64%", "elapsed_time": "11h 3m 29s", "remaining_time": "8h 48m 58s"}
{"loss": 0.55495659, "token_acc": 0.84456454, "grad_norm": 1.34375, "learning_rate": 4.444e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601493, "epoch": 1.11740091, "global_step/max_steps": "24100/43134", "percentage": "55.87%", "elapsed_time": "11h 7m 46s", "remaining_time": "8h 47m 24s"}
{"loss": 0.53722683, "token_acc": 0.84764125, "grad_norm": 1.265625, "learning_rate": 4.406e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601548, "epoch": 1.12203759, "global_step/max_steps": "24200/43134", "percentage": "56.10%", "elapsed_time": "11h 10m 29s", "remaining_time": "8h 44m 35s"}
{"loss": 0.51588486, "token_acc": 0.85195024, "grad_norm": 1.5, "learning_rate": 4.367e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601601, "epoch": 1.12667428, "global_step/max_steps": "24300/43134", "percentage": "56.34%", "elapsed_time": "11h 13m 11s", "remaining_time": "8h 41m 46s"}
{"loss": 0.52413223, "token_acc": 0.84882349, "grad_norm": 1.4921875, "learning_rate": 4.329e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601652, "epoch": 1.13131097, "global_step/max_steps": "24400/43134", "percentage": "56.57%", "elapsed_time": "11h 15m 54s", "remaining_time": "8h 38m 57s"}
{"loss": 0.50967892, "token_acc": 0.85415134, "grad_norm": 1.9921875, "learning_rate": 4.291e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601676, "epoch": 1.13594765, "global_step/max_steps": "24500/43134", "percentage": "56.80%", "elapsed_time": "11h 18m 39s", "remaining_time": "8h 36m 9s"}
{"loss": 0.51329414, "token_acc": 0.85315912, "grad_norm": 1.625, "learning_rate": 4.254e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601721, "epoch": 1.14058434, "global_step/max_steps": "24600/43134", "percentage": "57.03%", "elapsed_time": "11h 21m 22s", "remaining_time": "8h 33m 21s"}
{"loss": 0.51895451, "token_acc": 0.85000373, "grad_norm": 4.375, "learning_rate": 4.216e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601713, "epoch": 1.14522103, "global_step/max_steps": "24700/43134", "percentage": "57.26%", "elapsed_time": "11h 24m 9s", "remaining_time": "8h 30m 35s"}
{"loss": 0.52288601, "token_acc": 0.8506363, "grad_norm": 1.90625, "learning_rate": 4.178e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601715, "epoch": 1.14985771, "global_step/max_steps": "24800/43134", "percentage": "57.50%", "elapsed_time": "11h 26m 55s", "remaining_time": "8h 27m 49s"}
{"loss": 0.51650444, "token_acc": 0.85068217, "grad_norm": 1.9765625, "learning_rate": 4.14e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601757, "epoch": 1.1544944, "global_step/max_steps": "24900/43134", "percentage": "57.73%", "elapsed_time": "11h 29m 38s", "remaining_time": "8h 25m 1s"}
{"loss": 0.51310863, "token_acc": 0.85082821, "grad_norm": 1.2109375, "learning_rate": 4.102e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601786, "epoch": 1.15913108, "global_step/max_steps": "25000/43134", "percentage": "57.96%", "elapsed_time": "11h 32m 22s", "remaining_time": "8h 22m 13s"}
{"loss": 0.52676037, "token_acc": 0.84681239, "grad_norm": 1.5625, "learning_rate": 4.065e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601823, "epoch": 1.16376777, "global_step/max_steps": "25100/43134", "percentage": "58.19%", "elapsed_time": "11h 35m 6s", "remaining_time": "8h 19m 25s"}
{"loss": 0.50712284, "token_acc": 0.85321577, "grad_norm": 1.8359375, "learning_rate": 4.027e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601863, "epoch": 1.16840446, "global_step/max_steps": "25200/43134", "percentage": "58.42%", "elapsed_time": "11h 37m 49s", "remaining_time": "8h 16m 37s"}
{"loss": 0.51849949, "token_acc": 0.85115641, "grad_norm": 1.5859375, "learning_rate": 3.989e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601906, "epoch": 1.17304114, "global_step/max_steps": "25300/43134", "percentage": "58.65%", "elapsed_time": "11h 40m 32s", "remaining_time": "8h 13m 49s"}
{"loss": 0.52366795, "token_acc": 0.85102658, "grad_norm": 1.8515625, "learning_rate": 3.952e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601912, "epoch": 1.17767783, "global_step/max_steps": "25400/43134", "percentage": "58.89%", "elapsed_time": "11h 43m 18s", "remaining_time": "8h 11m 2s"}
{"loss": 0.52765636, "token_acc": 0.84725115, "grad_norm": 1.671875, "learning_rate": 3.914e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601933, "epoch": 1.18231452, "global_step/max_steps": "25500/43134", "percentage": "59.12%", "elapsed_time": "11h 46m 3s", "remaining_time": "8h 8m 15s"}
{"loss": 0.5328569, "token_acc": 0.84860212, "grad_norm": 2.25, "learning_rate": 3.877e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601939, "epoch": 1.1869512, "global_step/max_steps": "25600/43134", "percentage": "59.35%", "elapsed_time": "11h 48m 49s", "remaining_time": "8h 5m 29s"}
{"loss": 0.5036916, "token_acc": 0.85377413, "grad_norm": 1.6796875, "learning_rate": 3.84e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601953, "epoch": 1.19158789, "global_step/max_steps": "25700/43134", "percentage": "59.58%", "elapsed_time": "11h 51m 34s", "remaining_time": "8h 2m 42s"}
{"loss": 0.50120338, "token_acc": 0.85455544, "grad_norm": 2.5625, "learning_rate": 3.803e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601979, "epoch": 1.19622458, "global_step/max_steps": "25800/43134", "percentage": "59.81%", "elapsed_time": "11h 54m 18s", "remaining_time": "7h 59m 54s"}
{"loss": 0.50982616, "token_acc": 0.8517804, "grad_norm": 2.0625, "learning_rate": 3.765e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602031, "epoch": 1.20086126, "global_step/max_steps": "25900/43134", "percentage": "60.05%", "elapsed_time": "11h 57m 0s", "remaining_time": "7h 57m 6s"}
{"loss": 0.52327831, "token_acc": 0.84963866, "grad_norm": 1.65625, "learning_rate": 3.728e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602073, "epoch": 1.20549795, "global_step/max_steps": "26000/43134", "percentage": "60.28%", "elapsed_time": "11h 59m 43s", "remaining_time": "7h 54m 18s"}
{"loss": 0.52400993, "token_acc": 0.85082903, "grad_norm": 1.7890625, "learning_rate": 3.691e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602114, "epoch": 1.21013464, "global_step/max_steps": "26100/43134", "percentage": "60.51%", "elapsed_time": "12h 2m 27s", "remaining_time": "7h 51m 30s"}
{"loss": 0.52850834, "token_acc": 0.84867971, "grad_norm": 1.703125, "learning_rate": 3.654e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602127, "epoch": 1.21477132, "global_step/max_steps": "26200/43134", "percentage": "60.74%", "elapsed_time": "12h 5m 12s", "remaining_time": "7h 48m 43s"}
{"loss": 0.52802952, "token_acc": 0.84874413, "grad_norm": 1.5234375, "learning_rate": 3.617e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602125, "epoch": 1.21940801, "global_step/max_steps": "26300/43134", "percentage": "60.97%", "elapsed_time": "12h 7m 58s", "remaining_time": "7h 45m 57s"}
{"loss": 0.52363449, "token_acc": 0.84954629, "grad_norm": 1.84375, "learning_rate": 3.581e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602136, "epoch": 1.2240447, "global_step/max_steps": "26400/43134", "percentage": "61.20%", "elapsed_time": "12h 10m 43s", "remaining_time": "7h 43m 10s"}
{"loss": 0.51689137, "token_acc": 0.84952495, "grad_norm": 1.953125, "learning_rate": 3.544e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602164, "epoch": 1.22868138, "global_step/max_steps": "26500/43134", "percentage": "61.44%", "elapsed_time": "12h 13m 27s", "remaining_time": "7h 40m 23s"}
{"loss": 0.52507545, "token_acc": 0.84766554, "grad_norm": 1.578125, "learning_rate": 3.507e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602203, "epoch": 1.23331807, "global_step/max_steps": "26600/43134", "percentage": "61.67%", "elapsed_time": "12h 16m 10s", "remaining_time": "7h 37m 35s"}
{"loss": 0.54070469, "token_acc": 0.84471386, "grad_norm": 1.5, "learning_rate": 3.471e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602211, "epoch": 1.23795476, "global_step/max_steps": "26700/43134", "percentage": "61.90%", "elapsed_time": "12h 18m 56s", "remaining_time": "7h 34m 49s"}
{"loss": 0.48795864, "token_acc": 0.85840501, "grad_norm": 1.40625, "learning_rate": 3.434e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602272, "epoch": 1.24259144, "global_step/max_steps": "26800/43134", "percentage": "62.13%", "elapsed_time": "12h 21m 37s", "remaining_time": "7h 32m 0s"}
{"loss": 0.50926018, "token_acc": 0.8539995, "grad_norm": 1.65625, "learning_rate": 3.398e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602315, "epoch": 1.24722813, "global_step/max_steps": "26900/43134", "percentage": "62.36%", "elapsed_time": "12h 24m 20s", "remaining_time": "7h 29m 12s"}
{"loss": 0.52081017, "token_acc": 0.84984719, "grad_norm": 1.34375, "learning_rate": 3.362e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602372, "epoch": 1.25186482, "global_step/max_steps": "27000/43134", "percentage": "62.60%", "elapsed_time": "12h 27m 2s", "remaining_time": "7h 26m 23s"}
{"loss": 0.52198727, "token_acc": 0.84983715, "grad_norm": 1.4296875, "learning_rate": 3.325e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602379, "epoch": 1.2565015, "global_step/max_steps": "27100/43134", "percentage": "62.83%", "elapsed_time": "12h 29m 48s", "remaining_time": "7h 23m 37s"}
{"loss": 0.52170235, "token_acc": 0.84985575, "grad_norm": 1.6171875, "learning_rate": 3.289e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602413, "epoch": 1.26113819, "global_step/max_steps": "27200/43134", "percentage": "63.06%", "elapsed_time": "12h 32m 31s", "remaining_time": "7h 20m 50s"}
{"loss": 0.50462936, "token_acc": 0.85612125, "grad_norm": 1.3359375, "learning_rate": 3.253e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602454, "epoch": 1.26577488, "global_step/max_steps": "27300/43134", "percentage": "63.29%", "elapsed_time": "12h 35m 14s", "remaining_time": "7h 18m 2s"}
{"loss": 0.50492535, "token_acc": 0.85293294, "grad_norm": 2.453125, "learning_rate": 3.218e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60248, "epoch": 1.27041156, "global_step/max_steps": "27400/43134", "percentage": "63.52%", "elapsed_time": "12h 37m 58s", "remaining_time": "7h 15m 15s"}
{"loss": 0.52806942, "token_acc": 0.84886007, "grad_norm": 1.5078125, "learning_rate": 3.182e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602513, "epoch": 1.27504825, "global_step/max_steps": "27500/43134", "percentage": "63.75%", "elapsed_time": "12h 40m 41s", "remaining_time": "7h 12m 27s"}
{"loss": 0.49516224, "token_acc": 0.85662241, "grad_norm": 1.453125, "learning_rate": 3.146e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602521, "epoch": 1.27968494, "global_step/max_steps": "27600/43134", "percentage": "63.99%", "elapsed_time": "12h 43m 27s", "remaining_time": "7h 9m 41s"}
{"loss": 0.48900772, "token_acc": 0.85799428, "grad_norm": 1.6796875, "learning_rate": 3.111e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60255, "epoch": 1.28432162, "global_step/max_steps": "27700/43134", "percentage": "64.22%", "elapsed_time": "12h 46m 11s", "remaining_time": "7h 6m 54s"}
{"loss": 0.53656063, "token_acc": 0.84590615, "grad_norm": 1.3125, "learning_rate": 3.075e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602539, "epoch": 1.28895831, "global_step/max_steps": "27800/43134", "percentage": "64.45%", "elapsed_time": "12h 48m 57s", "remaining_time": "7h 4m 8s"}
{"loss": 0.52688267, "token_acc": 0.84870811, "grad_norm": 1.8984375, "learning_rate": 3.04e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602575, "epoch": 1.293595, "global_step/max_steps": "27900/43134", "percentage": "64.68%", "elapsed_time": "12h 51m 41s", "remaining_time": "7h 1m 21s"}
{"loss": 0.52236122, "token_acc": 0.84935938, "grad_norm": 1.9453125, "learning_rate": 3.005e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602612, "epoch": 1.29823168, "global_step/max_steps": "28000/43134", "percentage": "64.91%", "elapsed_time": "12h 54m 24s", "remaining_time": "6h 58m 33s"}
{"loss": 0.51056019, "token_acc": 0.85303336, "grad_norm": 2.75, "learning_rate": 2.97e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601393, "epoch": 1.30286837, "global_step/max_steps": "28100/43134", "percentage": "65.15%", "elapsed_time": "12h 58m 44s", "remaining_time": "6h 56m 38s"}
{"loss": 0.51891212, "token_acc": 0.8508206, "grad_norm": 1.453125, "learning_rate": 2.935e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601415, "epoch": 1.30750506, "global_step/max_steps": "28200/43134", "percentage": "65.38%", "elapsed_time": "13h 1m 29s", "remaining_time": "6h 53m 51s"}
{"loss": 0.49939896, "token_acc": 0.85473773, "grad_norm": 1.59375, "learning_rate": 2.9e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601451, "epoch": 1.31214174, "global_step/max_steps": "28300/43134", "percentage": "65.61%", "elapsed_time": "13h 4m 12s", "remaining_time": "6h 51m 3s"}
{"loss": 0.49695831, "token_acc": 0.85573761, "grad_norm": 2.359375, "learning_rate": 2.865e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601484, "epoch": 1.31677843, "global_step/max_steps": "28400/43134", "percentage": "65.84%", "elapsed_time": "13h 6m 56s", "remaining_time": "6h 48m 15s"}
{"loss": 0.51093327, "token_acc": 0.85410264, "grad_norm": 1.5625, "learning_rate": 2.83e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.6015, "epoch": 1.32141512, "global_step/max_steps": "28500/43134", "percentage": "66.07%", "elapsed_time": "13h 9m 41s", "remaining_time": "6h 45m 29s"}
{"loss": 0.50824932, "token_acc": 0.85303471, "grad_norm": 1.7109375, "learning_rate": 2.796e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601524, "epoch": 1.3260518, "global_step/max_steps": "28600/43134", "percentage": "66.31%", "elapsed_time": "13h 12m 25s", "remaining_time": "6h 42m 41s"}
{"loss": 0.52283504, "token_acc": 0.84942782, "grad_norm": 1.8359375, "learning_rate": 2.762e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601554, "epoch": 1.33068849, "global_step/max_steps": "28700/43134", "percentage": "66.54%", "elapsed_time": "13h 15m 9s", "remaining_time": "6h 39m 54s"}
{"loss": 0.52958923, "token_acc": 0.84834593, "grad_norm": 1.859375, "learning_rate": 2.727e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601585, "epoch": 1.33532518, "global_step/max_steps": "28800/43134", "percentage": "66.77%", "elapsed_time": "13h 17m 53s", "remaining_time": "6h 37m 6s"}
{"loss": 0.51894787, "token_acc": 0.84986554, "grad_norm": 2.109375, "learning_rate": 2.693e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601604, "epoch": 1.33996186, "global_step/max_steps": "28900/43134", "percentage": "67.00%", "elapsed_time": "13h 20m 38s", "remaining_time": "6h 34m 19s"}
{"loss": 0.51675041, "token_acc": 0.85094595, "grad_norm": 1.6328125, "learning_rate": 2.659e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601627, "epoch": 1.34459855, "global_step/max_steps": "29000/43134", "percentage": "67.23%", "elapsed_time": "13h 23m 22s", "remaining_time": "6h 31m 32s"}
{"loss": 0.52403889, "token_acc": 0.84872471, "grad_norm": 1.921875, "learning_rate": 2.626e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601653, "epoch": 1.34923524, "global_step/max_steps": "29100/43134", "percentage": "67.46%", "elapsed_time": "13h 26m 6s", "remaining_time": "6h 28m 45s"}
{"loss": 0.50501461, "token_acc": 0.85659372, "grad_norm": 2.375, "learning_rate": 2.592e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601672, "epoch": 1.35387192, "global_step/max_steps": "29200/43134", "percentage": "67.70%", "elapsed_time": "13h 28m 51s", "remaining_time": "6h 25m 58s"}
{"loss": 0.49470093, "token_acc": 0.855804, "grad_norm": 1.8828125, "learning_rate": 2.558e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601628, "epoch": 1.35850861, "global_step/max_steps": "29300/43134", "percentage": "67.93%", "elapsed_time": "13h 31m 40s", "remaining_time": "6h 23m 14s"}
{"loss": 0.52046108, "token_acc": 0.85247095, "grad_norm": 1.3359375, "learning_rate": 2.525e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601658, "epoch": 1.3631453, "global_step/max_steps": "29400/43134", "percentage": "68.16%", "elapsed_time": "13h 34m 24s", "remaining_time": "6h 20m 26s"}
{"loss": 0.51606113, "token_acc": 0.85182358, "grad_norm": 2.140625, "learning_rate": 2.492e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601699, "epoch": 1.36778198, "global_step/max_steps": "29500/43134", "percentage": "68.39%", "elapsed_time": "13h 37m 7s", "remaining_time": "6h 17m 39s"}
{"loss": 0.53274746, "token_acc": 0.84814812, "grad_norm": 1.5, "learning_rate": 2.459e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601727, "epoch": 1.37241867, "global_step/max_steps": "29600/43134", "percentage": "68.62%", "elapsed_time": "13h 39m 51s", "remaining_time": "6h 14m 51s"}
{"loss": 0.49717644, "token_acc": 0.85697425, "grad_norm": 2.265625, "learning_rate": 2.426e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601772, "epoch": 1.37705536, "global_step/max_steps": "29700/43134", "percentage": "68.86%", "elapsed_time": "13h 42m 34s", "remaining_time": "6h 12m 3s"}
{"loss": 0.49218143, "token_acc": 0.85532965, "grad_norm": 1.65625, "learning_rate": 2.393e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601786, "epoch": 1.38169204, "global_step/max_steps": "29800/43134", "percentage": "69.09%", "elapsed_time": "13h 45m 19s", "remaining_time": "6h 9m 17s"}
{"loss": 0.51304981, "token_acc": 0.85514943, "grad_norm": 1.3203125, "learning_rate": 2.36e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601825, "epoch": 1.38632873, "global_step/max_steps": "29900/43134", "percentage": "69.32%", "elapsed_time": "13h 48m 2s", "remaining_time": "6h 6m 29s"}
{"loss": 0.51763264, "token_acc": 0.85093741, "grad_norm": 1.3046875, "learning_rate": 2.328e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601852, "epoch": 1.39096542, "global_step/max_steps": "30000/43134", "percentage": "69.55%", "elapsed_time": "13h 50m 45s", "remaining_time": "6h 3m 42s"}
{"loss": 0.49885704, "token_acc": 0.85409417, "grad_norm": 1.5234375, "learning_rate": 2.295e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60188, "epoch": 1.3956021, "global_step/max_steps": "30100/43134", "percentage": "69.78%", "elapsed_time": "13h 53m 29s", "remaining_time": "6h 0m 55s"}
{"loss": 0.51671623, "token_acc": 0.85109188, "grad_norm": 1.6953125, "learning_rate": 2.263e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601879, "epoch": 1.40023879, "global_step/max_steps": "30200/43134", "percentage": "70.01%", "elapsed_time": "13h 56m 15s", "remaining_time": "5h 58m 9s"}
{"loss": 0.51873959, "token_acc": 0.85050129, "grad_norm": 2.0, "learning_rate": 2.231e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601896, "epoch": 1.40487548, "global_step/max_steps": "30300/43134", "percentage": "70.25%", "elapsed_time": "13h 59m 0s", "remaining_time": "5h 55m 22s"}
{"loss": 0.50084473, "token_acc": 0.85681098, "grad_norm": 1.3828125, "learning_rate": 2.199e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601958, "epoch": 1.40951216, "global_step/max_steps": "30400/43134", "percentage": "70.48%", "elapsed_time": "14h 1m 41s", "remaining_time": "5h 52m 34s"}
{"loss": 0.50405025, "token_acc": 0.85610265, "grad_norm": 1.8125, "learning_rate": 2.168e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602004, "epoch": 1.41414885, "global_step/max_steps": "30500/43134", "percentage": "70.71%", "elapsed_time": "14h 4m 23s", "remaining_time": "5h 49m 46s"}
{"loss": 0.48265121, "token_acc": 0.86184595, "grad_norm": 1.4921875, "learning_rate": 2.136e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602021, "epoch": 1.41878554, "global_step/max_steps": "30600/43134", "percentage": "70.94%", "elapsed_time": "14h 7m 8s", "remaining_time": "5h 46m 59s"}
{"loss": 0.51356506, "token_acc": 0.8519371, "grad_norm": 1.984375, "learning_rate": 2.105e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602044, "epoch": 1.42342222, "global_step/max_steps": "30700/43134", "percentage": "71.17%", "elapsed_time": "14h 9m 52s", "remaining_time": "5h 44m 12s"}
{"loss": 0.53286583, "token_acc": 0.84697282, "grad_norm": 1.7109375, "learning_rate": 2.074e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602075, "epoch": 1.42805891, "global_step/max_steps": "30800/43134", "percentage": "71.41%", "elapsed_time": "14h 12m 36s", "remaining_time": "5h 41m 25s"}
{"loss": 0.53154282, "token_acc": 0.84739309, "grad_norm": 1.890625, "learning_rate": 2.043e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602073, "epoch": 1.4326956, "global_step/max_steps": "30900/43134", "percentage": "71.64%", "elapsed_time": "14h 15m 22s", "remaining_time": "5h 38m 39s"}
{"loss": 0.50183041, "token_acc": 0.85576499, "grad_norm": 1.59375, "learning_rate": 2.012e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602068, "epoch": 1.43733228, "global_step/max_steps": "31000/43134", "percentage": "71.87%", "elapsed_time": "14h 18m 8s", "remaining_time": "5h 35m 53s"}
{"loss": 0.49964619, "token_acc": 0.85507045, "grad_norm": 1.5859375, "learning_rate": 1.981e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602085, "epoch": 1.44196897, "global_step/max_steps": "31100/43134", "percentage": "72.10%", "elapsed_time": "14h 20m 53s", "remaining_time": "5h 33m 7s"}
{"loss": 0.50576633, "token_acc": 0.85489354, "grad_norm": 2.25, "learning_rate": 1.951e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602108, "epoch": 1.44660566, "global_step/max_steps": "31200/43134", "percentage": "72.33%", "elapsed_time": "14h 23m 37s", "remaining_time": "5h 30m 20s"}
{"loss": 0.49164761, "token_acc": 0.85864967, "grad_norm": 1.640625, "learning_rate": 1.921e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602102, "epoch": 1.45124234, "global_step/max_steps": "31300/43134", "percentage": "72.56%", "elapsed_time": "14h 26m 24s", "remaining_time": "5h 27m 34s"}
{"loss": 0.50505424, "token_acc": 0.85320373, "grad_norm": 1.4296875, "learning_rate": 1.89e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60213, "epoch": 1.45587903, "global_step/max_steps": "31400/43134", "percentage": "72.80%", "elapsed_time": "14h 29m 8s", "remaining_time": "5h 24m 47s"}
{"loss": 0.52889908, "token_acc": 0.84663017, "grad_norm": 1.296875, "learning_rate": 1.861e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602139, "epoch": 1.46051572, "global_step/max_steps": "31500/43134", "percentage": "73.03%", "elapsed_time": "14h 31m 53s", "remaining_time": "5h 22m 1s"}
{"loss": 0.51589058, "token_acc": 0.85319127, "grad_norm": 2.046875, "learning_rate": 1.831e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602135, "epoch": 1.4651524, "global_step/max_steps": "31600/43134", "percentage": "73.26%", "elapsed_time": "14h 34m 39s", "remaining_time": "5h 19m 15s"}
{"loss": 0.50014214, "token_acc": 0.85483338, "grad_norm": 2.53125, "learning_rate": 1.801e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602132, "epoch": 1.46978909, "global_step/max_steps": "31700/43134", "percentage": "73.49%", "elapsed_time": "14h 37m 26s", "remaining_time": "5h 16m 29s"}
{"loss": 0.49238022, "token_acc": 0.8566869, "grad_norm": 1.3984375, "learning_rate": 1.772e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602137, "epoch": 1.47442578, "global_step/max_steps": "31800/43134", "percentage": "73.72%", "elapsed_time": "14h 40m 11s", "remaining_time": "5h 13m 42s"}
{"loss": 0.49601685, "token_acc": 0.85612907, "grad_norm": 1.4296875, "learning_rate": 1.743e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602129, "epoch": 1.47906246, "global_step/max_steps": "31900/43134", "percentage": "73.96%", "elapsed_time": "14h 42m 58s", "remaining_time": "5h 10m 57s"}
{"loss": 0.5164212, "token_acc": 0.85206249, "grad_norm": 1.4609375, "learning_rate": 1.714e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602128, "epoch": 1.48369915, "global_step/max_steps": "32000/43134", "percentage": "74.19%", "elapsed_time": "14h 45m 44s", "remaining_time": "5h 8m 11s"}
{"loss": 0.50367386, "token_acc": 0.85265384, "grad_norm": 2.140625, "learning_rate": 1.685e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.600964, "epoch": 1.48833584, "global_step/max_steps": "32100/43134", "percentage": "74.42%", "elapsed_time": "14h 50m 13s", "remaining_time": "5h 6m 0s"}
{"loss": 0.48099854, "token_acc": 0.86064438, "grad_norm": 1.203125, "learning_rate": 1.656e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601021, "epoch": 1.49297252, "global_step/max_steps": "32200/43134", "percentage": "74.65%", "elapsed_time": "14h 52m 55s", "remaining_time": "5h 3m 12s"}
{"loss": 0.48580185, "token_acc": 0.86188954, "grad_norm": 1.3515625, "learning_rate": 1.628e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601042, "epoch": 1.49760921, "global_step/max_steps": "32300/43134", "percentage": "74.88%", "elapsed_time": "14h 55m 39s", "remaining_time": "5h 0m 25s"}
{"loss": 0.49442848, "token_acc": 0.85747152, "grad_norm": 2.328125, "learning_rate": 1.6e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601083, "epoch": 1.5022459, "global_step/max_steps": "32400/43134", "percentage": "75.11%", "elapsed_time": "14h 58m 22s", "remaining_time": "4h 57m 37s"}
{"loss": 0.51468029, "token_acc": 0.85371316, "grad_norm": 1.9765625, "learning_rate": 1.572e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601135, "epoch": 1.50688258, "global_step/max_steps": "32500/43134", "percentage": "75.35%", "elapsed_time": "15h 1m 4s", "remaining_time": "4h 54m 49s"}
{"loss": 0.507486, "token_acc": 0.85318452, "grad_norm": 1.7734375, "learning_rate": 1.544e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601191, "epoch": 1.51151927, "global_step/max_steps": "32600/43134", "percentage": "75.58%", "elapsed_time": "15h 3m 45s", "remaining_time": "4h 52m 1s"}
{"loss": 0.50717316, "token_acc": 0.8552948, "grad_norm": 1.4921875, "learning_rate": 1.516e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601183, "epoch": 1.51615595, "global_step/max_steps": "32700/43134", "percentage": "75.81%", "elapsed_time": "15h 6m 32s", "remaining_time": "4h 49m 15s"}
{"loss": 0.50004128, "token_acc": 0.8566262, "grad_norm": 1.546875, "learning_rate": 1.489e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601228, "epoch": 1.52079264, "global_step/max_steps": "32800/43134", "percentage": "76.04%", "elapsed_time": "15h 9m 14s", "remaining_time": "4h 46m 28s"}
{"loss": 0.51311298, "token_acc": 0.85350094, "grad_norm": 1.2734375, "learning_rate": 1.462e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601253, "epoch": 1.52542933, "global_step/max_steps": "32900/43134", "percentage": "76.27%", "elapsed_time": "15h 11m 58s", "remaining_time": "4h 43m 41s"}
{"loss": 0.47713997, "token_acc": 0.8619863, "grad_norm": 1.3046875, "learning_rate": 1.435e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601274, "epoch": 1.53006601, "global_step/max_steps": "33000/43134", "percentage": "76.51%", "elapsed_time": "15h 14m 43s", "remaining_time": "4h 40m 54s"}
{"loss": 0.50158588, "token_acc": 0.85481183, "grad_norm": 2.0, "learning_rate": 1.408e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601275, "epoch": 1.5347027, "global_step/max_steps": "33100/43134", "percentage": "76.74%", "elapsed_time": "15h 17m 29s", "remaining_time": "4h 38m 7s"}
{"loss": 0.5158115, "token_acc": 0.85132513, "grad_norm": 2.03125, "learning_rate": 1.381e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601286, "epoch": 1.53933939, "global_step/max_steps": "33200/43134", "percentage": "76.97%", "elapsed_time": "15h 20m 14s", "remaining_time": "4h 35m 21s"}
{"loss": 0.49888496, "token_acc": 0.85691128, "grad_norm": 1.390625, "learning_rate": 1.355e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601324, "epoch": 1.54397607, "global_step/max_steps": "33300/43134", "percentage": "77.20%", "elapsed_time": "15h 22m 57s", "remaining_time": "4h 32m 33s"}
{"loss": 0.51694984, "token_acc": 0.85294724, "grad_norm": 1.78125, "learning_rate": 1.329e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601349, "epoch": 1.54861276, "global_step/max_steps": "33400/43134", "percentage": "77.43%", "elapsed_time": "15h 25m 41s", "remaining_time": "4h 29m 46s"}
{"loss": 0.51929909, "token_acc": 0.85073534, "grad_norm": 1.5390625, "learning_rate": 1.303e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601329, "epoch": 1.55324945, "global_step/max_steps": "33500/43134", "percentage": "77.66%", "elapsed_time": "15h 28m 29s", "remaining_time": "4h 27m 1s"}
{"loss": 0.48458004, "token_acc": 0.85924013, "grad_norm": 1.25, "learning_rate": 1.277e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601334, "epoch": 1.55788613, "global_step/max_steps": "33600/43134", "percentage": "77.90%", "elapsed_time": "15h 31m 15s", "remaining_time": "4h 24m 14s"}
{"loss": 0.49762413, "token_acc": 0.85521368, "grad_norm": 1.8359375, "learning_rate": 1.252e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60136, "epoch": 1.56252282, "global_step/max_steps": "33700/43134", "percentage": "78.13%", "elapsed_time": "15h 33m 59s", "remaining_time": "4h 21m 27s"}
{"loss": 0.52211201, "token_acc": 0.84986669, "grad_norm": 1.0625, "learning_rate": 1.227e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601387, "epoch": 1.56715951, "global_step/max_steps": "33800/43134", "percentage": "78.36%", "elapsed_time": "15h 36m 43s", "remaining_time": "4h 18m 40s"}
{"loss": 0.50039497, "token_acc": 0.85553899, "grad_norm": 1.546875, "learning_rate": 1.202e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601422, "epoch": 1.57179619, "global_step/max_steps": "33900/43134", "percentage": "78.59%", "elapsed_time": "15h 39m 26s", "remaining_time": "4h 15m 53s"}
{"loss": 0.49133373, "token_acc": 0.85757934, "grad_norm": 1.875, "learning_rate": 1.177e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601431, "epoch": 1.57643288, "global_step/max_steps": "34000/43134", "percentage": "78.82%", "elapsed_time": "15h 42m 11s", "remaining_time": "4h 13m 7s"}
{"loss": 0.5174255, "token_acc": 0.8516847, "grad_norm": 1.7578125, "learning_rate": 1.152e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601476, "epoch": 1.58106957, "global_step/max_steps": "34100/43134", "percentage": "79.06%", "elapsed_time": "15h 44m 53s", "remaining_time": "4h 10m 19s"}
{"loss": 0.50782967, "token_acc": 0.85472272, "grad_norm": 1.2578125, "learning_rate": 1.128e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601497, "epoch": 1.58570625, "global_step/max_steps": "34200/43134", "percentage": "79.29%", "elapsed_time": "15h 47m 37s", "remaining_time": "4h 7m 32s"}
{"loss": 0.510243, "token_acc": 0.85279064, "grad_norm": 1.671875, "learning_rate": 1.104e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601526, "epoch": 1.59034294, "global_step/max_steps": "34300/43134", "percentage": "79.52%", "elapsed_time": "15h 50m 21s", "remaining_time": "4h 4m 45s"}
{"loss": 0.48712143, "token_acc": 0.86140186, "grad_norm": 1.765625, "learning_rate": 1.08e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601564, "epoch": 1.59497963, "global_step/max_steps": "34400/43134", "percentage": "79.75%", "elapsed_time": "15h 53m 4s", "remaining_time": "4h 1m 58s"}
{"loss": 0.50406536, "token_acc": 0.85443361, "grad_norm": 1.8359375, "learning_rate": 1.056e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601618, "epoch": 1.59961631, "global_step/max_steps": "34500/43134", "percentage": "79.98%", "elapsed_time": "15h 55m 45s", "remaining_time": "3h 59m 11s"}
{"loss": 0.48922169, "token_acc": 0.86041056, "grad_norm": 1.9453125, "learning_rate": 1.033e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601644, "epoch": 1.604253, "global_step/max_steps": "34600/43134", "percentage": "80.22%", "elapsed_time": "15h 58m 28s", "remaining_time": "3h 56m 24s"}
{"loss": 0.49174488, "token_acc": 0.85835385, "grad_norm": 3.890625, "learning_rate": 1.009e-05, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601655, "epoch": 1.60888969, "global_step/max_steps": "34700/43134", "percentage": "80.45%", "elapsed_time": "16h 1m 13s", "remaining_time": "3h 53m 37s"}
{"loss": 0.51790977, "token_acc": 0.85042683, "grad_norm": 1.7890625, "learning_rate": 9.86e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601688, "epoch": 1.61352637, "global_step/max_steps": "34800/43134", "percentage": "80.68%", "elapsed_time": "16h 3m 57s", "remaining_time": "3h 50m 50s"}
{"loss": 0.51445999, "token_acc": 0.85156952, "grad_norm": 2.4375, "learning_rate": 9.64e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601708, "epoch": 1.61816306, "global_step/max_steps": "34900/43134", "percentage": "80.91%", "elapsed_time": "16h 6m 41s", "remaining_time": "3h 48m 4s"}
{"loss": 0.51366943, "token_acc": 0.85413901, "grad_norm": 1.578125, "learning_rate": 9.41e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601748, "epoch": 1.62279975, "global_step/max_steps": "35000/43134", "percentage": "81.14%", "elapsed_time": "16h 9m 23s", "remaining_time": "3h 45m 17s"}
{"loss": 0.49605499, "token_acc": 0.85696459, "grad_norm": 1.7578125, "learning_rate": 9.19e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601784, "epoch": 1.62743643, "global_step/max_steps": "35100/43134", "percentage": "81.37%", "elapsed_time": "16h 12m 6s", "remaining_time": "3h 42m 30s"}
{"loss": 0.5227446, "token_acc": 0.84938476, "grad_norm": 1.453125, "learning_rate": 8.97e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.6018, "epoch": 1.63207312, "global_step/max_steps": "35200/43134", "percentage": "81.61%", "elapsed_time": "16h 14m 51s", "remaining_time": "3h 39m 43s"}
{"loss": 0.49766178, "token_acc": 0.85589614, "grad_norm": 1.546875, "learning_rate": 8.75e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601853, "epoch": 1.63670981, "global_step/max_steps": "35300/43134", "percentage": "81.84%", "elapsed_time": "16h 17m 32s", "remaining_time": "3h 36m 56s"}
{"loss": 0.50467583, "token_acc": 0.85383751, "grad_norm": 2.984375, "learning_rate": 8.54e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601888, "epoch": 1.64134649, "global_step/max_steps": "35400/43134", "percentage": "82.07%", "elapsed_time": "16h 20m 14s", "remaining_time": "3h 34m 9s"}
{"loss": 0.5060828, "token_acc": 0.85346244, "grad_norm": 1.6640625, "learning_rate": 8.32e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601914, "epoch": 1.64598318, "global_step/max_steps": "35500/43134", "percentage": "82.30%", "elapsed_time": "16h 22m 58s", "remaining_time": "3h 31m 22s"}
{"loss": 0.53442989, "token_acc": 0.84803329, "grad_norm": 1.59375, "learning_rate": 8.11e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601907, "epoch": 1.65061987, "global_step/max_steps": "35600/43134", "percentage": "82.53%", "elapsed_time": "16h 25m 45s", "remaining_time": "3h 28m 36s"}
{"loss": 0.50631168, "token_acc": 0.85377535, "grad_norm": 1.671875, "learning_rate": 7.9e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.60192, "epoch": 1.65525655, "global_step/max_steps": "35700/43134", "percentage": "82.77%", "elapsed_time": "16h 28m 29s", "remaining_time": "3h 25m 50s"}
{"loss": 0.49357468, "token_acc": 0.85825564, "grad_norm": 1.5625, "learning_rate": 7.7e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601957, "epoch": 1.65989324, "global_step/max_steps": "35800/43134", "percentage": "83.00%", "elapsed_time": "16h 31m 12s", "remaining_time": "3h 23m 3s"}
{"loss": 0.50163651, "token_acc": 0.85644217, "grad_norm": 1.8046875, "learning_rate": 7.49e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.601995, "epoch": 1.66452993, "global_step/max_steps": "35900/43134", "percentage": "83.23%", "elapsed_time": "16h 33m 54s", "remaining_time": "3h 20m 16s"}
{"loss": 0.49665947, "token_acc": 0.85763998, "grad_norm": 1.609375, "learning_rate": 7.29e-06, "memory(GiB)": 76.94, "train_speed(iter/s)": 0.602005, "epoch": 1.66916661, "global_step/max_steps": "36000/43134", "percentage": "83.46%", "elapsed_time": "16h 36m 39s", "remaining_time": "3h 17m 30s"}