[
  {
    "loss": 8.4858,
    "grad_norm": 10.956440925598145,
    "learning_rate": 2e-05,
    "epoch": 0.2,
    "step": 5
  },
  {
    "loss": 6.9312,
    "grad_norm": 7.161553382873535,
    "learning_rate": 4.5e-05,
    "epoch": 0.4,
    "step": 10
  },
  {
    "loss": 5.9262,
    "grad_norm": 6.012239456176758,
    "learning_rate": 4.8947368421052635e-05,
    "epoch": 0.6,
    "step": 15
  },
  {
    "loss": 4.2748,
    "grad_norm": 4.727581024169922,
    "learning_rate": 4.7631578947368424e-05,
    "epoch": 0.8,
    "step": 20
  },
  {
    "loss": 4.3024,
    "grad_norm": 4.418272018432617,
    "learning_rate": 4.6315789473684214e-05,
    "epoch": 1.0,
    "step": 25
  },
  {
    "loss": 3.4231,
    "grad_norm": 88.75386810302734,
    "learning_rate": 4.5e-05,
    "epoch": 1.2,
    "step": 30
  },
  {
    "loss": 3.2013,
    "grad_norm": 4.03700590133667,
    "learning_rate": 4.368421052631579e-05,
    "epoch": 1.4,
    "step": 35
  },
  {
    "loss": 2.7781,
    "grad_norm": 4.357565879821777,
    "learning_rate": 4.236842105263158e-05,
    "epoch": 1.6,
    "step": 40
  },
  {
    "loss": 2.6868,
    "grad_norm": 3.961747169494629,
    "learning_rate": 4.105263157894737e-05,
    "epoch": 1.8,
    "step": 45
  },
  {
    "loss": 2.625,
    "grad_norm": 4.623239040374756,
    "learning_rate": 3.973684210526316e-05,
    "epoch": 2.0,
    "step": 50
  },
  {
    "loss": 2.253,
    "grad_norm": 3.8357508182525635,
    "learning_rate": 3.842105263157895e-05,
    "epoch": 2.2,
    "step": 55
  },
  {
    "loss": 1.7868,
    "grad_norm": 3.983182907104492,
    "learning_rate": 3.710526315789474e-05,
    "epoch": 2.4,
    "step": 60
  },
  {
    "loss": 2.158,
    "grad_norm": 4.157156944274902,
    "learning_rate": 3.578947368421053e-05,
    "epoch": 2.6,
    "step": 65
  },
  {
    "loss": 2.1846,
    "grad_norm": 3.965906858444214,
    "learning_rate": 3.447368421052632e-05,
    "epoch": 2.8,
    "step": 70
  },
  {
    "loss": 2.1961,
    "grad_norm": 2.782144546508789,
    "learning_rate": 3.3157894736842106e-05,
    "epoch": 3.0,
    "step": 75
  },
  {
    "loss": 1.4554,
    "grad_norm": 3.1297521591186523,
    "learning_rate": 3.1842105263157895e-05,
    "epoch": 3.2,
    "step": 80
  },
  {
    "loss": 1.6128,
    "grad_norm": 3.906054735183716,
    "learning_rate": 3.0526315789473684e-05,
    "epoch": 3.4,
    "step": 85
  },
  {
    "loss": 1.4562,
    "grad_norm": 4.510481834411621,
    "learning_rate": 2.9210526315789477e-05,
    "epoch": 3.6,
    "step": 90
  },
  {
    "loss": 1.5626,
    "grad_norm": 3.879499673843384,
    "learning_rate": 2.7894736842105263e-05,
    "epoch": 3.8,
    "step": 95
  },
  {
    "loss": 1.5182,
    "grad_norm": 3.139321804046631,
    "learning_rate": 2.6578947368421052e-05,
    "epoch": 4.0,
    "step": 100
  },
  {
    "loss": 1.2072,
    "grad_norm": 4.304155349731445,
    "learning_rate": 2.5263157894736845e-05,
    "epoch": 4.2,
    "step": 105
  },
  {
    "loss": 1.1877,
    "grad_norm": 3.2858364582061768,
    "learning_rate": 2.394736842105263e-05,
    "epoch": 4.4,
    "step": 110
  },
  {
    "loss": 1.1419,
    "grad_norm": 3.662776231765747,
    "learning_rate": 2.2631578947368423e-05,
    "epoch": 4.6,
    "step": 115
  },
  {
    "loss": 1.0726,
    "grad_norm": 3.3753128051757812,
    "learning_rate": 2.1315789473684212e-05,
    "epoch": 4.8,
    "step": 120
  },
  {
    "loss": 1.16,
    "grad_norm": 3.4297780990600586,
    "learning_rate": 2e-05,
    "epoch": 5.0,
    "step": 125
  },
  {
    "loss": 1.1555,
    "grad_norm": 3.373642921447754,
    "learning_rate": 1.868421052631579e-05,
    "epoch": 5.2,
    "step": 130
  },
  {
    "loss": 1.0915,
    "grad_norm": 3.190053701400757,
    "learning_rate": 1.736842105263158e-05,
    "epoch": 5.4,
    "step": 135
  },
  {
    "loss": 0.6836,
    "grad_norm": 3.1136105060577393,
    "learning_rate": 1.605263157894737e-05,
    "epoch": 5.6,
    "step": 140
  },
  {
    "loss": 0.8947,
    "grad_norm": 4.21175479888916,
    "learning_rate": 1.4736842105263157e-05,
    "epoch": 5.8,
    "step": 145
  },
  {
    "loss": 1.0125,
    "grad_norm": 3.606748342514038,
    "learning_rate": 1.3421052631578948e-05,
    "epoch": 6.0,
    "step": 150
  },
  {
    "loss": 0.6748,
    "grad_norm": 2.8370039463043213,
    "learning_rate": 1.2105263157894737e-05,
    "epoch": 6.2,
    "step": 155
  },
  {
    "loss": 0.7417,
    "grad_norm": 5.026889801025391,
    "learning_rate": 1.0789473684210526e-05,
    "epoch": 6.4,
    "step": 160
  },
  {
    "loss": 0.836,
    "grad_norm": 4.09874153137207,
    "learning_rate": 9.473684210526317e-06,
    "epoch": 6.6,
    "step": 165
  },
  {
    "loss": 0.7355,
    "grad_norm": 3.5339722633361816,
    "learning_rate": 8.157894736842106e-06,
    "epoch": 6.8,
    "step": 170
  },
  {
    "loss": 0.817,
    "grad_norm": 3.718662738800049,
    "learning_rate": 6.842105263157896e-06,
    "epoch": 7.0,
    "step": 175
  },
  {
    "loss": 0.6803,
    "grad_norm": 2.443586826324463,
    "learning_rate": 5.526315789473684e-06,
    "epoch": 7.2,
    "step": 180
  },
  {
    "loss": 0.644,
    "grad_norm": 4.012761116027832,
    "learning_rate": 4.210526315789474e-06,
    "epoch": 7.4,
    "step": 185
  },
  {
    "loss": 0.5224,
    "grad_norm": 2.8739984035491943,
    "learning_rate": 2.8947368421052634e-06,
    "epoch": 7.6,
    "step": 190
  },
  {
    "loss": 0.6857,
    "grad_norm": 3.989027261734009,
    "learning_rate": 1.5789473684210528e-06,
    "epoch": 7.8,
    "step": 195
  },
  {
    "loss": 0.7937,
    "grad_norm": 4.327380180358887,
    "learning_rate": 2.6315789473684213e-07,
    "epoch": 8.0,
    "step": 200
  },
  {
    "train_runtime": 12626.9237,
    "train_samples_per_second": 0.063,
    "train_steps_per_second": 0.016,
    "total_flos": 2611410370560000.0,
    "train_loss": 2.0139254927635193,
    "epoch": 8.0,
    "step": 200
  }
]
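A minimal sketch for inspecting this log: assuming the array above is saved to a file (the filename `log_history.json` is hypothetical, not part of the original), the per-step entries can be loaded with the standard `json` module and the loss curve plotted with matplotlib. The final entry carries run-level summary statistics rather than a per-step log, so it is filtered out by checking for the `"loss"` key.

```python
import json

import matplotlib.pyplot as plt

# Hypothetical filename; adjust to wherever the log array is stored.
with open("log_history.json") as f:
    history = json.load(f)

# Keep only per-step entries; the last record holds summary stats
# (train_runtime, train_loss, ...) and has no "loss" key.
step_logs = [entry for entry in history if "loss" in entry]

steps = [entry["step"] for entry in step_logs]
losses = [entry["loss"] for entry in step_logs]

plt.plot(steps, losses, marker="o")
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("Training loss over 200 steps (8 epochs)")
plt.show()
```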