| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 200, | |
| "global_step": 950, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002105263157894737, | |
| "grad_norm": 6.217544016659503, | |
| "learning_rate": 9.999972660400536e-06, | |
| "loss": 0.3989, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004210526315789474, | |
| "grad_norm": 4.110718040113299, | |
| "learning_rate": 9.999890641901124e-06, | |
| "loss": 0.3669, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00631578947368421, | |
| "grad_norm": 4.338497302088012, | |
| "learning_rate": 9.999753945398704e-06, | |
| "loss": 0.312, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.008421052631578947, | |
| "grad_norm": 5.282542189044485, | |
| "learning_rate": 9.99956257238817e-06, | |
| "loss": 0.3437, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.010526315789473684, | |
| "grad_norm": 3.834449844048163, | |
| "learning_rate": 9.999316524962347e-06, | |
| "loss": 0.2323, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01263157894736842, | |
| "grad_norm": 3.8748984755674143, | |
| "learning_rate": 9.999015805811965e-06, | |
| "loss": 0.3381, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.014736842105263158, | |
| "grad_norm": 3.0258910678432107, | |
| "learning_rate": 9.998660418225645e-06, | |
| "loss": 0.2191, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.016842105263157894, | |
| "grad_norm": 3.569909197687586, | |
| "learning_rate": 9.998250366089848e-06, | |
| "loss": 0.2458, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.018947368421052633, | |
| "grad_norm": 5.072654329244835, | |
| "learning_rate": 9.997785653888835e-06, | |
| "loss": 0.308, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.021052631578947368, | |
| "grad_norm": 3.23335292769489, | |
| "learning_rate": 9.99726628670463e-06, | |
| "loss": 0.2266, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.023157894736842106, | |
| "grad_norm": 3.287012798491171, | |
| "learning_rate": 9.996692270216946e-06, | |
| "loss": 0.2592, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.02526315789473684, | |
| "grad_norm": 4.0856720065160745, | |
| "learning_rate": 9.996063610703138e-06, | |
| "loss": 0.2674, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.02736842105263158, | |
| "grad_norm": 3.454246688370746, | |
| "learning_rate": 9.995380315038119e-06, | |
| "loss": 0.2565, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.029473684210526315, | |
| "grad_norm": 3.2274320728380066, | |
| "learning_rate": 9.994642390694308e-06, | |
| "loss": 0.1976, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.031578947368421054, | |
| "grad_norm": 4.260565705891167, | |
| "learning_rate": 9.993849845741525e-06, | |
| "loss": 0.245, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03368421052631579, | |
| "grad_norm": 3.4341257739977102, | |
| "learning_rate": 9.993002688846913e-06, | |
| "loss": 0.2833, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.035789473684210524, | |
| "grad_norm": 3.62754868119338, | |
| "learning_rate": 9.992100929274848e-06, | |
| "loss": 0.2459, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.037894736842105266, | |
| "grad_norm": 3.3599215508623788, | |
| "learning_rate": 9.991144576886824e-06, | |
| "loss": 0.2597, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 2.759190741252958, | |
| "learning_rate": 9.990133642141359e-06, | |
| "loss": 0.2007, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.042105263157894736, | |
| "grad_norm": 3.1433858959494656, | |
| "learning_rate": 9.989068136093873e-06, | |
| "loss": 0.2126, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04421052631578947, | |
| "grad_norm": 4.780930240687513, | |
| "learning_rate": 9.987948070396572e-06, | |
| "loss": 0.2567, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.04631578947368421, | |
| "grad_norm": 3.1361116248862704, | |
| "learning_rate": 9.986773457298311e-06, | |
| "loss": 0.218, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.04842105263157895, | |
| "grad_norm": 4.660863559826474, | |
| "learning_rate": 9.985544309644474e-06, | |
| "loss": 0.3171, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.05052631578947368, | |
| "grad_norm": 3.83833154370056, | |
| "learning_rate": 9.984260640876821e-06, | |
| "loss": 0.2453, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05263157894736842, | |
| "grad_norm": 4.394031981209525, | |
| "learning_rate": 9.98292246503335e-06, | |
| "loss": 0.3022, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.05473684210526316, | |
| "grad_norm": 3.815312734391198, | |
| "learning_rate": 9.981529796748135e-06, | |
| "loss": 0.2566, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.056842105263157895, | |
| "grad_norm": 3.751567223965873, | |
| "learning_rate": 9.980082651251175e-06, | |
| "loss": 0.2057, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.05894736842105263, | |
| "grad_norm": 3.5703892124307886, | |
| "learning_rate": 9.97858104436822e-06, | |
| "loss": 0.2611, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.061052631578947365, | |
| "grad_norm": 4.882927650965578, | |
| "learning_rate": 9.977024992520604e-06, | |
| "loss": 0.2957, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.06315789473684211, | |
| "grad_norm": 3.5773603919322614, | |
| "learning_rate": 9.975414512725058e-06, | |
| "loss": 0.2483, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06526315789473684, | |
| "grad_norm": 3.19691324527535, | |
| "learning_rate": 9.973749622593534e-06, | |
| "loss": 0.199, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.06736842105263158, | |
| "grad_norm": 3.285608727213878, | |
| "learning_rate": 9.972030340333e-06, | |
| "loss": 0.2476, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.06947368421052631, | |
| "grad_norm": 3.9448084183142202, | |
| "learning_rate": 9.970256684745258e-06, | |
| "loss": 0.2584, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.07157894736842105, | |
| "grad_norm": 3.093609102136492, | |
| "learning_rate": 9.968428675226714e-06, | |
| "loss": 0.1965, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07368421052631578, | |
| "grad_norm": 2.866766273599304, | |
| "learning_rate": 9.966546331768192e-06, | |
| "loss": 0.235, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.07578947368421053, | |
| "grad_norm": 3.1680983150404862, | |
| "learning_rate": 9.964609674954696e-06, | |
| "loss": 0.2179, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.07789473684210527, | |
| "grad_norm": 2.531925390508716, | |
| "learning_rate": 9.962618725965196e-06, | |
| "loss": 0.1638, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 3.479913242409686, | |
| "learning_rate": 9.960573506572391e-06, | |
| "loss": 0.2607, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08210526315789474, | |
| "grad_norm": 3.453241583441747, | |
| "learning_rate": 9.95847403914247e-06, | |
| "loss": 0.2846, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.08421052631578947, | |
| "grad_norm": 3.7499588076121415, | |
| "learning_rate": 9.956320346634877e-06, | |
| "loss": 0.2474, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0863157894736842, | |
| "grad_norm": 3.7513628477476626, | |
| "learning_rate": 9.954112452602045e-06, | |
| "loss": 0.257, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.08842105263157894, | |
| "grad_norm": 3.249974357345021, | |
| "learning_rate": 9.951850381189152e-06, | |
| "loss": 0.2342, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09052631578947369, | |
| "grad_norm": 3.5256183668310053, | |
| "learning_rate": 9.949534157133844e-06, | |
| "loss": 0.254, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.09263157894736843, | |
| "grad_norm": 3.0367239176760554, | |
| "learning_rate": 9.94716380576598e-06, | |
| "loss": 0.2181, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.09473684210526316, | |
| "grad_norm": 2.680983187953939, | |
| "learning_rate": 9.944739353007344e-06, | |
| "loss": 0.2092, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.0968421052631579, | |
| "grad_norm": 3.172342567980907, | |
| "learning_rate": 9.942260825371359e-06, | |
| "loss": 0.1665, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.09894736842105263, | |
| "grad_norm": 3.683978184159089, | |
| "learning_rate": 9.939728249962808e-06, | |
| "loss": 0.2671, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.10105263157894737, | |
| "grad_norm": 3.6089008339524664, | |
| "learning_rate": 9.937141654477529e-06, | |
| "loss": 0.2557, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1031578947368421, | |
| "grad_norm": 3.1128433475033224, | |
| "learning_rate": 9.934501067202117e-06, | |
| "loss": 0.2298, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "grad_norm": 3.837339505212247, | |
| "learning_rate": 9.931806517013612e-06, | |
| "loss": 0.2121, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10736842105263159, | |
| "grad_norm": 3.1188404843719986, | |
| "learning_rate": 9.929058033379181e-06, | |
| "loss": 0.2449, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.10947368421052632, | |
| "grad_norm": 3.499970578608811, | |
| "learning_rate": 9.926255646355804e-06, | |
| "loss": 0.2362, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11157894736842106, | |
| "grad_norm": 3.8814562389117127, | |
| "learning_rate": 9.923399386589933e-06, | |
| "loss": 0.2403, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.11368421052631579, | |
| "grad_norm": 3.887530063847657, | |
| "learning_rate": 9.920489285317169e-06, | |
| "loss": 0.2276, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.11578947368421053, | |
| "grad_norm": 3.6293508472876455, | |
| "learning_rate": 9.917525374361913e-06, | |
| "loss": 0.2577, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.11789473684210526, | |
| "grad_norm": 3.865568740211283, | |
| "learning_rate": 9.91450768613702e-06, | |
| "loss": 0.2416, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 3.7137281964397095, | |
| "learning_rate": 9.911436253643445e-06, | |
| "loss": 0.2411, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.12210526315789473, | |
| "grad_norm": 3.6900754543193153, | |
| "learning_rate": 9.908311110469881e-06, | |
| "loss": 0.267, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.12421052631578948, | |
| "grad_norm": 3.4314998812484165, | |
| "learning_rate": 9.905132290792395e-06, | |
| "loss": 0.2415, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.12631578947368421, | |
| "grad_norm": 3.5441260133809154, | |
| "learning_rate": 9.901899829374048e-06, | |
| "loss": 0.2649, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.12842105263157894, | |
| "grad_norm": 2.969279579151304, | |
| "learning_rate": 9.89861376156452e-06, | |
| "loss": 0.2342, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.13052631578947368, | |
| "grad_norm": 3.0270242630571644, | |
| "learning_rate": 9.895274123299724e-06, | |
| "loss": 0.215, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.13263157894736843, | |
| "grad_norm": 3.2468251690158945, | |
| "learning_rate": 9.891880951101407e-06, | |
| "loss": 0.2645, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.13473684210526315, | |
| "grad_norm": 3.7405952840185255, | |
| "learning_rate": 9.888434282076759e-06, | |
| "loss": 0.224, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.1368421052631579, | |
| "grad_norm": 3.7386890942637736, | |
| "learning_rate": 9.884934153917998e-06, | |
| "loss": 0.2338, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.13894736842105262, | |
| "grad_norm": 3.6559224501234655, | |
| "learning_rate": 9.881380604901964e-06, | |
| "loss": 0.2674, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.14105263157894737, | |
| "grad_norm": 3.2126708403650723, | |
| "learning_rate": 9.877773673889702e-06, | |
| "loss": 0.257, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.1431578947368421, | |
| "grad_norm": 3.3544140835288387, | |
| "learning_rate": 9.874113400326031e-06, | |
| "loss": 0.2644, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.14526315789473684, | |
| "grad_norm": 3.2197549480894305, | |
| "learning_rate": 9.870399824239116e-06, | |
| "loss": 0.2337, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.14736842105263157, | |
| "grad_norm": 3.0956092032854787, | |
| "learning_rate": 9.86663298624003e-06, | |
| "loss": 0.2018, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.14947368421052631, | |
| "grad_norm": 2.728914245630825, | |
| "learning_rate": 9.86281292752231e-06, | |
| "loss": 0.1902, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.15157894736842106, | |
| "grad_norm": 2.728563361219932, | |
| "learning_rate": 9.858939689861506e-06, | |
| "loss": 0.1998, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.15368421052631578, | |
| "grad_norm": 3.6065260596056428, | |
| "learning_rate": 9.855013315614725e-06, | |
| "loss": 0.2589, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.15578947368421053, | |
| "grad_norm": 3.9437466543223016, | |
| "learning_rate": 9.851033847720167e-06, | |
| "loss": 0.2839, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.15789473684210525, | |
| "grad_norm": 2.6492053943266973, | |
| "learning_rate": 9.847001329696653e-06, | |
| "loss": 0.1926, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 3.4928074094432975, | |
| "learning_rate": 9.842915805643156e-06, | |
| "loss": 0.2567, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16210526315789472, | |
| "grad_norm": 3.745761767248173, | |
| "learning_rate": 9.838777320238312e-06, | |
| "loss": 0.2473, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.16421052631578947, | |
| "grad_norm": 4.662473350343442, | |
| "learning_rate": 9.834585918739936e-06, | |
| "loss": 0.2534, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.16631578947368422, | |
| "grad_norm": 3.5985866535045092, | |
| "learning_rate": 9.830341646984521e-06, | |
| "loss": 0.2375, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.16842105263157894, | |
| "grad_norm": 3.3095318865144323, | |
| "learning_rate": 9.826044551386743e-06, | |
| "loss": 0.2179, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1705263157894737, | |
| "grad_norm": 3.218832777420868, | |
| "learning_rate": 9.821694678938954e-06, | |
| "loss": 0.2245, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.1726315789473684, | |
| "grad_norm": 3.4749041260361326, | |
| "learning_rate": 9.817292077210658e-06, | |
| "loss": 0.2451, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.17473684210526316, | |
| "grad_norm": 3.6052413099966376, | |
| "learning_rate": 9.812836794348005e-06, | |
| "loss": 0.2132, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.17684210526315788, | |
| "grad_norm": 3.819893458132905, | |
| "learning_rate": 9.808328879073251e-06, | |
| "loss": 0.2518, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.17894736842105263, | |
| "grad_norm": 3.447449141237711, | |
| "learning_rate": 9.803768380684242e-06, | |
| "loss": 0.2832, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.18105263157894738, | |
| "grad_norm": 3.357478738557209, | |
| "learning_rate": 9.79915534905385e-06, | |
| "loss": 0.2568, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.1831578947368421, | |
| "grad_norm": 3.7920160087811476, | |
| "learning_rate": 9.794489834629457e-06, | |
| "loss": 0.263, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.18526315789473685, | |
| "grad_norm": 3.182104834724342, | |
| "learning_rate": 9.789771888432375e-06, | |
| "loss": 0.2245, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.18736842105263157, | |
| "grad_norm": 3.4674212312793813, | |
| "learning_rate": 9.785001562057311e-06, | |
| "loss": 0.2417, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.18947368421052632, | |
| "grad_norm": 4.117467872656145, | |
| "learning_rate": 9.780178907671788e-06, | |
| "loss": 0.2768, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.19157894736842104, | |
| "grad_norm": 3.631475929836605, | |
| "learning_rate": 9.775303978015585e-06, | |
| "loss": 0.2437, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.1936842105263158, | |
| "grad_norm": 3.3413603143822335, | |
| "learning_rate": 9.77037682640015e-06, | |
| "loss": 0.2642, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.1957894736842105, | |
| "grad_norm": 3.9842190799219876, | |
| "learning_rate": 9.765397506708023e-06, | |
| "loss": 0.3169, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.19789473684210526, | |
| "grad_norm": 3.7032684341350173, | |
| "learning_rate": 9.760366073392246e-06, | |
| "loss": 0.1791, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 3.987149618467848, | |
| "learning_rate": 9.755282581475769e-06, | |
| "loss": 0.3039, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.20210526315789473, | |
| "grad_norm": 3.2139873581817486, | |
| "learning_rate": 9.750147086550843e-06, | |
| "loss": 0.2504, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.20421052631578948, | |
| "grad_norm": 3.566561813353208, | |
| "learning_rate": 9.744959644778422e-06, | |
| "loss": 0.2863, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2063157894736842, | |
| "grad_norm": 3.7268559206075946, | |
| "learning_rate": 9.739720312887536e-06, | |
| "loss": 0.2167, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.20842105263157895, | |
| "grad_norm": 2.9695827733722595, | |
| "learning_rate": 9.734429148174676e-06, | |
| "loss": 0.2393, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 3.6108040436142823, | |
| "learning_rate": 9.729086208503174e-06, | |
| "loss": 0.295, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.21263157894736842, | |
| "grad_norm": 3.925500913610178, | |
| "learning_rate": 9.723691552302563e-06, | |
| "loss": 0.2467, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.21473684210526317, | |
| "grad_norm": 3.5695343047388666, | |
| "learning_rate": 9.718245238567939e-06, | |
| "loss": 0.2242, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2168421052631579, | |
| "grad_norm": 3.3235918183280866, | |
| "learning_rate": 9.712747326859316e-06, | |
| "loss": 0.2278, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.21894736842105264, | |
| "grad_norm": 3.764042590013744, | |
| "learning_rate": 9.707197877300974e-06, | |
| "loss": 0.2921, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.22105263157894736, | |
| "grad_norm": 2.903454636343328, | |
| "learning_rate": 9.701596950580807e-06, | |
| "loss": 0.2165, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2231578947368421, | |
| "grad_norm": 2.5462228191934124, | |
| "learning_rate": 9.69594460794965e-06, | |
| "loss": 0.1913, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.22526315789473683, | |
| "grad_norm": 3.048612041076824, | |
| "learning_rate": 9.690240911220618e-06, | |
| "loss": 0.1913, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.22736842105263158, | |
| "grad_norm": 2.7190276333100885, | |
| "learning_rate": 9.684485922768422e-06, | |
| "loss": 0.1846, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2294736842105263, | |
| "grad_norm": 3.3279879332903164, | |
| "learning_rate": 9.678679705528699e-06, | |
| "loss": 0.2444, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.23157894736842105, | |
| "grad_norm": 3.086182493388614, | |
| "learning_rate": 9.672822322997305e-06, | |
| "loss": 0.1827, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2336842105263158, | |
| "grad_norm": 3.0198656142842433, | |
| "learning_rate": 9.666913839229639e-06, | |
| "loss": 0.2064, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.23578947368421052, | |
| "grad_norm": 3.871643739742935, | |
| "learning_rate": 9.660954318839934e-06, | |
| "loss": 0.2537, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.23789473684210527, | |
| "grad_norm": 4.034332856853841, | |
| "learning_rate": 9.654943827000548e-06, | |
| "loss": 0.2499, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 3.7628273980242515, | |
| "learning_rate": 9.648882429441258e-06, | |
| "loss": 0.2557, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.24210526315789474, | |
| "grad_norm": 3.5786660920291493, | |
| "learning_rate": 9.642770192448537e-06, | |
| "loss": 0.2677, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.24421052631578946, | |
| "grad_norm": 4.532586938593248, | |
| "learning_rate": 9.636607182864828e-06, | |
| "loss": 0.2685, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2463157894736842, | |
| "grad_norm": 3.0674072329356856, | |
| "learning_rate": 9.630393468087818e-06, | |
| "loss": 0.1846, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.24842105263157896, | |
| "grad_norm": 4.2865579808258945, | |
| "learning_rate": 9.624129116069695e-06, | |
| "loss": 0.342, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2505263157894737, | |
| "grad_norm": 3.8921150967122156, | |
| "learning_rate": 9.61781419531641e-06, | |
| "loss": 0.2634, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.25263157894736843, | |
| "grad_norm": 3.3675053578108978, | |
| "learning_rate": 9.611448774886925e-06, | |
| "loss": 0.2273, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.25473684210526315, | |
| "grad_norm": 4.105187040991947, | |
| "learning_rate": 9.605032924392457e-06, | |
| "loss": 0.242, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.25684210526315787, | |
| "grad_norm": 3.2229116365485924, | |
| "learning_rate": 9.598566713995718e-06, | |
| "loss": 0.2471, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.25894736842105265, | |
| "grad_norm": 2.8700645053873126, | |
| "learning_rate": 9.592050214410152e-06, | |
| "loss": 0.2465, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.26105263157894737, | |
| "grad_norm": 3.6312759588775783, | |
| "learning_rate": 9.585483496899151e-06, | |
| "loss": 0.24, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2631578947368421, | |
| "grad_norm": 2.9630698799183226, | |
| "learning_rate": 9.578866633275289e-06, | |
| "loss": 0.2054, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.26526315789473687, | |
| "grad_norm": 3.544581712241485, | |
| "learning_rate": 9.572199695899522e-06, | |
| "loss": 0.2314, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2673684210526316, | |
| "grad_norm": 3.255776996164575, | |
| "learning_rate": 9.565482757680415e-06, | |
| "loss": 0.2785, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.2694736842105263, | |
| "grad_norm": 2.8952918607035363, | |
| "learning_rate": 9.558715892073324e-06, | |
| "loss": 0.2218, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.27157894736842103, | |
| "grad_norm": 3.4853221427011065, | |
| "learning_rate": 9.551899173079607e-06, | |
| "loss": 0.2862, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.2736842105263158, | |
| "grad_norm": 4.128929611734161, | |
| "learning_rate": 9.545032675245814e-06, | |
| "loss": 0.3055, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.27578947368421053, | |
| "grad_norm": 2.974600343932656, | |
| "learning_rate": 9.538116473662862e-06, | |
| "loss": 0.215, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.27789473684210525, | |
| "grad_norm": 2.7669257916596823, | |
| "learning_rate": 9.531150643965224e-06, | |
| "loss": 0.2182, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 3.97273403473512, | |
| "learning_rate": 9.524135262330098e-06, | |
| "loss": 0.2658, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.28210526315789475, | |
| "grad_norm": 3.6696654833766895, | |
| "learning_rate": 9.517070405476575e-06, | |
| "loss": 0.2305, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.28421052631578947, | |
| "grad_norm": 2.5127237232679667, | |
| "learning_rate": 9.509956150664796e-06, | |
| "loss": 0.1511, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.2863157894736842, | |
| "grad_norm": 3.371290504994853, | |
| "learning_rate": 9.502792575695112e-06, | |
| "loss": 0.26, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.28842105263157897, | |
| "grad_norm": 3.0689625597079684, | |
| "learning_rate": 9.495579758907231e-06, | |
| "loss": 0.2524, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.2905263157894737, | |
| "grad_norm": 3.183088939033141, | |
| "learning_rate": 9.48831777917936e-06, | |
| "loss": 0.2122, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.2926315789473684, | |
| "grad_norm": 3.8034642187376035, | |
| "learning_rate": 9.481006715927352e-06, | |
| "loss": 0.2593, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.29473684210526313, | |
| "grad_norm": 3.8705009381157343, | |
| "learning_rate": 9.473646649103819e-06, | |
| "loss": 0.2594, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2968421052631579, | |
| "grad_norm": 3.0318361821750286, | |
| "learning_rate": 9.466237659197271e-06, | |
| "loss": 0.2254, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.29894736842105263, | |
| "grad_norm": 3.615169294903516, | |
| "learning_rate": 9.458779827231237e-06, | |
| "loss": 0.2096, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.30105263157894735, | |
| "grad_norm": 3.2733885578132313, | |
| "learning_rate": 9.451273234763372e-06, | |
| "loss": 0.2431, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.3031578947368421, | |
| "grad_norm": 3.2273667507387533, | |
| "learning_rate": 9.443717963884568e-06, | |
| "loss": 0.2228, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.30526315789473685, | |
| "grad_norm": 3.632106743266242, | |
| "learning_rate": 9.43611409721806e-06, | |
| "loss": 0.248, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.30736842105263157, | |
| "grad_norm": 3.320763426450409, | |
| "learning_rate": 9.428461717918512e-06, | |
| "loss": 0.2878, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3094736842105263, | |
| "grad_norm": 3.549945610201063, | |
| "learning_rate": 9.420760909671119e-06, | |
| "loss": 0.231, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.31157894736842107, | |
| "grad_norm": 3.282653593524781, | |
| "learning_rate": 9.413011756690686e-06, | |
| "loss": 0.2659, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3136842105263158, | |
| "grad_norm": 3.261626438744862, | |
| "learning_rate": 9.405214343720708e-06, | |
| "loss": 0.2586, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "grad_norm": 2.924567282994091, | |
| "learning_rate": 9.397368756032445e-06, | |
| "loss": 0.1778, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3178947368421053, | |
| "grad_norm": 3.0558746792231464, | |
| "learning_rate": 9.389475079423988e-06, | |
| "loss": 0.2471, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 3.7586909960856207, | |
| "learning_rate": 9.381533400219319e-06, | |
| "loss": 0.258, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.32210526315789473, | |
| "grad_norm": 3.392179309145632, | |
| "learning_rate": 9.373543805267367e-06, | |
| "loss": 0.249, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.32421052631578945, | |
| "grad_norm": 4.083035200586394, | |
| "learning_rate": 9.365506381941066e-06, | |
| "loss": 0.2789, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3263157894736842, | |
| "grad_norm": 3.19777370952952, | |
| "learning_rate": 9.357421218136387e-06, | |
| "loss": 0.2062, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.32842105263157895, | |
| "grad_norm": 3.4456582477689928, | |
| "learning_rate": 9.349288402271387e-06, | |
| "loss": 0.2382, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.33052631578947367, | |
| "grad_norm": 2.765215943542346, | |
| "learning_rate": 9.341108023285239e-06, | |
| "loss": 0.1827, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.33263157894736844, | |
| "grad_norm": 2.883818519531558, | |
| "learning_rate": 9.332880170637252e-06, | |
| "loss": 0.1995, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.33473684210526317, | |
| "grad_norm": 3.6487183139434234, | |
| "learning_rate": 9.324604934305911e-06, | |
| "loss": 0.2598, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.3368421052631579, | |
| "grad_norm": 3.9308083666697344, | |
| "learning_rate": 9.31628240478787e-06, | |
| "loss": 0.2412, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3389473684210526, | |
| "grad_norm": 3.5970617830856773, | |
| "learning_rate": 9.30791267309698e-06, | |
| "loss": 0.2851, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.3410526315789474, | |
| "grad_norm": 3.467839501820664, | |
| "learning_rate": 9.299495830763285e-06, | |
| "loss": 0.2853, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3431578947368421, | |
| "grad_norm": 3.602755193669457, | |
| "learning_rate": 9.291031969832026e-06, | |
| "loss": 0.2225, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.3452631578947368, | |
| "grad_norm": 3.0886925699452985, | |
| "learning_rate": 9.28252118286263e-06, | |
| "loss": 0.1903, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.3473684210526316, | |
| "grad_norm": 3.439033801554011, | |
| "learning_rate": 9.273963562927695e-06, | |
| "loss": 0.2287, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3494736842105263, | |
| "grad_norm": 3.3882150165690783, | |
| "learning_rate": 9.265359203611988e-06, | |
| "loss": 0.2904, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.35157894736842105, | |
| "grad_norm": 3.3452062359089507, | |
| "learning_rate": 9.256708199011402e-06, | |
| "loss": 0.2339, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.35368421052631577, | |
| "grad_norm": 3.700729392048823, | |
| "learning_rate": 9.248010643731936e-06, | |
| "loss": 0.2796, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.35578947368421054, | |
| "grad_norm": 3.1210284485776874, | |
| "learning_rate": 9.23926663288866e-06, | |
| "loss": 0.2126, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.35789473684210527, | |
| "grad_norm": 3.6574312237344992, | |
| "learning_rate": 9.230476262104678e-06, | |
| "loss": 0.2493, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 3.679409049213219, | |
| "learning_rate": 9.221639627510076e-06, | |
| "loss": 0.2551, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.36210526315789476, | |
| "grad_norm": 3.3167727556758693, | |
| "learning_rate": 9.212756825740874e-06, | |
| "loss": 0.2482, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3642105263157895, | |
| "grad_norm": 2.401115724431016, | |
| "learning_rate": 9.203827953937969e-06, | |
| "loss": 0.1881, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.3663157894736842, | |
| "grad_norm": 3.4427049239845533, | |
| "learning_rate": 9.194853109746073e-06, | |
| "loss": 0.2088, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3684210526315789, | |
| "grad_norm": 3.2573463355993826, | |
| "learning_rate": 9.185832391312644e-06, | |
| "loss": 0.2495, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.3705263157894737, | |
| "grad_norm": 3.2797367783671234, | |
| "learning_rate": 9.176765897286812e-06, | |
| "loss": 0.2435, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3726315789473684, | |
| "grad_norm": 3.6588831382550806, | |
| "learning_rate": 9.167653726818305e-06, | |
| "loss": 0.2293, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.37473684210526315, | |
| "grad_norm": 2.7755595814174363, | |
| "learning_rate": 9.15849597955636e-06, | |
| "loss": 0.2044, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.37684210526315787, | |
| "grad_norm": 3.5962245807858255, | |
| "learning_rate": 9.149292755648631e-06, | |
| "loss": 0.2214, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.37894736842105264, | |
| "grad_norm": 3.4357076470231305, | |
| "learning_rate": 9.140044155740102e-06, | |
| "loss": 0.2402, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.38105263157894737, | |
| "grad_norm": 3.4123688743753853, | |
| "learning_rate": 9.130750280971978e-06, | |
| "loss": 0.2553, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.3831578947368421, | |
| "grad_norm": 2.741940954433975, | |
| "learning_rate": 9.121411232980589e-06, | |
| "loss": 0.1907, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.38526315789473686, | |
| "grad_norm": 3.6400973883721384, | |
| "learning_rate": 9.112027113896262e-06, | |
| "loss": 0.2616, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.3873684210526316, | |
| "grad_norm": 3.5161597871058277, | |
| "learning_rate": 9.102598026342223e-06, | |
| "loss": 0.2114, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.3894736842105263, | |
| "grad_norm": 3.5917090120879904, | |
| "learning_rate": 9.093124073433464e-06, | |
| "loss": 0.2304, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.391578947368421, | |
| "grad_norm": 3.1502787194480897, | |
| "learning_rate": 9.083605358775612e-06, | |
| "loss": 0.2032, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.3936842105263158, | |
| "grad_norm": 3.9729218899091063, | |
| "learning_rate": 9.074041986463808e-06, | |
| "loss": 0.2325, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.3957894736842105, | |
| "grad_norm": 3.0185052960999523, | |
| "learning_rate": 9.064434061081562e-06, | |
| "loss": 0.1981, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.39789473684210525, | |
| "grad_norm": 4.209209496600263, | |
| "learning_rate": 9.0547816876996e-06, | |
| "loss": 0.2586, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 3.967209050356877, | |
| "learning_rate": 9.045084971874738e-06, | |
| "loss": 0.2946, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.40210526315789474, | |
| "grad_norm": 3.017106050384066, | |
| "learning_rate": 9.035344019648701e-06, | |
| "loss": 0.2465, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.40421052631578946, | |
| "grad_norm": 3.917356737129771, | |
| "learning_rate": 9.025558937546987e-06, | |
| "loss": 0.3207, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4063157894736842, | |
| "grad_norm": 3.2403291834347767, | |
| "learning_rate": 9.015729832577681e-06, | |
| "loss": 0.233, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.40842105263157896, | |
| "grad_norm": 3.322798306669591, | |
| "learning_rate": 9.005856812230304e-06, | |
| "loss": 0.1899, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.4105263157894737, | |
| "grad_norm": 3.430365209049047, | |
| "learning_rate": 8.995939984474624e-06, | |
| "loss": 0.2304, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4126315789473684, | |
| "grad_norm": 3.39458107073051, | |
| "learning_rate": 8.98597945775948e-06, | |
| "loss": 0.2357, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4147368421052632, | |
| "grad_norm": 3.3413958584715475, | |
| "learning_rate": 8.975975341011595e-06, | |
| "loss": 0.2855, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.4168421052631579, | |
| "grad_norm": 2.5726994940315415, | |
| "learning_rate": 8.96592774363439e-06, | |
| "loss": 0.1901, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4189473684210526, | |
| "grad_norm": 4.335519110486464, | |
| "learning_rate": 8.955836775506776e-06, | |
| "loss": 0.2933, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 4.19815390177116, | |
| "learning_rate": 8.94570254698197e-06, | |
| "loss": 0.2688, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "eval_loss": 0.2162427455186844, | |
| "eval_runtime": 0.9508, | |
| "eval_samples_per_second": 41.017, | |
| "eval_steps_per_second": 10.517, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4231578947368421, | |
| "grad_norm": 3.269776118271859, | |
| "learning_rate": 8.935525168886263e-06, | |
| "loss": 0.2096, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.42526315789473684, | |
| "grad_norm": 4.04123612262294, | |
| "learning_rate": 8.92530475251784e-06, | |
| "loss": 0.2568, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.42736842105263156, | |
| "grad_norm": 3.8368271309479933, | |
| "learning_rate": 8.91504140964553e-06, | |
| "loss": 0.2657, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.42947368421052634, | |
| "grad_norm": 3.1272371621856037, | |
| "learning_rate": 8.90473525250761e-06, | |
| "loss": 0.2268, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.43157894736842106, | |
| "grad_norm": 3.093955290257307, | |
| "learning_rate": 8.894386393810563e-06, | |
| "loss": 0.2042, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.4336842105263158, | |
| "grad_norm": 2.6581659343898543, | |
| "learning_rate": 8.883994946727848e-06, | |
| "loss": 0.1746, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4357894736842105, | |
| "grad_norm": 3.955613588917937, | |
| "learning_rate": 8.873561024898668e-06, | |
| "loss": 0.1996, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.4378947368421053, | |
| "grad_norm": 2.7835005086015903, | |
| "learning_rate": 8.863084742426719e-06, | |
| "loss": 0.192, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 3.347640148688381, | |
| "learning_rate": 8.852566213878947e-06, | |
| "loss": 0.1955, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.4421052631578947, | |
| "grad_norm": 3.6781001625254643, | |
| "learning_rate": 8.842005554284296e-06, | |
| "loss": 0.2583, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4442105263157895, | |
| "grad_norm": 3.3060488425103416, | |
| "learning_rate": 8.831402879132447e-06, | |
| "loss": 0.2273, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.4463157894736842, | |
| "grad_norm": 3.924014440413263, | |
| "learning_rate": 8.820758304372557e-06, | |
| "loss": 0.2294, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.44842105263157894, | |
| "grad_norm": 3.7994401024720066, | |
| "learning_rate": 8.810071946411989e-06, | |
| "loss": 0.2199, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.45052631578947366, | |
| "grad_norm": 3.376294637610717, | |
| "learning_rate": 8.799343922115045e-06, | |
| "loss": 0.2433, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.45263157894736844, | |
| "grad_norm": 3.3193795798150165, | |
| "learning_rate": 8.788574348801676e-06, | |
| "loss": 0.209, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.45473684210526316, | |
| "grad_norm": 3.0915010534262795, | |
| "learning_rate": 8.777763344246209e-06, | |
| "loss": 0.179, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.4568421052631579, | |
| "grad_norm": 2.8659181552677375, | |
| "learning_rate": 8.766911026676063e-06, | |
| "loss": 0.1811, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.4589473684210526, | |
| "grad_norm": 3.45215463473198, | |
| "learning_rate": 8.756017514770444e-06, | |
| "loss": 0.2281, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4610526315789474, | |
| "grad_norm": 3.1257499399451394, | |
| "learning_rate": 8.745082927659048e-06, | |
| "loss": 0.2184, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.4631578947368421, | |
| "grad_norm": 3.8271139734522945, | |
| "learning_rate": 8.734107384920771e-06, | |
| "loss": 0.2623, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4652631578947368, | |
| "grad_norm": 2.835561102259285, | |
| "learning_rate": 8.72309100658239e-06, | |
| "loss": 0.1964, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.4673684210526316, | |
| "grad_norm": 3.3688712713428766, | |
| "learning_rate": 8.71203391311725e-06, | |
| "loss": 0.2168, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.4694736842105263, | |
| "grad_norm": 3.7240976383868736, | |
| "learning_rate": 8.700936225443958e-06, | |
| "loss": 0.2518, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.47157894736842104, | |
| "grad_norm": 2.96476521824005, | |
| "learning_rate": 8.689798064925049e-06, | |
| "loss": 0.2378, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.47368421052631576, | |
| "grad_norm": 2.7984591391533953, | |
| "learning_rate": 8.67861955336566e-06, | |
| "loss": 0.2252, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.47578947368421054, | |
| "grad_norm": 2.7976795282629254, | |
| "learning_rate": 8.6674008130122e-06, | |
| "loss": 0.1755, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.47789473684210526, | |
| "grad_norm": 3.33023467809358, | |
| "learning_rate": 8.65614196655102e-06, | |
| "loss": 0.2361, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 2.966759381413828, | |
| "learning_rate": 8.644843137107058e-06, | |
| "loss": 0.2027, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.48210526315789476, | |
| "grad_norm": 3.1104223364393535, | |
| "learning_rate": 8.633504448242504e-06, | |
| "loss": 0.1961, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.4842105263157895, | |
| "grad_norm": 2.787274197616676, | |
| "learning_rate": 8.622126023955446e-06, | |
| "loss": 0.2031, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4863157894736842, | |
| "grad_norm": 3.3738049865267925, | |
| "learning_rate": 8.610707988678504e-06, | |
| "loss": 0.2533, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.4884210526315789, | |
| "grad_norm": 3.407815533241093, | |
| "learning_rate": 8.599250467277483e-06, | |
| "loss": 0.2524, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.4905263157894737, | |
| "grad_norm": 3.296831884586839, | |
| "learning_rate": 8.587753585050004e-06, | |
| "loss": 0.2396, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.4926315789473684, | |
| "grad_norm": 2.8560599820160073, | |
| "learning_rate": 8.576217467724129e-06, | |
| "loss": 0.2416, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.49473684210526314, | |
| "grad_norm": 2.9054696528766524, | |
| "learning_rate": 8.564642241456986e-06, | |
| "loss": 0.1973, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.4968421052631579, | |
| "grad_norm": 2.8181421804733358, | |
| "learning_rate": 8.553028032833397e-06, | |
| "loss": 0.179, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.49894736842105264, | |
| "grad_norm": 2.7050097156036284, | |
| "learning_rate": 8.541374968864486e-06, | |
| "loss": 0.2037, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.5010526315789474, | |
| "grad_norm": 2.585908271011497, | |
| "learning_rate": 8.529683176986295e-06, | |
| "loss": 0.1633, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5031578947368421, | |
| "grad_norm": 3.6063087447245414, | |
| "learning_rate": 8.517952785058385e-06, | |
| "loss": 0.2354, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.5052631578947369, | |
| "grad_norm": 2.8004827647319073, | |
| "learning_rate": 8.506183921362443e-06, | |
| "loss": 0.1783, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5073684210526316, | |
| "grad_norm": 3.0924391138448777, | |
| "learning_rate": 8.494376714600878e-06, | |
| "loss": 0.2086, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.5094736842105263, | |
| "grad_norm": 3.28651564075383, | |
| "learning_rate": 8.482531293895412e-06, | |
| "loss": 0.2345, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.511578947368421, | |
| "grad_norm": 3.2830296016413056, | |
| "learning_rate": 8.470647788785665e-06, | |
| "loss": 0.2149, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.5136842105263157, | |
| "grad_norm": 3.546287405553885, | |
| "learning_rate": 8.458726329227748e-06, | |
| "loss": 0.2261, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5157894736842106, | |
| "grad_norm": 3.394923024937159, | |
| "learning_rate": 8.446767045592829e-06, | |
| "loss": 0.2468, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5178947368421053, | |
| "grad_norm": 3.864701196963864, | |
| "learning_rate": 8.434770068665723e-06, | |
| "loss": 0.2638, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 3.4189011314403976, | |
| "learning_rate": 8.422735529643445e-06, | |
| "loss": 0.2219, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.5221052631578947, | |
| "grad_norm": 3.4940583139796497, | |
| "learning_rate": 8.410663560133784e-06, | |
| "loss": 0.2055, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5242105263157895, | |
| "grad_norm": 2.9563885540382717, | |
| "learning_rate": 8.398554292153866e-06, | |
| "loss": 0.2063, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 3.856575711945962, | |
| "learning_rate": 8.386407858128707e-06, | |
| "loss": 0.2493, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5284210526315789, | |
| "grad_norm": 2.963714344149301, | |
| "learning_rate": 8.37422439088976e-06, | |
| "loss": 0.2173, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.5305263157894737, | |
| "grad_norm": 3.5084770315497953, | |
| "learning_rate": 8.362004023673473e-06, | |
| "loss": 0.2637, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5326315789473685, | |
| "grad_norm": 3.2627548109310545, | |
| "learning_rate": 8.349746890119826e-06, | |
| "loss": 0.2059, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.5347368421052632, | |
| "grad_norm": 3.537857944594144, | |
| "learning_rate": 8.337453124270864e-06, | |
| "loss": 0.2064, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5368421052631579, | |
| "grad_norm": 3.203619307633033, | |
| "learning_rate": 8.325122860569241e-06, | |
| "loss": 0.1859, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.5389473684210526, | |
| "grad_norm": 2.8427156228829946, | |
| "learning_rate": 8.31275623385675e-06, | |
| "loss": 0.1781, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5410526315789473, | |
| "grad_norm": 3.4548444256099495, | |
| "learning_rate": 8.300353379372834e-06, | |
| "loss": 0.2253, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.5431578947368421, | |
| "grad_norm": 3.316389585769609, | |
| "learning_rate": 8.287914432753123e-06, | |
| "loss": 0.2496, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5452631578947369, | |
| "grad_norm": 3.925056071030507, | |
| "learning_rate": 8.275439530027948e-06, | |
| "loss": 0.2259, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.5473684210526316, | |
| "grad_norm": 3.992456726752316, | |
| "learning_rate": 8.262928807620843e-06, | |
| "loss": 0.2566, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5494736842105263, | |
| "grad_norm": 3.432001698331824, | |
| "learning_rate": 8.250382402347066e-06, | |
| "loss": 0.2084, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.5515789473684211, | |
| "grad_norm": 3.4259679677663843, | |
| "learning_rate": 8.237800451412095e-06, | |
| "loss": 0.2381, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5536842105263158, | |
| "grad_norm": 3.1299226563183193, | |
| "learning_rate": 8.225183092410128e-06, | |
| "loss": 0.2374, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.5557894736842105, | |
| "grad_norm": 3.2234103937622924, | |
| "learning_rate": 8.212530463322584e-06, | |
| "loss": 0.2192, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5578947368421052, | |
| "grad_norm": 3.840611086800957, | |
| "learning_rate": 8.199842702516584e-06, | |
| "loss": 0.2349, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 3.090365309566825, | |
| "learning_rate": 8.18711994874345e-06, | |
| "loss": 0.2441, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5621052631578948, | |
| "grad_norm": 3.5041886865116783, | |
| "learning_rate": 8.174362341137177e-06, | |
| "loss": 0.2659, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.5642105263157895, | |
| "grad_norm": 3.0931593729585516, | |
| "learning_rate": 8.161570019212921e-06, | |
| "loss": 0.2308, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5663157894736842, | |
| "grad_norm": 3.6356498976901332, | |
| "learning_rate": 8.148743122865463e-06, | |
| "loss": 0.2534, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.5684210526315789, | |
| "grad_norm": 3.408126383096958, | |
| "learning_rate": 8.135881792367686e-06, | |
| "loss": 0.2321, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5705263157894737, | |
| "grad_norm": 2.6458628263496284, | |
| "learning_rate": 8.12298616836904e-06, | |
| "loss": 0.1978, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.5726315789473684, | |
| "grad_norm": 3.1483733395983595, | |
| "learning_rate": 8.110056391894005e-06, | |
| "loss": 0.2172, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5747368421052632, | |
| "grad_norm": 3.467397710095167, | |
| "learning_rate": 8.097092604340543e-06, | |
| "loss": 0.2394, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.5768421052631579, | |
| "grad_norm": 3.8996216518849454, | |
| "learning_rate": 8.084094947478556e-06, | |
| "loss": 0.2731, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.5789473684210527, | |
| "grad_norm": 3.0037248186783936, | |
| "learning_rate": 8.071063563448341e-06, | |
| "loss": 0.1767, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.5810526315789474, | |
| "grad_norm": 2.5277085823211864, | |
| "learning_rate": 8.057998594759022e-06, | |
| "loss": 0.1814, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5831578947368421, | |
| "grad_norm": 3.3543130599108255, | |
| "learning_rate": 8.044900184287007e-06, | |
| "loss": 0.2266, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.5852631578947368, | |
| "grad_norm": 3.1857375439158266, | |
| "learning_rate": 8.031768475274412e-06, | |
| "loss": 0.2343, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5873684210526315, | |
| "grad_norm": 3.055157108563214, | |
| "learning_rate": 8.018603611327505e-06, | |
| "loss": 0.227, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.5894736842105263, | |
| "grad_norm": 3.2243095637150927, | |
| "learning_rate": 8.005405736415127e-06, | |
| "loss": 0.1937, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5915789473684211, | |
| "grad_norm": 3.250488849370332, | |
| "learning_rate": 7.992174994867124e-06, | |
| "loss": 0.2374, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.5936842105263158, | |
| "grad_norm": 3.0167916103746912, | |
| "learning_rate": 7.978911531372764e-06, | |
| "loss": 0.225, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5957894736842105, | |
| "grad_norm": 3.2651532548799374, | |
| "learning_rate": 7.965615490979165e-06, | |
| "loss": 0.2337, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.5978947368421053, | |
| "grad_norm": 3.896346456849055, | |
| "learning_rate": 7.952287019089686e-06, | |
| "loss": 0.2748, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 3.5822792888425803, | |
| "learning_rate": 7.938926261462366e-06, | |
| "loss": 0.211, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6021052631578947, | |
| "grad_norm": 3.444306149909226, | |
| "learning_rate": 7.925533364208308e-06, | |
| "loss": 0.1983, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6042105263157894, | |
| "grad_norm": 4.1948069859545445, | |
| "learning_rate": 7.912108473790092e-06, | |
| "loss": 0.2328, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.6063157894736843, | |
| "grad_norm": 3.4747320472234517, | |
| "learning_rate": 7.898651737020166e-06, | |
| "loss": 0.265, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.608421052631579, | |
| "grad_norm": 3.240236939628344, | |
| "learning_rate": 7.885163301059251e-06, | |
| "loss": 0.2105, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.6105263157894737, | |
| "grad_norm": 3.721836217869373, | |
| "learning_rate": 7.871643313414718e-06, | |
| "loss": 0.2183, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6126315789473684, | |
| "grad_norm": 3.326881302452429, | |
| "learning_rate": 7.858091921938989e-06, | |
| "loss": 0.2394, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.6147368421052631, | |
| "grad_norm": 4.006855011965986, | |
| "learning_rate": 7.844509274827907e-06, | |
| "loss": 0.2294, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6168421052631579, | |
| "grad_norm": 2.977288794276405, | |
| "learning_rate": 7.830895520619129e-06, | |
| "loss": 0.1943, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.6189473684210526, | |
| "grad_norm": 3.503869431295621, | |
| "learning_rate": 7.817250808190483e-06, | |
| "loss": 0.2271, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6210526315789474, | |
| "grad_norm": 2.397881273267794, | |
| "learning_rate": 7.803575286758365e-06, | |
| "loss": 0.1522, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.6231578947368421, | |
| "grad_norm": 3.1498677204648855, | |
| "learning_rate": 7.789869105876083e-06, | |
| "loss": 0.2223, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6252631578947369, | |
| "grad_norm": 3.532048573053879, | |
| "learning_rate": 7.776132415432234e-06, | |
| "loss": 0.2548, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.6273684210526316, | |
| "grad_norm": 2.9494325963626777, | |
| "learning_rate": 7.762365365649068e-06, | |
| "loss": 0.2047, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6294736842105263, | |
| "grad_norm": 3.1322331545957707, | |
| "learning_rate": 7.748568107080831e-06, | |
| "loss": 0.2239, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 2.996031382032748, | |
| "learning_rate": 7.734740790612137e-06, | |
| "loss": 0.177, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6336842105263157, | |
| "grad_norm": 3.6318074014394135, | |
| "learning_rate": 7.720883567456299e-06, | |
| "loss": 0.2797, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.6357894736842106, | |
| "grad_norm": 3.5126271433689817, | |
| "learning_rate": 7.70699658915369e-06, | |
| "loss": 0.2965, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6378947368421053, | |
| "grad_norm": 3.067374146183351, | |
| "learning_rate": 7.693080007570084e-06, | |
| "loss": 0.2311, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 2.8467013786071735, | |
| "learning_rate": 7.679133974894984e-06, | |
| "loss": 0.1952, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6421052631578947, | |
| "grad_norm": 3.298916474796445, | |
| "learning_rate": 7.66515864363997e-06, | |
| "loss": 0.2233, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6442105263157895, | |
| "grad_norm": 4.447954496664178, | |
| "learning_rate": 7.651154166637025e-06, | |
| "loss": 0.3085, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6463157894736842, | |
| "grad_norm": 3.0739296320424736, | |
| "learning_rate": 7.637120697036866e-06, | |
| "loss": 0.1874, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.6484210526315789, | |
| "grad_norm": 2.672772402397274, | |
| "learning_rate": 7.62305838830727e-06, | |
| "loss": 0.2168, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6505263157894737, | |
| "grad_norm": 3.5823577010326844, | |
| "learning_rate": 7.608967394231387e-06, | |
| "loss": 0.2523, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.6526315789473685, | |
| "grad_norm": 3.363408010518267, | |
| "learning_rate": 7.594847868906076e-06, | |
| "loss": 0.213, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6547368421052632, | |
| "grad_norm": 3.0932376636426238, | |
| "learning_rate": 7.580699966740201e-06, | |
| "loss": 0.2267, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.6568421052631579, | |
| "grad_norm": 3.483318561632507, | |
| "learning_rate": 7.566523842452958e-06, | |
| "loss": 0.256, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6589473684210526, | |
| "grad_norm": 2.7912893670301484, | |
| "learning_rate": 7.552319651072164e-06, | |
| "loss": 0.2106, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.6610526315789473, | |
| "grad_norm": 3.4981450541010704, | |
| "learning_rate": 7.5380875479325855e-06, | |
| "loss": 0.2547, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6631578947368421, | |
| "grad_norm": 3.124883447115098, | |
| "learning_rate": 7.52382768867422e-06, | |
| "loss": 0.1939, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6652631578947369, | |
| "grad_norm": 4.620680045339017, | |
| "learning_rate": 7.509540229240601e-06, | |
| "loss": 0.2953, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6673684210526316, | |
| "grad_norm": 3.2282886161755786, | |
| "learning_rate": 7.4952253258771036e-06, | |
| "loss": 0.2112, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.6694736842105263, | |
| "grad_norm": 3.047727830370946, | |
| "learning_rate": 7.480883135129211e-06, | |
| "loss": 0.2086, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.671578947368421, | |
| "grad_norm": 2.584859580905444, | |
| "learning_rate": 7.4665138138408255e-06, | |
| "loss": 0.2119, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.6736842105263158, | |
| "grad_norm": 3.316066265356493, | |
| "learning_rate": 7.452117519152542e-06, | |
| "loss": 0.2489, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6757894736842105, | |
| "grad_norm": 3.2406113992536136, | |
| "learning_rate": 7.437694408499932e-06, | |
| "loss": 0.1915, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.6778947368421052, | |
| "grad_norm": 2.956072384698419, | |
| "learning_rate": 7.4232446396118265e-06, | |
| "loss": 0.2141, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 2.911407487056924, | |
| "learning_rate": 7.408768370508577e-06, | |
| "loss": 0.2149, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.6821052631578948, | |
| "grad_norm": 2.8116902443594016, | |
| "learning_rate": 7.394265759500348e-06, | |
| "loss": 0.1691, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.6842105263157895, | |
| "grad_norm": 3.276193445347204, | |
| "learning_rate": 7.379736965185369e-06, | |
| "loss": 0.2003, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.6863157894736842, | |
| "grad_norm": 2.980429816982403, | |
| "learning_rate": 7.365182146448205e-06, | |
| "loss": 0.2071, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6884210526315789, | |
| "grad_norm": 3.168944857843924, | |
| "learning_rate": 7.350601462458025e-06, | |
| "loss": 0.2249, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.6905263157894737, | |
| "grad_norm": 3.2312005808906608, | |
| "learning_rate": 7.335995072666848e-06, | |
| "loss": 0.1985, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.6926315789473684, | |
| "grad_norm": 3.0522979756884236, | |
| "learning_rate": 7.3213631368078196e-06, | |
| "loss": 0.2025, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.6947368421052632, | |
| "grad_norm": 2.787658703366056, | |
| "learning_rate": 7.30670581489344e-06, | |
| "loss": 0.1983, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6968421052631579, | |
| "grad_norm": 4.3667882707177625, | |
| "learning_rate": 7.292023267213836e-06, | |
| "loss": 0.2243, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.6989473684210527, | |
| "grad_norm": 5.1674527899722085, | |
| "learning_rate": 7.2773156543349965e-06, | |
| "loss": 0.2317, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7010526315789474, | |
| "grad_norm": 2.7521986848960216, | |
| "learning_rate": 7.262583137097019e-06, | |
| "loss": 0.1964, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.7031578947368421, | |
| "grad_norm": 2.8301069192286445, | |
| "learning_rate": 7.247825876612353e-06, | |
| "loss": 0.2043, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7052631578947368, | |
| "grad_norm": 3.770631339460926, | |
| "learning_rate": 7.233044034264034e-06, | |
| "loss": 0.1965, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7073684210526315, | |
| "grad_norm": 2.8548456329448872, | |
| "learning_rate": 7.218237771703921e-06, | |
| "loss": 0.1819, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7094736842105264, | |
| "grad_norm": 3.6843919985708173, | |
| "learning_rate": 7.203407250850929e-06, | |
| "loss": 0.2101, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.7115789473684211, | |
| "grad_norm": 2.481860597568968, | |
| "learning_rate": 7.18855263388926e-06, | |
| "loss": 0.1619, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7136842105263158, | |
| "grad_norm": 2.8454463712055653, | |
| "learning_rate": 7.173674083266624e-06, | |
| "loss": 0.1548, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.7157894736842105, | |
| "grad_norm": 3.1220177562190297, | |
| "learning_rate": 7.158771761692464e-06, | |
| "loss": 0.1873, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7178947368421053, | |
| "grad_norm": 3.1026746108893204, | |
| "learning_rate": 7.143845832136188e-06, | |
| "loss": 0.1708, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 3.613177488828585, | |
| "learning_rate": 7.128896457825364e-06, | |
| "loss": 0.2051, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7221052631578947, | |
| "grad_norm": 4.023734813281506, | |
| "learning_rate": 7.113923802243957e-06, | |
| "loss": 0.2371, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.7242105263157895, | |
| "grad_norm": 2.4891706091722283, | |
| "learning_rate": 7.098928029130529e-06, | |
| "loss": 0.1585, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7263157894736842, | |
| "grad_norm": 3.625956257810872, | |
| "learning_rate": 7.083909302476453e-06, | |
| "loss": 0.2379, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.728421052631579, | |
| "grad_norm": 3.409493884604401, | |
| "learning_rate": 7.068867786524116e-06, | |
| "loss": 0.1783, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7305263157894737, | |
| "grad_norm": 3.0090022256319866, | |
| "learning_rate": 7.053803645765128e-06, | |
| "loss": 0.1831, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.7326315789473684, | |
| "grad_norm": 3.5360587589584127, | |
| "learning_rate": 7.038717044938519e-06, | |
| "loss": 0.2413, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7347368421052631, | |
| "grad_norm": 3.4382217950294236, | |
| "learning_rate": 7.023608149028936e-06, | |
| "loss": 0.2155, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.7368421052631579, | |
| "grad_norm": 4.004045022458863, | |
| "learning_rate": 7.008477123264849e-06, | |
| "loss": 0.2836, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7389473684210527, | |
| "grad_norm": 3.3203295306272196, | |
| "learning_rate": 6.993324133116726e-06, | |
| "loss": 0.2658, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.7410526315789474, | |
| "grad_norm": 2.548964384681694, | |
| "learning_rate": 6.978149344295242e-06, | |
| "loss": 0.1785, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7431578947368421, | |
| "grad_norm": 3.4483832833571912, | |
| "learning_rate": 6.9629529227494575e-06, | |
| "loss": 0.2214, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.7452631578947368, | |
| "grad_norm": 3.4383584987113274, | |
| "learning_rate": 6.9477350346650016e-06, | |
| "loss": 0.192, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7473684210526316, | |
| "grad_norm": 3.691018189312247, | |
| "learning_rate": 6.932495846462262e-06, | |
| "loss": 0.2435, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7494736842105263, | |
| "grad_norm": 3.385770493095089, | |
| "learning_rate": 6.9172355247945586e-06, | |
| "loss": 0.205, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.751578947368421, | |
| "grad_norm": 2.662810311197674, | |
| "learning_rate": 6.901954236546324e-06, | |
| "loss": 0.1659, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.7536842105263157, | |
| "grad_norm": 4.168405645399794, | |
| "learning_rate": 6.88665214883128e-06, | |
| "loss": 0.2934, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7557894736842106, | |
| "grad_norm": 3.5383114057012843, | |
| "learning_rate": 6.871329428990602e-06, | |
| "loss": 0.2157, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7578947368421053, | |
| "grad_norm": 2.8894956368254103, | |
| "learning_rate": 6.855986244591104e-06, | |
| "loss": 0.1912, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 2.787711973501566, | |
| "learning_rate": 6.840622763423391e-06, | |
| "loss": 0.1706, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.7621052631578947, | |
| "grad_norm": 2.72901738571353, | |
| "learning_rate": 6.825239153500029e-06, | |
| "loss": 0.164, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7642105263157895, | |
| "grad_norm": 3.189665352469265, | |
| "learning_rate": 6.809835583053716e-06, | |
| "loss": 0.1764, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.7663157894736842, | |
| "grad_norm": 3.1275848607099133, | |
| "learning_rate": 6.794412220535426e-06, | |
| "loss": 0.2197, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7684210526315789, | |
| "grad_norm": 3.5188488634301263, | |
| "learning_rate": 6.778969234612583e-06, | |
| "loss": 0.2439, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7705263157894737, | |
| "grad_norm": 2.62111339980637, | |
| "learning_rate": 6.763506794167207e-06, | |
| "loss": 0.1879, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7726315789473684, | |
| "grad_norm": 2.8407752570746005, | |
| "learning_rate": 6.748025068294067e-06, | |
| "loss": 0.179, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.7747368421052632, | |
| "grad_norm": 3.230423148951695, | |
| "learning_rate": 6.732524226298841e-06, | |
| "loss": 0.1906, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7768421052631579, | |
| "grad_norm": 3.9240082867236974, | |
| "learning_rate": 6.717004437696249e-06, | |
| "loss": 0.2593, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.7789473684210526, | |
| "grad_norm": 2.949281736906227, | |
| "learning_rate": 6.701465872208216e-06, | |
| "loss": 0.1767, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7810526315789473, | |
| "grad_norm": 3.4699155102688293, | |
| "learning_rate": 6.685908699762003e-06, | |
| "loss": 0.2495, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.783157894736842, | |
| "grad_norm": 3.441878628446404, | |
| "learning_rate": 6.670333090488357e-06, | |
| "loss": 0.2499, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7852631578947369, | |
| "grad_norm": 3.1405985518052772, | |
| "learning_rate": 6.654739214719642e-06, | |
| "loss": 0.2127, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.7873684210526316, | |
| "grad_norm": 2.593987567673624, | |
| "learning_rate": 6.6391272429879886e-06, | |
| "loss": 0.1835, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.7894736842105263, | |
| "grad_norm": 3.276693821618827, | |
| "learning_rate": 6.6234973460234184e-06, | |
| "loss": 0.2027, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.791578947368421, | |
| "grad_norm": 2.995174038901829, | |
| "learning_rate": 6.607849694751978e-06, | |
| "loss": 0.2003, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.7936842105263158, | |
| "grad_norm": 2.6846031430529567, | |
| "learning_rate": 6.592184460293878e-06, | |
| "loss": 0.1421, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.7957894736842105, | |
| "grad_norm": 3.312415514283232, | |
| "learning_rate": 6.576501813961609e-06, | |
| "loss": 0.1863, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.7978947368421052, | |
| "grad_norm": 3.775675123728028, | |
| "learning_rate": 6.560801927258081e-06, | |
| "loss": 0.1958, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 2.5927726340982264, | |
| "learning_rate": 6.545084971874738e-06, | |
| "loss": 0.1625, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8021052631578948, | |
| "grad_norm": 3.1587922841231917, | |
| "learning_rate": 6.529351119689687e-06, | |
| "loss": 0.1965, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.8042105263157895, | |
| "grad_norm": 3.1769362735899356, | |
| "learning_rate": 6.513600542765816e-06, | |
| "loss": 0.2057, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8063157894736842, | |
| "grad_norm": 3.808162384466138, | |
| "learning_rate": 6.49783341334891e-06, | |
| "loss": 0.2042, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.8084210526315789, | |
| "grad_norm": 3.3063478217630107, | |
| "learning_rate": 6.4820499038657695e-06, | |
| "loss": 0.1916, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8105263157894737, | |
| "grad_norm": 3.043905617430906, | |
| "learning_rate": 6.466250186922325e-06, | |
| "loss": 0.1944, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.8126315789473684, | |
| "grad_norm": 4.168593975170044, | |
| "learning_rate": 6.450434435301751e-06, | |
| "loss": 0.2748, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8147368421052632, | |
| "grad_norm": 4.274013610158174, | |
| "learning_rate": 6.434602821962571e-06, | |
| "loss": 0.2494, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.8168421052631579, | |
| "grad_norm": 3.5963573539929463, | |
| "learning_rate": 6.418755520036775e-06, | |
| "loss": 0.2013, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8189473684210526, | |
| "grad_norm": 2.9666962047426666, | |
| "learning_rate": 6.402892702827916e-06, | |
| "loss": 0.187, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.8210526315789474, | |
| "grad_norm": 2.9643994270594884, | |
| "learning_rate": 6.387014543809224e-06, | |
| "loss": 0.2049, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8231578947368421, | |
| "grad_norm": 2.3657391759758397, | |
| "learning_rate": 6.371121216621698e-06, | |
| "loss": 0.1751, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.8252631578947368, | |
| "grad_norm": 3.3529253765458167, | |
| "learning_rate": 6.355212895072223e-06, | |
| "loss": 0.2193, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8273684210526315, | |
| "grad_norm": 3.1720607901606206, | |
| "learning_rate": 6.339289753131649e-06, | |
| "loss": 0.2148, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.8294736842105264, | |
| "grad_norm": 3.3584897742031834, | |
| "learning_rate": 6.323351964932909e-06, | |
| "loss": 0.2302, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8315789473684211, | |
| "grad_norm": 4.380475651099131, | |
| "learning_rate": 6.3073997047691e-06, | |
| "loss": 0.2887, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8336842105263158, | |
| "grad_norm": 3.289882212635633, | |
| "learning_rate": 6.291433147091583e-06, | |
| "loss": 0.2106, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8357894736842105, | |
| "grad_norm": 2.9345166529972952, | |
| "learning_rate": 6.275452466508076e-06, | |
| "loss": 0.2063, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.8378947368421052, | |
| "grad_norm": 3.6273888701243355, | |
| "learning_rate": 6.259457837780741e-06, | |
| "loss": 0.2245, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 3.6790816473406847, | |
| "learning_rate": 6.243449435824276e-06, | |
| "loss": 0.193, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 3.1914433056812426, | |
| "learning_rate": 6.227427435703997e-06, | |
| "loss": 0.2164, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "eval_loss": 0.19365844130516052, | |
| "eval_runtime": 0.9303, | |
| "eval_samples_per_second": 41.923, | |
| "eval_steps_per_second": 10.749, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8442105263157895, | |
| "grad_norm": 3.0422393517644095, | |
| "learning_rate": 6.211392012633932e-06, | |
| "loss": 0.1945, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.8463157894736842, | |
| "grad_norm": 3.3896222895957204, | |
| "learning_rate": 6.1953433419748995e-06, | |
| "loss": 0.2183, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.848421052631579, | |
| "grad_norm": 2.8202481621645226, | |
| "learning_rate": 6.179281599232592e-06, | |
| "loss": 0.222, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.8505263157894737, | |
| "grad_norm": 2.7904065123537545, | |
| "learning_rate": 6.163206960055652e-06, | |
| "loss": 0.1965, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8526315789473684, | |
| "grad_norm": 3.318994535797195, | |
| "learning_rate": 6.147119600233758e-06, | |
| "loss": 0.2116, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8547368421052631, | |
| "grad_norm": 3.787907520422109, | |
| "learning_rate": 6.131019695695702e-06, | |
| "loss": 0.2441, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8568421052631578, | |
| "grad_norm": 2.6044409986603947, | |
| "learning_rate": 6.114907422507459e-06, | |
| "loss": 0.1696, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.8589473684210527, | |
| "grad_norm": 3.1123186046200577, | |
| "learning_rate": 6.098782956870266e-06, | |
| "loss": 0.1714, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8610526315789474, | |
| "grad_norm": 3.5641698976572886, | |
| "learning_rate": 6.0826464751187e-06, | |
| "loss": 0.2129, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.8631578947368421, | |
| "grad_norm": 3.4449729307238397, | |
| "learning_rate": 6.066498153718735e-06, | |
| "loss": 0.2059, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8652631578947368, | |
| "grad_norm": 3.091646410008194, | |
| "learning_rate": 6.0503381692658305e-06, | |
| "loss": 0.2244, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.8673684210526316, | |
| "grad_norm": 3.426356919246921, | |
| "learning_rate": 6.034166698482984e-06, | |
| "loss": 0.2493, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8694736842105263, | |
| "grad_norm": 3.009157338394937, | |
| "learning_rate": 6.0179839182188125e-06, | |
| "loss": 0.1769, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.871578947368421, | |
| "grad_norm": 2.68571377740786, | |
| "learning_rate": 6.001790005445607e-06, | |
| "loss": 0.1801, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.8736842105263158, | |
| "grad_norm": 3.13266305671967, | |
| "learning_rate": 5.985585137257401e-06, | |
| "loss": 0.2552, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.8757894736842106, | |
| "grad_norm": 3.118129327899299, | |
| "learning_rate": 5.969369490868042e-06, | |
| "loss": 0.2213, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8778947368421053, | |
| "grad_norm": 3.1170850548476428, | |
| "learning_rate": 5.953143243609235e-06, | |
| "loss": 0.2228, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 3.4825948598222136, | |
| "learning_rate": 5.936906572928625e-06, | |
| "loss": 0.2319, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8821052631578947, | |
| "grad_norm": 3.364021447031936, | |
| "learning_rate": 5.920659656387836e-06, | |
| "loss": 0.1935, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.8842105263157894, | |
| "grad_norm": 2.7683123292862497, | |
| "learning_rate": 5.904402671660551e-06, | |
| "loss": 0.1622, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8863157894736842, | |
| "grad_norm": 3.089059939046834, | |
| "learning_rate": 5.8881357965305444e-06, | |
| "loss": 0.1677, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.888421052631579, | |
| "grad_norm": 3.1348448785512675, | |
| "learning_rate": 5.871859208889759e-06, | |
| "loss": 0.1814, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.8905263157894737, | |
| "grad_norm": 3.230597062554221, | |
| "learning_rate": 5.855573086736351e-06, | |
| "loss": 0.2091, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.8926315789473684, | |
| "grad_norm": 2.883110792133594, | |
| "learning_rate": 5.839277608172739e-06, | |
| "loss": 0.1836, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.8947368421052632, | |
| "grad_norm": 4.415508413152931, | |
| "learning_rate": 5.82297295140367e-06, | |
| "loss": 0.3021, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.8968421052631579, | |
| "grad_norm": 2.953180474766528, | |
| "learning_rate": 5.806659294734256e-06, | |
| "loss": 0.1912, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.8989473684210526, | |
| "grad_norm": 2.5058106964907814, | |
| "learning_rate": 5.790336816568033e-06, | |
| "loss": 0.1418, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.9010526315789473, | |
| "grad_norm": 2.784908569571114, | |
| "learning_rate": 5.774005695405008e-06, | |
| "loss": 0.1733, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9031578947368422, | |
| "grad_norm": 3.2074914294258643, | |
| "learning_rate": 5.7576661098397024e-06, | |
| "loss": 0.217, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.9052631578947369, | |
| "grad_norm": 3.8184949532629955, | |
| "learning_rate": 5.74131823855921e-06, | |
| "loss": 0.1928, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9073684210526316, | |
| "grad_norm": 2.884763048980032, | |
| "learning_rate": 5.72496226034123e-06, | |
| "loss": 0.179, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.9094736842105263, | |
| "grad_norm": 3.131007686373488, | |
| "learning_rate": 5.708598354052122e-06, | |
| "loss": 0.2092, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.911578947368421, | |
| "grad_norm": 3.600180991489015, | |
| "learning_rate": 5.692226698644938e-06, | |
| "loss": 0.1771, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.9136842105263158, | |
| "grad_norm": 2.6092430120715386, | |
| "learning_rate": 5.675847473157485e-06, | |
| "loss": 0.1505, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9157894736842105, | |
| "grad_norm": 3.758561821727175, | |
| "learning_rate": 5.659460856710346e-06, | |
| "loss": 0.2449, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9178947368421052, | |
| "grad_norm": 3.005737007201367, | |
| "learning_rate": 5.643067028504931e-06, | |
| "loss": 0.1706, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 2.9179364125259557, | |
| "learning_rate": 5.626666167821522e-06, | |
| "loss": 0.1812, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.9221052631578948, | |
| "grad_norm": 3.1976728733646738, | |
| "learning_rate": 5.610258454017301e-06, | |
| "loss": 0.2345, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9242105263157895, | |
| "grad_norm": 3.475521355404778, | |
| "learning_rate": 5.593844066524401e-06, | |
| "loss": 0.254, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.9263157894736842, | |
| "grad_norm": 3.5995093334761963, | |
| "learning_rate": 5.577423184847932e-06, | |
| "loss": 0.2348, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9284210526315789, | |
| "grad_norm": 2.835624142601258, | |
| "learning_rate": 5.560995988564023e-06, | |
| "loss": 0.1802, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.9305263157894736, | |
| "grad_norm": 3.8989119467413613, | |
| "learning_rate": 5.544562657317863e-06, | |
| "loss": 0.2229, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9326315789473684, | |
| "grad_norm": 3.62544713638484, | |
| "learning_rate": 5.52812337082173e-06, | |
| "loss": 0.2153, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.9347368421052632, | |
| "grad_norm": 3.392283457067749, | |
| "learning_rate": 5.5116783088530255e-06, | |
| "loss": 0.1824, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9368421052631579, | |
| "grad_norm": 4.303709047671292, | |
| "learning_rate": 5.495227651252315e-06, | |
| "loss": 0.298, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9389473684210526, | |
| "grad_norm": 2.830025115217364, | |
| "learning_rate": 5.478771577921351e-06, | |
| "loss": 0.1657, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9410526315789474, | |
| "grad_norm": 3.2810223083748826, | |
| "learning_rate": 5.4623102688211186e-06, | |
| "loss": 0.2494, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.9431578947368421, | |
| "grad_norm": 3.4438213790356444, | |
| "learning_rate": 5.445843903969854e-06, | |
| "loss": 0.2062, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9452631578947368, | |
| "grad_norm": 2.879757240077144, | |
| "learning_rate": 5.429372663441086e-06, | |
| "loss": 0.2002, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.9473684210526315, | |
| "grad_norm": 2.8548701745465563, | |
| "learning_rate": 5.412896727361663e-06, | |
| "loss": 0.1942, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9494736842105264, | |
| "grad_norm": 3.3673638986518872, | |
| "learning_rate": 5.396416275909779e-06, | |
| "loss": 0.2442, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.9515789473684211, | |
| "grad_norm": 3.151677859424395, | |
| "learning_rate": 5.379931489313016e-06, | |
| "loss": 0.1857, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9536842105263158, | |
| "grad_norm": 2.3401970680752653, | |
| "learning_rate": 5.363442547846356e-06, | |
| "loss": 0.1574, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.9557894736842105, | |
| "grad_norm": 3.171440734498741, | |
| "learning_rate": 5.346949631830221e-06, | |
| "loss": 0.1858, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9578947368421052, | |
| "grad_norm": 3.572091487862273, | |
| "learning_rate": 5.3304529216284974e-06, | |
| "loss": 0.233, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 3.3362097570655704, | |
| "learning_rate": 5.3139525976465675e-06, | |
| "loss": 0.1577, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9621052631578947, | |
| "grad_norm": 3.521394981695169, | |
| "learning_rate": 5.2974488403293285e-06, | |
| "loss": 0.2165, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.9642105263157895, | |
| "grad_norm": 3.5537369585027876, | |
| "learning_rate": 5.280941830159228e-06, | |
| "loss": 0.2035, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9663157894736842, | |
| "grad_norm": 2.6967873973758336, | |
| "learning_rate": 5.264431747654284e-06, | |
| "loss": 0.1903, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.968421052631579, | |
| "grad_norm": 3.451224252952003, | |
| "learning_rate": 5.247918773366112e-06, | |
| "loss": 0.2189, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9705263157894737, | |
| "grad_norm": 3.3703738535372305, | |
| "learning_rate": 5.231403087877955e-06, | |
| "loss": 0.1658, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.9726315789473684, | |
| "grad_norm": 2.850165218926584, | |
| "learning_rate": 5.214884871802703e-06, | |
| "loss": 0.1932, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9747368421052631, | |
| "grad_norm": 3.37619966686572, | |
| "learning_rate": 5.198364305780922e-06, | |
| "loss": 0.1988, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.9768421052631578, | |
| "grad_norm": 2.960765636480082, | |
| "learning_rate": 5.1818415704788725e-06, | |
| "loss": 0.1904, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9789473684210527, | |
| "grad_norm": 2.7214682076892354, | |
| "learning_rate": 5.165316846586541e-06, | |
| "loss": 0.2017, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.9810526315789474, | |
| "grad_norm": 2.4388957400236624, | |
| "learning_rate": 5.148790314815662e-06, | |
| "loss": 0.1764, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.9831578947368421, | |
| "grad_norm": 2.8678613327792184, | |
| "learning_rate": 5.132262155897739e-06, | |
| "loss": 0.1778, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.9852631578947368, | |
| "grad_norm": 3.210155912400773, | |
| "learning_rate": 5.11573255058207e-06, | |
| "loss": 0.2211, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.9873684210526316, | |
| "grad_norm": 3.5663187219986074, | |
| "learning_rate": 5.099201679633769e-06, | |
| "loss": 0.2235, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.9894736842105263, | |
| "grad_norm": 2.956548038927285, | |
| "learning_rate": 5.082669723831793e-06, | |
| "loss": 0.1466, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.991578947368421, | |
| "grad_norm": 3.930944163022198, | |
| "learning_rate": 5.066136863966963e-06, | |
| "loss": 0.2018, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.9936842105263158, | |
| "grad_norm": 3.031263337005746, | |
| "learning_rate": 5.049603280839982e-06, | |
| "loss": 0.2197, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.9957894736842106, | |
| "grad_norm": 3.721525482445003, | |
| "learning_rate": 5.033069155259471e-06, | |
| "loss": 0.2175, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.9978947368421053, | |
| "grad_norm": 2.238401391190845, | |
| "learning_rate": 5.016534668039976e-06, | |
| "loss": 0.1057, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.700095513199168, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1825, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.0021052631578948, | |
| "grad_norm": 2.102327993497151, | |
| "learning_rate": 4.983465331960025e-06, | |
| "loss": 0.0885, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0042105263157894, | |
| "grad_norm": 2.2164179180128127, | |
| "learning_rate": 4.96693084474053e-06, | |
| "loss": 0.101, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.0063157894736843, | |
| "grad_norm": 2.591907392954552, | |
| "learning_rate": 4.950396719160019e-06, | |
| "loss": 0.1016, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0084210526315789, | |
| "grad_norm": 2.3536825700723707, | |
| "learning_rate": 4.93386313603304e-06, | |
| "loss": 0.12, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.0105263157894737, | |
| "grad_norm": 2.7777745052862106, | |
| "learning_rate": 4.917330276168208e-06, | |
| "loss": 0.102, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0126315789473683, | |
| "grad_norm": 2.4751195672854704, | |
| "learning_rate": 4.900798320366233e-06, | |
| "loss": 0.0989, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.0147368421052632, | |
| "grad_norm": 1.889820583657195, | |
| "learning_rate": 4.884267449417932e-06, | |
| "loss": 0.0762, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.016842105263158, | |
| "grad_norm": 2.2064010641296155, | |
| "learning_rate": 4.867737844102261e-06, | |
| "loss": 0.0932, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.0189473684210526, | |
| "grad_norm": 2.9805461448209556, | |
| "learning_rate": 4.851209685184339e-06, | |
| "loss": 0.092, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0210526315789474, | |
| "grad_norm": 2.2601627421875032, | |
| "learning_rate": 4.8346831534134595e-06, | |
| "loss": 0.09, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.023157894736842, | |
| "grad_norm": 2.3939195754809055, | |
| "learning_rate": 4.818158429521129e-06, | |
| "loss": 0.1179, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.0252631578947369, | |
| "grad_norm": 2.3451597644966573, | |
| "learning_rate": 4.801635694219079e-06, | |
| "loss": 0.08, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.0273684210526315, | |
| "grad_norm": 2.6640782365642592, | |
| "learning_rate": 4.785115128197298e-06, | |
| "loss": 0.1017, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0294736842105263, | |
| "grad_norm": 2.1177638500079365, | |
| "learning_rate": 4.768596912122046e-06, | |
| "loss": 0.0731, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.0315789473684212, | |
| "grad_norm": 3.3240436401313618, | |
| "learning_rate": 4.752081226633888e-06, | |
| "loss": 0.0919, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0336842105263158, | |
| "grad_norm": 2.2384781946355794, | |
| "learning_rate": 4.735568252345718e-06, | |
| "loss": 0.0719, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.0357894736842106, | |
| "grad_norm": 2.983441854897483, | |
| "learning_rate": 4.719058169840773e-06, | |
| "loss": 0.0745, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0378947368421052, | |
| "grad_norm": 2.6556422035702045, | |
| "learning_rate": 4.702551159670672e-06, | |
| "loss": 0.0734, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 3.219998720581149, | |
| "learning_rate": 4.686047402353433e-06, | |
| "loss": 0.0775, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.0421052631578946, | |
| "grad_norm": 2.6391239908163233, | |
| "learning_rate": 4.669547078371503e-06, | |
| "loss": 0.0787, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.0442105263157895, | |
| "grad_norm": 3.041237149660994, | |
| "learning_rate": 4.65305036816978e-06, | |
| "loss": 0.089, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0463157894736843, | |
| "grad_norm": 3.687880198514741, | |
| "learning_rate": 4.636557452153645e-06, | |
| "loss": 0.0831, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.048421052631579, | |
| "grad_norm": 5.095705229375661, | |
| "learning_rate": 4.620068510686985e-06, | |
| "loss": 0.0804, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.0505263157894738, | |
| "grad_norm": 3.197432814925761, | |
| "learning_rate": 4.60358372409022e-06, | |
| "loss": 0.0574, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.0526315789473684, | |
| "grad_norm": 3.09106465983814, | |
| "learning_rate": 4.587103272638339e-06, | |
| "loss": 0.0823, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0547368421052632, | |
| "grad_norm": 3.4094205016943193, | |
| "learning_rate": 4.570627336558915e-06, | |
| "loss": 0.077, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.0568421052631578, | |
| "grad_norm": 3.2700532893266723, | |
| "learning_rate": 4.554156096030149e-06, | |
| "loss": 0.0888, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.0589473684210526, | |
| "grad_norm": 3.8444997651481274, | |
| "learning_rate": 4.537689731178883e-06, | |
| "loss": 0.0995, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.0610526315789475, | |
| "grad_norm": 3.460457464328528, | |
| "learning_rate": 4.5212284220786495e-06, | |
| "loss": 0.0852, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.063157894736842, | |
| "grad_norm": 3.5197825821543844, | |
| "learning_rate": 4.504772348747687e-06, | |
| "loss": 0.089, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.065263157894737, | |
| "grad_norm": 2.9315058365098148, | |
| "learning_rate": 4.488321691146975e-06, | |
| "loss": 0.0917, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.0673684210526315, | |
| "grad_norm": 2.959097650131179, | |
| "learning_rate": 4.471876629178273e-06, | |
| "loss": 0.0927, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.0694736842105264, | |
| "grad_norm": 2.9752640084242543, | |
| "learning_rate": 4.4554373426821375e-06, | |
| "loss": 0.0754, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.071578947368421, | |
| "grad_norm": 3.6867032363293633, | |
| "learning_rate": 4.439004011435979e-06, | |
| "loss": 0.0931, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.0736842105263158, | |
| "grad_norm": 3.9162282663094437, | |
| "learning_rate": 4.42257681515207e-06, | |
| "loss": 0.0915, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.0757894736842106, | |
| "grad_norm": 3.245956552006904, | |
| "learning_rate": 4.406155933475599e-06, | |
| "loss": 0.0825, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.0778947368421052, | |
| "grad_norm": 3.3407176706701303, | |
| "learning_rate": 4.3897415459827e-06, | |
| "loss": 0.0833, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 3.2605562952641325, | |
| "learning_rate": 4.373333832178478e-06, | |
| "loss": 0.0836, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.0821052631578947, | |
| "grad_norm": 3.022186927091034, | |
| "learning_rate": 4.356932971495071e-06, | |
| "loss": 0.0893, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.0842105263157895, | |
| "grad_norm": 2.570982657234066, | |
| "learning_rate": 4.340539143289655e-06, | |
| "loss": 0.0691, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.0863157894736841, | |
| "grad_norm": 2.6495275768322966, | |
| "learning_rate": 4.324152526842517e-06, | |
| "loss": 0.0703, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.088421052631579, | |
| "grad_norm": 2.9529381342049357, | |
| "learning_rate": 4.307773301355063e-06, | |
| "loss": 0.0878, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.0905263157894738, | |
| "grad_norm": 3.2139910027789913, | |
| "learning_rate": 4.291401645947879e-06, | |
| "loss": 0.0858, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.0926315789473684, | |
| "grad_norm": 3.359687231677775, | |
| "learning_rate": 4.275037739658771e-06, | |
| "loss": 0.0991, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.0947368421052632, | |
| "grad_norm": 2.7257961811651867, | |
| "learning_rate": 4.25868176144079e-06, | |
| "loss": 0.0636, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.0968421052631578, | |
| "grad_norm": 3.0300299027782205, | |
| "learning_rate": 4.242333890160299e-06, | |
| "loss": 0.0744, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.0989473684210527, | |
| "grad_norm": 2.673076324741469, | |
| "learning_rate": 4.225994304594994e-06, | |
| "loss": 0.0733, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1010526315789473, | |
| "grad_norm": 3.434548397420313, | |
| "learning_rate": 4.209663183431969e-06, | |
| "loss": 0.0894, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.1031578947368421, | |
| "grad_norm": 2.656738290688316, | |
| "learning_rate": 4.193340705265746e-06, | |
| "loss": 0.0816, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1052631578947367, | |
| "grad_norm": 2.752642253228426, | |
| "learning_rate": 4.17702704859633e-06, | |
| "loss": 0.0737, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.1073684210526316, | |
| "grad_norm": 2.8240215409779204, | |
| "learning_rate": 4.160722391827262e-06, | |
| "loss": 0.0946, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.1094736842105264, | |
| "grad_norm": 2.6310984451059523, | |
| "learning_rate": 4.14442691326365e-06, | |
| "loss": 0.075, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.111578947368421, | |
| "grad_norm": 3.3487552916421097, | |
| "learning_rate": 4.128140791110243e-06, | |
| "loss": 0.0904, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1136842105263158, | |
| "grad_norm": 3.071402311195297, | |
| "learning_rate": 4.111864203469457e-06, | |
| "loss": 0.079, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.1157894736842104, | |
| "grad_norm": 2.8739724428152993, | |
| "learning_rate": 4.0955973283394525e-06, | |
| "loss": 0.0844, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1178947368421053, | |
| "grad_norm": 2.7822422708634442, | |
| "learning_rate": 4.079340343612165e-06, | |
| "loss": 0.0943, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 2.5902291543214364, | |
| "learning_rate": 4.063093427071376e-06, | |
| "loss": 0.0827, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1221052631578947, | |
| "grad_norm": 3.084348236278604, | |
| "learning_rate": 4.046856756390767e-06, | |
| "loss": 0.0892, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.1242105263157895, | |
| "grad_norm": 2.95461042174687, | |
| "learning_rate": 4.03063050913196e-06, | |
| "loss": 0.0816, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.1263157894736842, | |
| "grad_norm": 2.7009483055282892, | |
| "learning_rate": 4.0144148627426e-06, | |
| "loss": 0.063, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.128421052631579, | |
| "grad_norm": 3.2167472489705062, | |
| "learning_rate": 3.998209994554395e-06, | |
| "loss": 0.0993, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1305263157894736, | |
| "grad_norm": 3.164155379501995, | |
| "learning_rate": 3.982016081781189e-06, | |
| "loss": 0.0928, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.1326315789473684, | |
| "grad_norm": 2.6712684161255873, | |
| "learning_rate": 3.965833301517017e-06, | |
| "loss": 0.0792, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.134736842105263, | |
| "grad_norm": 3.590217130090868, | |
| "learning_rate": 3.949661830734172e-06, | |
| "loss": 0.1122, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.1368421052631579, | |
| "grad_norm": 2.757855187593266, | |
| "learning_rate": 3.9335018462812664e-06, | |
| "loss": 0.0732, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1389473684210527, | |
| "grad_norm": 3.536487052728721, | |
| "learning_rate": 3.9173535248813026e-06, | |
| "loss": 0.0678, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.1410526315789473, | |
| "grad_norm": 2.4862366546978483, | |
| "learning_rate": 3.901217043129735e-06, | |
| "loss": 0.0728, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1431578947368422, | |
| "grad_norm": 3.2902474007718907, | |
| "learning_rate": 3.885092577492543e-06, | |
| "loss": 0.1086, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.1452631578947368, | |
| "grad_norm": 3.451017932646852, | |
| "learning_rate": 3.8689803043043e-06, | |
| "loss": 0.0868, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.1473684210526316, | |
| "grad_norm": 2.8980165245573692, | |
| "learning_rate": 3.852880399766243e-06, | |
| "loss": 0.0829, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.1494736842105264, | |
| "grad_norm": 2.8916674632956134, | |
| "learning_rate": 3.8367930399443495e-06, | |
| "loss": 0.0782, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.151578947368421, | |
| "grad_norm": 2.505026430566736, | |
| "learning_rate": 3.820718400767409e-06, | |
| "loss": 0.0763, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.1536842105263159, | |
| "grad_norm": 3.7982142015035305, | |
| "learning_rate": 3.8046566580251e-06, | |
| "loss": 0.0895, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.1557894736842105, | |
| "grad_norm": 2.550264620010391, | |
| "learning_rate": 3.7886079873660693e-06, | |
| "loss": 0.085, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.1578947368421053, | |
| "grad_norm": 3.2823943298060483, | |
| "learning_rate": 3.7725725642960047e-06, | |
| "loss": 0.0838, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 3.091694066417572, | |
| "learning_rate": 3.756550564175727e-06, | |
| "loss": 0.0945, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.1621052631578948, | |
| "grad_norm": 2.6667880955040855, | |
| "learning_rate": 3.7405421622192607e-06, | |
| "loss": 0.067, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.1642105263157894, | |
| "grad_norm": 3.2831274480460055, | |
| "learning_rate": 3.7245475334919246e-06, | |
| "loss": 0.0994, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.1663157894736842, | |
| "grad_norm": 2.2115339073168903, | |
| "learning_rate": 3.7085668529084183e-06, | |
| "loss": 0.0609, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.168421052631579, | |
| "grad_norm": 3.103922834249276, | |
| "learning_rate": 3.6926002952309015e-06, | |
| "loss": 0.0705, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.1705263157894736, | |
| "grad_norm": 3.1379466258374413, | |
| "learning_rate": 3.676648035067093e-06, | |
| "loss": 0.0755, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.1726315789473685, | |
| "grad_norm": 3.144558129851556, | |
| "learning_rate": 3.6607102468683524e-06, | |
| "loss": 0.0906, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.174736842105263, | |
| "grad_norm": 2.843620211669143, | |
| "learning_rate": 3.64478710492778e-06, | |
| "loss": 0.0752, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.176842105263158, | |
| "grad_norm": 2.737119577196797, | |
| "learning_rate": 3.628878783378302e-06, | |
| "loss": 0.0855, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.1789473684210527, | |
| "grad_norm": 3.38606655435301, | |
| "learning_rate": 3.6129854561907786e-06, | |
| "loss": 0.1073, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.1810526315789474, | |
| "grad_norm": 3.022666071905334, | |
| "learning_rate": 3.5971072971720844e-06, | |
| "loss": 0.096, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.1831578947368422, | |
| "grad_norm": 3.013844862309235, | |
| "learning_rate": 3.581244479963225e-06, | |
| "loss": 0.0699, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.1852631578947368, | |
| "grad_norm": 2.4616484667093532, | |
| "learning_rate": 3.56539717803743e-06, | |
| "loss": 0.0686, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.1873684210526316, | |
| "grad_norm": 3.092537315474559, | |
| "learning_rate": 3.5495655646982506e-06, | |
| "loss": 0.1022, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.1894736842105262, | |
| "grad_norm": 2.781358161394791, | |
| "learning_rate": 3.533749813077677e-06, | |
| "loss": 0.0804, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.191578947368421, | |
| "grad_norm": 2.453888657239943, | |
| "learning_rate": 3.517950096134232e-06, | |
| "loss": 0.0577, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.1936842105263157, | |
| "grad_norm": 2.935143310735812, | |
| "learning_rate": 3.5021665866510924e-06, | |
| "loss": 0.0905, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.1957894736842105, | |
| "grad_norm": 2.6509666167907726, | |
| "learning_rate": 3.4863994572341845e-06, | |
| "loss": 0.0854, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.1978947368421053, | |
| "grad_norm": 2.955009635915876, | |
| "learning_rate": 3.470648880310313e-06, | |
| "loss": 0.0883, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 3.2167621312639794, | |
| "learning_rate": 3.4549150281252635e-06, | |
| "loss": 0.072, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2021052631578948, | |
| "grad_norm": 2.7638395907473225, | |
| "learning_rate": 3.4391980727419206e-06, | |
| "loss": 0.082, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.2042105263157894, | |
| "grad_norm": 3.4412996909683806, | |
| "learning_rate": 3.423498186038393e-06, | |
| "loss": 0.1093, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2063157894736842, | |
| "grad_norm": 2.6520865262952515, | |
| "learning_rate": 3.4078155397061243e-06, | |
| "loss": 0.07, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.208421052631579, | |
| "grad_norm": 2.376283277388007, | |
| "learning_rate": 3.3921503052480243e-06, | |
| "loss": 0.0748, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2105263157894737, | |
| "grad_norm": 2.242871891939484, | |
| "learning_rate": 3.3765026539765832e-06, | |
| "loss": 0.0681, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.2126315789473685, | |
| "grad_norm": 4.4744073948915535, | |
| "learning_rate": 3.3608727570120114e-06, | |
| "loss": 0.0928, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2147368421052631, | |
| "grad_norm": 2.7537788196508903, | |
| "learning_rate": 3.3452607852803585e-06, | |
| "loss": 0.0966, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.216842105263158, | |
| "grad_norm": 3.1307439683337517, | |
| "learning_rate": 3.3296669095116454e-06, | |
| "loss": 0.0778, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2189473684210526, | |
| "grad_norm": 2.7177020729876253, | |
| "learning_rate": 3.3140913002379993e-06, | |
| "loss": 0.0697, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.2210526315789474, | |
| "grad_norm": 2.4210212110003484, | |
| "learning_rate": 3.298534127791785e-06, | |
| "loss": 0.0898, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.223157894736842, | |
| "grad_norm": 2.486998916941089, | |
| "learning_rate": 3.2829955623037536e-06, | |
| "loss": 0.0906, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.2252631578947368, | |
| "grad_norm": 2.398945212379016, | |
| "learning_rate": 3.267475773701161e-06, | |
| "loss": 0.072, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2273684210526317, | |
| "grad_norm": 2.98342220040315, | |
| "learning_rate": 3.251974931705933e-06, | |
| "loss": 0.0884, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.2294736842105263, | |
| "grad_norm": 2.471587722526877, | |
| "learning_rate": 3.236493205832795e-06, | |
| "loss": 0.0803, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.231578947368421, | |
| "grad_norm": 3.3871586088205277, | |
| "learning_rate": 3.2210307653874175e-06, | |
| "loss": 0.0905, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.2336842105263157, | |
| "grad_norm": 3.1548244792093727, | |
| "learning_rate": 3.205587779464576e-06, | |
| "loss": 0.0807, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.2357894736842105, | |
| "grad_norm": 2.751888064247698, | |
| "learning_rate": 3.1901644169462854e-06, | |
| "loss": 0.1001, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.2378947368421054, | |
| "grad_norm": 2.953100733477854, | |
| "learning_rate": 3.1747608464999723e-06, | |
| "loss": 0.0859, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 3.0915284845137383, | |
| "learning_rate": 3.1593772365766107e-06, | |
| "loss": 0.0916, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.2421052631578948, | |
| "grad_norm": 3.212692328218412, | |
| "learning_rate": 3.1440137554088957e-06, | |
| "loss": 0.087, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.2442105263157894, | |
| "grad_norm": 3.6741186905601673, | |
| "learning_rate": 3.128670571009399e-06, | |
| "loss": 0.0918, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.2463157894736843, | |
| "grad_norm": 3.052869956993313, | |
| "learning_rate": 3.1133478511687217e-06, | |
| "loss": 0.0882, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.2484210526315789, | |
| "grad_norm": 2.7038650862339026, | |
| "learning_rate": 3.0980457634536775e-06, | |
| "loss": 0.0694, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.2505263157894737, | |
| "grad_norm": 3.018334207884892, | |
| "learning_rate": 3.082764475205442e-06, | |
| "loss": 0.0858, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.2526315789473683, | |
| "grad_norm": 2.569495801078813, | |
| "learning_rate": 3.06750415353774e-06, | |
| "loss": 0.0782, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.2547368421052632, | |
| "grad_norm": 3.3984298834388835, | |
| "learning_rate": 3.052264965335e-06, | |
| "loss": 0.109, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.256842105263158, | |
| "grad_norm": 2.3590945955494416, | |
| "learning_rate": 3.0370470772505433e-06, | |
| "loss": 0.071, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.2589473684210526, | |
| "grad_norm": 2.234877038235419, | |
| "learning_rate": 3.02185065570476e-06, | |
| "loss": 0.0692, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.2610526315789474, | |
| "grad_norm": 4.052354805427412, | |
| "learning_rate": 3.0066758668832752e-06, | |
| "loss": 0.0948, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.263157894736842, | |
| "grad_norm": 2.742079337401893, | |
| "learning_rate": 2.991522876735154e-06, | |
| "loss": 0.0969, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.263157894736842, | |
| "eval_loss": 0.20044729113578796, | |
| "eval_runtime": 0.929, | |
| "eval_samples_per_second": 41.983, | |
| "eval_steps_per_second": 10.765, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2652631578947369, | |
| "grad_norm": 2.8741805199859205, | |
| "learning_rate": 2.9763918509710647e-06, | |
| "loss": 0.0963, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.2673684210526317, | |
| "grad_norm": 2.737388943410033, | |
| "learning_rate": 2.9612829550614836e-06, | |
| "loss": 0.0826, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.2694736842105263, | |
| "grad_norm": 2.717582073137317, | |
| "learning_rate": 2.9461963542348737e-06, | |
| "loss": 0.0681, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.271578947368421, | |
| "grad_norm": 3.3716699599065123, | |
| "learning_rate": 2.931132213475884e-06, | |
| "loss": 0.101, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.2736842105263158, | |
| "grad_norm": 2.439989476563021, | |
| "learning_rate": 2.9160906975235493e-06, | |
| "loss": 0.0732, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.2757894736842106, | |
| "grad_norm": 3.092634953355724, | |
| "learning_rate": 2.9010719708694724e-06, | |
| "loss": 0.0744, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.2778947368421052, | |
| "grad_norm": 2.638312682828106, | |
| "learning_rate": 2.8860761977560435e-06, | |
| "loss": 0.0757, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 2.3219077212278494, | |
| "learning_rate": 2.871103542174637e-06, | |
| "loss": 0.0941, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.2821052631578946, | |
| "grad_norm": 2.7468019529994607, | |
| "learning_rate": 2.8561541678638145e-06, | |
| "loss": 0.0679, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.2842105263157895, | |
| "grad_norm": 2.592555737944712, | |
| "learning_rate": 2.8412282383075362e-06, | |
| "loss": 0.072, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.2863157894736843, | |
| "grad_norm": 2.5748600678466493, | |
| "learning_rate": 2.826325916733378e-06, | |
| "loss": 0.078, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.288421052631579, | |
| "grad_norm": 3.272935966473756, | |
| "learning_rate": 2.811447366110741e-06, | |
| "loss": 0.0985, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.2905263157894737, | |
| "grad_norm": 2.718391171117533, | |
| "learning_rate": 2.796592749149071e-06, | |
| "loss": 0.0856, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.2926315789473684, | |
| "grad_norm": 2.895251659895727, | |
| "learning_rate": 2.7817622282960816e-06, | |
| "loss": 0.0814, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.2947368421052632, | |
| "grad_norm": 3.098118941203153, | |
| "learning_rate": 2.766955965735968e-06, | |
| "loss": 0.1, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.296842105263158, | |
| "grad_norm": 2.4925459738486078, | |
| "learning_rate": 2.7521741233876496e-06, | |
| "loss": 0.066, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.2989473684210526, | |
| "grad_norm": 3.2668047779458447, | |
| "learning_rate": 2.7374168629029814e-06, | |
| "loss": 0.0662, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.3010526315789472, | |
| "grad_norm": 2.365373318259492, | |
| "learning_rate": 2.722684345665004e-06, | |
| "loss": 0.0568, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.303157894736842, | |
| "grad_norm": 2.674596379228086, | |
| "learning_rate": 2.707976732786166e-06, | |
| "loss": 0.0716, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.305263157894737, | |
| "grad_norm": 2.629799474227148, | |
| "learning_rate": 2.693294185106562e-06, | |
| "loss": 0.0708, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3073684210526315, | |
| "grad_norm": 2.7427790950830917, | |
| "learning_rate": 2.678636863192184e-06, | |
| "loss": 0.0819, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.3094736842105263, | |
| "grad_norm": 2.355303580669883, | |
| "learning_rate": 2.6640049273331516e-06, | |
| "loss": 0.0682, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.311578947368421, | |
| "grad_norm": 2.5202135513477595, | |
| "learning_rate": 2.649398537541978e-06, | |
| "loss": 0.0592, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.3136842105263158, | |
| "grad_norm": 3.1053392641729056, | |
| "learning_rate": 2.6348178535517967e-06, | |
| "loss": 0.0815, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3157894736842106, | |
| "grad_norm": 2.287461098881695, | |
| "learning_rate": 2.6202630348146323e-06, | |
| "loss": 0.0809, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.3178947368421052, | |
| "grad_norm": 2.443760626345547, | |
| "learning_rate": 2.605734240499652e-06, | |
| "loss": 0.0604, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 2.845171834877243, | |
| "learning_rate": 2.5912316294914232e-06, | |
| "loss": 0.0744, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.3221052631578947, | |
| "grad_norm": 2.1955991785027127, | |
| "learning_rate": 2.576755360388177e-06, | |
| "loss": 0.0592, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3242105263157895, | |
| "grad_norm": 3.8739275751093456, | |
| "learning_rate": 2.562305591500069e-06, | |
| "loss": 0.1056, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.3263157894736843, | |
| "grad_norm": 2.6932111092229234, | |
| "learning_rate": 2.5478824808474613e-06, | |
| "loss": 0.0762, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.328421052631579, | |
| "grad_norm": 2.3653261808302393, | |
| "learning_rate": 2.5334861861591753e-06, | |
| "loss": 0.072, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.3305263157894736, | |
| "grad_norm": 3.0167769884448057, | |
| "learning_rate": 2.5191168648707888e-06, | |
| "loss": 0.0896, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.3326315789473684, | |
| "grad_norm": 2.4444541248066796, | |
| "learning_rate": 2.5047746741228977e-06, | |
| "loss": 0.0679, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.3347368421052632, | |
| "grad_norm": 3.1319838501376056, | |
| "learning_rate": 2.490459770759398e-06, | |
| "loss": 0.0794, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.3368421052631578, | |
| "grad_norm": 2.4160632314580583, | |
| "learning_rate": 2.476172311325783e-06, | |
| "loss": 0.057, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.3389473684210527, | |
| "grad_norm": 2.8056259509770083, | |
| "learning_rate": 2.461912452067415e-06, | |
| "loss": 0.0788, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3410526315789473, | |
| "grad_norm": 3.5606250812923994, | |
| "learning_rate": 2.447680348927837e-06, | |
| "loss": 0.0991, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.343157894736842, | |
| "grad_norm": 2.939424365838173, | |
| "learning_rate": 2.433476157547044e-06, | |
| "loss": 0.0791, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.345263157894737, | |
| "grad_norm": 2.3944747127012547, | |
| "learning_rate": 2.4193000332597984e-06, | |
| "loss": 0.0776, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.3473684210526315, | |
| "grad_norm": 2.4097140332528144, | |
| "learning_rate": 2.4051521310939258e-06, | |
| "loss": 0.0548, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3494736842105264, | |
| "grad_norm": 2.484876677592921, | |
| "learning_rate": 2.391032605768613e-06, | |
| "loss": 0.0639, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.351578947368421, | |
| "grad_norm": 2.9569313033101023, | |
| "learning_rate": 2.3769416116927335e-06, | |
| "loss": 0.0702, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.3536842105263158, | |
| "grad_norm": 2.3154837496863268, | |
| "learning_rate": 2.3628793029631353e-06, | |
| "loss": 0.0696, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.3557894736842107, | |
| "grad_norm": 4.0950527559872345, | |
| "learning_rate": 2.3488458333629777e-06, | |
| "loss": 0.0988, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.3578947368421053, | |
| "grad_norm": 3.2898299985671953, | |
| "learning_rate": 2.3348413563600324e-06, | |
| "loss": 0.0998, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 3.5565589465236402, | |
| "learning_rate": 2.320866025105016e-06, | |
| "loss": 0.0748, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.3621052631578947, | |
| "grad_norm": 2.9838579493260142, | |
| "learning_rate": 2.3069199924299175e-06, | |
| "loss": 0.0781, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.3642105263157895, | |
| "grad_norm": 3.109898543479839, | |
| "learning_rate": 2.29300341084631e-06, | |
| "loss": 0.0702, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.3663157894736842, | |
| "grad_norm": 2.736969851304859, | |
| "learning_rate": 2.2791164325437047e-06, | |
| "loss": 0.0792, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.368421052631579, | |
| "grad_norm": 3.3868564041377973, | |
| "learning_rate": 2.265259209387867e-06, | |
| "loss": 0.0899, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.3705263157894736, | |
| "grad_norm": 2.7082372890636375, | |
| "learning_rate": 2.2514318929191707e-06, | |
| "loss": 0.0752, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.3726315789473684, | |
| "grad_norm": 3.274685542658562, | |
| "learning_rate": 2.2376346343509343e-06, | |
| "loss": 0.0789, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.3747368421052633, | |
| "grad_norm": 3.3918195389906436, | |
| "learning_rate": 2.2238675845677663e-06, | |
| "loss": 0.0811, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.3768421052631579, | |
| "grad_norm": 2.436153684588233, | |
| "learning_rate": 2.2101308941239204e-06, | |
| "loss": 0.0694, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.3789473684210527, | |
| "grad_norm": 3.2956597758884816, | |
| "learning_rate": 2.1964247132416373e-06, | |
| "loss": 0.0845, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.3810526315789473, | |
| "grad_norm": 2.891107537325035, | |
| "learning_rate": 2.182749191809518e-06, | |
| "loss": 0.0806, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.3831578947368421, | |
| "grad_norm": 3.174882207717556, | |
| "learning_rate": 2.1691044793808734e-06, | |
| "loss": 0.0766, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.385263157894737, | |
| "grad_norm": 3.239310623984899, | |
| "learning_rate": 2.1554907251720947e-06, | |
| "loss": 0.1132, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.3873684210526316, | |
| "grad_norm": 3.0000793459288797, | |
| "learning_rate": 2.1419080780610123e-06, | |
| "loss": 0.0779, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.3894736842105262, | |
| "grad_norm": 3.0156978277959166, | |
| "learning_rate": 2.1283566865852824e-06, | |
| "loss": 0.074, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.391578947368421, | |
| "grad_norm": 2.6551132879995007, | |
| "learning_rate": 2.11483669894075e-06, | |
| "loss": 0.0746, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.3936842105263159, | |
| "grad_norm": 2.3673083937385875, | |
| "learning_rate": 2.1013482629798334e-06, | |
| "loss": 0.0714, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.3957894736842105, | |
| "grad_norm": 3.2553558351574288, | |
| "learning_rate": 2.08789152620991e-06, | |
| "loss": 0.1127, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.3978947368421053, | |
| "grad_norm": 3.1793358246388386, | |
| "learning_rate": 2.0744666357916925e-06, | |
| "loss": 0.1027, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 2.3044156367485766, | |
| "learning_rate": 2.061073738537635e-06, | |
| "loss": 0.079, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.4021052631578947, | |
| "grad_norm": 3.038506581721067, | |
| "learning_rate": 2.0477129809103147e-06, | |
| "loss": 0.078, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4042105263157896, | |
| "grad_norm": 3.2134574970286254, | |
| "learning_rate": 2.034384509020837e-06, | |
| "loss": 0.0787, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.4063157894736842, | |
| "grad_norm": 2.8833109727229593, | |
| "learning_rate": 2.021088468627237e-06, | |
| "loss": 0.0945, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.408421052631579, | |
| "grad_norm": 2.6681060575893922, | |
| "learning_rate": 2.0078250051328783e-06, | |
| "loss": 0.0785, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.4105263157894736, | |
| "grad_norm": 3.309105305859723, | |
| "learning_rate": 1.9945942635848745e-06, | |
| "loss": 0.0932, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4126315789473685, | |
| "grad_norm": 2.7557375649742992, | |
| "learning_rate": 1.981396388672496e-06, | |
| "loss": 0.0704, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.4147368421052633, | |
| "grad_norm": 2.780789111855544, | |
| "learning_rate": 1.9682315247255897e-06, | |
| "loss": 0.0681, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.416842105263158, | |
| "grad_norm": 2.891129671769762, | |
| "learning_rate": 1.9550998157129946e-06, | |
| "loss": 0.0689, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.4189473684210525, | |
| "grad_norm": 2.8438288324834136, | |
| "learning_rate": 1.9420014052409793e-06, | |
| "loss": 0.0948, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4210526315789473, | |
| "grad_norm": 2.8648199763393363, | |
| "learning_rate": 1.928936436551661e-06, | |
| "loss": 0.0852, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.4231578947368422, | |
| "grad_norm": 2.4983276715177802, | |
| "learning_rate": 1.915905052521445e-06, | |
| "loss": 0.0691, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4252631578947368, | |
| "grad_norm": 2.6685238310395167, | |
| "learning_rate": 1.9029073956594607e-06, | |
| "loss": 0.0902, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.4273684210526316, | |
| "grad_norm": 2.6981688261841623, | |
| "learning_rate": 1.8899436081059974e-06, | |
| "loss": 0.0626, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4294736842105262, | |
| "grad_norm": 3.4512093051473287, | |
| "learning_rate": 1.877013831630961e-06, | |
| "loss": 0.0873, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.431578947368421, | |
| "grad_norm": 3.136937795473418, | |
| "learning_rate": 1.864118207632315e-06, | |
| "loss": 0.0817, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.433684210526316, | |
| "grad_norm": 2.845589565177414, | |
| "learning_rate": 1.851256877134538e-06, | |
| "loss": 0.084, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.4357894736842105, | |
| "grad_norm": 2.6247730269493634, | |
| "learning_rate": 1.838429980787081e-06, | |
| "loss": 0.0868, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.4378947368421053, | |
| "grad_norm": 2.212932444663422, | |
| "learning_rate": 1.825637658862824e-06, | |
| "loss": 0.056, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 3.0708180874395525, | |
| "learning_rate": 1.8128800512565514e-06, | |
| "loss": 0.0819, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4421052631578948, | |
| "grad_norm": 3.191848306499893, | |
| "learning_rate": 1.8001572974834169e-06, | |
| "loss": 0.0874, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.4442105263157896, | |
| "grad_norm": 3.118644672611863, | |
| "learning_rate": 1.7874695366774191e-06, | |
| "loss": 0.0703, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.4463157894736842, | |
| "grad_norm": 3.3640943896050577, | |
| "learning_rate": 1.774816907589873e-06, | |
| "loss": 0.0792, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.4484210526315788, | |
| "grad_norm": 2.4258203150187994, | |
| "learning_rate": 1.7621995485879062e-06, | |
| "loss": 0.0759, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.4505263157894737, | |
| "grad_norm": 2.581622498733916, | |
| "learning_rate": 1.749617597652934e-06, | |
| "loss": 0.063, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.4526315789473685, | |
| "grad_norm": 2.946473481196987, | |
| "learning_rate": 1.7370711923791567e-06, | |
| "loss": 0.0822, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.454736842105263, | |
| "grad_norm": 2.4753384481559055, | |
| "learning_rate": 1.7245604699720536e-06, | |
| "loss": 0.0598, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.456842105263158, | |
| "grad_norm": 3.416152120993626, | |
| "learning_rate": 1.7120855672468779e-06, | |
| "loss": 0.0907, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.4589473684210525, | |
| "grad_norm": 2.695879145021625, | |
| "learning_rate": 1.6996466206271679e-06, | |
| "loss": 0.0612, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.4610526315789474, | |
| "grad_norm": 2.9873858087756635, | |
| "learning_rate": 1.6872437661432518e-06, | |
| "loss": 0.0811, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.4631578947368422, | |
| "grad_norm": 3.4960013543762125, | |
| "learning_rate": 1.6748771394307584e-06, | |
| "loss": 0.0813, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.4652631578947368, | |
| "grad_norm": 2.277927581675807, | |
| "learning_rate": 1.6625468757291379e-06, | |
| "loss": 0.0561, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.4673684210526317, | |
| "grad_norm": 2.3801716966637105, | |
| "learning_rate": 1.6502531098801756e-06, | |
| "loss": 0.0672, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.4694736842105263, | |
| "grad_norm": 3.4687383026734837, | |
| "learning_rate": 1.6379959763265268e-06, | |
| "loss": 0.0876, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.471578947368421, | |
| "grad_norm": 3.2466329337423874, | |
| "learning_rate": 1.62577560911024e-06, | |
| "loss": 0.0778, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.4736842105263157, | |
| "grad_norm": 2.904526223024363, | |
| "learning_rate": 1.6135921418712959e-06, | |
| "loss": 0.0938, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.4757894736842105, | |
| "grad_norm": 2.686553988217244, | |
| "learning_rate": 1.6014457078461354e-06, | |
| "loss": 0.0643, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.4778947368421052, | |
| "grad_norm": 3.5299540645067444, | |
| "learning_rate": 1.5893364398662175e-06, | |
| "loss": 0.0936, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 2.9376299944007855, | |
| "learning_rate": 1.5772644703565564e-06, | |
| "loss": 0.0853, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.4821052631578948, | |
| "grad_norm": 2.688100953143273, | |
| "learning_rate": 1.5652299313342772e-06, | |
| "loss": 0.0792, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.4842105263157894, | |
| "grad_norm": 3.4048347453623857, | |
| "learning_rate": 1.5532329544071712e-06, | |
| "loss": 0.083, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.4863157894736843, | |
| "grad_norm": 2.0479188515220623, | |
| "learning_rate": 1.5412736707722537e-06, | |
| "loss": 0.0483, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.4884210526315789, | |
| "grad_norm": 2.2750434748340935, | |
| "learning_rate": 1.5293522112143371e-06, | |
| "loss": 0.0619, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.4905263157894737, | |
| "grad_norm": 2.7200793140054103, | |
| "learning_rate": 1.517468706104589e-06, | |
| "loss": 0.0727, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.4926315789473685, | |
| "grad_norm": 2.2546054623423335, | |
| "learning_rate": 1.505623285399121e-06, | |
| "loss": 0.0488, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.4947368421052631, | |
| "grad_norm": 2.7989024442399435, | |
| "learning_rate": 1.4938160786375571e-06, | |
| "loss": 0.0921, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.496842105263158, | |
| "grad_norm": 2.6090857807395023, | |
| "learning_rate": 1.4820472149416153e-06, | |
| "loss": 0.074, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.4989473684210526, | |
| "grad_norm": 2.5952217582934756, | |
| "learning_rate": 1.4703168230137072e-06, | |
| "loss": 0.0531, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5010526315789474, | |
| "grad_norm": 2.854421948263307, | |
| "learning_rate": 1.4586250311355132e-06, | |
| "loss": 0.0706, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.5031578947368422, | |
| "grad_norm": 2.491393713483, | |
| "learning_rate": 1.4469719671666043e-06, | |
| "loss": 0.0712, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.5052631578947369, | |
| "grad_norm": 3.0941426529266085, | |
| "learning_rate": 1.4353577585430152e-06, | |
| "loss": 0.1008, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.5073684210526315, | |
| "grad_norm": 2.1188906422201153, | |
| "learning_rate": 1.4237825322758735e-06, | |
| "loss": 0.053, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5094736842105263, | |
| "grad_norm": 2.499562810650923, | |
| "learning_rate": 1.412246414949997e-06, | |
| "loss": 0.0773, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.5115789473684211, | |
| "grad_norm": 2.686352758516756, | |
| "learning_rate": 1.4007495327225162e-06, | |
| "loss": 0.0803, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5136842105263157, | |
| "grad_norm": 3.003166198685424, | |
| "learning_rate": 1.389292011321498e-06, | |
| "loss": 0.0942, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.5157894736842106, | |
| "grad_norm": 3.276518434334761, | |
| "learning_rate": 1.3778739760445552e-06, | |
| "loss": 0.0822, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5178947368421052, | |
| "grad_norm": 2.5581478643854147, | |
| "learning_rate": 1.3664955517574967e-06, | |
| "loss": 0.0656, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 2.9505141669864203, | |
| "learning_rate": 1.3551568628929434e-06, | |
| "loss": 0.0695, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5221052631578948, | |
| "grad_norm": 2.8839627500523632, | |
| "learning_rate": 1.343858033448982e-06, | |
| "loss": 0.0731, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.5242105263157895, | |
| "grad_norm": 2.7109817535795693, | |
| "learning_rate": 1.3325991869878013e-06, | |
| "loss": 0.0648, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.526315789473684, | |
| "grad_norm": 2.9195695673715063, | |
| "learning_rate": 1.321380446634342e-06, | |
| "loss": 0.0825, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.528421052631579, | |
| "grad_norm": 2.857165833663471, | |
| "learning_rate": 1.3102019350749528e-06, | |
| "loss": 0.062, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.5305263157894737, | |
| "grad_norm": 3.120768459996078, | |
| "learning_rate": 1.2990637745560418e-06, | |
| "loss": 0.0638, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.5326315789473686, | |
| "grad_norm": 3.601845356286033, | |
| "learning_rate": 1.2879660868827508e-06, | |
| "loss": 0.0606, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.5347368421052632, | |
| "grad_norm": 2.754264101684756, | |
| "learning_rate": 1.2769089934176126e-06, | |
| "loss": 0.061, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.5368421052631578, | |
| "grad_norm": 3.763355031418207, | |
| "learning_rate": 1.2658926150792321e-06, | |
| "loss": 0.0853, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5389473684210526, | |
| "grad_norm": 2.435184182108376, | |
| "learning_rate": 1.2549170723409548e-06, | |
| "loss": 0.068, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.5410526315789475, | |
| "grad_norm": 3.1384502245840205, | |
| "learning_rate": 1.243982485229559e-06, | |
| "loss": 0.0839, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.543157894736842, | |
| "grad_norm": 2.915181764624064, | |
| "learning_rate": 1.233088973323937e-06, | |
| "loss": 0.0932, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.545263157894737, | |
| "grad_norm": 3.4631955921569824, | |
| "learning_rate": 1.2222366557537911e-06, | |
| "loss": 0.0902, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.5473684210526315, | |
| "grad_norm": 3.012931176367388, | |
| "learning_rate": 1.2114256511983274e-06, | |
| "loss": 0.0887, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.5494736842105263, | |
| "grad_norm": 3.1207818685791144, | |
| "learning_rate": 1.200656077884958e-06, | |
| "loss": 0.1018, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.5515789473684212, | |
| "grad_norm": 2.4612609560279877, | |
| "learning_rate": 1.189928053588012e-06, | |
| "loss": 0.0822, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.5536842105263158, | |
| "grad_norm": 3.8256380681691797, | |
| "learning_rate": 1.1792416956274443e-06, | |
| "loss": 0.0885, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.5557894736842104, | |
| "grad_norm": 2.3636768015398557, | |
| "learning_rate": 1.1685971208675539e-06, | |
| "loss": 0.0459, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.5578947368421052, | |
| "grad_norm": 2.6401608062517825, | |
| "learning_rate": 1.157994445715706e-06, | |
| "loss": 0.0828, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 2.3934171850503727, | |
| "learning_rate": 1.1474337861210543e-06, | |
| "loss": 0.0678, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.5621052631578949, | |
| "grad_norm": 2.337161351456917, | |
| "learning_rate": 1.1369152575732823e-06, | |
| "loss": 0.0514, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.5642105263157895, | |
| "grad_norm": 2.7480915820160154, | |
| "learning_rate": 1.1264389751013326e-06, | |
| "loss": 0.0881, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.566315789473684, | |
| "grad_norm": 2.3256118886994943, | |
| "learning_rate": 1.1160050532721527e-06, | |
| "loss": 0.0733, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.568421052631579, | |
| "grad_norm": 2.798649624701208, | |
| "learning_rate": 1.1056136061894386e-06, | |
| "loss": 0.0833, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.5705263157894738, | |
| "grad_norm": 3.3074889617939847, | |
| "learning_rate": 1.095264747492391e-06, | |
| "loss": 0.0854, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.5726315789473684, | |
| "grad_norm": 3.246747529237332, | |
| "learning_rate": 1.0849585903544707e-06, | |
| "loss": 0.09, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.5747368421052632, | |
| "grad_norm": 2.268435499902229, | |
| "learning_rate": 1.0746952474821615e-06, | |
| "loss": 0.0635, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.5768421052631578, | |
| "grad_norm": 3.048564605805993, | |
| "learning_rate": 1.0644748311137377e-06, | |
| "loss": 0.0736, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.5789473684210527, | |
| "grad_norm": 2.729660101843144, | |
| "learning_rate": 1.0542974530180327e-06, | |
| "loss": 0.0815, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.5810526315789475, | |
| "grad_norm": 3.846053704689222, | |
| "learning_rate": 1.0441632244932238e-06, | |
| "loss": 0.0855, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.583157894736842, | |
| "grad_norm": 3.242518428961916, | |
| "learning_rate": 1.0340722563656109e-06, | |
| "loss": 0.0617, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.5852631578947367, | |
| "grad_norm": 3.288508939138202, | |
| "learning_rate": 1.0240246589884046e-06, | |
| "loss": 0.0627, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.5873684210526315, | |
| "grad_norm": 2.6583829662575753, | |
| "learning_rate": 1.0140205422405213e-06, | |
| "loss": 0.0668, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.5894736842105264, | |
| "grad_norm": 3.1324991437039547, | |
| "learning_rate": 1.0040600155253766e-06, | |
| "loss": 0.0875, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.5915789473684212, | |
| "grad_norm": 2.1752967431606436, | |
| "learning_rate": 9.941431877696955e-07, | |
| "loss": 0.0625, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.5936842105263158, | |
| "grad_norm": 3.153051450237148, | |
| "learning_rate": 9.842701674223187e-07, | |
| "loss": 0.0916, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.5957894736842104, | |
| "grad_norm": 3.0851678174272656, | |
| "learning_rate": 9.744410624530148e-07, | |
| "loss": 0.0682, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.5978947368421053, | |
| "grad_norm": 3.6017843313201627, | |
| "learning_rate": 9.646559803512995e-07, | |
| "loss": 0.0718, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 2.6419335189350717, | |
| "learning_rate": 9.549150281252633e-07, | |
| "loss": 0.0771, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6021052631578947, | |
| "grad_norm": 2.672014578151709, | |
| "learning_rate": 9.452183123003999e-07, | |
| "loss": 0.0664, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.6042105263157893, | |
| "grad_norm": 2.7604515088486776, | |
| "learning_rate": 9.355659389184396e-07, | |
| "loss": 0.0802, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.6063157894736841, | |
| "grad_norm": 2.4989218985147827, | |
| "learning_rate": 9.259580135361929e-07, | |
| "loss": 0.0712, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.608421052631579, | |
| "grad_norm": 2.761122397313585, | |
| "learning_rate": 9.163946412243896e-07, | |
| "loss": 0.0776, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6105263157894738, | |
| "grad_norm": 2.502582479061757, | |
| "learning_rate": 9.068759265665384e-07, | |
| "loss": 0.0579, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.6126315789473684, | |
| "grad_norm": 2.564496551486698, | |
| "learning_rate": 8.974019736577777e-07, | |
| "loss": 0.067, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.614736842105263, | |
| "grad_norm": 2.6370265688116774, | |
| "learning_rate": 8.879728861037385e-07, | |
| "loss": 0.0851, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.6168421052631579, | |
| "grad_norm": 2.711692043610178, | |
| "learning_rate": 8.785887670194137e-07, | |
| "loss": 0.0503, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6189473684210527, | |
| "grad_norm": 2.9973543080867993, | |
| "learning_rate": 8.692497190280225e-07, | |
| "loss": 0.084, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.6210526315789475, | |
| "grad_norm": 2.797781874617319, | |
| "learning_rate": 8.599558442598998e-07, | |
| "loss": 0.0772, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.6231578947368421, | |
| "grad_norm": 3.008160474882518, | |
| "learning_rate": 8.507072443513703e-07, | |
| "loss": 0.0718, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.6252631578947367, | |
| "grad_norm": 2.569308456782316, | |
| "learning_rate": 8.415040204436426e-07, | |
| "loss": 0.0566, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.6273684210526316, | |
| "grad_norm": 2.605706053568482, | |
| "learning_rate": 8.323462731816962e-07, | |
| "loss": 0.0572, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.6294736842105264, | |
| "grad_norm": 2.326197374578758, | |
| "learning_rate": 8.232341027131885e-07, | |
| "loss": 0.0627, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.631578947368421, | |
| "grad_norm": 2.5642000583273283, | |
| "learning_rate": 8.141676086873574e-07, | |
| "loss": 0.0751, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.6336842105263156, | |
| "grad_norm": 2.191527946956071, | |
| "learning_rate": 8.051468902539272e-07, | |
| "loss": 0.0383, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.6357894736842105, | |
| "grad_norm": 2.4051907305716265, | |
| "learning_rate": 7.961720460620321e-07, | |
| "loss": 0.0627, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.6378947368421053, | |
| "grad_norm": 2.5165851730543114, | |
| "learning_rate": 7.872431742591268e-07, | |
| "loss": 0.0579, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 3.389030001143065, | |
| "learning_rate": 7.783603724899258e-07, | |
| "loss": 0.0897, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.6421052631578947, | |
| "grad_norm": 3.150859694485845, | |
| "learning_rate": 7.695237378953224e-07, | |
| "loss": 0.0889, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.6442105263157893, | |
| "grad_norm": 2.5782746782491577, | |
| "learning_rate": 7.607333671113409e-07, | |
| "loss": 0.0691, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.6463157894736842, | |
| "grad_norm": 2.932437547604155, | |
| "learning_rate": 7.519893562680663e-07, | |
| "loss": 0.0747, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.648421052631579, | |
| "grad_norm": 2.7289448649430486, | |
| "learning_rate": 7.432918009885997e-07, | |
| "loss": 0.0894, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.6505263157894738, | |
| "grad_norm": 2.622139736136532, | |
| "learning_rate": 7.346407963880137e-07, | |
| "loss": 0.0657, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.6526315789473685, | |
| "grad_norm": 2.844401593647993, | |
| "learning_rate": 7.260364370723044e-07, | |
| "loss": 0.0678, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.654736842105263, | |
| "grad_norm": 2.46801387849074, | |
| "learning_rate": 7.174788171373731e-07, | |
| "loss": 0.0698, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.656842105263158, | |
| "grad_norm": 3.2459197537543103, | |
| "learning_rate": 7.089680301679752e-07, | |
| "loss": 0.0763, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.6589473684210527, | |
| "grad_norm": 2.1306283714857694, | |
| "learning_rate": 7.005041692367154e-07, | |
| "loss": 0.0516, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.6610526315789473, | |
| "grad_norm": 2.6775897413319028, | |
| "learning_rate": 6.92087326903022e-07, | |
| "loss": 0.0619, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.663157894736842, | |
| "grad_norm": 2.524367526847338, | |
| "learning_rate": 6.837175952121305e-07, | |
| "loss": 0.0688, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.6652631578947368, | |
| "grad_norm": 2.708587596728961, | |
| "learning_rate": 6.753950656940905e-07, | |
| "loss": 0.0703, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.6673684210526316, | |
| "grad_norm": 2.897104706239707, | |
| "learning_rate": 6.671198293627479e-07, | |
| "loss": 0.0621, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.6694736842105264, | |
| "grad_norm": 3.359155853905581, | |
| "learning_rate": 6.58891976714764e-07, | |
| "loss": 0.0843, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.671578947368421, | |
| "grad_norm": 3.1572683992293564, | |
| "learning_rate": 6.507115977286144e-07, | |
| "loss": 0.0631, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.6736842105263157, | |
| "grad_norm": 2.5427050293849613, | |
| "learning_rate": 6.425787818636131e-07, | |
| "loss": 0.0789, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.6757894736842105, | |
| "grad_norm": 3.066625121124378, | |
| "learning_rate": 6.34493618058935e-07, | |
| "loss": 0.0686, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.6778947368421053, | |
| "grad_norm": 2.822755036635395, | |
| "learning_rate": 6.264561947326331e-07, | |
| "loss": 0.0684, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 2.8703507999231035, | |
| "learning_rate": 6.184665997806832e-07, | |
| "loss": 0.0747, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.6821052631578948, | |
| "grad_norm": 3.333171116783649, | |
| "learning_rate": 6.105249205760128e-07, | |
| "loss": 0.089, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.6842105263157894, | |
| "grad_norm": 2.2878271103214316, | |
| "learning_rate": 6.026312439675553e-07, | |
| "loss": 0.0622, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6842105263157894, | |
| "eval_loss": 0.1972101926803589, | |
| "eval_runtime": 0.9281, | |
| "eval_samples_per_second": 42.023, | |
| "eval_steps_per_second": 10.775, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6863157894736842, | |
| "grad_norm": 2.812568484004764, | |
| "learning_rate": 5.947856562792926e-07, | |
| "loss": 0.0654, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.688421052631579, | |
| "grad_norm": 2.7128006161210108, | |
| "learning_rate": 5.869882433093154e-07, | |
| "loss": 0.0864, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.6905263157894737, | |
| "grad_norm": 3.0041687010640983, | |
| "learning_rate": 5.79239090328883e-07, | |
| "loss": 0.0747, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.6926315789473683, | |
| "grad_norm": 2.9138447859502095, | |
| "learning_rate": 5.715382820814885e-07, | |
| "loss": 0.0802, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.694736842105263, | |
| "grad_norm": 2.5166922519171506, | |
| "learning_rate": 5.63885902781941e-07, | |
| "loss": 0.0675, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.696842105263158, | |
| "grad_norm": 2.8497481916116896, | |
| "learning_rate": 5.562820361154315e-07, | |
| "loss": 0.0696, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.6989473684210528, | |
| "grad_norm": 3.4754839940102, | |
| "learning_rate": 5.487267652366291e-07, | |
| "loss": 0.0852, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.7010526315789474, | |
| "grad_norm": 3.2547037998817596, | |
| "learning_rate": 5.412201727687644e-07, | |
| "loss": 0.0862, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.703157894736842, | |
| "grad_norm": 2.414633712271825, | |
| "learning_rate": 5.337623408027293e-07, | |
| "loss": 0.061, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.7052631578947368, | |
| "grad_norm": 3.401374616115059, | |
| "learning_rate": 5.263533508961827e-07, | |
| "loss": 0.0952, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7073684210526316, | |
| "grad_norm": 2.73608904289166, | |
| "learning_rate": 5.189932840726486e-07, | |
| "loss": 0.0679, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.7094736842105265, | |
| "grad_norm": 2.808779656242632, | |
| "learning_rate": 5.116822208206396e-07, | |
| "loss": 0.0636, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.711578947368421, | |
| "grad_norm": 2.8278948519331046, | |
| "learning_rate": 5.044202410927707e-07, | |
| "loss": 0.0757, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.7136842105263157, | |
| "grad_norm": 2.5103777723289054, | |
| "learning_rate": 4.972074243048896e-07, | |
| "loss": 0.0603, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.7157894736842105, | |
| "grad_norm": 3.1933744646672135, | |
| "learning_rate": 4.900438493352056e-07, | |
| "loss": 0.0682, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.7178947368421054, | |
| "grad_norm": 3.1503659721543706, | |
| "learning_rate": 4.829295945234258e-07, | |
| "loss": 0.072, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 2.747297586956878, | |
| "learning_rate": 4.758647376699033e-07, | |
| "loss": 0.0528, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.7221052631578946, | |
| "grad_norm": 2.2158924674953777, | |
| "learning_rate": 4.6884935603477733e-07, | |
| "loss": 0.0565, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.7242105263157894, | |
| "grad_norm": 2.8793571493416823, | |
| "learning_rate": 4.6188352633713964e-07, | |
| "loss": 0.072, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.7263157894736842, | |
| "grad_norm": 3.1524529277439055, | |
| "learning_rate": 4.549673247541875e-07, | |
| "loss": 0.0759, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.728421052631579, | |
| "grad_norm": 2.5386910299707854, | |
| "learning_rate": 4.48100826920394e-07, | |
| "loss": 0.0715, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.7305263157894737, | |
| "grad_norm": 3.012214075182323, | |
| "learning_rate": 4.412841079266778e-07, | |
| "loss": 0.0613, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.7326315789473683, | |
| "grad_norm": 2.708221561942324, | |
| "learning_rate": 4.345172423195865e-07, | |
| "loss": 0.0977, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.7347368421052631, | |
| "grad_norm": 3.115720504229963, | |
| "learning_rate": 4.27800304100478e-07, | |
| "loss": 0.0688, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.736842105263158, | |
| "grad_norm": 1.987345247381263, | |
| "learning_rate": 4.211333667247125e-07, | |
| "loss": 0.0569, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.7389473684210528, | |
| "grad_norm": 3.3300694952948593, | |
| "learning_rate": 4.1451650310085076e-07, | |
| "loss": 0.0718, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.7410526315789474, | |
| "grad_norm": 2.6073178578766365, | |
| "learning_rate": 4.079497855898501e-07, | |
| "loss": 0.0651, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.743157894736842, | |
| "grad_norm": 2.520132180808996, | |
| "learning_rate": 4.01433286004283e-07, | |
| "loss": 0.0746, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.7452631578947368, | |
| "grad_norm": 4.216004495536436, | |
| "learning_rate": 3.949670756075447e-07, | |
| "loss": 0.0986, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.7473684210526317, | |
| "grad_norm": 3.0641794864142313, | |
| "learning_rate": 3.885512251130763e-07, | |
| "loss": 0.0694, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7494736842105263, | |
| "grad_norm": 2.3436697576482493, | |
| "learning_rate": 3.8218580468359136e-07, | |
| "loss": 0.0677, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.751578947368421, | |
| "grad_norm": 2.5525226853131406, | |
| "learning_rate": 3.7587088393030604e-07, | |
| "loss": 0.07, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.7536842105263157, | |
| "grad_norm": 2.557138885671911, | |
| "learning_rate": 3.6960653191218333e-07, | |
| "loss": 0.0715, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.7557894736842106, | |
| "grad_norm": 2.7336947284134907, | |
| "learning_rate": 3.6339281713517304e-07, | |
| "loss": 0.0766, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.7578947368421054, | |
| "grad_norm": 2.7665277428264687, | |
| "learning_rate": 3.572298075514652e-07, | |
| "loss": 0.0892, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 3.7811259313548935, | |
| "learning_rate": 3.511175705587433e-07, | |
| "loss": 0.0848, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.7621052631578946, | |
| "grad_norm": 3.144574327722061, | |
| "learning_rate": 3.450561729994534e-07, | |
| "loss": 0.0738, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.7642105263157895, | |
| "grad_norm": 2.857062951752684, | |
| "learning_rate": 3.390456811600673e-07, | |
| "loss": 0.0733, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.7663157894736843, | |
| "grad_norm": 3.655857210909399, | |
| "learning_rate": 3.3308616077036113e-07, | |
| "loss": 0.1221, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.768421052631579, | |
| "grad_norm": 2.1902799347040944, | |
| "learning_rate": 3.271776770026963e-07, | |
| "loss": 0.0592, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.7705263157894737, | |
| "grad_norm": 3.4061442220418305, | |
| "learning_rate": 3.213202944713023e-07, | |
| "loss": 0.0959, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.7726315789473683, | |
| "grad_norm": 2.1231955359369286, | |
| "learning_rate": 3.1551407723157734e-07, | |
| "loss": 0.065, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.7747368421052632, | |
| "grad_norm": 2.8471057173295273, | |
| "learning_rate": 3.0975908877938277e-07, | |
| "loss": 0.0744, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.776842105263158, | |
| "grad_norm": 3.2337983555019605, | |
| "learning_rate": 3.040553920503503e-07, | |
| "loss": 0.0905, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.7789473684210526, | |
| "grad_norm": 2.542206604869396, | |
| "learning_rate": 2.984030494191942e-07, | |
| "loss": 0.0689, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.7810526315789472, | |
| "grad_norm": 2.980410819168702, | |
| "learning_rate": 2.928021226990263e-07, | |
| "loss": 0.0698, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.783157894736842, | |
| "grad_norm": 2.117615427082996, | |
| "learning_rate": 2.8725267314068496e-07, | |
| "loss": 0.0611, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.7852631578947369, | |
| "grad_norm": 2.3322615491429475, | |
| "learning_rate": 2.817547614320615e-07, | |
| "loss": 0.0606, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.7873684210526317, | |
| "grad_norm": 2.1217132504251626, | |
| "learning_rate": 2.763084476974376e-07, | |
| "loss": 0.0677, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.7894736842105263, | |
| "grad_norm": 2.635485392016026, | |
| "learning_rate": 2.7091379149682683e-07, | |
| "loss": 0.0654, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.791578947368421, | |
| "grad_norm": 2.9790312851694876, | |
| "learning_rate": 2.655708518253258e-07, | |
| "loss": 0.0677, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.7936842105263158, | |
| "grad_norm": 2.3973095715711317, | |
| "learning_rate": 2.602796871124663e-07, | |
| "loss": 0.0504, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.7957894736842106, | |
| "grad_norm": 2.7082756582731973, | |
| "learning_rate": 2.5504035522157853e-07, | |
| "loss": 0.0647, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.7978947368421052, | |
| "grad_norm": 2.9121449991346062, | |
| "learning_rate": 2.4985291344915675e-07, | |
| "loss": 0.0903, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 2.9867534692660245, | |
| "learning_rate": 2.447174185242324e-07, | |
| "loss": 0.074, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.8021052631578947, | |
| "grad_norm": 2.8737059550127455, | |
| "learning_rate": 2.3963392660775576e-07, | |
| "loss": 0.084, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.8042105263157895, | |
| "grad_norm": 3.2375432000666216, | |
| "learning_rate": 2.3460249329197825e-07, | |
| "loss": 0.0939, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.8063157894736843, | |
| "grad_norm": 3.286934040592846, | |
| "learning_rate": 2.296231735998511e-07, | |
| "loss": 0.0756, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.808421052631579, | |
| "grad_norm": 3.06301786931153, | |
| "learning_rate": 2.2469602198441575e-07, | |
| "loss": 0.0777, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.8105263157894735, | |
| "grad_norm": 3.1645723085351123, | |
| "learning_rate": 2.198210923282118e-07, | |
| "loss": 0.073, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8126315789473684, | |
| "grad_norm": 2.493058988989823, | |
| "learning_rate": 2.149984379426906e-07, | |
| "loss": 0.0764, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.8147368421052632, | |
| "grad_norm": 2.881863955432623, | |
| "learning_rate": 2.102281115676258e-07, | |
| "loss": 0.0809, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.816842105263158, | |
| "grad_norm": 2.7694248049652974, | |
| "learning_rate": 2.0551016537054492e-07, | |
| "loss": 0.0627, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.8189473684210526, | |
| "grad_norm": 2.879825150355328, | |
| "learning_rate": 2.008446509461498e-07, | |
| "loss": 0.0651, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8210526315789473, | |
| "grad_norm": 2.6298059648937135, | |
| "learning_rate": 1.962316193157593e-07, | |
| "loss": 0.077, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.823157894736842, | |
| "grad_norm": 2.302690351699349, | |
| "learning_rate": 1.91671120926748e-07, | |
| "loss": 0.055, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.825263157894737, | |
| "grad_norm": 2.954098530395544, | |
| "learning_rate": 1.871632056519962e-07, | |
| "loss": 0.0876, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.8273684210526315, | |
| "grad_norm": 2.7636736978855594, | |
| "learning_rate": 1.8270792278934302e-07, | |
| "loss": 0.0915, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.8294736842105264, | |
| "grad_norm": 2.7545832309484988, | |
| "learning_rate": 1.7830532106104747e-07, | |
| "loss": 0.0667, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.831578947368421, | |
| "grad_norm": 2.543153382990055, | |
| "learning_rate": 1.7395544861325718e-07, | |
| "loss": 0.0584, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.8336842105263158, | |
| "grad_norm": 2.807508111947715, | |
| "learning_rate": 1.696583530154794e-07, | |
| "loss": 0.0797, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.8357894736842106, | |
| "grad_norm": 2.8192335775327795, | |
| "learning_rate": 1.6541408126006464e-07, | |
| "loss": 0.0872, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.8378947368421052, | |
| "grad_norm": 3.209795989098829, | |
| "learning_rate": 1.6122267976168783e-07, | |
| "loss": 0.0999, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 3.4761511974704264, | |
| "learning_rate": 1.5708419435684463e-07, | |
| "loss": 0.0922, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.8421052631578947, | |
| "grad_norm": 3.1674697236400813, | |
| "learning_rate": 1.5299867030334815e-07, | |
| "loss": 0.0635, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.8442105263157895, | |
| "grad_norm": 3.6723508356857484, | |
| "learning_rate": 1.4896615227983468e-07, | |
| "loss": 0.0602, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.8463157894736844, | |
| "grad_norm": 2.911727155785081, | |
| "learning_rate": 1.4498668438527597e-07, | |
| "loss": 0.0529, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.848421052631579, | |
| "grad_norm": 2.3359023762817825, | |
| "learning_rate": 1.4106031013849498e-07, | |
| "loss": 0.0514, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.8505263157894736, | |
| "grad_norm": 2.409615553053479, | |
| "learning_rate": 1.3718707247769137e-07, | |
| "loss": 0.0555, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.8526315789473684, | |
| "grad_norm": 2.8117387336774855, | |
| "learning_rate": 1.333670137599713e-07, | |
| "loss": 0.0916, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8547368421052632, | |
| "grad_norm": 2.0682295293481077, | |
| "learning_rate": 1.2960017576088445e-07, | |
| "loss": 0.0582, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.8568421052631578, | |
| "grad_norm": 2.551562601977345, | |
| "learning_rate": 1.2588659967396998e-07, | |
| "loss": 0.0732, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.8589473684210527, | |
| "grad_norm": 3.2003699780502597, | |
| "learning_rate": 1.222263261102985e-07, | |
| "loss": 0.0711, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.8610526315789473, | |
| "grad_norm": 3.486580924102614, | |
| "learning_rate": 1.1861939509803688e-07, | |
| "loss": 0.0748, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.8631578947368421, | |
| "grad_norm": 2.2455491108015613, | |
| "learning_rate": 1.1506584608200366e-07, | |
| "loss": 0.0727, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.865263157894737, | |
| "grad_norm": 2.4398853127583284, | |
| "learning_rate": 1.1156571792324212e-07, | |
| "loss": 0.0529, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.8673684210526316, | |
| "grad_norm": 2.624358820662373, | |
| "learning_rate": 1.0811904889859337e-07, | |
| "loss": 0.0796, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.8694736842105262, | |
| "grad_norm": 2.984398704743138, | |
| "learning_rate": 1.0472587670027678e-07, | |
| "loss": 0.0853, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.871578947368421, | |
| "grad_norm": 3.4342828404155736, | |
| "learning_rate": 1.0138623843548078e-07, | |
| "loss": 0.0807, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.8736842105263158, | |
| "grad_norm": 2.135864268441869, | |
| "learning_rate": 9.810017062595322e-08, | |
| "loss": 0.053, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.8757894736842107, | |
| "grad_norm": 2.915254739899194, | |
| "learning_rate": 9.486770920760668e-08, | |
| "loss": 0.0751, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.8778947368421053, | |
| "grad_norm": 2.867686868160426, | |
| "learning_rate": 9.16888895301199e-08, | |
| "loss": 0.0695, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 2.4403837817102008, | |
| "learning_rate": 8.856374635655696e-08, | |
| "loss": 0.0552, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.8821052631578947, | |
| "grad_norm": 2.96278001070486, | |
| "learning_rate": 8.549231386298151e-08, | |
| "loss": 0.0745, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.8842105263157896, | |
| "grad_norm": 2.2812297751359134, | |
| "learning_rate": 8.247462563808816e-08, | |
| "loss": 0.058, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.8863157894736842, | |
| "grad_norm": 2.8257873863079532, | |
| "learning_rate": 7.951071468283166e-08, | |
| "loss": 0.0799, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.888421052631579, | |
| "grad_norm": 3.826719667852147, | |
| "learning_rate": 7.660061341006719e-08, | |
| "loss": 0.0713, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.8905263157894736, | |
| "grad_norm": 3.7405575996641156, | |
| "learning_rate": 7.374435364419675e-08, | |
| "loss": 0.0948, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.8926315789473684, | |
| "grad_norm": 3.4882300697302058, | |
| "learning_rate": 7.094196662081832e-08, | |
| "loss": 0.0778, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.8947368421052633, | |
| "grad_norm": 2.7491143569308525, | |
| "learning_rate": 6.819348298638839e-08, | |
| "loss": 0.0549, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.8968421052631579, | |
| "grad_norm": 2.255741826050675, | |
| "learning_rate": 6.549893279788278e-08, | |
| "loss": 0.0581, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.8989473684210525, | |
| "grad_norm": 2.8944471113447774, | |
| "learning_rate": 6.285834552247127e-08, | |
| "loss": 0.101, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.9010526315789473, | |
| "grad_norm": 3.516368002515928, | |
| "learning_rate": 6.027175003719354e-08, | |
| "loss": 0.0843, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.9031578947368422, | |
| "grad_norm": 2.8055662943099717, | |
| "learning_rate": 5.773917462864265e-08, | |
| "loss": 0.0718, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.905263157894737, | |
| "grad_norm": 2.8528278587154317, | |
| "learning_rate": 5.526064699265754e-08, | |
| "loss": 0.0771, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.9073684210526316, | |
| "grad_norm": 2.4442597669717374, | |
| "learning_rate": 5.2836194234019976e-08, | |
| "loss": 0.0578, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9094736842105262, | |
| "grad_norm": 3.328401624060881, | |
| "learning_rate": 5.0465842866156965e-08, | |
| "loss": 0.0834, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.911578947368421, | |
| "grad_norm": 3.1255146432363095, | |
| "learning_rate": 4.8149618810850454e-08, | |
| "loss": 0.0775, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.9136842105263159, | |
| "grad_norm": 2.6888604157650615, | |
| "learning_rate": 4.588754739795587e-08, | |
| "loss": 0.0697, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.9157894736842105, | |
| "grad_norm": 4.068321596359929, | |
| "learning_rate": 4.367965336512403e-08, | |
| "loss": 0.0959, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.917894736842105, | |
| "grad_norm": 2.432792122907068, | |
| "learning_rate": 4.1525960857530244e-08, | |
| "loss": 0.0583, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 2.77665437029982, | |
| "learning_rate": 3.9426493427611177e-08, | |
| "loss": 0.0672, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.9221052631578948, | |
| "grad_norm": 3.087230249032823, | |
| "learning_rate": 3.738127403480507e-08, | |
| "loss": 0.0699, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.9242105263157896, | |
| "grad_norm": 2.6241279675480054, | |
| "learning_rate": 3.5390325045304704e-08, | |
| "loss": 0.0649, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.9263157894736842, | |
| "grad_norm": 2.7148617633550507, | |
| "learning_rate": 3.345366823180929e-08, | |
| "loss": 0.0677, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.9284210526315788, | |
| "grad_norm": 2.5474852778498316, | |
| "learning_rate": 3.1571324773286284e-08, | |
| "loss": 0.0639, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.9305263157894736, | |
| "grad_norm": 2.1409033097729813, | |
| "learning_rate": 2.9743315254743834e-08, | |
| "loss": 0.041, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.9326315789473685, | |
| "grad_norm": 3.275485354161904, | |
| "learning_rate": 2.7969659666999273e-08, | |
| "loss": 0.0881, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.9347368421052633, | |
| "grad_norm": 2.6681527860380396, | |
| "learning_rate": 2.625037740646763e-08, | |
| "loss": 0.0772, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.936842105263158, | |
| "grad_norm": 2.3495006124196576, | |
| "learning_rate": 2.4585487274942922e-08, | |
| "loss": 0.0557, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9389473684210525, | |
| "grad_norm": 2.5228819355573235, | |
| "learning_rate": 2.2975007479397736e-08, | |
| "loss": 0.0507, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.9410526315789474, | |
| "grad_norm": 3.756140115268572, | |
| "learning_rate": 2.1418955631781203e-08, | |
| "loss": 0.1053, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.9431578947368422, | |
| "grad_norm": 2.960441030514824, | |
| "learning_rate": 1.9917348748826337e-08, | |
| "loss": 0.0708, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.9452631578947368, | |
| "grad_norm": 2.8382521221794863, | |
| "learning_rate": 1.847020325186577e-08, | |
| "loss": 0.0531, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.9473684210526314, | |
| "grad_norm": 3.0227161901258914, | |
| "learning_rate": 1.7077534966650767e-08, | |
| "loss": 0.0752, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.9494736842105262, | |
| "grad_norm": 2.666050724013321, | |
| "learning_rate": 1.5739359123178587e-08, | |
| "loss": 0.0606, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.951578947368421, | |
| "grad_norm": 3.1791911217585054, | |
| "learning_rate": 1.4455690355525964e-08, | |
| "loss": 0.0657, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.953684210526316, | |
| "grad_norm": 2.9100920884957406, | |
| "learning_rate": 1.3226542701689215e-08, | |
| "loss": 0.0674, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.9557894736842105, | |
| "grad_norm": 2.903965005728698, | |
| "learning_rate": 1.2051929603428824e-08, | |
| "loss": 0.0801, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.9578947368421051, | |
| "grad_norm": 3.3785772789177706, | |
| "learning_rate": 1.0931863906127327e-08, | |
| "loss": 0.0813, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 2.8464427553618235, | |
| "learning_rate": 9.866357858642206e-09, | |
| "loss": 0.0773, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.9621052631578948, | |
| "grad_norm": 2.683834178385762, | |
| "learning_rate": 8.855423113177664e-09, | |
| "loss": 0.0878, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.9642105263157896, | |
| "grad_norm": 2.293423722958731, | |
| "learning_rate": 7.899070725153612e-09, | |
| "loss": 0.0574, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 1.9663157894736842, | |
| "grad_norm": 2.9921722390971985, | |
| "learning_rate": 6.997311153086883e-09, | |
| "loss": 0.0786, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.9684210526315788, | |
| "grad_norm": 3.0905052771040102, | |
| "learning_rate": 6.150154258476315e-09, | |
| "loss": 0.0795, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.9705263157894737, | |
| "grad_norm": 2.91652752925557, | |
| "learning_rate": 5.357609305692291e-09, | |
| "loss": 0.0869, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.9726315789473685, | |
| "grad_norm": 2.664322796790042, | |
| "learning_rate": 4.619684961881255e-09, | |
| "loss": 0.0603, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 1.9747368421052631, | |
| "grad_norm": 2.5227019769378507, | |
| "learning_rate": 3.936389296864129e-09, | |
| "loss": 0.0691, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.9768421052631577, | |
| "grad_norm": 3.070188327105861, | |
| "learning_rate": 3.307729783054159e-09, | |
| "loss": 0.0701, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 1.9789473684210526, | |
| "grad_norm": 2.527356227943579, | |
| "learning_rate": 2.7337132953697555e-09, | |
| "loss": 0.0543, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.9810526315789474, | |
| "grad_norm": 3.2854112794071066, | |
| "learning_rate": 2.214346111164556e-09, | |
| "loss": 0.0843, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 1.9831578947368422, | |
| "grad_norm": 2.4190595898058014, | |
| "learning_rate": 1.749633910153592e-09, | |
| "loss": 0.0584, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.9852631578947368, | |
| "grad_norm": 2.924977300546306, | |
| "learning_rate": 1.3395817743561135e-09, | |
| "loss": 0.0867, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 1.9873684210526315, | |
| "grad_norm": 2.892067107131255, | |
| "learning_rate": 9.841941880361917e-10, | |
| "loss": 0.0712, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.9894736842105263, | |
| "grad_norm": 2.507649725691115, | |
| "learning_rate": 6.834750376549793e-10, | |
| "loss": 0.0695, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.9915789473684211, | |
| "grad_norm": 3.029465353157538, | |
| "learning_rate": 4.374276118301879e-10, | |
| "loss": 0.0804, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.993684210526316, | |
| "grad_norm": 3.759725367800689, | |
| "learning_rate": 2.4605460129556446e-10, | |
| "loss": 0.0821, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 1.9957894736842106, | |
| "grad_norm": 2.500823320563419, | |
| "learning_rate": 1.0935809887702154e-10, | |
| "loss": 0.0678, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.9978947368421052, | |
| "grad_norm": 3.6772672072650834, | |
| "learning_rate": 2.733959946432663e-11, | |
| "loss": 0.0815, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.6395246965587384, | |
| "learning_rate": 0.0, | |
| "loss": 0.0596, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 950, | |
| "total_flos": 1598198317056.0, | |
| "train_loss": 0.15143464744875307, | |
| "train_runtime": 436.4524, | |
| "train_samples_per_second": 17.409, | |
| "train_steps_per_second": 2.177 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 950, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1598198317056.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |