| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 180, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0055768560474032764, |
| "grad_norm": 46.09545673843412, |
| "learning_rate": 0.0, |
| "loss": 1.7007, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.011153712094806553, |
| "grad_norm": 44.46037462061874, |
| "learning_rate": 2.7777777777777776e-07, |
| "loss": 1.6776, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01673056814220983, |
| "grad_norm": 40.73202594632966, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 1.5002, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.022307424189613106, |
| "grad_norm": 42.88149317876023, |
| "learning_rate": 8.333333333333333e-07, |
| "loss": 1.6238, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.027884280237016383, |
| "grad_norm": 42.513311455243326, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 1.678, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03346113628441966, |
| "grad_norm": 43.24081118630191, |
| "learning_rate": 1.3888888888888892e-06, |
| "loss": 1.7263, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.03903799233182294, |
| "grad_norm": 25.77384950516741, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 1.272, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.04461484837922621, |
| "grad_norm": 24.137113597713526, |
| "learning_rate": 1.944444444444445e-06, |
| "loss": 1.1886, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.050191704426629485, |
| "grad_norm": 11.629783818575346, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 1.2781, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.055768560474032766, |
| "grad_norm": 8.00658343314912, |
| "learning_rate": 2.5e-06, |
| "loss": 1.0554, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06134541652143604, |
| "grad_norm": 7.16421136479377, |
| "learning_rate": 2.7777777777777783e-06, |
| "loss": 1.172, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.06692227256883931, |
| "grad_norm": 4.137822360489128, |
| "learning_rate": 3.055555555555556e-06, |
| "loss": 1.0006, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0724991286162426, |
| "grad_norm": 2.9965048555106204, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.8589, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.07807598466364588, |
| "grad_norm": 3.1452698509974435, |
| "learning_rate": 3.6111111111111115e-06, |
| "loss": 1.0973, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.08365284071104914, |
| "grad_norm": 2.8688258039939702, |
| "learning_rate": 3.88888888888889e-06, |
| "loss": 1.2093, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.08922969675845242, |
| "grad_norm": 2.0484453697226055, |
| "learning_rate": 4.166666666666667e-06, |
| "loss": 0.9244, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0948065528058557, |
| "grad_norm": 2.4395289504901303, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 0.9074, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.10038340885325897, |
| "grad_norm": 3.3112350184991084, |
| "learning_rate": 4.722222222222222e-06, |
| "loss": 0.9056, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.10596026490066225, |
| "grad_norm": 2.2238505337460017, |
| "learning_rate": 5e-06, |
| "loss": 0.9156, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.11153712094806553, |
| "grad_norm": 1.6023122355959452, |
| "learning_rate": 4.999529926121254e-06, |
| "loss": 0.7145, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1171139769954688, |
| "grad_norm": 1.5747014721624342, |
| "learning_rate": 4.998119881260576e-06, |
| "loss": 0.9797, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.12269083304287208, |
| "grad_norm": 1.3008124678483608, |
| "learning_rate": 4.995770395678171e-06, |
| "loss": 0.8005, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.12826768909027536, |
| "grad_norm": 1.3341513132245302, |
| "learning_rate": 4.99248235291948e-06, |
| "loss": 0.9707, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.13384454513767863, |
| "grad_norm": 1.2836163377989422, |
| "learning_rate": 4.9882569894829146e-06, |
| "loss": 0.8303, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.13942140118508192, |
| "grad_norm": 1.5625132921969171, |
| "learning_rate": 4.983095894354858e-06, |
| "loss": 1.012, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1449982572324852, |
| "grad_norm": 1.3279470446330688, |
| "learning_rate": 4.977001008412113e-06, |
| "loss": 0.8311, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.15057511327988846, |
| "grad_norm": 1.1984169005450507, |
| "learning_rate": 4.969974623692023e-06, |
| "loss": 0.701, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.15615196932729175, |
| "grad_norm": 1.2333632916915551, |
| "learning_rate": 4.962019382530521e-06, |
| "loss": 0.8799, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.16172882537469502, |
| "grad_norm": 1.1133972779802328, |
| "learning_rate": 4.953138276568462e-06, |
| "loss": 0.75, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.16730568142209828, |
| "grad_norm": 1.0517214425691086, |
| "learning_rate": 4.943334645626589e-06, |
| "loss": 0.7046, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.17288253746950158, |
| "grad_norm": 0.9434629605801058, |
| "learning_rate": 4.93261217644956e-06, |
| "loss": 0.6824, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.17845939351690485, |
| "grad_norm": 1.1847049987820477, |
| "learning_rate": 4.9209749013195155e-06, |
| "loss": 0.8257, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1840362495643081, |
| "grad_norm": 1.0458338052848428, |
| "learning_rate": 4.908427196539701e-06, |
| "loss": 0.8103, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.1896131056117114, |
| "grad_norm": 1.0107452620708213, |
| "learning_rate": 4.894973780788722e-06, |
| "loss": 0.8077, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.19518996165911467, |
| "grad_norm": 1.0732542601644082, |
| "learning_rate": 4.8806197133460385e-06, |
| "loss": 0.83, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.20076681770651794, |
| "grad_norm": 1.111873065908315, |
| "learning_rate": 4.865370392189377e-06, |
| "loss": 0.8261, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.20634367375392124, |
| "grad_norm": 0.9469124459097857, |
| "learning_rate": 4.849231551964771e-06, |
| "loss": 0.7354, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.2119205298013245, |
| "grad_norm": 0.9890209354758053, |
| "learning_rate": 4.832209261830002e-06, |
| "loss": 0.7614, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.21749738584872777, |
| "grad_norm": 0.9617476508331165, |
| "learning_rate": 4.814309923172227e-06, |
| "loss": 0.6634, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.22307424189613106, |
| "grad_norm": 0.8693853556809209, |
| "learning_rate": 4.7955402672006855e-06, |
| "loss": 0.6524, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.22865109794353433, |
| "grad_norm": 1.1164427987030467, |
| "learning_rate": 4.775907352415367e-06, |
| "loss": 0.9437, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.2342279539909376, |
| "grad_norm": 1.0342203325734225, |
| "learning_rate": 4.755418561952595e-06, |
| "loss": 0.7833, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.2398048100383409, |
| "grad_norm": 1.0381130170634878, |
| "learning_rate": 4.734081600808531e-06, |
| "loss": 0.8537, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.24538166608574416, |
| "grad_norm": 0.980064465437933, |
| "learning_rate": 4.711904492941644e-06, |
| "loss": 0.7711, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.25095852213314745, |
| "grad_norm": 2.4464904323304255, |
| "learning_rate": 4.688895578255228e-06, |
| "loss": 0.8071, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2565353781805507, |
| "grad_norm": 0.9568068666237374, |
| "learning_rate": 4.665063509461098e-06, |
| "loss": 0.7486, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.262112234227954, |
| "grad_norm": 0.9773834471749688, |
| "learning_rate": 4.640417248825667e-06, |
| "loss": 0.6626, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.26768909027535726, |
| "grad_norm": 0.9467873226243072, |
| "learning_rate": 4.614966064799603e-06, |
| "loss": 0.7737, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2732659463227605, |
| "grad_norm": 0.929090416098371, |
| "learning_rate": 4.588719528532342e-06, |
| "loss": 0.7288, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.27884280237016384, |
| "grad_norm": 0.9558116358409616, |
| "learning_rate": 4.561687510272767e-06, |
| "loss": 0.7173, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2844196584175671, |
| "grad_norm": 1.0358016086386252, |
| "learning_rate": 4.533880175657419e-06, |
| "loss": 0.8558, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.2899965144649704, |
| "grad_norm": 0.9268356298097264, |
| "learning_rate": 4.50530798188761e-06, |
| "loss": 0.6913, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.29557337051237365, |
| "grad_norm": 0.9026491423974176, |
| "learning_rate": 4.475981673796899e-06, |
| "loss": 0.6573, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.3011502265597769, |
| "grad_norm": 0.9341984552378991, |
| "learning_rate": 4.445912279810401e-06, |
| "loss": 0.7178, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.3067270826071802, |
| "grad_norm": 0.963698636099554, |
| "learning_rate": 4.415111107797445e-06, |
| "loss": 0.672, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.3123039386545835, |
| "grad_norm": 1.0123866280188825, |
| "learning_rate": 4.3835897408191515e-06, |
| "loss": 0.7958, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.31788079470198677, |
| "grad_norm": 1.0536090394015367, |
| "learning_rate": 4.351360032772512e-06, |
| "loss": 0.8384, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.32345765074939004, |
| "grad_norm": 1.005997664037765, |
| "learning_rate": 4.318434103932622e-06, |
| "loss": 0.8511, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.3290345067967933, |
| "grad_norm": 0.9082165011415732, |
| "learning_rate": 4.284824336394748e-06, |
| "loss": 0.6731, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.33461136284419657, |
| "grad_norm": 0.9309889749980601, |
| "learning_rate": 4.250543369417921e-06, |
| "loss": 0.7276, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.34018821889159984, |
| "grad_norm": 1.7819442287605565, |
| "learning_rate": 4.215604094671835e-06, |
| "loss": 0.7831, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.34576507493900316, |
| "grad_norm": 0.9434498292744896, |
| "learning_rate": 4.180019651388807e-06, |
| "loss": 0.7503, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.3513419309864064, |
| "grad_norm": 1.0424541071897502, |
| "learning_rate": 4.14380342142266e-06, |
| "loss": 0.9153, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.3569187870338097, |
| "grad_norm": 0.9753148306326342, |
| "learning_rate": 4.106969024216348e-06, |
| "loss": 0.6555, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.36249564308121296, |
| "grad_norm": 0.964467277240988, |
| "learning_rate": 4.069530311680247e-06, |
| "loss": 0.7372, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.3680724991286162, |
| "grad_norm": 1.1418604292487458, |
| "learning_rate": 4.031501362983007e-06, |
| "loss": 0.7585, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.3736493551760195, |
| "grad_norm": 0.9125259121774503, |
| "learning_rate": 3.992896479256966e-06, |
| "loss": 0.6934, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.3792262112234228, |
| "grad_norm": 0.9183210408388318, |
| "learning_rate": 3.953730178220067e-06, |
| "loss": 0.6741, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3848030672708261, |
| "grad_norm": 0.9841348365534922, |
| "learning_rate": 3.914017188716347e-06, |
| "loss": 0.7713, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.39037992331822935, |
| "grad_norm": 0.9397953829688301, |
| "learning_rate": 3.8737724451770155e-06, |
| "loss": 0.6898, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3959567793656326, |
| "grad_norm": 0.9360968357631917, |
| "learning_rate": 3.833011082004229e-06, |
| "loss": 0.7356, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.4015336354130359, |
| "grad_norm": 1.0490874310335498, |
| "learning_rate": 3.7917484278796578e-06, |
| "loss": 0.8834, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.40711049146043915, |
| "grad_norm": 0.8729992951101984, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.6581, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.4126873475078425, |
| "grad_norm": 0.8526096559762923, |
| "learning_rate": 3.7077814982415966e-06, |
| "loss": 0.6796, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.41826420355524574, |
| "grad_norm": 0.9123193201559427, |
| "learning_rate": 3.665108799256348e-06, |
| "loss": 0.6747, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.423841059602649, |
| "grad_norm": 0.8692473859121223, |
| "learning_rate": 3.621997950501156e-06, |
| "loss": 0.6573, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.4294179156500523, |
| "grad_norm": 0.9141140630813748, |
| "learning_rate": 3.578465164203134e-06, |
| "loss": 0.6013, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.43499477169745554, |
| "grad_norm": 1.0607296974506348, |
| "learning_rate": 3.5345268112628485e-06, |
| "loss": 0.897, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.44057162774485886, |
| "grad_norm": 1.027907705537933, |
| "learning_rate": 3.4901994150978926e-06, |
| "loss": 0.8139, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.44614848379226213, |
| "grad_norm": 0.8964908897959806, |
| "learning_rate": 3.4454996454291066e-06, |
| "loss": 0.682, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4517253398396654, |
| "grad_norm": 0.9696808820116304, |
| "learning_rate": 3.400444312011776e-06, |
| "loss": 0.7677, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.45730219588706866, |
| "grad_norm": 0.9538347785493502, |
| "learning_rate": 3.3550503583141726e-06, |
| "loss": 0.7707, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.46287905193447193, |
| "grad_norm": 0.8834434922285562, |
| "learning_rate": 3.3093348551458033e-06, |
| "loss": 0.7254, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.4684559079818752, |
| "grad_norm": 0.9664583802329054, |
| "learning_rate": 3.2633149942377835e-06, |
| "loss": 0.6009, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4740327640292785, |
| "grad_norm": 0.9989900249921821, |
| "learning_rate": 3.217008081777726e-06, |
| "loss": 0.7277, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.4796096200766818, |
| "grad_norm": 0.9641224372984417, |
| "learning_rate": 3.1704315319015936e-06, |
| "loss": 0.7693, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.48518647612408505, |
| "grad_norm": 0.8563738741272415, |
| "learning_rate": 3.1236028601449534e-06, |
| "loss": 0.5502, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.4907633321714883, |
| "grad_norm": 0.9814456923681252, |
| "learning_rate": 3.0765396768561005e-06, |
| "loss": 0.7591, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.4963401882188916, |
| "grad_norm": 1.0031797145449588, |
| "learning_rate": 3.0292596805735275e-06, |
| "loss": 0.7336, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.5019170442662949, |
| "grad_norm": 1.0307972932412588, |
| "learning_rate": 2.9817806513702247e-06, |
| "loss": 0.8728, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5074939003136981, |
| "grad_norm": 0.9121639360068265, |
| "learning_rate": 2.9341204441673267e-06, |
| "loss": 0.7234, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.5130707563611014, |
| "grad_norm": 0.9266082791067043, |
| "learning_rate": 2.8862969820196017e-06, |
| "loss": 0.6637, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.5186476124085047, |
| "grad_norm": 0.9817424775807924, |
| "learning_rate": 2.8383282493753282e-06, |
| "loss": 0.818, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.524224468455908, |
| "grad_norm": 0.9290655450825701, |
| "learning_rate": 2.7902322853130758e-06, |
| "loss": 0.7372, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5298013245033113, |
| "grad_norm": 0.9630341129207757, |
| "learning_rate": 2.742027176757948e-06, |
| "loss": 0.786, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5353781805507145, |
| "grad_norm": 0.8736837998218376, |
| "learning_rate": 2.6937310516798276e-06, |
| "loss": 0.6546, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5409550365981178, |
| "grad_norm": 0.970695862336814, |
| "learning_rate": 2.6453620722761897e-06, |
| "loss": 0.6716, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.546531892645521, |
| "grad_norm": 0.9311590205600201, |
| "learning_rate": 2.5969384281420425e-06, |
| "loss": 0.6955, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.5521087486929244, |
| "grad_norm": 0.9022156745489164, |
| "learning_rate": 2.548478329429561e-06, |
| "loss": 0.6765, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5576856047403277, |
| "grad_norm": 0.9321595225209163, |
| "learning_rate": 2.5e-06, |
| "loss": 0.7459, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5632624607877309, |
| "grad_norm": 0.8642803835512484, |
| "learning_rate": 2.4515216705704396e-06, |
| "loss": 0.6295, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.5688393168351342, |
| "grad_norm": 0.8770432551457372, |
| "learning_rate": 2.403061571857958e-06, |
| "loss": 0.5756, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.5744161728825374, |
| "grad_norm": 0.986644075495802, |
| "learning_rate": 2.3546379277238107e-06, |
| "loss": 0.7433, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.5799930289299408, |
| "grad_norm": 0.9151759455914666, |
| "learning_rate": 2.3062689483201732e-06, |
| "loss": 0.6835, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.585569884977344, |
| "grad_norm": 1.0566592626672804, |
| "learning_rate": 2.2579728232420524e-06, |
| "loss": 0.7049, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.5911467410247473, |
| "grad_norm": 0.8911833907323385, |
| "learning_rate": 2.2097677146869242e-06, |
| "loss": 0.6252, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.5967235970721506, |
| "grad_norm": 3.583992947348307, |
| "learning_rate": 2.161671750624673e-06, |
| "loss": 0.7175, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.6023004531195538, |
| "grad_norm": 0.8470926199178831, |
| "learning_rate": 2.113703017980399e-06, |
| "loss": 0.6039, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.6078773091669571, |
| "grad_norm": 0.9202011923092919, |
| "learning_rate": 2.0658795558326745e-06, |
| "loss": 0.675, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.6134541652143604, |
| "grad_norm": 0.991676719260929, |
| "learning_rate": 2.0182193486297757e-06, |
| "loss": 0.8416, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6190310212617637, |
| "grad_norm": 0.9920121314417771, |
| "learning_rate": 1.970740319426474e-06, |
| "loss": 0.7869, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.624607877309167, |
| "grad_norm": 0.9147914111270489, |
| "learning_rate": 1.9234603231439e-06, |
| "loss": 0.6715, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.6301847333565702, |
| "grad_norm": 0.9960901454429568, |
| "learning_rate": 1.876397139855047e-06, |
| "loss": 0.8459, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6357615894039735, |
| "grad_norm": 0.8398771049626784, |
| "learning_rate": 1.8295684680984064e-06, |
| "loss": 0.6327, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.6413384454513767, |
| "grad_norm": 0.8848277584910325, |
| "learning_rate": 1.7829919182222752e-06, |
| "loss": 0.6674, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6469153014987801, |
| "grad_norm": 0.9599403253441103, |
| "learning_rate": 1.7366850057622176e-06, |
| "loss": 0.7381, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.6524921575461834, |
| "grad_norm": 0.8633918886732347, |
| "learning_rate": 1.6906651448541977e-06, |
| "loss": 0.5713, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.6580690135935866, |
| "grad_norm": 0.9564730887800509, |
| "learning_rate": 1.6449496416858285e-06, |
| "loss": 0.7964, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.6636458696409899, |
| "grad_norm": 0.9220167090814314, |
| "learning_rate": 1.5995556879882246e-06, |
| "loss": 0.7074, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.6692227256883931, |
| "grad_norm": 0.9412177200866909, |
| "learning_rate": 1.5545003545708942e-06, |
| "loss": 0.6798, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6747995817357965, |
| "grad_norm": 0.8799708797193134, |
| "learning_rate": 1.509800584902108e-06, |
| "loss": 0.636, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.6803764377831997, |
| "grad_norm": 0.8746008313087483, |
| "learning_rate": 1.4654731887371524e-06, |
| "loss": 0.6517, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.685953293830603, |
| "grad_norm": 0.8622866280586909, |
| "learning_rate": 1.421534835796867e-06, |
| "loss": 0.5685, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.6915301498780063, |
| "grad_norm": 0.9161469277312331, |
| "learning_rate": 1.3780020494988447e-06, |
| "loss": 0.7142, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.6971070059254095, |
| "grad_norm": 0.8834589774119394, |
| "learning_rate": 1.3348912007436538e-06, |
| "loss": 0.6794, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7026838619728129, |
| "grad_norm": 0.8420150113363432, |
| "learning_rate": 1.2922185017584038e-06, |
| "loss": 0.5548, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.7082607180202161, |
| "grad_norm": 0.9186571271373966, |
| "learning_rate": 1.2500000000000007e-06, |
| "loss": 0.7093, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.7138375740676194, |
| "grad_norm": 0.9024224044680166, |
| "learning_rate": 1.2082515721203429e-06, |
| "loss": 0.601, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.7194144301150227, |
| "grad_norm": 0.8730115171814332, |
| "learning_rate": 1.1669889179957725e-06, |
| "loss": 0.6485, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.7249912861624259, |
| "grad_norm": 2.560680193595368, |
| "learning_rate": 1.1262275548229852e-06, |
| "loss": 0.681, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7305681422098292, |
| "grad_norm": 0.8433376742578463, |
| "learning_rate": 1.085982811283654e-06, |
| "loss": 0.6025, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.7361449982572325, |
| "grad_norm": 0.9000811460890688, |
| "learning_rate": 1.0462698217799333e-06, |
| "loss": 0.7098, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7417218543046358, |
| "grad_norm": 0.9015484414513791, |
| "learning_rate": 1.0071035207430352e-06, |
| "loss": 0.6939, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.747298710352039, |
| "grad_norm": 0.924647234962446, |
| "learning_rate": 9.68498637016993e-07, |
| "loss": 0.7219, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.7528755663994423, |
| "grad_norm": 0.9622683692067883, |
| "learning_rate": 9.304696883197542e-07, |
| "loss": 0.7445, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7584524224468456, |
| "grad_norm": 1.0018020723323282, |
| "learning_rate": 8.930309757836517e-07, |
| "loss": 0.7285, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.7640292784942488, |
| "grad_norm": 1.0034101578791559, |
| "learning_rate": 8.561965785773413e-07, |
| "loss": 0.647, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.7696061345416522, |
| "grad_norm": 0.865650213772322, |
| "learning_rate": 8.19980348611194e-07, |
| "loss": 0.6588, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.7751829905890554, |
| "grad_norm": 0.9237880174335488, |
| "learning_rate": 7.843959053281663e-07, |
| "loss": 0.738, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.7807598466364587, |
| "grad_norm": 0.992180072952141, |
| "learning_rate": 7.494566305820788e-07, |
| "loss": 0.7533, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.786336702683862, |
| "grad_norm": 0.8845919723729968, |
| "learning_rate": 7.151756636052529e-07, |
| "loss": 0.6062, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.7919135587312652, |
| "grad_norm": 0.9575278544789321, |
| "learning_rate": 6.815658960673782e-07, |
| "loss": 0.7661, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.7974904147786686, |
| "grad_norm": 1.060097465810906, |
| "learning_rate": 6.48639967227489e-07, |
| "loss": 0.7093, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.8030672708260718, |
| "grad_norm": 1.1137158210751135, |
| "learning_rate": 6.164102591808482e-07, |
| "loss": 0.6516, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.8086441268734751, |
| "grad_norm": 0.9467474421643487, |
| "learning_rate": 5.848888922025553e-07, |
| "loss": 0.7106, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.8142209829208783, |
| "grad_norm": 0.977712019438005, |
| "learning_rate": 5.540877201896e-07, |
| "loss": 0.6485, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.8197978389682816, |
| "grad_norm": 1.2725548643418227, |
| "learning_rate": 5.240183262031021e-07, |
| "loss": 0.7106, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.825374695015685, |
| "grad_norm": 0.9531375340902994, |
| "learning_rate": 4.946920181123904e-07, |
| "loss": 0.6352, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.8309515510630882, |
| "grad_norm": 0.8239496228158747, |
| "learning_rate": 4.661198243425813e-07, |
| "loss": 0.5812, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.8365284071104915, |
| "grad_norm": 1.4768742600927571, |
| "learning_rate": 4.383124897272331e-07, |
| "loss": 0.825, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 0.8936174846742114, |
| "learning_rate": 4.1128047146765936e-07, |
| "loss": 0.7137, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.847682119205298, |
| "grad_norm": 0.868290680328461, |
| "learning_rate": 3.8503393520039734e-07, |
| "loss": 0.646, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.8532589752527013, |
| "grad_norm": 0.9170777878398306, |
| "learning_rate": 3.595827511743341e-07, |
| "loss": 0.6338, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.8588358313001045, |
| "grad_norm": 0.8709479986895221, |
| "learning_rate": 3.3493649053890325e-07, |
| "loss": 0.6942, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.8644126873475079, |
| "grad_norm": 1.015273029250277, |
| "learning_rate": 3.111044217447731e-07, |
| "loss": 0.8455, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.8699895433949111, |
| "grad_norm": 1.8953728977728321, |
| "learning_rate": 2.880955070583555e-07, |
| "loss": 0.8089, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.8755663994423144, |
| "grad_norm": 1.0266628340926212, |
| "learning_rate": 2.6591839919146963e-07, |
| "loss": 0.6747, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.8811432554897177, |
| "grad_norm": 1.0638475327851682, |
| "learning_rate": 2.445814380474057e-07, |
| "loss": 0.6979, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.8867201115371209, |
| "grad_norm": 0.9287481759641896, |
| "learning_rate": 2.240926475846336e-07, |
| "loss": 0.7963, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.8922969675845243, |
| "grad_norm": 0.8785430203193436, |
| "learning_rate": 2.044597327993153e-07, |
| "loss": 0.6534, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8978738236319275, |
| "grad_norm": 0.953415594296327, |
| "learning_rate": 1.8569007682777417e-07, |
| "loss": 0.7474, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.9034506796793308, |
| "grad_norm": 0.9294478803532011, |
| "learning_rate": 1.6779073816999864e-07, |
| "loss": 0.7906, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.909027535726734, |
| "grad_norm": 0.8384613909869523, |
| "learning_rate": 1.507684480352292e-07, |
| "loss": 0.6377, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.9146043917741373, |
| "grad_norm": 0.8835908163025235, |
| "learning_rate": 1.3462960781062433e-07, |
| "loss": 0.6392, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.9201812478215406, |
| "grad_norm": 0.8449058090284122, |
| "learning_rate": 1.1938028665396172e-07, |
| "loss": 0.5656, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.9257581038689439, |
| "grad_norm": 0.9392723321505706, |
| "learning_rate": 1.0502621921127776e-07, |
| "loss": 0.7239, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.9313349599163472, |
| "grad_norm": 0.8758199104024277, |
| "learning_rate": 9.157280346029918e-08, |
| "loss": 0.6666, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.9369118159637504, |
| "grad_norm": 0.8830370044979928, |
| "learning_rate": 7.902509868048552e-08, |
| "loss": 0.6846, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.9424886720111537, |
| "grad_norm": 0.8870739682602913, |
| "learning_rate": 6.738782355044048e-08, |
| "loss": 0.7071, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.948065528058557, |
| "grad_norm": 0.8732031194960946, |
| "learning_rate": 5.6665354373411085e-08, |
| "loss": 0.7037, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9536423841059603, |
| "grad_norm": 0.9251119129299412, |
| "learning_rate": 4.6861723431538273e-08, |
| "loss": 0.6949, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.9592192401533636, |
| "grad_norm": 0.8254069704702977, |
| "learning_rate": 3.798061746947995e-08, |
| "loss": 0.5747, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.9647960962007668, |
| "grad_norm": 0.8556828230063313, |
| "learning_rate": 3.0025376307977474e-08, |
| "loss": 0.6367, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.9703729522481701, |
| "grad_norm": 0.8964787284563904, |
| "learning_rate": 2.299899158788671e-08, |
| "loss": 0.6943, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.9759498082955733, |
| "grad_norm": 0.8529908965639459, |
| "learning_rate": 1.6904105645142443e-08, |
| "loss": 0.6373, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.9815266643429766, |
| "grad_norm": 0.8934738534392561, |
| "learning_rate": 1.1743010517085428e-08, |
| "loss": 0.6968, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.98710352039038, |
| "grad_norm": 0.9302061985515149, |
| "learning_rate": 7.517647080519941e-09, |
| "loss": 0.7773, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.9926803764377832, |
| "grad_norm": 0.9858177539703182, |
| "learning_rate": 4.229604321829561e-09, |
| "loss": 0.7393, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.9982572324851865, |
| "grad_norm": 0.9326264974645483, |
| "learning_rate": 1.8801187394248966e-09, |
| "loss": 0.7246, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.9326264974645483, |
| "learning_rate": 4.700738787466463e-10, |
| "loss": 0.9, |
| "step": 180 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 180, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 27643248377856.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|