{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.9891808346213296,
  "eval_steps": 500,
  "global_step": 805,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0061823802163833074,
      "grad_norm": 10.693047278702467,
      "learning_rate": 9.876543209876544e-07,
      "loss": 1.6775,
      "step": 1
    },
    {
      "epoch": 0.012364760432766615,
      "grad_norm": 10.815416954217403,
      "learning_rate": 1.9753086419753087e-06,
      "loss": 1.6824,
      "step": 2
    },
    {
      "epoch": 0.01854714064914992,
      "grad_norm": 10.616338898894972,
      "learning_rate": 2.962962962962963e-06,
      "loss": 1.6745,
      "step": 3
    },
    {
      "epoch": 0.02472952086553323,
      "grad_norm": 9.861459123224664,
      "learning_rate": 3.9506172839506175e-06,
      "loss": 1.6421,
      "step": 4
    },
    {
      "epoch": 0.030911901081916538,
      "grad_norm": 7.614357546616699,
      "learning_rate": 4.938271604938272e-06,
      "loss": 1.5691,
      "step": 5
    },
    {
      "epoch": 0.03709428129829984,
      "grad_norm": 4.203625583627038,
      "learning_rate": 5.925925925925926e-06,
      "loss": 1.5004,
      "step": 6
    },
    {
      "epoch": 0.04327666151468315,
      "grad_norm": 3.5662282715378195,
      "learning_rate": 6.913580246913581e-06,
      "loss": 1.4777,
      "step": 7
    },
    {
      "epoch": 0.04945904173106646,
      "grad_norm": 5.739743938641111,
      "learning_rate": 7.901234567901235e-06,
      "loss": 1.4747,
      "step": 8
    },
    {
      "epoch": 0.05564142194744977,
      "grad_norm": 6.180000116352135,
      "learning_rate": 8.888888888888888e-06,
      "loss": 1.4418,
      "step": 9
    },
    {
      "epoch": 0.061823802163833076,
      "grad_norm": 6.739405843412405,
      "learning_rate": 9.876543209876543e-06,
      "loss": 1.3952,
      "step": 10
    },
    {
      "epoch": 0.06800618238021638,
      "grad_norm": 7.52997521103117,
      "learning_rate": 1.0864197530864198e-05,
      "loss": 1.3959,
      "step": 11
    },
    {
      "epoch": 0.07418856259659969,
      "grad_norm": 6.385085000826414,
      "learning_rate": 1.1851851851851852e-05,
      "loss": 1.363,
      "step": 12
    },
    {
      "epoch": 0.080370942812983,
      "grad_norm": 5.007200781174352,
      "learning_rate": 1.2839506172839507e-05,
      "loss": 1.3392,
      "step": 13
    },
    {
      "epoch": 0.0865533230293663,
      "grad_norm": 4.058377749470976,
      "learning_rate": 1.3827160493827162e-05,
      "loss": 1.3045,
      "step": 14
    },
    {
      "epoch": 0.09273570324574962,
      "grad_norm": 3.5316422870540793,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 1.2877,
      "step": 15
    },
    {
      "epoch": 0.09891808346213292,
      "grad_norm": 3.842638477890943,
      "learning_rate": 1.580246913580247e-05,
      "loss": 1.2683,
      "step": 16
    },
    {
      "epoch": 0.10510046367851623,
      "grad_norm": 3.1304998105929815,
      "learning_rate": 1.6790123456790123e-05,
      "loss": 1.2722,
      "step": 17
    },
    {
      "epoch": 0.11128284389489954,
      "grad_norm": 1.831350110427916,
      "learning_rate": 1.7777777777777777e-05,
      "loss": 1.2427,
      "step": 18
    },
    {
      "epoch": 0.11746522411128284,
      "grad_norm": 2.1448411883286593,
      "learning_rate": 1.8765432098765433e-05,
      "loss": 1.238,
      "step": 19
    },
    {
      "epoch": 0.12364760432766615,
      "grad_norm": 2.0898956083682263,
      "learning_rate": 1.9753086419753087e-05,
      "loss": 1.2214,
      "step": 20
    },
    {
      "epoch": 0.12982998454404945,
      "grad_norm": 1.5783782113338631,
      "learning_rate": 2.074074074074074e-05,
      "loss": 1.21,
      "step": 21
    },
    {
      "epoch": 0.13601236476043277,
      "grad_norm": 1.7643935663724537,
      "learning_rate": 2.1728395061728397e-05,
      "loss": 1.1865,
      "step": 22
    },
    {
      "epoch": 0.14219474497681608,
      "grad_norm": 2.0831771497504783,
      "learning_rate": 2.271604938271605e-05,
      "loss": 1.195,
      "step": 23
    },
    {
      "epoch": 0.14837712519319937,
      "grad_norm": 1.7488536795684462,
      "learning_rate": 2.3703703703703703e-05,
      "loss": 1.1696,
      "step": 24
    },
    {
      "epoch": 0.1545595054095827,
      "grad_norm": 1.5931909595273852,
      "learning_rate": 2.469135802469136e-05,
      "loss": 1.1892,
      "step": 25
    },
    {
      "epoch": 0.160741885625966,
      "grad_norm": 1.396445568089242,
      "learning_rate": 2.5679012345679013e-05,
      "loss": 1.1793,
      "step": 26
    },
    {
      "epoch": 0.16692426584234932,
      "grad_norm": 1.0145848862012221,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 1.1664,
      "step": 27
    },
    {
      "epoch": 0.1731066460587326,
      "grad_norm": 2.4324574107146226,
      "learning_rate": 2.7654320987654323e-05,
      "loss": 1.1573,
      "step": 28
    },
    {
      "epoch": 0.17928902627511592,
      "grad_norm": 1.4236022981017393,
      "learning_rate": 2.8641975308641977e-05,
      "loss": 1.1501,
      "step": 29
    },
    {
      "epoch": 0.18547140649149924,
      "grad_norm": 2.1562503786156153,
      "learning_rate": 2.962962962962963e-05,
      "loss": 1.1608,
      "step": 30
    },
    {
      "epoch": 0.19165378670788252,
      "grad_norm": 1.9018824527792042,
      "learning_rate": 3.061728395061729e-05,
      "loss": 1.1451,
      "step": 31
    },
    {
      "epoch": 0.19783616692426584,
      "grad_norm": 1.5703835911829096,
      "learning_rate": 3.160493827160494e-05,
      "loss": 1.1408,
      "step": 32
    },
    {
      "epoch": 0.20401854714064915,
      "grad_norm": 8.652718407803869,
      "learning_rate": 3.259259259259259e-05,
      "loss": 1.1346,
      "step": 33
    },
    {
      "epoch": 0.21020092735703247,
      "grad_norm": 6.435439968019908,
      "learning_rate": 3.3580246913580247e-05,
      "loss": 1.1582,
      "step": 34
    },
    {
      "epoch": 0.21638330757341576,
      "grad_norm": 105.3495618548623,
      "learning_rate": 3.45679012345679e-05,
      "loss": 1.1558,
      "step": 35
    },
    {
      "epoch": 0.22256568778979907,
      "grad_norm": 2.5364224403663056,
      "learning_rate": 3.555555555555555e-05,
      "loss": 1.1431,
      "step": 36
    },
    {
      "epoch": 0.2287480680061824,
      "grad_norm": 1.9024835350187577,
      "learning_rate": 3.654320987654321e-05,
      "loss": 1.1463,
      "step": 37
    },
    {
      "epoch": 0.23493044822256567,
      "grad_norm": 2.0426678600887254,
      "learning_rate": 3.7530864197530867e-05,
      "loss": 1.1307,
      "step": 38
    },
    {
      "epoch": 0.241112828438949,
      "grad_norm": 1.8582429580354745,
      "learning_rate": 3.851851851851852e-05,
      "loss": 1.1272,
      "step": 39
    },
    {
      "epoch": 0.2472952086553323,
      "grad_norm": 1.860203917219744,
      "learning_rate": 3.950617283950617e-05,
      "loss": 1.1137,
      "step": 40
    },
    {
      "epoch": 0.2534775888717156,
      "grad_norm": 10.27861275542122,
      "learning_rate": 4.049382716049383e-05,
      "loss": 1.1247,
      "step": 41
    },
    {
      "epoch": 0.2596599690880989,
      "grad_norm": 5.066950295263267,
      "learning_rate": 4.148148148148148e-05,
      "loss": 1.1334,
      "step": 42
    },
    {
      "epoch": 0.26584234930448225,
      "grad_norm": 3.502140270278529,
      "learning_rate": 4.246913580246914e-05,
      "loss": 1.1114,
      "step": 43
    },
    {
      "epoch": 0.27202472952086554,
      "grad_norm": 3.203814615327836,
      "learning_rate": 4.345679012345679e-05,
      "loss": 1.1182,
      "step": 44
    },
    {
      "epoch": 0.2782071097372488,
      "grad_norm": 2.4989709311447954,
      "learning_rate": 4.444444444444445e-05,
      "loss": 1.1157,
      "step": 45
    },
    {
      "epoch": 0.28438948995363217,
      "grad_norm": 3.2014585800746813,
      "learning_rate": 4.54320987654321e-05,
      "loss": 1.1158,
      "step": 46
    },
    {
      "epoch": 0.29057187017001546,
      "grad_norm": 2.3255133597202073,
      "learning_rate": 4.641975308641976e-05,
      "loss": 1.1214,
      "step": 47
    },
    {
      "epoch": 0.29675425038639874,
      "grad_norm": 3.340427993360326,
      "learning_rate": 4.7407407407407407e-05,
      "loss": 1.1072,
      "step": 48
    },
    {
      "epoch": 0.3029366306027821,
      "grad_norm": 2.6053156370494137,
      "learning_rate": 4.8395061728395067e-05,
      "loss": 1.1125,
      "step": 49
    },
    {
      "epoch": 0.3091190108191654,
      "grad_norm": 3.416491408495564,
      "learning_rate": 4.938271604938272e-05,
      "loss": 1.1076,
      "step": 50
    },
    {
      "epoch": 0.31530139103554866,
      "grad_norm": 3.0240291240770016,
      "learning_rate": 5.037037037037037e-05,
      "loss": 1.1039,
      "step": 51
    },
    {
      "epoch": 0.321483771251932,
      "grad_norm": 2.6974546966855173,
      "learning_rate": 5.1358024691358027e-05,
      "loss": 1.1083,
      "step": 52
    },
    {
      "epoch": 0.3276661514683153,
      "grad_norm": 2.1712600314640174,
      "learning_rate": 5.234567901234569e-05,
      "loss": 1.0969,
      "step": 53
    },
    {
      "epoch": 0.33384853168469864,
      "grad_norm": 2.43061865655575,
      "learning_rate": 5.333333333333333e-05,
      "loss": 1.1009,
      "step": 54
    },
    {
      "epoch": 0.3400309119010819,
      "grad_norm": 1.4299217962960098,
      "learning_rate": 5.432098765432099e-05,
      "loss": 1.1071,
      "step": 55
    },
    {
      "epoch": 0.3462132921174652,
      "grad_norm": 3.4472743528033942,
      "learning_rate": 5.5308641975308647e-05,
      "loss": 1.1034,
      "step": 56
    },
    {
      "epoch": 0.35239567233384855,
      "grad_norm": 2.2533692308911486,
      "learning_rate": 5.62962962962963e-05,
      "loss": 1.0926,
      "step": 57
    },
    {
      "epoch": 0.35857805255023184,
      "grad_norm": 3.124786132702191,
      "learning_rate": 5.728395061728395e-05,
      "loss": 1.0998,
      "step": 58
    },
    {
      "epoch": 0.36476043276661513,
      "grad_norm": 2.9945765774059847,
      "learning_rate": 5.827160493827161e-05,
      "loss": 1.0936,
      "step": 59
    },
    {
      "epoch": 0.37094281298299847,
      "grad_norm": 1.9029030448259223,
      "learning_rate": 5.925925925925926e-05,
      "loss": 1.0865,
      "step": 60
    },
    {
      "epoch": 0.37712519319938176,
      "grad_norm": 2.5191915533993434,
      "learning_rate": 6.024691358024692e-05,
      "loss": 1.094,
      "step": 61
    },
    {
      "epoch": 0.38330757341576505,
      "grad_norm": 2.377743288984034,
      "learning_rate": 6.123456790123457e-05,
      "loss": 1.0724,
      "step": 62
    },
    {
      "epoch": 0.3894899536321484,
      "grad_norm": 3.418329883653554,
      "learning_rate": 6.222222222222223e-05,
      "loss": 1.1097,
      "step": 63
    },
    {
      "epoch": 0.3956723338485317,
      "grad_norm": 1.8243621455646457,
      "learning_rate": 6.320987654320988e-05,
      "loss": 1.0972,
      "step": 64
    },
    {
      "epoch": 0.401854714064915,
      "grad_norm": 3.3653254310211804,
      "learning_rate": 6.419753086419754e-05,
      "loss": 1.0954,
      "step": 65
    },
    {
      "epoch": 0.4080370942812983,
      "grad_norm": 2.859591151453805,
      "learning_rate": 6.518518518518519e-05,
      "loss": 1.0887,
      "step": 66
    },
    {
      "epoch": 0.4142194744976816,
      "grad_norm": 2.132914874530937,
      "learning_rate": 6.617283950617285e-05,
      "loss": 1.0847,
      "step": 67
    },
    {
      "epoch": 0.42040185471406494,
      "grad_norm": 3.0306427822925386,
      "learning_rate": 6.716049382716049e-05,
      "loss": 1.1009,
      "step": 68
    },
    {
      "epoch": 0.4265842349304482,
      "grad_norm": 2.0609891283339477,
      "learning_rate": 6.814814814814815e-05,
      "loss": 1.092,
      "step": 69
    },
    {
      "epoch": 0.4327666151468315,
      "grad_norm": 2.9169289764667603,
      "learning_rate": 6.91358024691358e-05,
      "loss": 1.0721,
      "step": 70
    },
    {
      "epoch": 0.43894899536321486,
      "grad_norm": 2.170819034455064,
      "learning_rate": 7.012345679012346e-05,
      "loss": 1.0752,
      "step": 71
    },
    {
      "epoch": 0.44513137557959814,
      "grad_norm": 2.6561925596391256,
      "learning_rate": 7.11111111111111e-05,
      "loss": 1.0763,
      "step": 72
    },
    {
      "epoch": 0.45131375579598143,
      "grad_norm": 2.0412946622837453,
      "learning_rate": 7.209876543209877e-05,
      "loss": 1.0855,
      "step": 73
    },
    {
      "epoch": 0.4574961360123648,
      "grad_norm": 1.7224050848012167,
      "learning_rate": 7.308641975308643e-05,
      "loss": 1.0781,
      "step": 74
    },
    {
      "epoch": 0.46367851622874806,
      "grad_norm": 2.0509259917647564,
      "learning_rate": 7.407407407407409e-05,
      "loss": 1.0864,
      "step": 75
    },
    {
      "epoch": 0.46986089644513135,
      "grad_norm": 3.9723221221808225,
      "learning_rate": 7.506172839506173e-05,
      "loss": 1.077,
      "step": 76
    },
    {
      "epoch": 0.4760432766615147,
      "grad_norm": 2.5313197271865464,
      "learning_rate": 7.60493827160494e-05,
      "loss": 1.0873,
      "step": 77
    },
    {
      "epoch": 0.482225656877898,
      "grad_norm": 2.6058497426158342,
      "learning_rate": 7.703703703703704e-05,
      "loss": 1.0912,
      "step": 78
    },
    {
      "epoch": 0.4884080370942813,
      "grad_norm": 4.045410961666709,
      "learning_rate": 7.80246913580247e-05,
      "loss": 1.0905,
      "step": 79
    },
    {
      "epoch": 0.4945904173106646,
      "grad_norm": 2.085038720402762,
      "learning_rate": 7.901234567901235e-05,
      "loss": 1.0796,
      "step": 80
    },
    {
      "epoch": 0.500772797527048,
      "grad_norm": 4.492244112618374,
      "learning_rate": 8e-05,
      "loss": 1.094,
      "step": 81
    },
    {
      "epoch": 0.5069551777434312,
      "grad_norm": 3.6362050618093122,
      "learning_rate": 7.999962342461635e-05,
      "loss": 1.1031,
      "step": 82
    },
    {
      "epoch": 0.5131375579598145,
      "grad_norm": 3.1038756568216317,
      "learning_rate": 7.999849370555585e-05,
      "loss": 1.0726,
      "step": 83
    },
    {
      "epoch": 0.5193199381761978,
      "grad_norm": 2.526971760311998,
      "learning_rate": 7.999661086408972e-05,
      "loss": 1.0824,
      "step": 84
    },
    {
      "epoch": 0.5255023183925811,
      "grad_norm": 1.8821681779156758,
      "learning_rate": 7.999397493566954e-05,
      "loss": 1.0752,
      "step": 85
    },
    {
      "epoch": 0.5316846986089645,
      "grad_norm": 2.3448656117874913,
      "learning_rate": 7.99905859699266e-05,
      "loss": 1.0908,
      "step": 86
    },
    {
      "epoch": 0.5378670788253478,
      "grad_norm": 2.0611523416179542,
      "learning_rate": 7.998644403067097e-05,
      "loss": 1.0721,
      "step": 87
    },
    {
      "epoch": 0.5440494590417311,
      "grad_norm": 2.9983162968300983,
      "learning_rate": 7.998154919589024e-05,
      "loss": 1.0776,
      "step": 88
    },
    {
      "epoch": 0.5502318392581144,
      "grad_norm": 2.1806569564777987,
      "learning_rate": 7.997590155774815e-05,
      "loss": 1.0733,
      "step": 89
    },
    {
      "epoch": 0.5564142194744977,
      "grad_norm": 2.1571411866423533,
      "learning_rate": 7.996950122258277e-05,
      "loss": 1.0577,
      "step": 90
    },
    {
      "epoch": 0.5625965996908809,
      "grad_norm": 3.999074777477232,
      "learning_rate": 7.996234831090451e-05,
      "loss": 1.0793,
      "step": 91
    },
    {
      "epoch": 0.5687789799072643,
      "grad_norm": 2.2312489091174172,
      "learning_rate": 7.995444295739394e-05,
      "loss": 1.0775,
      "step": 92
    },
    {
      "epoch": 0.5749613601236476,
      "grad_norm": 6.134653577370118,
      "learning_rate": 7.994578531089908e-05,
      "loss": 1.0927,
      "step": 93
    },
    {
      "epoch": 0.5811437403400309,
      "grad_norm": 6.274513062714145,
      "learning_rate": 7.99363755344328e-05,
      "loss": 1.1068,
      "step": 94
    },
    {
      "epoch": 0.5873261205564142,
      "grad_norm": 2.3500100638359287,
      "learning_rate": 7.99262138051696e-05,
      "loss": 1.0885,
      "step": 95
    },
    {
      "epoch": 0.5935085007727975,
      "grad_norm": 4.48097545076724,
      "learning_rate": 7.991530031444231e-05,
      "loss": 1.0838,
      "step": 96
    },
    {
      "epoch": 0.5996908809891809,
      "grad_norm": 3.749050135435733,
      "learning_rate": 7.990363526773856e-05,
      "loss": 1.0947,
      "step": 97
    },
    {
      "epoch": 0.6058732612055642,
      "grad_norm": 2.7226697465582674,
      "learning_rate": 7.98912188846968e-05,
      "loss": 1.0835,
      "step": 98
    },
    {
      "epoch": 0.6120556414219475,
      "grad_norm": 2.7748343898198073,
      "learning_rate": 7.987805139910226e-05,
      "loss": 1.0601,
      "step": 99
    },
    {
      "epoch": 0.6182380216383307,
      "grad_norm": 2.0786601854908087,
      "learning_rate": 7.986413305888247e-05,
      "loss": 1.0628,
      "step": 100
    },
    {
      "epoch": 0.624420401854714,
      "grad_norm": 3.0450749679460842,
      "learning_rate": 7.984946412610265e-05,
      "loss": 1.0746,
      "step": 101
    },
    {
      "epoch": 0.6306027820710973,
      "grad_norm": 2.0666897869515273,
      "learning_rate": 7.983404487696076e-05,
      "loss": 1.0712,
      "step": 102
    },
    {
      "epoch": 0.6367851622874807,
      "grad_norm": 3.3981180649215212,
      "learning_rate": 7.981787560178227e-05,
      "loss": 1.0555,
      "step": 103
    },
    {
      "epoch": 0.642967542503864,
      "grad_norm": 2.793840699546338,
      "learning_rate": 7.980095660501473e-05,
      "loss": 1.0705,
      "step": 104
    },
    {
      "epoch": 0.6491499227202473,
      "grad_norm": 3.0934299956993754,
      "learning_rate": 7.978328820522204e-05,
      "loss": 1.0593,
      "step": 105
    },
    {
      "epoch": 0.6553323029366306,
      "grad_norm": 2.2997166417283186,
      "learning_rate": 7.976487073507839e-05,
      "loss": 1.0485,
      "step": 106
    },
    {
      "epoch": 0.6615146831530139,
      "grad_norm": 3.3687080136243903,
      "learning_rate": 7.974570454136212e-05,
      "loss": 1.0623,
      "step": 107
    },
    {
      "epoch": 0.6676970633693973,
      "grad_norm": 2.668689204618569,
      "learning_rate": 7.972578998494903e-05,
      "loss": 1.06,
      "step": 108
    },
    {
      "epoch": 0.6738794435857806,
      "grad_norm": 3.259938106278256,
      "learning_rate": 7.970512744080571e-05,
      "loss": 1.0646,
      "step": 109
    },
    {
      "epoch": 0.6800618238021638,
      "grad_norm": 2.539712415638497,
      "learning_rate": 7.968371729798246e-05,
      "loss": 1.0578,
      "step": 110
    },
    {
      "epoch": 0.6862442040185471,
      "grad_norm": 2.875087869638967,
      "learning_rate": 7.96615599596059e-05,
      "loss": 1.0495,
      "step": 111
    },
    {
      "epoch": 0.6924265842349304,
      "grad_norm": 2.171964418083895,
      "learning_rate": 7.963865584287142e-05,
      "loss": 1.0482,
      "step": 112
    },
    {
      "epoch": 0.6986089644513137,
      "grad_norm": 2.4445515677858776,
      "learning_rate": 7.96150053790354e-05,
      "loss": 1.0478,
      "step": 113
    },
    {
      "epoch": 0.7047913446676971,
      "grad_norm": 1.4736364847794041,
      "learning_rate": 7.959060901340691e-05,
      "loss": 1.0644,
      "step": 114
    },
    {
      "epoch": 0.7109737248840804,
      "grad_norm": 3.2793219071142574,
      "learning_rate": 7.956546720533952e-05,
      "loss": 1.0581,
      "step": 115
    },
    {
      "epoch": 0.7171561051004637,
      "grad_norm": 2.1935733687249157,
      "learning_rate": 7.953958042822252e-05,
      "loss": 1.0577,
      "step": 116
    },
    {
      "epoch": 0.723338485316847,
      "grad_norm": 1.9736309619709513,
      "learning_rate": 7.951294916947206e-05,
      "loss": 1.0562,
      "step": 117
    },
    {
      "epoch": 0.7295208655332303,
      "grad_norm": 2.1690441232660724,
      "learning_rate": 7.948557393052195e-05,
      "loss": 1.052,
      "step": 118
    },
    {
      "epoch": 0.7357032457496137,
      "grad_norm": 2.703798771440302,
      "learning_rate": 7.945745522681429e-05,
      "loss": 1.0645,
      "step": 119
    },
    {
      "epoch": 0.7418856259659969,
      "grad_norm": 2.6860226290594427,
      "learning_rate": 7.942859358778961e-05,
      "loss": 1.0469,
      "step": 120
    },
    {
      "epoch": 0.7480680061823802,
      "grad_norm": 1.2516089611393217,
      "learning_rate": 7.939898955687709e-05,
      "loss": 1.0442,
      "step": 121
    },
    {
      "epoch": 0.7542503863987635,
      "grad_norm": 3.4267893638027878,
      "learning_rate": 7.936864369148418e-05,
      "loss": 1.0608,
      "step": 122
    },
    {
      "epoch": 0.7604327666151468,
      "grad_norm": 2.225834600604942,
      "learning_rate": 7.933755656298616e-05,
      "loss": 1.0668,
      "step": 123
    },
    {
      "epoch": 0.7666151468315301,
      "grad_norm": 1.6048920414465118,
      "learning_rate": 7.930572875671542e-05,
      "loss": 1.046,
      "step": 124
    },
    {
      "epoch": 0.7727975270479135,
      "grad_norm": 3.4705093098033957,
      "learning_rate": 7.927316087195035e-05,
      "loss": 1.0703,
      "step": 125
    },
    {
      "epoch": 0.7789799072642968,
      "grad_norm": 2.280254489129258,
      "learning_rate": 7.923985352190416e-05,
      "loss": 1.0632,
      "step": 126
    },
    {
      "epoch": 0.7851622874806801,
      "grad_norm": 2.8077251065232884,
      "learning_rate": 7.920580733371325e-05,
      "loss": 1.0637,
      "step": 127
    },
    {
      "epoch": 0.7913446676970634,
      "grad_norm": 2.79897007061372,
      "learning_rate": 7.917102294842545e-05,
      "loss": 1.0511,
      "step": 128
    },
    {
      "epoch": 0.7975270479134466,
      "grad_norm": 2.356217446775361,
      "learning_rate": 7.913550102098789e-05,
      "loss": 1.0621,
      "step": 129
    },
    {
      "epoch": 0.80370942812983,
      "grad_norm": 2.837553637508471,
      "learning_rate": 7.909924222023478e-05,
      "loss": 1.0498,
      "step": 130
    },
    {
      "epoch": 0.8098918083462133,
      "grad_norm": 2.074863492575953,
      "learning_rate": 7.906224722887468e-05,
      "loss": 1.0555,
      "step": 131
    },
    {
      "epoch": 0.8160741885625966,
      "grad_norm": 3.906632921125056,
      "learning_rate": 7.902451674347778e-05,
      "loss": 1.0547,
      "step": 132
    },
    {
      "epoch": 0.8222565687789799,
      "grad_norm": 3.098700348501345,
      "learning_rate": 7.898605147446262e-05,
      "loss": 1.0459,
      "step": 133
    },
    {
      "epoch": 0.8284389489953632,
      "grad_norm": 2.847790553655261,
      "learning_rate": 7.894685214608293e-05,
      "loss": 1.0442,
      "step": 134
    },
    {
      "epoch": 0.8346213292117465,
      "grad_norm": 2.908201865083222,
      "learning_rate": 7.89069194964138e-05,
      "loss": 1.0553,
      "step": 135
    },
    {
      "epoch": 0.8408037094281299,
      "grad_norm": 2.1751989062888146,
      "learning_rate": 7.886625427733786e-05,
      "loss": 1.0479,
      "step": 136
    },
    {
      "epoch": 0.8469860896445132,
      "grad_norm": 1.729338903416391,
      "learning_rate": 7.882485725453114e-05,
      "loss": 1.0315,
      "step": 137
    },
    {
      "epoch": 0.8531684698608965,
      "grad_norm": 2.2703882009096827,
      "learning_rate": 7.878272920744862e-05,
      "loss": 1.0496,
      "step": 138
    },
    {
      "epoch": 0.8593508500772797,
      "grad_norm": 1.3426280628545364,
      "learning_rate": 7.87398709293096e-05,
      "loss": 1.0407,
      "step": 139
    },
    {
      "epoch": 0.865533230293663,
      "grad_norm": 2.68728781786679,
      "learning_rate": 7.869628322708269e-05,
      "loss": 1.0551,
      "step": 140
    },
    {
      "epoch": 0.8717156105100463,
      "grad_norm": 1.7262218207414903,
      "learning_rate": 7.865196692147066e-05,
      "loss": 1.0483,
      "step": 141
    },
    {
      "epoch": 0.8778979907264297,
      "grad_norm": 2.5755795419575156,
      "learning_rate": 7.860692284689502e-05,
      "loss": 1.0657,
      "step": 142
    },
    {
      "epoch": 0.884080370942813,
      "grad_norm": 2.1034039247877856,
      "learning_rate": 7.856115185148025e-05,
      "loss": 1.056,
      "step": 143
    },
    {
      "epoch": 0.8902627511591963,
      "grad_norm": 1.9719606878351836,
      "learning_rate": 7.851465479703785e-05,
      "loss": 1.0549,
      "step": 144
    },
    {
      "epoch": 0.8964451313755796,
      "grad_norm": 2.066417220460305,
      "learning_rate": 7.846743255905014e-05,
      "loss": 1.0483,
      "step": 145
    },
    {
      "epoch": 0.9026275115919629,
      "grad_norm": 1.7106893433354307,
      "learning_rate": 7.841948602665373e-05,
      "loss": 1.0423,
      "step": 146
    },
    {
      "epoch": 0.9088098918083463,
      "grad_norm": 2.4989235217550676,
      "learning_rate": 7.83708161026228e-05,
      "loss": 1.0486,
      "step": 147
    },
    {
      "epoch": 0.9149922720247295,
      "grad_norm": 2.354839754481779,
      "learning_rate": 7.832142370335215e-05,
      "loss": 1.0445,
      "step": 148
    },
    {
      "epoch": 0.9211746522411128,
      "grad_norm": 1.702601699332555,
      "learning_rate": 7.827130975883982e-05,
      "loss": 1.0206,
      "step": 149
    },
    {
      "epoch": 0.9273570324574961,
      "grad_norm": 3.284158052097331,
      "learning_rate": 7.822047521266973e-05,
      "loss": 1.028,
      "step": 150
    },
    {
      "epoch": 0.9335394126738794,
      "grad_norm": 2.5496411598803648,
      "learning_rate": 7.816892102199382e-05,
      "loss": 1.0367,
      "step": 151
    },
    {
      "epoch": 0.9397217928902627,
      "grad_norm": 1.842181874139785,
      "learning_rate": 7.811664815751404e-05,
      "loss": 1.0415,
      "step": 152
    },
    {
      "epoch": 0.9459041731066461,
      "grad_norm": 3.2108389564617887,
      "learning_rate": 7.806365760346409e-05,
      "loss": 1.0379,
      "step": 153
    },
    {
      "epoch": 0.9520865533230294,
      "grad_norm": 1.8227204697842778,
      "learning_rate": 7.800995035759087e-05,
      "loss": 1.0498,
      "step": 154
    },
    {
      "epoch": 0.9582689335394127,
      "grad_norm": 3.4947702832902476,
      "learning_rate": 7.795552743113573e-05,
      "loss": 1.0479,
      "step": 155
    },
    {
      "epoch": 0.964451313755796,
      "grad_norm": 1.8114431864984537,
      "learning_rate": 7.79003898488154e-05,
      "loss": 1.0313,
      "step": 156
    },
    {
      "epoch": 0.9706336939721792,
      "grad_norm": 3.535447837519456,
      "learning_rate": 7.784453864880267e-05,
      "loss": 1.0474,
      "step": 157
    },
    {
      "epoch": 0.9768160741885626,
      "grad_norm": 1.7365198353975386,
      "learning_rate": 7.77879748827069e-05,
      "loss": 1.0511,
      "step": 158
    },
    {
      "epoch": 0.9829984544049459,
      "grad_norm": 3.594055596899371,
      "learning_rate": 7.77306996155542e-05,
      "loss": 1.0603,
      "step": 159
    },
    {
      "epoch": 0.9891808346213292,
      "grad_norm": 2.5313017940876086,
      "learning_rate": 7.767271392576732e-05,
      "loss": 1.0626,
      "step": 160
    },
    {
      "epoch": 0.9953632148377125,
      "grad_norm": 3.40567680255142,
      "learning_rate": 7.761401890514547e-05,
      "loss": 1.0638,
      "step": 161
    },
    {
      "epoch": 1.0046367851622875,
      "grad_norm": 4.5174674178938945,
      "learning_rate": 7.755461565884362e-05,
      "loss": 1.8415,
      "step": 162
    },
    {
      "epoch": 1.010819165378671,
      "grad_norm": 3.0038968679186957,
      "learning_rate": 7.749450530535179e-05,
      "loss": 1.0416,
      "step": 163
    },
    {
      "epoch": 1.017001545595054,
      "grad_norm": 2.217689466421883,
      "learning_rate": 7.743368897647395e-05,
      "loss": 1.0335,
      "step": 164
    },
    {
      "epoch": 1.0231839258114375,
      "grad_norm": 3.387468691972088,
      "learning_rate": 7.737216781730673e-05,
      "loss": 1.0285,
      "step": 165
    },
    {
      "epoch": 1.0293663060278206,
      "grad_norm": 2.9834708542560433,
      "learning_rate": 7.730994298621781e-05,
      "loss": 1.0462,
      "step": 166
    },
    {
      "epoch": 1.035548686244204,
      "grad_norm": 2.33967613172349,
      "learning_rate": 7.72470156548242e-05,
      "loss": 1.0315,
      "step": 167
    },
    {
      "epoch": 1.0417310664605872,
      "grad_norm": 1.98032140135135,
      "learning_rate": 7.718338700797007e-05,
      "loss": 1.021,
      "step": 168
    },
    {
      "epoch": 1.0479134466769706,
      "grad_norm": 1.8602974186471763,
      "learning_rate": 7.711905824370457e-05,
      "loss": 1.0253,
      "step": 169
    },
    {
      "epoch": 1.054095826893354,
      "grad_norm": 1.571944862888458,
      "learning_rate": 7.70540305732591e-05,
      "loss": 1.0096,
      "step": 170
    },
    {
      "epoch": 1.0602782071097372,
      "grad_norm": 1.2148625487373597,
      "learning_rate": 7.69883052210247e-05,
      "loss": 1.0156,
      "step": 171
    },
    {
      "epoch": 1.0664605873261206,
      "grad_norm": 2.1277306224703865,
      "learning_rate": 7.692188342452885e-05,
      "loss": 1.0201,
      "step": 172
    },
    {
      "epoch": 1.0726429675425038,
      "grad_norm": 1.92583372296243,
      "learning_rate": 7.685476643441219e-05,
      "loss": 1.0207,
      "step": 173
    },
    {
      "epoch": 1.0788253477588872,
      "grad_norm": 1.8133100096935075,
      "learning_rate": 7.678695551440506e-05,
      "loss": 1.0181,
      "step": 174
    },
    {
      "epoch": 1.0850077279752706,
      "grad_norm": 1.4080085200242758,
      "learning_rate": 7.671845194130363e-05,
      "loss": 1.0219,
      "step": 175
    },
    {
      "epoch": 1.0911901081916537,
      "grad_norm": 2.2832079127892326,
      "learning_rate": 7.664925700494585e-05,
      "loss": 1.0199,
      "step": 176
    },
    {
      "epoch": 1.0973724884080371,
      "grad_norm": 2.150797505157169,
      "learning_rate": 7.657937200818722e-05,
      "loss": 1.0173,
      "step": 177
    },
    {
      "epoch": 1.1035548686244203,
      "grad_norm": 0.8849916762716471,
      "learning_rate": 7.65087982668762e-05,
      "loss": 1.005,
      "step": 178
    },
    {
      "epoch": 1.1097372488408037,
      "grad_norm": 3.229134861969822,
      "learning_rate": 7.643753710982949e-05,
      "loss": 1.0133,
      "step": 179
    },
    {
      "epoch": 1.1159196290571871,
      "grad_norm": 1.7293989048925642,
      "learning_rate": 7.636558987880694e-05,
      "loss": 1.0353,
      "step": 180
    },
    {
      "epoch": 1.1221020092735703,
      "grad_norm": 2.7325244810866747,
      "learning_rate": 7.629295792848639e-05,
      "loss": 1.0188,
      "step": 181
    },
    {
      "epoch": 1.1282843894899537,
      "grad_norm": 1.9813373605063669,
      "learning_rate": 7.621964262643805e-05,
      "loss": 1.0331,
      "step": 182
    },
    {
      "epoch": 1.1344667697063369,
      "grad_norm": 1.9157662111283729,
      "learning_rate": 7.614564535309882e-05,
      "loss": 0.9959,
      "step": 183
    },
    {
      "epoch": 1.1406491499227203,
      "grad_norm": 2.0892926569694685,
      "learning_rate": 7.607096750174629e-05,
      "loss": 1.0097,
      "step": 184
    },
    {
      "epoch": 1.1468315301391034,
      "grad_norm": 1.821781934019597,
      "learning_rate": 7.599561047847247e-05,
      "loss": 1.0213,
      "step": 185
    },
    {
      "epoch": 1.1530139103554868,
      "grad_norm": 1.8556200827944644,
      "learning_rate": 7.591957570215735e-05,
      "loss": 1.0084,
      "step": 186
    },
    {
      "epoch": 1.1591962905718702,
      "grad_norm": 1.3594082674263888,
      "learning_rate": 7.584286460444222e-05,
      "loss": 1.0044,
      "step": 187
    },
    {
      "epoch": 1.1653786707882534,
      "grad_norm": 2.2744478173451594,
      "learning_rate": 7.576547862970261e-05,
      "loss": 0.993,
      "step": 188
    },
    {
      "epoch": 1.1715610510046368,
      "grad_norm": 1.3757650290844639,
      "learning_rate": 7.568741923502118e-05,
      "loss": 1.0055,
      "step": 189
    },
    {
      "epoch": 1.1777434312210202,
      "grad_norm": 1.854325739268096,
      "learning_rate": 7.560868789016024e-05,
      "loss": 1.0125,
      "step": 190
    },
    {
      "epoch": 1.1839258114374034,
      "grad_norm": 1.5111560222547549,
      "learning_rate": 7.552928607753414e-05,
      "loss": 1.0005,
      "step": 191
    },
    {
      "epoch": 1.1901081916537868,
      "grad_norm": 1.811472101825589,
      "learning_rate": 7.544921529218125e-05,
      "loss": 1.004,
      "step": 192
    },
    {
      "epoch": 1.19629057187017,
      "grad_norm": 1.4223817808791495,
      "learning_rate": 7.536847704173593e-05,
      "loss": 1.0014,
      "step": 193
    },
    {
      "epoch": 1.2024729520865534,
      "grad_norm": 1.9843008102793314,
      "learning_rate": 7.528707284640004e-05,
      "loss": 1.0142,
      "step": 194
    },
    {
      "epoch": 1.2086553323029365,
      "grad_norm": 1.5650426744555666,
      "learning_rate": 7.520500423891442e-05,
      "loss": 1.0053,
      "step": 195
    },
    {
      "epoch": 1.21483771251932,
      "grad_norm": 1.5008249395490572,
      "learning_rate": 7.512227276452989e-05,
      "loss": 1.0001,
      "step": 196
    },
    {
      "epoch": 1.2210200927357033,
      "grad_norm": 1.8158622037471164,
      "learning_rate": 7.503887998097833e-05,
      "loss": 1.0046,
      "step": 197
    },
    {
      "epoch": 1.2272024729520865,
      "grad_norm": 1.594408802837267,
      "learning_rate": 7.495482745844317e-05,
      "loss": 0.9926,
      "step": 198
    },
    {
      "epoch": 1.23338485316847,
      "grad_norm": 1.6151725290749885,
      "learning_rate": 7.487011677953e-05,
      "loss": 1.0135,
      "step": 199
    },
    {
      "epoch": 1.239567233384853,
      "grad_norm": 1.6326603710438912,
      "learning_rate": 7.478474953923662e-05,
      "loss": 1.0011,
      "step": 200
    },
    {
      "epoch": 1.2457496136012365,
      "grad_norm": 1.8016191114443714,
      "learning_rate": 7.469872734492308e-05,
      "loss": 0.9952,
      "step": 201
    },
    {
      "epoch": 1.2519319938176197,
      "grad_norm": 1.1900057306211553,
      "learning_rate": 7.461205181628143e-05,
      "loss": 0.9903,
      "step": 202
    },
    {
      "epoch": 1.258114374034003,
      "grad_norm": 2.4753152288862763,
      "learning_rate": 7.452472458530522e-05,
      "loss": 1.0031,
      "step": 203
    },
    {
      "epoch": 1.2642967542503865,
      "grad_norm": 1.156802595501098,
      "learning_rate": 7.443674729625869e-05,
      "loss": 1.0059,
      "step": 204
    },
    {
      "epoch": 1.2704791344667696,
      "grad_norm": 1.41497847335521,
      "learning_rate": 7.434812160564592e-05,
      "loss": 1.0144,
      "step": 205
    },
    {
      "epoch": 1.276661514683153,
      "grad_norm": 2.3844147534681155,
      "learning_rate": 7.425884918217958e-05,
      "loss": 1.0058,
      "step": 206
    },
    {
      "epoch": 1.2828438948995364,
      "grad_norm": 1.3271435145317263,
      "learning_rate": 7.416893170674954e-05,
      "loss": 1.0165,
      "step": 207
    },
    {
      "epoch": 1.2890262751159196,
      "grad_norm": 1.6975589849591926,
      "learning_rate": 7.407837087239117e-05,
      "loss": 0.99,
      "step": 208
    },
    {
      "epoch": 1.295208655332303,
      "grad_norm": 1.8519388740645097,
      "learning_rate": 7.398716838425352e-05,
      "loss": 1.0089,
      "step": 209
    },
    {
      "epoch": 1.3013910355486862,
      "grad_norm": 1.6947179785568054,
      "learning_rate": 7.38953259595672e-05,
      "loss": 0.9879,
      "step": 210
    },
    {
      "epoch": 1.3075734157650696,
      "grad_norm": 1.3528548666192206,
      "learning_rate": 7.380284532761202e-05,
      "loss": 1.0024,
      "step": 211
    },
    {
      "epoch": 1.3137557959814528,
      "grad_norm": 1.6098695630590318,
      "learning_rate": 7.370972822968446e-05,
      "loss": 0.9804,
      "step": 212
    },
    {
      "epoch": 1.3199381761978362,
      "grad_norm": 1.5753154362328263,
      "learning_rate": 7.361597641906486e-05,
      "loss": 0.9905,
      "step": 213
    },
    {
      "epoch": 1.3261205564142196,
      "grad_norm": 1.4448197507546439,
      "learning_rate": 7.352159166098441e-05,
      "loss": 0.9847,
      "step": 214
    },
    {
      "epoch": 1.3323029366306027,
      "grad_norm": 2.188314280055232,
      "learning_rate": 7.342657573259194e-05,
      "loss": 0.9992,
      "step": 215
    },
    {
      "epoch": 1.3384853168469861,
      "grad_norm": 1.2657997745237497,
      "learning_rate": 7.333093042292044e-05,
      "loss": 0.9904,
      "step": 216
    },
    {
      "epoch": 1.3446676970633695,
      "grad_norm": 1.2714200260276027,
      "learning_rate": 7.323465753285337e-05,
      "loss": 1.0057,
      "step": 217
    },
    {
      "epoch": 1.3508500772797527,
      "grad_norm": 1.3287688093801202,
      "learning_rate": 7.313775887509075e-05,
      "loss": 1.0102,
      "step": 218
    },
    {
      "epoch": 1.3570324574961359,
      "grad_norm": 3.081273929769149,
      "learning_rate": 7.304023627411505e-05,
      "loss": 1.0116,
      "step": 219
    },
    {
      "epoch": 1.3632148377125193,
      "grad_norm": 1.767890752318478,
      "learning_rate": 7.29420915661568e-05,
      "loss": 0.9999,
      "step": 220
    },
    {
      "epoch": 1.3693972179289027,
      "grad_norm": 3.3832241258249143,
      "learning_rate": 7.284332659916006e-05,
      "loss": 1.0146,
      "step": 221
    },
    {
      "epoch": 1.3755795981452859,
      "grad_norm": 2.792347707620742,
      "learning_rate": 7.274394323274761e-05,
      "loss": 1.038,
      "step": 222
    },
    {
      "epoch": 1.3817619783616693,
      "grad_norm": 2.717608779519406,
      "learning_rate": 7.26439433381859e-05,
      "loss": 0.9968,
      "step": 223
    },
    {
      "epoch": 1.3879443585780527,
      "grad_norm": 2.6449000513172023,
      "learning_rate": 7.254332879834986e-05,
      "loss": 1.0202,
      "step": 224
    },
    {
      "epoch": 1.3941267387944358,
      "grad_norm": 2.5816179353247386,
      "learning_rate": 7.244210150768745e-05,
      "loss": 1.0126,
      "step": 225
    },
    {
      "epoch": 1.4003091190108192,
      "grad_norm": 1.8819265089556003,
      "learning_rate": 7.234026337218395e-05,
      "loss": 1.0044,
      "step": 226
    },
    {
      "epoch": 1.4064914992272024,
      "grad_norm": 2.778434728345896,
      "learning_rate": 7.223781630932612e-05,
      "loss": 1.0112,
      "step": 227
    },
    {
      "epoch": 1.4126738794435858,
      "grad_norm": 2.3953953560776164,
      "learning_rate": 7.213476224806604e-05,
      "loss": 1.007,
      "step": 228
    },
    {
      "epoch": 1.418856259659969,
      "grad_norm": 2.1884861738889554,
      "learning_rate": 7.203110312878487e-05,
      "loss": 0.9997,
      "step": 229
    },
    {
      "epoch": 1.4250386398763524,
      "grad_norm": 1.9108092994214398,
      "learning_rate": 7.192684090325621e-05,
      "loss": 0.9961,
      "step": 230
    },
    {
      "epoch": 1.4312210200927358,
      "grad_norm": 2.070150415301729,
      "learning_rate": 7.182197753460947e-05,
      "loss": 0.9928,
      "step": 231
    },
    {
      "epoch": 1.437403400309119,
      "grad_norm": 1.2896742775847363,
      "learning_rate": 7.171651499729278e-05,
      "loss": 0.9843,
      "step": 232
    },
    {
      "epoch": 1.4435857805255023,
      "grad_norm": 2.4892589479787914,
      "learning_rate": 7.161045527703593e-05,
      "loss": 0.997,
      "step": 233
    },
    {
      "epoch": 1.4497681607418857,
      "grad_norm": 1.9172236222391295,
      "learning_rate": 7.150380037081293e-05,
      "loss": 0.9986,
      "step": 234
    },
    {
      "epoch": 1.455950540958269,
      "grad_norm": 2.2862715410270282,
      "learning_rate": 7.139655228680438e-05,
      "loss": 1.0075,
      "step": 235
    },
    {
      "epoch": 1.4621329211746523,
      "grad_norm": 1.8213511562680735,
      "learning_rate": 7.128871304435968e-05,
      "loss": 1.0109,
      "step": 236
    },
    {
      "epoch": 1.4683153013910355,
      "grad_norm": 2.1357271598290186,
      "learning_rate": 7.118028467395904e-05,
      "loss": 0.9899,
      "step": 237
    },
    {
      "epoch": 1.474497681607419,
      "grad_norm": 1.51723941617764,
      "learning_rate": 7.107126921717526e-05,
      "loss": 0.999,
      "step": 238
    },
    {
      "epoch": 1.480680061823802,
      "grad_norm": 2.216906523864941,
      "learning_rate": 7.096166872663516e-05,
      "loss": 0.9908,
      "step": 239
    },
    {
      "epoch": 1.4868624420401855,
      "grad_norm": 1.6475567811924245,
      "learning_rate": 7.085148526598109e-05,
      "loss": 0.9846,
      "step": 240
    },
    {
      "epoch": 1.4930448222565689,
      "grad_norm": 1.8561898420792513,
      "learning_rate": 7.074072090983201e-05,
      "loss": 0.9885,
      "step": 241
    },
    {
      "epoch": 1.499227202472952,
      "grad_norm": 1.4076817648604656,
      "learning_rate": 7.062937774374444e-05,
      "loss": 0.9827,
      "step": 242
    },
    {
      "epoch": 1.5054095826893354,
      "grad_norm": 2.043899369455665,
      "learning_rate": 7.051745786417309e-05,
      "loss": 1.0022,
      "step": 243
    },
    {
      "epoch": 1.5115919629057188,
      "grad_norm": 1.7025584839030323,
      "learning_rate": 7.04049633784316e-05,
      "loss": 0.9935,
      "step": 244
    },
    {
      "epoch": 1.517774343122102,
      "grad_norm": 1.8180249630171987,
      "learning_rate": 7.029189640465266e-05,
      "loss": 0.9948,
      "step": 245
    },
    {
      "epoch": 1.5239567233384852,
      "grad_norm": 1.617153539919805,
      "learning_rate": 7.017825907174819e-05,
      "loss": 0.9946,
      "step": 246
    },
    {
      "epoch": 1.5301391035548686,
      "grad_norm": 1.9584695599096857,
      "learning_rate": 7.006405351936935e-05,
      "loss": 0.981,
      "step": 247
    },
    {
      "epoch": 1.536321483771252,
      "grad_norm": 1.5120141733299006,
      "learning_rate": 6.99492818978661e-05,
      "loss": 0.9957,
      "step": 248
    },
    {
      "epoch": 1.5425038639876352,
      "grad_norm": 1.7224427233243673,
      "learning_rate": 6.983394636824681e-05,
      "loss": 0.985,
      "step": 249
    },
    {
      "epoch": 1.5486862442040186,
      "grad_norm": 1.352522335955625,
      "learning_rate": 6.971804910213753e-05,
      "loss": 0.9947,
      "step": 250
    },
    {
      "epoch": 1.554868624420402,
      "grad_norm": 1.9649504513247107,
      "learning_rate": 6.960159228174118e-05,
      "loss": 0.9802,
      "step": 251
    },
    {
      "epoch": 1.5610510046367851,
      "grad_norm": 1.400091397431256,
      "learning_rate": 6.94845780997963e-05,
      "loss": 0.9869,
      "step": 252
    },
    {
      "epoch": 1.5672333848531683,
      "grad_norm": 1.809258707764459,
      "learning_rate": 6.936700875953593e-05,
      "loss": 0.9951,
      "step": 253
    },
    {
      "epoch": 1.573415765069552,
      "grad_norm": 1.544914399926865,
      "learning_rate": 6.924888647464606e-05,
      "loss": 0.9866,
      "step": 254
    },
    {
      "epoch": 1.5795981452859351,
      "grad_norm": 1.6678917783004827,
      "learning_rate": 6.91302134692239e-05,
      "loss": 0.9839,
      "step": 255
    },
    {
      "epoch": 1.5857805255023183,
      "grad_norm": 1.4327533793298342,
      "learning_rate": 6.90109919777361e-05,
      "loss": 0.9842,
      "step": 256
    },
    {
      "epoch": 1.5919629057187017,
      "grad_norm": 1.4415951253324433,
      "learning_rate": 6.889122424497659e-05,
      "loss": 0.9888,
      "step": 257
    },
    {
      "epoch": 1.598145285935085,
      "grad_norm": 1.305272535568148,
      "learning_rate": 6.877091252602437e-05,
      "loss": 0.9925,
      "step": 258
    },
    {
      "epoch": 1.6043276661514683,
      "grad_norm": 2.4748168628602016,
      "learning_rate": 6.865005908620102e-05,
      "loss": 0.9862,
      "step": 259
    },
    {
      "epoch": 1.6105100463678517,
      "grad_norm": 1.5958180723817172,
      "learning_rate": 6.852866620102808e-05,
      "loss": 1.0002,
      "step": 260
    },
    {
      "epoch": 1.616692426584235,
      "grad_norm": 1.6536900940347805,
      "learning_rate": 6.840673615618416e-05,
      "loss": 0.9708,
      "step": 261
    },
    {
      "epoch": 1.6228748068006182,
      "grad_norm": 2.6056238946624335,
      "learning_rate": 6.828427124746191e-05,
      "loss": 0.9896,
      "step": 262
    },
    {
      "epoch": 1.6290571870170014,
      "grad_norm": 1.090301990739562,
      "learning_rate": 6.816127378072484e-05,
      "loss": 0.9909,
      "step": 263
    },
    {
      "epoch": 1.6352395672333848,
      "grad_norm": 2.9921825238289768,
      "learning_rate": 6.803774607186389e-05,
      "loss": 1.014,
      "step": 264
    },
    {
      "epoch": 1.6414219474497682,
      "grad_norm": 2.2996006999305822,
      "learning_rate": 6.791369044675372e-05,
      "loss": 1.0089,
      "step": 265
    },
    {
      "epoch": 1.6476043276661514,
      "grad_norm": 2.472497102419325,
      "learning_rate": 6.778910924120913e-05,
      "loss": 0.9898,
      "step": 266
    },
    {
      "epoch": 1.6537867078825348,
      "grad_norm": 1.4712820809315457,
      "learning_rate": 6.766400480094084e-05,
      "loss": 0.9982,
      "step": 267
    },
    {
      "epoch": 1.6599690880989182,
      "grad_norm": 1.7900944541125188,
      "learning_rate": 6.753837948151148e-05,
      "loss": 1.0049,
      "step": 268
    },
    {
      "epoch": 1.6661514683153014,
      "grad_norm": 1.5422419041247095,
      "learning_rate": 6.74122356482912e-05,
      "loss": 0.984,
      "step": 269
    },
    {
      "epoch": 1.6723338485316845,
      "grad_norm": 1.7666863054997888,
      "learning_rate": 6.728557567641313e-05,
      "loss": 0.9982,
      "step": 270
    },
    {
      "epoch": 1.6785162287480682,
      "grad_norm": 1.0188448221447397,
      "learning_rate": 6.715840195072862e-05,
      "loss": 0.9927,
      "step": 271
    },
    {
      "epoch": 1.6846986089644513,
      "grad_norm": 1.4323614508510398,
      "learning_rate": 6.703071686576243e-05,
      "loss": 0.9933,
      "step": 272
    },
    {
      "epoch": 1.6908809891808345,
      "grad_norm": 1.3733086668334178,
      "learning_rate": 6.690252282566753e-05,
      "loss": 0.98,
      "step": 273
    },
    {
      "epoch": 1.697063369397218,
      "grad_norm": 1.7979930738646992,
      "learning_rate": 6.677382224417991e-05,
      "loss": 0.9915,
      "step": 274
    },
    {
      "epoch": 1.7032457496136013,
      "grad_norm": 1.5018213587724738,
      "learning_rate": 6.664461754457313e-05,
      "loss": 0.9789,
      "step": 275
    },
    {
      "epoch": 1.7094281298299845,
      "grad_norm": 0.8313817964266842,
      "learning_rate": 6.651491115961264e-05,
      "loss": 0.9898,
      "step": 276
    },
    {
      "epoch": 1.7156105100463679,
      "grad_norm": 1.563766264104193,
      "learning_rate": 6.638470553151003e-05,
      "loss": 0.9839,
      "step": 277
    },
    {
      "epoch": 1.7217928902627513,
      "grad_norm": 1.3394469302914662,
      "learning_rate": 6.625400311187701e-05,
      "loss": 0.9862,
      "step": 278
    },
    {
      "epoch": 1.7279752704791345,
      "grad_norm": 0.9069310750596258,
      "learning_rate": 6.61228063616793e-05,
      "loss": 0.9772,
      "step": 279
    },
    {
      "epoch": 1.7341576506955176,
      "grad_norm": 1.1937761300056076,
      "learning_rate": 6.599111775119019e-05,
      "loss": 0.9834,
      "step": 280
    },
    {
      "epoch": 1.740340030911901,
      "grad_norm": 2.0959839523048722,
      "learning_rate": 6.585893975994416e-05,
      "loss": 0.9908,
      "step": 281
    },
    {
      "epoch": 1.7465224111282844,
      "grad_norm": 1.4068229617185941,
      "learning_rate": 6.572627487669005e-05,
      "loss": 0.9857,
      "step": 282
    },
    {
      "epoch": 1.7527047913446676,
      "grad_norm": 1.497838436021524,
      "learning_rate": 6.55931255993444e-05,
      "loss": 0.9982,
      "step": 283
    },
    {
      "epoch": 1.758887171561051,
      "grad_norm": 1.2049062127509398,
      "learning_rate": 6.545949443494416e-05,
      "loss": 0.9816,
      "step": 284
    },
    {
      "epoch": 1.7650695517774344,
      "grad_norm": 2.293459748858112,
      "learning_rate": 6.53253838995997e-05,
      "loss": 0.9939,
      "step": 285
    },
    {
      "epoch": 1.7712519319938176,
      "grad_norm": 1.196771206420521,
      "learning_rate": 6.519079651844733e-05,
      "loss": 0.9917,
      "step": 286
    },
    {
      "epoch": 1.7774343122102008,
      "grad_norm": 2.6719101899588464,
      "learning_rate": 6.505573482560181e-05,
      "loss": 1.0024,
      "step": 287
    },
    {
      "epoch": 1.7836166924265844,
      "grad_norm": 2.151184032156713,
      "learning_rate": 6.492020136410854e-05,
      "loss": 1.0023,
      "step": 288
    },
    {
      "epoch": 1.7897990726429676,
      "grad_norm": 2.049945153207459,
      "learning_rate": 6.478419868589582e-05,
      "loss": 0.9942,
      "step": 289
    },
    {
      "epoch": 1.7959814528593507,
      "grad_norm": 1.7030760467943136,
      "learning_rate": 6.464772935172666e-05,
      "loss": 0.9972,
      "step": 290
    },
    {
      "epoch": 1.8021638330757341,
      "grad_norm": 1.6124042099877869,
      "learning_rate": 6.451079593115065e-05,
      "loss": 0.9859,
      "step": 291
    },
    {
      "epoch": 1.8083462132921175,
      "grad_norm": 1.7086055086974385,
      "learning_rate": 6.437340100245558e-05,
      "loss": 0.9875,
      "step": 292
    },
    {
      "epoch": 1.8145285935085007,
      "grad_norm": 1.1804574569777302,
      "learning_rate": 6.423554715261885e-05,
      "loss": 0.9863,
      "step": 293
    },
    {
      "epoch": 1.820710973724884,
      "grad_norm": 2.072000983521134,
      "learning_rate": 6.409723697725876e-05,
      "loss": 0.984,
      "step": 294
    },
    {
      "epoch": 1.8268933539412675,
      "grad_norm": 1.5605771368628958,
      "learning_rate": 6.395847308058569e-05,
      "loss": 1.0016,
      "step": 295
    },
    {
      "epoch": 1.8330757341576507,
      "grad_norm": 2.0470560137227944,
      "learning_rate": 6.381925807535302e-05,
      "loss": 0.9797,
      "step": 296
    },
    {
      "epoch": 1.8392581143740339,
      "grad_norm": 1.5269600972745945,
      "learning_rate": 6.367959458280795e-05,
      "loss": 0.9952,
      "step": 297
    },
    {
      "epoch": 1.8454404945904173,
      "grad_norm": 1.6911790008170018,
      "learning_rate": 6.353948523264216e-05,
      "loss": 0.9948,
      "step": 298
    },
    {
      "epoch": 1.8516228748068007,
      "grad_norm": 1.5053272401335374,
      "learning_rate": 6.339893266294223e-05,
      "loss": 0.9851,
      "step": 299
    },
    {
      "epoch": 1.8578052550231838,
      "grad_norm": 1.3443219469962477,
      "learning_rate": 6.325793952014007e-05,
      "loss": 0.9819,
      "step": 300
    },
    {
      "epoch": 1.8639876352395672,
      "grad_norm": 1.3516905581796814,
      "learning_rate": 6.311650845896303e-05,
      "loss": 0.9957,
      "step": 301
    },
    {
      "epoch": 1.8701700154559506,
      "grad_norm": 1.2720069275023291,
      "learning_rate": 6.297464214238391e-05,
      "loss": 0.9773,
      "step": 302
    },
    {
      "epoch": 1.8763523956723338,
      "grad_norm": 1.281381485288164,
      "learning_rate": 6.283234324157084e-05,
      "loss": 0.9751,
      "step": 303
    },
    {
      "epoch": 1.8825347758887172,
      "grad_norm": 1.109592460056133,
      "learning_rate": 6.268961443583696e-05,
      "loss": 0.9908,
      "step": 304
    },
    {
      "epoch": 1.8887171561051006,
      "grad_norm": 2.4545190255394225,
      "learning_rate": 6.254645841259005e-05,
      "loss": 0.9889,
      "step": 305
    },
    {
      "epoch": 1.8948995363214838,
      "grad_norm": 1.4595972385019333,
      "learning_rate": 6.24028778672818e-05,
      "loss": 0.987,
      "step": 306
    },
    {
      "epoch": 1.901081916537867,
      "grad_norm": 3.0482536802039535,
      "learning_rate": 6.225887550335715e-05,
      "loss": 0.9898,
      "step": 307
    },
    {
      "epoch": 1.9072642967542504,
      "grad_norm": 2.1361756083703503,
      "learning_rate": 6.21144540322034e-05,
      "loss": 0.9899,
      "step": 308
    },
    {
      "epoch": 1.9134466769706338,
      "grad_norm": 2.820913172877751,
      "learning_rate": 6.196961617309908e-05,
      "loss": 0.9906,
      "step": 309
    },
    {
      "epoch": 1.919629057187017,
      "grad_norm": 2.1277865542014536,
      "learning_rate": 6.182436465316279e-05,
      "loss": 0.9755,
      "step": 310
    },
    {
      "epoch": 1.9258114374034003,
      "grad_norm": 2.3636934602350372,
      "learning_rate": 6.16787022073019e-05,
      "loss": 0.981,
      "step": 311
    },
    {
      "epoch": 1.9319938176197837,
      "grad_norm": 1.5489010186147583,
      "learning_rate": 6.153263157816098e-05,
      "loss": 0.9714,
      "step": 312
    },
    {
      "epoch": 1.938176197836167,
      "grad_norm": 2.4665556380164366,
      "learning_rate": 6.138615551607017e-05,
      "loss": 0.9852,
      "step": 313
    },
    {
      "epoch": 1.94435857805255,
      "grad_norm": 1.769365451752886,
      "learning_rate": 6.123927677899344e-05,
      "loss": 0.9817,
      "step": 314
    },
    {
      "epoch": 1.9505409582689337,
      "grad_norm": 2.276315268584059,
      "learning_rate": 6.109199813247666e-05,
      "loss": 0.98,
      "step": 315
    },
    {
      "epoch": 1.9567233384853169,
      "grad_norm": 1.7457681230603967,
      "learning_rate": 6.0944322349595426e-05,
      "loss": 0.9943,
      "step": 316
    },
    {
      "epoch": 1.9629057187017,
      "grad_norm": 1.897451826245397,
      "learning_rate": 6.0796252210902986e-05,
      "loss": 0.9707,
      "step": 317
    },
    {
      "epoch": 1.9690880989180835,
      "grad_norm": 1.7758674602809486,
      "learning_rate": 6.06477905043778e-05,
      "loss": 0.986,
      "step": 318
    },
    {
      "epoch": 1.9752704791344669,
      "grad_norm": 1.4801297268108475,
      "learning_rate": 6.049894002537109e-05,
      "loss": 0.9624,
      "step": 319
    },
    {
      "epoch": 1.98145285935085,
      "grad_norm": 1.1171987537741563,
      "learning_rate": 6.034970357655415e-05,
      "loss": 0.9712,
      "step": 320
    },
    {
      "epoch": 1.9876352395672334,
      "grad_norm": 1.5190947129294494,
      "learning_rate": 6.020008396786562e-05,
      "loss": 0.9823,
      "step": 321
    },
    {
      "epoch": 1.9938176197836168,
      "grad_norm": 1.006947536865789,
      "learning_rate": 6.00500840164586e-05,
      "loss": 0.9771,
      "step": 322
    },
    {
      "epoch": 2.003091190108192,
      "grad_norm": 4.006886737490512,
      "learning_rate": 5.989970654664756e-05,
      "loss": 1.6791,
      "step": 323
    },
    {
      "epoch": 2.009273570324575,
      "grad_norm": 1.8558698221909793,
      "learning_rate": 5.974895438985514e-05,
      "loss": 0.952,
      "step": 324
    },
    {
      "epoch": 2.015455950540958,
      "grad_norm": 1.455258316310302,
      "learning_rate": 5.959783038455891e-05,
      "loss": 0.9402,
      "step": 325
    },
    {
      "epoch": 2.021638330757342,
      "grad_norm": 1.7859640511983568,
      "learning_rate": 5.944633737623791e-05,
      "loss": 0.9346,
      "step": 326
    },
    {
      "epoch": 2.027820710973725,
      "grad_norm": 0.9891580259760341,
      "learning_rate": 5.929447821731899e-05,
      "loss": 0.9467,
      "step": 327
    },
    {
      "epoch": 2.034003091190108,
      "grad_norm": 1.2780787887464713,
      "learning_rate": 5.914225576712323e-05,
      "loss": 0.9364,
      "step": 328
    },
    {
      "epoch": 2.0401854714064913,
      "grad_norm": 1.747509422985778,
      "learning_rate": 5.8989672891811987e-05,
      "loss": 0.9337,
      "step": 329
    },
    {
      "epoch": 2.046367851622875,
      "grad_norm": 1.2249047782739366,
      "learning_rate": 5.883673246433302e-05,
      "loss": 0.9498,
      "step": 330
    },
    {
      "epoch": 2.052550231839258,
      "grad_norm": 2.003614583053326,
      "learning_rate": 5.8683437364366316e-05,
      "loss": 0.9535,
      "step": 331
    },
    {
      "epoch": 2.0587326120556413,
      "grad_norm": 1.226972934974554,
      "learning_rate": 5.852979047826996e-05,
      "loss": 0.9329,
      "step": 332
    },
    {
      "epoch": 2.064914992272025,
      "grad_norm": 2.172547322337205,
      "learning_rate": 5.837579469902567e-05,
      "loss": 0.9479,
      "step": 333
    },
    {
      "epoch": 2.071097372488408,
      "grad_norm": 1.6402327078514485,
      "learning_rate": 5.822145292618444e-05,
      "loss": 0.9462,
      "step": 334
    },
    {
      "epoch": 2.0772797527047913,
      "grad_norm": 1.6699665609409802,
      "learning_rate": 5.806676806581192e-05,
      "loss": 0.9431,
      "step": 335
    },
    {
      "epoch": 2.0834621329211744,
      "grad_norm": 1.3197835509782132,
      "learning_rate": 5.7911743030433596e-05,
      "loss": 0.9557,
      "step": 336
    },
    {
      "epoch": 2.089644513137558,
      "grad_norm": 1.1660288371337755,
      "learning_rate": 5.77563807389801e-05,
      "loss": 0.9356,
      "step": 337
    },
    {
      "epoch": 2.0958268933539412,
      "grad_norm": 1.1228481962160324,
      "learning_rate": 5.760068411673215e-05,
      "loss": 0.9417,
      "step": 338
    },
    {
      "epoch": 2.1020092735703244,
      "grad_norm": 1.372197582933058,
      "learning_rate": 5.7444656095265506e-05,
      "loss": 0.9269,
      "step": 339
    },
    {
      "epoch": 2.108191653786708,
      "grad_norm": 1.0621673720020575,
      "learning_rate": 5.7288299612395784e-05,
      "loss": 0.9376,
      "step": 340
    },
    {
      "epoch": 2.114374034003091,
      "grad_norm": 1.290738782393939,
      "learning_rate": 5.713161761212309e-05,
      "loss": 0.9484,
      "step": 341
    },
    {
      "epoch": 2.1205564142194744,
      "grad_norm": 1.5545485725757495,
      "learning_rate": 5.697461304457667e-05,
      "loss": 0.9451,
      "step": 342
    },
    {
      "epoch": 2.126738794435858,
      "grad_norm": 0.9984947527180189,
      "learning_rate": 5.681728886595926e-05,
      "loss": 0.9264,
      "step": 343
    },
    {
      "epoch": 2.132921174652241,
      "grad_norm": 1.435440933520763,
      "learning_rate": 5.665964803849152e-05,
      "loss": 0.9576,
      "step": 344
    },
    {
      "epoch": 2.1391035548686244,
      "grad_norm": 1.0914760893879398,
      "learning_rate": 5.65016935303562e-05,
      "loss": 0.9341,
      "step": 345
    },
    {
      "epoch": 2.1452859350850075,
      "grad_norm": 0.9611538837750422,
      "learning_rate": 5.634342831564228e-05,
      "loss": 0.9408,
      "step": 346
    },
    {
      "epoch": 2.151468315301391,
      "grad_norm": 1.1965180402340152,
      "learning_rate": 5.6184855374288946e-05,
      "loss": 0.9415,
      "step": 347
    },
    {
      "epoch": 2.1576506955177743,
      "grad_norm": 1.0359352454914963,
      "learning_rate": 5.602597769202953e-05,
      "loss": 0.9506,
      "step": 348
    },
    {
      "epoch": 2.1638330757341575,
      "grad_norm": 1.8769371801728494,
      "learning_rate": 5.586679826033521e-05,
      "loss": 0.9369,
      "step": 349
    },
    {
      "epoch": 2.170015455950541,
      "grad_norm": 0.9836827672447646,
      "learning_rate": 5.5707320076358776e-05,
      "loss": 0.9452,
      "step": 350
    },
    {
      "epoch": 2.1761978361669243,
      "grad_norm": 1.32097187927076,
      "learning_rate": 5.5547546142878164e-05,
      "loss": 0.9464,
      "step": 351
    },
    {
      "epoch": 2.1823802163833075,
      "grad_norm": 1.3411663056222534,
      "learning_rate": 5.538747946823987e-05,
      "loss": 0.9321,
      "step": 352
    },
    {
      "epoch": 2.1885625965996907,
      "grad_norm": 1.3570047609173206,
      "learning_rate": 5.522712306630236e-05,
      "loss": 0.9373,
      "step": 353
    },
    {
      "epoch": 2.1947449768160743,
      "grad_norm": 0.9761232794419572,
      "learning_rate": 5.506647995637932e-05,
      "loss": 0.9462,
      "step": 354
    },
    {
      "epoch": 2.2009273570324575,
      "grad_norm": 1.430074471656494,
      "learning_rate": 5.490555316318279e-05,
      "loss": 0.9485,
      "step": 355
    },
    {
      "epoch": 2.2071097372488406,
      "grad_norm": 1.059127482296428,
      "learning_rate": 5.474434571676622e-05,
      "loss": 0.9422,
      "step": 356
    },
    {
      "epoch": 2.2132921174652243,
      "grad_norm": 1.8097837664747738,
      "learning_rate": 5.4582860652467385e-05,
      "loss": 0.938,
      "step": 357
    },
    {
      "epoch": 2.2194744976816074,
      "grad_norm": 1.125902108018908,
      "learning_rate": 5.4421101010851315e-05,
      "loss": 0.9334,
      "step": 358
    },
    {
      "epoch": 2.2256568778979906,
      "grad_norm": 1.8628860336453976,
      "learning_rate": 5.425906983765295e-05,
      "loss": 0.9462,
      "step": 359
    },
    {
      "epoch": 2.2318392581143742,
      "grad_norm": 1.5709465886819862,
      "learning_rate": 5.409677018371986e-05,
      "loss": 0.9563,
      "step": 360
    },
    {
      "epoch": 2.2380216383307574,
      "grad_norm": 1.5452591189158589,
      "learning_rate": 5.393420510495476e-05,
      "loss": 0.9542,
      "step": 361
    },
    {
      "epoch": 2.2442040185471406,
      "grad_norm": 1.215273856313518,
      "learning_rate": 5.377137766225799e-05,
      "loss": 0.9401,
      "step": 362
    },
    {
      "epoch": 2.250386398763524,
      "grad_norm": 1.2233276485983613,
      "learning_rate": 5.36082909214699e-05,
      "loss": 0.9448,
      "step": 363
    },
    {
      "epoch": 2.2565687789799074,
      "grad_norm": 1.3147561966681955,
      "learning_rate": 5.344494795331308e-05,
      "loss": 0.9465,
      "step": 364
    },
    {
      "epoch": 2.2627511591962906,
      "grad_norm": 1.4155564384468768,
      "learning_rate": 5.328135183333456e-05,
      "loss": 0.951,
      "step": 365
    },
    {
      "epoch": 2.2689335394126737,
      "grad_norm": 0.6114114300419576,
      "learning_rate": 5.311750564184795e-05,
      "loss": 0.9396,
      "step": 366
    },
    {
      "epoch": 2.2751159196290573,
      "grad_norm": 1.5798139980221138,
      "learning_rate": 5.2953412463875364e-05,
      "loss": 0.9506,
      "step": 367
    },
    {
      "epoch": 2.2812982998454405,
      "grad_norm": 0.9988799351566259,
      "learning_rate": 5.278907538908936e-05,
      "loss": 0.9526,
      "step": 368
    },
    {
      "epoch": 2.2874806800618237,
      "grad_norm": 1.05377706000738,
      "learning_rate": 5.26244975117548e-05,
      "loss": 0.942,
      "step": 369
    },
    {
      "epoch": 2.293663060278207,
      "grad_norm": 1.5699703997946644,
      "learning_rate": 5.2459681930670536e-05,
      "loss": 0.9511,
      "step": 370
    },
    {
      "epoch": 2.2998454404945905,
      "grad_norm": 0.9947848025404337,
      "learning_rate": 5.2294631749111125e-05,
      "loss": 0.9151,
      "step": 371
    },
    {
      "epoch": 2.3060278207109737,
      "grad_norm": 1.1765237704089029,
| "learning_rate": 5.2129350074768316e-05, |
| "loss": 0.9467, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.312210200927357, |
| "grad_norm": 1.5204036418963962, |
| "learning_rate": 5.1963840019692616e-05, |
| "loss": 0.953, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.3183925811437405, |
| "grad_norm": 0.9961157368195384, |
| "learning_rate": 5.1798104700234655e-05, |
| "loss": 0.9353, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.3245749613601236, |
| "grad_norm": 1.2662517756577527, |
| "learning_rate": 5.16321472369865e-05, |
| "loss": 0.9313, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.330757341576507, |
| "grad_norm": 1.0077333440409275, |
| "learning_rate": 5.146597075472293e-05, |
| "loss": 0.949, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.3369397217928904, |
| "grad_norm": 1.1569892416058336, |
| "learning_rate": 5.129957838234255e-05, |
| "loss": 0.9421, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.3431221020092736, |
| "grad_norm": 1.0327382337486777, |
| "learning_rate": 5.113297325280896e-05, |
| "loss": 0.9359, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.349304482225657, |
| "grad_norm": 1.5413059311272268, |
| "learning_rate": 5.0966158503091673e-05, |
| "loss": 0.9308, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.3554868624420404, |
| "grad_norm": 1.0041025469926124, |
| "learning_rate": 5.079913727410712e-05, |
| "loss": 0.9472, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.3616692426584236, |
| "grad_norm": 1.0449355058196719, |
| "learning_rate": 5.063191271065945e-05, |
| "loss": 0.9507, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.3678516228748068, |
| "grad_norm": 1.4231290368942768, |
| "learning_rate": 5.046448796138138e-05, |
| "loss": 0.9617, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.37403400309119, |
| "grad_norm": 1.4686945371549769, |
| "learning_rate": 5.029686617867488e-05, |
| "loss": 0.9487, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.3802163833075736, |
| "grad_norm": 0.7321343928729324, |
| "learning_rate": 5.012905051865179e-05, |
| "loss": 0.9134, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.3863987635239567, |
| "grad_norm": 1.7871707923975175, |
| "learning_rate": 4.9961044141074454e-05, |
| "loss": 0.9454, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.39258114374034, |
| "grad_norm": 0.8554156480675892, |
| "learning_rate": 4.979285020929617e-05, |
| "loss": 0.958, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.398763523956723, |
| "grad_norm": 2.0222183736349177, |
| "learning_rate": 4.9624471890201656e-05, |
| "loss": 0.9398, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.4049459041731067, |
| "grad_norm": 1.1083392384698756, |
| "learning_rate": 4.945591235414741e-05, |
| "loss": 0.9353, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.41112828438949, |
| "grad_norm": 2.249237349775883, |
| "learning_rate": 4.9287174774902034e-05, |
| "loss": 0.9299, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.417310664605873, |
| "grad_norm": 1.7547197652519158, |
| "learning_rate": 4.911826232958647e-05, |
| "loss": 0.9465, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.4234930448222567, |
| "grad_norm": 1.816970919376788, |
| "learning_rate": 4.894917819861416e-05, |
| "loss": 0.9425, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.42967542503864, |
| "grad_norm": 1.5766998688721199, |
| "learning_rate": 4.8779925565631166e-05, |
| "loss": 0.9572, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.435857805255023, |
| "grad_norm": 1.6988141319269177, |
| "learning_rate": 4.8610507617456265e-05, |
| "loss": 0.9401, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.4420401854714067, |
| "grad_norm": 1.4658219077550854, |
| "learning_rate": 4.8440927544020896e-05, |
| "loss": 0.9429, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.44822256568779, |
| "grad_norm": 1.4942830281619395, |
| "learning_rate": 4.82711885383091e-05, |
| "loss": 0.9305, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.454404945904173, |
| "grad_norm": 1.4911047290042807, |
| "learning_rate": 4.810129379629745e-05, |
| "loss": 0.9509, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.4605873261205566, |
| "grad_norm": 1.0945238549084377, |
| "learning_rate": 4.793124651689484e-05, |
| "loss": 0.9319, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.46676970633694, |
| "grad_norm": 1.6671980576283978, |
| "learning_rate": 4.7761049901882227e-05, |
| "loss": 0.939, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.472952086553323, |
| "grad_norm": 0.92852786359375, |
| "learning_rate": 4.7590707155852406e-05, |
| "loss": 0.954, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.479134466769706, |
| "grad_norm": 1.7875978184704922, |
| "learning_rate": 4.74202214861496e-05, |
| "loss": 0.9381, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.48531684698609, |
| "grad_norm": 1.212284622798296, |
| "learning_rate": 4.7249596102809164e-05, |
| "loss": 0.9467, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.491499227202473, |
| "grad_norm": 1.7168328470834615, |
| "learning_rate": 4.707883421849703e-05, |
| "loss": 0.9496, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.497681607418856, |
| "grad_norm": 1.3407496449046934, |
| "learning_rate": 4.6907939048449314e-05, |
| "loss": 0.9486, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.5038639876352393, |
| "grad_norm": 1.4996984373050763, |
| "learning_rate": 4.673691381041173e-05, |
| "loss": 0.9298, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.510046367851623, |
| "grad_norm": 1.4309169783530684, |
| "learning_rate": 4.6565761724579e-05, |
| "loss": 0.9452, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.516228748068006, |
| "grad_norm": 1.4369198232445761, |
| "learning_rate": 4.6394486013534265e-05, |
| "loss": 0.9274, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.5224111282843893, |
| "grad_norm": 1.3376962293350674, |
| "learning_rate": 4.6223089902188336e-05, |
| "loss": 0.9321, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.528593508500773, |
| "grad_norm": 1.3175000668496903, |
| "learning_rate": 4.605157661771904e-05, |
| "loss": 0.9385, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.534775888717156, |
| "grad_norm": 1.6617422309422225, |
| "learning_rate": 4.5879949389510406e-05, |
| "loss": 0.9301, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.5409582689335393, |
| "grad_norm": 1.3172794288702236, |
| "learning_rate": 4.570821144909193e-05, |
| "loss": 0.9423, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.547140649149923, |
| "grad_norm": 1.6218116986054838, |
| "learning_rate": 4.553636603007761e-05, |
| "loss": 0.9309, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.553323029366306, |
| "grad_norm": 1.282648230450059, |
| "learning_rate": 4.536441636810521e-05, |
| "loss": 0.9393, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.5595054095826892, |
| "grad_norm": 1.7085653102331866, |
| "learning_rate": 4.519236570077523e-05, |
| "loss": 0.9387, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.565687789799073, |
| "grad_norm": 1.181853985248212, |
| "learning_rate": 4.502021726758994e-05, |
| "loss": 0.9329, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.571870170015456, |
| "grad_norm": 1.7534552716011547, |
| "learning_rate": 4.484797430989248e-05, |
| "loss": 0.9412, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.578052550231839, |
| "grad_norm": 1.3292585743678849, |
| "learning_rate": 4.467564007080574e-05, |
| "loss": 0.9399, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.584234930448223, |
| "grad_norm": 1.7630942395608926, |
| "learning_rate": 4.4503217795171326e-05, |
| "loss": 0.9303, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.590417310664606, |
| "grad_norm": 1.5547379842247044, |
| "learning_rate": 4.433071072948846e-05, |
| "loss": 0.9404, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.596599690880989, |
| "grad_norm": 1.55358461949987, |
| "learning_rate": 4.415812212185287e-05, |
| "loss": 0.9479, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.6027820710973724, |
| "grad_norm": 1.3519799459619244, |
| "learning_rate": 4.398545522189562e-05, |
| "loss": 0.9381, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.6089644513137555, |
| "grad_norm": 1.4063429925697228, |
| "learning_rate": 4.38127132807219e-05, |
| "loss": 0.9308, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.615146831530139, |
| "grad_norm": 1.1100048648257177, |
| "learning_rate": 4.363989955084988e-05, |
| "loss": 0.9316, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.6213292117465223, |
| "grad_norm": 1.5473918787261554, |
| "learning_rate": 4.3467017286149355e-05, |
| "loss": 0.9288, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.6275115919629055, |
| "grad_norm": 1.1413947086858753, |
| "learning_rate": 4.32940697417806e-05, |
| "loss": 0.9281, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.633693972179289, |
| "grad_norm": 1.4822907840670647, |
| "learning_rate": 4.3121060174133006e-05, |
| "loss": 0.9391, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.6398763523956723, |
| "grad_norm": 1.1893158134844697, |
| "learning_rate": 4.294799184076379e-05, |
| "loss": 0.9358, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.6460587326120555, |
| "grad_norm": 1.395540540534361, |
| "learning_rate": 4.277486800033664e-05, |
| "loss": 0.9344, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.652241112828439, |
| "grad_norm": 1.1389047154561869, |
| "learning_rate": 4.260169191256041e-05, |
| "loss": 0.9121, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.6584234930448223, |
| "grad_norm": 1.349515321074191, |
| "learning_rate": 4.242846683812768e-05, |
| "loss": 0.9424, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.6646058732612055, |
| "grad_norm": 1.2170924594039827, |
| "learning_rate": 4.225519603865338e-05, |
| "loss": 0.9285, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.670788253477589, |
| "grad_norm": 1.2637766040441283, |
| "learning_rate": 4.208188277661341e-05, |
| "loss": 0.9489, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.6769706336939723, |
| "grad_norm": 0.9722140941647698, |
| "learning_rate": 4.190853031528317e-05, |
| "loss": 0.9042, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.6831530139103554, |
| "grad_norm": 1.3212458332351424, |
| "learning_rate": 4.173514191867615e-05, |
| "loss": 0.9294, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.689335394126739, |
| "grad_norm": 1.0216097339005061, |
| "learning_rate": 4.156172085148244e-05, |
| "loss": 0.9365, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.6955177743431222, |
| "grad_norm": 1.4931636832422879, |
| "learning_rate": 4.138827037900728e-05, |
| "loss": 0.9453, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.7017001545595054, |
| "grad_norm": 1.5106660907983307, |
| "learning_rate": 4.1214793767109607e-05, |
| "loss": 0.9488, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.7078825347758886, |
| "grad_norm": 0.8429680654036087, |
| "learning_rate": 4.104129428214048e-05, |
| "loss": 0.9286, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.7140649149922718, |
| "grad_norm": 1.1888732405027307, |
| "learning_rate": 4.0867775190881673e-05, |
| "loss": 0.9389, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.7202472952086554, |
| "grad_norm": 0.8249720034944886, |
| "learning_rate": 4.0694239760484084e-05, |
| "loss": 0.9236, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.7264296754250386, |
| "grad_norm": 0.8112023719901648, |
| "learning_rate": 4.05206912584063e-05, |
| "loss": 0.93, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.7326120556414217, |
| "grad_norm": 0.8700099921141801, |
| "learning_rate": 4.0347132952352976e-05, |
| "loss": 0.9274, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.7387944358578054, |
| "grad_norm": 0.7451536808284058, |
| "learning_rate": 4.0173568110213444e-05, |
| "loss": 0.9444, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.7449768160741885, |
| "grad_norm": 0.7730600029414318, |
| "learning_rate": 4e-05, |
| "loss": 0.9264, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.7511591962905717, |
| "grad_norm": 0.64992428377988, |
| "learning_rate": 3.9826431889786576e-05, |
| "loss": 0.9228, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.7573415765069553, |
| "grad_norm": 0.6092471954250116, |
| "learning_rate": 3.965286704764702e-05, |
| "loss": 0.9386, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.7635239567233385, |
| "grad_norm": 0.5829783913867378, |
| "learning_rate": 3.947930874159371e-05, |
| "loss": 0.9466, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.7697063369397217, |
| "grad_norm": 0.5791113943292933, |
| "learning_rate": 3.930576023951593e-05, |
| "loss": 0.9315, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.7758887171561053, |
| "grad_norm": 0.496929341909277, |
| "learning_rate": 3.9132224809118347e-05, |
| "loss": 0.9433, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.7820710973724885, |
| "grad_norm": 0.5814134399140919, |
| "learning_rate": 3.8958705717859526e-05, |
| "loss": 0.94, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.7882534775888717, |
| "grad_norm": 0.49572568964279784, |
| "learning_rate": 3.878520623289041e-05, |
| "loss": 0.9288, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.7944358578052553, |
| "grad_norm": 0.5284855150274684, |
| "learning_rate": 3.8611729620992726e-05, |
| "loss": 0.9333, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.8006182380216385, |
| "grad_norm": 0.516369342375662, |
| "learning_rate": 3.8438279148517575e-05, |
| "loss": 0.9484, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.8068006182380216, |
| "grad_norm": 0.46332381283153445, |
| "learning_rate": 3.826485808132386e-05, |
| "loss": 0.9394, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.812982998454405, |
| "grad_norm": 0.5353899030659641, |
| "learning_rate": 3.8091469684716845e-05, |
| "loss": 0.93, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.819165378670788, |
| "grad_norm": 0.3932685579553986, |
| "learning_rate": 3.7918117223386615e-05, |
| "loss": 0.9368, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.8253477588871716, |
| "grad_norm": 0.43586697192948876, |
| "learning_rate": 3.774480396134663e-05, |
| "loss": 0.9397, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.8315301391035548, |
| "grad_norm": 0.40361521367835107, |
| "learning_rate": 3.7571533161872334e-05, |
| "loss": 0.9213, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.837712519319938, |
| "grad_norm": 0.4604217029229173, |
| "learning_rate": 3.7398308087439603e-05, |
| "loss": 0.9268, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.8438948995363216, |
| "grad_norm": 0.538514910957252, |
| "learning_rate": 3.722513199966336e-05, |
| "loss": 0.9455, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.8500772797527048, |
| "grad_norm": 0.6470702786089004, |
| "learning_rate": 3.705200815923622e-05, |
| "loss": 0.9123, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.856259659969088, |
| "grad_norm": 0.5008393569354235, |
| "learning_rate": 3.6878939825867014e-05, |
| "loss": 0.9382, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.8624420401854715, |
| "grad_norm": 0.41610970566343775, |
| "learning_rate": 3.670593025821942e-05, |
| "loss": 0.9387, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.8686244204018547, |
| "grad_norm": 0.3582328984320768, |
| "learning_rate": 3.653298271385065e-05, |
| "loss": 0.9365, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.874806800618238, |
| "grad_norm": 0.36268739794043653, |
| "learning_rate": 3.6360100449150135e-05, |
| "loss": 0.9398, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.8809891808346215, |
| "grad_norm": 0.31637193895013355, |
| "learning_rate": 3.618728671927811e-05, |
| "loss": 0.9468, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.8871715610510047, |
| "grad_norm": 0.37689814936512106, |
| "learning_rate": 3.6014544778104394e-05, |
| "loss": 0.9499, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.893353941267388, |
| "grad_norm": 0.4779224408619651, |
| "learning_rate": 3.584187787814714e-05, |
| "loss": 0.9396, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.8995363214837715, |
| "grad_norm": 0.509229566930791, |
| "learning_rate": 3.566928927051155e-05, |
| "loss": 0.9291, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.9057187017001547, |
| "grad_norm": 0.48474577075424746, |
| "learning_rate": 3.5496782204828694e-05, |
| "loss": 0.9203, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.911901081916538, |
| "grad_norm": 0.3971864637298685, |
| "learning_rate": 3.5324359929194274e-05, |
| "loss": 0.9394, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.918083462132921, |
| "grad_norm": 0.3053259719312419, |
| "learning_rate": 3.5152025690107524e-05, |
| "loss": 0.9268, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.9242658423493046, |
| "grad_norm": 0.29216270961639657, |
| "learning_rate": 3.497978273241007e-05, |
| "loss": 0.9148, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.930448222565688, |
| "grad_norm": 0.2654490613569246, |
| "learning_rate": 3.480763429922479e-05, |
| "loss": 0.9379, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.936630602782071, |
| "grad_norm": 0.38722618089775157, |
| "learning_rate": 3.463558363189479e-05, |
| "loss": 0.9277, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.942812982998454, |
| "grad_norm": 0.3877069119339608, |
| "learning_rate": 3.4463633969922395e-05, |
| "loss": 0.9165, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.948995363214838, |
| "grad_norm": 0.49582780013936295, |
| "learning_rate": 3.429178855090809e-05, |
| "loss": 0.9395, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.955177743431221, |
| "grad_norm": 0.48453229307728, |
| "learning_rate": 3.412005061048959e-05, |
| "loss": 0.9184, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.961360123647604, |
| "grad_norm": 0.372292891013131, |
| "learning_rate": 3.394842338228097e-05, |
| "loss": 0.9257, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.9675425038639878, |
| "grad_norm": 0.21481675893348937, |
| "learning_rate": 3.377691009781168e-05, |
| "loss": 0.9342, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.973724884080371, |
| "grad_norm": 0.2791079849765998, |
| "learning_rate": 3.360551398646575e-05, |
| "loss": 0.927, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.979907264296754, |
| "grad_norm": 0.31864441303786156, |
| "learning_rate": 3.3434238275421e-05, |
| "loss": 0.9364, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.9860896445131377, |
| "grad_norm": 0.3426078238120316, |
| "learning_rate": 3.326308618958828e-05, |
| "loss": 0.9237, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.992272024729521, |
| "grad_norm": 0.33872000941997643, |
| "learning_rate": 3.309206095155069e-05, |
| "loss": 0.9331, |
| "step": 483 |
| }, |
| { |
| "epoch": 3.001545595054096, |
| "grad_norm": 0.41746066648889707, |
| "learning_rate": 3.292116578150298e-05, |
| "loss": 1.632, |
| "step": 484 |
| }, |
| { |
| "epoch": 3.007727975270479, |
| "grad_norm": 0.479690534350614, |
| "learning_rate": 3.2750403897190856e-05, |
| "loss": 0.8917, |
| "step": 485 |
| }, |
| { |
| "epoch": 3.0139103554868623, |
| "grad_norm": 0.37954910983497075, |
| "learning_rate": 3.2579778513850405e-05, |
| "loss": 0.8802, |
| "step": 486 |
| }, |
| { |
| "epoch": 3.020092735703246, |
| "grad_norm": 0.43731886585463775, |
| "learning_rate": 3.240929284414762e-05, |
| "loss": 0.9013, |
| "step": 487 |
| }, |
| { |
| "epoch": 3.026275115919629, |
| "grad_norm": 0.3851011426541529, |
| "learning_rate": 3.223895009811777e-05, |
| "loss": 0.876, |
| "step": 488 |
| }, |
| { |
| "epoch": 3.0324574961360122, |
| "grad_norm": 0.321671225557479, |
| "learning_rate": 3.2068753483105165e-05, |
| "loss": 0.8888, |
| "step": 489 |
| }, |
| { |
| "epoch": 3.038639876352396, |
| "grad_norm": 0.4076120056603458, |
| "learning_rate": 3.189870620370256e-05, |
| "loss": 0.9015, |
| "step": 490 |
| }, |
| { |
| "epoch": 3.044822256568779, |
| "grad_norm": 0.32597913024373476, |
| "learning_rate": 3.1728811461690904e-05, |
| "loss": 0.8858, |
| "step": 491 |
| }, |
| { |
| "epoch": 3.051004636785162, |
| "grad_norm": 0.39721200533205675, |
| "learning_rate": 3.155907245597912e-05, |
| "loss": 0.8971, |
| "step": 492 |
| }, |
| { |
| "epoch": 3.0571870170015454, |
| "grad_norm": 0.33689618227170504, |
| "learning_rate": 3.138949238254375e-05, |
| "loss": 0.8913, |
| "step": 493 |
| }, |
| { |
| "epoch": 3.063369397217929, |
| "grad_norm": 0.31095628858697094, |
| "learning_rate": 3.122007443436885e-05, |
| "loss": 0.8996, |
| "step": 494 |
| }, |
| { |
| "epoch": 3.069551777434312, |
| "grad_norm": 0.4028486236419519, |
| "learning_rate": 3.105082180138585e-05, |
| "loss": 0.8889, |
| "step": 495 |
| }, |
| { |
| "epoch": 3.0757341576506954, |
| "grad_norm": 0.484359324518245, |
| "learning_rate": 3.0881737670413534e-05, |
| "loss": 0.8964, |
| "step": 496 |
| }, |
| { |
| "epoch": 3.081916537867079, |
| "grad_norm": 0.38584852659317825, |
| "learning_rate": 3.071282522509798e-05, |
| "loss": 0.8794, |
| "step": 497 |
| }, |
| { |
| "epoch": 3.088098918083462, |
| "grad_norm": 0.3397498441090358, |
| "learning_rate": 3.054408764585261e-05, |
| "loss": 0.8957, |
| "step": 498 |
| }, |
| { |
| "epoch": 3.0942812982998453, |
| "grad_norm": 0.31333063472846795, |
| "learning_rate": 3.037552810979836e-05, |
| "loss": 0.8786, |
| "step": 499 |
| }, |
| { |
| "epoch": 3.100463678516229, |
| "grad_norm": 0.3649292765310615, |
| "learning_rate": 3.020714979070385e-05, |
| "loss": 0.8884, |
| "step": 500 |
| }, |
| { |
| "epoch": 3.106646058732612, |
| "grad_norm": 0.4200346160735661, |
| "learning_rate": 3.0038955858925556e-05, |
| "loss": 0.8846, |
| "step": 501 |
| }, |
| { |
| "epoch": 3.1128284389489953, |
| "grad_norm": 0.28488029439459195, |
| "learning_rate": 2.9870949481348214e-05, |
| "loss": 0.8942, |
| "step": 502 |
| }, |
| { |
| "epoch": 3.1190108191653785, |
| "grad_norm": 0.2697608832955852, |
| "learning_rate": 2.9703133821325125e-05, |
| "loss": 0.8915, |
| "step": 503 |
| }, |
| { |
| "epoch": 3.125193199381762, |
| "grad_norm": 0.3357046563951315, |
| "learning_rate": 2.953551203861863e-05, |
| "loss": 0.8885, |
| "step": 504 |
| }, |
| { |
| "epoch": 3.1313755795981453, |
| "grad_norm": 0.3200692987355479, |
| "learning_rate": 2.9368087289340575e-05, |
| "loss": 0.8844, |
| "step": 505 |
| }, |
| { |
| "epoch": 3.1375579598145285, |
| "grad_norm": 0.24036855461462353, |
| "learning_rate": 2.9200862725892897e-05, |
| "loss": 0.8929, |
| "step": 506 |
| }, |
| { |
| "epoch": 3.143740340030912, |
| "grad_norm": 0.238395091168019, |
| "learning_rate": 2.903384149690834e-05, |
| "loss": 0.8874, |
| "step": 507 |
| }, |
| { |
| "epoch": 3.1499227202472952, |
| "grad_norm": 0.2401017173847325, |
| "learning_rate": 2.8867026747191054e-05, |
| "loss": 0.8876, |
| "step": 508 |
| }, |
| { |
| "epoch": 3.1561051004636784, |
| "grad_norm": 0.2799086754124938, |
| "learning_rate": 2.8700421617657446e-05, |
| "loss": 0.8958, |
| "step": 509 |
| }, |
| { |
| "epoch": 3.162287480680062, |
| "grad_norm": 0.2780648545074759, |
| "learning_rate": 2.853402924527708e-05, |
| "loss": 0.8952, |
| "step": 510 |
| }, |
| { |
| "epoch": 3.1684698608964452, |
| "grad_norm": 0.23416871362717123, |
| "learning_rate": 2.8367852763013508e-05, |
| "loss": 0.8783, |
| "step": 511 |
| }, |
| { |
| "epoch": 3.1746522411128284, |
| "grad_norm": 0.19517664014067915, |
| "learning_rate": 2.8201895299765358e-05, |
| "loss": 0.8885, |
| "step": 512 |
| }, |
| { |
| "epoch": 3.1808346213292116, |
| "grad_norm": 0.24055029383355403, |
| "learning_rate": 2.803615998030738e-05, |
| "loss": 0.8781, |
| "step": 513 |
| }, |
| { |
| "epoch": 3.187017001545595, |
| "grad_norm": 0.21251629293077243, |
| "learning_rate": 2.787064992523169e-05, |
| "loss": 0.896, |
| "step": 514 |
| }, |
| { |
| "epoch": 3.1931993817619784, |
| "grad_norm": 0.2205317155524029, |
| "learning_rate": 2.7705368250888892e-05, |
| "loss": 0.8977, |
| "step": 515 |
| }, |
| { |
| "epoch": 3.1993817619783615, |
| "grad_norm": 0.2710843453756844, |
| "learning_rate": 2.7540318069329468e-05, |
| "loss": 0.8809, |
| "step": 516 |
| }, |
| { |
| "epoch": 3.205564142194745, |
| "grad_norm": 0.2134694304897921, |
| "learning_rate": 2.7375502488245218e-05, |
| "loss": 0.8852, |
| "step": 517 |
| }, |
| { |
| "epoch": 3.2117465224111283, |
| "grad_norm": 0.21525338899378574, |
| "learning_rate": 2.7210924610910658e-05, |
| "loss": 0.8843, |
| "step": 518 |
| }, |
| { |
| "epoch": 3.2179289026275115, |
| "grad_norm": 0.2257874439917892, |
| "learning_rate": 2.7046587536124656e-05, |
| "loss": 0.8912, |
| "step": 519 |
| }, |
| { |
| "epoch": 3.2241112828438947, |
| "grad_norm": 0.2146891709570046, |
| "learning_rate": 2.6882494358152055e-05, |
| "loss": 0.8902, |
| "step": 520 |
| }, |
| { |
| "epoch": 3.2302936630602783, |
| "grad_norm": 0.19521290168739072, |
| "learning_rate": 2.6718648166665445e-05, |
| "loss": 0.8931, |
| "step": 521 |
| }, |
| { |
| "epoch": 3.2364760432766615, |
| "grad_norm": 0.2375023715085163, |
| "learning_rate": 2.6555052046686943e-05, |
| "loss": 0.8799, |
| "step": 522 |
| }, |
| { |
| "epoch": 3.2426584234930447, |
| "grad_norm": 0.2440992341795168, |
| "learning_rate": 2.6391709078530106e-05, |
| "loss": 0.9015, |
| "step": 523 |
| }, |
| { |
| "epoch": 3.2488408037094283, |
| "grad_norm": 0.2660799604690821, |
| "learning_rate": 2.6228622337742016e-05, |
| "loss": 0.8869, |
| "step": 524 |
| }, |
| { |
| "epoch": 3.2550231839258115, |
| "grad_norm": 0.1898820986247372, |
| "learning_rate": 2.606579489504526e-05, |
| "loss": 0.8921, |
| "step": 525 |
| }, |
| { |
| "epoch": 3.2612055641421946, |
| "grad_norm": 0.21206019174841687, |
| "learning_rate": 2.590322981628016e-05, |
| "loss": 0.8969, |
| "step": 526 |
| }, |
| { |
| "epoch": 3.2673879443585783, |
| "grad_norm": 0.21516060193400413, |
| "learning_rate": 2.5740930162347053e-05, |
| "loss": 0.8839, |
| "step": 527 |
| }, |
| { |
| "epoch": 3.2735703245749614, |
| "grad_norm": 0.19044808648502842, |
| "learning_rate": 2.5578898989148692e-05, |
| "loss": 0.8927, |
| "step": 528 |
| }, |
| { |
| "epoch": 3.2797527047913446, |
| "grad_norm": 0.22486834698093985, |
| "learning_rate": 2.5417139347532625e-05, |
| "loss": 0.8889, |
| "step": 529 |
| }, |
| { |
| "epoch": 3.285935085007728, |
| "grad_norm": 0.19216048198447702, |
| "learning_rate": 2.525565428323379e-05, |
| "loss": 0.8893, |
| "step": 530 |
| }, |
| { |
| "epoch": 3.2921174652241114, |
| "grad_norm": 0.20965862072042166, |
| "learning_rate": 2.509444683681722e-05, |
| "loss": 0.8989, |
| "step": 531 |
| }, |
| { |
| "epoch": 3.2982998454404946, |
| "grad_norm": 0.1815566372511265, |
| "learning_rate": 2.49335200436207e-05, |
| "loss": 0.8795, |
| "step": 532 |
| }, |
| { |
| "epoch": 3.3044822256568778, |
| "grad_norm": 0.23072409231554872, |
| "learning_rate": 2.4772876933697658e-05, |
| "loss": 0.909, |
| "step": 533 |
| }, |
| { |
| "epoch": 3.3106646058732614, |
| "grad_norm": 0.2299841285498698, |
| "learning_rate": 2.461252053176015e-05, |
| "loss": 0.8966, |
| "step": 534 |
| }, |
| { |
| "epoch": 3.3168469860896446, |
| "grad_norm": 0.23678203939185868, |
| "learning_rate": 2.4452453857121846e-05, |
| "loss": 0.904, |
| "step": 535 |
| }, |
| { |
| "epoch": 3.3230293663060277, |
| "grad_norm": 0.23345278141855083, |
| "learning_rate": 2.429267992364123e-05, |
| "loss": 0.8709, |
| "step": 536 |
| }, |
| { |
| "epoch": 3.329211746522411, |
| "grad_norm": 0.21351104494670123, |
| "learning_rate": 2.413320173966481e-05, |
| "loss": 0.886, |
| "step": 537 |
| }, |
| { |
| "epoch": 3.3353941267387945, |
| "grad_norm": 0.24843505340058486, |
| "learning_rate": 2.3974022307970488e-05, |
| "loss": 0.8994, |
| "step": 538 |
| }, |
| { |
| "epoch": 3.3415765069551777, |
| "grad_norm": 0.2144602081955269, |
| "learning_rate": 2.3815144625711063e-05, |
| "loss": 0.8928, |
| "step": 539 |
| }, |
| { |
| "epoch": 3.347758887171561, |
| "grad_norm": 0.214440559424067, |
| "learning_rate": 2.3656571684357736e-05, |
| "loss": 0.8952, |
| "step": 540 |
| }, |
| { |
| "epoch": 3.3539412673879445, |
| "grad_norm": 0.1870653320452973, |
| "learning_rate": 2.34983064696438e-05, |
| "loss": 0.8789, |
| "step": 541 |
| }, |
| { |
| "epoch": 3.3601236476043277, |
| "grad_norm": 0.1816470397576087, |
| "learning_rate": 2.3340351961508495e-05, |
| "loss": 0.8904, |
| "step": 542 |
| }, |
| { |
| "epoch": 3.366306027820711, |
| "grad_norm": 0.23629374681914123, |
| "learning_rate": 2.318271113404075e-05, |
| "loss": 0.8853, |
| "step": 543 |
| }, |
| { |
| "epoch": 3.3724884080370945, |
| "grad_norm": 0.2136915907110259, |
| "learning_rate": 2.3025386955423342e-05, |
| "loss": 0.9049, |
| "step": 544 |
| }, |
| { |
| "epoch": 3.3786707882534777, |
| "grad_norm": 0.22758131014283642, |
| "learning_rate": 2.2868382387876905e-05, |
| "loss": 0.8876, |
| "step": 545 |
| }, |
| { |
| "epoch": 3.384853168469861, |
| "grad_norm": 0.2365037683934222, |
| "learning_rate": 2.2711700387604223e-05, |
| "loss": 0.9025, |
| "step": 546 |
| }, |
| { |
| "epoch": 3.391035548686244, |
| "grad_norm": 0.21359052590913488, |
| "learning_rate": 2.255534390473451e-05, |
| "loss": 0.8939, |
| "step": 547 |
| }, |
| { |
| "epoch": 3.3972179289026276, |
| "grad_norm": 0.21656702849393653, |
| "learning_rate": 2.2399315883267853e-05, |
| "loss": 0.8987, |
| "step": 548 |
| }, |
| { |
| "epoch": 3.403400309119011, |
| "grad_norm": 0.225661122723241, |
| "learning_rate": 2.2243619261019918e-05, |
| "loss": 0.8893, |
| "step": 549 |
| }, |
| { |
| "epoch": 3.409582689335394, |
| "grad_norm": 0.18874144934647666, |
| "learning_rate": 2.2088256969566413e-05, |
| "loss": 0.9041, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.4157650695517776, |
| "grad_norm": 0.28668375357552045, |
| "learning_rate": 2.1933231934188104e-05, |
| "loss": 0.8855, |
| "step": 551 |
| }, |
| { |
| "epoch": 3.421947449768161, |
| "grad_norm": 0.2531027429361992, |
| "learning_rate": 2.1778547073815554e-05, |
| "loss": 0.8954, |
| "step": 552 |
| }, |
| { |
| "epoch": 3.428129829984544, |
| "grad_norm": 0.21767635407476482, |
| "learning_rate": 2.1624205300974346e-05, |
| "loss": 0.8951, |
| "step": 553 |
| }, |
| { |
| "epoch": 3.434312210200927, |
| "grad_norm": 0.18527077734942415, |
| "learning_rate": 2.1470209521730064e-05, |
| "loss": 0.891, |
| "step": 554 |
| }, |
| { |
| "epoch": 3.4404945904173108, |
| "grad_norm": 0.26428285410878966, |
| "learning_rate": 2.131656263563369e-05, |
| "loss": 0.9102, |
| "step": 555 |
| }, |
| { |
| "epoch": 3.446676970633694, |
| "grad_norm": 0.17077454909439538, |
| "learning_rate": 2.1163267535666994e-05, |
| "loss": 0.9022, |
| "step": 556 |
| }, |
| { |
| "epoch": 3.452859350850077, |
| "grad_norm": 0.2136026248917033, |
| "learning_rate": 2.1010327108188037e-05, |
| "loss": 0.8797, |
| "step": 557 |
| }, |
| { |
| "epoch": 3.4590417310664607, |
| "grad_norm": 0.23170046494717933, |
| "learning_rate": 2.0857744232876797e-05, |
| "loss": 0.9014, |
| "step": 558 |
| }, |
| { |
| "epoch": 3.465224111282844, |
| "grad_norm": 0.18466452017435814, |
| "learning_rate": 2.070552178268102e-05, |
| "loss": 0.893, |
| "step": 559 |
| }, |
| { |
| "epoch": 3.471406491499227, |
| "grad_norm": 0.2938322911655651, |
| "learning_rate": 2.0553662623762105e-05, |
| "loss": 0.8872, |
| "step": 560 |
| }, |
| { |
| "epoch": 3.4775888717156107, |
| "grad_norm": 0.28795550547667914, |
| "learning_rate": 2.040216961544109e-05, |
| "loss": 0.8975, |
| "step": 561 |
| }, |
| { |
| "epoch": 3.483771251931994, |
| "grad_norm": 0.22847154183739538, |
| "learning_rate": 2.0251045610144865e-05, |
| "loss": 0.8918, |
| "step": 562 |
| }, |
| { |
| "epoch": 3.489953632148377, |
| "grad_norm": 0.28080426895854366, |
| "learning_rate": 2.0100293453352446e-05, |
| "loss": 0.8703, |
| "step": 563 |
| }, |
| { |
| "epoch": 3.4961360123647607, |
| "grad_norm": 0.23972543857227632, |
| "learning_rate": 1.9949915983541405e-05, |
| "loss": 0.8794, |
| "step": 564 |
| }, |
| { |
| "epoch": 3.502318392581144, |
| "grad_norm": 0.2159429902592839, |
| "learning_rate": 1.9799916032134384e-05, |
| "loss": 0.8844, |
| "step": 565 |
| }, |
| { |
| "epoch": 3.508500772797527, |
| "grad_norm": 0.2126223807781731, |
| "learning_rate": 1.9650296423445865e-05, |
| "loss": 0.8934, |
| "step": 566 |
| }, |
| { |
| "epoch": 3.51468315301391, |
| "grad_norm": 0.25136395675418194, |
| "learning_rate": 1.9501059974628923e-05, |
| "loss": 0.8951, |
| "step": 567 |
| }, |
| { |
| "epoch": 3.5208655332302934, |
| "grad_norm": 0.2422901671285048, |
| "learning_rate": 1.9352209495622215e-05, |
| "loss": 0.8931, |
| "step": 568 |
| }, |
| { |
| "epoch": 3.527047913446677, |
| "grad_norm": 0.22399150155898304, |
| "learning_rate": 1.920374778909702e-05, |
| "loss": 0.894, |
| "step": 569 |
| }, |
| { |
| "epoch": 3.53323029366306, |
| "grad_norm": 0.17997096245249658, |
| "learning_rate": 1.9055677650404598e-05, |
| "loss": 0.8854, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.5394126738794434, |
| "grad_norm": 0.20091228641905334, |
| "learning_rate": 1.890800186752336e-05, |
| "loss": 0.8912, |
| "step": 571 |
| }, |
| { |
| "epoch": 3.545595054095827, |
| "grad_norm": 0.1713636230002705, |
| "learning_rate": 1.8760723221006577e-05, |
| "loss": 0.8984, |
| "step": 572 |
| }, |
| { |
| "epoch": 3.55177743431221, |
| "grad_norm": 0.22444993402898378, |
| "learning_rate": 1.861384448392984e-05, |
| "loss": 0.8957, |
| "step": 573 |
| }, |
| { |
| "epoch": 3.5579598145285933, |
| "grad_norm": 0.1622926494483365, |
| "learning_rate": 1.8467368421839045e-05, |
| "loss": 0.8882, |
| "step": 574 |
| }, |
| { |
| "epoch": 3.564142194744977, |
| "grad_norm": 0.22561783403087973, |
| "learning_rate": 1.832129779269811e-05, |
| "loss": 0.8837, |
| "step": 575 |
| }, |
| { |
| "epoch": 3.57032457496136, |
| "grad_norm": 0.1776156141854532, |
| "learning_rate": 1.817563534683722e-05, |
| "loss": 0.8705, |
| "step": 576 |
| }, |
| { |
| "epoch": 3.5765069551777433, |
| "grad_norm": 0.1905281137121667, |
| "learning_rate": 1.8030383826900935e-05, |
| "loss": 0.8814, |
| "step": 577 |
| }, |
| { |
| "epoch": 3.582689335394127, |
| "grad_norm": 0.2044502851986332, |
| "learning_rate": 1.7885545967796606e-05, |
| "loss": 0.8981, |
| "step": 578 |
| }, |
| { |
| "epoch": 3.58887171561051, |
| "grad_norm": 0.1784191323277939, |
| "learning_rate": 1.7741124496642862e-05, |
| "loss": 0.8901, |
| "step": 579 |
| }, |
| { |
| "epoch": 3.5950540958268933, |
| "grad_norm": 0.18594801839576822, |
| "learning_rate": 1.7597122132718202e-05, |
| "loss": 0.8666, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.601236476043277, |
| "grad_norm": 0.18365311459244033, |
| "learning_rate": 1.7453541587409963e-05, |
| "loss": 0.8936, |
| "step": 581 |
| }, |
| { |
| "epoch": 3.60741885625966, |
| "grad_norm": 0.20713153169279955, |
| "learning_rate": 1.731038556416304e-05, |
| "loss": 0.8898, |
| "step": 582 |
| }, |
| { |
| "epoch": 3.6136012364760433, |
| "grad_norm": 0.20905582740873777, |
| "learning_rate": 1.716765675842919e-05, |
| "loss": 0.8905, |
| "step": 583 |
| }, |
| { |
| "epoch": 3.6197836166924264, |
| "grad_norm": 0.18415135918656184, |
| "learning_rate": 1.70253578576161e-05, |
| "loss": 0.8945, |
| "step": 584 |
| }, |
| { |
| "epoch": 3.62596599690881, |
| "grad_norm": 0.2042255922116097, |
| "learning_rate": 1.688349154103699e-05, |
| "loss": 0.8839, |
| "step": 585 |
| }, |
| { |
| "epoch": 3.6321483771251932, |
| "grad_norm": 0.17189580999683968, |
| "learning_rate": 1.6742060479859944e-05, |
| "loss": 0.8747, |
| "step": 586 |
| }, |
| { |
| "epoch": 3.6383307573415764, |
| "grad_norm": 0.21696578872996222, |
| "learning_rate": 1.6601067337057783e-05, |
| "loss": 0.8863, |
| "step": 587 |
| }, |
| { |
| "epoch": 3.6445131375579596, |
| "grad_norm": 0.20566434805583705, |
| "learning_rate": 1.6460514767357855e-05, |
| "loss": 0.8978, |
| "step": 588 |
| }, |
| { |
| "epoch": 3.650695517774343, |
| "grad_norm": 0.212105435407208, |
| "learning_rate": 1.6320405417192066e-05, |
| "loss": 0.8796, |
| "step": 589 |
| }, |
| { |
| "epoch": 3.6568778979907264, |
| "grad_norm": 0.21704798077180454, |
| "learning_rate": 1.618074192464699e-05, |
| "loss": 0.8962, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.6630602782071096, |
| "grad_norm": 0.26219775112073423, |
| "learning_rate": 1.6041526919414318e-05, |
| "loss": 0.8965, |
| "step": 591 |
| }, |
| { |
| "epoch": 3.669242658423493, |
| "grad_norm": 0.18361447307055298, |
| "learning_rate": 1.5902763022741247e-05, |
| "loss": 0.8912, |
| "step": 592 |
| }, |
| { |
| "epoch": 3.6754250386398764, |
| "grad_norm": 0.2767924709588361, |
| "learning_rate": 1.576445284738116e-05, |
| "loss": 0.8974, |
| "step": 593 |
| }, |
| { |
| "epoch": 3.6816074188562595, |
| "grad_norm": 0.21044903902705353, |
| "learning_rate": 1.5626598997544422e-05, |
| "loss": 0.8932, |
| "step": 594 |
| }, |
| { |
| "epoch": 3.687789799072643, |
| "grad_norm": 0.22141335816275728, |
| "learning_rate": 1.548920406884935e-05, |
| "loss": 0.8966, |
| "step": 595 |
| }, |
| { |
| "epoch": 3.6939721792890263, |
| "grad_norm": 0.19199822889292517, |
| "learning_rate": 1.5352270648273363e-05, |
| "loss": 0.8927, |
| "step": 596 |
| }, |
| { |
| "epoch": 3.7001545595054095, |
| "grad_norm": 0.2256809667548217, |
| "learning_rate": 1.5215801314104202e-05, |
| "loss": 0.8917, |
| "step": 597 |
| }, |
| { |
| "epoch": 3.706336939721793, |
| "grad_norm": 0.23515679935616451, |
| "learning_rate": 1.5079798635891468e-05, |
| "loss": 0.8907, |
| "step": 598 |
| }, |
| { |
| "epoch": 3.7125193199381763, |
| "grad_norm": 0.1640102854982691, |
| "learning_rate": 1.4944265174398203e-05, |
| "loss": 0.9016, |
| "step": 599 |
| }, |
| { |
| "epoch": 3.7187017001545595, |
| "grad_norm": 0.19789240249181025, |
| "learning_rate": 1.4809203481552681e-05, |
| "loss": 0.8811, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.7248840803709427, |
| "grad_norm": 0.17774590981378566, |
| "learning_rate": 1.4674616100400303e-05, |
| "loss": 0.8811, |
| "step": 601 |
| }, |
| { |
| "epoch": 3.7310664605873263, |
| "grad_norm": 0.19736844331190076, |
| "learning_rate": 1.4540505565055858e-05, |
| "loss": 0.8996, |
| "step": 602 |
| }, |
| { |
| "epoch": 3.7372488408037094, |
| "grad_norm": 0.17893905096693596, |
| "learning_rate": 1.4406874400655615e-05, |
| "loss": 0.9037, |
| "step": 603 |
| }, |
| { |
| "epoch": 3.7434312210200926, |
| "grad_norm": 0.19684792739449095, |
| "learning_rate": 1.427372512330996e-05, |
| "loss": 0.8851, |
| "step": 604 |
| }, |
| { |
| "epoch": 3.749613601236476, |
| "grad_norm": 0.2512744015419969, |
| "learning_rate": 1.4141060240055859e-05, |
| "loss": 0.8954, |
| "step": 605 |
| }, |
| { |
| "epoch": 3.7557959814528594, |
| "grad_norm": 0.17423327717089102, |
| "learning_rate": 1.400888224880983e-05, |
| "loss": 0.8808, |
| "step": 606 |
| }, |
| { |
| "epoch": 3.7619783616692426, |
| "grad_norm": 0.1819843673558297, |
| "learning_rate": 1.3877193638320718e-05, |
| "loss": 0.8982, |
| "step": 607 |
| }, |
| { |
| "epoch": 3.7681607418856258, |
| "grad_norm": 0.1693796580052696, |
| "learning_rate": 1.3745996888122992e-05, |
| "loss": 0.8701, |
| "step": 608 |
| }, |
| { |
| "epoch": 3.7743431221020094, |
| "grad_norm": 0.1821658263170253, |
| "learning_rate": 1.361529446848998e-05, |
| "loss": 0.8843, |
| "step": 609 |
| }, |
| { |
| "epoch": 3.7805255023183926, |
| "grad_norm": 0.24650364015406534, |
| "learning_rate": 1.3485088840387369e-05, |
| "loss": 0.8966, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.7867078825347757, |
| "grad_norm": 0.18771305026658164, |
| "learning_rate": 1.3355382455426892e-05, |
| "loss": 0.8846, |
| "step": 611 |
| }, |
| { |
| "epoch": 3.7928902627511594, |
| "grad_norm": 0.18610630863899502, |
| "learning_rate": 1.3226177755820087e-05, |
| "loss": 0.8846, |
| "step": 612 |
| }, |
| { |
| "epoch": 3.7990726429675425, |
| "grad_norm": 0.17549326513432761, |
| "learning_rate": 1.3097477174332482e-05, |
| "loss": 0.8855, |
| "step": 613 |
| }, |
| { |
| "epoch": 3.8052550231839257, |
| "grad_norm": 0.17546252086320724, |
| "learning_rate": 1.296928313423758e-05, |
| "loss": 0.894, |
| "step": 614 |
| }, |
| { |
| "epoch": 3.8114374034003093, |
| "grad_norm": 0.1904786805615447, |
| "learning_rate": 1.2841598049271395e-05, |
| "loss": 0.8847, |
| "step": 615 |
| }, |
| { |
| "epoch": 3.8176197836166925, |
| "grad_norm": 0.16824287711277486, |
| "learning_rate": 1.271442432358688e-05, |
| "loss": 0.882, |
| "step": 616 |
| }, |
| { |
| "epoch": 3.8238021638330757, |
| "grad_norm": 0.15191088188497087, |
| "learning_rate": 1.2587764351708813e-05, |
| "loss": 0.897, |
| "step": 617 |
| }, |
| { |
| "epoch": 3.8299845440494593, |
| "grad_norm": 0.17144785228852993, |
| "learning_rate": 1.2461620518488533e-05, |
| "loss": 0.8947, |
| "step": 618 |
| }, |
| { |
| "epoch": 3.8361669242658425, |
| "grad_norm": 0.1981987256042829, |
| "learning_rate": 1.2335995199059175e-05, |
| "loss": 0.8888, |
| "step": 619 |
| }, |
| { |
| "epoch": 3.8423493044822257, |
| "grad_norm": 0.1664686018609899, |
| "learning_rate": 1.2210890758790877e-05, |
| "loss": 0.8865, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.848531684698609, |
| "grad_norm": 0.17952380029164736, |
| "learning_rate": 1.2086309553246282e-05, |
| "loss": 0.8735, |
| "step": 621 |
| }, |
| { |
| "epoch": 3.854714064914992, |
| "grad_norm": 0.1654528501349644, |
| "learning_rate": 1.1962253928136129e-05, |
| "loss": 0.8975, |
| "step": 622 |
| }, |
| { |
| "epoch": 3.8608964451313756, |
| "grad_norm": 0.19932349446317263, |
| "learning_rate": 1.1838726219275163e-05, |
| "loss": 0.8994, |
| "step": 623 |
| }, |
| { |
| "epoch": 3.867078825347759, |
| "grad_norm": 0.16515103220193778, |
| "learning_rate": 1.1715728752538103e-05, |
| "loss": 0.8795, |
| "step": 624 |
| }, |
| { |
| "epoch": 3.873261205564142, |
| "grad_norm": 0.15802093965187888, |
| "learning_rate": 1.1593263843815854e-05, |
| "loss": 0.8846, |
| "step": 625 |
| }, |
| { |
| "epoch": 3.8794435857805256, |
| "grad_norm": 0.18002633059902745, |
| "learning_rate": 1.1471333798971922e-05, |
| "loss": 0.8813, |
| "step": 626 |
| }, |
| { |
| "epoch": 3.885625965996909, |
| "grad_norm": 0.16043447191960278, |
| "learning_rate": 1.1349940913798978e-05, |
| "loss": 0.88, |
| "step": 627 |
| }, |
| { |
| "epoch": 3.891808346213292, |
| "grad_norm": 0.21327907033825158, |
| "learning_rate": 1.1229087473975641e-05, |
| "loss": 0.8961, |
| "step": 628 |
| }, |
| { |
| "epoch": 3.8979907264296756, |
| "grad_norm": 0.15650552928778205, |
| "learning_rate": 1.1108775755023422e-05, |
| "loss": 0.8748, |
| "step": 629 |
| }, |
| { |
| "epoch": 3.9041731066460588, |
| "grad_norm": 0.18123273261524603, |
| "learning_rate": 1.0989008022263906e-05, |
| "loss": 0.8935, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.910355486862442, |
| "grad_norm": 0.15260920437276415, |
| "learning_rate": 1.08697865307761e-05, |
| "loss": 0.8852, |
| "step": 631 |
| }, |
| { |
| "epoch": 3.9165378670788256, |
| "grad_norm": 0.165453780873266, |
| "learning_rate": 1.0751113525353957e-05, |
| "loss": 0.9049, |
| "step": 632 |
| }, |
| { |
| "epoch": 3.9227202472952087, |
| "grad_norm": 0.13033209339725224, |
| "learning_rate": 1.0632991240464068e-05, |
| "loss": 0.8931, |
| "step": 633 |
| }, |
| { |
| "epoch": 3.928902627511592, |
| "grad_norm": 0.16631041812626945, |
| "learning_rate": 1.0515421900203714e-05, |
| "loss": 0.888, |
| "step": 634 |
| }, |
| { |
| "epoch": 3.9350850077279755, |
| "grad_norm": 0.16953390165836668, |
| "learning_rate": 1.0398407718258836e-05, |
| "loss": 0.8984, |
| "step": 635 |
| }, |
| { |
| "epoch": 3.9412673879443587, |
| "grad_norm": 0.14284684056392535, |
| "learning_rate": 1.028195089786248e-05, |
| "loss": 0.9026, |
| "step": 636 |
| }, |
| { |
| "epoch": 3.947449768160742, |
| "grad_norm": 0.16855244368923294, |
| "learning_rate": 1.0166053631753204e-05, |
| "loss": 0.8875, |
| "step": 637 |
| }, |
| { |
| "epoch": 3.953632148377125, |
| "grad_norm": 0.14590639079902812, |
| "learning_rate": 1.0050718102133916e-05, |
| "loss": 0.8929, |
| "step": 638 |
| }, |
| { |
| "epoch": 3.9598145285935082, |
| "grad_norm": 0.14384779730638278, |
| "learning_rate": 9.935946480630658e-06, |
| "loss": 0.8825, |
| "step": 639 |
| }, |
| { |
| "epoch": 3.965996908809892, |
| "grad_norm": 0.1534372289046035, |
| "learning_rate": 9.82174092825181e-06, |
| "loss": 0.8982, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.972179289026275, |
| "grad_norm": 0.15609090465249761, |
| "learning_rate": 9.708103595347352e-06, |
| "loss": 0.8939, |
| "step": 641 |
| }, |
| { |
| "epoch": 3.978361669242658, |
| "grad_norm": 0.1403869008769113, |
| "learning_rate": 9.595036621568398e-06, |
| "loss": 0.8843, |
| "step": 642 |
| }, |
| { |
| "epoch": 3.984544049459042, |
| "grad_norm": 0.14678540378586016, |
| "learning_rate": 9.482542135826916e-06, |
| "loss": 0.8987, |
| "step": 643 |
| }, |
| { |
| "epoch": 3.990726429675425, |
| "grad_norm": 0.1875416299280276, |
| "learning_rate": 9.370622256255571e-06, |
| "loss": 0.8774, |
| "step": 644 |
| }, |
| { |
| "epoch": 3.996908809891808, |
| "grad_norm": 0.27186472008456647, |
| "learning_rate": 9.259279090167994e-06, |
| "loss": 1.5557, |
| "step": 645 |
| }, |
| { |
| "epoch": 4.006182380216384, |
| "grad_norm": 0.20982320372706692, |
| "learning_rate": 9.148514734018917e-06, |
| "loss": 0.87, |
| "step": 646 |
| }, |
| { |
| "epoch": 4.012364760432766, |
| "grad_norm": 0.17703070468066032, |
| "learning_rate": 9.038331273364869e-06, |
| "loss": 0.8649, |
| "step": 647 |
| }, |
| { |
| "epoch": 4.01854714064915, |
| "grad_norm": 0.16674753883298887, |
| "learning_rate": 8.928730782824755e-06, |
| "loss": 0.8577, |
| "step": 648 |
| }, |
| { |
| "epoch": 4.024729520865534, |
| "grad_norm": 0.16839923962797768, |
| "learning_rate": 8.819715326040965e-06, |
| "loss": 0.8571, |
| "step": 649 |
| }, |
| { |
| "epoch": 4.030911901081916, |
| "grad_norm": 0.220038624506249, |
| "learning_rate": 8.711286955640332e-06, |
| "loss": 0.8609, |
| "step": 650 |
| }, |
| { |
| "epoch": 4.0370942812983, |
| "grad_norm": 0.16790422765840488, |
| "learning_rate": 8.603447713195634e-06, |
| "loss": 0.8566, |
| "step": 651 |
| }, |
| { |
| "epoch": 4.043276661514684, |
| "grad_norm": 0.18920607343960402, |
| "learning_rate": 8.496199629187072e-06, |
| "loss": 0.8608, |
| "step": 652 |
| }, |
| { |
| "epoch": 4.049459041731066, |
| "grad_norm": 0.18186173348032736, |
| "learning_rate": 8.389544722964076e-06, |
| "loss": 0.8675, |
| "step": 653 |
| }, |
| { |
| "epoch": 4.05564142194745, |
| "grad_norm": 0.17348169700628485, |
| "learning_rate": 8.283485002707233e-06, |
| "loss": 0.8689, |
| "step": 654 |
| }, |
| { |
| "epoch": 4.061823802163833, |
| "grad_norm": 0.17798870193571564, |
| "learning_rate": 8.178022465390549e-06, |
| "loss": 0.853, |
| "step": 655 |
| }, |
| { |
| "epoch": 4.068006182380216, |
| "grad_norm": 0.18943450530940084, |
| "learning_rate": 8.073159096743799e-06, |
| "loss": 0.8645, |
| "step": 656 |
| }, |
| { |
| "epoch": 4.0741885625966, |
| "grad_norm": 0.1602196742298431, |
| "learning_rate": 7.968896871215147e-06, |
| "loss": 0.8517, |
| "step": 657 |
| }, |
| { |
| "epoch": 4.080370942812983, |
| "grad_norm": 0.16728654932655415, |
| "learning_rate": 7.865237751933965e-06, |
| "loss": 0.858, |
| "step": 658 |
| }, |
| { |
| "epoch": 4.086553323029366, |
| "grad_norm": 0.17345900037449719, |
| "learning_rate": 7.762183690673887e-06, |
| "loss": 0.8552, |
| "step": 659 |
| }, |
| { |
| "epoch": 4.09273570324575, |
| "grad_norm": 0.16849302168626767, |
| "learning_rate": 7.659736627816064e-06, |
| "loss": 0.8559, |
| "step": 660 |
| }, |
| { |
| "epoch": 4.098918083462133, |
| "grad_norm": 0.15814956982575118, |
| "learning_rate": 7.557898492312561e-06, |
| "loss": 0.8527, |
| "step": 661 |
| }, |
| { |
| "epoch": 4.105100463678516, |
| "grad_norm": 0.15201462004812408, |
| "learning_rate": 7.456671201650145e-06, |
| "loss": 0.8551, |
| "step": 662 |
| }, |
| { |
| "epoch": 4.1112828438949, |
| "grad_norm": 0.13755795678813534, |
| "learning_rate": 7.356056661814106e-06, |
| "loss": 0.8557, |
| "step": 663 |
| }, |
| { |
| "epoch": 4.117465224111283, |
| "grad_norm": 0.15959755820070046, |
| "learning_rate": 7.256056767252402e-06, |
| "loss": 0.8539, |
| "step": 664 |
| }, |
| { |
| "epoch": 4.123647604327666, |
| "grad_norm": 0.1511791942647596, |
| "learning_rate": 7.156673400839933e-06, |
| "loss": 0.853, |
| "step": 665 |
| }, |
| { |
| "epoch": 4.12982998454405, |
| "grad_norm": 0.14354560331309524, |
| "learning_rate": 7.057908433843205e-06, |
| "loss": 0.8464, |
| "step": 666 |
| }, |
| { |
| "epoch": 4.1360123647604325, |
| "grad_norm": 0.1365948998014824, |
| "learning_rate": 6.959763725884956e-06, |
| "loss": 0.86, |
| "step": 667 |
| }, |
| { |
| "epoch": 4.142194744976816, |
| "grad_norm": 0.1745107521168772, |
| "learning_rate": 6.862241124909262e-06, |
| "loss": 0.8559, |
| "step": 668 |
| }, |
| { |
| "epoch": 4.1483771251932, |
| "grad_norm": 0.16007671239764104, |
| "learning_rate": 6.765342467146632e-06, |
| "loss": 0.8547, |
| "step": 669 |
| }, |
| { |
| "epoch": 4.1545595054095825, |
| "grad_norm": 0.14588280210721616, |
| "learning_rate": 6.66906957707957e-06, |
| "loss": 0.8563, |
| "step": 670 |
| }, |
| { |
| "epoch": 4.160741885625966, |
| "grad_norm": 0.13534918455746356, |
| "learning_rate": 6.5734242674080705e-06, |
| "loss": 0.8536, |
| "step": 671 |
| }, |
| { |
| "epoch": 4.166924265842349, |
| "grad_norm": 0.16010646794448433, |
| "learning_rate": 6.478408339015603e-06, |
| "loss": 0.8624, |
| "step": 672 |
| }, |
| { |
| "epoch": 4.1731066460587325, |
| "grad_norm": 0.22075894460772627, |
| "learning_rate": 6.384023580935155e-06, |
| "loss": 0.8588, |
| "step": 673 |
| }, |
| { |
| "epoch": 4.179289026275116, |
| "grad_norm": 0.17179022848077144, |
| "learning_rate": 6.290271770315541e-06, |
| "loss": 0.8497, |
| "step": 674 |
| }, |
| { |
| "epoch": 4.185471406491499, |
| "grad_norm": 0.14586862929974034, |
| "learning_rate": 6.1971546723879864e-06, |
| "loss": 0.859, |
| "step": 675 |
| }, |
| { |
| "epoch": 4.1916537867078825, |
| "grad_norm": 0.1629793349005496, |
| "learning_rate": 6.104674040432797e-06, |
| "loss": 0.8649, |
| "step": 676 |
| }, |
| { |
| "epoch": 4.197836166924266, |
| "grad_norm": 0.15123853435524942, |
| "learning_rate": 6.012831615746484e-06, |
| "loss": 0.8536, |
| "step": 677 |
| }, |
| { |
| "epoch": 4.204018547140649, |
| "grad_norm": 0.14020729925440908, |
| "learning_rate": 5.9216291276088435e-06, |
| "loss": 0.8633, |
| "step": 678 |
| }, |
| { |
| "epoch": 4.210200927357032, |
| "grad_norm": 0.1342216269142737, |
| "learning_rate": 5.831068293250481e-06, |
| "loss": 0.863, |
| "step": 679 |
| }, |
| { |
| "epoch": 4.216383307573416, |
| "grad_norm": 0.16252430886540703, |
| "learning_rate": 5.74115081782042e-06, |
| "loss": 0.8623, |
| "step": 680 |
| }, |
| { |
| "epoch": 4.222565687789799, |
| "grad_norm": 0.16349108022579686, |
| "learning_rate": 5.651878394354091e-06, |
| "loss": 0.8582, |
| "step": 681 |
| }, |
| { |
| "epoch": 4.228748068006182, |
| "grad_norm": 0.14338160366251965, |
| "learning_rate": 5.563252703741322e-06, |
| "loss": 0.8565, |
| "step": 682 |
| }, |
| { |
| "epoch": 4.234930448222566, |
| "grad_norm": 0.12199876601554722, |
| "learning_rate": 5.475275414694787e-06, |
| "loss": 0.8553, |
| "step": 683 |
| }, |
| { |
| "epoch": 4.241112828438949, |
| "grad_norm": 0.13894410880991992, |
| "learning_rate": 5.387948183718563e-06, |
| "loss": 0.8586, |
| "step": 684 |
| }, |
| { |
| "epoch": 4.247295208655332, |
| "grad_norm": 0.14581402965942716, |
| "learning_rate": 5.30127265507693e-06, |
| "loss": 0.8639, |
| "step": 685 |
| }, |
| { |
| "epoch": 4.253477588871716, |
| "grad_norm": 0.12813655780527142, |
| "learning_rate": 5.215250460763397e-06, |
| "loss": 0.845, |
| "step": 686 |
| }, |
| { |
| "epoch": 4.259659969088099, |
| "grad_norm": 0.1303992841804699, |
| "learning_rate": 5.129883220470007e-06, |
| "loss": 0.8509, |
| "step": 687 |
| }, |
| { |
| "epoch": 4.265842349304482, |
| "grad_norm": 0.1334156527459534, |
| "learning_rate": 5.045172541556831e-06, |
| "loss": 0.8522, |
| "step": 688 |
| }, |
| { |
| "epoch": 4.272024729520865, |
| "grad_norm": 0.12398532810622111, |
| "learning_rate": 4.961120019021684e-06, |
| "loss": 0.8647, |
| "step": 689 |
| }, |
| { |
| "epoch": 4.278207109737249, |
| "grad_norm": 0.12909526271671973, |
| "learning_rate": 4.877727235470113e-06, |
| "loss": 0.8586, |
| "step": 690 |
| }, |
| { |
| "epoch": 4.284389489953632, |
| "grad_norm": 0.128229338131557, |
| "learning_rate": 4.794995761085593e-06, |
| "loss": 0.8606, |
| "step": 691 |
| }, |
| { |
| "epoch": 4.290571870170015, |
| "grad_norm": 0.12394373173993793, |
| "learning_rate": 4.712927153599967e-06, |
| "loss": 0.8561, |
| "step": 692 |
| }, |
| { |
| "epoch": 4.296754250386399, |
| "grad_norm": 0.12226322639491397, |
| "learning_rate": 4.631522958264083e-06, |
| "loss": 0.8526, |
| "step": 693 |
| }, |
| { |
| "epoch": 4.302936630602782, |
| "grad_norm": 0.11913527209349921, |
| "learning_rate": 4.550784707818756e-06, |
| "loss": 0.8556, |
| "step": 694 |
| }, |
| { |
| "epoch": 4.309119010819165, |
| "grad_norm": 0.1459462503792828, |
| "learning_rate": 4.47071392246587e-06, |
| "loss": 0.8616, |
| "step": 695 |
| }, |
| { |
| "epoch": 4.315301391035549, |
| "grad_norm": 0.13037715558952065, |
| "learning_rate": 4.3913121098397675e-06, |
| "loss": 0.8595, |
| "step": 696 |
| }, |
| { |
| "epoch": 4.321483771251932, |
| "grad_norm": 0.11600080031698429, |
| "learning_rate": 4.312580764978825e-06, |
| "loss": 0.8472, |
| "step": 697 |
| }, |
| { |
| "epoch": 4.327666151468315, |
| "grad_norm": 0.12978365860295593, |
| "learning_rate": 4.234521370297398e-06, |
| "loss": 0.8652, |
| "step": 698 |
| }, |
| { |
| "epoch": 4.333848531684699, |
| "grad_norm": 0.13293173492859417, |
| "learning_rate": 4.157135395557786e-06, |
| "loss": 0.862, |
| "step": 699 |
| }, |
| { |
| "epoch": 4.340030911901082, |
| "grad_norm": 0.1159570605469207, |
| "learning_rate": 4.080424297842656e-06, |
| "loss": 0.8626, |
| "step": 700 |
| }, |
| { |
| "epoch": 4.346213292117465, |
| "grad_norm": 0.12754643596592866, |
| "learning_rate": 4.004389521527543e-06, |
| "loss": 0.8599, |
| "step": 701 |
| }, |
| { |
| "epoch": 4.352395672333849, |
| "grad_norm": 0.12442724249604767, |
| "learning_rate": 3.929032498253729e-06, |
| "loss": 0.8643, |
| "step": 702 |
| }, |
| { |
| "epoch": 4.358578052550232, |
| "grad_norm": 0.12301901657925675, |
| "learning_rate": 3.8543546469011904e-06, |
| "loss": 0.8684, |
| "step": 703 |
| }, |
| { |
| "epoch": 4.364760432766615, |
| "grad_norm": 0.11793079664110967, |
| "learning_rate": 3.780357373561958e-06, |
| "loss": 0.8554, |
| "step": 704 |
| }, |
| { |
| "epoch": 4.370942812982999, |
| "grad_norm": 0.11575437385032925, |
| "learning_rate": 3.7070420715136133e-06, |
| "loss": 0.8649, |
| "step": 705 |
| }, |
| { |
| "epoch": 4.377125193199381, |
| "grad_norm": 0.12884152614107805, |
| "learning_rate": 3.634410121193059e-06, |
| "loss": 0.8549, |
| "step": 706 |
| }, |
| { |
| "epoch": 4.383307573415765, |
| "grad_norm": 0.13882711090422645, |
| "learning_rate": 3.562462890170526e-06, |
| "loss": 0.848, |
| "step": 707 |
| }, |
| { |
| "epoch": 4.3894899536321486, |
| "grad_norm": 0.13110201135261, |
| "learning_rate": 3.4912017331238057e-06, |
| "loss": 0.8634, |
| "step": 708 |
| }, |
| { |
| "epoch": 4.395672333848531, |
| "grad_norm": 0.1192918433930348, |
| "learning_rate": 3.420627991812788e-06, |
| "loss": 0.8595, |
| "step": 709 |
| }, |
| { |
| "epoch": 4.401854714064915, |
| "grad_norm": 0.12360082779438417, |
| "learning_rate": 3.3507429950541527e-06, |
| "loss": 0.8526, |
| "step": 710 |
| }, |
| { |
| "epoch": 4.4080370942812985, |
| "grad_norm": 0.12305496739557657, |
| "learning_rate": 3.281548058696373e-06, |
| "loss": 0.8583, |
| "step": 711 |
| }, |
| { |
| "epoch": 4.414219474497681, |
| "grad_norm": 0.11759210587278442, |
| "learning_rate": 3.2130444855949406e-06, |
| "loss": 0.8671, |
| "step": 712 |
| }, |
| { |
| "epoch": 4.420401854714065, |
| "grad_norm": 0.12180525882355446, |
| "learning_rate": 3.145233565587824e-06, |
| "loss": 0.8616, |
| "step": 713 |
| }, |
| { |
| "epoch": 4.4265842349304485, |
| "grad_norm": 0.11198832899126798, |
| "learning_rate": 3.078116575471173e-06, |
| "loss": 0.8566, |
| "step": 714 |
| }, |
| { |
| "epoch": 4.432766615146831, |
| "grad_norm": 0.11314209890394392, |
| "learning_rate": 3.0116947789753028e-06, |
| "loss": 0.8581, |
| "step": 715 |
| }, |
| { |
| "epoch": 4.438948995363215, |
| "grad_norm": 0.11662833912400648, |
| "learning_rate": 2.9459694267408977e-06, |
| "loss": 0.8646, |
| "step": 716 |
| }, |
| { |
| "epoch": 4.4451313755795985, |
| "grad_norm": 0.11975312911303863, |
| "learning_rate": 2.8809417562954435e-06, |
| "loss": 0.8636, |
| "step": 717 |
| }, |
| { |
| "epoch": 4.451313755795981, |
| "grad_norm": 0.11940652053609146, |
| "learning_rate": 2.8166129920299278e-06, |
| "loss": 0.8474, |
| "step": 718 |
| }, |
| { |
| "epoch": 4.457496136012365, |
| "grad_norm": 0.11252115918580587, |
| "learning_rate": 2.752984345175809e-06, |
| "loss": 0.848, |
| "step": 719 |
| }, |
| { |
| "epoch": 4.4636785162287484, |
| "grad_norm": 0.10311476898498252, |
| "learning_rate": 2.690057013782195e-06, |
| "loss": 0.8599, |
| "step": 720 |
| }, |
| { |
| "epoch": 4.469860896445131, |
| "grad_norm": 0.11653845815565568, |
| "learning_rate": 2.6278321826932818e-06, |
| "loss": 0.8585, |
| "step": 721 |
| }, |
| { |
| "epoch": 4.476043276661515, |
| "grad_norm": 0.1098739721478331, |
| "learning_rate": 2.566311023526056e-06, |
| "loss": 0.8704, |
| "step": 722 |
| }, |
| { |
| "epoch": 4.4822256568778975, |
| "grad_norm": 0.11465020117380761, |
| "learning_rate": 2.5054946946482208e-06, |
| "loss": 0.8619, |
| "step": 723 |
| }, |
| { |
| "epoch": 4.488408037094281, |
| "grad_norm": 0.11796720870498818, |
| "learning_rate": 2.445384341156389e-06, |
| "loss": 0.8498, |
| "step": 724 |
| }, |
| { |
| "epoch": 4.494590417310665, |
| "grad_norm": 0.1166183962860216, |
| "learning_rate": 2.3859810948545414e-06, |
| "loss": 0.8567, |
| "step": 725 |
| }, |
| { |
| "epoch": 4.500772797527048, |
| "grad_norm": 0.11682256683745675, |
| "learning_rate": 2.3272860742326798e-06, |
| "loss": 0.8556, |
| "step": 726 |
| }, |
| { |
| "epoch": 4.506955177743431, |
| "grad_norm": 0.11434906715119132, |
| "learning_rate": 2.269300384445812e-06, |
| "loss": 0.8533, |
| "step": 727 |
| }, |
| { |
| "epoch": 4.513137557959815, |
| "grad_norm": 0.10735947434712247, |
| "learning_rate": 2.2120251172931082e-06, |
| "loss": 0.856, |
| "step": 728 |
| }, |
| { |
| "epoch": 4.5193199381761975, |
| "grad_norm": 0.10434949655119075, |
| "learning_rate": 2.15546135119733e-06, |
| "loss": 0.8583, |
| "step": 729 |
| }, |
| { |
| "epoch": 4.525502318392581, |
| "grad_norm": 0.11530305446016027, |
| "learning_rate": 2.0996101511846056e-06, |
| "loss": 0.8522, |
| "step": 730 |
| }, |
| { |
| "epoch": 4.531684698608965, |
| "grad_norm": 0.11525723139251923, |
| "learning_rate": 2.0444725688642685e-06, |
| "loss": 0.8537, |
| "step": 731 |
| }, |
| { |
| "epoch": 4.5378670788253475, |
| "grad_norm": 0.10816720987898426, |
| "learning_rate": 1.9900496424091375e-06, |
| "loss": 0.8552, |
| "step": 732 |
| }, |
| { |
| "epoch": 4.544049459041731, |
| "grad_norm": 0.10384961737476203, |
| "learning_rate": 1.9363423965359195e-06, |
| "loss": 0.8562, |
| "step": 733 |
| }, |
| { |
| "epoch": 4.550231839258115, |
| "grad_norm": 0.11336995410635603, |
| "learning_rate": 1.883351842485972e-06, |
| "loss": 0.8506, |
| "step": 734 |
| }, |
| { |
| "epoch": 4.556414219474497, |
| "grad_norm": 0.1092135826099414, |
| "learning_rate": 1.8310789780061887e-06, |
| "loss": 0.8589, |
| "step": 735 |
| }, |
| { |
| "epoch": 4.562596599690881, |
| "grad_norm": 0.10993015362867578, |
| "learning_rate": 1.7795247873302735e-06, |
| "loss": 0.8568, |
| "step": 736 |
| }, |
| { |
| "epoch": 4.568778979907265, |
| "grad_norm": 0.10666335057459857, |
| "learning_rate": 1.728690241160189e-06, |
| "loss": 0.8549, |
| "step": 737 |
| }, |
| { |
| "epoch": 4.574961360123647, |
| "grad_norm": 0.10922147280384506, |
| "learning_rate": 1.6785762966478715e-06, |
| "loss": 0.8616, |
| "step": 738 |
| }, |
| { |
| "epoch": 4.581143740340031, |
| "grad_norm": 0.10394964664273364, |
| "learning_rate": 1.6291838973772068e-06, |
| "loss": 0.85, |
| "step": 739 |
| }, |
| { |
| "epoch": 4.587326120556414, |
| "grad_norm": 0.10207519688840014, |
| "learning_rate": 1.5805139733462827e-06, |
| "loss": 0.8542, |
| "step": 740 |
| }, |
| { |
| "epoch": 4.593508500772797, |
| "grad_norm": 0.11068267423445127, |
| "learning_rate": 1.532567440949868e-06, |
| "loss": 0.863, |
| "step": 741 |
| }, |
| { |
| "epoch": 4.599690880989181, |
| "grad_norm": 0.10290817052985828, |
| "learning_rate": 1.4853452029621518e-06, |
| "loss": 0.8582, |
| "step": 742 |
| }, |
| { |
| "epoch": 4.605873261205565, |
| "grad_norm": 0.10564697594520893, |
| "learning_rate": 1.4388481485197558e-06, |
| "loss": 0.8666, |
| "step": 743 |
| }, |
| { |
| "epoch": 4.612055641421947, |
| "grad_norm": 0.11348466915609945, |
| "learning_rate": 1.3930771531049847e-06, |
| "loss": 0.8543, |
| "step": 744 |
| }, |
| { |
| "epoch": 4.618238021638331, |
| "grad_norm": 0.10419274079331056, |
| "learning_rate": 1.3480330785293494e-06, |
| "loss": 0.8605, |
| "step": 745 |
| }, |
| { |
| "epoch": 4.624420401854714, |
| "grad_norm": 0.10717824788246437, |
| "learning_rate": 1.3037167729173273e-06, |
| "loss": 0.8602, |
| "step": 746 |
| }, |
| { |
| "epoch": 4.630602782071097, |
| "grad_norm": 0.11005975127499093, |
| "learning_rate": 1.2601290706904102e-06, |
| "loss": 0.8612, |
| "step": 747 |
| }, |
| { |
| "epoch": 4.636785162287481, |
| "grad_norm": 0.11366201823377237, |
| "learning_rate": 1.2172707925513838e-06, |
| "loss": 0.8692, |
| "step": 748 |
| }, |
| { |
| "epoch": 4.642967542503864, |
| "grad_norm": 0.10308899025027428, |
| "learning_rate": 1.1751427454688735e-06, |
| "loss": 0.8739, |
| "step": 749 |
| }, |
| { |
| "epoch": 4.649149922720247, |
| "grad_norm": 0.10280506406459125, |
| "learning_rate": 1.1337457226621518e-06, |
| "loss": 0.8507, |
| "step": 750 |
| }, |
| { |
| "epoch": 4.655332302936631, |
| "grad_norm": 0.10187117757230453, |
| "learning_rate": 1.0930805035862125e-06, |
| "loss": 0.8688, |
| "step": 751 |
| }, |
| { |
| "epoch": 4.661514683153014, |
| "grad_norm": 0.10669750138958087, |
| "learning_rate": 1.0531478539170713e-06, |
| "loss": 0.8576, |
| "step": 752 |
| }, |
| { |
| "epoch": 4.667697063369397, |
| "grad_norm": 0.1101955728854345, |
| "learning_rate": 1.0139485255373826e-06, |
| "loss": 0.8586, |
| "step": 753 |
| }, |
| { |
| "epoch": 4.673879443585781, |
| "grad_norm": 0.1012943029959148, |
| "learning_rate": 9.75483256522236e-07, |
| "loss": 0.8732, |
| "step": 754 |
| }, |
| { |
| "epoch": 4.680061823802164, |
| "grad_norm": 0.10388171869974447, |
| "learning_rate": 9.377527711253198e-07, |
| "loss": 0.8501, |
| "step": 755 |
| }, |
| { |
| "epoch": 4.686244204018547, |
| "grad_norm": 0.10442678790389989, |
| "learning_rate": 9.007577797652245e-07, |
| "loss": 0.8606, |
| "step": 756 |
| }, |
| { |
| "epoch": 4.69242658423493, |
| "grad_norm": 0.11370097685694985, |
| "learning_rate": 8.644989790121072e-07, |
| "loss": 0.8687, |
| "step": 757 |
| }, |
| { |
| "epoch": 4.698608964451314, |
| "grad_norm": 0.10057604736658246, |
| "learning_rate": 8.289770515745599e-07, |
| "loss": 0.8623, |
| "step": 758 |
| }, |
| { |
| "epoch": 4.704791344667697, |
| "grad_norm": 0.10924197746964438, |
| "learning_rate": 7.941926662867528e-07, |
| "loss": 0.8663, |
| "step": 759 |
| }, |
| { |
| "epoch": 4.710973724884081, |
| "grad_norm": 0.10139852002580828, |
| "learning_rate": 7.60146478095849e-07, |
| "loss": 0.8533, |
| "step": 760 |
| }, |
| { |
| "epoch": 4.717156105100464, |
| "grad_norm": 0.10196842257139534, |
| "learning_rate": 7.268391280496589e-07, |
| "loss": 0.8545, |
| "step": 761 |
| }, |
| { |
| "epoch": 4.723338485316847, |
| "grad_norm": 0.09982941082955533, |
| "learning_rate": 6.94271243284601e-07, |
| "loss": 0.8593, |
| "step": 762 |
| }, |
| { |
| "epoch": 4.72952086553323, |
| "grad_norm": 0.1057680391701117, |
| "learning_rate": 6.624434370138532e-07, |
| "loss": 0.8637, |
| "step": 763 |
| }, |
| { |
| "epoch": 4.7357032457496135, |
| "grad_norm": 0.09876242839351415, |
| "learning_rate": 6.313563085158425e-07, |
| "loss": 0.8527, |
| "step": 764 |
| }, |
| { |
| "epoch": 4.741885625965997, |
| "grad_norm": 0.1029622819478206, |
| "learning_rate": 6.010104431229202e-07, |
| "loss": 0.8512, |
| "step": 765 |
| }, |
| { |
| "epoch": 4.74806800618238, |
| "grad_norm": 0.1017976721996803, |
| "learning_rate": 5.714064122103935e-07, |
| "loss": 0.8535, |
| "step": 766 |
| }, |
| { |
| "epoch": 4.7542503863987635, |
| "grad_norm": 0.10123837826010473, |
| "learning_rate": 5.425447731857248e-07, |
| "loss": 0.8563, |
| "step": 767 |
| }, |
| { |
| "epoch": 4.760432766615147, |
| "grad_norm": 0.10371103096875577, |
| "learning_rate": 5.144260694780512e-07, |
| "loss": 0.8558, |
| "step": 768 |
| }, |
| { |
| "epoch": 4.76661514683153, |
| "grad_norm": 0.10142002208178304, |
| "learning_rate": 4.870508305279531e-07, |
| "loss": 0.8574, |
| "step": 769 |
| }, |
| { |
| "epoch": 4.7727975270479135, |
| "grad_norm": 0.10174156389331632, |
| "learning_rate": 4.604195717774973e-07, |
| "loss": 0.8678, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.778979907264297, |
| "grad_norm": 0.10158475219119292, |
| "learning_rate": 4.3453279466049383e-07, |
| "loss": 0.8652, |
| "step": 771 |
| }, |
| { |
| "epoch": 4.78516228748068, |
| "grad_norm": 0.1037484611064643, |
| "learning_rate": 4.0939098659309895e-07, |
| "loss": 0.8489, |
| "step": 772 |
| }, |
| { |
| "epoch": 4.7913446676970635, |
| "grad_norm": 0.10162493935552491, |
| "learning_rate": 3.849946209646138e-07, |
| "loss": 0.87, |
| "step": 773 |
| }, |
| { |
| "epoch": 4.797527047913446, |
| "grad_norm": 0.10099579812699412, |
| "learning_rate": 3.6134415712857587e-07, |
| "loss": 0.861, |
| "step": 774 |
| }, |
| { |
| "epoch": 4.80370942812983, |
| "grad_norm": 0.10320302386148672, |
| "learning_rate": 3.384400403941124e-07, |
| "loss": 0.8506, |
| "step": 775 |
| }, |
| { |
| "epoch": 4.809891808346213, |
| "grad_norm": 0.10386437696890127, |
| "learning_rate": 3.1628270201754743e-07, |
| "loss": 0.8498, |
| "step": 776 |
| }, |
| { |
| "epoch": 4.816074188562597, |
| "grad_norm": 0.09844682031008335, |
| "learning_rate": 2.948725591942925e-07, |
| "loss": 0.8537, |
| "step": 777 |
| }, |
| { |
| "epoch": 4.82225656877898, |
| "grad_norm": 0.10232392208050557, |
| "learning_rate": 2.742100150509819e-07, |
| "loss": 0.8668, |
| "step": 778 |
| }, |
| { |
| "epoch": 4.828438948995363, |
| "grad_norm": 0.09744721925527271, |
| "learning_rate": 2.542954586378921e-07, |
| "loss": 0.8657, |
| "step": 779 |
| }, |
| { |
| "epoch": 4.834621329211746, |
| "grad_norm": 0.10125314166249126, |
| "learning_rate": 2.351292649216097e-07, |
| "loss": 0.8631, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.84080370942813, |
| "grad_norm": 0.09816843886551711, |
| "learning_rate": 2.167117947779751e-07, |
| "loss": 0.8657, |
| "step": 781 |
| }, |
| { |
| "epoch": 4.846986089644513, |
| "grad_norm": 0.10038235663832168, |
| "learning_rate": 1.990433949852788e-07, |
| "loss": 0.8666, |
| "step": 782 |
| }, |
| { |
| "epoch": 4.853168469860896, |
| "grad_norm": 0.09799020116935744, |
| "learning_rate": 1.821243982177423e-07, |
| "loss": 0.8635, |
| "step": 783 |
| }, |
| { |
| "epoch": 4.85935085007728, |
| "grad_norm": 0.10142283764104856, |
| "learning_rate": 1.6595512303925199e-07, |
| "loss": 0.8496, |
| "step": 784 |
| }, |
| { |
| "epoch": 4.865533230293663, |
| "grad_norm": 0.10199837045642687, |
| "learning_rate": 1.5053587389735502e-07, |
| "loss": 0.8764, |
| "step": 785 |
| }, |
| { |
| "epoch": 4.871715610510046, |
| "grad_norm": 0.10337397487378153, |
| "learning_rate": 1.358669411175395e-07, |
| "loss": 0.8548, |
| "step": 786 |
| }, |
| { |
| "epoch": 4.87789799072643, |
| "grad_norm": 0.09793020007211001, |
| "learning_rate": 1.2194860089774995e-07, |
| "loss": 0.8653, |
| "step": 787 |
| }, |
| { |
| "epoch": 4.884080370942813, |
| "grad_norm": 0.09878155037542055, |
| "learning_rate": 1.0878111530320478e-07, |
| "loss": 0.8623, |
| "step": 788 |
| }, |
| { |
| "epoch": 4.890262751159196, |
| "grad_norm": 0.10147729772509538, |
| "learning_rate": 9.636473226144916e-08, |
| "loss": 0.8614, |
| "step": 789 |
| }, |
| { |
| "epoch": 4.89644513137558, |
| "grad_norm": 0.09737787354641762, |
| "learning_rate": 8.469968555769648e-08, |
| "loss": 0.8647, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.902627511591962, |
| "grad_norm": 0.09933111257188475, |
| "learning_rate": 7.37861948304186e-08, |
| "loss": 0.8482, |
| "step": 791 |
| }, |
| { |
| "epoch": 4.908809891808346, |
| "grad_norm": 0.09569900244500511, |
| "learning_rate": 6.362446556720691e-08, |
| "loss": 0.851, |
| "step": 792 |
| }, |
| { |
| "epoch": 4.91499227202473, |
| "grad_norm": 0.1031116966509426, |
| "learning_rate": 5.421468910092209e-08, |
| "loss": 0.8663, |
| "step": 793 |
| }, |
| { |
| "epoch": 4.921174652241113, |
| "grad_norm": 0.11035260250280782, |
| "learning_rate": 4.555704260607474e-08, |
| "loss": 0.8488, |
| "step": 794 |
| }, |
| { |
| "epoch": 4.927357032457496, |
| "grad_norm": 0.09621566038424581, |
| "learning_rate": 3.765168909548589e-08, |
| "loss": 0.8606, |
| "step": 795 |
| }, |
| { |
| "epoch": 4.93353941267388, |
| "grad_norm": 0.10155877681943523, |
| "learning_rate": 3.049877741723606e-08, |
| "loss": 0.8639, |
| "step": 796 |
| }, |
| { |
| "epoch": 4.939721792890262, |
| "grad_norm": 0.10009225600988104, |
| "learning_rate": 2.4098442251849762e-08, |
| "loss": 0.8559, |
| "step": 797 |
| }, |
| { |
| "epoch": 4.945904173106646, |
| "grad_norm": 0.10209104156755659, |
| "learning_rate": 1.8450804109759745e-08, |
| "loss": 0.8421, |
| "step": 798 |
| }, |
| { |
| "epoch": 4.95208655332303, |
| "grad_norm": 0.10570200426208885, |
| "learning_rate": 1.3555969329037688e-08, |
| "loss": 0.8565, |
| "step": 799 |
| }, |
| { |
| "epoch": 4.958268933539412, |
| "grad_norm": 0.09810578254731156, |
| "learning_rate": 9.41403007340025e-09, |
| "loss": 0.8638, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.964451313755796, |
| "grad_norm": 0.1002862128277217, |
| "learning_rate": 6.025064330463792e-09, |
| "loss": 0.8553, |
| "step": 801 |
| }, |
| { |
| "epoch": 4.97063369397218, |
| "grad_norm": 0.09978110120418406, |
| "learning_rate": 3.3891359102877773e-09, |
| "loss": 0.8633, |
| "step": 802 |
| }, |
| { |
| "epoch": 4.976816074188562, |
| "grad_norm": 0.09867622221883632, |
| "learning_rate": 1.506294444153511e-09, |
| "loss": 0.8764, |
| "step": 803 |
| }, |
| { |
| "epoch": 4.982998454404946, |
| "grad_norm": 0.10760151986299173, |
| "learning_rate": 3.7657538364932467e-10, |
| "loss": 0.8589, |
| "step": 804 |
| }, |
| { |
| "epoch": 4.9891808346213296, |
| "grad_norm": 0.10304725530595252, |
| "learning_rate": 0.0, |
| "loss": 0.8693, |
| "step": 805 |
| }, |
| { |
| "epoch": 4.9891808346213296, |
| "step": 805, |
| "total_flos": 1.657473506467224e+19, |
| "train_loss": 0.9660356465333737, |
| "train_runtime": 42578.0212, |
| "train_samples_per_second": 9.723, |
| "train_steps_per_second": 0.019 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 805, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.657473506467224e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |