| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8369601606963508, |
| "eval_steps": 500, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00033478406427854036, |
| "grad_norm": 2.5951156616210938, |
| "learning_rate": 5e-06, |
| "loss": 2.0872, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0006695681285570807, |
| "grad_norm": 2.011127471923828, |
| "learning_rate": 1e-05, |
| "loss": 2.0068, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001004352192835621, |
| "grad_norm": 2.058666467666626, |
| "learning_rate": 1.5e-05, |
| "loss": 2.0258, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0013391362571141614, |
| "grad_norm": 2.2120566368103027, |
| "learning_rate": 2e-05, |
| "loss": 2.0142, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0016739203213927017, |
| "grad_norm": 2.370628595352173, |
| "learning_rate": 2.5e-05, |
| "loss": 2.0344, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002008704385671242, |
| "grad_norm": 1.437334418296814, |
| "learning_rate": 3e-05, |
| "loss": 1.9203, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.002343488449949782, |
| "grad_norm": 1.1889039278030396, |
| "learning_rate": 3.5e-05, |
| "loss": 1.9264, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.002678272514228323, |
| "grad_norm": 1.0925624370574951, |
| "learning_rate": 4e-05, |
| "loss": 1.9148, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.003013056578506863, |
| "grad_norm": 0.5106806755065918, |
| "learning_rate": 4.5e-05, |
| "loss": 1.9098, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0033478406427854034, |
| "grad_norm": 0.506732702255249, |
| "learning_rate": 5e-05, |
| "loss": 1.832, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0036826247070639436, |
| "grad_norm": 0.47460949420928955, |
| "learning_rate": 5.500000000000001e-05, |
| "loss": 1.879, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.004017408771342484, |
| "grad_norm": 0.5833293199539185, |
| "learning_rate": 6e-05, |
| "loss": 1.8777, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.004352192835621024, |
| "grad_norm": 0.4383687376976013, |
| "learning_rate": 6.500000000000001e-05, |
| "loss": 1.8559, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.004686976899899564, |
| "grad_norm": 0.35056746006011963, |
| "learning_rate": 7e-05, |
| "loss": 1.7573, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0050217609641781055, |
| "grad_norm": 0.5545064210891724, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 1.7541, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.005356545028456646, |
| "grad_norm": 0.3440060019493103, |
| "learning_rate": 8e-05, |
| "loss": 1.6876, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.005691329092735186, |
| "grad_norm": 0.36561861634254456, |
| "learning_rate": 8.5e-05, |
| "loss": 1.7454, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.006026113157013726, |
| "grad_norm": 0.282402902841568, |
| "learning_rate": 9e-05, |
| "loss": 1.8184, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0063608972212922665, |
| "grad_norm": 0.1981375813484192, |
| "learning_rate": 9.5e-05, |
| "loss": 1.7448, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.006695681285570807, |
| "grad_norm": 0.16754242777824402, |
| "learning_rate": 0.0001, |
| "loss": 1.7555, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.007030465349849347, |
| "grad_norm": 0.17915141582489014, |
| "learning_rate": 0.0001, |
| "loss": 1.7533, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.007365249414127887, |
| "grad_norm": 0.1990516483783722, |
| "learning_rate": 0.0001, |
| "loss": 1.6819, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0077000334784064275, |
| "grad_norm": 0.20808538794517517, |
| "learning_rate": 0.0001, |
| "loss": 1.7345, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.008034817542684968, |
| "grad_norm": 0.2500799894332886, |
| "learning_rate": 0.0001, |
| "loss": 1.7636, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.008369601606963508, |
| "grad_norm": 0.2141977846622467, |
| "learning_rate": 0.0001, |
| "loss": 1.7475, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.008704385671242048, |
| "grad_norm": 0.2018044888973236, |
| "learning_rate": 0.0001, |
| "loss": 1.6445, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.009039169735520589, |
| "grad_norm": 0.19822722673416138, |
| "learning_rate": 0.0001, |
| "loss": 1.7604, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.009373953799799129, |
| "grad_norm": 0.18675795197486877, |
| "learning_rate": 0.0001, |
| "loss": 1.7743, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.009708737864077669, |
| "grad_norm": 0.16032469272613525, |
| "learning_rate": 0.0001, |
| "loss": 1.7221, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.010043521928356211, |
| "grad_norm": 0.17107701301574707, |
| "learning_rate": 0.0001, |
| "loss": 1.7514, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.010378305992634751, |
| "grad_norm": 0.1589154601097107, |
| "learning_rate": 0.0001, |
| "loss": 1.6738, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.010713090056913292, |
| "grad_norm": 0.13346004486083984, |
| "learning_rate": 0.0001, |
| "loss": 1.6011, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.011047874121191832, |
| "grad_norm": 0.1687479168176651, |
| "learning_rate": 0.0001, |
| "loss": 1.7694, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.011382658185470372, |
| "grad_norm": 0.14785747230052948, |
| "learning_rate": 0.0001, |
| "loss": 1.6836, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.011717442249748912, |
| "grad_norm": 0.13441652059555054, |
| "learning_rate": 0.0001, |
| "loss": 1.7087, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.012052226314027453, |
| "grad_norm": 0.13479024171829224, |
| "learning_rate": 0.0001, |
| "loss": 1.6456, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.012387010378305993, |
| "grad_norm": 0.15816231071949005, |
| "learning_rate": 0.0001, |
| "loss": 1.6643, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.012721794442584533, |
| "grad_norm": 0.12814071774482727, |
| "learning_rate": 0.0001, |
| "loss": 1.6382, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.013056578506863073, |
| "grad_norm": 0.129450261592865, |
| "learning_rate": 0.0001, |
| "loss": 1.7623, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.013391362571141614, |
| "grad_norm": 0.13946504890918732, |
| "learning_rate": 0.0001, |
| "loss": 1.8067, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.013726146635420154, |
| "grad_norm": 0.1161293238401413, |
| "learning_rate": 0.0001, |
| "loss": 1.688, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.014060930699698694, |
| "grad_norm": 0.11296379566192627, |
| "learning_rate": 0.0001, |
| "loss": 1.6035, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.014395714763977234, |
| "grad_norm": 0.12507247924804688, |
| "learning_rate": 0.0001, |
| "loss": 1.7287, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.014730498828255775, |
| "grad_norm": 0.11496929079294205, |
| "learning_rate": 0.0001, |
| "loss": 1.626, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.015065282892534315, |
| "grad_norm": 0.13881774246692657, |
| "learning_rate": 0.0001, |
| "loss": 1.7501, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.015400066956812855, |
| "grad_norm": 0.1255090981721878, |
| "learning_rate": 0.0001, |
| "loss": 1.6952, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.015734851021091397, |
| "grad_norm": 0.11783197522163391, |
| "learning_rate": 0.0001, |
| "loss": 1.6256, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.016069635085369936, |
| "grad_norm": 0.12152993679046631, |
| "learning_rate": 0.0001, |
| "loss": 1.6443, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.016404419149648478, |
| "grad_norm": 0.12172088027000427, |
| "learning_rate": 0.0001, |
| "loss": 1.6927, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.016739203213927016, |
| "grad_norm": 0.13490882515907288, |
| "learning_rate": 0.0001, |
| "loss": 1.7372, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.017073987278205558, |
| "grad_norm": 0.1124483197927475, |
| "learning_rate": 0.0001, |
| "loss": 1.6206, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.017408771342484097, |
| "grad_norm": 0.11569201201200485, |
| "learning_rate": 0.0001, |
| "loss": 1.7156, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.01774355540676264, |
| "grad_norm": 0.12394021451473236, |
| "learning_rate": 0.0001, |
| "loss": 1.6132, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.018078339471041177, |
| "grad_norm": 0.11930014938116074, |
| "learning_rate": 0.0001, |
| "loss": 1.6552, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.01841312353531972, |
| "grad_norm": 0.1183612123131752, |
| "learning_rate": 0.0001, |
| "loss": 1.6953, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.018747907599598258, |
| "grad_norm": 0.11677711457014084, |
| "learning_rate": 0.0001, |
| "loss": 1.6936, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0190826916638768, |
| "grad_norm": 0.12049452215433121, |
| "learning_rate": 0.0001, |
| "loss": 1.6381, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.019417475728155338, |
| "grad_norm": 0.11653623729944229, |
| "learning_rate": 0.0001, |
| "loss": 1.7704, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.01975225979243388, |
| "grad_norm": 0.12089766561985016, |
| "learning_rate": 0.0001, |
| "loss": 1.6819, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.020087043856712422, |
| "grad_norm": 0.12823008000850677, |
| "learning_rate": 0.0001, |
| "loss": 1.7584, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02042182792099096, |
| "grad_norm": 0.12439601868391037, |
| "learning_rate": 0.0001, |
| "loss": 1.6761, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.020756611985269503, |
| "grad_norm": 0.12000609189271927, |
| "learning_rate": 0.0001, |
| "loss": 1.7014, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.02109139604954804, |
| "grad_norm": 0.12034812569618225, |
| "learning_rate": 0.0001, |
| "loss": 1.7128, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.021426180113826583, |
| "grad_norm": 0.11534720659255981, |
| "learning_rate": 0.0001, |
| "loss": 1.694, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.02176096417810512, |
| "grad_norm": 0.11633310467004776, |
| "learning_rate": 0.0001, |
| "loss": 1.6718, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.022095748242383664, |
| "grad_norm": 0.13419900834560394, |
| "learning_rate": 0.0001, |
| "loss": 1.707, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.022430532306662202, |
| "grad_norm": 0.11928509920835495, |
| "learning_rate": 0.0001, |
| "loss": 1.6935, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.022765316370940744, |
| "grad_norm": 0.11948949843645096, |
| "learning_rate": 0.0001, |
| "loss": 1.6304, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.023100100435219283, |
| "grad_norm": 0.12679244577884674, |
| "learning_rate": 0.0001, |
| "loss": 1.6605, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.023434884499497825, |
| "grad_norm": 0.10675504058599472, |
| "learning_rate": 0.0001, |
| "loss": 1.6785, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.023769668563776363, |
| "grad_norm": 0.12108162045478821, |
| "learning_rate": 0.0001, |
| "loss": 1.6695, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.024104452628054905, |
| "grad_norm": 0.11032188683748245, |
| "learning_rate": 0.0001, |
| "loss": 1.7293, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.024439236692333444, |
| "grad_norm": 0.11592775583267212, |
| "learning_rate": 0.0001, |
| "loss": 1.6726, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.024774020756611986, |
| "grad_norm": 0.11566442996263504, |
| "learning_rate": 0.0001, |
| "loss": 1.6396, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.025108804820890524, |
| "grad_norm": 0.11673177778720856, |
| "learning_rate": 0.0001, |
| "loss": 1.6223, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.025443588885169066, |
| "grad_norm": 0.1140669733285904, |
| "learning_rate": 0.0001, |
| "loss": 1.6886, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.025778372949447605, |
| "grad_norm": 0.11448585987091064, |
| "learning_rate": 0.0001, |
| "loss": 1.6765, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.026113157013726147, |
| "grad_norm": 0.11363522708415985, |
| "learning_rate": 0.0001, |
| "loss": 1.6241, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.02644794107800469, |
| "grad_norm": 0.10882357507944107, |
| "learning_rate": 0.0001, |
| "loss": 1.6495, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.026782725142283227, |
| "grad_norm": 0.11577261239290237, |
| "learning_rate": 0.0001, |
| "loss": 1.6941, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02711750920656177, |
| "grad_norm": 0.12674297392368317, |
| "learning_rate": 0.0001, |
| "loss": 1.7615, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.027452293270840308, |
| "grad_norm": 0.11801646649837494, |
| "learning_rate": 0.0001, |
| "loss": 1.6414, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.02778707733511885, |
| "grad_norm": 0.11615725606679916, |
| "learning_rate": 0.0001, |
| "loss": 1.6586, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.028121861399397388, |
| "grad_norm": 0.1159651130437851, |
| "learning_rate": 0.0001, |
| "loss": 1.6371, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.02845664546367593, |
| "grad_norm": 0.12539416551589966, |
| "learning_rate": 0.0001, |
| "loss": 1.7152, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.02879142952795447, |
| "grad_norm": 0.10691766440868378, |
| "learning_rate": 0.0001, |
| "loss": 1.552, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.02912621359223301, |
| "grad_norm": 0.11859432607889175, |
| "learning_rate": 0.0001, |
| "loss": 1.6516, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.02946099765651155, |
| "grad_norm": 0.12362800538539886, |
| "learning_rate": 0.0001, |
| "loss": 1.6944, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.02979578172079009, |
| "grad_norm": 0.12135861068964005, |
| "learning_rate": 0.0001, |
| "loss": 1.6703, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.03013056578506863, |
| "grad_norm": 0.15077495574951172, |
| "learning_rate": 0.0001, |
| "loss": 1.7522, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03046534984934717, |
| "grad_norm": 0.1137770563364029, |
| "learning_rate": 0.0001, |
| "loss": 1.6263, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.03080013391362571, |
| "grad_norm": 0.11616989970207214, |
| "learning_rate": 0.0001, |
| "loss": 1.7166, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.031134917977904252, |
| "grad_norm": 0.14210130274295807, |
| "learning_rate": 0.0001, |
| "loss": 1.7889, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.031469702042182794, |
| "grad_norm": 0.1261507272720337, |
| "learning_rate": 0.0001, |
| "loss": 1.6593, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.03180448610646133, |
| "grad_norm": 0.13197694718837738, |
| "learning_rate": 0.0001, |
| "loss": 1.6182, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03213927017073987, |
| "grad_norm": 0.11830636113882065, |
| "learning_rate": 0.0001, |
| "loss": 1.6373, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.03247405423501841, |
| "grad_norm": 0.12643662095069885, |
| "learning_rate": 0.0001, |
| "loss": 1.6601, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.032808838299296955, |
| "grad_norm": 0.13787776231765747, |
| "learning_rate": 0.0001, |
| "loss": 1.7496, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.033143622363575494, |
| "grad_norm": 0.1096898540854454, |
| "learning_rate": 0.0001, |
| "loss": 1.5582, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03347840642785403, |
| "grad_norm": 0.13948234915733337, |
| "learning_rate": 0.0001, |
| "loss": 1.6281, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03381319049213258, |
| "grad_norm": 0.11294490098953247, |
| "learning_rate": 0.0001, |
| "loss": 1.6703, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.034147974556411116, |
| "grad_norm": 0.12141433358192444, |
| "learning_rate": 0.0001, |
| "loss": 1.6553, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.034482758620689655, |
| "grad_norm": 0.13332489132881165, |
| "learning_rate": 0.0001, |
| "loss": 1.6761, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.03481754268496819, |
| "grad_norm": 0.12173039466142654, |
| "learning_rate": 0.0001, |
| "loss": 1.6304, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.03515232674924674, |
| "grad_norm": 0.12168910354375839, |
| "learning_rate": 0.0001, |
| "loss": 1.6396, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.03548711081352528, |
| "grad_norm": 0.1244431585073471, |
| "learning_rate": 0.0001, |
| "loss": 1.6463, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.035821894877803816, |
| "grad_norm": 0.12028734385967255, |
| "learning_rate": 0.0001, |
| "loss": 1.684, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.036156678942082354, |
| "grad_norm": 0.12029126286506653, |
| "learning_rate": 0.0001, |
| "loss": 1.6799, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.0364914630063609, |
| "grad_norm": 0.11806860566139221, |
| "learning_rate": 0.0001, |
| "loss": 1.7245, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.03682624707063944, |
| "grad_norm": 0.12406452000141144, |
| "learning_rate": 0.0001, |
| "loss": 1.6881, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03716103113491798, |
| "grad_norm": 0.118985615670681, |
| "learning_rate": 0.0001, |
| "loss": 1.6675, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.037495815199196515, |
| "grad_norm": 0.12949040532112122, |
| "learning_rate": 0.0001, |
| "loss": 1.6871, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.03783059926347506, |
| "grad_norm": 0.12375173717737198, |
| "learning_rate": 0.0001, |
| "loss": 1.6234, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0381653833277536, |
| "grad_norm": 0.11779066920280457, |
| "learning_rate": 0.0001, |
| "loss": 1.7399, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.03850016739203214, |
| "grad_norm": 0.1195269301533699, |
| "learning_rate": 0.0001, |
| "loss": 1.65, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.038834951456310676, |
| "grad_norm": 0.11929327249526978, |
| "learning_rate": 0.0001, |
| "loss": 1.6214, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.03916973552058922, |
| "grad_norm": 0.11532218009233475, |
| "learning_rate": 0.0001, |
| "loss": 1.6395, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.03950451958486776, |
| "grad_norm": 0.11126700043678284, |
| "learning_rate": 0.0001, |
| "loss": 1.622, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.0398393036491463, |
| "grad_norm": 0.1309433877468109, |
| "learning_rate": 0.0001, |
| "loss": 1.5791, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.040174087713424844, |
| "grad_norm": 0.12015924602746964, |
| "learning_rate": 0.0001, |
| "loss": 1.655, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04050887177770338, |
| "grad_norm": 0.12615351378917694, |
| "learning_rate": 0.0001, |
| "loss": 1.6215, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.04084365584198192, |
| "grad_norm": 0.1387631893157959, |
| "learning_rate": 0.0001, |
| "loss": 1.7451, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.04117843990626046, |
| "grad_norm": 0.1166117936372757, |
| "learning_rate": 0.0001, |
| "loss": 1.6537, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.041513223970539005, |
| "grad_norm": 0.1521015763282776, |
| "learning_rate": 0.0001, |
| "loss": 1.6545, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.041848008034817544, |
| "grad_norm": 0.1296280473470688, |
| "learning_rate": 0.0001, |
| "loss": 1.6355, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.04218279209909608, |
| "grad_norm": 0.13189557194709778, |
| "learning_rate": 0.0001, |
| "loss": 1.5868, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.04251757616337462, |
| "grad_norm": 0.1445418745279312, |
| "learning_rate": 0.0001, |
| "loss": 1.7444, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.042852360227653166, |
| "grad_norm": 0.11560577899217606, |
| "learning_rate": 0.0001, |
| "loss": 1.6468, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.043187144291931705, |
| "grad_norm": 0.16312864422798157, |
| "learning_rate": 0.0001, |
| "loss": 1.6734, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.04352192835621024, |
| "grad_norm": 0.1284494251012802, |
| "learning_rate": 0.0001, |
| "loss": 1.6643, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.04385671242048878, |
| "grad_norm": 0.11743518710136414, |
| "learning_rate": 0.0001, |
| "loss": 1.6273, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.04419149648476733, |
| "grad_norm": 0.17127898335456848, |
| "learning_rate": 0.0001, |
| "loss": 1.5955, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.044526280549045866, |
| "grad_norm": 0.1554144024848938, |
| "learning_rate": 0.0001, |
| "loss": 1.7738, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.044861064613324404, |
| "grad_norm": 0.13085848093032837, |
| "learning_rate": 0.0001, |
| "loss": 1.5957, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.04519584867760294, |
| "grad_norm": 0.1883288025856018, |
| "learning_rate": 0.0001, |
| "loss": 1.6159, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.04553063274188149, |
| "grad_norm": 0.11826716363430023, |
| "learning_rate": 0.0001, |
| "loss": 1.6284, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.04586541680616003, |
| "grad_norm": 0.15767724812030792, |
| "learning_rate": 0.0001, |
| "loss": 1.682, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.046200200870438565, |
| "grad_norm": 0.14300817251205444, |
| "learning_rate": 0.0001, |
| "loss": 1.6152, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.04653498493471711, |
| "grad_norm": 0.11646521836519241, |
| "learning_rate": 0.0001, |
| "loss": 1.6343, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.04686976899899565, |
| "grad_norm": 0.12624727189540863, |
| "learning_rate": 0.0001, |
| "loss": 1.6128, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.04720455306327419, |
| "grad_norm": 0.14111122488975525, |
| "learning_rate": 0.0001, |
| "loss": 1.618, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.047539337127552726, |
| "grad_norm": 0.1404058188199997, |
| "learning_rate": 0.0001, |
| "loss": 1.66, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.04787412119183127, |
| "grad_norm": 0.12555940449237823, |
| "learning_rate": 0.0001, |
| "loss": 1.666, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.04820890525610981, |
| "grad_norm": 0.14494475722312927, |
| "learning_rate": 0.0001, |
| "loss": 1.6147, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.04854368932038835, |
| "grad_norm": 0.12508632242679596, |
| "learning_rate": 0.0001, |
| "loss": 1.5765, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.04887847338466689, |
| "grad_norm": 0.11790450662374496, |
| "learning_rate": 0.0001, |
| "loss": 1.7342, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.04921325744894543, |
| "grad_norm": 0.1416400671005249, |
| "learning_rate": 0.0001, |
| "loss": 1.6673, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.04954804151322397, |
| "grad_norm": 0.13537850975990295, |
| "learning_rate": 0.0001, |
| "loss": 1.6328, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.04988282557750251, |
| "grad_norm": 0.12219058722257614, |
| "learning_rate": 0.0001, |
| "loss": 1.6677, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.05021760964178105, |
| "grad_norm": 0.1398639678955078, |
| "learning_rate": 0.0001, |
| "loss": 1.6454, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.050552393706059594, |
| "grad_norm": 0.14572647213935852, |
| "learning_rate": 0.0001, |
| "loss": 1.6094, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.05088717777033813, |
| "grad_norm": 0.10937194526195526, |
| "learning_rate": 0.0001, |
| "loss": 1.5776, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.05122196183461667, |
| "grad_norm": 0.1404120773077011, |
| "learning_rate": 0.0001, |
| "loss": 1.6112, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.05155674589889521, |
| "grad_norm": 0.1480460911989212, |
| "learning_rate": 0.0001, |
| "loss": 1.6196, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.051891529963173755, |
| "grad_norm": 0.10971348732709885, |
| "learning_rate": 0.0001, |
| "loss": 1.5744, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.05222631402745229, |
| "grad_norm": 0.1468382179737091, |
| "learning_rate": 0.0001, |
| "loss": 1.7518, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.05256109809173083, |
| "grad_norm": 0.13429516553878784, |
| "learning_rate": 0.0001, |
| "loss": 1.5812, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.05289588215600938, |
| "grad_norm": 0.11399335414171219, |
| "learning_rate": 0.0001, |
| "loss": 1.6812, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.053230666220287916, |
| "grad_norm": 0.13944409787654877, |
| "learning_rate": 0.0001, |
| "loss": 1.6789, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.053565450284566454, |
| "grad_norm": 0.1390630453824997, |
| "learning_rate": 0.0001, |
| "loss": 1.6368, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.05390023434884499, |
| "grad_norm": 0.1098702922463417, |
| "learning_rate": 0.0001, |
| "loss": 1.5462, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.05423501841312354, |
| "grad_norm": 0.13710471987724304, |
| "learning_rate": 0.0001, |
| "loss": 1.7208, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.05456980247740208, |
| "grad_norm": 0.1283336579799652, |
| "learning_rate": 0.0001, |
| "loss": 1.6648, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.054904586541680615, |
| "grad_norm": 0.11550601571798325, |
| "learning_rate": 0.0001, |
| "loss": 1.7409, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.055239370605959154, |
| "grad_norm": 0.12028289586305618, |
| "learning_rate": 0.0001, |
| "loss": 1.6685, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.0555741546702377, |
| "grad_norm": 0.13237926363945007, |
| "learning_rate": 0.0001, |
| "loss": 1.6639, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.05590893873451624, |
| "grad_norm": 0.11385014653205872, |
| "learning_rate": 0.0001, |
| "loss": 1.6742, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.056243722798794776, |
| "grad_norm": 0.13613030314445496, |
| "learning_rate": 0.0001, |
| "loss": 1.6898, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.056578506863073315, |
| "grad_norm": 0.12617048621177673, |
| "learning_rate": 0.0001, |
| "loss": 1.6239, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.05691329092735186, |
| "grad_norm": 0.11637625098228455, |
| "learning_rate": 0.0001, |
| "loss": 1.6362, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0572480749916304, |
| "grad_norm": 0.13217699527740479, |
| "learning_rate": 0.0001, |
| "loss": 1.6319, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.05758285905590894, |
| "grad_norm": 0.12088079750537872, |
| "learning_rate": 0.0001, |
| "loss": 1.4997, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.057917643120187476, |
| "grad_norm": 0.11359237879514694, |
| "learning_rate": 0.0001, |
| "loss": 1.564, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.05825242718446602, |
| "grad_norm": 0.12509793043136597, |
| "learning_rate": 0.0001, |
| "loss": 1.6855, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.05858721124874456, |
| "grad_norm": 0.1233699694275856, |
| "learning_rate": 0.0001, |
| "loss": 1.665, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0589219953130231, |
| "grad_norm": 0.11172114312648773, |
| "learning_rate": 0.0001, |
| "loss": 1.6242, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.059256779377301644, |
| "grad_norm": 0.12242110818624496, |
| "learning_rate": 0.0001, |
| "loss": 1.6736, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.05959156344158018, |
| "grad_norm": 0.12275474518537521, |
| "learning_rate": 0.0001, |
| "loss": 1.6373, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.05992634750585872, |
| "grad_norm": 0.11666038632392883, |
| "learning_rate": 0.0001, |
| "loss": 1.6957, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.06026113157013726, |
| "grad_norm": 0.1209944486618042, |
| "learning_rate": 0.0001, |
| "loss": 1.618, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.060595915634415805, |
| "grad_norm": 0.12028312683105469, |
| "learning_rate": 0.0001, |
| "loss": 1.6738, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.06093069969869434, |
| "grad_norm": 0.11835712194442749, |
| "learning_rate": 0.0001, |
| "loss": 1.6348, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.06126548376297288, |
| "grad_norm": 0.13166043162345886, |
| "learning_rate": 0.0001, |
| "loss": 1.6064, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.06160026782725142, |
| "grad_norm": 0.1366170346736908, |
| "learning_rate": 0.0001, |
| "loss": 1.674, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.061935051891529966, |
| "grad_norm": 0.12185468524694443, |
| "learning_rate": 0.0001, |
| "loss": 1.5695, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.062269835955808504, |
| "grad_norm": 0.12310407310724258, |
| "learning_rate": 0.0001, |
| "loss": 1.6799, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.06260462002008704, |
| "grad_norm": 0.14412462711334229, |
| "learning_rate": 0.0001, |
| "loss": 1.5855, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.06293940408436559, |
| "grad_norm": 0.11908841878175735, |
| "learning_rate": 0.0001, |
| "loss": 1.5752, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.06327418814864412, |
| "grad_norm": 0.12137061357498169, |
| "learning_rate": 0.0001, |
| "loss": 1.6018, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.06360897221292267, |
| "grad_norm": 0.128020778298378, |
| "learning_rate": 0.0001, |
| "loss": 1.5894, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06394375627720121, |
| "grad_norm": 0.13447493314743042, |
| "learning_rate": 0.0001, |
| "loss": 1.5884, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.06427854034147974, |
| "grad_norm": 0.11885492503643036, |
| "learning_rate": 0.0001, |
| "loss": 1.6245, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.06461332440575829, |
| "grad_norm": 0.13066913187503815, |
| "learning_rate": 0.0001, |
| "loss": 1.6807, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.06494810847003682, |
| "grad_norm": 0.12650778889656067, |
| "learning_rate": 0.0001, |
| "loss": 1.6498, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.06528289253431536, |
| "grad_norm": 0.116504967212677, |
| "learning_rate": 0.0001, |
| "loss": 1.6037, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.06561767659859391, |
| "grad_norm": 0.12200898677110672, |
| "learning_rate": 0.0001, |
| "loss": 1.5816, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.06595246066287244, |
| "grad_norm": 0.13350239396095276, |
| "learning_rate": 0.0001, |
| "loss": 1.6281, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.06628724472715099, |
| "grad_norm": 0.12119137495756149, |
| "learning_rate": 0.0001, |
| "loss": 1.5747, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.06662202879142953, |
| "grad_norm": 0.12292595952749252, |
| "learning_rate": 0.0001, |
| "loss": 1.6294, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.06695681285570806, |
| "grad_norm": 0.14958657324314117, |
| "learning_rate": 0.0001, |
| "loss": 1.7248, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06729159691998661, |
| "grad_norm": 0.1206580251455307, |
| "learning_rate": 0.0001, |
| "loss": 1.647, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.06762638098426516, |
| "grad_norm": 0.13404549658298492, |
| "learning_rate": 0.0001, |
| "loss": 1.6827, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.06796116504854369, |
| "grad_norm": 0.11746184527873993, |
| "learning_rate": 0.0001, |
| "loss": 1.5827, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.06829594911282223, |
| "grad_norm": 0.1220933049917221, |
| "learning_rate": 0.0001, |
| "loss": 1.6209, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.06863073317710076, |
| "grad_norm": 0.1395500898361206, |
| "learning_rate": 0.0001, |
| "loss": 1.6691, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.06896551724137931, |
| "grad_norm": 0.12085775285959244, |
| "learning_rate": 0.0001, |
| "loss": 1.6186, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.06930030130565785, |
| "grad_norm": 0.139579176902771, |
| "learning_rate": 0.0001, |
| "loss": 1.6357, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.06963508536993639, |
| "grad_norm": 0.12011922895908356, |
| "learning_rate": 0.0001, |
| "loss": 1.5418, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.06996986943421493, |
| "grad_norm": 0.11939892917871475, |
| "learning_rate": 0.0001, |
| "loss": 1.5816, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.07030465349849348, |
| "grad_norm": 0.12651924788951874, |
| "learning_rate": 0.0001, |
| "loss": 1.5286, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07063943756277201, |
| "grad_norm": 0.13420534133911133, |
| "learning_rate": 0.0001, |
| "loss": 1.6213, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.07097422162705055, |
| "grad_norm": 0.11868797987699509, |
| "learning_rate": 0.0001, |
| "loss": 1.6367, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.07130900569132909, |
| "grad_norm": 0.11338218301534653, |
| "learning_rate": 0.0001, |
| "loss": 1.517, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.07164378975560763, |
| "grad_norm": 0.14230981469154358, |
| "learning_rate": 0.0001, |
| "loss": 1.6773, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.07197857381988618, |
| "grad_norm": 0.11315491795539856, |
| "learning_rate": 0.0001, |
| "loss": 1.5564, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.07231335788416471, |
| "grad_norm": 0.12009023874998093, |
| "learning_rate": 0.0001, |
| "loss": 1.6317, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.07264814194844325, |
| "grad_norm": 0.1332681030035019, |
| "learning_rate": 0.0001, |
| "loss": 1.6393, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.0729829260127218, |
| "grad_norm": 0.12581905722618103, |
| "learning_rate": 0.0001, |
| "loss": 1.7155, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.07331771007700033, |
| "grad_norm": 0.12259216606616974, |
| "learning_rate": 0.0001, |
| "loss": 1.661, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.07365249414127888, |
| "grad_norm": 0.13090763986110687, |
| "learning_rate": 0.0001, |
| "loss": 1.6692, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07398727820555742, |
| "grad_norm": 0.11311494559049606, |
| "learning_rate": 0.0001, |
| "loss": 1.6653, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.07432206226983595, |
| "grad_norm": 0.1307578831911087, |
| "learning_rate": 0.0001, |
| "loss": 1.5978, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.0746568463341145, |
| "grad_norm": 0.12622885406017303, |
| "learning_rate": 0.0001, |
| "loss": 1.7782, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.07499163039839303, |
| "grad_norm": 0.11902297288179398, |
| "learning_rate": 0.0001, |
| "loss": 1.5689, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.07532641446267158, |
| "grad_norm": 0.11696305125951767, |
| "learning_rate": 0.0001, |
| "loss": 1.6077, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.07566119852695012, |
| "grad_norm": 0.11666855216026306, |
| "learning_rate": 0.0001, |
| "loss": 1.5568, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.07599598259122865, |
| "grad_norm": 0.12056950479745865, |
| "learning_rate": 0.0001, |
| "loss": 1.6829, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.0763307666555072, |
| "grad_norm": 0.11957021802663803, |
| "learning_rate": 0.0001, |
| "loss": 1.7184, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.07666555071978574, |
| "grad_norm": 0.11590487509965897, |
| "learning_rate": 0.0001, |
| "loss": 1.6775, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.07700033478406428, |
| "grad_norm": 0.11034328490495682, |
| "learning_rate": 0.0001, |
| "loss": 1.5773, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.07733511884834282, |
| "grad_norm": 0.12097325176000595, |
| "learning_rate": 0.0001, |
| "loss": 1.5552, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.07766990291262135, |
| "grad_norm": 0.11697199940681458, |
| "learning_rate": 0.0001, |
| "loss": 1.6762, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.0780046869768999, |
| "grad_norm": 0.11488549411296844, |
| "learning_rate": 0.0001, |
| "loss": 1.6219, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.07833947104117844, |
| "grad_norm": 0.12868645787239075, |
| "learning_rate": 0.0001, |
| "loss": 1.6596, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.07867425510545697, |
| "grad_norm": 0.11428504437208176, |
| "learning_rate": 0.0001, |
| "loss": 1.5926, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.07900903916973552, |
| "grad_norm": 0.14550745487213135, |
| "learning_rate": 0.0001, |
| "loss": 1.6773, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.07934382323401407, |
| "grad_norm": 0.11800127476453781, |
| "learning_rate": 0.0001, |
| "loss": 1.7403, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.0796786072982926, |
| "grad_norm": 0.12732075154781342, |
| "learning_rate": 0.0001, |
| "loss": 1.6886, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.08001339136257114, |
| "grad_norm": 0.1188284233212471, |
| "learning_rate": 0.0001, |
| "loss": 1.6552, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.08034817542684969, |
| "grad_norm": 0.12447573244571686, |
| "learning_rate": 0.0001, |
| "loss": 1.668, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08068295949112822, |
| "grad_norm": 0.129620760679245, |
| "learning_rate": 0.0001, |
| "loss": 1.6134, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.08101774355540677, |
| "grad_norm": 0.12539665400981903, |
| "learning_rate": 0.0001, |
| "loss": 1.7069, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.0813525276196853, |
| "grad_norm": 0.13554492592811584, |
| "learning_rate": 0.0001, |
| "loss": 1.6704, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.08168731168396384, |
| "grad_norm": 0.11758473515510559, |
| "learning_rate": 0.0001, |
| "loss": 1.6329, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.08202209574824239, |
| "grad_norm": 0.11309672147035599, |
| "learning_rate": 0.0001, |
| "loss": 1.5836, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.08235687981252092, |
| "grad_norm": 0.12910054624080658, |
| "learning_rate": 0.0001, |
| "loss": 1.6104, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.08269166387679946, |
| "grad_norm": 0.12267620116472244, |
| "learning_rate": 0.0001, |
| "loss": 1.6505, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.08302644794107801, |
| "grad_norm": 0.12700802087783813, |
| "learning_rate": 0.0001, |
| "loss": 1.6474, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.08336123200535654, |
| "grad_norm": 0.13106848299503326, |
| "learning_rate": 0.0001, |
| "loss": 1.7076, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.08369601606963509, |
| "grad_norm": 0.12598051130771637, |
| "learning_rate": 0.0001, |
| "loss": 1.6463, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08403080013391362, |
| "grad_norm": 0.1270611584186554, |
| "learning_rate": 0.0001, |
| "loss": 1.6407, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.08436558419819216, |
| "grad_norm": 0.1215846911072731, |
| "learning_rate": 0.0001, |
| "loss": 1.7082, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.08470036826247071, |
| "grad_norm": 0.11944068968296051, |
| "learning_rate": 0.0001, |
| "loss": 1.6046, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.08503515232674924, |
| "grad_norm": 0.12395983189344406, |
| "learning_rate": 0.0001, |
| "loss": 1.6444, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.08536993639102779, |
| "grad_norm": 0.11616060882806778, |
| "learning_rate": 0.0001, |
| "loss": 1.6514, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.08570472045530633, |
| "grad_norm": 0.1274399757385254, |
| "learning_rate": 0.0001, |
| "loss": 1.6023, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.08603950451958486, |
| "grad_norm": 0.11419884115457535, |
| "learning_rate": 0.0001, |
| "loss": 1.6053, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.08637428858386341, |
| "grad_norm": 0.11922091245651245, |
| "learning_rate": 0.0001, |
| "loss": 1.6771, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.08670907264814195, |
| "grad_norm": 0.12727287411689758, |
| "learning_rate": 0.0001, |
| "loss": 1.5332, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.08704385671242049, |
| "grad_norm": 0.12368068844079971, |
| "learning_rate": 0.0001, |
| "loss": 1.6962, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.08737864077669903, |
| "grad_norm": 0.11546538770198822, |
| "learning_rate": 0.0001, |
| "loss": 1.6239, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.08771342484097756, |
| "grad_norm": 0.13736455142498016, |
| "learning_rate": 0.0001, |
| "loss": 1.7133, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.08804820890525611, |
| "grad_norm": 0.12773726880550385, |
| "learning_rate": 0.0001, |
| "loss": 1.6127, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.08838299296953465, |
| "grad_norm": 0.12833422422409058, |
| "learning_rate": 0.0001, |
| "loss": 1.5803, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.08871777703381319, |
| "grad_norm": 0.13427826762199402, |
| "learning_rate": 0.0001, |
| "loss": 1.5815, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.08905256109809173, |
| "grad_norm": 0.1173439621925354, |
| "learning_rate": 0.0001, |
| "loss": 1.5457, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.08938734516237028, |
| "grad_norm": 0.12156970053911209, |
| "learning_rate": 0.0001, |
| "loss": 1.5969, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.08972212922664881, |
| "grad_norm": 0.15133506059646606, |
| "learning_rate": 0.0001, |
| "loss": 1.6223, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.09005691329092735, |
| "grad_norm": 0.13353589177131653, |
| "learning_rate": 0.0001, |
| "loss": 1.545, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.09039169735520589, |
| "grad_norm": 0.12940257787704468, |
| "learning_rate": 0.0001, |
| "loss": 1.6135, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09072648141948443, |
| "grad_norm": 0.12897267937660217, |
| "learning_rate": 0.0001, |
| "loss": 1.6413, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.09106126548376298, |
| "grad_norm": 0.12336087226867676, |
| "learning_rate": 0.0001, |
| "loss": 1.702, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.09139604954804151, |
| "grad_norm": 0.11277737468481064, |
| "learning_rate": 0.0001, |
| "loss": 1.5743, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.09173083361232005, |
| "grad_norm": 0.11659134924411774, |
| "learning_rate": 0.0001, |
| "loss": 1.6456, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.0920656176765986, |
| "grad_norm": 0.11736118793487549, |
| "learning_rate": 0.0001, |
| "loss": 1.655, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.09240040174087713, |
| "grad_norm": 0.12133463472127914, |
| "learning_rate": 0.0001, |
| "loss": 1.6771, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.09273518580515568, |
| "grad_norm": 0.11516664177179337, |
| "learning_rate": 0.0001, |
| "loss": 1.5545, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.09306996986943422, |
| "grad_norm": 0.10916180163621902, |
| "learning_rate": 0.0001, |
| "loss": 1.5301, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.09340475393371275, |
| "grad_norm": 0.11232040077447891, |
| "learning_rate": 0.0001, |
| "loss": 1.5489, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.0937395379979913, |
| "grad_norm": 0.12515543401241302, |
| "learning_rate": 0.0001, |
| "loss": 1.6817, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.09407432206226983, |
| "grad_norm": 0.11998307704925537, |
| "learning_rate": 0.0001, |
| "loss": 1.563, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.09440910612654838, |
| "grad_norm": 0.12774354219436646, |
| "learning_rate": 0.0001, |
| "loss": 1.622, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.09474389019082692, |
| "grad_norm": 0.12023581564426422, |
| "learning_rate": 0.0001, |
| "loss": 1.5367, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.09507867425510545, |
| "grad_norm": 0.12877605855464935, |
| "learning_rate": 0.0001, |
| "loss": 1.5806, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.095413458319384, |
| "grad_norm": 0.11994509398937225, |
| "learning_rate": 0.0001, |
| "loss": 1.6017, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.09574824238366254, |
| "grad_norm": 0.12522728741168976, |
| "learning_rate": 0.0001, |
| "loss": 1.6213, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.09608302644794108, |
| "grad_norm": 0.13130401074886322, |
| "learning_rate": 0.0001, |
| "loss": 1.6211, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.09641781051221962, |
| "grad_norm": 0.1242026537656784, |
| "learning_rate": 0.0001, |
| "loss": 1.6428, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.09675259457649815, |
| "grad_norm": 0.12561045587062836, |
| "learning_rate": 0.0001, |
| "loss": 1.7275, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.0970873786407767, |
| "grad_norm": 0.11756443232297897, |
| "learning_rate": 0.0001, |
| "loss": 1.5905, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.09742216270505524, |
| "grad_norm": 0.11787443608045578, |
| "learning_rate": 0.0001, |
| "loss": 1.5809, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.09775694676933377, |
| "grad_norm": 0.11708027869462967, |
| "learning_rate": 0.0001, |
| "loss": 1.6205, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.09809173083361232, |
| "grad_norm": 0.12011709064245224, |
| "learning_rate": 0.0001, |
| "loss": 1.6327, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.09842651489789087, |
| "grad_norm": 0.12868238985538483, |
| "learning_rate": 0.0001, |
| "loss": 1.7539, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.0987612989621694, |
| "grad_norm": 0.11626073718070984, |
| "learning_rate": 0.0001, |
| "loss": 1.6877, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.09909608302644794, |
| "grad_norm": 0.1279468834400177, |
| "learning_rate": 0.0001, |
| "loss": 1.635, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.09943086709072649, |
| "grad_norm": 0.12956663966178894, |
| "learning_rate": 0.0001, |
| "loss": 1.5736, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.09976565115500502, |
| "grad_norm": 0.11931903660297394, |
| "learning_rate": 0.0001, |
| "loss": 1.6534, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.10010043521928357, |
| "grad_norm": 0.12837816774845123, |
| "learning_rate": 0.0001, |
| "loss": 1.5923, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1004352192835621, |
| "grad_norm": 0.12146858870983124, |
| "learning_rate": 0.0001, |
| "loss": 1.6206, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10077000334784064, |
| "grad_norm": 0.11455334722995758, |
| "learning_rate": 0.0001, |
| "loss": 1.5292, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.10110478741211919, |
| "grad_norm": 0.12035822868347168, |
| "learning_rate": 0.0001, |
| "loss": 1.576, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.10143957147639772, |
| "grad_norm": 0.12373282760381699, |
| "learning_rate": 0.0001, |
| "loss": 1.6688, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.10177435554067626, |
| "grad_norm": 0.13985779881477356, |
| "learning_rate": 0.0001, |
| "loss": 1.667, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.10210913960495481, |
| "grad_norm": 0.11246056109666824, |
| "learning_rate": 0.0001, |
| "loss": 1.6014, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.10244392366923334, |
| "grad_norm": 0.13154080510139465, |
| "learning_rate": 0.0001, |
| "loss": 1.5909, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.10277870773351189, |
| "grad_norm": 0.13235047459602356, |
| "learning_rate": 0.0001, |
| "loss": 1.6888, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.10311349179779042, |
| "grad_norm": 0.13294562697410583, |
| "learning_rate": 0.0001, |
| "loss": 1.6534, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.10344827586206896, |
| "grad_norm": 0.1274106800556183, |
| "learning_rate": 0.0001, |
| "loss": 1.7178, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.10378305992634751, |
| "grad_norm": 0.11676975339651108, |
| "learning_rate": 0.0001, |
| "loss": 1.5587, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.10411784399062604, |
| "grad_norm": 0.1180170550942421, |
| "learning_rate": 0.0001, |
| "loss": 1.5579, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.10445262805490459, |
| "grad_norm": 0.1267906278371811, |
| "learning_rate": 0.0001, |
| "loss": 1.5994, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.10478741211918313, |
| "grad_norm": 0.12398704141378403, |
| "learning_rate": 0.0001, |
| "loss": 1.5459, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.10512219618346166, |
| "grad_norm": 0.12039758265018463, |
| "learning_rate": 0.0001, |
| "loss": 1.5995, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.10545698024774021, |
| "grad_norm": 0.12191271781921387, |
| "learning_rate": 0.0001, |
| "loss": 1.5639, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.10579176431201875, |
| "grad_norm": 0.1351427584886551, |
| "learning_rate": 0.0001, |
| "loss": 1.6553, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.10612654837629729, |
| "grad_norm": 0.13542529940605164, |
| "learning_rate": 0.0001, |
| "loss": 1.5455, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.10646133244057583, |
| "grad_norm": 0.13739462196826935, |
| "learning_rate": 0.0001, |
| "loss": 1.6414, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.10679611650485436, |
| "grad_norm": 0.11810696870088577, |
| "learning_rate": 0.0001, |
| "loss": 1.7078, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.10713090056913291, |
| "grad_norm": 0.13632580637931824, |
| "learning_rate": 0.0001, |
| "loss": 1.6044, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.10746568463341145, |
| "grad_norm": 0.12454043328762054, |
| "learning_rate": 0.0001, |
| "loss": 1.6654, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.10780046869768999, |
| "grad_norm": 0.11818061023950577, |
| "learning_rate": 0.0001, |
| "loss": 1.5693, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.10813525276196853, |
| "grad_norm": 0.12229089438915253, |
| "learning_rate": 0.0001, |
| "loss": 1.6248, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.10847003682624708, |
| "grad_norm": 0.11546499282121658, |
| "learning_rate": 0.0001, |
| "loss": 1.5091, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.10880482089052561, |
| "grad_norm": 0.12005545943975449, |
| "learning_rate": 0.0001, |
| "loss": 1.5801, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.10913960495480415, |
| "grad_norm": 0.12114623188972473, |
| "learning_rate": 0.0001, |
| "loss": 1.6552, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.10947438901908269, |
| "grad_norm": 0.11608844995498657, |
| "learning_rate": 0.0001, |
| "loss": 1.5183, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.10980917308336123, |
| "grad_norm": 0.11119306832551956, |
| "learning_rate": 0.0001, |
| "loss": 1.5515, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.11014395714763978, |
| "grad_norm": 0.12586964666843414, |
| "learning_rate": 0.0001, |
| "loss": 1.6353, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.11047874121191831, |
| "grad_norm": 0.127826526761055, |
| "learning_rate": 0.0001, |
| "loss": 1.7205, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11081352527619685, |
| "grad_norm": 0.11828092485666275, |
| "learning_rate": 0.0001, |
| "loss": 1.6711, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.1111483093404754, |
| "grad_norm": 0.13583530485630035, |
| "learning_rate": 0.0001, |
| "loss": 1.6455, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.11148309340475393, |
| "grad_norm": 0.11893647909164429, |
| "learning_rate": 0.0001, |
| "loss": 1.5707, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.11181787746903248, |
| "grad_norm": 0.13151027262210846, |
| "learning_rate": 0.0001, |
| "loss": 1.6576, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.11215266153331102, |
| "grad_norm": 0.11656352877616882, |
| "learning_rate": 0.0001, |
| "loss": 1.6456, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.11248744559758955, |
| "grad_norm": 0.1267959475517273, |
| "learning_rate": 0.0001, |
| "loss": 1.5069, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.1128222296618681, |
| "grad_norm": 0.12403184920549393, |
| "learning_rate": 0.0001, |
| "loss": 1.5273, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.11315701372614663, |
| "grad_norm": 0.12692154943943024, |
| "learning_rate": 0.0001, |
| "loss": 1.6647, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.11349179779042518, |
| "grad_norm": 0.11919606477022171, |
| "learning_rate": 0.0001, |
| "loss": 1.6833, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.11382658185470372, |
| "grad_norm": 0.11304503679275513, |
| "learning_rate": 0.0001, |
| "loss": 1.5757, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11416136591898225, |
| "grad_norm": 0.11996794492006302, |
| "learning_rate": 0.0001, |
| "loss": 1.6102, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.1144961499832608, |
| "grad_norm": 0.12606146931648254, |
| "learning_rate": 0.0001, |
| "loss": 1.59, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.11483093404753934, |
| "grad_norm": 0.12146681547164917, |
| "learning_rate": 0.0001, |
| "loss": 1.4989, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.11516571811181787, |
| "grad_norm": 0.13275377452373505, |
| "learning_rate": 0.0001, |
| "loss": 1.6152, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.11550050217609642, |
| "grad_norm": 0.12684765458106995, |
| "learning_rate": 0.0001, |
| "loss": 1.542, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.11583528624037495, |
| "grad_norm": 0.1186991035938263, |
| "learning_rate": 0.0001, |
| "loss": 1.573, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.1161700703046535, |
| "grad_norm": 0.12221034616231918, |
| "learning_rate": 0.0001, |
| "loss": 1.6418, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.11650485436893204, |
| "grad_norm": 0.11776617169380188, |
| "learning_rate": 0.0001, |
| "loss": 1.5821, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.11683963843321057, |
| "grad_norm": 0.13464072346687317, |
| "learning_rate": 0.0001, |
| "loss": 1.6188, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.11717442249748912, |
| "grad_norm": 0.13101482391357422, |
| "learning_rate": 0.0001, |
| "loss": 1.5194, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.11750920656176767, |
| "grad_norm": 0.11970439553260803, |
| "learning_rate": 0.0001, |
| "loss": 1.5891, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.1178439906260462, |
| "grad_norm": 0.11731956154108047, |
| "learning_rate": 0.0001, |
| "loss": 1.6441, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.11817877469032474, |
| "grad_norm": 0.1163954809308052, |
| "learning_rate": 0.0001, |
| "loss": 1.5739, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.11851355875460329, |
| "grad_norm": 0.13119016587734222, |
| "learning_rate": 0.0001, |
| "loss": 1.6667, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.11884834281888182, |
| "grad_norm": 0.11406403034925461, |
| "learning_rate": 0.0001, |
| "loss": 1.5391, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.11918312688316036, |
| "grad_norm": 0.12543243169784546, |
| "learning_rate": 0.0001, |
| "loss": 1.6413, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1195179109474389, |
| "grad_norm": 0.11639681458473206, |
| "learning_rate": 0.0001, |
| "loss": 1.5946, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.11985269501171744, |
| "grad_norm": 0.11582693457603455, |
| "learning_rate": 0.0001, |
| "loss": 1.5797, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.12018747907599599, |
| "grad_norm": 0.12131619453430176, |
| "learning_rate": 0.0001, |
| "loss": 1.5762, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.12052226314027452, |
| "grad_norm": 0.1220826804637909, |
| "learning_rate": 0.0001, |
| "loss": 1.4938, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.12085704720455306, |
| "grad_norm": 0.12737631797790527, |
| "learning_rate": 0.0001, |
| "loss": 1.5622, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.12119183126883161, |
| "grad_norm": 0.12794937193393707, |
| "learning_rate": 0.0001, |
| "loss": 1.5852, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.12152661533311014, |
| "grad_norm": 0.11786255985498428, |
| "learning_rate": 0.0001, |
| "loss": 1.6532, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.12186139939738869, |
| "grad_norm": 0.12443582713603973, |
| "learning_rate": 0.0001, |
| "loss": 1.5664, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.12219618346166722, |
| "grad_norm": 0.124130979180336, |
| "learning_rate": 0.0001, |
| "loss": 1.5809, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.12253096752594576, |
| "grad_norm": 0.11969106644392014, |
| "learning_rate": 0.0001, |
| "loss": 1.5073, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.12286575159022431, |
| "grad_norm": 0.12146104872226715, |
| "learning_rate": 0.0001, |
| "loss": 1.6322, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.12320053565450284, |
| "grad_norm": 0.11919710785150528, |
| "learning_rate": 0.0001, |
| "loss": 1.6405, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.12353531971878139, |
| "grad_norm": 0.12359990924596786, |
| "learning_rate": 0.0001, |
| "loss": 1.6564, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.12387010378305993, |
| "grad_norm": 0.12216739356517792, |
| "learning_rate": 0.0001, |
| "loss": 1.658, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12420488784733846, |
| "grad_norm": 0.12388269603252411, |
| "learning_rate": 0.0001, |
| "loss": 1.6542, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.12453967191161701, |
| "grad_norm": 0.12631452083587646, |
| "learning_rate": 0.0001, |
| "loss": 1.5741, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.12487445597589555, |
| "grad_norm": 0.11718172580003738, |
| "learning_rate": 0.0001, |
| "loss": 1.5247, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.12520924004017409, |
| "grad_norm": 0.11787404865026474, |
| "learning_rate": 0.0001, |
| "loss": 1.604, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.12554402410445262, |
| "grad_norm": 0.1190713569521904, |
| "learning_rate": 0.0001, |
| "loss": 1.5771, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.12587880816873118, |
| "grad_norm": 0.11780121177434921, |
| "learning_rate": 0.0001, |
| "loss": 1.6445, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.1262135922330097, |
| "grad_norm": 0.11370184272527695, |
| "learning_rate": 0.0001, |
| "loss": 1.4544, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.12654837629728824, |
| "grad_norm": 0.12931419909000397, |
| "learning_rate": 0.0001, |
| "loss": 1.5261, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1268831603615668, |
| "grad_norm": 0.11074584722518921, |
| "learning_rate": 0.0001, |
| "loss": 1.5329, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.12721794442584533, |
| "grad_norm": 0.1251228302717209, |
| "learning_rate": 0.0001, |
| "loss": 1.6181, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.12755272849012386, |
| "grad_norm": 0.11304245889186859, |
| "learning_rate": 0.0001, |
| "loss": 1.5198, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.12788751255440242, |
| "grad_norm": 0.11219135671854019, |
| "learning_rate": 0.0001, |
| "loss": 1.494, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.12822229661868095, |
| "grad_norm": 0.13162165880203247, |
| "learning_rate": 0.0001, |
| "loss": 1.6073, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.12855708068295948, |
| "grad_norm": 0.11944107711315155, |
| "learning_rate": 0.0001, |
| "loss": 1.6021, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.12889186474723804, |
| "grad_norm": 0.11878252029418945, |
| "learning_rate": 0.0001, |
| "loss": 1.6051, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.12922664881151658, |
| "grad_norm": 0.1224270910024643, |
| "learning_rate": 0.0001, |
| "loss": 1.596, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.1295614328757951, |
| "grad_norm": 0.12815283238887787, |
| "learning_rate": 0.0001, |
| "loss": 1.6652, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.12989621694007364, |
| "grad_norm": 0.11265059560537338, |
| "learning_rate": 0.0001, |
| "loss": 1.5478, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1302310010043522, |
| "grad_norm": 0.12850640714168549, |
| "learning_rate": 0.0001, |
| "loss": 1.6242, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.13056578506863073, |
| "grad_norm": 0.11487656831741333, |
| "learning_rate": 0.0001, |
| "loss": 1.6089, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.13090056913290926, |
| "grad_norm": 0.1160978302359581, |
| "learning_rate": 0.0001, |
| "loss": 1.6248, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.13123535319718782, |
| "grad_norm": 0.12001185864210129, |
| "learning_rate": 0.0001, |
| "loss": 1.5911, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.13157013726146635, |
| "grad_norm": 0.11623065918684006, |
| "learning_rate": 0.0001, |
| "loss": 1.6194, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.13190492132574488, |
| "grad_norm": 0.11913128942251205, |
| "learning_rate": 0.0001, |
| "loss": 1.6233, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.13223970539002344, |
| "grad_norm": 0.11658355593681335, |
| "learning_rate": 0.0001, |
| "loss": 1.629, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.13257448945430197, |
| "grad_norm": 0.11986858397722244, |
| "learning_rate": 0.0001, |
| "loss": 1.7414, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.1329092735185805, |
| "grad_norm": 0.12659533321857452, |
| "learning_rate": 0.0001, |
| "loss": 1.6037, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.13324405758285907, |
| "grad_norm": 0.11471698433160782, |
| "learning_rate": 0.0001, |
| "loss": 1.5939, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.1335788416471376, |
| "grad_norm": 0.12152232974767685, |
| "learning_rate": 0.0001, |
| "loss": 1.5663, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.13391362571141613, |
| "grad_norm": 0.12228668481111526, |
| "learning_rate": 0.0001, |
| "loss": 1.6717, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1342484097756947, |
| "grad_norm": 0.11998744308948517, |
| "learning_rate": 0.0001, |
| "loss": 1.6532, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.13458319383997322, |
| "grad_norm": 0.12556074559688568, |
| "learning_rate": 0.0001, |
| "loss": 1.6477, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.13491797790425175, |
| "grad_norm": 0.12216352671384811, |
| "learning_rate": 0.0001, |
| "loss": 1.6084, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.1352527619685303, |
| "grad_norm": 0.1290225237607956, |
| "learning_rate": 0.0001, |
| "loss": 1.6369, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.13558754603280884, |
| "grad_norm": 0.11453018337488174, |
| "learning_rate": 0.0001, |
| "loss": 1.5886, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.13592233009708737, |
| "grad_norm": 0.12192509323358536, |
| "learning_rate": 0.0001, |
| "loss": 1.5788, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.1362571141613659, |
| "grad_norm": 0.11374159157276154, |
| "learning_rate": 0.0001, |
| "loss": 1.5429, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.13659189822564446, |
| "grad_norm": 0.11875942349433899, |
| "learning_rate": 0.0001, |
| "loss": 1.6524, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.136926682289923, |
| "grad_norm": 0.12176533043384552, |
| "learning_rate": 0.0001, |
| "loss": 1.6572, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.13726146635420153, |
| "grad_norm": 0.12200423330068588, |
| "learning_rate": 0.0001, |
| "loss": 1.7139, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1375962504184801, |
| "grad_norm": 0.11800340563058853, |
| "learning_rate": 0.0001, |
| "loss": 1.6276, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.13793103448275862, |
| "grad_norm": 0.12321179360151291, |
| "learning_rate": 0.0001, |
| "loss": 1.6849, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.13826581854703715, |
| "grad_norm": 0.12165375053882599, |
| "learning_rate": 0.0001, |
| "loss": 1.5823, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.1386006026113157, |
| "grad_norm": 0.12587733566761017, |
| "learning_rate": 0.0001, |
| "loss": 1.5712, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.13893538667559424, |
| "grad_norm": 0.11877655982971191, |
| "learning_rate": 0.0001, |
| "loss": 1.606, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.13927017073987277, |
| "grad_norm": 0.11970411241054535, |
| "learning_rate": 0.0001, |
| "loss": 1.4995, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.13960495480415133, |
| "grad_norm": 0.14770293235778809, |
| "learning_rate": 0.0001, |
| "loss": 1.7334, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.13993973886842986, |
| "grad_norm": 0.11904104053974152, |
| "learning_rate": 0.0001, |
| "loss": 1.6258, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.1402745229327084, |
| "grad_norm": 0.13043157756328583, |
| "learning_rate": 0.0001, |
| "loss": 1.5564, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.14060930699698695, |
| "grad_norm": 0.1354888528585434, |
| "learning_rate": 0.0001, |
| "loss": 1.6391, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1409440910612655, |
| "grad_norm": 0.11834760010242462, |
| "learning_rate": 0.0001, |
| "loss": 1.5345, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.14127887512554402, |
| "grad_norm": 0.13029152154922485, |
| "learning_rate": 0.0001, |
| "loss": 1.5007, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.14161365918982258, |
| "grad_norm": 0.1352154165506363, |
| "learning_rate": 0.0001, |
| "loss": 1.5925, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.1419484432541011, |
| "grad_norm": 0.13768818974494934, |
| "learning_rate": 0.0001, |
| "loss": 1.6513, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.14228322731837964, |
| "grad_norm": 0.1345231682062149, |
| "learning_rate": 0.0001, |
| "loss": 1.6524, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.14261801138265817, |
| "grad_norm": 0.11808541417121887, |
| "learning_rate": 0.0001, |
| "loss": 1.6038, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.14295279544693673, |
| "grad_norm": 0.1403636336326599, |
| "learning_rate": 0.0001, |
| "loss": 1.5559, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.14328757951121526, |
| "grad_norm": 0.13042065501213074, |
| "learning_rate": 0.0001, |
| "loss": 1.6516, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1436223635754938, |
| "grad_norm": 0.12809261679649353, |
| "learning_rate": 0.0001, |
| "loss": 1.5913, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.14395714763977235, |
| "grad_norm": 0.13735899329185486, |
| "learning_rate": 0.0001, |
| "loss": 1.5814, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.14429193170405089, |
| "grad_norm": 0.12458304315805435, |
| "learning_rate": 0.0001, |
| "loss": 1.6909, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.14462671576832942, |
| "grad_norm": 0.11777736246585846, |
| "learning_rate": 0.0001, |
| "loss": 1.599, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.14496149983260798, |
| "grad_norm": 0.11958497762680054, |
| "learning_rate": 0.0001, |
| "loss": 1.6224, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.1452962838968865, |
| "grad_norm": 0.11626480519771576, |
| "learning_rate": 0.0001, |
| "loss": 1.6192, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.14563106796116504, |
| "grad_norm": 0.12103210389614105, |
| "learning_rate": 0.0001, |
| "loss": 1.5581, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.1459658520254436, |
| "grad_norm": 0.1175006702542305, |
| "learning_rate": 0.0001, |
| "loss": 1.6147, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.14630063608972213, |
| "grad_norm": 0.1194823831319809, |
| "learning_rate": 0.0001, |
| "loss": 1.4559, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.14663542015400066, |
| "grad_norm": 0.12060422450304031, |
| "learning_rate": 0.0001, |
| "loss": 1.706, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.14697020421827922, |
| "grad_norm": 0.12133188545703888, |
| "learning_rate": 0.0001, |
| "loss": 1.6583, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.14730498828255775, |
| "grad_norm": 0.11069684475660324, |
| "learning_rate": 0.0001, |
| "loss": 1.5626, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.14763977234683628, |
| "grad_norm": 0.11735668778419495, |
| "learning_rate": 0.0001, |
| "loss": 1.5014, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.14797455641111484, |
| "grad_norm": 0.11778223514556885, |
| "learning_rate": 0.0001, |
| "loss": 1.6483, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.14830934047539338, |
| "grad_norm": 0.11628784239292145, |
| "learning_rate": 0.0001, |
| "loss": 1.5629, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.1486441245396719, |
| "grad_norm": 0.12314952164888382, |
| "learning_rate": 0.0001, |
| "loss": 1.6362, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.14897890860395044, |
| "grad_norm": 0.11853016167879105, |
| "learning_rate": 0.0001, |
| "loss": 1.6355, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.149313692668229, |
| "grad_norm": 0.1322093904018402, |
| "learning_rate": 0.0001, |
| "loss": 1.7655, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.14964847673250753, |
| "grad_norm": 0.11611328274011612, |
| "learning_rate": 0.0001, |
| "loss": 1.5878, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.14998326079678606, |
| "grad_norm": 0.11989305913448334, |
| "learning_rate": 0.0001, |
| "loss": 1.5576, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.15031804486106462, |
| "grad_norm": 0.11867792159318924, |
| "learning_rate": 0.0001, |
| "loss": 1.5453, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.15065282892534315, |
| "grad_norm": 0.11955395340919495, |
| "learning_rate": 0.0001, |
| "loss": 1.6089, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.15098761298962168, |
| "grad_norm": 0.13159644603729248, |
| "learning_rate": 0.0001, |
| "loss": 1.6053, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.15132239705390024, |
| "grad_norm": 0.12264451384544373, |
| "learning_rate": 0.0001, |
| "loss": 1.6277, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.15165718111817877, |
| "grad_norm": 0.1267840564250946, |
| "learning_rate": 0.0001, |
| "loss": 1.6047, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.1519919651824573, |
| "grad_norm": 0.1316317319869995, |
| "learning_rate": 0.0001, |
| "loss": 1.5497, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.15232674924673587, |
| "grad_norm": 0.12278051674365997, |
| "learning_rate": 0.0001, |
| "loss": 1.665, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.1526615333110144, |
| "grad_norm": 0.13153740763664246, |
| "learning_rate": 0.0001, |
| "loss": 1.6262, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.15299631737529293, |
| "grad_norm": 0.12118583172559738, |
| "learning_rate": 0.0001, |
| "loss": 1.5897, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.1533311014395715, |
| "grad_norm": 0.12203945219516754, |
| "learning_rate": 0.0001, |
| "loss": 1.5709, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.15366588550385002, |
| "grad_norm": 0.13483074307441711, |
| "learning_rate": 0.0001, |
| "loss": 1.662, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.15400066956812855, |
| "grad_norm": 0.12122450023889542, |
| "learning_rate": 0.0001, |
| "loss": 1.6289, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1543354536324071, |
| "grad_norm": 0.1384558528661728, |
| "learning_rate": 0.0001, |
| "loss": 1.6274, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.15467023769668564, |
| "grad_norm": 0.1436455249786377, |
| "learning_rate": 0.0001, |
| "loss": 1.6007, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.15500502176096417, |
| "grad_norm": 0.12359965592622757, |
| "learning_rate": 0.0001, |
| "loss": 1.6757, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.1553398058252427, |
| "grad_norm": 0.13497023284435272, |
| "learning_rate": 0.0001, |
| "loss": 1.6328, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.15567458988952126, |
| "grad_norm": 0.12588655948638916, |
| "learning_rate": 0.0001, |
| "loss": 1.6066, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.1560093739537998, |
| "grad_norm": 0.11950384825468063, |
| "learning_rate": 0.0001, |
| "loss": 1.6388, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.15634415801807833, |
| "grad_norm": 0.13280175626277924, |
| "learning_rate": 0.0001, |
| "loss": 1.6097, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.1566789420823569, |
| "grad_norm": 0.11717383563518524, |
| "learning_rate": 0.0001, |
| "loss": 1.6519, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.15701372614663542, |
| "grad_norm": 0.12387187778949738, |
| "learning_rate": 0.0001, |
| "loss": 1.5661, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.15734851021091395, |
| "grad_norm": 0.12535057961940765, |
| "learning_rate": 0.0001, |
| "loss": 1.601, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1576832942751925, |
| "grad_norm": 0.12057804316282272, |
| "learning_rate": 0.0001, |
| "loss": 1.6463, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.15801807833947104, |
| "grad_norm": 0.1360681802034378, |
| "learning_rate": 0.0001, |
| "loss": 1.7093, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.15835286240374957, |
| "grad_norm": 0.11986411362886429, |
| "learning_rate": 0.0001, |
| "loss": 1.5864, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.15868764646802813, |
| "grad_norm": 0.11335694789886475, |
| "learning_rate": 0.0001, |
| "loss": 1.5495, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.15902243053230666, |
| "grad_norm": 0.11684451997280121, |
| "learning_rate": 0.0001, |
| "loss": 1.5295, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.1593572145965852, |
| "grad_norm": 0.12882184982299805, |
| "learning_rate": 0.0001, |
| "loss": 1.6903, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.15969199866086375, |
| "grad_norm": 0.12175029516220093, |
| "learning_rate": 0.0001, |
| "loss": 1.6421, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.16002678272514229, |
| "grad_norm": 0.1330244094133377, |
| "learning_rate": 0.0001, |
| "loss": 1.5691, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.16036156678942082, |
| "grad_norm": 0.12204015254974365, |
| "learning_rate": 0.0001, |
| "loss": 1.6557, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.16069635085369938, |
| "grad_norm": 0.1265457272529602, |
| "learning_rate": 0.0001, |
| "loss": 1.6319, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.1610311349179779, |
| "grad_norm": 0.13419146835803986, |
| "learning_rate": 0.0001, |
| "loss": 1.5694, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.16136591898225644, |
| "grad_norm": 0.12663477659225464, |
| "learning_rate": 0.0001, |
| "loss": 1.49, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.16170070304653497, |
| "grad_norm": 0.1386338621377945, |
| "learning_rate": 0.0001, |
| "loss": 1.5774, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.16203548711081353, |
| "grad_norm": 0.1266423612833023, |
| "learning_rate": 0.0001, |
| "loss": 1.568, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.16237027117509206, |
| "grad_norm": 0.11795584112405777, |
| "learning_rate": 0.0001, |
| "loss": 1.5952, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.1627050552393706, |
| "grad_norm": 0.13247069716453552, |
| "learning_rate": 0.0001, |
| "loss": 1.5486, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.16303983930364915, |
| "grad_norm": 0.12367638945579529, |
| "learning_rate": 0.0001, |
| "loss": 1.6618, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.16337462336792768, |
| "grad_norm": 0.11988285183906555, |
| "learning_rate": 0.0001, |
| "loss": 1.6338, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.16370940743220622, |
| "grad_norm": 0.12422308325767517, |
| "learning_rate": 0.0001, |
| "loss": 1.5753, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.16404419149648478, |
| "grad_norm": 0.12060552090406418, |
| "learning_rate": 0.0001, |
| "loss": 1.6158, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1643789755607633, |
| "grad_norm": 0.1219470277428627, |
| "learning_rate": 0.0001, |
| "loss": 1.5057, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.16471375962504184, |
| "grad_norm": 0.12771841883659363, |
| "learning_rate": 0.0001, |
| "loss": 1.6627, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.1650485436893204, |
| "grad_norm": 0.11713176220655441, |
| "learning_rate": 0.0001, |
| "loss": 1.5697, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.16538332775359893, |
| "grad_norm": 0.1419348567724228, |
| "learning_rate": 0.0001, |
| "loss": 1.7253, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.16571811181787746, |
| "grad_norm": 0.1297536939382553, |
| "learning_rate": 0.0001, |
| "loss": 1.666, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.16605289588215602, |
| "grad_norm": 0.12997077405452728, |
| "learning_rate": 0.0001, |
| "loss": 1.5825, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.16638767994643455, |
| "grad_norm": 0.14354097843170166, |
| "learning_rate": 0.0001, |
| "loss": 1.628, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.16672246401071308, |
| "grad_norm": 0.12498887628316879, |
| "learning_rate": 0.0001, |
| "loss": 1.7003, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.16705724807499164, |
| "grad_norm": 0.13219912350177765, |
| "learning_rate": 0.0001, |
| "loss": 1.6218, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.16739203213927017, |
| "grad_norm": 0.13144424557685852, |
| "learning_rate": 0.0001, |
| "loss": 1.5874, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1677268162035487, |
| "grad_norm": 0.12147901952266693, |
| "learning_rate": 0.0001, |
| "loss": 1.6308, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.16806160026782724, |
| "grad_norm": 0.13109005987644196, |
| "learning_rate": 0.0001, |
| "loss": 1.7168, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.1683963843321058, |
| "grad_norm": 0.1306311935186386, |
| "learning_rate": 0.0001, |
| "loss": 1.5859, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.16873116839638433, |
| "grad_norm": 0.115351103246212, |
| "learning_rate": 0.0001, |
| "loss": 1.6124, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.16906595246066286, |
| "grad_norm": 0.12713004648685455, |
| "learning_rate": 0.0001, |
| "loss": 1.5558, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.16940073652494142, |
| "grad_norm": 0.1304563283920288, |
| "learning_rate": 0.0001, |
| "loss": 1.676, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.16973552058921995, |
| "grad_norm": 0.12284432351589203, |
| "learning_rate": 0.0001, |
| "loss": 1.5585, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.17007030465349848, |
| "grad_norm": 0.12343181669712067, |
| "learning_rate": 0.0001, |
| "loss": 1.5869, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.17040508871777704, |
| "grad_norm": 0.11459839344024658, |
| "learning_rate": 0.0001, |
| "loss": 1.6051, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.17073987278205557, |
| "grad_norm": 0.11883780360221863, |
| "learning_rate": 0.0001, |
| "loss": 1.5064, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1710746568463341, |
| "grad_norm": 0.12307373434305191, |
| "learning_rate": 0.0001, |
| "loss": 1.5257, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.17140944091061266, |
| "grad_norm": 0.11666516959667206, |
| "learning_rate": 0.0001, |
| "loss": 1.5842, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.1717442249748912, |
| "grad_norm": 0.11493846029043198, |
| "learning_rate": 0.0001, |
| "loss": 1.6215, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.17207900903916973, |
| "grad_norm": 0.1198093444108963, |
| "learning_rate": 0.0001, |
| "loss": 1.5875, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.1724137931034483, |
| "grad_norm": 0.11997364461421967, |
| "learning_rate": 0.0001, |
| "loss": 1.5819, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.17274857716772682, |
| "grad_norm": 0.12003917992115021, |
| "learning_rate": 0.0001, |
| "loss": 1.7019, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.17308336123200535, |
| "grad_norm": 0.11761089414358139, |
| "learning_rate": 0.0001, |
| "loss": 1.5742, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.1734181452962839, |
| "grad_norm": 0.12004124373197556, |
| "learning_rate": 0.0001, |
| "loss": 1.5947, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.17375292936056244, |
| "grad_norm": 0.12139872461557388, |
| "learning_rate": 0.0001, |
| "loss": 1.4861, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.17408771342484097, |
| "grad_norm": 0.12214326858520508, |
| "learning_rate": 0.0001, |
| "loss": 1.6953, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.1744224974891195, |
| "grad_norm": 0.12239626795053482, |
| "learning_rate": 0.0001, |
| "loss": 1.5529, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.17475728155339806, |
| "grad_norm": 0.11888886988162994, |
| "learning_rate": 0.0001, |
| "loss": 1.5099, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.1750920656176766, |
| "grad_norm": 0.11585521697998047, |
| "learning_rate": 0.0001, |
| "loss": 1.5392, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.17542684968195513, |
| "grad_norm": 0.1300823837518692, |
| "learning_rate": 0.0001, |
| "loss": 1.6598, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.1757616337462337, |
| "grad_norm": 0.12741157412528992, |
| "learning_rate": 0.0001, |
| "loss": 1.5798, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.17609641781051222, |
| "grad_norm": 0.11614137142896652, |
| "learning_rate": 0.0001, |
| "loss": 1.5343, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.17643120187479075, |
| "grad_norm": 0.12221526354551315, |
| "learning_rate": 0.0001, |
| "loss": 1.552, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.1767659859390693, |
| "grad_norm": 0.13221661746501923, |
| "learning_rate": 0.0001, |
| "loss": 1.6213, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.17710077000334784, |
| "grad_norm": 0.12069322913885117, |
| "learning_rate": 0.0001, |
| "loss": 1.6148, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.17743555406762637, |
| "grad_norm": 0.11254309117794037, |
| "learning_rate": 0.0001, |
| "loss": 1.5917, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.17777033813190493, |
| "grad_norm": 0.11715224385261536, |
| "learning_rate": 0.0001, |
| "loss": 1.6343, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.17810512219618346, |
| "grad_norm": 0.1183256059885025, |
| "learning_rate": 0.0001, |
| "loss": 1.4889, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.178439906260462, |
| "grad_norm": 0.12182603031396866, |
| "learning_rate": 0.0001, |
| "loss": 1.5487, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.17877469032474055, |
| "grad_norm": 0.1232253909111023, |
| "learning_rate": 0.0001, |
| "loss": 1.6754, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.17910947438901909, |
| "grad_norm": 0.11796277016401291, |
| "learning_rate": 0.0001, |
| "loss": 1.6396, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.17944425845329762, |
| "grad_norm": 0.13181637227535248, |
| "learning_rate": 0.0001, |
| "loss": 1.6505, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.17977904251757618, |
| "grad_norm": 0.11481553316116333, |
| "learning_rate": 0.0001, |
| "loss": 1.492, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.1801138265818547, |
| "grad_norm": 0.12842705845832825, |
| "learning_rate": 0.0001, |
| "loss": 1.734, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.18044861064613324, |
| "grad_norm": 0.1235375851392746, |
| "learning_rate": 0.0001, |
| "loss": 1.6496, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.18078339471041177, |
| "grad_norm": 0.12111697345972061, |
| "learning_rate": 0.0001, |
| "loss": 1.5044, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.18111817877469033, |
| "grad_norm": 0.12484171241521835, |
| "learning_rate": 0.0001, |
| "loss": 1.6643, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.18145296283896886, |
| "grad_norm": 0.12675760686397552, |
| "learning_rate": 0.0001, |
| "loss": 1.6188, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.1817877469032474, |
| "grad_norm": 0.12203079462051392, |
| "learning_rate": 0.0001, |
| "loss": 1.507, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.18212253096752595, |
| "grad_norm": 0.12013613432645798, |
| "learning_rate": 0.0001, |
| "loss": 1.6247, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.18245731503180448, |
| "grad_norm": 0.12438444793224335, |
| "learning_rate": 0.0001, |
| "loss": 1.5849, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.18279209909608302, |
| "grad_norm": 0.13607415556907654, |
| "learning_rate": 0.0001, |
| "loss": 1.6562, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.18312688316036158, |
| "grad_norm": 0.1240532174706459, |
| "learning_rate": 0.0001, |
| "loss": 1.5205, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.1834616672246401, |
| "grad_norm": 0.1510075032711029, |
| "learning_rate": 0.0001, |
| "loss": 1.6608, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.18379645128891864, |
| "grad_norm": 0.11965179443359375, |
| "learning_rate": 0.0001, |
| "loss": 1.6391, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.1841312353531972, |
| "grad_norm": 0.14874660968780518, |
| "learning_rate": 0.0001, |
| "loss": 1.6156, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.18446601941747573, |
| "grad_norm": 0.1273370385169983, |
| "learning_rate": 0.0001, |
| "loss": 1.5117, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.18480080348175426, |
| "grad_norm": 0.1213572546839714, |
| "learning_rate": 0.0001, |
| "loss": 1.5124, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.18513558754603282, |
| "grad_norm": 0.1602640151977539, |
| "learning_rate": 0.0001, |
| "loss": 1.6318, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.18547037161031135, |
| "grad_norm": 0.12859167158603668, |
| "learning_rate": 0.0001, |
| "loss": 1.6562, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.18580515567458988, |
| "grad_norm": 0.13728216290473938, |
| "learning_rate": 0.0001, |
| "loss": 1.5873, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.18613993973886844, |
| "grad_norm": 0.12880103290081024, |
| "learning_rate": 0.0001, |
| "loss": 1.5121, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.18647472380314697, |
| "grad_norm": 0.1293378323316574, |
| "learning_rate": 0.0001, |
| "loss": 1.6275, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.1868095078674255, |
| "grad_norm": 0.1387391984462738, |
| "learning_rate": 0.0001, |
| "loss": 1.6486, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.18714429193170404, |
| "grad_norm": 0.14882785081863403, |
| "learning_rate": 0.0001, |
| "loss": 1.6422, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.1874790759959826, |
| "grad_norm": 0.11521956324577332, |
| "learning_rate": 0.0001, |
| "loss": 1.5032, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.18781386006026113, |
| "grad_norm": 0.12418463081121445, |
| "learning_rate": 0.0001, |
| "loss": 1.5422, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.18814864412453966, |
| "grad_norm": 0.13123475015163422, |
| "learning_rate": 0.0001, |
| "loss": 1.6459, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.18848342818881822, |
| "grad_norm": 0.12267505377531052, |
| "learning_rate": 0.0001, |
| "loss": 1.61, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.18881821225309675, |
| "grad_norm": 0.12172992527484894, |
| "learning_rate": 0.0001, |
| "loss": 1.551, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.18915299631737528, |
| "grad_norm": 0.12027712911367416, |
| "learning_rate": 0.0001, |
| "loss": 1.6178, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.18948778038165384, |
| "grad_norm": 0.11598297208547592, |
| "learning_rate": 0.0001, |
| "loss": 1.5959, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.18982256444593237, |
| "grad_norm": 0.11541326344013214, |
| "learning_rate": 0.0001, |
| "loss": 1.5936, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.1901573485102109, |
| "grad_norm": 0.12343809008598328, |
| "learning_rate": 0.0001, |
| "loss": 1.6091, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.19049213257448946, |
| "grad_norm": 0.11451027542352676, |
| "learning_rate": 0.0001, |
| "loss": 1.6203, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.190826916638768, |
| "grad_norm": 0.1260651797056198, |
| "learning_rate": 0.0001, |
| "loss": 1.6105, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.19116170070304653, |
| "grad_norm": 0.1183401346206665, |
| "learning_rate": 0.0001, |
| "loss": 1.583, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.1914964847673251, |
| "grad_norm": 0.11767153441905975, |
| "learning_rate": 0.0001, |
| "loss": 1.5717, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.19183126883160362, |
| "grad_norm": 0.11693871766328812, |
| "learning_rate": 0.0001, |
| "loss": 1.5783, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.19216605289588215, |
| "grad_norm": 0.1267687827348709, |
| "learning_rate": 0.0001, |
| "loss": 1.5803, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.1925008369601607, |
| "grad_norm": 0.11946652829647064, |
| "learning_rate": 0.0001, |
| "loss": 1.5575, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.19283562102443924, |
| "grad_norm": 0.12602412700653076, |
| "learning_rate": 0.0001, |
| "loss": 1.7297, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.19317040508871777, |
| "grad_norm": 0.12529441714286804, |
| "learning_rate": 0.0001, |
| "loss": 1.6877, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.1935051891529963, |
| "grad_norm": 0.12578092515468597, |
| "learning_rate": 0.0001, |
| "loss": 1.5397, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.19383997321727486, |
| "grad_norm": 0.12697197496891022, |
| "learning_rate": 0.0001, |
| "loss": 1.5541, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.1941747572815534, |
| "grad_norm": 0.12927542626857758, |
| "learning_rate": 0.0001, |
| "loss": 1.6155, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.19450954134583193, |
| "grad_norm": 0.1361040472984314, |
| "learning_rate": 0.0001, |
| "loss": 1.5857, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.19484432541011049, |
| "grad_norm": 0.11877462267875671, |
| "learning_rate": 0.0001, |
| "loss": 1.5558, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.19517910947438902, |
| "grad_norm": 0.14642973244190216, |
| "learning_rate": 0.0001, |
| "loss": 1.6476, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.19551389353866755, |
| "grad_norm": 0.13428737223148346, |
| "learning_rate": 0.0001, |
| "loss": 1.5862, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.1958486776029461, |
| "grad_norm": 0.1275390088558197, |
| "learning_rate": 0.0001, |
| "loss": 1.5418, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.19618346166722464, |
| "grad_norm": 0.1398482322692871, |
| "learning_rate": 0.0001, |
| "loss": 1.4985, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.19651824573150317, |
| "grad_norm": 0.12443619966506958, |
| "learning_rate": 0.0001, |
| "loss": 1.5726, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.19685302979578173, |
| "grad_norm": 0.12923243641853333, |
| "learning_rate": 0.0001, |
| "loss": 1.5596, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.19718781386006026, |
| "grad_norm": 0.14045698940753937, |
| "learning_rate": 0.0001, |
| "loss": 1.5475, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.1975225979243388, |
| "grad_norm": 0.12687772512435913, |
| "learning_rate": 0.0001, |
| "loss": 1.7041, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.19785738198861735, |
| "grad_norm": 0.14536388218402863, |
| "learning_rate": 0.0001, |
| "loss": 1.5724, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.19819216605289589, |
| "grad_norm": 0.1331462413072586, |
| "learning_rate": 0.0001, |
| "loss": 1.6991, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.19852695011717442, |
| "grad_norm": 0.13363464176654816, |
| "learning_rate": 0.0001, |
| "loss": 1.6665, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.19886173418145298, |
| "grad_norm": 0.13291539251804352, |
| "learning_rate": 0.0001, |
| "loss": 1.6278, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.1991965182457315, |
| "grad_norm": 0.1261158436536789, |
| "learning_rate": 0.0001, |
| "loss": 1.6129, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.19953130231001004, |
| "grad_norm": 0.12324585020542145, |
| "learning_rate": 0.0001, |
| "loss": 1.6509, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.19986608637428857, |
| "grad_norm": 0.11849376559257507, |
| "learning_rate": 0.0001, |
| "loss": 1.6226, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.20020087043856713, |
| "grad_norm": 0.1167241707444191, |
| "learning_rate": 0.0001, |
| "loss": 1.5539, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.20053565450284566, |
| "grad_norm": 0.11860879510641098, |
| "learning_rate": 0.0001, |
| "loss": 1.5962, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.2008704385671242, |
| "grad_norm": 0.12385833263397217, |
| "learning_rate": 0.0001, |
| "loss": 1.593, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.20120522263140275, |
| "grad_norm": 0.12093829363584518, |
| "learning_rate": 0.0001, |
| "loss": 1.6914, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.20154000669568128, |
| "grad_norm": 0.11839880049228668, |
| "learning_rate": 0.0001, |
| "loss": 1.5645, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.20187479075995982, |
| "grad_norm": 0.11958955973386765, |
| "learning_rate": 0.0001, |
| "loss": 1.6964, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.20220957482423838, |
| "grad_norm": 0.12148015946149826, |
| "learning_rate": 0.0001, |
| "loss": 1.6201, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.2025443588885169, |
| "grad_norm": 0.11879414319992065, |
| "learning_rate": 0.0001, |
| "loss": 1.5696, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.20287914295279544, |
| "grad_norm": 0.11815709620714188, |
| "learning_rate": 0.0001, |
| "loss": 1.5771, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.203213927017074, |
| "grad_norm": 0.12391653656959534, |
| "learning_rate": 0.0001, |
| "loss": 1.4984, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.20354871108135253, |
| "grad_norm": 0.12949740886688232, |
| "learning_rate": 0.0001, |
| "loss": 1.6746, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.20388349514563106, |
| "grad_norm": 0.12630179524421692, |
| "learning_rate": 0.0001, |
| "loss": 1.5984, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.20421827920990962, |
| "grad_norm": 0.13836237788200378, |
| "learning_rate": 0.0001, |
| "loss": 1.6562, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.20455306327418815, |
| "grad_norm": 0.12105460464954376, |
| "learning_rate": 0.0001, |
| "loss": 1.628, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.20488784733846668, |
| "grad_norm": 0.13807529211044312, |
| "learning_rate": 0.0001, |
| "loss": 1.5858, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.20522263140274524, |
| "grad_norm": 0.12660756707191467, |
| "learning_rate": 0.0001, |
| "loss": 1.5819, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.20555741546702377, |
| "grad_norm": 0.11513250321149826, |
| "learning_rate": 0.0001, |
| "loss": 1.5572, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.2058921995313023, |
| "grad_norm": 0.12499019503593445, |
| "learning_rate": 0.0001, |
| "loss": 1.5902, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.20622698359558084, |
| "grad_norm": 0.13060630857944489, |
| "learning_rate": 0.0001, |
| "loss": 1.6933, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.2065617676598594, |
| "grad_norm": 0.11751000583171844, |
| "learning_rate": 0.0001, |
| "loss": 1.6165, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.20689655172413793, |
| "grad_norm": 0.12362553179264069, |
| "learning_rate": 0.0001, |
| "loss": 1.6214, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.20723133578841646, |
| "grad_norm": 0.11933618783950806, |
| "learning_rate": 0.0001, |
| "loss": 1.6041, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.20756611985269502, |
| "grad_norm": 0.12560446560382843, |
| "learning_rate": 0.0001, |
| "loss": 1.689, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.20790090391697355, |
| "grad_norm": 0.12433163821697235, |
| "learning_rate": 0.0001, |
| "loss": 1.6717, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.20823568798125208, |
| "grad_norm": 0.12220048159360886, |
| "learning_rate": 0.0001, |
| "loss": 1.6216, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.20857047204553064, |
| "grad_norm": 0.11404889076948166, |
| "learning_rate": 0.0001, |
| "loss": 1.5362, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.20890525610980917, |
| "grad_norm": 0.11990871280431747, |
| "learning_rate": 0.0001, |
| "loss": 1.5971, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.2092400401740877, |
| "grad_norm": 0.11785005033016205, |
| "learning_rate": 0.0001, |
| "loss": 1.5641, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.20957482423836626, |
| "grad_norm": 0.12312883138656616, |
| "learning_rate": 0.0001, |
| "loss": 1.617, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.2099096083026448, |
| "grad_norm": 0.11449938267469406, |
| "learning_rate": 0.0001, |
| "loss": 1.5396, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.21024439236692333, |
| "grad_norm": 0.1219322681427002, |
| "learning_rate": 0.0001, |
| "loss": 1.5951, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.2105791764312019, |
| "grad_norm": 0.12152589112520218, |
| "learning_rate": 0.0001, |
| "loss": 1.6017, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.21091396049548042, |
| "grad_norm": 0.11546038091182709, |
| "learning_rate": 0.0001, |
| "loss": 1.5969, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.21124874455975895, |
| "grad_norm": 0.1294824779033661, |
| "learning_rate": 0.0001, |
| "loss": 1.5983, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2115835286240375, |
| "grad_norm": 0.12606552243232727, |
| "learning_rate": 0.0001, |
| "loss": 1.6026, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.21191831268831604, |
| "grad_norm": 0.12761344015598297, |
| "learning_rate": 0.0001, |
| "loss": 1.6561, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.21225309675259457, |
| "grad_norm": 0.11588580161333084, |
| "learning_rate": 0.0001, |
| "loss": 1.5967, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.2125878808168731, |
| "grad_norm": 0.11629272252321243, |
| "learning_rate": 0.0001, |
| "loss": 1.5894, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.21292266488115166, |
| "grad_norm": 0.1237213984131813, |
| "learning_rate": 0.0001, |
| "loss": 1.6113, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.2132574489454302, |
| "grad_norm": 0.12293344736099243, |
| "learning_rate": 0.0001, |
| "loss": 1.5972, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.21359223300970873, |
| "grad_norm": 0.1172887459397316, |
| "learning_rate": 0.0001, |
| "loss": 1.5765, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.21392701707398729, |
| "grad_norm": 0.12403010576963425, |
| "learning_rate": 0.0001, |
| "loss": 1.5639, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.21426180113826582, |
| "grad_norm": 0.12683235108852386, |
| "learning_rate": 0.0001, |
| "loss": 1.5197, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.21459658520254435, |
| "grad_norm": 0.11593903601169586, |
| "learning_rate": 0.0001, |
| "loss": 1.5158, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.2149313692668229, |
| "grad_norm": 0.1251828819513321, |
| "learning_rate": 0.0001, |
| "loss": 1.6396, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.21526615333110144, |
| "grad_norm": 0.12358346581459045, |
| "learning_rate": 0.0001, |
| "loss": 1.6012, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.21560093739537997, |
| "grad_norm": 0.11473721265792847, |
| "learning_rate": 0.0001, |
| "loss": 1.5365, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.21593572145965853, |
| "grad_norm": 0.1184060201048851, |
| "learning_rate": 0.0001, |
| "loss": 1.4507, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.21627050552393706, |
| "grad_norm": 0.12540043890476227, |
| "learning_rate": 0.0001, |
| "loss": 1.5854, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.2166052895882156, |
| "grad_norm": 0.12070447206497192, |
| "learning_rate": 0.0001, |
| "loss": 1.6097, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.21694007365249415, |
| "grad_norm": 0.11351459473371506, |
| "learning_rate": 0.0001, |
| "loss": 1.5937, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.21727485771677268, |
| "grad_norm": 0.1242094486951828, |
| "learning_rate": 0.0001, |
| "loss": 1.5222, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.21760964178105122, |
| "grad_norm": 0.12054958194494247, |
| "learning_rate": 0.0001, |
| "loss": 1.5285, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.21794442584532978, |
| "grad_norm": 0.12539923191070557, |
| "learning_rate": 0.0001, |
| "loss": 1.5001, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.2182792099096083, |
| "grad_norm": 0.12270530313253403, |
| "learning_rate": 0.0001, |
| "loss": 1.6769, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.21861399397388684, |
| "grad_norm": 0.12920905649662018, |
| "learning_rate": 0.0001, |
| "loss": 1.5999, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.21894877803816537, |
| "grad_norm": 0.13267312943935394, |
| "learning_rate": 0.0001, |
| "loss": 1.5382, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.21928356210244393, |
| "grad_norm": 0.11984428763389587, |
| "learning_rate": 0.0001, |
| "loss": 1.631, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.21961834616672246, |
| "grad_norm": 0.1474982053041458, |
| "learning_rate": 0.0001, |
| "loss": 1.6709, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.219953130231001, |
| "grad_norm": 0.13542193174362183, |
| "learning_rate": 0.0001, |
| "loss": 1.6415, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.22028791429527955, |
| "grad_norm": 0.13832658529281616, |
| "learning_rate": 0.0001, |
| "loss": 1.6118, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.22062269835955808, |
| "grad_norm": 0.15140588581562042, |
| "learning_rate": 0.0001, |
| "loss": 1.6314, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.22095748242383662, |
| "grad_norm": 0.12110920995473862, |
| "learning_rate": 0.0001, |
| "loss": 1.5884, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.22129226648811517, |
| "grad_norm": 0.14811581373214722, |
| "learning_rate": 0.0001, |
| "loss": 1.6642, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.2216270505523937, |
| "grad_norm": 0.12733857333660126, |
| "learning_rate": 0.0001, |
| "loss": 1.5512, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.22196183461667224, |
| "grad_norm": 0.13028332591056824, |
| "learning_rate": 0.0001, |
| "loss": 1.5613, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.2222966186809508, |
| "grad_norm": 0.1242808997631073, |
| "learning_rate": 0.0001, |
| "loss": 1.5869, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.22263140274522933, |
| "grad_norm": 0.12380847334861755, |
| "learning_rate": 0.0001, |
| "loss": 1.5926, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.22296618680950786, |
| "grad_norm": 0.12564754486083984, |
| "learning_rate": 0.0001, |
| "loss": 1.5811, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.22330097087378642, |
| "grad_norm": 0.1509399712085724, |
| "learning_rate": 0.0001, |
| "loss": 1.7172, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.22363575493806495, |
| "grad_norm": 0.12397512793540955, |
| "learning_rate": 0.0001, |
| "loss": 1.642, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.22397053900234348, |
| "grad_norm": 0.13826021552085876, |
| "learning_rate": 0.0001, |
| "loss": 1.6395, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.22430532306662204, |
| "grad_norm": 0.1417902112007141, |
| "learning_rate": 0.0001, |
| "loss": 1.6169, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.22464010713090057, |
| "grad_norm": 0.12220132350921631, |
| "learning_rate": 0.0001, |
| "loss": 1.5686, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.2249748911951791, |
| "grad_norm": 0.13563144207000732, |
| "learning_rate": 0.0001, |
| "loss": 1.6556, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.22530967525945764, |
| "grad_norm": 0.13794521987438202, |
| "learning_rate": 0.0001, |
| "loss": 1.5187, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.2256444593237362, |
| "grad_norm": 0.12060145288705826, |
| "learning_rate": 0.0001, |
| "loss": 1.5901, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.22597924338801473, |
| "grad_norm": 0.13909369707107544, |
| "learning_rate": 0.0001, |
| "loss": 1.5101, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.22631402745229326, |
| "grad_norm": 0.13746792078018188, |
| "learning_rate": 0.0001, |
| "loss": 1.6084, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.22664881151657182, |
| "grad_norm": 0.11612525582313538, |
| "learning_rate": 0.0001, |
| "loss": 1.606, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.22698359558085035, |
| "grad_norm": 0.13988125324249268, |
| "learning_rate": 0.0001, |
| "loss": 1.6123, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.22731837964512888, |
| "grad_norm": 0.13023462891578674, |
| "learning_rate": 0.0001, |
| "loss": 1.6202, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.22765316370940744, |
| "grad_norm": 0.11764882504940033, |
| "learning_rate": 0.0001, |
| "loss": 1.5744, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.22798794777368597, |
| "grad_norm": 0.12987253069877625, |
| "learning_rate": 0.0001, |
| "loss": 1.6287, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.2283227318379645, |
| "grad_norm": 0.12687528133392334, |
| "learning_rate": 0.0001, |
| "loss": 1.6177, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.22865751590224306, |
| "grad_norm": 0.117088183760643, |
| "learning_rate": 0.0001, |
| "loss": 1.5704, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.2289922999665216, |
| "grad_norm": 0.13380305469036102, |
| "learning_rate": 0.0001, |
| "loss": 1.5013, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.22932708403080013, |
| "grad_norm": 0.13155803084373474, |
| "learning_rate": 0.0001, |
| "loss": 1.6627, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.2296618680950787, |
| "grad_norm": 0.12210634350776672, |
| "learning_rate": 0.0001, |
| "loss": 1.491, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.22999665215935722, |
| "grad_norm": 0.12427474558353424, |
| "learning_rate": 0.0001, |
| "loss": 1.6381, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.23033143622363575, |
| "grad_norm": 0.12354297190904617, |
| "learning_rate": 0.0001, |
| "loss": 1.5804, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.2306662202879143, |
| "grad_norm": 0.11402271687984467, |
| "learning_rate": 0.0001, |
| "loss": 1.5562, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.23100100435219284, |
| "grad_norm": 0.12571346759796143, |
| "learning_rate": 0.0001, |
| "loss": 1.6974, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.23133578841647137, |
| "grad_norm": 0.12201119214296341, |
| "learning_rate": 0.0001, |
| "loss": 1.5866, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.2316705724807499, |
| "grad_norm": 0.13017117977142334, |
| "learning_rate": 0.0001, |
| "loss": 1.6493, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.23200535654502846, |
| "grad_norm": 0.11595404893159866, |
| "learning_rate": 0.0001, |
| "loss": 1.5047, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.232340140609307, |
| "grad_norm": 0.11953503638505936, |
| "learning_rate": 0.0001, |
| "loss": 1.4952, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.23267492467358553, |
| "grad_norm": 0.11844140291213989, |
| "learning_rate": 0.0001, |
| "loss": 1.6223, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.23300970873786409, |
| "grad_norm": 0.12358598411083221, |
| "learning_rate": 0.0001, |
| "loss": 1.6303, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.23334449280214262, |
| "grad_norm": 0.12384648621082306, |
| "learning_rate": 0.0001, |
| "loss": 1.6594, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.23367927686642115, |
| "grad_norm": 0.11835581809282303, |
| "learning_rate": 0.0001, |
| "loss": 1.6098, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.2340140609306997, |
| "grad_norm": 0.1138228103518486, |
| "learning_rate": 0.0001, |
| "loss": 1.4118, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.23434884499497824, |
| "grad_norm": 0.11459102481603622, |
| "learning_rate": 0.0001, |
| "loss": 1.5633, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.23468362905925677, |
| "grad_norm": 0.11587528139352798, |
| "learning_rate": 0.0001, |
| "loss": 1.6973, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.23501841312353533, |
| "grad_norm": 0.13280251622200012, |
| "learning_rate": 0.0001, |
| "loss": 1.5161, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.23535319718781386, |
| "grad_norm": 0.12264399230480194, |
| "learning_rate": 0.0001, |
| "loss": 1.656, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.2356879812520924, |
| "grad_norm": 0.11608457565307617, |
| "learning_rate": 0.0001, |
| "loss": 1.5099, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.23602276531637095, |
| "grad_norm": 0.12152610719203949, |
| "learning_rate": 0.0001, |
| "loss": 1.5169, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.23635754938064948, |
| "grad_norm": 0.12914855778217316, |
| "learning_rate": 0.0001, |
| "loss": 1.5904, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.23669233344492802, |
| "grad_norm": 0.13277898728847504, |
| "learning_rate": 0.0001, |
| "loss": 1.656, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.23702711750920658, |
| "grad_norm": 0.12540487945079803, |
| "learning_rate": 0.0001, |
| "loss": 1.6178, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.2373619015734851, |
| "grad_norm": 0.11845158785581589, |
| "learning_rate": 0.0001, |
| "loss": 1.5014, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.23769668563776364, |
| "grad_norm": 0.11418534815311432, |
| "learning_rate": 0.0001, |
| "loss": 1.5292, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.23803146970204217, |
| "grad_norm": 0.1384686678647995, |
| "learning_rate": 0.0001, |
| "loss": 1.6188, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.23836625376632073, |
| "grad_norm": 0.12325987964868546, |
| "learning_rate": 0.0001, |
| "loss": 1.5636, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.23870103783059926, |
| "grad_norm": 0.11931071430444717, |
| "learning_rate": 0.0001, |
| "loss": 1.5655, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.2390358218948778, |
| "grad_norm": 0.12119931727647781, |
| "learning_rate": 0.0001, |
| "loss": 1.5289, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.23937060595915635, |
| "grad_norm": 0.12172186374664307, |
| "learning_rate": 0.0001, |
| "loss": 1.6467, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.23970539002343488, |
| "grad_norm": 0.12344299256801605, |
| "learning_rate": 0.0001, |
| "loss": 1.5616, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.24004017408771341, |
| "grad_norm": 0.12173335254192352, |
| "learning_rate": 0.0001, |
| "loss": 1.6135, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.24037495815199197, |
| "grad_norm": 0.1223810538649559, |
| "learning_rate": 0.0001, |
| "loss": 1.6239, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.2407097422162705, |
| "grad_norm": 0.11744136363267899, |
| "learning_rate": 0.0001, |
| "loss": 1.5704, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.24104452628054904, |
| "grad_norm": 0.12341196089982986, |
| "learning_rate": 0.0001, |
| "loss": 1.6704, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.2413793103448276, |
| "grad_norm": 0.12578146159648895, |
| "learning_rate": 0.0001, |
| "loss": 1.604, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.24171409440910613, |
| "grad_norm": 0.12708286941051483, |
| "learning_rate": 0.0001, |
| "loss": 1.5583, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.24204887847338466, |
| "grad_norm": 0.11757246404886246, |
| "learning_rate": 0.0001, |
| "loss": 1.4911, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.24238366253766322, |
| "grad_norm": 0.1309349238872528, |
| "learning_rate": 0.0001, |
| "loss": 1.6648, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.24271844660194175, |
| "grad_norm": 0.13289286196231842, |
| "learning_rate": 0.0001, |
| "loss": 1.6547, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.24305323066622028, |
| "grad_norm": 0.12044942378997803, |
| "learning_rate": 0.0001, |
| "loss": 1.661, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.24338801473049884, |
| "grad_norm": 0.12810328602790833, |
| "learning_rate": 0.0001, |
| "loss": 1.6775, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.24372279879477737, |
| "grad_norm": 0.12643273174762726, |
| "learning_rate": 0.0001, |
| "loss": 1.4938, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.2440575828590559, |
| "grad_norm": 0.1253504455089569, |
| "learning_rate": 0.0001, |
| "loss": 1.6482, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.24439236692333444, |
| "grad_norm": 0.12725912034511566, |
| "learning_rate": 0.0001, |
| "loss": 1.4911, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.244727150987613, |
| "grad_norm": 0.13506008684635162, |
| "learning_rate": 0.0001, |
| "loss": 1.5739, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.24506193505189153, |
| "grad_norm": 0.12034797668457031, |
| "learning_rate": 0.0001, |
| "loss": 1.6477, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.24539671911617006, |
| "grad_norm": 0.12169791758060455, |
| "learning_rate": 0.0001, |
| "loss": 1.6398, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.24573150318044862, |
| "grad_norm": 0.1253383755683899, |
| "learning_rate": 0.0001, |
| "loss": 1.5921, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.24606628724472715, |
| "grad_norm": 0.11854001134634018, |
| "learning_rate": 0.0001, |
| "loss": 1.598, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.24640107130900568, |
| "grad_norm": 0.13825742900371552, |
| "learning_rate": 0.0001, |
| "loss": 1.6588, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.24673585537328424, |
| "grad_norm": 0.1235450729727745, |
| "learning_rate": 0.0001, |
| "loss": 1.5872, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.24707063943756277, |
| "grad_norm": 0.12598398327827454, |
| "learning_rate": 0.0001, |
| "loss": 1.6038, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.2474054235018413, |
| "grad_norm": 0.14527225494384766, |
| "learning_rate": 0.0001, |
| "loss": 1.6419, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.24774020756611986, |
| "grad_norm": 0.11842803657054901, |
| "learning_rate": 0.0001, |
| "loss": 1.5628, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2480749916303984, |
| "grad_norm": 0.12376052141189575, |
| "learning_rate": 0.0001, |
| "loss": 1.5271, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.24840977569467693, |
| "grad_norm": 0.13634417951107025, |
| "learning_rate": 0.0001, |
| "loss": 1.7012, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.24874455975895549, |
| "grad_norm": 0.12457748502492905, |
| "learning_rate": 0.0001, |
| "loss": 1.5623, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.24907934382323402, |
| "grad_norm": 0.11860496550798416, |
| "learning_rate": 0.0001, |
| "loss": 1.6049, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.24941412788751255, |
| "grad_norm": 0.12447136640548706, |
| "learning_rate": 0.0001, |
| "loss": 1.6967, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.2497489119517911, |
| "grad_norm": 0.12220341712236404, |
| "learning_rate": 0.0001, |
| "loss": 1.5819, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.2500836960160696, |
| "grad_norm": 0.11865612119436264, |
| "learning_rate": 0.0001, |
| "loss": 1.5519, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.25041848008034817, |
| "grad_norm": 0.11847954988479614, |
| "learning_rate": 0.0001, |
| "loss": 1.5087, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.25075326414462673, |
| "grad_norm": 0.12107084691524506, |
| "learning_rate": 0.0001, |
| "loss": 1.5995, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.25108804820890523, |
| "grad_norm": 0.12188322097063065, |
| "learning_rate": 0.0001, |
| "loss": 1.6439, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2514228322731838, |
| "grad_norm": 0.12144109606742859, |
| "learning_rate": 0.0001, |
| "loss": 1.5613, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.25175761633746235, |
| "grad_norm": 0.12133816629648209, |
| "learning_rate": 0.0001, |
| "loss": 1.5364, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.25209240040174086, |
| "grad_norm": 0.11708073318004608, |
| "learning_rate": 0.0001, |
| "loss": 1.5221, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.2524271844660194, |
| "grad_norm": 0.1203671544790268, |
| "learning_rate": 0.0001, |
| "loss": 1.5736, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.252761968530298, |
| "grad_norm": 0.12079092115163803, |
| "learning_rate": 0.0001, |
| "loss": 1.5842, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.2530967525945765, |
| "grad_norm": 0.1294735223054886, |
| "learning_rate": 0.0001, |
| "loss": 1.5994, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.25343153665885504, |
| "grad_norm": 0.1251528263092041, |
| "learning_rate": 0.0001, |
| "loss": 1.6391, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.2537663207231336, |
| "grad_norm": 0.12093610316514969, |
| "learning_rate": 0.0001, |
| "loss": 1.6275, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.2541011047874121, |
| "grad_norm": 0.1214980036020279, |
| "learning_rate": 0.0001, |
| "loss": 1.5887, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.25443588885169066, |
| "grad_norm": 0.12011279165744781, |
| "learning_rate": 0.0001, |
| "loss": 1.5973, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2547706729159692, |
| "grad_norm": 0.12630945444107056, |
| "learning_rate": 0.0001, |
| "loss": 1.6184, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.2551054569802477, |
| "grad_norm": 0.12001120299100876, |
| "learning_rate": 0.0001, |
| "loss": 1.5298, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.2554402410445263, |
| "grad_norm": 0.1369365155696869, |
| "learning_rate": 0.0001, |
| "loss": 1.5718, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.25577502510880484, |
| "grad_norm": 0.1201329231262207, |
| "learning_rate": 0.0001, |
| "loss": 1.5354, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.25610980917308335, |
| "grad_norm": 0.12741532921791077, |
| "learning_rate": 0.0001, |
| "loss": 1.6193, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.2564445932373619, |
| "grad_norm": 0.12349703162908554, |
| "learning_rate": 0.0001, |
| "loss": 1.6143, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.25677937730164047, |
| "grad_norm": 0.11855439841747284, |
| "learning_rate": 0.0001, |
| "loss": 1.6037, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.25711416136591897, |
| "grad_norm": 0.12034845352172852, |
| "learning_rate": 0.0001, |
| "loss": 1.5317, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.25744894543019753, |
| "grad_norm": 0.11987943202257156, |
| "learning_rate": 0.0001, |
| "loss": 1.535, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.2577837294944761, |
| "grad_norm": 0.12118515372276306, |
| "learning_rate": 0.0001, |
| "loss": 1.5974, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.2581185135587546, |
| "grad_norm": 0.12842996418476105, |
| "learning_rate": 0.0001, |
| "loss": 1.609, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.25845329762303315, |
| "grad_norm": 0.12420446425676346, |
| "learning_rate": 0.0001, |
| "loss": 1.6093, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.2587880816873117, |
| "grad_norm": 0.12443120032548904, |
| "learning_rate": 0.0001, |
| "loss": 1.6122, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.2591228657515902, |
| "grad_norm": 0.11912049353122711, |
| "learning_rate": 0.0001, |
| "loss": 1.5209, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.2594576498158688, |
| "grad_norm": 0.1273064911365509, |
| "learning_rate": 0.0001, |
| "loss": 1.608, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.2597924338801473, |
| "grad_norm": 0.11585114896297455, |
| "learning_rate": 0.0001, |
| "loss": 1.3888, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.26012721794442584, |
| "grad_norm": 0.12005290389060974, |
| "learning_rate": 0.0001, |
| "loss": 1.4666, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.2604620020087044, |
| "grad_norm": 0.11954595148563385, |
| "learning_rate": 0.0001, |
| "loss": 1.5558, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.2607967860729829, |
| "grad_norm": 0.1307271122932434, |
| "learning_rate": 0.0001, |
| "loss": 1.6063, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.26113157013726146, |
| "grad_norm": 0.113981693983078, |
| "learning_rate": 0.0001, |
| "loss": 1.4857, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.26146635420154, |
| "grad_norm": 0.1225418671965599, |
| "learning_rate": 0.0001, |
| "loss": 1.5508, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.2618011382658185, |
| "grad_norm": 0.12919741868972778, |
| "learning_rate": 0.0001, |
| "loss": 1.6255, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.2621359223300971, |
| "grad_norm": 0.11552941054105759, |
| "learning_rate": 0.0001, |
| "loss": 1.6183, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.26247070639437564, |
| "grad_norm": 0.13457614183425903, |
| "learning_rate": 0.0001, |
| "loss": 1.6461, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.26280549045865415, |
| "grad_norm": 0.11841408908367157, |
| "learning_rate": 0.0001, |
| "loss": 1.5481, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.2631402745229327, |
| "grad_norm": 0.11701938509941101, |
| "learning_rate": 0.0001, |
| "loss": 1.5883, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.26347505858721126, |
| "grad_norm": 0.14221838116645813, |
| "learning_rate": 0.0001, |
| "loss": 1.5904, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.26380984265148977, |
| "grad_norm": 0.11813905090093613, |
| "learning_rate": 0.0001, |
| "loss": 1.5653, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.2641446267157683, |
| "grad_norm": 0.1315639317035675, |
| "learning_rate": 0.0001, |
| "loss": 1.5811, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.2644794107800469, |
| "grad_norm": 0.13400433957576752, |
| "learning_rate": 0.0001, |
| "loss": 1.5363, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.2648141948443254, |
| "grad_norm": 0.12116281688213348, |
| "learning_rate": 0.0001, |
| "loss": 1.6353, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.26514897890860395, |
| "grad_norm": 0.1382567137479782, |
| "learning_rate": 0.0001, |
| "loss": 1.592, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.2654837629728825, |
| "grad_norm": 0.14005912840366364, |
| "learning_rate": 0.0001, |
| "loss": 1.6114, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.265818547037161, |
| "grad_norm": 0.13382911682128906, |
| "learning_rate": 0.0001, |
| "loss": 1.5942, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.26615333110143957, |
| "grad_norm": 0.12423510104417801, |
| "learning_rate": 0.0001, |
| "loss": 1.5378, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.26648811516571813, |
| "grad_norm": 0.12228628993034363, |
| "learning_rate": 0.0001, |
| "loss": 1.5704, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.26682289922999664, |
| "grad_norm": 0.1286916881799698, |
| "learning_rate": 0.0001, |
| "loss": 1.6037, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.2671576832942752, |
| "grad_norm": 0.12864018976688385, |
| "learning_rate": 0.0001, |
| "loss": 1.6522, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.26749246735855375, |
| "grad_norm": 0.12012400478124619, |
| "learning_rate": 0.0001, |
| "loss": 1.5275, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.26782725142283226, |
| "grad_norm": 0.12273643165826797, |
| "learning_rate": 0.0001, |
| "loss": 1.5848, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2681620354871108, |
| "grad_norm": 0.13991284370422363, |
| "learning_rate": 0.0001, |
| "loss": 1.6271, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.2684968195513894, |
| "grad_norm": 0.1236526146531105, |
| "learning_rate": 0.0001, |
| "loss": 1.57, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.2688316036156679, |
| "grad_norm": 0.1302153319120407, |
| "learning_rate": 0.0001, |
| "loss": 1.5638, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.26916638767994644, |
| "grad_norm": 0.11963735520839691, |
| "learning_rate": 0.0001, |
| "loss": 1.6089, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.269501171744225, |
| "grad_norm": 0.13298673927783966, |
| "learning_rate": 0.0001, |
| "loss": 1.6313, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.2698359558085035, |
| "grad_norm": 0.13616934418678284, |
| "learning_rate": 0.0001, |
| "loss": 1.653, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.27017073987278206, |
| "grad_norm": 0.12497668713331223, |
| "learning_rate": 0.0001, |
| "loss": 1.5514, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.2705055239370606, |
| "grad_norm": 0.11764683574438095, |
| "learning_rate": 0.0001, |
| "loss": 1.5878, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.2708403080013391, |
| "grad_norm": 0.12114263325929642, |
| "learning_rate": 0.0001, |
| "loss": 1.5628, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.2711750920656177, |
| "grad_norm": 0.1347784847021103, |
| "learning_rate": 0.0001, |
| "loss": 1.7159, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.27150987612989624, |
| "grad_norm": 0.12009880691766739, |
| "learning_rate": 0.0001, |
| "loss": 1.6043, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.27184466019417475, |
| "grad_norm": 0.1278241127729416, |
| "learning_rate": 0.0001, |
| "loss": 1.6309, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.2721794442584533, |
| "grad_norm": 0.1216406300663948, |
| "learning_rate": 0.0001, |
| "loss": 1.5867, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.2725142283227318, |
| "grad_norm": 0.11623333394527435, |
| "learning_rate": 0.0001, |
| "loss": 1.5272, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.27284901238701037, |
| "grad_norm": 0.11762827634811401, |
| "learning_rate": 0.0001, |
| "loss": 1.4148, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.27318379645128893, |
| "grad_norm": 0.12679798901081085, |
| "learning_rate": 0.0001, |
| "loss": 1.678, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.27351858051556743, |
| "grad_norm": 0.12463215738534927, |
| "learning_rate": 0.0001, |
| "loss": 1.6383, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.273853364579846, |
| "grad_norm": 0.12248417735099792, |
| "learning_rate": 0.0001, |
| "loss": 1.5937, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.27418814864412455, |
| "grad_norm": 0.11953899264335632, |
| "learning_rate": 0.0001, |
| "loss": 1.5704, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.27452293270840306, |
| "grad_norm": 0.12919148802757263, |
| "learning_rate": 0.0001, |
| "loss": 1.6948, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.2748577167726816, |
| "grad_norm": 0.11798353493213654, |
| "learning_rate": 0.0001, |
| "loss": 1.4814, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.2751925008369602, |
| "grad_norm": 0.13017946481704712, |
| "learning_rate": 0.0001, |
| "loss": 1.5837, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.2755272849012387, |
| "grad_norm": 0.1253434419631958, |
| "learning_rate": 0.0001, |
| "loss": 1.5418, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.27586206896551724, |
| "grad_norm": 0.11546894907951355, |
| "learning_rate": 0.0001, |
| "loss": 1.5851, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.2761968530297958, |
| "grad_norm": 0.12117631733417511, |
| "learning_rate": 0.0001, |
| "loss": 1.6335, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.2765316370940743, |
| "grad_norm": 0.12088704109191895, |
| "learning_rate": 0.0001, |
| "loss": 1.571, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.27686642115835286, |
| "grad_norm": 0.12261457741260529, |
| "learning_rate": 0.0001, |
| "loss": 1.5143, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.2772012052226314, |
| "grad_norm": 0.12313897162675858, |
| "learning_rate": 0.0001, |
| "loss": 1.621, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.2775359892869099, |
| "grad_norm": 0.12563903629779816, |
| "learning_rate": 0.0001, |
| "loss": 1.657, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.2778707733511885, |
| "grad_norm": 0.1187531128525734, |
| "learning_rate": 0.0001, |
| "loss": 1.5346, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.27820555741546704, |
| "grad_norm": 0.12233595550060272, |
| "learning_rate": 0.0001, |
| "loss": 1.5835, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.27854034147974555, |
| "grad_norm": 0.12235147505998611, |
| "learning_rate": 0.0001, |
| "loss": 1.6104, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.2788751255440241, |
| "grad_norm": 0.11765027791261673, |
| "learning_rate": 0.0001, |
| "loss": 1.5489, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.27920990960830266, |
| "grad_norm": 0.12349414080381393, |
| "learning_rate": 0.0001, |
| "loss": 1.6089, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.27954469367258117, |
| "grad_norm": 0.12419009208679199, |
| "learning_rate": 0.0001, |
| "loss": 1.6418, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.2798794777368597, |
| "grad_norm": 0.12406160682439804, |
| "learning_rate": 0.0001, |
| "loss": 1.5774, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.2802142618011383, |
| "grad_norm": 0.11722970008850098, |
| "learning_rate": 0.0001, |
| "loss": 1.5634, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.2805490458654168, |
| "grad_norm": 0.1188267171382904, |
| "learning_rate": 0.0001, |
| "loss": 1.5005, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.28088382992969535, |
| "grad_norm": 0.11977598071098328, |
| "learning_rate": 0.0001, |
| "loss": 1.5556, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.2812186139939739, |
| "grad_norm": 0.12196852266788483, |
| "learning_rate": 0.0001, |
| "loss": 1.604, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.2815533980582524, |
| "grad_norm": 0.12035735696554184, |
| "learning_rate": 0.0001, |
| "loss": 1.5304, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.281888182122531, |
| "grad_norm": 0.12375766038894653, |
| "learning_rate": 0.0001, |
| "loss": 1.5929, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.28222296618680953, |
| "grad_norm": 0.1304924190044403, |
| "learning_rate": 0.0001, |
| "loss": 1.6148, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.28255775025108804, |
| "grad_norm": 0.12864375114440918, |
| "learning_rate": 0.0001, |
| "loss": 1.4907, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.2828925343153666, |
| "grad_norm": 0.12013059109449387, |
| "learning_rate": 0.0001, |
| "loss": 1.5051, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.28322731837964515, |
| "grad_norm": 0.1277569979429245, |
| "learning_rate": 0.0001, |
| "loss": 1.5942, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.28356210244392366, |
| "grad_norm": 0.13474377989768982, |
| "learning_rate": 0.0001, |
| "loss": 1.6098, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.2838968865082022, |
| "grad_norm": 0.12635944783687592, |
| "learning_rate": 0.0001, |
| "loss": 1.6217, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.2842316705724808, |
| "grad_norm": 0.12218885123729706, |
| "learning_rate": 0.0001, |
| "loss": 1.578, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.2845664546367593, |
| "grad_norm": 0.12037128210067749, |
| "learning_rate": 0.0001, |
| "loss": 1.5502, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.28490123870103784, |
| "grad_norm": 0.12386499345302582, |
| "learning_rate": 0.0001, |
| "loss": 1.6922, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.28523602276531634, |
| "grad_norm": 0.1298052966594696, |
| "learning_rate": 0.0001, |
| "loss": 1.6589, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.2855708068295949, |
| "grad_norm": 0.12143804877996445, |
| "learning_rate": 0.0001, |
| "loss": 1.5856, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.28590559089387346, |
| "grad_norm": 0.11675681918859482, |
| "learning_rate": 0.0001, |
| "loss": 1.4877, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.28624037495815197, |
| "grad_norm": 0.11870943009853363, |
| "learning_rate": 0.0001, |
| "loss": 1.5699, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.2865751590224305, |
| "grad_norm": 0.12752340734004974, |
| "learning_rate": 0.0001, |
| "loss": 1.5648, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.2869099430867091, |
| "grad_norm": 0.1254730522632599, |
| "learning_rate": 0.0001, |
| "loss": 1.5331, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.2872447271509876, |
| "grad_norm": 0.12351144105195999, |
| "learning_rate": 0.0001, |
| "loss": 1.5984, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.28757951121526615, |
| "grad_norm": 0.12823925912380219, |
| "learning_rate": 0.0001, |
| "loss": 1.4704, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.2879142952795447, |
| "grad_norm": 0.12884090840816498, |
| "learning_rate": 0.0001, |
| "loss": 1.5302, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.2882490793438232, |
| "grad_norm": 0.12310319393873215, |
| "learning_rate": 0.0001, |
| "loss": 1.5554, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.28858386340810177, |
| "grad_norm": 0.12592901289463043, |
| "learning_rate": 0.0001, |
| "loss": 1.573, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.28891864747238033, |
| "grad_norm": 0.12326246500015259, |
| "learning_rate": 0.0001, |
| "loss": 1.5408, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.28925343153665883, |
| "grad_norm": 0.12688298523426056, |
| "learning_rate": 0.0001, |
| "loss": 1.609, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.2895882156009374, |
| "grad_norm": 0.13284268975257874, |
| "learning_rate": 0.0001, |
| "loss": 1.4774, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.28992299966521595, |
| "grad_norm": 0.12346718460321426, |
| "learning_rate": 0.0001, |
| "loss": 1.5675, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.29025778372949446, |
| "grad_norm": 0.12501643598079681, |
| "learning_rate": 0.0001, |
| "loss": 1.54, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.290592567793773, |
| "grad_norm": 0.14129911363124847, |
| "learning_rate": 0.0001, |
| "loss": 1.5707, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.2909273518580516, |
| "grad_norm": 0.11998032033443451, |
| "learning_rate": 0.0001, |
| "loss": 1.6025, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.2912621359223301, |
| "grad_norm": 0.14502458274364471, |
| "learning_rate": 0.0001, |
| "loss": 1.6066, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.29159691998660864, |
| "grad_norm": 0.13429078459739685, |
| "learning_rate": 0.0001, |
| "loss": 1.5773, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.2919317040508872, |
| "grad_norm": 0.12702088057994843, |
| "learning_rate": 0.0001, |
| "loss": 1.5331, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.2922664881151657, |
| "grad_norm": 0.1450689435005188, |
| "learning_rate": 0.0001, |
| "loss": 1.6426, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.29260127217944426, |
| "grad_norm": 0.12571430206298828, |
| "learning_rate": 0.0001, |
| "loss": 1.5702, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.2929360562437228, |
| "grad_norm": 0.15491126477718353, |
| "learning_rate": 0.0001, |
| "loss": 1.6229, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.2932708403080013, |
| "grad_norm": 0.1497523933649063, |
| "learning_rate": 0.0001, |
| "loss": 1.6073, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.2936056243722799, |
| "grad_norm": 0.12279631197452545, |
| "learning_rate": 0.0001, |
| "loss": 1.5836, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.29394040843655844, |
| "grad_norm": 0.16039283573627472, |
| "learning_rate": 0.0001, |
| "loss": 1.6125, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.29427519250083695, |
| "grad_norm": 0.1275695562362671, |
| "learning_rate": 0.0001, |
| "loss": 1.5279, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.2946099765651155, |
| "grad_norm": 0.12885813415050507, |
| "learning_rate": 0.0001, |
| "loss": 1.5662, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.29494476062939406, |
| "grad_norm": 0.1439967006444931, |
| "learning_rate": 0.0001, |
| "loss": 1.6408, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.29527954469367257, |
| "grad_norm": 0.12064056098461151, |
| "learning_rate": 0.0001, |
| "loss": 1.5292, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.29561432875795113, |
| "grad_norm": 0.12883847951889038, |
| "learning_rate": 0.0001, |
| "loss": 1.6024, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.2959491128222297, |
| "grad_norm": 0.12654966115951538, |
| "learning_rate": 0.0001, |
| "loss": 1.5838, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.2962838968865082, |
| "grad_norm": 0.13914820551872253, |
| "learning_rate": 0.0001, |
| "loss": 1.5345, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.29661868095078675, |
| "grad_norm": 0.12559537589550018, |
| "learning_rate": 0.0001, |
| "loss": 1.515, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.2969534650150653, |
| "grad_norm": 0.1451893299818039, |
| "learning_rate": 0.0001, |
| "loss": 1.5924, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.2972882490793438, |
| "grad_norm": 0.13416925072669983, |
| "learning_rate": 0.0001, |
| "loss": 1.6371, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.2976230331436224, |
| "grad_norm": 0.12274248152971268, |
| "learning_rate": 0.0001, |
| "loss": 1.6539, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.2979578172079009, |
| "grad_norm": 0.143101766705513, |
| "learning_rate": 0.0001, |
| "loss": 1.5748, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.29829260127217944, |
| "grad_norm": 0.12564097344875336, |
| "learning_rate": 0.0001, |
| "loss": 1.5875, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.298627385336458, |
| "grad_norm": 0.12403486669063568, |
| "learning_rate": 0.0001, |
| "loss": 1.5765, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.2989621694007365, |
| "grad_norm": 0.13099223375320435, |
| "learning_rate": 0.0001, |
| "loss": 1.5656, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.29929695346501506, |
| "grad_norm": 0.12135787308216095, |
| "learning_rate": 0.0001, |
| "loss": 1.4958, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.2996317375292936, |
| "grad_norm": 0.12442804127931595, |
| "learning_rate": 0.0001, |
| "loss": 1.6222, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.2999665215935721, |
| "grad_norm": 0.12768028676509857, |
| "learning_rate": 0.0001, |
| "loss": 1.6719, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.3003013056578507, |
| "grad_norm": 0.1240835040807724, |
| "learning_rate": 0.0001, |
| "loss": 1.5114, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.30063608972212924, |
| "grad_norm": 0.12057949602603912, |
| "learning_rate": 0.0001, |
| "loss": 1.5864, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.30097087378640774, |
| "grad_norm": 0.1332257241010666, |
| "learning_rate": 0.0001, |
| "loss": 1.652, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.3013056578506863, |
| "grad_norm": 0.12191877514123917, |
| "learning_rate": 0.0001, |
| "loss": 1.6016, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.30164044191496486, |
| "grad_norm": 0.13481038808822632, |
| "learning_rate": 0.0001, |
| "loss": 1.5724, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.30197522597924337, |
| "grad_norm": 0.12434981763362885, |
| "learning_rate": 0.0001, |
| "loss": 1.5873, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.3023100100435219, |
| "grad_norm": 0.12398968636989594, |
| "learning_rate": 0.0001, |
| "loss": 1.5917, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.3026447941078005, |
| "grad_norm": 0.13455741107463837, |
| "learning_rate": 0.0001, |
| "loss": 1.6293, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.302979578172079, |
| "grad_norm": 0.12864330410957336, |
| "learning_rate": 0.0001, |
| "loss": 1.6671, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.30331436223635755, |
| "grad_norm": 0.1306915581226349, |
| "learning_rate": 0.0001, |
| "loss": 1.5669, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.3036491463006361, |
| "grad_norm": 0.12770214676856995, |
| "learning_rate": 0.0001, |
| "loss": 1.515, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.3039839303649146, |
| "grad_norm": 0.12244972586631775, |
| "learning_rate": 0.0001, |
| "loss": 1.7102, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.30431871442919317, |
| "grad_norm": 0.12544330954551697, |
| "learning_rate": 0.0001, |
| "loss": 1.5809, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.30465349849347173, |
| "grad_norm": 0.12653569877147675, |
| "learning_rate": 0.0001, |
| "loss": 1.5504, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.30498828255775023, |
| "grad_norm": 0.1295597404241562, |
| "learning_rate": 0.0001, |
| "loss": 1.6077, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.3053230666220288, |
| "grad_norm": 0.13423195481300354, |
| "learning_rate": 0.0001, |
| "loss": 1.6433, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.30565785068630735, |
| "grad_norm": 0.12957747280597687, |
| "learning_rate": 0.0001, |
| "loss": 1.72, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.30599263475058586, |
| "grad_norm": 0.1274273693561554, |
| "learning_rate": 0.0001, |
| "loss": 1.5916, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.3063274188148644, |
| "grad_norm": 0.12693728506565094, |
| "learning_rate": 0.0001, |
| "loss": 1.5582, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.306662202879143, |
| "grad_norm": 0.12224942445755005, |
| "learning_rate": 0.0001, |
| "loss": 1.6431, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.3069969869434215, |
| "grad_norm": 0.12495341151952744, |
| "learning_rate": 0.0001, |
| "loss": 1.6554, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.30733177100770004, |
| "grad_norm": 0.12348316609859467, |
| "learning_rate": 0.0001, |
| "loss": 1.5617, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.3076665550719786, |
| "grad_norm": 0.12086449563503265, |
| "learning_rate": 0.0001, |
| "loss": 1.5866, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.3080013391362571, |
| "grad_norm": 0.12970371544361115, |
| "learning_rate": 0.0001, |
| "loss": 1.6444, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.30833612320053566, |
| "grad_norm": 0.115717314183712, |
| "learning_rate": 0.0001, |
| "loss": 1.4493, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.3086709072648142, |
| "grad_norm": 0.1250089704990387, |
| "learning_rate": 0.0001, |
| "loss": 1.5889, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.3090056913290927, |
| "grad_norm": 0.11084622144699097, |
| "learning_rate": 0.0001, |
| "loss": 1.3815, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.3093404753933713, |
| "grad_norm": 0.12127161026000977, |
| "learning_rate": 0.0001, |
| "loss": 1.5558, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.30967525945764984, |
| "grad_norm": 0.12244665622711182, |
| "learning_rate": 0.0001, |
| "loss": 1.6409, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.31001004352192835, |
| "grad_norm": 0.12553781270980835, |
| "learning_rate": 0.0001, |
| "loss": 1.6205, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.3103448275862069, |
| "grad_norm": 0.12222031503915787, |
| "learning_rate": 0.0001, |
| "loss": 1.6323, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.3106796116504854, |
| "grad_norm": 0.1246923953294754, |
| "learning_rate": 0.0001, |
| "loss": 1.719, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.31101439571476397, |
| "grad_norm": 0.13237862288951874, |
| "learning_rate": 0.0001, |
| "loss": 1.6517, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.31134917977904253, |
| "grad_norm": 0.11562683433294296, |
| "learning_rate": 0.0001, |
| "loss": 1.5043, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.31168396384332103, |
| "grad_norm": 0.12860921025276184, |
| "learning_rate": 0.0001, |
| "loss": 1.5939, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.3120187479075996, |
| "grad_norm": 0.11789809912443161, |
| "learning_rate": 0.0001, |
| "loss": 1.4763, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.31235353197187815, |
| "grad_norm": 0.12612248957157135, |
| "learning_rate": 0.0001, |
| "loss": 1.6355, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.31268831603615665, |
| "grad_norm": 0.14561748504638672, |
| "learning_rate": 0.0001, |
| "loss": 1.6897, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.3130231001004352, |
| "grad_norm": 0.1276092380285263, |
| "learning_rate": 0.0001, |
| "loss": 1.6438, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.3133578841647138, |
| "grad_norm": 0.13539274036884308, |
| "learning_rate": 0.0001, |
| "loss": 1.5562, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.3136926682289923, |
| "grad_norm": 0.12490363419055939, |
| "learning_rate": 0.0001, |
| "loss": 1.5592, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.31402745229327084, |
| "grad_norm": 0.12392627447843552, |
| "learning_rate": 0.0001, |
| "loss": 1.6344, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.3143622363575494, |
| "grad_norm": 0.13469712436199188, |
| "learning_rate": 0.0001, |
| "loss": 1.7123, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.3146970204218279, |
| "grad_norm": 0.13380196690559387, |
| "learning_rate": 0.0001, |
| "loss": 1.6485, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.31503180448610646, |
| "grad_norm": 0.12370868027210236, |
| "learning_rate": 0.0001, |
| "loss": 1.5663, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.315366588550385, |
| "grad_norm": 0.1381116360425949, |
| "learning_rate": 0.0001, |
| "loss": 1.5682, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.3157013726146635, |
| "grad_norm": 0.15112708508968353, |
| "learning_rate": 0.0001, |
| "loss": 1.6236, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.3160361566789421, |
| "grad_norm": 0.13402314484119415, |
| "learning_rate": 0.0001, |
| "loss": 1.67, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.31637094074322064, |
| "grad_norm": 0.13505329191684723, |
| "learning_rate": 0.0001, |
| "loss": 1.5149, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.31670572480749914, |
| "grad_norm": 0.1328267902135849, |
| "learning_rate": 0.0001, |
| "loss": 1.5129, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.3170405088717777, |
| "grad_norm": 0.12792791426181793, |
| "learning_rate": 0.0001, |
| "loss": 1.5868, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.31737529293605626, |
| "grad_norm": 0.11726494878530502, |
| "learning_rate": 0.0001, |
| "loss": 1.5581, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.31771007700033477, |
| "grad_norm": 0.12302982062101364, |
| "learning_rate": 0.0001, |
| "loss": 1.5296, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.3180448610646133, |
| "grad_norm": 0.1206970065832138, |
| "learning_rate": 0.0001, |
| "loss": 1.5066, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3183796451288919, |
| "grad_norm": 0.1165679469704628, |
| "learning_rate": 0.0001, |
| "loss": 1.5486, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.3187144291931704, |
| "grad_norm": 0.12752187252044678, |
| "learning_rate": 0.0001, |
| "loss": 1.6441, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.31904921325744895, |
| "grad_norm": 0.12091311067342758, |
| "learning_rate": 0.0001, |
| "loss": 1.5482, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.3193839973217275, |
| "grad_norm": 0.12838125228881836, |
| "learning_rate": 0.0001, |
| "loss": 1.6027, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.319718781386006, |
| "grad_norm": 0.11839887499809265, |
| "learning_rate": 0.0001, |
| "loss": 1.5533, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.32005356545028457, |
| "grad_norm": 0.1277683675289154, |
| "learning_rate": 0.0001, |
| "loss": 1.5461, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.32038834951456313, |
| "grad_norm": 0.12134066224098206, |
| "learning_rate": 0.0001, |
| "loss": 1.5649, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.32072313357884163, |
| "grad_norm": 0.12735500931739807, |
| "learning_rate": 0.0001, |
| "loss": 1.608, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.3210579176431202, |
| "grad_norm": 0.133828267455101, |
| "learning_rate": 0.0001, |
| "loss": 1.5675, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.32139270170739875, |
| "grad_norm": 0.12437241524457932, |
| "learning_rate": 0.0001, |
| "loss": 1.6325, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.32172748577167726, |
| "grad_norm": 0.12489302456378937, |
| "learning_rate": 0.0001, |
| "loss": 1.6441, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.3220622698359558, |
| "grad_norm": 0.12957216799259186, |
| "learning_rate": 0.0001, |
| "loss": 1.5328, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.3223970539002344, |
| "grad_norm": 0.1317603886127472, |
| "learning_rate": 0.0001, |
| "loss": 1.6061, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.3227318379645129, |
| "grad_norm": 0.12075690180063248, |
| "learning_rate": 0.0001, |
| "loss": 1.5508, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.32306662202879144, |
| "grad_norm": 0.11924642324447632, |
| "learning_rate": 0.0001, |
| "loss": 1.4772, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.32340140609306994, |
| "grad_norm": 0.12515272200107574, |
| "learning_rate": 0.0001, |
| "loss": 1.5748, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.3237361901573485, |
| "grad_norm": 0.11952123045921326, |
| "learning_rate": 0.0001, |
| "loss": 1.5852, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.32407097422162706, |
| "grad_norm": 0.125240296125412, |
| "learning_rate": 0.0001, |
| "loss": 1.5388, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.32440575828590557, |
| "grad_norm": 0.12284346669912338, |
| "learning_rate": 0.0001, |
| "loss": 1.6059, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.3247405423501841, |
| "grad_norm": 0.11825854331254959, |
| "learning_rate": 0.0001, |
| "loss": 1.52, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.3250753264144627, |
| "grad_norm": 0.1247822567820549, |
| "learning_rate": 0.0001, |
| "loss": 1.6265, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.3254101104787412, |
| "grad_norm": 0.12490460276603699, |
| "learning_rate": 0.0001, |
| "loss": 1.6047, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.32574489454301975, |
| "grad_norm": 0.11784359812736511, |
| "learning_rate": 0.0001, |
| "loss": 1.451, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.3260796786072983, |
| "grad_norm": 0.12558013200759888, |
| "learning_rate": 0.0001, |
| "loss": 1.6244, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.3264144626715768, |
| "grad_norm": 0.12492769211530685, |
| "learning_rate": 0.0001, |
| "loss": 1.6821, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.32674924673585537, |
| "grad_norm": 0.11894410103559494, |
| "learning_rate": 0.0001, |
| "loss": 1.5476, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.32708403080013393, |
| "grad_norm": 0.12406729906797409, |
| "learning_rate": 0.0001, |
| "loss": 1.5954, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.32741881486441243, |
| "grad_norm": 0.12805567681789398, |
| "learning_rate": 0.0001, |
| "loss": 1.5216, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.327753598928691, |
| "grad_norm": 0.12648111581802368, |
| "learning_rate": 0.0001, |
| "loss": 1.6923, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.32808838299296955, |
| "grad_norm": 0.12503187358379364, |
| "learning_rate": 0.0001, |
| "loss": 1.6204, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.32842316705724806, |
| "grad_norm": 0.12180895358324051, |
| "learning_rate": 0.0001, |
| "loss": 1.5764, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.3287579511215266, |
| "grad_norm": 0.12118836492300034, |
| "learning_rate": 0.0001, |
| "loss": 1.4937, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.3290927351858052, |
| "grad_norm": 0.12758868932724, |
| "learning_rate": 0.0001, |
| "loss": 1.6198, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.3294275192500837, |
| "grad_norm": 0.1190565824508667, |
| "learning_rate": 0.0001, |
| "loss": 1.587, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.32976230331436224, |
| "grad_norm": 0.12521426379680634, |
| "learning_rate": 0.0001, |
| "loss": 1.5403, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.3300970873786408, |
| "grad_norm": 0.1259697824716568, |
| "learning_rate": 0.0001, |
| "loss": 1.5356, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.3304318714429193, |
| "grad_norm": 0.12639686465263367, |
| "learning_rate": 0.0001, |
| "loss": 1.5941, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.33076665550719786, |
| "grad_norm": 0.12533701956272125, |
| "learning_rate": 0.0001, |
| "loss": 1.6826, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.3311014395714764, |
| "grad_norm": 0.1349916309118271, |
| "learning_rate": 0.0001, |
| "loss": 1.6818, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.3314362236357549, |
| "grad_norm": 0.12522515654563904, |
| "learning_rate": 0.0001, |
| "loss": 1.531, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.3317710077000335, |
| "grad_norm": 0.12278946489095688, |
| "learning_rate": 0.0001, |
| "loss": 1.5098, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.33210579176431204, |
| "grad_norm": 0.1286853700876236, |
| "learning_rate": 0.0001, |
| "loss": 1.5117, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.33244057582859055, |
| "grad_norm": 0.1212511882185936, |
| "learning_rate": 0.0001, |
| "loss": 1.4762, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.3327753598928691, |
| "grad_norm": 0.1347900927066803, |
| "learning_rate": 0.0001, |
| "loss": 1.6793, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.33311014395714766, |
| "grad_norm": 0.11994650959968567, |
| "learning_rate": 0.0001, |
| "loss": 1.6026, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.33344492802142617, |
| "grad_norm": 0.13167862594127655, |
| "learning_rate": 0.0001, |
| "loss": 1.6341, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.3337797120857047, |
| "grad_norm": 0.13315805792808533, |
| "learning_rate": 0.0001, |
| "loss": 1.5414, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.3341144961499833, |
| "grad_norm": 0.12088074535131454, |
| "learning_rate": 0.0001, |
| "loss": 1.5769, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.3344492802142618, |
| "grad_norm": 0.13783089816570282, |
| "learning_rate": 0.0001, |
| "loss": 1.5365, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.33478406427854035, |
| "grad_norm": 0.13187260925769806, |
| "learning_rate": 0.0001, |
| "loss": 1.5929, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3351188483428189, |
| "grad_norm": 0.13189886510372162, |
| "learning_rate": 0.0001, |
| "loss": 1.5591, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.3354536324070974, |
| "grad_norm": 0.1421831101179123, |
| "learning_rate": 0.0001, |
| "loss": 1.5674, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.335788416471376, |
| "grad_norm": 0.1282414346933365, |
| "learning_rate": 0.0001, |
| "loss": 1.5696, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.3361232005356545, |
| "grad_norm": 0.13641226291656494, |
| "learning_rate": 0.0001, |
| "loss": 1.5336, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.33645798459993304, |
| "grad_norm": 0.14396816492080688, |
| "learning_rate": 0.0001, |
| "loss": 1.5648, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.3367927686642116, |
| "grad_norm": 0.12792754173278809, |
| "learning_rate": 0.0001, |
| "loss": 1.631, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.3371275527284901, |
| "grad_norm": 0.1327052116394043, |
| "learning_rate": 0.0001, |
| "loss": 1.5746, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.33746233679276866, |
| "grad_norm": 0.14353278279304504, |
| "learning_rate": 0.0001, |
| "loss": 1.5345, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.3377971208570472, |
| "grad_norm": 0.137548565864563, |
| "learning_rate": 0.0001, |
| "loss": 1.6771, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.3381319049213257, |
| "grad_norm": 0.13727347552776337, |
| "learning_rate": 0.0001, |
| "loss": 1.6451, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3384666889856043, |
| "grad_norm": 0.13395574688911438, |
| "learning_rate": 0.0001, |
| "loss": 1.5378, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.33880147304988284, |
| "grad_norm": 0.12692630290985107, |
| "learning_rate": 0.0001, |
| "loss": 1.5555, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.33913625711416134, |
| "grad_norm": 0.12900549173355103, |
| "learning_rate": 0.0001, |
| "loss": 1.5451, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.3394710411784399, |
| "grad_norm": 0.11654023826122284, |
| "learning_rate": 0.0001, |
| "loss": 1.5063, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.33980582524271846, |
| "grad_norm": 0.13518574833869934, |
| "learning_rate": 0.0001, |
| "loss": 1.5578, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.34014060930699697, |
| "grad_norm": 0.126609668135643, |
| "learning_rate": 0.0001, |
| "loss": 1.4299, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.3404753933712755, |
| "grad_norm": 0.12412185966968536, |
| "learning_rate": 0.0001, |
| "loss": 1.5083, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.3408101774355541, |
| "grad_norm": 0.12521536648273468, |
| "learning_rate": 0.0001, |
| "loss": 1.5264, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.3411449614998326, |
| "grad_norm": 0.12396744638681412, |
| "learning_rate": 0.0001, |
| "loss": 1.5984, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.34147974556411115, |
| "grad_norm": 0.12353380024433136, |
| "learning_rate": 0.0001, |
| "loss": 1.5615, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.3418145296283897, |
| "grad_norm": 0.1337115615606308, |
| "learning_rate": 0.0001, |
| "loss": 1.5777, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.3421493136926682, |
| "grad_norm": 0.13354641199111938, |
| "learning_rate": 0.0001, |
| "loss": 1.5417, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.34248409775694677, |
| "grad_norm": 0.12444625794887543, |
| "learning_rate": 0.0001, |
| "loss": 1.579, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.34281888182122533, |
| "grad_norm": 0.12876839935779572, |
| "learning_rate": 0.0001, |
| "loss": 1.4921, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.34315366588550383, |
| "grad_norm": 0.13097478449344635, |
| "learning_rate": 0.0001, |
| "loss": 1.5756, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.3434884499497824, |
| "grad_norm": 0.1257512867450714, |
| "learning_rate": 0.0001, |
| "loss": 1.5273, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.34382323401406095, |
| "grad_norm": 0.13378176093101501, |
| "learning_rate": 0.0001, |
| "loss": 1.5484, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.34415801807833946, |
| "grad_norm": 0.1325940638780594, |
| "learning_rate": 0.0001, |
| "loss": 1.6229, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.344492802142618, |
| "grad_norm": 0.11962547153234482, |
| "learning_rate": 0.0001, |
| "loss": 1.4859, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.3448275862068966, |
| "grad_norm": 0.12927503883838654, |
| "learning_rate": 0.0001, |
| "loss": 1.6788, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.3451623702711751, |
| "grad_norm": 0.13427825272083282, |
| "learning_rate": 0.0001, |
| "loss": 1.5514, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.34549715433545364, |
| "grad_norm": 0.13139428198337555, |
| "learning_rate": 0.0001, |
| "loss": 1.6164, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.3458319383997322, |
| "grad_norm": 0.12266752868890762, |
| "learning_rate": 0.0001, |
| "loss": 1.5226, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.3461667224640107, |
| "grad_norm": 0.14490726590156555, |
| "learning_rate": 0.0001, |
| "loss": 1.5562, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.34650150652828926, |
| "grad_norm": 0.11922143399715424, |
| "learning_rate": 0.0001, |
| "loss": 1.465, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.3468362905925678, |
| "grad_norm": 0.12442134320735931, |
| "learning_rate": 0.0001, |
| "loss": 1.5653, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.3471710746568463, |
| "grad_norm": 0.1383199840784073, |
| "learning_rate": 0.0001, |
| "loss": 1.5509, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.3475058587211249, |
| "grad_norm": 0.12311188876628876, |
| "learning_rate": 0.0001, |
| "loss": 1.5429, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.34784064278540344, |
| "grad_norm": 0.12368562817573547, |
| "learning_rate": 0.0001, |
| "loss": 1.6099, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.34817542684968195, |
| "grad_norm": 0.13235348463058472, |
| "learning_rate": 0.0001, |
| "loss": 1.586, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.3485102109139605, |
| "grad_norm": 0.12543101608753204, |
| "learning_rate": 0.0001, |
| "loss": 1.5094, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.348844994978239, |
| "grad_norm": 0.12461157888174057, |
| "learning_rate": 0.0001, |
| "loss": 1.6067, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.34917977904251757, |
| "grad_norm": 0.12375465035438538, |
| "learning_rate": 0.0001, |
| "loss": 1.5953, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.34951456310679613, |
| "grad_norm": 0.13041523098945618, |
| "learning_rate": 0.0001, |
| "loss": 1.6088, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.34984934717107463, |
| "grad_norm": 0.12022354453802109, |
| "learning_rate": 0.0001, |
| "loss": 1.4805, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.3501841312353532, |
| "grad_norm": 0.1251700222492218, |
| "learning_rate": 0.0001, |
| "loss": 1.5457, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.35051891529963175, |
| "grad_norm": 0.12562930583953857, |
| "learning_rate": 0.0001, |
| "loss": 1.501, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.35085369936391025, |
| "grad_norm": 0.13178695738315582, |
| "learning_rate": 0.0001, |
| "loss": 1.6332, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.3511884834281888, |
| "grad_norm": 0.12346772104501724, |
| "learning_rate": 0.0001, |
| "loss": 1.5875, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.3515232674924674, |
| "grad_norm": 0.12000037729740143, |
| "learning_rate": 0.0001, |
| "loss": 1.5166, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.3518580515567459, |
| "grad_norm": 0.13240620493888855, |
| "learning_rate": 0.0001, |
| "loss": 1.5801, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.35219283562102444, |
| "grad_norm": 0.12688381969928741, |
| "learning_rate": 0.0001, |
| "loss": 1.5581, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.352527619685303, |
| "grad_norm": 0.12421749532222748, |
| "learning_rate": 0.0001, |
| "loss": 1.5626, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.3528624037495815, |
| "grad_norm": 0.12876258790493011, |
| "learning_rate": 0.0001, |
| "loss": 1.4921, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.35319718781386006, |
| "grad_norm": 0.13299116492271423, |
| "learning_rate": 0.0001, |
| "loss": 1.5828, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.3535319718781386, |
| "grad_norm": 0.12605415284633636, |
| "learning_rate": 0.0001, |
| "loss": 1.5963, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.3538667559424171, |
| "grad_norm": 0.13100145757198334, |
| "learning_rate": 0.0001, |
| "loss": 1.6035, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.3542015400066957, |
| "grad_norm": 0.12380324304103851, |
| "learning_rate": 0.0001, |
| "loss": 1.5784, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.35453632407097424, |
| "grad_norm": 0.1288285106420517, |
| "learning_rate": 0.0001, |
| "loss": 1.5454, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.35487110813525274, |
| "grad_norm": 0.12464431673288345, |
| "learning_rate": 0.0001, |
| "loss": 1.5622, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.3552058921995313, |
| "grad_norm": 0.12694504857063293, |
| "learning_rate": 0.0001, |
| "loss": 1.5361, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.35554067626380986, |
| "grad_norm": 0.12736117839813232, |
| "learning_rate": 0.0001, |
| "loss": 1.5931, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.35587546032808837, |
| "grad_norm": 0.12816745042800903, |
| "learning_rate": 0.0001, |
| "loss": 1.584, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.3562102443923669, |
| "grad_norm": 0.12096529453992844, |
| "learning_rate": 0.0001, |
| "loss": 1.4851, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.3565450284566455, |
| "grad_norm": 0.12956807017326355, |
| "learning_rate": 0.0001, |
| "loss": 1.5296, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.356879812520924, |
| "grad_norm": 0.12413816154003143, |
| "learning_rate": 0.0001, |
| "loss": 1.5634, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.35721459658520255, |
| "grad_norm": 0.13675865530967712, |
| "learning_rate": 0.0001, |
| "loss": 1.498, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.3575493806494811, |
| "grad_norm": 0.12694036960601807, |
| "learning_rate": 0.0001, |
| "loss": 1.6586, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.3578841647137596, |
| "grad_norm": 0.13280896842479706, |
| "learning_rate": 0.0001, |
| "loss": 1.4662, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.35821894877803817, |
| "grad_norm": 0.13775292038917542, |
| "learning_rate": 0.0001, |
| "loss": 1.5833, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.35855373284231673, |
| "grad_norm": 0.12691499292850494, |
| "learning_rate": 0.0001, |
| "loss": 1.6034, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.35888851690659523, |
| "grad_norm": 0.13247890770435333, |
| "learning_rate": 0.0001, |
| "loss": 1.5617, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.3592233009708738, |
| "grad_norm": 0.1524164378643036, |
| "learning_rate": 0.0001, |
| "loss": 1.7153, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.35955808503515235, |
| "grad_norm": 0.12795189023017883, |
| "learning_rate": 0.0001, |
| "loss": 1.5657, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.35989286909943086, |
| "grad_norm": 0.12827672064304352, |
| "learning_rate": 0.0001, |
| "loss": 1.4345, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.3602276531637094, |
| "grad_norm": 0.13488048315048218, |
| "learning_rate": 0.0001, |
| "loss": 1.5137, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.360562437227988, |
| "grad_norm": 0.11891927570104599, |
| "learning_rate": 0.0001, |
| "loss": 1.508, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.3608972212922665, |
| "grad_norm": 0.1263907551765442, |
| "learning_rate": 0.0001, |
| "loss": 1.5969, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.36123200535654504, |
| "grad_norm": 0.12749949097633362, |
| "learning_rate": 0.0001, |
| "loss": 1.5646, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.36156678942082354, |
| "grad_norm": 0.12221404910087585, |
| "learning_rate": 0.0001, |
| "loss": 1.5279, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.3619015734851021, |
| "grad_norm": 0.12473400682210922, |
| "learning_rate": 0.0001, |
| "loss": 1.507, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.36223635754938066, |
| "grad_norm": 0.13297304511070251, |
| "learning_rate": 0.0001, |
| "loss": 1.5636, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.36257114161365916, |
| "grad_norm": 0.1260288655757904, |
| "learning_rate": 0.0001, |
| "loss": 1.5429, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.3629059256779377, |
| "grad_norm": 0.12271251529455185, |
| "learning_rate": 0.0001, |
| "loss": 1.6139, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.3632407097422163, |
| "grad_norm": 0.13517338037490845, |
| "learning_rate": 0.0001, |
| "loss": 1.59, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.3635754938064948, |
| "grad_norm": 0.12335921078920364, |
| "learning_rate": 0.0001, |
| "loss": 1.5477, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.36391027787077335, |
| "grad_norm": 0.12416140735149384, |
| "learning_rate": 0.0001, |
| "loss": 1.5792, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.3642450619350519, |
| "grad_norm": 0.1330622136592865, |
| "learning_rate": 0.0001, |
| "loss": 1.6416, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.3645798459993304, |
| "grad_norm": 0.11882945895195007, |
| "learning_rate": 0.0001, |
| "loss": 1.5633, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.36491463006360897, |
| "grad_norm": 0.12056804448366165, |
| "learning_rate": 0.0001, |
| "loss": 1.5639, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.36524941412788753, |
| "grad_norm": 0.12773139774799347, |
| "learning_rate": 0.0001, |
| "loss": 1.5221, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.36558419819216603, |
| "grad_norm": 0.12159121781587601, |
| "learning_rate": 0.0001, |
| "loss": 1.5255, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.3659189822564446, |
| "grad_norm": 0.12454614788293839, |
| "learning_rate": 0.0001, |
| "loss": 1.5685, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.36625376632072315, |
| "grad_norm": 0.1252131462097168, |
| "learning_rate": 0.0001, |
| "loss": 1.5721, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.36658855038500165, |
| "grad_norm": 0.12228623777627945, |
| "learning_rate": 0.0001, |
| "loss": 1.5488, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.3669233344492802, |
| "grad_norm": 0.1220550686120987, |
| "learning_rate": 0.0001, |
| "loss": 1.524, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.3672581185135588, |
| "grad_norm": 0.12096890807151794, |
| "learning_rate": 0.0001, |
| "loss": 1.4846, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.3675929025778373, |
| "grad_norm": 0.12377587705850601, |
| "learning_rate": 0.0001, |
| "loss": 1.6305, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.36792768664211584, |
| "grad_norm": 0.12515562772750854, |
| "learning_rate": 0.0001, |
| "loss": 1.6078, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.3682624707063944, |
| "grad_norm": 0.12402921915054321, |
| "learning_rate": 0.0001, |
| "loss": 1.532, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3685972547706729, |
| "grad_norm": 0.12373632192611694, |
| "learning_rate": 0.0001, |
| "loss": 1.512, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.36893203883495146, |
| "grad_norm": 0.12751725316047668, |
| "learning_rate": 0.0001, |
| "loss": 1.5799, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.36926682289923, |
| "grad_norm": 0.12221360951662064, |
| "learning_rate": 0.0001, |
| "loss": 1.4454, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.3696016069635085, |
| "grad_norm": 0.12299706041812897, |
| "learning_rate": 0.0001, |
| "loss": 1.5994, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.3699363910277871, |
| "grad_norm": 0.1294013112783432, |
| "learning_rate": 0.0001, |
| "loss": 1.6196, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.37027117509206564, |
| "grad_norm": 0.1240616887807846, |
| "learning_rate": 0.0001, |
| "loss": 1.5548, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.37060595915634414, |
| "grad_norm": 0.12403808534145355, |
| "learning_rate": 0.0001, |
| "loss": 1.6311, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.3709407432206227, |
| "grad_norm": 0.11872854828834534, |
| "learning_rate": 0.0001, |
| "loss": 1.4156, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.37127552728490126, |
| "grad_norm": 0.12752331793308258, |
| "learning_rate": 0.0001, |
| "loss": 1.6212, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.37161031134917977, |
| "grad_norm": 0.12329373508691788, |
| "learning_rate": 0.0001, |
| "loss": 1.5868, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.3719450954134583, |
| "grad_norm": 0.12340104579925537, |
| "learning_rate": 0.0001, |
| "loss": 1.5292, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.3722798794777369, |
| "grad_norm": 0.11669819802045822, |
| "learning_rate": 0.0001, |
| "loss": 1.5188, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.3726146635420154, |
| "grad_norm": 0.11677731573581696, |
| "learning_rate": 0.0001, |
| "loss": 1.5151, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.37294944760629395, |
| "grad_norm": 0.12206505239009857, |
| "learning_rate": 0.0001, |
| "loss": 1.6733, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.3732842316705725, |
| "grad_norm": 0.12234992533922195, |
| "learning_rate": 0.0001, |
| "loss": 1.5242, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.373619015734851, |
| "grad_norm": 0.12357670813798904, |
| "learning_rate": 0.0001, |
| "loss": 1.5432, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.37395379979912957, |
| "grad_norm": 0.12345674633979797, |
| "learning_rate": 0.0001, |
| "loss": 1.6483, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.3742885838634081, |
| "grad_norm": 0.1179901510477066, |
| "learning_rate": 0.0001, |
| "loss": 1.5899, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.37462336792768663, |
| "grad_norm": 0.12135247141122818, |
| "learning_rate": 0.0001, |
| "loss": 1.554, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.3749581519919652, |
| "grad_norm": 0.12836892902851105, |
| "learning_rate": 0.0001, |
| "loss": 1.6242, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.3752929360562437, |
| "grad_norm": 0.12851716578006744, |
| "learning_rate": 0.0001, |
| "loss": 1.6372, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.37562772012052226, |
| "grad_norm": 0.12096036225557327, |
| "learning_rate": 0.0001, |
| "loss": 1.5042, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.3759625041848008, |
| "grad_norm": 0.121758371591568, |
| "learning_rate": 0.0001, |
| "loss": 1.5561, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.3762972882490793, |
| "grad_norm": 0.12547370791435242, |
| "learning_rate": 0.0001, |
| "loss": 1.571, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.3766320723133579, |
| "grad_norm": 0.12488459795713425, |
| "learning_rate": 0.0001, |
| "loss": 1.6101, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.37696685637763644, |
| "grad_norm": 0.12440396845340729, |
| "learning_rate": 0.0001, |
| "loss": 1.4978, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.37730164044191494, |
| "grad_norm": 0.1293293535709381, |
| "learning_rate": 0.0001, |
| "loss": 1.6226, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.3776364245061935, |
| "grad_norm": 0.1270667314529419, |
| "learning_rate": 0.0001, |
| "loss": 1.5403, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.37797120857047206, |
| "grad_norm": 0.13023768365383148, |
| "learning_rate": 0.0001, |
| "loss": 1.6641, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.37830599263475057, |
| "grad_norm": 0.12713496387004852, |
| "learning_rate": 0.0001, |
| "loss": 1.5685, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.3786407766990291, |
| "grad_norm": 0.126458540558815, |
| "learning_rate": 0.0001, |
| "loss": 1.5624, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.3789755607633077, |
| "grad_norm": 0.12100820988416672, |
| "learning_rate": 0.0001, |
| "loss": 1.5158, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.3793103448275862, |
| "grad_norm": 0.13373976945877075, |
| "learning_rate": 0.0001, |
| "loss": 1.5151, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.37964512889186475, |
| "grad_norm": 0.12730540335178375, |
| "learning_rate": 0.0001, |
| "loss": 1.5701, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.3799799129561433, |
| "grad_norm": 0.13641048967838287, |
| "learning_rate": 0.0001, |
| "loss": 1.5144, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.3803146970204218, |
| "grad_norm": 0.13271461427211761, |
| "learning_rate": 0.0001, |
| "loss": 1.5884, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.38064948108470037, |
| "grad_norm": 0.12385160475969315, |
| "learning_rate": 0.0001, |
| "loss": 1.5374, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.38098426514897893, |
| "grad_norm": 0.12949350476264954, |
| "learning_rate": 0.0001, |
| "loss": 1.546, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.38131904921325743, |
| "grad_norm": 0.135132297873497, |
| "learning_rate": 0.0001, |
| "loss": 1.5913, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.381653833277536, |
| "grad_norm": 0.11533955484628677, |
| "learning_rate": 0.0001, |
| "loss": 1.3968, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.38198861734181455, |
| "grad_norm": 0.13532719016075134, |
| "learning_rate": 0.0001, |
| "loss": 1.5534, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.38232340140609306, |
| "grad_norm": 0.14101184904575348, |
| "learning_rate": 0.0001, |
| "loss": 1.557, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.3826581854703716, |
| "grad_norm": 0.12038899213075638, |
| "learning_rate": 0.0001, |
| "loss": 1.4831, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.3829929695346502, |
| "grad_norm": 0.13053514063358307, |
| "learning_rate": 0.0001, |
| "loss": 1.5882, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.3833277535989287, |
| "grad_norm": 0.12372793257236481, |
| "learning_rate": 0.0001, |
| "loss": 1.6047, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.38366253766320724, |
| "grad_norm": 0.12823140621185303, |
| "learning_rate": 0.0001, |
| "loss": 1.6126, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.3839973217274858, |
| "grad_norm": 0.12058600783348083, |
| "learning_rate": 0.0001, |
| "loss": 1.4713, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.3843321057917643, |
| "grad_norm": 0.12674620747566223, |
| "learning_rate": 0.0001, |
| "loss": 1.6126, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.38466688985604286, |
| "grad_norm": 0.1214526891708374, |
| "learning_rate": 0.0001, |
| "loss": 1.6317, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.3850016739203214, |
| "grad_norm": 0.12831653654575348, |
| "learning_rate": 0.0001, |
| "loss": 1.5479, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.3853364579845999, |
| "grad_norm": 0.12079459428787231, |
| "learning_rate": 0.0001, |
| "loss": 1.5544, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.3856712420488785, |
| "grad_norm": 0.12021779268980026, |
| "learning_rate": 0.0001, |
| "loss": 1.5536, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.38600602611315704, |
| "grad_norm": 0.13052217662334442, |
| "learning_rate": 0.0001, |
| "loss": 1.5482, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.38634081017743555, |
| "grad_norm": 0.12613235414028168, |
| "learning_rate": 0.0001, |
| "loss": 1.6056, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.3866755942417141, |
| "grad_norm": 0.12751324474811554, |
| "learning_rate": 0.0001, |
| "loss": 1.5513, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.3870103783059926, |
| "grad_norm": 0.11987000703811646, |
| "learning_rate": 0.0001, |
| "loss": 1.4836, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.38734516237027117, |
| "grad_norm": 0.13999362289905548, |
| "learning_rate": 0.0001, |
| "loss": 1.6763, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.3876799464345497, |
| "grad_norm": 0.128611221909523, |
| "learning_rate": 0.0001, |
| "loss": 1.6281, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.38801473049882823, |
| "grad_norm": 0.1292606145143509, |
| "learning_rate": 0.0001, |
| "loss": 1.6846, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.3883495145631068, |
| "grad_norm": 0.13090923428535461, |
| "learning_rate": 0.0001, |
| "loss": 1.628, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.38868429862738535, |
| "grad_norm": 0.12356492131948471, |
| "learning_rate": 0.0001, |
| "loss": 1.5158, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.38901908269166385, |
| "grad_norm": 0.12005447596311569, |
| "learning_rate": 0.0001, |
| "loss": 1.62, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.3893538667559424, |
| "grad_norm": 0.12113460153341293, |
| "learning_rate": 0.0001, |
| "loss": 1.4954, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.38968865082022097, |
| "grad_norm": 0.11953802406787872, |
| "learning_rate": 0.0001, |
| "loss": 1.4891, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.3900234348844995, |
| "grad_norm": 0.1292644739151001, |
| "learning_rate": 0.0001, |
| "loss": 1.555, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.39035821894877804, |
| "grad_norm": 0.12345704436302185, |
| "learning_rate": 0.0001, |
| "loss": 1.4939, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.3906930030130566, |
| "grad_norm": 0.12334253638982773, |
| "learning_rate": 0.0001, |
| "loss": 1.6058, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.3910277870773351, |
| "grad_norm": 0.13044217228889465, |
| "learning_rate": 0.0001, |
| "loss": 1.5349, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.39136257114161366, |
| "grad_norm": 0.12309286743402481, |
| "learning_rate": 0.0001, |
| "loss": 1.5007, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.3916973552058922, |
| "grad_norm": 0.12565681338310242, |
| "learning_rate": 0.0001, |
| "loss": 1.5172, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.3920321392701707, |
| "grad_norm": 0.13335129618644714, |
| "learning_rate": 0.0001, |
| "loss": 1.5666, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.3923669233344493, |
| "grad_norm": 0.12664766609668732, |
| "learning_rate": 0.0001, |
| "loss": 1.5471, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.39270170739872784, |
| "grad_norm": 0.12703973054885864, |
| "learning_rate": 0.0001, |
| "loss": 1.545, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.39303649146300634, |
| "grad_norm": 0.12242884933948517, |
| "learning_rate": 0.0001, |
| "loss": 1.4768, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.3933712755272849, |
| "grad_norm": 0.13055263459682465, |
| "learning_rate": 0.0001, |
| "loss": 1.4782, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.39370605959156346, |
| "grad_norm": 0.13161849975585938, |
| "learning_rate": 0.0001, |
| "loss": 1.621, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.39404084365584197, |
| "grad_norm": 0.1257203370332718, |
| "learning_rate": 0.0001, |
| "loss": 1.5655, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.3943756277201205, |
| "grad_norm": 0.14164592325687408, |
| "learning_rate": 0.0001, |
| "loss": 1.4884, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.3947104117843991, |
| "grad_norm": 0.12696050107479095, |
| "learning_rate": 0.0001, |
| "loss": 1.5829, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.3950451958486776, |
| "grad_norm": 0.12652398645877838, |
| "learning_rate": 0.0001, |
| "loss": 1.6345, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.39537997991295615, |
| "grad_norm": 0.12333660572767258, |
| "learning_rate": 0.0001, |
| "loss": 1.5375, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.3957147639772347, |
| "grad_norm": 0.13108794391155243, |
| "learning_rate": 0.0001, |
| "loss": 1.6441, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.3960495480415132, |
| "grad_norm": 0.13195887207984924, |
| "learning_rate": 0.0001, |
| "loss": 1.5939, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.39638433210579177, |
| "grad_norm": 0.12931646406650543, |
| "learning_rate": 0.0001, |
| "loss": 1.5317, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.39671911617007033, |
| "grad_norm": 0.12439566105604172, |
| "learning_rate": 0.0001, |
| "loss": 1.5391, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.39705390023434883, |
| "grad_norm": 0.12557551264762878, |
| "learning_rate": 0.0001, |
| "loss": 1.5723, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.3973886842986274, |
| "grad_norm": 0.13013330101966858, |
| "learning_rate": 0.0001, |
| "loss": 1.4812, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.39772346836290595, |
| "grad_norm": 0.12955336272716522, |
| "learning_rate": 0.0001, |
| "loss": 1.5799, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.39805825242718446, |
| "grad_norm": 0.1347295343875885, |
| "learning_rate": 0.0001, |
| "loss": 1.6634, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.398393036491463, |
| "grad_norm": 0.13187319040298462, |
| "learning_rate": 0.0001, |
| "loss": 1.5146, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.3987278205557416, |
| "grad_norm": 0.13010048866271973, |
| "learning_rate": 0.0001, |
| "loss": 1.5003, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.3990626046200201, |
| "grad_norm": 0.12330204248428345, |
| "learning_rate": 0.0001, |
| "loss": 1.5765, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.39939738868429864, |
| "grad_norm": 0.1346241533756256, |
| "learning_rate": 0.0001, |
| "loss": 1.5979, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.39973217274857714, |
| "grad_norm": 0.13725797832012177, |
| "learning_rate": 0.0001, |
| "loss": 1.5813, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.4000669568128557, |
| "grad_norm": 0.12039465457201004, |
| "learning_rate": 0.0001, |
| "loss": 1.4363, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.40040174087713426, |
| "grad_norm": 0.1276928186416626, |
| "learning_rate": 0.0001, |
| "loss": 1.6575, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.40073652494141276, |
| "grad_norm": 0.12903235852718353, |
| "learning_rate": 0.0001, |
| "loss": 1.6059, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.4010713090056913, |
| "grad_norm": 0.12678353488445282, |
| "learning_rate": 0.0001, |
| "loss": 1.5624, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.4014060930699699, |
| "grad_norm": 0.12884308397769928, |
| "learning_rate": 0.0001, |
| "loss": 1.5995, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.4017408771342484, |
| "grad_norm": 0.11986846476793289, |
| "learning_rate": 0.0001, |
| "loss": 1.4767, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.40207566119852695, |
| "grad_norm": 0.12227410078048706, |
| "learning_rate": 0.0001, |
| "loss": 1.5056, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.4024104452628055, |
| "grad_norm": 0.12593914568424225, |
| "learning_rate": 0.0001, |
| "loss": 1.5836, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.402745229327084, |
| "grad_norm": 0.12477041780948639, |
| "learning_rate": 0.0001, |
| "loss": 1.5745, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.40308001339136257, |
| "grad_norm": 0.1216067373752594, |
| "learning_rate": 0.0001, |
| "loss": 1.5824, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.4034147974556411, |
| "grad_norm": 0.13550971448421478, |
| "learning_rate": 0.0001, |
| "loss": 1.6635, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.40374958151991963, |
| "grad_norm": 0.12963739037513733, |
| "learning_rate": 0.0001, |
| "loss": 1.6586, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.4040843655841982, |
| "grad_norm": 0.11887506395578384, |
| "learning_rate": 0.0001, |
| "loss": 1.4933, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.40441914964847675, |
| "grad_norm": 0.13262464106082916, |
| "learning_rate": 0.0001, |
| "loss": 1.5759, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.40475393371275525, |
| "grad_norm": 0.13952501118183136, |
| "learning_rate": 0.0001, |
| "loss": 1.6918, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.4050887177770338, |
| "grad_norm": 0.13401460647583008, |
| "learning_rate": 0.0001, |
| "loss": 1.5102, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4054235018413124, |
| "grad_norm": 0.14476630091667175, |
| "learning_rate": 0.0001, |
| "loss": 1.6817, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.4057582859055909, |
| "grad_norm": 0.1285640001296997, |
| "learning_rate": 0.0001, |
| "loss": 1.653, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.40609306996986944, |
| "grad_norm": 0.13845203816890717, |
| "learning_rate": 0.0001, |
| "loss": 1.5996, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.406427854034148, |
| "grad_norm": 0.13416174054145813, |
| "learning_rate": 0.0001, |
| "loss": 1.6222, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.4067626380984265, |
| "grad_norm": 0.1267634481191635, |
| "learning_rate": 0.0001, |
| "loss": 1.5257, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.40709742216270506, |
| "grad_norm": 0.13453447818756104, |
| "learning_rate": 0.0001, |
| "loss": 1.5745, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.4074322062269836, |
| "grad_norm": 0.12069771438837051, |
| "learning_rate": 0.0001, |
| "loss": 1.5516, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.4077669902912621, |
| "grad_norm": 0.12483450770378113, |
| "learning_rate": 0.0001, |
| "loss": 1.5899, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.4081017743555407, |
| "grad_norm": 0.14123085141181946, |
| "learning_rate": 0.0001, |
| "loss": 1.6334, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.40843655841981924, |
| "grad_norm": 0.12844936549663544, |
| "learning_rate": 0.0001, |
| "loss": 1.4936, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.40877134248409774, |
| "grad_norm": 0.13094481825828552, |
| "learning_rate": 0.0001, |
| "loss": 1.6554, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.4091061265483763, |
| "grad_norm": 0.12563113868236542, |
| "learning_rate": 0.0001, |
| "loss": 1.4708, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.40944091061265486, |
| "grad_norm": 0.12495769560337067, |
| "learning_rate": 0.0001, |
| "loss": 1.5012, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.40977569467693337, |
| "grad_norm": 0.12314360588788986, |
| "learning_rate": 0.0001, |
| "loss": 1.5769, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.4101104787412119, |
| "grad_norm": 0.1389753818511963, |
| "learning_rate": 0.0001, |
| "loss": 1.5978, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.4104452628054905, |
| "grad_norm": 0.12703324854373932, |
| "learning_rate": 0.0001, |
| "loss": 1.5349, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.410780046869769, |
| "grad_norm": 0.11995337903499603, |
| "learning_rate": 0.0001, |
| "loss": 1.5307, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.41111483093404755, |
| "grad_norm": 0.1330454796552658, |
| "learning_rate": 0.0001, |
| "loss": 1.6277, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.41144961499832605, |
| "grad_norm": 0.12632183730602264, |
| "learning_rate": 0.0001, |
| "loss": 1.507, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.4117843990626046, |
| "grad_norm": 0.13255640864372253, |
| "learning_rate": 0.0001, |
| "loss": 1.5797, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.41211918312688317, |
| "grad_norm": 0.13822025060653687, |
| "learning_rate": 0.0001, |
| "loss": 1.5945, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.4124539671911617, |
| "grad_norm": 0.1303391307592392, |
| "learning_rate": 0.0001, |
| "loss": 1.5928, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.41278875125544023, |
| "grad_norm": 0.12309371680021286, |
| "learning_rate": 0.0001, |
| "loss": 1.4794, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.4131235353197188, |
| "grad_norm": 0.12375032901763916, |
| "learning_rate": 0.0001, |
| "loss": 1.5133, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.4134583193839973, |
| "grad_norm": 0.13613499701023102, |
| "learning_rate": 0.0001, |
| "loss": 1.621, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.41379310344827586, |
| "grad_norm": 0.13198764622211456, |
| "learning_rate": 0.0001, |
| "loss": 1.5762, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.4141278875125544, |
| "grad_norm": 0.1294814646244049, |
| "learning_rate": 0.0001, |
| "loss": 1.5836, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.4144626715768329, |
| "grad_norm": 0.12597797811031342, |
| "learning_rate": 0.0001, |
| "loss": 1.5988, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.4147974556411115, |
| "grad_norm": 0.12371232360601425, |
| "learning_rate": 0.0001, |
| "loss": 1.5432, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.41513223970539004, |
| "grad_norm": 0.12919354438781738, |
| "learning_rate": 0.0001, |
| "loss": 1.5507, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.41546702376966854, |
| "grad_norm": 0.12919418513774872, |
| "learning_rate": 0.0001, |
| "loss": 1.7431, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.4158018078339471, |
| "grad_norm": 0.12314452975988388, |
| "learning_rate": 0.0001, |
| "loss": 1.5407, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.41613659189822566, |
| "grad_norm": 0.1360636204481125, |
| "learning_rate": 0.0001, |
| "loss": 1.5872, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.41647137596250416, |
| "grad_norm": 0.12739785015583038, |
| "learning_rate": 0.0001, |
| "loss": 1.4998, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.4168061600267827, |
| "grad_norm": 0.12558461725711823, |
| "learning_rate": 0.0001, |
| "loss": 1.6422, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.4171409440910613, |
| "grad_norm": 0.130743145942688, |
| "learning_rate": 0.0001, |
| "loss": 1.6537, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.4174757281553398, |
| "grad_norm": 0.12714166939258575, |
| "learning_rate": 0.0001, |
| "loss": 1.4309, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.41781051221961835, |
| "grad_norm": 0.12849892675876617, |
| "learning_rate": 0.0001, |
| "loss": 1.514, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.4181452962838969, |
| "grad_norm": 0.1366477757692337, |
| "learning_rate": 0.0001, |
| "loss": 1.6397, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.4184800803481754, |
| "grad_norm": 0.1324029415845871, |
| "learning_rate": 0.0001, |
| "loss": 1.5647, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.41881486441245397, |
| "grad_norm": 0.1272830069065094, |
| "learning_rate": 0.0001, |
| "loss": 1.633, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.41914964847673253, |
| "grad_norm": 0.12891270220279694, |
| "learning_rate": 0.0001, |
| "loss": 1.5571, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.41948443254101103, |
| "grad_norm": 0.1334099918603897, |
| "learning_rate": 0.0001, |
| "loss": 1.4905, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.4198192166052896, |
| "grad_norm": 0.12439723312854767, |
| "learning_rate": 0.0001, |
| "loss": 1.5859, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.42015400066956815, |
| "grad_norm": 0.13870543241500854, |
| "learning_rate": 0.0001, |
| "loss": 1.6226, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.42048878473384665, |
| "grad_norm": 0.13232079148292542, |
| "learning_rate": 0.0001, |
| "loss": 1.6566, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.4208235687981252, |
| "grad_norm": 0.12575885653495789, |
| "learning_rate": 0.0001, |
| "loss": 1.5629, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.4211583528624038, |
| "grad_norm": 0.12995895743370056, |
| "learning_rate": 0.0001, |
| "loss": 1.5703, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.4214931369266823, |
| "grad_norm": 0.12801054120063782, |
| "learning_rate": 0.0001, |
| "loss": 1.6326, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.42182792099096084, |
| "grad_norm": 0.12584693729877472, |
| "learning_rate": 0.0001, |
| "loss": 1.6329, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4221627050552394, |
| "grad_norm": 0.13142889738082886, |
| "learning_rate": 0.0001, |
| "loss": 1.7081, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.4224974891195179, |
| "grad_norm": 0.12793239951133728, |
| "learning_rate": 0.0001, |
| "loss": 1.6032, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.42283227318379646, |
| "grad_norm": 0.12368165701627731, |
| "learning_rate": 0.0001, |
| "loss": 1.5468, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.423167057248075, |
| "grad_norm": 0.13081911206245422, |
| "learning_rate": 0.0001, |
| "loss": 1.6175, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.4235018413123535, |
| "grad_norm": 0.12801037728786469, |
| "learning_rate": 0.0001, |
| "loss": 1.537, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.4238366253766321, |
| "grad_norm": 0.1274782121181488, |
| "learning_rate": 0.0001, |
| "loss": 1.5277, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.4241714094409106, |
| "grad_norm": 0.1194332018494606, |
| "learning_rate": 0.0001, |
| "loss": 1.496, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.42450619350518914, |
| "grad_norm": 0.13174927234649658, |
| "learning_rate": 0.0001, |
| "loss": 1.5975, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.4248409775694677, |
| "grad_norm": 0.1254556030035019, |
| "learning_rate": 0.0001, |
| "loss": 1.6119, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.4251757616337462, |
| "grad_norm": 0.13203318417072296, |
| "learning_rate": 0.0001, |
| "loss": 1.5564, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.42551054569802477, |
| "grad_norm": 0.12941622734069824, |
| "learning_rate": 0.0001, |
| "loss": 1.6285, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.4258453297623033, |
| "grad_norm": 0.12527894973754883, |
| "learning_rate": 0.0001, |
| "loss": 1.5703, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.42618011382658183, |
| "grad_norm": 0.12617714703083038, |
| "learning_rate": 0.0001, |
| "loss": 1.6523, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.4265148978908604, |
| "grad_norm": 0.12326870858669281, |
| "learning_rate": 0.0001, |
| "loss": 1.5533, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.42684968195513895, |
| "grad_norm": 0.1295124888420105, |
| "learning_rate": 0.0001, |
| "loss": 1.5587, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.42718446601941745, |
| "grad_norm": 0.12248773872852325, |
| "learning_rate": 0.0001, |
| "loss": 1.5762, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.427519250083696, |
| "grad_norm": 0.12932232022285461, |
| "learning_rate": 0.0001, |
| "loss": 1.6162, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.42785403414797457, |
| "grad_norm": 0.1178537905216217, |
| "learning_rate": 0.0001, |
| "loss": 1.472, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.4281888182122531, |
| "grad_norm": 0.1269647628068924, |
| "learning_rate": 0.0001, |
| "loss": 1.5551, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.42852360227653163, |
| "grad_norm": 0.120000459253788, |
| "learning_rate": 0.0001, |
| "loss": 1.509, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4288583863408102, |
| "grad_norm": 0.12708665430545807, |
| "learning_rate": 0.0001, |
| "loss": 1.5293, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.4291931704050887, |
| "grad_norm": 0.13209426403045654, |
| "learning_rate": 0.0001, |
| "loss": 1.6311, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.42952795446936726, |
| "grad_norm": 0.1305491328239441, |
| "learning_rate": 0.0001, |
| "loss": 1.5505, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.4298627385336458, |
| "grad_norm": 0.1237809956073761, |
| "learning_rate": 0.0001, |
| "loss": 1.5457, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.4301975225979243, |
| "grad_norm": 0.13375982642173767, |
| "learning_rate": 0.0001, |
| "loss": 1.5321, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.4305323066622029, |
| "grad_norm": 0.13597902655601501, |
| "learning_rate": 0.0001, |
| "loss": 1.6229, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.43086709072648144, |
| "grad_norm": 0.12488207966089249, |
| "learning_rate": 0.0001, |
| "loss": 1.5231, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.43120187479075994, |
| "grad_norm": 0.12950995564460754, |
| "learning_rate": 0.0001, |
| "loss": 1.7162, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.4315366588550385, |
| "grad_norm": 0.12734153866767883, |
| "learning_rate": 0.0001, |
| "loss": 1.5735, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.43187144291931706, |
| "grad_norm": 0.13684290647506714, |
| "learning_rate": 0.0001, |
| "loss": 1.5866, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.43220622698359557, |
| "grad_norm": 0.12665408849716187, |
| "learning_rate": 0.0001, |
| "loss": 1.5236, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.4325410110478741, |
| "grad_norm": 0.12092933058738708, |
| "learning_rate": 0.0001, |
| "loss": 1.4859, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.4328757951121527, |
| "grad_norm": 0.14012545347213745, |
| "learning_rate": 0.0001, |
| "loss": 1.6158, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.4332105791764312, |
| "grad_norm": 0.12820059061050415, |
| "learning_rate": 0.0001, |
| "loss": 1.5108, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.43354536324070975, |
| "grad_norm": 0.13247036933898926, |
| "learning_rate": 0.0001, |
| "loss": 1.6031, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.4338801473049883, |
| "grad_norm": 0.12412893772125244, |
| "learning_rate": 0.0001, |
| "loss": 1.5829, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.4342149313692668, |
| "grad_norm": 0.12657597661018372, |
| "learning_rate": 0.0001, |
| "loss": 1.5139, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.43454971543354537, |
| "grad_norm": 0.13494263589382172, |
| "learning_rate": 0.0001, |
| "loss": 1.6264, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.43488449949782393, |
| "grad_norm": 0.12553179264068604, |
| "learning_rate": 0.0001, |
| "loss": 1.5587, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.43521928356210243, |
| "grad_norm": 0.12029055505990982, |
| "learning_rate": 0.0001, |
| "loss": 1.5177, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.435554067626381, |
| "grad_norm": 0.12742608785629272, |
| "learning_rate": 0.0001, |
| "loss": 1.6345, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.43588885169065955, |
| "grad_norm": 0.12749677896499634, |
| "learning_rate": 0.0001, |
| "loss": 1.5183, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.43622363575493805, |
| "grad_norm": 0.13716910779476166, |
| "learning_rate": 0.0001, |
| "loss": 1.6064, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.4365584198192166, |
| "grad_norm": 0.11626800149679184, |
| "learning_rate": 0.0001, |
| "loss": 1.461, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.4368932038834951, |
| "grad_norm": 0.12892816960811615, |
| "learning_rate": 0.0001, |
| "loss": 1.5856, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.4372279879477737, |
| "grad_norm": 0.12171407043933868, |
| "learning_rate": 0.0001, |
| "loss": 1.5669, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.43756277201205224, |
| "grad_norm": 0.12705732882022858, |
| "learning_rate": 0.0001, |
| "loss": 1.5392, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.43789755607633074, |
| "grad_norm": 0.12489151209592819, |
| "learning_rate": 0.0001, |
| "loss": 1.5621, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.4382323401406093, |
| "grad_norm": 0.1306968778371811, |
| "learning_rate": 0.0001, |
| "loss": 1.5601, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.43856712420488786, |
| "grad_norm": 0.12457779794931412, |
| "learning_rate": 0.0001, |
| "loss": 1.5292, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.43890190826916636, |
| "grad_norm": 0.1351223587989807, |
| "learning_rate": 0.0001, |
| "loss": 1.6364, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.4392366923334449, |
| "grad_norm": 0.16403745114803314, |
| "learning_rate": 0.0001, |
| "loss": 1.6135, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.4395714763977235, |
| "grad_norm": 0.1373598426580429, |
| "learning_rate": 0.0001, |
| "loss": 1.6102, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.439906260462002, |
| "grad_norm": 0.12474294006824493, |
| "learning_rate": 0.0001, |
| "loss": 1.4732, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.44024104452628054, |
| "grad_norm": 0.13775482773780823, |
| "learning_rate": 0.0001, |
| "loss": 1.4623, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.4405758285905591, |
| "grad_norm": 0.12874817848205566, |
| "learning_rate": 0.0001, |
| "loss": 1.5885, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.4409106126548376, |
| "grad_norm": 0.13382995128631592, |
| "learning_rate": 0.0001, |
| "loss": 1.4458, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.44124539671911617, |
| "grad_norm": 0.1267126202583313, |
| "learning_rate": 0.0001, |
| "loss": 1.5709, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.4415801807833947, |
| "grad_norm": 0.12839357554912567, |
| "learning_rate": 0.0001, |
| "loss": 1.5377, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.44191496484767323, |
| "grad_norm": 0.13176332414150238, |
| "learning_rate": 0.0001, |
| "loss": 1.4342, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.4422497489119518, |
| "grad_norm": 0.13202795386314392, |
| "learning_rate": 0.0001, |
| "loss": 1.5997, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.44258453297623035, |
| "grad_norm": 0.12316932529211044, |
| "learning_rate": 0.0001, |
| "loss": 1.4323, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.44291931704050885, |
| "grad_norm": 0.1301979273557663, |
| "learning_rate": 0.0001, |
| "loss": 1.5882, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.4432541011047874, |
| "grad_norm": 0.1263076364994049, |
| "learning_rate": 0.0001, |
| "loss": 1.4469, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.44358888516906597, |
| "grad_norm": 0.12310474365949631, |
| "learning_rate": 0.0001, |
| "loss": 1.4898, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.4439236692333445, |
| "grad_norm": 0.12039102613925934, |
| "learning_rate": 0.0001, |
| "loss": 1.5324, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.44425845329762303, |
| "grad_norm": 0.12545818090438843, |
| "learning_rate": 0.0001, |
| "loss": 1.6171, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.4445932373619016, |
| "grad_norm": 0.1259836107492447, |
| "learning_rate": 0.0001, |
| "loss": 1.5059, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.4449280214261801, |
| "grad_norm": 0.12518031895160675, |
| "learning_rate": 0.0001, |
| "loss": 1.5958, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.44526280549045866, |
| "grad_norm": 0.12583878636360168, |
| "learning_rate": 0.0001, |
| "loss": 1.4837, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.4455975895547372, |
| "grad_norm": 0.12569929659366608, |
| "learning_rate": 0.0001, |
| "loss": 1.536, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.4459323736190157, |
| "grad_norm": 0.1288549304008484, |
| "learning_rate": 0.0001, |
| "loss": 1.5525, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.4462671576832943, |
| "grad_norm": 0.13198384642601013, |
| "learning_rate": 0.0001, |
| "loss": 1.542, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.44660194174757284, |
| "grad_norm": 0.1238170713186264, |
| "learning_rate": 0.0001, |
| "loss": 1.4021, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.44693672581185134, |
| "grad_norm": 0.13295157253742218, |
| "learning_rate": 0.0001, |
| "loss": 1.5553, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.4472715098761299, |
| "grad_norm": 0.13403776288032532, |
| "learning_rate": 0.0001, |
| "loss": 1.4761, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.44760629394040846, |
| "grad_norm": 0.13343052566051483, |
| "learning_rate": 0.0001, |
| "loss": 1.573, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.44794107800468697, |
| "grad_norm": 0.125327467918396, |
| "learning_rate": 0.0001, |
| "loss": 1.5682, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.4482758620689655, |
| "grad_norm": 0.12958160042762756, |
| "learning_rate": 0.0001, |
| "loss": 1.5294, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.4486106461332441, |
| "grad_norm": 0.1384599506855011, |
| "learning_rate": 0.0001, |
| "loss": 1.5791, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.4489454301975226, |
| "grad_norm": 0.1257963478565216, |
| "learning_rate": 0.0001, |
| "loss": 1.5732, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.44928021426180115, |
| "grad_norm": 0.12630927562713623, |
| "learning_rate": 0.0001, |
| "loss": 1.5558, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.44961499832607965, |
| "grad_norm": 0.1268066167831421, |
| "learning_rate": 0.0001, |
| "loss": 1.5958, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.4499497823903582, |
| "grad_norm": 0.12455032020807266, |
| "learning_rate": 0.0001, |
| "loss": 1.5607, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.45028456645463677, |
| "grad_norm": 0.12265735119581223, |
| "learning_rate": 0.0001, |
| "loss": 1.5197, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.4506193505189153, |
| "grad_norm": 0.1307050883769989, |
| "learning_rate": 0.0001, |
| "loss": 1.6407, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.45095413458319383, |
| "grad_norm": 0.13128429651260376, |
| "learning_rate": 0.0001, |
| "loss": 1.5559, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.4512889186474724, |
| "grad_norm": 0.13010568916797638, |
| "learning_rate": 0.0001, |
| "loss": 1.5332, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.4516237027117509, |
| "grad_norm": 0.12650929391384125, |
| "learning_rate": 0.0001, |
| "loss": 1.6047, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.45195848677602946, |
| "grad_norm": 0.12306904792785645, |
| "learning_rate": 0.0001, |
| "loss": 1.5499, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.452293270840308, |
| "grad_norm": 0.13351021707057953, |
| "learning_rate": 0.0001, |
| "loss": 1.4737, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.4526280549045865, |
| "grad_norm": 0.12178155779838562, |
| "learning_rate": 0.0001, |
| "loss": 1.4775, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.4529628389688651, |
| "grad_norm": 0.13516512513160706, |
| "learning_rate": 0.0001, |
| "loss": 1.6391, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.45329762303314364, |
| "grad_norm": 0.12909267842769623, |
| "learning_rate": 0.0001, |
| "loss": 1.4684, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.45363240709742214, |
| "grad_norm": 0.12209142744541168, |
| "learning_rate": 0.0001, |
| "loss": 1.5198, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.4539671911617007, |
| "grad_norm": 0.1269826740026474, |
| "learning_rate": 0.0001, |
| "loss": 1.5294, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.45430197522597926, |
| "grad_norm": 0.13762542605400085, |
| "learning_rate": 0.0001, |
| "loss": 1.5567, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.45463675929025776, |
| "grad_norm": 0.1306358128786087, |
| "learning_rate": 0.0001, |
| "loss": 1.5829, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.4549715433545363, |
| "grad_norm": 0.1383924037218094, |
| "learning_rate": 0.0001, |
| "loss": 1.6382, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.4553063274188149, |
| "grad_norm": 0.13577204942703247, |
| "learning_rate": 0.0001, |
| "loss": 1.6067, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.4556411114830934, |
| "grad_norm": 0.12534180283546448, |
| "learning_rate": 0.0001, |
| "loss": 1.574, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.45597589554737195, |
| "grad_norm": 0.12367561459541321, |
| "learning_rate": 0.0001, |
| "loss": 1.5089, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.4563106796116505, |
| "grad_norm": 0.14012429118156433, |
| "learning_rate": 0.0001, |
| "loss": 1.6044, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.456645463675929, |
| "grad_norm": 0.13164697587490082, |
| "learning_rate": 0.0001, |
| "loss": 1.6058, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.45698024774020757, |
| "grad_norm": 0.14275015890598297, |
| "learning_rate": 0.0001, |
| "loss": 1.6945, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.4573150318044861, |
| "grad_norm": 0.1312190294265747, |
| "learning_rate": 0.0001, |
| "loss": 1.5595, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.45764981586876463, |
| "grad_norm": 0.1276426464319229, |
| "learning_rate": 0.0001, |
| "loss": 1.5639, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.4579845999330432, |
| "grad_norm": 0.12928691506385803, |
| "learning_rate": 0.0001, |
| "loss": 1.6555, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.45831938399732175, |
| "grad_norm": 0.12562155723571777, |
| "learning_rate": 0.0001, |
| "loss": 1.5017, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.45865416806160025, |
| "grad_norm": 0.12555162608623505, |
| "learning_rate": 0.0001, |
| "loss": 1.5133, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.4589889521258788, |
| "grad_norm": 0.13354945182800293, |
| "learning_rate": 0.0001, |
| "loss": 1.5802, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.4593237361901574, |
| "grad_norm": 0.13059929013252258, |
| "learning_rate": 0.0001, |
| "loss": 1.5152, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.4596585202544359, |
| "grad_norm": 0.1313420981168747, |
| "learning_rate": 0.0001, |
| "loss": 1.5411, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.45999330431871444, |
| "grad_norm": 0.13619214296340942, |
| "learning_rate": 0.0001, |
| "loss": 1.5348, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.460328088382993, |
| "grad_norm": 0.12227842211723328, |
| "learning_rate": 0.0001, |
| "loss": 1.5258, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.4606628724472715, |
| "grad_norm": 0.12962037324905396, |
| "learning_rate": 0.0001, |
| "loss": 1.6469, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.46099765651155006, |
| "grad_norm": 0.128581240773201, |
| "learning_rate": 0.0001, |
| "loss": 1.6151, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.4613324405758286, |
| "grad_norm": 0.12887564301490784, |
| "learning_rate": 0.0001, |
| "loss": 1.5741, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.4616672246401071, |
| "grad_norm": 0.12684863805770874, |
| "learning_rate": 0.0001, |
| "loss": 1.6168, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.4620020087043857, |
| "grad_norm": 0.11986137181520462, |
| "learning_rate": 0.0001, |
| "loss": 1.5278, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.4623367927686642, |
| "grad_norm": 0.12904709577560425, |
| "learning_rate": 0.0001, |
| "loss": 1.5247, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.46267157683294274, |
| "grad_norm": 0.12737007439136505, |
| "learning_rate": 0.0001, |
| "loss": 1.6354, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.4630063608972213, |
| "grad_norm": 0.13845406472682953, |
| "learning_rate": 0.0001, |
| "loss": 1.5696, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.4633411449614998, |
| "grad_norm": 0.1215730682015419, |
| "learning_rate": 0.0001, |
| "loss": 1.5277, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.46367592902577837, |
| "grad_norm": 0.12643855810165405, |
| "learning_rate": 0.0001, |
| "loss": 1.5691, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.4640107130900569, |
| "grad_norm": 0.12575271725654602, |
| "learning_rate": 0.0001, |
| "loss": 1.5075, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.46434549715433543, |
| "grad_norm": 0.13134850561618805, |
| "learning_rate": 0.0001, |
| "loss": 1.6195, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.464680281218614, |
| "grad_norm": 0.12751908600330353, |
| "learning_rate": 0.0001, |
| "loss": 1.5396, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.46501506528289255, |
| "grad_norm": 0.1260857880115509, |
| "learning_rate": 0.0001, |
| "loss": 1.581, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.46534984934717105, |
| "grad_norm": 0.13056620955467224, |
| "learning_rate": 0.0001, |
| "loss": 1.5604, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.4656846334114496, |
| "grad_norm": 0.12854252755641937, |
| "learning_rate": 0.0001, |
| "loss": 1.5729, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.46601941747572817, |
| "grad_norm": 0.12587207555770874, |
| "learning_rate": 0.0001, |
| "loss": 1.5685, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.4663542015400067, |
| "grad_norm": 0.13984687626361847, |
| "learning_rate": 0.0001, |
| "loss": 1.5327, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.46668898560428523, |
| "grad_norm": 0.1340693235397339, |
| "learning_rate": 0.0001, |
| "loss": 1.5047, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.4670237696685638, |
| "grad_norm": 0.12426851689815521, |
| "learning_rate": 0.0001, |
| "loss": 1.5614, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.4673585537328423, |
| "grad_norm": 0.14335423707962036, |
| "learning_rate": 0.0001, |
| "loss": 1.5968, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.46769333779712086, |
| "grad_norm": 0.1285167783498764, |
| "learning_rate": 0.0001, |
| "loss": 1.4816, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.4680281218613994, |
| "grad_norm": 0.12221338599920273, |
| "learning_rate": 0.0001, |
| "loss": 1.5412, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.4683629059256779, |
| "grad_norm": 0.13749419152736664, |
| "learning_rate": 0.0001, |
| "loss": 1.6426, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.4686976899899565, |
| "grad_norm": 0.1292765736579895, |
| "learning_rate": 0.0001, |
| "loss": 1.4826, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.46903247405423504, |
| "grad_norm": 0.12175814807415009, |
| "learning_rate": 0.0001, |
| "loss": 1.4674, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.46936725811851354, |
| "grad_norm": 0.13381820917129517, |
| "learning_rate": 0.0001, |
| "loss": 1.515, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.4697020421827921, |
| "grad_norm": 0.13659454882144928, |
| "learning_rate": 0.0001, |
| "loss": 1.5513, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.47003682624707066, |
| "grad_norm": 0.12511052191257477, |
| "learning_rate": 0.0001, |
| "loss": 1.5457, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.47037161031134916, |
| "grad_norm": 0.13325883448123932, |
| "learning_rate": 0.0001, |
| "loss": 1.5893, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.4707063943756277, |
| "grad_norm": 0.12582562863826752, |
| "learning_rate": 0.0001, |
| "loss": 1.5285, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.4710411784399063, |
| "grad_norm": 0.13141517341136932, |
| "learning_rate": 0.0001, |
| "loss": 1.5865, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.4713759625041848, |
| "grad_norm": 0.13099296391010284, |
| "learning_rate": 0.0001, |
| "loss": 1.5322, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.47171074656846335, |
| "grad_norm": 0.146238312125206, |
| "learning_rate": 0.0001, |
| "loss": 1.6397, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.4720455306327419, |
| "grad_norm": 0.12129180878400803, |
| "learning_rate": 0.0001, |
| "loss": 1.5033, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.4723803146970204, |
| "grad_norm": 0.125573992729187, |
| "learning_rate": 0.0001, |
| "loss": 1.571, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.47271509876129897, |
| "grad_norm": 0.14334800839424133, |
| "learning_rate": 0.0001, |
| "loss": 1.5323, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.47304988282557753, |
| "grad_norm": 0.1354663372039795, |
| "learning_rate": 0.0001, |
| "loss": 1.5733, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.47338466688985603, |
| "grad_norm": 0.13040928542613983, |
| "learning_rate": 0.0001, |
| "loss": 1.4702, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.4737194509541346, |
| "grad_norm": 0.12931925058364868, |
| "learning_rate": 0.0001, |
| "loss": 1.6017, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.47405423501841315, |
| "grad_norm": 0.13492871820926666, |
| "learning_rate": 0.0001, |
| "loss": 1.5827, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.47438901908269165, |
| "grad_norm": 0.12549789249897003, |
| "learning_rate": 0.0001, |
| "loss": 1.5856, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.4747238031469702, |
| "grad_norm": 0.13328687846660614, |
| "learning_rate": 0.0001, |
| "loss": 1.6163, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.4750585872112487, |
| "grad_norm": 0.13430629670619965, |
| "learning_rate": 0.0001, |
| "loss": 1.5663, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.4753933712755273, |
| "grad_norm": 0.12909024953842163, |
| "learning_rate": 0.0001, |
| "loss": 1.6085, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.47572815533980584, |
| "grad_norm": 0.13095097243785858, |
| "learning_rate": 0.0001, |
| "loss": 1.585, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.47606293940408434, |
| "grad_norm": 0.1313266009092331, |
| "learning_rate": 0.0001, |
| "loss": 1.5279, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.4763977234683629, |
| "grad_norm": 0.12739764153957367, |
| "learning_rate": 0.0001, |
| "loss": 1.6473, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.47673250753264146, |
| "grad_norm": 0.12780874967575073, |
| "learning_rate": 0.0001, |
| "loss": 1.5566, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.47706729159691996, |
| "grad_norm": 0.12299945950508118, |
| "learning_rate": 0.0001, |
| "loss": 1.5632, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.4774020756611985, |
| "grad_norm": 0.12845619022846222, |
| "learning_rate": 0.0001, |
| "loss": 1.5799, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.4777368597254771, |
| "grad_norm": 0.12429885566234589, |
| "learning_rate": 0.0001, |
| "loss": 1.565, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.4780716437897556, |
| "grad_norm": 0.12623021006584167, |
| "learning_rate": 0.0001, |
| "loss": 1.5579, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.47840642785403414, |
| "grad_norm": 0.121118925511837, |
| "learning_rate": 0.0001, |
| "loss": 1.5044, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.4787412119183127, |
| "grad_norm": 0.13029584288597107, |
| "learning_rate": 0.0001, |
| "loss": 1.5945, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.4790759959825912, |
| "grad_norm": 0.1309075504541397, |
| "learning_rate": 0.0001, |
| "loss": 1.5638, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.47941078004686977, |
| "grad_norm": 0.12302339822053909, |
| "learning_rate": 0.0001, |
| "loss": 1.553, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.4797455641111483, |
| "grad_norm": 0.13640674948692322, |
| "learning_rate": 0.0001, |
| "loss": 1.6299, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.48008034817542683, |
| "grad_norm": 0.12669233977794647, |
| "learning_rate": 0.0001, |
| "loss": 1.5603, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.4804151322397054, |
| "grad_norm": 0.14192534983158112, |
| "learning_rate": 0.0001, |
| "loss": 1.5648, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.48074991630398395, |
| "grad_norm": 0.12855654954910278, |
| "learning_rate": 0.0001, |
| "loss": 1.5782, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.48108470036826245, |
| "grad_norm": 0.13193868100643158, |
| "learning_rate": 0.0001, |
| "loss": 1.4815, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.481419484432541, |
| "grad_norm": 0.1313331574201584, |
| "learning_rate": 0.0001, |
| "loss": 1.597, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.48175426849681957, |
| "grad_norm": 0.14010664820671082, |
| "learning_rate": 0.0001, |
| "loss": 1.5911, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.4820890525610981, |
| "grad_norm": 0.12899306416511536, |
| "learning_rate": 0.0001, |
| "loss": 1.5346, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.48242383662537663, |
| "grad_norm": 0.14157001674175262, |
| "learning_rate": 0.0001, |
| "loss": 1.4947, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.4827586206896552, |
| "grad_norm": 0.12598420679569244, |
| "learning_rate": 0.0001, |
| "loss": 1.5713, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.4830934047539337, |
| "grad_norm": 0.12368304282426834, |
| "learning_rate": 0.0001, |
| "loss": 1.4691, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.48342818881821226, |
| "grad_norm": 0.15252211689949036, |
| "learning_rate": 0.0001, |
| "loss": 1.5298, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.4837629728824908, |
| "grad_norm": 0.12461958080530167, |
| "learning_rate": 0.0001, |
| "loss": 1.5377, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.4840977569467693, |
| "grad_norm": 0.13883721828460693, |
| "learning_rate": 0.0001, |
| "loss": 1.5754, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.4844325410110479, |
| "grad_norm": 0.14833161234855652, |
| "learning_rate": 0.0001, |
| "loss": 1.514, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.48476732507532644, |
| "grad_norm": 0.12511619925498962, |
| "learning_rate": 0.0001, |
| "loss": 1.5765, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.48510210913960494, |
| "grad_norm": 0.1352238804101944, |
| "learning_rate": 0.0001, |
| "loss": 1.5231, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.4854368932038835, |
| "grad_norm": 0.14310289919376373, |
| "learning_rate": 0.0001, |
| "loss": 1.5516, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.48577167726816206, |
| "grad_norm": 0.1293793022632599, |
| "learning_rate": 0.0001, |
| "loss": 1.6124, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.48610646133244056, |
| "grad_norm": 0.1351606398820877, |
| "learning_rate": 0.0001, |
| "loss": 1.5535, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.4864412453967191, |
| "grad_norm": 0.1305823028087616, |
| "learning_rate": 0.0001, |
| "loss": 1.505, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.4867760294609977, |
| "grad_norm": 0.12973332405090332, |
| "learning_rate": 0.0001, |
| "loss": 1.6027, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.4871108135252762, |
| "grad_norm": 0.1279638260602951, |
| "learning_rate": 0.0001, |
| "loss": 1.5664, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.48744559758955475, |
| "grad_norm": 0.1322777271270752, |
| "learning_rate": 0.0001, |
| "loss": 1.605, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.48778038165383325, |
| "grad_norm": 0.14680039882659912, |
| "learning_rate": 0.0001, |
| "loss": 1.5243, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.4881151657181118, |
| "grad_norm": 0.12435714155435562, |
| "learning_rate": 0.0001, |
| "loss": 1.4835, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.48844994978239037, |
| "grad_norm": 0.13253144919872284, |
| "learning_rate": 0.0001, |
| "loss": 1.5797, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.4887847338466689, |
| "grad_norm": 0.14123192429542542, |
| "learning_rate": 0.0001, |
| "loss": 1.5795, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.48911951791094743, |
| "grad_norm": 0.1254579871892929, |
| "learning_rate": 0.0001, |
| "loss": 1.4829, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.489454301975226, |
| "grad_norm": 0.1407458633184433, |
| "learning_rate": 0.0001, |
| "loss": 1.5746, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.4897890860395045, |
| "grad_norm": 0.13967539370059967, |
| "learning_rate": 0.0001, |
| "loss": 1.611, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.49012387010378305, |
| "grad_norm": 0.13044650852680206, |
| "learning_rate": 0.0001, |
| "loss": 1.5614, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.4904586541680616, |
| "grad_norm": 0.13819964230060577, |
| "learning_rate": 0.0001, |
| "loss": 1.5579, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.4907934382323401, |
| "grad_norm": 0.12795104086399078, |
| "learning_rate": 0.0001, |
| "loss": 1.5373, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.4911282222966187, |
| "grad_norm": 0.13034126162528992, |
| "learning_rate": 0.0001, |
| "loss": 1.5077, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.49146300636089724, |
| "grad_norm": 0.1358436644077301, |
| "learning_rate": 0.0001, |
| "loss": 1.6376, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.49179779042517574, |
| "grad_norm": 0.12750184535980225, |
| "learning_rate": 0.0001, |
| "loss": 1.5638, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.4921325744894543, |
| "grad_norm": 0.13034793734550476, |
| "learning_rate": 0.0001, |
| "loss": 1.5053, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.49246735855373286, |
| "grad_norm": 0.1303941309452057, |
| "learning_rate": 0.0001, |
| "loss": 1.5342, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.49280214261801136, |
| "grad_norm": 0.12955164909362793, |
| "learning_rate": 0.0001, |
| "loss": 1.5396, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.4931369266822899, |
| "grad_norm": 0.12884975969791412, |
| "learning_rate": 0.0001, |
| "loss": 1.5389, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.4934717107465685, |
| "grad_norm": 0.1278049647808075, |
| "learning_rate": 0.0001, |
| "loss": 1.5937, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.493806494810847, |
| "grad_norm": 0.12420760840177536, |
| "learning_rate": 0.0001, |
| "loss": 1.4753, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.49414127887512554, |
| "grad_norm": 0.12760096788406372, |
| "learning_rate": 0.0001, |
| "loss": 1.647, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.4944760629394041, |
| "grad_norm": 0.1320486068725586, |
| "learning_rate": 0.0001, |
| "loss": 1.5758, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.4948108470036826, |
| "grad_norm": 0.13898344337940216, |
| "learning_rate": 0.0001, |
| "loss": 1.6265, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.49514563106796117, |
| "grad_norm": 0.12908297777175903, |
| "learning_rate": 0.0001, |
| "loss": 1.6294, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.4954804151322397, |
| "grad_norm": 0.13149291276931763, |
| "learning_rate": 0.0001, |
| "loss": 1.5297, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.49581519919651823, |
| "grad_norm": 0.13526497781276703, |
| "learning_rate": 0.0001, |
| "loss": 1.5374, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.4961499832607968, |
| "grad_norm": 0.12223420292139053, |
| "learning_rate": 0.0001, |
| "loss": 1.5424, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.49648476732507535, |
| "grad_norm": 0.1266697198152542, |
| "learning_rate": 0.0001, |
| "loss": 1.5847, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.49681955138935385, |
| "grad_norm": 0.14440171420574188, |
| "learning_rate": 0.0001, |
| "loss": 1.5362, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.4971543354536324, |
| "grad_norm": 0.12831640243530273, |
| "learning_rate": 0.0001, |
| "loss": 1.5803, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.49748911951791097, |
| "grad_norm": 0.13665077090263367, |
| "learning_rate": 0.0001, |
| "loss": 1.5741, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.4978239035821895, |
| "grad_norm": 0.13725218176841736, |
| "learning_rate": 0.0001, |
| "loss": 1.6207, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.49815868764646803, |
| "grad_norm": 0.1271527111530304, |
| "learning_rate": 0.0001, |
| "loss": 1.6129, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.4984934717107466, |
| "grad_norm": 0.15319159626960754, |
| "learning_rate": 0.0001, |
| "loss": 1.6247, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.4988282557750251, |
| "grad_norm": 0.12440894544124603, |
| "learning_rate": 0.0001, |
| "loss": 1.4354, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.49916303983930366, |
| "grad_norm": 0.1261643022298813, |
| "learning_rate": 0.0001, |
| "loss": 1.609, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.4994978239035822, |
| "grad_norm": 0.14216668903827667, |
| "learning_rate": 0.0001, |
| "loss": 1.5599, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.4998326079678607, |
| "grad_norm": 0.13173174858093262, |
| "learning_rate": 0.0001, |
| "loss": 1.5056, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.5001673920321392, |
| "grad_norm": 0.12335377931594849, |
| "learning_rate": 0.0001, |
| "loss": 1.5544, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.5005021760964178, |
| "grad_norm": 0.13367588818073273, |
| "learning_rate": 0.0001, |
| "loss": 1.4908, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.5008369601606963, |
| "grad_norm": 0.13830317556858063, |
| "learning_rate": 0.0001, |
| "loss": 1.6147, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.5011717442249749, |
| "grad_norm": 0.13441935181617737, |
| "learning_rate": 0.0001, |
| "loss": 1.6855, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.5015065282892535, |
| "grad_norm": 0.14937585592269897, |
| "learning_rate": 0.0001, |
| "loss": 1.6021, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.501841312353532, |
| "grad_norm": 0.1289912909269333, |
| "learning_rate": 0.0001, |
| "loss": 1.5516, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.5021760964178105, |
| "grad_norm": 0.12371324002742767, |
| "learning_rate": 0.0001, |
| "loss": 1.5842, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.502510880482089, |
| "grad_norm": 0.12764602899551392, |
| "learning_rate": 0.0001, |
| "loss": 1.5836, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.5028456645463676, |
| "grad_norm": 0.12929953634738922, |
| "learning_rate": 0.0001, |
| "loss": 1.5656, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.5031804486106461, |
| "grad_norm": 0.1252906322479248, |
| "learning_rate": 0.0001, |
| "loss": 1.4856, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.5035152326749247, |
| "grad_norm": 0.13477809727191925, |
| "learning_rate": 0.0001, |
| "loss": 1.6185, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.5038500167392033, |
| "grad_norm": 0.12459214776754379, |
| "learning_rate": 0.0001, |
| "loss": 1.5323, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.5041848008034817, |
| "grad_norm": 0.12989842891693115, |
| "learning_rate": 0.0001, |
| "loss": 1.5325, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.5045195848677603, |
| "grad_norm": 0.12878334522247314, |
| "learning_rate": 0.0001, |
| "loss": 1.6504, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.5048543689320388, |
| "grad_norm": 0.14765828847885132, |
| "learning_rate": 0.0001, |
| "loss": 1.5978, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.5051891529963174, |
| "grad_norm": 0.1294100284576416, |
| "learning_rate": 0.0001, |
| "loss": 1.6909, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.505523937060596, |
| "grad_norm": 0.1304991990327835, |
| "learning_rate": 0.0001, |
| "loss": 1.6513, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5058587211248745, |
| "grad_norm": 0.1318545788526535, |
| "learning_rate": 0.0001, |
| "loss": 1.5489, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.506193505189153, |
| "grad_norm": 0.13185527920722961, |
| "learning_rate": 0.0001, |
| "loss": 1.6317, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.5065282892534315, |
| "grad_norm": 0.13133597373962402, |
| "learning_rate": 0.0001, |
| "loss": 1.5853, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.5068630733177101, |
| "grad_norm": 0.14132916927337646, |
| "learning_rate": 0.0001, |
| "loss": 1.6844, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.5071978573819886, |
| "grad_norm": 0.12680397927761078, |
| "learning_rate": 0.0001, |
| "loss": 1.6048, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.5075326414462672, |
| "grad_norm": 0.125723198056221, |
| "learning_rate": 0.0001, |
| "loss": 1.5296, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.5078674255105456, |
| "grad_norm": 0.135573148727417, |
| "learning_rate": 0.0001, |
| "loss": 1.6619, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.5082022095748242, |
| "grad_norm": 0.12755006551742554, |
| "learning_rate": 0.0001, |
| "loss": 1.5376, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.5085369936391028, |
| "grad_norm": 0.1527450680732727, |
| "learning_rate": 0.0001, |
| "loss": 1.4984, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.5088717777033813, |
| "grad_norm": 0.12978217005729675, |
| "learning_rate": 0.0001, |
| "loss": 1.514, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5092065617676599, |
| "grad_norm": 0.13393737375736237, |
| "learning_rate": 0.0001, |
| "loss": 1.5267, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.5095413458319384, |
| "grad_norm": 0.13406458497047424, |
| "learning_rate": 0.0001, |
| "loss": 1.4858, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.5098761298962169, |
| "grad_norm": 0.13214215636253357, |
| "learning_rate": 0.0001, |
| "loss": 1.5391, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.5102109139604954, |
| "grad_norm": 0.13335101306438446, |
| "learning_rate": 0.0001, |
| "loss": 1.5791, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.510545698024774, |
| "grad_norm": 0.12885718047618866, |
| "learning_rate": 0.0001, |
| "loss": 1.532, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.5108804820890526, |
| "grad_norm": 0.12838226556777954, |
| "learning_rate": 0.0001, |
| "loss": 1.5186, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.5112152661533311, |
| "grad_norm": 0.13160903751850128, |
| "learning_rate": 0.0001, |
| "loss": 1.5792, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.5115500502176097, |
| "grad_norm": 0.1264614462852478, |
| "learning_rate": 0.0001, |
| "loss": 1.6005, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.5118848342818881, |
| "grad_norm": 0.13425403833389282, |
| "learning_rate": 0.0001, |
| "loss": 1.5413, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.5122196183461667, |
| "grad_norm": 0.12175809592008591, |
| "learning_rate": 0.0001, |
| "loss": 1.5128, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5125544024104453, |
| "grad_norm": 0.1299484223127365, |
| "learning_rate": 0.0001, |
| "loss": 1.4981, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.5128891864747238, |
| "grad_norm": 0.12358542531728745, |
| "learning_rate": 0.0001, |
| "loss": 1.4794, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.5132239705390024, |
| "grad_norm": 0.12457676231861115, |
| "learning_rate": 0.0001, |
| "loss": 1.462, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.5135587546032809, |
| "grad_norm": 0.12775678932666779, |
| "learning_rate": 0.0001, |
| "loss": 1.4993, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.5138935386675594, |
| "grad_norm": 0.12386265397071838, |
| "learning_rate": 0.0001, |
| "loss": 1.504, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.5142283227318379, |
| "grad_norm": 0.13995805382728577, |
| "learning_rate": 0.0001, |
| "loss": 1.5912, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.5145631067961165, |
| "grad_norm": 0.1274706870317459, |
| "learning_rate": 0.0001, |
| "loss": 1.6514, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.5148978908603951, |
| "grad_norm": 0.12781144678592682, |
| "learning_rate": 0.0001, |
| "loss": 1.5379, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.5152326749246736, |
| "grad_norm": 0.12408823519945145, |
| "learning_rate": 0.0001, |
| "loss": 1.4709, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.5155674589889522, |
| "grad_norm": 0.12711866199970245, |
| "learning_rate": 0.0001, |
| "loss": 1.5529, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5159022430532306, |
| "grad_norm": 0.12433881312608719, |
| "learning_rate": 0.0001, |
| "loss": 1.4641, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.5162370271175092, |
| "grad_norm": 0.13031256198883057, |
| "learning_rate": 0.0001, |
| "loss": 1.6042, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.5165718111817877, |
| "grad_norm": 0.1294173002243042, |
| "learning_rate": 0.0001, |
| "loss": 1.5269, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.5169065952460663, |
| "grad_norm": 0.1273273229598999, |
| "learning_rate": 0.0001, |
| "loss": 1.5984, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.5172413793103449, |
| "grad_norm": 0.13191919028759003, |
| "learning_rate": 0.0001, |
| "loss": 1.5684, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.5175761633746234, |
| "grad_norm": 0.13768093287944794, |
| "learning_rate": 0.0001, |
| "loss": 1.555, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.5179109474389019, |
| "grad_norm": 0.12926150858402252, |
| "learning_rate": 0.0001, |
| "loss": 1.4731, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.5182457315031804, |
| "grad_norm": 0.12586715817451477, |
| "learning_rate": 0.0001, |
| "loss": 1.4794, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.518580515567459, |
| "grad_norm": 0.12548579275608063, |
| "learning_rate": 0.0001, |
| "loss": 1.5266, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.5189152996317375, |
| "grad_norm": 0.12171539664268494, |
| "learning_rate": 0.0001, |
| "loss": 1.4205, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5192500836960161, |
| "grad_norm": 0.13130709528923035, |
| "learning_rate": 0.0001, |
| "loss": 1.5927, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.5195848677602946, |
| "grad_norm": 0.1342555582523346, |
| "learning_rate": 0.0001, |
| "loss": 1.5756, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.5199196518245731, |
| "grad_norm": 0.12991021573543549, |
| "learning_rate": 0.0001, |
| "loss": 1.646, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.5202544358888517, |
| "grad_norm": 0.13074184954166412, |
| "learning_rate": 0.0001, |
| "loss": 1.4619, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.5205892199531302, |
| "grad_norm": 0.12969058752059937, |
| "learning_rate": 0.0001, |
| "loss": 1.5048, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.5209240040174088, |
| "grad_norm": 0.12283259630203247, |
| "learning_rate": 0.0001, |
| "loss": 1.4968, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.5212587880816874, |
| "grad_norm": 0.14244720339775085, |
| "learning_rate": 0.0001, |
| "loss": 1.5984, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.5215935721459658, |
| "grad_norm": 0.12856322526931763, |
| "learning_rate": 0.0001, |
| "loss": 1.5382, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.5219283562102444, |
| "grad_norm": 0.1262657344341278, |
| "learning_rate": 0.0001, |
| "loss": 1.5191, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.5222631402745229, |
| "grad_norm": 0.1350589543581009, |
| "learning_rate": 0.0001, |
| "loss": 1.5812, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5225979243388015, |
| "grad_norm": 0.13602742552757263, |
| "learning_rate": 0.0001, |
| "loss": 1.6252, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.52293270840308, |
| "grad_norm": 0.1273350566625595, |
| "learning_rate": 0.0001, |
| "loss": 1.5607, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.5232674924673586, |
| "grad_norm": 0.1261235773563385, |
| "learning_rate": 0.0001, |
| "loss": 1.4537, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.523602276531637, |
| "grad_norm": 0.123395174741745, |
| "learning_rate": 0.0001, |
| "loss": 1.4839, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.5239370605959156, |
| "grad_norm": 0.12707623839378357, |
| "learning_rate": 0.0001, |
| "loss": 1.5671, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.5242718446601942, |
| "grad_norm": 0.119587741792202, |
| "learning_rate": 0.0001, |
| "loss": 1.4637, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.5246066287244727, |
| "grad_norm": 0.12568604946136475, |
| "learning_rate": 0.0001, |
| "loss": 1.5196, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.5249414127887513, |
| "grad_norm": 0.13292740285396576, |
| "learning_rate": 0.0001, |
| "loss": 1.5909, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.5252761968530298, |
| "grad_norm": 0.13198155164718628, |
| "learning_rate": 0.0001, |
| "loss": 1.6039, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.5256109809173083, |
| "grad_norm": 0.12587766349315643, |
| "learning_rate": 0.0001, |
| "loss": 1.5418, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.5259457649815868, |
| "grad_norm": 0.12726300954818726, |
| "learning_rate": 0.0001, |
| "loss": 1.5366, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.5262805490458654, |
| "grad_norm": 0.12479355186223984, |
| "learning_rate": 0.0001, |
| "loss": 1.5486, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.526615333110144, |
| "grad_norm": 0.1242307722568512, |
| "learning_rate": 0.0001, |
| "loss": 1.4547, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.5269501171744225, |
| "grad_norm": 0.12753188610076904, |
| "learning_rate": 0.0001, |
| "loss": 1.6649, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.5272849012387011, |
| "grad_norm": 0.12815521657466888, |
| "learning_rate": 0.0001, |
| "loss": 1.4489, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.5276196853029795, |
| "grad_norm": 0.1192578375339508, |
| "learning_rate": 0.0001, |
| "loss": 1.4078, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.5279544693672581, |
| "grad_norm": 0.12596169114112854, |
| "learning_rate": 0.0001, |
| "loss": 1.5369, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.5282892534315367, |
| "grad_norm": 0.13193419575691223, |
| "learning_rate": 0.0001, |
| "loss": 1.5601, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.5286240374958152, |
| "grad_norm": 0.1277266889810562, |
| "learning_rate": 0.0001, |
| "loss": 1.5336, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.5289588215600938, |
| "grad_norm": 0.12819704413414001, |
| "learning_rate": 0.0001, |
| "loss": 1.4713, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.5292936056243723, |
| "grad_norm": 0.1399090439081192, |
| "learning_rate": 0.0001, |
| "loss": 1.5978, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.5296283896886508, |
| "grad_norm": 0.1373160183429718, |
| "learning_rate": 0.0001, |
| "loss": 1.6614, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.5299631737529293, |
| "grad_norm": 0.1253012716770172, |
| "learning_rate": 0.0001, |
| "loss": 1.5317, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.5302979578172079, |
| "grad_norm": 0.124544158577919, |
| "learning_rate": 0.0001, |
| "loss": 1.4947, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.5306327418814865, |
| "grad_norm": 0.13060353696346283, |
| "learning_rate": 0.0001, |
| "loss": 1.5342, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.530967525945765, |
| "grad_norm": 0.12680500745773315, |
| "learning_rate": 0.0001, |
| "loss": 1.4597, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.5313023100100436, |
| "grad_norm": 0.13112664222717285, |
| "learning_rate": 0.0001, |
| "loss": 1.5978, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.531637094074322, |
| "grad_norm": 0.13016077876091003, |
| "learning_rate": 0.0001, |
| "loss": 1.5575, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.5319718781386006, |
| "grad_norm": 0.1273767054080963, |
| "learning_rate": 0.0001, |
| "loss": 1.607, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.5323066622028791, |
| "grad_norm": 0.1310475915670395, |
| "learning_rate": 0.0001, |
| "loss": 1.5066, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.5326414462671577, |
| "grad_norm": 0.12938565015792847, |
| "learning_rate": 0.0001, |
| "loss": 1.4933, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.5329762303314363, |
| "grad_norm": 0.12316200882196426, |
| "learning_rate": 0.0001, |
| "loss": 1.4752, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.5333110143957147, |
| "grad_norm": 0.13205035030841827, |
| "learning_rate": 0.0001, |
| "loss": 1.5061, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.5336457984599933, |
| "grad_norm": 0.12517520785331726, |
| "learning_rate": 0.0001, |
| "loss": 1.5237, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.5339805825242718, |
| "grad_norm": 0.1309306025505066, |
| "learning_rate": 0.0001, |
| "loss": 1.5975, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.5343153665885504, |
| "grad_norm": 0.13565212488174438, |
| "learning_rate": 0.0001, |
| "loss": 1.6888, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.534650150652829, |
| "grad_norm": 0.13044795393943787, |
| "learning_rate": 0.0001, |
| "loss": 1.547, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.5349849347171075, |
| "grad_norm": 0.12757791578769684, |
| "learning_rate": 0.0001, |
| "loss": 1.5788, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.535319718781386, |
| "grad_norm": 0.12625539302825928, |
| "learning_rate": 0.0001, |
| "loss": 1.6271, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.5356545028456645, |
| "grad_norm": 0.12980274856090546, |
| "learning_rate": 0.0001, |
| "loss": 1.4808, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5359892869099431, |
| "grad_norm": 0.1339329481124878, |
| "learning_rate": 0.0001, |
| "loss": 1.5838, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.5363240709742216, |
| "grad_norm": 0.13570533692836761, |
| "learning_rate": 0.0001, |
| "loss": 1.5526, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.5366588550385002, |
| "grad_norm": 0.13043223321437836, |
| "learning_rate": 0.0001, |
| "loss": 1.5046, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.5369936391027788, |
| "grad_norm": 0.1268492341041565, |
| "learning_rate": 0.0001, |
| "loss": 1.4846, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.5373284231670572, |
| "grad_norm": 0.12844318151474, |
| "learning_rate": 0.0001, |
| "loss": 1.622, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.5376632072313358, |
| "grad_norm": 0.12543794512748718, |
| "learning_rate": 0.0001, |
| "loss": 1.4895, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.5379979912956143, |
| "grad_norm": 0.13247263431549072, |
| "learning_rate": 0.0001, |
| "loss": 1.5431, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.5383327753598929, |
| "grad_norm": 0.12495877593755722, |
| "learning_rate": 0.0001, |
| "loss": 1.5534, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.5386675594241714, |
| "grad_norm": 0.12770773470401764, |
| "learning_rate": 0.0001, |
| "loss": 1.5296, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.53900234348845, |
| "grad_norm": 0.1249793991446495, |
| "learning_rate": 0.0001, |
| "loss": 1.549, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.5393371275527284, |
| "grad_norm": 0.13602420687675476, |
| "learning_rate": 0.0001, |
| "loss": 1.6911, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.539671911617007, |
| "grad_norm": 0.1260257512331009, |
| "learning_rate": 0.0001, |
| "loss": 1.6155, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.5400066956812856, |
| "grad_norm": 0.13716067373752594, |
| "learning_rate": 0.0001, |
| "loss": 1.5017, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.5403414797455641, |
| "grad_norm": 0.12322457879781723, |
| "learning_rate": 0.0001, |
| "loss": 1.4567, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.5406762638098427, |
| "grad_norm": 0.1295168548822403, |
| "learning_rate": 0.0001, |
| "loss": 1.5388, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.5410110478741212, |
| "grad_norm": 0.13598200678825378, |
| "learning_rate": 0.0001, |
| "loss": 1.6189, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.5413458319383997, |
| "grad_norm": 0.12514351308345795, |
| "learning_rate": 0.0001, |
| "loss": 1.5957, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.5416806160026783, |
| "grad_norm": 0.13243642449378967, |
| "learning_rate": 0.0001, |
| "loss": 1.5211, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.5420154000669568, |
| "grad_norm": 0.14331547915935516, |
| "learning_rate": 0.0001, |
| "loss": 1.628, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.5423501841312354, |
| "grad_norm": 0.13204847276210785, |
| "learning_rate": 0.0001, |
| "loss": 1.6131, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.5426849681955139, |
| "grad_norm": 0.13828937709331512, |
| "learning_rate": 0.0001, |
| "loss": 1.6206, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.5430197522597925, |
| "grad_norm": 0.13166444003582, |
| "learning_rate": 0.0001, |
| "loss": 1.556, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.5433545363240709, |
| "grad_norm": 0.131551131606102, |
| "learning_rate": 0.0001, |
| "loss": 1.5884, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.5436893203883495, |
| "grad_norm": 0.1386868953704834, |
| "learning_rate": 0.0001, |
| "loss": 1.626, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.544024104452628, |
| "grad_norm": 0.12754793465137482, |
| "learning_rate": 0.0001, |
| "loss": 1.5419, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.5443588885169066, |
| "grad_norm": 0.13059911131858826, |
| "learning_rate": 0.0001, |
| "loss": 1.5886, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.5446936725811852, |
| "grad_norm": 0.13056625425815582, |
| "learning_rate": 0.0001, |
| "loss": 1.5093, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.5450284566454636, |
| "grad_norm": 0.12965354323387146, |
| "learning_rate": 0.0001, |
| "loss": 1.5766, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.5453632407097422, |
| "grad_norm": 0.12052886188030243, |
| "learning_rate": 0.0001, |
| "loss": 1.5315, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.5456980247740207, |
| "grad_norm": 0.12897798418998718, |
| "learning_rate": 0.0001, |
| "loss": 1.6129, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.5460328088382993, |
| "grad_norm": 0.12880270183086395, |
| "learning_rate": 0.0001, |
| "loss": 1.6111, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.5463675929025779, |
| "grad_norm": 0.13251414895057678, |
| "learning_rate": 0.0001, |
| "loss": 1.5786, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.5467023769668564, |
| "grad_norm": 0.13067522644996643, |
| "learning_rate": 0.0001, |
| "loss": 1.5724, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.5470371610311349, |
| "grad_norm": 0.127615824341774, |
| "learning_rate": 0.0001, |
| "loss": 1.4672, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.5473719450954134, |
| "grad_norm": 0.12785358726978302, |
| "learning_rate": 0.0001, |
| "loss": 1.4379, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.547706729159692, |
| "grad_norm": 0.1336808055639267, |
| "learning_rate": 0.0001, |
| "loss": 1.5894, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.5480415132239705, |
| "grad_norm": 0.12709666788578033, |
| "learning_rate": 0.0001, |
| "loss": 1.5646, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.5483762972882491, |
| "grad_norm": 0.1278083175420761, |
| "learning_rate": 0.0001, |
| "loss": 1.5481, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.5487110813525277, |
| "grad_norm": 0.1273607462644577, |
| "learning_rate": 0.0001, |
| "loss": 1.6099, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.5490458654168061, |
| "grad_norm": 0.13073420524597168, |
| "learning_rate": 0.0001, |
| "loss": 1.6554, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.5493806494810847, |
| "grad_norm": 0.12339271605014801, |
| "learning_rate": 0.0001, |
| "loss": 1.4866, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.5497154335453632, |
| "grad_norm": 0.12296874821186066, |
| "learning_rate": 0.0001, |
| "loss": 1.4542, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.5500502176096418, |
| "grad_norm": 0.12228816747665405, |
| "learning_rate": 0.0001, |
| "loss": 1.5008, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.5503850016739203, |
| "grad_norm": 0.12167999148368835, |
| "learning_rate": 0.0001, |
| "loss": 1.4793, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.5507197857381989, |
| "grad_norm": 0.1323646754026413, |
| "learning_rate": 0.0001, |
| "loss": 1.6053, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.5510545698024774, |
| "grad_norm": 0.13682882487773895, |
| "learning_rate": 0.0001, |
| "loss": 1.5962, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.5513893538667559, |
| "grad_norm": 0.13337336480617523, |
| "learning_rate": 0.0001, |
| "loss": 1.6422, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.5517241379310345, |
| "grad_norm": 0.12662284076213837, |
| "learning_rate": 0.0001, |
| "loss": 1.4729, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.552058921995313, |
| "grad_norm": 0.13070893287658691, |
| "learning_rate": 0.0001, |
| "loss": 1.5548, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.5523937060595916, |
| "grad_norm": 0.1237405389547348, |
| "learning_rate": 0.0001, |
| "loss": 1.5731, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.5527284901238702, |
| "grad_norm": 0.12684407830238342, |
| "learning_rate": 0.0001, |
| "loss": 1.5927, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.5530632741881486, |
| "grad_norm": 0.13257922232151031, |
| "learning_rate": 0.0001, |
| "loss": 1.6194, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.5533980582524272, |
| "grad_norm": 0.12506547570228577, |
| "learning_rate": 0.0001, |
| "loss": 1.4954, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.5537328423167057, |
| "grad_norm": 0.13652825355529785, |
| "learning_rate": 0.0001, |
| "loss": 1.5936, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.5540676263809843, |
| "grad_norm": 0.1281632035970688, |
| "learning_rate": 0.0001, |
| "loss": 1.5239, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.5544024104452628, |
| "grad_norm": 0.1302935630083084, |
| "learning_rate": 0.0001, |
| "loss": 1.5731, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.5547371945095414, |
| "grad_norm": 0.13843512535095215, |
| "learning_rate": 0.0001, |
| "loss": 1.6028, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.5550719785738198, |
| "grad_norm": 0.13132615387439728, |
| "learning_rate": 0.0001, |
| "loss": 1.5167, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.5554067626380984, |
| "grad_norm": 0.1269274204969406, |
| "learning_rate": 0.0001, |
| "loss": 1.3276, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.555741546702377, |
| "grad_norm": 0.14026238024234772, |
| "learning_rate": 0.0001, |
| "loss": 1.5699, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.5560763307666555, |
| "grad_norm": 0.13259948790073395, |
| "learning_rate": 0.0001, |
| "loss": 1.5627, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.5564111148309341, |
| "grad_norm": 0.1282505840063095, |
| "learning_rate": 0.0001, |
| "loss": 1.601, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.5567458988952126, |
| "grad_norm": 0.14385761320590973, |
| "learning_rate": 0.0001, |
| "loss": 1.5731, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.5570806829594911, |
| "grad_norm": 0.12249067425727844, |
| "learning_rate": 0.0001, |
| "loss": 1.5416, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.5574154670237697, |
| "grad_norm": 0.13182908296585083, |
| "learning_rate": 0.0001, |
| "loss": 1.5313, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.5577502510880482, |
| "grad_norm": 0.14085689187049866, |
| "learning_rate": 0.0001, |
| "loss": 1.5736, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.5580850351523268, |
| "grad_norm": 0.14808295667171478, |
| "learning_rate": 0.0001, |
| "loss": 1.6265, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.5584198192166053, |
| "grad_norm": 0.13931553065776825, |
| "learning_rate": 0.0001, |
| "loss": 1.5729, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.5587546032808838, |
| "grad_norm": 0.14633771777153015, |
| "learning_rate": 0.0001, |
| "loss": 1.5433, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.5590893873451623, |
| "grad_norm": 0.1228380873799324, |
| "learning_rate": 0.0001, |
| "loss": 1.544, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.5594241714094409, |
| "grad_norm": 0.12809088826179504, |
| "learning_rate": 0.0001, |
| "loss": 1.5724, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.5597589554737195, |
| "grad_norm": 0.13453969359397888, |
| "learning_rate": 0.0001, |
| "loss": 1.5062, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.560093739537998, |
| "grad_norm": 0.13969993591308594, |
| "learning_rate": 0.0001, |
| "loss": 1.6302, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.5604285236022766, |
| "grad_norm": 0.13022400438785553, |
| "learning_rate": 0.0001, |
| "loss": 1.6323, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.560763307666555, |
| "grad_norm": 0.13372890651226044, |
| "learning_rate": 0.0001, |
| "loss": 1.6017, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.5610980917308336, |
| "grad_norm": 0.1426994502544403, |
| "learning_rate": 0.0001, |
| "loss": 1.5737, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.5614328757951121, |
| "grad_norm": 0.1358005702495575, |
| "learning_rate": 0.0001, |
| "loss": 1.5812, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.5617676598593907, |
| "grad_norm": 0.1320638507604599, |
| "learning_rate": 0.0001, |
| "loss": 1.5414, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.5621024439236693, |
| "grad_norm": 0.13449324667453766, |
| "learning_rate": 0.0001, |
| "loss": 1.4752, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.5624372279879478, |
| "grad_norm": 0.13063769042491913, |
| "learning_rate": 0.0001, |
| "loss": 1.5002, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.5627720120522263, |
| "grad_norm": 0.12591435015201569, |
| "learning_rate": 0.0001, |
| "loss": 1.5331, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.5631067961165048, |
| "grad_norm": 0.144126296043396, |
| "learning_rate": 0.0001, |
| "loss": 1.6207, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.5634415801807834, |
| "grad_norm": 0.13355223834514618, |
| "learning_rate": 0.0001, |
| "loss": 1.546, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.563776364245062, |
| "grad_norm": 0.12519478797912598, |
| "learning_rate": 0.0001, |
| "loss": 1.5836, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.5641111483093405, |
| "grad_norm": 0.1350811868906021, |
| "learning_rate": 0.0001, |
| "loss": 1.577, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.5644459323736191, |
| "grad_norm": 0.14059753715991974, |
| "learning_rate": 0.0001, |
| "loss": 1.5457, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.5647807164378975, |
| "grad_norm": 0.13620074093341827, |
| "learning_rate": 0.0001, |
| "loss": 1.5318, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.5651155005021761, |
| "grad_norm": 0.13117417693138123, |
| "learning_rate": 0.0001, |
| "loss": 1.5413, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.5654502845664546, |
| "grad_norm": 0.14555278420448303, |
| "learning_rate": 0.0001, |
| "loss": 1.5775, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.5657850686307332, |
| "grad_norm": 0.12660092115402222, |
| "learning_rate": 0.0001, |
| "loss": 1.5034, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.5661198526950117, |
| "grad_norm": 0.12967108190059662, |
| "learning_rate": 0.0001, |
| "loss": 1.5755, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.5664546367592903, |
| "grad_norm": 0.13999544084072113, |
| "learning_rate": 0.0001, |
| "loss": 1.4471, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.5667894208235688, |
| "grad_norm": 0.13235735893249512, |
| "learning_rate": 0.0001, |
| "loss": 1.4967, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.5671242048878473, |
| "grad_norm": 0.1373562067747116, |
| "learning_rate": 0.0001, |
| "loss": 1.6267, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.5674589889521259, |
| "grad_norm": 0.1320851445198059, |
| "learning_rate": 0.0001, |
| "loss": 1.5259, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.5677937730164044, |
| "grad_norm": 0.13309001922607422, |
| "learning_rate": 0.0001, |
| "loss": 1.5604, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.568128557080683, |
| "grad_norm": 0.12666000425815582, |
| "learning_rate": 0.0001, |
| "loss": 1.5352, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.5684633411449616, |
| "grad_norm": 0.12397143244743347, |
| "learning_rate": 0.0001, |
| "loss": 1.5474, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.56879812520924, |
| "grad_norm": 0.1286936104297638, |
| "learning_rate": 0.0001, |
| "loss": 1.5125, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.5691329092735186, |
| "grad_norm": 0.12525172531604767, |
| "learning_rate": 0.0001, |
| "loss": 1.4172, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5694676933377971, |
| "grad_norm": 0.13234922289848328, |
| "learning_rate": 0.0001, |
| "loss": 1.5374, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.5698024774020757, |
| "grad_norm": 0.13341423869132996, |
| "learning_rate": 0.0001, |
| "loss": 1.5615, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.5701372614663542, |
| "grad_norm": 0.12672466039657593, |
| "learning_rate": 0.0001, |
| "loss": 1.4147, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.5704720455306327, |
| "grad_norm": 0.13073183596134186, |
| "learning_rate": 0.0001, |
| "loss": 1.5237, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.5708068295949112, |
| "grad_norm": 0.13044412434101105, |
| "learning_rate": 0.0001, |
| "loss": 1.6044, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.5711416136591898, |
| "grad_norm": 0.13865146040916443, |
| "learning_rate": 0.0001, |
| "loss": 1.5648, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.5714763977234684, |
| "grad_norm": 0.13418787717819214, |
| "learning_rate": 0.0001, |
| "loss": 1.5948, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.5718111817877469, |
| "grad_norm": 0.1279216855764389, |
| "learning_rate": 0.0001, |
| "loss": 1.5465, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.5721459658520255, |
| "grad_norm": 0.13305789232254028, |
| "learning_rate": 0.0001, |
| "loss": 1.5768, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.5724807499163039, |
| "grad_norm": 0.12358289957046509, |
| "learning_rate": 0.0001, |
| "loss": 1.4377, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.5728155339805825, |
| "grad_norm": 0.128280371427536, |
| "learning_rate": 0.0001, |
| "loss": 1.5684, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.573150318044861, |
| "grad_norm": 0.1336420327425003, |
| "learning_rate": 0.0001, |
| "loss": 1.5438, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.5734851021091396, |
| "grad_norm": 0.13142135739326477, |
| "learning_rate": 0.0001, |
| "loss": 1.5821, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.5738198861734182, |
| "grad_norm": 0.1367759257555008, |
| "learning_rate": 0.0001, |
| "loss": 1.5294, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.5741546702376967, |
| "grad_norm": 0.1364768147468567, |
| "learning_rate": 0.0001, |
| "loss": 1.4889, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.5744894543019752, |
| "grad_norm": 0.12675487995147705, |
| "learning_rate": 0.0001, |
| "loss": 1.5789, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.5748242383662537, |
| "grad_norm": 0.13054460287094116, |
| "learning_rate": 0.0001, |
| "loss": 1.5653, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.5751590224305323, |
| "grad_norm": 0.14481523633003235, |
| "learning_rate": 0.0001, |
| "loss": 1.6135, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.5754938064948109, |
| "grad_norm": 0.1317768394947052, |
| "learning_rate": 0.0001, |
| "loss": 1.5015, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.5758285905590894, |
| "grad_norm": 0.13205017149448395, |
| "learning_rate": 0.0001, |
| "loss": 1.5667, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.576163374623368, |
| "grad_norm": 0.13702328503131866, |
| "learning_rate": 0.0001, |
| "loss": 1.5487, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.5764981586876464, |
| "grad_norm": 0.13435296714305878, |
| "learning_rate": 0.0001, |
| "loss": 1.6059, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.576832942751925, |
| "grad_norm": 0.13013921678066254, |
| "learning_rate": 0.0001, |
| "loss": 1.5948, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.5771677268162035, |
| "grad_norm": 0.12254009395837784, |
| "learning_rate": 0.0001, |
| "loss": 1.485, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.5775025108804821, |
| "grad_norm": 0.13023540377616882, |
| "learning_rate": 0.0001, |
| "loss": 1.6237, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.5778372949447607, |
| "grad_norm": 0.1339290589094162, |
| "learning_rate": 0.0001, |
| "loss": 1.5983, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.5781720790090392, |
| "grad_norm": 0.13126787543296814, |
| "learning_rate": 0.0001, |
| "loss": 1.5947, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.5785068630733177, |
| "grad_norm": 0.12525591254234314, |
| "learning_rate": 0.0001, |
| "loss": 1.4519, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.5788416471375962, |
| "grad_norm": 0.12789173424243927, |
| "learning_rate": 0.0001, |
| "loss": 1.5293, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.5791764312018748, |
| "grad_norm": 0.12775948643684387, |
| "learning_rate": 0.0001, |
| "loss": 1.5971, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.5795112152661533, |
| "grad_norm": 0.13437266647815704, |
| "learning_rate": 0.0001, |
| "loss": 1.595, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.5798459993304319, |
| "grad_norm": 0.13249057531356812, |
| "learning_rate": 0.0001, |
| "loss": 1.5524, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.5801807833947105, |
| "grad_norm": 0.12838158011436462, |
| "learning_rate": 0.0001, |
| "loss": 1.4641, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.5805155674589889, |
| "grad_norm": 0.1311095654964447, |
| "learning_rate": 0.0001, |
| "loss": 1.5964, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.5808503515232675, |
| "grad_norm": 0.12928825616836548, |
| "learning_rate": 0.0001, |
| "loss": 1.5153, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.581185135587546, |
| "grad_norm": 0.1317373663187027, |
| "learning_rate": 0.0001, |
| "loss": 1.5805, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.5815199196518246, |
| "grad_norm": 0.1291595846414566, |
| "learning_rate": 0.0001, |
| "loss": 1.4974, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.5818547037161031, |
| "grad_norm": 0.12890678644180298, |
| "learning_rate": 0.0001, |
| "loss": 1.5778, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.5821894877803817, |
| "grad_norm": 0.13605663180351257, |
| "learning_rate": 0.0001, |
| "loss": 1.5206, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.5825242718446602, |
| "grad_norm": 0.12535326182842255, |
| "learning_rate": 0.0001, |
| "loss": 1.4989, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.5828590559089387, |
| "grad_norm": 0.13682806491851807, |
| "learning_rate": 0.0001, |
| "loss": 1.5558, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.5831938399732173, |
| "grad_norm": 0.12900637090206146, |
| "learning_rate": 0.0001, |
| "loss": 1.5687, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.5835286240374958, |
| "grad_norm": 0.1287071406841278, |
| "learning_rate": 0.0001, |
| "loss": 1.5349, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.5838634081017744, |
| "grad_norm": 0.12810088694095612, |
| "learning_rate": 0.0001, |
| "loss": 1.5363, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.5841981921660528, |
| "grad_norm": 0.13105565309524536, |
| "learning_rate": 0.0001, |
| "loss": 1.5633, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.5845329762303314, |
| "grad_norm": 0.13414978981018066, |
| "learning_rate": 0.0001, |
| "loss": 1.5965, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.58486776029461, |
| "grad_norm": 0.12767766416072845, |
| "learning_rate": 0.0001, |
| "loss": 1.517, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.5852025443588885, |
| "grad_norm": 0.12798413634300232, |
| "learning_rate": 0.0001, |
| "loss": 1.4184, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.5855373284231671, |
| "grad_norm": 0.13183465600013733, |
| "learning_rate": 0.0001, |
| "loss": 1.4812, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.5858721124874456, |
| "grad_norm": 0.12950639426708221, |
| "learning_rate": 0.0001, |
| "loss": 1.4371, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.5862068965517241, |
| "grad_norm": 0.1397038698196411, |
| "learning_rate": 0.0001, |
| "loss": 1.5023, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.5865416806160026, |
| "grad_norm": 0.1396951824426651, |
| "learning_rate": 0.0001, |
| "loss": 1.5174, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.5868764646802812, |
| "grad_norm": 0.13188160955905914, |
| "learning_rate": 0.0001, |
| "loss": 1.511, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.5872112487445598, |
| "grad_norm": 0.13433519005775452, |
| "learning_rate": 0.0001, |
| "loss": 1.5214, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.5875460328088383, |
| "grad_norm": 0.13022519648075104, |
| "learning_rate": 0.0001, |
| "loss": 1.5629, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.5878808168731169, |
| "grad_norm": 0.12651024758815765, |
| "learning_rate": 0.0001, |
| "loss": 1.4469, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.5882156009373953, |
| "grad_norm": 0.13489894568920135, |
| "learning_rate": 0.0001, |
| "loss": 1.5363, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.5885503850016739, |
| "grad_norm": 0.13707391917705536, |
| "learning_rate": 0.0001, |
| "loss": 1.6495, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.5888851690659525, |
| "grad_norm": 0.12528660893440247, |
| "learning_rate": 0.0001, |
| "loss": 1.5296, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.589219953130231, |
| "grad_norm": 0.14160814881324768, |
| "learning_rate": 0.0001, |
| "loss": 1.5977, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.5895547371945096, |
| "grad_norm": 0.12557724118232727, |
| "learning_rate": 0.0001, |
| "loss": 1.4915, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.5898895212587881, |
| "grad_norm": 0.12706881761550903, |
| "learning_rate": 0.0001, |
| "loss": 1.5775, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.5902243053230666, |
| "grad_norm": 0.13343869149684906, |
| "learning_rate": 0.0001, |
| "loss": 1.6033, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.5905590893873451, |
| "grad_norm": 0.1284165382385254, |
| "learning_rate": 0.0001, |
| "loss": 1.5255, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.5908938734516237, |
| "grad_norm": 0.12860101461410522, |
| "learning_rate": 0.0001, |
| "loss": 1.4694, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.5912286575159023, |
| "grad_norm": 0.12808945775032043, |
| "learning_rate": 0.0001, |
| "loss": 1.6068, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.5915634415801808, |
| "grad_norm": 0.13219839334487915, |
| "learning_rate": 0.0001, |
| "loss": 1.5519, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.5918982256444594, |
| "grad_norm": 0.12471086531877518, |
| "learning_rate": 0.0001, |
| "loss": 1.4465, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.5922330097087378, |
| "grad_norm": 0.13721035420894623, |
| "learning_rate": 0.0001, |
| "loss": 1.5656, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.5925677937730164, |
| "grad_norm": 0.1299833208322525, |
| "learning_rate": 0.0001, |
| "loss": 1.4767, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.5929025778372949, |
| "grad_norm": 0.13570041954517365, |
| "learning_rate": 0.0001, |
| "loss": 1.5929, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.5932373619015735, |
| "grad_norm": 0.12360662966966629, |
| "learning_rate": 0.0001, |
| "loss": 1.4179, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.5935721459658521, |
| "grad_norm": 0.138414204120636, |
| "learning_rate": 0.0001, |
| "loss": 1.6123, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.5939069300301306, |
| "grad_norm": 0.1347961127758026, |
| "learning_rate": 0.0001, |
| "loss": 1.6135, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.5942417140944091, |
| "grad_norm": 0.1333123743534088, |
| "learning_rate": 0.0001, |
| "loss": 1.3935, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.5945764981586876, |
| "grad_norm": 0.13112439215183258, |
| "learning_rate": 0.0001, |
| "loss": 1.5531, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.5949112822229662, |
| "grad_norm": 0.1356613039970398, |
| "learning_rate": 0.0001, |
| "loss": 1.5338, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.5952460662872447, |
| "grad_norm": 0.13762056827545166, |
| "learning_rate": 0.0001, |
| "loss": 1.5684, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.5955808503515233, |
| "grad_norm": 0.13242678344249725, |
| "learning_rate": 0.0001, |
| "loss": 1.5946, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.5959156344158018, |
| "grad_norm": 0.1304038166999817, |
| "learning_rate": 0.0001, |
| "loss": 1.5634, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.5962504184800803, |
| "grad_norm": 0.13004854321479797, |
| "learning_rate": 0.0001, |
| "loss": 1.5612, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.5965852025443589, |
| "grad_norm": 0.13909399509429932, |
| "learning_rate": 0.0001, |
| "loss": 1.5613, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.5969199866086374, |
| "grad_norm": 0.13109537959098816, |
| "learning_rate": 0.0001, |
| "loss": 1.5769, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.597254770672916, |
| "grad_norm": 0.13889670372009277, |
| "learning_rate": 0.0001, |
| "loss": 1.5788, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.5975895547371946, |
| "grad_norm": 0.12981747090816498, |
| "learning_rate": 0.0001, |
| "loss": 1.5294, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.597924338801473, |
| "grad_norm": 0.12865106761455536, |
| "learning_rate": 0.0001, |
| "loss": 1.5907, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.5982591228657516, |
| "grad_norm": 0.13081815838813782, |
| "learning_rate": 0.0001, |
| "loss": 1.6513, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.5985939069300301, |
| "grad_norm": 0.1357847899198532, |
| "learning_rate": 0.0001, |
| "loss": 1.6925, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.5989286909943087, |
| "grad_norm": 0.1296125054359436, |
| "learning_rate": 0.0001, |
| "loss": 1.5362, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.5992634750585872, |
| "grad_norm": 0.13272371888160706, |
| "learning_rate": 0.0001, |
| "loss": 1.669, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.5995982591228658, |
| "grad_norm": 0.1340399980545044, |
| "learning_rate": 0.0001, |
| "loss": 1.5674, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.5999330431871442, |
| "grad_norm": 0.12497217208147049, |
| "learning_rate": 0.0001, |
| "loss": 1.4629, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.6002678272514228, |
| "grad_norm": 0.14285002648830414, |
| "learning_rate": 0.0001, |
| "loss": 1.5278, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.6006026113157014, |
| "grad_norm": 0.1328384429216385, |
| "learning_rate": 0.0001, |
| "loss": 1.5532, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.6009373953799799, |
| "grad_norm": 0.13168397545814514, |
| "learning_rate": 0.0001, |
| "loss": 1.6406, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.6012721794442585, |
| "grad_norm": 0.12567539513111115, |
| "learning_rate": 0.0001, |
| "loss": 1.5389, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.601606963508537, |
| "grad_norm": 0.13105528056621552, |
| "learning_rate": 0.0001, |
| "loss": 1.5754, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.6019417475728155, |
| "grad_norm": 0.1292327493429184, |
| "learning_rate": 0.0001, |
| "loss": 1.4713, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.602276531637094, |
| "grad_norm": 0.12788547575473785, |
| "learning_rate": 0.0001, |
| "loss": 1.5787, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.6026113157013726, |
| "grad_norm": 0.1307074874639511, |
| "learning_rate": 0.0001, |
| "loss": 1.6191, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6029460997656512, |
| "grad_norm": 0.136485293507576, |
| "learning_rate": 0.0001, |
| "loss": 1.6063, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.6032808838299297, |
| "grad_norm": 0.12938566505908966, |
| "learning_rate": 0.0001, |
| "loss": 1.5466, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.6036156678942083, |
| "grad_norm": 0.12429405003786087, |
| "learning_rate": 0.0001, |
| "loss": 1.4672, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.6039504519584867, |
| "grad_norm": 0.12657684087753296, |
| "learning_rate": 0.0001, |
| "loss": 1.5159, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.6042852360227653, |
| "grad_norm": 0.13287223875522614, |
| "learning_rate": 0.0001, |
| "loss": 1.5838, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.6046200200870439, |
| "grad_norm": 0.13268281519412994, |
| "learning_rate": 0.0001, |
| "loss": 1.5282, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.6049548041513224, |
| "grad_norm": 0.1264685094356537, |
| "learning_rate": 0.0001, |
| "loss": 1.5795, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.605289588215601, |
| "grad_norm": 0.1276138424873352, |
| "learning_rate": 0.0001, |
| "loss": 1.4648, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.6056243722798795, |
| "grad_norm": 0.13063056766986847, |
| "learning_rate": 0.0001, |
| "loss": 1.5692, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.605959156344158, |
| "grad_norm": 0.12172877043485641, |
| "learning_rate": 0.0001, |
| "loss": 1.4785, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.6062939404084365, |
| "grad_norm": 0.13516037166118622, |
| "learning_rate": 0.0001, |
| "loss": 1.5316, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.6066287244727151, |
| "grad_norm": 0.12978719174861908, |
| "learning_rate": 0.0001, |
| "loss": 1.5103, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.6069635085369937, |
| "grad_norm": 0.1354977786540985, |
| "learning_rate": 0.0001, |
| "loss": 1.5368, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.6072982926012722, |
| "grad_norm": 0.12445911020040512, |
| "learning_rate": 0.0001, |
| "loss": 1.4966, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.6076330766655507, |
| "grad_norm": 0.13546685874462128, |
| "learning_rate": 0.0001, |
| "loss": 1.62, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.6079678607298292, |
| "grad_norm": 0.12861642241477966, |
| "learning_rate": 0.0001, |
| "loss": 1.5895, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.6083026447941078, |
| "grad_norm": 0.13455091416835785, |
| "learning_rate": 0.0001, |
| "loss": 1.5217, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.6086374288583863, |
| "grad_norm": 0.13514240086078644, |
| "learning_rate": 0.0001, |
| "loss": 1.5947, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.6089722129226649, |
| "grad_norm": 0.12753477692604065, |
| "learning_rate": 0.0001, |
| "loss": 1.492, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.6093069969869435, |
| "grad_norm": 0.1335463970899582, |
| "learning_rate": 0.0001, |
| "loss": 1.5806, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.6096417810512219, |
| "grad_norm": 0.14587751030921936, |
| "learning_rate": 0.0001, |
| "loss": 1.5679, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.6099765651155005, |
| "grad_norm": 0.13787920773029327, |
| "learning_rate": 0.0001, |
| "loss": 1.4759, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.610311349179779, |
| "grad_norm": 0.135360449552536, |
| "learning_rate": 0.0001, |
| "loss": 1.4968, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.6106461332440576, |
| "grad_norm": 0.13543657958507538, |
| "learning_rate": 0.0001, |
| "loss": 1.5321, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.6109809173083361, |
| "grad_norm": 0.127221018075943, |
| "learning_rate": 0.0001, |
| "loss": 1.5239, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.6113157013726147, |
| "grad_norm": 0.1439230740070343, |
| "learning_rate": 0.0001, |
| "loss": 1.6458, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.6116504854368932, |
| "grad_norm": 0.13141925632953644, |
| "learning_rate": 0.0001, |
| "loss": 1.504, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.6119852695011717, |
| "grad_norm": 0.12811610102653503, |
| "learning_rate": 0.0001, |
| "loss": 1.6137, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.6123200535654503, |
| "grad_norm": 0.13353578746318817, |
| "learning_rate": 0.0001, |
| "loss": 1.5209, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.6126548376297288, |
| "grad_norm": 0.13006985187530518, |
| "learning_rate": 0.0001, |
| "loss": 1.4776, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.6129896216940074, |
| "grad_norm": 0.1350172609090805, |
| "learning_rate": 0.0001, |
| "loss": 1.5994, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.613324405758286, |
| "grad_norm": 0.13640815019607544, |
| "learning_rate": 0.0001, |
| "loss": 1.6383, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.6136591898225644, |
| "grad_norm": 0.14161550998687744, |
| "learning_rate": 0.0001, |
| "loss": 1.5486, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.613993973886843, |
| "grad_norm": 0.12927186489105225, |
| "learning_rate": 0.0001, |
| "loss": 1.5166, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.6143287579511215, |
| "grad_norm": 0.1287536919116974, |
| "learning_rate": 0.0001, |
| "loss": 1.496, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.6146635420154001, |
| "grad_norm": 0.13734175264835358, |
| "learning_rate": 0.0001, |
| "loss": 1.5638, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.6149983260796786, |
| "grad_norm": 0.13784490525722504, |
| "learning_rate": 0.0001, |
| "loss": 1.593, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.6153331101439572, |
| "grad_norm": 0.1259312480688095, |
| "learning_rate": 0.0001, |
| "loss": 1.5208, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.6156678942082356, |
| "grad_norm": 0.15089771151542664, |
| "learning_rate": 0.0001, |
| "loss": 1.5251, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.6160026782725142, |
| "grad_norm": 0.14801523089408875, |
| "learning_rate": 0.0001, |
| "loss": 1.5706, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.6163374623367928, |
| "grad_norm": 0.1345253735780716, |
| "learning_rate": 0.0001, |
| "loss": 1.5695, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.6166722464010713, |
| "grad_norm": 0.15094773471355438, |
| "learning_rate": 0.0001, |
| "loss": 1.5744, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.6170070304653499, |
| "grad_norm": 0.13193759322166443, |
| "learning_rate": 0.0001, |
| "loss": 1.5345, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.6173418145296284, |
| "grad_norm": 0.12728765606880188, |
| "learning_rate": 0.0001, |
| "loss": 1.5026, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.6176765985939069, |
| "grad_norm": 0.14725570380687714, |
| "learning_rate": 0.0001, |
| "loss": 1.581, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.6180113826581854, |
| "grad_norm": 0.13824598491191864, |
| "learning_rate": 0.0001, |
| "loss": 1.5359, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.618346166722464, |
| "grad_norm": 0.12178414314985275, |
| "learning_rate": 0.0001, |
| "loss": 1.4936, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.6186809507867426, |
| "grad_norm": 0.156047984957695, |
| "learning_rate": 0.0001, |
| "loss": 1.5737, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.6190157348510211, |
| "grad_norm": 0.15707126259803772, |
| "learning_rate": 0.0001, |
| "loss": 1.6287, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.6193505189152997, |
| "grad_norm": 0.1378837376832962, |
| "learning_rate": 0.0001, |
| "loss": 1.616, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6196853029795781, |
| "grad_norm": 0.1423729658126831, |
| "learning_rate": 0.0001, |
| "loss": 1.5409, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.6200200870438567, |
| "grad_norm": 0.16630493104457855, |
| "learning_rate": 0.0001, |
| "loss": 1.6264, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.6203548711081353, |
| "grad_norm": 0.13753686845302582, |
| "learning_rate": 0.0001, |
| "loss": 1.6104, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.6206896551724138, |
| "grad_norm": 0.13337332010269165, |
| "learning_rate": 0.0001, |
| "loss": 1.5104, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.6210244392366924, |
| "grad_norm": 0.14229977130889893, |
| "learning_rate": 0.0001, |
| "loss": 1.4228, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.6213592233009708, |
| "grad_norm": 0.1403966248035431, |
| "learning_rate": 0.0001, |
| "loss": 1.5623, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.6216940073652494, |
| "grad_norm": 0.12786665558815002, |
| "learning_rate": 0.0001, |
| "loss": 1.5058, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.6220287914295279, |
| "grad_norm": 0.14748771488666534, |
| "learning_rate": 0.0001, |
| "loss": 1.5004, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.6223635754938065, |
| "grad_norm": 0.14041772484779358, |
| "learning_rate": 0.0001, |
| "loss": 1.6154, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.6226983595580851, |
| "grad_norm": 0.1256851702928543, |
| "learning_rate": 0.0001, |
| "loss": 1.4634, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.6230331436223636, |
| "grad_norm": 0.12676502764225006, |
| "learning_rate": 0.0001, |
| "loss": 1.5163, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.6233679276866421, |
| "grad_norm": 0.14927968382835388, |
| "learning_rate": 0.0001, |
| "loss": 1.5686, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.6237027117509206, |
| "grad_norm": 0.1308298408985138, |
| "learning_rate": 0.0001, |
| "loss": 1.5032, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.6240374958151992, |
| "grad_norm": 0.13208165764808655, |
| "learning_rate": 0.0001, |
| "loss": 1.5519, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.6243722798794777, |
| "grad_norm": 0.13822416961193085, |
| "learning_rate": 0.0001, |
| "loss": 1.5664, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.6247070639437563, |
| "grad_norm": 0.13646993041038513, |
| "learning_rate": 0.0001, |
| "loss": 1.5361, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.6250418480080349, |
| "grad_norm": 0.1273556500673294, |
| "learning_rate": 0.0001, |
| "loss": 1.546, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.6253766320723133, |
| "grad_norm": 0.13555049896240234, |
| "learning_rate": 0.0001, |
| "loss": 1.5288, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.6257114161365919, |
| "grad_norm": 0.13126762211322784, |
| "learning_rate": 0.0001, |
| "loss": 1.4659, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.6260462002008704, |
| "grad_norm": 0.1348927766084671, |
| "learning_rate": 0.0001, |
| "loss": 1.5812, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.626380984265149, |
| "grad_norm": 0.1363980621099472, |
| "learning_rate": 0.0001, |
| "loss": 1.6506, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.6267157683294275, |
| "grad_norm": 0.13422980904579163, |
| "learning_rate": 0.0001, |
| "loss": 1.5298, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.6270505523937061, |
| "grad_norm": 0.12745925784111023, |
| "learning_rate": 0.0001, |
| "loss": 1.4898, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.6273853364579846, |
| "grad_norm": 0.1292264759540558, |
| "learning_rate": 0.0001, |
| "loss": 1.548, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.6277201205222631, |
| "grad_norm": 0.1412927806377411, |
| "learning_rate": 0.0001, |
| "loss": 1.5228, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.6280549045865417, |
| "grad_norm": 0.1328163594007492, |
| "learning_rate": 0.0001, |
| "loss": 1.5521, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.6283896886508202, |
| "grad_norm": 0.1258804351091385, |
| "learning_rate": 0.0001, |
| "loss": 1.4781, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.6287244727150988, |
| "grad_norm": 0.128944993019104, |
| "learning_rate": 0.0001, |
| "loss": 1.5123, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.6290592567793774, |
| "grad_norm": 0.1244087815284729, |
| "learning_rate": 0.0001, |
| "loss": 1.4386, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.6293940408436558, |
| "grad_norm": 0.12890097498893738, |
| "learning_rate": 0.0001, |
| "loss": 1.5266, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.6297288249079344, |
| "grad_norm": 0.1312391459941864, |
| "learning_rate": 0.0001, |
| "loss": 1.5395, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.6300636089722129, |
| "grad_norm": 0.13363149762153625, |
| "learning_rate": 0.0001, |
| "loss": 1.5721, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.6303983930364915, |
| "grad_norm": 0.13130998611450195, |
| "learning_rate": 0.0001, |
| "loss": 1.5542, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.63073317710077, |
| "grad_norm": 0.13050179183483124, |
| "learning_rate": 0.0001, |
| "loss": 1.5422, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.6310679611650486, |
| "grad_norm": 0.13548725843429565, |
| "learning_rate": 0.0001, |
| "loss": 1.5597, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.631402745229327, |
| "grad_norm": 0.13810521364212036, |
| "learning_rate": 0.0001, |
| "loss": 1.6428, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.6317375292936056, |
| "grad_norm": 0.12898769974708557, |
| "learning_rate": 0.0001, |
| "loss": 1.5091, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.6320723133578842, |
| "grad_norm": 0.13874949514865875, |
| "learning_rate": 0.0001, |
| "loss": 1.473, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.6324070974221627, |
| "grad_norm": 0.1275644749403, |
| "learning_rate": 0.0001, |
| "loss": 1.5844, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.6327418814864413, |
| "grad_norm": 0.13245896995067596, |
| "learning_rate": 0.0001, |
| "loss": 1.602, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.6330766655507197, |
| "grad_norm": 0.13937050104141235, |
| "learning_rate": 0.0001, |
| "loss": 1.6106, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.6334114496149983, |
| "grad_norm": 0.13569729030132294, |
| "learning_rate": 0.0001, |
| "loss": 1.523, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.6337462336792768, |
| "grad_norm": 0.1360468864440918, |
| "learning_rate": 0.0001, |
| "loss": 1.5032, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.6340810177435554, |
| "grad_norm": 0.12757538259029388, |
| "learning_rate": 0.0001, |
| "loss": 1.487, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.634415801807834, |
| "grad_norm": 0.13325755298137665, |
| "learning_rate": 0.0001, |
| "loss": 1.5386, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.6347505858721125, |
| "grad_norm": 0.1348341703414917, |
| "learning_rate": 0.0001, |
| "loss": 1.6195, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.635085369936391, |
| "grad_norm": 0.14284925162792206, |
| "learning_rate": 0.0001, |
| "loss": 1.636, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.6354201540006695, |
| "grad_norm": 0.12641146779060364, |
| "learning_rate": 0.0001, |
| "loss": 1.5172, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.6357549380649481, |
| "grad_norm": 0.1327671855688095, |
| "learning_rate": 0.0001, |
| "loss": 1.6519, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.6360897221292267, |
| "grad_norm": 0.13408274948596954, |
| "learning_rate": 0.0001, |
| "loss": 1.4722, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6364245061935052, |
| "grad_norm": 0.13136939704418182, |
| "learning_rate": 0.0001, |
| "loss": 1.56, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.6367592902577838, |
| "grad_norm": 0.13018733263015747, |
| "learning_rate": 0.0001, |
| "loss": 1.5499, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.6370940743220622, |
| "grad_norm": 0.137217178940773, |
| "learning_rate": 0.0001, |
| "loss": 1.6224, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.6374288583863408, |
| "grad_norm": 0.12886135280132294, |
| "learning_rate": 0.0001, |
| "loss": 1.5993, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.6377636424506193, |
| "grad_norm": 0.12878277897834778, |
| "learning_rate": 0.0001, |
| "loss": 1.4407, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.6380984265148979, |
| "grad_norm": 0.12817195057868958, |
| "learning_rate": 0.0001, |
| "loss": 1.5113, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.6384332105791765, |
| "grad_norm": 0.12779603898525238, |
| "learning_rate": 0.0001, |
| "loss": 1.573, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.638767994643455, |
| "grad_norm": 0.13575701415538788, |
| "learning_rate": 0.0001, |
| "loss": 1.5689, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.6391027787077335, |
| "grad_norm": 0.1292586326599121, |
| "learning_rate": 0.0001, |
| "loss": 1.5853, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.639437562772012, |
| "grad_norm": 0.13209429383277893, |
| "learning_rate": 0.0001, |
| "loss": 1.5374, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.6397723468362906, |
| "grad_norm": 0.13795161247253418, |
| "learning_rate": 0.0001, |
| "loss": 1.5752, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.6401071309005691, |
| "grad_norm": 0.13106195628643036, |
| "learning_rate": 0.0001, |
| "loss": 1.5074, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.6404419149648477, |
| "grad_norm": 0.1364029496908188, |
| "learning_rate": 0.0001, |
| "loss": 1.4415, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.6407766990291263, |
| "grad_norm": 0.13437704741954803, |
| "learning_rate": 0.0001, |
| "loss": 1.5179, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.6411114830934047, |
| "grad_norm": 0.12899838387966156, |
| "learning_rate": 0.0001, |
| "loss": 1.4437, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.6414462671576833, |
| "grad_norm": 0.1336640864610672, |
| "learning_rate": 0.0001, |
| "loss": 1.4988, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.6417810512219618, |
| "grad_norm": 0.13116469979286194, |
| "learning_rate": 0.0001, |
| "loss": 1.5944, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.6421158352862404, |
| "grad_norm": 0.1323315054178238, |
| "learning_rate": 0.0001, |
| "loss": 1.6378, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.642450619350519, |
| "grad_norm": 0.13012604415416718, |
| "learning_rate": 0.0001, |
| "loss": 1.591, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.6427854034147975, |
| "grad_norm": 0.13358043134212494, |
| "learning_rate": 0.0001, |
| "loss": 1.4948, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.643120187479076, |
| "grad_norm": 0.13027198612689972, |
| "learning_rate": 0.0001, |
| "loss": 1.5749, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.6434549715433545, |
| "grad_norm": 0.11880921572446823, |
| "learning_rate": 0.0001, |
| "loss": 1.434, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.6437897556076331, |
| "grad_norm": 0.1275249421596527, |
| "learning_rate": 0.0001, |
| "loss": 1.5074, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.6441245396719116, |
| "grad_norm": 0.13402846455574036, |
| "learning_rate": 0.0001, |
| "loss": 1.6019, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.6444593237361902, |
| "grad_norm": 0.1263839304447174, |
| "learning_rate": 0.0001, |
| "loss": 1.494, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.6447941078004688, |
| "grad_norm": 0.12889358401298523, |
| "learning_rate": 0.0001, |
| "loss": 1.4811, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.6451288918647472, |
| "grad_norm": 0.13030682504177094, |
| "learning_rate": 0.0001, |
| "loss": 1.5573, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.6454636759290258, |
| "grad_norm": 0.12815749645233154, |
| "learning_rate": 0.0001, |
| "loss": 1.5839, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.6457984599933043, |
| "grad_norm": 0.13763943314552307, |
| "learning_rate": 0.0001, |
| "loss": 1.4967, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.6461332440575829, |
| "grad_norm": 0.12890425324440002, |
| "learning_rate": 0.0001, |
| "loss": 1.4861, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.6464680281218614, |
| "grad_norm": 0.13768140971660614, |
| "learning_rate": 0.0001, |
| "loss": 1.5095, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.6468028121861399, |
| "grad_norm": 0.1268666833639145, |
| "learning_rate": 0.0001, |
| "loss": 1.5237, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.6471375962504184, |
| "grad_norm": 0.13325713574886322, |
| "learning_rate": 0.0001, |
| "loss": 1.593, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.647472380314697, |
| "grad_norm": 0.13848131895065308, |
| "learning_rate": 0.0001, |
| "loss": 1.4935, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.6478071643789756, |
| "grad_norm": 0.1393735706806183, |
| "learning_rate": 0.0001, |
| "loss": 1.6234, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.6481419484432541, |
| "grad_norm": 0.1441955864429474, |
| "learning_rate": 0.0001, |
| "loss": 1.6218, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.6484767325075327, |
| "grad_norm": 0.13111312687397003, |
| "learning_rate": 0.0001, |
| "loss": 1.5639, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.6488115165718111, |
| "grad_norm": 0.12940305471420288, |
| "learning_rate": 0.0001, |
| "loss": 1.5864, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.6491463006360897, |
| "grad_norm": 0.13657227158546448, |
| "learning_rate": 0.0001, |
| "loss": 1.5125, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.6494810847003682, |
| "grad_norm": 0.12390992790460587, |
| "learning_rate": 0.0001, |
| "loss": 1.4631, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.6498158687646468, |
| "grad_norm": 0.1316480040550232, |
| "learning_rate": 0.0001, |
| "loss": 1.5343, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.6501506528289254, |
| "grad_norm": 0.13427673280239105, |
| "learning_rate": 0.0001, |
| "loss": 1.5456, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.6504854368932039, |
| "grad_norm": 0.1284562200307846, |
| "learning_rate": 0.0001, |
| "loss": 1.5017, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.6508202209574824, |
| "grad_norm": 0.13431181013584137, |
| "learning_rate": 0.0001, |
| "loss": 1.45, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.6511550050217609, |
| "grad_norm": 0.13080428540706635, |
| "learning_rate": 0.0001, |
| "loss": 1.5035, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.6514897890860395, |
| "grad_norm": 0.13691136240959167, |
| "learning_rate": 0.0001, |
| "loss": 1.5145, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.651824573150318, |
| "grad_norm": 0.12990237772464752, |
| "learning_rate": 0.0001, |
| "loss": 1.5393, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.6521593572145966, |
| "grad_norm": 0.12529443204402924, |
| "learning_rate": 0.0001, |
| "loss": 1.468, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.6524941412788752, |
| "grad_norm": 0.13029485940933228, |
| "learning_rate": 0.0001, |
| "loss": 1.5229, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.6528289253431536, |
| "grad_norm": 0.13873140513896942, |
| "learning_rate": 0.0001, |
| "loss": 1.5667, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.6531637094074322, |
| "grad_norm": 0.13176368176937103, |
| "learning_rate": 0.0001, |
| "loss": 1.4231, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.6534984934717107, |
| "grad_norm": 0.13046538829803467, |
| "learning_rate": 0.0001, |
| "loss": 1.5151, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.6538332775359893, |
| "grad_norm": 0.1290617287158966, |
| "learning_rate": 0.0001, |
| "loss": 1.6184, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.6541680616002679, |
| "grad_norm": 0.13826888799667358, |
| "learning_rate": 0.0001, |
| "loss": 1.5597, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.6545028456645464, |
| "grad_norm": 0.1341448426246643, |
| "learning_rate": 0.0001, |
| "loss": 1.5763, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.6548376297288249, |
| "grad_norm": 0.1293526589870453, |
| "learning_rate": 0.0001, |
| "loss": 1.4475, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.6551724137931034, |
| "grad_norm": 0.12727828323841095, |
| "learning_rate": 0.0001, |
| "loss": 1.5195, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.655507197857382, |
| "grad_norm": 0.13981108367443085, |
| "learning_rate": 0.0001, |
| "loss": 1.6515, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.6558419819216605, |
| "grad_norm": 0.1339573711156845, |
| "learning_rate": 0.0001, |
| "loss": 1.4506, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.6561767659859391, |
| "grad_norm": 0.13203227519989014, |
| "learning_rate": 0.0001, |
| "loss": 1.5553, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.6565115500502177, |
| "grad_norm": 0.1276148110628128, |
| "learning_rate": 0.0001, |
| "loss": 1.5442, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.6568463341144961, |
| "grad_norm": 0.13206414878368378, |
| "learning_rate": 0.0001, |
| "loss": 1.4193, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.6571811181787747, |
| "grad_norm": 0.14616969227790833, |
| "learning_rate": 0.0001, |
| "loss": 1.6147, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.6575159022430532, |
| "grad_norm": 0.13604846596717834, |
| "learning_rate": 0.0001, |
| "loss": 1.5652, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.6578506863073318, |
| "grad_norm": 0.13196608424186707, |
| "learning_rate": 0.0001, |
| "loss": 1.565, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.6581854703716103, |
| "grad_norm": 0.14214178919792175, |
| "learning_rate": 0.0001, |
| "loss": 1.5692, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.6585202544358888, |
| "grad_norm": 0.1290048062801361, |
| "learning_rate": 0.0001, |
| "loss": 1.5004, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.6588550385001674, |
| "grad_norm": 0.13306178152561188, |
| "learning_rate": 0.0001, |
| "loss": 1.5913, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.6591898225644459, |
| "grad_norm": 0.1337195485830307, |
| "learning_rate": 0.0001, |
| "loss": 1.5888, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.6595246066287245, |
| "grad_norm": 0.1345224380493164, |
| "learning_rate": 0.0001, |
| "loss": 1.5513, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.659859390693003, |
| "grad_norm": 0.12885946035385132, |
| "learning_rate": 0.0001, |
| "loss": 1.4686, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.6601941747572816, |
| "grad_norm": 0.1352531760931015, |
| "learning_rate": 0.0001, |
| "loss": 1.5958, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.66052895882156, |
| "grad_norm": 0.12501929700374603, |
| "learning_rate": 0.0001, |
| "loss": 1.4162, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.6608637428858386, |
| "grad_norm": 0.1291869580745697, |
| "learning_rate": 0.0001, |
| "loss": 1.4463, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.6611985269501172, |
| "grad_norm": 0.14670369029045105, |
| "learning_rate": 0.0001, |
| "loss": 1.4661, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.6615333110143957, |
| "grad_norm": 0.13643884658813477, |
| "learning_rate": 0.0001, |
| "loss": 1.5677, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.6618680950786743, |
| "grad_norm": 0.13746634125709534, |
| "learning_rate": 0.0001, |
| "loss": 1.4903, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.6622028791429528, |
| "grad_norm": 0.14677157998085022, |
| "learning_rate": 0.0001, |
| "loss": 1.5492, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.6625376632072313, |
| "grad_norm": 0.1345069259405136, |
| "learning_rate": 0.0001, |
| "loss": 1.6059, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.6628724472715098, |
| "grad_norm": 0.13783417642116547, |
| "learning_rate": 0.0001, |
| "loss": 1.5546, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.6632072313357884, |
| "grad_norm": 0.13266097009181976, |
| "learning_rate": 0.0001, |
| "loss": 1.4469, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.663542015400067, |
| "grad_norm": 0.13931085169315338, |
| "learning_rate": 0.0001, |
| "loss": 1.5797, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.6638767994643455, |
| "grad_norm": 0.13039837777614594, |
| "learning_rate": 0.0001, |
| "loss": 1.4508, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.6642115835286241, |
| "grad_norm": 0.13921616971492767, |
| "learning_rate": 0.0001, |
| "loss": 1.6177, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.6645463675929025, |
| "grad_norm": 0.1381753534078598, |
| "learning_rate": 0.0001, |
| "loss": 1.6578, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.6648811516571811, |
| "grad_norm": 0.1361846625804901, |
| "learning_rate": 0.0001, |
| "loss": 1.5422, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.6652159357214597, |
| "grad_norm": 0.14170324802398682, |
| "learning_rate": 0.0001, |
| "loss": 1.6339, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.6655507197857382, |
| "grad_norm": 0.13164804875850677, |
| "learning_rate": 0.0001, |
| "loss": 1.5623, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.6658855038500168, |
| "grad_norm": 0.13766439259052277, |
| "learning_rate": 0.0001, |
| "loss": 1.5661, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.6662202879142953, |
| "grad_norm": 0.1340639889240265, |
| "learning_rate": 0.0001, |
| "loss": 1.6035, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.6665550719785738, |
| "grad_norm": 0.132024347782135, |
| "learning_rate": 0.0001, |
| "loss": 1.6319, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.6668898560428523, |
| "grad_norm": 0.13272161781787872, |
| "learning_rate": 0.0001, |
| "loss": 1.4522, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.6672246401071309, |
| "grad_norm": 0.14372223615646362, |
| "learning_rate": 0.0001, |
| "loss": 1.581, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.6675594241714095, |
| "grad_norm": 0.13869139552116394, |
| "learning_rate": 0.0001, |
| "loss": 1.6178, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.667894208235688, |
| "grad_norm": 0.12776124477386475, |
| "learning_rate": 0.0001, |
| "loss": 1.51, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.6682289922999666, |
| "grad_norm": 0.13583005964756012, |
| "learning_rate": 0.0001, |
| "loss": 1.5771, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.668563776364245, |
| "grad_norm": 0.13394635915756226, |
| "learning_rate": 0.0001, |
| "loss": 1.5605, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.6688985604285236, |
| "grad_norm": 0.13842739164829254, |
| "learning_rate": 0.0001, |
| "loss": 1.5541, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.6692333444928021, |
| "grad_norm": 0.13265378773212433, |
| "learning_rate": 0.0001, |
| "loss": 1.5772, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.6695681285570807, |
| "grad_norm": 0.13662943243980408, |
| "learning_rate": 0.0001, |
| "loss": 1.591, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6699029126213593, |
| "grad_norm": 0.12512929737567902, |
| "learning_rate": 0.0001, |
| "loss": 1.4162, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.6702376966856378, |
| "grad_norm": 0.1327543556690216, |
| "learning_rate": 0.0001, |
| "loss": 1.4978, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.6705724807499163, |
| "grad_norm": 0.13269194960594177, |
| "learning_rate": 0.0001, |
| "loss": 1.5998, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.6709072648141948, |
| "grad_norm": 0.14017336070537567, |
| "learning_rate": 0.0001, |
| "loss": 1.5785, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.6712420488784734, |
| "grad_norm": 0.1304367482662201, |
| "learning_rate": 0.0001, |
| "loss": 1.4781, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.671576832942752, |
| "grad_norm": 0.13442495465278625, |
| "learning_rate": 0.0001, |
| "loss": 1.5358, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.6719116170070305, |
| "grad_norm": 0.13490137457847595, |
| "learning_rate": 0.0001, |
| "loss": 1.6273, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.672246401071309, |
| "grad_norm": 0.1324394941329956, |
| "learning_rate": 0.0001, |
| "loss": 1.5884, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.6725811851355875, |
| "grad_norm": 0.12797103822231293, |
| "learning_rate": 0.0001, |
| "loss": 1.551, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.6729159691998661, |
| "grad_norm": 0.13374999165534973, |
| "learning_rate": 0.0001, |
| "loss": 1.5571, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.6732507532641446, |
| "grad_norm": 0.13020572066307068, |
| "learning_rate": 0.0001, |
| "loss": 1.4756, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.6735855373284232, |
| "grad_norm": 0.12501733005046844, |
| "learning_rate": 0.0001, |
| "loss": 1.5073, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.6739203213927017, |
| "grad_norm": 0.12433689087629318, |
| "learning_rate": 0.0001, |
| "loss": 1.4574, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.6742551054569802, |
| "grad_norm": 0.14026397466659546, |
| "learning_rate": 0.0001, |
| "loss": 1.5513, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.6745898895212588, |
| "grad_norm": 0.1340554803609848, |
| "learning_rate": 0.0001, |
| "loss": 1.5686, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.6749246735855373, |
| "grad_norm": 0.12796646356582642, |
| "learning_rate": 0.0001, |
| "loss": 1.4842, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.6752594576498159, |
| "grad_norm": 0.1362949162721634, |
| "learning_rate": 0.0001, |
| "loss": 1.5763, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.6755942417140944, |
| "grad_norm": 0.1347300410270691, |
| "learning_rate": 0.0001, |
| "loss": 1.5975, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.675929025778373, |
| "grad_norm": 0.13647662103176117, |
| "learning_rate": 0.0001, |
| "loss": 1.5395, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.6762638098426514, |
| "grad_norm": 0.13441947102546692, |
| "learning_rate": 0.0001, |
| "loss": 1.5726, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.67659859390693, |
| "grad_norm": 0.13435856997966766, |
| "learning_rate": 0.0001, |
| "loss": 1.6806, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.6769333779712086, |
| "grad_norm": 0.1239754781126976, |
| "learning_rate": 0.0001, |
| "loss": 1.4045, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.6772681620354871, |
| "grad_norm": 0.13493669033050537, |
| "learning_rate": 0.0001, |
| "loss": 1.5606, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.6776029460997657, |
| "grad_norm": 0.12938407063484192, |
| "learning_rate": 0.0001, |
| "loss": 1.5201, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.6779377301640442, |
| "grad_norm": 0.12213901430368423, |
| "learning_rate": 0.0001, |
| "loss": 1.4436, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.6782725142283227, |
| "grad_norm": 0.14107517898082733, |
| "learning_rate": 0.0001, |
| "loss": 1.5584, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.6786072982926012, |
| "grad_norm": 0.13082027435302734, |
| "learning_rate": 0.0001, |
| "loss": 1.5278, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.6789420823568798, |
| "grad_norm": 0.14623381197452545, |
| "learning_rate": 0.0001, |
| "loss": 1.668, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.6792768664211584, |
| "grad_norm": 0.12862159311771393, |
| "learning_rate": 0.0001, |
| "loss": 1.5534, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.6796116504854369, |
| "grad_norm": 0.13177117705345154, |
| "learning_rate": 0.0001, |
| "loss": 1.5564, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.6799464345497155, |
| "grad_norm": 0.12835298478603363, |
| "learning_rate": 0.0001, |
| "loss": 1.479, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.6802812186139939, |
| "grad_norm": 0.14096349477767944, |
| "learning_rate": 0.0001, |
| "loss": 1.6175, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.6806160026782725, |
| "grad_norm": 0.12646090984344482, |
| "learning_rate": 0.0001, |
| "loss": 1.4861, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.680950786742551, |
| "grad_norm": 0.137931689620018, |
| "learning_rate": 0.0001, |
| "loss": 1.5051, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.6812855708068296, |
| "grad_norm": 0.13240592181682587, |
| "learning_rate": 0.0001, |
| "loss": 1.5868, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.6816203548711082, |
| "grad_norm": 0.1362670511007309, |
| "learning_rate": 0.0001, |
| "loss": 1.5899, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.6819551389353867, |
| "grad_norm": 0.13148629665374756, |
| "learning_rate": 0.0001, |
| "loss": 1.521, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.6822899229996652, |
| "grad_norm": 0.13285885751247406, |
| "learning_rate": 0.0001, |
| "loss": 1.5122, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.6826247070639437, |
| "grad_norm": 0.1264655739068985, |
| "learning_rate": 0.0001, |
| "loss": 1.4886, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.6829594911282223, |
| "grad_norm": 0.12677529454231262, |
| "learning_rate": 0.0001, |
| "loss": 1.5068, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.6832942751925009, |
| "grad_norm": 0.13277101516723633, |
| "learning_rate": 0.0001, |
| "loss": 1.6065, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.6836290592567794, |
| "grad_norm": 0.13291488587856293, |
| "learning_rate": 0.0001, |
| "loss": 1.5755, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.6839638433210579, |
| "grad_norm": 0.13058260083198547, |
| "learning_rate": 0.0001, |
| "loss": 1.5286, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.6842986273853364, |
| "grad_norm": 0.13059435784816742, |
| "learning_rate": 0.0001, |
| "loss": 1.5803, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.684633411449615, |
| "grad_norm": 0.12917304039001465, |
| "learning_rate": 0.0001, |
| "loss": 1.576, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.6849681955138935, |
| "grad_norm": 0.12822791934013367, |
| "learning_rate": 0.0001, |
| "loss": 1.5201, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.6853029795781721, |
| "grad_norm": 0.14006927609443665, |
| "learning_rate": 0.0001, |
| "loss": 1.5445, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.6856377636424507, |
| "grad_norm": 0.13502942025661469, |
| "learning_rate": 0.0001, |
| "loss": 1.543, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.6859725477067291, |
| "grad_norm": 0.1351221352815628, |
| "learning_rate": 0.0001, |
| "loss": 1.5594, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.6863073317710077, |
| "grad_norm": 0.13474461436271667, |
| "learning_rate": 0.0001, |
| "loss": 1.5984, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.6866421158352862, |
| "grad_norm": 0.1317591369152069, |
| "learning_rate": 0.0001, |
| "loss": 1.5681, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.6869768998995648, |
| "grad_norm": 0.1300475299358368, |
| "learning_rate": 0.0001, |
| "loss": 1.5426, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.6873116839638433, |
| "grad_norm": 0.1308741718530655, |
| "learning_rate": 0.0001, |
| "loss": 1.5649, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.6876464680281219, |
| "grad_norm": 0.1339602768421173, |
| "learning_rate": 0.0001, |
| "loss": 1.5422, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.6879812520924004, |
| "grad_norm": 0.12556122243404388, |
| "learning_rate": 0.0001, |
| "loss": 1.3939, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.6883160361566789, |
| "grad_norm": 0.1331097036600113, |
| "learning_rate": 0.0001, |
| "loss": 1.5725, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.6886508202209575, |
| "grad_norm": 0.12769033014774323, |
| "learning_rate": 0.0001, |
| "loss": 1.5133, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.688985604285236, |
| "grad_norm": 0.13246020674705505, |
| "learning_rate": 0.0001, |
| "loss": 1.5533, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.6893203883495146, |
| "grad_norm": 0.13371361792087555, |
| "learning_rate": 0.0001, |
| "loss": 1.6253, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.6896551724137931, |
| "grad_norm": 0.1314792037010193, |
| "learning_rate": 0.0001, |
| "loss": 1.4943, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.6899899564780716, |
| "grad_norm": 0.13194666802883148, |
| "learning_rate": 0.0001, |
| "loss": 1.5983, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.6903247405423502, |
| "grad_norm": 0.13631388545036316, |
| "learning_rate": 0.0001, |
| "loss": 1.4932, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.6906595246066287, |
| "grad_norm": 0.1319463849067688, |
| "learning_rate": 0.0001, |
| "loss": 1.5848, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.6909943086709073, |
| "grad_norm": 0.14124637842178345, |
| "learning_rate": 0.0001, |
| "loss": 1.6066, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.6913290927351858, |
| "grad_norm": 0.12954577803611755, |
| "learning_rate": 0.0001, |
| "loss": 1.4153, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.6916638767994644, |
| "grad_norm": 0.1325748711824417, |
| "learning_rate": 0.0001, |
| "loss": 1.5766, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.6919986608637428, |
| "grad_norm": 0.13064290583133698, |
| "learning_rate": 0.0001, |
| "loss": 1.4995, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.6923334449280214, |
| "grad_norm": 0.1248745545744896, |
| "learning_rate": 0.0001, |
| "loss": 1.5077, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.6926682289923, |
| "grad_norm": 0.1278417706489563, |
| "learning_rate": 0.0001, |
| "loss": 1.5449, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.6930030130565785, |
| "grad_norm": 0.13311515748500824, |
| "learning_rate": 0.0001, |
| "loss": 1.5251, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.6933377971208571, |
| "grad_norm": 0.13218218088150024, |
| "learning_rate": 0.0001, |
| "loss": 1.5359, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.6936725811851356, |
| "grad_norm": 0.13042452931404114, |
| "learning_rate": 0.0001, |
| "loss": 1.5534, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.6940073652494141, |
| "grad_norm": 0.1393493264913559, |
| "learning_rate": 0.0001, |
| "loss": 1.594, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.6943421493136926, |
| "grad_norm": 0.1298573911190033, |
| "learning_rate": 0.0001, |
| "loss": 1.518, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.6946769333779712, |
| "grad_norm": 0.13325051963329315, |
| "learning_rate": 0.0001, |
| "loss": 1.5068, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.6950117174422498, |
| "grad_norm": 0.1269649714231491, |
| "learning_rate": 0.0001, |
| "loss": 1.4805, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.6953465015065283, |
| "grad_norm": 0.12699490785598755, |
| "learning_rate": 0.0001, |
| "loss": 1.4228, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.6956812855708069, |
| "grad_norm": 0.1379399597644806, |
| "learning_rate": 0.0001, |
| "loss": 1.4138, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.6960160696350853, |
| "grad_norm": 0.13343951106071472, |
| "learning_rate": 0.0001, |
| "loss": 1.5947, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.6963508536993639, |
| "grad_norm": 0.13461847603321075, |
| "learning_rate": 0.0001, |
| "loss": 1.5333, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.6966856377636425, |
| "grad_norm": 0.1299065202474594, |
| "learning_rate": 0.0001, |
| "loss": 1.5415, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.697020421827921, |
| "grad_norm": 0.1272873431444168, |
| "learning_rate": 0.0001, |
| "loss": 1.4443, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.6973552058921996, |
| "grad_norm": 0.136282280087471, |
| "learning_rate": 0.0001, |
| "loss": 1.4996, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.697689989956478, |
| "grad_norm": 0.12842769920825958, |
| "learning_rate": 0.0001, |
| "loss": 1.5574, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.6980247740207566, |
| "grad_norm": 0.12897315621376038, |
| "learning_rate": 0.0001, |
| "loss": 1.6162, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.6983595580850351, |
| "grad_norm": 0.13097885251045227, |
| "learning_rate": 0.0001, |
| "loss": 1.4949, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.6986943421493137, |
| "grad_norm": 0.13251438736915588, |
| "learning_rate": 0.0001, |
| "loss": 1.5041, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.6990291262135923, |
| "grad_norm": 0.1319066435098648, |
| "learning_rate": 0.0001, |
| "loss": 1.5499, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.6993639102778708, |
| "grad_norm": 0.13142657279968262, |
| "learning_rate": 0.0001, |
| "loss": 1.452, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.6996986943421493, |
| "grad_norm": 0.13348999619483948, |
| "learning_rate": 0.0001, |
| "loss": 1.4905, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.7000334784064278, |
| "grad_norm": 0.13037413358688354, |
| "learning_rate": 0.0001, |
| "loss": 1.4949, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.7003682624707064, |
| "grad_norm": 0.15308037400245667, |
| "learning_rate": 0.0001, |
| "loss": 1.6023, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.7007030465349849, |
| "grad_norm": 0.128286212682724, |
| "learning_rate": 0.0001, |
| "loss": 1.5298, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.7010378305992635, |
| "grad_norm": 0.13967067003250122, |
| "learning_rate": 0.0001, |
| "loss": 1.5577, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.7013726146635421, |
| "grad_norm": 0.13320837914943695, |
| "learning_rate": 0.0001, |
| "loss": 1.5923, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.7017073987278205, |
| "grad_norm": 0.12857401371002197, |
| "learning_rate": 0.0001, |
| "loss": 1.4623, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.7020421827920991, |
| "grad_norm": 0.12525291740894318, |
| "learning_rate": 0.0001, |
| "loss": 1.5126, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.7023769668563776, |
| "grad_norm": 0.1316770762205124, |
| "learning_rate": 0.0001, |
| "loss": 1.5433, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.7027117509206562, |
| "grad_norm": 0.1343490481376648, |
| "learning_rate": 0.0001, |
| "loss": 1.5085, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.7030465349849347, |
| "grad_norm": 0.12864871323108673, |
| "learning_rate": 0.0001, |
| "loss": 1.46, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7033813190492133, |
| "grad_norm": 0.13915804028511047, |
| "learning_rate": 0.0001, |
| "loss": 1.6961, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.7037161031134918, |
| "grad_norm": 0.12709419429302216, |
| "learning_rate": 0.0001, |
| "loss": 1.4931, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.7040508871777703, |
| "grad_norm": 0.1383008360862732, |
| "learning_rate": 0.0001, |
| "loss": 1.5925, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.7043856712420489, |
| "grad_norm": 0.1338641494512558, |
| "learning_rate": 0.0001, |
| "loss": 1.4715, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.7047204553063274, |
| "grad_norm": 0.12291635572910309, |
| "learning_rate": 0.0001, |
| "loss": 1.3746, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.705055239370606, |
| "grad_norm": 0.13391555845737457, |
| "learning_rate": 0.0001, |
| "loss": 1.627, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.7053900234348845, |
| "grad_norm": 0.13259120285511017, |
| "learning_rate": 0.0001, |
| "loss": 1.6069, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.705724807499163, |
| "grad_norm": 0.13009488582611084, |
| "learning_rate": 0.0001, |
| "loss": 1.534, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.7060595915634416, |
| "grad_norm": 0.12612484395503998, |
| "learning_rate": 0.0001, |
| "loss": 1.4612, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.7063943756277201, |
| "grad_norm": 0.12470883876085281, |
| "learning_rate": 0.0001, |
| "loss": 1.4388, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.7067291596919987, |
| "grad_norm": 0.13072682917118073, |
| "learning_rate": 0.0001, |
| "loss": 1.5083, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.7070639437562772, |
| "grad_norm": 0.13037820160388947, |
| "learning_rate": 0.0001, |
| "loss": 1.4514, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.7073987278205558, |
| "grad_norm": 0.1304703801870346, |
| "learning_rate": 0.0001, |
| "loss": 1.4644, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.7077335118848342, |
| "grad_norm": 0.1345730423927307, |
| "learning_rate": 0.0001, |
| "loss": 1.4849, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.7080682959491128, |
| "grad_norm": 0.14024527370929718, |
| "learning_rate": 0.0001, |
| "loss": 1.5851, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.7084030800133914, |
| "grad_norm": 0.13666972517967224, |
| "learning_rate": 0.0001, |
| "loss": 1.4858, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.7087378640776699, |
| "grad_norm": 0.13574914634227753, |
| "learning_rate": 0.0001, |
| "loss": 1.5258, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.7090726481419485, |
| "grad_norm": 0.1362755447626114, |
| "learning_rate": 0.0001, |
| "loss": 1.5592, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.7094074322062269, |
| "grad_norm": 0.12771886587142944, |
| "learning_rate": 0.0001, |
| "loss": 1.459, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.7097422162705055, |
| "grad_norm": 0.13762152194976807, |
| "learning_rate": 0.0001, |
| "loss": 1.5934, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.710077000334784, |
| "grad_norm": 0.13554149866104126, |
| "learning_rate": 0.0001, |
| "loss": 1.5728, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.7104117843990626, |
| "grad_norm": 0.1313951313495636, |
| "learning_rate": 0.0001, |
| "loss": 1.517, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.7107465684633412, |
| "grad_norm": 0.12920212745666504, |
| "learning_rate": 0.0001, |
| "loss": 1.4647, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.7110813525276197, |
| "grad_norm": 0.13671697676181793, |
| "learning_rate": 0.0001, |
| "loss": 1.4987, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.7114161365918982, |
| "grad_norm": 0.12860006093978882, |
| "learning_rate": 0.0001, |
| "loss": 1.5304, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.7117509206561767, |
| "grad_norm": 0.12372934073209763, |
| "learning_rate": 0.0001, |
| "loss": 1.4964, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.7120857047204553, |
| "grad_norm": 0.13640989363193512, |
| "learning_rate": 0.0001, |
| "loss": 1.521, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.7124204887847339, |
| "grad_norm": 0.13121746480464935, |
| "learning_rate": 0.0001, |
| "loss": 1.5118, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.7127552728490124, |
| "grad_norm": 0.1307837963104248, |
| "learning_rate": 0.0001, |
| "loss": 1.5688, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.713090056913291, |
| "grad_norm": 0.13141870498657227, |
| "learning_rate": 0.0001, |
| "loss": 1.5435, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.7134248409775694, |
| "grad_norm": 0.13490049540996552, |
| "learning_rate": 0.0001, |
| "loss": 1.5421, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.713759625041848, |
| "grad_norm": 0.13801416754722595, |
| "learning_rate": 0.0001, |
| "loss": 1.6097, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.7140944091061265, |
| "grad_norm": 0.13066011667251587, |
| "learning_rate": 0.0001, |
| "loss": 1.4629, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.7144291931704051, |
| "grad_norm": 0.13355465233325958, |
| "learning_rate": 0.0001, |
| "loss": 1.6363, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.7147639772346837, |
| "grad_norm": 0.12968328595161438, |
| "learning_rate": 0.0001, |
| "loss": 1.4454, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.7150987612989622, |
| "grad_norm": 0.14093713462352753, |
| "learning_rate": 0.0001, |
| "loss": 1.6115, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.7154335453632407, |
| "grad_norm": 0.13097916543483734, |
| "learning_rate": 0.0001, |
| "loss": 1.5531, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.7157683294275192, |
| "grad_norm": 0.1295294314622879, |
| "learning_rate": 0.0001, |
| "loss": 1.5923, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.7161031134917978, |
| "grad_norm": 0.13776849210262299, |
| "learning_rate": 0.0001, |
| "loss": 1.5992, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.7164378975560763, |
| "grad_norm": 0.13502860069274902, |
| "learning_rate": 0.0001, |
| "loss": 1.4677, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.7167726816203549, |
| "grad_norm": 0.13480490446090698, |
| "learning_rate": 0.0001, |
| "loss": 1.6244, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.7171074656846335, |
| "grad_norm": 0.13483154773712158, |
| "learning_rate": 0.0001, |
| "loss": 1.616, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.7174422497489119, |
| "grad_norm": 0.14340271055698395, |
| "learning_rate": 0.0001, |
| "loss": 1.6287, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.7177770338131905, |
| "grad_norm": 0.13620589673519135, |
| "learning_rate": 0.0001, |
| "loss": 1.5193, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.718111817877469, |
| "grad_norm": 0.13150522112846375, |
| "learning_rate": 0.0001, |
| "loss": 1.5038, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.7184466019417476, |
| "grad_norm": 0.13259613513946533, |
| "learning_rate": 0.0001, |
| "loss": 1.5666, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.7187813860060261, |
| "grad_norm": 0.1307973563671112, |
| "learning_rate": 0.0001, |
| "loss": 1.5762, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.7191161700703047, |
| "grad_norm": 0.13372613489627838, |
| "learning_rate": 0.0001, |
| "loss": 1.5352, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.7194509541345832, |
| "grad_norm": 0.13534867763519287, |
| "learning_rate": 0.0001, |
| "loss": 1.4652, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.7197857381988617, |
| "grad_norm": 0.1332571804523468, |
| "learning_rate": 0.0001, |
| "loss": 1.5532, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.7201205222631403, |
| "grad_norm": 0.13172098994255066, |
| "learning_rate": 0.0001, |
| "loss": 1.4728, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.7204553063274188, |
| "grad_norm": 0.12765897810459137, |
| "learning_rate": 0.0001, |
| "loss": 1.4597, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.7207900903916974, |
| "grad_norm": 0.13026951253414154, |
| "learning_rate": 0.0001, |
| "loss": 1.4877, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.721124874455976, |
| "grad_norm": 0.1389724761247635, |
| "learning_rate": 0.0001, |
| "loss": 1.5332, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.7214596585202544, |
| "grad_norm": 0.13382194936275482, |
| "learning_rate": 0.0001, |
| "loss": 1.5179, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.721794442584533, |
| "grad_norm": 0.12780801951885223, |
| "learning_rate": 0.0001, |
| "loss": 1.4393, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.7221292266488115, |
| "grad_norm": 0.1323569118976593, |
| "learning_rate": 0.0001, |
| "loss": 1.5528, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.7224640107130901, |
| "grad_norm": 0.1358579397201538, |
| "learning_rate": 0.0001, |
| "loss": 1.4996, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.7227987947773686, |
| "grad_norm": 0.13905704021453857, |
| "learning_rate": 0.0001, |
| "loss": 1.5979, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.7231335788416471, |
| "grad_norm": 0.1356305480003357, |
| "learning_rate": 0.0001, |
| "loss": 1.5851, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.7234683629059256, |
| "grad_norm": 0.13545480370521545, |
| "learning_rate": 0.0001, |
| "loss": 1.5622, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.7238031469702042, |
| "grad_norm": 0.13289092481136322, |
| "learning_rate": 0.0001, |
| "loss": 1.5253, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.7241379310344828, |
| "grad_norm": 0.130274698138237, |
| "learning_rate": 0.0001, |
| "loss": 1.4498, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.7244727150987613, |
| "grad_norm": 0.13009384274482727, |
| "learning_rate": 0.0001, |
| "loss": 1.5593, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.7248074991630399, |
| "grad_norm": 0.13778330385684967, |
| "learning_rate": 0.0001, |
| "loss": 1.4054, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.7251422832273183, |
| "grad_norm": 0.14639288187026978, |
| "learning_rate": 0.0001, |
| "loss": 1.5563, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.7254770672915969, |
| "grad_norm": 0.14019513130187988, |
| "learning_rate": 0.0001, |
| "loss": 1.6143, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.7258118513558754, |
| "grad_norm": 0.15255634486675262, |
| "learning_rate": 0.0001, |
| "loss": 1.4999, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.726146635420154, |
| "grad_norm": 0.133973628282547, |
| "learning_rate": 0.0001, |
| "loss": 1.5648, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.7264814194844326, |
| "grad_norm": 0.14227105677127838, |
| "learning_rate": 0.0001, |
| "loss": 1.5372, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.7268162035487111, |
| "grad_norm": 0.13694263994693756, |
| "learning_rate": 0.0001, |
| "loss": 1.5454, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.7271509876129896, |
| "grad_norm": 0.1395786851644516, |
| "learning_rate": 0.0001, |
| "loss": 1.6018, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.7274857716772681, |
| "grad_norm": 0.13695751130580902, |
| "learning_rate": 0.0001, |
| "loss": 1.5542, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.7278205557415467, |
| "grad_norm": 0.14114227890968323, |
| "learning_rate": 0.0001, |
| "loss": 1.4742, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.7281553398058253, |
| "grad_norm": 0.14633609354496002, |
| "learning_rate": 0.0001, |
| "loss": 1.5335, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.7284901238701038, |
| "grad_norm": 0.12929964065551758, |
| "learning_rate": 0.0001, |
| "loss": 1.4759, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.7288249079343824, |
| "grad_norm": 0.14383701980113983, |
| "learning_rate": 0.0001, |
| "loss": 1.5744, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.7291596919986608, |
| "grad_norm": 0.14609093964099884, |
| "learning_rate": 0.0001, |
| "loss": 1.4927, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.7294944760629394, |
| "grad_norm": 0.13813704252243042, |
| "learning_rate": 0.0001, |
| "loss": 1.535, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.7298292601272179, |
| "grad_norm": 0.13343721628189087, |
| "learning_rate": 0.0001, |
| "loss": 1.5239, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.7301640441914965, |
| "grad_norm": 0.13793961703777313, |
| "learning_rate": 0.0001, |
| "loss": 1.4959, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.7304988282557751, |
| "grad_norm": 0.14635740220546722, |
| "learning_rate": 0.0001, |
| "loss": 1.5759, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.7308336123200536, |
| "grad_norm": 0.13331273198127747, |
| "learning_rate": 0.0001, |
| "loss": 1.5169, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.7311683963843321, |
| "grad_norm": 0.13492250442504883, |
| "learning_rate": 0.0001, |
| "loss": 1.4711, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.7315031804486106, |
| "grad_norm": 0.14489556849002838, |
| "learning_rate": 0.0001, |
| "loss": 1.584, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.7318379645128892, |
| "grad_norm": 0.13701508939266205, |
| "learning_rate": 0.0001, |
| "loss": 1.5844, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.7321727485771677, |
| "grad_norm": 0.1370009034872055, |
| "learning_rate": 0.0001, |
| "loss": 1.5287, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.7325075326414463, |
| "grad_norm": 0.14577260613441467, |
| "learning_rate": 0.0001, |
| "loss": 1.4752, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.7328423167057249, |
| "grad_norm": 0.1377391368150711, |
| "learning_rate": 0.0001, |
| "loss": 1.5484, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.7331771007700033, |
| "grad_norm": 0.1396346390247345, |
| "learning_rate": 0.0001, |
| "loss": 1.5405, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.7335118848342819, |
| "grad_norm": 0.1492149382829666, |
| "learning_rate": 0.0001, |
| "loss": 1.5028, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.7338466688985604, |
| "grad_norm": 0.13928255438804626, |
| "learning_rate": 0.0001, |
| "loss": 1.6229, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.734181452962839, |
| "grad_norm": 0.13838155567646027, |
| "learning_rate": 0.0001, |
| "loss": 1.5661, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.7345162370271175, |
| "grad_norm": 0.1435183733701706, |
| "learning_rate": 0.0001, |
| "loss": 1.6133, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.734851021091396, |
| "grad_norm": 0.13500259816646576, |
| "learning_rate": 0.0001, |
| "loss": 1.5728, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.7351858051556746, |
| "grad_norm": 0.13238045573234558, |
| "learning_rate": 0.0001, |
| "loss": 1.5435, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.7355205892199531, |
| "grad_norm": 0.13493601977825165, |
| "learning_rate": 0.0001, |
| "loss": 1.5117, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.7358553732842317, |
| "grad_norm": 0.1433602273464203, |
| "learning_rate": 0.0001, |
| "loss": 1.5921, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.7361901573485102, |
| "grad_norm": 0.13165898621082306, |
| "learning_rate": 0.0001, |
| "loss": 1.5648, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.7365249414127888, |
| "grad_norm": 0.1355050653219223, |
| "learning_rate": 0.0001, |
| "loss": 1.5998, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7368597254770672, |
| "grad_norm": 0.1296299695968628, |
| "learning_rate": 0.0001, |
| "loss": 1.3903, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.7371945095413458, |
| "grad_norm": 0.13563255965709686, |
| "learning_rate": 0.0001, |
| "loss": 1.5462, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.7375292936056244, |
| "grad_norm": 0.13449116051197052, |
| "learning_rate": 0.0001, |
| "loss": 1.5344, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.7378640776699029, |
| "grad_norm": 0.12928107380867004, |
| "learning_rate": 0.0001, |
| "loss": 1.5212, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.7381988617341815, |
| "grad_norm": 0.13199785351753235, |
| "learning_rate": 0.0001, |
| "loss": 1.5408, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.73853364579846, |
| "grad_norm": 0.13608896732330322, |
| "learning_rate": 0.0001, |
| "loss": 1.6036, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.7388684298627385, |
| "grad_norm": 0.1248575821518898, |
| "learning_rate": 0.0001, |
| "loss": 1.4513, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.739203213927017, |
| "grad_norm": 0.1319798231124878, |
| "learning_rate": 0.0001, |
| "loss": 1.5231, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.7395379979912956, |
| "grad_norm": 0.1297694742679596, |
| "learning_rate": 0.0001, |
| "loss": 1.492, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.7398727820555742, |
| "grad_norm": 0.13263830542564392, |
| "learning_rate": 0.0001, |
| "loss": 1.5746, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.7402075661198527, |
| "grad_norm": 0.1352548599243164, |
| "learning_rate": 0.0001, |
| "loss": 1.567, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.7405423501841313, |
| "grad_norm": 0.13107185065746307, |
| "learning_rate": 0.0001, |
| "loss": 1.5053, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.7408771342484097, |
| "grad_norm": 0.13326485455036163, |
| "learning_rate": 0.0001, |
| "loss": 1.5838, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.7412119183126883, |
| "grad_norm": 0.14211507141590118, |
| "learning_rate": 0.0001, |
| "loss": 1.5694, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.7415467023769668, |
| "grad_norm": 0.13121196627616882, |
| "learning_rate": 0.0001, |
| "loss": 1.4977, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.7418814864412454, |
| "grad_norm": 0.13140466809272766, |
| "learning_rate": 0.0001, |
| "loss": 1.568, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.742216270505524, |
| "grad_norm": 0.1365407258272171, |
| "learning_rate": 0.0001, |
| "loss": 1.6667, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.7425510545698025, |
| "grad_norm": 0.13460293412208557, |
| "learning_rate": 0.0001, |
| "loss": 1.5813, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.742885838634081, |
| "grad_norm": 0.13729612529277802, |
| "learning_rate": 0.0001, |
| "loss": 1.5491, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.7432206226983595, |
| "grad_norm": 0.13383755087852478, |
| "learning_rate": 0.0001, |
| "loss": 1.5678, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.7435554067626381, |
| "grad_norm": 0.13744328916072845, |
| "learning_rate": 0.0001, |
| "loss": 1.5336, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.7438901908269167, |
| "grad_norm": 0.12934266030788422, |
| "learning_rate": 0.0001, |
| "loss": 1.5429, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.7442249748911952, |
| "grad_norm": 0.1308993250131607, |
| "learning_rate": 0.0001, |
| "loss": 1.5449, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.7445597589554738, |
| "grad_norm": 0.1382169872522354, |
| "learning_rate": 0.0001, |
| "loss": 1.6019, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.7448945430197522, |
| "grad_norm": 0.13184891641139984, |
| "learning_rate": 0.0001, |
| "loss": 1.5357, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.7452293270840308, |
| "grad_norm": 0.1404266655445099, |
| "learning_rate": 0.0001, |
| "loss": 1.5935, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.7455641111483093, |
| "grad_norm": 0.13625003397464752, |
| "learning_rate": 0.0001, |
| "loss": 1.5588, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.7458988952125879, |
| "grad_norm": 0.1287645548582077, |
| "learning_rate": 0.0001, |
| "loss": 1.435, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.7462336792768665, |
| "grad_norm": 0.13726918399333954, |
| "learning_rate": 0.0001, |
| "loss": 1.5453, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.746568463341145, |
| "grad_norm": 0.13299064338207245, |
| "learning_rate": 0.0001, |
| "loss": 1.4996, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.7469032474054235, |
| "grad_norm": 0.13553793728351593, |
| "learning_rate": 0.0001, |
| "loss": 1.5395, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.747238031469702, |
| "grad_norm": 0.13683359324932098, |
| "learning_rate": 0.0001, |
| "loss": 1.661, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.7475728155339806, |
| "grad_norm": 0.13002213835716248, |
| "learning_rate": 0.0001, |
| "loss": 1.5245, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.7479075995982591, |
| "grad_norm": 0.13479109108448029, |
| "learning_rate": 0.0001, |
| "loss": 1.5724, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.7482423836625377, |
| "grad_norm": 0.13677366077899933, |
| "learning_rate": 0.0001, |
| "loss": 1.6276, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.7485771677268162, |
| "grad_norm": 0.14970214664936066, |
| "learning_rate": 0.0001, |
| "loss": 1.6145, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.7489119517910947, |
| "grad_norm": 0.1285363882780075, |
| "learning_rate": 0.0001, |
| "loss": 1.4591, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.7492467358553733, |
| "grad_norm": 0.14044371247291565, |
| "learning_rate": 0.0001, |
| "loss": 1.511, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.7495815199196518, |
| "grad_norm": 0.13310682773590088, |
| "learning_rate": 0.0001, |
| "loss": 1.5777, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.7499163039839304, |
| "grad_norm": 0.14290130138397217, |
| "learning_rate": 0.0001, |
| "loss": 1.5075, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.750251088048209, |
| "grad_norm": 0.1509731411933899, |
| "learning_rate": 0.0001, |
| "loss": 1.6198, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.7505858721124874, |
| "grad_norm": 0.13322798907756805, |
| "learning_rate": 0.0001, |
| "loss": 1.5722, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.750920656176766, |
| "grad_norm": 0.1355818659067154, |
| "learning_rate": 0.0001, |
| "loss": 1.4922, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.7512554402410445, |
| "grad_norm": 0.14394080638885498, |
| "learning_rate": 0.0001, |
| "loss": 1.5976, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.7515902243053231, |
| "grad_norm": 0.135832279920578, |
| "learning_rate": 0.0001, |
| "loss": 1.5138, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.7519250083696016, |
| "grad_norm": 0.13906393945217133, |
| "learning_rate": 0.0001, |
| "loss": 1.5351, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.7522597924338802, |
| "grad_norm": 0.13090325891971588, |
| "learning_rate": 0.0001, |
| "loss": 1.4505, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.7525945764981586, |
| "grad_norm": 0.13537496328353882, |
| "learning_rate": 0.0001, |
| "loss": 1.4955, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.7529293605624372, |
| "grad_norm": 0.1373416930437088, |
| "learning_rate": 0.0001, |
| "loss": 1.541, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.7532641446267158, |
| "grad_norm": 0.1294248253107071, |
| "learning_rate": 0.0001, |
| "loss": 1.4943, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.7535989286909943, |
| "grad_norm": 0.12977437674999237, |
| "learning_rate": 0.0001, |
| "loss": 1.5315, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.7539337127552729, |
| "grad_norm": 0.13353915512561798, |
| "learning_rate": 0.0001, |
| "loss": 1.4855, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.7542684968195514, |
| "grad_norm": 0.1338808536529541, |
| "learning_rate": 0.0001, |
| "loss": 1.5483, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.7546032808838299, |
| "grad_norm": 0.13082879781723022, |
| "learning_rate": 0.0001, |
| "loss": 1.5276, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.7549380649481084, |
| "grad_norm": 0.12903323769569397, |
| "learning_rate": 0.0001, |
| "loss": 1.5506, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.755272849012387, |
| "grad_norm": 0.1312693953514099, |
| "learning_rate": 0.0001, |
| "loss": 1.4347, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.7556076330766656, |
| "grad_norm": 0.13503922522068024, |
| "learning_rate": 0.0001, |
| "loss": 1.5089, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.7559424171409441, |
| "grad_norm": 0.13478560745716095, |
| "learning_rate": 0.0001, |
| "loss": 1.4717, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.7562772012052227, |
| "grad_norm": 0.14111362397670746, |
| "learning_rate": 0.0001, |
| "loss": 1.4982, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.7566119852695011, |
| "grad_norm": 0.13715283572673798, |
| "learning_rate": 0.0001, |
| "loss": 1.5166, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.7569467693337797, |
| "grad_norm": 0.14457426965236664, |
| "learning_rate": 0.0001, |
| "loss": 1.6322, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.7572815533980582, |
| "grad_norm": 0.13212622702121735, |
| "learning_rate": 0.0001, |
| "loss": 1.4653, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.7576163374623368, |
| "grad_norm": 0.136484295129776, |
| "learning_rate": 0.0001, |
| "loss": 1.4416, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.7579511215266154, |
| "grad_norm": 0.13701216876506805, |
| "learning_rate": 0.0001, |
| "loss": 1.5158, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.7582859055908939, |
| "grad_norm": 0.13045822083950043, |
| "learning_rate": 0.0001, |
| "loss": 1.4805, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.7586206896551724, |
| "grad_norm": 0.13484729826450348, |
| "learning_rate": 0.0001, |
| "loss": 1.4919, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.7589554737194509, |
| "grad_norm": 0.1352708488702774, |
| "learning_rate": 0.0001, |
| "loss": 1.5632, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.7592902577837295, |
| "grad_norm": 0.13968177139759064, |
| "learning_rate": 0.0001, |
| "loss": 1.5983, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.759625041848008, |
| "grad_norm": 0.13527031242847443, |
| "learning_rate": 0.0001, |
| "loss": 1.5361, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.7599598259122866, |
| "grad_norm": 0.13342413306236267, |
| "learning_rate": 0.0001, |
| "loss": 1.5487, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.7602946099765651, |
| "grad_norm": 0.13037632405757904, |
| "learning_rate": 0.0001, |
| "loss": 1.4433, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.7606293940408436, |
| "grad_norm": 0.12888109683990479, |
| "learning_rate": 0.0001, |
| "loss": 1.5565, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.7609641781051222, |
| "grad_norm": 0.13160650432109833, |
| "learning_rate": 0.0001, |
| "loss": 1.6344, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.7612989621694007, |
| "grad_norm": 0.13456179201602936, |
| "learning_rate": 0.0001, |
| "loss": 1.5983, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.7616337462336793, |
| "grad_norm": 0.12624886631965637, |
| "learning_rate": 0.0001, |
| "loss": 1.4877, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.7619685302979579, |
| "grad_norm": 0.13493984937667847, |
| "learning_rate": 0.0001, |
| "loss": 1.6083, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.7623033143622363, |
| "grad_norm": 0.13616621494293213, |
| "learning_rate": 0.0001, |
| "loss": 1.59, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.7626380984265149, |
| "grad_norm": 0.1309913843870163, |
| "learning_rate": 0.0001, |
| "loss": 1.5356, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.7629728824907934, |
| "grad_norm": 0.1269841343164444, |
| "learning_rate": 0.0001, |
| "loss": 1.442, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.763307666555072, |
| "grad_norm": 0.13083530962467194, |
| "learning_rate": 0.0001, |
| "loss": 1.4919, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.7636424506193505, |
| "grad_norm": 0.13288795948028564, |
| "learning_rate": 0.0001, |
| "loss": 1.5919, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.7639772346836291, |
| "grad_norm": 0.1334894597530365, |
| "learning_rate": 0.0001, |
| "loss": 1.5203, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.7643120187479076, |
| "grad_norm": 0.1322222203016281, |
| "learning_rate": 0.0001, |
| "loss": 1.4987, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.7646468028121861, |
| "grad_norm": 0.13740068674087524, |
| "learning_rate": 0.0001, |
| "loss": 1.5966, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.7649815868764647, |
| "grad_norm": 0.13021446764469147, |
| "learning_rate": 0.0001, |
| "loss": 1.5163, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.7653163709407432, |
| "grad_norm": 0.13992641866207123, |
| "learning_rate": 0.0001, |
| "loss": 1.5116, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.7656511550050218, |
| "grad_norm": 0.13332848250865936, |
| "learning_rate": 0.0001, |
| "loss": 1.5066, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.7659859390693003, |
| "grad_norm": 0.12683235108852386, |
| "learning_rate": 0.0001, |
| "loss": 1.4933, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.7663207231335788, |
| "grad_norm": 0.13610418140888214, |
| "learning_rate": 0.0001, |
| "loss": 1.5115, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.7666555071978574, |
| "grad_norm": 0.13530276715755463, |
| "learning_rate": 0.0001, |
| "loss": 1.5899, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.7669902912621359, |
| "grad_norm": 0.13067664206027985, |
| "learning_rate": 0.0001, |
| "loss": 1.4806, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.7673250753264145, |
| "grad_norm": 0.12956401705741882, |
| "learning_rate": 0.0001, |
| "loss": 1.4432, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.767659859390693, |
| "grad_norm": 0.1368110626935959, |
| "learning_rate": 0.0001, |
| "loss": 1.5858, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.7679946434549716, |
| "grad_norm": 0.13342629373073578, |
| "learning_rate": 0.0001, |
| "loss": 1.4773, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.76832942751925, |
| "grad_norm": 0.13525448739528656, |
| "learning_rate": 0.0001, |
| "loss": 1.5574, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.7686642115835286, |
| "grad_norm": 0.14219002425670624, |
| "learning_rate": 0.0001, |
| "loss": 1.6207, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.7689989956478072, |
| "grad_norm": 0.13410523533821106, |
| "learning_rate": 0.0001, |
| "loss": 1.5414, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.7693337797120857, |
| "grad_norm": 0.1366255283355713, |
| "learning_rate": 0.0001, |
| "loss": 1.5588, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.7696685637763643, |
| "grad_norm": 0.14335733652114868, |
| "learning_rate": 0.0001, |
| "loss": 1.4797, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.7700033478406428, |
| "grad_norm": 0.13368913531303406, |
| "learning_rate": 0.0001, |
| "loss": 1.5068, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7703381319049213, |
| "grad_norm": 0.14045390486717224, |
| "learning_rate": 0.0001, |
| "loss": 1.5532, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.7706729159691998, |
| "grad_norm": 0.13820236921310425, |
| "learning_rate": 0.0001, |
| "loss": 1.4334, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.7710077000334784, |
| "grad_norm": 0.13486477732658386, |
| "learning_rate": 0.0001, |
| "loss": 1.6277, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.771342484097757, |
| "grad_norm": 0.1374381184577942, |
| "learning_rate": 0.0001, |
| "loss": 1.4995, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.7716772681620355, |
| "grad_norm": 0.14841946959495544, |
| "learning_rate": 0.0001, |
| "loss": 1.6044, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.7720120522263141, |
| "grad_norm": 0.13106206059455872, |
| "learning_rate": 0.0001, |
| "loss": 1.5009, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.7723468362905925, |
| "grad_norm": 0.13768276572227478, |
| "learning_rate": 0.0001, |
| "loss": 1.5289, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.7726816203548711, |
| "grad_norm": 0.14987289905548096, |
| "learning_rate": 0.0001, |
| "loss": 1.5654, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.7730164044191496, |
| "grad_norm": 0.13422365486621857, |
| "learning_rate": 0.0001, |
| "loss": 1.5781, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.7733511884834282, |
| "grad_norm": 0.14007548987865448, |
| "learning_rate": 0.0001, |
| "loss": 1.468, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.7736859725477068, |
| "grad_norm": 0.140237495303154, |
| "learning_rate": 0.0001, |
| "loss": 1.4408, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.7740207566119852, |
| "grad_norm": 0.1331593543291092, |
| "learning_rate": 0.0001, |
| "loss": 1.5213, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.7743555406762638, |
| "grad_norm": 0.13670580089092255, |
| "learning_rate": 0.0001, |
| "loss": 1.5034, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.7746903247405423, |
| "grad_norm": 0.13198411464691162, |
| "learning_rate": 0.0001, |
| "loss": 1.4633, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.7750251088048209, |
| "grad_norm": 0.14384810626506805, |
| "learning_rate": 0.0001, |
| "loss": 1.6254, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.7753598928690995, |
| "grad_norm": 0.12808088958263397, |
| "learning_rate": 0.0001, |
| "loss": 1.4751, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.775694676933378, |
| "grad_norm": 0.14130346477031708, |
| "learning_rate": 0.0001, |
| "loss": 1.5306, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.7760294609976565, |
| "grad_norm": 0.13153797388076782, |
| "learning_rate": 0.0001, |
| "loss": 1.5046, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.776364245061935, |
| "grad_norm": 0.13447383046150208, |
| "learning_rate": 0.0001, |
| "loss": 1.5288, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 0.13588428497314453, |
| "learning_rate": 0.0001, |
| "loss": 1.5792, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.7770338131904921, |
| "grad_norm": 0.1414654701948166, |
| "learning_rate": 0.0001, |
| "loss": 1.6252, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.7773685972547707, |
| "grad_norm": 0.14798319339752197, |
| "learning_rate": 0.0001, |
| "loss": 1.5182, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.7777033813190493, |
| "grad_norm": 0.13594651222229004, |
| "learning_rate": 0.0001, |
| "loss": 1.59, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.7780381653833277, |
| "grad_norm": 0.13689537346363068, |
| "learning_rate": 0.0001, |
| "loss": 1.5312, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.7783729494476063, |
| "grad_norm": 0.13842853903770447, |
| "learning_rate": 0.0001, |
| "loss": 1.5453, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.7787077335118848, |
| "grad_norm": 0.14006944000720978, |
| "learning_rate": 0.0001, |
| "loss": 1.5789, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.7790425175761634, |
| "grad_norm": 0.1328335702419281, |
| "learning_rate": 0.0001, |
| "loss": 1.5183, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.7793773016404419, |
| "grad_norm": 0.1366383582353592, |
| "learning_rate": 0.0001, |
| "loss": 1.5861, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.7797120857047205, |
| "grad_norm": 0.1384078413248062, |
| "learning_rate": 0.0001, |
| "loss": 1.4768, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.780046869768999, |
| "grad_norm": 0.13138563930988312, |
| "learning_rate": 0.0001, |
| "loss": 1.5415, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.7803816538332775, |
| "grad_norm": 0.13533802330493927, |
| "learning_rate": 0.0001, |
| "loss": 1.5351, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.7807164378975561, |
| "grad_norm": 0.12634359300136566, |
| "learning_rate": 0.0001, |
| "loss": 1.4854, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.7810512219618346, |
| "grad_norm": 0.14045196771621704, |
| "learning_rate": 0.0001, |
| "loss": 1.5979, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.7813860060261132, |
| "grad_norm": 0.12970393896102905, |
| "learning_rate": 0.0001, |
| "loss": 1.4883, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.7817207900903917, |
| "grad_norm": 0.13416926562786102, |
| "learning_rate": 0.0001, |
| "loss": 1.538, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.7820555741546702, |
| "grad_norm": 0.12993508577346802, |
| "learning_rate": 0.0001, |
| "loss": 1.3861, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.7823903582189488, |
| "grad_norm": 0.1441780924797058, |
| "learning_rate": 0.0001, |
| "loss": 1.5082, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.7827251422832273, |
| "grad_norm": 0.1340634673833847, |
| "learning_rate": 0.0001, |
| "loss": 1.5308, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.7830599263475059, |
| "grad_norm": 0.1375696063041687, |
| "learning_rate": 0.0001, |
| "loss": 1.4726, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.7833947104117844, |
| "grad_norm": 0.13143296539783478, |
| "learning_rate": 0.0001, |
| "loss": 1.5403, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.783729494476063, |
| "grad_norm": 0.14144007861614227, |
| "learning_rate": 0.0001, |
| "loss": 1.5596, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.7840642785403414, |
| "grad_norm": 0.1288491189479828, |
| "learning_rate": 0.0001, |
| "loss": 1.4793, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.78439906260462, |
| "grad_norm": 0.13762634992599487, |
| "learning_rate": 0.0001, |
| "loss": 1.5224, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.7847338466688986, |
| "grad_norm": 0.1369268000125885, |
| "learning_rate": 0.0001, |
| "loss": 1.5678, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.7850686307331771, |
| "grad_norm": 0.1348867565393448, |
| "learning_rate": 0.0001, |
| "loss": 1.5764, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.7854034147974557, |
| "grad_norm": 0.13499613106250763, |
| "learning_rate": 0.0001, |
| "loss": 1.5317, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.7857381988617341, |
| "grad_norm": 0.136494979262352, |
| "learning_rate": 0.0001, |
| "loss": 1.6178, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.7860729829260127, |
| "grad_norm": 0.13742174208164215, |
| "learning_rate": 0.0001, |
| "loss": 1.5524, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.7864077669902912, |
| "grad_norm": 0.1315702348947525, |
| "learning_rate": 0.0001, |
| "loss": 1.5199, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.7867425510545698, |
| "grad_norm": 0.1344085931777954, |
| "learning_rate": 0.0001, |
| "loss": 1.5222, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.7870773351188484, |
| "grad_norm": 0.1331881582736969, |
| "learning_rate": 0.0001, |
| "loss": 1.4746, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.7874121191831269, |
| "grad_norm": 0.13880756497383118, |
| "learning_rate": 0.0001, |
| "loss": 1.5027, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.7877469032474054, |
| "grad_norm": 0.1315576285123825, |
| "learning_rate": 0.0001, |
| "loss": 1.5833, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.7880816873116839, |
| "grad_norm": 0.1278029829263687, |
| "learning_rate": 0.0001, |
| "loss": 1.4475, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.7884164713759625, |
| "grad_norm": 0.14114075899124146, |
| "learning_rate": 0.0001, |
| "loss": 1.4451, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.788751255440241, |
| "grad_norm": 0.1352827101945877, |
| "learning_rate": 0.0001, |
| "loss": 1.4816, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.7890860395045196, |
| "grad_norm": 0.1316574364900589, |
| "learning_rate": 0.0001, |
| "loss": 1.4572, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.7894208235687982, |
| "grad_norm": 0.13792237639427185, |
| "learning_rate": 0.0001, |
| "loss": 1.6108, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.7897556076330766, |
| "grad_norm": 0.1365162879228592, |
| "learning_rate": 0.0001, |
| "loss": 1.5303, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.7900903916973552, |
| "grad_norm": 0.13918493688106537, |
| "learning_rate": 0.0001, |
| "loss": 1.6387, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.7904251757616337, |
| "grad_norm": 0.1277536302804947, |
| "learning_rate": 0.0001, |
| "loss": 1.5365, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.7907599598259123, |
| "grad_norm": 0.13407327234745026, |
| "learning_rate": 0.0001, |
| "loss": 1.4571, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.7910947438901909, |
| "grad_norm": 0.1346539407968521, |
| "learning_rate": 0.0001, |
| "loss": 1.4506, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.7914295279544694, |
| "grad_norm": 0.13160093128681183, |
| "learning_rate": 0.0001, |
| "loss": 1.4457, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.7917643120187479, |
| "grad_norm": 0.13025003671646118, |
| "learning_rate": 0.0001, |
| "loss": 1.56, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.7920990960830264, |
| "grad_norm": 0.14476409554481506, |
| "learning_rate": 0.0001, |
| "loss": 1.5876, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.792433880147305, |
| "grad_norm": 0.13053929805755615, |
| "learning_rate": 0.0001, |
| "loss": 1.4338, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.7927686642115835, |
| "grad_norm": 0.13872520625591278, |
| "learning_rate": 0.0001, |
| "loss": 1.6427, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.7931034482758621, |
| "grad_norm": 0.14061668515205383, |
| "learning_rate": 0.0001, |
| "loss": 1.4886, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.7934382323401407, |
| "grad_norm": 0.130232036113739, |
| "learning_rate": 0.0001, |
| "loss": 1.4023, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.7937730164044191, |
| "grad_norm": 0.23358748853206635, |
| "learning_rate": 0.0001, |
| "loss": 1.457, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.7941078004686977, |
| "grad_norm": 0.13233914971351624, |
| "learning_rate": 0.0001, |
| "loss": 1.4307, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.7944425845329762, |
| "grad_norm": 0.13504283130168915, |
| "learning_rate": 0.0001, |
| "loss": 1.5976, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.7947773685972548, |
| "grad_norm": 0.13976161181926727, |
| "learning_rate": 0.0001, |
| "loss": 1.6455, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.7951121526615333, |
| "grad_norm": 0.1336098313331604, |
| "learning_rate": 0.0001, |
| "loss": 1.4469, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.7954469367258119, |
| "grad_norm": 0.13648861646652222, |
| "learning_rate": 0.0001, |
| "loss": 1.4964, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.7957817207900904, |
| "grad_norm": 0.13627798855304718, |
| "learning_rate": 0.0001, |
| "loss": 1.5834, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.7961165048543689, |
| "grad_norm": 0.14114542305469513, |
| "learning_rate": 0.0001, |
| "loss": 1.5566, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.7964512889186475, |
| "grad_norm": 0.13499446213245392, |
| "learning_rate": 0.0001, |
| "loss": 1.5174, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.796786072982926, |
| "grad_norm": 0.14620280265808105, |
| "learning_rate": 0.0001, |
| "loss": 1.6778, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.7971208570472046, |
| "grad_norm": 0.13239939510822296, |
| "learning_rate": 0.0001, |
| "loss": 1.5274, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.7974556411114831, |
| "grad_norm": 0.13517913222312927, |
| "learning_rate": 0.0001, |
| "loss": 1.5291, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.7977904251757616, |
| "grad_norm": 0.1352391242980957, |
| "learning_rate": 0.0001, |
| "loss": 1.5285, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.7981252092400402, |
| "grad_norm": 0.14000670611858368, |
| "learning_rate": 0.0001, |
| "loss": 1.6194, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.7984599933043187, |
| "grad_norm": 0.1349296271800995, |
| "learning_rate": 0.0001, |
| "loss": 1.5001, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.7987947773685973, |
| "grad_norm": 0.1352308988571167, |
| "learning_rate": 0.0001, |
| "loss": 1.6213, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.7991295614328758, |
| "grad_norm": 0.1368694305419922, |
| "learning_rate": 0.0001, |
| "loss": 1.5861, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.7994643454971543, |
| "grad_norm": 0.1355554759502411, |
| "learning_rate": 0.0001, |
| "loss": 1.5377, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.7997991295614328, |
| "grad_norm": 0.13328254222869873, |
| "learning_rate": 0.0001, |
| "loss": 1.5517, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.8001339136257114, |
| "grad_norm": 0.13724930584430695, |
| "learning_rate": 0.0001, |
| "loss": 1.5987, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.80046869768999, |
| "grad_norm": 0.13542616367340088, |
| "learning_rate": 0.0001, |
| "loss": 1.6654, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.8008034817542685, |
| "grad_norm": 0.1366943120956421, |
| "learning_rate": 0.0001, |
| "loss": 1.6196, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.8011382658185471, |
| "grad_norm": 0.13868063688278198, |
| "learning_rate": 0.0001, |
| "loss": 1.587, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.8014730498828255, |
| "grad_norm": 0.1393207311630249, |
| "learning_rate": 0.0001, |
| "loss": 1.5559, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.8018078339471041, |
| "grad_norm": 0.13909262418746948, |
| "learning_rate": 0.0001, |
| "loss": 1.5007, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.8021426180113826, |
| "grad_norm": 0.12949267029762268, |
| "learning_rate": 0.0001, |
| "loss": 1.5108, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.8024774020756612, |
| "grad_norm": 0.12755730748176575, |
| "learning_rate": 0.0001, |
| "loss": 1.5008, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.8028121861399398, |
| "grad_norm": 0.12899887561798096, |
| "learning_rate": 0.0001, |
| "loss": 1.3877, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.8031469702042183, |
| "grad_norm": 0.1423116773366928, |
| "learning_rate": 0.0001, |
| "loss": 1.4996, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.8034817542684968, |
| "grad_norm": 0.13548225164413452, |
| "learning_rate": 0.0001, |
| "loss": 1.5214, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8038165383327753, |
| "grad_norm": 0.13150808215141296, |
| "learning_rate": 0.0001, |
| "loss": 1.4772, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.8041513223970539, |
| "grad_norm": 0.13790038228034973, |
| "learning_rate": 0.0001, |
| "loss": 1.5704, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.8044861064613325, |
| "grad_norm": 0.13106264173984528, |
| "learning_rate": 0.0001, |
| "loss": 1.5073, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.804820890525611, |
| "grad_norm": 0.13568797707557678, |
| "learning_rate": 0.0001, |
| "loss": 1.6371, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.8051556745898896, |
| "grad_norm": 0.13882842659950256, |
| "learning_rate": 0.0001, |
| "loss": 1.5571, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.805490458654168, |
| "grad_norm": 0.1312180459499359, |
| "learning_rate": 0.0001, |
| "loss": 1.5625, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.8058252427184466, |
| "grad_norm": 0.12823453545570374, |
| "learning_rate": 0.0001, |
| "loss": 1.5046, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.8061600267827251, |
| "grad_norm": 0.13207179307937622, |
| "learning_rate": 0.0001, |
| "loss": 1.5031, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.8064948108470037, |
| "grad_norm": 0.1277305632829666, |
| "learning_rate": 0.0001, |
| "loss": 1.4867, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.8068295949112823, |
| "grad_norm": 0.13227322697639465, |
| "learning_rate": 0.0001, |
| "loss": 1.5019, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.8071643789755608, |
| "grad_norm": 0.1336304098367691, |
| "learning_rate": 0.0001, |
| "loss": 1.4424, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.8074991630398393, |
| "grad_norm": 0.13859078288078308, |
| "learning_rate": 0.0001, |
| "loss": 1.5301, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.8078339471041178, |
| "grad_norm": 0.1342136412858963, |
| "learning_rate": 0.0001, |
| "loss": 1.485, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.8081687311683964, |
| "grad_norm": 0.14003999531269073, |
| "learning_rate": 0.0001, |
| "loss": 1.5313, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.8085035152326749, |
| "grad_norm": 0.13216662406921387, |
| "learning_rate": 0.0001, |
| "loss": 1.52, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.8088382992969535, |
| "grad_norm": 0.1373407393693924, |
| "learning_rate": 0.0001, |
| "loss": 1.5157, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.8091730833612321, |
| "grad_norm": 0.13850343227386475, |
| "learning_rate": 0.0001, |
| "loss": 1.4971, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.8095078674255105, |
| "grad_norm": 0.1334608793258667, |
| "learning_rate": 0.0001, |
| "loss": 1.5237, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.8098426514897891, |
| "grad_norm": 0.13133668899536133, |
| "learning_rate": 0.0001, |
| "loss": 1.5053, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.8101774355540676, |
| "grad_norm": 0.13715368509292603, |
| "learning_rate": 0.0001, |
| "loss": 1.6357, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.8105122196183462, |
| "grad_norm": 0.14129430055618286, |
| "learning_rate": 0.0001, |
| "loss": 1.5736, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.8108470036826247, |
| "grad_norm": 0.133287250995636, |
| "learning_rate": 0.0001, |
| "loss": 1.4701, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.8111817877469032, |
| "grad_norm": 0.137081116437912, |
| "learning_rate": 0.0001, |
| "loss": 1.4562, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.8115165718111818, |
| "grad_norm": 0.13136571645736694, |
| "learning_rate": 0.0001, |
| "loss": 1.5014, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.8118513558754603, |
| "grad_norm": 0.13660964369773865, |
| "learning_rate": 0.0001, |
| "loss": 1.5533, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.8121861399397389, |
| "grad_norm": 0.145840123295784, |
| "learning_rate": 0.0001, |
| "loss": 1.6406, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.8125209240040174, |
| "grad_norm": 0.13612517714500427, |
| "learning_rate": 0.0001, |
| "loss": 1.4968, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.812855708068296, |
| "grad_norm": 0.14182846248149872, |
| "learning_rate": 0.0001, |
| "loss": 1.5507, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.8131904921325744, |
| "grad_norm": 0.13697752356529236, |
| "learning_rate": 0.0001, |
| "loss": 1.5241, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.813525276196853, |
| "grad_norm": 0.14000248908996582, |
| "learning_rate": 0.0001, |
| "loss": 1.6002, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.8138600602611316, |
| "grad_norm": 0.13774293661117554, |
| "learning_rate": 0.0001, |
| "loss": 1.5198, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.8141948443254101, |
| "grad_norm": 0.13524143397808075, |
| "learning_rate": 0.0001, |
| "loss": 1.5326, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.8145296283896887, |
| "grad_norm": 0.13584178686141968, |
| "learning_rate": 0.0001, |
| "loss": 1.5313, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.8148644124539672, |
| "grad_norm": 0.13589173555374146, |
| "learning_rate": 0.0001, |
| "loss": 1.5097, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.8151991965182457, |
| "grad_norm": 0.1420723795890808, |
| "learning_rate": 0.0001, |
| "loss": 1.593, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.8155339805825242, |
| "grad_norm": 0.13078542053699493, |
| "learning_rate": 0.0001, |
| "loss": 1.4239, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.8158687646468028, |
| "grad_norm": 0.14007273316383362, |
| "learning_rate": 0.0001, |
| "loss": 1.5912, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.8162035487110814, |
| "grad_norm": 0.13472947478294373, |
| "learning_rate": 0.0001, |
| "loss": 1.5146, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.8165383327753599, |
| "grad_norm": 0.13456539809703827, |
| "learning_rate": 0.0001, |
| "loss": 1.5277, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.8168731168396385, |
| "grad_norm": 0.13376279175281525, |
| "learning_rate": 0.0001, |
| "loss": 1.4554, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.8172079009039169, |
| "grad_norm": 0.13720721006393433, |
| "learning_rate": 0.0001, |
| "loss": 1.5463, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.8175426849681955, |
| "grad_norm": 0.1363624483346939, |
| "learning_rate": 0.0001, |
| "loss": 1.537, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.817877469032474, |
| "grad_norm": 0.13379956781864166, |
| "learning_rate": 0.0001, |
| "loss": 1.5831, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.8182122530967526, |
| "grad_norm": 0.13432839512825012, |
| "learning_rate": 0.0001, |
| "loss": 1.5511, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.8185470371610312, |
| "grad_norm": 0.1365717500448227, |
| "learning_rate": 0.0001, |
| "loss": 1.4519, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.8188818212253097, |
| "grad_norm": 0.13430190086364746, |
| "learning_rate": 0.0001, |
| "loss": 1.4878, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.8192166052895882, |
| "grad_norm": 0.13606110215187073, |
| "learning_rate": 0.0001, |
| "loss": 1.5585, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.8195513893538667, |
| "grad_norm": 0.13404667377471924, |
| "learning_rate": 0.0001, |
| "loss": 1.5156, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.8198861734181453, |
| "grad_norm": 0.14223212003707886, |
| "learning_rate": 0.0001, |
| "loss": 1.5904, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.8202209574824239, |
| "grad_norm": 0.13209384679794312, |
| "learning_rate": 0.0001, |
| "loss": 1.551, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.8205557415467024, |
| "grad_norm": 0.13522854447364807, |
| "learning_rate": 0.0001, |
| "loss": 1.5325, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.820890525610981, |
| "grad_norm": 0.13555531203746796, |
| "learning_rate": 0.0001, |
| "loss": 1.5327, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.8212253096752594, |
| "grad_norm": 0.13121196627616882, |
| "learning_rate": 0.0001, |
| "loss": 1.5208, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.821560093739538, |
| "grad_norm": 0.13988123834133148, |
| "learning_rate": 0.0001, |
| "loss": 1.6188, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.8218948778038165, |
| "grad_norm": 0.1347675770521164, |
| "learning_rate": 0.0001, |
| "loss": 1.5212, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.8222296618680951, |
| "grad_norm": 0.13975632190704346, |
| "learning_rate": 0.0001, |
| "loss": 1.6152, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.8225644459323737, |
| "grad_norm": 0.1271917223930359, |
| "learning_rate": 0.0001, |
| "loss": 1.4209, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.8228992299966521, |
| "grad_norm": 0.13226144015789032, |
| "learning_rate": 0.0001, |
| "loss": 1.5397, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.8232340140609307, |
| "grad_norm": 0.1391698569059372, |
| "learning_rate": 0.0001, |
| "loss": 1.5394, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.8235687981252092, |
| "grad_norm": 0.13757720589637756, |
| "learning_rate": 0.0001, |
| "loss": 1.5465, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.8239035821894878, |
| "grad_norm": 0.13116374611854553, |
| "learning_rate": 0.0001, |
| "loss": 1.5072, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.8242383662537663, |
| "grad_norm": 0.13408921658992767, |
| "learning_rate": 0.0001, |
| "loss": 1.5398, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.8245731503180449, |
| "grad_norm": 0.13682673871517181, |
| "learning_rate": 0.0001, |
| "loss": 1.574, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.8249079343823233, |
| "grad_norm": 0.12918630242347717, |
| "learning_rate": 0.0001, |
| "loss": 1.4619, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.8252427184466019, |
| "grad_norm": 0.14337001740932465, |
| "learning_rate": 0.0001, |
| "loss": 1.5494, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.8255775025108805, |
| "grad_norm": 0.13083745539188385, |
| "learning_rate": 0.0001, |
| "loss": 1.4594, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.825912286575159, |
| "grad_norm": 0.13452093303203583, |
| "learning_rate": 0.0001, |
| "loss": 1.5114, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.8262470706394376, |
| "grad_norm": 0.1375538408756256, |
| "learning_rate": 0.0001, |
| "loss": 1.5472, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.8265818547037161, |
| "grad_norm": 0.13618512451648712, |
| "learning_rate": 0.0001, |
| "loss": 1.5067, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.8269166387679946, |
| "grad_norm": 0.13334475457668304, |
| "learning_rate": 0.0001, |
| "loss": 1.5626, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.8272514228322732, |
| "grad_norm": 0.12935003638267517, |
| "learning_rate": 0.0001, |
| "loss": 1.4524, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.8275862068965517, |
| "grad_norm": 0.1333768367767334, |
| "learning_rate": 0.0001, |
| "loss": 1.4809, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.8279209909608303, |
| "grad_norm": 0.139461949467659, |
| "learning_rate": 0.0001, |
| "loss": 1.5265, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.8282557750251088, |
| "grad_norm": 0.14345921576023102, |
| "learning_rate": 0.0001, |
| "loss": 1.5911, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.8285905590893874, |
| "grad_norm": 0.12835142016410828, |
| "learning_rate": 0.0001, |
| "loss": 1.4934, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.8289253431536658, |
| "grad_norm": 0.13207587599754333, |
| "learning_rate": 0.0001, |
| "loss": 1.5013, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.8292601272179444, |
| "grad_norm": 0.14216424524784088, |
| "learning_rate": 0.0001, |
| "loss": 1.5783, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.829594911282223, |
| "grad_norm": 0.1372382938861847, |
| "learning_rate": 0.0001, |
| "loss": 1.5487, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.8299296953465015, |
| "grad_norm": 0.14100505411624908, |
| "learning_rate": 0.0001, |
| "loss": 1.5893, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.8302644794107801, |
| "grad_norm": 0.13831539452075958, |
| "learning_rate": 0.0001, |
| "loss": 1.5308, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.8305992634750586, |
| "grad_norm": 0.13254091143608093, |
| "learning_rate": 0.0001, |
| "loss": 1.509, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.8309340475393371, |
| "grad_norm": 0.13434451818466187, |
| "learning_rate": 0.0001, |
| "loss": 1.4544, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.8312688316036156, |
| "grad_norm": 0.13452693819999695, |
| "learning_rate": 0.0001, |
| "loss": 1.4875, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.8316036156678942, |
| "grad_norm": 0.13497060537338257, |
| "learning_rate": 0.0001, |
| "loss": 1.4973, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.8319383997321728, |
| "grad_norm": 0.13919363915920258, |
| "learning_rate": 0.0001, |
| "loss": 1.4425, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.8322731837964513, |
| "grad_norm": 0.14376235008239746, |
| "learning_rate": 0.0001, |
| "loss": 1.5438, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.8326079678607299, |
| "grad_norm": 0.13027647137641907, |
| "learning_rate": 0.0001, |
| "loss": 1.4899, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.8329427519250083, |
| "grad_norm": 0.1342213749885559, |
| "learning_rate": 0.0001, |
| "loss": 1.4716, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.8332775359892869, |
| "grad_norm": 0.1298682540655136, |
| "learning_rate": 0.0001, |
| "loss": 1.4359, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.8336123200535654, |
| "grad_norm": 0.13764667510986328, |
| "learning_rate": 0.0001, |
| "loss": 1.6205, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.833947104117844, |
| "grad_norm": 0.13023105263710022, |
| "learning_rate": 0.0001, |
| "loss": 1.4276, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.8342818881821226, |
| "grad_norm": 0.1355689913034439, |
| "learning_rate": 0.0001, |
| "loss": 1.4635, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.8346166722464011, |
| "grad_norm": 0.13397172093391418, |
| "learning_rate": 0.0001, |
| "loss": 1.5855, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.8349514563106796, |
| "grad_norm": 0.13192683458328247, |
| "learning_rate": 0.0001, |
| "loss": 1.5209, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.8352862403749581, |
| "grad_norm": 0.13405252993106842, |
| "learning_rate": 0.0001, |
| "loss": 1.5144, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.8356210244392367, |
| "grad_norm": 0.13375818729400635, |
| "learning_rate": 0.0001, |
| "loss": 1.4467, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.8359558085035153, |
| "grad_norm": 0.12543916702270508, |
| "learning_rate": 0.0001, |
| "loss": 1.3992, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.8362905925677938, |
| "grad_norm": 0.13587196171283722, |
| "learning_rate": 0.0001, |
| "loss": 1.487, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.8366253766320723, |
| "grad_norm": 0.13462427258491516, |
| "learning_rate": 0.0001, |
| "loss": 1.5455, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.8369601606963508, |
| "grad_norm": 0.13338516652584076, |
| "learning_rate": 0.0001, |
| "loss": 1.5612, |
| "step": 2500 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2987, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.419234898870272e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|