{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9970788704965923,
  "eval_steps": 500,
  "global_step": 1026,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0029211295034079843,
      "grad_norm": 6.789194746397651,
      "learning_rate": 9.70873786407767e-08,
      "loss": 1.0797,
      "step": 1
    },
    {
      "epoch": 0.005842259006815969,
      "grad_norm": 6.623744256841628,
      "learning_rate": 1.941747572815534e-07,
      "loss": 1.0936,
      "step": 2
    },
    {
      "epoch": 0.008763388510223954,
      "grad_norm": 6.814087231706784,
      "learning_rate": 2.9126213592233014e-07,
      "loss": 1.0915,
      "step": 3
    },
    {
      "epoch": 0.011684518013631937,
      "grad_norm": 6.979864404527602,
      "learning_rate": 3.883495145631068e-07,
      "loss": 1.113,
      "step": 4
    },
    {
      "epoch": 0.014605647517039922,
      "grad_norm": 7.130506329535252,
      "learning_rate": 4.854368932038835e-07,
      "loss": 1.1201,
      "step": 5
    },
    {
      "epoch": 0.017526777020447908,
      "grad_norm": 6.457316225305797,
      "learning_rate": 5.825242718446603e-07,
      "loss": 1.0589,
      "step": 6
    },
    {
      "epoch": 0.02044790652385589,
      "grad_norm": 6.634803833935598,
      "learning_rate": 6.79611650485437e-07,
      "loss": 1.0948,
      "step": 7
    },
    {
      "epoch": 0.023369036027263874,
      "grad_norm": 6.51924054198785,
      "learning_rate": 7.766990291262136e-07,
      "loss": 1.1033,
      "step": 8
    },
    {
      "epoch": 0.02629016553067186,
      "grad_norm": 6.172807303107381,
      "learning_rate": 8.737864077669904e-07,
      "loss": 1.1025,
      "step": 9
    },
    {
      "epoch": 0.029211295034079845,
      "grad_norm": 6.123481829151969,
      "learning_rate": 9.70873786407767e-07,
      "loss": 1.0593,
      "step": 10
    },
    {
      "epoch": 0.03213242453748783,
      "grad_norm": 5.067066736988861,
      "learning_rate": 1.0679611650485437e-06,
      "loss": 1.0514,
      "step": 11
    },
    {
      "epoch": 0.035053554040895815,
      "grad_norm": 5.316963931526159,
      "learning_rate": 1.1650485436893206e-06,
      "loss": 1.0649,
      "step": 12
    },
    {
      "epoch": 0.0379746835443038,
      "grad_norm": 4.8023708760386326,
      "learning_rate": 1.2621359223300972e-06,
      "loss": 1.0595,
      "step": 13
    },
    {
      "epoch": 0.04089581304771178,
      "grad_norm": 4.307477266760391,
      "learning_rate": 1.359223300970874e-06,
      "loss": 1.0045,
      "step": 14
    },
    {
      "epoch": 0.043816942551119765,
      "grad_norm": 2.9737946154856254,
      "learning_rate": 1.4563106796116506e-06,
      "loss": 0.9864,
      "step": 15
    },
    {
      "epoch": 0.04673807205452775,
      "grad_norm": 2.969891758892917,
      "learning_rate": 1.5533980582524272e-06,
      "loss": 1.0038,
      "step": 16
    },
    {
      "epoch": 0.04965920155793573,
      "grad_norm": 2.6410254985919264,
      "learning_rate": 1.650485436893204e-06,
      "loss": 0.9688,
      "step": 17
    },
    {
      "epoch": 0.05258033106134372,
      "grad_norm": 2.6767839969522385,
      "learning_rate": 1.7475728155339808e-06,
      "loss": 0.9777,
      "step": 18
    },
    {
      "epoch": 0.055501460564751706,
      "grad_norm": 2.3845837305117867,
      "learning_rate": 1.8446601941747574e-06,
      "loss": 0.9803,
      "step": 19
    },
    {
      "epoch": 0.05842259006815969,
      "grad_norm": 2.485668257022799,
      "learning_rate": 1.941747572815534e-06,
      "loss": 0.9512,
      "step": 20
    },
    {
      "epoch": 0.06134371957156767,
      "grad_norm": 2.9203150216318057,
      "learning_rate": 2.0388349514563107e-06,
      "loss": 0.93,
      "step": 21
    },
    {
      "epoch": 0.06426484907497566,
      "grad_norm": 2.868722547204036,
      "learning_rate": 2.1359223300970874e-06,
      "loss": 0.9171,
      "step": 22
    },
    {
      "epoch": 0.06718597857838364,
      "grad_norm": 2.679648431038279,
      "learning_rate": 2.2330097087378645e-06,
      "loss": 0.9199,
      "step": 23
    },
    {
      "epoch": 0.07010710808179163,
      "grad_norm": 2.459073576199394,
      "learning_rate": 2.330097087378641e-06,
      "loss": 0.9157,
      "step": 24
    },
    {
      "epoch": 0.0730282375851996,
      "grad_norm": 2.014055908464458,
      "learning_rate": 2.427184466019418e-06,
      "loss": 0.9164,
      "step": 25
    },
    {
      "epoch": 0.0759493670886076,
      "grad_norm": 1.5955860458303692,
      "learning_rate": 2.5242718446601945e-06,
      "loss": 0.8909,
      "step": 26
    },
    {
      "epoch": 0.07887049659201557,
      "grad_norm": 1.3790379745407235,
      "learning_rate": 2.621359223300971e-06,
      "loss": 0.8631,
      "step": 27
    },
    {
      "epoch": 0.08179162609542356,
      "grad_norm": 1.6462939916147095,
      "learning_rate": 2.718446601941748e-06,
      "loss": 0.8384,
      "step": 28
    },
    {
      "epoch": 0.08471275559883155,
      "grad_norm": 1.7416274692927092,
      "learning_rate": 2.8155339805825245e-06,
      "loss": 0.8643,
      "step": 29
    },
    {
      "epoch": 0.08763388510223953,
      "grad_norm": 1.4689264445022938,
      "learning_rate": 2.912621359223301e-06,
      "loss": 0.8401,
      "step": 30
    },
    {
      "epoch": 0.09055501460564752,
      "grad_norm": 1.2970139226424346,
      "learning_rate": 3.0097087378640778e-06,
      "loss": 0.8423,
      "step": 31
    },
    {
      "epoch": 0.0934761441090555,
      "grad_norm": 1.1674121947058942,
      "learning_rate": 3.1067961165048544e-06,
      "loss": 0.837,
      "step": 32
    },
    {
      "epoch": 0.09639727361246349,
      "grad_norm": 1.1908632755730892,
      "learning_rate": 3.2038834951456315e-06,
      "loss": 0.8203,
      "step": 33
    },
    {
      "epoch": 0.09931840311587146,
      "grad_norm": 1.1042928737436872,
      "learning_rate": 3.300970873786408e-06,
      "loss": 0.799,
      "step": 34
    },
    {
      "epoch": 0.10223953261927946,
      "grad_norm": 1.1603936762022113,
      "learning_rate": 3.398058252427185e-06,
      "loss": 0.8303,
      "step": 35
    },
    {
      "epoch": 0.10516066212268745,
      "grad_norm": 1.0681312398436076,
      "learning_rate": 3.4951456310679615e-06,
      "loss": 0.8052,
      "step": 36
    },
    {
      "epoch": 0.10808179162609542,
      "grad_norm": 0.8925175456575719,
      "learning_rate": 3.592233009708738e-06,
      "loss": 0.8069,
      "step": 37
    },
    {
      "epoch": 0.11100292112950341,
      "grad_norm": 0.8822021824942768,
      "learning_rate": 3.689320388349515e-06,
      "loss": 0.7812,
      "step": 38
    },
    {
      "epoch": 0.11392405063291139,
      "grad_norm": 0.8935693976115221,
      "learning_rate": 3.7864077669902915e-06,
      "loss": 0.7787,
      "step": 39
    },
    {
      "epoch": 0.11684518013631938,
      "grad_norm": 0.9243397791705507,
      "learning_rate": 3.883495145631068e-06,
      "loss": 0.7936,
      "step": 40
    },
    {
      "epoch": 0.11976630963972736,
      "grad_norm": 0.8181188534202998,
      "learning_rate": 3.980582524271845e-06,
      "loss": 0.7752,
      "step": 41
    },
    {
      "epoch": 0.12268743914313535,
      "grad_norm": 0.723717052367605,
      "learning_rate": 4.0776699029126215e-06,
      "loss": 0.7544,
      "step": 42
    },
    {
      "epoch": 0.12560856864654332,
      "grad_norm": 0.7268642480625847,
      "learning_rate": 4.1747572815533986e-06,
      "loss": 0.7871,
      "step": 43
    },
    {
      "epoch": 0.12852969814995133,
      "grad_norm": 0.7154200711509474,
      "learning_rate": 4.271844660194175e-06,
      "loss": 0.7617,
      "step": 44
    },
    {
      "epoch": 0.1314508276533593,
      "grad_norm": 0.7425189888635864,
      "learning_rate": 4.368932038834952e-06,
      "loss": 0.7661,
      "step": 45
    },
    {
      "epoch": 0.13437195715676728,
      "grad_norm": 0.6964525874804132,
      "learning_rate": 4.466019417475729e-06,
      "loss": 0.7578,
      "step": 46
    },
    {
      "epoch": 0.13729308666017526,
      "grad_norm": 0.7077568254698756,
      "learning_rate": 4.563106796116505e-06,
      "loss": 0.7466,
      "step": 47
    },
    {
      "epoch": 0.14021421616358326,
      "grad_norm": 0.728109048064247,
      "learning_rate": 4.660194174757282e-06,
      "loss": 0.7536,
      "step": 48
    },
    {
      "epoch": 0.14313534566699124,
      "grad_norm": 0.7073551382013681,
      "learning_rate": 4.7572815533980585e-06,
      "loss": 0.7386,
      "step": 49
    },
    {
      "epoch": 0.1460564751703992,
      "grad_norm": 0.6810942703152736,
      "learning_rate": 4.854368932038836e-06,
      "loss": 0.7771,
      "step": 50
    },
    {
      "epoch": 0.14897760467380722,
      "grad_norm": 0.7280450266974076,
      "learning_rate": 4.951456310679612e-06,
      "loss": 0.739,
      "step": 51
    },
    {
      "epoch": 0.1518987341772152,
      "grad_norm": 0.7021483972343961,
      "learning_rate": 5.048543689320389e-06,
      "loss": 0.7437,
      "step": 52
    },
    {
      "epoch": 0.15481986368062317,
      "grad_norm": 0.7167957676962433,
      "learning_rate": 5.145631067961165e-06,
      "loss": 0.764,
      "step": 53
    },
    {
      "epoch": 0.15774099318403115,
      "grad_norm": 0.6140023964252928,
      "learning_rate": 5.242718446601942e-06,
      "loss": 0.7338,
      "step": 54
    },
    {
      "epoch": 0.16066212268743915,
      "grad_norm": 0.7304557287827438,
      "learning_rate": 5.3398058252427185e-06,
      "loss": 0.7815,
      "step": 55
    },
    {
      "epoch": 0.16358325219084713,
      "grad_norm": 0.6396185255120146,
      "learning_rate": 5.436893203883496e-06,
      "loss": 0.7349,
      "step": 56
    },
    {
      "epoch": 0.1665043816942551,
      "grad_norm": 0.6558732255969539,
      "learning_rate": 5.533980582524272e-06,
      "loss": 0.7212,
      "step": 57
    },
    {
      "epoch": 0.1694255111976631,
      "grad_norm": 0.5940933645304805,
      "learning_rate": 5.631067961165049e-06,
      "loss": 0.7577,
      "step": 58
    },
    {
      "epoch": 0.17234664070107109,
      "grad_norm": 0.6218585913117457,
      "learning_rate": 5.728155339805825e-06,
      "loss": 0.7431,
      "step": 59
    },
    {
      "epoch": 0.17526777020447906,
      "grad_norm": 0.6908136674947283,
      "learning_rate": 5.825242718446602e-06,
      "loss": 0.7368,
      "step": 60
    },
    {
      "epoch": 0.17818889970788704,
      "grad_norm": 0.6522752947974526,
      "learning_rate": 5.9223300970873785e-06,
      "loss": 0.7126,
      "step": 61
    },
    {
      "epoch": 0.18111002921129504,
      "grad_norm": 0.6780346336214896,
      "learning_rate": 6.0194174757281556e-06,
      "loss": 0.7367,
      "step": 62
    },
    {
      "epoch": 0.18403115871470302,
      "grad_norm": 0.7722253573433183,
      "learning_rate": 6.116504854368932e-06,
      "loss": 0.7228,
      "step": 63
    },
    {
      "epoch": 0.186952288218111,
      "grad_norm": 0.6399768773236866,
      "learning_rate": 6.213592233009709e-06,
      "loss": 0.7244,
      "step": 64
    },
    {
      "epoch": 0.189873417721519,
      "grad_norm": 0.6674128228125541,
      "learning_rate": 6.310679611650487e-06,
      "loss": 0.7376,
      "step": 65
    },
    {
      "epoch": 0.19279454722492698,
      "grad_norm": 0.6775138710637573,
      "learning_rate": 6.407766990291263e-06,
      "loss": 0.7286,
      "step": 66
    },
    {
      "epoch": 0.19571567672833495,
      "grad_norm": 0.7443299390748634,
      "learning_rate": 6.50485436893204e-06,
      "loss": 0.7123,
      "step": 67
    },
    {
      "epoch": 0.19863680623174293,
      "grad_norm": 0.6712217876186718,
      "learning_rate": 6.601941747572816e-06,
      "loss": 0.7118,
      "step": 68
    },
    {
      "epoch": 0.20155793573515093,
      "grad_norm": 0.6173482204736721,
      "learning_rate": 6.6990291262135935e-06,
      "loss": 0.7141,
      "step": 69
    },
    {
      "epoch": 0.2044790652385589,
      "grad_norm": 0.7586706797857404,
      "learning_rate": 6.79611650485437e-06,
      "loss": 0.7182,
      "step": 70
    },
    {
      "epoch": 0.2074001947419669,
      "grad_norm": 0.6711328351671544,
      "learning_rate": 6.893203883495147e-06,
      "loss": 0.7182,
      "step": 71
    },
    {
      "epoch": 0.2103213242453749,
      "grad_norm": 0.6423529972707454,
      "learning_rate": 6.990291262135923e-06,
      "loss": 0.7147,
      "step": 72
    },
    {
      "epoch": 0.21324245374878287,
      "grad_norm": 0.7175157009799245,
      "learning_rate": 7.0873786407767e-06,
      "loss": 0.7078,
      "step": 73
    },
    {
      "epoch": 0.21616358325219084,
      "grad_norm": 0.7156449836663106,
      "learning_rate": 7.184466019417476e-06,
      "loss": 0.7026,
      "step": 74
    },
    {
      "epoch": 0.21908471275559882,
      "grad_norm": 0.591120785534527,
      "learning_rate": 7.2815533980582534e-06,
      "loss": 0.6996,
      "step": 75
    },
    {
      "epoch": 0.22200584225900682,
      "grad_norm": 0.6568235675952798,
      "learning_rate": 7.37864077669903e-06,
      "loss": 0.7098,
      "step": 76
    },
    {
      "epoch": 0.2249269717624148,
      "grad_norm": 0.6969907394816692,
      "learning_rate": 7.475728155339807e-06,
      "loss": 0.7112,
      "step": 77
    },
    {
      "epoch": 0.22784810126582278,
      "grad_norm": 0.674624972312595,
      "learning_rate": 7.572815533980583e-06,
      "loss": 0.6905,
      "step": 78
    },
    {
      "epoch": 0.23076923076923078,
      "grad_norm": 0.6004655479528318,
      "learning_rate": 7.66990291262136e-06,
      "loss": 0.6987,
      "step": 79
    },
    {
      "epoch": 0.23369036027263876,
      "grad_norm": 0.672439140786889,
      "learning_rate": 7.766990291262136e-06,
      "loss": 0.7059,
      "step": 80
    },
    {
      "epoch": 0.23661148977604674,
      "grad_norm": 0.6379167803971234,
      "learning_rate": 7.864077669902913e-06,
      "loss": 0.6993,
      "step": 81
    },
    {
      "epoch": 0.2395326192794547,
      "grad_norm": 0.624979376741808,
      "learning_rate": 7.96116504854369e-06,
      "loss": 0.6961,
      "step": 82
    },
    {
      "epoch": 0.24245374878286272,
      "grad_norm": 0.5967840653189634,
      "learning_rate": 8.058252427184466e-06,
      "loss": 0.7115,
      "step": 83
    },
    {
      "epoch": 0.2453748782862707,
      "grad_norm": 0.6448351126797235,
      "learning_rate": 8.155339805825243e-06,
      "loss": 0.7058,
      "step": 84
    },
    {
      "epoch": 0.24829600778967867,
      "grad_norm": 0.5456246974361787,
      "learning_rate": 8.25242718446602e-06,
      "loss": 0.6971,
      "step": 85
    },
    {
      "epoch": 0.25121713729308665,
      "grad_norm": 0.5981523190285354,
      "learning_rate": 8.349514563106797e-06,
      "loss": 0.6644,
      "step": 86
    },
    {
      "epoch": 0.25413826679649465,
      "grad_norm": 0.6094281430676193,
      "learning_rate": 8.446601941747573e-06,
      "loss": 0.6822,
      "step": 87
    },
    {
      "epoch": 0.25705939629990265,
      "grad_norm": 0.645486530934357,
      "learning_rate": 8.54368932038835e-06,
      "loss": 0.6767,
      "step": 88
    },
    {
      "epoch": 0.2599805258033106,
      "grad_norm": 0.6225273389721123,
      "learning_rate": 8.640776699029127e-06,
      "loss": 0.7152,
      "step": 89
    },
    {
      "epoch": 0.2629016553067186,
      "grad_norm": 0.6432259026110182,
      "learning_rate": 8.737864077669904e-06,
      "loss": 0.7159,
      "step": 90
    },
    {
      "epoch": 0.26582278481012656,
      "grad_norm": 0.6162731782600254,
      "learning_rate": 8.834951456310681e-06,
      "loss": 0.7132,
      "step": 91
    },
    {
      "epoch": 0.26874391431353456,
      "grad_norm": 0.6542304058964258,
      "learning_rate": 8.932038834951458e-06,
      "loss": 0.6809,
      "step": 92
    },
    {
      "epoch": 0.27166504381694256,
      "grad_norm": 0.6079172030969984,
      "learning_rate": 9.029126213592233e-06,
      "loss": 0.6824,
      "step": 93
    },
    {
      "epoch": 0.2745861733203505,
      "grad_norm": 0.6519383743853913,
      "learning_rate": 9.12621359223301e-06,
      "loss": 0.6899,
      "step": 94
    },
    {
      "epoch": 0.2775073028237585,
      "grad_norm": 0.7282902638094394,
      "learning_rate": 9.223300970873788e-06,
      "loss": 0.6922,
      "step": 95
    },
    {
      "epoch": 0.2804284323271665,
      "grad_norm": 0.5989786182176935,
      "learning_rate": 9.320388349514565e-06,
      "loss": 0.6734,
      "step": 96
    },
    {
      "epoch": 0.28334956183057447,
      "grad_norm": 0.7160709546948157,
      "learning_rate": 9.41747572815534e-06,
      "loss": 0.6815,
      "step": 97
    },
    {
      "epoch": 0.2862706913339825,
      "grad_norm": 0.6813096412081009,
      "learning_rate": 9.514563106796117e-06,
      "loss": 0.6885,
      "step": 98
    },
    {
      "epoch": 0.2891918208373905,
      "grad_norm": 0.6540975722149734,
      "learning_rate": 9.611650485436894e-06,
      "loss": 0.69,
      "step": 99
    },
    {
      "epoch": 0.2921129503407984,
      "grad_norm": 0.8050571281257926,
      "learning_rate": 9.708737864077671e-06,
      "loss": 0.691,
      "step": 100
    },
    {
      "epoch": 0.29503407984420643,
      "grad_norm": 0.6433559989032654,
      "learning_rate": 9.805825242718447e-06,
      "loss": 0.6986,
      "step": 101
    },
    {
      "epoch": 0.29795520934761444,
      "grad_norm": 0.8412736504392738,
      "learning_rate": 9.902912621359224e-06,
      "loss": 0.6926,
      "step": 102
    },
    {
      "epoch": 0.3008763388510224,
      "grad_norm": 0.7443726086923518,
      "learning_rate": 1e-05,
      "loss": 0.6906,
      "step": 103
    },
    {
      "epoch": 0.3037974683544304,
      "grad_norm": 0.6597015006842325,
      "learning_rate": 9.999971037507608e-06,
      "loss": 0.675,
      "step": 104
    },
    {
      "epoch": 0.30671859785783834,
      "grad_norm": 0.5931435595661035,
      "learning_rate": 9.99988415036596e-06,
      "loss": 0.6802,
      "step": 105
    },
    {
      "epoch": 0.30963972736124634,
      "grad_norm": 0.7214217523040783,
      "learning_rate": 9.99973933958164e-06,
      "loss": 0.7041,
      "step": 106
    },
    {
      "epoch": 0.31256085686465435,
      "grad_norm": 0.7234513096207073,
      "learning_rate": 9.999536606832288e-06,
      "loss": 0.6872,
      "step": 107
    },
    {
      "epoch": 0.3154819863680623,
      "grad_norm": 0.7879752038918911,
      "learning_rate": 9.999275954466555e-06,
      "loss": 0.6873,
      "step": 108
    },
    {
      "epoch": 0.3184031158714703,
      "grad_norm": 0.6655845938433153,
      "learning_rate": 9.998957385504103e-06,
      "loss": 0.6976,
      "step": 109
    },
    {
      "epoch": 0.3213242453748783,
      "grad_norm": 0.8522730059744493,
      "learning_rate": 9.99858090363555e-06,
      "loss": 0.6719,
      "step": 110
    },
    {
      "epoch": 0.32424537487828625,
      "grad_norm": 0.6292219731062122,
      "learning_rate": 9.998146513222436e-06,
      "loss": 0.6993,
      "step": 111
    },
    {
      "epoch": 0.32716650438169426,
      "grad_norm": 0.7181176135878521,
      "learning_rate": 9.997654219297176e-06,
      "loss": 0.6901,
      "step": 112
    },
    {
      "epoch": 0.33008763388510226,
      "grad_norm": 0.6962928810640735,
      "learning_rate": 9.997104027562991e-06,
      "loss": 0.6951,
      "step": 113
    },
    {
      "epoch": 0.3330087633885102,
      "grad_norm": 0.6849230790401417,
      "learning_rate": 9.996495944393853e-06,
      "loss": 0.6828,
      "step": 114
    },
    {
      "epoch": 0.3359298928919182,
      "grad_norm": 0.7135171270031142,
      "learning_rate": 9.995829976834402e-06,
      "loss": 0.6737,
      "step": 115
    },
    {
      "epoch": 0.3388510223953262,
      "grad_norm": 0.6814570642632325,
      "learning_rate": 9.995106132599869e-06,
      "loss": 0.6875,
      "step": 116
    },
    {
      "epoch": 0.34177215189873417,
      "grad_norm": 0.8248995841087691,
      "learning_rate": 9.99432442007599e-06,
      "loss": 0.6873,
      "step": 117
    },
    {
      "epoch": 0.34469328140214217,
      "grad_norm": 0.6919031766051941,
      "learning_rate": 9.993484848318899e-06,
      "loss": 0.6835,
      "step": 118
    },
    {
      "epoch": 0.3476144109055501,
      "grad_norm": 0.7748697122331325,
      "learning_rate": 9.992587427055036e-06,
      "loss": 0.6933,
      "step": 119
    },
    {
      "epoch": 0.3505355404089581,
      "grad_norm": 0.6232356121853884,
      "learning_rate": 9.99163216668102e-06,
      "loss": 0.6634,
      "step": 120
    },
    {
      "epoch": 0.35345666991236613,
      "grad_norm": 0.8152682417581196,
      "learning_rate": 9.990619078263543e-06,
      "loss": 0.6833,
      "step": 121
    },
    {
      "epoch": 0.3563777994157741,
      "grad_norm": 0.6829212246748637,
      "learning_rate": 9.989548173539229e-06,
      "loss": 0.6904,
      "step": 122
    },
    {
      "epoch": 0.3592989289191821,
      "grad_norm": 0.7764702428398512,
      "learning_rate": 9.988419464914505e-06,
      "loss": 0.6911,
      "step": 123
    },
    {
      "epoch": 0.3622200584225901,
      "grad_norm": 0.792332480824063,
      "learning_rate": 9.98723296546546e-06,
      "loss": 0.6817,
      "step": 124
    },
    {
      "epoch": 0.36514118792599803,
      "grad_norm": 0.6594623726005864,
      "learning_rate": 9.985988688937684e-06,
      "loss": 0.6873,
      "step": 125
    },
    {
      "epoch": 0.36806231742940604,
      "grad_norm": 0.8269823477396988,
      "learning_rate": 9.984686649746119e-06,
      "loss": 0.693,
      "step": 126
    },
    {
      "epoch": 0.37098344693281404,
      "grad_norm": 0.6422126697095933,
      "learning_rate": 9.983326862974882e-06,
      "loss": 0.6576,
      "step": 127
    },
    {
      "epoch": 0.373904576436222,
      "grad_norm": 0.8200568586438982,
      "learning_rate": 9.981909344377101e-06,
      "loss": 0.6929,
      "step": 128
    },
    {
      "epoch": 0.37682570593963,
      "grad_norm": 0.7400126500706381,
      "learning_rate": 9.980434110374725e-06,
      "loss": 0.6557,
      "step": 129
    },
    {
      "epoch": 0.379746835443038,
      "grad_norm": 0.713854878991036,
      "learning_rate": 9.978901178058333e-06,
      "loss": 0.6942,
      "step": 130
    },
    {
      "epoch": 0.38266796494644595,
      "grad_norm": 0.7483541113835968,
      "learning_rate": 9.977310565186945e-06,
      "loss": 0.6781,
      "step": 131
    },
    {
      "epoch": 0.38558909444985395,
      "grad_norm": 0.7131870241688308,
      "learning_rate": 9.975662290187802e-06,
      "loss": 0.6564,
      "step": 132
    },
    {
      "epoch": 0.3885102239532619,
      "grad_norm": 0.7880618680074065,
      "learning_rate": 9.973956372156166e-06,
      "loss": 0.6752,
      "step": 133
    },
    {
      "epoch": 0.3914313534566699,
      "grad_norm": 0.5977156256261835,
      "learning_rate": 9.972192830855095e-06,
      "loss": 0.6763,
      "step": 134
    },
    {
      "epoch": 0.3943524829600779,
      "grad_norm": 0.76186235107972,
      "learning_rate": 9.970371686715205e-06,
      "loss": 0.7014,
      "step": 135
    },
    {
      "epoch": 0.39727361246348586,
      "grad_norm": 0.8438134698760479,
      "learning_rate": 9.96849296083445e-06,
      "loss": 0.6902,
      "step": 136
    },
    {
      "epoch": 0.40019474196689386,
      "grad_norm": 0.6120514167187466,
      "learning_rate": 9.966556674977864e-06,
      "loss": 0.663,
      "step": 137
    },
    {
      "epoch": 0.40311587147030187,
      "grad_norm": 0.7891889540366791,
      "learning_rate": 9.964562851577307e-06,
      "loss": 0.6458,
      "step": 138
    },
    {
      "epoch": 0.4060370009737098,
      "grad_norm": 0.8381022052177086,
      "learning_rate": 9.962511513731219e-06,
      "loss": 0.6728,
      "step": 139
    },
    {
      "epoch": 0.4089581304771178,
      "grad_norm": 0.7489332182994131,
      "learning_rate": 9.960402685204347e-06,
      "loss": 0.6585,
      "step": 140
    },
    {
      "epoch": 0.4118792599805258,
      "grad_norm": 0.804869829411989,
      "learning_rate": 9.958236390427458e-06,
      "loss": 0.6784,
      "step": 141
    },
    {
      "epoch": 0.4148003894839338,
      "grad_norm": 0.7474713341428297,
      "learning_rate": 9.956012654497073e-06,
      "loss": 0.6488,
      "step": 142
    },
    {
      "epoch": 0.4177215189873418,
      "grad_norm": 0.8115239389486957,
      "learning_rate": 9.953731503175166e-06,
      "loss": 0.6894,
      "step": 143
    },
    {
      "epoch": 0.4206426484907498,
      "grad_norm": 0.781168520393115,
      "learning_rate": 9.951392962888868e-06,
      "loss": 0.6534,
      "step": 144
    },
    {
      "epoch": 0.42356377799415773,
      "grad_norm": 0.7722795439014734,
      "learning_rate": 9.948997060730161e-06,
      "loss": 0.6504,
      "step": 145
    },
    {
      "epoch": 0.42648490749756574,
      "grad_norm": 0.8668435801796398,
      "learning_rate": 9.946543824455563e-06,
      "loss": 0.6507,
      "step": 146
    },
    {
      "epoch": 0.4294060370009737,
      "grad_norm": 0.8391877814865175,
      "learning_rate": 9.94403328248581e-06,
      "loss": 0.6702,
      "step": 147
    },
    {
      "epoch": 0.4323271665043817,
      "grad_norm": 0.6805575537389376,
      "learning_rate": 9.941465463905522e-06,
      "loss": 0.6744,
      "step": 148
    },
    {
      "epoch": 0.4352482960077897,
      "grad_norm": 0.7033559759136386,
      "learning_rate": 9.938840398462872e-06,
      "loss": 0.6732,
      "step": 149
    },
    {
      "epoch": 0.43816942551119764,
      "grad_norm": 0.8071774421297472,
      "learning_rate": 9.936158116569231e-06,
      "loss": 0.6704,
      "step": 150
    },
    {
      "epoch": 0.44109055501460565,
      "grad_norm": 0.7788309365778397,
      "learning_rate": 9.933418649298831e-06,
      "loss": 0.6697,
      "step": 151
    },
    {
      "epoch": 0.44401168451801365,
      "grad_norm": 0.633484330960411,
      "learning_rate": 9.930622028388388e-06,
      "loss": 0.6533,
      "step": 152
    },
    {
      "epoch": 0.4469328140214216,
      "grad_norm": 0.6677671353777382,
      "learning_rate": 9.92776828623675e-06,
      "loss": 0.6323,
      "step": 153
    },
    {
      "epoch": 0.4498539435248296,
      "grad_norm": 0.6580113405415786,
      "learning_rate": 9.924857455904511e-06,
      "loss": 0.6569,
      "step": 154
    },
    {
      "epoch": 0.4527750730282376,
      "grad_norm": 0.7153967279296722,
      "learning_rate": 9.921889571113629e-06,
      "loss": 0.6651,
      "step": 155
    },
    {
      "epoch": 0.45569620253164556,
      "grad_norm": 0.6507727168616313,
      "learning_rate": 9.918864666247042e-06,
      "loss": 0.6709,
      "step": 156
    },
    {
      "epoch": 0.45861733203505356,
      "grad_norm": 0.6614595067208825,
      "learning_rate": 9.915782776348263e-06,
      "loss": 0.6558,
      "step": 157
    },
    {
      "epoch": 0.46153846153846156,
      "grad_norm": 0.8137858393206525,
      "learning_rate": 9.912643937120978e-06,
      "loss": 0.6756,
      "step": 158
    },
    {
      "epoch": 0.4644595910418695,
      "grad_norm": 0.6306212983973334,
      "learning_rate": 9.909448184928629e-06,
      "loss": 0.6416,
      "step": 159
    },
    {
      "epoch": 0.4673807205452775,
      "grad_norm": 0.6836208681318199,
      "learning_rate": 9.906195556793996e-06,
      "loss": 0.6662,
      "step": 160
    },
    {
      "epoch": 0.47030185004868547,
      "grad_norm": 0.6141476817991299,
      "learning_rate": 9.902886090398764e-06,
      "loss": 0.6774,
      "step": 161
    },
    {
      "epoch": 0.47322297955209347,
      "grad_norm": 0.7052661650958192,
      "learning_rate": 9.899519824083095e-06,
      "loss": 0.6618,
      "step": 162
    },
    {
      "epoch": 0.4761441090555015,
      "grad_norm": 0.5986401947237227,
      "learning_rate": 9.896096796845172e-06,
      "loss": 0.6738,
      "step": 163
    },
    {
      "epoch": 0.4790652385589094,
      "grad_norm": 0.6916970139226505,
      "learning_rate": 9.892617048340754e-06,
      "loss": 0.6588,
      "step": 164
    },
    {
      "epoch": 0.4819863680623174,
      "grad_norm": 0.6805676670949773,
      "learning_rate": 9.889080618882719e-06,
      "loss": 0.6826,
      "step": 165
    },
    {
      "epoch": 0.48490749756572543,
      "grad_norm": 0.6658431348883423,
      "learning_rate": 9.88548754944059e-06,
      "loss": 0.6702,
      "step": 166
    },
    {
      "epoch": 0.4878286270691334,
      "grad_norm": 0.682259295410329,
      "learning_rate": 9.881837881640064e-06,
      "loss": 0.6735,
      "step": 167
    },
    {
      "epoch": 0.4907497565725414,
      "grad_norm": 0.5993509164653038,
      "learning_rate": 9.878131657762535e-06,
      "loss": 0.6545,
      "step": 168
    },
    {
      "epoch": 0.4936708860759494,
      "grad_norm": 0.7534948952289838,
      "learning_rate": 9.874368920744594e-06,
      "loss": 0.6812,
      "step": 169
    },
    {
      "epoch": 0.49659201557935734,
      "grad_norm": 0.707512873305316,
      "learning_rate": 9.870549714177538e-06,
      "loss": 0.6513,
      "step": 170
    },
    {
      "epoch": 0.49951314508276534,
      "grad_norm": 0.7200355139629895,
      "learning_rate": 9.866674082306861e-06,
      "loss": 0.6438,
      "step": 171
    },
    {
      "epoch": 0.5024342745861733,
      "grad_norm": 0.8156295486958224,
      "learning_rate": 9.86274207003175e-06,
      "loss": 0.6564,
      "step": 172
    },
    {
      "epoch": 0.5053554040895814,
      "grad_norm": 0.6117511012510686,
      "learning_rate": 9.858753722904552e-06,
      "loss": 0.6827,
      "step": 173
    },
    {
      "epoch": 0.5082765335929893,
      "grad_norm": 0.8219493047877929,
      "learning_rate": 9.854709087130261e-06,
      "loss": 0.6718,
      "step": 174
    },
    {
      "epoch": 0.5111976630963972,
      "grad_norm": 0.6662487126424073,
      "learning_rate": 9.850608209565967e-06,
      "loss": 0.6388,
      "step": 175
    },
    {
      "epoch": 0.5141187925998053,
      "grad_norm": 0.7125438710540766,
      "learning_rate": 9.84645113772032e-06,
      "loss": 0.6589,
      "step": 176
    },
    {
      "epoch": 0.5170399221032133,
      "grad_norm": 0.7487609228616714,
      "learning_rate": 9.842237919752994e-06,
      "loss": 0.6544,
      "step": 177
    },
    {
      "epoch": 0.5199610516066212,
      "grad_norm": 0.6468420309416394,
      "learning_rate": 9.8379686044741e-06,
      "loss": 0.6565,
      "step": 178
    },
    {
      "epoch": 0.5228821811100293,
      "grad_norm": 0.7546649500098744,
      "learning_rate": 9.833643241343642e-06,
      "loss": 0.6647,
      "step": 179
    },
    {
      "epoch": 0.5258033106134372,
      "grad_norm": 0.6258186572488958,
      "learning_rate": 9.829261880470941e-06,
      "loss": 0.6392,
      "step": 180
    },
    {
      "epoch": 0.5287244401168452,
      "grad_norm": 0.5997447293335689,
      "learning_rate": 9.82482457261405e-06,
      "loss": 0.6398,
      "step": 181
    },
    {
      "epoch": 0.5316455696202531,
      "grad_norm": 0.6452320212378018,
      "learning_rate": 9.820331369179166e-06,
      "loss": 0.6611,
      "step": 182
    },
    {
      "epoch": 0.5345666991236612,
      "grad_norm": 0.5580735581912285,
      "learning_rate": 9.815782322220036e-06,
      "loss": 0.6548,
      "step": 183
    },
    {
      "epoch": 0.5374878286270691,
      "grad_norm": 0.6058080142971995,
      "learning_rate": 9.811177484437357e-06,
      "loss": 0.6664,
      "step": 184
    },
    {
      "epoch": 0.5404089581304771,
      "grad_norm": 0.7487147947448509,
      "learning_rate": 9.806516909178161e-06,
      "loss": 0.665,
      "step": 185
    },
    {
      "epoch": 0.5433300876338851,
      "grad_norm": 0.5994532711002538,
      "learning_rate": 9.801800650435194e-06,
      "loss": 0.6345,
      "step": 186
    },
    {
      "epoch": 0.5462512171372931,
      "grad_norm": 0.6402638834025774,
      "learning_rate": 9.797028762846305e-06,
      "loss": 0.6689,
      "step": 187
    },
    {
      "epoch": 0.549172346640701,
      "grad_norm": 0.8110120277002857,
      "learning_rate": 9.792201301693793e-06,
      "loss": 0.6623,
      "step": 188
    },
    {
      "epoch": 0.5520934761441091,
      "grad_norm": 0.6022982451173915,
      "learning_rate": 9.787318322903784e-06,
      "loss": 0.642,
      "step": 189
    },
    {
      "epoch": 0.555014605647517,
      "grad_norm": 0.8359964918822578,
      "learning_rate": 9.78237988304557e-06,
      "loss": 0.6828,
      "step": 190
    },
    {
      "epoch": 0.557935735150925,
      "grad_norm": 0.5971974835914099,
      "learning_rate": 9.77738603933096e-06,
      "loss": 0.6637,
      "step": 191
    },
    {
      "epoch": 0.560856864654333,
      "grad_norm": 0.9007553762322157,
      "learning_rate": 9.772336849613624e-06,
      "loss": 0.6489,
      "step": 192
    },
    {
      "epoch": 0.563777994157741,
      "grad_norm": 0.6097333173160772,
      "learning_rate": 9.767232372388406e-06,
      "loss": 0.6195,
      "step": 193
    },
    {
      "epoch": 0.5666991236611489,
      "grad_norm": 0.7430323349181741,
      "learning_rate": 9.762072666790658e-06,
      "loss": 0.6602,
      "step": 194
    },
    {
      "epoch": 0.569620253164557,
      "grad_norm": 0.698590106062137,
      "learning_rate": 9.756857792595555e-06,
      "loss": 0.654,
      "step": 195
    },
    {
      "epoch": 0.572541382667965,
      "grad_norm": 0.6098458915248055,
      "learning_rate": 9.751587810217398e-06,
      "loss": 0.6571,
      "step": 196
    },
    {
      "epoch": 0.5754625121713729,
      "grad_norm": 0.6600018141821303,
      "learning_rate": 9.746262780708919e-06,
      "loss": 0.6572,
      "step": 197
    },
    {
      "epoch": 0.578383641674781,
      "grad_norm": 0.6601639946678165,
      "learning_rate": 9.740882765760567e-06,
      "loss": 0.6593,
      "step": 198
    },
    {
      "epoch": 0.5813047711781889,
      "grad_norm": 0.726967683938266,
      "learning_rate": 9.735447827699798e-06,
      "loss": 0.6573,
      "step": 199
    },
    {
      "epoch": 0.5842259006815969,
      "grad_norm": 0.6076134837821863,
      "learning_rate": 9.729958029490353e-06,
      "loss": 0.6495,
      "step": 200
    },
    {
      "epoch": 0.5871470301850049,
      "grad_norm": 0.693728259825805,
      "learning_rate": 9.72441343473153e-06,
      "loss": 0.6384,
      "step": 201
    },
    {
      "epoch": 0.5900681596884129,
      "grad_norm": 0.7278423168034551,
      "learning_rate": 9.718814107657441e-06,
      "loss": 0.6584,
      "step": 202
    },
    {
      "epoch": 0.5929892891918208,
      "grad_norm": 0.7100671054561837,
      "learning_rate": 9.713160113136272e-06,
      "loss": 0.6555,
      "step": 203
    },
    {
      "epoch": 0.5959104186952289,
      "grad_norm": 0.6955835624438068,
      "learning_rate": 9.707451516669533e-06,
      "loss": 0.6581,
      "step": 204
    },
    {
      "epoch": 0.5988315481986368,
      "grad_norm": 0.6862859891275203,
      "learning_rate": 9.701688384391296e-06,
      "loss": 0.6471,
      "step": 205
    },
    {
      "epoch": 0.6017526777020448,
      "grad_norm": 0.7918106833642026,
      "learning_rate": 9.695870783067434e-06,
      "loss": 0.6351,
      "step": 206
    },
    {
      "epoch": 0.6046738072054528,
      "grad_norm": 0.762255183423834,
      "learning_rate": 9.689998780094839e-06,
      "loss": 0.6464,
      "step": 207
    },
    {
      "epoch": 0.6075949367088608,
      "grad_norm": 0.5926349601655899,
      "learning_rate": 9.684072443500645e-06,
      "loss": 0.6342,
      "step": 208
    },
    {
      "epoch": 0.6105160662122687,
      "grad_norm": 0.863234457455766,
      "learning_rate": 9.678091841941446e-06,
      "loss": 0.653,
      "step": 209
    },
    {
      "epoch": 0.6134371957156767,
      "grad_norm": 0.7588656251837851,
      "learning_rate": 9.672057044702492e-06,
      "loss": 0.6379,
      "step": 210
    },
    {
      "epoch": 0.6163583252190847,
      "grad_norm": 0.6108635991637165,
      "learning_rate": 9.665968121696892e-06,
      "loss": 0.6605,
      "step": 211
    },
    {
      "epoch": 0.6192794547224927,
      "grad_norm": 0.8356230484629192,
      "learning_rate": 9.659825143464798e-06,
      "loss": 0.6458,
      "step": 212
    },
    {
      "epoch": 0.6222005842259006,
      "grad_norm": 0.6240730332192024,
      "learning_rate": 9.653628181172596e-06,
      "loss": 0.6506,
      "step": 213
    },
    {
      "epoch": 0.6251217137293087,
      "grad_norm": 0.6689297135107584,
      "learning_rate": 9.647377306612075e-06,
      "loss": 0.6299,
      "step": 214
    },
    {
      "epoch": 0.6280428432327166,
      "grad_norm": 0.7685374427252067,
      "learning_rate": 9.641072592199599e-06,
      "loss": 0.6634,
      "step": 215
    },
    {
      "epoch": 0.6309639727361246,
      "grad_norm": 0.6331465350705314,
      "learning_rate": 9.634714110975263e-06,
      "loss": 0.6705,
      "step": 216
    },
    {
      "epoch": 0.6338851022395326,
      "grad_norm": 0.7142109675799595,
      "learning_rate": 9.628301936602053e-06,
      "loss": 0.6539,
      "step": 217
    },
    {
      "epoch": 0.6368062317429406,
      "grad_norm": 0.8729837863809322,
      "learning_rate": 9.62183614336499e-06,
      "loss": 0.6596,
      "step": 218
    },
    {
      "epoch": 0.6397273612463485,
      "grad_norm": 0.5991176634004923,
      "learning_rate": 9.61531680617027e-06,
      "loss": 0.6656,
      "step": 219
    },
    {
      "epoch": 0.6426484907497566,
      "grad_norm": 0.7145894148388473,
      "learning_rate": 9.608744000544392e-06,
      "loss": 0.6643,
      "step": 220
    },
    {
      "epoch": 0.6455696202531646,
      "grad_norm": 0.7598912110336243,
      "learning_rate": 9.602117802633293e-06,
      "loss": 0.6291,
      "step": 221
    },
    {
      "epoch": 0.6484907497565725,
      "grad_norm": 0.6815208811474045,
      "learning_rate": 9.595438289201453e-06,
      "loss": 0.6472,
      "step": 222
    },
    {
      "epoch": 0.6514118792599806,
      "grad_norm": 0.5760568859368258,
      "learning_rate": 9.588705537631014e-06,
      "loss": 0.6563,
      "step": 223
    },
    {
      "epoch": 0.6543330087633885,
      "grad_norm": 0.7022056147869815,
      "learning_rate": 9.581919625920886e-06,
      "loss": 0.6524,
      "step": 224
    },
    {
      "epoch": 0.6572541382667965,
      "grad_norm": 0.67470404537858,
      "learning_rate": 9.575080632685832e-06,
      "loss": 0.6436,
      "step": 225
    },
    {
      "epoch": 0.6601752677702045,
      "grad_norm": 0.6156654623500659,
      "learning_rate": 9.568188637155569e-06,
      "loss": 0.6256,
      "step": 226
    },
    {
      "epoch": 0.6630963972736125,
      "grad_norm": 0.6870167927139845,
      "learning_rate": 9.561243719173844e-06,
      "loss": 0.628,
      "step": 227
    },
    {
      "epoch": 0.6660175267770204,
      "grad_norm": 0.6043043393160271,
      "learning_rate": 9.554245959197511e-06,
      "loss": 0.6631,
      "step": 228
    },
    {
      "epoch": 0.6689386562804285,
      "grad_norm": 0.6424377779531785,
      "learning_rate": 9.5471954382956e-06,
      "loss": 0.6455,
      "step": 229
    },
    {
      "epoch": 0.6718597857838364,
      "grad_norm": 0.6747246252989533,
      "learning_rate": 9.54009223814837e-06,
      "loss": 0.6482,
      "step": 230
    },
    {
      "epoch": 0.6747809152872444,
      "grad_norm": 0.6198749042634925,
      "learning_rate": 9.532936441046376e-06,
      "loss": 0.6679,
      "step": 231
    },
    {
      "epoch": 0.6777020447906524,
      "grad_norm": 0.6700871145900489,
      "learning_rate": 9.525728129889505e-06,
      "loss": 0.6704,
      "step": 232
    },
    {
      "epoch": 0.6806231742940604,
      "grad_norm": 0.6486221862584836,
      "learning_rate": 9.51846738818602e-06,
      "loss": 0.6533,
      "step": 233
    },
    {
      "epoch": 0.6835443037974683,
      "grad_norm": 0.6158430372621209,
      "learning_rate": 9.511154300051591e-06,
      "loss": 0.6391,
      "step": 234
    },
    {
      "epoch": 0.6864654333008764,
      "grad_norm": 0.6059458334544432,
      "learning_rate": 9.503788950208324e-06,
      "loss": 0.6326,
      "step": 235
    },
    {
      "epoch": 0.6893865628042843,
      "grad_norm": 0.6630441105155737,
      "learning_rate": 9.49637142398377e-06,
      "loss": 0.6419,
      "step": 236
    },
    {
      "epoch": 0.6923076923076923,
      "grad_norm": 0.7121010143311686,
      "learning_rate": 9.48890180730995e-06,
      "loss": 0.6366,
      "step": 237
    },
    {
      "epoch": 0.6952288218111002,
      "grad_norm": 0.6536311654933115,
      "learning_rate": 9.481380186722354e-06,
      "loss": 0.6475,
      "step": 238
    },
    {
      "epoch": 0.6981499513145083,
      "grad_norm": 0.5870586837084283,
      "learning_rate": 9.473806649358929e-06,
      "loss": 0.6664,
      "step": 239
    },
    {
      "epoch": 0.7010710808179162,
      "grad_norm": 0.7293444660664181,
      "learning_rate": 9.466181282959083e-06,
      "loss": 0.6294,
      "step": 240
    },
    {
      "epoch": 0.7039922103213242,
      "grad_norm": 0.6803091119725557,
      "learning_rate": 9.458504175862665e-06,
      "loss": 0.6543,
      "step": 241
    },
    {
      "epoch": 0.7069133398247323,
      "grad_norm": 0.513109460376802,
      "learning_rate": 9.450775417008936e-06,
      "loss": 0.6529,
      "step": 242
    },
    {
      "epoch": 0.7098344693281402,
      "grad_norm": 0.6591044352211995,
      "learning_rate": 9.442995095935542e-06,
      "loss": 0.6485,
      "step": 243
    },
    {
      "epoch": 0.7127555988315482,
      "grad_norm": 0.5639394652214005,
      "learning_rate": 9.43516330277748e-06,
      "loss": 0.6354,
      "step": 244
    },
    {
      "epoch": 0.7156767283349562,
      "grad_norm": 0.5382276491132706,
      "learning_rate": 9.427280128266049e-06,
      "loss": 0.6338,
      "step": 245
    },
    {
      "epoch": 0.7185978578383642,
      "grad_norm": 0.5783621915913141,
      "learning_rate": 9.419345663727805e-06,
      "loss": 0.6541,
      "step": 246
    },
    {
      "epoch": 0.7215189873417721,
      "grad_norm": 0.5457758477722148,
      "learning_rate": 9.411360001083496e-06,
      "loss": 0.6649,
      "step": 247
    },
    {
      "epoch": 0.7244401168451802,
      "grad_norm": 0.5701118395223765,
      "learning_rate": 9.403323232846994e-06,
      "loss": 0.6305,
      "step": 248
    },
    {
      "epoch": 0.7273612463485881,
      "grad_norm": 0.5986045250901076,
      "learning_rate": 9.395235452124239e-06,
      "loss": 0.6315,
      "step": 249
    },
    {
      "epoch": 0.7302823758519961,
      "grad_norm": 0.5915757822980239,
      "learning_rate": 9.387096752612144e-06,
      "loss": 0.6563,
      "step": 250
    },
    {
      "epoch": 0.7332035053554041,
      "grad_norm": 0.6447044009504002,
      "learning_rate": 9.378907228597518e-06,
      "loss": 0.6543,
      "step": 251
    },
    {
      "epoch": 0.7361246348588121,
      "grad_norm": 0.6146190085950654,
      "learning_rate": 9.370666974955973e-06,
      "loss": 0.6474,
      "step": 252
    },
    {
      "epoch": 0.73904576436222,
      "grad_norm": 0.6186340229955254,
      "learning_rate": 9.362376087150822e-06,
      "loss": 0.6498,
      "step": 253
    },
    {
      "epoch": 0.7419668938656281,
      "grad_norm": 0.6697188050730257,
      "learning_rate": 9.354034661231976e-06,
      "loss": 0.629,
      "step": 254
    },
    {
      "epoch": 0.744888023369036,
      "grad_norm": 0.7166126450253048,
      "learning_rate": 9.345642793834825e-06,
      "loss": 0.6476,
      "step": 255
    },
    {
      "epoch": 0.747809152872444,
      "grad_norm": 0.5909733136537622,
      "learning_rate": 9.337200582179134e-06,
      "loss": 0.6338,
      "step": 256
    },
    {
      "epoch": 0.750730282375852,
      "grad_norm": 0.6690338784895201,
      "learning_rate": 9.328708124067893e-06,
      "loss": 0.6425,
      "step": 257
    },
    {
      "epoch": 0.75365141187926,
      "grad_norm": 0.6170552061635086,
      "learning_rate": 9.320165517886207e-06,
      "loss": 0.649,
      "step": 258
    },
    {
      "epoch": 0.7565725413826679,
      "grad_norm": 0.6767600272762853,
      "learning_rate": 9.31157286260014e-06,
      "loss": 0.6496,
      "step": 259
    },
    {
      "epoch": 0.759493670886076,
      "grad_norm": 0.7264560653599718,
      "learning_rate": 9.302930257755579e-06,
      "loss": 0.6583,
      "step": 260
    },
    {
      "epoch": 0.762414800389484,
      "grad_norm": 0.5453999254984644,
      "learning_rate": 9.294237803477076e-06,
      "loss": 0.648,
      "step": 261
    },
    {
      "epoch": 0.7653359298928919,
      "grad_norm": 0.7523663818704205,
      "learning_rate": 9.285495600466683e-06,
      "loss": 0.6488,
      "step": 262
    },
    {
      "epoch": 0.7682570593963,
      "grad_norm": 0.7485014295621598,
      "learning_rate": 9.276703750002801e-06,
      "loss": 0.6255,
      "step": 263
    },
    {
      "epoch": 0.7711781888997079,
      "grad_norm": 0.686777506781002,
      "learning_rate": 9.267862353938988e-06,
      "loss": 0.6534,
      "step": 264
    },
    {
      "epoch": 0.7740993184031159,
      "grad_norm": 0.7655220545156425,
      "learning_rate": 9.258971514702789e-06,
      "loss": 0.6439,
      "step": 265
    },
    {
      "epoch": 0.7770204479065238,
      "grad_norm": 0.5995690372205543,
      "learning_rate": 9.250031335294551e-06,
      "loss": 0.6264,
      "step": 266
    },
    {
      "epoch": 0.7799415774099319,
      "grad_norm": 0.6306408667729854,
      "learning_rate": 9.241041919286227e-06,
      "loss": 0.633,
      "step": 267
    },
    {
      "epoch": 0.7828627069133398,
      "grad_norm": 0.7405970033463782,
      "learning_rate": 9.232003370820171e-06,
      "loss": 0.6355,
      "step": 268
    },
    {
      "epoch": 0.7857838364167478,
      "grad_norm": 0.5761877077710947,
      "learning_rate": 9.222915794607942e-06,
      "loss": 0.6431,
      "step": 269
    },
    {
      "epoch": 0.7887049659201558,
      "grad_norm": 0.6417671555725009,
      "learning_rate": 9.213779295929082e-06,
      "loss": 0.6302,
      "step": 270
    },
    {
      "epoch": 0.7916260954235638,
      "grad_norm": 0.5476966311891922,
      "learning_rate": 9.204593980629898e-06,
      "loss": 0.6307,
      "step": 271
    },
    {
      "epoch": 0.7945472249269717,
      "grad_norm": 0.6325575856222458,
      "learning_rate": 9.195359955122244e-06,
      "loss": 0.6316,
      "step": 272
    },
    {
      "epoch": 0.7974683544303798,
      "grad_norm": 0.6487642715668982,
      "learning_rate": 9.186077326382275e-06,
      "loss": 0.6324,
      "step": 273
    },
    {
      "epoch": 0.8003894839337877,
      "grad_norm": 0.5751549331819923,
      "learning_rate": 9.176746201949216e-06,
      "loss": 0.6585,
      "step": 274
    },
    {
      "epoch": 0.8033106134371957,
      "grad_norm": 0.6208214920916966,
      "learning_rate": 9.167366689924116e-06,
      "loss": 0.6517,
      "step": 275
    },
    {
      "epoch": 0.8062317429406037,
      "grad_norm": 0.6579852013112687,
      "learning_rate": 9.157938898968594e-06,
      "loss": 0.643,
      "step": 276
    },
    {
      "epoch": 0.8091528724440117,
      "grad_norm": 0.5985871969783593,
      "learning_rate": 9.14846293830358e-06,
      "loss": 0.6386,
      "step": 277
    },
    {
      "epoch": 0.8120740019474196,
      "grad_norm": 0.5776495482063276,
      "learning_rate": 9.138938917708047e-06,
      "loss": 0.6367,
      "step": 278
    },
    {
      "epoch": 0.8149951314508277,
      "grad_norm": 0.6461097088775256,
      "learning_rate": 9.129366947517746e-06,
      "loss": 0.6311,
      "step": 279
    },
    {
      "epoch": 0.8179162609542356,
      "grad_norm": 0.6054324701596054,
      "learning_rate": 9.119747138623925e-06,
      "loss": 0.6365,
      "step": 280
    },
    {
      "epoch": 0.8208373904576436,
      "grad_norm": 0.6046664159813615,
      "learning_rate": 9.110079602472035e-06,
      "loss": 0.6549,
      "step": 281
    },
    {
      "epoch": 0.8237585199610516,
      "grad_norm": 0.690747949343666,
      "learning_rate": 9.100364451060457e-06,
      "loss": 0.6477,
      "step": 282
    },
    {
      "epoch": 0.8266796494644596,
      "grad_norm": 0.6352884441285447,
      "learning_rate": 9.090601796939192e-06,
      "loss": 0.6315,
      "step": 283
    },
    {
      "epoch": 0.8296007789678675,
      "grad_norm": 0.7031617950372325,
      "learning_rate": 9.080791753208553e-06,
      "loss": 0.6304,
      "step": 284
    },
    {
      "epoch": 0.8325219084712756,
      "grad_norm": 0.6955969158961154,
      "learning_rate": 9.070934433517872e-06,
      "loss": 0.6371,
      "step": 285
    },
    {
      "epoch": 0.8354430379746836,
      "grad_norm": 0.8508450672946094,
      "learning_rate": 9.061029952064165e-06,
      "loss": 0.6392,
      "step": 286
    },
    {
      "epoch": 0.8383641674780915,
      "grad_norm": 0.6682143045272909,
      "learning_rate": 9.05107842359082e-06,
      "loss": 0.6354,
      "step": 287
    },
    {
      "epoch": 0.8412852969814996,
      "grad_norm": 0.730935179569577,
      "learning_rate": 9.041079963386263e-06,
      "loss": 0.6365,
      "step": 288
    },
    {
      "epoch": 0.8442064264849075,
      "grad_norm": 0.9284592753787645,
      "learning_rate": 9.031034687282627e-06,
      "loss": 0.6512,
      "step": 289
    },
    {
      "epoch": 0.8471275559883155,
      "grad_norm": 0.5711740420462373,
      "learning_rate": 9.020942711654404e-06,
      "loss": 0.6253,
      "step": 290
    },
    {
      "epoch": 0.8500486854917235,
      "grad_norm": 0.7521676041243963,
      "learning_rate": 9.0108041534171e-06,
      "loss": 0.6346,
      "step": 291
    },
    {
      "epoch": 0.8529698149951315,
      "grad_norm": 0.7268819094436544,
      "learning_rate": 9.000619130025885e-06,
      "loss": 0.6321,
      "step": 292
    },
    {
      "epoch": 0.8558909444985394,
      "grad_norm": 0.6255002973375909,
      "learning_rate": 8.99038775947422e-06,
      "loss": 0.6448,
      "step": 293
    },
    {
      "epoch": 0.8588120740019474,
      "grad_norm": 0.5654580978730834,
      "learning_rate": 8.980110160292503e-06,
      "loss": 0.6546,
      "step": 294
    },
    {
      "epoch": 0.8617332035053554,
      "grad_norm": 0.5744283394449378,
      "learning_rate": 8.969786451546691e-06,
      "loss": 0.6354,
      "step": 295
    },
    {
      "epoch": 0.8646543330087634,
      "grad_norm": 0.5607277367845812,
      "learning_rate": 8.959416752836915e-06,
      "loss": 0.6315,
      "step": 296
    },
    {
      "epoch": 0.8675754625121713,
      "grad_norm": 0.6883032069319117,
      "learning_rate": 8.949001184296107e-06,
      "loss": 0.6284,
      "step": 297
    },
    {
      "epoch": 0.8704965920155794,
      "grad_norm": 0.5793320184156081,
      "learning_rate": 8.938539866588593e-06,
      "loss": 0.6299,
      "step": 298
    },
    {
      "epoch": 0.8734177215189873,
      "grad_norm": 0.6122270930586687,
      "learning_rate": 8.928032920908709e-06,
      "loss": 0.6463,
      "step": 299
    },
    {
      "epoch": 0.8763388510223953,
      "grad_norm": 0.6718320925279334,
      "learning_rate": 8.917480468979387e-06,
      "loss": 0.6405,
      "step": 300
    },
    {
      "epoch": 0.8792599805258033,
      "grad_norm": 0.6132181340646334,
      "learning_rate": 8.906882633050753e-06,
      "loss": 0.6426,
      "step": 301
    },
    {
      "epoch": 0.8821811100292113,
      "grad_norm": 0.6206805066838101,
      "learning_rate": 8.896239535898702e-06,
      "loss": 0.65,
      "step": 302
    },
    {
      "epoch": 0.8851022395326192,
      "grad_norm": 0.7456156741229341,
      "learning_rate": 8.885551300823483e-06,
      "loss": 0.6395,
      "step": 303
    },
    {
      "epoch": 0.8880233690360273,
      "grad_norm": 0.6186279743363227,
      "learning_rate": 8.874818051648267e-06,
      "loss": 0.6236,
      "step": 304
    },
    {
      "epoch": 0.8909444985394352,
      "grad_norm": 0.7192434268898347,
      "learning_rate": 8.864039912717713e-06,
      "loss": 0.6427,
      "step": 305
    },
    {
      "epoch": 0.8938656280428432,
      "grad_norm": 0.6528244466221247,
      "learning_rate": 8.853217008896526e-06,
      "loss": 0.6478,
      "step": 306
    },
    {
      "epoch": 0.8967867575462513,
      "grad_norm": 0.6240468261028331,
      "learning_rate": 8.842349465568018e-06,
      "loss": 0.6354,
      "step": 307
    },
    {
      "epoch": 0.8997078870496592,
      "grad_norm": 0.6086442105860419,
      "learning_rate": 8.831437408632639e-06,
      "loss": 0.6175,
      "step": 308
    },
    {
      "epoch": 0.9026290165530672,
      "grad_norm": 0.6206978097636743,
      "learning_rate": 8.820480964506542e-06,
      "loss": 0.6329,
      "step": 309
    },
    {
      "epoch": 0.9055501460564752,
      "grad_norm": 0.67571865721595,
      "learning_rate": 8.809480260120096e-06,
      "loss": 0.6302,
      "step": 310
    },
    {
      "epoch": 0.9084712755598832,
      "grad_norm": 0.612775453081801,
      "learning_rate": 8.798435422916425e-06,
      "loss": 0.6248,
      "step": 311
    },
    {
      "epoch": 0.9113924050632911,
      "grad_norm": 0.5939423168299965,
      "learning_rate": 8.787346580849939e-06,
      "loss": 0.6274,
      "step": 312
    },
    {
      "epoch": 0.9143135345666992,
      "grad_norm": 0.6578489213437513,
      "learning_rate": 8.776213862384838e-06,
      "loss": 0.6367,
      "step": 313
    },
    {
      "epoch": 0.9172346640701071,
      "grad_norm": 0.7439730364901744,
      "learning_rate": 8.76503739649363e-06,
      "loss": 0.6584,
      "step": 314
    },
    {
      "epoch": 0.9201557935735151,
      "grad_norm": 0.5758626313580656,
      "learning_rate": 8.753817312655642e-06,
      "loss": 0.638,
      "step": 315
    },
    {
      "epoch": 0.9230769230769231,
      "grad_norm": 0.7372145224476075,
      "learning_rate": 8.742553740855507e-06,
      "loss": 0.6391,
      "step": 316
    },
    {
      "epoch": 0.9259980525803311,
      "grad_norm": 0.7004119821194315,
      "learning_rate": 8.73124681158167e-06,
      "loss": 0.6426,
      "step": 317
    },
    {
      "epoch": 0.928919182083739,
      "grad_norm": 0.5393578063337247,
      "learning_rate": 8.719896655824878e-06,
      "loss": 0.6326,
      "step": 318
    },
    {
      "epoch": 0.9318403115871471,
      "grad_norm": 0.5698772754745327,
      "learning_rate": 8.708503405076646e-06,
      "loss": 0.634,
      "step": 319
    },
    {
      "epoch": 0.934761441090555,
      "grad_norm": 0.588256486663727,
      "learning_rate": 8.697067191327748e-06,
      "loss": 0.6328,
      "step": 320
    },
    {
      "epoch": 0.937682570593963,
      "grad_norm": 0.6194561945270076,
      "learning_rate": 8.685588147066688e-06,
      "loss": 0.6303,
      "step": 321
    },
    {
      "epoch": 0.9406037000973709,
      "grad_norm": 0.5910869397544244,
      "learning_rate": 8.67406640527816e-06,
      "loss": 0.6494,
      "step": 322
    },
    {
      "epoch": 0.943524829600779,
      "grad_norm": 0.6055073823945398,
      "learning_rate": 8.662502099441505e-06,
      "loss": 0.6334,
      "step": 323
    },
    {
      "epoch": 0.9464459591041869,
      "grad_norm": 0.5417594595582104,
      "learning_rate": 8.650895363529172e-06,
      "loss": 0.6279,
      "step": 324
    },
    {
      "epoch": 0.9493670886075949,
      "grad_norm": 0.5878187442362004,
      "learning_rate": 8.639246332005163e-06,
      "loss": 0.639,
      "step": 325
    },
    {
      "epoch": 0.952288218111003,
      "grad_norm": 0.5233625621707794,
      "learning_rate": 8.627555139823468e-06,
      "loss": 0.614,
      "step": 326
    },
    {
      "epoch": 0.9552093476144109,
      "grad_norm": 0.5656952115933153,
      "learning_rate": 8.615821922426517e-06,
      "loss": 0.6214,
      "step": 327
    },
    {
      "epoch": 0.9581304771178188,
      "grad_norm": 0.5806200502649499,
      "learning_rate": 8.604046815743598e-06,
      "loss": 0.6424,
      "step": 328
    },
    {
      "epoch": 0.9610516066212269,
      "grad_norm": 0.5561534960958242,
      "learning_rate": 8.592229956189283e-06,
      "loss": 0.638,
      "step": 329
    },
    {
      "epoch": 0.9639727361246349,
      "grad_norm": 0.5340425065767049,
      "learning_rate": 8.580371480661857e-06,
      "loss": 0.6238,
      "step": 330
    },
    {
      "epoch": 0.9668938656280428,
      "grad_norm": 0.5501524304300768,
      "learning_rate": 8.568471526541721e-06,
      "loss": 0.6518,
      "step": 331
    },
    {
      "epoch": 0.9698149951314509,
      "grad_norm": 0.571968183721703,
      "learning_rate": 8.556530231689809e-06,
      "loss": 0.6588,
      "step": 332
    },
    {
      "epoch": 0.9727361246348588,
      "grad_norm": 0.5870934675467651,
      "learning_rate": 8.544547734445983e-06,
      "loss": 0.6441,
      "step": 333
    },
    {
      "epoch": 0.9756572541382668,
      "grad_norm": 0.5633142366316923,
      "learning_rate": 8.532524173627438e-06,
      "loss": 0.6558,
      "step": 334
    },
    {
      "epoch": 0.9785783836416748,
      "grad_norm": 0.6080503864783372,
      "learning_rate": 8.520459688527091e-06,
      "loss": 0.6239,
      "step": 335
    },
    {
      "epoch": 0.9814995131450828,
      "grad_norm": 0.6010534017830508,
      "learning_rate": 8.508354418911966e-06,
      "loss": 0.6392,
      "step": 336
    },
    {
      "epoch": 0.9844206426484907,
      "grad_norm": 0.6110941694269748,
      "learning_rate": 8.496208505021572e-06,
      "loss": 0.6334,
      "step": 337
    },
    {
      "epoch": 0.9873417721518988,
      "grad_norm": 0.5748899254330844,
      "learning_rate": 8.484022087566284e-06,
      "loss": 0.6213,
      "step": 338
    },
    {
      "epoch": 0.9902629016553067,
      "grad_norm": 0.5910067428214469,
      "learning_rate": 8.471795307725713e-06,
      "loss": 0.6313,
      "step": 339
    },
    {
      "epoch": 0.9931840311587147,
      "grad_norm": 0.6409845798522262,
      "learning_rate": 8.459528307147066e-06,
      "loss": 0.6223,
      "step": 340
    },
    {
      "epoch": 0.9961051606621227,
      "grad_norm": 0.5798933129205326,
      "learning_rate": 8.447221227943507e-06,
      "loss": 0.6375,
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.9990262901655307, | |
| "grad_norm": 0.5323356127721257, | |
| "learning_rate": 8.434874212692513e-06, | |
| "loss": 0.631, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.0019474196689386, | |
| "grad_norm": 1.1893567012708939, | |
| "learning_rate": 8.422487404434214e-06, | |
| "loss": 1.0195, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.0048685491723466, | |
| "grad_norm": 0.6455689118062985, | |
| "learning_rate": 8.41006094666975e-06, | |
| "loss": 0.5891, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.0077896786757545, | |
| "grad_norm": 0.6583690984771934, | |
| "learning_rate": 8.397594983359591e-06, | |
| "loss": 0.5549, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.0107108081791627, | |
| "grad_norm": 0.6011562108844719, | |
| "learning_rate": 8.385089658921892e-06, | |
| "loss": 0.4942, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.0136319376825706, | |
| "grad_norm": 0.7194629629820957, | |
| "learning_rate": 8.372545118230793e-06, | |
| "loss": 0.5879, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.0165530671859786, | |
| "grad_norm": 0.7106922347864785, | |
| "learning_rate": 8.35996150661476e-06, | |
| "loss": 0.6464, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.0194741966893865, | |
| "grad_norm": 0.7004876649145284, | |
| "learning_rate": 8.347338969854898e-06, | |
| "loss": 0.5635, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.0223953261927945, | |
| "grad_norm": 0.7176313477690597, | |
| "learning_rate": 8.334677654183254e-06, | |
| "loss": 0.6121, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.0253164556962024, | |
| "grad_norm": 0.6767419736782746, | |
| "learning_rate": 8.321977706281135e-06, | |
| "loss": 0.5923, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.0282375851996106, | |
| "grad_norm": 0.6606355795156919, | |
| "learning_rate": 8.309239273277394e-06, | |
| "loss": 0.5375, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.0311587147030186, | |
| "grad_norm": 0.7777491012749531, | |
| "learning_rate": 8.296462502746743e-06, | |
| "loss": 0.5971, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.0340798442064265, | |
| "grad_norm": 0.6239185790928177, | |
| "learning_rate": 8.283647542708026e-06, | |
| "loss": 0.6017, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.0370009737098345, | |
| "grad_norm": 0.659621365452411, | |
| "learning_rate": 8.27079454162252e-06, | |
| "loss": 0.523, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.0399221032132424, | |
| "grad_norm": 0.7314219237543246, | |
| "learning_rate": 8.2579036483922e-06, | |
| "loss": 0.5992, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.0428432327166504, | |
| "grad_norm": 0.6317967581739343, | |
| "learning_rate": 8.244975012358028e-06, | |
| "loss": 0.58, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.0457643622200585, | |
| "grad_norm": 0.709552964482946, | |
| "learning_rate": 8.232008783298211e-06, | |
| "loss": 0.5703, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.0486854917234665, | |
| "grad_norm": 0.5856088171700015, | |
| "learning_rate": 8.219005111426472e-06, | |
| "loss": 0.5851, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.0516066212268744, | |
| "grad_norm": 0.72335786152197, | |
| "learning_rate": 8.205964147390313e-06, | |
| "loss": 0.5762, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.0545277507302824, | |
| "grad_norm": 0.6082315528170907, | |
| "learning_rate": 8.19288604226926e-06, | |
| "loss": 0.6045, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.0574488802336903, | |
| "grad_norm": 0.6904542946382585, | |
| "learning_rate": 8.179770947573124e-06, | |
| "loss": 0.5649, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.0603700097370983, | |
| "grad_norm": 0.5889683647522176, | |
| "learning_rate": 8.166619015240236e-06, | |
| "loss": 0.5681, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.0632911392405062, | |
| "grad_norm": 0.5621253187560026, | |
| "learning_rate": 8.15343039763569e-06, | |
| "loss": 0.6013, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.0662122687439144, | |
| "grad_norm": 0.5587583825073225, | |
| "learning_rate": 8.140205247549583e-06, | |
| "loss": 0.6026, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.0691333982473223, | |
| "grad_norm": 0.5053935078299595, | |
| "learning_rate": 8.126943718195239e-06, | |
| "loss": 0.5693, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.0720545277507303, | |
| "grad_norm": 0.5469581161571481, | |
| "learning_rate": 8.113645963207432e-06, | |
| "loss": 0.6007, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.0749756572541382, | |
| "grad_norm": 0.5430360387939689, | |
| "learning_rate": 8.100312136640618e-06, | |
| "loss": 0.5754, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.0778967867575462, | |
| "grad_norm": 0.529838189847217, | |
| "learning_rate": 8.086942392967131e-06, | |
| "loss": 0.5758, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.0808179162609541, | |
| "grad_norm": 0.5537166527008645, | |
| "learning_rate": 8.073536887075417e-06, | |
| "loss": 0.5596, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.0837390457643623, | |
| "grad_norm": 0.575683387457894, | |
| "learning_rate": 8.060095774268217e-06, | |
| "loss": 0.6149, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.0866601752677703, | |
| "grad_norm": 0.5212823944156575, | |
| "learning_rate": 8.046619210260785e-06, | |
| "loss": 0.5376, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.0895813047711782, | |
| "grad_norm": 0.6132577983715938, | |
| "learning_rate": 8.03310735117907e-06, | |
| "loss": 0.611, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.0925024342745862, | |
| "grad_norm": 0.5741650450345563, | |
| "learning_rate": 8.019560353557923e-06, | |
| "loss": 0.5428, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.095423563777994, | |
| "grad_norm": 0.6898994227426227, | |
| "learning_rate": 8.005978374339264e-06, | |
| "loss": 0.6122, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.098344693281402, | |
| "grad_norm": 0.6195159909025971, | |
| "learning_rate": 7.992361570870289e-06, | |
| "loss": 0.6298, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.1012658227848102, | |
| "grad_norm": 0.5592176863621418, | |
| "learning_rate": 7.978710100901617e-06, | |
| "loss": 0.527, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.1041869522882182, | |
| "grad_norm": 0.6077274644298606, | |
| "learning_rate": 7.965024122585491e-06, | |
| "loss": 0.5733, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.1071080817916261, | |
| "grad_norm": 0.5404386498094536, | |
| "learning_rate": 7.951303794473926e-06, | |
| "loss": 0.5786, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.110029211295034, | |
| "grad_norm": 0.6285033579389189, | |
| "learning_rate": 7.937549275516882e-06, | |
| "loss": 0.5593, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.112950340798442, | |
| "grad_norm": 0.6136636789874864, | |
| "learning_rate": 7.92376072506042e-06, | |
| "loss": 0.5887, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.11587147030185, | |
| "grad_norm": 0.5563073653338333, | |
| "learning_rate": 7.909938302844856e-06, | |
| "loss": 0.637, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.1187925998052581, | |
| "grad_norm": 0.4984935085580574, | |
| "learning_rate": 7.896082169002903e-06, | |
| "loss": 0.537, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.121713729308666, | |
| "grad_norm": 0.5688731697204966, | |
| "learning_rate": 7.882192484057837e-06, | |
| "loss": 0.5977, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.124634858812074, | |
| "grad_norm": 0.5409938929481367, | |
| "learning_rate": 7.868269408921614e-06, | |
| "loss": 0.5477, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.127555988315482, | |
| "grad_norm": 0.6311630545441865, | |
| "learning_rate": 7.854313104893014e-06, | |
| "loss": 0.5595, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.13047711781889, | |
| "grad_norm": 0.5517528259351719, | |
| "learning_rate": 7.84032373365578e-06, | |
| "loss": 0.5588, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.1333982473222979, | |
| "grad_norm": 0.515690277716415, | |
| "learning_rate": 7.826301457276733e-06, | |
| "loss": 0.5767, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.136319376825706, | |
| "grad_norm": 0.5872560884226455, | |
| "learning_rate": 7.812246438203905e-06, | |
| "loss": 0.5618, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.139240506329114, | |
| "grad_norm": 0.5449550621943328, | |
| "learning_rate": 7.798158839264645e-06, | |
| "loss": 0.5373, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.142161635832522, | |
| "grad_norm": 0.6192261787272578, | |
| "learning_rate": 7.784038823663746e-06, | |
| "loss": 0.6117, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.14508276533593, | |
| "grad_norm": 0.5911797274948096, | |
| "learning_rate": 7.769886554981549e-06, | |
| "loss": 0.5832, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.1480038948393378, | |
| "grad_norm": 0.5393860544929336, | |
| "learning_rate": 7.755702197172036e-06, | |
| "loss": 0.5511, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.1509250243427458, | |
| "grad_norm": 0.5686675548722897, | |
| "learning_rate": 7.741485914560958e-06, | |
| "loss": 0.601, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.1538461538461537, | |
| "grad_norm": 0.5965421717249094, | |
| "learning_rate": 7.7272378718439e-06, | |
| "loss": 0.6142, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.156767283349562, | |
| "grad_norm": 0.5976378598496599, | |
| "learning_rate": 7.712958234084395e-06, | |
| "loss": 0.532, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.1596884128529699, | |
| "grad_norm": 0.5866514447746561, | |
| "learning_rate": 7.698647166712003e-06, | |
| "loss": 0.6436, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.1626095423563778, | |
| "grad_norm": 0.5577115650955226, | |
| "learning_rate": 7.684304835520395e-06, | |
| "loss": 0.5524, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.1655306718597858, | |
| "grad_norm": 0.6320330036922427, | |
| "learning_rate": 7.669931406665437e-06, | |
| "loss": 0.631, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.1684518013631937, | |
| "grad_norm": 0.5274101108563934, | |
| "learning_rate": 7.655527046663254e-06, | |
| "loss": 0.5369, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.1713729308666017, | |
| "grad_norm": 0.5563468540404465, | |
| "learning_rate": 7.641091922388316e-06, | |
| "loss": 0.577, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.1742940603700098, | |
| "grad_norm": 0.5043411412651241, | |
| "learning_rate": 7.626626201071494e-06, | |
| "loss": 0.5623, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.1772151898734178, | |
| "grad_norm": 0.5658997847969963, | |
| "learning_rate": 7.612130050298126e-06, | |
| "loss": 0.5613, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.1801363193768257, | |
| "grad_norm": 0.5617715925867288, | |
| "learning_rate": 7.597603638006071e-06, | |
| "loss": 0.5796, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.1830574488802337, | |
| "grad_norm": 0.5730674124102592, | |
| "learning_rate": 7.5830471324837765e-06, | |
| "loss": 0.6102, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.1859785783836416, | |
| "grad_norm": 0.5322280548113558, | |
| "learning_rate": 7.56846070236831e-06, | |
| "loss": 0.5392, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.1888997078870496, | |
| "grad_norm": 0.5636446588596294, | |
| "learning_rate": 7.55384451664342e-06, | |
| "loss": 0.5805, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.1918208373904577, | |
| "grad_norm": 0.5515192754192108, | |
| "learning_rate": 7.539198744637577e-06, | |
| "loss": 0.5647, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.1947419668938657, | |
| "grad_norm": 0.649611846767762, | |
| "learning_rate": 7.524523556022003e-06, | |
| "loss": 0.5804, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.1976630963972736, | |
| "grad_norm": 0.5400876783820088, | |
| "learning_rate": 7.5098191208087144e-06, | |
| "loss": 0.5295, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.2005842259006816, | |
| "grad_norm": 0.727016701945529, | |
| "learning_rate": 7.495085609348549e-06, | |
| "loss": 0.6035, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.2035053554040895, | |
| "grad_norm": 0.6474638029028308, | |
| "learning_rate": 7.4803231923291905e-06, | |
| "loss": 0.5905, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.2064264849074975, | |
| "grad_norm": 0.5322418094330414, | |
| "learning_rate": 7.465532040773195e-06, | |
| "loss": 0.5696, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.2093476144109054, | |
| "grad_norm": 0.6033615790880061, | |
| "learning_rate": 7.45071232603601e-06, | |
| "loss": 0.5495, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.2122687439143136, | |
| "grad_norm": 0.5696185950396394, | |
| "learning_rate": 7.4358642198039835e-06, | |
| "loss": 0.5761, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.2151898734177216, | |
| "grad_norm": 0.58236484440868, | |
| "learning_rate": 7.420987894092383e-06, | |
| "loss": 0.6225, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.2181110029211295, | |
| "grad_norm": 0.6023725223738031, | |
| "learning_rate": 7.406083521243396e-06, | |
| "loss": 0.5539, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.2210321324245375, | |
| "grad_norm": 0.5964833664567649, | |
| "learning_rate": 7.391151273924135e-06, | |
| "loss": 0.5766, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.2239532619279454, | |
| "grad_norm": 0.5579327351939204, | |
| "learning_rate": 7.376191325124644e-06, | |
| "loss": 0.6037, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.2268743914313536, | |
| "grad_norm": 0.662691054067833, | |
| "learning_rate": 7.36120384815588e-06, | |
| "loss": 0.5775, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.2297955209347615, | |
| "grad_norm": 0.5457870053427097, | |
| "learning_rate": 7.34618901664772e-06, | |
| "loss": 0.6032, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.2327166504381695, | |
| "grad_norm": 0.528437147629629, | |
| "learning_rate": 7.33114700454694e-06, | |
| "loss": 0.5551, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.2356377799415774, | |
| "grad_norm": 0.6396983335282087, | |
| "learning_rate": 7.316077986115206e-06, | |
| "loss": 0.5546, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.2385589094449854, | |
| "grad_norm": 0.6059608136412865, | |
| "learning_rate": 7.300982135927051e-06, | |
| "loss": 0.5889, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.2414800389483933, | |
| "grad_norm": 0.5395833042938699, | |
| "learning_rate": 7.285859628867851e-06, | |
| "loss": 0.5765, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.2444011684518013, | |
| "grad_norm": 0.5938150264550871, | |
| "learning_rate": 7.270710640131806e-06, | |
| "loss": 0.6058, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.2473222979552094, | |
| "grad_norm": 0.5435408340953944, | |
| "learning_rate": 7.255535345219905e-06, | |
| "loss": 0.5933, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.2502434274586174, | |
| "grad_norm": 0.5055763559518435, | |
| "learning_rate": 7.240333919937893e-06, | |
| "loss": 0.5989, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.2531645569620253, | |
| "grad_norm": 0.5862621574043387, | |
| "learning_rate": 7.2251065403942355e-06, | |
| "loss": 0.5888, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.2560856864654333, | |
| "grad_norm": 0.5234746081250162, | |
| "learning_rate": 7.209853382998077e-06, | |
| "loss": 0.5537, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.2590068159688412, | |
| "grad_norm": 0.5736429396541277, | |
| "learning_rate": 7.1945746244572e-06, | |
| "loss": 0.547, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.2619279454722494, | |
| "grad_norm": 0.5662950178895197, | |
| "learning_rate": 7.179270441775976e-06, | |
| "loss": 0.6502, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.2648490749756571, | |
| "grad_norm": 0.6193942417064999, | |
| "learning_rate": 7.163941012253317e-06, | |
| "loss": 0.5182, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.2677702044790653, | |
| "grad_norm": 0.5954433533450075, | |
| "learning_rate": 7.148586513480614e-06, | |
| "loss": 0.5961, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.2706913339824732, | |
| "grad_norm": 0.5407062847357796, | |
| "learning_rate": 7.133207123339689e-06, | |
| "loss": 0.5475, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.2736124634858812, | |
| "grad_norm": 0.6133121400132443, | |
| "learning_rate": 7.117803020000733e-06, | |
| "loss": 0.59, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.2765335929892891, | |
| "grad_norm": 0.5272070801107334, | |
| "learning_rate": 7.102374381920233e-06, | |
| "loss": 0.542, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.279454722492697, | |
| "grad_norm": 0.6014559091825684, | |
| "learning_rate": 7.086921387838916e-06, | |
| "loss": 0.6136, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.2823758519961053, | |
| "grad_norm": 0.4925204082847554, | |
| "learning_rate": 7.071444216779669e-06, | |
| "loss": 0.5707, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.2852969814995132, | |
| "grad_norm": 0.5297523991584766, | |
| "learning_rate": 7.055943048045476e-06, | |
| "loss": 0.5917, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.2882181110029212, | |
| "grad_norm": 0.5742994575633306, | |
| "learning_rate": 7.040418061217325e-06, | |
| "loss": 0.6161, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.2911392405063291, | |
| "grad_norm": 0.5144128093922836, | |
| "learning_rate": 7.024869436152144e-06, | |
| "loss": 0.5481, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.294060370009737, | |
| "grad_norm": 0.5651127309438425, | |
| "learning_rate": 7.009297352980706e-06, | |
| "loss": 0.5789, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.296981499513145, | |
| "grad_norm": 0.46327530559493646, | |
| "learning_rate": 6.99370199210555e-06, | |
| "loss": 0.5435, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.299902629016553, | |
| "grad_norm": 0.5306445037454199, | |
| "learning_rate": 6.978083534198878e-06, | |
| "loss": 0.6135, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.3028237585199611, | |
| "grad_norm": 0.6551321587944974, | |
| "learning_rate": 6.962442160200484e-06, | |
| "loss": 0.6111, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.305744888023369, | |
| "grad_norm": 0.5887843505365383, | |
| "learning_rate": 6.9467780513156335e-06, | |
| "loss": 0.5428, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.308666017526777, | |
| "grad_norm": 0.5871163724329924, | |
| "learning_rate": 6.931091389012983e-06, | |
| "loss": 0.5595, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.311587147030185, | |
| "grad_norm": 0.6876137396335796, | |
| "learning_rate": 6.915382355022465e-06, | |
| "loss": 0.5776, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.314508276533593, | |
| "grad_norm": 0.5978349360901288, | |
| "learning_rate": 6.899651131333194e-06, | |
| "loss": 0.559, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.317429406037001, | |
| "grad_norm": 0.5600538501243326, | |
| "learning_rate": 6.8838979001913454e-06, | |
| "loss": 0.5782, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.3203505355404088, | |
| "grad_norm": 0.6670604315541686, | |
| "learning_rate": 6.868122844098057e-06, | |
| "loss": 0.6669, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.323271665043817, | |
| "grad_norm": 0.6019460261148103, | |
| "learning_rate": 6.852326145807302e-06, | |
| "loss": 0.6006, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.326192794547225, | |
| "grad_norm": 0.4932938142869286, | |
| "learning_rate": 6.836507988323785e-06, | |
| "loss": 0.4971, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.3291139240506329, | |
| "grad_norm": 0.6389612562939274, | |
| "learning_rate": 6.82066855490081e-06, | |
| "loss": 0.5994, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.3320350535540408, | |
| "grad_norm": 0.569391901245458, | |
| "learning_rate": 6.804808029038168e-06, | |
| "loss": 0.5776, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.3349561830574488, | |
| "grad_norm": 0.5627619450291675, | |
| "learning_rate": 6.788926594480001e-06, | |
| "loss": 0.5894, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.337877312560857, | |
| "grad_norm": 0.5525138776130833, | |
| "learning_rate": 6.773024435212678e-06, | |
| "loss": 0.5507, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.340798442064265, | |
| "grad_norm": 0.6243616187017386, | |
| "learning_rate": 6.75710173546267e-06, | |
| "loss": 0.6052, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.3437195715676729, | |
| "grad_norm": 0.46296070627628405, | |
| "learning_rate": 6.741158679694403e-06, | |
| "loss": 0.5284, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.3466407010710808, | |
| "grad_norm": 0.556033009188379, | |
| "learning_rate": 6.7251954526081294e-06, | |
| "loss": 0.6225, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.3495618305744888, | |
| "grad_norm": 0.5425579804205766, | |
| "learning_rate": 6.709212239137785e-06, | |
| "loss": 0.5919, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.352482960077897, | |
| "grad_norm": 0.5628452971401441, | |
| "learning_rate": 6.693209224448853e-06, | |
| "loss": 0.6162, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.3554040895813046, | |
| "grad_norm": 0.5283138648819659, | |
| "learning_rate": 6.677186593936207e-06, | |
| "loss": 0.5022, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.3583252190847128, | |
| "grad_norm": 0.5688480741205626, | |
| "learning_rate": 6.661144533221974e-06, | |
| "loss": 0.594, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.3612463485881208, | |
| "grad_norm": 0.6112831896983953, | |
| "learning_rate": 6.645083228153377e-06, | |
| "loss": 0.5803, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.3641674780915287, | |
| "grad_norm": 0.5385606514191121, | |
| "learning_rate": 6.629002864800589e-06, | |
| "loss": 0.5834, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.3670886075949367, | |
| "grad_norm": 0.5366278285884805, | |
| "learning_rate": 6.612903629454568e-06, | |
| "loss": 0.5343, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.3700097370983446, | |
| "grad_norm": 0.6713338764048196, | |
| "learning_rate": 6.5967857086249065e-06, | |
| "loss": 0.5573, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.3729308666017528, | |
| "grad_norm": 0.5538883029206723, | |
| "learning_rate": 6.58064928903767e-06, | |
| "loss": 0.6164, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.3758519961051607, | |
| "grad_norm": 0.5005412280561692, | |
| "learning_rate": 6.56449455763323e-06, | |
| "loss": 0.5576, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.3787731256085687, | |
| "grad_norm": 0.5868063901075268, | |
| "learning_rate": 6.548321701564099e-06, | |
| "loss": 0.5933, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.3816942551119766, | |
| "grad_norm": 0.5499777044713934, | |
| "learning_rate": 6.5321309081927665e-06, | |
| "loss": 0.562, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.3846153846153846, | |
| "grad_norm": 0.5209457126764143, | |
| "learning_rate": 6.515922365089524e-06, | |
| "loss": 0.5793, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.3875365141187925, | |
| "grad_norm": 0.5045261657801648, | |
| "learning_rate": 6.499696260030297e-06, | |
| "loss": 0.594, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.3904576436222005, | |
| "grad_norm": 0.5243175331043878, | |
| "learning_rate": 6.483452780994459e-06, | |
| "loss": 0.534, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.3933787731256086, | |
| "grad_norm": 0.5169962119701911, | |
| "learning_rate": 6.467192116162668e-06, | |
| "loss": 0.5403, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.3962999026290166, | |
| "grad_norm": 0.5140189723701601, | |
| "learning_rate": 6.450914453914674e-06, | |
| "loss": 0.6058, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.3992210321324245, | |
| "grad_norm": 0.5330866177837568, | |
| "learning_rate": 6.434619982827147e-06, | |
| "loss": 0.5882, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.4021421616358325, | |
| "grad_norm": 0.5414814038608168, | |
| "learning_rate": 6.418308891671484e-06, | |
| "loss": 0.5954, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.4050632911392404, | |
| "grad_norm": 0.5376659643418761, | |
| "learning_rate": 6.401981369411626e-06, | |
| "loss": 0.5633, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.4079844206426486, | |
| "grad_norm": 0.5438938648229659, | |
| "learning_rate": 6.385637605201871e-06, | |
| "loss": 0.5677, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.4109055501460563, | |
| "grad_norm": 0.533597042074437, | |
| "learning_rate": 6.3692777883846746e-06, | |
| "loss": 0.6217, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.4138266796494645, | |
| "grad_norm": 0.5530416412659575, | |
| "learning_rate": 6.3529021084884655e-06, | |
| "loss": 0.5135, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.4167478091528725, | |
| "grad_norm": 0.6426214488830264, | |
| "learning_rate": 6.336510755225447e-06, | |
| "loss": 0.6039, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.4196689386562804, | |
| "grad_norm": 0.5321542466408685, | |
| "learning_rate": 6.320103918489395e-06, | |
| "loss": 0.5615, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.4225900681596884, | |
| "grad_norm": 0.5807878117878842, | |
| "learning_rate": 6.303681788353465e-06, | |
| "loss": 0.6417, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.4255111976630963, | |
| "grad_norm": 0.6006151574662654, | |
| "learning_rate": 6.287244555067984e-06, | |
| "loss": 0.5794, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.4284323271665045, | |
| "grad_norm": 0.5126167984608686, | |
| "learning_rate": 6.270792409058247e-06, | |
| "loss": 0.5242, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.4313534566699124, | |
| "grad_norm": 0.509995152360523, | |
| "learning_rate": 6.25432554092232e-06, | |
| "loss": 0.5828, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.4342745861733204, | |
| "grad_norm": 0.5960222110178056, | |
| "learning_rate": 6.237844141428817e-06, | |
| "loss": 0.568, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.4371957156767283, | |
| "grad_norm": 0.5173153132658744, | |
| "learning_rate": 6.221348401514703e-06, | |
| "loss": 0.5796, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.4401168451801363, | |
| "grad_norm": 0.5671800858222442, | |
| "learning_rate": 6.204838512283073e-06, | |
| "loss": 0.5506, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.4430379746835442, | |
| "grad_norm": 0.6040430910549448, | |
| "learning_rate": 6.188314665000944e-06, | |
| "loss": 0.5444, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.4459591041869522, | |
| "grad_norm": 0.5391742690410167, | |
| "learning_rate": 6.171777051097037e-06, | |
| "loss": 0.5649, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.4488802336903603, | |
| "grad_norm": 0.597749185130725, | |
| "learning_rate": 6.155225862159558e-06, | |
| "loss": 0.6408, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.4518013631937683, | |
| "grad_norm": 0.5229846439398993, | |
| "learning_rate": 6.138661289933981e-06, | |
| "loss": 0.5494, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.4547224926971762, | |
| "grad_norm": 0.5920796120572965, | |
| "learning_rate": 6.1220835263208256e-06, | |
| "loss": 0.5992, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.4576436222005842, | |
| "grad_norm": 0.5775415962111137, | |
| "learning_rate": 6.105492763373431e-06, | |
| "loss": 0.5911, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.4605647517039921, | |
| "grad_norm": 0.5917654314551983, | |
| "learning_rate": 6.088889193295738e-06, | |
| "loss": 0.5643, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.4634858812074003, | |
| "grad_norm": 0.5199838430286094, | |
| "learning_rate": 6.072273008440052e-06, | |
| "loss": 0.5472, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.4664070107108083, | |
| "grad_norm": 0.5522718232673819, | |
| "learning_rate": 6.0556444013048265e-06, | |
| "loss": 0.546, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.4693281402142162, | |
| "grad_norm": 0.525908351198303, | |
| "learning_rate": 6.039003564532423e-06, | |
| "loss": 0.6015, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.4722492697176242, | |
| "grad_norm": 0.5105926082337128, | |
| "learning_rate": 6.0223506909068875e-06, | |
| "loss": 0.5785, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.475170399221032, | |
| "grad_norm": 0.5832929477157227, | |
| "learning_rate": 6.005685973351708e-06, | |
| "loss": 0.6027, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.47809152872444, | |
| "grad_norm": 0.5447366515820563, | |
| "learning_rate": 5.989009604927587e-06, | |
| "loss": 0.5833, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.481012658227848, | |
| "grad_norm": 0.5245433114840506, | |
| "learning_rate": 5.972321778830202e-06, | |
| "loss": 0.4944, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.4839337877312562, | |
| "grad_norm": 0.5377622062276557, | |
| "learning_rate": 5.9556226883879685e-06, | |
| "loss": 0.5518, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.4868549172346641, | |
| "grad_norm": 0.6351508971275112, | |
| "learning_rate": 5.938912527059798e-06, | |
| "loss": 0.5982, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.489776046738072, | |
| "grad_norm": 0.5264532680548314, | |
| "learning_rate": 5.922191488432857e-06, | |
| "loss": 0.5472, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.49269717624148, | |
| "grad_norm": 0.5618733075790842, | |
| "learning_rate": 5.90545976622033e-06, | |
| "loss": 0.6387, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.495618305744888, | |
| "grad_norm": 0.5042404211846953, | |
| "learning_rate": 5.888717554259165e-06, | |
| "loss": 0.5188, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.4985394352482961, | |
| "grad_norm": 0.5996827758246558, | |
| "learning_rate": 5.871965046507838e-06, | |
| "loss": 0.5845, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.5014605647517039, | |
| "grad_norm": 0.513160385214818, | |
| "learning_rate": 5.855202437044102e-06, | |
| "loss": 0.5407, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.504381694255112, | |
| "grad_norm": 0.49930093522201097, | |
| "learning_rate": 5.838429920062734e-06, | |
| "loss": 0.5888, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.50730282375852, | |
| "grad_norm": 0.5820994183473087, | |
| "learning_rate": 5.8216476898732935e-06, | |
| "loss": 0.6026, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.510223953261928, | |
| "grad_norm": 0.5965529197255538, | |
| "learning_rate": 5.804855940897866e-06, | |
| "loss": 0.5772, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.5131450827653359, | |
| "grad_norm": 0.4891063083201174, | |
| "learning_rate": 5.788054867668811e-06, | |
| "loss": 0.5383, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.5160662122687438, | |
| "grad_norm": 0.549598064227309, | |
| "learning_rate": 5.771244664826512e-06, | |
| "loss": 0.5701, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.518987341772152, | |
| "grad_norm": 0.5201662235289578, | |
| "learning_rate": 5.754425527117118e-06, | |
| "loss": 0.5221, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.5219084712755597, | |
| "grad_norm": 0.5727695969794172, | |
| "learning_rate": 5.737597649390288e-06, | |
| "loss": 0.5849, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.524829600778968, | |
| "grad_norm": 0.5387267148344115, | |
| "learning_rate": 5.720761226596935e-06, | |
| "loss": 0.5899, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.5277507302823758, | |
| "grad_norm": 0.5196417961644373, | |
| "learning_rate": 5.703916453786965e-06, | |
| "loss": 0.6075, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.5306718597857838, | |
| "grad_norm": 0.5162526391071218, | |
| "learning_rate": 5.6870635261070186e-06, | |
| "loss": 0.5524, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.533592989289192, | |
| "grad_norm": 0.5712502081281199, | |
| "learning_rate": 5.670202638798213e-06, | |
| "loss": 0.5639, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.5365141187925997, | |
| "grad_norm": 0.569067876773126, | |
| "learning_rate": 5.653333987193876e-06, | |
| "loss": 0.5437, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.5394352482960079, | |
| "grad_norm": 0.5925750045532109, | |
| "learning_rate": 5.636457766717286e-06, | |
| "loss": 0.591, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.5423563777994158, | |
| "grad_norm": 0.5141531017914309, | |
| "learning_rate": 5.619574172879405e-06, | |
| "loss": 0.5471, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.5452775073028238, | |
| "grad_norm": 0.6597290642553237, | |
| "learning_rate": 5.6026834012766155e-06, | |
| "loss": 0.5901, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.5481986368062317, | |
| "grad_norm": 0.5884715259486929, | |
| "learning_rate": 5.585785647588458e-06, | |
| "loss": 0.5878, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.5511197663096397, | |
| "grad_norm": 0.5255721650946908, | |
| "learning_rate": 5.568881107575353e-06, | |
| "loss": 0.5447, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.5540408958130478, | |
| "grad_norm": 0.5091097959641512, | |
| "learning_rate": 5.55196997707635e-06, | |
| "loss": 0.6276, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.5569620253164556, | |
| "grad_norm": 0.5119232421542075, | |
| "learning_rate": 5.53505245200684e-06, | |
| "loss": 0.5658, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.5598831548198637, | |
| "grad_norm": 0.556141615427262, | |
| "learning_rate": 5.518128728356303e-06, | |
| "loss": 0.5951, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.5628042843232717, | |
| "grad_norm": 0.4969794250430925, | |
| "learning_rate": 5.501199002186024e-06, | |
| "loss": 0.5457, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.5657254138266796, | |
| "grad_norm": 0.5377430121732353, | |
| "learning_rate": 5.48426346962683e-06, | |
| "loss": 0.587, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.5686465433300878, | |
| "grad_norm": 0.4763376786579273, | |
| "learning_rate": 5.467322326876813e-06, | |
| "loss": 0.5352, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.5715676728334955, | |
| "grad_norm": 0.5630031793193719, | |
| "learning_rate": 5.450375770199063e-06, | |
| "loss": 0.652, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.5744888023369037, | |
| "grad_norm": 0.505233569384271, | |
| "learning_rate": 5.433423995919383e-06, | |
| "loss": 0.5633, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.5774099318403116, | |
| "grad_norm": 0.4918114442897718, | |
| "learning_rate": 5.416467200424032e-06, | |
| "loss": 0.5422, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.5803310613437196, | |
| "grad_norm": 0.5991334895146722, | |
| "learning_rate": 5.399505580157428e-06, | |
| "loss": 0.558, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.5832521908471275, | |
| "grad_norm": 0.5363474139996335, | |
| "learning_rate": 5.382539331619896e-06, | |
| "loss": 0.5699, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.5861733203505355, | |
| "grad_norm": 0.5807115654118472, | |
| "learning_rate": 5.365568651365369e-06, | |
| "loss": 0.5688, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.5890944498539437, | |
| "grad_norm": 0.5904366045058206, | |
| "learning_rate": 5.34859373599913e-06, | |
| "loss": 0.5969, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.5920155793573514, | |
| "grad_norm": 0.491932507277215, | |
| "learning_rate": 5.33161478217552e-06, | |
| "loss": 0.5542, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.5949367088607596, | |
| "grad_norm": 0.5011301426277018, | |
| "learning_rate": 5.314631986595669e-06, | |
| "loss": 0.569, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.5978578383641675, | |
| "grad_norm": 0.6106059864005979, | |
| "learning_rate": 5.297645546005208e-06, | |
| "loss": 0.6051, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.6007789678675755, | |
| "grad_norm": 0.5958177689979132, | |
| "learning_rate": 5.280655657192003e-06, | |
| "loss": 0.5266, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.6037000973709834, | |
| "grad_norm": 0.5395470595824067, | |
| "learning_rate": 5.263662516983863e-06, | |
| "loss": 0.5657, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.6066212268743914, | |
| "grad_norm": 0.6212970783782318, | |
| "learning_rate": 5.246666322246267e-06, | |
| "loss": 0.5619, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.6095423563777995, | |
| "grad_norm": 0.6613037032143712, | |
| "learning_rate": 5.229667269880078e-06, | |
| "loss": 0.6092, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.6124634858812072, | |
| "grad_norm": 0.48332526807644743, | |
| "learning_rate": 5.212665556819264e-06, | |
| "loss": 0.5401, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.6153846153846154, | |
| "grad_norm": 0.5325857980417065, | |
| "learning_rate": 5.195661380028625e-06, | |
| "loss": 0.6035, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.6183057448880234, | |
| "grad_norm": 0.5442785714221408, | |
| "learning_rate": 5.178654936501492e-06, | |
| "loss": 0.5494, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.6212268743914313, | |
| "grad_norm": 0.6437807568653632, | |
| "learning_rate": 5.1616464232574635e-06, | |
| "loss": 0.6254, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.6241480038948395, | |
| "grad_norm": 0.4492125406814553, | |
| "learning_rate": 5.1446360373401125e-06, | |
| "loss": 0.5301, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.6270691333982472, | |
| "grad_norm": 0.5622471364004906, | |
| "learning_rate": 5.127623975814709e-06, | |
| "loss": 0.5715, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.6299902629016554, | |
| "grad_norm": 0.6240851711756873, | |
| "learning_rate": 5.110610435765935e-06, | |
| "loss": 0.5876, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.6329113924050633, | |
| "grad_norm": 0.4713533254066173, | |
| "learning_rate": 5.093595614295599e-06, | |
| "loss": 0.4901, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.6358325219084713, | |
| "grad_norm": 0.5796358881475803, | |
| "learning_rate": 5.076579708520355e-06, | |
| "loss": 0.6325, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.6387536514118792, | |
| "grad_norm": 0.4866837205814519, | |
| "learning_rate": 5.059562915569424e-06, | |
| "loss": 0.5202, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.6416747809152872, | |
| "grad_norm": 0.5697352103252339, | |
| "learning_rate": 5.0425454325822946e-06, | |
| "loss": 0.6339, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.6445959104186954, | |
| "grad_norm": 0.5721771538016244, | |
| "learning_rate": 5.0255274567064594e-06, | |
| "loss": 0.5364, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.647517039922103, | |
| "grad_norm": 0.5605730886016151, | |
| "learning_rate": 5.008509185095114e-06, | |
| "loss": 0.5717, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.6504381694255112, | |
| "grad_norm": 0.472681806190808, | |
| "learning_rate": 4.991490814904888e-06, | |
| "loss": 0.555, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.6533592989289192, | |
| "grad_norm": 0.5791761573446532, | |
| "learning_rate": 4.974472543293544e-06, | |
| "loss": 0.6077, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.6562804284323271, | |
| "grad_norm": 0.5167447517838185, | |
| "learning_rate": 4.957454567417708e-06, | |
| "loss": 0.5805, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.6592015579357353, | |
| "grad_norm": 0.5561818381815233, | |
| "learning_rate": 4.940437084430579e-06, | |
| "loss": 0.594, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.662122687439143, | |
| "grad_norm": 0.44095968784824047, | |
| "learning_rate": 4.923420291479646e-06, | |
| "loss": 0.5235, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.6650438169425512, | |
| "grad_norm": 0.49918372558187907, | |
| "learning_rate": 4.906404385704402e-06, | |
| "loss": 0.6011, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.667964946445959, | |
| "grad_norm": 0.5067863457643954, | |
| "learning_rate": 4.8893895642340665e-06, | |
| "loss": 0.5415, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.6708860759493671, | |
| "grad_norm": 0.49737844087035643, | |
| "learning_rate": 4.872376024185291e-06, | |
| "loss": 0.583, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.673807205452775, | |
| "grad_norm": 0.4659962080708305, | |
| "learning_rate": 4.855363962659889e-06, | |
| "loss": 0.5627, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.676728334956183, | |
| "grad_norm": 0.4972582768404136, | |
| "learning_rate": 4.838353576742538e-06, | |
| "loss": 0.5911, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.6796494644595912, | |
| "grad_norm": 0.44592731268572705, | |
| "learning_rate": 4.82134506349851e-06, | |
| "loss": 0.5459, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.682570593962999, | |
| "grad_norm": 0.506185991053395, | |
| "learning_rate": 4.804338619971377e-06, | |
| "loss": 0.5306, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.685491723466407, | |
| "grad_norm": 0.5177923049058465, | |
| "learning_rate": 4.787334443180735e-06, | |
| "loss": 0.6209, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.688412852969815, | |
| "grad_norm": 0.4601309987230691, | |
| "learning_rate": 4.7703327301199244e-06, | |
| "loss": 0.5145, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.691333982473223, | |
| "grad_norm": 0.4643978865329562, | |
| "learning_rate": 4.753333677753734e-06, | |
| "loss": 0.5788, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.694255111976631, | |
| "grad_norm": 0.5283219640600681, | |
| "learning_rate": 4.736337483016138e-06, | |
| "loss": 0.6056, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.6971762414800389, | |
| "grad_norm": 0.5571423915108132, | |
| "learning_rate": 4.719344342808e-06, | |
| "loss": 0.5791, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.700097370983447, | |
| "grad_norm": 0.49563030247464135, | |
| "learning_rate": 4.702354453994794e-06, | |
| "loss": 0.5373, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.7030185004868548, | |
| "grad_norm": 0.49902926183967655, | |
| "learning_rate": 4.6853680134043345e-06, | |
| "loss": 0.5823, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.705939629990263, | |
| "grad_norm": 0.5116289187531037, | |
| "learning_rate": 4.668385217824482e-06, | |
| "loss": 0.5557, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.7088607594936709, | |
| "grad_norm": 0.5412976872055105, | |
| "learning_rate": 4.651406264000871e-06, | |
| "loss": 0.5535, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.7117818889970788, | |
| "grad_norm": 0.5297049362432723, | |
| "learning_rate": 4.634431348634632e-06, | |
| "loss": 0.5872, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.714703018500487, | |
| "grad_norm": 0.46815419228452076, | |
| "learning_rate": 4.6174606683801055e-06, | |
| "loss": 0.5612, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.7176241480038947, | |
| "grad_norm": 0.5047028920506614, | |
| "learning_rate": 4.600494419842573e-06, | |
| "loss": 0.5457, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.720545277507303, | |
| "grad_norm": 0.5021848097551853, | |
| "learning_rate": 4.58353279957597e-06, | |
| "loss": 0.5512, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.7234664070107109, | |
| "grad_norm": 0.5567736723329815, | |
| "learning_rate": 4.5665760040806174e-06, | |
| "loss": 0.577, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.7263875365141188, | |
| "grad_norm": 0.4982671338092423, | |
| "learning_rate": 4.549624229800938e-06, | |
| "loss": 0.548, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.7293086660175268, | |
| "grad_norm": 0.49329094074335034, | |
| "learning_rate": 4.532677673123188e-06, | |
| "loss": 0.5563, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.7322297955209347, | |
| "grad_norm": 0.4890277332079673, | |
| "learning_rate": 4.5157365303731705e-06, | |
| "loss": 0.5747, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.7351509250243429, | |
| "grad_norm": 0.5600537499346627, | |
| "learning_rate": 4.498800997813976e-06, | |
| "loss": 0.5816, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.7380720545277506, | |
| "grad_norm": 0.5086959673884651, | |
| "learning_rate": 4.481871271643698e-06, | |
| "loss": 0.5671, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.7409931840311588, | |
| "grad_norm": 0.5157431177024923, | |
| "learning_rate": 4.464947547993162e-06, | |
| "loss": 0.5731, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.7439143135345667, | |
| "grad_norm": 0.5137961841228513, | |
| "learning_rate": 4.4480300229236525e-06, | |
| "loss": 0.544, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.7468354430379747, | |
| "grad_norm": 0.5937636838238008, | |
| "learning_rate": 4.431118892424649e-06, | |
| "loss": 0.6082, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.7497565725413826, | |
| "grad_norm": 0.5529433304939138, | |
| "learning_rate": 4.414214352411544e-06, | |
| "loss": 0.5569, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.7526777020447906, | |
| "grad_norm": 0.5159596510951358, | |
| "learning_rate": 4.397316598723385e-06, | |
| "loss": 0.5455, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.7555988315481987, | |
| "grad_norm": 0.5506758519748508, | |
| "learning_rate": 4.3804258271205965e-06, | |
| "loss": 0.5505, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.7585199610516065, | |
| "grad_norm": 0.5523849090934733, | |
| "learning_rate": 4.363542233282715e-06, | |
| "loss": 0.5355, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.7614410905550146, | |
| "grad_norm": 0.6508686455735765, | |
| "learning_rate": 4.346666012806126e-06, | |
| "loss": 0.602, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.7643622200584226, | |
| "grad_norm": 0.46084404235460724, | |
| "learning_rate": 4.329797361201788e-06, | |
| "loss": 0.5028, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.7672833495618305, | |
| "grad_norm": 0.5857372990733761, | |
| "learning_rate": 4.312936473892984e-06, | |
| "loss": 0.6468, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.7702044790652387, | |
| "grad_norm": 0.4935847914878853, | |
| "learning_rate": 4.296083546213037e-06, | |
| "loss": 0.5578, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.7731256085686464, | |
| "grad_norm": 0.5144278362398599, | |
| "learning_rate": 4.279238773403066e-06, | |
| "loss": 0.5337, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.7760467380720546, | |
| "grad_norm": 0.46428380216043086, | |
| "learning_rate": 4.2624023506097116e-06, | |
| "loss": 0.6219, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.7789678675754625, | |
| "grad_norm": 0.4492920362378749, | |
| "learning_rate": 4.245574472882882e-06, | |
| "loss": 0.5061, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.7818889970788705, | |
| "grad_norm": 0.48814416218826545, | |
| "learning_rate": 4.228755335173488e-06, | |
| "loss": 0.5045, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.7848101265822784, | |
| "grad_norm": 0.4935317746542341, | |
| "learning_rate": 4.21194513233119e-06, | |
| "loss": 0.6334, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.7877312560856864, | |
| "grad_norm": 0.5119284354029839, | |
| "learning_rate": 4.1951440591021375e-06, | |
| "loss": 0.5599, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.7906523855890946, | |
| "grad_norm": 0.4827371863106343, | |
| "learning_rate": 4.17835231012671e-06, | |
| "loss": 0.5692, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.7935735150925023, | |
| "grad_norm": 0.5212508350905807, | |
| "learning_rate": 4.161570079937268e-06, | |
| "loss": 0.5947, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.7964946445959105, | |
| "grad_norm": 0.5235515777898554, | |
| "learning_rate": 4.1447975629559e-06, | |
| "loss": 0.5808, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.7994157740993184, | |
| "grad_norm": 0.5336005328533747, | |
| "learning_rate": 4.128034953492163e-06, | |
| "loss": 0.5358, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.8023369036027264, | |
| "grad_norm": 0.5597776990519019, | |
| "learning_rate": 4.1112824457408375e-06, | |
| "loss": 0.5884, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.8052580331061345, | |
| "grad_norm": 0.5036193557727575, | |
| "learning_rate": 4.094540233779672e-06, | |
| "loss": 0.5688, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.8081791626095423, | |
| "grad_norm": 0.5338396120419266, | |
| "learning_rate": 4.077808511567145e-06, | |
| "loss": 0.5608, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.8111002921129504, | |
| "grad_norm": 0.4958715717085618, | |
| "learning_rate": 4.061087472940204e-06, | |
| "loss": 0.5543, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.8140214216163584, | |
| "grad_norm": 0.6029334087991165, | |
| "learning_rate": 4.044377311612033e-06, | |
| "loss": 0.5691, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.8169425511197663, | |
| "grad_norm": 0.47915777984754265, | |
| "learning_rate": 4.0276782211698e-06, | |
| "loss": 0.5391, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.8198636806231743, | |
| "grad_norm": 0.5235008297199194, | |
| "learning_rate": 4.010990395072414e-06, | |
| "loss": 0.5736, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.8227848101265822, | |
| "grad_norm": 0.5590747505915074, | |
| "learning_rate": 3.9943140266482935e-06, | |
| "loss": 0.6075, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.8257059396299904, | |
| "grad_norm": 0.47598042280541497, | |
| "learning_rate": 3.977649309093113e-06, | |
| "loss": 0.5344, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.8286270691333981, | |
| "grad_norm": 0.48621841809715133, | |
| "learning_rate": 3.960996435467577e-06, | |
| "loss": 0.6179, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.8315481986368063, | |
| "grad_norm": 0.5017298117872958, | |
| "learning_rate": 3.944355598695174e-06, | |
| "loss": 0.5443, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.8344693281402142, | |
| "grad_norm": 0.5380565465468942, | |
| "learning_rate": 3.9277269915599505e-06, | |
| "loss": 0.57, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.8373904576436222, | |
| "grad_norm": 0.5990209575217125, | |
| "learning_rate": 3.911110806704265e-06, | |
| "loss": 0.5784, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.8403115871470301, | |
| "grad_norm": 0.49509817541993284, | |
| "learning_rate": 3.89450723662657e-06, | |
| "loss": 0.5628, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.843232716650438, | |
| "grad_norm": 0.550605497861562, | |
| "learning_rate": 3.877916473679176e-06, | |
| "loss": 0.6147, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 0.5524680706044535, | |
| "learning_rate": 3.861338710066021e-06, | |
| "loss": 0.5125, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.849074975657254, | |
| "grad_norm": 0.5494313577644265, | |
| "learning_rate": 3.8447741378404436e-06, | |
| "loss": 0.5562, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.8519961051606622, | |
| "grad_norm": 0.5058622045652387, | |
| "learning_rate": 3.828222948902965e-06, | |
| "loss": 0.5967, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.85491723466407, | |
| "grad_norm": 0.4488395706127018, | |
| "learning_rate": 3.8116853349990574e-06, | |
| "loss": 0.4735, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.857838364167478, | |
| "grad_norm": 0.5073104983823334, | |
| "learning_rate": 3.7951614877169285e-06, | |
| "loss": 0.6214, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.8607594936708862, | |
| "grad_norm": 0.5020623526859631, | |
| "learning_rate": 3.7786515984852977e-06, | |
| "loss": 0.5801, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.863680623174294, | |
| "grad_norm": 0.5066558405528695, | |
| "learning_rate": 3.762155858571184e-06, | |
| "loss": 0.5571, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.8666017526777021, | |
| "grad_norm": 0.5016476079074594, | |
| "learning_rate": 3.7456744590776807e-06, | |
| "loss": 0.554, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.86952288218111, | |
| "grad_norm": 0.5522667996432958, | |
| "learning_rate": 3.729207590941753e-06, | |
| "loss": 0.5684, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.872444011684518, | |
| "grad_norm": 0.5063071640192653, | |
| "learning_rate": 3.712755444932018e-06, | |
| "loss": 0.5536, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.875365141187926, | |
| "grad_norm": 0.47979895935138983, | |
| "learning_rate": 3.6963182116465358e-06, | |
| "loss": 0.5607, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.878286270691334, | |
| "grad_norm": 0.5466280214439997, | |
| "learning_rate": 3.6798960815106065e-06, | |
| "loss": 0.5912, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.881207400194742, | |
| "grad_norm": 0.50137359619968, | |
| "learning_rate": 3.663489244774555e-06, | |
| "loss": 0.6032, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.8841285296981498, | |
| "grad_norm": 0.5176702766961846, | |
| "learning_rate": 3.647097891511536e-06, | |
| "loss": 0.5441, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.887049659201558, | |
| "grad_norm": 0.45011045640480307, | |
| "learning_rate": 3.630722211615328e-06, | |
| "loss": 0.5064, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.889970788704966, | |
| "grad_norm": 0.5394614468332969, | |
| "learning_rate": 3.614362394798131e-06, | |
| "loss": 0.6368, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.8928919182083739, | |
| "grad_norm": 0.4874425981153431, | |
| "learning_rate": 3.5980186305883746e-06, | |
| "loss": 0.5701, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.895813047711782, | |
| "grad_norm": 0.507766689537816, | |
| "learning_rate": 3.5816911083285165e-06, | |
| "loss": 0.5798, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.8987341772151898, | |
| "grad_norm": 0.4863274459783003, | |
| "learning_rate": 3.565380017172854e-06, | |
| "loss": 0.5244, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.901655306718598, | |
| "grad_norm": 0.5805740988868203, | |
| "learning_rate": 3.5490855460853275e-06, | |
| "loss": 0.6026, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.904576436222006, | |
| "grad_norm": 0.5151920826178348, | |
| "learning_rate": 3.5328078838373338e-06, | |
| "loss": 0.5578, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.9074975657254138, | |
| "grad_norm": 0.4988300821200546, | |
| "learning_rate": 3.516547219005542e-06, | |
| "loss": 0.5639, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.9104186952288218, | |
| "grad_norm": 0.5020574341639038, | |
| "learning_rate": 3.500303739969704e-06, | |
| "loss": 0.5553, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.9133398247322297, | |
| "grad_norm": 0.47286299047516567, | |
| "learning_rate": 3.4840776349104755e-06, | |
| "loss": 0.5649, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.916260954235638, | |
| "grad_norm": 0.4681307130388586, | |
| "learning_rate": 3.4678690918072335e-06, | |
| "loss": 0.5682, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.9191820837390456, | |
| "grad_norm": 0.46768532251073486, | |
| "learning_rate": 3.451678298435902e-06, | |
| "loss": 0.5486, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.9221032132424538, | |
| "grad_norm": 0.4674675744224569, | |
| "learning_rate": 3.4355054423667712e-06, | |
| "loss": 0.5421, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.9250243427458618, | |
| "grad_norm": 0.5073113729998869, | |
| "learning_rate": 3.4193507109623323e-06, | |
| "loss": 0.5802, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.9279454722492697, | |
| "grad_norm": 0.47796072007470125, | |
| "learning_rate": 3.4032142913750956e-06, | |
| "loss": 0.5454, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.9308666017526777, | |
| "grad_norm": 0.44978612526614165, | |
| "learning_rate": 3.3870963705454353e-06, | |
| "loss": 0.6016, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.9337877312560856, | |
| "grad_norm": 0.49209754086259494, | |
| "learning_rate": 3.370997135199413e-06, | |
| "loss": 0.5898, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.9367088607594938, | |
| "grad_norm": 0.5261340174516683, | |
| "learning_rate": 3.3549167718466245e-06, | |
| "loss": 0.5438, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.9396299902629015, | |
| "grad_norm": 0.5215555650313894, | |
| "learning_rate": 3.3388554667780272e-06, | |
| "loss": 0.59, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.9425511197663097, | |
| "grad_norm": 0.4360310531256464, | |
| "learning_rate": 3.322813406063794e-06, | |
| "loss": 0.5361, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.9454722492697176, | |
| "grad_norm": 0.4338971494593997, | |
| "learning_rate": 3.3067907755511473e-06, | |
| "loss": 0.5217, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.9483933787731256, | |
| "grad_norm": 0.5993978224848855, | |
| "learning_rate": 3.290787760862215e-06, | |
| "loss": 0.6499, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.9513145082765337, | |
| "grad_norm": 0.4277968919618892, | |
| "learning_rate": 3.274804547391872e-06, | |
| "loss": 0.5059, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.9542356377799415, | |
| "grad_norm": 0.4833235638298786, | |
| "learning_rate": 3.258841320305598e-06, | |
| "loss": 0.6043, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.9571567672833496, | |
| "grad_norm": 0.4876241950113585, | |
| "learning_rate": 3.242898264537331e-06, | |
| "loss": 0.5982, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.9600778967867576, | |
| "grad_norm": 0.4287569825122473, | |
| "learning_rate": 3.226975564787322e-06, | |
| "loss": 0.5291, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.9629990262901655, | |
| "grad_norm": 0.559198431896861, | |
| "learning_rate": 3.211073405520001e-06, | |
| "loss": 0.5722, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.9659201557935735, | |
| "grad_norm": 0.5529747384120326, | |
| "learning_rate": 3.195191970961833e-06, | |
| "loss": 0.5766, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.9688412852969814, | |
| "grad_norm": 0.4620747878088523, | |
| "learning_rate": 3.1793314450991895e-06, | |
| "loss": 0.5911, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.9717624148003896, | |
| "grad_norm": 0.46006255679284874, | |
| "learning_rate": 3.1634920116762175e-06, | |
| "loss": 0.5622, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.9746835443037973, | |
| "grad_norm": 0.4259271639046577, | |
| "learning_rate": 3.1476738541926993e-06, | |
| "loss": 0.5287, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.9776046738072055, | |
| "grad_norm": 0.4729000058273843, | |
| "learning_rate": 3.1318771559019455e-06, | |
| "loss": 0.5863, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.9805258033106135, | |
| "grad_norm": 0.5100371866983662, | |
| "learning_rate": 3.1161020998086566e-06, | |
| "loss": 0.5687, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.9834469328140214, | |
| "grad_norm": 0.4379626484222088, | |
| "learning_rate": 3.1003488686668076e-06, | |
| "loss": 0.5155, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.9863680623174296, | |
| "grad_norm": 0.4844416917506815, | |
| "learning_rate": 3.0846176449775363e-06, | |
| "loss": 0.5782, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.9892891918208373, | |
| "grad_norm": 0.48815074844900985, | |
| "learning_rate": 3.0689086109870188e-06, | |
| "loss": 0.5709, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.9922103213242455, | |
| "grad_norm": 0.5064903715857387, | |
| "learning_rate": 3.0532219486843686e-06, | |
| "loss": 0.596, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.9951314508276532, | |
| "grad_norm": 0.492762116678178, | |
| "learning_rate": 3.0375578397995178e-06, | |
| "loss": 0.5766, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.9980525803310614, | |
| "grad_norm": 0.44675106308795903, | |
| "learning_rate": 3.021916465801122e-06, | |
| "loss": 0.5206, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.0009737098344695, | |
| "grad_norm": 0.9800800703433351, | |
| "learning_rate": 3.0062980078944515e-06, | |
| "loss": 0.8991, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 2.0038948393378773, | |
| "grad_norm": 0.4864022689636578, | |
| "learning_rate": 2.990702647019294e-06, | |
| "loss": 0.4938, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.0068159688412854, | |
| "grad_norm": 0.47889455472294706, | |
| "learning_rate": 2.9751305638478555e-06, | |
| "loss": 0.5913, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 2.009737098344693, | |
| "grad_norm": 0.41456980849801855, | |
| "learning_rate": 2.9595819387826753e-06, | |
| "loss": 0.4387, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.0126582278481013, | |
| "grad_norm": 0.5149845371997606, | |
| "learning_rate": 2.9440569519545258e-06, | |
| "loss": 0.5165, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 2.015579357351509, | |
| "grad_norm": 0.5752113401087547, | |
| "learning_rate": 2.9285557832203328e-06, | |
| "loss": 0.5373, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.0185004868549172, | |
| "grad_norm": 0.5062618329379694, | |
| "learning_rate": 2.9130786121610866e-06, | |
| "loss": 0.4947, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 2.0214216163583254, | |
| "grad_norm": 0.49684882097652555, | |
| "learning_rate": 2.897625618079769e-06, | |
| "loss": 0.5208, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.024342745861733, | |
| "grad_norm": 0.53299035788322, | |
| "learning_rate": 2.88219697999927e-06, | |
| "loss": 0.5438, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 2.0272638753651413, | |
| "grad_norm": 0.5543675569999611, | |
| "learning_rate": 2.8667928766603115e-06, | |
| "loss": 0.5444, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.030185004868549, | |
| "grad_norm": 0.4572356814121919, | |
| "learning_rate": 2.851413486519388e-06, | |
| "loss": 0.4634, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 2.033106134371957, | |
| "grad_norm": 0.48340574114252344, | |
| "learning_rate": 2.8360589877466848e-06, | |
| "loss": 0.5391, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.036027263875365, | |
| "grad_norm": 0.5141120553439197, | |
| "learning_rate": 2.8207295582240248e-06, | |
| "loss": 0.4871, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 2.038948393378773, | |
| "grad_norm": 0.4913089013833701, | |
| "learning_rate": 2.8054253755428017e-06, | |
| "loss": 0.5574, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.0418695228821813, | |
| "grad_norm": 0.4601265937011902, | |
| "learning_rate": 2.7901466170019242e-06, | |
| "loss": 0.4977, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 2.044790652385589, | |
| "grad_norm": 0.5120604434660413, | |
| "learning_rate": 2.774893459605766e-06, | |
| "loss": 0.56, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.047711781888997, | |
| "grad_norm": 0.47951033771571855, | |
| "learning_rate": 2.7596660800621076e-06, | |
| "loss": 0.5028, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 2.050632911392405, | |
| "grad_norm": 0.48501372243432067, | |
| "learning_rate": 2.744464654780096e-06, | |
| "loss": 0.5174, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.053554040895813, | |
| "grad_norm": 0.4741865276944199, | |
| "learning_rate": 2.7292893598681934e-06, | |
| "loss": 0.5104, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 2.0564751703992212, | |
| "grad_norm": 0.48778437702309935, | |
| "learning_rate": 2.714140371132149e-06, | |
| "loss": 0.5375, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.059396299902629, | |
| "grad_norm": 0.49018615472755583, | |
| "learning_rate": 2.69901786407295e-06, | |
| "loss": 0.5105, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 2.062317429406037, | |
| "grad_norm": 0.48782945068406763, | |
| "learning_rate": 2.6839220138847966e-06, | |
| "loss": 0.4899, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 2.065238558909445, | |
| "grad_norm": 0.4732567901538321, | |
| "learning_rate": 2.6688529954530612e-06, | |
| "loss": 0.5388, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 2.068159688412853, | |
| "grad_norm": 0.4935373768584108, | |
| "learning_rate": 2.653810983352282e-06, | |
| "loss": 0.5822, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 2.0710808179162608, | |
| "grad_norm": 0.44379266935434936, | |
| "learning_rate": 2.6387961518441223e-06, | |
| "loss": 0.4639, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 2.074001947419669, | |
| "grad_norm": 0.47555161755659076, | |
| "learning_rate": 2.6238086748753587e-06, | |
| "loss": 0.5153, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.076923076923077, | |
| "grad_norm": 0.4924498329014108, | |
| "learning_rate": 2.6088487260758643e-06, | |
| "loss": 0.5245, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 2.079844206426485, | |
| "grad_norm": 0.4492288702216835, | |
| "learning_rate": 2.593916478756605e-06, | |
| "loss": 0.4816, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 2.082765335929893, | |
| "grad_norm": 0.5008247473603998, | |
| "learning_rate": 2.579012105907619e-06, | |
| "loss": 0.5824, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 2.0856864654333007, | |
| "grad_norm": 0.479453244836006, | |
| "learning_rate": 2.5641357801960186e-06, | |
| "loss": 0.4738, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.088607594936709, | |
| "grad_norm": 0.49048895532662706, | |
| "learning_rate": 2.5492876739639912e-06, | |
| "loss": 0.5299, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 2.091528724440117, | |
| "grad_norm": 0.5369724976128885, | |
| "learning_rate": 2.534467959226806e-06, | |
| "loss": 0.5162, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.094449853943525, | |
| "grad_norm": 0.5137260447049424, | |
| "learning_rate": 2.519676807670811e-06, | |
| "loss": 0.5081, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 2.097370983446933, | |
| "grad_norm": 0.4930859741487903, | |
| "learning_rate": 2.504914390651453e-06, | |
| "loss": 0.5576, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.1002921129503407, | |
| "grad_norm": 0.4775125343363975, | |
| "learning_rate": 2.4901808791912864e-06, | |
| "loss": 0.4998, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 2.103213242453749, | |
| "grad_norm": 0.5214779418043487, | |
| "learning_rate": 2.475476443977996e-06, | |
| "loss": 0.5696, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.1061343719571566, | |
| "grad_norm": 0.47977761871037206, | |
| "learning_rate": 2.460801255362425e-06, | |
| "loss": 0.4764, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 2.1090555014605648, | |
| "grad_norm": 0.48061705012654404, | |
| "learning_rate": 2.446155483356582e-06, | |
| "loss": 0.5357, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.111976630963973, | |
| "grad_norm": 0.45629547557990546, | |
| "learning_rate": 2.4315392976316923e-06, | |
| "loss": 0.4915, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 2.1148977604673806, | |
| "grad_norm": 0.4780522247938775, | |
| "learning_rate": 2.4169528675162256e-06, | |
| "loss": 0.4838, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.117818889970789, | |
| "grad_norm": 0.49801842180761746, | |
| "learning_rate": 2.40239636199393e-06, | |
| "loss": 0.537, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 2.1207400194741965, | |
| "grad_norm": 0.4846689678826097, | |
| "learning_rate": 2.3878699497018763e-06, | |
| "loss": 0.549, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.1236611489776047, | |
| "grad_norm": 0.5018761582798917, | |
| "learning_rate": 2.373373798928507e-06, | |
| "loss": 0.5042, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 2.1265822784810124, | |
| "grad_norm": 0.5072064832309269, | |
| "learning_rate": 2.358908077611684e-06, | |
| "loss": 0.5795, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.1295034079844206, | |
| "grad_norm": 0.46610002849697485, | |
| "learning_rate": 2.344472953336747e-06, | |
| "loss": 0.468, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 2.132424537487829, | |
| "grad_norm": 0.5002554866584938, | |
| "learning_rate": 2.3300685933345656e-06, | |
| "loss": 0.5022, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.1353456669912365, | |
| "grad_norm": 0.5324744280306557, | |
| "learning_rate": 2.3156951644796065e-06, | |
| "loss": 0.5877, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 2.1382667964946447, | |
| "grad_norm": 0.4804432916120911, | |
| "learning_rate": 2.3013528332879976e-06, | |
| "loss": 0.4814, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.1411879259980524, | |
| "grad_norm": 0.47091232830887136, | |
| "learning_rate": 2.287041765915606e-06, | |
| "loss": 0.4953, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 2.1441090555014606, | |
| "grad_norm": 0.4535727155626379, | |
| "learning_rate": 2.272762128156101e-06, | |
| "loss": 0.5269, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.1470301850048688, | |
| "grad_norm": 0.47112551281102016, | |
| "learning_rate": 2.2585140854390432e-06, | |
| "loss": 0.5244, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 2.1499513145082765, | |
| "grad_norm": 0.4545532988740129, | |
| "learning_rate": 2.2442978028279634e-06, | |
| "loss": 0.5341, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.1528724440116846, | |
| "grad_norm": 0.5024829916260531, | |
| "learning_rate": 2.2301134450184535e-06, | |
| "loss": 0.5243, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 2.1557935735150924, | |
| "grad_norm": 0.457679806572502, | |
| "learning_rate": 2.215961176336255e-06, | |
| "loss": 0.5157, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.1587147030185005, | |
| "grad_norm": 0.41799952326682965, | |
| "learning_rate": 2.2018411607353572e-06, | |
| "loss": 0.4653, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 2.1616358325219083, | |
| "grad_norm": 0.4787265660707973, | |
| "learning_rate": 2.187753561796097e-06, | |
| "loss": 0.5521, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.1645569620253164, | |
| "grad_norm": 0.48532252970802797, | |
| "learning_rate": 2.1736985427232684e-06, | |
| "loss": 0.5736, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 2.1674780915287246, | |
| "grad_norm": 0.46670045472968474, | |
| "learning_rate": 2.159676266344222e-06, | |
| "loss": 0.4856, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.1703992210321323, | |
| "grad_norm": 0.456907097163635, | |
| "learning_rate": 2.1456868951069875e-06, | |
| "loss": 0.513, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 2.1733203505355405, | |
| "grad_norm": 0.43480032818372993, | |
| "learning_rate": 2.1317305910783886e-06, | |
| "loss": 0.5183, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.1762414800389482, | |
| "grad_norm": 0.4646674951011356, | |
| "learning_rate": 2.117807515942163e-06, | |
| "loss": 0.5169, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 2.1791626095423564, | |
| "grad_norm": 0.5402952715425572, | |
| "learning_rate": 2.1039178309970975e-06, | |
| "loss": 0.5256, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.1820837390457646, | |
| "grad_norm": 0.44604300646980855, | |
| "learning_rate": 2.090061697155147e-06, | |
| "loss": 0.5264, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 2.1850048685491723, | |
| "grad_norm": 0.4855629840620991, | |
| "learning_rate": 2.076239274939582e-06, | |
| "loss": 0.497, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.1879259980525805, | |
| "grad_norm": 0.450371819506377, | |
| "learning_rate": 2.062450724483118e-06, | |
| "loss": 0.492, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 2.190847127555988, | |
| "grad_norm": 0.4675791607919664, | |
| "learning_rate": 2.0486962055260744e-06, | |
| "loss": 0.4833, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.1937682570593964, | |
| "grad_norm": 0.4931924675592549, | |
| "learning_rate": 2.03497587741451e-06, | |
| "loss": 0.5325, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 2.196689386562804, | |
| "grad_norm": 0.4728333199503108, | |
| "learning_rate": 2.021289899098384e-06, | |
| "loss": 0.5753, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 2.1996105160662123, | |
| "grad_norm": 0.44477975654172386, | |
| "learning_rate": 2.0076384291297134e-06, | |
| "loss": 0.5095, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 2.2025316455696204, | |
| "grad_norm": 0.42502487470067785, | |
| "learning_rate": 1.994021625660737e-06, | |
| "loss": 0.5004, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 2.205452775073028, | |
| "grad_norm": 0.4682554593875959, | |
| "learning_rate": 1.9804396464420798e-06, | |
| "loss": 0.5377, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 2.2083739045764363, | |
| "grad_norm": 0.478543398252641, | |
| "learning_rate": 1.966892648820932e-06, | |
| "loss": 0.5214, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 2.211295034079844, | |
| "grad_norm": 0.4638445072960299, | |
| "learning_rate": 1.953380789739216e-06, | |
| "loss": 0.522, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 2.2142161635832522, | |
| "grad_norm": 0.44455755591464263, | |
| "learning_rate": 1.939904225731783e-06, | |
| "loss": 0.5279, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 2.21713729308666, | |
| "grad_norm": 0.43381712587340415, | |
| "learning_rate": 1.9264631129245836e-06, | |
| "loss": 0.5082, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 2.220058422590068, | |
| "grad_norm": 0.46180168699161817, | |
| "learning_rate": 1.9130576070328695e-06, | |
| "loss": 0.5129, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.2229795520934763, | |
| "grad_norm": 0.4554045010755852, | |
| "learning_rate": 1.8996878633593829e-06, | |
| "loss": 0.499, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 2.225900681596884, | |
| "grad_norm": 0.4749182830100526, | |
| "learning_rate": 1.8863540367925676e-06, | |
| "loss": 0.5343, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 2.228821811100292, | |
| "grad_norm": 0.45836107720097835, | |
| "learning_rate": 1.873056281804762e-06, | |
| "loss": 0.4869, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 2.2317429406037, | |
| "grad_norm": 0.45195906937962993, | |
| "learning_rate": 1.8597947524504178e-06, | |
| "loss": 0.5201, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 2.234664070107108, | |
| "grad_norm": 0.45112191767699233, | |
| "learning_rate": 1.8465696023643115e-06, | |
| "loss": 0.5354, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 2.2375851996105163, | |
| "grad_norm": 0.49025509449815313, | |
| "learning_rate": 1.8333809847597644e-06, | |
| "loss": 0.558, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 2.240506329113924, | |
| "grad_norm": 0.4486301410086504, | |
| "learning_rate": 1.8202290524268761e-06, | |
| "loss": 0.4795, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 2.243427458617332, | |
| "grad_norm": 0.45776452388300076, | |
| "learning_rate": 1.8071139577307416e-06, | |
| "loss": 0.5228, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.24634858812074, | |
| "grad_norm": 0.4414886979982915, | |
| "learning_rate": 1.7940358526096885e-06, | |
| "loss": 0.4903, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 2.249269717624148, | |
| "grad_norm": 0.44672029042681155, | |
| "learning_rate": 1.7809948885735295e-06, | |
| "loss": 0.5906, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.252190847127556, | |
| "grad_norm": 0.40774965094554166, | |
| "learning_rate": 1.7679912167017922e-06, | |
| "loss": 0.4726, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 2.255111976630964, | |
| "grad_norm": 0.42904714451405873, | |
| "learning_rate": 1.7550249876419746e-06, | |
| "loss": 0.5263, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 2.258033106134372, | |
| "grad_norm": 0.46232426979933267, | |
| "learning_rate": 1.7420963516078016e-06, | |
| "loss": 0.5207, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 2.26095423563778, | |
| "grad_norm": 0.43964498189412426, | |
| "learning_rate": 1.7292054583774809e-06, | |
| "loss": 0.5634, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 2.263875365141188, | |
| "grad_norm": 0.4614885797674806, | |
| "learning_rate": 1.7163524572919748e-06, | |
| "loss": 0.5167, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 2.2667964946445958, | |
| "grad_norm": 0.4326167271312955, | |
| "learning_rate": 1.7035374972532593e-06, | |
| "loss": 0.4734, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 2.269717624148004, | |
| "grad_norm": 0.477108582339082, | |
| "learning_rate": 1.6907607267226079e-06, | |
| "loss": 0.5436, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 2.272638753651412, | |
| "grad_norm": 0.45201769752670545, | |
| "learning_rate": 1.6780222937188662e-06, | |
| "loss": 0.5207, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 2.27555988315482, | |
| "grad_norm": 0.48262208131523315, | |
| "learning_rate": 1.665322345816746e-06, | |
| "loss": 0.5304, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 2.278481012658228, | |
| "grad_norm": 0.45623936201495924, | |
| "learning_rate": 1.6526610301451028e-06, | |
| "loss": 0.4604, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.2814021421616357, | |
| "grad_norm": 0.45941621411519823, | |
| "learning_rate": 1.6400384933852403e-06, | |
| "loss": 0.508, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 2.284323271665044, | |
| "grad_norm": 0.4640162246141768, | |
| "learning_rate": 1.6274548817692088e-06, | |
| "loss": 0.5429, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 2.2872444011684516, | |
| "grad_norm": 0.4517120340093356, | |
| "learning_rate": 1.6149103410781086e-06, | |
| "loss": 0.5126, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 2.29016553067186, | |
| "grad_norm": 0.4528239429323758, | |
| "learning_rate": 1.6024050166404097e-06, | |
| "loss": 0.4995, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 2.293086660175268, | |
| "grad_norm": 0.4440949852742247, | |
| "learning_rate": 1.5899390533302538e-06, | |
| "loss": 0.5333, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 2.2960077896786757, | |
| "grad_norm": 0.3970036577732869, | |
| "learning_rate": 1.5775125955657877e-06, | |
| "loss": 0.5012, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 2.298928919182084, | |
| "grad_norm": 0.4484643953022932, | |
| "learning_rate": 1.5651257873074898e-06, | |
| "loss": 0.5413, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 2.3018500486854916, | |
| "grad_norm": 0.4679410901074801, | |
| "learning_rate": 1.5527787720564946e-06, | |
| "loss": 0.5229, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 2.3047711781888998, | |
| "grad_norm": 0.450714999626221, | |
| "learning_rate": 1.5404716928529356e-06, | |
| "loss": 0.5079, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 0.46250776338412736, | |
| "learning_rate": 1.5282046922742876e-06, | |
| "loss": 0.5056, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.3106134371957157, | |
| "grad_norm": 0.47191060686599, | |
| "learning_rate": 1.515977912433717e-06, | |
| "loss": 0.5279, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 2.313534566699124, | |
| "grad_norm": 0.493016587785878, | |
| "learning_rate": 1.50379149497843e-06, | |
| "loss": 0.5483, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 2.3164556962025316, | |
| "grad_norm": 0.47810281591953624, | |
| "learning_rate": 1.4916455810880358e-06, | |
| "loss": 0.4898, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 2.3193768257059397, | |
| "grad_norm": 0.4478075131535074, | |
| "learning_rate": 1.4795403114729095e-06, | |
| "loss": 0.5101, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 2.3222979552093475, | |
| "grad_norm": 0.4875641000416008, | |
| "learning_rate": 1.4674758263725614e-06, | |
| "loss": 0.5854, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 2.3252190847127556, | |
| "grad_norm": 0.42525631573701944, | |
| "learning_rate": 1.4554522655540176e-06, | |
| "loss": 0.4989, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 2.3281402142161634, | |
| "grad_norm": 0.41706543335088775, | |
| "learning_rate": 1.4434697683101928e-06, | |
| "loss": 0.516, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 2.3310613437195715, | |
| "grad_norm": 0.4524630863860165, | |
| "learning_rate": 1.4315284734582802e-06, | |
| "loss": 0.4936, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 2.3339824732229797, | |
| "grad_norm": 0.5066460944480237, | |
| "learning_rate": 1.4196285193381431e-06, | |
| "loss": 0.5665, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 2.3369036027263874, | |
| "grad_norm": 0.4886439078673485, | |
| "learning_rate": 1.4077700438107183e-06, | |
| "loss": 0.5191, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.3398247322297956, | |
| "grad_norm": 0.42502615563238366, | |
| "learning_rate": 1.3959531842564046e-06, | |
| "loss": 0.4785, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 2.3427458617332033, | |
| "grad_norm": 0.45982217381683227, | |
| "learning_rate": 1.3841780775734847e-06, | |
| "loss": 0.5643, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 2.3456669912366115, | |
| "grad_norm": 0.43225051785834356, | |
| "learning_rate": 1.3724448601765328e-06, | |
| "loss": 0.5629, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 2.3485881207400197, | |
| "grad_norm": 0.44393943057319624, | |
| "learning_rate": 1.3607536679948397e-06, | |
| "loss": 0.456, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 2.3515092502434274, | |
| "grad_norm": 0.44935770423521926, | |
| "learning_rate": 1.3491046364708294e-06, | |
| "loss": 0.52, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 2.3544303797468356, | |
| "grad_norm": 0.4573183127076476, | |
| "learning_rate": 1.337497900558497e-06, | |
| "loss": 0.5231, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 2.3573515092502433, | |
| "grad_norm": 0.4471873705684597, | |
| "learning_rate": 1.325933594721841e-06, | |
| "loss": 0.5191, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 2.3602726387536515, | |
| "grad_norm": 0.46232878806411926, | |
| "learning_rate": 1.3144118529333126e-06, | |
| "loss": 0.4641, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 2.3631937682570596, | |
| "grad_norm": 0.46569289026754496, | |
| "learning_rate": 1.3029328086722537e-06, | |
| "loss": 0.5363, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 2.3661148977604674, | |
| "grad_norm": 0.4515090725602521, | |
| "learning_rate": 1.2914965949233572e-06, | |
| "loss": 0.5278, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.3690360272638755, | |
| "grad_norm": 0.45127137221258873, | |
| "learning_rate": 1.2801033441751244e-06, | |
| "loss": 0.4919, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 2.3719571567672832, | |
| "grad_norm": 0.45307816143888285, | |
| "learning_rate": 1.268753188418329e-06, | |
| "loss": 0.5063, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 2.3748782862706914, | |
| "grad_norm": 0.4681309102712795, | |
| "learning_rate": 1.257446259144494e-06, | |
| "loss": 0.5185, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 2.377799415774099, | |
| "grad_norm": 0.4665552455306928, | |
| "learning_rate": 1.24618268734436e-06, | |
| "loss": 0.5426, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 2.3807205452775073, | |
| "grad_norm": 0.45997968978470405, | |
| "learning_rate": 1.2349626035063705e-06, | |
| "loss": 0.4996, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 2.3836416747809155, | |
| "grad_norm": 0.470113793180687, | |
| "learning_rate": 1.2237861376151632e-06, | |
| "loss": 0.5392, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 2.386562804284323, | |
| "grad_norm": 0.47230105708445114, | |
| "learning_rate": 1.2126534191500622e-06, | |
| "loss": 0.5361, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 2.3894839337877314, | |
| "grad_norm": 0.4439367141535501, | |
| "learning_rate": 1.2015645770835765e-06, | |
| "loss": 0.4732, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 2.392405063291139, | |
| "grad_norm": 0.4656016270422327, | |
| "learning_rate": 1.1905197398799074e-06, | |
| "loss": 0.5227, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 2.3953261927945473, | |
| "grad_norm": 0.46417888892808024, | |
| "learning_rate": 1.1795190354934587e-06, | |
| "loss": 0.5606, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.398247322297955, | |
| "grad_norm": 0.41153069172218604, | |
| "learning_rate": 1.168562591367361e-06, | |
| "loss": 0.5035, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 2.401168451801363, | |
| "grad_norm": 0.4591933177826969, | |
| "learning_rate": 1.1576505344319843e-06, | |
| "loss": 0.5485, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 2.4040895813047714, | |
| "grad_norm": 0.48641885058814227, | |
| "learning_rate": 1.146782991103475e-06, | |
| "loss": 0.5122, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 2.407010710808179, | |
| "grad_norm": 0.452636414728882, | |
| "learning_rate": 1.1359600872822879e-06, | |
| "loss": 0.528, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 2.4099318403115872, | |
| "grad_norm": 0.4637194713579063, | |
| "learning_rate": 1.1251819483517334e-06, | |
| "loss": 0.5335, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 2.412852969814995, | |
| "grad_norm": 0.46499658066713967, | |
| "learning_rate": 1.1144486991765175e-06, | |
| "loss": 0.519, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 2.415774099318403, | |
| "grad_norm": 0.4519765342482912, | |
| "learning_rate": 1.1037604641012995e-06, | |
| "loss": 0.5488, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 2.418695228821811, | |
| "grad_norm": 0.4382247666420335, | |
| "learning_rate": 1.0931173669492472e-06, | |
| "loss": 0.4853, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 2.421616358325219, | |
| "grad_norm": 0.45306336606053793, | |
| "learning_rate": 1.0825195310206132e-06, | |
| "loss": 0.5224, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 2.424537487828627, | |
| "grad_norm": 0.4473053801916244, | |
| "learning_rate": 1.0719670790912928e-06, | |
| "loss": 0.4971, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.427458617332035, | |
| "grad_norm": 0.44465614500766465, | |
| "learning_rate": 1.0614601334114099e-06, | |
| "loss": 0.5404, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 2.430379746835443, | |
| "grad_norm": 0.4251639911624754, | |
| "learning_rate": 1.0509988157038952e-06, | |
| "loss": 0.5179, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 2.433300876338851, | |
| "grad_norm": 0.40203118130358384, | |
| "learning_rate": 1.0405832471630862e-06, | |
| "loss": 0.4465, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 2.436222005842259, | |
| "grad_norm": 0.49627881222358805, | |
| "learning_rate": 1.030213548453311e-06, | |
| "loss": 0.5782, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 2.439143135345667, | |
| "grad_norm": 0.4352033965269781, | |
| "learning_rate": 1.019889839707498e-06, | |
| "loss": 0.471, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 2.442064264849075, | |
| "grad_norm": 0.4112365791877552, | |
| "learning_rate": 1.0096122405257802e-06, | |
| "loss": 0.525, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 2.444985394352483, | |
| "grad_norm": 0.46113597408698487, | |
| "learning_rate": 9.99380869974116e-07, | |
| "loss": 0.55, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 2.447906523855891, | |
| "grad_norm": 0.42894572321057406, | |
| "learning_rate": 9.891958465828999e-07, | |
| "loss": 0.4883, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 2.450827653359299, | |
| "grad_norm": 0.4306576340142323, | |
| "learning_rate": 9.790572883455974e-07, | |
| "loss": 0.5127, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 2.453748782862707, | |
| "grad_norm": 0.47843377943065934, | |
| "learning_rate": 9.689653127173743e-07, | |
| "loss": 0.5376, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.456669912366115, | |
| "grad_norm": 0.48286119801655825, | |
| "learning_rate": 9.589200366137375e-07, | |
| "loss": 0.5108, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 2.459591041869523, | |
| "grad_norm": 0.4840439892081914, | |
| "learning_rate": 9.489215764091813e-07, | |
| "loss": 0.5436, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 2.4625121713729308, | |
| "grad_norm": 0.4360507831964012, | |
| "learning_rate": 9.389700479358365e-07, | |
| "loss": 0.4733, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 2.465433300876339, | |
| "grad_norm": 0.449450069287443, | |
| "learning_rate": 9.290655664821296e-07, | |
| "loss": 0.5599, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 2.4683544303797467, | |
| "grad_norm": 0.41280059188581175, | |
| "learning_rate": 9.192082467914465e-07, | |
| "loss": 0.4792, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 2.471275559883155, | |
| "grad_norm": 0.5093589634384059, | |
| "learning_rate": 9.093982030608095e-07, | |
| "loss": 0.5397, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 2.474196689386563, | |
| "grad_norm": 0.45448988250512634, | |
| "learning_rate": 8.996355489395442e-07, | |
| "loss": 0.4975, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 2.4771178188899707, | |
| "grad_norm": 0.44480146821328176, | |
| "learning_rate": 8.899203975279674e-07, | |
| "loss": 0.4914, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 2.480038948393379, | |
| "grad_norm": 0.4896804183179627, | |
| "learning_rate": 8.802528613760775e-07, | |
| "loss": 0.5599, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 2.4829600778967866, | |
| "grad_norm": 0.4529609749837371, | |
| "learning_rate": 8.706330524822548e-07, | |
| "loss": 0.4598, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.485881207400195, | |
| "grad_norm": 0.4141270566207645, | |
| "learning_rate": 8.610610822919546e-07, | |
| "loss": 0.5195, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 2.4888023369036025, | |
| "grad_norm": 0.4413472687231943, | |
| "learning_rate": 8.515370616964219e-07, | |
| "loss": 0.5181, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 2.4917234664070107, | |
| "grad_norm": 0.4120555785416349, | |
| "learning_rate": 8.420611010314062e-07, | |
| "loss": 0.5267, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 2.494644595910419, | |
| "grad_norm": 0.4661650695044543, | |
| "learning_rate": 8.326333100758843e-07, | |
| "loss": 0.5086, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 2.4975657254138266, | |
| "grad_norm": 0.5073361612635612, | |
| "learning_rate": 8.232537980507848e-07, | |
| "loss": 0.5536, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 2.5004868549172348, | |
| "grad_norm": 0.4961949890878462, | |
| "learning_rate": 8.13922673617727e-07, | |
| "loss": 0.5238, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 2.5034079844206425, | |
| "grad_norm": 0.4720376448013444, | |
| "learning_rate": 8.046400448777575e-07, | |
| "loss": 0.4823, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 2.5063291139240507, | |
| "grad_norm": 0.49809468171059407, | |
| "learning_rate": 7.954060193701019e-07, | |
| "loss": 0.5466, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 2.5092502434274584, | |
| "grad_norm": 0.4639329084639315, | |
| "learning_rate": 7.862207040709191e-07, | |
| "loss": 0.5104, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 2.5121713729308666, | |
| "grad_norm": 0.47993248882289974, | |
| "learning_rate": 7.770842053920585e-07, | |
| "loss": 0.5379, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.5150925024342747, | |
| "grad_norm": 0.42897728377766037, | |
| "learning_rate": 7.679966291798297e-07, | |
| "loss": 0.482, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 2.5180136319376825, | |
| "grad_norm": 0.45778561195288503, | |
| "learning_rate": 7.589580807137742e-07, | |
| "loss": 0.5032, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 2.5209347614410906, | |
| "grad_norm": 0.46270673489951974, | |
| "learning_rate": 7.4996866470545e-07, | |
| "loss": 0.5706, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 2.523855890944499, | |
| "grad_norm": 0.42072713048943156, | |
| "learning_rate": 7.410284852972127e-07, | |
| "loss": 0.5204, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 2.5267770204479065, | |
| "grad_norm": 0.40846266187572267, | |
| "learning_rate": 7.321376460610136e-07, | |
| "loss": 0.4916, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 2.5296981499513143, | |
| "grad_norm": 0.4443443498829621, | |
| "learning_rate": 7.232962499972002e-07, | |
| "loss": 0.5395, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 2.5326192794547224, | |
| "grad_norm": 0.459368960082765, | |
| "learning_rate": 7.145043995333173e-07, | |
| "loss": 0.4924, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 2.5355404089581306, | |
| "grad_norm": 0.47824210909228654, | |
| "learning_rate": 7.057621965229267e-07, | |
| "loss": 0.513, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 2.5384615384615383, | |
| "grad_norm": 0.438214605329431, | |
| "learning_rate": 6.970697422444228e-07, | |
| "loss": 0.5695, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 2.5413826679649465, | |
| "grad_norm": 0.45798584370168316, | |
| "learning_rate": 6.884271373998608e-07, | |
| "loss": 0.4925, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.5443037974683547, | |
| "grad_norm": 0.4811217026678713, | |
| "learning_rate": 6.798344821137947e-07, | |
| "loss": 0.5083, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 2.5472249269717624, | |
| "grad_norm": 0.4418809012550695, | |
| "learning_rate": 6.71291875932108e-07, | |
| "loss": 0.5352, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 2.5501460564751706, | |
| "grad_norm": 0.42576085849329387, | |
| "learning_rate": 6.62799417820868e-07, | |
| "loss": 0.5262, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 2.5530671859785783, | |
| "grad_norm": 0.4287609653415205, | |
| "learning_rate": 6.543572061651738e-07, | |
| "loss": 0.5399, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 2.5559883154819865, | |
| "grad_norm": 0.45232434071279953, | |
| "learning_rate": 6.459653387680248e-07, | |
| "loss": 0.4725, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 2.558909444985394, | |
| "grad_norm": 0.4554887075692202, | |
| "learning_rate": 6.376239128491784e-07, | |
| "loss": 0.5122, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 2.5618305744888024, | |
| "grad_norm": 0.46277880760014206, | |
| "learning_rate": 6.293330250440277e-07, | |
| "loss": 0.4941, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 2.5647517039922105, | |
| "grad_norm": 0.4962082268099505, | |
| "learning_rate": 6.210927714024834e-07, | |
| "loss": 0.5592, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 2.5676728334956183, | |
| "grad_norm": 0.44324064458458867, | |
| "learning_rate": 6.129032473878582e-07, | |
| "loss": 0.5282, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 2.5705939629990264, | |
| "grad_norm": 0.43320029335686094, | |
| "learning_rate": 6.047645478757635e-07, | |
| "loss": 0.4562, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.573515092502434, | |
| "grad_norm": 0.45163259818241996, | |
| "learning_rate": 5.966767671530078e-07, | |
| "loss": 0.5501, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 2.5764362220058423, | |
| "grad_norm": 0.4552021870162526, | |
| "learning_rate": 5.88639998916506e-07, | |
| "loss": 0.5359, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 2.57935735150925, | |
| "grad_norm": 0.4604883255796744, | |
| "learning_rate": 5.806543362721945e-07, | |
| "loss": 0.5274, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 2.5822784810126582, | |
| "grad_norm": 0.41454954591229876, | |
| "learning_rate": 5.727198717339511e-07, | |
| "loss": 0.4765, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 2.5851996105160664, | |
| "grad_norm": 0.4325932298835654, | |
| "learning_rate": 5.648366972225222e-07, | |
| "loss": 0.5282, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 2.588120740019474, | |
| "grad_norm": 0.433038404211758, | |
| "learning_rate": 5.570049040644609e-07, | |
| "loss": 0.5137, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 2.5910418695228823, | |
| "grad_norm": 0.45496347053471886, | |
| "learning_rate": 5.492245829910664e-07, | |
| "loss": 0.4973, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 2.59396299902629, | |
| "grad_norm": 0.44790019595001196, | |
| "learning_rate": 5.414958241373358e-07, | |
| "loss": 0.4881, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 2.596884128529698, | |
| "grad_norm": 0.4506105888324243, | |
| "learning_rate": 5.33818717040917e-07, | |
| "loss": 0.5446, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 2.599805258033106, | |
| "grad_norm": 0.45920237343152387, | |
| "learning_rate": 5.261933506410722e-07, | |
| "loss": 0.5544, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.602726387536514, | |
| "grad_norm": 0.433968288589714, | |
| "learning_rate": 5.186198132776459e-07, | |
| "loss": 0.4792, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 2.6056475170399223, | |
| "grad_norm": 0.45054587006233904, | |
| "learning_rate": 5.110981926900488e-07, | |
| "loss": 0.5271, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.60856864654333, | |
| "grad_norm": 0.4402130423120147, | |
| "learning_rate": 5.036285760162307e-07, | |
| "loss": 0.4772, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 2.611489776046738, | |
| "grad_norm": 0.4560401508104557, | |
| "learning_rate": 4.962110497916789e-07, | |
| "loss": 0.5078, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.6144109055501463, | |
| "grad_norm": 0.4574934740247989, | |
| "learning_rate": 4.888456999484098e-07, | |
| "loss": 0.5177, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 2.617332035053554, | |
| "grad_norm": 0.42797938213430475, | |
| "learning_rate": 4.815326118139813e-07, | |
| "loss": 0.5068, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.620253164556962, | |
| "grad_norm": 0.4508198643702282, | |
| "learning_rate": 4.742718701104965e-07, | |
| "loss": 0.4836, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 2.62317429406037, | |
| "grad_norm": 0.4936637557435792, | |
| "learning_rate": 4.670635589536254e-07, | |
| "loss": 0.5688, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.626095423563778, | |
| "grad_norm": 0.4446810251135343, | |
| "learning_rate": 4.599077618516312e-07, | |
| "loss": 0.5213, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 2.629016553067186, | |
| "grad_norm": 0.4393121918571539, | |
| "learning_rate": 4.528045617044019e-07, | |
| "loss": 0.515, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.631937682570594, | |
| "grad_norm": 0.4546453714565344, | |
| "learning_rate": 4.457540408024896e-07, | |
| "loss": 0.5119, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 2.634858812074002, | |
| "grad_norm": 0.43919005512991516, | |
| "learning_rate": 4.387562808261575e-07, | |
| "loss": 0.5273, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.63777994157741, | |
| "grad_norm": 0.4491615831317555, | |
| "learning_rate": 4.3181136284443204e-07, | |
| "loss": 0.4994, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 2.6407010710808176, | |
| "grad_norm": 0.48434931343594284, | |
| "learning_rate": 4.249193673141694e-07, | |
| "loss": 0.5186, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.643622200584226, | |
| "grad_norm": 0.41259506287384623, | |
| "learning_rate": 4.180803740791156e-07, | |
| "loss": 0.5319, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 2.646543330087634, | |
| "grad_norm": 0.4076433170920647, | |
| "learning_rate": 4.112944623689863e-07, | |
| "loss": 0.5178, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.6494644595910417, | |
| "grad_norm": 0.421345630456112, | |
| "learning_rate": 4.0456171079854833e-07, | |
| "loss": 0.4951, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 2.65238558909445, | |
| "grad_norm": 0.4435092242276153, | |
| "learning_rate": 3.978821973667074e-07, | |
| "loss": 0.4837, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.655306718597858, | |
| "grad_norm": 0.44521873058867134, | |
| "learning_rate": 3.9125599945560866e-07, | |
| "loss": 0.5526, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 2.6582278481012658, | |
| "grad_norm": 0.45505539256105115, | |
| "learning_rate": 3.846831938297324e-07, | |
| "loss": 0.5158, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.661148977604674, | |
| "grad_norm": 0.433074017412774, | |
| "learning_rate": 3.7816385663501097e-07, | |
| "loss": 0.4809, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 2.6640701071080817, | |
| "grad_norm": 0.4502224951078667, | |
| "learning_rate": 3.716980633979489e-07, | |
| "loss": 0.5363, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.66699123661149, | |
| "grad_norm": 0.43979723124115566, | |
| "learning_rate": 3.6528588902473905e-07, | |
| "loss": 0.508, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 2.6699123661148976, | |
| "grad_norm": 0.46901546979749387, | |
| "learning_rate": 3.589274078004029e-07, | |
| "loss": 0.5489, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.6728334956183057, | |
| "grad_norm": 0.4432062648822634, | |
| "learning_rate": 3.5262269338792623e-07, | |
| "loss": 0.526, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 2.675754625121714, | |
| "grad_norm": 0.43511333463780544, | |
| "learning_rate": 3.463718188274046e-07, | |
| "loss": 0.5332, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.6786757546251216, | |
| "grad_norm": 0.38843912794513447, | |
| "learning_rate": 3.401748565352031e-07, | |
| "loss": 0.4997, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 2.68159688412853, | |
| "grad_norm": 0.41986039406023057, | |
| "learning_rate": 3.340318783031099e-07, | |
| "loss": 0.4804, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.6845180136319375, | |
| "grad_norm": 0.44648799987073357, | |
| "learning_rate": 3.279429552975094e-07, | |
| "loss": 0.57, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 2.6874391431353457, | |
| "grad_norm": 0.38625226404630086, | |
| "learning_rate": 3.219081580585548e-07, | |
| "loss": 0.418, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.6903602726387534, | |
| "grad_norm": 0.46495901890128216, | |
| "learning_rate": 3.159275564993558e-07, | |
| "loss": 0.562, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 2.6932814021421616, | |
| "grad_norm": 0.4542095138457356, | |
| "learning_rate": 3.100012199051627e-07, | |
| "loss": 0.48, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.6962025316455698, | |
| "grad_norm": 0.461034837465003, | |
| "learning_rate": 3.0412921693256657e-07, | |
| "loss": 0.5802, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.6991236611489775, | |
| "grad_norm": 0.4124404226906978, | |
| "learning_rate": 2.9831161560870346e-07, | |
| "loss": 0.5124, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.7020447906523857, | |
| "grad_norm": 0.420405602577591, | |
| "learning_rate": 2.9254848333046817e-07, | |
| "loss": 0.4959, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.704965920155794, | |
| "grad_norm": 0.42637463336396936, | |
| "learning_rate": 2.8683988686372956e-07, | |
| "loss": 0.529, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.7078870496592016, | |
| "grad_norm": 0.4156040595949887, | |
| "learning_rate": 2.811858923425609e-07, | |
| "loss": 0.5184, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.7108081791626093, | |
| "grad_norm": 0.4312166608407746, | |
| "learning_rate": 2.755865652684703e-07, | |
| "loss": 0.5438, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.7137293086660175, | |
| "grad_norm": 0.4369289965460609, | |
| "learning_rate": 2.7004197050964744e-07, | |
| "loss": 0.4978, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.7166504381694256, | |
| "grad_norm": 0.426344227648129, | |
| "learning_rate": 2.645521723002037e-07, | |
| "loss": 0.5041, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.7195715676728334, | |
| "grad_norm": 0.4417752376579622, | |
| "learning_rate": 2.591172342394349e-07, | |
| "loss": 0.5587, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.7224926971762415, | |
| "grad_norm": 0.457814912149392, | |
| "learning_rate": 2.537372192910825e-07, | |
| "loss": 0.5175, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.7254138266796497, | |
| "grad_norm": 0.45549699443779185, | |
| "learning_rate": 2.4841218978260194e-07, | |
| "loss": 0.5033, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.7283349561830574, | |
| "grad_norm": 0.44509990954989737, | |
| "learning_rate": 2.4314220740444595e-07, | |
| "loss": 0.5192, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.731256085686465, | |
| "grad_norm": 0.4542595414389855, | |
| "learning_rate": 2.3792733320934348e-07, | |
| "loss": 0.5193, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.7341772151898733, | |
| "grad_norm": 0.46788823553431785, | |
| "learning_rate": 2.3276762761159588e-07, | |
| "loss": 0.5226, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.7370983446932815, | |
| "grad_norm": 0.46548939441743264, | |
| "learning_rate": 2.276631503863763e-07, | |
| "loss": 0.5234, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.7400194741966892, | |
| "grad_norm": 0.43071296003232673, | |
| "learning_rate": 2.2261396066903916e-07, | |
| "loss": 0.4968, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.7429406037000974, | |
| "grad_norm": 0.4422590771192652, | |
| "learning_rate": 2.176201169544312e-07, | |
| "loss": 0.5135, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.7458617332035056, | |
| "grad_norm": 0.43839294000442797, | |
| "learning_rate": 2.12681677096217e-07, | |
| "loss": 0.5459, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.7487828627069133, | |
| "grad_norm": 0.4296191280132258, | |
| "learning_rate": 2.0779869830620692e-07, | |
| "loss": 0.4538, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.7517039922103215, | |
| "grad_norm": 0.4119484440139385, | |
| "learning_rate": 2.029712371536957e-07, | |
| "loss": 0.4839, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.754625121713729, | |
| "grad_norm": 0.4512158763524109, | |
| "learning_rate": 1.9819934956480603e-07, | |
| "loss": 0.5445, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.7575462512171374, | |
| "grad_norm": 0.4212292286246555, | |
| "learning_rate": 1.9348309082184102e-07, | |
| "loss": 0.4934, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.760467380720545, | |
| "grad_norm": 0.44496919909800775, | |
| "learning_rate": 1.888225155626433e-07, | |
| "loss": 0.5456, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.7633885102239533, | |
| "grad_norm": 0.41713723572900646, | |
| "learning_rate": 1.8421767777996425e-07, | |
| "loss": 0.4462, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.7663096397273614, | |
| "grad_norm": 0.4457624171080001, | |
| "learning_rate": 1.7966863082083507e-07, | |
| "loss": 0.5427, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 0.4348007177346548, | |
| "learning_rate": 1.7517542738595071e-07, | |
| "loss": 0.5568, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.7721518987341773, | |
| "grad_norm": 0.41399848976523823, | |
| "learning_rate": 1.7073811952905862e-07, | |
| "loss": 0.5083, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.775073028237585, | |
| "grad_norm": 0.43232436981734923, | |
| "learning_rate": 1.6635675865635859e-07, | |
| "loss": 0.4505, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.7779941577409932, | |
| "grad_norm": 0.4750164143516587, | |
| "learning_rate": 1.6203139552590175e-07, | |
| "loss": 0.5831, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.780915287244401, | |
| "grad_norm": 0.39972505599387415, | |
| "learning_rate": 1.5776208024700702e-07, | |
| "loss": 0.4751, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.783836416747809, | |
| "grad_norm": 0.42708956986618357, | |
| "learning_rate": 1.5354886227967934e-07, | |
| "loss": 0.532, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.7867575462512173, | |
| "grad_norm": 0.4435213719711565, | |
| "learning_rate": 1.4939179043403474e-07, | |
| "loss": 0.5347, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.789678675754625, | |
| "grad_norm": 0.4416374818886015, | |
| "learning_rate": 1.4529091286973994e-07, | |
| "loss": 0.5459, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.792599805258033, | |
| "grad_norm": 0.45153869521194906, | |
| "learning_rate": 1.4124627709544814e-07, | |
| "loss": 0.4918, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.7955209347614414, | |
| "grad_norm": 0.4440135260222283, | |
| "learning_rate": 1.3725792996825083e-07, | |
| "loss": 0.5016, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.798442064264849, | |
| "grad_norm": 0.39484795107770926, | |
| "learning_rate": 1.3332591769314006e-07, | |
| "loss": 0.5043, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.801363193768257, | |
| "grad_norm": 0.444235962409132, | |
| "learning_rate": 1.2945028582246445e-07, | |
| "loss": 0.5637, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.804284323271665, | |
| "grad_norm": 0.4583481416669065, | |
| "learning_rate": 1.2563107925540774e-07, | |
| "loss": 0.5017, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.807205452775073, | |
| "grad_norm": 0.4215520268166574, | |
| "learning_rate": 1.2186834223746612e-07, | |
| "loss": 0.5136, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.810126582278481, | |
| "grad_norm": 0.43820018627048524, | |
| "learning_rate": 1.1816211835993684e-07, | |
| "loss": 0.4964, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.813047711781889, | |
| "grad_norm": 0.4420212484091613, | |
| "learning_rate": 1.1451245055941251e-07, | |
| "loss": 0.5112, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.8159688412852972, | |
| "grad_norm": 0.4365334218610012, | |
| "learning_rate": 1.1091938111728374e-07, | |
| "loss": 0.5273, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.818889970788705, | |
| "grad_norm": 0.4377608647741322, | |
| "learning_rate": 1.0738295165924783e-07, | |
| "loss": 0.5398, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.8218111002921127, | |
| "grad_norm": 0.45046127023142907, | |
| "learning_rate": 1.0390320315482982e-07, | |
| "loss": 0.5062, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.824732229795521, | |
| "grad_norm": 0.4019879301034501, | |
| "learning_rate": 1.0048017591690607e-07, | |
| "loss": 0.5114, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.827653359298929, | |
| "grad_norm": 0.4402917529148519, | |
| "learning_rate": 9.711390960123634e-08, | |
| "loss": 0.5359, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.8305744888023368, | |
| "grad_norm": 0.41904135734811376, | |
| "learning_rate": 9.380444320600591e-08, | |
| "loss": 0.4684, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.833495618305745, | |
| "grad_norm": 0.4659811321879393, | |
| "learning_rate": 9.055181507137245e-08, | |
| "loss": 0.5426, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.836416747809153, | |
| "grad_norm": 0.440634928654449, | |
| "learning_rate": 8.73560628790232e-08, | |
| "loss": 0.5258, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.839337877312561, | |
| "grad_norm": 0.44241019235537177, | |
| "learning_rate": 8.421722365173856e-08, | |
| "loss": 0.553, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.842259006815969, | |
| "grad_norm": 0.4085933163681831, | |
| "learning_rate": 8.113533375295968e-08, | |
| "loss": 0.4804, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.8451801363193767, | |
| "grad_norm": 0.42283609192144683, | |
| "learning_rate": 7.81104288863721e-08, | |
| "loss": 0.5268, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.848101265822785, | |
| "grad_norm": 0.4414452447566111, | |
| "learning_rate": 7.514254409549005e-08, | |
| "loss": 0.5059, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.8510223953261926, | |
| "grad_norm": 0.477876908869347, | |
| "learning_rate": 7.223171376325e-08, | |
| "loss": 0.5401, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.853943524829601, | |
| "grad_norm": 0.45667617596842697, | |
| "learning_rate": 6.93779716116122e-08, | |
| "loss": 0.5179, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.856864654333009, | |
| "grad_norm": 0.43661296400815486, | |
| "learning_rate": 6.658135070116978e-08, | |
| "loss": 0.479, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.8597857838364167, | |
| "grad_norm": 0.43447801900111893, | |
| "learning_rate": 6.384188343076914e-08, | |
| "loss": 0.4933, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.862706913339825, | |
| "grad_norm": 0.4567696696725672, | |
| "learning_rate": 6.115960153712963e-08, | |
| "loss": 0.5225, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.8656280428432326, | |
| "grad_norm": 0.4423237005020546, | |
| "learning_rate": 5.8534536094478345e-08, | |
| "loss": 0.5649, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.8685491723466408, | |
| "grad_norm": 0.41379887497949286, | |
| "learning_rate": 5.596671751419147e-08, | |
| "loss": 0.474, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.8714703018500485, | |
| "grad_norm": 0.4382842717392665, | |
| "learning_rate": 5.3456175544437936e-08, | |
| "loss": 0.529, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.8743914313534566, | |
| "grad_norm": 0.4463860151059245, | |
| "learning_rate": 5.100293926984023e-08, | |
| "loss": 0.4909, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.877312560856865, | |
| "grad_norm": 0.4057084453405752, | |
| "learning_rate": 4.860703711113246e-08, | |
| "loss": 0.5029, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.8802336903602725, | |
| "grad_norm": 0.42518495194251527, | |
| "learning_rate": 4.626849682483392e-08, | |
| "loss": 0.5376, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.8831548198636807, | |
| "grad_norm": 0.45413911287155506, | |
| "learning_rate": 4.398734550292716e-08, | |
| "loss": 0.5467, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.8860759493670884, | |
| "grad_norm": 0.4142515305933239, | |
| "learning_rate": 4.176360957254266e-08, | |
| "loss": 0.4568, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.8889970788704966, | |
| "grad_norm": 0.435060369756503, | |
| "learning_rate": 3.959731479565465e-08, | |
| "loss": 0.5319, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.8919182083739043, | |
| "grad_norm": 0.43929179209673974, | |
| "learning_rate": 3.748848626878132e-08, | |
| "loss": 0.5782, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.8948393378773125, | |
| "grad_norm": 0.4061515904911967, | |
| "learning_rate": 3.543714842269508e-08, | |
| "loss": 0.462, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.8977604673807207, | |
| "grad_norm": 0.46420928088916874, | |
| "learning_rate": 3.344332502213887e-08, | |
| "loss": 0.5218, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.9006815968841284, | |
| "grad_norm": 0.4346617171265348, | |
| "learning_rate": 3.150703916554976e-08, | |
| "loss": 0.4832, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.9036027263875366, | |
| "grad_norm": 0.45371971197536476, | |
| "learning_rate": 2.962831328479465e-08, | |
| "loss": 0.5087, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.9065238558909448, | |
| "grad_norm": 0.47344580171350403, | |
| "learning_rate": 2.7807169144906108e-08, | |
| "loss": 0.5639, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.9094449853943525, | |
| "grad_norm": 0.4343079692320858, | |
| "learning_rate": 2.6043627843834185e-08, | |
| "loss": 0.534, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.91236611489776, | |
| "grad_norm": 0.41472366849725406, | |
| "learning_rate": 2.4337709812199405e-08, | |
| "loss": 0.471, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.9152872444011684, | |
| "grad_norm": 0.41847164257771846, | |
| "learning_rate": 2.2689434813056856e-08, | |
| "loss": 0.5252, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.9182083739045765, | |
| "grad_norm": 0.4540400278681399, | |
| "learning_rate": 2.1098821941667457e-08, | |
| "loss": 0.5683, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.9211295034079843, | |
| "grad_norm": 0.43916840754064074, | |
| "learning_rate": 1.9565889625275945e-08, | |
| "loss": 0.4844, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.9240506329113924, | |
| "grad_norm": 0.4345074027775515, | |
| "learning_rate": 1.8090655622899356e-08, | |
| "loss": 0.4881, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.9269717624148006, | |
| "grad_norm": 0.4796907378778938, | |
| "learning_rate": 1.6673137025118303e-08, | |
| "loss": 0.5576, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.9298928919182083, | |
| "grad_norm": 0.4751897325081864, | |
| "learning_rate": 1.5313350253882143e-08, | |
| "loss": 0.5248, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.9328140214216165, | |
| "grad_norm": 0.4566958532328187, | |
| "learning_rate": 1.4011311062316347e-08, | |
| "loss": 0.5207, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.9357351509250242, | |
| "grad_norm": 0.4143592301627743, | |
| "learning_rate": 1.2767034534540978e-08, | |
| "loss": 0.483, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.9386562804284324, | |
| "grad_norm": 0.4151441658060871, | |
| "learning_rate": 1.158053508549528e-08, | |
| "loss": 0.4624, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.94157740993184, | |
| "grad_norm": 0.44443161265224573, | |
| "learning_rate": 1.0451826460772252e-08, | |
| "loss": 0.5295, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.9444985394352483, | |
| "grad_norm": 0.4244569182328679, | |
| "learning_rate": 9.38092173645766e-09, | |
| "loss": 0.5164, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.9474196689386565, | |
| "grad_norm": 0.42619195275616983, | |
| "learning_rate": 8.367833318980168e-09, | |
| "loss": 0.5474, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.950340798442064, | |
| "grad_norm": 0.44631991928366016, | |
| "learning_rate": 7.412572944965335e-09, | |
| "loss": 0.4825, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.9532619279454724, | |
| "grad_norm": 0.4028816094942567, | |
| "learning_rate": 6.515151681101284e-09, | |
| "loss": 0.507, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.95618305744888, | |
| "grad_norm": 0.4346618256973241, | |
| "learning_rate": 5.675579924011576e-09, | |
| "loss": 0.5173, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.9591041869522883, | |
| "grad_norm": 0.4213345827232603, | |
| "learning_rate": 4.89386740013198e-09, | |
| "loss": 0.4984, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.962025316455696, | |
| "grad_norm": 0.40444422520787165, | |
| "learning_rate": 4.170023165600001e-09, | |
| "loss": 0.5463, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.964946445959104, | |
| "grad_norm": 0.4158643063044851, | |
| "learning_rate": 3.5040556061483043e-09, | |
| "loss": 0.4929, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.9678675754625123, | |
| "grad_norm": 0.45968510533530904, | |
| "learning_rate": 2.8959724370092313e-09, | |
| "loss": 0.5618, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.97078870496592, | |
| "grad_norm": 0.45874234552575655, | |
| "learning_rate": 2.3457807028248734e-09, | |
| "loss": 0.5245, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.9737098344693282, | |
| "grad_norm": 0.4715342361612812, | |
| "learning_rate": 1.8534867775649169e-09, | |
| "loss": 0.5129, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.976630963972736, | |
| "grad_norm": 0.4184866954459166, | |
| "learning_rate": 1.4190963644522549e-09, | |
| "loss": 0.4844, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.979552093476144, | |
| "grad_norm": 0.41399082545193333, | |
| "learning_rate": 1.0426144958985974e-09, | |
| "loss": 0.5588, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.982473222979552, | |
| "grad_norm": 0.445097303921185, | |
| "learning_rate": 7.240455334456276e-10, | |
| "loss": 0.4614, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.98539435248296, | |
| "grad_norm": 0.4214572233508389, | |
| "learning_rate": 4.633931677139325e-10, | |
| "loss": 0.5183, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.988315481986368, | |
| "grad_norm": 0.4246075700283406, | |
| "learning_rate": 2.60660418360259e-10, | |
| "loss": 0.5797, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.991236611489776, | |
| "grad_norm": 0.4332145327952596, | |
| "learning_rate": 1.158496340419868e-10, | |
| "loss": 0.5132, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.994157740993184, | |
| "grad_norm": 0.4486911892623868, | |
| "learning_rate": 2.8962492393258546e-11, | |
| "loss": 0.5159, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.9970788704965923, | |
| "grad_norm": 0.4412334672273621, | |
| "learning_rate": 0.0, | |
| "loss": 0.5245, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.9970788704965923, | |
| "step": 1026, | |
| "total_flos": 883131557314560.0, | |
| "train_loss": 0.5971221869800523, | |
| "train_runtime": 15429.9437, | |
| "train_samples_per_second": 6.384, | |
| "train_steps_per_second": 0.066 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1026, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 883131557314560.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
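
The state above is the standard `trainer_state.json` a Hugging Face `Trainer` writes alongside a checkpoint: `log_history` holds one record per logged step (here `logging_steps: 1`), plus a final summary entry carrying `train_loss`, `train_runtime`, and throughput. A minimal sketch of how such a file might be consumed follows, assuming it sits at the path `trainer_state.json` (the filename and the use of matplotlib are assumptions, not part of the log itself):

```python
# Minimal sketch: load a Hugging Face Trainer state file and plot the
# loss and learning-rate curves recorded in "log_history".
# The path "trainer_state.json" is an assumption; point it at your checkpoint dir.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records carry "loss" and "learning_rate"; the trailing summary
# entry (train_loss, train_runtime, ...) lacks them, so filter it out.
records = [r for r in state["log_history"] if "loss" in r and "learning_rate" in r]

steps = [r["step"] for r in records]
losses = [r["loss"] for r in records]
lrs = [r["learning_rate"] for r in records]

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax1.plot(steps, losses)
ax1.set_ylabel("training loss")
ax2.plot(steps, lrs)
ax2.set_ylabel("learning rate")
ax2.set_xlabel("step")
fig.tight_layout()
plt.show()
```

Plotted this way, the run shows a short linear warmup followed by a decay to exactly 0.0 at `max_steps` (step 1026); the tail values shrinking quadratically toward zero (4.6e-10, 2.6e-10, 1.2e-10, 2.9e-11 over the last steps) are consistent with a cosine decay schedule, though the schedule type itself is not recorded in this file.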