| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.999092650468797, |
| "eval_steps": 500, |
| "global_step": 2973, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0010081661457808247, |
| "grad_norm": 34.57471518068361, |
| "learning_rate": 5e-05, |
| "loss": 3.9088, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0020163322915616494, |
| "grad_norm": 14.984387011888629, |
| "learning_rate": 5e-05, |
| "loss": 3.2615, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.003024498437342474, |
| "grad_norm": 10.049184651061143, |
| "learning_rate": 5e-05, |
| "loss": 2.9787, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.004032664583123299, |
| "grad_norm": 8.838958962379248, |
| "learning_rate": 5e-05, |
| "loss": 2.8635, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.005040830728904123, |
| "grad_norm": 3.9611442119429787, |
| "learning_rate": 5e-05, |
| "loss": 2.751, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.006048996874684948, |
| "grad_norm": 3.9695757524166546, |
| "learning_rate": 5e-05, |
| "loss": 2.7094, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.007057163020465773, |
| "grad_norm": 4.086711126397493, |
| "learning_rate": 5e-05, |
| "loss": 2.67, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.008065329166246598, |
| "grad_norm": 4.107991896963143, |
| "learning_rate": 5e-05, |
| "loss": 2.617, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.009073495312027422, |
| "grad_norm": 4.144442971978443, |
| "learning_rate": 5e-05, |
| "loss": 2.5949, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.010081661457808247, |
| "grad_norm": 4.106646166791991, |
| "learning_rate": 5e-05, |
| "loss": 2.5656, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.011089827603589071, |
| "grad_norm": 4.053417964656737, |
| "learning_rate": 5e-05, |
| "loss": 2.5501, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.012097993749369896, |
| "grad_norm": 4.136404620418808, |
| "learning_rate": 5e-05, |
| "loss": 2.5117, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.01310615989515072, |
| "grad_norm": 3.996802290533662, |
| "learning_rate": 5e-05, |
| "loss": 2.4996, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.014114326040931546, |
| "grad_norm": 3.849705821582526, |
| "learning_rate": 5e-05, |
| "loss": 2.487, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.01512249218671237, |
| "grad_norm": 3.876905025177348, |
| "learning_rate": 5e-05, |
| "loss": 2.4751, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.016130658332493195, |
| "grad_norm": 4.174026769641786, |
| "learning_rate": 5e-05, |
| "loss": 2.4676, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.017138824478274018, |
| "grad_norm": 3.6416371605800775, |
| "learning_rate": 5e-05, |
| "loss": 2.4028, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.018146990624054844, |
| "grad_norm": 3.632584282733268, |
| "learning_rate": 5e-05, |
| "loss": 2.4173, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.01915515676983567, |
| "grad_norm": 3.6069769034580834, |
| "learning_rate": 5e-05, |
| "loss": 2.4006, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.020163322915616493, |
| "grad_norm": 3.544857323595724, |
| "learning_rate": 5e-05, |
| "loss": 2.3923, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02117148906139732, |
| "grad_norm": 3.5114294914477626, |
| "learning_rate": 5e-05, |
| "loss": 2.3706, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.022179655207178142, |
| "grad_norm": 3.4607286551586376, |
| "learning_rate": 5e-05, |
| "loss": 2.3337, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.02318782135295897, |
| "grad_norm": 3.5128484012447716, |
| "learning_rate": 5e-05, |
| "loss": 2.3594, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.02419598749873979, |
| "grad_norm": 3.4759482001503335, |
| "learning_rate": 5e-05, |
| "loss": 2.3424, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.025204153644520617, |
| "grad_norm": 3.4342708313075407, |
| "learning_rate": 5e-05, |
| "loss": 2.321, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.02621231979030144, |
| "grad_norm": 3.4635287305691396, |
| "learning_rate": 5e-05, |
| "loss": 2.2965, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.027220485936082266, |
| "grad_norm": 3.5484975768174545, |
| "learning_rate": 5e-05, |
| "loss": 2.2829, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.028228652081863093, |
| "grad_norm": 3.445709870621474, |
| "learning_rate": 5e-05, |
| "loss": 2.2953, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.029236818227643915, |
| "grad_norm": 3.5550881065258517, |
| "learning_rate": 5e-05, |
| "loss": 2.2736, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.03024498437342474, |
| "grad_norm": 3.453768977491902, |
| "learning_rate": 5e-05, |
| "loss": 2.2509, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03125315051920557, |
| "grad_norm": 3.288221262065957, |
| "learning_rate": 5e-05, |
| "loss": 2.2539, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.03226131666498639, |
| "grad_norm": 3.2957588848132113, |
| "learning_rate": 5e-05, |
| "loss": 2.222, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.03326948281076721, |
| "grad_norm": 3.326504022688874, |
| "learning_rate": 5e-05, |
| "loss": 2.2299, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.034277648956548036, |
| "grad_norm": 3.3869099538130536, |
| "learning_rate": 5e-05, |
| "loss": 2.2075, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.035285815102328866, |
| "grad_norm": 3.3039093489375793, |
| "learning_rate": 5e-05, |
| "loss": 2.2039, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.03629398124810969, |
| "grad_norm": 3.372398687640437, |
| "learning_rate": 5e-05, |
| "loss": 2.1876, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.03730214739389051, |
| "grad_norm": 3.3305428300095614, |
| "learning_rate": 5e-05, |
| "loss": 2.1848, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.03831031353967134, |
| "grad_norm": 3.153488406520241, |
| "learning_rate": 5e-05, |
| "loss": 2.168, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.039318479685452164, |
| "grad_norm": 3.1899907953902136, |
| "learning_rate": 5e-05, |
| "loss": 2.1406, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.040326645831232986, |
| "grad_norm": 3.1966061633238367, |
| "learning_rate": 5e-05, |
| "loss": 2.1458, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04133481197701381, |
| "grad_norm": 3.1424092213443884, |
| "learning_rate": 5e-05, |
| "loss": 2.1308, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.04234297812279464, |
| "grad_norm": 3.1232443108155943, |
| "learning_rate": 5e-05, |
| "loss": 2.1274, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.04335114426857546, |
| "grad_norm": 3.2221627259743633, |
| "learning_rate": 5e-05, |
| "loss": 2.1014, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.044359310414356284, |
| "grad_norm": 3.1413246825252155, |
| "learning_rate": 5e-05, |
| "loss": 2.1125, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.04536747656013711, |
| "grad_norm": 3.1011534555071405, |
| "learning_rate": 5e-05, |
| "loss": 2.0817, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.04637564270591794, |
| "grad_norm": 3.047593153180324, |
| "learning_rate": 5e-05, |
| "loss": 2.0662, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.04738380885169876, |
| "grad_norm": 3.0099009196968742, |
| "learning_rate": 5e-05, |
| "loss": 2.0795, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.04839197499747958, |
| "grad_norm": 2.8797605142116502, |
| "learning_rate": 5e-05, |
| "loss": 2.064, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.04940014114326041, |
| "grad_norm": 2.9135674114693804, |
| "learning_rate": 5e-05, |
| "loss": 2.0572, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.050408307289041235, |
| "grad_norm": 2.854983324411203, |
| "learning_rate": 5e-05, |
| "loss": 2.0437, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05141647343482206, |
| "grad_norm": 2.6836463449195183, |
| "learning_rate": 5e-05, |
| "loss": 2.0318, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.05242463958060288, |
| "grad_norm": 2.6293906040436927, |
| "learning_rate": 5e-05, |
| "loss": 2.0319, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.05343280572638371, |
| "grad_norm": 2.5302507817152065, |
| "learning_rate": 5e-05, |
| "loss": 2.0341, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.05444097187216453, |
| "grad_norm": 2.365010314655601, |
| "learning_rate": 5e-05, |
| "loss": 2.0192, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.055449138017945356, |
| "grad_norm": 2.2898929454078036, |
| "learning_rate": 5e-05, |
| "loss": 2.0123, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.056457304163726185, |
| "grad_norm": 2.1619235754274055, |
| "learning_rate": 5e-05, |
| "loss": 1.9955, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.05746547030950701, |
| "grad_norm": 1.9924556971488956, |
| "learning_rate": 5e-05, |
| "loss": 1.9743, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.05847363645528783, |
| "grad_norm": 1.8642154895286722, |
| "learning_rate": 5e-05, |
| "loss": 1.965, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.05948180260106865, |
| "grad_norm": 2.5911398208880607, |
| "learning_rate": 5e-05, |
| "loss": 2.0067, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.06048996874684948, |
| "grad_norm": 1.639201916804658, |
| "learning_rate": 5e-05, |
| "loss": 1.9476, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.061498134892630306, |
| "grad_norm": 1.4719820152496543, |
| "learning_rate": 5e-05, |
| "loss": 1.9788, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.06250630103841114, |
| "grad_norm": 1.334249704262187, |
| "learning_rate": 5e-05, |
| "loss": 1.9746, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.06351446718419196, |
| "grad_norm": 1.2045528079644199, |
| "learning_rate": 5e-05, |
| "loss": 1.9384, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.06452263332997278, |
| "grad_norm": 1.1167108623471675, |
| "learning_rate": 5e-05, |
| "loss": 1.9608, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0655307994757536, |
| "grad_norm": 1.0221312125696673, |
| "learning_rate": 5e-05, |
| "loss": 1.9638, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.06653896562153443, |
| "grad_norm": 0.8737146038745541, |
| "learning_rate": 5e-05, |
| "loss": 1.9518, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.06754713176731525, |
| "grad_norm": 0.7942690648353735, |
| "learning_rate": 5e-05, |
| "loss": 1.9362, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.06855529791309607, |
| "grad_norm": 0.6836365100733637, |
| "learning_rate": 5e-05, |
| "loss": 1.9519, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.06956346405887691, |
| "grad_norm": 0.6076738169177845, |
| "learning_rate": 5e-05, |
| "loss": 1.9278, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.07057163020465773, |
| "grad_norm": 0.5459271299084082, |
| "learning_rate": 5e-05, |
| "loss": 1.9116, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07157979635043855, |
| "grad_norm": 0.48476007136077964, |
| "learning_rate": 5e-05, |
| "loss": 1.9356, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.07258796249621938, |
| "grad_norm": 0.431119236964475, |
| "learning_rate": 5e-05, |
| "loss": 1.8993, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0735961286420002, |
| "grad_norm": 0.39233195771215057, |
| "learning_rate": 5e-05, |
| "loss": 1.9308, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.07460429478778102, |
| "grad_norm": 0.3525945811491024, |
| "learning_rate": 5e-05, |
| "loss": 1.9144, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.07561246093356185, |
| "grad_norm": 0.3147607611067091, |
| "learning_rate": 5e-05, |
| "loss": 1.9147, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.07662062707934268, |
| "grad_norm": 0.29445808394590856, |
| "learning_rate": 5e-05, |
| "loss": 1.9116, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0776287932251235, |
| "grad_norm": 0.26730669596893064, |
| "learning_rate": 5e-05, |
| "loss": 1.938, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.07863695937090433, |
| "grad_norm": 0.2541501927237772, |
| "learning_rate": 5e-05, |
| "loss": 1.9046, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.07964512551668515, |
| "grad_norm": 0.2402658098277575, |
| "learning_rate": 5e-05, |
| "loss": 1.9239, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.08065329166246597, |
| "grad_norm": 0.22676350053618127, |
| "learning_rate": 5e-05, |
| "loss": 1.8973, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0816614578082468, |
| "grad_norm": 0.21692458062593906, |
| "learning_rate": 5e-05, |
| "loss": 1.9027, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.08266962395402762, |
| "grad_norm": 0.20753970513804731, |
| "learning_rate": 5e-05, |
| "loss": 1.8857, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.08367779009980846, |
| "grad_norm": 0.19995124003410258, |
| "learning_rate": 5e-05, |
| "loss": 1.8915, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.08468595624558928, |
| "grad_norm": 0.18993359513015598, |
| "learning_rate": 5e-05, |
| "loss": 1.897, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0856941223913701, |
| "grad_norm": 0.189296051381214, |
| "learning_rate": 5e-05, |
| "loss": 1.903, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.08670228853715092, |
| "grad_norm": 0.1848921313288835, |
| "learning_rate": 5e-05, |
| "loss": 1.9063, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.08771045468293175, |
| "grad_norm": 0.24718165656407112, |
| "learning_rate": 5e-05, |
| "loss": 1.8911, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.08871862082871257, |
| "grad_norm": 0.17748622791573787, |
| "learning_rate": 5e-05, |
| "loss": 1.8771, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.08972678697449339, |
| "grad_norm": 0.17315877460124424, |
| "learning_rate": 5e-05, |
| "loss": 1.8772, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.09073495312027421, |
| "grad_norm": 0.16519480154282987, |
| "learning_rate": 5e-05, |
| "loss": 1.8926, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.09174311926605505, |
| "grad_norm": 0.1646588806987109, |
| "learning_rate": 5e-05, |
| "loss": 1.8871, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.09275128541183587, |
| "grad_norm": 0.16399339737997176, |
| "learning_rate": 5e-05, |
| "loss": 1.8774, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.0937594515576167, |
| "grad_norm": 0.1669428196999267, |
| "learning_rate": 5e-05, |
| "loss": 1.8834, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.09476761770339752, |
| "grad_norm": 1.0261114242764373, |
| "learning_rate": 5e-05, |
| "loss": 1.8693, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.09577578384917834, |
| "grad_norm": 0.1719496356360831, |
| "learning_rate": 5e-05, |
| "loss": 1.886, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.09678394999495916, |
| "grad_norm": 0.2790856954528175, |
| "learning_rate": 5e-05, |
| "loss": 1.8651, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.09779211614073999, |
| "grad_norm": 0.16174313169788473, |
| "learning_rate": 5e-05, |
| "loss": 1.886, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.09880028228652082, |
| "grad_norm": 0.15585221456592352, |
| "learning_rate": 5e-05, |
| "loss": 1.8844, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.09980844843230165, |
| "grad_norm": 0.16364893420163687, |
| "learning_rate": 5e-05, |
| "loss": 1.8588, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.10081661457808247, |
| "grad_norm": 0.15674565320396686, |
| "learning_rate": 5e-05, |
| "loss": 1.8484, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.10182478072386329, |
| "grad_norm": 0.16912516357614416, |
| "learning_rate": 5e-05, |
| "loss": 1.8667, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.10283294686964412, |
| "grad_norm": 0.17191340384387088, |
| "learning_rate": 5e-05, |
| "loss": 1.8787, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.10384111301542494, |
| "grad_norm": 0.16516975348246232, |
| "learning_rate": 5e-05, |
| "loss": 1.8583, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.10484927916120576, |
| "grad_norm": 1.9242450149861634, |
| "learning_rate": 5e-05, |
| "loss": 1.9162, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.1058574453069866, |
| "grad_norm": 0.16888127293984725, |
| "learning_rate": 5e-05, |
| "loss": 1.8751, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.10686561145276742, |
| "grad_norm": 0.15854633983815475, |
| "learning_rate": 5e-05, |
| "loss": 1.8563, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.10787377759854824, |
| "grad_norm": 0.16888919013834458, |
| "learning_rate": 5e-05, |
| "loss": 1.8511, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.10888194374432907, |
| "grad_norm": 0.15933324172053995, |
| "learning_rate": 5e-05, |
| "loss": 1.87, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.10989010989010989, |
| "grad_norm": 0.16397568361062448, |
| "learning_rate": 5e-05, |
| "loss": 1.8442, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.11089827603589071, |
| "grad_norm": 0.16652309668195364, |
| "learning_rate": 5e-05, |
| "loss": 1.8439, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.11190644218167153, |
| "grad_norm": 0.15799876365905016, |
| "learning_rate": 5e-05, |
| "loss": 1.8642, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.11291460832745237, |
| "grad_norm": 0.15119742114523702, |
| "learning_rate": 5e-05, |
| "loss": 1.8374, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1139227744732332, |
| "grad_norm": 0.17103775998292917, |
| "learning_rate": 5e-05, |
| "loss": 1.865, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.11493094061901402, |
| "grad_norm": 0.1683642083482424, |
| "learning_rate": 5e-05, |
| "loss": 1.8518, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.11593910676479484, |
| "grad_norm": 0.16671673058838882, |
| "learning_rate": 5e-05, |
| "loss": 1.8554, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.11694727291057566, |
| "grad_norm": 0.14987402234479336, |
| "learning_rate": 5e-05, |
| "loss": 1.8631, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.11795543905635648, |
| "grad_norm": 0.15527353891827372, |
| "learning_rate": 5e-05, |
| "loss": 1.8453, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.1189636052021373, |
| "grad_norm": 0.1679615027458173, |
| "learning_rate": 5e-05, |
| "loss": 1.8625, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.11997177134791814, |
| "grad_norm": 0.224122986925736, |
| "learning_rate": 5e-05, |
| "loss": 1.8657, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.12097993749369897, |
| "grad_norm": 0.1562370758852964, |
| "learning_rate": 5e-05, |
| "loss": 1.8477, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.12198810363947979, |
| "grad_norm": 0.18240341523398404, |
| "learning_rate": 5e-05, |
| "loss": 1.8515, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.12299626978526061, |
| "grad_norm": 0.15759149835129863, |
| "learning_rate": 5e-05, |
| "loss": 1.8476, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.12400443593104143, |
| "grad_norm": 0.16577117834023483, |
| "learning_rate": 5e-05, |
| "loss": 1.8364, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.12501260207682227, |
| "grad_norm": 0.15114077783588442, |
| "learning_rate": 5e-05, |
| "loss": 1.8534, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.12602076822260308, |
| "grad_norm": 0.17320420801320205, |
| "learning_rate": 5e-05, |
| "loss": 1.8533, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.12702893436838392, |
| "grad_norm": 0.1426802208439398, |
| "learning_rate": 5e-05, |
| "loss": 1.8492, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.12803710051416473, |
| "grad_norm": 0.1427745438880488, |
| "learning_rate": 5e-05, |
| "loss": 1.8515, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.12904526665994556, |
| "grad_norm": 0.14985816706678418, |
| "learning_rate": 5e-05, |
| "loss": 1.8359, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.13005343280572637, |
| "grad_norm": 0.14340309531623066, |
| "learning_rate": 5e-05, |
| "loss": 1.8503, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.1310615989515072, |
| "grad_norm": 0.1435206282235358, |
| "learning_rate": 5e-05, |
| "loss": 1.8517, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.13206976509728804, |
| "grad_norm": 0.14058170113730545, |
| "learning_rate": 5e-05, |
| "loss": 1.8429, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.13307793124306885, |
| "grad_norm": 0.14306330526901823, |
| "learning_rate": 5e-05, |
| "loss": 1.8301, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.1340860973888497, |
| "grad_norm": 0.14477090717659785, |
| "learning_rate": 5e-05, |
| "loss": 1.8601, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.1350942635346305, |
| "grad_norm": 0.14319080650257207, |
| "learning_rate": 5e-05, |
| "loss": 1.8437, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.13610242968041134, |
| "grad_norm": 0.14034122909510496, |
| "learning_rate": 5e-05, |
| "loss": 1.8259, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.13711059582619214, |
| "grad_norm": 0.13808662342530606, |
| "learning_rate": 5e-05, |
| "loss": 1.8434, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.13811876197197298, |
| "grad_norm": 0.15111039758004002, |
| "learning_rate": 5e-05, |
| "loss": 1.8507, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.13912692811775382, |
| "grad_norm": 0.1365342831509653, |
| "learning_rate": 5e-05, |
| "loss": 1.8358, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.14013509426353463, |
| "grad_norm": 0.13875807647032345, |
| "learning_rate": 5e-05, |
| "loss": 1.8397, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.14114326040931546, |
| "grad_norm": 0.14253547281592827, |
| "learning_rate": 5e-05, |
| "loss": 1.8313, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.14215142655509627, |
| "grad_norm": 0.14753781033459742, |
| "learning_rate": 5e-05, |
| "loss": 1.8487, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.1431595927008771, |
| "grad_norm": 0.13160128302364027, |
| "learning_rate": 5e-05, |
| "loss": 1.8184, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.14416775884665792, |
| "grad_norm": 0.14333676268082823, |
| "learning_rate": 5e-05, |
| "loss": 1.8261, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.14517592499243875, |
| "grad_norm": 0.1430079366271181, |
| "learning_rate": 5e-05, |
| "loss": 1.8191, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.1461840911382196, |
| "grad_norm": 0.14215288025785802, |
| "learning_rate": 5e-05, |
| "loss": 1.8243, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.1471922572840004, |
| "grad_norm": 0.5239016479752525, |
| "learning_rate": 5e-05, |
| "loss": 1.8419, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.14820042342978124, |
| "grad_norm": 0.14824799807909486, |
| "learning_rate": 5e-05, |
| "loss": 1.8268, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.14920858957556205, |
| "grad_norm": 0.1430248626948606, |
| "learning_rate": 5e-05, |
| "loss": 1.8267, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.15021675572134288, |
| "grad_norm": 0.14496122414220788, |
| "learning_rate": 5e-05, |
| "loss": 1.8427, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.1512249218671237, |
| "grad_norm": 0.1473177263441529, |
| "learning_rate": 5e-05, |
| "loss": 1.8355, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.15223308801290453, |
| "grad_norm": 0.14231490267928462, |
| "learning_rate": 5e-05, |
| "loss": 1.805, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.15324125415868536, |
| "grad_norm": 0.1418245299909162, |
| "learning_rate": 5e-05, |
| "loss": 1.8183, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.15424942030446617, |
| "grad_norm": 0.13753317806858326, |
| "learning_rate": 5e-05, |
| "loss": 1.8147, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.155257586450247, |
| "grad_norm": 0.13914926431077226, |
| "learning_rate": 5e-05, |
| "loss": 1.8119, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.15626575259602782, |
| "grad_norm": 0.1435782070136882, |
| "learning_rate": 5e-05, |
| "loss": 1.8165, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.15727391874180865, |
| "grad_norm": 0.14305520914603162, |
| "learning_rate": 5e-05, |
| "loss": 1.7945, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.15828208488758946, |
| "grad_norm": 0.15121415486642223, |
| "learning_rate": 5e-05, |
| "loss": 1.8184, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.1592902510333703, |
| "grad_norm": 0.1455230658958776, |
| "learning_rate": 5e-05, |
| "loss": 1.8347, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.16029841717915114, |
| "grad_norm": 0.38203744961546876, |
| "learning_rate": 5e-05, |
| "loss": 1.8339, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.16130658332493195, |
| "grad_norm": 0.15216313167006948, |
| "learning_rate": 5e-05, |
| "loss": 1.8241, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.16231474947071278, |
| "grad_norm": 0.14241802216869767, |
| "learning_rate": 5e-05, |
| "loss": 1.8183, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.1633229156164936, |
| "grad_norm": 0.16701614066822063, |
| "learning_rate": 5e-05, |
| "loss": 1.8266, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.16433108176227443, |
| "grad_norm": 0.14130437355543407, |
| "learning_rate": 5e-05, |
| "loss": 1.823, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.16533924790805524, |
| "grad_norm": 0.14144400494428122, |
| "learning_rate": 5e-05, |
| "loss": 1.8102, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.16634741405383607, |
| "grad_norm": 0.19337899524551297, |
| "learning_rate": 5e-05, |
| "loss": 1.8124, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.1673555801996169, |
| "grad_norm": 0.14367033024459142, |
| "learning_rate": 5e-05, |
| "loss": 1.8215, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.16836374634539772, |
| "grad_norm": 0.14832836884198922, |
| "learning_rate": 5e-05, |
| "loss": 1.8168, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.16937191249117856, |
| "grad_norm": 0.1465143571682479, |
| "learning_rate": 5e-05, |
| "loss": 1.8256, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.17038007863695936, |
| "grad_norm": 0.14542201739695612, |
| "learning_rate": 5e-05, |
| "loss": 1.801, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.1713882447827402, |
| "grad_norm": 0.13892745937383433, |
| "learning_rate": 5e-05, |
| "loss": 1.8057, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.172396410928521, |
| "grad_norm": 0.14274131608703794, |
| "learning_rate": 5e-05, |
| "loss": 1.8079, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.17340457707430185, |
| "grad_norm": 0.1401424607261012, |
| "learning_rate": 5e-05, |
| "loss": 1.8055, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.17441274322008268, |
| "grad_norm": 0.15769308735320056, |
| "learning_rate": 5e-05, |
| "loss": 1.7996, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.1754209093658635, |
| "grad_norm": 0.14070813717782912, |
| "learning_rate": 5e-05, |
| "loss": 1.8039, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.17642907551164433, |
| "grad_norm": 0.1578385284864902, |
| "learning_rate": 5e-05, |
| "loss": 1.8177, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.17743724165742514, |
| "grad_norm": 0.15862029917030843, |
| "learning_rate": 5e-05, |
| "loss": 1.8211, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.17844540780320597, |
| "grad_norm": 0.15757584940286892, |
| "learning_rate": 5e-05, |
| "loss": 1.8311, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.17945357394898678, |
| "grad_norm": 0.15109490816160354, |
| "learning_rate": 5e-05, |
| "loss": 1.8189, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.18046174009476762, |
| "grad_norm": 0.16394550016574783, |
| "learning_rate": 5e-05, |
| "loss": 1.8033, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.18146990624054843, |
| "grad_norm": 0.15861740081769296, |
| "learning_rate": 5e-05, |
| "loss": 1.8019, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.18247807238632927, |
| "grad_norm": 0.14363097977546618, |
| "learning_rate": 5e-05, |
| "loss": 1.8044, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.1834862385321101, |
| "grad_norm": 0.15924066731744524, |
| "learning_rate": 5e-05, |
| "loss": 1.7934, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.1844944046778909, |
| "grad_norm": 0.1409238970199551, |
| "learning_rate": 5e-05, |
| "loss": 1.8236, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.18550257082367175, |
| "grad_norm": 0.14448492106367192, |
| "learning_rate": 5e-05, |
| "loss": 1.7832, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.18651073696945256, |
| "grad_norm": 0.13579799309864551, |
| "learning_rate": 5e-05, |
| "loss": 1.8104, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.1875189031152334, |
| "grad_norm": 0.1519137140895342, |
| "learning_rate": 5e-05, |
| "loss": 1.8095, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.1885270692610142, |
| "grad_norm": 0.14284624647289126, |
| "learning_rate": 5e-05, |
| "loss": 1.8111, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.18953523540679504, |
| "grad_norm": 0.13804963935004147, |
| "learning_rate": 5e-05, |
| "loss": 1.7897, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.19054340155257588, |
| "grad_norm": 0.13890536326955857, |
| "learning_rate": 5e-05, |
| "loss": 1.7911, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.19155156769835668, |
| "grad_norm": 0.1454820197216778, |
| "learning_rate": 5e-05, |
| "loss": 1.8, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.19255973384413752, |
| "grad_norm": 0.14265790459239303, |
| "learning_rate": 5e-05, |
| "loss": 1.8011, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.19356789998991833, |
| "grad_norm": 0.15184150116266387, |
| "learning_rate": 5e-05, |
| "loss": 1.8358, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.19457606613569917, |
| "grad_norm": 0.13887936401292228, |
| "learning_rate": 5e-05, |
| "loss": 1.8029, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.19558423228147997, |
| "grad_norm": 0.15123788408580607, |
| "learning_rate": 5e-05, |
| "loss": 1.7918, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.1965923984272608, |
| "grad_norm": 0.4184962428270207, |
| "learning_rate": 5e-05, |
| "loss": 1.8033, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.19760056457304165, |
| "grad_norm": 0.13421178586714527, |
| "learning_rate": 5e-05, |
| "loss": 1.8121, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.19860873071882246, |
| "grad_norm": 0.14770772432026583, |
| "learning_rate": 5e-05, |
| "loss": 1.7971, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.1996168968646033, |
| "grad_norm": 0.14383182989087254, |
| "learning_rate": 5e-05, |
| "loss": 1.8008, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.2006250630103841, |
| "grad_norm": 0.1384392103492628, |
| "learning_rate": 5e-05, |
| "loss": 1.8115, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.20163322915616494, |
| "grad_norm": 0.14726904874212857, |
| "learning_rate": 5e-05, |
| "loss": 1.8155, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.20264139530194575, |
| "grad_norm": 0.1384824989118434, |
| "learning_rate": 5e-05, |
| "loss": 1.8092, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.20364956144772658, |
| "grad_norm": 0.13775347706113975, |
| "learning_rate": 5e-05, |
| "loss": 1.793, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.20465772759350742, |
| "grad_norm": 0.15774933663999155, |
| "learning_rate": 5e-05, |
| "loss": 1.806, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.20566589373928823, |
| "grad_norm": 0.14301341548219315, |
| "learning_rate": 5e-05, |
| "loss": 1.8072, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.20667405988506907, |
| "grad_norm": 0.169861283670675, |
| "learning_rate": 5e-05, |
| "loss": 1.7983, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.20768222603084988, |
| "grad_norm": 0.15504709124693639, |
| "learning_rate": 5e-05, |
| "loss": 1.8048, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.2086903921766307, |
| "grad_norm": 0.14436742876219333, |
| "learning_rate": 5e-05, |
| "loss": 1.7936, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.20969855832241152, |
| "grad_norm": 1.2062437482583328, |
| "learning_rate": 5e-05, |
| "loss": 1.8324, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.21070672446819236, |
| "grad_norm": 0.1649150229959874, |
| "learning_rate": 5e-05, |
| "loss": 1.7906, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.2117148906139732, |
| "grad_norm": 0.1460932612643998, |
| "learning_rate": 5e-05, |
| "loss": 1.8009, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.212723056759754, |
| "grad_norm": 0.14251437275063011, |
| "learning_rate": 5e-05, |
| "loss": 1.7927, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.21373122290553484, |
| "grad_norm": 0.15037931584645792, |
| "learning_rate": 5e-05, |
| "loss": 1.7999, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.21473938905131565, |
| "grad_norm": 0.1446855518280459, |
| "learning_rate": 5e-05, |
| "loss": 1.7884, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.21574755519709649, |
| "grad_norm": 0.14747066125003186, |
| "learning_rate": 5e-05, |
| "loss": 1.7736, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.2167557213428773, |
| "grad_norm": 0.14478801075472833, |
| "learning_rate": 5e-05, |
| "loss": 1.7958, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.21776388748865813, |
| "grad_norm": 0.14675607719308872, |
| "learning_rate": 5e-05, |
| "loss": 1.784, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.21877205363443897, |
| "grad_norm": 0.13905433311433602, |
| "learning_rate": 5e-05, |
| "loss": 1.781, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.21978021978021978, |
| "grad_norm": 0.15427823427776496, |
| "learning_rate": 5e-05, |
| "loss": 1.7826, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.2207883859260006, |
| "grad_norm": 0.13866445119988918, |
| "learning_rate": 5e-05, |
| "loss": 1.7772, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.22179655207178142, |
| "grad_norm": 0.15729430183656884, |
| "learning_rate": 5e-05, |
| "loss": 1.8089, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.22280471821756226, |
| "grad_norm": 0.1384346165139127, |
| "learning_rate": 5e-05, |
| "loss": 1.8027, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.22381288436334307, |
| "grad_norm": 0.15170585753242535, |
| "learning_rate": 5e-05, |
| "loss": 1.793, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.2248210505091239, |
| "grad_norm": 0.14604589286191189, |
| "learning_rate": 5e-05, |
| "loss": 1.7886, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.22582921665490474, |
| "grad_norm": 0.15226581106025078, |
| "learning_rate": 5e-05, |
| "loss": 1.7866, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.22683738280068555, |
| "grad_norm": 0.4148206066745425, |
| "learning_rate": 5e-05, |
| "loss": 1.8154, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.2278455489464664, |
| "grad_norm": 0.154524327856525, |
| "learning_rate": 5e-05, |
| "loss": 1.7785, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.2288537150922472, |
| "grad_norm": 0.14509919795107487, |
| "learning_rate": 5e-05, |
| "loss": 1.789, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.22986188123802803, |
| "grad_norm": 0.14848451528795917, |
| "learning_rate": 5e-05, |
| "loss": 1.803, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.23087004738380884, |
| "grad_norm": 0.1619137027086449, |
| "learning_rate": 5e-05, |
| "loss": 1.793, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.23187821352958968, |
| "grad_norm": 0.15893201293250522, |
| "learning_rate": 5e-05, |
| "loss": 1.7825, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.23288637967537051, |
| "grad_norm": 0.18006927954065935, |
| "learning_rate": 5e-05, |
| "loss": 1.7925, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.23389454582115132, |
| "grad_norm": 0.21554580188759084, |
| "learning_rate": 5e-05, |
| "loss": 1.7699, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.23490271196693216, |
| "grad_norm": 0.17549267236851238, |
| "learning_rate": 5e-05, |
| "loss": 1.7641, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.23591087811271297, |
| "grad_norm": 0.15014379853354953, |
| "learning_rate": 5e-05, |
| "loss": 1.8095, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.2369190442584938, |
| "grad_norm": 0.1445848774035969, |
| "learning_rate": 5e-05, |
| "loss": 1.7858, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.2379272104042746, |
| "grad_norm": 0.1606246837780276, |
| "learning_rate": 5e-05, |
| "loss": 1.7918, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.23893537655005545, |
| "grad_norm": 0.15316695958989793, |
| "learning_rate": 5e-05, |
| "loss": 1.7798, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.2399435426958363, |
| "grad_norm": 0.16374005732964989, |
| "learning_rate": 5e-05, |
| "loss": 1.7959, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.2409517088416171, |
| "grad_norm": 0.14355882787135935, |
| "learning_rate": 5e-05, |
| "loss": 1.7788, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.24195987498739793, |
| "grad_norm": 0.14247097062180922, |
| "learning_rate": 5e-05, |
| "loss": 1.7731, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.24296804113317874, |
| "grad_norm": 0.13912036094349547, |
| "learning_rate": 5e-05, |
| "loss": 1.7861, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.24397620727895958, |
| "grad_norm": 0.16920052818228684, |
| "learning_rate": 5e-05, |
| "loss": 1.7764, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.2449843734247404, |
| "grad_norm": 0.1455022483791817, |
| "learning_rate": 5e-05, |
| "loss": 1.783, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.24599253957052122, |
| "grad_norm": 0.15542518289693488, |
| "learning_rate": 5e-05, |
| "loss": 1.8016, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.24700070571630206, |
| "grad_norm": 0.15524505680074863, |
| "learning_rate": 5e-05, |
| "loss": 1.7909, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.24800887186208287, |
| "grad_norm": 0.1516919173418789, |
| "learning_rate": 5e-05, |
| "loss": 1.7882, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.2490170380078637, |
| "grad_norm": 0.1668727384239558, |
| "learning_rate": 5e-05, |
| "loss": 1.7863, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.25002520415364454, |
| "grad_norm": 0.17266897431534142, |
| "learning_rate": 5e-05, |
| "loss": 1.7668, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.25103337029942535, |
| "grad_norm": 0.1531271105909299, |
| "learning_rate": 5e-05, |
| "loss": 1.7835, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.25204153644520616, |
| "grad_norm": 0.15085666027016767, |
| "learning_rate": 5e-05, |
| "loss": 1.8045, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.25304970259098697, |
| "grad_norm": 0.15682683621010082, |
| "learning_rate": 5e-05, |
| "loss": 1.7693, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.25405786873676783, |
| "grad_norm": 0.15144607833613863, |
| "learning_rate": 5e-05, |
| "loss": 1.7774, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.25506603488254864, |
| "grad_norm": 0.14001585494695262, |
| "learning_rate": 5e-05, |
| "loss": 1.7901, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.25607420102832945, |
| "grad_norm": 0.19707640367045973, |
| "learning_rate": 5e-05, |
| "loss": 1.7838, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.2570823671741103, |
| "grad_norm": 0.12846973999730532, |
| "learning_rate": 5e-05, |
| "loss": 1.7763, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.2580905333198911, |
| "grad_norm": 0.16735151403436016, |
| "learning_rate": 5e-05, |
| "loss": 1.7713, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.25909869946567193, |
| "grad_norm": 0.14797938918139564, |
| "learning_rate": 5e-05, |
| "loss": 1.7742, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.26010686561145274, |
| "grad_norm": 0.15168862915568712, |
| "learning_rate": 5e-05, |
| "loss": 1.7794, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.2611150317572336, |
| "grad_norm": 0.16227121267503694, |
| "learning_rate": 5e-05, |
| "loss": 1.77, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.2621231979030144, |
| "grad_norm": 0.14066196762298472, |
| "learning_rate": 5e-05, |
| "loss": 1.7882, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2631313640487952, |
| "grad_norm": 0.13869952828234983, |
| "learning_rate": 5e-05, |
| "loss": 1.782, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.2641395301945761, |
| "grad_norm": 0.14487517640755312, |
| "learning_rate": 5e-05, |
| "loss": 1.779, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.2651476963403569, |
| "grad_norm": 0.13417492252768634, |
| "learning_rate": 5e-05, |
| "loss": 1.771, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.2661558624861377, |
| "grad_norm": 0.14848309582974037, |
| "learning_rate": 5e-05, |
| "loss": 1.7873, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.2671640286319185, |
| "grad_norm": 0.14221777255093823, |
| "learning_rate": 5e-05, |
| "loss": 1.789, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.2681721947776994, |
| "grad_norm": 0.13934641054898725, |
| "learning_rate": 5e-05, |
| "loss": 1.7813, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.2691803609234802, |
| "grad_norm": 0.1470259671529886, |
| "learning_rate": 5e-05, |
| "loss": 1.7584, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.270188527069261, |
| "grad_norm": 0.13951054708411406, |
| "learning_rate": 5e-05, |
| "loss": 1.7562, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.27119669321504186, |
| "grad_norm": 0.14853290147291082, |
| "learning_rate": 5e-05, |
| "loss": 1.7704, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.27220485936082267, |
| "grad_norm": 0.14895311614357434, |
| "learning_rate": 5e-05, |
| "loss": 1.7874, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2732130255066035, |
| "grad_norm": 0.13590434471079565, |
| "learning_rate": 5e-05, |
| "loss": 1.7796, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.2742211916523843, |
| "grad_norm": 0.14848968529004114, |
| "learning_rate": 5e-05, |
| "loss": 1.7733, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.27522935779816515, |
| "grad_norm": 0.14128642904473834, |
| "learning_rate": 5e-05, |
| "loss": 1.7761, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.27623752394394596, |
| "grad_norm": 0.15714325279208918, |
| "learning_rate": 5e-05, |
| "loss": 1.7793, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.27724569008972677, |
| "grad_norm": 0.13785114250535732, |
| "learning_rate": 5e-05, |
| "loss": 1.7724, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.27825385623550764, |
| "grad_norm": 0.14777719343647677, |
| "learning_rate": 5e-05, |
| "loss": 1.7776, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.27926202238128844, |
| "grad_norm": 0.15031370981427467, |
| "learning_rate": 5e-05, |
| "loss": 1.7702, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.28027018852706925, |
| "grad_norm": 0.13880569847429872, |
| "learning_rate": 5e-05, |
| "loss": 1.7712, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.28127835467285006, |
| "grad_norm": 0.14874303568984948, |
| "learning_rate": 5e-05, |
| "loss": 1.7668, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.2822865208186309, |
| "grad_norm": 0.14816035016590623, |
| "learning_rate": 5e-05, |
| "loss": 1.7576, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.28329468696441173, |
| "grad_norm": 0.14049002413491998, |
| "learning_rate": 5e-05, |
| "loss": 1.7693, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.28430285311019254, |
| "grad_norm": 0.1510130790384099, |
| "learning_rate": 5e-05, |
| "loss": 1.7484, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.2853110192559734, |
| "grad_norm": 0.13918487318352804, |
| "learning_rate": 5e-05, |
| "loss": 1.7642, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.2863191854017542, |
| "grad_norm": 0.1474578079181453, |
| "learning_rate": 5e-05, |
| "loss": 1.7744, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.287327351547535, |
| "grad_norm": 0.13820771228831047, |
| "learning_rate": 5e-05, |
| "loss": 1.7672, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.28833551769331583, |
| "grad_norm": 0.14351030264796166, |
| "learning_rate": 5e-05, |
| "loss": 1.7582, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.2893436838390967, |
| "grad_norm": 0.15670700153087316, |
| "learning_rate": 5e-05, |
| "loss": 1.7629, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.2903518499848775, |
| "grad_norm": 0.1783938883378467, |
| "learning_rate": 5e-05, |
| "loss": 1.7827, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.2913600161306583, |
| "grad_norm": 0.15333916211025533, |
| "learning_rate": 5e-05, |
| "loss": 1.7636, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.2923681822764392, |
| "grad_norm": 0.13236694666147217, |
| "learning_rate": 5e-05, |
| "loss": 1.7682, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.29337634842222, |
| "grad_norm": 0.13938856624825205, |
| "learning_rate": 5e-05, |
| "loss": 1.768, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.2943845145680008, |
| "grad_norm": 0.1314885900094198, |
| "learning_rate": 5e-05, |
| "loss": 1.7932, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.2953926807137816, |
| "grad_norm": 0.15583014570979986, |
| "learning_rate": 5e-05, |
| "loss": 1.758, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.2964008468595625, |
| "grad_norm": 0.14067581924618947, |
| "learning_rate": 5e-05, |
| "loss": 1.7721, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.2974090130053433, |
| "grad_norm": 0.15970423037745704, |
| "learning_rate": 5e-05, |
| "loss": 1.7731, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.2984171791511241, |
| "grad_norm": 0.13752711701400588, |
| "learning_rate": 5e-05, |
| "loss": 1.7674, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.29942534529690495, |
| "grad_norm": 0.1521446918249182, |
| "learning_rate": 5e-05, |
| "loss": 1.7721, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.30043351144268576, |
| "grad_norm": 0.1475193365401531, |
| "learning_rate": 5e-05, |
| "loss": 1.7457, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.30144167758846657, |
| "grad_norm": 0.16275272355252648, |
| "learning_rate": 5e-05, |
| "loss": 1.7625, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.3024498437342474, |
| "grad_norm": 0.15182041598893675, |
| "learning_rate": 5e-05, |
| "loss": 1.7597, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.30345800988002825, |
| "grad_norm": 0.14884368791325303, |
| "learning_rate": 5e-05, |
| "loss": 1.7472, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.30446617602580905, |
| "grad_norm": 0.1670052966040775, |
| "learning_rate": 5e-05, |
| "loss": 1.765, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.30547434217158986, |
| "grad_norm": 0.1463624528999074, |
| "learning_rate": 5e-05, |
| "loss": 1.786, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.3064825083173707, |
| "grad_norm": 0.16637180035101126, |
| "learning_rate": 5e-05, |
| "loss": 1.7483, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.30749067446315154, |
| "grad_norm": 0.14187795345800958, |
| "learning_rate": 5e-05, |
| "loss": 1.7453, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.30849884060893235, |
| "grad_norm": 0.15031420779115814, |
| "learning_rate": 5e-05, |
| "loss": 1.7623, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.30950700675471315, |
| "grad_norm": 0.14881086214174263, |
| "learning_rate": 5e-05, |
| "loss": 1.7562, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.310515172900494, |
| "grad_norm": 0.13856355863719944, |
| "learning_rate": 5e-05, |
| "loss": 1.77, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.3115233390462748, |
| "grad_norm": 0.154670739980791, |
| "learning_rate": 5e-05, |
| "loss": 1.7881, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.31253150519205564, |
| "grad_norm": 0.12865970113488273, |
| "learning_rate": 5e-05, |
| "loss": 1.7808, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3135396713378365, |
| "grad_norm": 0.13399014677184057, |
| "learning_rate": 5e-05, |
| "loss": 1.7543, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.3145478374836173, |
| "grad_norm": 0.14027126764064923, |
| "learning_rate": 5e-05, |
| "loss": 1.7619, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.3155560036293981, |
| "grad_norm": 0.13197916406773966, |
| "learning_rate": 5e-05, |
| "loss": 1.7532, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.3165641697751789, |
| "grad_norm": 0.13246622547168005, |
| "learning_rate": 5e-05, |
| "loss": 1.7724, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.3175723359209598, |
| "grad_norm": 0.15951916864488466, |
| "learning_rate": 5e-05, |
| "loss": 1.766, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.3185805020667406, |
| "grad_norm": 0.1348760333121085, |
| "learning_rate": 5e-05, |
| "loss": 1.7621, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.3195886682125214, |
| "grad_norm": 0.1519377070479792, |
| "learning_rate": 5e-05, |
| "loss": 1.765, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.3205968343583023, |
| "grad_norm": 0.14818327854333438, |
| "learning_rate": 5e-05, |
| "loss": 1.7636, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.3216050005040831, |
| "grad_norm": 0.14522476140759424, |
| "learning_rate": 5e-05, |
| "loss": 1.7479, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.3226131666498639, |
| "grad_norm": 0.14944731912062306, |
| "learning_rate": 5e-05, |
| "loss": 1.7668, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.3236213327956447, |
| "grad_norm": 0.15818576872525297, |
| "learning_rate": 5e-05, |
| "loss": 1.7709, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.32462949894142556, |
| "grad_norm": 0.13986279703460075, |
| "learning_rate": 5e-05, |
| "loss": 1.7724, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.3256376650872064, |
| "grad_norm": 0.1488733535408475, |
| "learning_rate": 5e-05, |
| "loss": 1.748, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.3266458312329872, |
| "grad_norm": 0.14710962194981353, |
| "learning_rate": 5e-05, |
| "loss": 1.7522, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.32765399737876805, |
| "grad_norm": 0.15431100217374796, |
| "learning_rate": 5e-05, |
| "loss": 1.7528, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.32866216352454886, |
| "grad_norm": 0.13109425651027415, |
| "learning_rate": 5e-05, |
| "loss": 1.781, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.32967032967032966, |
| "grad_norm": 0.16057027582711905, |
| "learning_rate": 5e-05, |
| "loss": 1.7629, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.3306784958161105, |
| "grad_norm": 0.1685848412347586, |
| "learning_rate": 5e-05, |
| "loss": 1.7537, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.33168666196189134, |
| "grad_norm": 0.14191134015434137, |
| "learning_rate": 5e-05, |
| "loss": 1.7649, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.33269482810767215, |
| "grad_norm": 0.14552531083316922, |
| "learning_rate": 5e-05, |
| "loss": 1.7454, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.33370299425345296, |
| "grad_norm": 0.13436840358793842, |
| "learning_rate": 5e-05, |
| "loss": 1.7607, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.3347111603992338, |
| "grad_norm": 0.1437049850393499, |
| "learning_rate": 5e-05, |
| "loss": 1.759, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.33571932654501463, |
| "grad_norm": 0.1479474964632027, |
| "learning_rate": 5e-05, |
| "loss": 1.7513, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.33672749269079544, |
| "grad_norm": 0.13190734076173918, |
| "learning_rate": 5e-05, |
| "loss": 1.7508, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.33773565883657625, |
| "grad_norm": 0.1388043068065826, |
| "learning_rate": 5e-05, |
| "loss": 1.7516, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.3387438249823571, |
| "grad_norm": 0.13435510062113534, |
| "learning_rate": 5e-05, |
| "loss": 1.7699, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.3397519911281379, |
| "grad_norm": 0.14201154202574512, |
| "learning_rate": 5e-05, |
| "loss": 1.7474, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.34076015727391873, |
| "grad_norm": 0.1295896147377835, |
| "learning_rate": 5e-05, |
| "loss": 1.7569, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.3417683234196996, |
| "grad_norm": 0.14909601316736315, |
| "learning_rate": 5e-05, |
| "loss": 1.7646, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.3427764895654804, |
| "grad_norm": 0.12623421831242762, |
| "learning_rate": 5e-05, |
| "loss": 1.7574, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3437846557112612, |
| "grad_norm": 0.1536075709940801, |
| "learning_rate": 5e-05, |
| "loss": 1.7522, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.344792821857042, |
| "grad_norm": 0.13677884574284213, |
| "learning_rate": 5e-05, |
| "loss": 1.7663, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.3458009880028229, |
| "grad_norm": 0.1452552313969236, |
| "learning_rate": 5e-05, |
| "loss": 1.7519, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.3468091541486037, |
| "grad_norm": 0.13100920298589408, |
| "learning_rate": 5e-05, |
| "loss": 1.7351, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.3478173202943845, |
| "grad_norm": 0.1529118284017465, |
| "learning_rate": 5e-05, |
| "loss": 1.7425, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.34882548644016537, |
| "grad_norm": 0.1449911362454515, |
| "learning_rate": 5e-05, |
| "loss": 1.7642, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.3498336525859462, |
| "grad_norm": 0.13800055137890146, |
| "learning_rate": 5e-05, |
| "loss": 1.7562, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.350841818731727, |
| "grad_norm": 0.13597864974899437, |
| "learning_rate": 5e-05, |
| "loss": 1.7589, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.3518499848775078, |
| "grad_norm": 0.1364345014185089, |
| "learning_rate": 5e-05, |
| "loss": 1.7617, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.35285815102328866, |
| "grad_norm": 0.13446515013689875, |
| "learning_rate": 5e-05, |
| "loss": 1.7451, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.35386631716906947, |
| "grad_norm": 0.13355960968251984, |
| "learning_rate": 5e-05, |
| "loss": 1.7484, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.3548744833148503, |
| "grad_norm": 0.1421313311554251, |
| "learning_rate": 5e-05, |
| "loss": 1.7596, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.35588264946063114, |
| "grad_norm": 0.14566126575906166, |
| "learning_rate": 5e-05, |
| "loss": 1.7312, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.35689081560641195, |
| "grad_norm": 0.13035111170688612, |
| "learning_rate": 5e-05, |
| "loss": 1.7614, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.35789898175219276, |
| "grad_norm": 0.13286517726961428, |
| "learning_rate": 5e-05, |
| "loss": 1.7472, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.35890714789797357, |
| "grad_norm": 0.1372842264280202, |
| "learning_rate": 5e-05, |
| "loss": 1.7767, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.35991531404375443, |
| "grad_norm": 0.14233910976123346, |
| "learning_rate": 5e-05, |
| "loss": 1.7592, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.36092348018953524, |
| "grad_norm": 0.13669422347097734, |
| "learning_rate": 5e-05, |
| "loss": 1.7538, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.36193164633531605, |
| "grad_norm": 0.15205769492979604, |
| "learning_rate": 5e-05, |
| "loss": 1.7576, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.36293981248109686, |
| "grad_norm": 0.13378904395785118, |
| "learning_rate": 5e-05, |
| "loss": 1.7626, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3639479786268777, |
| "grad_norm": 0.3089538056627945, |
| "learning_rate": 5e-05, |
| "loss": 1.7493, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.36495614477265853, |
| "grad_norm": 0.14212608488794012, |
| "learning_rate": 5e-05, |
| "loss": 1.743, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.36596431091843934, |
| "grad_norm": 0.14052901555742894, |
| "learning_rate": 5e-05, |
| "loss": 1.7326, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.3669724770642202, |
| "grad_norm": 0.1278029391584442, |
| "learning_rate": 5e-05, |
| "loss": 1.7438, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.367980643210001, |
| "grad_norm": 0.14825199869813654, |
| "learning_rate": 5e-05, |
| "loss": 1.7622, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.3689888093557818, |
| "grad_norm": 0.1463739970423256, |
| "learning_rate": 5e-05, |
| "loss": 1.7574, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.36999697550156263, |
| "grad_norm": 0.33813722142883335, |
| "learning_rate": 5e-05, |
| "loss": 1.7545, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.3710051416473435, |
| "grad_norm": 0.14054743655697394, |
| "learning_rate": 5e-05, |
| "loss": 1.7437, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.3720133077931243, |
| "grad_norm": 0.1515425002596235, |
| "learning_rate": 5e-05, |
| "loss": 1.7612, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.3730214739389051, |
| "grad_norm": 0.1389451504765318, |
| "learning_rate": 5e-05, |
| "loss": 1.7674, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.374029640084686, |
| "grad_norm": 0.13701439256551207, |
| "learning_rate": 5e-05, |
| "loss": 1.7611, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.3750378062304668, |
| "grad_norm": 0.14830435767997738, |
| "learning_rate": 5e-05, |
| "loss": 1.7508, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.3760459723762476, |
| "grad_norm": 0.1302612126588965, |
| "learning_rate": 5e-05, |
| "loss": 1.7452, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.3770541385220284, |
| "grad_norm": 0.15597780892702984, |
| "learning_rate": 5e-05, |
| "loss": 1.751, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.37806230466780927, |
| "grad_norm": 0.14754988055242355, |
| "learning_rate": 5e-05, |
| "loss": 1.7384, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.3790704708135901, |
| "grad_norm": 0.1314239062985928, |
| "learning_rate": 5e-05, |
| "loss": 1.7456, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.3800786369593709, |
| "grad_norm": 0.14764554065106597, |
| "learning_rate": 5e-05, |
| "loss": 1.7498, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.38108680310515175, |
| "grad_norm": 0.14732407564585398, |
| "learning_rate": 5e-05, |
| "loss": 1.7508, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.38209496925093256, |
| "grad_norm": 0.12259986325733822, |
| "learning_rate": 5e-05, |
| "loss": 1.7387, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.38310313539671337, |
| "grad_norm": 0.2808281947042048, |
| "learning_rate": 5e-05, |
| "loss": 1.7471, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3841113015424942, |
| "grad_norm": 0.1460713637825732, |
| "learning_rate": 5e-05, |
| "loss": 1.745, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.38511946768827504, |
| "grad_norm": 0.14483673080907972, |
| "learning_rate": 5e-05, |
| "loss": 1.7376, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.38612763383405585, |
| "grad_norm": 0.1378264320137719, |
| "learning_rate": 5e-05, |
| "loss": 1.7477, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.38713579997983666, |
| "grad_norm": 0.15073976260859417, |
| "learning_rate": 5e-05, |
| "loss": 1.737, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.3881439661256175, |
| "grad_norm": 0.16818259732146923, |
| "learning_rate": 5e-05, |
| "loss": 1.7322, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.38915213227139833, |
| "grad_norm": 0.14184910393165215, |
| "learning_rate": 5e-05, |
| "loss": 1.7523, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.39016029841717914, |
| "grad_norm": 0.1426597749017869, |
| "learning_rate": 5e-05, |
| "loss": 1.7265, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.39116846456295995, |
| "grad_norm": 0.15016411676683258, |
| "learning_rate": 5e-05, |
| "loss": 1.7492, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.3921766307087408, |
| "grad_norm": 0.1563835678153886, |
| "learning_rate": 5e-05, |
| "loss": 1.7418, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.3931847968545216, |
| "grad_norm": 0.14052651716274392, |
| "learning_rate": 5e-05, |
| "loss": 1.7375, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.39419296300030243, |
| "grad_norm": 0.15568693064666192, |
| "learning_rate": 5e-05, |
| "loss": 1.741, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.3952011291460833, |
| "grad_norm": 0.1451434834091193, |
| "learning_rate": 5e-05, |
| "loss": 1.7385, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.3962092952918641, |
| "grad_norm": 0.1544580671517428, |
| "learning_rate": 5e-05, |
| "loss": 1.7523, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.3972174614376449, |
| "grad_norm": 0.1381900723590304, |
| "learning_rate": 5e-05, |
| "loss": 1.7372, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.3982256275834257, |
| "grad_norm": 0.15968284750095027, |
| "learning_rate": 5e-05, |
| "loss": 1.7543, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.3992337937292066, |
| "grad_norm": 0.14776907712047604, |
| "learning_rate": 5e-05, |
| "loss": 1.7394, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.4002419598749874, |
| "grad_norm": 0.13501018733973855, |
| "learning_rate": 5e-05, |
| "loss": 1.7518, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.4012501260207682, |
| "grad_norm": 0.16874153226114574, |
| "learning_rate": 5e-05, |
| "loss": 1.7176, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.40225829216654907, |
| "grad_norm": 0.14973959531484612, |
| "learning_rate": 5e-05, |
| "loss": 1.7389, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.4032664583123299, |
| "grad_norm": 0.1547127773297765, |
| "learning_rate": 5e-05, |
| "loss": 1.7472, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4042746244581107, |
| "grad_norm": 0.1460619660702291, |
| "learning_rate": 5e-05, |
| "loss": 1.7224, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.4052827906038915, |
| "grad_norm": 0.1478494609390841, |
| "learning_rate": 5e-05, |
| "loss": 1.7697, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.40629095674967236, |
| "grad_norm": 0.1398560289935615, |
| "learning_rate": 5e-05, |
| "loss": 1.7312, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.40729912289545317, |
| "grad_norm": 0.13341445069107552, |
| "learning_rate": 5e-05, |
| "loss": 1.7499, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.408307289041234, |
| "grad_norm": 0.138920341930684, |
| "learning_rate": 5e-05, |
| "loss": 1.7369, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.40931545518701484, |
| "grad_norm": 0.1406715694146648, |
| "learning_rate": 5e-05, |
| "loss": 1.7555, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.41032362133279565, |
| "grad_norm": 0.13978239406309753, |
| "learning_rate": 5e-05, |
| "loss": 1.7327, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.41133178747857646, |
| "grad_norm": 0.138769156475813, |
| "learning_rate": 5e-05, |
| "loss": 1.7332, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.41233995362435727, |
| "grad_norm": 0.13985747952648767, |
| "learning_rate": 5e-05, |
| "loss": 1.7225, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.41334811977013813, |
| "grad_norm": 0.15206432665256198, |
| "learning_rate": 5e-05, |
| "loss": 1.7441, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.41435628591591894, |
| "grad_norm": 0.13933462078819422, |
| "learning_rate": 5e-05, |
| "loss": 1.7413, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.41536445206169975, |
| "grad_norm": 2.2257681112592564, |
| "learning_rate": 5e-05, |
| "loss": 1.7312, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.4163726182074806, |
| "grad_norm": 0.1690367229814024, |
| "learning_rate": 5e-05, |
| "loss": 1.7225, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.4173807843532614, |
| "grad_norm": 0.14142371640158033, |
| "learning_rate": 5e-05, |
| "loss": 1.7328, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.41838895049904223, |
| "grad_norm": 0.1443718391541402, |
| "learning_rate": 5e-05, |
| "loss": 1.7357, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.41939711664482304, |
| "grad_norm": 0.2948974879165241, |
| "learning_rate": 5e-05, |
| "loss": 1.7537, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.4204052827906039, |
| "grad_norm": 0.1502743223563147, |
| "learning_rate": 5e-05, |
| "loss": 1.7405, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.4214134489363847, |
| "grad_norm": 0.14613986823609737, |
| "learning_rate": 5e-05, |
| "loss": 1.7577, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.4224216150821655, |
| "grad_norm": 0.13894542829285816, |
| "learning_rate": 5e-05, |
| "loss": 1.7496, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.4234297812279464, |
| "grad_norm": 0.14043550419038592, |
| "learning_rate": 5e-05, |
| "loss": 1.7474, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4244379473737272, |
| "grad_norm": 0.13780554695644626, |
| "learning_rate": 5e-05, |
| "loss": 1.7456, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.425446113519508, |
| "grad_norm": 0.14639228010470762, |
| "learning_rate": 5e-05, |
| "loss": 1.7354, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.4264542796652888, |
| "grad_norm": 0.1520050370518254, |
| "learning_rate": 5e-05, |
| "loss": 1.7448, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.4274624458110697, |
| "grad_norm": 0.13683884365296195, |
| "learning_rate": 5e-05, |
| "loss": 1.7166, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.4284706119568505, |
| "grad_norm": 0.15001649638954673, |
| "learning_rate": 5e-05, |
| "loss": 1.7414, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.4294787781026313, |
| "grad_norm": 0.14911716164608302, |
| "learning_rate": 5e-05, |
| "loss": 1.743, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.43048694424841216, |
| "grad_norm": 0.14543327208617476, |
| "learning_rate": 5e-05, |
| "loss": 1.7389, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.43149511039419297, |
| "grad_norm": 0.14680781392232245, |
| "learning_rate": 5e-05, |
| "loss": 1.7493, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.4325032765399738, |
| "grad_norm": 0.14236841418985777, |
| "learning_rate": 5e-05, |
| "loss": 1.7366, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.4335114426857546, |
| "grad_norm": 0.14485011489323618, |
| "learning_rate": 5e-05, |
| "loss": 1.7171, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.43451960883153545, |
| "grad_norm": 0.13241397122387577, |
| "learning_rate": 5e-05, |
| "loss": 1.7482, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.43552777497731626, |
| "grad_norm": 0.14853569815804848, |
| "learning_rate": 5e-05, |
| "loss": 1.7347, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.43653594112309707, |
| "grad_norm": 0.13704108362000308, |
| "learning_rate": 5e-05, |
| "loss": 1.731, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.43754410726887794, |
| "grad_norm": 0.13536978394353463, |
| "learning_rate": 5e-05, |
| "loss": 1.7324, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.43855227341465874, |
| "grad_norm": 0.12979908901630022, |
| "learning_rate": 5e-05, |
| "loss": 1.7284, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.43956043956043955, |
| "grad_norm": 0.1470395254667769, |
| "learning_rate": 5e-05, |
| "loss": 1.7364, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.44056860570622036, |
| "grad_norm": 0.13171786834848215, |
| "learning_rate": 5e-05, |
| "loss": 1.7358, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.4415767718520012, |
| "grad_norm": 0.12371696720195713, |
| "learning_rate": 5e-05, |
| "loss": 1.7304, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.44258493799778204, |
| "grad_norm": 0.1396213895694086, |
| "learning_rate": 5e-05, |
| "loss": 1.7408, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.44359310414356284, |
| "grad_norm": 0.14030549983014554, |
| "learning_rate": 5e-05, |
| "loss": 1.7274, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4446012702893437, |
| "grad_norm": 0.13146485100443162, |
| "learning_rate": 5e-05, |
| "loss": 1.7235, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.4456094364351245, |
| "grad_norm": 0.14614864845330183, |
| "learning_rate": 5e-05, |
| "loss": 1.7316, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.4466176025809053, |
| "grad_norm": 0.14869530126493308, |
| "learning_rate": 5e-05, |
| "loss": 1.7198, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.44762576872668614, |
| "grad_norm": 0.14476843782545282, |
| "learning_rate": 5e-05, |
| "loss": 1.711, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.448633934872467, |
| "grad_norm": 0.14384381110954358, |
| "learning_rate": 5e-05, |
| "loss": 1.735, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.4496421010182478, |
| "grad_norm": 0.14075247779244654, |
| "learning_rate": 5e-05, |
| "loss": 1.7334, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.4506502671640286, |
| "grad_norm": 0.13764214124217602, |
| "learning_rate": 5e-05, |
| "loss": 1.7288, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.4516584333098095, |
| "grad_norm": 0.14818845579532672, |
| "learning_rate": 5e-05, |
| "loss": 1.7412, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.4526665994555903, |
| "grad_norm": 0.15111207644428082, |
| "learning_rate": 5e-05, |
| "loss": 1.7358, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.4536747656013711, |
| "grad_norm": 0.14168477020342377, |
| "learning_rate": 5e-05, |
| "loss": 1.7449, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4546829317471519, |
| "grad_norm": 0.15078242479582551, |
| "learning_rate": 5e-05, |
| "loss": 1.7211, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.4556910978929328, |
| "grad_norm": 0.1385439837802104, |
| "learning_rate": 5e-05, |
| "loss": 1.739, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.4566992640387136, |
| "grad_norm": 0.14591800685044598, |
| "learning_rate": 5e-05, |
| "loss": 1.725, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.4577074301844944, |
| "grad_norm": 0.22199306850298647, |
| "learning_rate": 5e-05, |
| "loss": 1.7465, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.45871559633027525, |
| "grad_norm": 0.15203439655834225, |
| "learning_rate": 5e-05, |
| "loss": 1.7268, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.45972376247605606, |
| "grad_norm": 0.13798415129149033, |
| "learning_rate": 5e-05, |
| "loss": 1.7438, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.4607319286218369, |
| "grad_norm": 0.13188851417331895, |
| "learning_rate": 5e-05, |
| "loss": 1.7236, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.4617400947676177, |
| "grad_norm": 0.1334541786269807, |
| "learning_rate": 5e-05, |
| "loss": 1.7299, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.46274826091339855, |
| "grad_norm": 0.1377200829622698, |
| "learning_rate": 5e-05, |
| "loss": 1.7099, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.46375642705917935, |
| "grad_norm": 0.14092160238477452, |
| "learning_rate": 5e-05, |
| "loss": 1.7491, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.46476459320496016, |
| "grad_norm": 0.12847342176262316, |
| "learning_rate": 5e-05, |
| "loss": 1.7287, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.46577275935074103, |
| "grad_norm": 0.15559256378324404, |
| "learning_rate": 5e-05, |
| "loss": 1.7376, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.46678092549652184, |
| "grad_norm": 0.12551572964931354, |
| "learning_rate": 5e-05, |
| "loss": 1.753, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.46778909164230265, |
| "grad_norm": 0.1364430200172465, |
| "learning_rate": 5e-05, |
| "loss": 1.7317, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.46879725778808345, |
| "grad_norm": 0.14685795918261627, |
| "learning_rate": 5e-05, |
| "loss": 1.739, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.4698054239338643, |
| "grad_norm": 0.13843596201019037, |
| "learning_rate": 5e-05, |
| "loss": 1.7327, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.47081359007964513, |
| "grad_norm": 0.14978074414352593, |
| "learning_rate": 5e-05, |
| "loss": 1.7122, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.47182175622542594, |
| "grad_norm": 0.1343468710676364, |
| "learning_rate": 5e-05, |
| "loss": 1.717, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.4728299223712068, |
| "grad_norm": 0.12935824246473474, |
| "learning_rate": 5e-05, |
| "loss": 1.7288, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.4738380885169876, |
| "grad_norm": 0.13786240231667163, |
| "learning_rate": 5e-05, |
| "loss": 1.7252, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.4748462546627684, |
| "grad_norm": 0.14555494126848026, |
| "learning_rate": 5e-05, |
| "loss": 1.7274, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.4758544208085492, |
| "grad_norm": 0.14024877099742716, |
| "learning_rate": 5e-05, |
| "loss": 1.7278, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.4768625869543301, |
| "grad_norm": 0.1409529955461895, |
| "learning_rate": 5e-05, |
| "loss": 1.7283, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.4778707531001109, |
| "grad_norm": 0.13352675400260733, |
| "learning_rate": 5e-05, |
| "loss": 1.7334, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.4788789192458917, |
| "grad_norm": 0.13271967028157422, |
| "learning_rate": 5e-05, |
| "loss": 1.73, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.4798870853916726, |
| "grad_norm": 0.14269298630641805, |
| "learning_rate": 5e-05, |
| "loss": 1.7339, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.4808952515374534, |
| "grad_norm": 0.13466898722756104, |
| "learning_rate": 5e-05, |
| "loss": 1.7362, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.4819034176832342, |
| "grad_norm": 0.14709845539406763, |
| "learning_rate": 5e-05, |
| "loss": 1.7369, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.482911583829015, |
| "grad_norm": 0.8605202681074835, |
| "learning_rate": 5e-05, |
| "loss": 1.7396, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.48391974997479587, |
| "grad_norm": 0.1408289867387891, |
| "learning_rate": 5e-05, |
| "loss": 1.7486, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.4849279161205767, |
| "grad_norm": 0.13874114391890766, |
| "learning_rate": 5e-05, |
| "loss": 1.7298, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.4859360822663575, |
| "grad_norm": 0.13518417994710039, |
| "learning_rate": 5e-05, |
| "loss": 1.7425, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.48694424841213835, |
| "grad_norm": 0.13536099460502093, |
| "learning_rate": 5e-05, |
| "loss": 1.7366, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.48795241455791916, |
| "grad_norm": 0.14571584731434253, |
| "learning_rate": 5e-05, |
| "loss": 1.7179, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.48896058070369997, |
| "grad_norm": 0.1532694552886724, |
| "learning_rate": 5e-05, |
| "loss": 1.7261, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.4899687468494808, |
| "grad_norm": 0.1488328657164774, |
| "learning_rate": 5e-05, |
| "loss": 1.7295, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.49097691299526164, |
| "grad_norm": 0.16219320768165912, |
| "learning_rate": 5e-05, |
| "loss": 1.7513, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.49198507914104245, |
| "grad_norm": 0.1567614713551956, |
| "learning_rate": 5e-05, |
| "loss": 1.7395, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.49299324528682326, |
| "grad_norm": 0.14386584627809132, |
| "learning_rate": 5e-05, |
| "loss": 1.7374, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.4940014114326041, |
| "grad_norm": 0.1477080265489417, |
| "learning_rate": 5e-05, |
| "loss": 1.7299, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.49500957757838493, |
| "grad_norm": 0.15832395917512415, |
| "learning_rate": 5e-05, |
| "loss": 1.7102, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.49601774372416574, |
| "grad_norm": 0.14645390760965665, |
| "learning_rate": 5e-05, |
| "loss": 1.7303, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.49702590986994655, |
| "grad_norm": 0.15695111138785844, |
| "learning_rate": 5e-05, |
| "loss": 1.7365, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.4980340760157274, |
| "grad_norm": 0.15125086541771027, |
| "learning_rate": 5e-05, |
| "loss": 1.7329, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.4990422421615082, |
| "grad_norm": 0.15968163890366974, |
| "learning_rate": 5e-05, |
| "loss": 1.7357, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.5000504083072891, |
| "grad_norm": 0.16114977122007354, |
| "learning_rate": 5e-05, |
| "loss": 1.7425, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.5010585744530699, |
| "grad_norm": 0.15366693494225167, |
| "learning_rate": 5e-05, |
| "loss": 1.7324, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.5020667405988507, |
| "grad_norm": 0.1702223823593283, |
| "learning_rate": 5e-05, |
| "loss": 1.7431, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.5030749067446315, |
| "grad_norm": 0.13707825885720634, |
| "learning_rate": 5e-05, |
| "loss": 1.727, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.5040830728904123, |
| "grad_norm": 0.17011916766699697, |
| "learning_rate": 5e-05, |
| "loss": 1.7499, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5050912390361931, |
| "grad_norm": 0.13836910508259959, |
| "learning_rate": 5e-05, |
| "loss": 1.7163, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.5060994051819739, |
| "grad_norm": 0.15250504212866517, |
| "learning_rate": 5e-05, |
| "loss": 1.7036, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.5071075713277549, |
| "grad_norm": 0.14003029207685944, |
| "learning_rate": 5e-05, |
| "loss": 1.7243, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.5081157374735357, |
| "grad_norm": 0.1402206682632946, |
| "learning_rate": 5e-05, |
| "loss": 1.7233, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.5091239036193165, |
| "grad_norm": 0.1386346284282238, |
| "learning_rate": 5e-05, |
| "loss": 1.7264, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.5101320697650973, |
| "grad_norm": 0.1501064183846461, |
| "learning_rate": 5e-05, |
| "loss": 1.703, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.5111402359108781, |
| "grad_norm": 0.13375944136661508, |
| "learning_rate": 5e-05, |
| "loss": 1.7252, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.5121484020566589, |
| "grad_norm": 0.1290192933245119, |
| "learning_rate": 5e-05, |
| "loss": 1.745, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.5131565682024397, |
| "grad_norm": 0.13695047818203324, |
| "learning_rate": 5e-05, |
| "loss": 1.7053, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.5141647343482206, |
| "grad_norm": 0.14082092939847418, |
| "learning_rate": 5e-05, |
| "loss": 1.7329, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5151729004940014, |
| "grad_norm": 0.13646757680134972, |
| "learning_rate": 5e-05, |
| "loss": 1.721, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.5161810666397822, |
| "grad_norm": 0.13407540360293163, |
| "learning_rate": 5e-05, |
| "loss": 1.7281, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.5171892327855631, |
| "grad_norm": 0.13098469909538138, |
| "learning_rate": 5e-05, |
| "loss": 1.7262, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.5181973989313439, |
| "grad_norm": 0.14710547224222956, |
| "learning_rate": 5e-05, |
| "loss": 1.7274, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5192055650771247, |
| "grad_norm": 0.13328204864391038, |
| "learning_rate": 5e-05, |
| "loss": 1.7219, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.5202137312229055, |
| "grad_norm": 0.13775879403810146, |
| "learning_rate": 5e-05, |
| "loss": 1.7106, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5212218973686864, |
| "grad_norm": 0.1361065570686086, |
| "learning_rate": 5e-05, |
| "loss": 1.7091, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.5222300635144672, |
| "grad_norm": 0.14499957520034, |
| "learning_rate": 5e-05, |
| "loss": 1.7251, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.523238229660248, |
| "grad_norm": 0.12979459429272397, |
| "learning_rate": 5e-05, |
| "loss": 1.725, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.5242463958060288, |
| "grad_norm": 0.14007157320777774, |
| "learning_rate": 5e-05, |
| "loss": 1.7235, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5252545619518096, |
| "grad_norm": 0.14159905194394676, |
| "learning_rate": 5e-05, |
| "loss": 1.7328, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.5262627280975904, |
| "grad_norm": 0.13713095256059554, |
| "learning_rate": 5e-05, |
| "loss": 1.7318, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.5272708942433713, |
| "grad_norm": 0.13585308519517678, |
| "learning_rate": 5e-05, |
| "loss": 1.7107, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.5282790603891522, |
| "grad_norm": 0.13652232276260395, |
| "learning_rate": 5e-05, |
| "loss": 1.7026, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.529287226534933, |
| "grad_norm": 0.14269519168803854, |
| "learning_rate": 5e-05, |
| "loss": 1.7168, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.5302953926807138, |
| "grad_norm": 0.1546096414052513, |
| "learning_rate": 5e-05, |
| "loss": 1.7075, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.5313035588264946, |
| "grad_norm": 0.1451610095802245, |
| "learning_rate": 5e-05, |
| "loss": 1.7191, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.5323117249722754, |
| "grad_norm": 0.14790875937903392, |
| "learning_rate": 5e-05, |
| "loss": 1.731, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.5333198911180562, |
| "grad_norm": 0.13845540973359793, |
| "learning_rate": 5e-05, |
| "loss": 1.712, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.534328057263837, |
| "grad_norm": 0.14603408754761985, |
| "learning_rate": 5e-05, |
| "loss": 1.716, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.535336223409618, |
| "grad_norm": 0.14745894068649418, |
| "learning_rate": 5e-05, |
| "loss": 1.7208, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.5363443895553988, |
| "grad_norm": 0.15007467357406096, |
| "learning_rate": 5e-05, |
| "loss": 1.7197, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.5373525557011796, |
| "grad_norm": 0.1371057758498084, |
| "learning_rate": 5e-05, |
| "loss": 1.7045, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.5383607218469604, |
| "grad_norm": 0.1359949109326032, |
| "learning_rate": 5e-05, |
| "loss": 1.728, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.5393688879927412, |
| "grad_norm": 0.13925029366834532, |
| "learning_rate": 5e-05, |
| "loss": 1.7302, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.540377054138522, |
| "grad_norm": 0.13327284315989982, |
| "learning_rate": 5e-05, |
| "loss": 1.7336, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.5413852202843028, |
| "grad_norm": 0.14631809213996722, |
| "learning_rate": 5e-05, |
| "loss": 1.7125, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.5423933864300837, |
| "grad_norm": 0.15397077312479548, |
| "learning_rate": 5e-05, |
| "loss": 1.7171, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.5434015525758645, |
| "grad_norm": 0.13392329130753536, |
| "learning_rate": 5e-05, |
| "loss": 1.7127, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.5444097187216453, |
| "grad_norm": 0.13337174252821263, |
| "learning_rate": 5e-05, |
| "loss": 1.7155, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5454178848674262, |
| "grad_norm": 0.13116751901128795, |
| "learning_rate": 5e-05, |
| "loss": 1.7024, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.546426051013207, |
| "grad_norm": 0.1428951010708818, |
| "learning_rate": 5e-05, |
| "loss": 1.7333, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.5474342171589878, |
| "grad_norm": 0.1290258219479217, |
| "learning_rate": 5e-05, |
| "loss": 1.7159, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.5484423833047686, |
| "grad_norm": 0.1253949130299628, |
| "learning_rate": 5e-05, |
| "loss": 1.7108, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.5494505494505495, |
| "grad_norm": 0.13389518594876162, |
| "learning_rate": 5e-05, |
| "loss": 1.7207, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.5504587155963303, |
| "grad_norm": 0.12847899268177035, |
| "learning_rate": 5e-05, |
| "loss": 1.7199, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.5514668817421111, |
| "grad_norm": 0.1349692531199703, |
| "learning_rate": 5e-05, |
| "loss": 1.7218, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.5524750478878919, |
| "grad_norm": 0.13660970361600422, |
| "learning_rate": 5e-05, |
| "loss": 1.7012, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.5534832140336727, |
| "grad_norm": 0.1289091568097243, |
| "learning_rate": 5e-05, |
| "loss": 1.7101, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.5544913801794535, |
| "grad_norm": 0.1267210192380341, |
| "learning_rate": 5e-05, |
| "loss": 1.748, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5554995463252344, |
| "grad_norm": 0.5082915304718543, |
| "learning_rate": 5e-05, |
| "loss": 1.7317, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.5565077124710153, |
| "grad_norm": 0.13703430635922975, |
| "learning_rate": 5e-05, |
| "loss": 1.7122, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.5575158786167961, |
| "grad_norm": 0.14737679979743612, |
| "learning_rate": 5e-05, |
| "loss": 1.7129, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.5585240447625769, |
| "grad_norm": 0.13721196520311135, |
| "learning_rate": 5e-05, |
| "loss": 1.7061, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.5595322109083577, |
| "grad_norm": 0.13752293899101511, |
| "learning_rate": 5e-05, |
| "loss": 1.7129, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.5605403770541385, |
| "grad_norm": 0.14405745219156493, |
| "learning_rate": 5e-05, |
| "loss": 1.732, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.5615485431999193, |
| "grad_norm": 0.14145457824757338, |
| "learning_rate": 5e-05, |
| "loss": 1.7129, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.5625567093457001, |
| "grad_norm": 0.2815416558938452, |
| "learning_rate": 5e-05, |
| "loss": 1.7132, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.563564875491481, |
| "grad_norm": 0.1348175656894975, |
| "learning_rate": 5e-05, |
| "loss": 1.7062, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.5645730416372619, |
| "grad_norm": 0.1404056458141734, |
| "learning_rate": 5e-05, |
| "loss": 1.7041, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5655812077830427, |
| "grad_norm": 0.13557060086657308, |
| "learning_rate": 5e-05, |
| "loss": 1.6929, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.5665893739288235, |
| "grad_norm": 0.1814667125480638, |
| "learning_rate": 5e-05, |
| "loss": 1.7188, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.5675975400746043, |
| "grad_norm": 0.1495980796266418, |
| "learning_rate": 5e-05, |
| "loss": 1.7131, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.5686057062203851, |
| "grad_norm": 0.1426252612271266, |
| "learning_rate": 5e-05, |
| "loss": 1.7111, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.5696138723661659, |
| "grad_norm": 0.14109547917090415, |
| "learning_rate": 5e-05, |
| "loss": 1.7157, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.5706220385119468, |
| "grad_norm": 1.1846012501687457, |
| "learning_rate": 5e-05, |
| "loss": 1.7389, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.5716302046577276, |
| "grad_norm": 0.13348962336896375, |
| "learning_rate": 5e-05, |
| "loss": 1.7395, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.5726383708035084, |
| "grad_norm": 0.14031380805805546, |
| "learning_rate": 5e-05, |
| "loss": 1.712, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.5736465369492892, |
| "grad_norm": 0.1310314007639987, |
| "learning_rate": 5e-05, |
| "loss": 1.693, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.57465470309507, |
| "grad_norm": 0.13576282604410486, |
| "learning_rate": 5e-05, |
| "loss": 1.7147, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5756628692408509, |
| "grad_norm": 0.13663564305484593, |
| "learning_rate": 5e-05, |
| "loss": 1.7218, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.5766710353866317, |
| "grad_norm": 0.13310876341309824, |
| "learning_rate": 5e-05, |
| "loss": 1.7115, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.5776792015324126, |
| "grad_norm": 0.13371585466789873, |
| "learning_rate": 5e-05, |
| "loss": 1.6953, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.5786873676781934, |
| "grad_norm": 0.1325972537789156, |
| "learning_rate": 5e-05, |
| "loss": 1.7171, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.5796955338239742, |
| "grad_norm": 0.1399994172395994, |
| "learning_rate": 5e-05, |
| "loss": 1.7164, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.580703699969755, |
| "grad_norm": 0.13203487633668445, |
| "learning_rate": 5e-05, |
| "loss": 1.7071, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.5817118661155358, |
| "grad_norm": 0.14386726865520158, |
| "learning_rate": 5e-05, |
| "loss": 1.7, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.5827200322613166, |
| "grad_norm": 0.13018151439861664, |
| "learning_rate": 5e-05, |
| "loss": 1.729, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.5837281984070974, |
| "grad_norm": 0.1447246872750649, |
| "learning_rate": 5e-05, |
| "loss": 1.6932, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.5847363645528784, |
| "grad_norm": 0.16595954582029865, |
| "learning_rate": 5e-05, |
| "loss": 1.6853, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.5857445306986592, |
| "grad_norm": 0.13139675694006203, |
| "learning_rate": 5e-05, |
| "loss": 1.6956, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.58675269684444, |
| "grad_norm": 0.1296186802645693, |
| "learning_rate": 5e-05, |
| "loss": 1.704, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.5877608629902208, |
| "grad_norm": 0.1417163740036467, |
| "learning_rate": 5e-05, |
| "loss": 1.7366, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.5887690291360016, |
| "grad_norm": 0.1393769542097642, |
| "learning_rate": 5e-05, |
| "loss": 1.7082, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.5897771952817824, |
| "grad_norm": 0.14036329264215688, |
| "learning_rate": 5e-05, |
| "loss": 1.7113, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.5907853614275632, |
| "grad_norm": 0.1301775902839359, |
| "learning_rate": 5e-05, |
| "loss": 1.7102, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.5917935275733441, |
| "grad_norm": 0.1390680768441475, |
| "learning_rate": 5e-05, |
| "loss": 1.7178, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.592801693719125, |
| "grad_norm": 1.234403190380023, |
| "learning_rate": 5e-05, |
| "loss": 1.7226, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.5938098598649058, |
| "grad_norm": 0.13833101198973996, |
| "learning_rate": 5e-05, |
| "loss": 1.7067, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.5948180260106866, |
| "grad_norm": 0.14494669886852304, |
| "learning_rate": 5e-05, |
| "loss": 1.7266, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5958261921564674, |
| "grad_norm": 0.14362347468568545, |
| "learning_rate": 5e-05, |
| "loss": 1.7143, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.5968343583022482, |
| "grad_norm": 0.1461984401259311, |
| "learning_rate": 5e-05, |
| "loss": 1.712, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.597842524448029, |
| "grad_norm": 0.1739881660017132, |
| "learning_rate": 5e-05, |
| "loss": 1.7098, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.5988506905938099, |
| "grad_norm": 0.18425300681672835, |
| "learning_rate": 5e-05, |
| "loss": 1.7064, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.5998588567395907, |
| "grad_norm": 0.19078389638041413, |
| "learning_rate": 5e-05, |
| "loss": 1.7178, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.6008670228853715, |
| "grad_norm": 0.1873949916072263, |
| "learning_rate": 5e-05, |
| "loss": 1.6959, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.6018751890311523, |
| "grad_norm": 0.17344176857191784, |
| "learning_rate": 5e-05, |
| "loss": 1.7149, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.6028833551769331, |
| "grad_norm": 0.17090514987747174, |
| "learning_rate": 5e-05, |
| "loss": 1.7267, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.603891521322714, |
| "grad_norm": 0.1487087124109567, |
| "learning_rate": 5e-05, |
| "loss": 1.6872, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.6048996874684948, |
| "grad_norm": 0.2528381347556544, |
| "learning_rate": 5e-05, |
| "loss": 1.7156, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6059078536142757, |
| "grad_norm": 0.15234832402505802, |
| "learning_rate": 5e-05, |
| "loss": 1.7009, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.6069160197600565, |
| "grad_norm": 0.14617071918285268, |
| "learning_rate": 5e-05, |
| "loss": 1.7271, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.6079241859058373, |
| "grad_norm": 0.1464277003567868, |
| "learning_rate": 5e-05, |
| "loss": 1.7059, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.6089323520516181, |
| "grad_norm": 0.1364383676186627, |
| "learning_rate": 5e-05, |
| "loss": 1.699, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.6099405181973989, |
| "grad_norm": 0.13348764051783354, |
| "learning_rate": 5e-05, |
| "loss": 1.6955, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.6109486843431797, |
| "grad_norm": 0.14467069260355409, |
| "learning_rate": 5e-05, |
| "loss": 1.7234, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.6119568504889605, |
| "grad_norm": 0.14430553492028114, |
| "learning_rate": 5e-05, |
| "loss": 1.7044, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.6129650166347415, |
| "grad_norm": 0.13383413430124796, |
| "learning_rate": 5e-05, |
| "loss": 1.7153, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.6139731827805223, |
| "grad_norm": 0.35332289652423776, |
| "learning_rate": 5e-05, |
| "loss": 1.7271, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.6149813489263031, |
| "grad_norm": 0.1347172057729184, |
| "learning_rate": 5e-05, |
| "loss": 1.7054, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6159895150720839, |
| "grad_norm": 0.14344071966912086, |
| "learning_rate": 5e-05, |
| "loss": 1.7372, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.6169976812178647, |
| "grad_norm": 0.13636203998882243, |
| "learning_rate": 5e-05, |
| "loss": 1.7011, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.6180058473636455, |
| "grad_norm": 0.1330854184385751, |
| "learning_rate": 5e-05, |
| "loss": 1.6987, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.6190140135094263, |
| "grad_norm": 0.13300020535045284, |
| "learning_rate": 5e-05, |
| "loss": 1.705, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.6200221796552072, |
| "grad_norm": 0.13095614639583578, |
| "learning_rate": 5e-05, |
| "loss": 1.7164, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.621030345800988, |
| "grad_norm": 0.13408249920884896, |
| "learning_rate": 5e-05, |
| "loss": 1.7114, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.6220385119467688, |
| "grad_norm": 0.13287977802166426, |
| "learning_rate": 5e-05, |
| "loss": 1.7052, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.6230466780925497, |
| "grad_norm": 0.13438176270298807, |
| "learning_rate": 5e-05, |
| "loss": 1.6987, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.6240548442383305, |
| "grad_norm": 0.14213719214134174, |
| "learning_rate": 5e-05, |
| "loss": 1.7098, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.6250630103841113, |
| "grad_norm": 0.14563144439803705, |
| "learning_rate": 5e-05, |
| "loss": 1.7003, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6260711765298921, |
| "grad_norm": 0.13777788141797628, |
| "learning_rate": 5e-05, |
| "loss": 1.6947, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.627079342675673, |
| "grad_norm": 0.14480162592645623, |
| "learning_rate": 5e-05, |
| "loss": 1.7073, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.6280875088214538, |
| "grad_norm": 0.1580924816537373, |
| "learning_rate": 5e-05, |
| "loss": 1.7055, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.6290956749672346, |
| "grad_norm": 0.14248189883225654, |
| "learning_rate": 5e-05, |
| "loss": 1.6967, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.6301038411130154, |
| "grad_norm": 0.1460039384954323, |
| "learning_rate": 5e-05, |
| "loss": 1.7049, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.6311120072587962, |
| "grad_norm": 0.14093391631694235, |
| "learning_rate": 5e-05, |
| "loss": 1.7084, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.632120173404577, |
| "grad_norm": 0.13217177954523113, |
| "learning_rate": 5e-05, |
| "loss": 1.7006, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.6331283395503579, |
| "grad_norm": 0.14346490701814205, |
| "learning_rate": 5e-05, |
| "loss": 1.706, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.6341365056961388, |
| "grad_norm": 0.15624821587275614, |
| "learning_rate": 5e-05, |
| "loss": 1.7058, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.6351446718419196, |
| "grad_norm": 0.13155447156335595, |
| "learning_rate": 5e-05, |
| "loss": 1.7206, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6361528379877004, |
| "grad_norm": 1.0827383527162002, |
| "learning_rate": 5e-05, |
| "loss": 1.7073, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.6371610041334812, |
| "grad_norm": 0.1374997471183784, |
| "learning_rate": 5e-05, |
| "loss": 1.7009, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.638169170279262, |
| "grad_norm": 0.3334907532113421, |
| "learning_rate": 5e-05, |
| "loss": 1.7053, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.6391773364250428, |
| "grad_norm": 0.13738631554336547, |
| "learning_rate": 5e-05, |
| "loss": 1.6925, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.6401855025708236, |
| "grad_norm": 0.14421445661445198, |
| "learning_rate": 5e-05, |
| "loss": 1.684, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.6411936687166045, |
| "grad_norm": 0.18704625439878989, |
| "learning_rate": 5e-05, |
| "loss": 1.7159, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.6422018348623854, |
| "grad_norm": 0.15492677537903046, |
| "learning_rate": 5e-05, |
| "loss": 1.7143, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.6432100010081662, |
| "grad_norm": 0.14329772207472202, |
| "learning_rate": 5e-05, |
| "loss": 1.6936, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.644218167153947, |
| "grad_norm": 0.14506569626786983, |
| "learning_rate": 5e-05, |
| "loss": 1.7204, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.6452263332997278, |
| "grad_norm": 0.14492261935555906, |
| "learning_rate": 5e-05, |
| "loss": 1.7256, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6462344994455086, |
| "grad_norm": 0.1444154532219128, |
| "learning_rate": 5e-05, |
| "loss": 1.7007, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.6472426655912894, |
| "grad_norm": 0.1564687421107206, |
| "learning_rate": 5e-05, |
| "loss": 1.7277, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.6482508317370703, |
| "grad_norm": 0.13974139251562445, |
| "learning_rate": 5e-05, |
| "loss": 1.7195, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.6492589978828511, |
| "grad_norm": 0.15722678783316818, |
| "learning_rate": 5e-05, |
| "loss": 1.7142, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.6502671640286319, |
| "grad_norm": 0.13965193592073247, |
| "learning_rate": 5e-05, |
| "loss": 1.7061, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.6512753301744127, |
| "grad_norm": 0.16062017984617108, |
| "learning_rate": 5e-05, |
| "loss": 1.699, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.6522834963201936, |
| "grad_norm": 0.13834940905057014, |
| "learning_rate": 5e-05, |
| "loss": 1.6782, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.6532916624659744, |
| "grad_norm": 0.14378694887801025, |
| "learning_rate": 5e-05, |
| "loss": 1.7134, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.6542998286117552, |
| "grad_norm": 0.1403877743768562, |
| "learning_rate": 5e-05, |
| "loss": 1.7375, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.6553079947575361, |
| "grad_norm": 0.14834934143675046, |
| "learning_rate": 5e-05, |
| "loss": 1.7174, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6563161609033169, |
| "grad_norm": 0.13349134651059139, |
| "learning_rate": 5e-05, |
| "loss": 1.6888, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.6573243270490977, |
| "grad_norm": 0.14300986280595238, |
| "learning_rate": 5e-05, |
| "loss": 1.6887, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.6583324931948785, |
| "grad_norm": 0.1366229209171474, |
| "learning_rate": 5e-05, |
| "loss": 1.6866, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.6593406593406593, |
| "grad_norm": 0.13129142830945212, |
| "learning_rate": 5e-05, |
| "loss": 1.6929, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.6603488254864401, |
| "grad_norm": 0.15534033844039313, |
| "learning_rate": 5e-05, |
| "loss": 1.7266, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.661356991632221, |
| "grad_norm": 0.13585701733479483, |
| "learning_rate": 5e-05, |
| "loss": 1.7184, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.6623651577780019, |
| "grad_norm": 0.14604955910857845, |
| "learning_rate": 5e-05, |
| "loss": 1.7064, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.6633733239237827, |
| "grad_norm": 0.13294919069862188, |
| "learning_rate": 5e-05, |
| "loss": 1.7013, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.6643814900695635, |
| "grad_norm": 0.14828211853774284, |
| "learning_rate": 5e-05, |
| "loss": 1.7178, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.6653896562153443, |
| "grad_norm": 0.14322058740077032, |
| "learning_rate": 5e-05, |
| "loss": 1.7017, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.6663978223611251, |
| "grad_norm": 0.22694947426265846, |
| "learning_rate": 5e-05, |
| "loss": 1.7233, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.6674059885069059, |
| "grad_norm": 0.13369181995377313, |
| "learning_rate": 5e-05, |
| "loss": 1.7122, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.6684141546526867, |
| "grad_norm": 0.12961955869168454, |
| "learning_rate": 5e-05, |
| "loss": 1.7103, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.6694223207984676, |
| "grad_norm": 0.12740690105639293, |
| "learning_rate": 5e-05, |
| "loss": 1.6991, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.6704304869442484, |
| "grad_norm": 0.1395080769602082, |
| "learning_rate": 5e-05, |
| "loss": 1.7003, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.6714386530900293, |
| "grad_norm": 0.13266459395217106, |
| "learning_rate": 5e-05, |
| "loss": 1.6896, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.6724468192358101, |
| "grad_norm": 0.13792503447640894, |
| "learning_rate": 5e-05, |
| "loss": 1.7094, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.6734549853815909, |
| "grad_norm": 0.13549499942407675, |
| "learning_rate": 5e-05, |
| "loss": 1.6852, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.6744631515273717, |
| "grad_norm": 0.1304887647842567, |
| "learning_rate": 5e-05, |
| "loss": 1.7224, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.6754713176731525, |
| "grad_norm": 0.13803972632497452, |
| "learning_rate": 5e-05, |
| "loss": 1.7074, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6764794838189334, |
| "grad_norm": 0.15238968656547802, |
| "learning_rate": 5e-05, |
| "loss": 1.697, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.6774876499647142, |
| "grad_norm": 0.13650396460895298, |
| "learning_rate": 5e-05, |
| "loss": 1.7006, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.678495816110495, |
| "grad_norm": 0.15406427775258108, |
| "learning_rate": 5e-05, |
| "loss": 1.7225, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.6795039822562758, |
| "grad_norm": 0.14975427688081136, |
| "learning_rate": 5e-05, |
| "loss": 1.6938, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.6805121484020566, |
| "grad_norm": 0.14574699614799233, |
| "learning_rate": 5e-05, |
| "loss": 1.6909, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.6815203145478375, |
| "grad_norm": 0.140769429118802, |
| "learning_rate": 5e-05, |
| "loss": 1.7179, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.6825284806936183, |
| "grad_norm": 0.1397379615230004, |
| "learning_rate": 5e-05, |
| "loss": 1.7137, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.6835366468393992, |
| "grad_norm": 0.14148911779317347, |
| "learning_rate": 5e-05, |
| "loss": 1.6874, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.68454481298518, |
| "grad_norm": 0.13796021307017373, |
| "learning_rate": 5e-05, |
| "loss": 1.7047, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.6855529791309608, |
| "grad_norm": 0.12981325688396536, |
| "learning_rate": 5e-05, |
| "loss": 1.6835, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6865611452767416, |
| "grad_norm": 0.1520733919033312, |
| "learning_rate": 5e-05, |
| "loss": 1.6752, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.6875693114225224, |
| "grad_norm": 0.13925368484326953, |
| "learning_rate": 5e-05, |
| "loss": 1.73, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.6885774775683032, |
| "grad_norm": 0.15191330782704446, |
| "learning_rate": 5e-05, |
| "loss": 1.7128, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.689585643714084, |
| "grad_norm": 0.1393558696693607, |
| "learning_rate": 5e-05, |
| "loss": 1.7002, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.690593809859865, |
| "grad_norm": 0.16282864818947926, |
| "learning_rate": 5e-05, |
| "loss": 1.6996, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.6916019760056458, |
| "grad_norm": 0.13659644056226283, |
| "learning_rate": 5e-05, |
| "loss": 1.6925, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.6926101421514266, |
| "grad_norm": 0.14230355233928796, |
| "learning_rate": 5e-05, |
| "loss": 1.7016, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.6936183082972074, |
| "grad_norm": 0.1320721416125541, |
| "learning_rate": 5e-05, |
| "loss": 1.6923, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.6946264744429882, |
| "grad_norm": 0.12974182615487503, |
| "learning_rate": 5e-05, |
| "loss": 1.6961, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.695634640588769, |
| "grad_norm": 0.14088384510278604, |
| "learning_rate": 5e-05, |
| "loss": 1.7033, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.6966428067345498, |
| "grad_norm": 0.12974104848865461, |
| "learning_rate": 5e-05, |
| "loss": 1.6946, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.6976509728803307, |
| "grad_norm": 0.1350755538708987, |
| "learning_rate": 5e-05, |
| "loss": 1.6926, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.6986591390261115, |
| "grad_norm": 0.13778448675665025, |
| "learning_rate": 5e-05, |
| "loss": 1.7203, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.6996673051718924, |
| "grad_norm": 0.17101586020957985, |
| "learning_rate": 5e-05, |
| "loss": 1.6938, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.7006754713176732, |
| "grad_norm": 0.1303067376289026, |
| "learning_rate": 5e-05, |
| "loss": 1.7038, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.701683637463454, |
| "grad_norm": 0.13399992160593918, |
| "learning_rate": 5e-05, |
| "loss": 1.7003, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.7026918036092348, |
| "grad_norm": 0.1382522486144173, |
| "learning_rate": 5e-05, |
| "loss": 1.6986, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.7036999697550156, |
| "grad_norm": 0.30559753438026604, |
| "learning_rate": 5e-05, |
| "loss": 1.7017, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.7047081359007965, |
| "grad_norm": 0.13451228910548593, |
| "learning_rate": 5e-05, |
| "loss": 1.6905, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.7057163020465773, |
| "grad_norm": 0.1231524957978636, |
| "learning_rate": 5e-05, |
| "loss": 1.6833, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7067244681923581, |
| "grad_norm": 0.13457654769947636, |
| "learning_rate": 5e-05, |
| "loss": 1.7098, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.7077326343381389, |
| "grad_norm": 0.13286181727814403, |
| "learning_rate": 5e-05, |
| "loss": 1.6984, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.7087408004839197, |
| "grad_norm": 0.13158152592049696, |
| "learning_rate": 5e-05, |
| "loss": 1.6898, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.7097489666297006, |
| "grad_norm": 0.12393794270845451, |
| "learning_rate": 5e-05, |
| "loss": 1.7019, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.7107571327754814, |
| "grad_norm": 0.8791329643142535, |
| "learning_rate": 5e-05, |
| "loss": 1.7017, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.7117652989212623, |
| "grad_norm": 0.13766657632572252, |
| "learning_rate": 5e-05, |
| "loss": 1.7083, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.7127734650670431, |
| "grad_norm": 0.1314801794244582, |
| "learning_rate": 5e-05, |
| "loss": 1.7014, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.7137816312128239, |
| "grad_norm": 0.14301185853218212, |
| "learning_rate": 5e-05, |
| "loss": 1.6946, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.7147897973586047, |
| "grad_norm": 0.13315850713688443, |
| "learning_rate": 5e-05, |
| "loss": 1.6916, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.7157979635043855, |
| "grad_norm": 0.15050185104963668, |
| "learning_rate": 5e-05, |
| "loss": 1.6951, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.7168061296501663, |
| "grad_norm": 0.193887861942083, |
| "learning_rate": 5e-05, |
| "loss": 1.7016, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.7178142957959471, |
| "grad_norm": 0.184705631693118, |
| "learning_rate": 5e-05, |
| "loss": 1.6808, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.7188224619417279, |
| "grad_norm": 0.20131120279722708, |
| "learning_rate": 5e-05, |
| "loss": 1.6846, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.7198306280875089, |
| "grad_norm": 0.1645196401547296, |
| "learning_rate": 5e-05, |
| "loss": 1.6853, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.7208387942332897, |
| "grad_norm": 0.15217136145669521, |
| "learning_rate": 5e-05, |
| "loss": 1.6905, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.7218469603790705, |
| "grad_norm": 0.14669369727694134, |
| "learning_rate": 5e-05, |
| "loss": 1.7173, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.7228551265248513, |
| "grad_norm": 0.1518741725825213, |
| "learning_rate": 5e-05, |
| "loss": 1.6945, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.7238632926706321, |
| "grad_norm": 0.15882664040360764, |
| "learning_rate": 5e-05, |
| "loss": 1.6833, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.7248714588164129, |
| "grad_norm": 1.1379498482193364, |
| "learning_rate": 5e-05, |
| "loss": 1.6763, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.7258796249621937, |
| "grad_norm": 0.1586630734368667, |
| "learning_rate": 5e-05, |
| "loss": 1.6993, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7268877911079746, |
| "grad_norm": 0.16714817199638557, |
| "learning_rate": 5e-05, |
| "loss": 1.7066, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.7278959572537554, |
| "grad_norm": 0.1439676312445262, |
| "learning_rate": 5e-05, |
| "loss": 1.7123, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.7289041233995363, |
| "grad_norm": 0.14412259743914776, |
| "learning_rate": 5e-05, |
| "loss": 1.6992, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.7299122895453171, |
| "grad_norm": 0.1444786558037397, |
| "learning_rate": 5e-05, |
| "loss": 1.6805, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.7309204556910979, |
| "grad_norm": 0.12930306224525773, |
| "learning_rate": 5e-05, |
| "loss": 1.6876, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.7319286218368787, |
| "grad_norm": 0.1406707248655597, |
| "learning_rate": 5e-05, |
| "loss": 1.7009, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.7329367879826595, |
| "grad_norm": 0.14693857868721935, |
| "learning_rate": 5e-05, |
| "loss": 1.696, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.7339449541284404, |
| "grad_norm": 0.14013055752478618, |
| "learning_rate": 5e-05, |
| "loss": 1.7079, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.7349531202742212, |
| "grad_norm": 0.44162578171698896, |
| "learning_rate": 5e-05, |
| "loss": 1.7047, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.735961286420002, |
| "grad_norm": 0.144202453186501, |
| "learning_rate": 5e-05, |
| "loss": 1.6722, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7369694525657828, |
| "grad_norm": 0.20350906402966454, |
| "learning_rate": 5e-05, |
| "loss": 1.6882, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.7379776187115636, |
| "grad_norm": 0.13670367287551335, |
| "learning_rate": 5e-05, |
| "loss": 1.6952, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.7389857848573445, |
| "grad_norm": 0.13733218254706195, |
| "learning_rate": 5e-05, |
| "loss": 1.707, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.7399939510031253, |
| "grad_norm": 0.15097417904303445, |
| "learning_rate": 5e-05, |
| "loss": 1.7045, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.7410021171489062, |
| "grad_norm": 0.13762128360800496, |
| "learning_rate": 5e-05, |
| "loss": 1.7033, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.742010283294687, |
| "grad_norm": 0.13213047670959707, |
| "learning_rate": 5e-05, |
| "loss": 1.6971, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.7430184494404678, |
| "grad_norm": 0.1499348434975417, |
| "learning_rate": 5e-05, |
| "loss": 1.6975, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.7440266155862486, |
| "grad_norm": 0.13723445349671687, |
| "learning_rate": 5e-05, |
| "loss": 1.6922, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.7450347817320294, |
| "grad_norm": 0.14545498886021488, |
| "learning_rate": 5e-05, |
| "loss": 1.6788, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.7460429478778102, |
| "grad_norm": 0.1287235990420584, |
| "learning_rate": 5e-05, |
| "loss": 1.6955, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.747051114023591, |
| "grad_norm": 0.1455471683094355, |
| "learning_rate": 5e-05, |
| "loss": 1.704, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.748059280169372, |
| "grad_norm": 0.14617565645976455, |
| "learning_rate": 5e-05, |
| "loss": 1.6939, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.7490674463151528, |
| "grad_norm": 0.1346326602528038, |
| "learning_rate": 5e-05, |
| "loss": 1.7025, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.7500756124609336, |
| "grad_norm": 0.15520918731889405, |
| "learning_rate": 5e-05, |
| "loss": 1.6939, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.7510837786067144, |
| "grad_norm": 0.13569763381362607, |
| "learning_rate": 5e-05, |
| "loss": 1.6966, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.7520919447524952, |
| "grad_norm": 0.1299510283700129, |
| "learning_rate": 5e-05, |
| "loss": 1.6987, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.753100110898276, |
| "grad_norm": 0.13419663557532094, |
| "learning_rate": 5e-05, |
| "loss": 1.6932, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.7541082770440568, |
| "grad_norm": 0.13169849020289098, |
| "learning_rate": 5e-05, |
| "loss": 1.7071, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.7551164431898377, |
| "grad_norm": 0.12522318521189696, |
| "learning_rate": 5e-05, |
| "loss": 1.6883, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.7561246093356185, |
| "grad_norm": 0.12835934020684264, |
| "learning_rate": 5e-05, |
| "loss": 1.6758, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7571327754813993, |
| "grad_norm": 0.13498286500128723, |
| "learning_rate": 5e-05, |
| "loss": 1.6985, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.7581409416271802, |
| "grad_norm": 0.13419888046777306, |
| "learning_rate": 5e-05, |
| "loss": 1.6846, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.759149107772961, |
| "grad_norm": 0.12600233145443784, |
| "learning_rate": 5e-05, |
| "loss": 1.6979, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.7601572739187418, |
| "grad_norm": 0.12934232671574464, |
| "learning_rate": 5e-05, |
| "loss": 1.6878, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.7611654400645226, |
| "grad_norm": 0.1950926929344581, |
| "learning_rate": 5e-05, |
| "loss": 1.7178, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.7621736062103035, |
| "grad_norm": 0.12859037517359037, |
| "learning_rate": 5e-05, |
| "loss": 1.7016, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.7631817723560843, |
| "grad_norm": 0.1409670994846807, |
| "learning_rate": 5e-05, |
| "loss": 1.6854, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.7641899385018651, |
| "grad_norm": 0.12288058164990474, |
| "learning_rate": 5e-05, |
| "loss": 1.6938, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.7651981046476459, |
| "grad_norm": 0.12523243562044944, |
| "learning_rate": 5e-05, |
| "loss": 1.7088, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.7662062707934267, |
| "grad_norm": 0.12400867326960456, |
| "learning_rate": 5e-05, |
| "loss": 1.6961, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.7672144369392075, |
| "grad_norm": 0.12733361317641823, |
| "learning_rate": 5e-05, |
| "loss": 1.6895, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.7682226030849884, |
| "grad_norm": 0.12401980643230025, |
| "learning_rate": 5e-05, |
| "loss": 1.6871, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.13258386366930572, |
| "learning_rate": 5e-05, |
| "loss": 1.7151, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.7702389353765501, |
| "grad_norm": 0.13554333774066002, |
| "learning_rate": 5e-05, |
| "loss": 1.6828, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.7712471015223309, |
| "grad_norm": 0.1298590698259134, |
| "learning_rate": 5e-05, |
| "loss": 1.6817, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.7722552676681117, |
| "grad_norm": 0.13225875098703646, |
| "learning_rate": 5e-05, |
| "loss": 1.7172, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.7732634338138925, |
| "grad_norm": 0.12372816362157169, |
| "learning_rate": 5e-05, |
| "loss": 1.697, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.7742715999596733, |
| "grad_norm": 0.8145033326229155, |
| "learning_rate": 5e-05, |
| "loss": 1.7296, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.7752797661054541, |
| "grad_norm": 0.13559791454225872, |
| "learning_rate": 5e-05, |
| "loss": 1.6963, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.776287932251235, |
| "grad_norm": 0.1308197405902908, |
| "learning_rate": 5e-05, |
| "loss": 1.6977, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.7772960983970159, |
| "grad_norm": 0.13209728678813237, |
| "learning_rate": 5e-05, |
| "loss": 1.6858, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.7783042645427967, |
| "grad_norm": 0.1278801420116914, |
| "learning_rate": 5e-05, |
| "loss": 1.6837, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.7793124306885775, |
| "grad_norm": 0.13513888346210853, |
| "learning_rate": 5e-05, |
| "loss": 1.6887, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.7803205968343583, |
| "grad_norm": 0.13570837058732832, |
| "learning_rate": 5e-05, |
| "loss": 1.694, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.7813287629801391, |
| "grad_norm": 0.13434724641320867, |
| "learning_rate": 5e-05, |
| "loss": 1.6922, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.7823369291259199, |
| "grad_norm": 0.12411873442488698, |
| "learning_rate": 5e-05, |
| "loss": 1.6832, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.7833450952717008, |
| "grad_norm": 0.13282243905863508, |
| "learning_rate": 5e-05, |
| "loss": 1.6787, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.7843532614174816, |
| "grad_norm": 0.14016303702305682, |
| "learning_rate": 5e-05, |
| "loss": 1.7033, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.7853614275632624, |
| "grad_norm": 0.12715810408217135, |
| "learning_rate": 5e-05, |
| "loss": 1.698, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.7863695937090432, |
| "grad_norm": 1.6383771081899785, |
| "learning_rate": 5e-05, |
| "loss": 1.6946, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.787377759854824, |
| "grad_norm": 0.1555175450872356, |
| "learning_rate": 5e-05, |
| "loss": 1.6939, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.7883859260006049, |
| "grad_norm": 0.12270274039737394, |
| "learning_rate": 5e-05, |
| "loss": 1.6934, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.7893940921463857, |
| "grad_norm": 0.14328346799244598, |
| "learning_rate": 5e-05, |
| "loss": 1.6772, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.7904022582921666, |
| "grad_norm": 0.1344035866062604, |
| "learning_rate": 5e-05, |
| "loss": 1.6731, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.7914104244379474, |
| "grad_norm": 0.12957396945165556, |
| "learning_rate": 5e-05, |
| "loss": 1.701, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.7924185905837282, |
| "grad_norm": 0.134892447245696, |
| "learning_rate": 5e-05, |
| "loss": 1.6861, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.793426756729509, |
| "grad_norm": 0.13944146412945518, |
| "learning_rate": 5e-05, |
| "loss": 1.6878, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.7944349228752898, |
| "grad_norm": 0.13517076378309315, |
| "learning_rate": 5e-05, |
| "loss": 1.6799, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.7954430890210706, |
| "grad_norm": 0.13052895695360262, |
| "learning_rate": 5e-05, |
| "loss": 1.6729, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.7964512551668514, |
| "grad_norm": 0.16070051048503994, |
| "learning_rate": 5e-05, |
| "loss": 1.6913, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.7974594213126324, |
| "grad_norm": 0.13432850463263618, |
| "learning_rate": 5e-05, |
| "loss": 1.6768, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.7984675874584132, |
| "grad_norm": 0.14983096856734876, |
| "learning_rate": 5e-05, |
| "loss": 1.6807, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.799475753604194, |
| "grad_norm": 0.14499086970343855, |
| "learning_rate": 5e-05, |
| "loss": 1.6788, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.8004839197499748, |
| "grad_norm": 0.14034782106855784, |
| "learning_rate": 5e-05, |
| "loss": 1.7087, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.8014920858957556, |
| "grad_norm": 0.138379791092538, |
| "learning_rate": 5e-05, |
| "loss": 1.6776, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.8025002520415364, |
| "grad_norm": 0.1482595466004464, |
| "learning_rate": 5e-05, |
| "loss": 1.6909, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.8035084181873172, |
| "grad_norm": 0.13548313971169001, |
| "learning_rate": 5e-05, |
| "loss": 1.6666, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.8045165843330981, |
| "grad_norm": 0.13487479450679285, |
| "learning_rate": 5e-05, |
| "loss": 1.689, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.805524750478879, |
| "grad_norm": 0.1432363848206779, |
| "learning_rate": 5e-05, |
| "loss": 1.697, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.8065329166246598, |
| "grad_norm": 0.13896327594504643, |
| "learning_rate": 5e-05, |
| "loss": 1.6911, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8075410827704406, |
| "grad_norm": 0.14338629590554086, |
| "learning_rate": 5e-05, |
| "loss": 1.6869, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.8085492489162214, |
| "grad_norm": 0.13281813618885874, |
| "learning_rate": 5e-05, |
| "loss": 1.6861, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.8095574150620022, |
| "grad_norm": 0.139985059403575, |
| "learning_rate": 5e-05, |
| "loss": 1.6827, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.810565581207783, |
| "grad_norm": 0.14363687868955394, |
| "learning_rate": 5e-05, |
| "loss": 1.6707, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.8115737473535639, |
| "grad_norm": 0.1336932116903534, |
| "learning_rate": 5e-05, |
| "loss": 1.6922, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.8125819134993447, |
| "grad_norm": 0.1529549893245701, |
| "learning_rate": 5e-05, |
| "loss": 1.6766, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.8135900796451255, |
| "grad_norm": 0.13672318865512173, |
| "learning_rate": 5e-05, |
| "loss": 1.6953, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.8145982457909063, |
| "grad_norm": 0.14190254697613094, |
| "learning_rate": 5e-05, |
| "loss": 1.6598, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.8156064119366871, |
| "grad_norm": 0.13913502141445533, |
| "learning_rate": 5e-05, |
| "loss": 1.687, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.816614578082468, |
| "grad_norm": 0.13641964764170883, |
| "learning_rate": 5e-05, |
| "loss": 1.7055, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.8176227442282488, |
| "grad_norm": 0.13599664312690396, |
| "learning_rate": 5e-05, |
| "loss": 1.705, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.8186309103740297, |
| "grad_norm": 0.15018714562444313, |
| "learning_rate": 5e-05, |
| "loss": 1.6857, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.8196390765198105, |
| "grad_norm": 0.14301860812865722, |
| "learning_rate": 5e-05, |
| "loss": 1.682, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.8206472426655913, |
| "grad_norm": 0.1344232562677402, |
| "learning_rate": 5e-05, |
| "loss": 1.6966, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.8216554088113721, |
| "grad_norm": 0.14041565750131477, |
| "learning_rate": 5e-05, |
| "loss": 1.6887, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.8226635749571529, |
| "grad_norm": 0.13764254491162634, |
| "learning_rate": 5e-05, |
| "loss": 1.6734, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.8236717411029337, |
| "grad_norm": 0.48603813362470627, |
| "learning_rate": 5e-05, |
| "loss": 1.7112, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.8246799072487145, |
| "grad_norm": 0.15630406069565908, |
| "learning_rate": 5e-05, |
| "loss": 1.702, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.8256880733944955, |
| "grad_norm": 0.14342007772006957, |
| "learning_rate": 5e-05, |
| "loss": 1.6846, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.8266962395402763, |
| "grad_norm": 0.12685666364606676, |
| "learning_rate": 5e-05, |
| "loss": 1.6745, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8277044056860571, |
| "grad_norm": 0.14328976861961018, |
| "learning_rate": 5e-05, |
| "loss": 1.676, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.8287125718318379, |
| "grad_norm": 0.14792477595331482, |
| "learning_rate": 5e-05, |
| "loss": 1.6945, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.8297207379776187, |
| "grad_norm": 0.12890024243258635, |
| "learning_rate": 5e-05, |
| "loss": 1.6946, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.8307289041233995, |
| "grad_norm": 0.1394381946621567, |
| "learning_rate": 5e-05, |
| "loss": 1.6897, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.8317370702691803, |
| "grad_norm": 0.14871567892964102, |
| "learning_rate": 5e-05, |
| "loss": 1.6665, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.8327452364149612, |
| "grad_norm": 0.1346517356681281, |
| "learning_rate": 5e-05, |
| "loss": 1.6892, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.833753402560742, |
| "grad_norm": 0.13954790270024078, |
| "learning_rate": 5e-05, |
| "loss": 1.6878, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.8347615687065228, |
| "grad_norm": 0.14054961982092284, |
| "learning_rate": 5e-05, |
| "loss": 1.667, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.8357697348523037, |
| "grad_norm": 0.13206695580210953, |
| "learning_rate": 5e-05, |
| "loss": 1.6757, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.8367779009980845, |
| "grad_norm": 0.15778739105529885, |
| "learning_rate": 5e-05, |
| "loss": 1.692, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8377860671438653, |
| "grad_norm": 0.1373991062407869, |
| "learning_rate": 5e-05, |
| "loss": 1.6739, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.8387942332896461, |
| "grad_norm": 0.2576531409786061, |
| "learning_rate": 5e-05, |
| "loss": 1.6695, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.839802399435427, |
| "grad_norm": 0.15106020481865556, |
| "learning_rate": 5e-05, |
| "loss": 1.6805, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.8408105655812078, |
| "grad_norm": 0.14046305460650463, |
| "learning_rate": 5e-05, |
| "loss": 1.6777, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.8418187317269886, |
| "grad_norm": 0.14720245880174745, |
| "learning_rate": 5e-05, |
| "loss": 1.6836, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.8428268978727694, |
| "grad_norm": 0.1578072566386783, |
| "learning_rate": 5e-05, |
| "loss": 1.6861, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.8438350640185502, |
| "grad_norm": 0.1518940101101013, |
| "learning_rate": 5e-05, |
| "loss": 1.6878, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.844843230164331, |
| "grad_norm": 0.13998500777852416, |
| "learning_rate": 5e-05, |
| "loss": 1.6832, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.8458513963101119, |
| "grad_norm": 0.13904704518884228, |
| "learning_rate": 5e-05, |
| "loss": 1.6753, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.8468595624558928, |
| "grad_norm": 0.14081647055828106, |
| "learning_rate": 5e-05, |
| "loss": 1.6812, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8478677286016736, |
| "grad_norm": 0.15399662388279747, |
| "learning_rate": 5e-05, |
| "loss": 1.6726, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.8488758947474544, |
| "grad_norm": 0.24482482359606256, |
| "learning_rate": 5e-05, |
| "loss": 1.6894, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.8498840608932352, |
| "grad_norm": 0.1381996934428476, |
| "learning_rate": 5e-05, |
| "loss": 1.6787, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.850892227039016, |
| "grad_norm": 0.12942528029338027, |
| "learning_rate": 5e-05, |
| "loss": 1.674, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.8519003931847968, |
| "grad_norm": 0.13627903853259218, |
| "learning_rate": 5e-05, |
| "loss": 1.676, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.8529085593305776, |
| "grad_norm": 0.1336712149207386, |
| "learning_rate": 5e-05, |
| "loss": 1.6817, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.8539167254763586, |
| "grad_norm": 0.1367092325582646, |
| "learning_rate": 5e-05, |
| "loss": 1.6621, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.8549248916221394, |
| "grad_norm": 0.1347323160292146, |
| "learning_rate": 5e-05, |
| "loss": 1.7124, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.8559330577679202, |
| "grad_norm": 0.13560405221175614, |
| "learning_rate": 5e-05, |
| "loss": 1.6861, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.856941223913701, |
| "grad_norm": 0.13449548817890208, |
| "learning_rate": 5e-05, |
| "loss": 1.6826, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8579493900594818, |
| "grad_norm": 0.1341433652220611, |
| "learning_rate": 5e-05, |
| "loss": 1.6864, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.8589575562052626, |
| "grad_norm": 0.14825925731848053, |
| "learning_rate": 5e-05, |
| "loss": 1.6864, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.8599657223510434, |
| "grad_norm": 0.1304576882873733, |
| "learning_rate": 5e-05, |
| "loss": 1.7001, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.8609738884968243, |
| "grad_norm": 0.13574394501767972, |
| "learning_rate": 5e-05, |
| "loss": 1.6918, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.8619820546426051, |
| "grad_norm": 0.13884970149183168, |
| "learning_rate": 5e-05, |
| "loss": 1.6875, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.8629902207883859, |
| "grad_norm": 0.1362435489981324, |
| "learning_rate": 5e-05, |
| "loss": 1.6945, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.8639983869341668, |
| "grad_norm": 0.13528485619923905, |
| "learning_rate": 5e-05, |
| "loss": 1.6828, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.8650065530799476, |
| "grad_norm": 0.13432004891402732, |
| "learning_rate": 5e-05, |
| "loss": 1.6852, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.8660147192257284, |
| "grad_norm": 0.1242110461943383, |
| "learning_rate": 5e-05, |
| "loss": 1.6859, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.8670228853715092, |
| "grad_norm": 0.13526165386716868, |
| "learning_rate": 5e-05, |
| "loss": 1.6882, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.8680310515172901, |
| "grad_norm": 0.13723528079790265, |
| "learning_rate": 5e-05, |
| "loss": 1.6742, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.8690392176630709, |
| "grad_norm": 0.14152025186993977, |
| "learning_rate": 5e-05, |
| "loss": 1.6824, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.8700473838088517, |
| "grad_norm": 0.13593092657670974, |
| "learning_rate": 5e-05, |
| "loss": 1.6877, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.8710555499546325, |
| "grad_norm": 0.1237240839204448, |
| "learning_rate": 5e-05, |
| "loss": 1.6653, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.8720637161004133, |
| "grad_norm": 0.12457362639962367, |
| "learning_rate": 5e-05, |
| "loss": 1.6741, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.8730718822461941, |
| "grad_norm": 0.13769038207724557, |
| "learning_rate": 5e-05, |
| "loss": 1.6866, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.874080048391975, |
| "grad_norm": 0.22819354832540836, |
| "learning_rate": 5e-05, |
| "loss": 1.6865, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.8750882145377559, |
| "grad_norm": 0.12812321433260723, |
| "learning_rate": 5e-05, |
| "loss": 1.6629, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.8760963806835367, |
| "grad_norm": 0.13193468553242307, |
| "learning_rate": 5e-05, |
| "loss": 1.6795, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.8771045468293175, |
| "grad_norm": 0.1339735770465933, |
| "learning_rate": 5e-05, |
| "loss": 1.6642, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.8781127129750983, |
| "grad_norm": 0.13219641454052525, |
| "learning_rate": 5e-05, |
| "loss": 1.6824, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.8791208791208791, |
| "grad_norm": 0.1278981257624631, |
| "learning_rate": 5e-05, |
| "loss": 1.7017, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.8801290452666599, |
| "grad_norm": 0.1268463958373461, |
| "learning_rate": 5e-05, |
| "loss": 1.685, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.8811372114124407, |
| "grad_norm": 0.12262984135503795, |
| "learning_rate": 5e-05, |
| "loss": 1.6832, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.8821453775582216, |
| "grad_norm": 0.12807704116710825, |
| "learning_rate": 5e-05, |
| "loss": 1.6715, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.8831535437040025, |
| "grad_norm": 0.13384246213045484, |
| "learning_rate": 5e-05, |
| "loss": 1.6796, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.8841617098497833, |
| "grad_norm": 0.13271946030978699, |
| "learning_rate": 5e-05, |
| "loss": 1.6792, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.8851698759955641, |
| "grad_norm": 0.13002797068913113, |
| "learning_rate": 5e-05, |
| "loss": 1.6826, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.8861780421413449, |
| "grad_norm": 0.12387813054000466, |
| "learning_rate": 5e-05, |
| "loss": 1.6764, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.8871862082871257, |
| "grad_norm": 0.1539348827112057, |
| "learning_rate": 5e-05, |
| "loss": 1.681, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.8881943744329065, |
| "grad_norm": 0.14191475073212179, |
| "learning_rate": 5e-05, |
| "loss": 1.7038, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.8892025405786874, |
| "grad_norm": 0.12626208410619427, |
| "learning_rate": 5e-05, |
| "loss": 1.6734, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.8902107067244682, |
| "grad_norm": 0.1537939707678987, |
| "learning_rate": 5e-05, |
| "loss": 1.6878, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.891218872870249, |
| "grad_norm": 0.14006513053904576, |
| "learning_rate": 5e-05, |
| "loss": 1.6784, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.8922270390160298, |
| "grad_norm": 0.1323359573323714, |
| "learning_rate": 5e-05, |
| "loss": 1.6753, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.8932352051618107, |
| "grad_norm": 0.127135232877454, |
| "learning_rate": 5e-05, |
| "loss": 1.6703, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.8942433713075915, |
| "grad_norm": 0.14395160246089883, |
| "learning_rate": 5e-05, |
| "loss": 1.6791, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.8952515374533723, |
| "grad_norm": 0.13213800028351733, |
| "learning_rate": 5e-05, |
| "loss": 1.6977, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.8962597035991532, |
| "grad_norm": 0.1339578277536838, |
| "learning_rate": 5e-05, |
| "loss": 1.6655, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.897267869744934, |
| "grad_norm": 0.13843289199372555, |
| "learning_rate": 5e-05, |
| "loss": 1.6807, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.8982760358907148, |
| "grad_norm": 0.15034227589113325, |
| "learning_rate": 5e-05, |
| "loss": 1.684, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.8992842020364956, |
| "grad_norm": 0.1295931007457488, |
| "learning_rate": 5e-05, |
| "loss": 1.6821, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.9002923681822764, |
| "grad_norm": 0.13591806645658097, |
| "learning_rate": 5e-05, |
| "loss": 1.6728, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.9013005343280572, |
| "grad_norm": 0.1438591830321329, |
| "learning_rate": 5e-05, |
| "loss": 1.6698, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.902308700473838, |
| "grad_norm": 0.1271279095390623, |
| "learning_rate": 5e-05, |
| "loss": 1.6668, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.903316866619619, |
| "grad_norm": 0.13381633362697323, |
| "learning_rate": 5e-05, |
| "loss": 1.6692, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.9043250327653998, |
| "grad_norm": 0.1365519873037108, |
| "learning_rate": 5e-05, |
| "loss": 1.6771, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.9053331989111806, |
| "grad_norm": 0.12756904544167164, |
| "learning_rate": 5e-05, |
| "loss": 1.6862, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.9063413650569614, |
| "grad_norm": 0.14850169861234608, |
| "learning_rate": 5e-05, |
| "loss": 1.6716, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.9073495312027422, |
| "grad_norm": 0.13581227089444428, |
| "learning_rate": 5e-05, |
| "loss": 1.6805, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.908357697348523, |
| "grad_norm": 0.12471119236144047, |
| "learning_rate": 5e-05, |
| "loss": 1.6618, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.9093658634943038, |
| "grad_norm": 0.15506755587406426, |
| "learning_rate": 5e-05, |
| "loss": 1.6787, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.9103740296400847, |
| "grad_norm": 0.13513790109556142, |
| "learning_rate": 5e-05, |
| "loss": 1.6708, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.9113821957858655, |
| "grad_norm": 0.15277615577202727, |
| "learning_rate": 5e-05, |
| "loss": 1.6595, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.9123903619316464, |
| "grad_norm": 0.1350891914749887, |
| "learning_rate": 5e-05, |
| "loss": 1.6567, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.9133985280774272, |
| "grad_norm": 0.14602516176664662, |
| "learning_rate": 5e-05, |
| "loss": 1.6749, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.914406694223208, |
| "grad_norm": 0.14463663430798326, |
| "learning_rate": 5e-05, |
| "loss": 1.6732, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.9154148603689888, |
| "grad_norm": 0.14699091093716773, |
| "learning_rate": 5e-05, |
| "loss": 1.6604, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.9164230265147696, |
| "grad_norm": 0.13215022110277264, |
| "learning_rate": 5e-05, |
| "loss": 1.6741, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.9174311926605505, |
| "grad_norm": 0.13050745363963512, |
| "learning_rate": 5e-05, |
| "loss": 1.6608, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.9184393588063313, |
| "grad_norm": 0.1466525773007831, |
| "learning_rate": 5e-05, |
| "loss": 1.6616, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.9194475249521121, |
| "grad_norm": 0.13347369745114426, |
| "learning_rate": 5e-05, |
| "loss": 1.6709, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.9204556910978929, |
| "grad_norm": 0.14407041271422674, |
| "learning_rate": 5e-05, |
| "loss": 1.6736, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.9214638572436737, |
| "grad_norm": 0.13888242220869906, |
| "learning_rate": 5e-05, |
| "loss": 1.6949, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.9224720233894546, |
| "grad_norm": 0.14262053965466828, |
| "learning_rate": 5e-05, |
| "loss": 1.6734, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.9234801895352354, |
| "grad_norm": 0.1409772264763286, |
| "learning_rate": 5e-05, |
| "loss": 1.6678, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.9244883556810163, |
| "grad_norm": 0.14527396885120117, |
| "learning_rate": 5e-05, |
| "loss": 1.6761, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.9254965218267971, |
| "grad_norm": 0.12740510773335295, |
| "learning_rate": 5e-05, |
| "loss": 1.6708, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.9265046879725779, |
| "grad_norm": 0.139395581998477, |
| "learning_rate": 5e-05, |
| "loss": 1.6613, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.9275128541183587, |
| "grad_norm": 0.13417759421161327, |
| "learning_rate": 5e-05, |
| "loss": 1.6753, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.9285210202641395, |
| "grad_norm": 0.13555609704891103, |
| "learning_rate": 5e-05, |
| "loss": 1.6738, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.9295291864099203, |
| "grad_norm": 0.12829038107875804, |
| "learning_rate": 5e-05, |
| "loss": 1.6684, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.9305373525557011, |
| "grad_norm": 0.13296107854572745, |
| "learning_rate": 5e-05, |
| "loss": 1.6726, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.9315455187014821, |
| "grad_norm": 0.14241035335107388, |
| "learning_rate": 5e-05, |
| "loss": 1.6709, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.9325536848472629, |
| "grad_norm": 0.1271240766685691, |
| "learning_rate": 5e-05, |
| "loss": 1.6766, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.9335618509930437, |
| "grad_norm": 0.1354859254660614, |
| "learning_rate": 5e-05, |
| "loss": 1.6715, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.9345700171388245, |
| "grad_norm": 0.12889632841887344, |
| "learning_rate": 5e-05, |
| "loss": 1.68, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.9355781832846053, |
| "grad_norm": 0.12917740851963883, |
| "learning_rate": 5e-05, |
| "loss": 1.6793, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.9365863494303861, |
| "grad_norm": 0.1327484530362986, |
| "learning_rate": 5e-05, |
| "loss": 1.6586, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.9375945155761669, |
| "grad_norm": 0.12967479905068346, |
| "learning_rate": 5e-05, |
| "loss": 1.6829, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.9386026817219478, |
| "grad_norm": 0.12850564347803076, |
| "learning_rate": 5e-05, |
| "loss": 1.666, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.9396108478677286, |
| "grad_norm": 0.12869497807470315, |
| "learning_rate": 5e-05, |
| "loss": 1.6911, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.9406190140135094, |
| "grad_norm": 0.13662082311676438, |
| "learning_rate": 5e-05, |
| "loss": 1.674, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.9416271801592903, |
| "grad_norm": 0.1382953464962334, |
| "learning_rate": 5e-05, |
| "loss": 1.6647, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.9426353463050711, |
| "grad_norm": 0.13350088079608952, |
| "learning_rate": 5e-05, |
| "loss": 1.6832, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.9436435124508519, |
| "grad_norm": 0.1431935916731277, |
| "learning_rate": 5e-05, |
| "loss": 1.6748, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.9446516785966327, |
| "grad_norm": 0.14180022265326894, |
| "learning_rate": 5e-05, |
| "loss": 1.6641, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.9456598447424136, |
| "grad_norm": 0.1272013668604564, |
| "learning_rate": 5e-05, |
| "loss": 1.6853, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.9466680108881944, |
| "grad_norm": 0.13326949088898338, |
| "learning_rate": 5e-05, |
| "loss": 1.666, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.9476761770339752, |
| "grad_norm": 0.1475715105954654, |
| "learning_rate": 5e-05, |
| "loss": 1.6828, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.948684343179756, |
| "grad_norm": 0.14083105254743475, |
| "learning_rate": 5e-05, |
| "loss": 1.6784, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.9496925093255368, |
| "grad_norm": 0.13511643953253086, |
| "learning_rate": 5e-05, |
| "loss": 1.6815, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.9507006754713176, |
| "grad_norm": 0.12796079103971297, |
| "learning_rate": 5e-05, |
| "loss": 1.6817, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.9517088416170985, |
| "grad_norm": 0.16362744096426632, |
| "learning_rate": 5e-05, |
| "loss": 1.6836, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.9527170077628794, |
| "grad_norm": 0.12797064422723695, |
| "learning_rate": 5e-05, |
| "loss": 1.6751, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.9537251739086602, |
| "grad_norm": 0.1434444700945595, |
| "learning_rate": 5e-05, |
| "loss": 1.6834, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.954733340054441, |
| "grad_norm": 0.1321562433293951, |
| "learning_rate": 5e-05, |
| "loss": 1.6654, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.9557415062002218, |
| "grad_norm": 0.1350527789374817, |
| "learning_rate": 5e-05, |
| "loss": 1.6668, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.9567496723460026, |
| "grad_norm": 0.14156980572384642, |
| "learning_rate": 5e-05, |
| "loss": 1.6734, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.9577578384917834, |
| "grad_norm": 0.14013712544503423, |
| "learning_rate": 5e-05, |
| "loss": 1.6842, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9587660046375642, |
| "grad_norm": 0.13222246222944062, |
| "learning_rate": 5e-05, |
| "loss": 1.6779, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.9597741707833451, |
| "grad_norm": 0.13904740589623618, |
| "learning_rate": 5e-05, |
| "loss": 1.6672, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.960782336929126, |
| "grad_norm": 0.12712158954742464, |
| "learning_rate": 5e-05, |
| "loss": 1.6727, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.9617905030749068, |
| "grad_norm": 0.1288804401638552, |
| "learning_rate": 5e-05, |
| "loss": 1.6632, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.9627986692206876, |
| "grad_norm": 0.13295930679196813, |
| "learning_rate": 5e-05, |
| "loss": 1.6839, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.9638068353664684, |
| "grad_norm": 0.23028443790162464, |
| "learning_rate": 5e-05, |
| "loss": 1.6842, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.9648150015122492, |
| "grad_norm": 0.1397117984780913, |
| "learning_rate": 5e-05, |
| "loss": 1.6865, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.96582316765803, |
| "grad_norm": 0.13497797279155332, |
| "learning_rate": 5e-05, |
| "loss": 1.6864, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.9668313338038109, |
| "grad_norm": 0.12916303558347642, |
| "learning_rate": 5e-05, |
| "loss": 1.6901, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.9678394999495917, |
| "grad_norm": 0.23211777447315532, |
| "learning_rate": 5e-05, |
| "loss": 1.6766, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.9688476660953725, |
| "grad_norm": 0.13601563623084056, |
| "learning_rate": 5e-05, |
| "loss": 1.6753, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.9698558322411533, |
| "grad_norm": 0.137289477966096, |
| "learning_rate": 5e-05, |
| "loss": 1.6737, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.9708639983869342, |
| "grad_norm": 0.13667594781630565, |
| "learning_rate": 5e-05, |
| "loss": 1.6804, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.971872164532715, |
| "grad_norm": 0.13576064908436217, |
| "learning_rate": 5e-05, |
| "loss": 1.6603, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.9728803306784958, |
| "grad_norm": 0.132798732372051, |
| "learning_rate": 5e-05, |
| "loss": 1.6828, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.9738884968242767, |
| "grad_norm": 0.13208449355289498, |
| "learning_rate": 5e-05, |
| "loss": 1.6744, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.9748966629700575, |
| "grad_norm": 0.13585942411581226, |
| "learning_rate": 5e-05, |
| "loss": 1.6777, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.9759048291158383, |
| "grad_norm": 0.13548184798449628, |
| "learning_rate": 5e-05, |
| "loss": 1.6881, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.9769129952616191, |
| "grad_norm": 0.1392166913735763, |
| "learning_rate": 5e-05, |
| "loss": 1.6675, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.9779211614073999, |
| "grad_norm": 0.13739517699713566, |
| "learning_rate": 5e-05, |
| "loss": 1.6725, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.9789293275531807, |
| "grad_norm": 0.1325157842600348, |
| "learning_rate": 5e-05, |
| "loss": 1.7046, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.9799374936989615, |
| "grad_norm": 0.14491654836379084, |
| "learning_rate": 5e-05, |
| "loss": 1.6907, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.9809456598447425, |
| "grad_norm": 0.1350018683671611, |
| "learning_rate": 5e-05, |
| "loss": 1.6893, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.9819538259905233, |
| "grad_norm": 0.1380573133150687, |
| "learning_rate": 5e-05, |
| "loss": 1.6686, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.9829619921363041, |
| "grad_norm": 0.15639160382515796, |
| "learning_rate": 5e-05, |
| "loss": 1.6831, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.9839701582820849, |
| "grad_norm": 0.13129130265567285, |
| "learning_rate": 5e-05, |
| "loss": 1.6852, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.9849783244278657, |
| "grad_norm": 0.1378835095946666, |
| "learning_rate": 5e-05, |
| "loss": 1.6809, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.9859864905736465, |
| "grad_norm": 0.15323167789285774, |
| "learning_rate": 5e-05, |
| "loss": 1.6584, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.9869946567194273, |
| "grad_norm": 0.13120916627174922, |
| "learning_rate": 5e-05, |
| "loss": 1.6746, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.9880028228652082, |
| "grad_norm": 0.16658089074004762, |
| "learning_rate": 5e-05, |
| "loss": 1.6883, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.989010989010989, |
| "grad_norm": 0.12480946878755692, |
| "learning_rate": 5e-05, |
| "loss": 1.6667, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.9900191551567699, |
| "grad_norm": 0.15011152635478692, |
| "learning_rate": 5e-05, |
| "loss": 1.6713, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.9910273213025507, |
| "grad_norm": 0.14336382061066752, |
| "learning_rate": 5e-05, |
| "loss": 1.6682, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.9920354874483315, |
| "grad_norm": 0.15560552997255717, |
| "learning_rate": 5e-05, |
| "loss": 1.6554, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.9930436535941123, |
| "grad_norm": 0.25239196240528766, |
| "learning_rate": 5e-05, |
| "loss": 1.6708, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.9940518197398931, |
| "grad_norm": 0.15375826976754078, |
| "learning_rate": 5e-05, |
| "loss": 1.6722, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.995059985885674, |
| "grad_norm": 0.13908301421214161, |
| "learning_rate": 5e-05, |
| "loss": 1.6638, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.9960681520314548, |
| "grad_norm": 0.13376466586132293, |
| "learning_rate": 5e-05, |
| "loss": 1.6708, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.9970763181772356, |
| "grad_norm": 0.14674096140221835, |
| "learning_rate": 5e-05, |
| "loss": 1.6638, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.9980844843230164, |
| "grad_norm": 0.14637311521906743, |
| "learning_rate": 5e-05, |
| "loss": 1.6977, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.9990926504687972, |
| "grad_norm": 0.1432639711689218, |
| "learning_rate": 5e-05, |
| "loss": 1.6693, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.0010081661457808, |
| "grad_norm": 0.21860288289168814, |
| "learning_rate": 5e-05, |
| "loss": 3.2927, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.0020163322915616, |
| "grad_norm": 0.14091034776903996, |
| "learning_rate": 5e-05, |
| "loss": 1.6465, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.0030244984373424, |
| "grad_norm": 0.15840691147102967, |
| "learning_rate": 5e-05, |
| "loss": 1.6415, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.0040326645831232, |
| "grad_norm": 0.1419834757811408, |
| "learning_rate": 5e-05, |
| "loss": 1.6695, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.005040830728904, |
| "grad_norm": 0.15445174116810362, |
| "learning_rate": 5e-05, |
| "loss": 1.6508, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.0060489968746849, |
| "grad_norm": 0.15506560418976248, |
| "learning_rate": 5e-05, |
| "loss": 1.6608, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.0070571630204657, |
| "grad_norm": 0.15772043212937356, |
| "learning_rate": 5e-05, |
| "loss": 1.6549, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.0080653291662467, |
| "grad_norm": 0.14809733736088554, |
| "learning_rate": 5e-05, |
| "loss": 1.6756, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.0090734953120275, |
| "grad_norm": 0.15527053504041188, |
| "learning_rate": 5e-05, |
| "loss": 1.6443, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0100816614578083, |
| "grad_norm": 0.13339301695926947, |
| "learning_rate": 5e-05, |
| "loss": 1.6502, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.0110898276035891, |
| "grad_norm": 0.16069903566032434, |
| "learning_rate": 5e-05, |
| "loss": 1.6386, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.01209799374937, |
| "grad_norm": 0.145353827020301, |
| "learning_rate": 5e-05, |
| "loss": 1.6708, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.0131061598951507, |
| "grad_norm": 0.15847490082912677, |
| "learning_rate": 5e-05, |
| "loss": 1.6668, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.0141143260409315, |
| "grad_norm": 0.1527946599717766, |
| "learning_rate": 5e-05, |
| "loss": 1.6544, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.0151224921867124, |
| "grad_norm": 0.14568390303961978, |
| "learning_rate": 5e-05, |
| "loss": 1.653, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.0161306583324932, |
| "grad_norm": 0.15026544585100637, |
| "learning_rate": 5e-05, |
| "loss": 1.6631, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.017138824478274, |
| "grad_norm": 0.14649278087306322, |
| "learning_rate": 5e-05, |
| "loss": 1.67, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.0181469906240548, |
| "grad_norm": 0.13992710750397244, |
| "learning_rate": 5e-05, |
| "loss": 1.6659, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.0191551567698356, |
| "grad_norm": 0.134375044039903, |
| "learning_rate": 5e-05, |
| "loss": 1.6758, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.0201633229156164, |
| "grad_norm": 0.14737843192197858, |
| "learning_rate": 5e-05, |
| "loss": 1.6422, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.0211714890613974, |
| "grad_norm": 0.13773473373848963, |
| "learning_rate": 5e-05, |
| "loss": 1.664, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.0221796552071782, |
| "grad_norm": 0.15945433650936014, |
| "learning_rate": 5e-05, |
| "loss": 1.652, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.023187821352959, |
| "grad_norm": 0.15989378386570163, |
| "learning_rate": 5e-05, |
| "loss": 1.6789, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.0241959874987399, |
| "grad_norm": 0.15474749283228387, |
| "learning_rate": 5e-05, |
| "loss": 1.6484, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.0252041536445207, |
| "grad_norm": 0.14454939561179925, |
| "learning_rate": 5e-05, |
| "loss": 1.6549, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.0262123197903015, |
| "grad_norm": 0.1429342231425721, |
| "learning_rate": 5e-05, |
| "loss": 1.6575, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.0272204859360823, |
| "grad_norm": 0.1472418787822263, |
| "learning_rate": 5e-05, |
| "loss": 1.6575, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.028228652081863, |
| "grad_norm": 0.15058269545560304, |
| "learning_rate": 5e-05, |
| "loss": 1.6466, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.029236818227644, |
| "grad_norm": 0.14338434142901946, |
| "learning_rate": 5e-05, |
| "loss": 1.6367, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.0302449843734247, |
| "grad_norm": 0.14739578480466062, |
| "learning_rate": 5e-05, |
| "loss": 1.6462, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.0312531505192055, |
| "grad_norm": 0.14596335620916118, |
| "learning_rate": 5e-05, |
| "loss": 1.6519, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.0322613166649863, |
| "grad_norm": 0.14344758953412376, |
| "learning_rate": 5e-05, |
| "loss": 1.6361, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.0332694828107671, |
| "grad_norm": 0.14371231242831609, |
| "learning_rate": 5e-05, |
| "loss": 1.652, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.034277648956548, |
| "grad_norm": 0.14003131085381088, |
| "learning_rate": 5e-05, |
| "loss": 1.6816, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.0352858151023288, |
| "grad_norm": 0.1378014474462088, |
| "learning_rate": 5e-05, |
| "loss": 1.6555, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.0362939812481098, |
| "grad_norm": 0.13383360458955793, |
| "learning_rate": 5e-05, |
| "loss": 1.6475, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.0373021473938906, |
| "grad_norm": 0.1494803160243675, |
| "learning_rate": 5e-05, |
| "loss": 1.6562, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.0383103135396714, |
| "grad_norm": 0.1357968016925739, |
| "learning_rate": 5e-05, |
| "loss": 1.6377, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.0393184796854522, |
| "grad_norm": 0.1389382336016073, |
| "learning_rate": 5e-05, |
| "loss": 1.6594, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.040326645831233, |
| "grad_norm": 0.1406360279058917, |
| "learning_rate": 5e-05, |
| "loss": 1.6465, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.0413348119770138, |
| "grad_norm": 0.1398760422398223, |
| "learning_rate": 5e-05, |
| "loss": 1.6609, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.0423429781227946, |
| "grad_norm": 0.1551391224014159, |
| "learning_rate": 5e-05, |
| "loss": 1.6569, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.0433511442685754, |
| "grad_norm": 0.13814685402174295, |
| "learning_rate": 5e-05, |
| "loss": 1.6478, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.0443593104143563, |
| "grad_norm": 0.13526430253748103, |
| "learning_rate": 5e-05, |
| "loss": 1.6464, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.045367476560137, |
| "grad_norm": 0.15377599703718353, |
| "learning_rate": 5e-05, |
| "loss": 1.6533, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.0463756427059179, |
| "grad_norm": 0.14272580240194616, |
| "learning_rate": 5e-05, |
| "loss": 1.6464, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.0473838088516987, |
| "grad_norm": 0.1425037845577125, |
| "learning_rate": 5e-05, |
| "loss": 1.6561, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.0483919749974795, |
| "grad_norm": 0.1382158099797001, |
| "learning_rate": 5e-05, |
| "loss": 1.6556, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.0494001411432605, |
| "grad_norm": 0.13446219082640498, |
| "learning_rate": 5e-05, |
| "loss": 1.6477, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.0504083072890413, |
| "grad_norm": 0.14002570935710634, |
| "learning_rate": 5e-05, |
| "loss": 1.6603, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.0514164734348221, |
| "grad_norm": 0.14680327184585512, |
| "learning_rate": 5e-05, |
| "loss": 1.6463, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.052424639580603, |
| "grad_norm": 0.13807927181398283, |
| "learning_rate": 5e-05, |
| "loss": 1.6606, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.0534328057263838, |
| "grad_norm": 0.14002355585431073, |
| "learning_rate": 5e-05, |
| "loss": 1.6559, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.0544409718721646, |
| "grad_norm": 0.13712234265374093, |
| "learning_rate": 5e-05, |
| "loss": 1.6568, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.0554491380179454, |
| "grad_norm": 0.1478330543967965, |
| "learning_rate": 5e-05, |
| "loss": 1.6487, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.0564573041637262, |
| "grad_norm": 0.140280334541867, |
| "learning_rate": 5e-05, |
| "loss": 1.657, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.057465470309507, |
| "grad_norm": 0.14612153616751714, |
| "learning_rate": 5e-05, |
| "loss": 1.6685, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.0584736364552878, |
| "grad_norm": 0.13553689622586162, |
| "learning_rate": 5e-05, |
| "loss": 1.6424, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.0594818026010686, |
| "grad_norm": 0.14257587085611279, |
| "learning_rate": 5e-05, |
| "loss": 1.6423, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.0604899687468494, |
| "grad_norm": 0.13249030300586634, |
| "learning_rate": 5e-05, |
| "loss": 1.6588, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.0614981348926302, |
| "grad_norm": 0.14948997388776752, |
| "learning_rate": 5e-05, |
| "loss": 1.6514, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.062506301038411, |
| "grad_norm": 0.13413211819187493, |
| "learning_rate": 5e-05, |
| "loss": 1.6243, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.0635144671841918, |
| "grad_norm": 0.13517249636027376, |
| "learning_rate": 5e-05, |
| "loss": 1.6718, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.0645226333299729, |
| "grad_norm": 0.1425324839674816, |
| "learning_rate": 5e-05, |
| "loss": 1.634, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.0655307994757537, |
| "grad_norm": 0.14421737540989363, |
| "learning_rate": 5e-05, |
| "loss": 1.6601, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.0665389656215345, |
| "grad_norm": 0.13949099479682, |
| "learning_rate": 5e-05, |
| "loss": 1.6584, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.0675471317673153, |
| "grad_norm": 0.13543143787477846, |
| "learning_rate": 5e-05, |
| "loss": 1.6264, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.0685552979130961, |
| "grad_norm": 0.1308536607672446, |
| "learning_rate": 5e-05, |
| "loss": 1.6483, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.069563464058877, |
| "grad_norm": 0.15395546567197096, |
| "learning_rate": 5e-05, |
| "loss": 1.6537, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.0705716302046577, |
| "grad_norm": 0.13590652212696186, |
| "learning_rate": 5e-05, |
| "loss": 1.6489, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.0715797963504385, |
| "grad_norm": 0.1369179871265128, |
| "learning_rate": 5e-05, |
| "loss": 1.6499, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.0725879624962193, |
| "grad_norm": 0.13868179923924345, |
| "learning_rate": 5e-05, |
| "loss": 1.6568, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.0735961286420002, |
| "grad_norm": 0.1418740879249988, |
| "learning_rate": 5e-05, |
| "loss": 1.6507, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.074604294787781, |
| "grad_norm": 0.1398126841174943, |
| "learning_rate": 5e-05, |
| "loss": 1.6492, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.0756124609335618, |
| "grad_norm": 0.14638179326062736, |
| "learning_rate": 5e-05, |
| "loss": 1.6467, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.0766206270793426, |
| "grad_norm": 0.12995279427717285, |
| "learning_rate": 5e-05, |
| "loss": 1.6449, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.0776287932251236, |
| "grad_norm": 0.14547801601785154, |
| "learning_rate": 5e-05, |
| "loss": 1.6355, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.0786369593709044, |
| "grad_norm": 0.14421057898446202, |
| "learning_rate": 5e-05, |
| "loss": 1.6374, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.0796451255166852, |
| "grad_norm": 0.14129351206800517, |
| "learning_rate": 5e-05, |
| "loss": 1.652, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.080653291662466, |
| "grad_norm": 0.13777294504511903, |
| "learning_rate": 5e-05, |
| "loss": 1.6619, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.0816614578082469, |
| "grad_norm": 0.14301808646954006, |
| "learning_rate": 5e-05, |
| "loss": 1.6333, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.0826696239540277, |
| "grad_norm": 0.1455323994765, |
| "learning_rate": 5e-05, |
| "loss": 1.63, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.0836777900998085, |
| "grad_norm": 0.13488078036821274, |
| "learning_rate": 5e-05, |
| "loss": 1.6476, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.0846859562455893, |
| "grad_norm": 0.12922264908060607, |
| "learning_rate": 5e-05, |
| "loss": 1.6562, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.08569412239137, |
| "grad_norm": 0.13245289137103436, |
| "learning_rate": 5e-05, |
| "loss": 1.638, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.086702288537151, |
| "grad_norm": 0.13467066705212152, |
| "learning_rate": 5e-05, |
| "loss": 1.6432, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.0877104546829317, |
| "grad_norm": 0.13683554315901364, |
| "learning_rate": 5e-05, |
| "loss": 1.6418, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.0887186208287125, |
| "grad_norm": 0.13087668906495806, |
| "learning_rate": 5e-05, |
| "loss": 1.6453, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.0897267869744933, |
| "grad_norm": 0.12813283034951103, |
| "learning_rate": 5e-05, |
| "loss": 1.6664, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.0907349531202741, |
| "grad_norm": 0.13206635982114845, |
| "learning_rate": 5e-05, |
| "loss": 1.6612, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.091743119266055, |
| "grad_norm": 0.1298970210608696, |
| "learning_rate": 5e-05, |
| "loss": 1.6357, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.092751285411836, |
| "grad_norm": 0.13177936167224702, |
| "learning_rate": 5e-05, |
| "loss": 1.6533, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.0937594515576168, |
| "grad_norm": 0.1370794107339465, |
| "learning_rate": 5e-05, |
| "loss": 1.6573, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.0947676177033976, |
| "grad_norm": 0.12985926535902795, |
| "learning_rate": 5e-05, |
| "loss": 1.6597, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.0957757838491784, |
| "grad_norm": 0.14365813461674878, |
| "learning_rate": 5e-05, |
| "loss": 1.6415, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.0967839499949592, |
| "grad_norm": 0.13831040201343497, |
| "learning_rate": 5e-05, |
| "loss": 1.6427, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.09779211614074, |
| "grad_norm": 0.12700324637807814, |
| "learning_rate": 5e-05, |
| "loss": 1.6635, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.0988002822865208, |
| "grad_norm": 0.14530007715183632, |
| "learning_rate": 5e-05, |
| "loss": 1.6552, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.0998084484323016, |
| "grad_norm": 0.14358509176757844, |
| "learning_rate": 5e-05, |
| "loss": 1.65, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.1008166145780824, |
| "grad_norm": 0.12220911593027503, |
| "learning_rate": 5e-05, |
| "loss": 1.6603, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.1018247807238633, |
| "grad_norm": 0.1383717567406863, |
| "learning_rate": 5e-05, |
| "loss": 1.6642, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.102832946869644, |
| "grad_norm": 0.13425833878331841, |
| "learning_rate": 5e-05, |
| "loss": 1.6413, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.1038411130154249, |
| "grad_norm": 0.12995091320791363, |
| "learning_rate": 5e-05, |
| "loss": 1.6429, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.1048492791612057, |
| "grad_norm": 0.13727248059012334, |
| "learning_rate": 5e-05, |
| "loss": 1.643, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.1058574453069867, |
| "grad_norm": 0.1451092239957977, |
| "learning_rate": 5e-05, |
| "loss": 1.6235, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.1068656114527675, |
| "grad_norm": 1.1003212392254569, |
| "learning_rate": 5e-05, |
| "loss": 1.6607, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.1078737775985483, |
| "grad_norm": 0.1487788294386961, |
| "learning_rate": 5e-05, |
| "loss": 1.6637, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.1088819437443291, |
| "grad_norm": 0.1348509108073677, |
| "learning_rate": 5e-05, |
| "loss": 1.6633, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.10989010989011, |
| "grad_norm": 0.13451834596455475, |
| "learning_rate": 5e-05, |
| "loss": 1.6503, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.1108982760358908, |
| "grad_norm": 0.1434915625519777, |
| "learning_rate": 5e-05, |
| "loss": 1.6441, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.1119064421816716, |
| "grad_norm": 0.13247439246408826, |
| "learning_rate": 5e-05, |
| "loss": 1.6452, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.1129146083274524, |
| "grad_norm": 0.15483809847029564, |
| "learning_rate": 5e-05, |
| "loss": 1.6334, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.1139227744732332, |
| "grad_norm": 0.15969160070618565, |
| "learning_rate": 5e-05, |
| "loss": 1.6429, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.114930940619014, |
| "grad_norm": 0.1628238357590171, |
| "learning_rate": 5e-05, |
| "loss": 1.6608, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.1159391067647948, |
| "grad_norm": 0.15930542337028844, |
| "learning_rate": 5e-05, |
| "loss": 1.6488, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.1169472729105756, |
| "grad_norm": 0.15466370593275638, |
| "learning_rate": 5e-05, |
| "loss": 1.6532, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.1179554390563564, |
| "grad_norm": 0.14010912938299824, |
| "learning_rate": 5e-05, |
| "loss": 1.6573, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.1189636052021372, |
| "grad_norm": 0.16390153898300394, |
| "learning_rate": 5e-05, |
| "loss": 1.6594, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.119971771347918, |
| "grad_norm": 0.14116244076539533, |
| "learning_rate": 5e-05, |
| "loss": 1.644, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.120979937493699, |
| "grad_norm": 0.13648666647020574, |
| "learning_rate": 5e-05, |
| "loss": 1.6513, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.1219881036394799, |
| "grad_norm": 0.140123096773187, |
| "learning_rate": 5e-05, |
| "loss": 1.6301, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.1229962697852607, |
| "grad_norm": 0.13187345151623706, |
| "learning_rate": 5e-05, |
| "loss": 1.644, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.1240044359310415, |
| "grad_norm": 0.13763479057234174, |
| "learning_rate": 5e-05, |
| "loss": 1.6425, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.1250126020768223, |
| "grad_norm": 0.13396232089072882, |
| "learning_rate": 5e-05, |
| "loss": 1.6681, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.126020768222603, |
| "grad_norm": 0.13322825877954908, |
| "learning_rate": 5e-05, |
| "loss": 1.659, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.127028934368384, |
| "grad_norm": 0.13579297620215142, |
| "learning_rate": 5e-05, |
| "loss": 1.6437, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.1280371005141647, |
| "grad_norm": 0.1341591695038578, |
| "learning_rate": 5e-05, |
| "loss": 1.6507, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.1290452666599455, |
| "grad_norm": 0.13741517387144328, |
| "learning_rate": 5e-05, |
| "loss": 1.6442, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.1300534328057263, |
| "grad_norm": 0.14068768012069355, |
| "learning_rate": 5e-05, |
| "loss": 1.6465, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.1310615989515072, |
| "grad_norm": 0.14004222531844976, |
| "learning_rate": 5e-05, |
| "loss": 1.6438, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.132069765097288, |
| "grad_norm": 0.1430222089343432, |
| "learning_rate": 5e-05, |
| "loss": 1.6294, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.1330779312430688, |
| "grad_norm": 0.1354002130699085, |
| "learning_rate": 5e-05, |
| "loss": 1.6235, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.1340860973888498, |
| "grad_norm": 0.14706699771800255, |
| "learning_rate": 5e-05, |
| "loss": 1.6524, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.1350942635346306, |
| "grad_norm": 0.14384265748302816, |
| "learning_rate": 5e-05, |
| "loss": 1.6484, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.1361024296804114, |
| "grad_norm": 0.1328015268784059, |
| "learning_rate": 5e-05, |
| "loss": 1.6241, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.1371105958261922, |
| "grad_norm": 0.1398676330492913, |
| "learning_rate": 5e-05, |
| "loss": 1.6441, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.138118761971973, |
| "grad_norm": 0.14489176249712743, |
| "learning_rate": 5e-05, |
| "loss": 1.6501, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.1391269281177538, |
| "grad_norm": 0.15427806273758912, |
| "learning_rate": 5e-05, |
| "loss": 1.6382, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.1401350942635347, |
| "grad_norm": 0.1353320676919305, |
| "learning_rate": 5e-05, |
| "loss": 1.6316, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.1411432604093155, |
| "grad_norm": 0.1459498336653144, |
| "learning_rate": 5e-05, |
| "loss": 1.6762, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.1421514265550963, |
| "grad_norm": 0.15798013402714647, |
| "learning_rate": 5e-05, |
| "loss": 1.6511, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.143159592700877, |
| "grad_norm": 0.15079687525350177, |
| "learning_rate": 5e-05, |
| "loss": 1.6349, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.144167758846658, |
| "grad_norm": 0.1750724581326861, |
| "learning_rate": 5e-05, |
| "loss": 1.6552, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.1451759249924387, |
| "grad_norm": 0.13556870245122188, |
| "learning_rate": 5e-05, |
| "loss": 1.6489, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.1461840911382195, |
| "grad_norm": 0.1319776262053859, |
| "learning_rate": 5e-05, |
| "loss": 1.6505, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.1471922572840003, |
| "grad_norm": 0.13323668607471617, |
| "learning_rate": 5e-05, |
| "loss": 1.6707, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.1482004234297811, |
| "grad_norm": 0.13656918475459365, |
| "learning_rate": 5e-05, |
| "loss": 1.6283, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.149208589575562, |
| "grad_norm": 0.12451944847688946, |
| "learning_rate": 5e-05, |
| "loss": 1.6379, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.150216755721343, |
| "grad_norm": 0.12773660059098071, |
| "learning_rate": 5e-05, |
| "loss": 1.644, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.1512249218671238, |
| "grad_norm": 0.12345976774900896, |
| "learning_rate": 5e-05, |
| "loss": 1.6386, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.1522330880129046, |
| "grad_norm": 0.1380893224066923, |
| "learning_rate": 5e-05, |
| "loss": 1.6667, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.1532412541586854, |
| "grad_norm": 0.140157865219376, |
| "learning_rate": 5e-05, |
| "loss": 1.635, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.1542494203044662, |
| "grad_norm": 0.13727630744096145, |
| "learning_rate": 5e-05, |
| "loss": 1.6424, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.155257586450247, |
| "grad_norm": 0.1324099666783387, |
| "learning_rate": 5e-05, |
| "loss": 1.6373, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.1562657525960278, |
| "grad_norm": 0.13882259979347136, |
| "learning_rate": 5e-05, |
| "loss": 1.6591, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.1572739187418086, |
| "grad_norm": 0.13098459974691126, |
| "learning_rate": 5e-05, |
| "loss": 1.6447, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.1582820848875894, |
| "grad_norm": 0.1431528108442056, |
| "learning_rate": 5e-05, |
| "loss": 1.6467, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.1592902510333702, |
| "grad_norm": 0.13353399419959994, |
| "learning_rate": 5e-05, |
| "loss": 1.6259, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.160298417179151, |
| "grad_norm": 0.14073095816973716, |
| "learning_rate": 5e-05, |
| "loss": 1.6524, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.1613065833249319, |
| "grad_norm": 0.12967149139945466, |
| "learning_rate": 5e-05, |
| "loss": 1.636, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.162314749470713, |
| "grad_norm": 0.1404076923142028, |
| "learning_rate": 5e-05, |
| "loss": 1.6388, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.1633229156164937, |
| "grad_norm": 0.12890108550224091, |
| "learning_rate": 5e-05, |
| "loss": 1.6419, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.1643310817622745, |
| "grad_norm": 0.14108978798383223, |
| "learning_rate": 5e-05, |
| "loss": 1.6519, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.1653392479080553, |
| "grad_norm": 0.12324326033477677, |
| "learning_rate": 5e-05, |
| "loss": 1.6378, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.1663474140538361, |
| "grad_norm": 0.13123027082856784, |
| "learning_rate": 5e-05, |
| "loss": 1.6271, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.167355580199617, |
| "grad_norm": 0.13632941963813844, |
| "learning_rate": 5e-05, |
| "loss": 1.6449, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.1683637463453977, |
| "grad_norm": 0.14274588260292997, |
| "learning_rate": 5e-05, |
| "loss": 1.6289, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.1693719124911786, |
| "grad_norm": 0.1388212405939215, |
| "learning_rate": 5e-05, |
| "loss": 1.651, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.1703800786369594, |
| "grad_norm": 0.13913045853970632, |
| "learning_rate": 5e-05, |
| "loss": 1.6128, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.1713882447827402, |
| "grad_norm": 0.12953138641069079, |
| "learning_rate": 5e-05, |
| "loss": 1.6716, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.172396410928521, |
| "grad_norm": 0.13383463327732076, |
| "learning_rate": 5e-05, |
| "loss": 1.6353, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.1734045770743018, |
| "grad_norm": 0.13282072248663965, |
| "learning_rate": 5e-05, |
| "loss": 1.6457, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.1744127432200826, |
| "grad_norm": 0.1256500519520948, |
| "learning_rate": 5e-05, |
| "loss": 1.6458, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.1754209093658634, |
| "grad_norm": 0.13370852595416396, |
| "learning_rate": 5e-05, |
| "loss": 1.6396, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.1764290755116442, |
| "grad_norm": 0.14088261443154418, |
| "learning_rate": 5e-05, |
| "loss": 1.6178, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.177437241657425, |
| "grad_norm": 0.1278315762332314, |
| "learning_rate": 5e-05, |
| "loss": 1.6467, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.178445407803206, |
| "grad_norm": 0.1366827463160475, |
| "learning_rate": 5e-05, |
| "loss": 1.6612, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.1794535739489869, |
| "grad_norm": 0.13220181447257495, |
| "learning_rate": 5e-05, |
| "loss": 1.6393, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.1804617400947677, |
| "grad_norm": 0.13957285481272086, |
| "learning_rate": 5e-05, |
| "loss": 1.6442, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.1814699062405485, |
| "grad_norm": 0.13381450080318658, |
| "learning_rate": 5e-05, |
| "loss": 1.6409, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.1824780723863293, |
| "grad_norm": 0.1331024616666049, |
| "learning_rate": 5e-05, |
| "loss": 1.6297, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.18348623853211, |
| "grad_norm": 0.13425932581575892, |
| "learning_rate": 5e-05, |
| "loss": 1.646, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.184494404677891, |
| "grad_norm": 0.1419448930914817, |
| "learning_rate": 5e-05, |
| "loss": 1.6333, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.1855025708236717, |
| "grad_norm": 0.12587029419536305, |
| "learning_rate": 5e-05, |
| "loss": 1.6451, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.1865107369694525, |
| "grad_norm": 0.12990741754999835, |
| "learning_rate": 5e-05, |
| "loss": 1.6377, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.1875189031152333, |
| "grad_norm": 0.13028002070390035, |
| "learning_rate": 5e-05, |
| "loss": 1.6448, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.1885270692610141, |
| "grad_norm": 0.1361939415163281, |
| "learning_rate": 5e-05, |
| "loss": 1.6432, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.189535235406795, |
| "grad_norm": 0.13811927402862617, |
| "learning_rate": 5e-05, |
| "loss": 1.6421, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.190543401552576, |
| "grad_norm": 0.13087126034252194, |
| "learning_rate": 5e-05, |
| "loss": 1.6426, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.1915515676983568, |
| "grad_norm": 0.14294244646531867, |
| "learning_rate": 5e-05, |
| "loss": 1.6554, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.1925597338441376, |
| "grad_norm": 0.14034413475200178, |
| "learning_rate": 5e-05, |
| "loss": 1.6532, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.1935678999899184, |
| "grad_norm": 0.13177978129089846, |
| "learning_rate": 5e-05, |
| "loss": 1.6269, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.1945760661356992, |
| "grad_norm": 0.12164759452371639, |
| "learning_rate": 5e-05, |
| "loss": 1.6411, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.19558423228148, |
| "grad_norm": 0.13393061368735346, |
| "learning_rate": 5e-05, |
| "loss": 1.6504, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.1965923984272608, |
| "grad_norm": 0.12922097478855823, |
| "learning_rate": 5e-05, |
| "loss": 1.644, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.1976005645730416, |
| "grad_norm": 0.13474046194928005, |
| "learning_rate": 5e-05, |
| "loss": 1.6357, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.1986087307188225, |
| "grad_norm": 0.13416139424027906, |
| "learning_rate": 5e-05, |
| "loss": 1.6253, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.1996168968646033, |
| "grad_norm": 0.12424536100362801, |
| "learning_rate": 5e-05, |
| "loss": 1.6597, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.200625063010384, |
| "grad_norm": 0.12859940727054653, |
| "learning_rate": 5e-05, |
| "loss": 1.6473, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.2016332291561649, |
| "grad_norm": 0.13774315044583574, |
| "learning_rate": 5e-05, |
| "loss": 1.6433, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.2026413953019457, |
| "grad_norm": 0.13474605371401327, |
| "learning_rate": 5e-05, |
| "loss": 1.6429, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.2036495614477265, |
| "grad_norm": 0.13137818581832644, |
| "learning_rate": 5e-05, |
| "loss": 1.6361, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.2046577275935073, |
| "grad_norm": 0.12998129292792446, |
| "learning_rate": 5e-05, |
| "loss": 1.642, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.2056658937392881, |
| "grad_norm": 0.13364766964304525, |
| "learning_rate": 5e-05, |
| "loss": 1.647, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.2066740598850692, |
| "grad_norm": 0.13387780726266185, |
| "learning_rate": 5e-05, |
| "loss": 1.639, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.20768222603085, |
| "grad_norm": 0.1262397535621686, |
| "learning_rate": 5e-05, |
| "loss": 1.6516, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.2086903921766308, |
| "grad_norm": 0.1335534592115057, |
| "learning_rate": 5e-05, |
| "loss": 1.6357, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.2096985583224116, |
| "grad_norm": 0.12213304785010605, |
| "learning_rate": 5e-05, |
| "loss": 1.6424, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.2107067244681924, |
| "grad_norm": 0.1301733256476672, |
| "learning_rate": 5e-05, |
| "loss": 1.6374, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.2117148906139732, |
| "grad_norm": 0.1264707123675731, |
| "learning_rate": 5e-05, |
| "loss": 1.6634, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.212723056759754, |
| "grad_norm": 0.13464550491855465, |
| "learning_rate": 5e-05, |
| "loss": 1.6452, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.2137312229055348, |
| "grad_norm": 0.12740550424329222, |
| "learning_rate": 5e-05, |
| "loss": 1.6259, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.2147393890513156, |
| "grad_norm": 0.12745955103663922, |
| "learning_rate": 5e-05, |
| "loss": 1.6616, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.2157475551970964, |
| "grad_norm": 0.1388644513951216, |
| "learning_rate": 5e-05, |
| "loss": 1.6387, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.2167557213428772, |
| "grad_norm": 0.1341359780867019, |
| "learning_rate": 5e-05, |
| "loss": 1.6458, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.217763887488658, |
| "grad_norm": 0.22124542084376875, |
| "learning_rate": 5e-05, |
| "loss": 1.6437, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.218772053634439, |
| "grad_norm": 0.13394553567218737, |
| "learning_rate": 5e-05, |
| "loss": 1.6443, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.2197802197802199, |
| "grad_norm": 0.13108124495407641, |
| "learning_rate": 5e-05, |
| "loss": 1.6375, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.2207883859260007, |
| "grad_norm": 0.13289777245013853, |
| "learning_rate": 5e-05, |
| "loss": 1.6245, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.2217965520717815, |
| "grad_norm": 0.13453450266809466, |
| "learning_rate": 5e-05, |
| "loss": 1.6338, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.2228047182175623, |
| "grad_norm": 0.17614553222683538, |
| "learning_rate": 5e-05, |
| "loss": 1.6254, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.2238128843633431, |
| "grad_norm": 0.12999419428479778, |
| "learning_rate": 5e-05, |
| "loss": 1.6345, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.224821050509124, |
| "grad_norm": 0.13534104691368284, |
| "learning_rate": 5e-05, |
| "loss": 1.6466, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.2258292166549047, |
| "grad_norm": 0.13500513832725639, |
| "learning_rate": 5e-05, |
| "loss": 1.6463, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.2268373828006855, |
| "grad_norm": 0.13460460013548567, |
| "learning_rate": 5e-05, |
| "loss": 1.6284, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.2278455489464664, |
| "grad_norm": 0.14239914142179805, |
| "learning_rate": 5e-05, |
| "loss": 1.6321, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.2288537150922472, |
| "grad_norm": 0.13003403900073976, |
| "learning_rate": 5e-05, |
| "loss": 1.6315, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.229861881238028, |
| "grad_norm": 0.1375130522018261, |
| "learning_rate": 5e-05, |
| "loss": 1.6507, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.2308700473838088, |
| "grad_norm": 0.14339474655375617, |
| "learning_rate": 5e-05, |
| "loss": 1.6321, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.2318782135295896, |
| "grad_norm": 0.9032408125998735, |
| "learning_rate": 5e-05, |
| "loss": 1.632, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.2328863796753704, |
| "grad_norm": 0.1474587794305201, |
| "learning_rate": 5e-05, |
| "loss": 1.6427, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.2338945458211512, |
| "grad_norm": 0.14814555142158561, |
| "learning_rate": 5e-05, |
| "loss": 1.6494, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.2349027119669322, |
| "grad_norm": 0.14395647101199663, |
| "learning_rate": 5e-05, |
| "loss": 1.6268, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.235910878112713, |
| "grad_norm": 0.13929910981835994, |
| "learning_rate": 5e-05, |
| "loss": 1.6295, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.2369190442584939, |
| "grad_norm": 0.14002983850498224, |
| "learning_rate": 5e-05, |
| "loss": 1.6522, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.2379272104042747, |
| "grad_norm": 0.1353192537450001, |
| "learning_rate": 5e-05, |
| "loss": 1.6309, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.2389353765500555, |
| "grad_norm": 0.13292267565626098, |
| "learning_rate": 5e-05, |
| "loss": 1.6428, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.2399435426958363, |
| "grad_norm": 0.13456391714773153, |
| "learning_rate": 5e-05, |
| "loss": 1.629, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.240951708841617, |
| "grad_norm": 0.13788714692286827, |
| "learning_rate": 5e-05, |
| "loss": 1.6346, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.241959874987398, |
| "grad_norm": 0.14423261316710512, |
| "learning_rate": 5e-05, |
| "loss": 1.6448, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.2429680411331787, |
| "grad_norm": 0.14447654635989376, |
| "learning_rate": 5e-05, |
| "loss": 1.6462, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.2439762072789595, |
| "grad_norm": 0.1434544966920315, |
| "learning_rate": 5e-05, |
| "loss": 1.649, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.2449843734247403, |
| "grad_norm": 0.14777426191966908, |
| "learning_rate": 5e-05, |
| "loss": 1.6367, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.2459925395705211, |
| "grad_norm": 0.13612570962263057, |
| "learning_rate": 5e-05, |
| "loss": 1.6166, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.2470007057163022, |
| "grad_norm": 0.14476607599552396, |
| "learning_rate": 5e-05, |
| "loss": 1.6263, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.248008871862083, |
| "grad_norm": 0.13145764552676129, |
| "learning_rate": 5e-05, |
| "loss": 1.6451, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.2490170380078638, |
| "grad_norm": 0.1490958446557475, |
| "learning_rate": 5e-05, |
| "loss": 1.6182, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.2500252041536446, |
| "grad_norm": 0.13808203154322748, |
| "learning_rate": 5e-05, |
| "loss": 1.6477, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.2510333702994254, |
| "grad_norm": 0.14138072513679126, |
| "learning_rate": 5e-05, |
| "loss": 1.6399, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.2520415364452062, |
| "grad_norm": 0.1320433504886423, |
| "learning_rate": 5e-05, |
| "loss": 1.6212, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.253049702590987, |
| "grad_norm": 0.13903842537780303, |
| "learning_rate": 5e-05, |
| "loss": 1.646, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.2540578687367678, |
| "grad_norm": 0.2562763748818742, |
| "learning_rate": 5e-05, |
| "loss": 1.6453, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.2550660348825486, |
| "grad_norm": 0.13135629946561656, |
| "learning_rate": 5e-05, |
| "loss": 1.6308, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.2560742010283295, |
| "grad_norm": 0.14459808940104862, |
| "learning_rate": 5e-05, |
| "loss": 1.6379, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.2570823671741103, |
| "grad_norm": 0.14002530789490344, |
| "learning_rate": 5e-05, |
| "loss": 1.6452, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.258090533319891, |
| "grad_norm": 0.13156584118231135, |
| "learning_rate": 5e-05, |
| "loss": 1.6396, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.2590986994656719, |
| "grad_norm": 0.1446357476831878, |
| "learning_rate": 5e-05, |
| "loss": 1.641, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.2601068656114527, |
| "grad_norm": 0.13668026201833627, |
| "learning_rate": 5e-05, |
| "loss": 1.654, |
| "step": 1249 |
| }, |
| { |
| "epoch": 1.2611150317572335, |
| "grad_norm": 0.14431276066509344, |
| "learning_rate": 5e-05, |
| "loss": 1.6429, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.2621231979030143, |
| "grad_norm": 0.13805388076215672, |
| "learning_rate": 5e-05, |
| "loss": 1.6422, |
| "step": 1251 |
| }, |
| { |
| "epoch": 1.2631313640487951, |
| "grad_norm": 0.127891326472375, |
| "learning_rate": 5e-05, |
| "loss": 1.6512, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.2641395301945761, |
| "grad_norm": 0.1380651670587077, |
| "learning_rate": 5e-05, |
| "loss": 1.6556, |
| "step": 1253 |
| }, |
| { |
| "epoch": 1.265147696340357, |
| "grad_norm": 0.13354225518347246, |
| "learning_rate": 5e-05, |
| "loss": 1.6314, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.2661558624861378, |
| "grad_norm": 0.14441013224418936, |
| "learning_rate": 5e-05, |
| "loss": 1.6374, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.2671640286319186, |
| "grad_norm": 0.1467779285068228, |
| "learning_rate": 5e-05, |
| "loss": 1.6457, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.2681721947776994, |
| "grad_norm": 0.18403393179481492, |
| "learning_rate": 5e-05, |
| "loss": 1.6244, |
| "step": 1257 |
| }, |
| { |
| "epoch": 1.2691803609234802, |
| "grad_norm": 0.14105230329683727, |
| "learning_rate": 5e-05, |
| "loss": 1.6263, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.270188527069261, |
| "grad_norm": 0.14021432110623025, |
| "learning_rate": 5e-05, |
| "loss": 1.6344, |
| "step": 1259 |
| }, |
| { |
| "epoch": 1.2711966932150418, |
| "grad_norm": 0.14733366499856676, |
| "learning_rate": 5e-05, |
| "loss": 1.6422, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.2722048593608226, |
| "grad_norm": 0.12082105925908934, |
| "learning_rate": 5e-05, |
| "loss": 1.6357, |
| "step": 1261 |
| }, |
| { |
| "epoch": 1.2732130255066034, |
| "grad_norm": 0.15655495903198283, |
| "learning_rate": 5e-05, |
| "loss": 1.641, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.2742211916523842, |
| "grad_norm": 0.128474136803076, |
| "learning_rate": 5e-05, |
| "loss": 1.6574, |
| "step": 1263 |
| }, |
| { |
| "epoch": 1.2752293577981653, |
| "grad_norm": 0.14881569196732872, |
| "learning_rate": 5e-05, |
| "loss": 1.626, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.276237523943946, |
| "grad_norm": 0.13311171207901276, |
| "learning_rate": 5e-05, |
| "loss": 1.6443, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.2772456900897269, |
| "grad_norm": 0.1252689256815893, |
| "learning_rate": 5e-05, |
| "loss": 1.6238, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.2782538562355077, |
| "grad_norm": 0.13341970118068097, |
| "learning_rate": 5e-05, |
| "loss": 1.6411, |
| "step": 1267 |
| }, |
| { |
| "epoch": 1.2792620223812885, |
| "grad_norm": 0.1324244420868155, |
| "learning_rate": 5e-05, |
| "loss": 1.6365, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.2802701885270693, |
| "grad_norm": 0.12881476074167852, |
| "learning_rate": 5e-05, |
| "loss": 1.652, |
| "step": 1269 |
| }, |
| { |
| "epoch": 1.2812783546728501, |
| "grad_norm": 0.12856209242490413, |
| "learning_rate": 5e-05, |
| "loss": 1.6406, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.282286520818631, |
| "grad_norm": 0.13208309716233654, |
| "learning_rate": 5e-05, |
| "loss": 1.6298, |
| "step": 1271 |
| }, |
| { |
| "epoch": 1.2832946869644117, |
| "grad_norm": 0.13939720107304857, |
| "learning_rate": 5e-05, |
| "loss": 1.6312, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.2843028531101925, |
| "grad_norm": 0.14248500778548934, |
| "learning_rate": 5e-05, |
| "loss": 1.6518, |
| "step": 1273 |
| }, |
| { |
| "epoch": 1.2853110192559734, |
| "grad_norm": 0.13631582079163687, |
| "learning_rate": 5e-05, |
| "loss": 1.6503, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.2863191854017542, |
| "grad_norm": 0.5103305312002676, |
| "learning_rate": 5e-05, |
| "loss": 1.6421, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.287327351547535, |
| "grad_norm": 0.12896301140990832, |
| "learning_rate": 5e-05, |
| "loss": 1.6398, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.2883355176933158, |
| "grad_norm": 0.12142844514497131, |
| "learning_rate": 5e-05, |
| "loss": 1.6239, |
| "step": 1277 |
| }, |
| { |
| "epoch": 1.2893436838390966, |
| "grad_norm": 0.13477045845515837, |
| "learning_rate": 5e-05, |
| "loss": 1.6259, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.2903518499848774, |
| "grad_norm": 0.13139882642042439, |
| "learning_rate": 5e-05, |
| "loss": 1.6465, |
| "step": 1279 |
| }, |
| { |
| "epoch": 1.2913600161306582, |
| "grad_norm": 0.1351573799385729, |
| "learning_rate": 5e-05, |
| "loss": 1.6385, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.2923681822764392, |
| "grad_norm": 0.13532575707387254, |
| "learning_rate": 5e-05, |
| "loss": 1.655, |
| "step": 1281 |
| }, |
| { |
| "epoch": 1.29337634842222, |
| "grad_norm": 0.15211481123741347, |
| "learning_rate": 5e-05, |
| "loss": 1.6241, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.2943845145680009, |
| "grad_norm": 0.14059638838644756, |
| "learning_rate": 5e-05, |
| "loss": 1.6411, |
| "step": 1283 |
| }, |
| { |
| "epoch": 1.2953926807137817, |
| "grad_norm": 0.1478517568247696, |
| "learning_rate": 5e-05, |
| "loss": 1.6185, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.2964008468595625, |
| "grad_norm": 0.13518147900413588, |
| "learning_rate": 5e-05, |
| "loss": 1.6549, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.2974090130053433, |
| "grad_norm": 0.14127263362808326, |
| "learning_rate": 5e-05, |
| "loss": 1.6169, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.298417179151124, |
| "grad_norm": 0.1435161140567047, |
| "learning_rate": 5e-05, |
| "loss": 1.6505, |
| "step": 1287 |
| }, |
| { |
| "epoch": 1.299425345296905, |
| "grad_norm": 0.262970067626453, |
| "learning_rate": 5e-05, |
| "loss": 1.6175, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.3004335114426857, |
| "grad_norm": 0.14386872816206583, |
| "learning_rate": 5e-05, |
| "loss": 1.6399, |
| "step": 1289 |
| }, |
| { |
| "epoch": 1.3014416775884665, |
| "grad_norm": 0.13310896853865345, |
| "learning_rate": 5e-05, |
| "loss": 1.6423, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.3024498437342473, |
| "grad_norm": 0.12947071002773367, |
| "learning_rate": 5e-05, |
| "loss": 1.6238, |
| "step": 1291 |
| }, |
| { |
| "epoch": 1.3034580098800284, |
| "grad_norm": 0.13778793067032488, |
| "learning_rate": 5e-05, |
| "loss": 1.6463, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.3044661760258092, |
| "grad_norm": 0.12863210061676175, |
| "learning_rate": 5e-05, |
| "loss": 1.6515, |
| "step": 1293 |
| }, |
| { |
| "epoch": 1.30547434217159, |
| "grad_norm": 0.1400329870431008, |
| "learning_rate": 5e-05, |
| "loss": 1.6384, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.3064825083173708, |
| "grad_norm": 0.14907669819312522, |
| "learning_rate": 5e-05, |
| "loss": 1.6547, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.3074906744631516, |
| "grad_norm": 0.13952657031737695, |
| "learning_rate": 5e-05, |
| "loss": 1.6494, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.3084988406089324, |
| "grad_norm": 0.14408484354195883, |
| "learning_rate": 5e-05, |
| "loss": 1.6359, |
| "step": 1297 |
| }, |
| { |
| "epoch": 1.3095070067547132, |
| "grad_norm": 0.14741323470832812, |
| "learning_rate": 5e-05, |
| "loss": 1.6414, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.310515172900494, |
| "grad_norm": 0.14470601610286588, |
| "learning_rate": 5e-05, |
| "loss": 1.6524, |
| "step": 1299 |
| }, |
| { |
| "epoch": 1.3115233390462748, |
| "grad_norm": 0.13677564741215217, |
| "learning_rate": 5e-05, |
| "loss": 1.6182, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.3125315051920556, |
| "grad_norm": 0.1509685257674073, |
| "learning_rate": 5e-05, |
| "loss": 1.6505, |
| "step": 1301 |
| }, |
| { |
| "epoch": 1.3135396713378364, |
| "grad_norm": 0.1379807694079571, |
| "learning_rate": 5e-05, |
| "loss": 1.6241, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.3145478374836173, |
| "grad_norm": 0.14359330330139006, |
| "learning_rate": 5e-05, |
| "loss": 1.6485, |
| "step": 1303 |
| }, |
| { |
| "epoch": 1.315556003629398, |
| "grad_norm": 0.1380525441991341, |
| "learning_rate": 5e-05, |
| "loss": 1.6367, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.3165641697751789, |
| "grad_norm": 0.12872879401596335, |
| "learning_rate": 5e-05, |
| "loss": 1.6273, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.3175723359209597, |
| "grad_norm": 0.15181805868172427, |
| "learning_rate": 5e-05, |
| "loss": 1.6312, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.3185805020667405, |
| "grad_norm": 0.13761906701342136, |
| "learning_rate": 5e-05, |
| "loss": 1.6308, |
| "step": 1307 |
| }, |
| { |
| "epoch": 1.3195886682125213, |
| "grad_norm": 0.13196832773553202, |
| "learning_rate": 5e-05, |
| "loss": 1.6312, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.3205968343583023, |
| "grad_norm": 0.14610514452766105, |
| "learning_rate": 5e-05, |
| "loss": 1.6492, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.3216050005040831, |
| "grad_norm": 0.13794310427945244, |
| "learning_rate": 5e-05, |
| "loss": 1.6377, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.322613166649864, |
| "grad_norm": 0.13378727396876383, |
| "learning_rate": 5e-05, |
| "loss": 1.626, |
| "step": 1311 |
| }, |
| { |
| "epoch": 1.3236213327956448, |
| "grad_norm": 0.13461401880559, |
| "learning_rate": 5e-05, |
| "loss": 1.6367, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.3246294989414256, |
| "grad_norm": 0.1466088079580074, |
| "learning_rate": 5e-05, |
| "loss": 1.6206, |
| "step": 1313 |
| }, |
| { |
| "epoch": 1.3256376650872064, |
| "grad_norm": 0.12716896573606204, |
| "learning_rate": 5e-05, |
| "loss": 1.6464, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.3266458312329872, |
| "grad_norm": 0.14687168001753023, |
| "learning_rate": 5e-05, |
| "loss": 1.6417, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.327653997378768, |
| "grad_norm": 0.1472725540417992, |
| "learning_rate": 5e-05, |
| "loss": 1.6463, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.3286621635245488, |
| "grad_norm": 0.2813113312514839, |
| "learning_rate": 5e-05, |
| "loss": 1.6317, |
| "step": 1317 |
| }, |
| { |
| "epoch": 1.3296703296703296, |
| "grad_norm": 0.14714981053972961, |
| "learning_rate": 5e-05, |
| "loss": 1.6495, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.3306784958161104, |
| "grad_norm": 0.14529120808295362, |
| "learning_rate": 5e-05, |
| "loss": 1.6332, |
| "step": 1319 |
| }, |
| { |
| "epoch": 1.3316866619618914, |
| "grad_norm": 0.1266252330698273, |
| "learning_rate": 5e-05, |
| "loss": 1.6266, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.3326948281076723, |
| "grad_norm": 0.13448730108032922, |
| "learning_rate": 5e-05, |
| "loss": 1.6386, |
| "step": 1321 |
| }, |
| { |
| "epoch": 1.333702994253453, |
| "grad_norm": 0.14425479757866708, |
| "learning_rate": 5e-05, |
| "loss": 1.6537, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.3347111603992339, |
| "grad_norm": 0.1367216965708987, |
| "learning_rate": 5e-05, |
| "loss": 1.6413, |
| "step": 1323 |
| }, |
| { |
| "epoch": 1.3357193265450147, |
| "grad_norm": 0.13294074465376157, |
| "learning_rate": 5e-05, |
| "loss": 1.623, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.3367274926907955, |
| "grad_norm": 0.13725476866885875, |
| "learning_rate": 5e-05, |
| "loss": 1.6438, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.3377356588365763, |
| "grad_norm": 0.13523299845821615, |
| "learning_rate": 5e-05, |
| "loss": 1.6393, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.3387438249823571, |
| "grad_norm": 0.12945209128402962, |
| "learning_rate": 5e-05, |
| "loss": 1.6244, |
| "step": 1327 |
| }, |
| { |
| "epoch": 1.339751991128138, |
| "grad_norm": 0.1418573980815489, |
| "learning_rate": 5e-05, |
| "loss": 1.6448, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.3407601572739187, |
| "grad_norm": 0.1439901458961193, |
| "learning_rate": 5e-05, |
| "loss": 1.6268, |
| "step": 1329 |
| }, |
| { |
| "epoch": 1.3417683234196995, |
| "grad_norm": 0.13282615605008152, |
| "learning_rate": 5e-05, |
| "loss": 1.625, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.3427764895654803, |
| "grad_norm": 0.14182252901536777, |
| "learning_rate": 5e-05, |
| "loss": 1.6522, |
| "step": 1331 |
| }, |
| { |
| "epoch": 1.3437846557112612, |
| "grad_norm": 0.1322659099290378, |
| "learning_rate": 5e-05, |
| "loss": 1.6484, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.344792821857042, |
| "grad_norm": 0.14218642066492232, |
| "learning_rate": 5e-05, |
| "loss": 1.621, |
| "step": 1333 |
| }, |
| { |
| "epoch": 1.3458009880028228, |
| "grad_norm": 0.14964017176346037, |
| "learning_rate": 5e-05, |
| "loss": 1.6405, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.3468091541486036, |
| "grad_norm": 0.1285209866495888, |
| "learning_rate": 5e-05, |
| "loss": 1.6267, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.3478173202943844, |
| "grad_norm": 0.13753938035954444, |
| "learning_rate": 5e-05, |
| "loss": 1.645, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.3488254864401654, |
| "grad_norm": 0.13202352215553254, |
| "learning_rate": 5e-05, |
| "loss": 1.6327, |
| "step": 1337 |
| }, |
| { |
| "epoch": 1.3498336525859462, |
| "grad_norm": 0.13901724623518463, |
| "learning_rate": 5e-05, |
| "loss": 1.6554, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.350841818731727, |
| "grad_norm": 0.1295242891524851, |
| "learning_rate": 5e-05, |
| "loss": 1.6303, |
| "step": 1339 |
| }, |
| { |
| "epoch": 1.3518499848775078, |
| "grad_norm": 0.12942845751350548, |
| "learning_rate": 5e-05, |
| "loss": 1.644, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.3528581510232887, |
| "grad_norm": 0.1423410306591456, |
| "learning_rate": 5e-05, |
| "loss": 1.63, |
| "step": 1341 |
| }, |
| { |
| "epoch": 1.3538663171690695, |
| "grad_norm": 0.13302313269191274, |
| "learning_rate": 5e-05, |
| "loss": 1.6114, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.3548744833148503, |
| "grad_norm": 0.12861793129843543, |
| "learning_rate": 5e-05, |
| "loss": 1.6396, |
| "step": 1343 |
| }, |
| { |
| "epoch": 1.355882649460631, |
| "grad_norm": 0.1402971078531711, |
| "learning_rate": 5e-05, |
| "loss": 1.6351, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.356890815606412, |
| "grad_norm": 0.14298016010767073, |
| "learning_rate": 5e-05, |
| "loss": 1.6259, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.3578989817521927, |
| "grad_norm": 0.12968525696099564, |
| "learning_rate": 5e-05, |
| "loss": 1.6244, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.3589071478979735, |
| "grad_norm": 0.13672510109372069, |
| "learning_rate": 5e-05, |
| "loss": 1.6124, |
| "step": 1347 |
| }, |
| { |
| "epoch": 1.3599153140437545, |
| "grad_norm": 0.1369770021713587, |
| "learning_rate": 5e-05, |
| "loss": 1.6213, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.3609234801895354, |
| "grad_norm": 0.14972171375276808, |
| "learning_rate": 5e-05, |
| "loss": 1.6504, |
| "step": 1349 |
| }, |
| { |
| "epoch": 1.3619316463353162, |
| "grad_norm": 0.13548138877666627, |
| "learning_rate": 5e-05, |
| "loss": 1.6246, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.362939812481097, |
| "grad_norm": 0.12731452256887532, |
| "learning_rate": 5e-05, |
| "loss": 1.6348, |
| "step": 1351 |
| }, |
| { |
| "epoch": 1.3639479786268778, |
| "grad_norm": 0.13106154830975245, |
| "learning_rate": 5e-05, |
| "loss": 1.6252, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.3649561447726586, |
| "grad_norm": 0.12384538188385992, |
| "learning_rate": 5e-05, |
| "loss": 1.6318, |
| "step": 1353 |
| }, |
| { |
| "epoch": 1.3659643109184394, |
| "grad_norm": 0.124405294327545, |
| "learning_rate": 5e-05, |
| "loss": 1.6235, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.3669724770642202, |
| "grad_norm": 0.12918504823168622, |
| "learning_rate": 5e-05, |
| "loss": 1.6256, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.367980643210001, |
| "grad_norm": 0.12337217263498097, |
| "learning_rate": 5e-05, |
| "loss": 1.6166, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.3689888093557818, |
| "grad_norm": 0.12962612029855872, |
| "learning_rate": 5e-05, |
| "loss": 1.6329, |
| "step": 1357 |
| }, |
| { |
| "epoch": 1.3699969755015626, |
| "grad_norm": 0.12086261928461686, |
| "learning_rate": 5e-05, |
| "loss": 1.6468, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.3710051416473434, |
| "grad_norm": 0.12948482979477746, |
| "learning_rate": 5e-05, |
| "loss": 1.6402, |
| "step": 1359 |
| }, |
| { |
| "epoch": 1.3720133077931242, |
| "grad_norm": 0.125839711380269, |
| "learning_rate": 5e-05, |
| "loss": 1.6469, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.373021473938905, |
| "grad_norm": 0.12573772766040806, |
| "learning_rate": 5e-05, |
| "loss": 1.6168, |
| "step": 1361 |
| }, |
| { |
| "epoch": 1.3740296400846859, |
| "grad_norm": 0.1290446046069406, |
| "learning_rate": 5e-05, |
| "loss": 1.6276, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.3750378062304667, |
| "grad_norm": 0.13146037117405457, |
| "learning_rate": 5e-05, |
| "loss": 1.6368, |
| "step": 1363 |
| }, |
| { |
| "epoch": 1.3760459723762475, |
| "grad_norm": 0.12755209258029462, |
| "learning_rate": 5e-05, |
| "loss": 1.6457, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.3770541385220283, |
| "grad_norm": 0.13647382863573795, |
| "learning_rate": 5e-05, |
| "loss": 1.6253, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.3780623046678093, |
| "grad_norm": 0.12246578130156244, |
| "learning_rate": 5e-05, |
| "loss": 1.6187, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.3790704708135901, |
| "grad_norm": 0.12975737421478123, |
| "learning_rate": 5e-05, |
| "loss": 1.6388, |
| "step": 1367 |
| }, |
| { |
| "epoch": 1.380078636959371, |
| "grad_norm": 0.12116689228695646, |
| "learning_rate": 5e-05, |
| "loss": 1.6268, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.3810868031051518, |
| "grad_norm": 0.13078737733785906, |
| "learning_rate": 5e-05, |
| "loss": 1.624, |
| "step": 1369 |
| }, |
| { |
| "epoch": 1.3820949692509326, |
| "grad_norm": 0.13726230514420107, |
| "learning_rate": 5e-05, |
| "loss": 1.6562, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.3831031353967134, |
| "grad_norm": 0.1260785537305176, |
| "learning_rate": 5e-05, |
| "loss": 1.6444, |
| "step": 1371 |
| }, |
| { |
| "epoch": 1.3841113015424942, |
| "grad_norm": 0.13315635100403078, |
| "learning_rate": 5e-05, |
| "loss": 1.6475, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.385119467688275, |
| "grad_norm": 0.13844883164175295, |
| "learning_rate": 5e-05, |
| "loss": 1.6258, |
| "step": 1373 |
| }, |
| { |
| "epoch": 1.3861276338340558, |
| "grad_norm": 0.12683794742559432, |
| "learning_rate": 5e-05, |
| "loss": 1.6413, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.3871357999798366, |
| "grad_norm": 0.12598536946678948, |
| "learning_rate": 5e-05, |
| "loss": 1.629, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.3881439661256176, |
| "grad_norm": 0.12784468233003735, |
| "learning_rate": 5e-05, |
| "loss": 1.6074, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.3891521322713984, |
| "grad_norm": 0.1312115590212781, |
| "learning_rate": 5e-05, |
| "loss": 1.6326, |
| "step": 1377 |
| }, |
| { |
| "epoch": 1.3901602984171793, |
| "grad_norm": 0.12983030111730282, |
| "learning_rate": 5e-05, |
| "loss": 1.6264, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.39116846456296, |
| "grad_norm": 0.21725924278257416, |
| "learning_rate": 5e-05, |
| "loss": 1.6359, |
| "step": 1379 |
| }, |
| { |
| "epoch": 1.3921766307087409, |
| "grad_norm": 0.1332941952033477, |
| "learning_rate": 5e-05, |
| "loss": 1.6322, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.3931847968545217, |
| "grad_norm": 0.13413967271869495, |
| "learning_rate": 5e-05, |
| "loss": 1.6144, |
| "step": 1381 |
| }, |
| { |
| "epoch": 1.3941929630003025, |
| "grad_norm": 0.14430992905252235, |
| "learning_rate": 5e-05, |
| "loss": 1.6407, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.3952011291460833, |
| "grad_norm": 0.13923694142317142, |
| "learning_rate": 5e-05, |
| "loss": 1.6267, |
| "step": 1383 |
| }, |
| { |
| "epoch": 1.396209295291864, |
| "grad_norm": 0.12797099248284885, |
| "learning_rate": 5e-05, |
| "loss": 1.6018, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.397217461437645, |
| "grad_norm": 0.1307895540629942, |
| "learning_rate": 5e-05, |
| "loss": 1.6366, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.3982256275834257, |
| "grad_norm": 0.12571320908006497, |
| "learning_rate": 5e-05, |
| "loss": 1.62, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.3992337937292065, |
| "grad_norm": 0.3542423664590062, |
| "learning_rate": 5e-05, |
| "loss": 1.6325, |
| "step": 1387 |
| }, |
| { |
| "epoch": 1.4002419598749873, |
| "grad_norm": 0.12876602312218682, |
| "learning_rate": 5e-05, |
| "loss": 1.6416, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.4012501260207681, |
| "grad_norm": 0.13824360520518839, |
| "learning_rate": 5e-05, |
| "loss": 1.6345, |
| "step": 1389 |
| }, |
| { |
| "epoch": 1.402258292166549, |
| "grad_norm": 0.121799646242387, |
| "learning_rate": 5e-05, |
| "loss": 1.6179, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.4032664583123298, |
| "grad_norm": 0.13664100958490472, |
| "learning_rate": 5e-05, |
| "loss": 1.641, |
| "step": 1391 |
| }, |
| { |
| "epoch": 1.4042746244581106, |
| "grad_norm": 0.12576756467632957, |
| "learning_rate": 5e-05, |
| "loss": 1.6237, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.4052827906038914, |
| "grad_norm": 0.1439018295437482, |
| "learning_rate": 5e-05, |
| "loss": 1.6273, |
| "step": 1393 |
| }, |
| { |
| "epoch": 1.4062909567496724, |
| "grad_norm": 0.13003815870667002, |
| "learning_rate": 5e-05, |
| "loss": 1.6219, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.4072991228954532, |
| "grad_norm": 0.13687971905369478, |
| "learning_rate": 5e-05, |
| "loss": 1.6197, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.408307289041234, |
| "grad_norm": 0.1375315943365244, |
| "learning_rate": 5e-05, |
| "loss": 1.6364, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.4093154551870148, |
| "grad_norm": 0.13327720464715445, |
| "learning_rate": 5e-05, |
| "loss": 1.623, |
| "step": 1397 |
| }, |
| { |
| "epoch": 1.4103236213327957, |
| "grad_norm": 0.13341972541206865, |
| "learning_rate": 5e-05, |
| "loss": 1.6217, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.4113317874785765, |
| "grad_norm": 0.13173002538223347, |
| "learning_rate": 5e-05, |
| "loss": 1.6118, |
| "step": 1399 |
| }, |
| { |
| "epoch": 1.4123399536243573, |
| "grad_norm": 0.1440735510707323, |
| "learning_rate": 5e-05, |
| "loss": 1.6423, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.413348119770138, |
| "grad_norm": 0.12973621264990803, |
| "learning_rate": 5e-05, |
| "loss": 1.6091, |
| "step": 1401 |
| }, |
| { |
| "epoch": 1.4143562859159189, |
| "grad_norm": 0.1387503285921373, |
| "learning_rate": 5e-05, |
| "loss": 1.6255, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.4153644520616997, |
| "grad_norm": 0.1287204365133802, |
| "learning_rate": 5e-05, |
| "loss": 1.6251, |
| "step": 1403 |
| }, |
| { |
| "epoch": 1.4163726182074807, |
| "grad_norm": 0.13824194326191094, |
| "learning_rate": 5e-05, |
| "loss": 1.6326, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.4173807843532615, |
| "grad_norm": 0.14030055845752487, |
| "learning_rate": 5e-05, |
| "loss": 1.6248, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.4183889504990423, |
| "grad_norm": 0.1327154607182318, |
| "learning_rate": 5e-05, |
| "loss": 1.6165, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.4193971166448232, |
| "grad_norm": 0.14178972226561212, |
| "learning_rate": 5e-05, |
| "loss": 1.614, |
| "step": 1407 |
| }, |
| { |
| "epoch": 1.420405282790604, |
| "grad_norm": 0.12894624684755449, |
| "learning_rate": 5e-05, |
| "loss": 1.6351, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.4214134489363848, |
| "grad_norm": 0.15528603445217812, |
| "learning_rate": 5e-05, |
| "loss": 1.6523, |
| "step": 1409 |
| }, |
| { |
| "epoch": 1.4224216150821656, |
| "grad_norm": 0.1353781562647246, |
| "learning_rate": 5e-05, |
| "loss": 1.6179, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.4234297812279464, |
| "grad_norm": 0.13026314546525541, |
| "learning_rate": 5e-05, |
| "loss": 1.6151, |
| "step": 1411 |
| }, |
| { |
| "epoch": 1.4244379473737272, |
| "grad_norm": 0.13180261150357153, |
| "learning_rate": 5e-05, |
| "loss": 1.6429, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.425446113519508, |
| "grad_norm": 0.13287377471543904, |
| "learning_rate": 5e-05, |
| "loss": 1.6339, |
| "step": 1413 |
| }, |
| { |
| "epoch": 1.4264542796652888, |
| "grad_norm": 0.12721440160192868, |
| "learning_rate": 5e-05, |
| "loss": 1.6333, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.4274624458110696, |
| "grad_norm": 0.12732334827178118, |
| "learning_rate": 5e-05, |
| "loss": 1.6236, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.4284706119568504, |
| "grad_norm": 0.13937418837251475, |
| "learning_rate": 5e-05, |
| "loss": 1.6365, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.4294787781026312, |
| "grad_norm": 0.13917627454938042, |
| "learning_rate": 5e-05, |
| "loss": 1.6504, |
| "step": 1417 |
| }, |
| { |
| "epoch": 1.430486944248412, |
| "grad_norm": 0.13390358153558804, |
| "learning_rate": 5e-05, |
| "loss": 1.6468, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.4314951103941929, |
| "grad_norm": 0.12976181090918484, |
| "learning_rate": 5e-05, |
| "loss": 1.6382, |
| "step": 1419 |
| }, |
| { |
| "epoch": 1.4325032765399737, |
| "grad_norm": 0.13271624982794075, |
| "learning_rate": 5e-05, |
| "loss": 1.6171, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.4335114426857545, |
| "grad_norm": 0.1705925536645093, |
| "learning_rate": 5e-05, |
| "loss": 1.6127, |
| "step": 1421 |
| }, |
| { |
| "epoch": 1.4345196088315355, |
| "grad_norm": 0.13810164939076577, |
| "learning_rate": 5e-05, |
| "loss": 1.6375, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.4355277749773163, |
| "grad_norm": 0.15063906545851247, |
| "learning_rate": 5e-05, |
| "loss": 1.6106, |
| "step": 1423 |
| }, |
| { |
| "epoch": 1.4365359411230971, |
| "grad_norm": 0.13828434307491985, |
| "learning_rate": 5e-05, |
| "loss": 1.6278, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.437544107268878, |
| "grad_norm": 0.12320067761427174, |
| "learning_rate": 5e-05, |
| "loss": 1.6611, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.4385522734146587, |
| "grad_norm": 0.1473590960932424, |
| "learning_rate": 5e-05, |
| "loss": 1.6326, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.4395604395604396, |
| "grad_norm": 0.1429247807137141, |
| "learning_rate": 5e-05, |
| "loss": 1.6419, |
| "step": 1427 |
| }, |
| { |
| "epoch": 1.4405686057062204, |
| "grad_norm": 0.15239036053934374, |
| "learning_rate": 5e-05, |
| "loss": 1.6406, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.4415767718520012, |
| "grad_norm": 0.14066773498783475, |
| "learning_rate": 5e-05, |
| "loss": 1.6305, |
| "step": 1429 |
| }, |
| { |
| "epoch": 1.442584937997782, |
| "grad_norm": 0.15385686605084753, |
| "learning_rate": 5e-05, |
| "loss": 1.6213, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.4435931041435628, |
| "grad_norm": 0.13581356581239723, |
| "learning_rate": 5e-05, |
| "loss": 1.6371, |
| "step": 1431 |
| }, |
| { |
| "epoch": 1.4446012702893438, |
| "grad_norm": 0.15507768261137825, |
| "learning_rate": 5e-05, |
| "loss": 1.6354, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.4456094364351246, |
| "grad_norm": 0.13224979284586927, |
| "learning_rate": 5e-05, |
| "loss": 1.6474, |
| "step": 1433 |
| }, |
| { |
| "epoch": 1.4466176025809054, |
| "grad_norm": 0.1382288574532829, |
| "learning_rate": 5e-05, |
| "loss": 1.6344, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.4476257687266862, |
| "grad_norm": 0.13584124436539982, |
| "learning_rate": 5e-05, |
| "loss": 1.6154, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.448633934872467, |
| "grad_norm": 0.13671329188776624, |
| "learning_rate": 5e-05, |
| "loss": 1.6231, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.4496421010182479, |
| "grad_norm": 0.13666691741442263, |
| "learning_rate": 5e-05, |
| "loss": 1.6311, |
| "step": 1437 |
| }, |
| { |
| "epoch": 1.4506502671640287, |
| "grad_norm": 0.13064295993930966, |
| "learning_rate": 5e-05, |
| "loss": 1.6414, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.4516584333098095, |
| "grad_norm": 0.1443609737427036, |
| "learning_rate": 5e-05, |
| "loss": 1.639, |
| "step": 1439 |
| }, |
| { |
| "epoch": 1.4526665994555903, |
| "grad_norm": 0.1263187296916393, |
| "learning_rate": 5e-05, |
| "loss": 1.6262, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.453674765601371, |
| "grad_norm": 0.1404759867837024, |
| "learning_rate": 5e-05, |
| "loss": 1.6179, |
| "step": 1441 |
| }, |
| { |
| "epoch": 1.454682931747152, |
| "grad_norm": 0.13926923655101497, |
| "learning_rate": 5e-05, |
| "loss": 1.6274, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.4556910978929327, |
| "grad_norm": 0.14531337277887243, |
| "learning_rate": 5e-05, |
| "loss": 1.6167, |
| "step": 1443 |
| }, |
| { |
| "epoch": 1.4566992640387135, |
| "grad_norm": 0.14131640008793925, |
| "learning_rate": 5e-05, |
| "loss": 1.626, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.4577074301844943, |
| "grad_norm": 0.13671686790875545, |
| "learning_rate": 5e-05, |
| "loss": 1.6365, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.4587155963302751, |
| "grad_norm": 0.15206688858491618, |
| "learning_rate": 5e-05, |
| "loss": 1.6255, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.459723762476056, |
| "grad_norm": 0.13153412143553034, |
| "learning_rate": 5e-05, |
| "loss": 1.6154, |
| "step": 1447 |
| }, |
| { |
| "epoch": 1.4607319286218368, |
| "grad_norm": 0.14583017658822006, |
| "learning_rate": 5e-05, |
| "loss": 1.6436, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.4617400947676176, |
| "grad_norm": 0.13210222622246628, |
| "learning_rate": 5e-05, |
| "loss": 1.6285, |
| "step": 1449 |
| }, |
| { |
| "epoch": 1.4627482609133986, |
| "grad_norm": 0.1377997855343899, |
| "learning_rate": 5e-05, |
| "loss": 1.6456, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.4637564270591794, |
| "grad_norm": 0.15205802244172517, |
| "learning_rate": 5e-05, |
| "loss": 1.6382, |
| "step": 1451 |
| }, |
| { |
| "epoch": 1.4647645932049602, |
| "grad_norm": 0.12713100923946807, |
| "learning_rate": 5e-05, |
| "loss": 1.6229, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.465772759350741, |
| "grad_norm": 0.14136573990700596, |
| "learning_rate": 5e-05, |
| "loss": 1.6351, |
| "step": 1453 |
| }, |
| { |
| "epoch": 1.4667809254965218, |
| "grad_norm": 0.13839984348763215, |
| "learning_rate": 5e-05, |
| "loss": 1.6235, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.4677890916423026, |
| "grad_norm": 0.13224270697703103, |
| "learning_rate": 5e-05, |
| "loss": 1.6127, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.4687972577880835, |
| "grad_norm": 0.13995029322894012, |
| "learning_rate": 5e-05, |
| "loss": 1.6212, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.4698054239338643, |
| "grad_norm": 0.1343268792694107, |
| "learning_rate": 5e-05, |
| "loss": 1.6363, |
| "step": 1457 |
| }, |
| { |
| "epoch": 1.470813590079645, |
| "grad_norm": 0.13654059463569093, |
| "learning_rate": 5e-05, |
| "loss": 1.633, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.4718217562254259, |
| "grad_norm": 0.13691595619242897, |
| "learning_rate": 5e-05, |
| "loss": 1.6474, |
| "step": 1459 |
| }, |
| { |
| "epoch": 1.472829922371207, |
| "grad_norm": 0.1408875473463007, |
| "learning_rate": 5e-05, |
| "loss": 1.63, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.4738380885169877, |
| "grad_norm": 0.14665577417113682, |
| "learning_rate": 5e-05, |
| "loss": 1.6092, |
| "step": 1461 |
| }, |
| { |
| "epoch": 1.4748462546627685, |
| "grad_norm": 0.13392872202872624, |
| "learning_rate": 5e-05, |
| "loss": 1.6277, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.4758544208085493, |
| "grad_norm": 0.13318288146628018, |
| "learning_rate": 5e-05, |
| "loss": 1.6309, |
| "step": 1463 |
| }, |
| { |
| "epoch": 1.4768625869543301, |
| "grad_norm": 0.14756630254079095, |
| "learning_rate": 5e-05, |
| "loss": 1.62, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.477870753100111, |
| "grad_norm": 0.14478366008638208, |
| "learning_rate": 5e-05, |
| "loss": 1.6137, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.4788789192458918, |
| "grad_norm": 0.1387406186352145, |
| "learning_rate": 5e-05, |
| "loss": 1.6236, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.4798870853916726, |
| "grad_norm": 0.14007315709845755, |
| "learning_rate": 5e-05, |
| "loss": 1.6437, |
| "step": 1467 |
| }, |
| { |
| "epoch": 1.4808952515374534, |
| "grad_norm": 0.14807940769005204, |
| "learning_rate": 5e-05, |
| "loss": 1.6185, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.4819034176832342, |
| "grad_norm": 0.13015841439441841, |
| "learning_rate": 5e-05, |
| "loss": 1.64, |
| "step": 1469 |
| }, |
| { |
| "epoch": 1.482911583829015, |
| "grad_norm": 0.13423951958450828, |
| "learning_rate": 5e-05, |
| "loss": 1.6196, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.4839197499747958, |
| "grad_norm": 0.15393810166949654, |
| "learning_rate": 5e-05, |
| "loss": 1.6009, |
| "step": 1471 |
| }, |
| { |
| "epoch": 1.4849279161205766, |
| "grad_norm": 0.13589415860343146, |
| "learning_rate": 5e-05, |
| "loss": 1.6219, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.4859360822663574, |
| "grad_norm": 0.14671625526864973, |
| "learning_rate": 5e-05, |
| "loss": 1.6211, |
| "step": 1473 |
| }, |
| { |
| "epoch": 1.4869442484121382, |
| "grad_norm": 0.1353917088551739, |
| "learning_rate": 5e-05, |
| "loss": 1.6267, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.487952414557919, |
| "grad_norm": 0.14107952075322783, |
| "learning_rate": 5e-05, |
| "loss": 1.6234, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.4889605807036999, |
| "grad_norm": 0.13491768117614777, |
| "learning_rate": 5e-05, |
| "loss": 1.619, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.4899687468494807, |
| "grad_norm": 0.19622895258627024, |
| "learning_rate": 5e-05, |
| "loss": 1.6411, |
| "step": 1477 |
| }, |
| { |
| "epoch": 1.4909769129952617, |
| "grad_norm": 0.13347997431566885, |
| "learning_rate": 5e-05, |
| "loss": 1.6306, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.4919850791410425, |
| "grad_norm": 0.1416800687994707, |
| "learning_rate": 5e-05, |
| "loss": 1.6341, |
| "step": 1479 |
| }, |
| { |
| "epoch": 1.4929932452868233, |
| "grad_norm": 0.13591874359876954, |
| "learning_rate": 5e-05, |
| "loss": 1.6441, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.4940014114326041, |
| "grad_norm": 0.1408069374827294, |
| "learning_rate": 5e-05, |
| "loss": 1.621, |
| "step": 1481 |
| }, |
| { |
| "epoch": 1.495009577578385, |
| "grad_norm": 0.14029513798691906, |
| "learning_rate": 5e-05, |
| "loss": 1.6326, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.4960177437241657, |
| "grad_norm": 0.15447150259956755, |
| "learning_rate": 5e-05, |
| "loss": 1.6438, |
| "step": 1483 |
| }, |
| { |
| "epoch": 1.4970259098699465, |
| "grad_norm": 0.15816124844116827, |
| "learning_rate": 5e-05, |
| "loss": 1.6183, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.4980340760157274, |
| "grad_norm": 0.12696159571269544, |
| "learning_rate": 5e-05, |
| "loss": 1.6361, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.4990422421615082, |
| "grad_norm": 0.13687446317253593, |
| "learning_rate": 5e-05, |
| "loss": 1.6362, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.5000504083072892, |
| "grad_norm": 0.19177985637700964, |
| "learning_rate": 5e-05, |
| "loss": 1.6092, |
| "step": 1487 |
| }, |
| { |
| "epoch": 1.50105857445307, |
| "grad_norm": 0.13815553297856198, |
| "learning_rate": 5e-05, |
| "loss": 1.6483, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.5020667405988508, |
| "grad_norm": 0.12867409727512602, |
| "learning_rate": 5e-05, |
| "loss": 1.6232, |
| "step": 1489 |
| }, |
| { |
| "epoch": 1.5030749067446316, |
| "grad_norm": 0.13876479312318743, |
| "learning_rate": 5e-05, |
| "loss": 1.6283, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.5040830728904124, |
| "grad_norm": 0.1380968512865023, |
| "learning_rate": 5e-05, |
| "loss": 1.6088, |
| "step": 1491 |
| }, |
| { |
| "epoch": 1.5050912390361932, |
| "grad_norm": 0.1387886106002393, |
| "learning_rate": 5e-05, |
| "loss": 1.6195, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.506099405181974, |
| "grad_norm": 0.13155001387555398, |
| "learning_rate": 5e-05, |
| "loss": 1.6282, |
| "step": 1493 |
| }, |
| { |
| "epoch": 1.5071075713277549, |
| "grad_norm": 0.14474568242324626, |
| "learning_rate": 5e-05, |
| "loss": 1.6174, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.5081157374735357, |
| "grad_norm": 0.1328261700337089, |
| "learning_rate": 5e-05, |
| "loss": 1.6371, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.5091239036193165, |
| "grad_norm": 0.13791902147936075, |
| "learning_rate": 5e-05, |
| "loss": 1.635, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.5101320697650973, |
| "grad_norm": 0.13810690850494428, |
| "learning_rate": 5e-05, |
| "loss": 1.6131, |
| "step": 1497 |
| }, |
| { |
| "epoch": 1.511140235910878, |
| "grad_norm": 0.13120526016158554, |
| "learning_rate": 5e-05, |
| "loss": 1.6164, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.512148402056659, |
| "grad_norm": 0.1287974235018955, |
| "learning_rate": 5e-05, |
| "loss": 1.6268, |
| "step": 1499 |
| }, |
| { |
| "epoch": 1.5131565682024397, |
| "grad_norm": 0.1378623308637562, |
| "learning_rate": 5e-05, |
| "loss": 1.6315, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.5141647343482205, |
| "grad_norm": 0.15571650444651786, |
| "learning_rate": 5e-05, |
| "loss": 1.6158, |
| "step": 1501 |
| }, |
| { |
| "epoch": 1.5151729004940013, |
| "grad_norm": 0.15196148508385474, |
| "learning_rate": 5e-05, |
| "loss": 1.6127, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.5161810666397821, |
| "grad_norm": 0.1291006666802701, |
| "learning_rate": 5e-05, |
| "loss": 1.6228, |
| "step": 1503 |
| }, |
| { |
| "epoch": 1.517189232785563, |
| "grad_norm": 0.1288597453694645, |
| "learning_rate": 5e-05, |
| "loss": 1.6281, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.5181973989313438, |
| "grad_norm": 0.13822665412671015, |
| "learning_rate": 5e-05, |
| "loss": 1.627, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.5192055650771246, |
| "grad_norm": 0.22934071247722015, |
| "learning_rate": 5e-05, |
| "loss": 1.64, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.5202137312229054, |
| "grad_norm": 0.1336341986385085, |
| "learning_rate": 5e-05, |
| "loss": 1.6038, |
| "step": 1507 |
| }, |
| { |
| "epoch": 1.5212218973686864, |
| "grad_norm": 0.14920723438783143, |
| "learning_rate": 5e-05, |
| "loss": 1.6242, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.5222300635144672, |
| "grad_norm": 0.1377984398583919, |
| "learning_rate": 5e-05, |
| "loss": 1.6109, |
| "step": 1509 |
| }, |
| { |
| "epoch": 1.523238229660248, |
| "grad_norm": 0.14474709932915958, |
| "learning_rate": 5e-05, |
| "loss": 1.6369, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.5242463958060288, |
| "grad_norm": 0.14597802367199372, |
| "learning_rate": 5e-05, |
| "loss": 1.6076, |
| "step": 1511 |
| }, |
| { |
| "epoch": 1.5252545619518096, |
| "grad_norm": 0.1835711819944948, |
| "learning_rate": 5e-05, |
| "loss": 1.6065, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.5262627280975904, |
| "grad_norm": 0.13796861080831385, |
| "learning_rate": 5e-05, |
| "loss": 1.6382, |
| "step": 1513 |
| }, |
| { |
| "epoch": 1.5272708942433713, |
| "grad_norm": 0.13180750342058548, |
| "learning_rate": 5e-05, |
| "loss": 1.6229, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.5282790603891523, |
| "grad_norm": 0.13830723804472403, |
| "learning_rate": 5e-05, |
| "loss": 1.6165, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.529287226534933, |
| "grad_norm": 0.14674607489303773, |
| "learning_rate": 5e-05, |
| "loss": 1.6209, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.530295392680714, |
| "grad_norm": 0.13829861312181452, |
| "learning_rate": 5e-05, |
| "loss": 1.6404, |
| "step": 1517 |
| }, |
| { |
| "epoch": 1.5313035588264947, |
| "grad_norm": 0.13531408327464506, |
| "learning_rate": 5e-05, |
| "loss": 1.6321, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.5323117249722755, |
| "grad_norm": 0.12793478847755976, |
| "learning_rate": 5e-05, |
| "loss": 1.6109, |
| "step": 1519 |
| }, |
| { |
| "epoch": 1.5333198911180563, |
| "grad_norm": 0.1366784633044243, |
| "learning_rate": 5e-05, |
| "loss": 1.6404, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.5343280572638371, |
| "grad_norm": 0.1283164088827875, |
| "learning_rate": 5e-05, |
| "loss": 1.6128, |
| "step": 1521 |
| }, |
| { |
| "epoch": 1.535336223409618, |
| "grad_norm": 0.1292907360896931, |
| "learning_rate": 5e-05, |
| "loss": 1.6265, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.5363443895553988, |
| "grad_norm": 0.12656329603863192, |
| "learning_rate": 5e-05, |
| "loss": 1.6251, |
| "step": 1523 |
| }, |
| { |
| "epoch": 1.5373525557011796, |
| "grad_norm": 0.1316052445071765, |
| "learning_rate": 5e-05, |
| "loss": 1.609, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.5383607218469604, |
| "grad_norm": 0.14252214471740876, |
| "learning_rate": 5e-05, |
| "loss": 1.6338, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.5393688879927412, |
| "grad_norm": 0.14870741117949973, |
| "learning_rate": 5e-05, |
| "loss": 1.6356, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.540377054138522, |
| "grad_norm": 0.13577012776225475, |
| "learning_rate": 5e-05, |
| "loss": 1.6212, |
| "step": 1527 |
| }, |
| { |
| "epoch": 1.5413852202843028, |
| "grad_norm": 0.17114483738951686, |
| "learning_rate": 5e-05, |
| "loss": 1.6024, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.5423933864300836, |
| "grad_norm": 0.14737308412560612, |
| "learning_rate": 5e-05, |
| "loss": 1.6286, |
| "step": 1529 |
| }, |
| { |
| "epoch": 1.5434015525758644, |
| "grad_norm": 0.1295264825941411, |
| "learning_rate": 5e-05, |
| "loss": 1.6222, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.5444097187216452, |
| "grad_norm": 0.14018320799858966, |
| "learning_rate": 5e-05, |
| "loss": 1.6295, |
| "step": 1531 |
| }, |
| { |
| "epoch": 1.545417884867426, |
| "grad_norm": 0.14609731368477788, |
| "learning_rate": 5e-05, |
| "loss": 1.6143, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.5464260510132068, |
| "grad_norm": 0.14330539697029224, |
| "learning_rate": 5e-05, |
| "loss": 1.6307, |
| "step": 1533 |
| }, |
| { |
| "epoch": 1.5474342171589877, |
| "grad_norm": 0.15111461104455878, |
| "learning_rate": 5e-05, |
| "loss": 1.6433, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.5484423833047685, |
| "grad_norm": 0.15868830823996302, |
| "learning_rate": 5e-05, |
| "loss": 1.6026, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.5494505494505495, |
| "grad_norm": 0.13936815230644864, |
| "learning_rate": 5e-05, |
| "loss": 1.6121, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.5504587155963303, |
| "grad_norm": 0.15699741936829822, |
| "learning_rate": 5e-05, |
| "loss": 1.6285, |
| "step": 1537 |
| }, |
| { |
| "epoch": 1.5514668817421111, |
| "grad_norm": 0.13811556094647703, |
| "learning_rate": 5e-05, |
| "loss": 1.621, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.552475047887892, |
| "grad_norm": 0.15020339457186493, |
| "learning_rate": 5e-05, |
| "loss": 1.6175, |
| "step": 1539 |
| }, |
| { |
| "epoch": 1.5534832140336727, |
| "grad_norm": 0.1369692638804312, |
| "learning_rate": 5e-05, |
| "loss": 1.6112, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.5544913801794535, |
| "grad_norm": 0.13858737929742926, |
| "learning_rate": 5e-05, |
| "loss": 1.6377, |
| "step": 1541 |
| }, |
| { |
| "epoch": 1.5554995463252344, |
| "grad_norm": 0.16480903243687473, |
| "learning_rate": 5e-05, |
| "loss": 1.6418, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.5565077124710154, |
| "grad_norm": 0.14958634326370873, |
| "learning_rate": 5e-05, |
| "loss": 1.6139, |
| "step": 1543 |
| }, |
| { |
| "epoch": 1.5575158786167962, |
| "grad_norm": 0.15298580974284656, |
| "learning_rate": 5e-05, |
| "loss": 1.6128, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.558524044762577, |
| "grad_norm": 0.13583289639406054, |
| "learning_rate": 5e-05, |
| "loss": 1.6103, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.5595322109083578, |
| "grad_norm": 0.1503120470346654, |
| "learning_rate": 5e-05, |
| "loss": 1.6166, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.5605403770541386, |
| "grad_norm": 0.1383639822102947, |
| "learning_rate": 5e-05, |
| "loss": 1.6147, |
| "step": 1547 |
| }, |
| { |
| "epoch": 1.5615485431999194, |
| "grad_norm": 0.14685023146454493, |
| "learning_rate": 5e-05, |
| "loss": 1.6219, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.5625567093457002, |
| "grad_norm": 0.1873494126741415, |
| "learning_rate": 5e-05, |
| "loss": 1.6167, |
| "step": 1549 |
| }, |
| { |
| "epoch": 1.563564875491481, |
| "grad_norm": 0.13611446482024203, |
| "learning_rate": 5e-05, |
| "loss": 1.6246, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.5645730416372619, |
| "grad_norm": 0.15741117634697316, |
| "learning_rate": 5e-05, |
| "loss": 1.619, |
| "step": 1551 |
| }, |
| { |
| "epoch": 1.5655812077830427, |
| "grad_norm": 0.147224170977167, |
| "learning_rate": 5e-05, |
| "loss": 1.6095, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.5665893739288235, |
| "grad_norm": 0.1577262032399655, |
| "learning_rate": 5e-05, |
| "loss": 1.6179, |
| "step": 1553 |
| }, |
| { |
| "epoch": 1.5675975400746043, |
| "grad_norm": 0.14858674477219846, |
| "learning_rate": 5e-05, |
| "loss": 1.6023, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.568605706220385, |
| "grad_norm": 0.13688153875311357, |
| "learning_rate": 5e-05, |
| "loss": 1.6226, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.569613872366166, |
| "grad_norm": 0.15426478611617334, |
| "learning_rate": 5e-05, |
| "loss": 1.6111, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.5706220385119467, |
| "grad_norm": 0.13637312594724105, |
| "learning_rate": 5e-05, |
| "loss": 1.6286, |
| "step": 1557 |
| }, |
| { |
| "epoch": 1.5716302046577275, |
| "grad_norm": 0.1668337456049804, |
| "learning_rate": 5e-05, |
| "loss": 1.6216, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.5726383708035083, |
| "grad_norm": 0.13939660345064128, |
| "learning_rate": 5e-05, |
| "loss": 1.6185, |
| "step": 1559 |
| }, |
| { |
| "epoch": 1.5736465369492891, |
| "grad_norm": 0.5399345687431912, |
| "learning_rate": 5e-05, |
| "loss": 1.6361, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.57465470309507, |
| "grad_norm": 0.1521584080169154, |
| "learning_rate": 5e-05, |
| "loss": 1.6053, |
| "step": 1561 |
| }, |
| { |
| "epoch": 1.5756628692408507, |
| "grad_norm": 0.13935279315517085, |
| "learning_rate": 5e-05, |
| "loss": 1.6247, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.5766710353866316, |
| "grad_norm": 0.13475960826393785, |
| "learning_rate": 5e-05, |
| "loss": 1.6336, |
| "step": 1563 |
| }, |
| { |
| "epoch": 1.5776792015324126, |
| "grad_norm": 0.13736336068453353, |
| "learning_rate": 5e-05, |
| "loss": 1.6266, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.5786873676781934, |
| "grad_norm": 0.14005434359348037, |
| "learning_rate": 5e-05, |
| "loss": 1.6098, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.5796955338239742, |
| "grad_norm": 0.1409911202784141, |
| "learning_rate": 5e-05, |
| "loss": 1.6524, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.580703699969755, |
| "grad_norm": 0.14734554570710956, |
| "learning_rate": 5e-05, |
| "loss": 1.6165, |
| "step": 1567 |
| }, |
| { |
| "epoch": 1.5817118661155358, |
| "grad_norm": 0.13213475979971628, |
| "learning_rate": 5e-05, |
| "loss": 1.6408, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.5827200322613166, |
| "grad_norm": 0.14975068515074358, |
| "learning_rate": 5e-05, |
| "loss": 1.6084, |
| "step": 1569 |
| }, |
| { |
| "epoch": 1.5837281984070974, |
| "grad_norm": 0.6523063563005193, |
| "learning_rate": 5e-05, |
| "loss": 1.6404, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.5847363645528785, |
| "grad_norm": 0.14186737586646578, |
| "learning_rate": 5e-05, |
| "loss": 1.6075, |
| "step": 1571 |
| }, |
| { |
| "epoch": 1.5857445306986593, |
| "grad_norm": 0.30669830762249223, |
| "learning_rate": 5e-05, |
| "loss": 1.6185, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.58675269684444, |
| "grad_norm": 0.1390697291644904, |
| "learning_rate": 5e-05, |
| "loss": 1.6216, |
| "step": 1573 |
| }, |
| { |
| "epoch": 1.587760862990221, |
| "grad_norm": 0.1345479360720834, |
| "learning_rate": 5e-05, |
| "loss": 1.6267, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.5887690291360017, |
| "grad_norm": 0.1335398629343587, |
| "learning_rate": 5e-05, |
| "loss": 1.6126, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.5897771952817825, |
| "grad_norm": 0.47436425719803493, |
| "learning_rate": 5e-05, |
| "loss": 1.6169, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.5907853614275633, |
| "grad_norm": 0.14245577008322435, |
| "learning_rate": 5e-05, |
| "loss": 1.6284, |
| "step": 1577 |
| }, |
| { |
| "epoch": 1.5917935275733441, |
| "grad_norm": 0.14938912417150504, |
| "learning_rate": 5e-05, |
| "loss": 1.6299, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.592801693719125, |
| "grad_norm": 0.14006970763748072, |
| "learning_rate": 5e-05, |
| "loss": 1.6089, |
| "step": 1579 |
| }, |
| { |
| "epoch": 1.5938098598649058, |
| "grad_norm": 0.15149158584758154, |
| "learning_rate": 5e-05, |
| "loss": 1.608, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.5948180260106866, |
| "grad_norm": 0.13996334020731915, |
| "learning_rate": 5e-05, |
| "loss": 1.6207, |
| "step": 1581 |
| }, |
| { |
| "epoch": 1.5958261921564674, |
| "grad_norm": 0.14878426743491646, |
| "learning_rate": 5e-05, |
| "loss": 1.6129, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.5968343583022482, |
| "grad_norm": 0.13983441814569922, |
| "learning_rate": 5e-05, |
| "loss": 1.6256, |
| "step": 1583 |
| }, |
| { |
| "epoch": 1.597842524448029, |
| "grad_norm": 0.14514902618278047, |
| "learning_rate": 5e-05, |
| "loss": 1.5978, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.5988506905938098, |
| "grad_norm": 0.144016085291176, |
| "learning_rate": 5e-05, |
| "loss": 1.6025, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.5998588567395906, |
| "grad_norm": 0.14685717799417497, |
| "learning_rate": 5e-05, |
| "loss": 1.6203, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.6008670228853714, |
| "grad_norm": 0.14838433929728054, |
| "learning_rate": 5e-05, |
| "loss": 1.6076, |
| "step": 1587 |
| }, |
| { |
| "epoch": 1.6018751890311522, |
| "grad_norm": 0.13310027592461138, |
| "learning_rate": 5e-05, |
| "loss": 1.6222, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.602883355176933, |
| "grad_norm": 0.14697520402464495, |
| "learning_rate": 5e-05, |
| "loss": 1.6227, |
| "step": 1589 |
| }, |
| { |
| "epoch": 1.6038915213227138, |
| "grad_norm": 0.134026752065162, |
| "learning_rate": 5e-05, |
| "loss": 1.6324, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.6048996874684947, |
| "grad_norm": 0.14411651340829987, |
| "learning_rate": 5e-05, |
| "loss": 1.6212, |
| "step": 1591 |
| }, |
| { |
| "epoch": 1.6059078536142757, |
| "grad_norm": 0.14008440924202942, |
| "learning_rate": 5e-05, |
| "loss": 1.6208, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.6069160197600565, |
| "grad_norm": 0.14065237992088672, |
| "learning_rate": 5e-05, |
| "loss": 1.6188, |
| "step": 1593 |
| }, |
| { |
| "epoch": 1.6079241859058373, |
| "grad_norm": 0.13042442625793854, |
| "learning_rate": 5e-05, |
| "loss": 1.63, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.608932352051618, |
| "grad_norm": 0.1550178866652208, |
| "learning_rate": 5e-05, |
| "loss": 1.6094, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.609940518197399, |
| "grad_norm": 0.13287478123944418, |
| "learning_rate": 5e-05, |
| "loss": 1.6104, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.6109486843431797, |
| "grad_norm": 0.1390379550303179, |
| "learning_rate": 5e-05, |
| "loss": 1.6173, |
| "step": 1597 |
| }, |
| { |
| "epoch": 1.6119568504889605, |
| "grad_norm": 0.14050846371572615, |
| "learning_rate": 5e-05, |
| "loss": 1.6283, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.6129650166347416, |
| "grad_norm": 0.12452266302553723, |
| "learning_rate": 5e-05, |
| "loss": 1.6104, |
| "step": 1599 |
| }, |
| { |
| "epoch": 1.6139731827805224, |
| "grad_norm": 0.14720611562795238, |
| "learning_rate": 5e-05, |
| "loss": 1.6209, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.6149813489263032, |
| "grad_norm": 0.13214543696834904, |
| "learning_rate": 5e-05, |
| "loss": 1.6249, |
| "step": 1601 |
| }, |
| { |
| "epoch": 1.615989515072084, |
| "grad_norm": 0.13514507336848264, |
| "learning_rate": 5e-05, |
| "loss": 1.6063, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.6169976812178648, |
| "grad_norm": 0.13479055340954935, |
| "learning_rate": 5e-05, |
| "loss": 1.637, |
| "step": 1603 |
| }, |
| { |
| "epoch": 1.6180058473636456, |
| "grad_norm": 0.13861806828091428, |
| "learning_rate": 5e-05, |
| "loss": 1.6089, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.6190140135094264, |
| "grad_norm": 0.14778093009386095, |
| "learning_rate": 5e-05, |
| "loss": 1.618, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.6200221796552072, |
| "grad_norm": 0.14198742984621807, |
| "learning_rate": 5e-05, |
| "loss": 1.6253, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.621030345800988, |
| "grad_norm": 0.13665966814805347, |
| "learning_rate": 5e-05, |
| "loss": 1.6155, |
| "step": 1607 |
| }, |
| { |
| "epoch": 1.6220385119467688, |
| "grad_norm": 0.1420508367016213, |
| "learning_rate": 5e-05, |
| "loss": 1.6206, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.6230466780925497, |
| "grad_norm": 0.14238222987888366, |
| "learning_rate": 5e-05, |
| "loss": 1.6232, |
| "step": 1609 |
| }, |
| { |
| "epoch": 1.6240548442383305, |
| "grad_norm": 0.1379466487700634, |
| "learning_rate": 5e-05, |
| "loss": 1.6015, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.6250630103841113, |
| "grad_norm": 0.15179244846068082, |
| "learning_rate": 5e-05, |
| "loss": 1.6232, |
| "step": 1611 |
| }, |
| { |
| "epoch": 1.626071176529892, |
| "grad_norm": 0.13271666919390243, |
| "learning_rate": 5e-05, |
| "loss": 1.6165, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.627079342675673, |
| "grad_norm": 0.14790413368011668, |
| "learning_rate": 5e-05, |
| "loss": 1.6188, |
| "step": 1613 |
| }, |
| { |
| "epoch": 1.6280875088214537, |
| "grad_norm": 0.14767363930567415, |
| "learning_rate": 5e-05, |
| "loss": 1.6158, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.6290956749672345, |
| "grad_norm": 0.13613544304024167, |
| "learning_rate": 5e-05, |
| "loss": 1.6316, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.6301038411130153, |
| "grad_norm": 0.1425243634985558, |
| "learning_rate": 5e-05, |
| "loss": 1.6186, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.6311120072587961, |
| "grad_norm": 0.13375330006671063, |
| "learning_rate": 5e-05, |
| "loss": 1.617, |
| "step": 1617 |
| }, |
| { |
| "epoch": 1.632120173404577, |
| "grad_norm": 0.22112118602632483, |
| "learning_rate": 5e-05, |
| "loss": 1.6065, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.6331283395503577, |
| "grad_norm": 0.13804098585417388, |
| "learning_rate": 5e-05, |
| "loss": 1.6121, |
| "step": 1619 |
| }, |
| { |
| "epoch": 1.6341365056961388, |
| "grad_norm": 0.14314107543990023, |
| "learning_rate": 5e-05, |
| "loss": 1.6357, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.6351446718419196, |
| "grad_norm": 0.13844475251859784, |
| "learning_rate": 5e-05, |
| "loss": 1.6261, |
| "step": 1621 |
| }, |
| { |
| "epoch": 1.6361528379877004, |
| "grad_norm": 0.14077933123926825, |
| "learning_rate": 5e-05, |
| "loss": 1.617, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.6371610041334812, |
| "grad_norm": 0.13847095038208737, |
| "learning_rate": 5e-05, |
| "loss": 1.6096, |
| "step": 1623 |
| }, |
| { |
| "epoch": 1.638169170279262, |
| "grad_norm": 0.14235824859571994, |
| "learning_rate": 5e-05, |
| "loss": 1.6336, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.6391773364250428, |
| "grad_norm": 0.145577748481187, |
| "learning_rate": 5e-05, |
| "loss": 1.6253, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.6401855025708236, |
| "grad_norm": 0.15603712171954626, |
| "learning_rate": 5e-05, |
| "loss": 1.6177, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.6411936687166047, |
| "grad_norm": 0.13651075788038194, |
| "learning_rate": 5e-05, |
| "loss": 1.6221, |
| "step": 1627 |
| }, |
| { |
| "epoch": 1.6422018348623855, |
| "grad_norm": 0.1380322696697574, |
| "learning_rate": 5e-05, |
| "loss": 1.623, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.6432100010081663, |
| "grad_norm": 0.14918368429745976, |
| "learning_rate": 5e-05, |
| "loss": 1.624, |
| "step": 1629 |
| }, |
| { |
| "epoch": 1.644218167153947, |
| "grad_norm": 0.11732167310285535, |
| "learning_rate": 5e-05, |
| "loss": 1.5998, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.645226333299728, |
| "grad_norm": 0.14002340488796944, |
| "learning_rate": 5e-05, |
| "loss": 1.6209, |
| "step": 1631 |
| }, |
| { |
| "epoch": 1.6462344994455087, |
| "grad_norm": 0.13742206757706413, |
| "learning_rate": 5e-05, |
| "loss": 1.6137, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.6472426655912895, |
| "grad_norm": 0.13878053780251967, |
| "learning_rate": 5e-05, |
| "loss": 1.6176, |
| "step": 1633 |
| }, |
| { |
| "epoch": 1.6482508317370703, |
| "grad_norm": 0.13538113449588376, |
| "learning_rate": 5e-05, |
| "loss": 1.6382, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.6492589978828511, |
| "grad_norm": 0.1372021043904172, |
| "learning_rate": 5e-05, |
| "loss": 1.6249, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.650267164028632, |
| "grad_norm": 0.14008214908108707, |
| "learning_rate": 5e-05, |
| "loss": 1.6091, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.6512753301744127, |
| "grad_norm": 0.1339866493894695, |
| "learning_rate": 5e-05, |
| "loss": 1.5994, |
| "step": 1637 |
| }, |
| { |
| "epoch": 1.6522834963201936, |
| "grad_norm": 0.13181194279202782, |
| "learning_rate": 5e-05, |
| "loss": 1.6418, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.6532916624659744, |
| "grad_norm": 0.14351324455593178, |
| "learning_rate": 5e-05, |
| "loss": 1.6024, |
| "step": 1639 |
| }, |
| { |
| "epoch": 1.6542998286117552, |
| "grad_norm": 0.12816439188392217, |
| "learning_rate": 5e-05, |
| "loss": 1.6424, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.655307994757536, |
| "grad_norm": 0.13510192746334643, |
| "learning_rate": 5e-05, |
| "loss": 1.6066, |
| "step": 1641 |
| }, |
| { |
| "epoch": 1.6563161609033168, |
| "grad_norm": 0.13959090945226782, |
| "learning_rate": 5e-05, |
| "loss": 1.6378, |
| "step": 1642 |
| }, |
| { |
| "epoch": 1.6573243270490976, |
| "grad_norm": 0.14054462649724678, |
| "learning_rate": 5e-05, |
| "loss": 1.6342, |
| "step": 1643 |
| }, |
| { |
| "epoch": 1.6583324931948784, |
| "grad_norm": 0.24272243831428125, |
| "learning_rate": 5e-05, |
| "loss": 1.6091, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.6593406593406592, |
| "grad_norm": 0.13496532677224876, |
| "learning_rate": 5e-05, |
| "loss": 1.6224, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.66034882548644, |
| "grad_norm": 0.13530180754992427, |
| "learning_rate": 5e-05, |
| "loss": 1.5929, |
| "step": 1646 |
| }, |
| { |
| "epoch": 1.6613569916322208, |
| "grad_norm": 0.1402470630054397, |
| "learning_rate": 5e-05, |
| "loss": 1.5957, |
| "step": 1647 |
| }, |
| { |
| "epoch": 1.6623651577780019, |
| "grad_norm": 0.1339642900610824, |
| "learning_rate": 5e-05, |
| "loss": 1.6135, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.6633733239237827, |
| "grad_norm": 0.15291952746349996, |
| "learning_rate": 5e-05, |
| "loss": 1.6113, |
| "step": 1649 |
| }, |
| { |
| "epoch": 1.6643814900695635, |
| "grad_norm": 0.1439871519251173, |
| "learning_rate": 5e-05, |
| "loss": 1.617, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.6653896562153443, |
| "grad_norm": 0.13197615212474387, |
| "learning_rate": 5e-05, |
| "loss": 1.6232, |
| "step": 1651 |
| }, |
| { |
| "epoch": 1.666397822361125, |
| "grad_norm": 0.13639699783186127, |
| "learning_rate": 5e-05, |
| "loss": 1.618, |
| "step": 1652 |
| }, |
| { |
| "epoch": 1.667405988506906, |
| "grad_norm": 0.13605034216960754, |
| "learning_rate": 5e-05, |
| "loss": 1.6288, |
| "step": 1653 |
| }, |
| { |
| "epoch": 1.6684141546526867, |
| "grad_norm": 0.13680563856675576, |
| "learning_rate": 5e-05, |
| "loss": 1.6284, |
| "step": 1654 |
| }, |
| { |
| "epoch": 1.6694223207984678, |
| "grad_norm": 0.14294132013022695, |
| "learning_rate": 5e-05, |
| "loss": 1.6033, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.6704304869442486, |
| "grad_norm": 0.14258491267969362, |
| "learning_rate": 5e-05, |
| "loss": 1.624, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.6714386530900294, |
| "grad_norm": 0.1425290155938703, |
| "learning_rate": 5e-05, |
| "loss": 1.6037, |
| "step": 1657 |
| }, |
| { |
| "epoch": 1.6724468192358102, |
| "grad_norm": 0.12627279779042835, |
| "learning_rate": 5e-05, |
| "loss": 1.6149, |
| "step": 1658 |
| }, |
| { |
| "epoch": 1.673454985381591, |
| "grad_norm": 0.1343344246316048, |
| "learning_rate": 5e-05, |
| "loss": 1.6097, |
| "step": 1659 |
| }, |
| { |
| "epoch": 1.6744631515273718, |
| "grad_norm": 0.12793350365768266, |
| "learning_rate": 5e-05, |
| "loss": 1.617, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.6754713176731526, |
| "grad_norm": 0.13886742282540715, |
| "learning_rate": 5e-05, |
| "loss": 1.6132, |
| "step": 1661 |
| }, |
| { |
| "epoch": 1.6764794838189334, |
| "grad_norm": 0.14001713897764617, |
| "learning_rate": 5e-05, |
| "loss": 1.6154, |
| "step": 1662 |
| }, |
| { |
| "epoch": 1.6774876499647142, |
| "grad_norm": 0.135753035141293, |
| "learning_rate": 5e-05, |
| "loss": 1.6166, |
| "step": 1663 |
| }, |
| { |
| "epoch": 1.678495816110495, |
| "grad_norm": 0.13147689252282455, |
| "learning_rate": 5e-05, |
| "loss": 1.6074, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.6795039822562758, |
| "grad_norm": 0.13514270183722293, |
| "learning_rate": 5e-05, |
| "loss": 1.6204, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.6805121484020566, |
| "grad_norm": 0.13207538462664556, |
| "learning_rate": 5e-05, |
| "loss": 1.6158, |
| "step": 1666 |
| }, |
| { |
| "epoch": 1.6815203145478375, |
| "grad_norm": 0.14653678599732686, |
| "learning_rate": 5e-05, |
| "loss": 1.6035, |
| "step": 1667 |
| }, |
| { |
| "epoch": 1.6825284806936183, |
| "grad_norm": 0.1371827365018962, |
| "learning_rate": 5e-05, |
| "loss": 1.6138, |
| "step": 1668 |
| }, |
| { |
| "epoch": 1.683536646839399, |
| "grad_norm": 0.13723733155590662, |
| "learning_rate": 5e-05, |
| "loss": 1.6442, |
| "step": 1669 |
| }, |
| { |
| "epoch": 1.6845448129851799, |
| "grad_norm": 0.1401309125942649, |
| "learning_rate": 5e-05, |
| "loss": 1.605, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.6855529791309607, |
| "grad_norm": 0.14698602949806877, |
| "learning_rate": 5e-05, |
| "loss": 1.6145, |
| "step": 1671 |
| }, |
| { |
| "epoch": 1.6865611452767415, |
| "grad_norm": 0.14460773794736487, |
| "learning_rate": 5e-05, |
| "loss": 1.6079, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.6875693114225223, |
| "grad_norm": 0.1376051842545434, |
| "learning_rate": 5e-05, |
| "loss": 1.6377, |
| "step": 1673 |
| }, |
| { |
| "epoch": 1.6885774775683031, |
| "grad_norm": 0.13423153255852358, |
| "learning_rate": 5e-05, |
| "loss": 1.6093, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.689585643714084, |
| "grad_norm": 0.1385968781092482, |
| "learning_rate": 5e-05, |
| "loss": 1.5947, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.690593809859865, |
| "grad_norm": 0.1461167654309264, |
| "learning_rate": 5e-05, |
| "loss": 1.6221, |
| "step": 1676 |
| }, |
| { |
| "epoch": 1.6916019760056458, |
| "grad_norm": 0.12582225692106638, |
| "learning_rate": 5e-05, |
| "loss": 1.6135, |
| "step": 1677 |
| }, |
| { |
| "epoch": 1.6926101421514266, |
| "grad_norm": 0.1438589812956846, |
| "learning_rate": 5e-05, |
| "loss": 1.6041, |
| "step": 1678 |
| }, |
| { |
| "epoch": 1.6936183082972074, |
| "grad_norm": 0.13943583979636195, |
| "learning_rate": 5e-05, |
| "loss": 1.6045, |
| "step": 1679 |
| }, |
| { |
| "epoch": 1.6946264744429882, |
| "grad_norm": 0.13360275717336678, |
| "learning_rate": 5e-05, |
| "loss": 1.618, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.695634640588769, |
| "grad_norm": 0.12441487879737671, |
| "learning_rate": 5e-05, |
| "loss": 1.6249, |
| "step": 1681 |
| }, |
| { |
| "epoch": 1.6966428067345498, |
| "grad_norm": 0.1339772033533686, |
| "learning_rate": 5e-05, |
| "loss": 1.6203, |
| "step": 1682 |
| }, |
| { |
| "epoch": 1.6976509728803308, |
| "grad_norm": 0.13386807917707239, |
| "learning_rate": 5e-05, |
| "loss": 1.6183, |
| "step": 1683 |
| }, |
| { |
| "epoch": 1.6986591390261117, |
| "grad_norm": 0.129651625611091, |
| "learning_rate": 5e-05, |
| "loss": 1.615, |
| "step": 1684 |
| }, |
| { |
| "epoch": 1.6996673051718925, |
| "grad_norm": 0.12428352736785793, |
| "learning_rate": 5e-05, |
| "loss": 1.6281, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.7006754713176733, |
| "grad_norm": 0.1363843489132686, |
| "learning_rate": 5e-05, |
| "loss": 1.6204, |
| "step": 1686 |
| }, |
| { |
| "epoch": 1.701683637463454, |
| "grad_norm": 0.14359773549748206, |
| "learning_rate": 5e-05, |
| "loss": 1.6011, |
| "step": 1687 |
| }, |
| { |
| "epoch": 1.7026918036092349, |
| "grad_norm": 0.13322068817698787, |
| "learning_rate": 5e-05, |
| "loss": 1.6377, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.7036999697550157, |
| "grad_norm": 0.12744174180057552, |
| "learning_rate": 5e-05, |
| "loss": 1.6319, |
| "step": 1689 |
| }, |
| { |
| "epoch": 1.7047081359007965, |
| "grad_norm": 0.12545956427102473, |
| "learning_rate": 5e-05, |
| "loss": 1.6244, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.7057163020465773, |
| "grad_norm": 0.13246626345885423, |
| "learning_rate": 5e-05, |
| "loss": 1.6319, |
| "step": 1691 |
| }, |
| { |
| "epoch": 1.7067244681923581, |
| "grad_norm": 0.13204181957812078, |
| "learning_rate": 5e-05, |
| "loss": 1.6016, |
| "step": 1692 |
| }, |
| { |
| "epoch": 1.707732634338139, |
| "grad_norm": 0.12492646517845629, |
| "learning_rate": 5e-05, |
| "loss": 1.6029, |
| "step": 1693 |
| }, |
| { |
| "epoch": 1.7087408004839197, |
| "grad_norm": 0.1392980997156786, |
| "learning_rate": 5e-05, |
| "loss": 1.606, |
| "step": 1694 |
| }, |
| { |
| "epoch": 1.7097489666297006, |
| "grad_norm": 0.1330684199861111, |
| "learning_rate": 5e-05, |
| "loss": 1.612, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.7107571327754814, |
| "grad_norm": 0.14132626784333496, |
| "learning_rate": 5e-05, |
| "loss": 1.5997, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.7117652989212622, |
| "grad_norm": 0.1407849034511313, |
| "learning_rate": 5e-05, |
| "loss": 1.6333, |
| "step": 1697 |
| }, |
| { |
| "epoch": 1.712773465067043, |
| "grad_norm": 0.1321531853957436, |
| "learning_rate": 5e-05, |
| "loss": 1.6038, |
| "step": 1698 |
| }, |
| { |
| "epoch": 1.7137816312128238, |
| "grad_norm": 0.14485102927415872, |
| "learning_rate": 5e-05, |
| "loss": 1.6048, |
| "step": 1699 |
| }, |
| { |
| "epoch": 1.7147897973586046, |
| "grad_norm": 0.13069749492979196, |
| "learning_rate": 5e-05, |
| "loss": 1.6327, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.7157979635043854, |
| "grad_norm": 0.13630816111779237, |
| "learning_rate": 5e-05, |
| "loss": 1.6112, |
| "step": 1701 |
| }, |
| { |
| "epoch": 1.7168061296501662, |
| "grad_norm": 0.14237455896465825, |
| "learning_rate": 5e-05, |
| "loss": 1.6248, |
| "step": 1702 |
| }, |
| { |
| "epoch": 1.717814295795947, |
| "grad_norm": 0.13763466168468277, |
| "learning_rate": 5e-05, |
| "loss": 1.6197, |
| "step": 1703 |
| }, |
| { |
| "epoch": 1.7188224619417278, |
| "grad_norm": 0.1374930902892298, |
| "learning_rate": 5e-05, |
| "loss": 1.6218, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.7198306280875089, |
| "grad_norm": 0.13512322205932592, |
| "learning_rate": 5e-05, |
| "loss": 1.6268, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.7208387942332897, |
| "grad_norm": 0.1452506652798437, |
| "learning_rate": 5e-05, |
| "loss": 1.6203, |
| "step": 1706 |
| }, |
| { |
| "epoch": 1.7218469603790705, |
| "grad_norm": 0.1557032826185054, |
| "learning_rate": 5e-05, |
| "loss": 1.6126, |
| "step": 1707 |
| }, |
| { |
| "epoch": 1.7228551265248513, |
| "grad_norm": 0.13340855639180935, |
| "learning_rate": 5e-05, |
| "loss": 1.6194, |
| "step": 1708 |
| }, |
| { |
| "epoch": 1.723863292670632, |
| "grad_norm": 0.1736722982841165, |
| "learning_rate": 5e-05, |
| "loss": 1.6177, |
| "step": 1709 |
| }, |
| { |
| "epoch": 1.724871458816413, |
| "grad_norm": 0.13838096398331612, |
| "learning_rate": 5e-05, |
| "loss": 1.6258, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.7258796249621937, |
| "grad_norm": 0.1340817051050624, |
| "learning_rate": 5e-05, |
| "loss": 1.6222, |
| "step": 1711 |
| }, |
| { |
| "epoch": 1.7268877911079747, |
| "grad_norm": 0.1549435001901365, |
| "learning_rate": 5e-05, |
| "loss": 1.6046, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.7278959572537556, |
| "grad_norm": 0.12750772195502852, |
| "learning_rate": 5e-05, |
| "loss": 1.6148, |
| "step": 1713 |
| }, |
| { |
| "epoch": 1.7289041233995364, |
| "grad_norm": 0.1457235372622354, |
| "learning_rate": 5e-05, |
| "loss": 1.6334, |
| "step": 1714 |
| }, |
| { |
| "epoch": 1.7299122895453172, |
| "grad_norm": 0.1319619921628885, |
| "learning_rate": 5e-05, |
| "loss": 1.6232, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.730920455691098, |
| "grad_norm": 0.13299977467266605, |
| "learning_rate": 5e-05, |
| "loss": 1.6075, |
| "step": 1716 |
| }, |
| { |
| "epoch": 1.7319286218368788, |
| "grad_norm": 0.13998386630533857, |
| "learning_rate": 5e-05, |
| "loss": 1.6111, |
| "step": 1717 |
| }, |
| { |
| "epoch": 1.7329367879826596, |
| "grad_norm": 0.13255509324875403, |
| "learning_rate": 5e-05, |
| "loss": 1.6104, |
| "step": 1718 |
| }, |
| { |
| "epoch": 1.7339449541284404, |
| "grad_norm": 0.13387268249492904, |
| "learning_rate": 5e-05, |
| "loss": 1.6204, |
| "step": 1719 |
| }, |
| { |
| "epoch": 1.7349531202742212, |
| "grad_norm": 0.1413802578936938, |
| "learning_rate": 5e-05, |
| "loss": 1.5923, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.735961286420002, |
| "grad_norm": 0.13701754337130248, |
| "learning_rate": 5e-05, |
| "loss": 1.6029, |
| "step": 1721 |
| }, |
| { |
| "epoch": 1.7369694525657828, |
| "grad_norm": 0.1380475535571739, |
| "learning_rate": 5e-05, |
| "loss": 1.5915, |
| "step": 1722 |
| }, |
| { |
| "epoch": 1.7379776187115636, |
| "grad_norm": 0.5622851151853336, |
| "learning_rate": 5e-05, |
| "loss": 1.6169, |
| "step": 1723 |
| }, |
| { |
| "epoch": 1.7389857848573445, |
| "grad_norm": 0.13527175858738594, |
| "learning_rate": 5e-05, |
| "loss": 1.6033, |
| "step": 1724 |
| }, |
| { |
| "epoch": 1.7399939510031253, |
| "grad_norm": 0.13601583456425725, |
| "learning_rate": 5e-05, |
| "loss": 1.5897, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.741002117148906, |
| "grad_norm": 0.12879872996292097, |
| "learning_rate": 5e-05, |
| "loss": 1.633, |
| "step": 1726 |
| }, |
| { |
| "epoch": 1.7420102832946869, |
| "grad_norm": 0.1428323936794853, |
| "learning_rate": 5e-05, |
| "loss": 1.6072, |
| "step": 1727 |
| }, |
| { |
| "epoch": 1.7430184494404677, |
| "grad_norm": 0.19285827074287154, |
| "learning_rate": 5e-05, |
| "loss": 1.6181, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.7440266155862485, |
| "grad_norm": 0.19549095939112598, |
| "learning_rate": 5e-05, |
| "loss": 1.5952, |
| "step": 1729 |
| }, |
| { |
| "epoch": 1.7450347817320293, |
| "grad_norm": 0.13241128621801906, |
| "learning_rate": 5e-05, |
| "loss": 1.6142, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.7460429478778101, |
| "grad_norm": 0.1402339809583584, |
| "learning_rate": 5e-05, |
| "loss": 1.584, |
| "step": 1731 |
| }, |
| { |
| "epoch": 1.747051114023591, |
| "grad_norm": 0.14004271275884192, |
| "learning_rate": 5e-05, |
| "loss": 1.6304, |
| "step": 1732 |
| }, |
| { |
| "epoch": 1.748059280169372, |
| "grad_norm": 0.14445552177426974, |
| "learning_rate": 5e-05, |
| "loss": 1.5978, |
| "step": 1733 |
| }, |
| { |
| "epoch": 1.7490674463151528, |
| "grad_norm": 0.14003088790753546, |
| "learning_rate": 5e-05, |
| "loss": 1.6014, |
| "step": 1734 |
| }, |
| { |
| "epoch": 1.7500756124609336, |
| "grad_norm": 0.13228066300335042, |
| "learning_rate": 5e-05, |
| "loss": 1.6289, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.7510837786067144, |
| "grad_norm": 0.15457189168732346, |
| "learning_rate": 5e-05, |
| "loss": 1.613, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.7520919447524952, |
| "grad_norm": 0.13633629639002504, |
| "learning_rate": 5e-05, |
| "loss": 1.627, |
| "step": 1737 |
| }, |
| { |
| "epoch": 1.753100110898276, |
| "grad_norm": 0.14369857847598344, |
| "learning_rate": 5e-05, |
| "loss": 1.6029, |
| "step": 1738 |
| }, |
| { |
| "epoch": 1.7541082770440568, |
| "grad_norm": 0.14664263803679667, |
| "learning_rate": 5e-05, |
| "loss": 1.6201, |
| "step": 1739 |
| }, |
| { |
| "epoch": 1.7551164431898378, |
| "grad_norm": 0.14355866363102157, |
| "learning_rate": 5e-05, |
| "loss": 1.6216, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.7561246093356186, |
| "grad_norm": 0.1466730945121939, |
| "learning_rate": 5e-05, |
| "loss": 1.6003, |
| "step": 1741 |
| }, |
| { |
| "epoch": 1.7571327754813995, |
| "grad_norm": 0.13314431110264674, |
| "learning_rate": 5e-05, |
| "loss": 1.6125, |
| "step": 1742 |
| }, |
| { |
| "epoch": 1.7581409416271803, |
| "grad_norm": 0.15711319563822906, |
| "learning_rate": 5e-05, |
| "loss": 1.6124, |
| "step": 1743 |
| }, |
| { |
| "epoch": 1.759149107772961, |
| "grad_norm": 1.2638567147756596, |
| "learning_rate": 5e-05, |
| "loss": 1.5946, |
| "step": 1744 |
| }, |
| { |
| "epoch": 1.7601572739187419, |
| "grad_norm": 0.14049558238092832, |
| "learning_rate": 5e-05, |
| "loss": 1.6199, |
| "step": 1745 |
| }, |
| { |
| "epoch": 1.7611654400645227, |
| "grad_norm": 0.15174858113188466, |
| "learning_rate": 5e-05, |
| "loss": 1.6235, |
| "step": 1746 |
| }, |
| { |
| "epoch": 1.7621736062103035, |
| "grad_norm": 0.14413750895344504, |
| "learning_rate": 5e-05, |
| "loss": 1.6208, |
| "step": 1747 |
| }, |
| { |
| "epoch": 1.7631817723560843, |
| "grad_norm": 0.15508494984696386, |
| "learning_rate": 5e-05, |
| "loss": 1.6084, |
| "step": 1748 |
| }, |
| { |
| "epoch": 1.7641899385018651, |
| "grad_norm": 0.15498485653169553, |
| "learning_rate": 5e-05, |
| "loss": 1.5952, |
| "step": 1749 |
| }, |
| { |
| "epoch": 1.765198104647646, |
| "grad_norm": 0.16604683843727644, |
| "learning_rate": 5e-05, |
| "loss": 1.6142, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.7662062707934267, |
| "grad_norm": 0.17563345232700797, |
| "learning_rate": 5e-05, |
| "loss": 1.5914, |
| "step": 1751 |
| }, |
| { |
| "epoch": 1.7672144369392075, |
| "grad_norm": 0.15092209589409813, |
| "learning_rate": 5e-05, |
| "loss": 1.616, |
| "step": 1752 |
| }, |
| { |
| "epoch": 1.7682226030849884, |
| "grad_norm": 0.17913311190933373, |
| "learning_rate": 5e-05, |
| "loss": 1.6192, |
| "step": 1753 |
| }, |
| { |
| "epoch": 1.7692307692307692, |
| "grad_norm": 0.1710531241250294, |
| "learning_rate": 5e-05, |
| "loss": 1.6162, |
| "step": 1754 |
| }, |
| { |
| "epoch": 1.77023893537655, |
| "grad_norm": 0.16638506208920983, |
| "learning_rate": 5e-05, |
| "loss": 1.6334, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.7712471015223308, |
| "grad_norm": 0.18761824583553158, |
| "learning_rate": 5e-05, |
| "loss": 1.6142, |
| "step": 1756 |
| }, |
| { |
| "epoch": 1.7722552676681116, |
| "grad_norm": 0.14892043726691043, |
| "learning_rate": 5e-05, |
| "loss": 1.6317, |
| "step": 1757 |
| }, |
| { |
| "epoch": 1.7732634338138924, |
| "grad_norm": 0.16535088344152474, |
| "learning_rate": 5e-05, |
| "loss": 1.6128, |
| "step": 1758 |
| }, |
| { |
| "epoch": 1.7742715999596732, |
| "grad_norm": 0.15060919472661713, |
| "learning_rate": 5e-05, |
| "loss": 1.6075, |
| "step": 1759 |
| }, |
| { |
| "epoch": 1.775279766105454, |
| "grad_norm": 0.1685283379818972, |
| "learning_rate": 5e-05, |
| "loss": 1.6045, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.776287932251235, |
| "grad_norm": 0.1457182770606618, |
| "learning_rate": 5e-05, |
| "loss": 1.6248, |
| "step": 1761 |
| }, |
| { |
| "epoch": 1.7772960983970159, |
| "grad_norm": 0.14486683958980914, |
| "learning_rate": 5e-05, |
| "loss": 1.6043, |
| "step": 1762 |
| }, |
| { |
| "epoch": 1.7783042645427967, |
| "grad_norm": 0.13696686191142463, |
| "learning_rate": 5e-05, |
| "loss": 1.6395, |
| "step": 1763 |
| }, |
| { |
| "epoch": 1.7793124306885775, |
| "grad_norm": 0.16153417016077473, |
| "learning_rate": 5e-05, |
| "loss": 1.6063, |
| "step": 1764 |
| }, |
| { |
| "epoch": 1.7803205968343583, |
| "grad_norm": 0.12761827086520675, |
| "learning_rate": 5e-05, |
| "loss": 1.6002, |
| "step": 1765 |
| }, |
| { |
| "epoch": 1.781328762980139, |
| "grad_norm": 0.1528242449301567, |
| "learning_rate": 5e-05, |
| "loss": 1.6141, |
| "step": 1766 |
| }, |
| { |
| "epoch": 1.78233692912592, |
| "grad_norm": 0.14813788393804353, |
| "learning_rate": 5e-05, |
| "loss": 1.6057, |
| "step": 1767 |
| }, |
| { |
| "epoch": 1.783345095271701, |
| "grad_norm": 0.13362860876896798, |
| "learning_rate": 5e-05, |
| "loss": 1.6029, |
| "step": 1768 |
| }, |
| { |
| "epoch": 1.7843532614174817, |
| "grad_norm": 0.14148302977868854, |
| "learning_rate": 5e-05, |
| "loss": 1.6254, |
| "step": 1769 |
| }, |
| { |
| "epoch": 1.7853614275632625, |
| "grad_norm": 0.3091291743457628, |
| "learning_rate": 5e-05, |
| "loss": 1.6312, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.7863695937090434, |
| "grad_norm": 0.13735284354244087, |
| "learning_rate": 5e-05, |
| "loss": 1.6013, |
| "step": 1771 |
| }, |
| { |
| "epoch": 1.7873777598548242, |
| "grad_norm": 0.12693953022906115, |
| "learning_rate": 5e-05, |
| "loss": 1.5964, |
| "step": 1772 |
| }, |
| { |
| "epoch": 1.788385926000605, |
| "grad_norm": 0.12719899555292422, |
| "learning_rate": 5e-05, |
| "loss": 1.6171, |
| "step": 1773 |
| }, |
| { |
| "epoch": 1.7893940921463858, |
| "grad_norm": 0.12635154500382387, |
| "learning_rate": 5e-05, |
| "loss": 1.6087, |
| "step": 1774 |
| }, |
| { |
| "epoch": 1.7904022582921666, |
| "grad_norm": 0.13868520829220343, |
| "learning_rate": 5e-05, |
| "loss": 1.6173, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.7914104244379474, |
| "grad_norm": 0.1391173368990605, |
| "learning_rate": 5e-05, |
| "loss": 1.6213, |
| "step": 1776 |
| }, |
| { |
| "epoch": 1.7924185905837282, |
| "grad_norm": 0.1376872401819976, |
| "learning_rate": 5e-05, |
| "loss": 1.6016, |
| "step": 1777 |
| }, |
| { |
| "epoch": 1.793426756729509, |
| "grad_norm": 0.15909193130638846, |
| "learning_rate": 5e-05, |
| "loss": 1.6022, |
| "step": 1778 |
| }, |
| { |
| "epoch": 1.7944349228752898, |
| "grad_norm": 0.13145339806752976, |
| "learning_rate": 5e-05, |
| "loss": 1.5977, |
| "step": 1779 |
| }, |
| { |
| "epoch": 1.7954430890210706, |
| "grad_norm": 0.13830608154808835, |
| "learning_rate": 5e-05, |
| "loss": 1.6146, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.7964512551668514, |
| "grad_norm": 0.13587870252501869, |
| "learning_rate": 5e-05, |
| "loss": 1.612, |
| "step": 1781 |
| }, |
| { |
| "epoch": 1.7974594213126323, |
| "grad_norm": 0.14346041025356132, |
| "learning_rate": 5e-05, |
| "loss": 1.6336, |
| "step": 1782 |
| }, |
| { |
| "epoch": 1.798467587458413, |
| "grad_norm": 0.13668262983065077, |
| "learning_rate": 5e-05, |
| "loss": 1.6003, |
| "step": 1783 |
| }, |
| { |
| "epoch": 1.7994757536041939, |
| "grad_norm": 0.15063006748789715, |
| "learning_rate": 5e-05, |
| "loss": 1.6111, |
| "step": 1784 |
| }, |
| { |
| "epoch": 1.8004839197499747, |
| "grad_norm": 0.1358547525997899, |
| "learning_rate": 5e-05, |
| "loss": 1.615, |
| "step": 1785 |
| }, |
| { |
| "epoch": 1.8014920858957555, |
| "grad_norm": 0.12987109911965108, |
| "learning_rate": 5e-05, |
| "loss": 1.6107, |
| "step": 1786 |
| }, |
| { |
| "epoch": 1.8025002520415363, |
| "grad_norm": 0.15381766777406494, |
| "learning_rate": 5e-05, |
| "loss": 1.6209, |
| "step": 1787 |
| }, |
| { |
| "epoch": 1.803508418187317, |
| "grad_norm": 0.13625928451952748, |
| "learning_rate": 5e-05, |
| "loss": 1.6131, |
| "step": 1788 |
| }, |
| { |
| "epoch": 1.8045165843330981, |
| "grad_norm": 0.14903057932506056, |
| "learning_rate": 5e-05, |
| "loss": 1.62, |
| "step": 1789 |
| }, |
| { |
| "epoch": 1.805524750478879, |
| "grad_norm": 0.1363950109586751, |
| "learning_rate": 5e-05, |
| "loss": 1.5978, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.8065329166246598, |
| "grad_norm": 0.12663592216992495, |
| "learning_rate": 5e-05, |
| "loss": 1.6146, |
| "step": 1791 |
| }, |
| { |
| "epoch": 1.8075410827704406, |
| "grad_norm": 0.13673734077949914, |
| "learning_rate": 5e-05, |
| "loss": 1.6285, |
| "step": 1792 |
| }, |
| { |
| "epoch": 1.8085492489162214, |
| "grad_norm": 0.1987012161353432, |
| "learning_rate": 5e-05, |
| "loss": 1.6161, |
| "step": 1793 |
| }, |
| { |
| "epoch": 1.8095574150620022, |
| "grad_norm": 0.12952797334927602, |
| "learning_rate": 5e-05, |
| "loss": 1.6171, |
| "step": 1794 |
| }, |
| { |
| "epoch": 1.810565581207783, |
| "grad_norm": 0.12897445209773659, |
| "learning_rate": 5e-05, |
| "loss": 1.6033, |
| "step": 1795 |
| }, |
| { |
| "epoch": 1.811573747353564, |
| "grad_norm": 0.12947253799660324, |
| "learning_rate": 5e-05, |
| "loss": 1.583, |
| "step": 1796 |
| }, |
| { |
| "epoch": 1.8125819134993448, |
| "grad_norm": 0.1343054463682107, |
| "learning_rate": 5e-05, |
| "loss": 1.6088, |
| "step": 1797 |
| }, |
| { |
| "epoch": 1.8135900796451256, |
| "grad_norm": 0.1390809767000595, |
| "learning_rate": 5e-05, |
| "loss": 1.6049, |
| "step": 1798 |
| }, |
| { |
| "epoch": 1.8145982457909065, |
| "grad_norm": 1.1247567635889086, |
| "learning_rate": 5e-05, |
| "loss": 1.6084, |
| "step": 1799 |
| }, |
| { |
| "epoch": 1.8156064119366873, |
| "grad_norm": 0.13972004841516494, |
| "learning_rate": 5e-05, |
| "loss": 1.6027, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.816614578082468, |
| "grad_norm": 0.12266920923306984, |
| "learning_rate": 5e-05, |
| "loss": 1.5949, |
| "step": 1801 |
| }, |
| { |
| "epoch": 1.8176227442282489, |
| "grad_norm": 0.13573438818370204, |
| "learning_rate": 5e-05, |
| "loss": 1.5959, |
| "step": 1802 |
| }, |
| { |
| "epoch": 1.8186309103740297, |
| "grad_norm": 0.13976539408124178, |
| "learning_rate": 5e-05, |
| "loss": 1.6162, |
| "step": 1803 |
| }, |
| { |
| "epoch": 1.8196390765198105, |
| "grad_norm": 0.15254799912538275, |
| "learning_rate": 5e-05, |
| "loss": 1.5973, |
| "step": 1804 |
| }, |
| { |
| "epoch": 1.8206472426655913, |
| "grad_norm": 0.1384953584615067, |
| "learning_rate": 5e-05, |
| "loss": 1.6354, |
| "step": 1805 |
| }, |
| { |
| "epoch": 1.8216554088113721, |
| "grad_norm": 0.1422730990739598, |
| "learning_rate": 5e-05, |
| "loss": 1.602, |
| "step": 1806 |
| }, |
| { |
| "epoch": 1.822663574957153, |
| "grad_norm": 0.15360377620657606, |
| "learning_rate": 5e-05, |
| "loss": 1.5962, |
| "step": 1807 |
| }, |
| { |
| "epoch": 1.8236717411029337, |
| "grad_norm": 0.14850922203359646, |
| "learning_rate": 5e-05, |
| "loss": 1.6307, |
| "step": 1808 |
| }, |
| { |
| "epoch": 1.8246799072487145, |
| "grad_norm": 0.15150459907289224, |
| "learning_rate": 5e-05, |
| "loss": 1.6181, |
| "step": 1809 |
| }, |
| { |
| "epoch": 1.8256880733944953, |
| "grad_norm": 0.15088788156019828, |
| "learning_rate": 5e-05, |
| "loss": 1.5938, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.8266962395402762, |
| "grad_norm": 0.15492817542755252, |
| "learning_rate": 5e-05, |
| "loss": 1.6048, |
| "step": 1811 |
| }, |
| { |
| "epoch": 1.827704405686057, |
| "grad_norm": 0.14112165642917768, |
| "learning_rate": 5e-05, |
| "loss": 1.6073, |
| "step": 1812 |
| }, |
| { |
| "epoch": 1.8287125718318378, |
| "grad_norm": 0.13706017286835598, |
| "learning_rate": 5e-05, |
| "loss": 1.6075, |
| "step": 1813 |
| }, |
| { |
| "epoch": 1.8297207379776186, |
| "grad_norm": 0.1385283772912929, |
| "learning_rate": 5e-05, |
| "loss": 1.6211, |
| "step": 1814 |
| }, |
| { |
| "epoch": 1.8307289041233994, |
| "grad_norm": 0.1404996606919889, |
| "learning_rate": 5e-05, |
| "loss": 1.6087, |
| "step": 1815 |
| }, |
| { |
| "epoch": 1.8317370702691802, |
| "grad_norm": 0.13800608084793464, |
| "learning_rate": 5e-05, |
| "loss": 1.6078, |
| "step": 1816 |
| }, |
| { |
| "epoch": 1.8327452364149612, |
| "grad_norm": 0.13881745887222013, |
| "learning_rate": 5e-05, |
| "loss": 1.6336, |
| "step": 1817 |
| }, |
| { |
| "epoch": 1.833753402560742, |
| "grad_norm": 0.137211197141056, |
| "learning_rate": 5e-05, |
| "loss": 1.6274, |
| "step": 1818 |
| }, |
| { |
| "epoch": 1.8347615687065228, |
| "grad_norm": 0.13123746536610786, |
| "learning_rate": 5e-05, |
| "loss": 1.6398, |
| "step": 1819 |
| }, |
| { |
| "epoch": 1.8357697348523037, |
| "grad_norm": 0.13812430669099415, |
| "learning_rate": 5e-05, |
| "loss": 1.6013, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.8367779009980845, |
| "grad_norm": 0.13585642799638906, |
| "learning_rate": 5e-05, |
| "loss": 1.6024, |
| "step": 1821 |
| }, |
| { |
| "epoch": 1.8377860671438653, |
| "grad_norm": 0.14276337338880293, |
| "learning_rate": 5e-05, |
| "loss": 1.6071, |
| "step": 1822 |
| }, |
| { |
| "epoch": 1.838794233289646, |
| "grad_norm": 0.1371532484515092, |
| "learning_rate": 5e-05, |
| "loss": 1.6214, |
| "step": 1823 |
| }, |
| { |
| "epoch": 1.8398023994354271, |
| "grad_norm": 0.14658292784641913, |
| "learning_rate": 5e-05, |
| "loss": 1.6019, |
| "step": 1824 |
| }, |
| { |
| "epoch": 1.840810565581208, |
| "grad_norm": 0.151520889065639, |
| "learning_rate": 5e-05, |
| "loss": 1.6075, |
| "step": 1825 |
| }, |
| { |
| "epoch": 1.8418187317269887, |
| "grad_norm": 0.13216226198024741, |
| "learning_rate": 5e-05, |
| "loss": 1.6125, |
| "step": 1826 |
| }, |
| { |
| "epoch": 1.8428268978727695, |
| "grad_norm": 0.14248039055169562, |
| "learning_rate": 5e-05, |
| "loss": 1.6328, |
| "step": 1827 |
| }, |
| { |
| "epoch": 1.8438350640185504, |
| "grad_norm": 0.13790247147421328, |
| "learning_rate": 5e-05, |
| "loss": 1.5826, |
| "step": 1828 |
| }, |
| { |
| "epoch": 1.8448432301643312, |
| "grad_norm": 0.14759698549401534, |
| "learning_rate": 5e-05, |
| "loss": 1.6195, |
| "step": 1829 |
| }, |
| { |
| "epoch": 1.845851396310112, |
| "grad_norm": 0.13045200667392873, |
| "learning_rate": 5e-05, |
| "loss": 1.606, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.8468595624558928, |
| "grad_norm": 0.32465101968739074, |
| "learning_rate": 5e-05, |
| "loss": 1.6092, |
| "step": 1831 |
| }, |
| { |
| "epoch": 1.8478677286016736, |
| "grad_norm": 0.1398935855602046, |
| "learning_rate": 5e-05, |
| "loss": 1.6139, |
| "step": 1832 |
| }, |
| { |
| "epoch": 1.8488758947474544, |
| "grad_norm": 0.1338958194359356, |
| "learning_rate": 5e-05, |
| "loss": 1.6117, |
| "step": 1833 |
| }, |
| { |
| "epoch": 1.8498840608932352, |
| "grad_norm": 0.14382819664190047, |
| "learning_rate": 5e-05, |
| "loss": 1.6057, |
| "step": 1834 |
| }, |
| { |
| "epoch": 1.850892227039016, |
| "grad_norm": 0.14351491833839605, |
| "learning_rate": 5e-05, |
| "loss": 1.6086, |
| "step": 1835 |
| }, |
| { |
| "epoch": 1.8519003931847968, |
| "grad_norm": 0.13662427769391197, |
| "learning_rate": 5e-05, |
| "loss": 1.6218, |
| "step": 1836 |
| }, |
| { |
| "epoch": 1.8529085593305776, |
| "grad_norm": 0.13890383974711884, |
| "learning_rate": 5e-05, |
| "loss": 1.6166, |
| "step": 1837 |
| }, |
| { |
| "epoch": 1.8539167254763584, |
| "grad_norm": 0.13513145904149312, |
| "learning_rate": 5e-05, |
| "loss": 1.5881, |
| "step": 1838 |
| }, |
| { |
| "epoch": 1.8549248916221392, |
| "grad_norm": 0.13594723016421367, |
| "learning_rate": 5e-05, |
| "loss": 1.6008, |
| "step": 1839 |
| }, |
| { |
| "epoch": 1.85593305776792, |
| "grad_norm": 0.1378923959889255, |
| "learning_rate": 5e-05, |
| "loss": 1.6138, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.8569412239137009, |
| "grad_norm": 0.14780320989735776, |
| "learning_rate": 5e-05, |
| "loss": 1.5961, |
| "step": 1841 |
| }, |
| { |
| "epoch": 1.8579493900594817, |
| "grad_norm": 0.14025512215005298, |
| "learning_rate": 5e-05, |
| "loss": 1.5891, |
| "step": 1842 |
| }, |
| { |
| "epoch": 1.8589575562052625, |
| "grad_norm": 0.13720223676278484, |
| "learning_rate": 5e-05, |
| "loss": 1.5996, |
| "step": 1843 |
| }, |
| { |
| "epoch": 1.8599657223510433, |
| "grad_norm": 0.13394687034017358, |
| "learning_rate": 5e-05, |
| "loss": 1.5953, |
| "step": 1844 |
| }, |
| { |
| "epoch": 1.8609738884968243, |
| "grad_norm": 0.13660579030003928, |
| "learning_rate": 5e-05, |
| "loss": 1.6247, |
| "step": 1845 |
| }, |
| { |
| "epoch": 1.8619820546426051, |
| "grad_norm": 0.15443892739189163, |
| "learning_rate": 5e-05, |
| "loss": 1.6047, |
| "step": 1846 |
| }, |
| { |
| "epoch": 1.862990220788386, |
| "grad_norm": 0.13569253914401672, |
| "learning_rate": 5e-05, |
| "loss": 1.5989, |
| "step": 1847 |
| }, |
| { |
| "epoch": 1.8639983869341668, |
| "grad_norm": 0.14797438502778215, |
| "learning_rate": 5e-05, |
| "loss": 1.5955, |
| "step": 1848 |
| }, |
| { |
| "epoch": 1.8650065530799476, |
| "grad_norm": 0.1421115324462668, |
| "learning_rate": 5e-05, |
| "loss": 1.6031, |
| "step": 1849 |
| }, |
| { |
| "epoch": 1.8660147192257284, |
| "grad_norm": 0.1333814678847669, |
| "learning_rate": 5e-05, |
| "loss": 1.6313, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.8670228853715092, |
| "grad_norm": 0.14198072611997878, |
| "learning_rate": 5e-05, |
| "loss": 1.6246, |
| "step": 1851 |
| }, |
| { |
| "epoch": 1.8680310515172902, |
| "grad_norm": 0.12894781507480482, |
| "learning_rate": 5e-05, |
| "loss": 1.5984, |
| "step": 1852 |
| }, |
| { |
| "epoch": 1.869039217663071, |
| "grad_norm": 0.13553779775058764, |
| "learning_rate": 5e-05, |
| "loss": 1.6331, |
| "step": 1853 |
| }, |
| { |
| "epoch": 1.8700473838088518, |
| "grad_norm": 0.1488801216335878, |
| "learning_rate": 5e-05, |
| "loss": 1.6056, |
| "step": 1854 |
| }, |
| { |
| "epoch": 1.8710555499546326, |
| "grad_norm": 0.1340319579502652, |
| "learning_rate": 5e-05, |
| "loss": 1.5931, |
| "step": 1855 |
| }, |
| { |
| "epoch": 1.8720637161004134, |
| "grad_norm": 0.15078566624864237, |
| "learning_rate": 5e-05, |
| "loss": 1.6172, |
| "step": 1856 |
| }, |
| { |
| "epoch": 1.8730718822461943, |
| "grad_norm": 0.14431215810257134, |
| "learning_rate": 5e-05, |
| "loss": 1.6202, |
| "step": 1857 |
| }, |
| { |
| "epoch": 1.874080048391975, |
| "grad_norm": 0.13057775295180585, |
| "learning_rate": 5e-05, |
| "loss": 1.6009, |
| "step": 1858 |
| }, |
| { |
| "epoch": 1.8750882145377559, |
| "grad_norm": 0.13694652840155766, |
| "learning_rate": 5e-05, |
| "loss": 1.6063, |
| "step": 1859 |
| }, |
| { |
| "epoch": 1.8760963806835367, |
| "grad_norm": 0.13929146625825486, |
| "learning_rate": 5e-05, |
| "loss": 1.5999, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.8771045468293175, |
| "grad_norm": 0.13962543869508612, |
| "learning_rate": 5e-05, |
| "loss": 1.6071, |
| "step": 1861 |
| }, |
| { |
| "epoch": 1.8781127129750983, |
| "grad_norm": 0.1294264670653415, |
| "learning_rate": 5e-05, |
| "loss": 1.597, |
| "step": 1862 |
| }, |
| { |
| "epoch": 1.879120879120879, |
| "grad_norm": 0.1392941337579169, |
| "learning_rate": 5e-05, |
| "loss": 1.6109, |
| "step": 1863 |
| }, |
| { |
| "epoch": 1.88012904526666, |
| "grad_norm": 0.1375911580117992, |
| "learning_rate": 5e-05, |
| "loss": 1.6063, |
| "step": 1864 |
| }, |
| { |
| "epoch": 1.8811372114124407, |
| "grad_norm": 0.13057281302682908, |
| "learning_rate": 5e-05, |
| "loss": 1.5869, |
| "step": 1865 |
| }, |
| { |
| "epoch": 1.8821453775582215, |
| "grad_norm": 0.13465492897476905, |
| "learning_rate": 5e-05, |
| "loss": 1.6111, |
| "step": 1866 |
| }, |
| { |
| "epoch": 1.8831535437040023, |
| "grad_norm": 0.1352567853452641, |
| "learning_rate": 5e-05, |
| "loss": 1.6065, |
| "step": 1867 |
| }, |
| { |
| "epoch": 1.8841617098497832, |
| "grad_norm": 0.13265304956653806, |
| "learning_rate": 5e-05, |
| "loss": 1.6004, |
| "step": 1868 |
| }, |
| { |
| "epoch": 1.885169875995564, |
| "grad_norm": 0.134585810919123, |
| "learning_rate": 5e-05, |
| "loss": 1.6098, |
| "step": 1869 |
| }, |
| { |
| "epoch": 1.8861780421413448, |
| "grad_norm": 0.1378781672247223, |
| "learning_rate": 5e-05, |
| "loss": 1.6134, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.8871862082871256, |
| "grad_norm": 0.6687407616313615, |
| "learning_rate": 5e-05, |
| "loss": 1.6211, |
| "step": 1871 |
| }, |
| { |
| "epoch": 1.8881943744329064, |
| "grad_norm": 0.14131248756584672, |
| "learning_rate": 5e-05, |
| "loss": 1.6081, |
| "step": 1872 |
| }, |
| { |
| "epoch": 1.8892025405786874, |
| "grad_norm": 0.14944089262776455, |
| "learning_rate": 5e-05, |
| "loss": 1.6127, |
| "step": 1873 |
| }, |
| { |
| "epoch": 1.8902107067244682, |
| "grad_norm": 0.14994048389580897, |
| "learning_rate": 5e-05, |
| "loss": 1.6082, |
| "step": 1874 |
| }, |
| { |
| "epoch": 1.891218872870249, |
| "grad_norm": 0.14204566054703277, |
| "learning_rate": 5e-05, |
| "loss": 1.5939, |
| "step": 1875 |
| }, |
| { |
| "epoch": 1.8922270390160298, |
| "grad_norm": 0.14563930123363145, |
| "learning_rate": 5e-05, |
| "loss": 1.6285, |
| "step": 1876 |
| }, |
| { |
| "epoch": 1.8932352051618107, |
| "grad_norm": 0.14602409024075935, |
| "learning_rate": 5e-05, |
| "loss": 1.6162, |
| "step": 1877 |
| }, |
| { |
| "epoch": 1.8942433713075915, |
| "grad_norm": 0.3250170638850309, |
| "learning_rate": 5e-05, |
| "loss": 1.6326, |
| "step": 1878 |
| }, |
| { |
| "epoch": 1.8952515374533723, |
| "grad_norm": 0.1480123135093311, |
| "learning_rate": 5e-05, |
| "loss": 1.6074, |
| "step": 1879 |
| }, |
| { |
| "epoch": 1.8962597035991533, |
| "grad_norm": 0.13564589267677152, |
| "learning_rate": 5e-05, |
| "loss": 1.6165, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.897267869744934, |
| "grad_norm": 0.14598987026919524, |
| "learning_rate": 5e-05, |
| "loss": 1.6241, |
| "step": 1881 |
| }, |
| { |
| "epoch": 1.898276035890715, |
| "grad_norm": 0.15340222651431212, |
| "learning_rate": 5e-05, |
| "loss": 1.6038, |
| "step": 1882 |
| }, |
| { |
| "epoch": 1.8992842020364957, |
| "grad_norm": 0.14660842376268418, |
| "learning_rate": 5e-05, |
| "loss": 1.6137, |
| "step": 1883 |
| }, |
| { |
| "epoch": 1.9002923681822765, |
| "grad_norm": 0.14523481586952683, |
| "learning_rate": 5e-05, |
| "loss": 1.6114, |
| "step": 1884 |
| }, |
| { |
| "epoch": 1.9013005343280573, |
| "grad_norm": 0.139366252487723, |
| "learning_rate": 5e-05, |
| "loss": 1.6189, |
| "step": 1885 |
| }, |
| { |
| "epoch": 1.9023087004738382, |
| "grad_norm": 0.14047698934688582, |
| "learning_rate": 5e-05, |
| "loss": 1.5822, |
| "step": 1886 |
| }, |
| { |
| "epoch": 1.903316866619619, |
| "grad_norm": 0.1429688256910901, |
| "learning_rate": 5e-05, |
| "loss": 1.6005, |
| "step": 1887 |
| }, |
| { |
| "epoch": 1.9043250327653998, |
| "grad_norm": 0.1449894299784549, |
| "learning_rate": 5e-05, |
| "loss": 1.619, |
| "step": 1888 |
| }, |
| { |
| "epoch": 1.9053331989111806, |
| "grad_norm": 0.136258179982917, |
| "learning_rate": 5e-05, |
| "loss": 1.6177, |
| "step": 1889 |
| }, |
| { |
| "epoch": 1.9063413650569614, |
| "grad_norm": 0.14993956007154408, |
| "learning_rate": 5e-05, |
| "loss": 1.6151, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.9073495312027422, |
| "grad_norm": 0.18176454707772344, |
| "learning_rate": 5e-05, |
| "loss": 1.6147, |
| "step": 1891 |
| }, |
| { |
| "epoch": 1.908357697348523, |
| "grad_norm": 0.13697270459679123, |
| "learning_rate": 5e-05, |
| "loss": 1.5944, |
| "step": 1892 |
| }, |
| { |
| "epoch": 1.9093658634943038, |
| "grad_norm": 0.14866596724221887, |
| "learning_rate": 5e-05, |
| "loss": 1.6019, |
| "step": 1893 |
| }, |
| { |
| "epoch": 1.9103740296400846, |
| "grad_norm": 0.13384771033533666, |
| "learning_rate": 5e-05, |
| "loss": 1.6016, |
| "step": 1894 |
| }, |
| { |
| "epoch": 1.9113821957858654, |
| "grad_norm": 0.1443979145146113, |
| "learning_rate": 5e-05, |
| "loss": 1.6118, |
| "step": 1895 |
| }, |
| { |
| "epoch": 1.9123903619316462, |
| "grad_norm": 0.14963383206603662, |
| "learning_rate": 5e-05, |
| "loss": 1.6152, |
| "step": 1896 |
| }, |
| { |
| "epoch": 1.913398528077427, |
| "grad_norm": 0.13098848994842957, |
| "learning_rate": 5e-05, |
| "loss": 1.6132, |
| "step": 1897 |
| }, |
| { |
| "epoch": 1.9144066942232079, |
| "grad_norm": 0.1420448647911459, |
| "learning_rate": 5e-05, |
| "loss": 1.6152, |
| "step": 1898 |
| }, |
| { |
| "epoch": 1.9154148603689887, |
| "grad_norm": 0.1402422345987282, |
| "learning_rate": 5e-05, |
| "loss": 1.6258, |
| "step": 1899 |
| }, |
| { |
| "epoch": 1.9164230265147695, |
| "grad_norm": 0.14282419116361197, |
| "learning_rate": 5e-05, |
| "loss": 1.5993, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.9174311926605505, |
| "grad_norm": 0.12900876538976672, |
| "learning_rate": 5e-05, |
| "loss": 1.5947, |
| "step": 1901 |
| }, |
| { |
| "epoch": 1.9184393588063313, |
| "grad_norm": 0.13602993676987202, |
| "learning_rate": 5e-05, |
| "loss": 1.5963, |
| "step": 1902 |
| }, |
| { |
| "epoch": 1.9194475249521121, |
| "grad_norm": 0.13091353897349692, |
| "learning_rate": 5e-05, |
| "loss": 1.6103, |
| "step": 1903 |
| }, |
| { |
| "epoch": 1.920455691097893, |
| "grad_norm": 0.13147459168353956, |
| "learning_rate": 5e-05, |
| "loss": 1.5896, |
| "step": 1904 |
| }, |
| { |
| "epoch": 1.9214638572436737, |
| "grad_norm": 0.1278349356366095, |
| "learning_rate": 5e-05, |
| "loss": 1.585, |
| "step": 1905 |
| }, |
| { |
| "epoch": 1.9224720233894546, |
| "grad_norm": 0.13556216612236158, |
| "learning_rate": 5e-05, |
| "loss": 1.5997, |
| "step": 1906 |
| }, |
| { |
| "epoch": 1.9234801895352354, |
| "grad_norm": 0.13786564534521312, |
| "learning_rate": 5e-05, |
| "loss": 1.6049, |
| "step": 1907 |
| }, |
| { |
| "epoch": 1.9244883556810164, |
| "grad_norm": 0.13361588168189525, |
| "learning_rate": 5e-05, |
| "loss": 1.6127, |
| "step": 1908 |
| }, |
| { |
| "epoch": 1.9254965218267972, |
| "grad_norm": 0.12569201204576966, |
| "learning_rate": 5e-05, |
| "loss": 1.5928, |
| "step": 1909 |
| }, |
| { |
| "epoch": 1.926504687972578, |
| "grad_norm": 0.152867731337792, |
| "learning_rate": 5e-05, |
| "loss": 1.6042, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.9275128541183588, |
| "grad_norm": 0.13575798302417755, |
| "learning_rate": 5e-05, |
| "loss": 1.5977, |
| "step": 1911 |
| }, |
| { |
| "epoch": 1.9285210202641396, |
| "grad_norm": 0.1398337969996516, |
| "learning_rate": 5e-05, |
| "loss": 1.6014, |
| "step": 1912 |
| }, |
| { |
| "epoch": 1.9295291864099204, |
| "grad_norm": 0.12623522408596025, |
| "learning_rate": 5e-05, |
| "loss": 1.6017, |
| "step": 1913 |
| }, |
| { |
| "epoch": 1.9305373525557012, |
| "grad_norm": 0.13172330015779796, |
| "learning_rate": 5e-05, |
| "loss": 1.5824, |
| "step": 1914 |
| }, |
| { |
| "epoch": 1.931545518701482, |
| "grad_norm": 0.13863466103596936, |
| "learning_rate": 5e-05, |
| "loss": 1.6083, |
| "step": 1915 |
| }, |
| { |
| "epoch": 1.9325536848472629, |
| "grad_norm": 0.1289112695594299, |
| "learning_rate": 5e-05, |
| "loss": 1.6057, |
| "step": 1916 |
| }, |
| { |
| "epoch": 1.9335618509930437, |
| "grad_norm": 0.125376155275176, |
| "learning_rate": 5e-05, |
| "loss": 1.6015, |
| "step": 1917 |
| }, |
| { |
| "epoch": 1.9345700171388245, |
| "grad_norm": 0.1269564764984299, |
| "learning_rate": 5e-05, |
| "loss": 1.6147, |
| "step": 1918 |
| }, |
| { |
| "epoch": 1.9355781832846053, |
| "grad_norm": 0.12566810938900913, |
| "learning_rate": 5e-05, |
| "loss": 1.5841, |
| "step": 1919 |
| }, |
| { |
| "epoch": 1.936586349430386, |
| "grad_norm": 0.13117779231765533, |
| "learning_rate": 5e-05, |
| "loss": 1.5935, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.937594515576167, |
| "grad_norm": 0.13386327063924972, |
| "learning_rate": 5e-05, |
| "loss": 1.5995, |
| "step": 1921 |
| }, |
| { |
| "epoch": 1.9386026817219477, |
| "grad_norm": 0.1257136559733915, |
| "learning_rate": 5e-05, |
| "loss": 1.6112, |
| "step": 1922 |
| }, |
| { |
| "epoch": 1.9396108478677285, |
| "grad_norm": 0.13537512034613913, |
| "learning_rate": 5e-05, |
| "loss": 1.5972, |
| "step": 1923 |
| }, |
| { |
| "epoch": 1.9406190140135093, |
| "grad_norm": 0.1253377735548057, |
| "learning_rate": 5e-05, |
| "loss": 1.606, |
| "step": 1924 |
| }, |
| { |
| "epoch": 1.9416271801592901, |
| "grad_norm": 0.13805307841774989, |
| "learning_rate": 5e-05, |
| "loss": 1.6166, |
| "step": 1925 |
| }, |
| { |
| "epoch": 1.942635346305071, |
| "grad_norm": 0.14387345444798735, |
| "learning_rate": 5e-05, |
| "loss": 1.6043, |
| "step": 1926 |
| }, |
| { |
| "epoch": 1.9436435124508518, |
| "grad_norm": 0.14135640883511918, |
| "learning_rate": 5e-05, |
| "loss": 1.5987, |
| "step": 1927 |
| }, |
| { |
| "epoch": 1.9446516785966326, |
| "grad_norm": 0.137130239912801, |
| "learning_rate": 5e-05, |
| "loss": 1.6048, |
| "step": 1928 |
| }, |
| { |
| "epoch": 1.9456598447424136, |
| "grad_norm": 0.13528422418643588, |
| "learning_rate": 5e-05, |
| "loss": 1.5905, |
| "step": 1929 |
| }, |
| { |
| "epoch": 1.9466680108881944, |
| "grad_norm": 0.13217969456041886, |
| "learning_rate": 5e-05, |
| "loss": 1.5956, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.9476761770339752, |
| "grad_norm": 0.12844860444043849, |
| "learning_rate": 5e-05, |
| "loss": 1.5986, |
| "step": 1931 |
| }, |
| { |
| "epoch": 1.948684343179756, |
| "grad_norm": 0.13198107001437961, |
| "learning_rate": 5e-05, |
| "loss": 1.5972, |
| "step": 1932 |
| }, |
| { |
| "epoch": 1.9496925093255368, |
| "grad_norm": 0.14030130422292156, |
| "learning_rate": 5e-05, |
| "loss": 1.5947, |
| "step": 1933 |
| }, |
| { |
| "epoch": 1.9507006754713176, |
| "grad_norm": 0.13982594899557532, |
| "learning_rate": 5e-05, |
| "loss": 1.6082, |
| "step": 1934 |
| }, |
| { |
| "epoch": 1.9517088416170985, |
| "grad_norm": 0.13591588212366842, |
| "learning_rate": 5e-05, |
| "loss": 1.6009, |
| "step": 1935 |
| }, |
| { |
| "epoch": 1.9527170077628795, |
| "grad_norm": 0.14041955247040697, |
| "learning_rate": 5e-05, |
| "loss": 1.6068, |
| "step": 1936 |
| }, |
| { |
| "epoch": 1.9537251739086603, |
| "grad_norm": 0.12783435122430417, |
| "learning_rate": 5e-05, |
| "loss": 1.5945, |
| "step": 1937 |
| }, |
| { |
| "epoch": 1.954733340054441, |
| "grad_norm": 0.15003800100073741, |
| "learning_rate": 5e-05, |
| "loss": 1.5921, |
| "step": 1938 |
| }, |
| { |
| "epoch": 1.955741506200222, |
| "grad_norm": 0.13003233132983819, |
| "learning_rate": 5e-05, |
| "loss": 1.6035, |
| "step": 1939 |
| }, |
| { |
| "epoch": 1.9567496723460027, |
| "grad_norm": 0.13691493601420102, |
| "learning_rate": 5e-05, |
| "loss": 1.602, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.9577578384917835, |
| "grad_norm": 0.14181799516668003, |
| "learning_rate": 5e-05, |
| "loss": 1.615, |
| "step": 1941 |
| }, |
| { |
| "epoch": 1.9587660046375643, |
| "grad_norm": 0.12521176786439234, |
| "learning_rate": 5e-05, |
| "loss": 1.595, |
| "step": 1942 |
| }, |
| { |
| "epoch": 1.9597741707833451, |
| "grad_norm": 0.12916795703523762, |
| "learning_rate": 5e-05, |
| "loss": 1.6053, |
| "step": 1943 |
| }, |
| { |
| "epoch": 1.960782336929126, |
| "grad_norm": 0.13398015324125567, |
| "learning_rate": 5e-05, |
| "loss": 1.6093, |
| "step": 1944 |
| }, |
| { |
| "epoch": 1.9617905030749068, |
| "grad_norm": 0.12958030427407444, |
| "learning_rate": 5e-05, |
| "loss": 1.6057, |
| "step": 1945 |
| }, |
| { |
| "epoch": 1.9627986692206876, |
| "grad_norm": 0.12561514967589582, |
| "learning_rate": 5e-05, |
| "loss": 1.5809, |
| "step": 1946 |
| }, |
| { |
| "epoch": 1.9638068353664684, |
| "grad_norm": 0.13248125250714624, |
| "learning_rate": 5e-05, |
| "loss": 1.6067, |
| "step": 1947 |
| }, |
| { |
| "epoch": 1.9648150015122492, |
| "grad_norm": 0.14501357641489096, |
| "learning_rate": 5e-05, |
| "loss": 1.6124, |
| "step": 1948 |
| }, |
| { |
| "epoch": 1.96582316765803, |
| "grad_norm": 0.15592280691143984, |
| "learning_rate": 5e-05, |
| "loss": 1.6014, |
| "step": 1949 |
| }, |
| { |
| "epoch": 1.9668313338038108, |
| "grad_norm": 0.134906763274516, |
| "learning_rate": 5e-05, |
| "loss": 1.6183, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.9678394999495916, |
| "grad_norm": 0.14157220397428136, |
| "learning_rate": 5e-05, |
| "loss": 1.6093, |
| "step": 1951 |
| }, |
| { |
| "epoch": 1.9688476660953724, |
| "grad_norm": 0.14414118076516377, |
| "learning_rate": 5e-05, |
| "loss": 1.6088, |
| "step": 1952 |
| }, |
| { |
| "epoch": 1.9698558322411532, |
| "grad_norm": 0.13162906710490183, |
| "learning_rate": 5e-05, |
| "loss": 1.5969, |
| "step": 1953 |
| }, |
| { |
| "epoch": 1.970863998386934, |
| "grad_norm": 0.13250441324273587, |
| "learning_rate": 5e-05, |
| "loss": 1.6031, |
| "step": 1954 |
| }, |
| { |
| "epoch": 1.9718721645327149, |
| "grad_norm": 0.24250705772782222, |
| "learning_rate": 5e-05, |
| "loss": 1.6015, |
| "step": 1955 |
| }, |
| { |
| "epoch": 1.9728803306784957, |
| "grad_norm": 0.13645893773608062, |
| "learning_rate": 5e-05, |
| "loss": 1.6159, |
| "step": 1956 |
| }, |
| { |
| "epoch": 1.9738884968242767, |
| "grad_norm": 0.1371558712887575, |
| "learning_rate": 5e-05, |
| "loss": 1.6047, |
| "step": 1957 |
| }, |
| { |
| "epoch": 1.9748966629700575, |
| "grad_norm": 0.12471125980392224, |
| "learning_rate": 5e-05, |
| "loss": 1.5932, |
| "step": 1958 |
| }, |
| { |
| "epoch": 1.9759048291158383, |
| "grad_norm": 0.13982098444969587, |
| "learning_rate": 5e-05, |
| "loss": 1.595, |
| "step": 1959 |
| }, |
| { |
| "epoch": 1.9769129952616191, |
| "grad_norm": 0.13725684179927516, |
| "learning_rate": 5e-05, |
| "loss": 1.6089, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.9779211614074, |
| "grad_norm": 0.13982441728850534, |
| "learning_rate": 5e-05, |
| "loss": 1.619, |
| "step": 1961 |
| }, |
| { |
| "epoch": 1.9789293275531807, |
| "grad_norm": 0.15335757017626273, |
| "learning_rate": 5e-05, |
| "loss": 1.5956, |
| "step": 1962 |
| }, |
| { |
| "epoch": 1.9799374936989615, |
| "grad_norm": 0.1312542054924984, |
| "learning_rate": 5e-05, |
| "loss": 1.603, |
| "step": 1963 |
| }, |
| { |
| "epoch": 1.9809456598447426, |
| "grad_norm": 0.12820080032328318, |
| "learning_rate": 5e-05, |
| "loss": 1.6009, |
| "step": 1964 |
| }, |
| { |
| "epoch": 1.9819538259905234, |
| "grad_norm": 0.12787509611016895, |
| "learning_rate": 5e-05, |
| "loss": 1.6012, |
| "step": 1965 |
| }, |
| { |
| "epoch": 1.9829619921363042, |
| "grad_norm": 0.13478549723799105, |
| "learning_rate": 5e-05, |
| "loss": 1.5921, |
| "step": 1966 |
| }, |
| { |
| "epoch": 1.983970158282085, |
| "grad_norm": 0.12801241537455618, |
| "learning_rate": 5e-05, |
| "loss": 1.5773, |
| "step": 1967 |
| }, |
| { |
| "epoch": 1.9849783244278658, |
| "grad_norm": 0.13642931742379077, |
| "learning_rate": 5e-05, |
| "loss": 1.6076, |
| "step": 1968 |
| }, |
| { |
| "epoch": 1.9859864905736466, |
| "grad_norm": 0.13228520189020773, |
| "learning_rate": 5e-05, |
| "loss": 1.594, |
| "step": 1969 |
| }, |
| { |
| "epoch": 1.9869946567194274, |
| "grad_norm": 0.15214148479625955, |
| "learning_rate": 5e-05, |
| "loss": 1.6005, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.9880028228652082, |
| "grad_norm": 0.12704794193194915, |
| "learning_rate": 5e-05, |
| "loss": 1.5948, |
| "step": 1971 |
| }, |
| { |
| "epoch": 1.989010989010989, |
| "grad_norm": 0.14032856285895748, |
| "learning_rate": 5e-05, |
| "loss": 1.6169, |
| "step": 1972 |
| }, |
| { |
| "epoch": 1.9900191551567699, |
| "grad_norm": 0.1379011923261903, |
| "learning_rate": 5e-05, |
| "loss": 1.6093, |
| "step": 1973 |
| }, |
| { |
| "epoch": 1.9910273213025507, |
| "grad_norm": 0.15400627838079264, |
| "learning_rate": 5e-05, |
| "loss": 1.5877, |
| "step": 1974 |
| }, |
| { |
| "epoch": 1.9920354874483315, |
| "grad_norm": 0.13931815971769526, |
| "learning_rate": 5e-05, |
| "loss": 1.6181, |
| "step": 1975 |
| }, |
| { |
| "epoch": 1.9930436535941123, |
| "grad_norm": 0.14901278041832924, |
| "learning_rate": 5e-05, |
| "loss": 1.6117, |
| "step": 1976 |
| }, |
| { |
| "epoch": 1.994051819739893, |
| "grad_norm": 0.13392496239472437, |
| "learning_rate": 5e-05, |
| "loss": 1.5965, |
| "step": 1977 |
| }, |
| { |
| "epoch": 1.995059985885674, |
| "grad_norm": 0.14729085153751564, |
| "learning_rate": 5e-05, |
| "loss": 1.5922, |
| "step": 1978 |
| }, |
| { |
| "epoch": 1.9960681520314547, |
| "grad_norm": 0.1441581729480837, |
| "learning_rate": 5e-05, |
| "loss": 1.5931, |
| "step": 1979 |
| }, |
| { |
| "epoch": 1.9970763181772355, |
| "grad_norm": 0.14997561173196974, |
| "learning_rate": 5e-05, |
| "loss": 1.5888, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.9980844843230163, |
| "grad_norm": 0.14243833152383623, |
| "learning_rate": 5e-05, |
| "loss": 1.6121, |
| "step": 1981 |
| }, |
| { |
| "epoch": 1.9990926504687971, |
| "grad_norm": 0.13679601875916828, |
| "learning_rate": 5e-05, |
| "loss": 1.6121, |
| "step": 1982 |
| }, |
| { |
| "epoch": 2.001008166145781, |
| "grad_norm": 0.15239737325547434, |
| "learning_rate": 5e-05, |
| "loss": 3.149, |
| "step": 1983 |
| }, |
| { |
| "epoch": 2.0020163322915616, |
| "grad_norm": 0.1414434123590037, |
| "learning_rate": 5e-05, |
| "loss": 1.5731, |
| "step": 1984 |
| }, |
| { |
| "epoch": 2.0030244984373424, |
| "grad_norm": 0.14044131087245215, |
| "learning_rate": 5e-05, |
| "loss": 1.5719, |
| "step": 1985 |
| }, |
| { |
| "epoch": 2.0040326645831232, |
| "grad_norm": 0.14938263143092181, |
| "learning_rate": 5e-05, |
| "loss": 1.5797, |
| "step": 1986 |
| }, |
| { |
| "epoch": 2.005040830728904, |
| "grad_norm": 0.1480385858459359, |
| "learning_rate": 5e-05, |
| "loss": 1.5864, |
| "step": 1987 |
| }, |
| { |
| "epoch": 2.006048996874685, |
| "grad_norm": 0.15105987898215928, |
| "learning_rate": 5e-05, |
| "loss": 1.5857, |
| "step": 1988 |
| }, |
| { |
| "epoch": 2.0070571630204657, |
| "grad_norm": 0.15277805692282542, |
| "learning_rate": 5e-05, |
| "loss": 1.5877, |
| "step": 1989 |
| }, |
| { |
| "epoch": 2.0080653291662465, |
| "grad_norm": 0.14136226071223026, |
| "learning_rate": 5e-05, |
| "loss": 1.5922, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.0090734953120273, |
| "grad_norm": 0.15906443661641592, |
| "learning_rate": 5e-05, |
| "loss": 1.5765, |
| "step": 1991 |
| }, |
| { |
| "epoch": 2.010081661457808, |
| "grad_norm": 0.1390828662294288, |
| "learning_rate": 5e-05, |
| "loss": 1.5749, |
| "step": 1992 |
| }, |
| { |
| "epoch": 2.011089827603589, |
| "grad_norm": 0.13479860947934347, |
| "learning_rate": 5e-05, |
| "loss": 1.5758, |
| "step": 1993 |
| }, |
| { |
| "epoch": 2.0120979937493697, |
| "grad_norm": 0.14011583458011628, |
| "learning_rate": 5e-05, |
| "loss": 1.5849, |
| "step": 1994 |
| }, |
| { |
| "epoch": 2.0131061598951505, |
| "grad_norm": 0.14109946386905808, |
| "learning_rate": 5e-05, |
| "loss": 1.599, |
| "step": 1995 |
| }, |
| { |
| "epoch": 2.0141143260409313, |
| "grad_norm": 0.13679672267236298, |
| "learning_rate": 5e-05, |
| "loss": 1.594, |
| "step": 1996 |
| }, |
| { |
| "epoch": 2.0151224921867126, |
| "grad_norm": 0.14025024019966215, |
| "learning_rate": 5e-05, |
| "loss": 1.5715, |
| "step": 1997 |
| }, |
| { |
| "epoch": 2.0161306583324934, |
| "grad_norm": 0.14492555269221222, |
| "learning_rate": 5e-05, |
| "loss": 1.5849, |
| "step": 1998 |
| }, |
| { |
| "epoch": 2.017138824478274, |
| "grad_norm": 0.14225952288247473, |
| "learning_rate": 5e-05, |
| "loss": 1.5875, |
| "step": 1999 |
| }, |
| { |
| "epoch": 2.018146990624055, |
| "grad_norm": 0.12931382989804227, |
| "learning_rate": 5e-05, |
| "loss": 1.5862, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.019155156769836, |
| "grad_norm": 0.12570288310495115, |
| "learning_rate": 5e-05, |
| "loss": 1.582, |
| "step": 2001 |
| }, |
| { |
| "epoch": 2.0201633229156166, |
| "grad_norm": 0.128123634753698, |
| "learning_rate": 5e-05, |
| "loss": 1.5896, |
| "step": 2002 |
| }, |
| { |
| "epoch": 2.0211714890613974, |
| "grad_norm": 0.13997309593233415, |
| "learning_rate": 5e-05, |
| "loss": 1.5859, |
| "step": 2003 |
| }, |
| { |
| "epoch": 2.0221796552071782, |
| "grad_norm": 0.1277717156019842, |
| "learning_rate": 5e-05, |
| "loss": 1.5649, |
| "step": 2004 |
| }, |
| { |
| "epoch": 2.023187821352959, |
| "grad_norm": 0.1364969063859019, |
| "learning_rate": 5e-05, |
| "loss": 1.5711, |
| "step": 2005 |
| }, |
| { |
| "epoch": 2.02419598749874, |
| "grad_norm": 0.13159486148668112, |
| "learning_rate": 5e-05, |
| "loss": 1.5834, |
| "step": 2006 |
| }, |
| { |
| "epoch": 2.0252041536445207, |
| "grad_norm": 0.1400695599752123, |
| "learning_rate": 5e-05, |
| "loss": 1.5679, |
| "step": 2007 |
| }, |
| { |
| "epoch": 2.0262123197903015, |
| "grad_norm": 0.12494151796163276, |
| "learning_rate": 5e-05, |
| "loss": 1.5795, |
| "step": 2008 |
| }, |
| { |
| "epoch": 2.0272204859360823, |
| "grad_norm": 0.12971624165572987, |
| "learning_rate": 5e-05, |
| "loss": 1.5857, |
| "step": 2009 |
| }, |
| { |
| "epoch": 2.028228652081863, |
| "grad_norm": 0.14127100423090322, |
| "learning_rate": 5e-05, |
| "loss": 1.58, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.029236818227644, |
| "grad_norm": 0.12546115316106868, |
| "learning_rate": 5e-05, |
| "loss": 1.6, |
| "step": 2011 |
| }, |
| { |
| "epoch": 2.0302449843734247, |
| "grad_norm": 0.14428275735853666, |
| "learning_rate": 5e-05, |
| "loss": 1.5982, |
| "step": 2012 |
| }, |
| { |
| "epoch": 2.0312531505192055, |
| "grad_norm": 0.1332295837228725, |
| "learning_rate": 5e-05, |
| "loss": 1.5935, |
| "step": 2013 |
| }, |
| { |
| "epoch": 2.0322613166649863, |
| "grad_norm": 0.13597233433162517, |
| "learning_rate": 5e-05, |
| "loss": 1.5897, |
| "step": 2014 |
| }, |
| { |
| "epoch": 2.033269482810767, |
| "grad_norm": 0.13374651915633368, |
| "learning_rate": 5e-05, |
| "loss": 1.582, |
| "step": 2015 |
| }, |
| { |
| "epoch": 2.034277648956548, |
| "grad_norm": 1.0718429567439263, |
| "learning_rate": 5e-05, |
| "loss": 1.6054, |
| "step": 2016 |
| }, |
| { |
| "epoch": 2.0352858151023288, |
| "grad_norm": 0.1454753303776003, |
| "learning_rate": 5e-05, |
| "loss": 1.5744, |
| "step": 2017 |
| }, |
| { |
| "epoch": 2.0362939812481096, |
| "grad_norm": 1.3192993490814344, |
| "learning_rate": 5e-05, |
| "loss": 1.5846, |
| "step": 2018 |
| }, |
| { |
| "epoch": 2.0373021473938904, |
| "grad_norm": 0.15829490474116634, |
| "learning_rate": 5e-05, |
| "loss": 1.5645, |
| "step": 2019 |
| }, |
| { |
| "epoch": 2.038310313539671, |
| "grad_norm": 0.4166200931020022, |
| "learning_rate": 5e-05, |
| "loss": 1.5748, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.039318479685452, |
| "grad_norm": 0.16462236982161826, |
| "learning_rate": 5e-05, |
| "loss": 1.6045, |
| "step": 2021 |
| }, |
| { |
| "epoch": 2.040326645831233, |
| "grad_norm": 0.18848958751308395, |
| "learning_rate": 5e-05, |
| "loss": 1.5723, |
| "step": 2022 |
| }, |
| { |
| "epoch": 2.0413348119770136, |
| "grad_norm": 2.8691517912384406, |
| "learning_rate": 5e-05, |
| "loss": 1.5825, |
| "step": 2023 |
| }, |
| { |
| "epoch": 2.042342978122795, |
| "grad_norm": 0.1831129706751049, |
| "learning_rate": 5e-05, |
| "loss": 1.5909, |
| "step": 2024 |
| }, |
| { |
| "epoch": 2.0433511442685757, |
| "grad_norm": 0.1542464053659099, |
| "learning_rate": 5e-05, |
| "loss": 1.5815, |
| "step": 2025 |
| }, |
| { |
| "epoch": 2.0443593104143565, |
| "grad_norm": 0.16951371372735333, |
| "learning_rate": 5e-05, |
| "loss": 1.5849, |
| "step": 2026 |
| }, |
| { |
| "epoch": 2.0453674765601373, |
| "grad_norm": 0.15075713521771286, |
| "learning_rate": 5e-05, |
| "loss": 1.5829, |
| "step": 2027 |
| }, |
| { |
| "epoch": 2.046375642705918, |
| "grad_norm": 0.1710363964481069, |
| "learning_rate": 5e-05, |
| "loss": 1.5661, |
| "step": 2028 |
| }, |
| { |
| "epoch": 2.047383808851699, |
| "grad_norm": 0.1407350500918785, |
| "learning_rate": 5e-05, |
| "loss": 1.5794, |
| "step": 2029 |
| }, |
| { |
| "epoch": 2.0483919749974797, |
| "grad_norm": 0.12745889682392728, |
| "learning_rate": 5e-05, |
| "loss": 1.5856, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.0494001411432605, |
| "grad_norm": 0.1548959761618227, |
| "learning_rate": 5e-05, |
| "loss": 1.571, |
| "step": 2031 |
| }, |
| { |
| "epoch": 2.0504083072890413, |
| "grad_norm": 0.13805558786072203, |
| "learning_rate": 5e-05, |
| "loss": 1.5865, |
| "step": 2032 |
| }, |
| { |
| "epoch": 2.051416473434822, |
| "grad_norm": 0.13469138055910054, |
| "learning_rate": 5e-05, |
| "loss": 1.5706, |
| "step": 2033 |
| }, |
| { |
| "epoch": 2.052424639580603, |
| "grad_norm": 0.13611480781385657, |
| "learning_rate": 5e-05, |
| "loss": 1.5763, |
| "step": 2034 |
| }, |
| { |
| "epoch": 2.0534328057263838, |
| "grad_norm": 0.12987461003973538, |
| "learning_rate": 5e-05, |
| "loss": 1.574, |
| "step": 2035 |
| }, |
| { |
| "epoch": 2.0544409718721646, |
| "grad_norm": 0.1394014787871454, |
| "learning_rate": 5e-05, |
| "loss": 1.6029, |
| "step": 2036 |
| }, |
| { |
| "epoch": 2.0554491380179454, |
| "grad_norm": 0.12930904952960037, |
| "learning_rate": 5e-05, |
| "loss": 1.5956, |
| "step": 2037 |
| }, |
| { |
| "epoch": 2.056457304163726, |
| "grad_norm": 0.12879520510508266, |
| "learning_rate": 5e-05, |
| "loss": 1.5755, |
| "step": 2038 |
| }, |
| { |
| "epoch": 2.057465470309507, |
| "grad_norm": 0.134482974278635, |
| "learning_rate": 5e-05, |
| "loss": 1.5903, |
| "step": 2039 |
| }, |
| { |
| "epoch": 2.058473636455288, |
| "grad_norm": 0.12654255401460732, |
| "learning_rate": 5e-05, |
| "loss": 1.5734, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.0594818026010686, |
| "grad_norm": 0.13279043549099975, |
| "learning_rate": 5e-05, |
| "loss": 1.575, |
| "step": 2041 |
| }, |
| { |
| "epoch": 2.0604899687468494, |
| "grad_norm": 0.1333429239635304, |
| "learning_rate": 5e-05, |
| "loss": 1.5925, |
| "step": 2042 |
| }, |
| { |
| "epoch": 2.0614981348926302, |
| "grad_norm": 0.1418866674864204, |
| "learning_rate": 5e-05, |
| "loss": 1.5896, |
| "step": 2043 |
| }, |
| { |
| "epoch": 2.062506301038411, |
| "grad_norm": 0.14434314231466377, |
| "learning_rate": 5e-05, |
| "loss": 1.588, |
| "step": 2044 |
| }, |
| { |
| "epoch": 2.063514467184192, |
| "grad_norm": 0.1344900582718443, |
| "learning_rate": 5e-05, |
| "loss": 1.5736, |
| "step": 2045 |
| }, |
| { |
| "epoch": 2.0645226333299727, |
| "grad_norm": 0.12807147404779445, |
| "learning_rate": 5e-05, |
| "loss": 1.5801, |
| "step": 2046 |
| }, |
| { |
| "epoch": 2.0655307994757535, |
| "grad_norm": 0.13979396715382578, |
| "learning_rate": 5e-05, |
| "loss": 1.5837, |
| "step": 2047 |
| }, |
| { |
| "epoch": 2.0665389656215343, |
| "grad_norm": 0.14404907785468482, |
| "learning_rate": 5e-05, |
| "loss": 1.6047, |
| "step": 2048 |
| }, |
| { |
| "epoch": 2.067547131767315, |
| "grad_norm": 0.13668043965508236, |
| "learning_rate": 5e-05, |
| "loss": 1.6051, |
| "step": 2049 |
| }, |
| { |
| "epoch": 2.068555297913096, |
| "grad_norm": 0.1351624797762825, |
| "learning_rate": 5e-05, |
| "loss": 1.5896, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.0695634640588767, |
| "grad_norm": 0.13984443389880155, |
| "learning_rate": 5e-05, |
| "loss": 1.5854, |
| "step": 2051 |
| }, |
| { |
| "epoch": 2.0705716302046575, |
| "grad_norm": 0.16648618218296007, |
| "learning_rate": 5e-05, |
| "loss": 1.5876, |
| "step": 2052 |
| }, |
| { |
| "epoch": 2.0715797963504388, |
| "grad_norm": 0.13088261845123292, |
| "learning_rate": 5e-05, |
| "loss": 1.5868, |
| "step": 2053 |
| }, |
| { |
| "epoch": 2.0725879624962196, |
| "grad_norm": 0.13745554033609456, |
| "learning_rate": 5e-05, |
| "loss": 1.5746, |
| "step": 2054 |
| }, |
| { |
| "epoch": 2.0735961286420004, |
| "grad_norm": 0.13532858724224328, |
| "learning_rate": 5e-05, |
| "loss": 1.5877, |
| "step": 2055 |
| }, |
| { |
| "epoch": 2.074604294787781, |
| "grad_norm": 0.15855367646124624, |
| "learning_rate": 5e-05, |
| "loss": 1.5802, |
| "step": 2056 |
| }, |
| { |
| "epoch": 2.075612460933562, |
| "grad_norm": 0.12206776230979091, |
| "learning_rate": 5e-05, |
| "loss": 1.5795, |
| "step": 2057 |
| }, |
| { |
| "epoch": 2.076620627079343, |
| "grad_norm": 0.13082081367498316, |
| "learning_rate": 5e-05, |
| "loss": 1.5909, |
| "step": 2058 |
| }, |
| { |
| "epoch": 2.0776287932251236, |
| "grad_norm": 0.14240441263897932, |
| "learning_rate": 5e-05, |
| "loss": 1.5802, |
| "step": 2059 |
| }, |
| { |
| "epoch": 2.0786369593709044, |
| "grad_norm": 0.12811284544223986, |
| "learning_rate": 5e-05, |
| "loss": 1.5766, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.0796451255166852, |
| "grad_norm": 4.17170817033231, |
| "learning_rate": 5e-05, |
| "loss": 1.5797, |
| "step": 2061 |
| }, |
| { |
| "epoch": 2.080653291662466, |
| "grad_norm": 0.1721950101628573, |
| "learning_rate": 5e-05, |
| "loss": 1.587, |
| "step": 2062 |
| }, |
| { |
| "epoch": 2.081661457808247, |
| "grad_norm": 0.13185518863717982, |
| "learning_rate": 5e-05, |
| "loss": 1.5774, |
| "step": 2063 |
| }, |
| { |
| "epoch": 2.0826696239540277, |
| "grad_norm": 0.146560571379586, |
| "learning_rate": 5e-05, |
| "loss": 1.5938, |
| "step": 2064 |
| }, |
| { |
| "epoch": 2.0836777900998085, |
| "grad_norm": 0.14846207117290025, |
| "learning_rate": 5e-05, |
| "loss": 1.5758, |
| "step": 2065 |
| }, |
| { |
| "epoch": 2.0846859562455893, |
| "grad_norm": 0.1311640077841521, |
| "learning_rate": 5e-05, |
| "loss": 1.5809, |
| "step": 2066 |
| }, |
| { |
| "epoch": 2.08569412239137, |
| "grad_norm": 0.13241479202761453, |
| "learning_rate": 5e-05, |
| "loss": 1.6085, |
| "step": 2067 |
| }, |
| { |
| "epoch": 2.086702288537151, |
| "grad_norm": 0.1312992668660464, |
| "learning_rate": 5e-05, |
| "loss": 1.5771, |
| "step": 2068 |
| }, |
| { |
| "epoch": 2.0877104546829317, |
| "grad_norm": 0.1352480938058842, |
| "learning_rate": 5e-05, |
| "loss": 1.5829, |
| "step": 2069 |
| }, |
| { |
| "epoch": 2.0887186208287125, |
| "grad_norm": 0.1333541128455232, |
| "learning_rate": 5e-05, |
| "loss": 1.6096, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.0897267869744933, |
| "grad_norm": 0.14180592326583852, |
| "learning_rate": 5e-05, |
| "loss": 1.581, |
| "step": 2071 |
| }, |
| { |
| "epoch": 2.090734953120274, |
| "grad_norm": 0.13239894577254888, |
| "learning_rate": 5e-05, |
| "loss": 1.5804, |
| "step": 2072 |
| }, |
| { |
| "epoch": 2.091743119266055, |
| "grad_norm": 0.12376712319547249, |
| "learning_rate": 5e-05, |
| "loss": 1.5701, |
| "step": 2073 |
| }, |
| { |
| "epoch": 2.0927512854118357, |
| "grad_norm": 0.12850324197745347, |
| "learning_rate": 5e-05, |
| "loss": 1.5788, |
| "step": 2074 |
| }, |
| { |
| "epoch": 2.0937594515576166, |
| "grad_norm": 0.13756263568523414, |
| "learning_rate": 5e-05, |
| "loss": 1.595, |
| "step": 2075 |
| }, |
| { |
| "epoch": 2.0947676177033974, |
| "grad_norm": 0.13632894139290583, |
| "learning_rate": 5e-05, |
| "loss": 1.5792, |
| "step": 2076 |
| }, |
| { |
| "epoch": 2.095775783849178, |
| "grad_norm": 0.1377732804776425, |
| "learning_rate": 5e-05, |
| "loss": 1.5925, |
| "step": 2077 |
| }, |
| { |
| "epoch": 2.096783949994959, |
| "grad_norm": 0.1265264861915451, |
| "learning_rate": 5e-05, |
| "loss": 1.5803, |
| "step": 2078 |
| }, |
| { |
| "epoch": 2.09779211614074, |
| "grad_norm": 0.1373811074844198, |
| "learning_rate": 5e-05, |
| "loss": 1.5895, |
| "step": 2079 |
| }, |
| { |
| "epoch": 2.098800282286521, |
| "grad_norm": 0.16079023863605688, |
| "learning_rate": 5e-05, |
| "loss": 1.5761, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.099808448432302, |
| "grad_norm": 0.1287996846512774, |
| "learning_rate": 5e-05, |
| "loss": 1.5884, |
| "step": 2081 |
| }, |
| { |
| "epoch": 2.1008166145780827, |
| "grad_norm": 0.13967937800715108, |
| "learning_rate": 5e-05, |
| "loss": 1.584, |
| "step": 2082 |
| }, |
| { |
| "epoch": 2.1018247807238635, |
| "grad_norm": 0.20316101494970074, |
| "learning_rate": 5e-05, |
| "loss": 1.5841, |
| "step": 2083 |
| }, |
| { |
| "epoch": 2.1028329468696443, |
| "grad_norm": 0.1351942953234323, |
| "learning_rate": 5e-05, |
| "loss": 1.5961, |
| "step": 2084 |
| }, |
| { |
| "epoch": 2.103841113015425, |
| "grad_norm": 0.1349643106789439, |
| "learning_rate": 5e-05, |
| "loss": 1.6004, |
| "step": 2085 |
| }, |
| { |
| "epoch": 2.104849279161206, |
| "grad_norm": 0.13766524752091427, |
| "learning_rate": 5e-05, |
| "loss": 1.5861, |
| "step": 2086 |
| }, |
| { |
| "epoch": 2.1058574453069867, |
| "grad_norm": 0.14504842619366753, |
| "learning_rate": 5e-05, |
| "loss": 1.569, |
| "step": 2087 |
| }, |
| { |
| "epoch": 2.1068656114527675, |
| "grad_norm": 0.1397972647089506, |
| "learning_rate": 5e-05, |
| "loss": 1.577, |
| "step": 2088 |
| }, |
| { |
| "epoch": 2.1078737775985483, |
| "grad_norm": 0.14666718158867315, |
| "learning_rate": 5e-05, |
| "loss": 1.5732, |
| "step": 2089 |
| }, |
| { |
| "epoch": 2.108881943744329, |
| "grad_norm": 0.14788272692590154, |
| "learning_rate": 5e-05, |
| "loss": 1.5743, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.10989010989011, |
| "grad_norm": 0.1359914376560995, |
| "learning_rate": 5e-05, |
| "loss": 1.5776, |
| "step": 2091 |
| }, |
| { |
| "epoch": 2.1108982760358908, |
| "grad_norm": 0.13778006607100088, |
| "learning_rate": 5e-05, |
| "loss": 1.5765, |
| "step": 2092 |
| }, |
| { |
| "epoch": 2.1119064421816716, |
| "grad_norm": 0.1368601787177404, |
| "learning_rate": 5e-05, |
| "loss": 1.5898, |
| "step": 2093 |
| }, |
| { |
| "epoch": 2.1129146083274524, |
| "grad_norm": 0.1388394060190795, |
| "learning_rate": 5e-05, |
| "loss": 1.5891, |
| "step": 2094 |
| }, |
| { |
| "epoch": 2.113922774473233, |
| "grad_norm": 0.14575647068642428, |
| "learning_rate": 5e-05, |
| "loss": 1.5689, |
| "step": 2095 |
| }, |
| { |
| "epoch": 2.114930940619014, |
| "grad_norm": 0.14607699343754782, |
| "learning_rate": 5e-05, |
| "loss": 1.5795, |
| "step": 2096 |
| }, |
| { |
| "epoch": 2.115939106764795, |
| "grad_norm": 0.1391717279301115, |
| "learning_rate": 5e-05, |
| "loss": 1.5871, |
| "step": 2097 |
| }, |
| { |
| "epoch": 2.1169472729105756, |
| "grad_norm": 0.1325220561356466, |
| "learning_rate": 5e-05, |
| "loss": 1.5864, |
| "step": 2098 |
| }, |
| { |
| "epoch": 2.1179554390563564, |
| "grad_norm": 0.1518234847245551, |
| "learning_rate": 5e-05, |
| "loss": 1.577, |
| "step": 2099 |
| }, |
| { |
| "epoch": 2.1189636052021372, |
| "grad_norm": 0.13544712006560455, |
| "learning_rate": 5e-05, |
| "loss": 1.5939, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.119971771347918, |
| "grad_norm": 0.1349862559901404, |
| "learning_rate": 5e-05, |
| "loss": 1.6002, |
| "step": 2101 |
| }, |
| { |
| "epoch": 2.120979937493699, |
| "grad_norm": 0.13453836426341287, |
| "learning_rate": 5e-05, |
| "loss": 1.5763, |
| "step": 2102 |
| }, |
| { |
| "epoch": 2.1219881036394797, |
| "grad_norm": 2.960637713596343, |
| "learning_rate": 5e-05, |
| "loss": 1.6011, |
| "step": 2103 |
| }, |
| { |
| "epoch": 2.1229962697852605, |
| "grad_norm": 0.15059737751300753, |
| "learning_rate": 5e-05, |
| "loss": 1.5764, |
| "step": 2104 |
| }, |
| { |
| "epoch": 2.1240044359310413, |
| "grad_norm": 0.12980284644941475, |
| "learning_rate": 5e-05, |
| "loss": 1.5908, |
| "step": 2105 |
| }, |
| { |
| "epoch": 2.125012602076822, |
| "grad_norm": 0.15228697770189628, |
| "learning_rate": 5e-05, |
| "loss": 1.5925, |
| "step": 2106 |
| }, |
| { |
| "epoch": 2.126020768222603, |
| "grad_norm": 0.12991652337590776, |
| "learning_rate": 5e-05, |
| "loss": 1.5722, |
| "step": 2107 |
| }, |
| { |
| "epoch": 2.1270289343683837, |
| "grad_norm": 0.13422596335523498, |
| "learning_rate": 5e-05, |
| "loss": 1.5842, |
| "step": 2108 |
| }, |
| { |
| "epoch": 2.128037100514165, |
| "grad_norm": 0.14033677209306625, |
| "learning_rate": 5e-05, |
| "loss": 1.6051, |
| "step": 2109 |
| }, |
| { |
| "epoch": 2.1290452666599458, |
| "grad_norm": 0.13554909833181647, |
| "learning_rate": 5e-05, |
| "loss": 1.5684, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.1300534328057266, |
| "grad_norm": 0.137669348086702, |
| "learning_rate": 5e-05, |
| "loss": 1.5865, |
| "step": 2111 |
| }, |
| { |
| "epoch": 2.1310615989515074, |
| "grad_norm": 0.12888124635141174, |
| "learning_rate": 5e-05, |
| "loss": 1.5723, |
| "step": 2112 |
| }, |
| { |
| "epoch": 2.132069765097288, |
| "grad_norm": 0.14405589627337975, |
| "learning_rate": 5e-05, |
| "loss": 1.5749, |
| "step": 2113 |
| }, |
| { |
| "epoch": 2.133077931243069, |
| "grad_norm": 0.13261819566358374, |
| "learning_rate": 5e-05, |
| "loss": 1.5961, |
| "step": 2114 |
| }, |
| { |
| "epoch": 2.13408609738885, |
| "grad_norm": 0.15633574445165777, |
| "learning_rate": 5e-05, |
| "loss": 1.5877, |
| "step": 2115 |
| }, |
| { |
| "epoch": 2.1350942635346306, |
| "grad_norm": 0.143612355437984, |
| "learning_rate": 5e-05, |
| "loss": 1.5826, |
| "step": 2116 |
| }, |
| { |
| "epoch": 2.1361024296804114, |
| "grad_norm": 0.13224621619113583, |
| "learning_rate": 5e-05, |
| "loss": 1.5885, |
| "step": 2117 |
| }, |
| { |
| "epoch": 2.1371105958261922, |
| "grad_norm": 0.1449013360115979, |
| "learning_rate": 5e-05, |
| "loss": 1.5805, |
| "step": 2118 |
| }, |
| { |
| "epoch": 2.138118761971973, |
| "grad_norm": 0.13555979751611189, |
| "learning_rate": 5e-05, |
| "loss": 1.5847, |
| "step": 2119 |
| }, |
| { |
| "epoch": 2.139126928117754, |
| "grad_norm": 0.14961995183462212, |
| "learning_rate": 5e-05, |
| "loss": 1.5903, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.1401350942635347, |
| "grad_norm": 0.13816884057531062, |
| "learning_rate": 5e-05, |
| "loss": 1.5834, |
| "step": 2121 |
| }, |
| { |
| "epoch": 2.1411432604093155, |
| "grad_norm": 0.12696546427922376, |
| "learning_rate": 5e-05, |
| "loss": 1.5897, |
| "step": 2122 |
| }, |
| { |
| "epoch": 2.1421514265550963, |
| "grad_norm": 0.1364021704158851, |
| "learning_rate": 5e-05, |
| "loss": 1.5808, |
| "step": 2123 |
| }, |
| { |
| "epoch": 2.143159592700877, |
| "grad_norm": 0.32059154960425695, |
| "learning_rate": 5e-05, |
| "loss": 1.5731, |
| "step": 2124 |
| }, |
| { |
| "epoch": 2.144167758846658, |
| "grad_norm": 0.1286960221199563, |
| "learning_rate": 5e-05, |
| "loss": 1.5722, |
| "step": 2125 |
| }, |
| { |
| "epoch": 2.1451759249924387, |
| "grad_norm": 0.13168440073637036, |
| "learning_rate": 5e-05, |
| "loss": 1.5784, |
| "step": 2126 |
| }, |
| { |
| "epoch": 2.1461840911382195, |
| "grad_norm": 0.12958498432639048, |
| "learning_rate": 5e-05, |
| "loss": 1.5662, |
| "step": 2127 |
| }, |
| { |
| "epoch": 2.1471922572840003, |
| "grad_norm": 0.12674408462329906, |
| "learning_rate": 5e-05, |
| "loss": 1.576, |
| "step": 2128 |
| }, |
| { |
| "epoch": 2.148200423429781, |
| "grad_norm": 0.1285194149227695, |
| "learning_rate": 5e-05, |
| "loss": 1.5878, |
| "step": 2129 |
| }, |
| { |
| "epoch": 2.149208589575562, |
| "grad_norm": 0.13258417774564543, |
| "learning_rate": 5e-05, |
| "loss": 1.5715, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.1502167557213427, |
| "grad_norm": 0.13196317225101575, |
| "learning_rate": 5e-05, |
| "loss": 1.5752, |
| "step": 2131 |
| }, |
| { |
| "epoch": 2.1512249218671236, |
| "grad_norm": 0.1327938664530659, |
| "learning_rate": 5e-05, |
| "loss": 1.5596, |
| "step": 2132 |
| }, |
| { |
| "epoch": 2.1522330880129044, |
| "grad_norm": 0.12022858158050774, |
| "learning_rate": 5e-05, |
| "loss": 1.607, |
| "step": 2133 |
| }, |
| { |
| "epoch": 2.153241254158685, |
| "grad_norm": 0.13503719807505016, |
| "learning_rate": 5e-05, |
| "loss": 1.5919, |
| "step": 2134 |
| }, |
| { |
| "epoch": 2.154249420304466, |
| "grad_norm": 0.13245288331895266, |
| "learning_rate": 5e-05, |
| "loss": 1.5653, |
| "step": 2135 |
| }, |
| { |
| "epoch": 2.1552575864502472, |
| "grad_norm": 0.1312546045670137, |
| "learning_rate": 5e-05, |
| "loss": 1.5931, |
| "step": 2136 |
| }, |
| { |
| "epoch": 2.1562657525960276, |
| "grad_norm": 0.13183928326778555, |
| "learning_rate": 5e-05, |
| "loss": 1.5893, |
| "step": 2137 |
| }, |
| { |
| "epoch": 2.157273918741809, |
| "grad_norm": 0.1317393731662259, |
| "learning_rate": 5e-05, |
| "loss": 1.5663, |
| "step": 2138 |
| }, |
| { |
| "epoch": 2.1582820848875897, |
| "grad_norm": 0.13923135753150415, |
| "learning_rate": 5e-05, |
| "loss": 1.6045, |
| "step": 2139 |
| }, |
| { |
| "epoch": 2.1592902510333705, |
| "grad_norm": 0.13848524773596155, |
| "learning_rate": 5e-05, |
| "loss": 1.5747, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.1602984171791513, |
| "grad_norm": 0.13263746512720395, |
| "learning_rate": 5e-05, |
| "loss": 1.5734, |
| "step": 2141 |
| }, |
| { |
| "epoch": 2.161306583324932, |
| "grad_norm": 0.13110665556573656, |
| "learning_rate": 5e-05, |
| "loss": 1.5906, |
| "step": 2142 |
| }, |
| { |
| "epoch": 2.162314749470713, |
| "grad_norm": 0.13151151142333048, |
| "learning_rate": 5e-05, |
| "loss": 1.5988, |
| "step": 2143 |
| }, |
| { |
| "epoch": 2.1633229156164937, |
| "grad_norm": 0.13200152470504317, |
| "learning_rate": 5e-05, |
| "loss": 1.5876, |
| "step": 2144 |
| }, |
| { |
| "epoch": 2.1643310817622745, |
| "grad_norm": 0.13533006578271542, |
| "learning_rate": 5e-05, |
| "loss": 1.5894, |
| "step": 2145 |
| }, |
| { |
| "epoch": 2.1653392479080553, |
| "grad_norm": 0.1284499300796628, |
| "learning_rate": 5e-05, |
| "loss": 1.5815, |
| "step": 2146 |
| }, |
| { |
| "epoch": 2.166347414053836, |
| "grad_norm": 0.13632214074228208, |
| "learning_rate": 5e-05, |
| "loss": 1.5804, |
| "step": 2147 |
| }, |
| { |
| "epoch": 2.167355580199617, |
| "grad_norm": 0.12791435267801157, |
| "learning_rate": 5e-05, |
| "loss": 1.5779, |
| "step": 2148 |
| }, |
| { |
| "epoch": 2.1683637463453977, |
| "grad_norm": 0.1473984814477361, |
| "learning_rate": 5e-05, |
| "loss": 1.5879, |
| "step": 2149 |
| }, |
| { |
| "epoch": 2.1693719124911786, |
| "grad_norm": 0.13749322099010552, |
| "learning_rate": 5e-05, |
| "loss": 1.5867, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.1703800786369594, |
| "grad_norm": 0.1461564315809883, |
| "learning_rate": 5e-05, |
| "loss": 1.5771, |
| "step": 2151 |
| }, |
| { |
| "epoch": 2.17138824478274, |
| "grad_norm": 0.1458481299707068, |
| "learning_rate": 5e-05, |
| "loss": 1.5897, |
| "step": 2152 |
| }, |
| { |
| "epoch": 2.172396410928521, |
| "grad_norm": 0.1498709317955309, |
| "learning_rate": 5e-05, |
| "loss": 1.5626, |
| "step": 2153 |
| }, |
| { |
| "epoch": 2.173404577074302, |
| "grad_norm": 0.1492508722779085, |
| "learning_rate": 5e-05, |
| "loss": 1.6004, |
| "step": 2154 |
| }, |
| { |
| "epoch": 2.1744127432200826, |
| "grad_norm": 0.13424273422951521, |
| "learning_rate": 5e-05, |
| "loss": 1.579, |
| "step": 2155 |
| }, |
| { |
| "epoch": 2.1754209093658634, |
| "grad_norm": 0.13508285975796092, |
| "learning_rate": 5e-05, |
| "loss": 1.572, |
| "step": 2156 |
| }, |
| { |
| "epoch": 2.176429075511644, |
| "grad_norm": 0.14287579327034905, |
| "learning_rate": 5e-05, |
| "loss": 1.565, |
| "step": 2157 |
| }, |
| { |
| "epoch": 2.177437241657425, |
| "grad_norm": 0.15647576630368834, |
| "learning_rate": 5e-05, |
| "loss": 1.601, |
| "step": 2158 |
| }, |
| { |
| "epoch": 2.178445407803206, |
| "grad_norm": 0.13642422731103188, |
| "learning_rate": 5e-05, |
| "loss": 1.5608, |
| "step": 2159 |
| }, |
| { |
| "epoch": 2.1794535739489866, |
| "grad_norm": 0.15150847011968074, |
| "learning_rate": 5e-05, |
| "loss": 1.58, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.1804617400947675, |
| "grad_norm": 0.13834478868061467, |
| "learning_rate": 5e-05, |
| "loss": 1.5742, |
| "step": 2161 |
| }, |
| { |
| "epoch": 2.1814699062405483, |
| "grad_norm": 0.1329070692603562, |
| "learning_rate": 5e-05, |
| "loss": 1.5851, |
| "step": 2162 |
| }, |
| { |
| "epoch": 2.182478072386329, |
| "grad_norm": 0.1442442344587271, |
| "learning_rate": 5e-05, |
| "loss": 1.5689, |
| "step": 2163 |
| }, |
| { |
| "epoch": 2.18348623853211, |
| "grad_norm": 0.13450680290698083, |
| "learning_rate": 5e-05, |
| "loss": 1.5923, |
| "step": 2164 |
| }, |
| { |
| "epoch": 2.184494404677891, |
| "grad_norm": 0.13365578601135725, |
| "learning_rate": 5e-05, |
| "loss": 1.586, |
| "step": 2165 |
| }, |
| { |
| "epoch": 2.185502570823672, |
| "grad_norm": 0.13122827567534617, |
| "learning_rate": 5e-05, |
| "loss": 1.5887, |
| "step": 2166 |
| }, |
| { |
| "epoch": 2.1865107369694528, |
| "grad_norm": 0.1324478292442316, |
| "learning_rate": 5e-05, |
| "loss": 1.5651, |
| "step": 2167 |
| }, |
| { |
| "epoch": 2.1875189031152336, |
| "grad_norm": 0.13095148860474917, |
| "learning_rate": 5e-05, |
| "loss": 1.57, |
| "step": 2168 |
| }, |
| { |
| "epoch": 2.1885270692610144, |
| "grad_norm": 0.13487743576121505, |
| "learning_rate": 5e-05, |
| "loss": 1.5847, |
| "step": 2169 |
| }, |
| { |
| "epoch": 2.189535235406795, |
| "grad_norm": 0.13851027435124633, |
| "learning_rate": 5e-05, |
| "loss": 1.5942, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.190543401552576, |
| "grad_norm": 0.13806832538791874, |
| "learning_rate": 5e-05, |
| "loss": 1.5841, |
| "step": 2171 |
| }, |
| { |
| "epoch": 2.191551567698357, |
| "grad_norm": 0.136314059375051, |
| "learning_rate": 5e-05, |
| "loss": 1.567, |
| "step": 2172 |
| }, |
| { |
| "epoch": 2.1925597338441376, |
| "grad_norm": 0.13767914337656698, |
| "learning_rate": 5e-05, |
| "loss": 1.5817, |
| "step": 2173 |
| }, |
| { |
| "epoch": 2.1935678999899184, |
| "grad_norm": 0.13590218518992947, |
| "learning_rate": 5e-05, |
| "loss": 1.5909, |
| "step": 2174 |
| }, |
| { |
| "epoch": 2.194576066135699, |
| "grad_norm": 0.140979551198686, |
| "learning_rate": 5e-05, |
| "loss": 1.5849, |
| "step": 2175 |
| }, |
| { |
| "epoch": 2.19558423228148, |
| "grad_norm": 0.1351539370773555, |
| "learning_rate": 5e-05, |
| "loss": 1.5887, |
| "step": 2176 |
| }, |
| { |
| "epoch": 2.196592398427261, |
| "grad_norm": 0.13522254246517385, |
| "learning_rate": 5e-05, |
| "loss": 1.5631, |
| "step": 2177 |
| }, |
| { |
| "epoch": 2.1976005645730416, |
| "grad_norm": 0.1341256457939, |
| "learning_rate": 5e-05, |
| "loss": 1.5681, |
| "step": 2178 |
| }, |
| { |
| "epoch": 2.1986087307188225, |
| "grad_norm": 0.182496231321001, |
| "learning_rate": 5e-05, |
| "loss": 1.5728, |
| "step": 2179 |
| }, |
| { |
| "epoch": 2.1996168968646033, |
| "grad_norm": 0.1301106186108056, |
| "learning_rate": 5e-05, |
| "loss": 1.5915, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.200625063010384, |
| "grad_norm": 0.24496089985012215, |
| "learning_rate": 5e-05, |
| "loss": 1.5783, |
| "step": 2181 |
| }, |
| { |
| "epoch": 2.201633229156165, |
| "grad_norm": 0.13970245980834814, |
| "learning_rate": 5e-05, |
| "loss": 1.5774, |
| "step": 2182 |
| }, |
| { |
| "epoch": 2.2026413953019457, |
| "grad_norm": 0.13368084450749423, |
| "learning_rate": 5e-05, |
| "loss": 1.5732, |
| "step": 2183 |
| }, |
| { |
| "epoch": 2.2036495614477265, |
| "grad_norm": 0.14027167656602788, |
| "learning_rate": 5e-05, |
| "loss": 1.5801, |
| "step": 2184 |
| }, |
| { |
| "epoch": 2.2046577275935073, |
| "grad_norm": 0.12608760050047663, |
| "learning_rate": 5e-05, |
| "loss": 1.565, |
| "step": 2185 |
| }, |
| { |
| "epoch": 2.205665893739288, |
| "grad_norm": 0.1400159673728019, |
| "learning_rate": 5e-05, |
| "loss": 1.591, |
| "step": 2186 |
| }, |
| { |
| "epoch": 2.206674059885069, |
| "grad_norm": 0.1280770249793632, |
| "learning_rate": 5e-05, |
| "loss": 1.5874, |
| "step": 2187 |
| }, |
| { |
| "epoch": 2.2076822260308497, |
| "grad_norm": 0.13005190751973192, |
| "learning_rate": 5e-05, |
| "loss": 1.5717, |
| "step": 2188 |
| }, |
| { |
| "epoch": 2.2086903921766305, |
| "grad_norm": 0.13149320702928272, |
| "learning_rate": 5e-05, |
| "loss": 1.5892, |
| "step": 2189 |
| }, |
| { |
| "epoch": 2.2096985583224114, |
| "grad_norm": 0.13944948801880389, |
| "learning_rate": 5e-05, |
| "loss": 1.5683, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.210706724468192, |
| "grad_norm": 0.1347679049234339, |
| "learning_rate": 5e-05, |
| "loss": 1.5784, |
| "step": 2191 |
| }, |
| { |
| "epoch": 2.2117148906139734, |
| "grad_norm": 0.13761469202598953, |
| "learning_rate": 5e-05, |
| "loss": 1.5801, |
| "step": 2192 |
| }, |
| { |
| "epoch": 2.212723056759754, |
| "grad_norm": 0.1359688017286691, |
| "learning_rate": 5e-05, |
| "loss": 1.5811, |
| "step": 2193 |
| }, |
| { |
| "epoch": 2.213731222905535, |
| "grad_norm": 0.13710284930832534, |
| "learning_rate": 5e-05, |
| "loss": 1.578, |
| "step": 2194 |
| }, |
| { |
| "epoch": 2.214739389051316, |
| "grad_norm": 0.13307043588957998, |
| "learning_rate": 5e-05, |
| "loss": 1.5851, |
| "step": 2195 |
| }, |
| { |
| "epoch": 2.2157475551970967, |
| "grad_norm": 0.13932056597633424, |
| "learning_rate": 5e-05, |
| "loss": 1.5768, |
| "step": 2196 |
| }, |
| { |
| "epoch": 2.2167557213428775, |
| "grad_norm": 0.14029489437072842, |
| "learning_rate": 5e-05, |
| "loss": 1.5786, |
| "step": 2197 |
| }, |
| { |
| "epoch": 2.2177638874886583, |
| "grad_norm": 0.12511079910671596, |
| "learning_rate": 5e-05, |
| "loss": 1.5635, |
| "step": 2198 |
| }, |
| { |
| "epoch": 2.218772053634439, |
| "grad_norm": 0.14343716835451112, |
| "learning_rate": 5e-05, |
| "loss": 1.5872, |
| "step": 2199 |
| }, |
| { |
| "epoch": 2.21978021978022, |
| "grad_norm": 0.2178821073538235, |
| "learning_rate": 5e-05, |
| "loss": 1.5571, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.2207883859260007, |
| "grad_norm": 0.13920646180211935, |
| "learning_rate": 5e-05, |
| "loss": 1.5638, |
| "step": 2201 |
| }, |
| { |
| "epoch": 2.2217965520717815, |
| "grad_norm": 0.14362100346423048, |
| "learning_rate": 5e-05, |
| "loss": 1.5828, |
| "step": 2202 |
| }, |
| { |
| "epoch": 2.2228047182175623, |
| "grad_norm": 0.14703335119452382, |
| "learning_rate": 5e-05, |
| "loss": 1.5693, |
| "step": 2203 |
| }, |
| { |
| "epoch": 2.223812884363343, |
| "grad_norm": 0.14195761111941657, |
| "learning_rate": 5e-05, |
| "loss": 1.5727, |
| "step": 2204 |
| }, |
| { |
| "epoch": 2.224821050509124, |
| "grad_norm": 0.13970507344162816, |
| "learning_rate": 5e-05, |
| "loss": 1.5999, |
| "step": 2205 |
| }, |
| { |
| "epoch": 2.2258292166549047, |
| "grad_norm": 0.14852447157811244, |
| "learning_rate": 5e-05, |
| "loss": 1.5775, |
| "step": 2206 |
| }, |
| { |
| "epoch": 2.2268373828006855, |
| "grad_norm": 0.1363828903790539, |
| "learning_rate": 5e-05, |
| "loss": 1.583, |
| "step": 2207 |
| }, |
| { |
| "epoch": 2.2278455489464664, |
| "grad_norm": 0.1481544737328289, |
| "learning_rate": 5e-05, |
| "loss": 1.5798, |
| "step": 2208 |
| }, |
| { |
| "epoch": 2.228853715092247, |
| "grad_norm": 0.1374534164490761, |
| "learning_rate": 5e-05, |
| "loss": 1.5645, |
| "step": 2209 |
| }, |
| { |
| "epoch": 2.229861881238028, |
| "grad_norm": 0.163200552421793, |
| "learning_rate": 5e-05, |
| "loss": 1.5766, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.230870047383809, |
| "grad_norm": 0.12232954664324697, |
| "learning_rate": 5e-05, |
| "loss": 1.5907, |
| "step": 2211 |
| }, |
| { |
| "epoch": 2.2318782135295896, |
| "grad_norm": 0.13950948739812444, |
| "learning_rate": 5e-05, |
| "loss": 1.5693, |
| "step": 2212 |
| }, |
| { |
| "epoch": 2.2328863796753704, |
| "grad_norm": 0.14155762951252868, |
| "learning_rate": 5e-05, |
| "loss": 1.5744, |
| "step": 2213 |
| }, |
| { |
| "epoch": 2.233894545821151, |
| "grad_norm": 0.1472663258665249, |
| "learning_rate": 5e-05, |
| "loss": 1.5624, |
| "step": 2214 |
| }, |
| { |
| "epoch": 2.234902711966932, |
| "grad_norm": 0.1395352568904782, |
| "learning_rate": 5e-05, |
| "loss": 1.5837, |
| "step": 2215 |
| }, |
| { |
| "epoch": 2.235910878112713, |
| "grad_norm": 0.14155823128431386, |
| "learning_rate": 5e-05, |
| "loss": 1.5628, |
| "step": 2216 |
| }, |
| { |
| "epoch": 2.2369190442584936, |
| "grad_norm": 0.1252450124685863, |
| "learning_rate": 5e-05, |
| "loss": 1.5732, |
| "step": 2217 |
| }, |
| { |
| "epoch": 2.2379272104042744, |
| "grad_norm": 0.14270458613439232, |
| "learning_rate": 5e-05, |
| "loss": 1.5672, |
| "step": 2218 |
| }, |
| { |
| "epoch": 2.2389353765500553, |
| "grad_norm": 0.1327344509151647, |
| "learning_rate": 5e-05, |
| "loss": 1.5768, |
| "step": 2219 |
| }, |
| { |
| "epoch": 2.239943542695836, |
| "grad_norm": 0.14289844364572002, |
| "learning_rate": 5e-05, |
| "loss": 1.5655, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.2409517088416173, |
| "grad_norm": 0.13075263019525016, |
| "learning_rate": 5e-05, |
| "loss": 1.5631, |
| "step": 2221 |
| }, |
| { |
| "epoch": 2.241959874987398, |
| "grad_norm": 0.1353210741302557, |
| "learning_rate": 5e-05, |
| "loss": 1.5826, |
| "step": 2222 |
| }, |
| { |
| "epoch": 2.242968041133179, |
| "grad_norm": 0.12712135635024222, |
| "learning_rate": 5e-05, |
| "loss": 1.5672, |
| "step": 2223 |
| }, |
| { |
| "epoch": 2.2439762072789597, |
| "grad_norm": 0.13005000485849497, |
| "learning_rate": 5e-05, |
| "loss": 1.5779, |
| "step": 2224 |
| }, |
| { |
| "epoch": 2.2449843734247406, |
| "grad_norm": 0.13477545800901453, |
| "learning_rate": 5e-05, |
| "loss": 1.5872, |
| "step": 2225 |
| }, |
| { |
| "epoch": 2.2459925395705214, |
| "grad_norm": 0.1455016663994394, |
| "learning_rate": 5e-05, |
| "loss": 1.5901, |
| "step": 2226 |
| }, |
| { |
| "epoch": 2.247000705716302, |
| "grad_norm": 0.13429071804096004, |
| "learning_rate": 5e-05, |
| "loss": 1.5639, |
| "step": 2227 |
| }, |
| { |
| "epoch": 2.248008871862083, |
| "grad_norm": 0.13385857234037593, |
| "learning_rate": 5e-05, |
| "loss": 1.5959, |
| "step": 2228 |
| }, |
| { |
| "epoch": 2.249017038007864, |
| "grad_norm": 0.13026162883941678, |
| "learning_rate": 5e-05, |
| "loss": 1.5955, |
| "step": 2229 |
| }, |
| { |
| "epoch": 2.2500252041536446, |
| "grad_norm": 0.15064310024726288, |
| "learning_rate": 5e-05, |
| "loss": 1.5859, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.2510333702994254, |
| "grad_norm": 0.13267969070820512, |
| "learning_rate": 5e-05, |
| "loss": 1.5738, |
| "step": 2231 |
| }, |
| { |
| "epoch": 2.252041536445206, |
| "grad_norm": 0.12875095223195912, |
| "learning_rate": 5e-05, |
| "loss": 1.5912, |
| "step": 2232 |
| }, |
| { |
| "epoch": 2.253049702590987, |
| "grad_norm": 0.1496068264954723, |
| "learning_rate": 5e-05, |
| "loss": 1.576, |
| "step": 2233 |
| }, |
| { |
| "epoch": 2.254057868736768, |
| "grad_norm": 0.13575255873767209, |
| "learning_rate": 5e-05, |
| "loss": 1.5781, |
| "step": 2234 |
| }, |
| { |
| "epoch": 2.2550660348825486, |
| "grad_norm": 0.1418476006839431, |
| "learning_rate": 5e-05, |
| "loss": 1.5879, |
| "step": 2235 |
| }, |
| { |
| "epoch": 2.2560742010283295, |
| "grad_norm": 0.1390998352061481, |
| "learning_rate": 5e-05, |
| "loss": 1.5647, |
| "step": 2236 |
| }, |
| { |
| "epoch": 2.2570823671741103, |
| "grad_norm": 0.14615130491475525, |
| "learning_rate": 5e-05, |
| "loss": 1.584, |
| "step": 2237 |
| }, |
| { |
| "epoch": 2.258090533319891, |
| "grad_norm": 0.12856984501618252, |
| "learning_rate": 5e-05, |
| "loss": 1.5756, |
| "step": 2238 |
| }, |
| { |
| "epoch": 2.259098699465672, |
| "grad_norm": 0.13665089453090834, |
| "learning_rate": 5e-05, |
| "loss": 1.5582, |
| "step": 2239 |
| }, |
| { |
| "epoch": 2.2601068656114527, |
| "grad_norm": 1.3474519747649263, |
| "learning_rate": 5e-05, |
| "loss": 1.5816, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.2611150317572335, |
| "grad_norm": 0.1448758915526553, |
| "learning_rate": 5e-05, |
| "loss": 1.5757, |
| "step": 2241 |
| }, |
| { |
| "epoch": 2.2621231979030143, |
| "grad_norm": 0.14092104981267023, |
| "learning_rate": 5e-05, |
| "loss": 1.5724, |
| "step": 2242 |
| }, |
| { |
| "epoch": 2.263131364048795, |
| "grad_norm": 0.1509143463685949, |
| "learning_rate": 5e-05, |
| "loss": 1.5728, |
| "step": 2243 |
| }, |
| { |
| "epoch": 2.264139530194576, |
| "grad_norm": 0.14023101809057317, |
| "learning_rate": 5e-05, |
| "loss": 1.5741, |
| "step": 2244 |
| }, |
| { |
| "epoch": 2.2651476963403567, |
| "grad_norm": 0.1484884492305928, |
| "learning_rate": 5e-05, |
| "loss": 1.5545, |
| "step": 2245 |
| }, |
| { |
| "epoch": 2.2661558624861375, |
| "grad_norm": 0.14920882778947953, |
| "learning_rate": 5e-05, |
| "loss": 1.5876, |
| "step": 2246 |
| }, |
| { |
| "epoch": 2.2671640286319183, |
| "grad_norm": 0.1632290484686048, |
| "learning_rate": 5e-05, |
| "loss": 1.5732, |
| "step": 2247 |
| }, |
| { |
| "epoch": 2.2681721947776996, |
| "grad_norm": 0.14978755710589833, |
| "learning_rate": 5e-05, |
| "loss": 1.6006, |
| "step": 2248 |
| }, |
| { |
| "epoch": 2.26918036092348, |
| "grad_norm": 0.14689789735003894, |
| "learning_rate": 5e-05, |
| "loss": 1.5757, |
| "step": 2249 |
| }, |
| { |
| "epoch": 2.270188527069261, |
| "grad_norm": 0.15144881512451783, |
| "learning_rate": 5e-05, |
| "loss": 1.5879, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.271196693215042, |
| "grad_norm": 0.15866554211155684, |
| "learning_rate": 5e-05, |
| "loss": 1.567, |
| "step": 2251 |
| }, |
| { |
| "epoch": 2.272204859360823, |
| "grad_norm": 0.14616524485747734, |
| "learning_rate": 5e-05, |
| "loss": 1.5915, |
| "step": 2252 |
| }, |
| { |
| "epoch": 2.2732130255066036, |
| "grad_norm": 0.13541258512111323, |
| "learning_rate": 5e-05, |
| "loss": 1.5909, |
| "step": 2253 |
| }, |
| { |
| "epoch": 2.2742211916523845, |
| "grad_norm": 0.14557208487414966, |
| "learning_rate": 5e-05, |
| "loss": 1.5753, |
| "step": 2254 |
| }, |
| { |
| "epoch": 2.2752293577981653, |
| "grad_norm": 0.14355832149078251, |
| "learning_rate": 5e-05, |
| "loss": 1.5777, |
| "step": 2255 |
| }, |
| { |
| "epoch": 2.276237523943946, |
| "grad_norm": 0.15534461022920001, |
| "learning_rate": 5e-05, |
| "loss": 1.582, |
| "step": 2256 |
| }, |
| { |
| "epoch": 2.277245690089727, |
| "grad_norm": 0.14040643758465315, |
| "learning_rate": 5e-05, |
| "loss": 1.5937, |
| "step": 2257 |
| }, |
| { |
| "epoch": 2.2782538562355077, |
| "grad_norm": 0.14217759030812158, |
| "learning_rate": 5e-05, |
| "loss": 1.5939, |
| "step": 2258 |
| }, |
| { |
| "epoch": 2.2792620223812885, |
| "grad_norm": 0.15321054452281707, |
| "learning_rate": 5e-05, |
| "loss": 1.5897, |
| "step": 2259 |
| }, |
| { |
| "epoch": 2.2802701885270693, |
| "grad_norm": 0.13411534025001054, |
| "learning_rate": 5e-05, |
| "loss": 1.571, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.28127835467285, |
| "grad_norm": 0.1264814479446074, |
| "learning_rate": 5e-05, |
| "loss": 1.5781, |
| "step": 2261 |
| }, |
| { |
| "epoch": 2.282286520818631, |
| "grad_norm": 0.13892914007499751, |
| "learning_rate": 5e-05, |
| "loss": 1.5753, |
| "step": 2262 |
| }, |
| { |
| "epoch": 2.2832946869644117, |
| "grad_norm": 0.13891896623161473, |
| "learning_rate": 5e-05, |
| "loss": 1.5754, |
| "step": 2263 |
| }, |
| { |
| "epoch": 2.2843028531101925, |
| "grad_norm": 0.13986597276910606, |
| "learning_rate": 5e-05, |
| "loss": 1.5846, |
| "step": 2264 |
| }, |
| { |
| "epoch": 2.2853110192559734, |
| "grad_norm": 0.1313651130008528, |
| "learning_rate": 5e-05, |
| "loss": 1.583, |
| "step": 2265 |
| }, |
| { |
| "epoch": 2.286319185401754, |
| "grad_norm": 0.14180149149457943, |
| "learning_rate": 5e-05, |
| "loss": 1.6002, |
| "step": 2266 |
| }, |
| { |
| "epoch": 2.287327351547535, |
| "grad_norm": 0.13664033183968566, |
| "learning_rate": 5e-05, |
| "loss": 1.565, |
| "step": 2267 |
| }, |
| { |
| "epoch": 2.288335517693316, |
| "grad_norm": 0.13106617230412848, |
| "learning_rate": 5e-05, |
| "loss": 1.5653, |
| "step": 2268 |
| }, |
| { |
| "epoch": 2.2893436838390966, |
| "grad_norm": 0.14110105058274033, |
| "learning_rate": 5e-05, |
| "loss": 1.5739, |
| "step": 2269 |
| }, |
| { |
| "epoch": 2.2903518499848774, |
| "grad_norm": 0.13246989225005226, |
| "learning_rate": 5e-05, |
| "loss": 1.5812, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.291360016130658, |
| "grad_norm": 0.12690842300537472, |
| "learning_rate": 5e-05, |
| "loss": 1.6001, |
| "step": 2271 |
| }, |
| { |
| "epoch": 2.292368182276439, |
| "grad_norm": 0.14061559722453193, |
| "learning_rate": 5e-05, |
| "loss": 1.5666, |
| "step": 2272 |
| }, |
| { |
| "epoch": 2.29337634842222, |
| "grad_norm": 0.13319571985220693, |
| "learning_rate": 5e-05, |
| "loss": 1.5843, |
| "step": 2273 |
| }, |
| { |
| "epoch": 2.2943845145680006, |
| "grad_norm": 0.12788817214469078, |
| "learning_rate": 5e-05, |
| "loss": 1.5789, |
| "step": 2274 |
| }, |
| { |
| "epoch": 2.2953926807137814, |
| "grad_norm": 0.12997493129548418, |
| "learning_rate": 5e-05, |
| "loss": 1.5625, |
| "step": 2275 |
| }, |
| { |
| "epoch": 2.2964008468595623, |
| "grad_norm": 0.13560915101486962, |
| "learning_rate": 5e-05, |
| "loss": 1.572, |
| "step": 2276 |
| }, |
| { |
| "epoch": 2.2974090130053435, |
| "grad_norm": 0.13228064336220804, |
| "learning_rate": 5e-05, |
| "loss": 1.5789, |
| "step": 2277 |
| }, |
| { |
| "epoch": 2.298417179151124, |
| "grad_norm": 0.1425843624741558, |
| "learning_rate": 5e-05, |
| "loss": 1.6008, |
| "step": 2278 |
| }, |
| { |
| "epoch": 2.299425345296905, |
| "grad_norm": 0.14131677728244713, |
| "learning_rate": 5e-05, |
| "loss": 1.5733, |
| "step": 2279 |
| }, |
| { |
| "epoch": 2.300433511442686, |
| "grad_norm": 0.14384822881880258, |
| "learning_rate": 5e-05, |
| "loss": 1.5723, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.3014416775884667, |
| "grad_norm": 0.13621106913387487, |
| "learning_rate": 5e-05, |
| "loss": 1.5816, |
| "step": 2281 |
| }, |
| { |
| "epoch": 2.3024498437342475, |
| "grad_norm": 0.18150249302366323, |
| "learning_rate": 5e-05, |
| "loss": 1.5868, |
| "step": 2282 |
| }, |
| { |
| "epoch": 2.3034580098800284, |
| "grad_norm": 0.13024559329255764, |
| "learning_rate": 5e-05, |
| "loss": 1.561, |
| "step": 2283 |
| }, |
| { |
| "epoch": 2.304466176025809, |
| "grad_norm": 0.14221434659857535, |
| "learning_rate": 5e-05, |
| "loss": 1.5815, |
| "step": 2284 |
| }, |
| { |
| "epoch": 2.30547434217159, |
| "grad_norm": 0.1243982975878934, |
| "learning_rate": 5e-05, |
| "loss": 1.5692, |
| "step": 2285 |
| }, |
| { |
| "epoch": 2.306482508317371, |
| "grad_norm": 0.14314562098995504, |
| "learning_rate": 5e-05, |
| "loss": 1.5928, |
| "step": 2286 |
| }, |
| { |
| "epoch": 2.3074906744631516, |
| "grad_norm": 0.1343242688971022, |
| "learning_rate": 5e-05, |
| "loss": 1.5871, |
| "step": 2287 |
| }, |
| { |
| "epoch": 2.3084988406089324, |
| "grad_norm": 0.1321173628116035, |
| "learning_rate": 5e-05, |
| "loss": 1.5944, |
| "step": 2288 |
| }, |
| { |
| "epoch": 2.309507006754713, |
| "grad_norm": 0.13453862969642263, |
| "learning_rate": 5e-05, |
| "loss": 1.5907, |
| "step": 2289 |
| }, |
| { |
| "epoch": 2.310515172900494, |
| "grad_norm": 0.13687930001775503, |
| "learning_rate": 5e-05, |
| "loss": 1.5894, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.311523339046275, |
| "grad_norm": 0.13215284755773643, |
| "learning_rate": 5e-05, |
| "loss": 1.5834, |
| "step": 2291 |
| }, |
| { |
| "epoch": 2.3125315051920556, |
| "grad_norm": 0.13579965157318924, |
| "learning_rate": 5e-05, |
| "loss": 1.5779, |
| "step": 2292 |
| }, |
| { |
| "epoch": 2.3135396713378364, |
| "grad_norm": 0.14328135334944211, |
| "learning_rate": 5e-05, |
| "loss": 1.5758, |
| "step": 2293 |
| }, |
| { |
| "epoch": 2.3145478374836173, |
| "grad_norm": 0.1325118478849696, |
| "learning_rate": 5e-05, |
| "loss": 1.5829, |
| "step": 2294 |
| }, |
| { |
| "epoch": 2.315556003629398, |
| "grad_norm": 0.1380410269853047, |
| "learning_rate": 5e-05, |
| "loss": 1.5791, |
| "step": 2295 |
| }, |
| { |
| "epoch": 2.316564169775179, |
| "grad_norm": 0.13875953603401303, |
| "learning_rate": 5e-05, |
| "loss": 1.5506, |
| "step": 2296 |
| }, |
| { |
| "epoch": 2.3175723359209597, |
| "grad_norm": 0.13467458290717993, |
| "learning_rate": 5e-05, |
| "loss": 1.5693, |
| "step": 2297 |
| }, |
| { |
| "epoch": 2.3185805020667405, |
| "grad_norm": 0.140163645245547, |
| "learning_rate": 5e-05, |
| "loss": 1.5794, |
| "step": 2298 |
| }, |
| { |
| "epoch": 2.3195886682125213, |
| "grad_norm": 0.1453337264089796, |
| "learning_rate": 5e-05, |
| "loss": 1.5605, |
| "step": 2299 |
| }, |
| { |
| "epoch": 2.320596834358302, |
| "grad_norm": 0.13839240934781297, |
| "learning_rate": 5e-05, |
| "loss": 1.5904, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.321605000504083, |
| "grad_norm": 0.14108100710419977, |
| "learning_rate": 5e-05, |
| "loss": 1.5721, |
| "step": 2301 |
| }, |
| { |
| "epoch": 2.3226131666498637, |
| "grad_norm": 0.1292487879613601, |
| "learning_rate": 5e-05, |
| "loss": 1.5786, |
| "step": 2302 |
| }, |
| { |
| "epoch": 2.3236213327956445, |
| "grad_norm": 0.1415731273313686, |
| "learning_rate": 5e-05, |
| "loss": 1.5663, |
| "step": 2303 |
| }, |
| { |
| "epoch": 2.324629498941426, |
| "grad_norm": 0.13192875328689405, |
| "learning_rate": 5e-05, |
| "loss": 1.5763, |
| "step": 2304 |
| }, |
| { |
| "epoch": 2.325637665087206, |
| "grad_norm": 0.13791037594694341, |
| "learning_rate": 5e-05, |
| "loss": 1.5748, |
| "step": 2305 |
| }, |
| { |
| "epoch": 2.3266458312329874, |
| "grad_norm": 0.132297965228094, |
| "learning_rate": 5e-05, |
| "loss": 1.5697, |
| "step": 2306 |
| }, |
| { |
| "epoch": 2.327653997378768, |
| "grad_norm": 0.12873243615415655, |
| "learning_rate": 5e-05, |
| "loss": 1.5615, |
| "step": 2307 |
| }, |
| { |
| "epoch": 2.328662163524549, |
| "grad_norm": 0.1565315534359242, |
| "learning_rate": 5e-05, |
| "loss": 1.5715, |
| "step": 2308 |
| }, |
| { |
| "epoch": 2.32967032967033, |
| "grad_norm": 0.22566782353722953, |
| "learning_rate": 5e-05, |
| "loss": 1.5741, |
| "step": 2309 |
| }, |
| { |
| "epoch": 2.3306784958161106, |
| "grad_norm": 0.14956726587127525, |
| "learning_rate": 5e-05, |
| "loss": 1.5733, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.3316866619618914, |
| "grad_norm": 0.15605951198053955, |
| "learning_rate": 5e-05, |
| "loss": 1.586, |
| "step": 2311 |
| }, |
| { |
| "epoch": 2.3326948281076723, |
| "grad_norm": 0.1407105414222044, |
| "learning_rate": 5e-05, |
| "loss": 1.5723, |
| "step": 2312 |
| }, |
| { |
| "epoch": 2.333702994253453, |
| "grad_norm": 0.15265911696540452, |
| "learning_rate": 5e-05, |
| "loss": 1.5642, |
| "step": 2313 |
| }, |
| { |
| "epoch": 2.334711160399234, |
| "grad_norm": 0.1381536437205612, |
| "learning_rate": 5e-05, |
| "loss": 1.5601, |
| "step": 2314 |
| }, |
| { |
| "epoch": 2.3357193265450147, |
| "grad_norm": 0.15228291932338708, |
| "learning_rate": 5e-05, |
| "loss": 1.5706, |
| "step": 2315 |
| }, |
| { |
| "epoch": 2.3367274926907955, |
| "grad_norm": 0.1306924937598232, |
| "learning_rate": 5e-05, |
| "loss": 1.5827, |
| "step": 2316 |
| }, |
| { |
| "epoch": 2.3377356588365763, |
| "grad_norm": 0.14001665585417195, |
| "learning_rate": 5e-05, |
| "loss": 1.5958, |
| "step": 2317 |
| }, |
| { |
| "epoch": 2.338743824982357, |
| "grad_norm": 0.13735707283706902, |
| "learning_rate": 5e-05, |
| "loss": 1.5848, |
| "step": 2318 |
| }, |
| { |
| "epoch": 2.339751991128138, |
| "grad_norm": 0.1556551079360177, |
| "learning_rate": 5e-05, |
| "loss": 1.5659, |
| "step": 2319 |
| }, |
| { |
| "epoch": 2.3407601572739187, |
| "grad_norm": 0.12912763501066377, |
| "learning_rate": 5e-05, |
| "loss": 1.5754, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.3417683234196995, |
| "grad_norm": 0.1387010095308894, |
| "learning_rate": 5e-05, |
| "loss": 1.5826, |
| "step": 2321 |
| }, |
| { |
| "epoch": 2.3427764895654803, |
| "grad_norm": 0.13707015278373597, |
| "learning_rate": 5e-05, |
| "loss": 1.5722, |
| "step": 2322 |
| }, |
| { |
| "epoch": 2.343784655711261, |
| "grad_norm": 0.13755459849656163, |
| "learning_rate": 5e-05, |
| "loss": 1.599, |
| "step": 2323 |
| }, |
| { |
| "epoch": 2.344792821857042, |
| "grad_norm": 0.1477816867261747, |
| "learning_rate": 5e-05, |
| "loss": 1.5929, |
| "step": 2324 |
| }, |
| { |
| "epoch": 2.3458009880028228, |
| "grad_norm": 0.14646723031107797, |
| "learning_rate": 5e-05, |
| "loss": 1.5688, |
| "step": 2325 |
| }, |
| { |
| "epoch": 2.3468091541486036, |
| "grad_norm": 0.13740302126433548, |
| "learning_rate": 5e-05, |
| "loss": 1.5705, |
| "step": 2326 |
| }, |
| { |
| "epoch": 2.3478173202943844, |
| "grad_norm": 0.14019264086801358, |
| "learning_rate": 5e-05, |
| "loss": 1.5734, |
| "step": 2327 |
| }, |
| { |
| "epoch": 2.348825486440165, |
| "grad_norm": 0.1467986736371059, |
| "learning_rate": 5e-05, |
| "loss": 1.5707, |
| "step": 2328 |
| }, |
| { |
| "epoch": 2.349833652585946, |
| "grad_norm": 0.14612680323301838, |
| "learning_rate": 5e-05, |
| "loss": 1.5849, |
| "step": 2329 |
| }, |
| { |
| "epoch": 2.350841818731727, |
| "grad_norm": 0.12608156150784167, |
| "learning_rate": 5e-05, |
| "loss": 1.5938, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.3518499848775076, |
| "grad_norm": 0.1396137084828096, |
| "learning_rate": 5e-05, |
| "loss": 1.5776, |
| "step": 2331 |
| }, |
| { |
| "epoch": 2.3528581510232884, |
| "grad_norm": 0.14697256462380895, |
| "learning_rate": 5e-05, |
| "loss": 1.5785, |
| "step": 2332 |
| }, |
| { |
| "epoch": 2.3538663171690697, |
| "grad_norm": 0.13318598995909164, |
| "learning_rate": 5e-05, |
| "loss": 1.5649, |
| "step": 2333 |
| }, |
| { |
| "epoch": 2.35487448331485, |
| "grad_norm": 0.13167374592526118, |
| "learning_rate": 5e-05, |
| "loss": 1.5588, |
| "step": 2334 |
| }, |
| { |
| "epoch": 2.3558826494606313, |
| "grad_norm": 0.13698685253970463, |
| "learning_rate": 5e-05, |
| "loss": 1.5496, |
| "step": 2335 |
| }, |
| { |
| "epoch": 2.356890815606412, |
| "grad_norm": 0.1452425046413725, |
| "learning_rate": 5e-05, |
| "loss": 1.5836, |
| "step": 2336 |
| }, |
| { |
| "epoch": 2.357898981752193, |
| "grad_norm": 0.1376201655979563, |
| "learning_rate": 5e-05, |
| "loss": 1.5726, |
| "step": 2337 |
| }, |
| { |
| "epoch": 2.3589071478979737, |
| "grad_norm": 0.1319396160325784, |
| "learning_rate": 5e-05, |
| "loss": 1.5648, |
| "step": 2338 |
| }, |
| { |
| "epoch": 2.3599153140437545, |
| "grad_norm": 0.13551922943268416, |
| "learning_rate": 5e-05, |
| "loss": 1.5738, |
| "step": 2339 |
| }, |
| { |
| "epoch": 2.3609234801895354, |
| "grad_norm": 0.13680512805774192, |
| "learning_rate": 5e-05, |
| "loss": 1.5667, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.361931646335316, |
| "grad_norm": 0.12812856649089038, |
| "learning_rate": 5e-05, |
| "loss": 1.5845, |
| "step": 2341 |
| }, |
| { |
| "epoch": 2.362939812481097, |
| "grad_norm": 0.13206644043547325, |
| "learning_rate": 5e-05, |
| "loss": 1.5612, |
| "step": 2342 |
| }, |
| { |
| "epoch": 2.3639479786268778, |
| "grad_norm": 0.13412558436594751, |
| "learning_rate": 5e-05, |
| "loss": 1.5779, |
| "step": 2343 |
| }, |
| { |
| "epoch": 2.3649561447726586, |
| "grad_norm": 0.13473004496883761, |
| "learning_rate": 5e-05, |
| "loss": 1.5746, |
| "step": 2344 |
| }, |
| { |
| "epoch": 2.3659643109184394, |
| "grad_norm": 0.13206568313033137, |
| "learning_rate": 5e-05, |
| "loss": 1.5781, |
| "step": 2345 |
| }, |
| { |
| "epoch": 2.36697247706422, |
| "grad_norm": 0.1486456193954694, |
| "learning_rate": 5e-05, |
| "loss": 1.568, |
| "step": 2346 |
| }, |
| { |
| "epoch": 2.367980643210001, |
| "grad_norm": 0.12754711366765203, |
| "learning_rate": 5e-05, |
| "loss": 1.5649, |
| "step": 2347 |
| }, |
| { |
| "epoch": 2.368988809355782, |
| "grad_norm": 0.13566146603000886, |
| "learning_rate": 5e-05, |
| "loss": 1.5655, |
| "step": 2348 |
| }, |
| { |
| "epoch": 2.3699969755015626, |
| "grad_norm": 0.13956288232984534, |
| "learning_rate": 5e-05, |
| "loss": 1.5731, |
| "step": 2349 |
| }, |
| { |
| "epoch": 2.3710051416473434, |
| "grad_norm": 0.1642712298546713, |
| "learning_rate": 5e-05, |
| "loss": 1.5786, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.3720133077931242, |
| "grad_norm": 0.1371958047601129, |
| "learning_rate": 5e-05, |
| "loss": 1.5597, |
| "step": 2351 |
| }, |
| { |
| "epoch": 2.373021473938905, |
| "grad_norm": 0.1431570975130207, |
| "learning_rate": 5e-05, |
| "loss": 1.5628, |
| "step": 2352 |
| }, |
| { |
| "epoch": 2.374029640084686, |
| "grad_norm": 0.12426697149592261, |
| "learning_rate": 5e-05, |
| "loss": 1.5779, |
| "step": 2353 |
| }, |
| { |
| "epoch": 2.3750378062304667, |
| "grad_norm": 0.14237237175042702, |
| "learning_rate": 5e-05, |
| "loss": 1.5786, |
| "step": 2354 |
| }, |
| { |
| "epoch": 2.3760459723762475, |
| "grad_norm": 0.1318323948497074, |
| "learning_rate": 5e-05, |
| "loss": 1.5625, |
| "step": 2355 |
| }, |
| { |
| "epoch": 2.3770541385220283, |
| "grad_norm": 0.13119991131073985, |
| "learning_rate": 5e-05, |
| "loss": 1.5647, |
| "step": 2356 |
| }, |
| { |
| "epoch": 2.378062304667809, |
| "grad_norm": 0.14500573229210184, |
| "learning_rate": 5e-05, |
| "loss": 1.5727, |
| "step": 2357 |
| }, |
| { |
| "epoch": 2.37907047081359, |
| "grad_norm": 0.13472149296359828, |
| "learning_rate": 5e-05, |
| "loss": 1.5687, |
| "step": 2358 |
| }, |
| { |
| "epoch": 2.3800786369593707, |
| "grad_norm": 0.13195202941241038, |
| "learning_rate": 5e-05, |
| "loss": 1.5926, |
| "step": 2359 |
| }, |
| { |
| "epoch": 2.381086803105152, |
| "grad_norm": 0.14237660486568385, |
| "learning_rate": 5e-05, |
| "loss": 1.5744, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.3820949692509323, |
| "grad_norm": 0.13706216336194096, |
| "learning_rate": 5e-05, |
| "loss": 1.5752, |
| "step": 2361 |
| }, |
| { |
| "epoch": 2.3831031353967136, |
| "grad_norm": 0.1492832019493269, |
| "learning_rate": 5e-05, |
| "loss": 1.5751, |
| "step": 2362 |
| }, |
| { |
| "epoch": 2.384111301542494, |
| "grad_norm": 0.13284967563267536, |
| "learning_rate": 5e-05, |
| "loss": 1.5828, |
| "step": 2363 |
| }, |
| { |
| "epoch": 2.385119467688275, |
| "grad_norm": 0.13781931262747582, |
| "learning_rate": 5e-05, |
| "loss": 1.5615, |
| "step": 2364 |
| }, |
| { |
| "epoch": 2.386127633834056, |
| "grad_norm": 0.13612007614009056, |
| "learning_rate": 5e-05, |
| "loss": 1.5774, |
| "step": 2365 |
| }, |
| { |
| "epoch": 2.387135799979837, |
| "grad_norm": 0.1447708769772133, |
| "learning_rate": 5e-05, |
| "loss": 1.59, |
| "step": 2366 |
| }, |
| { |
| "epoch": 2.3881439661256176, |
| "grad_norm": 0.13170398607372086, |
| "learning_rate": 5e-05, |
| "loss": 1.5639, |
| "step": 2367 |
| }, |
| { |
| "epoch": 2.3891521322713984, |
| "grad_norm": 0.14621589964890505, |
| "learning_rate": 5e-05, |
| "loss": 1.5792, |
| "step": 2368 |
| }, |
| { |
| "epoch": 2.3901602984171793, |
| "grad_norm": 0.1511773599153227, |
| "learning_rate": 5e-05, |
| "loss": 1.5746, |
| "step": 2369 |
| }, |
| { |
| "epoch": 2.39116846456296, |
| "grad_norm": 0.1458467606603414, |
| "learning_rate": 5e-05, |
| "loss": 1.5833, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.392176630708741, |
| "grad_norm": 0.14927910226780375, |
| "learning_rate": 5e-05, |
| "loss": 1.5585, |
| "step": 2371 |
| }, |
| { |
| "epoch": 2.3931847968545217, |
| "grad_norm": 0.1449109010627941, |
| "learning_rate": 5e-05, |
| "loss": 1.5763, |
| "step": 2372 |
| }, |
| { |
| "epoch": 2.3941929630003025, |
| "grad_norm": 0.13109405942336458, |
| "learning_rate": 5e-05, |
| "loss": 1.5581, |
| "step": 2373 |
| }, |
| { |
| "epoch": 2.3952011291460833, |
| "grad_norm": 0.13934861021734576, |
| "learning_rate": 5e-05, |
| "loss": 1.5674, |
| "step": 2374 |
| }, |
| { |
| "epoch": 2.396209295291864, |
| "grad_norm": 0.1446961784361111, |
| "learning_rate": 5e-05, |
| "loss": 1.5415, |
| "step": 2375 |
| }, |
| { |
| "epoch": 2.397217461437645, |
| "grad_norm": 0.13755035427062254, |
| "learning_rate": 5e-05, |
| "loss": 1.5687, |
| "step": 2376 |
| }, |
| { |
| "epoch": 2.3982256275834257, |
| "grad_norm": 0.12921600400332672, |
| "learning_rate": 5e-05, |
| "loss": 1.5843, |
| "step": 2377 |
| }, |
| { |
| "epoch": 2.3992337937292065, |
| "grad_norm": 0.12599830025256673, |
| "learning_rate": 5e-05, |
| "loss": 1.5836, |
| "step": 2378 |
| }, |
| { |
| "epoch": 2.4002419598749873, |
| "grad_norm": 0.14284986000034275, |
| "learning_rate": 5e-05, |
| "loss": 1.5692, |
| "step": 2379 |
| }, |
| { |
| "epoch": 2.401250126020768, |
| "grad_norm": 0.13384043727676295, |
| "learning_rate": 5e-05, |
| "loss": 1.5778, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.402258292166549, |
| "grad_norm": 0.12888164481531164, |
| "learning_rate": 5e-05, |
| "loss": 1.5788, |
| "step": 2381 |
| }, |
| { |
| "epoch": 2.4032664583123298, |
| "grad_norm": 0.13001360124841055, |
| "learning_rate": 5e-05, |
| "loss": 1.564, |
| "step": 2382 |
| }, |
| { |
| "epoch": 2.4042746244581106, |
| "grad_norm": 0.13855183595836684, |
| "learning_rate": 5e-05, |
| "loss": 1.5777, |
| "step": 2383 |
| }, |
| { |
| "epoch": 2.4052827906038914, |
| "grad_norm": 0.1355775003901884, |
| "learning_rate": 5e-05, |
| "loss": 1.5773, |
| "step": 2384 |
| }, |
| { |
| "epoch": 2.406290956749672, |
| "grad_norm": 0.13842090636379575, |
| "learning_rate": 5e-05, |
| "loss": 1.577, |
| "step": 2385 |
| }, |
| { |
| "epoch": 2.407299122895453, |
| "grad_norm": 0.1312234964077852, |
| "learning_rate": 5e-05, |
| "loss": 1.5805, |
| "step": 2386 |
| }, |
| { |
| "epoch": 2.408307289041234, |
| "grad_norm": 0.1480537482351519, |
| "learning_rate": 5e-05, |
| "loss": 1.5641, |
| "step": 2387 |
| }, |
| { |
| "epoch": 2.4093154551870146, |
| "grad_norm": 0.13563346503461496, |
| "learning_rate": 5e-05, |
| "loss": 1.5812, |
| "step": 2388 |
| }, |
| { |
| "epoch": 2.410323621332796, |
| "grad_norm": 0.1374414779066698, |
| "learning_rate": 5e-05, |
| "loss": 1.5775, |
| "step": 2389 |
| }, |
| { |
| "epoch": 2.4113317874785762, |
| "grad_norm": 0.13960415581308322, |
| "learning_rate": 5e-05, |
| "loss": 1.5545, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.4123399536243575, |
| "grad_norm": 0.13359670238450724, |
| "learning_rate": 5e-05, |
| "loss": 1.5834, |
| "step": 2391 |
| }, |
| { |
| "epoch": 2.4133481197701383, |
| "grad_norm": 0.1392195241675204, |
| "learning_rate": 5e-05, |
| "loss": 1.5594, |
| "step": 2392 |
| }, |
| { |
| "epoch": 2.414356285915919, |
| "grad_norm": 0.13247489393452228, |
| "learning_rate": 5e-05, |
| "loss": 1.5781, |
| "step": 2393 |
| }, |
| { |
| "epoch": 2.4153644520617, |
| "grad_norm": 0.1497906517108831, |
| "learning_rate": 5e-05, |
| "loss": 1.5744, |
| "step": 2394 |
| }, |
| { |
| "epoch": 2.4163726182074807, |
| "grad_norm": 0.12838007271692908, |
| "learning_rate": 5e-05, |
| "loss": 1.5749, |
| "step": 2395 |
| }, |
| { |
| "epoch": 2.4173807843532615, |
| "grad_norm": 0.1407089734768055, |
| "learning_rate": 5e-05, |
| "loss": 1.5792, |
| "step": 2396 |
| }, |
| { |
| "epoch": 2.4183889504990423, |
| "grad_norm": 0.1393752593452367, |
| "learning_rate": 5e-05, |
| "loss": 1.5811, |
| "step": 2397 |
| }, |
| { |
| "epoch": 2.419397116644823, |
| "grad_norm": 0.12964122325258895, |
| "learning_rate": 5e-05, |
| "loss": 1.5627, |
| "step": 2398 |
| }, |
| { |
| "epoch": 2.420405282790604, |
| "grad_norm": 0.13460702024917695, |
| "learning_rate": 5e-05, |
| "loss": 1.5689, |
| "step": 2399 |
| }, |
| { |
| "epoch": 2.4214134489363848, |
| "grad_norm": 0.13878826918050213, |
| "learning_rate": 5e-05, |
| "loss": 1.5736, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.4224216150821656, |
| "grad_norm": 0.12934976735747655, |
| "learning_rate": 5e-05, |
| "loss": 1.562, |
| "step": 2401 |
| }, |
| { |
| "epoch": 2.4234297812279464, |
| "grad_norm": 0.13563528648016107, |
| "learning_rate": 5e-05, |
| "loss": 1.584, |
| "step": 2402 |
| }, |
| { |
| "epoch": 2.424437947373727, |
| "grad_norm": 0.1635163479489948, |
| "learning_rate": 5e-05, |
| "loss": 1.5644, |
| "step": 2403 |
| }, |
| { |
| "epoch": 2.425446113519508, |
| "grad_norm": 0.1362603873202254, |
| "learning_rate": 5e-05, |
| "loss": 1.5709, |
| "step": 2404 |
| }, |
| { |
| "epoch": 2.426454279665289, |
| "grad_norm": 0.13705910067232868, |
| "learning_rate": 5e-05, |
| "loss": 1.5599, |
| "step": 2405 |
| }, |
| { |
| "epoch": 2.4274624458110696, |
| "grad_norm": 0.13238627727596283, |
| "learning_rate": 5e-05, |
| "loss": 1.567, |
| "step": 2406 |
| }, |
| { |
| "epoch": 2.4284706119568504, |
| "grad_norm": 0.1424503110347935, |
| "learning_rate": 5e-05, |
| "loss": 1.5827, |
| "step": 2407 |
| }, |
| { |
| "epoch": 2.4294787781026312, |
| "grad_norm": 0.14481999094444797, |
| "learning_rate": 5e-05, |
| "loss": 1.5802, |
| "step": 2408 |
| }, |
| { |
| "epoch": 2.430486944248412, |
| "grad_norm": 0.1254927464881309, |
| "learning_rate": 5e-05, |
| "loss": 1.5666, |
| "step": 2409 |
| }, |
| { |
| "epoch": 2.431495110394193, |
| "grad_norm": 0.14871450999559535, |
| "learning_rate": 5e-05, |
| "loss": 1.5794, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.4325032765399737, |
| "grad_norm": 0.13294327553010796, |
| "learning_rate": 5e-05, |
| "loss": 1.5715, |
| "step": 2411 |
| }, |
| { |
| "epoch": 2.4335114426857545, |
| "grad_norm": 0.13348530146232257, |
| "learning_rate": 5e-05, |
| "loss": 1.5764, |
| "step": 2412 |
| }, |
| { |
| "epoch": 2.4345196088315353, |
| "grad_norm": 0.14339091781597899, |
| "learning_rate": 5e-05, |
| "loss": 1.5721, |
| "step": 2413 |
| }, |
| { |
| "epoch": 2.435527774977316, |
| "grad_norm": 0.15100964315181148, |
| "learning_rate": 5e-05, |
| "loss": 1.5742, |
| "step": 2414 |
| }, |
| { |
| "epoch": 2.436535941123097, |
| "grad_norm": 0.1291649549852917, |
| "learning_rate": 5e-05, |
| "loss": 1.5665, |
| "step": 2415 |
| }, |
| { |
| "epoch": 2.437544107268878, |
| "grad_norm": 0.13241803442644284, |
| "learning_rate": 5e-05, |
| "loss": 1.5589, |
| "step": 2416 |
| }, |
| { |
| "epoch": 2.4385522734146585, |
| "grad_norm": 0.13515219086853045, |
| "learning_rate": 5e-05, |
| "loss": 1.5817, |
| "step": 2417 |
| }, |
| { |
| "epoch": 2.4395604395604398, |
| "grad_norm": 0.13471099156870195, |
| "learning_rate": 5e-05, |
| "loss": 1.5648, |
| "step": 2418 |
| }, |
| { |
| "epoch": 2.44056860570622, |
| "grad_norm": 0.1392251071384398, |
| "learning_rate": 5e-05, |
| "loss": 1.5876, |
| "step": 2419 |
| }, |
| { |
| "epoch": 2.4415767718520014, |
| "grad_norm": 0.1410794306172876, |
| "learning_rate": 5e-05, |
| "loss": 1.5932, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.442584937997782, |
| "grad_norm": 0.1362769330250836, |
| "learning_rate": 5e-05, |
| "loss": 1.5854, |
| "step": 2421 |
| }, |
| { |
| "epoch": 2.443593104143563, |
| "grad_norm": 0.14631288657866592, |
| "learning_rate": 5e-05, |
| "loss": 1.5527, |
| "step": 2422 |
| }, |
| { |
| "epoch": 2.444601270289344, |
| "grad_norm": 0.13269401985059945, |
| "learning_rate": 5e-05, |
| "loss": 1.5845, |
| "step": 2423 |
| }, |
| { |
| "epoch": 2.4456094364351246, |
| "grad_norm": 0.1319557868081523, |
| "learning_rate": 5e-05, |
| "loss": 1.5667, |
| "step": 2424 |
| }, |
| { |
| "epoch": 2.4466176025809054, |
| "grad_norm": 0.12597772042087557, |
| "learning_rate": 5e-05, |
| "loss": 1.5695, |
| "step": 2425 |
| }, |
| { |
| "epoch": 2.4476257687266862, |
| "grad_norm": 0.13815243282087905, |
| "learning_rate": 5e-05, |
| "loss": 1.5816, |
| "step": 2426 |
| }, |
| { |
| "epoch": 2.448633934872467, |
| "grad_norm": 0.13713330689169553, |
| "learning_rate": 5e-05, |
| "loss": 1.572, |
| "step": 2427 |
| }, |
| { |
| "epoch": 2.449642101018248, |
| "grad_norm": 0.13859041649550563, |
| "learning_rate": 5e-05, |
| "loss": 1.5953, |
| "step": 2428 |
| }, |
| { |
| "epoch": 2.4506502671640287, |
| "grad_norm": 0.13508535172669156, |
| "learning_rate": 5e-05, |
| "loss": 1.5721, |
| "step": 2429 |
| }, |
| { |
| "epoch": 2.4516584333098095, |
| "grad_norm": 0.1463530271146982, |
| "learning_rate": 5e-05, |
| "loss": 1.5834, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.4526665994555903, |
| "grad_norm": 0.13224226734969893, |
| "learning_rate": 5e-05, |
| "loss": 1.5764, |
| "step": 2431 |
| }, |
| { |
| "epoch": 2.453674765601371, |
| "grad_norm": 0.1607786000995468, |
| "learning_rate": 5e-05, |
| "loss": 1.5888, |
| "step": 2432 |
| }, |
| { |
| "epoch": 2.454682931747152, |
| "grad_norm": 0.1408107276695577, |
| "learning_rate": 5e-05, |
| "loss": 1.5759, |
| "step": 2433 |
| }, |
| { |
| "epoch": 2.4556910978929327, |
| "grad_norm": 0.17113821583538205, |
| "learning_rate": 5e-05, |
| "loss": 1.5892, |
| "step": 2434 |
| }, |
| { |
| "epoch": 2.4566992640387135, |
| "grad_norm": 0.1336457346672259, |
| "learning_rate": 5e-05, |
| "loss": 1.5703, |
| "step": 2435 |
| }, |
| { |
| "epoch": 2.4577074301844943, |
| "grad_norm": 0.14812625323333414, |
| "learning_rate": 5e-05, |
| "loss": 1.552, |
| "step": 2436 |
| }, |
| { |
| "epoch": 2.458715596330275, |
| "grad_norm": 0.13290094043888356, |
| "learning_rate": 5e-05, |
| "loss": 1.5769, |
| "step": 2437 |
| }, |
| { |
| "epoch": 2.459723762476056, |
| "grad_norm": 0.13961256613566742, |
| "learning_rate": 5e-05, |
| "loss": 1.5871, |
| "step": 2438 |
| }, |
| { |
| "epoch": 2.4607319286218368, |
| "grad_norm": 0.13177722231521807, |
| "learning_rate": 5e-05, |
| "loss": 1.5921, |
| "step": 2439 |
| }, |
| { |
| "epoch": 2.4617400947676176, |
| "grad_norm": 0.14231658931889407, |
| "learning_rate": 5e-05, |
| "loss": 1.567, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.4627482609133984, |
| "grad_norm": 0.13476284327765373, |
| "learning_rate": 5e-05, |
| "loss": 1.5865, |
| "step": 2441 |
| }, |
| { |
| "epoch": 2.463756427059179, |
| "grad_norm": 0.13970460473325771, |
| "learning_rate": 5e-05, |
| "loss": 1.5655, |
| "step": 2442 |
| }, |
| { |
| "epoch": 2.46476459320496, |
| "grad_norm": 0.13205775238552916, |
| "learning_rate": 5e-05, |
| "loss": 1.579, |
| "step": 2443 |
| }, |
| { |
| "epoch": 2.465772759350741, |
| "grad_norm": 0.13888961499112318, |
| "learning_rate": 5e-05, |
| "loss": 1.5736, |
| "step": 2444 |
| }, |
| { |
| "epoch": 2.466780925496522, |
| "grad_norm": 0.12906724404483103, |
| "learning_rate": 5e-05, |
| "loss": 1.5602, |
| "step": 2445 |
| }, |
| { |
| "epoch": 2.4677890916423024, |
| "grad_norm": 0.13215069256460463, |
| "learning_rate": 5e-05, |
| "loss": 1.5707, |
| "step": 2446 |
| }, |
| { |
| "epoch": 2.4687972577880837, |
| "grad_norm": 0.13378752606561972, |
| "learning_rate": 5e-05, |
| "loss": 1.5813, |
| "step": 2447 |
| }, |
| { |
| "epoch": 2.4698054239338645, |
| "grad_norm": 0.12885498076034543, |
| "learning_rate": 5e-05, |
| "loss": 1.5834, |
| "step": 2448 |
| }, |
| { |
| "epoch": 2.4708135900796453, |
| "grad_norm": 0.13188478896564354, |
| "learning_rate": 5e-05, |
| "loss": 1.5708, |
| "step": 2449 |
| }, |
| { |
| "epoch": 2.471821756225426, |
| "grad_norm": 0.14245357066832004, |
| "learning_rate": 5e-05, |
| "loss": 1.5777, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.472829922371207, |
| "grad_norm": 0.12803504022023557, |
| "learning_rate": 5e-05, |
| "loss": 1.5649, |
| "step": 2451 |
| }, |
| { |
| "epoch": 2.4738380885169877, |
| "grad_norm": 0.13782634262350107, |
| "learning_rate": 5e-05, |
| "loss": 1.5625, |
| "step": 2452 |
| }, |
| { |
| "epoch": 2.4748462546627685, |
| "grad_norm": 0.13296958894649374, |
| "learning_rate": 5e-05, |
| "loss": 1.5771, |
| "step": 2453 |
| }, |
| { |
| "epoch": 2.4758544208085493, |
| "grad_norm": 0.12638621012304266, |
| "learning_rate": 5e-05, |
| "loss": 1.5841, |
| "step": 2454 |
| }, |
| { |
| "epoch": 2.47686258695433, |
| "grad_norm": 0.1332807458477833, |
| "learning_rate": 5e-05, |
| "loss": 1.5927, |
| "step": 2455 |
| }, |
| { |
| "epoch": 2.477870753100111, |
| "grad_norm": 0.1289022244439646, |
| "learning_rate": 5e-05, |
| "loss": 1.5829, |
| "step": 2456 |
| }, |
| { |
| "epoch": 2.4788789192458918, |
| "grad_norm": 0.14266754208759228, |
| "learning_rate": 5e-05, |
| "loss": 1.5741, |
| "step": 2457 |
| }, |
| { |
| "epoch": 2.4798870853916726, |
| "grad_norm": 0.13739273922110581, |
| "learning_rate": 5e-05, |
| "loss": 1.5577, |
| "step": 2458 |
| }, |
| { |
| "epoch": 2.4808952515374534, |
| "grad_norm": 0.14045495223205887, |
| "learning_rate": 5e-05, |
| "loss": 1.5534, |
| "step": 2459 |
| }, |
| { |
| "epoch": 2.481903417683234, |
| "grad_norm": 0.14154710496839273, |
| "learning_rate": 5e-05, |
| "loss": 1.5755, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.482911583829015, |
| "grad_norm": 0.12835695587273255, |
| "learning_rate": 5e-05, |
| "loss": 1.5675, |
| "step": 2461 |
| }, |
| { |
| "epoch": 2.483919749974796, |
| "grad_norm": 0.12933419582659292, |
| "learning_rate": 5e-05, |
| "loss": 1.5685, |
| "step": 2462 |
| }, |
| { |
| "epoch": 2.4849279161205766, |
| "grad_norm": 0.130298325020088, |
| "learning_rate": 5e-05, |
| "loss": 1.5529, |
| "step": 2463 |
| }, |
| { |
| "epoch": 2.4859360822663574, |
| "grad_norm": 0.12967199709240884, |
| "learning_rate": 5e-05, |
| "loss": 1.5681, |
| "step": 2464 |
| }, |
| { |
| "epoch": 2.4869442484121382, |
| "grad_norm": 0.13052105938370967, |
| "learning_rate": 5e-05, |
| "loss": 1.5588, |
| "step": 2465 |
| }, |
| { |
| "epoch": 2.487952414557919, |
| "grad_norm": 0.13926608795875886, |
| "learning_rate": 5e-05, |
| "loss": 1.57, |
| "step": 2466 |
| }, |
| { |
| "epoch": 2.4889605807037, |
| "grad_norm": 0.13527261030795035, |
| "learning_rate": 5e-05, |
| "loss": 1.562, |
| "step": 2467 |
| }, |
| { |
| "epoch": 2.4899687468494807, |
| "grad_norm": 0.13615561502455836, |
| "learning_rate": 5e-05, |
| "loss": 1.5685, |
| "step": 2468 |
| }, |
| { |
| "epoch": 2.4909769129952615, |
| "grad_norm": 0.13182708641555427, |
| "learning_rate": 5e-05, |
| "loss": 1.5588, |
| "step": 2469 |
| }, |
| { |
| "epoch": 2.4919850791410423, |
| "grad_norm": 0.14280294347540126, |
| "learning_rate": 5e-05, |
| "loss": 1.5682, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.492993245286823, |
| "grad_norm": 0.1398245999275893, |
| "learning_rate": 5e-05, |
| "loss": 1.5499, |
| "step": 2471 |
| }, |
| { |
| "epoch": 2.4940014114326043, |
| "grad_norm": 0.14408179071191235, |
| "learning_rate": 5e-05, |
| "loss": 1.5684, |
| "step": 2472 |
| }, |
| { |
| "epoch": 2.4950095775783847, |
| "grad_norm": 0.1482559154904685, |
| "learning_rate": 5e-05, |
| "loss": 1.5685, |
| "step": 2473 |
| }, |
| { |
| "epoch": 2.496017743724166, |
| "grad_norm": 0.12983878701279014, |
| "learning_rate": 5e-05, |
| "loss": 1.5616, |
| "step": 2474 |
| }, |
| { |
| "epoch": 2.4970259098699463, |
| "grad_norm": 0.1350990230454604, |
| "learning_rate": 5e-05, |
| "loss": 1.5686, |
| "step": 2475 |
| }, |
| { |
| "epoch": 2.4980340760157276, |
| "grad_norm": 0.1517745861472744, |
| "learning_rate": 5e-05, |
| "loss": 1.5649, |
| "step": 2476 |
| }, |
| { |
| "epoch": 2.4990422421615084, |
| "grad_norm": 0.1296415782702942, |
| "learning_rate": 5e-05, |
| "loss": 1.5809, |
| "step": 2477 |
| }, |
| { |
| "epoch": 2.500050408307289, |
| "grad_norm": 0.14410943255216555, |
| "learning_rate": 5e-05, |
| "loss": 1.5739, |
| "step": 2478 |
| }, |
| { |
| "epoch": 2.50105857445307, |
| "grad_norm": 0.1330701724321787, |
| "learning_rate": 5e-05, |
| "loss": 1.5388, |
| "step": 2479 |
| }, |
| { |
| "epoch": 2.502066740598851, |
| "grad_norm": 0.13484529044587992, |
| "learning_rate": 5e-05, |
| "loss": 1.5706, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.5030749067446316, |
| "grad_norm": 0.14585530598199864, |
| "learning_rate": 5e-05, |
| "loss": 1.5628, |
| "step": 2481 |
| }, |
| { |
| "epoch": 2.5040830728904124, |
| "grad_norm": 0.14335110959236813, |
| "learning_rate": 5e-05, |
| "loss": 1.5791, |
| "step": 2482 |
| }, |
| { |
| "epoch": 2.5050912390361932, |
| "grad_norm": 0.14823280767430108, |
| "learning_rate": 5e-05, |
| "loss": 1.566, |
| "step": 2483 |
| }, |
| { |
| "epoch": 2.506099405181974, |
| "grad_norm": 0.14556611452022872, |
| "learning_rate": 5e-05, |
| "loss": 1.5653, |
| "step": 2484 |
| }, |
| { |
| "epoch": 2.507107571327755, |
| "grad_norm": 0.13454439849791208, |
| "learning_rate": 5e-05, |
| "loss": 1.56, |
| "step": 2485 |
| }, |
| { |
| "epoch": 2.5081157374735357, |
| "grad_norm": 0.13839984587712512, |
| "learning_rate": 5e-05, |
| "loss": 1.5565, |
| "step": 2486 |
| }, |
| { |
| "epoch": 2.5091239036193165, |
| "grad_norm": 0.13307537522190815, |
| "learning_rate": 5e-05, |
| "loss": 1.5671, |
| "step": 2487 |
| }, |
| { |
| "epoch": 2.5101320697650973, |
| "grad_norm": 0.13959579904141645, |
| "learning_rate": 5e-05, |
| "loss": 1.5797, |
| "step": 2488 |
| }, |
| { |
| "epoch": 2.511140235910878, |
| "grad_norm": 0.13005573539881465, |
| "learning_rate": 5e-05, |
| "loss": 1.5768, |
| "step": 2489 |
| }, |
| { |
| "epoch": 2.512148402056659, |
| "grad_norm": 0.13276083136899364, |
| "learning_rate": 5e-05, |
| "loss": 1.5707, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.5131565682024397, |
| "grad_norm": 0.12732790377512698, |
| "learning_rate": 5e-05, |
| "loss": 1.5637, |
| "step": 2491 |
| }, |
| { |
| "epoch": 2.5141647343482205, |
| "grad_norm": 0.13703947337554104, |
| "learning_rate": 5e-05, |
| "loss": 1.5608, |
| "step": 2492 |
| }, |
| { |
| "epoch": 2.5151729004940013, |
| "grad_norm": 0.1369690453430113, |
| "learning_rate": 5e-05, |
| "loss": 1.5871, |
| "step": 2493 |
| }, |
| { |
| "epoch": 2.516181066639782, |
| "grad_norm": 0.13733318758079022, |
| "learning_rate": 5e-05, |
| "loss": 1.5803, |
| "step": 2494 |
| }, |
| { |
| "epoch": 2.517189232785563, |
| "grad_norm": 0.14136033097026432, |
| "learning_rate": 5e-05, |
| "loss": 1.5658, |
| "step": 2495 |
| }, |
| { |
| "epoch": 2.5181973989313438, |
| "grad_norm": 0.14034198910723664, |
| "learning_rate": 5e-05, |
| "loss": 1.561, |
| "step": 2496 |
| }, |
| { |
| "epoch": 2.5192055650771246, |
| "grad_norm": 0.14047149813923593, |
| "learning_rate": 5e-05, |
| "loss": 1.5775, |
| "step": 2497 |
| }, |
| { |
| "epoch": 2.5202137312229054, |
| "grad_norm": 0.1361565028785915, |
| "learning_rate": 5e-05, |
| "loss": 1.5864, |
| "step": 2498 |
| }, |
| { |
| "epoch": 2.5212218973686866, |
| "grad_norm": 0.1346932459707488, |
| "learning_rate": 5e-05, |
| "loss": 1.5628, |
| "step": 2499 |
| }, |
| { |
| "epoch": 2.522230063514467, |
| "grad_norm": 0.13712538669966598, |
| "learning_rate": 5e-05, |
| "loss": 1.5732, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.5232382296602482, |
| "grad_norm": 0.13199711949758308, |
| "learning_rate": 5e-05, |
| "loss": 1.5574, |
| "step": 2501 |
| }, |
| { |
| "epoch": 2.5242463958060286, |
| "grad_norm": 0.1306387580998687, |
| "learning_rate": 5e-05, |
| "loss": 1.5614, |
| "step": 2502 |
| }, |
| { |
| "epoch": 2.52525456195181, |
| "grad_norm": 0.1302641243617534, |
| "learning_rate": 5e-05, |
| "loss": 1.5607, |
| "step": 2503 |
| }, |
| { |
| "epoch": 2.5262627280975902, |
| "grad_norm": 0.13309453441297842, |
| "learning_rate": 5e-05, |
| "loss": 1.5731, |
| "step": 2504 |
| }, |
| { |
| "epoch": 2.5272708942433715, |
| "grad_norm": 0.1402063808794923, |
| "learning_rate": 5e-05, |
| "loss": 1.5924, |
| "step": 2505 |
| }, |
| { |
| "epoch": 2.5282790603891523, |
| "grad_norm": 0.13066719388767034, |
| "learning_rate": 5e-05, |
| "loss": 1.5675, |
| "step": 2506 |
| }, |
| { |
| "epoch": 2.529287226534933, |
| "grad_norm": 0.1314587822264092, |
| "learning_rate": 5e-05, |
| "loss": 1.5551, |
| "step": 2507 |
| }, |
| { |
| "epoch": 2.530295392680714, |
| "grad_norm": 0.13270831067662986, |
| "learning_rate": 5e-05, |
| "loss": 1.5629, |
| "step": 2508 |
| }, |
| { |
| "epoch": 2.5313035588264947, |
| "grad_norm": 0.13610270267142832, |
| "learning_rate": 5e-05, |
| "loss": 1.5789, |
| "step": 2509 |
| }, |
| { |
| "epoch": 2.5323117249722755, |
| "grad_norm": 0.1420301959642942, |
| "learning_rate": 5e-05, |
| "loss": 1.5786, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.5333198911180563, |
| "grad_norm": 0.1320847830419652, |
| "learning_rate": 5e-05, |
| "loss": 1.5542, |
| "step": 2511 |
| }, |
| { |
| "epoch": 2.534328057263837, |
| "grad_norm": 0.13115439787293542, |
| "learning_rate": 5e-05, |
| "loss": 1.5868, |
| "step": 2512 |
| }, |
| { |
| "epoch": 2.535336223409618, |
| "grad_norm": 0.13671059366359536, |
| "learning_rate": 5e-05, |
| "loss": 1.5749, |
| "step": 2513 |
| }, |
| { |
| "epoch": 2.5363443895553988, |
| "grad_norm": 0.15170920625959478, |
| "learning_rate": 5e-05, |
| "loss": 1.5785, |
| "step": 2514 |
| }, |
| { |
| "epoch": 2.5373525557011796, |
| "grad_norm": 0.1385479032372868, |
| "learning_rate": 5e-05, |
| "loss": 1.5806, |
| "step": 2515 |
| }, |
| { |
| "epoch": 2.5383607218469604, |
| "grad_norm": 0.15501047413228763, |
| "learning_rate": 5e-05, |
| "loss": 1.5737, |
| "step": 2516 |
| }, |
| { |
| "epoch": 2.539368887992741, |
| "grad_norm": 0.14382396943316664, |
| "learning_rate": 5e-05, |
| "loss": 1.5747, |
| "step": 2517 |
| }, |
| { |
| "epoch": 2.540377054138522, |
| "grad_norm": 0.13810111650121543, |
| "learning_rate": 5e-05, |
| "loss": 1.5718, |
| "step": 2518 |
| }, |
| { |
| "epoch": 2.541385220284303, |
| "grad_norm": 0.14698439268982416, |
| "learning_rate": 5e-05, |
| "loss": 1.5738, |
| "step": 2519 |
| }, |
| { |
| "epoch": 2.5423933864300836, |
| "grad_norm": 0.13239747259177995, |
| "learning_rate": 5e-05, |
| "loss": 1.5693, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.5434015525758644, |
| "grad_norm": 0.1392786276751499, |
| "learning_rate": 5e-05, |
| "loss": 1.5598, |
| "step": 2521 |
| }, |
| { |
| "epoch": 2.5444097187216452, |
| "grad_norm": 0.13066731381474933, |
| "learning_rate": 5e-05, |
| "loss": 1.5432, |
| "step": 2522 |
| }, |
| { |
| "epoch": 2.545417884867426, |
| "grad_norm": 0.14006779722474544, |
| "learning_rate": 5e-05, |
| "loss": 1.5616, |
| "step": 2523 |
| }, |
| { |
| "epoch": 2.546426051013207, |
| "grad_norm": 0.14015375699552457, |
| "learning_rate": 5e-05, |
| "loss": 1.5757, |
| "step": 2524 |
| }, |
| { |
| "epoch": 2.5474342171589877, |
| "grad_norm": 0.1287079586458338, |
| "learning_rate": 5e-05, |
| "loss": 1.5612, |
| "step": 2525 |
| }, |
| { |
| "epoch": 2.5484423833047685, |
| "grad_norm": 0.1380581924188533, |
| "learning_rate": 5e-05, |
| "loss": 1.5612, |
| "step": 2526 |
| }, |
| { |
| "epoch": 2.5494505494505493, |
| "grad_norm": 0.1461047231472578, |
| "learning_rate": 5e-05, |
| "loss": 1.5813, |
| "step": 2527 |
| }, |
| { |
| "epoch": 2.5504587155963305, |
| "grad_norm": 0.13094559552009488, |
| "learning_rate": 5e-05, |
| "loss": 1.5576, |
| "step": 2528 |
| }, |
| { |
| "epoch": 2.551466881742111, |
| "grad_norm": 0.14877667456952287, |
| "learning_rate": 5e-05, |
| "loss": 1.5803, |
| "step": 2529 |
| }, |
| { |
| "epoch": 2.552475047887892, |
| "grad_norm": 0.1410988956684791, |
| "learning_rate": 5e-05, |
| "loss": 1.5676, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.5534832140336725, |
| "grad_norm": 0.1346136915835775, |
| "learning_rate": 5e-05, |
| "loss": 1.5645, |
| "step": 2531 |
| }, |
| { |
| "epoch": 2.5544913801794538, |
| "grad_norm": 0.13677299683014227, |
| "learning_rate": 5e-05, |
| "loss": 1.5723, |
| "step": 2532 |
| }, |
| { |
| "epoch": 2.555499546325234, |
| "grad_norm": 0.1381562967508932, |
| "learning_rate": 5e-05, |
| "loss": 1.572, |
| "step": 2533 |
| }, |
| { |
| "epoch": 2.5565077124710154, |
| "grad_norm": 0.12280272126633365, |
| "learning_rate": 5e-05, |
| "loss": 1.5681, |
| "step": 2534 |
| }, |
| { |
| "epoch": 2.557515878616796, |
| "grad_norm": 0.12742988769491065, |
| "learning_rate": 5e-05, |
| "loss": 1.584, |
| "step": 2535 |
| }, |
| { |
| "epoch": 2.558524044762577, |
| "grad_norm": 0.13510718950092368, |
| "learning_rate": 5e-05, |
| "loss": 1.5764, |
| "step": 2536 |
| }, |
| { |
| "epoch": 2.559532210908358, |
| "grad_norm": 0.12533625305806456, |
| "learning_rate": 5e-05, |
| "loss": 1.5665, |
| "step": 2537 |
| }, |
| { |
| "epoch": 2.5605403770541386, |
| "grad_norm": 0.1285836841448671, |
| "learning_rate": 5e-05, |
| "loss": 1.5705, |
| "step": 2538 |
| }, |
| { |
| "epoch": 2.5615485431999194, |
| "grad_norm": 0.1348571888933907, |
| "learning_rate": 5e-05, |
| "loss": 1.5556, |
| "step": 2539 |
| }, |
| { |
| "epoch": 2.5625567093457002, |
| "grad_norm": 0.1325268099316684, |
| "learning_rate": 5e-05, |
| "loss": 1.5766, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.563564875491481, |
| "grad_norm": 0.13932827541236092, |
| "learning_rate": 5e-05, |
| "loss": 1.581, |
| "step": 2541 |
| }, |
| { |
| "epoch": 2.564573041637262, |
| "grad_norm": 0.24623788216863024, |
| "learning_rate": 5e-05, |
| "loss": 1.5761, |
| "step": 2542 |
| }, |
| { |
| "epoch": 2.5655812077830427, |
| "grad_norm": 0.14270394122091418, |
| "learning_rate": 5e-05, |
| "loss": 1.5712, |
| "step": 2543 |
| }, |
| { |
| "epoch": 2.5665893739288235, |
| "grad_norm": 0.26287780505488345, |
| "learning_rate": 5e-05, |
| "loss": 1.5688, |
| "step": 2544 |
| }, |
| { |
| "epoch": 2.5675975400746043, |
| "grad_norm": 0.13643131361751182, |
| "learning_rate": 5e-05, |
| "loss": 1.5749, |
| "step": 2545 |
| }, |
| { |
| "epoch": 2.568605706220385, |
| "grad_norm": 0.14293860347761278, |
| "learning_rate": 5e-05, |
| "loss": 1.5597, |
| "step": 2546 |
| }, |
| { |
| "epoch": 2.569613872366166, |
| "grad_norm": 0.1439622809758673, |
| "learning_rate": 5e-05, |
| "loss": 1.57, |
| "step": 2547 |
| }, |
| { |
| "epoch": 2.5706220385119467, |
| "grad_norm": 0.1360551242225786, |
| "learning_rate": 5e-05, |
| "loss": 1.5813, |
| "step": 2548 |
| }, |
| { |
| "epoch": 2.5716302046577275, |
| "grad_norm": 0.1433763087264025, |
| "learning_rate": 5e-05, |
| "loss": 1.5737, |
| "step": 2549 |
| }, |
| { |
| "epoch": 2.5726383708035083, |
| "grad_norm": 0.14349978498686156, |
| "learning_rate": 5e-05, |
| "loss": 1.5873, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.573646536949289, |
| "grad_norm": 0.13563939190053786, |
| "learning_rate": 5e-05, |
| "loss": 1.565, |
| "step": 2551 |
| }, |
| { |
| "epoch": 2.57465470309507, |
| "grad_norm": 0.1400727636727626, |
| "learning_rate": 5e-05, |
| "loss": 1.5659, |
| "step": 2552 |
| }, |
| { |
| "epoch": 2.5756628692408507, |
| "grad_norm": 0.46677268806303335, |
| "learning_rate": 5e-05, |
| "loss": 1.5834, |
| "step": 2553 |
| }, |
| { |
| "epoch": 2.5766710353866316, |
| "grad_norm": 0.14369253983364566, |
| "learning_rate": 5e-05, |
| "loss": 1.5558, |
| "step": 2554 |
| }, |
| { |
| "epoch": 2.577679201532413, |
| "grad_norm": 0.14498641663098719, |
| "learning_rate": 5e-05, |
| "loss": 1.5652, |
| "step": 2555 |
| }, |
| { |
| "epoch": 2.578687367678193, |
| "grad_norm": 0.13760310777721238, |
| "learning_rate": 5e-05, |
| "loss": 1.581, |
| "step": 2556 |
| }, |
| { |
| "epoch": 2.5796955338239744, |
| "grad_norm": 0.14467222241686664, |
| "learning_rate": 5e-05, |
| "loss": 1.5752, |
| "step": 2557 |
| }, |
| { |
| "epoch": 2.580703699969755, |
| "grad_norm": 0.1361819067350698, |
| "learning_rate": 5e-05, |
| "loss": 1.5776, |
| "step": 2558 |
| }, |
| { |
| "epoch": 2.581711866115536, |
| "grad_norm": 0.13332450522409992, |
| "learning_rate": 5e-05, |
| "loss": 1.5606, |
| "step": 2559 |
| }, |
| { |
| "epoch": 2.5827200322613164, |
| "grad_norm": 0.14004076082903943, |
| "learning_rate": 5e-05, |
| "loss": 1.5679, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.5837281984070977, |
| "grad_norm": 0.17716816167241073, |
| "learning_rate": 5e-05, |
| "loss": 1.5808, |
| "step": 2561 |
| }, |
| { |
| "epoch": 2.5847363645528785, |
| "grad_norm": 0.135531215421772, |
| "learning_rate": 5e-05, |
| "loss": 1.5717, |
| "step": 2562 |
| }, |
| { |
| "epoch": 2.5857445306986593, |
| "grad_norm": 0.14131718191338816, |
| "learning_rate": 5e-05, |
| "loss": 1.5629, |
| "step": 2563 |
| }, |
| { |
| "epoch": 2.58675269684444, |
| "grad_norm": 0.15566287819098393, |
| "learning_rate": 5e-05, |
| "loss": 1.5772, |
| "step": 2564 |
| }, |
| { |
| "epoch": 2.587760862990221, |
| "grad_norm": 0.1486242781666469, |
| "learning_rate": 5e-05, |
| "loss": 1.5629, |
| "step": 2565 |
| }, |
| { |
| "epoch": 2.5887690291360017, |
| "grad_norm": 0.13751406947742945, |
| "learning_rate": 5e-05, |
| "loss": 1.5636, |
| "step": 2566 |
| }, |
| { |
| "epoch": 2.5897771952817825, |
| "grad_norm": 0.15458590089287258, |
| "learning_rate": 5e-05, |
| "loss": 1.5714, |
| "step": 2567 |
| }, |
| { |
| "epoch": 2.5907853614275633, |
| "grad_norm": 0.13822881305270032, |
| "learning_rate": 5e-05, |
| "loss": 1.5801, |
| "step": 2568 |
| }, |
| { |
| "epoch": 2.591793527573344, |
| "grad_norm": 0.14331964687266405, |
| "learning_rate": 5e-05, |
| "loss": 1.5846, |
| "step": 2569 |
| }, |
| { |
| "epoch": 2.592801693719125, |
| "grad_norm": 0.15434411395135197, |
| "learning_rate": 5e-05, |
| "loss": 1.57, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.5938098598649058, |
| "grad_norm": 0.14346789950395492, |
| "learning_rate": 5e-05, |
| "loss": 1.5553, |
| "step": 2571 |
| }, |
| { |
| "epoch": 2.5948180260106866, |
| "grad_norm": 0.3649511832228748, |
| "learning_rate": 5e-05, |
| "loss": 1.571, |
| "step": 2572 |
| }, |
| { |
| "epoch": 2.5958261921564674, |
| "grad_norm": 0.23624873083863812, |
| "learning_rate": 5e-05, |
| "loss": 1.5614, |
| "step": 2573 |
| }, |
| { |
| "epoch": 2.596834358302248, |
| "grad_norm": 0.13868172024332132, |
| "learning_rate": 5e-05, |
| "loss": 1.5732, |
| "step": 2574 |
| }, |
| { |
| "epoch": 2.597842524448029, |
| "grad_norm": 0.19472869370683973, |
| "learning_rate": 5e-05, |
| "loss": 1.5739, |
| "step": 2575 |
| }, |
| { |
| "epoch": 2.59885069059381, |
| "grad_norm": 0.15021863097825597, |
| "learning_rate": 5e-05, |
| "loss": 1.5746, |
| "step": 2576 |
| }, |
| { |
| "epoch": 2.5998588567395906, |
| "grad_norm": 0.1723890646573745, |
| "learning_rate": 5e-05, |
| "loss": 1.5868, |
| "step": 2577 |
| }, |
| { |
| "epoch": 2.6008670228853714, |
| "grad_norm": 0.14318449183244025, |
| "learning_rate": 5e-05, |
| "loss": 1.5693, |
| "step": 2578 |
| }, |
| { |
| "epoch": 2.6018751890311522, |
| "grad_norm": 0.14758726018313456, |
| "learning_rate": 5e-05, |
| "loss": 1.5837, |
| "step": 2579 |
| }, |
| { |
| "epoch": 2.602883355176933, |
| "grad_norm": 0.18145968707420648, |
| "learning_rate": 5e-05, |
| "loss": 1.5929, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.603891521322714, |
| "grad_norm": 0.15104416134896115, |
| "learning_rate": 5e-05, |
| "loss": 1.5777, |
| "step": 2581 |
| }, |
| { |
| "epoch": 2.6048996874684947, |
| "grad_norm": 0.1578694401415007, |
| "learning_rate": 5e-05, |
| "loss": 1.5815, |
| "step": 2582 |
| }, |
| { |
| "epoch": 2.6059078536142755, |
| "grad_norm": 0.18166720384742438, |
| "learning_rate": 5e-05, |
| "loss": 1.5623, |
| "step": 2583 |
| }, |
| { |
| "epoch": 2.6069160197600567, |
| "grad_norm": 0.15697367202164383, |
| "learning_rate": 5e-05, |
| "loss": 1.5641, |
| "step": 2584 |
| }, |
| { |
| "epoch": 2.607924185905837, |
| "grad_norm": 0.14997661568258938, |
| "learning_rate": 5e-05, |
| "loss": 1.5638, |
| "step": 2585 |
| }, |
| { |
| "epoch": 2.6089323520516183, |
| "grad_norm": 0.1669423778875084, |
| "learning_rate": 5e-05, |
| "loss": 1.5763, |
| "step": 2586 |
| }, |
| { |
| "epoch": 2.6099405181973987, |
| "grad_norm": 0.18036491079980102, |
| "learning_rate": 5e-05, |
| "loss": 1.5621, |
| "step": 2587 |
| }, |
| { |
| "epoch": 2.61094868434318, |
| "grad_norm": 0.13695247542006733, |
| "learning_rate": 5e-05, |
| "loss": 1.5736, |
| "step": 2588 |
| }, |
| { |
| "epoch": 2.6119568504889603, |
| "grad_norm": 0.17108760911033974, |
| "learning_rate": 5e-05, |
| "loss": 1.5776, |
| "step": 2589 |
| }, |
| { |
| "epoch": 2.6129650166347416, |
| "grad_norm": 0.15743505516329523, |
| "learning_rate": 5e-05, |
| "loss": 1.561, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.6139731827805224, |
| "grad_norm": 0.15546759277401703, |
| "learning_rate": 5e-05, |
| "loss": 1.5809, |
| "step": 2591 |
| }, |
| { |
| "epoch": 2.614981348926303, |
| "grad_norm": 0.1525456111524391, |
| "learning_rate": 5e-05, |
| "loss": 1.5649, |
| "step": 2592 |
| }, |
| { |
| "epoch": 2.615989515072084, |
| "grad_norm": 0.1590829570184181, |
| "learning_rate": 5e-05, |
| "loss": 1.5632, |
| "step": 2593 |
| }, |
| { |
| "epoch": 2.616997681217865, |
| "grad_norm": 0.15097999238651005, |
| "learning_rate": 5e-05, |
| "loss": 1.5692, |
| "step": 2594 |
| }, |
| { |
| "epoch": 2.6180058473636456, |
| "grad_norm": 0.15281298093347742, |
| "learning_rate": 5e-05, |
| "loss": 1.564, |
| "step": 2595 |
| }, |
| { |
| "epoch": 2.6190140135094264, |
| "grad_norm": 0.15336242452735435, |
| "learning_rate": 5e-05, |
| "loss": 1.5464, |
| "step": 2596 |
| }, |
| { |
| "epoch": 2.6200221796552072, |
| "grad_norm": 0.15028356582825605, |
| "learning_rate": 5e-05, |
| "loss": 1.5682, |
| "step": 2597 |
| }, |
| { |
| "epoch": 2.621030345800988, |
| "grad_norm": 0.14572672233718242, |
| "learning_rate": 5e-05, |
| "loss": 1.5601, |
| "step": 2598 |
| }, |
| { |
| "epoch": 2.622038511946769, |
| "grad_norm": 0.14094923929247996, |
| "learning_rate": 5e-05, |
| "loss": 1.5762, |
| "step": 2599 |
| }, |
| { |
| "epoch": 2.6230466780925497, |
| "grad_norm": 0.14403490955925488, |
| "learning_rate": 5e-05, |
| "loss": 1.5861, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.6240548442383305, |
| "grad_norm": 0.15008485743918354, |
| "learning_rate": 5e-05, |
| "loss": 1.576, |
| "step": 2601 |
| }, |
| { |
| "epoch": 2.6250630103841113, |
| "grad_norm": 0.14787246043739002, |
| "learning_rate": 5e-05, |
| "loss": 1.5764, |
| "step": 2602 |
| }, |
| { |
| "epoch": 2.626071176529892, |
| "grad_norm": 0.15241722364084348, |
| "learning_rate": 5e-05, |
| "loss": 1.5547, |
| "step": 2603 |
| }, |
| { |
| "epoch": 2.627079342675673, |
| "grad_norm": 0.1502994733216112, |
| "learning_rate": 5e-05, |
| "loss": 1.5685, |
| "step": 2604 |
| }, |
| { |
| "epoch": 2.6280875088214537, |
| "grad_norm": 0.16074365492554207, |
| "learning_rate": 5e-05, |
| "loss": 1.5758, |
| "step": 2605 |
| }, |
| { |
| "epoch": 2.6290956749672345, |
| "grad_norm": 0.14430981056083628, |
| "learning_rate": 5e-05, |
| "loss": 1.5736, |
| "step": 2606 |
| }, |
| { |
| "epoch": 2.6301038411130153, |
| "grad_norm": 0.16123995936065388, |
| "learning_rate": 5e-05, |
| "loss": 1.5663, |
| "step": 2607 |
| }, |
| { |
| "epoch": 2.631112007258796, |
| "grad_norm": 0.14231002853104405, |
| "learning_rate": 5e-05, |
| "loss": 1.5559, |
| "step": 2608 |
| }, |
| { |
| "epoch": 2.632120173404577, |
| "grad_norm": 0.14228451160268388, |
| "learning_rate": 5e-05, |
| "loss": 1.5378, |
| "step": 2609 |
| }, |
| { |
| "epoch": 2.6331283395503577, |
| "grad_norm": 0.14162038187228881, |
| "learning_rate": 5e-05, |
| "loss": 1.5657, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.634136505696139, |
| "grad_norm": 0.16034654380517346, |
| "learning_rate": 5e-05, |
| "loss": 1.5607, |
| "step": 2611 |
| }, |
| { |
| "epoch": 2.6351446718419194, |
| "grad_norm": 0.1406556428280142, |
| "learning_rate": 5e-05, |
| "loss": 1.5773, |
| "step": 2612 |
| }, |
| { |
| "epoch": 2.6361528379877006, |
| "grad_norm": 0.1574208300546788, |
| "learning_rate": 5e-05, |
| "loss": 1.5687, |
| "step": 2613 |
| }, |
| { |
| "epoch": 2.637161004133481, |
| "grad_norm": 0.129992555684169, |
| "learning_rate": 5e-05, |
| "loss": 1.5761, |
| "step": 2614 |
| }, |
| { |
| "epoch": 2.6381691702792622, |
| "grad_norm": 0.1479122083673606, |
| "learning_rate": 5e-05, |
| "loss": 1.5715, |
| "step": 2615 |
| }, |
| { |
| "epoch": 2.6391773364250426, |
| "grad_norm": 0.17401025097129166, |
| "learning_rate": 5e-05, |
| "loss": 1.5764, |
| "step": 2616 |
| }, |
| { |
| "epoch": 2.640185502570824, |
| "grad_norm": 0.16327524485219438, |
| "learning_rate": 5e-05, |
| "loss": 1.5734, |
| "step": 2617 |
| }, |
| { |
| "epoch": 2.6411936687166047, |
| "grad_norm": 0.15186897128683868, |
| "learning_rate": 5e-05, |
| "loss": 1.5752, |
| "step": 2618 |
| }, |
| { |
| "epoch": 2.6422018348623855, |
| "grad_norm": 0.14122002915712664, |
| "learning_rate": 5e-05, |
| "loss": 1.5752, |
| "step": 2619 |
| }, |
| { |
| "epoch": 2.6432100010081663, |
| "grad_norm": 0.14625103956516458, |
| "learning_rate": 5e-05, |
| "loss": 1.5548, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.644218167153947, |
| "grad_norm": 0.15319911711521672, |
| "learning_rate": 5e-05, |
| "loss": 1.5782, |
| "step": 2621 |
| }, |
| { |
| "epoch": 2.645226333299728, |
| "grad_norm": 0.14080663787259234, |
| "learning_rate": 5e-05, |
| "loss": 1.5309, |
| "step": 2622 |
| }, |
| { |
| "epoch": 2.6462344994455087, |
| "grad_norm": 0.14200037232407361, |
| "learning_rate": 5e-05, |
| "loss": 1.5609, |
| "step": 2623 |
| }, |
| { |
| "epoch": 2.6472426655912895, |
| "grad_norm": 0.14548049077056605, |
| "learning_rate": 5e-05, |
| "loss": 1.5662, |
| "step": 2624 |
| }, |
| { |
| "epoch": 2.6482508317370703, |
| "grad_norm": 0.14588858561313403, |
| "learning_rate": 5e-05, |
| "loss": 1.5715, |
| "step": 2625 |
| }, |
| { |
| "epoch": 2.649258997882851, |
| "grad_norm": 0.13422997913312334, |
| "learning_rate": 5e-05, |
| "loss": 1.5576, |
| "step": 2626 |
| }, |
| { |
| "epoch": 2.650267164028632, |
| "grad_norm": 0.14373272226483658, |
| "learning_rate": 5e-05, |
| "loss": 1.5654, |
| "step": 2627 |
| }, |
| { |
| "epoch": 2.6512753301744127, |
| "grad_norm": 0.14397101488478398, |
| "learning_rate": 5e-05, |
| "loss": 1.5482, |
| "step": 2628 |
| }, |
| { |
| "epoch": 2.6522834963201936, |
| "grad_norm": 0.1399424865762421, |
| "learning_rate": 5e-05, |
| "loss": 1.5737, |
| "step": 2629 |
| }, |
| { |
| "epoch": 2.6532916624659744, |
| "grad_norm": 0.14886678980576779, |
| "learning_rate": 5e-05, |
| "loss": 1.5883, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.654299828611755, |
| "grad_norm": 0.14653892263537838, |
| "learning_rate": 5e-05, |
| "loss": 1.5725, |
| "step": 2631 |
| }, |
| { |
| "epoch": 2.655307994757536, |
| "grad_norm": 0.13657992074184186, |
| "learning_rate": 5e-05, |
| "loss": 1.5558, |
| "step": 2632 |
| }, |
| { |
| "epoch": 2.656316160903317, |
| "grad_norm": 0.16181280045845536, |
| "learning_rate": 5e-05, |
| "loss": 1.5665, |
| "step": 2633 |
| }, |
| { |
| "epoch": 2.6573243270490976, |
| "grad_norm": 0.15163898001388915, |
| "learning_rate": 5e-05, |
| "loss": 1.5636, |
| "step": 2634 |
| }, |
| { |
| "epoch": 2.6583324931948784, |
| "grad_norm": 0.14391052900139392, |
| "learning_rate": 5e-05, |
| "loss": 1.5716, |
| "step": 2635 |
| }, |
| { |
| "epoch": 2.659340659340659, |
| "grad_norm": 0.15441727511277034, |
| "learning_rate": 5e-05, |
| "loss": 1.5658, |
| "step": 2636 |
| }, |
| { |
| "epoch": 2.66034882548644, |
| "grad_norm": 0.18434934741540565, |
| "learning_rate": 5e-05, |
| "loss": 1.5583, |
| "step": 2637 |
| }, |
| { |
| "epoch": 2.661356991632221, |
| "grad_norm": 0.14675093820621574, |
| "learning_rate": 5e-05, |
| "loss": 1.5822, |
| "step": 2638 |
| }, |
| { |
| "epoch": 2.6623651577780016, |
| "grad_norm": 0.17292648886603113, |
| "learning_rate": 5e-05, |
| "loss": 1.5895, |
| "step": 2639 |
| }, |
| { |
| "epoch": 2.663373323923783, |
| "grad_norm": 0.13899788061020074, |
| "learning_rate": 5e-05, |
| "loss": 1.5786, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.6643814900695633, |
| "grad_norm": 0.16613231489632996, |
| "learning_rate": 5e-05, |
| "loss": 1.5556, |
| "step": 2641 |
| }, |
| { |
| "epoch": 2.6653896562153445, |
| "grad_norm": 0.17077450676770634, |
| "learning_rate": 5e-05, |
| "loss": 1.5697, |
| "step": 2642 |
| }, |
| { |
| "epoch": 2.666397822361125, |
| "grad_norm": 0.1403577426269955, |
| "learning_rate": 5e-05, |
| "loss": 1.5632, |
| "step": 2643 |
| }, |
| { |
| "epoch": 2.667405988506906, |
| "grad_norm": 0.13213661449923972, |
| "learning_rate": 5e-05, |
| "loss": 1.5613, |
| "step": 2644 |
| }, |
| { |
| "epoch": 2.6684141546526865, |
| "grad_norm": 0.14106671427970552, |
| "learning_rate": 5e-05, |
| "loss": 1.5725, |
| "step": 2645 |
| }, |
| { |
| "epoch": 2.6694223207984678, |
| "grad_norm": 0.1366834656968961, |
| "learning_rate": 5e-05, |
| "loss": 1.5797, |
| "step": 2646 |
| }, |
| { |
| "epoch": 2.6704304869442486, |
| "grad_norm": 0.14767004453703217, |
| "learning_rate": 5e-05, |
| "loss": 1.5517, |
| "step": 2647 |
| }, |
| { |
| "epoch": 2.6714386530900294, |
| "grad_norm": 0.13292170771715223, |
| "learning_rate": 5e-05, |
| "loss": 1.5689, |
| "step": 2648 |
| }, |
| { |
| "epoch": 2.67244681923581, |
| "grad_norm": 0.14250366847001242, |
| "learning_rate": 5e-05, |
| "loss": 1.5439, |
| "step": 2649 |
| }, |
| { |
| "epoch": 2.673454985381591, |
| "grad_norm": 0.13990853172352147, |
| "learning_rate": 5e-05, |
| "loss": 1.5605, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.674463151527372, |
| "grad_norm": 0.1342108354881665, |
| "learning_rate": 5e-05, |
| "loss": 1.5474, |
| "step": 2651 |
| }, |
| { |
| "epoch": 2.6754713176731526, |
| "grad_norm": 0.14095351494547906, |
| "learning_rate": 5e-05, |
| "loss": 1.562, |
| "step": 2652 |
| }, |
| { |
| "epoch": 2.6764794838189334, |
| "grad_norm": 0.13632871187159845, |
| "learning_rate": 5e-05, |
| "loss": 1.5788, |
| "step": 2653 |
| }, |
| { |
| "epoch": 2.6774876499647142, |
| "grad_norm": 0.13741013059016396, |
| "learning_rate": 5e-05, |
| "loss": 1.5809, |
| "step": 2654 |
| }, |
| { |
| "epoch": 2.678495816110495, |
| "grad_norm": 0.14104148902284483, |
| "learning_rate": 5e-05, |
| "loss": 1.568, |
| "step": 2655 |
| }, |
| { |
| "epoch": 2.679503982256276, |
| "grad_norm": 0.13634681877896998, |
| "learning_rate": 5e-05, |
| "loss": 1.5826, |
| "step": 2656 |
| }, |
| { |
| "epoch": 2.6805121484020566, |
| "grad_norm": 0.1400690608553844, |
| "learning_rate": 5e-05, |
| "loss": 1.5479, |
| "step": 2657 |
| }, |
| { |
| "epoch": 2.6815203145478375, |
| "grad_norm": 0.1381298076752533, |
| "learning_rate": 5e-05, |
| "loss": 1.5753, |
| "step": 2658 |
| }, |
| { |
| "epoch": 2.6825284806936183, |
| "grad_norm": 0.14096150667236787, |
| "learning_rate": 5e-05, |
| "loss": 1.5556, |
| "step": 2659 |
| }, |
| { |
| "epoch": 2.683536646839399, |
| "grad_norm": 0.15006887442688122, |
| "learning_rate": 5e-05, |
| "loss": 1.5747, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.68454481298518, |
| "grad_norm": 0.1378134811649124, |
| "learning_rate": 5e-05, |
| "loss": 1.5687, |
| "step": 2661 |
| }, |
| { |
| "epoch": 2.6855529791309607, |
| "grad_norm": 0.2270710658723629, |
| "learning_rate": 5e-05, |
| "loss": 1.5771, |
| "step": 2662 |
| }, |
| { |
| "epoch": 2.6865611452767415, |
| "grad_norm": 0.14058235952025902, |
| "learning_rate": 5e-05, |
| "loss": 1.5646, |
| "step": 2663 |
| }, |
| { |
| "epoch": 2.6875693114225223, |
| "grad_norm": 0.12594815732088815, |
| "learning_rate": 5e-05, |
| "loss": 1.5576, |
| "step": 2664 |
| }, |
| { |
| "epoch": 2.688577477568303, |
| "grad_norm": 0.1399517021609818, |
| "learning_rate": 5e-05, |
| "loss": 1.5728, |
| "step": 2665 |
| }, |
| { |
| "epoch": 2.689585643714084, |
| "grad_norm": 0.13174605200545916, |
| "learning_rate": 5e-05, |
| "loss": 1.566, |
| "step": 2666 |
| }, |
| { |
| "epoch": 2.690593809859865, |
| "grad_norm": 0.13746031870272224, |
| "learning_rate": 5e-05, |
| "loss": 1.569, |
| "step": 2667 |
| }, |
| { |
| "epoch": 2.6916019760056455, |
| "grad_norm": 4.560783413286865, |
| "learning_rate": 5e-05, |
| "loss": 1.5996, |
| "step": 2668 |
| }, |
| { |
| "epoch": 2.692610142151427, |
| "grad_norm": 0.1599729642198486, |
| "learning_rate": 5e-05, |
| "loss": 1.5783, |
| "step": 2669 |
| }, |
| { |
| "epoch": 2.693618308297207, |
| "grad_norm": 0.1328849115559514, |
| "learning_rate": 5e-05, |
| "loss": 1.5763, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.6946264744429884, |
| "grad_norm": 0.1576681364906905, |
| "learning_rate": 5e-05, |
| "loss": 1.5723, |
| "step": 2671 |
| }, |
| { |
| "epoch": 2.695634640588769, |
| "grad_norm": 0.1366040880089965, |
| "learning_rate": 5e-05, |
| "loss": 1.5641, |
| "step": 2672 |
| }, |
| { |
| "epoch": 2.69664280673455, |
| "grad_norm": 0.1580270504056786, |
| "learning_rate": 5e-05, |
| "loss": 1.5447, |
| "step": 2673 |
| }, |
| { |
| "epoch": 2.697650972880331, |
| "grad_norm": 0.13771171876035967, |
| "learning_rate": 5e-05, |
| "loss": 1.5708, |
| "step": 2674 |
| }, |
| { |
| "epoch": 2.6986591390261117, |
| "grad_norm": 0.1517424456471761, |
| "learning_rate": 5e-05, |
| "loss": 1.5494, |
| "step": 2675 |
| }, |
| { |
| "epoch": 2.6996673051718925, |
| "grad_norm": 0.14995179976865108, |
| "learning_rate": 5e-05, |
| "loss": 1.5595, |
| "step": 2676 |
| }, |
| { |
| "epoch": 2.7006754713176733, |
| "grad_norm": 2.1810950431818146, |
| "learning_rate": 5e-05, |
| "loss": 1.5817, |
| "step": 2677 |
| }, |
| { |
| "epoch": 2.701683637463454, |
| "grad_norm": 0.1798172607390676, |
| "learning_rate": 5e-05, |
| "loss": 1.5635, |
| "step": 2678 |
| }, |
| { |
| "epoch": 2.702691803609235, |
| "grad_norm": 0.16109832316524236, |
| "learning_rate": 5e-05, |
| "loss": 1.5825, |
| "step": 2679 |
| }, |
| { |
| "epoch": 2.7036999697550157, |
| "grad_norm": 0.14862178006767085, |
| "learning_rate": 5e-05, |
| "loss": 1.5512, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.7047081359007965, |
| "grad_norm": 0.14919683751945761, |
| "learning_rate": 5e-05, |
| "loss": 1.5653, |
| "step": 2681 |
| }, |
| { |
| "epoch": 2.7057163020465773, |
| "grad_norm": 0.20775257275377015, |
| "learning_rate": 5e-05, |
| "loss": 1.5673, |
| "step": 2682 |
| }, |
| { |
| "epoch": 2.706724468192358, |
| "grad_norm": 0.15438174577601985, |
| "learning_rate": 5e-05, |
| "loss": 1.5799, |
| "step": 2683 |
| }, |
| { |
| "epoch": 2.707732634338139, |
| "grad_norm": 0.15569951049593259, |
| "learning_rate": 5e-05, |
| "loss": 1.5618, |
| "step": 2684 |
| }, |
| { |
| "epoch": 2.7087408004839197, |
| "grad_norm": 0.15064037867202598, |
| "learning_rate": 5e-05, |
| "loss": 1.5648, |
| "step": 2685 |
| }, |
| { |
| "epoch": 2.7097489666297006, |
| "grad_norm": 0.15781744910859977, |
| "learning_rate": 5e-05, |
| "loss": 1.5626, |
| "step": 2686 |
| }, |
| { |
| "epoch": 2.7107571327754814, |
| "grad_norm": 0.15466431610953948, |
| "learning_rate": 5e-05, |
| "loss": 1.5609, |
| "step": 2687 |
| }, |
| { |
| "epoch": 2.711765298921262, |
| "grad_norm": 0.15814654807557524, |
| "learning_rate": 5e-05, |
| "loss": 1.5655, |
| "step": 2688 |
| }, |
| { |
| "epoch": 2.712773465067043, |
| "grad_norm": 0.13949634738027647, |
| "learning_rate": 5e-05, |
| "loss": 1.5831, |
| "step": 2689 |
| }, |
| { |
| "epoch": 2.713781631212824, |
| "grad_norm": 0.1406618538641033, |
| "learning_rate": 5e-05, |
| "loss": 1.5663, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.7147897973586046, |
| "grad_norm": 0.13191748568761202, |
| "learning_rate": 5e-05, |
| "loss": 1.5647, |
| "step": 2691 |
| }, |
| { |
| "epoch": 2.7157979635043854, |
| "grad_norm": 0.5074854253225494, |
| "learning_rate": 5e-05, |
| "loss": 1.5717, |
| "step": 2692 |
| }, |
| { |
| "epoch": 2.716806129650166, |
| "grad_norm": 0.13776438838613422, |
| "learning_rate": 5e-05, |
| "loss": 1.5749, |
| "step": 2693 |
| }, |
| { |
| "epoch": 2.717814295795947, |
| "grad_norm": 0.16485203741006418, |
| "learning_rate": 5e-05, |
| "loss": 1.5693, |
| "step": 2694 |
| }, |
| { |
| "epoch": 2.718822461941728, |
| "grad_norm": 0.1398154057360336, |
| "learning_rate": 5e-05, |
| "loss": 1.5709, |
| "step": 2695 |
| }, |
| { |
| "epoch": 2.719830628087509, |
| "grad_norm": 0.1322842540538691, |
| "learning_rate": 5e-05, |
| "loss": 1.5669, |
| "step": 2696 |
| }, |
| { |
| "epoch": 2.7208387942332894, |
| "grad_norm": 0.14578755614140504, |
| "learning_rate": 5e-05, |
| "loss": 1.5893, |
| "step": 2697 |
| }, |
| { |
| "epoch": 2.7218469603790707, |
| "grad_norm": 0.13249217510021813, |
| "learning_rate": 5e-05, |
| "loss": 1.5695, |
| "step": 2698 |
| }, |
| { |
| "epoch": 2.722855126524851, |
| "grad_norm": 0.13230498419415554, |
| "learning_rate": 5e-05, |
| "loss": 1.5581, |
| "step": 2699 |
| }, |
| { |
| "epoch": 2.7238632926706323, |
| "grad_norm": 0.140920778481646, |
| "learning_rate": 5e-05, |
| "loss": 1.5528, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.7248714588164127, |
| "grad_norm": 0.1433753585777024, |
| "learning_rate": 5e-05, |
| "loss": 1.5704, |
| "step": 2701 |
| }, |
| { |
| "epoch": 2.725879624962194, |
| "grad_norm": 0.1336399638319816, |
| "learning_rate": 5e-05, |
| "loss": 1.593, |
| "step": 2702 |
| }, |
| { |
| "epoch": 2.7268877911079747, |
| "grad_norm": 0.14560257221000472, |
| "learning_rate": 5e-05, |
| "loss": 1.5345, |
| "step": 2703 |
| }, |
| { |
| "epoch": 2.7278959572537556, |
| "grad_norm": 0.1836365416353928, |
| "learning_rate": 5e-05, |
| "loss": 1.5812, |
| "step": 2704 |
| }, |
| { |
| "epoch": 2.7289041233995364, |
| "grad_norm": 0.14009232696115306, |
| "learning_rate": 5e-05, |
| "loss": 1.5655, |
| "step": 2705 |
| }, |
| { |
| "epoch": 2.729912289545317, |
| "grad_norm": 0.15768817911278674, |
| "learning_rate": 5e-05, |
| "loss": 1.5618, |
| "step": 2706 |
| }, |
| { |
| "epoch": 2.730920455691098, |
| "grad_norm": 0.1357757946680182, |
| "learning_rate": 5e-05, |
| "loss": 1.5679, |
| "step": 2707 |
| }, |
| { |
| "epoch": 2.731928621836879, |
| "grad_norm": 0.1361176788290324, |
| "learning_rate": 5e-05, |
| "loss": 1.557, |
| "step": 2708 |
| }, |
| { |
| "epoch": 2.7329367879826596, |
| "grad_norm": 0.1552287534850647, |
| "learning_rate": 5e-05, |
| "loss": 1.567, |
| "step": 2709 |
| }, |
| { |
| "epoch": 2.7339449541284404, |
| "grad_norm": 0.8550126356500881, |
| "learning_rate": 5e-05, |
| "loss": 1.573, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.734953120274221, |
| "grad_norm": 0.14590212131755376, |
| "learning_rate": 5e-05, |
| "loss": 1.5705, |
| "step": 2711 |
| }, |
| { |
| "epoch": 2.735961286420002, |
| "grad_norm": 0.14077732180407562, |
| "learning_rate": 5e-05, |
| "loss": 1.5711, |
| "step": 2712 |
| }, |
| { |
| "epoch": 2.736969452565783, |
| "grad_norm": 0.14271742070831292, |
| "learning_rate": 5e-05, |
| "loss": 1.5651, |
| "step": 2713 |
| }, |
| { |
| "epoch": 2.7379776187115636, |
| "grad_norm": 0.13449509255344788, |
| "learning_rate": 5e-05, |
| "loss": 1.5657, |
| "step": 2714 |
| }, |
| { |
| "epoch": 2.7389857848573445, |
| "grad_norm": 0.1428486321025379, |
| "learning_rate": 5e-05, |
| "loss": 1.5692, |
| "step": 2715 |
| }, |
| { |
| "epoch": 2.7399939510031253, |
| "grad_norm": 0.14030467155716347, |
| "learning_rate": 5e-05, |
| "loss": 1.5568, |
| "step": 2716 |
| }, |
| { |
| "epoch": 2.741002117148906, |
| "grad_norm": 0.13805285727527963, |
| "learning_rate": 5e-05, |
| "loss": 1.5836, |
| "step": 2717 |
| }, |
| { |
| "epoch": 2.742010283294687, |
| "grad_norm": 0.14366377411261483, |
| "learning_rate": 5e-05, |
| "loss": 1.545, |
| "step": 2718 |
| }, |
| { |
| "epoch": 2.7430184494404677, |
| "grad_norm": 0.14521750353616375, |
| "learning_rate": 5e-05, |
| "loss": 1.5728, |
| "step": 2719 |
| }, |
| { |
| "epoch": 2.7440266155862485, |
| "grad_norm": 0.15205719509608184, |
| "learning_rate": 5e-05, |
| "loss": 1.5549, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.7450347817320293, |
| "grad_norm": 0.15011156624584093, |
| "learning_rate": 5e-05, |
| "loss": 1.5611, |
| "step": 2721 |
| }, |
| { |
| "epoch": 2.74604294787781, |
| "grad_norm": 0.13791270226630295, |
| "learning_rate": 5e-05, |
| "loss": 1.5597, |
| "step": 2722 |
| }, |
| { |
| "epoch": 2.747051114023591, |
| "grad_norm": 0.13822033117826582, |
| "learning_rate": 5e-05, |
| "loss": 1.5685, |
| "step": 2723 |
| }, |
| { |
| "epoch": 2.7480592801693717, |
| "grad_norm": 0.13309371437546486, |
| "learning_rate": 5e-05, |
| "loss": 1.5737, |
| "step": 2724 |
| }, |
| { |
| "epoch": 2.749067446315153, |
| "grad_norm": 0.13956376666855283, |
| "learning_rate": 5e-05, |
| "loss": 1.5533, |
| "step": 2725 |
| }, |
| { |
| "epoch": 2.7500756124609333, |
| "grad_norm": 0.14242752187426724, |
| "learning_rate": 5e-05, |
| "loss": 1.5695, |
| "step": 2726 |
| }, |
| { |
| "epoch": 2.7510837786067146, |
| "grad_norm": 0.13778114699423819, |
| "learning_rate": 5e-05, |
| "loss": 1.5566, |
| "step": 2727 |
| }, |
| { |
| "epoch": 2.752091944752495, |
| "grad_norm": 0.1252140442866245, |
| "learning_rate": 5e-05, |
| "loss": 1.5619, |
| "step": 2728 |
| }, |
| { |
| "epoch": 2.753100110898276, |
| "grad_norm": 0.14450735002596674, |
| "learning_rate": 5e-05, |
| "loss": 1.5588, |
| "step": 2729 |
| }, |
| { |
| "epoch": 2.7541082770440566, |
| "grad_norm": 0.12444546171527433, |
| "learning_rate": 5e-05, |
| "loss": 1.5662, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.755116443189838, |
| "grad_norm": 0.141004291675775, |
| "learning_rate": 5e-05, |
| "loss": 1.572, |
| "step": 2731 |
| }, |
| { |
| "epoch": 2.7561246093356186, |
| "grad_norm": 0.13199288201421328, |
| "learning_rate": 5e-05, |
| "loss": 1.5861, |
| "step": 2732 |
| }, |
| { |
| "epoch": 2.7571327754813995, |
| "grad_norm": 0.12793988338331708, |
| "learning_rate": 5e-05, |
| "loss": 1.5785, |
| "step": 2733 |
| }, |
| { |
| "epoch": 2.7581409416271803, |
| "grad_norm": 0.14342057001903644, |
| "learning_rate": 5e-05, |
| "loss": 1.5567, |
| "step": 2734 |
| }, |
| { |
| "epoch": 2.759149107772961, |
| "grad_norm": 0.14450779133652514, |
| "learning_rate": 5e-05, |
| "loss": 1.5822, |
| "step": 2735 |
| }, |
| { |
| "epoch": 2.760157273918742, |
| "grad_norm": 0.1405320308190688, |
| "learning_rate": 5e-05, |
| "loss": 1.5595, |
| "step": 2736 |
| }, |
| { |
| "epoch": 2.7611654400645227, |
| "grad_norm": 0.1286823927500477, |
| "learning_rate": 5e-05, |
| "loss": 1.5511, |
| "step": 2737 |
| }, |
| { |
| "epoch": 2.7621736062103035, |
| "grad_norm": 0.15254126540044513, |
| "learning_rate": 5e-05, |
| "loss": 1.5599, |
| "step": 2738 |
| }, |
| { |
| "epoch": 2.7631817723560843, |
| "grad_norm": 0.1332062281330069, |
| "learning_rate": 5e-05, |
| "loss": 1.563, |
| "step": 2739 |
| }, |
| { |
| "epoch": 2.764189938501865, |
| "grad_norm": 0.1307457427111258, |
| "learning_rate": 5e-05, |
| "loss": 1.5617, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.765198104647646, |
| "grad_norm": 0.13375355501400343, |
| "learning_rate": 5e-05, |
| "loss": 1.5766, |
| "step": 2741 |
| }, |
| { |
| "epoch": 2.7662062707934267, |
| "grad_norm": 0.13102102704849145, |
| "learning_rate": 5e-05, |
| "loss": 1.5598, |
| "step": 2742 |
| }, |
| { |
| "epoch": 2.7672144369392075, |
| "grad_norm": 0.1344654670681869, |
| "learning_rate": 5e-05, |
| "loss": 1.5709, |
| "step": 2743 |
| }, |
| { |
| "epoch": 2.7682226030849884, |
| "grad_norm": 0.135632915147248, |
| "learning_rate": 5e-05, |
| "loss": 1.5623, |
| "step": 2744 |
| }, |
| { |
| "epoch": 2.769230769230769, |
| "grad_norm": 0.14105458578703645, |
| "learning_rate": 5e-05, |
| "loss": 1.5459, |
| "step": 2745 |
| }, |
| { |
| "epoch": 2.77023893537655, |
| "grad_norm": 0.1330217189753598, |
| "learning_rate": 5e-05, |
| "loss": 1.5593, |
| "step": 2746 |
| }, |
| { |
| "epoch": 2.771247101522331, |
| "grad_norm": 0.13021194087012497, |
| "learning_rate": 5e-05, |
| "loss": 1.5572, |
| "step": 2747 |
| }, |
| { |
| "epoch": 2.7722552676681116, |
| "grad_norm": 0.1300297855746897, |
| "learning_rate": 5e-05, |
| "loss": 1.5658, |
| "step": 2748 |
| }, |
| { |
| "epoch": 2.7732634338138924, |
| "grad_norm": 0.1370134739303623, |
| "learning_rate": 5e-05, |
| "loss": 1.5621, |
| "step": 2749 |
| }, |
| { |
| "epoch": 2.774271599959673, |
| "grad_norm": 0.13816320034814938, |
| "learning_rate": 5e-05, |
| "loss": 1.5893, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.775279766105454, |
| "grad_norm": 0.14272754344183877, |
| "learning_rate": 5e-05, |
| "loss": 1.5436, |
| "step": 2751 |
| }, |
| { |
| "epoch": 2.7762879322512353, |
| "grad_norm": 0.13315789617751309, |
| "learning_rate": 5e-05, |
| "loss": 1.5773, |
| "step": 2752 |
| }, |
| { |
| "epoch": 2.7772960983970156, |
| "grad_norm": 0.14393952562946477, |
| "learning_rate": 5e-05, |
| "loss": 1.5576, |
| "step": 2753 |
| }, |
| { |
| "epoch": 2.778304264542797, |
| "grad_norm": 0.14032685806148001, |
| "learning_rate": 5e-05, |
| "loss": 1.567, |
| "step": 2754 |
| }, |
| { |
| "epoch": 2.7793124306885773, |
| "grad_norm": 0.15032876140994053, |
| "learning_rate": 5e-05, |
| "loss": 1.5859, |
| "step": 2755 |
| }, |
| { |
| "epoch": 2.7803205968343585, |
| "grad_norm": 0.14094821098163882, |
| "learning_rate": 5e-05, |
| "loss": 1.5663, |
| "step": 2756 |
| }, |
| { |
| "epoch": 2.781328762980139, |
| "grad_norm": 0.14430135424273705, |
| "learning_rate": 5e-05, |
| "loss": 1.5515, |
| "step": 2757 |
| }, |
| { |
| "epoch": 2.78233692912592, |
| "grad_norm": 0.14519553615256653, |
| "learning_rate": 5e-05, |
| "loss": 1.5588, |
| "step": 2758 |
| }, |
| { |
| "epoch": 2.783345095271701, |
| "grad_norm": 0.21843890586032347, |
| "learning_rate": 5e-05, |
| "loss": 1.5949, |
| "step": 2759 |
| }, |
| { |
| "epoch": 2.7843532614174817, |
| "grad_norm": 0.14087284318724133, |
| "learning_rate": 5e-05, |
| "loss": 1.5634, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.7853614275632625, |
| "grad_norm": 0.13897524686225557, |
| "learning_rate": 5e-05, |
| "loss": 1.5929, |
| "step": 2761 |
| }, |
| { |
| "epoch": 2.7863695937090434, |
| "grad_norm": 0.1403214060798171, |
| "learning_rate": 5e-05, |
| "loss": 1.5688, |
| "step": 2762 |
| }, |
| { |
| "epoch": 2.787377759854824, |
| "grad_norm": 0.13466657363854218, |
| "learning_rate": 5e-05, |
| "loss": 1.5429, |
| "step": 2763 |
| }, |
| { |
| "epoch": 2.788385926000605, |
| "grad_norm": 0.14004040045303778, |
| "learning_rate": 5e-05, |
| "loss": 1.5561, |
| "step": 2764 |
| }, |
| { |
| "epoch": 2.789394092146386, |
| "grad_norm": 0.12908514859637493, |
| "learning_rate": 5e-05, |
| "loss": 1.567, |
| "step": 2765 |
| }, |
| { |
| "epoch": 2.7904022582921666, |
| "grad_norm": 0.13545587394709968, |
| "learning_rate": 5e-05, |
| "loss": 1.5735, |
| "step": 2766 |
| }, |
| { |
| "epoch": 2.7914104244379474, |
| "grad_norm": 0.14099444514004147, |
| "learning_rate": 5e-05, |
| "loss": 1.5572, |
| "step": 2767 |
| }, |
| { |
| "epoch": 2.792418590583728, |
| "grad_norm": 0.12851123246323942, |
| "learning_rate": 5e-05, |
| "loss": 1.5647, |
| "step": 2768 |
| }, |
| { |
| "epoch": 2.793426756729509, |
| "grad_norm": 0.1294860650724102, |
| "learning_rate": 5e-05, |
| "loss": 1.5632, |
| "step": 2769 |
| }, |
| { |
| "epoch": 2.79443492287529, |
| "grad_norm": 0.1545242303048526, |
| "learning_rate": 5e-05, |
| "loss": 1.5623, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.7954430890210706, |
| "grad_norm": 0.13293580747047254, |
| "learning_rate": 5e-05, |
| "loss": 1.5631, |
| "step": 2771 |
| }, |
| { |
| "epoch": 2.7964512551668514, |
| "grad_norm": 0.1677966111788506, |
| "learning_rate": 5e-05, |
| "loss": 1.5661, |
| "step": 2772 |
| }, |
| { |
| "epoch": 2.7974594213126323, |
| "grad_norm": 0.1373898025220244, |
| "learning_rate": 5e-05, |
| "loss": 1.5724, |
| "step": 2773 |
| }, |
| { |
| "epoch": 2.798467587458413, |
| "grad_norm": 0.15485397737946918, |
| "learning_rate": 5e-05, |
| "loss": 1.5709, |
| "step": 2774 |
| }, |
| { |
| "epoch": 2.799475753604194, |
| "grad_norm": 0.13026482998146938, |
| "learning_rate": 5e-05, |
| "loss": 1.5582, |
| "step": 2775 |
| }, |
| { |
| "epoch": 2.8004839197499747, |
| "grad_norm": 0.14900467029658052, |
| "learning_rate": 5e-05, |
| "loss": 1.569, |
| "step": 2776 |
| }, |
| { |
| "epoch": 2.8014920858957555, |
| "grad_norm": 0.15753731898029472, |
| "learning_rate": 5e-05, |
| "loss": 1.5671, |
| "step": 2777 |
| }, |
| { |
| "epoch": 2.8025002520415363, |
| "grad_norm": 0.14340045158388026, |
| "learning_rate": 5e-05, |
| "loss": 1.5746, |
| "step": 2778 |
| }, |
| { |
| "epoch": 2.803508418187317, |
| "grad_norm": 0.13741597876550288, |
| "learning_rate": 5e-05, |
| "loss": 1.582, |
| "step": 2779 |
| }, |
| { |
| "epoch": 2.804516584333098, |
| "grad_norm": 0.15596253708049596, |
| "learning_rate": 5e-05, |
| "loss": 1.5475, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.805524750478879, |
| "grad_norm": 0.14072537992748982, |
| "learning_rate": 5e-05, |
| "loss": 1.5766, |
| "step": 2781 |
| }, |
| { |
| "epoch": 2.8065329166246595, |
| "grad_norm": 0.14150135394284288, |
| "learning_rate": 5e-05, |
| "loss": 1.57, |
| "step": 2782 |
| }, |
| { |
| "epoch": 2.807541082770441, |
| "grad_norm": 0.16124329516191444, |
| "learning_rate": 5e-05, |
| "loss": 1.5729, |
| "step": 2783 |
| }, |
| { |
| "epoch": 2.808549248916221, |
| "grad_norm": 0.1409731887973358, |
| "learning_rate": 5e-05, |
| "loss": 1.5762, |
| "step": 2784 |
| }, |
| { |
| "epoch": 2.8095574150620024, |
| "grad_norm": 0.12994746549707634, |
| "learning_rate": 5e-05, |
| "loss": 1.5647, |
| "step": 2785 |
| }, |
| { |
| "epoch": 2.8105655812077828, |
| "grad_norm": 0.1337149306330427, |
| "learning_rate": 5e-05, |
| "loss": 1.5589, |
| "step": 2786 |
| }, |
| { |
| "epoch": 2.811573747353564, |
| "grad_norm": 0.14527439997833774, |
| "learning_rate": 5e-05, |
| "loss": 1.5571, |
| "step": 2787 |
| }, |
| { |
| "epoch": 2.812581913499345, |
| "grad_norm": 0.14729537346535954, |
| "learning_rate": 5e-05, |
| "loss": 1.5582, |
| "step": 2788 |
| }, |
| { |
| "epoch": 2.8135900796451256, |
| "grad_norm": 0.13523666939952828, |
| "learning_rate": 5e-05, |
| "loss": 1.5811, |
| "step": 2789 |
| }, |
| { |
| "epoch": 2.8145982457909065, |
| "grad_norm": 0.13471718692012805, |
| "learning_rate": 5e-05, |
| "loss": 1.56, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.8156064119366873, |
| "grad_norm": 0.14241905055575318, |
| "learning_rate": 5e-05, |
| "loss": 1.5552, |
| "step": 2791 |
| }, |
| { |
| "epoch": 2.816614578082468, |
| "grad_norm": 0.15117704580718153, |
| "learning_rate": 5e-05, |
| "loss": 1.5667, |
| "step": 2792 |
| }, |
| { |
| "epoch": 2.817622744228249, |
| "grad_norm": 0.129569133240046, |
| "learning_rate": 5e-05, |
| "loss": 1.5584, |
| "step": 2793 |
| }, |
| { |
| "epoch": 2.8186309103740297, |
| "grad_norm": 0.13326536091885646, |
| "learning_rate": 5e-05, |
| "loss": 1.5528, |
| "step": 2794 |
| }, |
| { |
| "epoch": 2.8196390765198105, |
| "grad_norm": 0.14180855374306933, |
| "learning_rate": 5e-05, |
| "loss": 1.5659, |
| "step": 2795 |
| }, |
| { |
| "epoch": 2.8206472426655913, |
| "grad_norm": 0.13591751704326935, |
| "learning_rate": 5e-05, |
| "loss": 1.5587, |
| "step": 2796 |
| }, |
| { |
| "epoch": 2.821655408811372, |
| "grad_norm": 0.12936260419202925, |
| "learning_rate": 5e-05, |
| "loss": 1.5539, |
| "step": 2797 |
| }, |
| { |
| "epoch": 2.822663574957153, |
| "grad_norm": 0.13430709757077128, |
| "learning_rate": 5e-05, |
| "loss": 1.5658, |
| "step": 2798 |
| }, |
| { |
| "epoch": 2.8236717411029337, |
| "grad_norm": 0.13930112727317112, |
| "learning_rate": 5e-05, |
| "loss": 1.5566, |
| "step": 2799 |
| }, |
| { |
| "epoch": 2.8246799072487145, |
| "grad_norm": 0.1394216962524837, |
| "learning_rate": 5e-05, |
| "loss": 1.5691, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.8256880733944953, |
| "grad_norm": 0.13870544987039013, |
| "learning_rate": 5e-05, |
| "loss": 1.5585, |
| "step": 2801 |
| }, |
| { |
| "epoch": 2.826696239540276, |
| "grad_norm": 0.13573910551114407, |
| "learning_rate": 5e-05, |
| "loss": 1.5679, |
| "step": 2802 |
| }, |
| { |
| "epoch": 2.827704405686057, |
| "grad_norm": 0.13141232855559276, |
| "learning_rate": 5e-05, |
| "loss": 1.5571, |
| "step": 2803 |
| }, |
| { |
| "epoch": 2.8287125718318378, |
| "grad_norm": 0.14085927498398315, |
| "learning_rate": 5e-05, |
| "loss": 1.5725, |
| "step": 2804 |
| }, |
| { |
| "epoch": 2.8297207379776186, |
| "grad_norm": 0.1495486355365083, |
| "learning_rate": 5e-05, |
| "loss": 1.5754, |
| "step": 2805 |
| }, |
| { |
| "epoch": 2.8307289041233994, |
| "grad_norm": 0.14377995563695223, |
| "learning_rate": 5e-05, |
| "loss": 1.5577, |
| "step": 2806 |
| }, |
| { |
| "epoch": 2.83173707026918, |
| "grad_norm": 0.13733139961069687, |
| "learning_rate": 5e-05, |
| "loss": 1.5534, |
| "step": 2807 |
| }, |
| { |
| "epoch": 2.8327452364149615, |
| "grad_norm": 0.1459807185548558, |
| "learning_rate": 5e-05, |
| "loss": 1.5683, |
| "step": 2808 |
| }, |
| { |
| "epoch": 2.833753402560742, |
| "grad_norm": 0.14123278862792107, |
| "learning_rate": 5e-05, |
| "loss": 1.5641, |
| "step": 2809 |
| }, |
| { |
| "epoch": 2.834761568706523, |
| "grad_norm": 0.1302876133381825, |
| "learning_rate": 5e-05, |
| "loss": 1.567, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.8357697348523034, |
| "grad_norm": 0.12946097333437906, |
| "learning_rate": 5e-05, |
| "loss": 1.5448, |
| "step": 2811 |
| }, |
| { |
| "epoch": 2.8367779009980847, |
| "grad_norm": 0.141413346599211, |
| "learning_rate": 5e-05, |
| "loss": 1.5502, |
| "step": 2812 |
| }, |
| { |
| "epoch": 2.837786067143865, |
| "grad_norm": 0.1373517445844322, |
| "learning_rate": 5e-05, |
| "loss": 1.5538, |
| "step": 2813 |
| }, |
| { |
| "epoch": 2.8387942332896463, |
| "grad_norm": 0.14637065916716574, |
| "learning_rate": 5e-05, |
| "loss": 1.5619, |
| "step": 2814 |
| }, |
| { |
| "epoch": 2.839802399435427, |
| "grad_norm": 0.1441806648238271, |
| "learning_rate": 5e-05, |
| "loss": 1.5678, |
| "step": 2815 |
| }, |
| { |
| "epoch": 2.840810565581208, |
| "grad_norm": 0.14384298061843082, |
| "learning_rate": 5e-05, |
| "loss": 1.5414, |
| "step": 2816 |
| }, |
| { |
| "epoch": 2.8418187317269887, |
| "grad_norm": 0.15100690742938513, |
| "learning_rate": 5e-05, |
| "loss": 1.5895, |
| "step": 2817 |
| }, |
| { |
| "epoch": 2.8428268978727695, |
| "grad_norm": 0.14521702309872028, |
| "learning_rate": 5e-05, |
| "loss": 1.5636, |
| "step": 2818 |
| }, |
| { |
| "epoch": 2.8438350640185504, |
| "grad_norm": 0.1442222471812924, |
| "learning_rate": 5e-05, |
| "loss": 1.5601, |
| "step": 2819 |
| }, |
| { |
| "epoch": 2.844843230164331, |
| "grad_norm": 0.14102393840045072, |
| "learning_rate": 5e-05, |
| "loss": 1.5619, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.845851396310112, |
| "grad_norm": 0.14387069683105316, |
| "learning_rate": 5e-05, |
| "loss": 1.5604, |
| "step": 2821 |
| }, |
| { |
| "epoch": 2.846859562455893, |
| "grad_norm": 0.14830391380316718, |
| "learning_rate": 5e-05, |
| "loss": 1.5597, |
| "step": 2822 |
| }, |
| { |
| "epoch": 2.8478677286016736, |
| "grad_norm": 0.1390966393194733, |
| "learning_rate": 5e-05, |
| "loss": 1.5673, |
| "step": 2823 |
| }, |
| { |
| "epoch": 2.8488758947474544, |
| "grad_norm": 0.13823246164976574, |
| "learning_rate": 5e-05, |
| "loss": 1.5549, |
| "step": 2824 |
| }, |
| { |
| "epoch": 2.849884060893235, |
| "grad_norm": 0.15290443253208844, |
| "learning_rate": 5e-05, |
| "loss": 1.5464, |
| "step": 2825 |
| }, |
| { |
| "epoch": 2.850892227039016, |
| "grad_norm": 0.14540803890446127, |
| "learning_rate": 5e-05, |
| "loss": 1.5564, |
| "step": 2826 |
| }, |
| { |
| "epoch": 2.851900393184797, |
| "grad_norm": 0.14501993953342943, |
| "learning_rate": 5e-05, |
| "loss": 1.5617, |
| "step": 2827 |
| }, |
| { |
| "epoch": 2.8529085593305776, |
| "grad_norm": 0.14398340860081874, |
| "learning_rate": 5e-05, |
| "loss": 1.5658, |
| "step": 2828 |
| }, |
| { |
| "epoch": 2.8539167254763584, |
| "grad_norm": 0.14518128848972808, |
| "learning_rate": 5e-05, |
| "loss": 1.557, |
| "step": 2829 |
| }, |
| { |
| "epoch": 2.8549248916221392, |
| "grad_norm": 0.14454368726953212, |
| "learning_rate": 5e-05, |
| "loss": 1.576, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.85593305776792, |
| "grad_norm": 0.14806003244528992, |
| "learning_rate": 5e-05, |
| "loss": 1.5659, |
| "step": 2831 |
| }, |
| { |
| "epoch": 2.856941223913701, |
| "grad_norm": 0.1438396358137218, |
| "learning_rate": 5e-05, |
| "loss": 1.5661, |
| "step": 2832 |
| }, |
| { |
| "epoch": 2.8579493900594817, |
| "grad_norm": 0.14870498477600272, |
| "learning_rate": 5e-05, |
| "loss": 1.5676, |
| "step": 2833 |
| }, |
| { |
| "epoch": 2.8589575562052625, |
| "grad_norm": 0.13352249252236617, |
| "learning_rate": 5e-05, |
| "loss": 1.561, |
| "step": 2834 |
| }, |
| { |
| "epoch": 2.8599657223510433, |
| "grad_norm": 0.1398136261294875, |
| "learning_rate": 5e-05, |
| "loss": 1.565, |
| "step": 2835 |
| }, |
| { |
| "epoch": 2.860973888496824, |
| "grad_norm": 0.15341641930861216, |
| "learning_rate": 5e-05, |
| "loss": 1.5592, |
| "step": 2836 |
| }, |
| { |
| "epoch": 2.8619820546426054, |
| "grad_norm": 0.1435822023700197, |
| "learning_rate": 5e-05, |
| "loss": 1.5623, |
| "step": 2837 |
| }, |
| { |
| "epoch": 2.8629902207883857, |
| "grad_norm": 0.13842838786544373, |
| "learning_rate": 5e-05, |
| "loss": 1.548, |
| "step": 2838 |
| }, |
| { |
| "epoch": 2.863998386934167, |
| "grad_norm": 0.14097645285321597, |
| "learning_rate": 5e-05, |
| "loss": 1.5694, |
| "step": 2839 |
| }, |
| { |
| "epoch": 2.8650065530799473, |
| "grad_norm": 0.14074603728496404, |
| "learning_rate": 5e-05, |
| "loss": 1.5753, |
| "step": 2840 |
| }, |
| { |
| "epoch": 2.8660147192257286, |
| "grad_norm": 0.136340337143158, |
| "learning_rate": 5e-05, |
| "loss": 1.5656, |
| "step": 2841 |
| }, |
| { |
| "epoch": 2.867022885371509, |
| "grad_norm": 0.1661909550319213, |
| "learning_rate": 5e-05, |
| "loss": 1.5535, |
| "step": 2842 |
| }, |
| { |
| "epoch": 2.86803105151729, |
| "grad_norm": 0.14300362069607922, |
| "learning_rate": 5e-05, |
| "loss": 1.5716, |
| "step": 2843 |
| }, |
| { |
| "epoch": 2.869039217663071, |
| "grad_norm": 0.13424444297100016, |
| "learning_rate": 5e-05, |
| "loss": 1.5575, |
| "step": 2844 |
| }, |
| { |
| "epoch": 2.870047383808852, |
| "grad_norm": 0.14593598021094323, |
| "learning_rate": 5e-05, |
| "loss": 1.5708, |
| "step": 2845 |
| }, |
| { |
| "epoch": 2.8710555499546326, |
| "grad_norm": 0.13718997873999128, |
| "learning_rate": 5e-05, |
| "loss": 1.5716, |
| "step": 2846 |
| }, |
| { |
| "epoch": 2.8720637161004134, |
| "grad_norm": 0.14756508713246896, |
| "learning_rate": 5e-05, |
| "loss": 1.5479, |
| "step": 2847 |
| }, |
| { |
| "epoch": 2.8730718822461943, |
| "grad_norm": 0.13240813416747257, |
| "learning_rate": 5e-05, |
| "loss": 1.5709, |
| "step": 2848 |
| }, |
| { |
| "epoch": 2.874080048391975, |
| "grad_norm": 0.14818471917119877, |
| "learning_rate": 5e-05, |
| "loss": 1.5827, |
| "step": 2849 |
| }, |
| { |
| "epoch": 2.875088214537756, |
| "grad_norm": 0.1622234890061836, |
| "learning_rate": 5e-05, |
| "loss": 1.5567, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.8760963806835367, |
| "grad_norm": 0.14840209944205146, |
| "learning_rate": 5e-05, |
| "loss": 1.5647, |
| "step": 2851 |
| }, |
| { |
| "epoch": 2.8771045468293175, |
| "grad_norm": 0.13686801156144324, |
| "learning_rate": 5e-05, |
| "loss": 1.5519, |
| "step": 2852 |
| }, |
| { |
| "epoch": 2.8781127129750983, |
| "grad_norm": 0.1390584020109862, |
| "learning_rate": 5e-05, |
| "loss": 1.5478, |
| "step": 2853 |
| }, |
| { |
| "epoch": 2.879120879120879, |
| "grad_norm": 0.1358182096960893, |
| "learning_rate": 5e-05, |
| "loss": 1.5484, |
| "step": 2854 |
| }, |
| { |
| "epoch": 2.88012904526666, |
| "grad_norm": 0.398577333773907, |
| "learning_rate": 5e-05, |
| "loss": 1.5786, |
| "step": 2855 |
| }, |
| { |
| "epoch": 2.8811372114124407, |
| "grad_norm": 0.1331042940921803, |
| "learning_rate": 5e-05, |
| "loss": 1.58, |
| "step": 2856 |
| }, |
| { |
| "epoch": 2.8821453775582215, |
| "grad_norm": 0.13902477299710286, |
| "learning_rate": 5e-05, |
| "loss": 1.5654, |
| "step": 2857 |
| }, |
| { |
| "epoch": 2.8831535437040023, |
| "grad_norm": 0.13660096150095175, |
| "learning_rate": 5e-05, |
| "loss": 1.5622, |
| "step": 2858 |
| }, |
| { |
| "epoch": 2.884161709849783, |
| "grad_norm": 0.1324123575740831, |
| "learning_rate": 5e-05, |
| "loss": 1.5634, |
| "step": 2859 |
| }, |
| { |
| "epoch": 2.885169875995564, |
| "grad_norm": 0.1419985727170632, |
| "learning_rate": 5e-05, |
| "loss": 1.5446, |
| "step": 2860 |
| }, |
| { |
| "epoch": 2.8861780421413448, |
| "grad_norm": 0.14450454707140364, |
| "learning_rate": 5e-05, |
| "loss": 1.5588, |
| "step": 2861 |
| }, |
| { |
| "epoch": 2.8871862082871256, |
| "grad_norm": 0.13556264678370267, |
| "learning_rate": 5e-05, |
| "loss": 1.5616, |
| "step": 2862 |
| }, |
| { |
| "epoch": 2.8881943744329064, |
| "grad_norm": 0.13924940223913126, |
| "learning_rate": 5e-05, |
| "loss": 1.5582, |
| "step": 2863 |
| }, |
| { |
| "epoch": 2.8892025405786876, |
| "grad_norm": 0.14489037756646053, |
| "learning_rate": 5e-05, |
| "loss": 1.5547, |
| "step": 2864 |
| }, |
| { |
| "epoch": 2.890210706724468, |
| "grad_norm": 0.1364069870015726, |
| "learning_rate": 5e-05, |
| "loss": 1.5613, |
| "step": 2865 |
| }, |
| { |
| "epoch": 2.8912188728702493, |
| "grad_norm": 0.1413793239639967, |
| "learning_rate": 5e-05, |
| "loss": 1.5625, |
| "step": 2866 |
| }, |
| { |
| "epoch": 2.8922270390160296, |
| "grad_norm": 0.14912428347663212, |
| "learning_rate": 5e-05, |
| "loss": 1.5564, |
| "step": 2867 |
| }, |
| { |
| "epoch": 2.893235205161811, |
| "grad_norm": 0.1538535083338703, |
| "learning_rate": 5e-05, |
| "loss": 1.5599, |
| "step": 2868 |
| }, |
| { |
| "epoch": 2.8942433713075912, |
| "grad_norm": 0.14106606384804676, |
| "learning_rate": 5e-05, |
| "loss": 1.5505, |
| "step": 2869 |
| }, |
| { |
| "epoch": 2.8952515374533725, |
| "grad_norm": 0.14601342368943243, |
| "learning_rate": 5e-05, |
| "loss": 1.5743, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.8962597035991533, |
| "grad_norm": 0.1359969686343452, |
| "learning_rate": 5e-05, |
| "loss": 1.5752, |
| "step": 2871 |
| }, |
| { |
| "epoch": 2.897267869744934, |
| "grad_norm": 0.1321428989584743, |
| "learning_rate": 5e-05, |
| "loss": 1.5609, |
| "step": 2872 |
| }, |
| { |
| "epoch": 2.898276035890715, |
| "grad_norm": 0.13621152268777725, |
| "learning_rate": 5e-05, |
| "loss": 1.5699, |
| "step": 2873 |
| }, |
| { |
| "epoch": 2.8992842020364957, |
| "grad_norm": 0.14578823774326305, |
| "learning_rate": 5e-05, |
| "loss": 1.5666, |
| "step": 2874 |
| }, |
| { |
| "epoch": 2.9002923681822765, |
| "grad_norm": 0.13560289941208062, |
| "learning_rate": 5e-05, |
| "loss": 1.5631, |
| "step": 2875 |
| }, |
| { |
| "epoch": 2.9013005343280573, |
| "grad_norm": 0.6578459661435414, |
| "learning_rate": 5e-05, |
| "loss": 1.5493, |
| "step": 2876 |
| }, |
| { |
| "epoch": 2.902308700473838, |
| "grad_norm": 0.139381900708492, |
| "learning_rate": 5e-05, |
| "loss": 1.5577, |
| "step": 2877 |
| }, |
| { |
| "epoch": 2.903316866619619, |
| "grad_norm": 0.13559165116802954, |
| "learning_rate": 5e-05, |
| "loss": 1.5588, |
| "step": 2878 |
| }, |
| { |
| "epoch": 2.9043250327653998, |
| "grad_norm": 0.15146327521944022, |
| "learning_rate": 5e-05, |
| "loss": 1.5661, |
| "step": 2879 |
| }, |
| { |
| "epoch": 2.9053331989111806, |
| "grad_norm": 0.13804288601464923, |
| "learning_rate": 5e-05, |
| "loss": 1.5452, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.9063413650569614, |
| "grad_norm": 0.14744411336595978, |
| "learning_rate": 5e-05, |
| "loss": 1.5637, |
| "step": 2881 |
| }, |
| { |
| "epoch": 2.907349531202742, |
| "grad_norm": 0.14237854990959567, |
| "learning_rate": 5e-05, |
| "loss": 1.5553, |
| "step": 2882 |
| }, |
| { |
| "epoch": 2.908357697348523, |
| "grad_norm": 0.15023980689854555, |
| "learning_rate": 5e-05, |
| "loss": 1.5424, |
| "step": 2883 |
| }, |
| { |
| "epoch": 2.909365863494304, |
| "grad_norm": 0.1429507112014247, |
| "learning_rate": 5e-05, |
| "loss": 1.5445, |
| "step": 2884 |
| }, |
| { |
| "epoch": 2.9103740296400846, |
| "grad_norm": 0.13597749204024304, |
| "learning_rate": 5e-05, |
| "loss": 1.5802, |
| "step": 2885 |
| }, |
| { |
| "epoch": 2.9113821957858654, |
| "grad_norm": 0.14491507670987977, |
| "learning_rate": 5e-05, |
| "loss": 1.562, |
| "step": 2886 |
| }, |
| { |
| "epoch": 2.9123903619316462, |
| "grad_norm": 0.13456355698926403, |
| "learning_rate": 5e-05, |
| "loss": 1.5596, |
| "step": 2887 |
| }, |
| { |
| "epoch": 2.913398528077427, |
| "grad_norm": 0.15444933801070784, |
| "learning_rate": 5e-05, |
| "loss": 1.5605, |
| "step": 2888 |
| }, |
| { |
| "epoch": 2.914406694223208, |
| "grad_norm": 0.1371525313971729, |
| "learning_rate": 5e-05, |
| "loss": 1.5667, |
| "step": 2889 |
| }, |
| { |
| "epoch": 2.9154148603689887, |
| "grad_norm": 0.14412880734806927, |
| "learning_rate": 5e-05, |
| "loss": 1.5656, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.9164230265147695, |
| "grad_norm": 0.14034209949997412, |
| "learning_rate": 5e-05, |
| "loss": 1.5588, |
| "step": 2891 |
| }, |
| { |
| "epoch": 2.9174311926605503, |
| "grad_norm": 0.12952729294769288, |
| "learning_rate": 5e-05, |
| "loss": 1.5442, |
| "step": 2892 |
| }, |
| { |
| "epoch": 2.9184393588063315, |
| "grad_norm": 0.1398162399293511, |
| "learning_rate": 5e-05, |
| "loss": 1.5717, |
| "step": 2893 |
| }, |
| { |
| "epoch": 2.919447524952112, |
| "grad_norm": 0.1329999216346244, |
| "learning_rate": 5e-05, |
| "loss": 1.5395, |
| "step": 2894 |
| }, |
| { |
| "epoch": 2.920455691097893, |
| "grad_norm": 0.15674089864365628, |
| "learning_rate": 5e-05, |
| "loss": 1.5437, |
| "step": 2895 |
| }, |
| { |
| "epoch": 2.9214638572436735, |
| "grad_norm": 0.13429643000428865, |
| "learning_rate": 5e-05, |
| "loss": 1.5494, |
| "step": 2896 |
| }, |
| { |
| "epoch": 2.9224720233894548, |
| "grad_norm": 0.1332516748670609, |
| "learning_rate": 5e-05, |
| "loss": 1.5691, |
| "step": 2897 |
| }, |
| { |
| "epoch": 2.923480189535235, |
| "grad_norm": 0.15368020425840034, |
| "learning_rate": 5e-05, |
| "loss": 1.5516, |
| "step": 2898 |
| }, |
| { |
| "epoch": 2.9244883556810164, |
| "grad_norm": 0.13508568799212733, |
| "learning_rate": 5e-05, |
| "loss": 1.5496, |
| "step": 2899 |
| }, |
| { |
| "epoch": 2.925496521826797, |
| "grad_norm": 0.14579709546992245, |
| "learning_rate": 5e-05, |
| "loss": 1.5559, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.926504687972578, |
| "grad_norm": 0.15426626925981565, |
| "learning_rate": 5e-05, |
| "loss": 1.5659, |
| "step": 2901 |
| }, |
| { |
| "epoch": 2.927512854118359, |
| "grad_norm": 0.138471559025861, |
| "learning_rate": 5e-05, |
| "loss": 1.5656, |
| "step": 2902 |
| }, |
| { |
| "epoch": 2.9285210202641396, |
| "grad_norm": 0.155357177378076, |
| "learning_rate": 5e-05, |
| "loss": 1.5688, |
| "step": 2903 |
| }, |
| { |
| "epoch": 2.9295291864099204, |
| "grad_norm": 0.137418601181882, |
| "learning_rate": 5e-05, |
| "loss": 1.559, |
| "step": 2904 |
| }, |
| { |
| "epoch": 2.9305373525557012, |
| "grad_norm": 0.15213283592124305, |
| "learning_rate": 5e-05, |
| "loss": 1.5697, |
| "step": 2905 |
| }, |
| { |
| "epoch": 2.931545518701482, |
| "grad_norm": 0.1371356248123586, |
| "learning_rate": 5e-05, |
| "loss": 1.5536, |
| "step": 2906 |
| }, |
| { |
| "epoch": 2.932553684847263, |
| "grad_norm": 0.15585636460909322, |
| "learning_rate": 5e-05, |
| "loss": 1.5784, |
| "step": 2907 |
| }, |
| { |
| "epoch": 2.9335618509930437, |
| "grad_norm": 0.13432776199282834, |
| "learning_rate": 5e-05, |
| "loss": 1.5633, |
| "step": 2908 |
| }, |
| { |
| "epoch": 2.9345700171388245, |
| "grad_norm": 0.15101484572955937, |
| "learning_rate": 5e-05, |
| "loss": 1.5638, |
| "step": 2909 |
| }, |
| { |
| "epoch": 2.9355781832846053, |
| "grad_norm": 0.13284986895435724, |
| "learning_rate": 5e-05, |
| "loss": 1.5536, |
| "step": 2910 |
| }, |
| { |
| "epoch": 2.936586349430386, |
| "grad_norm": 0.15239448643115522, |
| "learning_rate": 5e-05, |
| "loss": 1.5542, |
| "step": 2911 |
| }, |
| { |
| "epoch": 2.937594515576167, |
| "grad_norm": 0.13304948631549568, |
| "learning_rate": 5e-05, |
| "loss": 1.5515, |
| "step": 2912 |
| }, |
| { |
| "epoch": 2.9386026817219477, |
| "grad_norm": 0.1493914552614863, |
| "learning_rate": 5e-05, |
| "loss": 1.5786, |
| "step": 2913 |
| }, |
| { |
| "epoch": 2.9396108478677285, |
| "grad_norm": 0.141104588952366, |
| "learning_rate": 5e-05, |
| "loss": 1.5541, |
| "step": 2914 |
| }, |
| { |
| "epoch": 2.9406190140135093, |
| "grad_norm": 0.14744388875991352, |
| "learning_rate": 5e-05, |
| "loss": 1.5695, |
| "step": 2915 |
| }, |
| { |
| "epoch": 2.94162718015929, |
| "grad_norm": 0.15106287768765167, |
| "learning_rate": 5e-05, |
| "loss": 1.5635, |
| "step": 2916 |
| }, |
| { |
| "epoch": 2.942635346305071, |
| "grad_norm": 0.13633154398328548, |
| "learning_rate": 5e-05, |
| "loss": 1.5568, |
| "step": 2917 |
| }, |
| { |
| "epoch": 2.9436435124508518, |
| "grad_norm": 0.14206620322234342, |
| "learning_rate": 5e-05, |
| "loss": 1.5712, |
| "step": 2918 |
| }, |
| { |
| "epoch": 2.9446516785966326, |
| "grad_norm": 0.1400371750496543, |
| "learning_rate": 5e-05, |
| "loss": 1.5481, |
| "step": 2919 |
| }, |
| { |
| "epoch": 2.945659844742414, |
| "grad_norm": 0.15361701555835644, |
| "learning_rate": 5e-05, |
| "loss": 1.5517, |
| "step": 2920 |
| }, |
| { |
| "epoch": 2.946668010888194, |
| "grad_norm": 0.1395143142535918, |
| "learning_rate": 5e-05, |
| "loss": 1.5563, |
| "step": 2921 |
| }, |
| { |
| "epoch": 2.9476761770339754, |
| "grad_norm": 0.15238197270206633, |
| "learning_rate": 5e-05, |
| "loss": 1.5611, |
| "step": 2922 |
| }, |
| { |
| "epoch": 2.948684343179756, |
| "grad_norm": 0.13791269422161265, |
| "learning_rate": 5e-05, |
| "loss": 1.5445, |
| "step": 2923 |
| }, |
| { |
| "epoch": 2.949692509325537, |
| "grad_norm": 0.1527120115790887, |
| "learning_rate": 5e-05, |
| "loss": 1.5591, |
| "step": 2924 |
| }, |
| { |
| "epoch": 2.9507006754713174, |
| "grad_norm": 0.14875816521276122, |
| "learning_rate": 5e-05, |
| "loss": 1.5391, |
| "step": 2925 |
| }, |
| { |
| "epoch": 2.9517088416170987, |
| "grad_norm": 0.14956494592977318, |
| "learning_rate": 5e-05, |
| "loss": 1.548, |
| "step": 2926 |
| }, |
| { |
| "epoch": 2.9527170077628795, |
| "grad_norm": 0.1323459526950249, |
| "learning_rate": 5e-05, |
| "loss": 1.5546, |
| "step": 2927 |
| }, |
| { |
| "epoch": 2.9537251739086603, |
| "grad_norm": 0.16534355366989031, |
| "learning_rate": 5e-05, |
| "loss": 1.5373, |
| "step": 2928 |
| }, |
| { |
| "epoch": 2.954733340054441, |
| "grad_norm": 0.1345830722048253, |
| "learning_rate": 5e-05, |
| "loss": 1.5526, |
| "step": 2929 |
| }, |
| { |
| "epoch": 2.955741506200222, |
| "grad_norm": 0.14475119660699556, |
| "learning_rate": 5e-05, |
| "loss": 1.5604, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.9567496723460027, |
| "grad_norm": 0.1423458248073331, |
| "learning_rate": 5e-05, |
| "loss": 1.5737, |
| "step": 2931 |
| }, |
| { |
| "epoch": 2.9577578384917835, |
| "grad_norm": 0.12953891111963645, |
| "learning_rate": 5e-05, |
| "loss": 1.5623, |
| "step": 2932 |
| }, |
| { |
| "epoch": 2.9587660046375643, |
| "grad_norm": 0.14369391910038792, |
| "learning_rate": 5e-05, |
| "loss": 1.537, |
| "step": 2933 |
| }, |
| { |
| "epoch": 2.959774170783345, |
| "grad_norm": 0.15086425876239956, |
| "learning_rate": 5e-05, |
| "loss": 1.5727, |
| "step": 2934 |
| }, |
| { |
| "epoch": 2.960782336929126, |
| "grad_norm": 0.14151325112581856, |
| "learning_rate": 5e-05, |
| "loss": 1.5504, |
| "step": 2935 |
| }, |
| { |
| "epoch": 2.9617905030749068, |
| "grad_norm": 0.14019801752830394, |
| "learning_rate": 5e-05, |
| "loss": 1.5711, |
| "step": 2936 |
| }, |
| { |
| "epoch": 2.9627986692206876, |
| "grad_norm": 0.1453666753568266, |
| "learning_rate": 5e-05, |
| "loss": 1.566, |
| "step": 2937 |
| }, |
| { |
| "epoch": 2.9638068353664684, |
| "grad_norm": 0.13919807108999072, |
| "learning_rate": 5e-05, |
| "loss": 1.5651, |
| "step": 2938 |
| }, |
| { |
| "epoch": 2.964815001512249, |
| "grad_norm": 0.14464003186645194, |
| "learning_rate": 5e-05, |
| "loss": 1.5845, |
| "step": 2939 |
| }, |
| { |
| "epoch": 2.96582316765803, |
| "grad_norm": 0.13620309551680643, |
| "learning_rate": 5e-05, |
| "loss": 1.5598, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.966831333803811, |
| "grad_norm": 0.1372206763034652, |
| "learning_rate": 5e-05, |
| "loss": 1.5578, |
| "step": 2941 |
| }, |
| { |
| "epoch": 2.9678394999495916, |
| "grad_norm": 0.13886626014581938, |
| "learning_rate": 5e-05, |
| "loss": 1.5649, |
| "step": 2942 |
| }, |
| { |
| "epoch": 2.9688476660953724, |
| "grad_norm": 0.13428686365047957, |
| "learning_rate": 5e-05, |
| "loss": 1.5704, |
| "step": 2943 |
| }, |
| { |
| "epoch": 2.9698558322411532, |
| "grad_norm": 0.12948797335444784, |
| "learning_rate": 5e-05, |
| "loss": 1.5725, |
| "step": 2944 |
| }, |
| { |
| "epoch": 2.970863998386934, |
| "grad_norm": 0.12769456210733446, |
| "learning_rate": 5e-05, |
| "loss": 1.5541, |
| "step": 2945 |
| }, |
| { |
| "epoch": 2.971872164532715, |
| "grad_norm": 0.14488293202297234, |
| "learning_rate": 5e-05, |
| "loss": 1.5609, |
| "step": 2946 |
| }, |
| { |
| "epoch": 2.9728803306784957, |
| "grad_norm": 0.14137326637650713, |
| "learning_rate": 5e-05, |
| "loss": 1.5581, |
| "step": 2947 |
| }, |
| { |
| "epoch": 2.9738884968242765, |
| "grad_norm": 0.1338660797211836, |
| "learning_rate": 5e-05, |
| "loss": 1.5614, |
| "step": 2948 |
| }, |
| { |
| "epoch": 2.9748966629700577, |
| "grad_norm": 0.1379123131883918, |
| "learning_rate": 5e-05, |
| "loss": 1.5642, |
| "step": 2949 |
| }, |
| { |
| "epoch": 2.975904829115838, |
| "grad_norm": 0.1340410935395909, |
| "learning_rate": 5e-05, |
| "loss": 1.5668, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.9769129952616193, |
| "grad_norm": 0.12855729642664465, |
| "learning_rate": 5e-05, |
| "loss": 1.5513, |
| "step": 2951 |
| }, |
| { |
| "epoch": 2.9779211614073997, |
| "grad_norm": 0.13071208212474497, |
| "learning_rate": 5e-05, |
| "loss": 1.5522, |
| "step": 2952 |
| }, |
| { |
| "epoch": 2.978929327553181, |
| "grad_norm": 0.13305329821521164, |
| "learning_rate": 5e-05, |
| "loss": 1.5668, |
| "step": 2953 |
| }, |
| { |
| "epoch": 2.9799374936989613, |
| "grad_norm": 0.13376568327004498, |
| "learning_rate": 5e-05, |
| "loss": 1.558, |
| "step": 2954 |
| }, |
| { |
| "epoch": 2.9809456598447426, |
| "grad_norm": 0.13084931939094915, |
| "learning_rate": 5e-05, |
| "loss": 1.5575, |
| "step": 2955 |
| }, |
| { |
| "epoch": 2.9819538259905234, |
| "grad_norm": 0.13867891937239454, |
| "learning_rate": 5e-05, |
| "loss": 1.5598, |
| "step": 2956 |
| }, |
| { |
| "epoch": 2.982961992136304, |
| "grad_norm": 0.1397205924366696, |
| "learning_rate": 5e-05, |
| "loss": 1.5663, |
| "step": 2957 |
| }, |
| { |
| "epoch": 2.983970158282085, |
| "grad_norm": 0.13969260296990604, |
| "learning_rate": 5e-05, |
| "loss": 1.5606, |
| "step": 2958 |
| }, |
| { |
| "epoch": 2.984978324427866, |
| "grad_norm": 0.1631492011661443, |
| "learning_rate": 5e-05, |
| "loss": 1.5398, |
| "step": 2959 |
| }, |
| { |
| "epoch": 2.9859864905736466, |
| "grad_norm": 0.15119413820443203, |
| "learning_rate": 5e-05, |
| "loss": 1.5569, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.9869946567194274, |
| "grad_norm": 0.14863919142032006, |
| "learning_rate": 5e-05, |
| "loss": 1.5451, |
| "step": 2961 |
| }, |
| { |
| "epoch": 2.9880028228652082, |
| "grad_norm": 0.13687840450793468, |
| "learning_rate": 5e-05, |
| "loss": 1.5598, |
| "step": 2962 |
| }, |
| { |
| "epoch": 2.989010989010989, |
| "grad_norm": 0.1493181266930825, |
| "learning_rate": 5e-05, |
| "loss": 1.5572, |
| "step": 2963 |
| }, |
| { |
| "epoch": 2.99001915515677, |
| "grad_norm": 0.20032135827358913, |
| "learning_rate": 5e-05, |
| "loss": 1.5583, |
| "step": 2964 |
| }, |
| { |
| "epoch": 2.9910273213025507, |
| "grad_norm": 0.13696879150194943, |
| "learning_rate": 5e-05, |
| "loss": 1.569, |
| "step": 2965 |
| }, |
| { |
| "epoch": 2.9920354874483315, |
| "grad_norm": 0.14187831421826821, |
| "learning_rate": 5e-05, |
| "loss": 1.5625, |
| "step": 2966 |
| }, |
| { |
| "epoch": 2.9930436535941123, |
| "grad_norm": 0.14273354843205222, |
| "learning_rate": 5e-05, |
| "loss": 1.5632, |
| "step": 2967 |
| }, |
| { |
| "epoch": 2.994051819739893, |
| "grad_norm": 0.1372115086061227, |
| "learning_rate": 5e-05, |
| "loss": 1.5637, |
| "step": 2968 |
| }, |
| { |
| "epoch": 2.995059985885674, |
| "grad_norm": 0.13929386398909455, |
| "learning_rate": 5e-05, |
| "loss": 1.5568, |
| "step": 2969 |
| }, |
| { |
| "epoch": 2.9960681520314547, |
| "grad_norm": 0.14597924434479242, |
| "learning_rate": 5e-05, |
| "loss": 1.5658, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.9970763181772355, |
| "grad_norm": 0.13204270313325644, |
| "learning_rate": 5e-05, |
| "loss": 1.552, |
| "step": 2971 |
| }, |
| { |
| "epoch": 2.9980844843230163, |
| "grad_norm": 0.1490654908890621, |
| "learning_rate": 5e-05, |
| "loss": 1.57, |
| "step": 2972 |
| }, |
| { |
| "epoch": 2.999092650468797, |
| "grad_norm": 0.14445886974569572, |
| "learning_rate": 5e-05, |
| "loss": 1.5441, |
| "step": 2973 |
| }, |
| { |
| "epoch": 2.999092650468797, |
| "step": 2973, |
| "total_flos": 1045643124342784.0, |
| "train_loss": 1.6583996424962084, |
| "train_runtime": 54127.1912, |
| "train_samples_per_second": 10.994, |
| "train_steps_per_second": 0.055 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2973, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 150, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1045643124342784.0, |
| "train_batch_size": 10, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|