| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.448542237727386, |
| "eval_steps": 500, |
| "global_step": 900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00049838026414154, |
| "grad_norm": 0.8175273537635803, |
| "learning_rate": 1e-05, |
| "loss": 1.8901, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00099676052828308, |
| "grad_norm": 0.5205090641975403, |
| "learning_rate": 2e-05, |
| "loss": 1.8661, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00149514079242462, |
| "grad_norm": 0.7050982713699341, |
| "learning_rate": 3e-05, |
| "loss": 1.884, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00199352105656616, |
| "grad_norm": 0.3958536684513092, |
| "learning_rate": 4e-05, |
| "loss": 1.848, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0024919013207077, |
| "grad_norm": 0.2910257577896118, |
| "learning_rate": 5e-05, |
| "loss": 1.8363, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00299028158484924, |
| "grad_norm": 1.1061186790466309, |
| "learning_rate": 6e-05, |
| "loss": 2.1065, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00348866184899078, |
| "grad_norm": 0.35989394783973694, |
| "learning_rate": 7e-05, |
| "loss": 1.8461, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.00398704211313232, |
| "grad_norm": 0.3001234233379364, |
| "learning_rate": 8e-05, |
| "loss": 1.8691, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.00448542237727386, |
| "grad_norm": 0.3210326135158539, |
| "learning_rate": 9e-05, |
| "loss": 1.8006, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0049838026414154, |
| "grad_norm": 0.24240201711654663, |
| "learning_rate": 0.0001, |
| "loss": 1.8136, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00548218290555694, |
| "grad_norm": 0.2921009957790375, |
| "learning_rate": 0.00011000000000000002, |
| "loss": 1.7785, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00598056316969848, |
| "grad_norm": 0.2199179232120514, |
| "learning_rate": 0.00012, |
| "loss": 1.8334, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.00647894343384002, |
| "grad_norm": 0.18247301876544952, |
| "learning_rate": 0.00013000000000000002, |
| "loss": 1.8171, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.00697732369798156, |
| "grad_norm": 0.16971151530742645, |
| "learning_rate": 0.00014, |
| "loss": 1.8838, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0074757039621231, |
| "grad_norm": 0.19395150244235992, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 1.8121, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.00797408422626464, |
| "grad_norm": 0.18596555292606354, |
| "learning_rate": 0.00016, |
| "loss": 1.7756, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.00847246449040618, |
| "grad_norm": 0.23639832437038422, |
| "learning_rate": 0.00017, |
| "loss": 1.8293, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00897084475454772, |
| "grad_norm": 0.5992503762245178, |
| "learning_rate": 0.00018, |
| "loss": 1.8285, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.00946922501868926, |
| "grad_norm": 0.24062925577163696, |
| "learning_rate": 0.00019, |
| "loss": 1.8139, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0099676052828308, |
| "grad_norm": 0.1615862101316452, |
| "learning_rate": 0.0002, |
| "loss": 1.7916, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01046598554697234, |
| "grad_norm": 0.1461448222398758, |
| "learning_rate": 0.0002, |
| "loss": 1.756, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.01096436581111388, |
| "grad_norm": 0.16745099425315857, |
| "learning_rate": 0.0002, |
| "loss": 1.7139, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01146274607525542, |
| "grad_norm": 0.13099125027656555, |
| "learning_rate": 0.0002, |
| "loss": 1.7764, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01196112633939696, |
| "grad_norm": 0.11523797363042831, |
| "learning_rate": 0.0002, |
| "loss": 1.6983, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.012459506603538499, |
| "grad_norm": 0.4995543956756592, |
| "learning_rate": 0.0002, |
| "loss": 1.7629, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01295788686768004, |
| "grad_norm": 0.1197713166475296, |
| "learning_rate": 0.0002, |
| "loss": 1.6818, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01345626713182158, |
| "grad_norm": 0.12242875248193741, |
| "learning_rate": 0.0002, |
| "loss": 1.7446, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.01395464739596312, |
| "grad_norm": 0.11533704400062561, |
| "learning_rate": 0.0002, |
| "loss": 1.7924, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.01445302766010466, |
| "grad_norm": 0.11372833698987961, |
| "learning_rate": 0.0002, |
| "loss": 1.8541, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0149514079242462, |
| "grad_norm": 0.10559230297803879, |
| "learning_rate": 0.0002, |
| "loss": 1.727, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01544978818838774, |
| "grad_norm": 0.1040055975317955, |
| "learning_rate": 0.0002, |
| "loss": 1.6867, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.01594816845252928, |
| "grad_norm": 0.09699314832687378, |
| "learning_rate": 0.0002, |
| "loss": 1.7119, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.016446548716670818, |
| "grad_norm": 0.09951823949813843, |
| "learning_rate": 0.0002, |
| "loss": 1.6883, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.01694492898081236, |
| "grad_norm": 0.09926764667034149, |
| "learning_rate": 0.0002, |
| "loss": 1.6828, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0174433092449539, |
| "grad_norm": 0.11137701570987701, |
| "learning_rate": 0.0002, |
| "loss": 1.8129, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01794168950909544, |
| "grad_norm": 0.09449079632759094, |
| "learning_rate": 0.0002, |
| "loss": 1.7351, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01844006977323698, |
| "grad_norm": 0.10035137832164764, |
| "learning_rate": 0.0002, |
| "loss": 1.7835, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.01893845003737852, |
| "grad_norm": 0.0987599715590477, |
| "learning_rate": 0.0002, |
| "loss": 1.6905, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.01943683030152006, |
| "grad_norm": 0.1124144196510315, |
| "learning_rate": 0.0002, |
| "loss": 1.7833, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0199352105656616, |
| "grad_norm": 0.10424085706472397, |
| "learning_rate": 0.0002, |
| "loss": 1.7308, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02043359082980314, |
| "grad_norm": 0.10069456696510315, |
| "learning_rate": 0.0002, |
| "loss": 1.7756, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.02093197109394468, |
| "grad_norm": 0.096500463783741, |
| "learning_rate": 0.0002, |
| "loss": 1.6723, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.02143035135808622, |
| "grad_norm": 0.10054206848144531, |
| "learning_rate": 0.0002, |
| "loss": 1.7609, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.02192873162222776, |
| "grad_norm": 0.6995068192481995, |
| "learning_rate": 0.0002, |
| "loss": 1.8469, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0224271118863693, |
| "grad_norm": 0.10629299283027649, |
| "learning_rate": 0.0002, |
| "loss": 1.7838, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02292549215051084, |
| "grad_norm": 0.7601500749588013, |
| "learning_rate": 0.0002, |
| "loss": 1.9191, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.02342387241465238, |
| "grad_norm": 0.15130610764026642, |
| "learning_rate": 0.0002, |
| "loss": 1.7054, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02392225267879392, |
| "grad_norm": 0.13523732125759125, |
| "learning_rate": 0.0002, |
| "loss": 1.8099, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.02442063294293546, |
| "grad_norm": 0.13607007265090942, |
| "learning_rate": 0.0002, |
| "loss": 1.7106, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.024919013207076998, |
| "grad_norm": 0.12477318197488785, |
| "learning_rate": 0.0002, |
| "loss": 1.6664, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02541739347121854, |
| "grad_norm": 0.6004332304000854, |
| "learning_rate": 0.0002, |
| "loss": 1.8337, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.02591577373536008, |
| "grad_norm": 0.11952889710664749, |
| "learning_rate": 0.0002, |
| "loss": 1.8014, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.02641415399950162, |
| "grad_norm": 0.12411167472600937, |
| "learning_rate": 0.0002, |
| "loss": 1.716, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02691253426364316, |
| "grad_norm": 0.13071775436401367, |
| "learning_rate": 0.0002, |
| "loss": 1.8158, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0274109145277847, |
| "grad_norm": 0.10316825658082962, |
| "learning_rate": 0.0002, |
| "loss": 1.7051, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.02790929479192624, |
| "grad_norm": 0.12366951256990433, |
| "learning_rate": 0.0002, |
| "loss": 1.7233, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.02840767505606778, |
| "grad_norm": 0.11353752017021179, |
| "learning_rate": 0.0002, |
| "loss": 1.7875, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.02890605532020932, |
| "grad_norm": 0.10084105283021927, |
| "learning_rate": 0.0002, |
| "loss": 1.8455, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02940443558435086, |
| "grad_norm": 0.09446979314088821, |
| "learning_rate": 0.0002, |
| "loss": 1.6738, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0299028158484924, |
| "grad_norm": 0.10983336716890335, |
| "learning_rate": 0.0002, |
| "loss": 1.7517, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03040119611263394, |
| "grad_norm": 0.09697376936674118, |
| "learning_rate": 0.0002, |
| "loss": 1.7885, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.03089957637677548, |
| "grad_norm": 0.10111090540885925, |
| "learning_rate": 0.0002, |
| "loss": 1.7711, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03139795664091702, |
| "grad_norm": 0.09077231585979462, |
| "learning_rate": 0.0002, |
| "loss": 1.6886, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.03189633690505856, |
| "grad_norm": 0.09181386977434158, |
| "learning_rate": 0.0002, |
| "loss": 1.7101, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0323947171692001, |
| "grad_norm": 0.09549912065267563, |
| "learning_rate": 0.0002, |
| "loss": 1.727, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.032893097433341636, |
| "grad_norm": 0.09550771117210388, |
| "learning_rate": 0.0002, |
| "loss": 1.7627, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03339147769748318, |
| "grad_norm": 0.09617152065038681, |
| "learning_rate": 0.0002, |
| "loss": 1.7195, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.03388985796162472, |
| "grad_norm": 0.08987727761268616, |
| "learning_rate": 0.0002, |
| "loss": 1.6672, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03438823822576626, |
| "grad_norm": 0.1968306601047516, |
| "learning_rate": 0.0002, |
| "loss": 1.7743, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0348866184899078, |
| "grad_norm": 0.11987251788377762, |
| "learning_rate": 0.0002, |
| "loss": 1.7883, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03538499875404934, |
| "grad_norm": 0.09412620961666107, |
| "learning_rate": 0.0002, |
| "loss": 1.7965, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03588337901819088, |
| "grad_norm": 0.09160133451223373, |
| "learning_rate": 0.0002, |
| "loss": 1.7451, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.03638175928233242, |
| "grad_norm": 0.08958347886800766, |
| "learning_rate": 0.0002, |
| "loss": 1.6991, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.03688013954647396, |
| "grad_norm": 0.08735426515340805, |
| "learning_rate": 0.0002, |
| "loss": 1.7267, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0373785198106155, |
| "grad_norm": 0.09234903752803802, |
| "learning_rate": 0.0002, |
| "loss": 1.7363, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.03787690007475704, |
| "grad_norm": 0.3366870582103729, |
| "learning_rate": 0.0002, |
| "loss": 1.7519, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03837528033889858, |
| "grad_norm": 0.11989757418632507, |
| "learning_rate": 0.0002, |
| "loss": 1.7388, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03887366060304012, |
| "grad_norm": 0.09671110659837723, |
| "learning_rate": 0.0002, |
| "loss": 1.6955, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.03937204086718166, |
| "grad_norm": 0.3544454276561737, |
| "learning_rate": 0.0002, |
| "loss": 1.7123, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0398704211313232, |
| "grad_norm": 0.36497563123703003, |
| "learning_rate": 0.0002, |
| "loss": 1.8832, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.04036880139546474, |
| "grad_norm": 0.1029423251748085, |
| "learning_rate": 0.0002, |
| "loss": 1.6739, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.04086718165960628, |
| "grad_norm": 0.13265877962112427, |
| "learning_rate": 0.0002, |
| "loss": 1.6735, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.041365561923747816, |
| "grad_norm": 0.10281170904636383, |
| "learning_rate": 0.0002, |
| "loss": 1.7079, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.04186394218788936, |
| "grad_norm": 0.9060964584350586, |
| "learning_rate": 0.0002, |
| "loss": 2.0666, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0423623224520309, |
| "grad_norm": 0.6496222615242004, |
| "learning_rate": 0.0002, |
| "loss": 1.7719, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.04286070271617244, |
| "grad_norm": 0.20052167773246765, |
| "learning_rate": 0.0002, |
| "loss": 1.7717, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.04335908298031398, |
| "grad_norm": 0.20841394364833832, |
| "learning_rate": 0.0002, |
| "loss": 1.7548, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.04385746324445552, |
| "grad_norm": 0.14324237406253815, |
| "learning_rate": 0.0002, |
| "loss": 1.6689, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04435584350859706, |
| "grad_norm": 0.1330689936876297, |
| "learning_rate": 0.0002, |
| "loss": 1.741, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.0448542237727386, |
| "grad_norm": 0.13436254858970642, |
| "learning_rate": 0.0002, |
| "loss": 1.8316, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.04535260403688014, |
| "grad_norm": 0.11558011174201965, |
| "learning_rate": 0.0002, |
| "loss": 1.7094, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.04585098430102168, |
| "grad_norm": 0.13997307419776917, |
| "learning_rate": 0.0002, |
| "loss": 1.7487, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.04634936456516322, |
| "grad_norm": 0.11401030421257019, |
| "learning_rate": 0.0002, |
| "loss": 1.6971, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.04684774482930476, |
| "grad_norm": 0.1490752398967743, |
| "learning_rate": 0.0002, |
| "loss": 1.7318, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.0473461250934463, |
| "grad_norm": 0.10417014360427856, |
| "learning_rate": 0.0002, |
| "loss": 1.6225, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04784450535758784, |
| "grad_norm": 0.11896169185638428, |
| "learning_rate": 0.0002, |
| "loss": 1.6585, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.04834288562172938, |
| "grad_norm": 0.1187196597456932, |
| "learning_rate": 0.0002, |
| "loss": 1.6665, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.04884126588587092, |
| "grad_norm": 0.10665114969015121, |
| "learning_rate": 0.0002, |
| "loss": 1.7154, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.04933964615001246, |
| "grad_norm": 0.11822202056646347, |
| "learning_rate": 0.0002, |
| "loss": 1.7159, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.049838026414153996, |
| "grad_norm": 0.10062436759471893, |
| "learning_rate": 0.0002, |
| "loss": 1.6696, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05033640667829554, |
| "grad_norm": 0.10343766212463379, |
| "learning_rate": 0.0002, |
| "loss": 1.69, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.05083478694243708, |
| "grad_norm": 0.09872441738843918, |
| "learning_rate": 0.0002, |
| "loss": 1.7566, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.05133316720657862, |
| "grad_norm": 0.08979122340679169, |
| "learning_rate": 0.0002, |
| "loss": 1.6714, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.05183154747072016, |
| "grad_norm": 0.10805679857730865, |
| "learning_rate": 0.0002, |
| "loss": 1.7127, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.0523299277348617, |
| "grad_norm": 0.0966518372297287, |
| "learning_rate": 0.0002, |
| "loss": 1.6586, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.05282830799900324, |
| "grad_norm": 0.6643556952476501, |
| "learning_rate": 0.0002, |
| "loss": 1.906, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.05332668826314478, |
| "grad_norm": 0.14238013327121735, |
| "learning_rate": 0.0002, |
| "loss": 1.7367, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.05382506852728632, |
| "grad_norm": 0.2091197371482849, |
| "learning_rate": 0.0002, |
| "loss": 1.7879, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.05432344879142786, |
| "grad_norm": 0.11703892797231674, |
| "learning_rate": 0.0002, |
| "loss": 1.7743, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0548218290555694, |
| "grad_norm": 0.15277640521526337, |
| "learning_rate": 0.0002, |
| "loss": 1.6906, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.05532020931971094, |
| "grad_norm": 0.11744142323732376, |
| "learning_rate": 0.0002, |
| "loss": 1.6935, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.05581858958385248, |
| "grad_norm": 0.10640200227499008, |
| "learning_rate": 0.0002, |
| "loss": 1.6654, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05631696984799402, |
| "grad_norm": 0.10955353826284409, |
| "learning_rate": 0.0002, |
| "loss": 1.7095, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.05681535011213556, |
| "grad_norm": 0.3743372857570648, |
| "learning_rate": 0.0002, |
| "loss": 1.8212, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.0573137303762771, |
| "grad_norm": 0.11817771941423416, |
| "learning_rate": 0.0002, |
| "loss": 1.7246, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.05781211064041864, |
| "grad_norm": 0.10563557595014572, |
| "learning_rate": 0.0002, |
| "loss": 1.6554, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.058310490904560176, |
| "grad_norm": 0.11494623869657516, |
| "learning_rate": 0.0002, |
| "loss": 1.7563, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.05880887116870172, |
| "grad_norm": 0.12262585759162903, |
| "learning_rate": 0.0002, |
| "loss": 1.7416, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.05930725143284326, |
| "grad_norm": 0.09501025080680847, |
| "learning_rate": 0.0002, |
| "loss": 1.7068, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0598056316969848, |
| "grad_norm": 0.15478286147117615, |
| "learning_rate": 0.0002, |
| "loss": 1.8005, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06030401196112634, |
| "grad_norm": 0.5174306631088257, |
| "learning_rate": 0.0002, |
| "loss": 1.7736, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.06080239222526788, |
| "grad_norm": 0.37489035725593567, |
| "learning_rate": 0.0002, |
| "loss": 1.7367, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.06130077248940942, |
| "grad_norm": 0.10632194578647614, |
| "learning_rate": 0.0002, |
| "loss": 1.6754, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.06179915275355096, |
| "grad_norm": 0.5897635817527771, |
| "learning_rate": 0.0002, |
| "loss": 1.8483, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.0622975330176925, |
| "grad_norm": 0.1104891449213028, |
| "learning_rate": 0.0002, |
| "loss": 1.6705, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.06279591328183404, |
| "grad_norm": 0.171495720744133, |
| "learning_rate": 0.0002, |
| "loss": 1.8345, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.06329429354597559, |
| "grad_norm": 0.2864750921726227, |
| "learning_rate": 0.0002, |
| "loss": 1.6944, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.06379267381011712, |
| "grad_norm": 0.1258823126554489, |
| "learning_rate": 0.0002, |
| "loss": 1.6922, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.06429105407425866, |
| "grad_norm": 0.10813643783330917, |
| "learning_rate": 0.0002, |
| "loss": 1.6886, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0647894343384002, |
| "grad_norm": 0.12285427749156952, |
| "learning_rate": 0.0002, |
| "loss": 1.712, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06528781460254174, |
| "grad_norm": 0.11049698293209076, |
| "learning_rate": 0.0002, |
| "loss": 1.7107, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06578619486668327, |
| "grad_norm": 0.4740373492240906, |
| "learning_rate": 0.0002, |
| "loss": 1.8128, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06628457513082482, |
| "grad_norm": 0.11663281917572021, |
| "learning_rate": 0.0002, |
| "loss": 1.7054, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.06678295539496636, |
| "grad_norm": 0.1274426281452179, |
| "learning_rate": 0.0002, |
| "loss": 1.7461, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.0672813356591079, |
| "grad_norm": 0.11273318529129028, |
| "learning_rate": 0.0002, |
| "loss": 1.6195, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06777971592324944, |
| "grad_norm": 0.12240920960903168, |
| "learning_rate": 0.0002, |
| "loss": 1.7528, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.06827809618739097, |
| "grad_norm": 0.1003924235701561, |
| "learning_rate": 0.0002, |
| "loss": 1.5651, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06877647645153252, |
| "grad_norm": 0.12279325723648071, |
| "learning_rate": 0.0002, |
| "loss": 1.7905, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.06927485671567406, |
| "grad_norm": 0.10567662119865417, |
| "learning_rate": 0.0002, |
| "loss": 1.7437, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.0697732369798156, |
| "grad_norm": 0.0949968695640564, |
| "learning_rate": 0.0002, |
| "loss": 1.7375, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07027161724395714, |
| "grad_norm": 0.10375083237886429, |
| "learning_rate": 0.0002, |
| "loss": 1.713, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.07076999750809868, |
| "grad_norm": 0.0937686413526535, |
| "learning_rate": 0.0002, |
| "loss": 1.7152, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.07126837777224022, |
| "grad_norm": 0.0981929674744606, |
| "learning_rate": 0.0002, |
| "loss": 1.7116, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.07176675803638176, |
| "grad_norm": 1.1460381746292114, |
| "learning_rate": 0.0002, |
| "loss": 1.9091, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.0722651383005233, |
| "grad_norm": 0.1193133145570755, |
| "learning_rate": 0.0002, |
| "loss": 1.7387, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.07276351856466484, |
| "grad_norm": 0.13854117691516876, |
| "learning_rate": 0.0002, |
| "loss": 1.656, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.07326189882880638, |
| "grad_norm": 0.6005303263664246, |
| "learning_rate": 0.0002, |
| "loss": 1.9014, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.07376027909294793, |
| "grad_norm": 0.13879133760929108, |
| "learning_rate": 0.0002, |
| "loss": 1.7158, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.07425865935708946, |
| "grad_norm": 0.13073574006557465, |
| "learning_rate": 0.0002, |
| "loss": 1.7355, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.074757039621231, |
| "grad_norm": 0.12578125298023224, |
| "learning_rate": 0.0002, |
| "loss": 1.7376, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07525541988537254, |
| "grad_norm": 0.13024558126926422, |
| "learning_rate": 0.0002, |
| "loss": 1.7675, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.07575380014951408, |
| "grad_norm": 0.12630225718021393, |
| "learning_rate": 0.0002, |
| "loss": 1.6509, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.07625218041365561, |
| "grad_norm": 0.13081084191799164, |
| "learning_rate": 0.0002, |
| "loss": 1.7393, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07675056067779716, |
| "grad_norm": 0.11292438209056854, |
| "learning_rate": 0.0002, |
| "loss": 1.6533, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0772489409419387, |
| "grad_norm": 0.10187578946352005, |
| "learning_rate": 0.0002, |
| "loss": 1.6915, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07774732120608024, |
| "grad_norm": 0.10563293844461441, |
| "learning_rate": 0.0002, |
| "loss": 1.7378, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.07824570147022178, |
| "grad_norm": 0.10501443594694138, |
| "learning_rate": 0.0002, |
| "loss": 1.6498, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.07874408173436331, |
| "grad_norm": 0.11756912618875504, |
| "learning_rate": 0.0002, |
| "loss": 1.7963, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.07924246199850486, |
| "grad_norm": 0.1010415181517601, |
| "learning_rate": 0.0002, |
| "loss": 1.6637, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0797408422626464, |
| "grad_norm": 0.09472226351499557, |
| "learning_rate": 0.0002, |
| "loss": 1.6057, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.08023922252678795, |
| "grad_norm": 0.10156677663326263, |
| "learning_rate": 0.0002, |
| "loss": 1.7573, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.08073760279092948, |
| "grad_norm": 0.09345332533121109, |
| "learning_rate": 0.0002, |
| "loss": 1.6327, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.08123598305507101, |
| "grad_norm": 0.09440191835165024, |
| "learning_rate": 0.0002, |
| "loss": 1.6753, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.08173436331921256, |
| "grad_norm": 0.0925949364900589, |
| "learning_rate": 0.0002, |
| "loss": 1.6786, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0822327435833541, |
| "grad_norm": 0.09808436781167984, |
| "learning_rate": 0.0002, |
| "loss": 1.75, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.08273112384749563, |
| "grad_norm": 0.10032784938812256, |
| "learning_rate": 0.0002, |
| "loss": 1.6463, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.08322950411163718, |
| "grad_norm": 0.769005298614502, |
| "learning_rate": 0.0002, |
| "loss": 1.8314, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.08372788437577872, |
| "grad_norm": 1.013753890991211, |
| "learning_rate": 0.0002, |
| "loss": 1.9179, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.08422626463992026, |
| "grad_norm": 0.11522974818944931, |
| "learning_rate": 0.0002, |
| "loss": 1.8271, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.0847246449040618, |
| "grad_norm": 0.1381683349609375, |
| "learning_rate": 0.0002, |
| "loss": 1.7015, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08522302516820333, |
| "grad_norm": 0.13124744594097137, |
| "learning_rate": 0.0002, |
| "loss": 1.7213, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.08572140543234488, |
| "grad_norm": 0.1552695333957672, |
| "learning_rate": 0.0002, |
| "loss": 1.6868, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.08621978569648642, |
| "grad_norm": 0.11559716612100601, |
| "learning_rate": 0.0002, |
| "loss": 1.7474, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.08671816596062797, |
| "grad_norm": 0.11131990700960159, |
| "learning_rate": 0.0002, |
| "loss": 1.6365, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.0872165462247695, |
| "grad_norm": 0.11412417143583298, |
| "learning_rate": 0.0002, |
| "loss": 1.6205, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08771492648891104, |
| "grad_norm": 0.11382830142974854, |
| "learning_rate": 0.0002, |
| "loss": 1.7673, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.08821330675305258, |
| "grad_norm": 0.7038962244987488, |
| "learning_rate": 0.0002, |
| "loss": 1.8568, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.08871168701719412, |
| "grad_norm": 0.11253572255373001, |
| "learning_rate": 0.0002, |
| "loss": 1.7263, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08921006728133565, |
| "grad_norm": 0.12908123433589935, |
| "learning_rate": 0.0002, |
| "loss": 1.7021, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0897084475454772, |
| "grad_norm": 0.12027324736118317, |
| "learning_rate": 0.0002, |
| "loss": 1.7542, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.09020682780961874, |
| "grad_norm": 0.13822880387306213, |
| "learning_rate": 0.0002, |
| "loss": 1.7947, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.09070520807376029, |
| "grad_norm": 0.11809349060058594, |
| "learning_rate": 0.0002, |
| "loss": 1.7438, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.09120358833790182, |
| "grad_norm": 0.11567198485136032, |
| "learning_rate": 0.0002, |
| "loss": 1.7006, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.09170196860204335, |
| "grad_norm": 0.11884818226099014, |
| "learning_rate": 0.0002, |
| "loss": 1.7481, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0922003488661849, |
| "grad_norm": 0.13118627667427063, |
| "learning_rate": 0.0002, |
| "loss": 1.7579, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.09269872913032644, |
| "grad_norm": 0.10780288279056549, |
| "learning_rate": 0.0002, |
| "loss": 1.7563, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.09319710939446797, |
| "grad_norm": 0.1052689403295517, |
| "learning_rate": 0.0002, |
| "loss": 1.7176, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.09369548965860952, |
| "grad_norm": 0.11142247915267944, |
| "learning_rate": 0.0002, |
| "loss": 1.6998, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.09419386992275106, |
| "grad_norm": 0.11082904785871506, |
| "learning_rate": 0.0002, |
| "loss": 1.7492, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.0946922501868926, |
| "grad_norm": 0.09668837487697601, |
| "learning_rate": 0.0002, |
| "loss": 1.6655, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.09519063045103414, |
| "grad_norm": 0.09926537424325943, |
| "learning_rate": 0.0002, |
| "loss": 1.7393, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.09568901071517567, |
| "grad_norm": 0.09865368157625198, |
| "learning_rate": 0.0002, |
| "loss": 1.7538, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.09618739097931722, |
| "grad_norm": 0.10074108839035034, |
| "learning_rate": 0.0002, |
| "loss": 1.7556, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.09668577124345876, |
| "grad_norm": 0.11467942595481873, |
| "learning_rate": 0.0002, |
| "loss": 1.7414, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.0971841515076003, |
| "grad_norm": 0.09638036042451859, |
| "learning_rate": 0.0002, |
| "loss": 1.7296, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.09768253177174184, |
| "grad_norm": 0.09951262921094894, |
| "learning_rate": 0.0002, |
| "loss": 1.6691, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.09818091203588337, |
| "grad_norm": 0.09425103664398193, |
| "learning_rate": 0.0002, |
| "loss": 1.6563, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09867929230002492, |
| "grad_norm": 0.09163974225521088, |
| "learning_rate": 0.0002, |
| "loss": 1.6591, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.09917767256416646, |
| "grad_norm": 0.10825615376234055, |
| "learning_rate": 0.0002, |
| "loss": 1.6748, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.09967605282830799, |
| "grad_norm": 0.08873865008354187, |
| "learning_rate": 0.0002, |
| "loss": 1.7027, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.10017443309244954, |
| "grad_norm": 0.09379550069570541, |
| "learning_rate": 0.0002, |
| "loss": 1.7475, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.10067281335659108, |
| "grad_norm": 0.09395930916070938, |
| "learning_rate": 0.0002, |
| "loss": 1.7183, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.10117119362073262, |
| "grad_norm": 0.09373954683542252, |
| "learning_rate": 0.0002, |
| "loss": 1.7413, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.10166957388487416, |
| "grad_norm": 0.0926884338259697, |
| "learning_rate": 0.0002, |
| "loss": 1.7284, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.1021679541490157, |
| "grad_norm": 0.09394028782844543, |
| "learning_rate": 0.0002, |
| "loss": 1.6777, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.10266633441315724, |
| "grad_norm": 0.0934232845902443, |
| "learning_rate": 0.0002, |
| "loss": 1.6389, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.10316471467729878, |
| "grad_norm": 0.08943123370409012, |
| "learning_rate": 0.0002, |
| "loss": 1.7382, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.10366309494144033, |
| "grad_norm": 0.09671316295862198, |
| "learning_rate": 0.0002, |
| "loss": 1.7017, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.10416147520558186, |
| "grad_norm": 0.12016978114843369, |
| "learning_rate": 0.0002, |
| "loss": 1.7993, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.1046598554697234, |
| "grad_norm": 0.5822897553443909, |
| "learning_rate": 0.0002, |
| "loss": 1.6948, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.10515823573386494, |
| "grad_norm": 0.10984666645526886, |
| "learning_rate": 0.0002, |
| "loss": 1.703, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.10565661599800648, |
| "grad_norm": 0.661040186882019, |
| "learning_rate": 0.0002, |
| "loss": 1.7008, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.10615499626214801, |
| "grad_norm": 0.1641639620065689, |
| "learning_rate": 0.0002, |
| "loss": 1.8105, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.10665337652628956, |
| "grad_norm": 0.34271761775016785, |
| "learning_rate": 0.0002, |
| "loss": 1.7768, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1071517567904311, |
| "grad_norm": 0.11224206537008286, |
| "learning_rate": 0.0002, |
| "loss": 1.7126, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.10765013705457264, |
| "grad_norm": 0.11788146197795868, |
| "learning_rate": 0.0002, |
| "loss": 1.7617, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.10814851731871418, |
| "grad_norm": 0.10918893665075302, |
| "learning_rate": 0.0002, |
| "loss": 1.6258, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.10864689758285571, |
| "grad_norm": 0.12023265659809113, |
| "learning_rate": 0.0002, |
| "loss": 1.7459, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.10914527784699726, |
| "grad_norm": 0.11474837362766266, |
| "learning_rate": 0.0002, |
| "loss": 1.749, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.1096436581111388, |
| "grad_norm": 0.10222747921943665, |
| "learning_rate": 0.0002, |
| "loss": 1.696, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.11014203837528033, |
| "grad_norm": 0.1074354350566864, |
| "learning_rate": 0.0002, |
| "loss": 1.708, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.11064041863942188, |
| "grad_norm": 0.5447832345962524, |
| "learning_rate": 0.0002, |
| "loss": 1.8402, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.11113879890356342, |
| "grad_norm": 0.12009864300489426, |
| "learning_rate": 0.0002, |
| "loss": 1.7412, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.11163717916770496, |
| "grad_norm": 0.11686031520366669, |
| "learning_rate": 0.0002, |
| "loss": 1.7185, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.1121355594318465, |
| "grad_norm": 0.12914586067199707, |
| "learning_rate": 0.0002, |
| "loss": 1.6867, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.11263393969598803, |
| "grad_norm": 0.10797183215618134, |
| "learning_rate": 0.0002, |
| "loss": 1.706, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.11313231996012958, |
| "grad_norm": 0.1088324561715126, |
| "learning_rate": 0.0002, |
| "loss": 1.6257, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.11363070022427112, |
| "grad_norm": 0.10438574105501175, |
| "learning_rate": 0.0002, |
| "loss": 1.6798, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.11412908048841267, |
| "grad_norm": 0.14163640141487122, |
| "learning_rate": 0.0002, |
| "loss": 1.785, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.1146274607525542, |
| "grad_norm": 0.10191742330789566, |
| "learning_rate": 0.0002, |
| "loss": 1.6979, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.11512584101669573, |
| "grad_norm": 0.11547041684389114, |
| "learning_rate": 0.0002, |
| "loss": 1.7793, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.11562422128083728, |
| "grad_norm": 0.10447453707456589, |
| "learning_rate": 0.0002, |
| "loss": 1.7791, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.11612260154497882, |
| "grad_norm": 0.10447558760643005, |
| "learning_rate": 0.0002, |
| "loss": 1.6799, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.11662098180912035, |
| "grad_norm": 0.10260461270809174, |
| "learning_rate": 0.0002, |
| "loss": 1.6561, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.1171193620732619, |
| "grad_norm": 0.10199354588985443, |
| "learning_rate": 0.0002, |
| "loss": 1.6476, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.11761774233740344, |
| "grad_norm": 0.09869713336229324, |
| "learning_rate": 0.0002, |
| "loss": 1.6183, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.11811612260154498, |
| "grad_norm": 0.9354596138000488, |
| "learning_rate": 0.0002, |
| "loss": 1.9584, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.11861450286568652, |
| "grad_norm": 0.15785987675189972, |
| "learning_rate": 0.0002, |
| "loss": 1.718, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.11911288312982805, |
| "grad_norm": 0.16236662864685059, |
| "learning_rate": 0.0002, |
| "loss": 1.7275, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1196112633939696, |
| "grad_norm": 0.1407175064086914, |
| "learning_rate": 0.0002, |
| "loss": 1.6987, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.12010964365811114, |
| "grad_norm": 0.13428977131843567, |
| "learning_rate": 0.0002, |
| "loss": 1.6998, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.12060802392225269, |
| "grad_norm": 0.5954437255859375, |
| "learning_rate": 0.0002, |
| "loss": 1.7536, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.12110640418639422, |
| "grad_norm": 0.12084382027387619, |
| "learning_rate": 0.0002, |
| "loss": 1.6446, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.12160478445053576, |
| "grad_norm": 0.12887060642242432, |
| "learning_rate": 0.0002, |
| "loss": 1.6994, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.1221031647146773, |
| "grad_norm": 0.12585604190826416, |
| "learning_rate": 0.0002, |
| "loss": 1.6705, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.12260154497881884, |
| "grad_norm": 0.11495430767536163, |
| "learning_rate": 0.0002, |
| "loss": 1.6833, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.12309992524296037, |
| "grad_norm": 0.36918768286705017, |
| "learning_rate": 0.0002, |
| "loss": 1.8354, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.12359830550710192, |
| "grad_norm": 0.1330924779176712, |
| "learning_rate": 0.0002, |
| "loss": 1.6915, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.12409668577124346, |
| "grad_norm": 0.6573293805122375, |
| "learning_rate": 0.0002, |
| "loss": 1.7672, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.124595066035385, |
| "grad_norm": 0.13000234961509705, |
| "learning_rate": 0.0002, |
| "loss": 1.6639, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.12509344629952654, |
| "grad_norm": 0.14653077721595764, |
| "learning_rate": 0.0002, |
| "loss": 1.7126, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.12559182656366807, |
| "grad_norm": 0.13498292863368988, |
| "learning_rate": 0.0002, |
| "loss": 1.6848, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1260902068278096, |
| "grad_norm": 0.13268351554870605, |
| "learning_rate": 0.0002, |
| "loss": 1.7338, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.12658858709195117, |
| "grad_norm": 0.1395343542098999, |
| "learning_rate": 0.0002, |
| "loss": 1.7099, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.1270869673560927, |
| "grad_norm": 0.1279151439666748, |
| "learning_rate": 0.0002, |
| "loss": 1.7156, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.12758534762023424, |
| "grad_norm": 0.112457275390625, |
| "learning_rate": 0.0002, |
| "loss": 1.7054, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.12808372788437578, |
| "grad_norm": 0.11672843992710114, |
| "learning_rate": 0.0002, |
| "loss": 1.6895, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.1285821081485173, |
| "grad_norm": 0.1295323520898819, |
| "learning_rate": 0.0002, |
| "loss": 1.6738, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.12908048841265887, |
| "grad_norm": 0.10538823157548904, |
| "learning_rate": 0.0002, |
| "loss": 1.626, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.1295788686768004, |
| "grad_norm": 0.1093951016664505, |
| "learning_rate": 0.0002, |
| "loss": 1.6494, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.13007724894094194, |
| "grad_norm": 0.10753627866506577, |
| "learning_rate": 0.0002, |
| "loss": 1.7058, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.13057562920508348, |
| "grad_norm": 0.11015735566616058, |
| "learning_rate": 0.0002, |
| "loss": 1.7519, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.131074009469225, |
| "grad_norm": 0.10606027394533157, |
| "learning_rate": 0.0002, |
| "loss": 1.6725, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.13157238973336655, |
| "grad_norm": 0.09919940680265427, |
| "learning_rate": 0.0002, |
| "loss": 1.6522, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1320707699975081, |
| "grad_norm": 0.1004357561469078, |
| "learning_rate": 0.0002, |
| "loss": 1.7, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.13256915026164964, |
| "grad_norm": 0.1044403687119484, |
| "learning_rate": 0.0002, |
| "loss": 1.7131, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.13306753052579118, |
| "grad_norm": 0.09830351173877716, |
| "learning_rate": 0.0002, |
| "loss": 1.7057, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.1335659107899327, |
| "grad_norm": 0.09731124341487885, |
| "learning_rate": 0.0002, |
| "loss": 1.6696, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.13406429105407425, |
| "grad_norm": 0.09874913096427917, |
| "learning_rate": 0.0002, |
| "loss": 1.6704, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.1345626713182158, |
| "grad_norm": 1.0015792846679688, |
| "learning_rate": 0.0002, |
| "loss": 1.828, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.13506105158235734, |
| "grad_norm": 0.15942072868347168, |
| "learning_rate": 0.0002, |
| "loss": 1.6851, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.13555943184649888, |
| "grad_norm": 0.1272728443145752, |
| "learning_rate": 0.0002, |
| "loss": 1.6946, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.1360578121106404, |
| "grad_norm": 0.13415473699569702, |
| "learning_rate": 0.0002, |
| "loss": 1.6865, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.13655619237478195, |
| "grad_norm": 0.6600972414016724, |
| "learning_rate": 0.0002, |
| "loss": 1.845, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1370545726389235, |
| "grad_norm": 0.16784119606018066, |
| "learning_rate": 0.0002, |
| "loss": 1.8104, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.13755295290306505, |
| "grad_norm": 0.14813649654388428, |
| "learning_rate": 0.0002, |
| "loss": 1.7188, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.13805133316720658, |
| "grad_norm": 0.14158020913600922, |
| "learning_rate": 0.0002, |
| "loss": 1.7002, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.13854971343134811, |
| "grad_norm": 0.48206424713134766, |
| "learning_rate": 0.0002, |
| "loss": 1.8617, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.13904809369548965, |
| "grad_norm": 0.18177767097949982, |
| "learning_rate": 0.0002, |
| "loss": 1.7111, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.1395464739596312, |
| "grad_norm": 0.12430819869041443, |
| "learning_rate": 0.0002, |
| "loss": 1.6939, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.14004485422377275, |
| "grad_norm": 0.44922658801078796, |
| "learning_rate": 0.0002, |
| "loss": 1.7779, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.14054323448791428, |
| "grad_norm": 0.14023765921592712, |
| "learning_rate": 0.0002, |
| "loss": 1.6521, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.14104161475205582, |
| "grad_norm": 0.15241369605064392, |
| "learning_rate": 0.0002, |
| "loss": 1.6819, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.14153999501619735, |
| "grad_norm": 0.12531667947769165, |
| "learning_rate": 0.0002, |
| "loss": 1.7014, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.14203837528033889, |
| "grad_norm": 0.13596689701080322, |
| "learning_rate": 0.0002, |
| "loss": 1.6841, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.14253675554448045, |
| "grad_norm": 0.1316744089126587, |
| "learning_rate": 0.0002, |
| "loss": 1.7503, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.14303513580862198, |
| "grad_norm": 0.11584890633821487, |
| "learning_rate": 0.0002, |
| "loss": 1.6776, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.14353351607276352, |
| "grad_norm": 0.37444308400154114, |
| "learning_rate": 0.0002, |
| "loss": 1.7808, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.14403189633690505, |
| "grad_norm": 0.3217577338218689, |
| "learning_rate": 0.0002, |
| "loss": 1.6491, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.1445302766010466, |
| "grad_norm": 0.12234029918909073, |
| "learning_rate": 0.0002, |
| "loss": 1.7131, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.14502865686518815, |
| "grad_norm": 0.13871504366397858, |
| "learning_rate": 0.0002, |
| "loss": 1.7737, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.14552703712932968, |
| "grad_norm": 0.10792572051286697, |
| "learning_rate": 0.0002, |
| "loss": 1.7162, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.14602541739347122, |
| "grad_norm": 0.11277946084737778, |
| "learning_rate": 0.0002, |
| "loss": 1.666, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.14652379765761275, |
| "grad_norm": 0.11250103265047073, |
| "learning_rate": 0.0002, |
| "loss": 1.7334, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.1470221779217543, |
| "grad_norm": 0.10644537955522537, |
| "learning_rate": 0.0002, |
| "loss": 1.6836, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.14752055818589585, |
| "grad_norm": 0.12423089891672134, |
| "learning_rate": 0.0002, |
| "loss": 1.7349, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.14801893845003739, |
| "grad_norm": 0.10547474026679993, |
| "learning_rate": 0.0002, |
| "loss": 1.6783, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.14851731871417892, |
| "grad_norm": 0.10867539793252945, |
| "learning_rate": 0.0002, |
| "loss": 1.6709, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.14901569897832045, |
| "grad_norm": 0.21218198537826538, |
| "learning_rate": 0.0002, |
| "loss": 1.6717, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.149514079242462, |
| "grad_norm": 0.11373799294233322, |
| "learning_rate": 0.0002, |
| "loss": 1.7398, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.15001245950660355, |
| "grad_norm": 0.12452666461467743, |
| "learning_rate": 0.0002, |
| "loss": 1.7625, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.1505108397707451, |
| "grad_norm": 0.4068242609500885, |
| "learning_rate": 0.0002, |
| "loss": 1.7357, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.15100922003488662, |
| "grad_norm": 0.15395419299602509, |
| "learning_rate": 0.0002, |
| "loss": 1.6878, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.15150760029902816, |
| "grad_norm": 0.11441215127706528, |
| "learning_rate": 0.0002, |
| "loss": 1.7055, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1520059805631697, |
| "grad_norm": 0.13675518333911896, |
| "learning_rate": 0.0002, |
| "loss": 1.7005, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.15250436082731123, |
| "grad_norm": 0.11606375873088837, |
| "learning_rate": 0.0002, |
| "loss": 1.6453, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.1530027410914528, |
| "grad_norm": 0.4435337483882904, |
| "learning_rate": 0.0002, |
| "loss": 1.7435, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.15350112135559432, |
| "grad_norm": 0.12212298810482025, |
| "learning_rate": 0.0002, |
| "loss": 1.705, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.15399950161973586, |
| "grad_norm": 0.14606495201587677, |
| "learning_rate": 0.0002, |
| "loss": 1.6517, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1544978818838774, |
| "grad_norm": 0.11753024160861969, |
| "learning_rate": 0.0002, |
| "loss": 1.7427, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.15499626214801893, |
| "grad_norm": 0.13007789850234985, |
| "learning_rate": 0.0002, |
| "loss": 1.7462, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.1554946424121605, |
| "grad_norm": 0.11651528626680374, |
| "learning_rate": 0.0002, |
| "loss": 1.7128, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.15599302267630202, |
| "grad_norm": 0.1128389984369278, |
| "learning_rate": 0.0002, |
| "loss": 1.6977, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.15649140294044356, |
| "grad_norm": 0.10965872555971146, |
| "learning_rate": 0.0002, |
| "loss": 1.6578, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.1569897832045851, |
| "grad_norm": 0.10751237720251083, |
| "learning_rate": 0.0002, |
| "loss": 1.6346, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.15748816346872663, |
| "grad_norm": 0.09646358340978622, |
| "learning_rate": 0.0002, |
| "loss": 1.6873, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.1579865437328682, |
| "grad_norm": 0.09908836334943771, |
| "learning_rate": 0.0002, |
| "loss": 1.6934, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.15848492399700972, |
| "grad_norm": 0.09631779044866562, |
| "learning_rate": 0.0002, |
| "loss": 1.6703, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.15898330426115126, |
| "grad_norm": 0.5702200531959534, |
| "learning_rate": 0.0002, |
| "loss": 1.7651, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.1594816845252928, |
| "grad_norm": 0.1274351179599762, |
| "learning_rate": 0.0002, |
| "loss": 1.6632, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.15998006478943433, |
| "grad_norm": 0.10685572028160095, |
| "learning_rate": 0.0002, |
| "loss": 1.6691, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1604784450535759, |
| "grad_norm": 0.12333345413208008, |
| "learning_rate": 0.0002, |
| "loss": 1.6811, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.16097682531771743, |
| "grad_norm": 0.10747205466032028, |
| "learning_rate": 0.0002, |
| "loss": 1.6292, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.16147520558185896, |
| "grad_norm": 0.10506169497966766, |
| "learning_rate": 0.0002, |
| "loss": 1.7463, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.1619735858460005, |
| "grad_norm": 0.11267457902431488, |
| "learning_rate": 0.0002, |
| "loss": 1.7192, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.16247196611014203, |
| "grad_norm": 0.10924848914146423, |
| "learning_rate": 0.0002, |
| "loss": 1.7146, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.16297034637428356, |
| "grad_norm": 0.11103785783052444, |
| "learning_rate": 0.0002, |
| "loss": 1.6215, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.16346872663842513, |
| "grad_norm": 0.3997076451778412, |
| "learning_rate": 0.0002, |
| "loss": 1.8753, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.16396710690256666, |
| "grad_norm": 0.10188498347997665, |
| "learning_rate": 0.0002, |
| "loss": 1.7483, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.1644654871667082, |
| "grad_norm": 0.10824645310640335, |
| "learning_rate": 0.0002, |
| "loss": 1.6828, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.16496386743084973, |
| "grad_norm": 0.09962976723909378, |
| "learning_rate": 0.0002, |
| "loss": 1.7127, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.16546224769499127, |
| "grad_norm": 0.10796276479959488, |
| "learning_rate": 0.0002, |
| "loss": 1.6799, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.16596062795913283, |
| "grad_norm": 0.09546298533678055, |
| "learning_rate": 0.0002, |
| "loss": 1.736, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.16645900822327436, |
| "grad_norm": 0.3045598864555359, |
| "learning_rate": 0.0002, |
| "loss": 1.6192, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.1669573884874159, |
| "grad_norm": 0.10275569558143616, |
| "learning_rate": 0.0002, |
| "loss": 1.7551, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.16745576875155743, |
| "grad_norm": 0.14451362192630768, |
| "learning_rate": 0.0002, |
| "loss": 1.7094, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.16795414901569897, |
| "grad_norm": 0.0982123464345932, |
| "learning_rate": 0.0002, |
| "loss": 1.6996, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.16845252927984053, |
| "grad_norm": 0.11521178483963013, |
| "learning_rate": 0.0002, |
| "loss": 1.6409, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.16895090954398206, |
| "grad_norm": 0.2746621072292328, |
| "learning_rate": 0.0002, |
| "loss": 1.7035, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.1694492898081236, |
| "grad_norm": 0.0955624207854271, |
| "learning_rate": 0.0002, |
| "loss": 1.6689, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.16994767007226513, |
| "grad_norm": 0.10157962888479233, |
| "learning_rate": 0.0002, |
| "loss": 1.6561, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.17044605033640667, |
| "grad_norm": 0.0971306711435318, |
| "learning_rate": 0.0002, |
| "loss": 1.7626, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.17094443060054823, |
| "grad_norm": 0.10407841205596924, |
| "learning_rate": 0.0002, |
| "loss": 1.681, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.17144281086468977, |
| "grad_norm": 0.09228493273258209, |
| "learning_rate": 0.0002, |
| "loss": 1.6196, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.1719411911288313, |
| "grad_norm": 0.10309567302465439, |
| "learning_rate": 0.0002, |
| "loss": 1.6534, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.17243957139297283, |
| "grad_norm": 0.10019028931856155, |
| "learning_rate": 0.0002, |
| "loss": 1.7315, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.17293795165711437, |
| "grad_norm": 0.09051994234323502, |
| "learning_rate": 0.0002, |
| "loss": 1.6537, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.17343633192125593, |
| "grad_norm": 0.09501929581165314, |
| "learning_rate": 0.0002, |
| "loss": 1.681, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.17393471218539747, |
| "grad_norm": 0.09314325451850891, |
| "learning_rate": 0.0002, |
| "loss": 1.6141, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.174433092449539, |
| "grad_norm": 0.09021347016096115, |
| "learning_rate": 0.0002, |
| "loss": 1.6864, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.17493147271368054, |
| "grad_norm": 0.27376627922058105, |
| "learning_rate": 0.0002, |
| "loss": 1.7223, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.17542985297782207, |
| "grad_norm": 0.11608853936195374, |
| "learning_rate": 0.0002, |
| "loss": 1.6974, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1759282332419636, |
| "grad_norm": 0.09565002471208572, |
| "learning_rate": 0.0002, |
| "loss": 1.6925, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.17642661350610517, |
| "grad_norm": 0.10814974457025528, |
| "learning_rate": 0.0002, |
| "loss": 1.6349, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.1769249937702467, |
| "grad_norm": 0.09551705420017242, |
| "learning_rate": 0.0002, |
| "loss": 1.6715, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.17742337403438824, |
| "grad_norm": 0.10541266202926636, |
| "learning_rate": 0.0002, |
| "loss": 1.6592, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.17792175429852977, |
| "grad_norm": 0.09884203970432281, |
| "learning_rate": 0.0002, |
| "loss": 1.638, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.1784201345626713, |
| "grad_norm": 0.19244062900543213, |
| "learning_rate": 0.0002, |
| "loss": 1.6823, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.17891851482681287, |
| "grad_norm": 0.1312815397977829, |
| "learning_rate": 0.0002, |
| "loss": 1.747, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1794168950909544, |
| "grad_norm": 0.10575084388256073, |
| "learning_rate": 0.0002, |
| "loss": 1.6958, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.17991527535509594, |
| "grad_norm": 0.1993856579065323, |
| "learning_rate": 0.0002, |
| "loss": 1.5862, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.18041365561923747, |
| "grad_norm": 0.1053745448589325, |
| "learning_rate": 0.0002, |
| "loss": 1.705, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.180912035883379, |
| "grad_norm": 0.10017159581184387, |
| "learning_rate": 0.0002, |
| "loss": 1.6565, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.18141041614752057, |
| "grad_norm": 0.12066628038883209, |
| "learning_rate": 0.0002, |
| "loss": 1.639, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.1819087964116621, |
| "grad_norm": 0.12606841325759888, |
| "learning_rate": 0.0002, |
| "loss": 1.8435, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.18240717667580364, |
| "grad_norm": 0.10491355508565903, |
| "learning_rate": 0.0002, |
| "loss": 1.5846, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.18290555693994517, |
| "grad_norm": 0.10337149351835251, |
| "learning_rate": 0.0002, |
| "loss": 1.6903, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.1834039372040867, |
| "grad_norm": 0.09452168643474579, |
| "learning_rate": 0.0002, |
| "loss": 1.6865, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.18390231746822827, |
| "grad_norm": 0.09799271076917648, |
| "learning_rate": 0.0002, |
| "loss": 1.6343, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.1844006977323698, |
| "grad_norm": 0.09442919492721558, |
| "learning_rate": 0.0002, |
| "loss": 1.6266, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.18489907799651134, |
| "grad_norm": 0.09542658925056458, |
| "learning_rate": 0.0002, |
| "loss": 1.612, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.18539745826065288, |
| "grad_norm": 0.0989847183227539, |
| "learning_rate": 0.0002, |
| "loss": 1.6957, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1858958385247944, |
| "grad_norm": 0.09289655089378357, |
| "learning_rate": 0.0002, |
| "loss": 1.6501, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.18639421878893594, |
| "grad_norm": 0.10097731649875641, |
| "learning_rate": 0.0002, |
| "loss": 1.7114, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.1868925990530775, |
| "grad_norm": 0.09352610260248184, |
| "learning_rate": 0.0002, |
| "loss": 1.7375, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.18739097931721904, |
| "grad_norm": 0.0907459631562233, |
| "learning_rate": 0.0002, |
| "loss": 1.651, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.18788935958136058, |
| "grad_norm": 0.0915813073515892, |
| "learning_rate": 0.0002, |
| "loss": 1.6289, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.1883877398455021, |
| "grad_norm": 0.09011110663414001, |
| "learning_rate": 0.0002, |
| "loss": 1.7024, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.18888612010964365, |
| "grad_norm": 0.4069153964519501, |
| "learning_rate": 0.0002, |
| "loss": 1.6647, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.1893845003737852, |
| "grad_norm": 0.1351984292268753, |
| "learning_rate": 0.0002, |
| "loss": 1.7911, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.18988288063792674, |
| "grad_norm": 0.537133514881134, |
| "learning_rate": 0.0002, |
| "loss": 1.75, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.19038126090206828, |
| "grad_norm": 0.10901357978582382, |
| "learning_rate": 0.0002, |
| "loss": 1.6767, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.1908796411662098, |
| "grad_norm": 0.19000430405139923, |
| "learning_rate": 0.0002, |
| "loss": 1.6682, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.19137802143035135, |
| "grad_norm": 0.12100650370121002, |
| "learning_rate": 0.0002, |
| "loss": 1.6844, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1918764016944929, |
| "grad_norm": 0.12487197667360306, |
| "learning_rate": 0.0002, |
| "loss": 1.7239, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.19237478195863444, |
| "grad_norm": 0.12008525431156158, |
| "learning_rate": 0.0002, |
| "loss": 1.6443, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.19287316222277598, |
| "grad_norm": 0.119840107858181, |
| "learning_rate": 0.0002, |
| "loss": 1.6271, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.1933715424869175, |
| "grad_norm": 0.1126130223274231, |
| "learning_rate": 0.0002, |
| "loss": 1.681, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.19386992275105905, |
| "grad_norm": 0.11164896190166473, |
| "learning_rate": 0.0002, |
| "loss": 1.6586, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.1943683030152006, |
| "grad_norm": 0.1496819108724594, |
| "learning_rate": 0.0002, |
| "loss": 1.6856, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.19486668327934215, |
| "grad_norm": 0.09984704852104187, |
| "learning_rate": 0.0002, |
| "loss": 1.6656, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.19536506354348368, |
| "grad_norm": 0.10864219069480896, |
| "learning_rate": 0.0002, |
| "loss": 1.659, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.19586344380762521, |
| "grad_norm": 0.09744228422641754, |
| "learning_rate": 0.0002, |
| "loss": 1.6162, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.19636182407176675, |
| "grad_norm": 0.11409466713666916, |
| "learning_rate": 0.0002, |
| "loss": 1.6646, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.19686020433590828, |
| "grad_norm": 0.096027672290802, |
| "learning_rate": 0.0002, |
| "loss": 1.6464, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.19735858460004985, |
| "grad_norm": 0.48993775248527527, |
| "learning_rate": 0.0002, |
| "loss": 1.7454, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.19785696486419138, |
| "grad_norm": 0.11972647160291672, |
| "learning_rate": 0.0002, |
| "loss": 1.6958, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.19835534512833292, |
| "grad_norm": 0.49595576524734497, |
| "learning_rate": 0.0002, |
| "loss": 1.6128, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.19885372539247445, |
| "grad_norm": 0.11590411514043808, |
| "learning_rate": 0.0002, |
| "loss": 1.7173, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.19935210565661599, |
| "grad_norm": 0.11584487557411194, |
| "learning_rate": 0.0002, |
| "loss": 1.6773, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.19985048592075755, |
| "grad_norm": 0.1017480343580246, |
| "learning_rate": 0.0002, |
| "loss": 1.6388, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.20034886618489908, |
| "grad_norm": 0.12011077255010605, |
| "learning_rate": 0.0002, |
| "loss": 1.707, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.20084724644904062, |
| "grad_norm": 0.36016201972961426, |
| "learning_rate": 0.0002, |
| "loss": 1.8179, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.20134562671318215, |
| "grad_norm": 0.11278028786182404, |
| "learning_rate": 0.0002, |
| "loss": 1.6733, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.2018440069773237, |
| "grad_norm": 0.10928738862276077, |
| "learning_rate": 0.0002, |
| "loss": 1.6858, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.20234238724146525, |
| "grad_norm": 0.10860306769609451, |
| "learning_rate": 0.0002, |
| "loss": 1.6975, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.20284076750560678, |
| "grad_norm": 0.11352024972438812, |
| "learning_rate": 0.0002, |
| "loss": 1.7504, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.20333914776974832, |
| "grad_norm": 0.10320567339658737, |
| "learning_rate": 0.0002, |
| "loss": 1.6715, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.20383752803388985, |
| "grad_norm": 0.12056868523359299, |
| "learning_rate": 0.0002, |
| "loss": 1.7571, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.2043359082980314, |
| "grad_norm": 0.11091714352369308, |
| "learning_rate": 0.0002, |
| "loss": 1.6391, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.20483428856217295, |
| "grad_norm": 0.10888761281967163, |
| "learning_rate": 0.0002, |
| "loss": 1.6763, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.20533266882631449, |
| "grad_norm": 0.2625375986099243, |
| "learning_rate": 0.0002, |
| "loss": 1.58, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.20583104909045602, |
| "grad_norm": 0.12070990353822708, |
| "learning_rate": 0.0002, |
| "loss": 1.7437, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.20632942935459755, |
| "grad_norm": 0.09670402854681015, |
| "learning_rate": 0.0002, |
| "loss": 1.6502, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.2068278096187391, |
| "grad_norm": 0.10343360900878906, |
| "learning_rate": 0.0002, |
| "loss": 1.7273, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.20732618988288065, |
| "grad_norm": 0.10445055365562439, |
| "learning_rate": 0.0002, |
| "loss": 1.674, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.2078245701470222, |
| "grad_norm": 0.24325382709503174, |
| "learning_rate": 0.0002, |
| "loss": 1.7492, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.20832295041116372, |
| "grad_norm": 0.10541153699159622, |
| "learning_rate": 0.0002, |
| "loss": 1.6389, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.20882133067530526, |
| "grad_norm": 0.09688902646303177, |
| "learning_rate": 0.0002, |
| "loss": 1.7145, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.2093197109394468, |
| "grad_norm": 0.10568691790103912, |
| "learning_rate": 0.0002, |
| "loss": 1.6699, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.20981809120358832, |
| "grad_norm": 0.09683585166931152, |
| "learning_rate": 0.0002, |
| "loss": 1.6411, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.2103164714677299, |
| "grad_norm": 0.10286644101142883, |
| "learning_rate": 0.0002, |
| "loss": 1.6951, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.21081485173187142, |
| "grad_norm": 0.09786178171634674, |
| "learning_rate": 0.0002, |
| "loss": 1.6316, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.21131323199601296, |
| "grad_norm": 0.10202211886644363, |
| "learning_rate": 0.0002, |
| "loss": 1.6702, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2118116122601545, |
| "grad_norm": 0.10444546490907669, |
| "learning_rate": 0.0002, |
| "loss": 1.6371, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.21230999252429603, |
| "grad_norm": 0.09346964955329895, |
| "learning_rate": 0.0002, |
| "loss": 1.6638, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.2128083727884376, |
| "grad_norm": 0.09578395634889603, |
| "learning_rate": 0.0002, |
| "loss": 1.622, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.21330675305257912, |
| "grad_norm": 0.09412133693695068, |
| "learning_rate": 0.0002, |
| "loss": 1.6292, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.21380513331672066, |
| "grad_norm": 0.49985215067863464, |
| "learning_rate": 0.0002, |
| "loss": 1.7932, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.2143035135808622, |
| "grad_norm": 0.58636075258255, |
| "learning_rate": 0.0002, |
| "loss": 1.7671, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.21480189384500373, |
| "grad_norm": 0.12334456294775009, |
| "learning_rate": 0.0002, |
| "loss": 1.6392, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.2153002741091453, |
| "grad_norm": 0.13144731521606445, |
| "learning_rate": 0.0002, |
| "loss": 1.6686, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.21579865437328682, |
| "grad_norm": 0.14804112911224365, |
| "learning_rate": 0.0002, |
| "loss": 1.7357, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.21629703463742836, |
| "grad_norm": 0.7628450393676758, |
| "learning_rate": 0.0002, |
| "loss": 1.8465, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.2167954149015699, |
| "grad_norm": 0.18024517595767975, |
| "learning_rate": 0.0002, |
| "loss": 1.6732, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.21729379516571143, |
| "grad_norm": 0.195417121052742, |
| "learning_rate": 0.0002, |
| "loss": 1.7811, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.217792175429853, |
| "grad_norm": 0.28199324011802673, |
| "learning_rate": 0.0002, |
| "loss": 1.6088, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.21829055569399453, |
| "grad_norm": 0.15422897040843964, |
| "learning_rate": 0.0002, |
| "loss": 1.7555, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.21878893595813606, |
| "grad_norm": 0.13214194774627686, |
| "learning_rate": 0.0002, |
| "loss": 1.6575, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.2192873162222776, |
| "grad_norm": 0.14797765016555786, |
| "learning_rate": 0.0002, |
| "loss": 1.7903, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.21978569648641913, |
| "grad_norm": 0.12424055486917496, |
| "learning_rate": 0.0002, |
| "loss": 1.7089, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.22028407675056066, |
| "grad_norm": 0.5921161770820618, |
| "learning_rate": 0.0002, |
| "loss": 1.7352, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.22078245701470223, |
| "grad_norm": 0.1724957525730133, |
| "learning_rate": 0.0002, |
| "loss": 1.7427, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.22128083727884376, |
| "grad_norm": 0.1341264247894287, |
| "learning_rate": 0.0002, |
| "loss": 1.6738, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2217792175429853, |
| "grad_norm": 0.43373820185661316, |
| "learning_rate": 0.0002, |
| "loss": 1.7591, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.22227759780712683, |
| "grad_norm": 0.15030571818351746, |
| "learning_rate": 0.0002, |
| "loss": 1.7306, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.22277597807126837, |
| "grad_norm": 0.15096893906593323, |
| "learning_rate": 0.0002, |
| "loss": 1.7637, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.22327435833540993, |
| "grad_norm": 0.1577889323234558, |
| "learning_rate": 0.0002, |
| "loss": 1.6704, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.22377273859955146, |
| "grad_norm": 0.11596284061670303, |
| "learning_rate": 0.0002, |
| "loss": 1.5843, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.224271118863693, |
| "grad_norm": 0.14083531498908997, |
| "learning_rate": 0.0002, |
| "loss": 1.6502, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.22476949912783453, |
| "grad_norm": 0.11369968950748444, |
| "learning_rate": 0.0002, |
| "loss": 1.7063, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.22526787939197607, |
| "grad_norm": 0.12249240279197693, |
| "learning_rate": 0.0002, |
| "loss": 1.6041, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.22576625965611763, |
| "grad_norm": 0.13246704638004303, |
| "learning_rate": 0.0002, |
| "loss": 1.7227, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.22626463992025916, |
| "grad_norm": 0.15372870862483978, |
| "learning_rate": 0.0002, |
| "loss": 1.7364, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2267630201844007, |
| "grad_norm": 0.10773339122533798, |
| "learning_rate": 0.0002, |
| "loss": 1.6797, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.22726140044854223, |
| "grad_norm": 0.10603539645671844, |
| "learning_rate": 0.0002, |
| "loss": 1.6608, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.22775978071268377, |
| "grad_norm": 0.11118324100971222, |
| "learning_rate": 0.0002, |
| "loss": 1.6659, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.22825816097682533, |
| "grad_norm": 0.10193316638469696, |
| "learning_rate": 0.0002, |
| "loss": 1.7149, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.22875654124096687, |
| "grad_norm": 0.118270143866539, |
| "learning_rate": 0.0002, |
| "loss": 1.6581, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2292549215051084, |
| "grad_norm": 0.09839551895856857, |
| "learning_rate": 0.0002, |
| "loss": 1.6906, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.22975330176924993, |
| "grad_norm": 0.10430920869112015, |
| "learning_rate": 0.0002, |
| "loss": 1.6367, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.23025168203339147, |
| "grad_norm": 0.7883297204971313, |
| "learning_rate": 0.0002, |
| "loss": 1.8726, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.230750062297533, |
| "grad_norm": 0.14015096426010132, |
| "learning_rate": 0.0002, |
| "loss": 1.6885, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.23124844256167457, |
| "grad_norm": 0.6940969824790955, |
| "learning_rate": 0.0002, |
| "loss": 1.8366, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2317468228258161, |
| "grad_norm": 0.16839167475700378, |
| "learning_rate": 0.0002, |
| "loss": 1.6627, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.23224520308995764, |
| "grad_norm": 0.14831361174583435, |
| "learning_rate": 0.0002, |
| "loss": 1.6192, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.23274358335409917, |
| "grad_norm": 0.6374949216842651, |
| "learning_rate": 0.0002, |
| "loss": 1.8086, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.2332419636182407, |
| "grad_norm": 0.1442909985780716, |
| "learning_rate": 0.0002, |
| "loss": 1.6875, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.23374034388238227, |
| "grad_norm": 0.15487882494926453, |
| "learning_rate": 0.0002, |
| "loss": 1.6939, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.2342387241465238, |
| "grad_norm": 0.133474662899971, |
| "learning_rate": 0.0002, |
| "loss": 1.6011, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.23473710441066534, |
| "grad_norm": 0.15738508105278015, |
| "learning_rate": 0.0002, |
| "loss": 1.6801, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.23523548467480687, |
| "grad_norm": 0.13371291756629944, |
| "learning_rate": 0.0002, |
| "loss": 1.6454, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.2357338649389484, |
| "grad_norm": 0.12480079382658005, |
| "learning_rate": 0.0002, |
| "loss": 1.613, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.23623224520308997, |
| "grad_norm": 0.138162761926651, |
| "learning_rate": 0.0002, |
| "loss": 1.6844, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.2367306254672315, |
| "grad_norm": 0.13453134894371033, |
| "learning_rate": 0.0002, |
| "loss": 1.7113, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.23722900573137304, |
| "grad_norm": 0.11864453554153442, |
| "learning_rate": 0.0002, |
| "loss": 1.7311, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.23772738599551457, |
| "grad_norm": 0.3905930817127228, |
| "learning_rate": 0.0002, |
| "loss": 1.7638, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.2382257662596561, |
| "grad_norm": 0.1613403707742691, |
| "learning_rate": 0.0002, |
| "loss": 1.6413, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.23872414652379767, |
| "grad_norm": 0.13828811049461365, |
| "learning_rate": 0.0002, |
| "loss": 1.7163, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2392225267879392, |
| "grad_norm": 0.13535858690738678, |
| "learning_rate": 0.0002, |
| "loss": 1.6059, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.23972090705208074, |
| "grad_norm": 0.15594834089279175, |
| "learning_rate": 0.0002, |
| "loss": 1.7161, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.24021928731622227, |
| "grad_norm": 0.11990589648485184, |
| "learning_rate": 0.0002, |
| "loss": 1.7051, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2407176675803638, |
| "grad_norm": 0.11655411124229431, |
| "learning_rate": 0.0002, |
| "loss": 1.6711, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.24121604784450537, |
| "grad_norm": 0.11754405498504639, |
| "learning_rate": 0.0002, |
| "loss": 1.7237, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.2417144281086469, |
| "grad_norm": 0.1332051157951355, |
| "learning_rate": 0.0002, |
| "loss": 1.7598, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.24221280837278844, |
| "grad_norm": 0.10240749269723892, |
| "learning_rate": 0.0002, |
| "loss": 1.6356, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.24271118863692998, |
| "grad_norm": 0.1425447165966034, |
| "learning_rate": 0.0002, |
| "loss": 1.7993, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.2432095689010715, |
| "grad_norm": 0.10178319364786148, |
| "learning_rate": 0.0002, |
| "loss": 1.6705, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.24370794916521304, |
| "grad_norm": 0.354878306388855, |
| "learning_rate": 0.0002, |
| "loss": 1.7251, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2442063294293546, |
| "grad_norm": 0.10244394838809967, |
| "learning_rate": 0.0002, |
| "loss": 1.5874, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.24470470969349614, |
| "grad_norm": 0.10944903641939163, |
| "learning_rate": 0.0002, |
| "loss": 1.5817, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.24520308995763768, |
| "grad_norm": 0.11182764172554016, |
| "learning_rate": 0.0002, |
| "loss": 1.6859, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.2457014702217792, |
| "grad_norm": 0.11066277325153351, |
| "learning_rate": 0.0002, |
| "loss": 1.6275, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.24619985048592075, |
| "grad_norm": 0.6789163947105408, |
| "learning_rate": 0.0002, |
| "loss": 1.8408, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.2466982307500623, |
| "grad_norm": 0.15237462520599365, |
| "learning_rate": 0.0002, |
| "loss": 1.5969, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.24719661101420384, |
| "grad_norm": 0.14016127586364746, |
| "learning_rate": 0.0002, |
| "loss": 1.6325, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.24769499127834538, |
| "grad_norm": 0.12557458877563477, |
| "learning_rate": 0.0002, |
| "loss": 1.6745, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.2481933715424869, |
| "grad_norm": 0.12593714892864227, |
| "learning_rate": 0.0002, |
| "loss": 1.7337, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.24869175180662845, |
| "grad_norm": 0.12869895994663239, |
| "learning_rate": 0.0002, |
| "loss": 1.6982, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.24919013207077, |
| "grad_norm": 0.6727408766746521, |
| "learning_rate": 0.0002, |
| "loss": 1.7735, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.24968851233491154, |
| "grad_norm": 0.18164046108722687, |
| "learning_rate": 0.0002, |
| "loss": 1.7327, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2501868925990531, |
| "grad_norm": 0.12988890707492828, |
| "learning_rate": 0.0002, |
| "loss": 1.6335, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.25068527286319464, |
| "grad_norm": 0.14229950308799744, |
| "learning_rate": 0.0002, |
| "loss": 1.6705, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.25118365312733615, |
| "grad_norm": 0.12232649326324463, |
| "learning_rate": 0.0002, |
| "loss": 1.5992, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.2516820333914777, |
| "grad_norm": 0.12053592503070831, |
| "learning_rate": 0.0002, |
| "loss": 1.5962, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2521804136556192, |
| "grad_norm": 0.12370762974023819, |
| "learning_rate": 0.0002, |
| "loss": 1.6675, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.2526787939197608, |
| "grad_norm": 0.11628440022468567, |
| "learning_rate": 0.0002, |
| "loss": 1.6743, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.25317717418390234, |
| "grad_norm": 0.1284741759300232, |
| "learning_rate": 0.0002, |
| "loss": 1.6903, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.25367555444804385, |
| "grad_norm": 0.133184552192688, |
| "learning_rate": 0.0002, |
| "loss": 1.6735, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.2541739347121854, |
| "grad_norm": 0.11966334283351898, |
| "learning_rate": 0.0002, |
| "loss": 1.6323, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2546723149763269, |
| "grad_norm": 0.12117716670036316, |
| "learning_rate": 0.0002, |
| "loss": 1.6458, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2551706952404685, |
| "grad_norm": 0.11778345704078674, |
| "learning_rate": 0.0002, |
| "loss": 1.6272, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.25566907550461004, |
| "grad_norm": 0.11609595268964767, |
| "learning_rate": 0.0002, |
| "loss": 1.6588, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.25616745576875155, |
| "grad_norm": 0.11605001240968704, |
| "learning_rate": 0.0002, |
| "loss": 1.6666, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.2566658360328931, |
| "grad_norm": 0.10593124479055405, |
| "learning_rate": 0.0002, |
| "loss": 1.6628, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.2571642162970346, |
| "grad_norm": 0.11132659763097763, |
| "learning_rate": 0.0002, |
| "loss": 1.7112, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.2576625965611762, |
| "grad_norm": 0.09980247169733047, |
| "learning_rate": 0.0002, |
| "loss": 1.6759, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.25816097682531775, |
| "grad_norm": 0.6143377423286438, |
| "learning_rate": 0.0002, |
| "loss": 1.6616, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.25865935708945925, |
| "grad_norm": 0.11244726181030273, |
| "learning_rate": 0.0002, |
| "loss": 1.7124, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.2591577373536008, |
| "grad_norm": 0.6190444827079773, |
| "learning_rate": 0.0002, |
| "loss": 1.7698, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2596561176177423, |
| "grad_norm": 0.7441633939743042, |
| "learning_rate": 0.0002, |
| "loss": 1.8182, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.2601544978818839, |
| "grad_norm": 0.13578347861766815, |
| "learning_rate": 0.0002, |
| "loss": 1.6609, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.2606528781460254, |
| "grad_norm": 0.1662416160106659, |
| "learning_rate": 0.0002, |
| "loss": 1.7167, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.26115125841016695, |
| "grad_norm": 0.16020916402339935, |
| "learning_rate": 0.0002, |
| "loss": 1.6636, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.2616496386743085, |
| "grad_norm": 0.12748084962368011, |
| "learning_rate": 0.0002, |
| "loss": 1.6832, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.26214801893845, |
| "grad_norm": 0.13277047872543335, |
| "learning_rate": 0.0002, |
| "loss": 1.682, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.2626463992025916, |
| "grad_norm": 0.11746570467948914, |
| "learning_rate": 0.0002, |
| "loss": 1.6567, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.2631447794667331, |
| "grad_norm": 0.1124933585524559, |
| "learning_rate": 0.0002, |
| "loss": 1.6462, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.26364315973087465, |
| "grad_norm": 0.13045774400234222, |
| "learning_rate": 0.0002, |
| "loss": 1.7247, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2641415399950162, |
| "grad_norm": 0.11953026801347733, |
| "learning_rate": 0.0002, |
| "loss": 1.6896, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2646399202591577, |
| "grad_norm": 0.3236943185329437, |
| "learning_rate": 0.0002, |
| "loss": 1.6562, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.2651383005232993, |
| "grad_norm": 0.13000494241714478, |
| "learning_rate": 0.0002, |
| "loss": 1.6329, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.2656366807874408, |
| "grad_norm": 0.13072949647903442, |
| "learning_rate": 0.0002, |
| "loss": 1.6584, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.26613506105158236, |
| "grad_norm": 0.30452999472618103, |
| "learning_rate": 0.0002, |
| "loss": 1.6066, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.2666334413157239, |
| "grad_norm": 0.11118455231189728, |
| "learning_rate": 0.0002, |
| "loss": 1.6874, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.2671318215798654, |
| "grad_norm": 0.12459013611078262, |
| "learning_rate": 0.0002, |
| "loss": 1.6959, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.267630201844007, |
| "grad_norm": 0.10970738530158997, |
| "learning_rate": 0.0002, |
| "loss": 1.6167, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.2681285821081485, |
| "grad_norm": 0.1440659761428833, |
| "learning_rate": 0.0002, |
| "loss": 1.7254, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.26862696237229006, |
| "grad_norm": 0.11448108404874802, |
| "learning_rate": 0.0002, |
| "loss": 1.6896, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2691253426364316, |
| "grad_norm": 0.11026275157928467, |
| "learning_rate": 0.0002, |
| "loss": 1.6675, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.2696237229005731, |
| "grad_norm": 0.10443202406167984, |
| "learning_rate": 0.0002, |
| "loss": 1.7035, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.2701221031647147, |
| "grad_norm": 0.11404629796743393, |
| "learning_rate": 0.0002, |
| "loss": 1.727, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2706204834288562, |
| "grad_norm": 0.12783807516098022, |
| "learning_rate": 0.0002, |
| "loss": 1.7468, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.27111886369299776, |
| "grad_norm": 0.1040879487991333, |
| "learning_rate": 0.0002, |
| "loss": 1.642, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.2716172439571393, |
| "grad_norm": 0.10120297223329544, |
| "learning_rate": 0.0002, |
| "loss": 1.6792, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2721156242212808, |
| "grad_norm": 0.11116039007902145, |
| "learning_rate": 0.0002, |
| "loss": 1.6685, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.2726140044854224, |
| "grad_norm": 0.353816956281662, |
| "learning_rate": 0.0002, |
| "loss": 1.7458, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.2731123847495639, |
| "grad_norm": 0.10361409932374954, |
| "learning_rate": 0.0002, |
| "loss": 1.583, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.27361076501370546, |
| "grad_norm": 0.10164079070091248, |
| "learning_rate": 0.0002, |
| "loss": 1.7219, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.274109145277847, |
| "grad_norm": 0.3576943278312683, |
| "learning_rate": 0.0002, |
| "loss": 1.7155, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.27460752554198853, |
| "grad_norm": 0.1307370960712433, |
| "learning_rate": 0.0002, |
| "loss": 1.6491, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.2751059058061301, |
| "grad_norm": 0.11267419159412384, |
| "learning_rate": 0.0002, |
| "loss": 1.6299, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2756042860702716, |
| "grad_norm": 0.10955934971570969, |
| "learning_rate": 0.0002, |
| "loss": 1.6972, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.27610266633441316, |
| "grad_norm": 0.3629993796348572, |
| "learning_rate": 0.0002, |
| "loss": 1.6558, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.2766010465985547, |
| "grad_norm": 0.10678595304489136, |
| "learning_rate": 0.0002, |
| "loss": 1.7133, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.27709942686269623, |
| "grad_norm": 0.3551732301712036, |
| "learning_rate": 0.0002, |
| "loss": 1.7884, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.2775978071268378, |
| "grad_norm": 0.1157960370182991, |
| "learning_rate": 0.0002, |
| "loss": 1.6664, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.2780961873909793, |
| "grad_norm": 0.4219015836715698, |
| "learning_rate": 0.0002, |
| "loss": 1.6258, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.27859456765512086, |
| "grad_norm": 0.1442400962114334, |
| "learning_rate": 0.0002, |
| "loss": 1.7081, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.2790929479192624, |
| "grad_norm": 0.12307796627283096, |
| "learning_rate": 0.0002, |
| "loss": 1.5812, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.27959132818340393, |
| "grad_norm": 0.13523195683956146, |
| "learning_rate": 0.0002, |
| "loss": 1.6644, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.2800897084475455, |
| "grad_norm": 0.14576253294944763, |
| "learning_rate": 0.0002, |
| "loss": 1.6724, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.280588088711687, |
| "grad_norm": 0.1239597350358963, |
| "learning_rate": 0.0002, |
| "loss": 1.6501, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.28108646897582856, |
| "grad_norm": 0.11444118618965149, |
| "learning_rate": 0.0002, |
| "loss": 1.6218, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.28158484923997007, |
| "grad_norm": 0.11568321287631989, |
| "learning_rate": 0.0002, |
| "loss": 1.622, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.28208322950411163, |
| "grad_norm": 0.1155436560511589, |
| "learning_rate": 0.0002, |
| "loss": 1.6856, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.2825816097682532, |
| "grad_norm": 0.10945037007331848, |
| "learning_rate": 0.0002, |
| "loss": 1.5764, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.2830799900323947, |
| "grad_norm": 0.5043824315071106, |
| "learning_rate": 0.0002, |
| "loss": 1.7022, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.28357837029653626, |
| "grad_norm": 0.7879558801651001, |
| "learning_rate": 0.0002, |
| "loss": 1.8313, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.28407675056067777, |
| "grad_norm": 0.13888636231422424, |
| "learning_rate": 0.0002, |
| "loss": 1.6418, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.28457513082481933, |
| "grad_norm": 0.16137146949768066, |
| "learning_rate": 0.0002, |
| "loss": 1.6884, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.2850735110889609, |
| "grad_norm": 0.2237291783094406, |
| "learning_rate": 0.0002, |
| "loss": 1.7934, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.2855718913531024, |
| "grad_norm": 0.14624369144439697, |
| "learning_rate": 0.0002, |
| "loss": 1.676, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.28607027161724397, |
| "grad_norm": 0.1463831216096878, |
| "learning_rate": 0.0002, |
| "loss": 1.5869, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.28656865188138547, |
| "grad_norm": 0.14725126326084137, |
| "learning_rate": 0.0002, |
| "loss": 1.632, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.28706703214552703, |
| "grad_norm": 0.13732214272022247, |
| "learning_rate": 0.0002, |
| "loss": 1.7513, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2875654124096686, |
| "grad_norm": 0.14334504306316376, |
| "learning_rate": 0.0002, |
| "loss": 1.6318, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2880637926738101, |
| "grad_norm": 0.8194677829742432, |
| "learning_rate": 0.0002, |
| "loss": 1.8945, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.28856217293795167, |
| "grad_norm": 0.1749170422554016, |
| "learning_rate": 0.0002, |
| "loss": 1.6608, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.2890605532020932, |
| "grad_norm": 0.12977321445941925, |
| "learning_rate": 0.0002, |
| "loss": 1.6363, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.28955893346623474, |
| "grad_norm": 0.2908933162689209, |
| "learning_rate": 0.0002, |
| "loss": 1.8448, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.2900573137303763, |
| "grad_norm": 0.17108629643917084, |
| "learning_rate": 0.0002, |
| "loss": 1.6822, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.2905556939945178, |
| "grad_norm": 0.14702463150024414, |
| "learning_rate": 0.0002, |
| "loss": 1.7491, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.29105407425865937, |
| "grad_norm": 0.12582743167877197, |
| "learning_rate": 0.0002, |
| "loss": 1.6245, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2915524545228009, |
| "grad_norm": 0.14732137322425842, |
| "learning_rate": 0.0002, |
| "loss": 1.6916, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.29205083478694244, |
| "grad_norm": 0.12849657237529755, |
| "learning_rate": 0.0002, |
| "loss": 1.6583, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.292549215051084, |
| "grad_norm": 0.11466097086668015, |
| "learning_rate": 0.0002, |
| "loss": 1.6306, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.2930475953152255, |
| "grad_norm": 0.12361207604408264, |
| "learning_rate": 0.0002, |
| "loss": 1.6765, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.29354597557936707, |
| "grad_norm": 0.1265360414981842, |
| "learning_rate": 0.0002, |
| "loss": 1.667, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2940443558435086, |
| "grad_norm": 0.11903838813304901, |
| "learning_rate": 0.0002, |
| "loss": 1.6567, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.29454273610765014, |
| "grad_norm": 0.8345243334770203, |
| "learning_rate": 0.0002, |
| "loss": 1.6467, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.2950411163717917, |
| "grad_norm": 0.1365821361541748, |
| "learning_rate": 0.0002, |
| "loss": 1.7028, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2955394966359332, |
| "grad_norm": 0.13564884662628174, |
| "learning_rate": 0.0002, |
| "loss": 1.6129, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.29603787690007477, |
| "grad_norm": 0.13604499399662018, |
| "learning_rate": 0.0002, |
| "loss": 1.7387, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.2965362571642163, |
| "grad_norm": 0.12102136015892029, |
| "learning_rate": 0.0002, |
| "loss": 1.632, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.29703463742835784, |
| "grad_norm": 0.11927222460508347, |
| "learning_rate": 0.0002, |
| "loss": 1.7149, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2975330176924994, |
| "grad_norm": 0.10716401040554047, |
| "learning_rate": 0.0002, |
| "loss": 1.6268, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.2980313979566409, |
| "grad_norm": 0.12001641094684601, |
| "learning_rate": 0.0002, |
| "loss": 1.6879, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.29852977822078247, |
| "grad_norm": 0.11045756936073303, |
| "learning_rate": 0.0002, |
| "loss": 1.6871, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.299028158484924, |
| "grad_norm": 0.7450900077819824, |
| "learning_rate": 0.0002, |
| "loss": 1.8146, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.29952653874906554, |
| "grad_norm": 0.16306158900260925, |
| "learning_rate": 0.0002, |
| "loss": 1.7092, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.3000249190132071, |
| "grad_norm": 0.43425318598747253, |
| "learning_rate": 0.0002, |
| "loss": 1.7405, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.3005232992773486, |
| "grad_norm": 0.16279961168766022, |
| "learning_rate": 0.0002, |
| "loss": 1.6, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.3010216795414902, |
| "grad_norm": 0.1403011977672577, |
| "learning_rate": 0.0002, |
| "loss": 1.5979, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3015200598056317, |
| "grad_norm": 0.13146822154521942, |
| "learning_rate": 0.0002, |
| "loss": 1.5689, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.30201844006977324, |
| "grad_norm": 0.15902653336524963, |
| "learning_rate": 0.0002, |
| "loss": 1.6664, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.3025168203339148, |
| "grad_norm": 0.12351160496473312, |
| "learning_rate": 0.0002, |
| "loss": 1.714, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.3030152005980563, |
| "grad_norm": 0.1543518602848053, |
| "learning_rate": 0.0002, |
| "loss": 1.6432, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3035135808621979, |
| "grad_norm": 0.11827117949724197, |
| "learning_rate": 0.0002, |
| "loss": 1.6325, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.3040119611263394, |
| "grad_norm": 0.5559304356575012, |
| "learning_rate": 0.0002, |
| "loss": 1.6789, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.30451034139048094, |
| "grad_norm": 0.13763754069805145, |
| "learning_rate": 0.0002, |
| "loss": 1.6715, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.30500872165462245, |
| "grad_norm": 0.12646999955177307, |
| "learning_rate": 0.0002, |
| "loss": 1.7162, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.305507101918764, |
| "grad_norm": 0.34849414229393005, |
| "learning_rate": 0.0002, |
| "loss": 1.6708, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.3060054821829056, |
| "grad_norm": 0.11648757755756378, |
| "learning_rate": 0.0002, |
| "loss": 1.646, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3065038624470471, |
| "grad_norm": 0.13477148115634918, |
| "learning_rate": 0.0002, |
| "loss": 1.6502, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.30700224271118864, |
| "grad_norm": 0.1102217361330986, |
| "learning_rate": 0.0002, |
| "loss": 1.6729, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.30750062297533015, |
| "grad_norm": 0.5752671957015991, |
| "learning_rate": 0.0002, |
| "loss": 1.6233, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.3079990032394717, |
| "grad_norm": 0.13107599318027496, |
| "learning_rate": 0.0002, |
| "loss": 1.6636, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.3084973835036133, |
| "grad_norm": 0.11860768496990204, |
| "learning_rate": 0.0002, |
| "loss": 1.7313, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.3089957637677548, |
| "grad_norm": 0.1229948177933693, |
| "learning_rate": 0.0002, |
| "loss": 1.6327, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.30949414403189635, |
| "grad_norm": 0.30836552381515503, |
| "learning_rate": 0.0002, |
| "loss": 1.6969, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.30999252429603785, |
| "grad_norm": 0.11798208951950073, |
| "learning_rate": 0.0002, |
| "loss": 1.7364, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.3104909045601794, |
| "grad_norm": 0.4807080030441284, |
| "learning_rate": 0.0002, |
| "loss": 1.6899, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.310989284824321, |
| "grad_norm": 0.1726754605770111, |
| "learning_rate": 0.0002, |
| "loss": 1.8045, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3114876650884625, |
| "grad_norm": 0.13296914100646973, |
| "learning_rate": 0.0002, |
| "loss": 1.6966, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.31198604535260405, |
| "grad_norm": 0.14966656267642975, |
| "learning_rate": 0.0002, |
| "loss": 1.6685, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.31248442561674555, |
| "grad_norm": 0.3757789731025696, |
| "learning_rate": 0.0002, |
| "loss": 1.7225, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.3129828058808871, |
| "grad_norm": 0.1234004870057106, |
| "learning_rate": 0.0002, |
| "loss": 1.6204, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.3134811861450287, |
| "grad_norm": 0.12280552089214325, |
| "learning_rate": 0.0002, |
| "loss": 1.6913, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.3139795664091702, |
| "grad_norm": 0.12360548228025436, |
| "learning_rate": 0.0002, |
| "loss": 1.6808, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.31447794667331175, |
| "grad_norm": 0.1292014867067337, |
| "learning_rate": 0.0002, |
| "loss": 1.6697, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.31497632693745325, |
| "grad_norm": 0.11038494855165482, |
| "learning_rate": 0.0002, |
| "loss": 1.6103, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.3154747072015948, |
| "grad_norm": 0.11607655137777328, |
| "learning_rate": 0.0002, |
| "loss": 1.6241, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.3159730874657364, |
| "grad_norm": 0.10514742881059647, |
| "learning_rate": 0.0002, |
| "loss": 1.6922, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.3164714677298779, |
| "grad_norm": 0.107606902718544, |
| "learning_rate": 0.0002, |
| "loss": 1.6975, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.31696984799401945, |
| "grad_norm": 0.20367765426635742, |
| "learning_rate": 0.0002, |
| "loss": 1.5704, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.31746822825816096, |
| "grad_norm": 0.10455407947301865, |
| "learning_rate": 0.0002, |
| "loss": 1.7109, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3179666085223025, |
| "grad_norm": 0.48424893617630005, |
| "learning_rate": 0.0002, |
| "loss": 1.5871, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3184649887864441, |
| "grad_norm": 0.16340336203575134, |
| "learning_rate": 0.0002, |
| "loss": 1.6856, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.3189633690505856, |
| "grad_norm": 0.1317445933818817, |
| "learning_rate": 0.0002, |
| "loss": 1.6904, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.31946174931472715, |
| "grad_norm": 0.12784677743911743, |
| "learning_rate": 0.0002, |
| "loss": 1.6983, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.31996012957886866, |
| "grad_norm": 0.10745134204626083, |
| "learning_rate": 0.0002, |
| "loss": 1.6353, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.3204585098430102, |
| "grad_norm": 0.1444125920534134, |
| "learning_rate": 0.0002, |
| "loss": 1.7109, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.3209568901071518, |
| "grad_norm": 0.3750239908695221, |
| "learning_rate": 0.0002, |
| "loss": 1.6571, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.3214552703712933, |
| "grad_norm": 0.11034873872995377, |
| "learning_rate": 0.0002, |
| "loss": 1.6547, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.32195365063543485, |
| "grad_norm": 0.10759663581848145, |
| "learning_rate": 0.0002, |
| "loss": 1.628, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.32245203089957636, |
| "grad_norm": 0.11017131060361862, |
| "learning_rate": 0.0002, |
| "loss": 1.6877, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3229504111637179, |
| "grad_norm": 0.1253817230463028, |
| "learning_rate": 0.0002, |
| "loss": 1.7226, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.3234487914278595, |
| "grad_norm": 0.5153695344924927, |
| "learning_rate": 0.0002, |
| "loss": 1.7687, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.323947171692001, |
| "grad_norm": 0.11948184669017792, |
| "learning_rate": 0.0002, |
| "loss": 1.7044, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.32444555195614255, |
| "grad_norm": 0.11249465495347977, |
| "learning_rate": 0.0002, |
| "loss": 1.6282, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.32494393222028406, |
| "grad_norm": 0.11555810272693634, |
| "learning_rate": 0.0002, |
| "loss": 1.7295, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.3254423124844256, |
| "grad_norm": 0.11882718652486801, |
| "learning_rate": 0.0002, |
| "loss": 1.6531, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.32594069274856713, |
| "grad_norm": 0.10453632473945618, |
| "learning_rate": 0.0002, |
| "loss": 1.6342, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.3264390730127087, |
| "grad_norm": 0.11219029873609543, |
| "learning_rate": 0.0002, |
| "loss": 1.6902, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.32693745327685025, |
| "grad_norm": 0.10499835759401321, |
| "learning_rate": 0.0002, |
| "loss": 1.5583, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.32743583354099176, |
| "grad_norm": 0.10964427143335342, |
| "learning_rate": 0.0002, |
| "loss": 1.5675, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.3279342138051333, |
| "grad_norm": 0.18510489165782928, |
| "learning_rate": 0.0002, |
| "loss": 1.6178, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.32843259406927483, |
| "grad_norm": 0.11548275500535965, |
| "learning_rate": 0.0002, |
| "loss": 1.6699, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3289309743334164, |
| "grad_norm": 0.11357063800096512, |
| "learning_rate": 0.0002, |
| "loss": 1.6008, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.32942935459755796, |
| "grad_norm": 0.10668730735778809, |
| "learning_rate": 0.0002, |
| "loss": 1.6433, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.32992773486169946, |
| "grad_norm": 0.11750250309705734, |
| "learning_rate": 0.0002, |
| "loss": 1.6813, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.330426115125841, |
| "grad_norm": 0.8277010321617126, |
| "learning_rate": 0.0002, |
| "loss": 1.7333, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.33092449538998253, |
| "grad_norm": 0.165303573012352, |
| "learning_rate": 0.0002, |
| "loss": 1.6812, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.3314228756541241, |
| "grad_norm": 0.12780268490314484, |
| "learning_rate": 0.0002, |
| "loss": 1.7106, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.33192125591826566, |
| "grad_norm": 0.13066166639328003, |
| "learning_rate": 0.0002, |
| "loss": 1.6846, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.33241963618240716, |
| "grad_norm": 0.12650184333324432, |
| "learning_rate": 0.0002, |
| "loss": 1.6144, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.3329180164465487, |
| "grad_norm": 0.12420842051506042, |
| "learning_rate": 0.0002, |
| "loss": 1.7015, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.33341639671069023, |
| "grad_norm": 0.1261165291070938, |
| "learning_rate": 0.0002, |
| "loss": 1.67, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.3339147769748318, |
| "grad_norm": 0.11121337115764618, |
| "learning_rate": 0.0002, |
| "loss": 1.6772, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.33441315723897336, |
| "grad_norm": 0.10835525393486023, |
| "learning_rate": 0.0002, |
| "loss": 1.6681, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.33491153750311486, |
| "grad_norm": 0.10837749391794205, |
| "learning_rate": 0.0002, |
| "loss": 1.6268, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3354099177672564, |
| "grad_norm": 0.10254842787981033, |
| "learning_rate": 0.0002, |
| "loss": 1.5997, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.33590829803139793, |
| "grad_norm": 0.5288554430007935, |
| "learning_rate": 0.0002, |
| "loss": 1.7397, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.3364066782955395, |
| "grad_norm": 0.10820039361715317, |
| "learning_rate": 0.0002, |
| "loss": 1.6962, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.33690505855968106, |
| "grad_norm": 0.11754646897315979, |
| "learning_rate": 0.0002, |
| "loss": 1.6059, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.33740343882382257, |
| "grad_norm": 0.9506744742393494, |
| "learning_rate": 0.0002, |
| "loss": 1.8916, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.33790181908796413, |
| "grad_norm": 0.1273750215768814, |
| "learning_rate": 0.0002, |
| "loss": 1.6896, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.33840019935210564, |
| "grad_norm": 0.14315767586231232, |
| "learning_rate": 0.0002, |
| "loss": 1.6903, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3388985796162472, |
| "grad_norm": 0.15645241737365723, |
| "learning_rate": 0.0002, |
| "loss": 1.6823, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.33939695988038876, |
| "grad_norm": 0.5159462690353394, |
| "learning_rate": 0.0002, |
| "loss": 1.6947, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.33989534014453027, |
| "grad_norm": 0.13883577287197113, |
| "learning_rate": 0.0002, |
| "loss": 1.7448, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.34039372040867183, |
| "grad_norm": 0.39283788204193115, |
| "learning_rate": 0.0002, |
| "loss": 1.6181, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.34089210067281334, |
| "grad_norm": 0.20534516870975494, |
| "learning_rate": 0.0002, |
| "loss": 1.721, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.3413904809369549, |
| "grad_norm": 0.14379210770130157, |
| "learning_rate": 0.0002, |
| "loss": 1.6955, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.34188886120109646, |
| "grad_norm": 0.1505320966243744, |
| "learning_rate": 0.0002, |
| "loss": 1.7168, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.34238724146523797, |
| "grad_norm": 0.1377919316291809, |
| "learning_rate": 0.0002, |
| "loss": 1.7001, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.34288562172937953, |
| "grad_norm": 0.1268286257982254, |
| "learning_rate": 0.0002, |
| "loss": 1.6405, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.34338400199352104, |
| "grad_norm": 0.11991781741380692, |
| "learning_rate": 0.0002, |
| "loss": 1.6862, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.3438823822576626, |
| "grad_norm": 0.12283925712108612, |
| "learning_rate": 0.0002, |
| "loss": 1.7222, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.34438076252180416, |
| "grad_norm": 0.11207298189401627, |
| "learning_rate": 0.0002, |
| "loss": 1.6477, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.34487914278594567, |
| "grad_norm": 0.11342150717973709, |
| "learning_rate": 0.0002, |
| "loss": 1.6907, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.34537752305008723, |
| "grad_norm": 0.1479737013578415, |
| "learning_rate": 0.0002, |
| "loss": 1.6982, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.34587590331422874, |
| "grad_norm": 0.11498729884624481, |
| "learning_rate": 0.0002, |
| "loss": 1.6604, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.3463742835783703, |
| "grad_norm": 0.12394261360168457, |
| "learning_rate": 0.0002, |
| "loss": 1.699, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.34687266384251186, |
| "grad_norm": 0.12563689053058624, |
| "learning_rate": 0.0002, |
| "loss": 1.6637, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.34737104410665337, |
| "grad_norm": 0.10661863535642624, |
| "learning_rate": 0.0002, |
| "loss": 1.6921, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.34786942437079493, |
| "grad_norm": 0.10778840631246567, |
| "learning_rate": 0.0002, |
| "loss": 1.6719, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.34836780463493644, |
| "grad_norm": 0.10504487156867981, |
| "learning_rate": 0.0002, |
| "loss": 1.6616, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.348866184899078, |
| "grad_norm": 0.10722413659095764, |
| "learning_rate": 0.0002, |
| "loss": 1.6452, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3493645651632195, |
| "grad_norm": 0.10450419783592224, |
| "learning_rate": 0.0002, |
| "loss": 1.6342, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.34986294542736107, |
| "grad_norm": 0.10961712151765823, |
| "learning_rate": 0.0002, |
| "loss": 1.68, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.35036132569150263, |
| "grad_norm": 0.10789170861244202, |
| "learning_rate": 0.0002, |
| "loss": 1.6662, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.35085970595564414, |
| "grad_norm": 0.10823702067136765, |
| "learning_rate": 0.0002, |
| "loss": 1.6733, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.3513580862197857, |
| "grad_norm": 0.11080746352672577, |
| "learning_rate": 0.0002, |
| "loss": 1.6332, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.3518564664839272, |
| "grad_norm": 0.10004162788391113, |
| "learning_rate": 0.0002, |
| "loss": 1.5841, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.3523548467480688, |
| "grad_norm": 0.10398257523775101, |
| "learning_rate": 0.0002, |
| "loss": 1.6735, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.35285322701221034, |
| "grad_norm": 0.10170764476060867, |
| "learning_rate": 0.0002, |
| "loss": 1.6584, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.35335160727635184, |
| "grad_norm": 0.8194452524185181, |
| "learning_rate": 0.0002, |
| "loss": 1.8272, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.3538499875404934, |
| "grad_norm": 0.15103065967559814, |
| "learning_rate": 0.0002, |
| "loss": 1.6954, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3543483678046349, |
| "grad_norm": 0.12205032259225845, |
| "learning_rate": 0.0002, |
| "loss": 1.6823, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.3548467480687765, |
| "grad_norm": 0.1272657811641693, |
| "learning_rate": 0.0002, |
| "loss": 1.5557, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.35534512833291804, |
| "grad_norm": 0.503338634967804, |
| "learning_rate": 0.0002, |
| "loss": 1.7847, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.35584350859705954, |
| "grad_norm": 0.11442038416862488, |
| "learning_rate": 0.0002, |
| "loss": 1.6633, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3563418888612011, |
| "grad_norm": 0.1573084145784378, |
| "learning_rate": 0.0002, |
| "loss": 1.7377, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.3568402691253426, |
| "grad_norm": 0.11450973153114319, |
| "learning_rate": 0.0002, |
| "loss": 1.5862, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3573386493894842, |
| "grad_norm": 0.1249619573354721, |
| "learning_rate": 0.0002, |
| "loss": 1.5954, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.35783702965362574, |
| "grad_norm": 0.11494952440261841, |
| "learning_rate": 0.0002, |
| "loss": 1.6432, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.35833540991776724, |
| "grad_norm": 0.13213759660720825, |
| "learning_rate": 0.0002, |
| "loss": 1.803, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.3588337901819088, |
| "grad_norm": 1.1261271238327026, |
| "learning_rate": 0.0002, |
| "loss": 1.818, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.3593321704460503, |
| "grad_norm": 1.338255524635315, |
| "learning_rate": 0.0002, |
| "loss": 1.7306, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.3598305507101919, |
| "grad_norm": 0.21815264225006104, |
| "learning_rate": 0.0002, |
| "loss": 1.7224, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.36032893097433344, |
| "grad_norm": 0.5178132653236389, |
| "learning_rate": 0.0002, |
| "loss": 1.7097, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.36082731123847495, |
| "grad_norm": 0.241803839802742, |
| "learning_rate": 0.0002, |
| "loss": 1.7047, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.3613256915026165, |
| "grad_norm": 0.20727293193340302, |
| "learning_rate": 0.0002, |
| "loss": 1.7278, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.361824071766758, |
| "grad_norm": 0.16459515690803528, |
| "learning_rate": 0.0002, |
| "loss": 1.7204, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.3623224520308996, |
| "grad_norm": 0.16415144503116608, |
| "learning_rate": 0.0002, |
| "loss": 1.6764, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.36282083229504114, |
| "grad_norm": 0.16096027195453644, |
| "learning_rate": 0.0002, |
| "loss": 1.665, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.36331921255918265, |
| "grad_norm": 0.17240643501281738, |
| "learning_rate": 0.0002, |
| "loss": 1.6761, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.3638175928233242, |
| "grad_norm": 0.19763271510601044, |
| "learning_rate": 0.0002, |
| "loss": 1.7402, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3643159730874657, |
| "grad_norm": 0.15238463878631592, |
| "learning_rate": 0.0002, |
| "loss": 1.6884, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.3648143533516073, |
| "grad_norm": 0.27482038736343384, |
| "learning_rate": 0.0002, |
| "loss": 1.7064, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.36531273361574884, |
| "grad_norm": 0.5192012786865234, |
| "learning_rate": 0.0002, |
| "loss": 1.8117, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.36581111387989035, |
| "grad_norm": 0.1510191708803177, |
| "learning_rate": 0.0002, |
| "loss": 1.667, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.3663094941440319, |
| "grad_norm": 0.14513470232486725, |
| "learning_rate": 0.0002, |
| "loss": 1.6431, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3668078744081734, |
| "grad_norm": 0.7901990413665771, |
| "learning_rate": 0.0002, |
| "loss": 1.764, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.367306254672315, |
| "grad_norm": 0.17642100155353546, |
| "learning_rate": 0.0002, |
| "loss": 1.7096, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.36780463493645654, |
| "grad_norm": 0.14719779789447784, |
| "learning_rate": 0.0002, |
| "loss": 1.6343, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.36830301520059805, |
| "grad_norm": 0.16173601150512695, |
| "learning_rate": 0.0002, |
| "loss": 1.6937, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.3688013954647396, |
| "grad_norm": 0.32359546422958374, |
| "learning_rate": 0.0002, |
| "loss": 1.681, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.3692997757288811, |
| "grad_norm": 0.14779435098171234, |
| "learning_rate": 0.0002, |
| "loss": 1.6745, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.3697981559930227, |
| "grad_norm": 0.19540923833847046, |
| "learning_rate": 0.0002, |
| "loss": 1.5529, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.37029653625716424, |
| "grad_norm": 0.13870155811309814, |
| "learning_rate": 0.0002, |
| "loss": 1.6497, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.37079491652130575, |
| "grad_norm": 0.13447612524032593, |
| "learning_rate": 0.0002, |
| "loss": 1.7275, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.3712932967854473, |
| "grad_norm": 0.13197576999664307, |
| "learning_rate": 0.0002, |
| "loss": 1.6776, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.3717916770495888, |
| "grad_norm": 0.13072870671749115, |
| "learning_rate": 0.0002, |
| "loss": 1.6227, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.3722900573137304, |
| "grad_norm": 0.13418208062648773, |
| "learning_rate": 0.0002, |
| "loss": 1.6998, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.3727884375778719, |
| "grad_norm": 0.11689562350511551, |
| "learning_rate": 0.0002, |
| "loss": 1.6863, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.37328681784201345, |
| "grad_norm": 0.1243453249335289, |
| "learning_rate": 0.0002, |
| "loss": 1.6456, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.373785198106155, |
| "grad_norm": 0.11520450562238693, |
| "learning_rate": 0.0002, |
| "loss": 1.6815, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3742835783702965, |
| "grad_norm": 0.13939018547534943, |
| "learning_rate": 0.0002, |
| "loss": 1.6556, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.3747819586344381, |
| "grad_norm": 0.11021385341882706, |
| "learning_rate": 0.0002, |
| "loss": 1.6923, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.3752803388985796, |
| "grad_norm": 0.11470180004835129, |
| "learning_rate": 0.0002, |
| "loss": 1.6402, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.37577871916272115, |
| "grad_norm": 0.12256886065006256, |
| "learning_rate": 0.0002, |
| "loss": 1.7271, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.3762770994268627, |
| "grad_norm": 0.11696486920118332, |
| "learning_rate": 0.0002, |
| "loss": 1.7069, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.3767754796910042, |
| "grad_norm": 0.11340934783220291, |
| "learning_rate": 0.0002, |
| "loss": 1.6261, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.3772738599551458, |
| "grad_norm": 0.10606078803539276, |
| "learning_rate": 0.0002, |
| "loss": 1.6425, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.3777722402192873, |
| "grad_norm": 0.12084966152906418, |
| "learning_rate": 0.0002, |
| "loss": 1.6273, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.37827062048342885, |
| "grad_norm": 0.1084008663892746, |
| "learning_rate": 0.0002, |
| "loss": 1.6471, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.3787690007475704, |
| "grad_norm": 0.11194922029972076, |
| "learning_rate": 0.0002, |
| "loss": 1.6478, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.3792673810117119, |
| "grad_norm": 0.48235663771629333, |
| "learning_rate": 0.0002, |
| "loss": 1.5982, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.3797657612758535, |
| "grad_norm": 0.586637556552887, |
| "learning_rate": 0.0002, |
| "loss": 1.7294, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.380264141539995, |
| "grad_norm": 0.14328181743621826, |
| "learning_rate": 0.0002, |
| "loss": 1.7112, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.38076252180413656, |
| "grad_norm": 0.13296020030975342, |
| "learning_rate": 0.0002, |
| "loss": 1.7044, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.3812609020682781, |
| "grad_norm": 0.44004350900650024, |
| "learning_rate": 0.0002, |
| "loss": 1.6377, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.3817592823324196, |
| "grad_norm": 0.12628889083862305, |
| "learning_rate": 0.0002, |
| "loss": 1.6192, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.3822576625965612, |
| "grad_norm": 0.1330346316099167, |
| "learning_rate": 0.0002, |
| "loss": 1.6461, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.3827560428607027, |
| "grad_norm": 0.11893340200185776, |
| "learning_rate": 0.0002, |
| "loss": 1.6299, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.38325442312484426, |
| "grad_norm": 0.15412816405296326, |
| "learning_rate": 0.0002, |
| "loss": 1.7436, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3837528033889858, |
| "grad_norm": 0.12351204454898834, |
| "learning_rate": 0.0002, |
| "loss": 1.6844, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3842511836531273, |
| "grad_norm": 0.11671744287014008, |
| "learning_rate": 0.0002, |
| "loss": 1.6748, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.3847495639172689, |
| "grad_norm": 0.12512736022472382, |
| "learning_rate": 0.0002, |
| "loss": 1.6362, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.3852479441814104, |
| "grad_norm": 0.12629447877407074, |
| "learning_rate": 0.0002, |
| "loss": 1.6033, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.38574632444555196, |
| "grad_norm": 0.11553051322698593, |
| "learning_rate": 0.0002, |
| "loss": 1.6639, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3862447047096935, |
| "grad_norm": 0.12756189703941345, |
| "learning_rate": 0.0002, |
| "loss": 1.6397, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.386743084973835, |
| "grad_norm": 0.11309953778982162, |
| "learning_rate": 0.0002, |
| "loss": 1.6098, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3872414652379766, |
| "grad_norm": 0.164617121219635, |
| "learning_rate": 0.0002, |
| "loss": 1.54, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.3877398455021181, |
| "grad_norm": 0.45813101530075073, |
| "learning_rate": 0.0002, |
| "loss": 1.7208, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.38823822576625966, |
| "grad_norm": 0.7587694525718689, |
| "learning_rate": 0.0002, |
| "loss": 1.6195, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.3887366060304012, |
| "grad_norm": 0.12699078023433685, |
| "learning_rate": 0.0002, |
| "loss": 1.6596, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.38923498629454273, |
| "grad_norm": 0.139120951294899, |
| "learning_rate": 0.0002, |
| "loss": 1.6511, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.3897333665586843, |
| "grad_norm": 0.13968676328659058, |
| "learning_rate": 0.0002, |
| "loss": 1.7033, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.3902317468228258, |
| "grad_norm": 0.28061848878860474, |
| "learning_rate": 0.0002, |
| "loss": 1.6016, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.39073012708696736, |
| "grad_norm": 0.11748450994491577, |
| "learning_rate": 0.0002, |
| "loss": 1.5984, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.3912285073511089, |
| "grad_norm": 0.7288643717765808, |
| "learning_rate": 0.0002, |
| "loss": 1.769, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.39172688761525043, |
| "grad_norm": 0.12540021538734436, |
| "learning_rate": 0.0002, |
| "loss": 1.6622, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.392225267879392, |
| "grad_norm": 0.13594292104244232, |
| "learning_rate": 0.0002, |
| "loss": 1.6626, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.3927236481435335, |
| "grad_norm": 0.12894773483276367, |
| "learning_rate": 0.0002, |
| "loss": 1.5733, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.39322202840767506, |
| "grad_norm": 0.6577300429344177, |
| "learning_rate": 0.0002, |
| "loss": 1.8085, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.39372040867181657, |
| "grad_norm": 0.12034627795219421, |
| "learning_rate": 0.0002, |
| "loss": 1.5798, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.39421878893595813, |
| "grad_norm": 0.1254388988018036, |
| "learning_rate": 0.0002, |
| "loss": 1.6677, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.3947171692000997, |
| "grad_norm": 0.136959508061409, |
| "learning_rate": 0.0002, |
| "loss": 1.6108, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3952155494642412, |
| "grad_norm": 0.37221673130989075, |
| "learning_rate": 0.0002, |
| "loss": 1.826, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.39571392972838276, |
| "grad_norm": 0.14947831630706787, |
| "learning_rate": 0.0002, |
| "loss": 1.6967, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.39621230999252427, |
| "grad_norm": 0.1409454494714737, |
| "learning_rate": 0.0002, |
| "loss": 1.7217, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.39671069025666583, |
| "grad_norm": 0.1448691040277481, |
| "learning_rate": 0.0002, |
| "loss": 1.7872, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.3972090705208074, |
| "grad_norm": 0.12816311419010162, |
| "learning_rate": 0.0002, |
| "loss": 1.6976, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.3977074507849489, |
| "grad_norm": 0.12581898272037506, |
| "learning_rate": 0.0002, |
| "loss": 1.7111, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.39820583104909046, |
| "grad_norm": 0.1256158947944641, |
| "learning_rate": 0.0002, |
| "loss": 1.6778, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.39870421131323197, |
| "grad_norm": 0.12009266763925552, |
| "learning_rate": 0.0002, |
| "loss": 1.6336, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.39920259157737353, |
| "grad_norm": 0.14727051556110382, |
| "learning_rate": 0.0002, |
| "loss": 1.7165, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.3997009718415151, |
| "grad_norm": 1.98500394821167, |
| "learning_rate": 0.0002, |
| "loss": 1.9632, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.4001993521056566, |
| "grad_norm": 0.12300129979848862, |
| "learning_rate": 0.0002, |
| "loss": 1.6003, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.40069773236979817, |
| "grad_norm": 0.13758836686611176, |
| "learning_rate": 0.0002, |
| "loss": 1.6486, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.40119611263393967, |
| "grad_norm": 0.13127754628658295, |
| "learning_rate": 0.0002, |
| "loss": 1.6673, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.40169449289808123, |
| "grad_norm": 0.13612794876098633, |
| "learning_rate": 0.0002, |
| "loss": 1.7149, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.4021928731622228, |
| "grad_norm": 0.3637385964393616, |
| "learning_rate": 0.0002, |
| "loss": 1.6486, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.4026912534263643, |
| "grad_norm": 0.19778436422348022, |
| "learning_rate": 0.0002, |
| "loss": 1.5517, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.40318963369050587, |
| "grad_norm": 0.1478605717420578, |
| "learning_rate": 0.0002, |
| "loss": 1.7642, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.4036880139546474, |
| "grad_norm": 0.3014202415943146, |
| "learning_rate": 0.0002, |
| "loss": 1.6141, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.40418639421878894, |
| "grad_norm": 0.13049842417240143, |
| "learning_rate": 0.0002, |
| "loss": 1.6579, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.4046847744829305, |
| "grad_norm": 0.932788610458374, |
| "learning_rate": 0.0002, |
| "loss": 1.7722, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.405183154747072, |
| "grad_norm": 0.1687835305929184, |
| "learning_rate": 0.0002, |
| "loss": 1.6492, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.40568153501121357, |
| "grad_norm": 0.2024388164281845, |
| "learning_rate": 0.0002, |
| "loss": 1.5523, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.4061799152753551, |
| "grad_norm": 0.20838886499404907, |
| "learning_rate": 0.0002, |
| "loss": 1.6884, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.40667829553949664, |
| "grad_norm": 0.1490757167339325, |
| "learning_rate": 0.0002, |
| "loss": 1.6936, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.4071766758036382, |
| "grad_norm": 1.1997255086898804, |
| "learning_rate": 0.0002, |
| "loss": 1.873, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.4076750560677797, |
| "grad_norm": 0.139000803232193, |
| "learning_rate": 0.0002, |
| "loss": 1.7303, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.40817343633192127, |
| "grad_norm": 0.14747615158557892, |
| "learning_rate": 0.0002, |
| "loss": 1.6558, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.4086718165960628, |
| "grad_norm": 0.15866988897323608, |
| "learning_rate": 0.0002, |
| "loss": 1.6991, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.40917019686020434, |
| "grad_norm": 0.14660963416099548, |
| "learning_rate": 0.0002, |
| "loss": 1.7233, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.4096685771243459, |
| "grad_norm": 0.14071424305438995, |
| "learning_rate": 0.0002, |
| "loss": 1.6434, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.4101669573884874, |
| "grad_norm": 0.1368856132030487, |
| "learning_rate": 0.0002, |
| "loss": 1.6415, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.41066533765262897, |
| "grad_norm": 0.14662376046180725, |
| "learning_rate": 0.0002, |
| "loss": 1.7111, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.4111637179167705, |
| "grad_norm": 0.14027300477027893, |
| "learning_rate": 0.0002, |
| "loss": 1.6698, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.41166209818091204, |
| "grad_norm": 0.5542290210723877, |
| "learning_rate": 0.0002, |
| "loss": 1.6551, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.4121604784450536, |
| "grad_norm": 0.15360352396965027, |
| "learning_rate": 0.0002, |
| "loss": 1.7313, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.4126588587091951, |
| "grad_norm": 0.14451801776885986, |
| "learning_rate": 0.0002, |
| "loss": 1.6481, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.41315723897333667, |
| "grad_norm": 0.1393883228302002, |
| "learning_rate": 0.0002, |
| "loss": 1.5922, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.4136556192374782, |
| "grad_norm": 0.13610626757144928, |
| "learning_rate": 0.0002, |
| "loss": 1.6347, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.41415399950161974, |
| "grad_norm": 0.12424327433109283, |
| "learning_rate": 0.0002, |
| "loss": 1.6563, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.4146523797657613, |
| "grad_norm": 0.127548947930336, |
| "learning_rate": 0.0002, |
| "loss": 1.6609, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.4151507600299028, |
| "grad_norm": 0.1881740391254425, |
| "learning_rate": 0.0002, |
| "loss": 1.7251, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.4156491402940444, |
| "grad_norm": 0.12144262343645096, |
| "learning_rate": 0.0002, |
| "loss": 1.6922, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.4161475205581859, |
| "grad_norm": 0.11799559742212296, |
| "learning_rate": 0.0002, |
| "loss": 1.672, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.41664590082232744, |
| "grad_norm": 0.12129071354866028, |
| "learning_rate": 0.0002, |
| "loss": 1.6189, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.41714428108646895, |
| "grad_norm": 0.11648084223270416, |
| "learning_rate": 0.0002, |
| "loss": 1.636, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.4176426613506105, |
| "grad_norm": 0.11401843279600143, |
| "learning_rate": 0.0002, |
| "loss": 1.6266, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.4181410416147521, |
| "grad_norm": 0.11244560778141022, |
| "learning_rate": 0.0002, |
| "loss": 1.6338, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.4186394218788936, |
| "grad_norm": 0.11274567991495132, |
| "learning_rate": 0.0002, |
| "loss": 1.5518, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.41913780214303514, |
| "grad_norm": 0.11203539371490479, |
| "learning_rate": 0.0002, |
| "loss": 1.6372, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.41963618240717665, |
| "grad_norm": 0.11548861116170883, |
| "learning_rate": 0.0002, |
| "loss": 1.5787, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.4201345626713182, |
| "grad_norm": 0.10921257734298706, |
| "learning_rate": 0.0002, |
| "loss": 1.6457, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.4206329429354598, |
| "grad_norm": 0.10832211375236511, |
| "learning_rate": 0.0002, |
| "loss": 1.6613, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.4211313231996013, |
| "grad_norm": 0.11785157024860382, |
| "learning_rate": 0.0002, |
| "loss": 1.6687, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.42162970346374284, |
| "grad_norm": 0.1575067639350891, |
| "learning_rate": 0.0002, |
| "loss": 1.7148, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.42212808372788435, |
| "grad_norm": 0.5687432885169983, |
| "learning_rate": 0.0002, |
| "loss": 1.8016, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.4226264639920259, |
| "grad_norm": 0.887058675289154, |
| "learning_rate": 0.0002, |
| "loss": 1.7988, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.4231248442561675, |
| "grad_norm": 0.12778295576572418, |
| "learning_rate": 0.0002, |
| "loss": 1.6586, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.423623224520309, |
| "grad_norm": 0.13481804728507996, |
| "learning_rate": 0.0002, |
| "loss": 1.696, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.42412160478445055, |
| "grad_norm": 0.1478685438632965, |
| "learning_rate": 0.0002, |
| "loss": 1.6758, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.42461998504859205, |
| "grad_norm": 0.13414372503757477, |
| "learning_rate": 0.0002, |
| "loss": 1.657, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.4251183653127336, |
| "grad_norm": 0.13211821019649506, |
| "learning_rate": 0.0002, |
| "loss": 1.6403, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.4256167455768752, |
| "grad_norm": 0.13594435155391693, |
| "learning_rate": 0.0002, |
| "loss": 1.6363, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.4261151258410167, |
| "grad_norm": 0.13266883790493011, |
| "learning_rate": 0.0002, |
| "loss": 1.6632, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.42661350610515825, |
| "grad_norm": 0.12024448066949844, |
| "learning_rate": 0.0002, |
| "loss": 1.6745, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.42711188636929975, |
| "grad_norm": 0.12828536331653595, |
| "learning_rate": 0.0002, |
| "loss": 1.6493, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.4276102666334413, |
| "grad_norm": 0.12315808236598969, |
| "learning_rate": 0.0002, |
| "loss": 1.6803, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.4281086468975829, |
| "grad_norm": 0.13026510179042816, |
| "learning_rate": 0.0002, |
| "loss": 1.6536, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.4286070271617244, |
| "grad_norm": 0.45274946093559265, |
| "learning_rate": 0.0002, |
| "loss": 1.7579, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.42910540742586595, |
| "grad_norm": 0.12899275124073029, |
| "learning_rate": 0.0002, |
| "loss": 1.6603, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.42960378769000745, |
| "grad_norm": 0.12414630502462387, |
| "learning_rate": 0.0002, |
| "loss": 1.6933, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.430102167954149, |
| "grad_norm": 0.146366149187088, |
| "learning_rate": 0.0002, |
| "loss": 1.6799, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.4306005482182906, |
| "grad_norm": 0.11743781715631485, |
| "learning_rate": 0.0002, |
| "loss": 1.6395, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4310989284824321, |
| "grad_norm": 0.15248535573482513, |
| "learning_rate": 0.0002, |
| "loss": 1.7598, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.43159730874657365, |
| "grad_norm": 0.11914569139480591, |
| "learning_rate": 0.0002, |
| "loss": 1.663, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.43209568901071516, |
| "grad_norm": 0.11982624977827072, |
| "learning_rate": 0.0002, |
| "loss": 1.651, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.4325940692748567, |
| "grad_norm": 0.12126267701387405, |
| "learning_rate": 0.0002, |
| "loss": 1.7153, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.4330924495389983, |
| "grad_norm": 0.3660570979118347, |
| "learning_rate": 0.0002, |
| "loss": 1.6142, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.4335908298031398, |
| "grad_norm": 0.11174522340297699, |
| "learning_rate": 0.0002, |
| "loss": 1.6199, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.43408921006728135, |
| "grad_norm": 0.12089698761701584, |
| "learning_rate": 0.0002, |
| "loss": 1.7026, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.43458759033142286, |
| "grad_norm": 0.11779413372278214, |
| "learning_rate": 0.0002, |
| "loss": 1.6757, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.4350859705955644, |
| "grad_norm": 0.11461353302001953, |
| "learning_rate": 0.0002, |
| "loss": 1.6943, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.435584350859706, |
| "grad_norm": 0.1294202357530594, |
| "learning_rate": 0.0002, |
| "loss": 1.7078, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.4360827311238475, |
| "grad_norm": 0.1081145629286766, |
| "learning_rate": 0.0002, |
| "loss": 1.6078, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.43658111138798905, |
| "grad_norm": 0.11721238493919373, |
| "learning_rate": 0.0002, |
| "loss": 1.6056, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.43707949165213056, |
| "grad_norm": 0.11436528712511063, |
| "learning_rate": 0.0002, |
| "loss": 1.6806, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.4375778719162721, |
| "grad_norm": 0.11401306092739105, |
| "learning_rate": 0.0002, |
| "loss": 1.7225, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.4380762521804137, |
| "grad_norm": 0.11282623559236526, |
| "learning_rate": 0.0002, |
| "loss": 1.6614, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.4385746324445552, |
| "grad_norm": 0.11592991650104523, |
| "learning_rate": 0.0002, |
| "loss": 1.5984, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.43907301270869675, |
| "grad_norm": 0.10579363256692886, |
| "learning_rate": 0.0002, |
| "loss": 1.6349, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.43957139297283826, |
| "grad_norm": 0.1032218486070633, |
| "learning_rate": 0.0002, |
| "loss": 1.6017, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.4400697732369798, |
| "grad_norm": 0.10277747362852097, |
| "learning_rate": 0.0002, |
| "loss": 1.6396, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.44056815350112133, |
| "grad_norm": 0.12377838790416718, |
| "learning_rate": 0.0002, |
| "loss": 1.6298, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.4410665337652629, |
| "grad_norm": 0.10326054692268372, |
| "learning_rate": 0.0002, |
| "loss": 1.6335, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.44156491402940445, |
| "grad_norm": 0.10518341511487961, |
| "learning_rate": 0.0002, |
| "loss": 1.6343, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.44206329429354596, |
| "grad_norm": 0.10297736525535583, |
| "learning_rate": 0.0002, |
| "loss": 1.622, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.4425616745576875, |
| "grad_norm": 0.10891593992710114, |
| "learning_rate": 0.0002, |
| "loss": 1.6928, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.44306005482182903, |
| "grad_norm": 0.10570312291383743, |
| "learning_rate": 0.0002, |
| "loss": 1.5769, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.4435584350859706, |
| "grad_norm": 0.10274644941091537, |
| "learning_rate": 0.0002, |
| "loss": 1.7139, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.44405681535011216, |
| "grad_norm": 0.11095419526100159, |
| "learning_rate": 0.0002, |
| "loss": 1.6141, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.44455519561425366, |
| "grad_norm": 0.14802560210227966, |
| "learning_rate": 0.0002, |
| "loss": 1.6019, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4450535758783952, |
| "grad_norm": 0.10468854010105133, |
| "learning_rate": 0.0002, |
| "loss": 1.5875, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.44555195614253673, |
| "grad_norm": 0.10267975926399231, |
| "learning_rate": 0.0002, |
| "loss": 1.6071, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.4460503364066783, |
| "grad_norm": 0.10226966440677643, |
| "learning_rate": 0.0002, |
| "loss": 1.6654, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.44654871667081986, |
| "grad_norm": 0.1046745628118515, |
| "learning_rate": 0.0002, |
| "loss": 1.6244, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.44704709693496136, |
| "grad_norm": 0.5514235496520996, |
| "learning_rate": 0.0002, |
| "loss": 1.6949, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.4475454771991029, |
| "grad_norm": 0.10770034044981003, |
| "learning_rate": 0.0002, |
| "loss": 1.6388, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.44804385746324443, |
| "grad_norm": 0.1274634599685669, |
| "learning_rate": 0.0002, |
| "loss": 1.7169, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.448542237727386, |
| "grad_norm": 0.11944198608398438, |
| "learning_rate": 0.0002, |
| "loss": 1.635, |
| "step": 900 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 4012, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.024817808581591e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|