| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 1079, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0009267840593141798, | |
| "grad_norm": 356.406982421875, | |
| "learning_rate": 0.005, | |
| "loss": 15.9, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0018535681186283596, | |
| "grad_norm": 32.9332389831543, | |
| "learning_rate": 0.0049999894033994794, | |
| "loss": 13.6, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0027803521779425394, | |
| "grad_norm": 10.453313827514648, | |
| "learning_rate": 0.004999957613687751, | |
| "loss": 21.425, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0037071362372567192, | |
| "grad_norm": 3.510478973388672, | |
| "learning_rate": 0.004999904631134301, | |
| "loss": 15.225, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.004633920296570899, | |
| "grad_norm": 35.607364654541016, | |
| "learning_rate": 0.004999830456188281, | |
| "loss": 18.325, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.005560704355885079, | |
| "grad_norm": 4.46471643447876, | |
| "learning_rate": 0.004999735089478491, | |
| "loss": 19.7, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.006487488415199258, | |
| "grad_norm": 1.207599401473999, | |
| "learning_rate": 0.004999618531813382, | |
| "loss": 14.125, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0074142724745134385, | |
| "grad_norm": 46.56653594970703, | |
| "learning_rate": 0.004999480784181046, | |
| "loss": 32.7, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.008341056533827619, | |
| "grad_norm": 2.0620079040527344, | |
| "learning_rate": 0.004999321847749208, | |
| "loss": 13.4, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.009267840593141797, | |
| "grad_norm": 3.376063823699951, | |
| "learning_rate": 0.0049991417238652155, | |
| "loss": 13.3, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.010194624652455977, | |
| "grad_norm": 0.6672539710998535, | |
| "learning_rate": 0.004998940414056032, | |
| "loss": 13.4375, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.011121408711770158, | |
| "grad_norm": 0.4186709523200989, | |
| "learning_rate": 0.004998717920028215, | |
| "loss": 12.6375, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.012048192771084338, | |
| "grad_norm": 0.4992158114910126, | |
| "learning_rate": 0.00499847424366791, | |
| "loss": 11.6625, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.012974976830398516, | |
| "grad_norm": 0.21440155804157257, | |
| "learning_rate": 0.004998209387040828, | |
| "loss": 10.5375, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.013901760889712697, | |
| "grad_norm": 2.2223408222198486, | |
| "learning_rate": 0.004997923352392236, | |
| "loss": 11.6, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.014828544949026877, | |
| "grad_norm": 1.4461462497711182, | |
| "learning_rate": 0.004997616142146927, | |
| "loss": 12.7125, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.015755329008341055, | |
| "grad_norm": 1.9746646881103516, | |
| "learning_rate": 0.004997287758909209, | |
| "loss": 12.2125, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.016682113067655237, | |
| "grad_norm": 8.858609199523926, | |
| "learning_rate": 0.004996938205462881, | |
| "loss": 14.0625, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.017608897126969416, | |
| "grad_norm": 0.9914843440055847, | |
| "learning_rate": 0.004996567484771203, | |
| "loss": 11.35, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.018535681186283594, | |
| "grad_norm": 0.8945605158805847, | |
| "learning_rate": 0.004996175599976878, | |
| "loss": 11.725, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.019462465245597776, | |
| "grad_norm": 1.340647578239441, | |
| "learning_rate": 0.004995762554402026, | |
| "loss": 12.8875, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.020389249304911955, | |
| "grad_norm": 0.6224690079689026, | |
| "learning_rate": 0.004995328351548148, | |
| "loss": 11.7, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.021316033364226137, | |
| "grad_norm": 0.6904886960983276, | |
| "learning_rate": 0.004994872995096104, | |
| "loss": 10.6375, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.022242817423540315, | |
| "grad_norm": 0.7552493214607239, | |
| "learning_rate": 0.004994396488906078, | |
| "loss": 13.275, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.023169601482854494, | |
| "grad_norm": 0.1830722540616989, | |
| "learning_rate": 0.004993898837017547, | |
| "loss": 10.225, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.024096385542168676, | |
| "grad_norm": 0.31753918528556824, | |
| "learning_rate": 0.004993380043649245, | |
| "loss": 10.0875, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.025023169601482854, | |
| "grad_norm": 0.17651186883449554, | |
| "learning_rate": 0.00499284011319913, | |
| "loss": 9.675, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.025949953660797033, | |
| "grad_norm": 0.1835695058107376, | |
| "learning_rate": 0.004992279050244343, | |
| "loss": 9.625, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.026876737720111215, | |
| "grad_norm": 0.15531466901302338, | |
| "learning_rate": 0.004991696859541173, | |
| "loss": 9.525, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.027803521779425393, | |
| "grad_norm": 0.1167324110865593, | |
| "learning_rate": 0.004991093546025012, | |
| "loss": 9.3375, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.028730305838739572, | |
| "grad_norm": 0.06774014979600906, | |
| "learning_rate": 0.004990469114810318, | |
| "loss": 9.275, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.029657089898053754, | |
| "grad_norm": 0.11318591982126236, | |
| "learning_rate": 0.004989823571190571, | |
| "loss": 9.2875, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.030583873957367932, | |
| "grad_norm": 0.039967115968465805, | |
| "learning_rate": 0.004989156920638226, | |
| "loss": 9.225, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.03151065801668211, | |
| "grad_norm": 0.07919777184724808, | |
| "learning_rate": 0.004988469168804664, | |
| "loss": 9.2375, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.03243744207599629, | |
| "grad_norm": 0.04368596524000168, | |
| "learning_rate": 0.0049877603215201525, | |
| "loss": 9.1875, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.033364226135310475, | |
| "grad_norm": 0.04921940341591835, | |
| "learning_rate": 0.004987030384793787, | |
| "loss": 9.1875, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.03429101019462465, | |
| "grad_norm": 0.040833037346601486, | |
| "learning_rate": 0.0049862793648134465, | |
| "loss": 9.1625, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.03521779425393883, | |
| "grad_norm": 0.03423991799354553, | |
| "learning_rate": 0.004985507267945738, | |
| "loss": 9.1125, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.03614457831325301, | |
| "grad_norm": 0.04628804698586464, | |
| "learning_rate": 0.004984714100735943, | |
| "loss": 9.1375, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.03707136237256719, | |
| "grad_norm": 0.02513456903398037, | |
| "learning_rate": 0.0049838998699079625, | |
| "loss": 9.125, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.037998146431881374, | |
| "grad_norm": 0.04390294477343559, | |
| "learning_rate": 0.00498306458236426, | |
| "loss": 9.125, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.03892493049119555, | |
| "grad_norm": 0.02223977819085121, | |
| "learning_rate": 0.004982208245185801, | |
| "loss": 9.1125, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.03985171455050973, | |
| "grad_norm": 0.03464260324835777, | |
| "learning_rate": 0.004981330865631997, | |
| "loss": 9.1125, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.04077849860982391, | |
| "grad_norm": 0.0259235929697752, | |
| "learning_rate": 0.00498043245114064, | |
| "loss": 9.0625, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.04170528266913809, | |
| "grad_norm": 0.023725276812911034, | |
| "learning_rate": 0.004979513009327842, | |
| "loss": 9.1, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.042632066728452274, | |
| "grad_norm": 0.022491367533802986, | |
| "learning_rate": 0.004978572547987968, | |
| "loss": 9.05, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.04355885078776645, | |
| "grad_norm": 0.018162831664085388, | |
| "learning_rate": 0.004977611075093574, | |
| "loss": 9.0875, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.04448563484708063, | |
| "grad_norm": 0.033248819410800934, | |
| "learning_rate": 0.004976628598795336, | |
| "loss": 9.025, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.04541241890639481, | |
| "grad_norm": 0.015689486637711525, | |
| "learning_rate": 0.0049756251274219775, | |
| "loss": 9.0625, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.04633920296570899, | |
| "grad_norm": 0.022721588611602783, | |
| "learning_rate": 0.00497460066948021, | |
| "loss": 9.0375, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.047265987025023166, | |
| "grad_norm": 0.020086370408535004, | |
| "learning_rate": 0.00497355523365465, | |
| "loss": 9.0625, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.04819277108433735, | |
| "grad_norm": 0.01713702268898487, | |
| "learning_rate": 0.00497248882880775, | |
| "loss": 9.0375, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.04911955514365153, | |
| "grad_norm": 0.01819983310997486, | |
| "learning_rate": 0.004971401463979721, | |
| "loss": 9.0375, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.05004633920296571, | |
| "grad_norm": 0.01858202926814556, | |
| "learning_rate": 0.004970293148388463, | |
| "loss": 9.0125, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.05097312326227989, | |
| "grad_norm": 0.016383878886699677, | |
| "learning_rate": 0.004969163891429476, | |
| "loss": 9.0, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.051899907321594066, | |
| "grad_norm": 0.01655055209994316, | |
| "learning_rate": 0.0049680137026757885, | |
| "loss": 9.025, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.05282669138090825, | |
| "grad_norm": 0.01438821293413639, | |
| "learning_rate": 0.004966842591877872, | |
| "loss": 9.0, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.05375347544022243, | |
| "grad_norm": 0.01816794089972973, | |
| "learning_rate": 0.004965650568963563, | |
| "loss": 9.0, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.05468025949953661, | |
| "grad_norm": 0.017415305599570274, | |
| "learning_rate": 0.004964437644037973, | |
| "loss": 8.9625, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.05560704355885079, | |
| "grad_norm": 0.017612161114811897, | |
| "learning_rate": 0.004963203827383406, | |
| "loss": 8.975, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.056533827618164965, | |
| "grad_norm": 0.014700948260724545, | |
| "learning_rate": 0.0049619491294592725, | |
| "loss": 9.0, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.057460611677479144, | |
| "grad_norm": 0.0167540330439806, | |
| "learning_rate": 0.004960673560901999, | |
| "loss": 8.9875, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.05838739573679333, | |
| "grad_norm": 0.029445504769682884, | |
| "learning_rate": 0.004959377132524938, | |
| "loss": 8.9625, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.05931417979610751, | |
| "grad_norm": 0.013282664120197296, | |
| "learning_rate": 0.004958059855318275, | |
| "loss": 8.9625, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.060240963855421686, | |
| "grad_norm": 0.019158177077770233, | |
| "learning_rate": 0.00495672174044894, | |
| "loss": 8.9, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.061167747914735865, | |
| "grad_norm": 0.02090335451066494, | |
| "learning_rate": 0.004955362799260506, | |
| "loss": 8.9125, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.06209453197405004, | |
| "grad_norm": 0.019786162301898003, | |
| "learning_rate": 0.004953983043273102, | |
| "loss": 8.95, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.06302131603336422, | |
| "grad_norm": 0.0192793570458889, | |
| "learning_rate": 0.004952582484183302, | |
| "loss": 8.925, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.0639481000926784, | |
| "grad_norm": 0.029085692018270493, | |
| "learning_rate": 0.0049511611338640404, | |
| "loss": 8.9625, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.06487488415199258, | |
| "grad_norm": 0.028297357261180878, | |
| "learning_rate": 0.004949719004364503, | |
| "loss": 8.925, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.06580166821130677, | |
| "grad_norm": 0.013140903785824776, | |
| "learning_rate": 0.0049482561079100245, | |
| "loss": 8.925, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.06672845227062095, | |
| "grad_norm": 0.016508571803569794, | |
| "learning_rate": 0.004946772456901989, | |
| "loss": 8.95, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.06765523632993513, | |
| "grad_norm": 0.028362734243273735, | |
| "learning_rate": 0.004945268063917723, | |
| "loss": 8.9375, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.0685820203892493, | |
| "grad_norm": 0.028645526617765427, | |
| "learning_rate": 0.004943742941710386, | |
| "loss": 8.9375, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.06950880444856349, | |
| "grad_norm": 0.010765830054879189, | |
| "learning_rate": 0.004942197103208867, | |
| "loss": 8.925, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.07043558850787766, | |
| "grad_norm": 0.022227909415960312, | |
| "learning_rate": 0.004940630561517674, | |
| "loss": 8.9375, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.07136237256719184, | |
| "grad_norm": 0.020959695801138878, | |
| "learning_rate": 0.004939043329916819, | |
| "loss": 8.95, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.07228915662650602, | |
| "grad_norm": 0.01679840497672558, | |
| "learning_rate": 0.00493743542186171, | |
| "loss": 8.925, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.0732159406858202, | |
| "grad_norm": 0.01441862341016531, | |
| "learning_rate": 0.004935806850983033, | |
| "loss": 8.9125, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.07414272474513438, | |
| "grad_norm": 0.014738287776708603, | |
| "learning_rate": 0.004934157631086642, | |
| "loss": 8.9, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.07506950880444857, | |
| "grad_norm": 0.013974464498460293, | |
| "learning_rate": 0.004932487776153435, | |
| "loss": 8.875, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.07599629286376275, | |
| "grad_norm": 0.014242907054722309, | |
| "learning_rate": 0.004930797300339241, | |
| "loss": 8.8875, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 0.014142482541501522, | |
| "learning_rate": 0.004929086217974697, | |
| "loss": 8.875, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.0778498609823911, | |
| "grad_norm": 0.011345421895384789, | |
| "learning_rate": 0.0049273545435651305, | |
| "loss": 8.9, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.07877664504170528, | |
| "grad_norm": 0.01937839388847351, | |
| "learning_rate": 0.004925602291790427, | |
| "loss": 8.875, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.07970342910101946, | |
| "grad_norm": 0.019322404637932777, | |
| "learning_rate": 0.0049238294775049195, | |
| "loss": 8.875, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.08063021316033364, | |
| "grad_norm": 0.02427850104868412, | |
| "learning_rate": 0.004922036115737251, | |
| "loss": 8.875, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.08155699721964782, | |
| "grad_norm": 0.02773062139749527, | |
| "learning_rate": 0.0049202222216902505, | |
| "loss": 8.875, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.082483781278962, | |
| "grad_norm": 0.022121064364910126, | |
| "learning_rate": 0.0049183878107408084, | |
| "loss": 8.875, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.08341056533827618, | |
| "grad_norm": 0.014306942000985146, | |
| "learning_rate": 0.00491653289843974, | |
| "loss": 8.85, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.08433734939759036, | |
| "grad_norm": 0.01174082513898611, | |
| "learning_rate": 0.004914657500511657, | |
| "loss": 8.85, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.08526413345690455, | |
| "grad_norm": 0.017720786854624748, | |
| "learning_rate": 0.004912761632854833, | |
| "loss": 8.8625, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.08619091751621873, | |
| "grad_norm": 0.023863809183239937, | |
| "learning_rate": 0.004910845311541071, | |
| "loss": 8.8625, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.0871177015755329, | |
| "grad_norm": 0.034596893936395645, | |
| "learning_rate": 0.004908908552815563, | |
| "loss": 8.8625, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.08804448563484708, | |
| "grad_norm": 0.04321544989943504, | |
| "learning_rate": 0.004906951373096757, | |
| "loss": 8.85, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.08897126969416126, | |
| "grad_norm": 0.05180607736110687, | |
| "learning_rate": 0.004904973788976213, | |
| "loss": 8.8625, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.08989805375347544, | |
| "grad_norm": 0.04927121847867966, | |
| "learning_rate": 0.004902975817218467, | |
| "loss": 8.825, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.09082483781278962, | |
| "grad_norm": 0.030304012820124626, | |
| "learning_rate": 0.004900957474760885, | |
| "loss": 8.825, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.0917516218721038, | |
| "grad_norm": 0.018640510737895966, | |
| "learning_rate": 0.004898918778713524, | |
| "loss": 8.8, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.09267840593141798, | |
| "grad_norm": 0.033853888511657715, | |
| "learning_rate": 0.004896859746358979, | |
| "loss": 8.7875, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.09360518999073215, | |
| "grad_norm": 0.04043276980519295, | |
| "learning_rate": 0.004894780395152247, | |
| "loss": 8.775, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.09453197405004633, | |
| "grad_norm": 0.0534222349524498, | |
| "learning_rate": 0.004892680742720571, | |
| "loss": 8.7375, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.09545875810936053, | |
| "grad_norm": 0.082061268389225, | |
| "learning_rate": 0.004890560806863293, | |
| "loss": 8.8, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.0963855421686747, | |
| "grad_norm": 0.05508153885602951, | |
| "learning_rate": 0.004888420605551703, | |
| "loss": 8.775, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.09731232622798888, | |
| "grad_norm": 0.04220907762646675, | |
| "learning_rate": 0.004886260156928888, | |
| "loss": 8.7625, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.09823911028730306, | |
| "grad_norm": 0.04727254807949066, | |
| "learning_rate": 0.004884079479309578, | |
| "loss": 8.7875, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.09916589434661724, | |
| "grad_norm": 0.04981837049126625, | |
| "learning_rate": 0.004881878591179988, | |
| "loss": 8.75, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.10009267840593142, | |
| "grad_norm": 0.039716847240924835, | |
| "learning_rate": 0.004879657511197662, | |
| "loss": 8.675, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1010194624652456, | |
| "grad_norm": 0.028658628463745117, | |
| "learning_rate": 0.0048774162581913215, | |
| "loss": 8.675, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.10194624652455977, | |
| "grad_norm": 0.03913936764001846, | |
| "learning_rate": 0.0048751548511606945, | |
| "loss": 8.6625, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.10287303058387395, | |
| "grad_norm": 0.027623698115348816, | |
| "learning_rate": 0.004872873309276362, | |
| "loss": 8.6625, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.10379981464318813, | |
| "grad_norm": 0.0399942547082901, | |
| "learning_rate": 0.004870571651879596, | |
| "loss": 8.6625, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.10472659870250231, | |
| "grad_norm": 0.02140922099351883, | |
| "learning_rate": 0.00486824989848219, | |
| "loss": 8.5875, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.1056533827618165, | |
| "grad_norm": 0.0371641181409359, | |
| "learning_rate": 0.0048659080687663, | |
| "loss": 8.6, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.10658016682113068, | |
| "grad_norm": 0.018301891162991524, | |
| "learning_rate": 0.004863546182584273, | |
| "loss": 8.575, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.10750695088044486, | |
| "grad_norm": 0.029274851083755493, | |
| "learning_rate": 0.0048611642599584795, | |
| "loss": 8.55, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.10843373493975904, | |
| "grad_norm": 0.025735612958669662, | |
| "learning_rate": 0.004858762321081146, | |
| "loss": 8.525, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.10936051899907322, | |
| "grad_norm": 0.036481715738773346, | |
| "learning_rate": 0.004856340386314182, | |
| "loss": 8.4875, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.1102873030583874, | |
| "grad_norm": 0.11254877597093582, | |
| "learning_rate": 0.004853898476189007, | |
| "loss": 8.5375, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.11121408711770157, | |
| "grad_norm": 0.19445450603961945, | |
| "learning_rate": 0.00485143661140638, | |
| "loss": 8.85, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.11214087117701575, | |
| "grad_norm": 0.16596297919750214, | |
| "learning_rate": 0.004848954812836217, | |
| "loss": 8.7625, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.11306765523632993, | |
| "grad_norm": 0.044869761914014816, | |
| "learning_rate": 0.004846453101517421, | |
| "loss": 8.5125, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.11399443929564411, | |
| "grad_norm": 0.08229261636734009, | |
| "learning_rate": 0.0048439314986577, | |
| "loss": 8.6, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.11492122335495829, | |
| "grad_norm": 0.04814854636788368, | |
| "learning_rate": 0.00484139002563339, | |
| "loss": 8.475, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.11584800741427248, | |
| "grad_norm": 0.07902152091264725, | |
| "learning_rate": 0.004838828703989269, | |
| "loss": 8.55, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.11677479147358666, | |
| "grad_norm": 0.02725468948483467, | |
| "learning_rate": 0.0048362475554383786, | |
| "loss": 8.4, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.11770157553290084, | |
| "grad_norm": 0.05269164219498634, | |
| "learning_rate": 0.004833646601861841, | |
| "loss": 8.4375, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.11862835959221502, | |
| "grad_norm": 0.03333018347620964, | |
| "learning_rate": 0.004831025865308667, | |
| "loss": 8.3625, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.1195551436515292, | |
| "grad_norm": 0.040032755583524704, | |
| "learning_rate": 0.004828385367995575, | |
| "loss": 8.325, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.12048192771084337, | |
| "grad_norm": 0.03257158771157265, | |
| "learning_rate": 0.004825725132306803, | |
| "loss": 8.2625, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.12140871177015755, | |
| "grad_norm": 0.03259531036019325, | |
| "learning_rate": 0.0048230451807939135, | |
| "loss": 8.225, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.12233549582947173, | |
| "grad_norm": 0.03383934497833252, | |
| "learning_rate": 0.004820345536175607, | |
| "loss": 8.2, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.12326227988878591, | |
| "grad_norm": 0.02867773361504078, | |
| "learning_rate": 0.004817626221337529, | |
| "loss": 8.15, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.12418906394810009, | |
| "grad_norm": 0.03943765163421631, | |
| "learning_rate": 0.004814887259332073, | |
| "loss": 8.125, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.12511584800741427, | |
| "grad_norm": 0.034471139311790466, | |
| "learning_rate": 0.004812128673378188, | |
| "loss": 7.9875, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.12604263206672844, | |
| "grad_norm": 0.03869534283876419, | |
| "learning_rate": 0.004809350486861181, | |
| "loss": 7.95, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.12696941612604262, | |
| "grad_norm": 0.03380202502012253, | |
| "learning_rate": 0.0048065527233325175, | |
| "loss": 7.875, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.1278962001853568, | |
| "grad_norm": 0.03459366410970688, | |
| "learning_rate": 0.004803735406509625, | |
| "loss": 7.7812, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.12882298424467098, | |
| "grad_norm": 0.0600280836224556, | |
| "learning_rate": 0.0048008985602756874, | |
| "loss": 7.65, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.12974976830398516, | |
| "grad_norm": 0.11870339512825012, | |
| "learning_rate": 0.004798042208679445, | |
| "loss": 7.6375, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.13067655236329936, | |
| "grad_norm": 0.1849852204322815, | |
| "learning_rate": 0.0047951663759349915, | |
| "loss": 7.7, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.13160333642261354, | |
| "grad_norm": 0.15893682837486267, | |
| "learning_rate": 0.0047922710864215685, | |
| "loss": 7.6375, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.13253012048192772, | |
| "grad_norm": 0.10825814306735992, | |
| "learning_rate": 0.004789356364683356, | |
| "loss": 7.4437, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.1334569045412419, | |
| "grad_norm": 0.12936848402023315, | |
| "learning_rate": 0.004786422235429268, | |
| "loss": 7.3688, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.13438368860055608, | |
| "grad_norm": 0.07664606720209122, | |
| "learning_rate": 0.0047834687235327415, | |
| "loss": 7.2625, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.13531047265987026, | |
| "grad_norm": 0.1079607829451561, | |
| "learning_rate": 0.0047804958540315235, | |
| "loss": 7.2125, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.13623725671918444, | |
| "grad_norm": 0.04593510553240776, | |
| "learning_rate": 0.004777503652127464, | |
| "loss": 7.0687, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.1371640407784986, | |
| "grad_norm": 0.06448942422866821, | |
| "learning_rate": 0.004774492143186296, | |
| "loss": 7.075, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.1380908248378128, | |
| "grad_norm": 0.04284033551812172, | |
| "learning_rate": 0.004771461352737427, | |
| "loss": 6.9688, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.13901760889712697, | |
| "grad_norm": 0.048541247844696045, | |
| "learning_rate": 0.004768411306473717, | |
| "loss": 6.9125, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.13994439295644115, | |
| "grad_norm": 0.0369611531496048, | |
| "learning_rate": 0.004765342030251263, | |
| "loss": 6.8875, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.14087117701575533, | |
| "grad_norm": 0.07809454202651978, | |
| "learning_rate": 0.004762253550089181, | |
| "loss": 6.8375, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.1417979610750695, | |
| "grad_norm": 0.030714238062500954, | |
| "learning_rate": 0.004759145892169382, | |
| "loss": 6.8063, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.14272474513438368, | |
| "grad_norm": 0.030746718868613243, | |
| "learning_rate": 0.004756019082836354, | |
| "loss": 6.7875, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.14365152919369786, | |
| "grad_norm": 0.026088058948516846, | |
| "learning_rate": 0.004752873148596938, | |
| "loss": 6.7438, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.14457831325301204, | |
| "grad_norm": 0.017927952110767365, | |
| "learning_rate": 0.004749708116120099, | |
| "loss": 6.7688, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.14550509731232622, | |
| "grad_norm": 0.023661252111196518, | |
| "learning_rate": 0.004746524012236706, | |
| "loss": 6.725, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.1464318813716404, | |
| "grad_norm": 0.018965313211083412, | |
| "learning_rate": 0.004743320863939299, | |
| "loss": 6.725, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.14735866543095458, | |
| "grad_norm": 0.022316887974739075, | |
| "learning_rate": 0.004740098698381866, | |
| "loss": 6.675, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.14828544949026876, | |
| "grad_norm": 0.019958553835749626, | |
| "learning_rate": 0.004736857542879608, | |
| "loss": 6.6875, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.14921223354958293, | |
| "grad_norm": 0.016147589311003685, | |
| "learning_rate": 0.004733597424908707, | |
| "loss": 6.6875, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.15013901760889714, | |
| "grad_norm": 0.020692575722932816, | |
| "learning_rate": 0.004730318372106099, | |
| "loss": 6.6438, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.15106580166821132, | |
| "grad_norm": 0.014802551828324795, | |
| "learning_rate": 0.004727020412269234, | |
| "loss": 6.6312, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.1519925857275255, | |
| "grad_norm": 0.01826154999434948, | |
| "learning_rate": 0.004723703573355842, | |
| "loss": 6.6375, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.15291936978683968, | |
| "grad_norm": 0.014861056581139565, | |
| "learning_rate": 0.004720367883483697, | |
| "loss": 6.6562, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 0.0160931795835495, | |
| "learning_rate": 0.004717013370930377, | |
| "loss": 6.6, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.15477293790546803, | |
| "grad_norm": 0.02078167535364628, | |
| "learning_rate": 0.004713640064133024, | |
| "loss": 6.6063, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.1556997219647822, | |
| "grad_norm": 0.01577616296708584, | |
| "learning_rate": 0.004710247991688109, | |
| "loss": 6.5563, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.1566265060240964, | |
| "grad_norm": 0.019711369648575783, | |
| "learning_rate": 0.0047068371823511795, | |
| "loss": 6.575, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.15755329008341057, | |
| "grad_norm": 0.01820039190351963, | |
| "learning_rate": 0.004703407665036622, | |
| "loss": 6.5813, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.15848007414272475, | |
| "grad_norm": 0.015363371931016445, | |
| "learning_rate": 0.004699959468817417, | |
| "loss": 6.5375, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.15940685820203893, | |
| "grad_norm": 0.015872852876782417, | |
| "learning_rate": 0.004696492622924892, | |
| "loss": 6.5687, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.1603336422613531, | |
| "grad_norm": 0.016906000673770905, | |
| "learning_rate": 0.004693007156748471, | |
| "loss": 6.5125, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.16126042632066728, | |
| "grad_norm": 0.016961950808763504, | |
| "learning_rate": 0.0046895030998354275, | |
| "loss": 6.525, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.16218721037998146, | |
| "grad_norm": 0.016262684017419815, | |
| "learning_rate": 0.004685980481890634, | |
| "loss": 6.5062, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.16311399443929564, | |
| "grad_norm": 0.014922458678483963, | |
| "learning_rate": 0.004682439332776313, | |
| "loss": 6.4688, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.16404077849860982, | |
| "grad_norm": 0.022018995136022568, | |
| "learning_rate": 0.004678879682511777, | |
| "loss": 6.5188, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.164967562557924, | |
| "grad_norm": 0.014819780364632607, | |
| "learning_rate": 0.004675301561273179, | |
| "loss": 6.4437, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.16589434661723818, | |
| "grad_norm": 0.0183818731456995, | |
| "learning_rate": 0.004671704999393256, | |
| "loss": 6.4563, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.16682113067655235, | |
| "grad_norm": 0.020285405218601227, | |
| "learning_rate": 0.004668090027361074, | |
| "loss": 6.4563, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.16774791473586653, | |
| "grad_norm": 0.0204929132014513, | |
| "learning_rate": 0.004664456675821761, | |
| "loss": 6.4813, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.1686746987951807, | |
| "grad_norm": 0.022332845255732536, | |
| "learning_rate": 0.0046608049755762606, | |
| "loss": 6.4563, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.1696014828544949, | |
| "grad_norm": 0.014836137183010578, | |
| "learning_rate": 0.004657134957581057, | |
| "loss": 6.4625, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.1705282669138091, | |
| "grad_norm": 0.024512965232133865, | |
| "learning_rate": 0.0046534466529479235, | |
| "loss": 6.4563, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.17145505097312327, | |
| "grad_norm": 0.025079630315303802, | |
| "learning_rate": 0.004649740092943651, | |
| "loss": 6.4188, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.17238183503243745, | |
| "grad_norm": 0.032594986259937286, | |
| "learning_rate": 0.00464601530898979, | |
| "loss": 6.4125, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.17330861909175163, | |
| "grad_norm": 0.028524870052933693, | |
| "learning_rate": 0.004642272332662377, | |
| "loss": 6.4125, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.1742354031510658, | |
| "grad_norm": 0.02017652988433838, | |
| "learning_rate": 0.0046385111956916735, | |
| "loss": 6.3938, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.17516218721038, | |
| "grad_norm": 0.023051844909787178, | |
| "learning_rate": 0.004634731929961891, | |
| "loss": 6.4062, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.17608897126969417, | |
| "grad_norm": 0.025438351556658745, | |
| "learning_rate": 0.004630934567510925, | |
| "loss": 6.3812, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.17701575532900835, | |
| "grad_norm": 0.037845317274332047, | |
| "learning_rate": 0.004627119140530083, | |
| "loss": 6.4062, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.17794253938832252, | |
| "grad_norm": 0.05386321246623993, | |
| "learning_rate": 0.004623285681363807, | |
| "loss": 6.4062, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.1788693234476367, | |
| "grad_norm": 0.0913223922252655, | |
| "learning_rate": 0.004619434222509408, | |
| "loss": 6.3875, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.17979610750695088, | |
| "grad_norm": 0.1158546730875969, | |
| "learning_rate": 0.00461556479661678, | |
| "loss": 6.4563, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.18072289156626506, | |
| "grad_norm": 0.08018877357244492, | |
| "learning_rate": 0.0046116774364881345, | |
| "loss": 6.375, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.18164967562557924, | |
| "grad_norm": 0.03276560455560684, | |
| "learning_rate": 0.0046077721750777114, | |
| "loss": 6.3812, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.18257645968489342, | |
| "grad_norm": 0.07004847377538681, | |
| "learning_rate": 0.0046038490454915065, | |
| "loss": 6.3875, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.1835032437442076, | |
| "grad_norm": 0.03939942270517349, | |
| "learning_rate": 0.004599908080986991, | |
| "loss": 6.325, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.18443002780352177, | |
| "grad_norm": 0.0445321649312973, | |
| "learning_rate": 0.004595949314972824, | |
| "loss": 6.3125, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.18535681186283595, | |
| "grad_norm": 0.04666861146688461, | |
| "learning_rate": 0.004591972781008576, | |
| "loss": 6.3375, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.18628359592215013, | |
| "grad_norm": 0.032554373145103455, | |
| "learning_rate": 0.0045879785128044425, | |
| "loss": 6.3187, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.1872103799814643, | |
| "grad_norm": 0.03748049587011337, | |
| "learning_rate": 0.004583966544220952, | |
| "loss": 6.3313, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.1881371640407785, | |
| "grad_norm": 0.02630574069917202, | |
| "learning_rate": 0.00457993690926869, | |
| "loss": 6.3563, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.18906394810009267, | |
| "grad_norm": 0.04539572447538376, | |
| "learning_rate": 0.004575889642107998, | |
| "loss": 6.3063, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.18999073215940684, | |
| "grad_norm": 0.02216522768139839, | |
| "learning_rate": 0.0045718247770487, | |
| "loss": 6.2812, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.19091751621872105, | |
| "grad_norm": 0.05376052483916283, | |
| "learning_rate": 0.004567742348549793, | |
| "loss": 6.35, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.19184430027803523, | |
| "grad_norm": 0.02676314301788807, | |
| "learning_rate": 0.004563642391219168, | |
| "loss": 6.3, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.1927710843373494, | |
| "grad_norm": 0.039810191839933395, | |
| "learning_rate": 0.004559524939813316, | |
| "loss": 6.2875, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.1936978683966636, | |
| "grad_norm": 0.03783705458045006, | |
| "learning_rate": 0.0045553900292370254, | |
| "loss": 6.2625, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.19462465245597776, | |
| "grad_norm": 0.02999858744442463, | |
| "learning_rate": 0.004551237694543092, | |
| "loss": 6.2438, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.19555143651529194, | |
| "grad_norm": 0.0282985121011734, | |
| "learning_rate": 0.004547067970932022, | |
| "loss": 6.2438, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.19647822057460612, | |
| "grad_norm": 0.03198060020804405, | |
| "learning_rate": 0.004542880893751732, | |
| "loss": 6.2625, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.1974050046339203, | |
| "grad_norm": 0.03950299322605133, | |
| "learning_rate": 0.00453867649849725, | |
| "loss": 6.2188, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.19833178869323448, | |
| "grad_norm": 0.026990199461579323, | |
| "learning_rate": 0.004534454820810412, | |
| "loss": 6.2063, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.19925857275254866, | |
| "grad_norm": 0.0420188382267952, | |
| "learning_rate": 0.004530215896479564, | |
| "loss": 6.2625, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.20018535681186284, | |
| "grad_norm": 0.04251977428793907, | |
| "learning_rate": 0.004525959761439257, | |
| "loss": 6.2063, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.20111214087117701, | |
| "grad_norm": 0.06442005932331085, | |
| "learning_rate": 0.0045216864517699405, | |
| "loss": 6.2125, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.2020389249304912, | |
| "grad_norm": 0.05594475567340851, | |
| "learning_rate": 0.004517396003697659, | |
| "loss": 6.1562, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.20296570898980537, | |
| "grad_norm": 0.038938529789447784, | |
| "learning_rate": 0.004513088453593744, | |
| "loss": 6.1937, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.20389249304911955, | |
| "grad_norm": 0.057002611458301544, | |
| "learning_rate": 0.0045087638379745065, | |
| "loss": 6.175, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.20481927710843373, | |
| "grad_norm": 0.047009214758872986, | |
| "learning_rate": 0.004504422193500925, | |
| "loss": 6.1688, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.2057460611677479, | |
| "grad_norm": 0.05817709118127823, | |
| "learning_rate": 0.004500063556978336, | |
| "loss": 6.1375, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.20667284522706209, | |
| "grad_norm": 0.05288264900445938, | |
| "learning_rate": 0.004495687965356126, | |
| "loss": 6.1688, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.20759962928637626, | |
| "grad_norm": 0.03736674785614014, | |
| "learning_rate": 0.00449129545572741, | |
| "loss": 6.175, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.20852641334569044, | |
| "grad_norm": 0.034431926906108856, | |
| "learning_rate": 0.004486886065328725, | |
| "loss": 6.1125, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.20945319740500462, | |
| "grad_norm": 0.03445250913500786, | |
| "learning_rate": 0.004482459831539709, | |
| "loss": 6.1625, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.21037998146431883, | |
| "grad_norm": 0.035410068929195404, | |
| "learning_rate": 0.004478016791882787, | |
| "loss": 6.0875, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.211306765523633, | |
| "grad_norm": 0.026350026950240135, | |
| "learning_rate": 0.004473556984022854, | |
| "loss": 6.125, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.21223354958294718, | |
| "grad_norm": 0.028956936672329903, | |
| "learning_rate": 0.0044690804457669505, | |
| "loss": 6.1063, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.21316033364226136, | |
| "grad_norm": 0.03521239385008812, | |
| "learning_rate": 0.004464587215063946, | |
| "loss": 6.0875, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.21408711770157554, | |
| "grad_norm": 0.04613986983895302, | |
| "learning_rate": 0.004460077330004218, | |
| "loss": 6.1312, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.21501390176088972, | |
| "grad_norm": 0.05228109285235405, | |
| "learning_rate": 0.0044555508288193265, | |
| "loss": 6.1063, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2159406858202039, | |
| "grad_norm": 0.045205965638160706, | |
| "learning_rate": 0.004451007749881691, | |
| "loss": 6.1, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.21686746987951808, | |
| "grad_norm": 0.028526296839118004, | |
| "learning_rate": 0.004446448131704267, | |
| "loss": 6.0813, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.21779425393883226, | |
| "grad_norm": 0.027809731662273407, | |
| "learning_rate": 0.004441872012940214, | |
| "loss": 6.075, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.21872103799814643, | |
| "grad_norm": 0.04913929104804993, | |
| "learning_rate": 0.004437279432382576, | |
| "loss": 6.075, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2196478220574606, | |
| "grad_norm": 0.046848297119140625, | |
| "learning_rate": 0.004432670428963946, | |
| "loss": 6.0938, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.2205746061167748, | |
| "grad_norm": 0.0395938940346241, | |
| "learning_rate": 0.004428045041756137, | |
| "loss": 6.075, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.22150139017608897, | |
| "grad_norm": 0.0638502761721611, | |
| "learning_rate": 0.004423403309969855, | |
| "loss": 6.025, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.22242817423540315, | |
| "grad_norm": 0.06795669347047806, | |
| "learning_rate": 0.004418745272954361, | |
| "loss": 6.0438, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.22335495829471733, | |
| "grad_norm": 0.052847135812044144, | |
| "learning_rate": 0.004414070970197141, | |
| "loss": 6.0625, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2242817423540315, | |
| "grad_norm": 0.04967901483178139, | |
| "learning_rate": 0.0044093804413235715, | |
| "loss": 6.0375, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.22520852641334568, | |
| "grad_norm": 0.0682300478219986, | |
| "learning_rate": 0.004404673726096578, | |
| "loss": 6.0625, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.22613531047265986, | |
| "grad_norm": 0.0553511306643486, | |
| "learning_rate": 0.00439995086441631, | |
| "loss": 5.9813, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.22706209453197404, | |
| "grad_norm": 0.028195617720484734, | |
| "learning_rate": 0.004395211896319786, | |
| "loss": 6.025, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.22798887859128822, | |
| "grad_norm": 0.04402211681008339, | |
| "learning_rate": 0.00439045686198057, | |
| "loss": 6.0125, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.2289156626506024, | |
| "grad_norm": 0.03047800622880459, | |
| "learning_rate": 0.00438568580170842, | |
| "loss": 5.9938, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.22984244670991658, | |
| "grad_norm": 0.03843539580702782, | |
| "learning_rate": 0.004380898755948953, | |
| "loss": 5.9813, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.23076923076923078, | |
| "grad_norm": 0.0366608090698719, | |
| "learning_rate": 0.004376095765283298, | |
| "loss": 6.0, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.23169601482854496, | |
| "grad_norm": 0.06157747656106949, | |
| "learning_rate": 0.004371276870427753, | |
| "loss": 6.025, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.23262279888785914, | |
| "grad_norm": 0.055426549166440964, | |
| "learning_rate": 0.004366442112233441, | |
| "loss": 5.975, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.23354958294717332, | |
| "grad_norm": 0.03506896272301674, | |
| "learning_rate": 0.004361591531685964, | |
| "loss": 5.9813, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2344763670064875, | |
| "grad_norm": 0.03997468575835228, | |
| "learning_rate": 0.004356725169905052, | |
| "loss": 5.95, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.23540315106580167, | |
| "grad_norm": 0.06662409007549286, | |
| "learning_rate": 0.0043518430681442205, | |
| "loss": 5.9625, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.23632993512511585, | |
| "grad_norm": 0.0542214997112751, | |
| "learning_rate": 0.004346945267790413, | |
| "loss": 5.9625, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.23725671918443003, | |
| "grad_norm": 0.05418306961655617, | |
| "learning_rate": 0.004342031810363658, | |
| "loss": 5.9625, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.2381835032437442, | |
| "grad_norm": 0.08298410475254059, | |
| "learning_rate": 0.004337102737516711, | |
| "loss": 5.9563, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.2391102873030584, | |
| "grad_norm": 0.051485590636730194, | |
| "learning_rate": 0.004332158091034705, | |
| "loss": 5.9938, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.24003707136237257, | |
| "grad_norm": 0.041104063391685486, | |
| "learning_rate": 0.004327197912834795, | |
| "loss": 5.9125, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.24096385542168675, | |
| "grad_norm": 0.06750784069299698, | |
| "learning_rate": 0.0043222222449658025, | |
| "loss": 5.9563, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.24189063948100092, | |
| "grad_norm": 0.05327602103352547, | |
| "learning_rate": 0.0043172311296078595, | |
| "loss": 5.8812, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.2428174235403151, | |
| "grad_norm": 0.05027195066213608, | |
| "learning_rate": 0.00431222460907205, | |
| "loss": 5.9125, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.24374420759962928, | |
| "grad_norm": 0.06142845377326012, | |
| "learning_rate": 0.004307202725800052, | |
| "loss": 5.9, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.24467099165894346, | |
| "grad_norm": 0.06710369884967804, | |
| "learning_rate": 0.004302165522363779, | |
| "loss": 5.9437, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.24559777571825764, | |
| "grad_norm": 0.06705372035503387, | |
| "learning_rate": 0.004297113041465017, | |
| "loss": 5.9062, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.24652455977757182, | |
| "grad_norm": 0.06116189435124397, | |
| "learning_rate": 0.004292045325935063, | |
| "loss": 5.9, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.247451343836886, | |
| "grad_norm": 0.054194726049900055, | |
| "learning_rate": 0.004286962418734364, | |
| "loss": 5.875, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.24837812789620017, | |
| "grad_norm": 0.0627150684595108, | |
| "learning_rate": 0.004281864362952147, | |
| "loss": 5.8875, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.24930491195551435, | |
| "grad_norm": 0.0440673902630806, | |
| "learning_rate": 0.004276751201806063, | |
| "loss": 5.8938, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.25023169601482853, | |
| "grad_norm": 0.034663740545511246, | |
| "learning_rate": 0.004271622978641812, | |
| "loss": 5.8625, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2511584800741427, | |
| "grad_norm": 0.04779878258705139, | |
| "learning_rate": 0.004266479736932779, | |
| "loss": 5.8563, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.2520852641334569, | |
| "grad_norm": 0.060510262846946716, | |
| "learning_rate": 0.004261321520279666, | |
| "loss": 5.8563, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.25301204819277107, | |
| "grad_norm": 0.05226600542664528, | |
| "learning_rate": 0.004256148372410125, | |
| "loss": 5.8375, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.25393883225208524, | |
| "grad_norm": 0.05810929834842682, | |
| "learning_rate": 0.004250960337178377, | |
| "loss": 5.8625, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.2548656163113994, | |
| "grad_norm": 0.07357963919639587, | |
| "learning_rate": 0.004245757458564855, | |
| "loss": 5.8688, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.2557924003707136, | |
| "grad_norm": 0.07380347698926926, | |
| "learning_rate": 0.004240539780675817, | |
| "loss": 5.8563, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.2567191844300278, | |
| "grad_norm": 0.05101478099822998, | |
| "learning_rate": 0.0042353073477429835, | |
| "loss": 5.825, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.25764596848934196, | |
| "grad_norm": 0.03864740952849388, | |
| "learning_rate": 0.004230060204123156, | |
| "loss": 5.8688, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.25857275254865614, | |
| "grad_norm": 0.06766132265329361, | |
| "learning_rate": 0.004224798394297841, | |
| "loss": 5.85, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.2594995366079703, | |
| "grad_norm": 0.06980055570602417, | |
| "learning_rate": 0.004219521962872876, | |
| "loss": 5.875, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.26042632066728455, | |
| "grad_norm": 0.04153401404619217, | |
| "learning_rate": 0.004214230954578051, | |
| "loss": 5.8313, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.26135310472659873, | |
| "grad_norm": 0.045340005308389664, | |
| "learning_rate": 0.004208925414266726, | |
| "loss": 5.8125, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.2622798887859129, | |
| "grad_norm": 0.04986559599637985, | |
| "learning_rate": 0.004203605386915454, | |
| "loss": 5.825, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.2632066728452271, | |
| "grad_norm": 0.04970383271574974, | |
| "learning_rate": 0.004198270917623599, | |
| "loss": 5.7688, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.26413345690454126, | |
| "grad_norm": 0.05129897966980934, | |
| "learning_rate": 0.004192922051612953, | |
| "loss": 5.8, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.26506024096385544, | |
| "grad_norm": 0.03994636610150337, | |
| "learning_rate": 0.004187558834227354, | |
| "loss": 5.8, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.2659870250231696, | |
| "grad_norm": 0.05204310640692711, | |
| "learning_rate": 0.004182181310932297, | |
| "loss": 5.7938, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.2669138090824838, | |
| "grad_norm": 0.03257805109024048, | |
| "learning_rate": 0.004176789527314558, | |
| "loss": 5.7562, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.267840593141798, | |
| "grad_norm": 0.035661760717630386, | |
| "learning_rate": 0.004171383529081797, | |
| "loss": 5.7812, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.26876737720111216, | |
| "grad_norm": 0.04478088766336441, | |
| "learning_rate": 0.004165963362062177, | |
| "loss": 5.7562, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.26969416126042633, | |
| "grad_norm": 0.03838647902011871, | |
| "learning_rate": 0.004160529072203974, | |
| "loss": 5.7688, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.2706209453197405, | |
| "grad_norm": 0.040849462151527405, | |
| "learning_rate": 0.004155080705575188, | |
| "loss": 5.7438, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.2715477293790547, | |
| "grad_norm": 0.051210496574640274, | |
| "learning_rate": 0.004149618308363149, | |
| "loss": 5.7375, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.27247451343836887, | |
| "grad_norm": 0.07401825487613678, | |
| "learning_rate": 0.00414414192687413, | |
| "loss": 5.7812, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.27340129749768305, | |
| "grad_norm": 0.10748963057994843, | |
| "learning_rate": 0.004138651607532954, | |
| "loss": 5.75, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.2743280815569972, | |
| "grad_norm": 0.07754500955343246, | |
| "learning_rate": 0.004133147396882597, | |
| "loss": 5.7562, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.2752548656163114, | |
| "grad_norm": 0.04524754732847214, | |
| "learning_rate": 0.004127629341583795, | |
| "loss": 5.7375, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.2761816496756256, | |
| "grad_norm": 0.06774584203958511, | |
| "learning_rate": 0.004122097488414652, | |
| "loss": 5.7375, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.27710843373493976, | |
| "grad_norm": 0.050472185015678406, | |
| "learning_rate": 0.004116551884270237, | |
| "loss": 5.6937, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.27803521779425394, | |
| "grad_norm": 0.040967270731925964, | |
| "learning_rate": 0.0041109925761621926, | |
| "loss": 5.7313, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.2789620018535681, | |
| "grad_norm": 0.03739303722977638, | |
| "learning_rate": 0.004105419611218332, | |
| "loss": 5.7188, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.2798887859128823, | |
| "grad_norm": 0.04636852815747261, | |
| "learning_rate": 0.004099833036682241, | |
| "loss": 5.725, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.2808155699721965, | |
| "grad_norm": 0.08012169599533081, | |
| "learning_rate": 0.00409423289991288, | |
| "loss": 5.7313, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.28174235403151066, | |
| "grad_norm": 0.05987093225121498, | |
| "learning_rate": 0.004088619248384178, | |
| "loss": 5.7125, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.28266913809082483, | |
| "grad_norm": 0.07735589891672134, | |
| "learning_rate": 0.0040829921296846325, | |
| "loss": 5.7, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.283595922150139, | |
| "grad_norm": 0.09283655136823654, | |
| "learning_rate": 0.004077351591516908, | |
| "loss": 5.675, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.2845227062094532, | |
| "grad_norm": 0.09337766468524933, | |
| "learning_rate": 0.004071697681697427, | |
| "loss": 5.7375, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.28544949026876737, | |
| "grad_norm": 0.06437985599040985, | |
| "learning_rate": 0.00406603044815597, | |
| "loss": 5.6875, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.28637627432808155, | |
| "grad_norm": 0.04110102728009224, | |
| "learning_rate": 0.004060349938935264, | |
| "loss": 5.6937, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.2873030583873957, | |
| "grad_norm": 0.06071547046303749, | |
| "learning_rate": 0.004054656202190578, | |
| "loss": 5.7375, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.2882298424467099, | |
| "grad_norm": 0.05311071500182152, | |
| "learning_rate": 0.004048949286189315, | |
| "loss": 5.65, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.2891566265060241, | |
| "grad_norm": 0.031259018927812576, | |
| "learning_rate": 0.004043229239310603, | |
| "loss": 5.6688, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.29008341056533826, | |
| "grad_norm": 0.03335728868842125, | |
| "learning_rate": 0.0040374961100448845, | |
| "loss": 5.675, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.29101019462465244, | |
| "grad_norm": 0.035077281296253204, | |
| "learning_rate": 0.004031749946993501, | |
| "loss": 5.675, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.2919369786839666, | |
| "grad_norm": 0.030766339972615242, | |
| "learning_rate": 0.004025990798868291, | |
| "loss": 5.6688, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.2928637627432808, | |
| "grad_norm": 0.03741341829299927, | |
| "learning_rate": 0.004020218714491166, | |
| "loss": 5.6625, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.293790546802595, | |
| "grad_norm": 0.044073686003685, | |
| "learning_rate": 0.0040144337427937046, | |
| "loss": 5.6375, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.29471733086190915, | |
| "grad_norm": 0.05024448409676552, | |
| "learning_rate": 0.004008635932816734, | |
| "loss": 5.6813, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.29564411492122333, | |
| "grad_norm": 0.045678358525037766, | |
| "learning_rate": 0.004002825333709915, | |
| "loss": 5.5938, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.2965708989805375, | |
| "grad_norm": 0.05762135609984398, | |
| "learning_rate": 0.003997001994731328, | |
| "loss": 5.6438, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2974976830398517, | |
| "grad_norm": 0.07177098840475082, | |
| "learning_rate": 0.003991165965247046, | |
| "loss": 5.6375, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.29842446709916587, | |
| "grad_norm": 0.07682537287473679, | |
| "learning_rate": 0.003985317294730731, | |
| "loss": 5.675, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.29935125115848005, | |
| "grad_norm": 0.08128990978002548, | |
| "learning_rate": 0.003979456032763201, | |
| "loss": 5.675, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.3002780352177943, | |
| "grad_norm": 0.08135168999433517, | |
| "learning_rate": 0.003973582229032019, | |
| "loss": 5.7125, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.30120481927710846, | |
| "grad_norm": 0.10176597535610199, | |
| "learning_rate": 0.003967695933331064, | |
| "loss": 5.6875, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.30213160333642264, | |
| "grad_norm": 0.10529598593711853, | |
| "learning_rate": 0.003961797195560118, | |
| "loss": 5.675, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.3030583873957368, | |
| "grad_norm": 0.06495360285043716, | |
| "learning_rate": 0.003955886065724433, | |
| "loss": 5.6312, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.303985171455051, | |
| "grad_norm": 0.06810038536787033, | |
| "learning_rate": 0.003949962593934316, | |
| "loss": 5.6312, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.3049119555143652, | |
| "grad_norm": 0.058491405099630356, | |
| "learning_rate": 0.003944026830404698, | |
| "loss": 5.5813, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.30583873957367935, | |
| "grad_norm": 0.05078050121665001, | |
| "learning_rate": 0.003938078825454709, | |
| "loss": 5.575, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.30676552363299353, | |
| "grad_norm": 0.06602590531110764, | |
| "learning_rate": 0.003932118629507257, | |
| "loss": 5.5875, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.0416589193046093, | |
| "learning_rate": 0.0039261462930885935, | |
| "loss": 5.6, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.3086190917516219, | |
| "grad_norm": 0.04823141545057297, | |
| "learning_rate": 0.003920161866827889, | |
| "loss": 5.5813, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.30954587581093607, | |
| "grad_norm": 0.03508712351322174, | |
| "learning_rate": 0.003914165401456804, | |
| "loss": 5.5875, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.31047265987025024, | |
| "grad_norm": 0.03729189559817314, | |
| "learning_rate": 0.003908156947809056, | |
| "loss": 5.575, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.3113994439295644, | |
| "grad_norm": 0.047349270433187485, | |
| "learning_rate": 0.0039021365568199917, | |
| "loss": 5.5625, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.3123262279888786, | |
| "grad_norm": 0.04627249017357826, | |
| "learning_rate": 0.0038961042795261536, | |
| "loss": 5.5375, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.3132530120481928, | |
| "grad_norm": 0.03604106232523918, | |
| "learning_rate": 0.0038900601670648484, | |
| "loss": 5.575, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.31417979610750696, | |
| "grad_norm": 0.040808554738759995, | |
| "learning_rate": 0.0038840042706737112, | |
| "loss": 5.5563, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.31510658016682114, | |
| "grad_norm": 0.027617141604423523, | |
| "learning_rate": 0.003877936641690275, | |
| "loss": 5.5813, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3160333642261353, | |
| "grad_norm": 0.03513359650969505, | |
| "learning_rate": 0.0038718573315515317, | |
| "loss": 5.5438, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.3169601482854495, | |
| "grad_norm": 0.03978215530514717, | |
| "learning_rate": 0.0038657663917934983, | |
| "loss": 5.575, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.3178869323447637, | |
| "grad_norm": 0.025322776287794113, | |
| "learning_rate": 0.0038596638740507785, | |
| "loss": 5.525, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.31881371640407785, | |
| "grad_norm": 0.04898100346326828, | |
| "learning_rate": 0.0038535498300561266, | |
| "loss": 5.525, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.31974050046339203, | |
| "grad_norm": 0.0469982884824276, | |
| "learning_rate": 0.003847424311640009, | |
| "loss": 5.5438, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.3206672845227062, | |
| "grad_norm": 0.03919081762433052, | |
| "learning_rate": 0.0038412873707301615, | |
| "loss": 5.5312, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.3215940685820204, | |
| "grad_norm": 0.04740371182560921, | |
| "learning_rate": 0.0038351390593511546, | |
| "loss": 5.5, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.32252085264133457, | |
| "grad_norm": 0.05560089647769928, | |
| "learning_rate": 0.003828979429623947, | |
| "loss": 5.5125, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.32344763670064874, | |
| "grad_norm": 0.060783710330724716, | |
| "learning_rate": 0.0038228085337654472, | |
| "loss": 5.5312, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.3243744207599629, | |
| "grad_norm": 0.0725303441286087, | |
| "learning_rate": 0.00381662642408807, | |
| "loss": 5.5, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3253012048192771, | |
| "grad_norm": 0.07496823370456696, | |
| "learning_rate": 0.003810433152999293, | |
| "loss": 5.5, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.3262279888785913, | |
| "grad_norm": 0.06248985975980759, | |
| "learning_rate": 0.0038042287730012114, | |
| "loss": 5.525, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.32715477293790546, | |
| "grad_norm": 0.06995397806167603, | |
| "learning_rate": 0.003798013336690095, | |
| "loss": 5.5188, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.32808155699721964, | |
| "grad_norm": 0.04727565497159958, | |
| "learning_rate": 0.0037917868967559387, | |
| "loss": 5.525, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.3290083410565338, | |
| "grad_norm": 0.05960770696401596, | |
| "learning_rate": 0.0037855495059820215, | |
| "loss": 5.5, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.329935125115848, | |
| "grad_norm": 0.049259670078754425, | |
| "learning_rate": 0.0037793012172444534, | |
| "loss": 5.4813, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.33086190917516217, | |
| "grad_norm": 0.06020974740386009, | |
| "learning_rate": 0.003773042083511731, | |
| "loss": 5.4625, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.33178869323447635, | |
| "grad_norm": 0.0410022996366024, | |
| "learning_rate": 0.003766772157844284, | |
| "loss": 5.4813, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.33271547729379053, | |
| "grad_norm": 0.04682173952460289, | |
| "learning_rate": 0.003760491493394032, | |
| "loss": 5.5, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.3336422613531047, | |
| "grad_norm": 0.055474553257226944, | |
| "learning_rate": 0.003754200143403929, | |
| "loss": 5.4938, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3345690454124189, | |
| "grad_norm": 0.04533625394105911, | |
| "learning_rate": 0.0037478981612075126, | |
| "loss": 5.4625, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.33549582947173306, | |
| "grad_norm": 0.0564807690680027, | |
| "learning_rate": 0.0037415856002284524, | |
| "loss": 5.4188, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.33642261353104724, | |
| "grad_norm": 0.056940093636512756, | |
| "learning_rate": 0.003735262513980099, | |
| "loss": 5.4313, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.3373493975903614, | |
| "grad_norm": 0.03561275824904442, | |
| "learning_rate": 0.003728928956065027, | |
| "loss": 5.4313, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.3382761816496756, | |
| "grad_norm": 0.04059695452451706, | |
| "learning_rate": 0.003722584980174583, | |
| "loss": 5.425, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.3392029657089898, | |
| "grad_norm": 0.05738742649555206, | |
| "learning_rate": 0.0037162306400884307, | |
| "loss": 5.45, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.340129749768304, | |
| "grad_norm": 0.057356227189302444, | |
| "learning_rate": 0.0037098659896740906, | |
| "loss": 5.45, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.3410565338276182, | |
| "grad_norm": 0.049577098339796066, | |
| "learning_rate": 0.0037034910828864904, | |
| "loss": 5.4625, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.34198331788693237, | |
| "grad_norm": 0.03639480471611023, | |
| "learning_rate": 0.003697105973767503, | |
| "loss": 5.3875, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.34291010194624655, | |
| "grad_norm": 0.0382065586745739, | |
| "learning_rate": 0.003690710716445488, | |
| "loss": 5.4437, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3438368860055607, | |
| "grad_norm": 0.06564627587795258, | |
| "learning_rate": 0.0036843053651348357, | |
| "loss": 5.4062, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.3447636700648749, | |
| "grad_norm": 0.08808669447898865, | |
| "learning_rate": 0.003677889974135504, | |
| "loss": 5.4062, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.3456904541241891, | |
| "grad_norm": 0.05307735130190849, | |
| "learning_rate": 0.0036714645978325636, | |
| "loss": 5.4, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.34661723818350326, | |
| "grad_norm": 0.05861683562397957, | |
| "learning_rate": 0.0036650292906957294, | |
| "loss": 5.4563, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.34754402224281744, | |
| "grad_norm": 0.06583855301141739, | |
| "learning_rate": 0.003658584107278905, | |
| "loss": 5.3938, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.3484708063021316, | |
| "grad_norm": 0.038819484412670135, | |
| "learning_rate": 0.0036521291022197184, | |
| "loss": 5.3625, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.3493975903614458, | |
| "grad_norm": 0.0668378546833992, | |
| "learning_rate": 0.0036456643302390564, | |
| "loss": 5.3688, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.35032437442076, | |
| "grad_norm": 0.06500761210918427, | |
| "learning_rate": 0.0036391898461406043, | |
| "loss": 5.3688, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.35125115848007415, | |
| "grad_norm": 0.06566040962934494, | |
| "learning_rate": 0.003632705704810379, | |
| "loss": 5.3875, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.35217794253938833, | |
| "grad_norm": 0.04046965390443802, | |
| "learning_rate": 0.0036262119612162657, | |
| "loss": 5.3563, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3531047265987025, | |
| "grad_norm": 0.04664246365427971, | |
| "learning_rate": 0.0036197086704075495, | |
| "loss": 5.35, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.3540315106580167, | |
| "grad_norm": 0.06433206051588058, | |
| "learning_rate": 0.0036131958875144496, | |
| "loss": 5.3938, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.35495829471733087, | |
| "grad_norm": 0.06552179157733917, | |
| "learning_rate": 0.003606673667747653, | |
| "loss": 5.375, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.35588507877664505, | |
| "grad_norm": 0.0640706792473793, | |
| "learning_rate": 0.0036001420663978466, | |
| "loss": 5.3938, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.3568118628359592, | |
| "grad_norm": 0.0631820559501648, | |
| "learning_rate": 0.003593601138835246, | |
| "loss": 5.3375, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3577386468952734, | |
| "grad_norm": 0.0694313570857048, | |
| "learning_rate": 0.0035870509405091272, | |
| "loss": 5.3812, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.3586654309545876, | |
| "grad_norm": 0.05696525424718857, | |
| "learning_rate": 0.0035804915269473598, | |
| "loss": 5.3563, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.35959221501390176, | |
| "grad_norm": 0.041316401213407516, | |
| "learning_rate": 0.0035739229537559316, | |
| "loss": 5.3313, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.36051899907321594, | |
| "grad_norm": 0.05180737376213074, | |
| "learning_rate": 0.003567345276618479, | |
| "loss": 5.3625, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.3614457831325301, | |
| "grad_norm": 0.06132522597908974, | |
| "learning_rate": 0.003560758551295816, | |
| "loss": 5.3375, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3623725671918443, | |
| "grad_norm": 0.0825105607509613, | |
| "learning_rate": 0.00355416283362546, | |
| "loss": 5.3625, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.3632993512511585, | |
| "grad_norm": 0.09952400624752045, | |
| "learning_rate": 0.0035475581795211594, | |
| "loss": 5.375, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.36422613531047265, | |
| "grad_norm": 0.11159048974514008, | |
| "learning_rate": 0.0035409446449724187, | |
| "loss": 5.3875, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.36515291936978683, | |
| "grad_norm": 0.06153342127799988, | |
| "learning_rate": 0.0035343222860440247, | |
| "loss": 5.35, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.366079703429101, | |
| "grad_norm": 0.055650901049375534, | |
| "learning_rate": 0.0035276911588755723, | |
| "loss": 5.2938, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3670064874884152, | |
| "grad_norm": 0.05008624121546745, | |
| "learning_rate": 0.003521051319680984, | |
| "loss": 5.3375, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.36793327154772937, | |
| "grad_norm": 0.04708503931760788, | |
| "learning_rate": 0.0035144028247480405, | |
| "loss": 5.3438, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.36886005560704355, | |
| "grad_norm": 0.041482266038656235, | |
| "learning_rate": 0.0035077457304378964, | |
| "loss": 5.2875, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.3697868396663577, | |
| "grad_norm": 0.056157998740673065, | |
| "learning_rate": 0.003501080093184607, | |
| "loss": 5.3, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.3707136237256719, | |
| "grad_norm": 0.047049764543771744, | |
| "learning_rate": 0.0034944059694946494, | |
| "loss": 5.3, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3716404077849861, | |
| "grad_norm": 0.0425553135573864, | |
| "learning_rate": 0.0034877234159464412, | |
| "loss": 5.325, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.37256719184430026, | |
| "grad_norm": 0.036974068731069565, | |
| "learning_rate": 0.003481032489189862, | |
| "loss": 5.275, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.37349397590361444, | |
| "grad_norm": 0.038740385323762894, | |
| "learning_rate": 0.003474333245945775, | |
| "loss": 5.2438, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.3744207599629286, | |
| "grad_norm": 0.037295546382665634, | |
| "learning_rate": 0.0034676257430055436, | |
| "loss": 5.2688, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.3753475440222428, | |
| "grad_norm": 0.04598161205649376, | |
| "learning_rate": 0.00346091003723055, | |
| "loss": 5.2812, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.376274328081557, | |
| "grad_norm": 0.052688293159008026, | |
| "learning_rate": 0.003454186185551717, | |
| "loss": 5.2625, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.37720111214087115, | |
| "grad_norm": 0.0431685745716095, | |
| "learning_rate": 0.0034474542449690203, | |
| "loss": 5.2313, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.37812789620018533, | |
| "grad_norm": 0.047002580016851425, | |
| "learning_rate": 0.0034407142725510075, | |
| "loss": 5.25, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.3790546802594995, | |
| "grad_norm": 0.034174490720033646, | |
| "learning_rate": 0.003433966325434315, | |
| "loss": 5.2438, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.3799814643188137, | |
| "grad_norm": 0.037927597761154175, | |
| "learning_rate": 0.0034272104608231825, | |
| "loss": 5.2562, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3809082483781279, | |
| "grad_norm": 0.040478792041540146, | |
| "learning_rate": 0.003420446735988969, | |
| "loss": 5.25, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.3818350324374421, | |
| "grad_norm": 0.043072253465652466, | |
| "learning_rate": 0.0034136752082696664, | |
| "loss": 5.1688, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.3827618164967563, | |
| "grad_norm": 0.04011726379394531, | |
| "learning_rate": 0.003406895935069414, | |
| "loss": 5.2375, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.38368860055607046, | |
| "grad_norm": 0.056565847247838974, | |
| "learning_rate": 0.0034001089738580127, | |
| "loss": 5.2562, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.045512937009334564, | |
| "learning_rate": 0.0033933143821704343, | |
| "loss": 5.25, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.3855421686746988, | |
| "grad_norm": 0.05256471410393715, | |
| "learning_rate": 0.003386512217606339, | |
| "loss": 5.2375, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.386468952734013, | |
| "grad_norm": 0.055981192737817764, | |
| "learning_rate": 0.0033797025378295826, | |
| "loss": 5.2438, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.3873957367933272, | |
| "grad_norm": 0.06136908382177353, | |
| "learning_rate": 0.003372885400567731, | |
| "loss": 5.2375, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.38832252085264135, | |
| "grad_norm": 0.07198972254991531, | |
| "learning_rate": 0.003366060863611567, | |
| "loss": 5.225, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.38924930491195553, | |
| "grad_norm": 0.05037841945886612, | |
| "learning_rate": 0.003359228984814605, | |
| "loss": 5.1937, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3901760889712697, | |
| "grad_norm": 0.0768144503235817, | |
| "learning_rate": 0.0033523898220925974, | |
| "loss": 5.1875, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.3911028730305839, | |
| "grad_norm": 0.08858561515808105, | |
| "learning_rate": 0.003345543433423044, | |
| "loss": 5.2625, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.39202965708989806, | |
| "grad_norm": 0.10811244696378708, | |
| "learning_rate": 0.0033386898768447016, | |
| "loss": 5.2375, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.39295644114921224, | |
| "grad_norm": 0.11364039778709412, | |
| "learning_rate": 0.003331829210457091, | |
| "loss": 5.2812, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.3938832252085264, | |
| "grad_norm": 0.08991072326898575, | |
| "learning_rate": 0.0033249614924200054, | |
| "loss": 5.2188, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.3948100092678406, | |
| "grad_norm": 0.0634012222290039, | |
| "learning_rate": 0.003318086780953016, | |
| "loss": 5.1813, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.3957367933271548, | |
| "grad_norm": 0.07201571762561798, | |
| "learning_rate": 0.003311205134334979, | |
| "loss": 5.2, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.39666357738646896, | |
| "grad_norm": 0.0652351826429367, | |
| "learning_rate": 0.0033043166109035446, | |
| "loss": 5.2, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.39759036144578314, | |
| "grad_norm": 0.04549067094922066, | |
| "learning_rate": 0.0032974212690546558, | |
| "loss": 5.1875, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.3985171455050973, | |
| "grad_norm": 0.06608382612466812, | |
| "learning_rate": 0.0032905191672420596, | |
| "loss": 5.2313, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3994439295644115, | |
| "grad_norm": 0.04941621795296669, | |
| "learning_rate": 0.003283610363976809, | |
| "loss": 5.1375, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.40037071362372567, | |
| "grad_norm": 0.05331863835453987, | |
| "learning_rate": 0.0032766949178267657, | |
| "loss": 5.1188, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.40129749768303985, | |
| "grad_norm": 0.04874474182724953, | |
| "learning_rate": 0.003269772887416106, | |
| "loss": 5.1562, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.40222428174235403, | |
| "grad_norm": 0.05278300493955612, | |
| "learning_rate": 0.0032628443314248233, | |
| "loss": 5.1438, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.4031510658016682, | |
| "grad_norm": 0.04638415202498436, | |
| "learning_rate": 0.003255909308588229, | |
| "loss": 5.1438, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4040778498609824, | |
| "grad_norm": 0.06462404876947403, | |
| "learning_rate": 0.003248967877696457, | |
| "loss": 5.1875, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.40500463392029656, | |
| "grad_norm": 0.04122454300522804, | |
| "learning_rate": 0.0032420200975939633, | |
| "loss": 5.1375, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.40593141797961074, | |
| "grad_norm": 0.05846314877271652, | |
| "learning_rate": 0.003235066027179028, | |
| "loss": 5.15, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.4068582020389249, | |
| "grad_norm": 0.06503690779209137, | |
| "learning_rate": 0.0032281057254032563, | |
| "loss": 5.1375, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.4077849860982391, | |
| "grad_norm": 0.05073606222867966, | |
| "learning_rate": 0.0032211392512710773, | |
| "loss": 5.0875, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4087117701575533, | |
| "grad_norm": 0.06046286225318909, | |
| "learning_rate": 0.003214166663839247, | |
| "loss": 5.1188, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.40963855421686746, | |
| "grad_norm": 0.03978972136974335, | |
| "learning_rate": 0.003207188022216343, | |
| "loss": 5.125, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.41056533827618164, | |
| "grad_norm": 0.04392355680465698, | |
| "learning_rate": 0.0032002033855622683, | |
| "loss": 5.125, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.4114921223354958, | |
| "grad_norm": 0.039449259638786316, | |
| "learning_rate": 0.003193212813087745, | |
| "loss": 5.125, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.41241890639481, | |
| "grad_norm": 0.04521370679140091, | |
| "learning_rate": 0.003186216364053818, | |
| "loss": 5.0813, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.41334569045412417, | |
| "grad_norm": 0.06002253293991089, | |
| "learning_rate": 0.003179214097771346, | |
| "loss": 5.0875, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.41427247451343835, | |
| "grad_norm": 0.07361883670091629, | |
| "learning_rate": 0.0031722060736005054, | |
| "loss": 5.1312, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.4151992585727525, | |
| "grad_norm": 0.06389747560024261, | |
| "learning_rate": 0.0031651923509502817, | |
| "loss": 5.0875, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.4161260426320667, | |
| "grad_norm": 0.07580303400754929, | |
| "learning_rate": 0.003158172989277968, | |
| "loss": 5.1438, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.4170528266913809, | |
| "grad_norm": 0.06630785763263702, | |
| "learning_rate": 0.0031511480480886623, | |
| "loss": 5.125, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.41797961075069506, | |
| "grad_norm": 0.05100114271044731, | |
| "learning_rate": 0.0031441175869347604, | |
| "loss": 5.0563, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.41890639481000924, | |
| "grad_norm": 0.044168341904878616, | |
| "learning_rate": 0.003137081665415453, | |
| "loss": 5.1063, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.4198331788693234, | |
| "grad_norm": 0.036300163716077805, | |
| "learning_rate": 0.0031300403431762202, | |
| "loss": 5.0938, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.42075996292863765, | |
| "grad_norm": 0.03885301947593689, | |
| "learning_rate": 0.003122993679908325, | |
| "loss": 5.075, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.42168674698795183, | |
| "grad_norm": 0.047411106526851654, | |
| "learning_rate": 0.0031159417353483075, | |
| "loss": 5.0813, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.422613531047266, | |
| "grad_norm": 0.04042837396264076, | |
| "learning_rate": 0.00310888456927748, | |
| "loss": 5.025, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.4235403151065802, | |
| "grad_norm": 0.0529557429254055, | |
| "learning_rate": 0.0031018222415214176, | |
| "loss": 5.0938, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.42446709916589437, | |
| "grad_norm": 0.03582127019762993, | |
| "learning_rate": 0.003094754811949453, | |
| "loss": 5.05, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.42539388322520855, | |
| "grad_norm": 0.04631989449262619, | |
| "learning_rate": 0.0030876823404741693, | |
| "loss": 5.0625, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.4263206672845227, | |
| "grad_norm": 0.05943077430129051, | |
| "learning_rate": 0.0030806048870508896, | |
| "loss": 5.0375, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.4272474513438369, | |
| "grad_norm": 0.04641159623861313, | |
| "learning_rate": 0.003073522511677171, | |
| "loss": 5.0687, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.4281742354031511, | |
| "grad_norm": 0.04967037960886955, | |
| "learning_rate": 0.0030664352743922964, | |
| "loss": 5.05, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.42910101946246526, | |
| "grad_norm": 0.05452379956841469, | |
| "learning_rate": 0.0030593432352767637, | |
| "loss": 5.0563, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.43002780352177944, | |
| "grad_norm": 0.05275031551718712, | |
| "learning_rate": 0.003052246454451776, | |
| "loss": 5.05, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.4309545875810936, | |
| "grad_norm": 0.0582866407930851, | |
| "learning_rate": 0.0030451449920787356, | |
| "loss": 5.0375, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.4318813716404078, | |
| "grad_norm": 0.07089794427156448, | |
| "learning_rate": 0.00303803890835873, | |
| "loss": 5.0813, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.432808155699722, | |
| "grad_norm": 0.05818159505724907, | |
| "learning_rate": 0.0030309282635320235, | |
| "loss": 5.025, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.43373493975903615, | |
| "grad_norm": 0.05577028915286064, | |
| "learning_rate": 0.0030238131178775465, | |
| "loss": 5.0312, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.43466172381835033, | |
| "grad_norm": 0.0684211254119873, | |
| "learning_rate": 0.0030166935317123824, | |
| "loss": 5.0, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.4355885078776645, | |
| "grad_norm": 0.06801000237464905, | |
| "learning_rate": 0.0030095695653912617, | |
| "loss": 5.0687, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.4365152919369787, | |
| "grad_norm": 0.07714419811964035, | |
| "learning_rate": 0.0030024412793060442, | |
| "loss": 5.05, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.43744207599629287, | |
| "grad_norm": 0.07117122411727905, | |
| "learning_rate": 0.0029953087338852086, | |
| "loss": 5.0375, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.43836886005560705, | |
| "grad_norm": 0.05810219794511795, | |
| "learning_rate": 0.002988171989593344, | |
| "loss": 5.0125, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.4392956441149212, | |
| "grad_norm": 0.0630822405219078, | |
| "learning_rate": 0.002981031106930632, | |
| "loss": 4.9938, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.4402224281742354, | |
| "grad_norm": 0.09144022315740585, | |
| "learning_rate": 0.002973886146432338, | |
| "loss": 5.05, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.4411492122335496, | |
| "grad_norm": 0.07084767520427704, | |
| "learning_rate": 0.002966737168668295, | |
| "loss": 5.0062, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.44207599629286376, | |
| "grad_norm": 0.048369865864515305, | |
| "learning_rate": 0.0029595842342423936, | |
| "loss": 4.9313, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.44300278035217794, | |
| "grad_norm": 0.05783843249082565, | |
| "learning_rate": 0.002952427403792063, | |
| "loss": 4.9375, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.4439295644114921, | |
| "grad_norm": 0.05991849675774574, | |
| "learning_rate": 0.002945266737987763, | |
| "loss": 4.9688, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.4448563484708063, | |
| "grad_norm": 0.05597536638379097, | |
| "learning_rate": 0.0029381022975324645, | |
| "loss": 5.0, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.4457831325301205, | |
| "grad_norm": 0.0695003792643547, | |
| "learning_rate": 0.0029309341431611397, | |
| "loss": 5.0125, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.44670991658943465, | |
| "grad_norm": 0.08234460651874542, | |
| "learning_rate": 0.002923762335640242, | |
| "loss": 5.0125, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.44763670064874883, | |
| "grad_norm": 0.07713950425386429, | |
| "learning_rate": 0.002916586935767195, | |
| "loss": 5.0125, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.448563484708063, | |
| "grad_norm": 0.07240517437458038, | |
| "learning_rate": 0.002909408004369877, | |
| "loss": 5.0125, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.4494902687673772, | |
| "grad_norm": 0.0547131672501564, | |
| "learning_rate": 0.0029022256023061004, | |
| "loss": 4.9625, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.45041705282669137, | |
| "grad_norm": 0.045404303818941116, | |
| "learning_rate": 0.0028950397904631033, | |
| "loss": 5.0, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.45134383688600554, | |
| "grad_norm": 0.05781068280339241, | |
| "learning_rate": 0.002887850629757026, | |
| "loss": 4.9563, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.4522706209453197, | |
| "grad_norm": 0.048498354852199554, | |
| "learning_rate": 0.0028806581811324007, | |
| "loss": 4.925, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.4531974050046339, | |
| "grad_norm": 0.039063528180122375, | |
| "learning_rate": 0.002873462505561632, | |
| "loss": 4.9688, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.4541241890639481, | |
| "grad_norm": 0.038773953914642334, | |
| "learning_rate": 0.002866263664044479, | |
| "loss": 4.9437, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.45505097312326226, | |
| "grad_norm": 0.058951422572135925, | |
| "learning_rate": 0.002859061717607539, | |
| "loss": 4.95, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.45597775718257644, | |
| "grad_norm": 0.058964647352695465, | |
| "learning_rate": 0.0028518567273037327, | |
| "loss": 4.9313, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.4569045412418906, | |
| "grad_norm": 0.05438453331589699, | |
| "learning_rate": 0.002844648754211783, | |
| "loss": 4.95, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.4578313253012048, | |
| "grad_norm": 0.04710723087191582, | |
| "learning_rate": 0.002837437859435698, | |
| "loss": 4.9062, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.458758109360519, | |
| "grad_norm": 0.0365031473338604, | |
| "learning_rate": 0.0028302241041042566, | |
| "loss": 4.9688, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.45968489341983315, | |
| "grad_norm": 0.03951582312583923, | |
| "learning_rate": 0.0028230075493704838, | |
| "loss": 4.9563, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.4606116774791474, | |
| "grad_norm": 0.04623804986476898, | |
| "learning_rate": 0.0028157882564111385, | |
| "loss": 4.9375, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 0.040012940764427185, | |
| "learning_rate": 0.002808566286426191, | |
| "loss": 4.925, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.46246524559777574, | |
| "grad_norm": 0.04338626191020012, | |
| "learning_rate": 0.0028013417006383075, | |
| "loss": 4.95, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.4633920296570899, | |
| "grad_norm": 0.0410669781267643, | |
| "learning_rate": 0.0027941145602923267, | |
| "loss": 4.9125, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4643188137164041, | |
| "grad_norm": 0.03322385624051094, | |
| "learning_rate": 0.0027868849266547437, | |
| "loss": 4.8875, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.4652455977757183, | |
| "grad_norm": 0.036676980555057526, | |
| "learning_rate": 0.00277965286101319, | |
| "loss": 4.95, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.46617238183503246, | |
| "grad_norm": 0.044222161173820496, | |
| "learning_rate": 0.0027724184246759147, | |
| "loss": 4.9125, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.46709916589434664, | |
| "grad_norm": 0.06456394493579865, | |
| "learning_rate": 0.002765181678971263, | |
| "loss": 4.9062, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.4680259499536608, | |
| "grad_norm": 0.0746362954378128, | |
| "learning_rate": 0.0027579426852471574, | |
| "loss": 4.8875, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.468952734012975, | |
| "grad_norm": 0.08617927134037018, | |
| "learning_rate": 0.0027507015048705776, | |
| "loss": 4.8938, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.46987951807228917, | |
| "grad_norm": 0.07306444644927979, | |
| "learning_rate": 0.00274345819922704, | |
| "loss": 4.9, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.47080630213160335, | |
| "grad_norm": 0.04307616129517555, | |
| "learning_rate": 0.0027362128297200783, | |
| "loss": 4.9062, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.4717330861909175, | |
| "grad_norm": 0.06619231402873993, | |
| "learning_rate": 0.0027289654577707214, | |
| "loss": 4.8938, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.4726598702502317, | |
| "grad_norm": 0.07649318128824234, | |
| "learning_rate": 0.002721716144816973, | |
| "loss": 4.8938, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4735866543095459, | |
| "grad_norm": 0.0643559917807579, | |
| "learning_rate": 0.002714464952313292, | |
| "loss": 4.825, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.47451343836886006, | |
| "grad_norm": 0.07730736583471298, | |
| "learning_rate": 0.0027072119417300713, | |
| "loss": 4.8812, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.47544022242817424, | |
| "grad_norm": 0.08054769784212112, | |
| "learning_rate": 0.002699957174553115, | |
| "loss": 4.9062, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.4763670064874884, | |
| "grad_norm": 0.06001604348421097, | |
| "learning_rate": 0.002692700712283119, | |
| "loss": 4.8938, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.4772937905468026, | |
| "grad_norm": 0.04911705106496811, | |
| "learning_rate": 0.0026854426164351483, | |
| "loss": 4.8625, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.4782205746061168, | |
| "grad_norm": 0.04762764275074005, | |
| "learning_rate": 0.002678182948538117, | |
| "loss": 4.8375, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.47914735866543096, | |
| "grad_norm": 0.045550934970378876, | |
| "learning_rate": 0.002670921770134266, | |
| "loss": 4.8938, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.48007414272474513, | |
| "grad_norm": 0.057238396257162094, | |
| "learning_rate": 0.00266365914277864, | |
| "loss": 4.8875, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.4810009267840593, | |
| "grad_norm": 0.053200677037239075, | |
| "learning_rate": 0.002656395128038568, | |
| "loss": 4.8438, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.4819277108433735, | |
| "grad_norm": 0.047585804015398026, | |
| "learning_rate": 0.00264912978749314, | |
| "loss": 4.8063, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.48285449490268767, | |
| "grad_norm": 0.05673938989639282, | |
| "learning_rate": 0.0026418631827326857, | |
| "loss": 4.8875, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.48378127896200185, | |
| "grad_norm": 0.05663244426250458, | |
| "learning_rate": 0.0026345953753582497, | |
| "loss": 4.9, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.484708063021316, | |
| "grad_norm": 0.04882281646132469, | |
| "learning_rate": 0.0026273264269810743, | |
| "loss": 4.8313, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.4856348470806302, | |
| "grad_norm": 0.0483589768409729, | |
| "learning_rate": 0.0026200563992220733, | |
| "loss": 4.8438, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.4865616311399444, | |
| "grad_norm": 0.05800378695130348, | |
| "learning_rate": 0.00261278535371131, | |
| "loss": 4.8125, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.48748841519925856, | |
| "grad_norm": 0.04723868519067764, | |
| "learning_rate": 0.002605513352087477, | |
| "loss": 4.7812, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.48841519925857274, | |
| "grad_norm": 0.051099590957164764, | |
| "learning_rate": 0.0025982404559973704, | |
| "loss": 4.8125, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.4893419833178869, | |
| "grad_norm": 0.05315464735031128, | |
| "learning_rate": 0.00259096672709537, | |
| "loss": 4.775, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.4902687673772011, | |
| "grad_norm": 0.05382310971617699, | |
| "learning_rate": 0.002583692227042916, | |
| "loss": 4.7812, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.4911955514365153, | |
| "grad_norm": 0.05870763957500458, | |
| "learning_rate": 0.002576417017507983, | |
| "loss": 4.8625, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.49212233549582945, | |
| "grad_norm": 0.03859548643231392, | |
| "learning_rate": 0.0025691411601645657, | |
| "loss": 4.7938, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.49304911955514363, | |
| "grad_norm": 0.05789710581302643, | |
| "learning_rate": 0.002561864716692145, | |
| "loss": 4.8438, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.4939759036144578, | |
| "grad_norm": 0.04865971952676773, | |
| "learning_rate": 0.0025545877487751735, | |
| "loss": 4.7812, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.494902687673772, | |
| "grad_norm": 0.05406877398490906, | |
| "learning_rate": 0.0025473103181025475, | |
| "loss": 4.8313, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.49582947173308617, | |
| "grad_norm": 0.051227353513240814, | |
| "learning_rate": 0.002540032486367089, | |
| "loss": 4.7562, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.49675625579240035, | |
| "grad_norm": 0.05123087763786316, | |
| "learning_rate": 0.002532754315265018, | |
| "loss": 4.8187, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.4976830398517145, | |
| "grad_norm": 0.04913110285997391, | |
| "learning_rate": 0.0025254758664954306, | |
| "loss": 4.8125, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.4986098239110287, | |
| "grad_norm": 0.04741792008280754, | |
| "learning_rate": 0.0025181972017597806, | |
| "loss": 4.7875, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.4995366079703429, | |
| "grad_norm": 0.055246248841285706, | |
| "learning_rate": 0.0025109183827613474, | |
| "loss": 4.8063, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.5004633920296571, | |
| "grad_norm": 0.037354640662670135, | |
| "learning_rate": 0.002503639471204722, | |
| "loss": 4.75, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5013901760889713, | |
| "grad_norm": 0.04416719824075699, | |
| "learning_rate": 0.002496360528795279, | |
| "loss": 4.7812, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.5023169601482854, | |
| "grad_norm": 0.04072472080588341, | |
| "learning_rate": 0.0024890816172386527, | |
| "loss": 4.75, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.5032437442075997, | |
| "grad_norm": 0.048542048782110214, | |
| "learning_rate": 0.002481802798240221, | |
| "loss": 4.7688, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.5041705282669138, | |
| "grad_norm": 0.05309506133198738, | |
| "learning_rate": 0.0024745241335045695, | |
| "loss": 4.775, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.505097312326228, | |
| "grad_norm": 0.037804365158081055, | |
| "learning_rate": 0.0024672456847349834, | |
| "loss": 4.75, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5060240963855421, | |
| "grad_norm": 0.045449260622262955, | |
| "learning_rate": 0.0024599675136329113, | |
| "loss": 4.7625, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.5069508804448564, | |
| "grad_norm": 0.046078864485025406, | |
| "learning_rate": 0.002452689681897453, | |
| "loss": 4.7688, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.5078776645041705, | |
| "grad_norm": 0.04518760368227959, | |
| "learning_rate": 0.002445412251224827, | |
| "loss": 4.7375, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.5088044485634847, | |
| "grad_norm": 0.03942165523767471, | |
| "learning_rate": 0.002438135283307855, | |
| "loss": 4.75, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.5097312326227988, | |
| "grad_norm": 0.045819394290447235, | |
| "learning_rate": 0.0024308588398354344, | |
| "loss": 4.7313, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5106580166821131, | |
| "grad_norm": 0.06149514392018318, | |
| "learning_rate": 0.002423582982492017, | |
| "loss": 4.7313, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.5115848007414272, | |
| "grad_norm": 0.06028604507446289, | |
| "learning_rate": 0.002416307772957085, | |
| "loss": 4.7438, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.5125115848007414, | |
| "grad_norm": 0.043709807097911835, | |
| "learning_rate": 0.002409033272904631, | |
| "loss": 4.7625, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.5134383688600556, | |
| "grad_norm": 0.042988523840904236, | |
| "learning_rate": 0.00240175954400263, | |
| "loss": 4.7562, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.5143651529193698, | |
| "grad_norm": 0.053336091339588165, | |
| "learning_rate": 0.002394486647912524, | |
| "loss": 4.6875, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5152919369786839, | |
| "grad_norm": 0.061223022639751434, | |
| "learning_rate": 0.00238721464628869, | |
| "loss": 4.725, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.5162187210379982, | |
| "grad_norm": 0.0704147219657898, | |
| "learning_rate": 0.0023799436007779277, | |
| "loss": 4.6813, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.5171455050973123, | |
| "grad_norm": 0.06097421795129776, | |
| "learning_rate": 0.002372673573018926, | |
| "loss": 4.7625, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.5180722891566265, | |
| "grad_norm": 0.04165394976735115, | |
| "learning_rate": 0.0023654046246417513, | |
| "loss": 4.7125, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.5189990732159406, | |
| "grad_norm": 0.040571633726358414, | |
| "learning_rate": 0.0023581368172673153, | |
| "loss": 4.7625, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5199258572752549, | |
| "grad_norm": 0.04544011875987053, | |
| "learning_rate": 0.0023508702125068608, | |
| "loss": 4.7625, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.5208526413345691, | |
| "grad_norm": 0.04342002421617508, | |
| "learning_rate": 0.0023436048719614323, | |
| "loss": 4.7313, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.5217794253938832, | |
| "grad_norm": 0.041976965963840485, | |
| "learning_rate": 0.00233634085722136, | |
| "loss": 4.7313, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.5227062094531975, | |
| "grad_norm": 0.0512029230594635, | |
| "learning_rate": 0.0023290782298657346, | |
| "loss": 4.6937, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.5236329935125116, | |
| "grad_norm": 0.06346142292022705, | |
| "learning_rate": 0.002321817051461883, | |
| "loss": 4.675, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.5245597775718258, | |
| "grad_norm": 0.05272765830159187, | |
| "learning_rate": 0.002314557383564852, | |
| "loss": 4.75, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.5254865616311399, | |
| "grad_norm": 0.038122035562992096, | |
| "learning_rate": 0.002307299287716881, | |
| "loss": 4.7125, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.5264133456904542, | |
| "grad_norm": 0.042520515620708466, | |
| "learning_rate": 0.0023000428254468853, | |
| "loss": 4.6875, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.5273401297497683, | |
| "grad_norm": 0.05327059328556061, | |
| "learning_rate": 0.0022927880582699284, | |
| "loss": 4.7438, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.5282669138090825, | |
| "grad_norm": 0.10062926262617111, | |
| "learning_rate": 0.0022855350476867083, | |
| "loss": 5.4125, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5291936978683967, | |
| "grad_norm": 0.19139476120471954, | |
| "learning_rate": 0.002278283855183027, | |
| "loss": 5.9375, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.5301204819277109, | |
| "grad_norm": 0.30302053689956665, | |
| "learning_rate": 0.002271034542229279, | |
| "loss": 6.1438, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.531047265987025, | |
| "grad_norm": 0.3599642515182495, | |
| "learning_rate": 0.002263787170279922, | |
| "loss": 6.125, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.5319740500463392, | |
| "grad_norm": 0.2241661548614502, | |
| "learning_rate": 0.00225654180077296, | |
| "loss": 5.9938, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.5329008341056534, | |
| "grad_norm": 0.10801433026790619, | |
| "learning_rate": 0.0022492984951294225, | |
| "loss": 5.7938, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.5338276181649676, | |
| "grad_norm": 0.11764154583215714, | |
| "learning_rate": 0.0022420573147528436, | |
| "loss": 5.7812, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.5347544022242817, | |
| "grad_norm": 0.08790837973356247, | |
| "learning_rate": 0.002234818321028737, | |
| "loss": 5.7375, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.535681186283596, | |
| "grad_norm": 0.06823479384183884, | |
| "learning_rate": 0.002227581575324086, | |
| "loss": 5.6438, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.5366079703429101, | |
| "grad_norm": 0.0775035172700882, | |
| "learning_rate": 0.00222034713898681, | |
| "loss": 5.6375, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.5375347544022243, | |
| "grad_norm": 0.05802862346172333, | |
| "learning_rate": 0.0022131150733452573, | |
| "loss": 5.5687, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5384615384615384, | |
| "grad_norm": 0.058500614017248154, | |
| "learning_rate": 0.0022058854397076734, | |
| "loss": 5.5438, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.5393883225208527, | |
| "grad_norm": 0.055464208126068115, | |
| "learning_rate": 0.0021986582993616926, | |
| "loss": 5.5, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.5403151065801668, | |
| "grad_norm": 0.041989766061306, | |
| "learning_rate": 0.0021914337135738086, | |
| "loss": 5.4563, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.541241890639481, | |
| "grad_norm": 0.05176004022359848, | |
| "learning_rate": 0.0021842117435888625, | |
| "loss": 5.45, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.5421686746987951, | |
| "grad_norm": 0.058837149292230606, | |
| "learning_rate": 0.0021769924506295168, | |
| "loss": 5.4563, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.5430954587581094, | |
| "grad_norm": 0.04392680153250694, | |
| "learning_rate": 0.002169775895895745, | |
| "loss": 5.4062, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.5440222428174235, | |
| "grad_norm": 0.05528188496828079, | |
| "learning_rate": 0.002162562140564302, | |
| "loss": 5.375, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.5449490268767377, | |
| "grad_norm": 0.04781576246023178, | |
| "learning_rate": 0.002155351245788218, | |
| "loss": 5.3938, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.5458758109360519, | |
| "grad_norm": 0.0435294434428215, | |
| "learning_rate": 0.002148143272696268, | |
| "loss": 5.3, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.5468025949953661, | |
| "grad_norm": 0.04509313404560089, | |
| "learning_rate": 0.002140938282392461, | |
| "loss": 5.35, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5477293790546802, | |
| "grad_norm": 0.03679104149341583, | |
| "learning_rate": 0.002133736335955522, | |
| "loss": 5.2688, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.5486561631139945, | |
| "grad_norm": 0.05090980976819992, | |
| "learning_rate": 0.0021265374944383682, | |
| "loss": 5.2812, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.5495829471733086, | |
| "grad_norm": 0.03438156098127365, | |
| "learning_rate": 0.0021193418188675994, | |
| "loss": 5.2688, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.5505097312326228, | |
| "grad_norm": 0.03302653878927231, | |
| "learning_rate": 0.002112149370242975, | |
| "loss": 5.25, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.5514365152919369, | |
| "grad_norm": 0.039244670420885086, | |
| "learning_rate": 0.0021049602095368973, | |
| "loss": 5.2063, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.5523632993512512, | |
| "grad_norm": 0.03585642948746681, | |
| "learning_rate": 0.0020977743976939005, | |
| "loss": 5.275, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.5532900834105653, | |
| "grad_norm": 0.03510696068406105, | |
| "learning_rate": 0.0020905919956301236, | |
| "loss": 5.2438, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.5542168674698795, | |
| "grad_norm": 0.03569590672850609, | |
| "learning_rate": 0.0020834130642328054, | |
| "loss": 5.175, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.5551436515291936, | |
| "grad_norm": 0.030981766059994698, | |
| "learning_rate": 0.0020762376643597585, | |
| "loss": 5.2, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.5560704355885079, | |
| "grad_norm": 0.04017426446080208, | |
| "learning_rate": 0.0020690658568388613, | |
| "loss": 5.15, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.556997219647822, | |
| "grad_norm": 0.039772696793079376, | |
| "learning_rate": 0.0020618977024675356, | |
| "loss": 5.125, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.5579240037071362, | |
| "grad_norm": 0.043551571667194366, | |
| "learning_rate": 0.002054733262012238, | |
| "loss": 5.1438, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.5588507877664504, | |
| "grad_norm": 0.03988911956548691, | |
| "learning_rate": 0.0020475725962079373, | |
| "loss": 5.1688, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.5597775718257646, | |
| "grad_norm": 0.03845544904470444, | |
| "learning_rate": 0.0020404157657576073, | |
| "loss": 5.1375, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.5607043558850788, | |
| "grad_norm": 0.048617441207170486, | |
| "learning_rate": 0.002033262831331705, | |
| "loss": 5.15, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.561631139944393, | |
| "grad_norm": 0.03950534015893936, | |
| "learning_rate": 0.0020261138535676614, | |
| "loss": 5.1312, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.5625579240037072, | |
| "grad_norm": 0.04601586237549782, | |
| "learning_rate": 0.002018968893069368, | |
| "loss": 5.0687, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.5634847080630213, | |
| "grad_norm": 0.048377152532339096, | |
| "learning_rate": 0.002011828010406656, | |
| "loss": 5.0625, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.5644114921223355, | |
| "grad_norm": 0.04253297671675682, | |
| "learning_rate": 0.0020046912661147915, | |
| "loss": 5.1, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.5653382761816497, | |
| "grad_norm": 0.04242146387696266, | |
| "learning_rate": 0.001997558720693956, | |
| "loss": 5.0813, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5662650602409639, | |
| "grad_norm": 0.042660947889089584, | |
| "learning_rate": 0.001990430434608739, | |
| "loss": 5.1188, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.567191844300278, | |
| "grad_norm": 0.03864769637584686, | |
| "learning_rate": 0.0019833064682876177, | |
| "loss": 5.0625, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.5681186283595923, | |
| "grad_norm": 0.03322991728782654, | |
| "learning_rate": 0.0019761868821224545, | |
| "loss": 5.0375, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.5690454124189064, | |
| "grad_norm": 0.032155055552721024, | |
| "learning_rate": 0.001969071736467977, | |
| "loss": 5.0687, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.5699721964782206, | |
| "grad_norm": 0.04553236439824104, | |
| "learning_rate": 0.0019619610916412704, | |
| "loss": 5.1, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.5708989805375347, | |
| "grad_norm": 0.039135731756687164, | |
| "learning_rate": 0.001954855007921265, | |
| "loss": 5.025, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.571825764596849, | |
| "grad_norm": 0.03503022342920303, | |
| "learning_rate": 0.0019477535455482242, | |
| "loss": 5.0312, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.5727525486561631, | |
| "grad_norm": 0.02648424543440342, | |
| "learning_rate": 0.0019406567647232366, | |
| "loss": 5.0125, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.5736793327154773, | |
| "grad_norm": 0.030889399349689484, | |
| "learning_rate": 0.0019335647256077037, | |
| "loss": 5.0312, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.5746061167747915, | |
| "grad_norm": 0.028193505480885506, | |
| "learning_rate": 0.0019264774883228286, | |
| "loss": 5.0563, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5755329008341057, | |
| "grad_norm": 0.039721377193927765, | |
| "learning_rate": 0.0019193951129491112, | |
| "loss": 4.9563, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.5764596848934198, | |
| "grad_norm": 0.0343133881688118, | |
| "learning_rate": 0.0019123176595258306, | |
| "loss": 5.0, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.577386468952734, | |
| "grad_norm": 0.03925079479813576, | |
| "learning_rate": 0.0019052451880505472, | |
| "loss": 5.05, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.5783132530120482, | |
| "grad_norm": 0.061298515647649765, | |
| "learning_rate": 0.0018981777584785823, | |
| "loss": 5.0, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.5792400370713624, | |
| "grad_norm": 0.045300450176000595, | |
| "learning_rate": 0.0018911154307225204, | |
| "loss": 4.975, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.5801668211306765, | |
| "grad_norm": 0.03320182114839554, | |
| "learning_rate": 0.0018840582646516924, | |
| "loss": 4.9938, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.5810936051899908, | |
| "grad_norm": 0.04246627911925316, | |
| "learning_rate": 0.0018770063200916757, | |
| "loss": 4.9625, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.5820203892493049, | |
| "grad_norm": 0.04181812331080437, | |
| "learning_rate": 0.0018699596568237799, | |
| "loss": 4.9875, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.5829471733086191, | |
| "grad_norm": 0.038650691509246826, | |
| "learning_rate": 0.0018629183345845477, | |
| "loss": 4.9625, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.5838739573679332, | |
| "grad_norm": 0.03198286145925522, | |
| "learning_rate": 0.0018558824130652399, | |
| "loss": 4.9125, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.5848007414272475, | |
| "grad_norm": 0.030322790145874023, | |
| "learning_rate": 0.0018488519519113387, | |
| "loss": 4.9563, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.5857275254865616, | |
| "grad_norm": 0.03637656942009926, | |
| "learning_rate": 0.0018418270107220325, | |
| "loss": 4.9625, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.5866543095458758, | |
| "grad_norm": 0.03812320902943611, | |
| "learning_rate": 0.001834807649049719, | |
| "loss": 4.9062, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.58758109360519, | |
| "grad_norm": 0.038305167108774185, | |
| "learning_rate": 0.001827793926399495, | |
| "loss": 4.9062, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.5885078776645042, | |
| "grad_norm": 0.03868838772177696, | |
| "learning_rate": 0.0018207859022286543, | |
| "loss": 4.95, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.5894346617238183, | |
| "grad_norm": 0.05012492835521698, | |
| "learning_rate": 0.0018137836359461822, | |
| "loss": 4.9125, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.5903614457831325, | |
| "grad_norm": 0.04664972424507141, | |
| "learning_rate": 0.0018067871869122559, | |
| "loss": 4.9188, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.5912882298424467, | |
| "grad_norm": 0.03777710720896721, | |
| "learning_rate": 0.0017997966144377327, | |
| "loss": 4.9, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.5922150139017609, | |
| "grad_norm": 0.04331712797284126, | |
| "learning_rate": 0.0017928119777836581, | |
| "loss": 4.9062, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.593141797961075, | |
| "grad_norm": 0.04469927027821541, | |
| "learning_rate": 0.0017858333361607537, | |
| "loss": 4.9188, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5940685820203893, | |
| "grad_norm": 0.02936607599258423, | |
| "learning_rate": 0.0017788607487289232, | |
| "loss": 4.9188, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.5949953660797034, | |
| "grad_norm": 0.05327693372964859, | |
| "learning_rate": 0.0017718942745967442, | |
| "loss": 4.9437, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.5959221501390176, | |
| "grad_norm": 0.04499313235282898, | |
| "learning_rate": 0.0017649339728209726, | |
| "loss": 4.9125, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.5968489341983317, | |
| "grad_norm": 0.03915273770689964, | |
| "learning_rate": 0.0017579799024060366, | |
| "loss": 4.9437, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.597775718257646, | |
| "grad_norm": 0.04526703059673309, | |
| "learning_rate": 0.0017510321223035436, | |
| "loss": 4.9062, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.5987025023169601, | |
| "grad_norm": 0.05192454531788826, | |
| "learning_rate": 0.001744090691411771, | |
| "loss": 4.8438, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.5996292863762743, | |
| "grad_norm": 0.06659810990095139, | |
| "learning_rate": 0.0017371556685751776, | |
| "loss": 4.875, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.6005560704355886, | |
| "grad_norm": 0.026750769466161728, | |
| "learning_rate": 0.0017302271125838944, | |
| "loss": 4.8688, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.6014828544949027, | |
| "grad_norm": 0.05911999195814133, | |
| "learning_rate": 0.0017233050821732344, | |
| "loss": 4.9125, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.6024096385542169, | |
| "grad_norm": 0.046929407864809036, | |
| "learning_rate": 0.0017163896360231918, | |
| "loss": 4.8438, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.603336422613531, | |
| "grad_norm": 0.0461881086230278, | |
| "learning_rate": 0.00170948083275794, | |
| "loss": 4.8812, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.6042632066728453, | |
| "grad_norm": 0.041216105222702026, | |
| "learning_rate": 0.0017025787309453443, | |
| "loss": 4.8625, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.6051899907321594, | |
| "grad_norm": 0.047575026750564575, | |
| "learning_rate": 0.001695683389096455, | |
| "loss": 4.8625, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.6061167747914736, | |
| "grad_norm": 0.039946090430021286, | |
| "learning_rate": 0.001688794865665021, | |
| "loss": 4.8688, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.6070435588507878, | |
| "grad_norm": 0.03767408803105354, | |
| "learning_rate": 0.0016819132190469843, | |
| "loss": 4.8563, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.607970342910102, | |
| "grad_norm": 0.046980541199445724, | |
| "learning_rate": 0.0016750385075799952, | |
| "loss": 4.8438, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.6088971269694161, | |
| "grad_norm": 0.04574093222618103, | |
| "learning_rate": 0.0016681707895429094, | |
| "loss": 4.8563, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.6098239110287303, | |
| "grad_norm": 0.049847353249788284, | |
| "learning_rate": 0.0016613101231552987, | |
| "loss": 4.8563, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.6107506950880445, | |
| "grad_norm": 0.04778432473540306, | |
| "learning_rate": 0.0016544565665769558, | |
| "loss": 4.8625, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.6116774791473587, | |
| "grad_norm": 0.042641252279281616, | |
| "learning_rate": 0.001647610177907403, | |
| "loss": 4.875, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6126042632066728, | |
| "grad_norm": 0.03789043426513672, | |
| "learning_rate": 0.001640771015185395, | |
| "loss": 4.8563, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.6135310472659871, | |
| "grad_norm": 0.0583447702229023, | |
| "learning_rate": 0.0016339391363884334, | |
| "loss": 4.7875, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.6144578313253012, | |
| "grad_norm": 0.047329407185316086, | |
| "learning_rate": 0.0016271145994322693, | |
| "loss": 4.8313, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.051290351897478104, | |
| "learning_rate": 0.0016202974621704175, | |
| "loss": 4.8438, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.6163113994439295, | |
| "grad_norm": 0.04638203606009483, | |
| "learning_rate": 0.0016134877823936607, | |
| "loss": 4.8187, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.6172381835032438, | |
| "grad_norm": 0.0436415858566761, | |
| "learning_rate": 0.0016066856178295658, | |
| "loss": 4.8063, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.6181649675625579, | |
| "grad_norm": 0.05077355355024338, | |
| "learning_rate": 0.0015998910261419874, | |
| "loss": 4.8063, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.6190917516218721, | |
| "grad_norm": 0.05078209191560745, | |
| "learning_rate": 0.0015931040649305862, | |
| "loss": 4.8688, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.6200185356811863, | |
| "grad_norm": 0.06357160210609436, | |
| "learning_rate": 0.0015863247917303337, | |
| "loss": 4.8313, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.6209453197405005, | |
| "grad_norm": 0.03996184095740318, | |
| "learning_rate": 0.0015795532640110316, | |
| "loss": 4.8688, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6218721037998146, | |
| "grad_norm": 0.05953163281083107, | |
| "learning_rate": 0.0015727895391768176, | |
| "loss": 4.7938, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.6227988878591288, | |
| "grad_norm": 0.05362982302904129, | |
| "learning_rate": 0.0015660336745656862, | |
| "loss": 4.7875, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.623725671918443, | |
| "grad_norm": 0.03395141288638115, | |
| "learning_rate": 0.001559285727448993, | |
| "loss": 4.7875, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.6246524559777572, | |
| "grad_norm": 0.06038745865225792, | |
| "learning_rate": 0.0015525457550309802, | |
| "loss": 4.775, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.6255792400370713, | |
| "grad_norm": 0.04683006927371025, | |
| "learning_rate": 0.0015458138144482832, | |
| "loss": 4.8625, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.6265060240963856, | |
| "grad_norm": 0.04466160014271736, | |
| "learning_rate": 0.0015390899627694505, | |
| "loss": 4.7812, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.6274328081556997, | |
| "grad_norm": 0.054469116032123566, | |
| "learning_rate": 0.0015323742569944572, | |
| "loss": 4.775, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.6283595922150139, | |
| "grad_norm": 0.05092649534344673, | |
| "learning_rate": 0.001525666754054226, | |
| "loss": 4.775, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.629286376274328, | |
| "grad_norm": 0.04114770516753197, | |
| "learning_rate": 0.0015189675108101385, | |
| "loss": 4.8063, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.6302131603336423, | |
| "grad_norm": 0.04045185446739197, | |
| "learning_rate": 0.0015122765840535602, | |
| "loss": 4.8063, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.6311399443929564, | |
| "grad_norm": 0.04068306088447571, | |
| "learning_rate": 0.0015055940305053511, | |
| "loss": 4.7688, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.6320667284522706, | |
| "grad_norm": 0.048991914838552475, | |
| "learning_rate": 0.0014989199068153936, | |
| "loss": 4.7812, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.6329935125115848, | |
| "grad_norm": 0.04630220681428909, | |
| "learning_rate": 0.0014922542695621041, | |
| "loss": 4.8313, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.633920296570899, | |
| "grad_norm": 0.05090312659740448, | |
| "learning_rate": 0.0014855971752519607, | |
| "loss": 4.75, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.6348470806302131, | |
| "grad_norm": 0.03676120191812515, | |
| "learning_rate": 0.001478948680319016, | |
| "loss": 4.775, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.6357738646895273, | |
| "grad_norm": 0.04959641024470329, | |
| "learning_rate": 0.001472308841124429, | |
| "loss": 4.8063, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.6367006487488415, | |
| "grad_norm": 0.04228943958878517, | |
| "learning_rate": 0.0014656777139559754, | |
| "loss": 4.8125, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.6376274328081557, | |
| "grad_norm": 0.04116208478808403, | |
| "learning_rate": 0.001459055355027582, | |
| "loss": 4.7562, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.6385542168674698, | |
| "grad_norm": 0.05446736142039299, | |
| "learning_rate": 0.0014524418204788405, | |
| "loss": 4.75, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.6394810009267841, | |
| "grad_norm": 0.04483804479241371, | |
| "learning_rate": 0.0014458371663745402, | |
| "loss": 4.7688, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.6404077849860983, | |
| "grad_norm": 0.04954027384519577, | |
| "learning_rate": 0.0014392414487041838, | |
| "loss": 4.6937, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.6413345690454124, | |
| "grad_norm": 0.043852776288986206, | |
| "learning_rate": 0.00143265472338152, | |
| "loss": 4.7938, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.6422613531047267, | |
| "grad_norm": 0.046749938279390335, | |
| "learning_rate": 0.001426077046244068, | |
| "loss": 4.7688, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.6431881371640408, | |
| "grad_norm": 0.05037090927362442, | |
| "learning_rate": 0.0014195084730526395, | |
| "loss": 4.7562, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.644114921223355, | |
| "grad_norm": 0.0452822744846344, | |
| "learning_rate": 0.0014129490594908729, | |
| "loss": 4.8, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.6450417052826691, | |
| "grad_norm": 0.03884583339095116, | |
| "learning_rate": 0.001406398861164754, | |
| "loss": 4.725, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.6459684893419834, | |
| "grad_norm": 0.04877614974975586, | |
| "learning_rate": 0.0013998579336021535, | |
| "loss": 4.7063, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.6468952734012975, | |
| "grad_norm": 0.043750159442424774, | |
| "learning_rate": 0.0013933263322523466, | |
| "loss": 4.7063, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.6478220574606117, | |
| "grad_norm": 0.047424763441085815, | |
| "learning_rate": 0.0013868041124855508, | |
| "loss": 4.7562, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.6487488415199258, | |
| "grad_norm": 0.044932421296834946, | |
| "learning_rate": 0.0013802913295924508, | |
| "loss": 4.6875, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6496756255792401, | |
| "grad_norm": 0.03677170351147652, | |
| "learning_rate": 0.0013737880387837348, | |
| "loss": 4.7688, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.6506024096385542, | |
| "grad_norm": 0.049118272960186005, | |
| "learning_rate": 0.0013672942951896206, | |
| "loss": 4.7188, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.6515291936978684, | |
| "grad_norm": 0.06206013634800911, | |
| "learning_rate": 0.0013608101538593964, | |
| "loss": 4.75, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.6524559777571826, | |
| "grad_norm": 0.045777998864650726, | |
| "learning_rate": 0.0013543356697609439, | |
| "loss": 4.8063, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.6533827618164968, | |
| "grad_norm": 0.06643692404031754, | |
| "learning_rate": 0.0013478708977802823, | |
| "loss": 4.7438, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.6543095458758109, | |
| "grad_norm": 0.05065048485994339, | |
| "learning_rate": 0.0013414158927210946, | |
| "loss": 4.7375, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.6552363299351252, | |
| "grad_norm": 0.047690439969301224, | |
| "learning_rate": 0.0013349707093042707, | |
| "loss": 4.75, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.6561631139944393, | |
| "grad_norm": 0.05915187671780586, | |
| "learning_rate": 0.0013285354021674361, | |
| "loss": 4.675, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.6570898980537535, | |
| "grad_norm": 0.04628239572048187, | |
| "learning_rate": 0.0013221100258644957, | |
| "loss": 4.7375, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.6580166821130676, | |
| "grad_norm": 0.04324619472026825, | |
| "learning_rate": 0.0013156946348651644, | |
| "loss": 4.7, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.6589434661723819, | |
| "grad_norm": 0.048746492713689804, | |
| "learning_rate": 0.0013092892835545123, | |
| "loss": 4.7438, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.659870250231696, | |
| "grad_norm": 0.04211176931858063, | |
| "learning_rate": 0.001302894026232497, | |
| "loss": 4.7188, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.6607970342910102, | |
| "grad_norm": 0.04411826655268669, | |
| "learning_rate": 0.0012965089171135097, | |
| "loss": 4.7375, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.6617238183503243, | |
| "grad_norm": 0.049165111035108566, | |
| "learning_rate": 0.0012901340103259097, | |
| "loss": 4.7, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.6626506024096386, | |
| "grad_norm": 0.04350108280777931, | |
| "learning_rate": 0.0012837693599115707, | |
| "loss": 4.6813, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.6635773864689527, | |
| "grad_norm": 0.053538527339696884, | |
| "learning_rate": 0.001277415019825417, | |
| "loss": 4.7375, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.6645041705282669, | |
| "grad_norm": 0.03999413177371025, | |
| "learning_rate": 0.0012710710439349739, | |
| "loss": 4.6625, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.6654309545875811, | |
| "grad_norm": 0.05112524330615997, | |
| "learning_rate": 0.0012647374860199018, | |
| "loss": 4.7375, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.6663577386468953, | |
| "grad_norm": 0.03731364756822586, | |
| "learning_rate": 0.0012584143997715486, | |
| "loss": 4.6625, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.6672845227062094, | |
| "grad_norm": 0.036096684634685516, | |
| "learning_rate": 0.0012521018387924884, | |
| "loss": 4.7, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6682113067655236, | |
| "grad_norm": 0.040185850113630295, | |
| "learning_rate": 0.0012457998565960724, | |
| "loss": 4.7, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.6691380908248378, | |
| "grad_norm": 0.03686061128973961, | |
| "learning_rate": 0.0012395085066059686, | |
| "loss": 4.7125, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.670064874884152, | |
| "grad_norm": 0.04309338331222534, | |
| "learning_rate": 0.0012332278421557175, | |
| "loss": 4.6875, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.6709916589434661, | |
| "grad_norm": 0.033990684896707535, | |
| "learning_rate": 0.0012269579164882706, | |
| "loss": 4.7, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.6719184430027804, | |
| "grad_norm": 0.06331422179937363, | |
| "learning_rate": 0.0012206987827555469, | |
| "loss": 4.6875, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.6728452270620945, | |
| "grad_norm": 0.05111413821578026, | |
| "learning_rate": 0.0012144504940179793, | |
| "loss": 4.6625, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.6737720111214087, | |
| "grad_norm": 0.039602335542440414, | |
| "learning_rate": 0.0012082131032440616, | |
| "loss": 4.6562, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.6746987951807228, | |
| "grad_norm": 0.0525193028151989, | |
| "learning_rate": 0.0012019866633099052, | |
| "loss": 4.6562, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.6756255792400371, | |
| "grad_norm": 0.04521778225898743, | |
| "learning_rate": 0.001195771226998789, | |
| "loss": 4.675, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.6765523632993512, | |
| "grad_norm": 0.042900171130895615, | |
| "learning_rate": 0.0011895668470007067, | |
| "loss": 4.675, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.6774791473586654, | |
| "grad_norm": 0.046152036637067795, | |
| "learning_rate": 0.0011833735759119303, | |
| "loss": 4.6375, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.6784059314179796, | |
| "grad_norm": 0.03777175024151802, | |
| "learning_rate": 0.0011771914662345527, | |
| "loss": 4.7125, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.6793327154772938, | |
| "grad_norm": 0.04087323322892189, | |
| "learning_rate": 0.0011710205703760535, | |
| "loss": 4.6875, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.680259499536608, | |
| "grad_norm": 0.03955033794045448, | |
| "learning_rate": 0.0011648609406488455, | |
| "loss": 4.6562, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.6811862835959221, | |
| "grad_norm": 0.030934706330299377, | |
| "learning_rate": 0.001158712629269838, | |
| "loss": 4.6438, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.6821130676552364, | |
| "grad_norm": 0.03988910838961601, | |
| "learning_rate": 0.0011525756883599915, | |
| "loss": 4.6438, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.6830398517145505, | |
| "grad_norm": 0.03788105770945549, | |
| "learning_rate": 0.0011464501699438728, | |
| "loss": 4.65, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.6839666357738647, | |
| "grad_norm": 0.04469624534249306, | |
| "learning_rate": 0.0011403361259492218, | |
| "loss": 4.6937, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.6848934198331789, | |
| "grad_norm": 0.04028180614113808, | |
| "learning_rate": 0.001134233608206502, | |
| "loss": 4.65, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.6858202038924931, | |
| "grad_norm": 0.04203322157263756, | |
| "learning_rate": 0.0011281426684484686, | |
| "loss": 4.65, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6867469879518072, | |
| "grad_norm": 0.045880451798439026, | |
| "learning_rate": 0.0011220633583097247, | |
| "loss": 4.65, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.6876737720111215, | |
| "grad_norm": 0.0346485935151577, | |
| "learning_rate": 0.0011159957293262886, | |
| "loss": 4.6562, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.6886005560704356, | |
| "grad_norm": 0.048363398760557175, | |
| "learning_rate": 0.0011099398329351515, | |
| "loss": 4.6438, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.6895273401297498, | |
| "grad_norm": 0.0373103991150856, | |
| "learning_rate": 0.0011038957204738465, | |
| "loss": 4.6813, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.6904541241890639, | |
| "grad_norm": 0.043777722865343094, | |
| "learning_rate": 0.001097863443180008, | |
| "loss": 4.6688, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.6913809082483782, | |
| "grad_norm": 0.03708568960428238, | |
| "learning_rate": 0.0010918430521909442, | |
| "loss": 4.6688, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.6923076923076923, | |
| "grad_norm": 0.04273151233792305, | |
| "learning_rate": 0.0010858345985431956, | |
| "loss": 4.6312, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.6932344763670065, | |
| "grad_norm": 0.04535781592130661, | |
| "learning_rate": 0.0010798381331721108, | |
| "loss": 4.675, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.6941612604263206, | |
| "grad_norm": 0.03782697021961212, | |
| "learning_rate": 0.0010738537069114062, | |
| "loss": 4.675, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.6950880444856349, | |
| "grad_norm": 0.04372243955731392, | |
| "learning_rate": 0.0010678813704927434, | |
| "loss": 4.6625, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.696014828544949, | |
| "grad_norm": 0.04960807040333748, | |
| "learning_rate": 0.0010619211745452912, | |
| "loss": 4.6375, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.6969416126042632, | |
| "grad_norm": 0.040741242468357086, | |
| "learning_rate": 0.001055973169595303, | |
| "loss": 4.6375, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.6978683966635774, | |
| "grad_norm": 0.04263027384877205, | |
| "learning_rate": 0.0010500374060656839, | |
| "loss": 4.5938, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.6987951807228916, | |
| "grad_norm": 0.046234361827373505, | |
| "learning_rate": 0.001044113934275567, | |
| "loss": 4.6688, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.6997219647822057, | |
| "grad_norm": 0.03574342280626297, | |
| "learning_rate": 0.0010382028044398823, | |
| "loss": 4.6375, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.70064874884152, | |
| "grad_norm": 0.044964589178562164, | |
| "learning_rate": 0.0010323040666689366, | |
| "loss": 4.6312, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.7015755329008341, | |
| "grad_norm": 0.037156179547309875, | |
| "learning_rate": 0.001026417770967982, | |
| "loss": 4.6188, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.7025023169601483, | |
| "grad_norm": 0.046747058629989624, | |
| "learning_rate": 0.0010205439672368, | |
| "loss": 4.5875, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.7034291010194624, | |
| "grad_norm": 0.042588070034980774, | |
| "learning_rate": 0.0010146827052692701, | |
| "loss": 4.6125, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.7043558850787767, | |
| "grad_norm": 0.036094602197408676, | |
| "learning_rate": 0.0010088340347529552, | |
| "loss": 4.6625, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7052826691380908, | |
| "grad_norm": 0.03903704881668091, | |
| "learning_rate": 0.0010029980052686733, | |
| "loss": 4.5875, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.706209453197405, | |
| "grad_norm": 0.045382946729660034, | |
| "learning_rate": 0.0009971746662900851, | |
| "loss": 4.6375, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.7071362372567191, | |
| "grad_norm": 0.04216109961271286, | |
| "learning_rate": 0.0009913640671832663, | |
| "loss": 4.6063, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.7080630213160334, | |
| "grad_norm": 0.044599149376153946, | |
| "learning_rate": 0.0009855662572062962, | |
| "loss": 4.625, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.7089898053753475, | |
| "grad_norm": 0.0511021688580513, | |
| "learning_rate": 0.0009797812855088348, | |
| "loss": 4.5875, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.7099165894346617, | |
| "grad_norm": 0.04359891265630722, | |
| "learning_rate": 0.0009740092011317095, | |
| "loss": 4.6688, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.7108433734939759, | |
| "grad_norm": 0.047334376722574234, | |
| "learning_rate": 0.0009682500530064992, | |
| "loss": 4.5875, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.7117701575532901, | |
| "grad_norm": 0.04199070855975151, | |
| "learning_rate": 0.0009625038899551161, | |
| "loss": 4.625, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.7126969416126042, | |
| "grad_norm": 0.057890091091394424, | |
| "learning_rate": 0.0009567707606893971, | |
| "loss": 4.6125, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.7136237256719185, | |
| "grad_norm": 0.04788359999656677, | |
| "learning_rate": 0.0009510507138106853, | |
| "loss": 4.5875, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7145505097312326, | |
| "grad_norm": 0.04499724879860878, | |
| "learning_rate": 0.0009453437978094223, | |
| "loss": 4.5938, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.7154772937905468, | |
| "grad_norm": 0.04197373613715172, | |
| "learning_rate": 0.0009396500610647368, | |
| "loss": 4.6562, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.7164040778498609, | |
| "grad_norm": 0.048124760389328, | |
| "learning_rate": 0.00093396955184403, | |
| "loss": 4.625, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.7173308619091752, | |
| "grad_norm": 0.05138612538576126, | |
| "learning_rate": 0.000928302318302573, | |
| "loss": 4.575, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.7182576459684893, | |
| "grad_norm": 0.044739775359630585, | |
| "learning_rate": 0.0009226484084830918, | |
| "loss": 4.625, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.7191844300278035, | |
| "grad_norm": 0.04016095772385597, | |
| "learning_rate": 0.0009170078703153676, | |
| "loss": 4.6063, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.7201112140871178, | |
| "grad_norm": 0.05538894608616829, | |
| "learning_rate": 0.000911380751615822, | |
| "loss": 4.625, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.7210379981464319, | |
| "grad_norm": 0.04083118215203285, | |
| "learning_rate": 0.0009057671000871195, | |
| "loss": 4.6063, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.7219647822057461, | |
| "grad_norm": 0.05446457862854004, | |
| "learning_rate": 0.0009001669633177587, | |
| "loss": 4.575, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.7228915662650602, | |
| "grad_norm": 0.03577585890889168, | |
| "learning_rate": 0.0008945803887816678, | |
| "loss": 4.6, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7238183503243745, | |
| "grad_norm": 0.04933847859501839, | |
| "learning_rate": 0.0008890074238378073, | |
| "loss": 4.5875, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.7247451343836886, | |
| "grad_norm": 0.03600107133388519, | |
| "learning_rate": 0.0008834481157297625, | |
| "loss": 4.5875, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.7256719184430028, | |
| "grad_norm": 0.05166667327284813, | |
| "learning_rate": 0.0008779025115853482, | |
| "loss": 4.5938, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.726598702502317, | |
| "grad_norm": 0.03323368355631828, | |
| "learning_rate": 0.0008723706584162044, | |
| "loss": 4.5563, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.7275254865616312, | |
| "grad_norm": 0.04717453941702843, | |
| "learning_rate": 0.0008668526031174034, | |
| "loss": 4.6125, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.7284522706209453, | |
| "grad_norm": 0.04695433750748634, | |
| "learning_rate": 0.0008613483924670457, | |
| "loss": 4.5875, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.7293790546802595, | |
| "grad_norm": 0.04457440972328186, | |
| "learning_rate": 0.00085585807312587, | |
| "loss": 4.6, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.7303058387395737, | |
| "grad_norm": 0.04753506928682327, | |
| "learning_rate": 0.0008503816916368512, | |
| "loss": 4.5687, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.7312326227988879, | |
| "grad_norm": 0.04823901131749153, | |
| "learning_rate": 0.0008449192944248127, | |
| "loss": 4.5625, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.732159406858202, | |
| "grad_norm": 0.041306272149086, | |
| "learning_rate": 0.0008394709277960255, | |
| "loss": 4.5563, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.7330861909175163, | |
| "grad_norm": 0.054446831345558167, | |
| "learning_rate": 0.0008340366379378234, | |
| "loss": 4.55, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.7340129749768304, | |
| "grad_norm": 0.03289240226149559, | |
| "learning_rate": 0.0008286164709182031, | |
| "loss": 4.575, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.7349397590361446, | |
| "grad_norm": 0.04518633335828781, | |
| "learning_rate": 0.0008232104726854425, | |
| "loss": 4.6, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.7358665430954587, | |
| "grad_norm": 0.03345628082752228, | |
| "learning_rate": 0.0008178186890677027, | |
| "loss": 4.55, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.736793327154773, | |
| "grad_norm": 0.046789661049842834, | |
| "learning_rate": 0.0008124411657726471, | |
| "loss": 4.575, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.7377201112140871, | |
| "grad_norm": 0.03443962708115578, | |
| "learning_rate": 0.0008070779483870469, | |
| "loss": 4.55, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.7386468952734013, | |
| "grad_norm": 0.04330628737807274, | |
| "learning_rate": 0.0008017290823764014, | |
| "loss": 4.5563, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.7395736793327155, | |
| "grad_norm": 0.032368697226047516, | |
| "learning_rate": 0.0007963946130845462, | |
| "loss": 4.5438, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.7405004633920297, | |
| "grad_norm": 0.04270923137664795, | |
| "learning_rate": 0.0007910745857332749, | |
| "loss": 4.6, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.7414272474513438, | |
| "grad_norm": 0.03373492881655693, | |
| "learning_rate": 0.0007857690454219494, | |
| "loss": 4.5687, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.742354031510658, | |
| "grad_norm": 0.03647404536604881, | |
| "learning_rate": 0.0007804780371271248, | |
| "loss": 4.5125, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.7432808155699722, | |
| "grad_norm": 0.037898655980825424, | |
| "learning_rate": 0.0007752016057021596, | |
| "loss": 4.5687, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.7442075996292864, | |
| "grad_norm": 0.0339631550014019, | |
| "learning_rate": 0.0007699397958768451, | |
| "loss": 4.575, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.7451343836886005, | |
| "grad_norm": 0.03792402520775795, | |
| "learning_rate": 0.0007646926522570166, | |
| "loss": 4.5687, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.7460611677479148, | |
| "grad_norm": 0.03865986317396164, | |
| "learning_rate": 0.0007594602193241839, | |
| "loss": 4.5312, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.7469879518072289, | |
| "grad_norm": 0.03740232065320015, | |
| "learning_rate": 0.0007542425414351462, | |
| "loss": 4.55, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.7479147358665431, | |
| "grad_norm": 0.03663860633969307, | |
| "learning_rate": 0.0007490396628216237, | |
| "loss": 4.55, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.7488415199258572, | |
| "grad_norm": 0.0422244630753994, | |
| "learning_rate": 0.0007438516275898762, | |
| "loss": 4.5563, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.7497683039851715, | |
| "grad_norm": 0.03552339971065521, | |
| "learning_rate": 0.0007386784797203335, | |
| "loss": 4.5563, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.7506950880444856, | |
| "grad_norm": 0.03856317326426506, | |
| "learning_rate": 0.0007335202630672222, | |
| "loss": 4.5188, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.7516218721037998, | |
| "grad_norm": 0.03579216077923775, | |
| "learning_rate": 0.0007283770213581889, | |
| "loss": 4.525, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.752548656163114, | |
| "grad_norm": 0.04030256345868111, | |
| "learning_rate": 0.0007232487981939371, | |
| "loss": 4.5563, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.7534754402224282, | |
| "grad_norm": 0.03762529417872429, | |
| "learning_rate": 0.0007181356370478531, | |
| "loss": 4.55, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.7544022242817423, | |
| "grad_norm": 0.03724801167845726, | |
| "learning_rate": 0.0007130375812656365, | |
| "loss": 4.5375, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.7553290083410565, | |
| "grad_norm": 0.03805640712380409, | |
| "learning_rate": 0.000707954674064937, | |
| "loss": 4.575, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.7562557924003707, | |
| "grad_norm": 0.0410294272005558, | |
| "learning_rate": 0.0007028869585349828, | |
| "loss": 4.5625, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.7571825764596849, | |
| "grad_norm": 0.0386902280151844, | |
| "learning_rate": 0.0006978344776362214, | |
| "loss": 4.5188, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.758109360518999, | |
| "grad_norm": 0.037720050662755966, | |
| "learning_rate": 0.000692797274199948, | |
| "loss": 4.55, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.7590361445783133, | |
| "grad_norm": 0.029812660068273544, | |
| "learning_rate": 0.0006877753909279508, | |
| "loss": 4.475, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.7599629286376274, | |
| "grad_norm": 0.04356846958398819, | |
| "learning_rate": 0.0006827688703921406, | |
| "loss": 4.4938, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.7608897126969416, | |
| "grad_norm": 0.03893793001770973, | |
| "learning_rate": 0.0006777777550341977, | |
| "loss": 4.5188, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.7618164967562558, | |
| "grad_norm": 0.0387520007789135, | |
| "learning_rate": 0.0006728020871652046, | |
| "loss": 4.5188, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.76274328081557, | |
| "grad_norm": 0.0450495183467865, | |
| "learning_rate": 0.0006678419089652943, | |
| "loss": 4.5438, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.7636700648748842, | |
| "grad_norm": 0.04003477469086647, | |
| "learning_rate": 0.0006628972624832891, | |
| "loss": 4.5813, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.7645968489341983, | |
| "grad_norm": 0.05103557929396629, | |
| "learning_rate": 0.0006579681896363418, | |
| "loss": 4.5188, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.7655236329935126, | |
| "grad_norm": 0.038706224411726, | |
| "learning_rate": 0.000653054732209587, | |
| "loss": 4.5188, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.7664504170528267, | |
| "grad_norm": 0.04914843663573265, | |
| "learning_rate": 0.0006481569318557793, | |
| "loss": 4.525, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.7673772011121409, | |
| "grad_norm": 0.03715524449944496, | |
| "learning_rate": 0.0006432748300949476, | |
| "loss": 4.5062, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.768303985171455, | |
| "grad_norm": 0.03968851640820503, | |
| "learning_rate": 0.0006384084683140359, | |
| "loss": 4.5563, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.042003631591796875, | |
| "learning_rate": 0.000633557887766559, | |
| "loss": 4.5312, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.7701575532900834, | |
| "grad_norm": 0.04498601332306862, | |
| "learning_rate": 0.000628723129572247, | |
| "loss": 4.5, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.7710843373493976, | |
| "grad_norm": 0.039209991693496704, | |
| "learning_rate": 0.0006239042347167026, | |
| "loss": 4.5375, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.7720111214087118, | |
| "grad_norm": 0.03667667508125305, | |
| "learning_rate": 0.0006191012440510469, | |
| "loss": 4.5375, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.772937905468026, | |
| "grad_norm": 0.03756443038582802, | |
| "learning_rate": 0.0006143141982915801, | |
| "loss": 4.525, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.7738646895273401, | |
| "grad_norm": 0.03308939188718796, | |
| "learning_rate": 0.0006095431380194299, | |
| "loss": 4.55, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.7747914735866543, | |
| "grad_norm": 0.03881024196743965, | |
| "learning_rate": 0.0006047881036802141, | |
| "loss": 4.5375, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.7757182576459685, | |
| "grad_norm": 0.03667169064283371, | |
| "learning_rate": 0.0006000491355836904, | |
| "loss": 4.5188, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.7766450417052827, | |
| "grad_norm": 0.03264870494604111, | |
| "learning_rate": 0.0005953262739034218, | |
| "loss": 4.5188, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.7775718257645968, | |
| "grad_norm": 0.0369790680706501, | |
| "learning_rate": 0.0005906195586764294, | |
| "loss": 4.5125, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.7784986098239111, | |
| "grad_norm": 0.03252223879098892, | |
| "learning_rate": 0.0005859290298028596, | |
| "loss": 4.4813, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.7794253938832252, | |
| "grad_norm": 0.03256712481379509, | |
| "learning_rate": 0.0005812547270456397, | |
| "loss": 4.5062, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.7803521779425394, | |
| "grad_norm": 0.031595002859830856, | |
| "learning_rate": 0.0005765966900301462, | |
| "loss": 4.5188, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.7812789620018535, | |
| "grad_norm": 0.0356653667986393, | |
| "learning_rate": 0.0005719549582438636, | |
| "loss": 4.5438, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.7822057460611678, | |
| "grad_norm": 0.038195762783288956, | |
| "learning_rate": 0.0005673295710360555, | |
| "loss": 4.4875, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.7831325301204819, | |
| "grad_norm": 0.02905537374317646, | |
| "learning_rate": 0.0005627205676174244, | |
| "loss": 4.525, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.7840593141797961, | |
| "grad_norm": 0.03345280513167381, | |
| "learning_rate": 0.0005581279870597866, | |
| "loss": 4.4938, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.7849860982391103, | |
| "grad_norm": 0.034679800271987915, | |
| "learning_rate": 0.0005535518682957341, | |
| "loss": 4.4938, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.7859128822984245, | |
| "grad_norm": 0.03583706170320511, | |
| "learning_rate": 0.0005489922501183095, | |
| "loss": 4.5188, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.7868396663577386, | |
| "grad_norm": 0.032523263245821, | |
| "learning_rate": 0.000544449171180674, | |
| "loss": 4.4938, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.7877664504170528, | |
| "grad_norm": 0.03378100320696831, | |
| "learning_rate": 0.0005399226699957821, | |
| "loss": 4.5062, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.788693234476367, | |
| "grad_norm": 0.03234217315912247, | |
| "learning_rate": 0.0005354127849360543, | |
| "loss": 4.45, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.7896200185356812, | |
| "grad_norm": 0.03637991473078728, | |
| "learning_rate": 0.0005309195542330497, | |
| "loss": 4.5188, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.7905468025949953, | |
| "grad_norm": 0.03120928816497326, | |
| "learning_rate": 0.0005264430159771455, | |
| "loss": 4.5, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.7914735866543096, | |
| "grad_norm": 0.03429511934518814, | |
| "learning_rate": 0.0005219832081172124, | |
| "loss": 4.5312, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.7924003707136237, | |
| "grad_norm": 0.029146216809749603, | |
| "learning_rate": 0.0005175401684602912, | |
| "loss": 4.4938, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.7933271547729379, | |
| "grad_norm": 0.029695888981223106, | |
| "learning_rate": 0.0005131139346712758, | |
| "loss": 4.4875, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.794253938832252, | |
| "grad_norm": 0.03263707831501961, | |
| "learning_rate": 0.0005087045442725904, | |
| "loss": 4.5312, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.7951807228915663, | |
| "grad_norm": 0.028736894950270653, | |
| "learning_rate": 0.0005043120346438748, | |
| "loss": 4.525, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.7961075069508804, | |
| "grad_norm": 0.030789796262979507, | |
| "learning_rate": 0.0004999364430216638, | |
| "loss": 4.5, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.7970342910101946, | |
| "grad_norm": 0.04033099114894867, | |
| "learning_rate": 0.0004955778064990757, | |
| "loss": 4.5125, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.7979610750695088, | |
| "grad_norm": 0.03556600585579872, | |
| "learning_rate": 0.0004912361620254932, | |
| "loss": 4.4813, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.798887859128823, | |
| "grad_norm": 0.031120220199227333, | |
| "learning_rate": 0.00048691154640625566, | |
| "loss": 4.4688, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.7998146431881371, | |
| "grad_norm": 0.03250223025679588, | |
| "learning_rate": 0.0004826039963023407, | |
| "loss": 4.4688, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.8007414272474513, | |
| "grad_norm": 0.029799439013004303, | |
| "learning_rate": 0.0004783135482300596, | |
| "loss": 4.4875, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.8016682113067656, | |
| "grad_norm": 0.030422599986195564, | |
| "learning_rate": 0.0004740402385607431, | |
| "loss": 4.4813, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.8025949953660797, | |
| "grad_norm": 0.029015803709626198, | |
| "learning_rate": 0.0004697841035204356, | |
| "loss": 4.4938, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.8035217794253939, | |
| "grad_norm": 0.031820014119148254, | |
| "learning_rate": 0.00046554517918958845, | |
| "loss": 4.5062, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.8044485634847081, | |
| "grad_norm": 0.03146743401885033, | |
| "learning_rate": 0.00046132350150275005, | |
| "loss": 4.475, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.8053753475440223, | |
| "grad_norm": 0.02848106250166893, | |
| "learning_rate": 0.0004571191062482677, | |
| "loss": 4.4875, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.8063021316033364, | |
| "grad_norm": 0.031561560928821564, | |
| "learning_rate": 0.00045293202906797754, | |
| "loss": 4.4875, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8072289156626506, | |
| "grad_norm": 0.031885311007499695, | |
| "learning_rate": 0.0004487623054569084, | |
| "loss": 4.5062, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.8081556997219648, | |
| "grad_norm": 0.03388173505663872, | |
| "learning_rate": 0.000444609970762975, | |
| "loss": 4.4813, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.809082483781279, | |
| "grad_norm": 0.03390287980437279, | |
| "learning_rate": 0.00044047506018668415, | |
| "loss": 4.5, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.8100092678405931, | |
| "grad_norm": 0.032265473157167435, | |
| "learning_rate": 0.0004363576087808313, | |
| "loss": 4.4938, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.8109360518999074, | |
| "grad_norm": 0.03563728928565979, | |
| "learning_rate": 0.00043225765145020803, | |
| "loss": 4.5188, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.8118628359592215, | |
| "grad_norm": 0.03663501888513565, | |
| "learning_rate": 0.0004281752229513006, | |
| "loss": 4.5188, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.8127896200185357, | |
| "grad_norm": 0.03167020156979561, | |
| "learning_rate": 0.00042411035789200163, | |
| "loss": 4.4875, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.8137164040778498, | |
| "grad_norm": 0.03226330131292343, | |
| "learning_rate": 0.0004200630907313108, | |
| "loss": 4.5062, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.8146431881371641, | |
| "grad_norm": 0.029977647587656975, | |
| "learning_rate": 0.00041603345577904824, | |
| "loss": 4.4688, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.8155699721964782, | |
| "grad_norm": 0.03339603170752525, | |
| "learning_rate": 0.0004120214871955577, | |
| "loss": 4.5125, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.8164967562557924, | |
| "grad_norm": 0.031077727675437927, | |
| "learning_rate": 0.00040802721899142356, | |
| "loss": 4.4938, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.8174235403151066, | |
| "grad_norm": 0.02900145947933197, | |
| "learning_rate": 0.0004040506850271761, | |
| "loss": 4.4375, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.8183503243744208, | |
| "grad_norm": 0.029496431350708008, | |
| "learning_rate": 0.00040009191901301005, | |
| "loss": 4.4625, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.8192771084337349, | |
| "grad_norm": 0.02934381552040577, | |
| "learning_rate": 0.00039615095450849374, | |
| "loss": 4.5062, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.8202038924930491, | |
| "grad_norm": 0.030950119718909264, | |
| "learning_rate": 0.00039222782492228937, | |
| "loss": 4.5, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.8211306765523633, | |
| "grad_norm": 0.029751867055892944, | |
| "learning_rate": 0.0003883225635118659, | |
| "loss": 4.4625, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.8220574606116775, | |
| "grad_norm": 0.026806732639670372, | |
| "learning_rate": 0.0003844352033832199, | |
| "loss": 4.5125, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.8229842446709916, | |
| "grad_norm": 0.03083191066980362, | |
| "learning_rate": 0.00038056577749059266, | |
| "loss": 4.4688, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.8239110287303059, | |
| "grad_norm": 0.034451741725206375, | |
| "learning_rate": 0.0003767143186361935, | |
| "loss": 4.4563, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.82483781278962, | |
| "grad_norm": 0.030912496149539948, | |
| "learning_rate": 0.0003728808594699179, | |
| "loss": 4.475, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.8257645968489342, | |
| "grad_norm": 0.03567620739340782, | |
| "learning_rate": 0.00036906543248907495, | |
| "loss": 4.4938, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.8266913809082483, | |
| "grad_norm": 0.03392716869711876, | |
| "learning_rate": 0.0003652680700381092, | |
| "loss": 4.45, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.8276181649675626, | |
| "grad_norm": 0.032731059938669205, | |
| "learning_rate": 0.0003614888043083264, | |
| "loss": 4.4875, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.8285449490268767, | |
| "grad_norm": 0.035781849175691605, | |
| "learning_rate": 0.00035772766733762284, | |
| "loss": 4.4625, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.8294717330861909, | |
| "grad_norm": 0.02696853317320347, | |
| "learning_rate": 0.00035398469101020983, | |
| "loss": 4.4688, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.830398517145505, | |
| "grad_norm": 0.033876750618219376, | |
| "learning_rate": 0.00035025990705634833, | |
| "loss": 4.5, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.8313253012048193, | |
| "grad_norm": 0.03308440372347832, | |
| "learning_rate": 0.0003465533470520768, | |
| "loss": 4.5125, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.8322520852641334, | |
| "grad_norm": 0.0284098070114851, | |
| "learning_rate": 0.0003428650424189428, | |
| "loss": 4.5, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.8331788693234476, | |
| "grad_norm": 0.0362527072429657, | |
| "learning_rate": 0.0003391950244237396, | |
| "loss": 4.4813, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.8341056533827618, | |
| "grad_norm": 0.03239575773477554, | |
| "learning_rate": 0.0003355433241782385, | |
| "loss": 4.4437, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.835032437442076, | |
| "grad_norm": 0.028916817158460617, | |
| "learning_rate": 0.00033190997263892683, | |
| "loss": 4.5062, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.8359592215013901, | |
| "grad_norm": 0.037763047963380814, | |
| "learning_rate": 0.0003282950006067439, | |
| "loss": 4.475, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.8368860055607044, | |
| "grad_norm": 0.03783184662461281, | |
| "learning_rate": 0.000324698438726822, | |
| "loss": 4.4375, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.8378127896200185, | |
| "grad_norm": 0.03236427158117294, | |
| "learning_rate": 0.00032112031748822407, | |
| "loss": 4.425, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.8387395736793327, | |
| "grad_norm": 0.031087512150406837, | |
| "learning_rate": 0.00031756066722368775, | |
| "loss": 4.4875, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.8396663577386468, | |
| "grad_norm": 0.02958965301513672, | |
| "learning_rate": 0.0003140195181093658, | |
| "loss": 4.475, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.8405931417979611, | |
| "grad_norm": 0.028066281229257584, | |
| "learning_rate": 0.0003104969001645735, | |
| "loss": 4.4563, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.8415199258572753, | |
| "grad_norm": 0.030324235558509827, | |
| "learning_rate": 0.00030699284325152955, | |
| "loss": 4.4437, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.8424467099165894, | |
| "grad_norm": 0.03359181433916092, | |
| "learning_rate": 0.00030350737707510764, | |
| "loss": 4.4813, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.8433734939759037, | |
| "grad_norm": 0.02781173586845398, | |
| "learning_rate": 0.0003000405311825824, | |
| "loss": 4.4437, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.8443002780352178, | |
| "grad_norm": 0.03504948690533638, | |
| "learning_rate": 0.0002965923349633778, | |
| "loss": 4.45, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.845227062094532, | |
| "grad_norm": 0.03041827119886875, | |
| "learning_rate": 0.00029316281764882074, | |
| "loss": 4.4563, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.8461538461538461, | |
| "grad_norm": 0.03221605718135834, | |
| "learning_rate": 0.00028975200831189067, | |
| "loss": 4.475, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.8470806302131604, | |
| "grad_norm": 0.03199669346213341, | |
| "learning_rate": 0.0002863599358669755, | |
| "loss": 4.4313, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.8480074142724745, | |
| "grad_norm": 0.030510928481817245, | |
| "learning_rate": 0.0002829866290696234, | |
| "loss": 4.4, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.8489341983317887, | |
| "grad_norm": 0.02957424893975258, | |
| "learning_rate": 0.0002796321165163032, | |
| "loss": 4.5062, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.8498609823911029, | |
| "grad_norm": 0.0366031751036644, | |
| "learning_rate": 0.0002762964266441578, | |
| "loss": 4.4313, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.8507877664504171, | |
| "grad_norm": 0.03369331359863281, | |
| "learning_rate": 0.0002729795877307659, | |
| "loss": 4.4437, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.8517145505097312, | |
| "grad_norm": 0.03299278765916824, | |
| "learning_rate": 0.00026968162789390074, | |
| "loss": 4.4313, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.8526413345690455, | |
| "grad_norm": 0.03193372115492821, | |
| "learning_rate": 0.0002664025750912932, | |
| "loss": 4.4625, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.8535681186283596, | |
| "grad_norm": 0.029631877318024635, | |
| "learning_rate": 0.00026314245712039276, | |
| "loss": 4.4375, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.8544949026876738, | |
| "grad_norm": 0.03459390997886658, | |
| "learning_rate": 0.00025990130161813427, | |
| "loss": 4.4688, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.8554216867469879, | |
| "grad_norm": 0.0364365316927433, | |
| "learning_rate": 0.00025667913606070095, | |
| "loss": 4.4625, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.8563484708063022, | |
| "grad_norm": 0.0323617160320282, | |
| "learning_rate": 0.000253475987763295, | |
| "loss": 4.425, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.8572752548656163, | |
| "grad_norm": 0.02805604226887226, | |
| "learning_rate": 0.0002502918838799015, | |
| "loss": 4.4813, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.8582020389249305, | |
| "grad_norm": 0.033434659242630005, | |
| "learning_rate": 0.0002471268514030628, | |
| "loss": 4.425, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.8591288229842446, | |
| "grad_norm": 0.03157290443778038, | |
| "learning_rate": 0.00024398091716364617, | |
| "loss": 4.4313, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.8600556070435589, | |
| "grad_norm": 0.029048243537545204, | |
| "learning_rate": 0.00024085410783061895, | |
| "loss": 4.4625, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.860982391102873, | |
| "grad_norm": 0.0280530396848917, | |
| "learning_rate": 0.00023774644991081978, | |
| "loss": 4.4125, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.8619091751621872, | |
| "grad_norm": 0.03451543301343918, | |
| "learning_rate": 0.00023465796974873722, | |
| "loss": 4.4875, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.8628359592215014, | |
| "grad_norm": 0.030910175293684006, | |
| "learning_rate": 0.00023158869352628286, | |
| "loss": 4.45, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.8637627432808156, | |
| "grad_norm": 0.03156379237771034, | |
| "learning_rate": 0.00022853864726257307, | |
| "loss": 4.4125, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.8646895273401297, | |
| "grad_norm": 0.03295775502920151, | |
| "learning_rate": 0.00022550785681370368, | |
| "loss": 4.4313, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.865616311399444, | |
| "grad_norm": 0.026067038998007774, | |
| "learning_rate": 0.00022249634787253615, | |
| "loss": 4.45, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.8665430954587581, | |
| "grad_norm": 0.02678762935101986, | |
| "learning_rate": 0.00021950414596847684, | |
| "loss": 4.4563, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.8674698795180723, | |
| "grad_norm": 0.028849739581346512, | |
| "learning_rate": 0.0002165312764672589, | |
| "loss": 4.4437, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.8683966635773864, | |
| "grad_norm": 0.03232532739639282, | |
| "learning_rate": 0.0002135777645707318, | |
| "loss": 4.4, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.8693234476367007, | |
| "grad_norm": 0.027282997965812683, | |
| "learning_rate": 0.0002106436353166441, | |
| "loss": 4.4625, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.8702502316960148, | |
| "grad_norm": 0.026645608246326447, | |
| "learning_rate": 0.0002077289135784316, | |
| "loss": 4.4437, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.871177015755329, | |
| "grad_norm": 0.02711557038128376, | |
| "learning_rate": 0.00020483362406500838, | |
| "loss": 4.4313, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.8721037998146431, | |
| "grad_norm": 0.030816104263067245, | |
| "learning_rate": 0.0002019577913205553, | |
| "loss": 4.4625, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.8730305838739574, | |
| "grad_norm": 0.026929127052426338, | |
| "learning_rate": 0.00019910143972431323, | |
| "loss": 4.4313, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.8739573679332715, | |
| "grad_norm": 0.028096897527575493, | |
| "learning_rate": 0.0001962645934903748, | |
| "loss": 4.4875, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.8748841519925857, | |
| "grad_norm": 0.029124116525053978, | |
| "learning_rate": 0.00019344727666748218, | |
| "loss": 4.4563, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.8758109360518999, | |
| "grad_norm": 0.027243295684456825, | |
| "learning_rate": 0.00019064951313881918, | |
| "loss": 4.4375, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.8767377201112141, | |
| "grad_norm": 0.028546737506985664, | |
| "learning_rate": 0.00018787132662181238, | |
| "loss": 4.45, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.8776645041705282, | |
| "grad_norm": 0.026934707537293434, | |
| "learning_rate": 0.00018511274066792733, | |
| "loss": 4.425, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.8785912882298424, | |
| "grad_norm": 0.03399607166647911, | |
| "learning_rate": 0.00018237377866247157, | |
| "loss": 4.4563, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.8795180722891566, | |
| "grad_norm": 0.02882063016295433, | |
| "learning_rate": 0.000179654463824393, | |
| "loss": 4.4688, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.8804448563484708, | |
| "grad_norm": 0.026831530034542084, | |
| "learning_rate": 0.00017695481920608713, | |
| "loss": 4.4188, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.881371640407785, | |
| "grad_norm": 0.029771380126476288, | |
| "learning_rate": 0.00017427486769319738, | |
| "loss": 4.4062, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.8822984244670992, | |
| "grad_norm": 0.025736462324857712, | |
| "learning_rate": 0.00017161463200442484, | |
| "loss": 4.4125, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.8832252085264134, | |
| "grad_norm": 0.027890045195817947, | |
| "learning_rate": 0.0001689741346913337, | |
| "loss": 4.4625, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.8841519925857275, | |
| "grad_norm": 0.028950916603207588, | |
| "learning_rate": 0.0001663533981381593, | |
| "loss": 4.4375, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.8850787766450418, | |
| "grad_norm": 0.029823975637555122, | |
| "learning_rate": 0.00016375244456162119, | |
| "loss": 4.4688, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.8860055607043559, | |
| "grad_norm": 0.02855784259736538, | |
| "learning_rate": 0.00016117129601073116, | |
| "loss": 4.4563, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.8869323447636701, | |
| "grad_norm": 0.026093894615769386, | |
| "learning_rate": 0.00015860997436661028, | |
| "loss": 4.4875, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.8878591288229842, | |
| "grad_norm": 0.02811110019683838, | |
| "learning_rate": 0.00015606850134229966, | |
| "loss": 4.4375, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.8887859128822985, | |
| "grad_norm": 0.027288252487778664, | |
| "learning_rate": 0.00015354689848257942, | |
| "loss": 4.4188, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.8897126969416126, | |
| "grad_norm": 0.02676665410399437, | |
| "learning_rate": 0.0001510451871637833, | |
| "loss": 4.4188, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.8906394810009268, | |
| "grad_norm": 0.03431456908583641, | |
| "learning_rate": 0.00014856338859362052, | |
| "loss": 4.4188, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.891566265060241, | |
| "grad_norm": 0.026652604341506958, | |
| "learning_rate": 0.0001461015238109925, | |
| "loss": 4.375, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.8924930491195552, | |
| "grad_norm": 0.032444290816783905, | |
| "learning_rate": 0.00014365961368581842, | |
| "loss": 4.4313, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.8934198331788693, | |
| "grad_norm": 0.02602170594036579, | |
| "learning_rate": 0.00014123767891885435, | |
| "loss": 4.375, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.8943466172381835, | |
| "grad_norm": 0.026148205623030663, | |
| "learning_rate": 0.00013883574004152106, | |
| "loss": 4.425, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.8952734012974977, | |
| "grad_norm": 0.028608886525034904, | |
| "learning_rate": 0.0001364538174157273, | |
| "loss": 4.3812, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.8962001853568119, | |
| "grad_norm": 0.026529457420110703, | |
| "learning_rate": 0.00013409193123369996, | |
| "loss": 4.3812, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.897126969416126, | |
| "grad_norm": 0.029828151687979698, | |
| "learning_rate": 0.00013175010151780965, | |
| "loss": 4.4188, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.8980537534754403, | |
| "grad_norm": 0.03368750587105751, | |
| "learning_rate": 0.0001294283481204042, | |
| "loss": 4.4313, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.8989805375347544, | |
| "grad_norm": 0.02840586192905903, | |
| "learning_rate": 0.00012712669072363763, | |
| "loss": 4.4375, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.8999073215940686, | |
| "grad_norm": 0.030109241604804993, | |
| "learning_rate": 0.0001248451488393057, | |
| "loss": 4.4125, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.9008341056533827, | |
| "grad_norm": 0.028758615255355835, | |
| "learning_rate": 0.00012258374180867837, | |
| "loss": 4.45, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.901760889712697, | |
| "grad_norm": 0.02661893516778946, | |
| "learning_rate": 0.00012034248880233744, | |
| "loss": 4.4813, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.9026876737720111, | |
| "grad_norm": 0.02796340361237526, | |
| "learning_rate": 0.00011812140882001277, | |
| "loss": 4.45, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.9036144578313253, | |
| "grad_norm": 0.024077627807855606, | |
| "learning_rate": 0.00011592052069042208, | |
| "loss": 4.4625, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.9045412418906394, | |
| "grad_norm": 0.02510063722729683, | |
| "learning_rate": 0.00011373984307111229, | |
| "loss": 4.4188, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.9054680259499537, | |
| "grad_norm": 0.02504696324467659, | |
| "learning_rate": 0.00011157939444829762, | |
| "loss": 4.4437, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.9063948100092678, | |
| "grad_norm": 0.026624388992786407, | |
| "learning_rate": 0.0001094391931367078, | |
| "loss": 4.4563, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.907321594068582, | |
| "grad_norm": 0.02774794027209282, | |
| "learning_rate": 0.00010731925727942932, | |
| "loss": 4.4313, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.9082483781278962, | |
| "grad_norm": 0.027720240876078606, | |
| "learning_rate": 0.00010521960484775273, | |
| "loss": 4.425, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.9091751621872104, | |
| "grad_norm": 0.0258037019520998, | |
| "learning_rate": 0.00010314025364102087, | |
| "loss": 4.425, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.9101019462465245, | |
| "grad_norm": 0.031181413680315018, | |
| "learning_rate": 0.00010108122128647645, | |
| "loss": 4.425, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.9110287303058388, | |
| "grad_norm": 0.026958808302879333, | |
| "learning_rate": 9.904252523911473e-05, | |
| "loss": 4.425, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.9119555143651529, | |
| "grad_norm": 0.0251258946955204, | |
| "learning_rate": 9.702418278153296e-05, | |
| "loss": 4.3938, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.9128822984244671, | |
| "grad_norm": 0.026582978665828705, | |
| "learning_rate": 9.502621102378706e-05, | |
| "loss": 4.4062, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.9138090824837812, | |
| "grad_norm": 0.028273189440369606, | |
| "learning_rate": 9.304862690324295e-05, | |
| "loss": 4.4, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.9147358665430955, | |
| "grad_norm": 0.02678096853196621, | |
| "learning_rate": 9.109144718443679e-05, | |
| "loss": 4.4125, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.9156626506024096, | |
| "grad_norm": 0.024335335940122604, | |
| "learning_rate": 8.915468845892894e-05, | |
| "loss": 4.4125, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.9165894346617238, | |
| "grad_norm": 0.02453056164085865, | |
| "learning_rate": 8.72383671451668e-05, | |
| "loss": 4.4062, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.917516218721038, | |
| "grad_norm": 0.025096192955970764, | |
| "learning_rate": 8.534249948834311e-05, | |
| "loss": 4.4437, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.9184430027803522, | |
| "grad_norm": 0.025366991758346558, | |
| "learning_rate": 8.346710156026033e-05, | |
| "loss": 4.4062, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.9193697868396663, | |
| "grad_norm": 0.02832290157675743, | |
| "learning_rate": 8.161218925919172e-05, | |
| "loss": 4.4625, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.9202965708989805, | |
| "grad_norm": 0.027890915051102638, | |
| "learning_rate": 7.977777830974947e-05, | |
| "loss": 4.4375, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.9212233549582948, | |
| "grad_norm": 0.027829816564917564, | |
| "learning_rate": 7.796388426274947e-05, | |
| "loss": 4.45, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.9221501390176089, | |
| "grad_norm": 0.02420070953667164, | |
| "learning_rate": 7.61705224950801e-05, | |
| "loss": 4.4313, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.024921340867877007, | |
| "learning_rate": 7.43977082095726e-05, | |
| "loss": 4.4125, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.9240037071362373, | |
| "grad_norm": 0.02533474750816822, | |
| "learning_rate": 7.264545643486997e-05, | |
| "loss": 4.4062, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.9249304911955515, | |
| "grad_norm": 0.02694832719862461, | |
| "learning_rate": 7.091378202530224e-05, | |
| "loss": 4.375, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.9258572752548656, | |
| "grad_norm": 0.024787478148937225, | |
| "learning_rate": 6.920269966075893e-05, | |
| "loss": 4.4125, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.9267840593141798, | |
| "grad_norm": 0.02519523911178112, | |
| "learning_rate": 6.751222384656502e-05, | |
| "loss": 4.425, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.927710843373494, | |
| "grad_norm": 0.0249481238424778, | |
| "learning_rate": 6.584236891335804e-05, | |
| "loss": 4.45, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.9286376274328082, | |
| "grad_norm": 0.027095666155219078, | |
| "learning_rate": 6.419314901696671e-05, | |
| "loss": 4.4125, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.9295644114921223, | |
| "grad_norm": 0.026183003559708595, | |
| "learning_rate": 6.256457813828997e-05, | |
| "loss": 4.3938, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.9304911955514366, | |
| "grad_norm": 0.025982800871133804, | |
| "learning_rate": 6.095667008318068e-05, | |
| "loss": 4.4062, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.9314179796107507, | |
| "grad_norm": 0.027629397809505463, | |
| "learning_rate": 5.936943848232568e-05, | |
| "loss": 4.4625, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.9323447636700649, | |
| "grad_norm": 0.02437759004533291, | |
| "learning_rate": 5.78028967911326e-05, | |
| "loss": 4.425, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.933271547729379, | |
| "grad_norm": 0.024311203509569168, | |
| "learning_rate": 5.625705828961436e-05, | |
| "loss": 4.4375, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.9341983317886933, | |
| "grad_norm": 0.024223096668720245, | |
| "learning_rate": 5.473193608227789e-05, | |
| "loss": 4.4062, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.9351251158480074, | |
| "grad_norm": 0.023723123595118523, | |
| "learning_rate": 5.322754309801115e-05, | |
| "loss": 4.45, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.9360518999073216, | |
| "grad_norm": 0.02314998209476471, | |
| "learning_rate": 5.174389208997598e-05, | |
| "loss": 4.4188, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.9369786839666358, | |
| "grad_norm": 0.028589608147740364, | |
| "learning_rate": 5.0280995635497705e-05, | |
| "loss": 4.4375, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.93790546802595, | |
| "grad_norm": 0.023467648774385452, | |
| "learning_rate": 4.883886613595984e-05, | |
| "loss": 4.3938, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.9388322520852641, | |
| "grad_norm": 0.025684082880616188, | |
| "learning_rate": 4.74175158166984e-05, | |
| "loss": 4.4188, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.9397590361445783, | |
| "grad_norm": 0.028895532712340355, | |
| "learning_rate": 4.601695672689921e-05, | |
| "loss": 4.4375, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.9406858202038925, | |
| "grad_norm": 0.02598528377711773, | |
| "learning_rate": 4.463720073949351e-05, | |
| "loss": 4.4375, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.9416126042632067, | |
| "grad_norm": 0.025186927989125252, | |
| "learning_rate": 4.3278259551060015e-05, | |
| "loss": 4.4188, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.9425393883225208, | |
| "grad_norm": 0.02664157934486866, | |
| "learning_rate": 4.194014468172469e-05, | |
| "loss": 4.4313, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.943466172381835, | |
| "grad_norm": 0.02440650388598442, | |
| "learning_rate": 4.062286747506222e-05, | |
| "loss": 4.45, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.9443929564411492, | |
| "grad_norm": 0.022903352975845337, | |
| "learning_rate": 3.932643909800082e-05, | |
| "loss": 4.3875, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.9453197405004634, | |
| "grad_norm": 0.024947639554739, | |
| "learning_rate": 3.805087054072731e-05, | |
| "loss": 4.4375, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.9462465245597775, | |
| "grad_norm": 0.027093123644590378, | |
| "learning_rate": 3.6796172616594126e-05, | |
| "loss": 4.4188, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.9471733086190918, | |
| "grad_norm": 0.023427557200193405, | |
| "learning_rate": 3.5562355962027726e-05, | |
| "loss": 4.4625, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.9481000926784059, | |
| "grad_norm": 0.02435910701751709, | |
| "learning_rate": 3.434943103643728e-05, | |
| "loss": 4.4188, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.9490268767377201, | |
| "grad_norm": 0.025206558406352997, | |
| "learning_rate": 3.315740812212781e-05, | |
| "loss": 4.4062, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.9499536607970342, | |
| "grad_norm": 0.024215737357735634, | |
| "learning_rate": 3.198629732421188e-05, | |
| "loss": 4.4, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.9508804448563485, | |
| "grad_norm": 0.022633830085396767, | |
| "learning_rate": 3.0836108570524154e-05, | |
| "loss": 4.4062, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.9518072289156626, | |
| "grad_norm": 0.024218518286943436, | |
| "learning_rate": 2.9706851611537023e-05, | |
| "loss": 4.4938, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.9527340129749768, | |
| "grad_norm": 0.023550162091851234, | |
| "learning_rate": 2.8598536020278676e-05, | |
| "loss": 4.4, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.953660797034291, | |
| "grad_norm": 0.024799218401312828, | |
| "learning_rate": 2.7511171192250718e-05, | |
| "loss": 4.4375, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.9545875810936052, | |
| "grad_norm": 0.025713039562106133, | |
| "learning_rate": 2.6444766345350425e-05, | |
| "loss": 4.4062, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.9555143651529193, | |
| "grad_norm": 0.024386629462242126, | |
| "learning_rate": 2.539933051978971e-05, | |
| "loss": 4.4188, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.9564411492122336, | |
| "grad_norm": 0.025705767795443535, | |
| "learning_rate": 2.43748725780224e-05, | |
| "loss": 4.375, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.9573679332715477, | |
| "grad_norm": 0.026646282523870468, | |
| "learning_rate": 2.3371401204664577e-05, | |
| "loss": 4.45, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.9582947173308619, | |
| "grad_norm": 0.025327732786536217, | |
| "learning_rate": 2.238892490642547e-05, | |
| "loss": 4.4437, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.959221501390176, | |
| "grad_norm": 0.024950072169303894, | |
| "learning_rate": 2.142745201203139e-05, | |
| "loss": 4.45, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.9601482854494903, | |
| "grad_norm": 0.023224515840411186, | |
| "learning_rate": 2.048699067215831e-05, | |
| "loss": 4.4125, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.9610750695088045, | |
| "grad_norm": 0.024536075070500374, | |
| "learning_rate": 1.9567548859359963e-05, | |
| "loss": 4.45, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.9620018535681186, | |
| "grad_norm": 0.025291137397289276, | |
| "learning_rate": 1.866913436800316e-05, | |
| "loss": 4.4563, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.9629286376274329, | |
| "grad_norm": 0.023913368582725525, | |
| "learning_rate": 1.7791754814199255e-05, | |
| "loss": 4.4563, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.963855421686747, | |
| "grad_norm": 0.02541198581457138, | |
| "learning_rate": 1.693541763574058e-05, | |
| "loss": 4.45, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.9647822057460612, | |
| "grad_norm": 0.02386779710650444, | |
| "learning_rate": 1.6100130092037703e-05, | |
| "loss": 4.3812, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.9657089898053753, | |
| "grad_norm": 0.02432171255350113, | |
| "learning_rate": 1.528589926405727e-05, | |
| "loss": 4.4563, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.9666357738646896, | |
| "grad_norm": 0.026072759181261063, | |
| "learning_rate": 1.4492732054262603e-05, | |
| "loss": 4.4062, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.9675625579240037, | |
| "grad_norm": 0.02468552440404892, | |
| "learning_rate": 1.372063518655403e-05, | |
| "loss": 4.45, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.9684893419833179, | |
| "grad_norm": 0.023878788575530052, | |
| "learning_rate": 1.2969615206213369e-05, | |
| "loss": 4.4188, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.969416126042632, | |
| "grad_norm": 0.0231490395963192, | |
| "learning_rate": 1.223967847984786e-05, | |
| "loss": 4.4188, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.9703429101019463, | |
| "grad_norm": 0.024373695254325867, | |
| "learning_rate": 1.1530831195335767e-05, | |
| "loss": 4.4437, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.9712696941612604, | |
| "grad_norm": 0.02477751113474369, | |
| "learning_rate": 1.08430793617742e-05, | |
| "loss": 4.4188, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.9721964782205746, | |
| "grad_norm": 0.023831041529774666, | |
| "learning_rate": 1.0176428809428318e-05, | |
| "loss": 4.4813, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.9731232622798888, | |
| "grad_norm": 0.02483510971069336, | |
| "learning_rate": 9.530885189681649e-06, | |
| "loss": 4.4125, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.974050046339203, | |
| "grad_norm": 0.023760484531521797, | |
| "learning_rate": 8.906453974988626e-06, | |
| "loss": 4.4062, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.9749768303985171, | |
| "grad_norm": 0.02444753795862198, | |
| "learning_rate": 8.303140458827684e-06, | |
| "loss": 4.4062, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.9759036144578314, | |
| "grad_norm": 0.021337734535336494, | |
| "learning_rate": 7.720949755657125e-06, | |
| "loss": 4.4, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.9768303985171455, | |
| "grad_norm": 0.022071754559874535, | |
| "learning_rate": 7.159886800869875e-06, | |
| "loss": 4.425, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.9777571825764597, | |
| "grad_norm": 0.024915462359786034, | |
| "learning_rate": 6.6199563507549075e-06, | |
| "loss": 4.3938, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.9786839666357738, | |
| "grad_norm": 0.022235747426748276, | |
| "learning_rate": 6.1011629824533895e-06, | |
| "loss": 4.4, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.9796107506950881, | |
| "grad_norm": 0.02508777379989624, | |
| "learning_rate": 5.60351109392232e-06, | |
| "loss": 4.425, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.9805375347544022, | |
| "grad_norm": 0.02421114780008793, | |
| "learning_rate": 5.127004903896504e-06, | |
| "loss": 4.4688, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.9814643188137164, | |
| "grad_norm": 0.023330386728048325, | |
| "learning_rate": 4.6716484518524726e-06, | |
| "loss": 4.3875, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.9823911028730306, | |
| "grad_norm": 0.02507002279162407, | |
| "learning_rate": 4.237445597974343e-06, | |
| "loss": 4.4563, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.9833178869323448, | |
| "grad_norm": 0.023726079612970352, | |
| "learning_rate": 3.824400023121621e-06, | |
| "loss": 4.4688, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.9842446709916589, | |
| "grad_norm": 0.022975319996476173, | |
| "learning_rate": 3.4325152287975615e-06, | |
| "loss": 4.3938, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.9851714550509731, | |
| "grad_norm": 0.02411024458706379, | |
| "learning_rate": 3.061794537119467e-06, | |
| "loss": 4.4563, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.9860982391102873, | |
| "grad_norm": 0.022638075053691864, | |
| "learning_rate": 2.7122410907903794e-06, | |
| "loss": 4.4563, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.9870250231696015, | |
| "grad_norm": 0.023638809099793434, | |
| "learning_rate": 2.383857853073268e-06, | |
| "loss": 4.425, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.9879518072289156, | |
| "grad_norm": 0.02219136245548725, | |
| "learning_rate": 2.0766476077643813e-06, | |
| "loss": 4.4, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.9888785912882299, | |
| "grad_norm": 0.02723466046154499, | |
| "learning_rate": 1.7906129591713227e-06, | |
| "loss": 4.4437, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.989805375347544, | |
| "grad_norm": 0.024723384529352188, | |
| "learning_rate": 1.525756332090289e-06, | |
| "loss": 4.4, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.9907321594068582, | |
| "grad_norm": 0.023885123431682587, | |
| "learning_rate": 1.2820799717849775e-06, | |
| "loss": 4.4, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.9916589434661723, | |
| "grad_norm": 0.022805040702223778, | |
| "learning_rate": 1.059585943967989e-06, | |
| "loss": 4.4437, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.9925857275254866, | |
| "grad_norm": 0.023890964686870575, | |
| "learning_rate": 8.58276134784175e-07, | |
| "loss": 4.3812, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.9935125115848007, | |
| "grad_norm": 0.025231240317225456, | |
| "learning_rate": 6.781522507925964e-07, | |
| "loss": 4.3688, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.9944392956441149, | |
| "grad_norm": 0.021534454077482224, | |
| "learning_rate": 5.192158189543106e-07, | |
| "loss": 4.4938, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.995366079703429, | |
| "grad_norm": 0.023576676845550537, | |
| "learning_rate": 3.8146818661793925e-07, | |
| "loss": 4.4, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.9962928637627433, | |
| "grad_norm": 0.02641914412379265, | |
| "learning_rate": 2.6491052150884323e-07, | |
| "loss": 4.3625, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.9972196478220574, | |
| "grad_norm": 0.02341269887983799, | |
| "learning_rate": 1.6954381171885302e-07, | |
| "loss": 4.3812, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.9981464318813716, | |
| "grad_norm": 0.022809363901615143, | |
| "learning_rate": 9.536886569849746e-08, | |
| "loss": 4.4437, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.9990732159406858, | |
| "grad_norm": 0.023255689069628716, | |
| "learning_rate": 4.23863122495094e-08, | |
| "loss": 4.4437, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.03218919411301613, | |
| "learning_rate": 1.059660052010747e-08, | |
| "loss": 4.425, | |
| "step": 1079 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1079, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.154917754792837e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |