| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 752, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0026595744680851063, |
| "grad_norm": 7.823265044222166, |
| "learning_rate": 0.0, |
| "loss": 3.0134, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.005319148936170213, |
| "grad_norm": 7.642957709635029, |
| "learning_rate": 1.6005307325482135e-07, |
| "loss": 3.1765, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.007978723404255319, |
| "grad_norm": 7.334880781186477, |
| "learning_rate": 2.5367811923406806e-07, |
| "loss": 3.015, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.010638297872340425, |
| "grad_norm": 7.622164840160959, |
| "learning_rate": 3.201061465096427e-07, |
| "loss": 3.0191, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.013297872340425532, |
| "grad_norm": 7.559561166288389, |
| "learning_rate": 3.716317274634347e-07, |
| "loss": 3.0604, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.015957446808510637, |
| "grad_norm": 7.537486932594524, |
| "learning_rate": 4.137311924888894e-07, |
| "loss": 3.0974, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.018617021276595744, |
| "grad_norm": 7.852202340999875, |
| "learning_rate": 4.4932578299236894e-07, |
| "loss": 3.0015, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.02127659574468085, |
| "grad_norm": 7.556325866974648, |
| "learning_rate": 4.80159219764464e-07, |
| "loss": 3.0507, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.023936170212765957, |
| "grad_norm": 7.332175625826142, |
| "learning_rate": 5.073562384681361e-07, |
| "loss": 3.127, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.026595744680851064, |
| "grad_norm": 7.725255533578292, |
| "learning_rate": 5.316848007182561e-07, |
| "loss": 3.0381, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02925531914893617, |
| "grad_norm": 7.457326924140823, |
| "learning_rate": 5.536926622778005e-07, |
| "loss": 3.0634, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.031914893617021274, |
| "grad_norm": 7.693199711944396, |
| "learning_rate": 5.737842657437107e-07, |
| "loss": 3.0101, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.034574468085106384, |
| "grad_norm": 7.313122323445423, |
| "learning_rate": 5.922667492826867e-07, |
| "loss": 3.0967, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03723404255319149, |
| "grad_norm": 7.381687042192129, |
| "learning_rate": 6.093788562471904e-07, |
| "loss": 3.0606, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0398936170212766, |
| "grad_norm": 7.501689466289098, |
| "learning_rate": 6.253098466975028e-07, |
| "loss": 3.0923, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 7.764166896561166, |
| "learning_rate": 6.402122930192854e-07, |
| "loss": 3.0133, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.04521276595744681, |
| "grad_norm": 7.632126776388045, |
| "learning_rate": 6.542109895570008e-07, |
| "loss": 3.0261, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.047872340425531915, |
| "grad_norm": 7.5260472985128875, |
| "learning_rate": 6.674093117229574e-07, |
| "loss": 3.0122, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.05053191489361702, |
| "grad_norm": 7.760501268851623, |
| "learning_rate": 6.798938534903572e-07, |
| "loss": 2.8592, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.05319148936170213, |
| "grad_norm": 7.498060611474783, |
| "learning_rate": 6.917378739730775e-07, |
| "loss": 2.8595, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05585106382978723, |
| "grad_norm": 7.527553046681602, |
| "learning_rate": 7.030039022264371e-07, |
| "loss": 2.7323, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05851063829787234, |
| "grad_norm": 7.139599749462118, |
| "learning_rate": 7.13745735532622e-07, |
| "loss": 3.0506, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.061170212765957445, |
| "grad_norm": 7.6904167286597165, |
| "learning_rate": 7.24009993125516e-07, |
| "loss": 2.8334, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.06382978723404255, |
| "grad_norm": 7.13831891441032, |
| "learning_rate": 7.338373389985321e-07, |
| "loss": 3.0714, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.06648936170212766, |
| "grad_norm": 7.277657469838149, |
| "learning_rate": 7.432634549268694e-07, |
| "loss": 3.0034, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06914893617021277, |
| "grad_norm": 7.601860581578553, |
| "learning_rate": 7.52319822537508e-07, |
| "loss": 2.8405, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.07180851063829788, |
| "grad_norm": 7.406160378215662, |
| "learning_rate": 7.610343577022042e-07, |
| "loss": 2.8776, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.07446808510638298, |
| "grad_norm": 7.501158815505739, |
| "learning_rate": 7.694319295020116e-07, |
| "loss": 2.8623, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.07712765957446809, |
| "grad_norm": 7.475182990956878, |
| "learning_rate": 7.775347880836832e-07, |
| "loss": 2.8176, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0797872340425532, |
| "grad_norm": 6.90076598186568, |
| "learning_rate": 7.853629199523242e-07, |
| "loss": 2.6601, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08244680851063829, |
| "grad_norm": 7.317267534729387, |
| "learning_rate": 7.929343449851162e-07, |
| "loss": 2.5921, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 6.540702440994457, |
| "learning_rate": 8.002653662741068e-07, |
| "loss": 2.7996, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.08776595744680851, |
| "grad_norm": 7.135880043595326, |
| "learning_rate": 8.073707815118686e-07, |
| "loss": 2.5778, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.09042553191489362, |
| "grad_norm": 6.507937939592382, |
| "learning_rate": 8.142640628118222e-07, |
| "loss": 2.7356, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.09308510638297872, |
| "grad_norm": 6.445617298664252, |
| "learning_rate": 8.209575104558038e-07, |
| "loss": 2.7163, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.09574468085106383, |
| "grad_norm": 6.62462859642164, |
| "learning_rate": 8.274623849777788e-07, |
| "loss": 2.6283, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.09840425531914894, |
| "grad_norm": 6.7066040969467995, |
| "learning_rate": 8.337890211465859e-07, |
| "loss": 2.5485, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.10106382978723404, |
| "grad_norm": 6.6019115107446815, |
| "learning_rate": 8.399469267451787e-07, |
| "loss": 2.5864, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.10372340425531915, |
| "grad_norm": 6.526338473606032, |
| "learning_rate": 8.459448685167547e-07, |
| "loss": 2.6546, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.10638297872340426, |
| "grad_norm": 6.677090538207485, |
| "learning_rate": 8.517909472278988e-07, |
| "loss": 2.6135, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.10904255319148937, |
| "grad_norm": 6.627853354895093, |
| "learning_rate": 8.574926634616532e-07, |
| "loss": 2.6117, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.11170212765957446, |
| "grad_norm": 6.210420944517809, |
| "learning_rate": 8.630569754812584e-07, |
| "loss": 2.6221, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.11436170212765957, |
| "grad_norm": 6.490293150689325, |
| "learning_rate": 8.684903502843901e-07, |
| "loss": 2.6191, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.11702127659574468, |
| "grad_norm": 6.5133887096434036, |
| "learning_rate": 8.737988087874431e-07, |
| "loss": 2.6479, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1196808510638298, |
| "grad_norm": 6.5629458321401275, |
| "learning_rate": 8.789879659315709e-07, |
| "loss": 2.5919, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.12234042553191489, |
| "grad_norm": 6.263965110944729, |
| "learning_rate": 8.840630663803374e-07, |
| "loss": 2.564, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 6.2925368290438355, |
| "learning_rate": 8.890290163779749e-07, |
| "loss": 2.4828, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 5.562972488052771, |
| "learning_rate": 8.938904122533535e-07, |
| "loss": 2.3536, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.13031914893617022, |
| "grad_norm": 5.051596119252882, |
| "learning_rate": 8.986515659847379e-07, |
| "loss": 2.2057, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.13297872340425532, |
| "grad_norm": 4.783115795828969, |
| "learning_rate": 9.033165281816909e-07, |
| "loss": 2.1078, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1356382978723404, |
| "grad_norm": 4.0379873353666, |
| "learning_rate": 9.078891087910689e-07, |
| "loss": 2.1146, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.13829787234042554, |
| "grad_norm": 4.425461679286027, |
| "learning_rate": 9.123728957923294e-07, |
| "loss": 2.106, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.14095744680851063, |
| "grad_norm": 4.219063302168513, |
| "learning_rate": 9.167712721119934e-07, |
| "loss": 2.0448, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.14361702127659576, |
| "grad_norm": 4.177230312333208, |
| "learning_rate": 9.210874309570255e-07, |
| "loss": 2.106, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.14627659574468085, |
| "grad_norm": 3.58662530955563, |
| "learning_rate": 9.253243897412354e-07, |
| "loss": 2.1577, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.14893617021276595, |
| "grad_norm": 3.8076903225375607, |
| "learning_rate": 9.294850027568331e-07, |
| "loss": 2.044, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.15159574468085107, |
| "grad_norm": 3.9757823965828445, |
| "learning_rate": 9.335719727244254e-07, |
| "loss": 2.1354, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.15425531914893617, |
| "grad_norm": 3.8984355429604305, |
| "learning_rate": 9.375878613385046e-07, |
| "loss": 2.0297, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.15691489361702127, |
| "grad_norm": 4.006300970220442, |
| "learning_rate": 9.415350989114764e-07, |
| "loss": 1.8268, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.1595744680851064, |
| "grad_norm": 3.7231660155630126, |
| "learning_rate": 9.454159932071455e-07, |
| "loss": 1.8824, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1622340425531915, |
| "grad_norm": 3.896921356096762, |
| "learning_rate": 9.492327375440568e-07, |
| "loss": 1.9475, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.16489361702127658, |
| "grad_norm": 3.1704796037774394, |
| "learning_rate": 9.529874182399376e-07, |
| "loss": 1.9461, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1675531914893617, |
| "grad_norm": 3.624185273266048, |
| "learning_rate": 9.566820214605051e-07, |
| "loss": 2.0426, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 3.438777616799716, |
| "learning_rate": 9.60318439528928e-07, |
| "loss": 1.9094, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.17287234042553193, |
| "grad_norm": 4.546206080990496, |
| "learning_rate": 9.638984767461214e-07, |
| "loss": 1.9037, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.17553191489361702, |
| "grad_norm": 3.092553572071205, |
| "learning_rate": 9.6742385476669e-07, |
| "loss": 1.9928, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.17819148936170212, |
| "grad_norm": 3.3574221590495807, |
| "learning_rate": 9.708962175706178e-07, |
| "loss": 1.9752, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.18085106382978725, |
| "grad_norm": 3.0865121040891714, |
| "learning_rate": 9.743171360666435e-07, |
| "loss": 1.9853, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.18351063829787234, |
| "grad_norm": 3.25288028731065, |
| "learning_rate": 9.776881123595842e-07, |
| "loss": 1.8024, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.18617021276595744, |
| "grad_norm": 2.8739512645435865, |
| "learning_rate": 9.810105837106252e-07, |
| "loss": 2.0918, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.18882978723404256, |
| "grad_norm": 2.8379601865829414, |
| "learning_rate": 9.842859262167094e-07, |
| "loss": 1.801, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.19148936170212766, |
| "grad_norm": 2.634217473181439, |
| "learning_rate": 9.875154582326002e-07, |
| "loss": 1.9093, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.19414893617021275, |
| "grad_norm": 2.674409166489119, |
| "learning_rate": 9.907004435569156e-07, |
| "loss": 1.8468, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.19680851063829788, |
| "grad_norm": 2.5418462429291178, |
| "learning_rate": 9.938420944014074e-07, |
| "loss": 2.0187, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.19946808510638298, |
| "grad_norm": 2.249520992577069, |
| "learning_rate": 9.969415741609375e-07, |
| "loss": 1.8433, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.20212765957446807, |
| "grad_norm": 3.4070896898561567, |
| "learning_rate": 1e-06, |
| "loss": 1.6904, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.2047872340425532, |
| "grad_norm": 2.405587439537431, |
| "learning_rate": 1e-06, |
| "loss": 1.9263, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.2074468085106383, |
| "grad_norm": 2.1351508990882686, |
| "learning_rate": 9.985207100591716e-07, |
| "loss": 1.9915, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.21010638297872342, |
| "grad_norm": 2.253674050573154, |
| "learning_rate": 9.97041420118343e-07, |
| "loss": 1.8581, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 1.9816102720192281, |
| "learning_rate": 9.955621301775147e-07, |
| "loss": 1.8838, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2154255319148936, |
| "grad_norm": 2.1585154035600502, |
| "learning_rate": 9.940828402366864e-07, |
| "loss": 1.7891, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.21808510638297873, |
| "grad_norm": 2.1927798177222466, |
| "learning_rate": 9.92603550295858e-07, |
| "loss": 1.6338, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.22074468085106383, |
| "grad_norm": 2.1195681092445606, |
| "learning_rate": 9.911242603550295e-07, |
| "loss": 1.8233, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.22340425531914893, |
| "grad_norm": 1.79894840198714, |
| "learning_rate": 9.896449704142011e-07, |
| "loss": 1.6456, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.22606382978723405, |
| "grad_norm": 2.1334819930653004, |
| "learning_rate": 9.881656804733728e-07, |
| "loss": 1.7721, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.22872340425531915, |
| "grad_norm": 2.2522144017174988, |
| "learning_rate": 9.866863905325444e-07, |
| "loss": 1.8225, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.23138297872340424, |
| "grad_norm": 2.1109395194449885, |
| "learning_rate": 9.852071005917159e-07, |
| "loss": 1.8103, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.23404255319148937, |
| "grad_norm": 1.889220101950186, |
| "learning_rate": 9.837278106508875e-07, |
| "loss": 1.7531, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.23670212765957446, |
| "grad_norm": 2.0103297503036797, |
| "learning_rate": 9.822485207100592e-07, |
| "loss": 1.4589, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2393617021276596, |
| "grad_norm": 2.0087468092299012, |
| "learning_rate": 9.807692307692306e-07, |
| "loss": 1.7291, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.24202127659574468, |
| "grad_norm": 2.1537427838116603, |
| "learning_rate": 9.792899408284023e-07, |
| "loss": 1.8277, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.24468085106382978, |
| "grad_norm": 1.811327456337824, |
| "learning_rate": 9.77810650887574e-07, |
| "loss": 1.6806, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2473404255319149, |
| "grad_norm": 1.8193216533171808, |
| "learning_rate": 9.763313609467456e-07, |
| "loss": 1.5615, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.7880991378455267, |
| "learning_rate": 9.748520710059172e-07, |
| "loss": 1.6315, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2526595744680851, |
| "grad_norm": 1.9732905510612142, |
| "learning_rate": 9.733727810650887e-07, |
| "loss": 1.6118, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 1.8507510310387487, |
| "learning_rate": 9.718934911242603e-07, |
| "loss": 1.562, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2579787234042553, |
| "grad_norm": 1.937704198597928, |
| "learning_rate": 9.704142011834318e-07, |
| "loss": 1.5891, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.26063829787234044, |
| "grad_norm": 1.8949539814094551, |
| "learning_rate": 9.689349112426034e-07, |
| "loss": 1.55, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2632978723404255, |
| "grad_norm": 1.7741323445830024, |
| "learning_rate": 9.67455621301775e-07, |
| "loss": 1.734, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.26595744680851063, |
| "grad_norm": 1.7021314190064671, |
| "learning_rate": 9.659763313609467e-07, |
| "loss": 1.4889, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.26861702127659576, |
| "grad_norm": 1.9644914618403917, |
| "learning_rate": 9.644970414201184e-07, |
| "loss": 1.7278, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2712765957446808, |
| "grad_norm": 1.8634125925152643, |
| "learning_rate": 9.630177514792898e-07, |
| "loss": 1.5682, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.27393617021276595, |
| "grad_norm": 1.8401952841001055, |
| "learning_rate": 9.615384615384615e-07, |
| "loss": 1.565, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.2765957446808511, |
| "grad_norm": 1.804040900318666, |
| "learning_rate": 9.600591715976331e-07, |
| "loss": 1.5869, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.27925531914893614, |
| "grad_norm": 1.706090462740245, |
| "learning_rate": 9.585798816568048e-07, |
| "loss": 1.5148, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.28191489361702127, |
| "grad_norm": 1.728525487149655, |
| "learning_rate": 9.571005917159762e-07, |
| "loss": 1.5603, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.2845744680851064, |
| "grad_norm": 1.7524632420405768, |
| "learning_rate": 9.556213017751479e-07, |
| "loss": 1.6348, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.2872340425531915, |
| "grad_norm": 1.7478581672975904, |
| "learning_rate": 9.541420118343195e-07, |
| "loss": 1.6436, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.2898936170212766, |
| "grad_norm": 1.9985633100646443, |
| "learning_rate": 9.526627218934911e-07, |
| "loss": 1.7681, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2925531914893617, |
| "grad_norm": 2.079481786216591, |
| "learning_rate": 9.511834319526627e-07, |
| "loss": 1.419, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.29521276595744683, |
| "grad_norm": 1.6892443930394687, |
| "learning_rate": 9.497041420118342e-07, |
| "loss": 1.6604, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2978723404255319, |
| "grad_norm": 1.610960466459826, |
| "learning_rate": 9.482248520710058e-07, |
| "loss": 1.6354, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.300531914893617, |
| "grad_norm": 1.8544084912738468, |
| "learning_rate": 9.467455621301774e-07, |
| "loss": 1.596, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.30319148936170215, |
| "grad_norm": 1.9170248346565737, |
| "learning_rate": 9.45266272189349e-07, |
| "loss": 1.6547, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3058510638297872, |
| "grad_norm": 1.7881140714522759, |
| "learning_rate": 9.437869822485207e-07, |
| "loss": 1.6039, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.30851063829787234, |
| "grad_norm": 1.9088889444538937, |
| "learning_rate": 9.423076923076923e-07, |
| "loss": 1.7795, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.31117021276595747, |
| "grad_norm": 2.2553186073976383, |
| "learning_rate": 9.408284023668639e-07, |
| "loss": 1.4582, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.31382978723404253, |
| "grad_norm": 1.843300845507743, |
| "learning_rate": 9.393491124260355e-07, |
| "loss": 1.5459, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.31648936170212766, |
| "grad_norm": 1.7169781655337961, |
| "learning_rate": 9.378698224852071e-07, |
| "loss": 1.5621, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3191489361702128, |
| "grad_norm": 1.690585106680432, |
| "learning_rate": 9.363905325443787e-07, |
| "loss": 1.5449, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.32180851063829785, |
| "grad_norm": 1.634720340224596, |
| "learning_rate": 9.349112426035502e-07, |
| "loss": 1.5937, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.324468085106383, |
| "grad_norm": 1.905851103523696, |
| "learning_rate": 9.334319526627219e-07, |
| "loss": 1.6611, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.3271276595744681, |
| "grad_norm": 1.5977114642239374, |
| "learning_rate": 9.319526627218934e-07, |
| "loss": 1.3517, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.32978723404255317, |
| "grad_norm": 1.7739378101582597, |
| "learning_rate": 9.304733727810651e-07, |
| "loss": 1.6623, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.3324468085106383, |
| "grad_norm": 1.7433956729666737, |
| "learning_rate": 9.289940828402366e-07, |
| "loss": 1.565, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3351063829787234, |
| "grad_norm": 1.8292660534852752, |
| "learning_rate": 9.275147928994083e-07, |
| "loss": 1.6095, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.3377659574468085, |
| "grad_norm": 1.8582148418654536, |
| "learning_rate": 9.260355029585798e-07, |
| "loss": 1.6689, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 1.5903483721719576, |
| "learning_rate": 9.245562130177515e-07, |
| "loss": 1.4741, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.34308510638297873, |
| "grad_norm": 1.5937293556222676, |
| "learning_rate": 9.230769230769231e-07, |
| "loss": 1.4603, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.34574468085106386, |
| "grad_norm": 1.8563999128333846, |
| "learning_rate": 9.215976331360947e-07, |
| "loss": 1.6907, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3484042553191489, |
| "grad_norm": 1.7139518347374663, |
| "learning_rate": 9.201183431952662e-07, |
| "loss": 1.6474, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.35106382978723405, |
| "grad_norm": 1.7465122753927422, |
| "learning_rate": 9.186390532544378e-07, |
| "loss": 1.6324, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.3537234042553192, |
| "grad_norm": 1.6610443253134841, |
| "learning_rate": 9.171597633136094e-07, |
| "loss": 1.5045, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.35638297872340424, |
| "grad_norm": 1.655396079412198, |
| "learning_rate": 9.15680473372781e-07, |
| "loss": 1.5022, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.35904255319148937, |
| "grad_norm": 1.6336073697442315, |
| "learning_rate": 9.142011834319526e-07, |
| "loss": 1.5522, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.3617021276595745, |
| "grad_norm": 1.707143679480118, |
| "learning_rate": 9.127218934911243e-07, |
| "loss": 1.551, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.36436170212765956, |
| "grad_norm": 1.685651756020523, |
| "learning_rate": 9.112426035502958e-07, |
| "loss": 1.6122, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.3670212765957447, |
| "grad_norm": 2.358800653945757, |
| "learning_rate": 9.097633136094675e-07, |
| "loss": 1.5604, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3696808510638298, |
| "grad_norm": 1.9426261400386715, |
| "learning_rate": 9.08284023668639e-07, |
| "loss": 1.4685, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.3723404255319149, |
| "grad_norm": 1.7028161565048658, |
| "learning_rate": 9.068047337278106e-07, |
| "loss": 1.4377, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 1.6566345919926695, |
| "learning_rate": 9.053254437869821e-07, |
| "loss": 1.4231, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3776595744680851, |
| "grad_norm": 1.750830521373255, |
| "learning_rate": 9.038461538461538e-07, |
| "loss": 1.5528, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.3803191489361702, |
| "grad_norm": 1.673773427490192, |
| "learning_rate": 9.023668639053253e-07, |
| "loss": 1.5206, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3829787234042553, |
| "grad_norm": 1.9158107325263525, |
| "learning_rate": 9.00887573964497e-07, |
| "loss": 1.5624, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.38563829787234044, |
| "grad_norm": 1.863606934304487, |
| "learning_rate": 8.994082840236686e-07, |
| "loss": 1.7617, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3882978723404255, |
| "grad_norm": 1.743890793561562, |
| "learning_rate": 8.979289940828402e-07, |
| "loss": 1.6736, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.39095744680851063, |
| "grad_norm": 1.7765187119696408, |
| "learning_rate": 8.964497041420118e-07, |
| "loss": 1.401, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.39361702127659576, |
| "grad_norm": 1.9492055399414594, |
| "learning_rate": 8.949704142011834e-07, |
| "loss": 1.7687, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.3962765957446808, |
| "grad_norm": 1.9491511698228168, |
| "learning_rate": 8.93491124260355e-07, |
| "loss": 1.5873, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.39893617021276595, |
| "grad_norm": 1.7732893423967535, |
| "learning_rate": 8.920118343195265e-07, |
| "loss": 1.4666, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4015957446808511, |
| "grad_norm": 1.628295930467344, |
| "learning_rate": 8.905325443786981e-07, |
| "loss": 1.4253, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.40425531914893614, |
| "grad_norm": 1.9501879161375453, |
| "learning_rate": 8.890532544378698e-07, |
| "loss": 1.5748, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.40691489361702127, |
| "grad_norm": 1.6883577356837587, |
| "learning_rate": 8.875739644970413e-07, |
| "loss": 1.3546, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.4095744680851064, |
| "grad_norm": 1.7177779483356421, |
| "learning_rate": 8.86094674556213e-07, |
| "loss": 1.6715, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.4122340425531915, |
| "grad_norm": 1.806803334100437, |
| "learning_rate": 8.846153846153846e-07, |
| "loss": 1.5485, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4148936170212766, |
| "grad_norm": 1.7522522193654075, |
| "learning_rate": 8.831360946745562e-07, |
| "loss": 1.5091, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.4175531914893617, |
| "grad_norm": 1.8508800423865754, |
| "learning_rate": 8.816568047337278e-07, |
| "loss": 1.7112, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.42021276595744683, |
| "grad_norm": 1.752024433296569, |
| "learning_rate": 8.801775147928994e-07, |
| "loss": 1.3702, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.4228723404255319, |
| "grad_norm": 2.0875697232783246, |
| "learning_rate": 8.786982248520711e-07, |
| "loss": 1.5972, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 1.7852623572002673, |
| "learning_rate": 8.772189349112425e-07, |
| "loss": 1.5496, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.42819148936170215, |
| "grad_norm": 2.0049459574733968, |
| "learning_rate": 8.757396449704142e-07, |
| "loss": 1.5256, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.4308510638297872, |
| "grad_norm": 1.8394745863340762, |
| "learning_rate": 8.742603550295857e-07, |
| "loss": 1.5466, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.43351063829787234, |
| "grad_norm": 1.890821588557376, |
| "learning_rate": 8.727810650887574e-07, |
| "loss": 1.4839, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.43617021276595747, |
| "grad_norm": 1.6481011214712673, |
| "learning_rate": 8.713017751479289e-07, |
| "loss": 1.6322, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.43882978723404253, |
| "grad_norm": 1.6910215297075097, |
| "learning_rate": 8.698224852071006e-07, |
| "loss": 1.4294, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.44148936170212766, |
| "grad_norm": 2.2849926490581978, |
| "learning_rate": 8.683431952662722e-07, |
| "loss": 1.5214, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.4441489361702128, |
| "grad_norm": 1.6530282697158378, |
| "learning_rate": 8.668639053254438e-07, |
| "loss": 1.5387, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.44680851063829785, |
| "grad_norm": 1.8612064349812791, |
| "learning_rate": 8.653846153846154e-07, |
| "loss": 1.4698, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.449468085106383, |
| "grad_norm": 1.844773154127249, |
| "learning_rate": 8.639053254437869e-07, |
| "loss": 1.6155, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.4521276595744681, |
| "grad_norm": 1.7920892424117567, |
| "learning_rate": 8.624260355029585e-07, |
| "loss": 1.4435, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.45478723404255317, |
| "grad_norm": 1.7631889349519279, |
| "learning_rate": 8.609467455621301e-07, |
| "loss": 1.5657, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.4574468085106383, |
| "grad_norm": 1.850684750618834, |
| "learning_rate": 8.594674556213017e-07, |
| "loss": 1.4599, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.4601063829787234, |
| "grad_norm": 1.8936077580680233, |
| "learning_rate": 8.579881656804734e-07, |
| "loss": 1.4487, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.4627659574468085, |
| "grad_norm": 1.8302300226282981, |
| "learning_rate": 8.565088757396449e-07, |
| "loss": 1.3982, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.4654255319148936, |
| "grad_norm": 1.77826681795055, |
| "learning_rate": 8.550295857988166e-07, |
| "loss": 1.4513, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.46808510638297873, |
| "grad_norm": 1.687204688334926, |
| "learning_rate": 8.535502958579881e-07, |
| "loss": 1.4119, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.47074468085106386, |
| "grad_norm": 1.8120179028458203, |
| "learning_rate": 8.520710059171598e-07, |
| "loss": 1.6192, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.4734042553191489, |
| "grad_norm": 1.8795981293168291, |
| "learning_rate": 8.505917159763313e-07, |
| "loss": 1.4954, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.47606382978723405, |
| "grad_norm": 1.7065716786077503, |
| "learning_rate": 8.491124260355029e-07, |
| "loss": 1.5966, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.4787234042553192, |
| "grad_norm": 1.7627718668452295, |
| "learning_rate": 8.476331360946745e-07, |
| "loss": 1.4327, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.48138297872340424, |
| "grad_norm": 1.8665938451163775, |
| "learning_rate": 8.461538461538461e-07, |
| "loss": 1.4918, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.48404255319148937, |
| "grad_norm": 1.819110850294668, |
| "learning_rate": 8.446745562130177e-07, |
| "loss": 1.5539, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.4867021276595745, |
| "grad_norm": 1.8453397847354074, |
| "learning_rate": 8.431952662721893e-07, |
| "loss": 1.5331, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.48936170212765956, |
| "grad_norm": 2.622110865899153, |
| "learning_rate": 8.417159763313609e-07, |
| "loss": 1.3705, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.4920212765957447, |
| "grad_norm": 2.0496831369913378, |
| "learning_rate": 8.402366863905325e-07, |
| "loss": 1.434, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.4946808510638298, |
| "grad_norm": 1.7777460669960974, |
| "learning_rate": 8.387573964497041e-07, |
| "loss": 1.507, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.4973404255319149, |
| "grad_norm": 1.7648525067264564, |
| "learning_rate": 8.372781065088757e-07, |
| "loss": 1.5419, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.7346580205717035, |
| "learning_rate": 8.357988165680473e-07, |
| "loss": 1.4474, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5026595744680851, |
| "grad_norm": 1.8941186829293386, |
| "learning_rate": 8.343195266272189e-07, |
| "loss": 1.4239, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.5053191489361702, |
| "grad_norm": 1.793062893259623, |
| "learning_rate": 8.328402366863904e-07, |
| "loss": 1.5339, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5079787234042553, |
| "grad_norm": 1.867427730668421, |
| "learning_rate": 8.313609467455621e-07, |
| "loss": 1.3395, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 1.836942681632619, |
| "learning_rate": 8.298816568047336e-07, |
| "loss": 1.6135, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5132978723404256, |
| "grad_norm": 1.6942657192312134, |
| "learning_rate": 8.284023668639053e-07, |
| "loss": 1.5308, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.5159574468085106, |
| "grad_norm": 1.841411465300408, |
| "learning_rate": 8.269230769230768e-07, |
| "loss": 1.5345, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5186170212765957, |
| "grad_norm": 1.8794098811564628, |
| "learning_rate": 8.254437869822485e-07, |
| "loss": 1.6901, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5212765957446809, |
| "grad_norm": 1.7012388651957833, |
| "learning_rate": 8.239644970414202e-07, |
| "loss": 1.474, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.523936170212766, |
| "grad_norm": 1.7944418314011599, |
| "learning_rate": 8.224852071005917e-07, |
| "loss": 1.32, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.526595744680851, |
| "grad_norm": 1.7425046897179257, |
| "learning_rate": 8.210059171597633e-07, |
| "loss": 1.368, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.5292553191489362, |
| "grad_norm": 1.7880936710475852, |
| "learning_rate": 8.195266272189348e-07, |
| "loss": 1.645, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.5319148936170213, |
| "grad_norm": 1.6715457627732746, |
| "learning_rate": 8.180473372781065e-07, |
| "loss": 1.4261, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5345744680851063, |
| "grad_norm": 1.822736509594185, |
| "learning_rate": 8.16568047337278e-07, |
| "loss": 1.624, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.5372340425531915, |
| "grad_norm": 1.6809290356200428, |
| "learning_rate": 8.150887573964497e-07, |
| "loss": 1.3937, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.5398936170212766, |
| "grad_norm": 1.7496410410443377, |
| "learning_rate": 8.136094674556213e-07, |
| "loss": 1.5429, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.5425531914893617, |
| "grad_norm": 1.9199038686131074, |
| "learning_rate": 8.121301775147929e-07, |
| "loss": 1.5023, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.5452127659574468, |
| "grad_norm": 1.6850758554154257, |
| "learning_rate": 8.106508875739645e-07, |
| "loss": 1.4991, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5478723404255319, |
| "grad_norm": 1.9329660957508767, |
| "learning_rate": 8.091715976331361e-07, |
| "loss": 1.5398, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.550531914893617, |
| "grad_norm": 1.666476222919606, |
| "learning_rate": 8.076923076923077e-07, |
| "loss": 1.5637, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.5531914893617021, |
| "grad_norm": 2.224757697809998, |
| "learning_rate": 8.062130177514792e-07, |
| "loss": 1.6452, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.5558510638297872, |
| "grad_norm": 1.861249667872802, |
| "learning_rate": 8.047337278106508e-07, |
| "loss": 1.541, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.5585106382978723, |
| "grad_norm": 1.7749036870266581, |
| "learning_rate": 8.032544378698225e-07, |
| "loss": 1.4811, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5611702127659575, |
| "grad_norm": 1.9725453562773687, |
| "learning_rate": 8.01775147928994e-07, |
| "loss": 1.4289, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.5638297872340425, |
| "grad_norm": 1.5771745142242444, |
| "learning_rate": 8.002958579881657e-07, |
| "loss": 1.1851, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.5664893617021277, |
| "grad_norm": 1.7671763938332208, |
| "learning_rate": 7.988165680473372e-07, |
| "loss": 1.4632, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.5691489361702128, |
| "grad_norm": 2.3539254619170147, |
| "learning_rate": 7.973372781065089e-07, |
| "loss": 1.4399, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.5718085106382979, |
| "grad_norm": 1.6811659582751803, |
| "learning_rate": 7.958579881656804e-07, |
| "loss": 1.3874, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.574468085106383, |
| "grad_norm": 1.816100982937805, |
| "learning_rate": 7.943786982248521e-07, |
| "loss": 1.3507, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.5771276595744681, |
| "grad_norm": 1.6054386033989114, |
| "learning_rate": 7.928994082840237e-07, |
| "loss": 1.3523, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.5797872340425532, |
| "grad_norm": 1.747109205347203, |
| "learning_rate": 7.914201183431952e-07, |
| "loss": 1.4471, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.5824468085106383, |
| "grad_norm": 2.544095072667201, |
| "learning_rate": 7.899408284023668e-07, |
| "loss": 1.4659, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5851063829787234, |
| "grad_norm": 1.9052355208698295, |
| "learning_rate": 7.884615384615384e-07, |
| "loss": 1.6808, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5877659574468085, |
| "grad_norm": 1.7475037482225553, |
| "learning_rate": 7.8698224852071e-07, |
| "loss": 1.4223, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5904255319148937, |
| "grad_norm": 1.7030078252678653, |
| "learning_rate": 7.855029585798816e-07, |
| "loss": 1.558, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5930851063829787, |
| "grad_norm": 1.6935707673119045, |
| "learning_rate": 7.840236686390532e-07, |
| "loss": 1.3466, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 1.9730090137474936, |
| "learning_rate": 7.825443786982249e-07, |
| "loss": 1.6373, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.598404255319149, |
| "grad_norm": 1.7329005942211182, |
| "learning_rate": 7.810650887573964e-07, |
| "loss": 1.3348, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.601063829787234, |
| "grad_norm": 2.033731441401403, |
| "learning_rate": 7.795857988165681e-07, |
| "loss": 1.5524, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6037234042553191, |
| "grad_norm": 1.7207604873916247, |
| "learning_rate": 7.781065088757395e-07, |
| "loss": 1.3862, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.6063829787234043, |
| "grad_norm": 1.8075753757910789, |
| "learning_rate": 7.766272189349112e-07, |
| "loss": 1.5981, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.6090425531914894, |
| "grad_norm": 2.291978352476086, |
| "learning_rate": 7.751479289940827e-07, |
| "loss": 1.4514, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.6117021276595744, |
| "grad_norm": 1.8644568615293915, |
| "learning_rate": 7.736686390532544e-07, |
| "loss": 1.6587, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6143617021276596, |
| "grad_norm": 2.594171053250292, |
| "learning_rate": 7.721893491124259e-07, |
| "loss": 1.6336, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.6170212765957447, |
| "grad_norm": 1.5011539788709316, |
| "learning_rate": 7.707100591715976e-07, |
| "loss": 1.2387, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.6196808510638298, |
| "grad_norm": 1.6819405282763624, |
| "learning_rate": 7.692307692307693e-07, |
| "loss": 1.5038, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.6223404255319149, |
| "grad_norm": 1.7251235005494032, |
| "learning_rate": 7.677514792899408e-07, |
| "loss": 1.5774, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 1.864499827243002, |
| "learning_rate": 7.662721893491125e-07, |
| "loss": 1.5276, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.6276595744680851, |
| "grad_norm": 1.7781078666304035, |
| "learning_rate": 7.64792899408284e-07, |
| "loss": 1.5232, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.6303191489361702, |
| "grad_norm": 1.6599021088795032, |
| "learning_rate": 7.633136094674556e-07, |
| "loss": 1.4473, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.6329787234042553, |
| "grad_norm": 1.6721336663765791, |
| "learning_rate": 7.618343195266271e-07, |
| "loss": 1.3851, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.6356382978723404, |
| "grad_norm": 1.797473310291003, |
| "learning_rate": 7.603550295857988e-07, |
| "loss": 1.4871, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 1.68684289642348, |
| "learning_rate": 7.588757396449704e-07, |
| "loss": 1.3971, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6409574468085106, |
| "grad_norm": 1.6548030218587813, |
| "learning_rate": 7.57396449704142e-07, |
| "loss": 1.4413, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.6436170212765957, |
| "grad_norm": 1.7764920048747164, |
| "learning_rate": 7.559171597633136e-07, |
| "loss": 1.5327, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.6462765957446809, |
| "grad_norm": 2.3776019048662627, |
| "learning_rate": 7.544378698224852e-07, |
| "loss": 1.3973, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.648936170212766, |
| "grad_norm": 2.180898241246454, |
| "learning_rate": 7.529585798816568e-07, |
| "loss": 1.4108, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.651595744680851, |
| "grad_norm": 1.7308120559219609, |
| "learning_rate": 7.514792899408284e-07, |
| "loss": 1.437, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.6542553191489362, |
| "grad_norm": 1.6797613083347633, |
| "learning_rate": 7.5e-07, |
| "loss": 1.4266, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.6569148936170213, |
| "grad_norm": 1.7244677372074293, |
| "learning_rate": 7.485207100591716e-07, |
| "loss": 1.4562, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.6595744680851063, |
| "grad_norm": 1.831008658275623, |
| "learning_rate": 7.470414201183431e-07, |
| "loss": 1.625, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.6622340425531915, |
| "grad_norm": 1.5987807515924746, |
| "learning_rate": 7.455621301775148e-07, |
| "loss": 1.351, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.6648936170212766, |
| "grad_norm": 1.657627324756177, |
| "learning_rate": 7.440828402366863e-07, |
| "loss": 1.3021, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6675531914893617, |
| "grad_norm": 1.6806656229564951, |
| "learning_rate": 7.42603550295858e-07, |
| "loss": 1.4708, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.6702127659574468, |
| "grad_norm": 1.6469208307421896, |
| "learning_rate": 7.411242603550295e-07, |
| "loss": 1.4309, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.6728723404255319, |
| "grad_norm": 1.6396856616158755, |
| "learning_rate": 7.396449704142012e-07, |
| "loss": 1.501, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.675531914893617, |
| "grad_norm": 1.6377964159170837, |
| "learning_rate": 7.381656804733728e-07, |
| "loss": 1.5208, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.6781914893617021, |
| "grad_norm": 1.6580558864253538, |
| "learning_rate": 7.366863905325444e-07, |
| "loss": 1.4638, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 1.837851772242258, |
| "learning_rate": 7.352071005917159e-07, |
| "loss": 1.3164, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.6835106382978723, |
| "grad_norm": 2.224825104258165, |
| "learning_rate": 7.337278106508875e-07, |
| "loss": 1.6295, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.6861702127659575, |
| "grad_norm": 1.6131790535172048, |
| "learning_rate": 7.322485207100591e-07, |
| "loss": 1.4414, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.6888297872340425, |
| "grad_norm": 1.549489595607848, |
| "learning_rate": 7.307692307692307e-07, |
| "loss": 1.4455, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.6914893617021277, |
| "grad_norm": 1.761687284810298, |
| "learning_rate": 7.292899408284023e-07, |
| "loss": 1.4913, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6941489361702128, |
| "grad_norm": 1.6593936380320258, |
| "learning_rate": 7.27810650887574e-07, |
| "loss": 1.6427, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.6968085106382979, |
| "grad_norm": 1.7879593292364175, |
| "learning_rate": 7.263313609467455e-07, |
| "loss": 1.6127, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.699468085106383, |
| "grad_norm": 1.559119726167982, |
| "learning_rate": 7.248520710059172e-07, |
| "loss": 1.3617, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.7021276595744681, |
| "grad_norm": 1.5376887507996986, |
| "learning_rate": 7.233727810650887e-07, |
| "loss": 1.3915, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.7047872340425532, |
| "grad_norm": 1.892877482230423, |
| "learning_rate": 7.218934911242604e-07, |
| "loss": 1.3938, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.7074468085106383, |
| "grad_norm": 2.1615047832844647, |
| "learning_rate": 7.204142011834318e-07, |
| "loss": 1.433, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.7101063829787234, |
| "grad_norm": 1.5754637988987956, |
| "learning_rate": 7.189349112426035e-07, |
| "loss": 1.3913, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.7127659574468085, |
| "grad_norm": 1.4917666655680848, |
| "learning_rate": 7.17455621301775e-07, |
| "loss": 1.4024, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.7154255319148937, |
| "grad_norm": 1.7371252437936426, |
| "learning_rate": 7.159763313609467e-07, |
| "loss": 1.5104, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.7180851063829787, |
| "grad_norm": 1.479255763133087, |
| "learning_rate": 7.144970414201183e-07, |
| "loss": 1.3533, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7207446808510638, |
| "grad_norm": 1.6094715867178733, |
| "learning_rate": 7.130177514792899e-07, |
| "loss": 1.3532, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.723404255319149, |
| "grad_norm": 1.565198399335246, |
| "learning_rate": 7.115384615384616e-07, |
| "loss": 1.3988, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.726063829787234, |
| "grad_norm": 1.5067122007483011, |
| "learning_rate": 7.100591715976331e-07, |
| "loss": 1.3825, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.7287234042553191, |
| "grad_norm": 1.7140633929936213, |
| "learning_rate": 7.085798816568048e-07, |
| "loss": 1.4082, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.7313829787234043, |
| "grad_norm": 1.540948863934289, |
| "learning_rate": 7.071005917159762e-07, |
| "loss": 1.5153, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.7340425531914894, |
| "grad_norm": 1.7664241501358, |
| "learning_rate": 7.056213017751479e-07, |
| "loss": 1.2721, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.7367021276595744, |
| "grad_norm": 1.5709026992552224, |
| "learning_rate": 7.041420118343195e-07, |
| "loss": 1.3492, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.7393617021276596, |
| "grad_norm": 1.5068566647857482, |
| "learning_rate": 7.026627218934911e-07, |
| "loss": 1.362, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.7420212765957447, |
| "grad_norm": 1.9554416192824882, |
| "learning_rate": 7.011834319526627e-07, |
| "loss": 1.6618, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.7446808510638298, |
| "grad_norm": 1.6405976792740071, |
| "learning_rate": 6.997041420118343e-07, |
| "loss": 1.5917, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7473404255319149, |
| "grad_norm": 1.7066156854813295, |
| "learning_rate": 6.982248520710059e-07, |
| "loss": 1.2984, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.616654607721298, |
| "learning_rate": 6.967455621301775e-07, |
| "loss": 1.4085, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.7526595744680851, |
| "grad_norm": 1.6119917549130687, |
| "learning_rate": 6.952662721893491e-07, |
| "loss": 1.4059, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.7553191489361702, |
| "grad_norm": 1.4894224582399371, |
| "learning_rate": 6.937869822485208e-07, |
| "loss": 1.4205, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.7579787234042553, |
| "grad_norm": 1.7561130701083838, |
| "learning_rate": 6.923076923076922e-07, |
| "loss": 1.5931, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.7606382978723404, |
| "grad_norm": 1.5507572662266917, |
| "learning_rate": 6.908284023668639e-07, |
| "loss": 1.3968, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.7632978723404256, |
| "grad_norm": 1.4671913155048064, |
| "learning_rate": 6.893491124260354e-07, |
| "loss": 1.2951, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 1.5498863732312698, |
| "learning_rate": 6.878698224852071e-07, |
| "loss": 1.2232, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.7686170212765957, |
| "grad_norm": 1.680206076834721, |
| "learning_rate": 6.863905325443786e-07, |
| "loss": 1.4992, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.7712765957446809, |
| "grad_norm": 1.718088751084764, |
| "learning_rate": 6.849112426035503e-07, |
| "loss": 1.4422, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.773936170212766, |
| "grad_norm": 1.5282347438855142, |
| "learning_rate": 6.834319526627219e-07, |
| "loss": 1.4063, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.776595744680851, |
| "grad_norm": 1.9525999050003993, |
| "learning_rate": 6.819526627218935e-07, |
| "loss": 1.5957, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.7792553191489362, |
| "grad_norm": 2.258813866966866, |
| "learning_rate": 6.804733727810651e-07, |
| "loss": 1.4431, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.7819148936170213, |
| "grad_norm": 1.5364750834268603, |
| "learning_rate": 6.789940828402367e-07, |
| "loss": 1.3558, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.7845744680851063, |
| "grad_norm": 1.5393402313754123, |
| "learning_rate": 6.775147928994082e-07, |
| "loss": 1.439, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.7872340425531915, |
| "grad_norm": 1.6455162885770198, |
| "learning_rate": 6.760355029585798e-07, |
| "loss": 1.5158, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.7898936170212766, |
| "grad_norm": 1.6475778661453933, |
| "learning_rate": 6.745562130177514e-07, |
| "loss": 1.4278, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.7925531914893617, |
| "grad_norm": 1.502594611161215, |
| "learning_rate": 6.730769230769231e-07, |
| "loss": 1.3064, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.7952127659574468, |
| "grad_norm": 1.4819306978451936, |
| "learning_rate": 6.715976331360946e-07, |
| "loss": 1.4, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.7978723404255319, |
| "grad_norm": 1.6911681538448085, |
| "learning_rate": 6.701183431952663e-07, |
| "loss": 1.3364, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.800531914893617, |
| "grad_norm": 1.4712764033020207, |
| "learning_rate": 6.686390532544378e-07, |
| "loss": 1.3514, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.8031914893617021, |
| "grad_norm": 1.5453820007555663, |
| "learning_rate": 6.671597633136095e-07, |
| "loss": 1.252, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.8058510638297872, |
| "grad_norm": 1.6870546106387143, |
| "learning_rate": 6.65680473372781e-07, |
| "loss": 1.4819, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.8085106382978723, |
| "grad_norm": 1.539899104888, |
| "learning_rate": 6.642011834319526e-07, |
| "loss": 1.4248, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.8111702127659575, |
| "grad_norm": 1.8570540873303243, |
| "learning_rate": 6.627218934911242e-07, |
| "loss": 1.398, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.8138297872340425, |
| "grad_norm": 1.6462980732890118, |
| "learning_rate": 6.612426035502958e-07, |
| "loss": 1.472, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.8164893617021277, |
| "grad_norm": 5.047207753458083, |
| "learning_rate": 6.597633136094674e-07, |
| "loss": 1.4934, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.8191489361702128, |
| "grad_norm": 1.6578320558708661, |
| "learning_rate": 6.58284023668639e-07, |
| "loss": 1.4467, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.8218085106382979, |
| "grad_norm": 1.650877101009254, |
| "learning_rate": 6.568047337278106e-07, |
| "loss": 1.3491, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.824468085106383, |
| "grad_norm": 1.7139451577038085, |
| "learning_rate": 6.553254437869822e-07, |
| "loss": 1.4975, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8271276595744681, |
| "grad_norm": 1.6275656326818695, |
| "learning_rate": 6.538461538461538e-07, |
| "loss": 1.4493, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.8297872340425532, |
| "grad_norm": 1.693438289435893, |
| "learning_rate": 6.523668639053254e-07, |
| "loss": 1.3593, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.8324468085106383, |
| "grad_norm": 1.5252049292780119, |
| "learning_rate": 6.50887573964497e-07, |
| "loss": 1.4798, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.8351063829787234, |
| "grad_norm": 1.7006952995622482, |
| "learning_rate": 6.494082840236686e-07, |
| "loss": 1.5054, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.8377659574468085, |
| "grad_norm": 1.7203889834996966, |
| "learning_rate": 6.479289940828401e-07, |
| "loss": 1.599, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.8404255319148937, |
| "grad_norm": 1.665289055188048, |
| "learning_rate": 6.464497041420118e-07, |
| "loss": 1.47, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.8430851063829787, |
| "grad_norm": 1.783255201333473, |
| "learning_rate": 6.449704142011834e-07, |
| "loss": 1.3293, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.8457446808510638, |
| "grad_norm": 1.5474686687545494, |
| "learning_rate": 6.43491124260355e-07, |
| "loss": 1.5827, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.848404255319149, |
| "grad_norm": 1.7096057045749924, |
| "learning_rate": 6.420118343195266e-07, |
| "loss": 1.4208, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 1.660091264238197, |
| "learning_rate": 6.405325443786982e-07, |
| "loss": 1.3729, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8537234042553191, |
| "grad_norm": 2.1515758550003663, |
| "learning_rate": 6.390532544378699e-07, |
| "loss": 1.6061, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.8563829787234043, |
| "grad_norm": 1.6705826372283528, |
| "learning_rate": 6.375739644970414e-07, |
| "loss": 1.3534, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.8590425531914894, |
| "grad_norm": 1.6232024300738965, |
| "learning_rate": 6.360946745562131e-07, |
| "loss": 1.425, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.8617021276595744, |
| "grad_norm": 1.7044169574045285, |
| "learning_rate": 6.346153846153845e-07, |
| "loss": 1.5695, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.8643617021276596, |
| "grad_norm": 1.7606258681853417, |
| "learning_rate": 6.331360946745562e-07, |
| "loss": 1.418, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.8670212765957447, |
| "grad_norm": 1.5280589114761016, |
| "learning_rate": 6.316568047337277e-07, |
| "loss": 1.4349, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.8696808510638298, |
| "grad_norm": 1.7139434884413298, |
| "learning_rate": 6.301775147928994e-07, |
| "loss": 1.4371, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.8723404255319149, |
| "grad_norm": 1.5926203744807812, |
| "learning_rate": 6.28698224852071e-07, |
| "loss": 1.4015, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 1.6264161761425606, |
| "learning_rate": 6.272189349112426e-07, |
| "loss": 1.4729, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.8776595744680851, |
| "grad_norm": 1.5831228752137032, |
| "learning_rate": 6.257396449704142e-07, |
| "loss": 1.421, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8803191489361702, |
| "grad_norm": 1.5811866295220025, |
| "learning_rate": 6.242603550295858e-07, |
| "loss": 1.3628, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.8829787234042553, |
| "grad_norm": 1.5679708453260865, |
| "learning_rate": 6.227810650887574e-07, |
| "loss": 1.2859, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.8856382978723404, |
| "grad_norm": 1.6766225130373726, |
| "learning_rate": 6.213017751479289e-07, |
| "loss": 1.4369, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.8882978723404256, |
| "grad_norm": 1.8047128650814857, |
| "learning_rate": 6.198224852071005e-07, |
| "loss": 1.5913, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.8909574468085106, |
| "grad_norm": 1.6456822515106042, |
| "learning_rate": 6.183431952662722e-07, |
| "loss": 1.4972, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.8936170212765957, |
| "grad_norm": 1.552523155961138, |
| "learning_rate": 6.168639053254437e-07, |
| "loss": 1.3171, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.8962765957446809, |
| "grad_norm": 1.788183804411441, |
| "learning_rate": 6.153846153846154e-07, |
| "loss": 1.5059, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.898936170212766, |
| "grad_norm": 1.5907686060024624, |
| "learning_rate": 6.139053254437869e-07, |
| "loss": 1.1485, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.901595744680851, |
| "grad_norm": 1.7254040314022046, |
| "learning_rate": 6.124260355029586e-07, |
| "loss": 1.5628, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.9042553191489362, |
| "grad_norm": 1.6347353623664331, |
| "learning_rate": 6.109467455621301e-07, |
| "loss": 1.3704, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9069148936170213, |
| "grad_norm": 2.194464251540189, |
| "learning_rate": 6.094674556213018e-07, |
| "loss": 1.4758, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.9095744680851063, |
| "grad_norm": 1.5698776022464798, |
| "learning_rate": 6.079881656804734e-07, |
| "loss": 1.3871, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.9122340425531915, |
| "grad_norm": 1.8859732282362605, |
| "learning_rate": 6.065088757396449e-07, |
| "loss": 1.4136, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.9148936170212766, |
| "grad_norm": 1.7373147056080605, |
| "learning_rate": 6.050295857988165e-07, |
| "loss": 1.5494, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.9175531914893617, |
| "grad_norm": 1.6179407549268443, |
| "learning_rate": 6.035502958579881e-07, |
| "loss": 1.3776, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.9202127659574468, |
| "grad_norm": 1.77670135626407, |
| "learning_rate": 6.020710059171597e-07, |
| "loss": 1.3275, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.9228723404255319, |
| "grad_norm": 1.7482955670467306, |
| "learning_rate": 6.005917159763313e-07, |
| "loss": 1.4015, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.925531914893617, |
| "grad_norm": 1.6887523807534266, |
| "learning_rate": 5.991124260355029e-07, |
| "loss": 1.5069, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.9281914893617021, |
| "grad_norm": 1.514381055516736, |
| "learning_rate": 5.976331360946746e-07, |
| "loss": 1.3818, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.9308510638297872, |
| "grad_norm": 1.4907168186147164, |
| "learning_rate": 5.961538461538461e-07, |
| "loss": 1.4495, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9335106382978723, |
| "grad_norm": 2.265910373999388, |
| "learning_rate": 5.946745562130178e-07, |
| "loss": 1.2853, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 1.7992082788491501, |
| "learning_rate": 5.931952662721894e-07, |
| "loss": 1.539, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.9388297872340425, |
| "grad_norm": 1.443028062263383, |
| "learning_rate": 5.917159763313609e-07, |
| "loss": 1.324, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.9414893617021277, |
| "grad_norm": 1.6139434859203183, |
| "learning_rate": 5.902366863905324e-07, |
| "loss": 1.3336, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.9441489361702128, |
| "grad_norm": 2.252829785523421, |
| "learning_rate": 5.887573964497041e-07, |
| "loss": 1.2986, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.9468085106382979, |
| "grad_norm": 1.7284412087838827, |
| "learning_rate": 5.872781065088757e-07, |
| "loss": 1.4817, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.949468085106383, |
| "grad_norm": 1.7787571244355151, |
| "learning_rate": 5.857988165680473e-07, |
| "loss": 1.5187, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.9521276595744681, |
| "grad_norm": 2.181835688354598, |
| "learning_rate": 5.84319526627219e-07, |
| "loss": 1.5578, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.9547872340425532, |
| "grad_norm": 1.4634212657053263, |
| "learning_rate": 5.828402366863905e-07, |
| "loss": 1.2286, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.9574468085106383, |
| "grad_norm": 1.687131629579792, |
| "learning_rate": 5.813609467455622e-07, |
| "loss": 1.3256, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9601063829787234, |
| "grad_norm": 1.629444719409858, |
| "learning_rate": 5.798816568047337e-07, |
| "loss": 1.5522, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.9627659574468085, |
| "grad_norm": 1.6487449612370586, |
| "learning_rate": 5.784023668639053e-07, |
| "loss": 1.5252, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.9654255319148937, |
| "grad_norm": 1.5119623190054727, |
| "learning_rate": 5.769230769230768e-07, |
| "loss": 1.4479, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.9680851063829787, |
| "grad_norm": 1.529900871256959, |
| "learning_rate": 5.754437869822485e-07, |
| "loss": 1.4081, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.9707446808510638, |
| "grad_norm": 1.679158185017686, |
| "learning_rate": 5.739644970414201e-07, |
| "loss": 1.3219, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.973404255319149, |
| "grad_norm": 1.5743852626682602, |
| "learning_rate": 5.724852071005917e-07, |
| "loss": 1.4408, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.976063829787234, |
| "grad_norm": 1.4327135424204693, |
| "learning_rate": 5.710059171597633e-07, |
| "loss": 1.4267, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.9787234042553191, |
| "grad_norm": 1.693248001536766, |
| "learning_rate": 5.695266272189349e-07, |
| "loss": 1.459, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.9813829787234043, |
| "grad_norm": 1.6118417002332202, |
| "learning_rate": 5.680473372781065e-07, |
| "loss": 1.3239, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.9840425531914894, |
| "grad_norm": 1.5994817848229685, |
| "learning_rate": 5.665680473372781e-07, |
| "loss": 1.3316, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9867021276595744, |
| "grad_norm": 1.734698428678095, |
| "learning_rate": 5.650887573964497e-07, |
| "loss": 1.4394, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.9893617021276596, |
| "grad_norm": 2.8750724783344626, |
| "learning_rate": 5.636094674556213e-07, |
| "loss": 1.3439, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.9920212765957447, |
| "grad_norm": 1.5483975094463054, |
| "learning_rate": 5.621301775147928e-07, |
| "loss": 1.3684, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.9946808510638298, |
| "grad_norm": 1.5202730618700395, |
| "learning_rate": 5.606508875739645e-07, |
| "loss": 1.3361, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.9973404255319149, |
| "grad_norm": 1.6144865765856777, |
| "learning_rate": 5.59171597633136e-07, |
| "loss": 1.3195, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.6792801473937533, |
| "learning_rate": 5.576923076923077e-07, |
| "loss": 1.4384, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.002659574468085, |
| "grad_norm": 1.9175762077814629, |
| "learning_rate": 5.562130177514792e-07, |
| "loss": 1.4758, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.0053191489361701, |
| "grad_norm": 1.8048610851481421, |
| "learning_rate": 5.547337278106509e-07, |
| "loss": 1.4803, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.0079787234042554, |
| "grad_norm": 1.606071563190404, |
| "learning_rate": 5.532544378698225e-07, |
| "loss": 1.485, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.0106382978723405, |
| "grad_norm": 1.5572569044777356, |
| "learning_rate": 5.517751479289941e-07, |
| "loss": 1.2355, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.0132978723404256, |
| "grad_norm": 1.5959684601920348, |
| "learning_rate": 5.502958579881657e-07, |
| "loss": 1.2246, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.0159574468085106, |
| "grad_norm": 1.9674075560318893, |
| "learning_rate": 5.488165680473372e-07, |
| "loss": 1.5334, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.0186170212765957, |
| "grad_norm": 1.6680206362227628, |
| "learning_rate": 5.473372781065088e-07, |
| "loss": 1.4226, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.0212765957446808, |
| "grad_norm": 1.5700791218738284, |
| "learning_rate": 5.458579881656804e-07, |
| "loss": 1.3727, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.023936170212766, |
| "grad_norm": 1.5969942768737249, |
| "learning_rate": 5.44378698224852e-07, |
| "loss": 1.4911, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.0265957446808511, |
| "grad_norm": 1.5398360114287806, |
| "learning_rate": 5.428994082840237e-07, |
| "loss": 1.3769, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.0292553191489362, |
| "grad_norm": 1.5805625597294484, |
| "learning_rate": 5.414201183431952e-07, |
| "loss": 1.4166, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.0319148936170213, |
| "grad_norm": 1.5312252431931253, |
| "learning_rate": 5.399408284023669e-07, |
| "loss": 1.2332, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.0345744680851063, |
| "grad_norm": 2.185966499141712, |
| "learning_rate": 5.384615384615384e-07, |
| "loss": 1.3489, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.0372340425531914, |
| "grad_norm": 1.5033859343676257, |
| "learning_rate": 5.369822485207101e-07, |
| "loss": 1.4487, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0398936170212767, |
| "grad_norm": 1.6054054860368354, |
| "learning_rate": 5.355029585798815e-07, |
| "loss": 1.4788, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.0425531914893618, |
| "grad_norm": 1.6494604615754016, |
| "learning_rate": 5.340236686390532e-07, |
| "loss": 1.479, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.0452127659574468, |
| "grad_norm": 1.7222866777780232, |
| "learning_rate": 5.325443786982249e-07, |
| "loss": 1.3891, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.047872340425532, |
| "grad_norm": 1.7350078493539867, |
| "learning_rate": 5.310650887573964e-07, |
| "loss": 1.5214, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.050531914893617, |
| "grad_norm": 1.677699700420203, |
| "learning_rate": 5.295857988165681e-07, |
| "loss": 1.4027, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.053191489361702, |
| "grad_norm": 1.7218061845324277, |
| "learning_rate": 5.281065088757396e-07, |
| "loss": 1.5612, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.0558510638297873, |
| "grad_norm": 2.0460338465780015, |
| "learning_rate": 5.266272189349113e-07, |
| "loss": 1.7095, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.0585106382978724, |
| "grad_norm": 1.8707733198479073, |
| "learning_rate": 5.251479289940828e-07, |
| "loss": 1.3582, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.0611702127659575, |
| "grad_norm": 1.6674094055135629, |
| "learning_rate": 5.236686390532545e-07, |
| "loss": 1.4667, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.0638297872340425, |
| "grad_norm": 1.9223542274996348, |
| "learning_rate": 5.22189349112426e-07, |
| "loss": 1.3237, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0664893617021276, |
| "grad_norm": 1.442702870639783, |
| "learning_rate": 5.207100591715976e-07, |
| "loss": 1.3436, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.0691489361702127, |
| "grad_norm": 1.459623592531859, |
| "learning_rate": 5.192307692307692e-07, |
| "loss": 1.3075, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.071808510638298, |
| "grad_norm": 1.7736298040913328, |
| "learning_rate": 5.177514792899408e-07, |
| "loss": 1.55, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.074468085106383, |
| "grad_norm": 1.492584255658168, |
| "learning_rate": 5.162721893491124e-07, |
| "loss": 1.3287, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.077127659574468, |
| "grad_norm": 1.5311371897968131, |
| "learning_rate": 5.14792899408284e-07, |
| "loss": 1.2852, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.0797872340425532, |
| "grad_norm": 1.7056998990486645, |
| "learning_rate": 5.133136094674556e-07, |
| "loss": 1.3844, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.0824468085106382, |
| "grad_norm": 1.5754295217572547, |
| "learning_rate": 5.118343195266272e-07, |
| "loss": 1.4362, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.0851063829787233, |
| "grad_norm": 1.7118767376849466, |
| "learning_rate": 5.103550295857988e-07, |
| "loss": 1.4678, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.0877659574468086, |
| "grad_norm": 1.7720130880057632, |
| "learning_rate": 5.088757396449705e-07, |
| "loss": 1.407, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.0904255319148937, |
| "grad_norm": 1.6779654968724649, |
| "learning_rate": 5.07396449704142e-07, |
| "loss": 1.4306, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.0930851063829787, |
| "grad_norm": 1.6236129122592553, |
| "learning_rate": 5.059171597633136e-07, |
| "loss": 1.3498, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.0957446808510638, |
| "grad_norm": 1.6329048532167492, |
| "learning_rate": 5.044378698224851e-07, |
| "loss": 1.4461, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.0984042553191489, |
| "grad_norm": 1.6207024159387382, |
| "learning_rate": 5.029585798816568e-07, |
| "loss": 1.3772, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.101063829787234, |
| "grad_norm": 1.5324741841766363, |
| "learning_rate": 5.014792899408283e-07, |
| "loss": 1.1312, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.1037234042553192, |
| "grad_norm": 1.7401441557132455, |
| "learning_rate": 5e-07, |
| "loss": 1.1982, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.1063829787234043, |
| "grad_norm": 1.7504453773507886, |
| "learning_rate": 4.985207100591715e-07, |
| "loss": 1.4541, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.1090425531914894, |
| "grad_norm": 1.699882851098421, |
| "learning_rate": 4.970414201183432e-07, |
| "loss": 1.2368, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.1117021276595744, |
| "grad_norm": 1.6218516588828402, |
| "learning_rate": 4.955621301775147e-07, |
| "loss": 1.2906, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.1143617021276595, |
| "grad_norm": 1.6649091116123456, |
| "learning_rate": 4.940828402366864e-07, |
| "loss": 1.4454, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.1170212765957448, |
| "grad_norm": 1.728282227356823, |
| "learning_rate": 4.926035502958579e-07, |
| "loss": 1.4663, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.1196808510638299, |
| "grad_norm": 1.6435295189184387, |
| "learning_rate": 4.911242603550296e-07, |
| "loss": 1.4789, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.122340425531915, |
| "grad_norm": 1.8191659615562332, |
| "learning_rate": 4.896449704142011e-07, |
| "loss": 1.3986, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 1.5470082389400086, |
| "learning_rate": 4.881656804733728e-07, |
| "loss": 1.4072, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.127659574468085, |
| "grad_norm": 1.581839768866324, |
| "learning_rate": 4.866863905325443e-07, |
| "loss": 1.3122, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.1303191489361701, |
| "grad_norm": 1.4620677635311095, |
| "learning_rate": 4.852071005917159e-07, |
| "loss": 1.2643, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.1329787234042552, |
| "grad_norm": 1.6707102916564711, |
| "learning_rate": 4.837278106508875e-07, |
| "loss": 1.3747, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.1356382978723405, |
| "grad_norm": 1.5396285202284683, |
| "learning_rate": 4.822485207100592e-07, |
| "loss": 1.3101, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.1382978723404256, |
| "grad_norm": 1.8606687901078265, |
| "learning_rate": 4.807692307692307e-07, |
| "loss": 1.3172, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.1409574468085106, |
| "grad_norm": 1.6119139560046312, |
| "learning_rate": 4.792899408284024e-07, |
| "loss": 1.3865, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.1436170212765957, |
| "grad_norm": 1.715672112601465, |
| "learning_rate": 4.778106508875739e-07, |
| "loss": 1.4168, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.1462765957446808, |
| "grad_norm": 1.6367162736314051, |
| "learning_rate": 4.7633136094674555e-07, |
| "loss": 1.6202, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.148936170212766, |
| "grad_norm": 1.6173047746530647, |
| "learning_rate": 4.748520710059171e-07, |
| "loss": 1.4345, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.1515957446808511, |
| "grad_norm": 1.591852292459417, |
| "learning_rate": 4.733727810650887e-07, |
| "loss": 1.3504, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.1542553191489362, |
| "grad_norm": 1.704091419091409, |
| "learning_rate": 4.7189349112426035e-07, |
| "loss": 1.3978, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.1569148936170213, |
| "grad_norm": 1.6750388468322808, |
| "learning_rate": 4.7041420118343195e-07, |
| "loss": 1.5323, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.1595744680851063, |
| "grad_norm": 1.550611356946591, |
| "learning_rate": 4.6893491124260356e-07, |
| "loss": 1.3516, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.1622340425531914, |
| "grad_norm": 1.6666235759250934, |
| "learning_rate": 4.674556213017751e-07, |
| "loss": 1.3193, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.1648936170212765, |
| "grad_norm": 1.6060648830034072, |
| "learning_rate": 4.659763313609467e-07, |
| "loss": 1.4802, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.1675531914893618, |
| "grad_norm": 2.7759623465499113, |
| "learning_rate": 4.644970414201183e-07, |
| "loss": 1.3673, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.1702127659574468, |
| "grad_norm": 1.6142500584687862, |
| "learning_rate": 4.630177514792899e-07, |
| "loss": 1.2367, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.172872340425532, |
| "grad_norm": 1.6293255382971552, |
| "learning_rate": 4.6153846153846156e-07, |
| "loss": 1.4771, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.175531914893617, |
| "grad_norm": 1.6166636037633662, |
| "learning_rate": 4.600591715976331e-07, |
| "loss": 1.3891, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.178191489361702, |
| "grad_norm": 1.6156668770120142, |
| "learning_rate": 4.585798816568047e-07, |
| "loss": 1.3015, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.1808510638297873, |
| "grad_norm": 1.541456190983287, |
| "learning_rate": 4.571005917159763e-07, |
| "loss": 1.325, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.1835106382978724, |
| "grad_norm": 1.5371528822910774, |
| "learning_rate": 4.556213017751479e-07, |
| "loss": 1.391, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.1861702127659575, |
| "grad_norm": 1.8047509120352834, |
| "learning_rate": 4.541420118343195e-07, |
| "loss": 1.3802, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.1888297872340425, |
| "grad_norm": 1.4772002442457595, |
| "learning_rate": 4.5266272189349107e-07, |
| "loss": 1.2972, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.1914893617021276, |
| "grad_norm": 1.4833680602448407, |
| "learning_rate": 4.5118343195266267e-07, |
| "loss": 1.3515, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.1941489361702127, |
| "grad_norm": 1.557530779220624, |
| "learning_rate": 4.497041420118343e-07, |
| "loss": 1.367, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.196808510638298, |
| "grad_norm": 1.8027220753490893, |
| "learning_rate": 4.482248520710059e-07, |
| "loss": 1.5443, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.199468085106383, |
| "grad_norm": 1.5684441226470547, |
| "learning_rate": 4.467455621301775e-07, |
| "loss": 1.3059, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.202127659574468, |
| "grad_norm": 1.593970040483734, |
| "learning_rate": 4.4526627218934907e-07, |
| "loss": 1.2474, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.2047872340425532, |
| "grad_norm": 1.7048839620218588, |
| "learning_rate": 4.437869822485207e-07, |
| "loss": 1.4689, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.2074468085106382, |
| "grad_norm": 1.6500745120708162, |
| "learning_rate": 4.423076923076923e-07, |
| "loss": 1.3768, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.2101063829787235, |
| "grad_norm": 1.6649022378945304, |
| "learning_rate": 4.408284023668639e-07, |
| "loss": 1.6992, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.2127659574468086, |
| "grad_norm": 2.150475218838757, |
| "learning_rate": 4.3934911242603553e-07, |
| "loss": 1.4338, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.2154255319148937, |
| "grad_norm": 1.4810681098612493, |
| "learning_rate": 4.378698224852071e-07, |
| "loss": 1.2523, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.2180851063829787, |
| "grad_norm": 1.5941194592252996, |
| "learning_rate": 4.363905325443787e-07, |
| "loss": 1.5144, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.2207446808510638, |
| "grad_norm": 2.9846606692055855, |
| "learning_rate": 4.349112426035503e-07, |
| "loss": 1.4394, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.2234042553191489, |
| "grad_norm": 1.5758645515570575, |
| "learning_rate": 4.334319526627219e-07, |
| "loss": 1.314, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.226063829787234, |
| "grad_norm": 2.0348791713600374, |
| "learning_rate": 4.3195266272189343e-07, |
| "loss": 1.3581, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.2287234042553192, |
| "grad_norm": 1.65492749945659, |
| "learning_rate": 4.3047337278106503e-07, |
| "loss": 1.5053, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.2313829787234043, |
| "grad_norm": 1.6722641251398465, |
| "learning_rate": 4.289940828402367e-07, |
| "loss": 1.4641, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.2340425531914894, |
| "grad_norm": 1.5474460973272384, |
| "learning_rate": 4.275147928994083e-07, |
| "loss": 1.4182, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.2367021276595744, |
| "grad_norm": 1.7345506046508428, |
| "learning_rate": 4.260355029585799e-07, |
| "loss": 1.3139, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.2393617021276595, |
| "grad_norm": 1.7713814803315784, |
| "learning_rate": 4.2455621301775144e-07, |
| "loss": 1.4832, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.2420212765957448, |
| "grad_norm": 1.5498103025703653, |
| "learning_rate": 4.2307692307692304e-07, |
| "loss": 1.4115, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.2446808510638299, |
| "grad_norm": 1.5577840972729278, |
| "learning_rate": 4.2159763313609464e-07, |
| "loss": 1.3256, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.247340425531915, |
| "grad_norm": 1.578861933203747, |
| "learning_rate": 4.2011834319526624e-07, |
| "loss": 1.2007, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 1.6507686385229483, |
| "learning_rate": 4.1863905325443785e-07, |
| "loss": 1.3944, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.252659574468085, |
| "grad_norm": 1.7990714539210155, |
| "learning_rate": 4.1715976331360945e-07, |
| "loss": 1.4632, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.2553191489361701, |
| "grad_norm": 1.7618234269198014, |
| "learning_rate": 4.1568047337278105e-07, |
| "loss": 1.3313, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.2579787234042552, |
| "grad_norm": 1.5213599490802718, |
| "learning_rate": 4.1420118343195265e-07, |
| "loss": 1.5047, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.2606382978723405, |
| "grad_norm": 1.6052633557883167, |
| "learning_rate": 4.1272189349112425e-07, |
| "loss": 1.4177, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.2632978723404256, |
| "grad_norm": 1.9773267391803975, |
| "learning_rate": 4.1124260355029585e-07, |
| "loss": 1.2606, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.2659574468085106, |
| "grad_norm": 1.7023545368582853, |
| "learning_rate": 4.097633136094674e-07, |
| "loss": 1.3522, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.2686170212765957, |
| "grad_norm": 1.657218002450086, |
| "learning_rate": 4.08284023668639e-07, |
| "loss": 1.307, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.2712765957446808, |
| "grad_norm": 1.6560677482089055, |
| "learning_rate": 4.0680473372781066e-07, |
| "loss": 1.5599, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.273936170212766, |
| "grad_norm": 1.5827603390864668, |
| "learning_rate": 4.0532544378698226e-07, |
| "loss": 1.3864, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "grad_norm": 1.490492812079521, |
| "learning_rate": 4.0384615384615386e-07, |
| "loss": 1.3238, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.2792553191489362, |
| "grad_norm": 1.4427306337618429, |
| "learning_rate": 4.023668639053254e-07, |
| "loss": 1.3381, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.2819148936170213, |
| "grad_norm": 1.8739427128710302, |
| "learning_rate": 4.00887573964497e-07, |
| "loss": 1.5195, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.2845744680851063, |
| "grad_norm": 1.4205586135195478, |
| "learning_rate": 3.994082840236686e-07, |
| "loss": 1.3342, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.2872340425531914, |
| "grad_norm": 1.4978308888768397, |
| "learning_rate": 3.979289940828402e-07, |
| "loss": 1.3198, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.2898936170212765, |
| "grad_norm": 1.453096779169849, |
| "learning_rate": 3.9644970414201187e-07, |
| "loss": 1.0572, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.2925531914893618, |
| "grad_norm": 1.9700050592115472, |
| "learning_rate": 3.949704142011834e-07, |
| "loss": 1.5476, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.2952127659574468, |
| "grad_norm": 1.52650807341244, |
| "learning_rate": 3.93491124260355e-07, |
| "loss": 1.3027, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.297872340425532, |
| "grad_norm": 1.6797022619264115, |
| "learning_rate": 3.920118343195266e-07, |
| "loss": 1.4014, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.300531914893617, |
| "grad_norm": 1.4684740172475148, |
| "learning_rate": 3.905325443786982e-07, |
| "loss": 1.2891, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.3031914893617023, |
| "grad_norm": 1.7009794386978352, |
| "learning_rate": 3.8905325443786977e-07, |
| "loss": 1.498, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.3058510638297873, |
| "grad_norm": 1.8679273089411261, |
| "learning_rate": 3.8757396449704137e-07, |
| "loss": 1.5135, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.3085106382978724, |
| "grad_norm": 2.6124670473785723, |
| "learning_rate": 3.8609467455621297e-07, |
| "loss": 1.4419, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.3111702127659575, |
| "grad_norm": 1.531497234704401, |
| "learning_rate": 3.8461538461538463e-07, |
| "loss": 1.441, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.3138297872340425, |
| "grad_norm": 1.6983808183380165, |
| "learning_rate": 3.8313609467455623e-07, |
| "loss": 1.3176, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.3164893617021276, |
| "grad_norm": 1.7106971746124235, |
| "learning_rate": 3.816568047337278e-07, |
| "loss": 1.2673, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.3191489361702127, |
| "grad_norm": 1.7661676163840787, |
| "learning_rate": 3.801775147928994e-07, |
| "loss": 1.6258, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.3218085106382977, |
| "grad_norm": 1.6248132891862335, |
| "learning_rate": 3.78698224852071e-07, |
| "loss": 1.3813, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.324468085106383, |
| "grad_norm": 1.5079876101311178, |
| "learning_rate": 3.772189349112426e-07, |
| "loss": 1.3491, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.327127659574468, |
| "grad_norm": 1.9080784267885529, |
| "learning_rate": 3.757396449704142e-07, |
| "loss": 1.4263, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.3297872340425532, |
| "grad_norm": 1.7134136936747053, |
| "learning_rate": 3.742603550295858e-07, |
| "loss": 1.4804, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.3324468085106382, |
| "grad_norm": 2.6890220767611934, |
| "learning_rate": 3.727810650887574e-07, |
| "loss": 1.4301, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.3351063829787235, |
| "grad_norm": 1.4491614153026324, |
| "learning_rate": 3.71301775147929e-07, |
| "loss": 1.2226, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.3377659574468086, |
| "grad_norm": 1.4673050610910694, |
| "learning_rate": 3.698224852071006e-07, |
| "loss": 1.2824, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.3404255319148937, |
| "grad_norm": 1.5811077672143066, |
| "learning_rate": 3.683431952662722e-07, |
| "loss": 1.4056, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.3430851063829787, |
| "grad_norm": 1.784207214911482, |
| "learning_rate": 3.6686390532544374e-07, |
| "loss": 1.4456, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.3457446808510638, |
| "grad_norm": 1.7545013437687231, |
| "learning_rate": 3.6538461538461534e-07, |
| "loss": 1.4255, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.3484042553191489, |
| "grad_norm": 1.5378814658235478, |
| "learning_rate": 3.63905325443787e-07, |
| "loss": 1.4752, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.351063829787234, |
| "grad_norm": 1.5663338737224375, |
| "learning_rate": 3.624260355029586e-07, |
| "loss": 1.4324, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.3537234042553192, |
| "grad_norm": 1.5530747526395428, |
| "learning_rate": 3.609467455621302e-07, |
| "loss": 1.3294, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.3563829787234043, |
| "grad_norm": 1.5555181536643647, |
| "learning_rate": 3.5946745562130175e-07, |
| "loss": 1.2615, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.3590425531914894, |
| "grad_norm": 1.561907923100703, |
| "learning_rate": 3.5798816568047335e-07, |
| "loss": 1.4247, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.3617021276595744, |
| "grad_norm": 1.561727561754077, |
| "learning_rate": 3.5650887573964495e-07, |
| "loss": 1.442, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.3643617021276595, |
| "grad_norm": 1.571729864924405, |
| "learning_rate": 3.5502958579881655e-07, |
| "loss": 1.3471, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.3670212765957448, |
| "grad_norm": 1.6501651767936791, |
| "learning_rate": 3.535502958579881e-07, |
| "loss": 1.4957, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.3696808510638299, |
| "grad_norm": 1.7712985007484374, |
| "learning_rate": 3.5207100591715975e-07, |
| "loss": 1.3116, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.372340425531915, |
| "grad_norm": 1.6021754882790804, |
| "learning_rate": 3.5059171597633135e-07, |
| "loss": 1.3507, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 1.9744682223829157, |
| "learning_rate": 3.4911242603550296e-07, |
| "loss": 1.3187, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.377659574468085, |
| "grad_norm": 1.437548678030046, |
| "learning_rate": 3.4763313609467456e-07, |
| "loss": 1.3055, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.3803191489361701, |
| "grad_norm": 1.7376163882785898, |
| "learning_rate": 3.461538461538461e-07, |
| "loss": 1.3712, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.3829787234042552, |
| "grad_norm": 1.709895613646418, |
| "learning_rate": 3.446745562130177e-07, |
| "loss": 1.4941, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.3856382978723405, |
| "grad_norm": 1.5064773577923485, |
| "learning_rate": 3.431952662721893e-07, |
| "loss": 1.3598, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.3882978723404256, |
| "grad_norm": 1.6991123209979573, |
| "learning_rate": 3.4171597633136096e-07, |
| "loss": 1.3859, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.3909574468085106, |
| "grad_norm": 1.611358975201833, |
| "learning_rate": 3.4023668639053256e-07, |
| "loss": 1.3624, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.3936170212765957, |
| "grad_norm": 1.5235030722566782, |
| "learning_rate": 3.387573964497041e-07, |
| "loss": 1.306, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.3962765957446808, |
| "grad_norm": 1.5097567026286727, |
| "learning_rate": 3.372781065088757e-07, |
| "loss": 1.3098, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.398936170212766, |
| "grad_norm": 1.5501867735527708, |
| "learning_rate": 3.357988165680473e-07, |
| "loss": 1.2582, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.4015957446808511, |
| "grad_norm": 1.5737400889065642, |
| "learning_rate": 3.343195266272189e-07, |
| "loss": 1.4226, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.4042553191489362, |
| "grad_norm": 1.8163702192116935, |
| "learning_rate": 3.328402366863905e-07, |
| "loss": 1.45, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.4069148936170213, |
| "grad_norm": 1.6761526127572786, |
| "learning_rate": 3.313609467455621e-07, |
| "loss": 1.4133, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.4095744680851063, |
| "grad_norm": 1.7300976770863319, |
| "learning_rate": 3.298816568047337e-07, |
| "loss": 1.5036, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.4122340425531914, |
| "grad_norm": 1.7219520565452116, |
| "learning_rate": 3.284023668639053e-07, |
| "loss": 1.4172, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.4148936170212765, |
| "grad_norm": 1.8137826656078981, |
| "learning_rate": 3.269230769230769e-07, |
| "loss": 1.5673, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.4175531914893618, |
| "grad_norm": 1.9605494871424245, |
| "learning_rate": 3.254437869822485e-07, |
| "loss": 1.4421, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.4202127659574468, |
| "grad_norm": 1.5063443324517625, |
| "learning_rate": 3.239644970414201e-07, |
| "loss": 1.3858, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.422872340425532, |
| "grad_norm": 1.5929428001187216, |
| "learning_rate": 3.224852071005917e-07, |
| "loss": 1.4245, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.425531914893617, |
| "grad_norm": 1.5090052181328104, |
| "learning_rate": 3.210059171597633e-07, |
| "loss": 1.185, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.4281914893617023, |
| "grad_norm": 1.7599894966549008, |
| "learning_rate": 3.1952662721893493e-07, |
| "loss": 1.5936, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.4308510638297873, |
| "grad_norm": 1.8274682976599146, |
| "learning_rate": 3.1804733727810653e-07, |
| "loss": 1.5133, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.4335106382978724, |
| "grad_norm": 1.6304863965807304, |
| "learning_rate": 3.165680473372781e-07, |
| "loss": 1.4513, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.4361702127659575, |
| "grad_norm": 1.865748149954226, |
| "learning_rate": 3.150887573964497e-07, |
| "loss": 1.579, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.4388297872340425, |
| "grad_norm": 1.497890260310679, |
| "learning_rate": 3.136094674556213e-07, |
| "loss": 1.3996, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.4414893617021276, |
| "grad_norm": 1.5505684579290944, |
| "learning_rate": 3.121301775147929e-07, |
| "loss": 1.4765, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.4441489361702127, |
| "grad_norm": 1.5934674629645669, |
| "learning_rate": 3.1065088757396443e-07, |
| "loss": 1.2206, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.4468085106382977, |
| "grad_norm": 2.5003698075483776, |
| "learning_rate": 3.091715976331361e-07, |
| "loss": 1.4785, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.449468085106383, |
| "grad_norm": 1.5430363507491573, |
| "learning_rate": 3.076923076923077e-07, |
| "loss": 1.3596, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.452127659574468, |
| "grad_norm": 1.6114525579321486, |
| "learning_rate": 3.062130177514793e-07, |
| "loss": 1.3768, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.4547872340425532, |
| "grad_norm": 1.51705181171149, |
| "learning_rate": 3.047337278106509e-07, |
| "loss": 1.3161, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.4574468085106382, |
| "grad_norm": 1.659706683154854, |
| "learning_rate": 3.0325443786982244e-07, |
| "loss": 1.4808, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.4601063829787235, |
| "grad_norm": 1.6484483474446856, |
| "learning_rate": 3.0177514792899404e-07, |
| "loss": 1.398, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.4627659574468086, |
| "grad_norm": 1.6054531570011474, |
| "learning_rate": 3.0029585798816564e-07, |
| "loss": 1.1421, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.4654255319148937, |
| "grad_norm": 1.5260699880356663, |
| "learning_rate": 2.988165680473373e-07, |
| "loss": 1.4223, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.4680851063829787, |
| "grad_norm": 1.5022650148070196, |
| "learning_rate": 2.973372781065089e-07, |
| "loss": 1.3579, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.4707446808510638, |
| "grad_norm": 1.696210632092268, |
| "learning_rate": 2.9585798816568045e-07, |
| "loss": 1.4437, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.4734042553191489, |
| "grad_norm": 1.50505509525979, |
| "learning_rate": 2.9437869822485205e-07, |
| "loss": 1.3666, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.476063829787234, |
| "grad_norm": 1.6283581586889138, |
| "learning_rate": 2.9289940828402365e-07, |
| "loss": 1.3807, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.4787234042553192, |
| "grad_norm": 1.57845733466985, |
| "learning_rate": 2.9142011834319525e-07, |
| "loss": 1.4947, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.4813829787234043, |
| "grad_norm": 1.6269594263364617, |
| "learning_rate": 2.8994082840236686e-07, |
| "loss": 1.5315, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.4840425531914894, |
| "grad_norm": 1.4901674188093539, |
| "learning_rate": 2.884615384615384e-07, |
| "loss": 1.2067, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.4867021276595744, |
| "grad_norm": 1.608926803251607, |
| "learning_rate": 2.8698224852071006e-07, |
| "loss": 1.4501, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.4893617021276595, |
| "grad_norm": 1.5736272188001768, |
| "learning_rate": 2.8550295857988166e-07, |
| "loss": 1.4938, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.4920212765957448, |
| "grad_norm": 1.6178988306695008, |
| "learning_rate": 2.8402366863905326e-07, |
| "loss": 1.2858, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.4946808510638299, |
| "grad_norm": 1.612098241628475, |
| "learning_rate": 2.8254437869822486e-07, |
| "loss": 1.3793, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.497340425531915, |
| "grad_norm": 1.521850228548639, |
| "learning_rate": 2.810650887573964e-07, |
| "loss": 1.3616, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.4283693834886921, |
| "learning_rate": 2.79585798816568e-07, |
| "loss": 1.2373, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.502659574468085, |
| "grad_norm": 1.4614575118454327, |
| "learning_rate": 2.781065088757396e-07, |
| "loss": 1.3506, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.5053191489361701, |
| "grad_norm": 4.833934856122629, |
| "learning_rate": 2.7662721893491127e-07, |
| "loss": 1.3368, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.5079787234042552, |
| "grad_norm": 1.5417407593664367, |
| "learning_rate": 2.7514792899408287e-07, |
| "loss": 1.3806, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.5106382978723403, |
| "grad_norm": 1.3942611390001125, |
| "learning_rate": 2.736686390532544e-07, |
| "loss": 1.2778, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.5132978723404256, |
| "grad_norm": 1.5232973474443783, |
| "learning_rate": 2.72189349112426e-07, |
| "loss": 1.5106, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.5159574468085106, |
| "grad_norm": 1.6181295111494955, |
| "learning_rate": 2.707100591715976e-07, |
| "loss": 1.3182, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.5186170212765957, |
| "grad_norm": 1.4905875051329172, |
| "learning_rate": 2.692307692307692e-07, |
| "loss": 1.359, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.521276595744681, |
| "grad_norm": 1.5438422326091557, |
| "learning_rate": 2.6775147928994077e-07, |
| "loss": 1.4581, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.523936170212766, |
| "grad_norm": 1.6689444553647594, |
| "learning_rate": 2.662721893491124e-07, |
| "loss": 1.4416, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.5265957446808511, |
| "grad_norm": 1.732092721800618, |
| "learning_rate": 2.6479289940828403e-07, |
| "loss": 1.4653, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.5292553191489362, |
| "grad_norm": 1.5939357125781168, |
| "learning_rate": 2.6331360946745563e-07, |
| "loss": 1.3659, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.5319148936170213, |
| "grad_norm": 1.619819379203523, |
| "learning_rate": 2.6183431952662723e-07, |
| "loss": 1.4057, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.5345744680851063, |
| "grad_norm": 1.5228031500567076, |
| "learning_rate": 2.603550295857988e-07, |
| "loss": 1.3322, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.5372340425531914, |
| "grad_norm": 1.6403075138073668, |
| "learning_rate": 2.588757396449704e-07, |
| "loss": 1.3243, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.5398936170212765, |
| "grad_norm": 1.6158463818930031, |
| "learning_rate": 2.57396449704142e-07, |
| "loss": 1.3743, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.5425531914893615, |
| "grad_norm": 1.4401607766731626, |
| "learning_rate": 2.559171597633136e-07, |
| "loss": 1.3209, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.5452127659574468, |
| "grad_norm": 1.610458527778034, |
| "learning_rate": 2.5443786982248524e-07, |
| "loss": 1.437, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.547872340425532, |
| "grad_norm": 1.4720391313596763, |
| "learning_rate": 2.529585798816568e-07, |
| "loss": 1.2406, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.550531914893617, |
| "grad_norm": 1.4693642812943966, |
| "learning_rate": 2.514792899408284e-07, |
| "loss": 1.3345, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.5531914893617023, |
| "grad_norm": 1.6024699547818029, |
| "learning_rate": 2.5e-07, |
| "loss": 1.4164, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.5558510638297873, |
| "grad_norm": 1.602502091357314, |
| "learning_rate": 2.485207100591716e-07, |
| "loss": 1.4412, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.5585106382978724, |
| "grad_norm": 1.7241679714315328, |
| "learning_rate": 2.470414201183432e-07, |
| "loss": 1.331, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.5611702127659575, |
| "grad_norm": 1.7371187244572857, |
| "learning_rate": 2.455621301775148e-07, |
| "loss": 1.4532, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.5638297872340425, |
| "grad_norm": 1.4995956670676633, |
| "learning_rate": 2.440828402366864e-07, |
| "loss": 1.2702, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.5664893617021276, |
| "grad_norm": 1.4659221291046236, |
| "learning_rate": 2.4260355029585794e-07, |
| "loss": 1.4754, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.5691489361702127, |
| "grad_norm": 1.5385805721266792, |
| "learning_rate": 2.411242603550296e-07, |
| "loss": 1.5509, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.5718085106382977, |
| "grad_norm": 1.5161262548508925, |
| "learning_rate": 2.396449704142012e-07, |
| "loss": 1.3936, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.574468085106383, |
| "grad_norm": 1.4666556990097799, |
| "learning_rate": 2.3816568047337277e-07, |
| "loss": 1.3143, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.577127659574468, |
| "grad_norm": 1.6300523417207398, |
| "learning_rate": 2.3668639053254435e-07, |
| "loss": 1.3385, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.5797872340425532, |
| "grad_norm": 1.5121985962743036, |
| "learning_rate": 2.3520710059171598e-07, |
| "loss": 1.4693, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.5824468085106385, |
| "grad_norm": 1.6977627534281994, |
| "learning_rate": 2.3372781065088755e-07, |
| "loss": 1.5827, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.5851063829787235, |
| "grad_norm": 1.4675438168952388, |
| "learning_rate": 2.3224852071005915e-07, |
| "loss": 1.4037, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.5877659574468086, |
| "grad_norm": 1.5087620408684652, |
| "learning_rate": 2.3076923076923078e-07, |
| "loss": 1.2713, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.5904255319148937, |
| "grad_norm": 1.853798719037303, |
| "learning_rate": 2.2928994082840236e-07, |
| "loss": 1.4517, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.5930851063829787, |
| "grad_norm": 1.5624391162454545, |
| "learning_rate": 2.2781065088757396e-07, |
| "loss": 1.5716, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.5957446808510638, |
| "grad_norm": 1.5647362537380562, |
| "learning_rate": 2.2633136094674553e-07, |
| "loss": 1.2679, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.5984042553191489, |
| "grad_norm": 1.5028293469540326, |
| "learning_rate": 2.2485207100591716e-07, |
| "loss": 1.3477, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.601063829787234, |
| "grad_norm": 1.5616178692766567, |
| "learning_rate": 2.2337278106508876e-07, |
| "loss": 1.349, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.603723404255319, |
| "grad_norm": 1.5652068533404448, |
| "learning_rate": 2.2189349112426034e-07, |
| "loss": 1.3408, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.6063829787234043, |
| "grad_norm": 1.8312281167867779, |
| "learning_rate": 2.2041420118343194e-07, |
| "loss": 1.5744, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.6090425531914894, |
| "grad_norm": 1.5113532834536092, |
| "learning_rate": 2.1893491124260354e-07, |
| "loss": 1.5116, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.6117021276595744, |
| "grad_norm": 1.4148002933798485, |
| "learning_rate": 2.1745562130177514e-07, |
| "loss": 1.3254, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.6143617021276597, |
| "grad_norm": 1.4128390757612144, |
| "learning_rate": 2.1597633136094672e-07, |
| "loss": 1.3424, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.6170212765957448, |
| "grad_norm": 1.664151543039297, |
| "learning_rate": 2.1449704142011834e-07, |
| "loss": 1.4507, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.6196808510638299, |
| "grad_norm": 1.5001892924079347, |
| "learning_rate": 2.1301775147928995e-07, |
| "loss": 1.3598, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.622340425531915, |
| "grad_norm": 1.7189011247258703, |
| "learning_rate": 2.1153846153846152e-07, |
| "loss": 1.4798, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 1.4495039913652832, |
| "learning_rate": 2.1005917159763312e-07, |
| "loss": 1.1879, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.627659574468085, |
| "grad_norm": 1.4863964571390131, |
| "learning_rate": 2.0857988165680472e-07, |
| "loss": 1.4149, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.6303191489361701, |
| "grad_norm": 1.470842696782351, |
| "learning_rate": 2.0710059171597633e-07, |
| "loss": 1.5213, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.6329787234042552, |
| "grad_norm": 1.5332931589309218, |
| "learning_rate": 2.0562130177514793e-07, |
| "loss": 1.3847, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.6356382978723403, |
| "grad_norm": 1.5012230655181953, |
| "learning_rate": 2.041420118343195e-07, |
| "loss": 1.2194, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.6382978723404256, |
| "grad_norm": 1.4592244922211661, |
| "learning_rate": 2.0266272189349113e-07, |
| "loss": 1.2863, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.6409574468085106, |
| "grad_norm": 1.6194968573694928, |
| "learning_rate": 2.011834319526627e-07, |
| "loss": 1.563, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.6436170212765957, |
| "grad_norm": 1.5398995693701385, |
| "learning_rate": 1.997041420118343e-07, |
| "loss": 1.5, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.646276595744681, |
| "grad_norm": 1.803830954994613, |
| "learning_rate": 1.9822485207100593e-07, |
| "loss": 1.3459, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.648936170212766, |
| "grad_norm": 1.5731270083148248, |
| "learning_rate": 1.967455621301775e-07, |
| "loss": 1.3277, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.6515957446808511, |
| "grad_norm": 1.6370008858204694, |
| "learning_rate": 1.952662721893491e-07, |
| "loss": 1.4752, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.6542553191489362, |
| "grad_norm": 1.3905339157621093, |
| "learning_rate": 1.9378698224852069e-07, |
| "loss": 1.1591, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.6569148936170213, |
| "grad_norm": 1.521784820078054, |
| "learning_rate": 1.9230769230769231e-07, |
| "loss": 1.348, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.6595744680851063, |
| "grad_norm": 2.480779673395715, |
| "learning_rate": 1.908284023668639e-07, |
| "loss": 1.3468, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.6622340425531914, |
| "grad_norm": 1.5047866424190777, |
| "learning_rate": 1.893491124260355e-07, |
| "loss": 1.3808, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.6648936170212765, |
| "grad_norm": 1.5186127777273435, |
| "learning_rate": 1.878698224852071e-07, |
| "loss": 1.4201, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.6675531914893615, |
| "grad_norm": 1.4407427328000266, |
| "learning_rate": 1.863905325443787e-07, |
| "loss": 1.273, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.6702127659574468, |
| "grad_norm": 1.5224116074533014, |
| "learning_rate": 1.849112426035503e-07, |
| "loss": 1.2098, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.672872340425532, |
| "grad_norm": 1.527239003211648, |
| "learning_rate": 1.8343195266272187e-07, |
| "loss": 1.3724, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.675531914893617, |
| "grad_norm": 1.6525871512419401, |
| "learning_rate": 1.819526627218935e-07, |
| "loss": 1.3946, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.6781914893617023, |
| "grad_norm": 3.0200043340992933, |
| "learning_rate": 1.804733727810651e-07, |
| "loss": 1.4742, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.6808510638297873, |
| "grad_norm": 1.5029965510376364, |
| "learning_rate": 1.7899408284023667e-07, |
| "loss": 1.3623, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.6835106382978724, |
| "grad_norm": 1.5389625013367383, |
| "learning_rate": 1.7751479289940827e-07, |
| "loss": 1.5043, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.6861702127659575, |
| "grad_norm": 1.5608661501656413, |
| "learning_rate": 1.7603550295857988e-07, |
| "loss": 1.2883, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.6888297872340425, |
| "grad_norm": 1.6847845057440693, |
| "learning_rate": 1.7455621301775148e-07, |
| "loss": 1.4244, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.6914893617021276, |
| "grad_norm": 1.5793904433648327, |
| "learning_rate": 1.7307692307692305e-07, |
| "loss": 1.4062, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.6941489361702127, |
| "grad_norm": 1.4350293530642095, |
| "learning_rate": 1.7159763313609465e-07, |
| "loss": 1.2754, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.6968085106382977, |
| "grad_norm": 1.902506858522582, |
| "learning_rate": 1.7011834319526628e-07, |
| "loss": 1.4541, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.699468085106383, |
| "grad_norm": 1.478754263683889, |
| "learning_rate": 1.6863905325443786e-07, |
| "loss": 1.3463, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "grad_norm": 1.6464724285737642, |
| "learning_rate": 1.6715976331360946e-07, |
| "loss": 1.3807, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.7047872340425532, |
| "grad_norm": 1.6125752749357112, |
| "learning_rate": 1.6568047337278106e-07, |
| "loss": 1.2933, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.7074468085106385, |
| "grad_norm": 1.5928623495071816, |
| "learning_rate": 1.6420118343195266e-07, |
| "loss": 1.4326, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.7101063829787235, |
| "grad_norm": 1.5193190242572798, |
| "learning_rate": 1.6272189349112426e-07, |
| "loss": 1.3588, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.7127659574468086, |
| "grad_norm": 1.5482920311769846, |
| "learning_rate": 1.6124260355029584e-07, |
| "loss": 1.3839, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.7154255319148937, |
| "grad_norm": 1.8407335336806905, |
| "learning_rate": 1.5976331360946747e-07, |
| "loss": 1.3248, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.7180851063829787, |
| "grad_norm": 1.6055785649743377, |
| "learning_rate": 1.5828402366863904e-07, |
| "loss": 1.3872, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.7207446808510638, |
| "grad_norm": 1.6297496194969232, |
| "learning_rate": 1.5680473372781064e-07, |
| "loss": 1.389, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.7234042553191489, |
| "grad_norm": 1.577321745146047, |
| "learning_rate": 1.5532544378698222e-07, |
| "loss": 1.1947, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.726063829787234, |
| "grad_norm": 1.6447713137577962, |
| "learning_rate": 1.5384615384615385e-07, |
| "loss": 1.2652, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.728723404255319, |
| "grad_norm": 1.6234194331407543, |
| "learning_rate": 1.5236686390532545e-07, |
| "loss": 1.4239, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.7313829787234043, |
| "grad_norm": 1.532776130454777, |
| "learning_rate": 1.5088757396449702e-07, |
| "loss": 1.3875, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.7340425531914894, |
| "grad_norm": 1.4837535962878305, |
| "learning_rate": 1.4940828402366865e-07, |
| "loss": 1.2059, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.7367021276595744, |
| "grad_norm": 1.5395205053467318, |
| "learning_rate": 1.4792899408284022e-07, |
| "loss": 1.3513, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.7393617021276597, |
| "grad_norm": 1.4112077844892696, |
| "learning_rate": 1.4644970414201183e-07, |
| "loss": 1.3336, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.7420212765957448, |
| "grad_norm": 1.481010800777514, |
| "learning_rate": 1.4497041420118343e-07, |
| "loss": 1.4028, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.7446808510638299, |
| "grad_norm": 1.4564408238676725, |
| "learning_rate": 1.4349112426035503e-07, |
| "loss": 1.3502, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.747340425531915, |
| "grad_norm": 1.6956227102239596, |
| "learning_rate": 1.4201183431952663e-07, |
| "loss": 1.5672, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 1.5705454639314052, |
| "learning_rate": 1.405325443786982e-07, |
| "loss": 1.4109, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.752659574468085, |
| "grad_norm": 1.5656622358755812, |
| "learning_rate": 1.390532544378698e-07, |
| "loss": 1.557, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.7553191489361701, |
| "grad_norm": 1.8848625197729474, |
| "learning_rate": 1.3757396449704143e-07, |
| "loss": 1.4017, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.7579787234042552, |
| "grad_norm": 1.4196764538431994, |
| "learning_rate": 1.36094674556213e-07, |
| "loss": 1.2331, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.7606382978723403, |
| "grad_norm": 1.4675927168298655, |
| "learning_rate": 1.346153846153846e-07, |
| "loss": 1.4689, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.7632978723404256, |
| "grad_norm": 1.6895719453339277, |
| "learning_rate": 1.331360946745562e-07, |
| "loss": 1.6055, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.7659574468085106, |
| "grad_norm": 1.6565509018980442, |
| "learning_rate": 1.3165680473372781e-07, |
| "loss": 1.346, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.7686170212765957, |
| "grad_norm": 1.6111421234975374, |
| "learning_rate": 1.301775147928994e-07, |
| "loss": 1.3318, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.771276595744681, |
| "grad_norm": 1.5477525938145107, |
| "learning_rate": 1.28698224852071e-07, |
| "loss": 1.4311, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.773936170212766, |
| "grad_norm": 1.4344548853484294, |
| "learning_rate": 1.2721893491124262e-07, |
| "loss": 1.4168, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.7765957446808511, |
| "grad_norm": 2.002400150167084, |
| "learning_rate": 1.257396449704142e-07, |
| "loss": 1.5304, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.7792553191489362, |
| "grad_norm": 1.6203137830914942, |
| "learning_rate": 1.242603550295858e-07, |
| "loss": 1.4902, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.7819148936170213, |
| "grad_norm": 1.653101321305009, |
| "learning_rate": 1.227810650887574e-07, |
| "loss": 1.523, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.7845744680851063, |
| "grad_norm": 1.4583067028702263, |
| "learning_rate": 1.2130177514792897e-07, |
| "loss": 1.3307, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.7872340425531914, |
| "grad_norm": 1.4416958484378999, |
| "learning_rate": 1.198224852071006e-07, |
| "loss": 1.2879, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.7898936170212765, |
| "grad_norm": 1.5342015216907867, |
| "learning_rate": 1.1834319526627217e-07, |
| "loss": 1.3491, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.7925531914893615, |
| "grad_norm": 1.5120417917571398, |
| "learning_rate": 1.1686390532544378e-07, |
| "loss": 1.5533, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.7952127659574468, |
| "grad_norm": 1.6448669091043147, |
| "learning_rate": 1.1538461538461539e-07, |
| "loss": 1.4507, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.797872340425532, |
| "grad_norm": 1.5744246355313867, |
| "learning_rate": 1.1390532544378698e-07, |
| "loss": 1.4762, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.800531914893617, |
| "grad_norm": 1.407351126310039, |
| "learning_rate": 1.1242603550295858e-07, |
| "loss": 1.2665, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.8031914893617023, |
| "grad_norm": 1.4428356495487928, |
| "learning_rate": 1.1094674556213017e-07, |
| "loss": 1.4222, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.8058510638297873, |
| "grad_norm": 1.4978022369408812, |
| "learning_rate": 1.0946745562130177e-07, |
| "loss": 1.3571, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.8085106382978724, |
| "grad_norm": 1.608694580830846, |
| "learning_rate": 1.0798816568047336e-07, |
| "loss": 1.2468, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.8111702127659575, |
| "grad_norm": 1.3671652219864612, |
| "learning_rate": 1.0650887573964497e-07, |
| "loss": 1.1918, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.8138297872340425, |
| "grad_norm": 1.5436563625586248, |
| "learning_rate": 1.0502958579881656e-07, |
| "loss": 1.3447, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.8164893617021276, |
| "grad_norm": 2.0668175329496448, |
| "learning_rate": 1.0355029585798816e-07, |
| "loss": 1.2851, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.8191489361702127, |
| "grad_norm": 1.4711737418040087, |
| "learning_rate": 1.0207100591715975e-07, |
| "loss": 1.4054, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.8218085106382977, |
| "grad_norm": 1.628475068104997, |
| "learning_rate": 1.0059171597633135e-07, |
| "loss": 1.2297, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.824468085106383, |
| "grad_norm": 1.6652537635356375, |
| "learning_rate": 9.911242603550297e-08, |
| "loss": 1.4249, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.827127659574468, |
| "grad_norm": 1.4549454801379844, |
| "learning_rate": 9.763313609467456e-08, |
| "loss": 1.4738, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.8297872340425532, |
| "grad_norm": 1.4571125733944477, |
| "learning_rate": 9.615384615384616e-08, |
| "loss": 1.2531, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.8324468085106385, |
| "grad_norm": 1.4934710030590315, |
| "learning_rate": 9.467455621301774e-08, |
| "loss": 1.4224, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.8351063829787235, |
| "grad_norm": 1.5068998589001918, |
| "learning_rate": 9.319526627218935e-08, |
| "loss": 1.4137, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.8377659574468086, |
| "grad_norm": 1.5592030646382606, |
| "learning_rate": 9.171597633136093e-08, |
| "loss": 1.4923, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.8404255319148937, |
| "grad_norm": 1.5420672523438603, |
| "learning_rate": 9.023668639053255e-08, |
| "loss": 1.3542, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.8430851063829787, |
| "grad_norm": 1.4933658760362354, |
| "learning_rate": 8.875739644970414e-08, |
| "loss": 1.4062, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.8457446808510638, |
| "grad_norm": 2.1197107348039648, |
| "learning_rate": 8.727810650887574e-08, |
| "loss": 1.3514, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.8484042553191489, |
| "grad_norm": 1.420310868366173, |
| "learning_rate": 8.579881656804733e-08, |
| "loss": 1.3865, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.851063829787234, |
| "grad_norm": 2.1476526664851083, |
| "learning_rate": 8.431952662721893e-08, |
| "loss": 1.1886, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.853723404255319, |
| "grad_norm": 1.3847908910859454, |
| "learning_rate": 8.284023668639053e-08, |
| "loss": 1.4107, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.8563829787234043, |
| "grad_norm": 1.6527903429011437, |
| "learning_rate": 8.136094674556213e-08, |
| "loss": 1.2876, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.8590425531914894, |
| "grad_norm": 1.5745014854949893, |
| "learning_rate": 7.988165680473373e-08, |
| "loss": 1.4558, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.8617021276595744, |
| "grad_norm": 1.5350363492855568, |
| "learning_rate": 7.840236686390532e-08, |
| "loss": 1.4523, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.8643617021276597, |
| "grad_norm": 1.4853786087332579, |
| "learning_rate": 7.692307692307692e-08, |
| "loss": 1.3292, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.8670212765957448, |
| "grad_norm": 1.4473821719214552, |
| "learning_rate": 7.544378698224851e-08, |
| "loss": 1.2034, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.8696808510638299, |
| "grad_norm": 1.4659266830367277, |
| "learning_rate": 7.396449704142011e-08, |
| "loss": 1.2584, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.872340425531915, |
| "grad_norm": 1.4759466915583441, |
| "learning_rate": 7.248520710059171e-08, |
| "loss": 1.3187, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 2.111257320773056, |
| "learning_rate": 7.100591715976332e-08, |
| "loss": 1.3323, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.877659574468085, |
| "grad_norm": 1.5831480252428458, |
| "learning_rate": 6.95266272189349e-08, |
| "loss": 1.4302, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.8803191489361701, |
| "grad_norm": 1.6086043948043176, |
| "learning_rate": 6.80473372781065e-08, |
| "loss": 1.4017, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.8829787234042552, |
| "grad_norm": 2.0849492736061332, |
| "learning_rate": 6.65680473372781e-08, |
| "loss": 1.5211, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.8856382978723403, |
| "grad_norm": 1.5043217865201886, |
| "learning_rate": 6.50887573964497e-08, |
| "loss": 1.2166, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.8882978723404256, |
| "grad_norm": 1.5612635488662876, |
| "learning_rate": 6.360946745562131e-08, |
| "loss": 1.3481, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.8909574468085106, |
| "grad_norm": 1.4947402449036076, |
| "learning_rate": 6.21301775147929e-08, |
| "loss": 1.1988, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.8936170212765957, |
| "grad_norm": 1.7123431001612024, |
| "learning_rate": 6.065088757396449e-08, |
| "loss": 1.6215, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.896276595744681, |
| "grad_norm": 1.5722027689056413, |
| "learning_rate": 5.917159763313609e-08, |
| "loss": 1.435, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.898936170212766, |
| "grad_norm": 1.5736347184744337, |
| "learning_rate": 5.7692307692307695e-08, |
| "loss": 1.4243, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.9015957446808511, |
| "grad_norm": 1.4558857769282714, |
| "learning_rate": 5.621301775147929e-08, |
| "loss": 1.3732, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.9042553191489362, |
| "grad_norm": 1.430432818475582, |
| "learning_rate": 5.4733727810650885e-08, |
| "loss": 1.2582, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.9069148936170213, |
| "grad_norm": 1.4010572562597123, |
| "learning_rate": 5.3254437869822486e-08, |
| "loss": 1.2036, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.9095744680851063, |
| "grad_norm": 1.5030183623430164, |
| "learning_rate": 5.177514792899408e-08, |
| "loss": 1.3348, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.9122340425531914, |
| "grad_norm": 1.5264425521471463, |
| "learning_rate": 5.0295857988165676e-08, |
| "loss": 1.3486, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.9148936170212765, |
| "grad_norm": 1.6568867880777098, |
| "learning_rate": 4.881656804733728e-08, |
| "loss": 1.5102, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.9175531914893615, |
| "grad_norm": 1.426877705408139, |
| "learning_rate": 4.733727810650887e-08, |
| "loss": 1.2843, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.9202127659574468, |
| "grad_norm": 1.5745176121540452, |
| "learning_rate": 4.585798816568047e-08, |
| "loss": 1.5892, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.922872340425532, |
| "grad_norm": 1.5299374151207628, |
| "learning_rate": 4.437869822485207e-08, |
| "loss": 1.2833, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.925531914893617, |
| "grad_norm": 1.5485924328569498, |
| "learning_rate": 4.2899408284023664e-08, |
| "loss": 1.2528, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.9281914893617023, |
| "grad_norm": 1.5650812571579016, |
| "learning_rate": 4.1420118343195265e-08, |
| "loss": 1.4623, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.9308510638297873, |
| "grad_norm": 1.4874887834654986, |
| "learning_rate": 3.9940828402366866e-08, |
| "loss": 1.331, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.9335106382978724, |
| "grad_norm": 2.2386953559992606, |
| "learning_rate": 3.846153846153846e-08, |
| "loss": 1.3, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.9361702127659575, |
| "grad_norm": 1.6479534866842882, |
| "learning_rate": 3.6982248520710056e-08, |
| "loss": 1.5275, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.9388297872340425, |
| "grad_norm": 1.3421164941268597, |
| "learning_rate": 3.550295857988166e-08, |
| "loss": 1.2612, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.9414893617021276, |
| "grad_norm": 1.4738714329564195, |
| "learning_rate": 3.402366863905325e-08, |
| "loss": 1.4314, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.9441489361702127, |
| "grad_norm": 1.44156180682146, |
| "learning_rate": 3.254437869822485e-08, |
| "loss": 1.3338, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.9468085106382977, |
| "grad_norm": 1.5784061294788538, |
| "learning_rate": 3.106508875739645e-08, |
| "loss": 1.3386, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.949468085106383, |
| "grad_norm": 1.3371318363907538, |
| "learning_rate": 2.9585798816568044e-08, |
| "loss": 1.3028, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.952127659574468, |
| "grad_norm": 1.6852290803170833, |
| "learning_rate": 2.8106508875739645e-08, |
| "loss": 1.4942, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.9547872340425532, |
| "grad_norm": 1.4000270483265091, |
| "learning_rate": 2.6627218934911243e-08, |
| "loss": 1.2736, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.9574468085106385, |
| "grad_norm": 1.5150814064740574, |
| "learning_rate": 2.5147928994082838e-08, |
| "loss": 1.3892, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.9601063829787235, |
| "grad_norm": 1.6902820824629503, |
| "learning_rate": 2.3668639053254436e-08, |
| "loss": 1.4473, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.9627659574468086, |
| "grad_norm": 1.5540380607077866, |
| "learning_rate": 2.2189349112426034e-08, |
| "loss": 1.4118, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.9654255319148937, |
| "grad_norm": 1.7104646212150858, |
| "learning_rate": 2.0710059171597633e-08, |
| "loss": 1.3168, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.9680851063829787, |
| "grad_norm": 2.2605324172049865, |
| "learning_rate": 1.923076923076923e-08, |
| "loss": 1.5395, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.9707446808510638, |
| "grad_norm": 1.5974851484011308, |
| "learning_rate": 1.775147928994083e-08, |
| "loss": 1.6206, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.9734042553191489, |
| "grad_norm": 1.5065611553522427, |
| "learning_rate": 1.6272189349112424e-08, |
| "loss": 1.282, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.976063829787234, |
| "grad_norm": 1.5885436344655675, |
| "learning_rate": 1.4792899408284022e-08, |
| "loss": 1.3356, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.978723404255319, |
| "grad_norm": 1.503074753014641, |
| "learning_rate": 1.3313609467455622e-08, |
| "loss": 1.4519, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.9813829787234043, |
| "grad_norm": 1.4858939981761545, |
| "learning_rate": 1.1834319526627218e-08, |
| "loss": 1.4708, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.9840425531914894, |
| "grad_norm": 1.5483339923710784, |
| "learning_rate": 1.0355029585798816e-08, |
| "loss": 1.4574, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.9867021276595744, |
| "grad_norm": 1.4527735951794787, |
| "learning_rate": 8.875739644970414e-09, |
| "loss": 1.2918, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.9893617021276597, |
| "grad_norm": 1.6044461968692099, |
| "learning_rate": 7.396449704142011e-09, |
| "loss": 1.4377, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.9920212765957448, |
| "grad_norm": 1.530330381812826, |
| "learning_rate": 5.917159763313609e-09, |
| "loss": 1.4002, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.9946808510638299, |
| "grad_norm": 1.5188564969623919, |
| "learning_rate": 4.437869822485207e-09, |
| "loss": 1.3378, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.997340425531915, |
| "grad_norm": 1.470529168569605, |
| "learning_rate": 2.9585798816568045e-09, |
| "loss": 1.3217, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.4650169982184404, |
| "learning_rate": 1.4792899408284023e-09, |
| "loss": 1.3323, |
| "step": 752 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 752, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 3000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|