diff --git "a/checkpoint-10954/trainer_state.json" "b/checkpoint-10954/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-10954/trainer_state.json" @@ -0,0 +1,76711 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 10954, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 9.129085265656382e-05, + "grad_norm": 1.1155520677566528, + "learning_rate": 5.0000000000000004e-08, + "loss": 1.0833, + "step": 1 + }, + { + "epoch": 0.00018258170531312764, + "grad_norm": 1.221660852432251, + "learning_rate": 1.0000000000000001e-07, + "loss": 1.0445, + "step": 2 + }, + { + "epoch": 0.00027387255796969143, + "grad_norm": 1.244646430015564, + "learning_rate": 1.5000000000000002e-07, + "loss": 1.0489, + "step": 3 + }, + { + "epoch": 0.0003651634106262553, + "grad_norm": 1.2081717252731323, + "learning_rate": 2.0000000000000002e-07, + "loss": 1.0895, + "step": 4 + }, + { + "epoch": 0.00045645426328281907, + "grad_norm": 1.1786338090896606, + "learning_rate": 2.5000000000000004e-07, + "loss": 1.0168, + "step": 5 + }, + { + "epoch": 0.0005477451159393829, + "grad_norm": 1.0965907573699951, + "learning_rate": 3.0000000000000004e-07, + "loss": 1.0438, + "step": 6 + }, + { + "epoch": 0.0006390359685959466, + "grad_norm": 1.1698453426361084, + "learning_rate": 3.5000000000000004e-07, + "loss": 1.0732, + "step": 7 + }, + { + "epoch": 0.0007303268212525105, + "grad_norm": 1.1248061656951904, + "learning_rate": 4.0000000000000003e-07, + "loss": 1.0841, + "step": 8 + }, + { + "epoch": 0.0008216176739090743, + "grad_norm": 1.1473627090454102, + "learning_rate": 4.5000000000000003e-07, + "loss": 1.0695, + "step": 9 + }, + { + "epoch": 0.0009129085265656381, + "grad_norm": 1.0959899425506592, + "learning_rate": 5.000000000000001e-07, + "loss": 1.067, + "step": 10 + }, + { + "epoch": 0.001004199379222202, + "grad_norm": 1.1543772220611572, + "learning_rate": 5.5e-07, + "loss": 1.1099, + "step": 11 + }, + { + "epoch": 0.0010954902318787657, + "grad_norm": 1.144818663597107, + "learning_rate": 6.000000000000001e-07, + "loss": 1.0442, + "step": 12 + }, + { + "epoch": 0.0011867810845353295, + "grad_norm": 1.0776710510253906, + "learning_rate": 6.5e-07, + "loss": 0.9708, + "step": 13 + }, + { + "epoch": 0.0012780719371918933, + "grad_norm": 1.1011018753051758, + "learning_rate": 7.000000000000001e-07, + "loss": 1.0477, + "step": 14 + }, + { + "epoch": 0.001369362789848457, + "grad_norm": 1.0577869415283203, + "learning_rate": 7.5e-07, + "loss": 1.0301, + "step": 15 + }, + { + "epoch": 0.001460653642505021, + "grad_norm": 1.0282212495803833, + "learning_rate": 8.000000000000001e-07, + "loss": 0.9959, + "step": 16 + }, + { + "epoch": 0.0015519444951615849, + "grad_norm": 0.9805408120155334, + "learning_rate": 8.500000000000001e-07, + "loss": 0.9546, + "step": 17 + }, + { + "epoch": 0.0016432353478181487, + "grad_norm": 1.023559331893921, + "learning_rate": 9.000000000000001e-07, + "loss": 1.0118, + "step": 18 + }, + { + "epoch": 0.0017345262004747125, + "grad_norm": 0.9573562145233154, + "learning_rate": 9.500000000000001e-07, + "loss": 1.0583, + "step": 19 + }, + { + "epoch": 0.0018258170531312763, + "grad_norm": 0.9513068795204163, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.9979, + "step": 20 + }, + { + "epoch": 0.00191710790578784, + "grad_norm": 0.9268428087234497, + "learning_rate": 1.0500000000000001e-06, + "loss": 1.03, + "step": 21 + }, + { + "epoch": 0.002008398758444404, + "grad_norm": 0.8570038080215454, + "learning_rate": 1.1e-06, + "loss": 0.991, + "step": 22 + }, + { + "epoch": 0.0020996896111009676, + "grad_norm": 0.8453893661499023, + "learning_rate": 1.1500000000000002e-06, + "loss": 1.0249, + "step": 23 + }, + { + "epoch": 0.0021909804637575314, + "grad_norm": 0.8058479428291321, + "learning_rate": 1.2000000000000002e-06, + "loss": 1.0627, + "step": 24 + }, + { + "epoch": 0.0022822713164140952, + "grad_norm": 0.7557787299156189, + "learning_rate": 1.25e-06, + "loss": 0.9944, + "step": 25 + }, + { + "epoch": 0.002373562169070659, + "grad_norm": 0.7127640843391418, + "learning_rate": 1.3e-06, + "loss": 1.0255, + "step": 26 + }, + { + "epoch": 0.002464853021727223, + "grad_norm": 0.6897225975990295, + "learning_rate": 1.3500000000000002e-06, + "loss": 1.0516, + "step": 27 + }, + { + "epoch": 0.0025561438743837866, + "grad_norm": 0.6535264253616333, + "learning_rate": 1.4000000000000001e-06, + "loss": 1.0241, + "step": 28 + }, + { + "epoch": 0.0026474347270403504, + "grad_norm": 0.5920505523681641, + "learning_rate": 1.45e-06, + "loss": 0.9809, + "step": 29 + }, + { + "epoch": 0.002738725579696914, + "grad_norm": 0.5482279062271118, + "learning_rate": 1.5e-06, + "loss": 0.9232, + "step": 30 + }, + { + "epoch": 0.002830016432353478, + "grad_norm": 0.5773881077766418, + "learning_rate": 1.5500000000000002e-06, + "loss": 1.0115, + "step": 31 + }, + { + "epoch": 0.002921307285010042, + "grad_norm": 0.5399670600891113, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.9799, + "step": 32 + }, + { + "epoch": 0.003012598137666606, + "grad_norm": 0.5379753112792969, + "learning_rate": 1.6500000000000003e-06, + "loss": 0.9903, + "step": 33 + }, + { + "epoch": 0.0031038889903231698, + "grad_norm": 0.4637337028980255, + "learning_rate": 1.7000000000000002e-06, + "loss": 0.8822, + "step": 34 + }, + { + "epoch": 0.0031951798429797336, + "grad_norm": 0.4830026626586914, + "learning_rate": 1.75e-06, + "loss": 0.9589, + "step": 35 + }, + { + "epoch": 0.0032864706956362974, + "grad_norm": 0.4807652235031128, + "learning_rate": 1.8000000000000001e-06, + "loss": 1.0171, + "step": 36 + }, + { + "epoch": 0.003377761548292861, + "grad_norm": 0.5044782161712646, + "learning_rate": 1.85e-06, + "loss": 0.984, + "step": 37 + }, + { + "epoch": 0.003469052400949425, + "grad_norm": 0.47416791319847107, + "learning_rate": 1.9000000000000002e-06, + "loss": 0.9777, + "step": 38 + }, + { + "epoch": 0.0035603432536059887, + "grad_norm": 0.4736081659793854, + "learning_rate": 1.9500000000000004e-06, + "loss": 1.0031, + "step": 39 + }, + { + "epoch": 0.0036516341062625525, + "grad_norm": 0.47097012400627136, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.8801, + "step": 40 + }, + { + "epoch": 0.0037429249589191163, + "grad_norm": 0.4400787055492401, + "learning_rate": 2.05e-06, + "loss": 0.964, + "step": 41 + }, + { + "epoch": 0.00383421581157568, + "grad_norm": 0.4522041082382202, + "learning_rate": 2.1000000000000002e-06, + "loss": 0.9762, + "step": 42 + }, + { + "epoch": 0.003925506664232244, + "grad_norm": 0.42608028650283813, + "learning_rate": 2.15e-06, + "loss": 0.9727, + "step": 43 + }, + { + "epoch": 0.004016797516888808, + "grad_norm": 0.41590142250061035, + "learning_rate": 2.2e-06, + "loss": 0.9012, + "step": 44 + }, + { + "epoch": 0.004108088369545372, + "grad_norm": 0.4263918697834015, + "learning_rate": 2.25e-06, + "loss": 0.8959, + "step": 45 + }, + { + "epoch": 0.004199379222201935, + "grad_norm": 0.3883999288082123, + "learning_rate": 2.3000000000000004e-06, + "loss": 0.9233, + "step": 46 + }, + { + "epoch": 0.0042906700748584995, + "grad_norm": 0.40718531608581543, + "learning_rate": 2.35e-06, + "loss": 0.9225, + "step": 47 + }, + { + "epoch": 0.004381960927515063, + "grad_norm": 0.3938511908054352, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.9123, + "step": 48 + }, + { + "epoch": 0.004473251780171627, + "grad_norm": 0.3851708173751831, + "learning_rate": 2.4500000000000003e-06, + "loss": 0.923, + "step": 49 + }, + { + "epoch": 0.0045645426328281904, + "grad_norm": 0.37378355860710144, + "learning_rate": 2.5e-06, + "loss": 0.9177, + "step": 50 + }, + { + "epoch": 0.004655833485484755, + "grad_norm": 0.37607336044311523, + "learning_rate": 2.55e-06, + "loss": 0.9205, + "step": 51 + }, + { + "epoch": 0.004747124338141318, + "grad_norm": 0.3750596344470978, + "learning_rate": 2.6e-06, + "loss": 0.9235, + "step": 52 + }, + { + "epoch": 0.004838415190797882, + "grad_norm": 0.3767654597759247, + "learning_rate": 2.6500000000000005e-06, + "loss": 0.9051, + "step": 53 + }, + { + "epoch": 0.004929706043454446, + "grad_norm": 0.3789389133453369, + "learning_rate": 2.7000000000000004e-06, + "loss": 0.9151, + "step": 54 + }, + { + "epoch": 0.00502099689611101, + "grad_norm": 0.35584843158721924, + "learning_rate": 2.7500000000000004e-06, + "loss": 0.8721, + "step": 55 + }, + { + "epoch": 0.005112287748767573, + "grad_norm": 0.32576268911361694, + "learning_rate": 2.8000000000000003e-06, + "loss": 0.8729, + "step": 56 + }, + { + "epoch": 0.005203578601424137, + "grad_norm": 0.33255735039711, + "learning_rate": 2.85e-06, + "loss": 0.8952, + "step": 57 + }, + { + "epoch": 0.005294869454080701, + "grad_norm": 0.3185463547706604, + "learning_rate": 2.9e-06, + "loss": 0.9142, + "step": 58 + }, + { + "epoch": 0.005386160306737265, + "grad_norm": 0.31628739833831787, + "learning_rate": 2.95e-06, + "loss": 0.9065, + "step": 59 + }, + { + "epoch": 0.005477451159393828, + "grad_norm": 0.310596764087677, + "learning_rate": 3e-06, + "loss": 0.9017, + "step": 60 + }, + { + "epoch": 0.005568742012050393, + "grad_norm": 0.291942298412323, + "learning_rate": 3.05e-06, + "loss": 0.7979, + "step": 61 + }, + { + "epoch": 0.005660032864706956, + "grad_norm": 0.29947158694267273, + "learning_rate": 3.1000000000000004e-06, + "loss": 0.8576, + "step": 62 + }, + { + "epoch": 0.00575132371736352, + "grad_norm": 0.31249454617500305, + "learning_rate": 3.1500000000000003e-06, + "loss": 0.8835, + "step": 63 + }, + { + "epoch": 0.005842614570020084, + "grad_norm": 0.3100808560848236, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.8617, + "step": 64 + }, + { + "epoch": 0.005933905422676648, + "grad_norm": 0.3115972876548767, + "learning_rate": 3.2500000000000002e-06, + "loss": 0.8877, + "step": 65 + }, + { + "epoch": 0.006025196275333212, + "grad_norm": 0.2996424734592438, + "learning_rate": 3.3000000000000006e-06, + "loss": 0.856, + "step": 66 + }, + { + "epoch": 0.006116487127989775, + "grad_norm": 0.3182090222835541, + "learning_rate": 3.3500000000000005e-06, + "loss": 0.8814, + "step": 67 + }, + { + "epoch": 0.0062077779806463396, + "grad_norm": 0.31781643629074097, + "learning_rate": 3.4000000000000005e-06, + "loss": 0.9171, + "step": 68 + }, + { + "epoch": 0.006299068833302903, + "grad_norm": 0.33338436484336853, + "learning_rate": 3.45e-06, + "loss": 0.9219, + "step": 69 + }, + { + "epoch": 0.006390359685959467, + "grad_norm": 0.3048798143863678, + "learning_rate": 3.5e-06, + "loss": 0.9048, + "step": 70 + }, + { + "epoch": 0.0064816505386160305, + "grad_norm": 0.30044931173324585, + "learning_rate": 3.5500000000000003e-06, + "loss": 0.86, + "step": 71 + }, + { + "epoch": 0.006572941391272595, + "grad_norm": 0.32148417830467224, + "learning_rate": 3.6000000000000003e-06, + "loss": 0.8815, + "step": 72 + }, + { + "epoch": 0.006664232243929158, + "grad_norm": 0.3310251533985138, + "learning_rate": 3.65e-06, + "loss": 0.8865, + "step": 73 + }, + { + "epoch": 0.006755523096585722, + "grad_norm": 0.3109063506126404, + "learning_rate": 3.7e-06, + "loss": 0.8585, + "step": 74 + }, + { + "epoch": 0.006846813949242286, + "grad_norm": 0.2930205762386322, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.8468, + "step": 75 + }, + { + "epoch": 0.00693810480189885, + "grad_norm": 0.3102734386920929, + "learning_rate": 3.8000000000000005e-06, + "loss": 0.8426, + "step": 76 + }, + { + "epoch": 0.007029395654555413, + "grad_norm": 0.305605411529541, + "learning_rate": 3.85e-06, + "loss": 0.8878, + "step": 77 + }, + { + "epoch": 0.0071206865072119775, + "grad_norm": 0.28958672285079956, + "learning_rate": 3.900000000000001e-06, + "loss": 0.8391, + "step": 78 + }, + { + "epoch": 0.007211977359868541, + "grad_norm": 0.28830093145370483, + "learning_rate": 3.95e-06, + "loss": 0.8005, + "step": 79 + }, + { + "epoch": 0.007303268212525105, + "grad_norm": 0.2755364179611206, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7929, + "step": 80 + }, + { + "epoch": 0.007394559065181668, + "grad_norm": 0.3140427768230438, + "learning_rate": 4.05e-06, + "loss": 0.8447, + "step": 81 + }, + { + "epoch": 0.007485849917838233, + "grad_norm": 0.2918093800544739, + "learning_rate": 4.1e-06, + "loss": 0.8344, + "step": 82 + }, + { + "epoch": 0.007577140770494796, + "grad_norm": 0.30039432644844055, + "learning_rate": 4.15e-06, + "loss": 0.8513, + "step": 83 + }, + { + "epoch": 0.00766843162315136, + "grad_norm": 0.28813984990119934, + "learning_rate": 4.2000000000000004e-06, + "loss": 0.828, + "step": 84 + }, + { + "epoch": 0.0077597224758079245, + "grad_norm": 0.3079264163970947, + "learning_rate": 4.25e-06, + "loss": 0.8459, + "step": 85 + }, + { + "epoch": 0.007851013328464489, + "grad_norm": 0.29014450311660767, + "learning_rate": 4.3e-06, + "loss": 0.8079, + "step": 86 + }, + { + "epoch": 0.007942304181121051, + "grad_norm": 0.29572978615760803, + "learning_rate": 4.350000000000001e-06, + "loss": 0.8083, + "step": 87 + }, + { + "epoch": 0.008033595033777615, + "grad_norm": 0.30638864636421204, + "learning_rate": 4.4e-06, + "loss": 0.8361, + "step": 88 + }, + { + "epoch": 0.00812488588643418, + "grad_norm": 0.2938840985298157, + "learning_rate": 4.450000000000001e-06, + "loss": 0.8221, + "step": 89 + }, + { + "epoch": 0.008216176739090744, + "grad_norm": 0.2975762188434601, + "learning_rate": 4.5e-06, + "loss": 0.8152, + "step": 90 + }, + { + "epoch": 0.008307467591747306, + "grad_norm": 0.2977428436279297, + "learning_rate": 4.5500000000000005e-06, + "loss": 0.8439, + "step": 91 + }, + { + "epoch": 0.00839875844440387, + "grad_norm": 0.2804766595363617, + "learning_rate": 4.600000000000001e-06, + "loss": 0.8149, + "step": 92 + }, + { + "epoch": 0.008490049297060435, + "grad_norm": 0.3014034926891327, + "learning_rate": 4.65e-06, + "loss": 0.7997, + "step": 93 + }, + { + "epoch": 0.008581340149716999, + "grad_norm": 0.2964629828929901, + "learning_rate": 4.7e-06, + "loss": 0.795, + "step": 94 + }, + { + "epoch": 0.008672631002373562, + "grad_norm": 0.2954620122909546, + "learning_rate": 4.75e-06, + "loss": 0.8443, + "step": 95 + }, + { + "epoch": 0.008763921855030126, + "grad_norm": 0.2871263027191162, + "learning_rate": 4.800000000000001e-06, + "loss": 0.7913, + "step": 96 + }, + { + "epoch": 0.00885521270768669, + "grad_norm": 0.29162418842315674, + "learning_rate": 4.85e-06, + "loss": 0.808, + "step": 97 + }, + { + "epoch": 0.008946503560343254, + "grad_norm": 0.322452187538147, + "learning_rate": 4.9000000000000005e-06, + "loss": 0.8187, + "step": 98 + }, + { + "epoch": 0.009037794412999817, + "grad_norm": 0.296351820230484, + "learning_rate": 4.95e-06, + "loss": 0.8007, + "step": 99 + }, + { + "epoch": 0.009129085265656381, + "grad_norm": 0.2863863408565521, + "learning_rate": 5e-06, + "loss": 0.8118, + "step": 100 + }, + { + "epoch": 0.009220376118312945, + "grad_norm": 0.30291223526000977, + "learning_rate": 4.999999997135266e-06, + "loss": 0.7879, + "step": 101 + }, + { + "epoch": 0.00931166697096951, + "grad_norm": 0.2947655916213989, + "learning_rate": 4.999999988541063e-06, + "loss": 0.8011, + "step": 102 + }, + { + "epoch": 0.009402957823626072, + "grad_norm": 0.2831778824329376, + "learning_rate": 4.999999974217391e-06, + "loss": 0.7929, + "step": 103 + }, + { + "epoch": 0.009494248676282636, + "grad_norm": 0.28980201482772827, + "learning_rate": 4.99999995416425e-06, + "loss": 0.7587, + "step": 104 + }, + { + "epoch": 0.0095855395289392, + "grad_norm": 0.2973884642124176, + "learning_rate": 4.999999928381641e-06, + "loss": 0.7605, + "step": 105 + }, + { + "epoch": 0.009676830381595765, + "grad_norm": 0.3118056356906891, + "learning_rate": 4.999999896869563e-06, + "loss": 0.8339, + "step": 106 + }, + { + "epoch": 0.009768121234252329, + "grad_norm": 0.2716490924358368, + "learning_rate": 4.9999998596280165e-06, + "loss": 0.8117, + "step": 107 + }, + { + "epoch": 0.009859412086908891, + "grad_norm": 0.2949272394180298, + "learning_rate": 4.999999816657002e-06, + "loss": 0.7635, + "step": 108 + }, + { + "epoch": 0.009950702939565455, + "grad_norm": 0.2827512323856354, + "learning_rate": 4.999999767956518e-06, + "loss": 0.8318, + "step": 109 + }, + { + "epoch": 0.01004199379222202, + "grad_norm": 0.2919203042984009, + "learning_rate": 4.999999713526567e-06, + "loss": 0.8076, + "step": 110 + }, + { + "epoch": 0.010133284644878584, + "grad_norm": 0.29824671149253845, + "learning_rate": 4.999999653367147e-06, + "loss": 0.7693, + "step": 111 + }, + { + "epoch": 0.010224575497535146, + "grad_norm": 0.2848367393016815, + "learning_rate": 4.99999958747826e-06, + "loss": 0.7748, + "step": 112 + }, + { + "epoch": 0.01031586635019171, + "grad_norm": 0.3092404305934906, + "learning_rate": 4.999999515859905e-06, + "loss": 0.7498, + "step": 113 + }, + { + "epoch": 0.010407157202848275, + "grad_norm": 0.29285624623298645, + "learning_rate": 4.999999438512082e-06, + "loss": 0.7917, + "step": 114 + }, + { + "epoch": 0.010498448055504839, + "grad_norm": 0.29227763414382935, + "learning_rate": 4.999999355434791e-06, + "loss": 0.7996, + "step": 115 + }, + { + "epoch": 0.010589738908161402, + "grad_norm": 0.2887090742588043, + "learning_rate": 4.999999266628033e-06, + "loss": 0.7677, + "step": 116 + }, + { + "epoch": 0.010681029760817966, + "grad_norm": 0.2812791168689728, + "learning_rate": 4.999999172091809e-06, + "loss": 0.8075, + "step": 117 + }, + { + "epoch": 0.01077232061347453, + "grad_norm": 0.28971099853515625, + "learning_rate": 4.999999071826116e-06, + "loss": 0.7627, + "step": 118 + }, + { + "epoch": 0.010863611466131094, + "grad_norm": 0.2932809293270111, + "learning_rate": 4.999998965830958e-06, + "loss": 0.7989, + "step": 119 + }, + { + "epoch": 0.010954902318787657, + "grad_norm": 0.29668599367141724, + "learning_rate": 4.999998854106333e-06, + "loss": 0.7985, + "step": 120 + }, + { + "epoch": 0.011046193171444221, + "grad_norm": 0.2738335132598877, + "learning_rate": 4.999998736652242e-06, + "loss": 0.777, + "step": 121 + }, + { + "epoch": 0.011137484024100785, + "grad_norm": 0.2960744798183441, + "learning_rate": 4.999998613468685e-06, + "loss": 0.803, + "step": 122 + }, + { + "epoch": 0.01122877487675735, + "grad_norm": 0.29946866631507874, + "learning_rate": 4.9999984845556625e-06, + "loss": 0.7374, + "step": 123 + }, + { + "epoch": 0.011320065729413912, + "grad_norm": 0.28832170367240906, + "learning_rate": 4.999998349913175e-06, + "loss": 0.8009, + "step": 124 + }, + { + "epoch": 0.011411356582070476, + "grad_norm": 0.30114713311195374, + "learning_rate": 4.999998209541221e-06, + "loss": 0.765, + "step": 125 + }, + { + "epoch": 0.01150264743472704, + "grad_norm": 0.3151075541973114, + "learning_rate": 4.999998063439804e-06, + "loss": 0.82, + "step": 126 + }, + { + "epoch": 0.011593938287383605, + "grad_norm": 0.3088056743144989, + "learning_rate": 4.999997911608923e-06, + "loss": 0.7695, + "step": 127 + }, + { + "epoch": 0.011685229140040169, + "grad_norm": 0.2899041473865509, + "learning_rate": 4.999997754048577e-06, + "loss": 0.7595, + "step": 128 + }, + { + "epoch": 0.011776519992696731, + "grad_norm": 0.28878113627433777, + "learning_rate": 4.999997590758767e-06, + "loss": 0.7847, + "step": 129 + }, + { + "epoch": 0.011867810845353296, + "grad_norm": 0.2836965322494507, + "learning_rate": 4.999997421739494e-06, + "loss": 0.7507, + "step": 130 + }, + { + "epoch": 0.01195910169800986, + "grad_norm": 0.3097086250782013, + "learning_rate": 4.999997246990759e-06, + "loss": 0.7929, + "step": 131 + }, + { + "epoch": 0.012050392550666424, + "grad_norm": 0.29743435978889465, + "learning_rate": 4.9999970665125605e-06, + "loss": 0.7604, + "step": 132 + }, + { + "epoch": 0.012141683403322986, + "grad_norm": 0.2903442084789276, + "learning_rate": 4.999996880304901e-06, + "loss": 0.7776, + "step": 133 + }, + { + "epoch": 0.01223297425597955, + "grad_norm": 0.29244914650917053, + "learning_rate": 4.999996688367779e-06, + "loss": 0.7544, + "step": 134 + }, + { + "epoch": 0.012324265108636115, + "grad_norm": 0.3028836250305176, + "learning_rate": 4.999996490701196e-06, + "loss": 0.7678, + "step": 135 + }, + { + "epoch": 0.012415555961292679, + "grad_norm": 0.2823251187801361, + "learning_rate": 4.999996287305152e-06, + "loss": 0.7519, + "step": 136 + }, + { + "epoch": 0.012506846813949242, + "grad_norm": 0.29676562547683716, + "learning_rate": 4.999996078179649e-06, + "loss": 0.7559, + "step": 137 + }, + { + "epoch": 0.012598137666605806, + "grad_norm": 0.292229562997818, + "learning_rate": 4.999995863324685e-06, + "loss": 0.7863, + "step": 138 + }, + { + "epoch": 0.01268942851926237, + "grad_norm": 0.2985602021217346, + "learning_rate": 4.9999956427402615e-06, + "loss": 0.7189, + "step": 139 + }, + { + "epoch": 0.012780719371918934, + "grad_norm": 0.2812100052833557, + "learning_rate": 4.99999541642638e-06, + "loss": 0.7423, + "step": 140 + }, + { + "epoch": 0.012872010224575497, + "grad_norm": 0.29865947365760803, + "learning_rate": 4.99999518438304e-06, + "loss": 0.7584, + "step": 141 + }, + { + "epoch": 0.012963301077232061, + "grad_norm": 0.2948894500732422, + "learning_rate": 4.999994946610243e-06, + "loss": 0.755, + "step": 142 + }, + { + "epoch": 0.013054591929888625, + "grad_norm": 0.2936941385269165, + "learning_rate": 4.999994703107988e-06, + "loss": 0.7254, + "step": 143 + }, + { + "epoch": 0.01314588278254519, + "grad_norm": 0.29584944248199463, + "learning_rate": 4.9999944538762756e-06, + "loss": 0.7664, + "step": 144 + }, + { + "epoch": 0.013237173635201752, + "grad_norm": 0.29717135429382324, + "learning_rate": 4.999994198915108e-06, + "loss": 0.76, + "step": 145 + }, + { + "epoch": 0.013328464487858316, + "grad_norm": 0.29961296916007996, + "learning_rate": 4.999993938224485e-06, + "loss": 0.7357, + "step": 146 + }, + { + "epoch": 0.01341975534051488, + "grad_norm": 0.30016839504241943, + "learning_rate": 4.999993671804408e-06, + "loss": 0.7358, + "step": 147 + }, + { + "epoch": 0.013511046193171445, + "grad_norm": 0.29745858907699585, + "learning_rate": 4.999993399654875e-06, + "loss": 0.7704, + "step": 148 + }, + { + "epoch": 0.013602337045828009, + "grad_norm": 0.3027574121952057, + "learning_rate": 4.999993121775889e-06, + "loss": 0.7745, + "step": 149 + }, + { + "epoch": 0.013693627898484571, + "grad_norm": 0.2902216613292694, + "learning_rate": 4.99999283816745e-06, + "loss": 0.7576, + "step": 150 + }, + { + "epoch": 0.013784918751141136, + "grad_norm": 0.3018718361854553, + "learning_rate": 4.999992548829559e-06, + "loss": 0.7335, + "step": 151 + }, + { + "epoch": 0.0138762096037977, + "grad_norm": 0.3045636713504791, + "learning_rate": 4.9999922537622155e-06, + "loss": 0.7701, + "step": 152 + }, + { + "epoch": 0.013967500456454264, + "grad_norm": 0.29009997844696045, + "learning_rate": 4.999991952965422e-06, + "loss": 0.7211, + "step": 153 + }, + { + "epoch": 0.014058791309110827, + "grad_norm": 0.2944207489490509, + "learning_rate": 4.999991646439177e-06, + "loss": 0.7297, + "step": 154 + }, + { + "epoch": 0.01415008216176739, + "grad_norm": 0.3090279698371887, + "learning_rate": 4.999991334183483e-06, + "loss": 0.7695, + "step": 155 + }, + { + "epoch": 0.014241373014423955, + "grad_norm": 0.29848068952560425, + "learning_rate": 4.99999101619834e-06, + "loss": 0.7468, + "step": 156 + }, + { + "epoch": 0.01433266386708052, + "grad_norm": 0.3222959041595459, + "learning_rate": 4.999990692483749e-06, + "loss": 0.7632, + "step": 157 + }, + { + "epoch": 0.014423954719737082, + "grad_norm": 0.30205845832824707, + "learning_rate": 4.999990363039711e-06, + "loss": 0.757, + "step": 158 + }, + { + "epoch": 0.014515245572393646, + "grad_norm": 0.3000972867012024, + "learning_rate": 4.999990027866226e-06, + "loss": 0.7298, + "step": 159 + }, + { + "epoch": 0.01460653642505021, + "grad_norm": 0.29826071858406067, + "learning_rate": 4.999989686963295e-06, + "loss": 0.7596, + "step": 160 + }, + { + "epoch": 0.014697827277706774, + "grad_norm": 0.3144635558128357, + "learning_rate": 4.999989340330918e-06, + "loss": 0.774, + "step": 161 + }, + { + "epoch": 0.014789118130363337, + "grad_norm": 0.30685916543006897, + "learning_rate": 4.9999889879690975e-06, + "loss": 0.7558, + "step": 162 + }, + { + "epoch": 0.014880408983019901, + "grad_norm": 0.29829251766204834, + "learning_rate": 4.999988629877833e-06, + "loss": 0.7581, + "step": 163 + }, + { + "epoch": 0.014971699835676465, + "grad_norm": 0.30140820145606995, + "learning_rate": 4.9999882660571264e-06, + "loss": 0.7573, + "step": 164 + }, + { + "epoch": 0.01506299068833303, + "grad_norm": 0.2927103340625763, + "learning_rate": 4.999987896506978e-06, + "loss": 0.764, + "step": 165 + }, + { + "epoch": 0.015154281540989592, + "grad_norm": 0.29751718044281006, + "learning_rate": 4.999987521227388e-06, + "loss": 0.7468, + "step": 166 + }, + { + "epoch": 0.015245572393646156, + "grad_norm": 0.3024549186229706, + "learning_rate": 4.999987140218358e-06, + "loss": 0.7828, + "step": 167 + }, + { + "epoch": 0.01533686324630272, + "grad_norm": 0.30675628781318665, + "learning_rate": 4.999986753479888e-06, + "loss": 0.7506, + "step": 168 + }, + { + "epoch": 0.015428154098959285, + "grad_norm": 0.311788409948349, + "learning_rate": 4.999986361011981e-06, + "loss": 0.7352, + "step": 169 + }, + { + "epoch": 0.015519444951615849, + "grad_norm": 0.31575125455856323, + "learning_rate": 4.999985962814636e-06, + "loss": 0.7152, + "step": 170 + }, + { + "epoch": 0.015610735804272411, + "grad_norm": 0.29424580931663513, + "learning_rate": 4.999985558887854e-06, + "loss": 0.7653, + "step": 171 + }, + { + "epoch": 0.015702026656928977, + "grad_norm": 0.293381929397583, + "learning_rate": 4.999985149231636e-06, + "loss": 0.7244, + "step": 172 + }, + { + "epoch": 0.015793317509585538, + "grad_norm": 0.3023812472820282, + "learning_rate": 4.999984733845984e-06, + "loss": 0.7612, + "step": 173 + }, + { + "epoch": 0.015884608362242102, + "grad_norm": 0.31813088059425354, + "learning_rate": 4.999984312730898e-06, + "loss": 0.7395, + "step": 174 + }, + { + "epoch": 0.015975899214898667, + "grad_norm": 0.2954006493091583, + "learning_rate": 4.999983885886379e-06, + "loss": 0.7305, + "step": 175 + }, + { + "epoch": 0.01606719006755523, + "grad_norm": 0.310088187456131, + "learning_rate": 4.9999834533124285e-06, + "loss": 0.7118, + "step": 176 + }, + { + "epoch": 0.016158480920211795, + "grad_norm": 0.29479947686195374, + "learning_rate": 4.999983015009047e-06, + "loss": 0.7373, + "step": 177 + }, + { + "epoch": 0.01624977177286836, + "grad_norm": 0.3049282431602478, + "learning_rate": 4.999982570976236e-06, + "loss": 0.7452, + "step": 178 + }, + { + "epoch": 0.016341062625524923, + "grad_norm": 0.3083893060684204, + "learning_rate": 4.9999821212139955e-06, + "loss": 0.7635, + "step": 179 + }, + { + "epoch": 0.016432353478181488, + "grad_norm": 0.3010261058807373, + "learning_rate": 4.999981665722328e-06, + "loss": 0.7578, + "step": 180 + }, + { + "epoch": 0.01652364433083805, + "grad_norm": 0.3169185221195221, + "learning_rate": 4.999981204501233e-06, + "loss": 0.7486, + "step": 181 + }, + { + "epoch": 0.016614935183494613, + "grad_norm": 0.30552828311920166, + "learning_rate": 4.999980737550712e-06, + "loss": 0.7242, + "step": 182 + }, + { + "epoch": 0.016706226036151177, + "grad_norm": 0.30409127473831177, + "learning_rate": 4.999980264870768e-06, + "loss": 0.7226, + "step": 183 + }, + { + "epoch": 0.01679751688880774, + "grad_norm": 0.30580082535743713, + "learning_rate": 4.999979786461399e-06, + "loss": 0.7469, + "step": 184 + }, + { + "epoch": 0.016888807741464305, + "grad_norm": 0.3118675649166107, + "learning_rate": 4.999979302322607e-06, + "loss": 0.7387, + "step": 185 + }, + { + "epoch": 0.01698009859412087, + "grad_norm": 0.30668187141418457, + "learning_rate": 4.999978812454394e-06, + "loss": 0.7385, + "step": 186 + }, + { + "epoch": 0.017071389446777434, + "grad_norm": 0.30797988176345825, + "learning_rate": 4.999978316856762e-06, + "loss": 0.7417, + "step": 187 + }, + { + "epoch": 0.017162680299433998, + "grad_norm": 0.2958287000656128, + "learning_rate": 4.999977815529711e-06, + "loss": 0.7398, + "step": 188 + }, + { + "epoch": 0.017253971152090562, + "grad_norm": 0.31880971789360046, + "learning_rate": 4.9999773084732405e-06, + "loss": 0.7475, + "step": 189 + }, + { + "epoch": 0.017345262004747123, + "grad_norm": 0.3203723132610321, + "learning_rate": 4.999976795687355e-06, + "loss": 0.7735, + "step": 190 + }, + { + "epoch": 0.017436552857403687, + "grad_norm": 0.30986565351486206, + "learning_rate": 4.9999762771720525e-06, + "loss": 0.7626, + "step": 191 + }, + { + "epoch": 0.01752784371006025, + "grad_norm": 0.3087269365787506, + "learning_rate": 4.999975752927336e-06, + "loss": 0.7234, + "step": 192 + }, + { + "epoch": 0.017619134562716816, + "grad_norm": 0.3208625018596649, + "learning_rate": 4.9999752229532065e-06, + "loss": 0.6832, + "step": 193 + }, + { + "epoch": 0.01771042541537338, + "grad_norm": 0.3088611364364624, + "learning_rate": 4.999974687249664e-06, + "loss": 0.6937, + "step": 194 + }, + { + "epoch": 0.017801716268029944, + "grad_norm": 0.30350005626678467, + "learning_rate": 4.999974145816712e-06, + "loss": 0.7286, + "step": 195 + }, + { + "epoch": 0.01789300712068651, + "grad_norm": 0.3133600056171417, + "learning_rate": 4.9999735986543505e-06, + "loss": 0.7333, + "step": 196 + }, + { + "epoch": 0.017984297973343073, + "grad_norm": 0.3206118047237396, + "learning_rate": 4.99997304576258e-06, + "loss": 0.7144, + "step": 197 + }, + { + "epoch": 0.018075588825999633, + "grad_norm": 0.29841524362564087, + "learning_rate": 4.999972487141402e-06, + "loss": 0.7428, + "step": 198 + }, + { + "epoch": 0.018166879678656198, + "grad_norm": 0.2962550222873688, + "learning_rate": 4.99997192279082e-06, + "loss": 0.7334, + "step": 199 + }, + { + "epoch": 0.018258170531312762, + "grad_norm": 0.31179919838905334, + "learning_rate": 4.999971352710832e-06, + "loss": 0.7305, + "step": 200 + }, + { + "epoch": 0.018349461383969326, + "grad_norm": 0.32509198784828186, + "learning_rate": 4.9999707769014415e-06, + "loss": 0.733, + "step": 201 + }, + { + "epoch": 0.01844075223662589, + "grad_norm": 0.304981529712677, + "learning_rate": 4.99997019536265e-06, + "loss": 0.7223, + "step": 202 + }, + { + "epoch": 0.018532043089282454, + "grad_norm": 0.3160554766654968, + "learning_rate": 4.999969608094457e-06, + "loss": 0.7428, + "step": 203 + }, + { + "epoch": 0.01862333394193902, + "grad_norm": 0.3151814937591553, + "learning_rate": 4.999969015096865e-06, + "loss": 0.7132, + "step": 204 + }, + { + "epoch": 0.018714624794595583, + "grad_norm": 0.30642783641815186, + "learning_rate": 4.999968416369876e-06, + "loss": 0.7487, + "step": 205 + }, + { + "epoch": 0.018805915647252144, + "grad_norm": 0.310494601726532, + "learning_rate": 4.99996781191349e-06, + "loss": 0.7618, + "step": 206 + }, + { + "epoch": 0.018897206499908708, + "grad_norm": 0.32557883858680725, + "learning_rate": 4.999967201727708e-06, + "loss": 0.7038, + "step": 207 + }, + { + "epoch": 0.018988497352565272, + "grad_norm": 0.3262990117073059, + "learning_rate": 4.999966585812533e-06, + "loss": 0.7351, + "step": 208 + }, + { + "epoch": 0.019079788205221836, + "grad_norm": 0.31450575590133667, + "learning_rate": 4.999965964167967e-06, + "loss": 0.6967, + "step": 209 + }, + { + "epoch": 0.0191710790578784, + "grad_norm": 0.31455448269844055, + "learning_rate": 4.9999653367940096e-06, + "loss": 0.6875, + "step": 210 + }, + { + "epoch": 0.019262369910534965, + "grad_norm": 0.3192206621170044, + "learning_rate": 4.999964703690662e-06, + "loss": 0.7062, + "step": 211 + }, + { + "epoch": 0.01935366076319153, + "grad_norm": 0.32328417897224426, + "learning_rate": 4.999964064857927e-06, + "loss": 0.7509, + "step": 212 + }, + { + "epoch": 0.019444951615848093, + "grad_norm": 0.3242582380771637, + "learning_rate": 4.999963420295806e-06, + "loss": 0.7099, + "step": 213 + }, + { + "epoch": 0.019536242468504657, + "grad_norm": 0.34786897897720337, + "learning_rate": 4.9999627700043e-06, + "loss": 0.7642, + "step": 214 + }, + { + "epoch": 0.019627533321161218, + "grad_norm": 0.33293962478637695, + "learning_rate": 4.99996211398341e-06, + "loss": 0.7163, + "step": 215 + }, + { + "epoch": 0.019718824173817782, + "grad_norm": 0.3241311311721802, + "learning_rate": 4.999961452233139e-06, + "loss": 0.7251, + "step": 216 + }, + { + "epoch": 0.019810115026474347, + "grad_norm": 0.31855690479278564, + "learning_rate": 4.999960784753487e-06, + "loss": 0.7269, + "step": 217 + }, + { + "epoch": 0.01990140587913091, + "grad_norm": 0.3189046084880829, + "learning_rate": 4.999960111544456e-06, + "loss": 0.737, + "step": 218 + }, + { + "epoch": 0.019992696731787475, + "grad_norm": 0.3264787197113037, + "learning_rate": 4.999959432606048e-06, + "loss": 0.7135, + "step": 219 + }, + { + "epoch": 0.02008398758444404, + "grad_norm": 0.31626003980636597, + "learning_rate": 4.999958747938264e-06, + "loss": 0.7276, + "step": 220 + }, + { + "epoch": 0.020175278437100604, + "grad_norm": 0.32106253504753113, + "learning_rate": 4.999958057541105e-06, + "loss": 0.7123, + "step": 221 + }, + { + "epoch": 0.020266569289757168, + "grad_norm": 0.3091062903404236, + "learning_rate": 4.9999573614145746e-06, + "loss": 0.6846, + "step": 222 + }, + { + "epoch": 0.02035786014241373, + "grad_norm": 0.339714914560318, + "learning_rate": 4.9999566595586724e-06, + "loss": 0.6987, + "step": 223 + }, + { + "epoch": 0.020449150995070293, + "grad_norm": 0.3473016619682312, + "learning_rate": 4.9999559519734005e-06, + "loss": 0.7324, + "step": 224 + }, + { + "epoch": 0.020540441847726857, + "grad_norm": 0.3320472538471222, + "learning_rate": 4.999955238658761e-06, + "loss": 0.7191, + "step": 225 + }, + { + "epoch": 0.02063173270038342, + "grad_norm": 0.33818238973617554, + "learning_rate": 4.999954519614756e-06, + "loss": 0.7174, + "step": 226 + }, + { + "epoch": 0.020723023553039985, + "grad_norm": 0.32529693841934204, + "learning_rate": 4.999953794841386e-06, + "loss": 0.7126, + "step": 227 + }, + { + "epoch": 0.02081431440569655, + "grad_norm": 0.3156394362449646, + "learning_rate": 4.999953064338653e-06, + "loss": 0.7429, + "step": 228 + }, + { + "epoch": 0.020905605258353114, + "grad_norm": 0.3204750120639801, + "learning_rate": 4.999952328106559e-06, + "loss": 0.732, + "step": 229 + }, + { + "epoch": 0.020996896111009678, + "grad_norm": 0.3432689607143402, + "learning_rate": 4.999951586145105e-06, + "loss": 0.7429, + "step": 230 + }, + { + "epoch": 0.021088186963666242, + "grad_norm": 0.313144713640213, + "learning_rate": 4.999950838454294e-06, + "loss": 0.7441, + "step": 231 + }, + { + "epoch": 0.021179477816322803, + "grad_norm": 0.33827072381973267, + "learning_rate": 4.999950085034127e-06, + "loss": 0.7206, + "step": 232 + }, + { + "epoch": 0.021270768668979367, + "grad_norm": 0.32923823595046997, + "learning_rate": 4.999949325884604e-06, + "loss": 0.7487, + "step": 233 + }, + { + "epoch": 0.02136205952163593, + "grad_norm": 0.31398797035217285, + "learning_rate": 4.99994856100573e-06, + "loss": 0.7316, + "step": 234 + }, + { + "epoch": 0.021453350374292496, + "grad_norm": 0.31832587718963623, + "learning_rate": 4.999947790397505e-06, + "loss": 0.704, + "step": 235 + }, + { + "epoch": 0.02154464122694906, + "grad_norm": 0.31062281131744385, + "learning_rate": 4.999947014059929e-06, + "loss": 0.713, + "step": 236 + }, + { + "epoch": 0.021635932079605624, + "grad_norm": 0.33382534980773926, + "learning_rate": 4.999946231993008e-06, + "loss": 0.6982, + "step": 237 + }, + { + "epoch": 0.02172722293226219, + "grad_norm": 0.34992024302482605, + "learning_rate": 4.99994544419674e-06, + "loss": 0.7231, + "step": 238 + }, + { + "epoch": 0.021818513784918753, + "grad_norm": 0.31549888849258423, + "learning_rate": 4.999944650671129e-06, + "loss": 0.7113, + "step": 239 + }, + { + "epoch": 0.021909804637575313, + "grad_norm": 0.344689279794693, + "learning_rate": 4.999943851416175e-06, + "loss": 0.7177, + "step": 240 + }, + { + "epoch": 0.022001095490231878, + "grad_norm": 0.3332938849925995, + "learning_rate": 4.9999430464318825e-06, + "loss": 0.6948, + "step": 241 + }, + { + "epoch": 0.022092386342888442, + "grad_norm": 0.32246172428131104, + "learning_rate": 4.999942235718251e-06, + "loss": 0.6861, + "step": 242 + }, + { + "epoch": 0.022183677195545006, + "grad_norm": 0.3199536204338074, + "learning_rate": 4.999941419275283e-06, + "loss": 0.7093, + "step": 243 + }, + { + "epoch": 0.02227496804820157, + "grad_norm": 0.33518749475479126, + "learning_rate": 4.9999405971029805e-06, + "loss": 0.7318, + "step": 244 + }, + { + "epoch": 0.022366258900858135, + "grad_norm": 0.34112459421157837, + "learning_rate": 4.999939769201346e-06, + "loss": 0.7301, + "step": 245 + }, + { + "epoch": 0.0224575497535147, + "grad_norm": 0.32260701060295105, + "learning_rate": 4.99993893557038e-06, + "loss": 0.673, + "step": 246 + }, + { + "epoch": 0.022548840606171263, + "grad_norm": 0.3402305245399475, + "learning_rate": 4.999938096210085e-06, + "loss": 0.7015, + "step": 247 + }, + { + "epoch": 0.022640131458827824, + "grad_norm": 0.3504970371723175, + "learning_rate": 4.999937251120463e-06, + "loss": 0.6975, + "step": 248 + }, + { + "epoch": 0.022731422311484388, + "grad_norm": 0.3363197147846222, + "learning_rate": 4.9999364003015176e-06, + "loss": 0.7228, + "step": 249 + }, + { + "epoch": 0.022822713164140952, + "grad_norm": 0.31873178482055664, + "learning_rate": 4.999935543753248e-06, + "loss": 0.6996, + "step": 250 + }, + { + "epoch": 0.022914004016797516, + "grad_norm": 0.3264579474925995, + "learning_rate": 4.999934681475657e-06, + "loss": 0.7029, + "step": 251 + }, + { + "epoch": 0.02300529486945408, + "grad_norm": 0.31919705867767334, + "learning_rate": 4.999933813468747e-06, + "loss": 0.7324, + "step": 252 + }, + { + "epoch": 0.023096585722110645, + "grad_norm": 0.35983842611312866, + "learning_rate": 4.999932939732521e-06, + "loss": 0.756, + "step": 253 + }, + { + "epoch": 0.02318787657476721, + "grad_norm": 0.33089563250541687, + "learning_rate": 4.999932060266979e-06, + "loss": 0.7154, + "step": 254 + }, + { + "epoch": 0.023279167427423773, + "grad_norm": 0.3348519504070282, + "learning_rate": 4.999931175072124e-06, + "loss": 0.6856, + "step": 255 + }, + { + "epoch": 0.023370458280080338, + "grad_norm": 0.3518310487270355, + "learning_rate": 4.999930284147958e-06, + "loss": 0.7134, + "step": 256 + }, + { + "epoch": 0.0234617491327369, + "grad_norm": 0.30809471011161804, + "learning_rate": 4.999929387494484e-06, + "loss": 0.7293, + "step": 257 + }, + { + "epoch": 0.023553039985393463, + "grad_norm": 0.34893903136253357, + "learning_rate": 4.999928485111702e-06, + "loss": 0.7156, + "step": 258 + }, + { + "epoch": 0.023644330838050027, + "grad_norm": 0.3687606155872345, + "learning_rate": 4.999927576999614e-06, + "loss": 0.7034, + "step": 259 + }, + { + "epoch": 0.02373562169070659, + "grad_norm": 0.32933491468429565, + "learning_rate": 4.999926663158225e-06, + "loss": 0.7116, + "step": 260 + }, + { + "epoch": 0.023826912543363155, + "grad_norm": 0.33695897459983826, + "learning_rate": 4.999925743587535e-06, + "loss": 0.6951, + "step": 261 + }, + { + "epoch": 0.02391820339601972, + "grad_norm": 0.33278581500053406, + "learning_rate": 4.999924818287546e-06, + "loss": 0.7114, + "step": 262 + }, + { + "epoch": 0.024009494248676284, + "grad_norm": 0.32807546854019165, + "learning_rate": 4.9999238872582605e-06, + "loss": 0.7605, + "step": 263 + }, + { + "epoch": 0.024100785101332848, + "grad_norm": 0.33730560541152954, + "learning_rate": 4.999922950499681e-06, + "loss": 0.6567, + "step": 264 + }, + { + "epoch": 0.02419207595398941, + "grad_norm": 0.34759390354156494, + "learning_rate": 4.99992200801181e-06, + "loss": 0.7437, + "step": 265 + }, + { + "epoch": 0.024283366806645973, + "grad_norm": 0.3315420150756836, + "learning_rate": 4.999921059794647e-06, + "loss": 0.7019, + "step": 266 + }, + { + "epoch": 0.024374657659302537, + "grad_norm": 0.32427775859832764, + "learning_rate": 4.999920105848197e-06, + "loss": 0.6901, + "step": 267 + }, + { + "epoch": 0.0244659485119591, + "grad_norm": 0.3302616775035858, + "learning_rate": 4.999919146172462e-06, + "loss": 0.6906, + "step": 268 + }, + { + "epoch": 0.024557239364615666, + "grad_norm": 0.32750827074050903, + "learning_rate": 4.999918180767444e-06, + "loss": 0.7495, + "step": 269 + }, + { + "epoch": 0.02464853021727223, + "grad_norm": 0.35215336084365845, + "learning_rate": 4.999917209633143e-06, + "loss": 0.715, + "step": 270 + }, + { + "epoch": 0.024739821069928794, + "grad_norm": 0.33855533599853516, + "learning_rate": 4.999916232769564e-06, + "loss": 0.685, + "step": 271 + }, + { + "epoch": 0.024831111922585358, + "grad_norm": 0.33181247115135193, + "learning_rate": 4.999915250176708e-06, + "loss": 0.7144, + "step": 272 + }, + { + "epoch": 0.024922402775241922, + "grad_norm": 0.33599501848220825, + "learning_rate": 4.999914261854577e-06, + "loss": 0.7012, + "step": 273 + }, + { + "epoch": 0.025013693627898483, + "grad_norm": 0.32531285285949707, + "learning_rate": 4.9999132678031746e-06, + "loss": 0.7086, + "step": 274 + }, + { + "epoch": 0.025104984480555047, + "grad_norm": 0.34616297483444214, + "learning_rate": 4.999912268022502e-06, + "loss": 0.7077, + "step": 275 + }, + { + "epoch": 0.02519627533321161, + "grad_norm": 0.3479490280151367, + "learning_rate": 4.999911262512561e-06, + "loss": 0.7388, + "step": 276 + }, + { + "epoch": 0.025287566185868176, + "grad_norm": 0.32720986008644104, + "learning_rate": 4.9999102512733555e-06, + "loss": 0.7329, + "step": 277 + }, + { + "epoch": 0.02537885703852474, + "grad_norm": 0.3458847403526306, + "learning_rate": 4.999909234304886e-06, + "loss": 0.6825, + "step": 278 + }, + { + "epoch": 0.025470147891181304, + "grad_norm": 0.34290748834609985, + "learning_rate": 4.999908211607156e-06, + "loss": 0.7101, + "step": 279 + }, + { + "epoch": 0.02556143874383787, + "grad_norm": 0.35147154331207275, + "learning_rate": 4.999907183180168e-06, + "loss": 0.7147, + "step": 280 + }, + { + "epoch": 0.025652729596494433, + "grad_norm": 0.3560108244419098, + "learning_rate": 4.999906149023924e-06, + "loss": 0.6985, + "step": 281 + }, + { + "epoch": 0.025744020449150994, + "grad_norm": 0.3420274555683136, + "learning_rate": 4.9999051091384255e-06, + "loss": 0.7336, + "step": 282 + }, + { + "epoch": 0.025835311301807558, + "grad_norm": 0.3226446211338043, + "learning_rate": 4.999904063523676e-06, + "loss": 0.7125, + "step": 283 + }, + { + "epoch": 0.025926602154464122, + "grad_norm": 0.36459654569625854, + "learning_rate": 4.999903012179679e-06, + "loss": 0.7197, + "step": 284 + }, + { + "epoch": 0.026017893007120686, + "grad_norm": 0.3519653081893921, + "learning_rate": 4.999901955106434e-06, + "loss": 0.7562, + "step": 285 + }, + { + "epoch": 0.02610918385977725, + "grad_norm": 0.3408181667327881, + "learning_rate": 4.999900892303945e-06, + "loss": 0.6971, + "step": 286 + }, + { + "epoch": 0.026200474712433815, + "grad_norm": 0.3411369025707245, + "learning_rate": 4.999899823772215e-06, + "loss": 0.6937, + "step": 287 + }, + { + "epoch": 0.02629176556509038, + "grad_norm": 0.33070990443229675, + "learning_rate": 4.999898749511245e-06, + "loss": 0.7097, + "step": 288 + }, + { + "epoch": 0.026383056417746943, + "grad_norm": 0.34680479764938354, + "learning_rate": 4.999897669521039e-06, + "loss": 0.733, + "step": 289 + }, + { + "epoch": 0.026474347270403504, + "grad_norm": 0.355587899684906, + "learning_rate": 4.9998965838015974e-06, + "loss": 0.7151, + "step": 290 + }, + { + "epoch": 0.026565638123060068, + "grad_norm": 0.3638277053833008, + "learning_rate": 4.999895492352926e-06, + "loss": 0.6753, + "step": 291 + }, + { + "epoch": 0.026656928975716632, + "grad_norm": 0.36650604009628296, + "learning_rate": 4.999894395175024e-06, + "loss": 0.7073, + "step": 292 + }, + { + "epoch": 0.026748219828373197, + "grad_norm": 0.3597724437713623, + "learning_rate": 4.999893292267895e-06, + "loss": 0.7, + "step": 293 + }, + { + "epoch": 0.02683951068102976, + "grad_norm": 0.33917638659477234, + "learning_rate": 4.999892183631543e-06, + "loss": 0.7042, + "step": 294 + }, + { + "epoch": 0.026930801533686325, + "grad_norm": 0.37296631932258606, + "learning_rate": 4.999891069265968e-06, + "loss": 0.7175, + "step": 295 + }, + { + "epoch": 0.02702209238634289, + "grad_norm": 0.351059228181839, + "learning_rate": 4.999889949171175e-06, + "loss": 0.6848, + "step": 296 + }, + { + "epoch": 0.027113383238999453, + "grad_norm": 0.3511442542076111, + "learning_rate": 4.999888823347166e-06, + "loss": 0.7255, + "step": 297 + }, + { + "epoch": 0.027204674091656018, + "grad_norm": 0.34399712085723877, + "learning_rate": 4.999887691793942e-06, + "loss": 0.7366, + "step": 298 + }, + { + "epoch": 0.02729596494431258, + "grad_norm": 0.3452884554862976, + "learning_rate": 4.999886554511506e-06, + "loss": 0.725, + "step": 299 + }, + { + "epoch": 0.027387255796969143, + "grad_norm": 0.3547109067440033, + "learning_rate": 4.999885411499862e-06, + "loss": 0.7051, + "step": 300 + }, + { + "epoch": 0.027478546649625707, + "grad_norm": 0.3425750732421875, + "learning_rate": 4.999884262759012e-06, + "loss": 0.7498, + "step": 301 + }, + { + "epoch": 0.02756983750228227, + "grad_norm": 0.3397521674633026, + "learning_rate": 4.999883108288958e-06, + "loss": 0.6804, + "step": 302 + }, + { + "epoch": 0.027661128354938835, + "grad_norm": 0.3550315201282501, + "learning_rate": 4.999881948089703e-06, + "loss": 0.6932, + "step": 303 + }, + { + "epoch": 0.0277524192075954, + "grad_norm": 0.36609262228012085, + "learning_rate": 4.999880782161251e-06, + "loss": 0.6732, + "step": 304 + }, + { + "epoch": 0.027843710060251964, + "grad_norm": 0.3816431164741516, + "learning_rate": 4.9998796105036014e-06, + "loss": 0.7236, + "step": 305 + }, + { + "epoch": 0.027935000912908528, + "grad_norm": 0.34365829825401306, + "learning_rate": 4.99987843311676e-06, + "loss": 0.7254, + "step": 306 + }, + { + "epoch": 0.02802629176556509, + "grad_norm": 0.3690064251422882, + "learning_rate": 4.99987725000073e-06, + "loss": 0.7454, + "step": 307 + }, + { + "epoch": 0.028117582618221653, + "grad_norm": 0.3501172959804535, + "learning_rate": 4.999876061155511e-06, + "loss": 0.698, + "step": 308 + }, + { + "epoch": 0.028208873470878217, + "grad_norm": 0.36074283719062805, + "learning_rate": 4.999874866581107e-06, + "loss": 0.6989, + "step": 309 + }, + { + "epoch": 0.02830016432353478, + "grad_norm": 0.33554086089134216, + "learning_rate": 4.999873666277521e-06, + "loss": 0.7133, + "step": 310 + }, + { + "epoch": 0.028391455176191346, + "grad_norm": 0.3583099842071533, + "learning_rate": 4.999872460244756e-06, + "loss": 0.7024, + "step": 311 + }, + { + "epoch": 0.02848274602884791, + "grad_norm": 0.3523677587509155, + "learning_rate": 4.999871248482815e-06, + "loss": 0.7117, + "step": 312 + }, + { + "epoch": 0.028574036881504474, + "grad_norm": 0.35744479298591614, + "learning_rate": 4.9998700309916995e-06, + "loss": 0.6952, + "step": 313 + }, + { + "epoch": 0.02866532773416104, + "grad_norm": 0.36353155970573425, + "learning_rate": 4.9998688077714135e-06, + "loss": 0.7267, + "step": 314 + }, + { + "epoch": 0.028756618586817603, + "grad_norm": 0.35441461205482483, + "learning_rate": 4.99986757882196e-06, + "loss": 0.7065, + "step": 315 + }, + { + "epoch": 0.028847909439474163, + "grad_norm": 0.3734096884727478, + "learning_rate": 4.99986634414334e-06, + "loss": 0.6942, + "step": 316 + }, + { + "epoch": 0.028939200292130728, + "grad_norm": 0.3196989893913269, + "learning_rate": 4.999865103735559e-06, + "loss": 0.696, + "step": 317 + }, + { + "epoch": 0.029030491144787292, + "grad_norm": 0.3307783305644989, + "learning_rate": 4.999863857598617e-06, + "loss": 0.6921, + "step": 318 + }, + { + "epoch": 0.029121781997443856, + "grad_norm": 0.3581627905368805, + "learning_rate": 4.999862605732518e-06, + "loss": 0.7046, + "step": 319 + }, + { + "epoch": 0.02921307285010042, + "grad_norm": 0.36050084233283997, + "learning_rate": 4.999861348137266e-06, + "loss": 0.6794, + "step": 320 + }, + { + "epoch": 0.029304363702756984, + "grad_norm": 0.35001689195632935, + "learning_rate": 4.9998600848128635e-06, + "loss": 0.7133, + "step": 321 + }, + { + "epoch": 0.02939565455541355, + "grad_norm": 0.37870073318481445, + "learning_rate": 4.999858815759312e-06, + "loss": 0.6744, + "step": 322 + }, + { + "epoch": 0.029486945408070113, + "grad_norm": 0.3394257128238678, + "learning_rate": 4.999857540976615e-06, + "loss": 0.7191, + "step": 323 + }, + { + "epoch": 0.029578236260726674, + "grad_norm": 0.37561550736427307, + "learning_rate": 4.999856260464777e-06, + "loss": 0.7056, + "step": 324 + }, + { + "epoch": 0.029669527113383238, + "grad_norm": 0.3721442222595215, + "learning_rate": 4.9998549742237985e-06, + "loss": 0.6845, + "step": 325 + }, + { + "epoch": 0.029760817966039802, + "grad_norm": 0.34867557883262634, + "learning_rate": 4.9998536822536835e-06, + "loss": 0.6914, + "step": 326 + }, + { + "epoch": 0.029852108818696366, + "grad_norm": 0.33491823077201843, + "learning_rate": 4.999852384554436e-06, + "loss": 0.7069, + "step": 327 + }, + { + "epoch": 0.02994339967135293, + "grad_norm": 0.36797264218330383, + "learning_rate": 4.999851081126057e-06, + "loss": 0.7254, + "step": 328 + }, + { + "epoch": 0.030034690524009495, + "grad_norm": 0.3744238018989563, + "learning_rate": 4.999849771968551e-06, + "loss": 0.7142, + "step": 329 + }, + { + "epoch": 0.03012598137666606, + "grad_norm": 0.3620997965335846, + "learning_rate": 4.999848457081921e-06, + "loss": 0.7138, + "step": 330 + }, + { + "epoch": 0.030217272229322623, + "grad_norm": 0.33243444561958313, + "learning_rate": 4.999847136466169e-06, + "loss": 0.6441, + "step": 331 + }, + { + "epoch": 0.030308563081979184, + "grad_norm": 0.33643439412117004, + "learning_rate": 4.999845810121298e-06, + "loss": 0.6932, + "step": 332 + }, + { + "epoch": 0.030399853934635748, + "grad_norm": 0.3533491790294647, + "learning_rate": 4.9998444780473125e-06, + "loss": 0.7214, + "step": 333 + }, + { + "epoch": 0.030491144787292312, + "grad_norm": 0.36705830693244934, + "learning_rate": 4.999843140244215e-06, + "loss": 0.6743, + "step": 334 + }, + { + "epoch": 0.030582435639948877, + "grad_norm": 0.36369651556015015, + "learning_rate": 4.999841796712007e-06, + "loss": 0.6859, + "step": 335 + }, + { + "epoch": 0.03067372649260544, + "grad_norm": 0.3776869475841522, + "learning_rate": 4.999840447450693e-06, + "loss": 0.6913, + "step": 336 + }, + { + "epoch": 0.030765017345262005, + "grad_norm": 0.3750239312648773, + "learning_rate": 4.999839092460277e-06, + "loss": 0.697, + "step": 337 + }, + { + "epoch": 0.03085630819791857, + "grad_norm": 0.37689825892448425, + "learning_rate": 4.99983773174076e-06, + "loss": 0.7207, + "step": 338 + }, + { + "epoch": 0.030947599050575134, + "grad_norm": 0.36609670519828796, + "learning_rate": 4.999836365292146e-06, + "loss": 0.692, + "step": 339 + }, + { + "epoch": 0.031038889903231698, + "grad_norm": 0.3562335968017578, + "learning_rate": 4.999834993114439e-06, + "loss": 0.6775, + "step": 340 + }, + { + "epoch": 0.03113018075588826, + "grad_norm": 0.36390963196754456, + "learning_rate": 4.999833615207641e-06, + "loss": 0.7181, + "step": 341 + }, + { + "epoch": 0.031221471608544823, + "grad_norm": 0.35182470083236694, + "learning_rate": 4.999832231571755e-06, + "loss": 0.6493, + "step": 342 + }, + { + "epoch": 0.03131276246120139, + "grad_norm": 0.37195345759391785, + "learning_rate": 4.999830842206786e-06, + "loss": 0.671, + "step": 343 + }, + { + "epoch": 0.031404053313857955, + "grad_norm": 0.3553541600704193, + "learning_rate": 4.999829447112735e-06, + "loss": 0.6949, + "step": 344 + }, + { + "epoch": 0.03149534416651451, + "grad_norm": 0.34279486536979675, + "learning_rate": 4.999828046289607e-06, + "loss": 0.6709, + "step": 345 + }, + { + "epoch": 0.031586635019171076, + "grad_norm": 0.3492174446582794, + "learning_rate": 4.999826639737403e-06, + "loss": 0.6988, + "step": 346 + }, + { + "epoch": 0.03167792587182764, + "grad_norm": 0.3541882336139679, + "learning_rate": 4.999825227456128e-06, + "loss": 0.6808, + "step": 347 + }, + { + "epoch": 0.031769216724484205, + "grad_norm": 0.356037974357605, + "learning_rate": 4.999823809445785e-06, + "loss": 0.6785, + "step": 348 + }, + { + "epoch": 0.03186050757714077, + "grad_norm": 0.34075769782066345, + "learning_rate": 4.9998223857063765e-06, + "loss": 0.7165, + "step": 349 + }, + { + "epoch": 0.03195179842979733, + "grad_norm": 0.38354727625846863, + "learning_rate": 4.999820956237907e-06, + "loss": 0.6762, + "step": 350 + }, + { + "epoch": 0.0320430892824539, + "grad_norm": 0.37635788321495056, + "learning_rate": 4.999819521040379e-06, + "loss": 0.6887, + "step": 351 + }, + { + "epoch": 0.03213438013511046, + "grad_norm": 0.37534934282302856, + "learning_rate": 4.999818080113795e-06, + "loss": 0.728, + "step": 352 + }, + { + "epoch": 0.032225670987767026, + "grad_norm": 0.3572161793708801, + "learning_rate": 4.99981663345816e-06, + "loss": 0.6868, + "step": 353 + }, + { + "epoch": 0.03231696184042359, + "grad_norm": 0.3523486852645874, + "learning_rate": 4.999815181073477e-06, + "loss": 0.6966, + "step": 354 + }, + { + "epoch": 0.032408252693080154, + "grad_norm": 0.3599279224872589, + "learning_rate": 4.999813722959747e-06, + "loss": 0.6671, + "step": 355 + }, + { + "epoch": 0.03249954354573672, + "grad_norm": 0.39559823274612427, + "learning_rate": 4.999812259116977e-06, + "loss": 0.6693, + "step": 356 + }, + { + "epoch": 0.03259083439839328, + "grad_norm": 0.353456974029541, + "learning_rate": 4.9998107895451665e-06, + "loss": 0.7233, + "step": 357 + }, + { + "epoch": 0.03268212525104985, + "grad_norm": 0.36995670199394226, + "learning_rate": 4.999809314244322e-06, + "loss": 0.672, + "step": 358 + }, + { + "epoch": 0.03277341610370641, + "grad_norm": 0.3305618166923523, + "learning_rate": 4.999807833214446e-06, + "loss": 0.6999, + "step": 359 + }, + { + "epoch": 0.032864706956362975, + "grad_norm": 0.3665432333946228, + "learning_rate": 4.999806346455541e-06, + "loss": 0.7161, + "step": 360 + }, + { + "epoch": 0.03295599780901954, + "grad_norm": 0.34642866253852844, + "learning_rate": 4.999804853967611e-06, + "loss": 0.6896, + "step": 361 + }, + { + "epoch": 0.0330472886616761, + "grad_norm": 0.3810369670391083, + "learning_rate": 4.9998033557506605e-06, + "loss": 0.7016, + "step": 362 + }, + { + "epoch": 0.03313857951433266, + "grad_norm": 0.39821478724479675, + "learning_rate": 4.999801851804691e-06, + "loss": 0.6818, + "step": 363 + }, + { + "epoch": 0.033229870366989225, + "grad_norm": 0.36369362473487854, + "learning_rate": 4.999800342129707e-06, + "loss": 0.7264, + "step": 364 + }, + { + "epoch": 0.03332116121964579, + "grad_norm": 0.36085808277130127, + "learning_rate": 4.999798826725712e-06, + "loss": 0.7072, + "step": 365 + }, + { + "epoch": 0.033412452072302354, + "grad_norm": 0.35711199045181274, + "learning_rate": 4.9997973055927094e-06, + "loss": 0.693, + "step": 366 + }, + { + "epoch": 0.03350374292495892, + "grad_norm": 0.40576687455177307, + "learning_rate": 4.999795778730702e-06, + "loss": 0.6765, + "step": 367 + }, + { + "epoch": 0.03359503377761548, + "grad_norm": 0.34541022777557373, + "learning_rate": 4.999794246139694e-06, + "loss": 0.6975, + "step": 368 + }, + { + "epoch": 0.033686324630272046, + "grad_norm": 0.3555791974067688, + "learning_rate": 4.999792707819688e-06, + "loss": 0.6791, + "step": 369 + }, + { + "epoch": 0.03377761548292861, + "grad_norm": 0.3587239384651184, + "learning_rate": 4.99979116377069e-06, + "loss": 0.6925, + "step": 370 + }, + { + "epoch": 0.033868906335585175, + "grad_norm": 0.35135895013809204, + "learning_rate": 4.999789613992701e-06, + "loss": 0.7057, + "step": 371 + }, + { + "epoch": 0.03396019718824174, + "grad_norm": 0.36076873540878296, + "learning_rate": 4.999788058485725e-06, + "loss": 0.6691, + "step": 372 + }, + { + "epoch": 0.0340514880408983, + "grad_norm": 0.38143211603164673, + "learning_rate": 4.999786497249767e-06, + "loss": 0.6948, + "step": 373 + }, + { + "epoch": 0.03414277889355487, + "grad_norm": 0.3760371804237366, + "learning_rate": 4.9997849302848285e-06, + "loss": 0.6957, + "step": 374 + }, + { + "epoch": 0.03423406974621143, + "grad_norm": 0.35466983914375305, + "learning_rate": 4.999783357590915e-06, + "loss": 0.6748, + "step": 375 + }, + { + "epoch": 0.034325360598867996, + "grad_norm": 0.34246063232421875, + "learning_rate": 4.9997817791680285e-06, + "loss": 0.6736, + "step": 376 + }, + { + "epoch": 0.03441665145152456, + "grad_norm": 0.36128711700439453, + "learning_rate": 4.9997801950161735e-06, + "loss": 0.7252, + "step": 377 + }, + { + "epoch": 0.034507942304181125, + "grad_norm": 0.42316222190856934, + "learning_rate": 4.9997786051353535e-06, + "loss": 0.6712, + "step": 378 + }, + { + "epoch": 0.03459923315683768, + "grad_norm": 0.39411211013793945, + "learning_rate": 4.999777009525572e-06, + "loss": 0.6601, + "step": 379 + }, + { + "epoch": 0.034690524009494246, + "grad_norm": 0.363391250371933, + "learning_rate": 4.999775408186833e-06, + "loss": 0.7223, + "step": 380 + }, + { + "epoch": 0.03478181486215081, + "grad_norm": 0.35315293073654175, + "learning_rate": 4.9997738011191395e-06, + "loss": 0.6936, + "step": 381 + }, + { + "epoch": 0.034873105714807374, + "grad_norm": 0.3528394103050232, + "learning_rate": 4.999772188322496e-06, + "loss": 0.6624, + "step": 382 + }, + { + "epoch": 0.03496439656746394, + "grad_norm": 0.3714195489883423, + "learning_rate": 4.999770569796906e-06, + "loss": 0.6607, + "step": 383 + }, + { + "epoch": 0.0350556874201205, + "grad_norm": 0.3905945420265198, + "learning_rate": 4.9997689455423724e-06, + "loss": 0.7041, + "step": 384 + }, + { + "epoch": 0.03514697827277707, + "grad_norm": 0.3491882383823395, + "learning_rate": 4.9997673155589e-06, + "loss": 0.675, + "step": 385 + }, + { + "epoch": 0.03523826912543363, + "grad_norm": 0.3592163622379303, + "learning_rate": 4.999765679846492e-06, + "loss": 0.6792, + "step": 386 + }, + { + "epoch": 0.035329559978090196, + "grad_norm": 0.4028419554233551, + "learning_rate": 4.999764038405153e-06, + "loss": 0.6666, + "step": 387 + }, + { + "epoch": 0.03542085083074676, + "grad_norm": 0.36772558093070984, + "learning_rate": 4.999762391234885e-06, + "loss": 0.6568, + "step": 388 + }, + { + "epoch": 0.035512141683403324, + "grad_norm": 0.37875691056251526, + "learning_rate": 4.999760738335693e-06, + "loss": 0.701, + "step": 389 + }, + { + "epoch": 0.03560343253605989, + "grad_norm": 0.368428111076355, + "learning_rate": 4.999759079707581e-06, + "loss": 0.6883, + "step": 390 + }, + { + "epoch": 0.03569472338871645, + "grad_norm": 0.38353976607322693, + "learning_rate": 4.9997574153505515e-06, + "loss": 0.674, + "step": 391 + }, + { + "epoch": 0.03578601424137302, + "grad_norm": 0.3652234375476837, + "learning_rate": 4.99975574526461e-06, + "loss": 0.6798, + "step": 392 + }, + { + "epoch": 0.03587730509402958, + "grad_norm": 0.35736221075057983, + "learning_rate": 4.999754069449759e-06, + "loss": 0.7143, + "step": 393 + }, + { + "epoch": 0.035968595946686145, + "grad_norm": 0.36247631907463074, + "learning_rate": 4.9997523879060035e-06, + "loss": 0.6925, + "step": 394 + }, + { + "epoch": 0.0360598867993427, + "grad_norm": 0.3840050995349884, + "learning_rate": 4.999750700633346e-06, + "loss": 0.6799, + "step": 395 + }, + { + "epoch": 0.03615117765199927, + "grad_norm": 0.37189099192619324, + "learning_rate": 4.999749007631791e-06, + "loss": 0.6919, + "step": 396 + }, + { + "epoch": 0.03624246850465583, + "grad_norm": 0.37432044744491577, + "learning_rate": 4.999747308901343e-06, + "loss": 0.7009, + "step": 397 + }, + { + "epoch": 0.036333759357312395, + "grad_norm": 0.3853381872177124, + "learning_rate": 4.999745604442006e-06, + "loss": 0.7063, + "step": 398 + }, + { + "epoch": 0.03642505020996896, + "grad_norm": 0.36421850323677063, + "learning_rate": 4.999743894253781e-06, + "loss": 0.6908, + "step": 399 + }, + { + "epoch": 0.036516341062625524, + "grad_norm": 0.3493221402168274, + "learning_rate": 4.999742178336676e-06, + "loss": 0.7165, + "step": 400 + }, + { + "epoch": 0.03660763191528209, + "grad_norm": 0.37548306584358215, + "learning_rate": 4.999740456690693e-06, + "loss": 0.6914, + "step": 401 + }, + { + "epoch": 0.03669892276793865, + "grad_norm": 0.3857927620410919, + "learning_rate": 4.999738729315835e-06, + "loss": 0.6819, + "step": 402 + }, + { + "epoch": 0.036790213620595216, + "grad_norm": 0.3681091070175171, + "learning_rate": 4.999736996212108e-06, + "loss": 0.6845, + "step": 403 + }, + { + "epoch": 0.03688150447325178, + "grad_norm": 0.3495480716228485, + "learning_rate": 4.999735257379514e-06, + "loss": 0.6587, + "step": 404 + }, + { + "epoch": 0.036972795325908345, + "grad_norm": 0.38181161880493164, + "learning_rate": 4.999733512818059e-06, + "loss": 0.6819, + "step": 405 + }, + { + "epoch": 0.03706408617856491, + "grad_norm": 0.3883487582206726, + "learning_rate": 4.999731762527746e-06, + "loss": 0.6705, + "step": 406 + }, + { + "epoch": 0.03715537703122147, + "grad_norm": 0.3847110867500305, + "learning_rate": 4.9997300065085785e-06, + "loss": 0.6602, + "step": 407 + }, + { + "epoch": 0.03724666788387804, + "grad_norm": 0.3719756603240967, + "learning_rate": 4.999728244760561e-06, + "loss": 0.6785, + "step": 408 + }, + { + "epoch": 0.0373379587365346, + "grad_norm": 0.3913652002811432, + "learning_rate": 4.9997264772836976e-06, + "loss": 0.6775, + "step": 409 + }, + { + "epoch": 0.037429249589191166, + "grad_norm": 0.33757948875427246, + "learning_rate": 4.999724704077992e-06, + "loss": 0.7046, + "step": 410 + }, + { + "epoch": 0.03752054044184773, + "grad_norm": 0.39537978172302246, + "learning_rate": 4.999722925143449e-06, + "loss": 0.7019, + "step": 411 + }, + { + "epoch": 0.03761183129450429, + "grad_norm": 0.39076751470565796, + "learning_rate": 4.999721140480072e-06, + "loss": 0.6756, + "step": 412 + }, + { + "epoch": 0.03770312214716085, + "grad_norm": 0.36871010065078735, + "learning_rate": 4.999719350087865e-06, + "loss": 0.6738, + "step": 413 + }, + { + "epoch": 0.037794412999817416, + "grad_norm": 0.36078548431396484, + "learning_rate": 4.999717553966833e-06, + "loss": 0.706, + "step": 414 + }, + { + "epoch": 0.03788570385247398, + "grad_norm": 0.3729201853275299, + "learning_rate": 4.99971575211698e-06, + "loss": 0.6623, + "step": 415 + }, + { + "epoch": 0.037976994705130544, + "grad_norm": 0.3847123384475708, + "learning_rate": 4.9997139445383084e-06, + "loss": 0.694, + "step": 416 + }, + { + "epoch": 0.03806828555778711, + "grad_norm": 0.3868594169616699, + "learning_rate": 4.999712131230823e-06, + "loss": 0.6709, + "step": 417 + }, + { + "epoch": 0.03815957641044367, + "grad_norm": 0.3817828893661499, + "learning_rate": 4.99971031219453e-06, + "loss": 0.6923, + "step": 418 + }, + { + "epoch": 0.03825086726310024, + "grad_norm": 0.3556261658668518, + "learning_rate": 4.999708487429432e-06, + "loss": 0.6671, + "step": 419 + }, + { + "epoch": 0.0383421581157568, + "grad_norm": 0.3654210567474365, + "learning_rate": 4.999706656935533e-06, + "loss": 0.6838, + "step": 420 + }, + { + "epoch": 0.038433448968413365, + "grad_norm": 0.36358246207237244, + "learning_rate": 4.999704820712837e-06, + "loss": 0.6971, + "step": 421 + }, + { + "epoch": 0.03852473982106993, + "grad_norm": 0.3677324652671814, + "learning_rate": 4.99970297876135e-06, + "loss": 0.6763, + "step": 422 + }, + { + "epoch": 0.038616030673726494, + "grad_norm": 0.39533933997154236, + "learning_rate": 4.999701131081074e-06, + "loss": 0.6837, + "step": 423 + }, + { + "epoch": 0.03870732152638306, + "grad_norm": 0.3865302503108978, + "learning_rate": 4.999699277672014e-06, + "loss": 0.7136, + "step": 424 + }, + { + "epoch": 0.03879861237903962, + "grad_norm": 0.3655787706375122, + "learning_rate": 4.999697418534175e-06, + "loss": 0.6585, + "step": 425 + }, + { + "epoch": 0.038889903231696187, + "grad_norm": 0.4029444754123688, + "learning_rate": 4.99969555366756e-06, + "loss": 0.6632, + "step": 426 + }, + { + "epoch": 0.03898119408435275, + "grad_norm": 0.39173975586891174, + "learning_rate": 4.9996936830721755e-06, + "loss": 0.6979, + "step": 427 + }, + { + "epoch": 0.039072484937009315, + "grad_norm": 0.34943887591362, + "learning_rate": 4.999691806748023e-06, + "loss": 0.6724, + "step": 428 + }, + { + "epoch": 0.03916377578966587, + "grad_norm": 0.36867791414260864, + "learning_rate": 4.999689924695108e-06, + "loss": 0.6872, + "step": 429 + }, + { + "epoch": 0.039255066642322436, + "grad_norm": 0.3453427851200104, + "learning_rate": 4.999688036913435e-06, + "loss": 0.7265, + "step": 430 + }, + { + "epoch": 0.039346357494979, + "grad_norm": 0.36396071314811707, + "learning_rate": 4.999686143403009e-06, + "loss": 0.6394, + "step": 431 + }, + { + "epoch": 0.039437648347635565, + "grad_norm": 0.35305026173591614, + "learning_rate": 4.999684244163833e-06, + "loss": 0.6679, + "step": 432 + }, + { + "epoch": 0.03952893920029213, + "grad_norm": 0.37896621227264404, + "learning_rate": 4.999682339195911e-06, + "loss": 0.6914, + "step": 433 + }, + { + "epoch": 0.03962023005294869, + "grad_norm": 0.37600573897361755, + "learning_rate": 4.999680428499249e-06, + "loss": 0.696, + "step": 434 + }, + { + "epoch": 0.03971152090560526, + "grad_norm": 0.37988701462745667, + "learning_rate": 4.999678512073851e-06, + "loss": 0.7051, + "step": 435 + }, + { + "epoch": 0.03980281175826182, + "grad_norm": 0.4026210606098175, + "learning_rate": 4.99967658991972e-06, + "loss": 0.6926, + "step": 436 + }, + { + "epoch": 0.039894102610918386, + "grad_norm": 0.416831374168396, + "learning_rate": 4.9996746620368616e-06, + "loss": 0.6429, + "step": 437 + }, + { + "epoch": 0.03998539346357495, + "grad_norm": 0.3703261613845825, + "learning_rate": 4.999672728425281e-06, + "loss": 0.6712, + "step": 438 + }, + { + "epoch": 0.040076684316231515, + "grad_norm": 0.35857829451560974, + "learning_rate": 4.999670789084981e-06, + "loss": 0.7033, + "step": 439 + }, + { + "epoch": 0.04016797516888808, + "grad_norm": 0.370822012424469, + "learning_rate": 4.999668844015967e-06, + "loss": 0.6736, + "step": 440 + }, + { + "epoch": 0.04025926602154464, + "grad_norm": 0.37433475255966187, + "learning_rate": 4.999666893218242e-06, + "loss": 0.6822, + "step": 441 + }, + { + "epoch": 0.04035055687420121, + "grad_norm": 0.39057478308677673, + "learning_rate": 4.999664936691813e-06, + "loss": 0.6767, + "step": 442 + }, + { + "epoch": 0.04044184772685777, + "grad_norm": 0.37698066234588623, + "learning_rate": 4.999662974436682e-06, + "loss": 0.6554, + "step": 443 + }, + { + "epoch": 0.040533138579514336, + "grad_norm": 0.3805365264415741, + "learning_rate": 4.999661006452856e-06, + "loss": 0.7162, + "step": 444 + }, + { + "epoch": 0.0406244294321709, + "grad_norm": 0.3632431924343109, + "learning_rate": 4.9996590327403375e-06, + "loss": 0.6537, + "step": 445 + }, + { + "epoch": 0.04071572028482746, + "grad_norm": 0.3913363218307495, + "learning_rate": 4.999657053299132e-06, + "loss": 0.6563, + "step": 446 + }, + { + "epoch": 0.04080701113748402, + "grad_norm": 0.40578633546829224, + "learning_rate": 4.999655068129243e-06, + "loss": 0.6944, + "step": 447 + }, + { + "epoch": 0.040898301990140586, + "grad_norm": 0.39672183990478516, + "learning_rate": 4.9996530772306756e-06, + "loss": 0.6927, + "step": 448 + }, + { + "epoch": 0.04098959284279715, + "grad_norm": 0.3730323612689972, + "learning_rate": 4.999651080603435e-06, + "loss": 0.6694, + "step": 449 + }, + { + "epoch": 0.041080883695453714, + "grad_norm": 0.3681950569152832, + "learning_rate": 4.999649078247525e-06, + "loss": 0.6546, + "step": 450 + }, + { + "epoch": 0.04117217454811028, + "grad_norm": 0.36713722348213196, + "learning_rate": 4.999647070162951e-06, + "loss": 0.7139, + "step": 451 + }, + { + "epoch": 0.04126346540076684, + "grad_norm": 0.40133756399154663, + "learning_rate": 4.9996450563497165e-06, + "loss": 0.6734, + "step": 452 + }, + { + "epoch": 0.04135475625342341, + "grad_norm": 0.3876374363899231, + "learning_rate": 4.9996430368078265e-06, + "loss": 0.6718, + "step": 453 + }, + { + "epoch": 0.04144604710607997, + "grad_norm": 0.3528994023799896, + "learning_rate": 4.999641011537286e-06, + "loss": 0.7064, + "step": 454 + }, + { + "epoch": 0.041537337958736535, + "grad_norm": 0.38584500551223755, + "learning_rate": 4.9996389805381e-06, + "loss": 0.6888, + "step": 455 + }, + { + "epoch": 0.0416286288113931, + "grad_norm": 0.3479042947292328, + "learning_rate": 4.999636943810272e-06, + "loss": 0.6647, + "step": 456 + }, + { + "epoch": 0.041719919664049664, + "grad_norm": 0.3856630027294159, + "learning_rate": 4.999634901353807e-06, + "loss": 0.6513, + "step": 457 + }, + { + "epoch": 0.04181121051670623, + "grad_norm": 0.3739722669124603, + "learning_rate": 4.999632853168711e-06, + "loss": 0.6704, + "step": 458 + }, + { + "epoch": 0.04190250136936279, + "grad_norm": 0.41796010732650757, + "learning_rate": 4.9996307992549865e-06, + "loss": 0.6889, + "step": 459 + }, + { + "epoch": 0.041993792222019356, + "grad_norm": 0.3790974020957947, + "learning_rate": 4.999628739612641e-06, + "loss": 0.6851, + "step": 460 + }, + { + "epoch": 0.04208508307467592, + "grad_norm": 0.370760977268219, + "learning_rate": 4.999626674241676e-06, + "loss": 0.6857, + "step": 461 + }, + { + "epoch": 0.042176373927332485, + "grad_norm": 0.3761022388935089, + "learning_rate": 4.999624603142098e-06, + "loss": 0.6846, + "step": 462 + }, + { + "epoch": 0.04226766477998904, + "grad_norm": 0.434345543384552, + "learning_rate": 4.999622526313912e-06, + "loss": 0.6872, + "step": 463 + }, + { + "epoch": 0.042358955632645606, + "grad_norm": 0.3558933436870575, + "learning_rate": 4.999620443757123e-06, + "loss": 0.6744, + "step": 464 + }, + { + "epoch": 0.04245024648530217, + "grad_norm": 0.4017431437969208, + "learning_rate": 4.999618355471734e-06, + "loss": 0.6798, + "step": 465 + }, + { + "epoch": 0.042541537337958735, + "grad_norm": 0.4058558940887451, + "learning_rate": 4.999616261457751e-06, + "loss": 0.6584, + "step": 466 + }, + { + "epoch": 0.0426328281906153, + "grad_norm": 0.38320180773735046, + "learning_rate": 4.999614161715178e-06, + "loss": 0.6905, + "step": 467 + }, + { + "epoch": 0.04272411904327186, + "grad_norm": 0.39639419317245483, + "learning_rate": 4.999612056244022e-06, + "loss": 0.6745, + "step": 468 + }, + { + "epoch": 0.04281540989592843, + "grad_norm": 0.4130030572414398, + "learning_rate": 4.999609945044286e-06, + "loss": 0.6773, + "step": 469 + }, + { + "epoch": 0.04290670074858499, + "grad_norm": 0.3733846843242645, + "learning_rate": 4.9996078281159745e-06, + "loss": 0.6847, + "step": 470 + }, + { + "epoch": 0.042997991601241556, + "grad_norm": 0.37136009335517883, + "learning_rate": 4.999605705459093e-06, + "loss": 0.7074, + "step": 471 + }, + { + "epoch": 0.04308928245389812, + "grad_norm": 0.3778153955936432, + "learning_rate": 4.999603577073647e-06, + "loss": 0.6808, + "step": 472 + }, + { + "epoch": 0.043180573306554684, + "grad_norm": 0.3687952756881714, + "learning_rate": 4.99960144295964e-06, + "loss": 0.6507, + "step": 473 + }, + { + "epoch": 0.04327186415921125, + "grad_norm": 0.3910067081451416, + "learning_rate": 4.999599303117079e-06, + "loss": 0.6116, + "step": 474 + }, + { + "epoch": 0.04336315501186781, + "grad_norm": 0.375323086977005, + "learning_rate": 4.999597157545965e-06, + "loss": 0.6547, + "step": 475 + }, + { + "epoch": 0.04345444586452438, + "grad_norm": 0.3774455487728119, + "learning_rate": 4.999595006246308e-06, + "loss": 0.6697, + "step": 476 + }, + { + "epoch": 0.04354573671718094, + "grad_norm": 0.39559146761894226, + "learning_rate": 4.999592849218109e-06, + "loss": 0.6697, + "step": 477 + }, + { + "epoch": 0.043637027569837505, + "grad_norm": 0.37322479486465454, + "learning_rate": 4.999590686461375e-06, + "loss": 0.7039, + "step": 478 + }, + { + "epoch": 0.04372831842249406, + "grad_norm": 0.3878616392612457, + "learning_rate": 4.99958851797611e-06, + "loss": 0.7185, + "step": 479 + }, + { + "epoch": 0.04381960927515063, + "grad_norm": 0.37686148285865784, + "learning_rate": 4.999586343762319e-06, + "loss": 0.7168, + "step": 480 + }, + { + "epoch": 0.04391090012780719, + "grad_norm": 0.4224311113357544, + "learning_rate": 4.999584163820007e-06, + "loss": 0.6455, + "step": 481 + }, + { + "epoch": 0.044002190980463755, + "grad_norm": 0.3955426514148712, + "learning_rate": 4.99958197814918e-06, + "loss": 0.6618, + "step": 482 + }, + { + "epoch": 0.04409348183312032, + "grad_norm": 0.4109545648097992, + "learning_rate": 4.999579786749842e-06, + "loss": 0.6797, + "step": 483 + }, + { + "epoch": 0.044184772685776884, + "grad_norm": 0.37886855006217957, + "learning_rate": 4.999577589621999e-06, + "loss": 0.6564, + "step": 484 + }, + { + "epoch": 0.04427606353843345, + "grad_norm": 0.38770952820777893, + "learning_rate": 4.999575386765654e-06, + "loss": 0.6424, + "step": 485 + }, + { + "epoch": 0.04436735439109001, + "grad_norm": 0.3846931457519531, + "learning_rate": 4.999573178180815e-06, + "loss": 0.694, + "step": 486 + }, + { + "epoch": 0.044458645243746577, + "grad_norm": 0.40086859464645386, + "learning_rate": 4.999570963867483e-06, + "loss": 0.6625, + "step": 487 + }, + { + "epoch": 0.04454993609640314, + "grad_norm": 0.36470767855644226, + "learning_rate": 4.999568743825667e-06, + "loss": 0.6866, + "step": 488 + }, + { + "epoch": 0.044641226949059705, + "grad_norm": 0.39908239245414734, + "learning_rate": 4.999566518055371e-06, + "loss": 0.6711, + "step": 489 + }, + { + "epoch": 0.04473251780171627, + "grad_norm": 0.39964717626571655, + "learning_rate": 4.999564286556599e-06, + "loss": 0.6625, + "step": 490 + }, + { + "epoch": 0.04482380865437283, + "grad_norm": 0.3753734827041626, + "learning_rate": 4.999562049329356e-06, + "loss": 0.6234, + "step": 491 + }, + { + "epoch": 0.0449150995070294, + "grad_norm": 0.40746021270751953, + "learning_rate": 4.999559806373649e-06, + "loss": 0.6577, + "step": 492 + }, + { + "epoch": 0.04500639035968596, + "grad_norm": 0.39094385504722595, + "learning_rate": 4.999557557689482e-06, + "loss": 0.6387, + "step": 493 + }, + { + "epoch": 0.045097681212342526, + "grad_norm": 0.38072606921195984, + "learning_rate": 4.999555303276859e-06, + "loss": 0.6579, + "step": 494 + }, + { + "epoch": 0.04518897206499909, + "grad_norm": 0.3812282383441925, + "learning_rate": 4.999553043135788e-06, + "loss": 0.6665, + "step": 495 + }, + { + "epoch": 0.04528026291765565, + "grad_norm": 0.4084235727787018, + "learning_rate": 4.9995507772662724e-06, + "loss": 0.6685, + "step": 496 + }, + { + "epoch": 0.04537155377031221, + "grad_norm": 0.3994607627391815, + "learning_rate": 4.999548505668317e-06, + "loss": 0.6635, + "step": 497 + }, + { + "epoch": 0.045462844622968776, + "grad_norm": 0.38658633828163147, + "learning_rate": 4.9995462283419274e-06, + "loss": 0.656, + "step": 498 + }, + { + "epoch": 0.04555413547562534, + "grad_norm": 0.3701649606227875, + "learning_rate": 4.999543945287109e-06, + "loss": 0.6483, + "step": 499 + }, + { + "epoch": 0.045645426328281904, + "grad_norm": 0.374792218208313, + "learning_rate": 4.999541656503867e-06, + "loss": 0.6647, + "step": 500 + }, + { + "epoch": 0.04573671718093847, + "grad_norm": 0.36866897344589233, + "learning_rate": 4.999539361992207e-06, + "loss": 0.7015, + "step": 501 + }, + { + "epoch": 0.04582800803359503, + "grad_norm": 0.4174692928791046, + "learning_rate": 4.999537061752135e-06, + "loss": 0.7039, + "step": 502 + }, + { + "epoch": 0.0459192988862516, + "grad_norm": 0.3656955659389496, + "learning_rate": 4.999534755783653e-06, + "loss": 0.697, + "step": 503 + }, + { + "epoch": 0.04601058973890816, + "grad_norm": 0.3659496009349823, + "learning_rate": 4.999532444086769e-06, + "loss": 0.6726, + "step": 504 + }, + { + "epoch": 0.046101880591564726, + "grad_norm": 0.39072421193122864, + "learning_rate": 4.999530126661488e-06, + "loss": 0.6705, + "step": 505 + }, + { + "epoch": 0.04619317144422129, + "grad_norm": 0.4200589954853058, + "learning_rate": 4.999527803507815e-06, + "loss": 0.6561, + "step": 506 + }, + { + "epoch": 0.046284462296877854, + "grad_norm": 0.39702388644218445, + "learning_rate": 4.999525474625757e-06, + "loss": 0.6986, + "step": 507 + }, + { + "epoch": 0.04637575314953442, + "grad_norm": 0.38718196749687195, + "learning_rate": 4.999523140015316e-06, + "loss": 0.6799, + "step": 508 + }, + { + "epoch": 0.04646704400219098, + "grad_norm": 0.38763147592544556, + "learning_rate": 4.9995207996765e-06, + "loss": 0.6846, + "step": 509 + }, + { + "epoch": 0.04655833485484755, + "grad_norm": 0.4033505618572235, + "learning_rate": 4.999518453609312e-06, + "loss": 0.6634, + "step": 510 + }, + { + "epoch": 0.04664962570750411, + "grad_norm": 0.36116817593574524, + "learning_rate": 4.99951610181376e-06, + "loss": 0.6986, + "step": 511 + }, + { + "epoch": 0.046740916560160675, + "grad_norm": 0.37798038125038147, + "learning_rate": 4.999513744289848e-06, + "loss": 0.6801, + "step": 512 + }, + { + "epoch": 0.04683220741281723, + "grad_norm": 0.41014623641967773, + "learning_rate": 4.999511381037582e-06, + "loss": 0.6453, + "step": 513 + }, + { + "epoch": 0.0469234982654738, + "grad_norm": 0.40599581599235535, + "learning_rate": 4.999509012056966e-06, + "loss": 0.7226, + "step": 514 + }, + { + "epoch": 0.04701478911813036, + "grad_norm": 0.39968791604042053, + "learning_rate": 4.999506637348007e-06, + "loss": 0.6728, + "step": 515 + }, + { + "epoch": 0.047106079970786925, + "grad_norm": 0.3950003683567047, + "learning_rate": 4.99950425691071e-06, + "loss": 0.624, + "step": 516 + }, + { + "epoch": 0.04719737082344349, + "grad_norm": 0.3597630262374878, + "learning_rate": 4.99950187074508e-06, + "loss": 0.72, + "step": 517 + }, + { + "epoch": 0.047288661676100054, + "grad_norm": 0.38048550486564636, + "learning_rate": 4.999499478851124e-06, + "loss": 0.6792, + "step": 518 + }, + { + "epoch": 0.04737995252875662, + "grad_norm": 0.38597938418388367, + "learning_rate": 4.9994970812288455e-06, + "loss": 0.6818, + "step": 519 + }, + { + "epoch": 0.04747124338141318, + "grad_norm": 0.3761007785797119, + "learning_rate": 4.9994946778782506e-06, + "loss": 0.6832, + "step": 520 + }, + { + "epoch": 0.047562534234069746, + "grad_norm": 0.40404102206230164, + "learning_rate": 4.9994922687993455e-06, + "loss": 0.6832, + "step": 521 + }, + { + "epoch": 0.04765382508672631, + "grad_norm": 0.4031407833099365, + "learning_rate": 4.9994898539921344e-06, + "loss": 0.6837, + "step": 522 + }, + { + "epoch": 0.047745115939382875, + "grad_norm": 0.38608264923095703, + "learning_rate": 4.999487433456624e-06, + "loss": 0.6798, + "step": 523 + }, + { + "epoch": 0.04783640679203944, + "grad_norm": 0.38452818989753723, + "learning_rate": 4.99948500719282e-06, + "loss": 0.6641, + "step": 524 + }, + { + "epoch": 0.047927697644696, + "grad_norm": 0.3845877945423126, + "learning_rate": 4.9994825752007275e-06, + "loss": 0.7282, + "step": 525 + }, + { + "epoch": 0.04801898849735257, + "grad_norm": 0.3817768394947052, + "learning_rate": 4.999480137480352e-06, + "loss": 0.6603, + "step": 526 + }, + { + "epoch": 0.04811027935000913, + "grad_norm": 0.3842001259326935, + "learning_rate": 4.999477694031699e-06, + "loss": 0.6721, + "step": 527 + }, + { + "epoch": 0.048201570202665696, + "grad_norm": 0.4070844054222107, + "learning_rate": 4.999475244854774e-06, + "loss": 0.6563, + "step": 528 + }, + { + "epoch": 0.04829286105532226, + "grad_norm": 0.4080294668674469, + "learning_rate": 4.999472789949583e-06, + "loss": 0.7057, + "step": 529 + }, + { + "epoch": 0.04838415190797882, + "grad_norm": 0.4031464755535126, + "learning_rate": 4.999470329316132e-06, + "loss": 0.6776, + "step": 530 + }, + { + "epoch": 0.04847544276063538, + "grad_norm": 0.3819552958011627, + "learning_rate": 4.9994678629544255e-06, + "loss": 0.6894, + "step": 531 + }, + { + "epoch": 0.048566733613291946, + "grad_norm": 0.42205560207366943, + "learning_rate": 4.999465390864469e-06, + "loss": 0.6775, + "step": 532 + }, + { + "epoch": 0.04865802446594851, + "grad_norm": 0.4073433578014374, + "learning_rate": 4.99946291304627e-06, + "loss": 0.6766, + "step": 533 + }, + { + "epoch": 0.048749315318605074, + "grad_norm": 0.4266825020313263, + "learning_rate": 4.999460429499833e-06, + "loss": 0.6618, + "step": 534 + }, + { + "epoch": 0.04884060617126164, + "grad_norm": 0.41335979104042053, + "learning_rate": 4.999457940225164e-06, + "loss": 0.7037, + "step": 535 + }, + { + "epoch": 0.0489318970239182, + "grad_norm": 0.3893336355686188, + "learning_rate": 4.9994554452222684e-06, + "loss": 0.6807, + "step": 536 + }, + { + "epoch": 0.04902318787657477, + "grad_norm": 0.3672478199005127, + "learning_rate": 4.999452944491152e-06, + "loss": 0.6743, + "step": 537 + }, + { + "epoch": 0.04911447872923133, + "grad_norm": 0.3806333839893341, + "learning_rate": 4.99945043803182e-06, + "loss": 0.6751, + "step": 538 + }, + { + "epoch": 0.049205769581887895, + "grad_norm": 0.38236045837402344, + "learning_rate": 4.999447925844279e-06, + "loss": 0.672, + "step": 539 + }, + { + "epoch": 0.04929706043454446, + "grad_norm": 0.3912815749645233, + "learning_rate": 4.9994454079285345e-06, + "loss": 0.7101, + "step": 540 + }, + { + "epoch": 0.049388351287201024, + "grad_norm": 0.3895753026008606, + "learning_rate": 4.999442884284593e-06, + "loss": 0.6732, + "step": 541 + }, + { + "epoch": 0.04947964213985759, + "grad_norm": 0.3938537538051605, + "learning_rate": 4.999440354912459e-06, + "loss": 0.6528, + "step": 542 + }, + { + "epoch": 0.04957093299251415, + "grad_norm": 0.39563897252082825, + "learning_rate": 4.999437819812139e-06, + "loss": 0.6437, + "step": 543 + }, + { + "epoch": 0.049662223845170717, + "grad_norm": 0.39320260286331177, + "learning_rate": 4.9994352789836376e-06, + "loss": 0.6539, + "step": 544 + }, + { + "epoch": 0.04975351469782728, + "grad_norm": 0.39298954606056213, + "learning_rate": 4.999432732426962e-06, + "loss": 0.6615, + "step": 545 + }, + { + "epoch": 0.049844805550483845, + "grad_norm": 0.39219993352890015, + "learning_rate": 4.999430180142118e-06, + "loss": 0.6696, + "step": 546 + }, + { + "epoch": 0.0499360964031404, + "grad_norm": 0.37926968932151794, + "learning_rate": 4.999427622129112e-06, + "loss": 0.7051, + "step": 547 + }, + { + "epoch": 0.050027387255796966, + "grad_norm": 0.4507634937763214, + "learning_rate": 4.999425058387948e-06, + "loss": 0.6701, + "step": 548 + }, + { + "epoch": 0.05011867810845353, + "grad_norm": 0.3923813998699188, + "learning_rate": 4.999422488918633e-06, + "loss": 0.7064, + "step": 549 + }, + { + "epoch": 0.050209968961110095, + "grad_norm": 0.3985123932361603, + "learning_rate": 4.999419913721173e-06, + "loss": 0.6642, + "step": 550 + }, + { + "epoch": 0.05030125981376666, + "grad_norm": 0.4269675016403198, + "learning_rate": 4.999417332795573e-06, + "loss": 0.6566, + "step": 551 + }, + { + "epoch": 0.05039255066642322, + "grad_norm": 0.37865403294563293, + "learning_rate": 4.99941474614184e-06, + "loss": 0.6716, + "step": 552 + }, + { + "epoch": 0.05048384151907979, + "grad_norm": 0.3681197166442871, + "learning_rate": 4.9994121537599794e-06, + "loss": 0.6824, + "step": 553 + }, + { + "epoch": 0.05057513237173635, + "grad_norm": 0.3967353403568268, + "learning_rate": 4.999409555649998e-06, + "loss": 0.6742, + "step": 554 + }, + { + "epoch": 0.050666423224392916, + "grad_norm": 0.407619833946228, + "learning_rate": 4.9994069518119e-06, + "loss": 0.6809, + "step": 555 + }, + { + "epoch": 0.05075771407704948, + "grad_norm": 0.41666415333747864, + "learning_rate": 4.999404342245693e-06, + "loss": 0.7039, + "step": 556 + }, + { + "epoch": 0.050849004929706045, + "grad_norm": 0.38353419303894043, + "learning_rate": 4.999401726951382e-06, + "loss": 0.6694, + "step": 557 + }, + { + "epoch": 0.05094029578236261, + "grad_norm": 0.41499319672584534, + "learning_rate": 4.999399105928973e-06, + "loss": 0.7016, + "step": 558 + }, + { + "epoch": 0.05103158663501917, + "grad_norm": 0.4226193428039551, + "learning_rate": 4.999396479178473e-06, + "loss": 0.6632, + "step": 559 + }, + { + "epoch": 0.05112287748767574, + "grad_norm": 0.3802546262741089, + "learning_rate": 4.999393846699887e-06, + "loss": 0.6594, + "step": 560 + }, + { + "epoch": 0.0512141683403323, + "grad_norm": 0.40158066153526306, + "learning_rate": 4.999391208493222e-06, + "loss": 0.6646, + "step": 561 + }, + { + "epoch": 0.051305459192988866, + "grad_norm": 0.3945000469684601, + "learning_rate": 4.999388564558483e-06, + "loss": 0.7175, + "step": 562 + }, + { + "epoch": 0.05139675004564542, + "grad_norm": 0.3774644434452057, + "learning_rate": 4.999385914895676e-06, + "loss": 0.6739, + "step": 563 + }, + { + "epoch": 0.05148804089830199, + "grad_norm": 0.422597736120224, + "learning_rate": 4.999383259504808e-06, + "loss": 0.6638, + "step": 564 + }, + { + "epoch": 0.05157933175095855, + "grad_norm": 0.38308289647102356, + "learning_rate": 4.9993805983858846e-06, + "loss": 0.658, + "step": 565 + }, + { + "epoch": 0.051670622603615116, + "grad_norm": 0.404683917760849, + "learning_rate": 4.9993779315389125e-06, + "loss": 0.6665, + "step": 566 + }, + { + "epoch": 0.05176191345627168, + "grad_norm": 0.3787073493003845, + "learning_rate": 4.999375258963897e-06, + "loss": 0.653, + "step": 567 + }, + { + "epoch": 0.051853204308928244, + "grad_norm": 0.3954494595527649, + "learning_rate": 4.999372580660844e-06, + "loss": 0.6814, + "step": 568 + }, + { + "epoch": 0.05194449516158481, + "grad_norm": 0.4210622012615204, + "learning_rate": 4.99936989662976e-06, + "loss": 0.661, + "step": 569 + }, + { + "epoch": 0.05203578601424137, + "grad_norm": 0.3972916901111603, + "learning_rate": 4.999367206870652e-06, + "loss": 0.6949, + "step": 570 + }, + { + "epoch": 0.05212707686689794, + "grad_norm": 0.3884393870830536, + "learning_rate": 4.999364511383525e-06, + "loss": 0.6925, + "step": 571 + }, + { + "epoch": 0.0522183677195545, + "grad_norm": 0.41106149554252625, + "learning_rate": 4.999361810168386e-06, + "loss": 0.7005, + "step": 572 + }, + { + "epoch": 0.052309658572211065, + "grad_norm": 0.4217515289783478, + "learning_rate": 4.999359103225241e-06, + "loss": 0.642, + "step": 573 + }, + { + "epoch": 0.05240094942486763, + "grad_norm": 0.41447100043296814, + "learning_rate": 4.9993563905540954e-06, + "loss": 0.6349, + "step": 574 + }, + { + "epoch": 0.052492240277524194, + "grad_norm": 0.37484225630760193, + "learning_rate": 4.999353672154957e-06, + "loss": 0.7129, + "step": 575 + }, + { + "epoch": 0.05258353113018076, + "grad_norm": 0.3948400020599365, + "learning_rate": 4.99935094802783e-06, + "loss": 0.6528, + "step": 576 + }, + { + "epoch": 0.05267482198283732, + "grad_norm": 0.4180794060230255, + "learning_rate": 4.999348218172722e-06, + "loss": 0.6751, + "step": 577 + }, + { + "epoch": 0.052766112835493886, + "grad_norm": 0.3829811215400696, + "learning_rate": 4.999345482589639e-06, + "loss": 0.6582, + "step": 578 + }, + { + "epoch": 0.05285740368815045, + "grad_norm": 0.4000038504600525, + "learning_rate": 4.999342741278588e-06, + "loss": 0.6327, + "step": 579 + }, + { + "epoch": 0.05294869454080701, + "grad_norm": 0.3872138261795044, + "learning_rate": 4.999339994239574e-06, + "loss": 0.6641, + "step": 580 + }, + { + "epoch": 0.05303998539346357, + "grad_norm": 0.4271261990070343, + "learning_rate": 4.9993372414726025e-06, + "loss": 0.6614, + "step": 581 + }, + { + "epoch": 0.053131276246120136, + "grad_norm": 0.4011830985546112, + "learning_rate": 4.999334482977683e-06, + "loss": 0.6711, + "step": 582 + }, + { + "epoch": 0.0532225670987767, + "grad_norm": 0.41854405403137207, + "learning_rate": 4.999331718754819e-06, + "loss": 0.6793, + "step": 583 + }, + { + "epoch": 0.053313857951433265, + "grad_norm": 0.4060238003730774, + "learning_rate": 4.999328948804018e-06, + "loss": 0.6706, + "step": 584 + }, + { + "epoch": 0.05340514880408983, + "grad_norm": 0.4323076009750366, + "learning_rate": 4.999326173125286e-06, + "loss": 0.6438, + "step": 585 + }, + { + "epoch": 0.05349643965674639, + "grad_norm": 0.4004110097885132, + "learning_rate": 4.99932339171863e-06, + "loss": 0.6539, + "step": 586 + }, + { + "epoch": 0.05358773050940296, + "grad_norm": 0.41786614060401917, + "learning_rate": 4.999320604584056e-06, + "loss": 0.6599, + "step": 587 + }, + { + "epoch": 0.05367902136205952, + "grad_norm": 0.39222252368927, + "learning_rate": 4.999317811721569e-06, + "loss": 0.648, + "step": 588 + }, + { + "epoch": 0.053770312214716086, + "grad_norm": 0.41459548473358154, + "learning_rate": 4.999315013131177e-06, + "loss": 0.6833, + "step": 589 + }, + { + "epoch": 0.05386160306737265, + "grad_norm": 0.426921546459198, + "learning_rate": 4.999312208812887e-06, + "loss": 0.6301, + "step": 590 + }, + { + "epoch": 0.053952893920029214, + "grad_norm": 0.40150561928749084, + "learning_rate": 4.999309398766704e-06, + "loss": 0.6718, + "step": 591 + }, + { + "epoch": 0.05404418477268578, + "grad_norm": 0.3878902196884155, + "learning_rate": 4.9993065829926355e-06, + "loss": 0.6851, + "step": 592 + }, + { + "epoch": 0.05413547562534234, + "grad_norm": 0.4116886258125305, + "learning_rate": 4.999303761490687e-06, + "loss": 0.6675, + "step": 593 + }, + { + "epoch": 0.05422676647799891, + "grad_norm": 0.3840506970882416, + "learning_rate": 4.999300934260865e-06, + "loss": 0.7254, + "step": 594 + }, + { + "epoch": 0.05431805733065547, + "grad_norm": 0.38283205032348633, + "learning_rate": 4.999298101303176e-06, + "loss": 0.7035, + "step": 595 + }, + { + "epoch": 0.054409348183312035, + "grad_norm": 0.43563112616539, + "learning_rate": 4.999295262617628e-06, + "loss": 0.6716, + "step": 596 + }, + { + "epoch": 0.05450063903596859, + "grad_norm": 0.39210352301597595, + "learning_rate": 4.9992924182042255e-06, + "loss": 0.6467, + "step": 597 + }, + { + "epoch": 0.05459192988862516, + "grad_norm": 0.40817517042160034, + "learning_rate": 4.999289568062976e-06, + "loss": 0.6949, + "step": 598 + }, + { + "epoch": 0.05468322074128172, + "grad_norm": 0.3996681869029999, + "learning_rate": 4.999286712193886e-06, + "loss": 0.648, + "step": 599 + }, + { + "epoch": 0.054774511593938285, + "grad_norm": 0.3801427185535431, + "learning_rate": 4.999283850596962e-06, + "loss": 0.6591, + "step": 600 + }, + { + "epoch": 0.05486580244659485, + "grad_norm": 0.4333913028240204, + "learning_rate": 4.99928098327221e-06, + "loss": 0.6639, + "step": 601 + }, + { + "epoch": 0.054957093299251414, + "grad_norm": 0.38112854957580566, + "learning_rate": 4.9992781102196375e-06, + "loss": 0.6712, + "step": 602 + }, + { + "epoch": 0.05504838415190798, + "grad_norm": 0.42062488198280334, + "learning_rate": 4.99927523143925e-06, + "loss": 0.6702, + "step": 603 + }, + { + "epoch": 0.05513967500456454, + "grad_norm": 0.42690691351890564, + "learning_rate": 4.999272346931055e-06, + "loss": 0.6737, + "step": 604 + }, + { + "epoch": 0.055230965857221107, + "grad_norm": 0.428337961435318, + "learning_rate": 4.99926945669506e-06, + "loss": 0.6559, + "step": 605 + }, + { + "epoch": 0.05532225670987767, + "grad_norm": 0.42687731981277466, + "learning_rate": 4.999266560731269e-06, + "loss": 0.6979, + "step": 606 + }, + { + "epoch": 0.055413547562534235, + "grad_norm": 0.4007745385169983, + "learning_rate": 4.999263659039691e-06, + "loss": 0.6967, + "step": 607 + }, + { + "epoch": 0.0555048384151908, + "grad_norm": 0.3912906050682068, + "learning_rate": 4.9992607516203315e-06, + "loss": 0.6502, + "step": 608 + }, + { + "epoch": 0.05559612926784736, + "grad_norm": 0.39106661081314087, + "learning_rate": 4.999257838473198e-06, + "loss": 0.675, + "step": 609 + }, + { + "epoch": 0.05568742012050393, + "grad_norm": 0.4223267436027527, + "learning_rate": 4.999254919598295e-06, + "loss": 0.6325, + "step": 610 + }, + { + "epoch": 0.05577871097316049, + "grad_norm": 0.38540908694267273, + "learning_rate": 4.999251994995633e-06, + "loss": 0.6941, + "step": 611 + }, + { + "epoch": 0.055870001825817056, + "grad_norm": 0.4410611093044281, + "learning_rate": 4.999249064665214e-06, + "loss": 0.6679, + "step": 612 + }, + { + "epoch": 0.05596129267847362, + "grad_norm": 0.41830044984817505, + "learning_rate": 4.999246128607049e-06, + "loss": 0.7004, + "step": 613 + }, + { + "epoch": 0.05605258353113018, + "grad_norm": 0.4376927614212036, + "learning_rate": 4.999243186821143e-06, + "loss": 0.6343, + "step": 614 + }, + { + "epoch": 0.05614387438378674, + "grad_norm": 0.4177927076816559, + "learning_rate": 4.999240239307502e-06, + "loss": 0.6629, + "step": 615 + }, + { + "epoch": 0.056235165236443306, + "grad_norm": 0.42397046089172363, + "learning_rate": 4.999237286066133e-06, + "loss": 0.6659, + "step": 616 + }, + { + "epoch": 0.05632645608909987, + "grad_norm": 0.40964049100875854, + "learning_rate": 4.999234327097045e-06, + "loss": 0.6903, + "step": 617 + }, + { + "epoch": 0.056417746941756435, + "grad_norm": 0.4204998314380646, + "learning_rate": 4.999231362400241e-06, + "loss": 0.6639, + "step": 618 + }, + { + "epoch": 0.056509037794413, + "grad_norm": 0.4298378527164459, + "learning_rate": 4.99922839197573e-06, + "loss": 0.6442, + "step": 619 + }, + { + "epoch": 0.05660032864706956, + "grad_norm": 0.4260886013507843, + "learning_rate": 4.99922541582352e-06, + "loss": 0.6808, + "step": 620 + }, + { + "epoch": 0.05669161949972613, + "grad_norm": 0.444267600774765, + "learning_rate": 4.999222433943615e-06, + "loss": 0.6444, + "step": 621 + }, + { + "epoch": 0.05678291035238269, + "grad_norm": 0.4174424707889557, + "learning_rate": 4.999219446336024e-06, + "loss": 0.6415, + "step": 622 + }, + { + "epoch": 0.056874201205039256, + "grad_norm": 0.3816477954387665, + "learning_rate": 4.999216453000753e-06, + "loss": 0.6462, + "step": 623 + }, + { + "epoch": 0.05696549205769582, + "grad_norm": 0.4213516414165497, + "learning_rate": 4.9992134539378085e-06, + "loss": 0.6627, + "step": 624 + }, + { + "epoch": 0.057056782910352384, + "grad_norm": 0.47928568720817566, + "learning_rate": 4.999210449147198e-06, + "loss": 0.6327, + "step": 625 + }, + { + "epoch": 0.05714807376300895, + "grad_norm": 0.42737165093421936, + "learning_rate": 4.999207438628928e-06, + "loss": 0.6268, + "step": 626 + }, + { + "epoch": 0.05723936461566551, + "grad_norm": 0.4267426133155823, + "learning_rate": 4.999204422383006e-06, + "loss": 0.6513, + "step": 627 + }, + { + "epoch": 0.05733065546832208, + "grad_norm": 0.39638569951057434, + "learning_rate": 4.999201400409439e-06, + "loss": 0.6727, + "step": 628 + }, + { + "epoch": 0.05742194632097864, + "grad_norm": 0.4031842052936554, + "learning_rate": 4.999198372708233e-06, + "loss": 0.6749, + "step": 629 + }, + { + "epoch": 0.057513237173635205, + "grad_norm": 0.4395941197872162, + "learning_rate": 4.999195339279395e-06, + "loss": 0.6483, + "step": 630 + }, + { + "epoch": 0.05760452802629176, + "grad_norm": 0.3938354253768921, + "learning_rate": 4.999192300122932e-06, + "loss": 0.6609, + "step": 631 + }, + { + "epoch": 0.05769581887894833, + "grad_norm": 0.451696515083313, + "learning_rate": 4.999189255238852e-06, + "loss": 0.7044, + "step": 632 + }, + { + "epoch": 0.05778710973160489, + "grad_norm": 0.4307000935077667, + "learning_rate": 4.9991862046271625e-06, + "loss": 0.6671, + "step": 633 + }, + { + "epoch": 0.057878400584261455, + "grad_norm": 0.39128443598747253, + "learning_rate": 4.999183148287867e-06, + "loss": 0.6614, + "step": 634 + }, + { + "epoch": 0.05796969143691802, + "grad_norm": 0.3900526165962219, + "learning_rate": 4.999180086220976e-06, + "loss": 0.6492, + "step": 635 + }, + { + "epoch": 0.058060982289574584, + "grad_norm": 0.4216470718383789, + "learning_rate": 4.999177018426495e-06, + "loss": 0.6601, + "step": 636 + }, + { + "epoch": 0.05815227314223115, + "grad_norm": 0.3997768461704254, + "learning_rate": 4.999173944904432e-06, + "loss": 0.6867, + "step": 637 + }, + { + "epoch": 0.05824356399488771, + "grad_norm": 0.41191112995147705, + "learning_rate": 4.999170865654792e-06, + "loss": 0.6336, + "step": 638 + }, + { + "epoch": 0.058334854847544276, + "grad_norm": 0.428680419921875, + "learning_rate": 4.999167780677584e-06, + "loss": 0.6953, + "step": 639 + }, + { + "epoch": 0.05842614570020084, + "grad_norm": 0.416694700717926, + "learning_rate": 4.999164689972815e-06, + "loss": 0.694, + "step": 640 + }, + { + "epoch": 0.058517436552857405, + "grad_norm": 0.42111021280288696, + "learning_rate": 4.999161593540491e-06, + "loss": 0.6628, + "step": 641 + }, + { + "epoch": 0.05860872740551397, + "grad_norm": 0.4283362627029419, + "learning_rate": 4.99915849138062e-06, + "loss": 0.6405, + "step": 642 + }, + { + "epoch": 0.05870001825817053, + "grad_norm": 0.39876052737236023, + "learning_rate": 4.999155383493209e-06, + "loss": 0.6751, + "step": 643 + }, + { + "epoch": 0.0587913091108271, + "grad_norm": 0.40818077325820923, + "learning_rate": 4.999152269878265e-06, + "loss": 0.6741, + "step": 644 + }, + { + "epoch": 0.05888259996348366, + "grad_norm": 0.41474419832229614, + "learning_rate": 4.999149150535794e-06, + "loss": 0.6734, + "step": 645 + }, + { + "epoch": 0.058973890816140226, + "grad_norm": 0.41929399967193604, + "learning_rate": 4.999146025465805e-06, + "loss": 0.652, + "step": 646 + }, + { + "epoch": 0.05906518166879678, + "grad_norm": 0.4249083697795868, + "learning_rate": 4.999142894668304e-06, + "loss": 0.6437, + "step": 647 + }, + { + "epoch": 0.05915647252145335, + "grad_norm": 0.40257468819618225, + "learning_rate": 4.999139758143299e-06, + "loss": 0.6421, + "step": 648 + }, + { + "epoch": 0.05924776337410991, + "grad_norm": 0.4070029556751251, + "learning_rate": 4.999136615890796e-06, + "loss": 0.6461, + "step": 649 + }, + { + "epoch": 0.059339054226766476, + "grad_norm": 0.45141464471817017, + "learning_rate": 4.999133467910804e-06, + "loss": 0.6477, + "step": 650 + }, + { + "epoch": 0.05943034507942304, + "grad_norm": 0.4280347228050232, + "learning_rate": 4.999130314203328e-06, + "loss": 0.6922, + "step": 651 + }, + { + "epoch": 0.059521635932079604, + "grad_norm": 0.4433148503303528, + "learning_rate": 4.999127154768378e-06, + "loss": 0.658, + "step": 652 + }, + { + "epoch": 0.05961292678473617, + "grad_norm": 0.3755834400653839, + "learning_rate": 4.999123989605957e-06, + "loss": 0.6612, + "step": 653 + }, + { + "epoch": 0.05970421763739273, + "grad_norm": 0.40714704990386963, + "learning_rate": 4.999120818716077e-06, + "loss": 0.6756, + "step": 654 + }, + { + "epoch": 0.0597955084900493, + "grad_norm": 0.41739970445632935, + "learning_rate": 4.9991176420987416e-06, + "loss": 0.6508, + "step": 655 + }, + { + "epoch": 0.05988679934270586, + "grad_norm": 0.4094506502151489, + "learning_rate": 4.999114459753961e-06, + "loss": 0.6423, + "step": 656 + }, + { + "epoch": 0.059978090195362425, + "grad_norm": 0.4132070541381836, + "learning_rate": 4.9991112716817404e-06, + "loss": 0.6818, + "step": 657 + }, + { + "epoch": 0.06006938104801899, + "grad_norm": 0.4311443269252777, + "learning_rate": 4.999108077882088e-06, + "loss": 0.66, + "step": 658 + }, + { + "epoch": 0.060160671900675554, + "grad_norm": 0.40546005964279175, + "learning_rate": 4.999104878355011e-06, + "loss": 0.6623, + "step": 659 + }, + { + "epoch": 0.06025196275333212, + "grad_norm": 0.41920965909957886, + "learning_rate": 4.999101673100516e-06, + "loss": 0.6447, + "step": 660 + }, + { + "epoch": 0.06034325360598868, + "grad_norm": 0.3860510587692261, + "learning_rate": 4.999098462118612e-06, + "loss": 0.6937, + "step": 661 + }, + { + "epoch": 0.06043454445864525, + "grad_norm": 0.4227907955646515, + "learning_rate": 4.999095245409305e-06, + "loss": 0.6752, + "step": 662 + }, + { + "epoch": 0.06052583531130181, + "grad_norm": 0.4212552607059479, + "learning_rate": 4.999092022972602e-06, + "loss": 0.6498, + "step": 663 + }, + { + "epoch": 0.06061712616395837, + "grad_norm": 0.39683133363723755, + "learning_rate": 4.999088794808512e-06, + "loss": 0.6674, + "step": 664 + }, + { + "epoch": 0.06070841701661493, + "grad_norm": 0.42421674728393555, + "learning_rate": 4.999085560917041e-06, + "loss": 0.6448, + "step": 665 + }, + { + "epoch": 0.060799707869271497, + "grad_norm": 0.4209863245487213, + "learning_rate": 4.999082321298198e-06, + "loss": 0.6574, + "step": 666 + }, + { + "epoch": 0.06089099872192806, + "grad_norm": 0.41120749711990356, + "learning_rate": 4.999079075951988e-06, + "loss": 0.7116, + "step": 667 + }, + { + "epoch": 0.060982289574584625, + "grad_norm": 0.3923589289188385, + "learning_rate": 4.99907582487842e-06, + "loss": 0.6885, + "step": 668 + }, + { + "epoch": 0.06107358042724119, + "grad_norm": 0.39750978350639343, + "learning_rate": 4.999072568077502e-06, + "loss": 0.6772, + "step": 669 + }, + { + "epoch": 0.06116487127989775, + "grad_norm": 0.40812546014785767, + "learning_rate": 4.99906930554924e-06, + "loss": 0.648, + "step": 670 + }, + { + "epoch": 0.06125616213255432, + "grad_norm": 0.41468915343284607, + "learning_rate": 4.999066037293643e-06, + "loss": 0.6838, + "step": 671 + }, + { + "epoch": 0.06134745298521088, + "grad_norm": 0.3917642831802368, + "learning_rate": 4.999062763310716e-06, + "loss": 0.6787, + "step": 672 + }, + { + "epoch": 0.061438743837867446, + "grad_norm": 0.4405134916305542, + "learning_rate": 4.99905948360047e-06, + "loss": 0.6749, + "step": 673 + }, + { + "epoch": 0.06153003469052401, + "grad_norm": 0.37939807772636414, + "learning_rate": 4.99905619816291e-06, + "loss": 0.6661, + "step": 674 + }, + { + "epoch": 0.061621325543180575, + "grad_norm": 0.41962674260139465, + "learning_rate": 4.999052906998044e-06, + "loss": 0.6527, + "step": 675 + }, + { + "epoch": 0.06171261639583714, + "grad_norm": 0.41839006543159485, + "learning_rate": 4.999049610105879e-06, + "loss": 0.6495, + "step": 676 + }, + { + "epoch": 0.0618039072484937, + "grad_norm": 0.4372313320636749, + "learning_rate": 4.999046307486425e-06, + "loss": 0.6647, + "step": 677 + }, + { + "epoch": 0.06189519810115027, + "grad_norm": 0.4331560730934143, + "learning_rate": 4.999042999139687e-06, + "loss": 0.6596, + "step": 678 + }, + { + "epoch": 0.06198648895380683, + "grad_norm": 0.413308709859848, + "learning_rate": 4.999039685065674e-06, + "loss": 0.6984, + "step": 679 + }, + { + "epoch": 0.062077779806463396, + "grad_norm": 0.4319170415401459, + "learning_rate": 4.9990363652643924e-06, + "loss": 0.6505, + "step": 680 + }, + { + "epoch": 0.06216907065911995, + "grad_norm": 0.39381420612335205, + "learning_rate": 4.9990330397358506e-06, + "loss": 0.6981, + "step": 681 + }, + { + "epoch": 0.06226036151177652, + "grad_norm": 0.41272974014282227, + "learning_rate": 4.9990297084800564e-06, + "loss": 0.6929, + "step": 682 + }, + { + "epoch": 0.06235165236443308, + "grad_norm": 0.4115516245365143, + "learning_rate": 4.999026371497017e-06, + "loss": 0.6567, + "step": 683 + }, + { + "epoch": 0.062442943217089646, + "grad_norm": 0.4041133522987366, + "learning_rate": 4.99902302878674e-06, + "loss": 0.6676, + "step": 684 + }, + { + "epoch": 0.06253423406974622, + "grad_norm": 0.4020182490348816, + "learning_rate": 4.999019680349233e-06, + "loss": 0.6508, + "step": 685 + }, + { + "epoch": 0.06262552492240278, + "grad_norm": 0.42469966411590576, + "learning_rate": 4.999016326184505e-06, + "loss": 0.6933, + "step": 686 + }, + { + "epoch": 0.06271681577505935, + "grad_norm": 0.43353283405303955, + "learning_rate": 4.9990129662925615e-06, + "loss": 0.665, + "step": 687 + }, + { + "epoch": 0.06280810662771591, + "grad_norm": 0.40704643726348877, + "learning_rate": 4.9990096006734115e-06, + "loss": 0.6652, + "step": 688 + }, + { + "epoch": 0.06289939748037246, + "grad_norm": 0.43241870403289795, + "learning_rate": 4.999006229327063e-06, + "loss": 0.646, + "step": 689 + }, + { + "epoch": 0.06299068833302902, + "grad_norm": 0.4392887353897095, + "learning_rate": 4.9990028522535226e-06, + "loss": 0.6542, + "step": 690 + }, + { + "epoch": 0.06308197918568559, + "grad_norm": 0.42760583758354187, + "learning_rate": 4.998999469452799e-06, + "loss": 0.6236, + "step": 691 + }, + { + "epoch": 0.06317327003834215, + "grad_norm": 0.42326483130455017, + "learning_rate": 4.998996080924899e-06, + "loss": 0.6198, + "step": 692 + }, + { + "epoch": 0.06326456089099872, + "grad_norm": 0.42503899335861206, + "learning_rate": 4.998992686669833e-06, + "loss": 0.6546, + "step": 693 + }, + { + "epoch": 0.06335585174365528, + "grad_norm": 0.3801426887512207, + "learning_rate": 4.998989286687605e-06, + "loss": 0.6506, + "step": 694 + }, + { + "epoch": 0.06344714259631185, + "grad_norm": 0.4177113175392151, + "learning_rate": 4.998985880978224e-06, + "loss": 0.658, + "step": 695 + }, + { + "epoch": 0.06353843344896841, + "grad_norm": 0.42337295413017273, + "learning_rate": 4.9989824695417e-06, + "loss": 0.6627, + "step": 696 + }, + { + "epoch": 0.06362972430162497, + "grad_norm": 0.3977207839488983, + "learning_rate": 4.998979052378038e-06, + "loss": 0.6403, + "step": 697 + }, + { + "epoch": 0.06372101515428154, + "grad_norm": 0.40340423583984375, + "learning_rate": 4.9989756294872476e-06, + "loss": 0.6615, + "step": 698 + }, + { + "epoch": 0.0638123060069381, + "grad_norm": 0.40903088450431824, + "learning_rate": 4.998972200869336e-06, + "loss": 0.6911, + "step": 699 + }, + { + "epoch": 0.06390359685959467, + "grad_norm": 0.4132124185562134, + "learning_rate": 4.998968766524311e-06, + "loss": 0.6303, + "step": 700 + }, + { + "epoch": 0.06399488771225123, + "grad_norm": 0.422931045293808, + "learning_rate": 4.998965326452181e-06, + "loss": 0.6765, + "step": 701 + }, + { + "epoch": 0.0640861785649078, + "grad_norm": 0.4331187903881073, + "learning_rate": 4.998961880652953e-06, + "loss": 0.6291, + "step": 702 + }, + { + "epoch": 0.06417746941756436, + "grad_norm": 0.43487292528152466, + "learning_rate": 4.9989584291266354e-06, + "loss": 0.6604, + "step": 703 + }, + { + "epoch": 0.06426876027022092, + "grad_norm": 0.41108694672584534, + "learning_rate": 4.998954971873237e-06, + "loss": 0.6915, + "step": 704 + }, + { + "epoch": 0.06436005112287749, + "grad_norm": 0.4049113392829895, + "learning_rate": 4.998951508892763e-06, + "loss": 0.6849, + "step": 705 + }, + { + "epoch": 0.06445134197553405, + "grad_norm": 0.4071425199508667, + "learning_rate": 4.998948040185224e-06, + "loss": 0.6706, + "step": 706 + }, + { + "epoch": 0.06454263282819062, + "grad_norm": 0.4004170894622803, + "learning_rate": 4.998944565750628e-06, + "loss": 0.6786, + "step": 707 + }, + { + "epoch": 0.06463392368084718, + "grad_norm": 0.4338032901287079, + "learning_rate": 4.998941085588981e-06, + "loss": 0.6445, + "step": 708 + }, + { + "epoch": 0.06472521453350374, + "grad_norm": 0.4187476634979248, + "learning_rate": 4.998937599700292e-06, + "loss": 0.6431, + "step": 709 + }, + { + "epoch": 0.06481650538616031, + "grad_norm": 0.40085330605506897, + "learning_rate": 4.99893410808457e-06, + "loss": 0.6722, + "step": 710 + }, + { + "epoch": 0.06490779623881687, + "grad_norm": 0.394779235124588, + "learning_rate": 4.998930610741821e-06, + "loss": 0.6469, + "step": 711 + }, + { + "epoch": 0.06499908709147344, + "grad_norm": 0.4032870829105377, + "learning_rate": 4.9989271076720544e-06, + "loss": 0.6586, + "step": 712 + }, + { + "epoch": 0.06509037794413, + "grad_norm": 0.4559696316719055, + "learning_rate": 4.998923598875278e-06, + "loss": 0.6283, + "step": 713 + }, + { + "epoch": 0.06518166879678657, + "grad_norm": 0.4183897376060486, + "learning_rate": 4.9989200843515e-06, + "loss": 0.6908, + "step": 714 + }, + { + "epoch": 0.06527295964944313, + "grad_norm": 0.4126933217048645, + "learning_rate": 4.9989165641007275e-06, + "loss": 0.6488, + "step": 715 + }, + { + "epoch": 0.0653642505020997, + "grad_norm": 0.40130388736724854, + "learning_rate": 4.99891303812297e-06, + "loss": 0.6498, + "step": 716 + }, + { + "epoch": 0.06545554135475626, + "grad_norm": 0.42422401905059814, + "learning_rate": 4.9989095064182345e-06, + "loss": 0.6444, + "step": 717 + }, + { + "epoch": 0.06554683220741282, + "grad_norm": 0.41207119822502136, + "learning_rate": 4.9989059689865284e-06, + "loss": 0.6394, + "step": 718 + }, + { + "epoch": 0.06563812306006939, + "grad_norm": 0.4167602062225342, + "learning_rate": 4.9989024258278615e-06, + "loss": 0.647, + "step": 719 + }, + { + "epoch": 0.06572941391272595, + "grad_norm": 0.395680695772171, + "learning_rate": 4.998898876942242e-06, + "loss": 0.656, + "step": 720 + }, + { + "epoch": 0.06582070476538252, + "grad_norm": 0.4223659336566925, + "learning_rate": 4.9988953223296765e-06, + "loss": 0.6176, + "step": 721 + }, + { + "epoch": 0.06591199561803908, + "grad_norm": 0.41858914494514465, + "learning_rate": 4.9988917619901745e-06, + "loss": 0.6889, + "step": 722 + }, + { + "epoch": 0.06600328647069563, + "grad_norm": 0.39375895261764526, + "learning_rate": 4.998888195923743e-06, + "loss": 0.6626, + "step": 723 + }, + { + "epoch": 0.0660945773233522, + "grad_norm": 0.4366607666015625, + "learning_rate": 4.99888462413039e-06, + "loss": 0.6443, + "step": 724 + }, + { + "epoch": 0.06618586817600876, + "grad_norm": 0.40507620573043823, + "learning_rate": 4.998881046610125e-06, + "loss": 0.645, + "step": 725 + }, + { + "epoch": 0.06627715902866532, + "grad_norm": 0.4083855450153351, + "learning_rate": 4.998877463362957e-06, + "loss": 0.6773, + "step": 726 + }, + { + "epoch": 0.06636844988132189, + "grad_norm": 0.44155335426330566, + "learning_rate": 4.998873874388892e-06, + "loss": 0.6666, + "step": 727 + }, + { + "epoch": 0.06645974073397845, + "grad_norm": 0.4565143883228302, + "learning_rate": 4.998870279687938e-06, + "loss": 0.6651, + "step": 728 + }, + { + "epoch": 0.06655103158663501, + "grad_norm": 0.4382263422012329, + "learning_rate": 4.998866679260106e-06, + "loss": 0.6528, + "step": 729 + }, + { + "epoch": 0.06664232243929158, + "grad_norm": 0.417591392993927, + "learning_rate": 4.998863073105401e-06, + "loss": 0.6978, + "step": 730 + }, + { + "epoch": 0.06673361329194814, + "grad_norm": 0.40352457761764526, + "learning_rate": 4.998859461223834e-06, + "loss": 0.6714, + "step": 731 + }, + { + "epoch": 0.06682490414460471, + "grad_norm": 0.4211982190608978, + "learning_rate": 4.9988558436154115e-06, + "loss": 0.65, + "step": 732 + }, + { + "epoch": 0.06691619499726127, + "grad_norm": 0.41649124026298523, + "learning_rate": 4.998852220280143e-06, + "loss": 0.6427, + "step": 733 + }, + { + "epoch": 0.06700748584991784, + "grad_norm": 0.4176290035247803, + "learning_rate": 4.998848591218035e-06, + "loss": 0.6557, + "step": 734 + }, + { + "epoch": 0.0670987767025744, + "grad_norm": 0.4221097230911255, + "learning_rate": 4.998844956429098e-06, + "loss": 0.6785, + "step": 735 + }, + { + "epoch": 0.06719006755523096, + "grad_norm": 0.41179144382476807, + "learning_rate": 4.998841315913339e-06, + "loss": 0.6283, + "step": 736 + }, + { + "epoch": 0.06728135840788753, + "grad_norm": 0.3966716229915619, + "learning_rate": 4.998837669670767e-06, + "loss": 0.6652, + "step": 737 + }, + { + "epoch": 0.06737264926054409, + "grad_norm": 0.41322916746139526, + "learning_rate": 4.998834017701389e-06, + "loss": 0.6611, + "step": 738 + }, + { + "epoch": 0.06746394011320066, + "grad_norm": 0.40228933095932007, + "learning_rate": 4.9988303600052155e-06, + "loss": 0.6888, + "step": 739 + }, + { + "epoch": 0.06755523096585722, + "grad_norm": 0.417727530002594, + "learning_rate": 4.998826696582254e-06, + "loss": 0.6855, + "step": 740 + }, + { + "epoch": 0.06764652181851379, + "grad_norm": 0.44023311138153076, + "learning_rate": 4.998823027432512e-06, + "loss": 0.6538, + "step": 741 + }, + { + "epoch": 0.06773781267117035, + "grad_norm": 0.4468768537044525, + "learning_rate": 4.998819352555998e-06, + "loss": 0.677, + "step": 742 + }, + { + "epoch": 0.06782910352382691, + "grad_norm": 0.4186881184577942, + "learning_rate": 4.9988156719527224e-06, + "loss": 0.6457, + "step": 743 + }, + { + "epoch": 0.06792039437648348, + "grad_norm": 0.40906471014022827, + "learning_rate": 4.998811985622691e-06, + "loss": 0.6349, + "step": 744 + }, + { + "epoch": 0.06801168522914004, + "grad_norm": 0.4055098295211792, + "learning_rate": 4.998808293565914e-06, + "loss": 0.6762, + "step": 745 + }, + { + "epoch": 0.0681029760817966, + "grad_norm": 0.4226076304912567, + "learning_rate": 4.9988045957824e-06, + "loss": 0.6425, + "step": 746 + }, + { + "epoch": 0.06819426693445317, + "grad_norm": 0.4006712734699249, + "learning_rate": 4.9988008922721565e-06, + "loss": 0.6822, + "step": 747 + }, + { + "epoch": 0.06828555778710974, + "grad_norm": 0.44449305534362793, + "learning_rate": 4.9987971830351925e-06, + "loss": 0.6393, + "step": 748 + }, + { + "epoch": 0.0683768486397663, + "grad_norm": 0.4119352102279663, + "learning_rate": 4.998793468071516e-06, + "loss": 0.6182, + "step": 749 + }, + { + "epoch": 0.06846813949242286, + "grad_norm": 0.41106319427490234, + "learning_rate": 4.998789747381136e-06, + "loss": 0.6564, + "step": 750 + }, + { + "epoch": 0.06855943034507943, + "grad_norm": 0.45847031474113464, + "learning_rate": 4.99878602096406e-06, + "loss": 0.637, + "step": 751 + }, + { + "epoch": 0.06865072119773599, + "grad_norm": 0.4267105758190155, + "learning_rate": 4.998782288820298e-06, + "loss": 0.6198, + "step": 752 + }, + { + "epoch": 0.06874201205039256, + "grad_norm": 0.4056825637817383, + "learning_rate": 4.9987785509498575e-06, + "loss": 0.6303, + "step": 753 + }, + { + "epoch": 0.06883330290304912, + "grad_norm": 0.41392162442207336, + "learning_rate": 4.998774807352749e-06, + "loss": 0.6365, + "step": 754 + }, + { + "epoch": 0.06892459375570568, + "grad_norm": 0.40870603919029236, + "learning_rate": 4.998771058028978e-06, + "loss": 0.6641, + "step": 755 + }, + { + "epoch": 0.06901588460836225, + "grad_norm": 0.4581434428691864, + "learning_rate": 4.998767302978556e-06, + "loss": 0.625, + "step": 756 + }, + { + "epoch": 0.0691071754610188, + "grad_norm": 0.4203435182571411, + "learning_rate": 4.998763542201489e-06, + "loss": 0.6598, + "step": 757 + }, + { + "epoch": 0.06919846631367536, + "grad_norm": 0.42250069975852966, + "learning_rate": 4.998759775697788e-06, + "loss": 0.6816, + "step": 758 + }, + { + "epoch": 0.06928975716633193, + "grad_norm": 0.4333309233188629, + "learning_rate": 4.99875600346746e-06, + "loss": 0.6385, + "step": 759 + }, + { + "epoch": 0.06938104801898849, + "grad_norm": 0.4367373585700989, + "learning_rate": 4.9987522255105145e-06, + "loss": 0.614, + "step": 760 + }, + { + "epoch": 0.06947233887164506, + "grad_norm": 0.4396708607673645, + "learning_rate": 4.998748441826959e-06, + "loss": 0.6952, + "step": 761 + }, + { + "epoch": 0.06956362972430162, + "grad_norm": 0.42351800203323364, + "learning_rate": 4.9987446524168045e-06, + "loss": 0.6407, + "step": 762 + }, + { + "epoch": 0.06965492057695818, + "grad_norm": 0.4143843948841095, + "learning_rate": 4.9987408572800574e-06, + "loss": 0.6594, + "step": 763 + }, + { + "epoch": 0.06974621142961475, + "grad_norm": 0.39067479968070984, + "learning_rate": 4.998737056416727e-06, + "loss": 0.6475, + "step": 764 + }, + { + "epoch": 0.06983750228227131, + "grad_norm": 0.4436683654785156, + "learning_rate": 4.998733249826823e-06, + "loss": 0.6387, + "step": 765 + }, + { + "epoch": 0.06992879313492788, + "grad_norm": 0.4109380543231964, + "learning_rate": 4.998729437510353e-06, + "loss": 0.6442, + "step": 766 + }, + { + "epoch": 0.07002008398758444, + "grad_norm": 0.4183235764503479, + "learning_rate": 4.998725619467326e-06, + "loss": 0.6598, + "step": 767 + }, + { + "epoch": 0.070111374840241, + "grad_norm": 0.4247523546218872, + "learning_rate": 4.998721795697751e-06, + "loss": 0.6522, + "step": 768 + }, + { + "epoch": 0.07020266569289757, + "grad_norm": 0.44446080923080444, + "learning_rate": 4.9987179662016365e-06, + "loss": 0.6687, + "step": 769 + }, + { + "epoch": 0.07029395654555413, + "grad_norm": 0.4448980689048767, + "learning_rate": 4.998714130978991e-06, + "loss": 0.6674, + "step": 770 + }, + { + "epoch": 0.0703852473982107, + "grad_norm": 0.4220661520957947, + "learning_rate": 4.998710290029823e-06, + "loss": 0.6188, + "step": 771 + }, + { + "epoch": 0.07047653825086726, + "grad_norm": 0.42888668179512024, + "learning_rate": 4.9987064433541435e-06, + "loss": 0.6491, + "step": 772 + }, + { + "epoch": 0.07056782910352383, + "grad_norm": 0.45922061800956726, + "learning_rate": 4.998702590951959e-06, + "loss": 0.6475, + "step": 773 + }, + { + "epoch": 0.07065911995618039, + "grad_norm": 0.45889610052108765, + "learning_rate": 4.99869873282328e-06, + "loss": 0.635, + "step": 774 + }, + { + "epoch": 0.07075041080883696, + "grad_norm": 0.41889381408691406, + "learning_rate": 4.998694868968113e-06, + "loss": 0.6405, + "step": 775 + }, + { + "epoch": 0.07084170166149352, + "grad_norm": 0.38503319025039673, + "learning_rate": 4.998690999386468e-06, + "loss": 0.6863, + "step": 776 + }, + { + "epoch": 0.07093299251415008, + "grad_norm": 0.42855051159858704, + "learning_rate": 4.998687124078356e-06, + "loss": 0.6344, + "step": 777 + }, + { + "epoch": 0.07102428336680665, + "grad_norm": 0.4483852684497833, + "learning_rate": 4.9986832430437835e-06, + "loss": 0.6391, + "step": 778 + }, + { + "epoch": 0.07111557421946321, + "grad_norm": 0.45960259437561035, + "learning_rate": 4.998679356282759e-06, + "loss": 0.6526, + "step": 779 + }, + { + "epoch": 0.07120686507211978, + "grad_norm": 0.4250592887401581, + "learning_rate": 4.998675463795294e-06, + "loss": 0.6861, + "step": 780 + }, + { + "epoch": 0.07129815592477634, + "grad_norm": 0.42064303159713745, + "learning_rate": 4.998671565581394e-06, + "loss": 0.6582, + "step": 781 + }, + { + "epoch": 0.0713894467774329, + "grad_norm": 0.3982466757297516, + "learning_rate": 4.998667661641071e-06, + "loss": 0.6708, + "step": 782 + }, + { + "epoch": 0.07148073763008947, + "grad_norm": 0.4357706904411316, + "learning_rate": 4.998663751974332e-06, + "loss": 0.6765, + "step": 783 + }, + { + "epoch": 0.07157202848274603, + "grad_norm": 0.4253442883491516, + "learning_rate": 4.998659836581187e-06, + "loss": 0.6304, + "step": 784 + }, + { + "epoch": 0.0716633193354026, + "grad_norm": 0.4361851215362549, + "learning_rate": 4.998655915461644e-06, + "loss": 0.6583, + "step": 785 + }, + { + "epoch": 0.07175461018805916, + "grad_norm": 0.4567020535469055, + "learning_rate": 4.998651988615713e-06, + "loss": 0.6393, + "step": 786 + }, + { + "epoch": 0.07184590104071573, + "grad_norm": 0.4210924804210663, + "learning_rate": 4.998648056043403e-06, + "loss": 0.6663, + "step": 787 + }, + { + "epoch": 0.07193719189337229, + "grad_norm": 0.45269861817359924, + "learning_rate": 4.998644117744723e-06, + "loss": 0.6442, + "step": 788 + }, + { + "epoch": 0.07202848274602885, + "grad_norm": 0.41953492164611816, + "learning_rate": 4.99864017371968e-06, + "loss": 0.6631, + "step": 789 + }, + { + "epoch": 0.0721197735986854, + "grad_norm": 0.44697579741477966, + "learning_rate": 4.998636223968286e-06, + "loss": 0.6633, + "step": 790 + }, + { + "epoch": 0.07221106445134197, + "grad_norm": 0.40691617131233215, + "learning_rate": 4.998632268490548e-06, + "loss": 0.6297, + "step": 791 + }, + { + "epoch": 0.07230235530399853, + "grad_norm": 0.41908660531044006, + "learning_rate": 4.998628307286476e-06, + "loss": 0.6548, + "step": 792 + }, + { + "epoch": 0.0723936461566551, + "grad_norm": 0.40833842754364014, + "learning_rate": 4.998624340356079e-06, + "loss": 0.6576, + "step": 793 + }, + { + "epoch": 0.07248493700931166, + "grad_norm": 0.4356827139854431, + "learning_rate": 4.998620367699365e-06, + "loss": 0.6456, + "step": 794 + }, + { + "epoch": 0.07257622786196823, + "grad_norm": 0.4348240792751312, + "learning_rate": 4.9986163893163456e-06, + "loss": 0.6634, + "step": 795 + }, + { + "epoch": 0.07266751871462479, + "grad_norm": 0.4136624038219452, + "learning_rate": 4.9986124052070274e-06, + "loss": 0.6242, + "step": 796 + }, + { + "epoch": 0.07275880956728135, + "grad_norm": 0.4319223165512085, + "learning_rate": 4.998608415371421e-06, + "loss": 0.6951, + "step": 797 + }, + { + "epoch": 0.07285010041993792, + "grad_norm": 0.4304793179035187, + "learning_rate": 4.998604419809534e-06, + "loss": 0.6589, + "step": 798 + }, + { + "epoch": 0.07294139127259448, + "grad_norm": 0.4444180428981781, + "learning_rate": 4.998600418521378e-06, + "loss": 0.6606, + "step": 799 + }, + { + "epoch": 0.07303268212525105, + "grad_norm": 0.4206359088420868, + "learning_rate": 4.99859641150696e-06, + "loss": 0.6766, + "step": 800 + }, + { + "epoch": 0.07312397297790761, + "grad_norm": 0.45977967977523804, + "learning_rate": 4.99859239876629e-06, + "loss": 0.6522, + "step": 801 + }, + { + "epoch": 0.07321526383056418, + "grad_norm": 0.40196648240089417, + "learning_rate": 4.9985883802993775e-06, + "loss": 0.6044, + "step": 802 + }, + { + "epoch": 0.07330655468322074, + "grad_norm": 0.4097943902015686, + "learning_rate": 4.998584356106231e-06, + "loss": 0.6702, + "step": 803 + }, + { + "epoch": 0.0733978455358773, + "grad_norm": 0.4224611818790436, + "learning_rate": 4.99858032618686e-06, + "loss": 0.6521, + "step": 804 + }, + { + "epoch": 0.07348913638853387, + "grad_norm": 0.42143338918685913, + "learning_rate": 4.998576290541274e-06, + "loss": 0.6738, + "step": 805 + }, + { + "epoch": 0.07358042724119043, + "grad_norm": 0.4144637882709503, + "learning_rate": 4.9985722491694815e-06, + "loss": 0.6406, + "step": 806 + }, + { + "epoch": 0.073671718093847, + "grad_norm": 0.40608838200569153, + "learning_rate": 4.998568202071493e-06, + "loss": 0.6179, + "step": 807 + }, + { + "epoch": 0.07376300894650356, + "grad_norm": 0.446697860956192, + "learning_rate": 4.998564149247317e-06, + "loss": 0.6248, + "step": 808 + }, + { + "epoch": 0.07385429979916013, + "grad_norm": 0.4421645700931549, + "learning_rate": 4.998560090696963e-06, + "loss": 0.63, + "step": 809 + }, + { + "epoch": 0.07394559065181669, + "grad_norm": 0.4163895547389984, + "learning_rate": 4.9985560264204394e-06, + "loss": 0.6227, + "step": 810 + }, + { + "epoch": 0.07403688150447325, + "grad_norm": 0.4384167194366455, + "learning_rate": 4.998551956417758e-06, + "loss": 0.6453, + "step": 811 + }, + { + "epoch": 0.07412817235712982, + "grad_norm": 0.45667576789855957, + "learning_rate": 4.998547880688925e-06, + "loss": 0.6554, + "step": 812 + }, + { + "epoch": 0.07421946320978638, + "grad_norm": 0.4506523609161377, + "learning_rate": 4.998543799233951e-06, + "loss": 0.6416, + "step": 813 + }, + { + "epoch": 0.07431075406244295, + "grad_norm": 0.40741702914237976, + "learning_rate": 4.998539712052846e-06, + "loss": 0.6882, + "step": 814 + }, + { + "epoch": 0.07440204491509951, + "grad_norm": 0.4178232252597809, + "learning_rate": 4.998535619145619e-06, + "loss": 0.6409, + "step": 815 + }, + { + "epoch": 0.07449333576775607, + "grad_norm": 0.4318689703941345, + "learning_rate": 4.99853152051228e-06, + "loss": 0.6324, + "step": 816 + }, + { + "epoch": 0.07458462662041264, + "grad_norm": 0.43991726636886597, + "learning_rate": 4.998527416152836e-06, + "loss": 0.6197, + "step": 817 + }, + { + "epoch": 0.0746759174730692, + "grad_norm": 0.4175993800163269, + "learning_rate": 4.998523306067299e-06, + "loss": 0.641, + "step": 818 + }, + { + "epoch": 0.07476720832572577, + "grad_norm": 0.4378940761089325, + "learning_rate": 4.998519190255677e-06, + "loss": 0.6378, + "step": 819 + }, + { + "epoch": 0.07485849917838233, + "grad_norm": 0.41552412509918213, + "learning_rate": 4.99851506871798e-06, + "loss": 0.6317, + "step": 820 + }, + { + "epoch": 0.0749497900310389, + "grad_norm": 0.4184335470199585, + "learning_rate": 4.998510941454219e-06, + "loss": 0.6761, + "step": 821 + }, + { + "epoch": 0.07504108088369546, + "grad_norm": 0.4191174805164337, + "learning_rate": 4.998506808464401e-06, + "loss": 0.6751, + "step": 822 + }, + { + "epoch": 0.07513237173635202, + "grad_norm": 0.44218701124191284, + "learning_rate": 4.998502669748535e-06, + "loss": 0.6217, + "step": 823 + }, + { + "epoch": 0.07522366258900857, + "grad_norm": 0.40840375423431396, + "learning_rate": 4.998498525306633e-06, + "loss": 0.6701, + "step": 824 + }, + { + "epoch": 0.07531495344166514, + "grad_norm": 0.41254305839538574, + "learning_rate": 4.998494375138703e-06, + "loss": 0.6812, + "step": 825 + }, + { + "epoch": 0.0754062442943217, + "grad_norm": 0.4106837213039398, + "learning_rate": 4.998490219244755e-06, + "loss": 0.6285, + "step": 826 + }, + { + "epoch": 0.07549753514697827, + "grad_norm": 0.4117595851421356, + "learning_rate": 4.998486057624798e-06, + "loss": 0.6708, + "step": 827 + }, + { + "epoch": 0.07558882599963483, + "grad_norm": 0.4113306999206543, + "learning_rate": 4.998481890278841e-06, + "loss": 0.6822, + "step": 828 + }, + { + "epoch": 0.0756801168522914, + "grad_norm": 0.4581579864025116, + "learning_rate": 4.998477717206896e-06, + "loss": 0.6258, + "step": 829 + }, + { + "epoch": 0.07577140770494796, + "grad_norm": 0.42312008142471313, + "learning_rate": 4.99847353840897e-06, + "loss": 0.6545, + "step": 830 + }, + { + "epoch": 0.07586269855760452, + "grad_norm": 0.4099233150482178, + "learning_rate": 4.998469353885075e-06, + "loss": 0.6677, + "step": 831 + }, + { + "epoch": 0.07595398941026109, + "grad_norm": 0.40173253417015076, + "learning_rate": 4.9984651636352176e-06, + "loss": 0.6522, + "step": 832 + }, + { + "epoch": 0.07604528026291765, + "grad_norm": 0.4387069642543793, + "learning_rate": 4.998460967659409e-06, + "loss": 0.6526, + "step": 833 + }, + { + "epoch": 0.07613657111557422, + "grad_norm": 0.4225899577140808, + "learning_rate": 4.998456765957659e-06, + "loss": 0.6993, + "step": 834 + }, + { + "epoch": 0.07622786196823078, + "grad_norm": 0.45768672227859497, + "learning_rate": 4.998452558529977e-06, + "loss": 0.6423, + "step": 835 + }, + { + "epoch": 0.07631915282088735, + "grad_norm": 0.44786304235458374, + "learning_rate": 4.998448345376373e-06, + "loss": 0.6522, + "step": 836 + }, + { + "epoch": 0.07641044367354391, + "grad_norm": 0.4362468719482422, + "learning_rate": 4.9984441264968565e-06, + "loss": 0.616, + "step": 837 + }, + { + "epoch": 0.07650173452620047, + "grad_norm": 0.4262109100818634, + "learning_rate": 4.998439901891436e-06, + "loss": 0.6866, + "step": 838 + }, + { + "epoch": 0.07659302537885704, + "grad_norm": 0.42424312233924866, + "learning_rate": 4.998435671560123e-06, + "loss": 0.6513, + "step": 839 + }, + { + "epoch": 0.0766843162315136, + "grad_norm": 0.44296789169311523, + "learning_rate": 4.998431435502926e-06, + "loss": 0.6882, + "step": 840 + }, + { + "epoch": 0.07677560708417017, + "grad_norm": 0.4500221014022827, + "learning_rate": 4.9984271937198545e-06, + "loss": 0.639, + "step": 841 + }, + { + "epoch": 0.07686689793682673, + "grad_norm": 0.4288533627986908, + "learning_rate": 4.99842294621092e-06, + "loss": 0.6785, + "step": 842 + }, + { + "epoch": 0.0769581887894833, + "grad_norm": 0.4309190511703491, + "learning_rate": 4.99841869297613e-06, + "loss": 0.6335, + "step": 843 + }, + { + "epoch": 0.07704947964213986, + "grad_norm": 0.43097686767578125, + "learning_rate": 4.998414434015495e-06, + "loss": 0.6478, + "step": 844 + }, + { + "epoch": 0.07714077049479642, + "grad_norm": 0.4324665367603302, + "learning_rate": 4.998410169329026e-06, + "loss": 0.6526, + "step": 845 + }, + { + "epoch": 0.07723206134745299, + "grad_norm": 0.45681890845298767, + "learning_rate": 4.998405898916731e-06, + "loss": 0.6739, + "step": 846 + }, + { + "epoch": 0.07732335220010955, + "grad_norm": 0.45086580514907837, + "learning_rate": 4.998401622778621e-06, + "loss": 0.6447, + "step": 847 + }, + { + "epoch": 0.07741464305276612, + "grad_norm": 0.43465104699134827, + "learning_rate": 4.998397340914705e-06, + "loss": 0.6574, + "step": 848 + }, + { + "epoch": 0.07750593390542268, + "grad_norm": 0.4714541733264923, + "learning_rate": 4.998393053324993e-06, + "loss": 0.5841, + "step": 849 + }, + { + "epoch": 0.07759722475807924, + "grad_norm": 0.4548891484737396, + "learning_rate": 4.998388760009496e-06, + "loss": 0.6784, + "step": 850 + }, + { + "epoch": 0.07768851561073581, + "grad_norm": 0.4249517023563385, + "learning_rate": 4.9983844609682224e-06, + "loss": 0.6709, + "step": 851 + }, + { + "epoch": 0.07777980646339237, + "grad_norm": 0.4325467050075531, + "learning_rate": 4.998380156201182e-06, + "loss": 0.6153, + "step": 852 + }, + { + "epoch": 0.07787109731604894, + "grad_norm": 0.4636313319206238, + "learning_rate": 4.998375845708386e-06, + "loss": 0.6767, + "step": 853 + }, + { + "epoch": 0.0779623881687055, + "grad_norm": 0.43988844752311707, + "learning_rate": 4.998371529489842e-06, + "loss": 0.6406, + "step": 854 + }, + { + "epoch": 0.07805367902136207, + "grad_norm": 0.3848872184753418, + "learning_rate": 4.998367207545563e-06, + "loss": 0.6603, + "step": 855 + }, + { + "epoch": 0.07814496987401863, + "grad_norm": 0.4165835678577423, + "learning_rate": 4.998362879875557e-06, + "loss": 0.6608, + "step": 856 + }, + { + "epoch": 0.07823626072667518, + "grad_norm": 0.4442967176437378, + "learning_rate": 4.998358546479834e-06, + "loss": 0.6765, + "step": 857 + }, + { + "epoch": 0.07832755157933174, + "grad_norm": 0.4406103789806366, + "learning_rate": 4.998354207358403e-06, + "loss": 0.6872, + "step": 858 + }, + { + "epoch": 0.07841884243198831, + "grad_norm": 0.4415813982486725, + "learning_rate": 4.998349862511276e-06, + "loss": 0.6634, + "step": 859 + }, + { + "epoch": 0.07851013328464487, + "grad_norm": 0.4056772291660309, + "learning_rate": 4.998345511938461e-06, + "loss": 0.6432, + "step": 860 + }, + { + "epoch": 0.07860142413730144, + "grad_norm": 0.44207513332366943, + "learning_rate": 4.998341155639971e-06, + "loss": 0.6798, + "step": 861 + }, + { + "epoch": 0.078692714989958, + "grad_norm": 0.4374401867389679, + "learning_rate": 4.9983367936158125e-06, + "loss": 0.6409, + "step": 862 + }, + { + "epoch": 0.07878400584261457, + "grad_norm": 0.47418850660324097, + "learning_rate": 4.9983324258659974e-06, + "loss": 0.6655, + "step": 863 + }, + { + "epoch": 0.07887529669527113, + "grad_norm": 0.41959813237190247, + "learning_rate": 4.9983280523905345e-06, + "loss": 0.6955, + "step": 864 + }, + { + "epoch": 0.0789665875479277, + "grad_norm": 0.45012181997299194, + "learning_rate": 4.9983236731894345e-06, + "loss": 0.6716, + "step": 865 + }, + { + "epoch": 0.07905787840058426, + "grad_norm": 0.4370107054710388, + "learning_rate": 4.998319288262709e-06, + "loss": 0.6303, + "step": 866 + }, + { + "epoch": 0.07914916925324082, + "grad_norm": 0.43437743186950684, + "learning_rate": 4.998314897610365e-06, + "loss": 0.6621, + "step": 867 + }, + { + "epoch": 0.07924046010589739, + "grad_norm": 0.42072978615760803, + "learning_rate": 4.998310501232415e-06, + "loss": 0.6729, + "step": 868 + }, + { + "epoch": 0.07933175095855395, + "grad_norm": 0.4233083426952362, + "learning_rate": 4.998306099128868e-06, + "loss": 0.6429, + "step": 869 + }, + { + "epoch": 0.07942304181121052, + "grad_norm": 0.42923399806022644, + "learning_rate": 4.998301691299734e-06, + "loss": 0.6764, + "step": 870 + }, + { + "epoch": 0.07951433266386708, + "grad_norm": 0.42505791783332825, + "learning_rate": 4.998297277745024e-06, + "loss": 0.6076, + "step": 871 + }, + { + "epoch": 0.07960562351652364, + "grad_norm": 0.4364859461784363, + "learning_rate": 4.998292858464747e-06, + "loss": 0.6338, + "step": 872 + }, + { + "epoch": 0.07969691436918021, + "grad_norm": 0.45140939950942993, + "learning_rate": 4.998288433458914e-06, + "loss": 0.6682, + "step": 873 + }, + { + "epoch": 0.07978820522183677, + "grad_norm": 0.4090183675289154, + "learning_rate": 4.998284002727535e-06, + "loss": 0.6471, + "step": 874 + }, + { + "epoch": 0.07987949607449334, + "grad_norm": 0.4508051872253418, + "learning_rate": 4.998279566270618e-06, + "loss": 0.6646, + "step": 875 + }, + { + "epoch": 0.0799707869271499, + "grad_norm": 0.4356769025325775, + "learning_rate": 4.998275124088178e-06, + "loss": 0.6363, + "step": 876 + }, + { + "epoch": 0.08006207777980646, + "grad_norm": 0.4579784870147705, + "learning_rate": 4.99827067618022e-06, + "loss": 0.6329, + "step": 877 + }, + { + "epoch": 0.08015336863246303, + "grad_norm": 0.44290268421173096, + "learning_rate": 4.998266222546758e-06, + "loss": 0.6766, + "step": 878 + }, + { + "epoch": 0.0802446594851196, + "grad_norm": 0.4090328812599182, + "learning_rate": 4.9982617631877994e-06, + "loss": 0.6936, + "step": 879 + }, + { + "epoch": 0.08033595033777616, + "grad_norm": 0.44363969564437866, + "learning_rate": 4.998257298103356e-06, + "loss": 0.6528, + "step": 880 + }, + { + "epoch": 0.08042724119043272, + "grad_norm": 0.45657244324684143, + "learning_rate": 4.998252827293439e-06, + "loss": 0.64, + "step": 881 + }, + { + "epoch": 0.08051853204308929, + "grad_norm": 0.4417364001274109, + "learning_rate": 4.998248350758056e-06, + "loss": 0.6354, + "step": 882 + }, + { + "epoch": 0.08060982289574585, + "grad_norm": 0.43925541639328003, + "learning_rate": 4.998243868497219e-06, + "loss": 0.6197, + "step": 883 + }, + { + "epoch": 0.08070111374840241, + "grad_norm": 0.42557668685913086, + "learning_rate": 4.998239380510938e-06, + "loss": 0.6612, + "step": 884 + }, + { + "epoch": 0.08079240460105898, + "grad_norm": 0.46097850799560547, + "learning_rate": 4.9982348867992226e-06, + "loss": 0.6491, + "step": 885 + }, + { + "epoch": 0.08088369545371554, + "grad_norm": 0.42419785261154175, + "learning_rate": 4.998230387362084e-06, + "loss": 0.6746, + "step": 886 + }, + { + "epoch": 0.08097498630637211, + "grad_norm": 0.4747112989425659, + "learning_rate": 4.9982258821995325e-06, + "loss": 0.6674, + "step": 887 + }, + { + "epoch": 0.08106627715902867, + "grad_norm": 0.4769522547721863, + "learning_rate": 4.998221371311578e-06, + "loss": 0.6396, + "step": 888 + }, + { + "epoch": 0.08115756801168524, + "grad_norm": 0.43865612149238586, + "learning_rate": 4.9982168546982315e-06, + "loss": 0.6207, + "step": 889 + }, + { + "epoch": 0.0812488588643418, + "grad_norm": 0.43357059359550476, + "learning_rate": 4.998212332359502e-06, + "loss": 0.6345, + "step": 890 + }, + { + "epoch": 0.08134014971699835, + "grad_norm": 0.4105129539966583, + "learning_rate": 4.998207804295401e-06, + "loss": 0.6617, + "step": 891 + }, + { + "epoch": 0.08143144056965491, + "grad_norm": 0.4108254313468933, + "learning_rate": 4.998203270505939e-06, + "loss": 0.6566, + "step": 892 + }, + { + "epoch": 0.08152273142231148, + "grad_norm": 0.44242382049560547, + "learning_rate": 4.998198730991125e-06, + "loss": 0.6628, + "step": 893 + }, + { + "epoch": 0.08161402227496804, + "grad_norm": 0.42243996262550354, + "learning_rate": 4.998194185750971e-06, + "loss": 0.6332, + "step": 894 + }, + { + "epoch": 0.08170531312762461, + "grad_norm": 0.4171886146068573, + "learning_rate": 4.998189634785486e-06, + "loss": 0.6374, + "step": 895 + }, + { + "epoch": 0.08179660398028117, + "grad_norm": 0.4127545952796936, + "learning_rate": 4.998185078094683e-06, + "loss": 0.6395, + "step": 896 + }, + { + "epoch": 0.08188789483293774, + "grad_norm": 0.43991392850875854, + "learning_rate": 4.998180515678569e-06, + "loss": 0.6539, + "step": 897 + }, + { + "epoch": 0.0819791856855943, + "grad_norm": 0.4269426763057709, + "learning_rate": 4.998175947537156e-06, + "loss": 0.6522, + "step": 898 + }, + { + "epoch": 0.08207047653825086, + "grad_norm": 0.4459540545940399, + "learning_rate": 4.9981713736704555e-06, + "loss": 0.6465, + "step": 899 + }, + { + "epoch": 0.08216176739090743, + "grad_norm": 0.4511788785457611, + "learning_rate": 4.998166794078477e-06, + "loss": 0.6271, + "step": 900 + }, + { + "epoch": 0.08225305824356399, + "grad_norm": 0.4007466435432434, + "learning_rate": 4.99816220876123e-06, + "loss": 0.7041, + "step": 901 + }, + { + "epoch": 0.08234434909622056, + "grad_norm": 0.4299915134906769, + "learning_rate": 4.9981576177187275e-06, + "loss": 0.6765, + "step": 902 + }, + { + "epoch": 0.08243563994887712, + "grad_norm": 0.42886024713516235, + "learning_rate": 4.9981530209509776e-06, + "loss": 0.5992, + "step": 903 + }, + { + "epoch": 0.08252693080153368, + "grad_norm": 0.4635188579559326, + "learning_rate": 4.998148418457992e-06, + "loss": 0.6549, + "step": 904 + }, + { + "epoch": 0.08261822165419025, + "grad_norm": 0.46674731373786926, + "learning_rate": 4.9981438102397806e-06, + "loss": 0.6335, + "step": 905 + }, + { + "epoch": 0.08270951250684681, + "grad_norm": 0.4452310800552368, + "learning_rate": 4.998139196296356e-06, + "loss": 0.6865, + "step": 906 + }, + { + "epoch": 0.08280080335950338, + "grad_norm": 0.46163102984428406, + "learning_rate": 4.998134576627726e-06, + "loss": 0.6188, + "step": 907 + }, + { + "epoch": 0.08289209421215994, + "grad_norm": 0.43544355034828186, + "learning_rate": 4.998129951233902e-06, + "loss": 0.6291, + "step": 908 + }, + { + "epoch": 0.0829833850648165, + "grad_norm": 0.4527488052845001, + "learning_rate": 4.998125320114895e-06, + "loss": 0.6444, + "step": 909 + }, + { + "epoch": 0.08307467591747307, + "grad_norm": 0.42127346992492676, + "learning_rate": 4.998120683270716e-06, + "loss": 0.6663, + "step": 910 + }, + { + "epoch": 0.08316596677012963, + "grad_norm": 0.44522881507873535, + "learning_rate": 4.998116040701376e-06, + "loss": 0.6199, + "step": 911 + }, + { + "epoch": 0.0832572576227862, + "grad_norm": 0.42551127076148987, + "learning_rate": 4.998111392406884e-06, + "loss": 0.6449, + "step": 912 + }, + { + "epoch": 0.08334854847544276, + "grad_norm": 0.40275874733924866, + "learning_rate": 4.998106738387252e-06, + "loss": 0.6317, + "step": 913 + }, + { + "epoch": 0.08343983932809933, + "grad_norm": 0.48335567116737366, + "learning_rate": 4.9981020786424895e-06, + "loss": 0.6171, + "step": 914 + }, + { + "epoch": 0.08353113018075589, + "grad_norm": 0.4499799311161041, + "learning_rate": 4.998097413172608e-06, + "loss": 0.6467, + "step": 915 + }, + { + "epoch": 0.08362242103341246, + "grad_norm": 0.48877856135368347, + "learning_rate": 4.998092741977618e-06, + "loss": 0.6507, + "step": 916 + }, + { + "epoch": 0.08371371188606902, + "grad_norm": 0.45516517758369446, + "learning_rate": 4.99808806505753e-06, + "loss": 0.614, + "step": 917 + }, + { + "epoch": 0.08380500273872558, + "grad_norm": 0.4821721017360687, + "learning_rate": 4.9980833824123565e-06, + "loss": 0.6122, + "step": 918 + }, + { + "epoch": 0.08389629359138215, + "grad_norm": 0.420401394367218, + "learning_rate": 4.998078694042106e-06, + "loss": 0.6425, + "step": 919 + }, + { + "epoch": 0.08398758444403871, + "grad_norm": 0.43004411458969116, + "learning_rate": 4.998073999946789e-06, + "loss": 0.6679, + "step": 920 + }, + { + "epoch": 0.08407887529669528, + "grad_norm": 0.425337016582489, + "learning_rate": 4.998069300126418e-06, + "loss": 0.6271, + "step": 921 + }, + { + "epoch": 0.08417016614935184, + "grad_norm": 0.4573572278022766, + "learning_rate": 4.9980645945810025e-06, + "loss": 0.673, + "step": 922 + }, + { + "epoch": 0.0842614570020084, + "grad_norm": 0.4444371163845062, + "learning_rate": 4.998059883310554e-06, + "loss": 0.6614, + "step": 923 + }, + { + "epoch": 0.08435274785466497, + "grad_norm": 0.4335695803165436, + "learning_rate": 4.998055166315083e-06, + "loss": 0.6637, + "step": 924 + }, + { + "epoch": 0.08444403870732152, + "grad_norm": 0.4372963607311249, + "learning_rate": 4.9980504435946006e-06, + "loss": 0.6533, + "step": 925 + }, + { + "epoch": 0.08453532955997808, + "grad_norm": 0.4314687252044678, + "learning_rate": 4.998045715149118e-06, + "loss": 0.6821, + "step": 926 + }, + { + "epoch": 0.08462662041263465, + "grad_norm": 0.43320953845977783, + "learning_rate": 4.998040980978644e-06, + "loss": 0.6731, + "step": 927 + }, + { + "epoch": 0.08471791126529121, + "grad_norm": 0.4466760456562042, + "learning_rate": 4.998036241083192e-06, + "loss": 0.6674, + "step": 928 + }, + { + "epoch": 0.08480920211794778, + "grad_norm": 0.4393499493598938, + "learning_rate": 4.99803149546277e-06, + "loss": 0.6286, + "step": 929 + }, + { + "epoch": 0.08490049297060434, + "grad_norm": 0.4054203927516937, + "learning_rate": 4.998026744117393e-06, + "loss": 0.6886, + "step": 930 + }, + { + "epoch": 0.0849917838232609, + "grad_norm": 0.45664525032043457, + "learning_rate": 4.9980219870470684e-06, + "loss": 0.6403, + "step": 931 + }, + { + "epoch": 0.08508307467591747, + "grad_norm": 0.40016329288482666, + "learning_rate": 4.9980172242518086e-06, + "loss": 0.6196, + "step": 932 + }, + { + "epoch": 0.08517436552857403, + "grad_norm": 0.4139724373817444, + "learning_rate": 4.998012455731623e-06, + "loss": 0.6339, + "step": 933 + }, + { + "epoch": 0.0852656563812306, + "grad_norm": 0.4382818341255188, + "learning_rate": 4.998007681486525e-06, + "loss": 0.6169, + "step": 934 + }, + { + "epoch": 0.08535694723388716, + "grad_norm": 0.4479823410511017, + "learning_rate": 4.998002901516524e-06, + "loss": 0.6662, + "step": 935 + }, + { + "epoch": 0.08544823808654373, + "grad_norm": 0.41528141498565674, + "learning_rate": 4.997998115821631e-06, + "loss": 0.654, + "step": 936 + }, + { + "epoch": 0.08553952893920029, + "grad_norm": 0.4006654918193817, + "learning_rate": 4.997993324401856e-06, + "loss": 0.6364, + "step": 937 + }, + { + "epoch": 0.08563081979185685, + "grad_norm": 0.4256483316421509, + "learning_rate": 4.997988527257212e-06, + "loss": 0.6348, + "step": 938 + }, + { + "epoch": 0.08572211064451342, + "grad_norm": 0.47047194838523865, + "learning_rate": 4.997983724387709e-06, + "loss": 0.637, + "step": 939 + }, + { + "epoch": 0.08581340149716998, + "grad_norm": 0.4607711136341095, + "learning_rate": 4.997978915793359e-06, + "loss": 0.6274, + "step": 940 + }, + { + "epoch": 0.08590469234982655, + "grad_norm": 0.4679885506629944, + "learning_rate": 4.997974101474171e-06, + "loss": 0.6158, + "step": 941 + }, + { + "epoch": 0.08599598320248311, + "grad_norm": 0.4555853009223938, + "learning_rate": 4.997969281430157e-06, + "loss": 0.6291, + "step": 942 + }, + { + "epoch": 0.08608727405513968, + "grad_norm": 0.45908060669898987, + "learning_rate": 4.997964455661329e-06, + "loss": 0.6675, + "step": 943 + }, + { + "epoch": 0.08617856490779624, + "grad_norm": 0.43646669387817383, + "learning_rate": 4.997959624167698e-06, + "loss": 0.6155, + "step": 944 + }, + { + "epoch": 0.0862698557604528, + "grad_norm": 0.46910643577575684, + "learning_rate": 4.997954786949273e-06, + "loss": 0.6045, + "step": 945 + }, + { + "epoch": 0.08636114661310937, + "grad_norm": 0.44079211354255676, + "learning_rate": 4.997949944006066e-06, + "loss": 0.6269, + "step": 946 + }, + { + "epoch": 0.08645243746576593, + "grad_norm": 0.45063745975494385, + "learning_rate": 4.99794509533809e-06, + "loss": 0.6541, + "step": 947 + }, + { + "epoch": 0.0865437283184225, + "grad_norm": 0.42943963408470154, + "learning_rate": 4.9979402409453535e-06, + "loss": 0.6411, + "step": 948 + }, + { + "epoch": 0.08663501917107906, + "grad_norm": 0.4903389811515808, + "learning_rate": 4.997935380827869e-06, + "loss": 0.6747, + "step": 949 + }, + { + "epoch": 0.08672631002373563, + "grad_norm": 0.47013404965400696, + "learning_rate": 4.997930514985647e-06, + "loss": 0.6539, + "step": 950 + }, + { + "epoch": 0.08681760087639219, + "grad_norm": 0.43444541096687317, + "learning_rate": 4.9979256434187e-06, + "loss": 0.5914, + "step": 951 + }, + { + "epoch": 0.08690889172904875, + "grad_norm": 0.4641004502773285, + "learning_rate": 4.997920766127038e-06, + "loss": 0.6423, + "step": 952 + }, + { + "epoch": 0.08700018258170532, + "grad_norm": 0.4670931100845337, + "learning_rate": 4.997915883110673e-06, + "loss": 0.6248, + "step": 953 + }, + { + "epoch": 0.08709147343436188, + "grad_norm": 0.4302326440811157, + "learning_rate": 4.997910994369614e-06, + "loss": 0.6537, + "step": 954 + }, + { + "epoch": 0.08718276428701845, + "grad_norm": 0.44365328550338745, + "learning_rate": 4.997906099903874e-06, + "loss": 0.6482, + "step": 955 + }, + { + "epoch": 0.08727405513967501, + "grad_norm": 0.44235366582870483, + "learning_rate": 4.997901199713465e-06, + "loss": 0.6569, + "step": 956 + }, + { + "epoch": 0.08736534599233158, + "grad_norm": 0.4569603502750397, + "learning_rate": 4.997896293798396e-06, + "loss": 0.6478, + "step": 957 + }, + { + "epoch": 0.08745663684498813, + "grad_norm": 0.42668813467025757, + "learning_rate": 4.997891382158681e-06, + "loss": 0.654, + "step": 958 + }, + { + "epoch": 0.08754792769764469, + "grad_norm": 0.4258496165275574, + "learning_rate": 4.997886464794329e-06, + "loss": 0.6389, + "step": 959 + }, + { + "epoch": 0.08763921855030125, + "grad_norm": 0.45556384325027466, + "learning_rate": 4.997881541705351e-06, + "loss": 0.6332, + "step": 960 + }, + { + "epoch": 0.08773050940295782, + "grad_norm": 0.44096776843070984, + "learning_rate": 4.997876612891761e-06, + "loss": 0.6358, + "step": 961 + }, + { + "epoch": 0.08782180025561438, + "grad_norm": 0.42765408754348755, + "learning_rate": 4.997871678353568e-06, + "loss": 0.6599, + "step": 962 + }, + { + "epoch": 0.08791309110827095, + "grad_norm": 0.44471317529678345, + "learning_rate": 4.997866738090783e-06, + "loss": 0.6572, + "step": 963 + }, + { + "epoch": 0.08800438196092751, + "grad_norm": 0.4492691159248352, + "learning_rate": 4.997861792103419e-06, + "loss": 0.6014, + "step": 964 + }, + { + "epoch": 0.08809567281358407, + "grad_norm": 0.4511792063713074, + "learning_rate": 4.997856840391486e-06, + "loss": 0.6232, + "step": 965 + }, + { + "epoch": 0.08818696366624064, + "grad_norm": 0.428981214761734, + "learning_rate": 4.997851882954997e-06, + "loss": 0.6413, + "step": 966 + }, + { + "epoch": 0.0882782545188972, + "grad_norm": 0.4450417160987854, + "learning_rate": 4.9978469197939615e-06, + "loss": 0.6506, + "step": 967 + }, + { + "epoch": 0.08836954537155377, + "grad_norm": 0.42994430661201477, + "learning_rate": 4.997841950908392e-06, + "loss": 0.6503, + "step": 968 + }, + { + "epoch": 0.08846083622421033, + "grad_norm": 0.4395301938056946, + "learning_rate": 4.997836976298299e-06, + "loss": 0.6562, + "step": 969 + }, + { + "epoch": 0.0885521270768669, + "grad_norm": 0.4276043176651001, + "learning_rate": 4.997831995963695e-06, + "loss": 0.6675, + "step": 970 + }, + { + "epoch": 0.08864341792952346, + "grad_norm": 0.3966187536716461, + "learning_rate": 4.99782700990459e-06, + "loss": 0.657, + "step": 971 + }, + { + "epoch": 0.08873470878218002, + "grad_norm": 0.4386347532272339, + "learning_rate": 4.997822018120997e-06, + "loss": 0.6001, + "step": 972 + }, + { + "epoch": 0.08882599963483659, + "grad_norm": 0.4494863748550415, + "learning_rate": 4.997817020612926e-06, + "loss": 0.6524, + "step": 973 + }, + { + "epoch": 0.08891729048749315, + "grad_norm": 0.43710821866989136, + "learning_rate": 4.99781201738039e-06, + "loss": 0.6655, + "step": 974 + }, + { + "epoch": 0.08900858134014972, + "grad_norm": 0.46819618344306946, + "learning_rate": 4.9978070084233986e-06, + "loss": 0.6474, + "step": 975 + }, + { + "epoch": 0.08909987219280628, + "grad_norm": 0.44311395287513733, + "learning_rate": 4.997801993741965e-06, + "loss": 0.6577, + "step": 976 + }, + { + "epoch": 0.08919116304546285, + "grad_norm": 0.454967200756073, + "learning_rate": 4.9977969733361e-06, + "loss": 0.6363, + "step": 977 + }, + { + "epoch": 0.08928245389811941, + "grad_norm": 0.4402250647544861, + "learning_rate": 4.997791947205815e-06, + "loss": 0.6465, + "step": 978 + }, + { + "epoch": 0.08937374475077597, + "grad_norm": 0.4485089182853699, + "learning_rate": 4.997786915351121e-06, + "loss": 0.6096, + "step": 979 + }, + { + "epoch": 0.08946503560343254, + "grad_norm": 0.42890095710754395, + "learning_rate": 4.997781877772031e-06, + "loss": 0.6604, + "step": 980 + }, + { + "epoch": 0.0895563264560891, + "grad_norm": 0.44408121705055237, + "learning_rate": 4.997776834468556e-06, + "loss": 0.6357, + "step": 981 + }, + { + "epoch": 0.08964761730874567, + "grad_norm": 0.453235924243927, + "learning_rate": 4.9977717854407064e-06, + "loss": 0.6593, + "step": 982 + }, + { + "epoch": 0.08973890816140223, + "grad_norm": 0.4673648774623871, + "learning_rate": 4.997766730688495e-06, + "loss": 0.6433, + "step": 983 + }, + { + "epoch": 0.0898301990140588, + "grad_norm": 0.4491332173347473, + "learning_rate": 4.997761670211933e-06, + "loss": 0.6629, + "step": 984 + }, + { + "epoch": 0.08992148986671536, + "grad_norm": 0.46853938698768616, + "learning_rate": 4.997756604011031e-06, + "loss": 0.662, + "step": 985 + }, + { + "epoch": 0.09001278071937192, + "grad_norm": 0.4817178547382355, + "learning_rate": 4.997751532085803e-06, + "loss": 0.6658, + "step": 986 + }, + { + "epoch": 0.09010407157202849, + "grad_norm": 0.4387228786945343, + "learning_rate": 4.997746454436259e-06, + "loss": 0.6325, + "step": 987 + }, + { + "epoch": 0.09019536242468505, + "grad_norm": 0.4459332525730133, + "learning_rate": 4.997741371062411e-06, + "loss": 0.648, + "step": 988 + }, + { + "epoch": 0.09028665327734162, + "grad_norm": 0.42421799898147583, + "learning_rate": 4.997736281964269e-06, + "loss": 0.6506, + "step": 989 + }, + { + "epoch": 0.09037794412999818, + "grad_norm": 0.44107040762901306, + "learning_rate": 4.997731187141848e-06, + "loss": 0.674, + "step": 990 + }, + { + "epoch": 0.09046923498265474, + "grad_norm": 0.4264661967754364, + "learning_rate": 4.997726086595157e-06, + "loss": 0.6531, + "step": 991 + }, + { + "epoch": 0.0905605258353113, + "grad_norm": 0.456001341342926, + "learning_rate": 4.9977209803242085e-06, + "loss": 0.644, + "step": 992 + }, + { + "epoch": 0.09065181668796786, + "grad_norm": 0.44143593311309814, + "learning_rate": 4.997715868329015e-06, + "loss": 0.6392, + "step": 993 + }, + { + "epoch": 0.09074310754062442, + "grad_norm": 0.462342768907547, + "learning_rate": 4.997710750609587e-06, + "loss": 0.6416, + "step": 994 + }, + { + "epoch": 0.09083439839328099, + "grad_norm": 0.43978461623191833, + "learning_rate": 4.997705627165936e-06, + "loss": 0.609, + "step": 995 + }, + { + "epoch": 0.09092568924593755, + "grad_norm": 0.4770508408546448, + "learning_rate": 4.997700497998076e-06, + "loss": 0.606, + "step": 996 + }, + { + "epoch": 0.09101698009859412, + "grad_norm": 0.43395254015922546, + "learning_rate": 4.997695363106016e-06, + "loss": 0.6358, + "step": 997 + }, + { + "epoch": 0.09110827095125068, + "grad_norm": 0.42107346653938293, + "learning_rate": 4.99769022248977e-06, + "loss": 0.6116, + "step": 998 + }, + { + "epoch": 0.09119956180390724, + "grad_norm": 0.431855708360672, + "learning_rate": 4.997685076149347e-06, + "loss": 0.6517, + "step": 999 + }, + { + "epoch": 0.09129085265656381, + "grad_norm": 0.44303157925605774, + "learning_rate": 4.997679924084762e-06, + "loss": 0.6587, + "step": 1000 + }, + { + "epoch": 0.09138214350922037, + "grad_norm": 0.4737203121185303, + "learning_rate": 4.997674766296024e-06, + "loss": 0.6096, + "step": 1001 + }, + { + "epoch": 0.09147343436187694, + "grad_norm": 0.4425562620162964, + "learning_rate": 4.997669602783147e-06, + "loss": 0.6395, + "step": 1002 + }, + { + "epoch": 0.0915647252145335, + "grad_norm": 0.44633686542510986, + "learning_rate": 4.997664433546143e-06, + "loss": 0.6518, + "step": 1003 + }, + { + "epoch": 0.09165601606719007, + "grad_norm": 0.4285804033279419, + "learning_rate": 4.997659258585022e-06, + "loss": 0.6957, + "step": 1004 + }, + { + "epoch": 0.09174730691984663, + "grad_norm": 0.40238913893699646, + "learning_rate": 4.997654077899796e-06, + "loss": 0.6757, + "step": 1005 + }, + { + "epoch": 0.0918385977725032, + "grad_norm": 0.42245495319366455, + "learning_rate": 4.9976488914904785e-06, + "loss": 0.6427, + "step": 1006 + }, + { + "epoch": 0.09192988862515976, + "grad_norm": 0.4455591142177582, + "learning_rate": 4.99764369935708e-06, + "loss": 0.6575, + "step": 1007 + }, + { + "epoch": 0.09202117947781632, + "grad_norm": 0.4597484767436981, + "learning_rate": 4.997638501499613e-06, + "loss": 0.6179, + "step": 1008 + }, + { + "epoch": 0.09211247033047289, + "grad_norm": 0.47085729241371155, + "learning_rate": 4.99763329791809e-06, + "loss": 0.6326, + "step": 1009 + }, + { + "epoch": 0.09220376118312945, + "grad_norm": 0.4265415668487549, + "learning_rate": 4.9976280886125205e-06, + "loss": 0.661, + "step": 1010 + }, + { + "epoch": 0.09229505203578602, + "grad_norm": 0.47247111797332764, + "learning_rate": 4.997622873582919e-06, + "loss": 0.607, + "step": 1011 + }, + { + "epoch": 0.09238634288844258, + "grad_norm": 0.4248165190219879, + "learning_rate": 4.997617652829297e-06, + "loss": 0.6159, + "step": 1012 + }, + { + "epoch": 0.09247763374109914, + "grad_norm": 0.45526060461997986, + "learning_rate": 4.997612426351665e-06, + "loss": 0.6214, + "step": 1013 + }, + { + "epoch": 0.09256892459375571, + "grad_norm": 0.4518580436706543, + "learning_rate": 4.997607194150038e-06, + "loss": 0.63, + "step": 1014 + }, + { + "epoch": 0.09266021544641227, + "grad_norm": 0.4479242265224457, + "learning_rate": 4.997601956224425e-06, + "loss": 0.6301, + "step": 1015 + }, + { + "epoch": 0.09275150629906884, + "grad_norm": 0.4567962884902954, + "learning_rate": 4.997596712574839e-06, + "loss": 0.6202, + "step": 1016 + }, + { + "epoch": 0.0928427971517254, + "grad_norm": 0.411943256855011, + "learning_rate": 4.997591463201291e-06, + "loss": 0.6498, + "step": 1017 + }, + { + "epoch": 0.09293408800438197, + "grad_norm": 0.44506901502609253, + "learning_rate": 4.997586208103796e-06, + "loss": 0.6282, + "step": 1018 + }, + { + "epoch": 0.09302537885703853, + "grad_norm": 0.48801541328430176, + "learning_rate": 4.9975809472823625e-06, + "loss": 0.6045, + "step": 1019 + }, + { + "epoch": 0.0931166697096951, + "grad_norm": 0.42476779222488403, + "learning_rate": 4.997575680737005e-06, + "loss": 0.636, + "step": 1020 + }, + { + "epoch": 0.09320796056235166, + "grad_norm": 0.4736705720424652, + "learning_rate": 4.997570408467735e-06, + "loss": 0.6481, + "step": 1021 + }, + { + "epoch": 0.09329925141500822, + "grad_norm": 0.45385727286338806, + "learning_rate": 4.9975651304745635e-06, + "loss": 0.6027, + "step": 1022 + }, + { + "epoch": 0.09339054226766479, + "grad_norm": 0.455638587474823, + "learning_rate": 4.997559846757504e-06, + "loss": 0.6809, + "step": 1023 + }, + { + "epoch": 0.09348183312032135, + "grad_norm": 0.42775434255599976, + "learning_rate": 4.997554557316567e-06, + "loss": 0.6331, + "step": 1024 + }, + { + "epoch": 0.0935731239729779, + "grad_norm": 0.42719516158103943, + "learning_rate": 4.997549262151767e-06, + "loss": 0.6345, + "step": 1025 + }, + { + "epoch": 0.09366441482563446, + "grad_norm": 0.47551780939102173, + "learning_rate": 4.997543961263115e-06, + "loss": 0.6163, + "step": 1026 + }, + { + "epoch": 0.09375570567829103, + "grad_norm": 0.4361383318901062, + "learning_rate": 4.997538654650622e-06, + "loss": 0.627, + "step": 1027 + }, + { + "epoch": 0.0938469965309476, + "grad_norm": 0.4358835220336914, + "learning_rate": 4.997533342314301e-06, + "loss": 0.6647, + "step": 1028 + }, + { + "epoch": 0.09393828738360416, + "grad_norm": 0.43726059794425964, + "learning_rate": 4.997528024254165e-06, + "loss": 0.6537, + "step": 1029 + }, + { + "epoch": 0.09402957823626072, + "grad_norm": 0.42114007472991943, + "learning_rate": 4.997522700470225e-06, + "loss": 0.6663, + "step": 1030 + }, + { + "epoch": 0.09412086908891729, + "grad_norm": 0.4475143253803253, + "learning_rate": 4.997517370962494e-06, + "loss": 0.6377, + "step": 1031 + }, + { + "epoch": 0.09421215994157385, + "grad_norm": 0.4194785952568054, + "learning_rate": 4.997512035730983e-06, + "loss": 0.6608, + "step": 1032 + }, + { + "epoch": 0.09430345079423041, + "grad_norm": 0.43873330950737, + "learning_rate": 4.997506694775707e-06, + "loss": 0.6406, + "step": 1033 + }, + { + "epoch": 0.09439474164688698, + "grad_norm": 0.4234958291053772, + "learning_rate": 4.997501348096675e-06, + "loss": 0.6572, + "step": 1034 + }, + { + "epoch": 0.09448603249954354, + "grad_norm": 0.43009912967681885, + "learning_rate": 4.9974959956939e-06, + "loss": 0.6384, + "step": 1035 + }, + { + "epoch": 0.09457732335220011, + "grad_norm": 0.45061445236206055, + "learning_rate": 4.997490637567396e-06, + "loss": 0.6365, + "step": 1036 + }, + { + "epoch": 0.09466861420485667, + "grad_norm": 0.43406742811203003, + "learning_rate": 4.997485273717174e-06, + "loss": 0.6804, + "step": 1037 + }, + { + "epoch": 0.09475990505751324, + "grad_norm": 0.3984141945838928, + "learning_rate": 4.997479904143246e-06, + "loss": 0.6608, + "step": 1038 + }, + { + "epoch": 0.0948511959101698, + "grad_norm": 0.42687878012657166, + "learning_rate": 4.997474528845625e-06, + "loss": 0.6015, + "step": 1039 + }, + { + "epoch": 0.09494248676282636, + "grad_norm": 0.44916850328445435, + "learning_rate": 4.997469147824323e-06, + "loss": 0.6472, + "step": 1040 + }, + { + "epoch": 0.09503377761548293, + "grad_norm": 0.4373859763145447, + "learning_rate": 4.997463761079352e-06, + "loss": 0.6104, + "step": 1041 + }, + { + "epoch": 0.09512506846813949, + "grad_norm": 0.46185871958732605, + "learning_rate": 4.997458368610725e-06, + "loss": 0.6466, + "step": 1042 + }, + { + "epoch": 0.09521635932079606, + "grad_norm": 0.4637300670146942, + "learning_rate": 4.997452970418455e-06, + "loss": 0.6239, + "step": 1043 + }, + { + "epoch": 0.09530765017345262, + "grad_norm": 0.419059693813324, + "learning_rate": 4.997447566502552e-06, + "loss": 0.6325, + "step": 1044 + }, + { + "epoch": 0.09539894102610919, + "grad_norm": 0.44520503282546997, + "learning_rate": 4.997442156863031e-06, + "loss": 0.6632, + "step": 1045 + }, + { + "epoch": 0.09549023187876575, + "grad_norm": 0.44014009833335876, + "learning_rate": 4.997436741499903e-06, + "loss": 0.6179, + "step": 1046 + }, + { + "epoch": 0.09558152273142231, + "grad_norm": 0.4452167749404907, + "learning_rate": 4.997431320413181e-06, + "loss": 0.6328, + "step": 1047 + }, + { + "epoch": 0.09567281358407888, + "grad_norm": 0.4389941096305847, + "learning_rate": 4.997425893602876e-06, + "loss": 0.6405, + "step": 1048 + }, + { + "epoch": 0.09576410443673544, + "grad_norm": 0.4330339729785919, + "learning_rate": 4.997420461069002e-06, + "loss": 0.6441, + "step": 1049 + }, + { + "epoch": 0.095855395289392, + "grad_norm": 0.4365481436252594, + "learning_rate": 4.997415022811571e-06, + "loss": 0.634, + "step": 1050 + }, + { + "epoch": 0.09594668614204857, + "grad_norm": 0.4539879858493805, + "learning_rate": 4.997409578830596e-06, + "loss": 0.6493, + "step": 1051 + }, + { + "epoch": 0.09603797699470513, + "grad_norm": 0.45276451110839844, + "learning_rate": 4.997404129126089e-06, + "loss": 0.6297, + "step": 1052 + }, + { + "epoch": 0.0961292678473617, + "grad_norm": 0.4666369557380676, + "learning_rate": 4.997398673698061e-06, + "loss": 0.6053, + "step": 1053 + }, + { + "epoch": 0.09622055870001826, + "grad_norm": 0.4360077381134033, + "learning_rate": 4.997393212546528e-06, + "loss": 0.6449, + "step": 1054 + }, + { + "epoch": 0.09631184955267483, + "grad_norm": 0.45730385184288025, + "learning_rate": 4.997387745671499e-06, + "loss": 0.6632, + "step": 1055 + }, + { + "epoch": 0.09640314040533139, + "grad_norm": 0.4605061113834381, + "learning_rate": 4.997382273072987e-06, + "loss": 0.6454, + "step": 1056 + }, + { + "epoch": 0.09649443125798796, + "grad_norm": 0.4185450077056885, + "learning_rate": 4.9973767947510075e-06, + "loss": 0.6503, + "step": 1057 + }, + { + "epoch": 0.09658572211064452, + "grad_norm": 0.45340752601623535, + "learning_rate": 4.99737131070557e-06, + "loss": 0.6284, + "step": 1058 + }, + { + "epoch": 0.09667701296330107, + "grad_norm": 0.45051318407058716, + "learning_rate": 4.997365820936689e-06, + "loss": 0.633, + "step": 1059 + }, + { + "epoch": 0.09676830381595763, + "grad_norm": 0.4289276599884033, + "learning_rate": 4.997360325444375e-06, + "loss": 0.6609, + "step": 1060 + }, + { + "epoch": 0.0968595946686142, + "grad_norm": 0.475050151348114, + "learning_rate": 4.997354824228643e-06, + "loss": 0.6115, + "step": 1061 + }, + { + "epoch": 0.09695088552127076, + "grad_norm": 0.4614880681037903, + "learning_rate": 4.997349317289504e-06, + "loss": 0.644, + "step": 1062 + }, + { + "epoch": 0.09704217637392733, + "grad_norm": 0.4548715054988861, + "learning_rate": 4.997343804626971e-06, + "loss": 0.6677, + "step": 1063 + }, + { + "epoch": 0.09713346722658389, + "grad_norm": 0.5018542408943176, + "learning_rate": 4.997338286241056e-06, + "loss": 0.6428, + "step": 1064 + }, + { + "epoch": 0.09722475807924046, + "grad_norm": 0.46855854988098145, + "learning_rate": 4.9973327621317725e-06, + "loss": 0.6314, + "step": 1065 + }, + { + "epoch": 0.09731604893189702, + "grad_norm": 0.4279564619064331, + "learning_rate": 4.997327232299133e-06, + "loss": 0.6472, + "step": 1066 + }, + { + "epoch": 0.09740733978455358, + "grad_norm": 0.45014533400535583, + "learning_rate": 4.997321696743151e-06, + "loss": 0.6394, + "step": 1067 + }, + { + "epoch": 0.09749863063721015, + "grad_norm": 0.4263485074043274, + "learning_rate": 4.997316155463837e-06, + "loss": 0.6386, + "step": 1068 + }, + { + "epoch": 0.09758992148986671, + "grad_norm": 0.45804163813591003, + "learning_rate": 4.997310608461206e-06, + "loss": 0.6261, + "step": 1069 + }, + { + "epoch": 0.09768121234252328, + "grad_norm": 0.451754629611969, + "learning_rate": 4.99730505573527e-06, + "loss": 0.6269, + "step": 1070 + }, + { + "epoch": 0.09777250319517984, + "grad_norm": 0.43901732563972473, + "learning_rate": 4.99729949728604e-06, + "loss": 0.638, + "step": 1071 + }, + { + "epoch": 0.0978637940478364, + "grad_norm": 0.45784977078437805, + "learning_rate": 4.9972939331135315e-06, + "loss": 0.6518, + "step": 1072 + }, + { + "epoch": 0.09795508490049297, + "grad_norm": 0.44674718379974365, + "learning_rate": 4.997288363217756e-06, + "loss": 0.6266, + "step": 1073 + }, + { + "epoch": 0.09804637575314953, + "grad_norm": 0.43315035104751587, + "learning_rate": 4.997282787598726e-06, + "loss": 0.6233, + "step": 1074 + }, + { + "epoch": 0.0981376666058061, + "grad_norm": 0.4514172673225403, + "learning_rate": 4.997277206256455e-06, + "loss": 0.6501, + "step": 1075 + }, + { + "epoch": 0.09822895745846266, + "grad_norm": 0.4758932292461395, + "learning_rate": 4.9972716191909535e-06, + "loss": 0.6384, + "step": 1076 + }, + { + "epoch": 0.09832024831111923, + "grad_norm": 0.5041109919548035, + "learning_rate": 4.997266026402238e-06, + "loss": 0.6294, + "step": 1077 + }, + { + "epoch": 0.09841153916377579, + "grad_norm": 0.4412895441055298, + "learning_rate": 4.997260427890318e-06, + "loss": 0.6441, + "step": 1078 + }, + { + "epoch": 0.09850283001643236, + "grad_norm": 0.4654560089111328, + "learning_rate": 4.9972548236552085e-06, + "loss": 0.6161, + "step": 1079 + }, + { + "epoch": 0.09859412086908892, + "grad_norm": 0.42210257053375244, + "learning_rate": 4.997249213696922e-06, + "loss": 0.6809, + "step": 1080 + }, + { + "epoch": 0.09868541172174548, + "grad_norm": 0.4744971692562103, + "learning_rate": 4.9972435980154695e-06, + "loss": 0.6302, + "step": 1081 + }, + { + "epoch": 0.09877670257440205, + "grad_norm": 0.44008681178092957, + "learning_rate": 4.997237976610867e-06, + "loss": 0.6207, + "step": 1082 + }, + { + "epoch": 0.09886799342705861, + "grad_norm": 0.4577513635158539, + "learning_rate": 4.9972323494831244e-06, + "loss": 0.6173, + "step": 1083 + }, + { + "epoch": 0.09895928427971518, + "grad_norm": 0.43270155787467957, + "learning_rate": 4.997226716632256e-06, + "loss": 0.6717, + "step": 1084 + }, + { + "epoch": 0.09905057513237174, + "grad_norm": 0.47406986355781555, + "learning_rate": 4.997221078058276e-06, + "loss": 0.635, + "step": 1085 + }, + { + "epoch": 0.0991418659850283, + "grad_norm": 0.42293867468833923, + "learning_rate": 4.997215433761194e-06, + "loss": 0.6737, + "step": 1086 + }, + { + "epoch": 0.09923315683768487, + "grad_norm": 0.4642094075679779, + "learning_rate": 4.997209783741026e-06, + "loss": 0.6271, + "step": 1087 + }, + { + "epoch": 0.09932444769034143, + "grad_norm": 0.4454536437988281, + "learning_rate": 4.997204127997784e-06, + "loss": 0.6647, + "step": 1088 + }, + { + "epoch": 0.099415738542998, + "grad_norm": 0.4251849949359894, + "learning_rate": 4.99719846653148e-06, + "loss": 0.6719, + "step": 1089 + }, + { + "epoch": 0.09950702939565456, + "grad_norm": 0.4643940031528473, + "learning_rate": 4.9971927993421285e-06, + "loss": 0.614, + "step": 1090 + }, + { + "epoch": 0.09959832024831113, + "grad_norm": 0.46607494354248047, + "learning_rate": 4.997187126429741e-06, + "loss": 0.6323, + "step": 1091 + }, + { + "epoch": 0.09968961110096769, + "grad_norm": 0.4573415517807007, + "learning_rate": 4.997181447794332e-06, + "loss": 0.6489, + "step": 1092 + }, + { + "epoch": 0.09978090195362424, + "grad_norm": 0.4255734086036682, + "learning_rate": 4.997175763435913e-06, + "loss": 0.605, + "step": 1093 + }, + { + "epoch": 0.0998721928062808, + "grad_norm": 0.4773276150226593, + "learning_rate": 4.997170073354498e-06, + "loss": 0.6299, + "step": 1094 + }, + { + "epoch": 0.09996348365893737, + "grad_norm": 0.4253195822238922, + "learning_rate": 4.9971643775501e-06, + "loss": 0.6373, + "step": 1095 + }, + { + "epoch": 0.10005477451159393, + "grad_norm": 0.4320557415485382, + "learning_rate": 4.997158676022732e-06, + "loss": 0.6618, + "step": 1096 + }, + { + "epoch": 0.1001460653642505, + "grad_norm": 0.442116379737854, + "learning_rate": 4.997152968772407e-06, + "loss": 0.6313, + "step": 1097 + }, + { + "epoch": 0.10023735621690706, + "grad_norm": 0.47256800532341003, + "learning_rate": 4.9971472557991374e-06, + "loss": 0.6085, + "step": 1098 + }, + { + "epoch": 0.10032864706956363, + "grad_norm": 0.4650265574455261, + "learning_rate": 4.9971415371029376e-06, + "loss": 0.6424, + "step": 1099 + }, + { + "epoch": 0.10041993792222019, + "grad_norm": 0.42933663725852966, + "learning_rate": 4.99713581268382e-06, + "loss": 0.6534, + "step": 1100 + }, + { + "epoch": 0.10051122877487675, + "grad_norm": 0.4331287443637848, + "learning_rate": 4.997130082541797e-06, + "loss": 0.6562, + "step": 1101 + }, + { + "epoch": 0.10060251962753332, + "grad_norm": 0.437282532453537, + "learning_rate": 4.997124346676883e-06, + "loss": 0.6498, + "step": 1102 + }, + { + "epoch": 0.10069381048018988, + "grad_norm": 0.4289567768573761, + "learning_rate": 4.9971186050890905e-06, + "loss": 0.6338, + "step": 1103 + }, + { + "epoch": 0.10078510133284645, + "grad_norm": 0.4455282688140869, + "learning_rate": 4.997112857778432e-06, + "loss": 0.6554, + "step": 1104 + }, + { + "epoch": 0.10087639218550301, + "grad_norm": 0.4179583489894867, + "learning_rate": 4.997107104744923e-06, + "loss": 0.6765, + "step": 1105 + }, + { + "epoch": 0.10096768303815958, + "grad_norm": 0.45267540216445923, + "learning_rate": 4.997101345988574e-06, + "loss": 0.697, + "step": 1106 + }, + { + "epoch": 0.10105897389081614, + "grad_norm": 0.45052504539489746, + "learning_rate": 4.9970955815094e-06, + "loss": 0.6569, + "step": 1107 + }, + { + "epoch": 0.1011502647434727, + "grad_norm": 0.451447993516922, + "learning_rate": 4.997089811307413e-06, + "loss": 0.6267, + "step": 1108 + }, + { + "epoch": 0.10124155559612927, + "grad_norm": 0.47330477833747864, + "learning_rate": 4.9970840353826266e-06, + "loss": 0.6311, + "step": 1109 + }, + { + "epoch": 0.10133284644878583, + "grad_norm": 0.4733980596065521, + "learning_rate": 4.997078253735055e-06, + "loss": 0.6534, + "step": 1110 + }, + { + "epoch": 0.1014241373014424, + "grad_norm": 0.4350626468658447, + "learning_rate": 4.997072466364709e-06, + "loss": 0.6346, + "step": 1111 + }, + { + "epoch": 0.10151542815409896, + "grad_norm": 0.46513721346855164, + "learning_rate": 4.997066673271606e-06, + "loss": 0.6024, + "step": 1112 + }, + { + "epoch": 0.10160671900675552, + "grad_norm": 0.4610224962234497, + "learning_rate": 4.997060874455755e-06, + "loss": 0.633, + "step": 1113 + }, + { + "epoch": 0.10169800985941209, + "grad_norm": 0.4401266276836395, + "learning_rate": 4.997055069917172e-06, + "loss": 0.6414, + "step": 1114 + }, + { + "epoch": 0.10178930071206865, + "grad_norm": 0.43819165229797363, + "learning_rate": 4.9970492596558685e-06, + "loss": 0.6669, + "step": 1115 + }, + { + "epoch": 0.10188059156472522, + "grad_norm": 0.45688435435295105, + "learning_rate": 4.997043443671859e-06, + "loss": 0.6691, + "step": 1116 + }, + { + "epoch": 0.10197188241738178, + "grad_norm": 0.4480249881744385, + "learning_rate": 4.9970376219651565e-06, + "loss": 0.6602, + "step": 1117 + }, + { + "epoch": 0.10206317327003835, + "grad_norm": 0.4141787886619568, + "learning_rate": 4.997031794535775e-06, + "loss": 0.641, + "step": 1118 + }, + { + "epoch": 0.10215446412269491, + "grad_norm": 0.4238651990890503, + "learning_rate": 4.9970259613837265e-06, + "loss": 0.6596, + "step": 1119 + }, + { + "epoch": 0.10224575497535147, + "grad_norm": 0.5333155393600464, + "learning_rate": 4.997020122509025e-06, + "loss": 0.6474, + "step": 1120 + }, + { + "epoch": 0.10233704582800804, + "grad_norm": 0.43095624446868896, + "learning_rate": 4.997014277911685e-06, + "loss": 0.6684, + "step": 1121 + }, + { + "epoch": 0.1024283366806646, + "grad_norm": 0.4556123912334442, + "learning_rate": 4.997008427591718e-06, + "loss": 0.6554, + "step": 1122 + }, + { + "epoch": 0.10251962753332117, + "grad_norm": 0.45624780654907227, + "learning_rate": 4.997002571549138e-06, + "loss": 0.6402, + "step": 1123 + }, + { + "epoch": 0.10261091838597773, + "grad_norm": 0.4504489004611969, + "learning_rate": 4.996996709783959e-06, + "loss": 0.6497, + "step": 1124 + }, + { + "epoch": 0.1027022092386343, + "grad_norm": 0.4470590054988861, + "learning_rate": 4.996990842296194e-06, + "loss": 0.6585, + "step": 1125 + }, + { + "epoch": 0.10279350009129085, + "grad_norm": 0.40157145261764526, + "learning_rate": 4.9969849690858576e-06, + "loss": 0.6756, + "step": 1126 + }, + { + "epoch": 0.10288479094394741, + "grad_norm": 0.4020521640777588, + "learning_rate": 4.996979090152961e-06, + "loss": 0.6526, + "step": 1127 + }, + { + "epoch": 0.10297608179660397, + "grad_norm": 0.440324068069458, + "learning_rate": 4.99697320549752e-06, + "loss": 0.649, + "step": 1128 + }, + { + "epoch": 0.10306737264926054, + "grad_norm": 0.4351504445075989, + "learning_rate": 4.996967315119547e-06, + "loss": 0.6752, + "step": 1129 + }, + { + "epoch": 0.1031586635019171, + "grad_norm": 0.44459569454193115, + "learning_rate": 4.996961419019054e-06, + "loss": 0.6094, + "step": 1130 + }, + { + "epoch": 0.10324995435457367, + "grad_norm": 0.4599197804927826, + "learning_rate": 4.996955517196057e-06, + "loss": 0.6383, + "step": 1131 + }, + { + "epoch": 0.10334124520723023, + "grad_norm": 0.43437498807907104, + "learning_rate": 4.996949609650568e-06, + "loss": 0.6463, + "step": 1132 + }, + { + "epoch": 0.1034325360598868, + "grad_norm": 0.4468528628349304, + "learning_rate": 4.996943696382602e-06, + "loss": 0.661, + "step": 1133 + }, + { + "epoch": 0.10352382691254336, + "grad_norm": 0.45088624954223633, + "learning_rate": 4.996937777392171e-06, + "loss": 0.6693, + "step": 1134 + }, + { + "epoch": 0.10361511776519992, + "grad_norm": 0.46902328729629517, + "learning_rate": 4.9969318526792885e-06, + "loss": 0.6191, + "step": 1135 + }, + { + "epoch": 0.10370640861785649, + "grad_norm": 0.4586443603038788, + "learning_rate": 4.99692592224397e-06, + "loss": 0.6207, + "step": 1136 + }, + { + "epoch": 0.10379769947051305, + "grad_norm": 0.44109848141670227, + "learning_rate": 4.996919986086227e-06, + "loss": 0.6476, + "step": 1137 + }, + { + "epoch": 0.10388899032316962, + "grad_norm": 0.4759523570537567, + "learning_rate": 4.996914044206075e-06, + "loss": 0.6416, + "step": 1138 + }, + { + "epoch": 0.10398028117582618, + "grad_norm": 0.4481741786003113, + "learning_rate": 4.996908096603526e-06, + "loss": 0.6383, + "step": 1139 + }, + { + "epoch": 0.10407157202848275, + "grad_norm": 0.45147159695625305, + "learning_rate": 4.9969021432785944e-06, + "loss": 0.6198, + "step": 1140 + }, + { + "epoch": 0.10416286288113931, + "grad_norm": 0.4825770854949951, + "learning_rate": 4.9968961842312935e-06, + "loss": 0.634, + "step": 1141 + }, + { + "epoch": 0.10425415373379587, + "grad_norm": 0.44942450523376465, + "learning_rate": 4.996890219461636e-06, + "loss": 0.6426, + "step": 1142 + }, + { + "epoch": 0.10434544458645244, + "grad_norm": 0.4545472264289856, + "learning_rate": 4.9968842489696384e-06, + "loss": 0.6294, + "step": 1143 + }, + { + "epoch": 0.104436735439109, + "grad_norm": 0.4769029915332794, + "learning_rate": 4.996878272755312e-06, + "loss": 0.6488, + "step": 1144 + }, + { + "epoch": 0.10452802629176557, + "grad_norm": 0.4907781481742859, + "learning_rate": 4.996872290818671e-06, + "loss": 0.6107, + "step": 1145 + }, + { + "epoch": 0.10461931714442213, + "grad_norm": 0.48322373628616333, + "learning_rate": 4.99686630315973e-06, + "loss": 0.6299, + "step": 1146 + }, + { + "epoch": 0.1047106079970787, + "grad_norm": 0.4439358711242676, + "learning_rate": 4.996860309778501e-06, + "loss": 0.6336, + "step": 1147 + }, + { + "epoch": 0.10480189884973526, + "grad_norm": 0.45824721455574036, + "learning_rate": 4.996854310674999e-06, + "loss": 0.6625, + "step": 1148 + }, + { + "epoch": 0.10489318970239182, + "grad_norm": 0.43058425188064575, + "learning_rate": 4.996848305849238e-06, + "loss": 0.6198, + "step": 1149 + }, + { + "epoch": 0.10498448055504839, + "grad_norm": 0.44466936588287354, + "learning_rate": 4.996842295301231e-06, + "loss": 0.6391, + "step": 1150 + }, + { + "epoch": 0.10507577140770495, + "grad_norm": 0.43906116485595703, + "learning_rate": 4.996836279030991e-06, + "loss": 0.6798, + "step": 1151 + }, + { + "epoch": 0.10516706226036152, + "grad_norm": 0.4516656994819641, + "learning_rate": 4.996830257038534e-06, + "loss": 0.5938, + "step": 1152 + }, + { + "epoch": 0.10525835311301808, + "grad_norm": 0.44862616062164307, + "learning_rate": 4.996824229323872e-06, + "loss": 0.6415, + "step": 1153 + }, + { + "epoch": 0.10534964396567464, + "grad_norm": 0.46018460392951965, + "learning_rate": 4.99681819588702e-06, + "loss": 0.6692, + "step": 1154 + }, + { + "epoch": 0.10544093481833121, + "grad_norm": 0.44220468401908875, + "learning_rate": 4.996812156727991e-06, + "loss": 0.6356, + "step": 1155 + }, + { + "epoch": 0.10553222567098777, + "grad_norm": 0.42811161279678345, + "learning_rate": 4.996806111846799e-06, + "loss": 0.6566, + "step": 1156 + }, + { + "epoch": 0.10562351652364434, + "grad_norm": 0.4557129144668579, + "learning_rate": 4.996800061243458e-06, + "loss": 0.6169, + "step": 1157 + }, + { + "epoch": 0.1057148073763009, + "grad_norm": 0.45726296305656433, + "learning_rate": 4.996794004917982e-06, + "loss": 0.6124, + "step": 1158 + }, + { + "epoch": 0.10580609822895747, + "grad_norm": 0.45647329092025757, + "learning_rate": 4.996787942870384e-06, + "loss": 0.632, + "step": 1159 + }, + { + "epoch": 0.10589738908161402, + "grad_norm": 0.45262232422828674, + "learning_rate": 4.996781875100679e-06, + "loss": 0.6753, + "step": 1160 + }, + { + "epoch": 0.10598867993427058, + "grad_norm": 0.49565088748931885, + "learning_rate": 4.996775801608881e-06, + "loss": 0.6309, + "step": 1161 + }, + { + "epoch": 0.10607997078692714, + "grad_norm": 0.44270533323287964, + "learning_rate": 4.996769722395003e-06, + "loss": 0.6331, + "step": 1162 + }, + { + "epoch": 0.10617126163958371, + "grad_norm": 0.45408499240875244, + "learning_rate": 4.996763637459059e-06, + "loss": 0.6441, + "step": 1163 + }, + { + "epoch": 0.10626255249224027, + "grad_norm": 0.446478933095932, + "learning_rate": 4.996757546801063e-06, + "loss": 0.6505, + "step": 1164 + }, + { + "epoch": 0.10635384334489684, + "grad_norm": 0.4454565644264221, + "learning_rate": 4.9967514504210305e-06, + "loss": 0.6518, + "step": 1165 + }, + { + "epoch": 0.1064451341975534, + "grad_norm": 0.47817757725715637, + "learning_rate": 4.996745348318973e-06, + "loss": 0.6237, + "step": 1166 + }, + { + "epoch": 0.10653642505020997, + "grad_norm": 0.44455960392951965, + "learning_rate": 4.996739240494907e-06, + "loss": 0.6021, + "step": 1167 + }, + { + "epoch": 0.10662771590286653, + "grad_norm": 0.4611789286136627, + "learning_rate": 4.996733126948844e-06, + "loss": 0.6333, + "step": 1168 + }, + { + "epoch": 0.1067190067555231, + "grad_norm": 0.4505334198474884, + "learning_rate": 4.9967270076807995e-06, + "loss": 0.6267, + "step": 1169 + }, + { + "epoch": 0.10681029760817966, + "grad_norm": 0.4290732145309448, + "learning_rate": 4.996720882690787e-06, + "loss": 0.6527, + "step": 1170 + }, + { + "epoch": 0.10690158846083622, + "grad_norm": 0.4567128121852875, + "learning_rate": 4.996714751978821e-06, + "loss": 0.5847, + "step": 1171 + }, + { + "epoch": 0.10699287931349279, + "grad_norm": 0.4365686774253845, + "learning_rate": 4.996708615544915e-06, + "loss": 0.6331, + "step": 1172 + }, + { + "epoch": 0.10708417016614935, + "grad_norm": 0.473114937543869, + "learning_rate": 4.9967024733890845e-06, + "loss": 0.6115, + "step": 1173 + }, + { + "epoch": 0.10717546101880591, + "grad_norm": 0.4579983949661255, + "learning_rate": 4.996696325511341e-06, + "loss": 0.6124, + "step": 1174 + }, + { + "epoch": 0.10726675187146248, + "grad_norm": 0.4544917047023773, + "learning_rate": 4.996690171911701e-06, + "loss": 0.6447, + "step": 1175 + }, + { + "epoch": 0.10735804272411904, + "grad_norm": 0.45635339617729187, + "learning_rate": 4.996684012590177e-06, + "loss": 0.6485, + "step": 1176 + }, + { + "epoch": 0.10744933357677561, + "grad_norm": 0.46421265602111816, + "learning_rate": 4.996677847546785e-06, + "loss": 0.6568, + "step": 1177 + }, + { + "epoch": 0.10754062442943217, + "grad_norm": 0.4741896688938141, + "learning_rate": 4.996671676781537e-06, + "loss": 0.6534, + "step": 1178 + }, + { + "epoch": 0.10763191528208874, + "grad_norm": 0.43967723846435547, + "learning_rate": 4.996665500294448e-06, + "loss": 0.6686, + "step": 1179 + }, + { + "epoch": 0.1077232061347453, + "grad_norm": 0.5271258354187012, + "learning_rate": 4.9966593180855325e-06, + "loss": 0.6635, + "step": 1180 + }, + { + "epoch": 0.10781449698740186, + "grad_norm": 0.4722614884376526, + "learning_rate": 4.996653130154804e-06, + "loss": 0.6525, + "step": 1181 + }, + { + "epoch": 0.10790578784005843, + "grad_norm": 0.47856783866882324, + "learning_rate": 4.9966469365022776e-06, + "loss": 0.6126, + "step": 1182 + }, + { + "epoch": 0.10799707869271499, + "grad_norm": 0.4670123755931854, + "learning_rate": 4.996640737127966e-06, + "loss": 0.6412, + "step": 1183 + }, + { + "epoch": 0.10808836954537156, + "grad_norm": 0.46869194507598877, + "learning_rate": 4.996634532031885e-06, + "loss": 0.5934, + "step": 1184 + }, + { + "epoch": 0.10817966039802812, + "grad_norm": 0.45180705189704895, + "learning_rate": 4.996628321214047e-06, + "loss": 0.6549, + "step": 1185 + }, + { + "epoch": 0.10827095125068469, + "grad_norm": 0.46321171522140503, + "learning_rate": 4.996622104674469e-06, + "loss": 0.651, + "step": 1186 + }, + { + "epoch": 0.10836224210334125, + "grad_norm": 0.4389713406562805, + "learning_rate": 4.9966158824131636e-06, + "loss": 0.6642, + "step": 1187 + }, + { + "epoch": 0.10845353295599781, + "grad_norm": 0.42291343212127686, + "learning_rate": 4.996609654430145e-06, + "loss": 0.641, + "step": 1188 + }, + { + "epoch": 0.10854482380865438, + "grad_norm": 0.478657066822052, + "learning_rate": 4.996603420725427e-06, + "loss": 0.6327, + "step": 1189 + }, + { + "epoch": 0.10863611466131094, + "grad_norm": 0.43900206685066223, + "learning_rate": 4.996597181299024e-06, + "loss": 0.6409, + "step": 1190 + }, + { + "epoch": 0.1087274055139675, + "grad_norm": 0.4177209138870239, + "learning_rate": 4.996590936150951e-06, + "loss": 0.6333, + "step": 1191 + }, + { + "epoch": 0.10881869636662407, + "grad_norm": 0.4718683958053589, + "learning_rate": 4.9965846852812225e-06, + "loss": 0.6686, + "step": 1192 + }, + { + "epoch": 0.10890998721928062, + "grad_norm": 0.4351769685745239, + "learning_rate": 4.996578428689853e-06, + "loss": 0.6488, + "step": 1193 + }, + { + "epoch": 0.10900127807193719, + "grad_norm": 0.44065767526626587, + "learning_rate": 4.996572166376855e-06, + "loss": 0.6159, + "step": 1194 + }, + { + "epoch": 0.10909256892459375, + "grad_norm": 0.501036524772644, + "learning_rate": 4.996565898342245e-06, + "loss": 0.5731, + "step": 1195 + }, + { + "epoch": 0.10918385977725031, + "grad_norm": 0.4415234625339508, + "learning_rate": 4.996559624586036e-06, + "loss": 0.6436, + "step": 1196 + }, + { + "epoch": 0.10927515062990688, + "grad_norm": 0.4423903822898865, + "learning_rate": 4.9965533451082426e-06, + "loss": 0.6662, + "step": 1197 + }, + { + "epoch": 0.10936644148256344, + "grad_norm": 0.4755546748638153, + "learning_rate": 4.996547059908881e-06, + "loss": 0.6203, + "step": 1198 + }, + { + "epoch": 0.10945773233522, + "grad_norm": 0.4385714530944824, + "learning_rate": 4.996540768987962e-06, + "loss": 0.6219, + "step": 1199 + }, + { + "epoch": 0.10954902318787657, + "grad_norm": 0.4663873314857483, + "learning_rate": 4.996534472345503e-06, + "loss": 0.6722, + "step": 1200 + }, + { + "epoch": 0.10964031404053314, + "grad_norm": 0.46957528591156006, + "learning_rate": 4.996528169981517e-06, + "loss": 0.6337, + "step": 1201 + }, + { + "epoch": 0.1097316048931897, + "grad_norm": 0.42349153757095337, + "learning_rate": 4.99652186189602e-06, + "loss": 0.655, + "step": 1202 + }, + { + "epoch": 0.10982289574584626, + "grad_norm": 0.4204639494419098, + "learning_rate": 4.9965155480890244e-06, + "loss": 0.6257, + "step": 1203 + }, + { + "epoch": 0.10991418659850283, + "grad_norm": 0.4338964819908142, + "learning_rate": 4.996509228560546e-06, + "loss": 0.6354, + "step": 1204 + }, + { + "epoch": 0.11000547745115939, + "grad_norm": 0.44064879417419434, + "learning_rate": 4.996502903310598e-06, + "loss": 0.6633, + "step": 1205 + }, + { + "epoch": 0.11009676830381596, + "grad_norm": 0.4661403000354767, + "learning_rate": 4.996496572339197e-06, + "loss": 0.665, + "step": 1206 + }, + { + "epoch": 0.11018805915647252, + "grad_norm": 0.4352363049983978, + "learning_rate": 4.996490235646356e-06, + "loss": 0.6374, + "step": 1207 + }, + { + "epoch": 0.11027935000912908, + "grad_norm": 0.4885610044002533, + "learning_rate": 4.996483893232091e-06, + "loss": 0.6304, + "step": 1208 + }, + { + "epoch": 0.11037064086178565, + "grad_norm": 0.46931302547454834, + "learning_rate": 4.996477545096413e-06, + "loss": 0.6626, + "step": 1209 + }, + { + "epoch": 0.11046193171444221, + "grad_norm": 0.4622620642185211, + "learning_rate": 4.996471191239341e-06, + "loss": 0.6105, + "step": 1210 + }, + { + "epoch": 0.11055322256709878, + "grad_norm": 0.42573145031929016, + "learning_rate": 4.996464831660886e-06, + "loss": 0.641, + "step": 1211 + }, + { + "epoch": 0.11064451341975534, + "grad_norm": 0.4593372642993927, + "learning_rate": 4.996458466361065e-06, + "loss": 0.6466, + "step": 1212 + }, + { + "epoch": 0.1107358042724119, + "grad_norm": 0.44212350249290466, + "learning_rate": 4.996452095339891e-06, + "loss": 0.6409, + "step": 1213 + }, + { + "epoch": 0.11082709512506847, + "grad_norm": 0.41927066445350647, + "learning_rate": 4.99644571859738e-06, + "loss": 0.6511, + "step": 1214 + }, + { + "epoch": 0.11091838597772503, + "grad_norm": 0.44647592306137085, + "learning_rate": 4.9964393361335454e-06, + "loss": 0.6388, + "step": 1215 + }, + { + "epoch": 0.1110096768303816, + "grad_norm": 0.46828213334083557, + "learning_rate": 4.996432947948403e-06, + "loss": 0.6124, + "step": 1216 + }, + { + "epoch": 0.11110096768303816, + "grad_norm": 0.4487086236476898, + "learning_rate": 4.996426554041966e-06, + "loss": 0.6495, + "step": 1217 + }, + { + "epoch": 0.11119225853569473, + "grad_norm": 0.45787015557289124, + "learning_rate": 4.99642015441425e-06, + "loss": 0.6546, + "step": 1218 + }, + { + "epoch": 0.11128354938835129, + "grad_norm": 0.41451215744018555, + "learning_rate": 4.99641374906527e-06, + "loss": 0.5686, + "step": 1219 + }, + { + "epoch": 0.11137484024100786, + "grad_norm": 0.4532049894332886, + "learning_rate": 4.996407337995039e-06, + "loss": 0.6225, + "step": 1220 + }, + { + "epoch": 0.11146613109366442, + "grad_norm": 0.48252058029174805, + "learning_rate": 4.996400921203574e-06, + "loss": 0.6686, + "step": 1221 + }, + { + "epoch": 0.11155742194632098, + "grad_norm": 0.4183346629142761, + "learning_rate": 4.996394498690888e-06, + "loss": 0.6745, + "step": 1222 + }, + { + "epoch": 0.11164871279897755, + "grad_norm": 0.4438028931617737, + "learning_rate": 4.996388070456996e-06, + "loss": 0.6291, + "step": 1223 + }, + { + "epoch": 0.11174000365163411, + "grad_norm": 0.4103265702724457, + "learning_rate": 4.996381636501914e-06, + "loss": 0.6526, + "step": 1224 + }, + { + "epoch": 0.11183129450429068, + "grad_norm": 0.4301983118057251, + "learning_rate": 4.996375196825655e-06, + "loss": 0.604, + "step": 1225 + }, + { + "epoch": 0.11192258535694724, + "grad_norm": 0.4521062970161438, + "learning_rate": 4.996368751428234e-06, + "loss": 0.6348, + "step": 1226 + }, + { + "epoch": 0.11201387620960379, + "grad_norm": 0.4511467516422272, + "learning_rate": 4.996362300309667e-06, + "loss": 0.6011, + "step": 1227 + }, + { + "epoch": 0.11210516706226036, + "grad_norm": 0.451175719499588, + "learning_rate": 4.996355843469968e-06, + "loss": 0.6509, + "step": 1228 + }, + { + "epoch": 0.11219645791491692, + "grad_norm": 0.4653257727622986, + "learning_rate": 4.996349380909152e-06, + "loss": 0.5882, + "step": 1229 + }, + { + "epoch": 0.11228774876757348, + "grad_norm": 0.47660914063453674, + "learning_rate": 4.9963429126272336e-06, + "loss": 0.6918, + "step": 1230 + }, + { + "epoch": 0.11237903962023005, + "grad_norm": 0.4506685733795166, + "learning_rate": 4.996336438624228e-06, + "loss": 0.6383, + "step": 1231 + }, + { + "epoch": 0.11247033047288661, + "grad_norm": 0.4430789351463318, + "learning_rate": 4.996329958900148e-06, + "loss": 0.6452, + "step": 1232 + }, + { + "epoch": 0.11256162132554318, + "grad_norm": 0.45401304960250854, + "learning_rate": 4.996323473455012e-06, + "loss": 0.6588, + "step": 1233 + }, + { + "epoch": 0.11265291217819974, + "grad_norm": 0.4453972578048706, + "learning_rate": 4.996316982288832e-06, + "loss": 0.6797, + "step": 1234 + }, + { + "epoch": 0.1127442030308563, + "grad_norm": 0.4492860734462738, + "learning_rate": 4.9963104854016255e-06, + "loss": 0.6586, + "step": 1235 + }, + { + "epoch": 0.11283549388351287, + "grad_norm": 0.4379408061504364, + "learning_rate": 4.996303982793404e-06, + "loss": 0.6292, + "step": 1236 + }, + { + "epoch": 0.11292678473616943, + "grad_norm": 0.4332534670829773, + "learning_rate": 4.996297474464185e-06, + "loss": 0.6415, + "step": 1237 + }, + { + "epoch": 0.113018075588826, + "grad_norm": 0.437433660030365, + "learning_rate": 4.9962909604139824e-06, + "loss": 0.6533, + "step": 1238 + }, + { + "epoch": 0.11310936644148256, + "grad_norm": 0.45754992961883545, + "learning_rate": 4.996284440642812e-06, + "loss": 0.627, + "step": 1239 + }, + { + "epoch": 0.11320065729413913, + "grad_norm": 0.4635239839553833, + "learning_rate": 4.9962779151506875e-06, + "loss": 0.6222, + "step": 1240 + }, + { + "epoch": 0.11329194814679569, + "grad_norm": 0.4691292345523834, + "learning_rate": 4.996271383937624e-06, + "loss": 0.6931, + "step": 1241 + }, + { + "epoch": 0.11338323899945225, + "grad_norm": 0.4445421099662781, + "learning_rate": 4.996264847003638e-06, + "loss": 0.6683, + "step": 1242 + }, + { + "epoch": 0.11347452985210882, + "grad_norm": 0.4589344263076782, + "learning_rate": 4.996258304348743e-06, + "loss": 0.6169, + "step": 1243 + }, + { + "epoch": 0.11356582070476538, + "grad_norm": 0.4474531412124634, + "learning_rate": 4.996251755972954e-06, + "loss": 0.6518, + "step": 1244 + }, + { + "epoch": 0.11365711155742195, + "grad_norm": 0.477422833442688, + "learning_rate": 4.996245201876287e-06, + "loss": 0.6337, + "step": 1245 + }, + { + "epoch": 0.11374840241007851, + "grad_norm": 0.4315752387046814, + "learning_rate": 4.996238642058756e-06, + "loss": 0.6201, + "step": 1246 + }, + { + "epoch": 0.11383969326273508, + "grad_norm": 0.4368182122707367, + "learning_rate": 4.996232076520377e-06, + "loss": 0.596, + "step": 1247 + }, + { + "epoch": 0.11393098411539164, + "grad_norm": 0.43781840801239014, + "learning_rate": 4.996225505261163e-06, + "loss": 0.6484, + "step": 1248 + }, + { + "epoch": 0.1140222749680482, + "grad_norm": 0.46017786860466003, + "learning_rate": 4.9962189282811325e-06, + "loss": 0.6404, + "step": 1249 + }, + { + "epoch": 0.11411356582070477, + "grad_norm": 0.4473588764667511, + "learning_rate": 4.996212345580297e-06, + "loss": 0.6384, + "step": 1250 + }, + { + "epoch": 0.11420485667336133, + "grad_norm": 0.45403844118118286, + "learning_rate": 4.996205757158674e-06, + "loss": 0.6267, + "step": 1251 + }, + { + "epoch": 0.1142961475260179, + "grad_norm": 0.4340182840824127, + "learning_rate": 4.9961991630162784e-06, + "loss": 0.6468, + "step": 1252 + }, + { + "epoch": 0.11438743837867446, + "grad_norm": 0.44590866565704346, + "learning_rate": 4.996192563153124e-06, + "loss": 0.6173, + "step": 1253 + }, + { + "epoch": 0.11447872923133103, + "grad_norm": 0.4565846025943756, + "learning_rate": 4.996185957569228e-06, + "loss": 0.6565, + "step": 1254 + }, + { + "epoch": 0.11457002008398759, + "grad_norm": 0.45077380537986755, + "learning_rate": 4.996179346264602e-06, + "loss": 0.6389, + "step": 1255 + }, + { + "epoch": 0.11466131093664415, + "grad_norm": 0.4571005702018738, + "learning_rate": 4.996172729239265e-06, + "loss": 0.6291, + "step": 1256 + }, + { + "epoch": 0.11475260178930072, + "grad_norm": 0.43926626443862915, + "learning_rate": 4.996166106493229e-06, + "loss": 0.6474, + "step": 1257 + }, + { + "epoch": 0.11484389264195728, + "grad_norm": 0.4415569007396698, + "learning_rate": 4.9961594780265124e-06, + "loss": 0.5796, + "step": 1258 + }, + { + "epoch": 0.11493518349461385, + "grad_norm": 0.4391997158527374, + "learning_rate": 4.9961528438391274e-06, + "loss": 0.659, + "step": 1259 + }, + { + "epoch": 0.11502647434727041, + "grad_norm": 0.4652986526489258, + "learning_rate": 4.996146203931091e-06, + "loss": 0.6233, + "step": 1260 + }, + { + "epoch": 0.11511776519992696, + "grad_norm": 0.44365301728248596, + "learning_rate": 4.996139558302417e-06, + "loss": 0.6604, + "step": 1261 + }, + { + "epoch": 0.11520905605258352, + "grad_norm": 0.4604197144508362, + "learning_rate": 4.996132906953123e-06, + "loss": 0.6537, + "step": 1262 + }, + { + "epoch": 0.11530034690524009, + "grad_norm": 0.47799497842788696, + "learning_rate": 4.996126249883223e-06, + "loss": 0.6377, + "step": 1263 + }, + { + "epoch": 0.11539163775789665, + "grad_norm": 0.49344655871391296, + "learning_rate": 4.99611958709273e-06, + "loss": 0.5983, + "step": 1264 + }, + { + "epoch": 0.11548292861055322, + "grad_norm": 0.4524383842945099, + "learning_rate": 4.996112918581663e-06, + "loss": 0.6257, + "step": 1265 + }, + { + "epoch": 0.11557421946320978, + "grad_norm": 0.4995439052581787, + "learning_rate": 4.996106244350035e-06, + "loss": 0.6411, + "step": 1266 + }, + { + "epoch": 0.11566551031586635, + "grad_norm": 0.46594730019569397, + "learning_rate": 4.996099564397862e-06, + "loss": 0.6359, + "step": 1267 + }, + { + "epoch": 0.11575680116852291, + "grad_norm": 0.48195862770080566, + "learning_rate": 4.996092878725159e-06, + "loss": 0.6125, + "step": 1268 + }, + { + "epoch": 0.11584809202117947, + "grad_norm": 0.4642961621284485, + "learning_rate": 4.996086187331942e-06, + "loss": 0.6632, + "step": 1269 + }, + { + "epoch": 0.11593938287383604, + "grad_norm": 0.43792182207107544, + "learning_rate": 4.996079490218226e-06, + "loss": 0.6506, + "step": 1270 + }, + { + "epoch": 0.1160306737264926, + "grad_norm": 0.43704909086227417, + "learning_rate": 4.996072787384025e-06, + "loss": 0.6054, + "step": 1271 + }, + { + "epoch": 0.11612196457914917, + "grad_norm": 0.46813321113586426, + "learning_rate": 4.9960660788293565e-06, + "loss": 0.6197, + "step": 1272 + }, + { + "epoch": 0.11621325543180573, + "grad_norm": 0.44456255435943604, + "learning_rate": 4.9960593645542356e-06, + "loss": 0.6287, + "step": 1273 + }, + { + "epoch": 0.1163045462844623, + "grad_norm": 0.43560177087783813, + "learning_rate": 4.996052644558675e-06, + "loss": 0.6719, + "step": 1274 + }, + { + "epoch": 0.11639583713711886, + "grad_norm": 0.4722190201282501, + "learning_rate": 4.9960459188426945e-06, + "loss": 0.6193, + "step": 1275 + }, + { + "epoch": 0.11648712798977542, + "grad_norm": 0.4692968726158142, + "learning_rate": 4.996039187406306e-06, + "loss": 0.6544, + "step": 1276 + }, + { + "epoch": 0.11657841884243199, + "grad_norm": 0.4110349714756012, + "learning_rate": 4.996032450249526e-06, + "loss": 0.671, + "step": 1277 + }, + { + "epoch": 0.11666970969508855, + "grad_norm": 0.47097259759902954, + "learning_rate": 4.9960257073723695e-06, + "loss": 0.6253, + "step": 1278 + }, + { + "epoch": 0.11676100054774512, + "grad_norm": 0.45845234394073486, + "learning_rate": 4.9960189587748535e-06, + "loss": 0.6373, + "step": 1279 + }, + { + "epoch": 0.11685229140040168, + "grad_norm": 0.4577721655368805, + "learning_rate": 4.996012204456991e-06, + "loss": 0.6575, + "step": 1280 + }, + { + "epoch": 0.11694358225305825, + "grad_norm": 0.4486275911331177, + "learning_rate": 4.9960054444188e-06, + "loss": 0.6485, + "step": 1281 + }, + { + "epoch": 0.11703487310571481, + "grad_norm": 0.43217816948890686, + "learning_rate": 4.995998678660296e-06, + "loss": 0.6407, + "step": 1282 + }, + { + "epoch": 0.11712616395837137, + "grad_norm": 0.46866944432258606, + "learning_rate": 4.995991907181491e-06, + "loss": 0.6271, + "step": 1283 + }, + { + "epoch": 0.11721745481102794, + "grad_norm": 0.5047217607498169, + "learning_rate": 4.995985129982405e-06, + "loss": 0.6089, + "step": 1284 + }, + { + "epoch": 0.1173087456636845, + "grad_norm": 0.42847394943237305, + "learning_rate": 4.995978347063049e-06, + "loss": 0.6603, + "step": 1285 + }, + { + "epoch": 0.11740003651634107, + "grad_norm": 0.4343608021736145, + "learning_rate": 4.995971558423443e-06, + "loss": 0.6614, + "step": 1286 + }, + { + "epoch": 0.11749132736899763, + "grad_norm": 0.4635472297668457, + "learning_rate": 4.995964764063601e-06, + "loss": 0.6315, + "step": 1287 + }, + { + "epoch": 0.1175826182216542, + "grad_norm": 0.48656952381134033, + "learning_rate": 4.995957963983537e-06, + "loss": 0.603, + "step": 1288 + }, + { + "epoch": 0.11767390907431076, + "grad_norm": 0.4451087415218353, + "learning_rate": 4.995951158183268e-06, + "loss": 0.6314, + "step": 1289 + }, + { + "epoch": 0.11776519992696732, + "grad_norm": 0.4428237974643707, + "learning_rate": 4.995944346662809e-06, + "loss": 0.6155, + "step": 1290 + }, + { + "epoch": 0.11785649077962389, + "grad_norm": 0.4750320613384247, + "learning_rate": 4.995937529422176e-06, + "loss": 0.6108, + "step": 1291 + }, + { + "epoch": 0.11794778163228045, + "grad_norm": 0.4721219837665558, + "learning_rate": 4.995930706461385e-06, + "loss": 0.634, + "step": 1292 + }, + { + "epoch": 0.11803907248493702, + "grad_norm": 0.4730502665042877, + "learning_rate": 4.995923877780451e-06, + "loss": 0.6645, + "step": 1293 + }, + { + "epoch": 0.11813036333759357, + "grad_norm": 0.49403759837150574, + "learning_rate": 4.9959170433793894e-06, + "loss": 0.6068, + "step": 1294 + }, + { + "epoch": 0.11822165419025013, + "grad_norm": 0.4405626952648163, + "learning_rate": 4.995910203258216e-06, + "loss": 0.6749, + "step": 1295 + }, + { + "epoch": 0.1183129450429067, + "grad_norm": 0.4696730375289917, + "learning_rate": 4.995903357416948e-06, + "loss": 0.639, + "step": 1296 + }, + { + "epoch": 0.11840423589556326, + "grad_norm": 0.46173444390296936, + "learning_rate": 4.995896505855598e-06, + "loss": 0.644, + "step": 1297 + }, + { + "epoch": 0.11849552674821982, + "grad_norm": 0.490527480840683, + "learning_rate": 4.995889648574186e-06, + "loss": 0.6243, + "step": 1298 + }, + { + "epoch": 0.11858681760087639, + "grad_norm": 0.43442538380622864, + "learning_rate": 4.9958827855727235e-06, + "loss": 0.6416, + "step": 1299 + }, + { + "epoch": 0.11867810845353295, + "grad_norm": 0.4851950705051422, + "learning_rate": 4.9958759168512285e-06, + "loss": 0.626, + "step": 1300 + }, + { + "epoch": 0.11876939930618952, + "grad_norm": 0.44052091240882874, + "learning_rate": 4.995869042409717e-06, + "loss": 0.6487, + "step": 1301 + }, + { + "epoch": 0.11886069015884608, + "grad_norm": 0.4398491382598877, + "learning_rate": 4.995862162248203e-06, + "loss": 0.6477, + "step": 1302 + }, + { + "epoch": 0.11895198101150264, + "grad_norm": 0.4966941177845001, + "learning_rate": 4.995855276366703e-06, + "loss": 0.6484, + "step": 1303 + }, + { + "epoch": 0.11904327186415921, + "grad_norm": 0.47986626625061035, + "learning_rate": 4.995848384765234e-06, + "loss": 0.6555, + "step": 1304 + }, + { + "epoch": 0.11913456271681577, + "grad_norm": 0.47397130727767944, + "learning_rate": 4.9958414874438105e-06, + "loss": 0.5991, + "step": 1305 + }, + { + "epoch": 0.11922585356947234, + "grad_norm": 0.43572473526000977, + "learning_rate": 4.995834584402449e-06, + "loss": 0.6592, + "step": 1306 + }, + { + "epoch": 0.1193171444221289, + "grad_norm": 0.45087847113609314, + "learning_rate": 4.995827675641164e-06, + "loss": 0.6508, + "step": 1307 + }, + { + "epoch": 0.11940843527478547, + "grad_norm": 0.42826607823371887, + "learning_rate": 4.995820761159973e-06, + "loss": 0.5827, + "step": 1308 + }, + { + "epoch": 0.11949972612744203, + "grad_norm": 0.4401370882987976, + "learning_rate": 4.995813840958891e-06, + "loss": 0.6288, + "step": 1309 + }, + { + "epoch": 0.1195910169800986, + "grad_norm": 0.4544541835784912, + "learning_rate": 4.995806915037934e-06, + "loss": 0.6515, + "step": 1310 + }, + { + "epoch": 0.11968230783275516, + "grad_norm": 0.46706002950668335, + "learning_rate": 4.995799983397118e-06, + "loss": 0.6324, + "step": 1311 + }, + { + "epoch": 0.11977359868541172, + "grad_norm": 0.4154023826122284, + "learning_rate": 4.995793046036459e-06, + "loss": 0.6739, + "step": 1312 + }, + { + "epoch": 0.11986488953806829, + "grad_norm": 0.4405616223812103, + "learning_rate": 4.995786102955973e-06, + "loss": 0.6204, + "step": 1313 + }, + { + "epoch": 0.11995618039072485, + "grad_norm": 0.42289406061172485, + "learning_rate": 4.9957791541556746e-06, + "loss": 0.6477, + "step": 1314 + }, + { + "epoch": 0.12004747124338142, + "grad_norm": 0.4886590242385864, + "learning_rate": 4.9957721996355815e-06, + "loss": 0.6127, + "step": 1315 + }, + { + "epoch": 0.12013876209603798, + "grad_norm": 0.4488118290901184, + "learning_rate": 4.995765239395708e-06, + "loss": 0.6644, + "step": 1316 + }, + { + "epoch": 0.12023005294869454, + "grad_norm": 0.4400750696659088, + "learning_rate": 4.995758273436071e-06, + "loss": 0.6581, + "step": 1317 + }, + { + "epoch": 0.12032134380135111, + "grad_norm": 0.4499109387397766, + "learning_rate": 4.9957513017566875e-06, + "loss": 0.638, + "step": 1318 + }, + { + "epoch": 0.12041263465400767, + "grad_norm": 0.447627991437912, + "learning_rate": 4.995744324357571e-06, + "loss": 0.6559, + "step": 1319 + }, + { + "epoch": 0.12050392550666424, + "grad_norm": 0.4518221616744995, + "learning_rate": 4.99573734123874e-06, + "loss": 0.6672, + "step": 1320 + }, + { + "epoch": 0.1205952163593208, + "grad_norm": 0.4548434317111969, + "learning_rate": 4.995730352400207e-06, + "loss": 0.6372, + "step": 1321 + }, + { + "epoch": 0.12068650721197736, + "grad_norm": 0.467818021774292, + "learning_rate": 4.9957233578419925e-06, + "loss": 0.6199, + "step": 1322 + }, + { + "epoch": 0.12077779806463393, + "grad_norm": 0.46265533566474915, + "learning_rate": 4.995716357564111e-06, + "loss": 0.6437, + "step": 1323 + }, + { + "epoch": 0.1208690889172905, + "grad_norm": 0.4571409225463867, + "learning_rate": 4.995709351566577e-06, + "loss": 0.6283, + "step": 1324 + }, + { + "epoch": 0.12096037976994706, + "grad_norm": 0.46676960587501526, + "learning_rate": 4.995702339849407e-06, + "loss": 0.6252, + "step": 1325 + }, + { + "epoch": 0.12105167062260362, + "grad_norm": 0.43975093960762024, + "learning_rate": 4.995695322412618e-06, + "loss": 0.6726, + "step": 1326 + }, + { + "epoch": 0.12114296147526019, + "grad_norm": 0.4687798321247101, + "learning_rate": 4.995688299256225e-06, + "loss": 0.6188, + "step": 1327 + }, + { + "epoch": 0.12123425232791674, + "grad_norm": 0.44347918033599854, + "learning_rate": 4.995681270380245e-06, + "loss": 0.6415, + "step": 1328 + }, + { + "epoch": 0.1213255431805733, + "grad_norm": 0.43838316202163696, + "learning_rate": 4.995674235784694e-06, + "loss": 0.6164, + "step": 1329 + }, + { + "epoch": 0.12141683403322986, + "grad_norm": 0.4457201063632965, + "learning_rate": 4.995667195469588e-06, + "loss": 0.6423, + "step": 1330 + }, + { + "epoch": 0.12150812488588643, + "grad_norm": 0.44246435165405273, + "learning_rate": 4.995660149434943e-06, + "loss": 0.6635, + "step": 1331 + }, + { + "epoch": 0.12159941573854299, + "grad_norm": 0.5041815638542175, + "learning_rate": 4.995653097680776e-06, + "loss": 0.6261, + "step": 1332 + }, + { + "epoch": 0.12169070659119956, + "grad_norm": 0.47831180691719055, + "learning_rate": 4.9956460402071015e-06, + "loss": 0.6491, + "step": 1333 + }, + { + "epoch": 0.12178199744385612, + "grad_norm": 0.4805046319961548, + "learning_rate": 4.9956389770139366e-06, + "loss": 0.6358, + "step": 1334 + }, + { + "epoch": 0.12187328829651269, + "grad_norm": 0.44685521721839905, + "learning_rate": 4.9956319081012986e-06, + "loss": 0.6266, + "step": 1335 + }, + { + "epoch": 0.12196457914916925, + "grad_norm": 0.45669665932655334, + "learning_rate": 4.995624833469201e-06, + "loss": 0.6085, + "step": 1336 + }, + { + "epoch": 0.12205587000182581, + "grad_norm": 0.4808363914489746, + "learning_rate": 4.995617753117662e-06, + "loss": 0.6237, + "step": 1337 + }, + { + "epoch": 0.12214716085448238, + "grad_norm": 0.48807045817375183, + "learning_rate": 4.995610667046698e-06, + "loss": 0.6584, + "step": 1338 + }, + { + "epoch": 0.12223845170713894, + "grad_norm": 0.4454022943973541, + "learning_rate": 4.995603575256324e-06, + "loss": 0.6585, + "step": 1339 + }, + { + "epoch": 0.1223297425597955, + "grad_norm": 0.44657742977142334, + "learning_rate": 4.995596477746557e-06, + "loss": 0.6061, + "step": 1340 + }, + { + "epoch": 0.12242103341245207, + "grad_norm": 0.5212991833686829, + "learning_rate": 4.995589374517414e-06, + "loss": 0.6247, + "step": 1341 + }, + { + "epoch": 0.12251232426510864, + "grad_norm": 0.4777282178401947, + "learning_rate": 4.995582265568909e-06, + "loss": 0.6352, + "step": 1342 + }, + { + "epoch": 0.1226036151177652, + "grad_norm": 0.4853440225124359, + "learning_rate": 4.995575150901061e-06, + "loss": 0.6262, + "step": 1343 + }, + { + "epoch": 0.12269490597042176, + "grad_norm": 0.42372721433639526, + "learning_rate": 4.995568030513885e-06, + "loss": 0.6158, + "step": 1344 + }, + { + "epoch": 0.12278619682307833, + "grad_norm": 0.45643141865730286, + "learning_rate": 4.995560904407396e-06, + "loss": 0.6535, + "step": 1345 + }, + { + "epoch": 0.12287748767573489, + "grad_norm": 0.4971558451652527, + "learning_rate": 4.995553772581613e-06, + "loss": 0.6427, + "step": 1346 + }, + { + "epoch": 0.12296877852839146, + "grad_norm": 0.4620111882686615, + "learning_rate": 4.995546635036551e-06, + "loss": 0.6273, + "step": 1347 + }, + { + "epoch": 0.12306006938104802, + "grad_norm": 0.4757828414440155, + "learning_rate": 4.9955394917722265e-06, + "loss": 0.6285, + "step": 1348 + }, + { + "epoch": 0.12315136023370458, + "grad_norm": 0.46051838994026184, + "learning_rate": 4.995532342788655e-06, + "loss": 0.6355, + "step": 1349 + }, + { + "epoch": 0.12324265108636115, + "grad_norm": 0.43986424803733826, + "learning_rate": 4.995525188085854e-06, + "loss": 0.6517, + "step": 1350 + }, + { + "epoch": 0.12333394193901771, + "grad_norm": 0.4648129343986511, + "learning_rate": 4.995518027663839e-06, + "loss": 0.6678, + "step": 1351 + }, + { + "epoch": 0.12342523279167428, + "grad_norm": 0.483275830745697, + "learning_rate": 4.995510861522628e-06, + "loss": 0.6203, + "step": 1352 + }, + { + "epoch": 0.12351652364433084, + "grad_norm": 0.46057918667793274, + "learning_rate": 4.995503689662236e-06, + "loss": 0.6375, + "step": 1353 + }, + { + "epoch": 0.1236078144969874, + "grad_norm": 0.4786418378353119, + "learning_rate": 4.99549651208268e-06, + "loss": 0.6291, + "step": 1354 + }, + { + "epoch": 0.12369910534964397, + "grad_norm": 0.5025826692581177, + "learning_rate": 4.995489328783975e-06, + "loss": 0.6035, + "step": 1355 + }, + { + "epoch": 0.12379039620230053, + "grad_norm": 0.42287477850914, + "learning_rate": 4.99548213976614e-06, + "loss": 0.6029, + "step": 1356 + }, + { + "epoch": 0.1238816870549571, + "grad_norm": 0.43932339549064636, + "learning_rate": 4.9954749450291895e-06, + "loss": 0.6439, + "step": 1357 + }, + { + "epoch": 0.12397297790761366, + "grad_norm": 0.46834081411361694, + "learning_rate": 4.995467744573141e-06, + "loss": 0.6037, + "step": 1358 + }, + { + "epoch": 0.12406426876027023, + "grad_norm": 0.4905703067779541, + "learning_rate": 4.99546053839801e-06, + "loss": 0.6595, + "step": 1359 + }, + { + "epoch": 0.12415555961292679, + "grad_norm": 0.45042479038238525, + "learning_rate": 4.995453326503815e-06, + "loss": 0.6406, + "step": 1360 + }, + { + "epoch": 0.12424685046558334, + "grad_norm": 0.4756280481815338, + "learning_rate": 4.99544610889057e-06, + "loss": 0.5964, + "step": 1361 + }, + { + "epoch": 0.1243381413182399, + "grad_norm": 0.4160858988761902, + "learning_rate": 4.995438885558294e-06, + "loss": 0.6513, + "step": 1362 + }, + { + "epoch": 0.12442943217089647, + "grad_norm": 0.45297738909721375, + "learning_rate": 4.995431656507002e-06, + "loss": 0.6465, + "step": 1363 + }, + { + "epoch": 0.12452072302355303, + "grad_norm": 0.4626235067844391, + "learning_rate": 4.99542442173671e-06, + "loss": 0.6635, + "step": 1364 + }, + { + "epoch": 0.1246120138762096, + "grad_norm": 0.47112730145454407, + "learning_rate": 4.995417181247437e-06, + "loss": 0.6447, + "step": 1365 + }, + { + "epoch": 0.12470330472886616, + "grad_norm": 0.4480900764465332, + "learning_rate": 4.995409935039197e-06, + "loss": 0.6347, + "step": 1366 + }, + { + "epoch": 0.12479459558152273, + "grad_norm": 0.45341619849205017, + "learning_rate": 4.9954026831120076e-06, + "loss": 0.6087, + "step": 1367 + }, + { + "epoch": 0.12488588643417929, + "grad_norm": 0.46243441104888916, + "learning_rate": 4.995395425465886e-06, + "loss": 0.6548, + "step": 1368 + }, + { + "epoch": 0.12497717728683586, + "grad_norm": 0.4588024318218231, + "learning_rate": 4.995388162100848e-06, + "loss": 0.6747, + "step": 1369 + }, + { + "epoch": 0.12506846813949243, + "grad_norm": 0.4913550019264221, + "learning_rate": 4.995380893016911e-06, + "loss": 0.6981, + "step": 1370 + }, + { + "epoch": 0.12515975899214898, + "grad_norm": 0.4724327623844147, + "learning_rate": 4.99537361821409e-06, + "loss": 0.5992, + "step": 1371 + }, + { + "epoch": 0.12525104984480556, + "grad_norm": 0.44811561703681946, + "learning_rate": 4.995366337692404e-06, + "loss": 0.6303, + "step": 1372 + }, + { + "epoch": 0.1253423406974621, + "grad_norm": 0.4762338697910309, + "learning_rate": 4.995359051451869e-06, + "loss": 0.6342, + "step": 1373 + }, + { + "epoch": 0.1254336315501187, + "grad_norm": 0.4540712833404541, + "learning_rate": 4.9953517594925e-06, + "loss": 0.6445, + "step": 1374 + }, + { + "epoch": 0.12552492240277524, + "grad_norm": 0.44534340500831604, + "learning_rate": 4.995344461814317e-06, + "loss": 0.6044, + "step": 1375 + }, + { + "epoch": 0.12561621325543182, + "grad_norm": 0.47937270998954773, + "learning_rate": 4.995337158417333e-06, + "loss": 0.623, + "step": 1376 + }, + { + "epoch": 0.12570750410808837, + "grad_norm": 0.4627498388290405, + "learning_rate": 4.995329849301567e-06, + "loss": 0.6253, + "step": 1377 + }, + { + "epoch": 0.12579879496074492, + "grad_norm": 0.47145044803619385, + "learning_rate": 4.9953225344670345e-06, + "loss": 0.6171, + "step": 1378 + }, + { + "epoch": 0.1258900858134015, + "grad_norm": 0.44443047046661377, + "learning_rate": 4.995315213913754e-06, + "loss": 0.6505, + "step": 1379 + }, + { + "epoch": 0.12598137666605805, + "grad_norm": 0.45069265365600586, + "learning_rate": 4.995307887641741e-06, + "loss": 0.6534, + "step": 1380 + }, + { + "epoch": 0.12607266751871463, + "grad_norm": 0.4302799701690674, + "learning_rate": 4.9953005556510124e-06, + "loss": 0.683, + "step": 1381 + }, + { + "epoch": 0.12616395837137118, + "grad_norm": 0.4608520269393921, + "learning_rate": 4.995293217941584e-06, + "loss": 0.6238, + "step": 1382 + }, + { + "epoch": 0.12625524922402775, + "grad_norm": 0.4795118570327759, + "learning_rate": 4.995285874513476e-06, + "loss": 0.6283, + "step": 1383 + }, + { + "epoch": 0.1263465400766843, + "grad_norm": 0.44409462809562683, + "learning_rate": 4.995278525366702e-06, + "loss": 0.6316, + "step": 1384 + }, + { + "epoch": 0.12643783092934088, + "grad_norm": 0.47177019715309143, + "learning_rate": 4.99527117050128e-06, + "loss": 0.5963, + "step": 1385 + }, + { + "epoch": 0.12652912178199743, + "grad_norm": 0.4398903548717499, + "learning_rate": 4.995263809917226e-06, + "loss": 0.5828, + "step": 1386 + }, + { + "epoch": 0.126620412634654, + "grad_norm": 0.4672877788543701, + "learning_rate": 4.995256443614558e-06, + "loss": 0.6115, + "step": 1387 + }, + { + "epoch": 0.12671170348731056, + "grad_norm": 0.4515328109264374, + "learning_rate": 4.995249071593293e-06, + "loss": 0.6111, + "step": 1388 + }, + { + "epoch": 0.12680299433996714, + "grad_norm": 0.4677361845970154, + "learning_rate": 4.995241693853446e-06, + "loss": 0.6261, + "step": 1389 + }, + { + "epoch": 0.1268942851926237, + "grad_norm": 0.48369842767715454, + "learning_rate": 4.995234310395037e-06, + "loss": 0.6061, + "step": 1390 + }, + { + "epoch": 0.12698557604528027, + "grad_norm": 0.45169445872306824, + "learning_rate": 4.99522692121808e-06, + "loss": 0.5745, + "step": 1391 + }, + { + "epoch": 0.12707686689793682, + "grad_norm": 0.4513642191886902, + "learning_rate": 4.995219526322593e-06, + "loss": 0.6173, + "step": 1392 + }, + { + "epoch": 0.1271681577505934, + "grad_norm": 0.43874651193618774, + "learning_rate": 4.995212125708594e-06, + "loss": 0.6354, + "step": 1393 + }, + { + "epoch": 0.12725944860324995, + "grad_norm": 0.5131449699401855, + "learning_rate": 4.995204719376099e-06, + "loss": 0.6366, + "step": 1394 + }, + { + "epoch": 0.12735073945590653, + "grad_norm": 0.5007283687591553, + "learning_rate": 4.995197307325124e-06, + "loss": 0.6154, + "step": 1395 + }, + { + "epoch": 0.12744203030856308, + "grad_norm": 0.4587138295173645, + "learning_rate": 4.995189889555687e-06, + "loss": 0.6247, + "step": 1396 + }, + { + "epoch": 0.12753332116121965, + "grad_norm": 0.4622844457626343, + "learning_rate": 4.995182466067806e-06, + "loss": 0.6405, + "step": 1397 + }, + { + "epoch": 0.1276246120138762, + "grad_norm": 0.4427952170372009, + "learning_rate": 4.995175036861496e-06, + "loss": 0.6569, + "step": 1398 + }, + { + "epoch": 0.12771590286653278, + "grad_norm": 0.45970383286476135, + "learning_rate": 4.995167601936776e-06, + "loss": 0.6502, + "step": 1399 + }, + { + "epoch": 0.12780719371918933, + "grad_norm": 0.47836002707481384, + "learning_rate": 4.99516016129366e-06, + "loss": 0.6191, + "step": 1400 + }, + { + "epoch": 0.1278984845718459, + "grad_norm": 0.458572119474411, + "learning_rate": 4.995152714932169e-06, + "loss": 0.6375, + "step": 1401 + }, + { + "epoch": 0.12798977542450246, + "grad_norm": 0.45365458726882935, + "learning_rate": 4.9951452628523175e-06, + "loss": 0.618, + "step": 1402 + }, + { + "epoch": 0.12808106627715904, + "grad_norm": 0.4435986578464508, + "learning_rate": 4.995137805054124e-06, + "loss": 0.6167, + "step": 1403 + }, + { + "epoch": 0.1281723571298156, + "grad_norm": 0.4455685615539551, + "learning_rate": 4.995130341537604e-06, + "loss": 0.6617, + "step": 1404 + }, + { + "epoch": 0.12826364798247217, + "grad_norm": 0.45301541686058044, + "learning_rate": 4.995122872302776e-06, + "loss": 0.6437, + "step": 1405 + }, + { + "epoch": 0.12835493883512872, + "grad_norm": 0.5112938284873962, + "learning_rate": 4.9951153973496566e-06, + "loss": 0.6239, + "step": 1406 + }, + { + "epoch": 0.1284462296877853, + "grad_norm": 0.4806113541126251, + "learning_rate": 4.995107916678263e-06, + "loss": 0.6535, + "step": 1407 + }, + { + "epoch": 0.12853752054044185, + "grad_norm": 0.42422348260879517, + "learning_rate": 4.995100430288611e-06, + "loss": 0.632, + "step": 1408 + }, + { + "epoch": 0.12862881139309842, + "grad_norm": 0.4344514012336731, + "learning_rate": 4.99509293818072e-06, + "loss": 0.6382, + "step": 1409 + }, + { + "epoch": 0.12872010224575497, + "grad_norm": 0.4482835829257965, + "learning_rate": 4.9950854403546065e-06, + "loss": 0.6418, + "step": 1410 + }, + { + "epoch": 0.12881139309841155, + "grad_norm": 0.46571433544158936, + "learning_rate": 4.995077936810286e-06, + "loss": 0.6144, + "step": 1411 + }, + { + "epoch": 0.1289026839510681, + "grad_norm": 0.4650091528892517, + "learning_rate": 4.995070427547779e-06, + "loss": 0.6531, + "step": 1412 + }, + { + "epoch": 0.12899397480372465, + "grad_norm": 0.4625003933906555, + "learning_rate": 4.995062912567099e-06, + "loss": 0.6286, + "step": 1413 + }, + { + "epoch": 0.12908526565638123, + "grad_norm": 0.46570831537246704, + "learning_rate": 4.995055391868266e-06, + "loss": 0.6468, + "step": 1414 + }, + { + "epoch": 0.12917655650903778, + "grad_norm": 0.44639766216278076, + "learning_rate": 4.995047865451296e-06, + "loss": 0.5964, + "step": 1415 + }, + { + "epoch": 0.12926784736169436, + "grad_norm": 0.4581958055496216, + "learning_rate": 4.995040333316206e-06, + "loss": 0.6231, + "step": 1416 + }, + { + "epoch": 0.1293591382143509, + "grad_norm": 0.47188499569892883, + "learning_rate": 4.995032795463014e-06, + "loss": 0.6254, + "step": 1417 + }, + { + "epoch": 0.1294504290670075, + "grad_norm": 0.4379798173904419, + "learning_rate": 4.9950252518917375e-06, + "loss": 0.6288, + "step": 1418 + }, + { + "epoch": 0.12954171991966404, + "grad_norm": 0.4634784460067749, + "learning_rate": 4.9950177026023926e-06, + "loss": 0.6403, + "step": 1419 + }, + { + "epoch": 0.12963301077232062, + "grad_norm": 0.49155133962631226, + "learning_rate": 4.9950101475949975e-06, + "loss": 0.5945, + "step": 1420 + }, + { + "epoch": 0.12972430162497717, + "grad_norm": 0.4266548156738281, + "learning_rate": 4.995002586869569e-06, + "loss": 0.661, + "step": 1421 + }, + { + "epoch": 0.12981559247763375, + "grad_norm": 0.42517712712287903, + "learning_rate": 4.994995020426125e-06, + "loss": 0.6647, + "step": 1422 + }, + { + "epoch": 0.1299068833302903, + "grad_norm": 0.48272860050201416, + "learning_rate": 4.994987448264683e-06, + "loss": 0.62, + "step": 1423 + }, + { + "epoch": 0.12999817418294687, + "grad_norm": 0.5075610876083374, + "learning_rate": 4.994979870385261e-06, + "loss": 0.6382, + "step": 1424 + }, + { + "epoch": 0.13008946503560342, + "grad_norm": 0.48727700114250183, + "learning_rate": 4.994972286787873e-06, + "loss": 0.6256, + "step": 1425 + }, + { + "epoch": 0.13018075588826, + "grad_norm": 0.43423429131507874, + "learning_rate": 4.99496469747254e-06, + "loss": 0.6552, + "step": 1426 + }, + { + "epoch": 0.13027204674091655, + "grad_norm": 0.46034032106399536, + "learning_rate": 4.994957102439277e-06, + "loss": 0.6497, + "step": 1427 + }, + { + "epoch": 0.13036333759357313, + "grad_norm": 0.4620881676673889, + "learning_rate": 4.9949495016881034e-06, + "loss": 0.6385, + "step": 1428 + }, + { + "epoch": 0.13045462844622968, + "grad_norm": 0.4520103335380554, + "learning_rate": 4.994941895219035e-06, + "loss": 0.6706, + "step": 1429 + }, + { + "epoch": 0.13054591929888626, + "grad_norm": 0.45082175731658936, + "learning_rate": 4.994934283032091e-06, + "loss": 0.6047, + "step": 1430 + }, + { + "epoch": 0.1306372101515428, + "grad_norm": 0.45949891209602356, + "learning_rate": 4.994926665127287e-06, + "loss": 0.5954, + "step": 1431 + }, + { + "epoch": 0.1307285010041994, + "grad_norm": 0.45218947529792786, + "learning_rate": 4.994919041504641e-06, + "loss": 0.626, + "step": 1432 + }, + { + "epoch": 0.13081979185685594, + "grad_norm": 0.43649157881736755, + "learning_rate": 4.994911412164172e-06, + "loss": 0.6557, + "step": 1433 + }, + { + "epoch": 0.13091108270951252, + "grad_norm": 0.5107784271240234, + "learning_rate": 4.994903777105895e-06, + "loss": 0.5954, + "step": 1434 + }, + { + "epoch": 0.13100237356216907, + "grad_norm": 0.46043142676353455, + "learning_rate": 4.9948961363298285e-06, + "loss": 0.637, + "step": 1435 + }, + { + "epoch": 0.13109366441482564, + "grad_norm": 0.5114560723304749, + "learning_rate": 4.9948884898359905e-06, + "loss": 0.6289, + "step": 1436 + }, + { + "epoch": 0.1311849552674822, + "grad_norm": 0.45225200057029724, + "learning_rate": 4.994880837624399e-06, + "loss": 0.6141, + "step": 1437 + }, + { + "epoch": 0.13127624612013877, + "grad_norm": 0.44835710525512695, + "learning_rate": 4.99487317969507e-06, + "loss": 0.6265, + "step": 1438 + }, + { + "epoch": 0.13136753697279532, + "grad_norm": 0.481545627117157, + "learning_rate": 4.994865516048022e-06, + "loss": 0.6117, + "step": 1439 + }, + { + "epoch": 0.1314588278254519, + "grad_norm": 0.506466269493103, + "learning_rate": 4.994857846683272e-06, + "loss": 0.6349, + "step": 1440 + }, + { + "epoch": 0.13155011867810845, + "grad_norm": 0.501962423324585, + "learning_rate": 4.994850171600838e-06, + "loss": 0.6097, + "step": 1441 + }, + { + "epoch": 0.13164140953076503, + "grad_norm": 0.4376351237297058, + "learning_rate": 4.994842490800737e-06, + "loss": 0.6556, + "step": 1442 + }, + { + "epoch": 0.13173270038342158, + "grad_norm": 0.4631885886192322, + "learning_rate": 4.994834804282988e-06, + "loss": 0.6126, + "step": 1443 + }, + { + "epoch": 0.13182399123607816, + "grad_norm": 0.45695555210113525, + "learning_rate": 4.9948271120476084e-06, + "loss": 0.6684, + "step": 1444 + }, + { + "epoch": 0.1319152820887347, + "grad_norm": 0.4611729681491852, + "learning_rate": 4.994819414094615e-06, + "loss": 0.6936, + "step": 1445 + }, + { + "epoch": 0.13200657294139126, + "grad_norm": 0.5099788308143616, + "learning_rate": 4.994811710424024e-06, + "loss": 0.6073, + "step": 1446 + }, + { + "epoch": 0.13209786379404784, + "grad_norm": 0.47566768527030945, + "learning_rate": 4.994804001035857e-06, + "loss": 0.6347, + "step": 1447 + }, + { + "epoch": 0.1321891546467044, + "grad_norm": 0.4473966360092163, + "learning_rate": 4.994796285930128e-06, + "loss": 0.6374, + "step": 1448 + }, + { + "epoch": 0.13228044549936097, + "grad_norm": 0.4468784034252167, + "learning_rate": 4.994788565106856e-06, + "loss": 0.6554, + "step": 1449 + }, + { + "epoch": 0.13237173635201752, + "grad_norm": 0.5005069375038147, + "learning_rate": 4.994780838566059e-06, + "loss": 0.6226, + "step": 1450 + }, + { + "epoch": 0.1324630272046741, + "grad_norm": 0.45267611742019653, + "learning_rate": 4.994773106307754e-06, + "loss": 0.6491, + "step": 1451 + }, + { + "epoch": 0.13255431805733064, + "grad_norm": 0.45565158128738403, + "learning_rate": 4.99476536833196e-06, + "loss": 0.6454, + "step": 1452 + }, + { + "epoch": 0.13264560890998722, + "grad_norm": 0.45250219106674194, + "learning_rate": 4.994757624638693e-06, + "loss": 0.6539, + "step": 1453 + }, + { + "epoch": 0.13273689976264377, + "grad_norm": 0.4558553397655487, + "learning_rate": 4.994749875227972e-06, + "loss": 0.637, + "step": 1454 + }, + { + "epoch": 0.13282819061530035, + "grad_norm": 0.4470129907131195, + "learning_rate": 4.9947421200998145e-06, + "loss": 0.6285, + "step": 1455 + }, + { + "epoch": 0.1329194814679569, + "grad_norm": 0.47359132766723633, + "learning_rate": 4.994734359254237e-06, + "loss": 0.597, + "step": 1456 + }, + { + "epoch": 0.13301077232061348, + "grad_norm": 0.44746851921081543, + "learning_rate": 4.99472659269126e-06, + "loss": 0.5899, + "step": 1457 + }, + { + "epoch": 0.13310206317327003, + "grad_norm": 0.4411845803260803, + "learning_rate": 4.9947188204109e-06, + "loss": 0.6286, + "step": 1458 + }, + { + "epoch": 0.1331933540259266, + "grad_norm": 0.4564726650714874, + "learning_rate": 4.994711042413173e-06, + "loss": 0.6447, + "step": 1459 + }, + { + "epoch": 0.13328464487858316, + "grad_norm": 0.4913645088672638, + "learning_rate": 4.9947032586980995e-06, + "loss": 0.5956, + "step": 1460 + }, + { + "epoch": 0.13337593573123974, + "grad_norm": 0.49188360571861267, + "learning_rate": 4.994695469265696e-06, + "loss": 0.5852, + "step": 1461 + }, + { + "epoch": 0.1334672265838963, + "grad_norm": 0.4404561221599579, + "learning_rate": 4.994687674115981e-06, + "loss": 0.623, + "step": 1462 + }, + { + "epoch": 0.13355851743655286, + "grad_norm": 0.4506376087665558, + "learning_rate": 4.994679873248971e-06, + "loss": 0.6137, + "step": 1463 + }, + { + "epoch": 0.13364980828920942, + "grad_norm": 0.48718732595443726, + "learning_rate": 4.994672066664685e-06, + "loss": 0.6226, + "step": 1464 + }, + { + "epoch": 0.133741099141866, + "grad_norm": 0.4915156662464142, + "learning_rate": 4.994664254363142e-06, + "loss": 0.6298, + "step": 1465 + }, + { + "epoch": 0.13383238999452254, + "grad_norm": 0.5049619078636169, + "learning_rate": 4.994656436344358e-06, + "loss": 0.6199, + "step": 1466 + }, + { + "epoch": 0.13392368084717912, + "grad_norm": 0.4503806531429291, + "learning_rate": 4.99464861260835e-06, + "loss": 0.6863, + "step": 1467 + }, + { + "epoch": 0.13401497169983567, + "grad_norm": 0.4683056175708771, + "learning_rate": 4.994640783155139e-06, + "loss": 0.6049, + "step": 1468 + }, + { + "epoch": 0.13410626255249225, + "grad_norm": 0.49767738580703735, + "learning_rate": 4.994632947984741e-06, + "loss": 0.6367, + "step": 1469 + }, + { + "epoch": 0.1341975534051488, + "grad_norm": 0.4878579378128052, + "learning_rate": 4.994625107097174e-06, + "loss": 0.6046, + "step": 1470 + }, + { + "epoch": 0.13428884425780538, + "grad_norm": 0.44909995794296265, + "learning_rate": 4.994617260492457e-06, + "loss": 0.6194, + "step": 1471 + }, + { + "epoch": 0.13438013511046193, + "grad_norm": 0.4586066007614136, + "learning_rate": 4.994609408170607e-06, + "loss": 0.6397, + "step": 1472 + }, + { + "epoch": 0.1344714259631185, + "grad_norm": 0.4700102210044861, + "learning_rate": 4.994601550131643e-06, + "loss": 0.6206, + "step": 1473 + }, + { + "epoch": 0.13456271681577506, + "grad_norm": 0.5321220755577087, + "learning_rate": 4.994593686375581e-06, + "loss": 0.5961, + "step": 1474 + }, + { + "epoch": 0.13465400766843164, + "grad_norm": 0.4798174500465393, + "learning_rate": 4.994585816902441e-06, + "loss": 0.6453, + "step": 1475 + }, + { + "epoch": 0.13474529852108819, + "grad_norm": 0.4572124481201172, + "learning_rate": 4.994577941712241e-06, + "loss": 0.6032, + "step": 1476 + }, + { + "epoch": 0.13483658937374476, + "grad_norm": 0.49828317761421204, + "learning_rate": 4.994570060804997e-06, + "loss": 0.6461, + "step": 1477 + }, + { + "epoch": 0.13492788022640131, + "grad_norm": 0.4576397240161896, + "learning_rate": 4.99456217418073e-06, + "loss": 0.6025, + "step": 1478 + }, + { + "epoch": 0.13501917107905786, + "grad_norm": 0.46684449911117554, + "learning_rate": 4.994554281839456e-06, + "loss": 0.6088, + "step": 1479 + }, + { + "epoch": 0.13511046193171444, + "grad_norm": 0.5171626210212708, + "learning_rate": 4.9945463837811935e-06, + "loss": 0.5827, + "step": 1480 + }, + { + "epoch": 0.135201752784371, + "grad_norm": 0.48589247465133667, + "learning_rate": 4.994538480005961e-06, + "loss": 0.5823, + "step": 1481 + }, + { + "epoch": 0.13529304363702757, + "grad_norm": 0.46232378482818604, + "learning_rate": 4.994530570513776e-06, + "loss": 0.6242, + "step": 1482 + }, + { + "epoch": 0.13538433448968412, + "grad_norm": 0.4452739357948303, + "learning_rate": 4.994522655304657e-06, + "loss": 0.6076, + "step": 1483 + }, + { + "epoch": 0.1354756253423407, + "grad_norm": 0.4771016240119934, + "learning_rate": 4.9945147343786225e-06, + "loss": 0.6431, + "step": 1484 + }, + { + "epoch": 0.13556691619499725, + "grad_norm": 0.4649167060852051, + "learning_rate": 4.9945068077356905e-06, + "loss": 0.604, + "step": 1485 + }, + { + "epoch": 0.13565820704765383, + "grad_norm": 0.4576749801635742, + "learning_rate": 4.994498875375878e-06, + "loss": 0.6444, + "step": 1486 + }, + { + "epoch": 0.13574949790031038, + "grad_norm": 0.4643741250038147, + "learning_rate": 4.994490937299206e-06, + "loss": 0.6659, + "step": 1487 + }, + { + "epoch": 0.13584078875296696, + "grad_norm": 0.4365338087081909, + "learning_rate": 4.994482993505689e-06, + "loss": 0.6253, + "step": 1488 + }, + { + "epoch": 0.1359320796056235, + "grad_norm": 0.47361519932746887, + "learning_rate": 4.994475043995347e-06, + "loss": 0.6055, + "step": 1489 + }, + { + "epoch": 0.13602337045828009, + "grad_norm": 0.46529248356819153, + "learning_rate": 4.994467088768199e-06, + "loss": 0.5928, + "step": 1490 + }, + { + "epoch": 0.13611466131093664, + "grad_norm": 0.47363707423210144, + "learning_rate": 4.994459127824263e-06, + "loss": 0.6066, + "step": 1491 + }, + { + "epoch": 0.1362059521635932, + "grad_norm": 0.45018160343170166, + "learning_rate": 4.994451161163556e-06, + "loss": 0.6166, + "step": 1492 + }, + { + "epoch": 0.13629724301624976, + "grad_norm": 0.4690074026584625, + "learning_rate": 4.994443188786097e-06, + "loss": 0.631, + "step": 1493 + }, + { + "epoch": 0.13638853386890634, + "grad_norm": 0.4794165790081024, + "learning_rate": 4.9944352106919035e-06, + "loss": 0.6421, + "step": 1494 + }, + { + "epoch": 0.1364798247215629, + "grad_norm": 0.4801348149776459, + "learning_rate": 4.994427226880996e-06, + "loss": 0.6154, + "step": 1495 + }, + { + "epoch": 0.13657111557421947, + "grad_norm": 0.5020914673805237, + "learning_rate": 4.994419237353391e-06, + "loss": 0.6176, + "step": 1496 + }, + { + "epoch": 0.13666240642687602, + "grad_norm": 0.44147446751594543, + "learning_rate": 4.994411242109106e-06, + "loss": 0.6276, + "step": 1497 + }, + { + "epoch": 0.1367536972795326, + "grad_norm": 0.43616002798080444, + "learning_rate": 4.9944032411481615e-06, + "loss": 0.6253, + "step": 1498 + }, + { + "epoch": 0.13684498813218915, + "grad_norm": 0.48531654477119446, + "learning_rate": 4.9943952344705745e-06, + "loss": 0.6126, + "step": 1499 + }, + { + "epoch": 0.13693627898484573, + "grad_norm": 0.4478273391723633, + "learning_rate": 4.994387222076364e-06, + "loss": 0.593, + "step": 1500 + }, + { + "epoch": 0.13702756983750228, + "grad_norm": 0.5028184056282043, + "learning_rate": 4.994379203965548e-06, + "loss": 0.6064, + "step": 1501 + }, + { + "epoch": 0.13711886069015886, + "grad_norm": 0.46088773012161255, + "learning_rate": 4.994371180138144e-06, + "loss": 0.6278, + "step": 1502 + }, + { + "epoch": 0.1372101515428154, + "grad_norm": 0.47683706879615784, + "learning_rate": 4.994363150594173e-06, + "loss": 0.6184, + "step": 1503 + }, + { + "epoch": 0.13730144239547198, + "grad_norm": 0.5031989216804504, + "learning_rate": 4.99435511533365e-06, + "loss": 0.6666, + "step": 1504 + }, + { + "epoch": 0.13739273324812853, + "grad_norm": 0.4496975839138031, + "learning_rate": 4.994347074356596e-06, + "loss": 0.6442, + "step": 1505 + }, + { + "epoch": 0.1374840241007851, + "grad_norm": 0.4495124816894531, + "learning_rate": 4.994339027663028e-06, + "loss": 0.6144, + "step": 1506 + }, + { + "epoch": 0.13757531495344166, + "grad_norm": 0.4485589265823364, + "learning_rate": 4.994330975252965e-06, + "loss": 0.6457, + "step": 1507 + }, + { + "epoch": 0.13766660580609824, + "grad_norm": 0.4482975900173187, + "learning_rate": 4.994322917126426e-06, + "loss": 0.6535, + "step": 1508 + }, + { + "epoch": 0.1377578966587548, + "grad_norm": 0.43963250517845154, + "learning_rate": 4.994314853283428e-06, + "loss": 0.6233, + "step": 1509 + }, + { + "epoch": 0.13784918751141137, + "grad_norm": 0.45776188373565674, + "learning_rate": 4.994306783723991e-06, + "loss": 0.6506, + "step": 1510 + }, + { + "epoch": 0.13794047836406792, + "grad_norm": 0.5160843133926392, + "learning_rate": 4.9942987084481335e-06, + "loss": 0.5958, + "step": 1511 + }, + { + "epoch": 0.1380317692167245, + "grad_norm": 0.4345034062862396, + "learning_rate": 4.994290627455872e-06, + "loss": 0.6431, + "step": 1512 + }, + { + "epoch": 0.13812306006938105, + "grad_norm": 0.47572946548461914, + "learning_rate": 4.9942825407472274e-06, + "loss": 0.6482, + "step": 1513 + }, + { + "epoch": 0.1382143509220376, + "grad_norm": 0.45549312233924866, + "learning_rate": 4.9942744483222175e-06, + "loss": 0.6442, + "step": 1514 + }, + { + "epoch": 0.13830564177469418, + "grad_norm": 0.4590066969394684, + "learning_rate": 4.99426635018086e-06, + "loss": 0.6546, + "step": 1515 + }, + { + "epoch": 0.13839693262735073, + "grad_norm": 0.5159279704093933, + "learning_rate": 4.994258246323174e-06, + "loss": 0.6076, + "step": 1516 + }, + { + "epoch": 0.1384882234800073, + "grad_norm": 0.44768232107162476, + "learning_rate": 4.994250136749177e-06, + "loss": 0.6191, + "step": 1517 + }, + { + "epoch": 0.13857951433266386, + "grad_norm": 0.48340681195259094, + "learning_rate": 4.9942420214588905e-06, + "loss": 0.627, + "step": 1518 + }, + { + "epoch": 0.13867080518532043, + "grad_norm": 0.46250852942466736, + "learning_rate": 4.99423390045233e-06, + "loss": 0.6215, + "step": 1519 + }, + { + "epoch": 0.13876209603797698, + "grad_norm": 0.4426298439502716, + "learning_rate": 4.994225773729516e-06, + "loss": 0.6278, + "step": 1520 + }, + { + "epoch": 0.13885338689063356, + "grad_norm": 0.46903201937675476, + "learning_rate": 4.9942176412904666e-06, + "loss": 0.6442, + "step": 1521 + }, + { + "epoch": 0.1389446777432901, + "grad_norm": 0.4554305076599121, + "learning_rate": 4.994209503135201e-06, + "loss": 0.6534, + "step": 1522 + }, + { + "epoch": 0.1390359685959467, + "grad_norm": 0.4553270637989044, + "learning_rate": 4.994201359263736e-06, + "loss": 0.6511, + "step": 1523 + }, + { + "epoch": 0.13912725944860324, + "grad_norm": 0.46331652998924255, + "learning_rate": 4.994193209676092e-06, + "loss": 0.6465, + "step": 1524 + }, + { + "epoch": 0.13921855030125982, + "grad_norm": 0.4681273400783539, + "learning_rate": 4.994185054372287e-06, + "loss": 0.6118, + "step": 1525 + }, + { + "epoch": 0.13930984115391637, + "grad_norm": 0.4959009885787964, + "learning_rate": 4.99417689335234e-06, + "loss": 0.5954, + "step": 1526 + }, + { + "epoch": 0.13940113200657295, + "grad_norm": 0.4697316884994507, + "learning_rate": 4.99416872661627e-06, + "loss": 0.6496, + "step": 1527 + }, + { + "epoch": 0.1394924228592295, + "grad_norm": 0.48425355553627014, + "learning_rate": 4.994160554164094e-06, + "loss": 0.6422, + "step": 1528 + }, + { + "epoch": 0.13958371371188608, + "grad_norm": 0.44952625036239624, + "learning_rate": 4.994152375995833e-06, + "loss": 0.6627, + "step": 1529 + }, + { + "epoch": 0.13967500456454263, + "grad_norm": 0.48581305146217346, + "learning_rate": 4.994144192111504e-06, + "loss": 0.6112, + "step": 1530 + }, + { + "epoch": 0.1397662954171992, + "grad_norm": 0.45889124274253845, + "learning_rate": 4.994136002511128e-06, + "loss": 0.6069, + "step": 1531 + }, + { + "epoch": 0.13985758626985575, + "grad_norm": 0.46377038955688477, + "learning_rate": 4.994127807194721e-06, + "loss": 0.639, + "step": 1532 + }, + { + "epoch": 0.13994887712251233, + "grad_norm": 0.4662431478500366, + "learning_rate": 4.994119606162303e-06, + "loss": 0.6358, + "step": 1533 + }, + { + "epoch": 0.14004016797516888, + "grad_norm": 0.4663451910018921, + "learning_rate": 4.994111399413893e-06, + "loss": 0.6166, + "step": 1534 + }, + { + "epoch": 0.14013145882782546, + "grad_norm": 0.47708386182785034, + "learning_rate": 4.99410318694951e-06, + "loss": 0.6278, + "step": 1535 + }, + { + "epoch": 0.140222749680482, + "grad_norm": 0.4823927879333496, + "learning_rate": 4.994094968769172e-06, + "loss": 0.632, + "step": 1536 + }, + { + "epoch": 0.1403140405331386, + "grad_norm": 0.4402211010456085, + "learning_rate": 4.994086744872899e-06, + "loss": 0.6711, + "step": 1537 + }, + { + "epoch": 0.14040533138579514, + "grad_norm": 0.48254328966140747, + "learning_rate": 4.9940785152607085e-06, + "loss": 0.6256, + "step": 1538 + }, + { + "epoch": 0.14049662223845172, + "grad_norm": 0.4731604754924774, + "learning_rate": 4.99407027993262e-06, + "loss": 0.6424, + "step": 1539 + }, + { + "epoch": 0.14058791309110827, + "grad_norm": 0.4419739246368408, + "learning_rate": 4.994062038888653e-06, + "loss": 0.6148, + "step": 1540 + }, + { + "epoch": 0.14067920394376485, + "grad_norm": 0.44786787033081055, + "learning_rate": 4.9940537921288255e-06, + "loss": 0.6408, + "step": 1541 + }, + { + "epoch": 0.1407704947964214, + "grad_norm": 0.47217345237731934, + "learning_rate": 4.9940455396531565e-06, + "loss": 0.6252, + "step": 1542 + }, + { + "epoch": 0.14086178564907798, + "grad_norm": 0.46152564883232117, + "learning_rate": 4.994037281461665e-06, + "loss": 0.6244, + "step": 1543 + }, + { + "epoch": 0.14095307650173453, + "grad_norm": 0.4682273268699646, + "learning_rate": 4.99402901755437e-06, + "loss": 0.6624, + "step": 1544 + }, + { + "epoch": 0.1410443673543911, + "grad_norm": 0.46977105736732483, + "learning_rate": 4.994020747931291e-06, + "loss": 0.6203, + "step": 1545 + }, + { + "epoch": 0.14113565820704765, + "grad_norm": 0.45847034454345703, + "learning_rate": 4.994012472592446e-06, + "loss": 0.6555, + "step": 1546 + }, + { + "epoch": 0.1412269490597042, + "grad_norm": 0.47648605704307556, + "learning_rate": 4.994004191537854e-06, + "loss": 0.6503, + "step": 1547 + }, + { + "epoch": 0.14131823991236078, + "grad_norm": 0.4566638171672821, + "learning_rate": 4.993995904767535e-06, + "loss": 0.6602, + "step": 1548 + }, + { + "epoch": 0.14140953076501733, + "grad_norm": 0.437751442193985, + "learning_rate": 4.993987612281507e-06, + "loss": 0.6532, + "step": 1549 + }, + { + "epoch": 0.1415008216176739, + "grad_norm": 0.5135921239852905, + "learning_rate": 4.993979314079789e-06, + "loss": 0.6253, + "step": 1550 + }, + { + "epoch": 0.14159211247033046, + "grad_norm": 0.4317440092563629, + "learning_rate": 4.9939710101624e-06, + "loss": 0.6561, + "step": 1551 + }, + { + "epoch": 0.14168340332298704, + "grad_norm": 0.4753427505493164, + "learning_rate": 4.9939627005293615e-06, + "loss": 0.6772, + "step": 1552 + }, + { + "epoch": 0.1417746941756436, + "grad_norm": 0.4441913664340973, + "learning_rate": 4.993954385180688e-06, + "loss": 0.6777, + "step": 1553 + }, + { + "epoch": 0.14186598502830017, + "grad_norm": 0.45764997601509094, + "learning_rate": 4.993946064116403e-06, + "loss": 0.6347, + "step": 1554 + }, + { + "epoch": 0.14195727588095672, + "grad_norm": 0.4598638117313385, + "learning_rate": 4.993937737336522e-06, + "loss": 0.6202, + "step": 1555 + }, + { + "epoch": 0.1420485667336133, + "grad_norm": 0.42764994502067566, + "learning_rate": 4.9939294048410665e-06, + "loss": 0.6231, + "step": 1556 + }, + { + "epoch": 0.14213985758626985, + "grad_norm": 0.46281513571739197, + "learning_rate": 4.993921066630054e-06, + "loss": 0.6431, + "step": 1557 + }, + { + "epoch": 0.14223114843892642, + "grad_norm": 0.42743751406669617, + "learning_rate": 4.993912722703505e-06, + "loss": 0.6186, + "step": 1558 + }, + { + "epoch": 0.14232243929158298, + "grad_norm": 0.4707600772380829, + "learning_rate": 4.993904373061438e-06, + "loss": 0.6438, + "step": 1559 + }, + { + "epoch": 0.14241373014423955, + "grad_norm": 0.44844549894332886, + "learning_rate": 4.993896017703872e-06, + "loss": 0.6486, + "step": 1560 + }, + { + "epoch": 0.1425050209968961, + "grad_norm": 0.45877888798713684, + "learning_rate": 4.993887656630826e-06, + "loss": 0.6393, + "step": 1561 + }, + { + "epoch": 0.14259631184955268, + "grad_norm": 0.4341309666633606, + "learning_rate": 4.9938792898423195e-06, + "loss": 0.6096, + "step": 1562 + }, + { + "epoch": 0.14268760270220923, + "grad_norm": 0.47492945194244385, + "learning_rate": 4.993870917338372e-06, + "loss": 0.6328, + "step": 1563 + }, + { + "epoch": 0.1427788935548658, + "grad_norm": 0.49267569184303284, + "learning_rate": 4.993862539119002e-06, + "loss": 0.5702, + "step": 1564 + }, + { + "epoch": 0.14287018440752236, + "grad_norm": 0.4416252672672272, + "learning_rate": 4.993854155184229e-06, + "loss": 0.6701, + "step": 1565 + }, + { + "epoch": 0.14296147526017894, + "grad_norm": 0.46396443247795105, + "learning_rate": 4.9938457655340715e-06, + "loss": 0.6196, + "step": 1566 + }, + { + "epoch": 0.1430527661128355, + "grad_norm": 0.45674440264701843, + "learning_rate": 4.9938373701685504e-06, + "loss": 0.6512, + "step": 1567 + }, + { + "epoch": 0.14314405696549207, + "grad_norm": 0.456626296043396, + "learning_rate": 4.993828969087683e-06, + "loss": 0.629, + "step": 1568 + }, + { + "epoch": 0.14323534781814862, + "grad_norm": 0.4622129201889038, + "learning_rate": 4.9938205622914905e-06, + "loss": 0.5957, + "step": 1569 + }, + { + "epoch": 0.1433266386708052, + "grad_norm": 0.47196292877197266, + "learning_rate": 4.9938121497799906e-06, + "loss": 0.5884, + "step": 1570 + }, + { + "epoch": 0.14341792952346175, + "grad_norm": 0.48210352659225464, + "learning_rate": 4.993803731553204e-06, + "loss": 0.5872, + "step": 1571 + }, + { + "epoch": 0.14350922037611832, + "grad_norm": 0.45912548899650574, + "learning_rate": 4.993795307611148e-06, + "loss": 0.6414, + "step": 1572 + }, + { + "epoch": 0.14360051122877487, + "grad_norm": 0.4825299382209778, + "learning_rate": 4.993786877953844e-06, + "loss": 0.629, + "step": 1573 + }, + { + "epoch": 0.14369180208143145, + "grad_norm": 0.47646111249923706, + "learning_rate": 4.993778442581309e-06, + "loss": 0.6073, + "step": 1574 + }, + { + "epoch": 0.143783092934088, + "grad_norm": 0.4599086344242096, + "learning_rate": 4.9937700014935655e-06, + "loss": 0.6545, + "step": 1575 + }, + { + "epoch": 0.14387438378674458, + "grad_norm": 0.4945249855518341, + "learning_rate": 4.9937615546906305e-06, + "loss": 0.6311, + "step": 1576 + }, + { + "epoch": 0.14396567463940113, + "grad_norm": 0.42764946818351746, + "learning_rate": 4.9937531021725235e-06, + "loss": 0.6134, + "step": 1577 + }, + { + "epoch": 0.1440569654920577, + "grad_norm": 0.4550107419490814, + "learning_rate": 4.993744643939264e-06, + "loss": 0.6334, + "step": 1578 + }, + { + "epoch": 0.14414825634471426, + "grad_norm": 0.4560924768447876, + "learning_rate": 4.993736179990872e-06, + "loss": 0.666, + "step": 1579 + }, + { + "epoch": 0.1442395471973708, + "grad_norm": 0.4551418423652649, + "learning_rate": 4.993727710327367e-06, + "loss": 0.5964, + "step": 1580 + }, + { + "epoch": 0.1443308380500274, + "grad_norm": 0.477216511964798, + "learning_rate": 4.993719234948768e-06, + "loss": 0.6492, + "step": 1581 + }, + { + "epoch": 0.14442212890268394, + "grad_norm": 0.5011753439903259, + "learning_rate": 4.993710753855094e-06, + "loss": 0.6092, + "step": 1582 + }, + { + "epoch": 0.14451341975534052, + "grad_norm": 0.44452911615371704, + "learning_rate": 4.993702267046365e-06, + "loss": 0.6367, + "step": 1583 + }, + { + "epoch": 0.14460471060799707, + "grad_norm": 0.5111166834831238, + "learning_rate": 4.9936937745226e-06, + "loss": 0.6041, + "step": 1584 + }, + { + "epoch": 0.14469600146065364, + "grad_norm": 0.4764624238014221, + "learning_rate": 4.993685276283819e-06, + "loss": 0.6425, + "step": 1585 + }, + { + "epoch": 0.1447872923133102, + "grad_norm": 0.5175803899765015, + "learning_rate": 4.9936767723300415e-06, + "loss": 0.6284, + "step": 1586 + }, + { + "epoch": 0.14487858316596677, + "grad_norm": 0.4812273681163788, + "learning_rate": 4.993668262661287e-06, + "loss": 0.627, + "step": 1587 + }, + { + "epoch": 0.14496987401862332, + "grad_norm": 0.5117160081863403, + "learning_rate": 4.9936597472775735e-06, + "loss": 0.6313, + "step": 1588 + }, + { + "epoch": 0.1450611648712799, + "grad_norm": 0.44647765159606934, + "learning_rate": 4.993651226178922e-06, + "loss": 0.6244, + "step": 1589 + }, + { + "epoch": 0.14515245572393645, + "grad_norm": 0.43965715169906616, + "learning_rate": 4.993642699365352e-06, + "loss": 0.6413, + "step": 1590 + }, + { + "epoch": 0.14524374657659303, + "grad_norm": 0.45129305124282837, + "learning_rate": 4.993634166836883e-06, + "loss": 0.6507, + "step": 1591 + }, + { + "epoch": 0.14533503742924958, + "grad_norm": 0.47135603427886963, + "learning_rate": 4.993625628593535e-06, + "loss": 0.6107, + "step": 1592 + }, + { + "epoch": 0.14542632828190616, + "grad_norm": 0.46191132068634033, + "learning_rate": 4.993617084635326e-06, + "loss": 0.631, + "step": 1593 + }, + { + "epoch": 0.1455176191345627, + "grad_norm": 0.4430689811706543, + "learning_rate": 4.993608534962277e-06, + "loss": 0.6439, + "step": 1594 + }, + { + "epoch": 0.1456089099872193, + "grad_norm": 0.44193482398986816, + "learning_rate": 4.993599979574406e-06, + "loss": 0.6548, + "step": 1595 + }, + { + "epoch": 0.14570020083987584, + "grad_norm": 0.4708199203014374, + "learning_rate": 4.993591418471735e-06, + "loss": 0.6266, + "step": 1596 + }, + { + "epoch": 0.14579149169253242, + "grad_norm": 0.4997350871562958, + "learning_rate": 4.993582851654282e-06, + "loss": 0.5774, + "step": 1597 + }, + { + "epoch": 0.14588278254518897, + "grad_norm": 0.4554002285003662, + "learning_rate": 4.993574279122066e-06, + "loss": 0.6391, + "step": 1598 + }, + { + "epoch": 0.14597407339784554, + "grad_norm": 0.47763481736183167, + "learning_rate": 4.993565700875109e-06, + "loss": 0.6608, + "step": 1599 + }, + { + "epoch": 0.1460653642505021, + "grad_norm": 0.4601512551307678, + "learning_rate": 4.993557116913429e-06, + "loss": 0.6409, + "step": 1600 + }, + { + "epoch": 0.14615665510315867, + "grad_norm": 0.45830366015434265, + "learning_rate": 4.993548527237046e-06, + "loss": 0.678, + "step": 1601 + }, + { + "epoch": 0.14624794595581522, + "grad_norm": 0.4786257743835449, + "learning_rate": 4.993539931845979e-06, + "loss": 0.6032, + "step": 1602 + }, + { + "epoch": 0.1463392368084718, + "grad_norm": 0.4719263017177582, + "learning_rate": 4.993531330740249e-06, + "loss": 0.6322, + "step": 1603 + }, + { + "epoch": 0.14643052766112835, + "grad_norm": 0.4814057946205139, + "learning_rate": 4.9935227239198745e-06, + "loss": 0.6055, + "step": 1604 + }, + { + "epoch": 0.14652181851378493, + "grad_norm": 0.43652471899986267, + "learning_rate": 4.9935141113848754e-06, + "loss": 0.621, + "step": 1605 + }, + { + "epoch": 0.14661310936644148, + "grad_norm": 0.4574231803417206, + "learning_rate": 4.993505493135273e-06, + "loss": 0.6456, + "step": 1606 + }, + { + "epoch": 0.14670440021909806, + "grad_norm": 0.5116026997566223, + "learning_rate": 4.993496869171085e-06, + "loss": 0.6062, + "step": 1607 + }, + { + "epoch": 0.1467956910717546, + "grad_norm": 0.4467705488204956, + "learning_rate": 4.993488239492332e-06, + "loss": 0.5989, + "step": 1608 + }, + { + "epoch": 0.1468869819244112, + "grad_norm": 0.4477675259113312, + "learning_rate": 4.993479604099034e-06, + "loss": 0.6405, + "step": 1609 + }, + { + "epoch": 0.14697827277706774, + "grad_norm": 0.4533292353153229, + "learning_rate": 4.993470962991211e-06, + "loss": 0.6126, + "step": 1610 + }, + { + "epoch": 0.14706956362972431, + "grad_norm": 0.4366755783557892, + "learning_rate": 4.993462316168882e-06, + "loss": 0.6191, + "step": 1611 + }, + { + "epoch": 0.14716085448238087, + "grad_norm": 0.44655489921569824, + "learning_rate": 4.993453663632068e-06, + "loss": 0.6107, + "step": 1612 + }, + { + "epoch": 0.14725214533503742, + "grad_norm": 0.4653833508491516, + "learning_rate": 4.993445005380786e-06, + "loss": 0.6061, + "step": 1613 + }, + { + "epoch": 0.147343436187694, + "grad_norm": 0.45454204082489014, + "learning_rate": 4.99343634141506e-06, + "loss": 0.6029, + "step": 1614 + }, + { + "epoch": 0.14743472704035054, + "grad_norm": 0.4899997115135193, + "learning_rate": 4.9934276717349066e-06, + "loss": 0.6118, + "step": 1615 + }, + { + "epoch": 0.14752601789300712, + "grad_norm": 0.47764667868614197, + "learning_rate": 4.9934189963403465e-06, + "loss": 0.6065, + "step": 1616 + }, + { + "epoch": 0.14761730874566367, + "grad_norm": 0.4673406183719635, + "learning_rate": 4.993410315231401e-06, + "loss": 0.6175, + "step": 1617 + }, + { + "epoch": 0.14770859959832025, + "grad_norm": 0.47433799505233765, + "learning_rate": 4.993401628408088e-06, + "loss": 0.6309, + "step": 1618 + }, + { + "epoch": 0.1477998904509768, + "grad_norm": 0.494870126247406, + "learning_rate": 4.993392935870429e-06, + "loss": 0.6174, + "step": 1619 + }, + { + "epoch": 0.14789118130363338, + "grad_norm": 0.4740104675292969, + "learning_rate": 4.993384237618443e-06, + "loss": 0.6259, + "step": 1620 + }, + { + "epoch": 0.14798247215628993, + "grad_norm": 0.439142644405365, + "learning_rate": 4.99337553365215e-06, + "loss": 0.6146, + "step": 1621 + }, + { + "epoch": 0.1480737630089465, + "grad_norm": 0.4820539951324463, + "learning_rate": 4.99336682397157e-06, + "loss": 0.6011, + "step": 1622 + }, + { + "epoch": 0.14816505386160306, + "grad_norm": 0.46384963393211365, + "learning_rate": 4.993358108576722e-06, + "loss": 0.6291, + "step": 1623 + }, + { + "epoch": 0.14825634471425964, + "grad_norm": 0.45968878269195557, + "learning_rate": 4.993349387467629e-06, + "loss": 0.6592, + "step": 1624 + }, + { + "epoch": 0.1483476355669162, + "grad_norm": 0.46255022287368774, + "learning_rate": 4.993340660644308e-06, + "loss": 0.6167, + "step": 1625 + }, + { + "epoch": 0.14843892641957276, + "grad_norm": 0.42355823516845703, + "learning_rate": 4.99333192810678e-06, + "loss": 0.6407, + "step": 1626 + }, + { + "epoch": 0.14853021727222931, + "grad_norm": 0.4432583749294281, + "learning_rate": 4.993323189855066e-06, + "loss": 0.6111, + "step": 1627 + }, + { + "epoch": 0.1486215081248859, + "grad_norm": 0.44762322306632996, + "learning_rate": 4.993314445889184e-06, + "loss": 0.6083, + "step": 1628 + }, + { + "epoch": 0.14871279897754244, + "grad_norm": 0.4670785069465637, + "learning_rate": 4.993305696209155e-06, + "loss": 0.6131, + "step": 1629 + }, + { + "epoch": 0.14880408983019902, + "grad_norm": 0.47753649950027466, + "learning_rate": 4.993296940815e-06, + "loss": 0.6379, + "step": 1630 + }, + { + "epoch": 0.14889538068285557, + "grad_norm": 0.4781991243362427, + "learning_rate": 4.993288179706738e-06, + "loss": 0.5949, + "step": 1631 + }, + { + "epoch": 0.14898667153551215, + "grad_norm": 0.4267476499080658, + "learning_rate": 4.993279412884388e-06, + "loss": 0.6464, + "step": 1632 + }, + { + "epoch": 0.1490779623881687, + "grad_norm": 0.465112566947937, + "learning_rate": 4.993270640347973e-06, + "loss": 0.6096, + "step": 1633 + }, + { + "epoch": 0.14916925324082528, + "grad_norm": 0.4414077699184418, + "learning_rate": 4.993261862097511e-06, + "loss": 0.6818, + "step": 1634 + }, + { + "epoch": 0.14926054409348183, + "grad_norm": 0.4644888639450073, + "learning_rate": 4.993253078133023e-06, + "loss": 0.6114, + "step": 1635 + }, + { + "epoch": 0.1493518349461384, + "grad_norm": 0.44152510166168213, + "learning_rate": 4.993244288454528e-06, + "loss": 0.6733, + "step": 1636 + }, + { + "epoch": 0.14944312579879496, + "grad_norm": 0.4568271338939667, + "learning_rate": 4.993235493062047e-06, + "loss": 0.6151, + "step": 1637 + }, + { + "epoch": 0.14953441665145153, + "grad_norm": 0.5103057622909546, + "learning_rate": 4.9932266919556e-06, + "loss": 0.5833, + "step": 1638 + }, + { + "epoch": 0.14962570750410809, + "grad_norm": 0.47959330677986145, + "learning_rate": 4.993217885135208e-06, + "loss": 0.6522, + "step": 1639 + }, + { + "epoch": 0.14971699835676466, + "grad_norm": 0.4487322270870209, + "learning_rate": 4.993209072600889e-06, + "loss": 0.6286, + "step": 1640 + }, + { + "epoch": 0.1498082892094212, + "grad_norm": 0.4476780593395233, + "learning_rate": 4.993200254352665e-06, + "loss": 0.6256, + "step": 1641 + }, + { + "epoch": 0.1498995800620778, + "grad_norm": 0.41051414608955383, + "learning_rate": 4.993191430390556e-06, + "loss": 0.6545, + "step": 1642 + }, + { + "epoch": 0.14999087091473434, + "grad_norm": 0.48335641622543335, + "learning_rate": 4.993182600714583e-06, + "loss": 0.6284, + "step": 1643 + }, + { + "epoch": 0.15008216176739092, + "grad_norm": 0.4694078862667084, + "learning_rate": 4.993173765324764e-06, + "loss": 0.6279, + "step": 1644 + }, + { + "epoch": 0.15017345262004747, + "grad_norm": 0.45961329340934753, + "learning_rate": 4.9931649242211215e-06, + "loss": 0.6783, + "step": 1645 + }, + { + "epoch": 0.15026474347270405, + "grad_norm": 0.44355276226997375, + "learning_rate": 4.993156077403674e-06, + "loss": 0.67, + "step": 1646 + }, + { + "epoch": 0.1503560343253606, + "grad_norm": 0.4654396176338196, + "learning_rate": 4.993147224872442e-06, + "loss": 0.6316, + "step": 1647 + }, + { + "epoch": 0.15044732517801715, + "grad_norm": 0.4564943015575409, + "learning_rate": 4.993138366627448e-06, + "loss": 0.6413, + "step": 1648 + }, + { + "epoch": 0.15053861603067373, + "grad_norm": 0.47748520970344543, + "learning_rate": 4.993129502668709e-06, + "loss": 0.6523, + "step": 1649 + }, + { + "epoch": 0.15062990688333028, + "grad_norm": 0.486545592546463, + "learning_rate": 4.993120632996248e-06, + "loss": 0.6021, + "step": 1650 + }, + { + "epoch": 0.15072119773598686, + "grad_norm": 0.45176467299461365, + "learning_rate": 4.993111757610084e-06, + "loss": 0.6547, + "step": 1651 + }, + { + "epoch": 0.1508124885886434, + "grad_norm": 0.4942667484283447, + "learning_rate": 4.993102876510238e-06, + "loss": 0.6469, + "step": 1652 + }, + { + "epoch": 0.15090377944129998, + "grad_norm": 0.44211065769195557, + "learning_rate": 4.993093989696729e-06, + "loss": 0.5903, + "step": 1653 + }, + { + "epoch": 0.15099507029395653, + "grad_norm": 0.4365377128124237, + "learning_rate": 4.993085097169579e-06, + "loss": 0.6491, + "step": 1654 + }, + { + "epoch": 0.1510863611466131, + "grad_norm": 0.4863208532333374, + "learning_rate": 4.993076198928807e-06, + "loss": 0.6394, + "step": 1655 + }, + { + "epoch": 0.15117765199926966, + "grad_norm": 0.4650897979736328, + "learning_rate": 4.993067294974435e-06, + "loss": 0.6147, + "step": 1656 + }, + { + "epoch": 0.15126894285192624, + "grad_norm": 0.4801124036312103, + "learning_rate": 4.993058385306482e-06, + "loss": 0.5924, + "step": 1657 + }, + { + "epoch": 0.1513602337045828, + "grad_norm": 0.45957204699516296, + "learning_rate": 4.993049469924969e-06, + "loss": 0.6212, + "step": 1658 + }, + { + "epoch": 0.15145152455723937, + "grad_norm": 0.45685505867004395, + "learning_rate": 4.9930405488299165e-06, + "loss": 0.6157, + "step": 1659 + }, + { + "epoch": 0.15154281540989592, + "grad_norm": 0.48469772934913635, + "learning_rate": 4.993031622021345e-06, + "loss": 0.6449, + "step": 1660 + }, + { + "epoch": 0.1516341062625525, + "grad_norm": 0.49485674500465393, + "learning_rate": 4.993022689499274e-06, + "loss": 0.6135, + "step": 1661 + }, + { + "epoch": 0.15172539711520905, + "grad_norm": 0.4765627086162567, + "learning_rate": 4.993013751263725e-06, + "loss": 0.6134, + "step": 1662 + }, + { + "epoch": 0.15181668796786563, + "grad_norm": 0.46778959035873413, + "learning_rate": 4.993004807314719e-06, + "loss": 0.6454, + "step": 1663 + }, + { + "epoch": 0.15190797882052218, + "grad_norm": 0.4818785488605499, + "learning_rate": 4.9929958576522755e-06, + "loss": 0.6205, + "step": 1664 + }, + { + "epoch": 0.15199926967317876, + "grad_norm": 0.47209060192108154, + "learning_rate": 4.992986902276415e-06, + "loss": 0.5976, + "step": 1665 + }, + { + "epoch": 0.1520905605258353, + "grad_norm": 0.4557834267616272, + "learning_rate": 4.9929779411871585e-06, + "loss": 0.6342, + "step": 1666 + }, + { + "epoch": 0.15218185137849188, + "grad_norm": 0.46169477701187134, + "learning_rate": 4.992968974384526e-06, + "loss": 0.6155, + "step": 1667 + }, + { + "epoch": 0.15227314223114843, + "grad_norm": 0.4836811423301697, + "learning_rate": 4.992960001868538e-06, + "loss": 0.654, + "step": 1668 + }, + { + "epoch": 0.152364433083805, + "grad_norm": 0.44146138429641724, + "learning_rate": 4.992951023639215e-06, + "loss": 0.6025, + "step": 1669 + }, + { + "epoch": 0.15245572393646156, + "grad_norm": 0.4712883234024048, + "learning_rate": 4.992942039696579e-06, + "loss": 0.5973, + "step": 1670 + }, + { + "epoch": 0.15254701478911814, + "grad_norm": 0.4521535336971283, + "learning_rate": 4.9929330500406494e-06, + "loss": 0.6158, + "step": 1671 + }, + { + "epoch": 0.1526383056417747, + "grad_norm": 0.4575965702533722, + "learning_rate": 4.9929240546714476e-06, + "loss": 0.6431, + "step": 1672 + }, + { + "epoch": 0.15272959649443127, + "grad_norm": 0.4836040735244751, + "learning_rate": 4.992915053588992e-06, + "loss": 0.6569, + "step": 1673 + }, + { + "epoch": 0.15282088734708782, + "grad_norm": 0.5065146684646606, + "learning_rate": 4.992906046793306e-06, + "loss": 0.613, + "step": 1674 + }, + { + "epoch": 0.1529121781997444, + "grad_norm": 0.4552953243255615, + "learning_rate": 4.992897034284408e-06, + "loss": 0.648, + "step": 1675 + }, + { + "epoch": 0.15300346905240095, + "grad_norm": 0.5155807733535767, + "learning_rate": 4.992888016062321e-06, + "loss": 0.5959, + "step": 1676 + }, + { + "epoch": 0.15309475990505753, + "grad_norm": 0.4471646845340729, + "learning_rate": 4.9928789921270635e-06, + "loss": 0.6177, + "step": 1677 + }, + { + "epoch": 0.15318605075771408, + "grad_norm": 0.4468787908554077, + "learning_rate": 4.992869962478658e-06, + "loss": 0.639, + "step": 1678 + }, + { + "epoch": 0.15327734161037065, + "grad_norm": 0.4986099600791931, + "learning_rate": 4.992860927117124e-06, + "loss": 0.6199, + "step": 1679 + }, + { + "epoch": 0.1533686324630272, + "grad_norm": 0.450394868850708, + "learning_rate": 4.9928518860424815e-06, + "loss": 0.6018, + "step": 1680 + }, + { + "epoch": 0.15345992331568375, + "grad_norm": 0.46999391913414, + "learning_rate": 4.992842839254753e-06, + "loss": 0.6241, + "step": 1681 + }, + { + "epoch": 0.15355121416834033, + "grad_norm": 0.4691679775714874, + "learning_rate": 4.992833786753958e-06, + "loss": 0.6437, + "step": 1682 + }, + { + "epoch": 0.15364250502099688, + "grad_norm": 0.44886353611946106, + "learning_rate": 4.992824728540118e-06, + "loss": 0.6255, + "step": 1683 + }, + { + "epoch": 0.15373379587365346, + "grad_norm": 0.46553847193717957, + "learning_rate": 4.9928156646132535e-06, + "loss": 0.6181, + "step": 1684 + }, + { + "epoch": 0.15382508672631, + "grad_norm": 0.46106764674186707, + "learning_rate": 4.992806594973385e-06, + "loss": 0.6639, + "step": 1685 + }, + { + "epoch": 0.1539163775789666, + "grad_norm": 0.5126250386238098, + "learning_rate": 4.992797519620534e-06, + "loss": 0.6503, + "step": 1686 + }, + { + "epoch": 0.15400766843162314, + "grad_norm": 0.4637489318847656, + "learning_rate": 4.99278843855472e-06, + "loss": 0.6468, + "step": 1687 + }, + { + "epoch": 0.15409895928427972, + "grad_norm": 0.4590233862400055, + "learning_rate": 4.992779351775964e-06, + "loss": 0.6614, + "step": 1688 + }, + { + "epoch": 0.15419025013693627, + "grad_norm": 0.5146231651306152, + "learning_rate": 4.9927702592842885e-06, + "loss": 0.5812, + "step": 1689 + }, + { + "epoch": 0.15428154098959285, + "grad_norm": 0.4905277192592621, + "learning_rate": 4.992761161079713e-06, + "loss": 0.6198, + "step": 1690 + }, + { + "epoch": 0.1543728318422494, + "grad_norm": 0.4572739005088806, + "learning_rate": 4.992752057162259e-06, + "loss": 0.6326, + "step": 1691 + }, + { + "epoch": 0.15446412269490598, + "grad_norm": 0.4723881483078003, + "learning_rate": 4.9927429475319465e-06, + "loss": 0.6607, + "step": 1692 + }, + { + "epoch": 0.15455541354756253, + "grad_norm": 0.4307527542114258, + "learning_rate": 4.992733832188796e-06, + "loss": 0.6351, + "step": 1693 + }, + { + "epoch": 0.1546467044002191, + "grad_norm": 0.4363213777542114, + "learning_rate": 4.992724711132831e-06, + "loss": 0.6065, + "step": 1694 + }, + { + "epoch": 0.15473799525287565, + "grad_norm": 0.4863000810146332, + "learning_rate": 4.9927155843640694e-06, + "loss": 0.6528, + "step": 1695 + }, + { + "epoch": 0.15482928610553223, + "grad_norm": 0.4842875599861145, + "learning_rate": 4.992706451882534e-06, + "loss": 0.6373, + "step": 1696 + }, + { + "epoch": 0.15492057695818878, + "grad_norm": 0.51173996925354, + "learning_rate": 4.992697313688245e-06, + "loss": 0.6199, + "step": 1697 + }, + { + "epoch": 0.15501186781084536, + "grad_norm": 0.47855424880981445, + "learning_rate": 4.992688169781224e-06, + "loss": 0.6281, + "step": 1698 + }, + { + "epoch": 0.1551031586635019, + "grad_norm": 0.4745177924633026, + "learning_rate": 4.9926790201614896e-06, + "loss": 0.6293, + "step": 1699 + }, + { + "epoch": 0.1551944495161585, + "grad_norm": 0.45516490936279297, + "learning_rate": 4.992669864829066e-06, + "loss": 0.6211, + "step": 1700 + }, + { + "epoch": 0.15528574036881504, + "grad_norm": 0.46007177233695984, + "learning_rate": 4.992660703783972e-06, + "loss": 0.5891, + "step": 1701 + }, + { + "epoch": 0.15537703122147162, + "grad_norm": 0.45540887117385864, + "learning_rate": 4.9926515370262296e-06, + "loss": 0.5872, + "step": 1702 + }, + { + "epoch": 0.15546832207412817, + "grad_norm": 0.4886207580566406, + "learning_rate": 4.99264236455586e-06, + "loss": 0.65, + "step": 1703 + }, + { + "epoch": 0.15555961292678475, + "grad_norm": 0.5124452114105225, + "learning_rate": 4.9926331863728836e-06, + "loss": 0.612, + "step": 1704 + }, + { + "epoch": 0.1556509037794413, + "grad_norm": 0.47122135758399963, + "learning_rate": 4.992624002477321e-06, + "loss": 0.6269, + "step": 1705 + }, + { + "epoch": 0.15574219463209787, + "grad_norm": 0.47550585865974426, + "learning_rate": 4.992614812869194e-06, + "loss": 0.6142, + "step": 1706 + }, + { + "epoch": 0.15583348548475442, + "grad_norm": 0.48265376687049866, + "learning_rate": 4.992605617548524e-06, + "loss": 0.5837, + "step": 1707 + }, + { + "epoch": 0.155924776337411, + "grad_norm": 0.44968026876449585, + "learning_rate": 4.9925964165153314e-06, + "loss": 0.6164, + "step": 1708 + }, + { + "epoch": 0.15601606719006755, + "grad_norm": 0.47194525599479675, + "learning_rate": 4.992587209769638e-06, + "loss": 0.6469, + "step": 1709 + }, + { + "epoch": 0.15610735804272413, + "grad_norm": 0.463605672121048, + "learning_rate": 4.992577997311463e-06, + "loss": 0.6284, + "step": 1710 + }, + { + "epoch": 0.15619864889538068, + "grad_norm": 0.4679792821407318, + "learning_rate": 4.992568779140829e-06, + "loss": 0.6203, + "step": 1711 + }, + { + "epoch": 0.15628993974803726, + "grad_norm": 0.4544932544231415, + "learning_rate": 4.992559555257759e-06, + "loss": 0.5988, + "step": 1712 + }, + { + "epoch": 0.1563812306006938, + "grad_norm": 0.45056575536727905, + "learning_rate": 4.99255032566227e-06, + "loss": 0.6438, + "step": 1713 + }, + { + "epoch": 0.15647252145335036, + "grad_norm": 0.43960607051849365, + "learning_rate": 4.9925410903543865e-06, + "loss": 0.6456, + "step": 1714 + }, + { + "epoch": 0.15656381230600694, + "grad_norm": 0.5036298036575317, + "learning_rate": 4.992531849334128e-06, + "loss": 0.5988, + "step": 1715 + }, + { + "epoch": 0.1566551031586635, + "grad_norm": 0.4676283895969391, + "learning_rate": 4.992522602601516e-06, + "loss": 0.5631, + "step": 1716 + }, + { + "epoch": 0.15674639401132007, + "grad_norm": 0.461383193731308, + "learning_rate": 4.992513350156572e-06, + "loss": 0.6321, + "step": 1717 + }, + { + "epoch": 0.15683768486397662, + "grad_norm": 0.501868486404419, + "learning_rate": 4.992504091999317e-06, + "loss": 0.6236, + "step": 1718 + }, + { + "epoch": 0.1569289757166332, + "grad_norm": 0.495995432138443, + "learning_rate": 4.992494828129773e-06, + "loss": 0.6383, + "step": 1719 + }, + { + "epoch": 0.15702026656928975, + "grad_norm": 0.45692822337150574, + "learning_rate": 4.9924855585479596e-06, + "loss": 0.6254, + "step": 1720 + }, + { + "epoch": 0.15711155742194632, + "grad_norm": 0.48568424582481384, + "learning_rate": 4.992476283253899e-06, + "loss": 0.5766, + "step": 1721 + }, + { + "epoch": 0.15720284827460287, + "grad_norm": 0.4511032700538635, + "learning_rate": 4.992467002247613e-06, + "loss": 0.637, + "step": 1722 + }, + { + "epoch": 0.15729413912725945, + "grad_norm": 0.46541643142700195, + "learning_rate": 4.992457715529121e-06, + "loss": 0.6147, + "step": 1723 + }, + { + "epoch": 0.157385429979916, + "grad_norm": 0.48884323239326477, + "learning_rate": 4.992448423098446e-06, + "loss": 0.6402, + "step": 1724 + }, + { + "epoch": 0.15747672083257258, + "grad_norm": 0.46534284949302673, + "learning_rate": 4.992439124955609e-06, + "loss": 0.6294, + "step": 1725 + }, + { + "epoch": 0.15756801168522913, + "grad_norm": 0.47645846009254456, + "learning_rate": 4.992429821100632e-06, + "loss": 0.625, + "step": 1726 + }, + { + "epoch": 0.1576593025378857, + "grad_norm": 0.44671428203582764, + "learning_rate": 4.9924205115335344e-06, + "loss": 0.6319, + "step": 1727 + }, + { + "epoch": 0.15775059339054226, + "grad_norm": 0.4987049102783203, + "learning_rate": 4.992411196254339e-06, + "loss": 0.604, + "step": 1728 + }, + { + "epoch": 0.15784188424319884, + "grad_norm": 0.49591830372810364, + "learning_rate": 4.992401875263066e-06, + "loss": 0.5924, + "step": 1729 + }, + { + "epoch": 0.1579331750958554, + "grad_norm": 0.47340846061706543, + "learning_rate": 4.992392548559739e-06, + "loss": 0.6168, + "step": 1730 + }, + { + "epoch": 0.15802446594851197, + "grad_norm": 0.45204511284828186, + "learning_rate": 4.992383216144377e-06, + "loss": 0.6435, + "step": 1731 + }, + { + "epoch": 0.15811575680116852, + "grad_norm": 0.48539862036705017, + "learning_rate": 4.992373878017003e-06, + "loss": 0.6084, + "step": 1732 + }, + { + "epoch": 0.1582070476538251, + "grad_norm": 0.4798493981361389, + "learning_rate": 4.9923645341776365e-06, + "loss": 0.6379, + "step": 1733 + }, + { + "epoch": 0.15829833850648165, + "grad_norm": 0.4935920536518097, + "learning_rate": 4.992355184626301e-06, + "loss": 0.5952, + "step": 1734 + }, + { + "epoch": 0.15838962935913822, + "grad_norm": 0.450093150138855, + "learning_rate": 4.992345829363016e-06, + "loss": 0.6257, + "step": 1735 + }, + { + "epoch": 0.15848092021179477, + "grad_norm": 0.507094144821167, + "learning_rate": 4.992336468387805e-06, + "loss": 0.601, + "step": 1736 + }, + { + "epoch": 0.15857221106445135, + "grad_norm": 0.45185762643814087, + "learning_rate": 4.992327101700688e-06, + "loss": 0.6149, + "step": 1737 + }, + { + "epoch": 0.1586635019171079, + "grad_norm": 0.4349115192890167, + "learning_rate": 4.992317729301687e-06, + "loss": 0.6102, + "step": 1738 + }, + { + "epoch": 0.15875479276976448, + "grad_norm": 0.45397087931632996, + "learning_rate": 4.992308351190823e-06, + "loss": 0.6812, + "step": 1739 + }, + { + "epoch": 0.15884608362242103, + "grad_norm": 0.4796385169029236, + "learning_rate": 4.992298967368118e-06, + "loss": 0.6075, + "step": 1740 + }, + { + "epoch": 0.1589373744750776, + "grad_norm": 0.5479741096496582, + "learning_rate": 4.992289577833593e-06, + "loss": 0.6189, + "step": 1741 + }, + { + "epoch": 0.15902866532773416, + "grad_norm": 0.449405699968338, + "learning_rate": 4.992280182587271e-06, + "loss": 0.6252, + "step": 1742 + }, + { + "epoch": 0.15911995618039074, + "grad_norm": 0.5115348100662231, + "learning_rate": 4.992270781629171e-06, + "loss": 0.6327, + "step": 1743 + }, + { + "epoch": 0.1592112470330473, + "grad_norm": 0.4674215018749237, + "learning_rate": 4.992261374959318e-06, + "loss": 0.594, + "step": 1744 + }, + { + "epoch": 0.15930253788570387, + "grad_norm": 0.47971203923225403, + "learning_rate": 4.99225196257773e-06, + "loss": 0.5938, + "step": 1745 + }, + { + "epoch": 0.15939382873836042, + "grad_norm": 0.47053149342536926, + "learning_rate": 4.9922425444844296e-06, + "loss": 0.6361, + "step": 1746 + }, + { + "epoch": 0.159485119591017, + "grad_norm": 0.4423227906227112, + "learning_rate": 4.992233120679439e-06, + "loss": 0.6421, + "step": 1747 + }, + { + "epoch": 0.15957641044367354, + "grad_norm": 0.4659043550491333, + "learning_rate": 4.992223691162781e-06, + "loss": 0.5894, + "step": 1748 + }, + { + "epoch": 0.1596677012963301, + "grad_norm": 0.44597089290618896, + "learning_rate": 4.992214255934475e-06, + "loss": 0.6088, + "step": 1749 + }, + { + "epoch": 0.15975899214898667, + "grad_norm": 0.46499601006507874, + "learning_rate": 4.992204814994544e-06, + "loss": 0.604, + "step": 1750 + }, + { + "epoch": 0.15985028300164322, + "grad_norm": 0.46032649278640747, + "learning_rate": 4.992195368343008e-06, + "loss": 0.6265, + "step": 1751 + }, + { + "epoch": 0.1599415738542998, + "grad_norm": 0.4488281309604645, + "learning_rate": 4.992185915979891e-06, + "loss": 0.6297, + "step": 1752 + }, + { + "epoch": 0.16003286470695635, + "grad_norm": 0.4519026279449463, + "learning_rate": 4.992176457905213e-06, + "loss": 0.6229, + "step": 1753 + }, + { + "epoch": 0.16012415555961293, + "grad_norm": 0.45204901695251465, + "learning_rate": 4.992166994118996e-06, + "loss": 0.6408, + "step": 1754 + }, + { + "epoch": 0.16021544641226948, + "grad_norm": 0.4500667154788971, + "learning_rate": 4.992157524621261e-06, + "loss": 0.6031, + "step": 1755 + }, + { + "epoch": 0.16030673726492606, + "grad_norm": 0.4283992350101471, + "learning_rate": 4.9921480494120326e-06, + "loss": 0.631, + "step": 1756 + }, + { + "epoch": 0.1603980281175826, + "grad_norm": 0.4398607611656189, + "learning_rate": 4.992138568491328e-06, + "loss": 0.6129, + "step": 1757 + }, + { + "epoch": 0.1604893189702392, + "grad_norm": 0.49156811833381653, + "learning_rate": 4.9921290818591734e-06, + "loss": 0.5846, + "step": 1758 + }, + { + "epoch": 0.16058060982289574, + "grad_norm": 0.49393847584724426, + "learning_rate": 4.992119589515587e-06, + "loss": 0.6224, + "step": 1759 + }, + { + "epoch": 0.16067190067555231, + "grad_norm": 0.46236753463745117, + "learning_rate": 4.992110091460594e-06, + "loss": 0.6112, + "step": 1760 + }, + { + "epoch": 0.16076319152820887, + "grad_norm": 0.4538114070892334, + "learning_rate": 4.992100587694212e-06, + "loss": 0.6532, + "step": 1761 + }, + { + "epoch": 0.16085448238086544, + "grad_norm": 0.4698866605758667, + "learning_rate": 4.992091078216466e-06, + "loss": 0.6648, + "step": 1762 + }, + { + "epoch": 0.160945773233522, + "grad_norm": 0.4657101631164551, + "learning_rate": 4.992081563027376e-06, + "loss": 0.6645, + "step": 1763 + }, + { + "epoch": 0.16103706408617857, + "grad_norm": 0.4737727642059326, + "learning_rate": 4.992072042126966e-06, + "loss": 0.6015, + "step": 1764 + }, + { + "epoch": 0.16112835493883512, + "grad_norm": 0.4807695746421814, + "learning_rate": 4.992062515515256e-06, + "loss": 0.5666, + "step": 1765 + }, + { + "epoch": 0.1612196457914917, + "grad_norm": 0.4714348316192627, + "learning_rate": 4.992052983192268e-06, + "loss": 0.5818, + "step": 1766 + }, + { + "epoch": 0.16131093664414825, + "grad_norm": 0.4752035439014435, + "learning_rate": 4.992043445158024e-06, + "loss": 0.6351, + "step": 1767 + }, + { + "epoch": 0.16140222749680483, + "grad_norm": 0.5123614072799683, + "learning_rate": 4.992033901412546e-06, + "loss": 0.5698, + "step": 1768 + }, + { + "epoch": 0.16149351834946138, + "grad_norm": 0.4685329794883728, + "learning_rate": 4.992024351955856e-06, + "loss": 0.6041, + "step": 1769 + }, + { + "epoch": 0.16158480920211796, + "grad_norm": 0.49435141682624817, + "learning_rate": 4.992014796787976e-06, + "loss": 0.5918, + "step": 1770 + }, + { + "epoch": 0.1616761000547745, + "grad_norm": 0.4690675437450409, + "learning_rate": 4.992005235908927e-06, + "loss": 0.6316, + "step": 1771 + }, + { + "epoch": 0.16176739090743109, + "grad_norm": 0.4371800422668457, + "learning_rate": 4.991995669318732e-06, + "loss": 0.6329, + "step": 1772 + }, + { + "epoch": 0.16185868176008764, + "grad_norm": 0.42018821835517883, + "learning_rate": 4.991986097017411e-06, + "loss": 0.6405, + "step": 1773 + }, + { + "epoch": 0.16194997261274421, + "grad_norm": 0.5055900812149048, + "learning_rate": 4.99197651900499e-06, + "loss": 0.6331, + "step": 1774 + }, + { + "epoch": 0.16204126346540076, + "grad_norm": 0.5141112804412842, + "learning_rate": 4.991966935281486e-06, + "loss": 0.6455, + "step": 1775 + }, + { + "epoch": 0.16213255431805734, + "grad_norm": 0.48697689175605774, + "learning_rate": 4.991957345846924e-06, + "loss": 0.609, + "step": 1776 + }, + { + "epoch": 0.1622238451707139, + "grad_norm": 0.4792541563510895, + "learning_rate": 4.991947750701326e-06, + "loss": 0.6133, + "step": 1777 + }, + { + "epoch": 0.16231513602337047, + "grad_norm": 0.4448118209838867, + "learning_rate": 4.991938149844713e-06, + "loss": 0.6066, + "step": 1778 + }, + { + "epoch": 0.16240642687602702, + "grad_norm": 0.45070236921310425, + "learning_rate": 4.991928543277107e-06, + "loss": 0.6267, + "step": 1779 + }, + { + "epoch": 0.1624977177286836, + "grad_norm": 0.47364071011543274, + "learning_rate": 4.99191893099853e-06, + "loss": 0.6063, + "step": 1780 + }, + { + "epoch": 0.16258900858134015, + "grad_norm": 0.4787783622741699, + "learning_rate": 4.991909313009005e-06, + "loss": 0.5716, + "step": 1781 + }, + { + "epoch": 0.1626802994339967, + "grad_norm": 0.4293047785758972, + "learning_rate": 4.9918996893085526e-06, + "loss": 0.6314, + "step": 1782 + }, + { + "epoch": 0.16277159028665328, + "grad_norm": 0.49369725584983826, + "learning_rate": 4.991890059897197e-06, + "loss": 0.6136, + "step": 1783 + }, + { + "epoch": 0.16286288113930983, + "grad_norm": 0.4901777505874634, + "learning_rate": 4.991880424774958e-06, + "loss": 0.6274, + "step": 1784 + }, + { + "epoch": 0.1629541719919664, + "grad_norm": 0.44564834237098694, + "learning_rate": 4.9918707839418575e-06, + "loss": 0.6585, + "step": 1785 + }, + { + "epoch": 0.16304546284462296, + "grad_norm": 0.4596983790397644, + "learning_rate": 4.9918611373979195e-06, + "loss": 0.6183, + "step": 1786 + }, + { + "epoch": 0.16313675369727954, + "grad_norm": 0.47965067625045776, + "learning_rate": 4.991851485143165e-06, + "loss": 0.6341, + "step": 1787 + }, + { + "epoch": 0.16322804454993609, + "grad_norm": 0.44988563656806946, + "learning_rate": 4.991841827177617e-06, + "loss": 0.6647, + "step": 1788 + }, + { + "epoch": 0.16331933540259266, + "grad_norm": 0.46345993876457214, + "learning_rate": 4.991832163501297e-06, + "loss": 0.6026, + "step": 1789 + }, + { + "epoch": 0.16341062625524921, + "grad_norm": 0.4777255356311798, + "learning_rate": 4.991822494114227e-06, + "loss": 0.6379, + "step": 1790 + }, + { + "epoch": 0.1635019171079058, + "grad_norm": 0.4584222137928009, + "learning_rate": 4.991812819016429e-06, + "loss": 0.5981, + "step": 1791 + }, + { + "epoch": 0.16359320796056234, + "grad_norm": 0.4677836000919342, + "learning_rate": 4.991803138207927e-06, + "loss": 0.6281, + "step": 1792 + }, + { + "epoch": 0.16368449881321892, + "grad_norm": 0.464712917804718, + "learning_rate": 4.99179345168874e-06, + "loss": 0.6545, + "step": 1793 + }, + { + "epoch": 0.16377578966587547, + "grad_norm": 0.44958457350730896, + "learning_rate": 4.991783759458892e-06, + "loss": 0.6621, + "step": 1794 + }, + { + "epoch": 0.16386708051853205, + "grad_norm": 0.4850762188434601, + "learning_rate": 4.991774061518406e-06, + "loss": 0.6412, + "step": 1795 + }, + { + "epoch": 0.1639583713711886, + "grad_norm": 0.46989643573760986, + "learning_rate": 4.991764357867303e-06, + "loss": 0.6107, + "step": 1796 + }, + { + "epoch": 0.16404966222384518, + "grad_norm": 0.4693230092525482, + "learning_rate": 4.991754648505605e-06, + "loss": 0.6736, + "step": 1797 + }, + { + "epoch": 0.16414095307650173, + "grad_norm": 0.4451369047164917, + "learning_rate": 4.9917449334333355e-06, + "loss": 0.6035, + "step": 1798 + }, + { + "epoch": 0.1642322439291583, + "grad_norm": 0.4780116081237793, + "learning_rate": 4.991735212650516e-06, + "loss": 0.6203, + "step": 1799 + }, + { + "epoch": 0.16432353478181486, + "grad_norm": 0.4667341709136963, + "learning_rate": 4.991725486157168e-06, + "loss": 0.6418, + "step": 1800 + }, + { + "epoch": 0.16441482563447143, + "grad_norm": 0.4468247890472412, + "learning_rate": 4.991715753953316e-06, + "loss": 0.5925, + "step": 1801 + }, + { + "epoch": 0.16450611648712798, + "grad_norm": 0.46415776014328003, + "learning_rate": 4.991706016038979e-06, + "loss": 0.6328, + "step": 1802 + }, + { + "epoch": 0.16459740733978456, + "grad_norm": 0.5024691224098206, + "learning_rate": 4.991696272414184e-06, + "loss": 0.6498, + "step": 1803 + }, + { + "epoch": 0.1646886981924411, + "grad_norm": 0.4667605757713318, + "learning_rate": 4.991686523078949e-06, + "loss": 0.611, + "step": 1804 + }, + { + "epoch": 0.1647799890450977, + "grad_norm": 0.46883872151374817, + "learning_rate": 4.991676768033298e-06, + "loss": 0.5797, + "step": 1805 + }, + { + "epoch": 0.16487127989775424, + "grad_norm": 0.5054078698158264, + "learning_rate": 4.991667007277254e-06, + "loss": 0.6131, + "step": 1806 + }, + { + "epoch": 0.16496257075041082, + "grad_norm": 0.475847989320755, + "learning_rate": 4.9916572408108374e-06, + "loss": 0.6175, + "step": 1807 + }, + { + "epoch": 0.16505386160306737, + "grad_norm": 0.4692200720310211, + "learning_rate": 4.991647468634072e-06, + "loss": 0.6005, + "step": 1808 + }, + { + "epoch": 0.16514515245572395, + "grad_norm": 0.4356849789619446, + "learning_rate": 4.991637690746981e-06, + "loss": 0.5732, + "step": 1809 + }, + { + "epoch": 0.1652364433083805, + "grad_norm": 0.4400574862957001, + "learning_rate": 4.9916279071495855e-06, + "loss": 0.5954, + "step": 1810 + }, + { + "epoch": 0.16532773416103708, + "grad_norm": 0.47884601354599, + "learning_rate": 4.9916181178419086e-06, + "loss": 0.6123, + "step": 1811 + }, + { + "epoch": 0.16541902501369363, + "grad_norm": 0.4600358009338379, + "learning_rate": 4.991608322823972e-06, + "loss": 0.647, + "step": 1812 + }, + { + "epoch": 0.1655103158663502, + "grad_norm": 0.45441925525665283, + "learning_rate": 4.991598522095799e-06, + "loss": 0.6253, + "step": 1813 + }, + { + "epoch": 0.16560160671900676, + "grad_norm": 0.46550220251083374, + "learning_rate": 4.991588715657411e-06, + "loss": 0.6145, + "step": 1814 + }, + { + "epoch": 0.1656928975716633, + "grad_norm": 0.4242846369743347, + "learning_rate": 4.991578903508831e-06, + "loss": 0.6257, + "step": 1815 + }, + { + "epoch": 0.16578418842431988, + "grad_norm": 0.44607457518577576, + "learning_rate": 4.991569085650082e-06, + "loss": 0.6214, + "step": 1816 + }, + { + "epoch": 0.16587547927697643, + "grad_norm": 0.4339963495731354, + "learning_rate": 4.991559262081186e-06, + "loss": 0.6588, + "step": 1817 + }, + { + "epoch": 0.165966770129633, + "grad_norm": 0.4505074918270111, + "learning_rate": 4.991549432802166e-06, + "loss": 0.6296, + "step": 1818 + }, + { + "epoch": 0.16605806098228956, + "grad_norm": 0.48049986362457275, + "learning_rate": 4.991539597813043e-06, + "loss": 0.607, + "step": 1819 + }, + { + "epoch": 0.16614935183494614, + "grad_norm": 0.49997037649154663, + "learning_rate": 4.991529757113842e-06, + "loss": 0.6259, + "step": 1820 + }, + { + "epoch": 0.1662406426876027, + "grad_norm": 0.4516671299934387, + "learning_rate": 4.991519910704583e-06, + "loss": 0.64, + "step": 1821 + }, + { + "epoch": 0.16633193354025927, + "grad_norm": 0.48457542061805725, + "learning_rate": 4.991510058585291e-06, + "loss": 0.6099, + "step": 1822 + }, + { + "epoch": 0.16642322439291582, + "grad_norm": 0.46761438250541687, + "learning_rate": 4.9915002007559865e-06, + "loss": 0.6108, + "step": 1823 + }, + { + "epoch": 0.1665145152455724, + "grad_norm": 0.4775469899177551, + "learning_rate": 4.9914903372166934e-06, + "loss": 0.6275, + "step": 1824 + }, + { + "epoch": 0.16660580609822895, + "grad_norm": 0.47247451543807983, + "learning_rate": 4.9914804679674336e-06, + "loss": 0.6207, + "step": 1825 + }, + { + "epoch": 0.16669709695088553, + "grad_norm": 0.4881669282913208, + "learning_rate": 4.9914705930082305e-06, + "loss": 0.5958, + "step": 1826 + }, + { + "epoch": 0.16678838780354208, + "grad_norm": 0.4741148054599762, + "learning_rate": 4.991460712339106e-06, + "loss": 0.6598, + "step": 1827 + }, + { + "epoch": 0.16687967865619865, + "grad_norm": 0.4754927456378937, + "learning_rate": 4.991450825960082e-06, + "loss": 0.6327, + "step": 1828 + }, + { + "epoch": 0.1669709695088552, + "grad_norm": 0.4873694181442261, + "learning_rate": 4.991440933871183e-06, + "loss": 0.6102, + "step": 1829 + }, + { + "epoch": 0.16706226036151178, + "grad_norm": 0.4854569137096405, + "learning_rate": 4.991431036072431e-06, + "loss": 0.6099, + "step": 1830 + }, + { + "epoch": 0.16715355121416833, + "grad_norm": 0.4548935294151306, + "learning_rate": 4.991421132563848e-06, + "loss": 0.6144, + "step": 1831 + }, + { + "epoch": 0.1672448420668249, + "grad_norm": 0.4619503915309906, + "learning_rate": 4.991411223345457e-06, + "loss": 0.6239, + "step": 1832 + }, + { + "epoch": 0.16733613291948146, + "grad_norm": 0.4661247432231903, + "learning_rate": 4.991401308417282e-06, + "loss": 0.6236, + "step": 1833 + }, + { + "epoch": 0.16742742377213804, + "grad_norm": 0.44889500737190247, + "learning_rate": 4.9913913877793435e-06, + "loss": 0.6361, + "step": 1834 + }, + { + "epoch": 0.1675187146247946, + "grad_norm": 0.47413820028305054, + "learning_rate": 4.9913814614316655e-06, + "loss": 0.6326, + "step": 1835 + }, + { + "epoch": 0.16761000547745117, + "grad_norm": 0.47422292828559875, + "learning_rate": 4.9913715293742705e-06, + "loss": 0.6082, + "step": 1836 + }, + { + "epoch": 0.16770129633010772, + "grad_norm": 0.46767574548721313, + "learning_rate": 4.991361591607182e-06, + "loss": 0.6091, + "step": 1837 + }, + { + "epoch": 0.1677925871827643, + "grad_norm": 0.4624524414539337, + "learning_rate": 4.991351648130421e-06, + "loss": 0.6505, + "step": 1838 + }, + { + "epoch": 0.16788387803542085, + "grad_norm": 0.4525628089904785, + "learning_rate": 4.9913416989440125e-06, + "loss": 0.5893, + "step": 1839 + }, + { + "epoch": 0.16797516888807743, + "grad_norm": 0.4961937665939331, + "learning_rate": 4.991331744047978e-06, + "loss": 0.6126, + "step": 1840 + }, + { + "epoch": 0.16806645974073398, + "grad_norm": 0.4321497976779938, + "learning_rate": 4.991321783442341e-06, + "loss": 0.6582, + "step": 1841 + }, + { + "epoch": 0.16815775059339055, + "grad_norm": 0.5024158954620361, + "learning_rate": 4.991311817127123e-06, + "loss": 0.6105, + "step": 1842 + }, + { + "epoch": 0.1682490414460471, + "grad_norm": 0.45268940925598145, + "learning_rate": 4.991301845102348e-06, + "loss": 0.6145, + "step": 1843 + }, + { + "epoch": 0.16834033229870368, + "grad_norm": 0.4592748284339905, + "learning_rate": 4.991291867368038e-06, + "loss": 0.6391, + "step": 1844 + }, + { + "epoch": 0.16843162315136023, + "grad_norm": 0.43695068359375, + "learning_rate": 4.9912818839242176e-06, + "loss": 0.6438, + "step": 1845 + }, + { + "epoch": 0.1685229140040168, + "grad_norm": 0.4593477249145508, + "learning_rate": 4.9912718947709074e-06, + "loss": 0.6231, + "step": 1846 + }, + { + "epoch": 0.16861420485667336, + "grad_norm": 0.4613357186317444, + "learning_rate": 4.991261899908131e-06, + "loss": 0.6199, + "step": 1847 + }, + { + "epoch": 0.16870549570932994, + "grad_norm": 0.4735906720161438, + "learning_rate": 4.991251899335913e-06, + "loss": 0.6216, + "step": 1848 + }, + { + "epoch": 0.1687967865619865, + "grad_norm": 0.45290055871009827, + "learning_rate": 4.991241893054274e-06, + "loss": 0.6342, + "step": 1849 + }, + { + "epoch": 0.16888807741464304, + "grad_norm": 0.44459235668182373, + "learning_rate": 4.991231881063239e-06, + "loss": 0.5945, + "step": 1850 + }, + { + "epoch": 0.16897936826729962, + "grad_norm": 0.47518494725227356, + "learning_rate": 4.991221863362828e-06, + "loss": 0.5958, + "step": 1851 + }, + { + "epoch": 0.16907065911995617, + "grad_norm": 0.46104639768600464, + "learning_rate": 4.991211839953068e-06, + "loss": 0.6495, + "step": 1852 + }, + { + "epoch": 0.16916194997261275, + "grad_norm": 0.4656905233860016, + "learning_rate": 4.991201810833979e-06, + "loss": 0.5957, + "step": 1853 + }, + { + "epoch": 0.1692532408252693, + "grad_norm": 0.48104068636894226, + "learning_rate": 4.991191776005585e-06, + "loss": 0.611, + "step": 1854 + }, + { + "epoch": 0.16934453167792587, + "grad_norm": 0.4262980818748474, + "learning_rate": 4.991181735467908e-06, + "loss": 0.6155, + "step": 1855 + }, + { + "epoch": 0.16943582253058243, + "grad_norm": 0.4545164406299591, + "learning_rate": 4.991171689220973e-06, + "loss": 0.6059, + "step": 1856 + }, + { + "epoch": 0.169527113383239, + "grad_norm": 0.46900299191474915, + "learning_rate": 4.9911616372648005e-06, + "loss": 0.6011, + "step": 1857 + }, + { + "epoch": 0.16961840423589555, + "grad_norm": 0.480673223733902, + "learning_rate": 4.991151579599416e-06, + "loss": 0.6397, + "step": 1858 + }, + { + "epoch": 0.16970969508855213, + "grad_norm": 0.4733421802520752, + "learning_rate": 4.991141516224841e-06, + "loss": 0.6072, + "step": 1859 + }, + { + "epoch": 0.16980098594120868, + "grad_norm": 0.4493863582611084, + "learning_rate": 4.9911314471410985e-06, + "loss": 0.6217, + "step": 1860 + }, + { + "epoch": 0.16989227679386526, + "grad_norm": 0.465742290019989, + "learning_rate": 4.991121372348213e-06, + "loss": 0.6108, + "step": 1861 + }, + { + "epoch": 0.1699835676465218, + "grad_norm": 0.5282201766967773, + "learning_rate": 4.991111291846205e-06, + "loss": 0.5714, + "step": 1862 + }, + { + "epoch": 0.1700748584991784, + "grad_norm": 0.47308027744293213, + "learning_rate": 4.9911012056351015e-06, + "loss": 0.6273, + "step": 1863 + }, + { + "epoch": 0.17016614935183494, + "grad_norm": 0.4713892638683319, + "learning_rate": 4.991091113714922e-06, + "loss": 0.6284, + "step": 1864 + }, + { + "epoch": 0.17025744020449152, + "grad_norm": 0.43993672728538513, + "learning_rate": 4.991081016085692e-06, + "loss": 0.634, + "step": 1865 + }, + { + "epoch": 0.17034873105714807, + "grad_norm": 0.4667469561100006, + "learning_rate": 4.991070912747433e-06, + "loss": 0.63, + "step": 1866 + }, + { + "epoch": 0.17044002190980465, + "grad_norm": 0.4759572446346283, + "learning_rate": 4.991060803700168e-06, + "loss": 0.6114, + "step": 1867 + }, + { + "epoch": 0.1705313127624612, + "grad_norm": 0.43756598234176636, + "learning_rate": 4.991050688943923e-06, + "loss": 0.6413, + "step": 1868 + }, + { + "epoch": 0.17062260361511777, + "grad_norm": 0.49660128355026245, + "learning_rate": 4.9910405684787175e-06, + "loss": 0.591, + "step": 1869 + }, + { + "epoch": 0.17071389446777432, + "grad_norm": 0.46317926049232483, + "learning_rate": 4.991030442304576e-06, + "loss": 0.5987, + "step": 1870 + }, + { + "epoch": 0.1708051853204309, + "grad_norm": 0.46410247683525085, + "learning_rate": 4.991020310421523e-06, + "loss": 0.6171, + "step": 1871 + }, + { + "epoch": 0.17089647617308745, + "grad_norm": 0.45331618189811707, + "learning_rate": 4.991010172829581e-06, + "loss": 0.6471, + "step": 1872 + }, + { + "epoch": 0.17098776702574403, + "grad_norm": 0.45177915692329407, + "learning_rate": 4.991000029528772e-06, + "loss": 0.6169, + "step": 1873 + }, + { + "epoch": 0.17107905787840058, + "grad_norm": 0.4655129015445709, + "learning_rate": 4.990989880519121e-06, + "loss": 0.6366, + "step": 1874 + }, + { + "epoch": 0.17117034873105716, + "grad_norm": 0.46662890911102295, + "learning_rate": 4.99097972580065e-06, + "loss": 0.6227, + "step": 1875 + }, + { + "epoch": 0.1712616395837137, + "grad_norm": 0.45198360085487366, + "learning_rate": 4.990969565373384e-06, + "loss": 0.6127, + "step": 1876 + }, + { + "epoch": 0.1713529304363703, + "grad_norm": 0.49535152316093445, + "learning_rate": 4.990959399237344e-06, + "loss": 0.597, + "step": 1877 + }, + { + "epoch": 0.17144422128902684, + "grad_norm": 0.46317222714424133, + "learning_rate": 4.990949227392554e-06, + "loss": 0.6564, + "step": 1878 + }, + { + "epoch": 0.17153551214168342, + "grad_norm": 0.5066303014755249, + "learning_rate": 4.990939049839038e-06, + "loss": 0.6036, + "step": 1879 + }, + { + "epoch": 0.17162680299433997, + "grad_norm": 0.4554339647293091, + "learning_rate": 4.990928866576819e-06, + "loss": 0.6647, + "step": 1880 + }, + { + "epoch": 0.17171809384699654, + "grad_norm": 0.46918535232543945, + "learning_rate": 4.990918677605921e-06, + "loss": 0.5995, + "step": 1881 + }, + { + "epoch": 0.1718093846996531, + "grad_norm": 0.47042161226272583, + "learning_rate": 4.990908482926365e-06, + "loss": 0.624, + "step": 1882 + }, + { + "epoch": 0.17190067555230965, + "grad_norm": 0.511886477470398, + "learning_rate": 4.990898282538178e-06, + "loss": 0.6175, + "step": 1883 + }, + { + "epoch": 0.17199196640496622, + "grad_norm": 0.4881305992603302, + "learning_rate": 4.990888076441381e-06, + "loss": 0.6157, + "step": 1884 + }, + { + "epoch": 0.17208325725762277, + "grad_norm": 0.46231526136398315, + "learning_rate": 4.9908778646359965e-06, + "loss": 0.6019, + "step": 1885 + }, + { + "epoch": 0.17217454811027935, + "grad_norm": 0.481523334980011, + "learning_rate": 4.990867647122051e-06, + "loss": 0.6396, + "step": 1886 + }, + { + "epoch": 0.1722658389629359, + "grad_norm": 0.44525063037872314, + "learning_rate": 4.990857423899565e-06, + "loss": 0.6142, + "step": 1887 + }, + { + "epoch": 0.17235712981559248, + "grad_norm": 0.531005859375, + "learning_rate": 4.990847194968563e-06, + "loss": 0.5766, + "step": 1888 + }, + { + "epoch": 0.17244842066824903, + "grad_norm": 0.4583427608013153, + "learning_rate": 4.99083696032907e-06, + "loss": 0.6217, + "step": 1889 + }, + { + "epoch": 0.1725397115209056, + "grad_norm": 0.45198917388916016, + "learning_rate": 4.990826719981107e-06, + "loss": 0.6551, + "step": 1890 + }, + { + "epoch": 0.17263100237356216, + "grad_norm": 0.48081904649734497, + "learning_rate": 4.990816473924698e-06, + "loss": 0.6619, + "step": 1891 + }, + { + "epoch": 0.17272229322621874, + "grad_norm": 0.4365113079547882, + "learning_rate": 4.990806222159867e-06, + "loss": 0.651, + "step": 1892 + }, + { + "epoch": 0.1728135840788753, + "grad_norm": 0.463355153799057, + "learning_rate": 4.990795964686638e-06, + "loss": 0.6182, + "step": 1893 + }, + { + "epoch": 0.17290487493153187, + "grad_norm": 0.45501214265823364, + "learning_rate": 4.9907857015050335e-06, + "loss": 0.656, + "step": 1894 + }, + { + "epoch": 0.17299616578418842, + "grad_norm": 0.45735734701156616, + "learning_rate": 4.990775432615077e-06, + "loss": 0.6378, + "step": 1895 + }, + { + "epoch": 0.173087456636845, + "grad_norm": 0.45325377583503723, + "learning_rate": 4.990765158016793e-06, + "loss": 0.6771, + "step": 1896 + }, + { + "epoch": 0.17317874748950154, + "grad_norm": 0.46682968735694885, + "learning_rate": 4.990754877710204e-06, + "loss": 0.623, + "step": 1897 + }, + { + "epoch": 0.17327003834215812, + "grad_norm": 0.4700373709201813, + "learning_rate": 4.990744591695335e-06, + "loss": 0.6201, + "step": 1898 + }, + { + "epoch": 0.17336132919481467, + "grad_norm": 0.48967450857162476, + "learning_rate": 4.990734299972209e-06, + "loss": 0.5842, + "step": 1899 + }, + { + "epoch": 0.17345262004747125, + "grad_norm": 0.4724716544151306, + "learning_rate": 4.9907240025408474e-06, + "loss": 0.6246, + "step": 1900 + }, + { + "epoch": 0.1735439109001278, + "grad_norm": 0.466016560792923, + "learning_rate": 4.990713699401277e-06, + "loss": 0.6045, + "step": 1901 + }, + { + "epoch": 0.17363520175278438, + "grad_norm": 0.46476471424102783, + "learning_rate": 4.990703390553519e-06, + "loss": 0.6418, + "step": 1902 + }, + { + "epoch": 0.17372649260544093, + "grad_norm": 0.4780973792076111, + "learning_rate": 4.990693075997599e-06, + "loss": 0.599, + "step": 1903 + }, + { + "epoch": 0.1738177834580975, + "grad_norm": 0.4923692047595978, + "learning_rate": 4.9906827557335404e-06, + "loss": 0.643, + "step": 1904 + }, + { + "epoch": 0.17390907431075406, + "grad_norm": 0.4793660044670105, + "learning_rate": 4.990672429761365e-06, + "loss": 0.6003, + "step": 1905 + }, + { + "epoch": 0.17400036516341064, + "grad_norm": 0.47098368406295776, + "learning_rate": 4.990662098081098e-06, + "loss": 0.618, + "step": 1906 + }, + { + "epoch": 0.1740916560160672, + "grad_norm": 0.45013561844825745, + "learning_rate": 4.990651760692763e-06, + "loss": 0.5949, + "step": 1907 + }, + { + "epoch": 0.17418294686872376, + "grad_norm": 0.4948805570602417, + "learning_rate": 4.990641417596382e-06, + "loss": 0.6123, + "step": 1908 + }, + { + "epoch": 0.17427423772138032, + "grad_norm": 0.4668284058570862, + "learning_rate": 4.9906310687919815e-06, + "loss": 0.6252, + "step": 1909 + }, + { + "epoch": 0.1743655285740369, + "grad_norm": 0.4928252100944519, + "learning_rate": 4.990620714279584e-06, + "loss": 0.6373, + "step": 1910 + }, + { + "epoch": 0.17445681942669344, + "grad_norm": 0.4730307459831238, + "learning_rate": 4.990610354059212e-06, + "loss": 0.6108, + "step": 1911 + }, + { + "epoch": 0.17454811027935002, + "grad_norm": 0.48040059208869934, + "learning_rate": 4.990599988130891e-06, + "loss": 0.5961, + "step": 1912 + }, + { + "epoch": 0.17463940113200657, + "grad_norm": 0.49720895290374756, + "learning_rate": 4.990589616494644e-06, + "loss": 0.5963, + "step": 1913 + }, + { + "epoch": 0.17473069198466315, + "grad_norm": 0.4774438440799713, + "learning_rate": 4.990579239150495e-06, + "loss": 0.5743, + "step": 1914 + }, + { + "epoch": 0.1748219828373197, + "grad_norm": 0.5334876179695129, + "learning_rate": 4.990568856098467e-06, + "loss": 0.5843, + "step": 1915 + }, + { + "epoch": 0.17491327368997625, + "grad_norm": 0.535195529460907, + "learning_rate": 4.990558467338585e-06, + "loss": 0.6267, + "step": 1916 + }, + { + "epoch": 0.17500456454263283, + "grad_norm": 0.45861902832984924, + "learning_rate": 4.9905480728708725e-06, + "loss": 0.6123, + "step": 1917 + }, + { + "epoch": 0.17509585539528938, + "grad_norm": 0.44943973422050476, + "learning_rate": 4.9905376726953516e-06, + "loss": 0.6062, + "step": 1918 + }, + { + "epoch": 0.17518714624794596, + "grad_norm": 0.5015069246292114, + "learning_rate": 4.990527266812048e-06, + "loss": 0.6153, + "step": 1919 + }, + { + "epoch": 0.1752784371006025, + "grad_norm": 0.5104631781578064, + "learning_rate": 4.9905168552209856e-06, + "loss": 0.6198, + "step": 1920 + }, + { + "epoch": 0.17536972795325909, + "grad_norm": 0.42378556728363037, + "learning_rate": 4.990506437922189e-06, + "loss": 0.6226, + "step": 1921 + }, + { + "epoch": 0.17546101880591564, + "grad_norm": 0.4592893123626709, + "learning_rate": 4.990496014915679e-06, + "loss": 0.6043, + "step": 1922 + }, + { + "epoch": 0.17555230965857221, + "grad_norm": 0.47687220573425293, + "learning_rate": 4.990485586201482e-06, + "loss": 0.6456, + "step": 1923 + }, + { + "epoch": 0.17564360051122876, + "grad_norm": 0.46961647272109985, + "learning_rate": 4.990475151779621e-06, + "loss": 0.605, + "step": 1924 + }, + { + "epoch": 0.17573489136388534, + "grad_norm": 0.5061085224151611, + "learning_rate": 4.99046471165012e-06, + "loss": 0.5729, + "step": 1925 + }, + { + "epoch": 0.1758261822165419, + "grad_norm": 0.4433331787586212, + "learning_rate": 4.990454265813004e-06, + "loss": 0.6552, + "step": 1926 + }, + { + "epoch": 0.17591747306919847, + "grad_norm": 0.4763057231903076, + "learning_rate": 4.990443814268296e-06, + "loss": 0.63, + "step": 1927 + }, + { + "epoch": 0.17600876392185502, + "grad_norm": 0.4857863485813141, + "learning_rate": 4.990433357016019e-06, + "loss": 0.5653, + "step": 1928 + }, + { + "epoch": 0.1761000547745116, + "grad_norm": 0.48465651273727417, + "learning_rate": 4.990422894056198e-06, + "loss": 0.5992, + "step": 1929 + }, + { + "epoch": 0.17619134562716815, + "grad_norm": 0.4548870921134949, + "learning_rate": 4.990412425388858e-06, + "loss": 0.603, + "step": 1930 + }, + { + "epoch": 0.17628263647982473, + "grad_norm": 0.47098249197006226, + "learning_rate": 4.990401951014021e-06, + "loss": 0.6317, + "step": 1931 + }, + { + "epoch": 0.17637392733248128, + "grad_norm": 0.4955928325653076, + "learning_rate": 4.990391470931711e-06, + "loss": 0.6192, + "step": 1932 + }, + { + "epoch": 0.17646521818513786, + "grad_norm": 0.4723578989505768, + "learning_rate": 4.990380985141955e-06, + "loss": 0.6182, + "step": 1933 + }, + { + "epoch": 0.1765565090377944, + "grad_norm": 0.478298544883728, + "learning_rate": 4.990370493644774e-06, + "loss": 0.6262, + "step": 1934 + }, + { + "epoch": 0.17664779989045098, + "grad_norm": 0.4831489622592926, + "learning_rate": 4.990359996440193e-06, + "loss": 0.6072, + "step": 1935 + }, + { + "epoch": 0.17673909074310754, + "grad_norm": 0.48159515857696533, + "learning_rate": 4.990349493528236e-06, + "loss": 0.6166, + "step": 1936 + }, + { + "epoch": 0.1768303815957641, + "grad_norm": 0.4281737208366394, + "learning_rate": 4.990338984908927e-06, + "loss": 0.6467, + "step": 1937 + }, + { + "epoch": 0.17692167244842066, + "grad_norm": 0.4913395047187805, + "learning_rate": 4.99032847058229e-06, + "loss": 0.6046, + "step": 1938 + }, + { + "epoch": 0.17701296330107724, + "grad_norm": 0.42846664786338806, + "learning_rate": 4.99031795054835e-06, + "loss": 0.6844, + "step": 1939 + }, + { + "epoch": 0.1771042541537338, + "grad_norm": 0.45981723070144653, + "learning_rate": 4.99030742480713e-06, + "loss": 0.589, + "step": 1940 + }, + { + "epoch": 0.17719554500639037, + "grad_norm": 0.45330727100372314, + "learning_rate": 4.990296893358655e-06, + "loss": 0.6232, + "step": 1941 + }, + { + "epoch": 0.17728683585904692, + "grad_norm": 0.4463844895362854, + "learning_rate": 4.990286356202948e-06, + "loss": 0.6259, + "step": 1942 + }, + { + "epoch": 0.1773781267117035, + "grad_norm": 0.46398067474365234, + "learning_rate": 4.990275813340034e-06, + "loss": 0.6114, + "step": 1943 + }, + { + "epoch": 0.17746941756436005, + "grad_norm": 0.4527031183242798, + "learning_rate": 4.990265264769938e-06, + "loss": 0.626, + "step": 1944 + }, + { + "epoch": 0.17756070841701663, + "grad_norm": 0.4317363202571869, + "learning_rate": 4.9902547104926815e-06, + "loss": 0.6523, + "step": 1945 + }, + { + "epoch": 0.17765199926967318, + "grad_norm": 0.47578826546669006, + "learning_rate": 4.990244150508291e-06, + "loss": 0.6104, + "step": 1946 + }, + { + "epoch": 0.17774329012232976, + "grad_norm": 0.4755452871322632, + "learning_rate": 4.990233584816791e-06, + "loss": 0.6316, + "step": 1947 + }, + { + "epoch": 0.1778345809749863, + "grad_norm": 0.4472620487213135, + "learning_rate": 4.990223013418204e-06, + "loss": 0.6448, + "step": 1948 + }, + { + "epoch": 0.17792587182764286, + "grad_norm": 0.4688396155834198, + "learning_rate": 4.990212436312555e-06, + "loss": 0.6387, + "step": 1949 + }, + { + "epoch": 0.17801716268029943, + "grad_norm": 0.43764132261276245, + "learning_rate": 4.990201853499869e-06, + "loss": 0.6287, + "step": 1950 + }, + { + "epoch": 0.17810845353295598, + "grad_norm": 0.4869897663593292, + "learning_rate": 4.9901912649801685e-06, + "loss": 0.5973, + "step": 1951 + }, + { + "epoch": 0.17819974438561256, + "grad_norm": 0.4781821072101593, + "learning_rate": 4.990180670753479e-06, + "loss": 0.6087, + "step": 1952 + }, + { + "epoch": 0.1782910352382691, + "grad_norm": 0.43917715549468994, + "learning_rate": 4.990170070819824e-06, + "loss": 0.6347, + "step": 1953 + }, + { + "epoch": 0.1783823260909257, + "grad_norm": 0.45861151814460754, + "learning_rate": 4.9901594651792295e-06, + "loss": 0.5882, + "step": 1954 + }, + { + "epoch": 0.17847361694358224, + "grad_norm": 0.44331663846969604, + "learning_rate": 4.990148853831718e-06, + "loss": 0.621, + "step": 1955 + }, + { + "epoch": 0.17856490779623882, + "grad_norm": 0.4776128828525543, + "learning_rate": 4.990138236777315e-06, + "loss": 0.6359, + "step": 1956 + }, + { + "epoch": 0.17865619864889537, + "grad_norm": 0.47968971729278564, + "learning_rate": 4.990127614016043e-06, + "loss": 0.6369, + "step": 1957 + }, + { + "epoch": 0.17874748950155195, + "grad_norm": 0.4764942526817322, + "learning_rate": 4.990116985547929e-06, + "loss": 0.661, + "step": 1958 + }, + { + "epoch": 0.1788387803542085, + "grad_norm": 0.488770455121994, + "learning_rate": 4.990106351372995e-06, + "loss": 0.5913, + "step": 1959 + }, + { + "epoch": 0.17893007120686508, + "grad_norm": 0.4349696636199951, + "learning_rate": 4.990095711491267e-06, + "loss": 0.6165, + "step": 1960 + }, + { + "epoch": 0.17902136205952163, + "grad_norm": 0.46637672185897827, + "learning_rate": 4.990085065902769e-06, + "loss": 0.5959, + "step": 1961 + }, + { + "epoch": 0.1791126529121782, + "grad_norm": 0.47333115339279175, + "learning_rate": 4.990074414607524e-06, + "loss": 0.6098, + "step": 1962 + }, + { + "epoch": 0.17920394376483476, + "grad_norm": 0.4596130847930908, + "learning_rate": 4.990063757605559e-06, + "loss": 0.6457, + "step": 1963 + }, + { + "epoch": 0.17929523461749133, + "grad_norm": 0.5056995749473572, + "learning_rate": 4.990053094896896e-06, + "loss": 0.62, + "step": 1964 + }, + { + "epoch": 0.17938652547014788, + "grad_norm": 0.474452406167984, + "learning_rate": 4.99004242648156e-06, + "loss": 0.5925, + "step": 1965 + }, + { + "epoch": 0.17947781632280446, + "grad_norm": 0.4698892831802368, + "learning_rate": 4.990031752359577e-06, + "loss": 0.6157, + "step": 1966 + }, + { + "epoch": 0.179569107175461, + "grad_norm": 0.49233686923980713, + "learning_rate": 4.99002107253097e-06, + "loss": 0.605, + "step": 1967 + }, + { + "epoch": 0.1796603980281176, + "grad_norm": 0.48025617003440857, + "learning_rate": 4.990010386995764e-06, + "loss": 0.6011, + "step": 1968 + }, + { + "epoch": 0.17975168888077414, + "grad_norm": 0.4825074374675751, + "learning_rate": 4.989999695753983e-06, + "loss": 0.6223, + "step": 1969 + }, + { + "epoch": 0.17984297973343072, + "grad_norm": 0.4482291340827942, + "learning_rate": 4.989988998805652e-06, + "loss": 0.6338, + "step": 1970 + }, + { + "epoch": 0.17993427058608727, + "grad_norm": 0.4639687240123749, + "learning_rate": 4.9899782961507944e-06, + "loss": 0.6248, + "step": 1971 + }, + { + "epoch": 0.18002556143874385, + "grad_norm": 0.4496769607067108, + "learning_rate": 4.989967587789436e-06, + "loss": 0.5973, + "step": 1972 + }, + { + "epoch": 0.1801168522914004, + "grad_norm": 0.46630164980888367, + "learning_rate": 4.989956873721602e-06, + "loss": 0.6373, + "step": 1973 + }, + { + "epoch": 0.18020814314405698, + "grad_norm": 0.45650726556777954, + "learning_rate": 4.989946153947315e-06, + "loss": 0.6416, + "step": 1974 + }, + { + "epoch": 0.18029943399671353, + "grad_norm": 0.4739907681941986, + "learning_rate": 4.9899354284666004e-06, + "loss": 0.649, + "step": 1975 + }, + { + "epoch": 0.1803907248493701, + "grad_norm": 0.47587263584136963, + "learning_rate": 4.989924697279483e-06, + "loss": 0.5972, + "step": 1976 + }, + { + "epoch": 0.18048201570202665, + "grad_norm": 0.47464442253112793, + "learning_rate": 4.989913960385988e-06, + "loss": 0.6232, + "step": 1977 + }, + { + "epoch": 0.18057330655468323, + "grad_norm": 0.47735196352005005, + "learning_rate": 4.989903217786139e-06, + "loss": 0.5796, + "step": 1978 + }, + { + "epoch": 0.18066459740733978, + "grad_norm": 0.4954230785369873, + "learning_rate": 4.989892469479961e-06, + "loss": 0.5714, + "step": 1979 + }, + { + "epoch": 0.18075588825999636, + "grad_norm": 0.49157658219337463, + "learning_rate": 4.9898817154674785e-06, + "loss": 0.6027, + "step": 1980 + }, + { + "epoch": 0.1808471791126529, + "grad_norm": 0.4633271098136902, + "learning_rate": 4.989870955748715e-06, + "loss": 0.6003, + "step": 1981 + }, + { + "epoch": 0.1809384699653095, + "grad_norm": 0.44755950570106506, + "learning_rate": 4.989860190323698e-06, + "loss": 0.6057, + "step": 1982 + }, + { + "epoch": 0.18102976081796604, + "grad_norm": 0.5034279823303223, + "learning_rate": 4.98984941919245e-06, + "loss": 0.5973, + "step": 1983 + }, + { + "epoch": 0.1811210516706226, + "grad_norm": 0.5084779262542725, + "learning_rate": 4.989838642354995e-06, + "loss": 0.6221, + "step": 1984 + }, + { + "epoch": 0.18121234252327917, + "grad_norm": 0.5033756494522095, + "learning_rate": 4.989827859811361e-06, + "loss": 0.575, + "step": 1985 + }, + { + "epoch": 0.18130363337593572, + "grad_norm": 0.4671507775783539, + "learning_rate": 4.989817071561569e-06, + "loss": 0.6115, + "step": 1986 + }, + { + "epoch": 0.1813949242285923, + "grad_norm": 0.4727565050125122, + "learning_rate": 4.989806277605645e-06, + "loss": 0.6159, + "step": 1987 + }, + { + "epoch": 0.18148621508124885, + "grad_norm": 0.4679848849773407, + "learning_rate": 4.9897954779436155e-06, + "loss": 0.571, + "step": 1988 + }, + { + "epoch": 0.18157750593390543, + "grad_norm": 0.46715977787971497, + "learning_rate": 4.989784672575503e-06, + "loss": 0.6442, + "step": 1989 + }, + { + "epoch": 0.18166879678656198, + "grad_norm": 0.4896464943885803, + "learning_rate": 4.989773861501333e-06, + "loss": 0.5792, + "step": 1990 + }, + { + "epoch": 0.18176008763921855, + "grad_norm": 0.465338796377182, + "learning_rate": 4.989763044721131e-06, + "loss": 0.6364, + "step": 1991 + }, + { + "epoch": 0.1818513784918751, + "grad_norm": 0.4850236177444458, + "learning_rate": 4.989752222234921e-06, + "loss": 0.6123, + "step": 1992 + }, + { + "epoch": 0.18194266934453168, + "grad_norm": 0.478457510471344, + "learning_rate": 4.989741394042728e-06, + "loss": 0.6421, + "step": 1993 + }, + { + "epoch": 0.18203396019718823, + "grad_norm": 0.46482670307159424, + "learning_rate": 4.989730560144576e-06, + "loss": 0.6568, + "step": 1994 + }, + { + "epoch": 0.1821252510498448, + "grad_norm": 0.501465380191803, + "learning_rate": 4.989719720540491e-06, + "loss": 0.6019, + "step": 1995 + }, + { + "epoch": 0.18221654190250136, + "grad_norm": 0.5172914862632751, + "learning_rate": 4.989708875230498e-06, + "loss": 0.5714, + "step": 1996 + }, + { + "epoch": 0.18230783275515794, + "grad_norm": 0.4657897353172302, + "learning_rate": 4.989698024214621e-06, + "loss": 0.6078, + "step": 1997 + }, + { + "epoch": 0.1823991236078145, + "grad_norm": 0.4821411073207855, + "learning_rate": 4.989687167492885e-06, + "loss": 0.5997, + "step": 1998 + }, + { + "epoch": 0.18249041446047107, + "grad_norm": 0.5305683016777039, + "learning_rate": 4.989676305065315e-06, + "loss": 0.6135, + "step": 1999 + }, + { + "epoch": 0.18258170531312762, + "grad_norm": 0.44966256618499756, + "learning_rate": 4.989665436931936e-06, + "loss": 0.6291, + "step": 2000 + }, + { + "epoch": 0.1826729961657842, + "grad_norm": 0.46012964844703674, + "learning_rate": 4.9896545630927724e-06, + "loss": 0.64, + "step": 2001 + }, + { + "epoch": 0.18276428701844075, + "grad_norm": 0.4635990262031555, + "learning_rate": 4.9896436835478505e-06, + "loss": 0.6156, + "step": 2002 + }, + { + "epoch": 0.18285557787109732, + "grad_norm": 0.48942670226097107, + "learning_rate": 4.989632798297194e-06, + "loss": 0.5895, + "step": 2003 + }, + { + "epoch": 0.18294686872375387, + "grad_norm": 0.4616686999797821, + "learning_rate": 4.989621907340828e-06, + "loss": 0.6133, + "step": 2004 + }, + { + "epoch": 0.18303815957641045, + "grad_norm": 0.4746715724468231, + "learning_rate": 4.989611010678778e-06, + "loss": 0.6037, + "step": 2005 + }, + { + "epoch": 0.183129450429067, + "grad_norm": 0.4602739214897156, + "learning_rate": 4.989600108311068e-06, + "loss": 0.6434, + "step": 2006 + }, + { + "epoch": 0.18322074128172358, + "grad_norm": 0.4522697627544403, + "learning_rate": 4.9895892002377234e-06, + "loss": 0.6322, + "step": 2007 + }, + { + "epoch": 0.18331203213438013, + "grad_norm": 0.49896469712257385, + "learning_rate": 4.98957828645877e-06, + "loss": 0.5997, + "step": 2008 + }, + { + "epoch": 0.1834033229870367, + "grad_norm": 0.47255197167396545, + "learning_rate": 4.989567366974232e-06, + "loss": 0.6295, + "step": 2009 + }, + { + "epoch": 0.18349461383969326, + "grad_norm": 0.4954518675804138, + "learning_rate": 4.989556441784134e-06, + "loss": 0.6125, + "step": 2010 + }, + { + "epoch": 0.18358590469234984, + "grad_norm": 0.4607478678226471, + "learning_rate": 4.989545510888502e-06, + "loss": 0.6311, + "step": 2011 + }, + { + "epoch": 0.1836771955450064, + "grad_norm": 0.45442914962768555, + "learning_rate": 4.9895345742873615e-06, + "loss": 0.6294, + "step": 2012 + }, + { + "epoch": 0.18376848639766297, + "grad_norm": 0.47014033794403076, + "learning_rate": 4.989523631980736e-06, + "loss": 0.6214, + "step": 2013 + }, + { + "epoch": 0.18385977725031952, + "grad_norm": 0.4531923532485962, + "learning_rate": 4.989512683968651e-06, + "loss": 0.626, + "step": 2014 + }, + { + "epoch": 0.1839510681029761, + "grad_norm": 0.444436639547348, + "learning_rate": 4.989501730251133e-06, + "loss": 0.6122, + "step": 2015 + }, + { + "epoch": 0.18404235895563265, + "grad_norm": 0.46515944600105286, + "learning_rate": 4.989490770828205e-06, + "loss": 0.6132, + "step": 2016 + }, + { + "epoch": 0.1841336498082892, + "grad_norm": 0.467538058757782, + "learning_rate": 4.989479805699894e-06, + "loss": 0.6305, + "step": 2017 + }, + { + "epoch": 0.18422494066094577, + "grad_norm": 0.4714435935020447, + "learning_rate": 4.989468834866224e-06, + "loss": 0.6265, + "step": 2018 + }, + { + "epoch": 0.18431623151360232, + "grad_norm": 0.42835354804992676, + "learning_rate": 4.9894578583272195e-06, + "loss": 0.6559, + "step": 2019 + }, + { + "epoch": 0.1844075223662589, + "grad_norm": 0.45113182067871094, + "learning_rate": 4.989446876082907e-06, + "loss": 0.6553, + "step": 2020 + }, + { + "epoch": 0.18449881321891545, + "grad_norm": 0.45193085074424744, + "learning_rate": 4.989435888133312e-06, + "loss": 0.6229, + "step": 2021 + }, + { + "epoch": 0.18459010407157203, + "grad_norm": 0.4413308799266815, + "learning_rate": 4.989424894478458e-06, + "loss": 0.616, + "step": 2022 + }, + { + "epoch": 0.18468139492422858, + "grad_norm": 0.5271345973014832, + "learning_rate": 4.989413895118372e-06, + "loss": 0.6147, + "step": 2023 + }, + { + "epoch": 0.18477268577688516, + "grad_norm": 0.49792635440826416, + "learning_rate": 4.989402890053078e-06, + "loss": 0.6162, + "step": 2024 + }, + { + "epoch": 0.1848639766295417, + "grad_norm": 0.4427639842033386, + "learning_rate": 4.9893918792826014e-06, + "loss": 0.6318, + "step": 2025 + }, + { + "epoch": 0.1849552674821983, + "grad_norm": 0.4754314720630646, + "learning_rate": 4.989380862806968e-06, + "loss": 0.6312, + "step": 2026 + }, + { + "epoch": 0.18504655833485484, + "grad_norm": 0.4572851359844208, + "learning_rate": 4.989369840626201e-06, + "loss": 0.5759, + "step": 2027 + }, + { + "epoch": 0.18513784918751142, + "grad_norm": 0.45087698101997375, + "learning_rate": 4.989358812740329e-06, + "loss": 0.6234, + "step": 2028 + }, + { + "epoch": 0.18522914004016797, + "grad_norm": 0.4623314440250397, + "learning_rate": 4.9893477791493756e-06, + "loss": 0.6144, + "step": 2029 + }, + { + "epoch": 0.18532043089282454, + "grad_norm": 0.4541591703891754, + "learning_rate": 4.9893367398533656e-06, + "loss": 0.6122, + "step": 2030 + }, + { + "epoch": 0.1854117217454811, + "grad_norm": 0.43251603841781616, + "learning_rate": 4.989325694852325e-06, + "loss": 0.6605, + "step": 2031 + }, + { + "epoch": 0.18550301259813767, + "grad_norm": 0.47077348828315735, + "learning_rate": 4.989314644146278e-06, + "loss": 0.5904, + "step": 2032 + }, + { + "epoch": 0.18559430345079422, + "grad_norm": 0.4711674153804779, + "learning_rate": 4.989303587735251e-06, + "loss": 0.5881, + "step": 2033 + }, + { + "epoch": 0.1856855943034508, + "grad_norm": 0.4914506673812866, + "learning_rate": 4.9892925256192706e-06, + "loss": 0.6325, + "step": 2034 + }, + { + "epoch": 0.18577688515610735, + "grad_norm": 0.5227639675140381, + "learning_rate": 4.98928145779836e-06, + "loss": 0.6, + "step": 2035 + }, + { + "epoch": 0.18586817600876393, + "grad_norm": 0.473072350025177, + "learning_rate": 4.989270384272545e-06, + "loss": 0.6595, + "step": 2036 + }, + { + "epoch": 0.18595946686142048, + "grad_norm": 0.4894888699054718, + "learning_rate": 4.98925930504185e-06, + "loss": 0.5912, + "step": 2037 + }, + { + "epoch": 0.18605075771407706, + "grad_norm": 0.48220425844192505, + "learning_rate": 4.989248220106303e-06, + "loss": 0.6149, + "step": 2038 + }, + { + "epoch": 0.1861420485667336, + "grad_norm": 0.5086258053779602, + "learning_rate": 4.989237129465927e-06, + "loss": 0.5968, + "step": 2039 + }, + { + "epoch": 0.1862333394193902, + "grad_norm": 0.46099579334259033, + "learning_rate": 4.98922603312075e-06, + "loss": 0.5675, + "step": 2040 + }, + { + "epoch": 0.18632463027204674, + "grad_norm": 0.5052932500839233, + "learning_rate": 4.989214931070795e-06, + "loss": 0.5932, + "step": 2041 + }, + { + "epoch": 0.18641592112470332, + "grad_norm": 0.4433305561542511, + "learning_rate": 4.989203823316088e-06, + "loss": 0.584, + "step": 2042 + }, + { + "epoch": 0.18650721197735987, + "grad_norm": 0.4788001775741577, + "learning_rate": 4.989192709856655e-06, + "loss": 0.6172, + "step": 2043 + }, + { + "epoch": 0.18659850283001644, + "grad_norm": 0.4783608615398407, + "learning_rate": 4.989181590692521e-06, + "loss": 0.6381, + "step": 2044 + }, + { + "epoch": 0.186689793682673, + "grad_norm": 0.49077993631362915, + "learning_rate": 4.989170465823713e-06, + "loss": 0.6397, + "step": 2045 + }, + { + "epoch": 0.18678108453532957, + "grad_norm": 0.5179793834686279, + "learning_rate": 4.989159335250253e-06, + "loss": 0.6318, + "step": 2046 + }, + { + "epoch": 0.18687237538798612, + "grad_norm": 0.4771205186843872, + "learning_rate": 4.989148198972171e-06, + "loss": 0.6428, + "step": 2047 + }, + { + "epoch": 0.1869636662406427, + "grad_norm": 0.5082806944847107, + "learning_rate": 4.9891370569894895e-06, + "loss": 0.6071, + "step": 2048 + }, + { + "epoch": 0.18705495709329925, + "grad_norm": 0.43049269914627075, + "learning_rate": 4.989125909302234e-06, + "loss": 0.6386, + "step": 2049 + }, + { + "epoch": 0.1871462479459558, + "grad_norm": 0.44470763206481934, + "learning_rate": 4.989114755910431e-06, + "loss": 0.5901, + "step": 2050 + }, + { + "epoch": 0.18723753879861238, + "grad_norm": 0.4747295081615448, + "learning_rate": 4.989103596814106e-06, + "loss": 0.6066, + "step": 2051 + }, + { + "epoch": 0.18732882965126893, + "grad_norm": 0.4601932168006897, + "learning_rate": 4.989092432013285e-06, + "loss": 0.6353, + "step": 2052 + }, + { + "epoch": 0.1874201205039255, + "grad_norm": 0.46776866912841797, + "learning_rate": 4.9890812615079935e-06, + "loss": 0.6422, + "step": 2053 + }, + { + "epoch": 0.18751141135658206, + "grad_norm": 0.4517160654067993, + "learning_rate": 4.9890700852982556e-06, + "loss": 0.6058, + "step": 2054 + }, + { + "epoch": 0.18760270220923864, + "grad_norm": 0.43257632851600647, + "learning_rate": 4.989058903384098e-06, + "loss": 0.6269, + "step": 2055 + }, + { + "epoch": 0.1876939930618952, + "grad_norm": 0.502618134021759, + "learning_rate": 4.989047715765547e-06, + "loss": 0.5861, + "step": 2056 + }, + { + "epoch": 0.18778528391455176, + "grad_norm": 0.5160783529281616, + "learning_rate": 4.9890365224426265e-06, + "loss": 0.6127, + "step": 2057 + }, + { + "epoch": 0.18787657476720832, + "grad_norm": 0.4514101445674896, + "learning_rate": 4.989025323415364e-06, + "loss": 0.648, + "step": 2058 + }, + { + "epoch": 0.1879678656198649, + "grad_norm": 0.4613990783691406, + "learning_rate": 4.989014118683784e-06, + "loss": 0.6246, + "step": 2059 + }, + { + "epoch": 0.18805915647252144, + "grad_norm": 0.4751376807689667, + "learning_rate": 4.989002908247913e-06, + "loss": 0.6178, + "step": 2060 + }, + { + "epoch": 0.18815044732517802, + "grad_norm": 0.4746283292770386, + "learning_rate": 4.9889916921077756e-06, + "loss": 0.5702, + "step": 2061 + }, + { + "epoch": 0.18824173817783457, + "grad_norm": 0.5045527815818787, + "learning_rate": 4.988980470263399e-06, + "loss": 0.5938, + "step": 2062 + }, + { + "epoch": 0.18833302903049115, + "grad_norm": 0.4935254752635956, + "learning_rate": 4.988969242714807e-06, + "loss": 0.6112, + "step": 2063 + }, + { + "epoch": 0.1884243198831477, + "grad_norm": 0.42991581559181213, + "learning_rate": 4.988958009462027e-06, + "loss": 0.6171, + "step": 2064 + }, + { + "epoch": 0.18851561073580428, + "grad_norm": 0.4889358580112457, + "learning_rate": 4.988946770505084e-06, + "loss": 0.6129, + "step": 2065 + }, + { + "epoch": 0.18860690158846083, + "grad_norm": 0.47880277037620544, + "learning_rate": 4.988935525844005e-06, + "loss": 0.6599, + "step": 2066 + }, + { + "epoch": 0.1886981924411174, + "grad_norm": 0.4569988548755646, + "learning_rate": 4.988924275478812e-06, + "loss": 0.642, + "step": 2067 + }, + { + "epoch": 0.18878948329377396, + "grad_norm": 0.5033777356147766, + "learning_rate": 4.988913019409535e-06, + "loss": 0.6273, + "step": 2068 + }, + { + "epoch": 0.18888077414643054, + "grad_norm": 0.43015289306640625, + "learning_rate": 4.988901757636199e-06, + "loss": 0.6188, + "step": 2069 + }, + { + "epoch": 0.18897206499908709, + "grad_norm": 0.468392938375473, + "learning_rate": 4.988890490158828e-06, + "loss": 0.6218, + "step": 2070 + }, + { + "epoch": 0.18906335585174366, + "grad_norm": 0.4760860204696655, + "learning_rate": 4.988879216977449e-06, + "loss": 0.61, + "step": 2071 + }, + { + "epoch": 0.18915464670440021, + "grad_norm": 0.475024551153183, + "learning_rate": 4.988867938092087e-06, + "loss": 0.6145, + "step": 2072 + }, + { + "epoch": 0.1892459375570568, + "grad_norm": 0.4553416669368744, + "learning_rate": 4.988856653502769e-06, + "loss": 0.605, + "step": 2073 + }, + { + "epoch": 0.18933722840971334, + "grad_norm": 0.4482528865337372, + "learning_rate": 4.988845363209521e-06, + "loss": 0.572, + "step": 2074 + }, + { + "epoch": 0.18942851926236992, + "grad_norm": 0.4930385947227478, + "learning_rate": 4.988834067212368e-06, + "loss": 0.5752, + "step": 2075 + }, + { + "epoch": 0.18951981011502647, + "grad_norm": 0.49522194266319275, + "learning_rate": 4.988822765511337e-06, + "loss": 0.6104, + "step": 2076 + }, + { + "epoch": 0.18961110096768305, + "grad_norm": 0.44486168026924133, + "learning_rate": 4.988811458106452e-06, + "loss": 0.6143, + "step": 2077 + }, + { + "epoch": 0.1897023918203396, + "grad_norm": 0.4483310878276825, + "learning_rate": 4.98880014499774e-06, + "loss": 0.6231, + "step": 2078 + }, + { + "epoch": 0.18979368267299618, + "grad_norm": 0.47309181094169617, + "learning_rate": 4.988788826185227e-06, + "loss": 0.5894, + "step": 2079 + }, + { + "epoch": 0.18988497352565273, + "grad_norm": 0.4456103444099426, + "learning_rate": 4.988777501668939e-06, + "loss": 0.6193, + "step": 2080 + }, + { + "epoch": 0.1899762643783093, + "grad_norm": 0.47491616010665894, + "learning_rate": 4.988766171448902e-06, + "loss": 0.5986, + "step": 2081 + }, + { + "epoch": 0.19006755523096586, + "grad_norm": 0.4654223918914795, + "learning_rate": 4.988754835525141e-06, + "loss": 0.5941, + "step": 2082 + }, + { + "epoch": 0.19015884608362243, + "grad_norm": 0.49841272830963135, + "learning_rate": 4.988743493897684e-06, + "loss": 0.6147, + "step": 2083 + }, + { + "epoch": 0.19025013693627899, + "grad_norm": 0.45189425349235535, + "learning_rate": 4.988732146566555e-06, + "loss": 0.6429, + "step": 2084 + }, + { + "epoch": 0.19034142778893554, + "grad_norm": 0.4647059142589569, + "learning_rate": 4.9887207935317805e-06, + "loss": 0.5837, + "step": 2085 + }, + { + "epoch": 0.1904327186415921, + "grad_norm": 0.46857911348342896, + "learning_rate": 4.988709434793387e-06, + "loss": 0.6509, + "step": 2086 + }, + { + "epoch": 0.19052400949424866, + "grad_norm": 0.45139431953430176, + "learning_rate": 4.988698070351401e-06, + "loss": 0.619, + "step": 2087 + }, + { + "epoch": 0.19061530034690524, + "grad_norm": 0.5126699805259705, + "learning_rate": 4.988686700205847e-06, + "loss": 0.6079, + "step": 2088 + }, + { + "epoch": 0.1907065911995618, + "grad_norm": 0.45233163237571716, + "learning_rate": 4.988675324356752e-06, + "loss": 0.6592, + "step": 2089 + }, + { + "epoch": 0.19079788205221837, + "grad_norm": 0.47542068362236023, + "learning_rate": 4.988663942804143e-06, + "loss": 0.5839, + "step": 2090 + }, + { + "epoch": 0.19088917290487492, + "grad_norm": 0.46675941348075867, + "learning_rate": 4.988652555548044e-06, + "loss": 0.6192, + "step": 2091 + }, + { + "epoch": 0.1909804637575315, + "grad_norm": 0.43993380665779114, + "learning_rate": 4.988641162588482e-06, + "loss": 0.6335, + "step": 2092 + }, + { + "epoch": 0.19107175461018805, + "grad_norm": 0.47965219616889954, + "learning_rate": 4.988629763925483e-06, + "loss": 0.5959, + "step": 2093 + }, + { + "epoch": 0.19116304546284463, + "grad_norm": 0.47348636388778687, + "learning_rate": 4.988618359559074e-06, + "loss": 0.5754, + "step": 2094 + }, + { + "epoch": 0.19125433631550118, + "grad_norm": 0.4638667106628418, + "learning_rate": 4.988606949489281e-06, + "loss": 0.5724, + "step": 2095 + }, + { + "epoch": 0.19134562716815776, + "grad_norm": 0.4808835983276367, + "learning_rate": 4.988595533716129e-06, + "loss": 0.6078, + "step": 2096 + }, + { + "epoch": 0.1914369180208143, + "grad_norm": 0.4643343687057495, + "learning_rate": 4.988584112239645e-06, + "loss": 0.6299, + "step": 2097 + }, + { + "epoch": 0.19152820887347088, + "grad_norm": 0.5079434514045715, + "learning_rate": 4.988572685059855e-06, + "loss": 0.5736, + "step": 2098 + }, + { + "epoch": 0.19161949972612743, + "grad_norm": 0.45968496799468994, + "learning_rate": 4.9885612521767855e-06, + "loss": 0.6046, + "step": 2099 + }, + { + "epoch": 0.191710790578784, + "grad_norm": 0.46687453985214233, + "learning_rate": 4.988549813590462e-06, + "loss": 0.6253, + "step": 2100 + }, + { + "epoch": 0.19180208143144056, + "grad_norm": 0.47866323590278625, + "learning_rate": 4.988538369300911e-06, + "loss": 0.6225, + "step": 2101 + }, + { + "epoch": 0.19189337228409714, + "grad_norm": 0.45322614908218384, + "learning_rate": 4.98852691930816e-06, + "loss": 0.6293, + "step": 2102 + }, + { + "epoch": 0.1919846631367537, + "grad_norm": 0.4651934802532196, + "learning_rate": 4.9885154636122325e-06, + "loss": 0.576, + "step": 2103 + }, + { + "epoch": 0.19207595398941027, + "grad_norm": 0.48211994767189026, + "learning_rate": 4.988504002213157e-06, + "loss": 0.5867, + "step": 2104 + }, + { + "epoch": 0.19216724484206682, + "grad_norm": 0.4629667103290558, + "learning_rate": 4.988492535110959e-06, + "loss": 0.6384, + "step": 2105 + }, + { + "epoch": 0.1922585356947234, + "grad_norm": 0.4638424515724182, + "learning_rate": 4.988481062305665e-06, + "loss": 0.6429, + "step": 2106 + }, + { + "epoch": 0.19234982654737995, + "grad_norm": 0.46841806173324585, + "learning_rate": 4.988469583797302e-06, + "loss": 0.6188, + "step": 2107 + }, + { + "epoch": 0.19244111740003653, + "grad_norm": 0.47086870670318604, + "learning_rate": 4.988458099585895e-06, + "loss": 0.6331, + "step": 2108 + }, + { + "epoch": 0.19253240825269308, + "grad_norm": 0.45484018325805664, + "learning_rate": 4.9884466096714705e-06, + "loss": 0.6376, + "step": 2109 + }, + { + "epoch": 0.19262369910534966, + "grad_norm": 0.4439411461353302, + "learning_rate": 4.988435114054055e-06, + "loss": 0.6226, + "step": 2110 + }, + { + "epoch": 0.1927149899580062, + "grad_norm": 0.45686307549476624, + "learning_rate": 4.988423612733675e-06, + "loss": 0.6032, + "step": 2111 + }, + { + "epoch": 0.19280628081066278, + "grad_norm": 0.4656987488269806, + "learning_rate": 4.988412105710357e-06, + "loss": 0.6426, + "step": 2112 + }, + { + "epoch": 0.19289757166331933, + "grad_norm": 0.4779198467731476, + "learning_rate": 4.988400592984127e-06, + "loss": 0.6091, + "step": 2113 + }, + { + "epoch": 0.1929888625159759, + "grad_norm": 0.455575168132782, + "learning_rate": 4.988389074555012e-06, + "loss": 0.6214, + "step": 2114 + }, + { + "epoch": 0.19308015336863246, + "grad_norm": 0.44416743516921997, + "learning_rate": 4.9883775504230375e-06, + "loss": 0.6013, + "step": 2115 + }, + { + "epoch": 0.19317144422128904, + "grad_norm": 0.440810889005661, + "learning_rate": 4.988366020588231e-06, + "loss": 0.5937, + "step": 2116 + }, + { + "epoch": 0.1932627350739456, + "grad_norm": 0.45785191655158997, + "learning_rate": 4.988354485050618e-06, + "loss": 0.622, + "step": 2117 + }, + { + "epoch": 0.19335402592660214, + "grad_norm": 0.4763910174369812, + "learning_rate": 4.988342943810225e-06, + "loss": 0.6427, + "step": 2118 + }, + { + "epoch": 0.19344531677925872, + "grad_norm": 0.4562667906284332, + "learning_rate": 4.988331396867079e-06, + "loss": 0.6243, + "step": 2119 + }, + { + "epoch": 0.19353660763191527, + "grad_norm": 0.48397096991539, + "learning_rate": 4.988319844221207e-06, + "loss": 0.6261, + "step": 2120 + }, + { + "epoch": 0.19362789848457185, + "grad_norm": 0.4804252088069916, + "learning_rate": 4.988308285872633e-06, + "loss": 0.612, + "step": 2121 + }, + { + "epoch": 0.1937191893372284, + "grad_norm": 0.4673190712928772, + "learning_rate": 4.988296721821386e-06, + "loss": 0.6127, + "step": 2122 + }, + { + "epoch": 0.19381048018988498, + "grad_norm": 0.45267578959465027, + "learning_rate": 4.988285152067492e-06, + "loss": 0.6185, + "step": 2123 + }, + { + "epoch": 0.19390177104254153, + "grad_norm": 0.46288877725601196, + "learning_rate": 4.988273576610977e-06, + "loss": 0.6114, + "step": 2124 + }, + { + "epoch": 0.1939930618951981, + "grad_norm": 0.4848719835281372, + "learning_rate": 4.988261995451867e-06, + "loss": 0.6197, + "step": 2125 + }, + { + "epoch": 0.19408435274785465, + "grad_norm": 0.44333234429359436, + "learning_rate": 4.98825040859019e-06, + "loss": 0.6254, + "step": 2126 + }, + { + "epoch": 0.19417564360051123, + "grad_norm": 0.49472153186798096, + "learning_rate": 4.988238816025972e-06, + "loss": 0.6252, + "step": 2127 + }, + { + "epoch": 0.19426693445316778, + "grad_norm": 0.48586079478263855, + "learning_rate": 4.988227217759239e-06, + "loss": 0.6127, + "step": 2128 + }, + { + "epoch": 0.19435822530582436, + "grad_norm": 0.46182793378829956, + "learning_rate": 4.988215613790018e-06, + "loss": 0.6203, + "step": 2129 + }, + { + "epoch": 0.1944495161584809, + "grad_norm": 0.45585671067237854, + "learning_rate": 4.988204004118335e-06, + "loss": 0.5783, + "step": 2130 + }, + { + "epoch": 0.1945408070111375, + "grad_norm": 0.4572851359844208, + "learning_rate": 4.988192388744217e-06, + "loss": 0.6116, + "step": 2131 + }, + { + "epoch": 0.19463209786379404, + "grad_norm": 0.4618508219718933, + "learning_rate": 4.988180767667692e-06, + "loss": 0.6251, + "step": 2132 + }, + { + "epoch": 0.19472338871645062, + "grad_norm": 0.4885331690311432, + "learning_rate": 4.988169140888784e-06, + "loss": 0.6099, + "step": 2133 + }, + { + "epoch": 0.19481467956910717, + "grad_norm": 0.46847963333129883, + "learning_rate": 4.988157508407523e-06, + "loss": 0.6167, + "step": 2134 + }, + { + "epoch": 0.19490597042176375, + "grad_norm": 0.46366557478904724, + "learning_rate": 4.988145870223932e-06, + "loss": 0.6173, + "step": 2135 + }, + { + "epoch": 0.1949972612744203, + "grad_norm": 0.45790761709213257, + "learning_rate": 4.98813422633804e-06, + "loss": 0.5982, + "step": 2136 + }, + { + "epoch": 0.19508855212707688, + "grad_norm": 0.4338397681713104, + "learning_rate": 4.9881225767498735e-06, + "loss": 0.6357, + "step": 2137 + }, + { + "epoch": 0.19517984297973343, + "grad_norm": 0.475614994764328, + "learning_rate": 4.988110921459458e-06, + "loss": 0.6169, + "step": 2138 + }, + { + "epoch": 0.19527113383239, + "grad_norm": 0.47536149621009827, + "learning_rate": 4.98809926046682e-06, + "loss": 0.6014, + "step": 2139 + }, + { + "epoch": 0.19536242468504655, + "grad_norm": 0.47896501421928406, + "learning_rate": 4.988087593771988e-06, + "loss": 0.632, + "step": 2140 + }, + { + "epoch": 0.19545371553770313, + "grad_norm": 0.4981900155544281, + "learning_rate": 4.988075921374989e-06, + "loss": 0.5967, + "step": 2141 + }, + { + "epoch": 0.19554500639035968, + "grad_norm": 0.45395395159721375, + "learning_rate": 4.988064243275848e-06, + "loss": 0.6345, + "step": 2142 + }, + { + "epoch": 0.19563629724301626, + "grad_norm": 0.4826612174510956, + "learning_rate": 4.988052559474592e-06, + "loss": 0.6183, + "step": 2143 + }, + { + "epoch": 0.1957275880956728, + "grad_norm": 0.4519719183444977, + "learning_rate": 4.988040869971249e-06, + "loss": 0.6456, + "step": 2144 + }, + { + "epoch": 0.1958188789483294, + "grad_norm": 0.48017460107803345, + "learning_rate": 4.9880291747658445e-06, + "loss": 0.6111, + "step": 2145 + }, + { + "epoch": 0.19591016980098594, + "grad_norm": 0.515299379825592, + "learning_rate": 4.988017473858407e-06, + "loss": 0.588, + "step": 2146 + }, + { + "epoch": 0.19600146065364252, + "grad_norm": 0.4408838748931885, + "learning_rate": 4.9880057672489606e-06, + "loss": 0.6158, + "step": 2147 + }, + { + "epoch": 0.19609275150629907, + "grad_norm": 0.45128926634788513, + "learning_rate": 4.9879940549375335e-06, + "loss": 0.5755, + "step": 2148 + }, + { + "epoch": 0.19618404235895565, + "grad_norm": 0.48204106092453003, + "learning_rate": 4.987982336924153e-06, + "loss": 0.6125, + "step": 2149 + }, + { + "epoch": 0.1962753332116122, + "grad_norm": 0.5043826103210449, + "learning_rate": 4.9879706132088465e-06, + "loss": 0.626, + "step": 2150 + }, + { + "epoch": 0.19636662406426875, + "grad_norm": 0.48323020339012146, + "learning_rate": 4.987958883791639e-06, + "loss": 0.6451, + "step": 2151 + }, + { + "epoch": 0.19645791491692532, + "grad_norm": 0.4478207528591156, + "learning_rate": 4.987947148672559e-06, + "loss": 0.621, + "step": 2152 + }, + { + "epoch": 0.19654920576958188, + "grad_norm": 0.47531214356422424, + "learning_rate": 4.987935407851633e-06, + "loss": 0.6121, + "step": 2153 + }, + { + "epoch": 0.19664049662223845, + "grad_norm": 0.48037829995155334, + "learning_rate": 4.987923661328887e-06, + "loss": 0.5939, + "step": 2154 + }, + { + "epoch": 0.196731787474895, + "grad_norm": 0.4725981652736664, + "learning_rate": 4.987911909104349e-06, + "loss": 0.637, + "step": 2155 + }, + { + "epoch": 0.19682307832755158, + "grad_norm": 0.5312232971191406, + "learning_rate": 4.987900151178046e-06, + "loss": 0.6238, + "step": 2156 + }, + { + "epoch": 0.19691436918020813, + "grad_norm": 0.4802699089050293, + "learning_rate": 4.9878883875500035e-06, + "loss": 0.6216, + "step": 2157 + }, + { + "epoch": 0.1970056600328647, + "grad_norm": 0.42026278376579285, + "learning_rate": 4.98787661822025e-06, + "loss": 0.6243, + "step": 2158 + }, + { + "epoch": 0.19709695088552126, + "grad_norm": 0.5047383308410645, + "learning_rate": 4.987864843188812e-06, + "loss": 0.5997, + "step": 2159 + }, + { + "epoch": 0.19718824173817784, + "grad_norm": 0.49453067779541016, + "learning_rate": 4.987853062455717e-06, + "loss": 0.592, + "step": 2160 + }, + { + "epoch": 0.1972795325908344, + "grad_norm": 0.5323103070259094, + "learning_rate": 4.9878412760209905e-06, + "loss": 0.5943, + "step": 2161 + }, + { + "epoch": 0.19737082344349097, + "grad_norm": 0.466146320104599, + "learning_rate": 4.987829483884661e-06, + "loss": 0.6065, + "step": 2162 + }, + { + "epoch": 0.19746211429614752, + "grad_norm": 0.43903565406799316, + "learning_rate": 4.9878176860467544e-06, + "loss": 0.6299, + "step": 2163 + }, + { + "epoch": 0.1975534051488041, + "grad_norm": 0.4564101994037628, + "learning_rate": 4.987805882507299e-06, + "loss": 0.5956, + "step": 2164 + }, + { + "epoch": 0.19764469600146065, + "grad_norm": 0.4563184976577759, + "learning_rate": 4.987794073266321e-06, + "loss": 0.5988, + "step": 2165 + }, + { + "epoch": 0.19773598685411722, + "grad_norm": 0.43411415815353394, + "learning_rate": 4.987782258323847e-06, + "loss": 0.6475, + "step": 2166 + }, + { + "epoch": 0.19782727770677377, + "grad_norm": 0.46514660120010376, + "learning_rate": 4.987770437679905e-06, + "loss": 0.6122, + "step": 2167 + }, + { + "epoch": 0.19791856855943035, + "grad_norm": 0.46153032779693604, + "learning_rate": 4.987758611334522e-06, + "loss": 0.6092, + "step": 2168 + }, + { + "epoch": 0.1980098594120869, + "grad_norm": 0.4928129017353058, + "learning_rate": 4.9877467792877245e-06, + "loss": 0.6316, + "step": 2169 + }, + { + "epoch": 0.19810115026474348, + "grad_norm": 0.45968201756477356, + "learning_rate": 4.987734941539541e-06, + "loss": 0.6043, + "step": 2170 + }, + { + "epoch": 0.19819244111740003, + "grad_norm": 0.49846151471138, + "learning_rate": 4.987723098089996e-06, + "loss": 0.6067, + "step": 2171 + }, + { + "epoch": 0.1982837319700566, + "grad_norm": 0.45652979612350464, + "learning_rate": 4.98771124893912e-06, + "loss": 0.6264, + "step": 2172 + }, + { + "epoch": 0.19837502282271316, + "grad_norm": 0.4911973476409912, + "learning_rate": 4.987699394086938e-06, + "loss": 0.6177, + "step": 2173 + }, + { + "epoch": 0.19846631367536974, + "grad_norm": 0.4950370788574219, + "learning_rate": 4.987687533533476e-06, + "loss": 0.6307, + "step": 2174 + }, + { + "epoch": 0.1985576045280263, + "grad_norm": 0.4700334370136261, + "learning_rate": 4.987675667278764e-06, + "loss": 0.6224, + "step": 2175 + }, + { + "epoch": 0.19864889538068287, + "grad_norm": 0.46540704369544983, + "learning_rate": 4.987663795322828e-06, + "loss": 0.6349, + "step": 2176 + }, + { + "epoch": 0.19874018623333942, + "grad_norm": 0.481821745634079, + "learning_rate": 4.9876519176656956e-06, + "loss": 0.5775, + "step": 2177 + }, + { + "epoch": 0.198831477085996, + "grad_norm": 0.44496434926986694, + "learning_rate": 4.987640034307393e-06, + "loss": 0.647, + "step": 2178 + }, + { + "epoch": 0.19892276793865254, + "grad_norm": 0.4507770836353302, + "learning_rate": 4.987628145247948e-06, + "loss": 0.6267, + "step": 2179 + }, + { + "epoch": 0.19901405879130912, + "grad_norm": 0.45230644941329956, + "learning_rate": 4.9876162504873885e-06, + "loss": 0.6234, + "step": 2180 + }, + { + "epoch": 0.19910534964396567, + "grad_norm": 0.5137555003166199, + "learning_rate": 4.987604350025741e-06, + "loss": 0.6367, + "step": 2181 + }, + { + "epoch": 0.19919664049662225, + "grad_norm": 0.5004298686981201, + "learning_rate": 4.987592443863032e-06, + "loss": 0.6003, + "step": 2182 + }, + { + "epoch": 0.1992879313492788, + "grad_norm": 0.4556944668292999, + "learning_rate": 4.987580531999291e-06, + "loss": 0.6104, + "step": 2183 + }, + { + "epoch": 0.19937922220193538, + "grad_norm": 0.45007064938545227, + "learning_rate": 4.987568614434543e-06, + "loss": 0.6092, + "step": 2184 + }, + { + "epoch": 0.19947051305459193, + "grad_norm": 0.46115052700042725, + "learning_rate": 4.987556691168817e-06, + "loss": 0.5997, + "step": 2185 + }, + { + "epoch": 0.19956180390724848, + "grad_norm": 0.43931376934051514, + "learning_rate": 4.987544762202139e-06, + "loss": 0.649, + "step": 2186 + }, + { + "epoch": 0.19965309475990506, + "grad_norm": 0.45384860038757324, + "learning_rate": 4.987532827534537e-06, + "loss": 0.6063, + "step": 2187 + }, + { + "epoch": 0.1997443856125616, + "grad_norm": 0.49424251914024353, + "learning_rate": 4.987520887166039e-06, + "loss": 0.5856, + "step": 2188 + }, + { + "epoch": 0.1998356764652182, + "grad_norm": 0.4685875177383423, + "learning_rate": 4.987508941096671e-06, + "loss": 0.6204, + "step": 2189 + }, + { + "epoch": 0.19992696731787474, + "grad_norm": 0.4693409204483032, + "learning_rate": 4.987496989326462e-06, + "loss": 0.6661, + "step": 2190 + }, + { + "epoch": 0.20001825817053132, + "grad_norm": 0.44216176867485046, + "learning_rate": 4.987485031855438e-06, + "loss": 0.6533, + "step": 2191 + }, + { + "epoch": 0.20010954902318787, + "grad_norm": 0.45043137669563293, + "learning_rate": 4.987473068683626e-06, + "loss": 0.6441, + "step": 2192 + }, + { + "epoch": 0.20020083987584444, + "grad_norm": 0.4829705059528351, + "learning_rate": 4.987461099811056e-06, + "loss": 0.5861, + "step": 2193 + }, + { + "epoch": 0.200292130728501, + "grad_norm": 0.4558354616165161, + "learning_rate": 4.987449125237752e-06, + "loss": 0.598, + "step": 2194 + }, + { + "epoch": 0.20038342158115757, + "grad_norm": 0.46277421712875366, + "learning_rate": 4.987437144963745e-06, + "loss": 0.6078, + "step": 2195 + }, + { + "epoch": 0.20047471243381412, + "grad_norm": 0.4883221387863159, + "learning_rate": 4.987425158989059e-06, + "loss": 0.6152, + "step": 2196 + }, + { + "epoch": 0.2005660032864707, + "grad_norm": 0.47993913292884827, + "learning_rate": 4.987413167313724e-06, + "loss": 0.6139, + "step": 2197 + }, + { + "epoch": 0.20065729413912725, + "grad_norm": 0.4640604257583618, + "learning_rate": 4.987401169937766e-06, + "loss": 0.6389, + "step": 2198 + }, + { + "epoch": 0.20074858499178383, + "grad_norm": 0.47599083185195923, + "learning_rate": 4.987389166861213e-06, + "loss": 0.6134, + "step": 2199 + }, + { + "epoch": 0.20083987584444038, + "grad_norm": 0.45747217535972595, + "learning_rate": 4.987377158084093e-06, + "loss": 0.635, + "step": 2200 + }, + { + "epoch": 0.20093116669709696, + "grad_norm": 0.486750990152359, + "learning_rate": 4.9873651436064326e-06, + "loss": 0.6208, + "step": 2201 + }, + { + "epoch": 0.2010224575497535, + "grad_norm": 0.4568743109703064, + "learning_rate": 4.98735312342826e-06, + "loss": 0.6474, + "step": 2202 + }, + { + "epoch": 0.2011137484024101, + "grad_norm": 0.45030295848846436, + "learning_rate": 4.987341097549603e-06, + "loss": 0.6256, + "step": 2203 + }, + { + "epoch": 0.20120503925506664, + "grad_norm": 0.46705496311187744, + "learning_rate": 4.987329065970488e-06, + "loss": 0.6264, + "step": 2204 + }, + { + "epoch": 0.20129633010772321, + "grad_norm": 0.4819604456424713, + "learning_rate": 4.987317028690944e-06, + "loss": 0.6336, + "step": 2205 + }, + { + "epoch": 0.20138762096037977, + "grad_norm": 0.47587820887565613, + "learning_rate": 4.987304985710998e-06, + "loss": 0.6159, + "step": 2206 + }, + { + "epoch": 0.20147891181303634, + "grad_norm": 0.48543044924736023, + "learning_rate": 4.987292937030678e-06, + "loss": 0.5873, + "step": 2207 + }, + { + "epoch": 0.2015702026656929, + "grad_norm": 0.46723270416259766, + "learning_rate": 4.987280882650009e-06, + "loss": 0.5874, + "step": 2208 + }, + { + "epoch": 0.20166149351834947, + "grad_norm": 0.45223426818847656, + "learning_rate": 4.987268822569023e-06, + "loss": 0.6118, + "step": 2209 + }, + { + "epoch": 0.20175278437100602, + "grad_norm": 0.5037962794303894, + "learning_rate": 4.987256756787745e-06, + "loss": 0.5792, + "step": 2210 + }, + { + "epoch": 0.2018440752236626, + "grad_norm": 0.4606070816516876, + "learning_rate": 4.987244685306202e-06, + "loss": 0.6373, + "step": 2211 + }, + { + "epoch": 0.20193536607631915, + "grad_norm": 0.4935378134250641, + "learning_rate": 4.987232608124422e-06, + "loss": 0.6262, + "step": 2212 + }, + { + "epoch": 0.20202665692897573, + "grad_norm": 0.49886471033096313, + "learning_rate": 4.987220525242435e-06, + "loss": 0.5948, + "step": 2213 + }, + { + "epoch": 0.20211794778163228, + "grad_norm": 0.4521165192127228, + "learning_rate": 4.987208436660267e-06, + "loss": 0.6621, + "step": 2214 + }, + { + "epoch": 0.20220923863428886, + "grad_norm": 0.46690499782562256, + "learning_rate": 4.987196342377946e-06, + "loss": 0.5822, + "step": 2215 + }, + { + "epoch": 0.2023005294869454, + "grad_norm": 0.4882388412952423, + "learning_rate": 4.987184242395499e-06, + "loss": 0.619, + "step": 2216 + }, + { + "epoch": 0.20239182033960199, + "grad_norm": 0.46598368883132935, + "learning_rate": 4.9871721367129536e-06, + "loss": 0.6231, + "step": 2217 + }, + { + "epoch": 0.20248311119225854, + "grad_norm": 0.4826822876930237, + "learning_rate": 4.987160025330339e-06, + "loss": 0.584, + "step": 2218 + }, + { + "epoch": 0.2025744020449151, + "grad_norm": 0.4487147033214569, + "learning_rate": 4.9871479082476815e-06, + "loss": 0.6321, + "step": 2219 + }, + { + "epoch": 0.20266569289757166, + "grad_norm": 0.47841131687164307, + "learning_rate": 4.98713578546501e-06, + "loss": 0.6176, + "step": 2220 + }, + { + "epoch": 0.20275698375022821, + "grad_norm": 0.5229328274726868, + "learning_rate": 4.987123656982351e-06, + "loss": 0.602, + "step": 2221 + }, + { + "epoch": 0.2028482746028848, + "grad_norm": 0.49592578411102295, + "learning_rate": 4.987111522799734e-06, + "loss": 0.6041, + "step": 2222 + }, + { + "epoch": 0.20293956545554134, + "grad_norm": 0.49135759472846985, + "learning_rate": 4.987099382917186e-06, + "loss": 0.5598, + "step": 2223 + }, + { + "epoch": 0.20303085630819792, + "grad_norm": 0.4652404487133026, + "learning_rate": 4.987087237334733e-06, + "loss": 0.6093, + "step": 2224 + }, + { + "epoch": 0.20312214716085447, + "grad_norm": 0.5147124528884888, + "learning_rate": 4.9870750860524065e-06, + "loss": 0.6533, + "step": 2225 + }, + { + "epoch": 0.20321343801351105, + "grad_norm": 0.47323575615882874, + "learning_rate": 4.987062929070231e-06, + "loss": 0.6421, + "step": 2226 + }, + { + "epoch": 0.2033047288661676, + "grad_norm": 0.4711827337741852, + "learning_rate": 4.987050766388237e-06, + "loss": 0.6045, + "step": 2227 + }, + { + "epoch": 0.20339601971882418, + "grad_norm": 0.46683332324028015, + "learning_rate": 4.9870385980064505e-06, + "loss": 0.614, + "step": 2228 + }, + { + "epoch": 0.20348731057148073, + "grad_norm": 0.48756909370422363, + "learning_rate": 4.9870264239249e-06, + "loss": 0.5961, + "step": 2229 + }, + { + "epoch": 0.2035786014241373, + "grad_norm": 0.4733630418777466, + "learning_rate": 4.987014244143613e-06, + "loss": 0.6314, + "step": 2230 + }, + { + "epoch": 0.20366989227679386, + "grad_norm": 0.4760105311870575, + "learning_rate": 4.987002058662618e-06, + "loss": 0.6166, + "step": 2231 + }, + { + "epoch": 0.20376118312945043, + "grad_norm": 0.5009088516235352, + "learning_rate": 4.986989867481943e-06, + "loss": 0.6041, + "step": 2232 + }, + { + "epoch": 0.20385247398210699, + "grad_norm": 0.4641794264316559, + "learning_rate": 4.986977670601616e-06, + "loss": 0.6201, + "step": 2233 + }, + { + "epoch": 0.20394376483476356, + "grad_norm": 0.46397557854652405, + "learning_rate": 4.986965468021664e-06, + "loss": 0.6187, + "step": 2234 + }, + { + "epoch": 0.2040350556874201, + "grad_norm": 0.5068020820617676, + "learning_rate": 4.986953259742116e-06, + "loss": 0.6156, + "step": 2235 + }, + { + "epoch": 0.2041263465400767, + "grad_norm": 0.4488068222999573, + "learning_rate": 4.986941045763e-06, + "loss": 0.6336, + "step": 2236 + }, + { + "epoch": 0.20421763739273324, + "grad_norm": 0.47351863980293274, + "learning_rate": 4.986928826084342e-06, + "loss": 0.6072, + "step": 2237 + }, + { + "epoch": 0.20430892824538982, + "grad_norm": 0.45649388432502747, + "learning_rate": 4.986916600706173e-06, + "loss": 0.65, + "step": 2238 + }, + { + "epoch": 0.20440021909804637, + "grad_norm": 0.4579210877418518, + "learning_rate": 4.986904369628519e-06, + "loss": 0.6349, + "step": 2239 + }, + { + "epoch": 0.20449150995070295, + "grad_norm": 0.4749593138694763, + "learning_rate": 4.986892132851408e-06, + "loss": 0.632, + "step": 2240 + }, + { + "epoch": 0.2045828008033595, + "grad_norm": 0.4969039559364319, + "learning_rate": 4.986879890374871e-06, + "loss": 0.5862, + "step": 2241 + }, + { + "epoch": 0.20467409165601608, + "grad_norm": 0.46429443359375, + "learning_rate": 4.986867642198931e-06, + "loss": 0.6542, + "step": 2242 + }, + { + "epoch": 0.20476538250867263, + "grad_norm": 0.45803987979888916, + "learning_rate": 4.98685538832362e-06, + "loss": 0.622, + "step": 2243 + }, + { + "epoch": 0.2048566733613292, + "grad_norm": 0.48299258947372437, + "learning_rate": 4.986843128748965e-06, + "loss": 0.5716, + "step": 2244 + }, + { + "epoch": 0.20494796421398576, + "grad_norm": 0.4765440821647644, + "learning_rate": 4.986830863474994e-06, + "loss": 0.585, + "step": 2245 + }, + { + "epoch": 0.20503925506664233, + "grad_norm": 0.4368971586227417, + "learning_rate": 4.986818592501736e-06, + "loss": 0.6028, + "step": 2246 + }, + { + "epoch": 0.20513054591929888, + "grad_norm": 0.4464603066444397, + "learning_rate": 4.986806315829217e-06, + "loss": 0.6476, + "step": 2247 + }, + { + "epoch": 0.20522183677195546, + "grad_norm": 0.4831985533237457, + "learning_rate": 4.986794033457466e-06, + "loss": 0.5914, + "step": 2248 + }, + { + "epoch": 0.205313127624612, + "grad_norm": 0.45978713035583496, + "learning_rate": 4.986781745386512e-06, + "loss": 0.613, + "step": 2249 + }, + { + "epoch": 0.2054044184772686, + "grad_norm": 0.5079149603843689, + "learning_rate": 4.986769451616383e-06, + "loss": 0.6158, + "step": 2250 + }, + { + "epoch": 0.20549570932992514, + "grad_norm": 0.48439615964889526, + "learning_rate": 4.986757152147107e-06, + "loss": 0.576, + "step": 2251 + }, + { + "epoch": 0.2055870001825817, + "grad_norm": 0.4852905869483948, + "learning_rate": 4.9867448469787105e-06, + "loss": 0.6009, + "step": 2252 + }, + { + "epoch": 0.20567829103523827, + "grad_norm": 0.45929715037345886, + "learning_rate": 4.986732536111224e-06, + "loss": 0.5997, + "step": 2253 + }, + { + "epoch": 0.20576958188789482, + "grad_norm": 0.47135236859321594, + "learning_rate": 4.986720219544676e-06, + "loss": 0.6422, + "step": 2254 + }, + { + "epoch": 0.2058608727405514, + "grad_norm": 0.4656299352645874, + "learning_rate": 4.9867078972790925e-06, + "loss": 0.5988, + "step": 2255 + }, + { + "epoch": 0.20595216359320795, + "grad_norm": 0.45271211862564087, + "learning_rate": 4.986695569314504e-06, + "loss": 0.6188, + "step": 2256 + }, + { + "epoch": 0.20604345444586453, + "grad_norm": 0.5139105319976807, + "learning_rate": 4.986683235650937e-06, + "loss": 0.5973, + "step": 2257 + }, + { + "epoch": 0.20613474529852108, + "grad_norm": 0.46811190247535706, + "learning_rate": 4.986670896288419e-06, + "loss": 0.6171, + "step": 2258 + }, + { + "epoch": 0.20622603615117766, + "grad_norm": 0.4583127796649933, + "learning_rate": 4.986658551226981e-06, + "loss": 0.6296, + "step": 2259 + }, + { + "epoch": 0.2063173270038342, + "grad_norm": 0.496629536151886, + "learning_rate": 4.9866462004666505e-06, + "loss": 0.6005, + "step": 2260 + }, + { + "epoch": 0.20640861785649078, + "grad_norm": 0.46613189578056335, + "learning_rate": 4.986633844007455e-06, + "loss": 0.6047, + "step": 2261 + }, + { + "epoch": 0.20649990870914733, + "grad_norm": 0.454826295375824, + "learning_rate": 4.9866214818494226e-06, + "loss": 0.6068, + "step": 2262 + }, + { + "epoch": 0.2065911995618039, + "grad_norm": 0.4676886796951294, + "learning_rate": 4.986609113992583e-06, + "loss": 0.5945, + "step": 2263 + }, + { + "epoch": 0.20668249041446046, + "grad_norm": 0.45209160447120667, + "learning_rate": 4.986596740436963e-06, + "loss": 0.6339, + "step": 2264 + }, + { + "epoch": 0.20677378126711704, + "grad_norm": 0.4794624447822571, + "learning_rate": 4.986584361182592e-06, + "loss": 0.6045, + "step": 2265 + }, + { + "epoch": 0.2068650721197736, + "grad_norm": 0.4589039981365204, + "learning_rate": 4.9865719762294975e-06, + "loss": 0.6137, + "step": 2266 + }, + { + "epoch": 0.20695636297243017, + "grad_norm": 0.4436984360218048, + "learning_rate": 4.986559585577708e-06, + "loss": 0.6487, + "step": 2267 + }, + { + "epoch": 0.20704765382508672, + "grad_norm": 0.45075345039367676, + "learning_rate": 4.9865471892272534e-06, + "loss": 0.5826, + "step": 2268 + }, + { + "epoch": 0.2071389446777433, + "grad_norm": 0.48130765557289124, + "learning_rate": 4.986534787178161e-06, + "loss": 0.6004, + "step": 2269 + }, + { + "epoch": 0.20723023553039985, + "grad_norm": 0.49656593799591064, + "learning_rate": 4.986522379430458e-06, + "loss": 0.5877, + "step": 2270 + }, + { + "epoch": 0.20732152638305643, + "grad_norm": 0.4467478394508362, + "learning_rate": 4.986509965984175e-06, + "loss": 0.6463, + "step": 2271 + }, + { + "epoch": 0.20741281723571298, + "grad_norm": 0.446742981672287, + "learning_rate": 4.98649754683934e-06, + "loss": 0.6248, + "step": 2272 + }, + { + "epoch": 0.20750410808836955, + "grad_norm": 0.4567435383796692, + "learning_rate": 4.9864851219959805e-06, + "loss": 0.6156, + "step": 2273 + }, + { + "epoch": 0.2075953989410261, + "grad_norm": 0.4585254192352295, + "learning_rate": 4.9864726914541255e-06, + "loss": 0.6315, + "step": 2274 + }, + { + "epoch": 0.20768668979368268, + "grad_norm": 0.502483069896698, + "learning_rate": 4.986460255213803e-06, + "loss": 0.6051, + "step": 2275 + }, + { + "epoch": 0.20777798064633923, + "grad_norm": 0.44679734110832214, + "learning_rate": 4.9864478132750425e-06, + "loss": 0.6608, + "step": 2276 + }, + { + "epoch": 0.2078692714989958, + "grad_norm": 0.4683312475681305, + "learning_rate": 4.986435365637871e-06, + "loss": 0.6211, + "step": 2277 + }, + { + "epoch": 0.20796056235165236, + "grad_norm": 0.5033209323883057, + "learning_rate": 4.986422912302319e-06, + "loss": 0.586, + "step": 2278 + }, + { + "epoch": 0.20805185320430894, + "grad_norm": 0.4738565981388092, + "learning_rate": 4.9864104532684145e-06, + "loss": 0.6124, + "step": 2279 + }, + { + "epoch": 0.2081431440569655, + "grad_norm": 0.43056273460388184, + "learning_rate": 4.986397988536185e-06, + "loss": 0.6001, + "step": 2280 + }, + { + "epoch": 0.20823443490962207, + "grad_norm": 0.4804709851741791, + "learning_rate": 4.9863855181056585e-06, + "loss": 0.6048, + "step": 2281 + }, + { + "epoch": 0.20832572576227862, + "grad_norm": 0.4326508641242981, + "learning_rate": 4.9863730419768655e-06, + "loss": 0.6156, + "step": 2282 + }, + { + "epoch": 0.2084170166149352, + "grad_norm": 0.4545615613460541, + "learning_rate": 4.9863605601498345e-06, + "loss": 0.6418, + "step": 2283 + }, + { + "epoch": 0.20850830746759175, + "grad_norm": 0.4754260778427124, + "learning_rate": 4.986348072624593e-06, + "loss": 0.6205, + "step": 2284 + }, + { + "epoch": 0.2085995983202483, + "grad_norm": 0.4232475161552429, + "learning_rate": 4.98633557940117e-06, + "loss": 0.6615, + "step": 2285 + }, + { + "epoch": 0.20869088917290488, + "grad_norm": 0.4811265468597412, + "learning_rate": 4.986323080479593e-06, + "loss": 0.6057, + "step": 2286 + }, + { + "epoch": 0.20878218002556143, + "grad_norm": 0.4803503155708313, + "learning_rate": 4.986310575859894e-06, + "loss": 0.614, + "step": 2287 + }, + { + "epoch": 0.208873470878218, + "grad_norm": 0.4737141728401184, + "learning_rate": 4.986298065542098e-06, + "loss": 0.5874, + "step": 2288 + }, + { + "epoch": 0.20896476173087455, + "grad_norm": 0.46374237537384033, + "learning_rate": 4.986285549526236e-06, + "loss": 0.5882, + "step": 2289 + }, + { + "epoch": 0.20905605258353113, + "grad_norm": 0.44800838828086853, + "learning_rate": 4.986273027812335e-06, + "loss": 0.6099, + "step": 2290 + }, + { + "epoch": 0.20914734343618768, + "grad_norm": 0.4988951086997986, + "learning_rate": 4.986260500400425e-06, + "loss": 0.6204, + "step": 2291 + }, + { + "epoch": 0.20923863428884426, + "grad_norm": 0.483930379152298, + "learning_rate": 4.986247967290534e-06, + "loss": 0.5969, + "step": 2292 + }, + { + "epoch": 0.2093299251415008, + "grad_norm": 0.4820505976676941, + "learning_rate": 4.986235428482691e-06, + "loss": 0.557, + "step": 2293 + }, + { + "epoch": 0.2094212159941574, + "grad_norm": 0.45511001348495483, + "learning_rate": 4.986222883976925e-06, + "loss": 0.6054, + "step": 2294 + }, + { + "epoch": 0.20951250684681394, + "grad_norm": 0.4514406621456146, + "learning_rate": 4.986210333773265e-06, + "loss": 0.589, + "step": 2295 + }, + { + "epoch": 0.20960379769947052, + "grad_norm": 0.4440823197364807, + "learning_rate": 4.986197777871738e-06, + "loss": 0.6154, + "step": 2296 + }, + { + "epoch": 0.20969508855212707, + "grad_norm": 0.48517778515815735, + "learning_rate": 4.986185216272375e-06, + "loss": 0.6263, + "step": 2297 + }, + { + "epoch": 0.20978637940478365, + "grad_norm": 0.47620269656181335, + "learning_rate": 4.986172648975203e-06, + "loss": 0.6045, + "step": 2298 + }, + { + "epoch": 0.2098776702574402, + "grad_norm": 0.4680774509906769, + "learning_rate": 4.986160075980252e-06, + "loss": 0.5979, + "step": 2299 + }, + { + "epoch": 0.20996896111009677, + "grad_norm": 0.49022695422172546, + "learning_rate": 4.98614749728755e-06, + "loss": 0.6073, + "step": 2300 + }, + { + "epoch": 0.21006025196275332, + "grad_norm": 0.49240830540657043, + "learning_rate": 4.986134912897126e-06, + "loss": 0.6335, + "step": 2301 + }, + { + "epoch": 0.2101515428154099, + "grad_norm": 0.5035703182220459, + "learning_rate": 4.9861223228090095e-06, + "loss": 0.6267, + "step": 2302 + }, + { + "epoch": 0.21024283366806645, + "grad_norm": 0.4328538179397583, + "learning_rate": 4.986109727023229e-06, + "loss": 0.6494, + "step": 2303 + }, + { + "epoch": 0.21033412452072303, + "grad_norm": 0.459771066904068, + "learning_rate": 4.986097125539813e-06, + "loss": 0.5787, + "step": 2304 + }, + { + "epoch": 0.21042541537337958, + "grad_norm": 0.4632032811641693, + "learning_rate": 4.986084518358792e-06, + "loss": 0.6165, + "step": 2305 + }, + { + "epoch": 0.21051670622603616, + "grad_norm": 0.4885532855987549, + "learning_rate": 4.986071905480191e-06, + "loss": 0.613, + "step": 2306 + }, + { + "epoch": 0.2106079970786927, + "grad_norm": 0.4665658175945282, + "learning_rate": 4.986059286904044e-06, + "loss": 0.6028, + "step": 2307 + }, + { + "epoch": 0.2106992879313493, + "grad_norm": 0.4835187792778015, + "learning_rate": 4.986046662630376e-06, + "loss": 0.5801, + "step": 2308 + }, + { + "epoch": 0.21079057878400584, + "grad_norm": 0.4385676681995392, + "learning_rate": 4.986034032659217e-06, + "loss": 0.6395, + "step": 2309 + }, + { + "epoch": 0.21088186963666242, + "grad_norm": 0.4813807010650635, + "learning_rate": 4.986021396990597e-06, + "loss": 0.5656, + "step": 2310 + }, + { + "epoch": 0.21097316048931897, + "grad_norm": 0.505876898765564, + "learning_rate": 4.986008755624544e-06, + "loss": 0.5355, + "step": 2311 + }, + { + "epoch": 0.21106445134197555, + "grad_norm": 0.47000157833099365, + "learning_rate": 4.9859961085610865e-06, + "loss": 0.6735, + "step": 2312 + }, + { + "epoch": 0.2111557421946321, + "grad_norm": 0.506333589553833, + "learning_rate": 4.985983455800255e-06, + "loss": 0.6074, + "step": 2313 + }, + { + "epoch": 0.21124703304728867, + "grad_norm": 0.4408816993236542, + "learning_rate": 4.985970797342078e-06, + "loss": 0.5927, + "step": 2314 + }, + { + "epoch": 0.21133832389994522, + "grad_norm": 0.46932080388069153, + "learning_rate": 4.985958133186583e-06, + "loss": 0.6149, + "step": 2315 + }, + { + "epoch": 0.2114296147526018, + "grad_norm": 0.4639962315559387, + "learning_rate": 4.9859454633338015e-06, + "loss": 0.6684, + "step": 2316 + }, + { + "epoch": 0.21152090560525835, + "grad_norm": 0.48874393105506897, + "learning_rate": 4.98593278778376e-06, + "loss": 0.5705, + "step": 2317 + }, + { + "epoch": 0.21161219645791493, + "grad_norm": 0.47655317187309265, + "learning_rate": 4.98592010653649e-06, + "loss": 0.5942, + "step": 2318 + }, + { + "epoch": 0.21170348731057148, + "grad_norm": 0.4547758102416992, + "learning_rate": 4.985907419592018e-06, + "loss": 0.6108, + "step": 2319 + }, + { + "epoch": 0.21179477816322803, + "grad_norm": 0.4706318974494934, + "learning_rate": 4.985894726950376e-06, + "loss": 0.6303, + "step": 2320 + }, + { + "epoch": 0.2118860690158846, + "grad_norm": 0.5018013119697571, + "learning_rate": 4.98588202861159e-06, + "loss": 0.6237, + "step": 2321 + }, + { + "epoch": 0.21197735986854116, + "grad_norm": 0.48672550916671753, + "learning_rate": 4.985869324575692e-06, + "loss": 0.5949, + "step": 2322 + }, + { + "epoch": 0.21206865072119774, + "grad_norm": 0.45544788241386414, + "learning_rate": 4.985856614842709e-06, + "loss": 0.6376, + "step": 2323 + }, + { + "epoch": 0.2121599415738543, + "grad_norm": 0.4742485284805298, + "learning_rate": 4.985843899412671e-06, + "loss": 0.6249, + "step": 2324 + }, + { + "epoch": 0.21225123242651087, + "grad_norm": 0.4836854934692383, + "learning_rate": 4.985831178285608e-06, + "loss": 0.6312, + "step": 2325 + }, + { + "epoch": 0.21234252327916742, + "grad_norm": 0.46847841143608093, + "learning_rate": 4.985818451461546e-06, + "loss": 0.6278, + "step": 2326 + }, + { + "epoch": 0.212433814131824, + "grad_norm": 0.4776568114757538, + "learning_rate": 4.985805718940517e-06, + "loss": 0.5705, + "step": 2327 + }, + { + "epoch": 0.21252510498448055, + "grad_norm": 0.4737057089805603, + "learning_rate": 4.985792980722551e-06, + "loss": 0.5948, + "step": 2328 + }, + { + "epoch": 0.21261639583713712, + "grad_norm": 0.4767676293849945, + "learning_rate": 4.9857802368076735e-06, + "loss": 0.5975, + "step": 2329 + }, + { + "epoch": 0.21270768668979367, + "grad_norm": 0.5040819644927979, + "learning_rate": 4.985767487195917e-06, + "loss": 0.605, + "step": 2330 + }, + { + "epoch": 0.21279897754245025, + "grad_norm": 0.4900922179222107, + "learning_rate": 4.98575473188731e-06, + "loss": 0.6266, + "step": 2331 + }, + { + "epoch": 0.2128902683951068, + "grad_norm": 0.4799405634403229, + "learning_rate": 4.985741970881881e-06, + "loss": 0.5906, + "step": 2332 + }, + { + "epoch": 0.21298155924776338, + "grad_norm": 0.4430413246154785, + "learning_rate": 4.98572920417966e-06, + "loss": 0.6405, + "step": 2333 + }, + { + "epoch": 0.21307285010041993, + "grad_norm": 0.47474005818367004, + "learning_rate": 4.985716431780675e-06, + "loss": 0.6136, + "step": 2334 + }, + { + "epoch": 0.2131641409530765, + "grad_norm": 0.5241936445236206, + "learning_rate": 4.9857036536849566e-06, + "loss": 0.5551, + "step": 2335 + }, + { + "epoch": 0.21325543180573306, + "grad_norm": 0.4788628816604614, + "learning_rate": 4.985690869892533e-06, + "loss": 0.6066, + "step": 2336 + }, + { + "epoch": 0.21334672265838964, + "grad_norm": 0.45432111620903015, + "learning_rate": 4.9856780804034355e-06, + "loss": 0.5735, + "step": 2337 + }, + { + "epoch": 0.2134380135110462, + "grad_norm": 0.4580667316913605, + "learning_rate": 4.985665285217691e-06, + "loss": 0.624, + "step": 2338 + }, + { + "epoch": 0.21352930436370277, + "grad_norm": 0.4513360857963562, + "learning_rate": 4.98565248433533e-06, + "loss": 0.6352, + "step": 2339 + }, + { + "epoch": 0.21362059521635932, + "grad_norm": 0.5047858953475952, + "learning_rate": 4.985639677756382e-06, + "loss": 0.6189, + "step": 2340 + }, + { + "epoch": 0.2137118860690159, + "grad_norm": 0.474608838558197, + "learning_rate": 4.9856268654808755e-06, + "loss": 0.5879, + "step": 2341 + }, + { + "epoch": 0.21380317692167244, + "grad_norm": 0.47878319025039673, + "learning_rate": 4.985614047508841e-06, + "loss": 0.586, + "step": 2342 + }, + { + "epoch": 0.21389446777432902, + "grad_norm": 0.489564448595047, + "learning_rate": 4.985601223840306e-06, + "loss": 0.6173, + "step": 2343 + }, + { + "epoch": 0.21398575862698557, + "grad_norm": 0.4746945798397064, + "learning_rate": 4.985588394475303e-06, + "loss": 0.6389, + "step": 2344 + }, + { + "epoch": 0.21407704947964215, + "grad_norm": 0.49592143297195435, + "learning_rate": 4.985575559413858e-06, + "loss": 0.625, + "step": 2345 + }, + { + "epoch": 0.2141683403322987, + "grad_norm": 0.4927091598510742, + "learning_rate": 4.985562718656003e-06, + "loss": 0.6022, + "step": 2346 + }, + { + "epoch": 0.21425963118495528, + "grad_norm": 0.4550168514251709, + "learning_rate": 4.985549872201766e-06, + "loss": 0.6324, + "step": 2347 + }, + { + "epoch": 0.21435092203761183, + "grad_norm": 0.48890310525894165, + "learning_rate": 4.985537020051177e-06, + "loss": 0.6043, + "step": 2348 + }, + { + "epoch": 0.2144422128902684, + "grad_norm": 0.47086280584335327, + "learning_rate": 4.985524162204265e-06, + "loss": 0.6257, + "step": 2349 + }, + { + "epoch": 0.21453350374292496, + "grad_norm": 0.4850345849990845, + "learning_rate": 4.98551129866106e-06, + "loss": 0.5884, + "step": 2350 + }, + { + "epoch": 0.21462479459558154, + "grad_norm": 0.5094274878501892, + "learning_rate": 4.985498429421591e-06, + "loss": 0.5947, + "step": 2351 + }, + { + "epoch": 0.2147160854482381, + "grad_norm": 0.5147122144699097, + "learning_rate": 4.9854855544858884e-06, + "loss": 0.5342, + "step": 2352 + }, + { + "epoch": 0.21480737630089464, + "grad_norm": 0.5039911866188049, + "learning_rate": 4.9854726738539806e-06, + "loss": 0.6016, + "step": 2353 + }, + { + "epoch": 0.21489866715355121, + "grad_norm": 0.4681297540664673, + "learning_rate": 4.985459787525897e-06, + "loss": 0.6512, + "step": 2354 + }, + { + "epoch": 0.21498995800620777, + "grad_norm": 0.49637818336486816, + "learning_rate": 4.985446895501668e-06, + "loss": 0.5742, + "step": 2355 + }, + { + "epoch": 0.21508124885886434, + "grad_norm": 0.4824541509151459, + "learning_rate": 4.985433997781324e-06, + "loss": 0.6092, + "step": 2356 + }, + { + "epoch": 0.2151725397115209, + "grad_norm": 0.451233446598053, + "learning_rate": 4.985421094364892e-06, + "loss": 0.6525, + "step": 2357 + }, + { + "epoch": 0.21526383056417747, + "grad_norm": 0.48175764083862305, + "learning_rate": 4.985408185252403e-06, + "loss": 0.609, + "step": 2358 + }, + { + "epoch": 0.21535512141683402, + "grad_norm": 0.506504476070404, + "learning_rate": 4.985395270443887e-06, + "loss": 0.6272, + "step": 2359 + }, + { + "epoch": 0.2154464122694906, + "grad_norm": 0.5172122716903687, + "learning_rate": 4.985382349939374e-06, + "loss": 0.6226, + "step": 2360 + }, + { + "epoch": 0.21553770312214715, + "grad_norm": 0.46804291009902954, + "learning_rate": 4.985369423738892e-06, + "loss": 0.58, + "step": 2361 + }, + { + "epoch": 0.21562899397480373, + "grad_norm": 0.4669826328754425, + "learning_rate": 4.985356491842471e-06, + "loss": 0.6144, + "step": 2362 + }, + { + "epoch": 0.21572028482746028, + "grad_norm": 0.5163729786872864, + "learning_rate": 4.985343554250141e-06, + "loss": 0.6113, + "step": 2363 + }, + { + "epoch": 0.21581157568011686, + "grad_norm": 0.46200039982795715, + "learning_rate": 4.985330610961932e-06, + "loss": 0.6266, + "step": 2364 + }, + { + "epoch": 0.2159028665327734, + "grad_norm": 0.48083826899528503, + "learning_rate": 4.985317661977874e-06, + "loss": 0.6099, + "step": 2365 + }, + { + "epoch": 0.21599415738542999, + "grad_norm": 0.514126181602478, + "learning_rate": 4.985304707297995e-06, + "loss": 0.572, + "step": 2366 + }, + { + "epoch": 0.21608544823808654, + "grad_norm": 0.46591082215309143, + "learning_rate": 4.985291746922326e-06, + "loss": 0.6137, + "step": 2367 + }, + { + "epoch": 0.21617673909074311, + "grad_norm": 0.4436059296131134, + "learning_rate": 4.985278780850897e-06, + "loss": 0.599, + "step": 2368 + }, + { + "epoch": 0.21626802994339966, + "grad_norm": 0.47545158863067627, + "learning_rate": 4.985265809083737e-06, + "loss": 0.5954, + "step": 2369 + }, + { + "epoch": 0.21635932079605624, + "grad_norm": 0.46405887603759766, + "learning_rate": 4.985252831620876e-06, + "loss": 0.6255, + "step": 2370 + }, + { + "epoch": 0.2164506116487128, + "grad_norm": 0.5030508041381836, + "learning_rate": 4.9852398484623435e-06, + "loss": 0.6301, + "step": 2371 + }, + { + "epoch": 0.21654190250136937, + "grad_norm": 0.4675087332725525, + "learning_rate": 4.985226859608169e-06, + "loss": 0.6043, + "step": 2372 + }, + { + "epoch": 0.21663319335402592, + "grad_norm": 0.47231215238571167, + "learning_rate": 4.9852138650583825e-06, + "loss": 0.597, + "step": 2373 + }, + { + "epoch": 0.2167244842066825, + "grad_norm": 0.4778638184070587, + "learning_rate": 4.9852008648130135e-06, + "loss": 0.5466, + "step": 2374 + }, + { + "epoch": 0.21681577505933905, + "grad_norm": 0.43642672896385193, + "learning_rate": 4.985187858872093e-06, + "loss": 0.635, + "step": 2375 + }, + { + "epoch": 0.21690706591199563, + "grad_norm": 0.4766359329223633, + "learning_rate": 4.98517484723565e-06, + "loss": 0.6125, + "step": 2376 + }, + { + "epoch": 0.21699835676465218, + "grad_norm": 0.46599018573760986, + "learning_rate": 4.985161829903714e-06, + "loss": 0.6232, + "step": 2377 + }, + { + "epoch": 0.21708964761730876, + "grad_norm": 0.4677221179008484, + "learning_rate": 4.985148806876315e-06, + "loss": 0.6498, + "step": 2378 + }, + { + "epoch": 0.2171809384699653, + "grad_norm": 0.46767017245292664, + "learning_rate": 4.985135778153484e-06, + "loss": 0.619, + "step": 2379 + }, + { + "epoch": 0.21727222932262188, + "grad_norm": 0.48100462555885315, + "learning_rate": 4.985122743735249e-06, + "loss": 0.6066, + "step": 2380 + }, + { + "epoch": 0.21736352017527844, + "grad_norm": 0.48640716075897217, + "learning_rate": 4.985109703621641e-06, + "loss": 0.6052, + "step": 2381 + }, + { + "epoch": 0.217454811027935, + "grad_norm": 0.48155659437179565, + "learning_rate": 4.985096657812689e-06, + "loss": 0.6333, + "step": 2382 + }, + { + "epoch": 0.21754610188059156, + "grad_norm": 0.4681108593940735, + "learning_rate": 4.985083606308424e-06, + "loss": 0.5802, + "step": 2383 + }, + { + "epoch": 0.21763739273324814, + "grad_norm": 0.4558980166912079, + "learning_rate": 4.985070549108876e-06, + "loss": 0.6186, + "step": 2384 + }, + { + "epoch": 0.2177286835859047, + "grad_norm": 0.5058985948562622, + "learning_rate": 4.985057486214073e-06, + "loss": 0.5581, + "step": 2385 + }, + { + "epoch": 0.21781997443856124, + "grad_norm": 0.4504533112049103, + "learning_rate": 4.9850444176240475e-06, + "loss": 0.6442, + "step": 2386 + }, + { + "epoch": 0.21791126529121782, + "grad_norm": 0.5004827976226807, + "learning_rate": 4.985031343338827e-06, + "loss": 0.5618, + "step": 2387 + }, + { + "epoch": 0.21800255614387437, + "grad_norm": 0.47557878494262695, + "learning_rate": 4.9850182633584444e-06, + "loss": 0.594, + "step": 2388 + }, + { + "epoch": 0.21809384699653095, + "grad_norm": 0.46560871601104736, + "learning_rate": 4.985005177682926e-06, + "loss": 0.6002, + "step": 2389 + }, + { + "epoch": 0.2181851378491875, + "grad_norm": 0.48443955183029175, + "learning_rate": 4.984992086312306e-06, + "loss": 0.5865, + "step": 2390 + }, + { + "epoch": 0.21827642870184408, + "grad_norm": 0.48435208201408386, + "learning_rate": 4.984978989246611e-06, + "loss": 0.5487, + "step": 2391 + }, + { + "epoch": 0.21836771955450063, + "grad_norm": 0.4813576638698578, + "learning_rate": 4.9849658864858715e-06, + "loss": 0.5797, + "step": 2392 + }, + { + "epoch": 0.2184590104071572, + "grad_norm": 0.4706885814666748, + "learning_rate": 4.98495277803012e-06, + "loss": 0.6221, + "step": 2393 + }, + { + "epoch": 0.21855030125981376, + "grad_norm": 0.49117666482925415, + "learning_rate": 4.9849396638793836e-06, + "loss": 0.6304, + "step": 2394 + }, + { + "epoch": 0.21864159211247033, + "grad_norm": 0.4912995994091034, + "learning_rate": 4.984926544033693e-06, + "loss": 0.5939, + "step": 2395 + }, + { + "epoch": 0.21873288296512688, + "grad_norm": 0.4864850640296936, + "learning_rate": 4.98491341849308e-06, + "loss": 0.5992, + "step": 2396 + }, + { + "epoch": 0.21882417381778346, + "grad_norm": 0.4855898916721344, + "learning_rate": 4.984900287257573e-06, + "loss": 0.5818, + "step": 2397 + }, + { + "epoch": 0.21891546467044, + "grad_norm": 0.444442480802536, + "learning_rate": 4.9848871503272026e-06, + "loss": 0.5928, + "step": 2398 + }, + { + "epoch": 0.2190067555230966, + "grad_norm": 0.4460761845111847, + "learning_rate": 4.984874007701999e-06, + "loss": 0.6163, + "step": 2399 + }, + { + "epoch": 0.21909804637575314, + "grad_norm": 0.4580259323120117, + "learning_rate": 4.9848608593819914e-06, + "loss": 0.61, + "step": 2400 + }, + { + "epoch": 0.21918933722840972, + "grad_norm": 0.48452240228652954, + "learning_rate": 4.9848477053672115e-06, + "loss": 0.5894, + "step": 2401 + }, + { + "epoch": 0.21928062808106627, + "grad_norm": 0.46642300486564636, + "learning_rate": 4.984834545657688e-06, + "loss": 0.5976, + "step": 2402 + }, + { + "epoch": 0.21937191893372285, + "grad_norm": 0.4458273649215698, + "learning_rate": 4.984821380253452e-06, + "loss": 0.6125, + "step": 2403 + }, + { + "epoch": 0.2194632097863794, + "grad_norm": 0.4879342317581177, + "learning_rate": 4.984808209154533e-06, + "loss": 0.5952, + "step": 2404 + }, + { + "epoch": 0.21955450063903598, + "grad_norm": 0.4754319190979004, + "learning_rate": 4.984795032360963e-06, + "loss": 0.5958, + "step": 2405 + }, + { + "epoch": 0.21964579149169253, + "grad_norm": 0.448660671710968, + "learning_rate": 4.98478184987277e-06, + "loss": 0.6285, + "step": 2406 + }, + { + "epoch": 0.2197370823443491, + "grad_norm": 0.49408116936683655, + "learning_rate": 4.984768661689985e-06, + "loss": 0.5794, + "step": 2407 + }, + { + "epoch": 0.21982837319700566, + "grad_norm": 0.48647722601890564, + "learning_rate": 4.984755467812638e-06, + "loss": 0.5956, + "step": 2408 + }, + { + "epoch": 0.21991966404966223, + "grad_norm": 0.4788742959499359, + "learning_rate": 4.984742268240759e-06, + "loss": 0.5945, + "step": 2409 + }, + { + "epoch": 0.22001095490231878, + "grad_norm": 0.45515042543411255, + "learning_rate": 4.98472906297438e-06, + "loss": 0.6219, + "step": 2410 + }, + { + "epoch": 0.22010224575497536, + "grad_norm": 0.47909703850746155, + "learning_rate": 4.984715852013528e-06, + "loss": 0.6151, + "step": 2411 + }, + { + "epoch": 0.2201935366076319, + "grad_norm": 0.4578130841255188, + "learning_rate": 4.984702635358238e-06, + "loss": 0.6292, + "step": 2412 + }, + { + "epoch": 0.2202848274602885, + "grad_norm": 0.4690195620059967, + "learning_rate": 4.984689413008536e-06, + "loss": 0.606, + "step": 2413 + }, + { + "epoch": 0.22037611831294504, + "grad_norm": 0.4664337933063507, + "learning_rate": 4.984676184964453e-06, + "loss": 0.5639, + "step": 2414 + }, + { + "epoch": 0.22046740916560162, + "grad_norm": 0.4566807746887207, + "learning_rate": 4.9846629512260204e-06, + "loss": 0.6227, + "step": 2415 + }, + { + "epoch": 0.22055870001825817, + "grad_norm": 0.45666611194610596, + "learning_rate": 4.984649711793269e-06, + "loss": 0.6377, + "step": 2416 + }, + { + "epoch": 0.22064999087091475, + "grad_norm": 0.5169102549552917, + "learning_rate": 4.984636466666228e-06, + "loss": 0.614, + "step": 2417 + }, + { + "epoch": 0.2207412817235713, + "grad_norm": 0.4523468017578125, + "learning_rate": 4.9846232158449295e-06, + "loss": 0.6285, + "step": 2418 + }, + { + "epoch": 0.22083257257622788, + "grad_norm": 0.49850624799728394, + "learning_rate": 4.984609959329401e-06, + "loss": 0.5993, + "step": 2419 + }, + { + "epoch": 0.22092386342888443, + "grad_norm": 0.4717639088630676, + "learning_rate": 4.9845966971196755e-06, + "loss": 0.5782, + "step": 2420 + }, + { + "epoch": 0.22101515428154098, + "grad_norm": 0.46248844265937805, + "learning_rate": 4.984583429215781e-06, + "loss": 0.5923, + "step": 2421 + }, + { + "epoch": 0.22110644513419755, + "grad_norm": 0.46754395961761475, + "learning_rate": 4.98457015561775e-06, + "loss": 0.5926, + "step": 2422 + }, + { + "epoch": 0.2211977359868541, + "grad_norm": 0.4766753911972046, + "learning_rate": 4.9845568763256125e-06, + "loss": 0.5976, + "step": 2423 + }, + { + "epoch": 0.22128902683951068, + "grad_norm": 0.4620617926120758, + "learning_rate": 4.984543591339398e-06, + "loss": 0.6275, + "step": 2424 + }, + { + "epoch": 0.22138031769216723, + "grad_norm": 0.4691691994667053, + "learning_rate": 4.984530300659139e-06, + "loss": 0.6062, + "step": 2425 + }, + { + "epoch": 0.2214716085448238, + "grad_norm": 0.4907781183719635, + "learning_rate": 4.984517004284862e-06, + "loss": 0.5867, + "step": 2426 + }, + { + "epoch": 0.22156289939748036, + "grad_norm": 0.475472629070282, + "learning_rate": 4.984503702216601e-06, + "loss": 0.5701, + "step": 2427 + }, + { + "epoch": 0.22165419025013694, + "grad_norm": 0.49174267053604126, + "learning_rate": 4.984490394454386e-06, + "loss": 0.6067, + "step": 2428 + }, + { + "epoch": 0.2217454811027935, + "grad_norm": 0.47995415329933167, + "learning_rate": 4.984477080998247e-06, + "loss": 0.6153, + "step": 2429 + }, + { + "epoch": 0.22183677195545007, + "grad_norm": 0.47232580184936523, + "learning_rate": 4.984463761848214e-06, + "loss": 0.6011, + "step": 2430 + }, + { + "epoch": 0.22192806280810662, + "grad_norm": 0.5069091320037842, + "learning_rate": 4.984450437004318e-06, + "loss": 0.6185, + "step": 2431 + }, + { + "epoch": 0.2220193536607632, + "grad_norm": 0.48283326625823975, + "learning_rate": 4.98443710646659e-06, + "loss": 0.6048, + "step": 2432 + }, + { + "epoch": 0.22211064451341975, + "grad_norm": 0.4797721207141876, + "learning_rate": 4.984423770235059e-06, + "loss": 0.6337, + "step": 2433 + }, + { + "epoch": 0.22220193536607633, + "grad_norm": 0.4233381450176239, + "learning_rate": 4.984410428309757e-06, + "loss": 0.6237, + "step": 2434 + }, + { + "epoch": 0.22229322621873288, + "grad_norm": 0.4599229097366333, + "learning_rate": 4.984397080690715e-06, + "loss": 0.6036, + "step": 2435 + }, + { + "epoch": 0.22238451707138945, + "grad_norm": 0.4593200385570526, + "learning_rate": 4.984383727377961e-06, + "loss": 0.6204, + "step": 2436 + }, + { + "epoch": 0.222475807924046, + "grad_norm": 0.4569995105266571, + "learning_rate": 4.984370368371529e-06, + "loss": 0.5834, + "step": 2437 + }, + { + "epoch": 0.22256709877670258, + "grad_norm": 0.5095599293708801, + "learning_rate": 4.984357003671448e-06, + "loss": 0.5865, + "step": 2438 + }, + { + "epoch": 0.22265838962935913, + "grad_norm": 0.45442357659339905, + "learning_rate": 4.984343633277748e-06, + "loss": 0.6301, + "step": 2439 + }, + { + "epoch": 0.2227496804820157, + "grad_norm": 0.47839656472206116, + "learning_rate": 4.984330257190462e-06, + "loss": 0.5762, + "step": 2440 + }, + { + "epoch": 0.22284097133467226, + "grad_norm": 0.509762704372406, + "learning_rate": 4.984316875409617e-06, + "loss": 0.595, + "step": 2441 + }, + { + "epoch": 0.22293226218732884, + "grad_norm": 0.5080264806747437, + "learning_rate": 4.984303487935246e-06, + "loss": 0.5966, + "step": 2442 + }, + { + "epoch": 0.2230235530399854, + "grad_norm": 0.46938690543174744, + "learning_rate": 4.98429009476738e-06, + "loss": 0.5772, + "step": 2443 + }, + { + "epoch": 0.22311484389264197, + "grad_norm": 0.4868433177471161, + "learning_rate": 4.984276695906049e-06, + "loss": 0.56, + "step": 2444 + }, + { + "epoch": 0.22320613474529852, + "grad_norm": 0.466597318649292, + "learning_rate": 4.984263291351282e-06, + "loss": 0.6084, + "step": 2445 + }, + { + "epoch": 0.2232974255979551, + "grad_norm": 0.4980083405971527, + "learning_rate": 4.984249881103112e-06, + "loss": 0.5516, + "step": 2446 + }, + { + "epoch": 0.22338871645061165, + "grad_norm": 0.4307517111301422, + "learning_rate": 4.984236465161571e-06, + "loss": 0.6288, + "step": 2447 + }, + { + "epoch": 0.22348000730326822, + "grad_norm": 0.49689674377441406, + "learning_rate": 4.984223043526686e-06, + "loss": 0.6127, + "step": 2448 + }, + { + "epoch": 0.22357129815592477, + "grad_norm": 0.5302207469940186, + "learning_rate": 4.984209616198491e-06, + "loss": 0.5972, + "step": 2449 + }, + { + "epoch": 0.22366258900858135, + "grad_norm": 0.4461748003959656, + "learning_rate": 4.9841961831770145e-06, + "loss": 0.6251, + "step": 2450 + }, + { + "epoch": 0.2237538798612379, + "grad_norm": 0.412101149559021, + "learning_rate": 4.984182744462288e-06, + "loss": 0.6483, + "step": 2451 + }, + { + "epoch": 0.22384517071389448, + "grad_norm": 0.4739653468132019, + "learning_rate": 4.984169300054344e-06, + "loss": 0.6011, + "step": 2452 + }, + { + "epoch": 0.22393646156655103, + "grad_norm": 0.46384337544441223, + "learning_rate": 4.98415584995321e-06, + "loss": 0.5964, + "step": 2453 + }, + { + "epoch": 0.22402775241920758, + "grad_norm": 0.49573618173599243, + "learning_rate": 4.984142394158919e-06, + "loss": 0.5795, + "step": 2454 + }, + { + "epoch": 0.22411904327186416, + "grad_norm": 0.45435476303100586, + "learning_rate": 4.984128932671503e-06, + "loss": 0.5845, + "step": 2455 + }, + { + "epoch": 0.2242103341245207, + "grad_norm": 0.45533162355422974, + "learning_rate": 4.984115465490989e-06, + "loss": 0.591, + "step": 2456 + }, + { + "epoch": 0.2243016249771773, + "grad_norm": 0.4421035349369049, + "learning_rate": 4.984101992617412e-06, + "loss": 0.5878, + "step": 2457 + }, + { + "epoch": 0.22439291582983384, + "grad_norm": 0.48001328110694885, + "learning_rate": 4.984088514050801e-06, + "loss": 0.6395, + "step": 2458 + }, + { + "epoch": 0.22448420668249042, + "grad_norm": 0.49244681000709534, + "learning_rate": 4.984075029791186e-06, + "loss": 0.6246, + "step": 2459 + }, + { + "epoch": 0.22457549753514697, + "grad_norm": 0.4843197464942932, + "learning_rate": 4.9840615398386e-06, + "loss": 0.5867, + "step": 2460 + }, + { + "epoch": 0.22466678838780355, + "grad_norm": 0.4483984708786011, + "learning_rate": 4.984048044193071e-06, + "loss": 0.6015, + "step": 2461 + }, + { + "epoch": 0.2247580792404601, + "grad_norm": 0.4617292881011963, + "learning_rate": 4.984034542854634e-06, + "loss": 0.5761, + "step": 2462 + }, + { + "epoch": 0.22484937009311667, + "grad_norm": 0.4502719044685364, + "learning_rate": 4.984021035823316e-06, + "loss": 0.5984, + "step": 2463 + }, + { + "epoch": 0.22494066094577322, + "grad_norm": 0.47131869196891785, + "learning_rate": 4.984007523099151e-06, + "loss": 0.6136, + "step": 2464 + }, + { + "epoch": 0.2250319517984298, + "grad_norm": 0.47175660729408264, + "learning_rate": 4.983994004682167e-06, + "loss": 0.549, + "step": 2465 + }, + { + "epoch": 0.22512324265108635, + "grad_norm": 0.4922371208667755, + "learning_rate": 4.983980480572397e-06, + "loss": 0.6241, + "step": 2466 + }, + { + "epoch": 0.22521453350374293, + "grad_norm": 0.48084667325019836, + "learning_rate": 4.983966950769872e-06, + "loss": 0.566, + "step": 2467 + }, + { + "epoch": 0.22530582435639948, + "grad_norm": 0.46100571751594543, + "learning_rate": 4.983953415274623e-06, + "loss": 0.6536, + "step": 2468 + }, + { + "epoch": 0.22539711520905606, + "grad_norm": 0.4785350561141968, + "learning_rate": 4.98393987408668e-06, + "loss": 0.5908, + "step": 2469 + }, + { + "epoch": 0.2254884060617126, + "grad_norm": 0.4717230796813965, + "learning_rate": 4.983926327206075e-06, + "loss": 0.6372, + "step": 2470 + }, + { + "epoch": 0.2255796969143692, + "grad_norm": 0.47066089510917664, + "learning_rate": 4.983912774632838e-06, + "loss": 0.5911, + "step": 2471 + }, + { + "epoch": 0.22567098776702574, + "grad_norm": 0.45517754554748535, + "learning_rate": 4.983899216367001e-06, + "loss": 0.6132, + "step": 2472 + }, + { + "epoch": 0.22576227861968232, + "grad_norm": 0.47339802980422974, + "learning_rate": 4.983885652408595e-06, + "loss": 0.5975, + "step": 2473 + }, + { + "epoch": 0.22585356947233887, + "grad_norm": 0.48036882281303406, + "learning_rate": 4.983872082757651e-06, + "loss": 0.6085, + "step": 2474 + }, + { + "epoch": 0.22594486032499544, + "grad_norm": 0.4446399509906769, + "learning_rate": 4.983858507414199e-06, + "loss": 0.5922, + "step": 2475 + }, + { + "epoch": 0.226036151177652, + "grad_norm": 0.4677897095680237, + "learning_rate": 4.9838449263782715e-06, + "loss": 0.6104, + "step": 2476 + }, + { + "epoch": 0.22612744203030857, + "grad_norm": 0.44393637776374817, + "learning_rate": 4.9838313396499e-06, + "loss": 0.6317, + "step": 2477 + }, + { + "epoch": 0.22621873288296512, + "grad_norm": 0.5126631259918213, + "learning_rate": 4.9838177472291145e-06, + "loss": 0.5524, + "step": 2478 + }, + { + "epoch": 0.2263100237356217, + "grad_norm": 0.440703421831131, + "learning_rate": 4.983804149115946e-06, + "loss": 0.6556, + "step": 2479 + }, + { + "epoch": 0.22640131458827825, + "grad_norm": 0.4539216160774231, + "learning_rate": 4.983790545310428e-06, + "loss": 0.6164, + "step": 2480 + }, + { + "epoch": 0.22649260544093483, + "grad_norm": 0.44335973262786865, + "learning_rate": 4.983776935812588e-06, + "loss": 0.6149, + "step": 2481 + }, + { + "epoch": 0.22658389629359138, + "grad_norm": 0.48273834586143494, + "learning_rate": 4.983763320622459e-06, + "loss": 0.619, + "step": 2482 + }, + { + "epoch": 0.22667518714624796, + "grad_norm": 0.49674421548843384, + "learning_rate": 4.983749699740072e-06, + "loss": 0.5565, + "step": 2483 + }, + { + "epoch": 0.2267664779989045, + "grad_norm": 0.49337446689605713, + "learning_rate": 4.9837360731654595e-06, + "loss": 0.5708, + "step": 2484 + }, + { + "epoch": 0.2268577688515611, + "grad_norm": 0.44441866874694824, + "learning_rate": 4.9837224408986515e-06, + "loss": 0.5949, + "step": 2485 + }, + { + "epoch": 0.22694905970421764, + "grad_norm": 0.4723318815231323, + "learning_rate": 4.9837088029396795e-06, + "loss": 0.6059, + "step": 2486 + }, + { + "epoch": 0.2270403505568742, + "grad_norm": 0.4593527019023895, + "learning_rate": 4.983695159288575e-06, + "loss": 0.5669, + "step": 2487 + }, + { + "epoch": 0.22713164140953077, + "grad_norm": 0.46581998467445374, + "learning_rate": 4.983681509945368e-06, + "loss": 0.6072, + "step": 2488 + }, + { + "epoch": 0.22722293226218732, + "grad_norm": 0.4566407799720764, + "learning_rate": 4.983667854910091e-06, + "loss": 0.6437, + "step": 2489 + }, + { + "epoch": 0.2273142231148439, + "grad_norm": 0.4669613838195801, + "learning_rate": 4.983654194182775e-06, + "loss": 0.6419, + "step": 2490 + }, + { + "epoch": 0.22740551396750044, + "grad_norm": 0.486337810754776, + "learning_rate": 4.983640527763452e-06, + "loss": 0.6185, + "step": 2491 + }, + { + "epoch": 0.22749680482015702, + "grad_norm": 0.4954524636268616, + "learning_rate": 4.983626855652152e-06, + "loss": 0.6129, + "step": 2492 + }, + { + "epoch": 0.22758809567281357, + "grad_norm": 0.4499495029449463, + "learning_rate": 4.983613177848907e-06, + "loss": 0.6315, + "step": 2493 + }, + { + "epoch": 0.22767938652547015, + "grad_norm": 0.43667036294937134, + "learning_rate": 4.983599494353749e-06, + "loss": 0.6381, + "step": 2494 + }, + { + "epoch": 0.2277706773781267, + "grad_norm": 0.45837557315826416, + "learning_rate": 4.983585805166709e-06, + "loss": 0.6027, + "step": 2495 + }, + { + "epoch": 0.22786196823078328, + "grad_norm": 0.4452093839645386, + "learning_rate": 4.9835721102878166e-06, + "loss": 0.6494, + "step": 2496 + }, + { + "epoch": 0.22795325908343983, + "grad_norm": 0.48938897252082825, + "learning_rate": 4.983558409717105e-06, + "loss": 0.6019, + "step": 2497 + }, + { + "epoch": 0.2280445499360964, + "grad_norm": 0.4669262170791626, + "learning_rate": 4.983544703454606e-06, + "loss": 0.5736, + "step": 2498 + }, + { + "epoch": 0.22813584078875296, + "grad_norm": 0.4343084990978241, + "learning_rate": 4.983530991500349e-06, + "loss": 0.6215, + "step": 2499 + }, + { + "epoch": 0.22822713164140954, + "grad_norm": 0.5038721561431885, + "learning_rate": 4.983517273854368e-06, + "loss": 0.6006, + "step": 2500 + }, + { + "epoch": 0.2283184224940661, + "grad_norm": 0.46979230642318726, + "learning_rate": 4.983503550516693e-06, + "loss": 0.6188, + "step": 2501 + }, + { + "epoch": 0.22840971334672266, + "grad_norm": 0.4579920768737793, + "learning_rate": 4.983489821487355e-06, + "loss": 0.6309, + "step": 2502 + }, + { + "epoch": 0.22850100419937922, + "grad_norm": 0.4413495361804962, + "learning_rate": 4.983476086766387e-06, + "loss": 0.6431, + "step": 2503 + }, + { + "epoch": 0.2285922950520358, + "grad_norm": 0.47935402393341064, + "learning_rate": 4.983462346353819e-06, + "loss": 0.6097, + "step": 2504 + }, + { + "epoch": 0.22868358590469234, + "grad_norm": 0.44243124127388, + "learning_rate": 4.983448600249683e-06, + "loss": 0.5739, + "step": 2505 + }, + { + "epoch": 0.22877487675734892, + "grad_norm": 0.4457767605781555, + "learning_rate": 4.9834348484540094e-06, + "loss": 0.6709, + "step": 2506 + }, + { + "epoch": 0.22886616761000547, + "grad_norm": 0.48083382844924927, + "learning_rate": 4.983421090966833e-06, + "loss": 0.6068, + "step": 2507 + }, + { + "epoch": 0.22895745846266205, + "grad_norm": 0.4629397988319397, + "learning_rate": 4.9834073277881815e-06, + "loss": 0.5957, + "step": 2508 + }, + { + "epoch": 0.2290487493153186, + "grad_norm": 0.5124772787094116, + "learning_rate": 4.983393558918088e-06, + "loss": 0.571, + "step": 2509 + }, + { + "epoch": 0.22914004016797518, + "grad_norm": 0.4336509108543396, + "learning_rate": 4.983379784356585e-06, + "loss": 0.6185, + "step": 2510 + }, + { + "epoch": 0.22923133102063173, + "grad_norm": 0.4736180007457733, + "learning_rate": 4.9833660041037034e-06, + "loss": 0.5943, + "step": 2511 + }, + { + "epoch": 0.2293226218732883, + "grad_norm": 0.49319159984588623, + "learning_rate": 4.983352218159474e-06, + "loss": 0.5857, + "step": 2512 + }, + { + "epoch": 0.22941391272594486, + "grad_norm": 0.4850059747695923, + "learning_rate": 4.983338426523929e-06, + "loss": 0.5907, + "step": 2513 + }, + { + "epoch": 0.22950520357860144, + "grad_norm": 0.46350327134132385, + "learning_rate": 4.983324629197101e-06, + "loss": 0.6644, + "step": 2514 + }, + { + "epoch": 0.22959649443125799, + "grad_norm": 0.4668329656124115, + "learning_rate": 4.983310826179019e-06, + "loss": 0.5814, + "step": 2515 + }, + { + "epoch": 0.22968778528391456, + "grad_norm": 0.45607316493988037, + "learning_rate": 4.983297017469717e-06, + "loss": 0.6234, + "step": 2516 + }, + { + "epoch": 0.22977907613657111, + "grad_norm": 0.4647156298160553, + "learning_rate": 4.983283203069227e-06, + "loss": 0.5775, + "step": 2517 + }, + { + "epoch": 0.2298703669892277, + "grad_norm": 0.45556360483169556, + "learning_rate": 4.983269382977578e-06, + "loss": 0.6092, + "step": 2518 + }, + { + "epoch": 0.22996165784188424, + "grad_norm": 0.47668784856796265, + "learning_rate": 4.983255557194804e-06, + "loss": 0.5492, + "step": 2519 + }, + { + "epoch": 0.23005294869454082, + "grad_norm": 0.45945945382118225, + "learning_rate": 4.983241725720936e-06, + "loss": 0.6198, + "step": 2520 + }, + { + "epoch": 0.23014423954719737, + "grad_norm": 0.5098994374275208, + "learning_rate": 4.983227888556005e-06, + "loss": 0.5956, + "step": 2521 + }, + { + "epoch": 0.23023553039985392, + "grad_norm": 0.44455984234809875, + "learning_rate": 4.983214045700044e-06, + "loss": 0.6298, + "step": 2522 + }, + { + "epoch": 0.2303268212525105, + "grad_norm": 0.44894734025001526, + "learning_rate": 4.983200197153083e-06, + "loss": 0.6109, + "step": 2523 + }, + { + "epoch": 0.23041811210516705, + "grad_norm": 0.46357840299606323, + "learning_rate": 4.983186342915156e-06, + "loss": 0.5134, + "step": 2524 + }, + { + "epoch": 0.23050940295782363, + "grad_norm": 0.4436038136482239, + "learning_rate": 4.9831724829862935e-06, + "loss": 0.656, + "step": 2525 + }, + { + "epoch": 0.23060069381048018, + "grad_norm": 0.5147343277931213, + "learning_rate": 4.983158617366527e-06, + "loss": 0.6275, + "step": 2526 + }, + { + "epoch": 0.23069198466313676, + "grad_norm": 0.45933058857917786, + "learning_rate": 4.983144746055888e-06, + "loss": 0.6163, + "step": 2527 + }, + { + "epoch": 0.2307832755157933, + "grad_norm": 0.46137747168540955, + "learning_rate": 4.98313086905441e-06, + "loss": 0.6176, + "step": 2528 + }, + { + "epoch": 0.23087456636844989, + "grad_norm": 0.4474332630634308, + "learning_rate": 4.983116986362123e-06, + "loss": 0.6073, + "step": 2529 + }, + { + "epoch": 0.23096585722110644, + "grad_norm": 0.5028336048126221, + "learning_rate": 4.9831030979790594e-06, + "loss": 0.5649, + "step": 2530 + }, + { + "epoch": 0.231057148073763, + "grad_norm": 0.49128833413124084, + "learning_rate": 4.983089203905252e-06, + "loss": 0.5969, + "step": 2531 + }, + { + "epoch": 0.23114843892641956, + "grad_norm": 0.5019619464874268, + "learning_rate": 4.983075304140731e-06, + "loss": 0.6179, + "step": 2532 + }, + { + "epoch": 0.23123972977907614, + "grad_norm": 0.4756677746772766, + "learning_rate": 4.983061398685529e-06, + "loss": 0.6004, + "step": 2533 + }, + { + "epoch": 0.2313310206317327, + "grad_norm": 0.48479965329170227, + "learning_rate": 4.983047487539678e-06, + "loss": 0.6415, + "step": 2534 + }, + { + "epoch": 0.23142231148438927, + "grad_norm": 0.48662903904914856, + "learning_rate": 4.98303357070321e-06, + "loss": 0.5839, + "step": 2535 + }, + { + "epoch": 0.23151360233704582, + "grad_norm": 0.4481215178966522, + "learning_rate": 4.983019648176156e-06, + "loss": 0.5947, + "step": 2536 + }, + { + "epoch": 0.2316048931897024, + "grad_norm": 0.45577555894851685, + "learning_rate": 4.9830057199585496e-06, + "loss": 0.5786, + "step": 2537 + }, + { + "epoch": 0.23169618404235895, + "grad_norm": 0.42452484369277954, + "learning_rate": 4.982991786050421e-06, + "loss": 0.6644, + "step": 2538 + }, + { + "epoch": 0.23178747489501553, + "grad_norm": 0.4569340646266937, + "learning_rate": 4.982977846451804e-06, + "loss": 0.6141, + "step": 2539 + }, + { + "epoch": 0.23187876574767208, + "grad_norm": 0.46981996297836304, + "learning_rate": 4.9829639011627275e-06, + "loss": 0.6089, + "step": 2540 + }, + { + "epoch": 0.23197005660032866, + "grad_norm": 0.4925045371055603, + "learning_rate": 4.982949950183227e-06, + "loss": 0.5785, + "step": 2541 + }, + { + "epoch": 0.2320613474529852, + "grad_norm": 0.47863566875457764, + "learning_rate": 4.982935993513332e-06, + "loss": 0.613, + "step": 2542 + }, + { + "epoch": 0.23215263830564178, + "grad_norm": 0.4465622901916504, + "learning_rate": 4.982922031153076e-06, + "loss": 0.6294, + "step": 2543 + }, + { + "epoch": 0.23224392915829833, + "grad_norm": 0.5277080535888672, + "learning_rate": 4.982908063102489e-06, + "loss": 0.5962, + "step": 2544 + }, + { + "epoch": 0.2323352200109549, + "grad_norm": 0.48280420899391174, + "learning_rate": 4.982894089361605e-06, + "loss": 0.6032, + "step": 2545 + }, + { + "epoch": 0.23242651086361146, + "grad_norm": 0.4669225513935089, + "learning_rate": 4.9828801099304554e-06, + "loss": 0.6078, + "step": 2546 + }, + { + "epoch": 0.23251780171626804, + "grad_norm": 0.479522705078125, + "learning_rate": 4.982866124809073e-06, + "loss": 0.5937, + "step": 2547 + }, + { + "epoch": 0.2326090925689246, + "grad_norm": 0.5128751993179321, + "learning_rate": 4.982852133997488e-06, + "loss": 0.6008, + "step": 2548 + }, + { + "epoch": 0.23270038342158117, + "grad_norm": 0.457802951335907, + "learning_rate": 4.982838137495733e-06, + "loss": 0.5865, + "step": 2549 + }, + { + "epoch": 0.23279167427423772, + "grad_norm": 0.4833679795265198, + "learning_rate": 4.982824135303842e-06, + "loss": 0.5836, + "step": 2550 + }, + { + "epoch": 0.2328829651268943, + "grad_norm": 0.46580246090888977, + "learning_rate": 4.982810127421844e-06, + "loss": 0.6161, + "step": 2551 + }, + { + "epoch": 0.23297425597955085, + "grad_norm": 0.500266432762146, + "learning_rate": 4.982796113849774e-06, + "loss": 0.6026, + "step": 2552 + }, + { + "epoch": 0.23306554683220743, + "grad_norm": 0.4800981283187866, + "learning_rate": 4.982782094587663e-06, + "loss": 0.6011, + "step": 2553 + }, + { + "epoch": 0.23315683768486398, + "grad_norm": 0.46933621168136597, + "learning_rate": 4.982768069635542e-06, + "loss": 0.5859, + "step": 2554 + }, + { + "epoch": 0.23324812853752053, + "grad_norm": 0.4861084818840027, + "learning_rate": 4.982754038993445e-06, + "loss": 0.5291, + "step": 2555 + }, + { + "epoch": 0.2333394193901771, + "grad_norm": 0.48754093050956726, + "learning_rate": 4.982740002661403e-06, + "loss": 0.5981, + "step": 2556 + }, + { + "epoch": 0.23343071024283366, + "grad_norm": 0.48384934663772583, + "learning_rate": 4.982725960639449e-06, + "loss": 0.6586, + "step": 2557 + }, + { + "epoch": 0.23352200109549023, + "grad_norm": 0.48981502652168274, + "learning_rate": 4.9827119129276135e-06, + "loss": 0.5649, + "step": 2558 + }, + { + "epoch": 0.23361329194814678, + "grad_norm": 0.4676945209503174, + "learning_rate": 4.98269785952593e-06, + "loss": 0.5911, + "step": 2559 + }, + { + "epoch": 0.23370458280080336, + "grad_norm": 0.4664284288883209, + "learning_rate": 4.982683800434432e-06, + "loss": 0.6048, + "step": 2560 + }, + { + "epoch": 0.2337958736534599, + "grad_norm": 0.4834744930267334, + "learning_rate": 4.9826697356531486e-06, + "loss": 0.6081, + "step": 2561 + }, + { + "epoch": 0.2338871645061165, + "grad_norm": 0.47090262174606323, + "learning_rate": 4.982655665182115e-06, + "loss": 0.6087, + "step": 2562 + }, + { + "epoch": 0.23397845535877304, + "grad_norm": 0.46303674578666687, + "learning_rate": 4.982641589021361e-06, + "loss": 0.6049, + "step": 2563 + }, + { + "epoch": 0.23406974621142962, + "grad_norm": 0.48107749223709106, + "learning_rate": 4.9826275071709205e-06, + "loss": 0.6413, + "step": 2564 + }, + { + "epoch": 0.23416103706408617, + "grad_norm": 0.4739375412464142, + "learning_rate": 4.982613419630825e-06, + "loss": 0.6362, + "step": 2565 + }, + { + "epoch": 0.23425232791674275, + "grad_norm": 0.467781126499176, + "learning_rate": 4.982599326401107e-06, + "loss": 0.6264, + "step": 2566 + }, + { + "epoch": 0.2343436187693993, + "grad_norm": 0.4975545108318329, + "learning_rate": 4.9825852274818e-06, + "loss": 0.5972, + "step": 2567 + }, + { + "epoch": 0.23443490962205588, + "grad_norm": 0.4513399600982666, + "learning_rate": 4.982571122872935e-06, + "loss": 0.6565, + "step": 2568 + }, + { + "epoch": 0.23452620047471243, + "grad_norm": 0.4883279800415039, + "learning_rate": 4.982557012574544e-06, + "loss": 0.5937, + "step": 2569 + }, + { + "epoch": 0.234617491327369, + "grad_norm": 0.48931267857551575, + "learning_rate": 4.982542896586659e-06, + "loss": 0.5997, + "step": 2570 + }, + { + "epoch": 0.23470878218002555, + "grad_norm": 0.4776861369609833, + "learning_rate": 4.982528774909314e-06, + "loss": 0.6121, + "step": 2571 + }, + { + "epoch": 0.23480007303268213, + "grad_norm": 0.46214112639427185, + "learning_rate": 4.982514647542541e-06, + "loss": 0.6158, + "step": 2572 + }, + { + "epoch": 0.23489136388533868, + "grad_norm": 0.47372931241989136, + "learning_rate": 4.9825005144863715e-06, + "loss": 0.6101, + "step": 2573 + }, + { + "epoch": 0.23498265473799526, + "grad_norm": 0.4434230625629425, + "learning_rate": 4.982486375740838e-06, + "loss": 0.5789, + "step": 2574 + }, + { + "epoch": 0.2350739455906518, + "grad_norm": 0.4797024428844452, + "learning_rate": 4.982472231305974e-06, + "loss": 0.5595, + "step": 2575 + }, + { + "epoch": 0.2351652364433084, + "grad_norm": 0.4714006781578064, + "learning_rate": 4.9824580811818106e-06, + "loss": 0.6359, + "step": 2576 + }, + { + "epoch": 0.23525652729596494, + "grad_norm": 0.46232032775878906, + "learning_rate": 4.982443925368381e-06, + "loss": 0.6372, + "step": 2577 + }, + { + "epoch": 0.23534781814862152, + "grad_norm": 0.4424479901790619, + "learning_rate": 4.982429763865717e-06, + "loss": 0.6063, + "step": 2578 + }, + { + "epoch": 0.23543910900127807, + "grad_norm": 0.45510271191596985, + "learning_rate": 4.982415596673852e-06, + "loss": 0.6526, + "step": 2579 + }, + { + "epoch": 0.23553039985393465, + "grad_norm": 0.4563710391521454, + "learning_rate": 4.982401423792818e-06, + "loss": 0.6079, + "step": 2580 + }, + { + "epoch": 0.2356216907065912, + "grad_norm": 0.43983370065689087, + "learning_rate": 4.982387245222647e-06, + "loss": 0.616, + "step": 2581 + }, + { + "epoch": 0.23571298155924778, + "grad_norm": 0.45701736211776733, + "learning_rate": 4.982373060963372e-06, + "loss": 0.6065, + "step": 2582 + }, + { + "epoch": 0.23580427241190433, + "grad_norm": 0.4766300916671753, + "learning_rate": 4.982358871015026e-06, + "loss": 0.6127, + "step": 2583 + }, + { + "epoch": 0.2358955632645609, + "grad_norm": 0.4586852490901947, + "learning_rate": 4.98234467537764e-06, + "loss": 0.6367, + "step": 2584 + }, + { + "epoch": 0.23598685411721745, + "grad_norm": 0.4521249532699585, + "learning_rate": 4.982330474051248e-06, + "loss": 0.6373, + "step": 2585 + }, + { + "epoch": 0.23607814496987403, + "grad_norm": 0.4739885926246643, + "learning_rate": 4.982316267035882e-06, + "loss": 0.6285, + "step": 2586 + }, + { + "epoch": 0.23616943582253058, + "grad_norm": 0.49994024634361267, + "learning_rate": 4.982302054331575e-06, + "loss": 0.6077, + "step": 2587 + }, + { + "epoch": 0.23626072667518713, + "grad_norm": 0.45876094698905945, + "learning_rate": 4.982287835938359e-06, + "loss": 0.6267, + "step": 2588 + }, + { + "epoch": 0.2363520175278437, + "grad_norm": 0.48949816823005676, + "learning_rate": 4.982273611856266e-06, + "loss": 0.5753, + "step": 2589 + }, + { + "epoch": 0.23644330838050026, + "grad_norm": 0.46696609258651733, + "learning_rate": 4.982259382085329e-06, + "loss": 0.6001, + "step": 2590 + }, + { + "epoch": 0.23653459923315684, + "grad_norm": 0.47598546743392944, + "learning_rate": 4.9822451466255825e-06, + "loss": 0.5604, + "step": 2591 + }, + { + "epoch": 0.2366258900858134, + "grad_norm": 0.46942219138145447, + "learning_rate": 4.982230905477057e-06, + "loss": 0.6161, + "step": 2592 + }, + { + "epoch": 0.23671718093846997, + "grad_norm": 0.49779170751571655, + "learning_rate": 4.982216658639786e-06, + "loss": 0.5726, + "step": 2593 + }, + { + "epoch": 0.23680847179112652, + "grad_norm": 0.4688427150249481, + "learning_rate": 4.9822024061138005e-06, + "loss": 0.6122, + "step": 2594 + }, + { + "epoch": 0.2368997626437831, + "grad_norm": 0.48561087250709534, + "learning_rate": 4.9821881478991354e-06, + "loss": 0.6496, + "step": 2595 + }, + { + "epoch": 0.23699105349643965, + "grad_norm": 0.4402487874031067, + "learning_rate": 4.9821738839958225e-06, + "loss": 0.6315, + "step": 2596 + }, + { + "epoch": 0.23708234434909622, + "grad_norm": 0.463993102312088, + "learning_rate": 4.982159614403895e-06, + "loss": 0.5954, + "step": 2597 + }, + { + "epoch": 0.23717363520175277, + "grad_norm": 0.5022472143173218, + "learning_rate": 4.982145339123384e-06, + "loss": 0.5642, + "step": 2598 + }, + { + "epoch": 0.23726492605440935, + "grad_norm": 0.46625983715057373, + "learning_rate": 4.982131058154324e-06, + "loss": 0.6159, + "step": 2599 + }, + { + "epoch": 0.2373562169070659, + "grad_norm": 0.4790540337562561, + "learning_rate": 4.982116771496748e-06, + "loss": 0.616, + "step": 2600 + }, + { + "epoch": 0.23744750775972248, + "grad_norm": 0.4806039035320282, + "learning_rate": 4.982102479150686e-06, + "loss": 0.6125, + "step": 2601 + }, + { + "epoch": 0.23753879861237903, + "grad_norm": 0.47973719239234924, + "learning_rate": 4.9820881811161735e-06, + "loss": 0.5964, + "step": 2602 + }, + { + "epoch": 0.2376300894650356, + "grad_norm": 0.4876379072666168, + "learning_rate": 4.982073877393242e-06, + "loss": 0.605, + "step": 2603 + }, + { + "epoch": 0.23772138031769216, + "grad_norm": 0.47484835982322693, + "learning_rate": 4.982059567981924e-06, + "loss": 0.626, + "step": 2604 + }, + { + "epoch": 0.23781267117034874, + "grad_norm": 0.45311373472213745, + "learning_rate": 4.982045252882254e-06, + "loss": 0.6184, + "step": 2605 + }, + { + "epoch": 0.2379039620230053, + "grad_norm": 0.5017043352127075, + "learning_rate": 4.982030932094264e-06, + "loss": 0.5982, + "step": 2606 + }, + { + "epoch": 0.23799525287566187, + "grad_norm": 0.47343847155570984, + "learning_rate": 4.982016605617985e-06, + "loss": 0.6041, + "step": 2607 + }, + { + "epoch": 0.23808654372831842, + "grad_norm": 0.5300868153572083, + "learning_rate": 4.982002273453452e-06, + "loss": 0.5866, + "step": 2608 + }, + { + "epoch": 0.238177834580975, + "grad_norm": 0.45700106024742126, + "learning_rate": 4.981987935600697e-06, + "loss": 0.6037, + "step": 2609 + }, + { + "epoch": 0.23826912543363155, + "grad_norm": 0.47604843974113464, + "learning_rate": 4.981973592059754e-06, + "loss": 0.5991, + "step": 2610 + }, + { + "epoch": 0.23836041628628812, + "grad_norm": 0.49107834696769714, + "learning_rate": 4.981959242830654e-06, + "loss": 0.5662, + "step": 2611 + }, + { + "epoch": 0.23845170713894467, + "grad_norm": 0.46815356612205505, + "learning_rate": 4.9819448879134314e-06, + "loss": 0.6135, + "step": 2612 + }, + { + "epoch": 0.23854299799160125, + "grad_norm": 0.4800657629966736, + "learning_rate": 4.981930527308118e-06, + "loss": 0.5797, + "step": 2613 + }, + { + "epoch": 0.2386342888442578, + "grad_norm": 0.4848173260688782, + "learning_rate": 4.981916161014747e-06, + "loss": 0.5974, + "step": 2614 + }, + { + "epoch": 0.23872557969691438, + "grad_norm": 0.44683340191841125, + "learning_rate": 4.981901789033352e-06, + "loss": 0.6473, + "step": 2615 + }, + { + "epoch": 0.23881687054957093, + "grad_norm": 0.4513581693172455, + "learning_rate": 4.981887411363966e-06, + "loss": 0.6219, + "step": 2616 + }, + { + "epoch": 0.2389081614022275, + "grad_norm": 0.4627458453178406, + "learning_rate": 4.981873028006621e-06, + "loss": 0.6138, + "step": 2617 + }, + { + "epoch": 0.23899945225488406, + "grad_norm": 0.4912053346633911, + "learning_rate": 4.98185863896135e-06, + "loss": 0.5949, + "step": 2618 + }, + { + "epoch": 0.23909074310754064, + "grad_norm": 0.4670160412788391, + "learning_rate": 4.981844244228187e-06, + "loss": 0.6414, + "step": 2619 + }, + { + "epoch": 0.2391820339601972, + "grad_norm": 0.504723310470581, + "learning_rate": 4.981829843807163e-06, + "loss": 0.5924, + "step": 2620 + }, + { + "epoch": 0.23927332481285374, + "grad_norm": 0.5002500414848328, + "learning_rate": 4.981815437698314e-06, + "loss": 0.5751, + "step": 2621 + }, + { + "epoch": 0.23936461566551032, + "grad_norm": 0.44758856296539307, + "learning_rate": 4.981801025901671e-06, + "loss": 0.5917, + "step": 2622 + }, + { + "epoch": 0.23945590651816687, + "grad_norm": 0.4640342593193054, + "learning_rate": 4.981786608417267e-06, + "loss": 0.6238, + "step": 2623 + }, + { + "epoch": 0.23954719737082344, + "grad_norm": 0.4835973083972931, + "learning_rate": 4.981772185245135e-06, + "loss": 0.5762, + "step": 2624 + }, + { + "epoch": 0.23963848822348, + "grad_norm": 0.44511011242866516, + "learning_rate": 4.981757756385309e-06, + "loss": 0.6473, + "step": 2625 + }, + { + "epoch": 0.23972977907613657, + "grad_norm": 0.48572084307670593, + "learning_rate": 4.981743321837822e-06, + "loss": 0.5817, + "step": 2626 + }, + { + "epoch": 0.23982106992879312, + "grad_norm": 0.4634771943092346, + "learning_rate": 4.9817288816027064e-06, + "loss": 0.6111, + "step": 2627 + }, + { + "epoch": 0.2399123607814497, + "grad_norm": 0.4648883640766144, + "learning_rate": 4.981714435679995e-06, + "loss": 0.6127, + "step": 2628 + }, + { + "epoch": 0.24000365163410625, + "grad_norm": 0.5064094066619873, + "learning_rate": 4.981699984069722e-06, + "loss": 0.5645, + "step": 2629 + }, + { + "epoch": 0.24009494248676283, + "grad_norm": 0.46995219588279724, + "learning_rate": 4.98168552677192e-06, + "loss": 0.6151, + "step": 2630 + }, + { + "epoch": 0.24018623333941938, + "grad_norm": 0.47547706961631775, + "learning_rate": 4.981671063786622e-06, + "loss": 0.6355, + "step": 2631 + }, + { + "epoch": 0.24027752419207596, + "grad_norm": 0.4779370427131653, + "learning_rate": 4.981656595113862e-06, + "loss": 0.5905, + "step": 2632 + }, + { + "epoch": 0.2403688150447325, + "grad_norm": 0.48957642912864685, + "learning_rate": 4.9816421207536715e-06, + "loss": 0.6042, + "step": 2633 + }, + { + "epoch": 0.2404601058973891, + "grad_norm": 0.4483500123023987, + "learning_rate": 4.981627640706084e-06, + "loss": 0.5926, + "step": 2634 + }, + { + "epoch": 0.24055139675004564, + "grad_norm": 0.4871062636375427, + "learning_rate": 4.981613154971134e-06, + "loss": 0.5507, + "step": 2635 + }, + { + "epoch": 0.24064268760270222, + "grad_norm": 0.46715250611305237, + "learning_rate": 4.981598663548854e-06, + "loss": 0.6223, + "step": 2636 + }, + { + "epoch": 0.24073397845535877, + "grad_norm": 0.4950784146785736, + "learning_rate": 4.9815841664392775e-06, + "loss": 0.5969, + "step": 2637 + }, + { + "epoch": 0.24082526930801534, + "grad_norm": 0.4727294147014618, + "learning_rate": 4.981569663642437e-06, + "loss": 0.6232, + "step": 2638 + }, + { + "epoch": 0.2409165601606719, + "grad_norm": 0.48540905117988586, + "learning_rate": 4.981555155158366e-06, + "loss": 0.5975, + "step": 2639 + }, + { + "epoch": 0.24100785101332847, + "grad_norm": 0.5104721188545227, + "learning_rate": 4.981540640987098e-06, + "loss": 0.6041, + "step": 2640 + }, + { + "epoch": 0.24109914186598502, + "grad_norm": 0.45431211590766907, + "learning_rate": 4.981526121128667e-06, + "loss": 0.616, + "step": 2641 + }, + { + "epoch": 0.2411904327186416, + "grad_norm": 0.4304794371128082, + "learning_rate": 4.981511595583104e-06, + "loss": 0.6568, + "step": 2642 + }, + { + "epoch": 0.24128172357129815, + "grad_norm": 0.4576108157634735, + "learning_rate": 4.981497064350445e-06, + "loss": 0.6576, + "step": 2643 + }, + { + "epoch": 0.24137301442395473, + "grad_norm": 0.4298950433731079, + "learning_rate": 4.981482527430721e-06, + "loss": 0.6527, + "step": 2644 + }, + { + "epoch": 0.24146430527661128, + "grad_norm": 0.5022599101066589, + "learning_rate": 4.981467984823967e-06, + "loss": 0.601, + "step": 2645 + }, + { + "epoch": 0.24155559612926786, + "grad_norm": 0.4854843318462372, + "learning_rate": 4.981453436530215e-06, + "loss": 0.6078, + "step": 2646 + }, + { + "epoch": 0.2416468869819244, + "grad_norm": 0.46673446893692017, + "learning_rate": 4.9814388825495e-06, + "loss": 0.6047, + "step": 2647 + }, + { + "epoch": 0.241738177834581, + "grad_norm": 0.4621797204017639, + "learning_rate": 4.981424322881854e-06, + "loss": 0.5996, + "step": 2648 + }, + { + "epoch": 0.24182946868723754, + "grad_norm": 0.47425517439842224, + "learning_rate": 4.98140975752731e-06, + "loss": 0.5873, + "step": 2649 + }, + { + "epoch": 0.24192075953989411, + "grad_norm": 0.440623939037323, + "learning_rate": 4.9813951864859025e-06, + "loss": 0.6233, + "step": 2650 + }, + { + "epoch": 0.24201205039255066, + "grad_norm": 0.46107080578804016, + "learning_rate": 4.981380609757665e-06, + "loss": 0.6246, + "step": 2651 + }, + { + "epoch": 0.24210334124520724, + "grad_norm": 0.4313350021839142, + "learning_rate": 4.98136602734263e-06, + "loss": 0.5888, + "step": 2652 + }, + { + "epoch": 0.2421946320978638, + "grad_norm": 0.4669926166534424, + "learning_rate": 4.981351439240831e-06, + "loss": 0.643, + "step": 2653 + }, + { + "epoch": 0.24228592295052037, + "grad_norm": 0.4762051999568939, + "learning_rate": 4.981336845452303e-06, + "loss": 0.586, + "step": 2654 + }, + { + "epoch": 0.24237721380317692, + "grad_norm": 0.4643552303314209, + "learning_rate": 4.981322245977076e-06, + "loss": 0.5985, + "step": 2655 + }, + { + "epoch": 0.24246850465583347, + "grad_norm": 0.48231998085975647, + "learning_rate": 4.981307640815187e-06, + "loss": 0.6101, + "step": 2656 + }, + { + "epoch": 0.24255979550849005, + "grad_norm": 0.4856661558151245, + "learning_rate": 4.981293029966669e-06, + "loss": 0.5785, + "step": 2657 + }, + { + "epoch": 0.2426510863611466, + "grad_norm": 0.5053586959838867, + "learning_rate": 4.9812784134315535e-06, + "loss": 0.6159, + "step": 2658 + }, + { + "epoch": 0.24274237721380318, + "grad_norm": 0.4666821360588074, + "learning_rate": 4.981263791209876e-06, + "loss": 0.5946, + "step": 2659 + }, + { + "epoch": 0.24283366806645973, + "grad_norm": 0.47961848974227905, + "learning_rate": 4.981249163301668e-06, + "loss": 0.5696, + "step": 2660 + }, + { + "epoch": 0.2429249589191163, + "grad_norm": 0.46640706062316895, + "learning_rate": 4.981234529706965e-06, + "loss": 0.6068, + "step": 2661 + }, + { + "epoch": 0.24301624977177286, + "grad_norm": 0.4648912250995636, + "learning_rate": 4.981219890425799e-06, + "loss": 0.5807, + "step": 2662 + }, + { + "epoch": 0.24310754062442944, + "grad_norm": 0.43925294280052185, + "learning_rate": 4.981205245458205e-06, + "loss": 0.5986, + "step": 2663 + }, + { + "epoch": 0.24319883147708599, + "grad_norm": 0.5103050470352173, + "learning_rate": 4.981190594804215e-06, + "loss": 0.5729, + "step": 2664 + }, + { + "epoch": 0.24329012232974256, + "grad_norm": 0.4456813931465149, + "learning_rate": 4.981175938463864e-06, + "loss": 0.6118, + "step": 2665 + }, + { + "epoch": 0.24338141318239911, + "grad_norm": 0.45549625158309937, + "learning_rate": 4.981161276437185e-06, + "loss": 0.6494, + "step": 2666 + }, + { + "epoch": 0.2434727040350557, + "grad_norm": 0.4654673933982849, + "learning_rate": 4.981146608724211e-06, + "loss": 0.5689, + "step": 2667 + }, + { + "epoch": 0.24356399488771224, + "grad_norm": 0.5233715176582336, + "learning_rate": 4.981131935324977e-06, + "loss": 0.5849, + "step": 2668 + }, + { + "epoch": 0.24365528574036882, + "grad_norm": 0.4869369864463806, + "learning_rate": 4.981117256239515e-06, + "loss": 0.6028, + "step": 2669 + }, + { + "epoch": 0.24374657659302537, + "grad_norm": 0.48629263043403625, + "learning_rate": 4.981102571467859e-06, + "loss": 0.6353, + "step": 2670 + }, + { + "epoch": 0.24383786744568195, + "grad_norm": 0.4758744239807129, + "learning_rate": 4.981087881010045e-06, + "loss": 0.6251, + "step": 2671 + }, + { + "epoch": 0.2439291582983385, + "grad_norm": 0.5063993334770203, + "learning_rate": 4.981073184866104e-06, + "loss": 0.6169, + "step": 2672 + }, + { + "epoch": 0.24402044915099508, + "grad_norm": 0.49379032850265503, + "learning_rate": 4.98105848303607e-06, + "loss": 0.6147, + "step": 2673 + }, + { + "epoch": 0.24411174000365163, + "grad_norm": 0.4869227707386017, + "learning_rate": 4.981043775519977e-06, + "loss": 0.6158, + "step": 2674 + }, + { + "epoch": 0.2442030308563082, + "grad_norm": 0.44843944907188416, + "learning_rate": 4.981029062317859e-06, + "loss": 0.6368, + "step": 2675 + }, + { + "epoch": 0.24429432170896476, + "grad_norm": 0.4814443290233612, + "learning_rate": 4.98101434342975e-06, + "loss": 0.6162, + "step": 2676 + }, + { + "epoch": 0.24438561256162133, + "grad_norm": 0.5171017050743103, + "learning_rate": 4.980999618855683e-06, + "loss": 0.5707, + "step": 2677 + }, + { + "epoch": 0.24447690341427789, + "grad_norm": 0.4350207448005676, + "learning_rate": 4.9809848885956925e-06, + "loss": 0.593, + "step": 2678 + }, + { + "epoch": 0.24456819426693446, + "grad_norm": 0.4857405722141266, + "learning_rate": 4.980970152649812e-06, + "loss": 0.5862, + "step": 2679 + }, + { + "epoch": 0.244659485119591, + "grad_norm": 0.46195188164711, + "learning_rate": 4.980955411018074e-06, + "loss": 0.6259, + "step": 2680 + }, + { + "epoch": 0.2447507759722476, + "grad_norm": 0.5213157534599304, + "learning_rate": 4.980940663700514e-06, + "loss": 0.5825, + "step": 2681 + }, + { + "epoch": 0.24484206682490414, + "grad_norm": 0.4479157626628876, + "learning_rate": 4.980925910697165e-06, + "loss": 0.6167, + "step": 2682 + }, + { + "epoch": 0.24493335767756072, + "grad_norm": 0.4824899733066559, + "learning_rate": 4.980911152008062e-06, + "loss": 0.5993, + "step": 2683 + }, + { + "epoch": 0.24502464853021727, + "grad_norm": 0.47077563405036926, + "learning_rate": 4.9808963876332365e-06, + "loss": 0.6066, + "step": 2684 + }, + { + "epoch": 0.24511593938287385, + "grad_norm": 0.47757869958877563, + "learning_rate": 4.9808816175727245e-06, + "loss": 0.6125, + "step": 2685 + }, + { + "epoch": 0.2452072302355304, + "grad_norm": 0.46060824394226074, + "learning_rate": 4.980866841826558e-06, + "loss": 0.6237, + "step": 2686 + }, + { + "epoch": 0.24529852108818698, + "grad_norm": 0.562822699546814, + "learning_rate": 4.980852060394773e-06, + "loss": 0.5247, + "step": 2687 + }, + { + "epoch": 0.24538981194084353, + "grad_norm": 0.48997971415519714, + "learning_rate": 4.980837273277401e-06, + "loss": 0.6176, + "step": 2688 + }, + { + "epoch": 0.24548110279350008, + "grad_norm": 0.49483343958854675, + "learning_rate": 4.980822480474478e-06, + "loss": 0.5797, + "step": 2689 + }, + { + "epoch": 0.24557239364615666, + "grad_norm": 0.43976110219955444, + "learning_rate": 4.980807681986037e-06, + "loss": 0.5951, + "step": 2690 + }, + { + "epoch": 0.2456636844988132, + "grad_norm": 0.481682687997818, + "learning_rate": 4.980792877812112e-06, + "loss": 0.6156, + "step": 2691 + }, + { + "epoch": 0.24575497535146978, + "grad_norm": 0.46779531240463257, + "learning_rate": 4.980778067952736e-06, + "loss": 0.6235, + "step": 2692 + }, + { + "epoch": 0.24584626620412633, + "grad_norm": 0.523402750492096, + "learning_rate": 4.980763252407945e-06, + "loss": 0.5664, + "step": 2693 + }, + { + "epoch": 0.2459375570567829, + "grad_norm": 0.46351462602615356, + "learning_rate": 4.980748431177771e-06, + "loss": 0.5887, + "step": 2694 + }, + { + "epoch": 0.24602884790943946, + "grad_norm": 0.4906781315803528, + "learning_rate": 4.980733604262249e-06, + "loss": 0.6417, + "step": 2695 + }, + { + "epoch": 0.24612013876209604, + "grad_norm": 0.46858078241348267, + "learning_rate": 4.9807187716614125e-06, + "loss": 0.6196, + "step": 2696 + }, + { + "epoch": 0.2462114296147526, + "grad_norm": 0.4952024519443512, + "learning_rate": 4.980703933375296e-06, + "loss": 0.6038, + "step": 2697 + }, + { + "epoch": 0.24630272046740917, + "grad_norm": 0.5017239451408386, + "learning_rate": 4.980689089403933e-06, + "loss": 0.5831, + "step": 2698 + }, + { + "epoch": 0.24639401132006572, + "grad_norm": 0.47698068618774414, + "learning_rate": 4.980674239747358e-06, + "loss": 0.637, + "step": 2699 + }, + { + "epoch": 0.2464853021727223, + "grad_norm": 0.4729108214378357, + "learning_rate": 4.980659384405605e-06, + "loss": 0.6356, + "step": 2700 + }, + { + "epoch": 0.24657659302537885, + "grad_norm": 0.474915087223053, + "learning_rate": 4.980644523378707e-06, + "loss": 0.6281, + "step": 2701 + }, + { + "epoch": 0.24666788387803543, + "grad_norm": 0.479140043258667, + "learning_rate": 4.980629656666699e-06, + "loss": 0.5747, + "step": 2702 + }, + { + "epoch": 0.24675917473069198, + "grad_norm": 0.46632322669029236, + "learning_rate": 4.980614784269615e-06, + "loss": 0.6111, + "step": 2703 + }, + { + "epoch": 0.24685046558334856, + "grad_norm": 0.4565157890319824, + "learning_rate": 4.9805999061874896e-06, + "loss": 0.6122, + "step": 2704 + }, + { + "epoch": 0.2469417564360051, + "grad_norm": 0.47374340891838074, + "learning_rate": 4.980585022420356e-06, + "loss": 0.6103, + "step": 2705 + }, + { + "epoch": 0.24703304728866168, + "grad_norm": 0.49034401774406433, + "learning_rate": 4.980570132968248e-06, + "loss": 0.611, + "step": 2706 + }, + { + "epoch": 0.24712433814131823, + "grad_norm": 0.5191422700881958, + "learning_rate": 4.980555237831201e-06, + "loss": 0.5736, + "step": 2707 + }, + { + "epoch": 0.2472156289939748, + "grad_norm": 0.48533520102500916, + "learning_rate": 4.980540337009249e-06, + "loss": 0.6054, + "step": 2708 + }, + { + "epoch": 0.24730691984663136, + "grad_norm": 0.47579333186149597, + "learning_rate": 4.9805254305024245e-06, + "loss": 0.6186, + "step": 2709 + }, + { + "epoch": 0.24739821069928794, + "grad_norm": 0.5001232624053955, + "learning_rate": 4.980510518310763e-06, + "loss": 0.5883, + "step": 2710 + }, + { + "epoch": 0.2474895015519445, + "grad_norm": 0.5023342370986938, + "learning_rate": 4.980495600434299e-06, + "loss": 0.5464, + "step": 2711 + }, + { + "epoch": 0.24758079240460107, + "grad_norm": 0.4847196638584137, + "learning_rate": 4.980480676873066e-06, + "loss": 0.5614, + "step": 2712 + }, + { + "epoch": 0.24767208325725762, + "grad_norm": 0.47265005111694336, + "learning_rate": 4.980465747627099e-06, + "loss": 0.5892, + "step": 2713 + }, + { + "epoch": 0.2477633741099142, + "grad_norm": 0.48527511954307556, + "learning_rate": 4.98045081269643e-06, + "loss": 0.6016, + "step": 2714 + }, + { + "epoch": 0.24785466496257075, + "grad_norm": 0.473994642496109, + "learning_rate": 4.980435872081096e-06, + "loss": 0.6441, + "step": 2715 + }, + { + "epoch": 0.24794595581522733, + "grad_norm": 0.4792456328868866, + "learning_rate": 4.98042092578113e-06, + "loss": 0.5882, + "step": 2716 + }, + { + "epoch": 0.24803724666788388, + "grad_norm": 0.4423483908176422, + "learning_rate": 4.980405973796566e-06, + "loss": 0.6385, + "step": 2717 + }, + { + "epoch": 0.24812853752054045, + "grad_norm": 0.5016661882400513, + "learning_rate": 4.980391016127438e-06, + "loss": 0.5844, + "step": 2718 + }, + { + "epoch": 0.248219828373197, + "grad_norm": 0.5014410018920898, + "learning_rate": 4.980376052773782e-06, + "loss": 0.5814, + "step": 2719 + }, + { + "epoch": 0.24831111922585358, + "grad_norm": 0.48711419105529785, + "learning_rate": 4.9803610837356305e-06, + "loss": 0.6121, + "step": 2720 + }, + { + "epoch": 0.24840241007851013, + "grad_norm": 0.4858604967594147, + "learning_rate": 4.980346109013019e-06, + "loss": 0.6102, + "step": 2721 + }, + { + "epoch": 0.24849370093116668, + "grad_norm": 0.5012629628181458, + "learning_rate": 4.980331128605981e-06, + "loss": 0.563, + "step": 2722 + }, + { + "epoch": 0.24858499178382326, + "grad_norm": 0.4528127610683441, + "learning_rate": 4.98031614251455e-06, + "loss": 0.6016, + "step": 2723 + }, + { + "epoch": 0.2486762826364798, + "grad_norm": 0.48389551043510437, + "learning_rate": 4.980301150738762e-06, + "loss": 0.552, + "step": 2724 + }, + { + "epoch": 0.2487675734891364, + "grad_norm": 0.44683873653411865, + "learning_rate": 4.9802861532786514e-06, + "loss": 0.6028, + "step": 2725 + }, + { + "epoch": 0.24885886434179294, + "grad_norm": 0.49379292130470276, + "learning_rate": 4.980271150134252e-06, + "loss": 0.6155, + "step": 2726 + }, + { + "epoch": 0.24895015519444952, + "grad_norm": 0.48521098494529724, + "learning_rate": 4.980256141305598e-06, + "loss": 0.5992, + "step": 2727 + }, + { + "epoch": 0.24904144604710607, + "grad_norm": 0.4590190351009369, + "learning_rate": 4.980241126792724e-06, + "loss": 0.5809, + "step": 2728 + }, + { + "epoch": 0.24913273689976265, + "grad_norm": 0.4804592728614807, + "learning_rate": 4.980226106595665e-06, + "loss": 0.6102, + "step": 2729 + }, + { + "epoch": 0.2492240277524192, + "grad_norm": 0.4404590129852295, + "learning_rate": 4.980211080714453e-06, + "loss": 0.6505, + "step": 2730 + }, + { + "epoch": 0.24931531860507578, + "grad_norm": 0.45907068252563477, + "learning_rate": 4.980196049149125e-06, + "loss": 0.6178, + "step": 2731 + }, + { + "epoch": 0.24940660945773233, + "grad_norm": 0.4435358941555023, + "learning_rate": 4.980181011899715e-06, + "loss": 0.6182, + "step": 2732 + }, + { + "epoch": 0.2494979003103889, + "grad_norm": 0.44632279872894287, + "learning_rate": 4.9801659689662575e-06, + "loss": 0.6445, + "step": 2733 + }, + { + "epoch": 0.24958919116304545, + "grad_norm": 0.45332542061805725, + "learning_rate": 4.980150920348786e-06, + "loss": 0.649, + "step": 2734 + }, + { + "epoch": 0.24968048201570203, + "grad_norm": 0.46071597933769226, + "learning_rate": 4.980135866047336e-06, + "loss": 0.6406, + "step": 2735 + }, + { + "epoch": 0.24977177286835858, + "grad_norm": 0.45276519656181335, + "learning_rate": 4.980120806061941e-06, + "loss": 0.637, + "step": 2736 + }, + { + "epoch": 0.24986306372101516, + "grad_norm": 0.48436400294303894, + "learning_rate": 4.980105740392637e-06, + "loss": 0.5875, + "step": 2737 + }, + { + "epoch": 0.2499543545736717, + "grad_norm": 0.44479265809059143, + "learning_rate": 4.980090669039457e-06, + "loss": 0.5923, + "step": 2738 + }, + { + "epoch": 0.2500456454263283, + "grad_norm": 0.46016693115234375, + "learning_rate": 4.9800755920024364e-06, + "loss": 0.6328, + "step": 2739 + }, + { + "epoch": 0.25013693627898487, + "grad_norm": 0.4975649416446686, + "learning_rate": 4.980060509281609e-06, + "loss": 0.5791, + "step": 2740 + }, + { + "epoch": 0.2502282271316414, + "grad_norm": 0.42688435316085815, + "learning_rate": 4.980045420877011e-06, + "loss": 0.6555, + "step": 2741 + }, + { + "epoch": 0.25031951798429797, + "grad_norm": 0.4822680354118347, + "learning_rate": 4.980030326788675e-06, + "loss": 0.5892, + "step": 2742 + }, + { + "epoch": 0.25041080883695455, + "grad_norm": 0.4604927897453308, + "learning_rate": 4.980015227016637e-06, + "loss": 0.6278, + "step": 2743 + }, + { + "epoch": 0.2505020996896111, + "grad_norm": 0.4837867319583893, + "learning_rate": 4.98000012156093e-06, + "loss": 0.6281, + "step": 2744 + }, + { + "epoch": 0.25059339054226765, + "grad_norm": 0.4720512926578522, + "learning_rate": 4.9799850104215905e-06, + "loss": 0.5959, + "step": 2745 + }, + { + "epoch": 0.2506846813949242, + "grad_norm": 0.5184236764907837, + "learning_rate": 4.979969893598652e-06, + "loss": 0.5353, + "step": 2746 + }, + { + "epoch": 0.2507759722475808, + "grad_norm": 0.474154531955719, + "learning_rate": 4.9799547710921505e-06, + "loss": 0.6725, + "step": 2747 + }, + { + "epoch": 0.2508672631002374, + "grad_norm": 0.4882921576499939, + "learning_rate": 4.979939642902119e-06, + "loss": 0.6081, + "step": 2748 + }, + { + "epoch": 0.2509585539528939, + "grad_norm": 0.45806074142456055, + "learning_rate": 4.979924509028592e-06, + "loss": 0.5913, + "step": 2749 + }, + { + "epoch": 0.2510498448055505, + "grad_norm": 0.4562845826148987, + "learning_rate": 4.979909369471606e-06, + "loss": 0.6091, + "step": 2750 + }, + { + "epoch": 0.25114113565820706, + "grad_norm": 0.4478330612182617, + "learning_rate": 4.979894224231194e-06, + "loss": 0.6085, + "step": 2751 + }, + { + "epoch": 0.25123242651086364, + "grad_norm": 0.4730070233345032, + "learning_rate": 4.979879073307392e-06, + "loss": 0.6287, + "step": 2752 + }, + { + "epoch": 0.25132371736352016, + "grad_norm": 0.4798735976219177, + "learning_rate": 4.979863916700234e-06, + "loss": 0.6192, + "step": 2753 + }, + { + "epoch": 0.25141500821617674, + "grad_norm": 0.4524998664855957, + "learning_rate": 4.9798487544097545e-06, + "loss": 0.6218, + "step": 2754 + }, + { + "epoch": 0.2515062990688333, + "grad_norm": 0.44562315940856934, + "learning_rate": 4.979833586435989e-06, + "loss": 0.6315, + "step": 2755 + }, + { + "epoch": 0.25159758992148984, + "grad_norm": 0.47500550746917725, + "learning_rate": 4.979818412778972e-06, + "loss": 0.6168, + "step": 2756 + }, + { + "epoch": 0.2516888807741464, + "grad_norm": 0.5040193200111389, + "learning_rate": 4.979803233438737e-06, + "loss": 0.6042, + "step": 2757 + }, + { + "epoch": 0.251780171626803, + "grad_norm": 0.49537283182144165, + "learning_rate": 4.9797880484153214e-06, + "loss": 0.6243, + "step": 2758 + }, + { + "epoch": 0.2518714624794596, + "grad_norm": 0.4973912239074707, + "learning_rate": 4.979772857708758e-06, + "loss": 0.6212, + "step": 2759 + }, + { + "epoch": 0.2519627533321161, + "grad_norm": 0.4517313838005066, + "learning_rate": 4.979757661319081e-06, + "loss": 0.6506, + "step": 2760 + }, + { + "epoch": 0.2520540441847727, + "grad_norm": 0.4658235013484955, + "learning_rate": 4.979742459246328e-06, + "loss": 0.6301, + "step": 2761 + }, + { + "epoch": 0.25214533503742925, + "grad_norm": 0.4811275005340576, + "learning_rate": 4.979727251490532e-06, + "loss": 0.6225, + "step": 2762 + }, + { + "epoch": 0.25223662589008583, + "grad_norm": 0.4817532002925873, + "learning_rate": 4.979712038051727e-06, + "loss": 0.6093, + "step": 2763 + }, + { + "epoch": 0.25232791674274235, + "grad_norm": 0.4471849203109741, + "learning_rate": 4.97969681892995e-06, + "loss": 0.6705, + "step": 2764 + }, + { + "epoch": 0.25241920759539893, + "grad_norm": 0.5094234943389893, + "learning_rate": 4.979681594125234e-06, + "loss": 0.6166, + "step": 2765 + }, + { + "epoch": 0.2525104984480555, + "grad_norm": 0.452567458152771, + "learning_rate": 4.979666363637615e-06, + "loss": 0.6337, + "step": 2766 + }, + { + "epoch": 0.2526017893007121, + "grad_norm": 0.4746599495410919, + "learning_rate": 4.979651127467128e-06, + "loss": 0.5878, + "step": 2767 + }, + { + "epoch": 0.2526930801533686, + "grad_norm": 0.4146117568016052, + "learning_rate": 4.979635885613807e-06, + "loss": 0.6156, + "step": 2768 + }, + { + "epoch": 0.2527843710060252, + "grad_norm": 0.4477097690105438, + "learning_rate": 4.979620638077688e-06, + "loss": 0.6399, + "step": 2769 + }, + { + "epoch": 0.25287566185868177, + "grad_norm": 0.4671681523323059, + "learning_rate": 4.979605384858805e-06, + "loss": 0.5981, + "step": 2770 + }, + { + "epoch": 0.25296695271133834, + "grad_norm": 0.4866611659526825, + "learning_rate": 4.979590125957192e-06, + "loss": 0.6173, + "step": 2771 + }, + { + "epoch": 0.25305824356399487, + "grad_norm": 0.46158263087272644, + "learning_rate": 4.9795748613728875e-06, + "loss": 0.6186, + "step": 2772 + }, + { + "epoch": 0.25314953441665144, + "grad_norm": 0.4638877213001251, + "learning_rate": 4.979559591105923e-06, + "loss": 0.61, + "step": 2773 + }, + { + "epoch": 0.253240825269308, + "grad_norm": 0.4660867750644684, + "learning_rate": 4.9795443151563355e-06, + "loss": 0.6109, + "step": 2774 + }, + { + "epoch": 0.2533321161219646, + "grad_norm": 0.48629307746887207, + "learning_rate": 4.979529033524159e-06, + "loss": 0.5933, + "step": 2775 + }, + { + "epoch": 0.2534234069746211, + "grad_norm": 0.487497478723526, + "learning_rate": 4.979513746209429e-06, + "loss": 0.6054, + "step": 2776 + }, + { + "epoch": 0.2535146978272777, + "grad_norm": 0.48254239559173584, + "learning_rate": 4.979498453212181e-06, + "loss": 0.6118, + "step": 2777 + }, + { + "epoch": 0.2536059886799343, + "grad_norm": 0.5286527276039124, + "learning_rate": 4.979483154532448e-06, + "loss": 0.5929, + "step": 2778 + }, + { + "epoch": 0.25369727953259086, + "grad_norm": 0.47253891825675964, + "learning_rate": 4.979467850170267e-06, + "loss": 0.601, + "step": 2779 + }, + { + "epoch": 0.2537885703852474, + "grad_norm": 0.5077100992202759, + "learning_rate": 4.979452540125672e-06, + "loss": 0.5901, + "step": 2780 + }, + { + "epoch": 0.25387986123790396, + "grad_norm": 0.4650615453720093, + "learning_rate": 4.9794372243987e-06, + "loss": 0.594, + "step": 2781 + }, + { + "epoch": 0.25397115209056054, + "grad_norm": 0.4610496163368225, + "learning_rate": 4.979421902989385e-06, + "loss": 0.5978, + "step": 2782 + }, + { + "epoch": 0.2540624429432171, + "grad_norm": 0.47394511103630066, + "learning_rate": 4.97940657589776e-06, + "loss": 0.6032, + "step": 2783 + }, + { + "epoch": 0.25415373379587364, + "grad_norm": 0.4841494858264923, + "learning_rate": 4.979391243123863e-06, + "loss": 0.619, + "step": 2784 + }, + { + "epoch": 0.2542450246485302, + "grad_norm": 0.48878875374794006, + "learning_rate": 4.979375904667728e-06, + "loss": 0.596, + "step": 2785 + }, + { + "epoch": 0.2543363155011868, + "grad_norm": 0.4597442150115967, + "learning_rate": 4.979360560529391e-06, + "loss": 0.628, + "step": 2786 + }, + { + "epoch": 0.25442760635384337, + "grad_norm": 0.5053868889808655, + "learning_rate": 4.979345210708885e-06, + "loss": 0.5716, + "step": 2787 + }, + { + "epoch": 0.2545188972064999, + "grad_norm": 0.46228912472724915, + "learning_rate": 4.979329855206248e-06, + "loss": 0.6044, + "step": 2788 + }, + { + "epoch": 0.2546101880591565, + "grad_norm": 0.47033774852752686, + "learning_rate": 4.979314494021512e-06, + "loss": 0.6055, + "step": 2789 + }, + { + "epoch": 0.25470147891181305, + "grad_norm": 0.4695437252521515, + "learning_rate": 4.9792991271547155e-06, + "loss": 0.6224, + "step": 2790 + }, + { + "epoch": 0.2547927697644696, + "grad_norm": 0.489064484834671, + "learning_rate": 4.979283754605892e-06, + "loss": 0.6085, + "step": 2791 + }, + { + "epoch": 0.25488406061712615, + "grad_norm": 0.464783251285553, + "learning_rate": 4.979268376375077e-06, + "loss": 0.6271, + "step": 2792 + }, + { + "epoch": 0.25497535146978273, + "grad_norm": 0.5157914757728577, + "learning_rate": 4.979252992462306e-06, + "loss": 0.5986, + "step": 2793 + }, + { + "epoch": 0.2550666423224393, + "grad_norm": 0.4307972490787506, + "learning_rate": 4.979237602867613e-06, + "loss": 0.6209, + "step": 2794 + }, + { + "epoch": 0.25515793317509583, + "grad_norm": 0.4907373785972595, + "learning_rate": 4.9792222075910345e-06, + "loss": 0.5885, + "step": 2795 + }, + { + "epoch": 0.2552492240277524, + "grad_norm": 0.45983827114105225, + "learning_rate": 4.979206806632606e-06, + "loss": 0.6022, + "step": 2796 + }, + { + "epoch": 0.255340514880409, + "grad_norm": 0.4501316249370575, + "learning_rate": 4.979191399992362e-06, + "loss": 0.6072, + "step": 2797 + }, + { + "epoch": 0.25543180573306556, + "grad_norm": 0.4643612504005432, + "learning_rate": 4.979175987670338e-06, + "loss": 0.6292, + "step": 2798 + }, + { + "epoch": 0.2555230965857221, + "grad_norm": 0.5230460166931152, + "learning_rate": 4.979160569666569e-06, + "loss": 0.5972, + "step": 2799 + }, + { + "epoch": 0.25561438743837867, + "grad_norm": 0.45325547456741333, + "learning_rate": 4.979145145981092e-06, + "loss": 0.6257, + "step": 2800 + }, + { + "epoch": 0.25570567829103524, + "grad_norm": 0.4870910346508026, + "learning_rate": 4.979129716613941e-06, + "loss": 0.5959, + "step": 2801 + }, + { + "epoch": 0.2557969691436918, + "grad_norm": 0.43028756976127625, + "learning_rate": 4.9791142815651505e-06, + "loss": 0.6263, + "step": 2802 + }, + { + "epoch": 0.25588825999634834, + "grad_norm": 0.503172755241394, + "learning_rate": 4.979098840834757e-06, + "loss": 0.5563, + "step": 2803 + }, + { + "epoch": 0.2559795508490049, + "grad_norm": 0.4624379575252533, + "learning_rate": 4.9790833944227965e-06, + "loss": 0.5892, + "step": 2804 + }, + { + "epoch": 0.2560708417016615, + "grad_norm": 0.4547909200191498, + "learning_rate": 4.979067942329303e-06, + "loss": 0.5844, + "step": 2805 + }, + { + "epoch": 0.2561621325543181, + "grad_norm": 0.467626690864563, + "learning_rate": 4.979052484554313e-06, + "loss": 0.6007, + "step": 2806 + }, + { + "epoch": 0.2562534234069746, + "grad_norm": 0.4635932743549347, + "learning_rate": 4.979037021097861e-06, + "loss": 0.604, + "step": 2807 + }, + { + "epoch": 0.2563447142596312, + "grad_norm": 0.454187273979187, + "learning_rate": 4.979021551959983e-06, + "loss": 0.564, + "step": 2808 + }, + { + "epoch": 0.25643600511228776, + "grad_norm": 0.47684288024902344, + "learning_rate": 4.979006077140714e-06, + "loss": 0.6115, + "step": 2809 + }, + { + "epoch": 0.25652729596494434, + "grad_norm": 0.4402465522289276, + "learning_rate": 4.978990596640091e-06, + "loss": 0.5959, + "step": 2810 + }, + { + "epoch": 0.25661858681760086, + "grad_norm": 0.49170881509780884, + "learning_rate": 4.978975110458147e-06, + "loss": 0.5456, + "step": 2811 + }, + { + "epoch": 0.25670987767025744, + "grad_norm": 0.5290873050689697, + "learning_rate": 4.97895961859492e-06, + "loss": 0.5849, + "step": 2812 + }, + { + "epoch": 0.256801168522914, + "grad_norm": 0.500197172164917, + "learning_rate": 4.978944121050444e-06, + "loss": 0.6215, + "step": 2813 + }, + { + "epoch": 0.2568924593755706, + "grad_norm": 0.4754537343978882, + "learning_rate": 4.978928617824754e-06, + "loss": 0.5605, + "step": 2814 + }, + { + "epoch": 0.2569837502282271, + "grad_norm": 0.47331416606903076, + "learning_rate": 4.978913108917888e-06, + "loss": 0.5944, + "step": 2815 + }, + { + "epoch": 0.2570750410808837, + "grad_norm": 0.47349685430526733, + "learning_rate": 4.978897594329878e-06, + "loss": 0.6211, + "step": 2816 + }, + { + "epoch": 0.25716633193354027, + "grad_norm": 0.46398162841796875, + "learning_rate": 4.978882074060763e-06, + "loss": 0.6264, + "step": 2817 + }, + { + "epoch": 0.25725762278619685, + "grad_norm": 0.4631032943725586, + "learning_rate": 4.978866548110576e-06, + "loss": 0.5872, + "step": 2818 + }, + { + "epoch": 0.25734891363885337, + "grad_norm": 0.46417221426963806, + "learning_rate": 4.978851016479354e-06, + "loss": 0.5963, + "step": 2819 + }, + { + "epoch": 0.25744020449150995, + "grad_norm": 0.4613894522190094, + "learning_rate": 4.9788354791671324e-06, + "loss": 0.6181, + "step": 2820 + }, + { + "epoch": 0.25753149534416653, + "grad_norm": 0.4777413606643677, + "learning_rate": 4.978819936173947e-06, + "loss": 0.6033, + "step": 2821 + }, + { + "epoch": 0.2576227861968231, + "grad_norm": 0.4808712303638458, + "learning_rate": 4.978804387499831e-06, + "loss": 0.6249, + "step": 2822 + }, + { + "epoch": 0.25771407704947963, + "grad_norm": 0.4432363212108612, + "learning_rate": 4.978788833144824e-06, + "loss": 0.6075, + "step": 2823 + }, + { + "epoch": 0.2578053679021362, + "grad_norm": 0.4731312096118927, + "learning_rate": 4.9787732731089585e-06, + "loss": 0.6295, + "step": 2824 + }, + { + "epoch": 0.2578966587547928, + "grad_norm": 0.46681877970695496, + "learning_rate": 4.9787577073922725e-06, + "loss": 0.6035, + "step": 2825 + }, + { + "epoch": 0.2579879496074493, + "grad_norm": 0.4660446047782898, + "learning_rate": 4.9787421359948e-06, + "loss": 0.5926, + "step": 2826 + }, + { + "epoch": 0.2580792404601059, + "grad_norm": 0.4530758261680603, + "learning_rate": 4.978726558916578e-06, + "loss": 0.6041, + "step": 2827 + }, + { + "epoch": 0.25817053131276246, + "grad_norm": 0.4773331880569458, + "learning_rate": 4.97871097615764e-06, + "loss": 0.6083, + "step": 2828 + }, + { + "epoch": 0.25826182216541904, + "grad_norm": 0.49036920070648193, + "learning_rate": 4.9786953877180245e-06, + "loss": 0.6086, + "step": 2829 + }, + { + "epoch": 0.25835311301807556, + "grad_norm": 0.4606376588344574, + "learning_rate": 4.978679793597765e-06, + "loss": 0.6273, + "step": 2830 + }, + { + "epoch": 0.25844440387073214, + "grad_norm": 0.4774637222290039, + "learning_rate": 4.978664193796898e-06, + "loss": 0.5641, + "step": 2831 + }, + { + "epoch": 0.2585356947233887, + "grad_norm": 0.46628084778785706, + "learning_rate": 4.97864858831546e-06, + "loss": 0.5574, + "step": 2832 + }, + { + "epoch": 0.2586269855760453, + "grad_norm": 0.44629138708114624, + "learning_rate": 4.9786329771534855e-06, + "loss": 0.5789, + "step": 2833 + }, + { + "epoch": 0.2587182764287018, + "grad_norm": 0.47296661138534546, + "learning_rate": 4.978617360311011e-06, + "loss": 0.6112, + "step": 2834 + }, + { + "epoch": 0.2588095672813584, + "grad_norm": 0.5043632388114929, + "learning_rate": 4.9786017377880734e-06, + "loss": 0.6076, + "step": 2835 + }, + { + "epoch": 0.258900858134015, + "grad_norm": 0.5142840147018433, + "learning_rate": 4.978586109584706e-06, + "loss": 0.5943, + "step": 2836 + }, + { + "epoch": 0.25899214898667156, + "grad_norm": 0.5153437852859497, + "learning_rate": 4.978570475700946e-06, + "loss": 0.595, + "step": 2837 + }, + { + "epoch": 0.2590834398393281, + "grad_norm": 0.4454834759235382, + "learning_rate": 4.97855483613683e-06, + "loss": 0.6253, + "step": 2838 + }, + { + "epoch": 0.25917473069198466, + "grad_norm": 0.4993593394756317, + "learning_rate": 4.9785391908923915e-06, + "loss": 0.5645, + "step": 2839 + }, + { + "epoch": 0.25926602154464123, + "grad_norm": 0.45100927352905273, + "learning_rate": 4.978523539967669e-06, + "loss": 0.6109, + "step": 2840 + }, + { + "epoch": 0.2593573123972978, + "grad_norm": 0.4365670680999756, + "learning_rate": 4.9785078833626976e-06, + "loss": 0.5742, + "step": 2841 + }, + { + "epoch": 0.25944860324995433, + "grad_norm": 0.48127061128616333, + "learning_rate": 4.978492221077512e-06, + "loss": 0.6261, + "step": 2842 + }, + { + "epoch": 0.2595398941026109, + "grad_norm": 0.49356281757354736, + "learning_rate": 4.978476553112149e-06, + "loss": 0.5852, + "step": 2843 + }, + { + "epoch": 0.2596311849552675, + "grad_norm": 0.46542438864707947, + "learning_rate": 4.978460879466645e-06, + "loss": 0.5966, + "step": 2844 + }, + { + "epoch": 0.25972247580792407, + "grad_norm": 0.5097302794456482, + "learning_rate": 4.978445200141034e-06, + "loss": 0.5389, + "step": 2845 + }, + { + "epoch": 0.2598137666605806, + "grad_norm": 0.4984922409057617, + "learning_rate": 4.978429515135353e-06, + "loss": 0.5793, + "step": 2846 + }, + { + "epoch": 0.25990505751323717, + "grad_norm": 0.45745596289634705, + "learning_rate": 4.97841382444964e-06, + "loss": 0.5939, + "step": 2847 + }, + { + "epoch": 0.25999634836589375, + "grad_norm": 0.43661943078041077, + "learning_rate": 4.978398128083929e-06, + "loss": 0.6323, + "step": 2848 + }, + { + "epoch": 0.2600876392185503, + "grad_norm": 0.46689051389694214, + "learning_rate": 4.978382426038255e-06, + "loss": 0.6163, + "step": 2849 + }, + { + "epoch": 0.26017893007120685, + "grad_norm": 0.49228471517562866, + "learning_rate": 4.978366718312655e-06, + "loss": 0.6019, + "step": 2850 + }, + { + "epoch": 0.2602702209238634, + "grad_norm": 0.49714183807373047, + "learning_rate": 4.978351004907166e-06, + "loss": 0.6298, + "step": 2851 + }, + { + "epoch": 0.26036151177652, + "grad_norm": 0.47133180499076843, + "learning_rate": 4.978335285821823e-06, + "loss": 0.5852, + "step": 2852 + }, + { + "epoch": 0.2604528026291766, + "grad_norm": 0.4662041962146759, + "learning_rate": 4.978319561056662e-06, + "loss": 0.6218, + "step": 2853 + }, + { + "epoch": 0.2605440934818331, + "grad_norm": 0.4739961326122284, + "learning_rate": 4.978303830611719e-06, + "loss": 0.6001, + "step": 2854 + }, + { + "epoch": 0.2606353843344897, + "grad_norm": 0.4563979208469391, + "learning_rate": 4.97828809448703e-06, + "loss": 0.614, + "step": 2855 + }, + { + "epoch": 0.26072667518714626, + "grad_norm": 0.50169837474823, + "learning_rate": 4.978272352682631e-06, + "loss": 0.5859, + "step": 2856 + }, + { + "epoch": 0.2608179660398028, + "grad_norm": 0.48716413974761963, + "learning_rate": 4.978256605198559e-06, + "loss": 0.5929, + "step": 2857 + }, + { + "epoch": 0.26090925689245936, + "grad_norm": 0.47593954205513, + "learning_rate": 4.97824085203485e-06, + "loss": 0.5324, + "step": 2858 + }, + { + "epoch": 0.26100054774511594, + "grad_norm": 0.4779545068740845, + "learning_rate": 4.978225093191539e-06, + "loss": 0.5892, + "step": 2859 + }, + { + "epoch": 0.2610918385977725, + "grad_norm": 0.47151318192481995, + "learning_rate": 4.978209328668663e-06, + "loss": 0.6095, + "step": 2860 + }, + { + "epoch": 0.26118312945042904, + "grad_norm": 0.5386344194412231, + "learning_rate": 4.9781935584662575e-06, + "loss": 0.5487, + "step": 2861 + }, + { + "epoch": 0.2612744203030856, + "grad_norm": 0.46061766147613525, + "learning_rate": 4.978177782584359e-06, + "loss": 0.6396, + "step": 2862 + }, + { + "epoch": 0.2613657111557422, + "grad_norm": 0.4846876263618469, + "learning_rate": 4.9781620010230035e-06, + "loss": 0.5972, + "step": 2863 + }, + { + "epoch": 0.2614570020083988, + "grad_norm": 0.487949937582016, + "learning_rate": 4.978146213782228e-06, + "loss": 0.6154, + "step": 2864 + }, + { + "epoch": 0.2615482928610553, + "grad_norm": 0.5048052668571472, + "learning_rate": 4.9781304208620666e-06, + "loss": 0.569, + "step": 2865 + }, + { + "epoch": 0.2616395837137119, + "grad_norm": 0.45996183156967163, + "learning_rate": 4.978114622262558e-06, + "loss": 0.6206, + "step": 2866 + }, + { + "epoch": 0.26173087456636845, + "grad_norm": 0.490559458732605, + "learning_rate": 4.9780988179837365e-06, + "loss": 0.5889, + "step": 2867 + }, + { + "epoch": 0.26182216541902503, + "grad_norm": 0.45721593499183655, + "learning_rate": 4.978083008025639e-06, + "loss": 0.6161, + "step": 2868 + }, + { + "epoch": 0.26191345627168156, + "grad_norm": 0.4618540108203888, + "learning_rate": 4.978067192388303e-06, + "loss": 0.6002, + "step": 2869 + }, + { + "epoch": 0.26200474712433813, + "grad_norm": 0.49046817421913147, + "learning_rate": 4.978051371071763e-06, + "loss": 0.6263, + "step": 2870 + }, + { + "epoch": 0.2620960379769947, + "grad_norm": 0.462907999753952, + "learning_rate": 4.978035544076055e-06, + "loss": 0.6183, + "step": 2871 + }, + { + "epoch": 0.2621873288296513, + "grad_norm": 0.46790215373039246, + "learning_rate": 4.978019711401217e-06, + "loss": 0.577, + "step": 2872 + }, + { + "epoch": 0.2622786196823078, + "grad_norm": 0.4624544680118561, + "learning_rate": 4.978003873047284e-06, + "loss": 0.5745, + "step": 2873 + }, + { + "epoch": 0.2623699105349644, + "grad_norm": 0.4773777425289154, + "learning_rate": 4.977988029014293e-06, + "loss": 0.617, + "step": 2874 + }, + { + "epoch": 0.26246120138762097, + "grad_norm": 0.488161563873291, + "learning_rate": 4.977972179302279e-06, + "loss": 0.606, + "step": 2875 + }, + { + "epoch": 0.26255249224027755, + "grad_norm": 0.512254536151886, + "learning_rate": 4.97795632391128e-06, + "loss": 0.5997, + "step": 2876 + }, + { + "epoch": 0.26264378309293407, + "grad_norm": 0.4454539120197296, + "learning_rate": 4.977940462841332e-06, + "loss": 0.5791, + "step": 2877 + }, + { + "epoch": 0.26273507394559065, + "grad_norm": 0.47292983531951904, + "learning_rate": 4.977924596092471e-06, + "loss": 0.6247, + "step": 2878 + }, + { + "epoch": 0.2628263647982472, + "grad_norm": 0.4575675427913666, + "learning_rate": 4.9779087236647335e-06, + "loss": 0.604, + "step": 2879 + }, + { + "epoch": 0.2629176556509038, + "grad_norm": 0.4357055723667145, + "learning_rate": 4.977892845558155e-06, + "loss": 0.5808, + "step": 2880 + }, + { + "epoch": 0.2630089465035603, + "grad_norm": 0.5046706795692444, + "learning_rate": 4.977876961772773e-06, + "loss": 0.5934, + "step": 2881 + }, + { + "epoch": 0.2631002373562169, + "grad_norm": 0.4349978566169739, + "learning_rate": 4.977861072308624e-06, + "loss": 0.605, + "step": 2882 + }, + { + "epoch": 0.2631915282088735, + "grad_norm": 0.472921222448349, + "learning_rate": 4.977845177165743e-06, + "loss": 0.602, + "step": 2883 + }, + { + "epoch": 0.26328281906153006, + "grad_norm": 0.4975443184375763, + "learning_rate": 4.977829276344168e-06, + "loss": 0.5947, + "step": 2884 + }, + { + "epoch": 0.2633741099141866, + "grad_norm": 0.4553331434726715, + "learning_rate": 4.977813369843934e-06, + "loss": 0.6457, + "step": 2885 + }, + { + "epoch": 0.26346540076684316, + "grad_norm": 0.4733615815639496, + "learning_rate": 4.977797457665079e-06, + "loss": 0.5999, + "step": 2886 + }, + { + "epoch": 0.26355669161949974, + "grad_norm": 0.427447646856308, + "learning_rate": 4.977781539807639e-06, + "loss": 0.612, + "step": 2887 + }, + { + "epoch": 0.2636479824721563, + "grad_norm": 0.4730258584022522, + "learning_rate": 4.977765616271649e-06, + "loss": 0.6274, + "step": 2888 + }, + { + "epoch": 0.26373927332481284, + "grad_norm": 0.44094014167785645, + "learning_rate": 4.977749687057148e-06, + "loss": 0.6218, + "step": 2889 + }, + { + "epoch": 0.2638305641774694, + "grad_norm": 0.4740373194217682, + "learning_rate": 4.977733752164171e-06, + "loss": 0.6189, + "step": 2890 + }, + { + "epoch": 0.263921855030126, + "grad_norm": 0.5023719668388367, + "learning_rate": 4.977717811592754e-06, + "loss": 0.5731, + "step": 2891 + }, + { + "epoch": 0.2640131458827825, + "grad_norm": 0.4511529505252838, + "learning_rate": 4.977701865342935e-06, + "loss": 0.6031, + "step": 2892 + }, + { + "epoch": 0.2641044367354391, + "grad_norm": 0.4950163960456848, + "learning_rate": 4.9776859134147495e-06, + "loss": 0.5762, + "step": 2893 + }, + { + "epoch": 0.2641957275880957, + "grad_norm": 0.4769740402698517, + "learning_rate": 4.977669955808234e-06, + "loss": 0.6022, + "step": 2894 + }, + { + "epoch": 0.26428701844075225, + "grad_norm": 0.4880686402320862, + "learning_rate": 4.977653992523426e-06, + "loss": 0.5881, + "step": 2895 + }, + { + "epoch": 0.2643783092934088, + "grad_norm": 0.42148783802986145, + "learning_rate": 4.977638023560361e-06, + "loss": 0.6469, + "step": 2896 + }, + { + "epoch": 0.26446960014606535, + "grad_norm": 0.550679087638855, + "learning_rate": 4.977622048919077e-06, + "loss": 0.5735, + "step": 2897 + }, + { + "epoch": 0.26456089099872193, + "grad_norm": 0.4725794196128845, + "learning_rate": 4.9776060685996095e-06, + "loss": 0.6041, + "step": 2898 + }, + { + "epoch": 0.2646521818513785, + "grad_norm": 0.445343941450119, + "learning_rate": 4.9775900826019954e-06, + "loss": 0.6172, + "step": 2899 + }, + { + "epoch": 0.26474347270403503, + "grad_norm": 0.4730018377304077, + "learning_rate": 4.977574090926271e-06, + "loss": 0.6066, + "step": 2900 + }, + { + "epoch": 0.2648347635566916, + "grad_norm": 0.48861467838287354, + "learning_rate": 4.977558093572473e-06, + "loss": 0.6167, + "step": 2901 + }, + { + "epoch": 0.2649260544093482, + "grad_norm": 0.46369507908821106, + "learning_rate": 4.977542090540638e-06, + "loss": 0.6339, + "step": 2902 + }, + { + "epoch": 0.26501734526200477, + "grad_norm": 0.46294671297073364, + "learning_rate": 4.977526081830803e-06, + "loss": 0.5869, + "step": 2903 + }, + { + "epoch": 0.2651086361146613, + "grad_norm": 0.474065363407135, + "learning_rate": 4.977510067443006e-06, + "loss": 0.5913, + "step": 2904 + }, + { + "epoch": 0.26519992696731787, + "grad_norm": 0.4599935710430145, + "learning_rate": 4.9774940473772815e-06, + "loss": 0.6179, + "step": 2905 + }, + { + "epoch": 0.26529121781997445, + "grad_norm": 0.4718455970287323, + "learning_rate": 4.977478021633667e-06, + "loss": 0.5909, + "step": 2906 + }, + { + "epoch": 0.265382508672631, + "grad_norm": 0.4367729127407074, + "learning_rate": 4.977461990212199e-06, + "loss": 0.6103, + "step": 2907 + }, + { + "epoch": 0.26547379952528755, + "grad_norm": 0.4857076406478882, + "learning_rate": 4.977445953112916e-06, + "loss": 0.5923, + "step": 2908 + }, + { + "epoch": 0.2655650903779441, + "grad_norm": 0.4862888753414154, + "learning_rate": 4.977429910335852e-06, + "loss": 0.6129, + "step": 2909 + }, + { + "epoch": 0.2656563812306007, + "grad_norm": 0.442245751619339, + "learning_rate": 4.977413861881045e-06, + "loss": 0.6155, + "step": 2910 + }, + { + "epoch": 0.2657476720832573, + "grad_norm": 0.45821383595466614, + "learning_rate": 4.977397807748533e-06, + "loss": 0.6467, + "step": 2911 + }, + { + "epoch": 0.2658389629359138, + "grad_norm": 0.4497896432876587, + "learning_rate": 4.977381747938351e-06, + "loss": 0.5676, + "step": 2912 + }, + { + "epoch": 0.2659302537885704, + "grad_norm": 0.42667776346206665, + "learning_rate": 4.977365682450536e-06, + "loss": 0.6092, + "step": 2913 + }, + { + "epoch": 0.26602154464122696, + "grad_norm": 0.4751184284687042, + "learning_rate": 4.977349611285125e-06, + "loss": 0.609, + "step": 2914 + }, + { + "epoch": 0.26611283549388354, + "grad_norm": 0.43711942434310913, + "learning_rate": 4.977333534442157e-06, + "loss": 0.6172, + "step": 2915 + }, + { + "epoch": 0.26620412634654006, + "grad_norm": 0.4667107164859772, + "learning_rate": 4.977317451921665e-06, + "loss": 0.6487, + "step": 2916 + }, + { + "epoch": 0.26629541719919664, + "grad_norm": 0.48282167315483093, + "learning_rate": 4.977301363723689e-06, + "loss": 0.583, + "step": 2917 + }, + { + "epoch": 0.2663867080518532, + "grad_norm": 0.4829453229904175, + "learning_rate": 4.977285269848263e-06, + "loss": 0.6115, + "step": 2918 + }, + { + "epoch": 0.2664779989045098, + "grad_norm": 0.43658408522605896, + "learning_rate": 4.977269170295428e-06, + "loss": 0.6048, + "step": 2919 + }, + { + "epoch": 0.2665692897571663, + "grad_norm": 0.4802066385746002, + "learning_rate": 4.9772530650652165e-06, + "loss": 0.6177, + "step": 2920 + }, + { + "epoch": 0.2666605806098229, + "grad_norm": 0.48798203468322754, + "learning_rate": 4.977236954157668e-06, + "loss": 0.5911, + "step": 2921 + }, + { + "epoch": 0.2667518714624795, + "grad_norm": 0.4667181372642517, + "learning_rate": 4.977220837572819e-06, + "loss": 0.614, + "step": 2922 + }, + { + "epoch": 0.26684316231513605, + "grad_norm": 0.5019550323486328, + "learning_rate": 4.977204715310706e-06, + "loss": 0.6142, + "step": 2923 + }, + { + "epoch": 0.2669344531677926, + "grad_norm": 0.4606674611568451, + "learning_rate": 4.977188587371367e-06, + "loss": 0.6196, + "step": 2924 + }, + { + "epoch": 0.26702574402044915, + "grad_norm": 0.48025012016296387, + "learning_rate": 4.977172453754836e-06, + "loss": 0.6003, + "step": 2925 + }, + { + "epoch": 0.26711703487310573, + "grad_norm": 0.48945754766464233, + "learning_rate": 4.977156314461154e-06, + "loss": 0.6187, + "step": 2926 + }, + { + "epoch": 0.26720832572576225, + "grad_norm": 0.46494922041893005, + "learning_rate": 4.9771401694903555e-06, + "loss": 0.6006, + "step": 2927 + }, + { + "epoch": 0.26729961657841883, + "grad_norm": 0.4446943402290344, + "learning_rate": 4.977124018842478e-06, + "loss": 0.6204, + "step": 2928 + }, + { + "epoch": 0.2673909074310754, + "grad_norm": 0.43193402886390686, + "learning_rate": 4.977107862517559e-06, + "loss": 0.6361, + "step": 2929 + }, + { + "epoch": 0.267482198283732, + "grad_norm": 0.49499309062957764, + "learning_rate": 4.977091700515635e-06, + "loss": 0.5828, + "step": 2930 + }, + { + "epoch": 0.2675734891363885, + "grad_norm": 0.48787829279899597, + "learning_rate": 4.977075532836743e-06, + "loss": 0.5724, + "step": 2931 + }, + { + "epoch": 0.2676647799890451, + "grad_norm": 0.4926914870738983, + "learning_rate": 4.9770593594809206e-06, + "loss": 0.6018, + "step": 2932 + }, + { + "epoch": 0.26775607084170167, + "grad_norm": 0.46148553490638733, + "learning_rate": 4.977043180448204e-06, + "loss": 0.5596, + "step": 2933 + }, + { + "epoch": 0.26784736169435824, + "grad_norm": 0.5026875734329224, + "learning_rate": 4.977026995738631e-06, + "loss": 0.5547, + "step": 2934 + }, + { + "epoch": 0.26793865254701477, + "grad_norm": 0.4530143737792969, + "learning_rate": 4.977010805352239e-06, + "loss": 0.64, + "step": 2935 + }, + { + "epoch": 0.26802994339967134, + "grad_norm": 0.47431403398513794, + "learning_rate": 4.976994609289064e-06, + "loss": 0.62, + "step": 2936 + }, + { + "epoch": 0.2681212342523279, + "grad_norm": 0.4682987332344055, + "learning_rate": 4.976978407549143e-06, + "loss": 0.6153, + "step": 2937 + }, + { + "epoch": 0.2682125251049845, + "grad_norm": 0.470621794462204, + "learning_rate": 4.976962200132515e-06, + "loss": 0.5767, + "step": 2938 + }, + { + "epoch": 0.268303815957641, + "grad_norm": 0.45472270250320435, + "learning_rate": 4.976945987039216e-06, + "loss": 0.5754, + "step": 2939 + }, + { + "epoch": 0.2683951068102976, + "grad_norm": 0.4510030448436737, + "learning_rate": 4.976929768269282e-06, + "loss": 0.6388, + "step": 2940 + }, + { + "epoch": 0.2684863976629542, + "grad_norm": 0.4582536518573761, + "learning_rate": 4.976913543822752e-06, + "loss": 0.6398, + "step": 2941 + }, + { + "epoch": 0.26857768851561076, + "grad_norm": 0.48371821641921997, + "learning_rate": 4.976897313699662e-06, + "loss": 0.6122, + "step": 2942 + }, + { + "epoch": 0.2686689793682673, + "grad_norm": 0.4890602231025696, + "learning_rate": 4.976881077900051e-06, + "loss": 0.6047, + "step": 2943 + }, + { + "epoch": 0.26876027022092386, + "grad_norm": 0.49072811007499695, + "learning_rate": 4.976864836423953e-06, + "loss": 0.5913, + "step": 2944 + }, + { + "epoch": 0.26885156107358044, + "grad_norm": 0.4543914198875427, + "learning_rate": 4.976848589271409e-06, + "loss": 0.6205, + "step": 2945 + }, + { + "epoch": 0.268942851926237, + "grad_norm": 0.49418559670448303, + "learning_rate": 4.9768323364424535e-06, + "loss": 0.5779, + "step": 2946 + }, + { + "epoch": 0.26903414277889354, + "grad_norm": 0.4603883624076843, + "learning_rate": 4.976816077937124e-06, + "loss": 0.6024, + "step": 2947 + }, + { + "epoch": 0.2691254336315501, + "grad_norm": 0.48281416296958923, + "learning_rate": 4.976799813755459e-06, + "loss": 0.6091, + "step": 2948 + }, + { + "epoch": 0.2692167244842067, + "grad_norm": 0.4524020850658417, + "learning_rate": 4.976783543897495e-06, + "loss": 0.6291, + "step": 2949 + }, + { + "epoch": 0.26930801533686327, + "grad_norm": 0.4697466492652893, + "learning_rate": 4.976767268363269e-06, + "loss": 0.6153, + "step": 2950 + }, + { + "epoch": 0.2693993061895198, + "grad_norm": 0.47266051173210144, + "learning_rate": 4.97675098715282e-06, + "loss": 0.6436, + "step": 2951 + }, + { + "epoch": 0.26949059704217637, + "grad_norm": 0.5186887979507446, + "learning_rate": 4.976734700266183e-06, + "loss": 0.5475, + "step": 2952 + }, + { + "epoch": 0.26958188789483295, + "grad_norm": 0.49054884910583496, + "learning_rate": 4.976718407703396e-06, + "loss": 0.5701, + "step": 2953 + }, + { + "epoch": 0.26967317874748953, + "grad_norm": 0.4449738562107086, + "learning_rate": 4.976702109464498e-06, + "loss": 0.6435, + "step": 2954 + }, + { + "epoch": 0.26976446960014605, + "grad_norm": 0.453254371881485, + "learning_rate": 4.976685805549524e-06, + "loss": 0.6421, + "step": 2955 + }, + { + "epoch": 0.26985576045280263, + "grad_norm": 0.4741998314857483, + "learning_rate": 4.976669495958513e-06, + "loss": 0.5882, + "step": 2956 + }, + { + "epoch": 0.2699470513054592, + "grad_norm": 0.5414241552352905, + "learning_rate": 4.976653180691501e-06, + "loss": 0.5692, + "step": 2957 + }, + { + "epoch": 0.27003834215811573, + "grad_norm": 0.44327858090400696, + "learning_rate": 4.976636859748526e-06, + "loss": 0.575, + "step": 2958 + }, + { + "epoch": 0.2701296330107723, + "grad_norm": 0.4414537250995636, + "learning_rate": 4.976620533129626e-06, + "loss": 0.6107, + "step": 2959 + }, + { + "epoch": 0.2702209238634289, + "grad_norm": 0.45102399587631226, + "learning_rate": 4.976604200834839e-06, + "loss": 0.6417, + "step": 2960 + }, + { + "epoch": 0.27031221471608546, + "grad_norm": 0.48813849687576294, + "learning_rate": 4.9765878628642e-06, + "loss": 0.5923, + "step": 2961 + }, + { + "epoch": 0.270403505568742, + "grad_norm": 0.46660181879997253, + "learning_rate": 4.976571519217749e-06, + "loss": 0.5968, + "step": 2962 + }, + { + "epoch": 0.27049479642139856, + "grad_norm": 0.46374914050102234, + "learning_rate": 4.976555169895522e-06, + "loss": 0.5839, + "step": 2963 + }, + { + "epoch": 0.27058608727405514, + "grad_norm": 0.45584920048713684, + "learning_rate": 4.976538814897556e-06, + "loss": 0.6057, + "step": 2964 + }, + { + "epoch": 0.2706773781267117, + "grad_norm": 0.45869219303131104, + "learning_rate": 4.9765224542238895e-06, + "loss": 0.6212, + "step": 2965 + }, + { + "epoch": 0.27076866897936824, + "grad_norm": 0.45168301463127136, + "learning_rate": 4.97650608787456e-06, + "loss": 0.6562, + "step": 2966 + }, + { + "epoch": 0.2708599598320248, + "grad_norm": 0.47932010889053345, + "learning_rate": 4.976489715849605e-06, + "loss": 0.5563, + "step": 2967 + }, + { + "epoch": 0.2709512506846814, + "grad_norm": 0.4711856245994568, + "learning_rate": 4.976473338149062e-06, + "loss": 0.6015, + "step": 2968 + }, + { + "epoch": 0.271042541537338, + "grad_norm": 0.5147467851638794, + "learning_rate": 4.976456954772968e-06, + "loss": 0.5911, + "step": 2969 + }, + { + "epoch": 0.2711338323899945, + "grad_norm": 0.43588247895240784, + "learning_rate": 4.976440565721361e-06, + "loss": 0.6078, + "step": 2970 + }, + { + "epoch": 0.2712251232426511, + "grad_norm": 0.5178803205490112, + "learning_rate": 4.976424170994278e-06, + "loss": 0.5999, + "step": 2971 + }, + { + "epoch": 0.27131641409530766, + "grad_norm": 0.4605054557323456, + "learning_rate": 4.976407770591758e-06, + "loss": 0.6258, + "step": 2972 + }, + { + "epoch": 0.27140770494796423, + "grad_norm": 0.4665486812591553, + "learning_rate": 4.976391364513836e-06, + "loss": 0.6547, + "step": 2973 + }, + { + "epoch": 0.27149899580062076, + "grad_norm": 0.4736156761646271, + "learning_rate": 4.976374952760553e-06, + "loss": 0.6192, + "step": 2974 + }, + { + "epoch": 0.27159028665327734, + "grad_norm": 0.4684886932373047, + "learning_rate": 4.976358535331944e-06, + "loss": 0.6044, + "step": 2975 + }, + { + "epoch": 0.2716815775059339, + "grad_norm": 0.46785038709640503, + "learning_rate": 4.976342112228048e-06, + "loss": 0.6094, + "step": 2976 + }, + { + "epoch": 0.2717728683585905, + "grad_norm": 0.49200284481048584, + "learning_rate": 4.9763256834489006e-06, + "loss": 0.6149, + "step": 2977 + }, + { + "epoch": 0.271864159211247, + "grad_norm": 0.4548487663269043, + "learning_rate": 4.976309248994542e-06, + "loss": 0.5615, + "step": 2978 + }, + { + "epoch": 0.2719554500639036, + "grad_norm": 0.45643430948257446, + "learning_rate": 4.976292808865009e-06, + "loss": 0.5945, + "step": 2979 + }, + { + "epoch": 0.27204674091656017, + "grad_norm": 0.45279213786125183, + "learning_rate": 4.976276363060338e-06, + "loss": 0.6156, + "step": 2980 + }, + { + "epoch": 0.27213803176921675, + "grad_norm": 0.47160235047340393, + "learning_rate": 4.976259911580569e-06, + "loss": 0.5843, + "step": 2981 + }, + { + "epoch": 0.27222932262187327, + "grad_norm": 0.4854941964149475, + "learning_rate": 4.976243454425738e-06, + "loss": 0.6039, + "step": 2982 + }, + { + "epoch": 0.27232061347452985, + "grad_norm": 0.4614693224430084, + "learning_rate": 4.976226991595883e-06, + "loss": 0.5995, + "step": 2983 + }, + { + "epoch": 0.2724119043271864, + "grad_norm": 0.4568752348423004, + "learning_rate": 4.976210523091042e-06, + "loss": 0.5773, + "step": 2984 + }, + { + "epoch": 0.272503195179843, + "grad_norm": 0.47804027795791626, + "learning_rate": 4.976194048911253e-06, + "loss": 0.5782, + "step": 2985 + }, + { + "epoch": 0.27259448603249953, + "grad_norm": 0.47077512741088867, + "learning_rate": 4.976177569056552e-06, + "loss": 0.5312, + "step": 2986 + }, + { + "epoch": 0.2726857768851561, + "grad_norm": 0.46929463744163513, + "learning_rate": 4.976161083526979e-06, + "loss": 0.6554, + "step": 2987 + }, + { + "epoch": 0.2727770677378127, + "grad_norm": 0.46521538496017456, + "learning_rate": 4.976144592322572e-06, + "loss": 0.6062, + "step": 2988 + }, + { + "epoch": 0.27286835859046926, + "grad_norm": 0.49698391556739807, + "learning_rate": 4.976128095443366e-06, + "loss": 0.6021, + "step": 2989 + }, + { + "epoch": 0.2729596494431258, + "grad_norm": 0.49383336305618286, + "learning_rate": 4.976111592889402e-06, + "loss": 0.5965, + "step": 2990 + }, + { + "epoch": 0.27305094029578236, + "grad_norm": 0.4996783435344696, + "learning_rate": 4.976095084660716e-06, + "loss": 0.6122, + "step": 2991 + }, + { + "epoch": 0.27314223114843894, + "grad_norm": 0.4399087727069855, + "learning_rate": 4.976078570757346e-06, + "loss": 0.6072, + "step": 2992 + }, + { + "epoch": 0.27323352200109546, + "grad_norm": 0.4892672002315521, + "learning_rate": 4.97606205117933e-06, + "loss": 0.6163, + "step": 2993 + }, + { + "epoch": 0.27332481285375204, + "grad_norm": 0.4647049605846405, + "learning_rate": 4.976045525926706e-06, + "loss": 0.6032, + "step": 2994 + }, + { + "epoch": 0.2734161037064086, + "grad_norm": 0.5088698863983154, + "learning_rate": 4.976028994999512e-06, + "loss": 0.622, + "step": 2995 + }, + { + "epoch": 0.2735073945590652, + "grad_norm": 0.4741155505180359, + "learning_rate": 4.976012458397786e-06, + "loss": 0.6074, + "step": 2996 + }, + { + "epoch": 0.2735986854117217, + "grad_norm": 0.46588972210884094, + "learning_rate": 4.975995916121565e-06, + "loss": 0.6071, + "step": 2997 + }, + { + "epoch": 0.2736899762643783, + "grad_norm": 0.49660295248031616, + "learning_rate": 4.975979368170887e-06, + "loss": 0.5807, + "step": 2998 + }, + { + "epoch": 0.2737812671170349, + "grad_norm": 0.48153671622276306, + "learning_rate": 4.975962814545792e-06, + "loss": 0.6136, + "step": 2999 + }, + { + "epoch": 0.27387255796969145, + "grad_norm": 0.473204642534256, + "learning_rate": 4.9759462552463145e-06, + "loss": 0.5861, + "step": 3000 + }, + { + "epoch": 0.273963848822348, + "grad_norm": 0.4653512239456177, + "learning_rate": 4.975929690272496e-06, + "loss": 0.6058, + "step": 3001 + }, + { + "epoch": 0.27405513967500456, + "grad_norm": 0.4483414888381958, + "learning_rate": 4.975913119624372e-06, + "loss": 0.6406, + "step": 3002 + }, + { + "epoch": 0.27414643052766113, + "grad_norm": 0.4677327871322632, + "learning_rate": 4.97589654330198e-06, + "loss": 0.5874, + "step": 3003 + }, + { + "epoch": 0.2742377213803177, + "grad_norm": 0.4446532130241394, + "learning_rate": 4.975879961305361e-06, + "loss": 0.6135, + "step": 3004 + }, + { + "epoch": 0.27432901223297423, + "grad_norm": 0.45806625485420227, + "learning_rate": 4.975863373634551e-06, + "loss": 0.6111, + "step": 3005 + }, + { + "epoch": 0.2744203030856308, + "grad_norm": 0.48224693536758423, + "learning_rate": 4.975846780289587e-06, + "loss": 0.6055, + "step": 3006 + }, + { + "epoch": 0.2745115939382874, + "grad_norm": 0.45042598247528076, + "learning_rate": 4.975830181270509e-06, + "loss": 0.6025, + "step": 3007 + }, + { + "epoch": 0.27460288479094397, + "grad_norm": 0.4709894359111786, + "learning_rate": 4.975813576577355e-06, + "loss": 0.6101, + "step": 3008 + }, + { + "epoch": 0.2746941756436005, + "grad_norm": 0.4440993070602417, + "learning_rate": 4.975796966210162e-06, + "loss": 0.6233, + "step": 3009 + }, + { + "epoch": 0.27478546649625707, + "grad_norm": 0.4551164507865906, + "learning_rate": 4.975780350168968e-06, + "loss": 0.6115, + "step": 3010 + }, + { + "epoch": 0.27487675734891365, + "grad_norm": 0.4965357482433319, + "learning_rate": 4.975763728453812e-06, + "loss": 0.5762, + "step": 3011 + }, + { + "epoch": 0.2749680482015702, + "grad_norm": 0.4971238076686859, + "learning_rate": 4.97574710106473e-06, + "loss": 0.572, + "step": 3012 + }, + { + "epoch": 0.27505933905422675, + "grad_norm": 0.48520660400390625, + "learning_rate": 4.975730468001764e-06, + "loss": 0.6118, + "step": 3013 + }, + { + "epoch": 0.2751506299068833, + "grad_norm": 0.5266100764274597, + "learning_rate": 4.975713829264949e-06, + "loss": 0.5502, + "step": 3014 + }, + { + "epoch": 0.2752419207595399, + "grad_norm": 0.4772391617298126, + "learning_rate": 4.975697184854324e-06, + "loss": 0.6011, + "step": 3015 + }, + { + "epoch": 0.2753332116121965, + "grad_norm": 0.5183457136154175, + "learning_rate": 4.975680534769926e-06, + "loss": 0.6267, + "step": 3016 + }, + { + "epoch": 0.275424502464853, + "grad_norm": 0.5007002949714661, + "learning_rate": 4.975663879011796e-06, + "loss": 0.6563, + "step": 3017 + }, + { + "epoch": 0.2755157933175096, + "grad_norm": 0.4910239279270172, + "learning_rate": 4.975647217579969e-06, + "loss": 0.5921, + "step": 3018 + }, + { + "epoch": 0.27560708417016616, + "grad_norm": 0.49853748083114624, + "learning_rate": 4.975630550474486e-06, + "loss": 0.574, + "step": 3019 + }, + { + "epoch": 0.27569837502282274, + "grad_norm": 0.4597960412502289, + "learning_rate": 4.975613877695383e-06, + "loss": 0.6094, + "step": 3020 + }, + { + "epoch": 0.27578966587547926, + "grad_norm": 0.46740269660949707, + "learning_rate": 4.9755971992426995e-06, + "loss": 0.5996, + "step": 3021 + }, + { + "epoch": 0.27588095672813584, + "grad_norm": 0.522997260093689, + "learning_rate": 4.975580515116473e-06, + "loss": 0.5971, + "step": 3022 + }, + { + "epoch": 0.2759722475807924, + "grad_norm": 0.47749146819114685, + "learning_rate": 4.975563825316742e-06, + "loss": 0.595, + "step": 3023 + }, + { + "epoch": 0.276063538433449, + "grad_norm": 0.5053848624229431, + "learning_rate": 4.975547129843544e-06, + "loss": 0.6095, + "step": 3024 + }, + { + "epoch": 0.2761548292861055, + "grad_norm": 0.4691961407661438, + "learning_rate": 4.975530428696919e-06, + "loss": 0.6069, + "step": 3025 + }, + { + "epoch": 0.2762461201387621, + "grad_norm": 0.4659302532672882, + "learning_rate": 4.975513721876904e-06, + "loss": 0.5801, + "step": 3026 + }, + { + "epoch": 0.2763374109914187, + "grad_norm": 0.4777376353740692, + "learning_rate": 4.975497009383538e-06, + "loss": 0.6028, + "step": 3027 + }, + { + "epoch": 0.2764287018440752, + "grad_norm": 0.4679165780544281, + "learning_rate": 4.975480291216858e-06, + "loss": 0.6181, + "step": 3028 + }, + { + "epoch": 0.2765199926967318, + "grad_norm": 0.48329129815101624, + "learning_rate": 4.975463567376904e-06, + "loss": 0.5808, + "step": 3029 + }, + { + "epoch": 0.27661128354938835, + "grad_norm": 0.46167272329330444, + "learning_rate": 4.975446837863713e-06, + "loss": 0.596, + "step": 3030 + }, + { + "epoch": 0.27670257440204493, + "grad_norm": 0.4751459062099457, + "learning_rate": 4.975430102677324e-06, + "loss": 0.609, + "step": 3031 + }, + { + "epoch": 0.27679386525470145, + "grad_norm": 0.48992881178855896, + "learning_rate": 4.975413361817775e-06, + "loss": 0.5998, + "step": 3032 + }, + { + "epoch": 0.27688515610735803, + "grad_norm": 0.46846482157707214, + "learning_rate": 4.9753966152851045e-06, + "loss": 0.6022, + "step": 3033 + }, + { + "epoch": 0.2769764469600146, + "grad_norm": 0.4947679340839386, + "learning_rate": 4.975379863079352e-06, + "loss": 0.6034, + "step": 3034 + }, + { + "epoch": 0.2770677378126712, + "grad_norm": 0.5248109102249146, + "learning_rate": 4.975363105200554e-06, + "loss": 0.6412, + "step": 3035 + }, + { + "epoch": 0.2771590286653277, + "grad_norm": 0.5131681561470032, + "learning_rate": 4.975346341648749e-06, + "loss": 0.5507, + "step": 3036 + }, + { + "epoch": 0.2772503195179843, + "grad_norm": 0.41495391726493835, + "learning_rate": 4.975329572423977e-06, + "loss": 0.5807, + "step": 3037 + }, + { + "epoch": 0.27734161037064087, + "grad_norm": 0.483031690120697, + "learning_rate": 4.975312797526276e-06, + "loss": 0.6207, + "step": 3038 + }, + { + "epoch": 0.27743290122329745, + "grad_norm": 0.47318151593208313, + "learning_rate": 4.975296016955683e-06, + "loss": 0.5801, + "step": 3039 + }, + { + "epoch": 0.27752419207595397, + "grad_norm": 0.4828239381313324, + "learning_rate": 4.9752792307122385e-06, + "loss": 0.5847, + "step": 3040 + }, + { + "epoch": 0.27761548292861055, + "grad_norm": 0.5396600961685181, + "learning_rate": 4.9752624387959805e-06, + "loss": 0.5526, + "step": 3041 + }, + { + "epoch": 0.2777067737812671, + "grad_norm": 0.47922182083129883, + "learning_rate": 4.9752456412069455e-06, + "loss": 0.6126, + "step": 3042 + }, + { + "epoch": 0.2777980646339237, + "grad_norm": 0.42104944586753845, + "learning_rate": 4.975228837945174e-06, + "loss": 0.5784, + "step": 3043 + }, + { + "epoch": 0.2778893554865802, + "grad_norm": 0.4789898693561554, + "learning_rate": 4.9752120290107045e-06, + "loss": 0.5929, + "step": 3044 + }, + { + "epoch": 0.2779806463392368, + "grad_norm": 0.42854636907577515, + "learning_rate": 4.975195214403574e-06, + "loss": 0.6754, + "step": 3045 + }, + { + "epoch": 0.2780719371918934, + "grad_norm": 0.48140767216682434, + "learning_rate": 4.975178394123822e-06, + "loss": 0.6467, + "step": 3046 + }, + { + "epoch": 0.27816322804454996, + "grad_norm": 0.47101786732673645, + "learning_rate": 4.975161568171488e-06, + "loss": 0.6122, + "step": 3047 + }, + { + "epoch": 0.2782545188972065, + "grad_norm": 0.42851072549819946, + "learning_rate": 4.9751447365466095e-06, + "loss": 0.6385, + "step": 3048 + }, + { + "epoch": 0.27834580974986306, + "grad_norm": 0.4822206199169159, + "learning_rate": 4.975127899249225e-06, + "loss": 0.5732, + "step": 3049 + }, + { + "epoch": 0.27843710060251964, + "grad_norm": 0.493317574262619, + "learning_rate": 4.975111056279373e-06, + "loss": 0.6126, + "step": 3050 + }, + { + "epoch": 0.2785283914551762, + "grad_norm": 0.4671173691749573, + "learning_rate": 4.975094207637092e-06, + "loss": 0.604, + "step": 3051 + }, + { + "epoch": 0.27861968230783274, + "grad_norm": 0.4563228487968445, + "learning_rate": 4.975077353322423e-06, + "loss": 0.6119, + "step": 3052 + }, + { + "epoch": 0.2787109731604893, + "grad_norm": 0.48210033774375916, + "learning_rate": 4.975060493335401e-06, + "loss": 0.5982, + "step": 3053 + }, + { + "epoch": 0.2788022640131459, + "grad_norm": 0.46558648347854614, + "learning_rate": 4.975043627676066e-06, + "loss": 0.6222, + "step": 3054 + }, + { + "epoch": 0.2788935548658025, + "grad_norm": 0.5015448331832886, + "learning_rate": 4.975026756344458e-06, + "loss": 0.5787, + "step": 3055 + }, + { + "epoch": 0.278984845718459, + "grad_norm": 0.4885419011116028, + "learning_rate": 4.975009879340613e-06, + "loss": 0.5732, + "step": 3056 + }, + { + "epoch": 0.2790761365711156, + "grad_norm": 0.4371364712715149, + "learning_rate": 4.974992996664573e-06, + "loss": 0.625, + "step": 3057 + }, + { + "epoch": 0.27916742742377215, + "grad_norm": 0.47750428318977356, + "learning_rate": 4.9749761083163735e-06, + "loss": 0.5847, + "step": 3058 + }, + { + "epoch": 0.2792587182764287, + "grad_norm": 0.47255992889404297, + "learning_rate": 4.974959214296056e-06, + "loss": 0.5823, + "step": 3059 + }, + { + "epoch": 0.27935000912908525, + "grad_norm": 0.4635242819786072, + "learning_rate": 4.974942314603657e-06, + "loss": 0.5935, + "step": 3060 + }, + { + "epoch": 0.27944129998174183, + "grad_norm": 0.46764615178108215, + "learning_rate": 4.974925409239217e-06, + "loss": 0.6455, + "step": 3061 + }, + { + "epoch": 0.2795325908343984, + "grad_norm": 0.45795759558677673, + "learning_rate": 4.974908498202773e-06, + "loss": 0.6245, + "step": 3062 + }, + { + "epoch": 0.27962388168705493, + "grad_norm": 0.42427945137023926, + "learning_rate": 4.974891581494365e-06, + "loss": 0.6398, + "step": 3063 + }, + { + "epoch": 0.2797151725397115, + "grad_norm": 0.4933362603187561, + "learning_rate": 4.974874659114032e-06, + "loss": 0.5685, + "step": 3064 + }, + { + "epoch": 0.2798064633923681, + "grad_norm": 0.4652724862098694, + "learning_rate": 4.974857731061811e-06, + "loss": 0.6069, + "step": 3065 + }, + { + "epoch": 0.27989775424502467, + "grad_norm": 0.468755304813385, + "learning_rate": 4.974840797337742e-06, + "loss": 0.5953, + "step": 3066 + }, + { + "epoch": 0.2799890450976812, + "grad_norm": 0.4884464144706726, + "learning_rate": 4.974823857941865e-06, + "loss": 0.5752, + "step": 3067 + }, + { + "epoch": 0.28008033595033777, + "grad_norm": 0.4673088490962982, + "learning_rate": 4.974806912874217e-06, + "loss": 0.6313, + "step": 3068 + }, + { + "epoch": 0.28017162680299434, + "grad_norm": 0.46047455072402954, + "learning_rate": 4.974789962134837e-06, + "loss": 0.6176, + "step": 3069 + }, + { + "epoch": 0.2802629176556509, + "grad_norm": 0.4701029658317566, + "learning_rate": 4.974773005723764e-06, + "loss": 0.6033, + "step": 3070 + }, + { + "epoch": 0.28035420850830745, + "grad_norm": 0.479758620262146, + "learning_rate": 4.974756043641038e-06, + "loss": 0.619, + "step": 3071 + }, + { + "epoch": 0.280445499360964, + "grad_norm": 0.48008662462234497, + "learning_rate": 4.9747390758866965e-06, + "loss": 0.5969, + "step": 3072 + }, + { + "epoch": 0.2805367902136206, + "grad_norm": 0.4290524125099182, + "learning_rate": 4.974722102460779e-06, + "loss": 0.5691, + "step": 3073 + }, + { + "epoch": 0.2806280810662772, + "grad_norm": 0.45727407932281494, + "learning_rate": 4.974705123363325e-06, + "loss": 0.6202, + "step": 3074 + }, + { + "epoch": 0.2807193719189337, + "grad_norm": 0.45720091462135315, + "learning_rate": 4.9746881385943714e-06, + "loss": 0.5676, + "step": 3075 + }, + { + "epoch": 0.2808106627715903, + "grad_norm": 0.44647330045700073, + "learning_rate": 4.974671148153959e-06, + "loss": 0.6041, + "step": 3076 + }, + { + "epoch": 0.28090195362424686, + "grad_norm": 0.46377241611480713, + "learning_rate": 4.974654152042127e-06, + "loss": 0.6501, + "step": 3077 + }, + { + "epoch": 0.28099324447690344, + "grad_norm": 0.44824323058128357, + "learning_rate": 4.974637150258913e-06, + "loss": 0.6028, + "step": 3078 + }, + { + "epoch": 0.28108453532955996, + "grad_norm": 0.4655210077762604, + "learning_rate": 4.974620142804355e-06, + "loss": 0.6162, + "step": 3079 + }, + { + "epoch": 0.28117582618221654, + "grad_norm": 0.4797835350036621, + "learning_rate": 4.974603129678496e-06, + "loss": 0.6071, + "step": 3080 + }, + { + "epoch": 0.2812671170348731, + "grad_norm": 0.4456099569797516, + "learning_rate": 4.974586110881371e-06, + "loss": 0.626, + "step": 3081 + }, + { + "epoch": 0.2813584078875297, + "grad_norm": 0.4794459044933319, + "learning_rate": 4.9745690864130205e-06, + "loss": 0.5845, + "step": 3082 + }, + { + "epoch": 0.2814496987401862, + "grad_norm": 0.4519578218460083, + "learning_rate": 4.9745520562734836e-06, + "loss": 0.5972, + "step": 3083 + }, + { + "epoch": 0.2815409895928428, + "grad_norm": 0.47315308451652527, + "learning_rate": 4.974535020462799e-06, + "loss": 0.6003, + "step": 3084 + }, + { + "epoch": 0.28163228044549937, + "grad_norm": 0.4844590723514557, + "learning_rate": 4.974517978981006e-06, + "loss": 0.5785, + "step": 3085 + }, + { + "epoch": 0.28172357129815595, + "grad_norm": 0.4955112338066101, + "learning_rate": 4.974500931828144e-06, + "loss": 0.6186, + "step": 3086 + }, + { + "epoch": 0.2818148621508125, + "grad_norm": 0.4639091491699219, + "learning_rate": 4.974483879004251e-06, + "loss": 0.6111, + "step": 3087 + }, + { + "epoch": 0.28190615300346905, + "grad_norm": 0.44525545835494995, + "learning_rate": 4.974466820509367e-06, + "loss": 0.6024, + "step": 3088 + }, + { + "epoch": 0.28199744385612563, + "grad_norm": 0.5008748173713684, + "learning_rate": 4.974449756343532e-06, + "loss": 0.612, + "step": 3089 + }, + { + "epoch": 0.2820887347087822, + "grad_norm": 0.45460307598114014, + "learning_rate": 4.974432686506783e-06, + "loss": 0.627, + "step": 3090 + }, + { + "epoch": 0.28218002556143873, + "grad_norm": 0.4798937737941742, + "learning_rate": 4.97441561099916e-06, + "loss": 0.6072, + "step": 3091 + }, + { + "epoch": 0.2822713164140953, + "grad_norm": 0.48987817764282227, + "learning_rate": 4.974398529820702e-06, + "loss": 0.5985, + "step": 3092 + }, + { + "epoch": 0.2823626072667519, + "grad_norm": 0.4883829653263092, + "learning_rate": 4.974381442971449e-06, + "loss": 0.6009, + "step": 3093 + }, + { + "epoch": 0.2824538981194084, + "grad_norm": 0.4971475899219513, + "learning_rate": 4.974364350451439e-06, + "loss": 0.5607, + "step": 3094 + }, + { + "epoch": 0.282545188972065, + "grad_norm": 0.4836571216583252, + "learning_rate": 4.9743472522607115e-06, + "loss": 0.5837, + "step": 3095 + }, + { + "epoch": 0.28263647982472156, + "grad_norm": 0.46974828839302063, + "learning_rate": 4.974330148399307e-06, + "loss": 0.6435, + "step": 3096 + }, + { + "epoch": 0.28272777067737814, + "grad_norm": 0.47914791107177734, + "learning_rate": 4.974313038867262e-06, + "loss": 0.6054, + "step": 3097 + }, + { + "epoch": 0.28281906153003467, + "grad_norm": 0.5040613412857056, + "learning_rate": 4.974295923664618e-06, + "loss": 0.5599, + "step": 3098 + }, + { + "epoch": 0.28291035238269124, + "grad_norm": 0.5087085366249084, + "learning_rate": 4.974278802791414e-06, + "loss": 0.5882, + "step": 3099 + }, + { + "epoch": 0.2830016432353478, + "grad_norm": 0.4828808307647705, + "learning_rate": 4.974261676247688e-06, + "loss": 0.5832, + "step": 3100 + }, + { + "epoch": 0.2830929340880044, + "grad_norm": 0.4717574417591095, + "learning_rate": 4.974244544033481e-06, + "loss": 0.622, + "step": 3101 + }, + { + "epoch": 0.2831842249406609, + "grad_norm": 0.45603927969932556, + "learning_rate": 4.974227406148831e-06, + "loss": 0.6106, + "step": 3102 + }, + { + "epoch": 0.2832755157933175, + "grad_norm": 0.5062204599380493, + "learning_rate": 4.974210262593776e-06, + "loss": 0.5727, + "step": 3103 + }, + { + "epoch": 0.2833668066459741, + "grad_norm": 0.5103610754013062, + "learning_rate": 4.974193113368358e-06, + "loss": 0.6016, + "step": 3104 + }, + { + "epoch": 0.28345809749863066, + "grad_norm": 0.50301194190979, + "learning_rate": 4.9741759584726156e-06, + "loss": 0.589, + "step": 3105 + }, + { + "epoch": 0.2835493883512872, + "grad_norm": 0.4918568432331085, + "learning_rate": 4.974158797906586e-06, + "loss": 0.5981, + "step": 3106 + }, + { + "epoch": 0.28364067920394376, + "grad_norm": 0.4569546580314636, + "learning_rate": 4.974141631670312e-06, + "loss": 0.6441, + "step": 3107 + }, + { + "epoch": 0.28373197005660034, + "grad_norm": 0.47353893518447876, + "learning_rate": 4.974124459763831e-06, + "loss": 0.6032, + "step": 3108 + }, + { + "epoch": 0.2838232609092569, + "grad_norm": 0.4768289625644684, + "learning_rate": 4.974107282187182e-06, + "loss": 0.5973, + "step": 3109 + }, + { + "epoch": 0.28391455176191344, + "grad_norm": 0.47221896052360535, + "learning_rate": 4.974090098940404e-06, + "loss": 0.5869, + "step": 3110 + }, + { + "epoch": 0.28400584261457, + "grad_norm": 0.4645223021507263, + "learning_rate": 4.974072910023538e-06, + "loss": 0.5858, + "step": 3111 + }, + { + "epoch": 0.2840971334672266, + "grad_norm": 0.43203189969062805, + "learning_rate": 4.974055715436622e-06, + "loss": 0.6059, + "step": 3112 + }, + { + "epoch": 0.28418842431988317, + "grad_norm": 0.4874393045902252, + "learning_rate": 4.9740385151796966e-06, + "loss": 0.6419, + "step": 3113 + }, + { + "epoch": 0.2842797151725397, + "grad_norm": 0.4608013331890106, + "learning_rate": 4.9740213092528015e-06, + "loss": 0.5733, + "step": 3114 + }, + { + "epoch": 0.28437100602519627, + "grad_norm": 0.484683632850647, + "learning_rate": 4.974004097655974e-06, + "loss": 0.5916, + "step": 3115 + }, + { + "epoch": 0.28446229687785285, + "grad_norm": 0.50804203748703, + "learning_rate": 4.973986880389255e-06, + "loss": 0.5944, + "step": 3116 + }, + { + "epoch": 0.2845535877305094, + "grad_norm": 0.47579827904701233, + "learning_rate": 4.973969657452684e-06, + "loss": 0.5843, + "step": 3117 + }, + { + "epoch": 0.28464487858316595, + "grad_norm": 0.46108633279800415, + "learning_rate": 4.9739524288463e-06, + "loss": 0.6103, + "step": 3118 + }, + { + "epoch": 0.28473616943582253, + "grad_norm": 0.4693218171596527, + "learning_rate": 4.9739351945701424e-06, + "loss": 0.5937, + "step": 3119 + }, + { + "epoch": 0.2848274602884791, + "grad_norm": 0.5112746953964233, + "learning_rate": 4.973917954624252e-06, + "loss": 0.5864, + "step": 3120 + }, + { + "epoch": 0.2849187511411357, + "grad_norm": 0.4500800371170044, + "learning_rate": 4.973900709008667e-06, + "loss": 0.5822, + "step": 3121 + }, + { + "epoch": 0.2850100419937922, + "grad_norm": 0.47370442748069763, + "learning_rate": 4.973883457723427e-06, + "loss": 0.6197, + "step": 3122 + }, + { + "epoch": 0.2851013328464488, + "grad_norm": 0.4681421220302582, + "learning_rate": 4.973866200768572e-06, + "loss": 0.6019, + "step": 3123 + }, + { + "epoch": 0.28519262369910536, + "grad_norm": 0.4927421510219574, + "learning_rate": 4.9738489381441404e-06, + "loss": 0.6104, + "step": 3124 + }, + { + "epoch": 0.2852839145517619, + "grad_norm": 0.4855537712574005, + "learning_rate": 4.973831669850174e-06, + "loss": 0.5572, + "step": 3125 + }, + { + "epoch": 0.28537520540441846, + "grad_norm": 0.42326831817626953, + "learning_rate": 4.97381439588671e-06, + "loss": 0.6069, + "step": 3126 + }, + { + "epoch": 0.28546649625707504, + "grad_norm": 0.45134207606315613, + "learning_rate": 4.97379711625379e-06, + "loss": 0.6387, + "step": 3127 + }, + { + "epoch": 0.2855577871097316, + "grad_norm": 0.4657342731952667, + "learning_rate": 4.973779830951451e-06, + "loss": 0.6178, + "step": 3128 + }, + { + "epoch": 0.28564907796238814, + "grad_norm": 0.4653412103652954, + "learning_rate": 4.973762539979737e-06, + "loss": 0.587, + "step": 3129 + }, + { + "epoch": 0.2857403688150447, + "grad_norm": 0.464657723903656, + "learning_rate": 4.973745243338684e-06, + "loss": 0.6052, + "step": 3130 + }, + { + "epoch": 0.2858316596677013, + "grad_norm": 0.49311962723731995, + "learning_rate": 4.9737279410283315e-06, + "loss": 0.593, + "step": 3131 + }, + { + "epoch": 0.2859229505203579, + "grad_norm": 0.48461398482322693, + "learning_rate": 4.973710633048721e-06, + "loss": 0.5857, + "step": 3132 + }, + { + "epoch": 0.2860142413730144, + "grad_norm": 0.45093947649002075, + "learning_rate": 4.9736933193998905e-06, + "loss": 0.6208, + "step": 3133 + }, + { + "epoch": 0.286105532225671, + "grad_norm": 0.466549813747406, + "learning_rate": 4.9736760000818815e-06, + "loss": 0.5915, + "step": 3134 + }, + { + "epoch": 0.28619682307832756, + "grad_norm": 0.49668243527412415, + "learning_rate": 4.973658675094733e-06, + "loss": 0.5458, + "step": 3135 + }, + { + "epoch": 0.28628811393098413, + "grad_norm": 0.4725465178489685, + "learning_rate": 4.973641344438483e-06, + "loss": 0.6018, + "step": 3136 + }, + { + "epoch": 0.28637940478364066, + "grad_norm": 0.4356968402862549, + "learning_rate": 4.973624008113174e-06, + "loss": 0.6296, + "step": 3137 + }, + { + "epoch": 0.28647069563629723, + "grad_norm": 0.47179123759269714, + "learning_rate": 4.973606666118843e-06, + "loss": 0.5853, + "step": 3138 + }, + { + "epoch": 0.2865619864889538, + "grad_norm": 0.5214576721191406, + "learning_rate": 4.973589318455533e-06, + "loss": 0.5745, + "step": 3139 + }, + { + "epoch": 0.2866532773416104, + "grad_norm": 0.4400362968444824, + "learning_rate": 4.9735719651232805e-06, + "loss": 0.6074, + "step": 3140 + }, + { + "epoch": 0.2867445681942669, + "grad_norm": 0.4826790690422058, + "learning_rate": 4.973554606122128e-06, + "loss": 0.5832, + "step": 3141 + }, + { + "epoch": 0.2868358590469235, + "grad_norm": 0.4720676839351654, + "learning_rate": 4.973537241452112e-06, + "loss": 0.6287, + "step": 3142 + }, + { + "epoch": 0.28692714989958007, + "grad_norm": 0.45942798256874084, + "learning_rate": 4.973519871113275e-06, + "loss": 0.6122, + "step": 3143 + }, + { + "epoch": 0.28701844075223665, + "grad_norm": 0.5357465744018555, + "learning_rate": 4.9735024951056565e-06, + "loss": 0.549, + "step": 3144 + }, + { + "epoch": 0.28710973160489317, + "grad_norm": 0.4999094009399414, + "learning_rate": 4.973485113429296e-06, + "loss": 0.5755, + "step": 3145 + }, + { + "epoch": 0.28720102245754975, + "grad_norm": 0.4785275161266327, + "learning_rate": 4.973467726084233e-06, + "loss": 0.571, + "step": 3146 + }, + { + "epoch": 0.2872923133102063, + "grad_norm": 0.4595888555049896, + "learning_rate": 4.973450333070508e-06, + "loss": 0.5703, + "step": 3147 + }, + { + "epoch": 0.2873836041628629, + "grad_norm": 0.4760652482509613, + "learning_rate": 4.9734329343881596e-06, + "loss": 0.6128, + "step": 3148 + }, + { + "epoch": 0.2874748950155194, + "grad_norm": 0.4936378598213196, + "learning_rate": 4.973415530037229e-06, + "loss": 0.5981, + "step": 3149 + }, + { + "epoch": 0.287566185868176, + "grad_norm": 0.48981308937072754, + "learning_rate": 4.973398120017755e-06, + "loss": 0.5851, + "step": 3150 + }, + { + "epoch": 0.2876574767208326, + "grad_norm": 0.4627302587032318, + "learning_rate": 4.973380704329778e-06, + "loss": 0.5907, + "step": 3151 + }, + { + "epoch": 0.28774876757348916, + "grad_norm": 0.49763208627700806, + "learning_rate": 4.973363282973338e-06, + "loss": 0.5916, + "step": 3152 + }, + { + "epoch": 0.2878400584261457, + "grad_norm": 0.4870164096355438, + "learning_rate": 4.973345855948476e-06, + "loss": 0.5911, + "step": 3153 + }, + { + "epoch": 0.28793134927880226, + "grad_norm": 0.43314048647880554, + "learning_rate": 4.97332842325523e-06, + "loss": 0.6118, + "step": 3154 + }, + { + "epoch": 0.28802264013145884, + "grad_norm": 0.5096493363380432, + "learning_rate": 4.973310984893641e-06, + "loss": 0.5893, + "step": 3155 + }, + { + "epoch": 0.2881139309841154, + "grad_norm": 0.4782969355583191, + "learning_rate": 4.973293540863749e-06, + "loss": 0.5981, + "step": 3156 + }, + { + "epoch": 0.28820522183677194, + "grad_norm": 0.4619390368461609, + "learning_rate": 4.973276091165594e-06, + "loss": 0.631, + "step": 3157 + }, + { + "epoch": 0.2882965126894285, + "grad_norm": 0.4668877422809601, + "learning_rate": 4.973258635799215e-06, + "loss": 0.6023, + "step": 3158 + }, + { + "epoch": 0.2883878035420851, + "grad_norm": 0.5000399351119995, + "learning_rate": 4.973241174764653e-06, + "loss": 0.5917, + "step": 3159 + }, + { + "epoch": 0.2884790943947416, + "grad_norm": 0.47133058309555054, + "learning_rate": 4.973223708061949e-06, + "loss": 0.6106, + "step": 3160 + }, + { + "epoch": 0.2885703852473982, + "grad_norm": 0.41934362053871155, + "learning_rate": 4.973206235691139e-06, + "loss": 0.6599, + "step": 3161 + }, + { + "epoch": 0.2886616761000548, + "grad_norm": 0.48910748958587646, + "learning_rate": 4.973188757652268e-06, + "loss": 0.5734, + "step": 3162 + }, + { + "epoch": 0.28875296695271135, + "grad_norm": 0.47329744696617126, + "learning_rate": 4.973171273945374e-06, + "loss": 0.616, + "step": 3163 + }, + { + "epoch": 0.2888442578053679, + "grad_norm": 0.480759859085083, + "learning_rate": 4.9731537845704955e-06, + "loss": 0.5687, + "step": 3164 + }, + { + "epoch": 0.28893554865802445, + "grad_norm": 0.4739513695240021, + "learning_rate": 4.973136289527675e-06, + "loss": 0.5837, + "step": 3165 + }, + { + "epoch": 0.28902683951068103, + "grad_norm": 0.47612422704696655, + "learning_rate": 4.973118788816952e-06, + "loss": 0.6165, + "step": 3166 + }, + { + "epoch": 0.2891181303633376, + "grad_norm": 0.48888447880744934, + "learning_rate": 4.973101282438366e-06, + "loss": 0.5892, + "step": 3167 + }, + { + "epoch": 0.28920942121599413, + "grad_norm": 0.45912691950798035, + "learning_rate": 4.973083770391957e-06, + "loss": 0.6356, + "step": 3168 + }, + { + "epoch": 0.2893007120686507, + "grad_norm": 0.5019619464874268, + "learning_rate": 4.973066252677765e-06, + "loss": 0.5718, + "step": 3169 + }, + { + "epoch": 0.2893920029213073, + "grad_norm": 0.4780627489089966, + "learning_rate": 4.973048729295832e-06, + "loss": 0.6053, + "step": 3170 + }, + { + "epoch": 0.28948329377396387, + "grad_norm": 0.46163010597229004, + "learning_rate": 4.973031200246195e-06, + "loss": 0.6192, + "step": 3171 + }, + { + "epoch": 0.2895745846266204, + "grad_norm": 0.44469746947288513, + "learning_rate": 4.973013665528898e-06, + "loss": 0.6443, + "step": 3172 + }, + { + "epoch": 0.28966587547927697, + "grad_norm": 0.47906920313835144, + "learning_rate": 4.9729961251439785e-06, + "loss": 0.608, + "step": 3173 + }, + { + "epoch": 0.28975716633193355, + "grad_norm": 0.49761319160461426, + "learning_rate": 4.972978579091476e-06, + "loss": 0.5792, + "step": 3174 + }, + { + "epoch": 0.2898484571845901, + "grad_norm": 0.4323689043521881, + "learning_rate": 4.972961027371434e-06, + "loss": 0.6323, + "step": 3175 + }, + { + "epoch": 0.28993974803724665, + "grad_norm": 0.4944392740726471, + "learning_rate": 4.97294346998389e-06, + "loss": 0.5524, + "step": 3176 + }, + { + "epoch": 0.2900310388899032, + "grad_norm": 0.4436018168926239, + "learning_rate": 4.972925906928885e-06, + "loss": 0.6293, + "step": 3177 + }, + { + "epoch": 0.2901223297425598, + "grad_norm": 0.49196290969848633, + "learning_rate": 4.972908338206459e-06, + "loss": 0.5666, + "step": 3178 + }, + { + "epoch": 0.2902136205952164, + "grad_norm": 0.4933280348777771, + "learning_rate": 4.972890763816653e-06, + "loss": 0.5835, + "step": 3179 + }, + { + "epoch": 0.2903049114478729, + "grad_norm": 0.4504285156726837, + "learning_rate": 4.972873183759507e-06, + "loss": 0.6095, + "step": 3180 + }, + { + "epoch": 0.2903962023005295, + "grad_norm": 0.47135230898857117, + "learning_rate": 4.972855598035062e-06, + "loss": 0.5941, + "step": 3181 + }, + { + "epoch": 0.29048749315318606, + "grad_norm": 0.4732695519924164, + "learning_rate": 4.9728380066433555e-06, + "loss": 0.5956, + "step": 3182 + }, + { + "epoch": 0.29057878400584264, + "grad_norm": 0.4693377912044525, + "learning_rate": 4.97282040958443e-06, + "loss": 0.6088, + "step": 3183 + }, + { + "epoch": 0.29067007485849916, + "grad_norm": 0.48352280259132385, + "learning_rate": 4.972802806858327e-06, + "loss": 0.5828, + "step": 3184 + }, + { + "epoch": 0.29076136571115574, + "grad_norm": 0.4901971220970154, + "learning_rate": 4.9727851984650846e-06, + "loss": 0.5921, + "step": 3185 + }, + { + "epoch": 0.2908526565638123, + "grad_norm": 0.4826771020889282, + "learning_rate": 4.972767584404743e-06, + "loss": 0.5719, + "step": 3186 + }, + { + "epoch": 0.2909439474164689, + "grad_norm": 0.4405258893966675, + "learning_rate": 4.9727499646773445e-06, + "loss": 0.6378, + "step": 3187 + }, + { + "epoch": 0.2910352382691254, + "grad_norm": 0.48271217942237854, + "learning_rate": 4.972732339282929e-06, + "loss": 0.5757, + "step": 3188 + }, + { + "epoch": 0.291126529121782, + "grad_norm": 0.4922981262207031, + "learning_rate": 4.972714708221536e-06, + "loss": 0.6194, + "step": 3189 + }, + { + "epoch": 0.2912178199744386, + "grad_norm": 0.48437315225601196, + "learning_rate": 4.972697071493205e-06, + "loss": 0.5648, + "step": 3190 + }, + { + "epoch": 0.29130911082709515, + "grad_norm": 0.4675815999507904, + "learning_rate": 4.972679429097979e-06, + "loss": 0.6191, + "step": 3191 + }, + { + "epoch": 0.2914004016797517, + "grad_norm": 0.4844789206981659, + "learning_rate": 4.972661781035898e-06, + "loss": 0.5752, + "step": 3192 + }, + { + "epoch": 0.29149169253240825, + "grad_norm": 0.4508255124092102, + "learning_rate": 4.9726441273070004e-06, + "loss": 0.632, + "step": 3193 + }, + { + "epoch": 0.29158298338506483, + "grad_norm": 0.4831562936306, + "learning_rate": 4.972626467911328e-06, + "loss": 0.6143, + "step": 3194 + }, + { + "epoch": 0.29167427423772135, + "grad_norm": 0.4520168602466583, + "learning_rate": 4.972608802848922e-06, + "loss": 0.5893, + "step": 3195 + }, + { + "epoch": 0.29176556509037793, + "grad_norm": 0.48822709918022156, + "learning_rate": 4.9725911321198205e-06, + "loss": 0.591, + "step": 3196 + }, + { + "epoch": 0.2918568559430345, + "grad_norm": 0.4443199336528778, + "learning_rate": 4.972573455724067e-06, + "loss": 0.6041, + "step": 3197 + }, + { + "epoch": 0.2919481467956911, + "grad_norm": 0.4787430763244629, + "learning_rate": 4.972555773661699e-06, + "loss": 0.5578, + "step": 3198 + }, + { + "epoch": 0.2920394376483476, + "grad_norm": 0.46544089913368225, + "learning_rate": 4.97253808593276e-06, + "loss": 0.62, + "step": 3199 + }, + { + "epoch": 0.2921307285010042, + "grad_norm": 0.46276500821113586, + "learning_rate": 4.972520392537289e-06, + "loss": 0.6135, + "step": 3200 + }, + { + "epoch": 0.29222201935366077, + "grad_norm": 0.48396944999694824, + "learning_rate": 4.972502693475326e-06, + "loss": 0.615, + "step": 3201 + }, + { + "epoch": 0.29231331020631734, + "grad_norm": 0.49713781476020813, + "learning_rate": 4.9724849887469115e-06, + "loss": 0.5735, + "step": 3202 + }, + { + "epoch": 0.29240460105897387, + "grad_norm": 0.48020586371421814, + "learning_rate": 4.972467278352088e-06, + "loss": 0.5819, + "step": 3203 + }, + { + "epoch": 0.29249589191163045, + "grad_norm": 0.4725522994995117, + "learning_rate": 4.972449562290895e-06, + "loss": 0.6014, + "step": 3204 + }, + { + "epoch": 0.292587182764287, + "grad_norm": 0.46980589628219604, + "learning_rate": 4.972431840563372e-06, + "loss": 0.6068, + "step": 3205 + }, + { + "epoch": 0.2926784736169436, + "grad_norm": 0.48652490973472595, + "learning_rate": 4.9724141131695615e-06, + "loss": 0.6017, + "step": 3206 + }, + { + "epoch": 0.2927697644696001, + "grad_norm": 0.481448233127594, + "learning_rate": 4.972396380109504e-06, + "loss": 0.604, + "step": 3207 + }, + { + "epoch": 0.2928610553222567, + "grad_norm": 0.482835978269577, + "learning_rate": 4.972378641383237e-06, + "loss": 0.61, + "step": 3208 + }, + { + "epoch": 0.2929523461749133, + "grad_norm": 0.4416465163230896, + "learning_rate": 4.9723608969908046e-06, + "loss": 0.6363, + "step": 3209 + }, + { + "epoch": 0.29304363702756986, + "grad_norm": 0.4706776440143585, + "learning_rate": 4.972343146932247e-06, + "loss": 0.5907, + "step": 3210 + }, + { + "epoch": 0.2931349278802264, + "grad_norm": 0.4661322236061096, + "learning_rate": 4.972325391207603e-06, + "loss": 0.6343, + "step": 3211 + }, + { + "epoch": 0.29322621873288296, + "grad_norm": 0.5062664747238159, + "learning_rate": 4.972307629816916e-06, + "loss": 0.5481, + "step": 3212 + }, + { + "epoch": 0.29331750958553954, + "grad_norm": 0.4789634644985199, + "learning_rate": 4.9722898627602244e-06, + "loss": 0.6057, + "step": 3213 + }, + { + "epoch": 0.2934088004381961, + "grad_norm": 0.46519362926483154, + "learning_rate": 4.97227209003757e-06, + "loss": 0.539, + "step": 3214 + }, + { + "epoch": 0.29350009129085264, + "grad_norm": 0.45167165994644165, + "learning_rate": 4.972254311648993e-06, + "loss": 0.6387, + "step": 3215 + }, + { + "epoch": 0.2935913821435092, + "grad_norm": 0.45458629727363586, + "learning_rate": 4.972236527594535e-06, + "loss": 0.6057, + "step": 3216 + }, + { + "epoch": 0.2936826729961658, + "grad_norm": 0.4780203402042389, + "learning_rate": 4.9722187378742355e-06, + "loss": 0.5975, + "step": 3217 + }, + { + "epoch": 0.2937739638488224, + "grad_norm": 0.5017853379249573, + "learning_rate": 4.9722009424881364e-06, + "loss": 0.6009, + "step": 3218 + }, + { + "epoch": 0.2938652547014789, + "grad_norm": 0.464133620262146, + "learning_rate": 4.972183141436277e-06, + "loss": 0.5881, + "step": 3219 + }, + { + "epoch": 0.2939565455541355, + "grad_norm": 0.4672791063785553, + "learning_rate": 4.972165334718701e-06, + "loss": 0.62, + "step": 3220 + }, + { + "epoch": 0.29404783640679205, + "grad_norm": 0.46312040090560913, + "learning_rate": 4.972147522335447e-06, + "loss": 0.5682, + "step": 3221 + }, + { + "epoch": 0.29413912725944863, + "grad_norm": 0.49040526151657104, + "learning_rate": 4.972129704286556e-06, + "loss": 0.5896, + "step": 3222 + }, + { + "epoch": 0.29423041811210515, + "grad_norm": 0.4762204885482788, + "learning_rate": 4.972111880572068e-06, + "loss": 0.6019, + "step": 3223 + }, + { + "epoch": 0.29432170896476173, + "grad_norm": 0.4927850663661957, + "learning_rate": 4.972094051192026e-06, + "loss": 0.606, + "step": 3224 + }, + { + "epoch": 0.2944129998174183, + "grad_norm": 0.5066984295845032, + "learning_rate": 4.9720762161464684e-06, + "loss": 0.6044, + "step": 3225 + }, + { + "epoch": 0.29450429067007483, + "grad_norm": 0.4909193217754364, + "learning_rate": 4.972058375435439e-06, + "loss": 0.5703, + "step": 3226 + }, + { + "epoch": 0.2945955815227314, + "grad_norm": 0.5354705452919006, + "learning_rate": 4.972040529058976e-06, + "loss": 0.589, + "step": 3227 + }, + { + "epoch": 0.294686872375388, + "grad_norm": 0.49071168899536133, + "learning_rate": 4.972022677017122e-06, + "loss": 0.5896, + "step": 3228 + }, + { + "epoch": 0.29477816322804457, + "grad_norm": 0.4487537145614624, + "learning_rate": 4.972004819309917e-06, + "loss": 0.5704, + "step": 3229 + }, + { + "epoch": 0.2948694540807011, + "grad_norm": 0.47737014293670654, + "learning_rate": 4.971986955937402e-06, + "loss": 0.5882, + "step": 3230 + }, + { + "epoch": 0.29496074493335767, + "grad_norm": 0.4961264729499817, + "learning_rate": 4.971969086899619e-06, + "loss": 0.5746, + "step": 3231 + }, + { + "epoch": 0.29505203578601424, + "grad_norm": 0.4445248544216156, + "learning_rate": 4.971951212196608e-06, + "loss": 0.6309, + "step": 3232 + }, + { + "epoch": 0.2951433266386708, + "grad_norm": 0.49793708324432373, + "learning_rate": 4.97193333182841e-06, + "loss": 0.5666, + "step": 3233 + }, + { + "epoch": 0.29523461749132734, + "grad_norm": 0.4675620496273041, + "learning_rate": 4.971915445795066e-06, + "loss": 0.5478, + "step": 3234 + }, + { + "epoch": 0.2953259083439839, + "grad_norm": 0.45743876695632935, + "learning_rate": 4.971897554096616e-06, + "loss": 0.6129, + "step": 3235 + }, + { + "epoch": 0.2954171991966405, + "grad_norm": 0.4246242344379425, + "learning_rate": 4.971879656733103e-06, + "loss": 0.6515, + "step": 3236 + }, + { + "epoch": 0.2955084900492971, + "grad_norm": 0.47488829493522644, + "learning_rate": 4.971861753704567e-06, + "loss": 0.5777, + "step": 3237 + }, + { + "epoch": 0.2955997809019536, + "grad_norm": 0.44360533356666565, + "learning_rate": 4.97184384501105e-06, + "loss": 0.6018, + "step": 3238 + }, + { + "epoch": 0.2956910717546102, + "grad_norm": 0.45053261518478394, + "learning_rate": 4.971825930652591e-06, + "loss": 0.604, + "step": 3239 + }, + { + "epoch": 0.29578236260726676, + "grad_norm": 0.48532745242118835, + "learning_rate": 4.9718080106292324e-06, + "loss": 0.6067, + "step": 3240 + }, + { + "epoch": 0.29587365345992334, + "grad_norm": 0.44460195302963257, + "learning_rate": 4.971790084941015e-06, + "loss": 0.6095, + "step": 3241 + }, + { + "epoch": 0.29596494431257986, + "grad_norm": 0.43210479617118835, + "learning_rate": 4.9717721535879794e-06, + "loss": 0.6222, + "step": 3242 + }, + { + "epoch": 0.29605623516523644, + "grad_norm": 0.5024151802062988, + "learning_rate": 4.971754216570168e-06, + "loss": 0.5762, + "step": 3243 + }, + { + "epoch": 0.296147526017893, + "grad_norm": 0.4861738383769989, + "learning_rate": 4.971736273887621e-06, + "loss": 0.5774, + "step": 3244 + }, + { + "epoch": 0.2962388168705496, + "grad_norm": 0.5012685656547546, + "learning_rate": 4.97171832554038e-06, + "loss": 0.5827, + "step": 3245 + }, + { + "epoch": 0.2963301077232061, + "grad_norm": 0.45798060297966003, + "learning_rate": 4.971700371528486e-06, + "loss": 0.605, + "step": 3246 + }, + { + "epoch": 0.2964213985758627, + "grad_norm": 0.4319254159927368, + "learning_rate": 4.971682411851979e-06, + "loss": 0.623, + "step": 3247 + }, + { + "epoch": 0.29651268942851927, + "grad_norm": 0.4519096612930298, + "learning_rate": 4.9716644465109025e-06, + "loss": 0.5803, + "step": 3248 + }, + { + "epoch": 0.29660398028117585, + "grad_norm": 0.45666393637657166, + "learning_rate": 4.971646475505295e-06, + "loss": 0.6249, + "step": 3249 + }, + { + "epoch": 0.2966952711338324, + "grad_norm": 0.5101509094238281, + "learning_rate": 4.971628498835199e-06, + "loss": 0.581, + "step": 3250 + }, + { + "epoch": 0.29678656198648895, + "grad_norm": 0.4823093116283417, + "learning_rate": 4.971610516500657e-06, + "loss": 0.5607, + "step": 3251 + }, + { + "epoch": 0.29687785283914553, + "grad_norm": 0.47726964950561523, + "learning_rate": 4.971592528501708e-06, + "loss": 0.593, + "step": 3252 + }, + { + "epoch": 0.2969691436918021, + "grad_norm": 0.4676932096481323, + "learning_rate": 4.971574534838393e-06, + "loss": 0.6027, + "step": 3253 + }, + { + "epoch": 0.29706043454445863, + "grad_norm": 0.48752543330192566, + "learning_rate": 4.971556535510756e-06, + "loss": 0.5826, + "step": 3254 + }, + { + "epoch": 0.2971517253971152, + "grad_norm": 0.4449816048145294, + "learning_rate": 4.971538530518836e-06, + "loss": 0.6089, + "step": 3255 + }, + { + "epoch": 0.2972430162497718, + "grad_norm": 0.4988909065723419, + "learning_rate": 4.9715205198626744e-06, + "loss": 0.5857, + "step": 3256 + }, + { + "epoch": 0.29733430710242836, + "grad_norm": 0.4503328204154968, + "learning_rate": 4.971502503542314e-06, + "loss": 0.5889, + "step": 3257 + }, + { + "epoch": 0.2974255979550849, + "grad_norm": 0.48743706941604614, + "learning_rate": 4.971484481557795e-06, + "loss": 0.5796, + "step": 3258 + }, + { + "epoch": 0.29751688880774146, + "grad_norm": 0.4550052285194397, + "learning_rate": 4.971466453909158e-06, + "loss": 0.6252, + "step": 3259 + }, + { + "epoch": 0.29760817966039804, + "grad_norm": 0.4789486229419708, + "learning_rate": 4.971448420596445e-06, + "loss": 0.6061, + "step": 3260 + }, + { + "epoch": 0.29769947051305456, + "grad_norm": 0.48940128087997437, + "learning_rate": 4.971430381619697e-06, + "loss": 0.5993, + "step": 3261 + }, + { + "epoch": 0.29779076136571114, + "grad_norm": 0.4444815218448639, + "learning_rate": 4.971412336978957e-06, + "loss": 0.6453, + "step": 3262 + }, + { + "epoch": 0.2978820522183677, + "grad_norm": 0.4542180299758911, + "learning_rate": 4.971394286674265e-06, + "loss": 0.5768, + "step": 3263 + }, + { + "epoch": 0.2979733430710243, + "grad_norm": 0.5177382826805115, + "learning_rate": 4.971376230705661e-06, + "loss": 0.5569, + "step": 3264 + }, + { + "epoch": 0.2980646339236808, + "grad_norm": 0.49313604831695557, + "learning_rate": 4.971358169073189e-06, + "loss": 0.6135, + "step": 3265 + }, + { + "epoch": 0.2981559247763374, + "grad_norm": 0.4692111313343048, + "learning_rate": 4.971340101776889e-06, + "loss": 0.6116, + "step": 3266 + }, + { + "epoch": 0.298247215628994, + "grad_norm": 0.4908788800239563, + "learning_rate": 4.971322028816803e-06, + "loss": 0.5879, + "step": 3267 + }, + { + "epoch": 0.29833850648165056, + "grad_norm": 0.47938811779022217, + "learning_rate": 4.971303950192971e-06, + "loss": 0.5527, + "step": 3268 + }, + { + "epoch": 0.2984297973343071, + "grad_norm": 0.5109127759933472, + "learning_rate": 4.971285865905437e-06, + "loss": 0.5668, + "step": 3269 + }, + { + "epoch": 0.29852108818696366, + "grad_norm": 0.48117583990097046, + "learning_rate": 4.971267775954239e-06, + "loss": 0.5879, + "step": 3270 + }, + { + "epoch": 0.29861237903962023, + "grad_norm": 0.4831419289112091, + "learning_rate": 4.971249680339422e-06, + "loss": 0.6128, + "step": 3271 + }, + { + "epoch": 0.2987036698922768, + "grad_norm": 0.4683879315853119, + "learning_rate": 4.971231579061025e-06, + "loss": 0.5912, + "step": 3272 + }, + { + "epoch": 0.29879496074493334, + "grad_norm": 0.4311085045337677, + "learning_rate": 4.971213472119091e-06, + "loss": 0.6005, + "step": 3273 + }, + { + "epoch": 0.2988862515975899, + "grad_norm": 0.4675687551498413, + "learning_rate": 4.97119535951366e-06, + "loss": 0.6256, + "step": 3274 + }, + { + "epoch": 0.2989775424502465, + "grad_norm": 0.445275217294693, + "learning_rate": 4.971177241244775e-06, + "loss": 0.5896, + "step": 3275 + }, + { + "epoch": 0.29906883330290307, + "grad_norm": 0.4911069869995117, + "learning_rate": 4.971159117312476e-06, + "loss": 0.5803, + "step": 3276 + }, + { + "epoch": 0.2991601241555596, + "grad_norm": 0.45587822794914246, + "learning_rate": 4.971140987716806e-06, + "loss": 0.5909, + "step": 3277 + }, + { + "epoch": 0.29925141500821617, + "grad_norm": 0.4461219310760498, + "learning_rate": 4.971122852457806e-06, + "loss": 0.6334, + "step": 3278 + }, + { + "epoch": 0.29934270586087275, + "grad_norm": 0.47020068764686584, + "learning_rate": 4.9711047115355175e-06, + "loss": 0.6245, + "step": 3279 + }, + { + "epoch": 0.2994339967135293, + "grad_norm": 0.5293331146240234, + "learning_rate": 4.971086564949982e-06, + "loss": 0.5649, + "step": 3280 + }, + { + "epoch": 0.29952528756618585, + "grad_norm": 0.51985764503479, + "learning_rate": 4.971068412701241e-06, + "loss": 0.598, + "step": 3281 + }, + { + "epoch": 0.2996165784188424, + "grad_norm": 0.47336623072624207, + "learning_rate": 4.971050254789337e-06, + "loss": 0.6283, + "step": 3282 + }, + { + "epoch": 0.299707869271499, + "grad_norm": 0.45629364252090454, + "learning_rate": 4.97103209121431e-06, + "loss": 0.6338, + "step": 3283 + }, + { + "epoch": 0.2997991601241556, + "grad_norm": 0.45783865451812744, + "learning_rate": 4.971013921976203e-06, + "loss": 0.5857, + "step": 3284 + }, + { + "epoch": 0.2998904509768121, + "grad_norm": 0.4670256972312927, + "learning_rate": 4.970995747075057e-06, + "loss": 0.5537, + "step": 3285 + }, + { + "epoch": 0.2999817418294687, + "grad_norm": 0.46918895840644836, + "learning_rate": 4.970977566510914e-06, + "loss": 0.5691, + "step": 3286 + }, + { + "epoch": 0.30007303268212526, + "grad_norm": 0.4892011880874634, + "learning_rate": 4.970959380283815e-06, + "loss": 0.5545, + "step": 3287 + }, + { + "epoch": 0.30016432353478184, + "grad_norm": 0.4617304801940918, + "learning_rate": 4.9709411883938034e-06, + "loss": 0.594, + "step": 3288 + }, + { + "epoch": 0.30025561438743836, + "grad_norm": 0.4336515963077545, + "learning_rate": 4.970922990840918e-06, + "loss": 0.6228, + "step": 3289 + }, + { + "epoch": 0.30034690524009494, + "grad_norm": 0.4924694001674652, + "learning_rate": 4.970904787625203e-06, + "loss": 0.5816, + "step": 3290 + }, + { + "epoch": 0.3004381960927515, + "grad_norm": 0.503383219242096, + "learning_rate": 4.970886578746699e-06, + "loss": 0.6307, + "step": 3291 + }, + { + "epoch": 0.3005294869454081, + "grad_norm": 0.46784669160842896, + "learning_rate": 4.970868364205448e-06, + "loss": 0.6159, + "step": 3292 + }, + { + "epoch": 0.3006207777980646, + "grad_norm": 0.4470028281211853, + "learning_rate": 4.970850144001492e-06, + "loss": 0.6094, + "step": 3293 + }, + { + "epoch": 0.3007120686507212, + "grad_norm": 0.47108209133148193, + "learning_rate": 4.970831918134873e-06, + "loss": 0.6062, + "step": 3294 + }, + { + "epoch": 0.3008033595033778, + "grad_norm": 0.5364285707473755, + "learning_rate": 4.970813686605631e-06, + "loss": 0.5525, + "step": 3295 + }, + { + "epoch": 0.3008946503560343, + "grad_norm": 0.4710347652435303, + "learning_rate": 4.97079544941381e-06, + "loss": 0.5899, + "step": 3296 + }, + { + "epoch": 0.3009859412086909, + "grad_norm": 0.4910123944282532, + "learning_rate": 4.9707772065594505e-06, + "loss": 0.6114, + "step": 3297 + }, + { + "epoch": 0.30107723206134746, + "grad_norm": 0.4461762011051178, + "learning_rate": 4.970758958042595e-06, + "loss": 0.617, + "step": 3298 + }, + { + "epoch": 0.30116852291400403, + "grad_norm": 0.5053549408912659, + "learning_rate": 4.970740703863285e-06, + "loss": 0.5919, + "step": 3299 + }, + { + "epoch": 0.30125981376666056, + "grad_norm": 0.46121641993522644, + "learning_rate": 4.9707224440215614e-06, + "loss": 0.5808, + "step": 3300 + }, + { + "epoch": 0.30135110461931713, + "grad_norm": 0.42845940589904785, + "learning_rate": 4.970704178517468e-06, + "loss": 0.6318, + "step": 3301 + }, + { + "epoch": 0.3014423954719737, + "grad_norm": 0.4579923152923584, + "learning_rate": 4.970685907351045e-06, + "loss": 0.6302, + "step": 3302 + }, + { + "epoch": 0.3015336863246303, + "grad_norm": 0.44190266728401184, + "learning_rate": 4.970667630522336e-06, + "loss": 0.617, + "step": 3303 + }, + { + "epoch": 0.3016249771772868, + "grad_norm": 0.5163379311561584, + "learning_rate": 4.97064934803138e-06, + "loss": 0.6031, + "step": 3304 + }, + { + "epoch": 0.3017162680299434, + "grad_norm": 0.4541466534137726, + "learning_rate": 4.970631059878221e-06, + "loss": 0.6216, + "step": 3305 + }, + { + "epoch": 0.30180755888259997, + "grad_norm": 0.4987511932849884, + "learning_rate": 4.970612766062903e-06, + "loss": 0.5875, + "step": 3306 + }, + { + "epoch": 0.30189884973525655, + "grad_norm": 0.4743630588054657, + "learning_rate": 4.970594466585462e-06, + "loss": 0.5817, + "step": 3307 + }, + { + "epoch": 0.30199014058791307, + "grad_norm": 0.4605453610420227, + "learning_rate": 4.970576161445946e-06, + "loss": 0.5938, + "step": 3308 + }, + { + "epoch": 0.30208143144056965, + "grad_norm": 0.47996944189071655, + "learning_rate": 4.970557850644393e-06, + "loss": 0.5873, + "step": 3309 + }, + { + "epoch": 0.3021727222932262, + "grad_norm": 0.5121245384216309, + "learning_rate": 4.970539534180847e-06, + "loss": 0.5869, + "step": 3310 + }, + { + "epoch": 0.3022640131458828, + "grad_norm": 0.4901210367679596, + "learning_rate": 4.970521212055349e-06, + "loss": 0.5801, + "step": 3311 + }, + { + "epoch": 0.3023553039985393, + "grad_norm": 0.5088865756988525, + "learning_rate": 4.970502884267942e-06, + "loss": 0.5694, + "step": 3312 + }, + { + "epoch": 0.3024465948511959, + "grad_norm": 0.4739653766155243, + "learning_rate": 4.970484550818667e-06, + "loss": 0.5826, + "step": 3313 + }, + { + "epoch": 0.3025378857038525, + "grad_norm": 0.4792991578578949, + "learning_rate": 4.970466211707566e-06, + "loss": 0.5563, + "step": 3314 + }, + { + "epoch": 0.30262917655650906, + "grad_norm": 0.5036203861236572, + "learning_rate": 4.970447866934682e-06, + "loss": 0.5986, + "step": 3315 + }, + { + "epoch": 0.3027204674091656, + "grad_norm": 0.5388427376747131, + "learning_rate": 4.970429516500057e-06, + "loss": 0.5616, + "step": 3316 + }, + { + "epoch": 0.30281175826182216, + "grad_norm": 0.46912702918052673, + "learning_rate": 4.970411160403731e-06, + "loss": 0.5975, + "step": 3317 + }, + { + "epoch": 0.30290304911447874, + "grad_norm": 0.46996667981147766, + "learning_rate": 4.9703927986457485e-06, + "loss": 0.6069, + "step": 3318 + }, + { + "epoch": 0.3029943399671353, + "grad_norm": 0.4730166792869568, + "learning_rate": 4.97037443122615e-06, + "loss": 0.5816, + "step": 3319 + }, + { + "epoch": 0.30308563081979184, + "grad_norm": 0.5085155963897705, + "learning_rate": 4.9703560581449785e-06, + "loss": 0.6044, + "step": 3320 + }, + { + "epoch": 0.3031769216724484, + "grad_norm": 0.4802258014678955, + "learning_rate": 4.970337679402276e-06, + "loss": 0.6094, + "step": 3321 + }, + { + "epoch": 0.303268212525105, + "grad_norm": 0.42561793327331543, + "learning_rate": 4.970319294998085e-06, + "loss": 0.6253, + "step": 3322 + }, + { + "epoch": 0.3033595033777616, + "grad_norm": 0.4682954251766205, + "learning_rate": 4.970300904932446e-06, + "loss": 0.5916, + "step": 3323 + }, + { + "epoch": 0.3034507942304181, + "grad_norm": 0.4661589562892914, + "learning_rate": 4.9702825092054034e-06, + "loss": 0.605, + "step": 3324 + }, + { + "epoch": 0.3035420850830747, + "grad_norm": 0.48157912492752075, + "learning_rate": 4.970264107816996e-06, + "loss": 0.5779, + "step": 3325 + }, + { + "epoch": 0.30363337593573125, + "grad_norm": 0.492602676153183, + "learning_rate": 4.970245700767271e-06, + "loss": 0.6039, + "step": 3326 + }, + { + "epoch": 0.3037246667883878, + "grad_norm": 0.5149199366569519, + "learning_rate": 4.970227288056266e-06, + "loss": 0.5641, + "step": 3327 + }, + { + "epoch": 0.30381595764104435, + "grad_norm": 0.4996856153011322, + "learning_rate": 4.970208869684026e-06, + "loss": 0.5541, + "step": 3328 + }, + { + "epoch": 0.30390724849370093, + "grad_norm": 0.5305718779563904, + "learning_rate": 4.970190445650591e-06, + "loss": 0.5608, + "step": 3329 + }, + { + "epoch": 0.3039985393463575, + "grad_norm": 0.4727995693683624, + "learning_rate": 4.9701720159560055e-06, + "loss": 0.5809, + "step": 3330 + }, + { + "epoch": 0.30408983019901403, + "grad_norm": 0.4364868104457855, + "learning_rate": 4.97015358060031e-06, + "loss": 0.6041, + "step": 3331 + }, + { + "epoch": 0.3041811210516706, + "grad_norm": 0.46077895164489746, + "learning_rate": 4.970135139583547e-06, + "loss": 0.5856, + "step": 3332 + }, + { + "epoch": 0.3042724119043272, + "grad_norm": 0.4938555061817169, + "learning_rate": 4.9701166929057595e-06, + "loss": 0.5925, + "step": 3333 + }, + { + "epoch": 0.30436370275698377, + "grad_norm": 0.46705010533332825, + "learning_rate": 4.97009824056699e-06, + "loss": 0.6165, + "step": 3334 + }, + { + "epoch": 0.3044549936096403, + "grad_norm": 0.48339077830314636, + "learning_rate": 4.97007978256728e-06, + "loss": 0.5978, + "step": 3335 + }, + { + "epoch": 0.30454628446229687, + "grad_norm": 0.4482889771461487, + "learning_rate": 4.970061318906671e-06, + "loss": 0.6018, + "step": 3336 + }, + { + "epoch": 0.30463757531495345, + "grad_norm": 0.471386194229126, + "learning_rate": 4.970042849585207e-06, + "loss": 0.6055, + "step": 3337 + }, + { + "epoch": 0.30472886616761, + "grad_norm": 0.5011101961135864, + "learning_rate": 4.9700243746029294e-06, + "loss": 0.5733, + "step": 3338 + }, + { + "epoch": 0.30482015702026655, + "grad_norm": 0.4588897228240967, + "learning_rate": 4.970005893959881e-06, + "loss": 0.5876, + "step": 3339 + }, + { + "epoch": 0.3049114478729231, + "grad_norm": 0.4559103548526764, + "learning_rate": 4.969987407656105e-06, + "loss": 0.6192, + "step": 3340 + }, + { + "epoch": 0.3050027387255797, + "grad_norm": 0.46736130118370056, + "learning_rate": 4.969968915691641e-06, + "loss": 0.6537, + "step": 3341 + }, + { + "epoch": 0.3050940295782363, + "grad_norm": 0.5090686678886414, + "learning_rate": 4.969950418066535e-06, + "loss": 0.604, + "step": 3342 + }, + { + "epoch": 0.3051853204308928, + "grad_norm": 0.46869805455207825, + "learning_rate": 4.9699319147808255e-06, + "loss": 0.624, + "step": 3343 + }, + { + "epoch": 0.3052766112835494, + "grad_norm": 0.4912732243537903, + "learning_rate": 4.969913405834558e-06, + "loss": 0.5987, + "step": 3344 + }, + { + "epoch": 0.30536790213620596, + "grad_norm": 0.48032304644584656, + "learning_rate": 4.969894891227774e-06, + "loss": 0.5745, + "step": 3345 + }, + { + "epoch": 0.30545919298886254, + "grad_norm": 0.4871198832988739, + "learning_rate": 4.969876370960514e-06, + "loss": 0.5784, + "step": 3346 + }, + { + "epoch": 0.30555048384151906, + "grad_norm": 0.4752461314201355, + "learning_rate": 4.969857845032824e-06, + "loss": 0.6198, + "step": 3347 + }, + { + "epoch": 0.30564177469417564, + "grad_norm": 0.4958326816558838, + "learning_rate": 4.969839313444744e-06, + "loss": 0.5818, + "step": 3348 + }, + { + "epoch": 0.3057330655468322, + "grad_norm": 0.48450592160224915, + "learning_rate": 4.969820776196317e-06, + "loss": 0.5735, + "step": 3349 + }, + { + "epoch": 0.3058243563994888, + "grad_norm": 0.45257440209388733, + "learning_rate": 4.9698022332875865e-06, + "loss": 0.6084, + "step": 3350 + }, + { + "epoch": 0.3059156472521453, + "grad_norm": 0.45686548948287964, + "learning_rate": 4.969783684718593e-06, + "loss": 0.568, + "step": 3351 + }, + { + "epoch": 0.3060069381048019, + "grad_norm": 0.4632519483566284, + "learning_rate": 4.96976513048938e-06, + "loss": 0.5927, + "step": 3352 + }, + { + "epoch": 0.3060982289574585, + "grad_norm": 0.44878143072128296, + "learning_rate": 4.969746570599992e-06, + "loss": 0.5683, + "step": 3353 + }, + { + "epoch": 0.30618951981011505, + "grad_norm": 0.4526680111885071, + "learning_rate": 4.969728005050467e-06, + "loss": 0.6031, + "step": 3354 + }, + { + "epoch": 0.3062808106627716, + "grad_norm": 0.4937720000743866, + "learning_rate": 4.969709433840851e-06, + "loss": 0.5822, + "step": 3355 + }, + { + "epoch": 0.30637210151542815, + "grad_norm": 0.46844372153282166, + "learning_rate": 4.969690856971187e-06, + "loss": 0.5519, + "step": 3356 + }, + { + "epoch": 0.30646339236808473, + "grad_norm": 0.4586551785469055, + "learning_rate": 4.969672274441515e-06, + "loss": 0.5808, + "step": 3357 + }, + { + "epoch": 0.3065546832207413, + "grad_norm": 0.47900262475013733, + "learning_rate": 4.96965368625188e-06, + "loss": 0.586, + "step": 3358 + }, + { + "epoch": 0.30664597407339783, + "grad_norm": 0.47694817185401917, + "learning_rate": 4.9696350924023225e-06, + "loss": 0.6103, + "step": 3359 + }, + { + "epoch": 0.3067372649260544, + "grad_norm": 0.48584339022636414, + "learning_rate": 4.969616492892887e-06, + "loss": 0.6183, + "step": 3360 + }, + { + "epoch": 0.306828555778711, + "grad_norm": 0.45314157009124756, + "learning_rate": 4.9695978877236155e-06, + "loss": 0.6106, + "step": 3361 + }, + { + "epoch": 0.3069198466313675, + "grad_norm": 0.49464747309684753, + "learning_rate": 4.969579276894549e-06, + "loss": 0.5918, + "step": 3362 + }, + { + "epoch": 0.3070111374840241, + "grad_norm": 0.4679890275001526, + "learning_rate": 4.969560660405733e-06, + "loss": 0.6282, + "step": 3363 + }, + { + "epoch": 0.30710242833668067, + "grad_norm": 0.4735637307167053, + "learning_rate": 4.969542038257208e-06, + "loss": 0.617, + "step": 3364 + }, + { + "epoch": 0.30719371918933724, + "grad_norm": 0.4668291211128235, + "learning_rate": 4.969523410449018e-06, + "loss": 0.5808, + "step": 3365 + }, + { + "epoch": 0.30728501004199377, + "grad_norm": 0.49003535509109497, + "learning_rate": 4.969504776981205e-06, + "loss": 0.5897, + "step": 3366 + }, + { + "epoch": 0.30737630089465034, + "grad_norm": 0.46826791763305664, + "learning_rate": 4.9694861378538105e-06, + "loss": 0.5892, + "step": 3367 + }, + { + "epoch": 0.3074675917473069, + "grad_norm": 0.43948814272880554, + "learning_rate": 4.969467493066879e-06, + "loss": 0.6038, + "step": 3368 + }, + { + "epoch": 0.3075588825999635, + "grad_norm": 0.47752171754837036, + "learning_rate": 4.9694488426204535e-06, + "loss": 0.6293, + "step": 3369 + }, + { + "epoch": 0.30765017345262, + "grad_norm": 0.4893947243690491, + "learning_rate": 4.969430186514575e-06, + "loss": 0.5728, + "step": 3370 + }, + { + "epoch": 0.3077414643052766, + "grad_norm": 0.4776171147823334, + "learning_rate": 4.969411524749289e-06, + "loss": 0.5992, + "step": 3371 + }, + { + "epoch": 0.3078327551579332, + "grad_norm": 0.4808536767959595, + "learning_rate": 4.969392857324636e-06, + "loss": 0.5829, + "step": 3372 + }, + { + "epoch": 0.30792404601058976, + "grad_norm": 0.4636821746826172, + "learning_rate": 4.969374184240657e-06, + "loss": 0.5964, + "step": 3373 + }, + { + "epoch": 0.3080153368632463, + "grad_norm": 0.4769359230995178, + "learning_rate": 4.9693555054974e-06, + "loss": 0.5763, + "step": 3374 + }, + { + "epoch": 0.30810662771590286, + "grad_norm": 0.45749229192733765, + "learning_rate": 4.969336821094904e-06, + "loss": 0.6024, + "step": 3375 + }, + { + "epoch": 0.30819791856855944, + "grad_norm": 0.4451361298561096, + "learning_rate": 4.969318131033212e-06, + "loss": 0.6129, + "step": 3376 + }, + { + "epoch": 0.308289209421216, + "grad_norm": 0.4974173307418823, + "learning_rate": 4.969299435312368e-06, + "loss": 0.5816, + "step": 3377 + }, + { + "epoch": 0.30838050027387254, + "grad_norm": 0.470228910446167, + "learning_rate": 4.969280733932413e-06, + "loss": 0.5965, + "step": 3378 + }, + { + "epoch": 0.3084717911265291, + "grad_norm": 0.46101316809654236, + "learning_rate": 4.969262026893394e-06, + "loss": 0.6253, + "step": 3379 + }, + { + "epoch": 0.3085630819791857, + "grad_norm": 0.49752163887023926, + "learning_rate": 4.969243314195349e-06, + "loss": 0.5954, + "step": 3380 + }, + { + "epoch": 0.30865437283184227, + "grad_norm": 0.4439908266067505, + "learning_rate": 4.969224595838323e-06, + "loss": 0.6092, + "step": 3381 + }, + { + "epoch": 0.3087456636844988, + "grad_norm": 0.4857617914676666, + "learning_rate": 4.96920587182236e-06, + "loss": 0.6081, + "step": 3382 + }, + { + "epoch": 0.3088369545371554, + "grad_norm": 0.4804227948188782, + "learning_rate": 4.969187142147502e-06, + "loss": 0.5811, + "step": 3383 + }, + { + "epoch": 0.30892824538981195, + "grad_norm": 0.4940849244594574, + "learning_rate": 4.969168406813792e-06, + "loss": 0.6111, + "step": 3384 + }, + { + "epoch": 0.30901953624246853, + "grad_norm": 0.49220868945121765, + "learning_rate": 4.969149665821271e-06, + "loss": 0.577, + "step": 3385 + }, + { + "epoch": 0.30911082709512505, + "grad_norm": 0.4732151925563812, + "learning_rate": 4.969130919169985e-06, + "loss": 0.5759, + "step": 3386 + }, + { + "epoch": 0.30920211794778163, + "grad_norm": 0.49255019426345825, + "learning_rate": 4.969112166859975e-06, + "loss": 0.5876, + "step": 3387 + }, + { + "epoch": 0.3092934088004382, + "grad_norm": 0.4341646730899811, + "learning_rate": 4.969093408891285e-06, + "loss": 0.6204, + "step": 3388 + }, + { + "epoch": 0.3093846996530948, + "grad_norm": 0.468036413192749, + "learning_rate": 4.9690746452639565e-06, + "loss": 0.5851, + "step": 3389 + }, + { + "epoch": 0.3094759905057513, + "grad_norm": 0.4530990421772003, + "learning_rate": 4.969055875978035e-06, + "loss": 0.6053, + "step": 3390 + }, + { + "epoch": 0.3095672813584079, + "grad_norm": 0.4599977731704712, + "learning_rate": 4.969037101033561e-06, + "loss": 0.6006, + "step": 3391 + }, + { + "epoch": 0.30965857221106446, + "grad_norm": 0.5197080969810486, + "learning_rate": 4.969018320430579e-06, + "loss": 0.6209, + "step": 3392 + }, + { + "epoch": 0.30974986306372104, + "grad_norm": 0.5225505828857422, + "learning_rate": 4.968999534169132e-06, + "loss": 0.5804, + "step": 3393 + }, + { + "epoch": 0.30984115391637757, + "grad_norm": 0.4519106149673462, + "learning_rate": 4.968980742249262e-06, + "loss": 0.6127, + "step": 3394 + }, + { + "epoch": 0.30993244476903414, + "grad_norm": 0.45485296845436096, + "learning_rate": 4.968961944671013e-06, + "loss": 0.6281, + "step": 3395 + }, + { + "epoch": 0.3100237356216907, + "grad_norm": 0.45902520418167114, + "learning_rate": 4.968943141434428e-06, + "loss": 0.6598, + "step": 3396 + }, + { + "epoch": 0.31011502647434724, + "grad_norm": 0.46591317653656006, + "learning_rate": 4.9689243325395496e-06, + "loss": 0.6407, + "step": 3397 + }, + { + "epoch": 0.3102063173270038, + "grad_norm": 0.46930408477783203, + "learning_rate": 4.968905517986422e-06, + "loss": 0.6264, + "step": 3398 + }, + { + "epoch": 0.3102976081796604, + "grad_norm": 0.48040148615837097, + "learning_rate": 4.968886697775087e-06, + "loss": 0.6001, + "step": 3399 + }, + { + "epoch": 0.310388899032317, + "grad_norm": 0.4485310912132263, + "learning_rate": 4.968867871905589e-06, + "loss": 0.6097, + "step": 3400 + }, + { + "epoch": 0.3104801898849735, + "grad_norm": 0.46131640672683716, + "learning_rate": 4.96884904037797e-06, + "loss": 0.5866, + "step": 3401 + }, + { + "epoch": 0.3105714807376301, + "grad_norm": 0.45879074931144714, + "learning_rate": 4.968830203192273e-06, + "loss": 0.6013, + "step": 3402 + }, + { + "epoch": 0.31066277159028666, + "grad_norm": 0.4742812216281891, + "learning_rate": 4.9688113603485425e-06, + "loss": 0.6009, + "step": 3403 + }, + { + "epoch": 0.31075406244294324, + "grad_norm": 0.4934229850769043, + "learning_rate": 4.9687925118468206e-06, + "loss": 0.6055, + "step": 3404 + }, + { + "epoch": 0.31084535329559976, + "grad_norm": 0.4618043005466461, + "learning_rate": 4.9687736576871515e-06, + "loss": 0.6154, + "step": 3405 + }, + { + "epoch": 0.31093664414825634, + "grad_norm": 0.49154818058013916, + "learning_rate": 4.968754797869577e-06, + "loss": 0.5832, + "step": 3406 + }, + { + "epoch": 0.3110279350009129, + "grad_norm": 0.4827009439468384, + "learning_rate": 4.968735932394141e-06, + "loss": 0.5949, + "step": 3407 + }, + { + "epoch": 0.3111192258535695, + "grad_norm": 0.500852644443512, + "learning_rate": 4.968717061260887e-06, + "loss": 0.5935, + "step": 3408 + }, + { + "epoch": 0.311210516706226, + "grad_norm": 0.46245381236076355, + "learning_rate": 4.968698184469859e-06, + "loss": 0.6069, + "step": 3409 + }, + { + "epoch": 0.3113018075588826, + "grad_norm": 0.4941573143005371, + "learning_rate": 4.968679302021099e-06, + "loss": 0.5917, + "step": 3410 + }, + { + "epoch": 0.31139309841153917, + "grad_norm": 0.48710164427757263, + "learning_rate": 4.96866041391465e-06, + "loss": 0.5983, + "step": 3411 + }, + { + "epoch": 0.31148438926419575, + "grad_norm": 0.44113481044769287, + "learning_rate": 4.968641520150556e-06, + "loss": 0.597, + "step": 3412 + }, + { + "epoch": 0.31157568011685227, + "grad_norm": 0.5062381029129028, + "learning_rate": 4.9686226207288605e-06, + "loss": 0.6057, + "step": 3413 + }, + { + "epoch": 0.31166697096950885, + "grad_norm": 0.4753515422344208, + "learning_rate": 4.968603715649606e-06, + "loss": 0.5733, + "step": 3414 + }, + { + "epoch": 0.31175826182216543, + "grad_norm": 0.4754325747489929, + "learning_rate": 4.968584804912837e-06, + "loss": 0.6113, + "step": 3415 + }, + { + "epoch": 0.311849552674822, + "grad_norm": 0.4800247848033905, + "learning_rate": 4.968565888518595e-06, + "loss": 0.5837, + "step": 3416 + }, + { + "epoch": 0.31194084352747853, + "grad_norm": 0.48575371503829956, + "learning_rate": 4.968546966466926e-06, + "loss": 0.5768, + "step": 3417 + }, + { + "epoch": 0.3120321343801351, + "grad_norm": 0.46320387721061707, + "learning_rate": 4.968528038757872e-06, + "loss": 0.5773, + "step": 3418 + }, + { + "epoch": 0.3121234252327917, + "grad_norm": 0.4786945879459381, + "learning_rate": 4.968509105391474e-06, + "loss": 0.5841, + "step": 3419 + }, + { + "epoch": 0.31221471608544826, + "grad_norm": 0.43330198526382446, + "learning_rate": 4.96849016636778e-06, + "loss": 0.637, + "step": 3420 + }, + { + "epoch": 0.3123060069381048, + "grad_norm": 0.45008915662765503, + "learning_rate": 4.96847122168683e-06, + "loss": 0.6119, + "step": 3421 + }, + { + "epoch": 0.31239729779076136, + "grad_norm": 0.47099950909614563, + "learning_rate": 4.968452271348669e-06, + "loss": 0.5888, + "step": 3422 + }, + { + "epoch": 0.31248858864341794, + "grad_norm": 0.4980766177177429, + "learning_rate": 4.96843331535334e-06, + "loss": 0.5712, + "step": 3423 + }, + { + "epoch": 0.3125798794960745, + "grad_norm": 0.4893605411052704, + "learning_rate": 4.9684143537008855e-06, + "loss": 0.5979, + "step": 3424 + }, + { + "epoch": 0.31267117034873104, + "grad_norm": 0.47787612676620483, + "learning_rate": 4.96839538639135e-06, + "loss": 0.6026, + "step": 3425 + }, + { + "epoch": 0.3127624612013876, + "grad_norm": 0.4744979739189148, + "learning_rate": 4.968376413424778e-06, + "loss": 0.5655, + "step": 3426 + }, + { + "epoch": 0.3128537520540442, + "grad_norm": 0.46195322275161743, + "learning_rate": 4.968357434801211e-06, + "loss": 0.6003, + "step": 3427 + }, + { + "epoch": 0.3129450429067007, + "grad_norm": 0.4922167658805847, + "learning_rate": 4.968338450520693e-06, + "loss": 0.5396, + "step": 3428 + }, + { + "epoch": 0.3130363337593573, + "grad_norm": 0.4981575012207031, + "learning_rate": 4.968319460583267e-06, + "loss": 0.5955, + "step": 3429 + }, + { + "epoch": 0.3131276246120139, + "grad_norm": 0.44148316979408264, + "learning_rate": 4.968300464988979e-06, + "loss": 0.6326, + "step": 3430 + }, + { + "epoch": 0.31321891546467046, + "grad_norm": 0.46032217144966125, + "learning_rate": 4.9682814637378704e-06, + "loss": 0.6042, + "step": 3431 + }, + { + "epoch": 0.313310206317327, + "grad_norm": 0.4719243049621582, + "learning_rate": 4.968262456829984e-06, + "loss": 0.5992, + "step": 3432 + }, + { + "epoch": 0.31340149716998356, + "grad_norm": 0.44643765687942505, + "learning_rate": 4.968243444265366e-06, + "loss": 0.5643, + "step": 3433 + }, + { + "epoch": 0.31349278802264013, + "grad_norm": 0.5056849718093872, + "learning_rate": 4.968224426044058e-06, + "loss": 0.5639, + "step": 3434 + }, + { + "epoch": 0.3135840788752967, + "grad_norm": 0.47270819544792175, + "learning_rate": 4.968205402166103e-06, + "loss": 0.5853, + "step": 3435 + }, + { + "epoch": 0.31367536972795323, + "grad_norm": 0.5035602450370789, + "learning_rate": 4.968186372631547e-06, + "loss": 0.5713, + "step": 3436 + }, + { + "epoch": 0.3137666605806098, + "grad_norm": 0.484068363904953, + "learning_rate": 4.968167337440432e-06, + "loss": 0.6236, + "step": 3437 + }, + { + "epoch": 0.3138579514332664, + "grad_norm": 0.450046569108963, + "learning_rate": 4.968148296592802e-06, + "loss": 0.6237, + "step": 3438 + }, + { + "epoch": 0.31394924228592297, + "grad_norm": 0.45648932456970215, + "learning_rate": 4.9681292500887e-06, + "loss": 0.6187, + "step": 3439 + }, + { + "epoch": 0.3140405331385795, + "grad_norm": 0.48997029662132263, + "learning_rate": 4.968110197928171e-06, + "loss": 0.5787, + "step": 3440 + }, + { + "epoch": 0.31413182399123607, + "grad_norm": 0.4710370898246765, + "learning_rate": 4.968091140111257e-06, + "loss": 0.5755, + "step": 3441 + }, + { + "epoch": 0.31422311484389265, + "grad_norm": 0.5106715559959412, + "learning_rate": 4.968072076638004e-06, + "loss": 0.5704, + "step": 3442 + }, + { + "epoch": 0.3143144056965492, + "grad_norm": 0.45078903436660767, + "learning_rate": 4.968053007508453e-06, + "loss": 0.6344, + "step": 3443 + }, + { + "epoch": 0.31440569654920575, + "grad_norm": 0.47060149908065796, + "learning_rate": 4.968033932722649e-06, + "loss": 0.6372, + "step": 3444 + }, + { + "epoch": 0.3144969874018623, + "grad_norm": 0.49940961599349976, + "learning_rate": 4.968014852280636e-06, + "loss": 0.5797, + "step": 3445 + }, + { + "epoch": 0.3145882782545189, + "grad_norm": 0.4722173810005188, + "learning_rate": 4.9679957661824575e-06, + "loss": 0.6104, + "step": 3446 + }, + { + "epoch": 0.3146795691071755, + "grad_norm": 0.46020838618278503, + "learning_rate": 4.967976674428157e-06, + "loss": 0.5863, + "step": 3447 + }, + { + "epoch": 0.314770859959832, + "grad_norm": 0.46359512209892273, + "learning_rate": 4.967957577017778e-06, + "loss": 0.5871, + "step": 3448 + }, + { + "epoch": 0.3148621508124886, + "grad_norm": 0.47710418701171875, + "learning_rate": 4.967938473951365e-06, + "loss": 0.6228, + "step": 3449 + }, + { + "epoch": 0.31495344166514516, + "grad_norm": 0.48862549662590027, + "learning_rate": 4.967919365228962e-06, + "loss": 0.5665, + "step": 3450 + }, + { + "epoch": 0.31504473251780174, + "grad_norm": 0.4841868281364441, + "learning_rate": 4.967900250850611e-06, + "loss": 0.5855, + "step": 3451 + }, + { + "epoch": 0.31513602337045826, + "grad_norm": 0.45141375064849854, + "learning_rate": 4.967881130816358e-06, + "loss": 0.6026, + "step": 3452 + }, + { + "epoch": 0.31522731422311484, + "grad_norm": 0.4716503918170929, + "learning_rate": 4.967862005126245e-06, + "loss": 0.6, + "step": 3453 + }, + { + "epoch": 0.3153186050757714, + "grad_norm": 0.4684523344039917, + "learning_rate": 4.967842873780317e-06, + "loss": 0.6105, + "step": 3454 + }, + { + "epoch": 0.315409895928428, + "grad_norm": 0.4848313629627228, + "learning_rate": 4.967823736778618e-06, + "loss": 0.5854, + "step": 3455 + }, + { + "epoch": 0.3155011867810845, + "grad_norm": 0.4960467219352722, + "learning_rate": 4.9678045941211906e-06, + "loss": 0.5724, + "step": 3456 + }, + { + "epoch": 0.3155924776337411, + "grad_norm": 0.4713859558105469, + "learning_rate": 4.96778544580808e-06, + "loss": 0.5817, + "step": 3457 + }, + { + "epoch": 0.3156837684863977, + "grad_norm": 0.5084191560745239, + "learning_rate": 4.96776629183933e-06, + "loss": 0.5537, + "step": 3458 + }, + { + "epoch": 0.31577505933905425, + "grad_norm": 0.46281343698501587, + "learning_rate": 4.967747132214983e-06, + "loss": 0.6022, + "step": 3459 + }, + { + "epoch": 0.3158663501917108, + "grad_norm": 0.46412065625190735, + "learning_rate": 4.9677279669350845e-06, + "loss": 0.5794, + "step": 3460 + }, + { + "epoch": 0.31595764104436735, + "grad_norm": 0.47112682461738586, + "learning_rate": 4.967708795999678e-06, + "loss": 0.5966, + "step": 3461 + }, + { + "epoch": 0.31604893189702393, + "grad_norm": 0.4523323178291321, + "learning_rate": 4.967689619408807e-06, + "loss": 0.6188, + "step": 3462 + }, + { + "epoch": 0.31614022274968046, + "grad_norm": 0.5034440755844116, + "learning_rate": 4.967670437162516e-06, + "loss": 0.5493, + "step": 3463 + }, + { + "epoch": 0.31623151360233703, + "grad_norm": 0.4925064146518707, + "learning_rate": 4.967651249260848e-06, + "loss": 0.5815, + "step": 3464 + }, + { + "epoch": 0.3163228044549936, + "grad_norm": 0.44768694043159485, + "learning_rate": 4.967632055703848e-06, + "loss": 0.5913, + "step": 3465 + }, + { + "epoch": 0.3164140953076502, + "grad_norm": 0.4484257102012634, + "learning_rate": 4.96761285649156e-06, + "loss": 0.6566, + "step": 3466 + }, + { + "epoch": 0.3165053861603067, + "grad_norm": 0.50800621509552, + "learning_rate": 4.967593651624028e-06, + "loss": 0.5907, + "step": 3467 + }, + { + "epoch": 0.3165966770129633, + "grad_norm": 0.47269874811172485, + "learning_rate": 4.967574441101295e-06, + "loss": 0.583, + "step": 3468 + }, + { + "epoch": 0.31668796786561987, + "grad_norm": 0.45194998383522034, + "learning_rate": 4.967555224923406e-06, + "loss": 0.6848, + "step": 3469 + }, + { + "epoch": 0.31677925871827645, + "grad_norm": 0.43408203125, + "learning_rate": 4.967536003090405e-06, + "loss": 0.6283, + "step": 3470 + }, + { + "epoch": 0.31687054957093297, + "grad_norm": 0.48345446586608887, + "learning_rate": 4.967516775602336e-06, + "loss": 0.5784, + "step": 3471 + }, + { + "epoch": 0.31696184042358955, + "grad_norm": 0.4545981287956238, + "learning_rate": 4.967497542459243e-06, + "loss": 0.5837, + "step": 3472 + }, + { + "epoch": 0.3170531312762461, + "grad_norm": 0.47062191367149353, + "learning_rate": 4.9674783036611686e-06, + "loss": 0.6071, + "step": 3473 + }, + { + "epoch": 0.3171444221289027, + "grad_norm": 0.4658990502357483, + "learning_rate": 4.967459059208159e-06, + "loss": 0.6053, + "step": 3474 + }, + { + "epoch": 0.3172357129815592, + "grad_norm": 0.4675287902355194, + "learning_rate": 4.967439809100258e-06, + "loss": 0.6171, + "step": 3475 + }, + { + "epoch": 0.3173270038342158, + "grad_norm": 0.4948670268058777, + "learning_rate": 4.967420553337508e-06, + "loss": 0.624, + "step": 3476 + }, + { + "epoch": 0.3174182946868724, + "grad_norm": 0.4552135169506073, + "learning_rate": 4.967401291919956e-06, + "loss": 0.6405, + "step": 3477 + }, + { + "epoch": 0.31750958553952896, + "grad_norm": 0.4432305693626404, + "learning_rate": 4.967382024847644e-06, + "loss": 0.636, + "step": 3478 + }, + { + "epoch": 0.3176008763921855, + "grad_norm": 0.4664963185787201, + "learning_rate": 4.967362752120616e-06, + "loss": 0.585, + "step": 3479 + }, + { + "epoch": 0.31769216724484206, + "grad_norm": 0.4662764072418213, + "learning_rate": 4.967343473738918e-06, + "loss": 0.5793, + "step": 3480 + }, + { + "epoch": 0.31778345809749864, + "grad_norm": 0.47695115208625793, + "learning_rate": 4.967324189702592e-06, + "loss": 0.5873, + "step": 3481 + }, + { + "epoch": 0.3178747489501552, + "grad_norm": 0.4739081561565399, + "learning_rate": 4.967304900011683e-06, + "loss": 0.6025, + "step": 3482 + }, + { + "epoch": 0.31796603980281174, + "grad_norm": 0.48457610607147217, + "learning_rate": 4.967285604666237e-06, + "loss": 0.6034, + "step": 3483 + }, + { + "epoch": 0.3180573306554683, + "grad_norm": 0.524390459060669, + "learning_rate": 4.967266303666296e-06, + "loss": 0.5732, + "step": 3484 + }, + { + "epoch": 0.3181486215081249, + "grad_norm": 0.4939536154270172, + "learning_rate": 4.967246997011904e-06, + "loss": 0.6237, + "step": 3485 + }, + { + "epoch": 0.3182399123607815, + "grad_norm": 0.4591628313064575, + "learning_rate": 4.967227684703107e-06, + "loss": 0.6383, + "step": 3486 + }, + { + "epoch": 0.318331203213438, + "grad_norm": 0.47264760732650757, + "learning_rate": 4.967208366739948e-06, + "loss": 0.6138, + "step": 3487 + }, + { + "epoch": 0.3184224940660946, + "grad_norm": 0.49660423398017883, + "learning_rate": 4.967189043122472e-06, + "loss": 0.5779, + "step": 3488 + }, + { + "epoch": 0.31851378491875115, + "grad_norm": 0.4788114130496979, + "learning_rate": 4.967169713850723e-06, + "loss": 0.621, + "step": 3489 + }, + { + "epoch": 0.31860507577140773, + "grad_norm": 0.4824436604976654, + "learning_rate": 4.967150378924745e-06, + "loss": 0.5652, + "step": 3490 + }, + { + "epoch": 0.31869636662406425, + "grad_norm": 0.46942660212516785, + "learning_rate": 4.967131038344583e-06, + "loss": 0.5576, + "step": 3491 + }, + { + "epoch": 0.31878765747672083, + "grad_norm": 0.5090717077255249, + "learning_rate": 4.96711169211028e-06, + "loss": 0.5719, + "step": 3492 + }, + { + "epoch": 0.3188789483293774, + "grad_norm": 0.47283488512039185, + "learning_rate": 4.967092340221882e-06, + "loss": 0.629, + "step": 3493 + }, + { + "epoch": 0.318970239182034, + "grad_norm": 0.4902234971523285, + "learning_rate": 4.967072982679433e-06, + "loss": 0.5766, + "step": 3494 + }, + { + "epoch": 0.3190615300346905, + "grad_norm": 0.4705328047275543, + "learning_rate": 4.9670536194829755e-06, + "loss": 0.6117, + "step": 3495 + }, + { + "epoch": 0.3191528208873471, + "grad_norm": 0.46110519766807556, + "learning_rate": 4.967034250632556e-06, + "loss": 0.6139, + "step": 3496 + }, + { + "epoch": 0.31924411174000367, + "grad_norm": 0.4534733295440674, + "learning_rate": 4.967014876128219e-06, + "loss": 0.6072, + "step": 3497 + }, + { + "epoch": 0.3193354025926602, + "grad_norm": 0.5093585252761841, + "learning_rate": 4.966995495970007e-06, + "loss": 0.6048, + "step": 3498 + }, + { + "epoch": 0.31942669344531677, + "grad_norm": 0.4661329984664917, + "learning_rate": 4.9669761101579666e-06, + "loss": 0.5907, + "step": 3499 + }, + { + "epoch": 0.31951798429797335, + "grad_norm": 0.4766860604286194, + "learning_rate": 4.966956718692141e-06, + "loss": 0.6069, + "step": 3500 + }, + { + "epoch": 0.3196092751506299, + "grad_norm": 0.48877307772636414, + "learning_rate": 4.966937321572574e-06, + "loss": 0.5961, + "step": 3501 + }, + { + "epoch": 0.31970056600328645, + "grad_norm": 0.47766855359077454, + "learning_rate": 4.966917918799311e-06, + "loss": 0.5991, + "step": 3502 + }, + { + "epoch": 0.319791856855943, + "grad_norm": 0.4675837457180023, + "learning_rate": 4.966898510372397e-06, + "loss": 0.6007, + "step": 3503 + }, + { + "epoch": 0.3198831477085996, + "grad_norm": 0.4683498442173004, + "learning_rate": 4.966879096291876e-06, + "loss": 0.5467, + "step": 3504 + }, + { + "epoch": 0.3199744385612562, + "grad_norm": 0.4915986955165863, + "learning_rate": 4.966859676557791e-06, + "loss": 0.5503, + "step": 3505 + }, + { + "epoch": 0.3200657294139127, + "grad_norm": 0.47504860162734985, + "learning_rate": 4.9668402511701884e-06, + "loss": 0.6048, + "step": 3506 + }, + { + "epoch": 0.3201570202665693, + "grad_norm": 0.48457229137420654, + "learning_rate": 4.9668208201291126e-06, + "loss": 0.554, + "step": 3507 + }, + { + "epoch": 0.32024831111922586, + "grad_norm": 0.4498867094516754, + "learning_rate": 4.966801383434608e-06, + "loss": 0.6113, + "step": 3508 + }, + { + "epoch": 0.32033960197188244, + "grad_norm": 0.4846920073032379, + "learning_rate": 4.966781941086718e-06, + "loss": 0.6084, + "step": 3509 + }, + { + "epoch": 0.32043089282453896, + "grad_norm": 0.4836241900920868, + "learning_rate": 4.9667624930854875e-06, + "loss": 0.6025, + "step": 3510 + }, + { + "epoch": 0.32052218367719554, + "grad_norm": 0.4658696949481964, + "learning_rate": 4.966743039430962e-06, + "loss": 0.5921, + "step": 3511 + }, + { + "epoch": 0.3206134745298521, + "grad_norm": 0.48903581500053406, + "learning_rate": 4.966723580123186e-06, + "loss": 0.6009, + "step": 3512 + }, + { + "epoch": 0.3207047653825087, + "grad_norm": 0.4674146771430969, + "learning_rate": 4.9667041151622034e-06, + "loss": 0.5953, + "step": 3513 + }, + { + "epoch": 0.3207960562351652, + "grad_norm": 0.4980522394180298, + "learning_rate": 4.966684644548059e-06, + "loss": 0.5725, + "step": 3514 + }, + { + "epoch": 0.3208873470878218, + "grad_norm": 0.45627373456954956, + "learning_rate": 4.9666651682807974e-06, + "loss": 0.6033, + "step": 3515 + }, + { + "epoch": 0.3209786379404784, + "grad_norm": 0.47355741262435913, + "learning_rate": 4.966645686360464e-06, + "loss": 0.5729, + "step": 3516 + }, + { + "epoch": 0.32106992879313495, + "grad_norm": 0.46824824810028076, + "learning_rate": 4.966626198787102e-06, + "loss": 0.5778, + "step": 3517 + }, + { + "epoch": 0.3211612196457915, + "grad_norm": 0.4368278980255127, + "learning_rate": 4.966606705560758e-06, + "loss": 0.6044, + "step": 3518 + }, + { + "epoch": 0.32125251049844805, + "grad_norm": 0.4790451228618622, + "learning_rate": 4.966587206681475e-06, + "loss": 0.571, + "step": 3519 + }, + { + "epoch": 0.32134380135110463, + "grad_norm": 0.44151848554611206, + "learning_rate": 4.966567702149297e-06, + "loss": 0.6285, + "step": 3520 + }, + { + "epoch": 0.3214350922037612, + "grad_norm": 0.4777100086212158, + "learning_rate": 4.966548191964272e-06, + "loss": 0.5455, + "step": 3521 + }, + { + "epoch": 0.32152638305641773, + "grad_norm": 0.4885808825492859, + "learning_rate": 4.966528676126442e-06, + "loss": 0.5623, + "step": 3522 + }, + { + "epoch": 0.3216176739090743, + "grad_norm": 0.4723167419433594, + "learning_rate": 4.966509154635852e-06, + "loss": 0.6189, + "step": 3523 + }, + { + "epoch": 0.3217089647617309, + "grad_norm": 0.5146410465240479, + "learning_rate": 4.9664896274925475e-06, + "loss": 0.5638, + "step": 3524 + }, + { + "epoch": 0.32180025561438746, + "grad_norm": 0.4850645959377289, + "learning_rate": 4.966470094696573e-06, + "loss": 0.6079, + "step": 3525 + }, + { + "epoch": 0.321891546467044, + "grad_norm": 0.4999505281448364, + "learning_rate": 4.966450556247973e-06, + "loss": 0.5829, + "step": 3526 + }, + { + "epoch": 0.32198283731970057, + "grad_norm": 0.5403751134872437, + "learning_rate": 4.9664310121467915e-06, + "loss": 0.5655, + "step": 3527 + }, + { + "epoch": 0.32207412817235714, + "grad_norm": 0.4945475161075592, + "learning_rate": 4.966411462393075e-06, + "loss": 0.5979, + "step": 3528 + }, + { + "epoch": 0.32216541902501367, + "grad_norm": 0.48003682494163513, + "learning_rate": 4.966391906986867e-06, + "loss": 0.6083, + "step": 3529 + }, + { + "epoch": 0.32225670987767024, + "grad_norm": 0.45169955492019653, + "learning_rate": 4.966372345928214e-06, + "loss": 0.6303, + "step": 3530 + }, + { + "epoch": 0.3223480007303268, + "grad_norm": 0.4999820291996002, + "learning_rate": 4.966352779217158e-06, + "loss": 0.5867, + "step": 3531 + }, + { + "epoch": 0.3224392915829834, + "grad_norm": 0.4584021270275116, + "learning_rate": 4.966333206853747e-06, + "loss": 0.626, + "step": 3532 + }, + { + "epoch": 0.3225305824356399, + "grad_norm": 0.43823912739753723, + "learning_rate": 4.966313628838024e-06, + "loss": 0.6372, + "step": 3533 + }, + { + "epoch": 0.3226218732882965, + "grad_norm": 0.46325239539146423, + "learning_rate": 4.966294045170034e-06, + "loss": 0.6137, + "step": 3534 + }, + { + "epoch": 0.3227131641409531, + "grad_norm": 0.48295509815216064, + "learning_rate": 4.966274455849822e-06, + "loss": 0.6046, + "step": 3535 + }, + { + "epoch": 0.32280445499360966, + "grad_norm": 0.5211877226829529, + "learning_rate": 4.966254860877433e-06, + "loss": 0.5654, + "step": 3536 + }, + { + "epoch": 0.3228957458462662, + "grad_norm": 0.5165207982063293, + "learning_rate": 4.966235260252913e-06, + "loss": 0.6084, + "step": 3537 + }, + { + "epoch": 0.32298703669892276, + "grad_norm": 0.4734027683734894, + "learning_rate": 4.966215653976304e-06, + "loss": 0.5943, + "step": 3538 + }, + { + "epoch": 0.32307832755157934, + "grad_norm": 0.5018326640129089, + "learning_rate": 4.966196042047654e-06, + "loss": 0.5185, + "step": 3539 + }, + { + "epoch": 0.3231696184042359, + "grad_norm": 0.4882725775241852, + "learning_rate": 4.9661764244670065e-06, + "loss": 0.6092, + "step": 3540 + }, + { + "epoch": 0.32326090925689244, + "grad_norm": 0.49221697449684143, + "learning_rate": 4.9661568012344065e-06, + "loss": 0.5687, + "step": 3541 + }, + { + "epoch": 0.323352200109549, + "grad_norm": 0.4937272369861603, + "learning_rate": 4.9661371723498995e-06, + "loss": 0.6002, + "step": 3542 + }, + { + "epoch": 0.3234434909622056, + "grad_norm": 0.45538243651390076, + "learning_rate": 4.96611753781353e-06, + "loss": 0.629, + "step": 3543 + }, + { + "epoch": 0.32353478181486217, + "grad_norm": 0.48769640922546387, + "learning_rate": 4.9660978976253425e-06, + "loss": 0.5851, + "step": 3544 + }, + { + "epoch": 0.3236260726675187, + "grad_norm": 0.4660129249095917, + "learning_rate": 4.966078251785383e-06, + "loss": 0.5949, + "step": 3545 + }, + { + "epoch": 0.32371736352017527, + "grad_norm": 0.5100692510604858, + "learning_rate": 4.966058600293696e-06, + "loss": 0.5917, + "step": 3546 + }, + { + "epoch": 0.32380865437283185, + "grad_norm": 0.46508389711380005, + "learning_rate": 4.966038943150328e-06, + "loss": 0.5454, + "step": 3547 + }, + { + "epoch": 0.32389994522548843, + "grad_norm": 0.48940855264663696, + "learning_rate": 4.966019280355321e-06, + "loss": 0.5497, + "step": 3548 + }, + { + "epoch": 0.32399123607814495, + "grad_norm": 0.492855042219162, + "learning_rate": 4.965999611908723e-06, + "loss": 0.5953, + "step": 3549 + }, + { + "epoch": 0.32408252693080153, + "grad_norm": 0.4983835518360138, + "learning_rate": 4.9659799378105774e-06, + "loss": 0.5547, + "step": 3550 + }, + { + "epoch": 0.3241738177834581, + "grad_norm": 0.4474561810493469, + "learning_rate": 4.9659602580609305e-06, + "loss": 0.6111, + "step": 3551 + }, + { + "epoch": 0.3242651086361147, + "grad_norm": 0.4779242277145386, + "learning_rate": 4.9659405726598255e-06, + "loss": 0.5523, + "step": 3552 + }, + { + "epoch": 0.3243563994887712, + "grad_norm": 0.44084131717681885, + "learning_rate": 4.965920881607309e-06, + "loss": 0.6011, + "step": 3553 + }, + { + "epoch": 0.3244476903414278, + "grad_norm": 0.4822891354560852, + "learning_rate": 4.965901184903426e-06, + "loss": 0.6112, + "step": 3554 + }, + { + "epoch": 0.32453898119408436, + "grad_norm": 0.47760137915611267, + "learning_rate": 4.9658814825482215e-06, + "loss": 0.5977, + "step": 3555 + }, + { + "epoch": 0.32463027204674094, + "grad_norm": 0.5412437319755554, + "learning_rate": 4.96586177454174e-06, + "loss": 0.5486, + "step": 3556 + }, + { + "epoch": 0.32472156289939746, + "grad_norm": 0.4635240137577057, + "learning_rate": 4.9658420608840284e-06, + "loss": 0.6177, + "step": 3557 + }, + { + "epoch": 0.32481285375205404, + "grad_norm": 0.4879371225833893, + "learning_rate": 4.965822341575131e-06, + "loss": 0.6073, + "step": 3558 + }, + { + "epoch": 0.3249041446047106, + "grad_norm": 0.4688158929347992, + "learning_rate": 4.965802616615092e-06, + "loss": 0.6002, + "step": 3559 + }, + { + "epoch": 0.3249954354573672, + "grad_norm": 0.4754199683666229, + "learning_rate": 4.965782886003957e-06, + "loss": 0.5981, + "step": 3560 + }, + { + "epoch": 0.3250867263100237, + "grad_norm": 0.46280235052108765, + "learning_rate": 4.965763149741771e-06, + "loss": 0.6083, + "step": 3561 + }, + { + "epoch": 0.3251780171626803, + "grad_norm": 0.4601276218891144, + "learning_rate": 4.965743407828581e-06, + "loss": 0.6017, + "step": 3562 + }, + { + "epoch": 0.3252693080153369, + "grad_norm": 0.469155490398407, + "learning_rate": 4.9657236602644306e-06, + "loss": 0.598, + "step": 3563 + }, + { + "epoch": 0.3253605988679934, + "grad_norm": 0.48268386721611023, + "learning_rate": 4.965703907049366e-06, + "loss": 0.5624, + "step": 3564 + }, + { + "epoch": 0.32545188972065, + "grad_norm": 0.4600314199924469, + "learning_rate": 4.965684148183432e-06, + "loss": 0.579, + "step": 3565 + }, + { + "epoch": 0.32554318057330656, + "grad_norm": 0.5777772068977356, + "learning_rate": 4.965664383666674e-06, + "loss": 0.5616, + "step": 3566 + }, + { + "epoch": 0.32563447142596313, + "grad_norm": 0.506583034992218, + "learning_rate": 4.9656446134991365e-06, + "loss": 0.5737, + "step": 3567 + }, + { + "epoch": 0.32572576227861966, + "grad_norm": 0.49128004908561707, + "learning_rate": 4.965624837680866e-06, + "loss": 0.5793, + "step": 3568 + }, + { + "epoch": 0.32581705313127624, + "grad_norm": 0.454704225063324, + "learning_rate": 4.9656050562119065e-06, + "loss": 0.625, + "step": 3569 + }, + { + "epoch": 0.3259083439839328, + "grad_norm": 0.44693899154663086, + "learning_rate": 4.965585269092305e-06, + "loss": 0.5624, + "step": 3570 + }, + { + "epoch": 0.3259996348365894, + "grad_norm": 0.49494731426239014, + "learning_rate": 4.965565476322105e-06, + "loss": 0.5289, + "step": 3571 + }, + { + "epoch": 0.3260909256892459, + "grad_norm": 0.45623061060905457, + "learning_rate": 4.965545677901354e-06, + "loss": 0.5962, + "step": 3572 + }, + { + "epoch": 0.3261822165419025, + "grad_norm": 0.4970141351222992, + "learning_rate": 4.965525873830095e-06, + "loss": 0.5636, + "step": 3573 + }, + { + "epoch": 0.32627350739455907, + "grad_norm": 0.46755826473236084, + "learning_rate": 4.9655060641083754e-06, + "loss": 0.598, + "step": 3574 + }, + { + "epoch": 0.32636479824721565, + "grad_norm": 0.4563705325126648, + "learning_rate": 4.96548624873624e-06, + "loss": 0.5875, + "step": 3575 + }, + { + "epoch": 0.32645608909987217, + "grad_norm": 0.49831873178482056, + "learning_rate": 4.965466427713734e-06, + "loss": 0.554, + "step": 3576 + }, + { + "epoch": 0.32654737995252875, + "grad_norm": 0.4888758361339569, + "learning_rate": 4.965446601040902e-06, + "loss": 0.5879, + "step": 3577 + }, + { + "epoch": 0.3266386708051853, + "grad_norm": 0.48430782556533813, + "learning_rate": 4.965426768717791e-06, + "loss": 0.5692, + "step": 3578 + }, + { + "epoch": 0.3267299616578419, + "grad_norm": 0.468913197517395, + "learning_rate": 4.965406930744445e-06, + "loss": 0.6224, + "step": 3579 + }, + { + "epoch": 0.32682125251049843, + "grad_norm": 0.45876359939575195, + "learning_rate": 4.965387087120911e-06, + "loss": 0.6261, + "step": 3580 + }, + { + "epoch": 0.326912543363155, + "grad_norm": 0.5042165517807007, + "learning_rate": 4.965367237847233e-06, + "loss": 0.5903, + "step": 3581 + }, + { + "epoch": 0.3270038342158116, + "grad_norm": 0.4461793601512909, + "learning_rate": 4.965347382923458e-06, + "loss": 0.6062, + "step": 3582 + }, + { + "epoch": 0.32709512506846816, + "grad_norm": 0.45740145444869995, + "learning_rate": 4.96532752234963e-06, + "loss": 0.6097, + "step": 3583 + }, + { + "epoch": 0.3271864159211247, + "grad_norm": 0.4950214624404907, + "learning_rate": 4.965307656125795e-06, + "loss": 0.6216, + "step": 3584 + }, + { + "epoch": 0.32727770677378126, + "grad_norm": 0.46342039108276367, + "learning_rate": 4.965287784251999e-06, + "loss": 0.5714, + "step": 3585 + }, + { + "epoch": 0.32736899762643784, + "grad_norm": 0.4847202003002167, + "learning_rate": 4.965267906728287e-06, + "loss": 0.6022, + "step": 3586 + }, + { + "epoch": 0.3274602884790944, + "grad_norm": 0.5065857172012329, + "learning_rate": 4.965248023554706e-06, + "loss": 0.5543, + "step": 3587 + }, + { + "epoch": 0.32755157933175094, + "grad_norm": 0.49155670404434204, + "learning_rate": 4.9652281347312996e-06, + "loss": 0.568, + "step": 3588 + }, + { + "epoch": 0.3276428701844075, + "grad_norm": 0.45355063676834106, + "learning_rate": 4.9652082402581135e-06, + "loss": 0.602, + "step": 3589 + }, + { + "epoch": 0.3277341610370641, + "grad_norm": 0.4554145634174347, + "learning_rate": 4.965188340135195e-06, + "loss": 0.6342, + "step": 3590 + }, + { + "epoch": 0.3278254518897207, + "grad_norm": 0.46580299735069275, + "learning_rate": 4.965168434362588e-06, + "loss": 0.6014, + "step": 3591 + }, + { + "epoch": 0.3279167427423772, + "grad_norm": 0.49127113819122314, + "learning_rate": 4.9651485229403384e-06, + "loss": 0.5778, + "step": 3592 + }, + { + "epoch": 0.3280080335950338, + "grad_norm": 0.4493759274482727, + "learning_rate": 4.965128605868493e-06, + "loss": 0.6057, + "step": 3593 + }, + { + "epoch": 0.32809932444769035, + "grad_norm": 0.4660278856754303, + "learning_rate": 4.965108683147096e-06, + "loss": 0.6226, + "step": 3594 + }, + { + "epoch": 0.32819061530034693, + "grad_norm": 0.4593081474304199, + "learning_rate": 4.9650887547761945e-06, + "loss": 0.6185, + "step": 3595 + }, + { + "epoch": 0.32828190615300346, + "grad_norm": 0.4817517101764679, + "learning_rate": 4.965068820755833e-06, + "loss": 0.5832, + "step": 3596 + }, + { + "epoch": 0.32837319700566003, + "grad_norm": 0.4740571975708008, + "learning_rate": 4.965048881086057e-06, + "loss": 0.5916, + "step": 3597 + }, + { + "epoch": 0.3284644878583166, + "grad_norm": 0.47172704339027405, + "learning_rate": 4.965028935766913e-06, + "loss": 0.6022, + "step": 3598 + }, + { + "epoch": 0.32855577871097313, + "grad_norm": 0.47011423110961914, + "learning_rate": 4.9650089847984474e-06, + "loss": 0.5462, + "step": 3599 + }, + { + "epoch": 0.3286470695636297, + "grad_norm": 0.4798688292503357, + "learning_rate": 4.964989028180705e-06, + "loss": 0.5682, + "step": 3600 + }, + { + "epoch": 0.3287383604162863, + "grad_norm": 0.455422967672348, + "learning_rate": 4.96496906591373e-06, + "loss": 0.5979, + "step": 3601 + }, + { + "epoch": 0.32882965126894287, + "grad_norm": 0.454615980386734, + "learning_rate": 4.96494909799757e-06, + "loss": 0.6001, + "step": 3602 + }, + { + "epoch": 0.3289209421215994, + "grad_norm": 0.4933079779148102, + "learning_rate": 4.964929124432272e-06, + "loss": 0.5878, + "step": 3603 + }, + { + "epoch": 0.32901223297425597, + "grad_norm": 0.4591624438762665, + "learning_rate": 4.964909145217879e-06, + "loss": 0.5884, + "step": 3604 + }, + { + "epoch": 0.32910352382691255, + "grad_norm": 0.509123682975769, + "learning_rate": 4.964889160354438e-06, + "loss": 0.5694, + "step": 3605 + }, + { + "epoch": 0.3291948146795691, + "grad_norm": 0.4709673523902893, + "learning_rate": 4.964869169841995e-06, + "loss": 0.5985, + "step": 3606 + }, + { + "epoch": 0.32928610553222565, + "grad_norm": 0.4868159592151642, + "learning_rate": 4.964849173680596e-06, + "loss": 0.5703, + "step": 3607 + }, + { + "epoch": 0.3293773963848822, + "grad_norm": 0.49150851368904114, + "learning_rate": 4.964829171870286e-06, + "loss": 0.5813, + "step": 3608 + }, + { + "epoch": 0.3294686872375388, + "grad_norm": 0.47792553901672363, + "learning_rate": 4.964809164411111e-06, + "loss": 0.6172, + "step": 3609 + }, + { + "epoch": 0.3295599780901954, + "grad_norm": 0.47572463750839233, + "learning_rate": 4.964789151303118e-06, + "loss": 0.6256, + "step": 3610 + }, + { + "epoch": 0.3296512689428519, + "grad_norm": 0.48561516404151917, + "learning_rate": 4.964769132546351e-06, + "loss": 0.5785, + "step": 3611 + }, + { + "epoch": 0.3297425597955085, + "grad_norm": 0.47778841853141785, + "learning_rate": 4.964749108140858e-06, + "loss": 0.5963, + "step": 3612 + }, + { + "epoch": 0.32983385064816506, + "grad_norm": 0.4738526940345764, + "learning_rate": 4.964729078086683e-06, + "loss": 0.5476, + "step": 3613 + }, + { + "epoch": 0.32992514150082164, + "grad_norm": 0.4789980947971344, + "learning_rate": 4.964709042383873e-06, + "loss": 0.5583, + "step": 3614 + }, + { + "epoch": 0.33001643235347816, + "grad_norm": 0.526889979839325, + "learning_rate": 4.964689001032473e-06, + "loss": 0.6132, + "step": 3615 + }, + { + "epoch": 0.33010772320613474, + "grad_norm": 0.5049574375152588, + "learning_rate": 4.96466895403253e-06, + "loss": 0.5702, + "step": 3616 + }, + { + "epoch": 0.3301990140587913, + "grad_norm": 0.4785289168357849, + "learning_rate": 4.964648901384091e-06, + "loss": 0.6248, + "step": 3617 + }, + { + "epoch": 0.3302903049114479, + "grad_norm": 0.5110921263694763, + "learning_rate": 4.964628843087198e-06, + "loss": 0.5902, + "step": 3618 + }, + { + "epoch": 0.3303815957641044, + "grad_norm": 0.4875067174434662, + "learning_rate": 4.9646087791419005e-06, + "loss": 0.6176, + "step": 3619 + }, + { + "epoch": 0.330472886616761, + "grad_norm": 0.5269833207130432, + "learning_rate": 4.964588709548243e-06, + "loss": 0.5588, + "step": 3620 + }, + { + "epoch": 0.3305641774694176, + "grad_norm": 0.4674772620201111, + "learning_rate": 4.964568634306272e-06, + "loss": 0.6144, + "step": 3621 + }, + { + "epoch": 0.33065546832207415, + "grad_norm": 0.4641217291355133, + "learning_rate": 4.964548553416034e-06, + "loss": 0.62, + "step": 3622 + }, + { + "epoch": 0.3307467591747307, + "grad_norm": 0.5085083246231079, + "learning_rate": 4.964528466877574e-06, + "loss": 0.5407, + "step": 3623 + }, + { + "epoch": 0.33083805002738725, + "grad_norm": 0.47431740164756775, + "learning_rate": 4.964508374690938e-06, + "loss": 0.574, + "step": 3624 + }, + { + "epoch": 0.33092934088004383, + "grad_norm": 0.4933801293373108, + "learning_rate": 4.964488276856173e-06, + "loss": 0.5807, + "step": 3625 + }, + { + "epoch": 0.3310206317327004, + "grad_norm": 0.5129244923591614, + "learning_rate": 4.964468173373324e-06, + "loss": 0.5714, + "step": 3626 + }, + { + "epoch": 0.33111192258535693, + "grad_norm": 0.4670986831188202, + "learning_rate": 4.964448064242438e-06, + "loss": 0.6214, + "step": 3627 + }, + { + "epoch": 0.3312032134380135, + "grad_norm": 0.5022810101509094, + "learning_rate": 4.964427949463561e-06, + "loss": 0.5502, + "step": 3628 + }, + { + "epoch": 0.3312945042906701, + "grad_norm": 0.4785522222518921, + "learning_rate": 4.964407829036738e-06, + "loss": 0.6056, + "step": 3629 + }, + { + "epoch": 0.3313857951433266, + "grad_norm": 0.5013771653175354, + "learning_rate": 4.964387702962017e-06, + "loss": 0.5599, + "step": 3630 + }, + { + "epoch": 0.3314770859959832, + "grad_norm": 0.47421252727508545, + "learning_rate": 4.964367571239442e-06, + "loss": 0.5841, + "step": 3631 + }, + { + "epoch": 0.33156837684863977, + "grad_norm": 0.48604315519332886, + "learning_rate": 4.964347433869061e-06, + "loss": 0.614, + "step": 3632 + }, + { + "epoch": 0.33165966770129635, + "grad_norm": 0.49630817770957947, + "learning_rate": 4.964327290850919e-06, + "loss": 0.5974, + "step": 3633 + }, + { + "epoch": 0.33175095855395287, + "grad_norm": 0.4863710105419159, + "learning_rate": 4.964307142185062e-06, + "loss": 0.5752, + "step": 3634 + }, + { + "epoch": 0.33184224940660945, + "grad_norm": 0.426788866519928, + "learning_rate": 4.964286987871537e-06, + "loss": 0.6239, + "step": 3635 + }, + { + "epoch": 0.331933540259266, + "grad_norm": 0.48625409603118896, + "learning_rate": 4.96426682791039e-06, + "loss": 0.5535, + "step": 3636 + }, + { + "epoch": 0.3320248311119226, + "grad_norm": 0.4317813813686371, + "learning_rate": 4.964246662301667e-06, + "loss": 0.6126, + "step": 3637 + }, + { + "epoch": 0.3321161219645791, + "grad_norm": 0.4617632329463959, + "learning_rate": 4.964226491045415e-06, + "loss": 0.5869, + "step": 3638 + }, + { + "epoch": 0.3322074128172357, + "grad_norm": 0.4592885375022888, + "learning_rate": 4.964206314141679e-06, + "loss": 0.5945, + "step": 3639 + }, + { + "epoch": 0.3322987036698923, + "grad_norm": 0.47535625100135803, + "learning_rate": 4.964186131590505e-06, + "loss": 0.5856, + "step": 3640 + }, + { + "epoch": 0.33238999452254886, + "grad_norm": 0.44710782170295715, + "learning_rate": 4.964165943391941e-06, + "loss": 0.576, + "step": 3641 + }, + { + "epoch": 0.3324812853752054, + "grad_norm": 0.4928602874279022, + "learning_rate": 4.9641457495460325e-06, + "loss": 0.5923, + "step": 3642 + }, + { + "epoch": 0.33257257622786196, + "grad_norm": 0.4810762107372284, + "learning_rate": 4.964125550052824e-06, + "loss": 0.5904, + "step": 3643 + }, + { + "epoch": 0.33266386708051854, + "grad_norm": 0.4518974721431732, + "learning_rate": 4.964105344912364e-06, + "loss": 0.612, + "step": 3644 + }, + { + "epoch": 0.3327551579331751, + "grad_norm": 0.4994048774242401, + "learning_rate": 4.964085134124699e-06, + "loss": 0.5836, + "step": 3645 + }, + { + "epoch": 0.33284644878583164, + "grad_norm": 0.47437357902526855, + "learning_rate": 4.964064917689874e-06, + "loss": 0.5871, + "step": 3646 + }, + { + "epoch": 0.3329377396384882, + "grad_norm": 0.46769005060195923, + "learning_rate": 4.964044695607935e-06, + "loss": 0.58, + "step": 3647 + }, + { + "epoch": 0.3330290304911448, + "grad_norm": 0.4640682637691498, + "learning_rate": 4.96402446787893e-06, + "loss": 0.5506, + "step": 3648 + }, + { + "epoch": 0.3331203213438014, + "grad_norm": 0.45264601707458496, + "learning_rate": 4.964004234502905e-06, + "loss": 0.5971, + "step": 3649 + }, + { + "epoch": 0.3332116121964579, + "grad_norm": 0.4817027151584625, + "learning_rate": 4.963983995479904e-06, + "loss": 0.5678, + "step": 3650 + }, + { + "epoch": 0.3333029030491145, + "grad_norm": 0.4680156409740448, + "learning_rate": 4.963963750809977e-06, + "loss": 0.5942, + "step": 3651 + }, + { + "epoch": 0.33339419390177105, + "grad_norm": 0.4952755570411682, + "learning_rate": 4.963943500493168e-06, + "loss": 0.593, + "step": 3652 + }, + { + "epoch": 0.33348548475442763, + "grad_norm": 0.46100372076034546, + "learning_rate": 4.963923244529524e-06, + "loss": 0.6351, + "step": 3653 + }, + { + "epoch": 0.33357677560708415, + "grad_norm": 0.4625193476676941, + "learning_rate": 4.963902982919092e-06, + "loss": 0.5924, + "step": 3654 + }, + { + "epoch": 0.33366806645974073, + "grad_norm": 0.49194657802581787, + "learning_rate": 4.963882715661917e-06, + "loss": 0.5866, + "step": 3655 + }, + { + "epoch": 0.3337593573123973, + "grad_norm": 0.5111036896705627, + "learning_rate": 4.963862442758047e-06, + "loss": 0.6026, + "step": 3656 + }, + { + "epoch": 0.3338506481650539, + "grad_norm": 0.4689730703830719, + "learning_rate": 4.963842164207529e-06, + "loss": 0.617, + "step": 3657 + }, + { + "epoch": 0.3339419390177104, + "grad_norm": 0.454291433095932, + "learning_rate": 4.963821880010406e-06, + "loss": 0.5906, + "step": 3658 + }, + { + "epoch": 0.334033229870367, + "grad_norm": 0.46291080117225647, + "learning_rate": 4.963801590166728e-06, + "loss": 0.5895, + "step": 3659 + }, + { + "epoch": 0.33412452072302357, + "grad_norm": 0.4634052813053131, + "learning_rate": 4.96378129467654e-06, + "loss": 0.5698, + "step": 3660 + }, + { + "epoch": 0.33421581157568014, + "grad_norm": 0.49210700392723083, + "learning_rate": 4.963760993539889e-06, + "loss": 0.562, + "step": 3661 + }, + { + "epoch": 0.33430710242833667, + "grad_norm": 0.4679211676120758, + "learning_rate": 4.963740686756821e-06, + "loss": 0.5827, + "step": 3662 + }, + { + "epoch": 0.33439839328099324, + "grad_norm": 0.46363818645477295, + "learning_rate": 4.963720374327383e-06, + "loss": 0.5637, + "step": 3663 + }, + { + "epoch": 0.3344896841336498, + "grad_norm": 0.4521419405937195, + "learning_rate": 4.963700056251623e-06, + "loss": 0.6612, + "step": 3664 + }, + { + "epoch": 0.33458097498630635, + "grad_norm": 0.46510717272758484, + "learning_rate": 4.963679732529584e-06, + "loss": 0.5677, + "step": 3665 + }, + { + "epoch": 0.3346722658389629, + "grad_norm": 0.4881404638290405, + "learning_rate": 4.963659403161315e-06, + "loss": 0.5645, + "step": 3666 + }, + { + "epoch": 0.3347635566916195, + "grad_norm": 0.48559293150901794, + "learning_rate": 4.963639068146861e-06, + "loss": 0.5942, + "step": 3667 + }, + { + "epoch": 0.3348548475442761, + "grad_norm": 0.5018342733383179, + "learning_rate": 4.9636187274862715e-06, + "loss": 0.5876, + "step": 3668 + }, + { + "epoch": 0.3349461383969326, + "grad_norm": 0.5130695104598999, + "learning_rate": 4.9635983811795915e-06, + "loss": 0.5679, + "step": 3669 + }, + { + "epoch": 0.3350374292495892, + "grad_norm": 0.4898633062839508, + "learning_rate": 4.963578029226867e-06, + "loss": 0.6249, + "step": 3670 + }, + { + "epoch": 0.33512872010224576, + "grad_norm": 0.4492626190185547, + "learning_rate": 4.963557671628145e-06, + "loss": 0.5952, + "step": 3671 + }, + { + "epoch": 0.33522001095490234, + "grad_norm": 0.47247186303138733, + "learning_rate": 4.963537308383472e-06, + "loss": 0.5968, + "step": 3672 + }, + { + "epoch": 0.33531130180755886, + "grad_norm": 0.4966326057910919, + "learning_rate": 4.963516939492895e-06, + "loss": 0.577, + "step": 3673 + }, + { + "epoch": 0.33540259266021544, + "grad_norm": 0.45703253149986267, + "learning_rate": 4.963496564956462e-06, + "loss": 0.5959, + "step": 3674 + }, + { + "epoch": 0.335493883512872, + "grad_norm": 0.4675590693950653, + "learning_rate": 4.963476184774216e-06, + "loss": 0.5821, + "step": 3675 + }, + { + "epoch": 0.3355851743655286, + "grad_norm": 0.5142287611961365, + "learning_rate": 4.963455798946207e-06, + "loss": 0.54, + "step": 3676 + }, + { + "epoch": 0.3356764652181851, + "grad_norm": 0.47792232036590576, + "learning_rate": 4.963435407472482e-06, + "loss": 0.5838, + "step": 3677 + }, + { + "epoch": 0.3357677560708417, + "grad_norm": 0.48031026124954224, + "learning_rate": 4.9634150103530855e-06, + "loss": 0.5762, + "step": 3678 + }, + { + "epoch": 0.3358590469234983, + "grad_norm": 0.44963306188583374, + "learning_rate": 4.963394607588065e-06, + "loss": 0.5937, + "step": 3679 + }, + { + "epoch": 0.33595033777615485, + "grad_norm": 0.49730172753334045, + "learning_rate": 4.963374199177468e-06, + "loss": 0.6359, + "step": 3680 + }, + { + "epoch": 0.3360416286288114, + "grad_norm": 0.47752389311790466, + "learning_rate": 4.963353785121341e-06, + "loss": 0.5686, + "step": 3681 + }, + { + "epoch": 0.33613291948146795, + "grad_norm": 0.49832212924957275, + "learning_rate": 4.96333336541973e-06, + "loss": 0.595, + "step": 3682 + }, + { + "epoch": 0.33622421033412453, + "grad_norm": 0.502557635307312, + "learning_rate": 4.963312940072682e-06, + "loss": 0.5966, + "step": 3683 + }, + { + "epoch": 0.3363155011867811, + "grad_norm": 0.4938780665397644, + "learning_rate": 4.963292509080246e-06, + "loss": 0.5806, + "step": 3684 + }, + { + "epoch": 0.33640679203943763, + "grad_norm": 0.43081241846084595, + "learning_rate": 4.9632720724424645e-06, + "loss": 0.6379, + "step": 3685 + }, + { + "epoch": 0.3364980828920942, + "grad_norm": 0.46080049872398376, + "learning_rate": 4.963251630159388e-06, + "loss": 0.6018, + "step": 3686 + }, + { + "epoch": 0.3365893737447508, + "grad_norm": 0.47576403617858887, + "learning_rate": 4.963231182231062e-06, + "loss": 0.5985, + "step": 3687 + }, + { + "epoch": 0.33668066459740736, + "grad_norm": 0.48689305782318115, + "learning_rate": 4.963210728657534e-06, + "loss": 0.6014, + "step": 3688 + }, + { + "epoch": 0.3367719554500639, + "grad_norm": 0.4594170153141022, + "learning_rate": 4.96319026943885e-06, + "loss": 0.638, + "step": 3689 + }, + { + "epoch": 0.33686324630272046, + "grad_norm": 0.488272488117218, + "learning_rate": 4.963169804575058e-06, + "loss": 0.5935, + "step": 3690 + }, + { + "epoch": 0.33695453715537704, + "grad_norm": 0.48593461513519287, + "learning_rate": 4.963149334066204e-06, + "loss": 0.6152, + "step": 3691 + }, + { + "epoch": 0.3370458280080336, + "grad_norm": 0.49894723296165466, + "learning_rate": 4.963128857912334e-06, + "loss": 0.5493, + "step": 3692 + }, + { + "epoch": 0.33713711886069014, + "grad_norm": 0.49756863713264465, + "learning_rate": 4.963108376113497e-06, + "loss": 0.5647, + "step": 3693 + }, + { + "epoch": 0.3372284097133467, + "grad_norm": 0.43247169256210327, + "learning_rate": 4.963087888669739e-06, + "loss": 0.6174, + "step": 3694 + }, + { + "epoch": 0.3373197005660033, + "grad_norm": 0.48186418414115906, + "learning_rate": 4.963067395581107e-06, + "loss": 0.5563, + "step": 3695 + }, + { + "epoch": 0.3374109914186599, + "grad_norm": 0.4721035659313202, + "learning_rate": 4.9630468968476475e-06, + "loss": 0.6043, + "step": 3696 + }, + { + "epoch": 0.3375022822713164, + "grad_norm": 0.4800824224948883, + "learning_rate": 4.963026392469408e-06, + "loss": 0.6054, + "step": 3697 + }, + { + "epoch": 0.337593573123973, + "grad_norm": 0.4731680154800415, + "learning_rate": 4.963005882446435e-06, + "loss": 0.6003, + "step": 3698 + }, + { + "epoch": 0.33768486397662956, + "grad_norm": 0.4382147192955017, + "learning_rate": 4.962985366778777e-06, + "loss": 0.6636, + "step": 3699 + }, + { + "epoch": 0.3377761548292861, + "grad_norm": 0.4541076719760895, + "learning_rate": 4.962964845466479e-06, + "loss": 0.6194, + "step": 3700 + }, + { + "epoch": 0.33786744568194266, + "grad_norm": 0.4957836866378784, + "learning_rate": 4.962944318509589e-06, + "loss": 0.5406, + "step": 3701 + }, + { + "epoch": 0.33795873653459924, + "grad_norm": 0.48638880252838135, + "learning_rate": 4.9629237859081546e-06, + "loss": 0.5546, + "step": 3702 + }, + { + "epoch": 0.3380500273872558, + "grad_norm": 0.45957857370376587, + "learning_rate": 4.962903247662222e-06, + "loss": 0.6233, + "step": 3703 + }, + { + "epoch": 0.33814131823991234, + "grad_norm": 0.49190741777420044, + "learning_rate": 4.962882703771838e-06, + "loss": 0.5902, + "step": 3704 + }, + { + "epoch": 0.3382326090925689, + "grad_norm": 0.48082441091537476, + "learning_rate": 4.96286215423705e-06, + "loss": 0.6235, + "step": 3705 + }, + { + "epoch": 0.3383238999452255, + "grad_norm": 0.45589521527290344, + "learning_rate": 4.962841599057906e-06, + "loss": 0.6135, + "step": 3706 + }, + { + "epoch": 0.33841519079788207, + "grad_norm": 0.4631209373474121, + "learning_rate": 4.962821038234452e-06, + "loss": 0.5854, + "step": 3707 + }, + { + "epoch": 0.3385064816505386, + "grad_norm": 0.44551023840904236, + "learning_rate": 4.962800471766736e-06, + "loss": 0.5941, + "step": 3708 + }, + { + "epoch": 0.33859777250319517, + "grad_norm": 0.5026925802230835, + "learning_rate": 4.962779899654804e-06, + "loss": 0.5876, + "step": 3709 + }, + { + "epoch": 0.33868906335585175, + "grad_norm": 0.48733821511268616, + "learning_rate": 4.962759321898704e-06, + "loss": 0.5535, + "step": 3710 + }, + { + "epoch": 0.3387803542085083, + "grad_norm": 0.45689114928245544, + "learning_rate": 4.962738738498483e-06, + "loss": 0.6441, + "step": 3711 + }, + { + "epoch": 0.33887164506116485, + "grad_norm": 0.4652917981147766, + "learning_rate": 4.962718149454188e-06, + "loss": 0.6187, + "step": 3712 + }, + { + "epoch": 0.33896293591382143, + "grad_norm": 0.4833424389362335, + "learning_rate": 4.962697554765866e-06, + "loss": 0.5903, + "step": 3713 + }, + { + "epoch": 0.339054226766478, + "grad_norm": 0.4974193274974823, + "learning_rate": 4.962676954433565e-06, + "loss": 0.5905, + "step": 3714 + }, + { + "epoch": 0.3391455176191346, + "grad_norm": 0.46575766801834106, + "learning_rate": 4.962656348457332e-06, + "loss": 0.6052, + "step": 3715 + }, + { + "epoch": 0.3392368084717911, + "grad_norm": 0.4546273946762085, + "learning_rate": 4.962635736837214e-06, + "loss": 0.6272, + "step": 3716 + }, + { + "epoch": 0.3393280993244477, + "grad_norm": 0.5096640586853027, + "learning_rate": 4.9626151195732575e-06, + "loss": 0.5293, + "step": 3717 + }, + { + "epoch": 0.33941939017710426, + "grad_norm": 0.47851595282554626, + "learning_rate": 4.9625944966655105e-06, + "loss": 0.5629, + "step": 3718 + }, + { + "epoch": 0.33951068102976084, + "grad_norm": 0.44079238176345825, + "learning_rate": 4.96257386811402e-06, + "loss": 0.5657, + "step": 3719 + }, + { + "epoch": 0.33960197188241736, + "grad_norm": 0.4508531987667084, + "learning_rate": 4.962553233918834e-06, + "loss": 0.5896, + "step": 3720 + }, + { + "epoch": 0.33969326273507394, + "grad_norm": 0.4678393304347992, + "learning_rate": 4.9625325940799995e-06, + "loss": 0.5524, + "step": 3721 + }, + { + "epoch": 0.3397845535877305, + "grad_norm": 0.465387761592865, + "learning_rate": 4.962511948597562e-06, + "loss": 0.597, + "step": 3722 + }, + { + "epoch": 0.3398758444403871, + "grad_norm": 0.4857478737831116, + "learning_rate": 4.9624912974715725e-06, + "loss": 0.5961, + "step": 3723 + }, + { + "epoch": 0.3399671352930436, + "grad_norm": 0.46515488624572754, + "learning_rate": 4.962470640702075e-06, + "loss": 0.5751, + "step": 3724 + }, + { + "epoch": 0.3400584261457002, + "grad_norm": 0.4708504378795624, + "learning_rate": 4.9624499782891185e-06, + "loss": 0.6042, + "step": 3725 + }, + { + "epoch": 0.3401497169983568, + "grad_norm": 0.49218377470970154, + "learning_rate": 4.962429310232749e-06, + "loss": 0.5564, + "step": 3726 + }, + { + "epoch": 0.34024100785101336, + "grad_norm": 0.47043314576148987, + "learning_rate": 4.962408636533016e-06, + "loss": 0.5964, + "step": 3727 + }, + { + "epoch": 0.3403322987036699, + "grad_norm": 0.4483877420425415, + "learning_rate": 4.9623879571899645e-06, + "loss": 0.665, + "step": 3728 + }, + { + "epoch": 0.34042358955632646, + "grad_norm": 0.4837096035480499, + "learning_rate": 4.9623672722036445e-06, + "loss": 0.601, + "step": 3729 + }, + { + "epoch": 0.34051488040898303, + "grad_norm": 0.4786980152130127, + "learning_rate": 4.9623465815741005e-06, + "loss": 0.5733, + "step": 3730 + }, + { + "epoch": 0.34060617126163956, + "grad_norm": 0.464840292930603, + "learning_rate": 4.9623258853013825e-06, + "loss": 0.5972, + "step": 3731 + }, + { + "epoch": 0.34069746211429613, + "grad_norm": 0.4856230616569519, + "learning_rate": 4.962305183385535e-06, + "loss": 0.5936, + "step": 3732 + }, + { + "epoch": 0.3407887529669527, + "grad_norm": 0.44558918476104736, + "learning_rate": 4.962284475826609e-06, + "loss": 0.5765, + "step": 3733 + }, + { + "epoch": 0.3408800438196093, + "grad_norm": 0.42967039346694946, + "learning_rate": 4.962263762624649e-06, + "loss": 0.6307, + "step": 3734 + }, + { + "epoch": 0.3409713346722658, + "grad_norm": 0.45166856050491333, + "learning_rate": 4.962243043779705e-06, + "loss": 0.6039, + "step": 3735 + }, + { + "epoch": 0.3410626255249224, + "grad_norm": 0.4661807417869568, + "learning_rate": 4.962222319291822e-06, + "loss": 0.5892, + "step": 3736 + }, + { + "epoch": 0.34115391637757897, + "grad_norm": 0.4836867153644562, + "learning_rate": 4.96220158916105e-06, + "loss": 0.5527, + "step": 3737 + }, + { + "epoch": 0.34124520723023555, + "grad_norm": 0.47456663846969604, + "learning_rate": 4.962180853387434e-06, + "loss": 0.5996, + "step": 3738 + }, + { + "epoch": 0.34133649808289207, + "grad_norm": 0.4749908745288849, + "learning_rate": 4.962160111971023e-06, + "loss": 0.61, + "step": 3739 + }, + { + "epoch": 0.34142778893554865, + "grad_norm": 0.4514191448688507, + "learning_rate": 4.9621393649118635e-06, + "loss": 0.5861, + "step": 3740 + }, + { + "epoch": 0.3415190797882052, + "grad_norm": 0.47501930594444275, + "learning_rate": 4.9621186122100045e-06, + "loss": 0.5795, + "step": 3741 + }, + { + "epoch": 0.3416103706408618, + "grad_norm": 0.4682404696941376, + "learning_rate": 4.962097853865493e-06, + "loss": 0.5777, + "step": 3742 + }, + { + "epoch": 0.3417016614935183, + "grad_norm": 0.44020238518714905, + "learning_rate": 4.962077089878375e-06, + "loss": 0.6099, + "step": 3743 + }, + { + "epoch": 0.3417929523461749, + "grad_norm": 0.47322607040405273, + "learning_rate": 4.962056320248702e-06, + "loss": 0.5817, + "step": 3744 + }, + { + "epoch": 0.3418842431988315, + "grad_norm": 0.4673744738101959, + "learning_rate": 4.962035544976517e-06, + "loss": 0.5904, + "step": 3745 + }, + { + "epoch": 0.34197553405148806, + "grad_norm": 0.477694571018219, + "learning_rate": 4.96201476406187e-06, + "loss": 0.5969, + "step": 3746 + }, + { + "epoch": 0.3420668249041446, + "grad_norm": 0.48049455881118774, + "learning_rate": 4.9619939775048085e-06, + "loss": 0.5818, + "step": 3747 + }, + { + "epoch": 0.34215811575680116, + "grad_norm": 0.44911324977874756, + "learning_rate": 4.9619731853053805e-06, + "loss": 0.5789, + "step": 3748 + }, + { + "epoch": 0.34224940660945774, + "grad_norm": 0.4524138867855072, + "learning_rate": 4.961952387463632e-06, + "loss": 0.5869, + "step": 3749 + }, + { + "epoch": 0.3423406974621143, + "grad_norm": 0.5102835297584534, + "learning_rate": 4.961931583979614e-06, + "loss": 0.5644, + "step": 3750 + }, + { + "epoch": 0.34243198831477084, + "grad_norm": 0.44094952940940857, + "learning_rate": 4.961910774853369e-06, + "loss": 0.5733, + "step": 3751 + }, + { + "epoch": 0.3425232791674274, + "grad_norm": 0.4856381416320801, + "learning_rate": 4.961889960084949e-06, + "loss": 0.5664, + "step": 3752 + }, + { + "epoch": 0.342614570020084, + "grad_norm": 0.4732969403266907, + "learning_rate": 4.961869139674401e-06, + "loss": 0.5932, + "step": 3753 + }, + { + "epoch": 0.3427058608727406, + "grad_norm": 0.47758427262306213, + "learning_rate": 4.961848313621771e-06, + "loss": 0.5967, + "step": 3754 + }, + { + "epoch": 0.3427971517253971, + "grad_norm": 0.46610227227211, + "learning_rate": 4.961827481927109e-06, + "loss": 0.5855, + "step": 3755 + }, + { + "epoch": 0.3428884425780537, + "grad_norm": 0.45668184757232666, + "learning_rate": 4.96180664459046e-06, + "loss": 0.5694, + "step": 3756 + }, + { + "epoch": 0.34297973343071025, + "grad_norm": 0.42410337924957275, + "learning_rate": 4.961785801611874e-06, + "loss": 0.578, + "step": 3757 + }, + { + "epoch": 0.34307102428336683, + "grad_norm": 0.4739677608013153, + "learning_rate": 4.961764952991398e-06, + "loss": 0.5884, + "step": 3758 + }, + { + "epoch": 0.34316231513602335, + "grad_norm": 0.47308269143104553, + "learning_rate": 4.96174409872908e-06, + "loss": 0.5721, + "step": 3759 + }, + { + "epoch": 0.34325360598867993, + "grad_norm": 0.45721715688705444, + "learning_rate": 4.961723238824968e-06, + "loss": 0.6625, + "step": 3760 + }, + { + "epoch": 0.3433448968413365, + "grad_norm": 0.4589204788208008, + "learning_rate": 4.961702373279108e-06, + "loss": 0.6083, + "step": 3761 + }, + { + "epoch": 0.3434361876939931, + "grad_norm": 0.4943632185459137, + "learning_rate": 4.961681502091551e-06, + "loss": 0.57, + "step": 3762 + }, + { + "epoch": 0.3435274785466496, + "grad_norm": 0.49496737122535706, + "learning_rate": 4.961660625262342e-06, + "loss": 0.55, + "step": 3763 + }, + { + "epoch": 0.3436187693993062, + "grad_norm": 0.5217538475990295, + "learning_rate": 4.96163974279153e-06, + "loss": 0.5983, + "step": 3764 + }, + { + "epoch": 0.34371006025196277, + "grad_norm": 0.43330517411231995, + "learning_rate": 4.961618854679163e-06, + "loss": 0.6516, + "step": 3765 + }, + { + "epoch": 0.3438013511046193, + "grad_norm": 0.4427773952484131, + "learning_rate": 4.961597960925288e-06, + "loss": 0.617, + "step": 3766 + }, + { + "epoch": 0.34389264195727587, + "grad_norm": 0.500595211982727, + "learning_rate": 4.961577061529955e-06, + "loss": 0.5583, + "step": 3767 + }, + { + "epoch": 0.34398393280993245, + "grad_norm": 0.5167930722236633, + "learning_rate": 4.961556156493209e-06, + "loss": 0.5542, + "step": 3768 + }, + { + "epoch": 0.344075223662589, + "grad_norm": 0.47793176770210266, + "learning_rate": 4.9615352458151e-06, + "loss": 0.6005, + "step": 3769 + }, + { + "epoch": 0.34416651451524555, + "grad_norm": 0.49608632922172546, + "learning_rate": 4.961514329495676e-06, + "loss": 0.6052, + "step": 3770 + }, + { + "epoch": 0.3442578053679021, + "grad_norm": 0.463097482919693, + "learning_rate": 4.961493407534983e-06, + "loss": 0.5906, + "step": 3771 + }, + { + "epoch": 0.3443490962205587, + "grad_norm": 0.4808676242828369, + "learning_rate": 4.961472479933071e-06, + "loss": 0.5867, + "step": 3772 + }, + { + "epoch": 0.3444403870732153, + "grad_norm": 0.48438557982444763, + "learning_rate": 4.961451546689986e-06, + "loss": 0.6174, + "step": 3773 + }, + { + "epoch": 0.3445316779258718, + "grad_norm": 0.5149322748184204, + "learning_rate": 4.9614306078057784e-06, + "loss": 0.6067, + "step": 3774 + }, + { + "epoch": 0.3446229687785284, + "grad_norm": 0.46882322430610657, + "learning_rate": 4.961409663280495e-06, + "loss": 0.5946, + "step": 3775 + }, + { + "epoch": 0.34471425963118496, + "grad_norm": 0.5245246887207031, + "learning_rate": 4.961388713114182e-06, + "loss": 0.5705, + "step": 3776 + }, + { + "epoch": 0.34480555048384154, + "grad_norm": 0.47305092215538025, + "learning_rate": 4.96136775730689e-06, + "loss": 0.6025, + "step": 3777 + }, + { + "epoch": 0.34489684133649806, + "grad_norm": 0.4877651035785675, + "learning_rate": 4.961346795858666e-06, + "loss": 0.568, + "step": 3778 + }, + { + "epoch": 0.34498813218915464, + "grad_norm": 0.4560176134109497, + "learning_rate": 4.961325828769558e-06, + "loss": 0.6134, + "step": 3779 + }, + { + "epoch": 0.3450794230418112, + "grad_norm": 0.4883682429790497, + "learning_rate": 4.961304856039615e-06, + "loss": 0.5349, + "step": 3780 + }, + { + "epoch": 0.3451707138944678, + "grad_norm": 0.4872421324253082, + "learning_rate": 4.961283877668883e-06, + "loss": 0.5763, + "step": 3781 + }, + { + "epoch": 0.3452620047471243, + "grad_norm": 0.4786335527896881, + "learning_rate": 4.961262893657412e-06, + "loss": 0.5749, + "step": 3782 + }, + { + "epoch": 0.3453532955997809, + "grad_norm": 0.4775882959365845, + "learning_rate": 4.96124190400525e-06, + "loss": 0.578, + "step": 3783 + }, + { + "epoch": 0.3454445864524375, + "grad_norm": 0.47412437200546265, + "learning_rate": 4.961220908712444e-06, + "loss": 0.5695, + "step": 3784 + }, + { + "epoch": 0.34553587730509405, + "grad_norm": 0.4575332701206207, + "learning_rate": 4.961199907779043e-06, + "loss": 0.5742, + "step": 3785 + }, + { + "epoch": 0.3456271681577506, + "grad_norm": 0.4892740249633789, + "learning_rate": 4.961178901205095e-06, + "loss": 0.6092, + "step": 3786 + }, + { + "epoch": 0.34571845901040715, + "grad_norm": 0.48583948612213135, + "learning_rate": 4.961157888990647e-06, + "loss": 0.5933, + "step": 3787 + }, + { + "epoch": 0.34580974986306373, + "grad_norm": 0.48255953192710876, + "learning_rate": 4.961136871135749e-06, + "loss": 0.6143, + "step": 3788 + }, + { + "epoch": 0.3459010407157203, + "grad_norm": 0.4738168716430664, + "learning_rate": 4.961115847640448e-06, + "loss": 0.6204, + "step": 3789 + }, + { + "epoch": 0.34599233156837683, + "grad_norm": 0.4709627032279968, + "learning_rate": 4.961094818504792e-06, + "loss": 0.5902, + "step": 3790 + }, + { + "epoch": 0.3460836224210334, + "grad_norm": 0.4991666376590729, + "learning_rate": 4.96107378372883e-06, + "loss": 0.5778, + "step": 3791 + }, + { + "epoch": 0.34617491327369, + "grad_norm": 0.4824483394622803, + "learning_rate": 4.961052743312609e-06, + "loss": 0.5789, + "step": 3792 + }, + { + "epoch": 0.34626620412634657, + "grad_norm": 0.46345171332359314, + "learning_rate": 4.961031697256179e-06, + "loss": 0.5966, + "step": 3793 + }, + { + "epoch": 0.3463574949790031, + "grad_norm": 0.47299641370773315, + "learning_rate": 4.961010645559587e-06, + "loss": 0.5661, + "step": 3794 + }, + { + "epoch": 0.34644878583165967, + "grad_norm": 0.4909308850765228, + "learning_rate": 4.960989588222882e-06, + "loss": 0.5738, + "step": 3795 + }, + { + "epoch": 0.34654007668431625, + "grad_norm": 0.5192651152610779, + "learning_rate": 4.960968525246112e-06, + "loss": 0.5561, + "step": 3796 + }, + { + "epoch": 0.34663136753697277, + "grad_norm": 0.48015591502189636, + "learning_rate": 4.960947456629324e-06, + "loss": 0.5733, + "step": 3797 + }, + { + "epoch": 0.34672265838962935, + "grad_norm": 0.48597681522369385, + "learning_rate": 4.960926382372568e-06, + "loss": 0.5879, + "step": 3798 + }, + { + "epoch": 0.3468139492422859, + "grad_norm": 0.46420472860336304, + "learning_rate": 4.960905302475891e-06, + "loss": 0.6309, + "step": 3799 + }, + { + "epoch": 0.3469052400949425, + "grad_norm": 0.49280551075935364, + "learning_rate": 4.960884216939343e-06, + "loss": 0.6163, + "step": 3800 + }, + { + "epoch": 0.346996530947599, + "grad_norm": 0.4415469169616699, + "learning_rate": 4.960863125762971e-06, + "loss": 0.5551, + "step": 3801 + }, + { + "epoch": 0.3470878218002556, + "grad_norm": 0.4633900225162506, + "learning_rate": 4.960842028946823e-06, + "loss": 0.5653, + "step": 3802 + }, + { + "epoch": 0.3471791126529122, + "grad_norm": 0.4651680886745453, + "learning_rate": 4.960820926490949e-06, + "loss": 0.6202, + "step": 3803 + }, + { + "epoch": 0.34727040350556876, + "grad_norm": 0.4722631871700287, + "learning_rate": 4.960799818395395e-06, + "loss": 0.564, + "step": 3804 + }, + { + "epoch": 0.3473616943582253, + "grad_norm": 0.485900342464447, + "learning_rate": 4.960778704660212e-06, + "loss": 0.5621, + "step": 3805 + }, + { + "epoch": 0.34745298521088186, + "grad_norm": 0.46705716848373413, + "learning_rate": 4.9607575852854465e-06, + "loss": 0.6024, + "step": 3806 + }, + { + "epoch": 0.34754427606353844, + "grad_norm": 0.4409416615962982, + "learning_rate": 4.9607364602711474e-06, + "loss": 0.6222, + "step": 3807 + }, + { + "epoch": 0.347635566916195, + "grad_norm": 0.47813358902931213, + "learning_rate": 4.960715329617363e-06, + "loss": 0.5755, + "step": 3808 + }, + { + "epoch": 0.34772685776885154, + "grad_norm": 0.47191643714904785, + "learning_rate": 4.960694193324143e-06, + "loss": 0.5975, + "step": 3809 + }, + { + "epoch": 0.3478181486215081, + "grad_norm": 0.4462253451347351, + "learning_rate": 4.960673051391534e-06, + "loss": 0.6334, + "step": 3810 + }, + { + "epoch": 0.3479094394741647, + "grad_norm": 0.4978450536727905, + "learning_rate": 4.960651903819585e-06, + "loss": 0.5535, + "step": 3811 + }, + { + "epoch": 0.3480007303268213, + "grad_norm": 0.45136645436286926, + "learning_rate": 4.960630750608345e-06, + "loss": 0.5568, + "step": 3812 + }, + { + "epoch": 0.3480920211794778, + "grad_norm": 0.49147167801856995, + "learning_rate": 4.9606095917578626e-06, + "loss": 0.5997, + "step": 3813 + }, + { + "epoch": 0.3481833120321344, + "grad_norm": 0.46926867961883545, + "learning_rate": 4.960588427268185e-06, + "loss": 0.5817, + "step": 3814 + }, + { + "epoch": 0.34827460288479095, + "grad_norm": 0.4537467956542969, + "learning_rate": 4.960567257139362e-06, + "loss": 0.6278, + "step": 3815 + }, + { + "epoch": 0.34836589373744753, + "grad_norm": 0.4456396996974945, + "learning_rate": 4.960546081371442e-06, + "loss": 0.6335, + "step": 3816 + }, + { + "epoch": 0.34845718459010405, + "grad_norm": 0.4720296263694763, + "learning_rate": 4.960524899964473e-06, + "loss": 0.5811, + "step": 3817 + }, + { + "epoch": 0.34854847544276063, + "grad_norm": 0.47637680172920227, + "learning_rate": 4.960503712918503e-06, + "loss": 0.5459, + "step": 3818 + }, + { + "epoch": 0.3486397662954172, + "grad_norm": 0.4475562274456024, + "learning_rate": 4.960482520233582e-06, + "loss": 0.6215, + "step": 3819 + }, + { + "epoch": 0.3487310571480738, + "grad_norm": 0.5034824013710022, + "learning_rate": 4.960461321909758e-06, + "loss": 0.5159, + "step": 3820 + }, + { + "epoch": 0.3488223480007303, + "grad_norm": 0.4340372681617737, + "learning_rate": 4.96044011794708e-06, + "loss": 0.5665, + "step": 3821 + }, + { + "epoch": 0.3489136388533869, + "grad_norm": 0.5170994997024536, + "learning_rate": 4.960418908345596e-06, + "loss": 0.5807, + "step": 3822 + }, + { + "epoch": 0.34900492970604347, + "grad_norm": 0.4629875421524048, + "learning_rate": 4.960397693105353e-06, + "loss": 0.61, + "step": 3823 + }, + { + "epoch": 0.34909622055870004, + "grad_norm": 0.47007718682289124, + "learning_rate": 4.960376472226403e-06, + "loss": 0.5613, + "step": 3824 + }, + { + "epoch": 0.34918751141135657, + "grad_norm": 0.4778081178665161, + "learning_rate": 4.960355245708792e-06, + "loss": 0.6162, + "step": 3825 + }, + { + "epoch": 0.34927880226401314, + "grad_norm": 0.48021042346954346, + "learning_rate": 4.96033401355257e-06, + "loss": 0.5954, + "step": 3826 + }, + { + "epoch": 0.3493700931166697, + "grad_norm": 0.45738542079925537, + "learning_rate": 4.960312775757785e-06, + "loss": 0.6239, + "step": 3827 + }, + { + "epoch": 0.3494613839693263, + "grad_norm": 0.47099897265434265, + "learning_rate": 4.960291532324486e-06, + "loss": 0.6137, + "step": 3828 + }, + { + "epoch": 0.3495526748219828, + "grad_norm": 0.49574723839759827, + "learning_rate": 4.960270283252722e-06, + "loss": 0.6139, + "step": 3829 + }, + { + "epoch": 0.3496439656746394, + "grad_norm": 0.5104870796203613, + "learning_rate": 4.9602490285425415e-06, + "loss": 0.5473, + "step": 3830 + }, + { + "epoch": 0.349735256527296, + "grad_norm": 0.4954681992530823, + "learning_rate": 4.960227768193993e-06, + "loss": 0.6049, + "step": 3831 + }, + { + "epoch": 0.3498265473799525, + "grad_norm": 0.4556465744972229, + "learning_rate": 4.960206502207125e-06, + "loss": 0.5856, + "step": 3832 + }, + { + "epoch": 0.3499178382326091, + "grad_norm": 0.4769222140312195, + "learning_rate": 4.960185230581986e-06, + "loss": 0.586, + "step": 3833 + }, + { + "epoch": 0.35000912908526566, + "grad_norm": 0.5002205967903137, + "learning_rate": 4.9601639533186245e-06, + "loss": 0.5467, + "step": 3834 + }, + { + "epoch": 0.35010041993792224, + "grad_norm": 0.4536399245262146, + "learning_rate": 4.960142670417091e-06, + "loss": 0.5973, + "step": 3835 + }, + { + "epoch": 0.35019171079057876, + "grad_norm": 0.48665374517440796, + "learning_rate": 4.960121381877433e-06, + "loss": 0.5509, + "step": 3836 + }, + { + "epoch": 0.35028300164323534, + "grad_norm": 0.4503982365131378, + "learning_rate": 4.960100087699699e-06, + "loss": 0.6386, + "step": 3837 + }, + { + "epoch": 0.3503742924958919, + "grad_norm": 0.49367237091064453, + "learning_rate": 4.960078787883939e-06, + "loss": 0.5862, + "step": 3838 + }, + { + "epoch": 0.3504655833485485, + "grad_norm": 0.4775850474834442, + "learning_rate": 4.960057482430202e-06, + "loss": 0.5658, + "step": 3839 + }, + { + "epoch": 0.350556874201205, + "grad_norm": 0.5044325590133667, + "learning_rate": 4.960036171338535e-06, + "loss": 0.5824, + "step": 3840 + }, + { + "epoch": 0.3506481650538616, + "grad_norm": 0.45593151450157166, + "learning_rate": 4.9600148546089886e-06, + "loss": 0.5795, + "step": 3841 + }, + { + "epoch": 0.35073945590651817, + "grad_norm": 0.48582908511161804, + "learning_rate": 4.95999353224161e-06, + "loss": 0.5632, + "step": 3842 + }, + { + "epoch": 0.35083074675917475, + "grad_norm": 0.4794149100780487, + "learning_rate": 4.95997220423645e-06, + "loss": 0.5592, + "step": 3843 + }, + { + "epoch": 0.3509220376118313, + "grad_norm": 0.49436500668525696, + "learning_rate": 4.959950870593555e-06, + "loss": 0.5463, + "step": 3844 + }, + { + "epoch": 0.35101332846448785, + "grad_norm": 0.494128942489624, + "learning_rate": 4.9599295313129755e-06, + "loss": 0.6097, + "step": 3845 + }, + { + "epoch": 0.35110461931714443, + "grad_norm": 0.4362156391143799, + "learning_rate": 4.959908186394761e-06, + "loss": 0.6044, + "step": 3846 + }, + { + "epoch": 0.351195910169801, + "grad_norm": 0.48039698600769043, + "learning_rate": 4.959886835838959e-06, + "loss": 0.5749, + "step": 3847 + }, + { + "epoch": 0.35128720102245753, + "grad_norm": 0.5036129951477051, + "learning_rate": 4.95986547964562e-06, + "loss": 0.5959, + "step": 3848 + }, + { + "epoch": 0.3513784918751141, + "grad_norm": 0.43701934814453125, + "learning_rate": 4.959844117814792e-06, + "loss": 0.59, + "step": 3849 + }, + { + "epoch": 0.3514697827277707, + "grad_norm": 0.477152556180954, + "learning_rate": 4.959822750346523e-06, + "loss": 0.5923, + "step": 3850 + }, + { + "epoch": 0.35156107358042726, + "grad_norm": 0.4744645655155182, + "learning_rate": 4.959801377240863e-06, + "loss": 0.5974, + "step": 3851 + }, + { + "epoch": 0.3516523644330838, + "grad_norm": 0.5204263925552368, + "learning_rate": 4.959779998497861e-06, + "loss": 0.5919, + "step": 3852 + }, + { + "epoch": 0.35174365528574036, + "grad_norm": 0.48821425437927246, + "learning_rate": 4.959758614117567e-06, + "loss": 0.5796, + "step": 3853 + }, + { + "epoch": 0.35183494613839694, + "grad_norm": 0.4710003435611725, + "learning_rate": 4.959737224100028e-06, + "loss": 0.5932, + "step": 3854 + }, + { + "epoch": 0.3519262369910535, + "grad_norm": 0.5085816979408264, + "learning_rate": 4.959715828445294e-06, + "loss": 0.5214, + "step": 3855 + }, + { + "epoch": 0.35201752784371004, + "grad_norm": 0.5145008563995361, + "learning_rate": 4.959694427153414e-06, + "loss": 0.5952, + "step": 3856 + }, + { + "epoch": 0.3521088186963666, + "grad_norm": 0.43380552530288696, + "learning_rate": 4.959673020224437e-06, + "loss": 0.6409, + "step": 3857 + }, + { + "epoch": 0.3522001095490232, + "grad_norm": 0.4798407554626465, + "learning_rate": 4.959651607658412e-06, + "loss": 0.5666, + "step": 3858 + }, + { + "epoch": 0.3522914004016798, + "grad_norm": 0.5126152038574219, + "learning_rate": 4.959630189455389e-06, + "loss": 0.5865, + "step": 3859 + }, + { + "epoch": 0.3523826912543363, + "grad_norm": 0.4578690826892853, + "learning_rate": 4.9596087656154154e-06, + "loss": 0.599, + "step": 3860 + }, + { + "epoch": 0.3524739821069929, + "grad_norm": 0.43962362408638, + "learning_rate": 4.959587336138541e-06, + "loss": 0.6227, + "step": 3861 + }, + { + "epoch": 0.35256527295964946, + "grad_norm": 0.4451289474964142, + "learning_rate": 4.959565901024816e-06, + "loss": 0.5856, + "step": 3862 + }, + { + "epoch": 0.35265656381230603, + "grad_norm": 0.4578063189983368, + "learning_rate": 4.959544460274288e-06, + "loss": 0.6038, + "step": 3863 + }, + { + "epoch": 0.35274785466496256, + "grad_norm": 0.4672562777996063, + "learning_rate": 4.959523013887006e-06, + "loss": 0.6015, + "step": 3864 + }, + { + "epoch": 0.35283914551761913, + "grad_norm": 0.5421310663223267, + "learning_rate": 4.959501561863021e-06, + "loss": 0.5648, + "step": 3865 + }, + { + "epoch": 0.3529304363702757, + "grad_norm": 0.4598354995250702, + "learning_rate": 4.959480104202381e-06, + "loss": 0.5777, + "step": 3866 + }, + { + "epoch": 0.35302172722293224, + "grad_norm": 0.5092676877975464, + "learning_rate": 4.959458640905135e-06, + "loss": 0.5899, + "step": 3867 + }, + { + "epoch": 0.3531130180755888, + "grad_norm": 0.4649612605571747, + "learning_rate": 4.959437171971332e-06, + "loss": 0.6109, + "step": 3868 + }, + { + "epoch": 0.3532043089282454, + "grad_norm": 0.4649731516838074, + "learning_rate": 4.959415697401022e-06, + "loss": 0.5548, + "step": 3869 + }, + { + "epoch": 0.35329559978090197, + "grad_norm": 0.46200326085090637, + "learning_rate": 4.959394217194254e-06, + "loss": 0.5999, + "step": 3870 + }, + { + "epoch": 0.3533868906335585, + "grad_norm": 0.5050727128982544, + "learning_rate": 4.959372731351076e-06, + "loss": 0.5786, + "step": 3871 + }, + { + "epoch": 0.35347818148621507, + "grad_norm": 0.4828548729419708, + "learning_rate": 4.9593512398715395e-06, + "loss": 0.6179, + "step": 3872 + }, + { + "epoch": 0.35356947233887165, + "grad_norm": 0.4844953119754791, + "learning_rate": 4.959329742755692e-06, + "loss": 0.5937, + "step": 3873 + }, + { + "epoch": 0.3536607631915282, + "grad_norm": 0.5077633261680603, + "learning_rate": 4.959308240003584e-06, + "loss": 0.5695, + "step": 3874 + }, + { + "epoch": 0.35375205404418475, + "grad_norm": 0.46700894832611084, + "learning_rate": 4.959286731615264e-06, + "loss": 0.5849, + "step": 3875 + }, + { + "epoch": 0.3538433448968413, + "grad_norm": 0.4647826552391052, + "learning_rate": 4.9592652175907805e-06, + "loss": 0.5838, + "step": 3876 + }, + { + "epoch": 0.3539346357494979, + "grad_norm": 0.48787814378738403, + "learning_rate": 4.959243697930184e-06, + "loss": 0.5671, + "step": 3877 + }, + { + "epoch": 0.3540259266021545, + "grad_norm": 0.4941917955875397, + "learning_rate": 4.959222172633523e-06, + "loss": 0.5701, + "step": 3878 + }, + { + "epoch": 0.354117217454811, + "grad_norm": 0.4568725824356079, + "learning_rate": 4.959200641700849e-06, + "loss": 0.5913, + "step": 3879 + }, + { + "epoch": 0.3542085083074676, + "grad_norm": 0.47490647435188293, + "learning_rate": 4.959179105132208e-06, + "loss": 0.6037, + "step": 3880 + }, + { + "epoch": 0.35429979916012416, + "grad_norm": 0.46447989344596863, + "learning_rate": 4.959157562927653e-06, + "loss": 0.6145, + "step": 3881 + }, + { + "epoch": 0.35439109001278074, + "grad_norm": 0.4873661696910858, + "learning_rate": 4.9591360150872295e-06, + "loss": 0.5852, + "step": 3882 + }, + { + "epoch": 0.35448238086543726, + "grad_norm": 0.5033969283103943, + "learning_rate": 4.959114461610989e-06, + "loss": 0.5972, + "step": 3883 + }, + { + "epoch": 0.35457367171809384, + "grad_norm": 0.4943122863769531, + "learning_rate": 4.959092902498981e-06, + "loss": 0.6007, + "step": 3884 + }, + { + "epoch": 0.3546649625707504, + "grad_norm": 0.46308279037475586, + "learning_rate": 4.959071337751255e-06, + "loss": 0.5921, + "step": 3885 + }, + { + "epoch": 0.354756253423407, + "grad_norm": 0.46245139837265015, + "learning_rate": 4.95904976736786e-06, + "loss": 0.5941, + "step": 3886 + }, + { + "epoch": 0.3548475442760635, + "grad_norm": 0.4895167350769043, + "learning_rate": 4.959028191348844e-06, + "loss": 0.5657, + "step": 3887 + }, + { + "epoch": 0.3549388351287201, + "grad_norm": 0.4543275833129883, + "learning_rate": 4.9590066096942596e-06, + "loss": 0.6005, + "step": 3888 + }, + { + "epoch": 0.3550301259813767, + "grad_norm": 0.4516284763813019, + "learning_rate": 4.9589850224041545e-06, + "loss": 0.6252, + "step": 3889 + }, + { + "epoch": 0.35512141683403325, + "grad_norm": 0.5095881819725037, + "learning_rate": 4.9589634294785775e-06, + "loss": 0.5617, + "step": 3890 + }, + { + "epoch": 0.3552127076866898, + "grad_norm": 0.4500885307788849, + "learning_rate": 4.9589418309175785e-06, + "loss": 0.6221, + "step": 3891 + }, + { + "epoch": 0.35530399853934636, + "grad_norm": 0.4449770748615265, + "learning_rate": 4.958920226721208e-06, + "loss": 0.6139, + "step": 3892 + }, + { + "epoch": 0.35539528939200293, + "grad_norm": 0.4896416962146759, + "learning_rate": 4.958898616889515e-06, + "loss": 0.5649, + "step": 3893 + }, + { + "epoch": 0.3554865802446595, + "grad_norm": 0.4541930556297302, + "learning_rate": 4.958877001422548e-06, + "loss": 0.6045, + "step": 3894 + }, + { + "epoch": 0.35557787109731603, + "grad_norm": 0.4719550609588623, + "learning_rate": 4.9588553803203584e-06, + "loss": 0.5717, + "step": 3895 + }, + { + "epoch": 0.3556691619499726, + "grad_norm": 0.4582492411136627, + "learning_rate": 4.958833753582995e-06, + "loss": 0.5743, + "step": 3896 + }, + { + "epoch": 0.3557604528026292, + "grad_norm": 0.4860735535621643, + "learning_rate": 4.958812121210506e-06, + "loss": 0.5891, + "step": 3897 + }, + { + "epoch": 0.3558517436552857, + "grad_norm": 0.4763486683368683, + "learning_rate": 4.958790483202943e-06, + "loss": 0.594, + "step": 3898 + }, + { + "epoch": 0.3559430345079423, + "grad_norm": 0.4638172686100006, + "learning_rate": 4.958768839560354e-06, + "loss": 0.597, + "step": 3899 + }, + { + "epoch": 0.35603432536059887, + "grad_norm": 0.4780596196651459, + "learning_rate": 4.95874719028279e-06, + "loss": 0.547, + "step": 3900 + }, + { + "epoch": 0.35612561621325545, + "grad_norm": 0.4698801636695862, + "learning_rate": 4.958725535370299e-06, + "loss": 0.6064, + "step": 3901 + }, + { + "epoch": 0.35621690706591197, + "grad_norm": 0.48408079147338867, + "learning_rate": 4.958703874822932e-06, + "loss": 0.6121, + "step": 3902 + }, + { + "epoch": 0.35630819791856855, + "grad_norm": 0.4693945348262787, + "learning_rate": 4.958682208640738e-06, + "loss": 0.5777, + "step": 3903 + }, + { + "epoch": 0.3563994887712251, + "grad_norm": 0.47975945472717285, + "learning_rate": 4.958660536823767e-06, + "loss": 0.5753, + "step": 3904 + }, + { + "epoch": 0.3564907796238817, + "grad_norm": 0.5034552812576294, + "learning_rate": 4.958638859372068e-06, + "loss": 0.6018, + "step": 3905 + }, + { + "epoch": 0.3565820704765382, + "grad_norm": 0.503547728061676, + "learning_rate": 4.958617176285692e-06, + "loss": 0.5789, + "step": 3906 + }, + { + "epoch": 0.3566733613291948, + "grad_norm": 0.4827102720737457, + "learning_rate": 4.958595487564688e-06, + "loss": 0.5486, + "step": 3907 + }, + { + "epoch": 0.3567646521818514, + "grad_norm": 0.4560181796550751, + "learning_rate": 4.958573793209105e-06, + "loss": 0.6121, + "step": 3908 + }, + { + "epoch": 0.35685594303450796, + "grad_norm": 0.5095345377922058, + "learning_rate": 4.958552093218994e-06, + "loss": 0.5686, + "step": 3909 + }, + { + "epoch": 0.3569472338871645, + "grad_norm": 0.5080133080482483, + "learning_rate": 4.958530387594402e-06, + "loss": 0.5519, + "step": 3910 + }, + { + "epoch": 0.35703852473982106, + "grad_norm": 0.47303506731987, + "learning_rate": 4.958508676335383e-06, + "loss": 0.5913, + "step": 3911 + }, + { + "epoch": 0.35712981559247764, + "grad_norm": 0.45971670746803284, + "learning_rate": 4.958486959441983e-06, + "loss": 0.6035, + "step": 3912 + }, + { + "epoch": 0.3572211064451342, + "grad_norm": 0.4261341691017151, + "learning_rate": 4.958465236914254e-06, + "loss": 0.5925, + "step": 3913 + }, + { + "epoch": 0.35731239729779074, + "grad_norm": 0.506766676902771, + "learning_rate": 4.958443508752245e-06, + "loss": 0.5472, + "step": 3914 + }, + { + "epoch": 0.3574036881504473, + "grad_norm": 0.4643999934196472, + "learning_rate": 4.958421774956006e-06, + "loss": 0.5707, + "step": 3915 + }, + { + "epoch": 0.3574949790031039, + "grad_norm": 0.4182972013950348, + "learning_rate": 4.9584000355255865e-06, + "loss": 0.6304, + "step": 3916 + }, + { + "epoch": 0.3575862698557605, + "grad_norm": 0.49351057410240173, + "learning_rate": 4.958378290461036e-06, + "loss": 0.5773, + "step": 3917 + }, + { + "epoch": 0.357677560708417, + "grad_norm": 0.43865907192230225, + "learning_rate": 4.9583565397624066e-06, + "loss": 0.6061, + "step": 3918 + }, + { + "epoch": 0.3577688515610736, + "grad_norm": 0.4627051055431366, + "learning_rate": 4.958334783429745e-06, + "loss": 0.5929, + "step": 3919 + }, + { + "epoch": 0.35786014241373015, + "grad_norm": 0.47412076592445374, + "learning_rate": 4.958313021463103e-06, + "loss": 0.5447, + "step": 3920 + }, + { + "epoch": 0.35795143326638673, + "grad_norm": 0.4635826647281647, + "learning_rate": 4.958291253862529e-06, + "loss": 0.5646, + "step": 3921 + }, + { + "epoch": 0.35804272411904325, + "grad_norm": 0.4985710680484772, + "learning_rate": 4.958269480628074e-06, + "loss": 0.5545, + "step": 3922 + }, + { + "epoch": 0.35813401497169983, + "grad_norm": 0.44305771589279175, + "learning_rate": 4.958247701759789e-06, + "loss": 0.5962, + "step": 3923 + }, + { + "epoch": 0.3582253058243564, + "grad_norm": 0.4762422442436218, + "learning_rate": 4.958225917257721e-06, + "loss": 0.5432, + "step": 3924 + }, + { + "epoch": 0.358316596677013, + "grad_norm": 0.46458327770233154, + "learning_rate": 4.958204127121922e-06, + "loss": 0.6158, + "step": 3925 + }, + { + "epoch": 0.3584078875296695, + "grad_norm": 0.46790188550949097, + "learning_rate": 4.9581823313524426e-06, + "loss": 0.5905, + "step": 3926 + }, + { + "epoch": 0.3584991783823261, + "grad_norm": 0.4528592824935913, + "learning_rate": 4.9581605299493305e-06, + "loss": 0.6315, + "step": 3927 + }, + { + "epoch": 0.35859046923498267, + "grad_norm": 0.4473235011100769, + "learning_rate": 4.958138722912637e-06, + "loss": 0.634, + "step": 3928 + }, + { + "epoch": 0.35868176008763925, + "grad_norm": 0.5125882625579834, + "learning_rate": 4.958116910242412e-06, + "loss": 0.5685, + "step": 3929 + }, + { + "epoch": 0.35877305094029577, + "grad_norm": 0.44996213912963867, + "learning_rate": 4.958095091938705e-06, + "loss": 0.6296, + "step": 3930 + }, + { + "epoch": 0.35886434179295235, + "grad_norm": 0.49202314019203186, + "learning_rate": 4.958073268001567e-06, + "loss": 0.5866, + "step": 3931 + }, + { + "epoch": 0.3589556326456089, + "grad_norm": 0.47068554162979126, + "learning_rate": 4.958051438431048e-06, + "loss": 0.5977, + "step": 3932 + }, + { + "epoch": 0.35904692349826545, + "grad_norm": 0.428131639957428, + "learning_rate": 4.958029603227196e-06, + "loss": 0.5875, + "step": 3933 + }, + { + "epoch": 0.359138214350922, + "grad_norm": 0.4852549433708191, + "learning_rate": 4.958007762390063e-06, + "loss": 0.5872, + "step": 3934 + }, + { + "epoch": 0.3592295052035786, + "grad_norm": 0.4707546830177307, + "learning_rate": 4.957985915919698e-06, + "loss": 0.591, + "step": 3935 + }, + { + "epoch": 0.3593207960562352, + "grad_norm": 0.45688915252685547, + "learning_rate": 4.957964063816152e-06, + "loss": 0.6313, + "step": 3936 + }, + { + "epoch": 0.3594120869088917, + "grad_norm": 0.4668893814086914, + "learning_rate": 4.957942206079475e-06, + "loss": 0.5936, + "step": 3937 + }, + { + "epoch": 0.3595033777615483, + "grad_norm": 0.5024162530899048, + "learning_rate": 4.957920342709716e-06, + "loss": 0.6007, + "step": 3938 + }, + { + "epoch": 0.35959466861420486, + "grad_norm": 0.47144848108291626, + "learning_rate": 4.957898473706927e-06, + "loss": 0.5644, + "step": 3939 + }, + { + "epoch": 0.35968595946686144, + "grad_norm": 0.48822590708732605, + "learning_rate": 4.957876599071156e-06, + "loss": 0.5805, + "step": 3940 + }, + { + "epoch": 0.35977725031951796, + "grad_norm": 0.45049938559532166, + "learning_rate": 4.957854718802454e-06, + "loss": 0.6026, + "step": 3941 + }, + { + "epoch": 0.35986854117217454, + "grad_norm": 0.4949943423271179, + "learning_rate": 4.957832832900872e-06, + "loss": 0.6216, + "step": 3942 + }, + { + "epoch": 0.3599598320248311, + "grad_norm": 0.4705745279788971, + "learning_rate": 4.957810941366459e-06, + "loss": 0.5996, + "step": 3943 + }, + { + "epoch": 0.3600511228774877, + "grad_norm": 0.47249963879585266, + "learning_rate": 4.957789044199265e-06, + "loss": 0.5717, + "step": 3944 + }, + { + "epoch": 0.3601424137301442, + "grad_norm": 0.4500087797641754, + "learning_rate": 4.9577671413993425e-06, + "loss": 0.5633, + "step": 3945 + }, + { + "epoch": 0.3602337045828008, + "grad_norm": 0.48166951537132263, + "learning_rate": 4.957745232966739e-06, + "loss": 0.6061, + "step": 3946 + }, + { + "epoch": 0.3603249954354574, + "grad_norm": 0.48146331310272217, + "learning_rate": 4.957723318901505e-06, + "loss": 0.5626, + "step": 3947 + }, + { + "epoch": 0.36041628628811395, + "grad_norm": 0.4722738564014435, + "learning_rate": 4.957701399203692e-06, + "loss": 0.6039, + "step": 3948 + }, + { + "epoch": 0.3605075771407705, + "grad_norm": 0.47061631083488464, + "learning_rate": 4.957679473873349e-06, + "loss": 0.5874, + "step": 3949 + }, + { + "epoch": 0.36059886799342705, + "grad_norm": 0.4869627058506012, + "learning_rate": 4.957657542910528e-06, + "loss": 0.5978, + "step": 3950 + }, + { + "epoch": 0.36069015884608363, + "grad_norm": 0.4319295585155487, + "learning_rate": 4.957635606315278e-06, + "loss": 0.6149, + "step": 3951 + }, + { + "epoch": 0.3607814496987402, + "grad_norm": 0.48752257227897644, + "learning_rate": 4.957613664087649e-06, + "loss": 0.5729, + "step": 3952 + }, + { + "epoch": 0.36087274055139673, + "grad_norm": 0.520550549030304, + "learning_rate": 4.957591716227691e-06, + "loss": 0.5723, + "step": 3953 + }, + { + "epoch": 0.3609640314040533, + "grad_norm": 0.5182803273200989, + "learning_rate": 4.957569762735456e-06, + "loss": 0.5482, + "step": 3954 + }, + { + "epoch": 0.3610553222567099, + "grad_norm": 0.5246379375457764, + "learning_rate": 4.957547803610993e-06, + "loss": 0.581, + "step": 3955 + }, + { + "epoch": 0.36114661310936647, + "grad_norm": 0.46552300453186035, + "learning_rate": 4.9575258388543525e-06, + "loss": 0.611, + "step": 3956 + }, + { + "epoch": 0.361237903962023, + "grad_norm": 0.4953038692474365, + "learning_rate": 4.957503868465585e-06, + "loss": 0.5527, + "step": 3957 + }, + { + "epoch": 0.36132919481467957, + "grad_norm": 0.48176074028015137, + "learning_rate": 4.957481892444741e-06, + "loss": 0.5891, + "step": 3958 + }, + { + "epoch": 0.36142048566733614, + "grad_norm": 0.46445247530937195, + "learning_rate": 4.957459910791871e-06, + "loss": 0.5637, + "step": 3959 + }, + { + "epoch": 0.3615117765199927, + "grad_norm": 0.49617722630500793, + "learning_rate": 4.957437923507024e-06, + "loss": 0.608, + "step": 3960 + }, + { + "epoch": 0.36160306737264925, + "grad_norm": 0.4756810665130615, + "learning_rate": 4.9574159305902525e-06, + "loss": 0.6065, + "step": 3961 + }, + { + "epoch": 0.3616943582253058, + "grad_norm": 0.47266700863838196, + "learning_rate": 4.957393932041606e-06, + "loss": 0.6067, + "step": 3962 + }, + { + "epoch": 0.3617856490779624, + "grad_norm": 0.49293583631515503, + "learning_rate": 4.957371927861134e-06, + "loss": 0.6028, + "step": 3963 + }, + { + "epoch": 0.361876939930619, + "grad_norm": 0.49437108635902405, + "learning_rate": 4.957349918048888e-06, + "loss": 0.5709, + "step": 3964 + }, + { + "epoch": 0.3619682307832755, + "grad_norm": 0.4785449504852295, + "learning_rate": 4.9573279026049185e-06, + "loss": 0.6088, + "step": 3965 + }, + { + "epoch": 0.3620595216359321, + "grad_norm": 0.46001318097114563, + "learning_rate": 4.957305881529275e-06, + "loss": 0.5992, + "step": 3966 + }, + { + "epoch": 0.36215081248858866, + "grad_norm": 0.48025983572006226, + "learning_rate": 4.957283854822009e-06, + "loss": 0.6489, + "step": 3967 + }, + { + "epoch": 0.3622421033412452, + "grad_norm": 0.5139955878257751, + "learning_rate": 4.95726182248317e-06, + "loss": 0.5757, + "step": 3968 + }, + { + "epoch": 0.36233339419390176, + "grad_norm": 0.4716361165046692, + "learning_rate": 4.95723978451281e-06, + "loss": 0.5904, + "step": 3969 + }, + { + "epoch": 0.36242468504655834, + "grad_norm": 0.45158612728118896, + "learning_rate": 4.957217740910978e-06, + "loss": 0.594, + "step": 3970 + }, + { + "epoch": 0.3625159758992149, + "grad_norm": 0.4971327781677246, + "learning_rate": 4.957195691677725e-06, + "loss": 0.5552, + "step": 3971 + }, + { + "epoch": 0.36260726675187144, + "grad_norm": 0.5327549576759338, + "learning_rate": 4.957173636813102e-06, + "loss": 0.5608, + "step": 3972 + }, + { + "epoch": 0.362698557604528, + "grad_norm": 0.46983838081359863, + "learning_rate": 4.957151576317158e-06, + "loss": 0.6035, + "step": 3973 + }, + { + "epoch": 0.3627898484571846, + "grad_norm": 0.4396279454231262, + "learning_rate": 4.957129510189946e-06, + "loss": 0.6298, + "step": 3974 + }, + { + "epoch": 0.36288113930984117, + "grad_norm": 0.4841189980506897, + "learning_rate": 4.9571074384315155e-06, + "loss": 0.5926, + "step": 3975 + }, + { + "epoch": 0.3629724301624977, + "grad_norm": 0.4857192933559418, + "learning_rate": 4.9570853610419155e-06, + "loss": 0.5769, + "step": 3976 + }, + { + "epoch": 0.3630637210151543, + "grad_norm": 0.4546220600605011, + "learning_rate": 4.957063278021199e-06, + "loss": 0.5471, + "step": 3977 + }, + { + "epoch": 0.36315501186781085, + "grad_norm": 0.47091275453567505, + "learning_rate": 4.957041189369415e-06, + "loss": 0.5941, + "step": 3978 + }, + { + "epoch": 0.36324630272046743, + "grad_norm": 0.48899269104003906, + "learning_rate": 4.957019095086615e-06, + "loss": 0.5727, + "step": 3979 + }, + { + "epoch": 0.36333759357312395, + "grad_norm": 0.4635283648967743, + "learning_rate": 4.956996995172849e-06, + "loss": 0.5688, + "step": 3980 + }, + { + "epoch": 0.36342888442578053, + "grad_norm": 0.4616434574127197, + "learning_rate": 4.956974889628169e-06, + "loss": 0.597, + "step": 3981 + }, + { + "epoch": 0.3635201752784371, + "grad_norm": 0.46884509921073914, + "learning_rate": 4.956952778452624e-06, + "loss": 0.5714, + "step": 3982 + }, + { + "epoch": 0.3636114661310937, + "grad_norm": 0.5217846632003784, + "learning_rate": 4.956930661646264e-06, + "loss": 0.5802, + "step": 3983 + }, + { + "epoch": 0.3637027569837502, + "grad_norm": 0.4572369158267975, + "learning_rate": 4.956908539209142e-06, + "loss": 0.6298, + "step": 3984 + }, + { + "epoch": 0.3637940478364068, + "grad_norm": 0.45352959632873535, + "learning_rate": 4.956886411141309e-06, + "loss": 0.5965, + "step": 3985 + }, + { + "epoch": 0.36388533868906336, + "grad_norm": 0.4550169110298157, + "learning_rate": 4.9568642774428126e-06, + "loss": 0.6153, + "step": 3986 + }, + { + "epoch": 0.36397662954171994, + "grad_norm": 0.5133897662162781, + "learning_rate": 4.9568421381137064e-06, + "loss": 0.5859, + "step": 3987 + }, + { + "epoch": 0.36406792039437647, + "grad_norm": 0.4644145369529724, + "learning_rate": 4.956819993154039e-06, + "loss": 0.6076, + "step": 3988 + }, + { + "epoch": 0.36415921124703304, + "grad_norm": 0.4875529706478119, + "learning_rate": 4.956797842563863e-06, + "loss": 0.5735, + "step": 3989 + }, + { + "epoch": 0.3642505020996896, + "grad_norm": 0.49935489892959595, + "learning_rate": 4.9567756863432286e-06, + "loss": 0.5718, + "step": 3990 + }, + { + "epoch": 0.3643417929523462, + "grad_norm": 0.4872485399246216, + "learning_rate": 4.956753524492186e-06, + "loss": 0.5851, + "step": 3991 + }, + { + "epoch": 0.3644330838050027, + "grad_norm": 0.4760178327560425, + "learning_rate": 4.956731357010787e-06, + "loss": 0.5906, + "step": 3992 + }, + { + "epoch": 0.3645243746576593, + "grad_norm": 0.5016440153121948, + "learning_rate": 4.956709183899081e-06, + "loss": 0.5909, + "step": 3993 + }, + { + "epoch": 0.3646156655103159, + "grad_norm": 0.42638102173805237, + "learning_rate": 4.956687005157119e-06, + "loss": 0.6074, + "step": 3994 + }, + { + "epoch": 0.36470695636297246, + "grad_norm": 0.4984687566757202, + "learning_rate": 4.956664820784953e-06, + "loss": 0.5937, + "step": 3995 + }, + { + "epoch": 0.364798247215629, + "grad_norm": 0.4832565188407898, + "learning_rate": 4.956642630782635e-06, + "loss": 0.5851, + "step": 3996 + }, + { + "epoch": 0.36488953806828556, + "grad_norm": 0.5164192318916321, + "learning_rate": 4.956620435150211e-06, + "loss": 0.5504, + "step": 3997 + }, + { + "epoch": 0.36498082892094214, + "grad_norm": 0.4550762474536896, + "learning_rate": 4.956598233887737e-06, + "loss": 0.5957, + "step": 3998 + }, + { + "epoch": 0.36507211977359866, + "grad_norm": 0.4777418375015259, + "learning_rate": 4.956576026995261e-06, + "loss": 0.6081, + "step": 3999 + }, + { + "epoch": 0.36516341062625524, + "grad_norm": 0.4658077657222748, + "learning_rate": 4.9565538144728355e-06, + "loss": 0.5705, + "step": 4000 + }, + { + "epoch": 0.3652547014789118, + "grad_norm": 0.4798206388950348, + "learning_rate": 4.95653159632051e-06, + "loss": 0.5973, + "step": 4001 + }, + { + "epoch": 0.3653459923315684, + "grad_norm": 0.4764616787433624, + "learning_rate": 4.9565093725383365e-06, + "loss": 0.588, + "step": 4002 + }, + { + "epoch": 0.3654372831842249, + "grad_norm": 0.44750699400901794, + "learning_rate": 4.956487143126365e-06, + "loss": 0.6379, + "step": 4003 + }, + { + "epoch": 0.3655285740368815, + "grad_norm": 0.48099711537361145, + "learning_rate": 4.9564649080846475e-06, + "loss": 0.6095, + "step": 4004 + }, + { + "epoch": 0.36561986488953807, + "grad_norm": 0.5036926865577698, + "learning_rate": 4.956442667413234e-06, + "loss": 0.5706, + "step": 4005 + }, + { + "epoch": 0.36571115574219465, + "grad_norm": 0.4505996108055115, + "learning_rate": 4.956420421112176e-06, + "loss": 0.5641, + "step": 4006 + }, + { + "epoch": 0.36580244659485117, + "grad_norm": 0.45471295714378357, + "learning_rate": 4.956398169181524e-06, + "loss": 0.6438, + "step": 4007 + }, + { + "epoch": 0.36589373744750775, + "grad_norm": 0.46649619936943054, + "learning_rate": 4.956375911621331e-06, + "loss": 0.6094, + "step": 4008 + }, + { + "epoch": 0.36598502830016433, + "grad_norm": 0.5038893222808838, + "learning_rate": 4.956353648431645e-06, + "loss": 0.5662, + "step": 4009 + }, + { + "epoch": 0.3660763191528209, + "grad_norm": 0.4886850416660309, + "learning_rate": 4.956331379612518e-06, + "loss": 0.5659, + "step": 4010 + }, + { + "epoch": 0.36616761000547743, + "grad_norm": 0.4627973735332489, + "learning_rate": 4.956309105164002e-06, + "loss": 0.6255, + "step": 4011 + }, + { + "epoch": 0.366258900858134, + "grad_norm": 0.4989151954650879, + "learning_rate": 4.956286825086148e-06, + "loss": 0.5709, + "step": 4012 + }, + { + "epoch": 0.3663501917107906, + "grad_norm": 0.4627581238746643, + "learning_rate": 4.956264539379005e-06, + "loss": 0.6066, + "step": 4013 + }, + { + "epoch": 0.36644148256344716, + "grad_norm": 0.4511900246143341, + "learning_rate": 4.956242248042626e-06, + "loss": 0.609, + "step": 4014 + }, + { + "epoch": 0.3665327734161037, + "grad_norm": 0.4676448702812195, + "learning_rate": 4.956219951077062e-06, + "loss": 0.598, + "step": 4015 + }, + { + "epoch": 0.36662406426876026, + "grad_norm": 0.462571918964386, + "learning_rate": 4.956197648482365e-06, + "loss": 0.5813, + "step": 4016 + }, + { + "epoch": 0.36671535512141684, + "grad_norm": 0.47500020265579224, + "learning_rate": 4.956175340258584e-06, + "loss": 0.5818, + "step": 4017 + }, + { + "epoch": 0.3668066459740734, + "grad_norm": 0.542279064655304, + "learning_rate": 4.956153026405771e-06, + "loss": 0.5312, + "step": 4018 + }, + { + "epoch": 0.36689793682672994, + "grad_norm": 0.4591880738735199, + "learning_rate": 4.9561307069239775e-06, + "loss": 0.6211, + "step": 4019 + }, + { + "epoch": 0.3669892276793865, + "grad_norm": 0.4831666350364685, + "learning_rate": 4.956108381813254e-06, + "loss": 0.5672, + "step": 4020 + }, + { + "epoch": 0.3670805185320431, + "grad_norm": 0.4750502407550812, + "learning_rate": 4.956086051073651e-06, + "loss": 0.5423, + "step": 4021 + }, + { + "epoch": 0.3671718093846997, + "grad_norm": 0.5026420950889587, + "learning_rate": 4.956063714705222e-06, + "loss": 0.5773, + "step": 4022 + }, + { + "epoch": 0.3672631002373562, + "grad_norm": 0.4591178894042969, + "learning_rate": 4.9560413727080164e-06, + "loss": 0.5985, + "step": 4023 + }, + { + "epoch": 0.3673543910900128, + "grad_norm": 0.4721660912036896, + "learning_rate": 4.956019025082086e-06, + "loss": 0.5901, + "step": 4024 + }, + { + "epoch": 0.36744568194266936, + "grad_norm": 0.4807828962802887, + "learning_rate": 4.9559966718274825e-06, + "loss": 0.5653, + "step": 4025 + }, + { + "epoch": 0.36753697279532593, + "grad_norm": 0.44985976815223694, + "learning_rate": 4.955974312944255e-06, + "loss": 0.5521, + "step": 4026 + }, + { + "epoch": 0.36762826364798246, + "grad_norm": 0.4743938446044922, + "learning_rate": 4.955951948432457e-06, + "loss": 0.5627, + "step": 4027 + }, + { + "epoch": 0.36771955450063903, + "grad_norm": 0.472163587808609, + "learning_rate": 4.955929578292139e-06, + "loss": 0.6015, + "step": 4028 + }, + { + "epoch": 0.3678108453532956, + "grad_norm": 0.48358938097953796, + "learning_rate": 4.955907202523352e-06, + "loss": 0.605, + "step": 4029 + }, + { + "epoch": 0.3679021362059522, + "grad_norm": 0.49314218759536743, + "learning_rate": 4.955884821126149e-06, + "loss": 0.5647, + "step": 4030 + }, + { + "epoch": 0.3679934270586087, + "grad_norm": 0.4899247884750366, + "learning_rate": 4.955862434100578e-06, + "loss": 0.5876, + "step": 4031 + }, + { + "epoch": 0.3680847179112653, + "grad_norm": 0.4736441969871521, + "learning_rate": 4.955840041446693e-06, + "loss": 0.6104, + "step": 4032 + }, + { + "epoch": 0.36817600876392187, + "grad_norm": 0.4642449617385864, + "learning_rate": 4.955817643164544e-06, + "loss": 0.6077, + "step": 4033 + }, + { + "epoch": 0.3682672996165784, + "grad_norm": 0.47506216168403625, + "learning_rate": 4.955795239254183e-06, + "loss": 0.5403, + "step": 4034 + }, + { + "epoch": 0.36835859046923497, + "grad_norm": 0.47725579142570496, + "learning_rate": 4.955772829715661e-06, + "loss": 0.6384, + "step": 4035 + }, + { + "epoch": 0.36844988132189155, + "grad_norm": 0.49555686116218567, + "learning_rate": 4.955750414549031e-06, + "loss": 0.5839, + "step": 4036 + }, + { + "epoch": 0.3685411721745481, + "grad_norm": 0.5239980816841125, + "learning_rate": 4.955727993754341e-06, + "loss": 0.5985, + "step": 4037 + }, + { + "epoch": 0.36863246302720465, + "grad_norm": 0.483662486076355, + "learning_rate": 4.955705567331644e-06, + "loss": 0.5673, + "step": 4038 + }, + { + "epoch": 0.3687237538798612, + "grad_norm": 0.4224509000778198, + "learning_rate": 4.955683135280993e-06, + "loss": 0.6156, + "step": 4039 + }, + { + "epoch": 0.3688150447325178, + "grad_norm": 0.48595383763313293, + "learning_rate": 4.955660697602438e-06, + "loss": 0.5967, + "step": 4040 + }, + { + "epoch": 0.3689063355851744, + "grad_norm": 0.4856463670730591, + "learning_rate": 4.95563825429603e-06, + "loss": 0.5788, + "step": 4041 + }, + { + "epoch": 0.3689976264378309, + "grad_norm": 0.4820491671562195, + "learning_rate": 4.955615805361821e-06, + "loss": 0.6134, + "step": 4042 + }, + { + "epoch": 0.3690889172904875, + "grad_norm": 0.4579945504665375, + "learning_rate": 4.955593350799862e-06, + "loss": 0.5755, + "step": 4043 + }, + { + "epoch": 0.36918020814314406, + "grad_norm": 0.5118858814239502, + "learning_rate": 4.955570890610206e-06, + "loss": 0.5309, + "step": 4044 + }, + { + "epoch": 0.36927149899580064, + "grad_norm": 0.48799535632133484, + "learning_rate": 4.955548424792902e-06, + "loss": 0.552, + "step": 4045 + }, + { + "epoch": 0.36936278984845716, + "grad_norm": 0.4769740700721741, + "learning_rate": 4.955525953348004e-06, + "loss": 0.6306, + "step": 4046 + }, + { + "epoch": 0.36945408070111374, + "grad_norm": 0.49778929352760315, + "learning_rate": 4.955503476275561e-06, + "loss": 0.5938, + "step": 4047 + }, + { + "epoch": 0.3695453715537703, + "grad_norm": 0.47368910908699036, + "learning_rate": 4.955480993575627e-06, + "loss": 0.6112, + "step": 4048 + }, + { + "epoch": 0.3696366624064269, + "grad_norm": 0.4647848606109619, + "learning_rate": 4.955458505248251e-06, + "loss": 0.5724, + "step": 4049 + }, + { + "epoch": 0.3697279532590834, + "grad_norm": 0.45623916387557983, + "learning_rate": 4.955436011293487e-06, + "loss": 0.5895, + "step": 4050 + }, + { + "epoch": 0.36981924411174, + "grad_norm": 0.47491419315338135, + "learning_rate": 4.955413511711385e-06, + "loss": 0.5713, + "step": 4051 + }, + { + "epoch": 0.3699105349643966, + "grad_norm": 0.5005938410758972, + "learning_rate": 4.955391006501997e-06, + "loss": 0.5992, + "step": 4052 + }, + { + "epoch": 0.37000182581705315, + "grad_norm": 0.463745653629303, + "learning_rate": 4.955368495665375e-06, + "loss": 0.6298, + "step": 4053 + }, + { + "epoch": 0.3700931166697097, + "grad_norm": 0.44855645298957825, + "learning_rate": 4.955345979201569e-06, + "loss": 0.5928, + "step": 4054 + }, + { + "epoch": 0.37018440752236625, + "grad_norm": 0.49715346097946167, + "learning_rate": 4.955323457110632e-06, + "loss": 0.5561, + "step": 4055 + }, + { + "epoch": 0.37027569837502283, + "grad_norm": 0.5065641403198242, + "learning_rate": 4.955300929392616e-06, + "loss": 0.5402, + "step": 4056 + }, + { + "epoch": 0.3703669892276794, + "grad_norm": 0.5058699250221252, + "learning_rate": 4.9552783960475725e-06, + "loss": 0.5857, + "step": 4057 + }, + { + "epoch": 0.37045828008033593, + "grad_norm": 0.4560822546482086, + "learning_rate": 4.955255857075551e-06, + "loss": 0.6143, + "step": 4058 + }, + { + "epoch": 0.3705495709329925, + "grad_norm": 0.4613572359085083, + "learning_rate": 4.955233312476606e-06, + "loss": 0.5854, + "step": 4059 + }, + { + "epoch": 0.3706408617856491, + "grad_norm": 0.46875301003456116, + "learning_rate": 4.955210762250788e-06, + "loss": 0.598, + "step": 4060 + }, + { + "epoch": 0.37073215263830567, + "grad_norm": 0.4776119589805603, + "learning_rate": 4.955188206398148e-06, + "loss": 0.5566, + "step": 4061 + }, + { + "epoch": 0.3708234434909622, + "grad_norm": 0.4427661895751953, + "learning_rate": 4.955165644918739e-06, + "loss": 0.6205, + "step": 4062 + }, + { + "epoch": 0.37091473434361877, + "grad_norm": 0.4402998983860016, + "learning_rate": 4.955143077812612e-06, + "loss": 0.6133, + "step": 4063 + }, + { + "epoch": 0.37100602519627535, + "grad_norm": 0.4459109604358673, + "learning_rate": 4.955120505079818e-06, + "loss": 0.5801, + "step": 4064 + }, + { + "epoch": 0.3710973160489319, + "grad_norm": 0.47550398111343384, + "learning_rate": 4.9550979267204105e-06, + "loss": 0.5908, + "step": 4065 + }, + { + "epoch": 0.37118860690158845, + "grad_norm": 0.4668654501438141, + "learning_rate": 4.955075342734439e-06, + "loss": 0.6031, + "step": 4066 + }, + { + "epoch": 0.371279897754245, + "grad_norm": 0.46809121966362, + "learning_rate": 4.955052753121958e-06, + "loss": 0.6176, + "step": 4067 + }, + { + "epoch": 0.3713711886069016, + "grad_norm": 0.4901549220085144, + "learning_rate": 4.955030157883017e-06, + "loss": 0.5946, + "step": 4068 + }, + { + "epoch": 0.3714624794595581, + "grad_norm": 0.43880054354667664, + "learning_rate": 4.9550075570176685e-06, + "loss": 0.616, + "step": 4069 + }, + { + "epoch": 0.3715537703122147, + "grad_norm": 0.46315181255340576, + "learning_rate": 4.954984950525964e-06, + "loss": 0.596, + "step": 4070 + }, + { + "epoch": 0.3716450611648713, + "grad_norm": 0.4494182765483856, + "learning_rate": 4.954962338407956e-06, + "loss": 0.5418, + "step": 4071 + }, + { + "epoch": 0.37173635201752786, + "grad_norm": 0.5359166860580444, + "learning_rate": 4.954939720663696e-06, + "loss": 0.5837, + "step": 4072 + }, + { + "epoch": 0.3718276428701844, + "grad_norm": 0.4757206439971924, + "learning_rate": 4.954917097293236e-06, + "loss": 0.5966, + "step": 4073 + }, + { + "epoch": 0.37191893372284096, + "grad_norm": 0.48943859338760376, + "learning_rate": 4.954894468296627e-06, + "loss": 0.6008, + "step": 4074 + }, + { + "epoch": 0.37201022457549754, + "grad_norm": 0.49660366773605347, + "learning_rate": 4.954871833673922e-06, + "loss": 0.5706, + "step": 4075 + }, + { + "epoch": 0.3721015154281541, + "grad_norm": 0.4588724374771118, + "learning_rate": 4.954849193425173e-06, + "loss": 0.6396, + "step": 4076 + }, + { + "epoch": 0.37219280628081064, + "grad_norm": 0.4799646735191345, + "learning_rate": 4.95482654755043e-06, + "loss": 0.6016, + "step": 4077 + }, + { + "epoch": 0.3722840971334672, + "grad_norm": 0.4961967468261719, + "learning_rate": 4.954803896049747e-06, + "loss": 0.5917, + "step": 4078 + }, + { + "epoch": 0.3723753879861238, + "grad_norm": 0.4847428500652313, + "learning_rate": 4.954781238923176e-06, + "loss": 0.6195, + "step": 4079 + }, + { + "epoch": 0.3724666788387804, + "grad_norm": 0.46948686242103577, + "learning_rate": 4.954758576170766e-06, + "loss": 0.6064, + "step": 4080 + }, + { + "epoch": 0.3725579696914369, + "grad_norm": 0.46913930773735046, + "learning_rate": 4.954735907792571e-06, + "loss": 0.5827, + "step": 4081 + }, + { + "epoch": 0.3726492605440935, + "grad_norm": 0.47374585270881653, + "learning_rate": 4.954713233788645e-06, + "loss": 0.5559, + "step": 4082 + }, + { + "epoch": 0.37274055139675005, + "grad_norm": 0.4804306626319885, + "learning_rate": 4.954690554159036e-06, + "loss": 0.5827, + "step": 4083 + }, + { + "epoch": 0.37283184224940663, + "grad_norm": 0.5057407021522522, + "learning_rate": 4.954667868903799e-06, + "loss": 0.5716, + "step": 4084 + }, + { + "epoch": 0.37292313310206315, + "grad_norm": 0.48006680607795715, + "learning_rate": 4.954645178022984e-06, + "loss": 0.5801, + "step": 4085 + }, + { + "epoch": 0.37301442395471973, + "grad_norm": 0.4578876197338104, + "learning_rate": 4.954622481516644e-06, + "loss": 0.5748, + "step": 4086 + }, + { + "epoch": 0.3731057148073763, + "grad_norm": 0.46862274408340454, + "learning_rate": 4.954599779384831e-06, + "loss": 0.6135, + "step": 4087 + }, + { + "epoch": 0.3731970056600329, + "grad_norm": 0.4856990575790405, + "learning_rate": 4.9545770716275975e-06, + "loss": 0.5737, + "step": 4088 + }, + { + "epoch": 0.3732882965126894, + "grad_norm": 0.47165223956108093, + "learning_rate": 4.954554358244994e-06, + "loss": 0.59, + "step": 4089 + }, + { + "epoch": 0.373379587365346, + "grad_norm": 0.47637006640434265, + "learning_rate": 4.954531639237074e-06, + "loss": 0.5705, + "step": 4090 + }, + { + "epoch": 0.37347087821800257, + "grad_norm": 0.4955317974090576, + "learning_rate": 4.9545089146038884e-06, + "loss": 0.5392, + "step": 4091 + }, + { + "epoch": 0.37356216907065914, + "grad_norm": 0.5256573557853699, + "learning_rate": 4.954486184345491e-06, + "loss": 0.5437, + "step": 4092 + }, + { + "epoch": 0.37365345992331567, + "grad_norm": 0.462755024433136, + "learning_rate": 4.954463448461932e-06, + "loss": 0.5403, + "step": 4093 + }, + { + "epoch": 0.37374475077597225, + "grad_norm": 0.5324056148529053, + "learning_rate": 4.954440706953265e-06, + "loss": 0.5688, + "step": 4094 + }, + { + "epoch": 0.3738360416286288, + "grad_norm": 0.47770294547080994, + "learning_rate": 4.954417959819541e-06, + "loss": 0.5819, + "step": 4095 + }, + { + "epoch": 0.3739273324812854, + "grad_norm": 0.49264585971832275, + "learning_rate": 4.954395207060812e-06, + "loss": 0.5915, + "step": 4096 + }, + { + "epoch": 0.3740186233339419, + "grad_norm": 0.519729495048523, + "learning_rate": 4.954372448677131e-06, + "loss": 0.5932, + "step": 4097 + }, + { + "epoch": 0.3741099141865985, + "grad_norm": 0.5277784466743469, + "learning_rate": 4.954349684668551e-06, + "loss": 0.5859, + "step": 4098 + }, + { + "epoch": 0.3742012050392551, + "grad_norm": 0.48503217101097107, + "learning_rate": 4.954326915035122e-06, + "loss": 0.5555, + "step": 4099 + }, + { + "epoch": 0.3742924958919116, + "grad_norm": 0.4882400631904602, + "learning_rate": 4.954304139776898e-06, + "loss": 0.5953, + "step": 4100 + }, + { + "epoch": 0.3743837867445682, + "grad_norm": 0.4633804261684418, + "learning_rate": 4.9542813588939294e-06, + "loss": 0.5361, + "step": 4101 + }, + { + "epoch": 0.37447507759722476, + "grad_norm": 0.4782145619392395, + "learning_rate": 4.95425857238627e-06, + "loss": 0.5661, + "step": 4102 + }, + { + "epoch": 0.37456636844988134, + "grad_norm": 0.44063568115234375, + "learning_rate": 4.954235780253971e-06, + "loss": 0.6093, + "step": 4103 + }, + { + "epoch": 0.37465765930253786, + "grad_norm": 0.4633474349975586, + "learning_rate": 4.954212982497085e-06, + "loss": 0.6464, + "step": 4104 + }, + { + "epoch": 0.37474895015519444, + "grad_norm": 0.4784098267555237, + "learning_rate": 4.954190179115666e-06, + "loss": 0.5453, + "step": 4105 + }, + { + "epoch": 0.374840241007851, + "grad_norm": 0.46248364448547363, + "learning_rate": 4.954167370109763e-06, + "loss": 0.619, + "step": 4106 + }, + { + "epoch": 0.3749315318605076, + "grad_norm": 0.4563727378845215, + "learning_rate": 4.95414455547943e-06, + "loss": 0.6018, + "step": 4107 + }, + { + "epoch": 0.3750228227131641, + "grad_norm": 0.5496466755867004, + "learning_rate": 4.95412173522472e-06, + "loss": 0.5982, + "step": 4108 + }, + { + "epoch": 0.3751141135658207, + "grad_norm": 0.4333980083465576, + "learning_rate": 4.954098909345683e-06, + "loss": 0.5811, + "step": 4109 + }, + { + "epoch": 0.3752054044184773, + "grad_norm": 0.4906304180622101, + "learning_rate": 4.954076077842374e-06, + "loss": 0.6165, + "step": 4110 + }, + { + "epoch": 0.37529669527113385, + "grad_norm": 0.4893149435520172, + "learning_rate": 4.954053240714844e-06, + "loss": 0.5376, + "step": 4111 + }, + { + "epoch": 0.3753879861237904, + "grad_norm": 0.4255636930465698, + "learning_rate": 4.9540303979631455e-06, + "loss": 0.613, + "step": 4112 + }, + { + "epoch": 0.37547927697644695, + "grad_norm": 0.46279534697532654, + "learning_rate": 4.95400754958733e-06, + "loss": 0.5903, + "step": 4113 + }, + { + "epoch": 0.37557056782910353, + "grad_norm": 0.42744356393814087, + "learning_rate": 4.953984695587452e-06, + "loss": 0.5937, + "step": 4114 + }, + { + "epoch": 0.3756618586817601, + "grad_norm": 0.4692378342151642, + "learning_rate": 4.953961835963561e-06, + "loss": 0.5967, + "step": 4115 + }, + { + "epoch": 0.37575314953441663, + "grad_norm": 0.46376273036003113, + "learning_rate": 4.953938970715711e-06, + "loss": 0.6358, + "step": 4116 + }, + { + "epoch": 0.3758444403870732, + "grad_norm": 0.44708243012428284, + "learning_rate": 4.953916099843955e-06, + "loss": 0.5916, + "step": 4117 + }, + { + "epoch": 0.3759357312397298, + "grad_norm": 0.47715428471565247, + "learning_rate": 4.953893223348345e-06, + "loss": 0.5981, + "step": 4118 + }, + { + "epoch": 0.37602702209238636, + "grad_norm": 0.47816017270088196, + "learning_rate": 4.953870341228933e-06, + "loss": 0.6066, + "step": 4119 + }, + { + "epoch": 0.3761183129450429, + "grad_norm": 0.4433819055557251, + "learning_rate": 4.953847453485771e-06, + "loss": 0.5905, + "step": 4120 + }, + { + "epoch": 0.37620960379769947, + "grad_norm": 0.5064710974693298, + "learning_rate": 4.9538245601189125e-06, + "loss": 0.5769, + "step": 4121 + }, + { + "epoch": 0.37630089465035604, + "grad_norm": 0.4932734966278076, + "learning_rate": 4.953801661128409e-06, + "loss": 0.5847, + "step": 4122 + }, + { + "epoch": 0.3763921855030126, + "grad_norm": 0.5044493079185486, + "learning_rate": 4.9537787565143145e-06, + "loss": 0.5752, + "step": 4123 + }, + { + "epoch": 0.37648347635566914, + "grad_norm": 0.43092671036720276, + "learning_rate": 4.95375584627668e-06, + "loss": 0.6265, + "step": 4124 + }, + { + "epoch": 0.3765747672083257, + "grad_norm": 0.4933178424835205, + "learning_rate": 4.953732930415558e-06, + "loss": 0.5551, + "step": 4125 + }, + { + "epoch": 0.3766660580609823, + "grad_norm": 0.4351172149181366, + "learning_rate": 4.953710008931002e-06, + "loss": 0.5775, + "step": 4126 + }, + { + "epoch": 0.3767573489136389, + "grad_norm": 0.501127302646637, + "learning_rate": 4.953687081823064e-06, + "loss": 0.6023, + "step": 4127 + }, + { + "epoch": 0.3768486397662954, + "grad_norm": 0.49212905764579773, + "learning_rate": 4.953664149091797e-06, + "loss": 0.5916, + "step": 4128 + }, + { + "epoch": 0.376939930618952, + "grad_norm": 0.4659079611301422, + "learning_rate": 4.953641210737253e-06, + "loss": 0.6153, + "step": 4129 + }, + { + "epoch": 0.37703122147160856, + "grad_norm": 0.4925759732723236, + "learning_rate": 4.953618266759485e-06, + "loss": 0.597, + "step": 4130 + }, + { + "epoch": 0.37712251232426514, + "grad_norm": 0.49873706698417664, + "learning_rate": 4.9535953171585436e-06, + "loss": 0.5676, + "step": 4131 + }, + { + "epoch": 0.37721380317692166, + "grad_norm": 0.4765758216381073, + "learning_rate": 4.953572361934484e-06, + "loss": 0.6272, + "step": 4132 + }, + { + "epoch": 0.37730509402957824, + "grad_norm": 0.48105305433273315, + "learning_rate": 4.953549401087359e-06, + "loss": 0.6262, + "step": 4133 + }, + { + "epoch": 0.3773963848822348, + "grad_norm": 0.4935668110847473, + "learning_rate": 4.953526434617219e-06, + "loss": 0.5375, + "step": 4134 + }, + { + "epoch": 0.37748767573489134, + "grad_norm": 0.47673025727272034, + "learning_rate": 4.953503462524119e-06, + "loss": 0.5898, + "step": 4135 + }, + { + "epoch": 0.3775789665875479, + "grad_norm": 0.4642772376537323, + "learning_rate": 4.953480484808108e-06, + "loss": 0.6179, + "step": 4136 + }, + { + "epoch": 0.3776702574402045, + "grad_norm": 0.468953937292099, + "learning_rate": 4.9534575014692435e-06, + "loss": 0.5765, + "step": 4137 + }, + { + "epoch": 0.37776154829286107, + "grad_norm": 0.500995934009552, + "learning_rate": 4.953434512507574e-06, + "loss": 0.6056, + "step": 4138 + }, + { + "epoch": 0.3778528391455176, + "grad_norm": 0.4290756285190582, + "learning_rate": 4.953411517923156e-06, + "loss": 0.562, + "step": 4139 + }, + { + "epoch": 0.37794412999817417, + "grad_norm": 0.5207393765449524, + "learning_rate": 4.953388517716038e-06, + "loss": 0.5578, + "step": 4140 + }, + { + "epoch": 0.37803542085083075, + "grad_norm": 0.5141724348068237, + "learning_rate": 4.953365511886276e-06, + "loss": 0.5619, + "step": 4141 + }, + { + "epoch": 0.37812671170348733, + "grad_norm": 0.4851301312446594, + "learning_rate": 4.953342500433921e-06, + "loss": 0.5438, + "step": 4142 + }, + { + "epoch": 0.37821800255614385, + "grad_norm": 0.44055017828941345, + "learning_rate": 4.953319483359027e-06, + "loss": 0.6277, + "step": 4143 + }, + { + "epoch": 0.37830929340880043, + "grad_norm": 0.4676036834716797, + "learning_rate": 4.953296460661645e-06, + "loss": 0.6091, + "step": 4144 + }, + { + "epoch": 0.378400584261457, + "grad_norm": 0.5031014680862427, + "learning_rate": 4.953273432341829e-06, + "loss": 0.5792, + "step": 4145 + }, + { + "epoch": 0.3784918751141136, + "grad_norm": 0.469460129737854, + "learning_rate": 4.953250398399633e-06, + "loss": 0.5728, + "step": 4146 + }, + { + "epoch": 0.3785831659667701, + "grad_norm": 0.4797264337539673, + "learning_rate": 4.953227358835107e-06, + "loss": 0.5793, + "step": 4147 + }, + { + "epoch": 0.3786744568194267, + "grad_norm": 0.4866003692150116, + "learning_rate": 4.953204313648306e-06, + "loss": 0.5974, + "step": 4148 + }, + { + "epoch": 0.37876574767208326, + "grad_norm": 0.47173017263412476, + "learning_rate": 4.953181262839281e-06, + "loss": 0.5685, + "step": 4149 + }, + { + "epoch": 0.37885703852473984, + "grad_norm": 0.4595384895801544, + "learning_rate": 4.953158206408087e-06, + "loss": 0.5802, + "step": 4150 + }, + { + "epoch": 0.37894832937739636, + "grad_norm": 0.5154102444648743, + "learning_rate": 4.953135144354775e-06, + "loss": 0.5308, + "step": 4151 + }, + { + "epoch": 0.37903962023005294, + "grad_norm": 0.4815710783004761, + "learning_rate": 4.953112076679399e-06, + "loss": 0.5787, + "step": 4152 + }, + { + "epoch": 0.3791309110827095, + "grad_norm": 0.45746856927871704, + "learning_rate": 4.95308900338201e-06, + "loss": 0.5853, + "step": 4153 + }, + { + "epoch": 0.3792222019353661, + "grad_norm": 0.4898660182952881, + "learning_rate": 4.9530659244626635e-06, + "loss": 0.5865, + "step": 4154 + }, + { + "epoch": 0.3793134927880226, + "grad_norm": 0.4725171625614166, + "learning_rate": 4.95304283992141e-06, + "loss": 0.5836, + "step": 4155 + }, + { + "epoch": 0.3794047836406792, + "grad_norm": 0.4213614761829376, + "learning_rate": 4.953019749758306e-06, + "loss": 0.6246, + "step": 4156 + }, + { + "epoch": 0.3794960744933358, + "grad_norm": 0.4638664424419403, + "learning_rate": 4.9529966539733996e-06, + "loss": 0.5498, + "step": 4157 + }, + { + "epoch": 0.37958736534599236, + "grad_norm": 0.48500362038612366, + "learning_rate": 4.952973552566747e-06, + "loss": 0.5533, + "step": 4158 + }, + { + "epoch": 0.3796786561986489, + "grad_norm": 0.45086196064949036, + "learning_rate": 4.9529504455384e-06, + "loss": 0.6433, + "step": 4159 + }, + { + "epoch": 0.37976994705130546, + "grad_norm": 0.5074502229690552, + "learning_rate": 4.952927332888412e-06, + "loss": 0.5127, + "step": 4160 + }, + { + "epoch": 0.37986123790396203, + "grad_norm": 0.4727591574192047, + "learning_rate": 4.952904214616836e-06, + "loss": 0.5742, + "step": 4161 + }, + { + "epoch": 0.3799525287566186, + "grad_norm": 0.48961153626441956, + "learning_rate": 4.952881090723724e-06, + "loss": 0.5725, + "step": 4162 + }, + { + "epoch": 0.38004381960927514, + "grad_norm": 0.48276394605636597, + "learning_rate": 4.952857961209131e-06, + "loss": 0.6008, + "step": 4163 + }, + { + "epoch": 0.3801351104619317, + "grad_norm": 0.4763690233230591, + "learning_rate": 4.9528348260731076e-06, + "loss": 0.6076, + "step": 4164 + }, + { + "epoch": 0.3802264013145883, + "grad_norm": 0.535686731338501, + "learning_rate": 4.952811685315708e-06, + "loss": 0.5647, + "step": 4165 + }, + { + "epoch": 0.38031769216724487, + "grad_norm": 0.4592985510826111, + "learning_rate": 4.952788538936985e-06, + "loss": 0.5749, + "step": 4166 + }, + { + "epoch": 0.3804089830199014, + "grad_norm": 0.4469195008277893, + "learning_rate": 4.952765386936993e-06, + "loss": 0.5625, + "step": 4167 + }, + { + "epoch": 0.38050027387255797, + "grad_norm": 0.4487350881099701, + "learning_rate": 4.952742229315783e-06, + "loss": 0.6102, + "step": 4168 + }, + { + "epoch": 0.38059156472521455, + "grad_norm": 0.4945680499076843, + "learning_rate": 4.95271906607341e-06, + "loss": 0.5794, + "step": 4169 + }, + { + "epoch": 0.38068285557787107, + "grad_norm": 0.4653199315071106, + "learning_rate": 4.952695897209925e-06, + "loss": 0.6061, + "step": 4170 + }, + { + "epoch": 0.38077414643052765, + "grad_norm": 0.488188773393631, + "learning_rate": 4.952672722725384e-06, + "loss": 0.5395, + "step": 4171 + }, + { + "epoch": 0.3808654372831842, + "grad_norm": 0.4834133982658386, + "learning_rate": 4.952649542619836e-06, + "loss": 0.5921, + "step": 4172 + }, + { + "epoch": 0.3809567281358408, + "grad_norm": 0.4469359815120697, + "learning_rate": 4.952626356893338e-06, + "loss": 0.6008, + "step": 4173 + }, + { + "epoch": 0.38104801898849733, + "grad_norm": 0.44795048236846924, + "learning_rate": 4.952603165545941e-06, + "loss": 0.5714, + "step": 4174 + }, + { + "epoch": 0.3811393098411539, + "grad_norm": 0.4667063355445862, + "learning_rate": 4.952579968577698e-06, + "loss": 0.5828, + "step": 4175 + }, + { + "epoch": 0.3812306006938105, + "grad_norm": 0.477394700050354, + "learning_rate": 4.952556765988663e-06, + "loss": 0.5577, + "step": 4176 + }, + { + "epoch": 0.38132189154646706, + "grad_norm": 0.47039327025413513, + "learning_rate": 4.95253355777889e-06, + "loss": 0.5891, + "step": 4177 + }, + { + "epoch": 0.3814131823991236, + "grad_norm": 0.4639368951320648, + "learning_rate": 4.95251034394843e-06, + "loss": 0.5915, + "step": 4178 + }, + { + "epoch": 0.38150447325178016, + "grad_norm": 0.5067062377929688, + "learning_rate": 4.952487124497339e-06, + "loss": 0.5556, + "step": 4179 + }, + { + "epoch": 0.38159576410443674, + "grad_norm": 0.4917445182800293, + "learning_rate": 4.952463899425667e-06, + "loss": 0.5725, + "step": 4180 + }, + { + "epoch": 0.3816870549570933, + "grad_norm": 0.44193536043167114, + "learning_rate": 4.95244066873347e-06, + "loss": 0.6118, + "step": 4181 + }, + { + "epoch": 0.38177834580974984, + "grad_norm": 0.4992716610431671, + "learning_rate": 4.9524174324208e-06, + "loss": 0.5633, + "step": 4182 + }, + { + "epoch": 0.3818696366624064, + "grad_norm": 0.4816027581691742, + "learning_rate": 4.95239419048771e-06, + "loss": 0.5566, + "step": 4183 + }, + { + "epoch": 0.381960927515063, + "grad_norm": 0.4846581816673279, + "learning_rate": 4.952370942934254e-06, + "loss": 0.5694, + "step": 4184 + }, + { + "epoch": 0.3820522183677196, + "grad_norm": 0.47185614705085754, + "learning_rate": 4.952347689760484e-06, + "loss": 0.5989, + "step": 4185 + }, + { + "epoch": 0.3821435092203761, + "grad_norm": 0.46108871698379517, + "learning_rate": 4.952324430966455e-06, + "loss": 0.5845, + "step": 4186 + }, + { + "epoch": 0.3822348000730327, + "grad_norm": 0.49109143018722534, + "learning_rate": 4.952301166552219e-06, + "loss": 0.5905, + "step": 4187 + }, + { + "epoch": 0.38232609092568925, + "grad_norm": 0.45736733078956604, + "learning_rate": 4.9522778965178305e-06, + "loss": 0.5569, + "step": 4188 + }, + { + "epoch": 0.38241738177834583, + "grad_norm": 0.4291079342365265, + "learning_rate": 4.952254620863342e-06, + "loss": 0.5747, + "step": 4189 + }, + { + "epoch": 0.38250867263100236, + "grad_norm": 0.5125429034233093, + "learning_rate": 4.9522313395888066e-06, + "loss": 0.5403, + "step": 4190 + }, + { + "epoch": 0.38259996348365893, + "grad_norm": 0.5102959275245667, + "learning_rate": 4.952208052694279e-06, + "loss": 0.5488, + "step": 4191 + }, + { + "epoch": 0.3826912543363155, + "grad_norm": 0.4683464467525482, + "learning_rate": 4.95218476017981e-06, + "loss": 0.6001, + "step": 4192 + }, + { + "epoch": 0.3827825451889721, + "grad_norm": 0.492471307516098, + "learning_rate": 4.952161462045455e-06, + "loss": 0.582, + "step": 4193 + }, + { + "epoch": 0.3828738360416286, + "grad_norm": 0.465385377407074, + "learning_rate": 4.952138158291268e-06, + "loss": 0.5858, + "step": 4194 + }, + { + "epoch": 0.3829651268942852, + "grad_norm": 0.4615674614906311, + "learning_rate": 4.9521148489173e-06, + "loss": 0.5912, + "step": 4195 + }, + { + "epoch": 0.38305641774694177, + "grad_norm": 0.4500979483127594, + "learning_rate": 4.952091533923607e-06, + "loss": 0.5788, + "step": 4196 + }, + { + "epoch": 0.38314770859959835, + "grad_norm": 0.4306259751319885, + "learning_rate": 4.952068213310241e-06, + "loss": 0.6395, + "step": 4197 + }, + { + "epoch": 0.38323899945225487, + "grad_norm": 0.47538870573043823, + "learning_rate": 4.952044887077255e-06, + "loss": 0.5776, + "step": 4198 + }, + { + "epoch": 0.38333029030491145, + "grad_norm": 0.4913058876991272, + "learning_rate": 4.952021555224704e-06, + "loss": 0.5812, + "step": 4199 + }, + { + "epoch": 0.383421581157568, + "grad_norm": 0.4121589660644531, + "learning_rate": 4.95199821775264e-06, + "loss": 0.607, + "step": 4200 + }, + { + "epoch": 0.38351287201022455, + "grad_norm": 0.4438312351703644, + "learning_rate": 4.951974874661119e-06, + "loss": 0.609, + "step": 4201 + }, + { + "epoch": 0.3836041628628811, + "grad_norm": 0.47508180141448975, + "learning_rate": 4.95195152595019e-06, + "loss": 0.5571, + "step": 4202 + }, + { + "epoch": 0.3836954537155377, + "grad_norm": 0.4799754023551941, + "learning_rate": 4.951928171619911e-06, + "loss": 0.5615, + "step": 4203 + }, + { + "epoch": 0.3837867445681943, + "grad_norm": 0.46293336153030396, + "learning_rate": 4.951904811670332e-06, + "loss": 0.6184, + "step": 4204 + }, + { + "epoch": 0.3838780354208508, + "grad_norm": 0.4532841742038727, + "learning_rate": 4.95188144610151e-06, + "loss": 0.5691, + "step": 4205 + }, + { + "epoch": 0.3839693262735074, + "grad_norm": 0.505009114742279, + "learning_rate": 4.951858074913495e-06, + "loss": 0.5682, + "step": 4206 + }, + { + "epoch": 0.38406061712616396, + "grad_norm": 0.5123653411865234, + "learning_rate": 4.951834698106343e-06, + "loss": 0.5666, + "step": 4207 + }, + { + "epoch": 0.38415190797882054, + "grad_norm": 0.4871501624584198, + "learning_rate": 4.951811315680107e-06, + "loss": 0.5716, + "step": 4208 + }, + { + "epoch": 0.38424319883147706, + "grad_norm": 0.4682171046733856, + "learning_rate": 4.95178792763484e-06, + "loss": 0.6076, + "step": 4209 + }, + { + "epoch": 0.38433448968413364, + "grad_norm": 0.4937323033809662, + "learning_rate": 4.951764533970597e-06, + "loss": 0.5822, + "step": 4210 + }, + { + "epoch": 0.3844257805367902, + "grad_norm": 0.47788238525390625, + "learning_rate": 4.95174113468743e-06, + "loss": 0.5439, + "step": 4211 + }, + { + "epoch": 0.3845170713894468, + "grad_norm": 0.4975408911705017, + "learning_rate": 4.951717729785393e-06, + "loss": 0.5575, + "step": 4212 + }, + { + "epoch": 0.3846083622421033, + "grad_norm": 0.4491221308708191, + "learning_rate": 4.951694319264541e-06, + "loss": 0.6279, + "step": 4213 + }, + { + "epoch": 0.3846996530947599, + "grad_norm": 0.5155954360961914, + "learning_rate": 4.951670903124926e-06, + "loss": 0.6118, + "step": 4214 + }, + { + "epoch": 0.3847909439474165, + "grad_norm": 0.5130831599235535, + "learning_rate": 4.951647481366602e-06, + "loss": 0.5537, + "step": 4215 + }, + { + "epoch": 0.38488223480007305, + "grad_norm": 0.4493478536605835, + "learning_rate": 4.951624053989622e-06, + "loss": 0.6169, + "step": 4216 + }, + { + "epoch": 0.3849735256527296, + "grad_norm": 0.5073252320289612, + "learning_rate": 4.951600620994042e-06, + "loss": 0.5715, + "step": 4217 + }, + { + "epoch": 0.38506481650538615, + "grad_norm": 0.5038630366325378, + "learning_rate": 4.951577182379914e-06, + "loss": 0.5918, + "step": 4218 + }, + { + "epoch": 0.38515610735804273, + "grad_norm": 0.4836706817150116, + "learning_rate": 4.951553738147292e-06, + "loss": 0.5284, + "step": 4219 + }, + { + "epoch": 0.3852473982106993, + "grad_norm": 0.4825471341609955, + "learning_rate": 4.95153028829623e-06, + "loss": 0.5602, + "step": 4220 + }, + { + "epoch": 0.38533868906335583, + "grad_norm": 0.4691873788833618, + "learning_rate": 4.9515068328267806e-06, + "loss": 0.6007, + "step": 4221 + }, + { + "epoch": 0.3854299799160124, + "grad_norm": 0.43397849798202515, + "learning_rate": 4.951483371739e-06, + "loss": 0.5921, + "step": 4222 + }, + { + "epoch": 0.385521270768669, + "grad_norm": 0.4868435859680176, + "learning_rate": 4.951459905032939e-06, + "loss": 0.5673, + "step": 4223 + }, + { + "epoch": 0.38561256162132557, + "grad_norm": 0.46237462759017944, + "learning_rate": 4.951436432708653e-06, + "loss": 0.6359, + "step": 4224 + }, + { + "epoch": 0.3857038524739821, + "grad_norm": 0.47215989232063293, + "learning_rate": 4.951412954766196e-06, + "loss": 0.625, + "step": 4225 + }, + { + "epoch": 0.38579514332663867, + "grad_norm": 0.50225430727005, + "learning_rate": 4.951389471205622e-06, + "loss": 0.5651, + "step": 4226 + }, + { + "epoch": 0.38588643417929525, + "grad_norm": 0.5022921562194824, + "learning_rate": 4.951365982026983e-06, + "loss": 0.5675, + "step": 4227 + }, + { + "epoch": 0.3859777250319518, + "grad_norm": 0.449207067489624, + "learning_rate": 4.951342487230335e-06, + "loss": 0.5954, + "step": 4228 + }, + { + "epoch": 0.38606901588460835, + "grad_norm": 0.49584516882896423, + "learning_rate": 4.95131898681573e-06, + "loss": 0.5696, + "step": 4229 + }, + { + "epoch": 0.3861603067372649, + "grad_norm": 0.4549560844898224, + "learning_rate": 4.9512954807832235e-06, + "loss": 0.616, + "step": 4230 + }, + { + "epoch": 0.3862515975899215, + "grad_norm": 0.5013883709907532, + "learning_rate": 4.951271969132868e-06, + "loss": 0.5939, + "step": 4231 + }, + { + "epoch": 0.3863428884425781, + "grad_norm": 0.4888851046562195, + "learning_rate": 4.951248451864719e-06, + "loss": 0.5732, + "step": 4232 + }, + { + "epoch": 0.3864341792952346, + "grad_norm": 0.48548710346221924, + "learning_rate": 4.951224928978829e-06, + "loss": 0.5723, + "step": 4233 + }, + { + "epoch": 0.3865254701478912, + "grad_norm": 0.4937704801559448, + "learning_rate": 4.951201400475252e-06, + "loss": 0.5508, + "step": 4234 + }, + { + "epoch": 0.38661676100054776, + "grad_norm": 0.4374832510948181, + "learning_rate": 4.951177866354042e-06, + "loss": 0.6057, + "step": 4235 + }, + { + "epoch": 0.3867080518532043, + "grad_norm": 0.47913599014282227, + "learning_rate": 4.951154326615254e-06, + "loss": 0.5561, + "step": 4236 + }, + { + "epoch": 0.38679934270586086, + "grad_norm": 0.47531843185424805, + "learning_rate": 4.951130781258941e-06, + "loss": 0.6054, + "step": 4237 + }, + { + "epoch": 0.38689063355851744, + "grad_norm": 0.4746064245700836, + "learning_rate": 4.951107230285157e-06, + "loss": 0.5715, + "step": 4238 + }, + { + "epoch": 0.386981924411174, + "grad_norm": 0.5071374773979187, + "learning_rate": 4.951083673693956e-06, + "loss": 0.5614, + "step": 4239 + }, + { + "epoch": 0.38707321526383054, + "grad_norm": 0.5005894303321838, + "learning_rate": 4.951060111485392e-06, + "loss": 0.5357, + "step": 4240 + }, + { + "epoch": 0.3871645061164871, + "grad_norm": 0.474806547164917, + "learning_rate": 4.951036543659518e-06, + "loss": 0.5904, + "step": 4241 + }, + { + "epoch": 0.3872557969691437, + "grad_norm": 0.4945315420627594, + "learning_rate": 4.951012970216391e-06, + "loss": 0.6032, + "step": 4242 + }, + { + "epoch": 0.3873470878218003, + "grad_norm": 0.485332727432251, + "learning_rate": 4.950989391156062e-06, + "loss": 0.5873, + "step": 4243 + }, + { + "epoch": 0.3874383786744568, + "grad_norm": 0.4593532979488373, + "learning_rate": 4.950965806478586e-06, + "loss": 0.6137, + "step": 4244 + }, + { + "epoch": 0.3875296695271134, + "grad_norm": 0.47145897150039673, + "learning_rate": 4.950942216184017e-06, + "loss": 0.5555, + "step": 4245 + }, + { + "epoch": 0.38762096037976995, + "grad_norm": 0.4852192997932434, + "learning_rate": 4.9509186202724106e-06, + "loss": 0.5844, + "step": 4246 + }, + { + "epoch": 0.38771225123242653, + "grad_norm": 0.49140670895576477, + "learning_rate": 4.950895018743818e-06, + "loss": 0.5975, + "step": 4247 + }, + { + "epoch": 0.38780354208508305, + "grad_norm": 0.4454435110092163, + "learning_rate": 4.950871411598296e-06, + "loss": 0.6109, + "step": 4248 + }, + { + "epoch": 0.38789483293773963, + "grad_norm": 0.49150025844573975, + "learning_rate": 4.950847798835897e-06, + "loss": 0.56, + "step": 4249 + }, + { + "epoch": 0.3879861237903962, + "grad_norm": 0.5093191862106323, + "learning_rate": 4.950824180456675e-06, + "loss": 0.552, + "step": 4250 + }, + { + "epoch": 0.3880774146430528, + "grad_norm": 0.48104774951934814, + "learning_rate": 4.950800556460685e-06, + "loss": 0.5427, + "step": 4251 + }, + { + "epoch": 0.3881687054957093, + "grad_norm": 0.45179295539855957, + "learning_rate": 4.950776926847982e-06, + "loss": 0.6158, + "step": 4252 + }, + { + "epoch": 0.3882599963483659, + "grad_norm": 0.47248509526252747, + "learning_rate": 4.950753291618619e-06, + "loss": 0.5711, + "step": 4253 + }, + { + "epoch": 0.38835128720102247, + "grad_norm": 0.48650333285331726, + "learning_rate": 4.950729650772649e-06, + "loss": 0.5701, + "step": 4254 + }, + { + "epoch": 0.38844257805367904, + "grad_norm": 0.4687216281890869, + "learning_rate": 4.950706004310128e-06, + "loss": 0.5613, + "step": 4255 + }, + { + "epoch": 0.38853386890633557, + "grad_norm": 0.5037401914596558, + "learning_rate": 4.950682352231109e-06, + "loss": 0.5628, + "step": 4256 + }, + { + "epoch": 0.38862515975899214, + "grad_norm": 0.4669901132583618, + "learning_rate": 4.950658694535647e-06, + "loss": 0.563, + "step": 4257 + }, + { + "epoch": 0.3887164506116487, + "grad_norm": 0.4683459997177124, + "learning_rate": 4.9506350312237965e-06, + "loss": 0.5926, + "step": 4258 + }, + { + "epoch": 0.3888077414643053, + "grad_norm": 0.4570710361003876, + "learning_rate": 4.950611362295611e-06, + "loss": 0.5981, + "step": 4259 + }, + { + "epoch": 0.3888990323169618, + "grad_norm": 0.4726351499557495, + "learning_rate": 4.950587687751145e-06, + "loss": 0.582, + "step": 4260 + }, + { + "epoch": 0.3889903231696184, + "grad_norm": 0.4712459146976471, + "learning_rate": 4.950564007590452e-06, + "loss": 0.6095, + "step": 4261 + }, + { + "epoch": 0.389081614022275, + "grad_norm": 0.4362497627735138, + "learning_rate": 4.950540321813588e-06, + "loss": 0.618, + "step": 4262 + }, + { + "epoch": 0.38917290487493156, + "grad_norm": 0.4700106382369995, + "learning_rate": 4.950516630420607e-06, + "loss": 0.5864, + "step": 4263 + }, + { + "epoch": 0.3892641957275881, + "grad_norm": 0.4792269766330719, + "learning_rate": 4.950492933411561e-06, + "loss": 0.5845, + "step": 4264 + }, + { + "epoch": 0.38935548658024466, + "grad_norm": 0.5171816945075989, + "learning_rate": 4.950469230786507e-06, + "loss": 0.563, + "step": 4265 + }, + { + "epoch": 0.38944677743290124, + "grad_norm": 0.48489055037498474, + "learning_rate": 4.9504455225454975e-06, + "loss": 0.5997, + "step": 4266 + }, + { + "epoch": 0.3895380682855578, + "grad_norm": 0.46987438201904297, + "learning_rate": 4.950421808688588e-06, + "loss": 0.5712, + "step": 4267 + }, + { + "epoch": 0.38962935913821434, + "grad_norm": 0.48160529136657715, + "learning_rate": 4.950398089215832e-06, + "loss": 0.5974, + "step": 4268 + }, + { + "epoch": 0.3897206499908709, + "grad_norm": 0.4780946373939514, + "learning_rate": 4.950374364127285e-06, + "loss": 0.5386, + "step": 4269 + }, + { + "epoch": 0.3898119408435275, + "grad_norm": 0.4875502586364746, + "learning_rate": 4.950350633423e-06, + "loss": 0.5789, + "step": 4270 + }, + { + "epoch": 0.389903231696184, + "grad_norm": 0.453818142414093, + "learning_rate": 4.9503268971030326e-06, + "loss": 0.5484, + "step": 4271 + }, + { + "epoch": 0.3899945225488406, + "grad_norm": 0.4589935541152954, + "learning_rate": 4.950303155167436e-06, + "loss": 0.5586, + "step": 4272 + }, + { + "epoch": 0.3900858134014972, + "grad_norm": 0.4761224389076233, + "learning_rate": 4.950279407616265e-06, + "loss": 0.5937, + "step": 4273 + }, + { + "epoch": 0.39017710425415375, + "grad_norm": 0.5050770044326782, + "learning_rate": 4.950255654449575e-06, + "loss": 0.5522, + "step": 4274 + }, + { + "epoch": 0.3902683951068103, + "grad_norm": 0.4646681547164917, + "learning_rate": 4.9502318956674194e-06, + "loss": 0.5792, + "step": 4275 + }, + { + "epoch": 0.39035968595946685, + "grad_norm": 0.4843439757823944, + "learning_rate": 4.950208131269853e-06, + "loss": 0.603, + "step": 4276 + }, + { + "epoch": 0.39045097681212343, + "grad_norm": 0.45157092809677124, + "learning_rate": 4.95018436125693e-06, + "loss": 0.6323, + "step": 4277 + }, + { + "epoch": 0.39054226766478, + "grad_norm": 0.4578101933002472, + "learning_rate": 4.950160585628705e-06, + "loss": 0.6056, + "step": 4278 + }, + { + "epoch": 0.39063355851743653, + "grad_norm": 0.47052451968193054, + "learning_rate": 4.950136804385233e-06, + "loss": 0.636, + "step": 4279 + }, + { + "epoch": 0.3907248493700931, + "grad_norm": 0.46674400568008423, + "learning_rate": 4.950113017526568e-06, + "loss": 0.5616, + "step": 4280 + }, + { + "epoch": 0.3908161402227497, + "grad_norm": 0.44170767068862915, + "learning_rate": 4.950089225052765e-06, + "loss": 0.651, + "step": 4281 + }, + { + "epoch": 0.39090743107540626, + "grad_norm": 0.4802749454975128, + "learning_rate": 4.950065426963877e-06, + "loss": 0.5774, + "step": 4282 + }, + { + "epoch": 0.3909987219280628, + "grad_norm": 0.49663275480270386, + "learning_rate": 4.95004162325996e-06, + "loss": 0.5843, + "step": 4283 + }, + { + "epoch": 0.39109001278071936, + "grad_norm": 0.529229462146759, + "learning_rate": 4.950017813941069e-06, + "loss": 0.5276, + "step": 4284 + }, + { + "epoch": 0.39118130363337594, + "grad_norm": 0.4836087226867676, + "learning_rate": 4.9499939990072565e-06, + "loss": 0.5641, + "step": 4285 + }, + { + "epoch": 0.3912725944860325, + "grad_norm": 0.4830372929573059, + "learning_rate": 4.9499701784585795e-06, + "loss": 0.5976, + "step": 4286 + }, + { + "epoch": 0.39136388533868904, + "grad_norm": 0.5111812949180603, + "learning_rate": 4.9499463522950904e-06, + "loss": 0.577, + "step": 4287 + }, + { + "epoch": 0.3914551761913456, + "grad_norm": 0.4827885925769806, + "learning_rate": 4.949922520516846e-06, + "loss": 0.6202, + "step": 4288 + }, + { + "epoch": 0.3915464670440022, + "grad_norm": 0.45391663908958435, + "learning_rate": 4.949898683123899e-06, + "loss": 0.5758, + "step": 4289 + }, + { + "epoch": 0.3916377578966588, + "grad_norm": 0.5083597302436829, + "learning_rate": 4.949874840116305e-06, + "loss": 0.5578, + "step": 4290 + }, + { + "epoch": 0.3917290487493153, + "grad_norm": 0.47006022930145264, + "learning_rate": 4.949850991494119e-06, + "loss": 0.6086, + "step": 4291 + }, + { + "epoch": 0.3918203396019719, + "grad_norm": 0.4755263030529022, + "learning_rate": 4.949827137257395e-06, + "loss": 0.579, + "step": 4292 + }, + { + "epoch": 0.39191163045462846, + "grad_norm": 0.4719795286655426, + "learning_rate": 4.9498032774061866e-06, + "loss": 0.5829, + "step": 4293 + }, + { + "epoch": 0.39200292130728503, + "grad_norm": 0.4867086112499237, + "learning_rate": 4.949779411940551e-06, + "loss": 0.5793, + "step": 4294 + }, + { + "epoch": 0.39209421215994156, + "grad_norm": 0.4552168846130371, + "learning_rate": 4.94975554086054e-06, + "loss": 0.5814, + "step": 4295 + }, + { + "epoch": 0.39218550301259814, + "grad_norm": 0.49463406205177307, + "learning_rate": 4.9497316641662115e-06, + "loss": 0.5721, + "step": 4296 + }, + { + "epoch": 0.3922767938652547, + "grad_norm": 0.4694592356681824, + "learning_rate": 4.949707781857618e-06, + "loss": 0.5886, + "step": 4297 + }, + { + "epoch": 0.3923680847179113, + "grad_norm": 0.4689556956291199, + "learning_rate": 4.949683893934814e-06, + "loss": 0.5947, + "step": 4298 + }, + { + "epoch": 0.3924593755705678, + "grad_norm": 0.4774288535118103, + "learning_rate": 4.949660000397856e-06, + "loss": 0.5691, + "step": 4299 + }, + { + "epoch": 0.3925506664232244, + "grad_norm": 0.4658929407596588, + "learning_rate": 4.949636101246798e-06, + "loss": 0.5883, + "step": 4300 + }, + { + "epoch": 0.39264195727588097, + "grad_norm": 0.4477391541004181, + "learning_rate": 4.949612196481693e-06, + "loss": 0.5971, + "step": 4301 + }, + { + "epoch": 0.3927332481285375, + "grad_norm": 0.4651006758213043, + "learning_rate": 4.949588286102599e-06, + "loss": 0.6016, + "step": 4302 + }, + { + "epoch": 0.39282453898119407, + "grad_norm": 0.4732820987701416, + "learning_rate": 4.949564370109569e-06, + "loss": 0.5997, + "step": 4303 + }, + { + "epoch": 0.39291582983385065, + "grad_norm": 0.49324682354927063, + "learning_rate": 4.949540448502657e-06, + "loss": 0.533, + "step": 4304 + }, + { + "epoch": 0.3930071206865072, + "grad_norm": 0.4780423939228058, + "learning_rate": 4.9495165212819196e-06, + "loss": 0.5933, + "step": 4305 + }, + { + "epoch": 0.39309841153916375, + "grad_norm": 0.4960300922393799, + "learning_rate": 4.94949258844741e-06, + "loss": 0.6005, + "step": 4306 + }, + { + "epoch": 0.39318970239182033, + "grad_norm": 0.5081140995025635, + "learning_rate": 4.949468649999185e-06, + "loss": 0.5883, + "step": 4307 + }, + { + "epoch": 0.3932809932444769, + "grad_norm": 0.47634458541870117, + "learning_rate": 4.949444705937297e-06, + "loss": 0.5851, + "step": 4308 + }, + { + "epoch": 0.3933722840971335, + "grad_norm": 0.49043866991996765, + "learning_rate": 4.949420756261803e-06, + "loss": 0.5609, + "step": 4309 + }, + { + "epoch": 0.39346357494979, + "grad_norm": 0.43408674001693726, + "learning_rate": 4.949396800972758e-06, + "loss": 0.6229, + "step": 4310 + }, + { + "epoch": 0.3935548658024466, + "grad_norm": 0.4654550552368164, + "learning_rate": 4.949372840070215e-06, + "loss": 0.5868, + "step": 4311 + }, + { + "epoch": 0.39364615665510316, + "grad_norm": 0.45357033610343933, + "learning_rate": 4.9493488735542305e-06, + "loss": 0.6112, + "step": 4312 + }, + { + "epoch": 0.39373744750775974, + "grad_norm": 0.4629100561141968, + "learning_rate": 4.949324901424858e-06, + "loss": 0.5815, + "step": 4313 + }, + { + "epoch": 0.39382873836041626, + "grad_norm": 0.47313520312309265, + "learning_rate": 4.949300923682154e-06, + "loss": 0.54, + "step": 4314 + }, + { + "epoch": 0.39392002921307284, + "grad_norm": 0.4718436300754547, + "learning_rate": 4.949276940326172e-06, + "loss": 0.571, + "step": 4315 + }, + { + "epoch": 0.3940113200657294, + "grad_norm": 0.4569703936576843, + "learning_rate": 4.949252951356969e-06, + "loss": 0.5746, + "step": 4316 + }, + { + "epoch": 0.394102610918386, + "grad_norm": 0.5014108419418335, + "learning_rate": 4.949228956774597e-06, + "loss": 0.5582, + "step": 4317 + }, + { + "epoch": 0.3941939017710425, + "grad_norm": 0.4266694486141205, + "learning_rate": 4.949204956579114e-06, + "loss": 0.6375, + "step": 4318 + }, + { + "epoch": 0.3942851926236991, + "grad_norm": 0.4780322313308716, + "learning_rate": 4.949180950770573e-06, + "loss": 0.6368, + "step": 4319 + }, + { + "epoch": 0.3943764834763557, + "grad_norm": 0.49994316697120667, + "learning_rate": 4.94915693934903e-06, + "loss": 0.5912, + "step": 4320 + }, + { + "epoch": 0.39446777432901226, + "grad_norm": 0.5149347186088562, + "learning_rate": 4.949132922314539e-06, + "loss": 0.5358, + "step": 4321 + }, + { + "epoch": 0.3945590651816688, + "grad_norm": 0.47380566596984863, + "learning_rate": 4.949108899667157e-06, + "loss": 0.5693, + "step": 4322 + }, + { + "epoch": 0.39465035603432536, + "grad_norm": 0.4666667878627777, + "learning_rate": 4.949084871406937e-06, + "loss": 0.6265, + "step": 4323 + }, + { + "epoch": 0.39474164688698193, + "grad_norm": 0.47881558537483215, + "learning_rate": 4.949060837533935e-06, + "loss": 0.5765, + "step": 4324 + }, + { + "epoch": 0.3948329377396385, + "grad_norm": 0.47797802090644836, + "learning_rate": 4.949036798048206e-06, + "loss": 0.5653, + "step": 4325 + }, + { + "epoch": 0.39492422859229503, + "grad_norm": 0.45852333307266235, + "learning_rate": 4.949012752949804e-06, + "loss": 0.589, + "step": 4326 + }, + { + "epoch": 0.3950155194449516, + "grad_norm": 0.4668664336204529, + "learning_rate": 4.9489887022387865e-06, + "loss": 0.5853, + "step": 4327 + }, + { + "epoch": 0.3951068102976082, + "grad_norm": 0.47936001420021057, + "learning_rate": 4.948964645915207e-06, + "loss": 0.5449, + "step": 4328 + }, + { + "epoch": 0.39519810115026477, + "grad_norm": 0.47419360280036926, + "learning_rate": 4.948940583979121e-06, + "loss": 0.5976, + "step": 4329 + }, + { + "epoch": 0.3952893920029213, + "grad_norm": 0.4937084913253784, + "learning_rate": 4.948916516430584e-06, + "loss": 0.5681, + "step": 4330 + }, + { + "epoch": 0.39538068285557787, + "grad_norm": 0.4393993318080902, + "learning_rate": 4.9488924432696495e-06, + "loss": 0.6258, + "step": 4331 + }, + { + "epoch": 0.39547197370823445, + "grad_norm": 0.4660295248031616, + "learning_rate": 4.948868364496375e-06, + "loss": 0.6015, + "step": 4332 + }, + { + "epoch": 0.395563264560891, + "grad_norm": 0.4694875478744507, + "learning_rate": 4.9488442801108136e-06, + "loss": 0.5967, + "step": 4333 + }, + { + "epoch": 0.39565455541354755, + "grad_norm": 0.45838963985443115, + "learning_rate": 4.948820190113023e-06, + "loss": 0.6123, + "step": 4334 + }, + { + "epoch": 0.3957458462662041, + "grad_norm": 0.4901482164859772, + "learning_rate": 4.948796094503055e-06, + "loss": 0.5758, + "step": 4335 + }, + { + "epoch": 0.3958371371188607, + "grad_norm": 0.5406219959259033, + "learning_rate": 4.948771993280967e-06, + "loss": 0.5555, + "step": 4336 + }, + { + "epoch": 0.3959284279715172, + "grad_norm": 0.48551374673843384, + "learning_rate": 4.948747886446815e-06, + "loss": 0.593, + "step": 4337 + }, + { + "epoch": 0.3960197188241738, + "grad_norm": 0.4763370156288147, + "learning_rate": 4.948723774000653e-06, + "loss": 0.5725, + "step": 4338 + }, + { + "epoch": 0.3961110096768304, + "grad_norm": 0.48945319652557373, + "learning_rate": 4.948699655942536e-06, + "loss": 0.5802, + "step": 4339 + }, + { + "epoch": 0.39620230052948696, + "grad_norm": 0.4530027508735657, + "learning_rate": 4.94867553227252e-06, + "loss": 0.6203, + "step": 4340 + }, + { + "epoch": 0.3962935913821435, + "grad_norm": 0.4923330545425415, + "learning_rate": 4.9486514029906595e-06, + "loss": 0.5573, + "step": 4341 + }, + { + "epoch": 0.39638488223480006, + "grad_norm": 0.4672296941280365, + "learning_rate": 4.9486272680970104e-06, + "loss": 0.6218, + "step": 4342 + }, + { + "epoch": 0.39647617308745664, + "grad_norm": 0.5204114317893982, + "learning_rate": 4.948603127591628e-06, + "loss": 0.5809, + "step": 4343 + }, + { + "epoch": 0.3965674639401132, + "grad_norm": 0.49085623025894165, + "learning_rate": 4.948578981474567e-06, + "loss": 0.5693, + "step": 4344 + }, + { + "epoch": 0.39665875479276974, + "grad_norm": 0.5191320776939392, + "learning_rate": 4.948554829745885e-06, + "loss": 0.5776, + "step": 4345 + }, + { + "epoch": 0.3967500456454263, + "grad_norm": 0.47230517864227295, + "learning_rate": 4.948530672405634e-06, + "loss": 0.5947, + "step": 4346 + }, + { + "epoch": 0.3968413364980829, + "grad_norm": 0.48400065302848816, + "learning_rate": 4.948506509453872e-06, + "loss": 0.5546, + "step": 4347 + }, + { + "epoch": 0.3969326273507395, + "grad_norm": 0.4825799763202667, + "learning_rate": 4.948482340890653e-06, + "loss": 0.5816, + "step": 4348 + }, + { + "epoch": 0.397023918203396, + "grad_norm": 0.4621483385562897, + "learning_rate": 4.948458166716033e-06, + "loss": 0.5748, + "step": 4349 + }, + { + "epoch": 0.3971152090560526, + "grad_norm": 0.4550122916698456, + "learning_rate": 4.948433986930066e-06, + "loss": 0.5449, + "step": 4350 + }, + { + "epoch": 0.39720649990870915, + "grad_norm": 0.45699381828308105, + "learning_rate": 4.948409801532809e-06, + "loss": 0.5867, + "step": 4351 + }, + { + "epoch": 0.39729779076136573, + "grad_norm": 0.4836646020412445, + "learning_rate": 4.948385610524318e-06, + "loss": 0.5581, + "step": 4352 + }, + { + "epoch": 0.39738908161402225, + "grad_norm": 0.45762085914611816, + "learning_rate": 4.948361413904647e-06, + "loss": 0.6123, + "step": 4353 + }, + { + "epoch": 0.39748037246667883, + "grad_norm": 0.4343688189983368, + "learning_rate": 4.948337211673851e-06, + "loss": 0.5924, + "step": 4354 + }, + { + "epoch": 0.3975716633193354, + "grad_norm": 0.4962754547595978, + "learning_rate": 4.9483130038319875e-06, + "loss": 0.5207, + "step": 4355 + }, + { + "epoch": 0.397662954171992, + "grad_norm": 0.4642292857170105, + "learning_rate": 4.948288790379111e-06, + "loss": 0.5746, + "step": 4356 + }, + { + "epoch": 0.3977542450246485, + "grad_norm": 0.48718777298927307, + "learning_rate": 4.948264571315276e-06, + "loss": 0.5825, + "step": 4357 + }, + { + "epoch": 0.3978455358773051, + "grad_norm": 0.5086919069290161, + "learning_rate": 4.948240346640539e-06, + "loss": 0.5658, + "step": 4358 + }, + { + "epoch": 0.39793682672996167, + "grad_norm": 0.4831852912902832, + "learning_rate": 4.948216116354955e-06, + "loss": 0.6362, + "step": 4359 + }, + { + "epoch": 0.39802811758261825, + "grad_norm": 0.469838410615921, + "learning_rate": 4.948191880458581e-06, + "loss": 0.6014, + "step": 4360 + }, + { + "epoch": 0.39811940843527477, + "grad_norm": 0.4784356653690338, + "learning_rate": 4.94816763895147e-06, + "loss": 0.5728, + "step": 4361 + }, + { + "epoch": 0.39821069928793135, + "grad_norm": 0.4904325306415558, + "learning_rate": 4.94814339183368e-06, + "loss": 0.5879, + "step": 4362 + }, + { + "epoch": 0.3983019901405879, + "grad_norm": 0.458393394947052, + "learning_rate": 4.948119139105265e-06, + "loss": 0.6179, + "step": 4363 + }, + { + "epoch": 0.3983932809932445, + "grad_norm": 0.43929535150527954, + "learning_rate": 4.948094880766281e-06, + "loss": 0.596, + "step": 4364 + }, + { + "epoch": 0.398484571845901, + "grad_norm": 0.5005854964256287, + "learning_rate": 4.948070616816785e-06, + "loss": 0.5612, + "step": 4365 + }, + { + "epoch": 0.3985758626985576, + "grad_norm": 0.4381415843963623, + "learning_rate": 4.948046347256829e-06, + "loss": 0.6023, + "step": 4366 + }, + { + "epoch": 0.3986671535512142, + "grad_norm": 0.45074546337127686, + "learning_rate": 4.948022072086474e-06, + "loss": 0.621, + "step": 4367 + }, + { + "epoch": 0.39875844440387076, + "grad_norm": 0.48363980650901794, + "learning_rate": 4.9479977913057705e-06, + "loss": 0.5849, + "step": 4368 + }, + { + "epoch": 0.3988497352565273, + "grad_norm": 0.49910616874694824, + "learning_rate": 4.947973504914777e-06, + "loss": 0.5559, + "step": 4369 + }, + { + "epoch": 0.39894102610918386, + "grad_norm": 0.4882427453994751, + "learning_rate": 4.947949212913548e-06, + "loss": 0.5822, + "step": 4370 + }, + { + "epoch": 0.39903231696184044, + "grad_norm": 0.5278094410896301, + "learning_rate": 4.9479249153021396e-06, + "loss": 0.5632, + "step": 4371 + }, + { + "epoch": 0.39912360781449696, + "grad_norm": 0.47502613067626953, + "learning_rate": 4.947900612080607e-06, + "loss": 0.6189, + "step": 4372 + }, + { + "epoch": 0.39921489866715354, + "grad_norm": 0.4576374590396881, + "learning_rate": 4.947876303249007e-06, + "loss": 0.5402, + "step": 4373 + }, + { + "epoch": 0.3993061895198101, + "grad_norm": 0.4457067847251892, + "learning_rate": 4.947851988807395e-06, + "loss": 0.6032, + "step": 4374 + }, + { + "epoch": 0.3993974803724667, + "grad_norm": 0.4919951558113098, + "learning_rate": 4.947827668755826e-06, + "loss": 0.5694, + "step": 4375 + }, + { + "epoch": 0.3994887712251232, + "grad_norm": 0.47449180483818054, + "learning_rate": 4.947803343094356e-06, + "loss": 0.6131, + "step": 4376 + }, + { + "epoch": 0.3995800620777798, + "grad_norm": 0.48601171374320984, + "learning_rate": 4.94777901182304e-06, + "loss": 0.6008, + "step": 4377 + }, + { + "epoch": 0.3996713529304364, + "grad_norm": 0.48908042907714844, + "learning_rate": 4.947754674941937e-06, + "loss": 0.6054, + "step": 4378 + }, + { + "epoch": 0.39976264378309295, + "grad_norm": 0.47701749205589294, + "learning_rate": 4.947730332451098e-06, + "loss": 0.57, + "step": 4379 + }, + { + "epoch": 0.3998539346357495, + "grad_norm": 0.49300912022590637, + "learning_rate": 4.9477059843505825e-06, + "loss": 0.5674, + "step": 4380 + }, + { + "epoch": 0.39994522548840605, + "grad_norm": 0.4959001839160919, + "learning_rate": 4.9476816306404456e-06, + "loss": 0.5748, + "step": 4381 + }, + { + "epoch": 0.40003651634106263, + "grad_norm": 0.46727073192596436, + "learning_rate": 4.947657271320741e-06, + "loss": 0.5845, + "step": 4382 + }, + { + "epoch": 0.4001278071937192, + "grad_norm": 0.46819692850112915, + "learning_rate": 4.947632906391526e-06, + "loss": 0.614, + "step": 4383 + }, + { + "epoch": 0.40021909804637573, + "grad_norm": 0.46558985114097595, + "learning_rate": 4.947608535852858e-06, + "loss": 0.5995, + "step": 4384 + }, + { + "epoch": 0.4003103888990323, + "grad_norm": 0.502480149269104, + "learning_rate": 4.94758415970479e-06, + "loss": 0.5622, + "step": 4385 + }, + { + "epoch": 0.4004016797516889, + "grad_norm": 0.4719303846359253, + "learning_rate": 4.94755977794738e-06, + "loss": 0.6323, + "step": 4386 + }, + { + "epoch": 0.40049297060434547, + "grad_norm": 0.4891454577445984, + "learning_rate": 4.947535390580682e-06, + "loss": 0.5787, + "step": 4387 + }, + { + "epoch": 0.400584261457002, + "grad_norm": 0.5047417879104614, + "learning_rate": 4.947510997604754e-06, + "loss": 0.5672, + "step": 4388 + }, + { + "epoch": 0.40067555230965857, + "grad_norm": 0.5171422958374023, + "learning_rate": 4.947486599019651e-06, + "loss": 0.555, + "step": 4389 + }, + { + "epoch": 0.40076684316231515, + "grad_norm": 0.4613368809223175, + "learning_rate": 4.947462194825428e-06, + "loss": 0.6002, + "step": 4390 + }, + { + "epoch": 0.4008581340149717, + "grad_norm": 0.4901154935359955, + "learning_rate": 4.947437785022142e-06, + "loss": 0.5636, + "step": 4391 + }, + { + "epoch": 0.40094942486762825, + "grad_norm": 0.47928115725517273, + "learning_rate": 4.947413369609849e-06, + "loss": 0.5626, + "step": 4392 + }, + { + "epoch": 0.4010407157202848, + "grad_norm": 0.4839957356452942, + "learning_rate": 4.947388948588604e-06, + "loss": 0.5447, + "step": 4393 + }, + { + "epoch": 0.4011320065729414, + "grad_norm": 0.46137186884880066, + "learning_rate": 4.947364521958464e-06, + "loss": 0.5873, + "step": 4394 + }, + { + "epoch": 0.401223297425598, + "grad_norm": 0.4794152081012726, + "learning_rate": 4.947340089719484e-06, + "loss": 0.5866, + "step": 4395 + }, + { + "epoch": 0.4013145882782545, + "grad_norm": 0.47418123483657837, + "learning_rate": 4.947315651871722e-06, + "loss": 0.598, + "step": 4396 + }, + { + "epoch": 0.4014058791309111, + "grad_norm": 0.49530115723609924, + "learning_rate": 4.947291208415231e-06, + "loss": 0.5955, + "step": 4397 + }, + { + "epoch": 0.40149716998356766, + "grad_norm": 0.4579787254333496, + "learning_rate": 4.947266759350069e-06, + "loss": 0.5903, + "step": 4398 + }, + { + "epoch": 0.40158846083622424, + "grad_norm": 0.4363807439804077, + "learning_rate": 4.947242304676292e-06, + "loss": 0.5802, + "step": 4399 + }, + { + "epoch": 0.40167975168888076, + "grad_norm": 0.4929395318031311, + "learning_rate": 4.947217844393955e-06, + "loss": 0.5651, + "step": 4400 + }, + { + "epoch": 0.40177104254153734, + "grad_norm": 0.5082393884658813, + "learning_rate": 4.947193378503115e-06, + "loss": 0.5674, + "step": 4401 + }, + { + "epoch": 0.4018623333941939, + "grad_norm": 0.49886417388916016, + "learning_rate": 4.9471689070038275e-06, + "loss": 0.5642, + "step": 4402 + }, + { + "epoch": 0.40195362424685044, + "grad_norm": 0.5002443194389343, + "learning_rate": 4.94714442989615e-06, + "loss": 0.5435, + "step": 4403 + }, + { + "epoch": 0.402044915099507, + "grad_norm": 0.49338576197624207, + "learning_rate": 4.947119947180137e-06, + "loss": 0.5756, + "step": 4404 + }, + { + "epoch": 0.4021362059521636, + "grad_norm": 0.49672046303749084, + "learning_rate": 4.947095458855844e-06, + "loss": 0.6026, + "step": 4405 + }, + { + "epoch": 0.4022274968048202, + "grad_norm": 0.4637942314147949, + "learning_rate": 4.947070964923329e-06, + "loss": 0.5513, + "step": 4406 + }, + { + "epoch": 0.4023187876574767, + "grad_norm": 0.47356486320495605, + "learning_rate": 4.947046465382647e-06, + "loss": 0.6324, + "step": 4407 + }, + { + "epoch": 0.4024100785101333, + "grad_norm": 0.4927634596824646, + "learning_rate": 4.947021960233855e-06, + "loss": 0.5452, + "step": 4408 + }, + { + "epoch": 0.40250136936278985, + "grad_norm": 0.4974652826786041, + "learning_rate": 4.946997449477008e-06, + "loss": 0.5931, + "step": 4409 + }, + { + "epoch": 0.40259266021544643, + "grad_norm": 0.48273077607154846, + "learning_rate": 4.9469729331121635e-06, + "loss": 0.5696, + "step": 4410 + }, + { + "epoch": 0.40268395106810295, + "grad_norm": 0.44702839851379395, + "learning_rate": 4.946948411139377e-06, + "loss": 0.5941, + "step": 4411 + }, + { + "epoch": 0.40277524192075953, + "grad_norm": 0.5081918835639954, + "learning_rate": 4.946923883558704e-06, + "loss": 0.5685, + "step": 4412 + }, + { + "epoch": 0.4028665327734161, + "grad_norm": 0.4733688235282898, + "learning_rate": 4.946899350370202e-06, + "loss": 0.5876, + "step": 4413 + }, + { + "epoch": 0.4029578236260727, + "grad_norm": 0.4657723605632782, + "learning_rate": 4.946874811573926e-06, + "loss": 0.5874, + "step": 4414 + }, + { + "epoch": 0.4030491144787292, + "grad_norm": 0.4518342614173889, + "learning_rate": 4.946850267169934e-06, + "loss": 0.6157, + "step": 4415 + }, + { + "epoch": 0.4031404053313858, + "grad_norm": 0.4967706799507141, + "learning_rate": 4.94682571715828e-06, + "loss": 0.5917, + "step": 4416 + }, + { + "epoch": 0.40323169618404237, + "grad_norm": 0.4798814058303833, + "learning_rate": 4.946801161539022e-06, + "loss": 0.5948, + "step": 4417 + }, + { + "epoch": 0.40332298703669894, + "grad_norm": 0.4741792678833008, + "learning_rate": 4.946776600312215e-06, + "loss": 0.573, + "step": 4418 + }, + { + "epoch": 0.40341427788935547, + "grad_norm": 0.4560060203075409, + "learning_rate": 4.946752033477917e-06, + "loss": 0.616, + "step": 4419 + }, + { + "epoch": 0.40350556874201204, + "grad_norm": 0.4393603801727295, + "learning_rate": 4.946727461036183e-06, + "loss": 0.6138, + "step": 4420 + }, + { + "epoch": 0.4035968595946686, + "grad_norm": 0.4753960371017456, + "learning_rate": 4.946702882987069e-06, + "loss": 0.5791, + "step": 4421 + }, + { + "epoch": 0.4036881504473252, + "grad_norm": 0.4788323938846588, + "learning_rate": 4.9466782993306314e-06, + "loss": 0.6091, + "step": 4422 + }, + { + "epoch": 0.4037794412999817, + "grad_norm": 0.47807034850120544, + "learning_rate": 4.946653710066928e-06, + "loss": 0.594, + "step": 4423 + }, + { + "epoch": 0.4038707321526383, + "grad_norm": 0.519965648651123, + "learning_rate": 4.946629115196014e-06, + "loss": 0.5418, + "step": 4424 + }, + { + "epoch": 0.4039620230052949, + "grad_norm": 0.47133851051330566, + "learning_rate": 4.946604514717945e-06, + "loss": 0.615, + "step": 4425 + }, + { + "epoch": 0.40405331385795146, + "grad_norm": 0.49337834119796753, + "learning_rate": 4.94657990863278e-06, + "loss": 0.571, + "step": 4426 + }, + { + "epoch": 0.404144604710608, + "grad_norm": 0.516957700252533, + "learning_rate": 4.946555296940573e-06, + "loss": 0.5728, + "step": 4427 + }, + { + "epoch": 0.40423589556326456, + "grad_norm": 0.48010799288749695, + "learning_rate": 4.946530679641381e-06, + "loss": 0.5734, + "step": 4428 + }, + { + "epoch": 0.40432718641592114, + "grad_norm": 0.4561481475830078, + "learning_rate": 4.946506056735261e-06, + "loss": 0.5823, + "step": 4429 + }, + { + "epoch": 0.4044184772685777, + "grad_norm": 0.4694233238697052, + "learning_rate": 4.946481428222268e-06, + "loss": 0.5992, + "step": 4430 + }, + { + "epoch": 0.40450976812123424, + "grad_norm": 0.48635947704315186, + "learning_rate": 4.94645679410246e-06, + "loss": 0.5773, + "step": 4431 + }, + { + "epoch": 0.4046010589738908, + "grad_norm": 0.46979594230651855, + "learning_rate": 4.946432154375893e-06, + "loss": 0.5958, + "step": 4432 + }, + { + "epoch": 0.4046923498265474, + "grad_norm": 0.4647224247455597, + "learning_rate": 4.946407509042623e-06, + "loss": 0.6388, + "step": 4433 + }, + { + "epoch": 0.40478364067920397, + "grad_norm": 0.4875044524669647, + "learning_rate": 4.946382858102707e-06, + "loss": 0.5757, + "step": 4434 + }, + { + "epoch": 0.4048749315318605, + "grad_norm": 0.4628136456012726, + "learning_rate": 4.946358201556202e-06, + "loss": 0.575, + "step": 4435 + }, + { + "epoch": 0.40496622238451707, + "grad_norm": 0.4980124533176422, + "learning_rate": 4.946333539403163e-06, + "loss": 0.5575, + "step": 4436 + }, + { + "epoch": 0.40505751323717365, + "grad_norm": 0.5141398906707764, + "learning_rate": 4.946308871643647e-06, + "loss": 0.5456, + "step": 4437 + }, + { + "epoch": 0.4051488040898302, + "grad_norm": 0.4867932200431824, + "learning_rate": 4.9462841982777125e-06, + "loss": 0.6014, + "step": 4438 + }, + { + "epoch": 0.40524009494248675, + "grad_norm": 0.4576658308506012, + "learning_rate": 4.946259519305413e-06, + "loss": 0.5997, + "step": 4439 + }, + { + "epoch": 0.40533138579514333, + "grad_norm": 0.5277939438819885, + "learning_rate": 4.946234834726806e-06, + "loss": 0.5637, + "step": 4440 + }, + { + "epoch": 0.4054226766477999, + "grad_norm": 0.48776841163635254, + "learning_rate": 4.94621014454195e-06, + "loss": 0.5686, + "step": 4441 + }, + { + "epoch": 0.40551396750045643, + "grad_norm": 0.48577895760536194, + "learning_rate": 4.946185448750899e-06, + "loss": 0.6223, + "step": 4442 + }, + { + "epoch": 0.405605258353113, + "grad_norm": 0.4958188831806183, + "learning_rate": 4.946160747353712e-06, + "loss": 0.5675, + "step": 4443 + }, + { + "epoch": 0.4056965492057696, + "grad_norm": 0.47651100158691406, + "learning_rate": 4.946136040350443e-06, + "loss": 0.5543, + "step": 4444 + }, + { + "epoch": 0.40578784005842616, + "grad_norm": 0.49033477902412415, + "learning_rate": 4.946111327741151e-06, + "loss": 0.6108, + "step": 4445 + }, + { + "epoch": 0.4058791309110827, + "grad_norm": 0.4712710380554199, + "learning_rate": 4.9460866095258906e-06, + "loss": 0.571, + "step": 4446 + }, + { + "epoch": 0.40597042176373926, + "grad_norm": 0.4103664755821228, + "learning_rate": 4.94606188570472e-06, + "loss": 0.5839, + "step": 4447 + }, + { + "epoch": 0.40606171261639584, + "grad_norm": 0.4721471667289734, + "learning_rate": 4.946037156277696e-06, + "loss": 0.5983, + "step": 4448 + }, + { + "epoch": 0.4061530034690524, + "grad_norm": 0.486253559589386, + "learning_rate": 4.946012421244874e-06, + "loss": 0.5944, + "step": 4449 + }, + { + "epoch": 0.40624429432170894, + "grad_norm": 0.47195863723754883, + "learning_rate": 4.945987680606311e-06, + "loss": 0.5909, + "step": 4450 + }, + { + "epoch": 0.4063355851743655, + "grad_norm": 0.5070449113845825, + "learning_rate": 4.945962934362064e-06, + "loss": 0.5516, + "step": 4451 + }, + { + "epoch": 0.4064268760270221, + "grad_norm": 0.4993434250354767, + "learning_rate": 4.945938182512191e-06, + "loss": 0.5725, + "step": 4452 + }, + { + "epoch": 0.4065181668796787, + "grad_norm": 0.4664149880409241, + "learning_rate": 4.9459134250567455e-06, + "loss": 0.559, + "step": 4453 + }, + { + "epoch": 0.4066094577323352, + "grad_norm": 0.4469369351863861, + "learning_rate": 4.945888661995788e-06, + "loss": 0.5985, + "step": 4454 + }, + { + "epoch": 0.4067007485849918, + "grad_norm": 0.48394304513931274, + "learning_rate": 4.945863893329372e-06, + "loss": 0.6191, + "step": 4455 + }, + { + "epoch": 0.40679203943764836, + "grad_norm": 0.49601680040359497, + "learning_rate": 4.945839119057556e-06, + "loss": 0.5876, + "step": 4456 + }, + { + "epoch": 0.40688333029030493, + "grad_norm": 0.4804690182209015, + "learning_rate": 4.945814339180397e-06, + "loss": 0.6072, + "step": 4457 + }, + { + "epoch": 0.40697462114296146, + "grad_norm": 0.45565417408943176, + "learning_rate": 4.945789553697951e-06, + "loss": 0.5929, + "step": 4458 + }, + { + "epoch": 0.40706591199561803, + "grad_norm": 0.45308488607406616, + "learning_rate": 4.9457647626102745e-06, + "loss": 0.6161, + "step": 4459 + }, + { + "epoch": 0.4071572028482746, + "grad_norm": 0.48810291290283203, + "learning_rate": 4.9457399659174256e-06, + "loss": 0.557, + "step": 4460 + }, + { + "epoch": 0.4072484937009312, + "grad_norm": 0.45909473299980164, + "learning_rate": 4.94571516361946e-06, + "loss": 0.6018, + "step": 4461 + }, + { + "epoch": 0.4073397845535877, + "grad_norm": 0.4529067575931549, + "learning_rate": 4.945690355716435e-06, + "loss": 0.5834, + "step": 4462 + }, + { + "epoch": 0.4074310754062443, + "grad_norm": 0.522181510925293, + "learning_rate": 4.945665542208406e-06, + "loss": 0.5525, + "step": 4463 + }, + { + "epoch": 0.40752236625890087, + "grad_norm": 0.47789958119392395, + "learning_rate": 4.945640723095434e-06, + "loss": 0.6302, + "step": 4464 + }, + { + "epoch": 0.40761365711155745, + "grad_norm": 0.4722321629524231, + "learning_rate": 4.945615898377571e-06, + "loss": 0.5688, + "step": 4465 + }, + { + "epoch": 0.40770494796421397, + "grad_norm": 0.47465208172798157, + "learning_rate": 4.945591068054877e-06, + "loss": 0.6064, + "step": 4466 + }, + { + "epoch": 0.40779623881687055, + "grad_norm": 0.45968514680862427, + "learning_rate": 4.945566232127408e-06, + "loss": 0.6117, + "step": 4467 + }, + { + "epoch": 0.4078875296695271, + "grad_norm": 0.5037714242935181, + "learning_rate": 4.94554139059522e-06, + "loss": 0.5906, + "step": 4468 + }, + { + "epoch": 0.40797882052218365, + "grad_norm": 0.48313525319099426, + "learning_rate": 4.945516543458372e-06, + "loss": 0.5744, + "step": 4469 + }, + { + "epoch": 0.4080701113748402, + "grad_norm": 0.4985443949699402, + "learning_rate": 4.945491690716919e-06, + "loss": 0.5708, + "step": 4470 + }, + { + "epoch": 0.4081614022274968, + "grad_norm": 0.4898921847343445, + "learning_rate": 4.9454668323709186e-06, + "loss": 0.5781, + "step": 4471 + }, + { + "epoch": 0.4082526930801534, + "grad_norm": 0.4690345823764801, + "learning_rate": 4.945441968420428e-06, + "loss": 0.5605, + "step": 4472 + }, + { + "epoch": 0.4083439839328099, + "grad_norm": 0.4583304822444916, + "learning_rate": 4.945417098865504e-06, + "loss": 0.5831, + "step": 4473 + }, + { + "epoch": 0.4084352747854665, + "grad_norm": 0.4524518549442291, + "learning_rate": 4.945392223706204e-06, + "loss": 0.5873, + "step": 4474 + }, + { + "epoch": 0.40852656563812306, + "grad_norm": 0.48284655809402466, + "learning_rate": 4.945367342942584e-06, + "loss": 0.5935, + "step": 4475 + }, + { + "epoch": 0.40861785649077964, + "grad_norm": 0.5028814673423767, + "learning_rate": 4.945342456574702e-06, + "loss": 0.5829, + "step": 4476 + }, + { + "epoch": 0.40870914734343616, + "grad_norm": 0.4645330607891083, + "learning_rate": 4.945317564602615e-06, + "loss": 0.5861, + "step": 4477 + }, + { + "epoch": 0.40880043819609274, + "grad_norm": 0.454140841960907, + "learning_rate": 4.94529266702638e-06, + "loss": 0.5875, + "step": 4478 + }, + { + "epoch": 0.4088917290487493, + "grad_norm": 0.4420640766620636, + "learning_rate": 4.945267763846054e-06, + "loss": 0.5863, + "step": 4479 + }, + { + "epoch": 0.4089830199014059, + "grad_norm": 0.467073529958725, + "learning_rate": 4.945242855061692e-06, + "loss": 0.5678, + "step": 4480 + }, + { + "epoch": 0.4090743107540624, + "grad_norm": 0.4614661633968353, + "learning_rate": 4.945217940673355e-06, + "loss": 0.6072, + "step": 4481 + }, + { + "epoch": 0.409165601606719, + "grad_norm": 0.4879530072212219, + "learning_rate": 4.945193020681097e-06, + "loss": 0.582, + "step": 4482 + }, + { + "epoch": 0.4092568924593756, + "grad_norm": 0.49943625926971436, + "learning_rate": 4.945168095084976e-06, + "loss": 0.5529, + "step": 4483 + }, + { + "epoch": 0.40934818331203215, + "grad_norm": 0.48123911023139954, + "learning_rate": 4.94514316388505e-06, + "loss": 0.5892, + "step": 4484 + }, + { + "epoch": 0.4094394741646887, + "grad_norm": 0.47406116127967834, + "learning_rate": 4.945118227081375e-06, + "loss": 0.5957, + "step": 4485 + }, + { + "epoch": 0.40953076501734526, + "grad_norm": 0.5074129104614258, + "learning_rate": 4.945093284674009e-06, + "loss": 0.5492, + "step": 4486 + }, + { + "epoch": 0.40962205587000183, + "grad_norm": 0.4659130871295929, + "learning_rate": 4.945068336663008e-06, + "loss": 0.5877, + "step": 4487 + }, + { + "epoch": 0.4097133467226584, + "grad_norm": 0.4655013978481293, + "learning_rate": 4.94504338304843e-06, + "loss": 0.5952, + "step": 4488 + }, + { + "epoch": 0.40980463757531493, + "grad_norm": 0.48271921277046204, + "learning_rate": 4.9450184238303325e-06, + "loss": 0.5792, + "step": 4489 + }, + { + "epoch": 0.4098959284279715, + "grad_norm": 0.44989171624183655, + "learning_rate": 4.944993459008772e-06, + "loss": 0.6024, + "step": 4490 + }, + { + "epoch": 0.4099872192806281, + "grad_norm": 0.47570326924324036, + "learning_rate": 4.9449684885838066e-06, + "loss": 0.5816, + "step": 4491 + }, + { + "epoch": 0.41007851013328467, + "grad_norm": 0.4954683184623718, + "learning_rate": 4.944943512555493e-06, + "loss": 0.5284, + "step": 4492 + }, + { + "epoch": 0.4101698009859412, + "grad_norm": 0.47905412316322327, + "learning_rate": 4.944918530923888e-06, + "loss": 0.5793, + "step": 4493 + }, + { + "epoch": 0.41026109183859777, + "grad_norm": 0.5096309185028076, + "learning_rate": 4.944893543689049e-06, + "loss": 0.611, + "step": 4494 + }, + { + "epoch": 0.41035238269125435, + "grad_norm": 0.5028529167175293, + "learning_rate": 4.944868550851034e-06, + "loss": 0.553, + "step": 4495 + }, + { + "epoch": 0.4104436735439109, + "grad_norm": 0.46258437633514404, + "learning_rate": 4.9448435524099005e-06, + "loss": 0.602, + "step": 4496 + }, + { + "epoch": 0.41053496439656745, + "grad_norm": 0.4643208980560303, + "learning_rate": 4.9448185483657035e-06, + "loss": 0.5551, + "step": 4497 + }, + { + "epoch": 0.410626255249224, + "grad_norm": 0.47520434856414795, + "learning_rate": 4.944793538718503e-06, + "loss": 0.6275, + "step": 4498 + }, + { + "epoch": 0.4107175461018806, + "grad_norm": 0.47931942343711853, + "learning_rate": 4.944768523468355e-06, + "loss": 0.6062, + "step": 4499 + }, + { + "epoch": 0.4108088369545372, + "grad_norm": 0.4620901942253113, + "learning_rate": 4.944743502615317e-06, + "loss": 0.5717, + "step": 4500 + }, + { + "epoch": 0.4109001278071937, + "grad_norm": 0.47074493765830994, + "learning_rate": 4.9447184761594454e-06, + "loss": 0.593, + "step": 4501 + }, + { + "epoch": 0.4109914186598503, + "grad_norm": 0.45173919200897217, + "learning_rate": 4.9446934441008e-06, + "loss": 0.6158, + "step": 4502 + }, + { + "epoch": 0.41108270951250686, + "grad_norm": 0.4708966016769409, + "learning_rate": 4.944668406439436e-06, + "loss": 0.5994, + "step": 4503 + }, + { + "epoch": 0.4111740003651634, + "grad_norm": 0.4754493236541748, + "learning_rate": 4.944643363175411e-06, + "loss": 0.5808, + "step": 4504 + }, + { + "epoch": 0.41126529121781996, + "grad_norm": 0.4861278831958771, + "learning_rate": 4.9446183143087835e-06, + "loss": 0.5433, + "step": 4505 + }, + { + "epoch": 0.41135658207047654, + "grad_norm": 0.45680704712867737, + "learning_rate": 4.944593259839611e-06, + "loss": 0.5615, + "step": 4506 + }, + { + "epoch": 0.4114478729231331, + "grad_norm": 0.45038992166519165, + "learning_rate": 4.9445681997679484e-06, + "loss": 0.59, + "step": 4507 + }, + { + "epoch": 0.41153916377578964, + "grad_norm": 0.4691777229309082, + "learning_rate": 4.944543134093856e-06, + "loss": 0.5985, + "step": 4508 + }, + { + "epoch": 0.4116304546284462, + "grad_norm": 0.46402427554130554, + "learning_rate": 4.9445180628173905e-06, + "loss": 0.5518, + "step": 4509 + }, + { + "epoch": 0.4117217454811028, + "grad_norm": 0.4578935205936432, + "learning_rate": 4.944492985938608e-06, + "loss": 0.5807, + "step": 4510 + }, + { + "epoch": 0.4118130363337594, + "grad_norm": 0.45398691296577454, + "learning_rate": 4.944467903457568e-06, + "loss": 0.6011, + "step": 4511 + }, + { + "epoch": 0.4119043271864159, + "grad_norm": 0.4778085947036743, + "learning_rate": 4.944442815374327e-06, + "loss": 0.5983, + "step": 4512 + }, + { + "epoch": 0.4119956180390725, + "grad_norm": 0.46987301111221313, + "learning_rate": 4.9444177216889426e-06, + "loss": 0.5874, + "step": 4513 + }, + { + "epoch": 0.41208690889172905, + "grad_norm": 0.4619865417480469, + "learning_rate": 4.944392622401471e-06, + "loss": 0.5707, + "step": 4514 + }, + { + "epoch": 0.41217819974438563, + "grad_norm": 0.513526439666748, + "learning_rate": 4.9443675175119715e-06, + "loss": 0.5646, + "step": 4515 + }, + { + "epoch": 0.41226949059704215, + "grad_norm": 0.448363721370697, + "learning_rate": 4.944342407020501e-06, + "loss": 0.5992, + "step": 4516 + }, + { + "epoch": 0.41236078144969873, + "grad_norm": 0.45611587166786194, + "learning_rate": 4.9443172909271174e-06, + "loss": 0.6241, + "step": 4517 + }, + { + "epoch": 0.4124520723023553, + "grad_norm": 0.4546946585178375, + "learning_rate": 4.944292169231878e-06, + "loss": 0.6031, + "step": 4518 + }, + { + "epoch": 0.4125433631550119, + "grad_norm": 0.4883200526237488, + "learning_rate": 4.94426704193484e-06, + "loss": 0.5857, + "step": 4519 + }, + { + "epoch": 0.4126346540076684, + "grad_norm": 0.49559643864631653, + "learning_rate": 4.944241909036062e-06, + "loss": 0.5635, + "step": 4520 + }, + { + "epoch": 0.412725944860325, + "grad_norm": 0.44929221272468567, + "learning_rate": 4.944216770535601e-06, + "loss": 0.586, + "step": 4521 + }, + { + "epoch": 0.41281723571298157, + "grad_norm": 0.4569617509841919, + "learning_rate": 4.944191626433513e-06, + "loss": 0.5907, + "step": 4522 + }, + { + "epoch": 0.41290852656563815, + "grad_norm": 0.48181670904159546, + "learning_rate": 4.944166476729858e-06, + "loss": 0.535, + "step": 4523 + }, + { + "epoch": 0.41299981741829467, + "grad_norm": 0.45303797721862793, + "learning_rate": 4.944141321424694e-06, + "loss": 0.6324, + "step": 4524 + }, + { + "epoch": 0.41309110827095125, + "grad_norm": 0.5140052437782288, + "learning_rate": 4.944116160518076e-06, + "loss": 0.6128, + "step": 4525 + }, + { + "epoch": 0.4131823991236078, + "grad_norm": 0.4697018265724182, + "learning_rate": 4.9440909940100636e-06, + "loss": 0.5603, + "step": 4526 + }, + { + "epoch": 0.4132736899762644, + "grad_norm": 0.45024970173835754, + "learning_rate": 4.944065821900713e-06, + "loss": 0.5328, + "step": 4527 + }, + { + "epoch": 0.4133649808289209, + "grad_norm": 0.4676574170589447, + "learning_rate": 4.944040644190084e-06, + "loss": 0.6006, + "step": 4528 + }, + { + "epoch": 0.4134562716815775, + "grad_norm": 0.5148577690124512, + "learning_rate": 4.944015460878233e-06, + "loss": 0.5182, + "step": 4529 + }, + { + "epoch": 0.4135475625342341, + "grad_norm": 0.4891408383846283, + "learning_rate": 4.943990271965218e-06, + "loss": 0.5938, + "step": 4530 + }, + { + "epoch": 0.41363885338689066, + "grad_norm": 0.4495580494403839, + "learning_rate": 4.943965077451096e-06, + "loss": 0.6158, + "step": 4531 + }, + { + "epoch": 0.4137301442395472, + "grad_norm": 0.4933204650878906, + "learning_rate": 4.9439398773359256e-06, + "loss": 0.5692, + "step": 4532 + }, + { + "epoch": 0.41382143509220376, + "grad_norm": 0.4839233458042145, + "learning_rate": 4.943914671619765e-06, + "loss": 0.5678, + "step": 4533 + }, + { + "epoch": 0.41391272594486034, + "grad_norm": 0.47123828530311584, + "learning_rate": 4.943889460302671e-06, + "loss": 0.5648, + "step": 4534 + }, + { + "epoch": 0.4140040167975169, + "grad_norm": 0.4553075134754181, + "learning_rate": 4.943864243384701e-06, + "loss": 0.5846, + "step": 4535 + }, + { + "epoch": 0.41409530765017344, + "grad_norm": 0.4973938465118408, + "learning_rate": 4.943839020865915e-06, + "loss": 0.5892, + "step": 4536 + }, + { + "epoch": 0.41418659850283, + "grad_norm": 0.46481946110725403, + "learning_rate": 4.943813792746367e-06, + "loss": 0.5644, + "step": 4537 + }, + { + "epoch": 0.4142778893554866, + "grad_norm": 0.4799150824546814, + "learning_rate": 4.943788559026119e-06, + "loss": 0.5723, + "step": 4538 + }, + { + "epoch": 0.4143691802081431, + "grad_norm": 0.48294949531555176, + "learning_rate": 4.943763319705226e-06, + "loss": 0.585, + "step": 4539 + }, + { + "epoch": 0.4144604710607997, + "grad_norm": 0.4761234223842621, + "learning_rate": 4.9437380747837465e-06, + "loss": 0.5848, + "step": 4540 + }, + { + "epoch": 0.4145517619134563, + "grad_norm": 0.479470431804657, + "learning_rate": 4.94371282426174e-06, + "loss": 0.6164, + "step": 4541 + }, + { + "epoch": 0.41464305276611285, + "grad_norm": 0.4782920181751251, + "learning_rate": 4.943687568139261e-06, + "loss": 0.5843, + "step": 4542 + }, + { + "epoch": 0.4147343436187694, + "grad_norm": 0.4545917510986328, + "learning_rate": 4.94366230641637e-06, + "loss": 0.6173, + "step": 4543 + }, + { + "epoch": 0.41482563447142595, + "grad_norm": 0.4522324800491333, + "learning_rate": 4.943637039093125e-06, + "loss": 0.6238, + "step": 4544 + }, + { + "epoch": 0.41491692532408253, + "grad_norm": 0.46290746331214905, + "learning_rate": 4.943611766169583e-06, + "loss": 0.5806, + "step": 4545 + }, + { + "epoch": 0.4150082161767391, + "grad_norm": 0.45545342564582825, + "learning_rate": 4.943586487645802e-06, + "loss": 0.5943, + "step": 4546 + }, + { + "epoch": 0.41509950702939563, + "grad_norm": 0.48169517517089844, + "learning_rate": 4.94356120352184e-06, + "loss": 0.5347, + "step": 4547 + }, + { + "epoch": 0.4151907978820522, + "grad_norm": 0.46752986311912537, + "learning_rate": 4.943535913797754e-06, + "loss": 0.5929, + "step": 4548 + }, + { + "epoch": 0.4152820887347088, + "grad_norm": 0.4793696999549866, + "learning_rate": 4.943510618473604e-06, + "loss": 0.5832, + "step": 4549 + }, + { + "epoch": 0.41537337958736537, + "grad_norm": 0.44650763273239136, + "learning_rate": 4.943485317549446e-06, + "loss": 0.6508, + "step": 4550 + }, + { + "epoch": 0.4154646704400219, + "grad_norm": 0.4716206192970276, + "learning_rate": 4.943460011025339e-06, + "loss": 0.5216, + "step": 4551 + }, + { + "epoch": 0.41555596129267847, + "grad_norm": 0.48047155141830444, + "learning_rate": 4.9434346989013416e-06, + "loss": 0.5693, + "step": 4552 + }, + { + "epoch": 0.41564725214533504, + "grad_norm": 0.5191252827644348, + "learning_rate": 4.943409381177511e-06, + "loss": 0.5716, + "step": 4553 + }, + { + "epoch": 0.4157385429979916, + "grad_norm": 0.489953875541687, + "learning_rate": 4.943384057853905e-06, + "loss": 0.5577, + "step": 4554 + }, + { + "epoch": 0.41582983385064815, + "grad_norm": 0.5107035040855408, + "learning_rate": 4.943358728930582e-06, + "loss": 0.5578, + "step": 4555 + }, + { + "epoch": 0.4159211247033047, + "grad_norm": 0.5203949213027954, + "learning_rate": 4.9433333944076e-06, + "loss": 0.5474, + "step": 4556 + }, + { + "epoch": 0.4160124155559613, + "grad_norm": 0.4889354705810547, + "learning_rate": 4.943308054285017e-06, + "loss": 0.556, + "step": 4557 + }, + { + "epoch": 0.4161037064086179, + "grad_norm": 0.4769100844860077, + "learning_rate": 4.943282708562891e-06, + "loss": 0.602, + "step": 4558 + }, + { + "epoch": 0.4161949972612744, + "grad_norm": 0.4801377058029175, + "learning_rate": 4.943257357241281e-06, + "loss": 0.6143, + "step": 4559 + }, + { + "epoch": 0.416286288113931, + "grad_norm": 0.4808768630027771, + "learning_rate": 4.943232000320243e-06, + "loss": 0.512, + "step": 4560 + }, + { + "epoch": 0.41637757896658756, + "grad_norm": 0.47150081396102905, + "learning_rate": 4.943206637799837e-06, + "loss": 0.5884, + "step": 4561 + }, + { + "epoch": 0.41646886981924414, + "grad_norm": 0.47526979446411133, + "learning_rate": 4.943181269680121e-06, + "loss": 0.6256, + "step": 4562 + }, + { + "epoch": 0.41656016067190066, + "grad_norm": 0.47925102710723877, + "learning_rate": 4.943155895961152e-06, + "loss": 0.5162, + "step": 4563 + }, + { + "epoch": 0.41665145152455724, + "grad_norm": 0.48005741834640503, + "learning_rate": 4.943130516642989e-06, + "loss": 0.599, + "step": 4564 + }, + { + "epoch": 0.4167427423772138, + "grad_norm": 0.45252785086631775, + "learning_rate": 4.943105131725691e-06, + "loss": 0.5745, + "step": 4565 + }, + { + "epoch": 0.4168340332298704, + "grad_norm": 0.44481223821640015, + "learning_rate": 4.943079741209313e-06, + "loss": 0.6225, + "step": 4566 + }, + { + "epoch": 0.4169253240825269, + "grad_norm": 0.4590763747692108, + "learning_rate": 4.943054345093917e-06, + "loss": 0.6013, + "step": 4567 + }, + { + "epoch": 0.4170166149351835, + "grad_norm": 0.4662605822086334, + "learning_rate": 4.94302894337956e-06, + "loss": 0.5933, + "step": 4568 + }, + { + "epoch": 0.41710790578784007, + "grad_norm": 0.4654122591018677, + "learning_rate": 4.943003536066299e-06, + "loss": 0.5419, + "step": 4569 + }, + { + "epoch": 0.4171991966404966, + "grad_norm": 0.4621468782424927, + "learning_rate": 4.9429781231541925e-06, + "loss": 0.5792, + "step": 4570 + }, + { + "epoch": 0.4172904874931532, + "grad_norm": 0.4828449785709381, + "learning_rate": 4.9429527046433e-06, + "loss": 0.6025, + "step": 4571 + }, + { + "epoch": 0.41738177834580975, + "grad_norm": 0.47575491666793823, + "learning_rate": 4.942927280533679e-06, + "loss": 0.555, + "step": 4572 + }, + { + "epoch": 0.41747306919846633, + "grad_norm": 0.45552974939346313, + "learning_rate": 4.9429018508253865e-06, + "loss": 0.6301, + "step": 4573 + }, + { + "epoch": 0.41756436005112285, + "grad_norm": 0.5010495781898499, + "learning_rate": 4.9428764155184835e-06, + "loss": 0.5477, + "step": 4574 + }, + { + "epoch": 0.41765565090377943, + "grad_norm": 0.48649007081985474, + "learning_rate": 4.942850974613027e-06, + "loss": 0.56, + "step": 4575 + }, + { + "epoch": 0.417746941756436, + "grad_norm": 0.479426771402359, + "learning_rate": 4.942825528109074e-06, + "loss": 0.5865, + "step": 4576 + }, + { + "epoch": 0.4178382326090926, + "grad_norm": 0.5047967433929443, + "learning_rate": 4.942800076006685e-06, + "loss": 0.5503, + "step": 4577 + }, + { + "epoch": 0.4179295234617491, + "grad_norm": 0.4555743932723999, + "learning_rate": 4.9427746183059165e-06, + "loss": 0.5519, + "step": 4578 + }, + { + "epoch": 0.4180208143144057, + "grad_norm": 0.48424315452575684, + "learning_rate": 4.942749155006829e-06, + "loss": 0.5853, + "step": 4579 + }, + { + "epoch": 0.41811210516706226, + "grad_norm": 0.47826850414276123, + "learning_rate": 4.942723686109477e-06, + "loss": 0.5795, + "step": 4580 + }, + { + "epoch": 0.41820339601971884, + "grad_norm": 0.48555776476860046, + "learning_rate": 4.942698211613923e-06, + "loss": 0.6353, + "step": 4581 + }, + { + "epoch": 0.41829468687237537, + "grad_norm": 0.44426679611206055, + "learning_rate": 4.9426727315202235e-06, + "loss": 0.5603, + "step": 4582 + }, + { + "epoch": 0.41838597772503194, + "grad_norm": 0.4665086269378662, + "learning_rate": 4.942647245828438e-06, + "loss": 0.6231, + "step": 4583 + }, + { + "epoch": 0.4184772685776885, + "grad_norm": 0.4699735641479492, + "learning_rate": 4.942621754538624e-06, + "loss": 0.5798, + "step": 4584 + }, + { + "epoch": 0.4185685594303451, + "grad_norm": 0.4894944131374359, + "learning_rate": 4.942596257650838e-06, + "loss": 0.5753, + "step": 4585 + }, + { + "epoch": 0.4186598502830016, + "grad_norm": 0.5209959149360657, + "learning_rate": 4.942570755165142e-06, + "loss": 0.5329, + "step": 4586 + }, + { + "epoch": 0.4187511411356582, + "grad_norm": 0.4976615011692047, + "learning_rate": 4.942545247081593e-06, + "loss": 0.6315, + "step": 4587 + }, + { + "epoch": 0.4188424319883148, + "grad_norm": 0.49282383918762207, + "learning_rate": 4.9425197334002485e-06, + "loss": 0.5669, + "step": 4588 + }, + { + "epoch": 0.41893372284097136, + "grad_norm": 0.4704790413379669, + "learning_rate": 4.942494214121168e-06, + "loss": 0.5798, + "step": 4589 + }, + { + "epoch": 0.4190250136936279, + "grad_norm": 0.45732420682907104, + "learning_rate": 4.94246868924441e-06, + "loss": 0.5823, + "step": 4590 + }, + { + "epoch": 0.41911630454628446, + "grad_norm": 0.47733527421951294, + "learning_rate": 4.942443158770033e-06, + "loss": 0.5858, + "step": 4591 + }, + { + "epoch": 0.41920759539894104, + "grad_norm": 0.49334654211997986, + "learning_rate": 4.942417622698095e-06, + "loss": 0.5698, + "step": 4592 + }, + { + "epoch": 0.4192988862515976, + "grad_norm": 0.4583303928375244, + "learning_rate": 4.942392081028654e-06, + "loss": 0.592, + "step": 4593 + }, + { + "epoch": 0.41939017710425414, + "grad_norm": 0.5107859969139099, + "learning_rate": 4.94236653376177e-06, + "loss": 0.553, + "step": 4594 + }, + { + "epoch": 0.4194814679569107, + "grad_norm": 0.48658543825149536, + "learning_rate": 4.942340980897501e-06, + "loss": 0.5472, + "step": 4595 + }, + { + "epoch": 0.4195727588095673, + "grad_norm": 0.48184165358543396, + "learning_rate": 4.9423154224359055e-06, + "loss": 0.5626, + "step": 4596 + }, + { + "epoch": 0.41966404966222387, + "grad_norm": 0.48466756939888, + "learning_rate": 4.942289858377042e-06, + "loss": 0.598, + "step": 4597 + }, + { + "epoch": 0.4197553405148804, + "grad_norm": 0.4389369487762451, + "learning_rate": 4.942264288720968e-06, + "loss": 0.6077, + "step": 4598 + }, + { + "epoch": 0.41984663136753697, + "grad_norm": 0.4538588225841522, + "learning_rate": 4.942238713467745e-06, + "loss": 0.6473, + "step": 4599 + }, + { + "epoch": 0.41993792222019355, + "grad_norm": 0.4373544752597809, + "learning_rate": 4.942213132617429e-06, + "loss": 0.5942, + "step": 4600 + }, + { + "epoch": 0.4200292130728501, + "grad_norm": 0.45767268538475037, + "learning_rate": 4.942187546170079e-06, + "loss": 0.6274, + "step": 4601 + }, + { + "epoch": 0.42012050392550665, + "grad_norm": 0.4882144629955292, + "learning_rate": 4.9421619541257545e-06, + "loss": 0.5622, + "step": 4602 + }, + { + "epoch": 0.42021179477816323, + "grad_norm": 0.5050222873687744, + "learning_rate": 4.9421363564845145e-06, + "loss": 0.5678, + "step": 4603 + }, + { + "epoch": 0.4203030856308198, + "grad_norm": 0.5091208219528198, + "learning_rate": 4.942110753246416e-06, + "loss": 0.5452, + "step": 4604 + }, + { + "epoch": 0.42039437648347633, + "grad_norm": 0.44629618525505066, + "learning_rate": 4.942085144411519e-06, + "loss": 0.5917, + "step": 4605 + }, + { + "epoch": 0.4204856673361329, + "grad_norm": 0.4747890532016754, + "learning_rate": 4.942059529979881e-06, + "loss": 0.5808, + "step": 4606 + }, + { + "epoch": 0.4205769581887895, + "grad_norm": 0.45701146125793457, + "learning_rate": 4.942033909951562e-06, + "loss": 0.5796, + "step": 4607 + }, + { + "epoch": 0.42066824904144606, + "grad_norm": 0.48538053035736084, + "learning_rate": 4.94200828432662e-06, + "loss": 0.5927, + "step": 4608 + }, + { + "epoch": 0.4207595398941026, + "grad_norm": 0.5267253518104553, + "learning_rate": 4.941982653105115e-06, + "loss": 0.5574, + "step": 4609 + }, + { + "epoch": 0.42085083074675916, + "grad_norm": 0.46127641201019287, + "learning_rate": 4.941957016287104e-06, + "loss": 0.5814, + "step": 4610 + }, + { + "epoch": 0.42094212159941574, + "grad_norm": 0.5336357355117798, + "learning_rate": 4.941931373872646e-06, + "loss": 0.5892, + "step": 4611 + }, + { + "epoch": 0.4210334124520723, + "grad_norm": 0.5133340954780579, + "learning_rate": 4.9419057258618e-06, + "loss": 0.557, + "step": 4612 + }, + { + "epoch": 0.42112470330472884, + "grad_norm": 0.4790208041667938, + "learning_rate": 4.9418800722546255e-06, + "loss": 0.6041, + "step": 4613 + }, + { + "epoch": 0.4212159941573854, + "grad_norm": 0.451053649187088, + "learning_rate": 4.94185441305118e-06, + "loss": 0.5791, + "step": 4614 + }, + { + "epoch": 0.421307285010042, + "grad_norm": 0.49131742119789124, + "learning_rate": 4.941828748251523e-06, + "loss": 0.5666, + "step": 4615 + }, + { + "epoch": 0.4213985758626986, + "grad_norm": 0.4862695634365082, + "learning_rate": 4.941803077855715e-06, + "loss": 0.5119, + "step": 4616 + }, + { + "epoch": 0.4214898667153551, + "grad_norm": 0.4687332212924957, + "learning_rate": 4.941777401863812e-06, + "loss": 0.5528, + "step": 4617 + }, + { + "epoch": 0.4215811575680117, + "grad_norm": 0.47901248931884766, + "learning_rate": 4.9417517202758744e-06, + "loss": 0.5162, + "step": 4618 + }, + { + "epoch": 0.42167244842066826, + "grad_norm": 0.497114896774292, + "learning_rate": 4.94172603309196e-06, + "loss": 0.5776, + "step": 4619 + }, + { + "epoch": 0.42176373927332483, + "grad_norm": 0.5103745460510254, + "learning_rate": 4.94170034031213e-06, + "loss": 0.5704, + "step": 4620 + }, + { + "epoch": 0.42185503012598136, + "grad_norm": 0.4693871736526489, + "learning_rate": 4.94167464193644e-06, + "loss": 0.603, + "step": 4621 + }, + { + "epoch": 0.42194632097863793, + "grad_norm": 0.48183536529541016, + "learning_rate": 4.941648937964951e-06, + "loss": 0.5916, + "step": 4622 + }, + { + "epoch": 0.4220376118312945, + "grad_norm": 0.45792651176452637, + "learning_rate": 4.941623228397722e-06, + "loss": 0.6012, + "step": 4623 + }, + { + "epoch": 0.4221289026839511, + "grad_norm": 0.4669925272464752, + "learning_rate": 4.941597513234811e-06, + "loss": 0.5843, + "step": 4624 + }, + { + "epoch": 0.4222201935366076, + "grad_norm": 0.472768098115921, + "learning_rate": 4.941571792476278e-06, + "loss": 0.5996, + "step": 4625 + }, + { + "epoch": 0.4223114843892642, + "grad_norm": 0.46685585379600525, + "learning_rate": 4.9415460661221795e-06, + "loss": 0.5669, + "step": 4626 + }, + { + "epoch": 0.42240277524192077, + "grad_norm": 0.49324721097946167, + "learning_rate": 4.941520334172577e-06, + "loss": 0.5364, + "step": 4627 + }, + { + "epoch": 0.42249406609457735, + "grad_norm": 0.45219478011131287, + "learning_rate": 4.94149459662753e-06, + "loss": 0.5892, + "step": 4628 + }, + { + "epoch": 0.42258535694723387, + "grad_norm": 0.5059217810630798, + "learning_rate": 4.941468853487094e-06, + "loss": 0.6104, + "step": 4629 + }, + { + "epoch": 0.42267664779989045, + "grad_norm": 0.4958425462245941, + "learning_rate": 4.941443104751332e-06, + "loss": 0.5779, + "step": 4630 + }, + { + "epoch": 0.422767938652547, + "grad_norm": 0.47100284695625305, + "learning_rate": 4.9414173504203006e-06, + "loss": 0.5651, + "step": 4631 + }, + { + "epoch": 0.4228592295052036, + "grad_norm": 0.47921061515808105, + "learning_rate": 4.941391590494059e-06, + "loss": 0.5773, + "step": 4632 + }, + { + "epoch": 0.4229505203578601, + "grad_norm": 0.4601290225982666, + "learning_rate": 4.941365824972667e-06, + "loss": 0.5651, + "step": 4633 + }, + { + "epoch": 0.4230418112105167, + "grad_norm": 0.4548417031764984, + "learning_rate": 4.941340053856183e-06, + "loss": 0.5878, + "step": 4634 + }, + { + "epoch": 0.4231331020631733, + "grad_norm": 0.4496326148509979, + "learning_rate": 4.941314277144667e-06, + "loss": 0.6017, + "step": 4635 + }, + { + "epoch": 0.42322439291582986, + "grad_norm": 0.46472838521003723, + "learning_rate": 4.941288494838177e-06, + "loss": 0.605, + "step": 4636 + }, + { + "epoch": 0.4233156837684864, + "grad_norm": 0.4934650659561157, + "learning_rate": 4.941262706936772e-06, + "loss": 0.5483, + "step": 4637 + }, + { + "epoch": 0.42340697462114296, + "grad_norm": 0.46709176898002625, + "learning_rate": 4.941236913440513e-06, + "loss": 0.594, + "step": 4638 + }, + { + "epoch": 0.42349826547379954, + "grad_norm": 0.5288203358650208, + "learning_rate": 4.941211114349457e-06, + "loss": 0.5368, + "step": 4639 + }, + { + "epoch": 0.42358955632645606, + "grad_norm": 0.4654332399368286, + "learning_rate": 4.941185309663663e-06, + "loss": 0.5745, + "step": 4640 + }, + { + "epoch": 0.42368084717911264, + "grad_norm": 0.500820517539978, + "learning_rate": 4.9411594993831926e-06, + "loss": 0.5768, + "step": 4641 + }, + { + "epoch": 0.4237721380317692, + "grad_norm": 0.4974794387817383, + "learning_rate": 4.941133683508102e-06, + "loss": 0.5674, + "step": 4642 + }, + { + "epoch": 0.4238634288844258, + "grad_norm": 0.5245096683502197, + "learning_rate": 4.941107862038453e-06, + "loss": 0.5386, + "step": 4643 + }, + { + "epoch": 0.4239547197370823, + "grad_norm": 0.49174875020980835, + "learning_rate": 4.941082034974302e-06, + "loss": 0.5765, + "step": 4644 + }, + { + "epoch": 0.4240460105897389, + "grad_norm": 0.474403440952301, + "learning_rate": 4.9410562023157105e-06, + "loss": 0.5592, + "step": 4645 + }, + { + "epoch": 0.4241373014423955, + "grad_norm": 0.4821576476097107, + "learning_rate": 4.941030364062737e-06, + "loss": 0.5869, + "step": 4646 + }, + { + "epoch": 0.42422859229505205, + "grad_norm": 0.45041486620903015, + "learning_rate": 4.94100452021544e-06, + "loss": 0.5647, + "step": 4647 + }, + { + "epoch": 0.4243198831477086, + "grad_norm": 0.47553613781929016, + "learning_rate": 4.94097867077388e-06, + "loss": 0.5753, + "step": 4648 + }, + { + "epoch": 0.42441117400036515, + "grad_norm": 0.4632227122783661, + "learning_rate": 4.940952815738116e-06, + "loss": 0.5947, + "step": 4649 + }, + { + "epoch": 0.42450246485302173, + "grad_norm": 0.5180102586746216, + "learning_rate": 4.940926955108206e-06, + "loss": 0.5502, + "step": 4650 + }, + { + "epoch": 0.4245937557056783, + "grad_norm": 0.48703089356422424, + "learning_rate": 4.940901088884209e-06, + "loss": 0.5689, + "step": 4651 + }, + { + "epoch": 0.42468504655833483, + "grad_norm": 0.45545804500579834, + "learning_rate": 4.940875217066187e-06, + "loss": 0.597, + "step": 4652 + }, + { + "epoch": 0.4247763374109914, + "grad_norm": 0.4420917332172394, + "learning_rate": 4.940849339654198e-06, + "loss": 0.6043, + "step": 4653 + }, + { + "epoch": 0.424867628263648, + "grad_norm": 0.4888594448566437, + "learning_rate": 4.9408234566483e-06, + "loss": 0.5961, + "step": 4654 + }, + { + "epoch": 0.42495891911630457, + "grad_norm": 0.48447245359420776, + "learning_rate": 4.9407975680485535e-06, + "loss": 0.5703, + "step": 4655 + }, + { + "epoch": 0.4250502099689611, + "grad_norm": 0.4553995728492737, + "learning_rate": 4.940771673855017e-06, + "loss": 0.5996, + "step": 4656 + }, + { + "epoch": 0.42514150082161767, + "grad_norm": 0.5061298608779907, + "learning_rate": 4.940745774067751e-06, + "loss": 0.5659, + "step": 4657 + }, + { + "epoch": 0.42523279167427425, + "grad_norm": 0.4988664388656616, + "learning_rate": 4.9407198686868154e-06, + "loss": 0.5406, + "step": 4658 + }, + { + "epoch": 0.4253240825269308, + "grad_norm": 0.4730622172355652, + "learning_rate": 4.940693957712267e-06, + "loss": 0.5432, + "step": 4659 + }, + { + "epoch": 0.42541537337958735, + "grad_norm": 0.48572853207588196, + "learning_rate": 4.940668041144167e-06, + "loss": 0.5678, + "step": 4660 + }, + { + "epoch": 0.4255066642322439, + "grad_norm": 0.4842260479927063, + "learning_rate": 4.940642118982575e-06, + "loss": 0.5643, + "step": 4661 + }, + { + "epoch": 0.4255979550849005, + "grad_norm": 0.49473729729652405, + "learning_rate": 4.94061619122755e-06, + "loss": 0.5729, + "step": 4662 + }, + { + "epoch": 0.4256892459375571, + "grad_norm": 0.48599037528038025, + "learning_rate": 4.94059025787915e-06, + "loss": 0.5693, + "step": 4663 + }, + { + "epoch": 0.4257805367902136, + "grad_norm": 0.490151047706604, + "learning_rate": 4.940564318937436e-06, + "loss": 0.5889, + "step": 4664 + }, + { + "epoch": 0.4258718276428702, + "grad_norm": 0.4703475832939148, + "learning_rate": 4.940538374402469e-06, + "loss": 0.5812, + "step": 4665 + }, + { + "epoch": 0.42596311849552676, + "grad_norm": 0.49089622497558594, + "learning_rate": 4.940512424274305e-06, + "loss": 0.527, + "step": 4666 + }, + { + "epoch": 0.42605440934818334, + "grad_norm": 0.47621771693229675, + "learning_rate": 4.940486468553005e-06, + "loss": 0.542, + "step": 4667 + }, + { + "epoch": 0.42614570020083986, + "grad_norm": 0.45321181416511536, + "learning_rate": 4.940460507238629e-06, + "loss": 0.5848, + "step": 4668 + }, + { + "epoch": 0.42623699105349644, + "grad_norm": 0.4916478097438812, + "learning_rate": 4.940434540331236e-06, + "loss": 0.5503, + "step": 4669 + }, + { + "epoch": 0.426328281906153, + "grad_norm": 0.5060372352600098, + "learning_rate": 4.940408567830886e-06, + "loss": 0.5838, + "step": 4670 + }, + { + "epoch": 0.42641957275880954, + "grad_norm": 0.5162298679351807, + "learning_rate": 4.940382589737638e-06, + "loss": 0.5768, + "step": 4671 + }, + { + "epoch": 0.4265108636114661, + "grad_norm": 0.48101937770843506, + "learning_rate": 4.940356606051552e-06, + "loss": 0.5574, + "step": 4672 + }, + { + "epoch": 0.4266021544641227, + "grad_norm": 0.4871888756752014, + "learning_rate": 4.940330616772686e-06, + "loss": 0.5643, + "step": 4673 + }, + { + "epoch": 0.4266934453167793, + "grad_norm": 0.46027159690856934, + "learning_rate": 4.940304621901102e-06, + "loss": 0.5759, + "step": 4674 + }, + { + "epoch": 0.4267847361694358, + "grad_norm": 0.46795663237571716, + "learning_rate": 4.940278621436858e-06, + "loss": 0.5868, + "step": 4675 + }, + { + "epoch": 0.4268760270220924, + "grad_norm": 0.4847409725189209, + "learning_rate": 4.940252615380013e-06, + "loss": 0.5536, + "step": 4676 + }, + { + "epoch": 0.42696731787474895, + "grad_norm": 0.4662221670150757, + "learning_rate": 4.940226603730629e-06, + "loss": 0.5979, + "step": 4677 + }, + { + "epoch": 0.42705860872740553, + "grad_norm": 0.48681172728538513, + "learning_rate": 4.940200586488763e-06, + "loss": 0.5674, + "step": 4678 + }, + { + "epoch": 0.42714989958006205, + "grad_norm": 0.447409451007843, + "learning_rate": 4.940174563654477e-06, + "loss": 0.6016, + "step": 4679 + }, + { + "epoch": 0.42724119043271863, + "grad_norm": 0.4742809534072876, + "learning_rate": 4.940148535227828e-06, + "loss": 0.5769, + "step": 4680 + }, + { + "epoch": 0.4273324812853752, + "grad_norm": 0.48213905096054077, + "learning_rate": 4.940122501208878e-06, + "loss": 0.5629, + "step": 4681 + }, + { + "epoch": 0.4274237721380318, + "grad_norm": 0.4678114354610443, + "learning_rate": 4.940096461597685e-06, + "loss": 0.5725, + "step": 4682 + }, + { + "epoch": 0.4275150629906883, + "grad_norm": 0.4984743297100067, + "learning_rate": 4.9400704163943105e-06, + "loss": 0.5441, + "step": 4683 + }, + { + "epoch": 0.4276063538433449, + "grad_norm": 0.45149022340774536, + "learning_rate": 4.9400443655988125e-06, + "loss": 0.5849, + "step": 4684 + }, + { + "epoch": 0.42769764469600147, + "grad_norm": 0.4666275084018707, + "learning_rate": 4.94001830921125e-06, + "loss": 0.5762, + "step": 4685 + }, + { + "epoch": 0.42778893554865804, + "grad_norm": 0.47159647941589355, + "learning_rate": 4.939992247231686e-06, + "loss": 0.5841, + "step": 4686 + }, + { + "epoch": 0.42788022640131457, + "grad_norm": 0.48331567645072937, + "learning_rate": 4.939966179660179e-06, + "loss": 0.5454, + "step": 4687 + }, + { + "epoch": 0.42797151725397115, + "grad_norm": 0.5011781454086304, + "learning_rate": 4.939940106496785e-06, + "loss": 0.5279, + "step": 4688 + }, + { + "epoch": 0.4280628081066277, + "grad_norm": 0.47968316078186035, + "learning_rate": 4.939914027741569e-06, + "loss": 0.5746, + "step": 4689 + }, + { + "epoch": 0.4281540989592843, + "grad_norm": 0.49797552824020386, + "learning_rate": 4.939887943394588e-06, + "loss": 0.5462, + "step": 4690 + }, + { + "epoch": 0.4282453898119408, + "grad_norm": 0.4654615521430969, + "learning_rate": 4.9398618534559014e-06, + "loss": 0.5872, + "step": 4691 + }, + { + "epoch": 0.4283366806645974, + "grad_norm": 0.4833407998085022, + "learning_rate": 4.93983575792557e-06, + "loss": 0.5679, + "step": 4692 + }, + { + "epoch": 0.428427971517254, + "grad_norm": 0.4791429042816162, + "learning_rate": 4.939809656803655e-06, + "loss": 0.5494, + "step": 4693 + }, + { + "epoch": 0.42851926236991056, + "grad_norm": 0.5101969242095947, + "learning_rate": 4.939783550090214e-06, + "loss": 0.5324, + "step": 4694 + }, + { + "epoch": 0.4286105532225671, + "grad_norm": 0.48380810022354126, + "learning_rate": 4.9397574377853065e-06, + "loss": 0.5705, + "step": 4695 + }, + { + "epoch": 0.42870184407522366, + "grad_norm": 0.46649202704429626, + "learning_rate": 4.939731319888994e-06, + "loss": 0.5679, + "step": 4696 + }, + { + "epoch": 0.42879313492788024, + "grad_norm": 0.5262860655784607, + "learning_rate": 4.939705196401336e-06, + "loss": 0.5944, + "step": 4697 + }, + { + "epoch": 0.4288844257805368, + "grad_norm": 0.438265323638916, + "learning_rate": 4.9396790673223925e-06, + "loss": 0.5979, + "step": 4698 + }, + { + "epoch": 0.42897571663319334, + "grad_norm": 0.4813167452812195, + "learning_rate": 4.9396529326522216e-06, + "loss": 0.6009, + "step": 4699 + }, + { + "epoch": 0.4290670074858499, + "grad_norm": 0.5154109597206116, + "learning_rate": 4.939626792390886e-06, + "loss": 0.5577, + "step": 4700 + }, + { + "epoch": 0.4291582983385065, + "grad_norm": 0.4890201985836029, + "learning_rate": 4.939600646538443e-06, + "loss": 0.5252, + "step": 4701 + }, + { + "epoch": 0.4292495891911631, + "grad_norm": 0.5147213935852051, + "learning_rate": 4.9395744950949545e-06, + "loss": 0.5549, + "step": 4702 + }, + { + "epoch": 0.4293408800438196, + "grad_norm": 0.5085967183113098, + "learning_rate": 4.9395483380604795e-06, + "loss": 0.5603, + "step": 4703 + }, + { + "epoch": 0.4294321708964762, + "grad_norm": 0.4918125867843628, + "learning_rate": 4.939522175435079e-06, + "loss": 0.5365, + "step": 4704 + }, + { + "epoch": 0.42952346174913275, + "grad_norm": 0.489278107881546, + "learning_rate": 4.93949600721881e-06, + "loss": 0.5729, + "step": 4705 + }, + { + "epoch": 0.4296147526017893, + "grad_norm": 0.4805561304092407, + "learning_rate": 4.939469833411736e-06, + "loss": 0.577, + "step": 4706 + }, + { + "epoch": 0.42970604345444585, + "grad_norm": 0.47623103857040405, + "learning_rate": 4.939443654013915e-06, + "loss": 0.5779, + "step": 4707 + }, + { + "epoch": 0.42979733430710243, + "grad_norm": 0.4787435829639435, + "learning_rate": 4.939417469025408e-06, + "loss": 0.6107, + "step": 4708 + }, + { + "epoch": 0.429888625159759, + "grad_norm": 0.480640709400177, + "learning_rate": 4.939391278446274e-06, + "loss": 0.5882, + "step": 4709 + }, + { + "epoch": 0.42997991601241553, + "grad_norm": 0.4607410430908203, + "learning_rate": 4.939365082276574e-06, + "loss": 0.5914, + "step": 4710 + }, + { + "epoch": 0.4300712068650721, + "grad_norm": 0.46554431319236755, + "learning_rate": 4.939338880516366e-06, + "loss": 0.6075, + "step": 4711 + }, + { + "epoch": 0.4301624977177287, + "grad_norm": 0.45942312479019165, + "learning_rate": 4.9393126731657135e-06, + "loss": 0.5674, + "step": 4712 + }, + { + "epoch": 0.43025378857038526, + "grad_norm": 0.522607684135437, + "learning_rate": 4.9392864602246745e-06, + "loss": 0.5784, + "step": 4713 + }, + { + "epoch": 0.4303450794230418, + "grad_norm": 0.48470672965049744, + "learning_rate": 4.939260241693309e-06, + "loss": 0.5716, + "step": 4714 + }, + { + "epoch": 0.43043637027569837, + "grad_norm": 0.46529850363731384, + "learning_rate": 4.939234017571677e-06, + "loss": 0.5989, + "step": 4715 + }, + { + "epoch": 0.43052766112835494, + "grad_norm": 0.4562022089958191, + "learning_rate": 4.939207787859839e-06, + "loss": 0.6116, + "step": 4716 + }, + { + "epoch": 0.4306189519810115, + "grad_norm": 0.463227242231369, + "learning_rate": 4.939181552557854e-06, + "loss": 0.521, + "step": 4717 + }, + { + "epoch": 0.43071024283366804, + "grad_norm": 0.4710007905960083, + "learning_rate": 4.939155311665785e-06, + "loss": 0.5738, + "step": 4718 + }, + { + "epoch": 0.4308015336863246, + "grad_norm": 0.4889717400074005, + "learning_rate": 4.93912906518369e-06, + "loss": 0.5561, + "step": 4719 + }, + { + "epoch": 0.4308928245389812, + "grad_norm": 0.42336952686309814, + "learning_rate": 4.939102813111629e-06, + "loss": 0.5983, + "step": 4720 + }, + { + "epoch": 0.4309841153916378, + "grad_norm": 0.47078046202659607, + "learning_rate": 4.939076555449663e-06, + "loss": 0.5408, + "step": 4721 + }, + { + "epoch": 0.4310754062442943, + "grad_norm": 0.4805757403373718, + "learning_rate": 4.939050292197851e-06, + "loss": 0.5719, + "step": 4722 + }, + { + "epoch": 0.4311666970969509, + "grad_norm": 0.47431379556655884, + "learning_rate": 4.9390240233562545e-06, + "loss": 0.5708, + "step": 4723 + }, + { + "epoch": 0.43125798794960746, + "grad_norm": 0.48053833842277527, + "learning_rate": 4.938997748924934e-06, + "loss": 0.5485, + "step": 4724 + }, + { + "epoch": 0.43134927880226404, + "grad_norm": 0.4981983006000519, + "learning_rate": 4.938971468903948e-06, + "loss": 0.551, + "step": 4725 + }, + { + "epoch": 0.43144056965492056, + "grad_norm": 0.43528294563293457, + "learning_rate": 4.9389451832933575e-06, + "loss": 0.5842, + "step": 4726 + }, + { + "epoch": 0.43153186050757714, + "grad_norm": 0.5028179287910461, + "learning_rate": 4.938918892093224e-06, + "loss": 0.5465, + "step": 4727 + }, + { + "epoch": 0.4316231513602337, + "grad_norm": 0.4597007632255554, + "learning_rate": 4.938892595303605e-06, + "loss": 0.5982, + "step": 4728 + }, + { + "epoch": 0.4317144422128903, + "grad_norm": 0.4446679651737213, + "learning_rate": 4.938866292924564e-06, + "loss": 0.6094, + "step": 4729 + }, + { + "epoch": 0.4318057330655468, + "grad_norm": 0.5263429284095764, + "learning_rate": 4.938839984956159e-06, + "loss": 0.5449, + "step": 4730 + }, + { + "epoch": 0.4318970239182034, + "grad_norm": 0.49124959111213684, + "learning_rate": 4.93881367139845e-06, + "loss": 0.5556, + "step": 4731 + }, + { + "epoch": 0.43198831477085997, + "grad_norm": 0.4612252414226532, + "learning_rate": 4.9387873522515e-06, + "loss": 0.5951, + "step": 4732 + }, + { + "epoch": 0.43207960562351655, + "grad_norm": 0.48690882325172424, + "learning_rate": 4.938761027515367e-06, + "loss": 0.6052, + "step": 4733 + }, + { + "epoch": 0.43217089647617307, + "grad_norm": 0.47522300481796265, + "learning_rate": 4.938734697190112e-06, + "loss": 0.5784, + "step": 4734 + }, + { + "epoch": 0.43226218732882965, + "grad_norm": 0.43610090017318726, + "learning_rate": 4.938708361275795e-06, + "loss": 0.6158, + "step": 4735 + }, + { + "epoch": 0.43235347818148623, + "grad_norm": 0.48320844769477844, + "learning_rate": 4.938682019772477e-06, + "loss": 0.622, + "step": 4736 + }, + { + "epoch": 0.4324447690341428, + "grad_norm": 0.45256567001342773, + "learning_rate": 4.938655672680217e-06, + "loss": 0.5997, + "step": 4737 + }, + { + "epoch": 0.43253605988679933, + "grad_norm": 0.48604699969291687, + "learning_rate": 4.938629319999078e-06, + "loss": 0.5849, + "step": 4738 + }, + { + "epoch": 0.4326273507394559, + "grad_norm": 0.44696563482284546, + "learning_rate": 4.938602961729118e-06, + "loss": 0.5865, + "step": 4739 + }, + { + "epoch": 0.4327186415921125, + "grad_norm": 0.4852451682090759, + "learning_rate": 4.938576597870398e-06, + "loss": 0.5764, + "step": 4740 + }, + { + "epoch": 0.432809932444769, + "grad_norm": 0.48228582739830017, + "learning_rate": 4.938550228422978e-06, + "loss": 0.5909, + "step": 4741 + }, + { + "epoch": 0.4329012232974256, + "grad_norm": 0.4763626456260681, + "learning_rate": 4.93852385338692e-06, + "loss": 0.6011, + "step": 4742 + }, + { + "epoch": 0.43299251415008216, + "grad_norm": 0.474283903837204, + "learning_rate": 4.9384974727622834e-06, + "loss": 0.5817, + "step": 4743 + }, + { + "epoch": 0.43308380500273874, + "grad_norm": 0.4863004982471466, + "learning_rate": 4.938471086549129e-06, + "loss": 0.5797, + "step": 4744 + }, + { + "epoch": 0.43317509585539526, + "grad_norm": 0.510244607925415, + "learning_rate": 4.9384446947475165e-06, + "loss": 0.5328, + "step": 4745 + }, + { + "epoch": 0.43326638670805184, + "grad_norm": 0.43571212887763977, + "learning_rate": 4.938418297357507e-06, + "loss": 0.6048, + "step": 4746 + }, + { + "epoch": 0.4333576775607084, + "grad_norm": 0.46580877900123596, + "learning_rate": 4.938391894379161e-06, + "loss": 0.6199, + "step": 4747 + }, + { + "epoch": 0.433448968413365, + "grad_norm": 0.5029786825180054, + "learning_rate": 4.9383654858125384e-06, + "loss": 0.5369, + "step": 4748 + }, + { + "epoch": 0.4335402592660215, + "grad_norm": 0.4527876675128937, + "learning_rate": 4.9383390716577005e-06, + "loss": 0.5958, + "step": 4749 + }, + { + "epoch": 0.4336315501186781, + "grad_norm": 0.4358299970626831, + "learning_rate": 4.938312651914708e-06, + "loss": 0.6048, + "step": 4750 + }, + { + "epoch": 0.4337228409713347, + "grad_norm": 0.4566567540168762, + "learning_rate": 4.9382862265836205e-06, + "loss": 0.5794, + "step": 4751 + }, + { + "epoch": 0.43381413182399126, + "grad_norm": 0.4899011552333832, + "learning_rate": 4.9382597956645e-06, + "loss": 0.5526, + "step": 4752 + }, + { + "epoch": 0.4339054226766478, + "grad_norm": 0.44487154483795166, + "learning_rate": 4.938233359157405e-06, + "loss": 0.6201, + "step": 4753 + }, + { + "epoch": 0.43399671352930436, + "grad_norm": 0.46617430448532104, + "learning_rate": 4.938206917062398e-06, + "loss": 0.5827, + "step": 4754 + }, + { + "epoch": 0.43408800438196093, + "grad_norm": 0.45395153760910034, + "learning_rate": 4.938180469379539e-06, + "loss": 0.6173, + "step": 4755 + }, + { + "epoch": 0.4341792952346175, + "grad_norm": 0.47440144419670105, + "learning_rate": 4.938154016108887e-06, + "loss": 0.5946, + "step": 4756 + }, + { + "epoch": 0.43427058608727404, + "grad_norm": 0.47654348611831665, + "learning_rate": 4.938127557250506e-06, + "loss": 0.5615, + "step": 4757 + }, + { + "epoch": 0.4343618769399306, + "grad_norm": 0.44879150390625, + "learning_rate": 4.938101092804454e-06, + "loss": 0.6214, + "step": 4758 + }, + { + "epoch": 0.4344531677925872, + "grad_norm": 0.4935052990913391, + "learning_rate": 4.938074622770792e-06, + "loss": 0.5572, + "step": 4759 + }, + { + "epoch": 0.43454445864524377, + "grad_norm": 0.4487876296043396, + "learning_rate": 4.938048147149581e-06, + "loss": 0.6188, + "step": 4760 + }, + { + "epoch": 0.4346357494979003, + "grad_norm": 0.4916597604751587, + "learning_rate": 4.938021665940882e-06, + "loss": 0.5765, + "step": 4761 + }, + { + "epoch": 0.43472704035055687, + "grad_norm": 0.4950982928276062, + "learning_rate": 4.937995179144756e-06, + "loss": 0.5574, + "step": 4762 + }, + { + "epoch": 0.43481833120321345, + "grad_norm": 0.497991681098938, + "learning_rate": 4.937968686761262e-06, + "loss": 0.5621, + "step": 4763 + }, + { + "epoch": 0.43490962205587, + "grad_norm": 0.45625823736190796, + "learning_rate": 4.937942188790462e-06, + "loss": 0.5432, + "step": 4764 + }, + { + "epoch": 0.43500091290852655, + "grad_norm": 0.47829434275627136, + "learning_rate": 4.937915685232417e-06, + "loss": 0.583, + "step": 4765 + }, + { + "epoch": 0.4350922037611831, + "grad_norm": 0.4628323018550873, + "learning_rate": 4.937889176087187e-06, + "loss": 0.6062, + "step": 4766 + }, + { + "epoch": 0.4351834946138397, + "grad_norm": 0.4704802632331848, + "learning_rate": 4.937862661354833e-06, + "loss": 0.5664, + "step": 4767 + }, + { + "epoch": 0.4352747854664963, + "grad_norm": 0.49834999442100525, + "learning_rate": 4.937836141035416e-06, + "loss": 0.5787, + "step": 4768 + }, + { + "epoch": 0.4353660763191528, + "grad_norm": 0.47194114327430725, + "learning_rate": 4.9378096151289966e-06, + "loss": 0.5534, + "step": 4769 + }, + { + "epoch": 0.4354573671718094, + "grad_norm": 0.4774269759654999, + "learning_rate": 4.937783083635636e-06, + "loss": 0.549, + "step": 4770 + }, + { + "epoch": 0.43554865802446596, + "grad_norm": 0.49056509137153625, + "learning_rate": 4.937756546555394e-06, + "loss": 0.5538, + "step": 4771 + }, + { + "epoch": 0.4356399488771225, + "grad_norm": 0.44496485590934753, + "learning_rate": 4.937730003888332e-06, + "loss": 0.5535, + "step": 4772 + }, + { + "epoch": 0.43573123972977906, + "grad_norm": 0.5220516324043274, + "learning_rate": 4.93770345563451e-06, + "loss": 0.5794, + "step": 4773 + }, + { + "epoch": 0.43582253058243564, + "grad_norm": 0.4598642587661743, + "learning_rate": 4.937676901793991e-06, + "loss": 0.6364, + "step": 4774 + }, + { + "epoch": 0.4359138214350922, + "grad_norm": 0.47981229424476624, + "learning_rate": 4.9376503423668345e-06, + "loss": 0.5684, + "step": 4775 + }, + { + "epoch": 0.43600511228774874, + "grad_norm": 0.502121090888977, + "learning_rate": 4.9376237773531e-06, + "loss": 0.5653, + "step": 4776 + }, + { + "epoch": 0.4360964031404053, + "grad_norm": 0.4876307547092438, + "learning_rate": 4.937597206752851e-06, + "loss": 0.5831, + "step": 4777 + }, + { + "epoch": 0.4361876939930619, + "grad_norm": 0.46715277433395386, + "learning_rate": 4.937570630566147e-06, + "loss": 0.5892, + "step": 4778 + }, + { + "epoch": 0.4362789848457185, + "grad_norm": 0.473210871219635, + "learning_rate": 4.937544048793049e-06, + "loss": 0.6108, + "step": 4779 + }, + { + "epoch": 0.436370275698375, + "grad_norm": 0.4816517233848572, + "learning_rate": 4.937517461433618e-06, + "loss": 0.5568, + "step": 4780 + }, + { + "epoch": 0.4364615665510316, + "grad_norm": 0.45753008127212524, + "learning_rate": 4.937490868487915e-06, + "loss": 0.6338, + "step": 4781 + }, + { + "epoch": 0.43655285740368815, + "grad_norm": 0.48908016085624695, + "learning_rate": 4.937464269956001e-06, + "loss": 0.5954, + "step": 4782 + }, + { + "epoch": 0.43664414825634473, + "grad_norm": 0.45127126574516296, + "learning_rate": 4.937437665837937e-06, + "loss": 0.5765, + "step": 4783 + }, + { + "epoch": 0.43673543910900126, + "grad_norm": 0.47408005595207214, + "learning_rate": 4.937411056133783e-06, + "loss": 0.6167, + "step": 4784 + }, + { + "epoch": 0.43682672996165783, + "grad_norm": 0.45191341638565063, + "learning_rate": 4.937384440843601e-06, + "loss": 0.5891, + "step": 4785 + }, + { + "epoch": 0.4369180208143144, + "grad_norm": 0.4801657199859619, + "learning_rate": 4.937357819967451e-06, + "loss": 0.5951, + "step": 4786 + }, + { + "epoch": 0.437009311666971, + "grad_norm": 0.44887036085128784, + "learning_rate": 4.937331193505397e-06, + "loss": 0.6208, + "step": 4787 + }, + { + "epoch": 0.4371006025196275, + "grad_norm": 0.4631193280220032, + "learning_rate": 4.937304561457496e-06, + "loss": 0.5848, + "step": 4788 + }, + { + "epoch": 0.4371918933722841, + "grad_norm": 0.5013570189476013, + "learning_rate": 4.937277923823811e-06, + "loss": 0.5611, + "step": 4789 + }, + { + "epoch": 0.43728318422494067, + "grad_norm": 0.4642599821090698, + "learning_rate": 4.937251280604403e-06, + "loss": 0.6156, + "step": 4790 + }, + { + "epoch": 0.43737447507759725, + "grad_norm": 0.47929298877716064, + "learning_rate": 4.937224631799334e-06, + "loss": 0.5868, + "step": 4791 + }, + { + "epoch": 0.43746576593025377, + "grad_norm": 0.477420836687088, + "learning_rate": 4.9371979774086634e-06, + "loss": 0.5325, + "step": 4792 + }, + { + "epoch": 0.43755705678291035, + "grad_norm": 0.48028185963630676, + "learning_rate": 4.937171317432453e-06, + "loss": 0.6059, + "step": 4793 + }, + { + "epoch": 0.4376483476355669, + "grad_norm": 0.4860416650772095, + "learning_rate": 4.937144651870763e-06, + "loss": 0.5628, + "step": 4794 + }, + { + "epoch": 0.4377396384882235, + "grad_norm": 0.467313289642334, + "learning_rate": 4.937117980723656e-06, + "loss": 0.574, + "step": 4795 + }, + { + "epoch": 0.43783092934088, + "grad_norm": 0.4572211503982544, + "learning_rate": 4.937091303991193e-06, + "loss": 0.5783, + "step": 4796 + }, + { + "epoch": 0.4379222201935366, + "grad_norm": 0.4791659116744995, + "learning_rate": 4.9370646216734336e-06, + "loss": 0.567, + "step": 4797 + }, + { + "epoch": 0.4380135110461932, + "grad_norm": 0.45993146300315857, + "learning_rate": 4.937037933770441e-06, + "loss": 0.5856, + "step": 4798 + }, + { + "epoch": 0.43810480189884976, + "grad_norm": 0.5131351947784424, + "learning_rate": 4.937011240282274e-06, + "loss": 0.5285, + "step": 4799 + }, + { + "epoch": 0.4381960927515063, + "grad_norm": 0.45481380820274353, + "learning_rate": 4.936984541208995e-06, + "loss": 0.5629, + "step": 4800 + }, + { + "epoch": 0.43828738360416286, + "grad_norm": 0.45217084884643555, + "learning_rate": 4.9369578365506666e-06, + "loss": 0.5943, + "step": 4801 + }, + { + "epoch": 0.43837867445681944, + "grad_norm": 0.4551839530467987, + "learning_rate": 4.936931126307348e-06, + "loss": 0.5757, + "step": 4802 + }, + { + "epoch": 0.438469965309476, + "grad_norm": 0.4961443841457367, + "learning_rate": 4.936904410479101e-06, + "loss": 0.556, + "step": 4803 + }, + { + "epoch": 0.43856125616213254, + "grad_norm": 0.47808411717414856, + "learning_rate": 4.936877689065987e-06, + "loss": 0.5664, + "step": 4804 + }, + { + "epoch": 0.4386525470147891, + "grad_norm": 0.4783770740032196, + "learning_rate": 4.9368509620680675e-06, + "loss": 0.615, + "step": 4805 + }, + { + "epoch": 0.4387438378674457, + "grad_norm": 0.46936002373695374, + "learning_rate": 4.936824229485403e-06, + "loss": 0.5794, + "step": 4806 + }, + { + "epoch": 0.4388351287201022, + "grad_norm": 0.4663902521133423, + "learning_rate": 4.936797491318055e-06, + "loss": 0.6005, + "step": 4807 + }, + { + "epoch": 0.4389264195727588, + "grad_norm": 0.4990154206752777, + "learning_rate": 4.9367707475660854e-06, + "loss": 0.5546, + "step": 4808 + }, + { + "epoch": 0.4390177104254154, + "grad_norm": 0.47343358397483826, + "learning_rate": 4.9367439982295555e-06, + "loss": 0.5466, + "step": 4809 + }, + { + "epoch": 0.43910900127807195, + "grad_norm": 0.49808141589164734, + "learning_rate": 4.9367172433085255e-06, + "loss": 0.5406, + "step": 4810 + }, + { + "epoch": 0.4392002921307285, + "grad_norm": 0.50486159324646, + "learning_rate": 4.936690482803057e-06, + "loss": 0.5691, + "step": 4811 + }, + { + "epoch": 0.43929158298338505, + "grad_norm": 0.47042274475097656, + "learning_rate": 4.936663716713212e-06, + "loss": 0.5697, + "step": 4812 + }, + { + "epoch": 0.43938287383604163, + "grad_norm": 0.4570331871509552, + "learning_rate": 4.9366369450390516e-06, + "loss": 0.5727, + "step": 4813 + }, + { + "epoch": 0.4394741646886982, + "grad_norm": 0.4861038029193878, + "learning_rate": 4.936610167780637e-06, + "loss": 0.5555, + "step": 4814 + }, + { + "epoch": 0.43956545554135473, + "grad_norm": 0.48927441239356995, + "learning_rate": 4.93658338493803e-06, + "loss": 0.5729, + "step": 4815 + }, + { + "epoch": 0.4396567463940113, + "grad_norm": 0.4921349883079529, + "learning_rate": 4.93655659651129e-06, + "loss": 0.5813, + "step": 4816 + }, + { + "epoch": 0.4397480372466679, + "grad_norm": 0.528670608997345, + "learning_rate": 4.936529802500482e-06, + "loss": 0.5668, + "step": 4817 + }, + { + "epoch": 0.43983932809932447, + "grad_norm": 0.47073936462402344, + "learning_rate": 4.936503002905665e-06, + "loss": 0.6043, + "step": 4818 + }, + { + "epoch": 0.439930618951981, + "grad_norm": 0.509588897228241, + "learning_rate": 4.936476197726901e-06, + "loss": 0.5458, + "step": 4819 + }, + { + "epoch": 0.44002190980463757, + "grad_norm": 0.4765579402446747, + "learning_rate": 4.9364493869642505e-06, + "loss": 0.5732, + "step": 4820 + }, + { + "epoch": 0.44011320065729415, + "grad_norm": 0.4816097617149353, + "learning_rate": 4.936422570617776e-06, + "loss": 0.5687, + "step": 4821 + }, + { + "epoch": 0.4402044915099507, + "grad_norm": 0.4854411780834198, + "learning_rate": 4.936395748687539e-06, + "loss": 0.605, + "step": 4822 + }, + { + "epoch": 0.44029578236260725, + "grad_norm": 0.47264665365219116, + "learning_rate": 4.9363689211735995e-06, + "loss": 0.5732, + "step": 4823 + }, + { + "epoch": 0.4403870732152638, + "grad_norm": 0.48261287808418274, + "learning_rate": 4.936342088076022e-06, + "loss": 0.5672, + "step": 4824 + }, + { + "epoch": 0.4404783640679204, + "grad_norm": 0.4912281036376953, + "learning_rate": 4.936315249394865e-06, + "loss": 0.5719, + "step": 4825 + }, + { + "epoch": 0.440569654920577, + "grad_norm": 0.4752142131328583, + "learning_rate": 4.936288405130192e-06, + "loss": 0.5973, + "step": 4826 + }, + { + "epoch": 0.4406609457732335, + "grad_norm": 0.4822239279747009, + "learning_rate": 4.936261555282063e-06, + "loss": 0.5746, + "step": 4827 + }, + { + "epoch": 0.4407522366258901, + "grad_norm": 0.46713319420814514, + "learning_rate": 4.93623469985054e-06, + "loss": 0.595, + "step": 4828 + }, + { + "epoch": 0.44084352747854666, + "grad_norm": 0.43265748023986816, + "learning_rate": 4.9362078388356845e-06, + "loss": 0.6054, + "step": 4829 + }, + { + "epoch": 0.44093481833120324, + "grad_norm": 0.47188785672187805, + "learning_rate": 4.936180972237558e-06, + "loss": 0.5508, + "step": 4830 + }, + { + "epoch": 0.44102610918385976, + "grad_norm": 0.47187358140945435, + "learning_rate": 4.936154100056224e-06, + "loss": 0.6033, + "step": 4831 + }, + { + "epoch": 0.44111740003651634, + "grad_norm": 0.4499579071998596, + "learning_rate": 4.936127222291742e-06, + "loss": 0.5864, + "step": 4832 + }, + { + "epoch": 0.4412086908891729, + "grad_norm": 0.468742311000824, + "learning_rate": 4.9361003389441734e-06, + "loss": 0.5988, + "step": 4833 + }, + { + "epoch": 0.4412999817418295, + "grad_norm": 0.4770277738571167, + "learning_rate": 4.9360734500135806e-06, + "loss": 0.5681, + "step": 4834 + }, + { + "epoch": 0.441391272594486, + "grad_norm": 0.4711879789829254, + "learning_rate": 4.936046555500026e-06, + "loss": 0.5856, + "step": 4835 + }, + { + "epoch": 0.4414825634471426, + "grad_norm": 0.5175142884254456, + "learning_rate": 4.9360196554035685e-06, + "loss": 0.5398, + "step": 4836 + }, + { + "epoch": 0.4415738542997992, + "grad_norm": 0.4805940091609955, + "learning_rate": 4.935992749724273e-06, + "loss": 0.5542, + "step": 4837 + }, + { + "epoch": 0.44166514515245575, + "grad_norm": 0.4870317280292511, + "learning_rate": 4.9359658384622e-06, + "loss": 0.5744, + "step": 4838 + }, + { + "epoch": 0.4417564360051123, + "grad_norm": 0.4649401903152466, + "learning_rate": 4.9359389216174105e-06, + "loss": 0.5645, + "step": 4839 + }, + { + "epoch": 0.44184772685776885, + "grad_norm": 0.4970078766345978, + "learning_rate": 4.935911999189966e-06, + "loss": 0.5579, + "step": 4840 + }, + { + "epoch": 0.44193901771042543, + "grad_norm": 0.4768241047859192, + "learning_rate": 4.935885071179929e-06, + "loss": 0.6272, + "step": 4841 + }, + { + "epoch": 0.44203030856308195, + "grad_norm": 0.513089656829834, + "learning_rate": 4.935858137587361e-06, + "loss": 0.5688, + "step": 4842 + }, + { + "epoch": 0.44212159941573853, + "grad_norm": 0.4594179093837738, + "learning_rate": 4.935831198412324e-06, + "loss": 0.6088, + "step": 4843 + }, + { + "epoch": 0.4422128902683951, + "grad_norm": 0.48822152614593506, + "learning_rate": 4.93580425365488e-06, + "loss": 0.5405, + "step": 4844 + }, + { + "epoch": 0.4423041811210517, + "grad_norm": 0.47286900877952576, + "learning_rate": 4.935777303315089e-06, + "loss": 0.5938, + "step": 4845 + }, + { + "epoch": 0.4423954719737082, + "grad_norm": 0.4758285880088806, + "learning_rate": 4.9357503473930155e-06, + "loss": 0.5221, + "step": 4846 + }, + { + "epoch": 0.4424867628263648, + "grad_norm": 0.45528700947761536, + "learning_rate": 4.9357233858887184e-06, + "loss": 0.6049, + "step": 4847 + }, + { + "epoch": 0.44257805367902137, + "grad_norm": 0.44877707958221436, + "learning_rate": 4.935696418802262e-06, + "loss": 0.6173, + "step": 4848 + }, + { + "epoch": 0.44266934453167794, + "grad_norm": 0.4771975576877594, + "learning_rate": 4.935669446133706e-06, + "loss": 0.5542, + "step": 4849 + }, + { + "epoch": 0.44276063538433447, + "grad_norm": 0.4805957078933716, + "learning_rate": 4.935642467883114e-06, + "loss": 0.5905, + "step": 4850 + }, + { + "epoch": 0.44285192623699104, + "grad_norm": 0.4889434576034546, + "learning_rate": 4.935615484050546e-06, + "loss": 0.5534, + "step": 4851 + }, + { + "epoch": 0.4429432170896476, + "grad_norm": 0.46349263191223145, + "learning_rate": 4.935588494636066e-06, + "loss": 0.5861, + "step": 4852 + }, + { + "epoch": 0.4430345079423042, + "grad_norm": 0.5174172520637512, + "learning_rate": 4.9355614996397335e-06, + "loss": 0.5706, + "step": 4853 + }, + { + "epoch": 0.4431257987949607, + "grad_norm": 0.4871979355812073, + "learning_rate": 4.935534499061613e-06, + "loss": 0.5664, + "step": 4854 + }, + { + "epoch": 0.4432170896476173, + "grad_norm": 0.4624347686767578, + "learning_rate": 4.935507492901764e-06, + "loss": 0.5538, + "step": 4855 + }, + { + "epoch": 0.4433083805002739, + "grad_norm": 0.47503435611724854, + "learning_rate": 4.9354804811602496e-06, + "loss": 0.5586, + "step": 4856 + }, + { + "epoch": 0.44339967135293046, + "grad_norm": 0.4497949779033661, + "learning_rate": 4.935453463837131e-06, + "loss": 0.6072, + "step": 4857 + }, + { + "epoch": 0.443490962205587, + "grad_norm": 0.488385945558548, + "learning_rate": 4.935426440932472e-06, + "loss": 0.5861, + "step": 4858 + }, + { + "epoch": 0.44358225305824356, + "grad_norm": 0.4611852467060089, + "learning_rate": 4.9353994124463315e-06, + "loss": 0.602, + "step": 4859 + }, + { + "epoch": 0.44367354391090014, + "grad_norm": 0.5114672780036926, + "learning_rate": 4.935372378378773e-06, + "loss": 0.5947, + "step": 4860 + }, + { + "epoch": 0.4437648347635567, + "grad_norm": 0.47404688596725464, + "learning_rate": 4.93534533872986e-06, + "loss": 0.574, + "step": 4861 + }, + { + "epoch": 0.44385612561621324, + "grad_norm": 0.5012978315353394, + "learning_rate": 4.935318293499651e-06, + "loss": 0.5544, + "step": 4862 + }, + { + "epoch": 0.4439474164688698, + "grad_norm": 0.4727603793144226, + "learning_rate": 4.935291242688212e-06, + "loss": 0.5632, + "step": 4863 + }, + { + "epoch": 0.4440387073215264, + "grad_norm": 0.5042242407798767, + "learning_rate": 4.935264186295602e-06, + "loss": 0.5367, + "step": 4864 + }, + { + "epoch": 0.44412999817418297, + "grad_norm": 0.5004759430885315, + "learning_rate": 4.935237124321883e-06, + "loss": 0.5828, + "step": 4865 + }, + { + "epoch": 0.4442212890268395, + "grad_norm": 0.5113379955291748, + "learning_rate": 4.9352100567671195e-06, + "loss": 0.5582, + "step": 4866 + }, + { + "epoch": 0.4443125798794961, + "grad_norm": 0.46543437242507935, + "learning_rate": 4.935182983631371e-06, + "loss": 0.5872, + "step": 4867 + }, + { + "epoch": 0.44440387073215265, + "grad_norm": 0.5082384943962097, + "learning_rate": 4.9351559049147015e-06, + "loss": 0.5738, + "step": 4868 + }, + { + "epoch": 0.44449516158480923, + "grad_norm": 0.49864259362220764, + "learning_rate": 4.935128820617171e-06, + "loss": 0.5881, + "step": 4869 + }, + { + "epoch": 0.44458645243746575, + "grad_norm": 0.4654565751552582, + "learning_rate": 4.935101730738844e-06, + "loss": 0.598, + "step": 4870 + }, + { + "epoch": 0.44467774329012233, + "grad_norm": 0.47457608580589294, + "learning_rate": 4.935074635279781e-06, + "loss": 0.5562, + "step": 4871 + }, + { + "epoch": 0.4447690341427789, + "grad_norm": 0.42447468638420105, + "learning_rate": 4.935047534240043e-06, + "loss": 0.598, + "step": 4872 + }, + { + "epoch": 0.44486032499543543, + "grad_norm": 0.4721777141094208, + "learning_rate": 4.935020427619694e-06, + "loss": 0.5842, + "step": 4873 + }, + { + "epoch": 0.444951615848092, + "grad_norm": 0.4946427643299103, + "learning_rate": 4.9349933154187966e-06, + "loss": 0.5935, + "step": 4874 + }, + { + "epoch": 0.4450429067007486, + "grad_norm": 0.4995396137237549, + "learning_rate": 4.934966197637412e-06, + "loss": 0.5382, + "step": 4875 + }, + { + "epoch": 0.44513419755340516, + "grad_norm": 0.46374833583831787, + "learning_rate": 4.934939074275601e-06, + "loss": 0.5954, + "step": 4876 + }, + { + "epoch": 0.4452254884060617, + "grad_norm": 0.4735170602798462, + "learning_rate": 4.934911945333427e-06, + "loss": 0.5751, + "step": 4877 + }, + { + "epoch": 0.44531677925871826, + "grad_norm": 0.47823581099510193, + "learning_rate": 4.934884810810954e-06, + "loss": 0.5871, + "step": 4878 + }, + { + "epoch": 0.44540807011137484, + "grad_norm": 0.4897485375404358, + "learning_rate": 4.934857670708241e-06, + "loss": 0.5563, + "step": 4879 + }, + { + "epoch": 0.4454993609640314, + "grad_norm": 0.46564188599586487, + "learning_rate": 4.934830525025351e-06, + "loss": 0.5831, + "step": 4880 + }, + { + "epoch": 0.44559065181668794, + "grad_norm": 0.4475448429584503, + "learning_rate": 4.934803373762348e-06, + "loss": 0.6047, + "step": 4881 + }, + { + "epoch": 0.4456819426693445, + "grad_norm": 0.4708510637283325, + "learning_rate": 4.934776216919293e-06, + "loss": 0.5479, + "step": 4882 + }, + { + "epoch": 0.4457732335220011, + "grad_norm": 0.4387665390968323, + "learning_rate": 4.9347490544962485e-06, + "loss": 0.6134, + "step": 4883 + }, + { + "epoch": 0.4458645243746577, + "grad_norm": 0.5013877749443054, + "learning_rate": 4.934721886493276e-06, + "loss": 0.5863, + "step": 4884 + }, + { + "epoch": 0.4459558152273142, + "grad_norm": 0.47540825605392456, + "learning_rate": 4.934694712910438e-06, + "loss": 0.579, + "step": 4885 + }, + { + "epoch": 0.4460471060799708, + "grad_norm": 0.45260512828826904, + "learning_rate": 4.934667533747797e-06, + "loss": 0.593, + "step": 4886 + }, + { + "epoch": 0.44613839693262736, + "grad_norm": 0.47732752561569214, + "learning_rate": 4.934640349005416e-06, + "loss": 0.5803, + "step": 4887 + }, + { + "epoch": 0.44622968778528393, + "grad_norm": 0.471042662858963, + "learning_rate": 4.934613158683356e-06, + "loss": 0.5851, + "step": 4888 + }, + { + "epoch": 0.44632097863794046, + "grad_norm": 0.46934089064598083, + "learning_rate": 4.934585962781681e-06, + "loss": 0.5837, + "step": 4889 + }, + { + "epoch": 0.44641226949059704, + "grad_norm": 0.4735051989555359, + "learning_rate": 4.934558761300451e-06, + "loss": 0.6259, + "step": 4890 + }, + { + "epoch": 0.4465035603432536, + "grad_norm": 0.4654829204082489, + "learning_rate": 4.934531554239731e-06, + "loss": 0.5606, + "step": 4891 + }, + { + "epoch": 0.4465948511959102, + "grad_norm": 0.473833292722702, + "learning_rate": 4.934504341599581e-06, + "loss": 0.5654, + "step": 4892 + }, + { + "epoch": 0.4466861420485667, + "grad_norm": 0.48596251010894775, + "learning_rate": 4.934477123380065e-06, + "loss": 0.5645, + "step": 4893 + }, + { + "epoch": 0.4467774329012233, + "grad_norm": 0.4984133541584015, + "learning_rate": 4.934449899581244e-06, + "loss": 0.6024, + "step": 4894 + }, + { + "epoch": 0.44686872375387987, + "grad_norm": 0.5042566657066345, + "learning_rate": 4.934422670203182e-06, + "loss": 0.5827, + "step": 4895 + }, + { + "epoch": 0.44696001460653645, + "grad_norm": 0.4527246952056885, + "learning_rate": 4.93439543524594e-06, + "loss": 0.5768, + "step": 4896 + }, + { + "epoch": 0.44705130545919297, + "grad_norm": 0.5154772996902466, + "learning_rate": 4.934368194709581e-06, + "loss": 0.5603, + "step": 4897 + }, + { + "epoch": 0.44714259631184955, + "grad_norm": 0.47166821360588074, + "learning_rate": 4.934340948594168e-06, + "loss": 0.6155, + "step": 4898 + }, + { + "epoch": 0.4472338871645061, + "grad_norm": 0.4721258878707886, + "learning_rate": 4.934313696899762e-06, + "loss": 0.5812, + "step": 4899 + }, + { + "epoch": 0.4473251780171627, + "grad_norm": 0.5079735517501831, + "learning_rate": 4.934286439626426e-06, + "loss": 0.5386, + "step": 4900 + }, + { + "epoch": 0.44741646886981923, + "grad_norm": 0.4790903627872467, + "learning_rate": 4.934259176774223e-06, + "loss": 0.6074, + "step": 4901 + }, + { + "epoch": 0.4475077597224758, + "grad_norm": 0.47667619585990906, + "learning_rate": 4.9342319083432155e-06, + "loss": 0.5697, + "step": 4902 + }, + { + "epoch": 0.4475990505751324, + "grad_norm": 0.47435441613197327, + "learning_rate": 4.934204634333466e-06, + "loss": 0.587, + "step": 4903 + }, + { + "epoch": 0.44769034142778896, + "grad_norm": 0.45577704906463623, + "learning_rate": 4.934177354745036e-06, + "loss": 0.5791, + "step": 4904 + }, + { + "epoch": 0.4477816322804455, + "grad_norm": 0.43766260147094727, + "learning_rate": 4.934150069577988e-06, + "loss": 0.6626, + "step": 4905 + }, + { + "epoch": 0.44787292313310206, + "grad_norm": 0.435470312833786, + "learning_rate": 4.934122778832388e-06, + "loss": 0.5748, + "step": 4906 + }, + { + "epoch": 0.44796421398575864, + "grad_norm": 0.4697343111038208, + "learning_rate": 4.934095482508293e-06, + "loss": 0.55, + "step": 4907 + }, + { + "epoch": 0.44805550483841516, + "grad_norm": 0.4881639778614044, + "learning_rate": 4.93406818060577e-06, + "loss": 0.5606, + "step": 4908 + }, + { + "epoch": 0.44814679569107174, + "grad_norm": 0.5200773477554321, + "learning_rate": 4.93404087312488e-06, + "loss": 0.5575, + "step": 4909 + }, + { + "epoch": 0.4482380865437283, + "grad_norm": 0.4831254184246063, + "learning_rate": 4.934013560065685e-06, + "loss": 0.5733, + "step": 4910 + }, + { + "epoch": 0.4483293773963849, + "grad_norm": 0.4714451730251312, + "learning_rate": 4.933986241428248e-06, + "loss": 0.5945, + "step": 4911 + }, + { + "epoch": 0.4484206682490414, + "grad_norm": 0.5004172325134277, + "learning_rate": 4.933958917212632e-06, + "loss": 0.5661, + "step": 4912 + }, + { + "epoch": 0.448511959101698, + "grad_norm": 0.47920477390289307, + "learning_rate": 4.933931587418898e-06, + "loss": 0.5772, + "step": 4913 + }, + { + "epoch": 0.4486032499543546, + "grad_norm": 0.4416753649711609, + "learning_rate": 4.933904252047113e-06, + "loss": 0.649, + "step": 4914 + }, + { + "epoch": 0.44869454080701116, + "grad_norm": 0.4594247341156006, + "learning_rate": 4.9338769110973335e-06, + "loss": 0.6176, + "step": 4915 + }, + { + "epoch": 0.4487858316596677, + "grad_norm": 0.47205713391304016, + "learning_rate": 4.933849564569627e-06, + "loss": 0.5607, + "step": 4916 + }, + { + "epoch": 0.44887712251232426, + "grad_norm": 0.49186649918556213, + "learning_rate": 4.933822212464054e-06, + "loss": 0.563, + "step": 4917 + }, + { + "epoch": 0.44896841336498083, + "grad_norm": 0.4799816906452179, + "learning_rate": 4.933794854780678e-06, + "loss": 0.583, + "step": 4918 + }, + { + "epoch": 0.4490597042176374, + "grad_norm": 0.49253153800964355, + "learning_rate": 4.933767491519561e-06, + "loss": 0.5888, + "step": 4919 + }, + { + "epoch": 0.44915099507029393, + "grad_norm": 0.43053558468818665, + "learning_rate": 4.933740122680767e-06, + "loss": 0.6083, + "step": 4920 + }, + { + "epoch": 0.4492422859229505, + "grad_norm": 0.4892328679561615, + "learning_rate": 4.933712748264356e-06, + "loss": 0.5101, + "step": 4921 + }, + { + "epoch": 0.4493335767756071, + "grad_norm": 0.4894993305206299, + "learning_rate": 4.933685368270394e-06, + "loss": 0.5798, + "step": 4922 + }, + { + "epoch": 0.44942486762826367, + "grad_norm": 0.4728068709373474, + "learning_rate": 4.933657982698942e-06, + "loss": 0.5457, + "step": 4923 + }, + { + "epoch": 0.4495161584809202, + "grad_norm": 0.4741489589214325, + "learning_rate": 4.9336305915500625e-06, + "loss": 0.6311, + "step": 4924 + }, + { + "epoch": 0.44960744933357677, + "grad_norm": 0.49689221382141113, + "learning_rate": 4.933603194823819e-06, + "loss": 0.5504, + "step": 4925 + }, + { + "epoch": 0.44969874018623335, + "grad_norm": 0.507127583026886, + "learning_rate": 4.933575792520275e-06, + "loss": 0.5453, + "step": 4926 + }, + { + "epoch": 0.4497900310388899, + "grad_norm": 0.47395059466362, + "learning_rate": 4.933548384639491e-06, + "loss": 0.5873, + "step": 4927 + }, + { + "epoch": 0.44988132189154645, + "grad_norm": 0.4822642207145691, + "learning_rate": 4.933520971181532e-06, + "loss": 0.5962, + "step": 4928 + }, + { + "epoch": 0.449972612744203, + "grad_norm": 0.48409318923950195, + "learning_rate": 4.93349355214646e-06, + "loss": 0.5737, + "step": 4929 + }, + { + "epoch": 0.4500639035968596, + "grad_norm": 0.47779712080955505, + "learning_rate": 4.933466127534338e-06, + "loss": 0.5914, + "step": 4930 + }, + { + "epoch": 0.4501551944495162, + "grad_norm": 0.4840436279773712, + "learning_rate": 4.933438697345228e-06, + "loss": 0.5796, + "step": 4931 + }, + { + "epoch": 0.4502464853021727, + "grad_norm": 0.4481803774833679, + "learning_rate": 4.933411261579194e-06, + "loss": 0.6295, + "step": 4932 + }, + { + "epoch": 0.4503377761548293, + "grad_norm": 0.43041175603866577, + "learning_rate": 4.933383820236298e-06, + "loss": 0.6059, + "step": 4933 + }, + { + "epoch": 0.45042906700748586, + "grad_norm": 0.45162028074264526, + "learning_rate": 4.933356373316605e-06, + "loss": 0.5606, + "step": 4934 + }, + { + "epoch": 0.45052035786014244, + "grad_norm": 0.4723134934902191, + "learning_rate": 4.933328920820174e-06, + "loss": 0.5853, + "step": 4935 + }, + { + "epoch": 0.45061164871279896, + "grad_norm": 0.4984878599643707, + "learning_rate": 4.9333014627470715e-06, + "loss": 0.5809, + "step": 4936 + }, + { + "epoch": 0.45070293956545554, + "grad_norm": 0.4789973497390747, + "learning_rate": 4.933273999097359e-06, + "loss": 0.5476, + "step": 4937 + }, + { + "epoch": 0.4507942304181121, + "grad_norm": 0.5038356184959412, + "learning_rate": 4.9332465298711e-06, + "loss": 0.5811, + "step": 4938 + }, + { + "epoch": 0.4508855212707687, + "grad_norm": 0.47404828667640686, + "learning_rate": 4.933219055068355e-06, + "loss": 0.5602, + "step": 4939 + }, + { + "epoch": 0.4509768121234252, + "grad_norm": 0.48813310265541077, + "learning_rate": 4.9331915746891915e-06, + "loss": 0.592, + "step": 4940 + }, + { + "epoch": 0.4510681029760818, + "grad_norm": 0.47309234738349915, + "learning_rate": 4.933164088733668e-06, + "loss": 0.606, + "step": 4941 + }, + { + "epoch": 0.4511593938287384, + "grad_norm": 0.49751055240631104, + "learning_rate": 4.93313659720185e-06, + "loss": 0.5549, + "step": 4942 + }, + { + "epoch": 0.4512506846813949, + "grad_norm": 0.49564215540885925, + "learning_rate": 4.933109100093801e-06, + "loss": 0.5645, + "step": 4943 + }, + { + "epoch": 0.4513419755340515, + "grad_norm": 0.4594985544681549, + "learning_rate": 4.933081597409582e-06, + "loss": 0.5399, + "step": 4944 + }, + { + "epoch": 0.45143326638670805, + "grad_norm": 0.49771901965141296, + "learning_rate": 4.933054089149257e-06, + "loss": 0.5639, + "step": 4945 + }, + { + "epoch": 0.45152455723936463, + "grad_norm": 0.5032317042350769, + "learning_rate": 4.933026575312889e-06, + "loss": 0.59, + "step": 4946 + }, + { + "epoch": 0.45161584809202115, + "grad_norm": 0.4872450530529022, + "learning_rate": 4.932999055900541e-06, + "loss": 0.5661, + "step": 4947 + }, + { + "epoch": 0.45170713894467773, + "grad_norm": 0.4732816219329834, + "learning_rate": 4.932971530912277e-06, + "loss": 0.5883, + "step": 4948 + }, + { + "epoch": 0.4517984297973343, + "grad_norm": 0.45904046297073364, + "learning_rate": 4.932944000348159e-06, + "loss": 0.6077, + "step": 4949 + }, + { + "epoch": 0.4518897206499909, + "grad_norm": 0.4509744942188263, + "learning_rate": 4.932916464208249e-06, + "loss": 0.623, + "step": 4950 + }, + { + "epoch": 0.4519810115026474, + "grad_norm": 0.46325230598449707, + "learning_rate": 4.932888922492613e-06, + "loss": 0.6024, + "step": 4951 + }, + { + "epoch": 0.452072302355304, + "grad_norm": 0.5112321972846985, + "learning_rate": 4.932861375201312e-06, + "loss": 0.5319, + "step": 4952 + }, + { + "epoch": 0.45216359320796057, + "grad_norm": 0.5132735371589661, + "learning_rate": 4.93283382233441e-06, + "loss": 0.5544, + "step": 4953 + }, + { + "epoch": 0.45225488406061715, + "grad_norm": 0.482498437166214, + "learning_rate": 4.932806263891969e-06, + "loss": 0.5677, + "step": 4954 + }, + { + "epoch": 0.45234617491327367, + "grad_norm": 0.4810698926448822, + "learning_rate": 4.932778699874054e-06, + "loss": 0.5794, + "step": 4955 + }, + { + "epoch": 0.45243746576593025, + "grad_norm": 0.4880354106426239, + "learning_rate": 4.932751130280726e-06, + "loss": 0.5392, + "step": 4956 + }, + { + "epoch": 0.4525287566185868, + "grad_norm": 0.49883386492729187, + "learning_rate": 4.93272355511205e-06, + "loss": 0.5555, + "step": 4957 + }, + { + "epoch": 0.4526200474712434, + "grad_norm": 0.4927697479724884, + "learning_rate": 4.932695974368088e-06, + "loss": 0.5731, + "step": 4958 + }, + { + "epoch": 0.4527113383238999, + "grad_norm": 0.48779624700546265, + "learning_rate": 4.932668388048905e-06, + "loss": 0.6062, + "step": 4959 + }, + { + "epoch": 0.4528026291765565, + "grad_norm": 0.46810483932495117, + "learning_rate": 4.932640796154562e-06, + "loss": 0.5808, + "step": 4960 + }, + { + "epoch": 0.4528939200292131, + "grad_norm": 0.47715774178504944, + "learning_rate": 4.932613198685123e-06, + "loss": 0.5901, + "step": 4961 + }, + { + "epoch": 0.45298521088186966, + "grad_norm": 0.4439849257469177, + "learning_rate": 4.932585595640652e-06, + "loss": 0.5897, + "step": 4962 + }, + { + "epoch": 0.4530765017345262, + "grad_norm": 0.4852858781814575, + "learning_rate": 4.9325579870212116e-06, + "loss": 0.5965, + "step": 4963 + }, + { + "epoch": 0.45316779258718276, + "grad_norm": 0.4666236639022827, + "learning_rate": 4.932530372826865e-06, + "loss": 0.5922, + "step": 4964 + }, + { + "epoch": 0.45325908343983934, + "grad_norm": 0.4810521900653839, + "learning_rate": 4.932502753057676e-06, + "loss": 0.5669, + "step": 4965 + }, + { + "epoch": 0.4533503742924959, + "grad_norm": 0.4608128070831299, + "learning_rate": 4.932475127713707e-06, + "loss": 0.6146, + "step": 4966 + }, + { + "epoch": 0.45344166514515244, + "grad_norm": 0.48917320370674133, + "learning_rate": 4.932447496795023e-06, + "loss": 0.576, + "step": 4967 + }, + { + "epoch": 0.453532955997809, + "grad_norm": 0.4609353244304657, + "learning_rate": 4.932419860301685e-06, + "loss": 0.5813, + "step": 4968 + }, + { + "epoch": 0.4536242468504656, + "grad_norm": 0.48411616683006287, + "learning_rate": 4.932392218233758e-06, + "loss": 0.5618, + "step": 4969 + }, + { + "epoch": 0.4537155377031222, + "grad_norm": 0.46059855818748474, + "learning_rate": 4.932364570591306e-06, + "loss": 0.5798, + "step": 4970 + }, + { + "epoch": 0.4538068285557787, + "grad_norm": 0.4453516900539398, + "learning_rate": 4.93233691737439e-06, + "loss": 0.5582, + "step": 4971 + }, + { + "epoch": 0.4538981194084353, + "grad_norm": 0.4616798460483551, + "learning_rate": 4.932309258583074e-06, + "loss": 0.6011, + "step": 4972 + }, + { + "epoch": 0.45398941026109185, + "grad_norm": 0.45514392852783203, + "learning_rate": 4.932281594217423e-06, + "loss": 0.6253, + "step": 4973 + }, + { + "epoch": 0.4540807011137484, + "grad_norm": 0.4734249413013458, + "learning_rate": 4.932253924277499e-06, + "loss": 0.5637, + "step": 4974 + }, + { + "epoch": 0.45417199196640495, + "grad_norm": 0.5068597197532654, + "learning_rate": 4.932226248763367e-06, + "loss": 0.5544, + "step": 4975 + }, + { + "epoch": 0.45426328281906153, + "grad_norm": 0.46793878078460693, + "learning_rate": 4.932198567675088e-06, + "loss": 0.5561, + "step": 4976 + }, + { + "epoch": 0.4543545736717181, + "grad_norm": 0.4502808749675751, + "learning_rate": 4.932170881012727e-06, + "loss": 0.612, + "step": 4977 + }, + { + "epoch": 0.45444586452437463, + "grad_norm": 0.48523733019828796, + "learning_rate": 4.932143188776347e-06, + "loss": 0.5867, + "step": 4978 + }, + { + "epoch": 0.4545371553770312, + "grad_norm": 0.5021771192550659, + "learning_rate": 4.932115490966012e-06, + "loss": 0.5349, + "step": 4979 + }, + { + "epoch": 0.4546284462296878, + "grad_norm": 0.5174626111984253, + "learning_rate": 4.932087787581785e-06, + "loss": 0.5694, + "step": 4980 + }, + { + "epoch": 0.45471973708234437, + "grad_norm": 0.45058032870292664, + "learning_rate": 4.93206007862373e-06, + "loss": 0.5772, + "step": 4981 + }, + { + "epoch": 0.4548110279350009, + "grad_norm": 0.4748309552669525, + "learning_rate": 4.9320323640919095e-06, + "loss": 0.589, + "step": 4982 + }, + { + "epoch": 0.45490231878765747, + "grad_norm": 0.4983813166618347, + "learning_rate": 4.932004643986388e-06, + "loss": 0.5547, + "step": 4983 + }, + { + "epoch": 0.45499360964031405, + "grad_norm": 0.48547783493995667, + "learning_rate": 4.931976918307228e-06, + "loss": 0.5792, + "step": 4984 + }, + { + "epoch": 0.4550849004929706, + "grad_norm": 0.46559974551200867, + "learning_rate": 4.931949187054495e-06, + "loss": 0.5712, + "step": 4985 + }, + { + "epoch": 0.45517619134562715, + "grad_norm": 0.47873473167419434, + "learning_rate": 4.931921450228249e-06, + "loss": 0.5528, + "step": 4986 + }, + { + "epoch": 0.4552674821982837, + "grad_norm": 0.4781128466129303, + "learning_rate": 4.931893707828557e-06, + "loss": 0.5549, + "step": 4987 + }, + { + "epoch": 0.4553587730509403, + "grad_norm": 0.4834146499633789, + "learning_rate": 4.931865959855482e-06, + "loss": 0.5454, + "step": 4988 + }, + { + "epoch": 0.4554500639035969, + "grad_norm": 0.48384904861450195, + "learning_rate": 4.931838206309086e-06, + "loss": 0.5661, + "step": 4989 + }, + { + "epoch": 0.4555413547562534, + "grad_norm": 0.4744105041027069, + "learning_rate": 4.931810447189435e-06, + "loss": 0.6236, + "step": 4990 + }, + { + "epoch": 0.45563264560891, + "grad_norm": 0.4704288840293884, + "learning_rate": 4.93178268249659e-06, + "loss": 0.5728, + "step": 4991 + }, + { + "epoch": 0.45572393646156656, + "grad_norm": 0.47202908992767334, + "learning_rate": 4.931754912230616e-06, + "loss": 0.5725, + "step": 4992 + }, + { + "epoch": 0.45581522731422314, + "grad_norm": 0.46887439489364624, + "learning_rate": 4.931727136391576e-06, + "loss": 0.6174, + "step": 4993 + }, + { + "epoch": 0.45590651816687966, + "grad_norm": 0.47393766045570374, + "learning_rate": 4.931699354979535e-06, + "loss": 0.5274, + "step": 4994 + }, + { + "epoch": 0.45599780901953624, + "grad_norm": 0.46979647874832153, + "learning_rate": 4.931671567994555e-06, + "loss": 0.5713, + "step": 4995 + }, + { + "epoch": 0.4560890998721928, + "grad_norm": 0.5086151361465454, + "learning_rate": 4.931643775436701e-06, + "loss": 0.53, + "step": 4996 + }, + { + "epoch": 0.4561803907248494, + "grad_norm": 0.5070989727973938, + "learning_rate": 4.931615977306036e-06, + "loss": 0.5546, + "step": 4997 + }, + { + "epoch": 0.4562716815775059, + "grad_norm": 0.4745875895023346, + "learning_rate": 4.931588173602624e-06, + "loss": 0.5547, + "step": 4998 + }, + { + "epoch": 0.4563629724301625, + "grad_norm": 0.4403659999370575, + "learning_rate": 4.931560364326528e-06, + "loss": 0.6223, + "step": 4999 + }, + { + "epoch": 0.4564542632828191, + "grad_norm": 0.48422497510910034, + "learning_rate": 4.9315325494778125e-06, + "loss": 0.5659, + "step": 5000 + }, + { + "epoch": 0.45654555413547565, + "grad_norm": 0.4556420147418976, + "learning_rate": 4.931504729056542e-06, + "loss": 0.582, + "step": 5001 + }, + { + "epoch": 0.4566368449881322, + "grad_norm": 0.45186203718185425, + "learning_rate": 4.931476903062779e-06, + "loss": 0.6242, + "step": 5002 + }, + { + "epoch": 0.45672813584078875, + "grad_norm": 0.48550623655319214, + "learning_rate": 4.931449071496587e-06, + "loss": 0.599, + "step": 5003 + }, + { + "epoch": 0.45681942669344533, + "grad_norm": 0.4698956608772278, + "learning_rate": 4.93142123435803e-06, + "loss": 0.5543, + "step": 5004 + }, + { + "epoch": 0.4569107175461019, + "grad_norm": 0.49278897047042847, + "learning_rate": 4.931393391647173e-06, + "loss": 0.5703, + "step": 5005 + }, + { + "epoch": 0.45700200839875843, + "grad_norm": 0.47022855281829834, + "learning_rate": 4.931365543364079e-06, + "loss": 0.5846, + "step": 5006 + }, + { + "epoch": 0.457093299251415, + "grad_norm": 0.48133134841918945, + "learning_rate": 4.931337689508811e-06, + "loss": 0.5481, + "step": 5007 + }, + { + "epoch": 0.4571845901040716, + "grad_norm": 0.44253769516944885, + "learning_rate": 4.9313098300814354e-06, + "loss": 0.6034, + "step": 5008 + }, + { + "epoch": 0.4572758809567281, + "grad_norm": 0.48067620396614075, + "learning_rate": 4.931281965082013e-06, + "loss": 0.5759, + "step": 5009 + }, + { + "epoch": 0.4573671718093847, + "grad_norm": 0.48138588666915894, + "learning_rate": 4.9312540945106095e-06, + "loss": 0.5394, + "step": 5010 + }, + { + "epoch": 0.45745846266204127, + "grad_norm": 0.4554751217365265, + "learning_rate": 4.931226218367287e-06, + "loss": 0.5291, + "step": 5011 + }, + { + "epoch": 0.45754975351469784, + "grad_norm": 0.47287172079086304, + "learning_rate": 4.931198336652112e-06, + "loss": 0.6204, + "step": 5012 + }, + { + "epoch": 0.45764104436735437, + "grad_norm": 0.45392003655433655, + "learning_rate": 4.931170449365147e-06, + "loss": 0.5846, + "step": 5013 + }, + { + "epoch": 0.45773233522001094, + "grad_norm": 0.47983989119529724, + "learning_rate": 4.931142556506455e-06, + "loss": 0.5829, + "step": 5014 + }, + { + "epoch": 0.4578236260726675, + "grad_norm": 0.4806828796863556, + "learning_rate": 4.931114658076101e-06, + "loss": 0.5356, + "step": 5015 + }, + { + "epoch": 0.4579149169253241, + "grad_norm": 0.509510338306427, + "learning_rate": 4.931086754074149e-06, + "loss": 0.5679, + "step": 5016 + }, + { + "epoch": 0.4580062077779806, + "grad_norm": 0.4730863571166992, + "learning_rate": 4.931058844500664e-06, + "loss": 0.5875, + "step": 5017 + }, + { + "epoch": 0.4580974986306372, + "grad_norm": 0.49250951409339905, + "learning_rate": 4.931030929355707e-06, + "loss": 0.6196, + "step": 5018 + }, + { + "epoch": 0.4581887894832938, + "grad_norm": 0.4709804654121399, + "learning_rate": 4.9310030086393445e-06, + "loss": 0.6052, + "step": 5019 + }, + { + "epoch": 0.45828008033595036, + "grad_norm": 0.4925796687602997, + "learning_rate": 4.93097508235164e-06, + "loss": 0.5703, + "step": 5020 + }, + { + "epoch": 0.4583713711886069, + "grad_norm": 0.47790536284446716, + "learning_rate": 4.930947150492656e-06, + "loss": 0.6325, + "step": 5021 + }, + { + "epoch": 0.45846266204126346, + "grad_norm": 0.47610238194465637, + "learning_rate": 4.930919213062458e-06, + "loss": 0.5871, + "step": 5022 + }, + { + "epoch": 0.45855395289392004, + "grad_norm": 0.4615597128868103, + "learning_rate": 4.9308912700611104e-06, + "loss": 0.6225, + "step": 5023 + }, + { + "epoch": 0.4586452437465766, + "grad_norm": 0.46420085430145264, + "learning_rate": 4.930863321488676e-06, + "loss": 0.5988, + "step": 5024 + }, + { + "epoch": 0.45873653459923314, + "grad_norm": 0.4526638388633728, + "learning_rate": 4.930835367345219e-06, + "loss": 0.5831, + "step": 5025 + }, + { + "epoch": 0.4588278254518897, + "grad_norm": 0.4866856336593628, + "learning_rate": 4.930807407630805e-06, + "loss": 0.5726, + "step": 5026 + }, + { + "epoch": 0.4589191163045463, + "grad_norm": 0.49938052892684937, + "learning_rate": 4.930779442345496e-06, + "loss": 0.6205, + "step": 5027 + }, + { + "epoch": 0.45901040715720287, + "grad_norm": 0.4768795967102051, + "learning_rate": 4.930751471489358e-06, + "loss": 0.6451, + "step": 5028 + }, + { + "epoch": 0.4591016980098594, + "grad_norm": 0.5103341937065125, + "learning_rate": 4.930723495062454e-06, + "loss": 0.5504, + "step": 5029 + }, + { + "epoch": 0.45919298886251597, + "grad_norm": 0.4822559058666229, + "learning_rate": 4.930695513064847e-06, + "loss": 0.579, + "step": 5030 + }, + { + "epoch": 0.45928427971517255, + "grad_norm": 0.46646878123283386, + "learning_rate": 4.930667525496603e-06, + "loss": 0.5739, + "step": 5031 + }, + { + "epoch": 0.45937557056782913, + "grad_norm": 0.485766738653183, + "learning_rate": 4.930639532357786e-06, + "loss": 0.5772, + "step": 5032 + }, + { + "epoch": 0.45946686142048565, + "grad_norm": 0.43160971999168396, + "learning_rate": 4.93061153364846e-06, + "loss": 0.5862, + "step": 5033 + }, + { + "epoch": 0.45955815227314223, + "grad_norm": 0.4589519798755646, + "learning_rate": 4.930583529368688e-06, + "loss": 0.5655, + "step": 5034 + }, + { + "epoch": 0.4596494431257988, + "grad_norm": 0.4727303981781006, + "learning_rate": 4.930555519518535e-06, + "loss": 0.6083, + "step": 5035 + }, + { + "epoch": 0.4597407339784554, + "grad_norm": 0.4650692939758301, + "learning_rate": 4.930527504098065e-06, + "loss": 0.6064, + "step": 5036 + }, + { + "epoch": 0.4598320248311119, + "grad_norm": 0.5039564967155457, + "learning_rate": 4.930499483107343e-06, + "loss": 0.5607, + "step": 5037 + }, + { + "epoch": 0.4599233156837685, + "grad_norm": 0.49281731247901917, + "learning_rate": 4.930471456546433e-06, + "loss": 0.5725, + "step": 5038 + }, + { + "epoch": 0.46001460653642506, + "grad_norm": 0.45345208048820496, + "learning_rate": 4.930443424415398e-06, + "loss": 0.608, + "step": 5039 + }, + { + "epoch": 0.46010589738908164, + "grad_norm": 0.4940662980079651, + "learning_rate": 4.930415386714304e-06, + "loss": 0.5673, + "step": 5040 + }, + { + "epoch": 0.46019718824173816, + "grad_norm": 0.46368125081062317, + "learning_rate": 4.930387343443214e-06, + "loss": 0.5762, + "step": 5041 + }, + { + "epoch": 0.46028847909439474, + "grad_norm": 0.4679025113582611, + "learning_rate": 4.9303592946021915e-06, + "loss": 0.5889, + "step": 5042 + }, + { + "epoch": 0.4603797699470513, + "grad_norm": 0.4273339509963989, + "learning_rate": 4.930331240191303e-06, + "loss": 0.5918, + "step": 5043 + }, + { + "epoch": 0.46047106079970784, + "grad_norm": 0.470538854598999, + "learning_rate": 4.930303180210612e-06, + "loss": 0.5668, + "step": 5044 + }, + { + "epoch": 0.4605623516523644, + "grad_norm": 0.47438865900039673, + "learning_rate": 4.930275114660183e-06, + "loss": 0.5534, + "step": 5045 + }, + { + "epoch": 0.460653642505021, + "grad_norm": 0.4657011330127716, + "learning_rate": 4.930247043540079e-06, + "loss": 0.5631, + "step": 5046 + }, + { + "epoch": 0.4607449333576776, + "grad_norm": 0.44969844818115234, + "learning_rate": 4.9302189668503646e-06, + "loss": 0.5969, + "step": 5047 + }, + { + "epoch": 0.4608362242103341, + "grad_norm": 0.4597906172275543, + "learning_rate": 4.930190884591105e-06, + "loss": 0.6373, + "step": 5048 + }, + { + "epoch": 0.4609275150629907, + "grad_norm": 0.4782516360282898, + "learning_rate": 4.930162796762365e-06, + "loss": 0.5772, + "step": 5049 + }, + { + "epoch": 0.46101880591564726, + "grad_norm": 0.48578742146492004, + "learning_rate": 4.930134703364208e-06, + "loss": 0.5995, + "step": 5050 + }, + { + "epoch": 0.46111009676830383, + "grad_norm": 0.4949983060359955, + "learning_rate": 4.930106604396698e-06, + "loss": 0.5794, + "step": 5051 + }, + { + "epoch": 0.46120138762096036, + "grad_norm": 0.47610846161842346, + "learning_rate": 4.930078499859901e-06, + "loss": 0.5784, + "step": 5052 + }, + { + "epoch": 0.46129267847361693, + "grad_norm": 0.4815850853919983, + "learning_rate": 4.93005038975388e-06, + "loss": 0.5816, + "step": 5053 + }, + { + "epoch": 0.4613839693262735, + "grad_norm": 0.4755914807319641, + "learning_rate": 4.9300222740787e-06, + "loss": 0.5602, + "step": 5054 + }, + { + "epoch": 0.4614752601789301, + "grad_norm": 0.460068941116333, + "learning_rate": 4.929994152834425e-06, + "loss": 0.5695, + "step": 5055 + }, + { + "epoch": 0.4615665510315866, + "grad_norm": 0.4517327845096588, + "learning_rate": 4.92996602602112e-06, + "loss": 0.5792, + "step": 5056 + }, + { + "epoch": 0.4616578418842432, + "grad_norm": 0.47270023822784424, + "learning_rate": 4.929937893638849e-06, + "loss": 0.5555, + "step": 5057 + }, + { + "epoch": 0.46174913273689977, + "grad_norm": 0.4792550206184387, + "learning_rate": 4.929909755687677e-06, + "loss": 0.5872, + "step": 5058 + }, + { + "epoch": 0.46184042358955635, + "grad_norm": 0.5008746981620789, + "learning_rate": 4.929881612167668e-06, + "loss": 0.531, + "step": 5059 + }, + { + "epoch": 0.46193171444221287, + "grad_norm": 0.47017553448677063, + "learning_rate": 4.929853463078886e-06, + "loss": 0.6432, + "step": 5060 + }, + { + "epoch": 0.46202300529486945, + "grad_norm": 0.4618488550186157, + "learning_rate": 4.929825308421398e-06, + "loss": 0.6091, + "step": 5061 + }, + { + "epoch": 0.462114296147526, + "grad_norm": 0.521929919719696, + "learning_rate": 4.929797148195266e-06, + "loss": 0.5795, + "step": 5062 + }, + { + "epoch": 0.4622055870001826, + "grad_norm": 0.47608646750450134, + "learning_rate": 4.929768982400554e-06, + "loss": 0.5814, + "step": 5063 + }, + { + "epoch": 0.4622968778528391, + "grad_norm": 0.4988481402397156, + "learning_rate": 4.929740811037328e-06, + "loss": 0.5734, + "step": 5064 + }, + { + "epoch": 0.4623881687054957, + "grad_norm": 0.48273974657058716, + "learning_rate": 4.929712634105653e-06, + "loss": 0.5492, + "step": 5065 + }, + { + "epoch": 0.4624794595581523, + "grad_norm": 0.4821205735206604, + "learning_rate": 4.929684451605593e-06, + "loss": 0.5344, + "step": 5066 + }, + { + "epoch": 0.46257075041080886, + "grad_norm": 0.4756591320037842, + "learning_rate": 4.929656263537213e-06, + "loss": 0.5541, + "step": 5067 + }, + { + "epoch": 0.4626620412634654, + "grad_norm": 0.46389326453208923, + "learning_rate": 4.929628069900576e-06, + "loss": 0.5611, + "step": 5068 + }, + { + "epoch": 0.46275333211612196, + "grad_norm": 0.5014867782592773, + "learning_rate": 4.9295998706957486e-06, + "loss": 0.5352, + "step": 5069 + }, + { + "epoch": 0.46284462296877854, + "grad_norm": 0.4689602553844452, + "learning_rate": 4.929571665922794e-06, + "loss": 0.5802, + "step": 5070 + }, + { + "epoch": 0.4629359138214351, + "grad_norm": 0.4751029312610626, + "learning_rate": 4.9295434555817776e-06, + "loss": 0.6019, + "step": 5071 + }, + { + "epoch": 0.46302720467409164, + "grad_norm": 0.4210680425167084, + "learning_rate": 4.929515239672764e-06, + "loss": 0.604, + "step": 5072 + }, + { + "epoch": 0.4631184955267482, + "grad_norm": 0.4623096287250519, + "learning_rate": 4.929487018195818e-06, + "loss": 0.6088, + "step": 5073 + }, + { + "epoch": 0.4632097863794048, + "grad_norm": 0.4704301357269287, + "learning_rate": 4.929458791151003e-06, + "loss": 0.584, + "step": 5074 + }, + { + "epoch": 0.4633010772320613, + "grad_norm": 0.45055070519447327, + "learning_rate": 4.929430558538386e-06, + "loss": 0.5925, + "step": 5075 + }, + { + "epoch": 0.4633923680847179, + "grad_norm": 0.4581119418144226, + "learning_rate": 4.9294023203580295e-06, + "loss": 0.5731, + "step": 5076 + }, + { + "epoch": 0.4634836589373745, + "grad_norm": 0.5371304154396057, + "learning_rate": 4.929374076609999e-06, + "loss": 0.5656, + "step": 5077 + }, + { + "epoch": 0.46357494979003105, + "grad_norm": 0.4263966679573059, + "learning_rate": 4.92934582729436e-06, + "loss": 0.5851, + "step": 5078 + }, + { + "epoch": 0.4636662406426876, + "grad_norm": 0.4884660840034485, + "learning_rate": 4.929317572411176e-06, + "loss": 0.6033, + "step": 5079 + }, + { + "epoch": 0.46375753149534416, + "grad_norm": 0.5304703116416931, + "learning_rate": 4.929289311960512e-06, + "loss": 0.5616, + "step": 5080 + }, + { + "epoch": 0.46384882234800073, + "grad_norm": 0.4867529273033142, + "learning_rate": 4.929261045942434e-06, + "loss": 0.5724, + "step": 5081 + }, + { + "epoch": 0.4639401132006573, + "grad_norm": 0.4927726984024048, + "learning_rate": 4.929232774357006e-06, + "loss": 0.5636, + "step": 5082 + }, + { + "epoch": 0.46403140405331383, + "grad_norm": 0.4587337076663971, + "learning_rate": 4.9292044972042915e-06, + "loss": 0.5731, + "step": 5083 + }, + { + "epoch": 0.4641226949059704, + "grad_norm": 0.47571656107902527, + "learning_rate": 4.929176214484358e-06, + "loss": 0.5961, + "step": 5084 + }, + { + "epoch": 0.464213985758627, + "grad_norm": 0.446674108505249, + "learning_rate": 4.9291479261972676e-06, + "loss": 0.5639, + "step": 5085 + }, + { + "epoch": 0.46430527661128357, + "grad_norm": 0.46786171197891235, + "learning_rate": 4.9291196323430865e-06, + "loss": 0.5743, + "step": 5086 + }, + { + "epoch": 0.4643965674639401, + "grad_norm": 0.466828852891922, + "learning_rate": 4.92909133292188e-06, + "loss": 0.5799, + "step": 5087 + }, + { + "epoch": 0.46448785831659667, + "grad_norm": 0.46476471424102783, + "learning_rate": 4.929063027933711e-06, + "loss": 0.5858, + "step": 5088 + }, + { + "epoch": 0.46457914916925325, + "grad_norm": 0.490604043006897, + "learning_rate": 4.929034717378647e-06, + "loss": 0.5561, + "step": 5089 + }, + { + "epoch": 0.4646704400219098, + "grad_norm": 0.48326098918914795, + "learning_rate": 4.9290064012567505e-06, + "loss": 0.5577, + "step": 5090 + }, + { + "epoch": 0.46476173087456635, + "grad_norm": 0.5087090134620667, + "learning_rate": 4.928978079568088e-06, + "loss": 0.5957, + "step": 5091 + }, + { + "epoch": 0.4648530217272229, + "grad_norm": 0.4957529306411743, + "learning_rate": 4.928949752312725e-06, + "loss": 0.5941, + "step": 5092 + }, + { + "epoch": 0.4649443125798795, + "grad_norm": 0.5147501826286316, + "learning_rate": 4.9289214194907245e-06, + "loss": 0.5254, + "step": 5093 + }, + { + "epoch": 0.4650356034325361, + "grad_norm": 0.4618631601333618, + "learning_rate": 4.9288930811021515e-06, + "loss": 0.5742, + "step": 5094 + }, + { + "epoch": 0.4651268942851926, + "grad_norm": 0.4708406925201416, + "learning_rate": 4.928864737147072e-06, + "loss": 0.5914, + "step": 5095 + }, + { + "epoch": 0.4652181851378492, + "grad_norm": 0.48741379380226135, + "learning_rate": 4.928836387625551e-06, + "loss": 0.5843, + "step": 5096 + }, + { + "epoch": 0.46530947599050576, + "grad_norm": 0.5163028836250305, + "learning_rate": 4.928808032537653e-06, + "loss": 0.5269, + "step": 5097 + }, + { + "epoch": 0.46540076684316234, + "grad_norm": 0.47937437891960144, + "learning_rate": 4.9287796718834424e-06, + "loss": 0.5965, + "step": 5098 + }, + { + "epoch": 0.46549205769581886, + "grad_norm": 0.46755433082580566, + "learning_rate": 4.928751305662985e-06, + "loss": 0.5955, + "step": 5099 + }, + { + "epoch": 0.46558334854847544, + "grad_norm": 0.43846502900123596, + "learning_rate": 4.928722933876346e-06, + "loss": 0.57, + "step": 5100 + }, + { + "epoch": 0.465674639401132, + "grad_norm": 0.4677732288837433, + "learning_rate": 4.928694556523591e-06, + "loss": 0.6014, + "step": 5101 + }, + { + "epoch": 0.4657659302537886, + "grad_norm": 0.4596847593784332, + "learning_rate": 4.928666173604784e-06, + "loss": 0.6071, + "step": 5102 + }, + { + "epoch": 0.4658572211064451, + "grad_norm": 0.44049930572509766, + "learning_rate": 4.92863778511999e-06, + "loss": 0.5963, + "step": 5103 + }, + { + "epoch": 0.4659485119591017, + "grad_norm": 0.48516613245010376, + "learning_rate": 4.928609391069273e-06, + "loss": 0.5463, + "step": 5104 + }, + { + "epoch": 0.4660398028117583, + "grad_norm": 0.4930681884288788, + "learning_rate": 4.928580991452701e-06, + "loss": 0.5563, + "step": 5105 + }, + { + "epoch": 0.46613109366441485, + "grad_norm": 0.4664612114429474, + "learning_rate": 4.928552586270336e-06, + "loss": 0.5844, + "step": 5106 + }, + { + "epoch": 0.4662223845170714, + "grad_norm": 0.49238935112953186, + "learning_rate": 4.928524175522245e-06, + "loss": 0.581, + "step": 5107 + }, + { + "epoch": 0.46631367536972795, + "grad_norm": 0.47995325922966003, + "learning_rate": 4.928495759208493e-06, + "loss": 0.6033, + "step": 5108 + }, + { + "epoch": 0.46640496622238453, + "grad_norm": 0.4992028772830963, + "learning_rate": 4.928467337329145e-06, + "loss": 0.5954, + "step": 5109 + }, + { + "epoch": 0.46649625707504105, + "grad_norm": 0.49786680936813354, + "learning_rate": 4.928438909884265e-06, + "loss": 0.5546, + "step": 5110 + }, + { + "epoch": 0.46658754792769763, + "grad_norm": 0.47016456723213196, + "learning_rate": 4.928410476873919e-06, + "loss": 0.6116, + "step": 5111 + }, + { + "epoch": 0.4666788387803542, + "grad_norm": 0.5052571892738342, + "learning_rate": 4.928382038298173e-06, + "loss": 0.5495, + "step": 5112 + }, + { + "epoch": 0.4667701296330108, + "grad_norm": 0.46183064579963684, + "learning_rate": 4.928353594157091e-06, + "loss": 0.555, + "step": 5113 + }, + { + "epoch": 0.4668614204856673, + "grad_norm": 0.47915858030319214, + "learning_rate": 4.928325144450739e-06, + "loss": 0.5739, + "step": 5114 + }, + { + "epoch": 0.4669527113383239, + "grad_norm": 0.49400198459625244, + "learning_rate": 4.9282966891791806e-06, + "loss": 0.5281, + "step": 5115 + }, + { + "epoch": 0.46704400219098047, + "grad_norm": 0.4601491391658783, + "learning_rate": 4.928268228342483e-06, + "loss": 0.5554, + "step": 5116 + }, + { + "epoch": 0.46713529304363705, + "grad_norm": 0.4821004867553711, + "learning_rate": 4.928239761940711e-06, + "loss": 0.5703, + "step": 5117 + }, + { + "epoch": 0.46722658389629357, + "grad_norm": 0.4824400544166565, + "learning_rate": 4.928211289973929e-06, + "loss": 0.5439, + "step": 5118 + }, + { + "epoch": 0.46731787474895015, + "grad_norm": 0.4752098321914673, + "learning_rate": 4.9281828124422024e-06, + "loss": 0.5907, + "step": 5119 + }, + { + "epoch": 0.4674091656016067, + "grad_norm": 0.4522002041339874, + "learning_rate": 4.9281543293455966e-06, + "loss": 0.583, + "step": 5120 + }, + { + "epoch": 0.4675004564542633, + "grad_norm": 0.4433949589729309, + "learning_rate": 4.928125840684178e-06, + "loss": 0.5898, + "step": 5121 + }, + { + "epoch": 0.4675917473069198, + "grad_norm": 0.5016204714775085, + "learning_rate": 4.92809734645801e-06, + "loss": 0.5343, + "step": 5122 + }, + { + "epoch": 0.4676830381595764, + "grad_norm": 0.46159690618515015, + "learning_rate": 4.92806884666716e-06, + "loss": 0.558, + "step": 5123 + }, + { + "epoch": 0.467774329012233, + "grad_norm": 0.5087206363677979, + "learning_rate": 4.928040341311691e-06, + "loss": 0.5276, + "step": 5124 + }, + { + "epoch": 0.46786561986488956, + "grad_norm": 0.49968159198760986, + "learning_rate": 4.928011830391669e-06, + "loss": 0.5969, + "step": 5125 + }, + { + "epoch": 0.4679569107175461, + "grad_norm": 0.4708743095397949, + "learning_rate": 4.927983313907161e-06, + "loss": 0.5669, + "step": 5126 + }, + { + "epoch": 0.46804820157020266, + "grad_norm": 0.4700290858745575, + "learning_rate": 4.927954791858231e-06, + "loss": 0.5694, + "step": 5127 + }, + { + "epoch": 0.46813949242285924, + "grad_norm": 0.47699248790740967, + "learning_rate": 4.927926264244944e-06, + "loss": 0.5836, + "step": 5128 + }, + { + "epoch": 0.4682307832755158, + "grad_norm": 0.46856439113616943, + "learning_rate": 4.9278977310673664e-06, + "loss": 0.592, + "step": 5129 + }, + { + "epoch": 0.46832207412817234, + "grad_norm": 0.4701579213142395, + "learning_rate": 4.927869192325563e-06, + "loss": 0.6016, + "step": 5130 + }, + { + "epoch": 0.4684133649808289, + "grad_norm": 0.502196729183197, + "learning_rate": 4.927840648019599e-06, + "loss": 0.5481, + "step": 5131 + }, + { + "epoch": 0.4685046558334855, + "grad_norm": 0.4843573272228241, + "learning_rate": 4.927812098149541e-06, + "loss": 0.5484, + "step": 5132 + }, + { + "epoch": 0.4685959466861421, + "grad_norm": 0.4593832194805145, + "learning_rate": 4.927783542715453e-06, + "loss": 0.5933, + "step": 5133 + }, + { + "epoch": 0.4686872375387986, + "grad_norm": 0.4647672772407532, + "learning_rate": 4.927754981717401e-06, + "loss": 0.5863, + "step": 5134 + }, + { + "epoch": 0.4687785283914552, + "grad_norm": 0.46644461154937744, + "learning_rate": 4.92772641515545e-06, + "loss": 0.5513, + "step": 5135 + }, + { + "epoch": 0.46886981924411175, + "grad_norm": 0.4796525239944458, + "learning_rate": 4.927697843029667e-06, + "loss": 0.5611, + "step": 5136 + }, + { + "epoch": 0.46896111009676833, + "grad_norm": 0.4592292904853821, + "learning_rate": 4.927669265340115e-06, + "loss": 0.5289, + "step": 5137 + }, + { + "epoch": 0.46905240094942485, + "grad_norm": 0.48051437735557556, + "learning_rate": 4.9276406820868616e-06, + "loss": 0.5629, + "step": 5138 + }, + { + "epoch": 0.46914369180208143, + "grad_norm": 0.5041959285736084, + "learning_rate": 4.927612093269972e-06, + "loss": 0.5861, + "step": 5139 + }, + { + "epoch": 0.469234982654738, + "grad_norm": 0.4981286823749542, + "learning_rate": 4.927583498889511e-06, + "loss": 0.5588, + "step": 5140 + }, + { + "epoch": 0.46932627350739453, + "grad_norm": 0.49276190996170044, + "learning_rate": 4.927554898945544e-06, + "loss": 0.5782, + "step": 5141 + }, + { + "epoch": 0.4694175643600511, + "grad_norm": 0.4463717043399811, + "learning_rate": 4.927526293438137e-06, + "loss": 0.6174, + "step": 5142 + }, + { + "epoch": 0.4695088552127077, + "grad_norm": 0.5002493262290955, + "learning_rate": 4.927497682367357e-06, + "loss": 0.5471, + "step": 5143 + }, + { + "epoch": 0.46960014606536427, + "grad_norm": 0.49747031927108765, + "learning_rate": 4.927469065733267e-06, + "loss": 0.5727, + "step": 5144 + }, + { + "epoch": 0.4696914369180208, + "grad_norm": 0.49752935767173767, + "learning_rate": 4.927440443535934e-06, + "loss": 0.5449, + "step": 5145 + }, + { + "epoch": 0.46978272777067737, + "grad_norm": 0.468853622674942, + "learning_rate": 4.927411815775423e-06, + "loss": 0.6124, + "step": 5146 + }, + { + "epoch": 0.46987401862333394, + "grad_norm": 0.5083276033401489, + "learning_rate": 4.9273831824517996e-06, + "loss": 0.537, + "step": 5147 + }, + { + "epoch": 0.4699653094759905, + "grad_norm": 0.49282607436180115, + "learning_rate": 4.927354543565131e-06, + "loss": 0.577, + "step": 5148 + }, + { + "epoch": 0.47005660032864705, + "grad_norm": 0.48737940192222595, + "learning_rate": 4.9273258991154805e-06, + "loss": 0.5967, + "step": 5149 + }, + { + "epoch": 0.4701478911813036, + "grad_norm": 0.4675006866455078, + "learning_rate": 4.927297249102914e-06, + "loss": 0.5835, + "step": 5150 + }, + { + "epoch": 0.4702391820339602, + "grad_norm": 0.5061266422271729, + "learning_rate": 4.9272685935275e-06, + "loss": 0.5655, + "step": 5151 + }, + { + "epoch": 0.4703304728866168, + "grad_norm": 0.4924914538860321, + "learning_rate": 4.927239932389301e-06, + "loss": 0.5682, + "step": 5152 + }, + { + "epoch": 0.4704217637392733, + "grad_norm": 0.49228033423423767, + "learning_rate": 4.927211265688384e-06, + "loss": 0.5633, + "step": 5153 + }, + { + "epoch": 0.4705130545919299, + "grad_norm": 0.5081554651260376, + "learning_rate": 4.927182593424815e-06, + "loss": 0.5615, + "step": 5154 + }, + { + "epoch": 0.47060434544458646, + "grad_norm": 0.4877113699913025, + "learning_rate": 4.927153915598659e-06, + "loss": 0.5642, + "step": 5155 + }, + { + "epoch": 0.47069563629724304, + "grad_norm": 0.43586307764053345, + "learning_rate": 4.927125232209982e-06, + "loss": 0.6323, + "step": 5156 + }, + { + "epoch": 0.47078692714989956, + "grad_norm": 0.45308300852775574, + "learning_rate": 4.9270965432588495e-06, + "loss": 0.5867, + "step": 5157 + }, + { + "epoch": 0.47087821800255614, + "grad_norm": 0.5024036169052124, + "learning_rate": 4.927067848745327e-06, + "loss": 0.5612, + "step": 5158 + }, + { + "epoch": 0.4709695088552127, + "grad_norm": 0.46593761444091797, + "learning_rate": 4.927039148669481e-06, + "loss": 0.5678, + "step": 5159 + }, + { + "epoch": 0.4710607997078693, + "grad_norm": 0.4655308723449707, + "learning_rate": 4.927010443031378e-06, + "loss": 0.6135, + "step": 5160 + }, + { + "epoch": 0.4711520905605258, + "grad_norm": 0.49326974153518677, + "learning_rate": 4.926981731831082e-06, + "loss": 0.5801, + "step": 5161 + }, + { + "epoch": 0.4712433814131824, + "grad_norm": 0.43092045187950134, + "learning_rate": 4.9269530150686596e-06, + "loss": 0.6241, + "step": 5162 + }, + { + "epoch": 0.47133467226583897, + "grad_norm": 0.4890061616897583, + "learning_rate": 4.926924292744176e-06, + "loss": 0.5661, + "step": 5163 + }, + { + "epoch": 0.47142596311849555, + "grad_norm": 0.4900708496570587, + "learning_rate": 4.9268955648576986e-06, + "loss": 0.5492, + "step": 5164 + }, + { + "epoch": 0.4715172539711521, + "grad_norm": 0.5035121440887451, + "learning_rate": 4.926866831409291e-06, + "loss": 0.5474, + "step": 5165 + }, + { + "epoch": 0.47160854482380865, + "grad_norm": 0.48915669322013855, + "learning_rate": 4.926838092399022e-06, + "loss": 0.5793, + "step": 5166 + }, + { + "epoch": 0.47169983567646523, + "grad_norm": 0.5062595009803772, + "learning_rate": 4.926809347826955e-06, + "loss": 0.5949, + "step": 5167 + }, + { + "epoch": 0.4717911265291218, + "grad_norm": 0.45964670181274414, + "learning_rate": 4.926780597693156e-06, + "loss": 0.586, + "step": 5168 + }, + { + "epoch": 0.47188241738177833, + "grad_norm": 0.47706305980682373, + "learning_rate": 4.926751841997692e-06, + "loss": 0.5624, + "step": 5169 + }, + { + "epoch": 0.4719737082344349, + "grad_norm": 0.4943385422229767, + "learning_rate": 4.926723080740628e-06, + "loss": 0.5527, + "step": 5170 + }, + { + "epoch": 0.4720649990870915, + "grad_norm": 0.44396594166755676, + "learning_rate": 4.9266943139220316e-06, + "loss": 0.5811, + "step": 5171 + }, + { + "epoch": 0.47215628993974806, + "grad_norm": 0.48857760429382324, + "learning_rate": 4.926665541541967e-06, + "loss": 0.5347, + "step": 5172 + }, + { + "epoch": 0.4722475807924046, + "grad_norm": 0.4572940766811371, + "learning_rate": 4.926636763600501e-06, + "loss": 0.5717, + "step": 5173 + }, + { + "epoch": 0.47233887164506116, + "grad_norm": 0.45112136006355286, + "learning_rate": 4.926607980097698e-06, + "loss": 0.5727, + "step": 5174 + }, + { + "epoch": 0.47243016249771774, + "grad_norm": 0.48389342427253723, + "learning_rate": 4.9265791910336255e-06, + "loss": 0.5371, + "step": 5175 + }, + { + "epoch": 0.47252145335037427, + "grad_norm": 0.4804544150829315, + "learning_rate": 4.926550396408349e-06, + "loss": 0.6141, + "step": 5176 + }, + { + "epoch": 0.47261274420303084, + "grad_norm": 0.47760123014450073, + "learning_rate": 4.9265215962219355e-06, + "loss": 0.5519, + "step": 5177 + }, + { + "epoch": 0.4727040350556874, + "grad_norm": 0.48546329140663147, + "learning_rate": 4.92649279047445e-06, + "loss": 0.5885, + "step": 5178 + }, + { + "epoch": 0.472795325908344, + "grad_norm": 0.48838597536087036, + "learning_rate": 4.9264639791659575e-06, + "loss": 0.5783, + "step": 5179 + }, + { + "epoch": 0.4728866167610005, + "grad_norm": 0.4560226798057556, + "learning_rate": 4.926435162296525e-06, + "loss": 0.5723, + "step": 5180 + }, + { + "epoch": 0.4729779076136571, + "grad_norm": 0.42062631249427795, + "learning_rate": 4.92640633986622e-06, + "loss": 0.5769, + "step": 5181 + }, + { + "epoch": 0.4730691984663137, + "grad_norm": 0.4982585906982422, + "learning_rate": 4.926377511875107e-06, + "loss": 0.5677, + "step": 5182 + }, + { + "epoch": 0.47316048931897026, + "grad_norm": 0.48595547676086426, + "learning_rate": 4.926348678323253e-06, + "loss": 0.6029, + "step": 5183 + }, + { + "epoch": 0.4732517801716268, + "grad_norm": 0.48033004999160767, + "learning_rate": 4.926319839210722e-06, + "loss": 0.545, + "step": 5184 + }, + { + "epoch": 0.47334307102428336, + "grad_norm": 0.4605247378349304, + "learning_rate": 4.926290994537582e-06, + "loss": 0.5781, + "step": 5185 + }, + { + "epoch": 0.47343436187693994, + "grad_norm": 0.4917616546154022, + "learning_rate": 4.926262144303898e-06, + "loss": 0.5515, + "step": 5186 + }, + { + "epoch": 0.4735256527295965, + "grad_norm": 0.5149849057197571, + "learning_rate": 4.926233288509738e-06, + "loss": 0.6022, + "step": 5187 + }, + { + "epoch": 0.47361694358225304, + "grad_norm": 0.4436425268650055, + "learning_rate": 4.926204427155166e-06, + "loss": 0.6435, + "step": 5188 + }, + { + "epoch": 0.4737082344349096, + "grad_norm": 0.5223186016082764, + "learning_rate": 4.92617556024025e-06, + "loss": 0.5353, + "step": 5189 + }, + { + "epoch": 0.4737995252875662, + "grad_norm": 0.48245882987976074, + "learning_rate": 4.9261466877650535e-06, + "loss": 0.5959, + "step": 5190 + }, + { + "epoch": 0.47389081614022277, + "grad_norm": 0.4796672761440277, + "learning_rate": 4.926117809729646e-06, + "loss": 0.5889, + "step": 5191 + }, + { + "epoch": 0.4739821069928793, + "grad_norm": 0.45183780789375305, + "learning_rate": 4.92608892613409e-06, + "loss": 0.6227, + "step": 5192 + }, + { + "epoch": 0.47407339784553587, + "grad_norm": 0.456427663564682, + "learning_rate": 4.926060036978456e-06, + "loss": 0.575, + "step": 5193 + }, + { + "epoch": 0.47416468869819245, + "grad_norm": 0.48802003264427185, + "learning_rate": 4.926031142262806e-06, + "loss": 0.5499, + "step": 5194 + }, + { + "epoch": 0.474255979550849, + "grad_norm": 0.46184441447257996, + "learning_rate": 4.926002241987209e-06, + "loss": 0.611, + "step": 5195 + }, + { + "epoch": 0.47434727040350555, + "grad_norm": 0.47873714566230774, + "learning_rate": 4.9259733361517305e-06, + "loss": 0.5556, + "step": 5196 + }, + { + "epoch": 0.47443856125616213, + "grad_norm": 0.4632604420185089, + "learning_rate": 4.925944424756437e-06, + "loss": 0.5853, + "step": 5197 + }, + { + "epoch": 0.4745298521088187, + "grad_norm": 0.5108810067176819, + "learning_rate": 4.925915507801393e-06, + "loss": 0.6014, + "step": 5198 + }, + { + "epoch": 0.4746211429614753, + "grad_norm": 0.4884178042411804, + "learning_rate": 4.925886585286667e-06, + "loss": 0.5842, + "step": 5199 + }, + { + "epoch": 0.4747124338141318, + "grad_norm": 0.4422912001609802, + "learning_rate": 4.925857657212325e-06, + "loss": 0.5792, + "step": 5200 + }, + { + "epoch": 0.4748037246667884, + "grad_norm": 0.44584232568740845, + "learning_rate": 4.925828723578432e-06, + "loss": 0.6224, + "step": 5201 + }, + { + "epoch": 0.47489501551944496, + "grad_norm": 0.4954949915409088, + "learning_rate": 4.925799784385055e-06, + "loss": 0.5955, + "step": 5202 + }, + { + "epoch": 0.47498630637210154, + "grad_norm": 0.4820752441883087, + "learning_rate": 4.92577083963226e-06, + "loss": 0.5413, + "step": 5203 + }, + { + "epoch": 0.47507759722475806, + "grad_norm": 0.4891171455383301, + "learning_rate": 4.925741889320114e-06, + "loss": 0.6033, + "step": 5204 + }, + { + "epoch": 0.47516888807741464, + "grad_norm": 0.506481945514679, + "learning_rate": 4.925712933448683e-06, + "loss": 0.559, + "step": 5205 + }, + { + "epoch": 0.4752601789300712, + "grad_norm": 0.45439425110816956, + "learning_rate": 4.925683972018034e-06, + "loss": 0.6133, + "step": 5206 + }, + { + "epoch": 0.4753514697827278, + "grad_norm": 0.48089462518692017, + "learning_rate": 4.925655005028233e-06, + "loss": 0.578, + "step": 5207 + }, + { + "epoch": 0.4754427606353843, + "grad_norm": 0.4608602225780487, + "learning_rate": 4.925626032479345e-06, + "loss": 0.5436, + "step": 5208 + }, + { + "epoch": 0.4755340514880409, + "grad_norm": 0.4958469271659851, + "learning_rate": 4.925597054371438e-06, + "loss": 0.5441, + "step": 5209 + }, + { + "epoch": 0.4756253423406975, + "grad_norm": 0.4464048147201538, + "learning_rate": 4.925568070704578e-06, + "loss": 0.6112, + "step": 5210 + }, + { + "epoch": 0.475716633193354, + "grad_norm": 0.470180481672287, + "learning_rate": 4.9255390814788314e-06, + "loss": 0.6025, + "step": 5211 + }, + { + "epoch": 0.4758079240460106, + "grad_norm": 0.4272676706314087, + "learning_rate": 4.925510086694264e-06, + "loss": 0.5588, + "step": 5212 + }, + { + "epoch": 0.47589921489866716, + "grad_norm": 0.4565192759037018, + "learning_rate": 4.925481086350944e-06, + "loss": 0.586, + "step": 5213 + }, + { + "epoch": 0.47599050575132373, + "grad_norm": 0.49506229162216187, + "learning_rate": 4.925452080448936e-06, + "loss": 0.5554, + "step": 5214 + }, + { + "epoch": 0.47608179660398026, + "grad_norm": 0.5302602648735046, + "learning_rate": 4.925423068988308e-06, + "loss": 0.5531, + "step": 5215 + }, + { + "epoch": 0.47617308745663683, + "grad_norm": 0.469797819852829, + "learning_rate": 4.925394051969124e-06, + "loss": 0.5908, + "step": 5216 + }, + { + "epoch": 0.4762643783092934, + "grad_norm": 0.47656774520874023, + "learning_rate": 4.925365029391454e-06, + "loss": 0.5889, + "step": 5217 + }, + { + "epoch": 0.47635566916195, + "grad_norm": 0.509390115737915, + "learning_rate": 4.9253360012553615e-06, + "loss": 0.5441, + "step": 5218 + }, + { + "epoch": 0.4764469600146065, + "grad_norm": 0.4550841450691223, + "learning_rate": 4.925306967560916e-06, + "loss": 0.5887, + "step": 5219 + }, + { + "epoch": 0.4765382508672631, + "grad_norm": 0.5045674443244934, + "learning_rate": 4.925277928308181e-06, + "loss": 0.5756, + "step": 5220 + }, + { + "epoch": 0.47662954171991967, + "grad_norm": 0.487061470746994, + "learning_rate": 4.925248883497224e-06, + "loss": 0.5638, + "step": 5221 + }, + { + "epoch": 0.47672083257257625, + "grad_norm": 0.45629289746284485, + "learning_rate": 4.925219833128112e-06, + "loss": 0.5979, + "step": 5222 + }, + { + "epoch": 0.47681212342523277, + "grad_norm": 0.4981171786785126, + "learning_rate": 4.925190777200913e-06, + "loss": 0.5256, + "step": 5223 + }, + { + "epoch": 0.47690341427788935, + "grad_norm": 0.4831734597682953, + "learning_rate": 4.92516171571569e-06, + "loss": 0.5938, + "step": 5224 + }, + { + "epoch": 0.4769947051305459, + "grad_norm": 0.4838641881942749, + "learning_rate": 4.925132648672512e-06, + "loss": 0.5553, + "step": 5225 + }, + { + "epoch": 0.4770859959832025, + "grad_norm": 0.466682493686676, + "learning_rate": 4.925103576071446e-06, + "loss": 0.6048, + "step": 5226 + }, + { + "epoch": 0.477177286835859, + "grad_norm": 0.49041640758514404, + "learning_rate": 4.925074497912558e-06, + "loss": 0.578, + "step": 5227 + }, + { + "epoch": 0.4772685776885156, + "grad_norm": 0.4678991138935089, + "learning_rate": 4.9250454141959145e-06, + "loss": 0.578, + "step": 5228 + }, + { + "epoch": 0.4773598685411722, + "grad_norm": 0.4865586459636688, + "learning_rate": 4.925016324921582e-06, + "loss": 0.5562, + "step": 5229 + }, + { + "epoch": 0.47745115939382876, + "grad_norm": 0.48947614431381226, + "learning_rate": 4.924987230089627e-06, + "loss": 0.5614, + "step": 5230 + }, + { + "epoch": 0.4775424502464853, + "grad_norm": 0.4662337601184845, + "learning_rate": 4.924958129700118e-06, + "loss": 0.5979, + "step": 5231 + }, + { + "epoch": 0.47763374109914186, + "grad_norm": 0.4944211542606354, + "learning_rate": 4.924929023753119e-06, + "loss": 0.5965, + "step": 5232 + }, + { + "epoch": 0.47772503195179844, + "grad_norm": 0.4637536406517029, + "learning_rate": 4.924899912248698e-06, + "loss": 0.591, + "step": 5233 + }, + { + "epoch": 0.477816322804455, + "grad_norm": 0.4879932403564453, + "learning_rate": 4.924870795186922e-06, + "loss": 0.5975, + "step": 5234 + }, + { + "epoch": 0.47790761365711154, + "grad_norm": 0.5291743278503418, + "learning_rate": 4.924841672567857e-06, + "loss": 0.579, + "step": 5235 + }, + { + "epoch": 0.4779989045097681, + "grad_norm": 0.4994063377380371, + "learning_rate": 4.92481254439157e-06, + "loss": 0.5384, + "step": 5236 + }, + { + "epoch": 0.4780901953624247, + "grad_norm": 0.4937995374202728, + "learning_rate": 4.924783410658128e-06, + "loss": 0.5863, + "step": 5237 + }, + { + "epoch": 0.4781814862150813, + "grad_norm": 0.4908248782157898, + "learning_rate": 4.924754271367598e-06, + "loss": 0.5875, + "step": 5238 + }, + { + "epoch": 0.4782727770677378, + "grad_norm": 0.5085771083831787, + "learning_rate": 4.924725126520047e-06, + "loss": 0.5826, + "step": 5239 + }, + { + "epoch": 0.4783640679203944, + "grad_norm": 0.4952421486377716, + "learning_rate": 4.924695976115541e-06, + "loss": 0.5759, + "step": 5240 + }, + { + "epoch": 0.47845535877305095, + "grad_norm": 0.5087546110153198, + "learning_rate": 4.9246668201541456e-06, + "loss": 0.5206, + "step": 5241 + }, + { + "epoch": 0.4785466496257075, + "grad_norm": 0.46562227606773376, + "learning_rate": 4.924637658635931e-06, + "loss": 0.5887, + "step": 5242 + }, + { + "epoch": 0.47863794047836405, + "grad_norm": 0.49651697278022766, + "learning_rate": 4.92460849156096e-06, + "loss": 0.5767, + "step": 5243 + }, + { + "epoch": 0.47872923133102063, + "grad_norm": 0.46696737408638, + "learning_rate": 4.9245793189293035e-06, + "loss": 0.6143, + "step": 5244 + }, + { + "epoch": 0.4788205221836772, + "grad_norm": 0.5134373903274536, + "learning_rate": 4.924550140741025e-06, + "loss": 0.5117, + "step": 5245 + }, + { + "epoch": 0.47891181303633373, + "grad_norm": 0.480295330286026, + "learning_rate": 4.924520956996194e-06, + "loss": 0.5301, + "step": 5246 + }, + { + "epoch": 0.4790031038889903, + "grad_norm": 0.48262766003608704, + "learning_rate": 4.924491767694876e-06, + "loss": 0.6065, + "step": 5247 + }, + { + "epoch": 0.4790943947416469, + "grad_norm": 0.48386260867118835, + "learning_rate": 4.924462572837137e-06, + "loss": 0.5679, + "step": 5248 + }, + { + "epoch": 0.47918568559430347, + "grad_norm": 0.4358294904232025, + "learning_rate": 4.924433372423046e-06, + "loss": 0.5808, + "step": 5249 + }, + { + "epoch": 0.47927697644696, + "grad_norm": 0.47455501556396484, + "learning_rate": 4.924404166452669e-06, + "loss": 0.5877, + "step": 5250 + }, + { + "epoch": 0.47936826729961657, + "grad_norm": 0.48719197511672974, + "learning_rate": 4.9243749549260715e-06, + "loss": 0.5521, + "step": 5251 + }, + { + "epoch": 0.47945955815227315, + "grad_norm": 0.4782212972640991, + "learning_rate": 4.924345737843323e-06, + "loss": 0.5792, + "step": 5252 + }, + { + "epoch": 0.4795508490049297, + "grad_norm": 0.492344468832016, + "learning_rate": 4.9243165152044885e-06, + "loss": 0.5608, + "step": 5253 + }, + { + "epoch": 0.47964213985758625, + "grad_norm": 0.478201687335968, + "learning_rate": 4.924287287009636e-06, + "loss": 0.569, + "step": 5254 + }, + { + "epoch": 0.4797334307102428, + "grad_norm": 0.500796377658844, + "learning_rate": 4.924258053258833e-06, + "loss": 0.5933, + "step": 5255 + }, + { + "epoch": 0.4798247215628994, + "grad_norm": 0.468729704618454, + "learning_rate": 4.924228813952144e-06, + "loss": 0.5664, + "step": 5256 + }, + { + "epoch": 0.479916012415556, + "grad_norm": 0.5166957974433899, + "learning_rate": 4.924199569089638e-06, + "loss": 0.5747, + "step": 5257 + }, + { + "epoch": 0.4800073032682125, + "grad_norm": 0.46559274196624756, + "learning_rate": 4.924170318671383e-06, + "loss": 0.6028, + "step": 5258 + }, + { + "epoch": 0.4800985941208691, + "grad_norm": 0.494193434715271, + "learning_rate": 4.924141062697444e-06, + "loss": 0.5667, + "step": 5259 + }, + { + "epoch": 0.48018988497352566, + "grad_norm": 0.489469975233078, + "learning_rate": 4.924111801167888e-06, + "loss": 0.5659, + "step": 5260 + }, + { + "epoch": 0.48028117582618224, + "grad_norm": 0.47748851776123047, + "learning_rate": 4.9240825340827835e-06, + "loss": 0.6039, + "step": 5261 + }, + { + "epoch": 0.48037246667883876, + "grad_norm": 0.48321476578712463, + "learning_rate": 4.9240532614421975e-06, + "loss": 0.5923, + "step": 5262 + }, + { + "epoch": 0.48046375753149534, + "grad_norm": 0.48704028129577637, + "learning_rate": 4.924023983246195e-06, + "loss": 0.537, + "step": 5263 + }, + { + "epoch": 0.4805550483841519, + "grad_norm": 0.480508416891098, + "learning_rate": 4.923994699494846e-06, + "loss": 0.5511, + "step": 5264 + }, + { + "epoch": 0.4806463392368085, + "grad_norm": 0.47075945138931274, + "learning_rate": 4.923965410188216e-06, + "loss": 0.5351, + "step": 5265 + }, + { + "epoch": 0.480737630089465, + "grad_norm": 0.47617608308792114, + "learning_rate": 4.923936115326372e-06, + "loss": 0.5754, + "step": 5266 + }, + { + "epoch": 0.4808289209421216, + "grad_norm": 0.4922209680080414, + "learning_rate": 4.923906814909382e-06, + "loss": 0.556, + "step": 5267 + }, + { + "epoch": 0.4809202117947782, + "grad_norm": 0.4578753113746643, + "learning_rate": 4.923877508937313e-06, + "loss": 0.5997, + "step": 5268 + }, + { + "epoch": 0.48101150264743475, + "grad_norm": 0.48223692178726196, + "learning_rate": 4.92384819741023e-06, + "loss": 0.5452, + "step": 5269 + }, + { + "epoch": 0.4811027935000913, + "grad_norm": 0.46853339672088623, + "learning_rate": 4.923818880328204e-06, + "loss": 0.6003, + "step": 5270 + }, + { + "epoch": 0.48119408435274785, + "grad_norm": 0.48948702216148376, + "learning_rate": 4.923789557691299e-06, + "loss": 0.5577, + "step": 5271 + }, + { + "epoch": 0.48128537520540443, + "grad_norm": 0.49894434213638306, + "learning_rate": 4.923760229499583e-06, + "loss": 0.5326, + "step": 5272 + }, + { + "epoch": 0.481376666058061, + "grad_norm": 0.4742240011692047, + "learning_rate": 4.923730895753125e-06, + "loss": 0.6035, + "step": 5273 + }, + { + "epoch": 0.48146795691071753, + "grad_norm": 0.47667208313941956, + "learning_rate": 4.92370155645199e-06, + "loss": 0.5908, + "step": 5274 + }, + { + "epoch": 0.4815592477633741, + "grad_norm": 0.49844783544540405, + "learning_rate": 4.923672211596246e-06, + "loss": 0.5697, + "step": 5275 + }, + { + "epoch": 0.4816505386160307, + "grad_norm": 0.4697147309780121, + "learning_rate": 4.92364286118596e-06, + "loss": 0.5988, + "step": 5276 + }, + { + "epoch": 0.4817418294686872, + "grad_norm": 0.4775295853614807, + "learning_rate": 4.9236135052212e-06, + "loss": 0.5746, + "step": 5277 + }, + { + "epoch": 0.4818331203213438, + "grad_norm": 0.5104407668113708, + "learning_rate": 4.923584143702032e-06, + "loss": 0.5905, + "step": 5278 + }, + { + "epoch": 0.48192441117400037, + "grad_norm": 0.4956637918949127, + "learning_rate": 4.923554776628526e-06, + "loss": 0.5248, + "step": 5279 + }, + { + "epoch": 0.48201570202665694, + "grad_norm": 0.4767639935016632, + "learning_rate": 4.923525404000746e-06, + "loss": 0.5667, + "step": 5280 + }, + { + "epoch": 0.48210699287931347, + "grad_norm": 0.4662400484085083, + "learning_rate": 4.923496025818761e-06, + "loss": 0.5593, + "step": 5281 + }, + { + "epoch": 0.48219828373197005, + "grad_norm": 0.5197505950927734, + "learning_rate": 4.9234666420826385e-06, + "loss": 0.5623, + "step": 5282 + }, + { + "epoch": 0.4822895745846266, + "grad_norm": 0.4782313406467438, + "learning_rate": 4.923437252792444e-06, + "loss": 0.602, + "step": 5283 + }, + { + "epoch": 0.4823808654372832, + "grad_norm": 0.4904867112636566, + "learning_rate": 4.923407857948248e-06, + "loss": 0.5288, + "step": 5284 + }, + { + "epoch": 0.4824721562899397, + "grad_norm": 0.46350619196891785, + "learning_rate": 4.923378457550115e-06, + "loss": 0.5578, + "step": 5285 + }, + { + "epoch": 0.4825634471425963, + "grad_norm": 0.46653318405151367, + "learning_rate": 4.923349051598115e-06, + "loss": 0.5745, + "step": 5286 + }, + { + "epoch": 0.4826547379952529, + "grad_norm": 0.454596608877182, + "learning_rate": 4.923319640092312e-06, + "loss": 0.5664, + "step": 5287 + }, + { + "epoch": 0.48274602884790946, + "grad_norm": 0.48732903599739075, + "learning_rate": 4.9232902230327765e-06, + "loss": 0.5793, + "step": 5288 + }, + { + "epoch": 0.482837319700566, + "grad_norm": 0.47197285294532776, + "learning_rate": 4.9232608004195744e-06, + "loss": 0.5945, + "step": 5289 + }, + { + "epoch": 0.48292861055322256, + "grad_norm": 0.4877869486808777, + "learning_rate": 4.923231372252774e-06, + "loss": 0.58, + "step": 5290 + }, + { + "epoch": 0.48301990140587914, + "grad_norm": 0.5026444792747498, + "learning_rate": 4.923201938532441e-06, + "loss": 0.5806, + "step": 5291 + }, + { + "epoch": 0.4831111922585357, + "grad_norm": 0.4775293469429016, + "learning_rate": 4.923172499258646e-06, + "loss": 0.5813, + "step": 5292 + }, + { + "epoch": 0.48320248311119224, + "grad_norm": 0.513573944568634, + "learning_rate": 4.923143054431453e-06, + "loss": 0.5453, + "step": 5293 + }, + { + "epoch": 0.4832937739638488, + "grad_norm": 0.4775591790676117, + "learning_rate": 4.9231136040509305e-06, + "loss": 0.5575, + "step": 5294 + }, + { + "epoch": 0.4833850648165054, + "grad_norm": 0.47343480587005615, + "learning_rate": 4.923084148117147e-06, + "loss": 0.5607, + "step": 5295 + }, + { + "epoch": 0.483476355669162, + "grad_norm": 0.44635170698165894, + "learning_rate": 4.923054686630171e-06, + "loss": 0.5583, + "step": 5296 + }, + { + "epoch": 0.4835676465218185, + "grad_norm": 0.4851375222206116, + "learning_rate": 4.923025219590067e-06, + "loss": 0.5565, + "step": 5297 + }, + { + "epoch": 0.4836589373744751, + "grad_norm": 0.43906840682029724, + "learning_rate": 4.922995746996905e-06, + "loss": 0.557, + "step": 5298 + }, + { + "epoch": 0.48375022822713165, + "grad_norm": 0.48198968172073364, + "learning_rate": 4.922966268850751e-06, + "loss": 0.5964, + "step": 5299 + }, + { + "epoch": 0.48384151907978823, + "grad_norm": 0.45564183592796326, + "learning_rate": 4.922936785151673e-06, + "loss": 0.5564, + "step": 5300 + }, + { + "epoch": 0.48393280993244475, + "grad_norm": 0.43410924077033997, + "learning_rate": 4.9229072958997385e-06, + "loss": 0.5945, + "step": 5301 + }, + { + "epoch": 0.48402410078510133, + "grad_norm": 0.4592640995979309, + "learning_rate": 4.922877801095016e-06, + "loss": 0.5791, + "step": 5302 + }, + { + "epoch": 0.4841153916377579, + "grad_norm": 0.4265062212944031, + "learning_rate": 4.9228483007375726e-06, + "loss": 0.5899, + "step": 5303 + }, + { + "epoch": 0.4842066824904145, + "grad_norm": 0.47927549481391907, + "learning_rate": 4.922818794827475e-06, + "loss": 0.5569, + "step": 5304 + }, + { + "epoch": 0.484297973343071, + "grad_norm": 0.47826677560806274, + "learning_rate": 4.9227892833647916e-06, + "loss": 0.5481, + "step": 5305 + }, + { + "epoch": 0.4843892641957276, + "grad_norm": 0.4670177400112152, + "learning_rate": 4.9227597663495905e-06, + "loss": 0.5829, + "step": 5306 + }, + { + "epoch": 0.48448055504838416, + "grad_norm": 0.5012476444244385, + "learning_rate": 4.922730243781939e-06, + "loss": 0.5607, + "step": 5307 + }, + { + "epoch": 0.48457184590104074, + "grad_norm": 0.4597015678882599, + "learning_rate": 4.922700715661904e-06, + "loss": 0.6025, + "step": 5308 + }, + { + "epoch": 0.48466313675369727, + "grad_norm": 0.5020685195922852, + "learning_rate": 4.922671181989554e-06, + "loss": 0.5402, + "step": 5309 + }, + { + "epoch": 0.48475442760635384, + "grad_norm": 0.4568482041358948, + "learning_rate": 4.922641642764956e-06, + "loss": 0.5897, + "step": 5310 + }, + { + "epoch": 0.4848457184590104, + "grad_norm": 0.4994346797466278, + "learning_rate": 4.9226120979881785e-06, + "loss": 0.5726, + "step": 5311 + }, + { + "epoch": 0.48493700931166694, + "grad_norm": 0.4391239583492279, + "learning_rate": 4.922582547659289e-06, + "loss": 0.5977, + "step": 5312 + }, + { + "epoch": 0.4850283001643235, + "grad_norm": 0.5048065781593323, + "learning_rate": 4.922552991778355e-06, + "loss": 0.544, + "step": 5313 + }, + { + "epoch": 0.4851195910169801, + "grad_norm": 0.4483005702495575, + "learning_rate": 4.922523430345444e-06, + "loss": 0.5914, + "step": 5314 + }, + { + "epoch": 0.4852108818696367, + "grad_norm": 0.46565285325050354, + "learning_rate": 4.9224938633606244e-06, + "loss": 0.5461, + "step": 5315 + }, + { + "epoch": 0.4853021727222932, + "grad_norm": 0.4524788558483124, + "learning_rate": 4.922464290823964e-06, + "loss": 0.5637, + "step": 5316 + }, + { + "epoch": 0.4853934635749498, + "grad_norm": 0.45025870203971863, + "learning_rate": 4.92243471273553e-06, + "loss": 0.5616, + "step": 5317 + }, + { + "epoch": 0.48548475442760636, + "grad_norm": 0.47499382495880127, + "learning_rate": 4.92240512909539e-06, + "loss": 0.5858, + "step": 5318 + }, + { + "epoch": 0.48557604528026294, + "grad_norm": 0.4515777826309204, + "learning_rate": 4.922375539903612e-06, + "loss": 0.5794, + "step": 5319 + }, + { + "epoch": 0.48566733613291946, + "grad_norm": 0.5057613253593445, + "learning_rate": 4.922345945160264e-06, + "loss": 0.5634, + "step": 5320 + }, + { + "epoch": 0.48575862698557604, + "grad_norm": 0.4876386523246765, + "learning_rate": 4.922316344865415e-06, + "loss": 0.5733, + "step": 5321 + }, + { + "epoch": 0.4858499178382326, + "grad_norm": 0.48876944184303284, + "learning_rate": 4.92228673901913e-06, + "loss": 0.5498, + "step": 5322 + }, + { + "epoch": 0.4859412086908892, + "grad_norm": 0.43927228450775146, + "learning_rate": 4.922257127621479e-06, + "loss": 0.5512, + "step": 5323 + }, + { + "epoch": 0.4860324995435457, + "grad_norm": 0.4584383964538574, + "learning_rate": 4.922227510672531e-06, + "loss": 0.6273, + "step": 5324 + }, + { + "epoch": 0.4861237903962023, + "grad_norm": 0.4932415187358856, + "learning_rate": 4.92219788817235e-06, + "loss": 0.591, + "step": 5325 + }, + { + "epoch": 0.48621508124885887, + "grad_norm": 0.43941426277160645, + "learning_rate": 4.922168260121007e-06, + "loss": 0.5957, + "step": 5326 + }, + { + "epoch": 0.48630637210151545, + "grad_norm": 0.4577223062515259, + "learning_rate": 4.922138626518569e-06, + "loss": 0.5581, + "step": 5327 + }, + { + "epoch": 0.48639766295417197, + "grad_norm": 0.4686656892299652, + "learning_rate": 4.922108987365104e-06, + "loss": 0.6094, + "step": 5328 + }, + { + "epoch": 0.48648895380682855, + "grad_norm": 0.4927853047847748, + "learning_rate": 4.92207934266068e-06, + "loss": 0.5537, + "step": 5329 + }, + { + "epoch": 0.48658024465948513, + "grad_norm": 0.49007734656333923, + "learning_rate": 4.9220496924053645e-06, + "loss": 0.5473, + "step": 5330 + }, + { + "epoch": 0.4866715355121417, + "grad_norm": 0.4729149341583252, + "learning_rate": 4.922020036599226e-06, + "loss": 0.5834, + "step": 5331 + }, + { + "epoch": 0.48676282636479823, + "grad_norm": 0.5058822631835938, + "learning_rate": 4.921990375242333e-06, + "loss": 0.5709, + "step": 5332 + }, + { + "epoch": 0.4868541172174548, + "grad_norm": 0.49747276306152344, + "learning_rate": 4.921960708334751e-06, + "loss": 0.5261, + "step": 5333 + }, + { + "epoch": 0.4869454080701114, + "grad_norm": 0.4943120777606964, + "learning_rate": 4.921931035876551e-06, + "loss": 0.5796, + "step": 5334 + }, + { + "epoch": 0.48703669892276796, + "grad_norm": 0.49173781275749207, + "learning_rate": 4.921901357867799e-06, + "loss": 0.5792, + "step": 5335 + }, + { + "epoch": 0.4871279897754245, + "grad_norm": 0.5035251379013062, + "learning_rate": 4.9218716743085646e-06, + "loss": 0.5275, + "step": 5336 + }, + { + "epoch": 0.48721928062808106, + "grad_norm": 0.44908958673477173, + "learning_rate": 4.921841985198914e-06, + "loss": 0.5617, + "step": 5337 + }, + { + "epoch": 0.48731057148073764, + "grad_norm": 0.4915446937084198, + "learning_rate": 4.921812290538917e-06, + "loss": 0.5621, + "step": 5338 + }, + { + "epoch": 0.4874018623333942, + "grad_norm": 0.48582732677459717, + "learning_rate": 4.92178259032864e-06, + "loss": 0.5671, + "step": 5339 + }, + { + "epoch": 0.48749315318605074, + "grad_norm": 0.4821195900440216, + "learning_rate": 4.921752884568152e-06, + "loss": 0.5772, + "step": 5340 + }, + { + "epoch": 0.4875844440387073, + "grad_norm": 0.4430207908153534, + "learning_rate": 4.921723173257522e-06, + "loss": 0.6298, + "step": 5341 + }, + { + "epoch": 0.4876757348913639, + "grad_norm": 0.448772668838501, + "learning_rate": 4.921693456396815e-06, + "loss": 0.5804, + "step": 5342 + }, + { + "epoch": 0.4877670257440204, + "grad_norm": 0.4686328172683716, + "learning_rate": 4.921663733986103e-06, + "loss": 0.593, + "step": 5343 + }, + { + "epoch": 0.487858316596677, + "grad_norm": 0.453292578458786, + "learning_rate": 4.921634006025452e-06, + "loss": 0.5696, + "step": 5344 + }, + { + "epoch": 0.4879496074493336, + "grad_norm": 0.44138529896736145, + "learning_rate": 4.921604272514931e-06, + "loss": 0.5792, + "step": 5345 + }, + { + "epoch": 0.48804089830199016, + "grad_norm": 0.4456600546836853, + "learning_rate": 4.921574533454606e-06, + "loss": 0.5988, + "step": 5346 + }, + { + "epoch": 0.4881321891546467, + "grad_norm": 0.48736050724983215, + "learning_rate": 4.921544788844547e-06, + "loss": 0.5422, + "step": 5347 + }, + { + "epoch": 0.48822348000730326, + "grad_norm": 0.46425938606262207, + "learning_rate": 4.9215150386848235e-06, + "loss": 0.5627, + "step": 5348 + }, + { + "epoch": 0.48831477085995983, + "grad_norm": 0.48106643557548523, + "learning_rate": 4.921485282975501e-06, + "loss": 0.6195, + "step": 5349 + }, + { + "epoch": 0.4884060617126164, + "grad_norm": 0.47150376439094543, + "learning_rate": 4.921455521716649e-06, + "loss": 0.581, + "step": 5350 + }, + { + "epoch": 0.48849735256527294, + "grad_norm": 0.4798491299152374, + "learning_rate": 4.921425754908335e-06, + "loss": 0.5653, + "step": 5351 + }, + { + "epoch": 0.4885886434179295, + "grad_norm": 0.47300752997398376, + "learning_rate": 4.921395982550628e-06, + "loss": 0.5808, + "step": 5352 + }, + { + "epoch": 0.4886799342705861, + "grad_norm": 0.47727155685424805, + "learning_rate": 4.921366204643596e-06, + "loss": 0.5839, + "step": 5353 + }, + { + "epoch": 0.48877122512324267, + "grad_norm": 0.4981520175933838, + "learning_rate": 4.9213364211873074e-06, + "loss": 0.5776, + "step": 5354 + }, + { + "epoch": 0.4888625159758992, + "grad_norm": 0.4647735059261322, + "learning_rate": 4.92130663218183e-06, + "loss": 0.5732, + "step": 5355 + }, + { + "epoch": 0.48895380682855577, + "grad_norm": 0.5062516331672668, + "learning_rate": 4.9212768376272325e-06, + "loss": 0.5917, + "step": 5356 + }, + { + "epoch": 0.48904509768121235, + "grad_norm": 0.47812727093696594, + "learning_rate": 4.921247037523582e-06, + "loss": 0.6088, + "step": 5357 + }, + { + "epoch": 0.4891363885338689, + "grad_norm": 0.4597615599632263, + "learning_rate": 4.921217231870949e-06, + "loss": 0.5801, + "step": 5358 + }, + { + "epoch": 0.48922767938652545, + "grad_norm": 0.47304046154022217, + "learning_rate": 4.9211874206694e-06, + "loss": 0.5267, + "step": 5359 + }, + { + "epoch": 0.489318970239182, + "grad_norm": 0.47415921092033386, + "learning_rate": 4.9211576039190045e-06, + "loss": 0.5752, + "step": 5360 + }, + { + "epoch": 0.4894102610918386, + "grad_norm": 0.4665432274341583, + "learning_rate": 4.921127781619829e-06, + "loss": 0.5905, + "step": 5361 + }, + { + "epoch": 0.4895015519444952, + "grad_norm": 0.5195504426956177, + "learning_rate": 4.921097953771944e-06, + "loss": 0.5265, + "step": 5362 + }, + { + "epoch": 0.4895928427971517, + "grad_norm": 0.47707921266555786, + "learning_rate": 4.9210681203754175e-06, + "loss": 0.5631, + "step": 5363 + }, + { + "epoch": 0.4896841336498083, + "grad_norm": 0.47399038076400757, + "learning_rate": 4.921038281430316e-06, + "loss": 0.5674, + "step": 5364 + }, + { + "epoch": 0.48977542450246486, + "grad_norm": 0.4358711540699005, + "learning_rate": 4.92100843693671e-06, + "loss": 0.6266, + "step": 5365 + }, + { + "epoch": 0.48986671535512144, + "grad_norm": 0.4947163760662079, + "learning_rate": 4.920978586894667e-06, + "loss": 0.5634, + "step": 5366 + }, + { + "epoch": 0.48995800620777796, + "grad_norm": 0.4785621166229248, + "learning_rate": 4.9209487313042555e-06, + "loss": 0.565, + "step": 5367 + }, + { + "epoch": 0.49004929706043454, + "grad_norm": 0.4995315670967102, + "learning_rate": 4.920918870165544e-06, + "loss": 0.5433, + "step": 5368 + }, + { + "epoch": 0.4901405879130911, + "grad_norm": 0.5061908960342407, + "learning_rate": 4.9208890034786005e-06, + "loss": 0.556, + "step": 5369 + }, + { + "epoch": 0.4902318787657477, + "grad_norm": 0.4719173014163971, + "learning_rate": 4.920859131243494e-06, + "loss": 0.6155, + "step": 5370 + }, + { + "epoch": 0.4903231696184042, + "grad_norm": 0.47789108753204346, + "learning_rate": 4.9208292534602935e-06, + "loss": 0.5765, + "step": 5371 + }, + { + "epoch": 0.4904144604710608, + "grad_norm": 0.48982521891593933, + "learning_rate": 4.920799370129065e-06, + "loss": 0.5478, + "step": 5372 + }, + { + "epoch": 0.4905057513237174, + "grad_norm": 0.47203269600868225, + "learning_rate": 4.920769481249881e-06, + "loss": 0.5526, + "step": 5373 + }, + { + "epoch": 0.49059704217637395, + "grad_norm": 0.47223278880119324, + "learning_rate": 4.920739586822806e-06, + "loss": 0.5389, + "step": 5374 + }, + { + "epoch": 0.4906883330290305, + "grad_norm": 0.5139344930648804, + "learning_rate": 4.920709686847911e-06, + "loss": 0.562, + "step": 5375 + }, + { + "epoch": 0.49077962388168705, + "grad_norm": 0.4986208975315094, + "learning_rate": 4.920679781325264e-06, + "loss": 0.5539, + "step": 5376 + }, + { + "epoch": 0.49087091473434363, + "grad_norm": 0.5115650296211243, + "learning_rate": 4.920649870254932e-06, + "loss": 0.54, + "step": 5377 + }, + { + "epoch": 0.49096220558700016, + "grad_norm": 0.4769827425479889, + "learning_rate": 4.920619953636986e-06, + "loss": 0.5588, + "step": 5378 + }, + { + "epoch": 0.49105349643965673, + "grad_norm": 0.46804845333099365, + "learning_rate": 4.920590031471494e-06, + "loss": 0.5778, + "step": 5379 + }, + { + "epoch": 0.4911447872923133, + "grad_norm": 0.491521418094635, + "learning_rate": 4.920560103758523e-06, + "loss": 0.5486, + "step": 5380 + }, + { + "epoch": 0.4912360781449699, + "grad_norm": 0.4739760458469391, + "learning_rate": 4.920530170498142e-06, + "loss": 0.5886, + "step": 5381 + }, + { + "epoch": 0.4913273689976264, + "grad_norm": 0.47983086109161377, + "learning_rate": 4.920500231690422e-06, + "loss": 0.5693, + "step": 5382 + }, + { + "epoch": 0.491418659850283, + "grad_norm": 0.4456682503223419, + "learning_rate": 4.9204702873354285e-06, + "loss": 0.5567, + "step": 5383 + }, + { + "epoch": 0.49150995070293957, + "grad_norm": 0.46711063385009766, + "learning_rate": 4.9204403374332316e-06, + "loss": 0.6032, + "step": 5384 + }, + { + "epoch": 0.49160124155559615, + "grad_norm": 0.45103543996810913, + "learning_rate": 4.9204103819839e-06, + "loss": 0.6202, + "step": 5385 + }, + { + "epoch": 0.49169253240825267, + "grad_norm": 0.4555191993713379, + "learning_rate": 4.9203804209875016e-06, + "loss": 0.5433, + "step": 5386 + }, + { + "epoch": 0.49178382326090925, + "grad_norm": 0.4695484936237335, + "learning_rate": 4.920350454444106e-06, + "loss": 0.5585, + "step": 5387 + }, + { + "epoch": 0.4918751141135658, + "grad_norm": 0.500866174697876, + "learning_rate": 4.920320482353782e-06, + "loss": 0.5805, + "step": 5388 + }, + { + "epoch": 0.4919664049662224, + "grad_norm": 0.48013001680374146, + "learning_rate": 4.920290504716597e-06, + "loss": 0.5611, + "step": 5389 + }, + { + "epoch": 0.4920576958188789, + "grad_norm": 0.4430987238883972, + "learning_rate": 4.920260521532621e-06, + "loss": 0.5788, + "step": 5390 + }, + { + "epoch": 0.4921489866715355, + "grad_norm": 0.46936744451522827, + "learning_rate": 4.920230532801921e-06, + "loss": 0.5503, + "step": 5391 + }, + { + "epoch": 0.4922402775241921, + "grad_norm": 0.49383410811424255, + "learning_rate": 4.9202005385245675e-06, + "loss": 0.5686, + "step": 5392 + }, + { + "epoch": 0.49233156837684866, + "grad_norm": 0.4592830538749695, + "learning_rate": 4.92017053870063e-06, + "loss": 0.6101, + "step": 5393 + }, + { + "epoch": 0.4924228592295052, + "grad_norm": 0.4628646969795227, + "learning_rate": 4.920140533330174e-06, + "loss": 0.5965, + "step": 5394 + }, + { + "epoch": 0.49251415008216176, + "grad_norm": 0.46165797114372253, + "learning_rate": 4.92011052241327e-06, + "loss": 0.5888, + "step": 5395 + }, + { + "epoch": 0.49260544093481834, + "grad_norm": 0.48088982701301575, + "learning_rate": 4.920080505949989e-06, + "loss": 0.5948, + "step": 5396 + }, + { + "epoch": 0.4926967317874749, + "grad_norm": 0.4725511074066162, + "learning_rate": 4.920050483940396e-06, + "loss": 0.5485, + "step": 5397 + }, + { + "epoch": 0.49278802264013144, + "grad_norm": 0.47099360823631287, + "learning_rate": 4.920020456384562e-06, + "loss": 0.5559, + "step": 5398 + }, + { + "epoch": 0.492879313492788, + "grad_norm": 0.4653220772743225, + "learning_rate": 4.919990423282556e-06, + "loss": 0.6134, + "step": 5399 + }, + { + "epoch": 0.4929706043454446, + "grad_norm": 0.4650966227054596, + "learning_rate": 4.919960384634446e-06, + "loss": 0.6086, + "step": 5400 + }, + { + "epoch": 0.4930618951981012, + "grad_norm": 0.48929283022880554, + "learning_rate": 4.9199303404403e-06, + "loss": 0.5857, + "step": 5401 + }, + { + "epoch": 0.4931531860507577, + "grad_norm": 0.5058397650718689, + "learning_rate": 4.919900290700188e-06, + "loss": 0.6085, + "step": 5402 + }, + { + "epoch": 0.4932444769034143, + "grad_norm": 0.4644952118396759, + "learning_rate": 4.9198702354141794e-06, + "loss": 0.5848, + "step": 5403 + }, + { + "epoch": 0.49333576775607085, + "grad_norm": 0.46132323145866394, + "learning_rate": 4.919840174582342e-06, + "loss": 0.5743, + "step": 5404 + }, + { + "epoch": 0.49342705860872743, + "grad_norm": 0.4855627119541168, + "learning_rate": 4.919810108204745e-06, + "loss": 0.5467, + "step": 5405 + }, + { + "epoch": 0.49351834946138395, + "grad_norm": 0.4744845926761627, + "learning_rate": 4.919780036281459e-06, + "loss": 0.5965, + "step": 5406 + }, + { + "epoch": 0.49360964031404053, + "grad_norm": 0.47785618901252747, + "learning_rate": 4.91974995881255e-06, + "loss": 0.5808, + "step": 5407 + }, + { + "epoch": 0.4937009311666971, + "grad_norm": 0.45547401905059814, + "learning_rate": 4.919719875798088e-06, + "loss": 0.5995, + "step": 5408 + }, + { + "epoch": 0.4937922220193537, + "grad_norm": 0.42801961302757263, + "learning_rate": 4.9196897872381425e-06, + "loss": 0.6305, + "step": 5409 + }, + { + "epoch": 0.4938835128720102, + "grad_norm": 0.4609259068965912, + "learning_rate": 4.9196596931327825e-06, + "loss": 0.5826, + "step": 5410 + }, + { + "epoch": 0.4939748037246668, + "grad_norm": 0.44870704412460327, + "learning_rate": 4.919629593482076e-06, + "loss": 0.5935, + "step": 5411 + }, + { + "epoch": 0.49406609457732337, + "grad_norm": 0.45357903838157654, + "learning_rate": 4.919599488286093e-06, + "loss": 0.5279, + "step": 5412 + }, + { + "epoch": 0.4941573854299799, + "grad_norm": 0.45270538330078125, + "learning_rate": 4.919569377544902e-06, + "loss": 0.5752, + "step": 5413 + }, + { + "epoch": 0.49424867628263647, + "grad_norm": 0.474771648645401, + "learning_rate": 4.919539261258572e-06, + "loss": 0.5941, + "step": 5414 + }, + { + "epoch": 0.49433996713529305, + "grad_norm": 0.4651047885417938, + "learning_rate": 4.9195091394271726e-06, + "loss": 0.6021, + "step": 5415 + }, + { + "epoch": 0.4944312579879496, + "grad_norm": 0.4848174452781677, + "learning_rate": 4.919479012050773e-06, + "loss": 0.5592, + "step": 5416 + }, + { + "epoch": 0.49452254884060615, + "grad_norm": 0.5028127431869507, + "learning_rate": 4.91944887912944e-06, + "loss": 0.5552, + "step": 5417 + }, + { + "epoch": 0.4946138396932627, + "grad_norm": 0.49866142868995667, + "learning_rate": 4.919418740663245e-06, + "loss": 0.5724, + "step": 5418 + }, + { + "epoch": 0.4947051305459193, + "grad_norm": 0.4838111400604248, + "learning_rate": 4.9193885966522564e-06, + "loss": 0.5804, + "step": 5419 + }, + { + "epoch": 0.4947964213985759, + "grad_norm": 0.5200469493865967, + "learning_rate": 4.9193584470965435e-06, + "loss": 0.5759, + "step": 5420 + }, + { + "epoch": 0.4948877122512324, + "grad_norm": 0.48452696204185486, + "learning_rate": 4.919328291996175e-06, + "loss": 0.5783, + "step": 5421 + }, + { + "epoch": 0.494979003103889, + "grad_norm": 0.48296964168548584, + "learning_rate": 4.91929813135122e-06, + "loss": 0.5613, + "step": 5422 + }, + { + "epoch": 0.49507029395654556, + "grad_norm": 0.44496747851371765, + "learning_rate": 4.919267965161748e-06, + "loss": 0.5835, + "step": 5423 + }, + { + "epoch": 0.49516158480920214, + "grad_norm": 0.49083462357521057, + "learning_rate": 4.919237793427827e-06, + "loss": 0.5154, + "step": 5424 + }, + { + "epoch": 0.49525287566185866, + "grad_norm": 0.4635895788669586, + "learning_rate": 4.9192076161495275e-06, + "loss": 0.5634, + "step": 5425 + }, + { + "epoch": 0.49534416651451524, + "grad_norm": 0.4782576262950897, + "learning_rate": 4.919177433326918e-06, + "loss": 0.5835, + "step": 5426 + }, + { + "epoch": 0.4954354573671718, + "grad_norm": 0.4712618887424469, + "learning_rate": 4.9191472449600685e-06, + "loss": 0.6058, + "step": 5427 + }, + { + "epoch": 0.4955267482198284, + "grad_norm": 0.5032932162284851, + "learning_rate": 4.919117051049048e-06, + "loss": 0.5597, + "step": 5428 + }, + { + "epoch": 0.4956180390724849, + "grad_norm": 0.47051575779914856, + "learning_rate": 4.919086851593924e-06, + "loss": 0.5842, + "step": 5429 + }, + { + "epoch": 0.4957093299251415, + "grad_norm": 0.47227269411087036, + "learning_rate": 4.919056646594768e-06, + "loss": 0.5798, + "step": 5430 + }, + { + "epoch": 0.4958006207777981, + "grad_norm": 0.4497244656085968, + "learning_rate": 4.919026436051647e-06, + "loss": 0.6303, + "step": 5431 + }, + { + "epoch": 0.49589191163045465, + "grad_norm": 0.4797205626964569, + "learning_rate": 4.918996219964632e-06, + "loss": 0.5662, + "step": 5432 + }, + { + "epoch": 0.4959832024831112, + "grad_norm": 0.4465992748737335, + "learning_rate": 4.918965998333792e-06, + "loss": 0.6103, + "step": 5433 + }, + { + "epoch": 0.49607449333576775, + "grad_norm": 0.519210934638977, + "learning_rate": 4.918935771159196e-06, + "loss": 0.5062, + "step": 5434 + }, + { + "epoch": 0.49616578418842433, + "grad_norm": 0.4818550944328308, + "learning_rate": 4.918905538440914e-06, + "loss": 0.5317, + "step": 5435 + }, + { + "epoch": 0.4962570750410809, + "grad_norm": 0.5011113882064819, + "learning_rate": 4.918875300179013e-06, + "loss": 0.577, + "step": 5436 + }, + { + "epoch": 0.49634836589373743, + "grad_norm": 0.485343337059021, + "learning_rate": 4.918845056373564e-06, + "loss": 0.52, + "step": 5437 + }, + { + "epoch": 0.496439656746394, + "grad_norm": 0.4691354036331177, + "learning_rate": 4.918814807024637e-06, + "loss": 0.5698, + "step": 5438 + }, + { + "epoch": 0.4965309475990506, + "grad_norm": 0.44084590673446655, + "learning_rate": 4.9187845521323e-06, + "loss": 0.5919, + "step": 5439 + }, + { + "epoch": 0.49662223845170717, + "grad_norm": 0.5105781555175781, + "learning_rate": 4.918754291696622e-06, + "loss": 0.5794, + "step": 5440 + }, + { + "epoch": 0.4967135293043637, + "grad_norm": 0.49370214343070984, + "learning_rate": 4.9187240257176735e-06, + "loss": 0.5811, + "step": 5441 + }, + { + "epoch": 0.49680482015702027, + "grad_norm": 0.5059999823570251, + "learning_rate": 4.918693754195524e-06, + "loss": 0.5694, + "step": 5442 + }, + { + "epoch": 0.49689611100967684, + "grad_norm": 0.5063275098800659, + "learning_rate": 4.918663477130242e-06, + "loss": 0.5216, + "step": 5443 + }, + { + "epoch": 0.49698740186233337, + "grad_norm": 0.4550934433937073, + "learning_rate": 4.918633194521897e-06, + "loss": 0.5809, + "step": 5444 + }, + { + "epoch": 0.49707869271498994, + "grad_norm": 0.45063406229019165, + "learning_rate": 4.918602906370559e-06, + "loss": 0.6048, + "step": 5445 + }, + { + "epoch": 0.4971699835676465, + "grad_norm": 0.48687422275543213, + "learning_rate": 4.918572612676298e-06, + "loss": 0.5422, + "step": 5446 + }, + { + "epoch": 0.4972612744203031, + "grad_norm": 0.47084787487983704, + "learning_rate": 4.918542313439181e-06, + "loss": 0.5686, + "step": 5447 + }, + { + "epoch": 0.4973525652729596, + "grad_norm": 0.4722340703010559, + "learning_rate": 4.9185120086592796e-06, + "loss": 0.571, + "step": 5448 + }, + { + "epoch": 0.4974438561256162, + "grad_norm": 0.4553404748439789, + "learning_rate": 4.9184816983366625e-06, + "loss": 0.5821, + "step": 5449 + }, + { + "epoch": 0.4975351469782728, + "grad_norm": 0.5000853538513184, + "learning_rate": 4.918451382471399e-06, + "loss": 0.5236, + "step": 5450 + }, + { + "epoch": 0.49762643783092936, + "grad_norm": 0.4866883158683777, + "learning_rate": 4.918421061063559e-06, + "loss": 0.5386, + "step": 5451 + }, + { + "epoch": 0.4977177286835859, + "grad_norm": 0.45887047052383423, + "learning_rate": 4.918390734113212e-06, + "loss": 0.5845, + "step": 5452 + }, + { + "epoch": 0.49780901953624246, + "grad_norm": 0.4674701988697052, + "learning_rate": 4.918360401620427e-06, + "loss": 0.5869, + "step": 5453 + }, + { + "epoch": 0.49790031038889904, + "grad_norm": 0.4723847806453705, + "learning_rate": 4.918330063585275e-06, + "loss": 0.5104, + "step": 5454 + }, + { + "epoch": 0.4979916012415556, + "grad_norm": 0.4912794828414917, + "learning_rate": 4.918299720007822e-06, + "loss": 0.5379, + "step": 5455 + }, + { + "epoch": 0.49808289209421214, + "grad_norm": 0.48249855637550354, + "learning_rate": 4.918269370888142e-06, + "loss": 0.5481, + "step": 5456 + }, + { + "epoch": 0.4981741829468687, + "grad_norm": 0.48314276337623596, + "learning_rate": 4.918239016226302e-06, + "loss": 0.5803, + "step": 5457 + }, + { + "epoch": 0.4982654737995253, + "grad_norm": 0.4688121974468231, + "learning_rate": 4.918208656022372e-06, + "loss": 0.5927, + "step": 5458 + }, + { + "epoch": 0.49835676465218187, + "grad_norm": 0.48474839329719543, + "learning_rate": 4.918178290276421e-06, + "loss": 0.5406, + "step": 5459 + }, + { + "epoch": 0.4984480555048384, + "grad_norm": 0.45417702198028564, + "learning_rate": 4.91814791898852e-06, + "loss": 0.6088, + "step": 5460 + }, + { + "epoch": 0.498539346357495, + "grad_norm": 0.4847501218318939, + "learning_rate": 4.918117542158737e-06, + "loss": 0.5833, + "step": 5461 + }, + { + "epoch": 0.49863063721015155, + "grad_norm": 0.4785056412220001, + "learning_rate": 4.918087159787143e-06, + "loss": 0.6309, + "step": 5462 + }, + { + "epoch": 0.49872192806280813, + "grad_norm": 0.4762747287750244, + "learning_rate": 4.918056771873806e-06, + "loss": 0.5603, + "step": 5463 + }, + { + "epoch": 0.49881321891546465, + "grad_norm": 0.4799424707889557, + "learning_rate": 4.9180263784187984e-06, + "loss": 0.5655, + "step": 5464 + }, + { + "epoch": 0.49890450976812123, + "grad_norm": 0.45290181040763855, + "learning_rate": 4.917995979422187e-06, + "loss": 0.5482, + "step": 5465 + }, + { + "epoch": 0.4989958006207778, + "grad_norm": 0.4770221710205078, + "learning_rate": 4.917965574884042e-06, + "loss": 0.5964, + "step": 5466 + }, + { + "epoch": 0.4990870914734344, + "grad_norm": 0.4891134798526764, + "learning_rate": 4.917935164804436e-06, + "loss": 0.5776, + "step": 5467 + }, + { + "epoch": 0.4991783823260909, + "grad_norm": 0.4790553152561188, + "learning_rate": 4.9179047491834344e-06, + "loss": 0.5667, + "step": 5468 + }, + { + "epoch": 0.4992696731787475, + "grad_norm": 0.5006332993507385, + "learning_rate": 4.91787432802111e-06, + "loss": 0.5224, + "step": 5469 + }, + { + "epoch": 0.49936096403140406, + "grad_norm": 0.4885442554950714, + "learning_rate": 4.91784390131753e-06, + "loss": 0.5555, + "step": 5470 + }, + { + "epoch": 0.49945225488406064, + "grad_norm": 0.49073341488838196, + "learning_rate": 4.917813469072767e-06, + "loss": 0.5388, + "step": 5471 + }, + { + "epoch": 0.49954354573671716, + "grad_norm": 0.47653162479400635, + "learning_rate": 4.917783031286889e-06, + "loss": 0.5515, + "step": 5472 + }, + { + "epoch": 0.49963483658937374, + "grad_norm": 0.4625287353992462, + "learning_rate": 4.917752587959965e-06, + "loss": 0.556, + "step": 5473 + }, + { + "epoch": 0.4997261274420303, + "grad_norm": 0.4951668083667755, + "learning_rate": 4.917722139092067e-06, + "loss": 0.5824, + "step": 5474 + }, + { + "epoch": 0.4998174182946869, + "grad_norm": 0.47988927364349365, + "learning_rate": 4.917691684683264e-06, + "loss": 0.5193, + "step": 5475 + }, + { + "epoch": 0.4999087091473434, + "grad_norm": 0.49080783128738403, + "learning_rate": 4.917661224733624e-06, + "loss": 0.5668, + "step": 5476 + }, + { + "epoch": 0.5, + "grad_norm": 0.4668911397457123, + "learning_rate": 4.917630759243218e-06, + "loss": 0.5778, + "step": 5477 + }, + { + "epoch": 0.5000912908526566, + "grad_norm": 0.46956855058670044, + "learning_rate": 4.917600288212118e-06, + "loss": 0.5454, + "step": 5478 + }, + { + "epoch": 0.5001825817053132, + "grad_norm": 0.47788023948669434, + "learning_rate": 4.91756981164039e-06, + "loss": 0.5888, + "step": 5479 + }, + { + "epoch": 0.5002738725579697, + "grad_norm": 0.4860265851020813, + "learning_rate": 4.917539329528107e-06, + "loss": 0.5646, + "step": 5480 + }, + { + "epoch": 0.5003651634106262, + "grad_norm": 0.4644804894924164, + "learning_rate": 4.917508841875337e-06, + "loss": 0.5941, + "step": 5481 + }, + { + "epoch": 0.5004564542632828, + "grad_norm": 0.47495827078819275, + "learning_rate": 4.91747834868215e-06, + "loss": 0.5468, + "step": 5482 + }, + { + "epoch": 0.5005477451159394, + "grad_norm": 0.48468801379203796, + "learning_rate": 4.917447849948617e-06, + "loss": 0.5908, + "step": 5483 + }, + { + "epoch": 0.5006390359685959, + "grad_norm": 0.4704834520816803, + "learning_rate": 4.917417345674807e-06, + "loss": 0.5723, + "step": 5484 + }, + { + "epoch": 0.5007303268212525, + "grad_norm": 0.46648678183555603, + "learning_rate": 4.91738683586079e-06, + "loss": 0.5755, + "step": 5485 + }, + { + "epoch": 0.5008216176739091, + "grad_norm": 0.43782132863998413, + "learning_rate": 4.9173563205066355e-06, + "loss": 0.6154, + "step": 5486 + }, + { + "epoch": 0.5009129085265657, + "grad_norm": 0.4310087561607361, + "learning_rate": 4.9173257996124146e-06, + "loss": 0.5981, + "step": 5487 + }, + { + "epoch": 0.5010041993792222, + "grad_norm": 0.49105843901634216, + "learning_rate": 4.917295273178197e-06, + "loss": 0.5808, + "step": 5488 + }, + { + "epoch": 0.5010954902318787, + "grad_norm": 0.48866236209869385, + "learning_rate": 4.917264741204052e-06, + "loss": 0.5724, + "step": 5489 + }, + { + "epoch": 0.5011867810845353, + "grad_norm": 0.5089410543441772, + "learning_rate": 4.9172342036900496e-06, + "loss": 0.5507, + "step": 5490 + }, + { + "epoch": 0.5012780719371919, + "grad_norm": 0.44488969445228577, + "learning_rate": 4.91720366063626e-06, + "loss": 0.579, + "step": 5491 + }, + { + "epoch": 0.5013693627898484, + "grad_norm": 0.5109453797340393, + "learning_rate": 4.917173112042753e-06, + "loss": 0.5345, + "step": 5492 + }, + { + "epoch": 0.501460653642505, + "grad_norm": 0.4729202091693878, + "learning_rate": 4.9171425579096e-06, + "loss": 0.5447, + "step": 5493 + }, + { + "epoch": 0.5015519444951616, + "grad_norm": 0.4639270007610321, + "learning_rate": 4.917111998236869e-06, + "loss": 0.6038, + "step": 5494 + }, + { + "epoch": 0.5016432353478182, + "grad_norm": 0.4756241738796234, + "learning_rate": 4.917081433024631e-06, + "loss": 0.532, + "step": 5495 + }, + { + "epoch": 0.5017345262004748, + "grad_norm": 0.468442440032959, + "learning_rate": 4.917050862272956e-06, + "loss": 0.584, + "step": 5496 + }, + { + "epoch": 0.5018258170531312, + "grad_norm": 0.47761407494544983, + "learning_rate": 4.917020285981914e-06, + "loss": 0.53, + "step": 5497 + }, + { + "epoch": 0.5019171079057878, + "grad_norm": 0.47354111075401306, + "learning_rate": 4.9169897041515745e-06, + "loss": 0.5693, + "step": 5498 + }, + { + "epoch": 0.5020083987584444, + "grad_norm": 0.4773169457912445, + "learning_rate": 4.916959116782009e-06, + "loss": 0.534, + "step": 5499 + }, + { + "epoch": 0.502099689611101, + "grad_norm": 0.4749637246131897, + "learning_rate": 4.9169285238732864e-06, + "loss": 0.5702, + "step": 5500 + }, + { + "epoch": 0.5021909804637575, + "grad_norm": 0.46524032950401306, + "learning_rate": 4.916897925425478e-06, + "loss": 0.5458, + "step": 5501 + }, + { + "epoch": 0.5022822713164141, + "grad_norm": 0.4442444145679474, + "learning_rate": 4.916867321438652e-06, + "loss": 0.5627, + "step": 5502 + }, + { + "epoch": 0.5023735621690707, + "grad_norm": 0.46574878692626953, + "learning_rate": 4.91683671191288e-06, + "loss": 0.566, + "step": 5503 + }, + { + "epoch": 0.5024648530217273, + "grad_norm": 0.4923990070819855, + "learning_rate": 4.9168060968482325e-06, + "loss": 0.5649, + "step": 5504 + }, + { + "epoch": 0.5025561438743837, + "grad_norm": 0.5303122997283936, + "learning_rate": 4.916775476244779e-06, + "loss": 0.5522, + "step": 5505 + }, + { + "epoch": 0.5026474347270403, + "grad_norm": 0.48779991269111633, + "learning_rate": 4.916744850102588e-06, + "loss": 0.5905, + "step": 5506 + }, + { + "epoch": 0.5027387255796969, + "grad_norm": 0.49413007497787476, + "learning_rate": 4.916714218421733e-06, + "loss": 0.5612, + "step": 5507 + }, + { + "epoch": 0.5028300164323535, + "grad_norm": 0.4509419798851013, + "learning_rate": 4.916683581202282e-06, + "loss": 0.5784, + "step": 5508 + }, + { + "epoch": 0.50292130728501, + "grad_norm": 0.4734085202217102, + "learning_rate": 4.916652938444306e-06, + "loss": 0.5746, + "step": 5509 + }, + { + "epoch": 0.5030125981376666, + "grad_norm": 0.4688590466976166, + "learning_rate": 4.916622290147874e-06, + "loss": 0.5923, + "step": 5510 + }, + { + "epoch": 0.5031038889903232, + "grad_norm": 0.4712166488170624, + "learning_rate": 4.9165916363130585e-06, + "loss": 0.6015, + "step": 5511 + }, + { + "epoch": 0.5031951798429797, + "grad_norm": 0.45994824171066284, + "learning_rate": 4.916560976939928e-06, + "loss": 0.5556, + "step": 5512 + }, + { + "epoch": 0.5032864706956363, + "grad_norm": 0.4952051341533661, + "learning_rate": 4.916530312028553e-06, + "loss": 0.533, + "step": 5513 + }, + { + "epoch": 0.5033777615482928, + "grad_norm": 0.5111138224601746, + "learning_rate": 4.916499641579004e-06, + "loss": 0.5664, + "step": 5514 + }, + { + "epoch": 0.5034690524009494, + "grad_norm": 0.49614185094833374, + "learning_rate": 4.916468965591351e-06, + "loss": 0.5491, + "step": 5515 + }, + { + "epoch": 0.503560343253606, + "grad_norm": 0.48322537541389465, + "learning_rate": 4.916438284065666e-06, + "loss": 0.558, + "step": 5516 + }, + { + "epoch": 0.5036516341062626, + "grad_norm": 0.47534826397895813, + "learning_rate": 4.916407597002016e-06, + "loss": 0.5768, + "step": 5517 + }, + { + "epoch": 0.5037429249589191, + "grad_norm": 0.4933330714702606, + "learning_rate": 4.916376904400475e-06, + "loss": 0.5623, + "step": 5518 + }, + { + "epoch": 0.5038342158115757, + "grad_norm": 0.5004243850708008, + "learning_rate": 4.9163462062611106e-06, + "loss": 0.5717, + "step": 5519 + }, + { + "epoch": 0.5039255066642322, + "grad_norm": 0.4849616289138794, + "learning_rate": 4.916315502583995e-06, + "loss": 0.5469, + "step": 5520 + }, + { + "epoch": 0.5040167975168888, + "grad_norm": 0.48847973346710205, + "learning_rate": 4.916284793369196e-06, + "loss": 0.5444, + "step": 5521 + }, + { + "epoch": 0.5041080883695453, + "grad_norm": 0.46279671788215637, + "learning_rate": 4.916254078616787e-06, + "loss": 0.6096, + "step": 5522 + }, + { + "epoch": 0.5041993792222019, + "grad_norm": 0.4770888388156891, + "learning_rate": 4.916223358326837e-06, + "loss": 0.5825, + "step": 5523 + }, + { + "epoch": 0.5042906700748585, + "grad_norm": 0.42077168822288513, + "learning_rate": 4.9161926324994166e-06, + "loss": 0.6304, + "step": 5524 + }, + { + "epoch": 0.5043819609275151, + "grad_norm": 0.48373615741729736, + "learning_rate": 4.9161619011345955e-06, + "loss": 0.5852, + "step": 5525 + }, + { + "epoch": 0.5044732517801717, + "grad_norm": 0.4452369809150696, + "learning_rate": 4.916131164232445e-06, + "loss": 0.5901, + "step": 5526 + }, + { + "epoch": 0.5045645426328282, + "grad_norm": 0.4939228892326355, + "learning_rate": 4.916100421793035e-06, + "loss": 0.5429, + "step": 5527 + }, + { + "epoch": 0.5046558334854847, + "grad_norm": 0.46353626251220703, + "learning_rate": 4.916069673816437e-06, + "loss": 0.5982, + "step": 5528 + }, + { + "epoch": 0.5047471243381413, + "grad_norm": 0.503764808177948, + "learning_rate": 4.91603892030272e-06, + "loss": 0.5452, + "step": 5529 + }, + { + "epoch": 0.5048384151907979, + "grad_norm": 0.48076802492141724, + "learning_rate": 4.916008161251956e-06, + "loss": 0.5575, + "step": 5530 + }, + { + "epoch": 0.5049297060434544, + "grad_norm": 0.4827316999435425, + "learning_rate": 4.915977396664214e-06, + "loss": 0.5753, + "step": 5531 + }, + { + "epoch": 0.505020996896111, + "grad_norm": 0.4812696576118469, + "learning_rate": 4.915946626539565e-06, + "loss": 0.5738, + "step": 5532 + }, + { + "epoch": 0.5051122877487676, + "grad_norm": 0.48376670479774475, + "learning_rate": 4.9159158508780804e-06, + "loss": 0.5859, + "step": 5533 + }, + { + "epoch": 0.5052035786014242, + "grad_norm": 0.5001038312911987, + "learning_rate": 4.91588506967983e-06, + "loss": 0.5565, + "step": 5534 + }, + { + "epoch": 0.5052948694540808, + "grad_norm": 0.48150089383125305, + "learning_rate": 4.9158542829448845e-06, + "loss": 0.5179, + "step": 5535 + }, + { + "epoch": 0.5053861603067372, + "grad_norm": 0.47186797857284546, + "learning_rate": 4.915823490673312e-06, + "loss": 0.6013, + "step": 5536 + }, + { + "epoch": 0.5054774511593938, + "grad_norm": 0.4570176601409912, + "learning_rate": 4.915792692865188e-06, + "loss": 0.5728, + "step": 5537 + }, + { + "epoch": 0.5055687420120504, + "grad_norm": 0.4941980540752411, + "learning_rate": 4.91576188952058e-06, + "loss": 0.5719, + "step": 5538 + }, + { + "epoch": 0.505660032864707, + "grad_norm": 0.45683354139328003, + "learning_rate": 4.9157310806395585e-06, + "loss": 0.5608, + "step": 5539 + }, + { + "epoch": 0.5057513237173635, + "grad_norm": 0.4861173927783966, + "learning_rate": 4.9157002662221955e-06, + "loss": 0.5565, + "step": 5540 + }, + { + "epoch": 0.5058426145700201, + "grad_norm": 0.4849236011505127, + "learning_rate": 4.91566944626856e-06, + "loss": 0.5736, + "step": 5541 + }, + { + "epoch": 0.5059339054226767, + "grad_norm": 0.4634306728839874, + "learning_rate": 4.915638620778724e-06, + "loss": 0.5241, + "step": 5542 + }, + { + "epoch": 0.5060251962753333, + "grad_norm": 0.4529936909675598, + "learning_rate": 4.915607789752757e-06, + "loss": 0.579, + "step": 5543 + }, + { + "epoch": 0.5061164871279897, + "grad_norm": 0.5019054412841797, + "learning_rate": 4.91557695319073e-06, + "loss": 0.6088, + "step": 5544 + }, + { + "epoch": 0.5062077779806463, + "grad_norm": 0.49592605233192444, + "learning_rate": 4.915546111092715e-06, + "loss": 0.5385, + "step": 5545 + }, + { + "epoch": 0.5062990688333029, + "grad_norm": 0.4587250053882599, + "learning_rate": 4.915515263458781e-06, + "loss": 0.5763, + "step": 5546 + }, + { + "epoch": 0.5063903596859595, + "grad_norm": 0.489738404750824, + "learning_rate": 4.915484410289e-06, + "loss": 0.5712, + "step": 5547 + }, + { + "epoch": 0.506481650538616, + "grad_norm": 0.4559507668018341, + "learning_rate": 4.915453551583441e-06, + "loss": 0.6247, + "step": 5548 + }, + { + "epoch": 0.5065729413912726, + "grad_norm": 0.47527626156806946, + "learning_rate": 4.915422687342175e-06, + "loss": 0.5924, + "step": 5549 + }, + { + "epoch": 0.5066642322439292, + "grad_norm": 0.44557926058769226, + "learning_rate": 4.915391817565275e-06, + "loss": 0.6217, + "step": 5550 + }, + { + "epoch": 0.5067555230965857, + "grad_norm": 0.5077995657920837, + "learning_rate": 4.91536094225281e-06, + "loss": 0.5532, + "step": 5551 + }, + { + "epoch": 0.5068468139492422, + "grad_norm": 0.488596111536026, + "learning_rate": 4.91533006140485e-06, + "loss": 0.5829, + "step": 5552 + }, + { + "epoch": 0.5069381048018988, + "grad_norm": 0.4709917902946472, + "learning_rate": 4.915299175021468e-06, + "loss": 0.5834, + "step": 5553 + }, + { + "epoch": 0.5070293956545554, + "grad_norm": 0.4635930061340332, + "learning_rate": 4.915268283102733e-06, + "loss": 0.5667, + "step": 5554 + }, + { + "epoch": 0.507120686507212, + "grad_norm": 0.48693913221359253, + "learning_rate": 4.915237385648716e-06, + "loss": 0.5427, + "step": 5555 + }, + { + "epoch": 0.5072119773598686, + "grad_norm": 0.4849471151828766, + "learning_rate": 4.915206482659488e-06, + "loss": 0.582, + "step": 5556 + }, + { + "epoch": 0.5073032682125251, + "grad_norm": 0.47305622696876526, + "learning_rate": 4.91517557413512e-06, + "loss": 0.6046, + "step": 5557 + }, + { + "epoch": 0.5073945590651817, + "grad_norm": 0.48654481768608093, + "learning_rate": 4.915144660075684e-06, + "loss": 0.5774, + "step": 5558 + }, + { + "epoch": 0.5074858499178382, + "grad_norm": 0.48979508876800537, + "learning_rate": 4.915113740481249e-06, + "loss": 0.5806, + "step": 5559 + }, + { + "epoch": 0.5075771407704948, + "grad_norm": 0.4782140254974365, + "learning_rate": 4.915082815351886e-06, + "loss": 0.5984, + "step": 5560 + }, + { + "epoch": 0.5076684316231513, + "grad_norm": 0.47453007102012634, + "learning_rate": 4.915051884687666e-06, + "loss": 0.5752, + "step": 5561 + }, + { + "epoch": 0.5077597224758079, + "grad_norm": 0.4851454496383667, + "learning_rate": 4.9150209484886615e-06, + "loss": 0.577, + "step": 5562 + }, + { + "epoch": 0.5078510133284645, + "grad_norm": 0.4579122066497803, + "learning_rate": 4.914990006754941e-06, + "loss": 0.575, + "step": 5563 + }, + { + "epoch": 0.5079423041811211, + "grad_norm": 0.48921462893486023, + "learning_rate": 4.9149590594865774e-06, + "loss": 0.5582, + "step": 5564 + }, + { + "epoch": 0.5080335950337777, + "grad_norm": 0.47414007782936096, + "learning_rate": 4.9149281066836405e-06, + "loss": 0.5579, + "step": 5565 + }, + { + "epoch": 0.5081248858864342, + "grad_norm": 0.47887569665908813, + "learning_rate": 4.914897148346201e-06, + "loss": 0.5935, + "step": 5566 + }, + { + "epoch": 0.5082161767390907, + "grad_norm": 0.4549287259578705, + "learning_rate": 4.914866184474331e-06, + "loss": 0.6005, + "step": 5567 + }, + { + "epoch": 0.5083074675917473, + "grad_norm": 0.47581353783607483, + "learning_rate": 4.914835215068101e-06, + "loss": 0.5932, + "step": 5568 + }, + { + "epoch": 0.5083987584444039, + "grad_norm": 0.4567379057407379, + "learning_rate": 4.914804240127581e-06, + "loss": 0.5437, + "step": 5569 + }, + { + "epoch": 0.5084900492970604, + "grad_norm": 0.5396776795387268, + "learning_rate": 4.9147732596528435e-06, + "loss": 0.5278, + "step": 5570 + }, + { + "epoch": 0.508581340149717, + "grad_norm": 0.43500685691833496, + "learning_rate": 4.9147422736439586e-06, + "loss": 0.629, + "step": 5571 + }, + { + "epoch": 0.5086726310023736, + "grad_norm": 0.49373364448547363, + "learning_rate": 4.914711282100998e-06, + "loss": 0.5402, + "step": 5572 + }, + { + "epoch": 0.5087639218550302, + "grad_norm": 0.4965064227581024, + "learning_rate": 4.914680285024031e-06, + "loss": 0.5763, + "step": 5573 + }, + { + "epoch": 0.5088552127076867, + "grad_norm": 0.46656566858291626, + "learning_rate": 4.914649282413131e-06, + "loss": 0.571, + "step": 5574 + }, + { + "epoch": 0.5089465035603432, + "grad_norm": 0.4588755667209625, + "learning_rate": 4.9146182742683675e-06, + "loss": 0.5763, + "step": 5575 + }, + { + "epoch": 0.5090377944129998, + "grad_norm": 0.48997172713279724, + "learning_rate": 4.914587260589812e-06, + "loss": 0.5456, + "step": 5576 + }, + { + "epoch": 0.5091290852656564, + "grad_norm": 0.4947170913219452, + "learning_rate": 4.9145562413775365e-06, + "loss": 0.572, + "step": 5577 + }, + { + "epoch": 0.509220376118313, + "grad_norm": 0.508762001991272, + "learning_rate": 4.91452521663161e-06, + "loss": 0.561, + "step": 5578 + }, + { + "epoch": 0.5093116669709695, + "grad_norm": 0.4865562915802002, + "learning_rate": 4.914494186352105e-06, + "loss": 0.566, + "step": 5579 + }, + { + "epoch": 0.5094029578236261, + "grad_norm": 0.47758299112319946, + "learning_rate": 4.914463150539093e-06, + "loss": 0.6054, + "step": 5580 + }, + { + "epoch": 0.5094942486762827, + "grad_norm": 0.4635421633720398, + "learning_rate": 4.9144321091926435e-06, + "loss": 0.579, + "step": 5581 + }, + { + "epoch": 0.5095855395289391, + "grad_norm": 0.5005338191986084, + "learning_rate": 4.914401062312829e-06, + "loss": 0.5283, + "step": 5582 + }, + { + "epoch": 0.5096768303815957, + "grad_norm": 0.47634387016296387, + "learning_rate": 4.914370009899721e-06, + "loss": 0.5729, + "step": 5583 + }, + { + "epoch": 0.5097681212342523, + "grad_norm": 0.47675225138664246, + "learning_rate": 4.914338951953389e-06, + "loss": 0.5711, + "step": 5584 + }, + { + "epoch": 0.5098594120869089, + "grad_norm": 0.4741598963737488, + "learning_rate": 4.914307888473905e-06, + "loss": 0.5367, + "step": 5585 + }, + { + "epoch": 0.5099507029395655, + "grad_norm": 0.47040945291519165, + "learning_rate": 4.914276819461341e-06, + "loss": 0.5966, + "step": 5586 + }, + { + "epoch": 0.510041993792222, + "grad_norm": 0.4987776279449463, + "learning_rate": 4.914245744915767e-06, + "loss": 0.5863, + "step": 5587 + }, + { + "epoch": 0.5101332846448786, + "grad_norm": 0.5209337472915649, + "learning_rate": 4.914214664837256e-06, + "loss": 0.5609, + "step": 5588 + }, + { + "epoch": 0.5102245754975352, + "grad_norm": 0.47109997272491455, + "learning_rate": 4.914183579225877e-06, + "loss": 0.5962, + "step": 5589 + }, + { + "epoch": 0.5103158663501917, + "grad_norm": 0.47390156984329224, + "learning_rate": 4.914152488081703e-06, + "loss": 0.5509, + "step": 5590 + }, + { + "epoch": 0.5104071572028482, + "grad_norm": 0.46762996912002563, + "learning_rate": 4.914121391404803e-06, + "loss": 0.5885, + "step": 5591 + }, + { + "epoch": 0.5104984480555048, + "grad_norm": 0.49599558115005493, + "learning_rate": 4.914090289195251e-06, + "loss": 0.5677, + "step": 5592 + }, + { + "epoch": 0.5105897389081614, + "grad_norm": 0.4515745937824249, + "learning_rate": 4.914059181453117e-06, + "loss": 0.5912, + "step": 5593 + }, + { + "epoch": 0.510681029760818, + "grad_norm": 0.49605369567871094, + "learning_rate": 4.914028068178471e-06, + "loss": 0.5401, + "step": 5594 + }, + { + "epoch": 0.5107723206134746, + "grad_norm": 0.46699947118759155, + "learning_rate": 4.913996949371387e-06, + "loss": 0.5747, + "step": 5595 + }, + { + "epoch": 0.5108636114661311, + "grad_norm": 0.4609794616699219, + "learning_rate": 4.913965825031935e-06, + "loss": 0.5771, + "step": 5596 + }, + { + "epoch": 0.5109549023187877, + "grad_norm": 0.5152279138565063, + "learning_rate": 4.913934695160185e-06, + "loss": 0.569, + "step": 5597 + }, + { + "epoch": 0.5110461931714442, + "grad_norm": 0.5024763941764832, + "learning_rate": 4.913903559756211e-06, + "loss": 0.5956, + "step": 5598 + }, + { + "epoch": 0.5111374840241008, + "grad_norm": 0.4844757616519928, + "learning_rate": 4.913872418820082e-06, + "loss": 0.5453, + "step": 5599 + }, + { + "epoch": 0.5112287748767573, + "grad_norm": 0.4651526212692261, + "learning_rate": 4.913841272351871e-06, + "loss": 0.5583, + "step": 5600 + }, + { + "epoch": 0.5113200657294139, + "grad_norm": 0.4819861650466919, + "learning_rate": 4.913810120351649e-06, + "loss": 0.5761, + "step": 5601 + }, + { + "epoch": 0.5114113565820705, + "grad_norm": 0.4668864607810974, + "learning_rate": 4.913778962819486e-06, + "loss": 0.5753, + "step": 5602 + }, + { + "epoch": 0.5115026474347271, + "grad_norm": 0.48279619216918945, + "learning_rate": 4.913747799755455e-06, + "loss": 0.5789, + "step": 5603 + }, + { + "epoch": 0.5115939382873836, + "grad_norm": 0.5011410713195801, + "learning_rate": 4.9137166311596275e-06, + "loss": 0.5251, + "step": 5604 + }, + { + "epoch": 0.5116852291400402, + "grad_norm": 0.48179754614830017, + "learning_rate": 4.913685457032074e-06, + "loss": 0.5852, + "step": 5605 + }, + { + "epoch": 0.5117765199926967, + "grad_norm": 0.46726179122924805, + "learning_rate": 4.913654277372866e-06, + "loss": 0.574, + "step": 5606 + }, + { + "epoch": 0.5118678108453533, + "grad_norm": 0.513190746307373, + "learning_rate": 4.913623092182075e-06, + "loss": 0.5526, + "step": 5607 + }, + { + "epoch": 0.5119591016980098, + "grad_norm": 0.47754716873168945, + "learning_rate": 4.913591901459773e-06, + "loss": 0.5817, + "step": 5608 + }, + { + "epoch": 0.5120503925506664, + "grad_norm": 0.4895193874835968, + "learning_rate": 4.913560705206032e-06, + "loss": 0.5412, + "step": 5609 + }, + { + "epoch": 0.512141683403323, + "grad_norm": 0.4759678244590759, + "learning_rate": 4.913529503420922e-06, + "loss": 0.586, + "step": 5610 + }, + { + "epoch": 0.5122329742559796, + "grad_norm": 0.46082887053489685, + "learning_rate": 4.913498296104516e-06, + "loss": 0.5702, + "step": 5611 + }, + { + "epoch": 0.5123242651086362, + "grad_norm": 0.46168437600135803, + "learning_rate": 4.913467083256884e-06, + "loss": 0.5565, + "step": 5612 + }, + { + "epoch": 0.5124155559612926, + "grad_norm": 0.45799797773361206, + "learning_rate": 4.913435864878098e-06, + "loss": 0.5757, + "step": 5613 + }, + { + "epoch": 0.5125068468139492, + "grad_norm": 0.4998265206813812, + "learning_rate": 4.91340464096823e-06, + "loss": 0.5585, + "step": 5614 + }, + { + "epoch": 0.5125981376666058, + "grad_norm": 0.5146597027778625, + "learning_rate": 4.913373411527352e-06, + "loss": 0.55, + "step": 5615 + }, + { + "epoch": 0.5126894285192624, + "grad_norm": 0.47376903891563416, + "learning_rate": 4.913342176555535e-06, + "loss": 0.6083, + "step": 5616 + }, + { + "epoch": 0.5127807193719189, + "grad_norm": 0.4664105474948883, + "learning_rate": 4.91331093605285e-06, + "loss": 0.5501, + "step": 5617 + }, + { + "epoch": 0.5128720102245755, + "grad_norm": 0.46060439944267273, + "learning_rate": 4.913279690019369e-06, + "loss": 0.5632, + "step": 5618 + }, + { + "epoch": 0.5129633010772321, + "grad_norm": 0.47275060415267944, + "learning_rate": 4.9132484384551646e-06, + "loss": 0.5768, + "step": 5619 + }, + { + "epoch": 0.5130545919298887, + "grad_norm": 0.5013675093650818, + "learning_rate": 4.913217181360307e-06, + "loss": 0.5522, + "step": 5620 + }, + { + "epoch": 0.5131458827825451, + "grad_norm": 0.4901649057865143, + "learning_rate": 4.913185918734869e-06, + "loss": 0.5581, + "step": 5621 + }, + { + "epoch": 0.5132371736352017, + "grad_norm": 0.4605564773082733, + "learning_rate": 4.91315465057892e-06, + "loss": 0.6091, + "step": 5622 + }, + { + "epoch": 0.5133284644878583, + "grad_norm": 0.5110851526260376, + "learning_rate": 4.913123376892534e-06, + "loss": 0.5626, + "step": 5623 + }, + { + "epoch": 0.5134197553405149, + "grad_norm": 0.4896886646747589, + "learning_rate": 4.913092097675783e-06, + "loss": 0.5536, + "step": 5624 + }, + { + "epoch": 0.5135110461931714, + "grad_norm": 0.473117858171463, + "learning_rate": 4.913060812928737e-06, + "loss": 0.5435, + "step": 5625 + }, + { + "epoch": 0.513602337045828, + "grad_norm": 0.4717472493648529, + "learning_rate": 4.913029522651469e-06, + "loss": 0.5309, + "step": 5626 + }, + { + "epoch": 0.5136936278984846, + "grad_norm": 0.4627336263656616, + "learning_rate": 4.912998226844049e-06, + "loss": 0.5803, + "step": 5627 + }, + { + "epoch": 0.5137849187511412, + "grad_norm": 0.4812946319580078, + "learning_rate": 4.912966925506551e-06, + "loss": 0.5883, + "step": 5628 + }, + { + "epoch": 0.5138762096037977, + "grad_norm": 0.47683820128440857, + "learning_rate": 4.912935618639044e-06, + "loss": 0.559, + "step": 5629 + }, + { + "epoch": 0.5139675004564542, + "grad_norm": 0.48873037099838257, + "learning_rate": 4.912904306241602e-06, + "loss": 0.5614, + "step": 5630 + }, + { + "epoch": 0.5140587913091108, + "grad_norm": 0.4400483965873718, + "learning_rate": 4.9128729883142965e-06, + "loss": 0.6155, + "step": 5631 + }, + { + "epoch": 0.5141500821617674, + "grad_norm": 0.4701092541217804, + "learning_rate": 4.912841664857198e-06, + "loss": 0.5669, + "step": 5632 + }, + { + "epoch": 0.514241373014424, + "grad_norm": 0.4969421625137329, + "learning_rate": 4.91281033587038e-06, + "loss": 0.5745, + "step": 5633 + }, + { + "epoch": 0.5143326638670805, + "grad_norm": 0.4940202832221985, + "learning_rate": 4.9127790013539126e-06, + "loss": 0.5419, + "step": 5634 + }, + { + "epoch": 0.5144239547197371, + "grad_norm": 0.47492310404777527, + "learning_rate": 4.91274766130787e-06, + "loss": 0.5399, + "step": 5635 + }, + { + "epoch": 0.5145152455723937, + "grad_norm": 0.4874407649040222, + "learning_rate": 4.9127163157323205e-06, + "loss": 0.5248, + "step": 5636 + }, + { + "epoch": 0.5146065364250502, + "grad_norm": 0.46802693605422974, + "learning_rate": 4.9126849646273385e-06, + "loss": 0.5958, + "step": 5637 + }, + { + "epoch": 0.5146978272777067, + "grad_norm": 0.4597409963607788, + "learning_rate": 4.912653607992996e-06, + "loss": 0.6011, + "step": 5638 + }, + { + "epoch": 0.5147891181303633, + "grad_norm": 0.4646095037460327, + "learning_rate": 4.9126222458293625e-06, + "loss": 0.5714, + "step": 5639 + }, + { + "epoch": 0.5148804089830199, + "grad_norm": 0.45038875937461853, + "learning_rate": 4.912590878136512e-06, + "loss": 0.596, + "step": 5640 + }, + { + "epoch": 0.5149716998356765, + "grad_norm": 0.5473830103874207, + "learning_rate": 4.912559504914517e-06, + "loss": 0.5437, + "step": 5641 + }, + { + "epoch": 0.5150629906883331, + "grad_norm": 0.4613938629627228, + "learning_rate": 4.9125281261634474e-06, + "loss": 0.5803, + "step": 5642 + }, + { + "epoch": 0.5151542815409896, + "grad_norm": 0.4769630432128906, + "learning_rate": 4.9124967418833765e-06, + "loss": 0.5953, + "step": 5643 + }, + { + "epoch": 0.5152455723936462, + "grad_norm": 0.4741278290748596, + "learning_rate": 4.912465352074375e-06, + "loss": 0.5674, + "step": 5644 + }, + { + "epoch": 0.5153368632463027, + "grad_norm": 0.5048242807388306, + "learning_rate": 4.912433956736517e-06, + "loss": 0.584, + "step": 5645 + }, + { + "epoch": 0.5154281540989593, + "grad_norm": 0.5227110385894775, + "learning_rate": 4.912402555869871e-06, + "loss": 0.5657, + "step": 5646 + }, + { + "epoch": 0.5155194449516158, + "grad_norm": 0.48803436756134033, + "learning_rate": 4.912371149474512e-06, + "loss": 0.5587, + "step": 5647 + }, + { + "epoch": 0.5156107358042724, + "grad_norm": 0.4749775826931, + "learning_rate": 4.912339737550511e-06, + "loss": 0.5413, + "step": 5648 + }, + { + "epoch": 0.515702026656929, + "grad_norm": 0.49634504318237305, + "learning_rate": 4.912308320097939e-06, + "loss": 0.5204, + "step": 5649 + }, + { + "epoch": 0.5157933175095856, + "grad_norm": 0.4856862425804138, + "learning_rate": 4.91227689711687e-06, + "loss": 0.5482, + "step": 5650 + }, + { + "epoch": 0.5158846083622421, + "grad_norm": 0.488716721534729, + "learning_rate": 4.912245468607375e-06, + "loss": 0.5771, + "step": 5651 + }, + { + "epoch": 0.5159758992148986, + "grad_norm": 0.46658381819725037, + "learning_rate": 4.9122140345695255e-06, + "loss": 0.5882, + "step": 5652 + }, + { + "epoch": 0.5160671900675552, + "grad_norm": 0.46909618377685547, + "learning_rate": 4.912182595003394e-06, + "loss": 0.5969, + "step": 5653 + }, + { + "epoch": 0.5161584809202118, + "grad_norm": 0.4892919659614563, + "learning_rate": 4.912151149909053e-06, + "loss": 0.5899, + "step": 5654 + }, + { + "epoch": 0.5162497717728683, + "grad_norm": 0.47779810428619385, + "learning_rate": 4.912119699286573e-06, + "loss": 0.5664, + "step": 5655 + }, + { + "epoch": 0.5163410626255249, + "grad_norm": 0.5049687027931213, + "learning_rate": 4.912088243136028e-06, + "loss": 0.5596, + "step": 5656 + }, + { + "epoch": 0.5164323534781815, + "grad_norm": 0.4597241282463074, + "learning_rate": 4.912056781457489e-06, + "loss": 0.5806, + "step": 5657 + }, + { + "epoch": 0.5165236443308381, + "grad_norm": 0.44544652104377747, + "learning_rate": 4.912025314251029e-06, + "loss": 0.6006, + "step": 5658 + }, + { + "epoch": 0.5166149351834947, + "grad_norm": 0.45664551854133606, + "learning_rate": 4.911993841516719e-06, + "loss": 0.5895, + "step": 5659 + }, + { + "epoch": 0.5167062260361511, + "grad_norm": 0.4825683534145355, + "learning_rate": 4.911962363254631e-06, + "loss": 0.5909, + "step": 5660 + }, + { + "epoch": 0.5167975168888077, + "grad_norm": 0.46001875400543213, + "learning_rate": 4.911930879464839e-06, + "loss": 0.5508, + "step": 5661 + }, + { + "epoch": 0.5168888077414643, + "grad_norm": 0.45370784401893616, + "learning_rate": 4.9118993901474134e-06, + "loss": 0.6182, + "step": 5662 + }, + { + "epoch": 0.5169800985941209, + "grad_norm": 0.4783395230770111, + "learning_rate": 4.911867895302427e-06, + "loss": 0.5761, + "step": 5663 + }, + { + "epoch": 0.5170713894467774, + "grad_norm": 0.46079859137535095, + "learning_rate": 4.911836394929952e-06, + "loss": 0.5765, + "step": 5664 + }, + { + "epoch": 0.517162680299434, + "grad_norm": 0.4787174463272095, + "learning_rate": 4.91180488903006e-06, + "loss": 0.6276, + "step": 5665 + }, + { + "epoch": 0.5172539711520906, + "grad_norm": 0.5144727826118469, + "learning_rate": 4.9117733776028245e-06, + "loss": 0.582, + "step": 5666 + }, + { + "epoch": 0.5173452620047472, + "grad_norm": 0.491254061460495, + "learning_rate": 4.911741860648316e-06, + "loss": 0.5662, + "step": 5667 + }, + { + "epoch": 0.5174365528574036, + "grad_norm": 0.49482330679893494, + "learning_rate": 4.911710338166608e-06, + "loss": 0.5582, + "step": 5668 + }, + { + "epoch": 0.5175278437100602, + "grad_norm": 0.48349887132644653, + "learning_rate": 4.9116788101577725e-06, + "loss": 0.549, + "step": 5669 + }, + { + "epoch": 0.5176191345627168, + "grad_norm": 0.4568379521369934, + "learning_rate": 4.911647276621881e-06, + "loss": 0.5818, + "step": 5670 + }, + { + "epoch": 0.5177104254153734, + "grad_norm": 0.47533050179481506, + "learning_rate": 4.9116157375590065e-06, + "loss": 0.5685, + "step": 5671 + }, + { + "epoch": 0.51780171626803, + "grad_norm": 0.47565433382987976, + "learning_rate": 4.9115841929692225e-06, + "loss": 0.5565, + "step": 5672 + }, + { + "epoch": 0.5178930071206865, + "grad_norm": 0.5088381171226501, + "learning_rate": 4.9115526428525984e-06, + "loss": 0.5261, + "step": 5673 + }, + { + "epoch": 0.5179842979733431, + "grad_norm": 0.48118847608566284, + "learning_rate": 4.911521087209209e-06, + "loss": 0.5367, + "step": 5674 + }, + { + "epoch": 0.5180755888259997, + "grad_norm": 0.49063706398010254, + "learning_rate": 4.911489526039125e-06, + "loss": 0.5998, + "step": 5675 + }, + { + "epoch": 0.5181668796786562, + "grad_norm": 0.4708549380302429, + "learning_rate": 4.911457959342421e-06, + "loss": 0.6053, + "step": 5676 + }, + { + "epoch": 0.5182581705313127, + "grad_norm": 0.4546043872833252, + "learning_rate": 4.911426387119166e-06, + "loss": 0.5989, + "step": 5677 + }, + { + "epoch": 0.5183494613839693, + "grad_norm": 0.44071561098098755, + "learning_rate": 4.9113948093694345e-06, + "loss": 0.5654, + "step": 5678 + }, + { + "epoch": 0.5184407522366259, + "grad_norm": 0.49213820695877075, + "learning_rate": 4.9113632260932985e-06, + "loss": 0.5373, + "step": 5679 + }, + { + "epoch": 0.5185320430892825, + "grad_norm": 0.4690503478050232, + "learning_rate": 4.911331637290831e-06, + "loss": 0.6148, + "step": 5680 + }, + { + "epoch": 0.518623333941939, + "grad_norm": 0.4856697618961334, + "learning_rate": 4.911300042962103e-06, + "loss": 0.6172, + "step": 5681 + }, + { + "epoch": 0.5187146247945956, + "grad_norm": 0.43726110458374023, + "learning_rate": 4.9112684431071885e-06, + "loss": 0.5823, + "step": 5682 + }, + { + "epoch": 0.5188059156472521, + "grad_norm": 0.46095216274261475, + "learning_rate": 4.911236837726159e-06, + "loss": 0.6146, + "step": 5683 + }, + { + "epoch": 0.5188972064999087, + "grad_norm": 0.47110456228256226, + "learning_rate": 4.911205226819086e-06, + "loss": 0.5347, + "step": 5684 + }, + { + "epoch": 0.5189884973525652, + "grad_norm": 0.4867580831050873, + "learning_rate": 4.911173610386044e-06, + "loss": 0.5419, + "step": 5685 + }, + { + "epoch": 0.5190797882052218, + "grad_norm": 0.4730314314365387, + "learning_rate": 4.911141988427104e-06, + "loss": 0.6029, + "step": 5686 + }, + { + "epoch": 0.5191710790578784, + "grad_norm": 0.46285536885261536, + "learning_rate": 4.911110360942339e-06, + "loss": 0.5599, + "step": 5687 + }, + { + "epoch": 0.519262369910535, + "grad_norm": 0.4994184672832489, + "learning_rate": 4.911078727931821e-06, + "loss": 0.548, + "step": 5688 + }, + { + "epoch": 0.5193536607631916, + "grad_norm": 0.47362932562828064, + "learning_rate": 4.911047089395624e-06, + "loss": 0.541, + "step": 5689 + }, + { + "epoch": 0.5194449516158481, + "grad_norm": 0.49214261770248413, + "learning_rate": 4.911015445333818e-06, + "loss": 0.5133, + "step": 5690 + }, + { + "epoch": 0.5195362424685046, + "grad_norm": 0.46721017360687256, + "learning_rate": 4.910983795746478e-06, + "loss": 0.5492, + "step": 5691 + }, + { + "epoch": 0.5196275333211612, + "grad_norm": 0.4454938471317291, + "learning_rate": 4.910952140633674e-06, + "loss": 0.6126, + "step": 5692 + }, + { + "epoch": 0.5197188241738178, + "grad_norm": 0.48181602358818054, + "learning_rate": 4.9109204799954815e-06, + "loss": 0.56, + "step": 5693 + }, + { + "epoch": 0.5198101150264743, + "grad_norm": 0.5028271079063416, + "learning_rate": 4.910888813831971e-06, + "loss": 0.5862, + "step": 5694 + }, + { + "epoch": 0.5199014058791309, + "grad_norm": 0.4653494954109192, + "learning_rate": 4.910857142143216e-06, + "loss": 0.594, + "step": 5695 + }, + { + "epoch": 0.5199926967317875, + "grad_norm": 0.48013588786125183, + "learning_rate": 4.910825464929289e-06, + "loss": 0.5682, + "step": 5696 + }, + { + "epoch": 0.5200839875844441, + "grad_norm": 0.4807908236980438, + "learning_rate": 4.9107937821902616e-06, + "loss": 0.5754, + "step": 5697 + }, + { + "epoch": 0.5201752784371007, + "grad_norm": 0.48921266198158264, + "learning_rate": 4.910762093926207e-06, + "loss": 0.5924, + "step": 5698 + }, + { + "epoch": 0.5202665692897571, + "grad_norm": 0.44103747606277466, + "learning_rate": 4.910730400137198e-06, + "loss": 0.5967, + "step": 5699 + }, + { + "epoch": 0.5203578601424137, + "grad_norm": 0.4875606298446655, + "learning_rate": 4.910698700823308e-06, + "loss": 0.5767, + "step": 5700 + }, + { + "epoch": 0.5204491509950703, + "grad_norm": 0.48453637957572937, + "learning_rate": 4.910666995984607e-06, + "loss": 0.5343, + "step": 5701 + }, + { + "epoch": 0.5205404418477269, + "grad_norm": 0.4807356297969818, + "learning_rate": 4.910635285621172e-06, + "loss": 0.6014, + "step": 5702 + }, + { + "epoch": 0.5206317327003834, + "grad_norm": 0.4670332074165344, + "learning_rate": 4.910603569733071e-06, + "loss": 0.5698, + "step": 5703 + }, + { + "epoch": 0.52072302355304, + "grad_norm": 0.4270171821117401, + "learning_rate": 4.9105718483203805e-06, + "loss": 0.5918, + "step": 5704 + }, + { + "epoch": 0.5208143144056966, + "grad_norm": 0.5046976208686829, + "learning_rate": 4.910540121383171e-06, + "loss": 0.5571, + "step": 5705 + }, + { + "epoch": 0.5209056052583532, + "grad_norm": 0.4856918752193451, + "learning_rate": 4.9105083889215144e-06, + "loss": 0.5915, + "step": 5706 + }, + { + "epoch": 0.5209968961110096, + "grad_norm": 0.46155378222465515, + "learning_rate": 4.9104766509354865e-06, + "loss": 0.6112, + "step": 5707 + }, + { + "epoch": 0.5210881869636662, + "grad_norm": 0.5230048894882202, + "learning_rate": 4.910444907425157e-06, + "loss": 0.5497, + "step": 5708 + }, + { + "epoch": 0.5211794778163228, + "grad_norm": 0.5339651703834534, + "learning_rate": 4.910413158390601e-06, + "loss": 0.5624, + "step": 5709 + }, + { + "epoch": 0.5212707686689794, + "grad_norm": 0.485435426235199, + "learning_rate": 4.91038140383189e-06, + "loss": 0.6033, + "step": 5710 + }, + { + "epoch": 0.521362059521636, + "grad_norm": 0.4824048578739166, + "learning_rate": 4.910349643749097e-06, + "loss": 0.5233, + "step": 5711 + }, + { + "epoch": 0.5214533503742925, + "grad_norm": 0.4813143312931061, + "learning_rate": 4.910317878142295e-06, + "loss": 0.5893, + "step": 5712 + }, + { + "epoch": 0.5215446412269491, + "grad_norm": 0.47503218054771423, + "learning_rate": 4.910286107011556e-06, + "loss": 0.599, + "step": 5713 + }, + { + "epoch": 0.5216359320796056, + "grad_norm": 0.4902461767196655, + "learning_rate": 4.910254330356954e-06, + "loss": 0.5414, + "step": 5714 + }, + { + "epoch": 0.5217272229322621, + "grad_norm": 0.4705300033092499, + "learning_rate": 4.9102225481785605e-06, + "loss": 0.5903, + "step": 5715 + }, + { + "epoch": 0.5218185137849187, + "grad_norm": 0.44804656505584717, + "learning_rate": 4.91019076047645e-06, + "loss": 0.5437, + "step": 5716 + }, + { + "epoch": 0.5219098046375753, + "grad_norm": 0.4624447524547577, + "learning_rate": 4.910158967250693e-06, + "loss": 0.5715, + "step": 5717 + }, + { + "epoch": 0.5220010954902319, + "grad_norm": 0.4569252133369446, + "learning_rate": 4.910127168501365e-06, + "loss": 0.5848, + "step": 5718 + }, + { + "epoch": 0.5220923863428885, + "grad_norm": 0.47168052196502686, + "learning_rate": 4.9100953642285375e-06, + "loss": 0.5432, + "step": 5719 + }, + { + "epoch": 0.522183677195545, + "grad_norm": 0.5199024677276611, + "learning_rate": 4.910063554432284e-06, + "loss": 0.5502, + "step": 5720 + }, + { + "epoch": 0.5222749680482016, + "grad_norm": 0.4836268723011017, + "learning_rate": 4.910031739112675e-06, + "loss": 0.5445, + "step": 5721 + }, + { + "epoch": 0.5223662589008581, + "grad_norm": 0.5086407661437988, + "learning_rate": 4.909999918269788e-06, + "loss": 0.57, + "step": 5722 + }, + { + "epoch": 0.5224575497535147, + "grad_norm": 0.44951051473617554, + "learning_rate": 4.909968091903691e-06, + "loss": 0.5991, + "step": 5723 + }, + { + "epoch": 0.5225488406061712, + "grad_norm": 0.47169411182403564, + "learning_rate": 4.9099362600144606e-06, + "loss": 0.6314, + "step": 5724 + }, + { + "epoch": 0.5226401314588278, + "grad_norm": 0.48799625039100647, + "learning_rate": 4.909904422602168e-06, + "loss": 0.5523, + "step": 5725 + }, + { + "epoch": 0.5227314223114844, + "grad_norm": 0.458114355802536, + "learning_rate": 4.909872579666887e-06, + "loss": 0.6208, + "step": 5726 + }, + { + "epoch": 0.522822713164141, + "grad_norm": 0.47310829162597656, + "learning_rate": 4.909840731208689e-06, + "loss": 0.5776, + "step": 5727 + }, + { + "epoch": 0.5229140040167976, + "grad_norm": 0.5018193125724792, + "learning_rate": 4.9098088772276485e-06, + "loss": 0.5565, + "step": 5728 + }, + { + "epoch": 0.5230052948694541, + "grad_norm": 0.47081223130226135, + "learning_rate": 4.909777017723839e-06, + "loss": 0.5965, + "step": 5729 + }, + { + "epoch": 0.5230965857221106, + "grad_norm": 0.47387686371803284, + "learning_rate": 4.909745152697332e-06, + "loss": 0.5872, + "step": 5730 + }, + { + "epoch": 0.5231878765747672, + "grad_norm": 0.49038761854171753, + "learning_rate": 4.9097132821482006e-06, + "loss": 0.5808, + "step": 5731 + }, + { + "epoch": 0.5232791674274238, + "grad_norm": 0.48374706506729126, + "learning_rate": 4.9096814060765196e-06, + "loss": 0.5229, + "step": 5732 + }, + { + "epoch": 0.5233704582800803, + "grad_norm": 0.46440503001213074, + "learning_rate": 4.90964952448236e-06, + "loss": 0.5464, + "step": 5733 + }, + { + "epoch": 0.5234617491327369, + "grad_norm": 0.4690033495426178, + "learning_rate": 4.909617637365796e-06, + "loss": 0.5766, + "step": 5734 + }, + { + "epoch": 0.5235530399853935, + "grad_norm": 0.4681682884693146, + "learning_rate": 4.9095857447269e-06, + "loss": 0.5386, + "step": 5735 + }, + { + "epoch": 0.5236443308380501, + "grad_norm": 0.48556166887283325, + "learning_rate": 4.909553846565746e-06, + "loss": 0.5353, + "step": 5736 + }, + { + "epoch": 0.5237356216907066, + "grad_norm": 0.4617718458175659, + "learning_rate": 4.909521942882407e-06, + "loss": 0.5904, + "step": 5737 + }, + { + "epoch": 0.5238269125433631, + "grad_norm": 0.4959988296031952, + "learning_rate": 4.9094900336769546e-06, + "loss": 0.5199, + "step": 5738 + }, + { + "epoch": 0.5239182033960197, + "grad_norm": 0.48183727264404297, + "learning_rate": 4.909458118949464e-06, + "loss": 0.5808, + "step": 5739 + }, + { + "epoch": 0.5240094942486763, + "grad_norm": 0.5038964748382568, + "learning_rate": 4.909426198700007e-06, + "loss": 0.5385, + "step": 5740 + }, + { + "epoch": 0.5241007851013328, + "grad_norm": 0.4527442157268524, + "learning_rate": 4.909394272928657e-06, + "loss": 0.5691, + "step": 5741 + }, + { + "epoch": 0.5241920759539894, + "grad_norm": 0.47490671277046204, + "learning_rate": 4.909362341635488e-06, + "loss": 0.5727, + "step": 5742 + }, + { + "epoch": 0.524283366806646, + "grad_norm": 0.49263569712638855, + "learning_rate": 4.909330404820571e-06, + "loss": 0.6173, + "step": 5743 + }, + { + "epoch": 0.5243746576593026, + "grad_norm": 0.5060118436813354, + "learning_rate": 4.909298462483981e-06, + "loss": 0.5481, + "step": 5744 + }, + { + "epoch": 0.5244659485119592, + "grad_norm": 0.5096070170402527, + "learning_rate": 4.909266514625791e-06, + "loss": 0.5781, + "step": 5745 + }, + { + "epoch": 0.5245572393646156, + "grad_norm": 0.4884781837463379, + "learning_rate": 4.909234561246076e-06, + "loss": 0.6203, + "step": 5746 + }, + { + "epoch": 0.5246485302172722, + "grad_norm": 0.48729830980300903, + "learning_rate": 4.909202602344905e-06, + "loss": 0.5826, + "step": 5747 + }, + { + "epoch": 0.5247398210699288, + "grad_norm": 0.47721680998802185, + "learning_rate": 4.909170637922355e-06, + "loss": 0.5704, + "step": 5748 + }, + { + "epoch": 0.5248311119225854, + "grad_norm": 0.48761746287345886, + "learning_rate": 4.909138667978497e-06, + "loss": 0.5772, + "step": 5749 + }, + { + "epoch": 0.5249224027752419, + "grad_norm": 0.48002520203590393, + "learning_rate": 4.909106692513405e-06, + "loss": 0.56, + "step": 5750 + }, + { + "epoch": 0.5250136936278985, + "grad_norm": 0.5031468868255615, + "learning_rate": 4.909074711527152e-06, + "loss": 0.612, + "step": 5751 + }, + { + "epoch": 0.5251049844805551, + "grad_norm": 0.4659029543399811, + "learning_rate": 4.909042725019813e-06, + "loss": 0.5644, + "step": 5752 + }, + { + "epoch": 0.5251962753332116, + "grad_norm": 0.4649001359939575, + "learning_rate": 4.909010732991459e-06, + "loss": 0.5441, + "step": 5753 + }, + { + "epoch": 0.5252875661858681, + "grad_norm": 0.46247398853302, + "learning_rate": 4.908978735442165e-06, + "loss": 0.5648, + "step": 5754 + }, + { + "epoch": 0.5253788570385247, + "grad_norm": 0.4794034957885742, + "learning_rate": 4.908946732372003e-06, + "loss": 0.5325, + "step": 5755 + }, + { + "epoch": 0.5254701478911813, + "grad_norm": 0.45652174949645996, + "learning_rate": 4.908914723781047e-06, + "loss": 0.5854, + "step": 5756 + }, + { + "epoch": 0.5255614387438379, + "grad_norm": 0.46709972620010376, + "learning_rate": 4.908882709669371e-06, + "loss": 0.553, + "step": 5757 + }, + { + "epoch": 0.5256527295964945, + "grad_norm": 0.5067505240440369, + "learning_rate": 4.908850690037047e-06, + "loss": 0.5748, + "step": 5758 + }, + { + "epoch": 0.525744020449151, + "grad_norm": 0.48359793424606323, + "learning_rate": 4.908818664884149e-06, + "loss": 0.5552, + "step": 5759 + }, + { + "epoch": 0.5258353113018076, + "grad_norm": 0.4738430976867676, + "learning_rate": 4.908786634210751e-06, + "loss": 0.5152, + "step": 5760 + }, + { + "epoch": 0.5259266021544641, + "grad_norm": 0.4520111680030823, + "learning_rate": 4.908754598016926e-06, + "loss": 0.5971, + "step": 5761 + }, + { + "epoch": 0.5260178930071207, + "grad_norm": 0.47307002544403076, + "learning_rate": 4.908722556302747e-06, + "loss": 0.5968, + "step": 5762 + }, + { + "epoch": 0.5261091838597772, + "grad_norm": 0.46290865540504456, + "learning_rate": 4.908690509068286e-06, + "loss": 0.5529, + "step": 5763 + }, + { + "epoch": 0.5262004747124338, + "grad_norm": 0.44159507751464844, + "learning_rate": 4.908658456313621e-06, + "loss": 0.6066, + "step": 5764 + }, + { + "epoch": 0.5262917655650904, + "grad_norm": 0.45439764857292175, + "learning_rate": 4.908626398038821e-06, + "loss": 0.5671, + "step": 5765 + }, + { + "epoch": 0.526383056417747, + "grad_norm": 0.4540586471557617, + "learning_rate": 4.908594334243962e-06, + "loss": 0.5509, + "step": 5766 + }, + { + "epoch": 0.5264743472704035, + "grad_norm": 0.514808177947998, + "learning_rate": 4.908562264929117e-06, + "loss": 0.5446, + "step": 5767 + }, + { + "epoch": 0.5265656381230601, + "grad_norm": 0.5048184990882874, + "learning_rate": 4.908530190094357e-06, + "loss": 0.541, + "step": 5768 + }, + { + "epoch": 0.5266569289757166, + "grad_norm": 0.5221703052520752, + "learning_rate": 4.908498109739759e-06, + "loss": 0.5643, + "step": 5769 + }, + { + "epoch": 0.5267482198283732, + "grad_norm": 0.4536086618900299, + "learning_rate": 4.9084660238653955e-06, + "loss": 0.6054, + "step": 5770 + }, + { + "epoch": 0.5268395106810297, + "grad_norm": 0.47027409076690674, + "learning_rate": 4.908433932471338e-06, + "loss": 0.5747, + "step": 5771 + }, + { + "epoch": 0.5269308015336863, + "grad_norm": 0.48649412393569946, + "learning_rate": 4.908401835557663e-06, + "loss": 0.5424, + "step": 5772 + }, + { + "epoch": 0.5270220923863429, + "grad_norm": 0.47270625829696655, + "learning_rate": 4.908369733124443e-06, + "loss": 0.5606, + "step": 5773 + }, + { + "epoch": 0.5271133832389995, + "grad_norm": 0.5016847252845764, + "learning_rate": 4.908337625171751e-06, + "loss": 0.5802, + "step": 5774 + }, + { + "epoch": 0.5272046740916561, + "grad_norm": 0.4708296060562134, + "learning_rate": 4.9083055116996605e-06, + "loss": 0.5848, + "step": 5775 + }, + { + "epoch": 0.5272959649443126, + "grad_norm": 0.4495997428894043, + "learning_rate": 4.908273392708246e-06, + "loss": 0.6134, + "step": 5776 + }, + { + "epoch": 0.5273872557969691, + "grad_norm": 0.4874475598335266, + "learning_rate": 4.908241268197581e-06, + "loss": 0.5667, + "step": 5777 + }, + { + "epoch": 0.5274785466496257, + "grad_norm": 0.48043420910835266, + "learning_rate": 4.908209138167738e-06, + "loss": 0.5498, + "step": 5778 + }, + { + "epoch": 0.5275698375022823, + "grad_norm": 0.4760023355484009, + "learning_rate": 4.9081770026187915e-06, + "loss": 0.5905, + "step": 5779 + }, + { + "epoch": 0.5276611283549388, + "grad_norm": 0.4807477593421936, + "learning_rate": 4.908144861550815e-06, + "loss": 0.5704, + "step": 5780 + }, + { + "epoch": 0.5277524192075954, + "grad_norm": 0.46546483039855957, + "learning_rate": 4.908112714963882e-06, + "loss": 0.6093, + "step": 5781 + }, + { + "epoch": 0.527843710060252, + "grad_norm": 0.4901910424232483, + "learning_rate": 4.9080805628580665e-06, + "loss": 0.5478, + "step": 5782 + }, + { + "epoch": 0.5279350009129086, + "grad_norm": 0.4726967513561249, + "learning_rate": 4.908048405233442e-06, + "loss": 0.5818, + "step": 5783 + }, + { + "epoch": 0.528026291765565, + "grad_norm": 0.4695385992527008, + "learning_rate": 4.9080162420900825e-06, + "loss": 0.6214, + "step": 5784 + }, + { + "epoch": 0.5281175826182216, + "grad_norm": 0.5147644877433777, + "learning_rate": 4.907984073428061e-06, + "loss": 0.5593, + "step": 5785 + }, + { + "epoch": 0.5282088734708782, + "grad_norm": 0.46989133954048157, + "learning_rate": 4.907951899247453e-06, + "loss": 0.58, + "step": 5786 + }, + { + "epoch": 0.5283001643235348, + "grad_norm": 0.46687111258506775, + "learning_rate": 4.9079197195483294e-06, + "loss": 0.5579, + "step": 5787 + }, + { + "epoch": 0.5283914551761913, + "grad_norm": 0.4710283875465393, + "learning_rate": 4.9078875343307655e-06, + "loss": 0.57, + "step": 5788 + }, + { + "epoch": 0.5284827460288479, + "grad_norm": 0.48956722021102905, + "learning_rate": 4.907855343594836e-06, + "loss": 0.5881, + "step": 5789 + }, + { + "epoch": 0.5285740368815045, + "grad_norm": 0.44624173641204834, + "learning_rate": 4.907823147340613e-06, + "loss": 0.6169, + "step": 5790 + }, + { + "epoch": 0.5286653277341611, + "grad_norm": 0.4847813844680786, + "learning_rate": 4.907790945568171e-06, + "loss": 0.5857, + "step": 5791 + }, + { + "epoch": 0.5287566185868176, + "grad_norm": 0.4732199013233185, + "learning_rate": 4.9077587382775844e-06, + "loss": 0.5874, + "step": 5792 + }, + { + "epoch": 0.5288479094394741, + "grad_norm": 0.46508002281188965, + "learning_rate": 4.907726525468925e-06, + "loss": 0.5641, + "step": 5793 + }, + { + "epoch": 0.5289392002921307, + "grad_norm": 0.4888749420642853, + "learning_rate": 4.9076943071422704e-06, + "loss": 0.5615, + "step": 5794 + }, + { + "epoch": 0.5290304911447873, + "grad_norm": 0.47634175419807434, + "learning_rate": 4.90766208329769e-06, + "loss": 0.562, + "step": 5795 + }, + { + "epoch": 0.5291217819974439, + "grad_norm": 0.4842388927936554, + "learning_rate": 4.907629853935261e-06, + "loss": 0.6019, + "step": 5796 + }, + { + "epoch": 0.5292130728501004, + "grad_norm": 0.5126813650131226, + "learning_rate": 4.907597619055055e-06, + "loss": 0.5452, + "step": 5797 + }, + { + "epoch": 0.529304363702757, + "grad_norm": 0.47978246212005615, + "learning_rate": 4.907565378657147e-06, + "loss": 0.5597, + "step": 5798 + }, + { + "epoch": 0.5293956545554136, + "grad_norm": 0.4822239577770233, + "learning_rate": 4.907533132741611e-06, + "loss": 0.584, + "step": 5799 + }, + { + "epoch": 0.5294869454080701, + "grad_norm": 0.5252254009246826, + "learning_rate": 4.90750088130852e-06, + "loss": 0.5362, + "step": 5800 + }, + { + "epoch": 0.5295782362607266, + "grad_norm": 0.47637826204299927, + "learning_rate": 4.907468624357949e-06, + "loss": 0.5876, + "step": 5801 + }, + { + "epoch": 0.5296695271133832, + "grad_norm": 0.4993629455566406, + "learning_rate": 4.907436361889972e-06, + "loss": 0.5437, + "step": 5802 + }, + { + "epoch": 0.5297608179660398, + "grad_norm": 0.4642774164676666, + "learning_rate": 4.907404093904662e-06, + "loss": 0.5608, + "step": 5803 + }, + { + "epoch": 0.5298521088186964, + "grad_norm": 0.4764350652694702, + "learning_rate": 4.907371820402094e-06, + "loss": 0.5761, + "step": 5804 + }, + { + "epoch": 0.529943399671353, + "grad_norm": 0.4617486298084259, + "learning_rate": 4.90733954138234e-06, + "loss": 0.5965, + "step": 5805 + }, + { + "epoch": 0.5300346905240095, + "grad_norm": 0.48675668239593506, + "learning_rate": 4.907307256845476e-06, + "loss": 0.5446, + "step": 5806 + }, + { + "epoch": 0.5301259813766661, + "grad_norm": 0.4992905855178833, + "learning_rate": 4.907274966791576e-06, + "loss": 0.608, + "step": 5807 + }, + { + "epoch": 0.5302172722293226, + "grad_norm": 0.48017561435699463, + "learning_rate": 4.907242671220712e-06, + "loss": 0.5789, + "step": 5808 + }, + { + "epoch": 0.5303085630819792, + "grad_norm": 0.45244982838630676, + "learning_rate": 4.90721037013296e-06, + "loss": 0.6366, + "step": 5809 + }, + { + "epoch": 0.5303998539346357, + "grad_norm": 0.46818244457244873, + "learning_rate": 4.907178063528393e-06, + "loss": 0.6244, + "step": 5810 + }, + { + "epoch": 0.5304911447872923, + "grad_norm": 0.5269179344177246, + "learning_rate": 4.907145751407086e-06, + "loss": 0.5208, + "step": 5811 + }, + { + "epoch": 0.5305824356399489, + "grad_norm": 0.4431898593902588, + "learning_rate": 4.907113433769112e-06, + "loss": 0.5842, + "step": 5812 + }, + { + "epoch": 0.5306737264926055, + "grad_norm": 0.48290377855300903, + "learning_rate": 4.9070811106145456e-06, + "loss": 0.5876, + "step": 5813 + }, + { + "epoch": 0.530765017345262, + "grad_norm": 0.4785521328449249, + "learning_rate": 4.9070487819434605e-06, + "loss": 0.6331, + "step": 5814 + }, + { + "epoch": 0.5308563081979185, + "grad_norm": 0.48731598258018494, + "learning_rate": 4.907016447755931e-06, + "loss": 0.5533, + "step": 5815 + }, + { + "epoch": 0.5309475990505751, + "grad_norm": 0.45906057953834534, + "learning_rate": 4.906984108052032e-06, + "loss": 0.6018, + "step": 5816 + }, + { + "epoch": 0.5310388899032317, + "grad_norm": 0.44254282116889954, + "learning_rate": 4.906951762831836e-06, + "loss": 0.5598, + "step": 5817 + }, + { + "epoch": 0.5311301807558882, + "grad_norm": 0.4852150082588196, + "learning_rate": 4.906919412095419e-06, + "loss": 0.5743, + "step": 5818 + }, + { + "epoch": 0.5312214716085448, + "grad_norm": 0.4499174952507019, + "learning_rate": 4.906887055842853e-06, + "loss": 0.6435, + "step": 5819 + }, + { + "epoch": 0.5313127624612014, + "grad_norm": 0.4822360873222351, + "learning_rate": 4.906854694074214e-06, + "loss": 0.5325, + "step": 5820 + }, + { + "epoch": 0.531404053313858, + "grad_norm": 0.46309328079223633, + "learning_rate": 4.906822326789575e-06, + "loss": 0.5964, + "step": 5821 + }, + { + "epoch": 0.5314953441665146, + "grad_norm": 0.4626096785068512, + "learning_rate": 4.906789953989011e-06, + "loss": 0.5478, + "step": 5822 + }, + { + "epoch": 0.531586635019171, + "grad_norm": 0.4705471694469452, + "learning_rate": 4.906757575672597e-06, + "loss": 0.5883, + "step": 5823 + }, + { + "epoch": 0.5316779258718276, + "grad_norm": 0.4862442910671234, + "learning_rate": 4.9067251918404045e-06, + "loss": 0.5632, + "step": 5824 + }, + { + "epoch": 0.5317692167244842, + "grad_norm": 0.47192490100860596, + "learning_rate": 4.90669280249251e-06, + "loss": 0.5473, + "step": 5825 + }, + { + "epoch": 0.5318605075771408, + "grad_norm": 0.4672238528728485, + "learning_rate": 4.906660407628986e-06, + "loss": 0.5656, + "step": 5826 + }, + { + "epoch": 0.5319517984297973, + "grad_norm": 0.500726580619812, + "learning_rate": 4.906628007249909e-06, + "loss": 0.5561, + "step": 5827 + }, + { + "epoch": 0.5320430892824539, + "grad_norm": 0.49744608998298645, + "learning_rate": 4.906595601355352e-06, + "loss": 0.5594, + "step": 5828 + }, + { + "epoch": 0.5321343801351105, + "grad_norm": 0.4905274212360382, + "learning_rate": 4.906563189945388e-06, + "loss": 0.5707, + "step": 5829 + }, + { + "epoch": 0.5322256709877671, + "grad_norm": 0.5417768359184265, + "learning_rate": 4.906530773020093e-06, + "loss": 0.5202, + "step": 5830 + }, + { + "epoch": 0.5323169618404235, + "grad_norm": 0.47582390904426575, + "learning_rate": 4.906498350579541e-06, + "loss": 0.5574, + "step": 5831 + }, + { + "epoch": 0.5324082526930801, + "grad_norm": 0.5115544199943542, + "learning_rate": 4.906465922623806e-06, + "loss": 0.566, + "step": 5832 + }, + { + "epoch": 0.5324995435457367, + "grad_norm": 0.4619791507720947, + "learning_rate": 4.906433489152963e-06, + "loss": 0.5387, + "step": 5833 + }, + { + "epoch": 0.5325908343983933, + "grad_norm": 0.46254193782806396, + "learning_rate": 4.906401050167085e-06, + "loss": 0.5892, + "step": 5834 + }, + { + "epoch": 0.5326821252510499, + "grad_norm": 0.5255115628242493, + "learning_rate": 4.906368605666249e-06, + "loss": 0.5642, + "step": 5835 + }, + { + "epoch": 0.5327734161037064, + "grad_norm": 0.47006163001060486, + "learning_rate": 4.9063361556505254e-06, + "loss": 0.5878, + "step": 5836 + }, + { + "epoch": 0.532864706956363, + "grad_norm": 0.4568566381931305, + "learning_rate": 4.906303700119991e-06, + "loss": 0.571, + "step": 5837 + }, + { + "epoch": 0.5329559978090196, + "grad_norm": 0.4839192032814026, + "learning_rate": 4.9062712390747205e-06, + "loss": 0.5712, + "step": 5838 + }, + { + "epoch": 0.533047288661676, + "grad_norm": 0.49265047907829285, + "learning_rate": 4.906238772514787e-06, + "loss": 0.5793, + "step": 5839 + }, + { + "epoch": 0.5331385795143326, + "grad_norm": 0.46779245138168335, + "learning_rate": 4.906206300440266e-06, + "loss": 0.5527, + "step": 5840 + }, + { + "epoch": 0.5332298703669892, + "grad_norm": 0.45020902156829834, + "learning_rate": 4.9061738228512315e-06, + "loss": 0.61, + "step": 5841 + }, + { + "epoch": 0.5333211612196458, + "grad_norm": 0.489759236574173, + "learning_rate": 4.906141339747758e-06, + "loss": 0.5853, + "step": 5842 + }, + { + "epoch": 0.5334124520723024, + "grad_norm": 0.43379849195480347, + "learning_rate": 4.906108851129919e-06, + "loss": 0.5687, + "step": 5843 + }, + { + "epoch": 0.533503742924959, + "grad_norm": 0.48126929998397827, + "learning_rate": 4.90607635699779e-06, + "loss": 0.5419, + "step": 5844 + }, + { + "epoch": 0.5335950337776155, + "grad_norm": 0.49213069677352905, + "learning_rate": 4.906043857351446e-06, + "loss": 0.542, + "step": 5845 + }, + { + "epoch": 0.5336863246302721, + "grad_norm": 0.4949229061603546, + "learning_rate": 4.90601135219096e-06, + "loss": 0.5803, + "step": 5846 + }, + { + "epoch": 0.5337776154829286, + "grad_norm": 0.4708085358142853, + "learning_rate": 4.905978841516407e-06, + "loss": 0.5638, + "step": 5847 + }, + { + "epoch": 0.5338689063355851, + "grad_norm": 0.46441060304641724, + "learning_rate": 4.905946325327863e-06, + "loss": 0.5733, + "step": 5848 + }, + { + "epoch": 0.5339601971882417, + "grad_norm": 0.4660014510154724, + "learning_rate": 4.9059138036254e-06, + "loss": 0.5423, + "step": 5849 + }, + { + "epoch": 0.5340514880408983, + "grad_norm": 0.5289700031280518, + "learning_rate": 4.905881276409094e-06, + "loss": 0.5837, + "step": 5850 + }, + { + "epoch": 0.5341427788935549, + "grad_norm": 0.47595223784446716, + "learning_rate": 4.9058487436790206e-06, + "loss": 0.5834, + "step": 5851 + }, + { + "epoch": 0.5342340697462115, + "grad_norm": 0.44571202993392944, + "learning_rate": 4.905816205435251e-06, + "loss": 0.603, + "step": 5852 + }, + { + "epoch": 0.534325360598868, + "grad_norm": 0.4639834761619568, + "learning_rate": 4.905783661677864e-06, + "loss": 0.5958, + "step": 5853 + }, + { + "epoch": 0.5344166514515245, + "grad_norm": 0.4756050705909729, + "learning_rate": 4.905751112406931e-06, + "loss": 0.5552, + "step": 5854 + }, + { + "epoch": 0.5345079423041811, + "grad_norm": 0.4981870651245117, + "learning_rate": 4.9057185576225275e-06, + "loss": 0.6047, + "step": 5855 + }, + { + "epoch": 0.5345992331568377, + "grad_norm": 0.47837987542152405, + "learning_rate": 4.905685997324728e-06, + "loss": 0.5635, + "step": 5856 + }, + { + "epoch": 0.5346905240094942, + "grad_norm": 0.4565769135951996, + "learning_rate": 4.905653431513608e-06, + "loss": 0.5666, + "step": 5857 + }, + { + "epoch": 0.5347818148621508, + "grad_norm": 0.4708888530731201, + "learning_rate": 4.9056208601892415e-06, + "loss": 0.6121, + "step": 5858 + }, + { + "epoch": 0.5348731057148074, + "grad_norm": 0.44803526997566223, + "learning_rate": 4.905588283351703e-06, + "loss": 0.5656, + "step": 5859 + }, + { + "epoch": 0.534964396567464, + "grad_norm": 0.4941561222076416, + "learning_rate": 4.9055557010010675e-06, + "loss": 0.556, + "step": 5860 + }, + { + "epoch": 0.5350556874201206, + "grad_norm": 0.48035523295402527, + "learning_rate": 4.905523113137409e-06, + "loss": 0.5404, + "step": 5861 + }, + { + "epoch": 0.535146978272777, + "grad_norm": 0.47108563780784607, + "learning_rate": 4.905490519760803e-06, + "loss": 0.6031, + "step": 5862 + }, + { + "epoch": 0.5352382691254336, + "grad_norm": 0.48104897141456604, + "learning_rate": 4.905457920871324e-06, + "loss": 0.5405, + "step": 5863 + }, + { + "epoch": 0.5353295599780902, + "grad_norm": 0.45397433638572693, + "learning_rate": 4.905425316469047e-06, + "loss": 0.5763, + "step": 5864 + }, + { + "epoch": 0.5354208508307468, + "grad_norm": 0.49347996711730957, + "learning_rate": 4.905392706554045e-06, + "loss": 0.5546, + "step": 5865 + }, + { + "epoch": 0.5355121416834033, + "grad_norm": 0.46660086512565613, + "learning_rate": 4.905360091126395e-06, + "loss": 0.5895, + "step": 5866 + }, + { + "epoch": 0.5356034325360599, + "grad_norm": 0.4632158875465393, + "learning_rate": 4.905327470186171e-06, + "loss": 0.6033, + "step": 5867 + }, + { + "epoch": 0.5356947233887165, + "grad_norm": 0.504510760307312, + "learning_rate": 4.905294843733447e-06, + "loss": 0.5959, + "step": 5868 + }, + { + "epoch": 0.5357860142413731, + "grad_norm": 0.4743782877922058, + "learning_rate": 4.905262211768298e-06, + "loss": 0.5943, + "step": 5869 + }, + { + "epoch": 0.5358773050940295, + "grad_norm": 0.5023870468139648, + "learning_rate": 4.9052295742908e-06, + "loss": 0.5486, + "step": 5870 + }, + { + "epoch": 0.5359685959466861, + "grad_norm": 0.4683002233505249, + "learning_rate": 4.905196931301026e-06, + "loss": 0.6498, + "step": 5871 + }, + { + "epoch": 0.5360598867993427, + "grad_norm": 0.49211397767066956, + "learning_rate": 4.905164282799052e-06, + "loss": 0.5755, + "step": 5872 + }, + { + "epoch": 0.5361511776519993, + "grad_norm": 0.48010674118995667, + "learning_rate": 4.905131628784953e-06, + "loss": 0.5631, + "step": 5873 + }, + { + "epoch": 0.5362424685046558, + "grad_norm": 0.49895572662353516, + "learning_rate": 4.905098969258803e-06, + "loss": 0.5486, + "step": 5874 + }, + { + "epoch": 0.5363337593573124, + "grad_norm": 0.5009551048278809, + "learning_rate": 4.905066304220678e-06, + "loss": 0.5078, + "step": 5875 + }, + { + "epoch": 0.536425050209969, + "grad_norm": 0.4559335708618164, + "learning_rate": 4.90503363367065e-06, + "loss": 0.5888, + "step": 5876 + }, + { + "epoch": 0.5365163410626256, + "grad_norm": 0.46491438150405884, + "learning_rate": 4.905000957608798e-06, + "loss": 0.5837, + "step": 5877 + }, + { + "epoch": 0.536607631915282, + "grad_norm": 0.497182160615921, + "learning_rate": 4.904968276035194e-06, + "loss": 0.5599, + "step": 5878 + }, + { + "epoch": 0.5366989227679386, + "grad_norm": 0.5019450187683105, + "learning_rate": 4.904935588949914e-06, + "loss": 0.5618, + "step": 5879 + }, + { + "epoch": 0.5367902136205952, + "grad_norm": 0.5253636837005615, + "learning_rate": 4.904902896353033e-06, + "loss": 0.5436, + "step": 5880 + }, + { + "epoch": 0.5368815044732518, + "grad_norm": 0.4902653694152832, + "learning_rate": 4.904870198244625e-06, + "loss": 0.5636, + "step": 5881 + }, + { + "epoch": 0.5369727953259084, + "grad_norm": 0.4550890326499939, + "learning_rate": 4.9048374946247655e-06, + "loss": 0.5671, + "step": 5882 + }, + { + "epoch": 0.5370640861785649, + "grad_norm": 0.4702504575252533, + "learning_rate": 4.90480478549353e-06, + "loss": 0.5902, + "step": 5883 + }, + { + "epoch": 0.5371553770312215, + "grad_norm": 0.48717474937438965, + "learning_rate": 4.9047720708509926e-06, + "loss": 0.536, + "step": 5884 + }, + { + "epoch": 0.537246667883878, + "grad_norm": 0.5186755657196045, + "learning_rate": 4.904739350697229e-06, + "loss": 0.5812, + "step": 5885 + }, + { + "epoch": 0.5373379587365346, + "grad_norm": 0.4921596348285675, + "learning_rate": 4.904706625032313e-06, + "loss": 0.6039, + "step": 5886 + }, + { + "epoch": 0.5374292495891911, + "grad_norm": 0.47199544310569763, + "learning_rate": 4.9046738938563205e-06, + "loss": 0.5722, + "step": 5887 + }, + { + "epoch": 0.5375205404418477, + "grad_norm": 0.5153874754905701, + "learning_rate": 4.904641157169327e-06, + "loss": 0.5215, + "step": 5888 + }, + { + "epoch": 0.5376118312945043, + "grad_norm": 0.49946990609169006, + "learning_rate": 4.904608414971406e-06, + "loss": 0.5436, + "step": 5889 + }, + { + "epoch": 0.5377031221471609, + "grad_norm": 0.47558102011680603, + "learning_rate": 4.904575667262634e-06, + "loss": 0.574, + "step": 5890 + }, + { + "epoch": 0.5377944129998175, + "grad_norm": 0.499635249376297, + "learning_rate": 4.904542914043086e-06, + "loss": 0.5621, + "step": 5891 + }, + { + "epoch": 0.537885703852474, + "grad_norm": 0.4730187654495239, + "learning_rate": 4.904510155312836e-06, + "loss": 0.5944, + "step": 5892 + }, + { + "epoch": 0.5379769947051305, + "grad_norm": 0.4855491816997528, + "learning_rate": 4.90447739107196e-06, + "loss": 0.5643, + "step": 5893 + }, + { + "epoch": 0.5380682855577871, + "grad_norm": 0.4692867398262024, + "learning_rate": 4.9044446213205325e-06, + "loss": 0.5804, + "step": 5894 + }, + { + "epoch": 0.5381595764104437, + "grad_norm": 0.4683692753314972, + "learning_rate": 4.904411846058629e-06, + "loss": 0.5802, + "step": 5895 + }, + { + "epoch": 0.5382508672631002, + "grad_norm": 0.4796103835105896, + "learning_rate": 4.904379065286324e-06, + "loss": 0.5939, + "step": 5896 + }, + { + "epoch": 0.5383421581157568, + "grad_norm": 0.4842425584793091, + "learning_rate": 4.9043462790036935e-06, + "loss": 0.5862, + "step": 5897 + }, + { + "epoch": 0.5384334489684134, + "grad_norm": 0.4809947609901428, + "learning_rate": 4.904313487210812e-06, + "loss": 0.5819, + "step": 5898 + }, + { + "epoch": 0.53852473982107, + "grad_norm": 0.4862591326236725, + "learning_rate": 4.904280689907755e-06, + "loss": 0.5409, + "step": 5899 + }, + { + "epoch": 0.5386160306737265, + "grad_norm": 0.48474565148353577, + "learning_rate": 4.9042478870945985e-06, + "loss": 0.5729, + "step": 5900 + }, + { + "epoch": 0.538707321526383, + "grad_norm": 0.44200628995895386, + "learning_rate": 4.904215078771415e-06, + "loss": 0.5804, + "step": 5901 + }, + { + "epoch": 0.5387986123790396, + "grad_norm": 0.4871337413787842, + "learning_rate": 4.904182264938283e-06, + "loss": 0.5477, + "step": 5902 + }, + { + "epoch": 0.5388899032316962, + "grad_norm": 0.48107022047042847, + "learning_rate": 4.904149445595275e-06, + "loss": 0.5568, + "step": 5903 + }, + { + "epoch": 0.5389811940843527, + "grad_norm": 0.47277355194091797, + "learning_rate": 4.9041166207424675e-06, + "loss": 0.5591, + "step": 5904 + }, + { + "epoch": 0.5390724849370093, + "grad_norm": 0.4594009816646576, + "learning_rate": 4.904083790379937e-06, + "loss": 0.61, + "step": 5905 + }, + { + "epoch": 0.5391637757896659, + "grad_norm": 0.5111934542655945, + "learning_rate": 4.9040509545077555e-06, + "loss": 0.5186, + "step": 5906 + }, + { + "epoch": 0.5392550666423225, + "grad_norm": 0.5352241396903992, + "learning_rate": 4.904018113126e-06, + "loss": 0.5446, + "step": 5907 + }, + { + "epoch": 0.5393463574949791, + "grad_norm": 0.4532737731933594, + "learning_rate": 4.903985266234746e-06, + "loss": 0.5942, + "step": 5908 + }, + { + "epoch": 0.5394376483476355, + "grad_norm": 0.47609943151474, + "learning_rate": 4.90395241383407e-06, + "loss": 0.5453, + "step": 5909 + }, + { + "epoch": 0.5395289392002921, + "grad_norm": 0.497300922870636, + "learning_rate": 4.903919555924045e-06, + "loss": 0.5245, + "step": 5910 + }, + { + "epoch": 0.5396202300529487, + "grad_norm": 0.4743969738483429, + "learning_rate": 4.903886692504747e-06, + "loss": 0.6264, + "step": 5911 + }, + { + "epoch": 0.5397115209056053, + "grad_norm": 0.47086986899375916, + "learning_rate": 4.903853823576251e-06, + "loss": 0.5371, + "step": 5912 + }, + { + "epoch": 0.5398028117582618, + "grad_norm": 0.498851478099823, + "learning_rate": 4.903820949138633e-06, + "loss": 0.6078, + "step": 5913 + }, + { + "epoch": 0.5398941026109184, + "grad_norm": 0.471200555562973, + "learning_rate": 4.903788069191969e-06, + "loss": 0.5752, + "step": 5914 + }, + { + "epoch": 0.539985393463575, + "grad_norm": 0.4758981168270111, + "learning_rate": 4.9037551837363335e-06, + "loss": 0.5631, + "step": 5915 + }, + { + "epoch": 0.5400766843162315, + "grad_norm": 0.49806368350982666, + "learning_rate": 4.903722292771801e-06, + "loss": 0.5692, + "step": 5916 + }, + { + "epoch": 0.540167975168888, + "grad_norm": 0.47880393266677856, + "learning_rate": 4.903689396298449e-06, + "loss": 0.5545, + "step": 5917 + }, + { + "epoch": 0.5402592660215446, + "grad_norm": 0.4932059347629547, + "learning_rate": 4.903656494316351e-06, + "loss": 0.5641, + "step": 5918 + }, + { + "epoch": 0.5403505568742012, + "grad_norm": 0.4992564022541046, + "learning_rate": 4.9036235868255824e-06, + "loss": 0.5495, + "step": 5919 + }, + { + "epoch": 0.5404418477268578, + "grad_norm": 0.5113099813461304, + "learning_rate": 4.90359067382622e-06, + "loss": 0.5821, + "step": 5920 + }, + { + "epoch": 0.5405331385795143, + "grad_norm": 0.4771302044391632, + "learning_rate": 4.903557755318339e-06, + "loss": 0.5561, + "step": 5921 + }, + { + "epoch": 0.5406244294321709, + "grad_norm": 0.4967689514160156, + "learning_rate": 4.9035248313020135e-06, + "loss": 0.5518, + "step": 5922 + }, + { + "epoch": 0.5407157202848275, + "grad_norm": 0.4762413501739502, + "learning_rate": 4.9034919017773195e-06, + "loss": 0.6076, + "step": 5923 + }, + { + "epoch": 0.540807011137484, + "grad_norm": 0.5014167428016663, + "learning_rate": 4.903458966744334e-06, + "loss": 0.569, + "step": 5924 + }, + { + "epoch": 0.5408983019901406, + "grad_norm": 0.4685235321521759, + "learning_rate": 4.903426026203131e-06, + "loss": 0.5518, + "step": 5925 + }, + { + "epoch": 0.5409895928427971, + "grad_norm": 0.4539291262626648, + "learning_rate": 4.903393080153785e-06, + "loss": 0.5789, + "step": 5926 + }, + { + "epoch": 0.5410808836954537, + "grad_norm": 0.4436762034893036, + "learning_rate": 4.903360128596374e-06, + "loss": 0.6209, + "step": 5927 + }, + { + "epoch": 0.5411721745481103, + "grad_norm": 0.47250568866729736, + "learning_rate": 4.903327171530972e-06, + "loss": 0.5874, + "step": 5928 + }, + { + "epoch": 0.5412634654007669, + "grad_norm": 0.5157903432846069, + "learning_rate": 4.903294208957655e-06, + "loss": 0.562, + "step": 5929 + }, + { + "epoch": 0.5413547562534234, + "grad_norm": 0.47426384687423706, + "learning_rate": 4.903261240876498e-06, + "loss": 0.5871, + "step": 5930 + }, + { + "epoch": 0.54144604710608, + "grad_norm": 0.4948837459087372, + "learning_rate": 4.903228267287576e-06, + "loss": 0.5338, + "step": 5931 + }, + { + "epoch": 0.5415373379587365, + "grad_norm": 0.5003277063369751, + "learning_rate": 4.9031952881909676e-06, + "loss": 0.5439, + "step": 5932 + }, + { + "epoch": 0.5416286288113931, + "grad_norm": 0.48600253462791443, + "learning_rate": 4.903162303586745e-06, + "loss": 0.5648, + "step": 5933 + }, + { + "epoch": 0.5417199196640496, + "grad_norm": 0.4563522934913635, + "learning_rate": 4.903129313474984e-06, + "loss": 0.5936, + "step": 5934 + }, + { + "epoch": 0.5418112105167062, + "grad_norm": 0.4940425157546997, + "learning_rate": 4.903096317855763e-06, + "loss": 0.5426, + "step": 5935 + }, + { + "epoch": 0.5419025013693628, + "grad_norm": 0.4799513518810272, + "learning_rate": 4.903063316729156e-06, + "loss": 0.5719, + "step": 5936 + }, + { + "epoch": 0.5419937922220194, + "grad_norm": 0.45075324177742004, + "learning_rate": 4.903030310095237e-06, + "loss": 0.5615, + "step": 5937 + }, + { + "epoch": 0.542085083074676, + "grad_norm": 0.48568597435951233, + "learning_rate": 4.902997297954084e-06, + "loss": 0.577, + "step": 5938 + }, + { + "epoch": 0.5421763739273325, + "grad_norm": 0.48321205377578735, + "learning_rate": 4.902964280305772e-06, + "loss": 0.5621, + "step": 5939 + }, + { + "epoch": 0.542267664779989, + "grad_norm": 0.5134565830230713, + "learning_rate": 4.902931257150377e-06, + "loss": 0.5414, + "step": 5940 + }, + { + "epoch": 0.5423589556326456, + "grad_norm": 0.49332234263420105, + "learning_rate": 4.902898228487972e-06, + "loss": 0.5577, + "step": 5941 + }, + { + "epoch": 0.5424502464853022, + "grad_norm": 0.4640614688396454, + "learning_rate": 4.902865194318637e-06, + "loss": 0.6049, + "step": 5942 + }, + { + "epoch": 0.5425415373379587, + "grad_norm": 0.4610499143600464, + "learning_rate": 4.902832154642444e-06, + "loss": 0.5414, + "step": 5943 + }, + { + "epoch": 0.5426328281906153, + "grad_norm": 0.5062588453292847, + "learning_rate": 4.902799109459471e-06, + "loss": 0.5758, + "step": 5944 + }, + { + "epoch": 0.5427241190432719, + "grad_norm": 0.48485320806503296, + "learning_rate": 4.902766058769793e-06, + "loss": 0.562, + "step": 5945 + }, + { + "epoch": 0.5428154098959285, + "grad_norm": 0.48698151111602783, + "learning_rate": 4.902733002573487e-06, + "loss": 0.6277, + "step": 5946 + }, + { + "epoch": 0.542906700748585, + "grad_norm": 0.5038280487060547, + "learning_rate": 4.902699940870625e-06, + "loss": 0.5537, + "step": 5947 + }, + { + "epoch": 0.5429979916012415, + "grad_norm": 0.49403637647628784, + "learning_rate": 4.902666873661287e-06, + "loss": 0.5521, + "step": 5948 + }, + { + "epoch": 0.5430892824538981, + "grad_norm": 0.468044638633728, + "learning_rate": 4.902633800945546e-06, + "loss": 0.581, + "step": 5949 + }, + { + "epoch": 0.5431805733065547, + "grad_norm": 0.45970964431762695, + "learning_rate": 4.902600722723481e-06, + "loss": 0.5605, + "step": 5950 + }, + { + "epoch": 0.5432718641592112, + "grad_norm": 0.48485246300697327, + "learning_rate": 4.902567638995164e-06, + "loss": 0.5577, + "step": 5951 + }, + { + "epoch": 0.5433631550118678, + "grad_norm": 0.4596354365348816, + "learning_rate": 4.902534549760673e-06, + "loss": 0.5848, + "step": 5952 + }, + { + "epoch": 0.5434544458645244, + "grad_norm": 0.4843301475048065, + "learning_rate": 4.902501455020082e-06, + "loss": 0.5118, + "step": 5953 + }, + { + "epoch": 0.543545736717181, + "grad_norm": 0.4566968083381653, + "learning_rate": 4.90246835477347e-06, + "loss": 0.6083, + "step": 5954 + }, + { + "epoch": 0.5436370275698374, + "grad_norm": 0.46726274490356445, + "learning_rate": 4.90243524902091e-06, + "loss": 0.5657, + "step": 5955 + }, + { + "epoch": 0.543728318422494, + "grad_norm": 0.44176003336906433, + "learning_rate": 4.902402137762479e-06, + "loss": 0.5705, + "step": 5956 + }, + { + "epoch": 0.5438196092751506, + "grad_norm": 0.4606260657310486, + "learning_rate": 4.9023690209982534e-06, + "loss": 0.5705, + "step": 5957 + }, + { + "epoch": 0.5439109001278072, + "grad_norm": 0.4961074888706207, + "learning_rate": 4.902335898728308e-06, + "loss": 0.5296, + "step": 5958 + }, + { + "epoch": 0.5440021909804638, + "grad_norm": 0.46639353036880493, + "learning_rate": 4.902302770952719e-06, + "loss": 0.6046, + "step": 5959 + }, + { + "epoch": 0.5440934818331203, + "grad_norm": 0.44504350423812866, + "learning_rate": 4.902269637671563e-06, + "loss": 0.5936, + "step": 5960 + }, + { + "epoch": 0.5441847726857769, + "grad_norm": 0.4559718072414398, + "learning_rate": 4.902236498884915e-06, + "loss": 0.5782, + "step": 5961 + }, + { + "epoch": 0.5442760635384335, + "grad_norm": 0.4772774279117584, + "learning_rate": 4.902203354592851e-06, + "loss": 0.57, + "step": 5962 + }, + { + "epoch": 0.54436735439109, + "grad_norm": 0.48089441657066345, + "learning_rate": 4.902170204795448e-06, + "loss": 0.5715, + "step": 5963 + }, + { + "epoch": 0.5444586452437465, + "grad_norm": 0.4845460057258606, + "learning_rate": 4.902137049492782e-06, + "loss": 0.5872, + "step": 5964 + }, + { + "epoch": 0.5445499360964031, + "grad_norm": 0.4920034110546112, + "learning_rate": 4.902103888684927e-06, + "loss": 0.5797, + "step": 5965 + }, + { + "epoch": 0.5446412269490597, + "grad_norm": 0.44497278332710266, + "learning_rate": 4.902070722371961e-06, + "loss": 0.5922, + "step": 5966 + }, + { + "epoch": 0.5447325178017163, + "grad_norm": 0.47445186972618103, + "learning_rate": 4.902037550553959e-06, + "loss": 0.588, + "step": 5967 + }, + { + "epoch": 0.5448238086543729, + "grad_norm": 0.46581369638442993, + "learning_rate": 4.902004373230997e-06, + "loss": 0.5807, + "step": 5968 + }, + { + "epoch": 0.5449150995070294, + "grad_norm": 0.46216070652008057, + "learning_rate": 4.901971190403153e-06, + "loss": 0.5999, + "step": 5969 + }, + { + "epoch": 0.545006390359686, + "grad_norm": 0.4715438783168793, + "learning_rate": 4.9019380020705e-06, + "loss": 0.5435, + "step": 5970 + }, + { + "epoch": 0.5450976812123425, + "grad_norm": 0.48366665840148926, + "learning_rate": 4.901904808233116e-06, + "loss": 0.5668, + "step": 5971 + }, + { + "epoch": 0.5451889720649991, + "grad_norm": 0.48758235573768616, + "learning_rate": 4.901871608891077e-06, + "loss": 0.5601, + "step": 5972 + }, + { + "epoch": 0.5452802629176556, + "grad_norm": 0.44441747665405273, + "learning_rate": 4.9018384040444576e-06, + "loss": 0.6159, + "step": 5973 + }, + { + "epoch": 0.5453715537703122, + "grad_norm": 0.4965152442455292, + "learning_rate": 4.901805193693335e-06, + "loss": 0.5646, + "step": 5974 + }, + { + "epoch": 0.5454628446229688, + "grad_norm": 0.4933408498764038, + "learning_rate": 4.901771977837786e-06, + "loss": 0.5165, + "step": 5975 + }, + { + "epoch": 0.5455541354756254, + "grad_norm": 0.4460979104042053, + "learning_rate": 4.901738756477885e-06, + "loss": 0.6034, + "step": 5976 + }, + { + "epoch": 0.545645426328282, + "grad_norm": 0.47806379199028015, + "learning_rate": 4.901705529613711e-06, + "loss": 0.5452, + "step": 5977 + }, + { + "epoch": 0.5457367171809385, + "grad_norm": 0.4844917356967926, + "learning_rate": 4.901672297245336e-06, + "loss": 0.5678, + "step": 5978 + }, + { + "epoch": 0.545828008033595, + "grad_norm": 0.4788999855518341, + "learning_rate": 4.9016390593728395e-06, + "loss": 0.557, + "step": 5979 + }, + { + "epoch": 0.5459192988862516, + "grad_norm": 0.4810718297958374, + "learning_rate": 4.9016058159962965e-06, + "loss": 0.5812, + "step": 5980 + }, + { + "epoch": 0.5460105897389081, + "grad_norm": 0.5018497705459595, + "learning_rate": 4.901572567115783e-06, + "loss": 0.5553, + "step": 5981 + }, + { + "epoch": 0.5461018805915647, + "grad_norm": 0.48090916872024536, + "learning_rate": 4.901539312731376e-06, + "loss": 0.577, + "step": 5982 + }, + { + "epoch": 0.5461931714442213, + "grad_norm": 0.5206862688064575, + "learning_rate": 4.901506052843151e-06, + "loss": 0.5268, + "step": 5983 + }, + { + "epoch": 0.5462844622968779, + "grad_norm": 0.4759020507335663, + "learning_rate": 4.901472787451185e-06, + "loss": 0.5661, + "step": 5984 + }, + { + "epoch": 0.5463757531495345, + "grad_norm": 0.4686395525932312, + "learning_rate": 4.901439516555553e-06, + "loss": 0.5989, + "step": 5985 + }, + { + "epoch": 0.5464670440021909, + "grad_norm": 0.470386266708374, + "learning_rate": 4.901406240156333e-06, + "loss": 0.5883, + "step": 5986 + }, + { + "epoch": 0.5465583348548475, + "grad_norm": 0.4531656801700592, + "learning_rate": 4.901372958253599e-06, + "loss": 0.5843, + "step": 5987 + }, + { + "epoch": 0.5466496257075041, + "grad_norm": 0.4527405798435211, + "learning_rate": 4.901339670847428e-06, + "loss": 0.6408, + "step": 5988 + }, + { + "epoch": 0.5467409165601607, + "grad_norm": 0.4552024006843567, + "learning_rate": 4.901306377937897e-06, + "loss": 0.5955, + "step": 5989 + }, + { + "epoch": 0.5468322074128172, + "grad_norm": 0.4381793439388275, + "learning_rate": 4.901273079525083e-06, + "loss": 0.5785, + "step": 5990 + }, + { + "epoch": 0.5469234982654738, + "grad_norm": 0.4470898509025574, + "learning_rate": 4.9012397756090605e-06, + "loss": 0.6148, + "step": 5991 + }, + { + "epoch": 0.5470147891181304, + "grad_norm": 0.49351242184638977, + "learning_rate": 4.901206466189907e-06, + "loss": 0.5917, + "step": 5992 + }, + { + "epoch": 0.547106079970787, + "grad_norm": 0.49015188217163086, + "learning_rate": 4.901173151267698e-06, + "loss": 0.5574, + "step": 5993 + }, + { + "epoch": 0.5471973708234434, + "grad_norm": 0.5058554410934448, + "learning_rate": 4.901139830842511e-06, + "loss": 0.5601, + "step": 5994 + }, + { + "epoch": 0.5472886616761, + "grad_norm": 0.4971855580806732, + "learning_rate": 4.9011065049144215e-06, + "loss": 0.5705, + "step": 5995 + }, + { + "epoch": 0.5473799525287566, + "grad_norm": 0.4729192852973938, + "learning_rate": 4.901073173483506e-06, + "loss": 0.5808, + "step": 5996 + }, + { + "epoch": 0.5474712433814132, + "grad_norm": 0.44957593083381653, + "learning_rate": 4.9010398365498415e-06, + "loss": 0.5642, + "step": 5997 + }, + { + "epoch": 0.5475625342340698, + "grad_norm": 0.4665408730506897, + "learning_rate": 4.9010064941135035e-06, + "loss": 0.5628, + "step": 5998 + }, + { + "epoch": 0.5476538250867263, + "grad_norm": 0.4636821150779724, + "learning_rate": 4.900973146174568e-06, + "loss": 0.5842, + "step": 5999 + }, + { + "epoch": 0.5477451159393829, + "grad_norm": 0.46663692593574524, + "learning_rate": 4.900939792733114e-06, + "loss": 0.6175, + "step": 6000 + }, + { + "epoch": 0.5478364067920395, + "grad_norm": 0.49236324429512024, + "learning_rate": 4.900906433789215e-06, + "loss": 0.5128, + "step": 6001 + }, + { + "epoch": 0.547927697644696, + "grad_norm": 0.47482800483703613, + "learning_rate": 4.900873069342949e-06, + "loss": 0.5805, + "step": 6002 + }, + { + "epoch": 0.5480189884973525, + "grad_norm": 0.49044907093048096, + "learning_rate": 4.900839699394392e-06, + "loss": 0.5489, + "step": 6003 + }, + { + "epoch": 0.5481102793500091, + "grad_norm": 0.4931601285934448, + "learning_rate": 4.90080632394362e-06, + "loss": 0.5488, + "step": 6004 + }, + { + "epoch": 0.5482015702026657, + "grad_norm": 0.42615294456481934, + "learning_rate": 4.90077294299071e-06, + "loss": 0.6183, + "step": 6005 + }, + { + "epoch": 0.5482928610553223, + "grad_norm": 0.44954338669776917, + "learning_rate": 4.900739556535739e-06, + "loss": 0.5968, + "step": 6006 + }, + { + "epoch": 0.5483841519079788, + "grad_norm": 0.4682718515396118, + "learning_rate": 4.900706164578784e-06, + "loss": 0.5428, + "step": 6007 + }, + { + "epoch": 0.5484754427606354, + "grad_norm": 0.4662148058414459, + "learning_rate": 4.900672767119919e-06, + "loss": 0.5725, + "step": 6008 + }, + { + "epoch": 0.548566733613292, + "grad_norm": 0.4893726706504822, + "learning_rate": 4.900639364159223e-06, + "loss": 0.5559, + "step": 6009 + }, + { + "epoch": 0.5486580244659485, + "grad_norm": 0.48207613825798035, + "learning_rate": 4.9006059556967725e-06, + "loss": 0.5804, + "step": 6010 + }, + { + "epoch": 0.548749315318605, + "grad_norm": 0.4610663056373596, + "learning_rate": 4.900572541732642e-06, + "loss": 0.5898, + "step": 6011 + }, + { + "epoch": 0.5488406061712616, + "grad_norm": 0.46681350469589233, + "learning_rate": 4.9005391222669096e-06, + "loss": 0.5891, + "step": 6012 + }, + { + "epoch": 0.5489318970239182, + "grad_norm": 0.4837457239627838, + "learning_rate": 4.900505697299652e-06, + "loss": 0.6045, + "step": 6013 + }, + { + "epoch": 0.5490231878765748, + "grad_norm": 0.4835025370121002, + "learning_rate": 4.900472266830944e-06, + "loss": 0.5686, + "step": 6014 + }, + { + "epoch": 0.5491144787292314, + "grad_norm": 0.5052234530448914, + "learning_rate": 4.900438830860865e-06, + "loss": 0.5295, + "step": 6015 + }, + { + "epoch": 0.5492057695818879, + "grad_norm": 0.47670286893844604, + "learning_rate": 4.900405389389491e-06, + "loss": 0.6229, + "step": 6016 + }, + { + "epoch": 0.5492970604345444, + "grad_norm": 0.4633272588253021, + "learning_rate": 4.900371942416897e-06, + "loss": 0.5684, + "step": 6017 + }, + { + "epoch": 0.549388351287201, + "grad_norm": 0.4901862144470215, + "learning_rate": 4.90033848994316e-06, + "loss": 0.5617, + "step": 6018 + }, + { + "epoch": 0.5494796421398576, + "grad_norm": 0.4973548650741577, + "learning_rate": 4.900305031968357e-06, + "loss": 0.5366, + "step": 6019 + }, + { + "epoch": 0.5495709329925141, + "grad_norm": 0.4711710214614868, + "learning_rate": 4.900271568492566e-06, + "loss": 0.5811, + "step": 6020 + }, + { + "epoch": 0.5496622238451707, + "grad_norm": 0.4244866669178009, + "learning_rate": 4.900238099515863e-06, + "loss": 0.6088, + "step": 6021 + }, + { + "epoch": 0.5497535146978273, + "grad_norm": 0.4541514813899994, + "learning_rate": 4.900204625038323e-06, + "loss": 0.5616, + "step": 6022 + }, + { + "epoch": 0.5498448055504839, + "grad_norm": 0.47941187024116516, + "learning_rate": 4.900171145060025e-06, + "loss": 0.6197, + "step": 6023 + }, + { + "epoch": 0.5499360964031405, + "grad_norm": 0.4927237331867218, + "learning_rate": 4.900137659581044e-06, + "loss": 0.549, + "step": 6024 + }, + { + "epoch": 0.5500273872557969, + "grad_norm": 0.47940927743911743, + "learning_rate": 4.9001041686014575e-06, + "loss": 0.5643, + "step": 6025 + }, + { + "epoch": 0.5501186781084535, + "grad_norm": 0.4622943103313446, + "learning_rate": 4.900070672121343e-06, + "loss": 0.5701, + "step": 6026 + }, + { + "epoch": 0.5502099689611101, + "grad_norm": 0.432483434677124, + "learning_rate": 4.900037170140776e-06, + "loss": 0.6148, + "step": 6027 + }, + { + "epoch": 0.5503012598137667, + "grad_norm": 0.47884926199913025, + "learning_rate": 4.900003662659833e-06, + "loss": 0.5844, + "step": 6028 + }, + { + "epoch": 0.5503925506664232, + "grad_norm": 0.49342942237854004, + "learning_rate": 4.899970149678593e-06, + "loss": 0.5623, + "step": 6029 + }, + { + "epoch": 0.5504838415190798, + "grad_norm": 0.4991005063056946, + "learning_rate": 4.899936631197131e-06, + "loss": 0.5395, + "step": 6030 + }, + { + "epoch": 0.5505751323717364, + "grad_norm": 0.4914511740207672, + "learning_rate": 4.899903107215524e-06, + "loss": 0.5804, + "step": 6031 + }, + { + "epoch": 0.550666423224393, + "grad_norm": 0.46785974502563477, + "learning_rate": 4.899869577733849e-06, + "loss": 0.5458, + "step": 6032 + }, + { + "epoch": 0.5507577140770494, + "grad_norm": 0.4617290794849396, + "learning_rate": 4.899836042752183e-06, + "loss": 0.5691, + "step": 6033 + }, + { + "epoch": 0.550849004929706, + "grad_norm": 0.4641018211841583, + "learning_rate": 4.899802502270603e-06, + "loss": 0.5424, + "step": 6034 + }, + { + "epoch": 0.5509402957823626, + "grad_norm": 0.49566957354545593, + "learning_rate": 4.899768956289185e-06, + "loss": 0.5304, + "step": 6035 + }, + { + "epoch": 0.5510315866350192, + "grad_norm": 0.4834592044353485, + "learning_rate": 4.899735404808007e-06, + "loss": 0.5683, + "step": 6036 + }, + { + "epoch": 0.5511228774876757, + "grad_norm": 0.4930475652217865, + "learning_rate": 4.899701847827145e-06, + "loss": 0.5637, + "step": 6037 + }, + { + "epoch": 0.5512141683403323, + "grad_norm": 0.4903873801231384, + "learning_rate": 4.899668285346676e-06, + "loss": 0.5559, + "step": 6038 + }, + { + "epoch": 0.5513054591929889, + "grad_norm": 0.4694110155105591, + "learning_rate": 4.899634717366678e-06, + "loss": 0.5873, + "step": 6039 + }, + { + "epoch": 0.5513967500456455, + "grad_norm": 0.5001910328865051, + "learning_rate": 4.8996011438872264e-06, + "loss": 0.5671, + "step": 6040 + }, + { + "epoch": 0.551488040898302, + "grad_norm": 0.5022817850112915, + "learning_rate": 4.8995675649084e-06, + "loss": 0.5629, + "step": 6041 + }, + { + "epoch": 0.5515793317509585, + "grad_norm": 0.4716678559780121, + "learning_rate": 4.8995339804302735e-06, + "loss": 0.5404, + "step": 6042 + }, + { + "epoch": 0.5516706226036151, + "grad_norm": 0.452644944190979, + "learning_rate": 4.899500390452925e-06, + "loss": 0.5851, + "step": 6043 + }, + { + "epoch": 0.5517619134562717, + "grad_norm": 0.5488539338111877, + "learning_rate": 4.8994667949764315e-06, + "loss": 0.5822, + "step": 6044 + }, + { + "epoch": 0.5518532043089283, + "grad_norm": 0.4463141858577728, + "learning_rate": 4.899433194000871e-06, + "loss": 0.6095, + "step": 6045 + }, + { + "epoch": 0.5519444951615848, + "grad_norm": 0.4821707010269165, + "learning_rate": 4.899399587526318e-06, + "loss": 0.539, + "step": 6046 + }, + { + "epoch": 0.5520357860142414, + "grad_norm": 0.4903773367404938, + "learning_rate": 4.8993659755528524e-06, + "loss": 0.5744, + "step": 6047 + }, + { + "epoch": 0.552127076866898, + "grad_norm": 0.47928234934806824, + "learning_rate": 4.899332358080548e-06, + "loss": 0.5365, + "step": 6048 + }, + { + "epoch": 0.5522183677195545, + "grad_norm": 0.48061197996139526, + "learning_rate": 4.899298735109486e-06, + "loss": 0.5369, + "step": 6049 + }, + { + "epoch": 0.552309658572211, + "grad_norm": 0.4798589050769806, + "learning_rate": 4.899265106639739e-06, + "loss": 0.5848, + "step": 6050 + }, + { + "epoch": 0.5524009494248676, + "grad_norm": 0.4798865020275116, + "learning_rate": 4.899231472671387e-06, + "loss": 0.579, + "step": 6051 + }, + { + "epoch": 0.5524922402775242, + "grad_norm": 0.5109149217605591, + "learning_rate": 4.8991978332045065e-06, + "loss": 0.517, + "step": 6052 + }, + { + "epoch": 0.5525835311301808, + "grad_norm": 0.49085232615470886, + "learning_rate": 4.899164188239175e-06, + "loss": 0.5714, + "step": 6053 + }, + { + "epoch": 0.5526748219828373, + "grad_norm": 0.5309929251670837, + "learning_rate": 4.899130537775468e-06, + "loss": 0.5311, + "step": 6054 + }, + { + "epoch": 0.5527661128354939, + "grad_norm": 0.489479660987854, + "learning_rate": 4.899096881813464e-06, + "loss": 0.5521, + "step": 6055 + }, + { + "epoch": 0.5528574036881504, + "grad_norm": 0.47628939151763916, + "learning_rate": 4.899063220353239e-06, + "loss": 0.5348, + "step": 6056 + }, + { + "epoch": 0.552948694540807, + "grad_norm": 0.4744003713130951, + "learning_rate": 4.8990295533948715e-06, + "loss": 0.5869, + "step": 6057 + }, + { + "epoch": 0.5530399853934636, + "grad_norm": 0.5100096464157104, + "learning_rate": 4.898995880938438e-06, + "loss": 0.5513, + "step": 6058 + }, + { + "epoch": 0.5531312762461201, + "grad_norm": 0.4913269579410553, + "learning_rate": 4.898962202984016e-06, + "loss": 0.5863, + "step": 6059 + }, + { + "epoch": 0.5532225670987767, + "grad_norm": 0.4594080448150635, + "learning_rate": 4.898928519531682e-06, + "loss": 0.5936, + "step": 6060 + }, + { + "epoch": 0.5533138579514333, + "grad_norm": 0.48449429869651794, + "learning_rate": 4.898894830581513e-06, + "loss": 0.5459, + "step": 6061 + }, + { + "epoch": 0.5534051488040899, + "grad_norm": 0.4885987341403961, + "learning_rate": 4.898861136133588e-06, + "loss": 0.5536, + "step": 6062 + }, + { + "epoch": 0.5534964396567464, + "grad_norm": 0.4673802852630615, + "learning_rate": 4.8988274361879824e-06, + "loss": 0.5827, + "step": 6063 + }, + { + "epoch": 0.5535877305094029, + "grad_norm": 0.5223199129104614, + "learning_rate": 4.898793730744774e-06, + "loss": 0.5308, + "step": 6064 + }, + { + "epoch": 0.5536790213620595, + "grad_norm": 0.46372008323669434, + "learning_rate": 4.8987600198040404e-06, + "loss": 0.5712, + "step": 6065 + }, + { + "epoch": 0.5537703122147161, + "grad_norm": 0.5261520743370056, + "learning_rate": 4.8987263033658585e-06, + "loss": 0.5607, + "step": 6066 + }, + { + "epoch": 0.5538616030673726, + "grad_norm": 0.4797687828540802, + "learning_rate": 4.898692581430306e-06, + "loss": 0.5595, + "step": 6067 + }, + { + "epoch": 0.5539528939200292, + "grad_norm": 0.4956081807613373, + "learning_rate": 4.898658853997459e-06, + "loss": 0.5439, + "step": 6068 + }, + { + "epoch": 0.5540441847726858, + "grad_norm": 0.48045381903648376, + "learning_rate": 4.898625121067396e-06, + "loss": 0.5223, + "step": 6069 + }, + { + "epoch": 0.5541354756253424, + "grad_norm": 0.47169408202171326, + "learning_rate": 4.898591382640193e-06, + "loss": 0.6021, + "step": 6070 + }, + { + "epoch": 0.554226766477999, + "grad_norm": 0.4704989790916443, + "learning_rate": 4.89855763871593e-06, + "loss": 0.5815, + "step": 6071 + }, + { + "epoch": 0.5543180573306554, + "grad_norm": 0.4867297410964966, + "learning_rate": 4.8985238892946806e-06, + "loss": 0.5355, + "step": 6072 + }, + { + "epoch": 0.554409348183312, + "grad_norm": 0.473016619682312, + "learning_rate": 4.898490134376525e-06, + "loss": 0.6021, + "step": 6073 + }, + { + "epoch": 0.5545006390359686, + "grad_norm": 0.5337784290313721, + "learning_rate": 4.898456373961541e-06, + "loss": 0.5048, + "step": 6074 + }, + { + "epoch": 0.5545919298886252, + "grad_norm": 0.4790983498096466, + "learning_rate": 4.898422608049802e-06, + "loss": 0.5646, + "step": 6075 + }, + { + "epoch": 0.5546832207412817, + "grad_norm": 0.4586557447910309, + "learning_rate": 4.8983888366413904e-06, + "loss": 0.5917, + "step": 6076 + }, + { + "epoch": 0.5547745115939383, + "grad_norm": 0.46261417865753174, + "learning_rate": 4.89835505973638e-06, + "loss": 0.5566, + "step": 6077 + }, + { + "epoch": 0.5548658024465949, + "grad_norm": 0.5001224875450134, + "learning_rate": 4.89832127733485e-06, + "loss": 0.554, + "step": 6078 + }, + { + "epoch": 0.5549570932992515, + "grad_norm": 0.5141593217849731, + "learning_rate": 4.898287489436877e-06, + "loss": 0.5689, + "step": 6079 + }, + { + "epoch": 0.5550483841519079, + "grad_norm": 0.48748430609703064, + "learning_rate": 4.898253696042538e-06, + "loss": 0.6079, + "step": 6080 + }, + { + "epoch": 0.5551396750045645, + "grad_norm": 0.505610466003418, + "learning_rate": 4.898219897151912e-06, + "loss": 0.5636, + "step": 6081 + }, + { + "epoch": 0.5552309658572211, + "grad_norm": 0.44179609417915344, + "learning_rate": 4.898186092765075e-06, + "loss": 0.5896, + "step": 6082 + }, + { + "epoch": 0.5553222567098777, + "grad_norm": 0.47376418113708496, + "learning_rate": 4.8981522828821055e-06, + "loss": 0.5492, + "step": 6083 + }, + { + "epoch": 0.5554135475625342, + "grad_norm": 0.4731316864490509, + "learning_rate": 4.89811846750308e-06, + "loss": 0.5625, + "step": 6084 + }, + { + "epoch": 0.5555048384151908, + "grad_norm": 0.47381293773651123, + "learning_rate": 4.898084646628078e-06, + "loss": 0.5478, + "step": 6085 + }, + { + "epoch": 0.5555961292678474, + "grad_norm": 0.5162723064422607, + "learning_rate": 4.898050820257174e-06, + "loss": 0.585, + "step": 6086 + }, + { + "epoch": 0.5556874201205039, + "grad_norm": 0.45257365703582764, + "learning_rate": 4.898016988390447e-06, + "loss": 0.5797, + "step": 6087 + }, + { + "epoch": 0.5557787109731605, + "grad_norm": 0.4606568217277527, + "learning_rate": 4.897983151027976e-06, + "loss": 0.5606, + "step": 6088 + }, + { + "epoch": 0.555870001825817, + "grad_norm": 0.4541129767894745, + "learning_rate": 4.897949308169836e-06, + "loss": 0.5665, + "step": 6089 + }, + { + "epoch": 0.5559612926784736, + "grad_norm": 0.5190519690513611, + "learning_rate": 4.897915459816106e-06, + "loss": 0.5625, + "step": 6090 + }, + { + "epoch": 0.5560525835311302, + "grad_norm": 0.4534878432750702, + "learning_rate": 4.897881605966864e-06, + "loss": 0.5697, + "step": 6091 + }, + { + "epoch": 0.5561438743837868, + "grad_norm": 0.47330161929130554, + "learning_rate": 4.8978477466221865e-06, + "loss": 0.5355, + "step": 6092 + }, + { + "epoch": 0.5562351652364433, + "grad_norm": 0.46969765424728394, + "learning_rate": 4.897813881782151e-06, + "loss": 0.598, + "step": 6093 + }, + { + "epoch": 0.5563264560890999, + "grad_norm": 0.49899670481681824, + "learning_rate": 4.897780011446836e-06, + "loss": 0.5579, + "step": 6094 + }, + { + "epoch": 0.5564177469417564, + "grad_norm": 0.4756987392902374, + "learning_rate": 4.897746135616318e-06, + "loss": 0.5453, + "step": 6095 + }, + { + "epoch": 0.556509037794413, + "grad_norm": 0.4710986018180847, + "learning_rate": 4.897712254290677e-06, + "loss": 0.5701, + "step": 6096 + }, + { + "epoch": 0.5566003286470695, + "grad_norm": 0.475674569606781, + "learning_rate": 4.897678367469988e-06, + "loss": 0.5862, + "step": 6097 + }, + { + "epoch": 0.5566916194997261, + "grad_norm": 0.4512636661529541, + "learning_rate": 4.8976444751543286e-06, + "loss": 0.5966, + "step": 6098 + }, + { + "epoch": 0.5567829103523827, + "grad_norm": 0.45967382192611694, + "learning_rate": 4.8976105773437785e-06, + "loss": 0.5511, + "step": 6099 + }, + { + "epoch": 0.5568742012050393, + "grad_norm": 0.4446479082107544, + "learning_rate": 4.897576674038415e-06, + "loss": 0.569, + "step": 6100 + }, + { + "epoch": 0.5569654920576959, + "grad_norm": 0.4601764380931854, + "learning_rate": 4.8975427652383146e-06, + "loss": 0.5925, + "step": 6101 + }, + { + "epoch": 0.5570567829103524, + "grad_norm": 0.44889581203460693, + "learning_rate": 4.8975088509435556e-06, + "loss": 0.591, + "step": 6102 + }, + { + "epoch": 0.5571480737630089, + "grad_norm": 0.4666432738304138, + "learning_rate": 4.897474931154217e-06, + "loss": 0.5871, + "step": 6103 + }, + { + "epoch": 0.5572393646156655, + "grad_norm": 0.44875630736351013, + "learning_rate": 4.897441005870373e-06, + "loss": 0.5778, + "step": 6104 + }, + { + "epoch": 0.5573306554683221, + "grad_norm": 0.4870792031288147, + "learning_rate": 4.897407075092106e-06, + "loss": 0.5329, + "step": 6105 + }, + { + "epoch": 0.5574219463209786, + "grad_norm": 0.49882739782333374, + "learning_rate": 4.89737313881949e-06, + "loss": 0.5829, + "step": 6106 + }, + { + "epoch": 0.5575132371736352, + "grad_norm": 0.410741925239563, + "learning_rate": 4.8973391970526044e-06, + "loss": 0.6206, + "step": 6107 + }, + { + "epoch": 0.5576045280262918, + "grad_norm": 0.458771288394928, + "learning_rate": 4.8973052497915264e-06, + "loss": 0.5637, + "step": 6108 + }, + { + "epoch": 0.5576958188789484, + "grad_norm": 0.5117417573928833, + "learning_rate": 4.897271297036334e-06, + "loss": 0.5594, + "step": 6109 + }, + { + "epoch": 0.557787109731605, + "grad_norm": 0.49272286891937256, + "learning_rate": 4.897237338787106e-06, + "loss": 0.5625, + "step": 6110 + }, + { + "epoch": 0.5578784005842614, + "grad_norm": 0.4929737150669098, + "learning_rate": 4.89720337504392e-06, + "loss": 0.551, + "step": 6111 + }, + { + "epoch": 0.557969691436918, + "grad_norm": 0.4836640954017639, + "learning_rate": 4.897169405806852e-06, + "loss": 0.5554, + "step": 6112 + }, + { + "epoch": 0.5580609822895746, + "grad_norm": 0.5075982213020325, + "learning_rate": 4.897135431075982e-06, + "loss": 0.5805, + "step": 6113 + }, + { + "epoch": 0.5581522731422311, + "grad_norm": 0.4966544508934021, + "learning_rate": 4.897101450851386e-06, + "loss": 0.5447, + "step": 6114 + }, + { + "epoch": 0.5582435639948877, + "grad_norm": 0.47095799446105957, + "learning_rate": 4.8970674651331435e-06, + "loss": 0.5656, + "step": 6115 + }, + { + "epoch": 0.5583348548475443, + "grad_norm": 0.5002098083496094, + "learning_rate": 4.897033473921332e-06, + "loss": 0.5768, + "step": 6116 + }, + { + "epoch": 0.5584261457002009, + "grad_norm": 0.48969268798828125, + "learning_rate": 4.896999477216028e-06, + "loss": 0.5642, + "step": 6117 + }, + { + "epoch": 0.5585174365528573, + "grad_norm": 0.46667611598968506, + "learning_rate": 4.896965475017311e-06, + "loss": 0.5241, + "step": 6118 + }, + { + "epoch": 0.5586087274055139, + "grad_norm": 0.5027022361755371, + "learning_rate": 4.8969314673252585e-06, + "loss": 0.5396, + "step": 6119 + }, + { + "epoch": 0.5587000182581705, + "grad_norm": 0.4976939260959625, + "learning_rate": 4.896897454139949e-06, + "loss": 0.5711, + "step": 6120 + }, + { + "epoch": 0.5587913091108271, + "grad_norm": 0.4773540198802948, + "learning_rate": 4.896863435461459e-06, + "loss": 0.5704, + "step": 6121 + }, + { + "epoch": 0.5588825999634837, + "grad_norm": 0.5033115148544312, + "learning_rate": 4.8968294112898686e-06, + "loss": 0.5465, + "step": 6122 + }, + { + "epoch": 0.5589738908161402, + "grad_norm": 0.4970756471157074, + "learning_rate": 4.896795381625253e-06, + "loss": 0.5609, + "step": 6123 + }, + { + "epoch": 0.5590651816687968, + "grad_norm": 0.4774245023727417, + "learning_rate": 4.896761346467693e-06, + "loss": 0.5572, + "step": 6124 + }, + { + "epoch": 0.5591564725214534, + "grad_norm": 0.4441593289375305, + "learning_rate": 4.896727305817264e-06, + "loss": 0.5724, + "step": 6125 + }, + { + "epoch": 0.5592477633741099, + "grad_norm": 0.4640040695667267, + "learning_rate": 4.896693259674046e-06, + "loss": 0.5248, + "step": 6126 + }, + { + "epoch": 0.5593390542267664, + "grad_norm": 0.4876876175403595, + "learning_rate": 4.896659208038117e-06, + "loss": 0.5404, + "step": 6127 + }, + { + "epoch": 0.559430345079423, + "grad_norm": 0.45493465662002563, + "learning_rate": 4.896625150909553e-06, + "loss": 0.6423, + "step": 6128 + }, + { + "epoch": 0.5595216359320796, + "grad_norm": 0.4479219317436218, + "learning_rate": 4.896591088288434e-06, + "loss": 0.5615, + "step": 6129 + }, + { + "epoch": 0.5596129267847362, + "grad_norm": 0.4674331247806549, + "learning_rate": 4.896557020174837e-06, + "loss": 0.5539, + "step": 6130 + }, + { + "epoch": 0.5597042176373928, + "grad_norm": 0.5004907846450806, + "learning_rate": 4.8965229465688416e-06, + "loss": 0.5826, + "step": 6131 + }, + { + "epoch": 0.5597955084900493, + "grad_norm": 0.49004361033439636, + "learning_rate": 4.896488867470524e-06, + "loss": 0.6007, + "step": 6132 + }, + { + "epoch": 0.5598867993427059, + "grad_norm": 0.5066350698471069, + "learning_rate": 4.896454782879963e-06, + "loss": 0.5343, + "step": 6133 + }, + { + "epoch": 0.5599780901953624, + "grad_norm": 0.46338483691215515, + "learning_rate": 4.896420692797238e-06, + "loss": 0.5734, + "step": 6134 + }, + { + "epoch": 0.560069381048019, + "grad_norm": 0.49561840295791626, + "learning_rate": 4.8963865972224256e-06, + "loss": 0.5508, + "step": 6135 + }, + { + "epoch": 0.5601606719006755, + "grad_norm": 0.4635690748691559, + "learning_rate": 4.896352496155603e-06, + "loss": 0.5893, + "step": 6136 + }, + { + "epoch": 0.5602519627533321, + "grad_norm": 0.5027961134910583, + "learning_rate": 4.896318389596851e-06, + "loss": 0.5624, + "step": 6137 + }, + { + "epoch": 0.5603432536059887, + "grad_norm": 0.45237627625465393, + "learning_rate": 4.896284277546246e-06, + "loss": 0.6123, + "step": 6138 + }, + { + "epoch": 0.5604345444586453, + "grad_norm": 0.4920901358127594, + "learning_rate": 4.896250160003867e-06, + "loss": 0.534, + "step": 6139 + }, + { + "epoch": 0.5605258353113018, + "grad_norm": 0.4522010087966919, + "learning_rate": 4.896216036969792e-06, + "loss": 0.53, + "step": 6140 + }, + { + "epoch": 0.5606171261639584, + "grad_norm": 0.4737125337123871, + "learning_rate": 4.896181908444098e-06, + "loss": 0.5511, + "step": 6141 + }, + { + "epoch": 0.5607084170166149, + "grad_norm": 0.4771151840686798, + "learning_rate": 4.896147774426865e-06, + "loss": 0.5901, + "step": 6142 + }, + { + "epoch": 0.5607997078692715, + "grad_norm": 0.5195150375366211, + "learning_rate": 4.896113634918171e-06, + "loss": 0.5333, + "step": 6143 + }, + { + "epoch": 0.560890998721928, + "grad_norm": 0.47544896602630615, + "learning_rate": 4.896079489918092e-06, + "loss": 0.5748, + "step": 6144 + }, + { + "epoch": 0.5609822895745846, + "grad_norm": 0.47504550218582153, + "learning_rate": 4.896045339426709e-06, + "loss": 0.5736, + "step": 6145 + }, + { + "epoch": 0.5610735804272412, + "grad_norm": 0.46492108702659607, + "learning_rate": 4.896011183444099e-06, + "loss": 0.5423, + "step": 6146 + }, + { + "epoch": 0.5611648712798978, + "grad_norm": 0.48525571823120117, + "learning_rate": 4.89597702197034e-06, + "loss": 0.5857, + "step": 6147 + }, + { + "epoch": 0.5612561621325544, + "grad_norm": 0.5153951644897461, + "learning_rate": 4.895942855005512e-06, + "loss": 0.5801, + "step": 6148 + }, + { + "epoch": 0.5613474529852109, + "grad_norm": 0.5117635130882263, + "learning_rate": 4.895908682549691e-06, + "loss": 0.5684, + "step": 6149 + }, + { + "epoch": 0.5614387438378674, + "grad_norm": 0.4332381784915924, + "learning_rate": 4.895874504602958e-06, + "loss": 0.5851, + "step": 6150 + }, + { + "epoch": 0.561530034690524, + "grad_norm": 0.47886306047439575, + "learning_rate": 4.895840321165388e-06, + "loss": 0.5836, + "step": 6151 + }, + { + "epoch": 0.5616213255431806, + "grad_norm": 0.457793265581131, + "learning_rate": 4.895806132237062e-06, + "loss": 0.5321, + "step": 6152 + }, + { + "epoch": 0.5617126163958371, + "grad_norm": 0.47949928045272827, + "learning_rate": 4.895771937818058e-06, + "loss": 0.5677, + "step": 6153 + }, + { + "epoch": 0.5618039072484937, + "grad_norm": 0.48836109042167664, + "learning_rate": 4.8957377379084526e-06, + "loss": 0.5523, + "step": 6154 + }, + { + "epoch": 0.5618951981011503, + "grad_norm": 0.49221497774124146, + "learning_rate": 4.895703532508326e-06, + "loss": 0.561, + "step": 6155 + }, + { + "epoch": 0.5619864889538069, + "grad_norm": 0.49645286798477173, + "learning_rate": 4.895669321617755e-06, + "loss": 0.5667, + "step": 6156 + }, + { + "epoch": 0.5620777798064633, + "grad_norm": 0.4808669090270996, + "learning_rate": 4.89563510523682e-06, + "loss": 0.5483, + "step": 6157 + }, + { + "epoch": 0.5621690706591199, + "grad_norm": 0.4790898859500885, + "learning_rate": 4.895600883365597e-06, + "loss": 0.5569, + "step": 6158 + }, + { + "epoch": 0.5622603615117765, + "grad_norm": 0.46694162487983704, + "learning_rate": 4.895566656004168e-06, + "loss": 0.5683, + "step": 6159 + }, + { + "epoch": 0.5623516523644331, + "grad_norm": 0.5322343111038208, + "learning_rate": 4.895532423152608e-06, + "loss": 0.5668, + "step": 6160 + }, + { + "epoch": 0.5624429432170897, + "grad_norm": 0.47503212094306946, + "learning_rate": 4.895498184810997e-06, + "loss": 0.5849, + "step": 6161 + }, + { + "epoch": 0.5625342340697462, + "grad_norm": 0.4970953166484833, + "learning_rate": 4.895463940979412e-06, + "loss": 0.5683, + "step": 6162 + }, + { + "epoch": 0.5626255249224028, + "grad_norm": 0.49990424513816833, + "learning_rate": 4.8954296916579345e-06, + "loss": 0.5719, + "step": 6163 + }, + { + "epoch": 0.5627168157750594, + "grad_norm": 0.476558655500412, + "learning_rate": 4.8953954368466405e-06, + "loss": 0.5852, + "step": 6164 + }, + { + "epoch": 0.5628081066277159, + "grad_norm": 0.4659779369831085, + "learning_rate": 4.895361176545609e-06, + "loss": 0.5692, + "step": 6165 + }, + { + "epoch": 0.5628993974803724, + "grad_norm": 0.47443461418151855, + "learning_rate": 4.895326910754918e-06, + "loss": 0.582, + "step": 6166 + }, + { + "epoch": 0.562990688333029, + "grad_norm": 0.48834049701690674, + "learning_rate": 4.895292639474647e-06, + "loss": 0.569, + "step": 6167 + }, + { + "epoch": 0.5630819791856856, + "grad_norm": 0.48152703046798706, + "learning_rate": 4.895258362704875e-06, + "loss": 0.567, + "step": 6168 + }, + { + "epoch": 0.5631732700383422, + "grad_norm": 0.4930168092250824, + "learning_rate": 4.895224080445679e-06, + "loss": 0.5384, + "step": 6169 + }, + { + "epoch": 0.5632645608909987, + "grad_norm": 0.4894840121269226, + "learning_rate": 4.895189792697139e-06, + "loss": 0.5547, + "step": 6170 + }, + { + "epoch": 0.5633558517436553, + "grad_norm": 0.47885748744010925, + "learning_rate": 4.895155499459333e-06, + "loss": 0.5696, + "step": 6171 + }, + { + "epoch": 0.5634471425963119, + "grad_norm": 0.5077277421951294, + "learning_rate": 4.895121200732339e-06, + "loss": 0.5739, + "step": 6172 + }, + { + "epoch": 0.5635384334489684, + "grad_norm": 0.5090850591659546, + "learning_rate": 4.895086896516237e-06, + "loss": 0.4878, + "step": 6173 + }, + { + "epoch": 0.563629724301625, + "grad_norm": 0.4990632236003876, + "learning_rate": 4.895052586811104e-06, + "loss": 0.4971, + "step": 6174 + }, + { + "epoch": 0.5637210151542815, + "grad_norm": 0.4662144184112549, + "learning_rate": 4.89501827161702e-06, + "loss": 0.5861, + "step": 6175 + }, + { + "epoch": 0.5638123060069381, + "grad_norm": 0.4957290291786194, + "learning_rate": 4.894983950934062e-06, + "loss": 0.5608, + "step": 6176 + }, + { + "epoch": 0.5639035968595947, + "grad_norm": 0.47934871912002563, + "learning_rate": 4.894949624762311e-06, + "loss": 0.5457, + "step": 6177 + }, + { + "epoch": 0.5639948877122513, + "grad_norm": 0.470684289932251, + "learning_rate": 4.894915293101843e-06, + "loss": 0.5608, + "step": 6178 + }, + { + "epoch": 0.5640861785649078, + "grad_norm": 0.4801064133644104, + "learning_rate": 4.894880955952738e-06, + "loss": 0.6042, + "step": 6179 + }, + { + "epoch": 0.5641774694175644, + "grad_norm": 0.49881771206855774, + "learning_rate": 4.8948466133150765e-06, + "loss": 0.5425, + "step": 6180 + }, + { + "epoch": 0.5642687602702209, + "grad_norm": 0.4832862615585327, + "learning_rate": 4.894812265188934e-06, + "loss": 0.5789, + "step": 6181 + }, + { + "epoch": 0.5643600511228775, + "grad_norm": 0.4704693555831909, + "learning_rate": 4.894777911574391e-06, + "loss": 0.564, + "step": 6182 + }, + { + "epoch": 0.564451341975534, + "grad_norm": 0.4967549443244934, + "learning_rate": 4.894743552471527e-06, + "loss": 0.6006, + "step": 6183 + }, + { + "epoch": 0.5645426328281906, + "grad_norm": 0.46926021575927734, + "learning_rate": 4.894709187880417e-06, + "loss": 0.5501, + "step": 6184 + }, + { + "epoch": 0.5646339236808472, + "grad_norm": 0.4911918342113495, + "learning_rate": 4.894674817801145e-06, + "loss": 0.5674, + "step": 6185 + }, + { + "epoch": 0.5647252145335038, + "grad_norm": 0.48593661189079285, + "learning_rate": 4.894640442233786e-06, + "loss": 0.5497, + "step": 6186 + }, + { + "epoch": 0.5648165053861604, + "grad_norm": 0.4926165044307709, + "learning_rate": 4.89460606117842e-06, + "loss": 0.5591, + "step": 6187 + }, + { + "epoch": 0.5649077962388168, + "grad_norm": 0.5030167698860168, + "learning_rate": 4.894571674635126e-06, + "loss": 0.5488, + "step": 6188 + }, + { + "epoch": 0.5649990870914734, + "grad_norm": 0.49672216176986694, + "learning_rate": 4.894537282603983e-06, + "loss": 0.5496, + "step": 6189 + }, + { + "epoch": 0.56509037794413, + "grad_norm": 0.478640615940094, + "learning_rate": 4.894502885085068e-06, + "loss": 0.6002, + "step": 6190 + }, + { + "epoch": 0.5651816687967866, + "grad_norm": 0.46410831809043884, + "learning_rate": 4.894468482078462e-06, + "loss": 0.5704, + "step": 6191 + }, + { + "epoch": 0.5652729596494431, + "grad_norm": 0.48615172505378723, + "learning_rate": 4.894434073584243e-06, + "loss": 0.5251, + "step": 6192 + }, + { + "epoch": 0.5653642505020997, + "grad_norm": 0.48576265573501587, + "learning_rate": 4.89439965960249e-06, + "loss": 0.571, + "step": 6193 + }, + { + "epoch": 0.5654555413547563, + "grad_norm": 0.47021961212158203, + "learning_rate": 4.894365240133282e-06, + "loss": 0.602, + "step": 6194 + }, + { + "epoch": 0.5655468322074129, + "grad_norm": 0.4797708988189697, + "learning_rate": 4.8943308151766965e-06, + "loss": 0.5669, + "step": 6195 + }, + { + "epoch": 0.5656381230600693, + "grad_norm": 0.44444355368614197, + "learning_rate": 4.894296384732814e-06, + "loss": 0.5668, + "step": 6196 + }, + { + "epoch": 0.5657294139127259, + "grad_norm": 0.45551231503486633, + "learning_rate": 4.894261948801714e-06, + "loss": 0.6096, + "step": 6197 + }, + { + "epoch": 0.5658207047653825, + "grad_norm": 0.485902339220047, + "learning_rate": 4.8942275073834734e-06, + "loss": 0.5517, + "step": 6198 + }, + { + "epoch": 0.5659119956180391, + "grad_norm": 0.48383769392967224, + "learning_rate": 4.8941930604781724e-06, + "loss": 0.567, + "step": 6199 + }, + { + "epoch": 0.5660032864706956, + "grad_norm": 0.5176546573638916, + "learning_rate": 4.8941586080858894e-06, + "loss": 0.5672, + "step": 6200 + }, + { + "epoch": 0.5660945773233522, + "grad_norm": 0.4372572600841522, + "learning_rate": 4.894124150206704e-06, + "loss": 0.5695, + "step": 6201 + }, + { + "epoch": 0.5661858681760088, + "grad_norm": 0.4959179759025574, + "learning_rate": 4.894089686840694e-06, + "loss": 0.5038, + "step": 6202 + }, + { + "epoch": 0.5662771590286654, + "grad_norm": 0.446879506111145, + "learning_rate": 4.894055217987941e-06, + "loss": 0.5994, + "step": 6203 + }, + { + "epoch": 0.5663684498813218, + "grad_norm": 0.49006423354148865, + "learning_rate": 4.89402074364852e-06, + "loss": 0.5212, + "step": 6204 + }, + { + "epoch": 0.5664597407339784, + "grad_norm": 0.49050942063331604, + "learning_rate": 4.893986263822513e-06, + "loss": 0.5667, + "step": 6205 + }, + { + "epoch": 0.566551031586635, + "grad_norm": 0.47023308277130127, + "learning_rate": 4.893951778509999e-06, + "loss": 0.5711, + "step": 6206 + }, + { + "epoch": 0.5666423224392916, + "grad_norm": 0.5168997645378113, + "learning_rate": 4.893917287711055e-06, + "loss": 0.5197, + "step": 6207 + }, + { + "epoch": 0.5667336132919482, + "grad_norm": 0.46062955260276794, + "learning_rate": 4.893882791425762e-06, + "loss": 0.5704, + "step": 6208 + }, + { + "epoch": 0.5668249041446047, + "grad_norm": 0.4801928400993347, + "learning_rate": 4.893848289654198e-06, + "loss": 0.5522, + "step": 6209 + }, + { + "epoch": 0.5669161949972613, + "grad_norm": 0.4767742455005646, + "learning_rate": 4.893813782396442e-06, + "loss": 0.573, + "step": 6210 + }, + { + "epoch": 0.5670074858499179, + "grad_norm": 0.47756922245025635, + "learning_rate": 4.893779269652574e-06, + "loss": 0.5651, + "step": 6211 + }, + { + "epoch": 0.5670987767025744, + "grad_norm": 0.45607396960258484, + "learning_rate": 4.8937447514226725e-06, + "loss": 0.5627, + "step": 6212 + }, + { + "epoch": 0.5671900675552309, + "grad_norm": 0.4822702705860138, + "learning_rate": 4.893710227706817e-06, + "loss": 0.562, + "step": 6213 + }, + { + "epoch": 0.5672813584078875, + "grad_norm": 0.49653396010398865, + "learning_rate": 4.893675698505086e-06, + "loss": 0.56, + "step": 6214 + }, + { + "epoch": 0.5673726492605441, + "grad_norm": 0.4760948717594147, + "learning_rate": 4.893641163817558e-06, + "loss": 0.5694, + "step": 6215 + }, + { + "epoch": 0.5674639401132007, + "grad_norm": 0.478130966424942, + "learning_rate": 4.893606623644315e-06, + "loss": 0.5637, + "step": 6216 + }, + { + "epoch": 0.5675552309658572, + "grad_norm": 0.48259398341178894, + "learning_rate": 4.8935720779854325e-06, + "loss": 0.5781, + "step": 6217 + }, + { + "epoch": 0.5676465218185138, + "grad_norm": 0.46621647477149963, + "learning_rate": 4.8935375268409925e-06, + "loss": 0.5387, + "step": 6218 + }, + { + "epoch": 0.5677378126711703, + "grad_norm": 0.46459999680519104, + "learning_rate": 4.893502970211072e-06, + "loss": 0.5515, + "step": 6219 + }, + { + "epoch": 0.5678291035238269, + "grad_norm": 0.5104745626449585, + "learning_rate": 4.893468408095752e-06, + "loss": 0.5325, + "step": 6220 + }, + { + "epoch": 0.5679203943764835, + "grad_norm": 0.477498322725296, + "learning_rate": 4.893433840495111e-06, + "loss": 0.5947, + "step": 6221 + }, + { + "epoch": 0.56801168522914, + "grad_norm": 0.48864510655403137, + "learning_rate": 4.893399267409228e-06, + "loss": 0.5565, + "step": 6222 + }, + { + "epoch": 0.5681029760817966, + "grad_norm": 0.44613775610923767, + "learning_rate": 4.893364688838183e-06, + "loss": 0.5874, + "step": 6223 + }, + { + "epoch": 0.5681942669344532, + "grad_norm": 0.47949063777923584, + "learning_rate": 4.893330104782055e-06, + "loss": 0.5707, + "step": 6224 + }, + { + "epoch": 0.5682855577871098, + "grad_norm": 0.4780610203742981, + "learning_rate": 4.893295515240922e-06, + "loss": 0.5608, + "step": 6225 + }, + { + "epoch": 0.5683768486397663, + "grad_norm": 0.4710118770599365, + "learning_rate": 4.893260920214864e-06, + "loss": 0.5594, + "step": 6226 + }, + { + "epoch": 0.5684681394924228, + "grad_norm": 0.4984630048274994, + "learning_rate": 4.893226319703961e-06, + "loss": 0.5634, + "step": 6227 + }, + { + "epoch": 0.5685594303450794, + "grad_norm": 0.4886170029640198, + "learning_rate": 4.893191713708291e-06, + "loss": 0.5414, + "step": 6228 + }, + { + "epoch": 0.568650721197736, + "grad_norm": 0.5017574429512024, + "learning_rate": 4.893157102227936e-06, + "loss": 0.5837, + "step": 6229 + }, + { + "epoch": 0.5687420120503925, + "grad_norm": 0.48353105783462524, + "learning_rate": 4.893122485262972e-06, + "loss": 0.5542, + "step": 6230 + }, + { + "epoch": 0.5688333029030491, + "grad_norm": 0.4750097095966339, + "learning_rate": 4.89308786281348e-06, + "loss": 0.601, + "step": 6231 + }, + { + "epoch": 0.5689245937557057, + "grad_norm": 0.4492305815219879, + "learning_rate": 4.89305323487954e-06, + "loss": 0.5908, + "step": 6232 + }, + { + "epoch": 0.5690158846083623, + "grad_norm": 0.4604153037071228, + "learning_rate": 4.89301860146123e-06, + "loss": 0.5821, + "step": 6233 + }, + { + "epoch": 0.5691071754610189, + "grad_norm": 0.47187814116477966, + "learning_rate": 4.89298396255863e-06, + "loss": 0.5656, + "step": 6234 + }, + { + "epoch": 0.5691984663136753, + "grad_norm": 0.4647344946861267, + "learning_rate": 4.892949318171819e-06, + "loss": 0.5598, + "step": 6235 + }, + { + "epoch": 0.5692897571663319, + "grad_norm": 0.45981907844543457, + "learning_rate": 4.892914668300876e-06, + "loss": 0.5719, + "step": 6236 + }, + { + "epoch": 0.5693810480189885, + "grad_norm": 0.4927498698234558, + "learning_rate": 4.892880012945882e-06, + "loss": 0.5257, + "step": 6237 + }, + { + "epoch": 0.5694723388716451, + "grad_norm": 0.46567612886428833, + "learning_rate": 4.892845352106914e-06, + "loss": 0.574, + "step": 6238 + }, + { + "epoch": 0.5695636297243016, + "grad_norm": 0.4531480669975281, + "learning_rate": 4.892810685784055e-06, + "loss": 0.6005, + "step": 6239 + }, + { + "epoch": 0.5696549205769582, + "grad_norm": 0.46784695982933044, + "learning_rate": 4.89277601397738e-06, + "loss": 0.5719, + "step": 6240 + }, + { + "epoch": 0.5697462114296148, + "grad_norm": 0.4796803295612335, + "learning_rate": 4.892741336686973e-06, + "loss": 0.5363, + "step": 6241 + }, + { + "epoch": 0.5698375022822714, + "grad_norm": 0.5163266658782959, + "learning_rate": 4.892706653912911e-06, + "loss": 0.5822, + "step": 6242 + }, + { + "epoch": 0.5699287931349278, + "grad_norm": 0.44146543741226196, + "learning_rate": 4.892671965655273e-06, + "loss": 0.6221, + "step": 6243 + }, + { + "epoch": 0.5700200839875844, + "grad_norm": 0.46458208560943604, + "learning_rate": 4.892637271914139e-06, + "loss": 0.5999, + "step": 6244 + }, + { + "epoch": 0.570111374840241, + "grad_norm": 0.45848119258880615, + "learning_rate": 4.89260257268959e-06, + "loss": 0.5527, + "step": 6245 + }, + { + "epoch": 0.5702026656928976, + "grad_norm": 0.5052474141120911, + "learning_rate": 4.8925678679817035e-06, + "loss": 0.5063, + "step": 6246 + }, + { + "epoch": 0.5702939565455541, + "grad_norm": 0.5069456696510315, + "learning_rate": 4.89253315779056e-06, + "loss": 0.5094, + "step": 6247 + }, + { + "epoch": 0.5703852473982107, + "grad_norm": 0.4627029597759247, + "learning_rate": 4.892498442116238e-06, + "loss": 0.5799, + "step": 6248 + }, + { + "epoch": 0.5704765382508673, + "grad_norm": 0.5124158263206482, + "learning_rate": 4.892463720958819e-06, + "loss": 0.5214, + "step": 6249 + }, + { + "epoch": 0.5705678291035238, + "grad_norm": 0.47142940759658813, + "learning_rate": 4.892428994318381e-06, + "loss": 0.579, + "step": 6250 + }, + { + "epoch": 0.5706591199561803, + "grad_norm": 0.4749268889427185, + "learning_rate": 4.892394262195005e-06, + "loss": 0.5923, + "step": 6251 + }, + { + "epoch": 0.5707504108088369, + "grad_norm": 0.46970608830451965, + "learning_rate": 4.892359524588769e-06, + "loss": 0.5619, + "step": 6252 + }, + { + "epoch": 0.5708417016614935, + "grad_norm": 0.4511125087738037, + "learning_rate": 4.892324781499753e-06, + "loss": 0.5793, + "step": 6253 + }, + { + "epoch": 0.5709329925141501, + "grad_norm": 0.4780823290348053, + "learning_rate": 4.892290032928037e-06, + "loss": 0.5907, + "step": 6254 + }, + { + "epoch": 0.5710242833668067, + "grad_norm": 0.4835485816001892, + "learning_rate": 4.892255278873702e-06, + "loss": 0.5899, + "step": 6255 + }, + { + "epoch": 0.5711155742194632, + "grad_norm": 0.4797489643096924, + "learning_rate": 4.892220519336825e-06, + "loss": 0.5431, + "step": 6256 + }, + { + "epoch": 0.5712068650721198, + "grad_norm": 0.47079113125801086, + "learning_rate": 4.8921857543174876e-06, + "loss": 0.5689, + "step": 6257 + }, + { + "epoch": 0.5712981559247763, + "grad_norm": 0.49991554021835327, + "learning_rate": 4.892150983815767e-06, + "loss": 0.5686, + "step": 6258 + }, + { + "epoch": 0.5713894467774329, + "grad_norm": 0.5234551429748535, + "learning_rate": 4.892116207831747e-06, + "loss": 0.5595, + "step": 6259 + }, + { + "epoch": 0.5714807376300894, + "grad_norm": 0.4887376129627228, + "learning_rate": 4.8920814263655035e-06, + "loss": 0.6001, + "step": 6260 + }, + { + "epoch": 0.571572028482746, + "grad_norm": 0.47461721301078796, + "learning_rate": 4.892046639417118e-06, + "loss": 0.516, + "step": 6261 + }, + { + "epoch": 0.5716633193354026, + "grad_norm": 0.4626765549182892, + "learning_rate": 4.8920118469866705e-06, + "loss": 0.5946, + "step": 6262 + }, + { + "epoch": 0.5717546101880592, + "grad_norm": 0.4675239324569702, + "learning_rate": 4.891977049074239e-06, + "loss": 0.603, + "step": 6263 + }, + { + "epoch": 0.5718459010407158, + "grad_norm": 0.48171332478523254, + "learning_rate": 4.891942245679905e-06, + "loss": 0.5405, + "step": 6264 + }, + { + "epoch": 0.5719371918933723, + "grad_norm": 0.4601679742336273, + "learning_rate": 4.891907436803748e-06, + "loss": 0.5797, + "step": 6265 + }, + { + "epoch": 0.5720284827460288, + "grad_norm": 0.4773991107940674, + "learning_rate": 4.8918726224458465e-06, + "loss": 0.6005, + "step": 6266 + }, + { + "epoch": 0.5721197735986854, + "grad_norm": 0.4493384063243866, + "learning_rate": 4.891837802606282e-06, + "loss": 0.6031, + "step": 6267 + }, + { + "epoch": 0.572211064451342, + "grad_norm": 0.48382797837257385, + "learning_rate": 4.891802977285133e-06, + "loss": 0.5533, + "step": 6268 + }, + { + "epoch": 0.5723023553039985, + "grad_norm": 0.4896867871284485, + "learning_rate": 4.89176814648248e-06, + "loss": 0.5564, + "step": 6269 + }, + { + "epoch": 0.5723936461566551, + "grad_norm": 0.5109674334526062, + "learning_rate": 4.891733310198402e-06, + "loss": 0.5777, + "step": 6270 + }, + { + "epoch": 0.5724849370093117, + "grad_norm": 0.49153974652290344, + "learning_rate": 4.8916984684329804e-06, + "loss": 0.5703, + "step": 6271 + }, + { + "epoch": 0.5725762278619683, + "grad_norm": 0.45587992668151855, + "learning_rate": 4.891663621186294e-06, + "loss": 0.5673, + "step": 6272 + }, + { + "epoch": 0.5726675187146248, + "grad_norm": 0.5079136490821838, + "learning_rate": 4.891628768458423e-06, + "loss": 0.5466, + "step": 6273 + }, + { + "epoch": 0.5727588095672813, + "grad_norm": 0.4628887176513672, + "learning_rate": 4.891593910249446e-06, + "loss": 0.5959, + "step": 6274 + }, + { + "epoch": 0.5728501004199379, + "grad_norm": 0.4720771312713623, + "learning_rate": 4.891559046559445e-06, + "loss": 0.5926, + "step": 6275 + }, + { + "epoch": 0.5729413912725945, + "grad_norm": 0.4641991853713989, + "learning_rate": 4.891524177388498e-06, + "loss": 0.5911, + "step": 6276 + }, + { + "epoch": 0.573032682125251, + "grad_norm": 0.4853973388671875, + "learning_rate": 4.891489302736687e-06, + "loss": 0.5978, + "step": 6277 + }, + { + "epoch": 0.5731239729779076, + "grad_norm": 0.4735313653945923, + "learning_rate": 4.891454422604089e-06, + "loss": 0.567, + "step": 6278 + }, + { + "epoch": 0.5732152638305642, + "grad_norm": 0.4305073320865631, + "learning_rate": 4.891419536990787e-06, + "loss": 0.5994, + "step": 6279 + }, + { + "epoch": 0.5733065546832208, + "grad_norm": 0.4559822380542755, + "learning_rate": 4.891384645896859e-06, + "loss": 0.5964, + "step": 6280 + }, + { + "epoch": 0.5733978455358774, + "grad_norm": 0.45128121972084045, + "learning_rate": 4.891349749322386e-06, + "loss": 0.5728, + "step": 6281 + }, + { + "epoch": 0.5734891363885338, + "grad_norm": 0.4600103795528412, + "learning_rate": 4.891314847267447e-06, + "loss": 0.5867, + "step": 6282 + }, + { + "epoch": 0.5735804272411904, + "grad_norm": 0.5041881799697876, + "learning_rate": 4.891279939732122e-06, + "loss": 0.5574, + "step": 6283 + }, + { + "epoch": 0.573671718093847, + "grad_norm": 0.4947059452533722, + "learning_rate": 4.891245026716492e-06, + "loss": 0.551, + "step": 6284 + }, + { + "epoch": 0.5737630089465036, + "grad_norm": 0.5062435269355774, + "learning_rate": 4.891210108220637e-06, + "loss": 0.5357, + "step": 6285 + }, + { + "epoch": 0.5738542997991601, + "grad_norm": 0.46365857124328613, + "learning_rate": 4.891175184244636e-06, + "loss": 0.5603, + "step": 6286 + }, + { + "epoch": 0.5739455906518167, + "grad_norm": 0.5477520823478699, + "learning_rate": 4.89114025478857e-06, + "loss": 0.5149, + "step": 6287 + }, + { + "epoch": 0.5740368815044733, + "grad_norm": 0.4935694932937622, + "learning_rate": 4.891105319852519e-06, + "loss": 0.5737, + "step": 6288 + }, + { + "epoch": 0.5741281723571298, + "grad_norm": 0.4521614909172058, + "learning_rate": 4.891070379436562e-06, + "loss": 0.5858, + "step": 6289 + }, + { + "epoch": 0.5742194632097863, + "grad_norm": 0.4587136209011078, + "learning_rate": 4.891035433540779e-06, + "loss": 0.5789, + "step": 6290 + }, + { + "epoch": 0.5743107540624429, + "grad_norm": 0.482604444026947, + "learning_rate": 4.891000482165252e-06, + "loss": 0.6138, + "step": 6291 + }, + { + "epoch": 0.5744020449150995, + "grad_norm": 0.4491262137889862, + "learning_rate": 4.89096552531006e-06, + "loss": 0.5964, + "step": 6292 + }, + { + "epoch": 0.5744933357677561, + "grad_norm": 0.46072423458099365, + "learning_rate": 4.890930562975283e-06, + "loss": 0.5749, + "step": 6293 + }, + { + "epoch": 0.5745846266204127, + "grad_norm": 0.47392070293426514, + "learning_rate": 4.890895595161e-06, + "loss": 0.5323, + "step": 6294 + }, + { + "epoch": 0.5746759174730692, + "grad_norm": 0.45849373936653137, + "learning_rate": 4.890860621867294e-06, + "loss": 0.5673, + "step": 6295 + }, + { + "epoch": 0.5747672083257258, + "grad_norm": 0.46308085322380066, + "learning_rate": 4.890825643094243e-06, + "loss": 0.5879, + "step": 6296 + }, + { + "epoch": 0.5748584991783823, + "grad_norm": 0.4566514790058136, + "learning_rate": 4.8907906588419275e-06, + "loss": 0.5551, + "step": 6297 + }, + { + "epoch": 0.5749497900310389, + "grad_norm": 0.496711790561676, + "learning_rate": 4.8907556691104275e-06, + "loss": 0.5358, + "step": 6298 + }, + { + "epoch": 0.5750410808836954, + "grad_norm": 0.4695892333984375, + "learning_rate": 4.890720673899824e-06, + "loss": 0.5988, + "step": 6299 + }, + { + "epoch": 0.575132371736352, + "grad_norm": 0.4597875773906708, + "learning_rate": 4.890685673210197e-06, + "loss": 0.5548, + "step": 6300 + }, + { + "epoch": 0.5752236625890086, + "grad_norm": 0.4803890585899353, + "learning_rate": 4.890650667041625e-06, + "loss": 0.5923, + "step": 6301 + }, + { + "epoch": 0.5753149534416652, + "grad_norm": 0.5036994814872742, + "learning_rate": 4.8906156553941905e-06, + "loss": 0.5747, + "step": 6302 + }, + { + "epoch": 0.5754062442943217, + "grad_norm": 0.4733734130859375, + "learning_rate": 4.890580638267973e-06, + "loss": 0.5615, + "step": 6303 + }, + { + "epoch": 0.5754975351469783, + "grad_norm": 0.48222070932388306, + "learning_rate": 4.890545615663053e-06, + "loss": 0.5517, + "step": 6304 + }, + { + "epoch": 0.5755888259996348, + "grad_norm": 0.47945860028266907, + "learning_rate": 4.89051058757951e-06, + "loss": 0.5427, + "step": 6305 + }, + { + "epoch": 0.5756801168522914, + "grad_norm": 0.5126798152923584, + "learning_rate": 4.890475554017425e-06, + "loss": 0.4883, + "step": 6306 + }, + { + "epoch": 0.575771407704948, + "grad_norm": 0.4769987165927887, + "learning_rate": 4.890440514976877e-06, + "loss": 0.6059, + "step": 6307 + }, + { + "epoch": 0.5758626985576045, + "grad_norm": 0.47413402795791626, + "learning_rate": 4.890405470457949e-06, + "loss": 0.5854, + "step": 6308 + }, + { + "epoch": 0.5759539894102611, + "grad_norm": 0.46459662914276123, + "learning_rate": 4.890370420460719e-06, + "loss": 0.5465, + "step": 6309 + }, + { + "epoch": 0.5760452802629177, + "grad_norm": 0.478500634431839, + "learning_rate": 4.8903353649852666e-06, + "loss": 0.5673, + "step": 6310 + }, + { + "epoch": 0.5761365711155743, + "grad_norm": 0.4861539304256439, + "learning_rate": 4.890300304031675e-06, + "loss": 0.5862, + "step": 6311 + }, + { + "epoch": 0.5762278619682308, + "grad_norm": 0.47699815034866333, + "learning_rate": 4.8902652376000225e-06, + "loss": 0.5644, + "step": 6312 + }, + { + "epoch": 0.5763191528208873, + "grad_norm": 0.4582294821739197, + "learning_rate": 4.89023016569039e-06, + "loss": 0.5602, + "step": 6313 + }, + { + "epoch": 0.5764104436735439, + "grad_norm": 0.4675796926021576, + "learning_rate": 4.890195088302857e-06, + "loss": 0.5626, + "step": 6314 + }, + { + "epoch": 0.5765017345262005, + "grad_norm": 0.5127478241920471, + "learning_rate": 4.890160005437506e-06, + "loss": 0.5284, + "step": 6315 + }, + { + "epoch": 0.576593025378857, + "grad_norm": 0.48623624444007874, + "learning_rate": 4.890124917094415e-06, + "loss": 0.5242, + "step": 6316 + }, + { + "epoch": 0.5766843162315136, + "grad_norm": 0.4802256226539612, + "learning_rate": 4.890089823273667e-06, + "loss": 0.5595, + "step": 6317 + }, + { + "epoch": 0.5767756070841702, + "grad_norm": 0.44104623794555664, + "learning_rate": 4.890054723975341e-06, + "loss": 0.5638, + "step": 6318 + }, + { + "epoch": 0.5768668979368268, + "grad_norm": 0.5077009797096252, + "learning_rate": 4.890019619199516e-06, + "loss": 0.5325, + "step": 6319 + }, + { + "epoch": 0.5769581887894832, + "grad_norm": 0.4960675835609436, + "learning_rate": 4.889984508946275e-06, + "loss": 0.5481, + "step": 6320 + }, + { + "epoch": 0.5770494796421398, + "grad_norm": 0.4782307744026184, + "learning_rate": 4.889949393215696e-06, + "loss": 0.5673, + "step": 6321 + }, + { + "epoch": 0.5771407704947964, + "grad_norm": 0.49734416604042053, + "learning_rate": 4.889914272007862e-06, + "loss": 0.5653, + "step": 6322 + }, + { + "epoch": 0.577232061347453, + "grad_norm": 0.5023691058158875, + "learning_rate": 4.889879145322852e-06, + "loss": 0.561, + "step": 6323 + }, + { + "epoch": 0.5773233522001096, + "grad_norm": 0.47628939151763916, + "learning_rate": 4.889844013160747e-06, + "loss": 0.5798, + "step": 6324 + }, + { + "epoch": 0.5774146430527661, + "grad_norm": 0.49037817120552063, + "learning_rate": 4.889808875521628e-06, + "loss": 0.5405, + "step": 6325 + }, + { + "epoch": 0.5775059339054227, + "grad_norm": 0.45529648661613464, + "learning_rate": 4.889773732405574e-06, + "loss": 0.5954, + "step": 6326 + }, + { + "epoch": 0.5775972247580793, + "grad_norm": 0.46859627962112427, + "learning_rate": 4.889738583812666e-06, + "loss": 0.5713, + "step": 6327 + }, + { + "epoch": 0.5776885156107358, + "grad_norm": 0.4712583124637604, + "learning_rate": 4.8897034297429855e-06, + "loss": 0.6018, + "step": 6328 + }, + { + "epoch": 0.5777798064633923, + "grad_norm": 0.48213309049606323, + "learning_rate": 4.889668270196613e-06, + "loss": 0.5473, + "step": 6329 + }, + { + "epoch": 0.5778710973160489, + "grad_norm": 0.4892452359199524, + "learning_rate": 4.889633105173628e-06, + "loss": 0.5379, + "step": 6330 + }, + { + "epoch": 0.5779623881687055, + "grad_norm": 0.47850003838539124, + "learning_rate": 4.889597934674112e-06, + "loss": 0.4883, + "step": 6331 + }, + { + "epoch": 0.5780536790213621, + "grad_norm": 0.46686574816703796, + "learning_rate": 4.889562758698144e-06, + "loss": 0.5521, + "step": 6332 + }, + { + "epoch": 0.5781449698740186, + "grad_norm": 0.4700627326965332, + "learning_rate": 4.889527577245807e-06, + "loss": 0.5906, + "step": 6333 + }, + { + "epoch": 0.5782362607266752, + "grad_norm": 0.4917871654033661, + "learning_rate": 4.889492390317181e-06, + "loss": 0.547, + "step": 6334 + }, + { + "epoch": 0.5783275515793318, + "grad_norm": 0.47208890318870544, + "learning_rate": 4.889457197912346e-06, + "loss": 0.5702, + "step": 6335 + }, + { + "epoch": 0.5784188424319883, + "grad_norm": 0.4815394878387451, + "learning_rate": 4.889422000031383e-06, + "loss": 0.6014, + "step": 6336 + }, + { + "epoch": 0.5785101332846448, + "grad_norm": 0.49691691994667053, + "learning_rate": 4.889386796674371e-06, + "loss": 0.5632, + "step": 6337 + }, + { + "epoch": 0.5786014241373014, + "grad_norm": 0.4736238121986389, + "learning_rate": 4.889351587841394e-06, + "loss": 0.5759, + "step": 6338 + }, + { + "epoch": 0.578692714989958, + "grad_norm": 0.46455118060112, + "learning_rate": 4.88931637353253e-06, + "loss": 0.6363, + "step": 6339 + }, + { + "epoch": 0.5787840058426146, + "grad_norm": 0.45997846126556396, + "learning_rate": 4.889281153747861e-06, + "loss": 0.5502, + "step": 6340 + }, + { + "epoch": 0.5788752966952712, + "grad_norm": 0.46371331810951233, + "learning_rate": 4.889245928487467e-06, + "loss": 0.5571, + "step": 6341 + }, + { + "epoch": 0.5789665875479277, + "grad_norm": 0.47908613085746765, + "learning_rate": 4.889210697751429e-06, + "loss": 0.551, + "step": 6342 + }, + { + "epoch": 0.5790578784005843, + "grad_norm": 0.45220357179641724, + "learning_rate": 4.889175461539828e-06, + "loss": 0.5918, + "step": 6343 + }, + { + "epoch": 0.5791491692532408, + "grad_norm": 0.48971909284591675, + "learning_rate": 4.8891402198527434e-06, + "loss": 0.5605, + "step": 6344 + }, + { + "epoch": 0.5792404601058974, + "grad_norm": 0.4666488766670227, + "learning_rate": 4.889104972690258e-06, + "loss": 0.5714, + "step": 6345 + }, + { + "epoch": 0.5793317509585539, + "grad_norm": 0.4551328420639038, + "learning_rate": 4.889069720052452e-06, + "loss": 0.6046, + "step": 6346 + }, + { + "epoch": 0.5794230418112105, + "grad_norm": 0.4951125681400299, + "learning_rate": 4.889034461939406e-06, + "loss": 0.5848, + "step": 6347 + }, + { + "epoch": 0.5795143326638671, + "grad_norm": 0.4938131272792816, + "learning_rate": 4.8889991983512e-06, + "loss": 0.5754, + "step": 6348 + }, + { + "epoch": 0.5796056235165237, + "grad_norm": 0.5201777815818787, + "learning_rate": 4.888963929287916e-06, + "loss": 0.5243, + "step": 6349 + }, + { + "epoch": 0.5796969143691802, + "grad_norm": 0.49896809458732605, + "learning_rate": 4.888928654749634e-06, + "loss": 0.5739, + "step": 6350 + }, + { + "epoch": 0.5797882052218367, + "grad_norm": 0.447409987449646, + "learning_rate": 4.888893374736436e-06, + "loss": 0.5704, + "step": 6351 + }, + { + "epoch": 0.5798794960744933, + "grad_norm": 0.4701460003852844, + "learning_rate": 4.8888580892484e-06, + "loss": 0.5171, + "step": 6352 + }, + { + "epoch": 0.5799707869271499, + "grad_norm": 0.4921683669090271, + "learning_rate": 4.88882279828561e-06, + "loss": 0.568, + "step": 6353 + }, + { + "epoch": 0.5800620777798065, + "grad_norm": 0.4921981394290924, + "learning_rate": 4.888787501848146e-06, + "loss": 0.5598, + "step": 6354 + }, + { + "epoch": 0.580153368632463, + "grad_norm": 0.48647117614746094, + "learning_rate": 4.888752199936088e-06, + "loss": 0.5321, + "step": 6355 + }, + { + "epoch": 0.5802446594851196, + "grad_norm": 0.4701475203037262, + "learning_rate": 4.888716892549518e-06, + "loss": 0.5707, + "step": 6356 + }, + { + "epoch": 0.5803359503377762, + "grad_norm": 0.5115872621536255, + "learning_rate": 4.888681579688517e-06, + "loss": 0.549, + "step": 6357 + }, + { + "epoch": 0.5804272411904328, + "grad_norm": 0.45856648683547974, + "learning_rate": 4.8886462613531645e-06, + "loss": 0.5844, + "step": 6358 + }, + { + "epoch": 0.5805185320430892, + "grad_norm": 0.46331286430358887, + "learning_rate": 4.888610937543542e-06, + "loss": 0.5997, + "step": 6359 + }, + { + "epoch": 0.5806098228957458, + "grad_norm": 0.48425477743148804, + "learning_rate": 4.888575608259732e-06, + "loss": 0.5287, + "step": 6360 + }, + { + "epoch": 0.5807011137484024, + "grad_norm": 0.44419190287590027, + "learning_rate": 4.888540273501813e-06, + "loss": 0.5721, + "step": 6361 + }, + { + "epoch": 0.580792404601059, + "grad_norm": 0.45930010080337524, + "learning_rate": 4.888504933269869e-06, + "loss": 0.6108, + "step": 6362 + }, + { + "epoch": 0.5808836954537155, + "grad_norm": 0.43813657760620117, + "learning_rate": 4.888469587563978e-06, + "loss": 0.5678, + "step": 6363 + }, + { + "epoch": 0.5809749863063721, + "grad_norm": 0.4547361731529236, + "learning_rate": 4.8884342363842215e-06, + "loss": 0.5677, + "step": 6364 + }, + { + "epoch": 0.5810662771590287, + "grad_norm": 0.48745375871658325, + "learning_rate": 4.888398879730682e-06, + "loss": 0.5529, + "step": 6365 + }, + { + "epoch": 0.5811575680116853, + "grad_norm": 0.5224339365959167, + "learning_rate": 4.888363517603441e-06, + "loss": 0.5286, + "step": 6366 + }, + { + "epoch": 0.5812488588643417, + "grad_norm": 0.5126514434814453, + "learning_rate": 4.888328150002577e-06, + "loss": 0.5412, + "step": 6367 + }, + { + "epoch": 0.5813401497169983, + "grad_norm": 0.4910336136817932, + "learning_rate": 4.888292776928172e-06, + "loss": 0.5256, + "step": 6368 + }, + { + "epoch": 0.5814314405696549, + "grad_norm": 0.46711641550064087, + "learning_rate": 4.888257398380309e-06, + "loss": 0.5572, + "step": 6369 + }, + { + "epoch": 0.5815227314223115, + "grad_norm": 0.47592517733573914, + "learning_rate": 4.888222014359065e-06, + "loss": 0.586, + "step": 6370 + }, + { + "epoch": 0.5816140222749681, + "grad_norm": 0.46935781836509705, + "learning_rate": 4.888186624864526e-06, + "loss": 0.5557, + "step": 6371 + }, + { + "epoch": 0.5817053131276246, + "grad_norm": 0.4974008798599243, + "learning_rate": 4.888151229896769e-06, + "loss": 0.56, + "step": 6372 + }, + { + "epoch": 0.5817966039802812, + "grad_norm": 0.45339876413345337, + "learning_rate": 4.888115829455879e-06, + "loss": 0.573, + "step": 6373 + }, + { + "epoch": 0.5818878948329378, + "grad_norm": 0.45758071541786194, + "learning_rate": 4.888080423541933e-06, + "loss": 0.6035, + "step": 6374 + }, + { + "epoch": 0.5819791856855943, + "grad_norm": 0.5028326511383057, + "learning_rate": 4.888045012155015e-06, + "loss": 0.5493, + "step": 6375 + }, + { + "epoch": 0.5820704765382508, + "grad_norm": 0.45866167545318604, + "learning_rate": 4.888009595295205e-06, + "loss": 0.5773, + "step": 6376 + }, + { + "epoch": 0.5821617673909074, + "grad_norm": 0.4778866767883301, + "learning_rate": 4.887974172962584e-06, + "loss": 0.6083, + "step": 6377 + }, + { + "epoch": 0.582253058243564, + "grad_norm": 0.472395122051239, + "learning_rate": 4.887938745157234e-06, + "loss": 0.5538, + "step": 6378 + }, + { + "epoch": 0.5823443490962206, + "grad_norm": 0.47186392545700073, + "learning_rate": 4.887903311879236e-06, + "loss": 0.5736, + "step": 6379 + }, + { + "epoch": 0.5824356399488771, + "grad_norm": 0.4532587230205536, + "learning_rate": 4.887867873128671e-06, + "loss": 0.5921, + "step": 6380 + }, + { + "epoch": 0.5825269308015337, + "grad_norm": 0.47421228885650635, + "learning_rate": 4.887832428905621e-06, + "loss": 0.5634, + "step": 6381 + }, + { + "epoch": 0.5826182216541903, + "grad_norm": 0.46503937244415283, + "learning_rate": 4.887796979210165e-06, + "loss": 0.5663, + "step": 6382 + }, + { + "epoch": 0.5827095125068468, + "grad_norm": 0.47915542125701904, + "learning_rate": 4.887761524042387e-06, + "loss": 0.6008, + "step": 6383 + }, + { + "epoch": 0.5828008033595033, + "grad_norm": 0.45217546820640564, + "learning_rate": 4.887726063402366e-06, + "loss": 0.5852, + "step": 6384 + }, + { + "epoch": 0.5828920942121599, + "grad_norm": 0.49964699149131775, + "learning_rate": 4.8876905972901845e-06, + "loss": 0.4961, + "step": 6385 + }, + { + "epoch": 0.5829833850648165, + "grad_norm": 0.48936378955841064, + "learning_rate": 4.887655125705924e-06, + "loss": 0.539, + "step": 6386 + }, + { + "epoch": 0.5830746759174731, + "grad_norm": 0.48570138216018677, + "learning_rate": 4.887619648649664e-06, + "loss": 0.56, + "step": 6387 + }, + { + "epoch": 0.5831659667701297, + "grad_norm": 0.5179204344749451, + "learning_rate": 4.887584166121488e-06, + "loss": 0.5851, + "step": 6388 + }, + { + "epoch": 0.5832572576227862, + "grad_norm": 0.4735226631164551, + "learning_rate": 4.887548678121476e-06, + "loss": 0.5817, + "step": 6389 + }, + { + "epoch": 0.5833485484754427, + "grad_norm": 0.5094680190086365, + "learning_rate": 4.887513184649711e-06, + "loss": 0.5308, + "step": 6390 + }, + { + "epoch": 0.5834398393280993, + "grad_norm": 0.48344558477401733, + "learning_rate": 4.887477685706273e-06, + "loss": 0.559, + "step": 6391 + }, + { + "epoch": 0.5835311301807559, + "grad_norm": 0.48303043842315674, + "learning_rate": 4.887442181291242e-06, + "loss": 0.5252, + "step": 6392 + }, + { + "epoch": 0.5836224210334124, + "grad_norm": 0.4934850037097931, + "learning_rate": 4.8874066714047015e-06, + "loss": 0.5401, + "step": 6393 + }, + { + "epoch": 0.583713711886069, + "grad_norm": 0.492668092250824, + "learning_rate": 4.887371156046732e-06, + "loss": 0.5376, + "step": 6394 + }, + { + "epoch": 0.5838050027387256, + "grad_norm": 0.46573591232299805, + "learning_rate": 4.887335635217416e-06, + "loss": 0.6085, + "step": 6395 + }, + { + "epoch": 0.5838962935913822, + "grad_norm": 0.49251505732536316, + "learning_rate": 4.8873001089168324e-06, + "loss": 0.5299, + "step": 6396 + }, + { + "epoch": 0.5839875844440388, + "grad_norm": 0.48749345541000366, + "learning_rate": 4.887264577145066e-06, + "loss": 0.5853, + "step": 6397 + }, + { + "epoch": 0.5840788752966952, + "grad_norm": 0.45482906699180603, + "learning_rate": 4.8872290399021955e-06, + "loss": 0.5439, + "step": 6398 + }, + { + "epoch": 0.5841701661493518, + "grad_norm": 0.4976540803909302, + "learning_rate": 4.887193497188303e-06, + "loss": 0.5409, + "step": 6399 + }, + { + "epoch": 0.5842614570020084, + "grad_norm": 0.4500160813331604, + "learning_rate": 4.887157949003471e-06, + "loss": 0.5555, + "step": 6400 + }, + { + "epoch": 0.584352747854665, + "grad_norm": 0.5162805318832397, + "learning_rate": 4.88712239534778e-06, + "loss": 0.5269, + "step": 6401 + }, + { + "epoch": 0.5844440387073215, + "grad_norm": 0.4487191140651703, + "learning_rate": 4.887086836221312e-06, + "loss": 0.6205, + "step": 6402 + }, + { + "epoch": 0.5845353295599781, + "grad_norm": 0.4939354658126831, + "learning_rate": 4.887051271624147e-06, + "loss": 0.5248, + "step": 6403 + }, + { + "epoch": 0.5846266204126347, + "grad_norm": 0.4808797538280487, + "learning_rate": 4.887015701556369e-06, + "loss": 0.5653, + "step": 6404 + }, + { + "epoch": 0.5847179112652913, + "grad_norm": 0.44644609093666077, + "learning_rate": 4.886980126018058e-06, + "loss": 0.6153, + "step": 6405 + }, + { + "epoch": 0.5848092021179477, + "grad_norm": 0.5013085603713989, + "learning_rate": 4.886944545009295e-06, + "loss": 0.5445, + "step": 6406 + }, + { + "epoch": 0.5849004929706043, + "grad_norm": 0.4678618013858795, + "learning_rate": 4.886908958530163e-06, + "loss": 0.5592, + "step": 6407 + }, + { + "epoch": 0.5849917838232609, + "grad_norm": 0.48802268505096436, + "learning_rate": 4.8868733665807435e-06, + "loss": 0.5562, + "step": 6408 + }, + { + "epoch": 0.5850830746759175, + "grad_norm": 0.45609980821609497, + "learning_rate": 4.886837769161116e-06, + "loss": 0.5661, + "step": 6409 + }, + { + "epoch": 0.585174365528574, + "grad_norm": 0.4963986575603485, + "learning_rate": 4.886802166271365e-06, + "loss": 0.5144, + "step": 6410 + }, + { + "epoch": 0.5852656563812306, + "grad_norm": 0.48400959372520447, + "learning_rate": 4.886766557911569e-06, + "loss": 0.5368, + "step": 6411 + }, + { + "epoch": 0.5853569472338872, + "grad_norm": 0.46047312021255493, + "learning_rate": 4.886730944081812e-06, + "loss": 0.5254, + "step": 6412 + }, + { + "epoch": 0.5854482380865438, + "grad_norm": 0.4786953032016754, + "learning_rate": 4.886695324782175e-06, + "loss": 0.5703, + "step": 6413 + }, + { + "epoch": 0.5855395289392002, + "grad_norm": 0.4665334224700928, + "learning_rate": 4.8866597000127395e-06, + "loss": 0.563, + "step": 6414 + }, + { + "epoch": 0.5856308197918568, + "grad_norm": 0.49572205543518066, + "learning_rate": 4.886624069773587e-06, + "loss": 0.5032, + "step": 6415 + }, + { + "epoch": 0.5857221106445134, + "grad_norm": 0.5268253087997437, + "learning_rate": 4.886588434064799e-06, + "loss": 0.5154, + "step": 6416 + }, + { + "epoch": 0.58581340149717, + "grad_norm": 0.510089635848999, + "learning_rate": 4.886552792886458e-06, + "loss": 0.5239, + "step": 6417 + }, + { + "epoch": 0.5859046923498266, + "grad_norm": 0.5015285015106201, + "learning_rate": 4.886517146238644e-06, + "loss": 0.5456, + "step": 6418 + }, + { + "epoch": 0.5859959832024831, + "grad_norm": 0.4884319305419922, + "learning_rate": 4.886481494121441e-06, + "loss": 0.5725, + "step": 6419 + }, + { + "epoch": 0.5860872740551397, + "grad_norm": 0.498200923204422, + "learning_rate": 4.886445836534929e-06, + "loss": 0.5636, + "step": 6420 + }, + { + "epoch": 0.5861785649077962, + "grad_norm": 0.4690415859222412, + "learning_rate": 4.88641017347919e-06, + "loss": 0.564, + "step": 6421 + }, + { + "epoch": 0.5862698557604528, + "grad_norm": 0.471091091632843, + "learning_rate": 4.8863745049543065e-06, + "loss": 0.5842, + "step": 6422 + }, + { + "epoch": 0.5863611466131093, + "grad_norm": 0.4523061215877533, + "learning_rate": 4.886338830960359e-06, + "loss": 0.5442, + "step": 6423 + }, + { + "epoch": 0.5864524374657659, + "grad_norm": 0.4827166497707367, + "learning_rate": 4.8863031514974305e-06, + "loss": 0.5608, + "step": 6424 + }, + { + "epoch": 0.5865437283184225, + "grad_norm": 0.459610253572464, + "learning_rate": 4.8862674665656025e-06, + "loss": 0.5985, + "step": 6425 + }, + { + "epoch": 0.5866350191710791, + "grad_norm": 0.4822022318840027, + "learning_rate": 4.886231776164956e-06, + "loss": 0.5604, + "step": 6426 + }, + { + "epoch": 0.5867263100237357, + "grad_norm": 0.46641218662261963, + "learning_rate": 4.886196080295574e-06, + "loss": 0.618, + "step": 6427 + }, + { + "epoch": 0.5868176008763922, + "grad_norm": 0.4883388578891754, + "learning_rate": 4.886160378957537e-06, + "loss": 0.5509, + "step": 6428 + }, + { + "epoch": 0.5869088917290487, + "grad_norm": 0.48901161551475525, + "learning_rate": 4.886124672150928e-06, + "loss": 0.5405, + "step": 6429 + }, + { + "epoch": 0.5870001825817053, + "grad_norm": 0.48492419719696045, + "learning_rate": 4.886088959875828e-06, + "loss": 0.5532, + "step": 6430 + }, + { + "epoch": 0.5870914734343619, + "grad_norm": 0.49255067110061646, + "learning_rate": 4.8860532421323185e-06, + "loss": 0.5455, + "step": 6431 + }, + { + "epoch": 0.5871827642870184, + "grad_norm": 0.4880709648132324, + "learning_rate": 4.886017518920483e-06, + "loss": 0.5525, + "step": 6432 + }, + { + "epoch": 0.587274055139675, + "grad_norm": 0.5019940137863159, + "learning_rate": 4.885981790240402e-06, + "loss": 0.5517, + "step": 6433 + }, + { + "epoch": 0.5873653459923316, + "grad_norm": 0.4910981059074402, + "learning_rate": 4.885946056092157e-06, + "loss": 0.5434, + "step": 6434 + }, + { + "epoch": 0.5874566368449882, + "grad_norm": 0.4973057508468628, + "learning_rate": 4.885910316475832e-06, + "loss": 0.5622, + "step": 6435 + }, + { + "epoch": 0.5875479276976447, + "grad_norm": 0.45952120423316956, + "learning_rate": 4.885874571391506e-06, + "loss": 0.5794, + "step": 6436 + }, + { + "epoch": 0.5876392185503012, + "grad_norm": 0.48942604660987854, + "learning_rate": 4.885838820839263e-06, + "loss": 0.5515, + "step": 6437 + }, + { + "epoch": 0.5877305094029578, + "grad_norm": 0.48265016078948975, + "learning_rate": 4.885803064819184e-06, + "loss": 0.5622, + "step": 6438 + }, + { + "epoch": 0.5878218002556144, + "grad_norm": 0.503254771232605, + "learning_rate": 4.885767303331352e-06, + "loss": 0.5611, + "step": 6439 + }, + { + "epoch": 0.587913091108271, + "grad_norm": 0.455854594707489, + "learning_rate": 4.8857315363758486e-06, + "loss": 0.61, + "step": 6440 + }, + { + "epoch": 0.5880043819609275, + "grad_norm": 0.48871222138404846, + "learning_rate": 4.885695763952755e-06, + "loss": 0.5625, + "step": 6441 + }, + { + "epoch": 0.5880956728135841, + "grad_norm": 0.48213493824005127, + "learning_rate": 4.8856599860621536e-06, + "loss": 0.604, + "step": 6442 + }, + { + "epoch": 0.5881869636662407, + "grad_norm": 0.45898258686065674, + "learning_rate": 4.885624202704126e-06, + "loss": 0.5674, + "step": 6443 + }, + { + "epoch": 0.5882782545188973, + "grad_norm": 0.5214084386825562, + "learning_rate": 4.8855884138787565e-06, + "loss": 0.5544, + "step": 6444 + }, + { + "epoch": 0.5883695453715537, + "grad_norm": 0.48941129446029663, + "learning_rate": 4.8855526195861235e-06, + "loss": 0.5794, + "step": 6445 + }, + { + "epoch": 0.5884608362242103, + "grad_norm": 0.4638481140136719, + "learning_rate": 4.885516819826311e-06, + "loss": 0.5966, + "step": 6446 + }, + { + "epoch": 0.5885521270768669, + "grad_norm": 0.4717240333557129, + "learning_rate": 4.885481014599401e-06, + "loss": 0.5761, + "step": 6447 + }, + { + "epoch": 0.5886434179295235, + "grad_norm": 0.4949371814727783, + "learning_rate": 4.885445203905476e-06, + "loss": 0.5692, + "step": 6448 + }, + { + "epoch": 0.58873470878218, + "grad_norm": 0.4667579233646393, + "learning_rate": 4.8854093877446175e-06, + "loss": 0.5318, + "step": 6449 + }, + { + "epoch": 0.5888259996348366, + "grad_norm": 0.46180304884910583, + "learning_rate": 4.8853735661169065e-06, + "loss": 0.5833, + "step": 6450 + }, + { + "epoch": 0.5889172904874932, + "grad_norm": 0.4927980899810791, + "learning_rate": 4.885337739022427e-06, + "loss": 0.5755, + "step": 6451 + }, + { + "epoch": 0.5890085813401497, + "grad_norm": 0.47749295830726624, + "learning_rate": 4.88530190646126e-06, + "loss": 0.5621, + "step": 6452 + }, + { + "epoch": 0.5890998721928062, + "grad_norm": 0.48452427983283997, + "learning_rate": 4.8852660684334875e-06, + "loss": 0.6179, + "step": 6453 + }, + { + "epoch": 0.5891911630454628, + "grad_norm": 0.47409436106681824, + "learning_rate": 4.8852302249391925e-06, + "loss": 0.6084, + "step": 6454 + }, + { + "epoch": 0.5892824538981194, + "grad_norm": 0.501957893371582, + "learning_rate": 4.885194375978457e-06, + "loss": 0.5507, + "step": 6455 + }, + { + "epoch": 0.589373744750776, + "grad_norm": 0.4941402077674866, + "learning_rate": 4.885158521551362e-06, + "loss": 0.5559, + "step": 6456 + }, + { + "epoch": 0.5894650356034326, + "grad_norm": 0.45545849204063416, + "learning_rate": 4.885122661657991e-06, + "loss": 0.5607, + "step": 6457 + }, + { + "epoch": 0.5895563264560891, + "grad_norm": 0.504289448261261, + "learning_rate": 4.885086796298426e-06, + "loss": 0.5619, + "step": 6458 + }, + { + "epoch": 0.5896476173087457, + "grad_norm": 0.4746076762676239, + "learning_rate": 4.885050925472748e-06, + "loss": 0.5892, + "step": 6459 + }, + { + "epoch": 0.5897389081614022, + "grad_norm": 0.52008056640625, + "learning_rate": 4.88501504918104e-06, + "loss": 0.5461, + "step": 6460 + }, + { + "epoch": 0.5898301990140588, + "grad_norm": 0.46087998151779175, + "learning_rate": 4.884979167423385e-06, + "loss": 0.555, + "step": 6461 + }, + { + "epoch": 0.5899214898667153, + "grad_norm": 0.49925151467323303, + "learning_rate": 4.884943280199865e-06, + "loss": 0.5606, + "step": 6462 + }, + { + "epoch": 0.5900127807193719, + "grad_norm": 0.48522260785102844, + "learning_rate": 4.88490738751056e-06, + "loss": 0.5567, + "step": 6463 + }, + { + "epoch": 0.5901040715720285, + "grad_norm": 0.45952874422073364, + "learning_rate": 4.884871489355556e-06, + "loss": 0.568, + "step": 6464 + }, + { + "epoch": 0.5901953624246851, + "grad_norm": 0.4719011187553406, + "learning_rate": 4.884835585734932e-06, + "loss": 0.5801, + "step": 6465 + }, + { + "epoch": 0.5902866532773416, + "grad_norm": 0.48330217599868774, + "learning_rate": 4.8847996766487725e-06, + "loss": 0.554, + "step": 6466 + }, + { + "epoch": 0.5903779441299982, + "grad_norm": 0.4820176064968109, + "learning_rate": 4.884763762097158e-06, + "loss": 0.5396, + "step": 6467 + }, + { + "epoch": 0.5904692349826547, + "grad_norm": 0.4772416055202484, + "learning_rate": 4.8847278420801725e-06, + "loss": 0.5734, + "step": 6468 + }, + { + "epoch": 0.5905605258353113, + "grad_norm": 0.5308769345283508, + "learning_rate": 4.884691916597897e-06, + "loss": 0.5164, + "step": 6469 + }, + { + "epoch": 0.5906518166879678, + "grad_norm": 0.5067552924156189, + "learning_rate": 4.884655985650415e-06, + "loss": 0.5193, + "step": 6470 + }, + { + "epoch": 0.5907431075406244, + "grad_norm": 0.5077924132347107, + "learning_rate": 4.884620049237807e-06, + "loss": 0.5246, + "step": 6471 + }, + { + "epoch": 0.590834398393281, + "grad_norm": 0.45320677757263184, + "learning_rate": 4.884584107360157e-06, + "loss": 0.5893, + "step": 6472 + }, + { + "epoch": 0.5909256892459376, + "grad_norm": 0.461190789937973, + "learning_rate": 4.884548160017548e-06, + "loss": 0.561, + "step": 6473 + }, + { + "epoch": 0.5910169800985942, + "grad_norm": 0.45885828137397766, + "learning_rate": 4.88451220721006e-06, + "loss": 0.558, + "step": 6474 + }, + { + "epoch": 0.5911082709512507, + "grad_norm": 0.4699913561344147, + "learning_rate": 4.884476248937777e-06, + "loss": 0.5395, + "step": 6475 + }, + { + "epoch": 0.5911995618039072, + "grad_norm": 0.47465983033180237, + "learning_rate": 4.884440285200781e-06, + "loss": 0.5483, + "step": 6476 + }, + { + "epoch": 0.5912908526565638, + "grad_norm": 0.4911355674266815, + "learning_rate": 4.884404315999156e-06, + "loss": 0.5523, + "step": 6477 + }, + { + "epoch": 0.5913821435092204, + "grad_norm": 0.49031174182891846, + "learning_rate": 4.884368341332981e-06, + "loss": 0.5566, + "step": 6478 + }, + { + "epoch": 0.5914734343618769, + "grad_norm": 0.467117041349411, + "learning_rate": 4.884332361202342e-06, + "loss": 0.5756, + "step": 6479 + }, + { + "epoch": 0.5915647252145335, + "grad_norm": 0.48417651653289795, + "learning_rate": 4.8842963756073184e-06, + "loss": 0.5715, + "step": 6480 + }, + { + "epoch": 0.5916560160671901, + "grad_norm": 0.4995787739753723, + "learning_rate": 4.884260384547995e-06, + "loss": 0.5554, + "step": 6481 + }, + { + "epoch": 0.5917473069198467, + "grad_norm": 0.4764935374259949, + "learning_rate": 4.8842243880244535e-06, + "loss": 0.5803, + "step": 6482 + }, + { + "epoch": 0.5918385977725032, + "grad_norm": 0.5043697357177734, + "learning_rate": 4.884188386036776e-06, + "loss": 0.5598, + "step": 6483 + }, + { + "epoch": 0.5919298886251597, + "grad_norm": 0.46521052718162537, + "learning_rate": 4.884152378585045e-06, + "loss": 0.6032, + "step": 6484 + }, + { + "epoch": 0.5920211794778163, + "grad_norm": 0.4708390533924103, + "learning_rate": 4.884116365669344e-06, + "loss": 0.5758, + "step": 6485 + }, + { + "epoch": 0.5921124703304729, + "grad_norm": 0.4854336082935333, + "learning_rate": 4.884080347289755e-06, + "loss": 0.5563, + "step": 6486 + }, + { + "epoch": 0.5922037611831295, + "grad_norm": 0.4676026701927185, + "learning_rate": 4.88404432344636e-06, + "loss": 0.5628, + "step": 6487 + }, + { + "epoch": 0.592295052035786, + "grad_norm": 0.4393673241138458, + "learning_rate": 4.884008294139242e-06, + "loss": 0.5981, + "step": 6488 + }, + { + "epoch": 0.5923863428884426, + "grad_norm": 0.47297725081443787, + "learning_rate": 4.8839722593684835e-06, + "loss": 0.5753, + "step": 6489 + }, + { + "epoch": 0.5924776337410992, + "grad_norm": 0.46867606043815613, + "learning_rate": 4.883936219134168e-06, + "loss": 0.5325, + "step": 6490 + }, + { + "epoch": 0.5925689245937557, + "grad_norm": 0.49977609515190125, + "learning_rate": 4.883900173436377e-06, + "loss": 0.5261, + "step": 6491 + }, + { + "epoch": 0.5926602154464122, + "grad_norm": 0.46704980731010437, + "learning_rate": 4.8838641222751926e-06, + "loss": 0.5819, + "step": 6492 + }, + { + "epoch": 0.5927515062990688, + "grad_norm": 0.49804919958114624, + "learning_rate": 4.883828065650698e-06, + "loss": 0.5111, + "step": 6493 + }, + { + "epoch": 0.5928427971517254, + "grad_norm": 0.45285043120384216, + "learning_rate": 4.8837920035629775e-06, + "loss": 0.5483, + "step": 6494 + }, + { + "epoch": 0.592934088004382, + "grad_norm": 0.4714106023311615, + "learning_rate": 4.883755936012111e-06, + "loss": 0.5627, + "step": 6495 + }, + { + "epoch": 0.5930253788570385, + "grad_norm": 0.42416325211524963, + "learning_rate": 4.8837198629981815e-06, + "loss": 0.6283, + "step": 6496 + }, + { + "epoch": 0.5931166697096951, + "grad_norm": 0.48940330743789673, + "learning_rate": 4.883683784521275e-06, + "loss": 0.5775, + "step": 6497 + }, + { + "epoch": 0.5932079605623517, + "grad_norm": 0.48339173197746277, + "learning_rate": 4.88364770058147e-06, + "loss": 0.5622, + "step": 6498 + }, + { + "epoch": 0.5932992514150082, + "grad_norm": 0.4918280839920044, + "learning_rate": 4.883611611178852e-06, + "loss": 0.5569, + "step": 6499 + }, + { + "epoch": 0.5933905422676647, + "grad_norm": 0.46016427874565125, + "learning_rate": 4.883575516313502e-06, + "loss": 0.5688, + "step": 6500 + }, + { + "epoch": 0.5934818331203213, + "grad_norm": 0.46248510479927063, + "learning_rate": 4.883539415985504e-06, + "loss": 0.5751, + "step": 6501 + }, + { + "epoch": 0.5935731239729779, + "grad_norm": 0.47789865732192993, + "learning_rate": 4.883503310194939e-06, + "loss": 0.5611, + "step": 6502 + }, + { + "epoch": 0.5936644148256345, + "grad_norm": 0.4491223990917206, + "learning_rate": 4.883467198941893e-06, + "loss": 0.5662, + "step": 6503 + }, + { + "epoch": 0.5937557056782911, + "grad_norm": 0.4752597212791443, + "learning_rate": 4.8834310822264445e-06, + "loss": 0.5931, + "step": 6504 + }, + { + "epoch": 0.5938469965309476, + "grad_norm": 0.48216578364372253, + "learning_rate": 4.883394960048679e-06, + "loss": 0.5609, + "step": 6505 + }, + { + "epoch": 0.5939382873836042, + "grad_norm": 0.5084952116012573, + "learning_rate": 4.883358832408679e-06, + "loss": 0.5417, + "step": 6506 + }, + { + "epoch": 0.5940295782362607, + "grad_norm": 0.47488492727279663, + "learning_rate": 4.883322699306527e-06, + "loss": 0.5742, + "step": 6507 + }, + { + "epoch": 0.5941208690889173, + "grad_norm": 0.479688823223114, + "learning_rate": 4.883286560742306e-06, + "loss": 0.5898, + "step": 6508 + }, + { + "epoch": 0.5942121599415738, + "grad_norm": 0.4379207491874695, + "learning_rate": 4.883250416716098e-06, + "loss": 0.6153, + "step": 6509 + }, + { + "epoch": 0.5943034507942304, + "grad_norm": 0.5127414464950562, + "learning_rate": 4.883214267227987e-06, + "loss": 0.5344, + "step": 6510 + }, + { + "epoch": 0.594394741646887, + "grad_norm": 0.5113755464553833, + "learning_rate": 4.883178112278055e-06, + "loss": 0.5639, + "step": 6511 + }, + { + "epoch": 0.5944860324995436, + "grad_norm": 0.48439326882362366, + "learning_rate": 4.883141951866385e-06, + "loss": 0.5272, + "step": 6512 + }, + { + "epoch": 0.5945773233522001, + "grad_norm": 0.46155640482902527, + "learning_rate": 4.883105785993061e-06, + "loss": 0.5226, + "step": 6513 + }, + { + "epoch": 0.5946686142048567, + "grad_norm": 0.4673139452934265, + "learning_rate": 4.883069614658163e-06, + "loss": 0.569, + "step": 6514 + }, + { + "epoch": 0.5947599050575132, + "grad_norm": 0.43703192472457886, + "learning_rate": 4.883033437861779e-06, + "loss": 0.5891, + "step": 6515 + }, + { + "epoch": 0.5948511959101698, + "grad_norm": 0.45459580421447754, + "learning_rate": 4.882997255603986e-06, + "loss": 0.5393, + "step": 6516 + }, + { + "epoch": 0.5949424867628264, + "grad_norm": 0.47797852754592896, + "learning_rate": 4.882961067884871e-06, + "loss": 0.5595, + "step": 6517 + }, + { + "epoch": 0.5950337776154829, + "grad_norm": 0.4725724458694458, + "learning_rate": 4.882924874704514e-06, + "loss": 0.6275, + "step": 6518 + }, + { + "epoch": 0.5951250684681395, + "grad_norm": 0.48130232095718384, + "learning_rate": 4.882888676063001e-06, + "loss": 0.5341, + "step": 6519 + }, + { + "epoch": 0.5952163593207961, + "grad_norm": 0.4835829734802246, + "learning_rate": 4.882852471960413e-06, + "loss": 0.5506, + "step": 6520 + }, + { + "epoch": 0.5953076501734527, + "grad_norm": 0.4927964210510254, + "learning_rate": 4.882816262396833e-06, + "loss": 0.5668, + "step": 6521 + }, + { + "epoch": 0.5953989410261091, + "grad_norm": 0.4705887734889984, + "learning_rate": 4.882780047372345e-06, + "loss": 0.5985, + "step": 6522 + }, + { + "epoch": 0.5954902318787657, + "grad_norm": 0.4971400499343872, + "learning_rate": 4.882743826887032e-06, + "loss": 0.5618, + "step": 6523 + }, + { + "epoch": 0.5955815227314223, + "grad_norm": 0.48778024315834045, + "learning_rate": 4.882707600940976e-06, + "loss": 0.5873, + "step": 6524 + }, + { + "epoch": 0.5956728135840789, + "grad_norm": 0.47679680585861206, + "learning_rate": 4.88267136953426e-06, + "loss": 0.5707, + "step": 6525 + }, + { + "epoch": 0.5957641044367354, + "grad_norm": 0.47309526801109314, + "learning_rate": 4.882635132666968e-06, + "loss": 0.5647, + "step": 6526 + }, + { + "epoch": 0.595855395289392, + "grad_norm": 0.4513322412967682, + "learning_rate": 4.882598890339182e-06, + "loss": 0.6001, + "step": 6527 + }, + { + "epoch": 0.5959466861420486, + "grad_norm": 0.47170230746269226, + "learning_rate": 4.882562642550987e-06, + "loss": 0.5389, + "step": 6528 + }, + { + "epoch": 0.5960379769947052, + "grad_norm": 0.5043600797653198, + "learning_rate": 4.882526389302463e-06, + "loss": 0.5306, + "step": 6529 + }, + { + "epoch": 0.5961292678473616, + "grad_norm": 0.47087448835372925, + "learning_rate": 4.882490130593695e-06, + "loss": 0.5764, + "step": 6530 + }, + { + "epoch": 0.5962205587000182, + "grad_norm": 0.48038503527641296, + "learning_rate": 4.882453866424767e-06, + "loss": 0.5797, + "step": 6531 + }, + { + "epoch": 0.5963118495526748, + "grad_norm": 0.4970826804637909, + "learning_rate": 4.882417596795761e-06, + "loss": 0.5674, + "step": 6532 + }, + { + "epoch": 0.5964031404053314, + "grad_norm": 0.48446330428123474, + "learning_rate": 4.8823813217067596e-06, + "loss": 0.5588, + "step": 6533 + }, + { + "epoch": 0.596494431257988, + "grad_norm": 0.4878464937210083, + "learning_rate": 4.882345041157846e-06, + "loss": 0.5135, + "step": 6534 + }, + { + "epoch": 0.5965857221106445, + "grad_norm": 0.4720379412174225, + "learning_rate": 4.882308755149104e-06, + "loss": 0.578, + "step": 6535 + }, + { + "epoch": 0.5966770129633011, + "grad_norm": 0.4641881585121155, + "learning_rate": 4.882272463680618e-06, + "loss": 0.5811, + "step": 6536 + }, + { + "epoch": 0.5967683038159577, + "grad_norm": 0.4536941945552826, + "learning_rate": 4.882236166752467e-06, + "loss": 0.593, + "step": 6537 + }, + { + "epoch": 0.5968595946686142, + "grad_norm": 0.4711575210094452, + "learning_rate": 4.882199864364738e-06, + "loss": 0.5938, + "step": 6538 + }, + { + "epoch": 0.5969508855212707, + "grad_norm": 0.4903985261917114, + "learning_rate": 4.882163556517514e-06, + "loss": 0.5824, + "step": 6539 + }, + { + "epoch": 0.5970421763739273, + "grad_norm": 0.4766027629375458, + "learning_rate": 4.8821272432108765e-06, + "loss": 0.5757, + "step": 6540 + }, + { + "epoch": 0.5971334672265839, + "grad_norm": 0.49340125918388367, + "learning_rate": 4.88209092444491e-06, + "loss": 0.5611, + "step": 6541 + }, + { + "epoch": 0.5972247580792405, + "grad_norm": 0.4471976161003113, + "learning_rate": 4.882054600219697e-06, + "loss": 0.6126, + "step": 6542 + }, + { + "epoch": 0.597316048931897, + "grad_norm": 0.4587464928627014, + "learning_rate": 4.882018270535321e-06, + "loss": 0.5568, + "step": 6543 + }, + { + "epoch": 0.5974073397845536, + "grad_norm": 0.46702611446380615, + "learning_rate": 4.881981935391866e-06, + "loss": 0.5692, + "step": 6544 + }, + { + "epoch": 0.5974986306372102, + "grad_norm": 0.4654316306114197, + "learning_rate": 4.881945594789413e-06, + "loss": 0.5742, + "step": 6545 + }, + { + "epoch": 0.5975899214898667, + "grad_norm": 0.48596587777137756, + "learning_rate": 4.881909248728048e-06, + "loss": 0.5568, + "step": 6546 + }, + { + "epoch": 0.5976812123425232, + "grad_norm": 0.4601462483406067, + "learning_rate": 4.8818728972078535e-06, + "loss": 0.576, + "step": 6547 + }, + { + "epoch": 0.5977725031951798, + "grad_norm": 0.499906986951828, + "learning_rate": 4.881836540228911e-06, + "loss": 0.5221, + "step": 6548 + }, + { + "epoch": 0.5978637940478364, + "grad_norm": 0.5080051422119141, + "learning_rate": 4.881800177791307e-06, + "loss": 0.5448, + "step": 6549 + }, + { + "epoch": 0.597955084900493, + "grad_norm": 0.47236722707748413, + "learning_rate": 4.881763809895122e-06, + "loss": 0.5902, + "step": 6550 + }, + { + "epoch": 0.5980463757531496, + "grad_norm": 0.5001646280288696, + "learning_rate": 4.881727436540441e-06, + "loss": 0.5387, + "step": 6551 + }, + { + "epoch": 0.5981376666058061, + "grad_norm": 0.44309815764427185, + "learning_rate": 4.881691057727346e-06, + "loss": 0.5676, + "step": 6552 + }, + { + "epoch": 0.5982289574584626, + "grad_norm": 0.4754754304885864, + "learning_rate": 4.881654673455922e-06, + "loss": 0.5843, + "step": 6553 + }, + { + "epoch": 0.5983202483111192, + "grad_norm": 0.4887717664241791, + "learning_rate": 4.881618283726251e-06, + "loss": 0.5984, + "step": 6554 + }, + { + "epoch": 0.5984115391637758, + "grad_norm": 0.500106155872345, + "learning_rate": 4.881581888538417e-06, + "loss": 0.5713, + "step": 6555 + }, + { + "epoch": 0.5985028300164323, + "grad_norm": 0.4928607642650604, + "learning_rate": 4.881545487892504e-06, + "loss": 0.531, + "step": 6556 + }, + { + "epoch": 0.5985941208690889, + "grad_norm": 0.5352163910865784, + "learning_rate": 4.8815090817885944e-06, + "loss": 0.5623, + "step": 6557 + }, + { + "epoch": 0.5986854117217455, + "grad_norm": 0.45755162835121155, + "learning_rate": 4.881472670226772e-06, + "loss": 0.6074, + "step": 6558 + }, + { + "epoch": 0.5987767025744021, + "grad_norm": 0.4536930322647095, + "learning_rate": 4.88143625320712e-06, + "loss": 0.582, + "step": 6559 + }, + { + "epoch": 0.5988679934270587, + "grad_norm": 0.47520968317985535, + "learning_rate": 4.881399830729722e-06, + "loss": 0.545, + "step": 6560 + }, + { + "epoch": 0.5989592842797151, + "grad_norm": 0.4858224093914032, + "learning_rate": 4.881363402794662e-06, + "loss": 0.6062, + "step": 6561 + }, + { + "epoch": 0.5990505751323717, + "grad_norm": 0.4473394453525543, + "learning_rate": 4.881326969402024e-06, + "loss": 0.5903, + "step": 6562 + }, + { + "epoch": 0.5991418659850283, + "grad_norm": 0.4745802581310272, + "learning_rate": 4.881290530551889e-06, + "loss": 0.5828, + "step": 6563 + }, + { + "epoch": 0.5992331568376849, + "grad_norm": 0.46459150314331055, + "learning_rate": 4.881254086244343e-06, + "loss": 0.5867, + "step": 6564 + }, + { + "epoch": 0.5993244476903414, + "grad_norm": 0.4516219198703766, + "learning_rate": 4.881217636479468e-06, + "loss": 0.5894, + "step": 6565 + }, + { + "epoch": 0.599415738542998, + "grad_norm": 0.5032559633255005, + "learning_rate": 4.881181181257349e-06, + "loss": 0.544, + "step": 6566 + }, + { + "epoch": 0.5995070293956546, + "grad_norm": 0.476102352142334, + "learning_rate": 4.881144720578068e-06, + "loss": 0.5271, + "step": 6567 + }, + { + "epoch": 0.5995983202483112, + "grad_norm": 0.4951307773590088, + "learning_rate": 4.88110825444171e-06, + "loss": 0.5563, + "step": 6568 + }, + { + "epoch": 0.5996896111009676, + "grad_norm": 0.48867273330688477, + "learning_rate": 4.881071782848357e-06, + "loss": 0.6125, + "step": 6569 + }, + { + "epoch": 0.5997809019536242, + "grad_norm": 0.49596887826919556, + "learning_rate": 4.881035305798094e-06, + "loss": 0.5639, + "step": 6570 + }, + { + "epoch": 0.5998721928062808, + "grad_norm": 0.48960182070732117, + "learning_rate": 4.880998823291004e-06, + "loss": 0.5664, + "step": 6571 + }, + { + "epoch": 0.5999634836589374, + "grad_norm": 0.4605914354324341, + "learning_rate": 4.8809623353271705e-06, + "loss": 0.5992, + "step": 6572 + }, + { + "epoch": 0.600054774511594, + "grad_norm": 0.47305938601493835, + "learning_rate": 4.880925841906678e-06, + "loss": 0.588, + "step": 6573 + }, + { + "epoch": 0.6001460653642505, + "grad_norm": 0.49272963404655457, + "learning_rate": 4.880889343029608e-06, + "loss": 0.5688, + "step": 6574 + }, + { + "epoch": 0.6002373562169071, + "grad_norm": 0.47459322214126587, + "learning_rate": 4.880852838696046e-06, + "loss": 0.574, + "step": 6575 + }, + { + "epoch": 0.6003286470695637, + "grad_norm": 0.4833112359046936, + "learning_rate": 4.880816328906075e-06, + "loss": 0.5858, + "step": 6576 + }, + { + "epoch": 0.6004199379222201, + "grad_norm": 0.4928268492221832, + "learning_rate": 4.88077981365978e-06, + "loss": 0.5656, + "step": 6577 + }, + { + "epoch": 0.6005112287748767, + "grad_norm": 0.4787386953830719, + "learning_rate": 4.8807432929572426e-06, + "loss": 0.5869, + "step": 6578 + }, + { + "epoch": 0.6006025196275333, + "grad_norm": 0.4662449061870575, + "learning_rate": 4.880706766798548e-06, + "loss": 0.5993, + "step": 6579 + }, + { + "epoch": 0.6006938104801899, + "grad_norm": 0.46287083625793457, + "learning_rate": 4.8806702351837785e-06, + "loss": 0.5975, + "step": 6580 + }, + { + "epoch": 0.6007851013328465, + "grad_norm": 0.4802215099334717, + "learning_rate": 4.88063369811302e-06, + "loss": 0.5957, + "step": 6581 + }, + { + "epoch": 0.600876392185503, + "grad_norm": 0.45625078678131104, + "learning_rate": 4.880597155586354e-06, + "loss": 0.5535, + "step": 6582 + }, + { + "epoch": 0.6009676830381596, + "grad_norm": 0.515893280506134, + "learning_rate": 4.8805606076038645e-06, + "loss": 0.5817, + "step": 6583 + }, + { + "epoch": 0.6010589738908162, + "grad_norm": 0.4616933763027191, + "learning_rate": 4.880524054165638e-06, + "loss": 0.5539, + "step": 6584 + }, + { + "epoch": 0.6011502647434727, + "grad_norm": 0.46335962414741516, + "learning_rate": 4.880487495271754e-06, + "loss": 0.534, + "step": 6585 + }, + { + "epoch": 0.6012415555961292, + "grad_norm": 0.45977574586868286, + "learning_rate": 4.8804509309223e-06, + "loss": 0.6338, + "step": 6586 + }, + { + "epoch": 0.6013328464487858, + "grad_norm": 0.4500700533390045, + "learning_rate": 4.880414361117357e-06, + "loss": 0.6019, + "step": 6587 + }, + { + "epoch": 0.6014241373014424, + "grad_norm": 0.5046050548553467, + "learning_rate": 4.880377785857011e-06, + "loss": 0.5524, + "step": 6588 + }, + { + "epoch": 0.601515428154099, + "grad_norm": 0.46856218576431274, + "learning_rate": 4.880341205141345e-06, + "loss": 0.5456, + "step": 6589 + }, + { + "epoch": 0.6016067190067556, + "grad_norm": 0.5118604302406311, + "learning_rate": 4.880304618970443e-06, + "loss": 0.545, + "step": 6590 + }, + { + "epoch": 0.6016980098594121, + "grad_norm": 0.49833473563194275, + "learning_rate": 4.880268027344388e-06, + "loss": 0.5779, + "step": 6591 + }, + { + "epoch": 0.6017893007120686, + "grad_norm": 0.4587029218673706, + "learning_rate": 4.880231430263265e-06, + "loss": 0.5756, + "step": 6592 + }, + { + "epoch": 0.6018805915647252, + "grad_norm": 0.4822520613670349, + "learning_rate": 4.880194827727156e-06, + "loss": 0.5858, + "step": 6593 + }, + { + "epoch": 0.6019718824173818, + "grad_norm": 0.4625145196914673, + "learning_rate": 4.8801582197361475e-06, + "loss": 0.5536, + "step": 6594 + }, + { + "epoch": 0.6020631732700383, + "grad_norm": 0.48068657517433167, + "learning_rate": 4.880121606290322e-06, + "loss": 0.5638, + "step": 6595 + }, + { + "epoch": 0.6021544641226949, + "grad_norm": 0.4966445863246918, + "learning_rate": 4.880084987389763e-06, + "loss": 0.5305, + "step": 6596 + }, + { + "epoch": 0.6022457549753515, + "grad_norm": 0.4684041142463684, + "learning_rate": 4.880048363034555e-06, + "loss": 0.5902, + "step": 6597 + }, + { + "epoch": 0.6023370458280081, + "grad_norm": 0.460013747215271, + "learning_rate": 4.880011733224782e-06, + "loss": 0.5682, + "step": 6598 + }, + { + "epoch": 0.6024283366806646, + "grad_norm": 0.48339635133743286, + "learning_rate": 4.879975097960529e-06, + "loss": 0.5819, + "step": 6599 + }, + { + "epoch": 0.6025196275333211, + "grad_norm": 0.4721268117427826, + "learning_rate": 4.879938457241878e-06, + "loss": 0.5865, + "step": 6600 + }, + { + "epoch": 0.6026109183859777, + "grad_norm": 0.46543437242507935, + "learning_rate": 4.8799018110689125e-06, + "loss": 0.5451, + "step": 6601 + }, + { + "epoch": 0.6027022092386343, + "grad_norm": 0.44648823142051697, + "learning_rate": 4.879865159441719e-06, + "loss": 0.591, + "step": 6602 + }, + { + "epoch": 0.6027935000912908, + "grad_norm": 0.5031692385673523, + "learning_rate": 4.87982850236038e-06, + "loss": 0.5572, + "step": 6603 + }, + { + "epoch": 0.6028847909439474, + "grad_norm": 0.4645007848739624, + "learning_rate": 4.87979183982498e-06, + "loss": 0.5586, + "step": 6604 + }, + { + "epoch": 0.602976081796604, + "grad_norm": 0.48214760422706604, + "learning_rate": 4.879755171835603e-06, + "loss": 0.5442, + "step": 6605 + }, + { + "epoch": 0.6030673726492606, + "grad_norm": 0.45928800106048584, + "learning_rate": 4.879718498392332e-06, + "loss": 0.5791, + "step": 6606 + }, + { + "epoch": 0.6031586635019172, + "grad_norm": 0.47456681728363037, + "learning_rate": 4.879681819495252e-06, + "loss": 0.5629, + "step": 6607 + }, + { + "epoch": 0.6032499543545736, + "grad_norm": 0.4956415593624115, + "learning_rate": 4.879645135144448e-06, + "loss": 0.5247, + "step": 6608 + }, + { + "epoch": 0.6033412452072302, + "grad_norm": 0.4872804880142212, + "learning_rate": 4.8796084453400014e-06, + "loss": 0.5273, + "step": 6609 + }, + { + "epoch": 0.6034325360598868, + "grad_norm": 0.4660143554210663, + "learning_rate": 4.879571750082e-06, + "loss": 0.5218, + "step": 6610 + }, + { + "epoch": 0.6035238269125434, + "grad_norm": 0.47248366475105286, + "learning_rate": 4.879535049370524e-06, + "loss": 0.5681, + "step": 6611 + }, + { + "epoch": 0.6036151177651999, + "grad_norm": 0.5217357873916626, + "learning_rate": 4.87949834320566e-06, + "loss": 0.5494, + "step": 6612 + }, + { + "epoch": 0.6037064086178565, + "grad_norm": 0.5180096626281738, + "learning_rate": 4.879461631587491e-06, + "loss": 0.5488, + "step": 6613 + }, + { + "epoch": 0.6037976994705131, + "grad_norm": 0.5026726722717285, + "learning_rate": 4.879424914516101e-06, + "loss": 0.5659, + "step": 6614 + }, + { + "epoch": 0.6038889903231697, + "grad_norm": 0.48060378432273865, + "learning_rate": 4.879388191991576e-06, + "loss": 0.5399, + "step": 6615 + }, + { + "epoch": 0.6039802811758261, + "grad_norm": 0.4905996322631836, + "learning_rate": 4.8793514640139984e-06, + "loss": 0.5831, + "step": 6616 + }, + { + "epoch": 0.6040715720284827, + "grad_norm": 0.48332178592681885, + "learning_rate": 4.879314730583452e-06, + "loss": 0.5388, + "step": 6617 + }, + { + "epoch": 0.6041628628811393, + "grad_norm": 0.46793362498283386, + "learning_rate": 4.879277991700023e-06, + "loss": 0.5677, + "step": 6618 + }, + { + "epoch": 0.6042541537337959, + "grad_norm": 0.494028776884079, + "learning_rate": 4.879241247363794e-06, + "loss": 0.5431, + "step": 6619 + }, + { + "epoch": 0.6043454445864525, + "grad_norm": 0.4762108325958252, + "learning_rate": 4.879204497574849e-06, + "loss": 0.5641, + "step": 6620 + }, + { + "epoch": 0.604436735439109, + "grad_norm": 0.5171505212783813, + "learning_rate": 4.879167742333274e-06, + "loss": 0.5205, + "step": 6621 + }, + { + "epoch": 0.6045280262917656, + "grad_norm": 0.481498122215271, + "learning_rate": 4.879130981639152e-06, + "loss": 0.5881, + "step": 6622 + }, + { + "epoch": 0.6046193171444221, + "grad_norm": 0.465772807598114, + "learning_rate": 4.879094215492567e-06, + "loss": 0.5671, + "step": 6623 + }, + { + "epoch": 0.6047106079970787, + "grad_norm": 0.4752053916454315, + "learning_rate": 4.879057443893603e-06, + "loss": 0.557, + "step": 6624 + }, + { + "epoch": 0.6048018988497352, + "grad_norm": 0.49054965376853943, + "learning_rate": 4.879020666842346e-06, + "loss": 0.5466, + "step": 6625 + }, + { + "epoch": 0.6048931897023918, + "grad_norm": 0.5034470558166504, + "learning_rate": 4.878983884338878e-06, + "loss": 0.5564, + "step": 6626 + }, + { + "epoch": 0.6049844805550484, + "grad_norm": 0.46935346722602844, + "learning_rate": 4.8789470963832855e-06, + "loss": 0.6284, + "step": 6627 + }, + { + "epoch": 0.605075771407705, + "grad_norm": 0.5029718279838562, + "learning_rate": 4.878910302975651e-06, + "loss": 0.5393, + "step": 6628 + }, + { + "epoch": 0.6051670622603615, + "grad_norm": 0.5063959360122681, + "learning_rate": 4.878873504116059e-06, + "loss": 0.5293, + "step": 6629 + }, + { + "epoch": 0.6052583531130181, + "grad_norm": 0.4406181871891022, + "learning_rate": 4.878836699804596e-06, + "loss": 0.5957, + "step": 6630 + }, + { + "epoch": 0.6053496439656746, + "grad_norm": 0.4913816452026367, + "learning_rate": 4.878799890041344e-06, + "loss": 0.5384, + "step": 6631 + }, + { + "epoch": 0.6054409348183312, + "grad_norm": 0.49894848465919495, + "learning_rate": 4.8787630748263885e-06, + "loss": 0.4832, + "step": 6632 + }, + { + "epoch": 0.6055322256709877, + "grad_norm": 0.49704280495643616, + "learning_rate": 4.878726254159812e-06, + "loss": 0.589, + "step": 6633 + }, + { + "epoch": 0.6056235165236443, + "grad_norm": 0.48128774762153625, + "learning_rate": 4.878689428041702e-06, + "loss": 0.5927, + "step": 6634 + }, + { + "epoch": 0.6057148073763009, + "grad_norm": 0.4738103449344635, + "learning_rate": 4.878652596472141e-06, + "loss": 0.5817, + "step": 6635 + }, + { + "epoch": 0.6058060982289575, + "grad_norm": 0.45133355259895325, + "learning_rate": 4.878615759451213e-06, + "loss": 0.5863, + "step": 6636 + }, + { + "epoch": 0.6058973890816141, + "grad_norm": 0.46876418590545654, + "learning_rate": 4.878578916979004e-06, + "loss": 0.5669, + "step": 6637 + }, + { + "epoch": 0.6059886799342706, + "grad_norm": 0.4404279589653015, + "learning_rate": 4.878542069055596e-06, + "loss": 0.5538, + "step": 6638 + }, + { + "epoch": 0.6060799707869271, + "grad_norm": 0.48807528614997864, + "learning_rate": 4.878505215681076e-06, + "loss": 0.5776, + "step": 6639 + }, + { + "epoch": 0.6061712616395837, + "grad_norm": 0.4696383774280548, + "learning_rate": 4.878468356855528e-06, + "loss": 0.5735, + "step": 6640 + }, + { + "epoch": 0.6062625524922403, + "grad_norm": 0.4758829176425934, + "learning_rate": 4.878431492579035e-06, + "loss": 0.5286, + "step": 6641 + }, + { + "epoch": 0.6063538433448968, + "grad_norm": 0.5062653422355652, + "learning_rate": 4.878394622851683e-06, + "loss": 0.5429, + "step": 6642 + }, + { + "epoch": 0.6064451341975534, + "grad_norm": 0.45333290100097656, + "learning_rate": 4.878357747673556e-06, + "loss": 0.573, + "step": 6643 + }, + { + "epoch": 0.60653642505021, + "grad_norm": 0.47655531764030457, + "learning_rate": 4.8783208670447385e-06, + "loss": 0.5489, + "step": 6644 + }, + { + "epoch": 0.6066277159028666, + "grad_norm": 0.46420758962631226, + "learning_rate": 4.878283980965315e-06, + "loss": 0.593, + "step": 6645 + }, + { + "epoch": 0.6067190067555231, + "grad_norm": 0.47172150015830994, + "learning_rate": 4.878247089435369e-06, + "loss": 0.5589, + "step": 6646 + }, + { + "epoch": 0.6068102976081796, + "grad_norm": 0.498750776052475, + "learning_rate": 4.878210192454987e-06, + "loss": 0.5619, + "step": 6647 + }, + { + "epoch": 0.6069015884608362, + "grad_norm": 0.4563499689102173, + "learning_rate": 4.878173290024253e-06, + "loss": 0.5581, + "step": 6648 + }, + { + "epoch": 0.6069928793134928, + "grad_norm": 0.501768946647644, + "learning_rate": 4.878136382143249e-06, + "loss": 0.555, + "step": 6649 + }, + { + "epoch": 0.6070841701661494, + "grad_norm": 0.41963446140289307, + "learning_rate": 4.878099468812063e-06, + "loss": 0.5899, + "step": 6650 + }, + { + "epoch": 0.6071754610188059, + "grad_norm": 0.4800238311290741, + "learning_rate": 4.878062550030779e-06, + "loss": 0.5919, + "step": 6651 + }, + { + "epoch": 0.6072667518714625, + "grad_norm": 0.48251470923423767, + "learning_rate": 4.87802562579948e-06, + "loss": 0.5465, + "step": 6652 + }, + { + "epoch": 0.6073580427241191, + "grad_norm": 0.49031537771224976, + "learning_rate": 4.877988696118253e-06, + "loss": 0.5631, + "step": 6653 + }, + { + "epoch": 0.6074493335767756, + "grad_norm": 0.4655381143093109, + "learning_rate": 4.87795176098718e-06, + "loss": 0.5409, + "step": 6654 + }, + { + "epoch": 0.6075406244294321, + "grad_norm": 0.4741600751876831, + "learning_rate": 4.8779148204063474e-06, + "loss": 0.5698, + "step": 6655 + }, + { + "epoch": 0.6076319152820887, + "grad_norm": 0.46388664841651917, + "learning_rate": 4.877877874375838e-06, + "loss": 0.5703, + "step": 6656 + }, + { + "epoch": 0.6077232061347453, + "grad_norm": 0.4835990369319916, + "learning_rate": 4.877840922895739e-06, + "loss": 0.5293, + "step": 6657 + }, + { + "epoch": 0.6078144969874019, + "grad_norm": 0.5238643288612366, + "learning_rate": 4.877803965966133e-06, + "loss": 0.5871, + "step": 6658 + }, + { + "epoch": 0.6079057878400584, + "grad_norm": 0.47979891300201416, + "learning_rate": 4.877767003587107e-06, + "loss": 0.5823, + "step": 6659 + }, + { + "epoch": 0.607997078692715, + "grad_norm": 0.4595835208892822, + "learning_rate": 4.877730035758744e-06, + "loss": 0.565, + "step": 6660 + }, + { + "epoch": 0.6080883695453716, + "grad_norm": 0.4723290801048279, + "learning_rate": 4.877693062481128e-06, + "loss": 0.5925, + "step": 6661 + }, + { + "epoch": 0.6081796603980281, + "grad_norm": 0.48561760783195496, + "learning_rate": 4.8776560837543455e-06, + "loss": 0.5807, + "step": 6662 + }, + { + "epoch": 0.6082709512506846, + "grad_norm": 0.514550507068634, + "learning_rate": 4.87761909957848e-06, + "loss": 0.5259, + "step": 6663 + }, + { + "epoch": 0.6083622421033412, + "grad_norm": 0.4767912030220032, + "learning_rate": 4.877582109953617e-06, + "loss": 0.586, + "step": 6664 + }, + { + "epoch": 0.6084535329559978, + "grad_norm": 0.4881967604160309, + "learning_rate": 4.8775451148798405e-06, + "loss": 0.583, + "step": 6665 + }, + { + "epoch": 0.6085448238086544, + "grad_norm": 0.49684855341911316, + "learning_rate": 4.877508114357237e-06, + "loss": 0.5864, + "step": 6666 + }, + { + "epoch": 0.608636114661311, + "grad_norm": 0.4959401786327362, + "learning_rate": 4.87747110838589e-06, + "loss": 0.5993, + "step": 6667 + }, + { + "epoch": 0.6087274055139675, + "grad_norm": 0.43664366006851196, + "learning_rate": 4.877434096965883e-06, + "loss": 0.5905, + "step": 6668 + }, + { + "epoch": 0.6088186963666241, + "grad_norm": 0.5053138136863708, + "learning_rate": 4.8773970800973026e-06, + "loss": 0.5167, + "step": 6669 + }, + { + "epoch": 0.6089099872192806, + "grad_norm": 0.48941323161125183, + "learning_rate": 4.877360057780234e-06, + "loss": 0.5256, + "step": 6670 + }, + { + "epoch": 0.6090012780719372, + "grad_norm": 0.5132427215576172, + "learning_rate": 4.877323030014761e-06, + "loss": 0.5295, + "step": 6671 + }, + { + "epoch": 0.6090925689245937, + "grad_norm": 0.4969777762889862, + "learning_rate": 4.877285996800969e-06, + "loss": 0.5407, + "step": 6672 + }, + { + "epoch": 0.6091838597772503, + "grad_norm": 0.48388800024986267, + "learning_rate": 4.877248958138942e-06, + "loss": 0.5351, + "step": 6673 + }, + { + "epoch": 0.6092751506299069, + "grad_norm": 0.45894569158554077, + "learning_rate": 4.877211914028766e-06, + "loss": 0.583, + "step": 6674 + }, + { + "epoch": 0.6093664414825635, + "grad_norm": 0.4784761667251587, + "learning_rate": 4.877174864470525e-06, + "loss": 0.575, + "step": 6675 + }, + { + "epoch": 0.60945773233522, + "grad_norm": 0.4827232360839844, + "learning_rate": 4.877137809464305e-06, + "loss": 0.599, + "step": 6676 + }, + { + "epoch": 0.6095490231878766, + "grad_norm": 0.521451473236084, + "learning_rate": 4.87710074901019e-06, + "loss": 0.5125, + "step": 6677 + }, + { + "epoch": 0.6096403140405331, + "grad_norm": 0.48504433035850525, + "learning_rate": 4.877063683108265e-06, + "loss": 0.5844, + "step": 6678 + }, + { + "epoch": 0.6097316048931897, + "grad_norm": 0.4770262837409973, + "learning_rate": 4.8770266117586156e-06, + "loss": 0.5809, + "step": 6679 + }, + { + "epoch": 0.6098228957458462, + "grad_norm": 0.4901217818260193, + "learning_rate": 4.876989534961326e-06, + "loss": 0.5382, + "step": 6680 + }, + { + "epoch": 0.6099141865985028, + "grad_norm": 0.4853779375553131, + "learning_rate": 4.876952452716481e-06, + "loss": 0.5413, + "step": 6681 + }, + { + "epoch": 0.6100054774511594, + "grad_norm": 0.43906691670417786, + "learning_rate": 4.876915365024167e-06, + "loss": 0.5954, + "step": 6682 + }, + { + "epoch": 0.610096768303816, + "grad_norm": 0.4842917323112488, + "learning_rate": 4.876878271884468e-06, + "loss": 0.562, + "step": 6683 + }, + { + "epoch": 0.6101880591564726, + "grad_norm": 0.4316321611404419, + "learning_rate": 4.876841173297468e-06, + "loss": 0.5613, + "step": 6684 + }, + { + "epoch": 0.6102793500091291, + "grad_norm": 0.5021042227745056, + "learning_rate": 4.8768040692632535e-06, + "loss": 0.5505, + "step": 6685 + }, + { + "epoch": 0.6103706408617856, + "grad_norm": 0.4679020941257477, + "learning_rate": 4.87676695978191e-06, + "loss": 0.5909, + "step": 6686 + }, + { + "epoch": 0.6104619317144422, + "grad_norm": 0.444809228181839, + "learning_rate": 4.876729844853521e-06, + "loss": 0.5742, + "step": 6687 + }, + { + "epoch": 0.6105532225670988, + "grad_norm": 0.4718126058578491, + "learning_rate": 4.876692724478172e-06, + "loss": 0.6272, + "step": 6688 + }, + { + "epoch": 0.6106445134197553, + "grad_norm": 0.4538072645664215, + "learning_rate": 4.8766555986559486e-06, + "loss": 0.5948, + "step": 6689 + }, + { + "epoch": 0.6107358042724119, + "grad_norm": 0.4900776147842407, + "learning_rate": 4.876618467386936e-06, + "loss": 0.5425, + "step": 6690 + }, + { + "epoch": 0.6108270951250685, + "grad_norm": 0.468636691570282, + "learning_rate": 4.8765813306712176e-06, + "loss": 0.5756, + "step": 6691 + }, + { + "epoch": 0.6109183859777251, + "grad_norm": 0.4879375994205475, + "learning_rate": 4.87654418850888e-06, + "loss": 0.5588, + "step": 6692 + }, + { + "epoch": 0.6110096768303815, + "grad_norm": 0.4592452347278595, + "learning_rate": 4.876507040900009e-06, + "loss": 0.5602, + "step": 6693 + }, + { + "epoch": 0.6111009676830381, + "grad_norm": 0.4656529724597931, + "learning_rate": 4.87646988784469e-06, + "loss": 0.5841, + "step": 6694 + }, + { + "epoch": 0.6111922585356947, + "grad_norm": 0.5078822374343872, + "learning_rate": 4.876432729343004e-06, + "loss": 0.5243, + "step": 6695 + }, + { + "epoch": 0.6112835493883513, + "grad_norm": 0.5077348947525024, + "learning_rate": 4.876395565395041e-06, + "loss": 0.5552, + "step": 6696 + }, + { + "epoch": 0.6113748402410079, + "grad_norm": 0.5060480833053589, + "learning_rate": 4.876358396000884e-06, + "loss": 0.5295, + "step": 6697 + }, + { + "epoch": 0.6114661310936644, + "grad_norm": 0.5024237632751465, + "learning_rate": 4.8763212211606195e-06, + "loss": 0.5405, + "step": 6698 + }, + { + "epoch": 0.611557421946321, + "grad_norm": 0.4694049656391144, + "learning_rate": 4.87628404087433e-06, + "loss": 0.5667, + "step": 6699 + }, + { + "epoch": 0.6116487127989776, + "grad_norm": 0.4793836176395416, + "learning_rate": 4.8762468551421036e-06, + "loss": 0.5393, + "step": 6700 + }, + { + "epoch": 0.6117400036516341, + "grad_norm": 0.47623714804649353, + "learning_rate": 4.8762096639640235e-06, + "loss": 0.6217, + "step": 6701 + }, + { + "epoch": 0.6118312945042906, + "grad_norm": 0.4685893654823303, + "learning_rate": 4.876172467340176e-06, + "loss": 0.5784, + "step": 6702 + }, + { + "epoch": 0.6119225853569472, + "grad_norm": 0.4784940779209137, + "learning_rate": 4.876135265270646e-06, + "loss": 0.5558, + "step": 6703 + }, + { + "epoch": 0.6120138762096038, + "grad_norm": 0.4810478091239929, + "learning_rate": 4.876098057755519e-06, + "loss": 0.5849, + "step": 6704 + }, + { + "epoch": 0.6121051670622604, + "grad_norm": 0.48445048928260803, + "learning_rate": 4.876060844794881e-06, + "loss": 0.5505, + "step": 6705 + }, + { + "epoch": 0.612196457914917, + "grad_norm": 0.4794257581233978, + "learning_rate": 4.8760236263888164e-06, + "loss": 0.5184, + "step": 6706 + }, + { + "epoch": 0.6122877487675735, + "grad_norm": 0.5021847486495972, + "learning_rate": 4.875986402537409e-06, + "loss": 0.5736, + "step": 6707 + }, + { + "epoch": 0.6123790396202301, + "grad_norm": 0.4724544584751129, + "learning_rate": 4.875949173240747e-06, + "loss": 0.5499, + "step": 6708 + }, + { + "epoch": 0.6124703304728866, + "grad_norm": 0.46069616079330444, + "learning_rate": 4.8759119384989135e-06, + "loss": 0.5568, + "step": 6709 + }, + { + "epoch": 0.6125616213255431, + "grad_norm": 0.483998566865921, + "learning_rate": 4.875874698311995e-06, + "loss": 0.5561, + "step": 6710 + }, + { + "epoch": 0.6126529121781997, + "grad_norm": 0.4514109492301941, + "learning_rate": 4.875837452680077e-06, + "loss": 0.5653, + "step": 6711 + }, + { + "epoch": 0.6127442030308563, + "grad_norm": 0.5066013336181641, + "learning_rate": 4.8758002016032435e-06, + "loss": 0.5495, + "step": 6712 + }, + { + "epoch": 0.6128354938835129, + "grad_norm": 0.46755924820899963, + "learning_rate": 4.875762945081582e-06, + "loss": 0.58, + "step": 6713 + }, + { + "epoch": 0.6129267847361695, + "grad_norm": 0.5198830962181091, + "learning_rate": 4.875725683115176e-06, + "loss": 0.5439, + "step": 6714 + }, + { + "epoch": 0.613018075588826, + "grad_norm": 0.5160527229309082, + "learning_rate": 4.87568841570411e-06, + "loss": 0.5552, + "step": 6715 + }, + { + "epoch": 0.6131093664414826, + "grad_norm": 0.4803176522254944, + "learning_rate": 4.8756511428484735e-06, + "loss": 0.5798, + "step": 6716 + }, + { + "epoch": 0.6132006572941391, + "grad_norm": 0.48177605867385864, + "learning_rate": 4.875613864548348e-06, + "loss": 0.5779, + "step": 6717 + }, + { + "epoch": 0.6132919481467957, + "grad_norm": 0.43784141540527344, + "learning_rate": 4.87557658080382e-06, + "loss": 0.6006, + "step": 6718 + }, + { + "epoch": 0.6133832389994522, + "grad_norm": 0.48374176025390625, + "learning_rate": 4.875539291614977e-06, + "loss": 0.555, + "step": 6719 + }, + { + "epoch": 0.6134745298521088, + "grad_norm": 0.5567234754562378, + "learning_rate": 4.875501996981902e-06, + "loss": 0.5305, + "step": 6720 + }, + { + "epoch": 0.6135658207047654, + "grad_norm": 0.4590087831020355, + "learning_rate": 4.87546469690468e-06, + "loss": 0.5688, + "step": 6721 + }, + { + "epoch": 0.613657111557422, + "grad_norm": 0.48353564739227295, + "learning_rate": 4.875427391383399e-06, + "loss": 0.5413, + "step": 6722 + }, + { + "epoch": 0.6137484024100786, + "grad_norm": 0.5299851298332214, + "learning_rate": 4.875390080418142e-06, + "loss": 0.508, + "step": 6723 + }, + { + "epoch": 0.613839693262735, + "grad_norm": 0.5225685238838196, + "learning_rate": 4.875352764008997e-06, + "loss": 0.5339, + "step": 6724 + }, + { + "epoch": 0.6139309841153916, + "grad_norm": 0.4891888201236725, + "learning_rate": 4.875315442156048e-06, + "loss": 0.5669, + "step": 6725 + }, + { + "epoch": 0.6140222749680482, + "grad_norm": 0.4962139427661896, + "learning_rate": 4.87527811485938e-06, + "loss": 0.547, + "step": 6726 + }, + { + "epoch": 0.6141135658207048, + "grad_norm": 0.4678439497947693, + "learning_rate": 4.8752407821190795e-06, + "loss": 0.5137, + "step": 6727 + }, + { + "epoch": 0.6142048566733613, + "grad_norm": 0.44920462369918823, + "learning_rate": 4.875203443935231e-06, + "loss": 0.5803, + "step": 6728 + }, + { + "epoch": 0.6142961475260179, + "grad_norm": 0.4586242437362671, + "learning_rate": 4.8751661003079225e-06, + "loss": 0.5633, + "step": 6729 + }, + { + "epoch": 0.6143874383786745, + "grad_norm": 0.5028007626533508, + "learning_rate": 4.875128751237237e-06, + "loss": 0.5592, + "step": 6730 + }, + { + "epoch": 0.6144787292313311, + "grad_norm": 0.4836203455924988, + "learning_rate": 4.875091396723262e-06, + "loss": 0.5814, + "step": 6731 + }, + { + "epoch": 0.6145700200839875, + "grad_norm": 0.4920401871204376, + "learning_rate": 4.875054036766082e-06, + "loss": 0.5631, + "step": 6732 + }, + { + "epoch": 0.6146613109366441, + "grad_norm": 0.48420894145965576, + "learning_rate": 4.875016671365782e-06, + "loss": 0.5339, + "step": 6733 + }, + { + "epoch": 0.6147526017893007, + "grad_norm": 0.4764941930770874, + "learning_rate": 4.874979300522449e-06, + "loss": 0.5848, + "step": 6734 + }, + { + "epoch": 0.6148438926419573, + "grad_norm": 0.4716431796550751, + "learning_rate": 4.874941924236168e-06, + "loss": 0.5837, + "step": 6735 + }, + { + "epoch": 0.6149351834946138, + "grad_norm": 0.4521605372428894, + "learning_rate": 4.874904542507025e-06, + "loss": 0.5948, + "step": 6736 + }, + { + "epoch": 0.6150264743472704, + "grad_norm": 0.4837581217288971, + "learning_rate": 4.874867155335105e-06, + "loss": 0.5684, + "step": 6737 + }, + { + "epoch": 0.615117765199927, + "grad_norm": 0.4844236373901367, + "learning_rate": 4.874829762720494e-06, + "loss": 0.5616, + "step": 6738 + }, + { + "epoch": 0.6152090560525836, + "grad_norm": 0.48173418641090393, + "learning_rate": 4.874792364663279e-06, + "loss": 0.563, + "step": 6739 + }, + { + "epoch": 0.61530034690524, + "grad_norm": 0.47091907262802124, + "learning_rate": 4.8747549611635425e-06, + "loss": 0.6046, + "step": 6740 + }, + { + "epoch": 0.6153916377578966, + "grad_norm": 0.46111443638801575, + "learning_rate": 4.874717552221374e-06, + "loss": 0.5701, + "step": 6741 + }, + { + "epoch": 0.6154829286105532, + "grad_norm": 0.4633390009403229, + "learning_rate": 4.874680137836857e-06, + "loss": 0.5766, + "step": 6742 + }, + { + "epoch": 0.6155742194632098, + "grad_norm": 0.48768845200538635, + "learning_rate": 4.874642718010077e-06, + "loss": 0.5945, + "step": 6743 + }, + { + "epoch": 0.6156655103158664, + "grad_norm": 0.4470047056674957, + "learning_rate": 4.874605292741122e-06, + "loss": 0.6077, + "step": 6744 + }, + { + "epoch": 0.6157568011685229, + "grad_norm": 0.4571237862110138, + "learning_rate": 4.874567862030074e-06, + "loss": 0.5568, + "step": 6745 + }, + { + "epoch": 0.6158480920211795, + "grad_norm": 0.4488978087902069, + "learning_rate": 4.874530425877023e-06, + "loss": 0.5826, + "step": 6746 + }, + { + "epoch": 0.6159393828738361, + "grad_norm": 0.4446945786476135, + "learning_rate": 4.874492984282052e-06, + "loss": 0.5576, + "step": 6747 + }, + { + "epoch": 0.6160306737264926, + "grad_norm": 0.49562668800354004, + "learning_rate": 4.874455537245249e-06, + "loss": 0.5456, + "step": 6748 + }, + { + "epoch": 0.6161219645791491, + "grad_norm": 0.4972386360168457, + "learning_rate": 4.874418084766696e-06, + "loss": 0.5853, + "step": 6749 + }, + { + "epoch": 0.6162132554318057, + "grad_norm": 0.44575411081314087, + "learning_rate": 4.874380626846483e-06, + "loss": 0.6198, + "step": 6750 + }, + { + "epoch": 0.6163045462844623, + "grad_norm": 0.4581645727157593, + "learning_rate": 4.8743431634846935e-06, + "loss": 0.5894, + "step": 6751 + }, + { + "epoch": 0.6163958371371189, + "grad_norm": 0.48095446825027466, + "learning_rate": 4.874305694681414e-06, + "loss": 0.5672, + "step": 6752 + }, + { + "epoch": 0.6164871279897755, + "grad_norm": 0.4494469463825226, + "learning_rate": 4.874268220436731e-06, + "loss": 0.6102, + "step": 6753 + }, + { + "epoch": 0.616578418842432, + "grad_norm": 0.4605487585067749, + "learning_rate": 4.8742307407507295e-06, + "loss": 0.52, + "step": 6754 + }, + { + "epoch": 0.6166697096950885, + "grad_norm": 0.4614282548427582, + "learning_rate": 4.874193255623495e-06, + "loss": 0.5684, + "step": 6755 + }, + { + "epoch": 0.6167610005477451, + "grad_norm": 0.47917234897613525, + "learning_rate": 4.874155765055115e-06, + "loss": 0.575, + "step": 6756 + }, + { + "epoch": 0.6168522914004017, + "grad_norm": 0.4985160231590271, + "learning_rate": 4.8741182690456734e-06, + "loss": 0.5594, + "step": 6757 + }, + { + "epoch": 0.6169435822530582, + "grad_norm": 0.459269255399704, + "learning_rate": 4.874080767595258e-06, + "loss": 0.5599, + "step": 6758 + }, + { + "epoch": 0.6170348731057148, + "grad_norm": 0.4751177132129669, + "learning_rate": 4.874043260703953e-06, + "loss": 0.5698, + "step": 6759 + }, + { + "epoch": 0.6171261639583714, + "grad_norm": 0.49993017315864563, + "learning_rate": 4.874005748371847e-06, + "loss": 0.5605, + "step": 6760 + }, + { + "epoch": 0.617217454811028, + "grad_norm": 0.47425952553749084, + "learning_rate": 4.873968230599023e-06, + "loss": 0.5808, + "step": 6761 + }, + { + "epoch": 0.6173087456636845, + "grad_norm": 0.5008487105369568, + "learning_rate": 4.873930707385568e-06, + "loss": 0.5424, + "step": 6762 + }, + { + "epoch": 0.617400036516341, + "grad_norm": 0.4794832766056061, + "learning_rate": 4.873893178731569e-06, + "loss": 0.5626, + "step": 6763 + }, + { + "epoch": 0.6174913273689976, + "grad_norm": 0.4427911043167114, + "learning_rate": 4.873855644637112e-06, + "loss": 0.5799, + "step": 6764 + }, + { + "epoch": 0.6175826182216542, + "grad_norm": 0.478507936000824, + "learning_rate": 4.873818105102281e-06, + "loss": 0.5379, + "step": 6765 + }, + { + "epoch": 0.6176739090743107, + "grad_norm": 0.47080838680267334, + "learning_rate": 4.873780560127164e-06, + "loss": 0.5492, + "step": 6766 + }, + { + "epoch": 0.6177651999269673, + "grad_norm": 0.5094258785247803, + "learning_rate": 4.873743009711846e-06, + "loss": 0.5649, + "step": 6767 + }, + { + "epoch": 0.6178564907796239, + "grad_norm": 0.46449899673461914, + "learning_rate": 4.873705453856414e-06, + "loss": 0.5514, + "step": 6768 + }, + { + "epoch": 0.6179477816322805, + "grad_norm": 0.46504729986190796, + "learning_rate": 4.873667892560952e-06, + "loss": 0.5595, + "step": 6769 + }, + { + "epoch": 0.6180390724849371, + "grad_norm": 0.47635912895202637, + "learning_rate": 4.873630325825549e-06, + "loss": 0.6034, + "step": 6770 + }, + { + "epoch": 0.6181303633375935, + "grad_norm": 0.46879950165748596, + "learning_rate": 4.873592753650289e-06, + "loss": 0.5675, + "step": 6771 + }, + { + "epoch": 0.6182216541902501, + "grad_norm": 0.44376999139785767, + "learning_rate": 4.873555176035258e-06, + "loss": 0.5893, + "step": 6772 + }, + { + "epoch": 0.6183129450429067, + "grad_norm": 0.49865826964378357, + "learning_rate": 4.873517592980544e-06, + "loss": 0.5276, + "step": 6773 + }, + { + "epoch": 0.6184042358955633, + "grad_norm": 0.5280443429946899, + "learning_rate": 4.873480004486232e-06, + "loss": 0.558, + "step": 6774 + }, + { + "epoch": 0.6184955267482198, + "grad_norm": 0.463270366191864, + "learning_rate": 4.873442410552409e-06, + "loss": 0.569, + "step": 6775 + }, + { + "epoch": 0.6185868176008764, + "grad_norm": 0.45212098956108093, + "learning_rate": 4.873404811179159e-06, + "loss": 0.5823, + "step": 6776 + }, + { + "epoch": 0.618678108453533, + "grad_norm": 0.4595543444156647, + "learning_rate": 4.873367206366569e-06, + "loss": 0.5589, + "step": 6777 + }, + { + "epoch": 0.6187693993061896, + "grad_norm": 0.4787905216217041, + "learning_rate": 4.873329596114728e-06, + "loss": 0.5238, + "step": 6778 + }, + { + "epoch": 0.618860690158846, + "grad_norm": 0.4639001190662384, + "learning_rate": 4.873291980423718e-06, + "loss": 0.5277, + "step": 6779 + }, + { + "epoch": 0.6189519810115026, + "grad_norm": 0.45444950461387634, + "learning_rate": 4.873254359293628e-06, + "loss": 0.5872, + "step": 6780 + }, + { + "epoch": 0.6190432718641592, + "grad_norm": 0.478276789188385, + "learning_rate": 4.873216732724542e-06, + "loss": 0.5079, + "step": 6781 + }, + { + "epoch": 0.6191345627168158, + "grad_norm": 0.4550018310546875, + "learning_rate": 4.8731791007165486e-06, + "loss": 0.5628, + "step": 6782 + }, + { + "epoch": 0.6192258535694724, + "grad_norm": 0.46640145778656006, + "learning_rate": 4.873141463269733e-06, + "loss": 0.5936, + "step": 6783 + }, + { + "epoch": 0.6193171444221289, + "grad_norm": 0.5526840686798096, + "learning_rate": 4.873103820384181e-06, + "loss": 0.4885, + "step": 6784 + }, + { + "epoch": 0.6194084352747855, + "grad_norm": 0.49185454845428467, + "learning_rate": 4.873066172059979e-06, + "loss": 0.557, + "step": 6785 + }, + { + "epoch": 0.6194997261274421, + "grad_norm": 0.46757274866104126, + "learning_rate": 4.873028518297215e-06, + "loss": 0.508, + "step": 6786 + }, + { + "epoch": 0.6195910169800986, + "grad_norm": 0.5046389102935791, + "learning_rate": 4.872990859095972e-06, + "loss": 0.527, + "step": 6787 + }, + { + "epoch": 0.6196823078327551, + "grad_norm": 0.49149593710899353, + "learning_rate": 4.872953194456339e-06, + "loss": 0.5307, + "step": 6788 + }, + { + "epoch": 0.6197735986854117, + "grad_norm": 0.47421544790267944, + "learning_rate": 4.872915524378402e-06, + "loss": 0.558, + "step": 6789 + }, + { + "epoch": 0.6198648895380683, + "grad_norm": 0.5136987566947937, + "learning_rate": 4.872877848862246e-06, + "loss": 0.5622, + "step": 6790 + }, + { + "epoch": 0.6199561803907249, + "grad_norm": 0.46013733744621277, + "learning_rate": 4.872840167907959e-06, + "loss": 0.5825, + "step": 6791 + }, + { + "epoch": 0.6200474712433814, + "grad_norm": 0.45961102843284607, + "learning_rate": 4.872802481515626e-06, + "loss": 0.5615, + "step": 6792 + }, + { + "epoch": 0.620138762096038, + "grad_norm": 0.5355058908462524, + "learning_rate": 4.872764789685335e-06, + "loss": 0.5736, + "step": 6793 + }, + { + "epoch": 0.6202300529486945, + "grad_norm": 0.470675528049469, + "learning_rate": 4.87272709241717e-06, + "loss": 0.5786, + "step": 6794 + }, + { + "epoch": 0.6203213438013511, + "grad_norm": 0.46330684423446655, + "learning_rate": 4.872689389711219e-06, + "loss": 0.5804, + "step": 6795 + }, + { + "epoch": 0.6204126346540076, + "grad_norm": 0.4798535108566284, + "learning_rate": 4.872651681567568e-06, + "loss": 0.5445, + "step": 6796 + }, + { + "epoch": 0.6205039255066642, + "grad_norm": 0.45411041378974915, + "learning_rate": 4.872613967986304e-06, + "loss": 0.5576, + "step": 6797 + }, + { + "epoch": 0.6205952163593208, + "grad_norm": 0.45589005947113037, + "learning_rate": 4.872576248967512e-06, + "loss": 0.5683, + "step": 6798 + }, + { + "epoch": 0.6206865072119774, + "grad_norm": 0.47630149126052856, + "learning_rate": 4.872538524511281e-06, + "loss": 0.5839, + "step": 6799 + }, + { + "epoch": 0.620777798064634, + "grad_norm": 0.4907068908214569, + "learning_rate": 4.872500794617694e-06, + "loss": 0.5579, + "step": 6800 + }, + { + "epoch": 0.6208690889172905, + "grad_norm": 0.42665061354637146, + "learning_rate": 4.87246305928684e-06, + "loss": 0.6018, + "step": 6801 + }, + { + "epoch": 0.620960379769947, + "grad_norm": 0.4853961169719696, + "learning_rate": 4.872425318518805e-06, + "loss": 0.5375, + "step": 6802 + }, + { + "epoch": 0.6210516706226036, + "grad_norm": 0.4988376200199127, + "learning_rate": 4.872387572313676e-06, + "loss": 0.5376, + "step": 6803 + }, + { + "epoch": 0.6211429614752602, + "grad_norm": 0.4592692255973816, + "learning_rate": 4.872349820671538e-06, + "loss": 0.6158, + "step": 6804 + }, + { + "epoch": 0.6212342523279167, + "grad_norm": 0.4625259041786194, + "learning_rate": 4.872312063592478e-06, + "loss": 0.5686, + "step": 6805 + }, + { + "epoch": 0.6213255431805733, + "grad_norm": 0.44266244769096375, + "learning_rate": 4.872274301076583e-06, + "loss": 0.5566, + "step": 6806 + }, + { + "epoch": 0.6214168340332299, + "grad_norm": 0.4729127883911133, + "learning_rate": 4.8722365331239395e-06, + "loss": 0.5455, + "step": 6807 + }, + { + "epoch": 0.6215081248858865, + "grad_norm": 0.45378631353378296, + "learning_rate": 4.872198759734634e-06, + "loss": 0.6228, + "step": 6808 + }, + { + "epoch": 0.621599415738543, + "grad_norm": 0.49315428733825684, + "learning_rate": 4.872160980908754e-06, + "loss": 0.5632, + "step": 6809 + }, + { + "epoch": 0.6216907065911995, + "grad_norm": 0.4899539053440094, + "learning_rate": 4.872123196646384e-06, + "loss": 0.576, + "step": 6810 + }, + { + "epoch": 0.6217819974438561, + "grad_norm": 0.49566248059272766, + "learning_rate": 4.8720854069476115e-06, + "loss": 0.5291, + "step": 6811 + }, + { + "epoch": 0.6218732882965127, + "grad_norm": 0.502640962600708, + "learning_rate": 4.872047611812524e-06, + "loss": 0.5481, + "step": 6812 + }, + { + "epoch": 0.6219645791491692, + "grad_norm": 0.4754643440246582, + "learning_rate": 4.872009811241206e-06, + "loss": 0.5341, + "step": 6813 + }, + { + "epoch": 0.6220558700018258, + "grad_norm": 0.49413207173347473, + "learning_rate": 4.871972005233747e-06, + "loss": 0.5896, + "step": 6814 + }, + { + "epoch": 0.6221471608544824, + "grad_norm": 0.4669203460216522, + "learning_rate": 4.871934193790232e-06, + "loss": 0.5793, + "step": 6815 + }, + { + "epoch": 0.622238451707139, + "grad_norm": 0.46994253993034363, + "learning_rate": 4.871896376910747e-06, + "loss": 0.578, + "step": 6816 + }, + { + "epoch": 0.6223297425597956, + "grad_norm": 0.45391371846199036, + "learning_rate": 4.87185855459538e-06, + "loss": 0.6101, + "step": 6817 + }, + { + "epoch": 0.622421033412452, + "grad_norm": 0.4863850474357605, + "learning_rate": 4.871820726844217e-06, + "loss": 0.5485, + "step": 6818 + }, + { + "epoch": 0.6225123242651086, + "grad_norm": 0.4765547513961792, + "learning_rate": 4.871782893657346e-06, + "loss": 0.5693, + "step": 6819 + }, + { + "epoch": 0.6226036151177652, + "grad_norm": 0.4878007769584656, + "learning_rate": 4.871745055034852e-06, + "loss": 0.529, + "step": 6820 + }, + { + "epoch": 0.6226949059704218, + "grad_norm": 0.5172663927078247, + "learning_rate": 4.871707210976821e-06, + "loss": 0.5024, + "step": 6821 + }, + { + "epoch": 0.6227861968230783, + "grad_norm": 0.4774305820465088, + "learning_rate": 4.871669361483343e-06, + "loss": 0.5872, + "step": 6822 + }, + { + "epoch": 0.6228774876757349, + "grad_norm": 0.4880296587944031, + "learning_rate": 4.8716315065545015e-06, + "loss": 0.5655, + "step": 6823 + }, + { + "epoch": 0.6229687785283915, + "grad_norm": 0.44014614820480347, + "learning_rate": 4.871593646190385e-06, + "loss": 0.5606, + "step": 6824 + }, + { + "epoch": 0.623060069381048, + "grad_norm": 0.43739625811576843, + "learning_rate": 4.8715557803910795e-06, + "loss": 0.6017, + "step": 6825 + }, + { + "epoch": 0.6231513602337045, + "grad_norm": 0.5093047618865967, + "learning_rate": 4.871517909156673e-06, + "loss": 0.5082, + "step": 6826 + }, + { + "epoch": 0.6232426510863611, + "grad_norm": 0.4460550546646118, + "learning_rate": 4.87148003248725e-06, + "loss": 0.5576, + "step": 6827 + }, + { + "epoch": 0.6233339419390177, + "grad_norm": 0.47157022356987, + "learning_rate": 4.8714421503829e-06, + "loss": 0.5478, + "step": 6828 + }, + { + "epoch": 0.6234252327916743, + "grad_norm": 0.4716569781303406, + "learning_rate": 4.8714042628437076e-06, + "loss": 0.6036, + "step": 6829 + }, + { + "epoch": 0.6235165236443309, + "grad_norm": 0.4632833003997803, + "learning_rate": 4.871366369869761e-06, + "loss": 0.5711, + "step": 6830 + }, + { + "epoch": 0.6236078144969874, + "grad_norm": 0.4717225432395935, + "learning_rate": 4.871328471461147e-06, + "loss": 0.5772, + "step": 6831 + }, + { + "epoch": 0.623699105349644, + "grad_norm": 0.4828265905380249, + "learning_rate": 4.87129056761795e-06, + "loss": 0.5341, + "step": 6832 + }, + { + "epoch": 0.6237903962023005, + "grad_norm": 0.4877874255180359, + "learning_rate": 4.871252658340261e-06, + "loss": 0.5172, + "step": 6833 + }, + { + "epoch": 0.6238816870549571, + "grad_norm": 0.5006727576255798, + "learning_rate": 4.871214743628164e-06, + "loss": 0.5262, + "step": 6834 + }, + { + "epoch": 0.6239729779076136, + "grad_norm": 0.48066458106040955, + "learning_rate": 4.871176823481746e-06, + "loss": 0.5341, + "step": 6835 + }, + { + "epoch": 0.6240642687602702, + "grad_norm": 0.4904614984989166, + "learning_rate": 4.8711388979010955e-06, + "loss": 0.5404, + "step": 6836 + }, + { + "epoch": 0.6241555596129268, + "grad_norm": 0.46654000878334045, + "learning_rate": 4.871100966886298e-06, + "loss": 0.563, + "step": 6837 + }, + { + "epoch": 0.6242468504655834, + "grad_norm": 0.4642787277698517, + "learning_rate": 4.8710630304374415e-06, + "loss": 0.5352, + "step": 6838 + }, + { + "epoch": 0.62433814131824, + "grad_norm": 0.4963721036911011, + "learning_rate": 4.871025088554613e-06, + "loss": 0.5363, + "step": 6839 + }, + { + "epoch": 0.6244294321708965, + "grad_norm": 0.45704901218414307, + "learning_rate": 4.870987141237897e-06, + "loss": 0.5509, + "step": 6840 + }, + { + "epoch": 0.624520723023553, + "grad_norm": 0.49865031242370605, + "learning_rate": 4.870949188487383e-06, + "loss": 0.5435, + "step": 6841 + }, + { + "epoch": 0.6246120138762096, + "grad_norm": 0.5105474591255188, + "learning_rate": 4.870911230303158e-06, + "loss": 0.5299, + "step": 6842 + }, + { + "epoch": 0.6247033047288661, + "grad_norm": 0.5004984736442566, + "learning_rate": 4.870873266685308e-06, + "loss": 0.5116, + "step": 6843 + }, + { + "epoch": 0.6247945955815227, + "grad_norm": 0.4658140540122986, + "learning_rate": 4.870835297633919e-06, + "loss": 0.5818, + "step": 6844 + }, + { + "epoch": 0.6248858864341793, + "grad_norm": 0.4421655237674713, + "learning_rate": 4.870797323149081e-06, + "loss": 0.5728, + "step": 6845 + }, + { + "epoch": 0.6249771772868359, + "grad_norm": 0.48934146761894226, + "learning_rate": 4.870759343230878e-06, + "loss": 0.5147, + "step": 6846 + }, + { + "epoch": 0.6250684681394925, + "grad_norm": 0.45801275968551636, + "learning_rate": 4.870721357879399e-06, + "loss": 0.5699, + "step": 6847 + }, + { + "epoch": 0.625159758992149, + "grad_norm": 0.4625970721244812, + "learning_rate": 4.870683367094731e-06, + "loss": 0.564, + "step": 6848 + }, + { + "epoch": 0.6252510498448055, + "grad_norm": 0.48571011424064636, + "learning_rate": 4.870645370876959e-06, + "loss": 0.5367, + "step": 6849 + }, + { + "epoch": 0.6253423406974621, + "grad_norm": 0.4968237578868866, + "learning_rate": 4.8706073692261726e-06, + "loss": 0.5727, + "step": 6850 + }, + { + "epoch": 0.6254336315501187, + "grad_norm": 0.4700919985771179, + "learning_rate": 4.8705693621424574e-06, + "loss": 0.5829, + "step": 6851 + }, + { + "epoch": 0.6255249224027752, + "grad_norm": 0.4891088902950287, + "learning_rate": 4.870531349625901e-06, + "loss": 0.5342, + "step": 6852 + }, + { + "epoch": 0.6256162132554318, + "grad_norm": 0.4778229296207428, + "learning_rate": 4.87049333167659e-06, + "loss": 0.5865, + "step": 6853 + }, + { + "epoch": 0.6257075041080884, + "grad_norm": 0.49072831869125366, + "learning_rate": 4.870455308294613e-06, + "loss": 0.5352, + "step": 6854 + }, + { + "epoch": 0.625798794960745, + "grad_norm": 0.47274938225746155, + "learning_rate": 4.870417279480056e-06, + "loss": 0.5789, + "step": 6855 + }, + { + "epoch": 0.6258900858134014, + "grad_norm": 0.484085351228714, + "learning_rate": 4.870379245233006e-06, + "loss": 0.5521, + "step": 6856 + }, + { + "epoch": 0.625981376666058, + "grad_norm": 0.46965155005455017, + "learning_rate": 4.8703412055535505e-06, + "loss": 0.5916, + "step": 6857 + }, + { + "epoch": 0.6260726675187146, + "grad_norm": 0.4781153202056885, + "learning_rate": 4.870303160441776e-06, + "loss": 0.5508, + "step": 6858 + }, + { + "epoch": 0.6261639583713712, + "grad_norm": 0.42534342408180237, + "learning_rate": 4.870265109897771e-06, + "loss": 0.5766, + "step": 6859 + }, + { + "epoch": 0.6262552492240278, + "grad_norm": 0.5037068128585815, + "learning_rate": 4.870227053921622e-06, + "loss": 0.5949, + "step": 6860 + }, + { + "epoch": 0.6263465400766843, + "grad_norm": 0.47929733991622925, + "learning_rate": 4.870188992513416e-06, + "loss": 0.5736, + "step": 6861 + }, + { + "epoch": 0.6264378309293409, + "grad_norm": 0.4777042865753174, + "learning_rate": 4.8701509256732406e-06, + "loss": 0.5556, + "step": 6862 + }, + { + "epoch": 0.6265291217819975, + "grad_norm": 0.48000460863113403, + "learning_rate": 4.870112853401183e-06, + "loss": 0.5524, + "step": 6863 + }, + { + "epoch": 0.626620412634654, + "grad_norm": 0.483927458524704, + "learning_rate": 4.87007477569733e-06, + "loss": 0.5435, + "step": 6864 + }, + { + "epoch": 0.6267117034873105, + "grad_norm": 0.47483983635902405, + "learning_rate": 4.870036692561769e-06, + "loss": 0.5703, + "step": 6865 + }, + { + "epoch": 0.6268029943399671, + "grad_norm": 0.48292627930641174, + "learning_rate": 4.869998603994589e-06, + "loss": 0.5705, + "step": 6866 + }, + { + "epoch": 0.6268942851926237, + "grad_norm": 0.49226683378219604, + "learning_rate": 4.869960509995875e-06, + "loss": 0.5301, + "step": 6867 + }, + { + "epoch": 0.6269855760452803, + "grad_norm": 0.4820464253425598, + "learning_rate": 4.869922410565714e-06, + "loss": 0.5795, + "step": 6868 + }, + { + "epoch": 0.6270768668979368, + "grad_norm": 0.4629313349723816, + "learning_rate": 4.869884305704196e-06, + "loss": 0.572, + "step": 6869 + }, + { + "epoch": 0.6271681577505934, + "grad_norm": 0.46360844373703003, + "learning_rate": 4.869846195411406e-06, + "loss": 0.5446, + "step": 6870 + }, + { + "epoch": 0.62725944860325, + "grad_norm": 0.4693383276462555, + "learning_rate": 4.869808079687432e-06, + "loss": 0.5754, + "step": 6871 + }, + { + "epoch": 0.6273507394559065, + "grad_norm": 0.4539606273174286, + "learning_rate": 4.869769958532362e-06, + "loss": 0.5743, + "step": 6872 + }, + { + "epoch": 0.627442030308563, + "grad_norm": 0.4697413444519043, + "learning_rate": 4.869731831946282e-06, + "loss": 0.6031, + "step": 6873 + }, + { + "epoch": 0.6275333211612196, + "grad_norm": 0.4686622619628906, + "learning_rate": 4.869693699929281e-06, + "loss": 0.5542, + "step": 6874 + }, + { + "epoch": 0.6276246120138762, + "grad_norm": 0.48759475350379944, + "learning_rate": 4.869655562481445e-06, + "loss": 0.5424, + "step": 6875 + }, + { + "epoch": 0.6277159028665328, + "grad_norm": 0.44222718477249146, + "learning_rate": 4.869617419602863e-06, + "loss": 0.5406, + "step": 6876 + }, + { + "epoch": 0.6278071937191894, + "grad_norm": 0.5160248279571533, + "learning_rate": 4.8695792712936195e-06, + "loss": 0.5334, + "step": 6877 + }, + { + "epoch": 0.6278984845718459, + "grad_norm": 0.46492019295692444, + "learning_rate": 4.869541117553805e-06, + "loss": 0.5694, + "step": 6878 + }, + { + "epoch": 0.6279897754245025, + "grad_norm": 0.4616580307483673, + "learning_rate": 4.869502958383505e-06, + "loss": 0.5585, + "step": 6879 + }, + { + "epoch": 0.628081066277159, + "grad_norm": 0.5028678178787231, + "learning_rate": 4.869464793782809e-06, + "loss": 0.518, + "step": 6880 + }, + { + "epoch": 0.6281723571298156, + "grad_norm": 0.4593624770641327, + "learning_rate": 4.869426623751802e-06, + "loss": 0.5661, + "step": 6881 + }, + { + "epoch": 0.6282636479824721, + "grad_norm": 0.46661704778671265, + "learning_rate": 4.869388448290574e-06, + "loss": 0.5228, + "step": 6882 + }, + { + "epoch": 0.6283549388351287, + "grad_norm": 0.4741383492946625, + "learning_rate": 4.86935026739921e-06, + "loss": 0.5462, + "step": 6883 + }, + { + "epoch": 0.6284462296877853, + "grad_norm": 0.4574805200099945, + "learning_rate": 4.8693120810777985e-06, + "loss": 0.5652, + "step": 6884 + }, + { + "epoch": 0.6285375205404419, + "grad_norm": 0.4897408187389374, + "learning_rate": 4.869273889326428e-06, + "loss": 0.5458, + "step": 6885 + }, + { + "epoch": 0.6286288113930985, + "grad_norm": 0.4664868414402008, + "learning_rate": 4.869235692145185e-06, + "loss": 0.583, + "step": 6886 + }, + { + "epoch": 0.628720102245755, + "grad_norm": 0.42029884457588196, + "learning_rate": 4.869197489534157e-06, + "loss": 0.6233, + "step": 6887 + }, + { + "epoch": 0.6288113930984115, + "grad_norm": 0.46478205919265747, + "learning_rate": 4.869159281493432e-06, + "loss": 0.5745, + "step": 6888 + }, + { + "epoch": 0.6289026839510681, + "grad_norm": 0.4756196439266205, + "learning_rate": 4.869121068023097e-06, + "loss": 0.5497, + "step": 6889 + }, + { + "epoch": 0.6289939748037247, + "grad_norm": 0.46128344535827637, + "learning_rate": 4.86908284912324e-06, + "loss": 0.5788, + "step": 6890 + }, + { + "epoch": 0.6290852656563812, + "grad_norm": 0.4700942039489746, + "learning_rate": 4.869044624793949e-06, + "loss": 0.601, + "step": 6891 + }, + { + "epoch": 0.6291765565090378, + "grad_norm": 0.47789236903190613, + "learning_rate": 4.869006395035311e-06, + "loss": 0.5304, + "step": 6892 + }, + { + "epoch": 0.6292678473616944, + "grad_norm": 0.503341555595398, + "learning_rate": 4.868968159847413e-06, + "loss": 0.5734, + "step": 6893 + }, + { + "epoch": 0.629359138214351, + "grad_norm": 0.46819308400154114, + "learning_rate": 4.868929919230344e-06, + "loss": 0.6069, + "step": 6894 + }, + { + "epoch": 0.6294504290670074, + "grad_norm": 0.4430152177810669, + "learning_rate": 4.8688916731841905e-06, + "loss": 0.574, + "step": 6895 + }, + { + "epoch": 0.629541719919664, + "grad_norm": 0.4904649257659912, + "learning_rate": 4.8688534217090415e-06, + "loss": 0.6013, + "step": 6896 + }, + { + "epoch": 0.6296330107723206, + "grad_norm": 0.4796767234802246, + "learning_rate": 4.868815164804983e-06, + "loss": 0.5195, + "step": 6897 + }, + { + "epoch": 0.6297243016249772, + "grad_norm": 0.4502108693122864, + "learning_rate": 4.868776902472104e-06, + "loss": 0.5817, + "step": 6898 + }, + { + "epoch": 0.6298155924776337, + "grad_norm": 0.48737093806266785, + "learning_rate": 4.868738634710491e-06, + "loss": 0.592, + "step": 6899 + }, + { + "epoch": 0.6299068833302903, + "grad_norm": 0.4905972480773926, + "learning_rate": 4.868700361520234e-06, + "loss": 0.5173, + "step": 6900 + }, + { + "epoch": 0.6299981741829469, + "grad_norm": 0.4769274890422821, + "learning_rate": 4.8686620829014175e-06, + "loss": 0.5651, + "step": 6901 + }, + { + "epoch": 0.6300894650356035, + "grad_norm": 0.5017004013061523, + "learning_rate": 4.8686237988541315e-06, + "loss": 0.4905, + "step": 6902 + }, + { + "epoch": 0.63018075588826, + "grad_norm": 0.48812243342399597, + "learning_rate": 4.868585509378463e-06, + "loss": 0.5717, + "step": 6903 + }, + { + "epoch": 0.6302720467409165, + "grad_norm": 0.4746856391429901, + "learning_rate": 4.8685472144745e-06, + "loss": 0.5066, + "step": 6904 + }, + { + "epoch": 0.6303633375935731, + "grad_norm": 0.5034896731376648, + "learning_rate": 4.8685089141423295e-06, + "loss": 0.5364, + "step": 6905 + }, + { + "epoch": 0.6304546284462297, + "grad_norm": 0.4957568347454071, + "learning_rate": 4.86847060838204e-06, + "loss": 0.5405, + "step": 6906 + }, + { + "epoch": 0.6305459192988863, + "grad_norm": 0.5095025300979614, + "learning_rate": 4.868432297193719e-06, + "loss": 0.5384, + "step": 6907 + }, + { + "epoch": 0.6306372101515428, + "grad_norm": 0.4737282395362854, + "learning_rate": 4.868393980577455e-06, + "loss": 0.5471, + "step": 6908 + }, + { + "epoch": 0.6307285010041994, + "grad_norm": 0.4682391285896301, + "learning_rate": 4.868355658533335e-06, + "loss": 0.5852, + "step": 6909 + }, + { + "epoch": 0.630819791856856, + "grad_norm": 0.49427366256713867, + "learning_rate": 4.868317331061447e-06, + "loss": 0.5588, + "step": 6910 + }, + { + "epoch": 0.6309110827095125, + "grad_norm": 0.5181918144226074, + "learning_rate": 4.868278998161879e-06, + "loss": 0.5211, + "step": 6911 + }, + { + "epoch": 0.631002373562169, + "grad_norm": 0.4796361029148102, + "learning_rate": 4.868240659834718e-06, + "loss": 0.5623, + "step": 6912 + }, + { + "epoch": 0.6310936644148256, + "grad_norm": 0.47504979372024536, + "learning_rate": 4.868202316080054e-06, + "loss": 0.5509, + "step": 6913 + }, + { + "epoch": 0.6311849552674822, + "grad_norm": 0.4888305366039276, + "learning_rate": 4.868163966897973e-06, + "loss": 0.5556, + "step": 6914 + }, + { + "epoch": 0.6312762461201388, + "grad_norm": 0.48096439242362976, + "learning_rate": 4.868125612288563e-06, + "loss": 0.549, + "step": 6915 + }, + { + "epoch": 0.6313675369727954, + "grad_norm": 0.48175519704818726, + "learning_rate": 4.868087252251913e-06, + "loss": 0.5826, + "step": 6916 + }, + { + "epoch": 0.6314588278254519, + "grad_norm": 0.4804011285305023, + "learning_rate": 4.8680488867881094e-06, + "loss": 0.5755, + "step": 6917 + }, + { + "epoch": 0.6315501186781085, + "grad_norm": 0.48141223192214966, + "learning_rate": 4.868010515897242e-06, + "loss": 0.536, + "step": 6918 + }, + { + "epoch": 0.631641409530765, + "grad_norm": 0.47179386019706726, + "learning_rate": 4.8679721395793964e-06, + "loss": 0.5372, + "step": 6919 + }, + { + "epoch": 0.6317327003834216, + "grad_norm": 0.4471628963947296, + "learning_rate": 4.867933757834663e-06, + "loss": 0.573, + "step": 6920 + }, + { + "epoch": 0.6318239912360781, + "grad_norm": 0.48807066679000854, + "learning_rate": 4.867895370663127e-06, + "loss": 0.5546, + "step": 6921 + }, + { + "epoch": 0.6319152820887347, + "grad_norm": 0.4657193124294281, + "learning_rate": 4.86785697806488e-06, + "loss": 0.5707, + "step": 6922 + }, + { + "epoch": 0.6320065729413913, + "grad_norm": 0.44942954182624817, + "learning_rate": 4.8678185800400065e-06, + "loss": 0.5718, + "step": 6923 + }, + { + "epoch": 0.6320978637940479, + "grad_norm": 0.4718988835811615, + "learning_rate": 4.867780176588596e-06, + "loss": 0.5401, + "step": 6924 + }, + { + "epoch": 0.6321891546467044, + "grad_norm": 0.4852621257305145, + "learning_rate": 4.867741767710737e-06, + "loss": 0.5293, + "step": 6925 + }, + { + "epoch": 0.6322804454993609, + "grad_norm": 0.4842994511127472, + "learning_rate": 4.867703353406517e-06, + "loss": 0.5672, + "step": 6926 + }, + { + "epoch": 0.6323717363520175, + "grad_norm": 0.4699617326259613, + "learning_rate": 4.867664933676024e-06, + "loss": 0.5485, + "step": 6927 + }, + { + "epoch": 0.6324630272046741, + "grad_norm": 0.48133841156959534, + "learning_rate": 4.867626508519346e-06, + "loss": 0.5645, + "step": 6928 + }, + { + "epoch": 0.6325543180573306, + "grad_norm": 0.49133872985839844, + "learning_rate": 4.8675880779365716e-06, + "loss": 0.5281, + "step": 6929 + }, + { + "epoch": 0.6326456089099872, + "grad_norm": 0.4915831685066223, + "learning_rate": 4.867549641927788e-06, + "loss": 0.5536, + "step": 6930 + }, + { + "epoch": 0.6327368997626438, + "grad_norm": 0.4706111550331116, + "learning_rate": 4.867511200493083e-06, + "loss": 0.5597, + "step": 6931 + }, + { + "epoch": 0.6328281906153004, + "grad_norm": 0.4814959168434143, + "learning_rate": 4.867472753632546e-06, + "loss": 0.5621, + "step": 6932 + }, + { + "epoch": 0.632919481467957, + "grad_norm": 0.46304836869239807, + "learning_rate": 4.8674343013462645e-06, + "loss": 0.5578, + "step": 6933 + }, + { + "epoch": 0.6330107723206134, + "grad_norm": 0.4548390209674835, + "learning_rate": 4.8673958436343265e-06, + "loss": 0.5654, + "step": 6934 + }, + { + "epoch": 0.63310206317327, + "grad_norm": 0.4931443929672241, + "learning_rate": 4.8673573804968206e-06, + "loss": 0.5216, + "step": 6935 + }, + { + "epoch": 0.6331933540259266, + "grad_norm": 0.4796827435493469, + "learning_rate": 4.867318911933835e-06, + "loss": 0.541, + "step": 6936 + }, + { + "epoch": 0.6332846448785832, + "grad_norm": 0.46182936429977417, + "learning_rate": 4.8672804379454565e-06, + "loss": 0.6159, + "step": 6937 + }, + { + "epoch": 0.6333759357312397, + "grad_norm": 0.467631995677948, + "learning_rate": 4.867241958531775e-06, + "loss": 0.591, + "step": 6938 + }, + { + "epoch": 0.6334672265838963, + "grad_norm": 0.491238534450531, + "learning_rate": 4.867203473692877e-06, + "loss": 0.5288, + "step": 6939 + }, + { + "epoch": 0.6335585174365529, + "grad_norm": 0.49552804231643677, + "learning_rate": 4.867164983428852e-06, + "loss": 0.6014, + "step": 6940 + }, + { + "epoch": 0.6336498082892095, + "grad_norm": 0.46457505226135254, + "learning_rate": 4.867126487739789e-06, + "loss": 0.5654, + "step": 6941 + }, + { + "epoch": 0.6337410991418659, + "grad_norm": 0.46944519877433777, + "learning_rate": 4.867087986625773e-06, + "loss": 0.5775, + "step": 6942 + }, + { + "epoch": 0.6338323899945225, + "grad_norm": 0.4367902874946594, + "learning_rate": 4.867049480086896e-06, + "loss": 0.609, + "step": 6943 + }, + { + "epoch": 0.6339236808471791, + "grad_norm": 0.4667724668979645, + "learning_rate": 4.867010968123244e-06, + "loss": 0.572, + "step": 6944 + }, + { + "epoch": 0.6340149716998357, + "grad_norm": 0.4801442325115204, + "learning_rate": 4.866972450734907e-06, + "loss": 0.5662, + "step": 6945 + }, + { + "epoch": 0.6341062625524923, + "grad_norm": 0.46184635162353516, + "learning_rate": 4.866933927921971e-06, + "loss": 0.5432, + "step": 6946 + }, + { + "epoch": 0.6341975534051488, + "grad_norm": 0.4677529036998749, + "learning_rate": 4.866895399684525e-06, + "loss": 0.5837, + "step": 6947 + }, + { + "epoch": 0.6342888442578054, + "grad_norm": 0.5006551146507263, + "learning_rate": 4.866856866022658e-06, + "loss": 0.5776, + "step": 6948 + }, + { + "epoch": 0.634380135110462, + "grad_norm": 0.5281372666358948, + "learning_rate": 4.866818326936458e-06, + "loss": 0.5081, + "step": 6949 + }, + { + "epoch": 0.6344714259631185, + "grad_norm": 0.4883608818054199, + "learning_rate": 4.866779782426014e-06, + "loss": 0.5929, + "step": 6950 + }, + { + "epoch": 0.634562716815775, + "grad_norm": 0.44630369544029236, + "learning_rate": 4.866741232491413e-06, + "loss": 0.607, + "step": 6951 + }, + { + "epoch": 0.6346540076684316, + "grad_norm": 0.4750424325466156, + "learning_rate": 4.866702677132744e-06, + "loss": 0.5835, + "step": 6952 + }, + { + "epoch": 0.6347452985210882, + "grad_norm": 0.48264655470848083, + "learning_rate": 4.866664116350096e-06, + "loss": 0.5965, + "step": 6953 + }, + { + "epoch": 0.6348365893737448, + "grad_norm": 0.5142357349395752, + "learning_rate": 4.866625550143556e-06, + "loss": 0.5803, + "step": 6954 + }, + { + "epoch": 0.6349278802264013, + "grad_norm": 0.48954251408576965, + "learning_rate": 4.866586978513213e-06, + "loss": 0.5331, + "step": 6955 + }, + { + "epoch": 0.6350191710790579, + "grad_norm": 0.4623229503631592, + "learning_rate": 4.866548401459156e-06, + "loss": 0.602, + "step": 6956 + }, + { + "epoch": 0.6351104619317144, + "grad_norm": 0.47418931126594543, + "learning_rate": 4.866509818981473e-06, + "loss": 0.5671, + "step": 6957 + }, + { + "epoch": 0.635201752784371, + "grad_norm": 0.4765178859233856, + "learning_rate": 4.866471231080252e-06, + "loss": 0.5559, + "step": 6958 + }, + { + "epoch": 0.6352930436370275, + "grad_norm": 0.4940134286880493, + "learning_rate": 4.866432637755582e-06, + "loss": 0.524, + "step": 6959 + }, + { + "epoch": 0.6353843344896841, + "grad_norm": 0.4492010474205017, + "learning_rate": 4.866394039007551e-06, + "loss": 0.5731, + "step": 6960 + }, + { + "epoch": 0.6354756253423407, + "grad_norm": 0.5066560506820679, + "learning_rate": 4.866355434836247e-06, + "loss": 0.5091, + "step": 6961 + }, + { + "epoch": 0.6355669161949973, + "grad_norm": 0.47546520829200745, + "learning_rate": 4.866316825241761e-06, + "loss": 0.5391, + "step": 6962 + }, + { + "epoch": 0.6356582070476539, + "grad_norm": 0.5029250979423523, + "learning_rate": 4.866278210224178e-06, + "loss": 0.5428, + "step": 6963 + }, + { + "epoch": 0.6357494979003104, + "grad_norm": 0.49665939807891846, + "learning_rate": 4.866239589783589e-06, + "loss": 0.5503, + "step": 6964 + }, + { + "epoch": 0.6358407887529669, + "grad_norm": 0.5135893821716309, + "learning_rate": 4.866200963920082e-06, + "loss": 0.509, + "step": 6965 + }, + { + "epoch": 0.6359320796056235, + "grad_norm": 0.4982486069202423, + "learning_rate": 4.866162332633745e-06, + "loss": 0.5718, + "step": 6966 + }, + { + "epoch": 0.6360233704582801, + "grad_norm": 0.48050379753112793, + "learning_rate": 4.866123695924666e-06, + "loss": 0.5946, + "step": 6967 + }, + { + "epoch": 0.6361146613109366, + "grad_norm": 0.5253972411155701, + "learning_rate": 4.866085053792935e-06, + "loss": 0.5486, + "step": 6968 + }, + { + "epoch": 0.6362059521635932, + "grad_norm": 0.4842381179332733, + "learning_rate": 4.866046406238639e-06, + "loss": 0.561, + "step": 6969 + }, + { + "epoch": 0.6362972430162498, + "grad_norm": 0.4771561622619629, + "learning_rate": 4.866007753261868e-06, + "loss": 0.5998, + "step": 6970 + }, + { + "epoch": 0.6363885338689064, + "grad_norm": 0.47672176361083984, + "learning_rate": 4.86596909486271e-06, + "loss": 0.5583, + "step": 6971 + }, + { + "epoch": 0.636479824721563, + "grad_norm": 0.4708032011985779, + "learning_rate": 4.865930431041253e-06, + "loss": 0.5304, + "step": 6972 + }, + { + "epoch": 0.6365711155742194, + "grad_norm": 0.4919012486934662, + "learning_rate": 4.865891761797587e-06, + "loss": 0.573, + "step": 6973 + }, + { + "epoch": 0.636662406426876, + "grad_norm": 0.47533199191093445, + "learning_rate": 4.8658530871318e-06, + "loss": 0.5401, + "step": 6974 + }, + { + "epoch": 0.6367536972795326, + "grad_norm": 0.49873265624046326, + "learning_rate": 4.865814407043979e-06, + "loss": 0.5685, + "step": 6975 + }, + { + "epoch": 0.6368449881321891, + "grad_norm": 0.5192239284515381, + "learning_rate": 4.865775721534216e-06, + "loss": 0.543, + "step": 6976 + }, + { + "epoch": 0.6369362789848457, + "grad_norm": 0.4921743869781494, + "learning_rate": 4.865737030602597e-06, + "loss": 0.555, + "step": 6977 + }, + { + "epoch": 0.6370275698375023, + "grad_norm": 0.4668711721897125, + "learning_rate": 4.865698334249211e-06, + "loss": 0.5756, + "step": 6978 + }, + { + "epoch": 0.6371188606901589, + "grad_norm": 0.48408249020576477, + "learning_rate": 4.865659632474147e-06, + "loss": 0.5142, + "step": 6979 + }, + { + "epoch": 0.6372101515428155, + "grad_norm": 0.4745343029499054, + "learning_rate": 4.865620925277494e-06, + "loss": 0.5648, + "step": 6980 + }, + { + "epoch": 0.6373014423954719, + "grad_norm": 0.5018688440322876, + "learning_rate": 4.86558221265934e-06, + "loss": 0.5168, + "step": 6981 + }, + { + "epoch": 0.6373927332481285, + "grad_norm": 0.45551785826683044, + "learning_rate": 4.8655434946197755e-06, + "loss": 0.5655, + "step": 6982 + }, + { + "epoch": 0.6374840241007851, + "grad_norm": 0.48265546560287476, + "learning_rate": 4.865504771158886e-06, + "loss": 0.587, + "step": 6983 + }, + { + "epoch": 0.6375753149534417, + "grad_norm": 0.5142802596092224, + "learning_rate": 4.865466042276764e-06, + "loss": 0.5322, + "step": 6984 + }, + { + "epoch": 0.6376666058060982, + "grad_norm": 0.5058342814445496, + "learning_rate": 4.865427307973497e-06, + "loss": 0.5547, + "step": 6985 + }, + { + "epoch": 0.6377578966587548, + "grad_norm": 0.46384134888648987, + "learning_rate": 4.865388568249172e-06, + "loss": 0.565, + "step": 6986 + }, + { + "epoch": 0.6378491875114114, + "grad_norm": 0.48899731040000916, + "learning_rate": 4.865349823103879e-06, + "loss": 0.5721, + "step": 6987 + }, + { + "epoch": 0.637940478364068, + "grad_norm": 0.5037791728973389, + "learning_rate": 4.865311072537708e-06, + "loss": 0.568, + "step": 6988 + }, + { + "epoch": 0.6380317692167244, + "grad_norm": 0.4998443126678467, + "learning_rate": 4.865272316550744e-06, + "loss": 0.5403, + "step": 6989 + }, + { + "epoch": 0.638123060069381, + "grad_norm": 0.48621243238449097, + "learning_rate": 4.865233555143081e-06, + "loss": 0.5668, + "step": 6990 + }, + { + "epoch": 0.6382143509220376, + "grad_norm": 0.4595537781715393, + "learning_rate": 4.865194788314805e-06, + "loss": 0.5926, + "step": 6991 + }, + { + "epoch": 0.6383056417746942, + "grad_norm": 0.5018857717514038, + "learning_rate": 4.8651560160660035e-06, + "loss": 0.5947, + "step": 6992 + }, + { + "epoch": 0.6383969326273508, + "grad_norm": 0.4915996193885803, + "learning_rate": 4.865117238396768e-06, + "loss": 0.5342, + "step": 6993 + }, + { + "epoch": 0.6384882234800073, + "grad_norm": 0.48663175106048584, + "learning_rate": 4.865078455307186e-06, + "loss": 0.5697, + "step": 6994 + }, + { + "epoch": 0.6385795143326639, + "grad_norm": 0.5148661136627197, + "learning_rate": 4.865039666797347e-06, + "loss": 0.516, + "step": 6995 + }, + { + "epoch": 0.6386708051853204, + "grad_norm": 0.4845469892024994, + "learning_rate": 4.86500087286734e-06, + "loss": 0.5908, + "step": 6996 + }, + { + "epoch": 0.638762096037977, + "grad_norm": 0.5054563879966736, + "learning_rate": 4.864962073517253e-06, + "loss": 0.5355, + "step": 6997 + }, + { + "epoch": 0.6388533868906335, + "grad_norm": 0.4722919464111328, + "learning_rate": 4.864923268747176e-06, + "loss": 0.5859, + "step": 6998 + }, + { + "epoch": 0.6389446777432901, + "grad_norm": 0.4796196222305298, + "learning_rate": 4.864884458557198e-06, + "loss": 0.594, + "step": 6999 + }, + { + "epoch": 0.6390359685959467, + "grad_norm": 0.5009047985076904, + "learning_rate": 4.864845642947405e-06, + "loss": 0.5252, + "step": 7000 + }, + { + "epoch": 0.6391272594486033, + "grad_norm": 0.49181586503982544, + "learning_rate": 4.86480682191789e-06, + "loss": 0.5716, + "step": 7001 + }, + { + "epoch": 0.6392185503012598, + "grad_norm": 0.4741979241371155, + "learning_rate": 4.864767995468741e-06, + "loss": 0.5478, + "step": 7002 + }, + { + "epoch": 0.6393098411539164, + "grad_norm": 0.49167874455451965, + "learning_rate": 4.864729163600045e-06, + "loss": 0.5602, + "step": 7003 + }, + { + "epoch": 0.6394011320065729, + "grad_norm": 0.4834587872028351, + "learning_rate": 4.864690326311893e-06, + "loss": 0.574, + "step": 7004 + }, + { + "epoch": 0.6394924228592295, + "grad_norm": 0.4738582670688629, + "learning_rate": 4.864651483604372e-06, + "loss": 0.6003, + "step": 7005 + }, + { + "epoch": 0.639583713711886, + "grad_norm": 0.4697592556476593, + "learning_rate": 4.8646126354775734e-06, + "loss": 0.5921, + "step": 7006 + }, + { + "epoch": 0.6396750045645426, + "grad_norm": 0.4961363971233368, + "learning_rate": 4.864573781931584e-06, + "loss": 0.5564, + "step": 7007 + }, + { + "epoch": 0.6397662954171992, + "grad_norm": 0.4693470597267151, + "learning_rate": 4.864534922966495e-06, + "loss": 0.5429, + "step": 7008 + }, + { + "epoch": 0.6398575862698558, + "grad_norm": 0.47899678349494934, + "learning_rate": 4.864496058582394e-06, + "loss": 0.5828, + "step": 7009 + }, + { + "epoch": 0.6399488771225124, + "grad_norm": 0.5104174017906189, + "learning_rate": 4.864457188779371e-06, + "loss": 0.5372, + "step": 7010 + }, + { + "epoch": 0.6400401679751689, + "grad_norm": 0.44969645142555237, + "learning_rate": 4.864418313557514e-06, + "loss": 0.5346, + "step": 7011 + }, + { + "epoch": 0.6401314588278254, + "grad_norm": 0.461300790309906, + "learning_rate": 4.864379432916913e-06, + "loss": 0.571, + "step": 7012 + }, + { + "epoch": 0.640222749680482, + "grad_norm": 0.4992125332355499, + "learning_rate": 4.864340546857655e-06, + "loss": 0.5409, + "step": 7013 + }, + { + "epoch": 0.6403140405331386, + "grad_norm": 0.46509861946105957, + "learning_rate": 4.864301655379832e-06, + "loss": 0.531, + "step": 7014 + }, + { + "epoch": 0.6404053313857951, + "grad_norm": 0.4667862355709076, + "learning_rate": 4.864262758483533e-06, + "loss": 0.5934, + "step": 7015 + }, + { + "epoch": 0.6404966222384517, + "grad_norm": 0.47297972440719604, + "learning_rate": 4.864223856168846e-06, + "loss": 0.5617, + "step": 7016 + }, + { + "epoch": 0.6405879130911083, + "grad_norm": 0.49526268243789673, + "learning_rate": 4.864184948435859e-06, + "loss": 0.5449, + "step": 7017 + }, + { + "epoch": 0.6406792039437649, + "grad_norm": 0.476052850484848, + "learning_rate": 4.864146035284664e-06, + "loss": 0.5855, + "step": 7018 + }, + { + "epoch": 0.6407704947964215, + "grad_norm": 0.4677605926990509, + "learning_rate": 4.864107116715348e-06, + "loss": 0.5383, + "step": 7019 + }, + { + "epoch": 0.6408617856490779, + "grad_norm": 0.4609716832637787, + "learning_rate": 4.864068192728001e-06, + "loss": 0.5594, + "step": 7020 + }, + { + "epoch": 0.6409530765017345, + "grad_norm": 0.474293977022171, + "learning_rate": 4.864029263322711e-06, + "loss": 0.6097, + "step": 7021 + }, + { + "epoch": 0.6410443673543911, + "grad_norm": 0.5020062923431396, + "learning_rate": 4.863990328499569e-06, + "loss": 0.5318, + "step": 7022 + }, + { + "epoch": 0.6411356582070477, + "grad_norm": 0.5006397366523743, + "learning_rate": 4.863951388258663e-06, + "loss": 0.5367, + "step": 7023 + }, + { + "epoch": 0.6412269490597042, + "grad_norm": 0.4524247348308563, + "learning_rate": 4.863912442600084e-06, + "loss": 0.6085, + "step": 7024 + }, + { + "epoch": 0.6413182399123608, + "grad_norm": 0.49144449830055237, + "learning_rate": 4.8638734915239185e-06, + "loss": 0.5757, + "step": 7025 + }, + { + "epoch": 0.6414095307650174, + "grad_norm": 0.45420485734939575, + "learning_rate": 4.863834535030258e-06, + "loss": 0.5535, + "step": 7026 + }, + { + "epoch": 0.6415008216176739, + "grad_norm": 0.48167258501052856, + "learning_rate": 4.863795573119191e-06, + "loss": 0.5748, + "step": 7027 + }, + { + "epoch": 0.6415921124703304, + "grad_norm": 0.4665902256965637, + "learning_rate": 4.863756605790807e-06, + "loss": 0.548, + "step": 7028 + }, + { + "epoch": 0.641683403322987, + "grad_norm": 0.4943673014640808, + "learning_rate": 4.863717633045196e-06, + "loss": 0.5778, + "step": 7029 + }, + { + "epoch": 0.6417746941756436, + "grad_norm": 0.47630271315574646, + "learning_rate": 4.863678654882444e-06, + "loss": 0.5224, + "step": 7030 + }, + { + "epoch": 0.6418659850283002, + "grad_norm": 0.48608219623565674, + "learning_rate": 4.863639671302645e-06, + "loss": 0.5409, + "step": 7031 + }, + { + "epoch": 0.6419572758809567, + "grad_norm": 0.5115810632705688, + "learning_rate": 4.863600682305885e-06, + "loss": 0.5382, + "step": 7032 + }, + { + "epoch": 0.6420485667336133, + "grad_norm": 0.47453412413597107, + "learning_rate": 4.863561687892255e-06, + "loss": 0.5651, + "step": 7033 + }, + { + "epoch": 0.6421398575862699, + "grad_norm": 0.4928090274333954, + "learning_rate": 4.863522688061844e-06, + "loss": 0.5524, + "step": 7034 + }, + { + "epoch": 0.6422311484389264, + "grad_norm": 0.49483200907707214, + "learning_rate": 4.863483682814741e-06, + "loss": 0.5659, + "step": 7035 + }, + { + "epoch": 0.642322439291583, + "grad_norm": 0.4834745526313782, + "learning_rate": 4.863444672151035e-06, + "loss": 0.5217, + "step": 7036 + }, + { + "epoch": 0.6424137301442395, + "grad_norm": 0.4966868460178375, + "learning_rate": 4.863405656070816e-06, + "loss": 0.5251, + "step": 7037 + }, + { + "epoch": 0.6425050209968961, + "grad_norm": 0.49362772703170776, + "learning_rate": 4.863366634574175e-06, + "loss": 0.5443, + "step": 7038 + }, + { + "epoch": 0.6425963118495527, + "grad_norm": 0.4965590834617615, + "learning_rate": 4.863327607661198e-06, + "loss": 0.5572, + "step": 7039 + }, + { + "epoch": 0.6426876027022093, + "grad_norm": 0.4994772672653198, + "learning_rate": 4.863288575331977e-06, + "loss": 0.5715, + "step": 7040 + }, + { + "epoch": 0.6427788935548658, + "grad_norm": 0.46661585569381714, + "learning_rate": 4.863249537586601e-06, + "loss": 0.5598, + "step": 7041 + }, + { + "epoch": 0.6428701844075224, + "grad_norm": 0.48692572116851807, + "learning_rate": 4.863210494425159e-06, + "loss": 0.5884, + "step": 7042 + }, + { + "epoch": 0.6429614752601789, + "grad_norm": 0.4793577492237091, + "learning_rate": 4.863171445847741e-06, + "loss": 0.5891, + "step": 7043 + }, + { + "epoch": 0.6430527661128355, + "grad_norm": 0.45973846316337585, + "learning_rate": 4.863132391854436e-06, + "loss": 0.6043, + "step": 7044 + }, + { + "epoch": 0.643144056965492, + "grad_norm": 0.5355404019355774, + "learning_rate": 4.8630933324453335e-06, + "loss": 0.5585, + "step": 7045 + }, + { + "epoch": 0.6432353478181486, + "grad_norm": 0.4608287513256073, + "learning_rate": 4.863054267620524e-06, + "loss": 0.5259, + "step": 7046 + }, + { + "epoch": 0.6433266386708052, + "grad_norm": 0.4698329567909241, + "learning_rate": 4.863015197380096e-06, + "loss": 0.5571, + "step": 7047 + }, + { + "epoch": 0.6434179295234618, + "grad_norm": 0.48829951882362366, + "learning_rate": 4.862976121724138e-06, + "loss": 0.5423, + "step": 7048 + }, + { + "epoch": 0.6435092203761184, + "grad_norm": 0.4767964482307434, + "learning_rate": 4.862937040652742e-06, + "loss": 0.5804, + "step": 7049 + }, + { + "epoch": 0.6436005112287749, + "grad_norm": 0.48586952686309814, + "learning_rate": 4.862897954165996e-06, + "loss": 0.5666, + "step": 7050 + }, + { + "epoch": 0.6436918020814314, + "grad_norm": 0.47358763217926025, + "learning_rate": 4.86285886226399e-06, + "loss": 0.582, + "step": 7051 + }, + { + "epoch": 0.643783092934088, + "grad_norm": 0.4813621938228607, + "learning_rate": 4.862819764946814e-06, + "loss": 0.5213, + "step": 7052 + }, + { + "epoch": 0.6438743837867446, + "grad_norm": 0.5047279000282288, + "learning_rate": 4.862780662214557e-06, + "loss": 0.5321, + "step": 7053 + }, + { + "epoch": 0.6439656746394011, + "grad_norm": 0.4845025837421417, + "learning_rate": 4.8627415540673084e-06, + "loss": 0.5895, + "step": 7054 + }, + { + "epoch": 0.6440569654920577, + "grad_norm": 0.5393257141113281, + "learning_rate": 4.862702440505159e-06, + "loss": 0.5734, + "step": 7055 + }, + { + "epoch": 0.6441482563447143, + "grad_norm": 0.4931747019290924, + "learning_rate": 4.862663321528197e-06, + "loss": 0.5723, + "step": 7056 + }, + { + "epoch": 0.6442395471973709, + "grad_norm": 0.4680810868740082, + "learning_rate": 4.862624197136513e-06, + "loss": 0.5893, + "step": 7057 + }, + { + "epoch": 0.6443308380500273, + "grad_norm": 0.478000670671463, + "learning_rate": 4.862585067330196e-06, + "loss": 0.5445, + "step": 7058 + }, + { + "epoch": 0.6444221289026839, + "grad_norm": 0.45748037099838257, + "learning_rate": 4.862545932109336e-06, + "loss": 0.5602, + "step": 7059 + }, + { + "epoch": 0.6445134197553405, + "grad_norm": 0.4726163148880005, + "learning_rate": 4.8625067914740236e-06, + "loss": 0.5647, + "step": 7060 + }, + { + "epoch": 0.6446047106079971, + "grad_norm": 0.46038323640823364, + "learning_rate": 4.862467645424347e-06, + "loss": 0.5948, + "step": 7061 + }, + { + "epoch": 0.6446960014606536, + "grad_norm": 0.4862045645713806, + "learning_rate": 4.862428493960397e-06, + "loss": 0.5658, + "step": 7062 + }, + { + "epoch": 0.6447872923133102, + "grad_norm": 0.4877092242240906, + "learning_rate": 4.862389337082262e-06, + "loss": 0.5691, + "step": 7063 + }, + { + "epoch": 0.6448785831659668, + "grad_norm": 0.46891269087791443, + "learning_rate": 4.862350174790034e-06, + "loss": 0.5793, + "step": 7064 + }, + { + "epoch": 0.6449698740186234, + "grad_norm": 0.48404431343078613, + "learning_rate": 4.8623110070838e-06, + "loss": 0.5765, + "step": 7065 + }, + { + "epoch": 0.6450611648712798, + "grad_norm": 0.4830872416496277, + "learning_rate": 4.862271833963651e-06, + "loss": 0.5711, + "step": 7066 + }, + { + "epoch": 0.6451524557239364, + "grad_norm": 0.4434204697608948, + "learning_rate": 4.862232655429678e-06, + "loss": 0.5389, + "step": 7067 + }, + { + "epoch": 0.645243746576593, + "grad_norm": 0.4916917681694031, + "learning_rate": 4.86219347148197e-06, + "loss": 0.54, + "step": 7068 + }, + { + "epoch": 0.6453350374292496, + "grad_norm": 0.4579000174999237, + "learning_rate": 4.8621542821206156e-06, + "loss": 0.5738, + "step": 7069 + }, + { + "epoch": 0.6454263282819062, + "grad_norm": 0.5112605094909668, + "learning_rate": 4.862115087345706e-06, + "loss": 0.5108, + "step": 7070 + }, + { + "epoch": 0.6455176191345627, + "grad_norm": 0.4885942041873932, + "learning_rate": 4.8620758871573295e-06, + "loss": 0.5343, + "step": 7071 + }, + { + "epoch": 0.6456089099872193, + "grad_norm": 0.4681268632411957, + "learning_rate": 4.862036681555577e-06, + "loss": 0.5762, + "step": 7072 + }, + { + "epoch": 0.6457002008398759, + "grad_norm": 0.4841708838939667, + "learning_rate": 4.86199747054054e-06, + "loss": 0.5512, + "step": 7073 + }, + { + "epoch": 0.6457914916925324, + "grad_norm": 0.47792258858680725, + "learning_rate": 4.861958254112305e-06, + "loss": 0.5858, + "step": 7074 + }, + { + "epoch": 0.6458827825451889, + "grad_norm": 0.49698325991630554, + "learning_rate": 4.861919032270965e-06, + "loss": 0.5534, + "step": 7075 + }, + { + "epoch": 0.6459740733978455, + "grad_norm": 0.46646907925605774, + "learning_rate": 4.861879805016608e-06, + "loss": 0.5792, + "step": 7076 + }, + { + "epoch": 0.6460653642505021, + "grad_norm": 0.5015003085136414, + "learning_rate": 4.861840572349325e-06, + "loss": 0.5573, + "step": 7077 + }, + { + "epoch": 0.6461566551031587, + "grad_norm": 0.48704519867897034, + "learning_rate": 4.861801334269204e-06, + "loss": 0.5425, + "step": 7078 + }, + { + "epoch": 0.6462479459558153, + "grad_norm": 0.4622795283794403, + "learning_rate": 4.861762090776336e-06, + "loss": 0.5988, + "step": 7079 + }, + { + "epoch": 0.6463392368084718, + "grad_norm": 0.4888950288295746, + "learning_rate": 4.861722841870811e-06, + "loss": 0.5336, + "step": 7080 + }, + { + "epoch": 0.6464305276611284, + "grad_norm": 0.45387011766433716, + "learning_rate": 4.8616835875527214e-06, + "loss": 0.5172, + "step": 7081 + }, + { + "epoch": 0.6465218185137849, + "grad_norm": 0.4789862036705017, + "learning_rate": 4.861644327822153e-06, + "loss": 0.5308, + "step": 7082 + }, + { + "epoch": 0.6466131093664415, + "grad_norm": 0.541013777256012, + "learning_rate": 4.861605062679198e-06, + "loss": 0.5328, + "step": 7083 + }, + { + "epoch": 0.646704400219098, + "grad_norm": 0.4703809320926666, + "learning_rate": 4.861565792123945e-06, + "loss": 0.5612, + "step": 7084 + }, + { + "epoch": 0.6467956910717546, + "grad_norm": 0.5052406787872314, + "learning_rate": 4.861526516156487e-06, + "loss": 0.5491, + "step": 7085 + }, + { + "epoch": 0.6468869819244112, + "grad_norm": 0.4887964427471161, + "learning_rate": 4.861487234776911e-06, + "loss": 0.5082, + "step": 7086 + }, + { + "epoch": 0.6469782727770678, + "grad_norm": 0.46693921089172363, + "learning_rate": 4.861447947985309e-06, + "loss": 0.537, + "step": 7087 + }, + { + "epoch": 0.6470695636297243, + "grad_norm": 0.47602561116218567, + "learning_rate": 4.861408655781768e-06, + "loss": 0.5793, + "step": 7088 + }, + { + "epoch": 0.6471608544823809, + "grad_norm": 0.48010677099227905, + "learning_rate": 4.861369358166382e-06, + "loss": 0.5289, + "step": 7089 + }, + { + "epoch": 0.6472521453350374, + "grad_norm": 0.4572601318359375, + "learning_rate": 4.861330055139238e-06, + "loss": 0.6123, + "step": 7090 + }, + { + "epoch": 0.647343436187694, + "grad_norm": 0.4742497205734253, + "learning_rate": 4.861290746700428e-06, + "loss": 0.578, + "step": 7091 + }, + { + "epoch": 0.6474347270403505, + "grad_norm": 0.4582093358039856, + "learning_rate": 4.861251432850041e-06, + "loss": 0.5714, + "step": 7092 + }, + { + "epoch": 0.6475260178930071, + "grad_norm": 0.4922527074813843, + "learning_rate": 4.861212113588167e-06, + "loss": 0.5294, + "step": 7093 + }, + { + "epoch": 0.6476173087456637, + "grad_norm": 0.44947686791419983, + "learning_rate": 4.861172788914897e-06, + "loss": 0.5785, + "step": 7094 + }, + { + "epoch": 0.6477085995983203, + "grad_norm": 0.47162026166915894, + "learning_rate": 4.861133458830322e-06, + "loss": 0.5993, + "step": 7095 + }, + { + "epoch": 0.6477998904509769, + "grad_norm": 0.4638842046260834, + "learning_rate": 4.861094123334529e-06, + "loss": 0.5784, + "step": 7096 + }, + { + "epoch": 0.6478911813036333, + "grad_norm": 0.4959758222103119, + "learning_rate": 4.8610547824276115e-06, + "loss": 0.5485, + "step": 7097 + }, + { + "epoch": 0.6479824721562899, + "grad_norm": 0.46112290024757385, + "learning_rate": 4.861015436109656e-06, + "loss": 0.5277, + "step": 7098 + }, + { + "epoch": 0.6480737630089465, + "grad_norm": 0.515872597694397, + "learning_rate": 4.860976084380757e-06, + "loss": 0.5704, + "step": 7099 + }, + { + "epoch": 0.6481650538616031, + "grad_norm": 0.45898380875587463, + "learning_rate": 4.8609367272410015e-06, + "loss": 0.567, + "step": 7100 + }, + { + "epoch": 0.6482563447142596, + "grad_norm": 0.4805999994277954, + "learning_rate": 4.860897364690481e-06, + "loss": 0.552, + "step": 7101 + }, + { + "epoch": 0.6483476355669162, + "grad_norm": 0.4865241050720215, + "learning_rate": 4.860857996729284e-06, + "loss": 0.5845, + "step": 7102 + }, + { + "epoch": 0.6484389264195728, + "grad_norm": 0.44255074858665466, + "learning_rate": 4.860818623357504e-06, + "loss": 0.5823, + "step": 7103 + }, + { + "epoch": 0.6485302172722294, + "grad_norm": 0.4624083638191223, + "learning_rate": 4.860779244575229e-06, + "loss": 0.551, + "step": 7104 + }, + { + "epoch": 0.6486215081248858, + "grad_norm": 0.4579180181026459, + "learning_rate": 4.86073986038255e-06, + "loss": 0.557, + "step": 7105 + }, + { + "epoch": 0.6487127989775424, + "grad_norm": 0.47918766736984253, + "learning_rate": 4.860700470779556e-06, + "loss": 0.6062, + "step": 7106 + }, + { + "epoch": 0.648804089830199, + "grad_norm": 0.5047083497047424, + "learning_rate": 4.860661075766337e-06, + "loss": 0.5351, + "step": 7107 + }, + { + "epoch": 0.6488953806828556, + "grad_norm": 0.4629473388195038, + "learning_rate": 4.860621675342987e-06, + "loss": 0.5547, + "step": 7108 + }, + { + "epoch": 0.6489866715355121, + "grad_norm": 0.500735878944397, + "learning_rate": 4.860582269509592e-06, + "loss": 0.5659, + "step": 7109 + }, + { + "epoch": 0.6490779623881687, + "grad_norm": 0.4849826991558075, + "learning_rate": 4.860542858266244e-06, + "loss": 0.5815, + "step": 7110 + }, + { + "epoch": 0.6491692532408253, + "grad_norm": 0.47594091296195984, + "learning_rate": 4.8605034416130346e-06, + "loss": 0.5887, + "step": 7111 + }, + { + "epoch": 0.6492605440934819, + "grad_norm": 0.47701653838157654, + "learning_rate": 4.860464019550052e-06, + "loss": 0.5575, + "step": 7112 + }, + { + "epoch": 0.6493518349461384, + "grad_norm": 0.48606082797050476, + "learning_rate": 4.860424592077387e-06, + "loss": 0.5218, + "step": 7113 + }, + { + "epoch": 0.6494431257987949, + "grad_norm": 0.5112506151199341, + "learning_rate": 4.860385159195132e-06, + "loss": 0.5747, + "step": 7114 + }, + { + "epoch": 0.6495344166514515, + "grad_norm": 0.4936199486255646, + "learning_rate": 4.860345720903375e-06, + "loss": 0.5472, + "step": 7115 + }, + { + "epoch": 0.6496257075041081, + "grad_norm": 0.5091608166694641, + "learning_rate": 4.860306277202207e-06, + "loss": 0.5501, + "step": 7116 + }, + { + "epoch": 0.6497169983567647, + "grad_norm": 0.46732449531555176, + "learning_rate": 4.860266828091718e-06, + "loss": 0.5417, + "step": 7117 + }, + { + "epoch": 0.6498082892094212, + "grad_norm": 0.47846463322639465, + "learning_rate": 4.860227373571999e-06, + "loss": 0.5099, + "step": 7118 + }, + { + "epoch": 0.6498995800620778, + "grad_norm": 0.46967703104019165, + "learning_rate": 4.860187913643142e-06, + "loss": 0.5583, + "step": 7119 + }, + { + "epoch": 0.6499908709147344, + "grad_norm": 0.47422489523887634, + "learning_rate": 4.860148448305234e-06, + "loss": 0.5568, + "step": 7120 + }, + { + "epoch": 0.6500821617673909, + "grad_norm": 0.4830613434314728, + "learning_rate": 4.8601089775583685e-06, + "loss": 0.5352, + "step": 7121 + }, + { + "epoch": 0.6501734526200474, + "grad_norm": 0.4961165487766266, + "learning_rate": 4.860069501402634e-06, + "loss": 0.5676, + "step": 7122 + }, + { + "epoch": 0.650264743472704, + "grad_norm": 0.4859030544757843, + "learning_rate": 4.860030019838122e-06, + "loss": 0.5569, + "step": 7123 + }, + { + "epoch": 0.6503560343253606, + "grad_norm": 0.4693850576877594, + "learning_rate": 4.859990532864922e-06, + "loss": 0.589, + "step": 7124 + }, + { + "epoch": 0.6504473251780172, + "grad_norm": 0.4513656795024872, + "learning_rate": 4.859951040483126e-06, + "loss": 0.5813, + "step": 7125 + }, + { + "epoch": 0.6505386160306738, + "grad_norm": 0.5004180073738098, + "learning_rate": 4.859911542692823e-06, + "loss": 0.5635, + "step": 7126 + }, + { + "epoch": 0.6506299068833303, + "grad_norm": 0.4728805422782898, + "learning_rate": 4.859872039494105e-06, + "loss": 0.5988, + "step": 7127 + }, + { + "epoch": 0.6507211977359868, + "grad_norm": 0.4887143075466156, + "learning_rate": 4.859832530887062e-06, + "loss": 0.5413, + "step": 7128 + }, + { + "epoch": 0.6508124885886434, + "grad_norm": 0.47410306334495544, + "learning_rate": 4.859793016871783e-06, + "loss": 0.5817, + "step": 7129 + }, + { + "epoch": 0.6509037794413, + "grad_norm": 0.4667634665966034, + "learning_rate": 4.859753497448361e-06, + "loss": 0.5774, + "step": 7130 + }, + { + "epoch": 0.6509950702939565, + "grad_norm": 0.44985172152519226, + "learning_rate": 4.8597139726168845e-06, + "loss": 0.6046, + "step": 7131 + }, + { + "epoch": 0.6510863611466131, + "grad_norm": 0.4747409224510193, + "learning_rate": 4.859674442377445e-06, + "loss": 0.5373, + "step": 7132 + }, + { + "epoch": 0.6511776519992697, + "grad_norm": 0.47171667218208313, + "learning_rate": 4.859634906730134e-06, + "loss": 0.5767, + "step": 7133 + }, + { + "epoch": 0.6512689428519263, + "grad_norm": 0.4726508855819702, + "learning_rate": 4.85959536567504e-06, + "loss": 0.5841, + "step": 7134 + }, + { + "epoch": 0.6513602337045828, + "grad_norm": 0.4402090311050415, + "learning_rate": 4.859555819212256e-06, + "loss": 0.6066, + "step": 7135 + }, + { + "epoch": 0.6514515245572393, + "grad_norm": 0.484708309173584, + "learning_rate": 4.859516267341871e-06, + "loss": 0.5336, + "step": 7136 + }, + { + "epoch": 0.6515428154098959, + "grad_norm": 0.48644691705703735, + "learning_rate": 4.859476710063976e-06, + "loss": 0.5299, + "step": 7137 + }, + { + "epoch": 0.6516341062625525, + "grad_norm": 0.46340444684028625, + "learning_rate": 4.859437147378662e-06, + "loss": 0.5757, + "step": 7138 + }, + { + "epoch": 0.651725397115209, + "grad_norm": 0.45315149426460266, + "learning_rate": 4.859397579286018e-06, + "loss": 0.5533, + "step": 7139 + }, + { + "epoch": 0.6518166879678656, + "grad_norm": 0.45527923107147217, + "learning_rate": 4.859358005786138e-06, + "loss": 0.5789, + "step": 7140 + }, + { + "epoch": 0.6519079788205222, + "grad_norm": 0.46083661913871765, + "learning_rate": 4.85931842687911e-06, + "loss": 0.5995, + "step": 7141 + }, + { + "epoch": 0.6519992696731788, + "grad_norm": 0.4881831407546997, + "learning_rate": 4.859278842565025e-06, + "loss": 0.5428, + "step": 7142 + }, + { + "epoch": 0.6520905605258354, + "grad_norm": 0.5081608295440674, + "learning_rate": 4.859239252843976e-06, + "loss": 0.5915, + "step": 7143 + }, + { + "epoch": 0.6521818513784918, + "grad_norm": 0.47374629974365234, + "learning_rate": 4.85919965771605e-06, + "loss": 0.5622, + "step": 7144 + }, + { + "epoch": 0.6522731422311484, + "grad_norm": 0.43932193517684937, + "learning_rate": 4.85916005718134e-06, + "loss": 0.5766, + "step": 7145 + }, + { + "epoch": 0.652364433083805, + "grad_norm": 0.4720102846622467, + "learning_rate": 4.859120451239937e-06, + "loss": 0.5677, + "step": 7146 + }, + { + "epoch": 0.6524557239364616, + "grad_norm": 0.48969054222106934, + "learning_rate": 4.859080839891931e-06, + "loss": 0.5425, + "step": 7147 + }, + { + "epoch": 0.6525470147891181, + "grad_norm": 0.461921364068985, + "learning_rate": 4.859041223137413e-06, + "loss": 0.5504, + "step": 7148 + }, + { + "epoch": 0.6526383056417747, + "grad_norm": 0.4490079879760742, + "learning_rate": 4.859001600976474e-06, + "loss": 0.5858, + "step": 7149 + }, + { + "epoch": 0.6527295964944313, + "grad_norm": 0.4981301724910736, + "learning_rate": 4.858961973409205e-06, + "loss": 0.546, + "step": 7150 + }, + { + "epoch": 0.6528208873470879, + "grad_norm": 0.4621693193912506, + "learning_rate": 4.8589223404356954e-06, + "loss": 0.5677, + "step": 7151 + }, + { + "epoch": 0.6529121781997443, + "grad_norm": 0.4753912687301636, + "learning_rate": 4.8588827020560375e-06, + "loss": 0.5623, + "step": 7152 + }, + { + "epoch": 0.6530034690524009, + "grad_norm": 0.48919281363487244, + "learning_rate": 4.858843058270321e-06, + "loss": 0.5239, + "step": 7153 + }, + { + "epoch": 0.6530947599050575, + "grad_norm": 0.4504534900188446, + "learning_rate": 4.8588034090786385e-06, + "loss": 0.5812, + "step": 7154 + }, + { + "epoch": 0.6531860507577141, + "grad_norm": 0.45736274123191833, + "learning_rate": 4.8587637544810795e-06, + "loss": 0.5804, + "step": 7155 + }, + { + "epoch": 0.6532773416103707, + "grad_norm": 0.478723406791687, + "learning_rate": 4.858724094477735e-06, + "loss": 0.5134, + "step": 7156 + }, + { + "epoch": 0.6533686324630272, + "grad_norm": 0.4470156729221344, + "learning_rate": 4.858684429068696e-06, + "loss": 0.6175, + "step": 7157 + }, + { + "epoch": 0.6534599233156838, + "grad_norm": 0.46333226561546326, + "learning_rate": 4.858644758254054e-06, + "loss": 0.5577, + "step": 7158 + }, + { + "epoch": 0.6535512141683403, + "grad_norm": 0.4942624270915985, + "learning_rate": 4.858605082033899e-06, + "loss": 0.5565, + "step": 7159 + }, + { + "epoch": 0.6536425050209969, + "grad_norm": 0.47877129912376404, + "learning_rate": 4.858565400408323e-06, + "loss": 0.5396, + "step": 7160 + }, + { + "epoch": 0.6537337958736534, + "grad_norm": 0.49689412117004395, + "learning_rate": 4.858525713377416e-06, + "loss": 0.5308, + "step": 7161 + }, + { + "epoch": 0.65382508672631, + "grad_norm": 0.49172186851501465, + "learning_rate": 4.858486020941269e-06, + "loss": 0.5586, + "step": 7162 + }, + { + "epoch": 0.6539163775789666, + "grad_norm": 0.5372164249420166, + "learning_rate": 4.858446323099972e-06, + "loss": 0.517, + "step": 7163 + }, + { + "epoch": 0.6540076684316232, + "grad_norm": 0.4705261290073395, + "learning_rate": 4.858406619853618e-06, + "loss": 0.5474, + "step": 7164 + }, + { + "epoch": 0.6540989592842797, + "grad_norm": 0.499245285987854, + "learning_rate": 4.858366911202298e-06, + "loss": 0.542, + "step": 7165 + }, + { + "epoch": 0.6541902501369363, + "grad_norm": 0.5053861141204834, + "learning_rate": 4.858327197146102e-06, + "loss": 0.5252, + "step": 7166 + }, + { + "epoch": 0.6542815409895928, + "grad_norm": 0.4513874053955078, + "learning_rate": 4.858287477685121e-06, + "loss": 0.575, + "step": 7167 + }, + { + "epoch": 0.6543728318422494, + "grad_norm": 0.4901714622974396, + "learning_rate": 4.858247752819446e-06, + "loss": 0.539, + "step": 7168 + }, + { + "epoch": 0.654464122694906, + "grad_norm": 0.4665526747703552, + "learning_rate": 4.858208022549168e-06, + "loss": 0.559, + "step": 7169 + }, + { + "epoch": 0.6545554135475625, + "grad_norm": 0.49120664596557617, + "learning_rate": 4.858168286874378e-06, + "loss": 0.5848, + "step": 7170 + }, + { + "epoch": 0.6546467044002191, + "grad_norm": 0.49522536993026733, + "learning_rate": 4.858128545795169e-06, + "loss": 0.5827, + "step": 7171 + }, + { + "epoch": 0.6547379952528757, + "grad_norm": 0.5080450177192688, + "learning_rate": 4.858088799311629e-06, + "loss": 0.5228, + "step": 7172 + }, + { + "epoch": 0.6548292861055323, + "grad_norm": 0.4768325984477997, + "learning_rate": 4.858049047423851e-06, + "loss": 0.6077, + "step": 7173 + }, + { + "epoch": 0.6549205769581888, + "grad_norm": 0.47718048095703125, + "learning_rate": 4.858009290131926e-06, + "loss": 0.556, + "step": 7174 + }, + { + "epoch": 0.6550118678108453, + "grad_norm": 0.46046605706214905, + "learning_rate": 4.857969527435944e-06, + "loss": 0.5832, + "step": 7175 + }, + { + "epoch": 0.6551031586635019, + "grad_norm": 0.4833528697490692, + "learning_rate": 4.857929759335997e-06, + "loss": 0.568, + "step": 7176 + }, + { + "epoch": 0.6551944495161585, + "grad_norm": 0.4943399131298065, + "learning_rate": 4.857889985832176e-06, + "loss": 0.515, + "step": 7177 + }, + { + "epoch": 0.655285740368815, + "grad_norm": 0.47899606823921204, + "learning_rate": 4.8578502069245726e-06, + "loss": 0.5901, + "step": 7178 + }, + { + "epoch": 0.6553770312214716, + "grad_norm": 0.47245970368385315, + "learning_rate": 4.8578104226132775e-06, + "loss": 0.5642, + "step": 7179 + }, + { + "epoch": 0.6554683220741282, + "grad_norm": 0.4367583692073822, + "learning_rate": 4.857770632898382e-06, + "loss": 0.5618, + "step": 7180 + }, + { + "epoch": 0.6555596129267848, + "grad_norm": 0.49014174938201904, + "learning_rate": 4.857730837779977e-06, + "loss": 0.5551, + "step": 7181 + }, + { + "epoch": 0.6556509037794414, + "grad_norm": 0.5088498592376709, + "learning_rate": 4.857691037258154e-06, + "loss": 0.5389, + "step": 7182 + }, + { + "epoch": 0.6557421946320978, + "grad_norm": 0.45328494906425476, + "learning_rate": 4.857651231333004e-06, + "loss": 0.5982, + "step": 7183 + }, + { + "epoch": 0.6558334854847544, + "grad_norm": 0.5039349794387817, + "learning_rate": 4.857611420004619e-06, + "loss": 0.5719, + "step": 7184 + }, + { + "epoch": 0.655924776337411, + "grad_norm": 0.48321253061294556, + "learning_rate": 4.857571603273089e-06, + "loss": 0.5567, + "step": 7185 + }, + { + "epoch": 0.6560160671900676, + "grad_norm": 0.49142149090766907, + "learning_rate": 4.857531781138506e-06, + "loss": 0.578, + "step": 7186 + }, + { + "epoch": 0.6561073580427241, + "grad_norm": 0.4998131990432739, + "learning_rate": 4.85749195360096e-06, + "loss": 0.5301, + "step": 7187 + }, + { + "epoch": 0.6561986488953807, + "grad_norm": 0.49009090662002563, + "learning_rate": 4.857452120660545e-06, + "loss": 0.5305, + "step": 7188 + }, + { + "epoch": 0.6562899397480373, + "grad_norm": 0.475995808839798, + "learning_rate": 4.85741228231735e-06, + "loss": 0.567, + "step": 7189 + }, + { + "epoch": 0.6563812306006939, + "grad_norm": 0.47839394211769104, + "learning_rate": 4.857372438571467e-06, + "loss": 0.5618, + "step": 7190 + }, + { + "epoch": 0.6564725214533503, + "grad_norm": 0.4895918667316437, + "learning_rate": 4.8573325894229875e-06, + "loss": 0.5009, + "step": 7191 + }, + { + "epoch": 0.6565638123060069, + "grad_norm": 0.5121069550514221, + "learning_rate": 4.857292734872002e-06, + "loss": 0.5123, + "step": 7192 + }, + { + "epoch": 0.6566551031586635, + "grad_norm": 0.47855260968208313, + "learning_rate": 4.857252874918603e-06, + "loss": 0.5509, + "step": 7193 + }, + { + "epoch": 0.6567463940113201, + "grad_norm": 0.4572885036468506, + "learning_rate": 4.857213009562881e-06, + "loss": 0.5591, + "step": 7194 + }, + { + "epoch": 0.6568376848639766, + "grad_norm": 0.4730658233165741, + "learning_rate": 4.857173138804928e-06, + "loss": 0.5522, + "step": 7195 + }, + { + "epoch": 0.6569289757166332, + "grad_norm": 0.4854143261909485, + "learning_rate": 4.8571332626448345e-06, + "loss": 0.5575, + "step": 7196 + }, + { + "epoch": 0.6570202665692898, + "grad_norm": 0.5143700242042542, + "learning_rate": 4.857093381082693e-06, + "loss": 0.5239, + "step": 7197 + }, + { + "epoch": 0.6571115574219463, + "grad_norm": 0.4854181110858917, + "learning_rate": 4.8570534941185945e-06, + "loss": 0.5881, + "step": 7198 + }, + { + "epoch": 0.6572028482746028, + "grad_norm": 0.4414745569229126, + "learning_rate": 4.857013601752629e-06, + "loss": 0.64, + "step": 7199 + }, + { + "epoch": 0.6572941391272594, + "grad_norm": 0.4956038296222687, + "learning_rate": 4.85697370398489e-06, + "loss": 0.5319, + "step": 7200 + }, + { + "epoch": 0.657385429979916, + "grad_norm": 0.48987290263175964, + "learning_rate": 4.856933800815469e-06, + "loss": 0.5791, + "step": 7201 + }, + { + "epoch": 0.6574767208325726, + "grad_norm": 0.4677886962890625, + "learning_rate": 4.856893892244455e-06, + "loss": 0.5476, + "step": 7202 + }, + { + "epoch": 0.6575680116852292, + "grad_norm": 0.48465079069137573, + "learning_rate": 4.856853978271942e-06, + "loss": 0.5415, + "step": 7203 + }, + { + "epoch": 0.6576593025378857, + "grad_norm": 0.44123995304107666, + "learning_rate": 4.856814058898021e-06, + "loss": 0.5737, + "step": 7204 + }, + { + "epoch": 0.6577505933905423, + "grad_norm": 0.47261926531791687, + "learning_rate": 4.856774134122781e-06, + "loss": 0.5354, + "step": 7205 + }, + { + "epoch": 0.6578418842431988, + "grad_norm": 0.4953323006629944, + "learning_rate": 4.856734203946317e-06, + "loss": 0.5102, + "step": 7206 + }, + { + "epoch": 0.6579331750958554, + "grad_norm": 0.4688613712787628, + "learning_rate": 4.856694268368719e-06, + "loss": 0.5628, + "step": 7207 + }, + { + "epoch": 0.6580244659485119, + "grad_norm": 0.5122462511062622, + "learning_rate": 4.856654327390078e-06, + "loss": 0.5686, + "step": 7208 + }, + { + "epoch": 0.6581157568011685, + "grad_norm": 0.48238950967788696, + "learning_rate": 4.856614381010487e-06, + "loss": 0.5493, + "step": 7209 + }, + { + "epoch": 0.6582070476538251, + "grad_norm": 0.48000627756118774, + "learning_rate": 4.856574429230035e-06, + "loss": 0.5606, + "step": 7210 + }, + { + "epoch": 0.6582983385064817, + "grad_norm": 0.5048037767410278, + "learning_rate": 4.856534472048816e-06, + "loss": 0.574, + "step": 7211 + }, + { + "epoch": 0.6583896293591383, + "grad_norm": 0.49979984760284424, + "learning_rate": 4.856494509466921e-06, + "loss": 0.5516, + "step": 7212 + }, + { + "epoch": 0.6584809202117948, + "grad_norm": 0.46125584840774536, + "learning_rate": 4.856454541484441e-06, + "loss": 0.5861, + "step": 7213 + }, + { + "epoch": 0.6585722110644513, + "grad_norm": 0.46435198187828064, + "learning_rate": 4.856414568101468e-06, + "loss": 0.5658, + "step": 7214 + }, + { + "epoch": 0.6586635019171079, + "grad_norm": 0.49285030364990234, + "learning_rate": 4.856374589318094e-06, + "loss": 0.5363, + "step": 7215 + }, + { + "epoch": 0.6587547927697645, + "grad_norm": 0.49599823355674744, + "learning_rate": 4.856334605134409e-06, + "loss": 0.5334, + "step": 7216 + }, + { + "epoch": 0.658846083622421, + "grad_norm": 0.4815271198749542, + "learning_rate": 4.856294615550506e-06, + "loss": 0.5335, + "step": 7217 + }, + { + "epoch": 0.6589373744750776, + "grad_norm": 0.4885315001010895, + "learning_rate": 4.8562546205664775e-06, + "loss": 0.5399, + "step": 7218 + }, + { + "epoch": 0.6590286653277342, + "grad_norm": 0.4695623219013214, + "learning_rate": 4.856214620182413e-06, + "loss": 0.5881, + "step": 7219 + }, + { + "epoch": 0.6591199561803908, + "grad_norm": 0.4848910868167877, + "learning_rate": 4.856174614398406e-06, + "loss": 0.5468, + "step": 7220 + }, + { + "epoch": 0.6592112470330473, + "grad_norm": 0.4608280062675476, + "learning_rate": 4.856134603214547e-06, + "loss": 0.5621, + "step": 7221 + }, + { + "epoch": 0.6593025378857038, + "grad_norm": 0.4610184133052826, + "learning_rate": 4.856094586630928e-06, + "loss": 0.5615, + "step": 7222 + }, + { + "epoch": 0.6593938287383604, + "grad_norm": 0.4771468937397003, + "learning_rate": 4.856054564647641e-06, + "loss": 0.6083, + "step": 7223 + }, + { + "epoch": 0.659485119591017, + "grad_norm": 0.465720534324646, + "learning_rate": 4.8560145372647786e-06, + "loss": 0.5969, + "step": 7224 + }, + { + "epoch": 0.6595764104436735, + "grad_norm": 0.47657373547554016, + "learning_rate": 4.855974504482431e-06, + "loss": 0.5534, + "step": 7225 + }, + { + "epoch": 0.6596677012963301, + "grad_norm": 0.49635306000709534, + "learning_rate": 4.855934466300689e-06, + "loss": 0.5249, + "step": 7226 + }, + { + "epoch": 0.6597589921489867, + "grad_norm": 0.5165335536003113, + "learning_rate": 4.855894422719647e-06, + "loss": 0.5731, + "step": 7227 + }, + { + "epoch": 0.6598502830016433, + "grad_norm": 0.45240360498428345, + "learning_rate": 4.855854373739395e-06, + "loss": 0.546, + "step": 7228 + }, + { + "epoch": 0.6599415738542997, + "grad_norm": 0.4581714868545532, + "learning_rate": 4.855814319360026e-06, + "loss": 0.5618, + "step": 7229 + }, + { + "epoch": 0.6600328647069563, + "grad_norm": 0.5206236839294434, + "learning_rate": 4.855774259581631e-06, + "loss": 0.5714, + "step": 7230 + }, + { + "epoch": 0.6601241555596129, + "grad_norm": 0.5051648020744324, + "learning_rate": 4.8557341944043025e-06, + "loss": 0.5677, + "step": 7231 + }, + { + "epoch": 0.6602154464122695, + "grad_norm": 0.4615474343299866, + "learning_rate": 4.8556941238281305e-06, + "loss": 0.5898, + "step": 7232 + }, + { + "epoch": 0.6603067372649261, + "grad_norm": 0.460552841424942, + "learning_rate": 4.855654047853209e-06, + "loss": 0.5875, + "step": 7233 + }, + { + "epoch": 0.6603980281175826, + "grad_norm": 0.4680655002593994, + "learning_rate": 4.85561396647963e-06, + "loss": 0.6137, + "step": 7234 + }, + { + "epoch": 0.6604893189702392, + "grad_norm": 0.48818591237068176, + "learning_rate": 4.855573879707482e-06, + "loss": 0.5847, + "step": 7235 + }, + { + "epoch": 0.6605806098228958, + "grad_norm": 0.47729727625846863, + "learning_rate": 4.8555337875368615e-06, + "loss": 0.5699, + "step": 7236 + }, + { + "epoch": 0.6606719006755523, + "grad_norm": 0.5324680805206299, + "learning_rate": 4.855493689967856e-06, + "loss": 0.5237, + "step": 7237 + }, + { + "epoch": 0.6607631915282088, + "grad_norm": 0.4779902398586273, + "learning_rate": 4.8554535870005605e-06, + "loss": 0.5723, + "step": 7238 + }, + { + "epoch": 0.6608544823808654, + "grad_norm": 0.45945021510124207, + "learning_rate": 4.855413478635066e-06, + "loss": 0.6236, + "step": 7239 + }, + { + "epoch": 0.660945773233522, + "grad_norm": 0.49429717659950256, + "learning_rate": 4.855373364871464e-06, + "loss": 0.557, + "step": 7240 + }, + { + "epoch": 0.6610370640861786, + "grad_norm": 0.4881008565425873, + "learning_rate": 4.855333245709848e-06, + "loss": 0.5554, + "step": 7241 + }, + { + "epoch": 0.6611283549388351, + "grad_norm": 0.4641586244106293, + "learning_rate": 4.8552931211503075e-06, + "loss": 0.5814, + "step": 7242 + }, + { + "epoch": 0.6612196457914917, + "grad_norm": 0.46047061681747437, + "learning_rate": 4.855252991192936e-06, + "loss": 0.5714, + "step": 7243 + }, + { + "epoch": 0.6613109366441483, + "grad_norm": 0.48724365234375, + "learning_rate": 4.855212855837824e-06, + "loss": 0.536, + "step": 7244 + }, + { + "epoch": 0.6614022274968048, + "grad_norm": 0.4724343717098236, + "learning_rate": 4.855172715085066e-06, + "loss": 0.5639, + "step": 7245 + }, + { + "epoch": 0.6614935183494614, + "grad_norm": 0.5118449926376343, + "learning_rate": 4.8551325689347516e-06, + "loss": 0.5445, + "step": 7246 + }, + { + "epoch": 0.6615848092021179, + "grad_norm": 0.47704020142555237, + "learning_rate": 4.855092417386974e-06, + "loss": 0.5402, + "step": 7247 + }, + { + "epoch": 0.6616761000547745, + "grad_norm": 0.45962950587272644, + "learning_rate": 4.855052260441826e-06, + "loss": 0.5445, + "step": 7248 + }, + { + "epoch": 0.6617673909074311, + "grad_norm": 0.45024213194847107, + "learning_rate": 4.855012098099397e-06, + "loss": 0.5929, + "step": 7249 + }, + { + "epoch": 0.6618586817600877, + "grad_norm": 0.49194779992103577, + "learning_rate": 4.854971930359782e-06, + "loss": 0.5749, + "step": 7250 + }, + { + "epoch": 0.6619499726127442, + "grad_norm": 0.4941506087779999, + "learning_rate": 4.854931757223072e-06, + "loss": 0.571, + "step": 7251 + }, + { + "epoch": 0.6620412634654008, + "grad_norm": 0.4672585427761078, + "learning_rate": 4.854891578689357e-06, + "loss": 0.5619, + "step": 7252 + }, + { + "epoch": 0.6621325543180573, + "grad_norm": 0.4558458626270294, + "learning_rate": 4.854851394758733e-06, + "loss": 0.5762, + "step": 7253 + }, + { + "epoch": 0.6622238451707139, + "grad_norm": 0.46121180057525635, + "learning_rate": 4.854811205431288e-06, + "loss": 0.5659, + "step": 7254 + }, + { + "epoch": 0.6623151360233704, + "grad_norm": 0.5164502859115601, + "learning_rate": 4.854771010707118e-06, + "loss": 0.5727, + "step": 7255 + }, + { + "epoch": 0.662406426876027, + "grad_norm": 0.4848521649837494, + "learning_rate": 4.854730810586312e-06, + "loss": 0.531, + "step": 7256 + }, + { + "epoch": 0.6624977177286836, + "grad_norm": 0.4651806056499481, + "learning_rate": 4.854690605068964e-06, + "loss": 0.6098, + "step": 7257 + }, + { + "epoch": 0.6625890085813402, + "grad_norm": 0.5104825496673584, + "learning_rate": 4.854650394155165e-06, + "loss": 0.535, + "step": 7258 + }, + { + "epoch": 0.6626802994339968, + "grad_norm": 0.46508389711380005, + "learning_rate": 4.854610177845007e-06, + "loss": 0.5848, + "step": 7259 + }, + { + "epoch": 0.6627715902866532, + "grad_norm": 0.4387427568435669, + "learning_rate": 4.854569956138584e-06, + "loss": 0.6041, + "step": 7260 + }, + { + "epoch": 0.6628628811393098, + "grad_norm": 0.5467803478240967, + "learning_rate": 4.8545297290359875e-06, + "loss": 0.5222, + "step": 7261 + }, + { + "epoch": 0.6629541719919664, + "grad_norm": 0.4884697496891022, + "learning_rate": 4.854489496537308e-06, + "loss": 0.5781, + "step": 7262 + }, + { + "epoch": 0.663045462844623, + "grad_norm": 0.5140752196311951, + "learning_rate": 4.854449258642638e-06, + "loss": 0.5365, + "step": 7263 + }, + { + "epoch": 0.6631367536972795, + "grad_norm": 0.5175594687461853, + "learning_rate": 4.8544090153520725e-06, + "loss": 0.5384, + "step": 7264 + }, + { + "epoch": 0.6632280445499361, + "grad_norm": 0.4912750720977783, + "learning_rate": 4.854368766665701e-06, + "loss": 0.5337, + "step": 7265 + }, + { + "epoch": 0.6633193354025927, + "grad_norm": 0.48027303814888, + "learning_rate": 4.854328512583616e-06, + "loss": 0.5961, + "step": 7266 + }, + { + "epoch": 0.6634106262552493, + "grad_norm": 0.5009695291519165, + "learning_rate": 4.854288253105911e-06, + "loss": 0.5973, + "step": 7267 + }, + { + "epoch": 0.6635019171079057, + "grad_norm": 0.47617796063423157, + "learning_rate": 4.8542479882326775e-06, + "loss": 0.5583, + "step": 7268 + }, + { + "epoch": 0.6635932079605623, + "grad_norm": 0.4768066108226776, + "learning_rate": 4.854207717964008e-06, + "loss": 0.5986, + "step": 7269 + }, + { + "epoch": 0.6636844988132189, + "grad_norm": 0.4470609128475189, + "learning_rate": 4.854167442299995e-06, + "loss": 0.5566, + "step": 7270 + }, + { + "epoch": 0.6637757896658755, + "grad_norm": 0.48051363229751587, + "learning_rate": 4.85412716124073e-06, + "loss": 0.5368, + "step": 7271 + }, + { + "epoch": 0.663867080518532, + "grad_norm": 0.46595731377601624, + "learning_rate": 4.854086874786306e-06, + "loss": 0.5783, + "step": 7272 + }, + { + "epoch": 0.6639583713711886, + "grad_norm": 0.49408936500549316, + "learning_rate": 4.8540465829368144e-06, + "loss": 0.5477, + "step": 7273 + }, + { + "epoch": 0.6640496622238452, + "grad_norm": 0.5094175338745117, + "learning_rate": 4.854006285692349e-06, + "loss": 0.5913, + "step": 7274 + }, + { + "epoch": 0.6641409530765018, + "grad_norm": 0.4654982388019562, + "learning_rate": 4.853965983053001e-06, + "loss": 0.5299, + "step": 7275 + }, + { + "epoch": 0.6642322439291583, + "grad_norm": 0.5005617737770081, + "learning_rate": 4.853925675018864e-06, + "loss": 0.5585, + "step": 7276 + }, + { + "epoch": 0.6643235347818148, + "grad_norm": 0.4647800624370575, + "learning_rate": 4.853885361590029e-06, + "loss": 0.5551, + "step": 7277 + }, + { + "epoch": 0.6644148256344714, + "grad_norm": 0.4752639830112457, + "learning_rate": 4.853845042766588e-06, + "loss": 0.5325, + "step": 7278 + }, + { + "epoch": 0.664506116487128, + "grad_norm": 0.4758591949939728, + "learning_rate": 4.853804718548635e-06, + "loss": 0.5677, + "step": 7279 + }, + { + "epoch": 0.6645974073397846, + "grad_norm": 0.44918420910835266, + "learning_rate": 4.853764388936262e-06, + "loss": 0.5884, + "step": 7280 + }, + { + "epoch": 0.6646886981924411, + "grad_norm": 0.46296411752700806, + "learning_rate": 4.853724053929561e-06, + "loss": 0.5645, + "step": 7281 + }, + { + "epoch": 0.6647799890450977, + "grad_norm": 0.46335914731025696, + "learning_rate": 4.853683713528624e-06, + "loss": 0.5799, + "step": 7282 + }, + { + "epoch": 0.6648712798977543, + "grad_norm": 0.4659444987773895, + "learning_rate": 4.853643367733545e-06, + "loss": 0.5757, + "step": 7283 + }, + { + "epoch": 0.6649625707504108, + "grad_norm": 0.49979034066200256, + "learning_rate": 4.853603016544415e-06, + "loss": 0.557, + "step": 7284 + }, + { + "epoch": 0.6650538616030673, + "grad_norm": 0.4691886007785797, + "learning_rate": 4.853562659961327e-06, + "loss": 0.5512, + "step": 7285 + }, + { + "epoch": 0.6651451524557239, + "grad_norm": 0.4984567165374756, + "learning_rate": 4.853522297984374e-06, + "loss": 0.5597, + "step": 7286 + }, + { + "epoch": 0.6652364433083805, + "grad_norm": 0.47861236333847046, + "learning_rate": 4.853481930613648e-06, + "loss": 0.5493, + "step": 7287 + }, + { + "epoch": 0.6653277341610371, + "grad_norm": 0.4729236960411072, + "learning_rate": 4.853441557849241e-06, + "loss": 0.5371, + "step": 7288 + }, + { + "epoch": 0.6654190250136937, + "grad_norm": 0.46715983748435974, + "learning_rate": 4.853401179691246e-06, + "loss": 0.545, + "step": 7289 + }, + { + "epoch": 0.6655103158663502, + "grad_norm": 0.49481284618377686, + "learning_rate": 4.853360796139756e-06, + "loss": 0.5425, + "step": 7290 + }, + { + "epoch": 0.6656016067190068, + "grad_norm": 0.4424994885921478, + "learning_rate": 4.853320407194863e-06, + "loss": 0.573, + "step": 7291 + }, + { + "epoch": 0.6656928975716633, + "grad_norm": 0.45931100845336914, + "learning_rate": 4.853280012856659e-06, + "loss": 0.5898, + "step": 7292 + }, + { + "epoch": 0.6657841884243199, + "grad_norm": 0.46849778294563293, + "learning_rate": 4.853239613125238e-06, + "loss": 0.5469, + "step": 7293 + }, + { + "epoch": 0.6658754792769764, + "grad_norm": 0.4827798306941986, + "learning_rate": 4.853199208000692e-06, + "loss": 0.5651, + "step": 7294 + }, + { + "epoch": 0.665966770129633, + "grad_norm": 0.465124249458313, + "learning_rate": 4.853158797483113e-06, + "loss": 0.5673, + "step": 7295 + }, + { + "epoch": 0.6660580609822896, + "grad_norm": 0.45225292444229126, + "learning_rate": 4.853118381572595e-06, + "loss": 0.5723, + "step": 7296 + }, + { + "epoch": 0.6661493518349462, + "grad_norm": 0.49438461661338806, + "learning_rate": 4.8530779602692276e-06, + "loss": 0.5488, + "step": 7297 + }, + { + "epoch": 0.6662406426876027, + "grad_norm": 0.4840424060821533, + "learning_rate": 4.853037533573107e-06, + "loss": 0.5153, + "step": 7298 + }, + { + "epoch": 0.6663319335402592, + "grad_norm": 0.4842396378517151, + "learning_rate": 4.8529971014843245e-06, + "loss": 0.5617, + "step": 7299 + }, + { + "epoch": 0.6664232243929158, + "grad_norm": 0.4970790445804596, + "learning_rate": 4.8529566640029725e-06, + "loss": 0.5674, + "step": 7300 + }, + { + "epoch": 0.6665145152455724, + "grad_norm": 0.4732799232006073, + "learning_rate": 4.852916221129144e-06, + "loss": 0.5867, + "step": 7301 + }, + { + "epoch": 0.666605806098229, + "grad_norm": 0.48706069588661194, + "learning_rate": 4.85287577286293e-06, + "loss": 0.5251, + "step": 7302 + }, + { + "epoch": 0.6666970969508855, + "grad_norm": 0.49358657002449036, + "learning_rate": 4.852835319204427e-06, + "loss": 0.5363, + "step": 7303 + }, + { + "epoch": 0.6667883878035421, + "grad_norm": 0.47760069370269775, + "learning_rate": 4.852794860153724e-06, + "loss": 0.5396, + "step": 7304 + }, + { + "epoch": 0.6668796786561987, + "grad_norm": 0.5064799189567566, + "learning_rate": 4.852754395710915e-06, + "loss": 0.5728, + "step": 7305 + }, + { + "epoch": 0.6669709695088553, + "grad_norm": 0.47785159945487976, + "learning_rate": 4.8527139258760935e-06, + "loss": 0.5511, + "step": 7306 + }, + { + "epoch": 0.6670622603615117, + "grad_norm": 0.4695308804512024, + "learning_rate": 4.852673450649351e-06, + "loss": 0.5713, + "step": 7307 + }, + { + "epoch": 0.6671535512141683, + "grad_norm": 0.49576982855796814, + "learning_rate": 4.852632970030782e-06, + "loss": 0.5262, + "step": 7308 + }, + { + "epoch": 0.6672448420668249, + "grad_norm": 0.4984179139137268, + "learning_rate": 4.852592484020476e-06, + "loss": 0.5172, + "step": 7309 + }, + { + "epoch": 0.6673361329194815, + "grad_norm": 0.47610512375831604, + "learning_rate": 4.85255199261853e-06, + "loss": 0.5785, + "step": 7310 + }, + { + "epoch": 0.667427423772138, + "grad_norm": 0.435683012008667, + "learning_rate": 4.852511495825034e-06, + "loss": 0.5855, + "step": 7311 + }, + { + "epoch": 0.6675187146247946, + "grad_norm": 0.48875948786735535, + "learning_rate": 4.8524709936400815e-06, + "loss": 0.5312, + "step": 7312 + }, + { + "epoch": 0.6676100054774512, + "grad_norm": 0.46453171968460083, + "learning_rate": 4.852430486063765e-06, + "loss": 0.5772, + "step": 7313 + }, + { + "epoch": 0.6677012963301078, + "grad_norm": 0.4582151472568512, + "learning_rate": 4.852389973096179e-06, + "loss": 0.5405, + "step": 7314 + }, + { + "epoch": 0.6677925871827642, + "grad_norm": 0.47501081228256226, + "learning_rate": 4.852349454737414e-06, + "loss": 0.5473, + "step": 7315 + }, + { + "epoch": 0.6678838780354208, + "grad_norm": 0.476561039686203, + "learning_rate": 4.8523089309875634e-06, + "loss": 0.5456, + "step": 7316 + }, + { + "epoch": 0.6679751688880774, + "grad_norm": 0.4935617446899414, + "learning_rate": 4.852268401846721e-06, + "loss": 0.5524, + "step": 7317 + }, + { + "epoch": 0.668066459740734, + "grad_norm": 0.5009442567825317, + "learning_rate": 4.85222786731498e-06, + "loss": 0.5378, + "step": 7318 + }, + { + "epoch": 0.6681577505933906, + "grad_norm": 0.4935367703437805, + "learning_rate": 4.852187327392431e-06, + "loss": 0.5316, + "step": 7319 + }, + { + "epoch": 0.6682490414460471, + "grad_norm": 0.512105405330658, + "learning_rate": 4.85214678207917e-06, + "loss": 0.5501, + "step": 7320 + }, + { + "epoch": 0.6683403322987037, + "grad_norm": 0.5283862948417664, + "learning_rate": 4.852106231375288e-06, + "loss": 0.4963, + "step": 7321 + }, + { + "epoch": 0.6684316231513603, + "grad_norm": 0.46751296520233154, + "learning_rate": 4.8520656752808784e-06, + "loss": 0.5811, + "step": 7322 + }, + { + "epoch": 0.6685229140040168, + "grad_norm": 0.4840230941772461, + "learning_rate": 4.852025113796034e-06, + "loss": 0.5262, + "step": 7323 + }, + { + "epoch": 0.6686142048566733, + "grad_norm": 0.48573118448257446, + "learning_rate": 4.851984546920847e-06, + "loss": 0.5517, + "step": 7324 + }, + { + "epoch": 0.6687054957093299, + "grad_norm": 0.4942059814929962, + "learning_rate": 4.851943974655412e-06, + "loss": 0.6022, + "step": 7325 + }, + { + "epoch": 0.6687967865619865, + "grad_norm": 0.4664885699748993, + "learning_rate": 4.851903396999821e-06, + "loss": 0.6036, + "step": 7326 + }, + { + "epoch": 0.6688880774146431, + "grad_norm": 0.4613644778728485, + "learning_rate": 4.851862813954167e-06, + "loss": 0.5544, + "step": 7327 + }, + { + "epoch": 0.6689793682672996, + "grad_norm": 0.5209174752235413, + "learning_rate": 4.851822225518544e-06, + "loss": 0.5552, + "step": 7328 + }, + { + "epoch": 0.6690706591199562, + "grad_norm": 0.46391114592552185, + "learning_rate": 4.851781631693043e-06, + "loss": 0.5847, + "step": 7329 + }, + { + "epoch": 0.6691619499726127, + "grad_norm": 0.4638427197933197, + "learning_rate": 4.851741032477759e-06, + "loss": 0.5489, + "step": 7330 + }, + { + "epoch": 0.6692532408252693, + "grad_norm": 0.4562641978263855, + "learning_rate": 4.851700427872784e-06, + "loss": 0.5589, + "step": 7331 + }, + { + "epoch": 0.6693445316779258, + "grad_norm": 0.4721721112728119, + "learning_rate": 4.851659817878211e-06, + "loss": 0.5869, + "step": 7332 + }, + { + "epoch": 0.6694358225305824, + "grad_norm": 0.485647588968277, + "learning_rate": 4.851619202494135e-06, + "loss": 0.5569, + "step": 7333 + }, + { + "epoch": 0.669527113383239, + "grad_norm": 0.4704609811306, + "learning_rate": 4.851578581720646e-06, + "loss": 0.5554, + "step": 7334 + }, + { + "epoch": 0.6696184042358956, + "grad_norm": 0.4445452392101288, + "learning_rate": 4.851537955557839e-06, + "loss": 0.604, + "step": 7335 + }, + { + "epoch": 0.6697096950885522, + "grad_norm": 0.4904800057411194, + "learning_rate": 4.8514973240058065e-06, + "loss": 0.5554, + "step": 7336 + }, + { + "epoch": 0.6698009859412087, + "grad_norm": 0.4686637818813324, + "learning_rate": 4.8514566870646415e-06, + "loss": 0.5718, + "step": 7337 + }, + { + "epoch": 0.6698922767938652, + "grad_norm": 0.4735316336154938, + "learning_rate": 4.851416044734438e-06, + "loss": 0.5374, + "step": 7338 + }, + { + "epoch": 0.6699835676465218, + "grad_norm": 0.4870794415473938, + "learning_rate": 4.8513753970152875e-06, + "loss": 0.5578, + "step": 7339 + }, + { + "epoch": 0.6700748584991784, + "grad_norm": 0.48403236269950867, + "learning_rate": 4.851334743907286e-06, + "loss": 0.584, + "step": 7340 + }, + { + "epoch": 0.6701661493518349, + "grad_norm": 0.47398367524147034, + "learning_rate": 4.851294085410523e-06, + "loss": 0.5622, + "step": 7341 + }, + { + "epoch": 0.6702574402044915, + "grad_norm": 0.452066034078598, + "learning_rate": 4.851253421525095e-06, + "loss": 0.579, + "step": 7342 + }, + { + "epoch": 0.6703487310571481, + "grad_norm": 0.4780009984970093, + "learning_rate": 4.8512127522510935e-06, + "loss": 0.5798, + "step": 7343 + }, + { + "epoch": 0.6704400219098047, + "grad_norm": 0.4707615077495575, + "learning_rate": 4.851172077588612e-06, + "loss": 0.5597, + "step": 7344 + }, + { + "epoch": 0.6705313127624613, + "grad_norm": 0.48901498317718506, + "learning_rate": 4.851131397537743e-06, + "loss": 0.5663, + "step": 7345 + }, + { + "epoch": 0.6706226036151177, + "grad_norm": 0.5111958980560303, + "learning_rate": 4.851090712098581e-06, + "loss": 0.5255, + "step": 7346 + }, + { + "epoch": 0.6707138944677743, + "grad_norm": 0.48838797211647034, + "learning_rate": 4.851050021271219e-06, + "loss": 0.5427, + "step": 7347 + }, + { + "epoch": 0.6708051853204309, + "grad_norm": 0.46353879570961, + "learning_rate": 4.85100932505575e-06, + "loss": 0.5514, + "step": 7348 + }, + { + "epoch": 0.6708964761730875, + "grad_norm": 0.45495283603668213, + "learning_rate": 4.8509686234522665e-06, + "loss": 0.5887, + "step": 7349 + }, + { + "epoch": 0.670987767025744, + "grad_norm": 0.49883216619491577, + "learning_rate": 4.850927916460863e-06, + "loss": 0.5341, + "step": 7350 + }, + { + "epoch": 0.6710790578784006, + "grad_norm": 0.49292486906051636, + "learning_rate": 4.850887204081631e-06, + "loss": 0.5797, + "step": 7351 + }, + { + "epoch": 0.6711703487310572, + "grad_norm": 0.4946886897087097, + "learning_rate": 4.8508464863146665e-06, + "loss": 0.5723, + "step": 7352 + }, + { + "epoch": 0.6712616395837138, + "grad_norm": 0.474916011095047, + "learning_rate": 4.8508057631600605e-06, + "loss": 0.577, + "step": 7353 + }, + { + "epoch": 0.6713529304363702, + "grad_norm": 0.45694535970687866, + "learning_rate": 4.850765034617908e-06, + "loss": 0.5934, + "step": 7354 + }, + { + "epoch": 0.6714442212890268, + "grad_norm": 0.5038122534751892, + "learning_rate": 4.850724300688301e-06, + "loss": 0.5151, + "step": 7355 + }, + { + "epoch": 0.6715355121416834, + "grad_norm": 0.4610559940338135, + "learning_rate": 4.850683561371334e-06, + "loss": 0.5532, + "step": 7356 + }, + { + "epoch": 0.67162680299434, + "grad_norm": 0.468099445104599, + "learning_rate": 4.8506428166670995e-06, + "loss": 0.5865, + "step": 7357 + }, + { + "epoch": 0.6717180938469965, + "grad_norm": 0.49857082962989807, + "learning_rate": 4.850602066575691e-06, + "loss": 0.5664, + "step": 7358 + }, + { + "epoch": 0.6718093846996531, + "grad_norm": 0.4967135488986969, + "learning_rate": 4.850561311097202e-06, + "loss": 0.5177, + "step": 7359 + }, + { + "epoch": 0.6719006755523097, + "grad_norm": 0.463706374168396, + "learning_rate": 4.850520550231726e-06, + "loss": 0.5713, + "step": 7360 + }, + { + "epoch": 0.6719919664049662, + "grad_norm": 0.4761810898780823, + "learning_rate": 4.850479783979356e-06, + "loss": 0.5419, + "step": 7361 + }, + { + "epoch": 0.6720832572576227, + "grad_norm": 0.48943987488746643, + "learning_rate": 4.850439012340187e-06, + "loss": 0.585, + "step": 7362 + }, + { + "epoch": 0.6721745481102793, + "grad_norm": 0.4462553858757019, + "learning_rate": 4.85039823531431e-06, + "loss": 0.6122, + "step": 7363 + }, + { + "epoch": 0.6722658389629359, + "grad_norm": 0.4529825448989868, + "learning_rate": 4.85035745290182e-06, + "loss": 0.5713, + "step": 7364 + }, + { + "epoch": 0.6723571298155925, + "grad_norm": 0.5013865232467651, + "learning_rate": 4.85031666510281e-06, + "loss": 0.5302, + "step": 7365 + }, + { + "epoch": 0.6724484206682491, + "grad_norm": 0.46524566411972046, + "learning_rate": 4.850275871917374e-06, + "loss": 0.5923, + "step": 7366 + }, + { + "epoch": 0.6725397115209056, + "grad_norm": 0.5003818869590759, + "learning_rate": 4.850235073345605e-06, + "loss": 0.5354, + "step": 7367 + }, + { + "epoch": 0.6726310023735622, + "grad_norm": 0.4915045201778412, + "learning_rate": 4.850194269387597e-06, + "loss": 0.5924, + "step": 7368 + }, + { + "epoch": 0.6727222932262187, + "grad_norm": 0.490651398897171, + "learning_rate": 4.8501534600434425e-06, + "loss": 0.5321, + "step": 7369 + }, + { + "epoch": 0.6728135840788753, + "grad_norm": 0.4836062788963318, + "learning_rate": 4.850112645313236e-06, + "loss": 0.5571, + "step": 7370 + }, + { + "epoch": 0.6729048749315318, + "grad_norm": 0.47128599882125854, + "learning_rate": 4.85007182519707e-06, + "loss": 0.5035, + "step": 7371 + }, + { + "epoch": 0.6729961657841884, + "grad_norm": 0.4838140308856964, + "learning_rate": 4.85003099969504e-06, + "loss": 0.5573, + "step": 7372 + }, + { + "epoch": 0.673087456636845, + "grad_norm": 0.4829912781715393, + "learning_rate": 4.849990168807237e-06, + "loss": 0.5738, + "step": 7373 + }, + { + "epoch": 0.6731787474895016, + "grad_norm": 0.4687310457229614, + "learning_rate": 4.849949332533757e-06, + "loss": 0.5956, + "step": 7374 + }, + { + "epoch": 0.6732700383421582, + "grad_norm": 0.49249035120010376, + "learning_rate": 4.849908490874692e-06, + "loss": 0.5581, + "step": 7375 + }, + { + "epoch": 0.6733613291948147, + "grad_norm": 0.4831129312515259, + "learning_rate": 4.849867643830136e-06, + "loss": 0.5896, + "step": 7376 + }, + { + "epoch": 0.6734526200474712, + "grad_norm": 0.4870467185974121, + "learning_rate": 4.849826791400183e-06, + "loss": 0.4841, + "step": 7377 + }, + { + "epoch": 0.6735439109001278, + "grad_norm": 0.5183960795402527, + "learning_rate": 4.849785933584926e-06, + "loss": 0.5142, + "step": 7378 + }, + { + "epoch": 0.6736352017527844, + "grad_norm": 0.4706781208515167, + "learning_rate": 4.849745070384459e-06, + "loss": 0.5831, + "step": 7379 + }, + { + "epoch": 0.6737264926054409, + "grad_norm": 0.4626379609107971, + "learning_rate": 4.849704201798875e-06, + "loss": 0.5661, + "step": 7380 + }, + { + "epoch": 0.6738177834580975, + "grad_norm": 0.4975823760032654, + "learning_rate": 4.849663327828269e-06, + "loss": 0.535, + "step": 7381 + }, + { + "epoch": 0.6739090743107541, + "grad_norm": 0.49688607454299927, + "learning_rate": 4.8496224484727336e-06, + "loss": 0.595, + "step": 7382 + }, + { + "epoch": 0.6740003651634107, + "grad_norm": 0.45354411005973816, + "learning_rate": 4.849581563732363e-06, + "loss": 0.6197, + "step": 7383 + }, + { + "epoch": 0.6740916560160672, + "grad_norm": 0.4690133035182953, + "learning_rate": 4.849540673607251e-06, + "loss": 0.5913, + "step": 7384 + }, + { + "epoch": 0.6741829468687237, + "grad_norm": 0.49579158425331116, + "learning_rate": 4.849499778097491e-06, + "loss": 0.5827, + "step": 7385 + }, + { + "epoch": 0.6742742377213803, + "grad_norm": 0.4734848737716675, + "learning_rate": 4.849458877203176e-06, + "loss": 0.535, + "step": 7386 + }, + { + "epoch": 0.6743655285740369, + "grad_norm": 0.4699765145778656, + "learning_rate": 4.849417970924401e-06, + "loss": 0.5854, + "step": 7387 + }, + { + "epoch": 0.6744568194266934, + "grad_norm": 0.4747566282749176, + "learning_rate": 4.849377059261258e-06, + "loss": 0.5819, + "step": 7388 + }, + { + "epoch": 0.67454811027935, + "grad_norm": 0.4746223986148834, + "learning_rate": 4.849336142213844e-06, + "loss": 0.5522, + "step": 7389 + }, + { + "epoch": 0.6746394011320066, + "grad_norm": 0.4621845781803131, + "learning_rate": 4.849295219782251e-06, + "loss": 0.5937, + "step": 7390 + }, + { + "epoch": 0.6747306919846632, + "grad_norm": 0.48051780462265015, + "learning_rate": 4.849254291966571e-06, + "loss": 0.5745, + "step": 7391 + }, + { + "epoch": 0.6748219828373198, + "grad_norm": 0.4583781659603119, + "learning_rate": 4.8492133587669e-06, + "loss": 0.5609, + "step": 7392 + }, + { + "epoch": 0.6749132736899762, + "grad_norm": 0.47042250633239746, + "learning_rate": 4.849172420183331e-06, + "loss": 0.5518, + "step": 7393 + }, + { + "epoch": 0.6750045645426328, + "grad_norm": 0.4805774688720703, + "learning_rate": 4.849131476215958e-06, + "loss": 0.5399, + "step": 7394 + }, + { + "epoch": 0.6750958553952894, + "grad_norm": 0.44706591963768005, + "learning_rate": 4.8490905268648744e-06, + "loss": 0.5958, + "step": 7395 + }, + { + "epoch": 0.675187146247946, + "grad_norm": 0.45598262548446655, + "learning_rate": 4.849049572130176e-06, + "loss": 0.5628, + "step": 7396 + }, + { + "epoch": 0.6752784371006025, + "grad_norm": 0.536511242389679, + "learning_rate": 4.849008612011954e-06, + "loss": 0.4945, + "step": 7397 + }, + { + "epoch": 0.6753697279532591, + "grad_norm": 0.4664759039878845, + "learning_rate": 4.848967646510303e-06, + "loss": 0.5876, + "step": 7398 + }, + { + "epoch": 0.6754610188059157, + "grad_norm": 0.4765852689743042, + "learning_rate": 4.848926675625318e-06, + "loss": 0.5913, + "step": 7399 + }, + { + "epoch": 0.6755523096585722, + "grad_norm": 0.4626813232898712, + "learning_rate": 4.848885699357092e-06, + "loss": 0.5695, + "step": 7400 + }, + { + "epoch": 0.6756436005112287, + "grad_norm": 0.5117310285568237, + "learning_rate": 4.848844717705719e-06, + "loss": 0.5562, + "step": 7401 + }, + { + "epoch": 0.6757348913638853, + "grad_norm": 0.49033260345458984, + "learning_rate": 4.848803730671293e-06, + "loss": 0.568, + "step": 7402 + }, + { + "epoch": 0.6758261822165419, + "grad_norm": 0.4685593545436859, + "learning_rate": 4.8487627382539085e-06, + "loss": 0.5971, + "step": 7403 + }, + { + "epoch": 0.6759174730691985, + "grad_norm": 0.4894942045211792, + "learning_rate": 4.848721740453658e-06, + "loss": 0.5604, + "step": 7404 + }, + { + "epoch": 0.676008763921855, + "grad_norm": 0.4692991077899933, + "learning_rate": 4.848680737270637e-06, + "loss": 0.5391, + "step": 7405 + }, + { + "epoch": 0.6761000547745116, + "grad_norm": 0.46881163120269775, + "learning_rate": 4.848639728704938e-06, + "loss": 0.561, + "step": 7406 + }, + { + "epoch": 0.6761913456271682, + "grad_norm": 0.48010045289993286, + "learning_rate": 4.848598714756657e-06, + "loss": 0.5605, + "step": 7407 + }, + { + "epoch": 0.6762826364798247, + "grad_norm": 0.4844435751438141, + "learning_rate": 4.848557695425886e-06, + "loss": 0.5424, + "step": 7408 + }, + { + "epoch": 0.6763739273324813, + "grad_norm": 0.488336980342865, + "learning_rate": 4.84851667071272e-06, + "loss": 0.5341, + "step": 7409 + }, + { + "epoch": 0.6764652181851378, + "grad_norm": 0.4720129072666168, + "learning_rate": 4.848475640617253e-06, + "loss": 0.5194, + "step": 7410 + }, + { + "epoch": 0.6765565090377944, + "grad_norm": 0.5013840794563293, + "learning_rate": 4.848434605139578e-06, + "loss": 0.5605, + "step": 7411 + }, + { + "epoch": 0.676647799890451, + "grad_norm": 0.4806298613548279, + "learning_rate": 4.848393564279791e-06, + "loss": 0.557, + "step": 7412 + }, + { + "epoch": 0.6767390907431076, + "grad_norm": 0.4850267469882965, + "learning_rate": 4.848352518037984e-06, + "loss": 0.5254, + "step": 7413 + }, + { + "epoch": 0.6768303815957641, + "grad_norm": 0.510312557220459, + "learning_rate": 4.848311466414253e-06, + "loss": 0.524, + "step": 7414 + }, + { + "epoch": 0.6769216724484207, + "grad_norm": 0.4741780161857605, + "learning_rate": 4.84827040940869e-06, + "loss": 0.5363, + "step": 7415 + }, + { + "epoch": 0.6770129633010772, + "grad_norm": 0.455799400806427, + "learning_rate": 4.848229347021391e-06, + "loss": 0.5491, + "step": 7416 + }, + { + "epoch": 0.6771042541537338, + "grad_norm": 0.46835264563560486, + "learning_rate": 4.84818827925245e-06, + "loss": 0.5458, + "step": 7417 + }, + { + "epoch": 0.6771955450063903, + "grad_norm": 0.46125173568725586, + "learning_rate": 4.848147206101959e-06, + "loss": 0.5607, + "step": 7418 + }, + { + "epoch": 0.6772868358590469, + "grad_norm": 0.4867168068885803, + "learning_rate": 4.848106127570015e-06, + "loss": 0.5525, + "step": 7419 + }, + { + "epoch": 0.6773781267117035, + "grad_norm": 0.4384082555770874, + "learning_rate": 4.848065043656709e-06, + "loss": 0.6119, + "step": 7420 + }, + { + "epoch": 0.6774694175643601, + "grad_norm": 0.5109984874725342, + "learning_rate": 4.848023954362137e-06, + "loss": 0.5707, + "step": 7421 + }, + { + "epoch": 0.6775607084170167, + "grad_norm": 0.5277615189552307, + "learning_rate": 4.847982859686395e-06, + "loss": 0.5514, + "step": 7422 + }, + { + "epoch": 0.6776519992696732, + "grad_norm": 0.4872805178165436, + "learning_rate": 4.847941759629574e-06, + "loss": 0.5402, + "step": 7423 + }, + { + "epoch": 0.6777432901223297, + "grad_norm": 0.48355576395988464, + "learning_rate": 4.84790065419177e-06, + "loss": 0.5445, + "step": 7424 + }, + { + "epoch": 0.6778345809749863, + "grad_norm": 0.48884081840515137, + "learning_rate": 4.847859543373075e-06, + "loss": 0.5578, + "step": 7425 + }, + { + "epoch": 0.6779258718276429, + "grad_norm": 0.49901387095451355, + "learning_rate": 4.847818427173586e-06, + "loss": 0.5081, + "step": 7426 + }, + { + "epoch": 0.6780171626802994, + "grad_norm": 0.4764804542064667, + "learning_rate": 4.847777305593396e-06, + "loss": 0.5713, + "step": 7427 + }, + { + "epoch": 0.678108453532956, + "grad_norm": 0.48073357343673706, + "learning_rate": 4.8477361786326e-06, + "loss": 0.5351, + "step": 7428 + }, + { + "epoch": 0.6781997443856126, + "grad_norm": 0.49854400753974915, + "learning_rate": 4.84769504629129e-06, + "loss": 0.5269, + "step": 7429 + }, + { + "epoch": 0.6782910352382692, + "grad_norm": 0.47368448972702026, + "learning_rate": 4.847653908569562e-06, + "loss": 0.5946, + "step": 7430 + }, + { + "epoch": 0.6783823260909256, + "grad_norm": 0.45530086755752563, + "learning_rate": 4.847612765467511e-06, + "loss": 0.5547, + "step": 7431 + }, + { + "epoch": 0.6784736169435822, + "grad_norm": 0.4732091724872589, + "learning_rate": 4.84757161698523e-06, + "loss": 0.5293, + "step": 7432 + }, + { + "epoch": 0.6785649077962388, + "grad_norm": 0.46298807859420776, + "learning_rate": 4.847530463122814e-06, + "loss": 0.579, + "step": 7433 + }, + { + "epoch": 0.6786561986488954, + "grad_norm": 0.4999375343322754, + "learning_rate": 4.8474893038803565e-06, + "loss": 0.5559, + "step": 7434 + }, + { + "epoch": 0.678747489501552, + "grad_norm": 0.4697343409061432, + "learning_rate": 4.847448139257953e-06, + "loss": 0.5929, + "step": 7435 + }, + { + "epoch": 0.6788387803542085, + "grad_norm": 0.528915524482727, + "learning_rate": 4.847406969255696e-06, + "loss": 0.5669, + "step": 7436 + }, + { + "epoch": 0.6789300712068651, + "grad_norm": 0.44664937257766724, + "learning_rate": 4.847365793873682e-06, + "loss": 0.5864, + "step": 7437 + }, + { + "epoch": 0.6790213620595217, + "grad_norm": 0.4649535119533539, + "learning_rate": 4.847324613112004e-06, + "loss": 0.5699, + "step": 7438 + }, + { + "epoch": 0.6791126529121781, + "grad_norm": 0.5005484819412231, + "learning_rate": 4.847283426970757e-06, + "loss": 0.5486, + "step": 7439 + }, + { + "epoch": 0.6792039437648347, + "grad_norm": 0.4806899130344391, + "learning_rate": 4.8472422354500345e-06, + "loss": 0.5401, + "step": 7440 + }, + { + "epoch": 0.6792952346174913, + "grad_norm": 0.463535338640213, + "learning_rate": 4.8472010385499324e-06, + "loss": 0.5911, + "step": 7441 + }, + { + "epoch": 0.6793865254701479, + "grad_norm": 0.4848122000694275, + "learning_rate": 4.847159836270544e-06, + "loss": 0.554, + "step": 7442 + }, + { + "epoch": 0.6794778163228045, + "grad_norm": 0.46939823031425476, + "learning_rate": 4.847118628611964e-06, + "loss": 0.5414, + "step": 7443 + }, + { + "epoch": 0.679569107175461, + "grad_norm": 0.4900142252445221, + "learning_rate": 4.847077415574287e-06, + "loss": 0.5414, + "step": 7444 + }, + { + "epoch": 0.6796603980281176, + "grad_norm": 0.5000782608985901, + "learning_rate": 4.847036197157608e-06, + "loss": 0.5481, + "step": 7445 + }, + { + "epoch": 0.6797516888807742, + "grad_norm": 0.4796607494354248, + "learning_rate": 4.846994973362019e-06, + "loss": 0.5646, + "step": 7446 + }, + { + "epoch": 0.6798429797334307, + "grad_norm": 0.5293567180633545, + "learning_rate": 4.846953744187617e-06, + "loss": 0.5695, + "step": 7447 + }, + { + "epoch": 0.6799342705860872, + "grad_norm": 0.5162000060081482, + "learning_rate": 4.846912509634496e-06, + "loss": 0.5034, + "step": 7448 + }, + { + "epoch": 0.6800255614387438, + "grad_norm": 0.4755857586860657, + "learning_rate": 4.84687126970275e-06, + "loss": 0.5626, + "step": 7449 + }, + { + "epoch": 0.6801168522914004, + "grad_norm": 0.4871100187301636, + "learning_rate": 4.846830024392474e-06, + "loss": 0.5144, + "step": 7450 + }, + { + "epoch": 0.680208143144057, + "grad_norm": 0.5141110420227051, + "learning_rate": 4.846788773703762e-06, + "loss": 0.5582, + "step": 7451 + }, + { + "epoch": 0.6802994339967136, + "grad_norm": 0.5114357471466064, + "learning_rate": 4.846747517636709e-06, + "loss": 0.5281, + "step": 7452 + }, + { + "epoch": 0.6803907248493701, + "grad_norm": 0.4880988895893097, + "learning_rate": 4.8467062561914095e-06, + "loss": 0.5476, + "step": 7453 + }, + { + "epoch": 0.6804820157020267, + "grad_norm": 0.4783984124660492, + "learning_rate": 4.8466649893679575e-06, + "loss": 0.579, + "step": 7454 + }, + { + "epoch": 0.6805733065546832, + "grad_norm": 0.4779207706451416, + "learning_rate": 4.846623717166449e-06, + "loss": 0.5655, + "step": 7455 + }, + { + "epoch": 0.6806645974073398, + "grad_norm": 0.46284177899360657, + "learning_rate": 4.846582439586976e-06, + "loss": 0.5734, + "step": 7456 + }, + { + "epoch": 0.6807558882599963, + "grad_norm": 0.44238534569740295, + "learning_rate": 4.846541156629635e-06, + "loss": 0.542, + "step": 7457 + }, + { + "epoch": 0.6808471791126529, + "grad_norm": 0.4457916021347046, + "learning_rate": 4.846499868294521e-06, + "loss": 0.5367, + "step": 7458 + }, + { + "epoch": 0.6809384699653095, + "grad_norm": 0.44585278630256653, + "learning_rate": 4.846458574581727e-06, + "loss": 0.5604, + "step": 7459 + }, + { + "epoch": 0.6810297608179661, + "grad_norm": 0.47243738174438477, + "learning_rate": 4.846417275491349e-06, + "loss": 0.5406, + "step": 7460 + }, + { + "epoch": 0.6811210516706226, + "grad_norm": 0.45742541551589966, + "learning_rate": 4.846375971023482e-06, + "loss": 0.5682, + "step": 7461 + }, + { + "epoch": 0.6812123425232791, + "grad_norm": 0.4922308623790741, + "learning_rate": 4.846334661178219e-06, + "loss": 0.5636, + "step": 7462 + }, + { + "epoch": 0.6813036333759357, + "grad_norm": 0.5251729488372803, + "learning_rate": 4.846293345955656e-06, + "loss": 0.5254, + "step": 7463 + }, + { + "epoch": 0.6813949242285923, + "grad_norm": 0.4690311551094055, + "learning_rate": 4.846252025355886e-06, + "loss": 0.5895, + "step": 7464 + }, + { + "epoch": 0.6814862150812488, + "grad_norm": 0.4972604215145111, + "learning_rate": 4.846210699379007e-06, + "loss": 0.5651, + "step": 7465 + }, + { + "epoch": 0.6815775059339054, + "grad_norm": 0.4563450813293457, + "learning_rate": 4.84616936802511e-06, + "loss": 0.5787, + "step": 7466 + }, + { + "epoch": 0.681668796786562, + "grad_norm": 0.4377853274345398, + "learning_rate": 4.846128031294291e-06, + "loss": 0.5909, + "step": 7467 + }, + { + "epoch": 0.6817600876392186, + "grad_norm": 0.47046419978141785, + "learning_rate": 4.846086689186647e-06, + "loss": 0.5367, + "step": 7468 + }, + { + "epoch": 0.6818513784918752, + "grad_norm": 0.4954589903354645, + "learning_rate": 4.846045341702269e-06, + "loss": 0.5389, + "step": 7469 + }, + { + "epoch": 0.6819426693445316, + "grad_norm": 0.4605828523635864, + "learning_rate": 4.846003988841255e-06, + "loss": 0.5552, + "step": 7470 + }, + { + "epoch": 0.6820339601971882, + "grad_norm": 0.4747658967971802, + "learning_rate": 4.845962630603697e-06, + "loss": 0.537, + "step": 7471 + }, + { + "epoch": 0.6821252510498448, + "grad_norm": 0.47481366991996765, + "learning_rate": 4.845921266989692e-06, + "loss": 0.558, + "step": 7472 + }, + { + "epoch": 0.6822165419025014, + "grad_norm": 0.44905686378479004, + "learning_rate": 4.845879897999334e-06, + "loss": 0.6078, + "step": 7473 + }, + { + "epoch": 0.6823078327551579, + "grad_norm": 0.47421538829803467, + "learning_rate": 4.845838523632718e-06, + "loss": 0.6015, + "step": 7474 + }, + { + "epoch": 0.6823991236078145, + "grad_norm": 0.5047857165336609, + "learning_rate": 4.8457971438899374e-06, + "loss": 0.5359, + "step": 7475 + }, + { + "epoch": 0.6824904144604711, + "grad_norm": 0.48733049631118774, + "learning_rate": 4.845755758771089e-06, + "loss": 0.5418, + "step": 7476 + }, + { + "epoch": 0.6825817053131277, + "grad_norm": 0.45579594373703003, + "learning_rate": 4.845714368276266e-06, + "loss": 0.577, + "step": 7477 + }, + { + "epoch": 0.6826729961657841, + "grad_norm": 0.4693908095359802, + "learning_rate": 4.845672972405565e-06, + "loss": 0.5863, + "step": 7478 + }, + { + "epoch": 0.6827642870184407, + "grad_norm": 0.5210265517234802, + "learning_rate": 4.8456315711590805e-06, + "loss": 0.5164, + "step": 7479 + }, + { + "epoch": 0.6828555778710973, + "grad_norm": 0.4703141152858734, + "learning_rate": 4.845590164536906e-06, + "loss": 0.5428, + "step": 7480 + }, + { + "epoch": 0.6829468687237539, + "grad_norm": 0.4482669234275818, + "learning_rate": 4.845548752539138e-06, + "loss": 0.5693, + "step": 7481 + }, + { + "epoch": 0.6830381595764105, + "grad_norm": 0.4800187945365906, + "learning_rate": 4.84550733516587e-06, + "loss": 0.5533, + "step": 7482 + }, + { + "epoch": 0.683129450429067, + "grad_norm": 0.4617426097393036, + "learning_rate": 4.845465912417198e-06, + "loss": 0.5855, + "step": 7483 + }, + { + "epoch": 0.6832207412817236, + "grad_norm": 0.5033509135246277, + "learning_rate": 4.845424484293215e-06, + "loss": 0.533, + "step": 7484 + }, + { + "epoch": 0.6833120321343802, + "grad_norm": 0.45530495047569275, + "learning_rate": 4.845383050794019e-06, + "loss": 0.5783, + "step": 7485 + }, + { + "epoch": 0.6834033229870367, + "grad_norm": 0.47528302669525146, + "learning_rate": 4.845341611919703e-06, + "loss": 0.6216, + "step": 7486 + }, + { + "epoch": 0.6834946138396932, + "grad_norm": 0.4810292422771454, + "learning_rate": 4.8453001676703625e-06, + "loss": 0.5358, + "step": 7487 + }, + { + "epoch": 0.6835859046923498, + "grad_norm": 0.4602789580821991, + "learning_rate": 4.8452587180460925e-06, + "loss": 0.5654, + "step": 7488 + }, + { + "epoch": 0.6836771955450064, + "grad_norm": 0.5077970027923584, + "learning_rate": 4.845217263046988e-06, + "loss": 0.5231, + "step": 7489 + }, + { + "epoch": 0.683768486397663, + "grad_norm": 0.46216192841529846, + "learning_rate": 4.845175802673143e-06, + "loss": 0.5698, + "step": 7490 + }, + { + "epoch": 0.6838597772503195, + "grad_norm": 0.48980939388275146, + "learning_rate": 4.845134336924654e-06, + "loss": 0.5773, + "step": 7491 + }, + { + "epoch": 0.6839510681029761, + "grad_norm": 0.5149030089378357, + "learning_rate": 4.845092865801615e-06, + "loss": 0.5289, + "step": 7492 + }, + { + "epoch": 0.6840423589556327, + "grad_norm": 0.4679614007472992, + "learning_rate": 4.845051389304122e-06, + "loss": 0.5241, + "step": 7493 + }, + { + "epoch": 0.6841336498082892, + "grad_norm": 0.4587371349334717, + "learning_rate": 4.8450099074322695e-06, + "loss": 0.5504, + "step": 7494 + }, + { + "epoch": 0.6842249406609457, + "grad_norm": 0.46904119849205017, + "learning_rate": 4.844968420186153e-06, + "loss": 0.5485, + "step": 7495 + }, + { + "epoch": 0.6843162315136023, + "grad_norm": 0.4522424340248108, + "learning_rate": 4.844926927565866e-06, + "loss": 0.5234, + "step": 7496 + }, + { + "epoch": 0.6844075223662589, + "grad_norm": 0.46548205614089966, + "learning_rate": 4.844885429571505e-06, + "loss": 0.5733, + "step": 7497 + }, + { + "epoch": 0.6844988132189155, + "grad_norm": 0.4681737422943115, + "learning_rate": 4.844843926203165e-06, + "loss": 0.5539, + "step": 7498 + }, + { + "epoch": 0.6845901040715721, + "grad_norm": 0.4815787672996521, + "learning_rate": 4.844802417460942e-06, + "loss": 0.5287, + "step": 7499 + }, + { + "epoch": 0.6846813949242286, + "grad_norm": 0.4581564962863922, + "learning_rate": 4.8447609033449295e-06, + "loss": 0.5685, + "step": 7500 + }, + { + "epoch": 0.6847726857768851, + "grad_norm": 0.4703502058982849, + "learning_rate": 4.844719383855223e-06, + "loss": 0.5576, + "step": 7501 + }, + { + "epoch": 0.6848639766295417, + "grad_norm": 0.5271943807601929, + "learning_rate": 4.844677858991917e-06, + "loss": 0.5336, + "step": 7502 + }, + { + "epoch": 0.6849552674821983, + "grad_norm": 0.4629783034324646, + "learning_rate": 4.844636328755109e-06, + "loss": 0.6345, + "step": 7503 + }, + { + "epoch": 0.6850465583348548, + "grad_norm": 0.49375486373901367, + "learning_rate": 4.844594793144892e-06, + "loss": 0.5218, + "step": 7504 + }, + { + "epoch": 0.6851378491875114, + "grad_norm": 0.4859990179538727, + "learning_rate": 4.844553252161363e-06, + "loss": 0.5341, + "step": 7505 + }, + { + "epoch": 0.685229140040168, + "grad_norm": 0.4771117866039276, + "learning_rate": 4.844511705804616e-06, + "loss": 0.5505, + "step": 7506 + }, + { + "epoch": 0.6853204308928246, + "grad_norm": 0.450382262468338, + "learning_rate": 4.8444701540747455e-06, + "loss": 0.5813, + "step": 7507 + }, + { + "epoch": 0.6854117217454812, + "grad_norm": 0.48363542556762695, + "learning_rate": 4.844428596971847e-06, + "loss": 0.5862, + "step": 7508 + }, + { + "epoch": 0.6855030125981376, + "grad_norm": 0.4981120824813843, + "learning_rate": 4.844387034496017e-06, + "loss": 0.5139, + "step": 7509 + }, + { + "epoch": 0.6855943034507942, + "grad_norm": 0.5171386003494263, + "learning_rate": 4.844345466647351e-06, + "loss": 0.5375, + "step": 7510 + }, + { + "epoch": 0.6856855943034508, + "grad_norm": 0.5090774893760681, + "learning_rate": 4.8443038934259415e-06, + "loss": 0.561, + "step": 7511 + }, + { + "epoch": 0.6857768851561074, + "grad_norm": 0.5207875370979309, + "learning_rate": 4.844262314831887e-06, + "loss": 0.5064, + "step": 7512 + }, + { + "epoch": 0.6858681760087639, + "grad_norm": 0.46237611770629883, + "learning_rate": 4.844220730865281e-06, + "loss": 0.533, + "step": 7513 + }, + { + "epoch": 0.6859594668614205, + "grad_norm": 0.47085586190223694, + "learning_rate": 4.84417914152622e-06, + "loss": 0.5634, + "step": 7514 + }, + { + "epoch": 0.6860507577140771, + "grad_norm": 0.4784994423389435, + "learning_rate": 4.844137546814797e-06, + "loss": 0.5299, + "step": 7515 + }, + { + "epoch": 0.6861420485667337, + "grad_norm": 0.4478099048137665, + "learning_rate": 4.84409594673111e-06, + "loss": 0.5703, + "step": 7516 + }, + { + "epoch": 0.6862333394193901, + "grad_norm": 0.48916661739349365, + "learning_rate": 4.844054341275253e-06, + "loss": 0.5422, + "step": 7517 + }, + { + "epoch": 0.6863246302720467, + "grad_norm": 0.4944252371788025, + "learning_rate": 4.8440127304473206e-06, + "loss": 0.576, + "step": 7518 + }, + { + "epoch": 0.6864159211247033, + "grad_norm": 0.4550376236438751, + "learning_rate": 4.84397111424741e-06, + "loss": 0.5653, + "step": 7519 + }, + { + "epoch": 0.6865072119773599, + "grad_norm": 0.46847003698349, + "learning_rate": 4.843929492675616e-06, + "loss": 0.5628, + "step": 7520 + }, + { + "epoch": 0.6865985028300164, + "grad_norm": 0.46270328760147095, + "learning_rate": 4.843887865732033e-06, + "loss": 0.5524, + "step": 7521 + }, + { + "epoch": 0.686689793682673, + "grad_norm": 0.4790252447128296, + "learning_rate": 4.843846233416757e-06, + "loss": 0.5592, + "step": 7522 + }, + { + "epoch": 0.6867810845353296, + "grad_norm": 0.4751707911491394, + "learning_rate": 4.843804595729884e-06, + "loss": 0.5553, + "step": 7523 + }, + { + "epoch": 0.6868723753879862, + "grad_norm": 0.45603474974632263, + "learning_rate": 4.843762952671509e-06, + "loss": 0.5771, + "step": 7524 + }, + { + "epoch": 0.6869636662406426, + "grad_norm": 0.48045051097869873, + "learning_rate": 4.843721304241726e-06, + "loss": 0.5839, + "step": 7525 + }, + { + "epoch": 0.6870549570932992, + "grad_norm": 0.491379976272583, + "learning_rate": 4.843679650440634e-06, + "loss": 0.5177, + "step": 7526 + }, + { + "epoch": 0.6871462479459558, + "grad_norm": 0.4845900237560272, + "learning_rate": 4.843637991268324e-06, + "loss": 0.5407, + "step": 7527 + }, + { + "epoch": 0.6872375387986124, + "grad_norm": 0.4833306670188904, + "learning_rate": 4.843596326724895e-06, + "loss": 0.5751, + "step": 7528 + }, + { + "epoch": 0.687328829651269, + "grad_norm": 0.5341408848762512, + "learning_rate": 4.843554656810441e-06, + "loss": 0.5511, + "step": 7529 + }, + { + "epoch": 0.6874201205039255, + "grad_norm": 0.4836404025554657, + "learning_rate": 4.843512981525058e-06, + "loss": 0.5406, + "step": 7530 + }, + { + "epoch": 0.6875114113565821, + "grad_norm": 0.4630701243877411, + "learning_rate": 4.843471300868841e-06, + "loss": 0.5649, + "step": 7531 + }, + { + "epoch": 0.6876027022092386, + "grad_norm": 0.4781249165534973, + "learning_rate": 4.843429614841885e-06, + "loss": 0.5927, + "step": 7532 + }, + { + "epoch": 0.6876939930618952, + "grad_norm": 0.48058900237083435, + "learning_rate": 4.843387923444287e-06, + "loss": 0.5383, + "step": 7533 + }, + { + "epoch": 0.6877852839145517, + "grad_norm": 0.4993137717247009, + "learning_rate": 4.843346226676142e-06, + "loss": 0.5254, + "step": 7534 + }, + { + "epoch": 0.6878765747672083, + "grad_norm": 0.4959280788898468, + "learning_rate": 4.843304524537545e-06, + "loss": 0.5278, + "step": 7535 + }, + { + "epoch": 0.6879678656198649, + "grad_norm": 0.4819318950176239, + "learning_rate": 4.843262817028592e-06, + "loss": 0.5644, + "step": 7536 + }, + { + "epoch": 0.6880591564725215, + "grad_norm": 0.4484500586986542, + "learning_rate": 4.8432211041493785e-06, + "loss": 0.5436, + "step": 7537 + }, + { + "epoch": 0.688150447325178, + "grad_norm": 0.4688652455806732, + "learning_rate": 4.8431793859e-06, + "loss": 0.5699, + "step": 7538 + }, + { + "epoch": 0.6882417381778346, + "grad_norm": 0.5224120616912842, + "learning_rate": 4.8431376622805524e-06, + "loss": 0.5628, + "step": 7539 + }, + { + "epoch": 0.6883330290304911, + "grad_norm": 0.4930811822414398, + "learning_rate": 4.843095933291132e-06, + "loss": 0.5417, + "step": 7540 + }, + { + "epoch": 0.6884243198831477, + "grad_norm": 0.4943450093269348, + "learning_rate": 4.843054198931832e-06, + "loss": 0.5902, + "step": 7541 + }, + { + "epoch": 0.6885156107358043, + "grad_norm": 0.48726800084114075, + "learning_rate": 4.843012459202751e-06, + "loss": 0.5769, + "step": 7542 + }, + { + "epoch": 0.6886069015884608, + "grad_norm": 0.4592522084712982, + "learning_rate": 4.842970714103983e-06, + "loss": 0.5882, + "step": 7543 + }, + { + "epoch": 0.6886981924411174, + "grad_norm": 0.482530802488327, + "learning_rate": 4.842928963635624e-06, + "loss": 0.5356, + "step": 7544 + }, + { + "epoch": 0.688789483293774, + "grad_norm": 0.4953005909919739, + "learning_rate": 4.842887207797769e-06, + "loss": 0.5515, + "step": 7545 + }, + { + "epoch": 0.6888807741464306, + "grad_norm": 0.4635062515735626, + "learning_rate": 4.842845446590515e-06, + "loss": 0.5857, + "step": 7546 + }, + { + "epoch": 0.6889720649990871, + "grad_norm": 0.517314612865448, + "learning_rate": 4.842803680013957e-06, + "loss": 0.5489, + "step": 7547 + }, + { + "epoch": 0.6890633558517436, + "grad_norm": 0.48042017221450806, + "learning_rate": 4.842761908068191e-06, + "loss": 0.5634, + "step": 7548 + }, + { + "epoch": 0.6891546467044002, + "grad_norm": 0.5029191374778748, + "learning_rate": 4.8427201307533115e-06, + "loss": 0.5349, + "step": 7549 + }, + { + "epoch": 0.6892459375570568, + "grad_norm": 0.4745711088180542, + "learning_rate": 4.842678348069416e-06, + "loss": 0.5359, + "step": 7550 + }, + { + "epoch": 0.6893372284097133, + "grad_norm": 0.471750944852829, + "learning_rate": 4.8426365600166e-06, + "loss": 0.5397, + "step": 7551 + }, + { + "epoch": 0.6894285192623699, + "grad_norm": 0.4966416358947754, + "learning_rate": 4.842594766594958e-06, + "loss": 0.5557, + "step": 7552 + }, + { + "epoch": 0.6895198101150265, + "grad_norm": 0.5011213421821594, + "learning_rate": 4.842552967804587e-06, + "loss": 0.5674, + "step": 7553 + }, + { + "epoch": 0.6896111009676831, + "grad_norm": 0.5335150957107544, + "learning_rate": 4.842511163645582e-06, + "loss": 0.546, + "step": 7554 + }, + { + "epoch": 0.6897023918203397, + "grad_norm": 0.4942062497138977, + "learning_rate": 4.842469354118039e-06, + "loss": 0.5389, + "step": 7555 + }, + { + "epoch": 0.6897936826729961, + "grad_norm": 0.4992084205150604, + "learning_rate": 4.842427539222055e-06, + "loss": 0.6073, + "step": 7556 + }, + { + "epoch": 0.6898849735256527, + "grad_norm": 0.47357457876205444, + "learning_rate": 4.842385718957724e-06, + "loss": 0.5822, + "step": 7557 + }, + { + "epoch": 0.6899762643783093, + "grad_norm": 0.4597555994987488, + "learning_rate": 4.842343893325143e-06, + "loss": 0.598, + "step": 7558 + }, + { + "epoch": 0.6900675552309659, + "grad_norm": 0.49759945273399353, + "learning_rate": 4.842302062324408e-06, + "loss": 0.5619, + "step": 7559 + }, + { + "epoch": 0.6901588460836224, + "grad_norm": 0.454775869846344, + "learning_rate": 4.8422602259556135e-06, + "loss": 0.5506, + "step": 7560 + }, + { + "epoch": 0.690250136936279, + "grad_norm": 0.45712143182754517, + "learning_rate": 4.842218384218856e-06, + "loss": 0.5655, + "step": 7561 + }, + { + "epoch": 0.6903414277889356, + "grad_norm": 0.5158949494361877, + "learning_rate": 4.8421765371142325e-06, + "loss": 0.5365, + "step": 7562 + }, + { + "epoch": 0.6904327186415921, + "grad_norm": 0.47485995292663574, + "learning_rate": 4.842134684641838e-06, + "loss": 0.5651, + "step": 7563 + }, + { + "epoch": 0.6905240094942486, + "grad_norm": 0.45769068598747253, + "learning_rate": 4.842092826801769e-06, + "loss": 0.5514, + "step": 7564 + }, + { + "epoch": 0.6906153003469052, + "grad_norm": 0.4958273470401764, + "learning_rate": 4.842050963594119e-06, + "loss": 0.5385, + "step": 7565 + }, + { + "epoch": 0.6907065911995618, + "grad_norm": 0.47581085562705994, + "learning_rate": 4.842009095018987e-06, + "loss": 0.5396, + "step": 7566 + }, + { + "epoch": 0.6907978820522184, + "grad_norm": 0.47087544202804565, + "learning_rate": 4.841967221076468e-06, + "loss": 0.5712, + "step": 7567 + }, + { + "epoch": 0.690889172904875, + "grad_norm": 0.5098168253898621, + "learning_rate": 4.841925341766658e-06, + "loss": 0.5084, + "step": 7568 + }, + { + "epoch": 0.6909804637575315, + "grad_norm": 0.5076172947883606, + "learning_rate": 4.841883457089652e-06, + "loss": 0.5234, + "step": 7569 + }, + { + "epoch": 0.6910717546101881, + "grad_norm": 0.46821847558021545, + "learning_rate": 4.841841567045548e-06, + "loss": 0.5605, + "step": 7570 + }, + { + "epoch": 0.6911630454628446, + "grad_norm": 0.5201215744018555, + "learning_rate": 4.84179967163444e-06, + "loss": 0.5726, + "step": 7571 + }, + { + "epoch": 0.6912543363155011, + "grad_norm": 0.46447184681892395, + "learning_rate": 4.841757770856424e-06, + "loss": 0.5711, + "step": 7572 + }, + { + "epoch": 0.6913456271681577, + "grad_norm": 0.4884410798549652, + "learning_rate": 4.841715864711597e-06, + "loss": 0.6032, + "step": 7573 + }, + { + "epoch": 0.6914369180208143, + "grad_norm": 0.47997599840164185, + "learning_rate": 4.841673953200055e-06, + "loss": 0.5459, + "step": 7574 + }, + { + "epoch": 0.6915282088734709, + "grad_norm": 0.4753723740577698, + "learning_rate": 4.841632036321895e-06, + "loss": 0.5776, + "step": 7575 + }, + { + "epoch": 0.6916194997261275, + "grad_norm": 0.45643484592437744, + "learning_rate": 4.841590114077211e-06, + "loss": 0.5798, + "step": 7576 + }, + { + "epoch": 0.691710790578784, + "grad_norm": 0.5202041268348694, + "learning_rate": 4.8415481864661004e-06, + "loss": 0.5098, + "step": 7577 + }, + { + "epoch": 0.6918020814314406, + "grad_norm": 0.4521573483943939, + "learning_rate": 4.841506253488658e-06, + "loss": 0.5292, + "step": 7578 + }, + { + "epoch": 0.6918933722840971, + "grad_norm": 0.44279882311820984, + "learning_rate": 4.841464315144981e-06, + "loss": 0.5936, + "step": 7579 + }, + { + "epoch": 0.6919846631367537, + "grad_norm": 0.5042122006416321, + "learning_rate": 4.8414223714351664e-06, + "loss": 0.5158, + "step": 7580 + }, + { + "epoch": 0.6920759539894102, + "grad_norm": 0.49890220165252686, + "learning_rate": 4.841380422359308e-06, + "loss": 0.5264, + "step": 7581 + }, + { + "epoch": 0.6921672448420668, + "grad_norm": 0.4604012370109558, + "learning_rate": 4.841338467917505e-06, + "loss": 0.5348, + "step": 7582 + }, + { + "epoch": 0.6922585356947234, + "grad_norm": 0.4697706997394562, + "learning_rate": 4.84129650810985e-06, + "loss": 0.5451, + "step": 7583 + }, + { + "epoch": 0.69234982654738, + "grad_norm": 0.4644516408443451, + "learning_rate": 4.841254542936442e-06, + "loss": 0.5571, + "step": 7584 + }, + { + "epoch": 0.6924411174000366, + "grad_norm": 0.528282880783081, + "learning_rate": 4.841212572397375e-06, + "loss": 0.5566, + "step": 7585 + }, + { + "epoch": 0.6925324082526931, + "grad_norm": 0.4796273708343506, + "learning_rate": 4.841170596492747e-06, + "loss": 0.5419, + "step": 7586 + }, + { + "epoch": 0.6926236991053496, + "grad_norm": 0.47427961230278015, + "learning_rate": 4.841128615222653e-06, + "loss": 0.533, + "step": 7587 + }, + { + "epoch": 0.6927149899580062, + "grad_norm": 0.4923619031906128, + "learning_rate": 4.84108662858719e-06, + "loss": 0.5538, + "step": 7588 + }, + { + "epoch": 0.6928062808106628, + "grad_norm": 0.4804999530315399, + "learning_rate": 4.841044636586454e-06, + "loss": 0.5569, + "step": 7589 + }, + { + "epoch": 0.6928975716633193, + "grad_norm": 0.4969477653503418, + "learning_rate": 4.841002639220541e-06, + "loss": 0.5416, + "step": 7590 + }, + { + "epoch": 0.6929888625159759, + "grad_norm": 0.4938586950302124, + "learning_rate": 4.840960636489548e-06, + "loss": 0.495, + "step": 7591 + }, + { + "epoch": 0.6930801533686325, + "grad_norm": 0.48736298084259033, + "learning_rate": 4.8409186283935695e-06, + "loss": 0.5507, + "step": 7592 + }, + { + "epoch": 0.6931714442212891, + "grad_norm": 0.5181859731674194, + "learning_rate": 4.840876614932703e-06, + "loss": 0.5227, + "step": 7593 + }, + { + "epoch": 0.6932627350739455, + "grad_norm": 0.4808383584022522, + "learning_rate": 4.8408345961070455e-06, + "loss": 0.5513, + "step": 7594 + }, + { + "epoch": 0.6933540259266021, + "grad_norm": 0.4930366575717926, + "learning_rate": 4.840792571916693e-06, + "loss": 0.5444, + "step": 7595 + }, + { + "epoch": 0.6934453167792587, + "grad_norm": 0.48070260882377625, + "learning_rate": 4.84075054236174e-06, + "loss": 0.5977, + "step": 7596 + }, + { + "epoch": 0.6935366076319153, + "grad_norm": 0.5116003751754761, + "learning_rate": 4.840708507442285e-06, + "loss": 0.5434, + "step": 7597 + }, + { + "epoch": 0.6936278984845718, + "grad_norm": 0.4961259365081787, + "learning_rate": 4.840666467158423e-06, + "loss": 0.5444, + "step": 7598 + }, + { + "epoch": 0.6937191893372284, + "grad_norm": 0.4786728322505951, + "learning_rate": 4.84062442151025e-06, + "loss": 0.5542, + "step": 7599 + }, + { + "epoch": 0.693810480189885, + "grad_norm": 0.47199010848999023, + "learning_rate": 4.840582370497865e-06, + "loss": 0.6029, + "step": 7600 + }, + { + "epoch": 0.6939017710425416, + "grad_norm": 0.49117985367774963, + "learning_rate": 4.840540314121361e-06, + "loss": 0.5688, + "step": 7601 + }, + { + "epoch": 0.693993061895198, + "grad_norm": 0.5003893971443176, + "learning_rate": 4.840498252380837e-06, + "loss": 0.5436, + "step": 7602 + }, + { + "epoch": 0.6940843527478546, + "grad_norm": 0.4898442029953003, + "learning_rate": 4.840456185276388e-06, + "loss": 0.5565, + "step": 7603 + }, + { + "epoch": 0.6941756436005112, + "grad_norm": 0.5156669616699219, + "learning_rate": 4.840414112808111e-06, + "loss": 0.4803, + "step": 7604 + }, + { + "epoch": 0.6942669344531678, + "grad_norm": 0.480644166469574, + "learning_rate": 4.8403720349761015e-06, + "loss": 0.5577, + "step": 7605 + }, + { + "epoch": 0.6943582253058244, + "grad_norm": 0.5104514956474304, + "learning_rate": 4.840329951780457e-06, + "loss": 0.5404, + "step": 7606 + }, + { + "epoch": 0.6944495161584809, + "grad_norm": 0.4731523096561432, + "learning_rate": 4.8402878632212736e-06, + "loss": 0.5312, + "step": 7607 + }, + { + "epoch": 0.6945408070111375, + "grad_norm": 0.47564175724983215, + "learning_rate": 4.840245769298648e-06, + "loss": 0.5561, + "step": 7608 + }, + { + "epoch": 0.6946320978637941, + "grad_norm": 0.4856836497783661, + "learning_rate": 4.840203670012676e-06, + "loss": 0.5942, + "step": 7609 + }, + { + "epoch": 0.6947233887164506, + "grad_norm": 0.5204800963401794, + "learning_rate": 4.840161565363454e-06, + "loss": 0.5525, + "step": 7610 + }, + { + "epoch": 0.6948146795691071, + "grad_norm": 0.46505656838417053, + "learning_rate": 4.84011945535108e-06, + "loss": 0.5442, + "step": 7611 + }, + { + "epoch": 0.6949059704217637, + "grad_norm": 0.48852160573005676, + "learning_rate": 4.840077339975649e-06, + "loss": 0.5504, + "step": 7612 + }, + { + "epoch": 0.6949972612744203, + "grad_norm": 0.5169325470924377, + "learning_rate": 4.8400352192372575e-06, + "loss": 0.5162, + "step": 7613 + }, + { + "epoch": 0.6950885521270769, + "grad_norm": 0.47351303696632385, + "learning_rate": 4.839993093136003e-06, + "loss": 0.5582, + "step": 7614 + }, + { + "epoch": 0.6951798429797335, + "grad_norm": 0.48546668887138367, + "learning_rate": 4.8399509616719814e-06, + "loss": 0.5957, + "step": 7615 + }, + { + "epoch": 0.69527113383239, + "grad_norm": 0.4857911169528961, + "learning_rate": 4.83990882484529e-06, + "loss": 0.5399, + "step": 7616 + }, + { + "epoch": 0.6953624246850466, + "grad_norm": 0.4905557334423065, + "learning_rate": 4.839866682656024e-06, + "loss": 0.544, + "step": 7617 + }, + { + "epoch": 0.6954537155377031, + "grad_norm": 0.4775714874267578, + "learning_rate": 4.83982453510428e-06, + "loss": 0.5957, + "step": 7618 + }, + { + "epoch": 0.6955450063903597, + "grad_norm": 0.4768305718898773, + "learning_rate": 4.839782382190157e-06, + "loss": 0.5576, + "step": 7619 + }, + { + "epoch": 0.6956362972430162, + "grad_norm": 0.4715264141559601, + "learning_rate": 4.839740223913749e-06, + "loss": 0.55, + "step": 7620 + }, + { + "epoch": 0.6957275880956728, + "grad_norm": 0.49111631512641907, + "learning_rate": 4.839698060275154e-06, + "loss": 0.5798, + "step": 7621 + }, + { + "epoch": 0.6958188789483294, + "grad_norm": 0.5032853484153748, + "learning_rate": 4.839655891274468e-06, + "loss": 0.5669, + "step": 7622 + }, + { + "epoch": 0.695910169800986, + "grad_norm": 0.48385244607925415, + "learning_rate": 4.839613716911788e-06, + "loss": 0.5742, + "step": 7623 + }, + { + "epoch": 0.6960014606536425, + "grad_norm": 0.479381263256073, + "learning_rate": 4.839571537187211e-06, + "loss": 0.5907, + "step": 7624 + }, + { + "epoch": 0.6960927515062991, + "grad_norm": 0.47665566205978394, + "learning_rate": 4.839529352100832e-06, + "loss": 0.5709, + "step": 7625 + }, + { + "epoch": 0.6961840423589556, + "grad_norm": 0.46343794465065, + "learning_rate": 4.839487161652749e-06, + "loss": 0.5885, + "step": 7626 + }, + { + "epoch": 0.6962753332116122, + "grad_norm": 0.46800050139427185, + "learning_rate": 4.83944496584306e-06, + "loss": 0.5844, + "step": 7627 + }, + { + "epoch": 0.6963666240642687, + "grad_norm": 0.5037343502044678, + "learning_rate": 4.839402764671859e-06, + "loss": 0.5667, + "step": 7628 + }, + { + "epoch": 0.6964579149169253, + "grad_norm": 0.4671333134174347, + "learning_rate": 4.839360558139245e-06, + "loss": 0.545, + "step": 7629 + }, + { + "epoch": 0.6965492057695819, + "grad_norm": 0.49493175745010376, + "learning_rate": 4.839318346245312e-06, + "loss": 0.5707, + "step": 7630 + }, + { + "epoch": 0.6966404966222385, + "grad_norm": 0.492003470659256, + "learning_rate": 4.83927612899016e-06, + "loss": 0.5362, + "step": 7631 + }, + { + "epoch": 0.6967317874748951, + "grad_norm": 0.47099247574806213, + "learning_rate": 4.839233906373884e-06, + "loss": 0.552, + "step": 7632 + }, + { + "epoch": 0.6968230783275515, + "grad_norm": 0.5222504138946533, + "learning_rate": 4.83919167839658e-06, + "loss": 0.5793, + "step": 7633 + }, + { + "epoch": 0.6969143691802081, + "grad_norm": 0.49734821915626526, + "learning_rate": 4.839149445058346e-06, + "loss": 0.5661, + "step": 7634 + }, + { + "epoch": 0.6970056600328647, + "grad_norm": 0.46780309081077576, + "learning_rate": 4.839107206359278e-06, + "loss": 0.5643, + "step": 7635 + }, + { + "epoch": 0.6970969508855213, + "grad_norm": 0.4867803454399109, + "learning_rate": 4.839064962299474e-06, + "loss": 0.5692, + "step": 7636 + }, + { + "epoch": 0.6971882417381778, + "grad_norm": 0.48459237813949585, + "learning_rate": 4.839022712879031e-06, + "loss": 0.5676, + "step": 7637 + }, + { + "epoch": 0.6972795325908344, + "grad_norm": 0.4765189588069916, + "learning_rate": 4.838980458098044e-06, + "loss": 0.5607, + "step": 7638 + }, + { + "epoch": 0.697370823443491, + "grad_norm": 0.47032588720321655, + "learning_rate": 4.838938197956611e-06, + "loss": 0.5755, + "step": 7639 + }, + { + "epoch": 0.6974621142961476, + "grad_norm": 0.5227696895599365, + "learning_rate": 4.838895932454829e-06, + "loss": 0.5696, + "step": 7640 + }, + { + "epoch": 0.697553405148804, + "grad_norm": 0.4786616265773773, + "learning_rate": 4.838853661592794e-06, + "loss": 0.5303, + "step": 7641 + }, + { + "epoch": 0.6976446960014606, + "grad_norm": 0.4548010528087616, + "learning_rate": 4.8388113853706034e-06, + "loss": 0.5694, + "step": 7642 + }, + { + "epoch": 0.6977359868541172, + "grad_norm": 0.463331937789917, + "learning_rate": 4.838769103788354e-06, + "loss": 0.5809, + "step": 7643 + }, + { + "epoch": 0.6978272777067738, + "grad_norm": 0.4894872307777405, + "learning_rate": 4.838726816846143e-06, + "loss": 0.5631, + "step": 7644 + }, + { + "epoch": 0.6979185685594304, + "grad_norm": 0.5191913843154907, + "learning_rate": 4.838684524544067e-06, + "loss": 0.5462, + "step": 7645 + }, + { + "epoch": 0.6980098594120869, + "grad_norm": 0.5268681049346924, + "learning_rate": 4.838642226882223e-06, + "loss": 0.5296, + "step": 7646 + }, + { + "epoch": 0.6981011502647435, + "grad_norm": 0.4443345367908478, + "learning_rate": 4.838599923860709e-06, + "loss": 0.6029, + "step": 7647 + }, + { + "epoch": 0.6981924411174001, + "grad_norm": 0.442633718252182, + "learning_rate": 4.8385576154796195e-06, + "loss": 0.6054, + "step": 7648 + }, + { + "epoch": 0.6982837319700566, + "grad_norm": 0.4566666781902313, + "learning_rate": 4.838515301739053e-06, + "loss": 0.5828, + "step": 7649 + }, + { + "epoch": 0.6983750228227131, + "grad_norm": 0.4937267601490021, + "learning_rate": 4.838472982639107e-06, + "loss": 0.5168, + "step": 7650 + }, + { + "epoch": 0.6984663136753697, + "grad_norm": 0.4737203121185303, + "learning_rate": 4.838430658179877e-06, + "loss": 0.5629, + "step": 7651 + }, + { + "epoch": 0.6985576045280263, + "grad_norm": 0.5186545848846436, + "learning_rate": 4.838388328361462e-06, + "loss": 0.5364, + "step": 7652 + }, + { + "epoch": 0.6986488953806829, + "grad_norm": 0.4840726852416992, + "learning_rate": 4.838345993183956e-06, + "loss": 0.5637, + "step": 7653 + }, + { + "epoch": 0.6987401862333394, + "grad_norm": 0.48341381549835205, + "learning_rate": 4.838303652647459e-06, + "loss": 0.5365, + "step": 7654 + }, + { + "epoch": 0.698831477085996, + "grad_norm": 0.4903356432914734, + "learning_rate": 4.838261306752068e-06, + "loss": 0.5716, + "step": 7655 + }, + { + "epoch": 0.6989227679386526, + "grad_norm": 0.47683197259902954, + "learning_rate": 4.8382189554978774e-06, + "loss": 0.5651, + "step": 7656 + }, + { + "epoch": 0.6990140587913091, + "grad_norm": 0.4595365524291992, + "learning_rate": 4.838176598884986e-06, + "loss": 0.5606, + "step": 7657 + }, + { + "epoch": 0.6991053496439656, + "grad_norm": 0.4714457094669342, + "learning_rate": 4.838134236913491e-06, + "loss": 0.5845, + "step": 7658 + }, + { + "epoch": 0.6991966404966222, + "grad_norm": 0.5047960877418518, + "learning_rate": 4.838091869583489e-06, + "loss": 0.5572, + "step": 7659 + }, + { + "epoch": 0.6992879313492788, + "grad_norm": 0.4744316339492798, + "learning_rate": 4.8380494968950776e-06, + "loss": 0.5926, + "step": 7660 + }, + { + "epoch": 0.6993792222019354, + "grad_norm": 0.5050554871559143, + "learning_rate": 4.838007118848353e-06, + "loss": 0.5172, + "step": 7661 + }, + { + "epoch": 0.699470513054592, + "grad_norm": 0.5012356638908386, + "learning_rate": 4.837964735443413e-06, + "loss": 0.5385, + "step": 7662 + }, + { + "epoch": 0.6995618039072485, + "grad_norm": 0.5006837844848633, + "learning_rate": 4.837922346680355e-06, + "loss": 0.5699, + "step": 7663 + }, + { + "epoch": 0.699653094759905, + "grad_norm": 0.5023362636566162, + "learning_rate": 4.8378799525592755e-06, + "loss": 0.5601, + "step": 7664 + }, + { + "epoch": 0.6997443856125616, + "grad_norm": 0.4406145513057709, + "learning_rate": 4.837837553080272e-06, + "loss": 0.5909, + "step": 7665 + }, + { + "epoch": 0.6998356764652182, + "grad_norm": 0.4850431978702545, + "learning_rate": 4.837795148243442e-06, + "loss": 0.5666, + "step": 7666 + }, + { + "epoch": 0.6999269673178747, + "grad_norm": 0.4708404242992401, + "learning_rate": 4.837752738048882e-06, + "loss": 0.5485, + "step": 7667 + }, + { + "epoch": 0.7000182581705313, + "grad_norm": 0.4548206031322479, + "learning_rate": 4.837710322496689e-06, + "loss": 0.5915, + "step": 7668 + }, + { + "epoch": 0.7001095490231879, + "grad_norm": 0.4503268003463745, + "learning_rate": 4.837667901586961e-06, + "loss": 0.5686, + "step": 7669 + }, + { + "epoch": 0.7002008398758445, + "grad_norm": 0.4687904417514801, + "learning_rate": 4.837625475319795e-06, + "loss": 0.5475, + "step": 7670 + }, + { + "epoch": 0.700292130728501, + "grad_norm": 0.4827136695384979, + "learning_rate": 4.837583043695288e-06, + "loss": 0.5573, + "step": 7671 + }, + { + "epoch": 0.7003834215811575, + "grad_norm": 0.5082269906997681, + "learning_rate": 4.837540606713538e-06, + "loss": 0.5528, + "step": 7672 + }, + { + "epoch": 0.7004747124338141, + "grad_norm": 0.4981808662414551, + "learning_rate": 4.8374981643746405e-06, + "loss": 0.5475, + "step": 7673 + }, + { + "epoch": 0.7005660032864707, + "grad_norm": 0.4727306365966797, + "learning_rate": 4.837455716678694e-06, + "loss": 0.5603, + "step": 7674 + }, + { + "epoch": 0.7006572941391273, + "grad_norm": 0.5081918835639954, + "learning_rate": 4.837413263625796e-06, + "loss": 0.5157, + "step": 7675 + }, + { + "epoch": 0.7007485849917838, + "grad_norm": 0.5849536061286926, + "learning_rate": 4.837370805216044e-06, + "loss": 0.5629, + "step": 7676 + }, + { + "epoch": 0.7008398758444404, + "grad_norm": 0.4958478808403015, + "learning_rate": 4.837328341449534e-06, + "loss": 0.564, + "step": 7677 + }, + { + "epoch": 0.700931166697097, + "grad_norm": 0.49095556139945984, + "learning_rate": 4.837285872326365e-06, + "loss": 0.5448, + "step": 7678 + }, + { + "epoch": 0.7010224575497536, + "grad_norm": 0.4943842589855194, + "learning_rate": 4.837243397846633e-06, + "loss": 0.5647, + "step": 7679 + }, + { + "epoch": 0.70111374840241, + "grad_norm": 0.4819280803203583, + "learning_rate": 4.8372009180104365e-06, + "loss": 0.5788, + "step": 7680 + }, + { + "epoch": 0.7012050392550666, + "grad_norm": 0.47057485580444336, + "learning_rate": 4.8371584328178705e-06, + "loss": 0.5577, + "step": 7681 + }, + { + "epoch": 0.7012963301077232, + "grad_norm": 0.44624778628349304, + "learning_rate": 4.837115942269035e-06, + "loss": 0.5787, + "step": 7682 + }, + { + "epoch": 0.7013876209603798, + "grad_norm": 0.5296005010604858, + "learning_rate": 4.837073446364026e-06, + "loss": 0.5157, + "step": 7683 + }, + { + "epoch": 0.7014789118130363, + "grad_norm": 0.51649409532547, + "learning_rate": 4.837030945102942e-06, + "loss": 0.547, + "step": 7684 + }, + { + "epoch": 0.7015702026656929, + "grad_norm": 0.5229208469390869, + "learning_rate": 4.836988438485879e-06, + "loss": 0.5311, + "step": 7685 + }, + { + "epoch": 0.7016614935183495, + "grad_norm": 0.4799450635910034, + "learning_rate": 4.8369459265129345e-06, + "loss": 0.5574, + "step": 7686 + }, + { + "epoch": 0.7017527843710061, + "grad_norm": 0.48206081986427307, + "learning_rate": 4.836903409184208e-06, + "loss": 0.5411, + "step": 7687 + }, + { + "epoch": 0.7018440752236625, + "grad_norm": 0.45959097146987915, + "learning_rate": 4.836860886499794e-06, + "loss": 0.5716, + "step": 7688 + }, + { + "epoch": 0.7019353660763191, + "grad_norm": 0.46353691816329956, + "learning_rate": 4.836818358459793e-06, + "loss": 0.5502, + "step": 7689 + }, + { + "epoch": 0.7020266569289757, + "grad_norm": 0.4674159586429596, + "learning_rate": 4.836775825064299e-06, + "loss": 0.5728, + "step": 7690 + }, + { + "epoch": 0.7021179477816323, + "grad_norm": 0.4636457562446594, + "learning_rate": 4.836733286313413e-06, + "loss": 0.5464, + "step": 7691 + }, + { + "epoch": 0.7022092386342889, + "grad_norm": 0.4820530414581299, + "learning_rate": 4.83669074220723e-06, + "loss": 0.5663, + "step": 7692 + }, + { + "epoch": 0.7023005294869454, + "grad_norm": 0.47332629561424255, + "learning_rate": 4.836648192745849e-06, + "loss": 0.5433, + "step": 7693 + }, + { + "epoch": 0.702391820339602, + "grad_norm": 0.508359432220459, + "learning_rate": 4.836605637929366e-06, + "loss": 0.5393, + "step": 7694 + }, + { + "epoch": 0.7024831111922585, + "grad_norm": 0.4789809584617615, + "learning_rate": 4.8365630777578795e-06, + "loss": 0.5854, + "step": 7695 + }, + { + "epoch": 0.7025744020449151, + "grad_norm": 0.49935805797576904, + "learning_rate": 4.836520512231487e-06, + "loss": 0.5608, + "step": 7696 + }, + { + "epoch": 0.7026656928975716, + "grad_norm": 0.5158013105392456, + "learning_rate": 4.836477941350287e-06, + "loss": 0.5195, + "step": 7697 + }, + { + "epoch": 0.7027569837502282, + "grad_norm": 0.4698948860168457, + "learning_rate": 4.836435365114375e-06, + "loss": 0.5479, + "step": 7698 + }, + { + "epoch": 0.7028482746028848, + "grad_norm": 0.4994618892669678, + "learning_rate": 4.83639278352385e-06, + "loss": 0.5255, + "step": 7699 + }, + { + "epoch": 0.7029395654555414, + "grad_norm": 0.4869847297668457, + "learning_rate": 4.836350196578808e-06, + "loss": 0.5769, + "step": 7700 + }, + { + "epoch": 0.703030856308198, + "grad_norm": 0.47907498478889465, + "learning_rate": 4.83630760427935e-06, + "loss": 0.5729, + "step": 7701 + }, + { + "epoch": 0.7031221471608545, + "grad_norm": 0.46321049332618713, + "learning_rate": 4.8362650066255695e-06, + "loss": 0.541, + "step": 7702 + }, + { + "epoch": 0.703213438013511, + "grad_norm": 0.49033859372138977, + "learning_rate": 4.836222403617567e-06, + "loss": 0.5704, + "step": 7703 + }, + { + "epoch": 0.7033047288661676, + "grad_norm": 0.5218607187271118, + "learning_rate": 4.836179795255439e-06, + "loss": 0.5283, + "step": 7704 + }, + { + "epoch": 0.7033960197188242, + "grad_norm": 0.4783133864402771, + "learning_rate": 4.836137181539283e-06, + "loss": 0.5717, + "step": 7705 + }, + { + "epoch": 0.7034873105714807, + "grad_norm": 0.44114264845848083, + "learning_rate": 4.8360945624691975e-06, + "loss": 0.5496, + "step": 7706 + }, + { + "epoch": 0.7035786014241373, + "grad_norm": 0.4828491806983948, + "learning_rate": 4.836051938045279e-06, + "loss": 0.5881, + "step": 7707 + }, + { + "epoch": 0.7036698922767939, + "grad_norm": 0.4563060998916626, + "learning_rate": 4.836009308267626e-06, + "loss": 0.5532, + "step": 7708 + }, + { + "epoch": 0.7037611831294505, + "grad_norm": 0.480857789516449, + "learning_rate": 4.835966673136336e-06, + "loss": 0.5438, + "step": 7709 + }, + { + "epoch": 0.703852473982107, + "grad_norm": 0.474071204662323, + "learning_rate": 4.835924032651508e-06, + "loss": 0.551, + "step": 7710 + }, + { + "epoch": 0.7039437648347635, + "grad_norm": 0.4798644483089447, + "learning_rate": 4.835881386813237e-06, + "loss": 0.5981, + "step": 7711 + }, + { + "epoch": 0.7040350556874201, + "grad_norm": 0.4698863923549652, + "learning_rate": 4.835838735621623e-06, + "loss": 0.6214, + "step": 7712 + }, + { + "epoch": 0.7041263465400767, + "grad_norm": 0.484441876411438, + "learning_rate": 4.8357960790767626e-06, + "loss": 0.5843, + "step": 7713 + }, + { + "epoch": 0.7042176373927332, + "grad_norm": 0.5003358125686646, + "learning_rate": 4.835753417178755e-06, + "loss": 0.5461, + "step": 7714 + }, + { + "epoch": 0.7043089282453898, + "grad_norm": 0.4838550090789795, + "learning_rate": 4.835710749927695e-06, + "loss": 0.5403, + "step": 7715 + }, + { + "epoch": 0.7044002190980464, + "grad_norm": 0.4626604914665222, + "learning_rate": 4.835668077323683e-06, + "loss": 0.5588, + "step": 7716 + }, + { + "epoch": 0.704491509950703, + "grad_norm": 0.47083085775375366, + "learning_rate": 4.835625399366816e-06, + "loss": 0.5838, + "step": 7717 + }, + { + "epoch": 0.7045828008033596, + "grad_norm": 0.4698387086391449, + "learning_rate": 4.835582716057193e-06, + "loss": 0.5583, + "step": 7718 + }, + { + "epoch": 0.704674091656016, + "grad_norm": 0.4821556508541107, + "learning_rate": 4.835540027394909e-06, + "loss": 0.5431, + "step": 7719 + }, + { + "epoch": 0.7047653825086726, + "grad_norm": 0.4590817093849182, + "learning_rate": 4.835497333380065e-06, + "loss": 0.5609, + "step": 7720 + }, + { + "epoch": 0.7048566733613292, + "grad_norm": 0.5151322484016418, + "learning_rate": 4.835454634012756e-06, + "loss": 0.543, + "step": 7721 + }, + { + "epoch": 0.7049479642139858, + "grad_norm": 0.4771815538406372, + "learning_rate": 4.835411929293083e-06, + "loss": 0.5328, + "step": 7722 + }, + { + "epoch": 0.7050392550666423, + "grad_norm": 0.493965744972229, + "learning_rate": 4.835369219221141e-06, + "loss": 0.5858, + "step": 7723 + }, + { + "epoch": 0.7051305459192989, + "grad_norm": 0.5148136019706726, + "learning_rate": 4.8353265037970286e-06, + "loss": 0.5256, + "step": 7724 + }, + { + "epoch": 0.7052218367719555, + "grad_norm": 0.4680113196372986, + "learning_rate": 4.835283783020845e-06, + "loss": 0.5516, + "step": 7725 + }, + { + "epoch": 0.7053131276246121, + "grad_norm": 0.4861176908016205, + "learning_rate": 4.835241056892687e-06, + "loss": 0.5587, + "step": 7726 + }, + { + "epoch": 0.7054044184772685, + "grad_norm": 0.4948202669620514, + "learning_rate": 4.835198325412652e-06, + "loss": 0.5205, + "step": 7727 + }, + { + "epoch": 0.7054957093299251, + "grad_norm": 0.4750542640686035, + "learning_rate": 4.8351555885808394e-06, + "loss": 0.5784, + "step": 7728 + }, + { + "epoch": 0.7055870001825817, + "grad_norm": 0.49976950883865356, + "learning_rate": 4.8351128463973464e-06, + "loss": 0.5424, + "step": 7729 + }, + { + "epoch": 0.7056782910352383, + "grad_norm": 0.4817083775997162, + "learning_rate": 4.8350700988622705e-06, + "loss": 0.5448, + "step": 7730 + }, + { + "epoch": 0.7057695818878948, + "grad_norm": 0.48548004031181335, + "learning_rate": 4.83502734597571e-06, + "loss": 0.5886, + "step": 7731 + }, + { + "epoch": 0.7058608727405514, + "grad_norm": 0.49686941504478455, + "learning_rate": 4.8349845877377636e-06, + "loss": 0.5524, + "step": 7732 + }, + { + "epoch": 0.705952163593208, + "grad_norm": 0.4859786033630371, + "learning_rate": 4.834941824148528e-06, + "loss": 0.5793, + "step": 7733 + }, + { + "epoch": 0.7060434544458645, + "grad_norm": 0.45237997174263, + "learning_rate": 4.834899055208102e-06, + "loss": 0.5807, + "step": 7734 + }, + { + "epoch": 0.706134745298521, + "grad_norm": 0.5015127658843994, + "learning_rate": 4.834856280916584e-06, + "loss": 0.5298, + "step": 7735 + }, + { + "epoch": 0.7062260361511776, + "grad_norm": 0.4805132746696472, + "learning_rate": 4.8348135012740715e-06, + "loss": 0.5441, + "step": 7736 + }, + { + "epoch": 0.7063173270038342, + "grad_norm": 0.44742488861083984, + "learning_rate": 4.834770716280663e-06, + "loss": 0.5741, + "step": 7737 + }, + { + "epoch": 0.7064086178564908, + "grad_norm": 0.47443899512290955, + "learning_rate": 4.834727925936455e-06, + "loss": 0.5861, + "step": 7738 + }, + { + "epoch": 0.7064999087091474, + "grad_norm": 0.4622455835342407, + "learning_rate": 4.834685130241547e-06, + "loss": 0.5368, + "step": 7739 + }, + { + "epoch": 0.7065911995618039, + "grad_norm": 0.4773368835449219, + "learning_rate": 4.834642329196037e-06, + "loss": 0.6106, + "step": 7740 + }, + { + "epoch": 0.7066824904144605, + "grad_norm": 0.45145946741104126, + "learning_rate": 4.8345995228000235e-06, + "loss": 0.576, + "step": 7741 + }, + { + "epoch": 0.706773781267117, + "grad_norm": 0.45428794622421265, + "learning_rate": 4.834556711053603e-06, + "loss": 0.5625, + "step": 7742 + }, + { + "epoch": 0.7068650721197736, + "grad_norm": 0.44757694005966187, + "learning_rate": 4.834513893956875e-06, + "loss": 0.6143, + "step": 7743 + }, + { + "epoch": 0.7069563629724301, + "grad_norm": 0.4756139814853668, + "learning_rate": 4.8344710715099365e-06, + "loss": 0.5375, + "step": 7744 + }, + { + "epoch": 0.7070476538250867, + "grad_norm": 0.4868684709072113, + "learning_rate": 4.834428243712887e-06, + "loss": 0.5486, + "step": 7745 + }, + { + "epoch": 0.7071389446777433, + "grad_norm": 0.519562840461731, + "learning_rate": 4.834385410565825e-06, + "loss": 0.5182, + "step": 7746 + }, + { + "epoch": 0.7072302355303999, + "grad_norm": 0.48640817403793335, + "learning_rate": 4.834342572068846e-06, + "loss": 0.5313, + "step": 7747 + }, + { + "epoch": 0.7073215263830565, + "grad_norm": 0.5185926556587219, + "learning_rate": 4.834299728222051e-06, + "loss": 0.5555, + "step": 7748 + }, + { + "epoch": 0.707412817235713, + "grad_norm": 0.4776992201805115, + "learning_rate": 4.834256879025536e-06, + "loss": 0.5582, + "step": 7749 + }, + { + "epoch": 0.7075041080883695, + "grad_norm": 0.4729340970516205, + "learning_rate": 4.8342140244794015e-06, + "loss": 0.5706, + "step": 7750 + }, + { + "epoch": 0.7075953989410261, + "grad_norm": 0.4800124168395996, + "learning_rate": 4.834171164583743e-06, + "loss": 0.5119, + "step": 7751 + }, + { + "epoch": 0.7076866897936827, + "grad_norm": 0.44740018248558044, + "learning_rate": 4.834128299338661e-06, + "loss": 0.5543, + "step": 7752 + }, + { + "epoch": 0.7077779806463392, + "grad_norm": 0.45836523175239563, + "learning_rate": 4.834085428744253e-06, + "loss": 0.5831, + "step": 7753 + }, + { + "epoch": 0.7078692714989958, + "grad_norm": 0.4577047526836395, + "learning_rate": 4.834042552800617e-06, + "loss": 0.5337, + "step": 7754 + }, + { + "epoch": 0.7079605623516524, + "grad_norm": 0.44678300619125366, + "learning_rate": 4.833999671507852e-06, + "loss": 0.5723, + "step": 7755 + }, + { + "epoch": 0.708051853204309, + "grad_norm": 0.48401138186454773, + "learning_rate": 4.833956784866055e-06, + "loss": 0.563, + "step": 7756 + }, + { + "epoch": 0.7081431440569655, + "grad_norm": 0.46870431303977966, + "learning_rate": 4.833913892875325e-06, + "loss": 0.5652, + "step": 7757 + }, + { + "epoch": 0.708234434909622, + "grad_norm": 0.48847174644470215, + "learning_rate": 4.8338709955357605e-06, + "loss": 0.5868, + "step": 7758 + }, + { + "epoch": 0.7083257257622786, + "grad_norm": 0.47584566473960876, + "learning_rate": 4.833828092847459e-06, + "loss": 0.5435, + "step": 7759 + }, + { + "epoch": 0.7084170166149352, + "grad_norm": 0.4697514474391937, + "learning_rate": 4.8337851848105206e-06, + "loss": 0.5786, + "step": 7760 + }, + { + "epoch": 0.7085083074675917, + "grad_norm": 0.47171908617019653, + "learning_rate": 4.833742271425042e-06, + "loss": 0.5271, + "step": 7761 + }, + { + "epoch": 0.7085995983202483, + "grad_norm": 0.4820275902748108, + "learning_rate": 4.8336993526911215e-06, + "loss": 0.6466, + "step": 7762 + }, + { + "epoch": 0.7086908891729049, + "grad_norm": 0.4750088155269623, + "learning_rate": 4.833656428608858e-06, + "loss": 0.5944, + "step": 7763 + }, + { + "epoch": 0.7087821800255615, + "grad_norm": 0.45942679047584534, + "learning_rate": 4.83361349917835e-06, + "loss": 0.5772, + "step": 7764 + }, + { + "epoch": 0.708873470878218, + "grad_norm": 0.5063385367393494, + "learning_rate": 4.833570564399696e-06, + "loss": 0.506, + "step": 7765 + }, + { + "epoch": 0.7089647617308745, + "grad_norm": 0.5015561580657959, + "learning_rate": 4.833527624272994e-06, + "loss": 0.5509, + "step": 7766 + }, + { + "epoch": 0.7090560525835311, + "grad_norm": 0.4956206977367401, + "learning_rate": 4.833484678798343e-06, + "loss": 0.5335, + "step": 7767 + }, + { + "epoch": 0.7091473434361877, + "grad_norm": 0.4781045913696289, + "learning_rate": 4.833441727975841e-06, + "loss": 0.5342, + "step": 7768 + }, + { + "epoch": 0.7092386342888443, + "grad_norm": 0.47650453448295593, + "learning_rate": 4.833398771805585e-06, + "loss": 0.5662, + "step": 7769 + }, + { + "epoch": 0.7093299251415008, + "grad_norm": 0.4875028431415558, + "learning_rate": 4.833355810287677e-06, + "loss": 0.5073, + "step": 7770 + }, + { + "epoch": 0.7094212159941574, + "grad_norm": 0.4639200270175934, + "learning_rate": 4.833312843422211e-06, + "loss": 0.557, + "step": 7771 + }, + { + "epoch": 0.709512506846814, + "grad_norm": 0.45254454016685486, + "learning_rate": 4.833269871209289e-06, + "loss": 0.5223, + "step": 7772 + }, + { + "epoch": 0.7096037976994705, + "grad_norm": 0.46317335963249207, + "learning_rate": 4.833226893649008e-06, + "loss": 0.6024, + "step": 7773 + }, + { + "epoch": 0.709695088552127, + "grad_norm": 0.4459012746810913, + "learning_rate": 4.833183910741467e-06, + "loss": 0.6158, + "step": 7774 + }, + { + "epoch": 0.7097863794047836, + "grad_norm": 0.48204752802848816, + "learning_rate": 4.833140922486764e-06, + "loss": 0.536, + "step": 7775 + }, + { + "epoch": 0.7098776702574402, + "grad_norm": 0.4932657480239868, + "learning_rate": 4.8330979288849974e-06, + "loss": 0.5576, + "step": 7776 + }, + { + "epoch": 0.7099689611100968, + "grad_norm": 0.4741196930408478, + "learning_rate": 4.833054929936267e-06, + "loss": 0.5632, + "step": 7777 + }, + { + "epoch": 0.7100602519627534, + "grad_norm": 0.5030644536018372, + "learning_rate": 4.8330119256406696e-06, + "loss": 0.582, + "step": 7778 + }, + { + "epoch": 0.7101515428154099, + "grad_norm": 0.4579746127128601, + "learning_rate": 4.832968915998305e-06, + "loss": 0.5584, + "step": 7779 + }, + { + "epoch": 0.7102428336680665, + "grad_norm": 0.5261034369468689, + "learning_rate": 4.832925901009271e-06, + "loss": 0.468, + "step": 7780 + }, + { + "epoch": 0.710334124520723, + "grad_norm": 0.48523277044296265, + "learning_rate": 4.832882880673668e-06, + "loss": 0.5597, + "step": 7781 + }, + { + "epoch": 0.7104254153733796, + "grad_norm": 0.4484133720397949, + "learning_rate": 4.8328398549915915e-06, + "loss": 0.57, + "step": 7782 + }, + { + "epoch": 0.7105167062260361, + "grad_norm": 0.485620379447937, + "learning_rate": 4.832796823963143e-06, + "loss": 0.5842, + "step": 7783 + }, + { + "epoch": 0.7106079970786927, + "grad_norm": 0.4537300765514374, + "learning_rate": 4.832753787588419e-06, + "loss": 0.6021, + "step": 7784 + }, + { + "epoch": 0.7106992879313493, + "grad_norm": 0.4911826252937317, + "learning_rate": 4.832710745867519e-06, + "loss": 0.5443, + "step": 7785 + }, + { + "epoch": 0.7107905787840059, + "grad_norm": 0.46658778190612793, + "learning_rate": 4.832667698800542e-06, + "loss": 0.5519, + "step": 7786 + }, + { + "epoch": 0.7108818696366624, + "grad_norm": 0.5161705017089844, + "learning_rate": 4.832624646387586e-06, + "loss": 0.5212, + "step": 7787 + }, + { + "epoch": 0.710973160489319, + "grad_norm": 0.47132793068885803, + "learning_rate": 4.832581588628751e-06, + "loss": 0.5459, + "step": 7788 + }, + { + "epoch": 0.7110644513419755, + "grad_norm": 0.4534129798412323, + "learning_rate": 4.832538525524133e-06, + "loss": 0.5915, + "step": 7789 + }, + { + "epoch": 0.7111557421946321, + "grad_norm": 0.5033395290374756, + "learning_rate": 4.832495457073834e-06, + "loss": 0.5611, + "step": 7790 + }, + { + "epoch": 0.7112470330472886, + "grad_norm": 0.4961112439632416, + "learning_rate": 4.832452383277949e-06, + "loss": 0.5227, + "step": 7791 + }, + { + "epoch": 0.7113383238999452, + "grad_norm": 0.5088924169540405, + "learning_rate": 4.83240930413658e-06, + "loss": 0.5327, + "step": 7792 + }, + { + "epoch": 0.7114296147526018, + "grad_norm": 0.5106251239776611, + "learning_rate": 4.832366219649825e-06, + "loss": 0.5524, + "step": 7793 + }, + { + "epoch": 0.7115209056052584, + "grad_norm": 0.4869944155216217, + "learning_rate": 4.832323129817782e-06, + "loss": 0.527, + "step": 7794 + }, + { + "epoch": 0.711612196457915, + "grad_norm": 0.47477585077285767, + "learning_rate": 4.83228003464055e-06, + "loss": 0.5612, + "step": 7795 + }, + { + "epoch": 0.7117034873105714, + "grad_norm": 0.5044073462486267, + "learning_rate": 4.832236934118226e-06, + "loss": 0.5275, + "step": 7796 + }, + { + "epoch": 0.711794778163228, + "grad_norm": 0.4775754511356354, + "learning_rate": 4.8321938282509125e-06, + "loss": 0.4898, + "step": 7797 + }, + { + "epoch": 0.7118860690158846, + "grad_norm": 0.4762308597564697, + "learning_rate": 4.832150717038706e-06, + "loss": 0.5448, + "step": 7798 + }, + { + "epoch": 0.7119773598685412, + "grad_norm": 0.45775848627090454, + "learning_rate": 4.8321076004817054e-06, + "loss": 0.579, + "step": 7799 + }, + { + "epoch": 0.7120686507211977, + "grad_norm": 0.4689716696739197, + "learning_rate": 4.83206447858001e-06, + "loss": 0.5811, + "step": 7800 + }, + { + "epoch": 0.7121599415738543, + "grad_norm": 0.43383559584617615, + "learning_rate": 4.832021351333718e-06, + "loss": 0.5953, + "step": 7801 + }, + { + "epoch": 0.7122512324265109, + "grad_norm": 0.5060896277427673, + "learning_rate": 4.831978218742928e-06, + "loss": 0.5084, + "step": 7802 + }, + { + "epoch": 0.7123425232791675, + "grad_norm": 0.47517457604408264, + "learning_rate": 4.83193508080774e-06, + "loss": 0.5816, + "step": 7803 + }, + { + "epoch": 0.7124338141318239, + "grad_norm": 0.4794948697090149, + "learning_rate": 4.831891937528252e-06, + "loss": 0.5541, + "step": 7804 + }, + { + "epoch": 0.7125251049844805, + "grad_norm": 0.45902833342552185, + "learning_rate": 4.831848788904564e-06, + "loss": 0.5823, + "step": 7805 + }, + { + "epoch": 0.7126163958371371, + "grad_norm": 0.450292706489563, + "learning_rate": 4.8318056349367735e-06, + "loss": 0.599, + "step": 7806 + }, + { + "epoch": 0.7127076866897937, + "grad_norm": 0.5139544010162354, + "learning_rate": 4.83176247562498e-06, + "loss": 0.5906, + "step": 7807 + }, + { + "epoch": 0.7127989775424503, + "grad_norm": 0.4521917998790741, + "learning_rate": 4.8317193109692825e-06, + "loss": 0.5734, + "step": 7808 + }, + { + "epoch": 0.7128902683951068, + "grad_norm": 0.48498740792274475, + "learning_rate": 4.83167614096978e-06, + "loss": 0.5204, + "step": 7809 + }, + { + "epoch": 0.7129815592477634, + "grad_norm": 0.4634242355823517, + "learning_rate": 4.831632965626571e-06, + "loss": 0.5611, + "step": 7810 + }, + { + "epoch": 0.71307285010042, + "grad_norm": 0.48173847794532776, + "learning_rate": 4.831589784939755e-06, + "loss": 0.5512, + "step": 7811 + }, + { + "epoch": 0.7131641409530765, + "grad_norm": 0.4999731183052063, + "learning_rate": 4.83154659890943e-06, + "loss": 0.5422, + "step": 7812 + }, + { + "epoch": 0.713255431805733, + "grad_norm": 0.5246940851211548, + "learning_rate": 4.831503407535696e-06, + "loss": 0.5329, + "step": 7813 + }, + { + "epoch": 0.7133467226583896, + "grad_norm": 0.4674629271030426, + "learning_rate": 4.831460210818652e-06, + "loss": 0.5777, + "step": 7814 + }, + { + "epoch": 0.7134380135110462, + "grad_norm": 0.4808408319950104, + "learning_rate": 4.831417008758396e-06, + "loss": 0.5297, + "step": 7815 + }, + { + "epoch": 0.7135293043637028, + "grad_norm": 0.5102907419204712, + "learning_rate": 4.831373801355028e-06, + "loss": 0.532, + "step": 7816 + }, + { + "epoch": 0.7136205952163593, + "grad_norm": 0.4846593737602234, + "learning_rate": 4.831330588608647e-06, + "loss": 0.5703, + "step": 7817 + }, + { + "epoch": 0.7137118860690159, + "grad_norm": 0.44568055868148804, + "learning_rate": 4.831287370519351e-06, + "loss": 0.569, + "step": 7818 + }, + { + "epoch": 0.7138031769216725, + "grad_norm": 0.5055840611457825, + "learning_rate": 4.83124414708724e-06, + "loss": 0.4973, + "step": 7819 + }, + { + "epoch": 0.713894467774329, + "grad_norm": 0.4796203076839447, + "learning_rate": 4.831200918312412e-06, + "loss": 0.5282, + "step": 7820 + }, + { + "epoch": 0.7139857586269855, + "grad_norm": 0.4552263021469116, + "learning_rate": 4.831157684194968e-06, + "loss": 0.5699, + "step": 7821 + }, + { + "epoch": 0.7140770494796421, + "grad_norm": 0.49696433544158936, + "learning_rate": 4.831114444735006e-06, + "loss": 0.5508, + "step": 7822 + }, + { + "epoch": 0.7141683403322987, + "grad_norm": 0.49457070231437683, + "learning_rate": 4.8310711999326235e-06, + "loss": 0.5952, + "step": 7823 + }, + { + "epoch": 0.7142596311849553, + "grad_norm": 0.46200281381607056, + "learning_rate": 4.831027949787922e-06, + "loss": 0.584, + "step": 7824 + }, + { + "epoch": 0.7143509220376119, + "grad_norm": 0.46368348598480225, + "learning_rate": 4.8309846943010005e-06, + "loss": 0.5321, + "step": 7825 + }, + { + "epoch": 0.7144422128902684, + "grad_norm": 0.47300565242767334, + "learning_rate": 4.830941433471957e-06, + "loss": 0.5368, + "step": 7826 + }, + { + "epoch": 0.714533503742925, + "grad_norm": 0.4819251596927643, + "learning_rate": 4.83089816730089e-06, + "loss": 0.5534, + "step": 7827 + }, + { + "epoch": 0.7146247945955815, + "grad_norm": 0.5165772438049316, + "learning_rate": 4.830854895787901e-06, + "loss": 0.5191, + "step": 7828 + }, + { + "epoch": 0.7147160854482381, + "grad_norm": 0.4611073434352875, + "learning_rate": 4.830811618933087e-06, + "loss": 0.5745, + "step": 7829 + }, + { + "epoch": 0.7148073763008946, + "grad_norm": 0.45616307854652405, + "learning_rate": 4.830768336736548e-06, + "loss": 0.5646, + "step": 7830 + }, + { + "epoch": 0.7148986671535512, + "grad_norm": 0.5122002363204956, + "learning_rate": 4.830725049198385e-06, + "loss": 0.5545, + "step": 7831 + }, + { + "epoch": 0.7149899580062078, + "grad_norm": 0.5043225288391113, + "learning_rate": 4.830681756318693e-06, + "loss": 0.5305, + "step": 7832 + }, + { + "epoch": 0.7150812488588644, + "grad_norm": 0.5099882483482361, + "learning_rate": 4.830638458097575e-06, + "loss": 0.5514, + "step": 7833 + }, + { + "epoch": 0.715172539711521, + "grad_norm": 0.47553306818008423, + "learning_rate": 4.830595154535129e-06, + "loss": 0.5651, + "step": 7834 + }, + { + "epoch": 0.7152638305641774, + "grad_norm": 0.4629254937171936, + "learning_rate": 4.830551845631453e-06, + "loss": 0.544, + "step": 7835 + }, + { + "epoch": 0.715355121416834, + "grad_norm": 0.4481985569000244, + "learning_rate": 4.830508531386649e-06, + "loss": 0.5755, + "step": 7836 + }, + { + "epoch": 0.7154464122694906, + "grad_norm": 0.4771207571029663, + "learning_rate": 4.830465211800814e-06, + "loss": 0.5262, + "step": 7837 + }, + { + "epoch": 0.7155377031221472, + "grad_norm": 0.5001769065856934, + "learning_rate": 4.830421886874047e-06, + "loss": 0.5421, + "step": 7838 + }, + { + "epoch": 0.7156289939748037, + "grad_norm": 0.46691370010375977, + "learning_rate": 4.830378556606449e-06, + "loss": 0.6236, + "step": 7839 + }, + { + "epoch": 0.7157202848274603, + "grad_norm": 0.47453397512435913, + "learning_rate": 4.8303352209981195e-06, + "loss": 0.575, + "step": 7840 + }, + { + "epoch": 0.7158115756801169, + "grad_norm": 0.5102117657661438, + "learning_rate": 4.830291880049155e-06, + "loss": 0.4983, + "step": 7841 + }, + { + "epoch": 0.7159028665327735, + "grad_norm": 0.4992825388908386, + "learning_rate": 4.830248533759659e-06, + "loss": 0.5383, + "step": 7842 + }, + { + "epoch": 0.7159941573854299, + "grad_norm": 0.48854753375053406, + "learning_rate": 4.830205182129726e-06, + "loss": 0.5227, + "step": 7843 + }, + { + "epoch": 0.7160854482380865, + "grad_norm": 0.48213618993759155, + "learning_rate": 4.830161825159459e-06, + "loss": 0.5164, + "step": 7844 + }, + { + "epoch": 0.7161767390907431, + "grad_norm": 0.4776793420314789, + "learning_rate": 4.8301184628489565e-06, + "loss": 0.5435, + "step": 7845 + }, + { + "epoch": 0.7162680299433997, + "grad_norm": 0.47374042868614197, + "learning_rate": 4.830075095198316e-06, + "loss": 0.5702, + "step": 7846 + }, + { + "epoch": 0.7163593207960562, + "grad_norm": 0.49726665019989014, + "learning_rate": 4.830031722207641e-06, + "loss": 0.5289, + "step": 7847 + }, + { + "epoch": 0.7164506116487128, + "grad_norm": 0.5032675862312317, + "learning_rate": 4.8299883438770265e-06, + "loss": 0.5284, + "step": 7848 + }, + { + "epoch": 0.7165419025013694, + "grad_norm": 0.49326494336128235, + "learning_rate": 4.829944960206575e-06, + "loss": 0.555, + "step": 7849 + }, + { + "epoch": 0.716633193354026, + "grad_norm": 0.4671342670917511, + "learning_rate": 4.829901571196384e-06, + "loss": 0.5658, + "step": 7850 + }, + { + "epoch": 0.7167244842066824, + "grad_norm": 0.46999505162239075, + "learning_rate": 4.8298581768465536e-06, + "loss": 0.5617, + "step": 7851 + }, + { + "epoch": 0.716815775059339, + "grad_norm": 0.5032041668891907, + "learning_rate": 4.829814777157184e-06, + "loss": 0.499, + "step": 7852 + }, + { + "epoch": 0.7169070659119956, + "grad_norm": 0.47816991806030273, + "learning_rate": 4.829771372128374e-06, + "loss": 0.5399, + "step": 7853 + }, + { + "epoch": 0.7169983567646522, + "grad_norm": 0.5017040967941284, + "learning_rate": 4.829727961760222e-06, + "loss": 0.5336, + "step": 7854 + }, + { + "epoch": 0.7170896476173088, + "grad_norm": 0.4594789743423462, + "learning_rate": 4.82968454605283e-06, + "loss": 0.5446, + "step": 7855 + }, + { + "epoch": 0.7171809384699653, + "grad_norm": 0.5024382472038269, + "learning_rate": 4.829641125006295e-06, + "loss": 0.5514, + "step": 7856 + }, + { + "epoch": 0.7172722293226219, + "grad_norm": 0.5062333941459656, + "learning_rate": 4.829597698620718e-06, + "loss": 0.5509, + "step": 7857 + }, + { + "epoch": 0.7173635201752785, + "grad_norm": 0.5370402932167053, + "learning_rate": 4.829554266896198e-06, + "loss": 0.5417, + "step": 7858 + }, + { + "epoch": 0.717454811027935, + "grad_norm": 0.4918448030948639, + "learning_rate": 4.8295108298328344e-06, + "loss": 0.5432, + "step": 7859 + }, + { + "epoch": 0.7175461018805915, + "grad_norm": 0.4481087327003479, + "learning_rate": 4.829467387430728e-06, + "loss": 0.5575, + "step": 7860 + }, + { + "epoch": 0.7176373927332481, + "grad_norm": 0.4761149287223816, + "learning_rate": 4.829423939689976e-06, + "loss": 0.5355, + "step": 7861 + }, + { + "epoch": 0.7177286835859047, + "grad_norm": 0.484456866979599, + "learning_rate": 4.82938048661068e-06, + "loss": 0.5979, + "step": 7862 + }, + { + "epoch": 0.7178199744385613, + "grad_norm": 0.4730950593948364, + "learning_rate": 4.829337028192939e-06, + "loss": 0.5558, + "step": 7863 + }, + { + "epoch": 0.7179112652912178, + "grad_norm": 0.4768770933151245, + "learning_rate": 4.829293564436851e-06, + "loss": 0.5368, + "step": 7864 + }, + { + "epoch": 0.7180025561438744, + "grad_norm": 0.4807499051094055, + "learning_rate": 4.8292500953425185e-06, + "loss": 0.5446, + "step": 7865 + }, + { + "epoch": 0.7180938469965309, + "grad_norm": 0.4919878840446472, + "learning_rate": 4.82920662091004e-06, + "loss": 0.5887, + "step": 7866 + }, + { + "epoch": 0.7181851378491875, + "grad_norm": 0.44884252548217773, + "learning_rate": 4.829163141139514e-06, + "loss": 0.5592, + "step": 7867 + }, + { + "epoch": 0.718276428701844, + "grad_norm": 0.494800329208374, + "learning_rate": 4.8291196560310414e-06, + "loss": 0.59, + "step": 7868 + }, + { + "epoch": 0.7183677195545006, + "grad_norm": 0.46168994903564453, + "learning_rate": 4.829076165584722e-06, + "loss": 0.5718, + "step": 7869 + }, + { + "epoch": 0.7184590104071572, + "grad_norm": 0.432211309671402, + "learning_rate": 4.829032669800654e-06, + "loss": 0.5516, + "step": 7870 + }, + { + "epoch": 0.7185503012598138, + "grad_norm": 0.5024248957633972, + "learning_rate": 4.828989168678938e-06, + "loss": 0.507, + "step": 7871 + }, + { + "epoch": 0.7186415921124704, + "grad_norm": 0.4989364743232727, + "learning_rate": 4.828945662219675e-06, + "loss": 0.5622, + "step": 7872 + }, + { + "epoch": 0.7187328829651269, + "grad_norm": 0.4925120770931244, + "learning_rate": 4.8289021504229625e-06, + "loss": 0.5437, + "step": 7873 + }, + { + "epoch": 0.7188241738177834, + "grad_norm": 0.4716944098472595, + "learning_rate": 4.828858633288901e-06, + "loss": 0.559, + "step": 7874 + }, + { + "epoch": 0.71891546467044, + "grad_norm": 0.5018686056137085, + "learning_rate": 4.82881511081759e-06, + "loss": 0.5195, + "step": 7875 + }, + { + "epoch": 0.7190067555230966, + "grad_norm": 0.4686587154865265, + "learning_rate": 4.828771583009131e-06, + "loss": 0.5755, + "step": 7876 + }, + { + "epoch": 0.7190980463757531, + "grad_norm": 0.49223512411117554, + "learning_rate": 4.828728049863622e-06, + "loss": 0.5509, + "step": 7877 + }, + { + "epoch": 0.7191893372284097, + "grad_norm": 0.4832856357097626, + "learning_rate": 4.828684511381162e-06, + "loss": 0.5652, + "step": 7878 + }, + { + "epoch": 0.7192806280810663, + "grad_norm": 0.48658692836761475, + "learning_rate": 4.828640967561852e-06, + "loss": 0.5171, + "step": 7879 + }, + { + "epoch": 0.7193719189337229, + "grad_norm": 0.4599522054195404, + "learning_rate": 4.828597418405793e-06, + "loss": 0.5245, + "step": 7880 + }, + { + "epoch": 0.7194632097863795, + "grad_norm": 0.47970959544181824, + "learning_rate": 4.828553863913083e-06, + "loss": 0.5829, + "step": 7881 + }, + { + "epoch": 0.7195545006390359, + "grad_norm": 0.471816748380661, + "learning_rate": 4.828510304083821e-06, + "loss": 0.5666, + "step": 7882 + }, + { + "epoch": 0.7196457914916925, + "grad_norm": 0.46678176522254944, + "learning_rate": 4.82846673891811e-06, + "loss": 0.5625, + "step": 7883 + }, + { + "epoch": 0.7197370823443491, + "grad_norm": 0.45691630244255066, + "learning_rate": 4.828423168416048e-06, + "loss": 0.6094, + "step": 7884 + }, + { + "epoch": 0.7198283731970057, + "grad_norm": 0.46379369497299194, + "learning_rate": 4.828379592577733e-06, + "loss": 0.5696, + "step": 7885 + }, + { + "epoch": 0.7199196640496622, + "grad_norm": 0.4727141857147217, + "learning_rate": 4.8283360114032684e-06, + "loss": 0.5467, + "step": 7886 + }, + { + "epoch": 0.7200109549023188, + "grad_norm": 0.48176339268684387, + "learning_rate": 4.828292424892752e-06, + "loss": 0.5842, + "step": 7887 + }, + { + "epoch": 0.7201022457549754, + "grad_norm": 0.5038732886314392, + "learning_rate": 4.828248833046284e-06, + "loss": 0.5678, + "step": 7888 + }, + { + "epoch": 0.720193536607632, + "grad_norm": 0.4774022400379181, + "learning_rate": 4.8282052358639645e-06, + "loss": 0.5609, + "step": 7889 + }, + { + "epoch": 0.7202848274602884, + "grad_norm": 0.4846770763397217, + "learning_rate": 4.828161633345893e-06, + "loss": 0.5457, + "step": 7890 + }, + { + "epoch": 0.720376118312945, + "grad_norm": 0.46306976675987244, + "learning_rate": 4.82811802549217e-06, + "loss": 0.5673, + "step": 7891 + }, + { + "epoch": 0.7204674091656016, + "grad_norm": 0.4872443675994873, + "learning_rate": 4.828074412302896e-06, + "loss": 0.5294, + "step": 7892 + }, + { + "epoch": 0.7205587000182582, + "grad_norm": 0.49732446670532227, + "learning_rate": 4.828030793778169e-06, + "loss": 0.546, + "step": 7893 + }, + { + "epoch": 0.7206499908709147, + "grad_norm": 0.4778928756713867, + "learning_rate": 4.82798716991809e-06, + "loss": 0.5673, + "step": 7894 + }, + { + "epoch": 0.7207412817235713, + "grad_norm": 0.48483890295028687, + "learning_rate": 4.8279435407227605e-06, + "loss": 0.5768, + "step": 7895 + }, + { + "epoch": 0.7208325725762279, + "grad_norm": 0.4556206464767456, + "learning_rate": 4.8278999061922784e-06, + "loss": 0.5615, + "step": 7896 + }, + { + "epoch": 0.7209238634288844, + "grad_norm": 0.48402202129364014, + "learning_rate": 4.827856266326743e-06, + "loss": 0.558, + "step": 7897 + }, + { + "epoch": 0.721015154281541, + "grad_norm": 0.4935781955718994, + "learning_rate": 4.8278126211262576e-06, + "loss": 0.4918, + "step": 7898 + }, + { + "epoch": 0.7211064451341975, + "grad_norm": 0.47059762477874756, + "learning_rate": 4.8277689705909195e-06, + "loss": 0.5328, + "step": 7899 + }, + { + "epoch": 0.7211977359868541, + "grad_norm": 0.45176559686660767, + "learning_rate": 4.827725314720829e-06, + "loss": 0.6158, + "step": 7900 + }, + { + "epoch": 0.7212890268395107, + "grad_norm": 0.5005335807800293, + "learning_rate": 4.827681653516088e-06, + "loss": 0.5668, + "step": 7901 + }, + { + "epoch": 0.7213803176921673, + "grad_norm": 0.4409199357032776, + "learning_rate": 4.827637986976794e-06, + "loss": 0.5914, + "step": 7902 + }, + { + "epoch": 0.7214716085448238, + "grad_norm": 0.4453255236148834, + "learning_rate": 4.827594315103049e-06, + "loss": 0.557, + "step": 7903 + }, + { + "epoch": 0.7215628993974804, + "grad_norm": 0.47398173809051514, + "learning_rate": 4.827550637894952e-06, + "loss": 0.586, + "step": 7904 + }, + { + "epoch": 0.7216541902501369, + "grad_norm": 0.4831480383872986, + "learning_rate": 4.827506955352604e-06, + "loss": 0.5364, + "step": 7905 + }, + { + "epoch": 0.7217454811027935, + "grad_norm": 0.4799190163612366, + "learning_rate": 4.827463267476104e-06, + "loss": 0.5466, + "step": 7906 + }, + { + "epoch": 0.72183677195545, + "grad_norm": 0.4823310673236847, + "learning_rate": 4.827419574265553e-06, + "loss": 0.5883, + "step": 7907 + }, + { + "epoch": 0.7219280628081066, + "grad_norm": 0.45588842034339905, + "learning_rate": 4.827375875721052e-06, + "loss": 0.5538, + "step": 7908 + }, + { + "epoch": 0.7220193536607632, + "grad_norm": 0.45512354373931885, + "learning_rate": 4.827332171842699e-06, + "loss": 0.5709, + "step": 7909 + }, + { + "epoch": 0.7221106445134198, + "grad_norm": 0.4450508952140808, + "learning_rate": 4.827288462630595e-06, + "loss": 0.5865, + "step": 7910 + }, + { + "epoch": 0.7222019353660764, + "grad_norm": 0.47597718238830566, + "learning_rate": 4.827244748084841e-06, + "loss": 0.5346, + "step": 7911 + }, + { + "epoch": 0.7222932262187329, + "grad_norm": 0.4873787760734558, + "learning_rate": 4.827201028205536e-06, + "loss": 0.569, + "step": 7912 + }, + { + "epoch": 0.7223845170713894, + "grad_norm": 0.4984319508075714, + "learning_rate": 4.827157302992781e-06, + "loss": 0.6047, + "step": 7913 + }, + { + "epoch": 0.722475807924046, + "grad_norm": 0.48107585310935974, + "learning_rate": 4.827113572446676e-06, + "loss": 0.5441, + "step": 7914 + }, + { + "epoch": 0.7225670987767026, + "grad_norm": 0.4945191740989685, + "learning_rate": 4.827069836567321e-06, + "loss": 0.548, + "step": 7915 + }, + { + "epoch": 0.7226583896293591, + "grad_norm": 0.4898282587528229, + "learning_rate": 4.827026095354816e-06, + "loss": 0.5235, + "step": 7916 + }, + { + "epoch": 0.7227496804820157, + "grad_norm": 0.4815913438796997, + "learning_rate": 4.826982348809262e-06, + "loss": 0.5534, + "step": 7917 + }, + { + "epoch": 0.7228409713346723, + "grad_norm": 0.4976922571659088, + "learning_rate": 4.826938596930759e-06, + "loss": 0.5302, + "step": 7918 + }, + { + "epoch": 0.7229322621873289, + "grad_norm": 0.4736659824848175, + "learning_rate": 4.826894839719408e-06, + "loss": 0.5693, + "step": 7919 + }, + { + "epoch": 0.7230235530399854, + "grad_norm": 0.5375016927719116, + "learning_rate": 4.826851077175308e-06, + "loss": 0.5207, + "step": 7920 + }, + { + "epoch": 0.7231148438926419, + "grad_norm": 0.5173313617706299, + "learning_rate": 4.826807309298559e-06, + "loss": 0.5248, + "step": 7921 + }, + { + "epoch": 0.7232061347452985, + "grad_norm": 0.4646654427051544, + "learning_rate": 4.826763536089262e-06, + "loss": 0.5504, + "step": 7922 + }, + { + "epoch": 0.7232974255979551, + "grad_norm": 0.4724435806274414, + "learning_rate": 4.826719757547518e-06, + "loss": 0.5986, + "step": 7923 + }, + { + "epoch": 0.7233887164506116, + "grad_norm": 0.5022038817405701, + "learning_rate": 4.826675973673427e-06, + "loss": 0.5563, + "step": 7924 + }, + { + "epoch": 0.7234800073032682, + "grad_norm": 0.46231022477149963, + "learning_rate": 4.826632184467089e-06, + "loss": 0.5734, + "step": 7925 + }, + { + "epoch": 0.7235712981559248, + "grad_norm": 0.4719022512435913, + "learning_rate": 4.826588389928604e-06, + "loss": 0.5745, + "step": 7926 + }, + { + "epoch": 0.7236625890085814, + "grad_norm": 0.48820960521698, + "learning_rate": 4.826544590058073e-06, + "loss": 0.5695, + "step": 7927 + }, + { + "epoch": 0.723753879861238, + "grad_norm": 0.4704132378101349, + "learning_rate": 4.826500784855596e-06, + "loss": 0.5566, + "step": 7928 + }, + { + "epoch": 0.7238451707138944, + "grad_norm": 0.5007596015930176, + "learning_rate": 4.826456974321274e-06, + "loss": 0.5818, + "step": 7929 + }, + { + "epoch": 0.723936461566551, + "grad_norm": 0.47049134969711304, + "learning_rate": 4.826413158455206e-06, + "loss": 0.5431, + "step": 7930 + }, + { + "epoch": 0.7240277524192076, + "grad_norm": 0.45337381958961487, + "learning_rate": 4.826369337257495e-06, + "loss": 0.5709, + "step": 7931 + }, + { + "epoch": 0.7241190432718642, + "grad_norm": 0.5159924626350403, + "learning_rate": 4.826325510728238e-06, + "loss": 0.5394, + "step": 7932 + }, + { + "epoch": 0.7242103341245207, + "grad_norm": 0.4765182137489319, + "learning_rate": 4.8262816788675384e-06, + "loss": 0.5236, + "step": 7933 + }, + { + "epoch": 0.7243016249771773, + "grad_norm": 0.5119240283966064, + "learning_rate": 4.826237841675495e-06, + "loss": 0.5307, + "step": 7934 + }, + { + "epoch": 0.7243929158298339, + "grad_norm": 0.46481773257255554, + "learning_rate": 4.826193999152209e-06, + "loss": 0.5567, + "step": 7935 + }, + { + "epoch": 0.7244842066824904, + "grad_norm": 0.4635971486568451, + "learning_rate": 4.82615015129778e-06, + "loss": 0.557, + "step": 7936 + }, + { + "epoch": 0.7245754975351469, + "grad_norm": 0.4975970685482025, + "learning_rate": 4.82610629811231e-06, + "loss": 0.5411, + "step": 7937 + }, + { + "epoch": 0.7246667883878035, + "grad_norm": 0.4783320724964142, + "learning_rate": 4.826062439595899e-06, + "loss": 0.5614, + "step": 7938 + }, + { + "epoch": 0.7247580792404601, + "grad_norm": 0.476397305727005, + "learning_rate": 4.826018575748647e-06, + "loss": 0.5377, + "step": 7939 + }, + { + "epoch": 0.7248493700931167, + "grad_norm": 0.4684249460697174, + "learning_rate": 4.825974706570653e-06, + "loss": 0.5986, + "step": 7940 + }, + { + "epoch": 0.7249406609457733, + "grad_norm": 0.47265878319740295, + "learning_rate": 4.825930832062021e-06, + "loss": 0.5266, + "step": 7941 + }, + { + "epoch": 0.7250319517984298, + "grad_norm": 0.5159679055213928, + "learning_rate": 4.825886952222849e-06, + "loss": 0.5446, + "step": 7942 + }, + { + "epoch": 0.7251232426510864, + "grad_norm": 0.4665565490722656, + "learning_rate": 4.825843067053239e-06, + "loss": 0.5779, + "step": 7943 + }, + { + "epoch": 0.7252145335037429, + "grad_norm": 0.4841405153274536, + "learning_rate": 4.82579917655329e-06, + "loss": 0.5427, + "step": 7944 + }, + { + "epoch": 0.7253058243563995, + "grad_norm": 0.5054945945739746, + "learning_rate": 4.8257552807231036e-06, + "loss": 0.534, + "step": 7945 + }, + { + "epoch": 0.725397115209056, + "grad_norm": 0.43055158853530884, + "learning_rate": 4.825711379562781e-06, + "loss": 0.5456, + "step": 7946 + }, + { + "epoch": 0.7254884060617126, + "grad_norm": 0.4951724410057068, + "learning_rate": 4.825667473072422e-06, + "loss": 0.5327, + "step": 7947 + }, + { + "epoch": 0.7255796969143692, + "grad_norm": 0.47042137384414673, + "learning_rate": 4.825623561252127e-06, + "loss": 0.5591, + "step": 7948 + }, + { + "epoch": 0.7256709877670258, + "grad_norm": 0.46502476930618286, + "learning_rate": 4.825579644101997e-06, + "loss": 0.5804, + "step": 7949 + }, + { + "epoch": 0.7257622786196823, + "grad_norm": 0.48108431696891785, + "learning_rate": 4.825535721622133e-06, + "loss": 0.5458, + "step": 7950 + }, + { + "epoch": 0.7258535694723389, + "grad_norm": 0.5118244886398315, + "learning_rate": 4.825491793812634e-06, + "loss": 0.5571, + "step": 7951 + }, + { + "epoch": 0.7259448603249954, + "grad_norm": 0.49242693185806274, + "learning_rate": 4.825447860673603e-06, + "loss": 0.5452, + "step": 7952 + }, + { + "epoch": 0.726036151177652, + "grad_norm": 0.49230387806892395, + "learning_rate": 4.8254039222051396e-06, + "loss": 0.5311, + "step": 7953 + }, + { + "epoch": 0.7261274420303085, + "grad_norm": 0.4883827865123749, + "learning_rate": 4.825359978407345e-06, + "loss": 0.5707, + "step": 7954 + }, + { + "epoch": 0.7262187328829651, + "grad_norm": 0.5171693563461304, + "learning_rate": 4.825316029280319e-06, + "loss": 0.5163, + "step": 7955 + }, + { + "epoch": 0.7263100237356217, + "grad_norm": 0.4611288011074066, + "learning_rate": 4.8252720748241625e-06, + "loss": 0.5178, + "step": 7956 + }, + { + "epoch": 0.7264013145882783, + "grad_norm": 0.4861257076263428, + "learning_rate": 4.825228115038977e-06, + "loss": 0.5624, + "step": 7957 + }, + { + "epoch": 0.7264926054409349, + "grad_norm": 0.48302143812179565, + "learning_rate": 4.825184149924862e-06, + "loss": 0.5531, + "step": 7958 + }, + { + "epoch": 0.7265838962935914, + "grad_norm": 0.46988198161125183, + "learning_rate": 4.825140179481919e-06, + "loss": 0.5639, + "step": 7959 + }, + { + "epoch": 0.7266751871462479, + "grad_norm": 0.4807118773460388, + "learning_rate": 4.82509620371025e-06, + "loss": 0.5728, + "step": 7960 + }, + { + "epoch": 0.7267664779989045, + "grad_norm": 0.472042053937912, + "learning_rate": 4.825052222609954e-06, + "loss": 0.5554, + "step": 7961 + }, + { + "epoch": 0.7268577688515611, + "grad_norm": 0.460647314786911, + "learning_rate": 4.825008236181131e-06, + "loss": 0.5579, + "step": 7962 + }, + { + "epoch": 0.7269490597042176, + "grad_norm": 0.4609195590019226, + "learning_rate": 4.824964244423886e-06, + "loss": 0.5386, + "step": 7963 + }, + { + "epoch": 0.7270403505568742, + "grad_norm": 0.4859964847564697, + "learning_rate": 4.824920247338315e-06, + "loss": 0.5866, + "step": 7964 + }, + { + "epoch": 0.7271316414095308, + "grad_norm": 0.5071070194244385, + "learning_rate": 4.82487624492452e-06, + "loss": 0.5394, + "step": 7965 + }, + { + "epoch": 0.7272229322621874, + "grad_norm": 0.47801876068115234, + "learning_rate": 4.824832237182604e-06, + "loss": 0.5689, + "step": 7966 + }, + { + "epoch": 0.7273142231148438, + "grad_norm": 0.4681428074836731, + "learning_rate": 4.824788224112667e-06, + "loss": 0.5651, + "step": 7967 + }, + { + "epoch": 0.7274055139675004, + "grad_norm": 0.4963405728340149, + "learning_rate": 4.8247442057148085e-06, + "loss": 0.594, + "step": 7968 + }, + { + "epoch": 0.727496804820157, + "grad_norm": 0.4556514620780945, + "learning_rate": 4.824700181989131e-06, + "loss": 0.5883, + "step": 7969 + }, + { + "epoch": 0.7275880956728136, + "grad_norm": 0.5027647614479065, + "learning_rate": 4.8246561529357325e-06, + "loss": 0.5671, + "step": 7970 + }, + { + "epoch": 0.7276793865254702, + "grad_norm": 0.4851844906806946, + "learning_rate": 4.824612118554718e-06, + "loss": 0.5419, + "step": 7971 + }, + { + "epoch": 0.7277706773781267, + "grad_norm": 0.4828394949436188, + "learning_rate": 4.824568078846187e-06, + "loss": 0.5495, + "step": 7972 + }, + { + "epoch": 0.7278619682307833, + "grad_norm": 0.4696093797683716, + "learning_rate": 4.824524033810238e-06, + "loss": 0.5431, + "step": 7973 + }, + { + "epoch": 0.7279532590834399, + "grad_norm": 0.457604318857193, + "learning_rate": 4.824479983446976e-06, + "loss": 0.5557, + "step": 7974 + }, + { + "epoch": 0.7280445499360964, + "grad_norm": 0.4630541503429413, + "learning_rate": 4.824435927756498e-06, + "loss": 0.5546, + "step": 7975 + }, + { + "epoch": 0.7281358407887529, + "grad_norm": 0.48437392711639404, + "learning_rate": 4.824391866738908e-06, + "loss": 0.5609, + "step": 7976 + }, + { + "epoch": 0.7282271316414095, + "grad_norm": 0.46142715215682983, + "learning_rate": 4.824347800394304e-06, + "loss": 0.5891, + "step": 7977 + }, + { + "epoch": 0.7283184224940661, + "grad_norm": 0.4812222123146057, + "learning_rate": 4.824303728722791e-06, + "loss": 0.5411, + "step": 7978 + }, + { + "epoch": 0.7284097133467227, + "grad_norm": 0.4909232258796692, + "learning_rate": 4.824259651724466e-06, + "loss": 0.4961, + "step": 7979 + }, + { + "epoch": 0.7285010041993792, + "grad_norm": 0.5128988027572632, + "learning_rate": 4.824215569399433e-06, + "loss": 0.514, + "step": 7980 + }, + { + "epoch": 0.7285922950520358, + "grad_norm": 0.4754428267478943, + "learning_rate": 4.824171481747792e-06, + "loss": 0.5363, + "step": 7981 + }, + { + "epoch": 0.7286835859046924, + "grad_norm": 0.462703138589859, + "learning_rate": 4.824127388769643e-06, + "loss": 0.6001, + "step": 7982 + }, + { + "epoch": 0.7287748767573489, + "grad_norm": 0.49702680110931396, + "learning_rate": 4.824083290465087e-06, + "loss": 0.5192, + "step": 7983 + }, + { + "epoch": 0.7288661676100054, + "grad_norm": 0.45799916982650757, + "learning_rate": 4.824039186834227e-06, + "loss": 0.5803, + "step": 7984 + }, + { + "epoch": 0.728957458462662, + "grad_norm": 0.46613335609436035, + "learning_rate": 4.8239950778771625e-06, + "loss": 0.5389, + "step": 7985 + }, + { + "epoch": 0.7290487493153186, + "grad_norm": 0.5209658741950989, + "learning_rate": 4.823950963593996e-06, + "loss": 0.5419, + "step": 7986 + }, + { + "epoch": 0.7291400401679752, + "grad_norm": 0.49936068058013916, + "learning_rate": 4.8239068439848275e-06, + "loss": 0.5803, + "step": 7987 + }, + { + "epoch": 0.7292313310206318, + "grad_norm": 0.4920365512371063, + "learning_rate": 4.823862719049758e-06, + "loss": 0.5866, + "step": 7988 + }, + { + "epoch": 0.7293226218732883, + "grad_norm": 0.46517816185951233, + "learning_rate": 4.823818588788889e-06, + "loss": 0.5628, + "step": 7989 + }, + { + "epoch": 0.7294139127259449, + "grad_norm": 0.499484658241272, + "learning_rate": 4.823774453202321e-06, + "loss": 0.5109, + "step": 7990 + }, + { + "epoch": 0.7295052035786014, + "grad_norm": 0.434158593416214, + "learning_rate": 4.823730312290157e-06, + "loss": 0.587, + "step": 7991 + }, + { + "epoch": 0.729596494431258, + "grad_norm": 0.48838478326797485, + "learning_rate": 4.823686166052496e-06, + "loss": 0.52, + "step": 7992 + }, + { + "epoch": 0.7296877852839145, + "grad_norm": 0.47046899795532227, + "learning_rate": 4.82364201448944e-06, + "loss": 0.6118, + "step": 7993 + }, + { + "epoch": 0.7297790761365711, + "grad_norm": 0.4780268371105194, + "learning_rate": 4.823597857601091e-06, + "loss": 0.5259, + "step": 7994 + }, + { + "epoch": 0.7298703669892277, + "grad_norm": 0.48101186752319336, + "learning_rate": 4.823553695387549e-06, + "loss": 0.6038, + "step": 7995 + }, + { + "epoch": 0.7299616578418843, + "grad_norm": 0.4643723666667938, + "learning_rate": 4.823509527848915e-06, + "loss": 0.548, + "step": 7996 + }, + { + "epoch": 0.7300529486945408, + "grad_norm": 0.4664468467235565, + "learning_rate": 4.823465354985292e-06, + "loss": 0.5398, + "step": 7997 + }, + { + "epoch": 0.7301442395471973, + "grad_norm": 0.5086386203765869, + "learning_rate": 4.82342117679678e-06, + "loss": 0.5275, + "step": 7998 + }, + { + "epoch": 0.7302355303998539, + "grad_norm": 0.4845692217350006, + "learning_rate": 4.823376993283479e-06, + "loss": 0.5304, + "step": 7999 + }, + { + "epoch": 0.7303268212525105, + "grad_norm": 0.49844270944595337, + "learning_rate": 4.823332804445494e-06, + "loss": 0.5692, + "step": 8000 + }, + { + "epoch": 0.730418112105167, + "grad_norm": 0.47865331172943115, + "learning_rate": 4.823288610282923e-06, + "loss": 0.5415, + "step": 8001 + }, + { + "epoch": 0.7305094029578236, + "grad_norm": 0.45583030581474304, + "learning_rate": 4.823244410795867e-06, + "loss": 0.5584, + "step": 8002 + }, + { + "epoch": 0.7306006938104802, + "grad_norm": 0.4568011462688446, + "learning_rate": 4.823200205984429e-06, + "loss": 0.5532, + "step": 8003 + }, + { + "epoch": 0.7306919846631368, + "grad_norm": 0.46125200390815735, + "learning_rate": 4.8231559958487105e-06, + "loss": 0.5512, + "step": 8004 + }, + { + "epoch": 0.7307832755157934, + "grad_norm": 0.4724193215370178, + "learning_rate": 4.823111780388811e-06, + "loss": 0.5662, + "step": 8005 + }, + { + "epoch": 0.7308745663684498, + "grad_norm": 0.5350020527839661, + "learning_rate": 4.823067559604833e-06, + "loss": 0.5125, + "step": 8006 + }, + { + "epoch": 0.7309658572211064, + "grad_norm": 0.4492398798465729, + "learning_rate": 4.823023333496879e-06, + "loss": 0.5352, + "step": 8007 + }, + { + "epoch": 0.731057148073763, + "grad_norm": 0.4987125098705292, + "learning_rate": 4.822979102065049e-06, + "loss": 0.5094, + "step": 8008 + }, + { + "epoch": 0.7311484389264196, + "grad_norm": 0.4613827168941498, + "learning_rate": 4.822934865309443e-06, + "loss": 0.6119, + "step": 8009 + }, + { + "epoch": 0.7312397297790761, + "grad_norm": 0.4631972312927246, + "learning_rate": 4.8228906232301655e-06, + "loss": 0.5348, + "step": 8010 + }, + { + "epoch": 0.7313310206317327, + "grad_norm": 0.48490968346595764, + "learning_rate": 4.822846375827316e-06, + "loss": 0.4958, + "step": 8011 + }, + { + "epoch": 0.7314223114843893, + "grad_norm": 0.44871634244918823, + "learning_rate": 4.822802123100995e-06, + "loss": 0.5581, + "step": 8012 + }, + { + "epoch": 0.7315136023370459, + "grad_norm": 0.49409303069114685, + "learning_rate": 4.822757865051306e-06, + "loss": 0.5704, + "step": 8013 + }, + { + "epoch": 0.7316048931897023, + "grad_norm": 0.48747238516807556, + "learning_rate": 4.822713601678349e-06, + "loss": 0.5637, + "step": 8014 + }, + { + "epoch": 0.7316961840423589, + "grad_norm": 0.486631840467453, + "learning_rate": 4.822669332982227e-06, + "loss": 0.5582, + "step": 8015 + }, + { + "epoch": 0.7317874748950155, + "grad_norm": 0.48733383417129517, + "learning_rate": 4.8226250589630395e-06, + "loss": 0.5514, + "step": 8016 + }, + { + "epoch": 0.7318787657476721, + "grad_norm": 0.4609944224357605, + "learning_rate": 4.8225807796208904e-06, + "loss": 0.5736, + "step": 8017 + }, + { + "epoch": 0.7319700566003287, + "grad_norm": 0.4762616753578186, + "learning_rate": 4.822536494955878e-06, + "loss": 0.5402, + "step": 8018 + }, + { + "epoch": 0.7320613474529852, + "grad_norm": 0.4798017144203186, + "learning_rate": 4.822492204968105e-06, + "loss": 0.5913, + "step": 8019 + }, + { + "epoch": 0.7321526383056418, + "grad_norm": 0.4702630043029785, + "learning_rate": 4.822447909657675e-06, + "loss": 0.593, + "step": 8020 + }, + { + "epoch": 0.7322439291582984, + "grad_norm": 0.47992897033691406, + "learning_rate": 4.822403609024687e-06, + "loss": 0.5149, + "step": 8021 + }, + { + "epoch": 0.7323352200109549, + "grad_norm": 0.46942123770713806, + "learning_rate": 4.822359303069244e-06, + "loss": 0.5864, + "step": 8022 + }, + { + "epoch": 0.7324265108636114, + "grad_norm": 0.504427969455719, + "learning_rate": 4.822314991791447e-06, + "loss": 0.5535, + "step": 8023 + }, + { + "epoch": 0.732517801716268, + "grad_norm": 0.4655669033527374, + "learning_rate": 4.822270675191397e-06, + "loss": 0.57, + "step": 8024 + }, + { + "epoch": 0.7326090925689246, + "grad_norm": 0.47251826524734497, + "learning_rate": 4.822226353269197e-06, + "loss": 0.5641, + "step": 8025 + }, + { + "epoch": 0.7327003834215812, + "grad_norm": 0.5017126202583313, + "learning_rate": 4.822182026024946e-06, + "loss": 0.5733, + "step": 8026 + }, + { + "epoch": 0.7327916742742377, + "grad_norm": 0.4910612106323242, + "learning_rate": 4.822137693458748e-06, + "loss": 0.5474, + "step": 8027 + }, + { + "epoch": 0.7328829651268943, + "grad_norm": 0.4895426630973816, + "learning_rate": 4.822093355570704e-06, + "loss": 0.4988, + "step": 8028 + }, + { + "epoch": 0.7329742559795509, + "grad_norm": 0.4670218229293823, + "learning_rate": 4.822049012360916e-06, + "loss": 0.5163, + "step": 8029 + }, + { + "epoch": 0.7330655468322074, + "grad_norm": 0.47456803917884827, + "learning_rate": 4.822004663829484e-06, + "loss": 0.5479, + "step": 8030 + }, + { + "epoch": 0.733156837684864, + "grad_norm": 0.47555211186408997, + "learning_rate": 4.821960309976512e-06, + "loss": 0.5515, + "step": 8031 + }, + { + "epoch": 0.7332481285375205, + "grad_norm": 0.5289431214332581, + "learning_rate": 4.821915950802099e-06, + "loss": 0.5258, + "step": 8032 + }, + { + "epoch": 0.7333394193901771, + "grad_norm": 0.4819989502429962, + "learning_rate": 4.8218715863063485e-06, + "loss": 0.527, + "step": 8033 + }, + { + "epoch": 0.7334307102428337, + "grad_norm": 0.45744743943214417, + "learning_rate": 4.821827216489362e-06, + "loss": 0.5584, + "step": 8034 + }, + { + "epoch": 0.7335220010954903, + "grad_norm": 0.4958653748035431, + "learning_rate": 4.821782841351241e-06, + "loss": 0.5286, + "step": 8035 + }, + { + "epoch": 0.7336132919481468, + "grad_norm": 0.4957478642463684, + "learning_rate": 4.821738460892087e-06, + "loss": 0.5, + "step": 8036 + }, + { + "epoch": 0.7337045828008033, + "grad_norm": 0.4906580150127411, + "learning_rate": 4.821694075112001e-06, + "loss": 0.5216, + "step": 8037 + }, + { + "epoch": 0.7337958736534599, + "grad_norm": 0.4686330258846283, + "learning_rate": 4.821649684011086e-06, + "loss": 0.5496, + "step": 8038 + }, + { + "epoch": 0.7338871645061165, + "grad_norm": 0.4842737019062042, + "learning_rate": 4.821605287589443e-06, + "loss": 0.5375, + "step": 8039 + }, + { + "epoch": 0.733978455358773, + "grad_norm": 0.5085281133651733, + "learning_rate": 4.821560885847174e-06, + "loss": 0.5667, + "step": 8040 + }, + { + "epoch": 0.7340697462114296, + "grad_norm": 0.4784477651119232, + "learning_rate": 4.8215164787843815e-06, + "loss": 0.5819, + "step": 8041 + }, + { + "epoch": 0.7341610370640862, + "grad_norm": 0.49676239490509033, + "learning_rate": 4.821472066401166e-06, + "loss": 0.5561, + "step": 8042 + }, + { + "epoch": 0.7342523279167428, + "grad_norm": 0.4886285364627838, + "learning_rate": 4.82142764869763e-06, + "loss": 0.555, + "step": 8043 + }, + { + "epoch": 0.7343436187693994, + "grad_norm": 0.47450536489486694, + "learning_rate": 4.821383225673875e-06, + "loss": 0.5591, + "step": 8044 + }, + { + "epoch": 0.7344349096220558, + "grad_norm": 0.4980453848838806, + "learning_rate": 4.821338797330003e-06, + "loss": 0.5373, + "step": 8045 + }, + { + "epoch": 0.7345262004747124, + "grad_norm": 0.44142046570777893, + "learning_rate": 4.821294363666116e-06, + "loss": 0.5801, + "step": 8046 + }, + { + "epoch": 0.734617491327369, + "grad_norm": 0.4821753203868866, + "learning_rate": 4.8212499246823155e-06, + "loss": 0.5344, + "step": 8047 + }, + { + "epoch": 0.7347087821800256, + "grad_norm": 0.4566929340362549, + "learning_rate": 4.8212054803787034e-06, + "loss": 0.5881, + "step": 8048 + }, + { + "epoch": 0.7348000730326821, + "grad_norm": 0.4791776239871979, + "learning_rate": 4.8211610307553814e-06, + "loss": 0.5683, + "step": 8049 + }, + { + "epoch": 0.7348913638853387, + "grad_norm": 0.4844374656677246, + "learning_rate": 4.821116575812451e-06, + "loss": 0.5527, + "step": 8050 + }, + { + "epoch": 0.7349826547379953, + "grad_norm": 0.4759657382965088, + "learning_rate": 4.821072115550015e-06, + "loss": 0.5794, + "step": 8051 + }, + { + "epoch": 0.7350739455906519, + "grad_norm": 0.46675753593444824, + "learning_rate": 4.8210276499681755e-06, + "loss": 0.5872, + "step": 8052 + }, + { + "epoch": 0.7351652364433083, + "grad_norm": 0.4969728887081146, + "learning_rate": 4.820983179067033e-06, + "loss": 0.5579, + "step": 8053 + }, + { + "epoch": 0.7352565272959649, + "grad_norm": 0.534882128238678, + "learning_rate": 4.820938702846691e-06, + "loss": 0.5095, + "step": 8054 + }, + { + "epoch": 0.7353478181486215, + "grad_norm": 0.4838569462299347, + "learning_rate": 4.820894221307249e-06, + "loss": 0.5255, + "step": 8055 + }, + { + "epoch": 0.7354391090012781, + "grad_norm": 0.4889320135116577, + "learning_rate": 4.820849734448813e-06, + "loss": 0.541, + "step": 8056 + }, + { + "epoch": 0.7355303998539346, + "grad_norm": 0.4390583336353302, + "learning_rate": 4.8208052422714815e-06, + "loss": 0.5507, + "step": 8057 + }, + { + "epoch": 0.7356216907065912, + "grad_norm": 0.4690489172935486, + "learning_rate": 4.8207607447753566e-06, + "loss": 0.5526, + "step": 8058 + }, + { + "epoch": 0.7357129815592478, + "grad_norm": 0.5131588578224182, + "learning_rate": 4.820716241960542e-06, + "loss": 0.4969, + "step": 8059 + }, + { + "epoch": 0.7358042724119044, + "grad_norm": 0.48847755789756775, + "learning_rate": 4.82067173382714e-06, + "loss": 0.5529, + "step": 8060 + }, + { + "epoch": 0.7358955632645608, + "grad_norm": 0.47555091977119446, + "learning_rate": 4.82062722037525e-06, + "loss": 0.5974, + "step": 8061 + }, + { + "epoch": 0.7359868541172174, + "grad_norm": 0.46547549962997437, + "learning_rate": 4.820582701604976e-06, + "loss": 0.5356, + "step": 8062 + }, + { + "epoch": 0.736078144969874, + "grad_norm": 0.4692527651786804, + "learning_rate": 4.82053817751642e-06, + "loss": 0.5402, + "step": 8063 + }, + { + "epoch": 0.7361694358225306, + "grad_norm": 0.4707784652709961, + "learning_rate": 4.820493648109683e-06, + "loss": 0.5854, + "step": 8064 + }, + { + "epoch": 0.7362607266751872, + "grad_norm": 0.46771663427352905, + "learning_rate": 4.8204491133848674e-06, + "loss": 0.5996, + "step": 8065 + }, + { + "epoch": 0.7363520175278437, + "grad_norm": 0.46153298020362854, + "learning_rate": 4.820404573342076e-06, + "loss": 0.5587, + "step": 8066 + }, + { + "epoch": 0.7364433083805003, + "grad_norm": 0.5119926333427429, + "learning_rate": 4.820360027981411e-06, + "loss": 0.5581, + "step": 8067 + }, + { + "epoch": 0.7365345992331568, + "grad_norm": 0.49994805455207825, + "learning_rate": 4.820315477302972e-06, + "loss": 0.5592, + "step": 8068 + }, + { + "epoch": 0.7366258900858134, + "grad_norm": 0.5111262202262878, + "learning_rate": 4.820270921306864e-06, + "loss": 0.5288, + "step": 8069 + }, + { + "epoch": 0.7367171809384699, + "grad_norm": 0.5094723105430603, + "learning_rate": 4.820226359993189e-06, + "loss": 0.5443, + "step": 8070 + }, + { + "epoch": 0.7368084717911265, + "grad_norm": 0.4669760763645172, + "learning_rate": 4.820181793362048e-06, + "loss": 0.5748, + "step": 8071 + }, + { + "epoch": 0.7368997626437831, + "grad_norm": 0.49173256754875183, + "learning_rate": 4.820137221413542e-06, + "loss": 0.5444, + "step": 8072 + }, + { + "epoch": 0.7369910534964397, + "grad_norm": 0.47187575697898865, + "learning_rate": 4.820092644147775e-06, + "loss": 0.5207, + "step": 8073 + }, + { + "epoch": 0.7370823443490963, + "grad_norm": 0.4860987067222595, + "learning_rate": 4.820048061564849e-06, + "loss": 0.5608, + "step": 8074 + }, + { + "epoch": 0.7371736352017528, + "grad_norm": 0.4613304138183594, + "learning_rate": 4.820003473664866e-06, + "loss": 0.5622, + "step": 8075 + }, + { + "epoch": 0.7372649260544093, + "grad_norm": 0.48470863699913025, + "learning_rate": 4.819958880447928e-06, + "loss": 0.5844, + "step": 8076 + }, + { + "epoch": 0.7373562169070659, + "grad_norm": 0.4990560710430145, + "learning_rate": 4.819914281914136e-06, + "loss": 0.5392, + "step": 8077 + }, + { + "epoch": 0.7374475077597225, + "grad_norm": 0.5058424472808838, + "learning_rate": 4.819869678063595e-06, + "loss": 0.5184, + "step": 8078 + }, + { + "epoch": 0.737538798612379, + "grad_norm": 0.5138607621192932, + "learning_rate": 4.8198250688964055e-06, + "loss": 0.5573, + "step": 8079 + }, + { + "epoch": 0.7376300894650356, + "grad_norm": 0.4944399893283844, + "learning_rate": 4.819780454412669e-06, + "loss": 0.5724, + "step": 8080 + }, + { + "epoch": 0.7377213803176922, + "grad_norm": 0.5035881400108337, + "learning_rate": 4.819735834612489e-06, + "loss": 0.5359, + "step": 8081 + }, + { + "epoch": 0.7378126711703488, + "grad_norm": 0.4888482093811035, + "learning_rate": 4.819691209495968e-06, + "loss": 0.5221, + "step": 8082 + }, + { + "epoch": 0.7379039620230053, + "grad_norm": 0.5021468997001648, + "learning_rate": 4.8196465790632065e-06, + "loss": 0.5651, + "step": 8083 + }, + { + "epoch": 0.7379952528756618, + "grad_norm": 0.46363934874534607, + "learning_rate": 4.819601943314309e-06, + "loss": 0.5179, + "step": 8084 + }, + { + "epoch": 0.7380865437283184, + "grad_norm": 0.5010078549385071, + "learning_rate": 4.819557302249376e-06, + "loss": 0.5661, + "step": 8085 + }, + { + "epoch": 0.738177834580975, + "grad_norm": 0.4805143475532532, + "learning_rate": 4.81951265586851e-06, + "loss": 0.5575, + "step": 8086 + }, + { + "epoch": 0.7382691254336315, + "grad_norm": 0.45979705452919006, + "learning_rate": 4.819468004171815e-06, + "loss": 0.5517, + "step": 8087 + }, + { + "epoch": 0.7383604162862881, + "grad_norm": 0.5043696761131287, + "learning_rate": 4.819423347159392e-06, + "loss": 0.5521, + "step": 8088 + }, + { + "epoch": 0.7384517071389447, + "grad_norm": 0.47772255539894104, + "learning_rate": 4.819378684831344e-06, + "loss": 0.5485, + "step": 8089 + }, + { + "epoch": 0.7385429979916013, + "grad_norm": 0.4730222225189209, + "learning_rate": 4.819334017187773e-06, + "loss": 0.5491, + "step": 8090 + }, + { + "epoch": 0.7386342888442579, + "grad_norm": 0.5120900273323059, + "learning_rate": 4.819289344228779e-06, + "loss": 0.569, + "step": 8091 + }, + { + "epoch": 0.7387255796969143, + "grad_norm": 0.46866777539253235, + "learning_rate": 4.819244665954469e-06, + "loss": 0.5388, + "step": 8092 + }, + { + "epoch": 0.7388168705495709, + "grad_norm": 0.496124267578125, + "learning_rate": 4.819199982364942e-06, + "loss": 0.5323, + "step": 8093 + }, + { + "epoch": 0.7389081614022275, + "grad_norm": 0.5012753009796143, + "learning_rate": 4.819155293460302e-06, + "loss": 0.548, + "step": 8094 + }, + { + "epoch": 0.7389994522548841, + "grad_norm": 0.4813225567340851, + "learning_rate": 4.819110599240651e-06, + "loss": 0.5463, + "step": 8095 + }, + { + "epoch": 0.7390907431075406, + "grad_norm": 0.4863443076610565, + "learning_rate": 4.81906589970609e-06, + "loss": 0.5808, + "step": 8096 + }, + { + "epoch": 0.7391820339601972, + "grad_norm": 0.4903642535209656, + "learning_rate": 4.819021194856724e-06, + "loss": 0.5562, + "step": 8097 + }, + { + "epoch": 0.7392733248128538, + "grad_norm": 0.49900004267692566, + "learning_rate": 4.818976484692653e-06, + "loss": 0.5798, + "step": 8098 + }, + { + "epoch": 0.7393646156655103, + "grad_norm": 0.5174857974052429, + "learning_rate": 4.818931769213983e-06, + "loss": 0.523, + "step": 8099 + }, + { + "epoch": 0.7394559065181668, + "grad_norm": 0.4908747971057892, + "learning_rate": 4.818887048420812e-06, + "loss": 0.5323, + "step": 8100 + }, + { + "epoch": 0.7395471973708234, + "grad_norm": 0.4944729208946228, + "learning_rate": 4.818842322313245e-06, + "loss": 0.547, + "step": 8101 + }, + { + "epoch": 0.73963848822348, + "grad_norm": 0.5427337884902954, + "learning_rate": 4.818797590891384e-06, + "loss": 0.5174, + "step": 8102 + }, + { + "epoch": 0.7397297790761366, + "grad_norm": 0.4728747308254242, + "learning_rate": 4.818752854155332e-06, + "loss": 0.5732, + "step": 8103 + }, + { + "epoch": 0.7398210699287932, + "grad_norm": 0.5108798146247864, + "learning_rate": 4.818708112105191e-06, + "loss": 0.5268, + "step": 8104 + }, + { + "epoch": 0.7399123607814497, + "grad_norm": 0.5222579836845398, + "learning_rate": 4.818663364741065e-06, + "loss": 0.538, + "step": 8105 + }, + { + "epoch": 0.7400036516341063, + "grad_norm": 0.4661048352718353, + "learning_rate": 4.818618612063054e-06, + "loss": 0.5804, + "step": 8106 + }, + { + "epoch": 0.7400949424867628, + "grad_norm": 0.49360233545303345, + "learning_rate": 4.818573854071262e-06, + "loss": 0.5268, + "step": 8107 + }, + { + "epoch": 0.7401862333394194, + "grad_norm": 0.48181360960006714, + "learning_rate": 4.8185290907657914e-06, + "loss": 0.546, + "step": 8108 + }, + { + "epoch": 0.7402775241920759, + "grad_norm": 0.4741285443305969, + "learning_rate": 4.8184843221467444e-06, + "loss": 0.5542, + "step": 8109 + }, + { + "epoch": 0.7403688150447325, + "grad_norm": 0.4600647985935211, + "learning_rate": 4.818439548214224e-06, + "loss": 0.6121, + "step": 8110 + }, + { + "epoch": 0.7404601058973891, + "grad_norm": 0.5082666873931885, + "learning_rate": 4.818394768968334e-06, + "loss": 0.526, + "step": 8111 + }, + { + "epoch": 0.7405513967500457, + "grad_norm": 0.4990268647670746, + "learning_rate": 4.818349984409174e-06, + "loss": 0.5289, + "step": 8112 + }, + { + "epoch": 0.7406426876027022, + "grad_norm": 0.48287761211395264, + "learning_rate": 4.8183051945368496e-06, + "loss": 0.5626, + "step": 8113 + }, + { + "epoch": 0.7407339784553588, + "grad_norm": 0.5008471608161926, + "learning_rate": 4.818260399351463e-06, + "loss": 0.5177, + "step": 8114 + }, + { + "epoch": 0.7408252693080153, + "grad_norm": 0.4902235269546509, + "learning_rate": 4.8182155988531146e-06, + "loss": 0.5141, + "step": 8115 + }, + { + "epoch": 0.7409165601606719, + "grad_norm": 0.484348863363266, + "learning_rate": 4.81817079304191e-06, + "loss": 0.5783, + "step": 8116 + }, + { + "epoch": 0.7410078510133284, + "grad_norm": 0.49130013585090637, + "learning_rate": 4.81812598191795e-06, + "loss": 0.5483, + "step": 8117 + }, + { + "epoch": 0.741099141865985, + "grad_norm": 0.46299391984939575, + "learning_rate": 4.818081165481337e-06, + "loss": 0.5135, + "step": 8118 + }, + { + "epoch": 0.7411904327186416, + "grad_norm": 0.5027362704277039, + "learning_rate": 4.818036343732175e-06, + "loss": 0.5432, + "step": 8119 + }, + { + "epoch": 0.7412817235712982, + "grad_norm": 0.47132769227027893, + "learning_rate": 4.817991516670567e-06, + "loss": 0.5976, + "step": 8120 + }, + { + "epoch": 0.7413730144239548, + "grad_norm": 0.46752747893333435, + "learning_rate": 4.817946684296615e-06, + "loss": 0.5828, + "step": 8121 + }, + { + "epoch": 0.7414643052766113, + "grad_norm": 0.4474285840988159, + "learning_rate": 4.817901846610421e-06, + "loss": 0.5583, + "step": 8122 + }, + { + "epoch": 0.7415555961292678, + "grad_norm": 0.4996338486671448, + "learning_rate": 4.817857003612089e-06, + "loss": 0.5593, + "step": 8123 + }, + { + "epoch": 0.7416468869819244, + "grad_norm": 0.4888657331466675, + "learning_rate": 4.817812155301721e-06, + "loss": 0.5565, + "step": 8124 + }, + { + "epoch": 0.741738177834581, + "grad_norm": 0.4893929958343506, + "learning_rate": 4.817767301679419e-06, + "loss": 0.5049, + "step": 8125 + }, + { + "epoch": 0.7418294686872375, + "grad_norm": 0.5016276836395264, + "learning_rate": 4.8177224427452886e-06, + "loss": 0.5523, + "step": 8126 + }, + { + "epoch": 0.7419207595398941, + "grad_norm": 0.4993427097797394, + "learning_rate": 4.81767757849943e-06, + "loss": 0.5728, + "step": 8127 + }, + { + "epoch": 0.7420120503925507, + "grad_norm": 0.4586520791053772, + "learning_rate": 4.817632708941947e-06, + "loss": 0.5347, + "step": 8128 + }, + { + "epoch": 0.7421033412452073, + "grad_norm": 0.4833102822303772, + "learning_rate": 4.817587834072942e-06, + "loss": 0.5695, + "step": 8129 + }, + { + "epoch": 0.7421946320978638, + "grad_norm": 0.45445042848587036, + "learning_rate": 4.817542953892519e-06, + "loss": 0.5627, + "step": 8130 + }, + { + "epoch": 0.7422859229505203, + "grad_norm": 0.5042928457260132, + "learning_rate": 4.81749806840078e-06, + "loss": 0.5476, + "step": 8131 + }, + { + "epoch": 0.7423772138031769, + "grad_norm": 0.48610740900039673, + "learning_rate": 4.817453177597827e-06, + "loss": 0.6086, + "step": 8132 + }, + { + "epoch": 0.7424685046558335, + "grad_norm": 0.4887692928314209, + "learning_rate": 4.817408281483764e-06, + "loss": 0.5666, + "step": 8133 + }, + { + "epoch": 0.74255979550849, + "grad_norm": 0.4654543399810791, + "learning_rate": 4.817363380058694e-06, + "loss": 0.5694, + "step": 8134 + }, + { + "epoch": 0.7426510863611466, + "grad_norm": 0.5042465329170227, + "learning_rate": 4.8173184733227196e-06, + "loss": 0.5061, + "step": 8135 + }, + { + "epoch": 0.7427423772138032, + "grad_norm": 0.49458441138267517, + "learning_rate": 4.817273561275943e-06, + "loss": 0.5673, + "step": 8136 + }, + { + "epoch": 0.7428336680664598, + "grad_norm": 0.5004248023033142, + "learning_rate": 4.8172286439184675e-06, + "loss": 0.5249, + "step": 8137 + }, + { + "epoch": 0.7429249589191163, + "grad_norm": 0.46865907311439514, + "learning_rate": 4.817183721250398e-06, + "loss": 0.5543, + "step": 8138 + }, + { + "epoch": 0.7430162497717728, + "grad_norm": 0.49807247519493103, + "learning_rate": 4.817138793271835e-06, + "loss": 0.5254, + "step": 8139 + }, + { + "epoch": 0.7431075406244294, + "grad_norm": 0.4614141583442688, + "learning_rate": 4.817093859982882e-06, + "loss": 0.5709, + "step": 8140 + }, + { + "epoch": 0.743198831477086, + "grad_norm": 0.5031154751777649, + "learning_rate": 4.817048921383642e-06, + "loss": 0.5149, + "step": 8141 + }, + { + "epoch": 0.7432901223297426, + "grad_norm": 0.4963267743587494, + "learning_rate": 4.817003977474218e-06, + "loss": 0.5649, + "step": 8142 + }, + { + "epoch": 0.7433814131823991, + "grad_norm": 0.49941086769104004, + "learning_rate": 4.8169590282547145e-06, + "loss": 0.5227, + "step": 8143 + }, + { + "epoch": 0.7434727040350557, + "grad_norm": 0.4866439402103424, + "learning_rate": 4.816914073725232e-06, + "loss": 0.5424, + "step": 8144 + }, + { + "epoch": 0.7435639948877123, + "grad_norm": 0.47735410928726196, + "learning_rate": 4.816869113885876e-06, + "loss": 0.5836, + "step": 8145 + }, + { + "epoch": 0.7436552857403688, + "grad_norm": 0.5016793608665466, + "learning_rate": 4.816824148736748e-06, + "loss": 0.5507, + "step": 8146 + }, + { + "epoch": 0.7437465765930253, + "grad_norm": 0.4790542423725128, + "learning_rate": 4.81677917827795e-06, + "loss": 0.5291, + "step": 8147 + }, + { + "epoch": 0.7438378674456819, + "grad_norm": 0.4950459599494934, + "learning_rate": 4.816734202509588e-06, + "loss": 0.5395, + "step": 8148 + }, + { + "epoch": 0.7439291582983385, + "grad_norm": 0.49313071370124817, + "learning_rate": 4.816689221431762e-06, + "loss": 0.5568, + "step": 8149 + }, + { + "epoch": 0.7440204491509951, + "grad_norm": 0.45647019147872925, + "learning_rate": 4.816644235044577e-06, + "loss": 0.5969, + "step": 8150 + }, + { + "epoch": 0.7441117400036517, + "grad_norm": 0.5273317098617554, + "learning_rate": 4.816599243348137e-06, + "loss": 0.5529, + "step": 8151 + }, + { + "epoch": 0.7442030308563082, + "grad_norm": 0.49991193413734436, + "learning_rate": 4.816554246342543e-06, + "loss": 0.5806, + "step": 8152 + }, + { + "epoch": 0.7442943217089648, + "grad_norm": 0.46776270866394043, + "learning_rate": 4.816509244027898e-06, + "loss": 0.5887, + "step": 8153 + }, + { + "epoch": 0.7443856125616213, + "grad_norm": 0.48917779326438904, + "learning_rate": 4.8164642364043064e-06, + "loss": 0.587, + "step": 8154 + }, + { + "epoch": 0.7444769034142779, + "grad_norm": 0.48157593607902527, + "learning_rate": 4.816419223471872e-06, + "loss": 0.5436, + "step": 8155 + }, + { + "epoch": 0.7445681942669344, + "grad_norm": 0.4721929132938385, + "learning_rate": 4.816374205230696e-06, + "loss": 0.5621, + "step": 8156 + }, + { + "epoch": 0.744659485119591, + "grad_norm": 0.4854852557182312, + "learning_rate": 4.816329181680883e-06, + "loss": 0.5402, + "step": 8157 + }, + { + "epoch": 0.7447507759722476, + "grad_norm": 0.495472252368927, + "learning_rate": 4.816284152822536e-06, + "loss": 0.5801, + "step": 8158 + }, + { + "epoch": 0.7448420668249042, + "grad_norm": 0.47248247265815735, + "learning_rate": 4.816239118655758e-06, + "loss": 0.5467, + "step": 8159 + }, + { + "epoch": 0.7449333576775607, + "grad_norm": 0.4820013642311096, + "learning_rate": 4.816194079180651e-06, + "loss": 0.5408, + "step": 8160 + }, + { + "epoch": 0.7450246485302173, + "grad_norm": 0.5023514628410339, + "learning_rate": 4.8161490343973204e-06, + "loss": 0.5732, + "step": 8161 + }, + { + "epoch": 0.7451159393828738, + "grad_norm": 0.45106443762779236, + "learning_rate": 4.816103984305867e-06, + "loss": 0.5647, + "step": 8162 + }, + { + "epoch": 0.7452072302355304, + "grad_norm": 0.4729031026363373, + "learning_rate": 4.816058928906398e-06, + "loss": 0.5566, + "step": 8163 + }, + { + "epoch": 0.745298521088187, + "grad_norm": 0.47874802350997925, + "learning_rate": 4.816013868199011e-06, + "loss": 0.5536, + "step": 8164 + }, + { + "epoch": 0.7453898119408435, + "grad_norm": 0.47347649931907654, + "learning_rate": 4.815968802183815e-06, + "loss": 0.566, + "step": 8165 + }, + { + "epoch": 0.7454811027935001, + "grad_norm": 0.49485889077186584, + "learning_rate": 4.8159237308609084e-06, + "loss": 0.5582, + "step": 8166 + }, + { + "epoch": 0.7455723936461567, + "grad_norm": 0.49908286333084106, + "learning_rate": 4.815878654230398e-06, + "loss": 0.5916, + "step": 8167 + }, + { + "epoch": 0.7456636844988133, + "grad_norm": 0.5011463165283203, + "learning_rate": 4.8158335722923855e-06, + "loss": 0.56, + "step": 8168 + }, + { + "epoch": 0.7457549753514697, + "grad_norm": 0.511960506439209, + "learning_rate": 4.815788485046975e-06, + "loss": 0.5342, + "step": 8169 + }, + { + "epoch": 0.7458462662041263, + "grad_norm": 0.49099478125572205, + "learning_rate": 4.8157433924942685e-06, + "loss": 0.6016, + "step": 8170 + }, + { + "epoch": 0.7459375570567829, + "grad_norm": 0.5101686716079712, + "learning_rate": 4.815698294634371e-06, + "loss": 0.5343, + "step": 8171 + }, + { + "epoch": 0.7460288479094395, + "grad_norm": 0.4954966604709625, + "learning_rate": 4.815653191467384e-06, + "loss": 0.536, + "step": 8172 + }, + { + "epoch": 0.746120138762096, + "grad_norm": 0.4673379063606262, + "learning_rate": 4.815608082993413e-06, + "loss": 0.5255, + "step": 8173 + }, + { + "epoch": 0.7462114296147526, + "grad_norm": 0.4654437303543091, + "learning_rate": 4.81556296921256e-06, + "loss": 0.5273, + "step": 8174 + }, + { + "epoch": 0.7463027204674092, + "grad_norm": 0.4775853455066681, + "learning_rate": 4.815517850124929e-06, + "loss": 0.5411, + "step": 8175 + }, + { + "epoch": 0.7463940113200658, + "grad_norm": 0.4886764585971832, + "learning_rate": 4.8154727257306235e-06, + "loss": 0.5434, + "step": 8176 + }, + { + "epoch": 0.7464853021727222, + "grad_norm": 0.4850536584854126, + "learning_rate": 4.8154275960297455e-06, + "loss": 0.5292, + "step": 8177 + }, + { + "epoch": 0.7465765930253788, + "grad_norm": 0.48196619749069214, + "learning_rate": 4.8153824610224e-06, + "loss": 0.5449, + "step": 8178 + }, + { + "epoch": 0.7466678838780354, + "grad_norm": 0.4832499921321869, + "learning_rate": 4.81533732070869e-06, + "loss": 0.5322, + "step": 8179 + }, + { + "epoch": 0.746759174730692, + "grad_norm": 0.47141125798225403, + "learning_rate": 4.815292175088719e-06, + "loss": 0.5454, + "step": 8180 + }, + { + "epoch": 0.7468504655833486, + "grad_norm": 0.5078049302101135, + "learning_rate": 4.815247024162589e-06, + "loss": 0.536, + "step": 8181 + }, + { + "epoch": 0.7469417564360051, + "grad_norm": 0.518681526184082, + "learning_rate": 4.815201867930406e-06, + "loss": 0.564, + "step": 8182 + }, + { + "epoch": 0.7470330472886617, + "grad_norm": 0.5113638043403625, + "learning_rate": 4.8151567063922715e-06, + "loss": 0.5982, + "step": 8183 + }, + { + "epoch": 0.7471243381413183, + "grad_norm": 0.5065628290176392, + "learning_rate": 4.815111539548291e-06, + "loss": 0.5511, + "step": 8184 + }, + { + "epoch": 0.7472156289939748, + "grad_norm": 0.4485168755054474, + "learning_rate": 4.815066367398566e-06, + "loss": 0.5701, + "step": 8185 + }, + { + "epoch": 0.7473069198466313, + "grad_norm": 0.5179392695426941, + "learning_rate": 4.8150211899432e-06, + "loss": 0.5238, + "step": 8186 + }, + { + "epoch": 0.7473982106992879, + "grad_norm": 0.49658337235450745, + "learning_rate": 4.814976007182298e-06, + "loss": 0.5347, + "step": 8187 + }, + { + "epoch": 0.7474895015519445, + "grad_norm": 0.46714749932289124, + "learning_rate": 4.814930819115963e-06, + "loss": 0.5907, + "step": 8188 + }, + { + "epoch": 0.7475807924046011, + "grad_norm": 0.47850826382637024, + "learning_rate": 4.814885625744299e-06, + "loss": 0.5591, + "step": 8189 + }, + { + "epoch": 0.7476720832572576, + "grad_norm": 0.46865448355674744, + "learning_rate": 4.814840427067409e-06, + "loss": 0.58, + "step": 8190 + }, + { + "epoch": 0.7477633741099142, + "grad_norm": 0.48245716094970703, + "learning_rate": 4.8147952230853955e-06, + "loss": 0.5475, + "step": 8191 + }, + { + "epoch": 0.7478546649625708, + "grad_norm": 0.4650101661682129, + "learning_rate": 4.814750013798364e-06, + "loss": 0.5953, + "step": 8192 + }, + { + "epoch": 0.7479459558152273, + "grad_norm": 0.484409898519516, + "learning_rate": 4.814704799206417e-06, + "loss": 0.5285, + "step": 8193 + }, + { + "epoch": 0.7480372466678838, + "grad_norm": 0.48178109526634216, + "learning_rate": 4.814659579309659e-06, + "loss": 0.5393, + "step": 8194 + }, + { + "epoch": 0.7481285375205404, + "grad_norm": 0.4937669634819031, + "learning_rate": 4.814614354108193e-06, + "loss": 0.5074, + "step": 8195 + }, + { + "epoch": 0.748219828373197, + "grad_norm": 0.5163665413856506, + "learning_rate": 4.814569123602122e-06, + "loss": 0.5551, + "step": 8196 + }, + { + "epoch": 0.7483111192258536, + "grad_norm": 0.48338180780410767, + "learning_rate": 4.814523887791552e-06, + "loss": 0.5614, + "step": 8197 + }, + { + "epoch": 0.7484024100785102, + "grad_norm": 0.45358142256736755, + "learning_rate": 4.814478646676584e-06, + "loss": 0.5419, + "step": 8198 + }, + { + "epoch": 0.7484937009311667, + "grad_norm": 0.43442797660827637, + "learning_rate": 4.814433400257323e-06, + "loss": 0.6125, + "step": 8199 + }, + { + "epoch": 0.7485849917838232, + "grad_norm": 0.5077213644981384, + "learning_rate": 4.814388148533871e-06, + "loss": 0.5099, + "step": 8200 + }, + { + "epoch": 0.7486762826364798, + "grad_norm": 0.4894047677516937, + "learning_rate": 4.814342891506335e-06, + "loss": 0.5365, + "step": 8201 + }, + { + "epoch": 0.7487675734891364, + "grad_norm": 0.48002687096595764, + "learning_rate": 4.814297629174815e-06, + "loss": 0.5703, + "step": 8202 + }, + { + "epoch": 0.7488588643417929, + "grad_norm": 0.49469390511512756, + "learning_rate": 4.814252361539419e-06, + "loss": 0.5617, + "step": 8203 + }, + { + "epoch": 0.7489501551944495, + "grad_norm": 0.5011662840843201, + "learning_rate": 4.814207088600246e-06, + "loss": 0.5115, + "step": 8204 + }, + { + "epoch": 0.7490414460471061, + "grad_norm": 0.4747611880302429, + "learning_rate": 4.814161810357404e-06, + "loss": 0.5598, + "step": 8205 + }, + { + "epoch": 0.7491327368997627, + "grad_norm": 0.5295849442481995, + "learning_rate": 4.814116526810994e-06, + "loss": 0.5491, + "step": 8206 + }, + { + "epoch": 0.7492240277524193, + "grad_norm": 0.4813678562641144, + "learning_rate": 4.814071237961121e-06, + "loss": 0.563, + "step": 8207 + }, + { + "epoch": 0.7493153186050757, + "grad_norm": 0.45875513553619385, + "learning_rate": 4.814025943807887e-06, + "loss": 0.5658, + "step": 8208 + }, + { + "epoch": 0.7494066094577323, + "grad_norm": 0.49552735686302185, + "learning_rate": 4.813980644351399e-06, + "loss": 0.6112, + "step": 8209 + }, + { + "epoch": 0.7494979003103889, + "grad_norm": 0.4709017872810364, + "learning_rate": 4.8139353395917586e-06, + "loss": 0.5782, + "step": 8210 + }, + { + "epoch": 0.7495891911630455, + "grad_norm": 0.4803330600261688, + "learning_rate": 4.81389002952907e-06, + "loss": 0.5435, + "step": 8211 + }, + { + "epoch": 0.749680482015702, + "grad_norm": 0.4745665490627289, + "learning_rate": 4.813844714163437e-06, + "loss": 0.5638, + "step": 8212 + }, + { + "epoch": 0.7497717728683586, + "grad_norm": 0.48814377188682556, + "learning_rate": 4.813799393494963e-06, + "loss": 0.5435, + "step": 8213 + }, + { + "epoch": 0.7498630637210152, + "grad_norm": 0.48193323612213135, + "learning_rate": 4.813754067523753e-06, + "loss": 0.5273, + "step": 8214 + }, + { + "epoch": 0.7499543545736718, + "grad_norm": 0.4881082773208618, + "learning_rate": 4.813708736249912e-06, + "loss": 0.5364, + "step": 8215 + }, + { + "epoch": 0.7500456454263282, + "grad_norm": 0.48668670654296875, + "learning_rate": 4.81366339967354e-06, + "loss": 0.5483, + "step": 8216 + }, + { + "epoch": 0.7501369362789848, + "grad_norm": 0.4616723954677582, + "learning_rate": 4.813618057794744e-06, + "loss": 0.5743, + "step": 8217 + }, + { + "epoch": 0.7502282271316414, + "grad_norm": 0.49908560514450073, + "learning_rate": 4.8135727106136265e-06, + "loss": 0.5265, + "step": 8218 + }, + { + "epoch": 0.750319517984298, + "grad_norm": 0.5144218802452087, + "learning_rate": 4.813527358130292e-06, + "loss": 0.5775, + "step": 8219 + }, + { + "epoch": 0.7504108088369545, + "grad_norm": 0.4612444043159485, + "learning_rate": 4.813482000344845e-06, + "loss": 0.5944, + "step": 8220 + }, + { + "epoch": 0.7505020996896111, + "grad_norm": 0.48545706272125244, + "learning_rate": 4.813436637257389e-06, + "loss": 0.5187, + "step": 8221 + }, + { + "epoch": 0.7505933905422677, + "grad_norm": 0.4828069508075714, + "learning_rate": 4.813391268868027e-06, + "loss": 0.6082, + "step": 8222 + }, + { + "epoch": 0.7506846813949243, + "grad_norm": 0.4654242694377899, + "learning_rate": 4.813345895176864e-06, + "loss": 0.53, + "step": 8223 + }, + { + "epoch": 0.7507759722475807, + "grad_norm": 0.45427143573760986, + "learning_rate": 4.813300516184004e-06, + "loss": 0.5596, + "step": 8224 + }, + { + "epoch": 0.7508672631002373, + "grad_norm": 0.4925324618816376, + "learning_rate": 4.813255131889551e-06, + "loss": 0.5343, + "step": 8225 + }, + { + "epoch": 0.7509585539528939, + "grad_norm": 0.4897556006908417, + "learning_rate": 4.813209742293608e-06, + "loss": 0.5461, + "step": 8226 + }, + { + "epoch": 0.7510498448055505, + "grad_norm": 0.4885581135749817, + "learning_rate": 4.8131643473962804e-06, + "loss": 0.5581, + "step": 8227 + }, + { + "epoch": 0.7511411356582071, + "grad_norm": 0.5004335045814514, + "learning_rate": 4.813118947197671e-06, + "loss": 0.5703, + "step": 8228 + }, + { + "epoch": 0.7512324265108636, + "grad_norm": 0.43764427304267883, + "learning_rate": 4.813073541697886e-06, + "loss": 0.589, + "step": 8229 + }, + { + "epoch": 0.7513237173635202, + "grad_norm": 0.482981413602829, + "learning_rate": 4.813028130897026e-06, + "loss": 0.5619, + "step": 8230 + }, + { + "epoch": 0.7514150082161768, + "grad_norm": 0.48799240589141846, + "learning_rate": 4.812982714795198e-06, + "loss": 0.603, + "step": 8231 + }, + { + "epoch": 0.7515062990688333, + "grad_norm": 0.49279773235321045, + "learning_rate": 4.8129372933925055e-06, + "loss": 0.5531, + "step": 8232 + }, + { + "epoch": 0.7515975899214898, + "grad_norm": 0.4808864891529083, + "learning_rate": 4.812891866689052e-06, + "loss": 0.542, + "step": 8233 + }, + { + "epoch": 0.7516888807741464, + "grad_norm": 0.47739747166633606, + "learning_rate": 4.812846434684941e-06, + "loss": 0.557, + "step": 8234 + }, + { + "epoch": 0.751780171626803, + "grad_norm": 0.46377065777778625, + "learning_rate": 4.812800997380278e-06, + "loss": 0.5524, + "step": 8235 + }, + { + "epoch": 0.7518714624794596, + "grad_norm": 0.4692660868167877, + "learning_rate": 4.812755554775167e-06, + "loss": 0.5517, + "step": 8236 + }, + { + "epoch": 0.7519627533321162, + "grad_norm": 0.44687286019325256, + "learning_rate": 4.812710106869711e-06, + "loss": 0.5953, + "step": 8237 + }, + { + "epoch": 0.7520540441847727, + "grad_norm": 0.45214444398880005, + "learning_rate": 4.8126646536640155e-06, + "loss": 0.5851, + "step": 8238 + }, + { + "epoch": 0.7521453350374292, + "grad_norm": 0.4624052941799164, + "learning_rate": 4.812619195158183e-06, + "loss": 0.5628, + "step": 8239 + }, + { + "epoch": 0.7522366258900858, + "grad_norm": 0.4901579022407532, + "learning_rate": 4.81257373135232e-06, + "loss": 0.5782, + "step": 8240 + }, + { + "epoch": 0.7523279167427424, + "grad_norm": 0.48687440156936646, + "learning_rate": 4.812528262246528e-06, + "loss": 0.5365, + "step": 8241 + }, + { + "epoch": 0.7524192075953989, + "grad_norm": 0.4833681285381317, + "learning_rate": 4.812482787840914e-06, + "loss": 0.5429, + "step": 8242 + }, + { + "epoch": 0.7525104984480555, + "grad_norm": 0.4802491366863251, + "learning_rate": 4.8124373081355804e-06, + "loss": 0.5706, + "step": 8243 + }, + { + "epoch": 0.7526017893007121, + "grad_norm": 0.4647470712661743, + "learning_rate": 4.812391823130631e-06, + "loss": 0.5691, + "step": 8244 + }, + { + "epoch": 0.7526930801533687, + "grad_norm": 0.48986032605171204, + "learning_rate": 4.812346332826172e-06, + "loss": 0.5329, + "step": 8245 + }, + { + "epoch": 0.7527843710060252, + "grad_norm": 0.47610563039779663, + "learning_rate": 4.8123008372223055e-06, + "loss": 0.5552, + "step": 8246 + }, + { + "epoch": 0.7528756618586817, + "grad_norm": 0.4791335165500641, + "learning_rate": 4.8122553363191386e-06, + "loss": 0.5239, + "step": 8247 + }, + { + "epoch": 0.7529669527113383, + "grad_norm": 0.5089547038078308, + "learning_rate": 4.812209830116772e-06, + "loss": 0.5027, + "step": 8248 + }, + { + "epoch": 0.7530582435639949, + "grad_norm": 0.48111242055892944, + "learning_rate": 4.8121643186153125e-06, + "loss": 0.5501, + "step": 8249 + }, + { + "epoch": 0.7531495344166514, + "grad_norm": 0.5320072770118713, + "learning_rate": 4.8121188018148634e-06, + "loss": 0.546, + "step": 8250 + }, + { + "epoch": 0.753240825269308, + "grad_norm": 0.4719356894493103, + "learning_rate": 4.8120732797155305e-06, + "loss": 0.53, + "step": 8251 + }, + { + "epoch": 0.7533321161219646, + "grad_norm": 0.46639034152030945, + "learning_rate": 4.812027752317415e-06, + "loss": 0.538, + "step": 8252 + }, + { + "epoch": 0.7534234069746212, + "grad_norm": 0.46076592803001404, + "learning_rate": 4.811982219620624e-06, + "loss": 0.5663, + "step": 8253 + }, + { + "epoch": 0.7535146978272778, + "grad_norm": 0.48981571197509766, + "learning_rate": 4.811936681625262e-06, + "loss": 0.6044, + "step": 8254 + }, + { + "epoch": 0.7536059886799342, + "grad_norm": 0.4719153046607971, + "learning_rate": 4.811891138331432e-06, + "loss": 0.5567, + "step": 8255 + }, + { + "epoch": 0.7536972795325908, + "grad_norm": 0.5025639533996582, + "learning_rate": 4.811845589739237e-06, + "loss": 0.5487, + "step": 8256 + }, + { + "epoch": 0.7537885703852474, + "grad_norm": 0.45474016666412354, + "learning_rate": 4.811800035848785e-06, + "loss": 0.6022, + "step": 8257 + }, + { + "epoch": 0.753879861237904, + "grad_norm": 0.4870639741420746, + "learning_rate": 4.811754476660177e-06, + "loss": 0.5621, + "step": 8258 + }, + { + "epoch": 0.7539711520905605, + "grad_norm": 0.4881424307823181, + "learning_rate": 4.811708912173521e-06, + "loss": 0.5602, + "step": 8259 + }, + { + "epoch": 0.7540624429432171, + "grad_norm": 0.46230554580688477, + "learning_rate": 4.811663342388918e-06, + "loss": 0.6079, + "step": 8260 + }, + { + "epoch": 0.7541537337958737, + "grad_norm": 0.4967263638973236, + "learning_rate": 4.811617767306475e-06, + "loss": 0.5545, + "step": 8261 + }, + { + "epoch": 0.7542450246485303, + "grad_norm": 0.4923378527164459, + "learning_rate": 4.811572186926294e-06, + "loss": 0.5614, + "step": 8262 + }, + { + "epoch": 0.7543363155011867, + "grad_norm": 0.5167490839958191, + "learning_rate": 4.811526601248481e-06, + "loss": 0.5293, + "step": 8263 + }, + { + "epoch": 0.7544276063538433, + "grad_norm": 0.4875034689903259, + "learning_rate": 4.811481010273141e-06, + "loss": 0.5688, + "step": 8264 + }, + { + "epoch": 0.7545188972064999, + "grad_norm": 0.47697120904922485, + "learning_rate": 4.811435414000377e-06, + "loss": 0.5416, + "step": 8265 + }, + { + "epoch": 0.7546101880591565, + "grad_norm": 0.4840117394924164, + "learning_rate": 4.811389812430294e-06, + "loss": 0.5734, + "step": 8266 + }, + { + "epoch": 0.754701478911813, + "grad_norm": 0.5006721615791321, + "learning_rate": 4.8113442055629975e-06, + "loss": 0.5366, + "step": 8267 + }, + { + "epoch": 0.7547927697644696, + "grad_norm": 0.4934602379798889, + "learning_rate": 4.81129859339859e-06, + "loss": 0.5414, + "step": 8268 + }, + { + "epoch": 0.7548840606171262, + "grad_norm": 0.4696256220340729, + "learning_rate": 4.811252975937179e-06, + "loss": 0.4998, + "step": 8269 + }, + { + "epoch": 0.7549753514697827, + "grad_norm": 0.4567723572254181, + "learning_rate": 4.811207353178866e-06, + "loss": 0.5472, + "step": 8270 + }, + { + "epoch": 0.7550666423224393, + "grad_norm": 0.4755626916885376, + "learning_rate": 4.811161725123757e-06, + "loss": 0.6277, + "step": 8271 + }, + { + "epoch": 0.7551579331750958, + "grad_norm": 0.438300758600235, + "learning_rate": 4.811116091771957e-06, + "loss": 0.5944, + "step": 8272 + }, + { + "epoch": 0.7552492240277524, + "grad_norm": 0.4883558452129364, + "learning_rate": 4.811070453123568e-06, + "loss": 0.5344, + "step": 8273 + }, + { + "epoch": 0.755340514880409, + "grad_norm": 0.48318731784820557, + "learning_rate": 4.811024809178699e-06, + "loss": 0.5651, + "step": 8274 + }, + { + "epoch": 0.7554318057330656, + "grad_norm": 0.468659371137619, + "learning_rate": 4.810979159937451e-06, + "loss": 0.512, + "step": 8275 + }, + { + "epoch": 0.7555230965857221, + "grad_norm": 0.5088440775871277, + "learning_rate": 4.810933505399931e-06, + "loss": 0.5239, + "step": 8276 + }, + { + "epoch": 0.7556143874383787, + "grad_norm": 0.4568905830383301, + "learning_rate": 4.810887845566241e-06, + "loss": 0.5631, + "step": 8277 + }, + { + "epoch": 0.7557056782910352, + "grad_norm": 0.48351579904556274, + "learning_rate": 4.810842180436488e-06, + "loss": 0.5669, + "step": 8278 + }, + { + "epoch": 0.7557969691436918, + "grad_norm": 0.45650967955589294, + "learning_rate": 4.810796510010776e-06, + "loss": 0.5319, + "step": 8279 + }, + { + "epoch": 0.7558882599963483, + "grad_norm": 0.4476128816604614, + "learning_rate": 4.810750834289208e-06, + "loss": 0.601, + "step": 8280 + }, + { + "epoch": 0.7559795508490049, + "grad_norm": 0.5073515772819519, + "learning_rate": 4.8107051532718916e-06, + "loss": 0.532, + "step": 8281 + }, + { + "epoch": 0.7560708417016615, + "grad_norm": 0.43594908714294434, + "learning_rate": 4.810659466958929e-06, + "loss": 0.605, + "step": 8282 + }, + { + "epoch": 0.7561621325543181, + "grad_norm": 0.4782649576663971, + "learning_rate": 4.810613775350427e-06, + "loss": 0.6124, + "step": 8283 + }, + { + "epoch": 0.7562534234069747, + "grad_norm": 0.49630677700042725, + "learning_rate": 4.810568078446488e-06, + "loss": 0.524, + "step": 8284 + }, + { + "epoch": 0.7563447142596312, + "grad_norm": 0.4393444061279297, + "learning_rate": 4.810522376247218e-06, + "loss": 0.577, + "step": 8285 + }, + { + "epoch": 0.7564360051122877, + "grad_norm": 0.46718335151672363, + "learning_rate": 4.810476668752723e-06, + "loss": 0.5727, + "step": 8286 + }, + { + "epoch": 0.7565272959649443, + "grad_norm": 0.49202200770378113, + "learning_rate": 4.8104309559631056e-06, + "loss": 0.5101, + "step": 8287 + }, + { + "epoch": 0.7566185868176009, + "grad_norm": 0.48448026180267334, + "learning_rate": 4.810385237878471e-06, + "loss": 0.5589, + "step": 8288 + }, + { + "epoch": 0.7567098776702574, + "grad_norm": 0.47514694929122925, + "learning_rate": 4.810339514498925e-06, + "loss": 0.5066, + "step": 8289 + }, + { + "epoch": 0.756801168522914, + "grad_norm": 0.4568246304988861, + "learning_rate": 4.8102937858245715e-06, + "loss": 0.5691, + "step": 8290 + }, + { + "epoch": 0.7568924593755706, + "grad_norm": 0.47591185569763184, + "learning_rate": 4.810248051855515e-06, + "loss": 0.5629, + "step": 8291 + }, + { + "epoch": 0.7569837502282272, + "grad_norm": 0.5062516331672668, + "learning_rate": 4.810202312591862e-06, + "loss": 0.5651, + "step": 8292 + }, + { + "epoch": 0.7570750410808837, + "grad_norm": 0.4851600229740143, + "learning_rate": 4.8101565680337154e-06, + "loss": 0.5522, + "step": 8293 + }, + { + "epoch": 0.7571663319335402, + "grad_norm": 0.5005179643630981, + "learning_rate": 4.810110818181181e-06, + "loss": 0.528, + "step": 8294 + }, + { + "epoch": 0.7572576227861968, + "grad_norm": 0.49506017565727234, + "learning_rate": 4.810065063034364e-06, + "loss": 0.5347, + "step": 8295 + }, + { + "epoch": 0.7573489136388534, + "grad_norm": 0.5063570737838745, + "learning_rate": 4.810019302593368e-06, + "loss": 0.5098, + "step": 8296 + }, + { + "epoch": 0.75744020449151, + "grad_norm": 0.505735456943512, + "learning_rate": 4.809973536858299e-06, + "loss": 0.5535, + "step": 8297 + }, + { + "epoch": 0.7575314953441665, + "grad_norm": 0.48455992341041565, + "learning_rate": 4.809927765829261e-06, + "loss": 0.5546, + "step": 8298 + }, + { + "epoch": 0.7576227861968231, + "grad_norm": 0.4814709424972534, + "learning_rate": 4.80988198950636e-06, + "loss": 0.5715, + "step": 8299 + }, + { + "epoch": 0.7577140770494797, + "grad_norm": 0.44957736134529114, + "learning_rate": 4.8098362078896995e-06, + "loss": 0.5833, + "step": 8300 + }, + { + "epoch": 0.7578053679021362, + "grad_norm": 0.4995761215686798, + "learning_rate": 4.8097904209793866e-06, + "loss": 0.573, + "step": 8301 + }, + { + "epoch": 0.7578966587547927, + "grad_norm": 0.4735499322414398, + "learning_rate": 4.8097446287755234e-06, + "loss": 0.596, + "step": 8302 + }, + { + "epoch": 0.7579879496074493, + "grad_norm": 0.49013441801071167, + "learning_rate": 4.809698831278217e-06, + "loss": 0.5751, + "step": 8303 + }, + { + "epoch": 0.7580792404601059, + "grad_norm": 0.49557530879974365, + "learning_rate": 4.809653028487572e-06, + "loss": 0.531, + "step": 8304 + }, + { + "epoch": 0.7581705313127625, + "grad_norm": 0.47995737195014954, + "learning_rate": 4.8096072204036925e-06, + "loss": 0.535, + "step": 8305 + }, + { + "epoch": 0.758261822165419, + "grad_norm": 0.4961608946323395, + "learning_rate": 4.809561407026684e-06, + "loss": 0.5473, + "step": 8306 + }, + { + "epoch": 0.7583531130180756, + "grad_norm": 0.46843117475509644, + "learning_rate": 4.809515588356652e-06, + "loss": 0.5817, + "step": 8307 + }, + { + "epoch": 0.7584444038707322, + "grad_norm": 0.4916762113571167, + "learning_rate": 4.8094697643937e-06, + "loss": 0.5126, + "step": 8308 + }, + { + "epoch": 0.7585356947233887, + "grad_norm": 0.4729004204273224, + "learning_rate": 4.809423935137936e-06, + "loss": 0.5781, + "step": 8309 + }, + { + "epoch": 0.7586269855760452, + "grad_norm": 0.47828999161720276, + "learning_rate": 4.809378100589461e-06, + "loss": 0.5641, + "step": 8310 + }, + { + "epoch": 0.7587182764287018, + "grad_norm": 0.5146558284759521, + "learning_rate": 4.809332260748384e-06, + "loss": 0.5547, + "step": 8311 + }, + { + "epoch": 0.7588095672813584, + "grad_norm": 0.46619266271591187, + "learning_rate": 4.809286415614806e-06, + "loss": 0.5601, + "step": 8312 + }, + { + "epoch": 0.758900858134015, + "grad_norm": 0.47480836510658264, + "learning_rate": 4.809240565188836e-06, + "loss": 0.5633, + "step": 8313 + }, + { + "epoch": 0.7589921489866716, + "grad_norm": 0.48911166191101074, + "learning_rate": 4.809194709470577e-06, + "loss": 0.5614, + "step": 8314 + }, + { + "epoch": 0.7590834398393281, + "grad_norm": 0.5392557978630066, + "learning_rate": 4.809148848460133e-06, + "loss": 0.5151, + "step": 8315 + }, + { + "epoch": 0.7591747306919847, + "grad_norm": 0.4953908920288086, + "learning_rate": 4.809102982157612e-06, + "loss": 0.5347, + "step": 8316 + }, + { + "epoch": 0.7592660215446412, + "grad_norm": 0.4545668661594391, + "learning_rate": 4.8090571105631176e-06, + "loss": 0.5638, + "step": 8317 + }, + { + "epoch": 0.7593573123972978, + "grad_norm": 0.46701815724372864, + "learning_rate": 4.809011233676754e-06, + "loss": 0.6038, + "step": 8318 + }, + { + "epoch": 0.7594486032499543, + "grad_norm": 0.5085429549217224, + "learning_rate": 4.808965351498628e-06, + "loss": 0.5446, + "step": 8319 + }, + { + "epoch": 0.7595398941026109, + "grad_norm": 0.4829077422618866, + "learning_rate": 4.808919464028844e-06, + "loss": 0.5727, + "step": 8320 + }, + { + "epoch": 0.7596311849552675, + "grad_norm": 0.48965659737586975, + "learning_rate": 4.808873571267507e-06, + "loss": 0.5431, + "step": 8321 + }, + { + "epoch": 0.7597224758079241, + "grad_norm": 0.4527161121368408, + "learning_rate": 4.808827673214723e-06, + "loss": 0.5647, + "step": 8322 + }, + { + "epoch": 0.7598137666605806, + "grad_norm": 0.4509449601173401, + "learning_rate": 4.808781769870596e-06, + "loss": 0.5617, + "step": 8323 + }, + { + "epoch": 0.7599050575132372, + "grad_norm": 0.4995587468147278, + "learning_rate": 4.808735861235232e-06, + "loss": 0.4961, + "step": 8324 + }, + { + "epoch": 0.7599963483658937, + "grad_norm": 0.493283748626709, + "learning_rate": 4.808689947308735e-06, + "loss": 0.5312, + "step": 8325 + }, + { + "epoch": 0.7600876392185503, + "grad_norm": 0.4814434051513672, + "learning_rate": 4.808644028091213e-06, + "loss": 0.5712, + "step": 8326 + }, + { + "epoch": 0.7601789300712068, + "grad_norm": 0.46418842673301697, + "learning_rate": 4.808598103582768e-06, + "loss": 0.5834, + "step": 8327 + }, + { + "epoch": 0.7602702209238634, + "grad_norm": 0.5048096179962158, + "learning_rate": 4.808552173783507e-06, + "loss": 0.5629, + "step": 8328 + }, + { + "epoch": 0.76036151177652, + "grad_norm": 0.46753403544425964, + "learning_rate": 4.808506238693535e-06, + "loss": 0.5419, + "step": 8329 + }, + { + "epoch": 0.7604528026291766, + "grad_norm": 0.5002855062484741, + "learning_rate": 4.808460298312958e-06, + "loss": 0.5225, + "step": 8330 + }, + { + "epoch": 0.7605440934818332, + "grad_norm": 0.4911825358867645, + "learning_rate": 4.808414352641881e-06, + "loss": 0.5335, + "step": 8331 + }, + { + "epoch": 0.7606353843344897, + "grad_norm": 0.47697022557258606, + "learning_rate": 4.808368401680407e-06, + "loss": 0.5403, + "step": 8332 + }, + { + "epoch": 0.7607266751871462, + "grad_norm": 0.4493778347969055, + "learning_rate": 4.808322445428644e-06, + "loss": 0.5485, + "step": 8333 + }, + { + "epoch": 0.7608179660398028, + "grad_norm": 0.5313048958778381, + "learning_rate": 4.808276483886696e-06, + "loss": 0.532, + "step": 8334 + }, + { + "epoch": 0.7609092568924594, + "grad_norm": 0.48686453700065613, + "learning_rate": 4.80823051705467e-06, + "loss": 0.5267, + "step": 8335 + }, + { + "epoch": 0.7610005477451159, + "grad_norm": 0.4828825891017914, + "learning_rate": 4.808184544932668e-06, + "loss": 0.5645, + "step": 8336 + }, + { + "epoch": 0.7610918385977725, + "grad_norm": 0.4940696060657501, + "learning_rate": 4.8081385675207995e-06, + "loss": 0.4959, + "step": 8337 + }, + { + "epoch": 0.7611831294504291, + "grad_norm": 0.4550492465496063, + "learning_rate": 4.808092584819166e-06, + "loss": 0.5654, + "step": 8338 + }, + { + "epoch": 0.7612744203030857, + "grad_norm": 0.4832066297531128, + "learning_rate": 4.808046596827877e-06, + "loss": 0.5143, + "step": 8339 + }, + { + "epoch": 0.7613657111557421, + "grad_norm": 0.5138266682624817, + "learning_rate": 4.8080006035470335e-06, + "loss": 0.5222, + "step": 8340 + }, + { + "epoch": 0.7614570020083987, + "grad_norm": 0.47259876132011414, + "learning_rate": 4.807954604976745e-06, + "loss": 0.5348, + "step": 8341 + }, + { + "epoch": 0.7615482928610553, + "grad_norm": 0.4529261291027069, + "learning_rate": 4.807908601117114e-06, + "loss": 0.5754, + "step": 8342 + }, + { + "epoch": 0.7616395837137119, + "grad_norm": 0.4960751235485077, + "learning_rate": 4.807862591968246e-06, + "loss": 0.5002, + "step": 8343 + }, + { + "epoch": 0.7617308745663685, + "grad_norm": 0.4962732195854187, + "learning_rate": 4.807816577530249e-06, + "loss": 0.5508, + "step": 8344 + }, + { + "epoch": 0.761822165419025, + "grad_norm": 0.4838384985923767, + "learning_rate": 4.807770557803226e-06, + "loss": 0.5741, + "step": 8345 + }, + { + "epoch": 0.7619134562716816, + "grad_norm": 0.48819223046302795, + "learning_rate": 4.8077245327872834e-06, + "loss": 0.5482, + "step": 8346 + }, + { + "epoch": 0.7620047471243382, + "grad_norm": 0.4536271393299103, + "learning_rate": 4.807678502482527e-06, + "loss": 0.5982, + "step": 8347 + }, + { + "epoch": 0.7620960379769947, + "grad_norm": 0.46885016560554504, + "learning_rate": 4.80763246688906e-06, + "loss": 0.5516, + "step": 8348 + }, + { + "epoch": 0.7621873288296512, + "grad_norm": 0.4978333115577698, + "learning_rate": 4.8075864260069914e-06, + "loss": 0.5245, + "step": 8349 + }, + { + "epoch": 0.7622786196823078, + "grad_norm": 0.4709928035736084, + "learning_rate": 4.807540379836425e-06, + "loss": 0.5724, + "step": 8350 + }, + { + "epoch": 0.7623699105349644, + "grad_norm": 0.45589789748191833, + "learning_rate": 4.807494328377466e-06, + "loss": 0.5152, + "step": 8351 + }, + { + "epoch": 0.762461201387621, + "grad_norm": 0.4783141613006592, + "learning_rate": 4.80744827163022e-06, + "loss": 0.5655, + "step": 8352 + }, + { + "epoch": 0.7625524922402775, + "grad_norm": 0.45282208919525146, + "learning_rate": 4.807402209594793e-06, + "loss": 0.5634, + "step": 8353 + }, + { + "epoch": 0.7626437830929341, + "grad_norm": 0.4799192249774933, + "learning_rate": 4.807356142271291e-06, + "loss": 0.594, + "step": 8354 + }, + { + "epoch": 0.7627350739455907, + "grad_norm": 0.48464110493659973, + "learning_rate": 4.807310069659819e-06, + "loss": 0.5676, + "step": 8355 + }, + { + "epoch": 0.7628263647982472, + "grad_norm": 0.4857197701931, + "learning_rate": 4.8072639917604815e-06, + "loss": 0.5733, + "step": 8356 + }, + { + "epoch": 0.7629176556509037, + "grad_norm": 0.45753106474876404, + "learning_rate": 4.807217908573386e-06, + "loss": 0.5604, + "step": 8357 + }, + { + "epoch": 0.7630089465035603, + "grad_norm": 0.486255019903183, + "learning_rate": 4.807171820098637e-06, + "loss": 0.5927, + "step": 8358 + }, + { + "epoch": 0.7631002373562169, + "grad_norm": 0.4924474358558655, + "learning_rate": 4.807125726336341e-06, + "loss": 0.6023, + "step": 8359 + }, + { + "epoch": 0.7631915282088735, + "grad_norm": 0.4854329526424408, + "learning_rate": 4.807079627286603e-06, + "loss": 0.556, + "step": 8360 + }, + { + "epoch": 0.7632828190615301, + "grad_norm": 0.43171340227127075, + "learning_rate": 4.807033522949528e-06, + "loss": 0.5669, + "step": 8361 + }, + { + "epoch": 0.7633741099141866, + "grad_norm": 0.5137307047843933, + "learning_rate": 4.806987413325224e-06, + "loss": 0.5704, + "step": 8362 + }, + { + "epoch": 0.7634654007668432, + "grad_norm": 0.5263938307762146, + "learning_rate": 4.806941298413793e-06, + "loss": 0.5188, + "step": 8363 + }, + { + "epoch": 0.7635566916194997, + "grad_norm": 0.4987289607524872, + "learning_rate": 4.806895178215344e-06, + "loss": 0.5252, + "step": 8364 + }, + { + "epoch": 0.7636479824721563, + "grad_norm": 0.4445905387401581, + "learning_rate": 4.806849052729981e-06, + "loss": 0.5651, + "step": 8365 + }, + { + "epoch": 0.7637392733248128, + "grad_norm": 0.47790345549583435, + "learning_rate": 4.806802921957811e-06, + "loss": 0.6263, + "step": 8366 + }, + { + "epoch": 0.7638305641774694, + "grad_norm": 0.522747814655304, + "learning_rate": 4.806756785898937e-06, + "loss": 0.534, + "step": 8367 + }, + { + "epoch": 0.763921855030126, + "grad_norm": 0.49811047315597534, + "learning_rate": 4.806710644553468e-06, + "loss": 0.5398, + "step": 8368 + }, + { + "epoch": 0.7640131458827826, + "grad_norm": 0.47383469343185425, + "learning_rate": 4.806664497921508e-06, + "loss": 0.5771, + "step": 8369 + }, + { + "epoch": 0.7641044367354392, + "grad_norm": 0.47467881441116333, + "learning_rate": 4.806618346003164e-06, + "loss": 0.5395, + "step": 8370 + }, + { + "epoch": 0.7641957275880956, + "grad_norm": 0.4973459243774414, + "learning_rate": 4.80657218879854e-06, + "loss": 0.5144, + "step": 8371 + }, + { + "epoch": 0.7642870184407522, + "grad_norm": 0.4827533960342407, + "learning_rate": 4.806526026307743e-06, + "loss": 0.545, + "step": 8372 + }, + { + "epoch": 0.7643783092934088, + "grad_norm": 0.49852311611175537, + "learning_rate": 4.806479858530878e-06, + "loss": 0.5803, + "step": 8373 + }, + { + "epoch": 0.7644696001460654, + "grad_norm": 0.47038671374320984, + "learning_rate": 4.806433685468051e-06, + "loss": 0.5765, + "step": 8374 + }, + { + "epoch": 0.7645608909987219, + "grad_norm": 0.4828142523765564, + "learning_rate": 4.806387507119369e-06, + "loss": 0.5175, + "step": 8375 + }, + { + "epoch": 0.7646521818513785, + "grad_norm": 0.47562316060066223, + "learning_rate": 4.806341323484937e-06, + "loss": 0.5745, + "step": 8376 + }, + { + "epoch": 0.7647434727040351, + "grad_norm": 0.4769808053970337, + "learning_rate": 4.80629513456486e-06, + "loss": 0.5362, + "step": 8377 + }, + { + "epoch": 0.7648347635566917, + "grad_norm": 0.4738505482673645, + "learning_rate": 4.806248940359245e-06, + "loss": 0.5715, + "step": 8378 + }, + { + "epoch": 0.7649260544093481, + "grad_norm": 0.4549485146999359, + "learning_rate": 4.806202740868198e-06, + "loss": 0.5695, + "step": 8379 + }, + { + "epoch": 0.7650173452620047, + "grad_norm": 0.4777669608592987, + "learning_rate": 4.806156536091823e-06, + "loss": 0.5599, + "step": 8380 + }, + { + "epoch": 0.7651086361146613, + "grad_norm": 0.4420334994792938, + "learning_rate": 4.806110326030228e-06, + "loss": 0.5688, + "step": 8381 + }, + { + "epoch": 0.7651999269673179, + "grad_norm": 0.4537273943424225, + "learning_rate": 4.806064110683518e-06, + "loss": 0.5322, + "step": 8382 + }, + { + "epoch": 0.7652912178199744, + "grad_norm": 0.45042240619659424, + "learning_rate": 4.806017890051799e-06, + "loss": 0.5745, + "step": 8383 + }, + { + "epoch": 0.765382508672631, + "grad_norm": 0.4819889962673187, + "learning_rate": 4.805971664135178e-06, + "loss": 0.5536, + "step": 8384 + }, + { + "epoch": 0.7654737995252876, + "grad_norm": 0.46802985668182373, + "learning_rate": 4.805925432933759e-06, + "loss": 0.5467, + "step": 8385 + }, + { + "epoch": 0.7655650903779442, + "grad_norm": 0.4628736674785614, + "learning_rate": 4.805879196447649e-06, + "loss": 0.578, + "step": 8386 + }, + { + "epoch": 0.7656563812306006, + "grad_norm": 0.48045361042022705, + "learning_rate": 4.805832954676954e-06, + "loss": 0.5669, + "step": 8387 + }, + { + "epoch": 0.7657476720832572, + "grad_norm": 0.517172634601593, + "learning_rate": 4.80578670762178e-06, + "loss": 0.5294, + "step": 8388 + }, + { + "epoch": 0.7658389629359138, + "grad_norm": 0.4881475865840912, + "learning_rate": 4.8057404552822325e-06, + "loss": 0.5641, + "step": 8389 + }, + { + "epoch": 0.7659302537885704, + "grad_norm": 0.48565909266471863, + "learning_rate": 4.805694197658418e-06, + "loss": 0.5577, + "step": 8390 + }, + { + "epoch": 0.766021544641227, + "grad_norm": 0.46886640787124634, + "learning_rate": 4.805647934750442e-06, + "loss": 0.5639, + "step": 8391 + }, + { + "epoch": 0.7661128354938835, + "grad_norm": 0.4497630298137665, + "learning_rate": 4.805601666558412e-06, + "loss": 0.5469, + "step": 8392 + }, + { + "epoch": 0.7662041263465401, + "grad_norm": 0.49478307366371155, + "learning_rate": 4.805555393082432e-06, + "loss": 0.5646, + "step": 8393 + }, + { + "epoch": 0.7662954171991967, + "grad_norm": 0.47358497977256775, + "learning_rate": 4.805509114322608e-06, + "loss": 0.5448, + "step": 8394 + }, + { + "epoch": 0.7663867080518532, + "grad_norm": 0.49647098779678345, + "learning_rate": 4.805462830279048e-06, + "loss": 0.5489, + "step": 8395 + }, + { + "epoch": 0.7664779989045097, + "grad_norm": 0.4940779507160187, + "learning_rate": 4.8054165409518576e-06, + "loss": 0.5612, + "step": 8396 + }, + { + "epoch": 0.7665692897571663, + "grad_norm": 0.4716794192790985, + "learning_rate": 4.805370246341141e-06, + "loss": 0.6011, + "step": 8397 + }, + { + "epoch": 0.7666605806098229, + "grad_norm": 0.46849870681762695, + "learning_rate": 4.805323946447007e-06, + "loss": 0.5752, + "step": 8398 + }, + { + "epoch": 0.7667518714624795, + "grad_norm": 0.4882155656814575, + "learning_rate": 4.80527764126956e-06, + "loss": 0.5691, + "step": 8399 + }, + { + "epoch": 0.766843162315136, + "grad_norm": 0.5003937482833862, + "learning_rate": 4.805231330808906e-06, + "loss": 0.5962, + "step": 8400 + }, + { + "epoch": 0.7669344531677926, + "grad_norm": 0.45256561040878296, + "learning_rate": 4.805185015065153e-06, + "loss": 0.5515, + "step": 8401 + }, + { + "epoch": 0.7670257440204491, + "grad_norm": 0.4895772635936737, + "learning_rate": 4.8051386940384035e-06, + "loss": 0.5457, + "step": 8402 + }, + { + "epoch": 0.7671170348731057, + "grad_norm": 0.4836488664150238, + "learning_rate": 4.8050923677287676e-06, + "loss": 0.5541, + "step": 8403 + }, + { + "epoch": 0.7672083257257623, + "grad_norm": 0.4514791965484619, + "learning_rate": 4.80504603613635e-06, + "loss": 0.5853, + "step": 8404 + }, + { + "epoch": 0.7672996165784188, + "grad_norm": 0.4723413288593292, + "learning_rate": 4.804999699261256e-06, + "loss": 0.5551, + "step": 8405 + }, + { + "epoch": 0.7673909074310754, + "grad_norm": 0.48882922530174255, + "learning_rate": 4.804953357103592e-06, + "loss": 0.5559, + "step": 8406 + }, + { + "epoch": 0.767482198283732, + "grad_norm": 0.459099680185318, + "learning_rate": 4.8049070096634655e-06, + "loss": 0.5462, + "step": 8407 + }, + { + "epoch": 0.7675734891363886, + "grad_norm": 0.4799515902996063, + "learning_rate": 4.8048606569409816e-06, + "loss": 0.5436, + "step": 8408 + }, + { + "epoch": 0.7676647799890451, + "grad_norm": 0.4722582697868347, + "learning_rate": 4.804814298936247e-06, + "loss": 0.5885, + "step": 8409 + }, + { + "epoch": 0.7677560708417016, + "grad_norm": 0.4676372706890106, + "learning_rate": 4.804767935649367e-06, + "loss": 0.5902, + "step": 8410 + }, + { + "epoch": 0.7678473616943582, + "grad_norm": 0.46280306577682495, + "learning_rate": 4.804721567080449e-06, + "loss": 0.5809, + "step": 8411 + }, + { + "epoch": 0.7679386525470148, + "grad_norm": 0.4823615252971649, + "learning_rate": 4.8046751932296e-06, + "loss": 0.5793, + "step": 8412 + }, + { + "epoch": 0.7680299433996713, + "grad_norm": 0.4954645037651062, + "learning_rate": 4.8046288140969245e-06, + "loss": 0.5434, + "step": 8413 + }, + { + "epoch": 0.7681212342523279, + "grad_norm": 0.4401443898677826, + "learning_rate": 4.804582429682529e-06, + "loss": 0.5667, + "step": 8414 + }, + { + "epoch": 0.7682125251049845, + "grad_norm": 0.4904170036315918, + "learning_rate": 4.804536039986521e-06, + "loss": 0.5073, + "step": 8415 + }, + { + "epoch": 0.7683038159576411, + "grad_norm": 0.49507397413253784, + "learning_rate": 4.804489645009006e-06, + "loss": 0.5052, + "step": 8416 + }, + { + "epoch": 0.7683951068102977, + "grad_norm": 0.5122511386871338, + "learning_rate": 4.80444324475009e-06, + "loss": 0.5232, + "step": 8417 + }, + { + "epoch": 0.7684863976629541, + "grad_norm": 0.5199733376502991, + "learning_rate": 4.80439683920988e-06, + "loss": 0.5103, + "step": 8418 + }, + { + "epoch": 0.7685776885156107, + "grad_norm": 0.4849315881729126, + "learning_rate": 4.804350428388481e-06, + "loss": 0.5614, + "step": 8419 + }, + { + "epoch": 0.7686689793682673, + "grad_norm": 0.4640842080116272, + "learning_rate": 4.804304012286002e-06, + "loss": 0.5699, + "step": 8420 + }, + { + "epoch": 0.7687602702209239, + "grad_norm": 0.4513789415359497, + "learning_rate": 4.804257590902547e-06, + "loss": 0.5846, + "step": 8421 + }, + { + "epoch": 0.7688515610735804, + "grad_norm": 0.4933246672153473, + "learning_rate": 4.804211164238224e-06, + "loss": 0.5519, + "step": 8422 + }, + { + "epoch": 0.768942851926237, + "grad_norm": 0.509556770324707, + "learning_rate": 4.804164732293139e-06, + "loss": 0.5335, + "step": 8423 + }, + { + "epoch": 0.7690341427788936, + "grad_norm": 0.46815019845962524, + "learning_rate": 4.804118295067396e-06, + "loss": 0.5538, + "step": 8424 + }, + { + "epoch": 0.7691254336315502, + "grad_norm": 0.4695843756198883, + "learning_rate": 4.804071852561105e-06, + "loss": 0.5597, + "step": 8425 + }, + { + "epoch": 0.7692167244842066, + "grad_norm": 0.4597708582878113, + "learning_rate": 4.8040254047743706e-06, + "loss": 0.5709, + "step": 8426 + }, + { + "epoch": 0.7693080153368632, + "grad_norm": 0.4695672392845154, + "learning_rate": 4.803978951707299e-06, + "loss": 0.5254, + "step": 8427 + }, + { + "epoch": 0.7693993061895198, + "grad_norm": 0.4851716458797455, + "learning_rate": 4.803932493359998e-06, + "loss": 0.5635, + "step": 8428 + }, + { + "epoch": 0.7694905970421764, + "grad_norm": 0.5383784770965576, + "learning_rate": 4.803886029732573e-06, + "loss": 0.4962, + "step": 8429 + }, + { + "epoch": 0.769581887894833, + "grad_norm": 0.4535285234451294, + "learning_rate": 4.803839560825131e-06, + "loss": 0.5849, + "step": 8430 + }, + { + "epoch": 0.7696731787474895, + "grad_norm": 0.4439583718776703, + "learning_rate": 4.803793086637778e-06, + "loss": 0.5784, + "step": 8431 + }, + { + "epoch": 0.7697644696001461, + "grad_norm": 0.5751417875289917, + "learning_rate": 4.803746607170621e-06, + "loss": 0.6046, + "step": 8432 + }, + { + "epoch": 0.7698557604528027, + "grad_norm": 0.5019198060035706, + "learning_rate": 4.803700122423766e-06, + "loss": 0.5511, + "step": 8433 + }, + { + "epoch": 0.7699470513054592, + "grad_norm": 0.4586998522281647, + "learning_rate": 4.80365363239732e-06, + "loss": 0.5164, + "step": 8434 + }, + { + "epoch": 0.7700383421581157, + "grad_norm": 0.4821791648864746, + "learning_rate": 4.80360713709139e-06, + "loss": 0.5415, + "step": 8435 + }, + { + "epoch": 0.7701296330107723, + "grad_norm": 0.4455914795398712, + "learning_rate": 4.80356063650608e-06, + "loss": 0.5904, + "step": 8436 + }, + { + "epoch": 0.7702209238634289, + "grad_norm": 0.4445124566555023, + "learning_rate": 4.8035141306415005e-06, + "loss": 0.5853, + "step": 8437 + }, + { + "epoch": 0.7703122147160855, + "grad_norm": 0.47661292552948, + "learning_rate": 4.803467619497756e-06, + "loss": 0.5146, + "step": 8438 + }, + { + "epoch": 0.770403505568742, + "grad_norm": 0.5458090305328369, + "learning_rate": 4.803421103074952e-06, + "loss": 0.5274, + "step": 8439 + }, + { + "epoch": 0.7704947964213986, + "grad_norm": 0.47140276432037354, + "learning_rate": 4.803374581373197e-06, + "loss": 0.5661, + "step": 8440 + }, + { + "epoch": 0.7705860872740551, + "grad_norm": 0.46101951599121094, + "learning_rate": 4.803328054392596e-06, + "loss": 0.5772, + "step": 8441 + }, + { + "epoch": 0.7706773781267117, + "grad_norm": 0.49851709604263306, + "learning_rate": 4.803281522133258e-06, + "loss": 0.5478, + "step": 8442 + }, + { + "epoch": 0.7707686689793682, + "grad_norm": 0.4679166078567505, + "learning_rate": 4.803234984595288e-06, + "loss": 0.5735, + "step": 8443 + }, + { + "epoch": 0.7708599598320248, + "grad_norm": 0.47837164998054504, + "learning_rate": 4.803188441778792e-06, + "loss": 0.5352, + "step": 8444 + }, + { + "epoch": 0.7709512506846814, + "grad_norm": 0.4784557521343231, + "learning_rate": 4.8031418936838775e-06, + "loss": 0.4874, + "step": 8445 + }, + { + "epoch": 0.771042541537338, + "grad_norm": 0.5111079216003418, + "learning_rate": 4.803095340310652e-06, + "loss": 0.5494, + "step": 8446 + }, + { + "epoch": 0.7711338323899946, + "grad_norm": 0.4842674434185028, + "learning_rate": 4.803048781659221e-06, + "loss": 0.5352, + "step": 8447 + }, + { + "epoch": 0.7712251232426511, + "grad_norm": 0.47488996386528015, + "learning_rate": 4.803002217729691e-06, + "loss": 0.553, + "step": 8448 + }, + { + "epoch": 0.7713164140953076, + "grad_norm": 0.4899521470069885, + "learning_rate": 4.802955648522169e-06, + "loss": 0.5472, + "step": 8449 + }, + { + "epoch": 0.7714077049479642, + "grad_norm": 0.47574225068092346, + "learning_rate": 4.802909074036764e-06, + "loss": 0.5905, + "step": 8450 + }, + { + "epoch": 0.7714989958006208, + "grad_norm": 0.49699294567108154, + "learning_rate": 4.802862494273579e-06, + "loss": 0.5776, + "step": 8451 + }, + { + "epoch": 0.7715902866532773, + "grad_norm": 0.45927509665489197, + "learning_rate": 4.802815909232722e-06, + "loss": 0.5657, + "step": 8452 + }, + { + "epoch": 0.7716815775059339, + "grad_norm": 0.4730685353279114, + "learning_rate": 4.8027693189143025e-06, + "loss": 0.5582, + "step": 8453 + }, + { + "epoch": 0.7717728683585905, + "grad_norm": 0.5023016333580017, + "learning_rate": 4.8027227233184235e-06, + "loss": 0.5612, + "step": 8454 + }, + { + "epoch": 0.7718641592112471, + "grad_norm": 0.4949773848056793, + "learning_rate": 4.802676122445193e-06, + "loss": 0.549, + "step": 8455 + }, + { + "epoch": 0.7719554500639036, + "grad_norm": 0.4513704478740692, + "learning_rate": 4.802629516294719e-06, + "loss": 0.5748, + "step": 8456 + }, + { + "epoch": 0.7720467409165601, + "grad_norm": 0.4667225778102875, + "learning_rate": 4.802582904867107e-06, + "loss": 0.545, + "step": 8457 + }, + { + "epoch": 0.7721380317692167, + "grad_norm": 0.46786609292030334, + "learning_rate": 4.802536288162464e-06, + "loss": 0.5994, + "step": 8458 + }, + { + "epoch": 0.7722293226218733, + "grad_norm": 0.49342092871665955, + "learning_rate": 4.802489666180898e-06, + "loss": 0.5384, + "step": 8459 + }, + { + "epoch": 0.7723206134745298, + "grad_norm": 0.49908798933029175, + "learning_rate": 4.802443038922514e-06, + "loss": 0.5187, + "step": 8460 + }, + { + "epoch": 0.7724119043271864, + "grad_norm": 0.47866290807724, + "learning_rate": 4.802396406387421e-06, + "loss": 0.5563, + "step": 8461 + }, + { + "epoch": 0.772503195179843, + "grad_norm": 0.4889097511768341, + "learning_rate": 4.802349768575724e-06, + "loss": 0.5314, + "step": 8462 + }, + { + "epoch": 0.7725944860324996, + "grad_norm": 0.5107873678207397, + "learning_rate": 4.802303125487529e-06, + "loss": 0.5445, + "step": 8463 + }, + { + "epoch": 0.7726857768851562, + "grad_norm": 0.5069677233695984, + "learning_rate": 4.802256477122947e-06, + "loss": 0.5741, + "step": 8464 + }, + { + "epoch": 0.7727770677378126, + "grad_norm": 0.47876664996147156, + "learning_rate": 4.802209823482081e-06, + "loss": 0.5438, + "step": 8465 + }, + { + "epoch": 0.7728683585904692, + "grad_norm": 0.4878784120082855, + "learning_rate": 4.80216316456504e-06, + "loss": 0.5503, + "step": 8466 + }, + { + "epoch": 0.7729596494431258, + "grad_norm": 0.47643181681632996, + "learning_rate": 4.802116500371929e-06, + "loss": 0.54, + "step": 8467 + }, + { + "epoch": 0.7730509402957824, + "grad_norm": 0.4908931255340576, + "learning_rate": 4.802069830902857e-06, + "loss": 0.5342, + "step": 8468 + }, + { + "epoch": 0.7731422311484389, + "grad_norm": 0.4872141182422638, + "learning_rate": 4.80202315615793e-06, + "loss": 0.5093, + "step": 8469 + }, + { + "epoch": 0.7732335220010955, + "grad_norm": 0.4900573194026947, + "learning_rate": 4.8019764761372555e-06, + "loss": 0.5514, + "step": 8470 + }, + { + "epoch": 0.7733248128537521, + "grad_norm": 0.5119401216506958, + "learning_rate": 4.80192979084094e-06, + "loss": 0.526, + "step": 8471 + }, + { + "epoch": 0.7734161037064086, + "grad_norm": 0.4897003471851349, + "learning_rate": 4.8018831002690895e-06, + "loss": 0.5101, + "step": 8472 + }, + { + "epoch": 0.7735073945590651, + "grad_norm": 0.4911273419857025, + "learning_rate": 4.801836404421814e-06, + "loss": 0.518, + "step": 8473 + }, + { + "epoch": 0.7735986854117217, + "grad_norm": 0.4673028886318207, + "learning_rate": 4.801789703299217e-06, + "loss": 0.5631, + "step": 8474 + }, + { + "epoch": 0.7736899762643783, + "grad_norm": 0.5061215758323669, + "learning_rate": 4.801742996901407e-06, + "loss": 0.5674, + "step": 8475 + }, + { + "epoch": 0.7737812671170349, + "grad_norm": 0.485584557056427, + "learning_rate": 4.8016962852284915e-06, + "loss": 0.5911, + "step": 8476 + }, + { + "epoch": 0.7738725579696915, + "grad_norm": 0.47567740082740784, + "learning_rate": 4.801649568280577e-06, + "loss": 0.5754, + "step": 8477 + }, + { + "epoch": 0.773963848822348, + "grad_norm": 0.4874565303325653, + "learning_rate": 4.801602846057771e-06, + "loss": 0.5758, + "step": 8478 + }, + { + "epoch": 0.7740551396750046, + "grad_norm": 0.5140563249588013, + "learning_rate": 4.801556118560181e-06, + "loss": 0.5542, + "step": 8479 + }, + { + "epoch": 0.7741464305276611, + "grad_norm": 0.48532360792160034, + "learning_rate": 4.801509385787913e-06, + "loss": 0.5452, + "step": 8480 + }, + { + "epoch": 0.7742377213803177, + "grad_norm": 0.4588431715965271, + "learning_rate": 4.8014626477410744e-06, + "loss": 0.5614, + "step": 8481 + }, + { + "epoch": 0.7743290122329742, + "grad_norm": 0.4809887111186981, + "learning_rate": 4.801415904419773e-06, + "loss": 0.5353, + "step": 8482 + }, + { + "epoch": 0.7744203030856308, + "grad_norm": 0.494127482175827, + "learning_rate": 4.801369155824113e-06, + "loss": 0.5406, + "step": 8483 + }, + { + "epoch": 0.7745115939382874, + "grad_norm": 0.4887081980705261, + "learning_rate": 4.801322401954207e-06, + "loss": 0.5446, + "step": 8484 + }, + { + "epoch": 0.774602884790944, + "grad_norm": 0.4703602194786072, + "learning_rate": 4.801275642810158e-06, + "loss": 0.5456, + "step": 8485 + }, + { + "epoch": 0.7746941756436005, + "grad_norm": 0.4446831941604614, + "learning_rate": 4.8012288783920736e-06, + "loss": 0.5957, + "step": 8486 + }, + { + "epoch": 0.7747854664962571, + "grad_norm": 0.4713685214519501, + "learning_rate": 4.801182108700062e-06, + "loss": 0.5256, + "step": 8487 + }, + { + "epoch": 0.7748767573489136, + "grad_norm": 0.47545212507247925, + "learning_rate": 4.80113533373423e-06, + "loss": 0.5519, + "step": 8488 + }, + { + "epoch": 0.7749680482015702, + "grad_norm": 0.5058714151382446, + "learning_rate": 4.801088553494685e-06, + "loss": 0.5095, + "step": 8489 + }, + { + "epoch": 0.7750593390542267, + "grad_norm": 0.4894867241382599, + "learning_rate": 4.801041767981533e-06, + "loss": 0.5397, + "step": 8490 + }, + { + "epoch": 0.7751506299068833, + "grad_norm": 0.4891330599784851, + "learning_rate": 4.800994977194884e-06, + "loss": 0.5716, + "step": 8491 + }, + { + "epoch": 0.7752419207595399, + "grad_norm": 0.48538583517074585, + "learning_rate": 4.800948181134843e-06, + "loss": 0.5624, + "step": 8492 + }, + { + "epoch": 0.7753332116121965, + "grad_norm": 0.4592862129211426, + "learning_rate": 4.800901379801517e-06, + "loss": 0.5501, + "step": 8493 + }, + { + "epoch": 0.7754245024648531, + "grad_norm": 0.47167840600013733, + "learning_rate": 4.8008545731950144e-06, + "loss": 0.5786, + "step": 8494 + }, + { + "epoch": 0.7755157933175096, + "grad_norm": 0.49123093485832214, + "learning_rate": 4.800807761315441e-06, + "loss": 0.5464, + "step": 8495 + }, + { + "epoch": 0.7756070841701661, + "grad_norm": 0.47401854395866394, + "learning_rate": 4.800760944162906e-06, + "loss": 0.5556, + "step": 8496 + }, + { + "epoch": 0.7756983750228227, + "grad_norm": 0.47532904148101807, + "learning_rate": 4.800714121737515e-06, + "loss": 0.5377, + "step": 8497 + }, + { + "epoch": 0.7757896658754793, + "grad_norm": 0.5044037699699402, + "learning_rate": 4.8006672940393775e-06, + "loss": 0.5183, + "step": 8498 + }, + { + "epoch": 0.7758809567281358, + "grad_norm": 0.4774356484413147, + "learning_rate": 4.8006204610685985e-06, + "loss": 0.5434, + "step": 8499 + }, + { + "epoch": 0.7759722475807924, + "grad_norm": 0.5000052452087402, + "learning_rate": 4.800573622825287e-06, + "loss": 0.5407, + "step": 8500 + }, + { + "epoch": 0.776063538433449, + "grad_norm": 0.4931187033653259, + "learning_rate": 4.800526779309549e-06, + "loss": 0.5323, + "step": 8501 + }, + { + "epoch": 0.7761548292861056, + "grad_norm": 0.49530521035194397, + "learning_rate": 4.800479930521492e-06, + "loss": 0.5839, + "step": 8502 + }, + { + "epoch": 0.776246120138762, + "grad_norm": 0.517794668674469, + "learning_rate": 4.800433076461225e-06, + "loss": 0.5194, + "step": 8503 + }, + { + "epoch": 0.7763374109914186, + "grad_norm": 0.4835329055786133, + "learning_rate": 4.800386217128853e-06, + "loss": 0.5098, + "step": 8504 + }, + { + "epoch": 0.7764287018440752, + "grad_norm": 0.4555058479309082, + "learning_rate": 4.800339352524485e-06, + "loss": 0.5633, + "step": 8505 + }, + { + "epoch": 0.7765199926967318, + "grad_norm": 0.5051475763320923, + "learning_rate": 4.8002924826482284e-06, + "loss": 0.5279, + "step": 8506 + }, + { + "epoch": 0.7766112835493884, + "grad_norm": 0.502088189125061, + "learning_rate": 4.80024560750019e-06, + "loss": 0.5159, + "step": 8507 + }, + { + "epoch": 0.7767025744020449, + "grad_norm": 0.4815809428691864, + "learning_rate": 4.800198727080477e-06, + "loss": 0.5608, + "step": 8508 + }, + { + "epoch": 0.7767938652547015, + "grad_norm": 0.47117096185684204, + "learning_rate": 4.8001518413891976e-06, + "loss": 0.5573, + "step": 8509 + }, + { + "epoch": 0.7768851561073581, + "grad_norm": 0.4776866137981415, + "learning_rate": 4.8001049504264594e-06, + "loss": 0.5468, + "step": 8510 + }, + { + "epoch": 0.7769764469600146, + "grad_norm": 0.5053306221961975, + "learning_rate": 4.800058054192369e-06, + "loss": 0.486, + "step": 8511 + }, + { + "epoch": 0.7770677378126711, + "grad_norm": 0.48697975277900696, + "learning_rate": 4.800011152687035e-06, + "loss": 0.5547, + "step": 8512 + }, + { + "epoch": 0.7771590286653277, + "grad_norm": 0.48940208554267883, + "learning_rate": 4.799964245910563e-06, + "loss": 0.5912, + "step": 8513 + }, + { + "epoch": 0.7772503195179843, + "grad_norm": 0.47632384300231934, + "learning_rate": 4.799917333863062e-06, + "loss": 0.5342, + "step": 8514 + }, + { + "epoch": 0.7773416103706409, + "grad_norm": 0.4882148206233978, + "learning_rate": 4.7998704165446395e-06, + "loss": 0.5244, + "step": 8515 + }, + { + "epoch": 0.7774329012232974, + "grad_norm": 0.49855712056159973, + "learning_rate": 4.799823493955403e-06, + "loss": 0.5365, + "step": 8516 + }, + { + "epoch": 0.777524192075954, + "grad_norm": 0.47467976808547974, + "learning_rate": 4.799776566095459e-06, + "loss": 0.5688, + "step": 8517 + }, + { + "epoch": 0.7776154829286106, + "grad_norm": 0.49330854415893555, + "learning_rate": 4.799729632964916e-06, + "loss": 0.5724, + "step": 8518 + }, + { + "epoch": 0.7777067737812671, + "grad_norm": 0.4744138717651367, + "learning_rate": 4.799682694563882e-06, + "loss": 0.5684, + "step": 8519 + }, + { + "epoch": 0.7777980646339236, + "grad_norm": 0.5071582198143005, + "learning_rate": 4.799635750892463e-06, + "loss": 0.5473, + "step": 8520 + }, + { + "epoch": 0.7778893554865802, + "grad_norm": 0.5045033097267151, + "learning_rate": 4.799588801950768e-06, + "loss": 0.5337, + "step": 8521 + }, + { + "epoch": 0.7779806463392368, + "grad_norm": 0.46868032217025757, + "learning_rate": 4.7995418477389035e-06, + "loss": 0.55, + "step": 8522 + }, + { + "epoch": 0.7780719371918934, + "grad_norm": 0.47323355078697205, + "learning_rate": 4.799494888256978e-06, + "loss": 0.5456, + "step": 8523 + }, + { + "epoch": 0.77816322804455, + "grad_norm": 0.4628780484199524, + "learning_rate": 4.7994479235051e-06, + "loss": 0.5671, + "step": 8524 + }, + { + "epoch": 0.7782545188972065, + "grad_norm": 0.4694605767726898, + "learning_rate": 4.799400953483375e-06, + "loss": 0.5479, + "step": 8525 + }, + { + "epoch": 0.7783458097498631, + "grad_norm": 0.4507406949996948, + "learning_rate": 4.799353978191911e-06, + "loss": 0.5531, + "step": 8526 + }, + { + "epoch": 0.7784371006025196, + "grad_norm": 0.46405231952667236, + "learning_rate": 4.7993069976308174e-06, + "loss": 0.5584, + "step": 8527 + }, + { + "epoch": 0.7785283914551762, + "grad_norm": 0.511735200881958, + "learning_rate": 4.7992600118001995e-06, + "loss": 0.528, + "step": 8528 + }, + { + "epoch": 0.7786196823078327, + "grad_norm": 0.46809372305870056, + "learning_rate": 4.7992130207001675e-06, + "loss": 0.5807, + "step": 8529 + }, + { + "epoch": 0.7787109731604893, + "grad_norm": 0.45167672634124756, + "learning_rate": 4.7991660243308275e-06, + "loss": 0.5714, + "step": 8530 + }, + { + "epoch": 0.7788022640131459, + "grad_norm": 0.45998308062553406, + "learning_rate": 4.799119022692288e-06, + "loss": 0.5982, + "step": 8531 + }, + { + "epoch": 0.7788935548658025, + "grad_norm": 0.5010191798210144, + "learning_rate": 4.799072015784655e-06, + "loss": 0.5089, + "step": 8532 + }, + { + "epoch": 0.778984845718459, + "grad_norm": 0.5044962167739868, + "learning_rate": 4.799025003608038e-06, + "loss": 0.5354, + "step": 8533 + }, + { + "epoch": 0.7790761365711156, + "grad_norm": 0.4894386827945709, + "learning_rate": 4.7989779861625445e-06, + "loss": 0.5463, + "step": 8534 + }, + { + "epoch": 0.7791674274237721, + "grad_norm": 0.5070360898971558, + "learning_rate": 4.798930963448281e-06, + "loss": 0.5188, + "step": 8535 + }, + { + "epoch": 0.7792587182764287, + "grad_norm": 0.48148271441459656, + "learning_rate": 4.798883935465357e-06, + "loss": 0.5397, + "step": 8536 + }, + { + "epoch": 0.7793500091290853, + "grad_norm": 0.4640694558620453, + "learning_rate": 4.798836902213879e-06, + "loss": 0.5303, + "step": 8537 + }, + { + "epoch": 0.7794412999817418, + "grad_norm": 0.47119638323783875, + "learning_rate": 4.798789863693957e-06, + "loss": 0.5501, + "step": 8538 + }, + { + "epoch": 0.7795325908343984, + "grad_norm": 0.46155670285224915, + "learning_rate": 4.798742819905695e-06, + "loss": 0.5296, + "step": 8539 + }, + { + "epoch": 0.779623881687055, + "grad_norm": 0.4510000944137573, + "learning_rate": 4.798695770849204e-06, + "loss": 0.5724, + "step": 8540 + }, + { + "epoch": 0.7797151725397116, + "grad_norm": 0.49539655447006226, + "learning_rate": 4.798648716524591e-06, + "loss": 0.5465, + "step": 8541 + }, + { + "epoch": 0.779806463392368, + "grad_norm": 0.4714738726615906, + "learning_rate": 4.798601656931963e-06, + "loss": 0.5597, + "step": 8542 + }, + { + "epoch": 0.7798977542450246, + "grad_norm": 0.5107331275939941, + "learning_rate": 4.798554592071428e-06, + "loss": 0.5796, + "step": 8543 + }, + { + "epoch": 0.7799890450976812, + "grad_norm": 0.4945402145385742, + "learning_rate": 4.7985075219430945e-06, + "loss": 0.5301, + "step": 8544 + }, + { + "epoch": 0.7800803359503378, + "grad_norm": 0.4775303602218628, + "learning_rate": 4.79846044654707e-06, + "loss": 0.6023, + "step": 8545 + }, + { + "epoch": 0.7801716268029943, + "grad_norm": 0.47877737879753113, + "learning_rate": 4.7984133658834644e-06, + "loss": 0.5724, + "step": 8546 + }, + { + "epoch": 0.7802629176556509, + "grad_norm": 0.4578505754470825, + "learning_rate": 4.7983662799523825e-06, + "loss": 0.5423, + "step": 8547 + }, + { + "epoch": 0.7803542085083075, + "grad_norm": 0.4779501259326935, + "learning_rate": 4.798319188753933e-06, + "loss": 0.5373, + "step": 8548 + }, + { + "epoch": 0.7804454993609641, + "grad_norm": 0.4353107810020447, + "learning_rate": 4.798272092288224e-06, + "loss": 0.5668, + "step": 8549 + }, + { + "epoch": 0.7805367902136205, + "grad_norm": 0.47819793224334717, + "learning_rate": 4.798224990555364e-06, + "loss": 0.526, + "step": 8550 + }, + { + "epoch": 0.7806280810662771, + "grad_norm": 0.4811854362487793, + "learning_rate": 4.798177883555462e-06, + "loss": 0.5506, + "step": 8551 + }, + { + "epoch": 0.7807193719189337, + "grad_norm": 0.4494722783565521, + "learning_rate": 4.798130771288624e-06, + "loss": 0.5872, + "step": 8552 + }, + { + "epoch": 0.7808106627715903, + "grad_norm": 0.47071364521980286, + "learning_rate": 4.798083653754958e-06, + "loss": 0.531, + "step": 8553 + }, + { + "epoch": 0.7809019536242469, + "grad_norm": 0.4779714345932007, + "learning_rate": 4.798036530954573e-06, + "loss": 0.5828, + "step": 8554 + }, + { + "epoch": 0.7809932444769034, + "grad_norm": 0.5057454109191895, + "learning_rate": 4.797989402887576e-06, + "loss": 0.5279, + "step": 8555 + }, + { + "epoch": 0.78108453532956, + "grad_norm": 0.4976443350315094, + "learning_rate": 4.797942269554076e-06, + "loss": 0.5739, + "step": 8556 + }, + { + "epoch": 0.7811758261822166, + "grad_norm": 0.4800550937652588, + "learning_rate": 4.797895130954181e-06, + "loss": 0.531, + "step": 8557 + }, + { + "epoch": 0.7812671170348731, + "grad_norm": 0.4471558928489685, + "learning_rate": 4.797847987087998e-06, + "loss": 0.6173, + "step": 8558 + }, + { + "epoch": 0.7813584078875296, + "grad_norm": 0.4621239900588989, + "learning_rate": 4.7978008379556366e-06, + "loss": 0.5632, + "step": 8559 + }, + { + "epoch": 0.7814496987401862, + "grad_norm": 0.46322327852249146, + "learning_rate": 4.797753683557203e-06, + "loss": 0.5381, + "step": 8560 + }, + { + "epoch": 0.7815409895928428, + "grad_norm": 0.48618564009666443, + "learning_rate": 4.7977065238928065e-06, + "loss": 0.5655, + "step": 8561 + }, + { + "epoch": 0.7816322804454994, + "grad_norm": 0.4870151877403259, + "learning_rate": 4.797659358962554e-06, + "loss": 0.5489, + "step": 8562 + }, + { + "epoch": 0.781723571298156, + "grad_norm": 0.45773404836654663, + "learning_rate": 4.797612188766556e-06, + "loss": 0.5619, + "step": 8563 + }, + { + "epoch": 0.7818148621508125, + "grad_norm": 0.4458918869495392, + "learning_rate": 4.797565013304918e-06, + "loss": 0.5776, + "step": 8564 + }, + { + "epoch": 0.7819061530034691, + "grad_norm": 0.46502238512039185, + "learning_rate": 4.79751783257775e-06, + "loss": 0.5716, + "step": 8565 + }, + { + "epoch": 0.7819974438561256, + "grad_norm": 0.49470055103302, + "learning_rate": 4.797470646585159e-06, + "loss": 0.5326, + "step": 8566 + }, + { + "epoch": 0.7820887347087822, + "grad_norm": 0.48592233657836914, + "learning_rate": 4.797423455327254e-06, + "loss": 0.577, + "step": 8567 + }, + { + "epoch": 0.7821800255614387, + "grad_norm": 0.4767446219921112, + "learning_rate": 4.797376258804142e-06, + "loss": 0.5615, + "step": 8568 + }, + { + "epoch": 0.7822713164140953, + "grad_norm": 0.48352721333503723, + "learning_rate": 4.7973290570159315e-06, + "loss": 0.5559, + "step": 8569 + }, + { + "epoch": 0.7823626072667519, + "grad_norm": 0.44265052676200867, + "learning_rate": 4.797281849962731e-06, + "loss": 0.5344, + "step": 8570 + }, + { + "epoch": 0.7824538981194085, + "grad_norm": 0.46556931734085083, + "learning_rate": 4.7972346376446486e-06, + "loss": 0.5363, + "step": 8571 + }, + { + "epoch": 0.782545188972065, + "grad_norm": 0.4946458637714386, + "learning_rate": 4.797187420061794e-06, + "loss": 0.5744, + "step": 8572 + }, + { + "epoch": 0.7826364798247215, + "grad_norm": 0.47389882802963257, + "learning_rate": 4.7971401972142724e-06, + "loss": 0.552, + "step": 8573 + }, + { + "epoch": 0.7827277706773781, + "grad_norm": 0.4833245873451233, + "learning_rate": 4.797092969102194e-06, + "loss": 0.5437, + "step": 8574 + }, + { + "epoch": 0.7828190615300347, + "grad_norm": 0.4813062250614166, + "learning_rate": 4.797045735725667e-06, + "loss": 0.5817, + "step": 8575 + }, + { + "epoch": 0.7829103523826912, + "grad_norm": 0.4490475058555603, + "learning_rate": 4.796998497084798e-06, + "loss": 0.5636, + "step": 8576 + }, + { + "epoch": 0.7830016432353478, + "grad_norm": 0.4532967209815979, + "learning_rate": 4.796951253179698e-06, + "loss": 0.6102, + "step": 8577 + }, + { + "epoch": 0.7830929340880044, + "grad_norm": 0.48643165826797485, + "learning_rate": 4.796904004010473e-06, + "loss": 0.5304, + "step": 8578 + }, + { + "epoch": 0.783184224940661, + "grad_norm": 0.4970118999481201, + "learning_rate": 4.796856749577233e-06, + "loss": 0.5826, + "step": 8579 + }, + { + "epoch": 0.7832755157933176, + "grad_norm": 0.5149760246276855, + "learning_rate": 4.7968094898800846e-06, + "loss": 0.5291, + "step": 8580 + }, + { + "epoch": 0.783366806645974, + "grad_norm": 0.4968699812889099, + "learning_rate": 4.796762224919137e-06, + "loss": 0.5417, + "step": 8581 + }, + { + "epoch": 0.7834580974986306, + "grad_norm": 0.4543778896331787, + "learning_rate": 4.796714954694498e-06, + "loss": 0.5519, + "step": 8582 + }, + { + "epoch": 0.7835493883512872, + "grad_norm": 0.48162177205085754, + "learning_rate": 4.7966676792062774e-06, + "loss": 0.553, + "step": 8583 + }, + { + "epoch": 0.7836406792039438, + "grad_norm": 0.46672481298446655, + "learning_rate": 4.796620398454582e-06, + "loss": 0.5959, + "step": 8584 + }, + { + "epoch": 0.7837319700566003, + "grad_norm": 0.4985421597957611, + "learning_rate": 4.796573112439521e-06, + "loss": 0.5747, + "step": 8585 + }, + { + "epoch": 0.7838232609092569, + "grad_norm": 0.47557908296585083, + "learning_rate": 4.796525821161202e-06, + "loss": 0.5377, + "step": 8586 + }, + { + "epoch": 0.7839145517619135, + "grad_norm": 0.48408013582229614, + "learning_rate": 4.7964785246197335e-06, + "loss": 0.5936, + "step": 8587 + }, + { + "epoch": 0.7840058426145701, + "grad_norm": 0.4959332346916199, + "learning_rate": 4.796431222815225e-06, + "loss": 0.5173, + "step": 8588 + }, + { + "epoch": 0.7840971334672265, + "grad_norm": 0.4631623923778534, + "learning_rate": 4.796383915747783e-06, + "loss": 0.6027, + "step": 8589 + }, + { + "epoch": 0.7841884243198831, + "grad_norm": 0.4567594826221466, + "learning_rate": 4.796336603417518e-06, + "loss": 0.5595, + "step": 8590 + }, + { + "epoch": 0.7842797151725397, + "grad_norm": 0.519442617893219, + "learning_rate": 4.796289285824537e-06, + "loss": 0.5267, + "step": 8591 + }, + { + "epoch": 0.7843710060251963, + "grad_norm": 0.4499680697917938, + "learning_rate": 4.796241962968949e-06, + "loss": 0.5972, + "step": 8592 + }, + { + "epoch": 0.7844622968778528, + "grad_norm": 0.4836391508579254, + "learning_rate": 4.796194634850863e-06, + "loss": 0.5721, + "step": 8593 + }, + { + "epoch": 0.7845535877305094, + "grad_norm": 0.4724508821964264, + "learning_rate": 4.7961473014703854e-06, + "loss": 0.5296, + "step": 8594 + }, + { + "epoch": 0.784644878583166, + "grad_norm": 0.4758210778236389, + "learning_rate": 4.796099962827628e-06, + "loss": 0.5455, + "step": 8595 + }, + { + "epoch": 0.7847361694358226, + "grad_norm": 0.49397799372673035, + "learning_rate": 4.796052618922695e-06, + "loss": 0.5347, + "step": 8596 + }, + { + "epoch": 0.784827460288479, + "grad_norm": 0.4604182541370392, + "learning_rate": 4.796005269755699e-06, + "loss": 0.5573, + "step": 8597 + }, + { + "epoch": 0.7849187511411356, + "grad_norm": 0.49199286103248596, + "learning_rate": 4.795957915326747e-06, + "loss": 0.5068, + "step": 8598 + }, + { + "epoch": 0.7850100419937922, + "grad_norm": 0.506045401096344, + "learning_rate": 4.795910555635946e-06, + "loss": 0.5589, + "step": 8599 + }, + { + "epoch": 0.7851013328464488, + "grad_norm": 0.4867546856403351, + "learning_rate": 4.795863190683406e-06, + "loss": 0.5872, + "step": 8600 + }, + { + "epoch": 0.7851926236991054, + "grad_norm": 0.478255957365036, + "learning_rate": 4.795815820469236e-06, + "loss": 0.63, + "step": 8601 + }, + { + "epoch": 0.7852839145517619, + "grad_norm": 0.4841618537902832, + "learning_rate": 4.7957684449935444e-06, + "loss": 0.5459, + "step": 8602 + }, + { + "epoch": 0.7853752054044185, + "grad_norm": 0.4707881212234497, + "learning_rate": 4.795721064256439e-06, + "loss": 0.6196, + "step": 8603 + }, + { + "epoch": 0.785466496257075, + "grad_norm": 0.4802955985069275, + "learning_rate": 4.795673678258028e-06, + "loss": 0.56, + "step": 8604 + }, + { + "epoch": 0.7855577871097316, + "grad_norm": 0.4703930616378784, + "learning_rate": 4.795626286998422e-06, + "loss": 0.515, + "step": 8605 + }, + { + "epoch": 0.7856490779623881, + "grad_norm": 0.4830450415611267, + "learning_rate": 4.795578890477728e-06, + "loss": 0.5409, + "step": 8606 + }, + { + "epoch": 0.7857403688150447, + "grad_norm": 0.5133271217346191, + "learning_rate": 4.795531488696054e-06, + "loss": 0.5397, + "step": 8607 + }, + { + "epoch": 0.7858316596677013, + "grad_norm": 0.4659886360168457, + "learning_rate": 4.795484081653511e-06, + "loss": 0.5573, + "step": 8608 + }, + { + "epoch": 0.7859229505203579, + "grad_norm": 0.4684041142463684, + "learning_rate": 4.7954366693502056e-06, + "loss": 0.5306, + "step": 8609 + }, + { + "epoch": 0.7860142413730145, + "grad_norm": 0.47227177023887634, + "learning_rate": 4.795389251786246e-06, + "loss": 0.5426, + "step": 8610 + }, + { + "epoch": 0.786105532225671, + "grad_norm": 0.4790871739387512, + "learning_rate": 4.795341828961743e-06, + "loss": 0.5091, + "step": 8611 + }, + { + "epoch": 0.7861968230783275, + "grad_norm": 0.49841731786727905, + "learning_rate": 4.795294400876805e-06, + "loss": 0.5413, + "step": 8612 + }, + { + "epoch": 0.7862881139309841, + "grad_norm": 0.48044079542160034, + "learning_rate": 4.7952469675315395e-06, + "loss": 0.5444, + "step": 8613 + }, + { + "epoch": 0.7863794047836407, + "grad_norm": 0.4673338830471039, + "learning_rate": 4.795199528926055e-06, + "loss": 0.5201, + "step": 8614 + }, + { + "epoch": 0.7864706956362972, + "grad_norm": 0.46954798698425293, + "learning_rate": 4.795152085060461e-06, + "loss": 0.5737, + "step": 8615 + }, + { + "epoch": 0.7865619864889538, + "grad_norm": 0.4934476315975189, + "learning_rate": 4.795104635934867e-06, + "loss": 0.5551, + "step": 8616 + }, + { + "epoch": 0.7866532773416104, + "grad_norm": 0.48005977272987366, + "learning_rate": 4.79505718154938e-06, + "loss": 0.5641, + "step": 8617 + }, + { + "epoch": 0.786744568194267, + "grad_norm": 0.49519485235214233, + "learning_rate": 4.79500972190411e-06, + "loss": 0.5636, + "step": 8618 + }, + { + "epoch": 0.7868358590469235, + "grad_norm": 0.49156883358955383, + "learning_rate": 4.794962256999165e-06, + "loss": 0.5555, + "step": 8619 + }, + { + "epoch": 0.78692714989958, + "grad_norm": 0.49284058809280396, + "learning_rate": 4.794914786834655e-06, + "loss": 0.5424, + "step": 8620 + }, + { + "epoch": 0.7870184407522366, + "grad_norm": 0.4990266263484955, + "learning_rate": 4.794867311410687e-06, + "loss": 0.5269, + "step": 8621 + }, + { + "epoch": 0.7871097316048932, + "grad_norm": 0.5101802349090576, + "learning_rate": 4.794819830727371e-06, + "loss": 0.5477, + "step": 8622 + }, + { + "epoch": 0.7872010224575497, + "grad_norm": 0.48576563596725464, + "learning_rate": 4.794772344784816e-06, + "loss": 0.5644, + "step": 8623 + }, + { + "epoch": 0.7872923133102063, + "grad_norm": 0.480350136756897, + "learning_rate": 4.79472485358313e-06, + "loss": 0.5552, + "step": 8624 + }, + { + "epoch": 0.7873836041628629, + "grad_norm": 0.47805526852607727, + "learning_rate": 4.7946773571224225e-06, + "loss": 0.5542, + "step": 8625 + }, + { + "epoch": 0.7874748950155195, + "grad_norm": 0.48450711369514465, + "learning_rate": 4.794629855402802e-06, + "loss": 0.57, + "step": 8626 + }, + { + "epoch": 0.7875661858681761, + "grad_norm": 0.44273510575294495, + "learning_rate": 4.794582348424378e-06, + "loss": 0.5774, + "step": 8627 + }, + { + "epoch": 0.7876574767208325, + "grad_norm": 0.4821823239326477, + "learning_rate": 4.794534836187258e-06, + "loss": 0.5309, + "step": 8628 + }, + { + "epoch": 0.7877487675734891, + "grad_norm": 0.4822966456413269, + "learning_rate": 4.794487318691552e-06, + "loss": 0.5403, + "step": 8629 + }, + { + "epoch": 0.7878400584261457, + "grad_norm": 0.4933781623840332, + "learning_rate": 4.794439795937368e-06, + "loss": 0.5089, + "step": 8630 + }, + { + "epoch": 0.7879313492788023, + "grad_norm": 0.487761914730072, + "learning_rate": 4.794392267924817e-06, + "loss": 0.5406, + "step": 8631 + }, + { + "epoch": 0.7880226401314588, + "grad_norm": 0.4876389503479004, + "learning_rate": 4.794344734654005e-06, + "loss": 0.556, + "step": 8632 + }, + { + "epoch": 0.7881139309841154, + "grad_norm": 0.46226200461387634, + "learning_rate": 4.794297196125043e-06, + "loss": 0.5651, + "step": 8633 + }, + { + "epoch": 0.788205221836772, + "grad_norm": 0.45507657527923584, + "learning_rate": 4.79424965233804e-06, + "loss": 0.5703, + "step": 8634 + }, + { + "epoch": 0.7882965126894286, + "grad_norm": 0.4878300726413727, + "learning_rate": 4.794202103293103e-06, + "loss": 0.5338, + "step": 8635 + }, + { + "epoch": 0.788387803542085, + "grad_norm": 0.45908018946647644, + "learning_rate": 4.794154548990343e-06, + "loss": 0.5622, + "step": 8636 + }, + { + "epoch": 0.7884790943947416, + "grad_norm": 0.4876819849014282, + "learning_rate": 4.794106989429869e-06, + "loss": 0.5167, + "step": 8637 + }, + { + "epoch": 0.7885703852473982, + "grad_norm": 0.46200016140937805, + "learning_rate": 4.794059424611788e-06, + "loss": 0.5771, + "step": 8638 + }, + { + "epoch": 0.7886616761000548, + "grad_norm": 0.4845002591609955, + "learning_rate": 4.79401185453621e-06, + "loss": 0.5167, + "step": 8639 + }, + { + "epoch": 0.7887529669527114, + "grad_norm": 0.47215744853019714, + "learning_rate": 4.793964279203246e-06, + "loss": 0.5634, + "step": 8640 + }, + { + "epoch": 0.7888442578053679, + "grad_norm": 0.4943224787712097, + "learning_rate": 4.793916698613001e-06, + "loss": 0.5085, + "step": 8641 + }, + { + "epoch": 0.7889355486580245, + "grad_norm": 0.4544547498226166, + "learning_rate": 4.793869112765589e-06, + "loss": 0.5457, + "step": 8642 + }, + { + "epoch": 0.789026839510681, + "grad_norm": 0.4573872983455658, + "learning_rate": 4.7938215216611145e-06, + "loss": 0.5534, + "step": 8643 + }, + { + "epoch": 0.7891181303633376, + "grad_norm": 0.4921853244304657, + "learning_rate": 4.793773925299689e-06, + "loss": 0.5519, + "step": 8644 + }, + { + "epoch": 0.7892094212159941, + "grad_norm": 0.4759393036365509, + "learning_rate": 4.793726323681421e-06, + "loss": 0.5446, + "step": 8645 + }, + { + "epoch": 0.7893007120686507, + "grad_norm": 0.5036045908927917, + "learning_rate": 4.79367871680642e-06, + "loss": 0.5534, + "step": 8646 + }, + { + "epoch": 0.7893920029213073, + "grad_norm": 0.46080562472343445, + "learning_rate": 4.793631104674795e-06, + "loss": 0.5745, + "step": 8647 + }, + { + "epoch": 0.7894832937739639, + "grad_norm": 0.4702524244785309, + "learning_rate": 4.793583487286654e-06, + "loss": 0.5415, + "step": 8648 + }, + { + "epoch": 0.7895745846266204, + "grad_norm": 0.4691113531589508, + "learning_rate": 4.793535864642107e-06, + "loss": 0.5856, + "step": 8649 + }, + { + "epoch": 0.789665875479277, + "grad_norm": 0.48739925026893616, + "learning_rate": 4.793488236741264e-06, + "loss": 0.5756, + "step": 8650 + }, + { + "epoch": 0.7897571663319335, + "grad_norm": 0.5025147199630737, + "learning_rate": 4.793440603584233e-06, + "loss": 0.5512, + "step": 8651 + }, + { + "epoch": 0.7898484571845901, + "grad_norm": 0.4959413409233093, + "learning_rate": 4.793392965171123e-06, + "loss": 0.5513, + "step": 8652 + }, + { + "epoch": 0.7899397480372466, + "grad_norm": 0.504814088344574, + "learning_rate": 4.793345321502044e-06, + "loss": 0.5601, + "step": 8653 + }, + { + "epoch": 0.7900310388899032, + "grad_norm": 0.4713726043701172, + "learning_rate": 4.793297672577105e-06, + "loss": 0.5229, + "step": 8654 + }, + { + "epoch": 0.7901223297425598, + "grad_norm": 0.4804232120513916, + "learning_rate": 4.793250018396416e-06, + "loss": 0.5581, + "step": 8655 + }, + { + "epoch": 0.7902136205952164, + "grad_norm": 0.46178656816482544, + "learning_rate": 4.7932023589600835e-06, + "loss": 0.5359, + "step": 8656 + }, + { + "epoch": 0.790304911447873, + "grad_norm": 0.47337839007377625, + "learning_rate": 4.793154694268219e-06, + "loss": 0.5613, + "step": 8657 + }, + { + "epoch": 0.7903962023005295, + "grad_norm": 0.493126779794693, + "learning_rate": 4.793107024320931e-06, + "loss": 0.5589, + "step": 8658 + }, + { + "epoch": 0.790487493153186, + "grad_norm": 0.4868144094944, + "learning_rate": 4.793059349118329e-06, + "loss": 0.5432, + "step": 8659 + }, + { + "epoch": 0.7905787840058426, + "grad_norm": 0.5096801519393921, + "learning_rate": 4.793011668660523e-06, + "loss": 0.5567, + "step": 8660 + }, + { + "epoch": 0.7906700748584992, + "grad_norm": 0.47107481956481934, + "learning_rate": 4.792963982947621e-06, + "loss": 0.5812, + "step": 8661 + }, + { + "epoch": 0.7907613657111557, + "grad_norm": 0.499024897813797, + "learning_rate": 4.792916291979733e-06, + "loss": 0.5685, + "step": 8662 + }, + { + "epoch": 0.7908526565638123, + "grad_norm": 0.48617812991142273, + "learning_rate": 4.792868595756968e-06, + "loss": 0.502, + "step": 8663 + }, + { + "epoch": 0.7909439474164689, + "grad_norm": 0.448546826839447, + "learning_rate": 4.792820894279435e-06, + "loss": 0.6088, + "step": 8664 + }, + { + "epoch": 0.7910352382691255, + "grad_norm": 0.4858374297618866, + "learning_rate": 4.792773187547244e-06, + "loss": 0.5302, + "step": 8665 + }, + { + "epoch": 0.791126529121782, + "grad_norm": 0.4853152334690094, + "learning_rate": 4.792725475560505e-06, + "loss": 0.5466, + "step": 8666 + }, + { + "epoch": 0.7912178199744385, + "grad_norm": 0.4560605585575104, + "learning_rate": 4.7926777583193245e-06, + "loss": 0.5913, + "step": 8667 + }, + { + "epoch": 0.7913091108270951, + "grad_norm": 0.5031903982162476, + "learning_rate": 4.792630035823815e-06, + "loss": 0.5481, + "step": 8668 + }, + { + "epoch": 0.7914004016797517, + "grad_norm": 0.4678965210914612, + "learning_rate": 4.792582308074084e-06, + "loss": 0.5823, + "step": 8669 + }, + { + "epoch": 0.7914916925324083, + "grad_norm": 0.4843676686286926, + "learning_rate": 4.792534575070242e-06, + "loss": 0.5024, + "step": 8670 + }, + { + "epoch": 0.7915829833850648, + "grad_norm": 0.49553245306015015, + "learning_rate": 4.792486836812398e-06, + "loss": 0.5839, + "step": 8671 + }, + { + "epoch": 0.7916742742377214, + "grad_norm": 0.4839991331100464, + "learning_rate": 4.792439093300661e-06, + "loss": 0.556, + "step": 8672 + }, + { + "epoch": 0.791765565090378, + "grad_norm": 0.5121210217475891, + "learning_rate": 4.79239134453514e-06, + "loss": 0.5286, + "step": 8673 + }, + { + "epoch": 0.7918568559430345, + "grad_norm": 0.4862041771411896, + "learning_rate": 4.792343590515946e-06, + "loss": 0.518, + "step": 8674 + }, + { + "epoch": 0.791948146795691, + "grad_norm": 0.49718913435935974, + "learning_rate": 4.792295831243188e-06, + "loss": 0.51, + "step": 8675 + }, + { + "epoch": 0.7920394376483476, + "grad_norm": 0.4589468836784363, + "learning_rate": 4.792248066716974e-06, + "loss": 0.582, + "step": 8676 + }, + { + "epoch": 0.7921307285010042, + "grad_norm": 0.509156346321106, + "learning_rate": 4.792200296937415e-06, + "loss": 0.5044, + "step": 8677 + }, + { + "epoch": 0.7922220193536608, + "grad_norm": 0.4861583113670349, + "learning_rate": 4.792152521904621e-06, + "loss": 0.6181, + "step": 8678 + }, + { + "epoch": 0.7923133102063173, + "grad_norm": 0.5027363300323486, + "learning_rate": 4.792104741618699e-06, + "loss": 0.5842, + "step": 8679 + }, + { + "epoch": 0.7924046010589739, + "grad_norm": 0.4679512083530426, + "learning_rate": 4.79205695607976e-06, + "loss": 0.5803, + "step": 8680 + }, + { + "epoch": 0.7924958919116305, + "grad_norm": 0.48120591044425964, + "learning_rate": 4.792009165287914e-06, + "loss": 0.5746, + "step": 8681 + }, + { + "epoch": 0.792587182764287, + "grad_norm": 0.49131840467453003, + "learning_rate": 4.7919613692432695e-06, + "loss": 0.581, + "step": 8682 + }, + { + "epoch": 0.7926784736169435, + "grad_norm": 0.49058985710144043, + "learning_rate": 4.791913567945937e-06, + "loss": 0.5112, + "step": 8683 + }, + { + "epoch": 0.7927697644696001, + "grad_norm": 0.45472463965415955, + "learning_rate": 4.7918657613960265e-06, + "loss": 0.5371, + "step": 8684 + }, + { + "epoch": 0.7928610553222567, + "grad_norm": 0.4993232190608978, + "learning_rate": 4.791817949593646e-06, + "loss": 0.5241, + "step": 8685 + }, + { + "epoch": 0.7929523461749133, + "grad_norm": 0.4813523590564728, + "learning_rate": 4.791770132538905e-06, + "loss": 0.556, + "step": 8686 + }, + { + "epoch": 0.7930436370275699, + "grad_norm": 0.4540644586086273, + "learning_rate": 4.791722310231914e-06, + "loss": 0.5504, + "step": 8687 + }, + { + "epoch": 0.7931349278802264, + "grad_norm": 0.47252151370048523, + "learning_rate": 4.791674482672784e-06, + "loss": 0.5268, + "step": 8688 + }, + { + "epoch": 0.793226218732883, + "grad_norm": 0.49685001373291016, + "learning_rate": 4.791626649861622e-06, + "loss": 0.5222, + "step": 8689 + }, + { + "epoch": 0.7933175095855395, + "grad_norm": 0.48470568656921387, + "learning_rate": 4.791578811798538e-06, + "loss": 0.5444, + "step": 8690 + }, + { + "epoch": 0.7934088004381961, + "grad_norm": 0.45394226908683777, + "learning_rate": 4.7915309684836426e-06, + "loss": 0.5672, + "step": 8691 + }, + { + "epoch": 0.7935000912908526, + "grad_norm": 0.4692838191986084, + "learning_rate": 4.791483119917045e-06, + "loss": 0.573, + "step": 8692 + }, + { + "epoch": 0.7935913821435092, + "grad_norm": 0.47113820910453796, + "learning_rate": 4.791435266098856e-06, + "loss": 0.5828, + "step": 8693 + }, + { + "epoch": 0.7936826729961658, + "grad_norm": 0.4912675619125366, + "learning_rate": 4.791387407029183e-06, + "loss": 0.538, + "step": 8694 + }, + { + "epoch": 0.7937739638488224, + "grad_norm": 0.4859634041786194, + "learning_rate": 4.791339542708138e-06, + "loss": 0.5404, + "step": 8695 + }, + { + "epoch": 0.793865254701479, + "grad_norm": 0.4833170175552368, + "learning_rate": 4.791291673135829e-06, + "loss": 0.5245, + "step": 8696 + }, + { + "epoch": 0.7939565455541355, + "grad_norm": 0.4957568943500519, + "learning_rate": 4.791243798312367e-06, + "loss": 0.5316, + "step": 8697 + }, + { + "epoch": 0.794047836406792, + "grad_norm": 0.477076917886734, + "learning_rate": 4.791195918237861e-06, + "loss": 0.5558, + "step": 8698 + }, + { + "epoch": 0.7941391272594486, + "grad_norm": 0.477383017539978, + "learning_rate": 4.791148032912421e-06, + "loss": 0.5549, + "step": 8699 + }, + { + "epoch": 0.7942304181121052, + "grad_norm": 0.48554378747940063, + "learning_rate": 4.791100142336156e-06, + "loss": 0.5208, + "step": 8700 + }, + { + "epoch": 0.7943217089647617, + "grad_norm": 0.4845151901245117, + "learning_rate": 4.791052246509176e-06, + "loss": 0.5411, + "step": 8701 + }, + { + "epoch": 0.7944129998174183, + "grad_norm": 0.4676029682159424, + "learning_rate": 4.791004345431592e-06, + "loss": 0.5336, + "step": 8702 + }, + { + "epoch": 0.7945042906700749, + "grad_norm": 0.47624292969703674, + "learning_rate": 4.790956439103512e-06, + "loss": 0.5575, + "step": 8703 + }, + { + "epoch": 0.7945955815227315, + "grad_norm": 0.49051040410995483, + "learning_rate": 4.790908527525046e-06, + "loss": 0.5263, + "step": 8704 + }, + { + "epoch": 0.7946868723753879, + "grad_norm": 0.4736633002758026, + "learning_rate": 4.790860610696306e-06, + "loss": 0.5935, + "step": 8705 + }, + { + "epoch": 0.7947781632280445, + "grad_norm": 0.4926193356513977, + "learning_rate": 4.7908126886174e-06, + "loss": 0.5422, + "step": 8706 + }, + { + "epoch": 0.7948694540807011, + "grad_norm": 0.44127580523490906, + "learning_rate": 4.790764761288438e-06, + "loss": 0.594, + "step": 8707 + }, + { + "epoch": 0.7949607449333577, + "grad_norm": 0.4965681731700897, + "learning_rate": 4.790716828709529e-06, + "loss": 0.537, + "step": 8708 + }, + { + "epoch": 0.7950520357860142, + "grad_norm": 0.4550781548023224, + "learning_rate": 4.790668890880785e-06, + "loss": 0.5793, + "step": 8709 + }, + { + "epoch": 0.7951433266386708, + "grad_norm": 0.48946207761764526, + "learning_rate": 4.790620947802315e-06, + "loss": 0.5443, + "step": 8710 + }, + { + "epoch": 0.7952346174913274, + "grad_norm": 0.48097091913223267, + "learning_rate": 4.790572999474227e-06, + "loss": 0.5625, + "step": 8711 + }, + { + "epoch": 0.795325908343984, + "grad_norm": 0.4477309286594391, + "learning_rate": 4.790525045896633e-06, + "loss": 0.5651, + "step": 8712 + }, + { + "epoch": 0.7954171991966404, + "grad_norm": 0.4886804223060608, + "learning_rate": 4.790477087069643e-06, + "loss": 0.5317, + "step": 8713 + }, + { + "epoch": 0.795508490049297, + "grad_norm": 0.46116524934768677, + "learning_rate": 4.790429122993365e-06, + "loss": 0.6065, + "step": 8714 + }, + { + "epoch": 0.7955997809019536, + "grad_norm": 0.4746030271053314, + "learning_rate": 4.790381153667911e-06, + "loss": 0.5301, + "step": 8715 + }, + { + "epoch": 0.7956910717546102, + "grad_norm": 0.4863830506801605, + "learning_rate": 4.7903331790933895e-06, + "loss": 0.5715, + "step": 8716 + }, + { + "epoch": 0.7957823626072668, + "grad_norm": 0.5151680111885071, + "learning_rate": 4.790285199269912e-06, + "loss": 0.5315, + "step": 8717 + }, + { + "epoch": 0.7958736534599233, + "grad_norm": 0.45970454812049866, + "learning_rate": 4.7902372141975865e-06, + "loss": 0.5518, + "step": 8718 + }, + { + "epoch": 0.7959649443125799, + "grad_norm": 0.493002325296402, + "learning_rate": 4.790189223876525e-06, + "loss": 0.5101, + "step": 8719 + }, + { + "epoch": 0.7960562351652365, + "grad_norm": 0.5010044574737549, + "learning_rate": 4.790141228306835e-06, + "loss": 0.5451, + "step": 8720 + }, + { + "epoch": 0.796147526017893, + "grad_norm": 0.4661238491535187, + "learning_rate": 4.790093227488629e-06, + "loss": 0.5403, + "step": 8721 + }, + { + "epoch": 0.7962388168705495, + "grad_norm": 0.4818657338619232, + "learning_rate": 4.790045221422015e-06, + "loss": 0.5387, + "step": 8722 + }, + { + "epoch": 0.7963301077232061, + "grad_norm": 0.4844629764556885, + "learning_rate": 4.7899972101071045e-06, + "loss": 0.544, + "step": 8723 + }, + { + "epoch": 0.7964213985758627, + "grad_norm": 0.5026785135269165, + "learning_rate": 4.789949193544008e-06, + "loss": 0.523, + "step": 8724 + }, + { + "epoch": 0.7965126894285193, + "grad_norm": 0.5013341307640076, + "learning_rate": 4.789901171732834e-06, + "loss": 0.5401, + "step": 8725 + }, + { + "epoch": 0.7966039802811758, + "grad_norm": 0.48804014921188354, + "learning_rate": 4.789853144673692e-06, + "loss": 0.565, + "step": 8726 + }, + { + "epoch": 0.7966952711338324, + "grad_norm": 0.4898534119129181, + "learning_rate": 4.7898051123666935e-06, + "loss": 0.5701, + "step": 8727 + }, + { + "epoch": 0.796786561986489, + "grad_norm": 0.4602054953575134, + "learning_rate": 4.789757074811949e-06, + "loss": 0.6157, + "step": 8728 + }, + { + "epoch": 0.7968778528391455, + "grad_norm": 0.4831562042236328, + "learning_rate": 4.789709032009567e-06, + "loss": 0.4979, + "step": 8729 + }, + { + "epoch": 0.796969143691802, + "grad_norm": 0.48825302720069885, + "learning_rate": 4.78966098395966e-06, + "loss": 0.4893, + "step": 8730 + }, + { + "epoch": 0.7970604345444586, + "grad_norm": 0.47536221146583557, + "learning_rate": 4.789612930662335e-06, + "loss": 0.5581, + "step": 8731 + }, + { + "epoch": 0.7971517253971152, + "grad_norm": 0.4703681766986847, + "learning_rate": 4.789564872117704e-06, + "loss": 0.5635, + "step": 8732 + }, + { + "epoch": 0.7972430162497718, + "grad_norm": 0.4638805687427521, + "learning_rate": 4.789516808325877e-06, + "loss": 0.546, + "step": 8733 + }, + { + "epoch": 0.7973343071024284, + "grad_norm": 0.5162973999977112, + "learning_rate": 4.7894687392869644e-06, + "loss": 0.5249, + "step": 8734 + }, + { + "epoch": 0.7974255979550849, + "grad_norm": 0.47882384061813354, + "learning_rate": 4.789420665001076e-06, + "loss": 0.5589, + "step": 8735 + }, + { + "epoch": 0.7975168888077415, + "grad_norm": 0.49286705255508423, + "learning_rate": 4.7893725854683205e-06, + "loss": 0.5671, + "step": 8736 + }, + { + "epoch": 0.797608179660398, + "grad_norm": 0.47743555903434753, + "learning_rate": 4.78932450068881e-06, + "loss": 0.5629, + "step": 8737 + }, + { + "epoch": 0.7976994705130546, + "grad_norm": 0.49691373109817505, + "learning_rate": 4.789276410662656e-06, + "loss": 0.5208, + "step": 8738 + }, + { + "epoch": 0.7977907613657111, + "grad_norm": 0.49323901534080505, + "learning_rate": 4.789228315389964e-06, + "loss": 0.5327, + "step": 8739 + }, + { + "epoch": 0.7978820522183677, + "grad_norm": 0.48563945293426514, + "learning_rate": 4.789180214870849e-06, + "loss": 0.5764, + "step": 8740 + }, + { + "epoch": 0.7979733430710243, + "grad_norm": 0.5156559348106384, + "learning_rate": 4.789132109105419e-06, + "loss": 0.5339, + "step": 8741 + }, + { + "epoch": 0.7980646339236809, + "grad_norm": 0.478508859872818, + "learning_rate": 4.789083998093785e-06, + "loss": 0.5687, + "step": 8742 + }, + { + "epoch": 0.7981559247763375, + "grad_norm": 0.4976117014884949, + "learning_rate": 4.7890358818360565e-06, + "loss": 0.5529, + "step": 8743 + }, + { + "epoch": 0.7982472156289939, + "grad_norm": 0.48779159784317017, + "learning_rate": 4.788987760332344e-06, + "loss": 0.5663, + "step": 8744 + }, + { + "epoch": 0.7983385064816505, + "grad_norm": 0.47049465775489807, + "learning_rate": 4.788939633582759e-06, + "loss": 0.5835, + "step": 8745 + }, + { + "epoch": 0.7984297973343071, + "grad_norm": 0.4743490219116211, + "learning_rate": 4.788891501587409e-06, + "loss": 0.5377, + "step": 8746 + }, + { + "epoch": 0.7985210881869637, + "grad_norm": 0.4700471758842468, + "learning_rate": 4.788843364346407e-06, + "loss": 0.5497, + "step": 8747 + }, + { + "epoch": 0.7986123790396202, + "grad_norm": 0.4820302724838257, + "learning_rate": 4.788795221859863e-06, + "loss": 0.5461, + "step": 8748 + }, + { + "epoch": 0.7987036698922768, + "grad_norm": 0.501882016658783, + "learning_rate": 4.788747074127885e-06, + "loss": 0.5209, + "step": 8749 + }, + { + "epoch": 0.7987949607449334, + "grad_norm": 0.4690801799297333, + "learning_rate": 4.7886989211505866e-06, + "loss": 0.5579, + "step": 8750 + }, + { + "epoch": 0.79888625159759, + "grad_norm": 0.48961034417152405, + "learning_rate": 4.788650762928076e-06, + "loss": 0.5678, + "step": 8751 + }, + { + "epoch": 0.7989775424502464, + "grad_norm": 0.4858415424823761, + "learning_rate": 4.788602599460465e-06, + "loss": 0.5374, + "step": 8752 + }, + { + "epoch": 0.799068833302903, + "grad_norm": 0.48672541975975037, + "learning_rate": 4.788554430747862e-06, + "loss": 0.5365, + "step": 8753 + }, + { + "epoch": 0.7991601241555596, + "grad_norm": 0.48638269305229187, + "learning_rate": 4.788506256790378e-06, + "loss": 0.5437, + "step": 8754 + }, + { + "epoch": 0.7992514150082162, + "grad_norm": 0.48185470700263977, + "learning_rate": 4.788458077588125e-06, + "loss": 0.5582, + "step": 8755 + }, + { + "epoch": 0.7993427058608727, + "grad_norm": 0.493673175573349, + "learning_rate": 4.788409893141212e-06, + "loss": 0.5588, + "step": 8756 + }, + { + "epoch": 0.7994339967135293, + "grad_norm": 0.48914891481399536, + "learning_rate": 4.788361703449751e-06, + "loss": 0.5293, + "step": 8757 + }, + { + "epoch": 0.7995252875661859, + "grad_norm": 0.5142388343811035, + "learning_rate": 4.78831350851385e-06, + "loss": 0.5525, + "step": 8758 + }, + { + "epoch": 0.7996165784188425, + "grad_norm": 0.4726998209953308, + "learning_rate": 4.78826530833362e-06, + "loss": 0.546, + "step": 8759 + }, + { + "epoch": 0.799707869271499, + "grad_norm": 0.48526841402053833, + "learning_rate": 4.7882171029091736e-06, + "loss": 0.5428, + "step": 8760 + }, + { + "epoch": 0.7997991601241555, + "grad_norm": 0.46835336089134216, + "learning_rate": 4.78816889224062e-06, + "loss": 0.5476, + "step": 8761 + }, + { + "epoch": 0.7998904509768121, + "grad_norm": 0.47115558385849, + "learning_rate": 4.7881206763280686e-06, + "loss": 0.5458, + "step": 8762 + }, + { + "epoch": 0.7999817418294687, + "grad_norm": 0.46596962213516235, + "learning_rate": 4.7880724551716306e-06, + "loss": 0.5663, + "step": 8763 + }, + { + "epoch": 0.8000730326821253, + "grad_norm": 0.43819621205329895, + "learning_rate": 4.788024228771417e-06, + "loss": 0.594, + "step": 8764 + }, + { + "epoch": 0.8001643235347818, + "grad_norm": 0.4651520848274231, + "learning_rate": 4.787975997127538e-06, + "loss": 0.5211, + "step": 8765 + }, + { + "epoch": 0.8002556143874384, + "grad_norm": 0.4829968512058258, + "learning_rate": 4.7879277602401045e-06, + "loss": 0.5779, + "step": 8766 + }, + { + "epoch": 0.800346905240095, + "grad_norm": 0.497847318649292, + "learning_rate": 4.7878795181092266e-06, + "loss": 0.5484, + "step": 8767 + }, + { + "epoch": 0.8004381960927515, + "grad_norm": 0.46317723393440247, + "learning_rate": 4.787831270735015e-06, + "loss": 0.5784, + "step": 8768 + }, + { + "epoch": 0.800529486945408, + "grad_norm": 0.5024718642234802, + "learning_rate": 4.78778301811758e-06, + "loss": 0.5036, + "step": 8769 + }, + { + "epoch": 0.8006207777980646, + "grad_norm": 0.4337300658226013, + "learning_rate": 4.787734760257034e-06, + "loss": 0.6043, + "step": 8770 + }, + { + "epoch": 0.8007120686507212, + "grad_norm": 0.47604459524154663, + "learning_rate": 4.7876864971534844e-06, + "loss": 0.5254, + "step": 8771 + }, + { + "epoch": 0.8008033595033778, + "grad_norm": 0.4904971420764923, + "learning_rate": 4.7876382288070436e-06, + "loss": 0.5625, + "step": 8772 + }, + { + "epoch": 0.8008946503560344, + "grad_norm": 0.4616314470767975, + "learning_rate": 4.787589955217822e-06, + "loss": 0.5426, + "step": 8773 + }, + { + "epoch": 0.8009859412086909, + "grad_norm": 0.5049353241920471, + "learning_rate": 4.7875416763859305e-06, + "loss": 0.5504, + "step": 8774 + }, + { + "epoch": 0.8010772320613474, + "grad_norm": 0.5044887661933899, + "learning_rate": 4.7874933923114795e-06, + "loss": 0.552, + "step": 8775 + }, + { + "epoch": 0.801168522914004, + "grad_norm": 0.5071374773979187, + "learning_rate": 4.78744510299458e-06, + "loss": 0.5341, + "step": 8776 + }, + { + "epoch": 0.8012598137666606, + "grad_norm": 0.45287859439849854, + "learning_rate": 4.787396808435342e-06, + "loss": 0.5516, + "step": 8777 + }, + { + "epoch": 0.8013511046193171, + "grad_norm": 0.4794839322566986, + "learning_rate": 4.787348508633877e-06, + "loss": 0.582, + "step": 8778 + }, + { + "epoch": 0.8014423954719737, + "grad_norm": 0.47161760926246643, + "learning_rate": 4.787300203590295e-06, + "loss": 0.5644, + "step": 8779 + }, + { + "epoch": 0.8015336863246303, + "grad_norm": 0.4475870132446289, + "learning_rate": 4.787251893304707e-06, + "loss": 0.6082, + "step": 8780 + }, + { + "epoch": 0.8016249771772869, + "grad_norm": 0.4770050346851349, + "learning_rate": 4.787203577777224e-06, + "loss": 0.556, + "step": 8781 + }, + { + "epoch": 0.8017162680299434, + "grad_norm": 0.47419556975364685, + "learning_rate": 4.787155257007956e-06, + "loss": 0.5241, + "step": 8782 + }, + { + "epoch": 0.8018075588825999, + "grad_norm": 0.4730914831161499, + "learning_rate": 4.787106930997014e-06, + "loss": 0.5624, + "step": 8783 + }, + { + "epoch": 0.8018988497352565, + "grad_norm": 0.47467854619026184, + "learning_rate": 4.7870585997445095e-06, + "loss": 0.5464, + "step": 8784 + }, + { + "epoch": 0.8019901405879131, + "grad_norm": 0.5105102062225342, + "learning_rate": 4.787010263250553e-06, + "loss": 0.5365, + "step": 8785 + }, + { + "epoch": 0.8020814314405696, + "grad_norm": 0.49065908789634705, + "learning_rate": 4.786961921515254e-06, + "loss": 0.5546, + "step": 8786 + }, + { + "epoch": 0.8021727222932262, + "grad_norm": 0.47243833541870117, + "learning_rate": 4.786913574538724e-06, + "loss": 0.571, + "step": 8787 + }, + { + "epoch": 0.8022640131458828, + "grad_norm": 0.48008182644844055, + "learning_rate": 4.786865222321075e-06, + "loss": 0.5489, + "step": 8788 + }, + { + "epoch": 0.8023553039985394, + "grad_norm": 0.46440234780311584, + "learning_rate": 4.786816864862417e-06, + "loss": 0.5826, + "step": 8789 + }, + { + "epoch": 0.802446594851196, + "grad_norm": 0.4555591642856598, + "learning_rate": 4.78676850216286e-06, + "loss": 0.568, + "step": 8790 + }, + { + "epoch": 0.8025378857038524, + "grad_norm": 0.5131699442863464, + "learning_rate": 4.786720134222515e-06, + "loss": 0.5244, + "step": 8791 + }, + { + "epoch": 0.802629176556509, + "grad_norm": 0.48745957016944885, + "learning_rate": 4.786671761041493e-06, + "loss": 0.5148, + "step": 8792 + }, + { + "epoch": 0.8027204674091656, + "grad_norm": 0.5013182759284973, + "learning_rate": 4.786623382619907e-06, + "loss": 0.5384, + "step": 8793 + }, + { + "epoch": 0.8028117582618222, + "grad_norm": 0.5354188084602356, + "learning_rate": 4.786574998957866e-06, + "loss": 0.55, + "step": 8794 + }, + { + "epoch": 0.8029030491144787, + "grad_norm": 0.4605894386768341, + "learning_rate": 4.786526610055479e-06, + "loss": 0.5558, + "step": 8795 + }, + { + "epoch": 0.8029943399671353, + "grad_norm": 0.5228502154350281, + "learning_rate": 4.786478215912861e-06, + "loss": 0.5261, + "step": 8796 + }, + { + "epoch": 0.8030856308197919, + "grad_norm": 0.46429088711738586, + "learning_rate": 4.78642981653012e-06, + "loss": 0.5456, + "step": 8797 + }, + { + "epoch": 0.8031769216724485, + "grad_norm": 0.4533984959125519, + "learning_rate": 4.786381411907367e-06, + "loss": 0.5719, + "step": 8798 + }, + { + "epoch": 0.8032682125251049, + "grad_norm": 0.4752206802368164, + "learning_rate": 4.786333002044714e-06, + "loss": 0.538, + "step": 8799 + }, + { + "epoch": 0.8033595033777615, + "grad_norm": 0.48935237526893616, + "learning_rate": 4.7862845869422725e-06, + "loss": 0.5209, + "step": 8800 + }, + { + "epoch": 0.8034507942304181, + "grad_norm": 0.5043189525604248, + "learning_rate": 4.786236166600152e-06, + "loss": 0.5405, + "step": 8801 + }, + { + "epoch": 0.8035420850830747, + "grad_norm": 0.49060508608818054, + "learning_rate": 4.786187741018464e-06, + "loss": 0.5337, + "step": 8802 + }, + { + "epoch": 0.8036333759357313, + "grad_norm": 0.5171728134155273, + "learning_rate": 4.786139310197319e-06, + "loss": 0.5109, + "step": 8803 + }, + { + "epoch": 0.8037246667883878, + "grad_norm": 0.48701047897338867, + "learning_rate": 4.786090874136829e-06, + "loss": 0.5604, + "step": 8804 + }, + { + "epoch": 0.8038159576410444, + "grad_norm": 0.522001326084137, + "learning_rate": 4.786042432837104e-06, + "loss": 0.5074, + "step": 8805 + }, + { + "epoch": 0.8039072484937009, + "grad_norm": 0.4730182886123657, + "learning_rate": 4.785993986298256e-06, + "loss": 0.5465, + "step": 8806 + }, + { + "epoch": 0.8039985393463575, + "grad_norm": 0.48554039001464844, + "learning_rate": 4.7859455345203954e-06, + "loss": 0.5907, + "step": 8807 + }, + { + "epoch": 0.804089830199014, + "grad_norm": 0.47861212491989136, + "learning_rate": 4.785897077503634e-06, + "loss": 0.5825, + "step": 8808 + }, + { + "epoch": 0.8041811210516706, + "grad_norm": 0.48820021748542786, + "learning_rate": 4.785848615248081e-06, + "loss": 0.5365, + "step": 8809 + }, + { + "epoch": 0.8042724119043272, + "grad_norm": 0.48591557145118713, + "learning_rate": 4.78580014775385e-06, + "loss": 0.574, + "step": 8810 + }, + { + "epoch": 0.8043637027569838, + "grad_norm": 0.47180214524269104, + "learning_rate": 4.785751675021049e-06, + "loss": 0.5691, + "step": 8811 + }, + { + "epoch": 0.8044549936096403, + "grad_norm": 0.4563051760196686, + "learning_rate": 4.785703197049792e-06, + "loss": 0.5708, + "step": 8812 + }, + { + "epoch": 0.8045462844622969, + "grad_norm": 0.5043448209762573, + "learning_rate": 4.785654713840189e-06, + "loss": 0.5808, + "step": 8813 + }, + { + "epoch": 0.8046375753149534, + "grad_norm": 0.4804025888442993, + "learning_rate": 4.785606225392351e-06, + "loss": 0.5161, + "step": 8814 + }, + { + "epoch": 0.80472886616761, + "grad_norm": 0.487589955329895, + "learning_rate": 4.7855577317063895e-06, + "loss": 0.5431, + "step": 8815 + }, + { + "epoch": 0.8048201570202665, + "grad_norm": 0.49233442544937134, + "learning_rate": 4.785509232782415e-06, + "loss": 0.5607, + "step": 8816 + }, + { + "epoch": 0.8049114478729231, + "grad_norm": 0.5052483081817627, + "learning_rate": 4.785460728620538e-06, + "loss": 0.5378, + "step": 8817 + }, + { + "epoch": 0.8050027387255797, + "grad_norm": 0.5281329154968262, + "learning_rate": 4.785412219220872e-06, + "loss": 0.5233, + "step": 8818 + }, + { + "epoch": 0.8050940295782363, + "grad_norm": 0.4943094849586487, + "learning_rate": 4.7853637045835265e-06, + "loss": 0.5757, + "step": 8819 + }, + { + "epoch": 0.8051853204308929, + "grad_norm": 0.48311424255371094, + "learning_rate": 4.785315184708613e-06, + "loss": 0.5737, + "step": 8820 + }, + { + "epoch": 0.8052766112835494, + "grad_norm": 0.47393888235092163, + "learning_rate": 4.785266659596243e-06, + "loss": 0.5661, + "step": 8821 + }, + { + "epoch": 0.8053679021362059, + "grad_norm": 0.4934878945350647, + "learning_rate": 4.785218129246527e-06, + "loss": 0.5373, + "step": 8822 + }, + { + "epoch": 0.8054591929888625, + "grad_norm": 0.4911004304885864, + "learning_rate": 4.785169593659577e-06, + "loss": 0.5452, + "step": 8823 + }, + { + "epoch": 0.8055504838415191, + "grad_norm": 0.483474463224411, + "learning_rate": 4.785121052835503e-06, + "loss": 0.5622, + "step": 8824 + }, + { + "epoch": 0.8056417746941756, + "grad_norm": 0.46669283509254456, + "learning_rate": 4.785072506774418e-06, + "loss": 0.5623, + "step": 8825 + }, + { + "epoch": 0.8057330655468322, + "grad_norm": 0.45655328035354614, + "learning_rate": 4.785023955476432e-06, + "loss": 0.5422, + "step": 8826 + }, + { + "epoch": 0.8058243563994888, + "grad_norm": 0.47357362508773804, + "learning_rate": 4.7849753989416565e-06, + "loss": 0.5502, + "step": 8827 + }, + { + "epoch": 0.8059156472521454, + "grad_norm": 0.4855296015739441, + "learning_rate": 4.784926837170203e-06, + "loss": 0.5102, + "step": 8828 + }, + { + "epoch": 0.806006938104802, + "grad_norm": 0.4646793603897095, + "learning_rate": 4.784878270162183e-06, + "loss": 0.5322, + "step": 8829 + }, + { + "epoch": 0.8060982289574584, + "grad_norm": 0.4668344259262085, + "learning_rate": 4.784829697917707e-06, + "loss": 0.5561, + "step": 8830 + }, + { + "epoch": 0.806189519810115, + "grad_norm": 0.5104469060897827, + "learning_rate": 4.784781120436887e-06, + "loss": 0.5491, + "step": 8831 + }, + { + "epoch": 0.8062808106627716, + "grad_norm": 0.46558085083961487, + "learning_rate": 4.784732537719835e-06, + "loss": 0.5139, + "step": 8832 + }, + { + "epoch": 0.8063721015154282, + "grad_norm": 0.48647549748420715, + "learning_rate": 4.78468394976666e-06, + "loss": 0.5308, + "step": 8833 + }, + { + "epoch": 0.8064633923680847, + "grad_norm": 0.48294487595558167, + "learning_rate": 4.784635356577476e-06, + "loss": 0.5426, + "step": 8834 + }, + { + "epoch": 0.8065546832207413, + "grad_norm": 0.48416945338249207, + "learning_rate": 4.7845867581523934e-06, + "loss": 0.5746, + "step": 8835 + }, + { + "epoch": 0.8066459740733979, + "grad_norm": 0.4712791442871094, + "learning_rate": 4.784538154491523e-06, + "loss": 0.5767, + "step": 8836 + }, + { + "epoch": 0.8067372649260545, + "grad_norm": 0.4625169634819031, + "learning_rate": 4.784489545594976e-06, + "loss": 0.5804, + "step": 8837 + }, + { + "epoch": 0.8068285557787109, + "grad_norm": 0.474427193403244, + "learning_rate": 4.784440931462865e-06, + "loss": 0.5382, + "step": 8838 + }, + { + "epoch": 0.8069198466313675, + "grad_norm": 0.47869399189949036, + "learning_rate": 4.784392312095301e-06, + "loss": 0.5528, + "step": 8839 + }, + { + "epoch": 0.8070111374840241, + "grad_norm": 0.45016518235206604, + "learning_rate": 4.7843436874923945e-06, + "loss": 0.5492, + "step": 8840 + }, + { + "epoch": 0.8071024283366807, + "grad_norm": 0.49825236201286316, + "learning_rate": 4.784295057654258e-06, + "loss": 0.5411, + "step": 8841 + }, + { + "epoch": 0.8071937191893372, + "grad_norm": 0.48637840151786804, + "learning_rate": 4.784246422581003e-06, + "loss": 0.5444, + "step": 8842 + }, + { + "epoch": 0.8072850100419938, + "grad_norm": 0.48492854833602905, + "learning_rate": 4.78419778227274e-06, + "loss": 0.5332, + "step": 8843 + }, + { + "epoch": 0.8073763008946504, + "grad_norm": 0.4735432267189026, + "learning_rate": 4.784149136729582e-06, + "loss": 0.5738, + "step": 8844 + }, + { + "epoch": 0.8074675917473069, + "grad_norm": 0.4448240101337433, + "learning_rate": 4.784100485951639e-06, + "loss": 0.5699, + "step": 8845 + }, + { + "epoch": 0.8075588825999634, + "grad_norm": 0.4823808968067169, + "learning_rate": 4.7840518299390226e-06, + "loss": 0.5423, + "step": 8846 + }, + { + "epoch": 0.80765017345262, + "grad_norm": 0.46589982509613037, + "learning_rate": 4.784003168691845e-06, + "loss": 0.5984, + "step": 8847 + }, + { + "epoch": 0.8077414643052766, + "grad_norm": 0.47140300273895264, + "learning_rate": 4.783954502210217e-06, + "loss": 0.5839, + "step": 8848 + }, + { + "epoch": 0.8078327551579332, + "grad_norm": 0.49924495816230774, + "learning_rate": 4.783905830494252e-06, + "loss": 0.5043, + "step": 8849 + }, + { + "epoch": 0.8079240460105898, + "grad_norm": 0.539383053779602, + "learning_rate": 4.783857153544059e-06, + "loss": 0.4926, + "step": 8850 + }, + { + "epoch": 0.8080153368632463, + "grad_norm": 0.46934542059898376, + "learning_rate": 4.783808471359751e-06, + "loss": 0.5643, + "step": 8851 + }, + { + "epoch": 0.8081066277159029, + "grad_norm": 0.4602328836917877, + "learning_rate": 4.783759783941439e-06, + "loss": 0.5777, + "step": 8852 + }, + { + "epoch": 0.8081979185685594, + "grad_norm": 0.46854591369628906, + "learning_rate": 4.783711091289236e-06, + "loss": 0.5578, + "step": 8853 + }, + { + "epoch": 0.808289209421216, + "grad_norm": 0.4629303514957428, + "learning_rate": 4.783662393403251e-06, + "loss": 0.5613, + "step": 8854 + }, + { + "epoch": 0.8083805002738725, + "grad_norm": 0.47049275040626526, + "learning_rate": 4.783613690283597e-06, + "loss": 0.5824, + "step": 8855 + }, + { + "epoch": 0.8084717911265291, + "grad_norm": 0.4825748801231384, + "learning_rate": 4.783564981930386e-06, + "loss": 0.5321, + "step": 8856 + }, + { + "epoch": 0.8085630819791857, + "grad_norm": 0.5027081966400146, + "learning_rate": 4.78351626834373e-06, + "loss": 0.511, + "step": 8857 + }, + { + "epoch": 0.8086543728318423, + "grad_norm": 0.5163506269454956, + "learning_rate": 4.78346754952374e-06, + "loss": 0.5701, + "step": 8858 + }, + { + "epoch": 0.8087456636844989, + "grad_norm": 0.49249303340911865, + "learning_rate": 4.7834188254705275e-06, + "loss": 0.5391, + "step": 8859 + }, + { + "epoch": 0.8088369545371554, + "grad_norm": 0.5034765601158142, + "learning_rate": 4.783370096184203e-06, + "loss": 0.5309, + "step": 8860 + }, + { + "epoch": 0.8089282453898119, + "grad_norm": 0.5060395002365112, + "learning_rate": 4.7833213616648805e-06, + "loss": 0.5431, + "step": 8861 + }, + { + "epoch": 0.8090195362424685, + "grad_norm": 0.4926706552505493, + "learning_rate": 4.7832726219126705e-06, + "loss": 0.5211, + "step": 8862 + }, + { + "epoch": 0.809110827095125, + "grad_norm": 0.4563317596912384, + "learning_rate": 4.783223876927685e-06, + "loss": 0.5553, + "step": 8863 + }, + { + "epoch": 0.8092021179477816, + "grad_norm": 0.4895704388618469, + "learning_rate": 4.783175126710035e-06, + "loss": 0.5181, + "step": 8864 + }, + { + "epoch": 0.8092934088004382, + "grad_norm": 0.49085289239883423, + "learning_rate": 4.783126371259833e-06, + "loss": 0.5535, + "step": 8865 + }, + { + "epoch": 0.8093846996530948, + "grad_norm": 0.4682203233242035, + "learning_rate": 4.78307761057719e-06, + "loss": 0.5634, + "step": 8866 + }, + { + "epoch": 0.8094759905057514, + "grad_norm": 0.4488212466239929, + "learning_rate": 4.7830288446622195e-06, + "loss": 0.5458, + "step": 8867 + }, + { + "epoch": 0.8095672813584079, + "grad_norm": 0.508390486240387, + "learning_rate": 4.782980073515031e-06, + "loss": 0.5146, + "step": 8868 + }, + { + "epoch": 0.8096585722110644, + "grad_norm": 0.510280191898346, + "learning_rate": 4.782931297135738e-06, + "loss": 0.5287, + "step": 8869 + }, + { + "epoch": 0.809749863063721, + "grad_norm": 0.4947509765625, + "learning_rate": 4.78288251552445e-06, + "loss": 0.541, + "step": 8870 + }, + { + "epoch": 0.8098411539163776, + "grad_norm": 0.4675896465778351, + "learning_rate": 4.782833728681282e-06, + "loss": 0.5606, + "step": 8871 + }, + { + "epoch": 0.8099324447690341, + "grad_norm": 0.4741961658000946, + "learning_rate": 4.782784936606344e-06, + "loss": 0.5758, + "step": 8872 + }, + { + "epoch": 0.8100237356216907, + "grad_norm": 0.458575040102005, + "learning_rate": 4.782736139299747e-06, + "loss": 0.5624, + "step": 8873 + }, + { + "epoch": 0.8101150264743473, + "grad_norm": 0.5006387829780579, + "learning_rate": 4.782687336761605e-06, + "loss": 0.5425, + "step": 8874 + }, + { + "epoch": 0.8102063173270039, + "grad_norm": 0.4703451693058014, + "learning_rate": 4.782638528992029e-06, + "loss": 0.5365, + "step": 8875 + }, + { + "epoch": 0.8102976081796603, + "grad_norm": 0.49742940068244934, + "learning_rate": 4.782589715991129e-06, + "loss": 0.5246, + "step": 8876 + }, + { + "epoch": 0.8103888990323169, + "grad_norm": 0.5251481533050537, + "learning_rate": 4.782540897759019e-06, + "loss": 0.5375, + "step": 8877 + }, + { + "epoch": 0.8104801898849735, + "grad_norm": 0.5044464468955994, + "learning_rate": 4.78249207429581e-06, + "loss": 0.5403, + "step": 8878 + }, + { + "epoch": 0.8105714807376301, + "grad_norm": 0.4920535087585449, + "learning_rate": 4.782443245601615e-06, + "loss": 0.5629, + "step": 8879 + }, + { + "epoch": 0.8106627715902867, + "grad_norm": 0.5069395303726196, + "learning_rate": 4.782394411676545e-06, + "loss": 0.5411, + "step": 8880 + }, + { + "epoch": 0.8107540624429432, + "grad_norm": 0.4769919514656067, + "learning_rate": 4.7823455725207115e-06, + "loss": 0.5435, + "step": 8881 + }, + { + "epoch": 0.8108453532955998, + "grad_norm": 0.509497344493866, + "learning_rate": 4.782296728134227e-06, + "loss": 0.5209, + "step": 8882 + }, + { + "epoch": 0.8109366441482564, + "grad_norm": 0.47363710403442383, + "learning_rate": 4.782247878517203e-06, + "loss": 0.5808, + "step": 8883 + }, + { + "epoch": 0.8110279350009129, + "grad_norm": 0.4595867991447449, + "learning_rate": 4.7821990236697524e-06, + "loss": 0.5743, + "step": 8884 + }, + { + "epoch": 0.8111192258535694, + "grad_norm": 0.4986952245235443, + "learning_rate": 4.7821501635919865e-06, + "loss": 0.5879, + "step": 8885 + }, + { + "epoch": 0.811210516706226, + "grad_norm": 0.4915205240249634, + "learning_rate": 4.7821012982840175e-06, + "loss": 0.5621, + "step": 8886 + }, + { + "epoch": 0.8113018075588826, + "grad_norm": 0.49767205119132996, + "learning_rate": 4.782052427745957e-06, + "loss": 0.5443, + "step": 8887 + }, + { + "epoch": 0.8113930984115392, + "grad_norm": 0.4659661650657654, + "learning_rate": 4.782003551977917e-06, + "loss": 0.5486, + "step": 8888 + }, + { + "epoch": 0.8114843892641957, + "grad_norm": 0.5185057520866394, + "learning_rate": 4.7819546709800106e-06, + "loss": 0.5221, + "step": 8889 + }, + { + "epoch": 0.8115756801168523, + "grad_norm": 0.4522666931152344, + "learning_rate": 4.781905784752348e-06, + "loss": 0.5992, + "step": 8890 + }, + { + "epoch": 0.8116669709695089, + "grad_norm": 0.48304176330566406, + "learning_rate": 4.781856893295043e-06, + "loss": 0.5648, + "step": 8891 + }, + { + "epoch": 0.8117582618221654, + "grad_norm": 0.5019969940185547, + "learning_rate": 4.781807996608206e-06, + "loss": 0.5646, + "step": 8892 + }, + { + "epoch": 0.811849552674822, + "grad_norm": 0.49892595410346985, + "learning_rate": 4.78175909469195e-06, + "loss": 0.5274, + "step": 8893 + }, + { + "epoch": 0.8119408435274785, + "grad_norm": 0.4799099564552307, + "learning_rate": 4.781710187546388e-06, + "loss": 0.535, + "step": 8894 + }, + { + "epoch": 0.8120321343801351, + "grad_norm": 0.4415416419506073, + "learning_rate": 4.781661275171631e-06, + "loss": 0.5501, + "step": 8895 + }, + { + "epoch": 0.8121234252327917, + "grad_norm": 0.5097004771232605, + "learning_rate": 4.781612357567791e-06, + "loss": 0.5266, + "step": 8896 + }, + { + "epoch": 0.8122147160854483, + "grad_norm": 0.48044002056121826, + "learning_rate": 4.781563434734979e-06, + "loss": 0.5578, + "step": 8897 + }, + { + "epoch": 0.8123060069381048, + "grad_norm": 0.5000449419021606, + "learning_rate": 4.78151450667331e-06, + "loss": 0.5485, + "step": 8898 + }, + { + "epoch": 0.8123972977907614, + "grad_norm": 0.49963057041168213, + "learning_rate": 4.781465573382894e-06, + "loss": 0.5392, + "step": 8899 + }, + { + "epoch": 0.8124885886434179, + "grad_norm": 0.4679911732673645, + "learning_rate": 4.781416634863844e-06, + "loss": 0.5659, + "step": 8900 + }, + { + "epoch": 0.8125798794960745, + "grad_norm": 0.48238101601600647, + "learning_rate": 4.781367691116271e-06, + "loss": 0.5705, + "step": 8901 + }, + { + "epoch": 0.812671170348731, + "grad_norm": 0.5033935308456421, + "learning_rate": 4.781318742140289e-06, + "loss": 0.5427, + "step": 8902 + }, + { + "epoch": 0.8127624612013876, + "grad_norm": 0.4933055341243744, + "learning_rate": 4.781269787936008e-06, + "loss": 0.5383, + "step": 8903 + }, + { + "epoch": 0.8128537520540442, + "grad_norm": 0.4695501923561096, + "learning_rate": 4.781220828503542e-06, + "loss": 0.5733, + "step": 8904 + }, + { + "epoch": 0.8129450429067008, + "grad_norm": 0.46459004282951355, + "learning_rate": 4.781171863843003e-06, + "loss": 0.573, + "step": 8905 + }, + { + "epoch": 0.8130363337593574, + "grad_norm": 0.47313642501831055, + "learning_rate": 4.7811228939545026e-06, + "loss": 0.565, + "step": 8906 + }, + { + "epoch": 0.8131276246120138, + "grad_norm": 0.47199881076812744, + "learning_rate": 4.781073918838153e-06, + "loss": 0.585, + "step": 8907 + }, + { + "epoch": 0.8132189154646704, + "grad_norm": 0.5018392205238342, + "learning_rate": 4.781024938494067e-06, + "loss": 0.5468, + "step": 8908 + }, + { + "epoch": 0.813310206317327, + "grad_norm": 0.5035330057144165, + "learning_rate": 4.780975952922357e-06, + "loss": 0.5596, + "step": 8909 + }, + { + "epoch": 0.8134014971699836, + "grad_norm": 0.5222269892692566, + "learning_rate": 4.780926962123134e-06, + "loss": 0.5172, + "step": 8910 + }, + { + "epoch": 0.8134927880226401, + "grad_norm": 0.4787147641181946, + "learning_rate": 4.780877966096511e-06, + "loss": 0.5675, + "step": 8911 + }, + { + "epoch": 0.8135840788752967, + "grad_norm": 0.5097895264625549, + "learning_rate": 4.780828964842601e-06, + "loss": 0.5305, + "step": 8912 + }, + { + "epoch": 0.8136753697279533, + "grad_norm": 0.48606446385383606, + "learning_rate": 4.780779958361516e-06, + "loss": 0.5078, + "step": 8913 + }, + { + "epoch": 0.8137666605806099, + "grad_norm": 0.4446316659450531, + "learning_rate": 4.7807309466533665e-06, + "loss": 0.5483, + "step": 8914 + }, + { + "epoch": 0.8138579514332663, + "grad_norm": 0.5161411166191101, + "learning_rate": 4.780681929718267e-06, + "loss": 0.5754, + "step": 8915 + }, + { + "epoch": 0.8139492422859229, + "grad_norm": 0.48257434368133545, + "learning_rate": 4.780632907556329e-06, + "loss": 0.5445, + "step": 8916 + }, + { + "epoch": 0.8140405331385795, + "grad_norm": 0.4977150559425354, + "learning_rate": 4.7805838801676655e-06, + "loss": 0.5246, + "step": 8917 + }, + { + "epoch": 0.8141318239912361, + "grad_norm": 0.4515613913536072, + "learning_rate": 4.780534847552388e-06, + "loss": 0.6021, + "step": 8918 + }, + { + "epoch": 0.8142231148438926, + "grad_norm": 0.4601685702800751, + "learning_rate": 4.78048580971061e-06, + "loss": 0.5644, + "step": 8919 + }, + { + "epoch": 0.8143144056965492, + "grad_norm": 0.471812903881073, + "learning_rate": 4.780436766642442e-06, + "loss": 0.5678, + "step": 8920 + }, + { + "epoch": 0.8144056965492058, + "grad_norm": 0.4814437925815582, + "learning_rate": 4.780387718347998e-06, + "loss": 0.5663, + "step": 8921 + }, + { + "epoch": 0.8144969874018624, + "grad_norm": 0.479836106300354, + "learning_rate": 4.78033866482739e-06, + "loss": 0.5341, + "step": 8922 + }, + { + "epoch": 0.8145882782545188, + "grad_norm": 0.4907008111476898, + "learning_rate": 4.78028960608073e-06, + "loss": 0.5463, + "step": 8923 + }, + { + "epoch": 0.8146795691071754, + "grad_norm": 0.48776209354400635, + "learning_rate": 4.780240542108131e-06, + "loss": 0.5537, + "step": 8924 + }, + { + "epoch": 0.814770859959832, + "grad_norm": 0.5017251968383789, + "learning_rate": 4.780191472909705e-06, + "loss": 0.5858, + "step": 8925 + }, + { + "epoch": 0.8148621508124886, + "grad_norm": 0.479200541973114, + "learning_rate": 4.780142398485564e-06, + "loss": 0.5531, + "step": 8926 + }, + { + "epoch": 0.8149534416651452, + "grad_norm": 0.50043123960495, + "learning_rate": 4.780093318835822e-06, + "loss": 0.5427, + "step": 8927 + }, + { + "epoch": 0.8150447325178017, + "grad_norm": 0.4750403165817261, + "learning_rate": 4.780044233960591e-06, + "loss": 0.5955, + "step": 8928 + }, + { + "epoch": 0.8151360233704583, + "grad_norm": 0.4883994460105896, + "learning_rate": 4.779995143859983e-06, + "loss": 0.5312, + "step": 8929 + }, + { + "epoch": 0.8152273142231149, + "grad_norm": 0.4951953887939453, + "learning_rate": 4.77994604853411e-06, + "loss": 0.523, + "step": 8930 + }, + { + "epoch": 0.8153186050757714, + "grad_norm": 0.4979424774646759, + "learning_rate": 4.779896947983086e-06, + "loss": 0.5373, + "step": 8931 + }, + { + "epoch": 0.8154098959284279, + "grad_norm": 0.4715363085269928, + "learning_rate": 4.779847842207022e-06, + "loss": 0.5671, + "step": 8932 + }, + { + "epoch": 0.8155011867810845, + "grad_norm": 0.4446210563182831, + "learning_rate": 4.779798731206031e-06, + "loss": 0.567, + "step": 8933 + }, + { + "epoch": 0.8155924776337411, + "grad_norm": 0.48817044496536255, + "learning_rate": 4.779749614980225e-06, + "loss": 0.5549, + "step": 8934 + }, + { + "epoch": 0.8156837684863977, + "grad_norm": 0.4702845513820648, + "learning_rate": 4.7797004935297185e-06, + "loss": 0.5492, + "step": 8935 + }, + { + "epoch": 0.8157750593390543, + "grad_norm": 0.4795511066913605, + "learning_rate": 4.779651366854623e-06, + "loss": 0.5766, + "step": 8936 + }, + { + "epoch": 0.8158663501917108, + "grad_norm": 0.47671812772750854, + "learning_rate": 4.779602234955051e-06, + "loss": 0.5473, + "step": 8937 + }, + { + "epoch": 0.8159576410443673, + "grad_norm": 0.47278693318367004, + "learning_rate": 4.779553097831114e-06, + "loss": 0.5705, + "step": 8938 + }, + { + "epoch": 0.8160489318970239, + "grad_norm": 0.473489910364151, + "learning_rate": 4.779503955482927e-06, + "loss": 0.5514, + "step": 8939 + }, + { + "epoch": 0.8161402227496805, + "grad_norm": 0.47156283259391785, + "learning_rate": 4.779454807910601e-06, + "loss": 0.5658, + "step": 8940 + }, + { + "epoch": 0.816231513602337, + "grad_norm": 0.4791010320186615, + "learning_rate": 4.779405655114247e-06, + "loss": 0.5827, + "step": 8941 + }, + { + "epoch": 0.8163228044549936, + "grad_norm": 0.45678165555000305, + "learning_rate": 4.779356497093982e-06, + "loss": 0.5782, + "step": 8942 + }, + { + "epoch": 0.8164140953076502, + "grad_norm": 0.49326759576797485, + "learning_rate": 4.779307333849915e-06, + "loss": 0.5256, + "step": 8943 + }, + { + "epoch": 0.8165053861603068, + "grad_norm": 0.4773661196231842, + "learning_rate": 4.779258165382162e-06, + "loss": 0.5599, + "step": 8944 + }, + { + "epoch": 0.8165966770129633, + "grad_norm": 0.48133090138435364, + "learning_rate": 4.779208991690832e-06, + "loss": 0.5438, + "step": 8945 + }, + { + "epoch": 0.8166879678656198, + "grad_norm": 0.46084731817245483, + "learning_rate": 4.779159812776038e-06, + "loss": 0.5458, + "step": 8946 + }, + { + "epoch": 0.8167792587182764, + "grad_norm": 0.4458521604537964, + "learning_rate": 4.779110628637895e-06, + "loss": 0.5926, + "step": 8947 + }, + { + "epoch": 0.816870549570933, + "grad_norm": 0.44972044229507446, + "learning_rate": 4.779061439276516e-06, + "loss": 0.5806, + "step": 8948 + }, + { + "epoch": 0.8169618404235895, + "grad_norm": 0.48054033517837524, + "learning_rate": 4.779012244692011e-06, + "loss": 0.542, + "step": 8949 + }, + { + "epoch": 0.8170531312762461, + "grad_norm": 0.438417911529541, + "learning_rate": 4.778963044884495e-06, + "loss": 0.559, + "step": 8950 + }, + { + "epoch": 0.8171444221289027, + "grad_norm": 0.48497191071510315, + "learning_rate": 4.7789138398540794e-06, + "loss": 0.5364, + "step": 8951 + }, + { + "epoch": 0.8172357129815593, + "grad_norm": 0.4763091504573822, + "learning_rate": 4.778864629600878e-06, + "loss": 0.5524, + "step": 8952 + }, + { + "epoch": 0.8173270038342159, + "grad_norm": 0.48742708563804626, + "learning_rate": 4.778815414125002e-06, + "loss": 0.5697, + "step": 8953 + }, + { + "epoch": 0.8174182946868723, + "grad_norm": 0.4520246088504791, + "learning_rate": 4.778766193426567e-06, + "loss": 0.5736, + "step": 8954 + }, + { + "epoch": 0.8175095855395289, + "grad_norm": 0.4648817479610443, + "learning_rate": 4.778716967505683e-06, + "loss": 0.5716, + "step": 8955 + }, + { + "epoch": 0.8176008763921855, + "grad_norm": 0.4725281298160553, + "learning_rate": 4.7786677363624636e-06, + "loss": 0.5112, + "step": 8956 + }, + { + "epoch": 0.8176921672448421, + "grad_norm": 0.44871610403060913, + "learning_rate": 4.778618499997023e-06, + "loss": 0.5655, + "step": 8957 + }, + { + "epoch": 0.8177834580974986, + "grad_norm": 0.46720653772354126, + "learning_rate": 4.778569258409472e-06, + "loss": 0.5556, + "step": 8958 + }, + { + "epoch": 0.8178747489501552, + "grad_norm": 0.4896829128265381, + "learning_rate": 4.778520011599925e-06, + "loss": 0.51, + "step": 8959 + }, + { + "epoch": 0.8179660398028118, + "grad_norm": 0.4979831874370575, + "learning_rate": 4.778470759568494e-06, + "loss": 0.5204, + "step": 8960 + }, + { + "epoch": 0.8180573306554684, + "grad_norm": 0.4569660425186157, + "learning_rate": 4.778421502315293e-06, + "loss": 0.5843, + "step": 8961 + }, + { + "epoch": 0.8181486215081248, + "grad_norm": 0.48806560039520264, + "learning_rate": 4.778372239840433e-06, + "loss": 0.5475, + "step": 8962 + }, + { + "epoch": 0.8182399123607814, + "grad_norm": 0.48286712169647217, + "learning_rate": 4.778322972144029e-06, + "loss": 0.5515, + "step": 8963 + }, + { + "epoch": 0.818331203213438, + "grad_norm": 0.4949800968170166, + "learning_rate": 4.7782736992261926e-06, + "loss": 0.5161, + "step": 8964 + }, + { + "epoch": 0.8184224940660946, + "grad_norm": 0.4838383197784424, + "learning_rate": 4.778224421087037e-06, + "loss": 0.5597, + "step": 8965 + }, + { + "epoch": 0.8185137849187512, + "grad_norm": 0.45870447158813477, + "learning_rate": 4.778175137726674e-06, + "loss": 0.5718, + "step": 8966 + }, + { + "epoch": 0.8186050757714077, + "grad_norm": 0.4956843852996826, + "learning_rate": 4.778125849145219e-06, + "loss": 0.5051, + "step": 8967 + }, + { + "epoch": 0.8186963666240643, + "grad_norm": 0.4810965061187744, + "learning_rate": 4.778076555342783e-06, + "loss": 0.5856, + "step": 8968 + }, + { + "epoch": 0.8187876574767209, + "grad_norm": 0.5026837587356567, + "learning_rate": 4.778027256319479e-06, + "loss": 0.5283, + "step": 8969 + }, + { + "epoch": 0.8188789483293774, + "grad_norm": 0.4911259114742279, + "learning_rate": 4.777977952075422e-06, + "loss": 0.585, + "step": 8970 + }, + { + "epoch": 0.8189702391820339, + "grad_norm": 0.5110474228858948, + "learning_rate": 4.7779286426107226e-06, + "loss": 0.499, + "step": 8971 + }, + { + "epoch": 0.8190615300346905, + "grad_norm": 0.459441214799881, + "learning_rate": 4.777879327925495e-06, + "loss": 0.5856, + "step": 8972 + }, + { + "epoch": 0.8191528208873471, + "grad_norm": 0.4797198474407196, + "learning_rate": 4.777830008019852e-06, + "loss": 0.5645, + "step": 8973 + }, + { + "epoch": 0.8192441117400037, + "grad_norm": 0.4756666123867035, + "learning_rate": 4.777780682893906e-06, + "loss": 0.5809, + "step": 8974 + }, + { + "epoch": 0.8193354025926602, + "grad_norm": 0.4832634925842285, + "learning_rate": 4.777731352547772e-06, + "loss": 0.5188, + "step": 8975 + }, + { + "epoch": 0.8194266934453168, + "grad_norm": 0.4971795380115509, + "learning_rate": 4.777682016981561e-06, + "loss": 0.5426, + "step": 8976 + }, + { + "epoch": 0.8195179842979733, + "grad_norm": 0.47104987502098083, + "learning_rate": 4.777632676195387e-06, + "loss": 0.595, + "step": 8977 + }, + { + "epoch": 0.8196092751506299, + "grad_norm": 0.49000853300094604, + "learning_rate": 4.777583330189363e-06, + "loss": 0.5347, + "step": 8978 + }, + { + "epoch": 0.8197005660032864, + "grad_norm": 0.47864681482315063, + "learning_rate": 4.777533978963602e-06, + "loss": 0.5307, + "step": 8979 + }, + { + "epoch": 0.819791856855943, + "grad_norm": 0.4829005002975464, + "learning_rate": 4.777484622518217e-06, + "loss": 0.5517, + "step": 8980 + }, + { + "epoch": 0.8198831477085996, + "grad_norm": 0.46264785528182983, + "learning_rate": 4.77743526085332e-06, + "loss": 0.5234, + "step": 8981 + }, + { + "epoch": 0.8199744385612562, + "grad_norm": 0.5061763525009155, + "learning_rate": 4.7773858939690275e-06, + "loss": 0.5503, + "step": 8982 + }, + { + "epoch": 0.8200657294139128, + "grad_norm": 0.4463125765323639, + "learning_rate": 4.777336521865449e-06, + "loss": 0.5613, + "step": 8983 + }, + { + "epoch": 0.8201570202665693, + "grad_norm": 0.49013257026672363, + "learning_rate": 4.777287144542699e-06, + "loss": 0.5548, + "step": 8984 + }, + { + "epoch": 0.8202483111192258, + "grad_norm": 0.4726695120334625, + "learning_rate": 4.777237762000891e-06, + "loss": 0.5672, + "step": 8985 + }, + { + "epoch": 0.8203396019718824, + "grad_norm": 0.47782397270202637, + "learning_rate": 4.777188374240139e-06, + "loss": 0.5264, + "step": 8986 + }, + { + "epoch": 0.820430892824539, + "grad_norm": 0.4880129098892212, + "learning_rate": 4.777138981260554e-06, + "loss": 0.5491, + "step": 8987 + }, + { + "epoch": 0.8205221836771955, + "grad_norm": 0.4716200828552246, + "learning_rate": 4.7770895830622505e-06, + "loss": 0.5553, + "step": 8988 + }, + { + "epoch": 0.8206134745298521, + "grad_norm": 0.4964984059333801, + "learning_rate": 4.777040179645341e-06, + "loss": 0.5409, + "step": 8989 + }, + { + "epoch": 0.8207047653825087, + "grad_norm": 0.4697037935256958, + "learning_rate": 4.776990771009941e-06, + "loss": 0.54, + "step": 8990 + }, + { + "epoch": 0.8207960562351653, + "grad_norm": 0.4735761284828186, + "learning_rate": 4.77694135715616e-06, + "loss": 0.566, + "step": 8991 + }, + { + "epoch": 0.8208873470878219, + "grad_norm": 0.4884600341320038, + "learning_rate": 4.776891938084115e-06, + "loss": 0.5727, + "step": 8992 + }, + { + "epoch": 0.8209786379404783, + "grad_norm": 0.4620833694934845, + "learning_rate": 4.776842513793916e-06, + "loss": 0.5391, + "step": 8993 + }, + { + "epoch": 0.8210699287931349, + "grad_norm": 0.47684821486473083, + "learning_rate": 4.776793084285678e-06, + "loss": 0.5497, + "step": 8994 + }, + { + "epoch": 0.8211612196457915, + "grad_norm": 0.48200562596321106, + "learning_rate": 4.776743649559514e-06, + "loss": 0.5673, + "step": 8995 + }, + { + "epoch": 0.821252510498448, + "grad_norm": 0.5181845426559448, + "learning_rate": 4.776694209615538e-06, + "loss": 0.5316, + "step": 8996 + }, + { + "epoch": 0.8213438013511046, + "grad_norm": 0.4908680021762848, + "learning_rate": 4.776644764453863e-06, + "loss": 0.6059, + "step": 8997 + }, + { + "epoch": 0.8214350922037612, + "grad_norm": 0.5279023051261902, + "learning_rate": 4.7765953140746015e-06, + "loss": 0.5468, + "step": 8998 + }, + { + "epoch": 0.8215263830564178, + "grad_norm": 0.46319010853767395, + "learning_rate": 4.776545858477866e-06, + "loss": 0.588, + "step": 8999 + }, + { + "epoch": 0.8216176739090744, + "grad_norm": 0.5126416087150574, + "learning_rate": 4.776496397663773e-06, + "loss": 0.525, + "step": 9000 + }, + { + "epoch": 0.8217089647617308, + "grad_norm": 0.4724971354007721, + "learning_rate": 4.7764469316324336e-06, + "loss": 0.616, + "step": 9001 + }, + { + "epoch": 0.8218002556143874, + "grad_norm": 0.4886290431022644, + "learning_rate": 4.776397460383961e-06, + "loss": 0.5285, + "step": 9002 + }, + { + "epoch": 0.821891546467044, + "grad_norm": 0.45681649446487427, + "learning_rate": 4.776347983918469e-06, + "loss": 0.5337, + "step": 9003 + }, + { + "epoch": 0.8219828373197006, + "grad_norm": 0.46706682443618774, + "learning_rate": 4.776298502236072e-06, + "loss": 0.5463, + "step": 9004 + }, + { + "epoch": 0.8220741281723571, + "grad_norm": 0.4717048108577728, + "learning_rate": 4.776249015336882e-06, + "loss": 0.5728, + "step": 9005 + }, + { + "epoch": 0.8221654190250137, + "grad_norm": 0.44807127118110657, + "learning_rate": 4.776199523221012e-06, + "loss": 0.5973, + "step": 9006 + }, + { + "epoch": 0.8222567098776703, + "grad_norm": 0.5069170594215393, + "learning_rate": 4.776150025888578e-06, + "loss": 0.5326, + "step": 9007 + }, + { + "epoch": 0.8223480007303268, + "grad_norm": 0.47609493136405945, + "learning_rate": 4.776100523339691e-06, + "loss": 0.5479, + "step": 9008 + }, + { + "epoch": 0.8224392915829833, + "grad_norm": 0.4825969338417053, + "learning_rate": 4.776051015574465e-06, + "loss": 0.5265, + "step": 9009 + }, + { + "epoch": 0.8225305824356399, + "grad_norm": 0.5047359466552734, + "learning_rate": 4.776001502593015e-06, + "loss": 0.5503, + "step": 9010 + }, + { + "epoch": 0.8226218732882965, + "grad_norm": 0.4877680242061615, + "learning_rate": 4.775951984395452e-06, + "loss": 0.5371, + "step": 9011 + }, + { + "epoch": 0.8227131641409531, + "grad_norm": 0.4787943363189697, + "learning_rate": 4.77590246098189e-06, + "loss": 0.5718, + "step": 9012 + }, + { + "epoch": 0.8228044549936097, + "grad_norm": 0.48334306478500366, + "learning_rate": 4.775852932352445e-06, + "loss": 0.5824, + "step": 9013 + }, + { + "epoch": 0.8228957458462662, + "grad_norm": 0.5074377059936523, + "learning_rate": 4.775803398507227e-06, + "loss": 0.5161, + "step": 9014 + }, + { + "epoch": 0.8229870366989228, + "grad_norm": 0.48869964480400085, + "learning_rate": 4.775753859446353e-06, + "loss": 0.5677, + "step": 9015 + }, + { + "epoch": 0.8230783275515793, + "grad_norm": 0.48651689291000366, + "learning_rate": 4.775704315169933e-06, + "loss": 0.5421, + "step": 9016 + }, + { + "epoch": 0.8231696184042359, + "grad_norm": 0.4956086277961731, + "learning_rate": 4.775654765678083e-06, + "loss": 0.5229, + "step": 9017 + }, + { + "epoch": 0.8232609092568924, + "grad_norm": 0.42812204360961914, + "learning_rate": 4.775605210970916e-06, + "loss": 0.5815, + "step": 9018 + }, + { + "epoch": 0.823352200109549, + "grad_norm": 0.4729367792606354, + "learning_rate": 4.775555651048545e-06, + "loss": 0.5354, + "step": 9019 + }, + { + "epoch": 0.8234434909622056, + "grad_norm": 0.45744338631629944, + "learning_rate": 4.775506085911085e-06, + "loss": 0.5818, + "step": 9020 + }, + { + "epoch": 0.8235347818148622, + "grad_norm": 0.46749451756477356, + "learning_rate": 4.775456515558647e-06, + "loss": 0.5521, + "step": 9021 + }, + { + "epoch": 0.8236260726675187, + "grad_norm": 0.4794185757637024, + "learning_rate": 4.775406939991347e-06, + "loss": 0.5561, + "step": 9022 + }, + { + "epoch": 0.8237173635201753, + "grad_norm": 0.49083176255226135, + "learning_rate": 4.775357359209298e-06, + "loss": 0.5335, + "step": 9023 + }, + { + "epoch": 0.8238086543728318, + "grad_norm": 0.48945602774620056, + "learning_rate": 4.775307773212613e-06, + "loss": 0.563, + "step": 9024 + }, + { + "epoch": 0.8238999452254884, + "grad_norm": 0.46477562189102173, + "learning_rate": 4.775258182001406e-06, + "loss": 0.5512, + "step": 9025 + }, + { + "epoch": 0.823991236078145, + "grad_norm": 0.48714515566825867, + "learning_rate": 4.7752085855757916e-06, + "loss": 0.5358, + "step": 9026 + }, + { + "epoch": 0.8240825269308015, + "grad_norm": 0.4837961196899414, + "learning_rate": 4.775158983935881e-06, + "loss": 0.5027, + "step": 9027 + }, + { + "epoch": 0.8241738177834581, + "grad_norm": 0.5082708597183228, + "learning_rate": 4.77510937708179e-06, + "loss": 0.5595, + "step": 9028 + }, + { + "epoch": 0.8242651086361147, + "grad_norm": 0.5223773121833801, + "learning_rate": 4.775059765013633e-06, + "loss": 0.5478, + "step": 9029 + }, + { + "epoch": 0.8243563994887713, + "grad_norm": 0.46763524413108826, + "learning_rate": 4.775010147731521e-06, + "loss": 0.5492, + "step": 9030 + }, + { + "epoch": 0.8244476903414278, + "grad_norm": 0.4751053750514984, + "learning_rate": 4.77496052523557e-06, + "loss": 0.5303, + "step": 9031 + }, + { + "epoch": 0.8245389811940843, + "grad_norm": 0.4719563126564026, + "learning_rate": 4.774910897525893e-06, + "loss": 0.5833, + "step": 9032 + }, + { + "epoch": 0.8246302720467409, + "grad_norm": 0.46774956583976746, + "learning_rate": 4.774861264602603e-06, + "loss": 0.5604, + "step": 9033 + }, + { + "epoch": 0.8247215628993975, + "grad_norm": 0.49060800671577454, + "learning_rate": 4.774811626465814e-06, + "loss": 0.5482, + "step": 9034 + }, + { + "epoch": 0.824812853752054, + "grad_norm": 0.49835577607154846, + "learning_rate": 4.7747619831156414e-06, + "loss": 0.5171, + "step": 9035 + }, + { + "epoch": 0.8249041446047106, + "grad_norm": 0.5141478180885315, + "learning_rate": 4.774712334552197e-06, + "loss": 0.5111, + "step": 9036 + }, + { + "epoch": 0.8249954354573672, + "grad_norm": 0.4993494749069214, + "learning_rate": 4.7746626807755945e-06, + "loss": 0.5437, + "step": 9037 + }, + { + "epoch": 0.8250867263100238, + "grad_norm": 0.49330419301986694, + "learning_rate": 4.77461302178595e-06, + "loss": 0.5663, + "step": 9038 + }, + { + "epoch": 0.8251780171626802, + "grad_norm": 0.4903471767902374, + "learning_rate": 4.774563357583375e-06, + "loss": 0.5512, + "step": 9039 + }, + { + "epoch": 0.8252693080153368, + "grad_norm": 0.5011706352233887, + "learning_rate": 4.774513688167985e-06, + "loss": 0.5611, + "step": 9040 + }, + { + "epoch": 0.8253605988679934, + "grad_norm": 0.4875344932079315, + "learning_rate": 4.774464013539892e-06, + "loss": 0.5459, + "step": 9041 + }, + { + "epoch": 0.82545188972065, + "grad_norm": 0.527966320514679, + "learning_rate": 4.77441433369921e-06, + "loss": 0.5423, + "step": 9042 + }, + { + "epoch": 0.8255431805733066, + "grad_norm": 0.4930715262889862, + "learning_rate": 4.774364648646054e-06, + "loss": 0.5644, + "step": 9043 + }, + { + "epoch": 0.8256344714259631, + "grad_norm": 0.4676394462585449, + "learning_rate": 4.7743149583805385e-06, + "loss": 0.5485, + "step": 9044 + }, + { + "epoch": 0.8257257622786197, + "grad_norm": 0.4767712354660034, + "learning_rate": 4.774265262902776e-06, + "loss": 0.5575, + "step": 9045 + }, + { + "epoch": 0.8258170531312763, + "grad_norm": 0.4784432351589203, + "learning_rate": 4.774215562212881e-06, + "loss": 0.5628, + "step": 9046 + }, + { + "epoch": 0.8259083439839328, + "grad_norm": 0.4759490489959717, + "learning_rate": 4.774165856310966e-06, + "loss": 0.5778, + "step": 9047 + }, + { + "epoch": 0.8259996348365893, + "grad_norm": 0.4773081839084625, + "learning_rate": 4.774116145197147e-06, + "loss": 0.5713, + "step": 9048 + }, + { + "epoch": 0.8260909256892459, + "grad_norm": 0.4794805347919464, + "learning_rate": 4.774066428871538e-06, + "loss": 0.555, + "step": 9049 + }, + { + "epoch": 0.8261822165419025, + "grad_norm": 0.48723095655441284, + "learning_rate": 4.77401670733425e-06, + "loss": 0.54, + "step": 9050 + }, + { + "epoch": 0.8262735073945591, + "grad_norm": 0.48435962200164795, + "learning_rate": 4.7739669805854e-06, + "loss": 0.5414, + "step": 9051 + }, + { + "epoch": 0.8263647982472156, + "grad_norm": 0.4832473695278168, + "learning_rate": 4.773917248625101e-06, + "loss": 0.5433, + "step": 9052 + }, + { + "epoch": 0.8264560890998722, + "grad_norm": 0.45835137367248535, + "learning_rate": 4.7738675114534664e-06, + "loss": 0.5754, + "step": 9053 + }, + { + "epoch": 0.8265473799525288, + "grad_norm": 0.4986194670200348, + "learning_rate": 4.7738177690706115e-06, + "loss": 0.5177, + "step": 9054 + }, + { + "epoch": 0.8266386708051853, + "grad_norm": 0.4653477072715759, + "learning_rate": 4.773768021476648e-06, + "loss": 0.5376, + "step": 9055 + }, + { + "epoch": 0.8267299616578418, + "grad_norm": 0.48717808723449707, + "learning_rate": 4.773718268671692e-06, + "loss": 0.5761, + "step": 9056 + }, + { + "epoch": 0.8268212525104984, + "grad_norm": 0.5014310479164124, + "learning_rate": 4.773668510655858e-06, + "loss": 0.5127, + "step": 9057 + }, + { + "epoch": 0.826912543363155, + "grad_norm": 0.47920289635658264, + "learning_rate": 4.7736187474292575e-06, + "loss": 0.5483, + "step": 9058 + }, + { + "epoch": 0.8270038342158116, + "grad_norm": 0.48517775535583496, + "learning_rate": 4.773568978992006e-06, + "loss": 0.5863, + "step": 9059 + }, + { + "epoch": 0.8270951250684682, + "grad_norm": 0.4781337082386017, + "learning_rate": 4.773519205344218e-06, + "loss": 0.5679, + "step": 9060 + }, + { + "epoch": 0.8271864159211247, + "grad_norm": 0.4978511929512024, + "learning_rate": 4.773469426486007e-06, + "loss": 0.5738, + "step": 9061 + }, + { + "epoch": 0.8272777067737813, + "grad_norm": 0.48433464765548706, + "learning_rate": 4.773419642417487e-06, + "loss": 0.5394, + "step": 9062 + }, + { + "epoch": 0.8273689976264378, + "grad_norm": 0.48417678475379944, + "learning_rate": 4.7733698531387725e-06, + "loss": 0.5657, + "step": 9063 + }, + { + "epoch": 0.8274602884790944, + "grad_norm": 0.47877007722854614, + "learning_rate": 4.773320058649977e-06, + "loss": 0.5566, + "step": 9064 + }, + { + "epoch": 0.8275515793317509, + "grad_norm": 0.48140501976013184, + "learning_rate": 4.773270258951215e-06, + "loss": 0.4941, + "step": 9065 + }, + { + "epoch": 0.8276428701844075, + "grad_norm": 0.46387335658073425, + "learning_rate": 4.773220454042601e-06, + "loss": 0.5592, + "step": 9066 + }, + { + "epoch": 0.8277341610370641, + "grad_norm": 0.49310287833213806, + "learning_rate": 4.773170643924248e-06, + "loss": 0.541, + "step": 9067 + }, + { + "epoch": 0.8278254518897207, + "grad_norm": 0.5052719116210938, + "learning_rate": 4.773120828596271e-06, + "loss": 0.5376, + "step": 9068 + }, + { + "epoch": 0.8279167427423773, + "grad_norm": 0.4653868079185486, + "learning_rate": 4.773071008058786e-06, + "loss": 0.5389, + "step": 9069 + }, + { + "epoch": 0.8280080335950338, + "grad_norm": 0.4715058505535126, + "learning_rate": 4.773021182311904e-06, + "loss": 0.5883, + "step": 9070 + }, + { + "epoch": 0.8280993244476903, + "grad_norm": 0.5080568194389343, + "learning_rate": 4.772971351355739e-06, + "loss": 0.5155, + "step": 9071 + }, + { + "epoch": 0.8281906153003469, + "grad_norm": 0.47454798221588135, + "learning_rate": 4.772921515190409e-06, + "loss": 0.613, + "step": 9072 + }, + { + "epoch": 0.8282819061530035, + "grad_norm": 0.460852712392807, + "learning_rate": 4.7728716738160245e-06, + "loss": 0.5616, + "step": 9073 + }, + { + "epoch": 0.82837319700566, + "grad_norm": 0.4778805375099182, + "learning_rate": 4.772821827232701e-06, + "loss": 0.5076, + "step": 9074 + }, + { + "epoch": 0.8284644878583166, + "grad_norm": 0.46719077229499817, + "learning_rate": 4.772771975440554e-06, + "loss": 0.5668, + "step": 9075 + }, + { + "epoch": 0.8285557787109732, + "grad_norm": 0.499035507440567, + "learning_rate": 4.772722118439695e-06, + "loss": 0.539, + "step": 9076 + }, + { + "epoch": 0.8286470695636298, + "grad_norm": 0.45701125264167786, + "learning_rate": 4.772672256230241e-06, + "loss": 0.5728, + "step": 9077 + }, + { + "epoch": 0.8287383604162862, + "grad_norm": 0.4698267877101898, + "learning_rate": 4.772622388812305e-06, + "loss": 0.5335, + "step": 9078 + }, + { + "epoch": 0.8288296512689428, + "grad_norm": 0.45723092555999756, + "learning_rate": 4.7725725161860005e-06, + "loss": 0.5492, + "step": 9079 + }, + { + "epoch": 0.8289209421215994, + "grad_norm": 0.519443929195404, + "learning_rate": 4.772522638351443e-06, + "loss": 0.5043, + "step": 9080 + }, + { + "epoch": 0.829012232974256, + "grad_norm": 0.47956380248069763, + "learning_rate": 4.772472755308747e-06, + "loss": 0.5488, + "step": 9081 + }, + { + "epoch": 0.8291035238269125, + "grad_norm": 0.478524774312973, + "learning_rate": 4.772422867058026e-06, + "loss": 0.5403, + "step": 9082 + }, + { + "epoch": 0.8291948146795691, + "grad_norm": 0.4906661808490753, + "learning_rate": 4.772372973599395e-06, + "loss": 0.5086, + "step": 9083 + }, + { + "epoch": 0.8292861055322257, + "grad_norm": 0.5265271067619324, + "learning_rate": 4.7723230749329675e-06, + "loss": 0.5301, + "step": 9084 + }, + { + "epoch": 0.8293773963848823, + "grad_norm": 0.48065605759620667, + "learning_rate": 4.772273171058859e-06, + "loss": 0.5058, + "step": 9085 + }, + { + "epoch": 0.8294686872375387, + "grad_norm": 0.5120744109153748, + "learning_rate": 4.772223261977182e-06, + "loss": 0.5242, + "step": 9086 + }, + { + "epoch": 0.8295599780901953, + "grad_norm": 0.49707651138305664, + "learning_rate": 4.772173347688054e-06, + "loss": 0.5118, + "step": 9087 + }, + { + "epoch": 0.8296512689428519, + "grad_norm": 0.5095075368881226, + "learning_rate": 4.7721234281915864e-06, + "loss": 0.5384, + "step": 9088 + }, + { + "epoch": 0.8297425597955085, + "grad_norm": 0.46576061844825745, + "learning_rate": 4.772073503487894e-06, + "loss": 0.5709, + "step": 9089 + }, + { + "epoch": 0.8298338506481651, + "grad_norm": 0.46764442324638367, + "learning_rate": 4.772023573577093e-06, + "loss": 0.543, + "step": 9090 + }, + { + "epoch": 0.8299251415008216, + "grad_norm": 0.47569191455841064, + "learning_rate": 4.7719736384592965e-06, + "loss": 0.5347, + "step": 9091 + }, + { + "epoch": 0.8300164323534782, + "grad_norm": 0.510219156742096, + "learning_rate": 4.771923698134619e-06, + "loss": 0.5217, + "step": 9092 + }, + { + "epoch": 0.8301077232061348, + "grad_norm": 0.4726157486438751, + "learning_rate": 4.771873752603175e-06, + "loss": 0.5431, + "step": 9093 + }, + { + "epoch": 0.8301990140587913, + "grad_norm": 0.5008355379104614, + "learning_rate": 4.77182380186508e-06, + "loss": 0.5156, + "step": 9094 + }, + { + "epoch": 0.8302903049114478, + "grad_norm": 0.4626731872558594, + "learning_rate": 4.771773845920447e-06, + "loss": 0.5633, + "step": 9095 + }, + { + "epoch": 0.8303815957641044, + "grad_norm": 0.47410139441490173, + "learning_rate": 4.771723884769391e-06, + "loss": 0.563, + "step": 9096 + }, + { + "epoch": 0.830472886616761, + "grad_norm": 0.46929931640625, + "learning_rate": 4.7716739184120265e-06, + "loss": 0.5485, + "step": 9097 + }, + { + "epoch": 0.8305641774694176, + "grad_norm": 0.5257697701454163, + "learning_rate": 4.771623946848468e-06, + "loss": 0.5171, + "step": 9098 + }, + { + "epoch": 0.8306554683220742, + "grad_norm": 0.4447985887527466, + "learning_rate": 4.7715739700788295e-06, + "loss": 0.5801, + "step": 9099 + }, + { + "epoch": 0.8307467591747307, + "grad_norm": 0.4570702314376831, + "learning_rate": 4.771523988103228e-06, + "loss": 0.553, + "step": 9100 + }, + { + "epoch": 0.8308380500273873, + "grad_norm": 0.46888142824172974, + "learning_rate": 4.771474000921776e-06, + "loss": 0.5746, + "step": 9101 + }, + { + "epoch": 0.8309293408800438, + "grad_norm": 0.5140640735626221, + "learning_rate": 4.771424008534587e-06, + "loss": 0.5543, + "step": 9102 + }, + { + "epoch": 0.8310206317327004, + "grad_norm": 0.46798011660575867, + "learning_rate": 4.771374010941776e-06, + "loss": 0.5446, + "step": 9103 + }, + { + "epoch": 0.8311119225853569, + "grad_norm": 0.49978527426719666, + "learning_rate": 4.7713240081434605e-06, + "loss": 0.6193, + "step": 9104 + }, + { + "epoch": 0.8312032134380135, + "grad_norm": 0.49367794394493103, + "learning_rate": 4.771274000139752e-06, + "loss": 0.5271, + "step": 9105 + }, + { + "epoch": 0.8312945042906701, + "grad_norm": 0.47458118200302124, + "learning_rate": 4.771223986930766e-06, + "loss": 0.5557, + "step": 9106 + }, + { + "epoch": 0.8313857951433267, + "grad_norm": 0.451216459274292, + "learning_rate": 4.771173968516618e-06, + "loss": 0.5581, + "step": 9107 + }, + { + "epoch": 0.8314770859959832, + "grad_norm": 0.49540793895721436, + "learning_rate": 4.771123944897421e-06, + "loss": 0.5266, + "step": 9108 + }, + { + "epoch": 0.8315683768486397, + "grad_norm": 0.47343909740448, + "learning_rate": 4.771073916073291e-06, + "loss": 0.5253, + "step": 9109 + }, + { + "epoch": 0.8316596677012963, + "grad_norm": 0.4972669184207916, + "learning_rate": 4.771023882044342e-06, + "loss": 0.5292, + "step": 9110 + }, + { + "epoch": 0.8317509585539529, + "grad_norm": 0.48130613565444946, + "learning_rate": 4.770973842810689e-06, + "loss": 0.5392, + "step": 9111 + }, + { + "epoch": 0.8318422494066094, + "grad_norm": 0.4519701302051544, + "learning_rate": 4.770923798372447e-06, + "loss": 0.554, + "step": 9112 + }, + { + "epoch": 0.831933540259266, + "grad_norm": 0.5012970566749573, + "learning_rate": 4.770873748729729e-06, + "loss": 0.5134, + "step": 9113 + }, + { + "epoch": 0.8320248311119226, + "grad_norm": 0.48344674706459045, + "learning_rate": 4.770823693882651e-06, + "loss": 0.5737, + "step": 9114 + }, + { + "epoch": 0.8321161219645792, + "grad_norm": 0.46325159072875977, + "learning_rate": 4.770773633831328e-06, + "loss": 0.5589, + "step": 9115 + }, + { + "epoch": 0.8322074128172358, + "grad_norm": 0.4770098030567169, + "learning_rate": 4.7707235685758755e-06, + "loss": 0.5713, + "step": 9116 + }, + { + "epoch": 0.8322987036698922, + "grad_norm": 0.47861725091934204, + "learning_rate": 4.770673498116406e-06, + "loss": 0.5522, + "step": 9117 + }, + { + "epoch": 0.8323899945225488, + "grad_norm": 0.4514285624027252, + "learning_rate": 4.770623422453035e-06, + "loss": 0.589, + "step": 9118 + }, + { + "epoch": 0.8324812853752054, + "grad_norm": 0.5027281045913696, + "learning_rate": 4.770573341585878e-06, + "loss": 0.534, + "step": 9119 + }, + { + "epoch": 0.832572576227862, + "grad_norm": 0.48334670066833496, + "learning_rate": 4.770523255515049e-06, + "loss": 0.545, + "step": 9120 + }, + { + "epoch": 0.8326638670805185, + "grad_norm": 0.5078654885292053, + "learning_rate": 4.770473164240664e-06, + "loss": 0.5421, + "step": 9121 + }, + { + "epoch": 0.8327551579331751, + "grad_norm": 0.4587283432483673, + "learning_rate": 4.770423067762836e-06, + "loss": 0.6238, + "step": 9122 + }, + { + "epoch": 0.8328464487858317, + "grad_norm": 0.45048484206199646, + "learning_rate": 4.770372966081682e-06, + "loss": 0.5397, + "step": 9123 + }, + { + "epoch": 0.8329377396384883, + "grad_norm": 0.4474965035915375, + "learning_rate": 4.770322859197315e-06, + "loss": 0.5418, + "step": 9124 + }, + { + "epoch": 0.8330290304911447, + "grad_norm": 0.4589014947414398, + "learning_rate": 4.770272747109849e-06, + "loss": 0.574, + "step": 9125 + }, + { + "epoch": 0.8331203213438013, + "grad_norm": 0.48522287607192993, + "learning_rate": 4.770222629819402e-06, + "loss": 0.5639, + "step": 9126 + }, + { + "epoch": 0.8332116121964579, + "grad_norm": 0.47381576895713806, + "learning_rate": 4.770172507326087e-06, + "loss": 0.534, + "step": 9127 + }, + { + "epoch": 0.8333029030491145, + "grad_norm": 0.5193504691123962, + "learning_rate": 4.770122379630019e-06, + "loss": 0.5376, + "step": 9128 + }, + { + "epoch": 0.833394193901771, + "grad_norm": 0.5023844242095947, + "learning_rate": 4.770072246731312e-06, + "loss": 0.5782, + "step": 9129 + }, + { + "epoch": 0.8334854847544276, + "grad_norm": 0.4907376170158386, + "learning_rate": 4.770022108630083e-06, + "loss": 0.53, + "step": 9130 + }, + { + "epoch": 0.8335767756070842, + "grad_norm": 0.48597654700279236, + "learning_rate": 4.769971965326445e-06, + "loss": 0.5195, + "step": 9131 + }, + { + "epoch": 0.8336680664597408, + "grad_norm": 0.46713024377822876, + "learning_rate": 4.769921816820513e-06, + "loss": 0.5291, + "step": 9132 + }, + { + "epoch": 0.8337593573123973, + "grad_norm": 0.4661901295185089, + "learning_rate": 4.769871663112404e-06, + "loss": 0.569, + "step": 9133 + }, + { + "epoch": 0.8338506481650538, + "grad_norm": 0.4624076783657074, + "learning_rate": 4.76982150420223e-06, + "loss": 0.5697, + "step": 9134 + }, + { + "epoch": 0.8339419390177104, + "grad_norm": 0.49936532974243164, + "learning_rate": 4.769771340090108e-06, + "loss": 0.5196, + "step": 9135 + }, + { + "epoch": 0.834033229870367, + "grad_norm": 0.45433303713798523, + "learning_rate": 4.769721170776153e-06, + "loss": 0.5751, + "step": 9136 + }, + { + "epoch": 0.8341245207230236, + "grad_norm": 0.4649598002433777, + "learning_rate": 4.769670996260478e-06, + "loss": 0.5572, + "step": 9137 + }, + { + "epoch": 0.8342158115756801, + "grad_norm": 0.4579378664493561, + "learning_rate": 4.7696208165432015e-06, + "loss": 0.576, + "step": 9138 + }, + { + "epoch": 0.8343071024283367, + "grad_norm": 0.47623562812805176, + "learning_rate": 4.769570631624434e-06, + "loss": 0.5968, + "step": 9139 + }, + { + "epoch": 0.8343983932809932, + "grad_norm": 0.4896826148033142, + "learning_rate": 4.769520441504294e-06, + "loss": 0.5472, + "step": 9140 + }, + { + "epoch": 0.8344896841336498, + "grad_norm": 0.4991288185119629, + "learning_rate": 4.769470246182895e-06, + "loss": 0.5299, + "step": 9141 + }, + { + "epoch": 0.8345809749863063, + "grad_norm": 0.4974534511566162, + "learning_rate": 4.769420045660353e-06, + "loss": 0.5374, + "step": 9142 + }, + { + "epoch": 0.8346722658389629, + "grad_norm": 0.48469263315200806, + "learning_rate": 4.769369839936783e-06, + "loss": 0.5512, + "step": 9143 + }, + { + "epoch": 0.8347635566916195, + "grad_norm": 0.4647294282913208, + "learning_rate": 4.769319629012299e-06, + "loss": 0.5709, + "step": 9144 + }, + { + "epoch": 0.8348548475442761, + "grad_norm": 0.46005716919898987, + "learning_rate": 4.769269412887016e-06, + "loss": 0.5887, + "step": 9145 + }, + { + "epoch": 0.8349461383969327, + "grad_norm": 0.47818994522094727, + "learning_rate": 4.76921919156105e-06, + "loss": 0.5403, + "step": 9146 + }, + { + "epoch": 0.8350374292495892, + "grad_norm": 0.5124673247337341, + "learning_rate": 4.7691689650345166e-06, + "loss": 0.559, + "step": 9147 + }, + { + "epoch": 0.8351287201022457, + "grad_norm": 0.46904775500297546, + "learning_rate": 4.769118733307529e-06, + "loss": 0.563, + "step": 9148 + }, + { + "epoch": 0.8352200109549023, + "grad_norm": 0.4647599756717682, + "learning_rate": 4.769068496380203e-06, + "loss": 0.5912, + "step": 9149 + }, + { + "epoch": 0.8353113018075589, + "grad_norm": 0.5248358249664307, + "learning_rate": 4.7690182542526555e-06, + "loss": 0.5092, + "step": 9150 + }, + { + "epoch": 0.8354025926602154, + "grad_norm": 0.4850047528743744, + "learning_rate": 4.7689680069249996e-06, + "loss": 0.5388, + "step": 9151 + }, + { + "epoch": 0.835493883512872, + "grad_norm": 0.44687411189079285, + "learning_rate": 4.768917754397352e-06, + "loss": 0.5787, + "step": 9152 + }, + { + "epoch": 0.8355851743655286, + "grad_norm": 0.48079758882522583, + "learning_rate": 4.768867496669826e-06, + "loss": 0.4706, + "step": 9153 + }, + { + "epoch": 0.8356764652181852, + "grad_norm": 0.48398879170417786, + "learning_rate": 4.768817233742539e-06, + "loss": 0.5746, + "step": 9154 + }, + { + "epoch": 0.8357677560708417, + "grad_norm": 0.47099435329437256, + "learning_rate": 4.7687669656156035e-06, + "loss": 0.5492, + "step": 9155 + }, + { + "epoch": 0.8358590469234982, + "grad_norm": 0.4805298149585724, + "learning_rate": 4.768716692289137e-06, + "loss": 0.5406, + "step": 9156 + }, + { + "epoch": 0.8359503377761548, + "grad_norm": 0.49708086252212524, + "learning_rate": 4.768666413763254e-06, + "loss": 0.5367, + "step": 9157 + }, + { + "epoch": 0.8360416286288114, + "grad_norm": 0.5176053047180176, + "learning_rate": 4.768616130038069e-06, + "loss": 0.5242, + "step": 9158 + }, + { + "epoch": 0.836132919481468, + "grad_norm": 0.4831135869026184, + "learning_rate": 4.768565841113699e-06, + "loss": 0.5757, + "step": 9159 + }, + { + "epoch": 0.8362242103341245, + "grad_norm": 0.49432799220085144, + "learning_rate": 4.768515546990257e-06, + "loss": 0.5511, + "step": 9160 + }, + { + "epoch": 0.8363155011867811, + "grad_norm": 0.5040497779846191, + "learning_rate": 4.768465247667861e-06, + "loss": 0.5435, + "step": 9161 + }, + { + "epoch": 0.8364067920394377, + "grad_norm": 0.4656459093093872, + "learning_rate": 4.768414943146623e-06, + "loss": 0.578, + "step": 9162 + }, + { + "epoch": 0.8364980828920943, + "grad_norm": 0.47041577100753784, + "learning_rate": 4.7683646334266605e-06, + "loss": 0.5827, + "step": 9163 + }, + { + "epoch": 0.8365893737447507, + "grad_norm": 0.47777438163757324, + "learning_rate": 4.768314318508088e-06, + "loss": 0.5509, + "step": 9164 + }, + { + "epoch": 0.8366806645974073, + "grad_norm": 0.5055814385414124, + "learning_rate": 4.7682639983910215e-06, + "loss": 0.4954, + "step": 9165 + }, + { + "epoch": 0.8367719554500639, + "grad_norm": 0.45233336091041565, + "learning_rate": 4.768213673075576e-06, + "loss": 0.5917, + "step": 9166 + }, + { + "epoch": 0.8368632463027205, + "grad_norm": 0.5045900940895081, + "learning_rate": 4.768163342561866e-06, + "loss": 0.5401, + "step": 9167 + }, + { + "epoch": 0.836954537155377, + "grad_norm": 0.4860118329524994, + "learning_rate": 4.7681130068500095e-06, + "loss": 0.5735, + "step": 9168 + }, + { + "epoch": 0.8370458280080336, + "grad_norm": 0.4809049367904663, + "learning_rate": 4.768062665940118e-06, + "loss": 0.5891, + "step": 9169 + }, + { + "epoch": 0.8371371188606902, + "grad_norm": 0.47868919372558594, + "learning_rate": 4.768012319832309e-06, + "loss": 0.5647, + "step": 9170 + }, + { + "epoch": 0.8372284097133468, + "grad_norm": 0.4932987689971924, + "learning_rate": 4.767961968526699e-06, + "loss": 0.5597, + "step": 9171 + }, + { + "epoch": 0.8373197005660032, + "grad_norm": 0.47906726598739624, + "learning_rate": 4.7679116120234e-06, + "loss": 0.5139, + "step": 9172 + }, + { + "epoch": 0.8374109914186598, + "grad_norm": 0.5141772031784058, + "learning_rate": 4.767861250322531e-06, + "loss": 0.5122, + "step": 9173 + }, + { + "epoch": 0.8375022822713164, + "grad_norm": 0.4859289526939392, + "learning_rate": 4.767810883424205e-06, + "loss": 0.5778, + "step": 9174 + }, + { + "epoch": 0.837593573123973, + "grad_norm": 0.476966917514801, + "learning_rate": 4.7677605113285394e-06, + "loss": 0.6064, + "step": 9175 + }, + { + "epoch": 0.8376848639766296, + "grad_norm": 0.5047740936279297, + "learning_rate": 4.767710134035649e-06, + "loss": 0.5106, + "step": 9176 + }, + { + "epoch": 0.8377761548292861, + "grad_norm": 0.517903745174408, + "learning_rate": 4.767659751545647e-06, + "loss": 0.4942, + "step": 9177 + }, + { + "epoch": 0.8378674456819427, + "grad_norm": 0.47790977358818054, + "learning_rate": 4.767609363858652e-06, + "loss": 0.5569, + "step": 9178 + }, + { + "epoch": 0.8379587365345992, + "grad_norm": 0.46844762563705444, + "learning_rate": 4.767558970974777e-06, + "loss": 0.5396, + "step": 9179 + }, + { + "epoch": 0.8380500273872558, + "grad_norm": 0.4656146168708801, + "learning_rate": 4.7675085728941405e-06, + "loss": 0.5074, + "step": 9180 + }, + { + "epoch": 0.8381413182399123, + "grad_norm": 0.48111194372177124, + "learning_rate": 4.767458169616855e-06, + "loss": 0.5492, + "step": 9181 + }, + { + "epoch": 0.8382326090925689, + "grad_norm": 0.46556952595710754, + "learning_rate": 4.767407761143037e-06, + "loss": 0.5278, + "step": 9182 + }, + { + "epoch": 0.8383238999452255, + "grad_norm": 0.47182294726371765, + "learning_rate": 4.767357347472803e-06, + "loss": 0.596, + "step": 9183 + }, + { + "epoch": 0.8384151907978821, + "grad_norm": 0.4932655096054077, + "learning_rate": 4.767306928606267e-06, + "loss": 0.5633, + "step": 9184 + }, + { + "epoch": 0.8385064816505386, + "grad_norm": 0.4862940013408661, + "learning_rate": 4.767256504543546e-06, + "loss": 0.5317, + "step": 9185 + }, + { + "epoch": 0.8385977725031952, + "grad_norm": 0.5141140222549438, + "learning_rate": 4.767206075284755e-06, + "loss": 0.518, + "step": 9186 + }, + { + "epoch": 0.8386890633558517, + "grad_norm": 0.4870923161506653, + "learning_rate": 4.767155640830008e-06, + "loss": 0.585, + "step": 9187 + }, + { + "epoch": 0.8387803542085083, + "grad_norm": 0.5108051300048828, + "learning_rate": 4.767105201179423e-06, + "loss": 0.5114, + "step": 9188 + }, + { + "epoch": 0.8388716450611649, + "grad_norm": 0.45966005325317383, + "learning_rate": 4.767054756333115e-06, + "loss": 0.6009, + "step": 9189 + }, + { + "epoch": 0.8389629359138214, + "grad_norm": 0.4635809063911438, + "learning_rate": 4.7670043062911985e-06, + "loss": 0.5919, + "step": 9190 + }, + { + "epoch": 0.839054226766478, + "grad_norm": 0.4718068242073059, + "learning_rate": 4.766953851053791e-06, + "loss": 0.561, + "step": 9191 + }, + { + "epoch": 0.8391455176191346, + "grad_norm": 0.4767955243587494, + "learning_rate": 4.766903390621005e-06, + "loss": 0.5525, + "step": 9192 + }, + { + "epoch": 0.8392368084717912, + "grad_norm": 0.46326005458831787, + "learning_rate": 4.76685292499296e-06, + "loss": 0.5606, + "step": 9193 + }, + { + "epoch": 0.8393280993244477, + "grad_norm": 0.47737520933151245, + "learning_rate": 4.766802454169769e-06, + "loss": 0.56, + "step": 9194 + }, + { + "epoch": 0.8394193901771042, + "grad_norm": 0.4800577759742737, + "learning_rate": 4.7667519781515485e-06, + "loss": 0.5428, + "step": 9195 + }, + { + "epoch": 0.8395106810297608, + "grad_norm": 0.483751505613327, + "learning_rate": 4.766701496938414e-06, + "loss": 0.5416, + "step": 9196 + }, + { + "epoch": 0.8396019718824174, + "grad_norm": 0.48667243123054504, + "learning_rate": 4.766651010530481e-06, + "loss": 0.545, + "step": 9197 + }, + { + "epoch": 0.8396932627350739, + "grad_norm": 0.4960426092147827, + "learning_rate": 4.766600518927866e-06, + "loss": 0.5705, + "step": 9198 + }, + { + "epoch": 0.8397845535877305, + "grad_norm": 0.4637939929962158, + "learning_rate": 4.766550022130684e-06, + "loss": 0.55, + "step": 9199 + }, + { + "epoch": 0.8398758444403871, + "grad_norm": 0.4858308732509613, + "learning_rate": 4.7664995201390514e-06, + "loss": 0.5045, + "step": 9200 + }, + { + "epoch": 0.8399671352930437, + "grad_norm": 0.49691885709762573, + "learning_rate": 4.7664490129530825e-06, + "loss": 0.5512, + "step": 9201 + }, + { + "epoch": 0.8400584261457003, + "grad_norm": 0.5006582736968994, + "learning_rate": 4.766398500572895e-06, + "loss": 0.5085, + "step": 9202 + }, + { + "epoch": 0.8401497169983567, + "grad_norm": 0.44985535740852356, + "learning_rate": 4.766347982998604e-06, + "loss": 0.5479, + "step": 9203 + }, + { + "epoch": 0.8402410078510133, + "grad_norm": 0.4936105012893677, + "learning_rate": 4.7662974602303245e-06, + "loss": 0.5237, + "step": 9204 + }, + { + "epoch": 0.8403322987036699, + "grad_norm": 0.49786320328712463, + "learning_rate": 4.766246932268173e-06, + "loss": 0.4966, + "step": 9205 + }, + { + "epoch": 0.8404235895563265, + "grad_norm": 0.4733334183692932, + "learning_rate": 4.7661963991122644e-06, + "loss": 0.5461, + "step": 9206 + }, + { + "epoch": 0.840514880408983, + "grad_norm": 0.4797079265117645, + "learning_rate": 4.766145860762716e-06, + "loss": 0.5431, + "step": 9207 + }, + { + "epoch": 0.8406061712616396, + "grad_norm": 0.4809713065624237, + "learning_rate": 4.766095317219642e-06, + "loss": 0.5445, + "step": 9208 + }, + { + "epoch": 0.8406974621142962, + "grad_norm": 0.47316211462020874, + "learning_rate": 4.76604476848316e-06, + "loss": 0.569, + "step": 9209 + }, + { + "epoch": 0.8407887529669527, + "grad_norm": 0.4545428454875946, + "learning_rate": 4.765994214553385e-06, + "loss": 0.5594, + "step": 9210 + }, + { + "epoch": 0.8408800438196092, + "grad_norm": 0.4762365221977234, + "learning_rate": 4.765943655430432e-06, + "loss": 0.5642, + "step": 9211 + }, + { + "epoch": 0.8409713346722658, + "grad_norm": 0.4678126275539398, + "learning_rate": 4.765893091114417e-06, + "loss": 0.5763, + "step": 9212 + }, + { + "epoch": 0.8410626255249224, + "grad_norm": 0.46606704592704773, + "learning_rate": 4.765842521605458e-06, + "loss": 0.5621, + "step": 9213 + }, + { + "epoch": 0.841153916377579, + "grad_norm": 0.47133731842041016, + "learning_rate": 4.765791946903669e-06, + "loss": 0.5322, + "step": 9214 + }, + { + "epoch": 0.8412452072302355, + "grad_norm": 0.5033503770828247, + "learning_rate": 4.7657413670091655e-06, + "loss": 0.5133, + "step": 9215 + }, + { + "epoch": 0.8413364980828921, + "grad_norm": 0.47360628843307495, + "learning_rate": 4.765690781922065e-06, + "loss": 0.542, + "step": 9216 + }, + { + "epoch": 0.8414277889355487, + "grad_norm": 0.43956905603408813, + "learning_rate": 4.7656401916424825e-06, + "loss": 0.5682, + "step": 9217 + }, + { + "epoch": 0.8415190797882052, + "grad_norm": 0.5169616937637329, + "learning_rate": 4.7655895961705345e-06, + "loss": 0.5562, + "step": 9218 + }, + { + "epoch": 0.8416103706408617, + "grad_norm": 0.4688188433647156, + "learning_rate": 4.765538995506336e-06, + "loss": 0.5478, + "step": 9219 + }, + { + "epoch": 0.8417016614935183, + "grad_norm": 0.49535566568374634, + "learning_rate": 4.765488389650003e-06, + "loss": 0.5323, + "step": 9220 + }, + { + "epoch": 0.8417929523461749, + "grad_norm": 0.5065163969993591, + "learning_rate": 4.765437778601654e-06, + "loss": 0.5581, + "step": 9221 + }, + { + "epoch": 0.8418842431988315, + "grad_norm": 0.4581555128097534, + "learning_rate": 4.765387162361401e-06, + "loss": 0.5597, + "step": 9222 + }, + { + "epoch": 0.8419755340514881, + "grad_norm": 0.510223925113678, + "learning_rate": 4.765336540929363e-06, + "loss": 0.5404, + "step": 9223 + }, + { + "epoch": 0.8420668249041446, + "grad_norm": 0.46829986572265625, + "learning_rate": 4.765285914305655e-06, + "loss": 0.5604, + "step": 9224 + }, + { + "epoch": 0.8421581157568012, + "grad_norm": 0.4814192056655884, + "learning_rate": 4.765235282490393e-06, + "loss": 0.5828, + "step": 9225 + }, + { + "epoch": 0.8422494066094577, + "grad_norm": 0.5009490847587585, + "learning_rate": 4.765184645483693e-06, + "loss": 0.5242, + "step": 9226 + }, + { + "epoch": 0.8423406974621143, + "grad_norm": 0.44762367010116577, + "learning_rate": 4.7651340032856705e-06, + "loss": 0.5552, + "step": 9227 + }, + { + "epoch": 0.8424319883147708, + "grad_norm": 0.46575725078582764, + "learning_rate": 4.765083355896442e-06, + "loss": 0.5585, + "step": 9228 + }, + { + "epoch": 0.8425232791674274, + "grad_norm": 0.4785533547401428, + "learning_rate": 4.765032703316124e-06, + "loss": 0.5409, + "step": 9229 + }, + { + "epoch": 0.842614570020084, + "grad_norm": 0.5008764266967773, + "learning_rate": 4.764982045544834e-06, + "loss": 0.5657, + "step": 9230 + }, + { + "epoch": 0.8427058608727406, + "grad_norm": 0.482276976108551, + "learning_rate": 4.7649313825826846e-06, + "loss": 0.5221, + "step": 9231 + }, + { + "epoch": 0.8427971517253972, + "grad_norm": 0.4821878671646118, + "learning_rate": 4.764880714429794e-06, + "loss": 0.536, + "step": 9232 + }, + { + "epoch": 0.8428884425780537, + "grad_norm": 0.452818363904953, + "learning_rate": 4.7648300410862785e-06, + "loss": 0.5731, + "step": 9233 + }, + { + "epoch": 0.8429797334307102, + "grad_norm": 0.47594913840293884, + "learning_rate": 4.764779362552254e-06, + "loss": 0.6003, + "step": 9234 + }, + { + "epoch": 0.8430710242833668, + "grad_norm": 0.4902011752128601, + "learning_rate": 4.764728678827836e-06, + "loss": 0.5319, + "step": 9235 + }, + { + "epoch": 0.8431623151360234, + "grad_norm": 0.4917480945587158, + "learning_rate": 4.764677989913141e-06, + "loss": 0.5504, + "step": 9236 + }, + { + "epoch": 0.8432536059886799, + "grad_norm": 0.4921296536922455, + "learning_rate": 4.764627295808286e-06, + "loss": 0.52, + "step": 9237 + }, + { + "epoch": 0.8433448968413365, + "grad_norm": 0.4611067473888397, + "learning_rate": 4.764576596513385e-06, + "loss": 0.5639, + "step": 9238 + }, + { + "epoch": 0.8434361876939931, + "grad_norm": 0.4500282108783722, + "learning_rate": 4.764525892028558e-06, + "loss": 0.5595, + "step": 9239 + }, + { + "epoch": 0.8435274785466497, + "grad_norm": 0.4641340970993042, + "learning_rate": 4.764475182353917e-06, + "loss": 0.5465, + "step": 9240 + }, + { + "epoch": 0.8436187693993061, + "grad_norm": 0.484530508518219, + "learning_rate": 4.76442446748958e-06, + "loss": 0.5539, + "step": 9241 + }, + { + "epoch": 0.8437100602519627, + "grad_norm": 0.49285396933555603, + "learning_rate": 4.764373747435664e-06, + "loss": 0.5588, + "step": 9242 + }, + { + "epoch": 0.8438013511046193, + "grad_norm": 0.4900442361831665, + "learning_rate": 4.764323022192284e-06, + "loss": 0.5667, + "step": 9243 + }, + { + "epoch": 0.8438926419572759, + "grad_norm": 0.48318004608154297, + "learning_rate": 4.764272291759557e-06, + "loss": 0.5389, + "step": 9244 + }, + { + "epoch": 0.8439839328099324, + "grad_norm": 0.486397922039032, + "learning_rate": 4.7642215561375996e-06, + "loss": 0.5129, + "step": 9245 + }, + { + "epoch": 0.844075223662589, + "grad_norm": 0.4834464192390442, + "learning_rate": 4.764170815326527e-06, + "loss": 0.5158, + "step": 9246 + }, + { + "epoch": 0.8441665145152456, + "grad_norm": 0.5283904075622559, + "learning_rate": 4.7641200693264555e-06, + "loss": 0.5329, + "step": 9247 + }, + { + "epoch": 0.8442578053679022, + "grad_norm": 0.4653095304965973, + "learning_rate": 4.764069318137503e-06, + "loss": 0.5543, + "step": 9248 + }, + { + "epoch": 0.8443490962205586, + "grad_norm": 0.4916286766529083, + "learning_rate": 4.764018561759784e-06, + "loss": 0.521, + "step": 9249 + }, + { + "epoch": 0.8444403870732152, + "grad_norm": 0.4871203601360321, + "learning_rate": 4.7639678001934154e-06, + "loss": 0.6023, + "step": 9250 + }, + { + "epoch": 0.8445316779258718, + "grad_norm": 0.491510808467865, + "learning_rate": 4.7639170334385146e-06, + "loss": 0.5049, + "step": 9251 + }, + { + "epoch": 0.8446229687785284, + "grad_norm": 0.49258407950401306, + "learning_rate": 4.7638662614951955e-06, + "loss": 0.4971, + "step": 9252 + }, + { + "epoch": 0.844714259631185, + "grad_norm": 0.49709951877593994, + "learning_rate": 4.763815484363578e-06, + "loss": 0.5318, + "step": 9253 + }, + { + "epoch": 0.8448055504838415, + "grad_norm": 0.47054725885391235, + "learning_rate": 4.763764702043775e-06, + "loss": 0.5541, + "step": 9254 + }, + { + "epoch": 0.8448968413364981, + "grad_norm": 0.4632517695426941, + "learning_rate": 4.763713914535904e-06, + "loss": 0.511, + "step": 9255 + }, + { + "epoch": 0.8449881321891547, + "grad_norm": 0.4814455807209015, + "learning_rate": 4.763663121840083e-06, + "loss": 0.5332, + "step": 9256 + }, + { + "epoch": 0.8450794230418112, + "grad_norm": 0.5216931700706482, + "learning_rate": 4.763612323956427e-06, + "loss": 0.5452, + "step": 9257 + }, + { + "epoch": 0.8451707138944677, + "grad_norm": 0.480977326631546, + "learning_rate": 4.763561520885052e-06, + "loss": 0.5756, + "step": 9258 + }, + { + "epoch": 0.8452620047471243, + "grad_norm": 0.4803130626678467, + "learning_rate": 4.763510712626074e-06, + "loss": 0.5388, + "step": 9259 + }, + { + "epoch": 0.8453532955997809, + "grad_norm": 0.5003547072410583, + "learning_rate": 4.763459899179612e-06, + "loss": 0.5368, + "step": 9260 + }, + { + "epoch": 0.8454445864524375, + "grad_norm": 0.4915032982826233, + "learning_rate": 4.76340908054578e-06, + "loss": 0.5224, + "step": 9261 + }, + { + "epoch": 0.845535877305094, + "grad_norm": 0.5004127621650696, + "learning_rate": 4.763358256724696e-06, + "loss": 0.5149, + "step": 9262 + }, + { + "epoch": 0.8456271681577506, + "grad_norm": 0.48152005672454834, + "learning_rate": 4.763307427716475e-06, + "loss": 0.5581, + "step": 9263 + }, + { + "epoch": 0.8457184590104072, + "grad_norm": 0.5089053511619568, + "learning_rate": 4.763256593521235e-06, + "loss": 0.5217, + "step": 9264 + }, + { + "epoch": 0.8458097498630637, + "grad_norm": 0.4990546703338623, + "learning_rate": 4.763205754139092e-06, + "loss": 0.5253, + "step": 9265 + }, + { + "epoch": 0.8459010407157203, + "grad_norm": 0.4961698651313782, + "learning_rate": 4.763154909570161e-06, + "loss": 0.5517, + "step": 9266 + }, + { + "epoch": 0.8459923315683768, + "grad_norm": 0.4679529666900635, + "learning_rate": 4.763104059814562e-06, + "loss": 0.5513, + "step": 9267 + }, + { + "epoch": 0.8460836224210334, + "grad_norm": 0.4931298494338989, + "learning_rate": 4.763053204872408e-06, + "loss": 0.5368, + "step": 9268 + }, + { + "epoch": 0.84617491327369, + "grad_norm": 0.45106831192970276, + "learning_rate": 4.763002344743817e-06, + "loss": 0.5554, + "step": 9269 + }, + { + "epoch": 0.8462662041263466, + "grad_norm": 0.47774162888526917, + "learning_rate": 4.762951479428906e-06, + "loss": 0.5201, + "step": 9270 + }, + { + "epoch": 0.8463574949790031, + "grad_norm": 0.473627507686615, + "learning_rate": 4.762900608927789e-06, + "loss": 0.584, + "step": 9271 + }, + { + "epoch": 0.8464487858316597, + "grad_norm": 0.5071237683296204, + "learning_rate": 4.762849733240587e-06, + "loss": 0.4877, + "step": 9272 + }, + { + "epoch": 0.8465400766843162, + "grad_norm": 0.4903643727302551, + "learning_rate": 4.762798852367413e-06, + "loss": 0.5033, + "step": 9273 + }, + { + "epoch": 0.8466313675369728, + "grad_norm": 0.49746817350387573, + "learning_rate": 4.762747966308386e-06, + "loss": 0.5828, + "step": 9274 + }, + { + "epoch": 0.8467226583896293, + "grad_norm": 0.4865061640739441, + "learning_rate": 4.762697075063621e-06, + "loss": 0.5522, + "step": 9275 + }, + { + "epoch": 0.8468139492422859, + "grad_norm": 0.4720616936683655, + "learning_rate": 4.762646178633235e-06, + "loss": 0.5723, + "step": 9276 + }, + { + "epoch": 0.8469052400949425, + "grad_norm": 0.5098199844360352, + "learning_rate": 4.762595277017344e-06, + "loss": 0.5395, + "step": 9277 + }, + { + "epoch": 0.8469965309475991, + "grad_norm": 0.4690760374069214, + "learning_rate": 4.762544370216067e-06, + "loss": 0.5593, + "step": 9278 + }, + { + "epoch": 0.8470878218002557, + "grad_norm": 0.5248705148696899, + "learning_rate": 4.762493458229518e-06, + "loss": 0.518, + "step": 9279 + }, + { + "epoch": 0.8471791126529121, + "grad_norm": 0.4847465753555298, + "learning_rate": 4.762442541057815e-06, + "loss": 0.5479, + "step": 9280 + }, + { + "epoch": 0.8472704035055687, + "grad_norm": 0.4889717400074005, + "learning_rate": 4.762391618701074e-06, + "loss": 0.5654, + "step": 9281 + }, + { + "epoch": 0.8473616943582253, + "grad_norm": 0.4804093539714813, + "learning_rate": 4.762340691159413e-06, + "loss": 0.528, + "step": 9282 + }, + { + "epoch": 0.8474529852108819, + "grad_norm": 0.4808216691017151, + "learning_rate": 4.762289758432947e-06, + "loss": 0.5292, + "step": 9283 + }, + { + "epoch": 0.8475442760635384, + "grad_norm": 0.4886792004108429, + "learning_rate": 4.762238820521794e-06, + "loss": 0.5269, + "step": 9284 + }, + { + "epoch": 0.847635566916195, + "grad_norm": 0.48032644391059875, + "learning_rate": 4.76218787742607e-06, + "loss": 0.586, + "step": 9285 + }, + { + "epoch": 0.8477268577688516, + "grad_norm": 0.43777865171432495, + "learning_rate": 4.762136929145893e-06, + "loss": 0.5641, + "step": 9286 + }, + { + "epoch": 0.8478181486215082, + "grad_norm": 0.4765686094760895, + "learning_rate": 4.762085975681377e-06, + "loss": 0.5417, + "step": 9287 + }, + { + "epoch": 0.8479094394741646, + "grad_norm": 0.46093302965164185, + "learning_rate": 4.762035017032643e-06, + "loss": 0.5232, + "step": 9288 + }, + { + "epoch": 0.8480007303268212, + "grad_norm": 0.4974121153354645, + "learning_rate": 4.761984053199804e-06, + "loss": 0.4897, + "step": 9289 + }, + { + "epoch": 0.8480920211794778, + "grad_norm": 0.507895290851593, + "learning_rate": 4.761933084182978e-06, + "loss": 0.5317, + "step": 9290 + }, + { + "epoch": 0.8481833120321344, + "grad_norm": 0.4996841549873352, + "learning_rate": 4.761882109982283e-06, + "loss": 0.5097, + "step": 9291 + }, + { + "epoch": 0.848274602884791, + "grad_norm": 0.4746413230895996, + "learning_rate": 4.761831130597834e-06, + "loss": 0.5507, + "step": 9292 + }, + { + "epoch": 0.8483658937374475, + "grad_norm": 0.4882107079029083, + "learning_rate": 4.761780146029748e-06, + "loss": 0.505, + "step": 9293 + }, + { + "epoch": 0.8484571845901041, + "grad_norm": 0.4625614285469055, + "learning_rate": 4.761729156278144e-06, + "loss": 0.5574, + "step": 9294 + }, + { + "epoch": 0.8485484754427607, + "grad_norm": 0.4972177743911743, + "learning_rate": 4.761678161343138e-06, + "loss": 0.562, + "step": 9295 + }, + { + "epoch": 0.8486397662954172, + "grad_norm": 0.4870285391807556, + "learning_rate": 4.761627161224844e-06, + "loss": 0.5318, + "step": 9296 + }, + { + "epoch": 0.8487310571480737, + "grad_norm": 0.5430012345314026, + "learning_rate": 4.761576155923382e-06, + "loss": 0.497, + "step": 9297 + }, + { + "epoch": 0.8488223480007303, + "grad_norm": 0.48608386516571045, + "learning_rate": 4.761525145438869e-06, + "loss": 0.5508, + "step": 9298 + }, + { + "epoch": 0.8489136388533869, + "grad_norm": 0.4955611228942871, + "learning_rate": 4.76147412977142e-06, + "loss": 0.5268, + "step": 9299 + }, + { + "epoch": 0.8490049297060435, + "grad_norm": 0.46399906277656555, + "learning_rate": 4.761423108921153e-06, + "loss": 0.5696, + "step": 9300 + }, + { + "epoch": 0.8490962205587, + "grad_norm": 0.479390025138855, + "learning_rate": 4.761372082888184e-06, + "loss": 0.5638, + "step": 9301 + }, + { + "epoch": 0.8491875114113566, + "grad_norm": 0.49369606375694275, + "learning_rate": 4.761321051672631e-06, + "loss": 0.5025, + "step": 9302 + }, + { + "epoch": 0.8492788022640132, + "grad_norm": 0.47570478916168213, + "learning_rate": 4.761270015274611e-06, + "loss": 0.5819, + "step": 9303 + }, + { + "epoch": 0.8493700931166697, + "grad_norm": 0.48985064029693604, + "learning_rate": 4.761218973694241e-06, + "loss": 0.5579, + "step": 9304 + }, + { + "epoch": 0.8494613839693262, + "grad_norm": 0.5355318784713745, + "learning_rate": 4.761167926931638e-06, + "loss": 0.4834, + "step": 9305 + }, + { + "epoch": 0.8495526748219828, + "grad_norm": 0.5184978246688843, + "learning_rate": 4.761116874986917e-06, + "loss": 0.5252, + "step": 9306 + }, + { + "epoch": 0.8496439656746394, + "grad_norm": 0.47709912061691284, + "learning_rate": 4.761065817860198e-06, + "loss": 0.527, + "step": 9307 + }, + { + "epoch": 0.849735256527296, + "grad_norm": 0.49624645709991455, + "learning_rate": 4.761014755551596e-06, + "loss": 0.5837, + "step": 9308 + }, + { + "epoch": 0.8498265473799526, + "grad_norm": 0.4511374235153198, + "learning_rate": 4.760963688061229e-06, + "loss": 0.567, + "step": 9309 + }, + { + "epoch": 0.8499178382326091, + "grad_norm": 0.4863639771938324, + "learning_rate": 4.7609126153892135e-06, + "loss": 0.5197, + "step": 9310 + }, + { + "epoch": 0.8500091290852656, + "grad_norm": 0.47443056106567383, + "learning_rate": 4.760861537535666e-06, + "loss": 0.5243, + "step": 9311 + }, + { + "epoch": 0.8501004199379222, + "grad_norm": 0.49592697620391846, + "learning_rate": 4.760810454500705e-06, + "loss": 0.5358, + "step": 9312 + }, + { + "epoch": 0.8501917107905788, + "grad_norm": 0.4750049412250519, + "learning_rate": 4.760759366284447e-06, + "loss": 0.5765, + "step": 9313 + }, + { + "epoch": 0.8502830016432353, + "grad_norm": 0.49842461943626404, + "learning_rate": 4.760708272887008e-06, + "loss": 0.5693, + "step": 9314 + }, + { + "epoch": 0.8503742924958919, + "grad_norm": 0.5019801259040833, + "learning_rate": 4.760657174308507e-06, + "loss": 0.5376, + "step": 9315 + }, + { + "epoch": 0.8504655833485485, + "grad_norm": 0.4613717794418335, + "learning_rate": 4.7606060705490595e-06, + "loss": 0.5797, + "step": 9316 + }, + { + "epoch": 0.8505568742012051, + "grad_norm": 0.46814414858818054, + "learning_rate": 4.760554961608783e-06, + "loss": 0.5761, + "step": 9317 + }, + { + "epoch": 0.8506481650538616, + "grad_norm": 0.4790698289871216, + "learning_rate": 4.760503847487796e-06, + "loss": 0.5465, + "step": 9318 + }, + { + "epoch": 0.8507394559065181, + "grad_norm": 0.4825998544692993, + "learning_rate": 4.7604527281862135e-06, + "loss": 0.522, + "step": 9319 + }, + { + "epoch": 0.8508307467591747, + "grad_norm": 0.4826820492744446, + "learning_rate": 4.760401603704155e-06, + "loss": 0.5292, + "step": 9320 + }, + { + "epoch": 0.8509220376118313, + "grad_norm": 0.4802405834197998, + "learning_rate": 4.760350474041735e-06, + "loss": 0.5456, + "step": 9321 + }, + { + "epoch": 0.8510133284644879, + "grad_norm": 0.476645827293396, + "learning_rate": 4.7602993391990725e-06, + "loss": 0.5501, + "step": 9322 + }, + { + "epoch": 0.8511046193171444, + "grad_norm": 0.5008077621459961, + "learning_rate": 4.760248199176284e-06, + "loss": 0.5649, + "step": 9323 + }, + { + "epoch": 0.851195910169801, + "grad_norm": 0.47815191745758057, + "learning_rate": 4.7601970539734865e-06, + "loss": 0.5254, + "step": 9324 + }, + { + "epoch": 0.8512872010224576, + "grad_norm": 0.45676201581954956, + "learning_rate": 4.760145903590798e-06, + "loss": 0.5802, + "step": 9325 + }, + { + "epoch": 0.8513784918751142, + "grad_norm": 0.5045804381370544, + "learning_rate": 4.760094748028335e-06, + "loss": 0.5063, + "step": 9326 + }, + { + "epoch": 0.8514697827277706, + "grad_norm": 0.4904516041278839, + "learning_rate": 4.760043587286216e-06, + "loss": 0.5509, + "step": 9327 + }, + { + "epoch": 0.8515610735804272, + "grad_norm": 0.4906446635723114, + "learning_rate": 4.759992421364557e-06, + "loss": 0.5222, + "step": 9328 + }, + { + "epoch": 0.8516523644330838, + "grad_norm": 0.49399831891059875, + "learning_rate": 4.759941250263476e-06, + "loss": 0.5799, + "step": 9329 + }, + { + "epoch": 0.8517436552857404, + "grad_norm": 0.4556521475315094, + "learning_rate": 4.759890073983089e-06, + "loss": 0.5911, + "step": 9330 + }, + { + "epoch": 0.8518349461383969, + "grad_norm": 0.4821110665798187, + "learning_rate": 4.7598388925235136e-06, + "loss": 0.5542, + "step": 9331 + }, + { + "epoch": 0.8519262369910535, + "grad_norm": 0.4664517641067505, + "learning_rate": 4.759787705884869e-06, + "loss": 0.5568, + "step": 9332 + }, + { + "epoch": 0.8520175278437101, + "grad_norm": 0.4885886311531067, + "learning_rate": 4.759736514067271e-06, + "loss": 0.5334, + "step": 9333 + }, + { + "epoch": 0.8521088186963667, + "grad_norm": 0.4483821988105774, + "learning_rate": 4.759685317070837e-06, + "loss": 0.5976, + "step": 9334 + }, + { + "epoch": 0.8522001095490231, + "grad_norm": 0.5006715655326843, + "learning_rate": 4.759634114895684e-06, + "loss": 0.5325, + "step": 9335 + }, + { + "epoch": 0.8522914004016797, + "grad_norm": 0.48726606369018555, + "learning_rate": 4.75958290754193e-06, + "loss": 0.5369, + "step": 9336 + }, + { + "epoch": 0.8523826912543363, + "grad_norm": 0.47809019684791565, + "learning_rate": 4.759531695009692e-06, + "loss": 0.5593, + "step": 9337 + }, + { + "epoch": 0.8524739821069929, + "grad_norm": 0.4949924647808075, + "learning_rate": 4.7594804772990875e-06, + "loss": 0.5158, + "step": 9338 + }, + { + "epoch": 0.8525652729596495, + "grad_norm": 0.460493803024292, + "learning_rate": 4.7594292544102335e-06, + "loss": 0.5642, + "step": 9339 + }, + { + "epoch": 0.852656563812306, + "grad_norm": 0.4830491244792938, + "learning_rate": 4.7593780263432485e-06, + "loss": 0.5393, + "step": 9340 + }, + { + "epoch": 0.8527478546649626, + "grad_norm": 0.45005685091018677, + "learning_rate": 4.759326793098249e-06, + "loss": 0.5871, + "step": 9341 + }, + { + "epoch": 0.8528391455176191, + "grad_norm": 0.4991980195045471, + "learning_rate": 4.7592755546753524e-06, + "loss": 0.5635, + "step": 9342 + }, + { + "epoch": 0.8529304363702757, + "grad_norm": 0.5371206402778625, + "learning_rate": 4.759224311074676e-06, + "loss": 0.514, + "step": 9343 + }, + { + "epoch": 0.8530217272229322, + "grad_norm": 0.524218738079071, + "learning_rate": 4.759173062296339e-06, + "loss": 0.5109, + "step": 9344 + }, + { + "epoch": 0.8531130180755888, + "grad_norm": 0.47218069434165955, + "learning_rate": 4.759121808340456e-06, + "loss": 0.552, + "step": 9345 + }, + { + "epoch": 0.8532043089282454, + "grad_norm": 0.4913916289806366, + "learning_rate": 4.759070549207146e-06, + "loss": 0.5291, + "step": 9346 + }, + { + "epoch": 0.853295599780902, + "grad_norm": 0.49881696701049805, + "learning_rate": 4.7590192848965265e-06, + "loss": 0.514, + "step": 9347 + }, + { + "epoch": 0.8533868906335585, + "grad_norm": 0.47502556443214417, + "learning_rate": 4.758968015408714e-06, + "loss": 0.5826, + "step": 9348 + }, + { + "epoch": 0.8534781814862151, + "grad_norm": 0.514914870262146, + "learning_rate": 4.7589167407438275e-06, + "loss": 0.5444, + "step": 9349 + }, + { + "epoch": 0.8535694723388716, + "grad_norm": 0.4645446240901947, + "learning_rate": 4.758865460901985e-06, + "loss": 0.5504, + "step": 9350 + }, + { + "epoch": 0.8536607631915282, + "grad_norm": 0.455588698387146, + "learning_rate": 4.758814175883301e-06, + "loss": 0.5991, + "step": 9351 + }, + { + "epoch": 0.8537520540441847, + "grad_norm": 0.49366527795791626, + "learning_rate": 4.758762885687895e-06, + "loss": 0.5659, + "step": 9352 + }, + { + "epoch": 0.8538433448968413, + "grad_norm": 0.4574674367904663, + "learning_rate": 4.7587115903158855e-06, + "loss": 0.5472, + "step": 9353 + }, + { + "epoch": 0.8539346357494979, + "grad_norm": 0.4697208106517792, + "learning_rate": 4.758660289767389e-06, + "loss": 0.5485, + "step": 9354 + }, + { + "epoch": 0.8540259266021545, + "grad_norm": 0.48891469836235046, + "learning_rate": 4.758608984042522e-06, + "loss": 0.5921, + "step": 9355 + }, + { + "epoch": 0.8541172174548111, + "grad_norm": 0.48352113366127014, + "learning_rate": 4.758557673141404e-06, + "loss": 0.5559, + "step": 9356 + }, + { + "epoch": 0.8542085083074676, + "grad_norm": 0.4979238510131836, + "learning_rate": 4.758506357064151e-06, + "loss": 0.5694, + "step": 9357 + }, + { + "epoch": 0.8542997991601241, + "grad_norm": 0.5179343223571777, + "learning_rate": 4.758455035810883e-06, + "loss": 0.5737, + "step": 9358 + }, + { + "epoch": 0.8543910900127807, + "grad_norm": 0.5137401819229126, + "learning_rate": 4.758403709381714e-06, + "loss": 0.5138, + "step": 9359 + }, + { + "epoch": 0.8544823808654373, + "grad_norm": 0.4981100559234619, + "learning_rate": 4.758352377776765e-06, + "loss": 0.5263, + "step": 9360 + }, + { + "epoch": 0.8545736717180938, + "grad_norm": 0.4987539052963257, + "learning_rate": 4.758301040996152e-06, + "loss": 0.494, + "step": 9361 + }, + { + "epoch": 0.8546649625707504, + "grad_norm": 0.4681205749511719, + "learning_rate": 4.758249699039991e-06, + "loss": 0.5592, + "step": 9362 + }, + { + "epoch": 0.854756253423407, + "grad_norm": 0.4775250554084778, + "learning_rate": 4.758198351908404e-06, + "loss": 0.5488, + "step": 9363 + }, + { + "epoch": 0.8548475442760636, + "grad_norm": 0.46008023619651794, + "learning_rate": 4.758146999601505e-06, + "loss": 0.5788, + "step": 9364 + }, + { + "epoch": 0.8549388351287202, + "grad_norm": 0.4731887876987457, + "learning_rate": 4.758095642119413e-06, + "loss": 0.5496, + "step": 9365 + }, + { + "epoch": 0.8550301259813766, + "grad_norm": 0.4866650104522705, + "learning_rate": 4.7580442794622464e-06, + "loss": 0.5549, + "step": 9366 + }, + { + "epoch": 0.8551214168340332, + "grad_norm": 0.46407726407051086, + "learning_rate": 4.757992911630121e-06, + "loss": 0.5369, + "step": 9367 + }, + { + "epoch": 0.8552127076866898, + "grad_norm": 0.4785013794898987, + "learning_rate": 4.7579415386231565e-06, + "loss": 0.5465, + "step": 9368 + }, + { + "epoch": 0.8553039985393464, + "grad_norm": 0.4796757102012634, + "learning_rate": 4.757890160441469e-06, + "loss": 0.5585, + "step": 9369 + }, + { + "epoch": 0.8553952893920029, + "grad_norm": 0.4768048822879791, + "learning_rate": 4.757838777085178e-06, + "loss": 0.5429, + "step": 9370 + }, + { + "epoch": 0.8554865802446595, + "grad_norm": 0.4832642376422882, + "learning_rate": 4.757787388554399e-06, + "loss": 0.5367, + "step": 9371 + }, + { + "epoch": 0.8555778710973161, + "grad_norm": 0.4651406705379486, + "learning_rate": 4.7577359948492514e-06, + "loss": 0.5797, + "step": 9372 + }, + { + "epoch": 0.8556691619499727, + "grad_norm": 0.4969429671764374, + "learning_rate": 4.757684595969854e-06, + "loss": 0.5477, + "step": 9373 + }, + { + "epoch": 0.8557604528026291, + "grad_norm": 0.5007738471031189, + "learning_rate": 4.757633191916322e-06, + "loss": 0.596, + "step": 9374 + }, + { + "epoch": 0.8558517436552857, + "grad_norm": 0.456560343503952, + "learning_rate": 4.757581782688775e-06, + "loss": 0.5779, + "step": 9375 + }, + { + "epoch": 0.8559430345079423, + "grad_norm": 0.4832362234592438, + "learning_rate": 4.7575303682873295e-06, + "loss": 0.5895, + "step": 9376 + }, + { + "epoch": 0.8560343253605989, + "grad_norm": 0.5065681338310242, + "learning_rate": 4.757478948712105e-06, + "loss": 0.5063, + "step": 9377 + }, + { + "epoch": 0.8561256162132554, + "grad_norm": 0.4730163514614105, + "learning_rate": 4.7574275239632175e-06, + "loss": 0.5543, + "step": 9378 + }, + { + "epoch": 0.856216907065912, + "grad_norm": 0.47820720076560974, + "learning_rate": 4.757376094040787e-06, + "loss": 0.5571, + "step": 9379 + }, + { + "epoch": 0.8563081979185686, + "grad_norm": 0.4809243977069855, + "learning_rate": 4.757324658944929e-06, + "loss": 0.5892, + "step": 9380 + }, + { + "epoch": 0.8563994887712251, + "grad_norm": 0.4735361337661743, + "learning_rate": 4.757273218675763e-06, + "loss": 0.5271, + "step": 9381 + }, + { + "epoch": 0.8564907796238816, + "grad_norm": 0.473947137594223, + "learning_rate": 4.757221773233407e-06, + "loss": 0.5507, + "step": 9382 + }, + { + "epoch": 0.8565820704765382, + "grad_norm": 0.46854376792907715, + "learning_rate": 4.757170322617978e-06, + "loss": 0.5188, + "step": 9383 + }, + { + "epoch": 0.8566733613291948, + "grad_norm": 0.4878682494163513, + "learning_rate": 4.757118866829594e-06, + "loss": 0.5851, + "step": 9384 + }, + { + "epoch": 0.8567646521818514, + "grad_norm": 0.47281575202941895, + "learning_rate": 4.757067405868373e-06, + "loss": 0.5716, + "step": 9385 + }, + { + "epoch": 0.856855943034508, + "grad_norm": 0.47606322169303894, + "learning_rate": 4.757015939734433e-06, + "loss": 0.5903, + "step": 9386 + }, + { + "epoch": 0.8569472338871645, + "grad_norm": 0.4743124842643738, + "learning_rate": 4.756964468427893e-06, + "loss": 0.5415, + "step": 9387 + }, + { + "epoch": 0.8570385247398211, + "grad_norm": 0.47457218170166016, + "learning_rate": 4.756912991948869e-06, + "loss": 0.5497, + "step": 9388 + }, + { + "epoch": 0.8571298155924776, + "grad_norm": 0.49058404564857483, + "learning_rate": 4.756861510297481e-06, + "loss": 0.5845, + "step": 9389 + }, + { + "epoch": 0.8572211064451342, + "grad_norm": 0.4797278940677643, + "learning_rate": 4.756810023473845e-06, + "loss": 0.5368, + "step": 9390 + }, + { + "epoch": 0.8573123972977907, + "grad_norm": 0.505166232585907, + "learning_rate": 4.756758531478081e-06, + "loss": 0.5158, + "step": 9391 + }, + { + "epoch": 0.8574036881504473, + "grad_norm": 0.5029577016830444, + "learning_rate": 4.756707034310305e-06, + "loss": 0.5322, + "step": 9392 + }, + { + "epoch": 0.8574949790031039, + "grad_norm": 0.5004286766052246, + "learning_rate": 4.7566555319706365e-06, + "loss": 0.5592, + "step": 9393 + }, + { + "epoch": 0.8575862698557605, + "grad_norm": 0.4632117748260498, + "learning_rate": 4.756604024459192e-06, + "loss": 0.5921, + "step": 9394 + }, + { + "epoch": 0.857677560708417, + "grad_norm": 0.47078946232795715, + "learning_rate": 4.756552511776093e-06, + "loss": 0.5551, + "step": 9395 + }, + { + "epoch": 0.8577688515610736, + "grad_norm": 0.4867919981479645, + "learning_rate": 4.756500993921452e-06, + "loss": 0.5503, + "step": 9396 + }, + { + "epoch": 0.8578601424137301, + "grad_norm": 0.4799707233905792, + "learning_rate": 4.756449470895393e-06, + "loss": 0.5125, + "step": 9397 + }, + { + "epoch": 0.8579514332663867, + "grad_norm": 0.48618850111961365, + "learning_rate": 4.75639794269803e-06, + "loss": 0.5436, + "step": 9398 + }, + { + "epoch": 0.8580427241190433, + "grad_norm": 0.4772762954235077, + "learning_rate": 4.756346409329482e-06, + "loss": 0.5358, + "step": 9399 + }, + { + "epoch": 0.8581340149716998, + "grad_norm": 0.46680039167404175, + "learning_rate": 4.756294870789868e-06, + "loss": 0.5751, + "step": 9400 + }, + { + "epoch": 0.8582253058243564, + "grad_norm": 0.4910690486431122, + "learning_rate": 4.756243327079306e-06, + "loss": 0.5072, + "step": 9401 + }, + { + "epoch": 0.858316596677013, + "grad_norm": 0.46024563908576965, + "learning_rate": 4.7561917781979125e-06, + "loss": 0.5575, + "step": 9402 + }, + { + "epoch": 0.8584078875296696, + "grad_norm": 0.48332729935646057, + "learning_rate": 4.7561402241458086e-06, + "loss": 0.5357, + "step": 9403 + }, + { + "epoch": 0.8584991783823261, + "grad_norm": 0.4875890612602234, + "learning_rate": 4.75608866492311e-06, + "loss": 0.534, + "step": 9404 + }, + { + "epoch": 0.8585904692349826, + "grad_norm": 0.4960979223251343, + "learning_rate": 4.7560371005299345e-06, + "loss": 0.5471, + "step": 9405 + }, + { + "epoch": 0.8586817600876392, + "grad_norm": 0.4757026135921478, + "learning_rate": 4.7559855309664024e-06, + "loss": 0.5451, + "step": 9406 + }, + { + "epoch": 0.8587730509402958, + "grad_norm": 0.5214437246322632, + "learning_rate": 4.75593395623263e-06, + "loss": 0.5601, + "step": 9407 + }, + { + "epoch": 0.8588643417929523, + "grad_norm": 0.47790512442588806, + "learning_rate": 4.755882376328738e-06, + "loss": 0.5869, + "step": 9408 + }, + { + "epoch": 0.8589556326456089, + "grad_norm": 0.47096043825149536, + "learning_rate": 4.7558307912548416e-06, + "loss": 0.5611, + "step": 9409 + }, + { + "epoch": 0.8590469234982655, + "grad_norm": 0.45388174057006836, + "learning_rate": 4.75577920101106e-06, + "loss": 0.5582, + "step": 9410 + }, + { + "epoch": 0.8591382143509221, + "grad_norm": 0.5228797793388367, + "learning_rate": 4.755727605597513e-06, + "loss": 0.5142, + "step": 9411 + }, + { + "epoch": 0.8592295052035785, + "grad_norm": 0.49362751841545105, + "learning_rate": 4.755676005014317e-06, + "loss": 0.5507, + "step": 9412 + }, + { + "epoch": 0.8593207960562351, + "grad_norm": 0.49859675765037537, + "learning_rate": 4.755624399261592e-06, + "loss": 0.5445, + "step": 9413 + }, + { + "epoch": 0.8594120869088917, + "grad_norm": 0.47101396322250366, + "learning_rate": 4.755572788339453e-06, + "loss": 0.5541, + "step": 9414 + }, + { + "epoch": 0.8595033777615483, + "grad_norm": 0.5004388093948364, + "learning_rate": 4.755521172248022e-06, + "loss": 0.5243, + "step": 9415 + }, + { + "epoch": 0.8595946686142049, + "grad_norm": 0.44851812720298767, + "learning_rate": 4.7554695509874154e-06, + "loss": 0.5649, + "step": 9416 + }, + { + "epoch": 0.8596859594668614, + "grad_norm": 0.4958984851837158, + "learning_rate": 4.755417924557752e-06, + "loss": 0.5822, + "step": 9417 + }, + { + "epoch": 0.859777250319518, + "grad_norm": 0.531606912612915, + "learning_rate": 4.75536629295915e-06, + "loss": 0.5105, + "step": 9418 + }, + { + "epoch": 0.8598685411721746, + "grad_norm": 0.4906858503818512, + "learning_rate": 4.7553146561917274e-06, + "loss": 0.5539, + "step": 9419 + }, + { + "epoch": 0.8599598320248311, + "grad_norm": 0.4846004545688629, + "learning_rate": 4.755263014255603e-06, + "loss": 0.5411, + "step": 9420 + }, + { + "epoch": 0.8600511228774876, + "grad_norm": 0.48061877489089966, + "learning_rate": 4.755211367150895e-06, + "loss": 0.5102, + "step": 9421 + }, + { + "epoch": 0.8601424137301442, + "grad_norm": 0.5076985955238342, + "learning_rate": 4.755159714877722e-06, + "loss": 0.5567, + "step": 9422 + }, + { + "epoch": 0.8602337045828008, + "grad_norm": 0.4604611098766327, + "learning_rate": 4.755108057436202e-06, + "loss": 0.5833, + "step": 9423 + }, + { + "epoch": 0.8603249954354574, + "grad_norm": 0.4560920298099518, + "learning_rate": 4.755056394826453e-06, + "loss": 0.5375, + "step": 9424 + }, + { + "epoch": 0.860416286288114, + "grad_norm": 0.4652129113674164, + "learning_rate": 4.755004727048594e-06, + "loss": 0.5952, + "step": 9425 + }, + { + "epoch": 0.8605075771407705, + "grad_norm": 0.5247227549552917, + "learning_rate": 4.754953054102744e-06, + "loss": 0.5494, + "step": 9426 + }, + { + "epoch": 0.8605988679934271, + "grad_norm": 0.49846410751342773, + "learning_rate": 4.754901375989021e-06, + "loss": 0.517, + "step": 9427 + }, + { + "epoch": 0.8606901588460836, + "grad_norm": 0.48720091581344604, + "learning_rate": 4.754849692707542e-06, + "loss": 0.5694, + "step": 9428 + }, + { + "epoch": 0.8607814496987402, + "grad_norm": 0.48520514369010925, + "learning_rate": 4.754798004258428e-06, + "loss": 0.53, + "step": 9429 + }, + { + "epoch": 0.8608727405513967, + "grad_norm": 0.4380895793437958, + "learning_rate": 4.754746310641794e-06, + "loss": 0.5385, + "step": 9430 + }, + { + "epoch": 0.8609640314040533, + "grad_norm": 0.47615787386894226, + "learning_rate": 4.754694611857763e-06, + "loss": 0.5211, + "step": 9431 + }, + { + "epoch": 0.8610553222567099, + "grad_norm": 0.4877399504184723, + "learning_rate": 4.754642907906449e-06, + "loss": 0.5704, + "step": 9432 + }, + { + "epoch": 0.8611466131093665, + "grad_norm": 0.4727266728878021, + "learning_rate": 4.754591198787974e-06, + "loss": 0.5842, + "step": 9433 + }, + { + "epoch": 0.861237903962023, + "grad_norm": 0.4810921251773834, + "learning_rate": 4.754539484502454e-06, + "loss": 0.5513, + "step": 9434 + }, + { + "epoch": 0.8613291948146796, + "grad_norm": 0.4890672266483307, + "learning_rate": 4.75448776505001e-06, + "loss": 0.5262, + "step": 9435 + }, + { + "epoch": 0.8614204856673361, + "grad_norm": 0.46897223591804504, + "learning_rate": 4.754436040430758e-06, + "loss": 0.5317, + "step": 9436 + }, + { + "epoch": 0.8615117765199927, + "grad_norm": 0.4724482595920563, + "learning_rate": 4.754384310644817e-06, + "loss": 0.5796, + "step": 9437 + }, + { + "epoch": 0.8616030673726492, + "grad_norm": 0.48229244351387024, + "learning_rate": 4.754332575692308e-06, + "loss": 0.596, + "step": 9438 + }, + { + "epoch": 0.8616943582253058, + "grad_norm": 0.4692899286746979, + "learning_rate": 4.754280835573346e-06, + "loss": 0.5367, + "step": 9439 + }, + { + "epoch": 0.8617856490779624, + "grad_norm": 0.4934101998806, + "learning_rate": 4.754229090288053e-06, + "loss": 0.5409, + "step": 9440 + }, + { + "epoch": 0.861876939930619, + "grad_norm": 0.466210275888443, + "learning_rate": 4.754177339836544e-06, + "loss": 0.5303, + "step": 9441 + }, + { + "epoch": 0.8619682307832756, + "grad_norm": 0.49343952536582947, + "learning_rate": 4.7541255842189415e-06, + "loss": 0.542, + "step": 9442 + }, + { + "epoch": 0.862059521635932, + "grad_norm": 0.46381568908691406, + "learning_rate": 4.754073823435361e-06, + "loss": 0.5797, + "step": 9443 + }, + { + "epoch": 0.8621508124885886, + "grad_norm": 0.508551836013794, + "learning_rate": 4.754022057485923e-06, + "loss": 0.5169, + "step": 9444 + }, + { + "epoch": 0.8622421033412452, + "grad_norm": 0.47972044348716736, + "learning_rate": 4.753970286370744e-06, + "loss": 0.5373, + "step": 9445 + }, + { + "epoch": 0.8623333941939018, + "grad_norm": 0.46342381834983826, + "learning_rate": 4.7539185100899455e-06, + "loss": 0.5949, + "step": 9446 + }, + { + "epoch": 0.8624246850465583, + "grad_norm": 0.48664504289627075, + "learning_rate": 4.753866728643643e-06, + "loss": 0.5194, + "step": 9447 + }, + { + "epoch": 0.8625159758992149, + "grad_norm": 0.48496994376182556, + "learning_rate": 4.753814942031959e-06, + "loss": 0.5268, + "step": 9448 + }, + { + "epoch": 0.8626072667518715, + "grad_norm": 0.456095814704895, + "learning_rate": 4.753763150255009e-06, + "loss": 0.5592, + "step": 9449 + }, + { + "epoch": 0.8626985576045281, + "grad_norm": 0.5262431502342224, + "learning_rate": 4.753711353312913e-06, + "loss": 0.5212, + "step": 9450 + }, + { + "epoch": 0.8627898484571845, + "grad_norm": 0.49542149901390076, + "learning_rate": 4.7536595512057894e-06, + "loss": 0.5625, + "step": 9451 + }, + { + "epoch": 0.8628811393098411, + "grad_norm": 0.4865521192550659, + "learning_rate": 4.753607743933757e-06, + "loss": 0.5529, + "step": 9452 + }, + { + "epoch": 0.8629724301624977, + "grad_norm": 0.49171626567840576, + "learning_rate": 4.753555931496934e-06, + "loss": 0.5459, + "step": 9453 + }, + { + "epoch": 0.8630637210151543, + "grad_norm": 0.5071594715118408, + "learning_rate": 4.75350411389544e-06, + "loss": 0.4847, + "step": 9454 + }, + { + "epoch": 0.8631550118678109, + "grad_norm": 0.5047277808189392, + "learning_rate": 4.753452291129394e-06, + "loss": 0.5635, + "step": 9455 + }, + { + "epoch": 0.8632463027204674, + "grad_norm": 0.4616003632545471, + "learning_rate": 4.753400463198914e-06, + "loss": 0.5911, + "step": 9456 + }, + { + "epoch": 0.863337593573124, + "grad_norm": 0.4990808367729187, + "learning_rate": 4.7533486301041184e-06, + "loss": 0.5304, + "step": 9457 + }, + { + "epoch": 0.8634288844257806, + "grad_norm": 0.4773634076118469, + "learning_rate": 4.753296791845127e-06, + "loss": 0.5672, + "step": 9458 + }, + { + "epoch": 0.863520175278437, + "grad_norm": 0.4875994026660919, + "learning_rate": 4.7532449484220575e-06, + "loss": 0.5214, + "step": 9459 + }, + { + "epoch": 0.8636114661310936, + "grad_norm": 0.47685685753822327, + "learning_rate": 4.75319309983503e-06, + "loss": 0.5678, + "step": 9460 + }, + { + "epoch": 0.8637027569837502, + "grad_norm": 0.4700556695461273, + "learning_rate": 4.753141246084162e-06, + "loss": 0.5429, + "step": 9461 + }, + { + "epoch": 0.8637940478364068, + "grad_norm": 0.4808772802352905, + "learning_rate": 4.7530893871695735e-06, + "loss": 0.56, + "step": 9462 + }, + { + "epoch": 0.8638853386890634, + "grad_norm": 0.4861903190612793, + "learning_rate": 4.753037523091384e-06, + "loss": 0.5471, + "step": 9463 + }, + { + "epoch": 0.8639766295417199, + "grad_norm": 0.46678662300109863, + "learning_rate": 4.75298565384971e-06, + "loss": 0.5495, + "step": 9464 + }, + { + "epoch": 0.8640679203943765, + "grad_norm": 0.4994303286075592, + "learning_rate": 4.752933779444672e-06, + "loss": 0.5435, + "step": 9465 + }, + { + "epoch": 0.8641592112470331, + "grad_norm": 0.4806506931781769, + "learning_rate": 4.752881899876388e-06, + "loss": 0.5758, + "step": 9466 + }, + { + "epoch": 0.8642505020996896, + "grad_norm": 0.4768432378768921, + "learning_rate": 4.752830015144978e-06, + "loss": 0.5391, + "step": 9467 + }, + { + "epoch": 0.8643417929523461, + "grad_norm": 0.4867499768733978, + "learning_rate": 4.75277812525056e-06, + "loss": 0.5283, + "step": 9468 + }, + { + "epoch": 0.8644330838050027, + "grad_norm": 0.5017002820968628, + "learning_rate": 4.752726230193253e-06, + "loss": 0.5223, + "step": 9469 + }, + { + "epoch": 0.8645243746576593, + "grad_norm": 0.4863189458847046, + "learning_rate": 4.752674329973176e-06, + "loss": 0.5805, + "step": 9470 + }, + { + "epoch": 0.8646156655103159, + "grad_norm": 0.5102943778038025, + "learning_rate": 4.752622424590449e-06, + "loss": 0.5719, + "step": 9471 + }, + { + "epoch": 0.8647069563629725, + "grad_norm": 0.49002930521965027, + "learning_rate": 4.75257051404519e-06, + "loss": 0.5164, + "step": 9472 + }, + { + "epoch": 0.864798247215629, + "grad_norm": 0.5121897459030151, + "learning_rate": 4.752518598337517e-06, + "loss": 0.5246, + "step": 9473 + }, + { + "epoch": 0.8648895380682856, + "grad_norm": 0.45700645446777344, + "learning_rate": 4.752466677467551e-06, + "loss": 0.5588, + "step": 9474 + }, + { + "epoch": 0.8649808289209421, + "grad_norm": 0.47176244854927063, + "learning_rate": 4.75241475143541e-06, + "loss": 0.5677, + "step": 9475 + }, + { + "epoch": 0.8650721197735987, + "grad_norm": 0.5160362124443054, + "learning_rate": 4.752362820241212e-06, + "loss": 0.5426, + "step": 9476 + }, + { + "epoch": 0.8651634106262552, + "grad_norm": 0.48482489585876465, + "learning_rate": 4.752310883885078e-06, + "loss": 0.5222, + "step": 9477 + }, + { + "epoch": 0.8652547014789118, + "grad_norm": 0.4905470907688141, + "learning_rate": 4.752258942367126e-06, + "loss": 0.5753, + "step": 9478 + }, + { + "epoch": 0.8653459923315684, + "grad_norm": 0.49586382508277893, + "learning_rate": 4.752206995687474e-06, + "loss": 0.5532, + "step": 9479 + }, + { + "epoch": 0.865437283184225, + "grad_norm": 0.5353582501411438, + "learning_rate": 4.752155043846243e-06, + "loss": 0.562, + "step": 9480 + }, + { + "epoch": 0.8655285740368815, + "grad_norm": 0.4730831980705261, + "learning_rate": 4.752103086843551e-06, + "loss": 0.5805, + "step": 9481 + }, + { + "epoch": 0.865619864889538, + "grad_norm": 0.4724521338939667, + "learning_rate": 4.752051124679518e-06, + "loss": 0.535, + "step": 9482 + }, + { + "epoch": 0.8657111557421946, + "grad_norm": 0.4686634838581085, + "learning_rate": 4.751999157354262e-06, + "loss": 0.5277, + "step": 9483 + }, + { + "epoch": 0.8658024465948512, + "grad_norm": 0.4521164000034332, + "learning_rate": 4.751947184867902e-06, + "loss": 0.5562, + "step": 9484 + }, + { + "epoch": 0.8658937374475077, + "grad_norm": 0.5248975157737732, + "learning_rate": 4.751895207220558e-06, + "loss": 0.5477, + "step": 9485 + }, + { + "epoch": 0.8659850283001643, + "grad_norm": 0.47632890939712524, + "learning_rate": 4.751843224412348e-06, + "loss": 0.5348, + "step": 9486 + }, + { + "epoch": 0.8660763191528209, + "grad_norm": 0.4980429708957672, + "learning_rate": 4.751791236443393e-06, + "loss": 0.5615, + "step": 9487 + }, + { + "epoch": 0.8661676100054775, + "grad_norm": 0.4872818887233734, + "learning_rate": 4.7517392433138095e-06, + "loss": 0.5679, + "step": 9488 + }, + { + "epoch": 0.8662589008581341, + "grad_norm": 0.5172061920166016, + "learning_rate": 4.7516872450237194e-06, + "loss": 0.4956, + "step": 9489 + }, + { + "epoch": 0.8663501917107905, + "grad_norm": 0.4829195737838745, + "learning_rate": 4.75163524157324e-06, + "loss": 0.5332, + "step": 9490 + }, + { + "epoch": 0.8664414825634471, + "grad_norm": 0.502886950969696, + "learning_rate": 4.751583232962491e-06, + "loss": 0.5477, + "step": 9491 + }, + { + "epoch": 0.8665327734161037, + "grad_norm": 0.4627119302749634, + "learning_rate": 4.751531219191592e-06, + "loss": 0.5558, + "step": 9492 + }, + { + "epoch": 0.8666240642687603, + "grad_norm": 0.4691259264945984, + "learning_rate": 4.751479200260662e-06, + "loss": 0.5358, + "step": 9493 + }, + { + "epoch": 0.8667153551214168, + "grad_norm": 0.4954845905303955, + "learning_rate": 4.751427176169819e-06, + "loss": 0.5077, + "step": 9494 + }, + { + "epoch": 0.8668066459740734, + "grad_norm": 0.49095630645751953, + "learning_rate": 4.751375146919185e-06, + "loss": 0.5297, + "step": 9495 + }, + { + "epoch": 0.86689793682673, + "grad_norm": 0.4847298562526703, + "learning_rate": 4.751323112508877e-06, + "loss": 0.5481, + "step": 9496 + }, + { + "epoch": 0.8669892276793866, + "grad_norm": 0.4900030493736267, + "learning_rate": 4.751271072939015e-06, + "loss": 0.5756, + "step": 9497 + }, + { + "epoch": 0.867080518532043, + "grad_norm": 0.47378993034362793, + "learning_rate": 4.751219028209717e-06, + "loss": 0.5569, + "step": 9498 + }, + { + "epoch": 0.8671718093846996, + "grad_norm": 0.4818296730518341, + "learning_rate": 4.751166978321103e-06, + "loss": 0.5466, + "step": 9499 + }, + { + "epoch": 0.8672631002373562, + "grad_norm": 0.49076351523399353, + "learning_rate": 4.7511149232732945e-06, + "loss": 0.5522, + "step": 9500 + }, + { + "epoch": 0.8673543910900128, + "grad_norm": 0.47350001335144043, + "learning_rate": 4.751062863066408e-06, + "loss": 0.5446, + "step": 9501 + }, + { + "epoch": 0.8674456819426694, + "grad_norm": 0.5036921501159668, + "learning_rate": 4.751010797700564e-06, + "loss": 0.5845, + "step": 9502 + }, + { + "epoch": 0.8675369727953259, + "grad_norm": 0.4748121500015259, + "learning_rate": 4.75095872717588e-06, + "loss": 0.5783, + "step": 9503 + }, + { + "epoch": 0.8676282636479825, + "grad_norm": 0.5022366046905518, + "learning_rate": 4.750906651492479e-06, + "loss": 0.5182, + "step": 9504 + }, + { + "epoch": 0.8677195545006391, + "grad_norm": 0.4652891457080841, + "learning_rate": 4.750854570650477e-06, + "loss": 0.5252, + "step": 9505 + }, + { + "epoch": 0.8678108453532956, + "grad_norm": 0.5078173279762268, + "learning_rate": 4.750802484649996e-06, + "loss": 0.5344, + "step": 9506 + }, + { + "epoch": 0.8679021362059521, + "grad_norm": 0.5338416695594788, + "learning_rate": 4.750750393491152e-06, + "loss": 0.4809, + "step": 9507 + }, + { + "epoch": 0.8679934270586087, + "grad_norm": 0.4992601275444031, + "learning_rate": 4.750698297174068e-06, + "loss": 0.5342, + "step": 9508 + }, + { + "epoch": 0.8680847179112653, + "grad_norm": 0.528672993183136, + "learning_rate": 4.750646195698861e-06, + "loss": 0.5036, + "step": 9509 + }, + { + "epoch": 0.8681760087639219, + "grad_norm": 0.5312074422836304, + "learning_rate": 4.750594089065652e-06, + "loss": 0.5701, + "step": 9510 + }, + { + "epoch": 0.8682672996165784, + "grad_norm": 0.47747981548309326, + "learning_rate": 4.750541977274559e-06, + "loss": 0.5328, + "step": 9511 + }, + { + "epoch": 0.868358590469235, + "grad_norm": 0.4820011854171753, + "learning_rate": 4.750489860325702e-06, + "loss": 0.566, + "step": 9512 + }, + { + "epoch": 0.8684498813218915, + "grad_norm": 0.4732617437839508, + "learning_rate": 4.7504377382192e-06, + "loss": 0.5256, + "step": 9513 + }, + { + "epoch": 0.8685411721745481, + "grad_norm": 0.4868529438972473, + "learning_rate": 4.750385610955173e-06, + "loss": 0.5034, + "step": 9514 + }, + { + "epoch": 0.8686324630272046, + "grad_norm": 0.5261827707290649, + "learning_rate": 4.750333478533741e-06, + "loss": 0.4983, + "step": 9515 + }, + { + "epoch": 0.8687237538798612, + "grad_norm": 0.43058666586875916, + "learning_rate": 4.750281340955022e-06, + "loss": 0.5772, + "step": 9516 + }, + { + "epoch": 0.8688150447325178, + "grad_norm": 0.51502525806427, + "learning_rate": 4.750229198219138e-06, + "loss": 0.54, + "step": 9517 + }, + { + "epoch": 0.8689063355851744, + "grad_norm": 0.45068904757499695, + "learning_rate": 4.7501770503262045e-06, + "loss": 0.5523, + "step": 9518 + }, + { + "epoch": 0.868997626437831, + "grad_norm": 0.4648347496986389, + "learning_rate": 4.750124897276345e-06, + "loss": 0.5532, + "step": 9519 + }, + { + "epoch": 0.8690889172904875, + "grad_norm": 0.48400673270225525, + "learning_rate": 4.750072739069677e-06, + "loss": 0.5335, + "step": 9520 + }, + { + "epoch": 0.869180208143144, + "grad_norm": 0.48469701409339905, + "learning_rate": 4.75002057570632e-06, + "loss": 0.5768, + "step": 9521 + }, + { + "epoch": 0.8692714989958006, + "grad_norm": 0.48065945506095886, + "learning_rate": 4.7499684071863935e-06, + "loss": 0.5364, + "step": 9522 + }, + { + "epoch": 0.8693627898484572, + "grad_norm": 0.46816113591194153, + "learning_rate": 4.749916233510019e-06, + "loss": 0.555, + "step": 9523 + }, + { + "epoch": 0.8694540807011137, + "grad_norm": 0.45516422390937805, + "learning_rate": 4.749864054677313e-06, + "loss": 0.5365, + "step": 9524 + }, + { + "epoch": 0.8695453715537703, + "grad_norm": 0.4922797679901123, + "learning_rate": 4.749811870688398e-06, + "loss": 0.5739, + "step": 9525 + }, + { + "epoch": 0.8696366624064269, + "grad_norm": 0.44994786381721497, + "learning_rate": 4.7497596815433915e-06, + "loss": 0.5721, + "step": 9526 + }, + { + "epoch": 0.8697279532590835, + "grad_norm": 0.4941295385360718, + "learning_rate": 4.749707487242414e-06, + "loss": 0.5371, + "step": 9527 + }, + { + "epoch": 0.86981924411174, + "grad_norm": 0.483884334564209, + "learning_rate": 4.749655287785585e-06, + "loss": 0.5876, + "step": 9528 + }, + { + "epoch": 0.8699105349643965, + "grad_norm": 0.47156238555908203, + "learning_rate": 4.749603083173025e-06, + "loss": 0.5433, + "step": 9529 + }, + { + "epoch": 0.8700018258170531, + "grad_norm": 0.5166431069374084, + "learning_rate": 4.7495508734048515e-06, + "loss": 0.5123, + "step": 9530 + }, + { + "epoch": 0.8700931166697097, + "grad_norm": 0.48333507776260376, + "learning_rate": 4.749498658481185e-06, + "loss": 0.5471, + "step": 9531 + }, + { + "epoch": 0.8701844075223663, + "grad_norm": 0.4696745276451111, + "learning_rate": 4.749446438402147e-06, + "loss": 0.5489, + "step": 9532 + }, + { + "epoch": 0.8702756983750228, + "grad_norm": 0.47837725281715393, + "learning_rate": 4.749394213167855e-06, + "loss": 0.5225, + "step": 9533 + }, + { + "epoch": 0.8703669892276794, + "grad_norm": 0.48202022910118103, + "learning_rate": 4.74934198277843e-06, + "loss": 0.5421, + "step": 9534 + }, + { + "epoch": 0.870458280080336, + "grad_norm": 0.4749104976654053, + "learning_rate": 4.749289747233991e-06, + "loss": 0.581, + "step": 9535 + }, + { + "epoch": 0.8705495709329926, + "grad_norm": 0.4717034697532654, + "learning_rate": 4.749237506534656e-06, + "loss": 0.5486, + "step": 9536 + }, + { + "epoch": 0.870640861785649, + "grad_norm": 0.5159360766410828, + "learning_rate": 4.749185260680549e-06, + "loss": 0.5164, + "step": 9537 + }, + { + "epoch": 0.8707321526383056, + "grad_norm": 0.5156829357147217, + "learning_rate": 4.749133009671787e-06, + "loss": 0.5051, + "step": 9538 + }, + { + "epoch": 0.8708234434909622, + "grad_norm": 0.46968817710876465, + "learning_rate": 4.749080753508489e-06, + "loss": 0.5237, + "step": 9539 + }, + { + "epoch": 0.8709147343436188, + "grad_norm": 0.5024563670158386, + "learning_rate": 4.749028492190776e-06, + "loss": 0.5491, + "step": 9540 + }, + { + "epoch": 0.8710060251962753, + "grad_norm": 0.4498848021030426, + "learning_rate": 4.748976225718768e-06, + "loss": 0.5722, + "step": 9541 + }, + { + "epoch": 0.8710973160489319, + "grad_norm": 0.4809415936470032, + "learning_rate": 4.748923954092584e-06, + "loss": 0.5878, + "step": 9542 + }, + { + "epoch": 0.8711886069015885, + "grad_norm": 0.4838898777961731, + "learning_rate": 4.748871677312344e-06, + "loss": 0.5555, + "step": 9543 + }, + { + "epoch": 0.871279897754245, + "grad_norm": 0.47653478384017944, + "learning_rate": 4.748819395378169e-06, + "loss": 0.5319, + "step": 9544 + }, + { + "epoch": 0.8713711886069015, + "grad_norm": 0.5057486295700073, + "learning_rate": 4.748767108290177e-06, + "loss": 0.5247, + "step": 9545 + }, + { + "epoch": 0.8714624794595581, + "grad_norm": 0.48310595750808716, + "learning_rate": 4.74871481604849e-06, + "loss": 0.5521, + "step": 9546 + }, + { + "epoch": 0.8715537703122147, + "grad_norm": 0.49448317289352417, + "learning_rate": 4.748662518653224e-06, + "loss": 0.5149, + "step": 9547 + }, + { + "epoch": 0.8716450611648713, + "grad_norm": 0.49902650713920593, + "learning_rate": 4.748610216104503e-06, + "loss": 0.5415, + "step": 9548 + }, + { + "epoch": 0.8717363520175279, + "grad_norm": 0.47871842980384827, + "learning_rate": 4.748557908402445e-06, + "loss": 0.5636, + "step": 9549 + }, + { + "epoch": 0.8718276428701844, + "grad_norm": 0.4903687536716461, + "learning_rate": 4.74850559554717e-06, + "loss": 0.533, + "step": 9550 + }, + { + "epoch": 0.871918933722841, + "grad_norm": 0.5059404969215393, + "learning_rate": 4.748453277538798e-06, + "loss": 0.5527, + "step": 9551 + }, + { + "epoch": 0.8720102245754975, + "grad_norm": 0.5049099326133728, + "learning_rate": 4.748400954377448e-06, + "loss": 0.552, + "step": 9552 + }, + { + "epoch": 0.8721015154281541, + "grad_norm": 0.4833239018917084, + "learning_rate": 4.748348626063242e-06, + "loss": 0.5638, + "step": 9553 + }, + { + "epoch": 0.8721928062808106, + "grad_norm": 0.4866022765636444, + "learning_rate": 4.748296292596298e-06, + "loss": 0.5639, + "step": 9554 + }, + { + "epoch": 0.8722840971334672, + "grad_norm": 0.46936625242233276, + "learning_rate": 4.748243953976737e-06, + "loss": 0.567, + "step": 9555 + }, + { + "epoch": 0.8723753879861238, + "grad_norm": 0.4562076926231384, + "learning_rate": 4.748191610204679e-06, + "loss": 0.5549, + "step": 9556 + }, + { + "epoch": 0.8724666788387804, + "grad_norm": 0.4617736339569092, + "learning_rate": 4.748139261280242e-06, + "loss": 0.5832, + "step": 9557 + }, + { + "epoch": 0.872557969691437, + "grad_norm": 0.49635326862335205, + "learning_rate": 4.748086907203549e-06, + "loss": 0.555, + "step": 9558 + }, + { + "epoch": 0.8726492605440935, + "grad_norm": 0.47815990447998047, + "learning_rate": 4.748034547974718e-06, + "loss": 0.5485, + "step": 9559 + }, + { + "epoch": 0.87274055139675, + "grad_norm": 0.47400760650634766, + "learning_rate": 4.74798218359387e-06, + "loss": 0.5904, + "step": 9560 + }, + { + "epoch": 0.8728318422494066, + "grad_norm": 0.4993385672569275, + "learning_rate": 4.7479298140611234e-06, + "loss": 0.5102, + "step": 9561 + }, + { + "epoch": 0.8729231331020632, + "grad_norm": 0.45379459857940674, + "learning_rate": 4.7478774393766e-06, + "loss": 0.5785, + "step": 9562 + }, + { + "epoch": 0.8730144239547197, + "grad_norm": 0.4754297435283661, + "learning_rate": 4.74782505954042e-06, + "loss": 0.5842, + "step": 9563 + }, + { + "epoch": 0.8731057148073763, + "grad_norm": 0.5144082307815552, + "learning_rate": 4.747772674552701e-06, + "loss": 0.5315, + "step": 9564 + }, + { + "epoch": 0.8731970056600329, + "grad_norm": 0.4880591928958893, + "learning_rate": 4.747720284413565e-06, + "loss": 0.5407, + "step": 9565 + }, + { + "epoch": 0.8732882965126895, + "grad_norm": 0.4451901912689209, + "learning_rate": 4.747667889123132e-06, + "loss": 0.6079, + "step": 9566 + }, + { + "epoch": 0.873379587365346, + "grad_norm": 0.46759718656539917, + "learning_rate": 4.747615488681522e-06, + "loss": 0.5677, + "step": 9567 + }, + { + "epoch": 0.8734708782180025, + "grad_norm": 0.49398207664489746, + "learning_rate": 4.7475630830888556e-06, + "loss": 0.5948, + "step": 9568 + }, + { + "epoch": 0.8735621690706591, + "grad_norm": 0.4762555658817291, + "learning_rate": 4.747510672345251e-06, + "loss": 0.5337, + "step": 9569 + }, + { + "epoch": 0.8736534599233157, + "grad_norm": 0.47198575735092163, + "learning_rate": 4.74745825645083e-06, + "loss": 0.5531, + "step": 9570 + }, + { + "epoch": 0.8737447507759722, + "grad_norm": 0.4926772117614746, + "learning_rate": 4.747405835405712e-06, + "loss": 0.5458, + "step": 9571 + }, + { + "epoch": 0.8738360416286288, + "grad_norm": 0.4650648236274719, + "learning_rate": 4.747353409210018e-06, + "loss": 0.5461, + "step": 9572 + }, + { + "epoch": 0.8739273324812854, + "grad_norm": 0.49315446615219116, + "learning_rate": 4.747300977863867e-06, + "loss": 0.5288, + "step": 9573 + }, + { + "epoch": 0.874018623333942, + "grad_norm": 0.4677671790122986, + "learning_rate": 4.747248541367379e-06, + "loss": 0.5575, + "step": 9574 + }, + { + "epoch": 0.8741099141865986, + "grad_norm": 0.4830012321472168, + "learning_rate": 4.7471960997206755e-06, + "loss": 0.5439, + "step": 9575 + }, + { + "epoch": 0.874201205039255, + "grad_norm": 0.5021581053733826, + "learning_rate": 4.747143652923877e-06, + "loss": 0.5439, + "step": 9576 + }, + { + "epoch": 0.8742924958919116, + "grad_norm": 0.5183447003364563, + "learning_rate": 4.747091200977102e-06, + "loss": 0.5728, + "step": 9577 + }, + { + "epoch": 0.8743837867445682, + "grad_norm": 0.4798256754875183, + "learning_rate": 4.747038743880471e-06, + "loss": 0.547, + "step": 9578 + }, + { + "epoch": 0.8744750775972248, + "grad_norm": 0.4540221393108368, + "learning_rate": 4.746986281634105e-06, + "loss": 0.5687, + "step": 9579 + }, + { + "epoch": 0.8745663684498813, + "grad_norm": 0.47090673446655273, + "learning_rate": 4.746933814238124e-06, + "loss": 0.537, + "step": 9580 + }, + { + "epoch": 0.8746576593025379, + "grad_norm": 0.46977609395980835, + "learning_rate": 4.746881341692648e-06, + "loss": 0.5533, + "step": 9581 + }, + { + "epoch": 0.8747489501551945, + "grad_norm": 0.49202799797058105, + "learning_rate": 4.746828863997797e-06, + "loss": 0.5274, + "step": 9582 + }, + { + "epoch": 0.874840241007851, + "grad_norm": 0.507973849773407, + "learning_rate": 4.746776381153693e-06, + "loss": 0.5126, + "step": 9583 + }, + { + "epoch": 0.8749315318605075, + "grad_norm": 0.46941542625427246, + "learning_rate": 4.746723893160453e-06, + "loss": 0.5382, + "step": 9584 + }, + { + "epoch": 0.8750228227131641, + "grad_norm": 0.4799335300922394, + "learning_rate": 4.746671400018201e-06, + "loss": 0.5653, + "step": 9585 + }, + { + "epoch": 0.8751141135658207, + "grad_norm": 0.47762203216552734, + "learning_rate": 4.746618901727054e-06, + "loss": 0.5466, + "step": 9586 + }, + { + "epoch": 0.8752054044184773, + "grad_norm": 0.4570963680744171, + "learning_rate": 4.7465663982871356e-06, + "loss": 0.5374, + "step": 9587 + }, + { + "epoch": 0.8752966952711339, + "grad_norm": 0.5035290718078613, + "learning_rate": 4.746513889698563e-06, + "loss": 0.5446, + "step": 9588 + }, + { + "epoch": 0.8753879861237904, + "grad_norm": 0.48533758521080017, + "learning_rate": 4.746461375961459e-06, + "loss": 0.5602, + "step": 9589 + }, + { + "epoch": 0.875479276976447, + "grad_norm": 0.48115742206573486, + "learning_rate": 4.746408857075943e-06, + "loss": 0.5432, + "step": 9590 + }, + { + "epoch": 0.8755705678291035, + "grad_norm": 0.465482234954834, + "learning_rate": 4.746356333042134e-06, + "loss": 0.5648, + "step": 9591 + }, + { + "epoch": 0.87566185868176, + "grad_norm": 0.49078455567359924, + "learning_rate": 4.746303803860154e-06, + "loss": 0.5143, + "step": 9592 + }, + { + "epoch": 0.8757531495344166, + "grad_norm": 0.4713704586029053, + "learning_rate": 4.746251269530123e-06, + "loss": 0.6195, + "step": 9593 + }, + { + "epoch": 0.8758444403870732, + "grad_norm": 0.517711877822876, + "learning_rate": 4.746198730052162e-06, + "loss": 0.5309, + "step": 9594 + }, + { + "epoch": 0.8759357312397298, + "grad_norm": 0.4797493815422058, + "learning_rate": 4.74614618542639e-06, + "loss": 0.5878, + "step": 9595 + }, + { + "epoch": 0.8760270220923864, + "grad_norm": 0.4764379858970642, + "learning_rate": 4.746093635652929e-06, + "loss": 0.5419, + "step": 9596 + }, + { + "epoch": 0.8761183129450429, + "grad_norm": 0.5190867185592651, + "learning_rate": 4.746041080731898e-06, + "loss": 0.5481, + "step": 9597 + }, + { + "epoch": 0.8762096037976995, + "grad_norm": 0.4868491590023041, + "learning_rate": 4.745988520663418e-06, + "loss": 0.5314, + "step": 9598 + }, + { + "epoch": 0.876300894650356, + "grad_norm": 0.46811193227767944, + "learning_rate": 4.74593595544761e-06, + "loss": 0.5164, + "step": 9599 + }, + { + "epoch": 0.8763921855030126, + "grad_norm": 0.47283685207366943, + "learning_rate": 4.745883385084594e-06, + "loss": 0.595, + "step": 9600 + }, + { + "epoch": 0.8764834763556691, + "grad_norm": 0.48550844192504883, + "learning_rate": 4.7458308095744906e-06, + "loss": 0.5073, + "step": 9601 + }, + { + "epoch": 0.8765747672083257, + "grad_norm": 0.5180028080940247, + "learning_rate": 4.74577822891742e-06, + "loss": 0.5296, + "step": 9602 + }, + { + "epoch": 0.8766660580609823, + "grad_norm": 0.49642428755760193, + "learning_rate": 4.745725643113503e-06, + "loss": 0.5377, + "step": 9603 + }, + { + "epoch": 0.8767573489136389, + "grad_norm": 0.46389830112457275, + "learning_rate": 4.74567305216286e-06, + "loss": 0.6008, + "step": 9604 + }, + { + "epoch": 0.8768486397662955, + "grad_norm": 0.5108553171157837, + "learning_rate": 4.745620456065612e-06, + "loss": 0.5402, + "step": 9605 + }, + { + "epoch": 0.876939930618952, + "grad_norm": 0.4795253872871399, + "learning_rate": 4.745567854821879e-06, + "loss": 0.5486, + "step": 9606 + }, + { + "epoch": 0.8770312214716085, + "grad_norm": 0.46842217445373535, + "learning_rate": 4.745515248431781e-06, + "loss": 0.5791, + "step": 9607 + }, + { + "epoch": 0.8771225123242651, + "grad_norm": 0.4942114055156708, + "learning_rate": 4.745462636895439e-06, + "loss": 0.5498, + "step": 9608 + }, + { + "epoch": 0.8772138031769217, + "grad_norm": 0.4640643298625946, + "learning_rate": 4.745410020212975e-06, + "loss": 0.5141, + "step": 9609 + }, + { + "epoch": 0.8773050940295782, + "grad_norm": 0.4629029929637909, + "learning_rate": 4.7453573983845065e-06, + "loss": 0.5728, + "step": 9610 + }, + { + "epoch": 0.8773963848822348, + "grad_norm": 0.46856802701950073, + "learning_rate": 4.745304771410158e-06, + "loss": 0.5242, + "step": 9611 + }, + { + "epoch": 0.8774876757348914, + "grad_norm": 0.46760281920433044, + "learning_rate": 4.745252139290047e-06, + "loss": 0.5645, + "step": 9612 + }, + { + "epoch": 0.877578966587548, + "grad_norm": 0.49154946208000183, + "learning_rate": 4.7451995020242954e-06, + "loss": 0.5751, + "step": 9613 + }, + { + "epoch": 0.8776702574402044, + "grad_norm": 0.5077400207519531, + "learning_rate": 4.7451468596130235e-06, + "loss": 0.543, + "step": 9614 + }, + { + "epoch": 0.877761548292861, + "grad_norm": 0.4897630214691162, + "learning_rate": 4.7450942120563525e-06, + "loss": 0.5543, + "step": 9615 + }, + { + "epoch": 0.8778528391455176, + "grad_norm": 0.4825259745121002, + "learning_rate": 4.745041559354402e-06, + "loss": 0.5449, + "step": 9616 + }, + { + "epoch": 0.8779441299981742, + "grad_norm": 0.5131142139434814, + "learning_rate": 4.744988901507294e-06, + "loss": 0.5494, + "step": 9617 + }, + { + "epoch": 0.8780354208508308, + "grad_norm": 0.5098428726196289, + "learning_rate": 4.744936238515147e-06, + "loss": 0.5106, + "step": 9618 + }, + { + "epoch": 0.8781267117034873, + "grad_norm": 0.4880376160144806, + "learning_rate": 4.744883570378085e-06, + "loss": 0.5456, + "step": 9619 + }, + { + "epoch": 0.8782180025561439, + "grad_norm": 0.48651373386383057, + "learning_rate": 4.744830897096226e-06, + "loss": 0.5016, + "step": 9620 + }, + { + "epoch": 0.8783092934088005, + "grad_norm": 0.49848857522010803, + "learning_rate": 4.744778218669692e-06, + "loss": 0.5814, + "step": 9621 + }, + { + "epoch": 0.878400584261457, + "grad_norm": 0.4793250858783722, + "learning_rate": 4.744725535098602e-06, + "loss": 0.5306, + "step": 9622 + }, + { + "epoch": 0.8784918751141135, + "grad_norm": 0.4891645312309265, + "learning_rate": 4.74467284638308e-06, + "loss": 0.5529, + "step": 9623 + }, + { + "epoch": 0.8785831659667701, + "grad_norm": 0.49622824788093567, + "learning_rate": 4.744620152523243e-06, + "loss": 0.5226, + "step": 9624 + }, + { + "epoch": 0.8786744568194267, + "grad_norm": 0.48680439591407776, + "learning_rate": 4.744567453519214e-06, + "loss": 0.5478, + "step": 9625 + }, + { + "epoch": 0.8787657476720833, + "grad_norm": 0.4678245186805725, + "learning_rate": 4.744514749371115e-06, + "loss": 0.5779, + "step": 9626 + }, + { + "epoch": 0.8788570385247398, + "grad_norm": 0.4934406876564026, + "learning_rate": 4.744462040079063e-06, + "loss": 0.5544, + "step": 9627 + }, + { + "epoch": 0.8789483293773964, + "grad_norm": 0.4843105673789978, + "learning_rate": 4.744409325643182e-06, + "loss": 0.5137, + "step": 9628 + }, + { + "epoch": 0.879039620230053, + "grad_norm": 0.4503657817840576, + "learning_rate": 4.744356606063591e-06, + "loss": 0.5716, + "step": 9629 + }, + { + "epoch": 0.8791309110827095, + "grad_norm": 0.500731885433197, + "learning_rate": 4.744303881340413e-06, + "loss": 0.5325, + "step": 9630 + }, + { + "epoch": 0.879222201935366, + "grad_norm": 0.49269455671310425, + "learning_rate": 4.744251151473767e-06, + "loss": 0.583, + "step": 9631 + }, + { + "epoch": 0.8793134927880226, + "grad_norm": 0.49824973940849304, + "learning_rate": 4.744198416463773e-06, + "loss": 0.5616, + "step": 9632 + }, + { + "epoch": 0.8794047836406792, + "grad_norm": 0.4785408675670624, + "learning_rate": 4.744145676310555e-06, + "loss": 0.5945, + "step": 9633 + }, + { + "epoch": 0.8794960744933358, + "grad_norm": 0.47946518659591675, + "learning_rate": 4.74409293101423e-06, + "loss": 0.5707, + "step": 9634 + }, + { + "epoch": 0.8795873653459924, + "grad_norm": 0.4761194586753845, + "learning_rate": 4.744040180574921e-06, + "loss": 0.5498, + "step": 9635 + }, + { + "epoch": 0.8796786561986489, + "grad_norm": 0.5119456052780151, + "learning_rate": 4.74398742499275e-06, + "loss": 0.4942, + "step": 9636 + }, + { + "epoch": 0.8797699470513055, + "grad_norm": 0.477285772562027, + "learning_rate": 4.743934664267836e-06, + "loss": 0.5642, + "step": 9637 + }, + { + "epoch": 0.879861237903962, + "grad_norm": 0.48715031147003174, + "learning_rate": 4.743881898400301e-06, + "loss": 0.5203, + "step": 9638 + }, + { + "epoch": 0.8799525287566186, + "grad_norm": 0.48082998394966125, + "learning_rate": 4.743829127390265e-06, + "loss": 0.5171, + "step": 9639 + }, + { + "epoch": 0.8800438196092751, + "grad_norm": 0.4954397678375244, + "learning_rate": 4.743776351237849e-06, + "loss": 0.5551, + "step": 9640 + }, + { + "epoch": 0.8801351104619317, + "grad_norm": 0.4693007171154022, + "learning_rate": 4.743723569943175e-06, + "loss": 0.5435, + "step": 9641 + }, + { + "epoch": 0.8802264013145883, + "grad_norm": 0.45413634181022644, + "learning_rate": 4.7436707835063635e-06, + "loss": 0.5648, + "step": 9642 + }, + { + "epoch": 0.8803176921672449, + "grad_norm": 0.4816807508468628, + "learning_rate": 4.743617991927535e-06, + "loss": 0.5373, + "step": 9643 + }, + { + "epoch": 0.8804089830199014, + "grad_norm": 0.49090075492858887, + "learning_rate": 4.7435651952068106e-06, + "loss": 0.5383, + "step": 9644 + }, + { + "epoch": 0.8805002738725579, + "grad_norm": 0.49735578894615173, + "learning_rate": 4.743512393344312e-06, + "loss": 0.5135, + "step": 9645 + }, + { + "epoch": 0.8805915647252145, + "grad_norm": 0.47273141145706177, + "learning_rate": 4.74345958634016e-06, + "loss": 0.5395, + "step": 9646 + }, + { + "epoch": 0.8806828555778711, + "grad_norm": 0.49544766545295715, + "learning_rate": 4.743406774194474e-06, + "loss": 0.5534, + "step": 9647 + }, + { + "epoch": 0.8807741464305276, + "grad_norm": 0.4980435073375702, + "learning_rate": 4.7433539569073786e-06, + "loss": 0.5791, + "step": 9648 + }, + { + "epoch": 0.8808654372831842, + "grad_norm": 0.4818391799926758, + "learning_rate": 4.74330113447899e-06, + "loss": 0.5441, + "step": 9649 + }, + { + "epoch": 0.8809567281358408, + "grad_norm": 0.4821249842643738, + "learning_rate": 4.743248306909433e-06, + "loss": 0.5198, + "step": 9650 + }, + { + "epoch": 0.8810480189884974, + "grad_norm": 0.49843814969062805, + "learning_rate": 4.743195474198828e-06, + "loss": 0.547, + "step": 9651 + }, + { + "epoch": 0.881139309841154, + "grad_norm": 0.4806634485721588, + "learning_rate": 4.743142636347296e-06, + "loss": 0.5822, + "step": 9652 + }, + { + "epoch": 0.8812306006938104, + "grad_norm": 0.5012041330337524, + "learning_rate": 4.743089793354957e-06, + "loss": 0.5106, + "step": 9653 + }, + { + "epoch": 0.881321891546467, + "grad_norm": 0.4762072265148163, + "learning_rate": 4.743036945221933e-06, + "loss": 0.5723, + "step": 9654 + }, + { + "epoch": 0.8814131823991236, + "grad_norm": 0.4843088984489441, + "learning_rate": 4.742984091948345e-06, + "loss": 0.5616, + "step": 9655 + }, + { + "epoch": 0.8815044732517802, + "grad_norm": 0.48841413855552673, + "learning_rate": 4.7429312335343135e-06, + "loss": 0.5175, + "step": 9656 + }, + { + "epoch": 0.8815957641044367, + "grad_norm": 0.4892910122871399, + "learning_rate": 4.742878369979961e-06, + "loss": 0.5683, + "step": 9657 + }, + { + "epoch": 0.8816870549570933, + "grad_norm": 0.49972495436668396, + "learning_rate": 4.7428255012854075e-06, + "loss": 0.5136, + "step": 9658 + }, + { + "epoch": 0.8817783458097499, + "grad_norm": 0.4842863976955414, + "learning_rate": 4.742772627450774e-06, + "loss": 0.5373, + "step": 9659 + }, + { + "epoch": 0.8818696366624065, + "grad_norm": 0.48536601662635803, + "learning_rate": 4.742719748476183e-06, + "loss": 0.5709, + "step": 9660 + }, + { + "epoch": 0.8819609275150629, + "grad_norm": 0.4844529628753662, + "learning_rate": 4.742666864361755e-06, + "loss": 0.5503, + "step": 9661 + }, + { + "epoch": 0.8820522183677195, + "grad_norm": 0.49564462900161743, + "learning_rate": 4.74261397510761e-06, + "loss": 0.5538, + "step": 9662 + }, + { + "epoch": 0.8821435092203761, + "grad_norm": 0.47797930240631104, + "learning_rate": 4.742561080713871e-06, + "loss": 0.6102, + "step": 9663 + }, + { + "epoch": 0.8822348000730327, + "grad_norm": 0.5045537352561951, + "learning_rate": 4.742508181180658e-06, + "loss": 0.5408, + "step": 9664 + }, + { + "epoch": 0.8823260909256893, + "grad_norm": 0.49151864647865295, + "learning_rate": 4.742455276508094e-06, + "loss": 0.5382, + "step": 9665 + }, + { + "epoch": 0.8824173817783458, + "grad_norm": 0.5141873955726624, + "learning_rate": 4.742402366696298e-06, + "loss": 0.5346, + "step": 9666 + }, + { + "epoch": 0.8825086726310024, + "grad_norm": 0.507693886756897, + "learning_rate": 4.742349451745393e-06, + "loss": 0.5084, + "step": 9667 + }, + { + "epoch": 0.882599963483659, + "grad_norm": 0.4712715148925781, + "learning_rate": 4.742296531655499e-06, + "loss": 0.6062, + "step": 9668 + }, + { + "epoch": 0.8826912543363155, + "grad_norm": 0.49028411507606506, + "learning_rate": 4.742243606426738e-06, + "loss": 0.5445, + "step": 9669 + }, + { + "epoch": 0.882782545188972, + "grad_norm": 0.4732586443424225, + "learning_rate": 4.742190676059231e-06, + "loss": 0.532, + "step": 9670 + }, + { + "epoch": 0.8828738360416286, + "grad_norm": 0.49051469564437866, + "learning_rate": 4.742137740553099e-06, + "loss": 0.487, + "step": 9671 + }, + { + "epoch": 0.8829651268942852, + "grad_norm": 0.4755912721157074, + "learning_rate": 4.742084799908464e-06, + "loss": 0.5237, + "step": 9672 + }, + { + "epoch": 0.8830564177469418, + "grad_norm": 0.5069429278373718, + "learning_rate": 4.7420318541254475e-06, + "loss": 0.5422, + "step": 9673 + }, + { + "epoch": 0.8831477085995983, + "grad_norm": 0.4722207486629486, + "learning_rate": 4.74197890320417e-06, + "loss": 0.5607, + "step": 9674 + }, + { + "epoch": 0.8832389994522549, + "grad_norm": 0.4870489239692688, + "learning_rate": 4.741925947144753e-06, + "loss": 0.5317, + "step": 9675 + }, + { + "epoch": 0.8833302903049115, + "grad_norm": 0.5010505318641663, + "learning_rate": 4.741872985947319e-06, + "loss": 0.5559, + "step": 9676 + }, + { + "epoch": 0.883421581157568, + "grad_norm": 0.483924925327301, + "learning_rate": 4.741820019611988e-06, + "loss": 0.5703, + "step": 9677 + }, + { + "epoch": 0.8835128720102245, + "grad_norm": 0.5099485516548157, + "learning_rate": 4.741767048138882e-06, + "loss": 0.549, + "step": 9678 + }, + { + "epoch": 0.8836041628628811, + "grad_norm": 0.5151556730270386, + "learning_rate": 4.741714071528121e-06, + "loss": 0.5373, + "step": 9679 + }, + { + "epoch": 0.8836954537155377, + "grad_norm": 0.4811212420463562, + "learning_rate": 4.74166108977983e-06, + "loss": 0.5635, + "step": 9680 + }, + { + "epoch": 0.8837867445681943, + "grad_norm": 0.4853441119194031, + "learning_rate": 4.741608102894127e-06, + "loss": 0.5561, + "step": 9681 + }, + { + "epoch": 0.8838780354208509, + "grad_norm": 0.4872954487800598, + "learning_rate": 4.741555110871134e-06, + "loss": 0.5866, + "step": 9682 + }, + { + "epoch": 0.8839693262735074, + "grad_norm": 0.5414175391197205, + "learning_rate": 4.741502113710974e-06, + "loss": 0.5212, + "step": 9683 + }, + { + "epoch": 0.8840606171261639, + "grad_norm": 0.47966763377189636, + "learning_rate": 4.741449111413767e-06, + "loss": 0.5361, + "step": 9684 + }, + { + "epoch": 0.8841519079788205, + "grad_norm": 0.4899732172489166, + "learning_rate": 4.741396103979634e-06, + "loss": 0.5263, + "step": 9685 + }, + { + "epoch": 0.8842431988314771, + "grad_norm": 0.4977807402610779, + "learning_rate": 4.741343091408699e-06, + "loss": 0.5322, + "step": 9686 + }, + { + "epoch": 0.8843344896841336, + "grad_norm": 0.4617640972137451, + "learning_rate": 4.741290073701081e-06, + "loss": 0.5573, + "step": 9687 + }, + { + "epoch": 0.8844257805367902, + "grad_norm": 0.4806778132915497, + "learning_rate": 4.7412370508569025e-06, + "loss": 0.5185, + "step": 9688 + }, + { + "epoch": 0.8845170713894468, + "grad_norm": 0.47333455085754395, + "learning_rate": 4.741184022876285e-06, + "loss": 0.5679, + "step": 9689 + }, + { + "epoch": 0.8846083622421034, + "grad_norm": 0.49010542035102844, + "learning_rate": 4.74113098975935e-06, + "loss": 0.5887, + "step": 9690 + }, + { + "epoch": 0.88469965309476, + "grad_norm": 0.4797598123550415, + "learning_rate": 4.741077951506219e-06, + "loss": 0.5736, + "step": 9691 + }, + { + "epoch": 0.8847909439474164, + "grad_norm": 0.44860225915908813, + "learning_rate": 4.7410249081170135e-06, + "loss": 0.566, + "step": 9692 + }, + { + "epoch": 0.884882234800073, + "grad_norm": 0.456317663192749, + "learning_rate": 4.7409718595918555e-06, + "loss": 0.5789, + "step": 9693 + }, + { + "epoch": 0.8849735256527296, + "grad_norm": 0.49896034598350525, + "learning_rate": 4.740918805930866e-06, + "loss": 0.5355, + "step": 9694 + }, + { + "epoch": 0.8850648165053862, + "grad_norm": 0.4755327105522156, + "learning_rate": 4.740865747134166e-06, + "loss": 0.5699, + "step": 9695 + }, + { + "epoch": 0.8851561073580427, + "grad_norm": 0.4783855378627777, + "learning_rate": 4.740812683201878e-06, + "loss": 0.5565, + "step": 9696 + }, + { + "epoch": 0.8852473982106993, + "grad_norm": 0.4633442461490631, + "learning_rate": 4.740759614134124e-06, + "loss": 0.6039, + "step": 9697 + }, + { + "epoch": 0.8853386890633559, + "grad_norm": 0.47562888264656067, + "learning_rate": 4.740706539931025e-06, + "loss": 0.5561, + "step": 9698 + }, + { + "epoch": 0.8854299799160125, + "grad_norm": 0.4741053581237793, + "learning_rate": 4.740653460592702e-06, + "loss": 0.5818, + "step": 9699 + }, + { + "epoch": 0.8855212707686689, + "grad_norm": 0.5000898241996765, + "learning_rate": 4.740600376119279e-06, + "loss": 0.5267, + "step": 9700 + }, + { + "epoch": 0.8856125616213255, + "grad_norm": 0.5132308602333069, + "learning_rate": 4.740547286510875e-06, + "loss": 0.5136, + "step": 9701 + }, + { + "epoch": 0.8857038524739821, + "grad_norm": 0.47409796714782715, + "learning_rate": 4.740494191767613e-06, + "loss": 0.5365, + "step": 9702 + }, + { + "epoch": 0.8857951433266387, + "grad_norm": 0.5042061805725098, + "learning_rate": 4.740441091889614e-06, + "loss": 0.519, + "step": 9703 + }, + { + "epoch": 0.8858864341792952, + "grad_norm": 0.4840458035469055, + "learning_rate": 4.740387986877e-06, + "loss": 0.5412, + "step": 9704 + }, + { + "epoch": 0.8859777250319518, + "grad_norm": 0.4831397533416748, + "learning_rate": 4.740334876729893e-06, + "loss": 0.5373, + "step": 9705 + }, + { + "epoch": 0.8860690158846084, + "grad_norm": 0.4515373706817627, + "learning_rate": 4.740281761448415e-06, + "loss": 0.5369, + "step": 9706 + }, + { + "epoch": 0.886160306737265, + "grad_norm": 0.47570496797561646, + "learning_rate": 4.740228641032687e-06, + "loss": 0.5467, + "step": 9707 + }, + { + "epoch": 0.8862515975899214, + "grad_norm": 0.46923238039016724, + "learning_rate": 4.740175515482831e-06, + "loss": 0.5572, + "step": 9708 + }, + { + "epoch": 0.886342888442578, + "grad_norm": 0.4569782614707947, + "learning_rate": 4.7401223847989685e-06, + "loss": 0.5914, + "step": 9709 + }, + { + "epoch": 0.8864341792952346, + "grad_norm": 0.5007009506225586, + "learning_rate": 4.740069248981222e-06, + "loss": 0.5431, + "step": 9710 + }, + { + "epoch": 0.8865254701478912, + "grad_norm": 0.4999227821826935, + "learning_rate": 4.740016108029712e-06, + "loss": 0.5812, + "step": 9711 + }, + { + "epoch": 0.8866167610005478, + "grad_norm": 0.4883512854576111, + "learning_rate": 4.739962961944561e-06, + "loss": 0.5086, + "step": 9712 + }, + { + "epoch": 0.8867080518532043, + "grad_norm": 0.5045924186706543, + "learning_rate": 4.739909810725891e-06, + "loss": 0.5264, + "step": 9713 + }, + { + "epoch": 0.8867993427058609, + "grad_norm": 0.5050753355026245, + "learning_rate": 4.739856654373824e-06, + "loss": 0.5157, + "step": 9714 + }, + { + "epoch": 0.8868906335585174, + "grad_norm": 0.4873996675014496, + "learning_rate": 4.739803492888482e-06, + "loss": 0.53, + "step": 9715 + }, + { + "epoch": 0.886981924411174, + "grad_norm": 0.4969002902507782, + "learning_rate": 4.739750326269985e-06, + "loss": 0.5344, + "step": 9716 + }, + { + "epoch": 0.8870732152638305, + "grad_norm": 0.46915677189826965, + "learning_rate": 4.739697154518458e-06, + "loss": 0.5369, + "step": 9717 + }, + { + "epoch": 0.8871645061164871, + "grad_norm": 0.4722156822681427, + "learning_rate": 4.7396439776340195e-06, + "loss": 0.4898, + "step": 9718 + }, + { + "epoch": 0.8872557969691437, + "grad_norm": 0.4830499291419983, + "learning_rate": 4.739590795616793e-06, + "loss": 0.5583, + "step": 9719 + }, + { + "epoch": 0.8873470878218003, + "grad_norm": 0.46647971868515015, + "learning_rate": 4.739537608466902e-06, + "loss": 0.5683, + "step": 9720 + }, + { + "epoch": 0.8874383786744569, + "grad_norm": 0.4906458258628845, + "learning_rate": 4.739484416184465e-06, + "loss": 0.5317, + "step": 9721 + }, + { + "epoch": 0.8875296695271134, + "grad_norm": 0.5000008940696716, + "learning_rate": 4.739431218769606e-06, + "loss": 0.549, + "step": 9722 + }, + { + "epoch": 0.8876209603797699, + "grad_norm": 0.4698043167591095, + "learning_rate": 4.739378016222446e-06, + "loss": 0.5536, + "step": 9723 + }, + { + "epoch": 0.8877122512324265, + "grad_norm": 0.4496721625328064, + "learning_rate": 4.739324808543108e-06, + "loss": 0.6167, + "step": 9724 + }, + { + "epoch": 0.887803542085083, + "grad_norm": 0.4953000247478485, + "learning_rate": 4.739271595731714e-06, + "loss": 0.5659, + "step": 9725 + }, + { + "epoch": 0.8878948329377396, + "grad_norm": 0.4600599408149719, + "learning_rate": 4.739218377788384e-06, + "loss": 0.5825, + "step": 9726 + }, + { + "epoch": 0.8879861237903962, + "grad_norm": 0.4407881796360016, + "learning_rate": 4.7391651547132414e-06, + "loss": 0.5454, + "step": 9727 + }, + { + "epoch": 0.8880774146430528, + "grad_norm": 0.510262668132782, + "learning_rate": 4.739111926506409e-06, + "loss": 0.5217, + "step": 9728 + }, + { + "epoch": 0.8881687054957094, + "grad_norm": 0.4992646276950836, + "learning_rate": 4.739058693168007e-06, + "loss": 0.5156, + "step": 9729 + }, + { + "epoch": 0.8882599963483659, + "grad_norm": 0.472706139087677, + "learning_rate": 4.739005454698159e-06, + "loss": 0.5426, + "step": 9730 + }, + { + "epoch": 0.8883512872010224, + "grad_norm": 0.5012562274932861, + "learning_rate": 4.738952211096986e-06, + "loss": 0.5105, + "step": 9731 + }, + { + "epoch": 0.888442578053679, + "grad_norm": 0.5046544671058655, + "learning_rate": 4.7388989623646095e-06, + "loss": 0.508, + "step": 9732 + }, + { + "epoch": 0.8885338689063356, + "grad_norm": 0.4672505557537079, + "learning_rate": 4.738845708501154e-06, + "loss": 0.5507, + "step": 9733 + }, + { + "epoch": 0.8886251597589921, + "grad_norm": 0.4680117964744568, + "learning_rate": 4.738792449506739e-06, + "loss": 0.5661, + "step": 9734 + }, + { + "epoch": 0.8887164506116487, + "grad_norm": 0.49207523465156555, + "learning_rate": 4.738739185381487e-06, + "loss": 0.5402, + "step": 9735 + }, + { + "epoch": 0.8888077414643053, + "grad_norm": 0.4868602752685547, + "learning_rate": 4.73868591612552e-06, + "loss": 0.5579, + "step": 9736 + }, + { + "epoch": 0.8888990323169619, + "grad_norm": 0.46213671565055847, + "learning_rate": 4.7386326417389625e-06, + "loss": 0.5548, + "step": 9737 + }, + { + "epoch": 0.8889903231696185, + "grad_norm": 0.496646910905838, + "learning_rate": 4.738579362221934e-06, + "loss": 0.5481, + "step": 9738 + }, + { + "epoch": 0.8890816140222749, + "grad_norm": 0.48929426074028015, + "learning_rate": 4.738526077574556e-06, + "loss": 0.5405, + "step": 9739 + }, + { + "epoch": 0.8891729048749315, + "grad_norm": 0.46411189436912537, + "learning_rate": 4.738472787796953e-06, + "loss": 0.5785, + "step": 9740 + }, + { + "epoch": 0.8892641957275881, + "grad_norm": 0.4699157476425171, + "learning_rate": 4.738419492889246e-06, + "loss": 0.5522, + "step": 9741 + }, + { + "epoch": 0.8893554865802447, + "grad_norm": 0.5140062570571899, + "learning_rate": 4.7383661928515565e-06, + "loss": 0.5088, + "step": 9742 + }, + { + "epoch": 0.8894467774329012, + "grad_norm": 0.4725669324398041, + "learning_rate": 4.738312887684008e-06, + "loss": 0.5573, + "step": 9743 + }, + { + "epoch": 0.8895380682855578, + "grad_norm": 0.45203277468681335, + "learning_rate": 4.738259577386722e-06, + "loss": 0.5922, + "step": 9744 + }, + { + "epoch": 0.8896293591382144, + "grad_norm": 0.48643580079078674, + "learning_rate": 4.73820626195982e-06, + "loss": 0.5255, + "step": 9745 + }, + { + "epoch": 0.8897206499908709, + "grad_norm": 0.5034816265106201, + "learning_rate": 4.738152941403426e-06, + "loss": 0.5361, + "step": 9746 + }, + { + "epoch": 0.8898119408435274, + "grad_norm": 0.4616742432117462, + "learning_rate": 4.738099615717659e-06, + "loss": 0.5574, + "step": 9747 + }, + { + "epoch": 0.889903231696184, + "grad_norm": 0.5081192851066589, + "learning_rate": 4.738046284902646e-06, + "loss": 0.5315, + "step": 9748 + }, + { + "epoch": 0.8899945225488406, + "grad_norm": 0.4964917004108429, + "learning_rate": 4.737992948958504e-06, + "loss": 0.5325, + "step": 9749 + }, + { + "epoch": 0.8900858134014972, + "grad_norm": 0.4885629415512085, + "learning_rate": 4.737939607885359e-06, + "loss": 0.5487, + "step": 9750 + }, + { + "epoch": 0.8901771042541538, + "grad_norm": 0.4960731863975525, + "learning_rate": 4.737886261683331e-06, + "loss": 0.5154, + "step": 9751 + }, + { + "epoch": 0.8902683951068103, + "grad_norm": 0.5034655332565308, + "learning_rate": 4.737832910352544e-06, + "loss": 0.4783, + "step": 9752 + }, + { + "epoch": 0.8903596859594669, + "grad_norm": 0.46919959783554077, + "learning_rate": 4.73777955389312e-06, + "loss": 0.5652, + "step": 9753 + }, + { + "epoch": 0.8904509768121234, + "grad_norm": 0.47106310725212097, + "learning_rate": 4.73772619230518e-06, + "loss": 0.5299, + "step": 9754 + }, + { + "epoch": 0.89054226766478, + "grad_norm": 0.5110413432121277, + "learning_rate": 4.7376728255888475e-06, + "loss": 0.5506, + "step": 9755 + }, + { + "epoch": 0.8906335585174365, + "grad_norm": 0.4769456386566162, + "learning_rate": 4.737619453744244e-06, + "loss": 0.5368, + "step": 9756 + }, + { + "epoch": 0.8907248493700931, + "grad_norm": 0.462380588054657, + "learning_rate": 4.737566076771492e-06, + "loss": 0.5442, + "step": 9757 + }, + { + "epoch": 0.8908161402227497, + "grad_norm": 0.49616411328315735, + "learning_rate": 4.737512694670714e-06, + "loss": 0.5401, + "step": 9758 + }, + { + "epoch": 0.8909074310754063, + "grad_norm": 0.46622931957244873, + "learning_rate": 4.7374593074420324e-06, + "loss": 0.5643, + "step": 9759 + }, + { + "epoch": 0.8909987219280628, + "grad_norm": 0.47032079100608826, + "learning_rate": 4.73740591508557e-06, + "loss": 0.5814, + "step": 9760 + }, + { + "epoch": 0.8910900127807194, + "grad_norm": 0.4921687841415405, + "learning_rate": 4.737352517601449e-06, + "loss": 0.5442, + "step": 9761 + }, + { + "epoch": 0.8911813036333759, + "grad_norm": 0.5057276487350464, + "learning_rate": 4.7372991149897905e-06, + "loss": 0.5661, + "step": 9762 + }, + { + "epoch": 0.8912725944860325, + "grad_norm": 0.4974578022956848, + "learning_rate": 4.737245707250718e-06, + "loss": 0.5038, + "step": 9763 + }, + { + "epoch": 0.891363885338689, + "grad_norm": 0.48101577162742615, + "learning_rate": 4.737192294384354e-06, + "loss": 0.5346, + "step": 9764 + }, + { + "epoch": 0.8914551761913456, + "grad_norm": 0.4678013324737549, + "learning_rate": 4.737138876390821e-06, + "loss": 0.5872, + "step": 9765 + }, + { + "epoch": 0.8915464670440022, + "grad_norm": 0.47695934772491455, + "learning_rate": 4.73708545327024e-06, + "loss": 0.578, + "step": 9766 + }, + { + "epoch": 0.8916377578966588, + "grad_norm": 0.44878166913986206, + "learning_rate": 4.737032025022735e-06, + "loss": 0.5681, + "step": 9767 + }, + { + "epoch": 0.8917290487493154, + "grad_norm": 0.4862305223941803, + "learning_rate": 4.736978591648429e-06, + "loss": 0.5754, + "step": 9768 + }, + { + "epoch": 0.8918203396019719, + "grad_norm": 0.4923976957798004, + "learning_rate": 4.736925153147442e-06, + "loss": 0.5265, + "step": 9769 + }, + { + "epoch": 0.8919116304546284, + "grad_norm": 0.479159951210022, + "learning_rate": 4.736871709519898e-06, + "loss": 0.5695, + "step": 9770 + }, + { + "epoch": 0.892002921307285, + "grad_norm": 0.4935130774974823, + "learning_rate": 4.736818260765919e-06, + "loss": 0.5518, + "step": 9771 + }, + { + "epoch": 0.8920942121599416, + "grad_norm": 0.4740699529647827, + "learning_rate": 4.736764806885629e-06, + "loss": 0.5396, + "step": 9772 + }, + { + "epoch": 0.8921855030125981, + "grad_norm": 0.5039395689964294, + "learning_rate": 4.7367113478791496e-06, + "loss": 0.5401, + "step": 9773 + }, + { + "epoch": 0.8922767938652547, + "grad_norm": 0.49850305914878845, + "learning_rate": 4.7366578837466025e-06, + "loss": 0.5349, + "step": 9774 + }, + { + "epoch": 0.8923680847179113, + "grad_norm": 0.4579481780529022, + "learning_rate": 4.736604414488111e-06, + "loss": 0.5736, + "step": 9775 + }, + { + "epoch": 0.8924593755705679, + "grad_norm": 0.4760442078113556, + "learning_rate": 4.736550940103797e-06, + "loss": 0.5377, + "step": 9776 + }, + { + "epoch": 0.8925506664232244, + "grad_norm": 0.4542732834815979, + "learning_rate": 4.736497460593783e-06, + "loss": 0.5828, + "step": 9777 + }, + { + "epoch": 0.8926419572758809, + "grad_norm": 0.4631330370903015, + "learning_rate": 4.7364439759581925e-06, + "loss": 0.5489, + "step": 9778 + }, + { + "epoch": 0.8927332481285375, + "grad_norm": 0.48207616806030273, + "learning_rate": 4.7363904861971474e-06, + "loss": 0.5627, + "step": 9779 + }, + { + "epoch": 0.8928245389811941, + "grad_norm": 0.47644343972206116, + "learning_rate": 4.736336991310771e-06, + "loss": 0.5534, + "step": 9780 + }, + { + "epoch": 0.8929158298338506, + "grad_norm": 0.48873427510261536, + "learning_rate": 4.736283491299186e-06, + "loss": 0.5033, + "step": 9781 + }, + { + "epoch": 0.8930071206865072, + "grad_norm": 0.5106305480003357, + "learning_rate": 4.736229986162514e-06, + "loss": 0.5412, + "step": 9782 + }, + { + "epoch": 0.8930984115391638, + "grad_norm": 0.5088502764701843, + "learning_rate": 4.736176475900878e-06, + "loss": 0.5076, + "step": 9783 + }, + { + "epoch": 0.8931897023918204, + "grad_norm": 0.47556397318840027, + "learning_rate": 4.7361229605144e-06, + "loss": 0.5775, + "step": 9784 + }, + { + "epoch": 0.8932809932444769, + "grad_norm": 0.5022513270378113, + "learning_rate": 4.736069440003204e-06, + "loss": 0.5055, + "step": 9785 + }, + { + "epoch": 0.8933722840971334, + "grad_norm": 0.4830203354358673, + "learning_rate": 4.7360159143674115e-06, + "loss": 0.517, + "step": 9786 + }, + { + "epoch": 0.89346357494979, + "grad_norm": 0.49323225021362305, + "learning_rate": 4.735962383607145e-06, + "loss": 0.518, + "step": 9787 + }, + { + "epoch": 0.8935548658024466, + "grad_norm": 0.4487753212451935, + "learning_rate": 4.73590884772253e-06, + "loss": 0.5352, + "step": 9788 + }, + { + "epoch": 0.8936461566551032, + "grad_norm": 0.46601957082748413, + "learning_rate": 4.7358553067136855e-06, + "loss": 0.5417, + "step": 9789 + }, + { + "epoch": 0.8937374475077597, + "grad_norm": 0.5103635191917419, + "learning_rate": 4.735801760580736e-06, + "loss": 0.5269, + "step": 9790 + }, + { + "epoch": 0.8938287383604163, + "grad_norm": 0.4724879562854767, + "learning_rate": 4.735748209323804e-06, + "loss": 0.5819, + "step": 9791 + }, + { + "epoch": 0.8939200292130729, + "grad_norm": 0.4547296464443207, + "learning_rate": 4.735694652943012e-06, + "loss": 0.5561, + "step": 9792 + }, + { + "epoch": 0.8940113200657294, + "grad_norm": 0.47230249643325806, + "learning_rate": 4.735641091438483e-06, + "loss": 0.5642, + "step": 9793 + }, + { + "epoch": 0.8941026109183859, + "grad_norm": 0.4829308092594147, + "learning_rate": 4.73558752481034e-06, + "loss": 0.5443, + "step": 9794 + }, + { + "epoch": 0.8941939017710425, + "grad_norm": 0.47356343269348145, + "learning_rate": 4.735533953058705e-06, + "loss": 0.5696, + "step": 9795 + }, + { + "epoch": 0.8942851926236991, + "grad_norm": 0.5267348885536194, + "learning_rate": 4.735480376183702e-06, + "loss": 0.5145, + "step": 9796 + }, + { + "epoch": 0.8943764834763557, + "grad_norm": 0.48739883303642273, + "learning_rate": 4.735426794185452e-06, + "loss": 0.5465, + "step": 9797 + }, + { + "epoch": 0.8944677743290123, + "grad_norm": 0.4826342761516571, + "learning_rate": 4.735373207064079e-06, + "loss": 0.5451, + "step": 9798 + }, + { + "epoch": 0.8945590651816688, + "grad_norm": 0.49948182702064514, + "learning_rate": 4.735319614819706e-06, + "loss": 0.5163, + "step": 9799 + }, + { + "epoch": 0.8946503560343254, + "grad_norm": 0.49127528071403503, + "learning_rate": 4.7352660174524554e-06, + "loss": 0.5742, + "step": 9800 + }, + { + "epoch": 0.8947416468869819, + "grad_norm": 0.48713234066963196, + "learning_rate": 4.735212414962449e-06, + "loss": 0.5614, + "step": 9801 + }, + { + "epoch": 0.8948329377396385, + "grad_norm": 0.5297887921333313, + "learning_rate": 4.735158807349812e-06, + "loss": 0.5401, + "step": 9802 + }, + { + "epoch": 0.894924228592295, + "grad_norm": 0.45844417810440063, + "learning_rate": 4.735105194614666e-06, + "loss": 0.616, + "step": 9803 + }, + { + "epoch": 0.8950155194449516, + "grad_norm": 0.47613558173179626, + "learning_rate": 4.735051576757133e-06, + "loss": 0.5571, + "step": 9804 + }, + { + "epoch": 0.8951068102976082, + "grad_norm": 0.4965307116508484, + "learning_rate": 4.7349979537773375e-06, + "loss": 0.5482, + "step": 9805 + }, + { + "epoch": 0.8951981011502648, + "grad_norm": 0.5061753392219543, + "learning_rate": 4.734944325675401e-06, + "loss": 0.4988, + "step": 9806 + }, + { + "epoch": 0.8952893920029213, + "grad_norm": 0.47214531898498535, + "learning_rate": 4.734890692451447e-06, + "loss": 0.6036, + "step": 9807 + }, + { + "epoch": 0.8953806828555779, + "grad_norm": 0.49732375144958496, + "learning_rate": 4.734837054105599e-06, + "loss": 0.5286, + "step": 9808 + }, + { + "epoch": 0.8954719737082344, + "grad_norm": 0.4880574941635132, + "learning_rate": 4.734783410637979e-06, + "loss": 0.5302, + "step": 9809 + }, + { + "epoch": 0.895563264560891, + "grad_norm": 0.47881925106048584, + "learning_rate": 4.734729762048709e-06, + "loss": 0.5661, + "step": 9810 + }, + { + "epoch": 0.8956545554135475, + "grad_norm": 0.46338170766830444, + "learning_rate": 4.734676108337915e-06, + "loss": 0.5547, + "step": 9811 + }, + { + "epoch": 0.8957458462662041, + "grad_norm": 0.4785629212856293, + "learning_rate": 4.7346224495057184e-06, + "loss": 0.5338, + "step": 9812 + }, + { + "epoch": 0.8958371371188607, + "grad_norm": 0.4559166431427002, + "learning_rate": 4.734568785552242e-06, + "loss": 0.5673, + "step": 9813 + }, + { + "epoch": 0.8959284279715173, + "grad_norm": 0.4641755223274231, + "learning_rate": 4.7345151164776075e-06, + "loss": 0.5727, + "step": 9814 + }, + { + "epoch": 0.8960197188241739, + "grad_norm": 0.4927518665790558, + "learning_rate": 4.73446144228194e-06, + "loss": 0.5489, + "step": 9815 + }, + { + "epoch": 0.8961110096768303, + "grad_norm": 0.4815288782119751, + "learning_rate": 4.734407762965361e-06, + "loss": 0.5511, + "step": 9816 + }, + { + "epoch": 0.8962023005294869, + "grad_norm": 0.5005871653556824, + "learning_rate": 4.734354078527994e-06, + "loss": 0.5359, + "step": 9817 + }, + { + "epoch": 0.8962935913821435, + "grad_norm": 0.49451667070388794, + "learning_rate": 4.7343003889699635e-06, + "loss": 0.529, + "step": 9818 + }, + { + "epoch": 0.8963848822348001, + "grad_norm": 0.47097814083099365, + "learning_rate": 4.734246694291391e-06, + "loss": 0.5879, + "step": 9819 + }, + { + "epoch": 0.8964761730874566, + "grad_norm": 0.4779756963253021, + "learning_rate": 4.734192994492399e-06, + "loss": 0.5726, + "step": 9820 + }, + { + "epoch": 0.8965674639401132, + "grad_norm": 0.4675411283969879, + "learning_rate": 4.734139289573112e-06, + "loss": 0.5454, + "step": 9821 + }, + { + "epoch": 0.8966587547927698, + "grad_norm": 0.48286470770835876, + "learning_rate": 4.734085579533652e-06, + "loss": 0.5485, + "step": 9822 + }, + { + "epoch": 0.8967500456454264, + "grad_norm": 0.4916849732398987, + "learning_rate": 4.734031864374143e-06, + "loss": 0.5463, + "step": 9823 + }, + { + "epoch": 0.8968413364980828, + "grad_norm": 0.4682149291038513, + "learning_rate": 4.733978144094708e-06, + "loss": 0.5713, + "step": 9824 + }, + { + "epoch": 0.8969326273507394, + "grad_norm": 0.46706944704055786, + "learning_rate": 4.733924418695469e-06, + "loss": 0.5817, + "step": 9825 + }, + { + "epoch": 0.897023918203396, + "grad_norm": 0.498561829328537, + "learning_rate": 4.7338706881765505e-06, + "loss": 0.5453, + "step": 9826 + }, + { + "epoch": 0.8971152090560526, + "grad_norm": 0.4657663106918335, + "learning_rate": 4.733816952538075e-06, + "loss": 0.5566, + "step": 9827 + }, + { + "epoch": 0.8972064999087092, + "grad_norm": 0.4928475618362427, + "learning_rate": 4.733763211780166e-06, + "loss": 0.5504, + "step": 9828 + }, + { + "epoch": 0.8972977907613657, + "grad_norm": 0.4916435182094574, + "learning_rate": 4.733709465902945e-06, + "loss": 0.485, + "step": 9829 + }, + { + "epoch": 0.8973890816140223, + "grad_norm": 0.5004660487174988, + "learning_rate": 4.733655714906538e-06, + "loss": 0.5384, + "step": 9830 + }, + { + "epoch": 0.8974803724666789, + "grad_norm": 0.4828376770019531, + "learning_rate": 4.733601958791066e-06, + "loss": 0.5289, + "step": 9831 + }, + { + "epoch": 0.8975716633193354, + "grad_norm": 0.5148735642433167, + "learning_rate": 4.733548197556653e-06, + "loss": 0.5531, + "step": 9832 + }, + { + "epoch": 0.8976629541719919, + "grad_norm": 0.48135173320770264, + "learning_rate": 4.733494431203423e-06, + "loss": 0.5512, + "step": 9833 + }, + { + "epoch": 0.8977542450246485, + "grad_norm": 0.4770042896270752, + "learning_rate": 4.733440659731497e-06, + "loss": 0.5701, + "step": 9834 + }, + { + "epoch": 0.8978455358773051, + "grad_norm": 0.5133042335510254, + "learning_rate": 4.733386883141e-06, + "loss": 0.5728, + "step": 9835 + }, + { + "epoch": 0.8979368267299617, + "grad_norm": 0.4642643928527832, + "learning_rate": 4.733333101432055e-06, + "loss": 0.5657, + "step": 9836 + }, + { + "epoch": 0.8980281175826182, + "grad_norm": 0.49312624335289, + "learning_rate": 4.733279314604785e-06, + "loss": 0.584, + "step": 9837 + }, + { + "epoch": 0.8981194084352748, + "grad_norm": 0.48182135820388794, + "learning_rate": 4.733225522659313e-06, + "loss": 0.5071, + "step": 9838 + }, + { + "epoch": 0.8982106992879314, + "grad_norm": 0.4926494061946869, + "learning_rate": 4.7331717255957645e-06, + "loss": 0.5562, + "step": 9839 + }, + { + "epoch": 0.8983019901405879, + "grad_norm": 0.49195167422294617, + "learning_rate": 4.733117923414259e-06, + "loss": 0.544, + "step": 9840 + }, + { + "epoch": 0.8983932809932444, + "grad_norm": 0.48937487602233887, + "learning_rate": 4.733064116114922e-06, + "loss": 0.574, + "step": 9841 + }, + { + "epoch": 0.898484571845901, + "grad_norm": 0.46777793765068054, + "learning_rate": 4.733010303697877e-06, + "loss": 0.5671, + "step": 9842 + }, + { + "epoch": 0.8985758626985576, + "grad_norm": 0.44442594051361084, + "learning_rate": 4.732956486163247e-06, + "loss": 0.5525, + "step": 9843 + }, + { + "epoch": 0.8986671535512142, + "grad_norm": 0.45389124751091003, + "learning_rate": 4.732902663511156e-06, + "loss": 0.5682, + "step": 9844 + }, + { + "epoch": 0.8987584444038708, + "grad_norm": 0.4954068064689636, + "learning_rate": 4.732848835741725e-06, + "loss": 0.5611, + "step": 9845 + }, + { + "epoch": 0.8988497352565273, + "grad_norm": 0.46667730808258057, + "learning_rate": 4.732795002855079e-06, + "loss": 0.5858, + "step": 9846 + }, + { + "epoch": 0.8989410261091838, + "grad_norm": 0.5057013034820557, + "learning_rate": 4.732741164851341e-06, + "loss": 0.5181, + "step": 9847 + }, + { + "epoch": 0.8990323169618404, + "grad_norm": 0.4958886504173279, + "learning_rate": 4.732687321730636e-06, + "loss": 0.5446, + "step": 9848 + }, + { + "epoch": 0.899123607814497, + "grad_norm": 0.46985819935798645, + "learning_rate": 4.732633473493086e-06, + "loss": 0.564, + "step": 9849 + }, + { + "epoch": 0.8992148986671535, + "grad_norm": 0.48028329014778137, + "learning_rate": 4.732579620138814e-06, + "loss": 0.5477, + "step": 9850 + }, + { + "epoch": 0.8993061895198101, + "grad_norm": 0.5036776065826416, + "learning_rate": 4.732525761667944e-06, + "loss": 0.544, + "step": 9851 + }, + { + "epoch": 0.8993974803724667, + "grad_norm": 0.4733130633831024, + "learning_rate": 4.732471898080599e-06, + "loss": 0.5379, + "step": 9852 + }, + { + "epoch": 0.8994887712251233, + "grad_norm": 0.44461432099342346, + "learning_rate": 4.732418029376903e-06, + "loss": 0.5413, + "step": 9853 + }, + { + "epoch": 0.8995800620777799, + "grad_norm": 0.48962876200675964, + "learning_rate": 4.7323641555569795e-06, + "loss": 0.5218, + "step": 9854 + }, + { + "epoch": 0.8996713529304363, + "grad_norm": 0.47116339206695557, + "learning_rate": 4.732310276620951e-06, + "loss": 0.5552, + "step": 9855 + }, + { + "epoch": 0.8997626437830929, + "grad_norm": 0.48543843626976013, + "learning_rate": 4.7322563925689425e-06, + "loss": 0.5368, + "step": 9856 + }, + { + "epoch": 0.8998539346357495, + "grad_norm": 0.5102585554122925, + "learning_rate": 4.7322025034010765e-06, + "loss": 0.5221, + "step": 9857 + }, + { + "epoch": 0.899945225488406, + "grad_norm": 0.4964187443256378, + "learning_rate": 4.732148609117476e-06, + "loss": 0.5888, + "step": 9858 + }, + { + "epoch": 0.9000365163410626, + "grad_norm": 0.47792354226112366, + "learning_rate": 4.732094709718266e-06, + "loss": 0.5667, + "step": 9859 + }, + { + "epoch": 0.9001278071937192, + "grad_norm": 0.48557576537132263, + "learning_rate": 4.732040805203569e-06, + "loss": 0.5563, + "step": 9860 + }, + { + "epoch": 0.9002190980463758, + "grad_norm": 0.46564826369285583, + "learning_rate": 4.731986895573509e-06, + "loss": 0.5713, + "step": 9861 + }, + { + "epoch": 0.9003103888990324, + "grad_norm": 0.4898087680339813, + "learning_rate": 4.731932980828209e-06, + "loss": 0.5991, + "step": 9862 + }, + { + "epoch": 0.9004016797516888, + "grad_norm": 0.4831758737564087, + "learning_rate": 4.731879060967793e-06, + "loss": 0.5711, + "step": 9863 + }, + { + "epoch": 0.9004929706043454, + "grad_norm": 0.5041172504425049, + "learning_rate": 4.731825135992384e-06, + "loss": 0.5497, + "step": 9864 + }, + { + "epoch": 0.900584261457002, + "grad_norm": 0.48464590311050415, + "learning_rate": 4.731771205902107e-06, + "loss": 0.5795, + "step": 9865 + }, + { + "epoch": 0.9006755523096586, + "grad_norm": 0.47571930289268494, + "learning_rate": 4.731717270697084e-06, + "loss": 0.5388, + "step": 9866 + }, + { + "epoch": 0.9007668431623151, + "grad_norm": 0.46722927689552307, + "learning_rate": 4.731663330377439e-06, + "loss": 0.5428, + "step": 9867 + }, + { + "epoch": 0.9008581340149717, + "grad_norm": 0.45760178565979004, + "learning_rate": 4.7316093849432964e-06, + "loss": 0.5335, + "step": 9868 + }, + { + "epoch": 0.9009494248676283, + "grad_norm": 0.48540550470352173, + "learning_rate": 4.731555434394779e-06, + "loss": 0.5304, + "step": 9869 + }, + { + "epoch": 0.9010407157202849, + "grad_norm": 0.45800378918647766, + "learning_rate": 4.73150147873201e-06, + "loss": 0.5634, + "step": 9870 + }, + { + "epoch": 0.9011320065729413, + "grad_norm": 0.4427598714828491, + "learning_rate": 4.731447517955115e-06, + "loss": 0.5874, + "step": 9871 + }, + { + "epoch": 0.9012232974255979, + "grad_norm": 0.4732412099838257, + "learning_rate": 4.7313935520642166e-06, + "loss": 0.5423, + "step": 9872 + }, + { + "epoch": 0.9013145882782545, + "grad_norm": 0.4985136091709137, + "learning_rate": 4.731339581059437e-06, + "loss": 0.5761, + "step": 9873 + }, + { + "epoch": 0.9014058791309111, + "grad_norm": 0.4953846335411072, + "learning_rate": 4.731285604940902e-06, + "loss": 0.5731, + "step": 9874 + }, + { + "epoch": 0.9014971699835677, + "grad_norm": 0.4622688889503479, + "learning_rate": 4.731231623708734e-06, + "loss": 0.5565, + "step": 9875 + }, + { + "epoch": 0.9015884608362242, + "grad_norm": 0.47934824228286743, + "learning_rate": 4.7311776373630584e-06, + "loss": 0.5636, + "step": 9876 + }, + { + "epoch": 0.9016797516888808, + "grad_norm": 0.48901331424713135, + "learning_rate": 4.731123645903996e-06, + "loss": 0.5627, + "step": 9877 + }, + { + "epoch": 0.9017710425415374, + "grad_norm": 0.4674794673919678, + "learning_rate": 4.731069649331674e-06, + "loss": 0.5771, + "step": 9878 + }, + { + "epoch": 0.9018623333941939, + "grad_norm": 0.4977312982082367, + "learning_rate": 4.731015647646213e-06, + "loss": 0.5251, + "step": 9879 + }, + { + "epoch": 0.9019536242468504, + "grad_norm": 0.47713813185691833, + "learning_rate": 4.73096164084774e-06, + "loss": 0.5565, + "step": 9880 + }, + { + "epoch": 0.902044915099507, + "grad_norm": 0.4924198091030121, + "learning_rate": 4.7309076289363755e-06, + "loss": 0.5288, + "step": 9881 + }, + { + "epoch": 0.9021362059521636, + "grad_norm": 0.5092541575431824, + "learning_rate": 4.730853611912245e-06, + "loss": 0.4568, + "step": 9882 + }, + { + "epoch": 0.9022274968048202, + "grad_norm": 0.4908459484577179, + "learning_rate": 4.730799589775472e-06, + "loss": 0.5307, + "step": 9883 + }, + { + "epoch": 0.9023187876574768, + "grad_norm": 0.49438899755477905, + "learning_rate": 4.730745562526181e-06, + "loss": 0.5521, + "step": 9884 + }, + { + "epoch": 0.9024100785101333, + "grad_norm": 0.4536588490009308, + "learning_rate": 4.730691530164495e-06, + "loss": 0.5528, + "step": 9885 + }, + { + "epoch": 0.9025013693627898, + "grad_norm": 0.49594399333000183, + "learning_rate": 4.730637492690538e-06, + "loss": 0.5305, + "step": 9886 + }, + { + "epoch": 0.9025926602154464, + "grad_norm": 0.45627498626708984, + "learning_rate": 4.730583450104433e-06, + "loss": 0.5989, + "step": 9887 + }, + { + "epoch": 0.902683951068103, + "grad_norm": 0.4839285910129547, + "learning_rate": 4.730529402406305e-06, + "loss": 0.5357, + "step": 9888 + }, + { + "epoch": 0.9027752419207595, + "grad_norm": 0.4767164885997772, + "learning_rate": 4.730475349596278e-06, + "loss": 0.5174, + "step": 9889 + }, + { + "epoch": 0.9028665327734161, + "grad_norm": 0.4995864927768707, + "learning_rate": 4.730421291674476e-06, + "loss": 0.5011, + "step": 9890 + }, + { + "epoch": 0.9029578236260727, + "grad_norm": 0.4681038558483124, + "learning_rate": 4.730367228641022e-06, + "loss": 0.5642, + "step": 9891 + }, + { + "epoch": 0.9030491144787293, + "grad_norm": 0.49457183480262756, + "learning_rate": 4.730313160496039e-06, + "loss": 0.5689, + "step": 9892 + }, + { + "epoch": 0.9031404053313858, + "grad_norm": 0.48648226261138916, + "learning_rate": 4.730259087239653e-06, + "loss": 0.5568, + "step": 9893 + }, + { + "epoch": 0.9032316961840423, + "grad_norm": 0.47758108377456665, + "learning_rate": 4.730205008871988e-06, + "loss": 0.5688, + "step": 9894 + }, + { + "epoch": 0.9033229870366989, + "grad_norm": 0.4785112142562866, + "learning_rate": 4.730150925393166e-06, + "loss": 0.5665, + "step": 9895 + }, + { + "epoch": 0.9034142778893555, + "grad_norm": 0.464194118976593, + "learning_rate": 4.730096836803313e-06, + "loss": 0.5566, + "step": 9896 + }, + { + "epoch": 0.903505568742012, + "grad_norm": 0.4765945374965668, + "learning_rate": 4.7300427431025505e-06, + "loss": 0.5511, + "step": 9897 + }, + { + "epoch": 0.9035968595946686, + "grad_norm": 0.4638978838920593, + "learning_rate": 4.7299886442910044e-06, + "loss": 0.5889, + "step": 9898 + }, + { + "epoch": 0.9036881504473252, + "grad_norm": 0.4611700177192688, + "learning_rate": 4.729934540368799e-06, + "loss": 0.5308, + "step": 9899 + }, + { + "epoch": 0.9037794412999818, + "grad_norm": 0.4926295578479767, + "learning_rate": 4.729880431336056e-06, + "loss": 0.5367, + "step": 9900 + }, + { + "epoch": 0.9038707321526384, + "grad_norm": 0.5196625590324402, + "learning_rate": 4.729826317192902e-06, + "loss": 0.5287, + "step": 9901 + }, + { + "epoch": 0.9039620230052948, + "grad_norm": 0.47941097617149353, + "learning_rate": 4.729772197939459e-06, + "loss": 0.5407, + "step": 9902 + }, + { + "epoch": 0.9040533138579514, + "grad_norm": 0.4970749616622925, + "learning_rate": 4.729718073575853e-06, + "loss": 0.526, + "step": 9903 + }, + { + "epoch": 0.904144604710608, + "grad_norm": 0.47836199402809143, + "learning_rate": 4.729663944102207e-06, + "loss": 0.5556, + "step": 9904 + }, + { + "epoch": 0.9042358955632646, + "grad_norm": 0.4768059551715851, + "learning_rate": 4.729609809518644e-06, + "loss": 0.5427, + "step": 9905 + }, + { + "epoch": 0.9043271864159211, + "grad_norm": 0.5082755088806152, + "learning_rate": 4.72955566982529e-06, + "loss": 0.5527, + "step": 9906 + }, + { + "epoch": 0.9044184772685777, + "grad_norm": 0.48219525814056396, + "learning_rate": 4.729501525022267e-06, + "loss": 0.5642, + "step": 9907 + }, + { + "epoch": 0.9045097681212343, + "grad_norm": 0.4828100800514221, + "learning_rate": 4.729447375109701e-06, + "loss": 0.4972, + "step": 9908 + }, + { + "epoch": 0.9046010589738909, + "grad_norm": 0.452394962310791, + "learning_rate": 4.729393220087716e-06, + "loss": 0.5669, + "step": 9909 + }, + { + "epoch": 0.9046923498265473, + "grad_norm": 0.4616602659225464, + "learning_rate": 4.729339059956434e-06, + "loss": 0.5837, + "step": 9910 + }, + { + "epoch": 0.9047836406792039, + "grad_norm": 0.4998503625392914, + "learning_rate": 4.729284894715982e-06, + "loss": 0.5612, + "step": 9911 + }, + { + "epoch": 0.9048749315318605, + "grad_norm": 0.46301794052124023, + "learning_rate": 4.729230724366481e-06, + "loss": 0.5689, + "step": 9912 + }, + { + "epoch": 0.9049662223845171, + "grad_norm": 0.47523534297943115, + "learning_rate": 4.729176548908059e-06, + "loss": 0.5271, + "step": 9913 + }, + { + "epoch": 0.9050575132371736, + "grad_norm": 0.5018452405929565, + "learning_rate": 4.729122368340837e-06, + "loss": 0.5577, + "step": 9914 + }, + { + "epoch": 0.9051488040898302, + "grad_norm": 0.48646417260169983, + "learning_rate": 4.72906818266494e-06, + "loss": 0.5482, + "step": 9915 + }, + { + "epoch": 0.9052400949424868, + "grad_norm": 0.4624713957309723, + "learning_rate": 4.729013991880492e-06, + "loss": 0.5657, + "step": 9916 + }, + { + "epoch": 0.9053313857951433, + "grad_norm": 0.45438528060913086, + "learning_rate": 4.728959795987619e-06, + "loss": 0.6015, + "step": 9917 + }, + { + "epoch": 0.9054226766477999, + "grad_norm": 0.505486011505127, + "learning_rate": 4.728905594986443e-06, + "loss": 0.5487, + "step": 9918 + }, + { + "epoch": 0.9055139675004564, + "grad_norm": 0.46505656838417053, + "learning_rate": 4.728851388877088e-06, + "loss": 0.5661, + "step": 9919 + }, + { + "epoch": 0.905605258353113, + "grad_norm": 0.5300215482711792, + "learning_rate": 4.728797177659681e-06, + "loss": 0.5272, + "step": 9920 + }, + { + "epoch": 0.9056965492057696, + "grad_norm": 0.4877456724643707, + "learning_rate": 4.728742961334343e-06, + "loss": 0.5181, + "step": 9921 + }, + { + "epoch": 0.9057878400584262, + "grad_norm": 0.5083968639373779, + "learning_rate": 4.728688739901201e-06, + "loss": 0.5712, + "step": 9922 + }, + { + "epoch": 0.9058791309110827, + "grad_norm": 0.46106255054473877, + "learning_rate": 4.728634513360377e-06, + "loss": 0.56, + "step": 9923 + }, + { + "epoch": 0.9059704217637393, + "grad_norm": 0.4730496108531952, + "learning_rate": 4.728580281711996e-06, + "loss": 0.5653, + "step": 9924 + }, + { + "epoch": 0.9060617126163958, + "grad_norm": 0.4703317880630493, + "learning_rate": 4.728526044956183e-06, + "loss": 0.5569, + "step": 9925 + }, + { + "epoch": 0.9061530034690524, + "grad_norm": 0.47093743085861206, + "learning_rate": 4.728471803093062e-06, + "loss": 0.5331, + "step": 9926 + }, + { + "epoch": 0.9062442943217089, + "grad_norm": 0.4783826470375061, + "learning_rate": 4.728417556122757e-06, + "loss": 0.5506, + "step": 9927 + }, + { + "epoch": 0.9063355851743655, + "grad_norm": 0.49756044149398804, + "learning_rate": 4.728363304045393e-06, + "loss": 0.5529, + "step": 9928 + }, + { + "epoch": 0.9064268760270221, + "grad_norm": 0.4892081022262573, + "learning_rate": 4.728309046861094e-06, + "loss": 0.5631, + "step": 9929 + }, + { + "epoch": 0.9065181668796787, + "grad_norm": 0.4994317889213562, + "learning_rate": 4.728254784569983e-06, + "loss": 0.4839, + "step": 9930 + }, + { + "epoch": 0.9066094577323353, + "grad_norm": 0.5035998225212097, + "learning_rate": 4.7282005171721865e-06, + "loss": 0.5521, + "step": 9931 + }, + { + "epoch": 0.9067007485849918, + "grad_norm": 0.48675253987312317, + "learning_rate": 4.728146244667827e-06, + "loss": 0.5529, + "step": 9932 + }, + { + "epoch": 0.9067920394376483, + "grad_norm": 0.4922734498977661, + "learning_rate": 4.728091967057031e-06, + "loss": 0.5223, + "step": 9933 + }, + { + "epoch": 0.9068833302903049, + "grad_norm": 0.48224061727523804, + "learning_rate": 4.7280376843399205e-06, + "loss": 0.5392, + "step": 9934 + }, + { + "epoch": 0.9069746211429615, + "grad_norm": 0.5154796838760376, + "learning_rate": 4.727983396516621e-06, + "loss": 0.5374, + "step": 9935 + }, + { + "epoch": 0.907065911995618, + "grad_norm": 0.4972100257873535, + "learning_rate": 4.7279291035872576e-06, + "loss": 0.5202, + "step": 9936 + }, + { + "epoch": 0.9071572028482746, + "grad_norm": 0.4726194441318512, + "learning_rate": 4.727874805551954e-06, + "loss": 0.5168, + "step": 9937 + }, + { + "epoch": 0.9072484937009312, + "grad_norm": 0.4616803824901581, + "learning_rate": 4.727820502410835e-06, + "loss": 0.5779, + "step": 9938 + }, + { + "epoch": 0.9073397845535878, + "grad_norm": 0.4477697014808655, + "learning_rate": 4.727766194164024e-06, + "loss": 0.5865, + "step": 9939 + }, + { + "epoch": 0.9074310754062443, + "grad_norm": 0.49112412333488464, + "learning_rate": 4.727711880811647e-06, + "loss": 0.5658, + "step": 9940 + }, + { + "epoch": 0.9075223662589008, + "grad_norm": 0.4907322824001312, + "learning_rate": 4.727657562353827e-06, + "loss": 0.526, + "step": 9941 + }, + { + "epoch": 0.9076136571115574, + "grad_norm": 0.48785364627838135, + "learning_rate": 4.72760323879069e-06, + "loss": 0.5396, + "step": 9942 + }, + { + "epoch": 0.907704947964214, + "grad_norm": 0.49457740783691406, + "learning_rate": 4.72754891012236e-06, + "loss": 0.5354, + "step": 9943 + }, + { + "epoch": 0.9077962388168705, + "grad_norm": 0.49246901273727417, + "learning_rate": 4.72749457634896e-06, + "loss": 0.5632, + "step": 9944 + }, + { + "epoch": 0.9078875296695271, + "grad_norm": 0.494587242603302, + "learning_rate": 4.727440237470617e-06, + "loss": 0.5193, + "step": 9945 + }, + { + "epoch": 0.9079788205221837, + "grad_norm": 0.4533807039260864, + "learning_rate": 4.7273858934874524e-06, + "loss": 0.577, + "step": 9946 + }, + { + "epoch": 0.9080701113748403, + "grad_norm": 0.4789557158946991, + "learning_rate": 4.727331544399595e-06, + "loss": 0.5507, + "step": 9947 + }, + { + "epoch": 0.9081614022274968, + "grad_norm": 0.4791032373905182, + "learning_rate": 4.727277190207166e-06, + "loss": 0.5273, + "step": 9948 + }, + { + "epoch": 0.9082526930801533, + "grad_norm": 0.46512699127197266, + "learning_rate": 4.7272228309102905e-06, + "loss": 0.5453, + "step": 9949 + }, + { + "epoch": 0.9083439839328099, + "grad_norm": 0.5074141621589661, + "learning_rate": 4.727168466509093e-06, + "loss": 0.5452, + "step": 9950 + }, + { + "epoch": 0.9084352747854665, + "grad_norm": 0.5039756298065186, + "learning_rate": 4.7271140970037e-06, + "loss": 0.5211, + "step": 9951 + }, + { + "epoch": 0.9085265656381231, + "grad_norm": 0.5132353901863098, + "learning_rate": 4.727059722394235e-06, + "loss": 0.5379, + "step": 9952 + }, + { + "epoch": 0.9086178564907796, + "grad_norm": 0.4644346237182617, + "learning_rate": 4.727005342680821e-06, + "loss": 0.5541, + "step": 9953 + }, + { + "epoch": 0.9087091473434362, + "grad_norm": 0.47302746772766113, + "learning_rate": 4.7269509578635845e-06, + "loss": 0.5543, + "step": 9954 + }, + { + "epoch": 0.9088004381960928, + "grad_norm": 0.47747960686683655, + "learning_rate": 4.72689656794265e-06, + "loss": 0.5962, + "step": 9955 + }, + { + "epoch": 0.9088917290487493, + "grad_norm": 0.49550142884254456, + "learning_rate": 4.726842172918142e-06, + "loss": 0.4882, + "step": 9956 + }, + { + "epoch": 0.9089830199014058, + "grad_norm": 0.4685433804988861, + "learning_rate": 4.726787772790184e-06, + "loss": 0.5439, + "step": 9957 + }, + { + "epoch": 0.9090743107540624, + "grad_norm": 0.4575497508049011, + "learning_rate": 4.7267333675589016e-06, + "loss": 0.5961, + "step": 9958 + }, + { + "epoch": 0.909165601606719, + "grad_norm": 0.49436357617378235, + "learning_rate": 4.7266789572244195e-06, + "loss": 0.543, + "step": 9959 + }, + { + "epoch": 0.9092568924593756, + "grad_norm": 0.4826354384422302, + "learning_rate": 4.726624541786863e-06, + "loss": 0.5016, + "step": 9960 + }, + { + "epoch": 0.9093481833120322, + "grad_norm": 0.4908037781715393, + "learning_rate": 4.726570121246356e-06, + "loss": 0.5187, + "step": 9961 + }, + { + "epoch": 0.9094394741646887, + "grad_norm": 0.4837283194065094, + "learning_rate": 4.726515695603023e-06, + "loss": 0.5713, + "step": 9962 + }, + { + "epoch": 0.9095307650173453, + "grad_norm": 0.4711589217185974, + "learning_rate": 4.7264612648569895e-06, + "loss": 0.5753, + "step": 9963 + }, + { + "epoch": 0.9096220558700018, + "grad_norm": 0.5043402314186096, + "learning_rate": 4.72640682900838e-06, + "loss": 0.5279, + "step": 9964 + }, + { + "epoch": 0.9097133467226584, + "grad_norm": 0.48549097776412964, + "learning_rate": 4.726352388057319e-06, + "loss": 0.5705, + "step": 9965 + }, + { + "epoch": 0.9098046375753149, + "grad_norm": 0.4780200719833374, + "learning_rate": 4.726297942003931e-06, + "loss": 0.4956, + "step": 9966 + }, + { + "epoch": 0.9098959284279715, + "grad_norm": 0.4807962477207184, + "learning_rate": 4.726243490848341e-06, + "loss": 0.5645, + "step": 9967 + }, + { + "epoch": 0.9099872192806281, + "grad_norm": 0.503011167049408, + "learning_rate": 4.7261890345906754e-06, + "loss": 0.5519, + "step": 9968 + }, + { + "epoch": 0.9100785101332847, + "grad_norm": 0.47522348165512085, + "learning_rate": 4.7261345732310565e-06, + "loss": 0.5827, + "step": 9969 + }, + { + "epoch": 0.9101698009859412, + "grad_norm": 0.44576725363731384, + "learning_rate": 4.726080106769611e-06, + "loss": 0.614, + "step": 9970 + }, + { + "epoch": 0.9102610918385978, + "grad_norm": 0.47965970635414124, + "learning_rate": 4.726025635206462e-06, + "loss": 0.5733, + "step": 9971 + }, + { + "epoch": 0.9103523826912543, + "grad_norm": 0.46604976058006287, + "learning_rate": 4.725971158541736e-06, + "loss": 0.5883, + "step": 9972 + }, + { + "epoch": 0.9104436735439109, + "grad_norm": 0.5180791616439819, + "learning_rate": 4.725916676775556e-06, + "loss": 0.5186, + "step": 9973 + }, + { + "epoch": 0.9105349643965674, + "grad_norm": 0.47660166025161743, + "learning_rate": 4.725862189908049e-06, + "loss": 0.5745, + "step": 9974 + }, + { + "epoch": 0.910626255249224, + "grad_norm": 0.5100136399269104, + "learning_rate": 4.725807697939338e-06, + "loss": 0.5537, + "step": 9975 + }, + { + "epoch": 0.9107175461018806, + "grad_norm": 0.46776628494262695, + "learning_rate": 4.72575320086955e-06, + "loss": 0.5175, + "step": 9976 + }, + { + "epoch": 0.9108088369545372, + "grad_norm": 0.4902673065662384, + "learning_rate": 4.725698698698808e-06, + "loss": 0.553, + "step": 9977 + }, + { + "epoch": 0.9109001278071938, + "grad_norm": 0.46758949756622314, + "learning_rate": 4.725644191427237e-06, + "loss": 0.5565, + "step": 9978 + }, + { + "epoch": 0.9109914186598503, + "grad_norm": 0.47617432475090027, + "learning_rate": 4.725589679054962e-06, + "loss": 0.532, + "step": 9979 + }, + { + "epoch": 0.9110827095125068, + "grad_norm": 0.5182346105575562, + "learning_rate": 4.725535161582109e-06, + "loss": 0.5272, + "step": 9980 + }, + { + "epoch": 0.9111740003651634, + "grad_norm": 0.47153136134147644, + "learning_rate": 4.725480639008803e-06, + "loss": 0.5423, + "step": 9981 + }, + { + "epoch": 0.91126529121782, + "grad_norm": 0.47210589051246643, + "learning_rate": 4.725426111335168e-06, + "loss": 0.5443, + "step": 9982 + }, + { + "epoch": 0.9113565820704765, + "grad_norm": 0.4603690207004547, + "learning_rate": 4.7253715785613284e-06, + "loss": 0.5879, + "step": 9983 + }, + { + "epoch": 0.9114478729231331, + "grad_norm": 0.4879186749458313, + "learning_rate": 4.725317040687411e-06, + "loss": 0.5502, + "step": 9984 + }, + { + "epoch": 0.9115391637757897, + "grad_norm": 0.5002353191375732, + "learning_rate": 4.725262497713539e-06, + "loss": 0.5778, + "step": 9985 + }, + { + "epoch": 0.9116304546284463, + "grad_norm": 0.48864707350730896, + "learning_rate": 4.7252079496398385e-06, + "loss": 0.559, + "step": 9986 + }, + { + "epoch": 0.9117217454811027, + "grad_norm": 0.5136873126029968, + "learning_rate": 4.725153396466434e-06, + "loss": 0.5313, + "step": 9987 + }, + { + "epoch": 0.9118130363337593, + "grad_norm": 0.5028541088104248, + "learning_rate": 4.7250988381934506e-06, + "loss": 0.5433, + "step": 9988 + }, + { + "epoch": 0.9119043271864159, + "grad_norm": 0.44334620237350464, + "learning_rate": 4.725044274821015e-06, + "loss": 0.537, + "step": 9989 + }, + { + "epoch": 0.9119956180390725, + "grad_norm": 0.5294224619865417, + "learning_rate": 4.724989706349249e-06, + "loss": 0.5444, + "step": 9990 + }, + { + "epoch": 0.912086908891729, + "grad_norm": 0.4561353921890259, + "learning_rate": 4.72493513277828e-06, + "loss": 0.5687, + "step": 9991 + }, + { + "epoch": 0.9121781997443856, + "grad_norm": 0.47811728715896606, + "learning_rate": 4.724880554108232e-06, + "loss": 0.534, + "step": 9992 + }, + { + "epoch": 0.9122694905970422, + "grad_norm": 0.5306810736656189, + "learning_rate": 4.724825970339232e-06, + "loss": 0.4915, + "step": 9993 + }, + { + "epoch": 0.9123607814496988, + "grad_norm": 0.4494639039039612, + "learning_rate": 4.724771381471402e-06, + "loss": 0.5695, + "step": 9994 + }, + { + "epoch": 0.9124520723023553, + "grad_norm": 0.5061866044998169, + "learning_rate": 4.72471678750487e-06, + "loss": 0.5724, + "step": 9995 + }, + { + "epoch": 0.9125433631550118, + "grad_norm": 0.47205090522766113, + "learning_rate": 4.724662188439758e-06, + "loss": 0.5146, + "step": 9996 + }, + { + "epoch": 0.9126346540076684, + "grad_norm": 0.4930199086666107, + "learning_rate": 4.724607584276195e-06, + "loss": 0.5798, + "step": 9997 + }, + { + "epoch": 0.912725944860325, + "grad_norm": 0.4910096526145935, + "learning_rate": 4.724552975014303e-06, + "loss": 0.5221, + "step": 9998 + }, + { + "epoch": 0.9128172357129816, + "grad_norm": 0.5222611427307129, + "learning_rate": 4.724498360654209e-06, + "loss": 0.5398, + "step": 9999 + }, + { + "epoch": 0.9129085265656381, + "grad_norm": 0.4741336405277252, + "learning_rate": 4.724443741196037e-06, + "loss": 0.5131, + "step": 10000 + }, + { + "epoch": 0.9129998174182947, + "grad_norm": 0.482166051864624, + "learning_rate": 4.724389116639912e-06, + "loss": 0.5478, + "step": 10001 + }, + { + "epoch": 0.9130911082709513, + "grad_norm": 0.49136465787887573, + "learning_rate": 4.7243344869859605e-06, + "loss": 0.5395, + "step": 10002 + }, + { + "epoch": 0.9131823991236078, + "grad_norm": 0.4955362379550934, + "learning_rate": 4.724279852234307e-06, + "loss": 0.5367, + "step": 10003 + }, + { + "epoch": 0.9132736899762643, + "grad_norm": 0.4735563099384308, + "learning_rate": 4.724225212385076e-06, + "loss": 0.5445, + "step": 10004 + }, + { + "epoch": 0.9133649808289209, + "grad_norm": 0.5149959325790405, + "learning_rate": 4.724170567438394e-06, + "loss": 0.5233, + "step": 10005 + }, + { + "epoch": 0.9134562716815775, + "grad_norm": 0.4666547179222107, + "learning_rate": 4.724115917394386e-06, + "loss": 0.547, + "step": 10006 + }, + { + "epoch": 0.9135475625342341, + "grad_norm": 0.4644847512245178, + "learning_rate": 4.724061262253177e-06, + "loss": 0.6123, + "step": 10007 + }, + { + "epoch": 0.9136388533868907, + "grad_norm": 0.5049655437469482, + "learning_rate": 4.7240066020148926e-06, + "loss": 0.5181, + "step": 10008 + }, + { + "epoch": 0.9137301442395472, + "grad_norm": 0.5054749250411987, + "learning_rate": 4.723951936679656e-06, + "loss": 0.5298, + "step": 10009 + }, + { + "epoch": 0.9138214350922038, + "grad_norm": 0.45761823654174805, + "learning_rate": 4.723897266247595e-06, + "loss": 0.5703, + "step": 10010 + }, + { + "epoch": 0.9139127259448603, + "grad_norm": 0.5146569609642029, + "learning_rate": 4.723842590718835e-06, + "loss": 0.4902, + "step": 10011 + }, + { + "epoch": 0.9140040167975169, + "grad_norm": 0.4820217788219452, + "learning_rate": 4.723787910093499e-06, + "loss": 0.5718, + "step": 10012 + }, + { + "epoch": 0.9140953076501734, + "grad_norm": 0.48264530301094055, + "learning_rate": 4.723733224371714e-06, + "loss": 0.5238, + "step": 10013 + }, + { + "epoch": 0.91418659850283, + "grad_norm": 0.46341755986213684, + "learning_rate": 4.723678533553605e-06, + "loss": 0.5696, + "step": 10014 + }, + { + "epoch": 0.9142778893554866, + "grad_norm": 0.46995818614959717, + "learning_rate": 4.723623837639297e-06, + "loss": 0.5952, + "step": 10015 + }, + { + "epoch": 0.9143691802081432, + "grad_norm": 0.47288861870765686, + "learning_rate": 4.723569136628916e-06, + "loss": 0.555, + "step": 10016 + }, + { + "epoch": 0.9144604710607998, + "grad_norm": 0.45159563422203064, + "learning_rate": 4.723514430522587e-06, + "loss": 0.5809, + "step": 10017 + }, + { + "epoch": 0.9145517619134562, + "grad_norm": 0.512373685836792, + "learning_rate": 4.723459719320436e-06, + "loss": 0.541, + "step": 10018 + }, + { + "epoch": 0.9146430527661128, + "grad_norm": 0.5057079195976257, + "learning_rate": 4.723405003022586e-06, + "loss": 0.5252, + "step": 10019 + }, + { + "epoch": 0.9147343436187694, + "grad_norm": 0.49143633246421814, + "learning_rate": 4.723350281629166e-06, + "loss": 0.5306, + "step": 10020 + }, + { + "epoch": 0.914825634471426, + "grad_norm": 0.5006198287010193, + "learning_rate": 4.723295555140299e-06, + "loss": 0.5119, + "step": 10021 + }, + { + "epoch": 0.9149169253240825, + "grad_norm": 0.5069392323493958, + "learning_rate": 4.723240823556111e-06, + "loss": 0.5361, + "step": 10022 + }, + { + "epoch": 0.9150082161767391, + "grad_norm": 0.49685248732566833, + "learning_rate": 4.723186086876727e-06, + "loss": 0.5176, + "step": 10023 + }, + { + "epoch": 0.9150995070293957, + "grad_norm": 0.4886334240436554, + "learning_rate": 4.723131345102273e-06, + "loss": 0.5536, + "step": 10024 + }, + { + "epoch": 0.9151907978820523, + "grad_norm": 0.47580698132514954, + "learning_rate": 4.723076598232875e-06, + "loss": 0.5792, + "step": 10025 + }, + { + "epoch": 0.9152820887347087, + "grad_norm": 0.5203409194946289, + "learning_rate": 4.723021846268658e-06, + "loss": 0.5456, + "step": 10026 + }, + { + "epoch": 0.9153733795873653, + "grad_norm": 0.5162091255187988, + "learning_rate": 4.722967089209746e-06, + "loss": 0.5118, + "step": 10027 + }, + { + "epoch": 0.9154646704400219, + "grad_norm": 0.4753343164920807, + "learning_rate": 4.722912327056267e-06, + "loss": 0.5579, + "step": 10028 + }, + { + "epoch": 0.9155559612926785, + "grad_norm": 0.48459503054618835, + "learning_rate": 4.722857559808345e-06, + "loss": 0.5358, + "step": 10029 + }, + { + "epoch": 0.915647252145335, + "grad_norm": 0.4766550362110138, + "learning_rate": 4.722802787466105e-06, + "loss": 0.5708, + "step": 10030 + }, + { + "epoch": 0.9157385429979916, + "grad_norm": 0.484122097492218, + "learning_rate": 4.722748010029675e-06, + "loss": 0.5962, + "step": 10031 + }, + { + "epoch": 0.9158298338506482, + "grad_norm": 0.47810661792755127, + "learning_rate": 4.7226932274991775e-06, + "loss": 0.5415, + "step": 10032 + }, + { + "epoch": 0.9159211247033048, + "grad_norm": 0.47671017050743103, + "learning_rate": 4.72263843987474e-06, + "loss": 0.5574, + "step": 10033 + }, + { + "epoch": 0.9160124155559612, + "grad_norm": 0.45475709438323975, + "learning_rate": 4.722583647156487e-06, + "loss": 0.5961, + "step": 10034 + }, + { + "epoch": 0.9161037064086178, + "grad_norm": 0.504963219165802, + "learning_rate": 4.722528849344545e-06, + "loss": 0.5097, + "step": 10035 + }, + { + "epoch": 0.9161949972612744, + "grad_norm": 0.5123279094696045, + "learning_rate": 4.7224740464390385e-06, + "loss": 0.5377, + "step": 10036 + }, + { + "epoch": 0.916286288113931, + "grad_norm": 0.4682862162590027, + "learning_rate": 4.722419238440095e-06, + "loss": 0.5643, + "step": 10037 + }, + { + "epoch": 0.9163775789665876, + "grad_norm": 0.5240476727485657, + "learning_rate": 4.722364425347837e-06, + "loss": 0.5713, + "step": 10038 + }, + { + "epoch": 0.9164688698192441, + "grad_norm": 0.49701714515686035, + "learning_rate": 4.722309607162394e-06, + "loss": 0.5686, + "step": 10039 + }, + { + "epoch": 0.9165601606719007, + "grad_norm": 0.4919106662273407, + "learning_rate": 4.722254783883889e-06, + "loss": 0.5228, + "step": 10040 + }, + { + "epoch": 0.9166514515245573, + "grad_norm": 0.4692884385585785, + "learning_rate": 4.7221999555124475e-06, + "loss": 0.579, + "step": 10041 + }, + { + "epoch": 0.9167427423772138, + "grad_norm": 0.5028102993965149, + "learning_rate": 4.722145122048196e-06, + "loss": 0.5708, + "step": 10042 + }, + { + "epoch": 0.9168340332298703, + "grad_norm": 0.4730675220489502, + "learning_rate": 4.722090283491261e-06, + "loss": 0.5531, + "step": 10043 + }, + { + "epoch": 0.9169253240825269, + "grad_norm": 0.4930233359336853, + "learning_rate": 4.722035439841766e-06, + "loss": 0.5295, + "step": 10044 + }, + { + "epoch": 0.9170166149351835, + "grad_norm": 0.49194079637527466, + "learning_rate": 4.721980591099838e-06, + "loss": 0.5812, + "step": 10045 + }, + { + "epoch": 0.9171079057878401, + "grad_norm": 0.5070290565490723, + "learning_rate": 4.721925737265603e-06, + "loss": 0.5361, + "step": 10046 + }, + { + "epoch": 0.9171991966404967, + "grad_norm": 0.49072781205177307, + "learning_rate": 4.721870878339188e-06, + "loss": 0.5191, + "step": 10047 + }, + { + "epoch": 0.9172904874931532, + "grad_norm": 0.49564144015312195, + "learning_rate": 4.721816014320715e-06, + "loss": 0.5589, + "step": 10048 + }, + { + "epoch": 0.9173817783458097, + "grad_norm": 0.4826485514640808, + "learning_rate": 4.721761145210312e-06, + "loss": 0.5451, + "step": 10049 + }, + { + "epoch": 0.9174730691984663, + "grad_norm": 0.5002872347831726, + "learning_rate": 4.721706271008105e-06, + "loss": 0.5192, + "step": 10050 + }, + { + "epoch": 0.9175643600511229, + "grad_norm": 0.4657069444656372, + "learning_rate": 4.7216513917142195e-06, + "loss": 0.5471, + "step": 10051 + }, + { + "epoch": 0.9176556509037794, + "grad_norm": 0.4492475688457489, + "learning_rate": 4.721596507328781e-06, + "loss": 0.5437, + "step": 10052 + }, + { + "epoch": 0.917746941756436, + "grad_norm": 0.4874633252620697, + "learning_rate": 4.7215416178519145e-06, + "loss": 0.5369, + "step": 10053 + }, + { + "epoch": 0.9178382326090926, + "grad_norm": 0.4756479859352112, + "learning_rate": 4.721486723283748e-06, + "loss": 0.5891, + "step": 10054 + }, + { + "epoch": 0.9179295234617492, + "grad_norm": 0.4888165295124054, + "learning_rate": 4.721431823624405e-06, + "loss": 0.573, + "step": 10055 + }, + { + "epoch": 0.9180208143144057, + "grad_norm": 0.5143833756446838, + "learning_rate": 4.721376918874013e-06, + "loss": 0.5001, + "step": 10056 + }, + { + "epoch": 0.9181121051670622, + "grad_norm": 0.4850429892539978, + "learning_rate": 4.721322009032697e-06, + "loss": 0.5863, + "step": 10057 + }, + { + "epoch": 0.9182033960197188, + "grad_norm": 0.4817862808704376, + "learning_rate": 4.721267094100582e-06, + "loss": 0.5221, + "step": 10058 + }, + { + "epoch": 0.9182946868723754, + "grad_norm": 0.4947293996810913, + "learning_rate": 4.721212174077796e-06, + "loss": 0.5244, + "step": 10059 + }, + { + "epoch": 0.9183859777250319, + "grad_norm": 0.49062204360961914, + "learning_rate": 4.721157248964463e-06, + "loss": 0.5698, + "step": 10060 + }, + { + "epoch": 0.9184772685776885, + "grad_norm": 0.49051159620285034, + "learning_rate": 4.72110231876071e-06, + "loss": 0.5647, + "step": 10061 + }, + { + "epoch": 0.9185685594303451, + "grad_norm": 0.47749847173690796, + "learning_rate": 4.7210473834666625e-06, + "loss": 0.5433, + "step": 10062 + }, + { + "epoch": 0.9186598502830017, + "grad_norm": 0.492953360080719, + "learning_rate": 4.720992443082446e-06, + "loss": 0.5258, + "step": 10063 + }, + { + "epoch": 0.9187511411356583, + "grad_norm": 0.4658903479576111, + "learning_rate": 4.7209374976081865e-06, + "loss": 0.5275, + "step": 10064 + }, + { + "epoch": 0.9188424319883147, + "grad_norm": 0.5118948221206665, + "learning_rate": 4.720882547044011e-06, + "loss": 0.5052, + "step": 10065 + }, + { + "epoch": 0.9189337228409713, + "grad_norm": 0.48916929960250854, + "learning_rate": 4.720827591390044e-06, + "loss": 0.5067, + "step": 10066 + }, + { + "epoch": 0.9190250136936279, + "grad_norm": 0.49146324396133423, + "learning_rate": 4.720772630646412e-06, + "loss": 0.5286, + "step": 10067 + }, + { + "epoch": 0.9191163045462845, + "grad_norm": 0.46782904863357544, + "learning_rate": 4.7207176648132415e-06, + "loss": 0.5644, + "step": 10068 + }, + { + "epoch": 0.919207595398941, + "grad_norm": 0.4733143746852875, + "learning_rate": 4.720662693890657e-06, + "loss": 0.4937, + "step": 10069 + }, + { + "epoch": 0.9192988862515976, + "grad_norm": 0.4873037338256836, + "learning_rate": 4.720607717878787e-06, + "loss": 0.5588, + "step": 10070 + }, + { + "epoch": 0.9193901771042542, + "grad_norm": 0.4664916694164276, + "learning_rate": 4.720552736777755e-06, + "loss": 0.5671, + "step": 10071 + }, + { + "epoch": 0.9194814679569108, + "grad_norm": 0.5090177059173584, + "learning_rate": 4.720497750587688e-06, + "loss": 0.5244, + "step": 10072 + }, + { + "epoch": 0.9195727588095672, + "grad_norm": 0.5070715546607971, + "learning_rate": 4.720442759308712e-06, + "loss": 0.5762, + "step": 10073 + }, + { + "epoch": 0.9196640496622238, + "grad_norm": 0.46115589141845703, + "learning_rate": 4.720387762940954e-06, + "loss": 0.5405, + "step": 10074 + }, + { + "epoch": 0.9197553405148804, + "grad_norm": 0.5095444917678833, + "learning_rate": 4.720332761484538e-06, + "loss": 0.5346, + "step": 10075 + }, + { + "epoch": 0.919846631367537, + "grad_norm": 0.46998438239097595, + "learning_rate": 4.7202777549395915e-06, + "loss": 0.5498, + "step": 10076 + }, + { + "epoch": 0.9199379222201935, + "grad_norm": 0.4839629530906677, + "learning_rate": 4.72022274330624e-06, + "loss": 0.5586, + "step": 10077 + }, + { + "epoch": 0.9200292130728501, + "grad_norm": 0.4760489761829376, + "learning_rate": 4.720167726584609e-06, + "loss": 0.5682, + "step": 10078 + }, + { + "epoch": 0.9201205039255067, + "grad_norm": 0.48515358567237854, + "learning_rate": 4.720112704774826e-06, + "loss": 0.5094, + "step": 10079 + }, + { + "epoch": 0.9202117947781633, + "grad_norm": 0.47982531785964966, + "learning_rate": 4.720057677877017e-06, + "loss": 0.5322, + "step": 10080 + }, + { + "epoch": 0.9203030856308198, + "grad_norm": 0.48056432604789734, + "learning_rate": 4.720002645891307e-06, + "loss": 0.5556, + "step": 10081 + }, + { + "epoch": 0.9203943764834763, + "grad_norm": 0.49811220169067383, + "learning_rate": 4.719947608817823e-06, + "loss": 0.5533, + "step": 10082 + }, + { + "epoch": 0.9204856673361329, + "grad_norm": 0.45263850688934326, + "learning_rate": 4.71989256665669e-06, + "loss": 0.5471, + "step": 10083 + }, + { + "epoch": 0.9205769581887895, + "grad_norm": 0.49510112404823303, + "learning_rate": 4.719837519408036e-06, + "loss": 0.5519, + "step": 10084 + }, + { + "epoch": 0.9206682490414461, + "grad_norm": 0.5075222849845886, + "learning_rate": 4.719782467071985e-06, + "loss": 0.5491, + "step": 10085 + }, + { + "epoch": 0.9207595398941026, + "grad_norm": 0.5371955633163452, + "learning_rate": 4.719727409648665e-06, + "loss": 0.4998, + "step": 10086 + }, + { + "epoch": 0.9208508307467592, + "grad_norm": 0.4999240040779114, + "learning_rate": 4.719672347138201e-06, + "loss": 0.5307, + "step": 10087 + }, + { + "epoch": 0.9209421215994157, + "grad_norm": 0.4558846950531006, + "learning_rate": 4.719617279540721e-06, + "loss": 0.568, + "step": 10088 + }, + { + "epoch": 0.9210334124520723, + "grad_norm": 0.48853635787963867, + "learning_rate": 4.719562206856348e-06, + "loss": 0.5072, + "step": 10089 + }, + { + "epoch": 0.9211247033047288, + "grad_norm": 0.4439081847667694, + "learning_rate": 4.719507129085211e-06, + "loss": 0.5634, + "step": 10090 + }, + { + "epoch": 0.9212159941573854, + "grad_norm": 0.5158904790878296, + "learning_rate": 4.719452046227435e-06, + "loss": 0.5339, + "step": 10091 + }, + { + "epoch": 0.921307285010042, + "grad_norm": 0.5050358176231384, + "learning_rate": 4.7193969582831464e-06, + "loss": 0.5341, + "step": 10092 + }, + { + "epoch": 0.9213985758626986, + "grad_norm": 0.45901384949684143, + "learning_rate": 4.719341865252472e-06, + "loss": 0.5615, + "step": 10093 + }, + { + "epoch": 0.9214898667153552, + "grad_norm": 0.48326388001441956, + "learning_rate": 4.719286767135538e-06, + "loss": 0.5563, + "step": 10094 + }, + { + "epoch": 0.9215811575680117, + "grad_norm": 0.5347924828529358, + "learning_rate": 4.71923166393247e-06, + "loss": 0.5345, + "step": 10095 + }, + { + "epoch": 0.9216724484206682, + "grad_norm": 0.4672700762748718, + "learning_rate": 4.719176555643393e-06, + "loss": 0.5758, + "step": 10096 + }, + { + "epoch": 0.9217637392733248, + "grad_norm": 0.5012917518615723, + "learning_rate": 4.7191214422684365e-06, + "loss": 0.5478, + "step": 10097 + }, + { + "epoch": 0.9218550301259814, + "grad_norm": 0.4564729630947113, + "learning_rate": 4.719066323807725e-06, + "loss": 0.5826, + "step": 10098 + }, + { + "epoch": 0.9219463209786379, + "grad_norm": 0.48222458362579346, + "learning_rate": 4.719011200261385e-06, + "loss": 0.513, + "step": 10099 + }, + { + "epoch": 0.9220376118312945, + "grad_norm": 0.505300760269165, + "learning_rate": 4.7189560716295425e-06, + "loss": 0.522, + "step": 10100 + }, + { + "epoch": 0.9221289026839511, + "grad_norm": 0.5077504515647888, + "learning_rate": 4.718900937912325e-06, + "loss": 0.5457, + "step": 10101 + }, + { + "epoch": 0.9222201935366077, + "grad_norm": 0.46542254090309143, + "learning_rate": 4.718845799109857e-06, + "loss": 0.5991, + "step": 10102 + }, + { + "epoch": 0.9223114843892642, + "grad_norm": 0.4941832423210144, + "learning_rate": 4.718790655222266e-06, + "loss": 0.5432, + "step": 10103 + }, + { + "epoch": 0.9224027752419207, + "grad_norm": 0.49271416664123535, + "learning_rate": 4.718735506249679e-06, + "loss": 0.5162, + "step": 10104 + }, + { + "epoch": 0.9224940660945773, + "grad_norm": 0.48459798097610474, + "learning_rate": 4.718680352192222e-06, + "loss": 0.527, + "step": 10105 + }, + { + "epoch": 0.9225853569472339, + "grad_norm": 0.5129713416099548, + "learning_rate": 4.71862519305002e-06, + "loss": 0.5382, + "step": 10106 + }, + { + "epoch": 0.9226766477998904, + "grad_norm": 0.5111424922943115, + "learning_rate": 4.718570028823201e-06, + "loss": 0.5269, + "step": 10107 + }, + { + "epoch": 0.922767938652547, + "grad_norm": 0.4429752826690674, + "learning_rate": 4.718514859511891e-06, + "loss": 0.5553, + "step": 10108 + }, + { + "epoch": 0.9228592295052036, + "grad_norm": 0.5447637438774109, + "learning_rate": 4.718459685116216e-06, + "loss": 0.5449, + "step": 10109 + }, + { + "epoch": 0.9229505203578602, + "grad_norm": 0.4563870429992676, + "learning_rate": 4.718404505636304e-06, + "loss": 0.5437, + "step": 10110 + }, + { + "epoch": 0.9230418112105168, + "grad_norm": 0.5064417123794556, + "learning_rate": 4.718349321072279e-06, + "loss": 0.5407, + "step": 10111 + }, + { + "epoch": 0.9231331020631732, + "grad_norm": 0.4873161017894745, + "learning_rate": 4.718294131424269e-06, + "loss": 0.5329, + "step": 10112 + }, + { + "epoch": 0.9232243929158298, + "grad_norm": 0.46036985516548157, + "learning_rate": 4.7182389366924e-06, + "loss": 0.5226, + "step": 10113 + }, + { + "epoch": 0.9233156837684864, + "grad_norm": 0.45667731761932373, + "learning_rate": 4.718183736876799e-06, + "loss": 0.5603, + "step": 10114 + }, + { + "epoch": 0.923406974621143, + "grad_norm": 0.4732189178466797, + "learning_rate": 4.718128531977591e-06, + "loss": 0.5961, + "step": 10115 + }, + { + "epoch": 0.9234982654737995, + "grad_norm": 0.49665766954421997, + "learning_rate": 4.718073321994906e-06, + "loss": 0.5501, + "step": 10116 + }, + { + "epoch": 0.9235895563264561, + "grad_norm": 0.4957312047481537, + "learning_rate": 4.718018106928866e-06, + "loss": 0.5435, + "step": 10117 + }, + { + "epoch": 0.9236808471791127, + "grad_norm": 0.4991961419582367, + "learning_rate": 4.717962886779602e-06, + "loss": 0.5338, + "step": 10118 + }, + { + "epoch": 0.9237721380317692, + "grad_norm": 0.4854702949523926, + "learning_rate": 4.717907661547237e-06, + "loss": 0.5293, + "step": 10119 + }, + { + "epoch": 0.9238634288844257, + "grad_norm": 0.4873320162296295, + "learning_rate": 4.7178524312319e-06, + "loss": 0.5261, + "step": 10120 + }, + { + "epoch": 0.9239547197370823, + "grad_norm": 0.4799528121948242, + "learning_rate": 4.717797195833715e-06, + "loss": 0.5964, + "step": 10121 + }, + { + "epoch": 0.9240460105897389, + "grad_norm": 0.4922874867916107, + "learning_rate": 4.717741955352812e-06, + "loss": 0.5668, + "step": 10122 + }, + { + "epoch": 0.9241373014423955, + "grad_norm": 0.465733140707016, + "learning_rate": 4.717686709789313e-06, + "loss": 0.4958, + "step": 10123 + }, + { + "epoch": 0.924228592295052, + "grad_norm": 0.5160918235778809, + "learning_rate": 4.7176314591433496e-06, + "loss": 0.5527, + "step": 10124 + }, + { + "epoch": 0.9243198831477086, + "grad_norm": 0.479754239320755, + "learning_rate": 4.717576203415045e-06, + "loss": 0.5261, + "step": 10125 + }, + { + "epoch": 0.9244111740003652, + "grad_norm": 0.4838091731071472, + "learning_rate": 4.7175209426045275e-06, + "loss": 0.5298, + "step": 10126 + }, + { + "epoch": 0.9245024648530217, + "grad_norm": 0.46685636043548584, + "learning_rate": 4.717465676711923e-06, + "loss": 0.5347, + "step": 10127 + }, + { + "epoch": 0.9245937557056783, + "grad_norm": 0.49359190464019775, + "learning_rate": 4.7174104057373586e-06, + "loss": 0.5223, + "step": 10128 + }, + { + "epoch": 0.9246850465583348, + "grad_norm": 0.4741775393486023, + "learning_rate": 4.71735512968096e-06, + "loss": 0.5353, + "step": 10129 + }, + { + "epoch": 0.9247763374109914, + "grad_norm": 0.5018892884254456, + "learning_rate": 4.717299848542856e-06, + "loss": 0.5392, + "step": 10130 + }, + { + "epoch": 0.924867628263648, + "grad_norm": 0.47684404253959656, + "learning_rate": 4.71724456232317e-06, + "loss": 0.5843, + "step": 10131 + }, + { + "epoch": 0.9249589191163046, + "grad_norm": 0.504158079624176, + "learning_rate": 4.717189271022032e-06, + "loss": 0.5617, + "step": 10132 + }, + { + "epoch": 0.9250502099689611, + "grad_norm": 0.5232440829277039, + "learning_rate": 4.717133974639567e-06, + "loss": 0.5346, + "step": 10133 + }, + { + "epoch": 0.9251415008216177, + "grad_norm": 0.42589306831359863, + "learning_rate": 4.717078673175902e-06, + "loss": 0.5952, + "step": 10134 + }, + { + "epoch": 0.9252327916742742, + "grad_norm": 0.4770195484161377, + "learning_rate": 4.717023366631163e-06, + "loss": 0.5323, + "step": 10135 + }, + { + "epoch": 0.9253240825269308, + "grad_norm": 0.4857587516307831, + "learning_rate": 4.716968055005478e-06, + "loss": 0.5353, + "step": 10136 + }, + { + "epoch": 0.9254153733795873, + "grad_norm": 0.4597930312156677, + "learning_rate": 4.716912738298975e-06, + "loss": 0.5586, + "step": 10137 + }, + { + "epoch": 0.9255066642322439, + "grad_norm": 0.46010035276412964, + "learning_rate": 4.7168574165117775e-06, + "loss": 0.5303, + "step": 10138 + }, + { + "epoch": 0.9255979550849005, + "grad_norm": 0.4702208936214447, + "learning_rate": 4.716802089644013e-06, + "loss": 0.5478, + "step": 10139 + }, + { + "epoch": 0.9256892459375571, + "grad_norm": 0.4801655113697052, + "learning_rate": 4.716746757695811e-06, + "loss": 0.5799, + "step": 10140 + }, + { + "epoch": 0.9257805367902137, + "grad_norm": 0.49429842829704285, + "learning_rate": 4.716691420667295e-06, + "loss": 0.5468, + "step": 10141 + }, + { + "epoch": 0.9258718276428702, + "grad_norm": 0.5129449367523193, + "learning_rate": 4.716636078558595e-06, + "loss": 0.535, + "step": 10142 + }, + { + "epoch": 0.9259631184955267, + "grad_norm": 0.48792505264282227, + "learning_rate": 4.7165807313698344e-06, + "loss": 0.5385, + "step": 10143 + }, + { + "epoch": 0.9260544093481833, + "grad_norm": 0.5162431001663208, + "learning_rate": 4.716525379101143e-06, + "loss": 0.5098, + "step": 10144 + }, + { + "epoch": 0.9261457002008399, + "grad_norm": 0.4941597282886505, + "learning_rate": 4.716470021752646e-06, + "loss": 0.5743, + "step": 10145 + }, + { + "epoch": 0.9262369910534964, + "grad_norm": 0.5120836496353149, + "learning_rate": 4.7164146593244706e-06, + "loss": 0.5361, + "step": 10146 + }, + { + "epoch": 0.926328281906153, + "grad_norm": 0.4855067729949951, + "learning_rate": 4.716359291816743e-06, + "loss": 0.5712, + "step": 10147 + }, + { + "epoch": 0.9264195727588096, + "grad_norm": 0.5027732849121094, + "learning_rate": 4.716303919229592e-06, + "loss": 0.574, + "step": 10148 + }, + { + "epoch": 0.9265108636114662, + "grad_norm": 0.4965061843395233, + "learning_rate": 4.7162485415631436e-06, + "loss": 0.5431, + "step": 10149 + }, + { + "epoch": 0.9266021544641226, + "grad_norm": 0.4756872355937958, + "learning_rate": 4.716193158817523e-06, + "loss": 0.5601, + "step": 10150 + }, + { + "epoch": 0.9266934453167792, + "grad_norm": 0.4891928434371948, + "learning_rate": 4.716137770992859e-06, + "loss": 0.561, + "step": 10151 + }, + { + "epoch": 0.9267847361694358, + "grad_norm": 0.49237534403800964, + "learning_rate": 4.716082378089279e-06, + "loss": 0.532, + "step": 10152 + }, + { + "epoch": 0.9268760270220924, + "grad_norm": 0.50583815574646, + "learning_rate": 4.7160269801069085e-06, + "loss": 0.5356, + "step": 10153 + }, + { + "epoch": 0.926967317874749, + "grad_norm": 0.46206504106521606, + "learning_rate": 4.715971577045875e-06, + "loss": 0.5773, + "step": 10154 + }, + { + "epoch": 0.9270586087274055, + "grad_norm": 0.45643216371536255, + "learning_rate": 4.715916168906306e-06, + "loss": 0.5714, + "step": 10155 + }, + { + "epoch": 0.9271498995800621, + "grad_norm": 0.5120880007743835, + "learning_rate": 4.715860755688327e-06, + "loss": 0.5178, + "step": 10156 + }, + { + "epoch": 0.9272411904327187, + "grad_norm": 0.4674672484397888, + "learning_rate": 4.715805337392068e-06, + "loss": 0.5549, + "step": 10157 + }, + { + "epoch": 0.9273324812853752, + "grad_norm": 0.4669644832611084, + "learning_rate": 4.715749914017652e-06, + "loss": 0.5591, + "step": 10158 + }, + { + "epoch": 0.9274237721380317, + "grad_norm": 0.47766849398612976, + "learning_rate": 4.715694485565209e-06, + "loss": 0.5427, + "step": 10159 + }, + { + "epoch": 0.9275150629906883, + "grad_norm": 0.46557021141052246, + "learning_rate": 4.715639052034865e-06, + "loss": 0.5526, + "step": 10160 + }, + { + "epoch": 0.9276063538433449, + "grad_norm": 0.4919843375682831, + "learning_rate": 4.715583613426746e-06, + "loss": 0.5747, + "step": 10161 + }, + { + "epoch": 0.9276976446960015, + "grad_norm": 0.4788530766963959, + "learning_rate": 4.715528169740982e-06, + "loss": 0.5626, + "step": 10162 + }, + { + "epoch": 0.927788935548658, + "grad_norm": 0.477473646402359, + "learning_rate": 4.715472720977696e-06, + "loss": 0.509, + "step": 10163 + }, + { + "epoch": 0.9278802264013146, + "grad_norm": 0.49196657538414, + "learning_rate": 4.715417267137019e-06, + "loss": 0.568, + "step": 10164 + }, + { + "epoch": 0.9279715172539712, + "grad_norm": 0.4962279796600342, + "learning_rate": 4.715361808219076e-06, + "loss": 0.5633, + "step": 10165 + }, + { + "epoch": 0.9280628081066277, + "grad_norm": 0.5083755850791931, + "learning_rate": 4.715306344223994e-06, + "loss": 0.5314, + "step": 10166 + }, + { + "epoch": 0.9281540989592842, + "grad_norm": 0.5018441081047058, + "learning_rate": 4.715250875151901e-06, + "loss": 0.5391, + "step": 10167 + }, + { + "epoch": 0.9282453898119408, + "grad_norm": 0.49034956097602844, + "learning_rate": 4.715195401002925e-06, + "loss": 0.5784, + "step": 10168 + }, + { + "epoch": 0.9283366806645974, + "grad_norm": 0.4811658263206482, + "learning_rate": 4.715139921777189e-06, + "loss": 0.5686, + "step": 10169 + }, + { + "epoch": 0.928427971517254, + "grad_norm": 0.4804944097995758, + "learning_rate": 4.715084437474825e-06, + "loss": 0.5589, + "step": 10170 + }, + { + "epoch": 0.9285192623699106, + "grad_norm": 0.49586722254753113, + "learning_rate": 4.715028948095958e-06, + "loss": 0.535, + "step": 10171 + }, + { + "epoch": 0.9286105532225671, + "grad_norm": 0.4868879020214081, + "learning_rate": 4.714973453640714e-06, + "loss": 0.5511, + "step": 10172 + }, + { + "epoch": 0.9287018440752237, + "grad_norm": 0.48626917600631714, + "learning_rate": 4.714917954109223e-06, + "loss": 0.5254, + "step": 10173 + }, + { + "epoch": 0.9287931349278802, + "grad_norm": 0.49428772926330566, + "learning_rate": 4.71486244950161e-06, + "loss": 0.5288, + "step": 10174 + }, + { + "epoch": 0.9288844257805368, + "grad_norm": 0.48026227951049805, + "learning_rate": 4.7148069398180035e-06, + "loss": 0.5367, + "step": 10175 + }, + { + "epoch": 0.9289757166331933, + "grad_norm": 0.5219705700874329, + "learning_rate": 4.71475142505853e-06, + "loss": 0.4902, + "step": 10176 + }, + { + "epoch": 0.9290670074858499, + "grad_norm": 0.4693206250667572, + "learning_rate": 4.714695905223317e-06, + "loss": 0.5303, + "step": 10177 + }, + { + "epoch": 0.9291582983385065, + "grad_norm": 0.4726804196834564, + "learning_rate": 4.714640380312491e-06, + "loss": 0.5494, + "step": 10178 + }, + { + "epoch": 0.9292495891911631, + "grad_norm": 0.4950944781303406, + "learning_rate": 4.714584850326179e-06, + "loss": 0.5047, + "step": 10179 + }, + { + "epoch": 0.9293408800438197, + "grad_norm": 0.5244894027709961, + "learning_rate": 4.714529315264511e-06, + "loss": 0.5018, + "step": 10180 + }, + { + "epoch": 0.9294321708964762, + "grad_norm": 0.4830930829048157, + "learning_rate": 4.714473775127611e-06, + "loss": 0.5472, + "step": 10181 + }, + { + "epoch": 0.9295234617491327, + "grad_norm": 0.5210926532745361, + "learning_rate": 4.714418229915608e-06, + "loss": 0.555, + "step": 10182 + }, + { + "epoch": 0.9296147526017893, + "grad_norm": 0.5055891275405884, + "learning_rate": 4.714362679628629e-06, + "loss": 0.5138, + "step": 10183 + }, + { + "epoch": 0.9297060434544459, + "grad_norm": 0.47751426696777344, + "learning_rate": 4.714307124266802e-06, + "loss": 0.606, + "step": 10184 + }, + { + "epoch": 0.9297973343071024, + "grad_norm": 0.46896326541900635, + "learning_rate": 4.714251563830252e-06, + "loss": 0.5555, + "step": 10185 + }, + { + "epoch": 0.929888625159759, + "grad_norm": 0.46401500701904297, + "learning_rate": 4.714195998319109e-06, + "loss": 0.5531, + "step": 10186 + }, + { + "epoch": 0.9299799160124156, + "grad_norm": 0.4680143892765045, + "learning_rate": 4.714140427733499e-06, + "loss": 0.5284, + "step": 10187 + }, + { + "epoch": 0.9300712068650722, + "grad_norm": 0.46032431721687317, + "learning_rate": 4.71408485207355e-06, + "loss": 0.5657, + "step": 10188 + }, + { + "epoch": 0.9301624977177286, + "grad_norm": 0.43931010365486145, + "learning_rate": 4.714029271339388e-06, + "loss": 0.601, + "step": 10189 + }, + { + "epoch": 0.9302537885703852, + "grad_norm": 0.48937222361564636, + "learning_rate": 4.713973685531143e-06, + "loss": 0.5561, + "step": 10190 + }, + { + "epoch": 0.9303450794230418, + "grad_norm": 0.48826342821121216, + "learning_rate": 4.7139180946489396e-06, + "loss": 0.5542, + "step": 10191 + }, + { + "epoch": 0.9304363702756984, + "grad_norm": 0.47193872928619385, + "learning_rate": 4.713862498692907e-06, + "loss": 0.5641, + "step": 10192 + }, + { + "epoch": 0.9305276611283549, + "grad_norm": 0.47214674949645996, + "learning_rate": 4.713806897663171e-06, + "loss": 0.5911, + "step": 10193 + }, + { + "epoch": 0.9306189519810115, + "grad_norm": 0.4960625171661377, + "learning_rate": 4.713751291559862e-06, + "loss": 0.5383, + "step": 10194 + }, + { + "epoch": 0.9307102428336681, + "grad_norm": 0.45264825224876404, + "learning_rate": 4.7136956803831035e-06, + "loss": 0.5835, + "step": 10195 + }, + { + "epoch": 0.9308015336863247, + "grad_norm": 0.465960294008255, + "learning_rate": 4.7136400641330245e-06, + "loss": 0.5631, + "step": 10196 + }, + { + "epoch": 0.9308928245389811, + "grad_norm": 0.4630751311779022, + "learning_rate": 4.713584442809754e-06, + "loss": 0.5557, + "step": 10197 + }, + { + "epoch": 0.9309841153916377, + "grad_norm": 0.5009815096855164, + "learning_rate": 4.713528816413418e-06, + "loss": 0.5037, + "step": 10198 + }, + { + "epoch": 0.9310754062442943, + "grad_norm": 0.4808002710342407, + "learning_rate": 4.713473184944145e-06, + "loss": 0.5841, + "step": 10199 + }, + { + "epoch": 0.9311666970969509, + "grad_norm": 0.4600621461868286, + "learning_rate": 4.713417548402061e-06, + "loss": 0.5725, + "step": 10200 + }, + { + "epoch": 0.9312579879496075, + "grad_norm": 0.47067174315452576, + "learning_rate": 4.713361906787295e-06, + "loss": 0.5359, + "step": 10201 + }, + { + "epoch": 0.931349278802264, + "grad_norm": 0.488389790058136, + "learning_rate": 4.713306260099973e-06, + "loss": 0.528, + "step": 10202 + }, + { + "epoch": 0.9314405696549206, + "grad_norm": 0.49044692516326904, + "learning_rate": 4.713250608340224e-06, + "loss": 0.5253, + "step": 10203 + }, + { + "epoch": 0.9315318605075772, + "grad_norm": 0.5118140578269958, + "learning_rate": 4.713194951508174e-06, + "loss": 0.5314, + "step": 10204 + }, + { + "epoch": 0.9316231513602337, + "grad_norm": 0.5165173411369324, + "learning_rate": 4.713139289603953e-06, + "loss": 0.5458, + "step": 10205 + }, + { + "epoch": 0.9317144422128902, + "grad_norm": 0.5106277465820312, + "learning_rate": 4.713083622627685e-06, + "loss": 0.5381, + "step": 10206 + }, + { + "epoch": 0.9318057330655468, + "grad_norm": 0.4910295605659485, + "learning_rate": 4.713027950579501e-06, + "loss": 0.5568, + "step": 10207 + }, + { + "epoch": 0.9318970239182034, + "grad_norm": 0.49050137400627136, + "learning_rate": 4.712972273459527e-06, + "loss": 0.5118, + "step": 10208 + }, + { + "epoch": 0.93198831477086, + "grad_norm": 0.449969083070755, + "learning_rate": 4.712916591267891e-06, + "loss": 0.5499, + "step": 10209 + }, + { + "epoch": 0.9320796056235165, + "grad_norm": 0.46507638692855835, + "learning_rate": 4.7128609040047195e-06, + "loss": 0.5523, + "step": 10210 + }, + { + "epoch": 0.9321708964761731, + "grad_norm": 0.46658098697662354, + "learning_rate": 4.7128052116701415e-06, + "loss": 0.5346, + "step": 10211 + }, + { + "epoch": 0.9322621873288297, + "grad_norm": 0.545132577419281, + "learning_rate": 4.712749514264284e-06, + "loss": 0.5141, + "step": 10212 + }, + { + "epoch": 0.9323534781814862, + "grad_norm": 0.49226096272468567, + "learning_rate": 4.712693811787276e-06, + "loss": 0.5371, + "step": 10213 + }, + { + "epoch": 0.9324447690341428, + "grad_norm": 0.5108422636985779, + "learning_rate": 4.7126381042392425e-06, + "loss": 0.5191, + "step": 10214 + }, + { + "epoch": 0.9325360598867993, + "grad_norm": 0.47562792897224426, + "learning_rate": 4.7125823916203125e-06, + "loss": 0.5299, + "step": 10215 + }, + { + "epoch": 0.9326273507394559, + "grad_norm": 0.4961760640144348, + "learning_rate": 4.7125266739306155e-06, + "loss": 0.5768, + "step": 10216 + }, + { + "epoch": 0.9327186415921125, + "grad_norm": 0.4691086709499359, + "learning_rate": 4.712470951170276e-06, + "loss": 0.5336, + "step": 10217 + }, + { + "epoch": 0.9328099324447691, + "grad_norm": 0.46523183584213257, + "learning_rate": 4.712415223339424e-06, + "loss": 0.5516, + "step": 10218 + }, + { + "epoch": 0.9329012232974256, + "grad_norm": 0.5150291323661804, + "learning_rate": 4.712359490438186e-06, + "loss": 0.5123, + "step": 10219 + }, + { + "epoch": 0.9329925141500821, + "grad_norm": 0.5024615526199341, + "learning_rate": 4.7123037524666896e-06, + "loss": 0.5601, + "step": 10220 + }, + { + "epoch": 0.9330838050027387, + "grad_norm": 0.5036181211471558, + "learning_rate": 4.7122480094250635e-06, + "loss": 0.5452, + "step": 10221 + }, + { + "epoch": 0.9331750958553953, + "grad_norm": 0.4811396896839142, + "learning_rate": 4.712192261313435e-06, + "loss": 0.5532, + "step": 10222 + }, + { + "epoch": 0.9332663867080518, + "grad_norm": 0.5015863180160522, + "learning_rate": 4.712136508131932e-06, + "loss": 0.5483, + "step": 10223 + }, + { + "epoch": 0.9333576775607084, + "grad_norm": 0.502521812915802, + "learning_rate": 4.712080749880683e-06, + "loss": 0.5268, + "step": 10224 + }, + { + "epoch": 0.933448968413365, + "grad_norm": 0.47080349922180176, + "learning_rate": 4.712024986559815e-06, + "loss": 0.5456, + "step": 10225 + }, + { + "epoch": 0.9335402592660216, + "grad_norm": 0.48184847831726074, + "learning_rate": 4.711969218169454e-06, + "loss": 0.5414, + "step": 10226 + }, + { + "epoch": 0.9336315501186782, + "grad_norm": 0.5008347630500793, + "learning_rate": 4.711913444709731e-06, + "loss": 0.5259, + "step": 10227 + }, + { + "epoch": 0.9337228409713346, + "grad_norm": 0.4949859082698822, + "learning_rate": 4.711857666180772e-06, + "loss": 0.5453, + "step": 10228 + }, + { + "epoch": 0.9338141318239912, + "grad_norm": 0.4632492661476135, + "learning_rate": 4.711801882582705e-06, + "loss": 0.5334, + "step": 10229 + }, + { + "epoch": 0.9339054226766478, + "grad_norm": 0.4622174799442291, + "learning_rate": 4.711746093915659e-06, + "loss": 0.5539, + "step": 10230 + }, + { + "epoch": 0.9339967135293044, + "grad_norm": 0.49333035945892334, + "learning_rate": 4.71169030017976e-06, + "loss": 0.5529, + "step": 10231 + }, + { + "epoch": 0.9340880043819609, + "grad_norm": 0.4822500944137573, + "learning_rate": 4.711634501375137e-06, + "loss": 0.5538, + "step": 10232 + }, + { + "epoch": 0.9341792952346175, + "grad_norm": 0.48127374053001404, + "learning_rate": 4.711578697501917e-06, + "loss": 0.523, + "step": 10233 + }, + { + "epoch": 0.9342705860872741, + "grad_norm": 0.4889320731163025, + "learning_rate": 4.71152288856023e-06, + "loss": 0.5338, + "step": 10234 + }, + { + "epoch": 0.9343618769399307, + "grad_norm": 0.46376827359199524, + "learning_rate": 4.711467074550202e-06, + "loss": 0.5767, + "step": 10235 + }, + { + "epoch": 0.9344531677925871, + "grad_norm": 0.4862171411514282, + "learning_rate": 4.71141125547196e-06, + "loss": 0.5781, + "step": 10236 + }, + { + "epoch": 0.9345444586452437, + "grad_norm": 0.4746403694152832, + "learning_rate": 4.711355431325635e-06, + "loss": 0.5501, + "step": 10237 + }, + { + "epoch": 0.9346357494979003, + "grad_norm": 0.47957804799079895, + "learning_rate": 4.711299602111353e-06, + "loss": 0.5276, + "step": 10238 + }, + { + "epoch": 0.9347270403505569, + "grad_norm": 0.46501532196998596, + "learning_rate": 4.711243767829242e-06, + "loss": 0.5481, + "step": 10239 + }, + { + "epoch": 0.9348183312032134, + "grad_norm": 0.4738713502883911, + "learning_rate": 4.71118792847943e-06, + "loss": 0.5455, + "step": 10240 + }, + { + "epoch": 0.93490962205587, + "grad_norm": 0.5049982070922852, + "learning_rate": 4.711132084062045e-06, + "loss": 0.5529, + "step": 10241 + }, + { + "epoch": 0.9350009129085266, + "grad_norm": 0.47931969165802, + "learning_rate": 4.711076234577215e-06, + "loss": 0.504, + "step": 10242 + }, + { + "epoch": 0.9350922037611832, + "grad_norm": 0.49097132682800293, + "learning_rate": 4.711020380025069e-06, + "loss": 0.528, + "step": 10243 + }, + { + "epoch": 0.9351834946138396, + "grad_norm": 0.49844470620155334, + "learning_rate": 4.710964520405733e-06, + "loss": 0.5472, + "step": 10244 + }, + { + "epoch": 0.9352747854664962, + "grad_norm": 0.5320307016372681, + "learning_rate": 4.710908655719337e-06, + "loss": 0.5235, + "step": 10245 + }, + { + "epoch": 0.9353660763191528, + "grad_norm": 0.48857125639915466, + "learning_rate": 4.7108527859660066e-06, + "loss": 0.5884, + "step": 10246 + }, + { + "epoch": 0.9354573671718094, + "grad_norm": 0.4788157045841217, + "learning_rate": 4.710796911145873e-06, + "loss": 0.5817, + "step": 10247 + }, + { + "epoch": 0.935548658024466, + "grad_norm": 0.4888882040977478, + "learning_rate": 4.710741031259062e-06, + "loss": 0.5251, + "step": 10248 + }, + { + "epoch": 0.9356399488771225, + "grad_norm": 0.5004715323448181, + "learning_rate": 4.710685146305703e-06, + "loss": 0.5563, + "step": 10249 + }, + { + "epoch": 0.9357312397297791, + "grad_norm": 0.45956119894981384, + "learning_rate": 4.710629256285922e-06, + "loss": 0.5536, + "step": 10250 + }, + { + "epoch": 0.9358225305824356, + "grad_norm": 0.490301251411438, + "learning_rate": 4.71057336119985e-06, + "loss": 0.541, + "step": 10251 + }, + { + "epoch": 0.9359138214350922, + "grad_norm": 0.4982045292854309, + "learning_rate": 4.710517461047613e-06, + "loss": 0.5362, + "step": 10252 + }, + { + "epoch": 0.9360051122877487, + "grad_norm": 0.4845951795578003, + "learning_rate": 4.71046155582934e-06, + "loss": 0.5221, + "step": 10253 + }, + { + "epoch": 0.9360964031404053, + "grad_norm": 0.4372490346431732, + "learning_rate": 4.710405645545159e-06, + "loss": 0.6018, + "step": 10254 + }, + { + "epoch": 0.9361876939930619, + "grad_norm": 0.48672911524772644, + "learning_rate": 4.710349730195196e-06, + "loss": 0.5384, + "step": 10255 + }, + { + "epoch": 0.9362789848457185, + "grad_norm": 0.4997353255748749, + "learning_rate": 4.710293809779583e-06, + "loss": 0.5014, + "step": 10256 + }, + { + "epoch": 0.936370275698375, + "grad_norm": 0.4956429600715637, + "learning_rate": 4.710237884298446e-06, + "loss": 0.5348, + "step": 10257 + }, + { + "epoch": 0.9364615665510316, + "grad_norm": 0.49111801385879517, + "learning_rate": 4.710181953751913e-06, + "loss": 0.5614, + "step": 10258 + }, + { + "epoch": 0.9365528574036881, + "grad_norm": 0.4892960786819458, + "learning_rate": 4.710126018140113e-06, + "loss": 0.5257, + "step": 10259 + }, + { + "epoch": 0.9366441482563447, + "grad_norm": 0.4838266968727112, + "learning_rate": 4.7100700774631735e-06, + "loss": 0.529, + "step": 10260 + }, + { + "epoch": 0.9367354391090013, + "grad_norm": 0.47259172797203064, + "learning_rate": 4.7100141317212235e-06, + "loss": 0.5445, + "step": 10261 + }, + { + "epoch": 0.9368267299616578, + "grad_norm": 0.47783970832824707, + "learning_rate": 4.70995818091439e-06, + "loss": 0.5461, + "step": 10262 + }, + { + "epoch": 0.9369180208143144, + "grad_norm": 0.5036233067512512, + "learning_rate": 4.709902225042803e-06, + "loss": 0.5135, + "step": 10263 + }, + { + "epoch": 0.937009311666971, + "grad_norm": 0.5164720416069031, + "learning_rate": 4.7098462641065876e-06, + "loss": 0.5043, + "step": 10264 + }, + { + "epoch": 0.9371006025196276, + "grad_norm": 0.47010165452957153, + "learning_rate": 4.709790298105876e-06, + "loss": 0.5385, + "step": 10265 + }, + { + "epoch": 0.9371918933722841, + "grad_norm": 0.49878647923469543, + "learning_rate": 4.7097343270407945e-06, + "loss": 0.5381, + "step": 10266 + }, + { + "epoch": 0.9372831842249406, + "grad_norm": 0.4849139153957367, + "learning_rate": 4.7096783509114706e-06, + "loss": 0.5284, + "step": 10267 + }, + { + "epoch": 0.9373744750775972, + "grad_norm": 0.5058600902557373, + "learning_rate": 4.709622369718034e-06, + "loss": 0.5383, + "step": 10268 + }, + { + "epoch": 0.9374657659302538, + "grad_norm": 0.5014117360115051, + "learning_rate": 4.709566383460612e-06, + "loss": 0.5321, + "step": 10269 + }, + { + "epoch": 0.9375570567829103, + "grad_norm": 0.4559449255466461, + "learning_rate": 4.7095103921393335e-06, + "loss": 0.5625, + "step": 10270 + }, + { + "epoch": 0.9376483476355669, + "grad_norm": 0.49976325035095215, + "learning_rate": 4.709454395754327e-06, + "loss": 0.527, + "step": 10271 + }, + { + "epoch": 0.9377396384882235, + "grad_norm": 0.47072723507881165, + "learning_rate": 4.7093983943057205e-06, + "loss": 0.5653, + "step": 10272 + }, + { + "epoch": 0.9378309293408801, + "grad_norm": 0.5012606978416443, + "learning_rate": 4.7093423877936424e-06, + "loss": 0.5184, + "step": 10273 + }, + { + "epoch": 0.9379222201935367, + "grad_norm": 0.4903220236301422, + "learning_rate": 4.709286376218221e-06, + "loss": 0.5704, + "step": 10274 + }, + { + "epoch": 0.9380135110461931, + "grad_norm": 0.4669097661972046, + "learning_rate": 4.709230359579584e-06, + "loss": 0.5605, + "step": 10275 + }, + { + "epoch": 0.9381048018988497, + "grad_norm": 0.4642968475818634, + "learning_rate": 4.709174337877862e-06, + "loss": 0.5908, + "step": 10276 + }, + { + "epoch": 0.9381960927515063, + "grad_norm": 0.478189617395401, + "learning_rate": 4.70911831111318e-06, + "loss": 0.5435, + "step": 10277 + }, + { + "epoch": 0.9382873836041629, + "grad_norm": 0.49553176760673523, + "learning_rate": 4.709062279285669e-06, + "loss": 0.5485, + "step": 10278 + }, + { + "epoch": 0.9383786744568194, + "grad_norm": 0.50111323595047, + "learning_rate": 4.709006242395457e-06, + "loss": 0.4772, + "step": 10279 + }, + { + "epoch": 0.938469965309476, + "grad_norm": 0.47541406750679016, + "learning_rate": 4.708950200442672e-06, + "loss": 0.5981, + "step": 10280 + }, + { + "epoch": 0.9385612561621326, + "grad_norm": 0.4755114018917084, + "learning_rate": 4.708894153427443e-06, + "loss": 0.5769, + "step": 10281 + }, + { + "epoch": 0.9386525470147891, + "grad_norm": 0.4909171760082245, + "learning_rate": 4.708838101349896e-06, + "loss": 0.5439, + "step": 10282 + }, + { + "epoch": 0.9387438378674456, + "grad_norm": 0.5141074657440186, + "learning_rate": 4.708782044210163e-06, + "loss": 0.5143, + "step": 10283 + }, + { + "epoch": 0.9388351287201022, + "grad_norm": 0.4944665729999542, + "learning_rate": 4.708725982008371e-06, + "loss": 0.5492, + "step": 10284 + }, + { + "epoch": 0.9389264195727588, + "grad_norm": 0.480218768119812, + "learning_rate": 4.708669914744648e-06, + "loss": 0.5294, + "step": 10285 + }, + { + "epoch": 0.9390177104254154, + "grad_norm": 0.46739450097084045, + "learning_rate": 4.708613842419123e-06, + "loss": 0.5783, + "step": 10286 + }, + { + "epoch": 0.939109001278072, + "grad_norm": 0.48406845331192017, + "learning_rate": 4.708557765031924e-06, + "loss": 0.533, + "step": 10287 + }, + { + "epoch": 0.9392002921307285, + "grad_norm": 0.5136422514915466, + "learning_rate": 4.70850168258318e-06, + "loss": 0.4718, + "step": 10288 + }, + { + "epoch": 0.9392915829833851, + "grad_norm": 0.4978468120098114, + "learning_rate": 4.70844559507302e-06, + "loss": 0.5431, + "step": 10289 + }, + { + "epoch": 0.9393828738360416, + "grad_norm": 0.49478912353515625, + "learning_rate": 4.708389502501571e-06, + "loss": 0.543, + "step": 10290 + }, + { + "epoch": 0.9394741646886982, + "grad_norm": 0.4768080711364746, + "learning_rate": 4.708333404868963e-06, + "loss": 0.558, + "step": 10291 + }, + { + "epoch": 0.9395654555413547, + "grad_norm": 0.5000780820846558, + "learning_rate": 4.708277302175325e-06, + "loss": 0.5244, + "step": 10292 + }, + { + "epoch": 0.9396567463940113, + "grad_norm": 0.4856523871421814, + "learning_rate": 4.708221194420784e-06, + "loss": 0.5474, + "step": 10293 + }, + { + "epoch": 0.9397480372466679, + "grad_norm": 0.47084465622901917, + "learning_rate": 4.708165081605468e-06, + "loss": 0.5517, + "step": 10294 + }, + { + "epoch": 0.9398393280993245, + "grad_norm": 0.4864419996738434, + "learning_rate": 4.708108963729508e-06, + "loss": 0.5382, + "step": 10295 + }, + { + "epoch": 0.939930618951981, + "grad_norm": 0.453632652759552, + "learning_rate": 4.708052840793032e-06, + "loss": 0.5915, + "step": 10296 + }, + { + "epoch": 0.9400219098046376, + "grad_norm": 0.48614057898521423, + "learning_rate": 4.707996712796168e-06, + "loss": 0.5358, + "step": 10297 + }, + { + "epoch": 0.9401132006572941, + "grad_norm": 0.5028074979782104, + "learning_rate": 4.707940579739044e-06, + "loss": 0.5429, + "step": 10298 + }, + { + "epoch": 0.9402044915099507, + "grad_norm": 0.5016192197799683, + "learning_rate": 4.707884441621789e-06, + "loss": 0.5207, + "step": 10299 + }, + { + "epoch": 0.9402957823626072, + "grad_norm": 0.4931145906448364, + "learning_rate": 4.707828298444533e-06, + "loss": 0.5501, + "step": 10300 + }, + { + "epoch": 0.9403870732152638, + "grad_norm": 0.4934966266155243, + "learning_rate": 4.707772150207403e-06, + "loss": 0.5209, + "step": 10301 + }, + { + "epoch": 0.9404783640679204, + "grad_norm": 0.4901537597179413, + "learning_rate": 4.707715996910529e-06, + "loss": 0.5405, + "step": 10302 + }, + { + "epoch": 0.940569654920577, + "grad_norm": 0.4934732913970947, + "learning_rate": 4.707659838554038e-06, + "loss": 0.542, + "step": 10303 + }, + { + "epoch": 0.9406609457732336, + "grad_norm": 0.4568291902542114, + "learning_rate": 4.707603675138061e-06, + "loss": 0.5869, + "step": 10304 + }, + { + "epoch": 0.9407522366258901, + "grad_norm": 0.47276386618614197, + "learning_rate": 4.707547506662725e-06, + "loss": 0.5558, + "step": 10305 + }, + { + "epoch": 0.9408435274785466, + "grad_norm": 0.48539891839027405, + "learning_rate": 4.707491333128159e-06, + "loss": 0.5613, + "step": 10306 + }, + { + "epoch": 0.9409348183312032, + "grad_norm": 0.4817909896373749, + "learning_rate": 4.707435154534492e-06, + "loss": 0.5133, + "step": 10307 + }, + { + "epoch": 0.9410261091838598, + "grad_norm": 0.49254387617111206, + "learning_rate": 4.7073789708818525e-06, + "loss": 0.5662, + "step": 10308 + }, + { + "epoch": 0.9411174000365163, + "grad_norm": 0.47545188665390015, + "learning_rate": 4.707322782170369e-06, + "loss": 0.5363, + "step": 10309 + }, + { + "epoch": 0.9412086908891729, + "grad_norm": 0.4915298819541931, + "learning_rate": 4.707266588400172e-06, + "loss": 0.5512, + "step": 10310 + }, + { + "epoch": 0.9412999817418295, + "grad_norm": 0.506955087184906, + "learning_rate": 4.707210389571388e-06, + "loss": 0.508, + "step": 10311 + }, + { + "epoch": 0.9413912725944861, + "grad_norm": 0.4497239589691162, + "learning_rate": 4.707154185684147e-06, + "loss": 0.5658, + "step": 10312 + }, + { + "epoch": 0.9414825634471427, + "grad_norm": 0.4811524450778961, + "learning_rate": 4.7070979767385785e-06, + "loss": 0.5446, + "step": 10313 + }, + { + "epoch": 0.9415738542997991, + "grad_norm": 0.4838257133960724, + "learning_rate": 4.70704176273481e-06, + "loss": 0.5175, + "step": 10314 + }, + { + "epoch": 0.9416651451524557, + "grad_norm": 0.4745556712150574, + "learning_rate": 4.706985543672969e-06, + "loss": 0.5392, + "step": 10315 + }, + { + "epoch": 0.9417564360051123, + "grad_norm": 0.5248891711235046, + "learning_rate": 4.706929319553189e-06, + "loss": 0.538, + "step": 10316 + }, + { + "epoch": 0.9418477268577689, + "grad_norm": 0.47763553261756897, + "learning_rate": 4.706873090375594e-06, + "loss": 0.5597, + "step": 10317 + }, + { + "epoch": 0.9419390177104254, + "grad_norm": 0.5125266313552856, + "learning_rate": 4.706816856140315e-06, + "loss": 0.5176, + "step": 10318 + }, + { + "epoch": 0.942030308563082, + "grad_norm": 0.475148469209671, + "learning_rate": 4.706760616847481e-06, + "loss": 0.5212, + "step": 10319 + }, + { + "epoch": 0.9421215994157386, + "grad_norm": 0.46153685450553894, + "learning_rate": 4.7067043724972206e-06, + "loss": 0.5456, + "step": 10320 + }, + { + "epoch": 0.942212890268395, + "grad_norm": 0.47271016240119934, + "learning_rate": 4.706648123089663e-06, + "loss": 0.5602, + "step": 10321 + }, + { + "epoch": 0.9423041811210516, + "grad_norm": 0.5202409625053406, + "learning_rate": 4.7065918686249355e-06, + "loss": 0.4979, + "step": 10322 + }, + { + "epoch": 0.9423954719737082, + "grad_norm": 0.5004376173019409, + "learning_rate": 4.70653560910317e-06, + "loss": 0.5384, + "step": 10323 + }, + { + "epoch": 0.9424867628263648, + "grad_norm": 0.47906294465065, + "learning_rate": 4.706479344524492e-06, + "loss": 0.582, + "step": 10324 + }, + { + "epoch": 0.9425780536790214, + "grad_norm": 0.47618916630744934, + "learning_rate": 4.706423074889034e-06, + "loss": 0.5109, + "step": 10325 + }, + { + "epoch": 0.9426693445316779, + "grad_norm": 0.46668434143066406, + "learning_rate": 4.706366800196922e-06, + "loss": 0.6099, + "step": 10326 + }, + { + "epoch": 0.9427606353843345, + "grad_norm": 0.4610290229320526, + "learning_rate": 4.7063105204482865e-06, + "loss": 0.5405, + "step": 10327 + }, + { + "epoch": 0.9428519262369911, + "grad_norm": 0.49212920665740967, + "learning_rate": 4.706254235643256e-06, + "loss": 0.5197, + "step": 10328 + }, + { + "epoch": 0.9429432170896476, + "grad_norm": 0.506344199180603, + "learning_rate": 4.70619794578196e-06, + "loss": 0.5244, + "step": 10329 + }, + { + "epoch": 0.9430345079423041, + "grad_norm": 0.472012996673584, + "learning_rate": 4.7061416508645265e-06, + "loss": 0.5439, + "step": 10330 + }, + { + "epoch": 0.9431257987949607, + "grad_norm": 0.4691878855228424, + "learning_rate": 4.706085350891086e-06, + "loss": 0.5554, + "step": 10331 + }, + { + "epoch": 0.9432170896476173, + "grad_norm": 0.5345527529716492, + "learning_rate": 4.706029045861766e-06, + "loss": 0.4805, + "step": 10332 + }, + { + "epoch": 0.9433083805002739, + "grad_norm": 0.5060455203056335, + "learning_rate": 4.705972735776696e-06, + "loss": 0.5396, + "step": 10333 + }, + { + "epoch": 0.9433996713529305, + "grad_norm": 0.4720635712146759, + "learning_rate": 4.705916420636006e-06, + "loss": 0.5688, + "step": 10334 + }, + { + "epoch": 0.943490962205587, + "grad_norm": 0.4793552756309509, + "learning_rate": 4.705860100439823e-06, + "loss": 0.5452, + "step": 10335 + }, + { + "epoch": 0.9435822530582436, + "grad_norm": 0.5282770991325378, + "learning_rate": 4.705803775188279e-06, + "loss": 0.532, + "step": 10336 + }, + { + "epoch": 0.9436735439109001, + "grad_norm": 0.4859688878059387, + "learning_rate": 4.7057474448815e-06, + "loss": 0.5269, + "step": 10337 + }, + { + "epoch": 0.9437648347635567, + "grad_norm": 0.495194673538208, + "learning_rate": 4.705691109519618e-06, + "loss": 0.5192, + "step": 10338 + }, + { + "epoch": 0.9438561256162132, + "grad_norm": 0.4670143127441406, + "learning_rate": 4.705634769102759e-06, + "loss": 0.5531, + "step": 10339 + }, + { + "epoch": 0.9439474164688698, + "grad_norm": 0.4880469739437103, + "learning_rate": 4.7055784236310544e-06, + "loss": 0.5492, + "step": 10340 + }, + { + "epoch": 0.9440387073215264, + "grad_norm": 0.4834745228290558, + "learning_rate": 4.705522073104634e-06, + "loss": 0.5465, + "step": 10341 + }, + { + "epoch": 0.944129998174183, + "grad_norm": 0.4579313397407532, + "learning_rate": 4.705465717523624e-06, + "loss": 0.5526, + "step": 10342 + }, + { + "epoch": 0.9442212890268395, + "grad_norm": 0.48014310002326965, + "learning_rate": 4.705409356888156e-06, + "loss": 0.5362, + "step": 10343 + }, + { + "epoch": 0.9443125798794961, + "grad_norm": 0.489585816860199, + "learning_rate": 4.705352991198358e-06, + "loss": 0.5461, + "step": 10344 + }, + { + "epoch": 0.9444038707321526, + "grad_norm": 0.469623327255249, + "learning_rate": 4.70529662045436e-06, + "loss": 0.5576, + "step": 10345 + }, + { + "epoch": 0.9444951615848092, + "grad_norm": 0.452656090259552, + "learning_rate": 4.705240244656291e-06, + "loss": 0.5713, + "step": 10346 + }, + { + "epoch": 0.9445864524374658, + "grad_norm": 0.5173936486244202, + "learning_rate": 4.705183863804279e-06, + "loss": 0.5366, + "step": 10347 + }, + { + "epoch": 0.9446777432901223, + "grad_norm": 0.4780719578266144, + "learning_rate": 4.705127477898454e-06, + "loss": 0.5679, + "step": 10348 + }, + { + "epoch": 0.9447690341427789, + "grad_norm": 0.48791736364364624, + "learning_rate": 4.705071086938947e-06, + "loss": 0.5309, + "step": 10349 + }, + { + "epoch": 0.9448603249954355, + "grad_norm": 0.5132375955581665, + "learning_rate": 4.7050146909258845e-06, + "loss": 0.5316, + "step": 10350 + }, + { + "epoch": 0.9449516158480921, + "grad_norm": 0.4949130415916443, + "learning_rate": 4.704958289859397e-06, + "loss": 0.5486, + "step": 10351 + }, + { + "epoch": 0.9450429067007485, + "grad_norm": 0.4843446612358093, + "learning_rate": 4.704901883739613e-06, + "loss": 0.5476, + "step": 10352 + }, + { + "epoch": 0.9451341975534051, + "grad_norm": 0.4654484987258911, + "learning_rate": 4.704845472566663e-06, + "loss": 0.5808, + "step": 10353 + }, + { + "epoch": 0.9452254884060617, + "grad_norm": 0.4911382496356964, + "learning_rate": 4.704789056340676e-06, + "loss": 0.5211, + "step": 10354 + }, + { + "epoch": 0.9453167792587183, + "grad_norm": 0.47032666206359863, + "learning_rate": 4.704732635061781e-06, + "loss": 0.4897, + "step": 10355 + }, + { + "epoch": 0.9454080701113748, + "grad_norm": 0.48071542382240295, + "learning_rate": 4.704676208730107e-06, + "loss": 0.5359, + "step": 10356 + }, + { + "epoch": 0.9454993609640314, + "grad_norm": 0.4416145980358124, + "learning_rate": 4.704619777345783e-06, + "loss": 0.5732, + "step": 10357 + }, + { + "epoch": 0.945590651816688, + "grad_norm": 0.4780254364013672, + "learning_rate": 4.7045633409089394e-06, + "loss": 0.5557, + "step": 10358 + }, + { + "epoch": 0.9456819426693446, + "grad_norm": 0.4671296179294586, + "learning_rate": 4.704506899419705e-06, + "loss": 0.5685, + "step": 10359 + }, + { + "epoch": 0.945773233522001, + "grad_norm": 0.4978792369365692, + "learning_rate": 4.7044504528782095e-06, + "loss": 0.5827, + "step": 10360 + }, + { + "epoch": 0.9458645243746576, + "grad_norm": 0.48584091663360596, + "learning_rate": 4.704394001284583e-06, + "loss": 0.5562, + "step": 10361 + }, + { + "epoch": 0.9459558152273142, + "grad_norm": 0.5168317556381226, + "learning_rate": 4.704337544638952e-06, + "loss": 0.5163, + "step": 10362 + }, + { + "epoch": 0.9460471060799708, + "grad_norm": 0.4994674324989319, + "learning_rate": 4.704281082941449e-06, + "loss": 0.4872, + "step": 10363 + }, + { + "epoch": 0.9461383969326274, + "grad_norm": 0.49110692739486694, + "learning_rate": 4.704224616192201e-06, + "loss": 0.5396, + "step": 10364 + }, + { + "epoch": 0.9462296877852839, + "grad_norm": 0.5029156804084778, + "learning_rate": 4.70416814439134e-06, + "loss": 0.5434, + "step": 10365 + }, + { + "epoch": 0.9463209786379405, + "grad_norm": 0.4912143647670746, + "learning_rate": 4.704111667538993e-06, + "loss": 0.5365, + "step": 10366 + }, + { + "epoch": 0.9464122694905971, + "grad_norm": 0.49808549880981445, + "learning_rate": 4.70405518563529e-06, + "loss": 0.5455, + "step": 10367 + }, + { + "epoch": 0.9465035603432536, + "grad_norm": 0.4774836003780365, + "learning_rate": 4.703998698680362e-06, + "loss": 0.5935, + "step": 10368 + }, + { + "epoch": 0.9465948511959101, + "grad_norm": 0.4518972337245941, + "learning_rate": 4.7039422066743365e-06, + "loss": 0.5345, + "step": 10369 + }, + { + "epoch": 0.9466861420485667, + "grad_norm": 0.47337332367897034, + "learning_rate": 4.703885709617344e-06, + "loss": 0.5475, + "step": 10370 + }, + { + "epoch": 0.9467774329012233, + "grad_norm": 0.4935971796512604, + "learning_rate": 4.7038292075095135e-06, + "loss": 0.5316, + "step": 10371 + }, + { + "epoch": 0.9468687237538799, + "grad_norm": 0.4970482289791107, + "learning_rate": 4.703772700350975e-06, + "loss": 0.5761, + "step": 10372 + }, + { + "epoch": 0.9469600146065364, + "grad_norm": 0.48212942481040955, + "learning_rate": 4.703716188141858e-06, + "loss": 0.5691, + "step": 10373 + }, + { + "epoch": 0.947051305459193, + "grad_norm": 0.5115379691123962, + "learning_rate": 4.7036596708822915e-06, + "loss": 0.5342, + "step": 10374 + }, + { + "epoch": 0.9471425963118496, + "grad_norm": 0.4855186641216278, + "learning_rate": 4.7036031485724055e-06, + "loss": 0.5547, + "step": 10375 + }, + { + "epoch": 0.9472338871645061, + "grad_norm": 0.4979780614376068, + "learning_rate": 4.7035466212123295e-06, + "loss": 0.5288, + "step": 10376 + }, + { + "epoch": 0.9473251780171627, + "grad_norm": 0.4770812690258026, + "learning_rate": 4.703490088802192e-06, + "loss": 0.5867, + "step": 10377 + }, + { + "epoch": 0.9474164688698192, + "grad_norm": 0.5068562626838684, + "learning_rate": 4.703433551342124e-06, + "loss": 0.5375, + "step": 10378 + }, + { + "epoch": 0.9475077597224758, + "grad_norm": 0.46656474471092224, + "learning_rate": 4.703377008832255e-06, + "loss": 0.5613, + "step": 10379 + }, + { + "epoch": 0.9475990505751324, + "grad_norm": 0.48134541511535645, + "learning_rate": 4.703320461272713e-06, + "loss": 0.5272, + "step": 10380 + }, + { + "epoch": 0.947690341427789, + "grad_norm": 0.4865495562553406, + "learning_rate": 4.70326390866363e-06, + "loss": 0.5167, + "step": 10381 + }, + { + "epoch": 0.9477816322804455, + "grad_norm": 0.471392422914505, + "learning_rate": 4.703207351005133e-06, + "loss": 0.5664, + "step": 10382 + }, + { + "epoch": 0.947872923133102, + "grad_norm": 0.4518100619316101, + "learning_rate": 4.703150788297353e-06, + "loss": 0.5407, + "step": 10383 + }, + { + "epoch": 0.9479642139857586, + "grad_norm": 0.4962434768676758, + "learning_rate": 4.70309422054042e-06, + "loss": 0.5508, + "step": 10384 + }, + { + "epoch": 0.9480555048384152, + "grad_norm": 0.4832470417022705, + "learning_rate": 4.703037647734464e-06, + "loss": 0.5283, + "step": 10385 + }, + { + "epoch": 0.9481467956910717, + "grad_norm": 0.5302375555038452, + "learning_rate": 4.702981069879613e-06, + "loss": 0.5566, + "step": 10386 + }, + { + "epoch": 0.9482380865437283, + "grad_norm": 0.48222804069519043, + "learning_rate": 4.7029244869759975e-06, + "loss": 0.5296, + "step": 10387 + }, + { + "epoch": 0.9483293773963849, + "grad_norm": 0.4950132369995117, + "learning_rate": 4.7028678990237474e-06, + "loss": 0.5188, + "step": 10388 + }, + { + "epoch": 0.9484206682490415, + "grad_norm": 0.4872421324253082, + "learning_rate": 4.702811306022992e-06, + "loss": 0.545, + "step": 10389 + }, + { + "epoch": 0.948511959101698, + "grad_norm": 0.516045868396759, + "learning_rate": 4.702754707973861e-06, + "loss": 0.5359, + "step": 10390 + }, + { + "epoch": 0.9486032499543545, + "grad_norm": 0.4743136763572693, + "learning_rate": 4.702698104876484e-06, + "loss": 0.5293, + "step": 10391 + }, + { + "epoch": 0.9486945408070111, + "grad_norm": 0.5137530565261841, + "learning_rate": 4.702641496730992e-06, + "loss": 0.5314, + "step": 10392 + }, + { + "epoch": 0.9487858316596677, + "grad_norm": 0.4765196442604065, + "learning_rate": 4.7025848835375135e-06, + "loss": 0.5273, + "step": 10393 + }, + { + "epoch": 0.9488771225123243, + "grad_norm": 0.48855486512184143, + "learning_rate": 4.702528265296178e-06, + "loss": 0.5395, + "step": 10394 + }, + { + "epoch": 0.9489684133649808, + "grad_norm": 0.4712100923061371, + "learning_rate": 4.702471642007116e-06, + "loss": 0.5416, + "step": 10395 + }, + { + "epoch": 0.9490597042176374, + "grad_norm": 0.4765852391719818, + "learning_rate": 4.7024150136704564e-06, + "loss": 0.5423, + "step": 10396 + }, + { + "epoch": 0.949150995070294, + "grad_norm": 0.47076326608657837, + "learning_rate": 4.70235838028633e-06, + "loss": 0.6023, + "step": 10397 + }, + { + "epoch": 0.9492422859229506, + "grad_norm": 0.4863806664943695, + "learning_rate": 4.7023017418548664e-06, + "loss": 0.5593, + "step": 10398 + }, + { + "epoch": 0.949333576775607, + "grad_norm": 0.4805188775062561, + "learning_rate": 4.702245098376195e-06, + "loss": 0.5385, + "step": 10399 + }, + { + "epoch": 0.9494248676282636, + "grad_norm": 0.487447589635849, + "learning_rate": 4.702188449850445e-06, + "loss": 0.5131, + "step": 10400 + }, + { + "epoch": 0.9495161584809202, + "grad_norm": 0.4791002571582794, + "learning_rate": 4.702131796277748e-06, + "loss": 0.5102, + "step": 10401 + }, + { + "epoch": 0.9496074493335768, + "grad_norm": 0.45606935024261475, + "learning_rate": 4.702075137658233e-06, + "loss": 0.5274, + "step": 10402 + }, + { + "epoch": 0.9496987401862333, + "grad_norm": 0.470157265663147, + "learning_rate": 4.702018473992029e-06, + "loss": 0.5289, + "step": 10403 + }, + { + "epoch": 0.9497900310388899, + "grad_norm": 0.4739989638328552, + "learning_rate": 4.701961805279267e-06, + "loss": 0.5393, + "step": 10404 + }, + { + "epoch": 0.9498813218915465, + "grad_norm": 0.5068950653076172, + "learning_rate": 4.701905131520076e-06, + "loss": 0.5562, + "step": 10405 + }, + { + "epoch": 0.9499726127442031, + "grad_norm": 0.4780932366847992, + "learning_rate": 4.701848452714587e-06, + "loss": 0.5725, + "step": 10406 + }, + { + "epoch": 0.9500639035968595, + "grad_norm": 0.5262019038200378, + "learning_rate": 4.701791768862929e-06, + "loss": 0.5064, + "step": 10407 + }, + { + "epoch": 0.9501551944495161, + "grad_norm": 0.509398877620697, + "learning_rate": 4.701735079965231e-06, + "loss": 0.5024, + "step": 10408 + }, + { + "epoch": 0.9502464853021727, + "grad_norm": 0.5227727293968201, + "learning_rate": 4.701678386021626e-06, + "loss": 0.4991, + "step": 10409 + }, + { + "epoch": 0.9503377761548293, + "grad_norm": 0.4966961741447449, + "learning_rate": 4.701621687032241e-06, + "loss": 0.5601, + "step": 10410 + }, + { + "epoch": 0.9504290670074859, + "grad_norm": 0.5197931528091431, + "learning_rate": 4.701564982997206e-06, + "loss": 0.5314, + "step": 10411 + }, + { + "epoch": 0.9505203578601424, + "grad_norm": 0.5094696879386902, + "learning_rate": 4.701508273916654e-06, + "loss": 0.5422, + "step": 10412 + }, + { + "epoch": 0.950611648712799, + "grad_norm": 0.4840279817581177, + "learning_rate": 4.701451559790711e-06, + "loss": 0.5428, + "step": 10413 + }, + { + "epoch": 0.9507029395654556, + "grad_norm": 0.49215978384017944, + "learning_rate": 4.70139484061951e-06, + "loss": 0.4975, + "step": 10414 + }, + { + "epoch": 0.9507942304181121, + "grad_norm": 0.5258597135543823, + "learning_rate": 4.701338116403179e-06, + "loss": 0.5404, + "step": 10415 + }, + { + "epoch": 0.9508855212707686, + "grad_norm": 0.5075180530548096, + "learning_rate": 4.70128138714185e-06, + "loss": 0.5187, + "step": 10416 + }, + { + "epoch": 0.9509768121234252, + "grad_norm": 0.4936593770980835, + "learning_rate": 4.70122465283565e-06, + "loss": 0.529, + "step": 10417 + }, + { + "epoch": 0.9510681029760818, + "grad_norm": 0.4999661445617676, + "learning_rate": 4.701167913484711e-06, + "loss": 0.5404, + "step": 10418 + }, + { + "epoch": 0.9511593938287384, + "grad_norm": 0.4746212661266327, + "learning_rate": 4.7011111690891646e-06, + "loss": 0.5155, + "step": 10419 + }, + { + "epoch": 0.951250684681395, + "grad_norm": 0.49124813079833984, + "learning_rate": 4.701054419649138e-06, + "loss": 0.5399, + "step": 10420 + }, + { + "epoch": 0.9513419755340515, + "grad_norm": 0.5059688091278076, + "learning_rate": 4.700997665164762e-06, + "loss": 0.549, + "step": 10421 + }, + { + "epoch": 0.951433266386708, + "grad_norm": 0.5072649121284485, + "learning_rate": 4.700940905636167e-06, + "loss": 0.5198, + "step": 10422 + }, + { + "epoch": 0.9515245572393646, + "grad_norm": 0.4794188439846039, + "learning_rate": 4.700884141063484e-06, + "loss": 0.5438, + "step": 10423 + }, + { + "epoch": 0.9516158480920212, + "grad_norm": 0.5034266114234924, + "learning_rate": 4.700827371446842e-06, + "loss": 0.5089, + "step": 10424 + }, + { + "epoch": 0.9517071389446777, + "grad_norm": 0.4620325267314911, + "learning_rate": 4.7007705967863714e-06, + "loss": 0.5159, + "step": 10425 + }, + { + "epoch": 0.9517984297973343, + "grad_norm": 0.48369693756103516, + "learning_rate": 4.700713817082202e-06, + "loss": 0.5516, + "step": 10426 + }, + { + "epoch": 0.9518897206499909, + "grad_norm": 0.44769081473350525, + "learning_rate": 4.700657032334463e-06, + "loss": 0.5369, + "step": 10427 + }, + { + "epoch": 0.9519810115026475, + "grad_norm": 0.48870572447776794, + "learning_rate": 4.700600242543288e-06, + "loss": 0.5188, + "step": 10428 + }, + { + "epoch": 0.952072302355304, + "grad_norm": 0.48923295736312866, + "learning_rate": 4.700543447708803e-06, + "loss": 0.532, + "step": 10429 + }, + { + "epoch": 0.9521635932079605, + "grad_norm": 0.5037952661514282, + "learning_rate": 4.7004866478311414e-06, + "loss": 0.5515, + "step": 10430 + }, + { + "epoch": 0.9522548840606171, + "grad_norm": 0.48409363627433777, + "learning_rate": 4.700429842910431e-06, + "loss": 0.5636, + "step": 10431 + }, + { + "epoch": 0.9523461749132737, + "grad_norm": 0.5076587200164795, + "learning_rate": 4.700373032946804e-06, + "loss": 0.5306, + "step": 10432 + }, + { + "epoch": 0.9524374657659302, + "grad_norm": 0.4663485288619995, + "learning_rate": 4.700316217940388e-06, + "loss": 0.5708, + "step": 10433 + }, + { + "epoch": 0.9525287566185868, + "grad_norm": 0.4795607030391693, + "learning_rate": 4.7002593978913155e-06, + "loss": 0.5282, + "step": 10434 + }, + { + "epoch": 0.9526200474712434, + "grad_norm": 0.5048150420188904, + "learning_rate": 4.700202572799717e-06, + "loss": 0.5314, + "step": 10435 + }, + { + "epoch": 0.9527113383239, + "grad_norm": 0.48369336128234863, + "learning_rate": 4.700145742665721e-06, + "loss": 0.5138, + "step": 10436 + }, + { + "epoch": 0.9528026291765566, + "grad_norm": 0.5067743062973022, + "learning_rate": 4.700088907489457e-06, + "loss": 0.5512, + "step": 10437 + }, + { + "epoch": 0.952893920029213, + "grad_norm": 0.4695732891559601, + "learning_rate": 4.7000320672710584e-06, + "loss": 0.5487, + "step": 10438 + }, + { + "epoch": 0.9529852108818696, + "grad_norm": 0.4839858114719391, + "learning_rate": 4.699975222010653e-06, + "loss": 0.5114, + "step": 10439 + }, + { + "epoch": 0.9530765017345262, + "grad_norm": 0.4626770317554474, + "learning_rate": 4.699918371708373e-06, + "loss": 0.5445, + "step": 10440 + }, + { + "epoch": 0.9531677925871828, + "grad_norm": 0.46950721740722656, + "learning_rate": 4.699861516364346e-06, + "loss": 0.5525, + "step": 10441 + }, + { + "epoch": 0.9532590834398393, + "grad_norm": 0.4748360216617584, + "learning_rate": 4.699804655978705e-06, + "loss": 0.5635, + "step": 10442 + }, + { + "epoch": 0.9533503742924959, + "grad_norm": 0.46283861994743347, + "learning_rate": 4.6997477905515785e-06, + "loss": 0.5337, + "step": 10443 + }, + { + "epoch": 0.9534416651451525, + "grad_norm": 0.4837128520011902, + "learning_rate": 4.699690920083098e-06, + "loss": 0.5758, + "step": 10444 + }, + { + "epoch": 0.9535329559978091, + "grad_norm": 0.4866531193256378, + "learning_rate": 4.699634044573393e-06, + "loss": 0.5079, + "step": 10445 + }, + { + "epoch": 0.9536242468504655, + "grad_norm": 0.5205571055412292, + "learning_rate": 4.699577164022594e-06, + "loss": 0.5216, + "step": 10446 + }, + { + "epoch": 0.9537155377031221, + "grad_norm": 0.46915102005004883, + "learning_rate": 4.699520278430833e-06, + "loss": 0.5573, + "step": 10447 + }, + { + "epoch": 0.9538068285557787, + "grad_norm": 0.4840642213821411, + "learning_rate": 4.699463387798238e-06, + "loss": 0.519, + "step": 10448 + }, + { + "epoch": 0.9538981194084353, + "grad_norm": 0.4999755918979645, + "learning_rate": 4.69940649212494e-06, + "loss": 0.5612, + "step": 10449 + }, + { + "epoch": 0.9539894102610919, + "grad_norm": 0.4865877330303192, + "learning_rate": 4.6993495914110685e-06, + "loss": 0.5449, + "step": 10450 + }, + { + "epoch": 0.9540807011137484, + "grad_norm": 0.499026894569397, + "learning_rate": 4.699292685656757e-06, + "loss": 0.524, + "step": 10451 + }, + { + "epoch": 0.954171991966405, + "grad_norm": 0.5080078840255737, + "learning_rate": 4.699235774862133e-06, + "loss": 0.5518, + "step": 10452 + }, + { + "epoch": 0.9542632828190615, + "grad_norm": 0.4905690550804138, + "learning_rate": 4.699178859027328e-06, + "loss": 0.5312, + "step": 10453 + }, + { + "epoch": 0.954354573671718, + "grad_norm": 0.47994014620780945, + "learning_rate": 4.699121938152473e-06, + "loss": 0.5454, + "step": 10454 + }, + { + "epoch": 0.9544458645243746, + "grad_norm": 0.473122775554657, + "learning_rate": 4.699065012237697e-06, + "loss": 0.5679, + "step": 10455 + }, + { + "epoch": 0.9545371553770312, + "grad_norm": 0.4679829478263855, + "learning_rate": 4.699008081283132e-06, + "loss": 0.5645, + "step": 10456 + }, + { + "epoch": 0.9546284462296878, + "grad_norm": 0.49324744939804077, + "learning_rate": 4.698951145288907e-06, + "loss": 0.5515, + "step": 10457 + }, + { + "epoch": 0.9547197370823444, + "grad_norm": 0.48764243721961975, + "learning_rate": 4.698894204255153e-06, + "loss": 0.5095, + "step": 10458 + }, + { + "epoch": 0.9548110279350009, + "grad_norm": 0.5099163055419922, + "learning_rate": 4.698837258182002e-06, + "loss": 0.5416, + "step": 10459 + }, + { + "epoch": 0.9549023187876575, + "grad_norm": 0.4823266565799713, + "learning_rate": 4.6987803070695825e-06, + "loss": 0.5447, + "step": 10460 + }, + { + "epoch": 0.954993609640314, + "grad_norm": 0.5029030442237854, + "learning_rate": 4.698723350918026e-06, + "loss": 0.565, + "step": 10461 + }, + { + "epoch": 0.9550849004929706, + "grad_norm": 0.4789113402366638, + "learning_rate": 4.698666389727462e-06, + "loss": 0.5369, + "step": 10462 + }, + { + "epoch": 0.9551761913456271, + "grad_norm": 0.48939183354377747, + "learning_rate": 4.6986094234980225e-06, + "loss": 0.5282, + "step": 10463 + }, + { + "epoch": 0.9552674821982837, + "grad_norm": 0.4725974202156067, + "learning_rate": 4.698552452229837e-06, + "loss": 0.5543, + "step": 10464 + }, + { + "epoch": 0.9553587730509403, + "grad_norm": 0.4728730022907257, + "learning_rate": 4.698495475923037e-06, + "loss": 0.5352, + "step": 10465 + }, + { + "epoch": 0.9554500639035969, + "grad_norm": 0.5004870295524597, + "learning_rate": 4.6984384945777526e-06, + "loss": 0.5525, + "step": 10466 + }, + { + "epoch": 0.9555413547562535, + "grad_norm": 0.48806729912757874, + "learning_rate": 4.6983815081941146e-06, + "loss": 0.538, + "step": 10467 + }, + { + "epoch": 0.95563264560891, + "grad_norm": 0.500878095626831, + "learning_rate": 4.698324516772253e-06, + "loss": 0.5809, + "step": 10468 + }, + { + "epoch": 0.9557239364615665, + "grad_norm": 0.4932298958301544, + "learning_rate": 4.698267520312297e-06, + "loss": 0.5821, + "step": 10469 + }, + { + "epoch": 0.9558152273142231, + "grad_norm": 0.46747666597366333, + "learning_rate": 4.698210518814381e-06, + "loss": 0.5626, + "step": 10470 + }, + { + "epoch": 0.9559065181668797, + "grad_norm": 0.4897354245185852, + "learning_rate": 4.6981535122786324e-06, + "loss": 0.5115, + "step": 10471 + }, + { + "epoch": 0.9559978090195362, + "grad_norm": 0.48065292835235596, + "learning_rate": 4.698096500705184e-06, + "loss": 0.5684, + "step": 10472 + }, + { + "epoch": 0.9560890998721928, + "grad_norm": 0.5139847993850708, + "learning_rate": 4.698039484094166e-06, + "loss": 0.4917, + "step": 10473 + }, + { + "epoch": 0.9561803907248494, + "grad_norm": 0.4622568190097809, + "learning_rate": 4.697982462445707e-06, + "loss": 0.5545, + "step": 10474 + }, + { + "epoch": 0.956271681577506, + "grad_norm": 0.46349844336509705, + "learning_rate": 4.697925435759941e-06, + "loss": 0.5094, + "step": 10475 + }, + { + "epoch": 0.9563629724301626, + "grad_norm": 0.4532683491706848, + "learning_rate": 4.697868404036995e-06, + "loss": 0.5508, + "step": 10476 + }, + { + "epoch": 0.956454263282819, + "grad_norm": 0.4723762273788452, + "learning_rate": 4.697811367277002e-06, + "loss": 0.5237, + "step": 10477 + }, + { + "epoch": 0.9565455541354756, + "grad_norm": 0.4965410828590393, + "learning_rate": 4.697754325480093e-06, + "loss": 0.522, + "step": 10478 + }, + { + "epoch": 0.9566368449881322, + "grad_norm": 0.4920322000980377, + "learning_rate": 4.697697278646398e-06, + "loss": 0.5956, + "step": 10479 + }, + { + "epoch": 0.9567281358407888, + "grad_norm": 0.5313526391983032, + "learning_rate": 4.697640226776048e-06, + "loss": 0.5065, + "step": 10480 + }, + { + "epoch": 0.9568194266934453, + "grad_norm": 0.5030452609062195, + "learning_rate": 4.6975831698691735e-06, + "loss": 0.5515, + "step": 10481 + }, + { + "epoch": 0.9569107175461019, + "grad_norm": 0.47200918197631836, + "learning_rate": 4.697526107925905e-06, + "loss": 0.5691, + "step": 10482 + }, + { + "epoch": 0.9570020083987585, + "grad_norm": 0.5271415710449219, + "learning_rate": 4.6974690409463745e-06, + "loss": 0.5158, + "step": 10483 + }, + { + "epoch": 0.957093299251415, + "grad_norm": 0.5012356042861938, + "learning_rate": 4.697411968930711e-06, + "loss": 0.5652, + "step": 10484 + }, + { + "epoch": 0.9571845901040715, + "grad_norm": 0.45664721727371216, + "learning_rate": 4.697354891879046e-06, + "loss": 0.5945, + "step": 10485 + }, + { + "epoch": 0.9572758809567281, + "grad_norm": 0.46154770255088806, + "learning_rate": 4.6972978097915115e-06, + "loss": 0.5617, + "step": 10486 + }, + { + "epoch": 0.9573671718093847, + "grad_norm": 0.4738750159740448, + "learning_rate": 4.697240722668236e-06, + "loss": 0.5538, + "step": 10487 + }, + { + "epoch": 0.9574584626620413, + "grad_norm": 0.4951688349246979, + "learning_rate": 4.697183630509352e-06, + "loss": 0.5492, + "step": 10488 + }, + { + "epoch": 0.9575497535146978, + "grad_norm": 0.47553005814552307, + "learning_rate": 4.69712653331499e-06, + "loss": 0.5277, + "step": 10489 + }, + { + "epoch": 0.9576410443673544, + "grad_norm": 0.4653623700141907, + "learning_rate": 4.697069431085282e-06, + "loss": 0.5928, + "step": 10490 + }, + { + "epoch": 0.957732335220011, + "grad_norm": 0.4700641334056854, + "learning_rate": 4.6970123238203565e-06, + "loss": 0.553, + "step": 10491 + }, + { + "epoch": 0.9578236260726675, + "grad_norm": 0.5023244023323059, + "learning_rate": 4.696955211520346e-06, + "loss": 0.5071, + "step": 10492 + }, + { + "epoch": 0.957914916925324, + "grad_norm": 0.4805217981338501, + "learning_rate": 4.696898094185382e-06, + "loss": 0.5333, + "step": 10493 + }, + { + "epoch": 0.9580062077779806, + "grad_norm": 0.47222986817359924, + "learning_rate": 4.696840971815593e-06, + "loss": 0.571, + "step": 10494 + }, + { + "epoch": 0.9580974986306372, + "grad_norm": 0.47988465428352356, + "learning_rate": 4.696783844411112e-06, + "loss": 0.5304, + "step": 10495 + }, + { + "epoch": 0.9581887894832938, + "grad_norm": 0.47036856412887573, + "learning_rate": 4.696726711972069e-06, + "loss": 0.5675, + "step": 10496 + }, + { + "epoch": 0.9582800803359504, + "grad_norm": 0.46877890825271606, + "learning_rate": 4.696669574498594e-06, + "loss": 0.5498, + "step": 10497 + }, + { + "epoch": 0.9583713711886069, + "grad_norm": 0.5043386220932007, + "learning_rate": 4.696612431990821e-06, + "loss": 0.4991, + "step": 10498 + }, + { + "epoch": 0.9584626620412635, + "grad_norm": 0.4689073860645294, + "learning_rate": 4.696555284448878e-06, + "loss": 0.5414, + "step": 10499 + }, + { + "epoch": 0.95855395289392, + "grad_norm": 0.47277456521987915, + "learning_rate": 4.696498131872898e-06, + "loss": 0.5535, + "step": 10500 + }, + { + "epoch": 0.9586452437465766, + "grad_norm": 0.46535295248031616, + "learning_rate": 4.69644097426301e-06, + "loss": 0.5519, + "step": 10501 + }, + { + "epoch": 0.9587365345992331, + "grad_norm": 0.4584050178527832, + "learning_rate": 4.696383811619346e-06, + "loss": 0.5258, + "step": 10502 + }, + { + "epoch": 0.9588278254518897, + "grad_norm": 0.47875353693962097, + "learning_rate": 4.6963266439420375e-06, + "loss": 0.5361, + "step": 10503 + }, + { + "epoch": 0.9589191163045463, + "grad_norm": 0.4697912037372589, + "learning_rate": 4.696269471231215e-06, + "loss": 0.5664, + "step": 10504 + }, + { + "epoch": 0.9590104071572029, + "grad_norm": 0.5166689157485962, + "learning_rate": 4.69621229348701e-06, + "loss": 0.5388, + "step": 10505 + }, + { + "epoch": 0.9591016980098594, + "grad_norm": 0.4730049967765808, + "learning_rate": 4.696155110709552e-06, + "loss": 0.5485, + "step": 10506 + }, + { + "epoch": 0.959192988862516, + "grad_norm": 0.48584064841270447, + "learning_rate": 4.6960979228989735e-06, + "loss": 0.5329, + "step": 10507 + }, + { + "epoch": 0.9592842797151725, + "grad_norm": 0.5017009377479553, + "learning_rate": 4.696040730055406e-06, + "loss": 0.5707, + "step": 10508 + }, + { + "epoch": 0.9593755705678291, + "grad_norm": 0.4848170876502991, + "learning_rate": 4.695983532178979e-06, + "loss": 0.5394, + "step": 10509 + }, + { + "epoch": 0.9594668614204857, + "grad_norm": 0.47536975145339966, + "learning_rate": 4.695926329269824e-06, + "loss": 0.5095, + "step": 10510 + }, + { + "epoch": 0.9595581522731422, + "grad_norm": 0.49033159017562866, + "learning_rate": 4.695869121328074e-06, + "loss": 0.5656, + "step": 10511 + }, + { + "epoch": 0.9596494431257988, + "grad_norm": 0.4965268075466156, + "learning_rate": 4.695811908353857e-06, + "loss": 0.5283, + "step": 10512 + }, + { + "epoch": 0.9597407339784554, + "grad_norm": 0.46721169352531433, + "learning_rate": 4.695754690347306e-06, + "loss": 0.5921, + "step": 10513 + }, + { + "epoch": 0.959832024831112, + "grad_norm": 0.4969152510166168, + "learning_rate": 4.695697467308553e-06, + "loss": 0.5369, + "step": 10514 + }, + { + "epoch": 0.9599233156837685, + "grad_norm": 0.5115148425102234, + "learning_rate": 4.695640239237726e-06, + "loss": 0.5293, + "step": 10515 + }, + { + "epoch": 0.960014606536425, + "grad_norm": 0.4841291010379791, + "learning_rate": 4.695583006134959e-06, + "loss": 0.5411, + "step": 10516 + }, + { + "epoch": 0.9601058973890816, + "grad_norm": 0.47597604990005493, + "learning_rate": 4.695525768000383e-06, + "loss": 0.5699, + "step": 10517 + }, + { + "epoch": 0.9601971882417382, + "grad_norm": 0.5062037706375122, + "learning_rate": 4.695468524834128e-06, + "loss": 0.5104, + "step": 10518 + }, + { + "epoch": 0.9602884790943947, + "grad_norm": 0.521986722946167, + "learning_rate": 4.695411276636326e-06, + "loss": 0.5579, + "step": 10519 + }, + { + "epoch": 0.9603797699470513, + "grad_norm": 0.49562516808509827, + "learning_rate": 4.6953540234071075e-06, + "loss": 0.5116, + "step": 10520 + }, + { + "epoch": 0.9604710607997079, + "grad_norm": 0.4992069602012634, + "learning_rate": 4.695296765146604e-06, + "loss": 0.5666, + "step": 10521 + }, + { + "epoch": 0.9605623516523645, + "grad_norm": 0.4779970645904541, + "learning_rate": 4.695239501854947e-06, + "loss": 0.56, + "step": 10522 + }, + { + "epoch": 0.9606536425050209, + "grad_norm": 0.5027718544006348, + "learning_rate": 4.695182233532267e-06, + "loss": 0.5277, + "step": 10523 + }, + { + "epoch": 0.9607449333576775, + "grad_norm": 0.4932379424571991, + "learning_rate": 4.695124960178696e-06, + "loss": 0.547, + "step": 10524 + }, + { + "epoch": 0.9608362242103341, + "grad_norm": 0.4630693197250366, + "learning_rate": 4.695067681794365e-06, + "loss": 0.55, + "step": 10525 + }, + { + "epoch": 0.9609275150629907, + "grad_norm": 0.454202264547348, + "learning_rate": 4.695010398379406e-06, + "loss": 0.5707, + "step": 10526 + }, + { + "epoch": 0.9610188059156473, + "grad_norm": 0.49375098943710327, + "learning_rate": 4.694953109933949e-06, + "loss": 0.549, + "step": 10527 + }, + { + "epoch": 0.9611100967683038, + "grad_norm": 0.5092496275901794, + "learning_rate": 4.694895816458126e-06, + "loss": 0.5298, + "step": 10528 + }, + { + "epoch": 0.9612013876209604, + "grad_norm": 0.4989064931869507, + "learning_rate": 4.694838517952069e-06, + "loss": 0.5113, + "step": 10529 + }, + { + "epoch": 0.961292678473617, + "grad_norm": 0.46892303228378296, + "learning_rate": 4.694781214415907e-06, + "loss": 0.5559, + "step": 10530 + }, + { + "epoch": 0.9613839693262735, + "grad_norm": 0.46873044967651367, + "learning_rate": 4.694723905849773e-06, + "loss": 0.5629, + "step": 10531 + }, + { + "epoch": 0.96147526017893, + "grad_norm": 0.47310495376586914, + "learning_rate": 4.694666592253799e-06, + "loss": 0.5573, + "step": 10532 + }, + { + "epoch": 0.9615665510315866, + "grad_norm": 0.5138007402420044, + "learning_rate": 4.694609273628115e-06, + "loss": 0.4968, + "step": 10533 + }, + { + "epoch": 0.9616578418842432, + "grad_norm": 0.4821135997772217, + "learning_rate": 4.694551949972853e-06, + "loss": 0.5484, + "step": 10534 + }, + { + "epoch": 0.9617491327368998, + "grad_norm": 0.5135130882263184, + "learning_rate": 4.694494621288144e-06, + "loss": 0.5009, + "step": 10535 + }, + { + "epoch": 0.9618404235895563, + "grad_norm": 0.4883638322353363, + "learning_rate": 4.69443728757412e-06, + "loss": 0.5399, + "step": 10536 + }, + { + "epoch": 0.9619317144422129, + "grad_norm": 0.5042656064033508, + "learning_rate": 4.694379948830911e-06, + "loss": 0.521, + "step": 10537 + }, + { + "epoch": 0.9620230052948695, + "grad_norm": 0.4938998818397522, + "learning_rate": 4.694322605058651e-06, + "loss": 0.5419, + "step": 10538 + }, + { + "epoch": 0.962114296147526, + "grad_norm": 0.4897509217262268, + "learning_rate": 4.6942652562574695e-06, + "loss": 0.5971, + "step": 10539 + }, + { + "epoch": 0.9622055870001825, + "grad_norm": 0.4904034435749054, + "learning_rate": 4.6942079024274975e-06, + "loss": 0.5655, + "step": 10540 + }, + { + "epoch": 0.9622968778528391, + "grad_norm": 0.4632389545440674, + "learning_rate": 4.694150543568869e-06, + "loss": 0.551, + "step": 10541 + }, + { + "epoch": 0.9623881687054957, + "grad_norm": 0.47015491127967834, + "learning_rate": 4.694093179681712e-06, + "loss": 0.5806, + "step": 10542 + }, + { + "epoch": 0.9624794595581523, + "grad_norm": 0.49763450026512146, + "learning_rate": 4.6940358107661595e-06, + "loss": 0.5275, + "step": 10543 + }, + { + "epoch": 0.9625707504108089, + "grad_norm": 0.48421037197113037, + "learning_rate": 4.693978436822344e-06, + "loss": 0.5315, + "step": 10544 + }, + { + "epoch": 0.9626620412634654, + "grad_norm": 0.49030038714408875, + "learning_rate": 4.693921057850395e-06, + "loss": 0.5537, + "step": 10545 + }, + { + "epoch": 0.962753332116122, + "grad_norm": 0.4686118960380554, + "learning_rate": 4.693863673850447e-06, + "loss": 0.5374, + "step": 10546 + }, + { + "epoch": 0.9628446229687785, + "grad_norm": 0.4982304871082306, + "learning_rate": 4.6938062848226284e-06, + "loss": 0.5507, + "step": 10547 + }, + { + "epoch": 0.9629359138214351, + "grad_norm": 0.539331316947937, + "learning_rate": 4.693748890767073e-06, + "loss": 0.5317, + "step": 10548 + }, + { + "epoch": 0.9630272046740916, + "grad_norm": 0.46709132194519043, + "learning_rate": 4.693691491683909e-06, + "loss": 0.6441, + "step": 10549 + }, + { + "epoch": 0.9631184955267482, + "grad_norm": 0.48948147892951965, + "learning_rate": 4.693634087573272e-06, + "loss": 0.5323, + "step": 10550 + }, + { + "epoch": 0.9632097863794048, + "grad_norm": 0.44796836376190186, + "learning_rate": 4.6935766784352925e-06, + "loss": 0.5533, + "step": 10551 + }, + { + "epoch": 0.9633010772320614, + "grad_norm": 0.4814597964286804, + "learning_rate": 4.6935192642701e-06, + "loss": 0.5245, + "step": 10552 + }, + { + "epoch": 0.963392368084718, + "grad_norm": 0.48113536834716797, + "learning_rate": 4.693461845077828e-06, + "loss": 0.533, + "step": 10553 + }, + { + "epoch": 0.9634836589373744, + "grad_norm": 0.46899208426475525, + "learning_rate": 4.693404420858607e-06, + "loss": 0.555, + "step": 10554 + }, + { + "epoch": 0.963574949790031, + "grad_norm": 0.5120817422866821, + "learning_rate": 4.6933469916125695e-06, + "loss": 0.5031, + "step": 10555 + }, + { + "epoch": 0.9636662406426876, + "grad_norm": 0.4684547185897827, + "learning_rate": 4.693289557339847e-06, + "loss": 0.5618, + "step": 10556 + }, + { + "epoch": 0.9637575314953442, + "grad_norm": 0.48432791233062744, + "learning_rate": 4.69323211804057e-06, + "loss": 0.4997, + "step": 10557 + }, + { + "epoch": 0.9638488223480007, + "grad_norm": 0.4785366356372833, + "learning_rate": 4.693174673714872e-06, + "loss": 0.5343, + "step": 10558 + }, + { + "epoch": 0.9639401132006573, + "grad_norm": 0.49685847759246826, + "learning_rate": 4.693117224362883e-06, + "loss": 0.501, + "step": 10559 + }, + { + "epoch": 0.9640314040533139, + "grad_norm": 0.50411057472229, + "learning_rate": 4.693059769984736e-06, + "loss": 0.5359, + "step": 10560 + }, + { + "epoch": 0.9641226949059705, + "grad_norm": 0.5000529885292053, + "learning_rate": 4.693002310580562e-06, + "loss": 0.5067, + "step": 10561 + }, + { + "epoch": 0.9642139857586269, + "grad_norm": 0.48237305879592896, + "learning_rate": 4.692944846150492e-06, + "loss": 0.4878, + "step": 10562 + }, + { + "epoch": 0.9643052766112835, + "grad_norm": 0.48016437888145447, + "learning_rate": 4.692887376694659e-06, + "loss": 0.5347, + "step": 10563 + }, + { + "epoch": 0.9643965674639401, + "grad_norm": 0.46068432927131653, + "learning_rate": 4.692829902213194e-06, + "loss": 0.5621, + "step": 10564 + }, + { + "epoch": 0.9644878583165967, + "grad_norm": 0.44737154245376587, + "learning_rate": 4.6927724227062284e-06, + "loss": 0.541, + "step": 10565 + }, + { + "epoch": 0.9645791491692532, + "grad_norm": 0.481238454580307, + "learning_rate": 4.692714938173895e-06, + "loss": 0.543, + "step": 10566 + }, + { + "epoch": 0.9646704400219098, + "grad_norm": 0.4951915740966797, + "learning_rate": 4.692657448616325e-06, + "loss": 0.5394, + "step": 10567 + }, + { + "epoch": 0.9647617308745664, + "grad_norm": 0.4696213901042938, + "learning_rate": 4.692599954033649e-06, + "loss": 0.5854, + "step": 10568 + }, + { + "epoch": 0.964853021727223, + "grad_norm": 0.46344074606895447, + "learning_rate": 4.692542454426e-06, + "loss": 0.5491, + "step": 10569 + }, + { + "epoch": 0.9649443125798794, + "grad_norm": 0.4642946422100067, + "learning_rate": 4.6924849497935105e-06, + "loss": 0.566, + "step": 10570 + }, + { + "epoch": 0.965035603432536, + "grad_norm": 0.4811120629310608, + "learning_rate": 4.692427440136311e-06, + "loss": 0.5351, + "step": 10571 + }, + { + "epoch": 0.9651268942851926, + "grad_norm": 0.5259604454040527, + "learning_rate": 4.692369925454533e-06, + "loss": 0.5383, + "step": 10572 + }, + { + "epoch": 0.9652181851378492, + "grad_norm": 0.4613897502422333, + "learning_rate": 4.6923124057483095e-06, + "loss": 0.5456, + "step": 10573 + }, + { + "epoch": 0.9653094759905058, + "grad_norm": 0.5172198414802551, + "learning_rate": 4.692254881017772e-06, + "loss": 0.5181, + "step": 10574 + }, + { + "epoch": 0.9654007668431623, + "grad_norm": 0.48004889488220215, + "learning_rate": 4.692197351263052e-06, + "loss": 0.5349, + "step": 10575 + }, + { + "epoch": 0.9654920576958189, + "grad_norm": 0.48731815814971924, + "learning_rate": 4.692139816484282e-06, + "loss": 0.5502, + "step": 10576 + }, + { + "epoch": 0.9655833485484755, + "grad_norm": 0.48747050762176514, + "learning_rate": 4.692082276681592e-06, + "loss": 0.5365, + "step": 10577 + }, + { + "epoch": 0.965674639401132, + "grad_norm": 0.4648570120334625, + "learning_rate": 4.692024731855116e-06, + "loss": 0.5652, + "step": 10578 + }, + { + "epoch": 0.9657659302537885, + "grad_norm": 0.45114123821258545, + "learning_rate": 4.6919671820049855e-06, + "loss": 0.5664, + "step": 10579 + }, + { + "epoch": 0.9658572211064451, + "grad_norm": 0.4939085841178894, + "learning_rate": 4.691909627131331e-06, + "loss": 0.5421, + "step": 10580 + }, + { + "epoch": 0.9659485119591017, + "grad_norm": 0.4894844591617584, + "learning_rate": 4.6918520672342866e-06, + "loss": 0.5251, + "step": 10581 + }, + { + "epoch": 0.9660398028117583, + "grad_norm": 0.4547593891620636, + "learning_rate": 4.691794502313982e-06, + "loss": 0.5829, + "step": 10582 + }, + { + "epoch": 0.9661310936644149, + "grad_norm": 0.48246151208877563, + "learning_rate": 4.691736932370551e-06, + "loss": 0.5292, + "step": 10583 + }, + { + "epoch": 0.9662223845170714, + "grad_norm": 0.48199623823165894, + "learning_rate": 4.691679357404124e-06, + "loss": 0.5487, + "step": 10584 + }, + { + "epoch": 0.9663136753697279, + "grad_norm": 0.4866146743297577, + "learning_rate": 4.691621777414834e-06, + "loss": 0.55, + "step": 10585 + }, + { + "epoch": 0.9664049662223845, + "grad_norm": 0.49148961901664734, + "learning_rate": 4.691564192402812e-06, + "loss": 0.5032, + "step": 10586 + }, + { + "epoch": 0.966496257075041, + "grad_norm": 0.4719935357570648, + "learning_rate": 4.691506602368192e-06, + "loss": 0.5518, + "step": 10587 + }, + { + "epoch": 0.9665875479276976, + "grad_norm": 0.5007601380348206, + "learning_rate": 4.691449007311103e-06, + "loss": 0.5374, + "step": 10588 + }, + { + "epoch": 0.9666788387803542, + "grad_norm": 0.49918949604034424, + "learning_rate": 4.691391407231679e-06, + "loss": 0.5095, + "step": 10589 + }, + { + "epoch": 0.9667701296330108, + "grad_norm": 0.4794861972332001, + "learning_rate": 4.691333802130052e-06, + "loss": 0.5712, + "step": 10590 + }, + { + "epoch": 0.9668614204856674, + "grad_norm": 0.47025343775749207, + "learning_rate": 4.691276192006353e-06, + "loss": 0.5573, + "step": 10591 + }, + { + "epoch": 0.966952711338324, + "grad_norm": 0.4529717266559601, + "learning_rate": 4.691218576860715e-06, + "loss": 0.5634, + "step": 10592 + }, + { + "epoch": 0.9670440021909804, + "grad_norm": 0.5036017298698425, + "learning_rate": 4.691160956693269e-06, + "loss": 0.5055, + "step": 10593 + }, + { + "epoch": 0.967135293043637, + "grad_norm": 0.5080528259277344, + "learning_rate": 4.691103331504148e-06, + "loss": 0.5451, + "step": 10594 + }, + { + "epoch": 0.9672265838962936, + "grad_norm": 0.4938625693321228, + "learning_rate": 4.691045701293484e-06, + "loss": 0.5385, + "step": 10595 + }, + { + "epoch": 0.9673178747489501, + "grad_norm": 0.4843849539756775, + "learning_rate": 4.690988066061408e-06, + "loss": 0.5524, + "step": 10596 + }, + { + "epoch": 0.9674091656016067, + "grad_norm": 0.46142253279685974, + "learning_rate": 4.690930425808054e-06, + "loss": 0.532, + "step": 10597 + }, + { + "epoch": 0.9675004564542633, + "grad_norm": 0.5092377662658691, + "learning_rate": 4.690872780533553e-06, + "loss": 0.5129, + "step": 10598 + }, + { + "epoch": 0.9675917473069199, + "grad_norm": 0.4807201325893402, + "learning_rate": 4.690815130238037e-06, + "loss": 0.5567, + "step": 10599 + }, + { + "epoch": 0.9676830381595765, + "grad_norm": 0.49138209223747253, + "learning_rate": 4.690757474921638e-06, + "loss": 0.5, + "step": 10600 + }, + { + "epoch": 0.9677743290122329, + "grad_norm": 0.44814831018447876, + "learning_rate": 4.6906998145844885e-06, + "loss": 0.571, + "step": 10601 + }, + { + "epoch": 0.9678656198648895, + "grad_norm": 0.500757098197937, + "learning_rate": 4.69064214922672e-06, + "loss": 0.5254, + "step": 10602 + }, + { + "epoch": 0.9679569107175461, + "grad_norm": 0.5283032655715942, + "learning_rate": 4.6905844788484655e-06, + "loss": 0.5096, + "step": 10603 + }, + { + "epoch": 0.9680482015702027, + "grad_norm": 0.47943800687789917, + "learning_rate": 4.690526803449857e-06, + "loss": 0.5342, + "step": 10604 + }, + { + "epoch": 0.9681394924228592, + "grad_norm": 0.46593958139419556, + "learning_rate": 4.690469123031026e-06, + "loss": 0.5545, + "step": 10605 + }, + { + "epoch": 0.9682307832755158, + "grad_norm": 0.4721100926399231, + "learning_rate": 4.690411437592106e-06, + "loss": 0.5453, + "step": 10606 + }, + { + "epoch": 0.9683220741281724, + "grad_norm": 0.5039898753166199, + "learning_rate": 4.690353747133228e-06, + "loss": 0.575, + "step": 10607 + }, + { + "epoch": 0.968413364980829, + "grad_norm": 0.4918341636657715, + "learning_rate": 4.690296051654525e-06, + "loss": 0.5328, + "step": 10608 + }, + { + "epoch": 0.9685046558334854, + "grad_norm": 0.4952203631401062, + "learning_rate": 4.690238351156128e-06, + "loss": 0.5332, + "step": 10609 + }, + { + "epoch": 0.968595946686142, + "grad_norm": 0.48009970784187317, + "learning_rate": 4.690180645638171e-06, + "loss": 0.569, + "step": 10610 + }, + { + "epoch": 0.9686872375387986, + "grad_norm": 0.47644877433776855, + "learning_rate": 4.690122935100785e-06, + "loss": 0.552, + "step": 10611 + }, + { + "epoch": 0.9687785283914552, + "grad_norm": 0.48443707823753357, + "learning_rate": 4.690065219544103e-06, + "loss": 0.4823, + "step": 10612 + }, + { + "epoch": 0.9688698192441118, + "grad_norm": 0.4800258278846741, + "learning_rate": 4.690007498968257e-06, + "loss": 0.5528, + "step": 10613 + }, + { + "epoch": 0.9689611100967683, + "grad_norm": 0.4739135503768921, + "learning_rate": 4.689949773373378e-06, + "loss": 0.54, + "step": 10614 + }, + { + "epoch": 0.9690524009494249, + "grad_norm": 0.4935866594314575, + "learning_rate": 4.689892042759601e-06, + "loss": 0.5463, + "step": 10615 + }, + { + "epoch": 0.9691436918020815, + "grad_norm": 0.481485515832901, + "learning_rate": 4.689834307127055e-06, + "loss": 0.5339, + "step": 10616 + }, + { + "epoch": 0.969234982654738, + "grad_norm": 0.4917216897010803, + "learning_rate": 4.689776566475875e-06, + "loss": 0.5148, + "step": 10617 + }, + { + "epoch": 0.9693262735073945, + "grad_norm": 0.5265994071960449, + "learning_rate": 4.689718820806193e-06, + "loss": 0.5145, + "step": 10618 + }, + { + "epoch": 0.9694175643600511, + "grad_norm": 0.4902411997318268, + "learning_rate": 4.68966107011814e-06, + "loss": 0.5023, + "step": 10619 + }, + { + "epoch": 0.9695088552127077, + "grad_norm": 0.4747949242591858, + "learning_rate": 4.689603314411849e-06, + "loss": 0.5562, + "step": 10620 + }, + { + "epoch": 0.9696001460653643, + "grad_norm": 0.4837745130062103, + "learning_rate": 4.689545553687454e-06, + "loss": 0.5741, + "step": 10621 + }, + { + "epoch": 0.9696914369180208, + "grad_norm": 0.5346899628639221, + "learning_rate": 4.689487787945084e-06, + "loss": 0.5078, + "step": 10622 + }, + { + "epoch": 0.9697827277706774, + "grad_norm": 0.4881665110588074, + "learning_rate": 4.689430017184874e-06, + "loss": 0.5342, + "step": 10623 + }, + { + "epoch": 0.9698740186233339, + "grad_norm": 0.49315837025642395, + "learning_rate": 4.689372241406955e-06, + "loss": 0.5478, + "step": 10624 + }, + { + "epoch": 0.9699653094759905, + "grad_norm": 0.47298482060432434, + "learning_rate": 4.689314460611461e-06, + "loss": 0.5589, + "step": 10625 + }, + { + "epoch": 0.970056600328647, + "grad_norm": 0.49427226185798645, + "learning_rate": 4.689256674798523e-06, + "loss": 0.5255, + "step": 10626 + }, + { + "epoch": 0.9701478911813036, + "grad_norm": 0.4779919385910034, + "learning_rate": 4.689198883968274e-06, + "loss": 0.5657, + "step": 10627 + }, + { + "epoch": 0.9702391820339602, + "grad_norm": 0.5102345943450928, + "learning_rate": 4.689141088120846e-06, + "loss": 0.5212, + "step": 10628 + }, + { + "epoch": 0.9703304728866168, + "grad_norm": 0.48364683985710144, + "learning_rate": 4.689083287256372e-06, + "loss": 0.5473, + "step": 10629 + }, + { + "epoch": 0.9704217637392734, + "grad_norm": 0.47424644231796265, + "learning_rate": 4.689025481374985e-06, + "loss": 0.5098, + "step": 10630 + }, + { + "epoch": 0.9705130545919299, + "grad_norm": 0.4796561598777771, + "learning_rate": 4.688967670476815e-06, + "loss": 0.535, + "step": 10631 + }, + { + "epoch": 0.9706043454445864, + "grad_norm": 0.48471489548683167, + "learning_rate": 4.6889098545619975e-06, + "loss": 0.5496, + "step": 10632 + }, + { + "epoch": 0.970695636297243, + "grad_norm": 0.5147897601127625, + "learning_rate": 4.6888520336306635e-06, + "loss": 0.526, + "step": 10633 + }, + { + "epoch": 0.9707869271498996, + "grad_norm": 0.48316633701324463, + "learning_rate": 4.688794207682946e-06, + "loss": 0.5367, + "step": 10634 + }, + { + "epoch": 0.9708782180025561, + "grad_norm": 0.5160855054855347, + "learning_rate": 4.688736376718977e-06, + "loss": 0.5055, + "step": 10635 + }, + { + "epoch": 0.9709695088552127, + "grad_norm": 0.5383740067481995, + "learning_rate": 4.68867854073889e-06, + "loss": 0.5605, + "step": 10636 + }, + { + "epoch": 0.9710607997078693, + "grad_norm": 0.4947530925273895, + "learning_rate": 4.688620699742816e-06, + "loss": 0.5551, + "step": 10637 + }, + { + "epoch": 0.9711520905605259, + "grad_norm": 0.4947873651981354, + "learning_rate": 4.688562853730888e-06, + "loss": 0.5429, + "step": 10638 + }, + { + "epoch": 0.9712433814131824, + "grad_norm": 0.498166561126709, + "learning_rate": 4.68850500270324e-06, + "loss": 0.5371, + "step": 10639 + }, + { + "epoch": 0.9713346722658389, + "grad_norm": 0.49100422859191895, + "learning_rate": 4.688447146660003e-06, + "loss": 0.5306, + "step": 10640 + }, + { + "epoch": 0.9714259631184955, + "grad_norm": 0.4647398889064789, + "learning_rate": 4.68838928560131e-06, + "loss": 0.5473, + "step": 10641 + }, + { + "epoch": 0.9715172539711521, + "grad_norm": 0.5085527896881104, + "learning_rate": 4.688331419527294e-06, + "loss": 0.4827, + "step": 10642 + }, + { + "epoch": 0.9716085448238087, + "grad_norm": 0.5052350163459778, + "learning_rate": 4.688273548438088e-06, + "loss": 0.5175, + "step": 10643 + }, + { + "epoch": 0.9716998356764652, + "grad_norm": 0.48794957995414734, + "learning_rate": 4.688215672333824e-06, + "loss": 0.5878, + "step": 10644 + }, + { + "epoch": 0.9717911265291218, + "grad_norm": 0.4800742268562317, + "learning_rate": 4.688157791214634e-06, + "loss": 0.5595, + "step": 10645 + }, + { + "epoch": 0.9718824173817784, + "grad_norm": 0.5084819793701172, + "learning_rate": 4.688099905080651e-06, + "loss": 0.513, + "step": 10646 + }, + { + "epoch": 0.971973708234435, + "grad_norm": 0.489926815032959, + "learning_rate": 4.688042013932009e-06, + "loss": 0.5391, + "step": 10647 + }, + { + "epoch": 0.9720649990870914, + "grad_norm": 0.4713817834854126, + "learning_rate": 4.687984117768839e-06, + "loss": 0.5419, + "step": 10648 + }, + { + "epoch": 0.972156289939748, + "grad_norm": 0.4883786141872406, + "learning_rate": 4.687926216591275e-06, + "loss": 0.501, + "step": 10649 + }, + { + "epoch": 0.9722475807924046, + "grad_norm": 0.4561455547809601, + "learning_rate": 4.687868310399447e-06, + "loss": 0.5181, + "step": 10650 + }, + { + "epoch": 0.9723388716450612, + "grad_norm": 0.5003864765167236, + "learning_rate": 4.687810399193492e-06, + "loss": 0.5442, + "step": 10651 + }, + { + "epoch": 0.9724301624977177, + "grad_norm": 0.4675595760345459, + "learning_rate": 4.68775248297354e-06, + "loss": 0.5534, + "step": 10652 + }, + { + "epoch": 0.9725214533503743, + "grad_norm": 0.4838394820690155, + "learning_rate": 4.687694561739723e-06, + "loss": 0.5375, + "step": 10653 + }, + { + "epoch": 0.9726127442030309, + "grad_norm": 0.49096307158470154, + "learning_rate": 4.687636635492177e-06, + "loss": 0.5588, + "step": 10654 + }, + { + "epoch": 0.9727040350556874, + "grad_norm": 0.5123430490493774, + "learning_rate": 4.68757870423103e-06, + "loss": 0.5228, + "step": 10655 + }, + { + "epoch": 0.9727953259083439, + "grad_norm": 0.5184785723686218, + "learning_rate": 4.68752076795642e-06, + "loss": 0.5329, + "step": 10656 + }, + { + "epoch": 0.9728866167610005, + "grad_norm": 0.5088127255439758, + "learning_rate": 4.687462826668475e-06, + "loss": 0.5954, + "step": 10657 + }, + { + "epoch": 0.9729779076136571, + "grad_norm": 0.4997313320636749, + "learning_rate": 4.687404880367332e-06, + "loss": 0.515, + "step": 10658 + }, + { + "epoch": 0.9730691984663137, + "grad_norm": 0.4997286796569824, + "learning_rate": 4.68734692905312e-06, + "loss": 0.5463, + "step": 10659 + }, + { + "epoch": 0.9731604893189703, + "grad_norm": 0.4610109031200409, + "learning_rate": 4.687288972725975e-06, + "loss": 0.5286, + "step": 10660 + }, + { + "epoch": 0.9732517801716268, + "grad_norm": 0.45230889320373535, + "learning_rate": 4.687231011386028e-06, + "loss": 0.561, + "step": 10661 + }, + { + "epoch": 0.9733430710242834, + "grad_norm": 0.48784855008125305, + "learning_rate": 4.687173045033412e-06, + "loss": 0.5905, + "step": 10662 + }, + { + "epoch": 0.9734343618769399, + "grad_norm": 0.4539150297641754, + "learning_rate": 4.687115073668261e-06, + "loss": 0.543, + "step": 10663 + }, + { + "epoch": 0.9735256527295965, + "grad_norm": 0.4687597453594208, + "learning_rate": 4.687057097290706e-06, + "loss": 0.5003, + "step": 10664 + }, + { + "epoch": 0.973616943582253, + "grad_norm": 0.502390444278717, + "learning_rate": 4.686999115900882e-06, + "loss": 0.4943, + "step": 10665 + }, + { + "epoch": 0.9737082344349096, + "grad_norm": 0.48293226957321167, + "learning_rate": 4.68694112949892e-06, + "loss": 0.5419, + "step": 10666 + }, + { + "epoch": 0.9737995252875662, + "grad_norm": 0.47143369913101196, + "learning_rate": 4.686883138084954e-06, + "loss": 0.5582, + "step": 10667 + }, + { + "epoch": 0.9738908161402228, + "grad_norm": 0.48168841004371643, + "learning_rate": 4.6868251416591155e-06, + "loss": 0.5297, + "step": 10668 + }, + { + "epoch": 0.9739821069928793, + "grad_norm": 0.49611011147499084, + "learning_rate": 4.6867671402215385e-06, + "loss": 0.5389, + "step": 10669 + }, + { + "epoch": 0.9740733978455359, + "grad_norm": 0.5104682445526123, + "learning_rate": 4.686709133772357e-06, + "loss": 0.5521, + "step": 10670 + }, + { + "epoch": 0.9741646886981924, + "grad_norm": 0.46928492188453674, + "learning_rate": 4.686651122311703e-06, + "loss": 0.5536, + "step": 10671 + }, + { + "epoch": 0.974255979550849, + "grad_norm": 0.5087167620658875, + "learning_rate": 4.686593105839708e-06, + "loss": 0.553, + "step": 10672 + }, + { + "epoch": 0.9743472704035055, + "grad_norm": 0.4869750142097473, + "learning_rate": 4.686535084356507e-06, + "loss": 0.5378, + "step": 10673 + }, + { + "epoch": 0.9744385612561621, + "grad_norm": 0.5090494751930237, + "learning_rate": 4.686477057862232e-06, + "loss": 0.5314, + "step": 10674 + }, + { + "epoch": 0.9745298521088187, + "grad_norm": 0.49120333790779114, + "learning_rate": 4.686419026357017e-06, + "loss": 0.5627, + "step": 10675 + }, + { + "epoch": 0.9746211429614753, + "grad_norm": 0.4967752695083618, + "learning_rate": 4.686360989840992e-06, + "loss": 0.5378, + "step": 10676 + }, + { + "epoch": 0.9747124338141319, + "grad_norm": 0.48771485686302185, + "learning_rate": 4.686302948314294e-06, + "loss": 0.5147, + "step": 10677 + }, + { + "epoch": 0.9748037246667884, + "grad_norm": 0.48854589462280273, + "learning_rate": 4.686244901777054e-06, + "loss": 0.5525, + "step": 10678 + }, + { + "epoch": 0.9748950155194449, + "grad_norm": 0.4843263626098633, + "learning_rate": 4.686186850229405e-06, + "loss": 0.5171, + "step": 10679 + }, + { + "epoch": 0.9749863063721015, + "grad_norm": 0.4612119197845459, + "learning_rate": 4.686128793671481e-06, + "loss": 0.5663, + "step": 10680 + }, + { + "epoch": 0.9750775972247581, + "grad_norm": 0.5115401744842529, + "learning_rate": 4.6860707321034134e-06, + "loss": 0.5437, + "step": 10681 + }, + { + "epoch": 0.9751688880774146, + "grad_norm": 0.4816150665283203, + "learning_rate": 4.686012665525336e-06, + "loss": 0.5442, + "step": 10682 + }, + { + "epoch": 0.9752601789300712, + "grad_norm": 0.4637904763221741, + "learning_rate": 4.685954593937383e-06, + "loss": 0.5433, + "step": 10683 + }, + { + "epoch": 0.9753514697827278, + "grad_norm": 0.4732491672039032, + "learning_rate": 4.685896517339686e-06, + "loss": 0.5288, + "step": 10684 + }, + { + "epoch": 0.9754427606353844, + "grad_norm": 0.49831557273864746, + "learning_rate": 4.6858384357323784e-06, + "loss": 0.5755, + "step": 10685 + }, + { + "epoch": 0.9755340514880408, + "grad_norm": 0.5099655389785767, + "learning_rate": 4.685780349115594e-06, + "loss": 0.5174, + "step": 10686 + }, + { + "epoch": 0.9756253423406974, + "grad_norm": 0.49868714809417725, + "learning_rate": 4.685722257489465e-06, + "loss": 0.5055, + "step": 10687 + }, + { + "epoch": 0.975716633193354, + "grad_norm": 0.48220258951187134, + "learning_rate": 4.685664160854126e-06, + "loss": 0.539, + "step": 10688 + }, + { + "epoch": 0.9758079240460106, + "grad_norm": 0.4949054419994354, + "learning_rate": 4.685606059209709e-06, + "loss": 0.5427, + "step": 10689 + }, + { + "epoch": 0.9758992148986672, + "grad_norm": 0.48223552107810974, + "learning_rate": 4.685547952556347e-06, + "loss": 0.5214, + "step": 10690 + }, + { + "epoch": 0.9759905057513237, + "grad_norm": 0.48880520462989807, + "learning_rate": 4.6854898408941736e-06, + "loss": 0.529, + "step": 10691 + }, + { + "epoch": 0.9760817966039803, + "grad_norm": 0.4946485757827759, + "learning_rate": 4.6854317242233215e-06, + "loss": 0.5577, + "step": 10692 + }, + { + "epoch": 0.9761730874566369, + "grad_norm": 0.47636279463768005, + "learning_rate": 4.6853736025439246e-06, + "loss": 0.5287, + "step": 10693 + }, + { + "epoch": 0.9762643783092934, + "grad_norm": 0.47987204790115356, + "learning_rate": 4.685315475856116e-06, + "loss": 0.5695, + "step": 10694 + }, + { + "epoch": 0.9763556691619499, + "grad_norm": 0.5093681216239929, + "learning_rate": 4.685257344160028e-06, + "loss": 0.4923, + "step": 10695 + }, + { + "epoch": 0.9764469600146065, + "grad_norm": 0.4707253575325012, + "learning_rate": 4.685199207455795e-06, + "loss": 0.53, + "step": 10696 + }, + { + "epoch": 0.9765382508672631, + "grad_norm": 0.45539215207099915, + "learning_rate": 4.68514106574355e-06, + "loss": 0.5778, + "step": 10697 + }, + { + "epoch": 0.9766295417199197, + "grad_norm": 0.4433453679084778, + "learning_rate": 4.685082919023425e-06, + "loss": 0.5072, + "step": 10698 + }, + { + "epoch": 0.9767208325725762, + "grad_norm": 0.4803867042064667, + "learning_rate": 4.685024767295555e-06, + "loss": 0.5233, + "step": 10699 + }, + { + "epoch": 0.9768121234252328, + "grad_norm": 0.49740034341812134, + "learning_rate": 4.684966610560072e-06, + "loss": 0.5271, + "step": 10700 + }, + { + "epoch": 0.9769034142778894, + "grad_norm": 0.48712870478630066, + "learning_rate": 4.684908448817111e-06, + "loss": 0.4753, + "step": 10701 + }, + { + "epoch": 0.9769947051305459, + "grad_norm": 0.473666787147522, + "learning_rate": 4.684850282066803e-06, + "loss": 0.5585, + "step": 10702 + }, + { + "epoch": 0.9770859959832024, + "grad_norm": 0.4958963096141815, + "learning_rate": 4.684792110309283e-06, + "loss": 0.5623, + "step": 10703 + }, + { + "epoch": 0.977177286835859, + "grad_norm": 0.5184721946716309, + "learning_rate": 4.684733933544684e-06, + "loss": 0.5313, + "step": 10704 + }, + { + "epoch": 0.9772685776885156, + "grad_norm": 0.4627058804035187, + "learning_rate": 4.684675751773139e-06, + "loss": 0.5535, + "step": 10705 + }, + { + "epoch": 0.9773598685411722, + "grad_norm": 0.4870513081550598, + "learning_rate": 4.68461756499478e-06, + "loss": 0.5123, + "step": 10706 + }, + { + "epoch": 0.9774511593938288, + "grad_norm": 0.4831150770187378, + "learning_rate": 4.684559373209743e-06, + "loss": 0.5443, + "step": 10707 + }, + { + "epoch": 0.9775424502464853, + "grad_norm": 0.46838831901550293, + "learning_rate": 4.68450117641816e-06, + "loss": 0.5388, + "step": 10708 + }, + { + "epoch": 0.9776337410991419, + "grad_norm": 0.4556792676448822, + "learning_rate": 4.684442974620164e-06, + "loss": 0.5078, + "step": 10709 + }, + { + "epoch": 0.9777250319517984, + "grad_norm": 0.48636704683303833, + "learning_rate": 4.684384767815889e-06, + "loss": 0.5779, + "step": 10710 + }, + { + "epoch": 0.977816322804455, + "grad_norm": 0.5031464099884033, + "learning_rate": 4.684326556005469e-06, + "loss": 0.534, + "step": 10711 + }, + { + "epoch": 0.9779076136571115, + "grad_norm": 0.503398060798645, + "learning_rate": 4.6842683391890355e-06, + "loss": 0.5257, + "step": 10712 + }, + { + "epoch": 0.9779989045097681, + "grad_norm": 0.520477831363678, + "learning_rate": 4.684210117366724e-06, + "loss": 0.5042, + "step": 10713 + }, + { + "epoch": 0.9780901953624247, + "grad_norm": 0.501585841178894, + "learning_rate": 4.684151890538667e-06, + "loss": 0.5372, + "step": 10714 + }, + { + "epoch": 0.9781814862150813, + "grad_norm": 0.4924875497817993, + "learning_rate": 4.684093658704998e-06, + "loss": 0.5382, + "step": 10715 + }, + { + "epoch": 0.9782727770677379, + "grad_norm": 0.4706679880619049, + "learning_rate": 4.68403542186585e-06, + "loss": 0.5696, + "step": 10716 + }, + { + "epoch": 0.9783640679203944, + "grad_norm": 0.4718855321407318, + "learning_rate": 4.683977180021357e-06, + "loss": 0.5628, + "step": 10717 + }, + { + "epoch": 0.9784553587730509, + "grad_norm": 0.4803738296031952, + "learning_rate": 4.683918933171653e-06, + "loss": 0.5209, + "step": 10718 + }, + { + "epoch": 0.9785466496257075, + "grad_norm": 0.4737606346607208, + "learning_rate": 4.683860681316869e-06, + "loss": 0.5116, + "step": 10719 + }, + { + "epoch": 0.978637940478364, + "grad_norm": 0.46940433979034424, + "learning_rate": 4.683802424457142e-06, + "loss": 0.5478, + "step": 10720 + }, + { + "epoch": 0.9787292313310206, + "grad_norm": 0.5020801424980164, + "learning_rate": 4.683744162592604e-06, + "loss": 0.5483, + "step": 10721 + }, + { + "epoch": 0.9788205221836772, + "grad_norm": 0.4908682703971863, + "learning_rate": 4.683685895723388e-06, + "loss": 0.519, + "step": 10722 + }, + { + "epoch": 0.9789118130363338, + "grad_norm": 0.4853624105453491, + "learning_rate": 4.683627623849628e-06, + "loss": 0.5593, + "step": 10723 + }, + { + "epoch": 0.9790031038889904, + "grad_norm": 0.4893377721309662, + "learning_rate": 4.683569346971458e-06, + "loss": 0.5063, + "step": 10724 + }, + { + "epoch": 0.9790943947416468, + "grad_norm": 0.4837678372859955, + "learning_rate": 4.68351106508901e-06, + "loss": 0.543, + "step": 10725 + }, + { + "epoch": 0.9791856855943034, + "grad_norm": 0.4899982511997223, + "learning_rate": 4.6834527782024194e-06, + "loss": 0.5652, + "step": 10726 + }, + { + "epoch": 0.97927697644696, + "grad_norm": 0.5040009617805481, + "learning_rate": 4.683394486311819e-06, + "loss": 0.5578, + "step": 10727 + }, + { + "epoch": 0.9793682672996166, + "grad_norm": 0.4849531650543213, + "learning_rate": 4.683336189417341e-06, + "loss": 0.5499, + "step": 10728 + }, + { + "epoch": 0.9794595581522731, + "grad_norm": 0.46070146560668945, + "learning_rate": 4.683277887519122e-06, + "loss": 0.6006, + "step": 10729 + }, + { + "epoch": 0.9795508490049297, + "grad_norm": 0.48488134145736694, + "learning_rate": 4.683219580617293e-06, + "loss": 0.545, + "step": 10730 + }, + { + "epoch": 0.9796421398575863, + "grad_norm": 0.4605168104171753, + "learning_rate": 4.68316126871199e-06, + "loss": 0.5819, + "step": 10731 + }, + { + "epoch": 0.9797334307102429, + "grad_norm": 0.47430410981178284, + "learning_rate": 4.683102951803344e-06, + "loss": 0.5318, + "step": 10732 + }, + { + "epoch": 0.9798247215628993, + "grad_norm": 0.4869585335254669, + "learning_rate": 4.68304462989149e-06, + "loss": 0.5621, + "step": 10733 + }, + { + "epoch": 0.9799160124155559, + "grad_norm": 0.4724131226539612, + "learning_rate": 4.6829863029765625e-06, + "loss": 0.561, + "step": 10734 + }, + { + "epoch": 0.9800073032682125, + "grad_norm": 0.4818357229232788, + "learning_rate": 4.682927971058693e-06, + "loss": 0.5249, + "step": 10735 + }, + { + "epoch": 0.9800985941208691, + "grad_norm": 0.50100177526474, + "learning_rate": 4.682869634138017e-06, + "loss": 0.5259, + "step": 10736 + }, + { + "epoch": 0.9801898849735257, + "grad_norm": 0.4679643213748932, + "learning_rate": 4.682811292214668e-06, + "loss": 0.5639, + "step": 10737 + }, + { + "epoch": 0.9802811758261822, + "grad_norm": 0.4888996183872223, + "learning_rate": 4.682752945288779e-06, + "loss": 0.5707, + "step": 10738 + }, + { + "epoch": 0.9803724666788388, + "grad_norm": 0.4457131028175354, + "learning_rate": 4.682694593360484e-06, + "loss": 0.5734, + "step": 10739 + }, + { + "epoch": 0.9804637575314954, + "grad_norm": 0.5224363207817078, + "learning_rate": 4.682636236429917e-06, + "loss": 0.535, + "step": 10740 + }, + { + "epoch": 0.9805550483841519, + "grad_norm": 0.49115586280822754, + "learning_rate": 4.682577874497212e-06, + "loss": 0.5475, + "step": 10741 + }, + { + "epoch": 0.9806463392368084, + "grad_norm": 0.492077499628067, + "learning_rate": 4.6825195075625e-06, + "loss": 0.5043, + "step": 10742 + }, + { + "epoch": 0.980737630089465, + "grad_norm": 0.4980716109275818, + "learning_rate": 4.68246113562592e-06, + "loss": 0.5596, + "step": 10743 + }, + { + "epoch": 0.9808289209421216, + "grad_norm": 0.4506245255470276, + "learning_rate": 4.682402758687601e-06, + "loss": 0.5619, + "step": 10744 + }, + { + "epoch": 0.9809202117947782, + "grad_norm": 0.48679542541503906, + "learning_rate": 4.6823443767476795e-06, + "loss": 0.5091, + "step": 10745 + }, + { + "epoch": 0.9810115026474348, + "grad_norm": 0.4814576506614685, + "learning_rate": 4.682285989806288e-06, + "loss": 0.5552, + "step": 10746 + }, + { + "epoch": 0.9811027935000913, + "grad_norm": 0.49151334166526794, + "learning_rate": 4.682227597863561e-06, + "loss": 0.521, + "step": 10747 + }, + { + "epoch": 0.9811940843527479, + "grad_norm": 0.4869278371334076, + "learning_rate": 4.682169200919632e-06, + "loss": 0.5562, + "step": 10748 + }, + { + "epoch": 0.9812853752054044, + "grad_norm": 0.47676247358322144, + "learning_rate": 4.682110798974635e-06, + "loss": 0.5766, + "step": 10749 + }, + { + "epoch": 0.981376666058061, + "grad_norm": 0.4819382429122925, + "learning_rate": 4.682052392028703e-06, + "loss": 0.472, + "step": 10750 + }, + { + "epoch": 0.9814679569107175, + "grad_norm": 0.45288780331611633, + "learning_rate": 4.681993980081972e-06, + "loss": 0.548, + "step": 10751 + }, + { + "epoch": 0.9815592477633741, + "grad_norm": 0.4924255609512329, + "learning_rate": 4.681935563134573e-06, + "loss": 0.5485, + "step": 10752 + }, + { + "epoch": 0.9816505386160307, + "grad_norm": 0.49499762058258057, + "learning_rate": 4.681877141186641e-06, + "loss": 0.5643, + "step": 10753 + }, + { + "epoch": 0.9817418294686873, + "grad_norm": 0.4608135223388672, + "learning_rate": 4.6818187142383124e-06, + "loss": 0.5764, + "step": 10754 + }, + { + "epoch": 0.9818331203213438, + "grad_norm": 0.4960813820362091, + "learning_rate": 4.681760282289717e-06, + "loss": 0.5272, + "step": 10755 + }, + { + "epoch": 0.9819244111740003, + "grad_norm": 0.5065093040466309, + "learning_rate": 4.681701845340991e-06, + "loss": 0.5239, + "step": 10756 + }, + { + "epoch": 0.9820157020266569, + "grad_norm": 0.4861600697040558, + "learning_rate": 4.681643403392269e-06, + "loss": 0.5349, + "step": 10757 + }, + { + "epoch": 0.9821069928793135, + "grad_norm": 0.4829762578010559, + "learning_rate": 4.681584956443683e-06, + "loss": 0.5547, + "step": 10758 + }, + { + "epoch": 0.98219828373197, + "grad_norm": 0.43089354038238525, + "learning_rate": 4.681526504495368e-06, + "loss": 0.6023, + "step": 10759 + }, + { + "epoch": 0.9822895745846266, + "grad_norm": 0.4902607798576355, + "learning_rate": 4.6814680475474575e-06, + "loss": 0.5486, + "step": 10760 + }, + { + "epoch": 0.9823808654372832, + "grad_norm": 0.4758524000644684, + "learning_rate": 4.6814095856000865e-06, + "loss": 0.5177, + "step": 10761 + }, + { + "epoch": 0.9824721562899398, + "grad_norm": 0.469940721988678, + "learning_rate": 4.681351118653387e-06, + "loss": 0.552, + "step": 10762 + }, + { + "epoch": 0.9825634471425964, + "grad_norm": 0.45858490467071533, + "learning_rate": 4.6812926467074955e-06, + "loss": 0.5639, + "step": 10763 + }, + { + "epoch": 0.9826547379952528, + "grad_norm": 0.49014654755592346, + "learning_rate": 4.681234169762544e-06, + "loss": 0.5388, + "step": 10764 + }, + { + "epoch": 0.9827460288479094, + "grad_norm": 0.46099787950515747, + "learning_rate": 4.681175687818667e-06, + "loss": 0.5546, + "step": 10765 + }, + { + "epoch": 0.982837319700566, + "grad_norm": 0.500209629535675, + "learning_rate": 4.681117200876e-06, + "loss": 0.5402, + "step": 10766 + }, + { + "epoch": 0.9829286105532226, + "grad_norm": 0.4981253147125244, + "learning_rate": 4.681058708934675e-06, + "loss": 0.5277, + "step": 10767 + }, + { + "epoch": 0.9830199014058791, + "grad_norm": 0.5097987055778503, + "learning_rate": 4.681000211994827e-06, + "loss": 0.5789, + "step": 10768 + }, + { + "epoch": 0.9831111922585357, + "grad_norm": 0.4906962513923645, + "learning_rate": 4.6809417100565905e-06, + "loss": 0.5294, + "step": 10769 + }, + { + "epoch": 0.9832024831111923, + "grad_norm": 0.5019679665565491, + "learning_rate": 4.680883203120099e-06, + "loss": 0.5246, + "step": 10770 + }, + { + "epoch": 0.9832937739638489, + "grad_norm": 0.5328053832054138, + "learning_rate": 4.680824691185486e-06, + "loss": 0.5042, + "step": 10771 + }, + { + "epoch": 0.9833850648165053, + "grad_norm": 0.4798697531223297, + "learning_rate": 4.680766174252886e-06, + "loss": 0.5552, + "step": 10772 + }, + { + "epoch": 0.9834763556691619, + "grad_norm": 0.4665995240211487, + "learning_rate": 4.680707652322433e-06, + "loss": 0.5459, + "step": 10773 + }, + { + "epoch": 0.9835676465218185, + "grad_norm": 0.4590808153152466, + "learning_rate": 4.680649125394263e-06, + "loss": 0.583, + "step": 10774 + }, + { + "epoch": 0.9836589373744751, + "grad_norm": 0.4684997797012329, + "learning_rate": 4.680590593468508e-06, + "loss": 0.6023, + "step": 10775 + }, + { + "epoch": 0.9837502282271317, + "grad_norm": 0.47937148809432983, + "learning_rate": 4.680532056545303e-06, + "loss": 0.5471, + "step": 10776 + }, + { + "epoch": 0.9838415190797882, + "grad_norm": 0.48710593581199646, + "learning_rate": 4.680473514624781e-06, + "loss": 0.519, + "step": 10777 + }, + { + "epoch": 0.9839328099324448, + "grad_norm": 0.5289419889450073, + "learning_rate": 4.680414967707077e-06, + "loss": 0.5109, + "step": 10778 + }, + { + "epoch": 0.9840241007851014, + "grad_norm": 0.4888526499271393, + "learning_rate": 4.680356415792326e-06, + "loss": 0.5935, + "step": 10779 + }, + { + "epoch": 0.9841153916377579, + "grad_norm": 0.506557822227478, + "learning_rate": 4.6802978588806605e-06, + "loss": 0.5113, + "step": 10780 + }, + { + "epoch": 0.9842066824904144, + "grad_norm": 0.4854324162006378, + "learning_rate": 4.680239296972217e-06, + "loss": 0.5146, + "step": 10781 + }, + { + "epoch": 0.984297973343071, + "grad_norm": 0.47878891229629517, + "learning_rate": 4.680180730067126e-06, + "loss": 0.5564, + "step": 10782 + }, + { + "epoch": 0.9843892641957276, + "grad_norm": 0.48888906836509705, + "learning_rate": 4.680122158165527e-06, + "loss": 0.5794, + "step": 10783 + }, + { + "epoch": 0.9844805550483842, + "grad_norm": 0.4804225564002991, + "learning_rate": 4.680063581267549e-06, + "loss": 0.5324, + "step": 10784 + }, + { + "epoch": 0.9845718459010407, + "grad_norm": 0.5052019357681274, + "learning_rate": 4.680004999373329e-06, + "loss": 0.4922, + "step": 10785 + }, + { + "epoch": 0.9846631367536973, + "grad_norm": 0.5277083516120911, + "learning_rate": 4.679946412483001e-06, + "loss": 0.5268, + "step": 10786 + }, + { + "epoch": 0.9847544276063538, + "grad_norm": 0.4493906795978546, + "learning_rate": 4.679887820596699e-06, + "loss": 0.5568, + "step": 10787 + }, + { + "epoch": 0.9848457184590104, + "grad_norm": 0.4490445852279663, + "learning_rate": 4.679829223714557e-06, + "loss": 0.5274, + "step": 10788 + }, + { + "epoch": 0.9849370093116669, + "grad_norm": 0.4830300509929657, + "learning_rate": 4.67977062183671e-06, + "loss": 0.4932, + "step": 10789 + }, + { + "epoch": 0.9850283001643235, + "grad_norm": 0.4905462861061096, + "learning_rate": 4.6797120149632925e-06, + "loss": 0.6023, + "step": 10790 + }, + { + "epoch": 0.9851195910169801, + "grad_norm": 0.48800408840179443, + "learning_rate": 4.679653403094437e-06, + "loss": 0.5326, + "step": 10791 + }, + { + "epoch": 0.9852108818696367, + "grad_norm": 0.48737236857414246, + "learning_rate": 4.67959478623028e-06, + "loss": 0.5484, + "step": 10792 + }, + { + "epoch": 0.9853021727222933, + "grad_norm": 0.4511359930038452, + "learning_rate": 4.679536164370955e-06, + "loss": 0.5451, + "step": 10793 + }, + { + "epoch": 0.9853934635749498, + "grad_norm": 0.47574442625045776, + "learning_rate": 4.679477537516595e-06, + "loss": 0.503, + "step": 10794 + }, + { + "epoch": 0.9854847544276063, + "grad_norm": 0.4862334728240967, + "learning_rate": 4.6794189056673365e-06, + "loss": 0.5662, + "step": 10795 + }, + { + "epoch": 0.9855760452802629, + "grad_norm": 0.5090768337249756, + "learning_rate": 4.679360268823313e-06, + "loss": 0.5427, + "step": 10796 + }, + { + "epoch": 0.9856673361329195, + "grad_norm": 0.5237883925437927, + "learning_rate": 4.679301626984659e-06, + "loss": 0.4977, + "step": 10797 + }, + { + "epoch": 0.985758626985576, + "grad_norm": 0.4965606927871704, + "learning_rate": 4.6792429801515084e-06, + "loss": 0.5635, + "step": 10798 + }, + { + "epoch": 0.9858499178382326, + "grad_norm": 0.4769175052642822, + "learning_rate": 4.679184328323996e-06, + "loss": 0.5511, + "step": 10799 + }, + { + "epoch": 0.9859412086908892, + "grad_norm": 0.492256760597229, + "learning_rate": 4.679125671502256e-06, + "loss": 0.5537, + "step": 10800 + }, + { + "epoch": 0.9860324995435458, + "grad_norm": 0.4515639841556549, + "learning_rate": 4.6790670096864235e-06, + "loss": 0.5768, + "step": 10801 + }, + { + "epoch": 0.9861237903962023, + "grad_norm": 0.4922903776168823, + "learning_rate": 4.679008342876632e-06, + "loss": 0.5478, + "step": 10802 + }, + { + "epoch": 0.9862150812488588, + "grad_norm": 0.49059635400772095, + "learning_rate": 4.6789496710730164e-06, + "loss": 0.5339, + "step": 10803 + }, + { + "epoch": 0.9863063721015154, + "grad_norm": 0.46528857946395874, + "learning_rate": 4.678890994275711e-06, + "loss": 0.5721, + "step": 10804 + }, + { + "epoch": 0.986397662954172, + "grad_norm": 0.4795750379562378, + "learning_rate": 4.6788323124848504e-06, + "loss": 0.5632, + "step": 10805 + }, + { + "epoch": 0.9864889538068286, + "grad_norm": 0.46383869647979736, + "learning_rate": 4.67877362570057e-06, + "loss": 0.542, + "step": 10806 + }, + { + "epoch": 0.9865802446594851, + "grad_norm": 0.4978409707546234, + "learning_rate": 4.678714933923003e-06, + "loss": 0.5399, + "step": 10807 + }, + { + "epoch": 0.9866715355121417, + "grad_norm": 0.4805450737476349, + "learning_rate": 4.678656237152284e-06, + "loss": 0.5373, + "step": 10808 + }, + { + "epoch": 0.9867628263647983, + "grad_norm": 0.48498356342315674, + "learning_rate": 4.678597535388548e-06, + "loss": 0.566, + "step": 10809 + }, + { + "epoch": 0.9868541172174549, + "grad_norm": 0.48760688304901123, + "learning_rate": 4.678538828631929e-06, + "loss": 0.5167, + "step": 10810 + }, + { + "epoch": 0.9869454080701113, + "grad_norm": 0.4887996017932892, + "learning_rate": 4.678480116882563e-06, + "loss": 0.5161, + "step": 10811 + }, + { + "epoch": 0.9870366989227679, + "grad_norm": 0.4775770306587219, + "learning_rate": 4.6784214001405826e-06, + "loss": 0.5271, + "step": 10812 + }, + { + "epoch": 0.9871279897754245, + "grad_norm": 0.5046222805976868, + "learning_rate": 4.678362678406123e-06, + "loss": 0.5134, + "step": 10813 + }, + { + "epoch": 0.9872192806280811, + "grad_norm": 0.48667651414871216, + "learning_rate": 4.67830395167932e-06, + "loss": 0.5331, + "step": 10814 + }, + { + "epoch": 0.9873105714807376, + "grad_norm": 0.4758765995502472, + "learning_rate": 4.678245219960307e-06, + "loss": 0.5436, + "step": 10815 + }, + { + "epoch": 0.9874018623333942, + "grad_norm": 0.4912208318710327, + "learning_rate": 4.678186483249218e-06, + "loss": 0.524, + "step": 10816 + }, + { + "epoch": 0.9874931531860508, + "grad_norm": 0.4795245826244354, + "learning_rate": 4.678127741546189e-06, + "loss": 0.5814, + "step": 10817 + }, + { + "epoch": 0.9875844440387074, + "grad_norm": 0.4616451561450958, + "learning_rate": 4.678068994851354e-06, + "loss": 0.5584, + "step": 10818 + }, + { + "epoch": 0.9876757348913638, + "grad_norm": 0.4833577573299408, + "learning_rate": 4.678010243164848e-06, + "loss": 0.5467, + "step": 10819 + }, + { + "epoch": 0.9877670257440204, + "grad_norm": 0.48190274834632874, + "learning_rate": 4.677951486486805e-06, + "loss": 0.5546, + "step": 10820 + }, + { + "epoch": 0.987858316596677, + "grad_norm": 0.4440569281578064, + "learning_rate": 4.67789272481736e-06, + "loss": 0.5776, + "step": 10821 + }, + { + "epoch": 0.9879496074493336, + "grad_norm": 0.49275684356689453, + "learning_rate": 4.677833958156647e-06, + "loss": 0.5543, + "step": 10822 + }, + { + "epoch": 0.9880408983019902, + "grad_norm": 0.4827674329280853, + "learning_rate": 4.677775186504802e-06, + "loss": 0.5333, + "step": 10823 + }, + { + "epoch": 0.9881321891546467, + "grad_norm": 0.505998432636261, + "learning_rate": 4.677716409861958e-06, + "loss": 0.5497, + "step": 10824 + }, + { + "epoch": 0.9882234800073033, + "grad_norm": 0.4766296148300171, + "learning_rate": 4.677657628228253e-06, + "loss": 0.5927, + "step": 10825 + }, + { + "epoch": 0.9883147708599598, + "grad_norm": 0.47851383686065674, + "learning_rate": 4.677598841603818e-06, + "loss": 0.5345, + "step": 10826 + }, + { + "epoch": 0.9884060617126164, + "grad_norm": 0.49508705735206604, + "learning_rate": 4.6775400499887894e-06, + "loss": 0.5407, + "step": 10827 + }, + { + "epoch": 0.9884973525652729, + "grad_norm": 0.4638800323009491, + "learning_rate": 4.677481253383301e-06, + "loss": 0.5681, + "step": 10828 + }, + { + "epoch": 0.9885886434179295, + "grad_norm": 0.488236665725708, + "learning_rate": 4.67742245178749e-06, + "loss": 0.5419, + "step": 10829 + }, + { + "epoch": 0.9886799342705861, + "grad_norm": 0.5007890462875366, + "learning_rate": 4.6773636452014884e-06, + "loss": 0.5354, + "step": 10830 + }, + { + "epoch": 0.9887712251232427, + "grad_norm": 0.4523685574531555, + "learning_rate": 4.6773048336254315e-06, + "loss": 0.5584, + "step": 10831 + }, + { + "epoch": 0.9888625159758992, + "grad_norm": 0.4841128885746002, + "learning_rate": 4.677246017059454e-06, + "loss": 0.5297, + "step": 10832 + }, + { + "epoch": 0.9889538068285558, + "grad_norm": 0.49388203024864197, + "learning_rate": 4.677187195503693e-06, + "loss": 0.5554, + "step": 10833 + }, + { + "epoch": 0.9890450976812123, + "grad_norm": 0.5202513337135315, + "learning_rate": 4.6771283689582815e-06, + "loss": 0.4959, + "step": 10834 + }, + { + "epoch": 0.9891363885338689, + "grad_norm": 0.48231226205825806, + "learning_rate": 4.6770695374233534e-06, + "loss": 0.5297, + "step": 10835 + }, + { + "epoch": 0.9892276793865254, + "grad_norm": 0.48473644256591797, + "learning_rate": 4.677010700899045e-06, + "loss": 0.5443, + "step": 10836 + }, + { + "epoch": 0.989318970239182, + "grad_norm": 0.49545612931251526, + "learning_rate": 4.67695185938549e-06, + "loss": 0.5643, + "step": 10837 + }, + { + "epoch": 0.9894102610918386, + "grad_norm": 0.489624947309494, + "learning_rate": 4.676893012882825e-06, + "loss": 0.5585, + "step": 10838 + }, + { + "epoch": 0.9895015519444952, + "grad_norm": 0.49332287907600403, + "learning_rate": 4.676834161391183e-06, + "loss": 0.5422, + "step": 10839 + }, + { + "epoch": 0.9895928427971518, + "grad_norm": 0.4972606301307678, + "learning_rate": 4.6767753049107e-06, + "loss": 0.5366, + "step": 10840 + }, + { + "epoch": 0.9896841336498083, + "grad_norm": 0.49009227752685547, + "learning_rate": 4.67671644344151e-06, + "loss": 0.5423, + "step": 10841 + }, + { + "epoch": 0.9897754245024648, + "grad_norm": 0.48014405369758606, + "learning_rate": 4.676657576983749e-06, + "loss": 0.5593, + "step": 10842 + }, + { + "epoch": 0.9898667153551214, + "grad_norm": 0.5064048767089844, + "learning_rate": 4.676598705537552e-06, + "loss": 0.5372, + "step": 10843 + }, + { + "epoch": 0.989958006207778, + "grad_norm": 0.4548211693763733, + "learning_rate": 4.676539829103051e-06, + "loss": 0.5571, + "step": 10844 + }, + { + "epoch": 0.9900492970604345, + "grad_norm": 0.5008878111839294, + "learning_rate": 4.676480947680386e-06, + "loss": 0.5228, + "step": 10845 + }, + { + "epoch": 0.9901405879130911, + "grad_norm": 0.501171886920929, + "learning_rate": 4.676422061269688e-06, + "loss": 0.5138, + "step": 10846 + }, + { + "epoch": 0.9902318787657477, + "grad_norm": 0.4982953667640686, + "learning_rate": 4.6763631698710924e-06, + "loss": 0.5498, + "step": 10847 + }, + { + "epoch": 0.9903231696184043, + "grad_norm": 0.5070527791976929, + "learning_rate": 4.676304273484735e-06, + "loss": 0.5495, + "step": 10848 + }, + { + "epoch": 0.9904144604710609, + "grad_norm": 0.47161197662353516, + "learning_rate": 4.676245372110751e-06, + "loss": 0.5645, + "step": 10849 + }, + { + "epoch": 0.9905057513237173, + "grad_norm": 0.4748864471912384, + "learning_rate": 4.676186465749276e-06, + "loss": 0.5432, + "step": 10850 + }, + { + "epoch": 0.9905970421763739, + "grad_norm": 0.4853319525718689, + "learning_rate": 4.676127554400443e-06, + "loss": 0.5715, + "step": 10851 + }, + { + "epoch": 0.9906883330290305, + "grad_norm": 0.5020993947982788, + "learning_rate": 4.676068638064388e-06, + "loss": 0.5078, + "step": 10852 + }, + { + "epoch": 0.990779623881687, + "grad_norm": 0.5044651031494141, + "learning_rate": 4.676009716741247e-06, + "loss": 0.5252, + "step": 10853 + }, + { + "epoch": 0.9908709147343436, + "grad_norm": 0.48288020491600037, + "learning_rate": 4.675950790431153e-06, + "loss": 0.5396, + "step": 10854 + }, + { + "epoch": 0.9909622055870002, + "grad_norm": 0.494132936000824, + "learning_rate": 4.675891859134243e-06, + "loss": 0.5414, + "step": 10855 + }, + { + "epoch": 0.9910534964396568, + "grad_norm": 0.45461609959602356, + "learning_rate": 4.675832922850651e-06, + "loss": 0.5566, + "step": 10856 + }, + { + "epoch": 0.9911447872923133, + "grad_norm": 0.4966484606266022, + "learning_rate": 4.675773981580511e-06, + "loss": 0.4933, + "step": 10857 + }, + { + "epoch": 0.9912360781449698, + "grad_norm": 0.4921632707118988, + "learning_rate": 4.675715035323961e-06, + "loss": 0.5416, + "step": 10858 + }, + { + "epoch": 0.9913273689976264, + "grad_norm": 0.5070950984954834, + "learning_rate": 4.675656084081133e-06, + "loss": 0.5083, + "step": 10859 + }, + { + "epoch": 0.991418659850283, + "grad_norm": 0.4629581570625305, + "learning_rate": 4.675597127852165e-06, + "loss": 0.5399, + "step": 10860 + }, + { + "epoch": 0.9915099507029396, + "grad_norm": 0.47559860348701477, + "learning_rate": 4.67553816663719e-06, + "loss": 0.5972, + "step": 10861 + }, + { + "epoch": 0.9916012415555961, + "grad_norm": 0.5035008788108826, + "learning_rate": 4.675479200436344e-06, + "loss": 0.5625, + "step": 10862 + }, + { + "epoch": 0.9916925324082527, + "grad_norm": 0.50284743309021, + "learning_rate": 4.675420229249762e-06, + "loss": 0.5697, + "step": 10863 + }, + { + "epoch": 0.9917838232609093, + "grad_norm": 0.47405093908309937, + "learning_rate": 4.675361253077579e-06, + "loss": 0.5538, + "step": 10864 + }, + { + "epoch": 0.9918751141135658, + "grad_norm": 0.49442389607429504, + "learning_rate": 4.6753022719199305e-06, + "loss": 0.5281, + "step": 10865 + }, + { + "epoch": 0.9919664049662223, + "grad_norm": 0.4942016303539276, + "learning_rate": 4.67524328577695e-06, + "loss": 0.511, + "step": 10866 + }, + { + "epoch": 0.9920576958188789, + "grad_norm": 0.4659757614135742, + "learning_rate": 4.675184294648776e-06, + "loss": 0.5447, + "step": 10867 + }, + { + "epoch": 0.9921489866715355, + "grad_norm": 0.4759533107280731, + "learning_rate": 4.675125298535541e-06, + "loss": 0.5417, + "step": 10868 + }, + { + "epoch": 0.9922402775241921, + "grad_norm": 0.5032129287719727, + "learning_rate": 4.67506629743738e-06, + "loss": 0.5364, + "step": 10869 + }, + { + "epoch": 0.9923315683768487, + "grad_norm": 0.4616597592830658, + "learning_rate": 4.675007291354431e-06, + "loss": 0.5492, + "step": 10870 + }, + { + "epoch": 0.9924228592295052, + "grad_norm": 0.4815647602081299, + "learning_rate": 4.674948280286827e-06, + "loss": 0.5671, + "step": 10871 + }, + { + "epoch": 0.9925141500821618, + "grad_norm": 0.46966344118118286, + "learning_rate": 4.6748892642347034e-06, + "loss": 0.574, + "step": 10872 + }, + { + "epoch": 0.9926054409348183, + "grad_norm": 0.47122684121131897, + "learning_rate": 4.674830243198196e-06, + "loss": 0.5148, + "step": 10873 + }, + { + "epoch": 0.9926967317874749, + "grad_norm": 0.5129715204238892, + "learning_rate": 4.67477121717744e-06, + "loss": 0.5122, + "step": 10874 + }, + { + "epoch": 0.9927880226401314, + "grad_norm": 0.4833076000213623, + "learning_rate": 4.67471218617257e-06, + "loss": 0.5485, + "step": 10875 + }, + { + "epoch": 0.992879313492788, + "grad_norm": 0.4895798861980438, + "learning_rate": 4.6746531501837214e-06, + "loss": 0.5723, + "step": 10876 + }, + { + "epoch": 0.9929706043454446, + "grad_norm": 0.4691166579723358, + "learning_rate": 4.6745941092110304e-06, + "loss": 0.5616, + "step": 10877 + }, + { + "epoch": 0.9930618951981012, + "grad_norm": 0.49492907524108887, + "learning_rate": 4.674535063254631e-06, + "loss": 0.5229, + "step": 10878 + }, + { + "epoch": 0.9931531860507578, + "grad_norm": 0.5003103613853455, + "learning_rate": 4.6744760123146606e-06, + "loss": 0.5417, + "step": 10879 + }, + { + "epoch": 0.9932444769034143, + "grad_norm": 0.5143778324127197, + "learning_rate": 4.674416956391252e-06, + "loss": 0.4813, + "step": 10880 + }, + { + "epoch": 0.9933357677560708, + "grad_norm": 0.4666283428668976, + "learning_rate": 4.674357895484543e-06, + "loss": 0.5951, + "step": 10881 + }, + { + "epoch": 0.9934270586087274, + "grad_norm": 0.472025990486145, + "learning_rate": 4.6742988295946664e-06, + "loss": 0.5352, + "step": 10882 + }, + { + "epoch": 0.993518349461384, + "grad_norm": 0.4856340289115906, + "learning_rate": 4.674239758721761e-06, + "loss": 0.502, + "step": 10883 + }, + { + "epoch": 0.9936096403140405, + "grad_norm": 0.5005359053611755, + "learning_rate": 4.674180682865958e-06, + "loss": 0.5644, + "step": 10884 + }, + { + "epoch": 0.9937009311666971, + "grad_norm": 0.46628743410110474, + "learning_rate": 4.674121602027396e-06, + "loss": 0.5599, + "step": 10885 + }, + { + "epoch": 0.9937922220193537, + "grad_norm": 0.4904373288154602, + "learning_rate": 4.674062516206208e-06, + "loss": 0.5136, + "step": 10886 + }, + { + "epoch": 0.9938835128720103, + "grad_norm": 0.49293437600135803, + "learning_rate": 4.674003425402532e-06, + "loss": 0.5729, + "step": 10887 + }, + { + "epoch": 0.9939748037246667, + "grad_norm": 0.4579869210720062, + "learning_rate": 4.6739443296165015e-06, + "loss": 0.5572, + "step": 10888 + }, + { + "epoch": 0.9940660945773233, + "grad_norm": 0.4913942813873291, + "learning_rate": 4.673885228848253e-06, + "loss": 0.519, + "step": 10889 + }, + { + "epoch": 0.9941573854299799, + "grad_norm": 0.46450671553611755, + "learning_rate": 4.673826123097921e-06, + "loss": 0.5386, + "step": 10890 + }, + { + "epoch": 0.9942486762826365, + "grad_norm": 0.45144522190093994, + "learning_rate": 4.6737670123656415e-06, + "loss": 0.6216, + "step": 10891 + }, + { + "epoch": 0.994339967135293, + "grad_norm": 0.4993046224117279, + "learning_rate": 4.67370789665155e-06, + "loss": 0.5436, + "step": 10892 + }, + { + "epoch": 0.9944312579879496, + "grad_norm": 0.49387413263320923, + "learning_rate": 4.673648775955782e-06, + "loss": 0.4826, + "step": 10893 + }, + { + "epoch": 0.9945225488406062, + "grad_norm": 0.49911725521087646, + "learning_rate": 4.673589650278473e-06, + "loss": 0.5316, + "step": 10894 + }, + { + "epoch": 0.9946138396932628, + "grad_norm": 0.4570023715496063, + "learning_rate": 4.6735305196197576e-06, + "loss": 0.5704, + "step": 10895 + }, + { + "epoch": 0.9947051305459192, + "grad_norm": 0.4599436819553375, + "learning_rate": 4.673471383979773e-06, + "loss": 0.5262, + "step": 10896 + }, + { + "epoch": 0.9947964213985758, + "grad_norm": 0.49289071559906006, + "learning_rate": 4.673412243358654e-06, + "loss": 0.5241, + "step": 10897 + }, + { + "epoch": 0.9948877122512324, + "grad_norm": 0.49062198400497437, + "learning_rate": 4.673353097756535e-06, + "loss": 0.5558, + "step": 10898 + }, + { + "epoch": 0.994979003103889, + "grad_norm": 0.4888291656970978, + "learning_rate": 4.673293947173553e-06, + "loss": 0.5539, + "step": 10899 + }, + { + "epoch": 0.9950702939565456, + "grad_norm": 0.4929935038089752, + "learning_rate": 4.673234791609843e-06, + "loss": 0.5177, + "step": 10900 + }, + { + "epoch": 0.9951615848092021, + "grad_norm": 0.4762588441371918, + "learning_rate": 4.673175631065541e-06, + "loss": 0.5382, + "step": 10901 + }, + { + "epoch": 0.9952528756618587, + "grad_norm": 0.4485534727573395, + "learning_rate": 4.6731164655407815e-06, + "loss": 0.5506, + "step": 10902 + }, + { + "epoch": 0.9953441665145153, + "grad_norm": 0.4969484806060791, + "learning_rate": 4.6730572950357016e-06, + "loss": 0.5169, + "step": 10903 + }, + { + "epoch": 0.9954354573671718, + "grad_norm": 0.4948281943798065, + "learning_rate": 4.6729981195504355e-06, + "loss": 0.5256, + "step": 10904 + }, + { + "epoch": 0.9955267482198283, + "grad_norm": 0.4367150664329529, + "learning_rate": 4.67293893908512e-06, + "loss": 0.508, + "step": 10905 + }, + { + "epoch": 0.9956180390724849, + "grad_norm": 0.4892383813858032, + "learning_rate": 4.67287975363989e-06, + "loss": 0.5478, + "step": 10906 + }, + { + "epoch": 0.9957093299251415, + "grad_norm": 0.4836331009864807, + "learning_rate": 4.672820563214881e-06, + "loss": 0.5081, + "step": 10907 + }, + { + "epoch": 0.9958006207777981, + "grad_norm": 0.4761894643306732, + "learning_rate": 4.672761367810229e-06, + "loss": 0.5296, + "step": 10908 + }, + { + "epoch": 0.9958919116304547, + "grad_norm": 0.4620759189128876, + "learning_rate": 4.67270216742607e-06, + "loss": 0.5746, + "step": 10909 + }, + { + "epoch": 0.9959832024831112, + "grad_norm": 0.499761164188385, + "learning_rate": 4.672642962062539e-06, + "loss": 0.5313, + "step": 10910 + }, + { + "epoch": 0.9960744933357678, + "grad_norm": 0.4707532227039337, + "learning_rate": 4.672583751719772e-06, + "loss": 0.5299, + "step": 10911 + }, + { + "epoch": 0.9961657841884243, + "grad_norm": 0.5076184272766113, + "learning_rate": 4.672524536397905e-06, + "loss": 0.5688, + "step": 10912 + }, + { + "epoch": 0.9962570750410809, + "grad_norm": 0.49606701731681824, + "learning_rate": 4.672465316097073e-06, + "loss": 0.5433, + "step": 10913 + }, + { + "epoch": 0.9963483658937374, + "grad_norm": 0.4766024351119995, + "learning_rate": 4.672406090817413e-06, + "loss": 0.5378, + "step": 10914 + }, + { + "epoch": 0.996439656746394, + "grad_norm": 0.48641857504844666, + "learning_rate": 4.672346860559059e-06, + "loss": 0.5123, + "step": 10915 + }, + { + "epoch": 0.9965309475990506, + "grad_norm": 0.4778951108455658, + "learning_rate": 4.672287625322148e-06, + "loss": 0.5332, + "step": 10916 + }, + { + "epoch": 0.9966222384517072, + "grad_norm": 0.48347508907318115, + "learning_rate": 4.672228385106815e-06, + "loss": 0.5388, + "step": 10917 + }, + { + "epoch": 0.9967135293043637, + "grad_norm": 0.46429598331451416, + "learning_rate": 4.672169139913196e-06, + "loss": 0.584, + "step": 10918 + }, + { + "epoch": 0.9968048201570203, + "grad_norm": 0.5229578018188477, + "learning_rate": 4.672109889741428e-06, + "loss": 0.4906, + "step": 10919 + }, + { + "epoch": 0.9968961110096768, + "grad_norm": 0.4771415889263153, + "learning_rate": 4.672050634591645e-06, + "loss": 0.5378, + "step": 10920 + }, + { + "epoch": 0.9969874018623334, + "grad_norm": 0.4874388873577118, + "learning_rate": 4.671991374463983e-06, + "loss": 0.5602, + "step": 10921 + }, + { + "epoch": 0.99707869271499, + "grad_norm": 0.484784871339798, + "learning_rate": 4.671932109358579e-06, + "loss": 0.5499, + "step": 10922 + }, + { + "epoch": 0.9971699835676465, + "grad_norm": 0.49560344219207764, + "learning_rate": 4.671872839275567e-06, + "loss": 0.5433, + "step": 10923 + }, + { + "epoch": 0.9972612744203031, + "grad_norm": 0.4959307312965393, + "learning_rate": 4.671813564215085e-06, + "loss": 0.5377, + "step": 10924 + }, + { + "epoch": 0.9973525652729597, + "grad_norm": 0.48872488737106323, + "learning_rate": 4.6717542841772676e-06, + "loss": 0.5407, + "step": 10925 + }, + { + "epoch": 0.9974438561256163, + "grad_norm": 0.5155363082885742, + "learning_rate": 4.671694999162251e-06, + "loss": 0.5573, + "step": 10926 + }, + { + "epoch": 0.9975351469782727, + "grad_norm": 0.4623861312866211, + "learning_rate": 4.671635709170171e-06, + "loss": 0.5554, + "step": 10927 + }, + { + "epoch": 0.9976264378309293, + "grad_norm": 0.48253506422042847, + "learning_rate": 4.671576414201163e-06, + "loss": 0.5612, + "step": 10928 + }, + { + "epoch": 0.9977177286835859, + "grad_norm": 0.4663447439670563, + "learning_rate": 4.671517114255364e-06, + "loss": 0.5781, + "step": 10929 + }, + { + "epoch": 0.9978090195362425, + "grad_norm": 0.5050520896911621, + "learning_rate": 4.6714578093329075e-06, + "loss": 0.5388, + "step": 10930 + }, + { + "epoch": 0.997900310388899, + "grad_norm": 0.46409979462623596, + "learning_rate": 4.671398499433932e-06, + "loss": 0.5705, + "step": 10931 + }, + { + "epoch": 0.9979916012415556, + "grad_norm": 0.5261378884315491, + "learning_rate": 4.671339184558573e-06, + "loss": 0.4836, + "step": 10932 + }, + { + "epoch": 0.9980828920942122, + "grad_norm": 0.4845105707645416, + "learning_rate": 4.671279864706966e-06, + "loss": 0.5499, + "step": 10933 + }, + { + "epoch": 0.9981741829468688, + "grad_norm": 0.5130664110183716, + "learning_rate": 4.671220539879247e-06, + "loss": 0.5404, + "step": 10934 + }, + { + "epoch": 0.9982654737995252, + "grad_norm": 0.51614910364151, + "learning_rate": 4.671161210075552e-06, + "loss": 0.5338, + "step": 10935 + }, + { + "epoch": 0.9983567646521818, + "grad_norm": 0.48654040694236755, + "learning_rate": 4.671101875296016e-06, + "loss": 0.5539, + "step": 10936 + }, + { + "epoch": 0.9984480555048384, + "grad_norm": 0.48595577478408813, + "learning_rate": 4.671042535540776e-06, + "loss": 0.5527, + "step": 10937 + }, + { + "epoch": 0.998539346357495, + "grad_norm": 0.4631651043891907, + "learning_rate": 4.6709831908099685e-06, + "loss": 0.5646, + "step": 10938 + }, + { + "epoch": 0.9986306372101516, + "grad_norm": 0.47435590624809265, + "learning_rate": 4.6709238411037285e-06, + "loss": 0.5352, + "step": 10939 + }, + { + "epoch": 0.9987219280628081, + "grad_norm": 0.46382707357406616, + "learning_rate": 4.670864486422192e-06, + "loss": 0.5523, + "step": 10940 + }, + { + "epoch": 0.9988132189154647, + "grad_norm": 0.4923785328865051, + "learning_rate": 4.6708051267654954e-06, + "loss": 0.542, + "step": 10941 + }, + { + "epoch": 0.9989045097681213, + "grad_norm": 0.46123600006103516, + "learning_rate": 4.670745762133775e-06, + "loss": 0.5817, + "step": 10942 + }, + { + "epoch": 0.9989958006207778, + "grad_norm": 0.5153965353965759, + "learning_rate": 4.670686392527168e-06, + "loss": 0.5279, + "step": 10943 + }, + { + "epoch": 0.9990870914734343, + "grad_norm": 0.4420625865459442, + "learning_rate": 4.670627017945807e-06, + "loss": 0.5715, + "step": 10944 + }, + { + "epoch": 0.9991783823260909, + "grad_norm": 0.5122845768928528, + "learning_rate": 4.67056763838983e-06, + "loss": 0.5749, + "step": 10945 + }, + { + "epoch": 0.9992696731787475, + "grad_norm": 0.5117788314819336, + "learning_rate": 4.670508253859374e-06, + "loss": 0.5165, + "step": 10946 + }, + { + "epoch": 0.9993609640314041, + "grad_norm": 0.500876247882843, + "learning_rate": 4.670448864354574e-06, + "loss": 0.5266, + "step": 10947 + }, + { + "epoch": 0.9994522548840606, + "grad_norm": 0.5144382119178772, + "learning_rate": 4.670389469875567e-06, + "loss": 0.5125, + "step": 10948 + }, + { + "epoch": 0.9995435457367172, + "grad_norm": 0.48666390776634216, + "learning_rate": 4.670330070422488e-06, + "loss": 0.5458, + "step": 10949 + }, + { + "epoch": 0.9996348365893738, + "grad_norm": 0.47769641876220703, + "learning_rate": 4.670270665995474e-06, + "loss": 0.5337, + "step": 10950 + }, + { + "epoch": 0.9997261274420303, + "grad_norm": 0.5210157632827759, + "learning_rate": 4.67021125659466e-06, + "loss": 0.5435, + "step": 10951 + }, + { + "epoch": 0.9998174182946868, + "grad_norm": 0.5022791028022766, + "learning_rate": 4.670151842220184e-06, + "loss": 0.5318, + "step": 10952 + }, + { + "epoch": 0.9999087091473434, + "grad_norm": 0.507364809513092, + "learning_rate": 4.670092422872181e-06, + "loss": 0.5386, + "step": 10953 + }, + { + "epoch": 1.0, + "grad_norm": 0.4740588963031769, + "learning_rate": 4.670032998550786e-06, + "loss": 0.5557, + "step": 10954 + } + ], + "logging_steps": 1, + "max_steps": 65724, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 10954, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.089754312970024e+19, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +}