{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.04182968498155174,
"eval_steps": 500,
"global_step": 23000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.818681955719641e-05,
"grad_norm": 2.1063554286956787,
"learning_rate": 0.0002,
"loss": 1.9357,
"step": 10
},
{
"epoch": 3.637363911439282e-05,
"grad_norm": 0.9359453320503235,
"learning_rate": 0.0002,
"loss": 0.2208,
"step": 20
},
{
"epoch": 5.4560458671589234e-05,
"grad_norm": 0.5420117378234863,
"learning_rate": 0.0002,
"loss": 0.1459,
"step": 30
},
{
"epoch": 7.274727822878565e-05,
"grad_norm": 0.05442357063293457,
"learning_rate": 0.0002,
"loss": 0.0823,
"step": 40
},
{
"epoch": 9.093409778598205e-05,
"grad_norm": 0.0005907653248868883,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 50
},
{
"epoch": 0.00010912091734317847,
"grad_norm": 0.26516178250312805,
"learning_rate": 0.0002,
"loss": 1.0686,
"step": 60
},
{
"epoch": 0.00012730773690037487,
"grad_norm": 0.44067099690437317,
"learning_rate": 0.0002,
"loss": 0.2613,
"step": 70
},
{
"epoch": 0.0001454945564575713,
"grad_norm": 0.09356075525283813,
"learning_rate": 0.0002,
"loss": 0.1415,
"step": 80
},
{
"epoch": 0.0001636813760147677,
"grad_norm": 0.017799921333789825,
"learning_rate": 0.0002,
"loss": 0.1013,
"step": 90
},
{
"epoch": 0.0001818681955719641,
"grad_norm": 0.0018534553237259388,
"learning_rate": 0.0002,
"loss": 0.0001,
"step": 100
},
{
"epoch": 0.00020005501512916052,
"grad_norm": 0.35472020506858826,
"learning_rate": 0.0002,
"loss": 0.73,
"step": 110
},
{
"epoch": 0.00021824183468635694,
"grad_norm": 0.3880878686904907,
"learning_rate": 0.0002,
"loss": 0.1424,
"step": 120
},
{
"epoch": 0.00023642865424355333,
"grad_norm": 0.19027432799339294,
"learning_rate": 0.0002,
"loss": 0.1173,
"step": 130
},
{
"epoch": 0.00025461547380074975,
"grad_norm": 0.019047321751713753,
"learning_rate": 0.0002,
"loss": 0.0977,
"step": 140
},
{
"epoch": 0.00027280229335794617,
"grad_norm": 0.0003795044613070786,
"learning_rate": 0.0002,
"loss": 0.0007,
"step": 150
},
{
"epoch": 0.0002909891129151426,
"grad_norm": 0.08740618824958801,
"learning_rate": 0.0002,
"loss": 0.801,
"step": 160
},
{
"epoch": 0.000309175932472339,
"grad_norm": 0.2661634087562561,
"learning_rate": 0.0002,
"loss": 0.1274,
"step": 170
},
{
"epoch": 0.0003273627520295354,
"grad_norm": 0.05828547850251198,
"learning_rate": 0.0002,
"loss": 0.1184,
"step": 180
},
{
"epoch": 0.0003455495715867318,
"grad_norm": 0.02175055630505085,
"learning_rate": 0.0002,
"loss": 0.0752,
"step": 190
},
{
"epoch": 0.0003637363911439282,
"grad_norm": 0.0009504792396910489,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 200
},
{
"epoch": 0.0003819232107011246,
"grad_norm": 0.25059741735458374,
"learning_rate": 0.0002,
"loss": 0.5125,
"step": 210
},
{
"epoch": 0.00040011003025832104,
"grad_norm": 0.13256193697452545,
"learning_rate": 0.0002,
"loss": 0.1014,
"step": 220
},
{
"epoch": 0.00041829684981551746,
"grad_norm": 0.09446375072002411,
"learning_rate": 0.0002,
"loss": 0.0896,
"step": 230
},
{
"epoch": 0.0004364836693727139,
"grad_norm": 0.019389621913433075,
"learning_rate": 0.0002,
"loss": 0.0726,
"step": 240
},
{
"epoch": 0.0004546704889299103,
"grad_norm": 0.0032304900232702494,
"learning_rate": 0.0002,
"loss": 0.0023,
"step": 250
},
{
"epoch": 0.00047285730848710666,
"grad_norm": 2.5549609661102295,
"learning_rate": 0.0002,
"loss": 0.3884,
"step": 260
},
{
"epoch": 0.0004910441280443031,
"grad_norm": 0.44937047362327576,
"learning_rate": 0.0002,
"loss": 0.1071,
"step": 270
},
{
"epoch": 0.0005092309476014995,
"grad_norm": 0.1509999781847,
"learning_rate": 0.0002,
"loss": 0.0979,
"step": 280
},
{
"epoch": 0.0005274177671586959,
"grad_norm": 0.006468054372817278,
"learning_rate": 0.0002,
"loss": 0.0611,
"step": 290
},
{
"epoch": 0.0005456045867158923,
"grad_norm": 0.0002916739322245121,
"learning_rate": 0.0002,
"loss": 0.001,
"step": 300
},
{
"epoch": 0.0005637914062730887,
"grad_norm": 0.23081810772418976,
"learning_rate": 0.0002,
"loss": 0.5894,
"step": 310
},
{
"epoch": 0.0005819782258302852,
"grad_norm": 0.22755394876003265,
"learning_rate": 0.0002,
"loss": 0.114,
"step": 320
},
{
"epoch": 0.0006001650453874816,
"grad_norm": 0.49973106384277344,
"learning_rate": 0.0002,
"loss": 0.093,
"step": 330
},
{
"epoch": 0.000618351864944678,
"grad_norm": 0.08789435774087906,
"learning_rate": 0.0002,
"loss": 0.0745,
"step": 340
},
{
"epoch": 0.0006365386845018744,
"grad_norm": 0.0058497479185462,
"learning_rate": 0.0002,
"loss": 0.0007,
"step": 350
},
{
"epoch": 0.0006547255040590708,
"grad_norm": 0.30569636821746826,
"learning_rate": 0.0002,
"loss": 0.5169,
"step": 360
},
{
"epoch": 0.0006729123236162671,
"grad_norm": 0.2783024311065674,
"learning_rate": 0.0002,
"loss": 0.13,
"step": 370
},
{
"epoch": 0.0006910991431734636,
"grad_norm": 0.13052967190742493,
"learning_rate": 0.0002,
"loss": 0.0907,
"step": 380
},
{
"epoch": 0.00070928596273066,
"grad_norm": 0.15066476166248322,
"learning_rate": 0.0002,
"loss": 0.0996,
"step": 390
},
{
"epoch": 0.0007274727822878564,
"grad_norm": 0.0005865198327228427,
"learning_rate": 0.0002,
"loss": 0.0021,
"step": 400
},
{
"epoch": 0.0007456596018450528,
"grad_norm": 0.31872233748435974,
"learning_rate": 0.0002,
"loss": 0.4507,
"step": 410
},
{
"epoch": 0.0007638464214022492,
"grad_norm": 0.08874880522489548,
"learning_rate": 0.0002,
"loss": 0.136,
"step": 420
},
{
"epoch": 0.0007820332409594457,
"grad_norm": 0.10985178500413895,
"learning_rate": 0.0002,
"loss": 0.0992,
"step": 430
},
{
"epoch": 0.0008002200605166421,
"grad_norm": 0.10776215046644211,
"learning_rate": 0.0002,
"loss": 0.0661,
"step": 440
},
{
"epoch": 0.0008184068800738385,
"grad_norm": 0.006612936966121197,
"learning_rate": 0.0002,
"loss": 0.0009,
"step": 450
},
{
"epoch": 0.0008365936996310349,
"grad_norm": 0.2757071256637573,
"learning_rate": 0.0002,
"loss": 0.6376,
"step": 460
},
{
"epoch": 0.0008547805191882313,
"grad_norm": 0.24748466908931732,
"learning_rate": 0.0002,
"loss": 0.1241,
"step": 470
},
{
"epoch": 0.0008729673387454278,
"grad_norm": 0.1035066694021225,
"learning_rate": 0.0002,
"loss": 0.1008,
"step": 480
},
{
"epoch": 0.0008911541583026242,
"grad_norm": 0.06515783071517944,
"learning_rate": 0.0002,
"loss": 0.0711,
"step": 490
},
{
"epoch": 0.0009093409778598206,
"grad_norm": 0.011224807240068913,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 500
},
{
"epoch": 0.000927527797417017,
"grad_norm": 0.2669332027435303,
"learning_rate": 0.0002,
"loss": 0.5618,
"step": 510
},
{
"epoch": 0.0009457146169742133,
"grad_norm": 0.26048392057418823,
"learning_rate": 0.0002,
"loss": 0.1259,
"step": 520
},
{
"epoch": 0.0009639014365314097,
"grad_norm": 0.22928836941719055,
"learning_rate": 0.0002,
"loss": 0.0956,
"step": 530
},
{
"epoch": 0.0009820882560886062,
"grad_norm": 0.084063321352005,
"learning_rate": 0.0002,
"loss": 0.0708,
"step": 540
},
{
"epoch": 0.0010002750756458027,
"grad_norm": 0.004612344317138195,
"learning_rate": 0.0002,
"loss": 0.0007,
"step": 550
},
{
"epoch": 0.001018461895202999,
"grad_norm": 0.3866584599018097,
"learning_rate": 0.0002,
"loss": 0.5406,
"step": 560
},
{
"epoch": 0.0010366487147601955,
"grad_norm": 0.32303065061569214,
"learning_rate": 0.0002,
"loss": 0.1001,
"step": 570
},
{
"epoch": 0.0010548355343173918,
"grad_norm": 0.09439560770988464,
"learning_rate": 0.0002,
"loss": 0.1051,
"step": 580
},
{
"epoch": 0.0010730223538745881,
"grad_norm": 0.028145521879196167,
"learning_rate": 0.0002,
"loss": 0.0638,
"step": 590
},
{
"epoch": 0.0010912091734317847,
"grad_norm": 0.00048497263924218714,
"learning_rate": 0.0002,
"loss": 0.002,
"step": 600
},
{
"epoch": 0.001109395992988981,
"grad_norm": 0.32391539216041565,
"learning_rate": 0.0002,
"loss": 0.5483,
"step": 610
},
{
"epoch": 0.0011275828125461775,
"grad_norm": 0.02977031283080578,
"learning_rate": 0.0002,
"loss": 0.1264,
"step": 620
},
{
"epoch": 0.0011457696321033738,
"grad_norm": 0.07332426309585571,
"learning_rate": 0.0002,
"loss": 0.1018,
"step": 630
},
{
"epoch": 0.0011639564516605703,
"grad_norm": 0.05653443560004234,
"learning_rate": 0.0002,
"loss": 0.0666,
"step": 640
},
{
"epoch": 0.0011821432712177666,
"grad_norm": 0.0010635281214490533,
"learning_rate": 0.0002,
"loss": 0.0009,
"step": 650
},
{
"epoch": 0.0012003300907749632,
"grad_norm": 0.04933600872755051,
"learning_rate": 0.0002,
"loss": 0.3902,
"step": 660
},
{
"epoch": 0.0012185169103321595,
"grad_norm": 0.14713574945926666,
"learning_rate": 0.0002,
"loss": 0.0905,
"step": 670
},
{
"epoch": 0.001236703729889356,
"grad_norm": 0.05463952198624611,
"learning_rate": 0.0002,
"loss": 0.0909,
"step": 680
},
{
"epoch": 0.0012548905494465523,
"grad_norm": 0.10299955308437347,
"learning_rate": 0.0002,
"loss": 0.07,
"step": 690
},
{
"epoch": 0.0012730773690037488,
"grad_norm": 0.022791124880313873,
"learning_rate": 0.0002,
"loss": 0.0027,
"step": 700
},
{
"epoch": 0.0012912641885609452,
"grad_norm": 0.27977490425109863,
"learning_rate": 0.0002,
"loss": 0.4421,
"step": 710
},
{
"epoch": 0.0013094510081181417,
"grad_norm": 0.2346329241991043,
"learning_rate": 0.0002,
"loss": 0.1263,
"step": 720
},
{
"epoch": 0.001327637827675338,
"grad_norm": 0.09294597059488297,
"learning_rate": 0.0002,
"loss": 0.096,
"step": 730
},
{
"epoch": 0.0013458246472325343,
"grad_norm": 0.10317150503396988,
"learning_rate": 0.0002,
"loss": 0.0727,
"step": 740
},
{
"epoch": 0.0013640114667897308,
"grad_norm": 0.001372635131701827,
"learning_rate": 0.0002,
"loss": 0.001,
"step": 750
},
{
"epoch": 0.0013821982863469271,
"grad_norm": 0.10563486814498901,
"learning_rate": 0.0002,
"loss": 0.596,
"step": 760
},
{
"epoch": 0.0014003851059041237,
"grad_norm": 0.14429838955402374,
"learning_rate": 0.0002,
"loss": 0.1178,
"step": 770
},
{
"epoch": 0.00141857192546132,
"grad_norm": 0.0848163515329361,
"learning_rate": 0.0002,
"loss": 0.1008,
"step": 780
},
{
"epoch": 0.0014367587450185165,
"grad_norm": 0.07259710133075714,
"learning_rate": 0.0002,
"loss": 0.069,
"step": 790
},
{
"epoch": 0.0014549455645757128,
"grad_norm": 0.0019098519114777446,
"learning_rate": 0.0002,
"loss": 0.0023,
"step": 800
},
{
"epoch": 0.0014731323841329093,
"grad_norm": 0.2433256059885025,
"learning_rate": 0.0002,
"loss": 0.2937,
"step": 810
},
{
"epoch": 0.0014913192036901056,
"grad_norm": 0.04093409329652786,
"learning_rate": 0.0002,
"loss": 0.1133,
"step": 820
},
{
"epoch": 0.0015095060232473022,
"grad_norm": 0.0480966717004776,
"learning_rate": 0.0002,
"loss": 0.0969,
"step": 830
},
{
"epoch": 0.0015276928428044985,
"grad_norm": 0.14327965676784515,
"learning_rate": 0.0002,
"loss": 0.0866,
"step": 840
},
{
"epoch": 0.001545879662361695,
"grad_norm": 0.001585015095770359,
"learning_rate": 0.0002,
"loss": 0.0042,
"step": 850
},
{
"epoch": 0.0015640664819188913,
"grad_norm": 0.1842886209487915,
"learning_rate": 0.0002,
"loss": 0.3273,
"step": 860
},
{
"epoch": 0.0015822533014760878,
"grad_norm": 0.09671049565076828,
"learning_rate": 0.0002,
"loss": 0.1079,
"step": 870
},
{
"epoch": 0.0016004401210332842,
"grad_norm": 0.2730088233947754,
"learning_rate": 0.0002,
"loss": 0.1018,
"step": 880
},
{
"epoch": 0.0016186269405904805,
"grad_norm": 0.11702803522348404,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 890
},
{
"epoch": 0.001636813760147677,
"grad_norm": 0.004438066389411688,
"learning_rate": 0.0002,
"loss": 0.0033,
"step": 900
},
{
"epoch": 0.0016550005797048733,
"grad_norm": 0.18424616754055023,
"learning_rate": 0.0002,
"loss": 0.4028,
"step": 910
},
{
"epoch": 0.0016731873992620698,
"grad_norm": 0.12502820789813995,
"learning_rate": 0.0002,
"loss": 0.0979,
"step": 920
},
{
"epoch": 0.0016913742188192661,
"grad_norm": 0.05109328031539917,
"learning_rate": 0.0002,
"loss": 0.0889,
"step": 930
},
{
"epoch": 0.0017095610383764627,
"grad_norm": 0.18566183745861053,
"learning_rate": 0.0002,
"loss": 0.0833,
"step": 940
},
{
"epoch": 0.001727747857933659,
"grad_norm": 0.0012954511912539601,
"learning_rate": 0.0002,
"loss": 0.0029,
"step": 950
},
{
"epoch": 0.0017459346774908555,
"grad_norm": 0.06683014333248138,
"learning_rate": 0.0002,
"loss": 0.4614,
"step": 960
},
{
"epoch": 0.0017641214970480518,
"grad_norm": 0.27773013710975647,
"learning_rate": 0.0002,
"loss": 0.1131,
"step": 970
},
{
"epoch": 0.0017823083166052483,
"grad_norm": 0.1999790072441101,
"learning_rate": 0.0002,
"loss": 0.089,
"step": 980
},
{
"epoch": 0.0018004951361624446,
"grad_norm": 0.09625103324651718,
"learning_rate": 0.0002,
"loss": 0.0739,
"step": 990
},
{
"epoch": 0.0018186819557196412,
"grad_norm": 0.005470380187034607,
"learning_rate": 0.0002,
"loss": 0.0012,
"step": 1000
},
{
"epoch": 0.0018368687752768375,
"grad_norm": 0.038832616060972214,
"learning_rate": 0.0002,
"loss": 0.5521,
"step": 1010
},
{
"epoch": 0.001855055594834034,
"grad_norm": 0.1903093159198761,
"learning_rate": 0.0002,
"loss": 0.1237,
"step": 1020
},
{
"epoch": 0.0018732424143912303,
"grad_norm": 0.031102774664759636,
"learning_rate": 0.0002,
"loss": 0.0866,
"step": 1030
},
{
"epoch": 0.0018914292339484266,
"grad_norm": 0.043983202427625656,
"learning_rate": 0.0002,
"loss": 0.0611,
"step": 1040
},
{
"epoch": 0.0019096160535056232,
"grad_norm": 0.0002974902163259685,
"learning_rate": 0.0002,
"loss": 0.0035,
"step": 1050
},
{
"epoch": 0.0019278028730628195,
"grad_norm": 0.1936149299144745,
"learning_rate": 0.0002,
"loss": 0.3019,
"step": 1060
},
{
"epoch": 0.001945989692620016,
"grad_norm": 0.15767355263233185,
"learning_rate": 0.0002,
"loss": 0.108,
"step": 1070
},
{
"epoch": 0.0019641765121772123,
"grad_norm": 0.08244495838880539,
"learning_rate": 0.0002,
"loss": 0.091,
"step": 1080
},
{
"epoch": 0.0019823633317344086,
"grad_norm": 0.15848897397518158,
"learning_rate": 0.0002,
"loss": 0.0655,
"step": 1090
},
{
"epoch": 0.0020005501512916054,
"grad_norm": 0.0011951205087825656,
"learning_rate": 0.0002,
"loss": 0.0052,
"step": 1100
},
{
"epoch": 0.0020187369708488017,
"grad_norm": 0.13027112185955048,
"learning_rate": 0.0002,
"loss": 0.2943,
"step": 1110
},
{
"epoch": 0.002036923790405998,
"grad_norm": 0.19413979351520538,
"learning_rate": 0.0002,
"loss": 0.1329,
"step": 1120
},
{
"epoch": 0.0020551106099631943,
"grad_norm": 0.08515465259552002,
"learning_rate": 0.0002,
"loss": 0.0921,
"step": 1130
},
{
"epoch": 0.002073297429520391,
"grad_norm": 0.1244177296757698,
"learning_rate": 0.0002,
"loss": 0.0678,
"step": 1140
},
{
"epoch": 0.0020914842490775873,
"grad_norm": 0.0016714326338842511,
"learning_rate": 0.0002,
"loss": 0.0035,
"step": 1150
},
{
"epoch": 0.0021096710686347836,
"grad_norm": 0.24979737401008606,
"learning_rate": 0.0002,
"loss": 0.2643,
"step": 1160
},
{
"epoch": 0.00212785788819198,
"grad_norm": 0.14143353700637817,
"learning_rate": 0.0002,
"loss": 0.1037,
"step": 1170
},
{
"epoch": 0.0021460447077491763,
"grad_norm": 0.033794257789850235,
"learning_rate": 0.0002,
"loss": 0.087,
"step": 1180
},
{
"epoch": 0.002164231527306373,
"grad_norm": 0.11503162235021591,
"learning_rate": 0.0002,
"loss": 0.0659,
"step": 1190
},
{
"epoch": 0.0021824183468635693,
"grad_norm": 0.0014654065016657114,
"learning_rate": 0.0002,
"loss": 0.0056,
"step": 1200
},
{
"epoch": 0.0022006051664207656,
"grad_norm": 0.13292767107486725,
"learning_rate": 0.0002,
"loss": 0.2956,
"step": 1210
},
{
"epoch": 0.002218791985977962,
"grad_norm": 0.15238040685653687,
"learning_rate": 0.0002,
"loss": 0.1122,
"step": 1220
},
{
"epoch": 0.0022369788055351587,
"grad_norm": 0.045078523457050323,
"learning_rate": 0.0002,
"loss": 0.091,
"step": 1230
},
{
"epoch": 0.002255165625092355,
"grad_norm": 0.11438468098640442,
"learning_rate": 0.0002,
"loss": 0.0754,
"step": 1240
},
{
"epoch": 0.0022733524446495513,
"grad_norm": 0.001236733514815569,
"learning_rate": 0.0002,
"loss": 0.004,
"step": 1250
},
{
"epoch": 0.0022915392642067476,
"grad_norm": 0.23386552929878235,
"learning_rate": 0.0002,
"loss": 0.351,
"step": 1260
},
{
"epoch": 0.0023097260837639444,
"grad_norm": 0.030786139890551567,
"learning_rate": 0.0002,
"loss": 0.1074,
"step": 1270
},
{
"epoch": 0.0023279129033211407,
"grad_norm": 0.150347501039505,
"learning_rate": 0.0002,
"loss": 0.1064,
"step": 1280
},
{
"epoch": 0.002346099722878337,
"grad_norm": 0.1402382105588913,
"learning_rate": 0.0002,
"loss": 0.0675,
"step": 1290
},
{
"epoch": 0.0023642865424355333,
"grad_norm": 0.0006117303855717182,
"learning_rate": 0.0002,
"loss": 0.0031,
"step": 1300
},
{
"epoch": 0.00238247336199273,
"grad_norm": 0.16031372547149658,
"learning_rate": 0.0002,
"loss": 0.4344,
"step": 1310
},
{
"epoch": 0.0024006601815499263,
"grad_norm": 0.11017303168773651,
"learning_rate": 0.0002,
"loss": 0.1147,
"step": 1320
},
{
"epoch": 0.0024188470011071227,
"grad_norm": 0.055746905505657196,
"learning_rate": 0.0002,
"loss": 0.093,
"step": 1330
},
{
"epoch": 0.002437033820664319,
"grad_norm": 0.09806664288043976,
"learning_rate": 0.0002,
"loss": 0.0682,
"step": 1340
},
{
"epoch": 0.0024552206402215153,
"grad_norm": 0.000555588339921087,
"learning_rate": 0.0002,
"loss": 0.0045,
"step": 1350
},
{
"epoch": 0.002473407459778712,
"grad_norm": 0.04899182915687561,
"learning_rate": 0.0002,
"loss": 0.3454,
"step": 1360
},
{
"epoch": 0.0024915942793359083,
"grad_norm": 0.02870030514895916,
"learning_rate": 0.0002,
"loss": 0.1036,
"step": 1370
},
{
"epoch": 0.0025097810988931046,
"grad_norm": 0.08591730147600174,
"learning_rate": 0.0002,
"loss": 0.0962,
"step": 1380
},
{
"epoch": 0.002527967918450301,
"grad_norm": 0.1169242337346077,
"learning_rate": 0.0002,
"loss": 0.0627,
"step": 1390
},
{
"epoch": 0.0025461547380074977,
"grad_norm": 0.0008637752034701407,
"learning_rate": 0.0002,
"loss": 0.0025,
"step": 1400
},
{
"epoch": 0.002564341557564694,
"grad_norm": 0.11741841584444046,
"learning_rate": 0.0002,
"loss": 0.3703,
"step": 1410
},
{
"epoch": 0.0025825283771218903,
"grad_norm": 0.05232485383749008,
"learning_rate": 0.0002,
"loss": 0.1072,
"step": 1420
},
{
"epoch": 0.0026007151966790866,
"grad_norm": 0.025201110169291496,
"learning_rate": 0.0002,
"loss": 0.0893,
"step": 1430
},
{
"epoch": 0.0026189020162362834,
"grad_norm": 0.11462239921092987,
"learning_rate": 0.0002,
"loss": 0.0826,
"step": 1440
},
{
"epoch": 0.0026370888357934797,
"grad_norm": 0.002194227883592248,
"learning_rate": 0.0002,
"loss": 0.0049,
"step": 1450
},
{
"epoch": 0.002655275655350676,
"grad_norm": 0.05786404758691788,
"learning_rate": 0.0002,
"loss": 0.3187,
"step": 1460
},
{
"epoch": 0.0026734624749078723,
"grad_norm": 0.03776915743947029,
"learning_rate": 0.0002,
"loss": 0.1002,
"step": 1470
},
{
"epoch": 0.0026916492944650686,
"grad_norm": 0.08628734946250916,
"learning_rate": 0.0002,
"loss": 0.0933,
"step": 1480
},
{
"epoch": 0.0027098361140222653,
"grad_norm": 0.0933455228805542,
"learning_rate": 0.0002,
"loss": 0.0712,
"step": 1490
},
{
"epoch": 0.0027280229335794617,
"grad_norm": 0.0007446192903444171,
"learning_rate": 0.0002,
"loss": 0.003,
"step": 1500
},
{
"epoch": 0.002746209753136658,
"grad_norm": 0.04412281885743141,
"learning_rate": 0.0002,
"loss": 0.3738,
"step": 1510
},
{
"epoch": 0.0027643965726938543,
"grad_norm": 0.04729326814413071,
"learning_rate": 0.0002,
"loss": 0.1015,
"step": 1520
},
{
"epoch": 0.002782583392251051,
"grad_norm": 0.04822024703025818,
"learning_rate": 0.0002,
"loss": 0.0913,
"step": 1530
},
{
"epoch": 0.0028007702118082473,
"grad_norm": 0.15468090772628784,
"learning_rate": 0.0002,
"loss": 0.0794,
"step": 1540
},
{
"epoch": 0.0028189570313654436,
"grad_norm": 0.0011828596470877528,
"learning_rate": 0.0002,
"loss": 0.0089,
"step": 1550
},
{
"epoch": 0.00283714385092264,
"grad_norm": 0.030639037489891052,
"learning_rate": 0.0002,
"loss": 0.3382,
"step": 1560
},
{
"epoch": 0.0028553306704798367,
"grad_norm": 0.08429472148418427,
"learning_rate": 0.0002,
"loss": 0.1075,
"step": 1570
},
{
"epoch": 0.002873517490037033,
"grad_norm": 0.056431323289871216,
"learning_rate": 0.0002,
"loss": 0.0946,
"step": 1580
},
{
"epoch": 0.0028917043095942293,
"grad_norm": 0.1799512803554535,
"learning_rate": 0.0002,
"loss": 0.0795,
"step": 1590
},
{
"epoch": 0.0029098911291514256,
"grad_norm": 0.0018818675307556987,
"learning_rate": 0.0002,
"loss": 0.0082,
"step": 1600
},
{
"epoch": 0.002928077948708622,
"grad_norm": 0.061398155987262726,
"learning_rate": 0.0002,
"loss": 0.3414,
"step": 1610
},
{
"epoch": 0.0029462647682658187,
"grad_norm": 0.0657019093632698,
"learning_rate": 0.0002,
"loss": 0.1082,
"step": 1620
},
{
"epoch": 0.002964451587823015,
"grad_norm": 0.04701487720012665,
"learning_rate": 0.0002,
"loss": 0.0918,
"step": 1630
},
{
"epoch": 0.0029826384073802113,
"grad_norm": 0.1834430694580078,
"learning_rate": 0.0002,
"loss": 0.081,
"step": 1640
},
{
"epoch": 0.0030008252269374076,
"grad_norm": 0.004841644782572985,
"learning_rate": 0.0002,
"loss": 0.0138,
"step": 1650
},
{
"epoch": 0.0030190120464946043,
"grad_norm": 0.05793444439768791,
"learning_rate": 0.0002,
"loss": 0.2981,
"step": 1660
},
{
"epoch": 0.0030371988660518007,
"grad_norm": 0.049123138189315796,
"learning_rate": 0.0002,
"loss": 0.1072,
"step": 1670
},
{
"epoch": 0.003055385685608997,
"grad_norm": 0.033852141350507736,
"learning_rate": 0.0002,
"loss": 0.093,
"step": 1680
},
{
"epoch": 0.0030735725051661933,
"grad_norm": 0.16161279380321503,
"learning_rate": 0.0002,
"loss": 0.084,
"step": 1690
},
{
"epoch": 0.00309175932472339,
"grad_norm": 0.0011225020280107856,
"learning_rate": 0.0002,
"loss": 0.0059,
"step": 1700
},
{
"epoch": 0.0031099461442805863,
"grad_norm": 0.05849582701921463,
"learning_rate": 0.0002,
"loss": 0.3878,
"step": 1710
},
{
"epoch": 0.0031281329638377826,
"grad_norm": 0.033466637134552,
"learning_rate": 0.0002,
"loss": 0.1096,
"step": 1720
},
{
"epoch": 0.003146319783394979,
"grad_norm": 0.03488466143608093,
"learning_rate": 0.0002,
"loss": 0.0895,
"step": 1730
},
{
"epoch": 0.0031645066029521757,
"grad_norm": 0.15636079013347626,
"learning_rate": 0.0002,
"loss": 0.0716,
"step": 1740
},
{
"epoch": 0.003182693422509372,
"grad_norm": 0.001519509358331561,
"learning_rate": 0.0002,
"loss": 0.0062,
"step": 1750
},
{
"epoch": 0.0032008802420665683,
"grad_norm": 0.04979783296585083,
"learning_rate": 0.0002,
"loss": 0.3409,
"step": 1760
},
{
"epoch": 0.0032190670616237646,
"grad_norm": 0.09706272929906845,
"learning_rate": 0.0002,
"loss": 0.1052,
"step": 1770
},
{
"epoch": 0.003237253881180961,
"grad_norm": 0.08768483251333237,
"learning_rate": 0.0002,
"loss": 0.0938,
"step": 1780
},
{
"epoch": 0.0032554407007381577,
"grad_norm": 0.20421457290649414,
"learning_rate": 0.0002,
"loss": 0.085,
"step": 1790
},
{
"epoch": 0.003273627520295354,
"grad_norm": 0.0024727964773774147,
"learning_rate": 0.0002,
"loss": 0.0147,
"step": 1800
},
{
"epoch": 0.0032918143398525503,
"grad_norm": 0.04270516335964203,
"learning_rate": 0.0002,
"loss": 0.2872,
"step": 1810
},
{
"epoch": 0.0033100011594097466,
"grad_norm": 0.08055799454450607,
"learning_rate": 0.0002,
"loss": 0.0992,
"step": 1820
},
{
"epoch": 0.0033281879789669433,
"grad_norm": 0.02607434056699276,
"learning_rate": 0.0002,
"loss": 0.0803,
"step": 1830
},
{
"epoch": 0.0033463747985241397,
"grad_norm": 0.16260816156864166,
"learning_rate": 0.0002,
"loss": 0.0753,
"step": 1840
},
{
"epoch": 0.003364561618081336,
"grad_norm": 0.004690333269536495,
"learning_rate": 0.0002,
"loss": 0.012,
"step": 1850
},
{
"epoch": 0.0033827484376385323,
"grad_norm": 0.041513338685035706,
"learning_rate": 0.0002,
"loss": 0.2491,
"step": 1860
},
{
"epoch": 0.003400935257195729,
"grad_norm": 0.08935420960187912,
"learning_rate": 0.0002,
"loss": 0.1001,
"step": 1870
},
{
"epoch": 0.0034191220767529253,
"grad_norm": 0.03826737776398659,
"learning_rate": 0.0002,
"loss": 0.0877,
"step": 1880
},
{
"epoch": 0.0034373088963101216,
"grad_norm": 0.19423778355121613,
"learning_rate": 0.0002,
"loss": 0.0797,
"step": 1890
},
{
"epoch": 0.003455495715867318,
"grad_norm": 0.003520288970321417,
"learning_rate": 0.0002,
"loss": 0.013,
"step": 1900
},
{
"epoch": 0.0034736825354245143,
"grad_norm": 0.14648132026195526,
"learning_rate": 0.0002,
"loss": 0.3209,
"step": 1910
},
{
"epoch": 0.003491869354981711,
"grad_norm": 0.03780071437358856,
"learning_rate": 0.0002,
"loss": 0.0934,
"step": 1920
},
{
"epoch": 0.0035100561745389073,
"grad_norm": 0.05014612153172493,
"learning_rate": 0.0002,
"loss": 0.082,
"step": 1930
},
{
"epoch": 0.0035282429940961036,
"grad_norm": 0.12917590141296387,
"learning_rate": 0.0002,
"loss": 0.0733,
"step": 1940
},
{
"epoch": 0.0035464298136533,
"grad_norm": 0.0030132795218378305,
"learning_rate": 0.0002,
"loss": 0.0111,
"step": 1950
},
{
"epoch": 0.0035646166332104967,
"grad_norm": 0.03008626028895378,
"learning_rate": 0.0002,
"loss": 0.2126,
"step": 1960
},
{
"epoch": 0.003582803452767693,
"grad_norm": 0.0915503203868866,
"learning_rate": 0.0002,
"loss": 0.1097,
"step": 1970
},
{
"epoch": 0.0036009902723248893,
"grad_norm": 0.06607015430927277,
"learning_rate": 0.0002,
"loss": 0.0932,
"step": 1980
},
{
"epoch": 0.0036191770918820856,
"grad_norm": 0.18796613812446594,
"learning_rate": 0.0002,
"loss": 0.083,
"step": 1990
},
{
"epoch": 0.0036373639114392823,
"grad_norm": 0.0022257096134126186,
"learning_rate": 0.0002,
"loss": 0.0147,
"step": 2000
},
{
"epoch": 0.0036555507309964787,
"grad_norm": 0.0687415823340416,
"learning_rate": 0.0002,
"loss": 0.2604,
"step": 2010
},
{
"epoch": 0.003673737550553675,
"grad_norm": 0.025175679475069046,
"learning_rate": 0.0002,
"loss": 0.0998,
"step": 2020
},
{
"epoch": 0.0036919243701108713,
"grad_norm": 0.04275168478488922,
"learning_rate": 0.0002,
"loss": 0.0898,
"step": 2030
},
{
"epoch": 0.003710111189668068,
"grad_norm": 0.17306455969810486,
"learning_rate": 0.0002,
"loss": 0.0793,
"step": 2040
},
{
"epoch": 0.0037282980092252643,
"grad_norm": 0.007826454006135464,
"learning_rate": 0.0002,
"loss": 0.011,
"step": 2050
},
{
"epoch": 0.0037464848287824606,
"grad_norm": 0.06461178511381149,
"learning_rate": 0.0002,
"loss": 0.2597,
"step": 2060
},
{
"epoch": 0.003764671648339657,
"grad_norm": 0.061357177793979645,
"learning_rate": 0.0002,
"loss": 0.1001,
"step": 2070
},
{
"epoch": 0.0037828584678968533,
"grad_norm": 0.029154235497117043,
"learning_rate": 0.0002,
"loss": 0.0859,
"step": 2080
},
{
"epoch": 0.00380104528745405,
"grad_norm": 0.1350340098142624,
"learning_rate": 0.0002,
"loss": 0.0756,
"step": 2090
},
{
"epoch": 0.0038192321070112463,
"grad_norm": 0.0017614173702895641,
"learning_rate": 0.0002,
"loss": 0.0058,
"step": 2100
},
{
"epoch": 0.0038374189265684426,
"grad_norm": 0.024254316464066505,
"learning_rate": 0.0002,
"loss": 0.3349,
"step": 2110
},
{
"epoch": 0.003855605746125639,
"grad_norm": 0.07142530381679535,
"learning_rate": 0.0002,
"loss": 0.0953,
"step": 2120
},
{
"epoch": 0.0038737925656828357,
"grad_norm": 0.05570175498723984,
"learning_rate": 0.0002,
"loss": 0.0796,
"step": 2130
},
{
"epoch": 0.003891979385240032,
"grad_norm": 0.16996875405311584,
"learning_rate": 0.0002,
"loss": 0.0782,
"step": 2140
},
{
"epoch": 0.003910166204797228,
"grad_norm": 0.0058751595206558704,
"learning_rate": 0.0002,
"loss": 0.0206,
"step": 2150
},
{
"epoch": 0.003928353024354425,
"grad_norm": 0.029807811602950096,
"learning_rate": 0.0002,
"loss": 0.1926,
"step": 2160
},
{
"epoch": 0.003946539843911621,
"grad_norm": 0.11123469471931458,
"learning_rate": 0.0002,
"loss": 0.1082,
"step": 2170
},
{
"epoch": 0.003964726663468817,
"grad_norm": 0.074626125395298,
"learning_rate": 0.0002,
"loss": 0.081,
"step": 2180
},
{
"epoch": 0.003982913483026014,
"grad_norm": 0.17397737503051758,
"learning_rate": 0.0002,
"loss": 0.0729,
"step": 2190
},
{
"epoch": 0.004001100302583211,
"grad_norm": 0.007995887659490108,
"learning_rate": 0.0002,
"loss": 0.022,
"step": 2200
},
{
"epoch": 0.004019287122140407,
"grad_norm": 0.039921898394823074,
"learning_rate": 0.0002,
"loss": 0.1883,
"step": 2210
},
{
"epoch": 0.004037473941697603,
"grad_norm": 0.07736324518918991,
"learning_rate": 0.0002,
"loss": 0.0941,
"step": 2220
},
{
"epoch": 0.0040556607612548,
"grad_norm": 0.0867881178855896,
"learning_rate": 0.0002,
"loss": 0.0873,
"step": 2230
},
{
"epoch": 0.004073847580811996,
"grad_norm": 0.1497400403022766,
"learning_rate": 0.0002,
"loss": 0.0829,
"step": 2240
},
{
"epoch": 0.004092034400369192,
"grad_norm": 0.007458314299583435,
"learning_rate": 0.0002,
"loss": 0.02,
"step": 2250
},
{
"epoch": 0.004110221219926389,
"grad_norm": 0.04168029874563217,
"learning_rate": 0.0002,
"loss": 0.2176,
"step": 2260
},
{
"epoch": 0.004128408039483585,
"grad_norm": 0.10017130523920059,
"learning_rate": 0.0002,
"loss": 0.0958,
"step": 2270
},
{
"epoch": 0.004146594859040782,
"grad_norm": 0.02727416157722473,
"learning_rate": 0.0002,
"loss": 0.088,
"step": 2280
},
{
"epoch": 0.004164781678597978,
"grad_norm": 0.15034393966197968,
"learning_rate": 0.0002,
"loss": 0.0826,
"step": 2290
},
{
"epoch": 0.004182968498155175,
"grad_norm": 0.0023451410233974457,
"learning_rate": 0.0002,
"loss": 0.0102,
"step": 2300
},
{
"epoch": 0.004201155317712371,
"grad_norm": 0.03462455794215202,
"learning_rate": 0.0002,
"loss": 0.3404,
"step": 2310
},
{
"epoch": 0.004219342137269567,
"grad_norm": 0.02866148017346859,
"learning_rate": 0.0002,
"loss": 0.0932,
"step": 2320
},
{
"epoch": 0.004237528956826764,
"grad_norm": 0.0685456171631813,
"learning_rate": 0.0002,
"loss": 0.0806,
"step": 2330
},
{
"epoch": 0.00425571577638396,
"grad_norm": 0.17208056151866913,
"learning_rate": 0.0002,
"loss": 0.0826,
"step": 2340
},
{
"epoch": 0.004273902595941156,
"grad_norm": 0.008708455599844456,
"learning_rate": 0.0002,
"loss": 0.0171,
"step": 2350
},
{
"epoch": 0.0042920894154983525,
"grad_norm": 0.044025715440511703,
"learning_rate": 0.0002,
"loss": 0.212,
"step": 2360
},
{
"epoch": 0.00431027623505555,
"grad_norm": 0.050246164202690125,
"learning_rate": 0.0002,
"loss": 0.107,
"step": 2370
},
{
"epoch": 0.004328463054612746,
"grad_norm": 0.05257886275649071,
"learning_rate": 0.0002,
"loss": 0.0868,
"step": 2380
},
{
"epoch": 0.004346649874169942,
"grad_norm": 0.16567641496658325,
"learning_rate": 0.0002,
"loss": 0.0819,
"step": 2390
},
{
"epoch": 0.004364836693727139,
"grad_norm": 0.0062621901743113995,
"learning_rate": 0.0002,
"loss": 0.0171,
"step": 2400
},
{
"epoch": 0.004383023513284335,
"grad_norm": 0.03025338612496853,
"learning_rate": 0.0002,
"loss": 0.2141,
"step": 2410
},
{
"epoch": 0.004401210332841531,
"grad_norm": 0.06401577591896057,
"learning_rate": 0.0002,
"loss": 0.0982,
"step": 2420
},
{
"epoch": 0.004419397152398728,
"grad_norm": 0.12474781274795532,
"learning_rate": 0.0002,
"loss": 0.0834,
"step": 2430
},
{
"epoch": 0.004437583971955924,
"grad_norm": 0.18607665598392487,
"learning_rate": 0.0002,
"loss": 0.0801,
"step": 2440
},
{
"epoch": 0.004455770791513121,
"grad_norm": 0.0017643098253756762,
"learning_rate": 0.0002,
"loss": 0.0129,
"step": 2450
},
{
"epoch": 0.004473957611070317,
"grad_norm": 0.03936386480927467,
"learning_rate": 0.0002,
"loss": 0.2541,
"step": 2460
},
{
"epoch": 0.004492144430627514,
"grad_norm": 0.08961635082960129,
"learning_rate": 0.0002,
"loss": 0.0961,
"step": 2470
},
{
"epoch": 0.00451033125018471,
"grad_norm": 0.07525113970041275,
"learning_rate": 0.0002,
"loss": 0.0844,
"step": 2480
},
{
"epoch": 0.004528518069741906,
"grad_norm": 0.16746751964092255,
"learning_rate": 0.0002,
"loss": 0.071,
"step": 2490
},
{
"epoch": 0.004546704889299103,
"grad_norm": 0.0027625334914773703,
"learning_rate": 0.0002,
"loss": 0.0151,
"step": 2500
},
{
"epoch": 0.004564891708856299,
"grad_norm": 0.049662694334983826,
"learning_rate": 0.0002,
"loss": 0.253,
"step": 2510
},
{
"epoch": 0.004583078528413495,
"grad_norm": 0.08312079310417175,
"learning_rate": 0.0002,
"loss": 0.0922,
"step": 2520
},
{
"epoch": 0.0046012653479706915,
"grad_norm": 0.0646345317363739,
"learning_rate": 0.0002,
"loss": 0.0889,
"step": 2530
},
{
"epoch": 0.004619452167527889,
"grad_norm": 0.20036271214485168,
"learning_rate": 0.0002,
"loss": 0.081,
"step": 2540
},
{
"epoch": 0.004637638987085085,
"grad_norm": 0.010091719217598438,
"learning_rate": 0.0002,
"loss": 0.024,
"step": 2550
},
{
"epoch": 0.004655825806642281,
"grad_norm": 0.048885516822338104,
"learning_rate": 0.0002,
"loss": 0.184,
"step": 2560
},
{
"epoch": 0.004674012626199478,
"grad_norm": 0.09142889827489853,
"learning_rate": 0.0002,
"loss": 0.0935,
"step": 2570
},
{
"epoch": 0.004692199445756674,
"grad_norm": 0.049207963049411774,
"learning_rate": 0.0002,
"loss": 0.0816,
"step": 2580
},
{
"epoch": 0.00471038626531387,
"grad_norm": 0.1498396098613739,
"learning_rate": 0.0002,
"loss": 0.0698,
"step": 2590
},
{
"epoch": 0.004728573084871067,
"grad_norm": 0.00522881094366312,
"learning_rate": 0.0002,
"loss": 0.0189,
"step": 2600
},
{
"epoch": 0.004746759904428263,
"grad_norm": 0.07461311668157578,
"learning_rate": 0.0002,
"loss": 0.1944,
"step": 2610
},
{
"epoch": 0.00476494672398546,
"grad_norm": 0.048005711287260056,
"learning_rate": 0.0002,
"loss": 0.0883,
"step": 2620
},
{
"epoch": 0.004783133543542656,
"grad_norm": 0.10151612013578415,
"learning_rate": 0.0002,
"loss": 0.0827,
"step": 2630
},
{
"epoch": 0.004801320363099853,
"grad_norm": 0.1504422426223755,
"learning_rate": 0.0002,
"loss": 0.078,
"step": 2640
},
{
"epoch": 0.004819507182657049,
"grad_norm": 0.004988422151654959,
"learning_rate": 0.0002,
"loss": 0.0229,
"step": 2650
},
{
"epoch": 0.004837694002214245,
"grad_norm": 0.025008924305438995,
"learning_rate": 0.0002,
"loss": 0.1818,
"step": 2660
},
{
"epoch": 0.004855880821771442,
"grad_norm": 0.027460169047117233,
"learning_rate": 0.0002,
"loss": 0.0966,
"step": 2670
},
{
"epoch": 0.004874067641328638,
"grad_norm": 0.09704197943210602,
"learning_rate": 0.0002,
"loss": 0.0824,
"step": 2680
},
{
"epoch": 0.004892254460885834,
"grad_norm": 0.138654425740242,
"learning_rate": 0.0002,
"loss": 0.0746,
"step": 2690
},
{
"epoch": 0.0049104412804430305,
"grad_norm": 0.00859556533396244,
"learning_rate": 0.0002,
"loss": 0.0187,
"step": 2700
},
{
"epoch": 0.004928628100000228,
"grad_norm": 0.05207522585988045,
"learning_rate": 0.0002,
"loss": 0.1985,
"step": 2710
},
{
"epoch": 0.004946814919557424,
"grad_norm": 0.07787417620420456,
"learning_rate": 0.0002,
"loss": 0.101,
"step": 2720
},
{
"epoch": 0.00496500173911462,
"grad_norm": 0.02819981426000595,
"learning_rate": 0.0002,
"loss": 0.0845,
"step": 2730
},
{
"epoch": 0.004983188558671817,
"grad_norm": 0.13569314777851105,
"learning_rate": 0.0002,
"loss": 0.0756,
"step": 2740
},
{
"epoch": 0.005001375378229013,
"grad_norm": 0.05175986513495445,
"learning_rate": 0.0002,
"loss": 0.024,
"step": 2750
},
{
"epoch": 0.005019562197786209,
"grad_norm": 0.037230249494314194,
"learning_rate": 0.0002,
"loss": 0.2056,
"step": 2760
},
{
"epoch": 0.005037749017343406,
"grad_norm": 0.05532974749803543,
"learning_rate": 0.0002,
"loss": 0.0939,
"step": 2770
},
{
"epoch": 0.005055935836900602,
"grad_norm": 0.06930708140134811,
"learning_rate": 0.0002,
"loss": 0.0853,
"step": 2780
},
{
"epoch": 0.005074122656457798,
"grad_norm": 0.16405801475048065,
"learning_rate": 0.0002,
"loss": 0.0766,
"step": 2790
},
{
"epoch": 0.005092309476014995,
"grad_norm": 0.006398684345185757,
"learning_rate": 0.0002,
"loss": 0.0124,
"step": 2800
},
{
"epoch": 0.005110496295572192,
"grad_norm": 0.06269315630197525,
"learning_rate": 0.0002,
"loss": 0.2703,
"step": 2810
},
{
"epoch": 0.005128683115129388,
"grad_norm": 0.049293261021375656,
"learning_rate": 0.0002,
"loss": 0.0943,
"step": 2820
},
{
"epoch": 0.005146869934686584,
"grad_norm": 0.08814405649900436,
"learning_rate": 0.0002,
"loss": 0.0855,
"step": 2830
},
{
"epoch": 0.005165056754243781,
"grad_norm": 0.17452259361743927,
"learning_rate": 0.0002,
"loss": 0.0822,
"step": 2840
},
{
"epoch": 0.005183243573800977,
"grad_norm": 0.005008229520171881,
"learning_rate": 0.0002,
"loss": 0.0136,
"step": 2850
},
{
"epoch": 0.005201430393358173,
"grad_norm": 0.04459540545940399,
"learning_rate": 0.0002,
"loss": 0.2623,
"step": 2860
},
{
"epoch": 0.0052196172129153695,
"grad_norm": 0.042845603078603745,
"learning_rate": 0.0002,
"loss": 0.0929,
"step": 2870
},
{
"epoch": 0.005237804032472567,
"grad_norm": 0.03079635463654995,
"learning_rate": 0.0002,
"loss": 0.0844,
"step": 2880
},
{
"epoch": 0.005255990852029763,
"grad_norm": 0.14457851648330688,
"learning_rate": 0.0002,
"loss": 0.0753,
"step": 2890
},
{
"epoch": 0.005274177671586959,
"grad_norm": 0.0009016963304020464,
"learning_rate": 0.0002,
"loss": 0.0037,
"step": 2900
},
{
"epoch": 0.005292364491144156,
"grad_norm": 0.0983906164765358,
"learning_rate": 0.0002,
"loss": 0.3661,
"step": 2910
},
{
"epoch": 0.005310551310701352,
"grad_norm": 0.08794154971837997,
"learning_rate": 0.0002,
"loss": 0.0894,
"step": 2920
},
{
"epoch": 0.005328738130258548,
"grad_norm": 0.026981573551893234,
"learning_rate": 0.0002,
"loss": 0.0779,
"step": 2930
},
{
"epoch": 0.005346924949815745,
"grad_norm": 0.15572553873062134,
"learning_rate": 0.0002,
"loss": 0.077,
"step": 2940
},
{
"epoch": 0.005365111769372941,
"grad_norm": 0.005491070915013552,
"learning_rate": 0.0002,
"loss": 0.0092,
"step": 2950
},
{
"epoch": 0.005383298588930137,
"grad_norm": 0.07383686304092407,
"learning_rate": 0.0002,
"loss": 0.2574,
"step": 2960
},
{
"epoch": 0.005401485408487334,
"grad_norm": 0.05919960141181946,
"learning_rate": 0.0002,
"loss": 0.1045,
"step": 2970
},
{
"epoch": 0.005419672228044531,
"grad_norm": 0.06027739867568016,
"learning_rate": 0.0002,
"loss": 0.0822,
"step": 2980
},
{
"epoch": 0.005437859047601727,
"grad_norm": 0.1288602501153946,
"learning_rate": 0.0002,
"loss": 0.0688,
"step": 2990
},
{
"epoch": 0.005456045867158923,
"grad_norm": 0.007565880194306374,
"learning_rate": 0.0002,
"loss": 0.0192,
"step": 3000
},
{
"epoch": 0.00547423268671612,
"grad_norm": 0.024412864819169044,
"learning_rate": 0.0002,
"loss": 0.1782,
"step": 3010
},
{
"epoch": 0.005492419506273316,
"grad_norm": 0.05559355765581131,
"learning_rate": 0.0002,
"loss": 0.1072,
"step": 3020
},
{
"epoch": 0.005510606325830512,
"grad_norm": 0.07073906064033508,
"learning_rate": 0.0002,
"loss": 0.0863,
"step": 3030
},
{
"epoch": 0.0055287931453877085,
"grad_norm": 0.14979414641857147,
"learning_rate": 0.0002,
"loss": 0.0788,
"step": 3040
},
{
"epoch": 0.005546979964944906,
"grad_norm": 0.0057297456078231335,
"learning_rate": 0.0002,
"loss": 0.0192,
"step": 3050
},
{
"epoch": 0.005565166784502102,
"grad_norm": 0.03195042535662651,
"learning_rate": 0.0002,
"loss": 0.1879,
"step": 3060
},
{
"epoch": 0.005583353604059298,
"grad_norm": 0.05925082787871361,
"learning_rate": 0.0002,
"loss": 0.0992,
"step": 3070
},
{
"epoch": 0.005601540423616495,
"grad_norm": 0.052063606679439545,
"learning_rate": 0.0002,
"loss": 0.0826,
"step": 3080
},
{
"epoch": 0.005619727243173691,
"grad_norm": 0.16005952656269073,
"learning_rate": 0.0002,
"loss": 0.0743,
"step": 3090
},
{
"epoch": 0.005637914062730887,
"grad_norm": 0.005742133595049381,
"learning_rate": 0.0002,
"loss": 0.0137,
"step": 3100
},
{
"epoch": 0.005656100882288084,
"grad_norm": 0.07523638010025024,
"learning_rate": 0.0002,
"loss": 0.2072,
"step": 3110
},
{
"epoch": 0.00567428770184528,
"grad_norm": 0.23799611628055573,
"learning_rate": 0.0002,
"loss": 0.0906,
"step": 3120
},
{
"epoch": 0.005692474521402476,
"grad_norm": 0.06176261603832245,
"learning_rate": 0.0002,
"loss": 0.088,
"step": 3130
},
{
"epoch": 0.005710661340959673,
"grad_norm": 0.13692723214626312,
"learning_rate": 0.0002,
"loss": 0.0733,
"step": 3140
},
{
"epoch": 0.00572884816051687,
"grad_norm": 0.007059803698211908,
"learning_rate": 0.0002,
"loss": 0.0194,
"step": 3150
},
{
"epoch": 0.005747034980074066,
"grad_norm": 0.08868405222892761,
"learning_rate": 0.0002,
"loss": 0.1745,
"step": 3160
},
{
"epoch": 0.005765221799631262,
"grad_norm": 0.05126733332872391,
"learning_rate": 0.0002,
"loss": 0.1024,
"step": 3170
},
{
"epoch": 0.005783408619188459,
"grad_norm": 0.06377821415662766,
"learning_rate": 0.0002,
"loss": 0.0846,
"step": 3180
},
{
"epoch": 0.005801595438745655,
"grad_norm": 0.10748566687107086,
"learning_rate": 0.0002,
"loss": 0.0769,
"step": 3190
},
{
"epoch": 0.005819782258302851,
"grad_norm": 0.004992443602532148,
"learning_rate": 0.0002,
"loss": 0.0114,
"step": 3200
},
{
"epoch": 0.0058379690778600475,
"grad_norm": 0.0420277863740921,
"learning_rate": 0.0002,
"loss": 0.2159,
"step": 3210
},
{
"epoch": 0.005856155897417244,
"grad_norm": 0.02828531712293625,
"learning_rate": 0.0002,
"loss": 0.0923,
"step": 3220
},
{
"epoch": 0.005874342716974441,
"grad_norm": 0.028216248378157616,
"learning_rate": 0.0002,
"loss": 0.0789,
"step": 3230
},
{
"epoch": 0.005892529536531637,
"grad_norm": 0.11420746147632599,
"learning_rate": 0.0002,
"loss": 0.0696,
"step": 3240
},
{
"epoch": 0.005910716356088834,
"grad_norm": 0.0019631448667496443,
"learning_rate": 0.0002,
"loss": 0.0128,
"step": 3250
},
{
"epoch": 0.00592890317564603,
"grad_norm": 0.05514012649655342,
"learning_rate": 0.0002,
"loss": 0.2609,
"step": 3260
},
{
"epoch": 0.005947089995203226,
"grad_norm": 0.0917636826634407,
"learning_rate": 0.0002,
"loss": 0.0996,
"step": 3270
},
{
"epoch": 0.005965276814760423,
"grad_norm": 0.03648284077644348,
"learning_rate": 0.0002,
"loss": 0.084,
"step": 3280
},
{
"epoch": 0.005983463634317619,
"grad_norm": 0.13859149813652039,
"learning_rate": 0.0002,
"loss": 0.0807,
"step": 3290
},
{
"epoch": 0.006001650453874815,
"grad_norm": 0.013779910281300545,
"learning_rate": 0.0002,
"loss": 0.0181,
"step": 3300
},
{
"epoch": 0.006019837273432012,
"grad_norm": 0.02654041163623333,
"learning_rate": 0.0002,
"loss": 0.1636,
"step": 3310
},
{
"epoch": 0.006038024092989209,
"grad_norm": 0.062298137694597244,
"learning_rate": 0.0002,
"loss": 0.0872,
"step": 3320
},
{
"epoch": 0.006056210912546405,
"grad_norm": 0.0351388119161129,
"learning_rate": 0.0002,
"loss": 0.0802,
"step": 3330
},
{
"epoch": 0.006074397732103601,
"grad_norm": 0.16063807904720306,
"learning_rate": 0.0002,
"loss": 0.0753,
"step": 3340
},
{
"epoch": 0.006092584551660798,
"grad_norm": 0.009991235099732876,
"learning_rate": 0.0002,
"loss": 0.016,
"step": 3350
},
{
"epoch": 0.006110771371217994,
"grad_norm": 0.052919622510671616,
"learning_rate": 0.0002,
"loss": 0.2027,
"step": 3360
},
{
"epoch": 0.00612895819077519,
"grad_norm": 0.03228602185845375,
"learning_rate": 0.0002,
"loss": 0.0985,
"step": 3370
},
{
"epoch": 0.0061471450103323865,
"grad_norm": 0.11311203986406326,
"learning_rate": 0.0002,
"loss": 0.0797,
"step": 3380
},
{
"epoch": 0.006165331829889583,
"grad_norm": 0.1674620360136032,
"learning_rate": 0.0002,
"loss": 0.072,
"step": 3390
},
{
"epoch": 0.00618351864944678,
"grad_norm": 0.015154430642724037,
"learning_rate": 0.0002,
"loss": 0.0186,
"step": 3400
},
{
"epoch": 0.006201705469003976,
"grad_norm": 0.043151434510946274,
"learning_rate": 0.0002,
"loss": 0.1892,
"step": 3410
},
{
"epoch": 0.006219892288561173,
"grad_norm": 0.12342707067728043,
"learning_rate": 0.0002,
"loss": 0.0907,
"step": 3420
},
{
"epoch": 0.006238079108118369,
"grad_norm": 0.08350827544927597,
"learning_rate": 0.0002,
"loss": 0.0783,
"step": 3430
},
{
"epoch": 0.006256265927675565,
"grad_norm": 0.11938697844743729,
"learning_rate": 0.0002,
"loss": 0.0666,
"step": 3440
},
{
"epoch": 0.006274452747232762,
"grad_norm": 0.015424132347106934,
"learning_rate": 0.0002,
"loss": 0.0173,
"step": 3450
},
{
"epoch": 0.006292639566789958,
"grad_norm": 0.04220043867826462,
"learning_rate": 0.0002,
"loss": 0.1805,
"step": 3460
},
{
"epoch": 0.006310826386347154,
"grad_norm": 0.08813903480768204,
"learning_rate": 0.0002,
"loss": 0.096,
"step": 3470
},
{
"epoch": 0.006329013205904351,
"grad_norm": 0.07647278904914856,
"learning_rate": 0.0002,
"loss": 0.0821,
"step": 3480
},
{
"epoch": 0.006347200025461548,
"grad_norm": 0.14242641627788544,
"learning_rate": 0.0002,
"loss": 0.0769,
"step": 3490
},
{
"epoch": 0.006365386845018744,
"grad_norm": 0.011115231551229954,
"learning_rate": 0.0002,
"loss": 0.0221,
"step": 3500
},
{
"epoch": 0.00638357366457594,
"grad_norm": 0.036351826041936874,
"learning_rate": 0.0002,
"loss": 0.1557,
"step": 3510
},
{
"epoch": 0.006401760484133137,
"grad_norm": 0.08549819141626358,
"learning_rate": 0.0002,
"loss": 0.0864,
"step": 3520
},
{
"epoch": 0.006419947303690333,
"grad_norm": 0.047141823917627335,
"learning_rate": 0.0002,
"loss": 0.079,
"step": 3530
},
{
"epoch": 0.006438134123247529,
"grad_norm": 0.13143447041511536,
"learning_rate": 0.0002,
"loss": 0.0717,
"step": 3540
},
{
"epoch": 0.0064563209428047256,
"grad_norm": 0.013524871319532394,
"learning_rate": 0.0002,
"loss": 0.0149,
"step": 3550
},
{
"epoch": 0.006474507762361922,
"grad_norm": 0.03367459774017334,
"learning_rate": 0.0002,
"loss": 0.1715,
"step": 3560
},
{
"epoch": 0.006492694581919119,
"grad_norm": 0.045889757573604584,
"learning_rate": 0.0002,
"loss": 0.0949,
"step": 3570
},
{
"epoch": 0.006510881401476315,
"grad_norm": 0.04099202901124954,
"learning_rate": 0.0002,
"loss": 0.0813,
"step": 3580
},
{
"epoch": 0.006529068221033512,
"grad_norm": 0.133371040225029,
"learning_rate": 0.0002,
"loss": 0.0767,
"step": 3590
},
{
"epoch": 0.006547255040590708,
"grad_norm": 0.00645647756755352,
"learning_rate": 0.0002,
"loss": 0.0186,
"step": 3600
},
{
"epoch": 0.006565441860147904,
"grad_norm": 0.050674330443143845,
"learning_rate": 0.0002,
"loss": 0.2179,
"step": 3610
},
{
"epoch": 0.006583628679705101,
"grad_norm": 0.07087302207946777,
"learning_rate": 0.0002,
"loss": 0.0882,
"step": 3620
},
{
"epoch": 0.006601815499262297,
"grad_norm": 0.02759486250579357,
"learning_rate": 0.0002,
"loss": 0.0789,
"step": 3630
},
{
"epoch": 0.006620002318819493,
"grad_norm": 0.12163479626178741,
"learning_rate": 0.0002,
"loss": 0.0689,
"step": 3640
},
{
"epoch": 0.00663818913837669,
"grad_norm": 0.00969718024134636,
"learning_rate": 0.0002,
"loss": 0.0112,
"step": 3650
},
{
"epoch": 0.006656375957933887,
"grad_norm": 0.07106204330921173,
"learning_rate": 0.0002,
"loss": 0.199,
"step": 3660
},
{
"epoch": 0.006674562777491083,
"grad_norm": 0.08954132348299026,
"learning_rate": 0.0002,
"loss": 0.0985,
"step": 3670
},
{
"epoch": 0.006692749597048279,
"grad_norm": 0.09899396449327469,
"learning_rate": 0.0002,
"loss": 0.0811,
"step": 3680
},
{
"epoch": 0.006710936416605476,
"grad_norm": 0.12119311839342117,
"learning_rate": 0.0002,
"loss": 0.0698,
"step": 3690
},
{
"epoch": 0.006729123236162672,
"grad_norm": 0.013957214541733265,
"learning_rate": 0.0002,
"loss": 0.018,
"step": 3700
},
{
"epoch": 0.006747310055719868,
"grad_norm": 0.03089285083115101,
"learning_rate": 0.0002,
"loss": 0.1434,
"step": 3710
},
{
"epoch": 0.0067654968752770646,
"grad_norm": 0.025650829076766968,
"learning_rate": 0.0002,
"loss": 0.0886,
"step": 3720
},
{
"epoch": 0.006783683694834261,
"grad_norm": 0.044103365391492844,
"learning_rate": 0.0002,
"loss": 0.0788,
"step": 3730
},
{
"epoch": 0.006801870514391458,
"grad_norm": 0.09726370871067047,
"learning_rate": 0.0002,
"loss": 0.0674,
"step": 3740
},
{
"epoch": 0.006820057333948654,
"grad_norm": 0.018105274066329002,
"learning_rate": 0.0002,
"loss": 0.0232,
"step": 3750
},
{
"epoch": 0.006838244153505851,
"grad_norm": 0.021543240174651146,
"learning_rate": 0.0002,
"loss": 0.1406,
"step": 3760
},
{
"epoch": 0.006856430973063047,
"grad_norm": 0.09367050975561142,
"learning_rate": 0.0002,
"loss": 0.0973,
"step": 3770
},
{
"epoch": 0.006874617792620243,
"grad_norm": 0.06836032122373581,
"learning_rate": 0.0002,
"loss": 0.0848,
"step": 3780
},
{
"epoch": 0.00689280461217744,
"grad_norm": 0.11758081614971161,
"learning_rate": 0.0002,
"loss": 0.0693,
"step": 3790
},
{
"epoch": 0.006910991431734636,
"grad_norm": 0.008669364266097546,
"learning_rate": 0.0002,
"loss": 0.0223,
"step": 3800
},
{
"epoch": 0.006929178251291832,
"grad_norm": 0.03903719782829285,
"learning_rate": 0.0002,
"loss": 0.1519,
"step": 3810
},
{
"epoch": 0.0069473650708490285,
"grad_norm": 0.030682874843478203,
"learning_rate": 0.0002,
"loss": 0.0931,
"step": 3820
},
{
"epoch": 0.006965551890406226,
"grad_norm": 0.02693006955087185,
"learning_rate": 0.0002,
"loss": 0.0784,
"step": 3830
},
{
"epoch": 0.006983738709963422,
"grad_norm": 0.09535166621208191,
"learning_rate": 0.0002,
"loss": 0.0696,
"step": 3840
},
{
"epoch": 0.007001925529520618,
"grad_norm": 0.014680403284728527,
"learning_rate": 0.0002,
"loss": 0.0176,
"step": 3850
},
{
"epoch": 0.007020112349077815,
"grad_norm": 0.031090212985873222,
"learning_rate": 0.0002,
"loss": 0.1544,
"step": 3860
},
{
"epoch": 0.007038299168635011,
"grad_norm": 0.05870644003152847,
"learning_rate": 0.0002,
"loss": 0.0898,
"step": 3870
},
{
"epoch": 0.007056485988192207,
"grad_norm": 0.03480982780456543,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 3880
},
{
"epoch": 0.0070746728077494036,
"grad_norm": 0.09751418977975845,
"learning_rate": 0.0002,
"loss": 0.0724,
"step": 3890
},
{
"epoch": 0.0070928596273066,
"grad_norm": 0.022084850817918777,
"learning_rate": 0.0002,
"loss": 0.019,
"step": 3900
},
{
"epoch": 0.007111046446863797,
"grad_norm": 0.06994971632957458,
"learning_rate": 0.0002,
"loss": 0.1478,
"step": 3910
},
{
"epoch": 0.007129233266420993,
"grad_norm": 0.05761263892054558,
"learning_rate": 0.0002,
"loss": 0.0932,
"step": 3920
},
{
"epoch": 0.00714742008597819,
"grad_norm": 0.029772033914923668,
"learning_rate": 0.0002,
"loss": 0.0855,
"step": 3930
},
{
"epoch": 0.007165606905535386,
"grad_norm": 0.11868726462125778,
"learning_rate": 0.0002,
"loss": 0.0727,
"step": 3940
},
{
"epoch": 0.007183793725092582,
"grad_norm": 0.0065403408370912075,
"learning_rate": 0.0002,
"loss": 0.0174,
"step": 3950
},
{
"epoch": 0.007201980544649779,
"grad_norm": 0.031544361263513565,
"learning_rate": 0.0002,
"loss": 0.1827,
"step": 3960
},
{
"epoch": 0.007220167364206975,
"grad_norm": 0.031641531735658646,
"learning_rate": 0.0002,
"loss": 0.0867,
"step": 3970
},
{
"epoch": 0.007238354183764171,
"grad_norm": 0.028574040159583092,
"learning_rate": 0.0002,
"loss": 0.0798,
"step": 3980
},
{
"epoch": 0.0072565410033213675,
"grad_norm": 0.12866555154323578,
"learning_rate": 0.0002,
"loss": 0.0708,
"step": 3990
},
{
"epoch": 0.007274727822878565,
"grad_norm": 0.00843430683016777,
"learning_rate": 0.0002,
"loss": 0.0127,
"step": 4000
},
{
"epoch": 0.007292914642435761,
"grad_norm": 0.03737691789865494,
"learning_rate": 0.0002,
"loss": 0.2201,
"step": 4010
},
{
"epoch": 0.007311101461992957,
"grad_norm": 0.05326579511165619,
"learning_rate": 0.0002,
"loss": 0.0838,
"step": 4020
},
{
"epoch": 0.007329288281550154,
"grad_norm": 0.031934209167957306,
"learning_rate": 0.0002,
"loss": 0.0778,
"step": 4030
},
{
"epoch": 0.00734747510110735,
"grad_norm": 0.17401957511901855,
"learning_rate": 0.0002,
"loss": 0.073,
"step": 4040
},
{
"epoch": 0.007365661920664546,
"grad_norm": 0.005256639327853918,
"learning_rate": 0.0002,
"loss": 0.0122,
"step": 4050
},
{
"epoch": 0.0073838487402217426,
"grad_norm": 0.05043623968958855,
"learning_rate": 0.0002,
"loss": 0.2524,
"step": 4060
},
{
"epoch": 0.007402035559778939,
"grad_norm": 0.06662425398826599,
"learning_rate": 0.0002,
"loss": 0.0976,
"step": 4070
},
{
"epoch": 0.007420222379336136,
"grad_norm": 0.13419686257839203,
"learning_rate": 0.0002,
"loss": 0.0833,
"step": 4080
},
{
"epoch": 0.007438409198893332,
"grad_norm": 0.176285520195961,
"learning_rate": 0.0002,
"loss": 0.0761,
"step": 4090
},
{
"epoch": 0.007456596018450529,
"grad_norm": 0.008489354513585567,
"learning_rate": 0.0002,
"loss": 0.0182,
"step": 4100
},
{
"epoch": 0.007474782838007725,
"grad_norm": 0.06247509643435478,
"learning_rate": 0.0002,
"loss": 0.2232,
"step": 4110
},
{
"epoch": 0.007492969657564921,
"grad_norm": 0.05744702368974686,
"learning_rate": 0.0002,
"loss": 0.0875,
"step": 4120
},
{
"epoch": 0.007511156477122118,
"grad_norm": 0.053026407957077026,
"learning_rate": 0.0002,
"loss": 0.0807,
"step": 4130
},
{
"epoch": 0.007529343296679314,
"grad_norm": 0.11734003573656082,
"learning_rate": 0.0002,
"loss": 0.0724,
"step": 4140
},
{
"epoch": 0.00754753011623651,
"grad_norm": 0.005216363817453384,
"learning_rate": 0.0002,
"loss": 0.0129,
"step": 4150
},
{
"epoch": 0.0075657169357937065,
"grad_norm": 0.08154789358377457,
"learning_rate": 0.0002,
"loss": 0.2221,
"step": 4160
},
{
"epoch": 0.007583903755350904,
"grad_norm": 0.03619784861803055,
"learning_rate": 0.0002,
"loss": 0.0993,
"step": 4170
},
{
"epoch": 0.0076020905749081,
"grad_norm": 0.08239256590604782,
"learning_rate": 0.0002,
"loss": 0.0811,
"step": 4180
},
{
"epoch": 0.007620277394465296,
"grad_norm": 0.11934535950422287,
"learning_rate": 0.0002,
"loss": 0.0726,
"step": 4190
},
{
"epoch": 0.007638464214022493,
"grad_norm": 0.006965799257159233,
"learning_rate": 0.0002,
"loss": 0.0181,
"step": 4200
},
{
"epoch": 0.007656651033579689,
"grad_norm": 0.04328077286481857,
"learning_rate": 0.0002,
"loss": 0.1983,
"step": 4210
},
{
"epoch": 0.007674837853136885,
"grad_norm": 0.08253510296344757,
"learning_rate": 0.0002,
"loss": 0.0954,
"step": 4220
},
{
"epoch": 0.0076930246726940816,
"grad_norm": 0.06146657094359398,
"learning_rate": 0.0002,
"loss": 0.0843,
"step": 4230
},
{
"epoch": 0.007711211492251278,
"grad_norm": 0.13579218089580536,
"learning_rate": 0.0002,
"loss": 0.0672,
"step": 4240
},
{
"epoch": 0.007729398311808474,
"grad_norm": 0.0038396338932216167,
"learning_rate": 0.0002,
"loss": 0.0131,
"step": 4250
},
{
"epoch": 0.007747585131365671,
"grad_norm": 0.03109130822122097,
"learning_rate": 0.0002,
"loss": 0.2102,
"step": 4260
},
{
"epoch": 0.007765771950922868,
"grad_norm": 0.04971664398908615,
"learning_rate": 0.0002,
"loss": 0.0903,
"step": 4270
},
{
"epoch": 0.007783958770480064,
"grad_norm": 0.06476306915283203,
"learning_rate": 0.0002,
"loss": 0.0859,
"step": 4280
},
{
"epoch": 0.00780214559003726,
"grad_norm": 0.15377041697502136,
"learning_rate": 0.0002,
"loss": 0.0828,
"step": 4290
},
{
"epoch": 0.007820332409594457,
"grad_norm": 0.005592274013906717,
"learning_rate": 0.0002,
"loss": 0.014,
"step": 4300
},
{
"epoch": 0.007838519229151653,
"grad_norm": 0.04387212172150612,
"learning_rate": 0.0002,
"loss": 0.1907,
"step": 4310
},
{
"epoch": 0.00785670604870885,
"grad_norm": 0.06001356989145279,
"learning_rate": 0.0002,
"loss": 0.0864,
"step": 4320
},
{
"epoch": 0.007874892868266046,
"grad_norm": 0.030866140499711037,
"learning_rate": 0.0002,
"loss": 0.0748,
"step": 4330
},
{
"epoch": 0.007893079687823242,
"grad_norm": 0.13280808925628662,
"learning_rate": 0.0002,
"loss": 0.0686,
"step": 4340
},
{
"epoch": 0.007911266507380438,
"grad_norm": 0.015559020452201366,
"learning_rate": 0.0002,
"loss": 0.016,
"step": 4350
},
{
"epoch": 0.007929453326937634,
"grad_norm": 0.0669974684715271,
"learning_rate": 0.0002,
"loss": 0.1916,
"step": 4360
},
{
"epoch": 0.00794764014649483,
"grad_norm": 0.0759076252579689,
"learning_rate": 0.0002,
"loss": 0.0925,
"step": 4370
},
{
"epoch": 0.007965826966052029,
"grad_norm": 0.029388410970568657,
"learning_rate": 0.0002,
"loss": 0.086,
"step": 4380
},
{
"epoch": 0.007984013785609225,
"grad_norm": 0.17637981474399567,
"learning_rate": 0.0002,
"loss": 0.0697,
"step": 4390
},
{
"epoch": 0.008002200605166421,
"grad_norm": 0.008022189140319824,
"learning_rate": 0.0002,
"loss": 0.0196,
"step": 4400
},
{
"epoch": 0.008020387424723618,
"grad_norm": 0.04126167669892311,
"learning_rate": 0.0002,
"loss": 0.192,
"step": 4410
},
{
"epoch": 0.008038574244280814,
"grad_norm": 0.08132971078157425,
"learning_rate": 0.0002,
"loss": 0.093,
"step": 4420
},
{
"epoch": 0.00805676106383801,
"grad_norm": 0.07568484544754028,
"learning_rate": 0.0002,
"loss": 0.0823,
"step": 4430
},
{
"epoch": 0.008074947883395207,
"grad_norm": 0.1259222775697708,
"learning_rate": 0.0002,
"loss": 0.0696,
"step": 4440
},
{
"epoch": 0.008093134702952403,
"grad_norm": 0.009711826220154762,
"learning_rate": 0.0002,
"loss": 0.0232,
"step": 4450
},
{
"epoch": 0.0081113215225096,
"grad_norm": 0.029734279960393906,
"learning_rate": 0.0002,
"loss": 0.1595,
"step": 4460
},
{
"epoch": 0.008129508342066796,
"grad_norm": 0.04886960610747337,
"learning_rate": 0.0002,
"loss": 0.0919,
"step": 4470
},
{
"epoch": 0.008147695161623992,
"grad_norm": 0.07031470537185669,
"learning_rate": 0.0002,
"loss": 0.0813,
"step": 4480
},
{
"epoch": 0.008165881981181188,
"grad_norm": 0.12099859863519669,
"learning_rate": 0.0002,
"loss": 0.0731,
"step": 4490
},
{
"epoch": 0.008184068800738385,
"grad_norm": 0.02181529812514782,
"learning_rate": 0.0002,
"loss": 0.021,
"step": 4500
},
{
"epoch": 0.00820225562029558,
"grad_norm": 0.035477787256240845,
"learning_rate": 0.0002,
"loss": 0.1429,
"step": 4510
},
{
"epoch": 0.008220442439852777,
"grad_norm": 0.07788772135972977,
"learning_rate": 0.0002,
"loss": 0.0842,
"step": 4520
},
{
"epoch": 0.008238629259409973,
"grad_norm": 0.045833125710487366,
"learning_rate": 0.0002,
"loss": 0.0829,
"step": 4530
},
{
"epoch": 0.00825681607896717,
"grad_norm": 0.12271951884031296,
"learning_rate": 0.0002,
"loss": 0.0707,
"step": 4540
},
{
"epoch": 0.008275002898524366,
"grad_norm": 0.01919553242623806,
"learning_rate": 0.0002,
"loss": 0.0213,
"step": 4550
},
{
"epoch": 0.008293189718081564,
"grad_norm": 0.032527096569538116,
"learning_rate": 0.0002,
"loss": 0.1397,
"step": 4560
},
{
"epoch": 0.00831137653763876,
"grad_norm": 0.045243579894304276,
"learning_rate": 0.0002,
"loss": 0.0854,
"step": 4570
},
{
"epoch": 0.008329563357195957,
"grad_norm": 0.04226524010300636,
"learning_rate": 0.0002,
"loss": 0.0728,
"step": 4580
},
{
"epoch": 0.008347750176753153,
"grad_norm": 0.09887039661407471,
"learning_rate": 0.0002,
"loss": 0.0661,
"step": 4590
},
{
"epoch": 0.00836593699631035,
"grad_norm": 0.01822318509221077,
"learning_rate": 0.0002,
"loss": 0.0169,
"step": 4600
},
{
"epoch": 0.008384123815867546,
"grad_norm": 0.05729951336979866,
"learning_rate": 0.0002,
"loss": 0.137,
"step": 4610
},
{
"epoch": 0.008402310635424742,
"grad_norm": 0.041520439088344574,
"learning_rate": 0.0002,
"loss": 0.0825,
"step": 4620
},
{
"epoch": 0.008420497454981938,
"grad_norm": 0.051164623349905014,
"learning_rate": 0.0002,
"loss": 0.0818,
"step": 4630
},
{
"epoch": 0.008438684274539135,
"grad_norm": 0.1289409101009369,
"learning_rate": 0.0002,
"loss": 0.0664,
"step": 4640
},
{
"epoch": 0.008456871094096331,
"grad_norm": 0.0085114361718297,
"learning_rate": 0.0002,
"loss": 0.0229,
"step": 4650
},
{
"epoch": 0.008475057913653527,
"grad_norm": 0.03594676032662392,
"learning_rate": 0.0002,
"loss": 0.1401,
"step": 4660
},
{
"epoch": 0.008493244733210724,
"grad_norm": 0.0316978394985199,
"learning_rate": 0.0002,
"loss": 0.0877,
"step": 4670
},
{
"epoch": 0.00851143155276792,
"grad_norm": 0.023302162066102028,
"learning_rate": 0.0002,
"loss": 0.0764,
"step": 4680
},
{
"epoch": 0.008529618372325116,
"grad_norm": 0.1329929083585739,
"learning_rate": 0.0002,
"loss": 0.0788,
"step": 4690
},
{
"epoch": 0.008547805191882312,
"grad_norm": 0.01048013661056757,
"learning_rate": 0.0002,
"loss": 0.0234,
"step": 4700
},
{
"epoch": 0.008565992011439509,
"grad_norm": 0.03505022078752518,
"learning_rate": 0.0002,
"loss": 0.1509,
"step": 4710
},
{
"epoch": 0.008584178830996705,
"grad_norm": 0.03877585008740425,
"learning_rate": 0.0002,
"loss": 0.0802,
"step": 4720
},
{
"epoch": 0.008602365650553903,
"grad_norm": 0.041193027049303055,
"learning_rate": 0.0002,
"loss": 0.0695,
"step": 4730
},
{
"epoch": 0.0086205524701111,
"grad_norm": 0.17310455441474915,
"learning_rate": 0.0002,
"loss": 0.0794,
"step": 4740
},
{
"epoch": 0.008638739289668296,
"grad_norm": 0.0061012376099824905,
"learning_rate": 0.0002,
"loss": 0.0158,
"step": 4750
},
{
"epoch": 0.008656926109225492,
"grad_norm": 0.04843207076191902,
"learning_rate": 0.0002,
"loss": 0.2103,
"step": 4760
},
{
"epoch": 0.008675112928782688,
"grad_norm": 0.04483436048030853,
"learning_rate": 0.0002,
"loss": 0.0878,
"step": 4770
},
{
"epoch": 0.008693299748339885,
"grad_norm": 0.056655965745449066,
"learning_rate": 0.0002,
"loss": 0.0752,
"step": 4780
},
{
"epoch": 0.008711486567897081,
"grad_norm": 0.11626063287258148,
"learning_rate": 0.0002,
"loss": 0.0685,
"step": 4790
},
{
"epoch": 0.008729673387454277,
"grad_norm": 0.013872025534510612,
"learning_rate": 0.0002,
"loss": 0.0198,
"step": 4800
},
{
"epoch": 0.008747860207011474,
"grad_norm": 0.06217370182275772,
"learning_rate": 0.0002,
"loss": 0.1371,
"step": 4810
},
{
"epoch": 0.00876604702656867,
"grad_norm": 0.027149083092808723,
"learning_rate": 0.0002,
"loss": 0.0849,
"step": 4820
},
{
"epoch": 0.008784233846125866,
"grad_norm": 0.043290987610816956,
"learning_rate": 0.0002,
"loss": 0.0739,
"step": 4830
},
{
"epoch": 0.008802420665683063,
"grad_norm": 0.10664638131856918,
"learning_rate": 0.0002,
"loss": 0.0722,
"step": 4840
},
{
"epoch": 0.008820607485240259,
"grad_norm": 0.033459801226854324,
"learning_rate": 0.0002,
"loss": 0.0234,
"step": 4850
},
{
"epoch": 0.008838794304797455,
"grad_norm": 0.049193184822797775,
"learning_rate": 0.0002,
"loss": 0.1173,
"step": 4860
},
{
"epoch": 0.008856981124354651,
"grad_norm": 0.05060647428035736,
"learning_rate": 0.0002,
"loss": 0.0883,
"step": 4870
},
{
"epoch": 0.008875167943911848,
"grad_norm": 0.028496885672211647,
"learning_rate": 0.0002,
"loss": 0.0747,
"step": 4880
},
{
"epoch": 0.008893354763469044,
"grad_norm": 0.10652820765972137,
"learning_rate": 0.0002,
"loss": 0.0707,
"step": 4890
},
{
"epoch": 0.008911541583026242,
"grad_norm": 0.007879966869950294,
"learning_rate": 0.0002,
"loss": 0.0178,
"step": 4900
},
{
"epoch": 0.008929728402583438,
"grad_norm": 0.05227983742952347,
"learning_rate": 0.0002,
"loss": 0.1379,
"step": 4910
},
{
"epoch": 0.008947915222140635,
"grad_norm": 0.06054231896996498,
"learning_rate": 0.0002,
"loss": 0.0934,
"step": 4920
},
{
"epoch": 0.008966102041697831,
"grad_norm": 0.029085835441946983,
"learning_rate": 0.0002,
"loss": 0.0816,
"step": 4930
},
{
"epoch": 0.008984288861255027,
"grad_norm": 0.09829402714967728,
"learning_rate": 0.0002,
"loss": 0.0672,
"step": 4940
},
{
"epoch": 0.009002475680812224,
"grad_norm": 0.005579107441008091,
"learning_rate": 0.0002,
"loss": 0.0172,
"step": 4950
},
{
"epoch": 0.00902066250036942,
"grad_norm": 0.027280857786536217,
"learning_rate": 0.0002,
"loss": 0.1659,
"step": 4960
},
{
"epoch": 0.009038849319926616,
"grad_norm": 0.10321583598852158,
"learning_rate": 0.0002,
"loss": 0.0947,
"step": 4970
},
{
"epoch": 0.009057036139483813,
"grad_norm": 0.03381946310400963,
"learning_rate": 0.0002,
"loss": 0.0837,
"step": 4980
},
{
"epoch": 0.009075222959041009,
"grad_norm": 0.14493779838085175,
"learning_rate": 0.0002,
"loss": 0.0736,
"step": 4990
},
{
"epoch": 0.009093409778598205,
"grad_norm": 0.009917684830725193,
"learning_rate": 0.0002,
"loss": 0.0188,
"step": 5000
},
{
"epoch": 0.009111596598155402,
"grad_norm": 1.003450632095337,
"learning_rate": 0.0002,
"loss": 0.218,
"step": 5010
},
{
"epoch": 0.009129783417712598,
"grad_norm": 0.09081514924764633,
"learning_rate": 0.0002,
"loss": 0.1714,
"step": 5020
},
{
"epoch": 0.009147970237269794,
"grad_norm": 0.042343392968177795,
"learning_rate": 0.0002,
"loss": 0.0823,
"step": 5030
},
{
"epoch": 0.00916615705682699,
"grad_norm": 0.09944835305213928,
"learning_rate": 0.0002,
"loss": 0.0667,
"step": 5040
},
{
"epoch": 0.009184343876384187,
"grad_norm": 0.008264658972620964,
"learning_rate": 0.0002,
"loss": 0.0122,
"step": 5050
},
{
"epoch": 0.009202530695941383,
"grad_norm": 0.08990125358104706,
"learning_rate": 0.0002,
"loss": 0.1685,
"step": 5060
},
{
"epoch": 0.009220717515498581,
"grad_norm": 0.0331488698720932,
"learning_rate": 0.0002,
"loss": 0.0885,
"step": 5070
},
{
"epoch": 0.009238904335055777,
"grad_norm": 0.029458707198500633,
"learning_rate": 0.0002,
"loss": 0.078,
"step": 5080
},
{
"epoch": 0.009257091154612974,
"grad_norm": 0.10468839108943939,
"learning_rate": 0.0002,
"loss": 0.0683,
"step": 5090
},
{
"epoch": 0.00927527797417017,
"grad_norm": 0.002719841431826353,
"learning_rate": 0.0002,
"loss": 0.0117,
"step": 5100
},
{
"epoch": 0.009293464793727366,
"grad_norm": 0.0411439947783947,
"learning_rate": 0.0002,
"loss": 0.2025,
"step": 5110
},
{
"epoch": 0.009311651613284563,
"grad_norm": 0.03695548698306084,
"learning_rate": 0.0002,
"loss": 0.0831,
"step": 5120
},
{
"epoch": 0.009329838432841759,
"grad_norm": 0.06067590415477753,
"learning_rate": 0.0002,
"loss": 0.0779,
"step": 5130
},
{
"epoch": 0.009348025252398955,
"grad_norm": 0.11754634976387024,
"learning_rate": 0.0002,
"loss": 0.0667,
"step": 5140
},
{
"epoch": 0.009366212071956152,
"grad_norm": 0.004248317331075668,
"learning_rate": 0.0002,
"loss": 0.0113,
"step": 5150
},
{
"epoch": 0.009384398891513348,
"grad_norm": 0.03073648177087307,
"learning_rate": 0.0002,
"loss": 0.2289,
"step": 5160
},
{
"epoch": 0.009402585711070544,
"grad_norm": 0.10287592560052872,
"learning_rate": 0.0002,
"loss": 0.0977,
"step": 5170
},
{
"epoch": 0.00942077253062774,
"grad_norm": 0.06832946836948395,
"learning_rate": 0.0002,
"loss": 0.0764,
"step": 5180
},
{
"epoch": 0.009438959350184937,
"grad_norm": 0.1760883778333664,
"learning_rate": 0.0002,
"loss": 0.0795,
"step": 5190
},
{
"epoch": 0.009457146169742133,
"grad_norm": 0.02968805655837059,
"learning_rate": 0.0002,
"loss": 0.0253,
"step": 5200
},
{
"epoch": 0.00947533298929933,
"grad_norm": 0.046602651476860046,
"learning_rate": 0.0002,
"loss": 0.1432,
"step": 5210
},
{
"epoch": 0.009493519808856526,
"grad_norm": 0.051989324390888214,
"learning_rate": 0.0002,
"loss": 0.0807,
"step": 5220
},
{
"epoch": 0.009511706628413722,
"grad_norm": 0.04583961144089699,
"learning_rate": 0.0002,
"loss": 0.0782,
"step": 5230
},
{
"epoch": 0.00952989344797092,
"grad_norm": 0.13195525109767914,
"learning_rate": 0.0002,
"loss": 0.0688,
"step": 5240
},
{
"epoch": 0.009548080267528116,
"grad_norm": 0.011369351297616959,
"learning_rate": 0.0002,
"loss": 0.0232,
"step": 5250
},
{
"epoch": 0.009566267087085313,
"grad_norm": 0.05092083290219307,
"learning_rate": 0.0002,
"loss": 0.145,
"step": 5260
},
{
"epoch": 0.009584453906642509,
"grad_norm": 0.05051489174365997,
"learning_rate": 0.0002,
"loss": 0.0803,
"step": 5270
},
{
"epoch": 0.009602640726199705,
"grad_norm": 0.05730990320444107,
"learning_rate": 0.0002,
"loss": 0.0717,
"step": 5280
},
{
"epoch": 0.009620827545756902,
"grad_norm": 0.11170202493667603,
"learning_rate": 0.0002,
"loss": 0.0711,
"step": 5290
},
{
"epoch": 0.009639014365314098,
"grad_norm": 0.011571788229048252,
"learning_rate": 0.0002,
"loss": 0.0204,
"step": 5300
},
{
"epoch": 0.009657201184871294,
"grad_norm": 0.04396244138479233,
"learning_rate": 0.0002,
"loss": 0.1764,
"step": 5310
},
{
"epoch": 0.00967538800442849,
"grad_norm": 0.047808658331632614,
"learning_rate": 0.0002,
"loss": 0.0855,
"step": 5320
},
{
"epoch": 0.009693574823985687,
"grad_norm": 0.09201673418283463,
"learning_rate": 0.0002,
"loss": 0.0737,
"step": 5330
},
{
"epoch": 0.009711761643542883,
"grad_norm": 0.12273146212100983,
"learning_rate": 0.0002,
"loss": 0.0658,
"step": 5340
},
{
"epoch": 0.00972994846310008,
"grad_norm": 0.014599839225411415,
"learning_rate": 0.0002,
"loss": 0.0254,
"step": 5350
},
{
"epoch": 0.009748135282657276,
"grad_norm": 0.049732692539691925,
"learning_rate": 0.0002,
"loss": 0.1432,
"step": 5360
},
{
"epoch": 0.009766322102214472,
"grad_norm": 0.07791377604007721,
"learning_rate": 0.0002,
"loss": 0.0865,
"step": 5370
},
{
"epoch": 0.009784508921771668,
"grad_norm": 0.06298892199993134,
"learning_rate": 0.0002,
"loss": 0.0816,
"step": 5380
},
{
"epoch": 0.009802695741328865,
"grad_norm": 0.08924435079097748,
"learning_rate": 0.0002,
"loss": 0.0709,
"step": 5390
},
{
"epoch": 0.009820882560886061,
"grad_norm": 0.02383723482489586,
"learning_rate": 0.0002,
"loss": 0.0208,
"step": 5400
},
{
"epoch": 0.009839069380443257,
"grad_norm": 0.042910825461149216,
"learning_rate": 0.0002,
"loss": 0.1383,
"step": 5410
},
{
"epoch": 0.009857256200000455,
"grad_norm": 0.05560186505317688,
"learning_rate": 0.0002,
"loss": 0.0827,
"step": 5420
},
{
"epoch": 0.009875443019557652,
"grad_norm": 0.08179624378681183,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 5430
},
{
"epoch": 0.009893629839114848,
"grad_norm": 0.17111806571483612,
"learning_rate": 0.0002,
"loss": 0.0688,
"step": 5440
},
{
"epoch": 0.009911816658672044,
"grad_norm": 0.008684845641255379,
"learning_rate": 0.0002,
"loss": 0.0177,
"step": 5450
},
{
"epoch": 0.00993000347822924,
"grad_norm": 0.044370412826538086,
"learning_rate": 0.0002,
"loss": 0.2036,
"step": 5460
},
{
"epoch": 0.009948190297786437,
"grad_norm": 0.08403154462575912,
"learning_rate": 0.0002,
"loss": 0.0878,
"step": 5470
},
{
"epoch": 0.009966377117343633,
"grad_norm": 0.10712645202875137,
"learning_rate": 0.0002,
"loss": 0.0787,
"step": 5480
},
{
"epoch": 0.00998456393690083,
"grad_norm": 0.12575705349445343,
"learning_rate": 0.0002,
"loss": 0.0637,
"step": 5490
},
{
"epoch": 0.010002750756458026,
"grad_norm": 0.018583891913294792,
"learning_rate": 0.0002,
"loss": 0.0179,
"step": 5500
},
{
"epoch": 0.010020937576015222,
"grad_norm": 0.040852561593055725,
"learning_rate": 0.0002,
"loss": 0.1545,
"step": 5510
},
{
"epoch": 0.010039124395572419,
"grad_norm": 0.09006325900554657,
"learning_rate": 0.0002,
"loss": 0.0888,
"step": 5520
},
{
"epoch": 0.010057311215129615,
"grad_norm": 0.06323093175888062,
"learning_rate": 0.0002,
"loss": 0.0778,
"step": 5530
},
{
"epoch": 0.010075498034686811,
"grad_norm": 0.10159824043512344,
"learning_rate": 0.0002,
"loss": 0.0662,
"step": 5540
},
{
"epoch": 0.010093684854244007,
"grad_norm": 0.012086872011423111,
"learning_rate": 0.0002,
"loss": 0.0237,
"step": 5550
},
{
"epoch": 0.010111871673801204,
"grad_norm": 0.02518664114177227,
"learning_rate": 0.0002,
"loss": 0.1246,
"step": 5560
},
{
"epoch": 0.0101300584933584,
"grad_norm": 0.056161828339099884,
"learning_rate": 0.0002,
"loss": 0.086,
"step": 5570
},
{
"epoch": 0.010148245312915596,
"grad_norm": 0.03376586362719536,
"learning_rate": 0.0002,
"loss": 0.0842,
"step": 5580
},
{
"epoch": 0.010166432132472794,
"grad_norm": 0.09921032190322876,
"learning_rate": 0.0002,
"loss": 0.0667,
"step": 5590
},
{
"epoch": 0.01018461895202999,
"grad_norm": 0.009120604954659939,
"learning_rate": 0.0002,
"loss": 0.0209,
"step": 5600
},
{
"epoch": 0.010202805771587187,
"grad_norm": 0.037767425179481506,
"learning_rate": 0.0002,
"loss": 0.1248,
"step": 5610
},
{
"epoch": 0.010220992591144383,
"grad_norm": 0.05255524069070816,
"learning_rate": 0.0002,
"loss": 0.0794,
"step": 5620
},
{
"epoch": 0.01023917941070158,
"grad_norm": 0.038734354078769684,
"learning_rate": 0.0002,
"loss": 0.0791,
"step": 5630
},
{
"epoch": 0.010257366230258776,
"grad_norm": 0.09293238073587418,
"learning_rate": 0.0002,
"loss": 0.064,
"step": 5640
},
{
"epoch": 0.010275553049815972,
"grad_norm": 0.013020232319831848,
"learning_rate": 0.0002,
"loss": 0.0174,
"step": 5650
},
{
"epoch": 0.010293739869373169,
"grad_norm": 0.030535893514752388,
"learning_rate": 0.0002,
"loss": 0.1615,
"step": 5660
},
{
"epoch": 0.010311926688930365,
"grad_norm": 0.08644227683544159,
"learning_rate": 0.0002,
"loss": 0.0856,
"step": 5670
},
{
"epoch": 0.010330113508487561,
"grad_norm": 0.04769067466259003,
"learning_rate": 0.0002,
"loss": 0.0777,
"step": 5680
},
{
"epoch": 0.010348300328044758,
"grad_norm": 0.1528550088405609,
"learning_rate": 0.0002,
"loss": 0.0757,
"step": 5690
},
{
"epoch": 0.010366487147601954,
"grad_norm": 0.012257793918251991,
"learning_rate": 0.0002,
"loss": 0.0163,
"step": 5700
},
{
"epoch": 0.01038467396715915,
"grad_norm": 0.5761304497718811,
"learning_rate": 0.0002,
"loss": 0.1787,
"step": 5710
},
{
"epoch": 0.010402860786716346,
"grad_norm": 0.07034485787153244,
"learning_rate": 0.0002,
"loss": 0.0964,
"step": 5720
},
{
"epoch": 0.010421047606273543,
"grad_norm": 0.04541708156466484,
"learning_rate": 0.0002,
"loss": 0.0793,
"step": 5730
},
{
"epoch": 0.010439234425830739,
"grad_norm": 0.12013612687587738,
"learning_rate": 0.0002,
"loss": 0.07,
"step": 5740
},
{
"epoch": 0.010457421245387935,
"grad_norm": 0.014152747578918934,
"learning_rate": 0.0002,
"loss": 0.0208,
"step": 5750
},
{
"epoch": 0.010475608064945133,
"grad_norm": 0.029470542445778847,
"learning_rate": 0.0002,
"loss": 0.1352,
"step": 5760
},
{
"epoch": 0.01049379488450233,
"grad_norm": 0.04889104515314102,
"learning_rate": 0.0002,
"loss": 0.0748,
"step": 5770
},
{
"epoch": 0.010511981704059526,
"grad_norm": 0.0311355609446764,
"learning_rate": 0.0002,
"loss": 0.0764,
"step": 5780
},
{
"epoch": 0.010530168523616722,
"grad_norm": 0.16830098628997803,
"learning_rate": 0.0002,
"loss": 0.0734,
"step": 5790
},
{
"epoch": 0.010548355343173919,
"grad_norm": 0.013224232010543346,
"learning_rate": 0.0002,
"loss": 0.0218,
"step": 5800
},
{
"epoch": 0.010566542162731115,
"grad_norm": 0.03710555657744408,
"learning_rate": 0.0002,
"loss": 0.1403,
"step": 5810
},
{
"epoch": 0.010584728982288311,
"grad_norm": 0.05788695067167282,
"learning_rate": 0.0002,
"loss": 0.0863,
"step": 5820
},
{
"epoch": 0.010602915801845508,
"grad_norm": 0.03398163616657257,
"learning_rate": 0.0002,
"loss": 0.0751,
"step": 5830
},
{
"epoch": 0.010621102621402704,
"grad_norm": 0.13862720131874084,
"learning_rate": 0.0002,
"loss": 0.07,
"step": 5840
},
{
"epoch": 0.0106392894409599,
"grad_norm": 0.016240287572145462,
"learning_rate": 0.0002,
"loss": 0.0209,
"step": 5850
},
{
"epoch": 0.010657476260517097,
"grad_norm": 0.030351752415299416,
"learning_rate": 0.0002,
"loss": 0.157,
"step": 5860
},
{
"epoch": 0.010675663080074293,
"grad_norm": 0.038465555757284164,
"learning_rate": 0.0002,
"loss": 0.072,
"step": 5870
},
{
"epoch": 0.01069384989963149,
"grad_norm": 0.07298482209444046,
"learning_rate": 0.0002,
"loss": 0.0796,
"step": 5880
},
{
"epoch": 0.010712036719188685,
"grad_norm": 0.13822157680988312,
"learning_rate": 0.0002,
"loss": 0.0687,
"step": 5890
},
{
"epoch": 0.010730223538745882,
"grad_norm": 0.014381729066371918,
"learning_rate": 0.0002,
"loss": 0.0192,
"step": 5900
},
{
"epoch": 0.010748410358303078,
"grad_norm": 0.040448348969221115,
"learning_rate": 0.0002,
"loss": 0.1714,
"step": 5910
},
{
"epoch": 0.010766597177860274,
"grad_norm": 0.06950225681066513,
"learning_rate": 0.0002,
"loss": 0.098,
"step": 5920
},
{
"epoch": 0.010784783997417472,
"grad_norm": 0.04581855982542038,
"learning_rate": 0.0002,
"loss": 0.0752,
"step": 5930
},
{
"epoch": 0.010802970816974669,
"grad_norm": 0.10498905926942825,
"learning_rate": 0.0002,
"loss": 0.0627,
"step": 5940
},
{
"epoch": 0.010821157636531865,
"grad_norm": 0.009345698170363903,
"learning_rate": 0.0002,
"loss": 0.0183,
"step": 5950
},
{
"epoch": 0.010839344456089061,
"grad_norm": 0.02440352365374565,
"learning_rate": 0.0002,
"loss": 0.1289,
"step": 5960
},
{
"epoch": 0.010857531275646258,
"grad_norm": 0.051523737609386444,
"learning_rate": 0.0002,
"loss": 0.0813,
"step": 5970
},
{
"epoch": 0.010875718095203454,
"grad_norm": 0.031664300709962845,
"learning_rate": 0.0002,
"loss": 0.0736,
"step": 5980
},
{
"epoch": 0.01089390491476065,
"grad_norm": 0.10166060924530029,
"learning_rate": 0.0002,
"loss": 0.0631,
"step": 5990
},
{
"epoch": 0.010912091734317847,
"grad_norm": 0.01642071269452572,
"learning_rate": 0.0002,
"loss": 0.0198,
"step": 6000
},
{
"epoch": 0.010930278553875043,
"grad_norm": 0.04028782621026039,
"learning_rate": 0.0002,
"loss": 0.1355,
"step": 6010
},
{
"epoch": 0.01094846537343224,
"grad_norm": 0.04289260134100914,
"learning_rate": 0.0002,
"loss": 0.0794,
"step": 6020
},
{
"epoch": 0.010966652192989436,
"grad_norm": 0.03854202851653099,
"learning_rate": 0.0002,
"loss": 0.0765,
"step": 6030
},
{
"epoch": 0.010984839012546632,
"grad_norm": 0.07910823822021484,
"learning_rate": 0.0002,
"loss": 0.0618,
"step": 6040
},
{
"epoch": 0.011003025832103828,
"grad_norm": 0.009719946421682835,
"learning_rate": 0.0002,
"loss": 0.0145,
"step": 6050
},
{
"epoch": 0.011021212651661024,
"grad_norm": 0.06853003799915314,
"learning_rate": 0.0002,
"loss": 0.1563,
"step": 6060
},
{
"epoch": 0.01103939947121822,
"grad_norm": 0.02887076325714588,
"learning_rate": 0.0002,
"loss": 0.0803,
"step": 6070
},
{
"epoch": 0.011057586290775417,
"grad_norm": 0.060147739946842194,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 6080
},
{
"epoch": 0.011075773110332613,
"grad_norm": 0.10197418928146362,
"learning_rate": 0.0002,
"loss": 0.0627,
"step": 6090
},
{
"epoch": 0.011093959929889811,
"grad_norm": 0.015125100500881672,
"learning_rate": 0.0002,
"loss": 0.0164,
"step": 6100
},
{
"epoch": 0.011112146749447008,
"grad_norm": 0.029526161029934883,
"learning_rate": 0.0002,
"loss": 0.1526,
"step": 6110
},
{
"epoch": 0.011130333569004204,
"grad_norm": 0.05942453444004059,
"learning_rate": 0.0002,
"loss": 0.0891,
"step": 6120
},
{
"epoch": 0.0111485203885614,
"grad_norm": 0.07344426214694977,
"learning_rate": 0.0002,
"loss": 0.0753,
"step": 6130
},
{
"epoch": 0.011166707208118597,
"grad_norm": 0.1394059658050537,
"learning_rate": 0.0002,
"loss": 0.0776,
"step": 6140
},
{
"epoch": 0.011184894027675793,
"grad_norm": 0.00965851079672575,
"learning_rate": 0.0002,
"loss": 0.019,
"step": 6150
},
{
"epoch": 0.01120308084723299,
"grad_norm": 0.041846372187137604,
"learning_rate": 0.0002,
"loss": 0.1776,
"step": 6160
},
{
"epoch": 0.011221267666790186,
"grad_norm": 0.04657486826181412,
"learning_rate": 0.0002,
"loss": 0.0878,
"step": 6170
},
{
"epoch": 0.011239454486347382,
"grad_norm": 0.026520246639847755,
"learning_rate": 0.0002,
"loss": 0.0768,
"step": 6180
},
{
"epoch": 0.011257641305904578,
"grad_norm": 0.10318096727132797,
"learning_rate": 0.0002,
"loss": 0.0617,
"step": 6190
},
{
"epoch": 0.011275828125461775,
"grad_norm": 0.019912905991077423,
"learning_rate": 0.0002,
"loss": 0.0202,
"step": 6200
},
{
"epoch": 0.01129401494501897,
"grad_norm": 0.05316480994224548,
"learning_rate": 0.0002,
"loss": 0.1412,
"step": 6210
},
{
"epoch": 0.011312201764576167,
"grad_norm": 0.02944323979318142,
"learning_rate": 0.0002,
"loss": 0.0829,
"step": 6220
},
{
"epoch": 0.011330388584133363,
"grad_norm": 0.0285831056535244,
"learning_rate": 0.0002,
"loss": 0.074,
"step": 6230
},
{
"epoch": 0.01134857540369056,
"grad_norm": 0.0975700169801712,
"learning_rate": 0.0002,
"loss": 0.0681,
"step": 6240
},
{
"epoch": 0.011366762223247756,
"grad_norm": 0.025717545300722122,
"learning_rate": 0.0002,
"loss": 0.0221,
"step": 6250
},
{
"epoch": 0.011384949042804952,
"grad_norm": 0.02859714813530445,
"learning_rate": 0.0002,
"loss": 0.1142,
"step": 6260
},
{
"epoch": 0.01140313586236215,
"grad_norm": 0.04395005479454994,
"learning_rate": 0.0002,
"loss": 0.0777,
"step": 6270
},
{
"epoch": 0.011421322681919347,
"grad_norm": 0.05116860568523407,
"learning_rate": 0.0002,
"loss": 0.0763,
"step": 6280
},
{
"epoch": 0.011439509501476543,
"grad_norm": 0.06850302964448929,
"learning_rate": 0.0002,
"loss": 0.0632,
"step": 6290
},
{
"epoch": 0.01145769632103374,
"grad_norm": 0.016113542020320892,
"learning_rate": 0.0002,
"loss": 0.0178,
"step": 6300
},
{
"epoch": 0.011475883140590936,
"grad_norm": 0.032306116074323654,
"learning_rate": 0.0002,
"loss": 0.1306,
"step": 6310
},
{
"epoch": 0.011494069960148132,
"grad_norm": 0.055701326578855515,
"learning_rate": 0.0002,
"loss": 0.0834,
"step": 6320
},
{
"epoch": 0.011512256779705328,
"grad_norm": 0.022934190928936005,
"learning_rate": 0.0002,
"loss": 0.0737,
"step": 6330
},
{
"epoch": 0.011530443599262525,
"grad_norm": 0.08375566452741623,
"learning_rate": 0.0002,
"loss": 0.0661,
"step": 6340
},
{
"epoch": 0.011548630418819721,
"grad_norm": 0.013614729046821594,
"learning_rate": 0.0002,
"loss": 0.0187,
"step": 6350
},
{
"epoch": 0.011566817238376917,
"grad_norm": 0.028269700706005096,
"learning_rate": 0.0002,
"loss": 0.1245,
"step": 6360
},
{
"epoch": 0.011585004057934114,
"grad_norm": 0.03646335378289223,
"learning_rate": 0.0002,
"loss": 0.0866,
"step": 6370
},
{
"epoch": 0.01160319087749131,
"grad_norm": 0.0371277742087841,
"learning_rate": 0.0002,
"loss": 0.0737,
"step": 6380
},
{
"epoch": 0.011621377697048506,
"grad_norm": 0.13698458671569824,
"learning_rate": 0.0002,
"loss": 0.0679,
"step": 6390
},
{
"epoch": 0.011639564516605702,
"grad_norm": 0.009350700303912163,
"learning_rate": 0.0002,
"loss": 0.024,
"step": 6400
},
{
"epoch": 0.011657751336162899,
"grad_norm": 0.03187236189842224,
"learning_rate": 0.0002,
"loss": 0.1555,
"step": 6410
},
{
"epoch": 0.011675938155720095,
"grad_norm": 0.06672242283821106,
"learning_rate": 0.0002,
"loss": 0.0835,
"step": 6420
},
{
"epoch": 0.011694124975277291,
"grad_norm": 0.07821471244096756,
"learning_rate": 0.0002,
"loss": 0.0746,
"step": 6430
},
{
"epoch": 0.011712311794834488,
"grad_norm": 0.14781107008457184,
"learning_rate": 0.0002,
"loss": 0.0662,
"step": 6440
},
{
"epoch": 0.011730498614391686,
"grad_norm": 0.0057207453064620495,
"learning_rate": 0.0002,
"loss": 0.0169,
"step": 6450
},
{
"epoch": 0.011748685433948882,
"grad_norm": 0.04252105578780174,
"learning_rate": 0.0002,
"loss": 0.1868,
"step": 6460
},
{
"epoch": 0.011766872253506078,
"grad_norm": 0.05041474476456642,
"learning_rate": 0.0002,
"loss": 0.0842,
"step": 6470
},
{
"epoch": 0.011785059073063275,
"grad_norm": 0.06584125757217407,
"learning_rate": 0.0002,
"loss": 0.0779,
"step": 6480
},
{
"epoch": 0.011803245892620471,
"grad_norm": 0.14610575139522552,
"learning_rate": 0.0002,
"loss": 0.063,
"step": 6490
},
{
"epoch": 0.011821432712177667,
"grad_norm": 0.01419675163924694,
"learning_rate": 0.0002,
"loss": 0.0152,
"step": 6500
},
{
"epoch": 0.011839619531734864,
"grad_norm": 0.03371060639619827,
"learning_rate": 0.0002,
"loss": 0.1725,
"step": 6510
},
{
"epoch": 0.01185780635129206,
"grad_norm": 0.028900766745209694,
"learning_rate": 0.0002,
"loss": 0.0815,
"step": 6520
},
{
"epoch": 0.011875993170849256,
"grad_norm": 0.059519629925489426,
"learning_rate": 0.0002,
"loss": 0.0785,
"step": 6530
},
{
"epoch": 0.011894179990406453,
"grad_norm": 0.12085167318582535,
"learning_rate": 0.0002,
"loss": 0.0615,
"step": 6540
},
{
"epoch": 0.011912366809963649,
"grad_norm": 0.028604619204998016,
"learning_rate": 0.0002,
"loss": 0.0196,
"step": 6550
},
{
"epoch": 0.011930553629520845,
"grad_norm": 0.03659407049417496,
"learning_rate": 0.0002,
"loss": 0.1403,
"step": 6560
},
{
"epoch": 0.011948740449078041,
"grad_norm": 0.034444138407707214,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 6570
},
{
"epoch": 0.011966927268635238,
"grad_norm": 0.029788263142108917,
"learning_rate": 0.0002,
"loss": 0.0713,
"step": 6580
},
{
"epoch": 0.011985114088192434,
"grad_norm": 0.1271272599697113,
"learning_rate": 0.0002,
"loss": 0.0672,
"step": 6590
},
{
"epoch": 0.01200330090774963,
"grad_norm": 0.018705012276768684,
"learning_rate": 0.0002,
"loss": 0.0212,
"step": 6600
},
{
"epoch": 0.012021487727306827,
"grad_norm": 0.02982541173696518,
"learning_rate": 0.0002,
"loss": 0.1152,
"step": 6610
},
{
"epoch": 0.012039674546864025,
"grad_norm": 0.06942040473222733,
"learning_rate": 0.0002,
"loss": 0.0963,
"step": 6620
},
{
"epoch": 0.012057861366421221,
"grad_norm": 0.06102292984724045,
"learning_rate": 0.0002,
"loss": 0.0775,
"step": 6630
},
{
"epoch": 0.012076048185978417,
"grad_norm": 0.10115987807512283,
"learning_rate": 0.0002,
"loss": 0.0729,
"step": 6640
},
{
"epoch": 0.012094235005535614,
"grad_norm": 0.011439867317676544,
"learning_rate": 0.0002,
"loss": 0.0253,
"step": 6650
},
{
"epoch": 0.01211242182509281,
"grad_norm": 0.062434904277324677,
"learning_rate": 0.0002,
"loss": 0.1166,
"step": 6660
},
{
"epoch": 0.012130608644650006,
"grad_norm": 0.055352553725242615,
"learning_rate": 0.0002,
"loss": 0.0802,
"step": 6670
},
{
"epoch": 0.012148795464207203,
"grad_norm": 0.031538888812065125,
"learning_rate": 0.0002,
"loss": 0.0786,
"step": 6680
},
{
"epoch": 0.012166982283764399,
"grad_norm": 0.10964162647724152,
"learning_rate": 0.0002,
"loss": 0.0626,
"step": 6690
},
{
"epoch": 0.012185169103321595,
"grad_norm": 0.011173764243721962,
"learning_rate": 0.0002,
"loss": 0.0205,
"step": 6700
},
{
"epoch": 0.012203355922878792,
"grad_norm": 0.035984206944704056,
"learning_rate": 0.0002,
"loss": 0.1412,
"step": 6710
},
{
"epoch": 0.012221542742435988,
"grad_norm": 0.07189827412366867,
"learning_rate": 0.0002,
"loss": 0.0818,
"step": 6720
},
{
"epoch": 0.012239729561993184,
"grad_norm": 0.0400136299431324,
"learning_rate": 0.0002,
"loss": 0.0676,
"step": 6730
},
{
"epoch": 0.01225791638155038,
"grad_norm": 0.14700625836849213,
"learning_rate": 0.0002,
"loss": 0.0663,
"step": 6740
},
{
"epoch": 0.012276103201107577,
"grad_norm": 0.007156179752200842,
"learning_rate": 0.0002,
"loss": 0.0171,
"step": 6750
},
{
"epoch": 0.012294290020664773,
"grad_norm": 0.04911777004599571,
"learning_rate": 0.0002,
"loss": 0.1657,
"step": 6760
},
{
"epoch": 0.01231247684022197,
"grad_norm": 0.03729144483804703,
"learning_rate": 0.0002,
"loss": 0.0806,
"step": 6770
},
{
"epoch": 0.012330663659779166,
"grad_norm": 0.037231944501399994,
"learning_rate": 0.0002,
"loss": 0.0736,
"step": 6780
},
{
"epoch": 0.012348850479336364,
"grad_norm": 0.09694401919841766,
"learning_rate": 0.0002,
"loss": 0.0642,
"step": 6790
},
{
"epoch": 0.01236703729889356,
"grad_norm": 0.025534989312291145,
"learning_rate": 0.0002,
"loss": 0.0208,
"step": 6800
},
{
"epoch": 0.012385224118450756,
"grad_norm": 0.033654361963272095,
"learning_rate": 0.0002,
"loss": 0.1295,
"step": 6810
},
{
"epoch": 0.012403410938007953,
"grad_norm": 0.04499521851539612,
"learning_rate": 0.0002,
"loss": 0.0902,
"step": 6820
},
{
"epoch": 0.012421597757565149,
"grad_norm": 0.0335836224257946,
"learning_rate": 0.0002,
"loss": 0.0781,
"step": 6830
},
{
"epoch": 0.012439784577122345,
"grad_norm": 0.1040850430727005,
"learning_rate": 0.0002,
"loss": 0.0679,
"step": 6840
},
{
"epoch": 0.012457971396679542,
"grad_norm": 0.015963764861226082,
"learning_rate": 0.0002,
"loss": 0.0226,
"step": 6850
},
{
"epoch": 0.012476158216236738,
"grad_norm": 0.05578307807445526,
"learning_rate": 0.0002,
"loss": 0.1119,
"step": 6860
},
{
"epoch": 0.012494345035793934,
"grad_norm": 0.0364505760371685,
"learning_rate": 0.0002,
"loss": 0.0805,
"step": 6870
},
{
"epoch": 0.01251253185535113,
"grad_norm": 0.027990469709038734,
"learning_rate": 0.0002,
"loss": 0.0826,
"step": 6880
},
{
"epoch": 0.012530718674908327,
"grad_norm": 0.08282670378684998,
"learning_rate": 0.0002,
"loss": 0.0685,
"step": 6890
},
{
"epoch": 0.012548905494465523,
"grad_norm": 0.02172144502401352,
"learning_rate": 0.0002,
"loss": 0.0259,
"step": 6900
},
{
"epoch": 0.01256709231402272,
"grad_norm": 0.04074740409851074,
"learning_rate": 0.0002,
"loss": 0.1211,
"step": 6910
},
{
"epoch": 0.012585279133579916,
"grad_norm": 0.05433020740747452,
"learning_rate": 0.0002,
"loss": 0.0776,
"step": 6920
},
{
"epoch": 0.012603465953137112,
"grad_norm": 0.05479983240365982,
"learning_rate": 0.0002,
"loss": 0.0778,
"step": 6930
},
{
"epoch": 0.012621652772694308,
"grad_norm": 1.6031180620193481,
"learning_rate": 0.0002,
"loss": 0.2265,
"step": 6940
},
{
"epoch": 0.012639839592251505,
"grad_norm": 1.0940366983413696,
"learning_rate": 0.0002,
"loss": 0.4586,
"step": 6950
},
{
"epoch": 0.012658026411808703,
"grad_norm": 0.0412282720208168,
"learning_rate": 0.0002,
"loss": 0.1072,
"step": 6960
},
{
"epoch": 0.012676213231365899,
"grad_norm": 0.03705910965800285,
"learning_rate": 0.0002,
"loss": 0.1014,
"step": 6970
},
{
"epoch": 0.012694400050923095,
"grad_norm": 0.07444313168525696,
"learning_rate": 0.0002,
"loss": 0.0881,
"step": 6980
},
{
"epoch": 0.012712586870480292,
"grad_norm": 0.08558017760515213,
"learning_rate": 0.0002,
"loss": 0.0619,
"step": 6990
},
{
"epoch": 0.012730773690037488,
"grad_norm": 0.0004157133516855538,
"learning_rate": 0.0002,
"loss": 0.0045,
"step": 7000
},
{
"epoch": 0.012748960509594684,
"grad_norm": 0.07950109243392944,
"learning_rate": 0.0002,
"loss": 0.1801,
"step": 7010
},
{
"epoch": 0.01276714732915188,
"grad_norm": 0.08424151688814163,
"learning_rate": 0.0002,
"loss": 0.0753,
"step": 7020
},
{
"epoch": 0.012785334148709077,
"grad_norm": 0.47635558247566223,
"learning_rate": 0.0002,
"loss": 0.0753,
"step": 7030
},
{
"epoch": 0.012803520968266273,
"grad_norm": 0.0452958345413208,
"learning_rate": 0.0002,
"loss": 0.0731,
"step": 7040
},
{
"epoch": 0.01282170778782347,
"grad_norm": 0.007719043176621199,
"learning_rate": 0.0002,
"loss": 0.0193,
"step": 7050
},
{
"epoch": 0.012839894607380666,
"grad_norm": 0.2408572882413864,
"learning_rate": 0.0002,
"loss": 0.4117,
"step": 7060
},
{
"epoch": 0.012858081426937862,
"grad_norm": 0.7272363305091858,
"learning_rate": 0.0002,
"loss": 0.0852,
"step": 7070
},
{
"epoch": 0.012876268246495058,
"grad_norm": 0.5539261698722839,
"learning_rate": 0.0002,
"loss": 0.075,
"step": 7080
},
{
"epoch": 0.012894455066052255,
"grad_norm": 4.608922481536865,
"learning_rate": 0.0002,
"loss": 0.2301,
"step": 7090
},
{
"epoch": 0.012912641885609451,
"grad_norm": 0.0012216357281431556,
"learning_rate": 0.0002,
"loss": 0.0034,
"step": 7100
},
{
"epoch": 0.012930828705166647,
"grad_norm": 0.15025563538074493,
"learning_rate": 0.0002,
"loss": 0.2717,
"step": 7110
},
{
"epoch": 0.012949015524723844,
"grad_norm": 0.06209970638155937,
"learning_rate": 0.0002,
"loss": 0.0852,
"step": 7120
},
{
"epoch": 0.012967202344281042,
"grad_norm": 0.6127016544342041,
"learning_rate": 0.0002,
"loss": 0.1271,
"step": 7130
},
{
"epoch": 0.012985389163838238,
"grad_norm": 0.047152891755104065,
"learning_rate": 0.0002,
"loss": 0.0626,
"step": 7140
},
{
"epoch": 0.013003575983395434,
"grad_norm": 0.0005132685182616115,
"learning_rate": 0.0002,
"loss": 0.0029,
"step": 7150
},
{
"epoch": 0.01302176280295263,
"grad_norm": 0.08946029096841812,
"learning_rate": 0.0002,
"loss": 0.309,
"step": 7160
},
{
"epoch": 0.013039949622509827,
"grad_norm": 0.18610751628875732,
"learning_rate": 0.0002,
"loss": 0.0867,
"step": 7170
},
{
"epoch": 0.013058136442067023,
"grad_norm": 0.07280854880809784,
"learning_rate": 0.0002,
"loss": 0.0832,
"step": 7180
},
{
"epoch": 0.01307632326162422,
"grad_norm": 0.11997990310192108,
"learning_rate": 0.0002,
"loss": 0.0746,
"step": 7190
},
{
"epoch": 0.013094510081181416,
"grad_norm": 0.00019475500448606908,
"learning_rate": 0.0002,
"loss": 0.01,
"step": 7200
},
{
"epoch": 0.013112696900738612,
"grad_norm": 0.07719916105270386,
"learning_rate": 0.0002,
"loss": 0.3035,
"step": 7210
},
{
"epoch": 0.013130883720295809,
"grad_norm": 0.0990060344338417,
"learning_rate": 0.0002,
"loss": 0.0902,
"step": 7220
},
{
"epoch": 0.013149070539853005,
"grad_norm": 0.22215688228607178,
"learning_rate": 0.0002,
"loss": 0.0797,
"step": 7230
},
{
"epoch": 0.013167257359410201,
"grad_norm": 0.08412040770053864,
"learning_rate": 0.0002,
"loss": 0.0646,
"step": 7240
},
{
"epoch": 0.013185444178967397,
"grad_norm": 0.0017518314998596907,
"learning_rate": 0.0002,
"loss": 0.007,
"step": 7250
},
{
"epoch": 0.013203630998524594,
"grad_norm": 0.1554754078388214,
"learning_rate": 0.0002,
"loss": 0.2319,
"step": 7260
},
{
"epoch": 0.01322181781808179,
"grad_norm": 0.052371326833963394,
"learning_rate": 0.0002,
"loss": 0.0832,
"step": 7270
},
{
"epoch": 0.013240004637638986,
"grad_norm": 0.9168817400932312,
"learning_rate": 0.0002,
"loss": 0.0791,
"step": 7280
},
{
"epoch": 0.013258191457196183,
"grad_norm": 0.07169363647699356,
"learning_rate": 0.0002,
"loss": 0.0602,
"step": 7290
},
{
"epoch": 0.01327637827675338,
"grad_norm": 0.0009911650558933616,
"learning_rate": 0.0002,
"loss": 0.0041,
"step": 7300
},
{
"epoch": 0.013294565096310577,
"grad_norm": 0.2644541263580322,
"learning_rate": 0.0002,
"loss": 0.2193,
"step": 7310
},
{
"epoch": 0.013312751915867773,
"grad_norm": 0.12140689790248871,
"learning_rate": 0.0002,
"loss": 0.0944,
"step": 7320
},
{
"epoch": 0.01333093873542497,
"grad_norm": 0.03627191483974457,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 7330
},
{
"epoch": 0.013349125554982166,
"grad_norm": 0.06252894550561905,
"learning_rate": 0.0002,
"loss": 0.0596,
"step": 7340
},
{
"epoch": 0.013367312374539362,
"grad_norm": 0.20318441092967987,
"learning_rate": 0.0002,
"loss": 0.0064,
"step": 7350
},
{
"epoch": 0.013385499194096559,
"grad_norm": 0.4231732189655304,
"learning_rate": 0.0002,
"loss": 0.4329,
"step": 7360
},
{
"epoch": 0.013403686013653755,
"grad_norm": 0.07567082345485687,
"learning_rate": 0.0002,
"loss": 0.089,
"step": 7370
},
{
"epoch": 0.013421872833210951,
"grad_norm": 0.23021474480628967,
"learning_rate": 0.0002,
"loss": 0.0833,
"step": 7380
},
{
"epoch": 0.013440059652768148,
"grad_norm": 0.09458985179662704,
"learning_rate": 0.0002,
"loss": 0.1391,
"step": 7390
},
{
"epoch": 0.013458246472325344,
"grad_norm": 0.010052111931145191,
"learning_rate": 0.0002,
"loss": 0.0073,
"step": 7400
},
{
"epoch": 0.01347643329188254,
"grad_norm": 0.2159787267446518,
"learning_rate": 0.0002,
"loss": 0.2249,
"step": 7410
},
{
"epoch": 0.013494620111439736,
"grad_norm": 0.11222853511571884,
"learning_rate": 0.0002,
"loss": 0.093,
"step": 7420
},
{
"epoch": 0.013512806930996933,
"grad_norm": 0.08586139976978302,
"learning_rate": 0.0002,
"loss": 0.0821,
"step": 7430
},
{
"epoch": 0.013530993750554129,
"grad_norm": 0.12232748419046402,
"learning_rate": 0.0002,
"loss": 0.0736,
"step": 7440
},
{
"epoch": 0.013549180570111325,
"grad_norm": 0.006977527402341366,
"learning_rate": 0.0002,
"loss": 0.0115,
"step": 7450
},
{
"epoch": 0.013567367389668522,
"grad_norm": 0.051690369844436646,
"learning_rate": 0.0002,
"loss": 0.2247,
"step": 7460
},
{
"epoch": 0.013585554209225718,
"grad_norm": 0.06542158871889114,
"learning_rate": 0.0002,
"loss": 0.1056,
"step": 7470
},
{
"epoch": 0.013603741028782916,
"grad_norm": 0.18546995520591736,
"learning_rate": 0.0002,
"loss": 0.1102,
"step": 7480
},
{
"epoch": 0.013621927848340112,
"grad_norm": 13.399182319641113,
"learning_rate": 0.0002,
"loss": 2.0806,
"step": 7490
},
{
"epoch": 0.013640114667897309,
"grad_norm": 0.0982588455080986,
"learning_rate": 0.0002,
"loss": 0.2158,
"step": 7500
},
{
"epoch": 0.013658301487454505,
"grad_norm": 0.07860754430294037,
"learning_rate": 0.0002,
"loss": 0.125,
"step": 7510
},
{
"epoch": 0.013676488307011701,
"grad_norm": 0.1165497750043869,
"learning_rate": 0.0002,
"loss": 0.0899,
"step": 7520
},
{
"epoch": 0.013694675126568898,
"grad_norm": 0.2813965380191803,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 7530
},
{
"epoch": 0.013712861946126094,
"grad_norm": 0.33458462357521057,
"learning_rate": 0.0002,
"loss": 0.0683,
"step": 7540
},
{
"epoch": 0.01373104876568329,
"grad_norm": 0.012062279507517815,
"learning_rate": 0.0002,
"loss": 0.0135,
"step": 7550
},
{
"epoch": 0.013749235585240487,
"grad_norm": 0.1787721961736679,
"learning_rate": 0.0002,
"loss": 0.1763,
"step": 7560
},
{
"epoch": 0.013767422404797683,
"grad_norm": 0.05922751501202583,
"learning_rate": 0.0002,
"loss": 0.1223,
"step": 7570
},
{
"epoch": 0.01378560922435488,
"grad_norm": 0.11594684422016144,
"learning_rate": 0.0002,
"loss": 0.0934,
"step": 7580
},
{
"epoch": 0.013803796043912075,
"grad_norm": 0.2290794998407364,
"learning_rate": 0.0002,
"loss": 0.0799,
"step": 7590
},
{
"epoch": 0.013821982863469272,
"grad_norm": 0.04903063178062439,
"learning_rate": 0.0002,
"loss": 0.0195,
"step": 7600
},
{
"epoch": 0.013840169683026468,
"grad_norm": 123.61300659179688,
"learning_rate": 0.0002,
"loss": 0.8359,
"step": 7610
},
{
"epoch": 0.013858356502583664,
"grad_norm": 0.25403347611427307,
"learning_rate": 0.0002,
"loss": 1.0464,
"step": 7620
},
{
"epoch": 0.01387654332214086,
"grad_norm": 0.08144152164459229,
"learning_rate": 0.0002,
"loss": 0.0794,
"step": 7630
},
{
"epoch": 0.013894730141698057,
"grad_norm": 0.11679713428020477,
"learning_rate": 0.0002,
"loss": 0.0706,
"step": 7640
},
{
"epoch": 0.013912916961255255,
"grad_norm": 0.00391317019239068,
"learning_rate": 0.0002,
"loss": 0.0075,
"step": 7650
},
{
"epoch": 0.013931103780812451,
"grad_norm": 0.13209663331508636,
"learning_rate": 0.0002,
"loss": 0.2228,
"step": 7660
},
{
"epoch": 0.013949290600369648,
"grad_norm": 0.06067880615592003,
"learning_rate": 0.0002,
"loss": 0.089,
"step": 7670
},
{
"epoch": 0.013967477419926844,
"grad_norm": 0.04806550592184067,
"learning_rate": 0.0002,
"loss": 0.0769,
"step": 7680
},
{
"epoch": 0.01398566423948404,
"grad_norm": 0.09506970643997192,
"learning_rate": 0.0002,
"loss": 0.0689,
"step": 7690
},
{
"epoch": 0.014003851059041237,
"grad_norm": 0.002536884741857648,
"learning_rate": 0.0002,
"loss": 0.0172,
"step": 7700
},
{
"epoch": 0.014022037878598433,
"grad_norm": 0.13837113976478577,
"learning_rate": 0.0002,
"loss": 0.2243,
"step": 7710
},
{
"epoch": 0.01404022469815563,
"grad_norm": 0.08101535588502884,
"learning_rate": 0.0002,
"loss": 0.0878,
"step": 7720
},
{
"epoch": 0.014058411517712826,
"grad_norm": 0.04018868878483772,
"learning_rate": 0.0002,
"loss": 0.0776,
"step": 7730
},
{
"epoch": 0.014076598337270022,
"grad_norm": 0.1377197653055191,
"learning_rate": 0.0002,
"loss": 0.0681,
"step": 7740
},
{
"epoch": 0.014094785156827218,
"grad_norm": 0.0006735012284480035,
"learning_rate": 0.0002,
"loss": 0.0041,
"step": 7750
},
{
"epoch": 0.014112971976384414,
"grad_norm": 0.17503094673156738,
"learning_rate": 0.0002,
"loss": 0.3114,
"step": 7760
},
{
"epoch": 0.01413115879594161,
"grad_norm": 0.07190551608800888,
"learning_rate": 0.0002,
"loss": 0.1018,
"step": 7770
},
{
"epoch": 0.014149345615498807,
"grad_norm": 0.036945659667253494,
"learning_rate": 0.0002,
"loss": 0.0785,
"step": 7780
},
{
"epoch": 0.014167532435056003,
"grad_norm": 0.13999724388122559,
"learning_rate": 0.0002,
"loss": 0.0725,
"step": 7790
},
{
"epoch": 0.0141857192546132,
"grad_norm": 0.0031171294394880533,
"learning_rate": 0.0002,
"loss": 0.0144,
"step": 7800
},
{
"epoch": 0.014203906074170396,
"grad_norm": 0.059554051607847214,
"learning_rate": 0.0002,
"loss": 0.2442,
"step": 7810
},
{
"epoch": 0.014222092893727594,
"grad_norm": 0.06873622536659241,
"learning_rate": 0.0002,
"loss": 0.0904,
"step": 7820
},
{
"epoch": 0.01424027971328479,
"grad_norm": 0.11261582374572754,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 7830
},
{
"epoch": 0.014258466532841987,
"grad_norm": 1.497631311416626,
"learning_rate": 0.0002,
"loss": 0.0689,
"step": 7840
},
{
"epoch": 0.014276653352399183,
"grad_norm": 0.004822546616196632,
"learning_rate": 0.0002,
"loss": 0.0156,
"step": 7850
},
{
"epoch": 0.01429484017195638,
"grad_norm": 0.0575052835047245,
"learning_rate": 0.0002,
"loss": 0.1895,
"step": 7860
},
{
"epoch": 0.014313026991513576,
"grad_norm": 0.10657750070095062,
"learning_rate": 0.0002,
"loss": 0.0855,
"step": 7870
},
{
"epoch": 0.014331213811070772,
"grad_norm": 0.07080844044685364,
"learning_rate": 0.0002,
"loss": 0.0716,
"step": 7880
},
{
"epoch": 0.014349400630627968,
"grad_norm": 0.1628514677286148,
"learning_rate": 0.0002,
"loss": 0.071,
"step": 7890
},
{
"epoch": 0.014367587450185165,
"grad_norm": 0.013860347680747509,
"learning_rate": 0.0002,
"loss": 0.0227,
"step": 7900
},
{
"epoch": 0.014385774269742361,
"grad_norm": 0.5240967869758606,
"learning_rate": 0.0002,
"loss": 0.1854,
"step": 7910
},
{
"epoch": 0.014403961089299557,
"grad_norm": 1.0027457475662231,
"learning_rate": 0.0002,
"loss": 0.0942,
"step": 7920
},
{
"epoch": 0.014422147908856753,
"grad_norm": 0.05730056390166283,
"learning_rate": 0.0002,
"loss": 0.0778,
"step": 7930
},
{
"epoch": 0.01444033472841395,
"grad_norm": 0.1485404521226883,
"learning_rate": 0.0002,
"loss": 0.0719,
"step": 7940
},
{
"epoch": 0.014458521547971146,
"grad_norm": 0.009702637791633606,
"learning_rate": 0.0002,
"loss": 0.0136,
"step": 7950
},
{
"epoch": 0.014476708367528342,
"grad_norm": 0.046543315052986145,
"learning_rate": 0.0002,
"loss": 0.1697,
"step": 7960
},
{
"epoch": 0.014494895187085539,
"grad_norm": 0.05248842388391495,
"learning_rate": 0.0002,
"loss": 0.0888,
"step": 7970
},
{
"epoch": 0.014513082006642735,
"grad_norm": 0.047813788056373596,
"learning_rate": 0.0002,
"loss": 0.0806,
"step": 7980
},
{
"epoch": 0.014531268826199933,
"grad_norm": 0.19744129478931427,
"learning_rate": 0.0002,
"loss": 0.0788,
"step": 7990
},
{
"epoch": 0.01454945564575713,
"grad_norm": 0.005265017040073872,
"learning_rate": 0.0002,
"loss": 0.014,
"step": 8000
},
{
"epoch": 0.014567642465314326,
"grad_norm": 0.0564056858420372,
"learning_rate": 0.0002,
"loss": 0.2681,
"step": 8010
},
{
"epoch": 0.014585829284871522,
"grad_norm": 0.0958496481180191,
"learning_rate": 0.0002,
"loss": 0.0823,
"step": 8020
},
{
"epoch": 0.014604016104428718,
"grad_norm": 0.12000919133424759,
"learning_rate": 0.0002,
"loss": 0.073,
"step": 8030
},
{
"epoch": 0.014622202923985915,
"grad_norm": 0.15912771224975586,
"learning_rate": 0.0002,
"loss": 0.075,
"step": 8040
},
{
"epoch": 0.014640389743543111,
"grad_norm": 0.004025776404887438,
"learning_rate": 0.0002,
"loss": 0.012,
"step": 8050
},
{
"epoch": 0.014658576563100307,
"grad_norm": 0.1682930886745453,
"learning_rate": 0.0002,
"loss": 0.2926,
"step": 8060
},
{
"epoch": 0.014676763382657504,
"grad_norm": 0.057362254709005356,
"learning_rate": 0.0002,
"loss": 0.0869,
"step": 8070
},
{
"epoch": 0.0146949502022147,
"grad_norm": 0.0814078077673912,
"learning_rate": 0.0002,
"loss": 0.0825,
"step": 8080
},
{
"epoch": 0.014713137021771896,
"grad_norm": 0.18205074965953827,
"learning_rate": 0.0002,
"loss": 0.0699,
"step": 8090
},
{
"epoch": 0.014731323841329092,
"grad_norm": 0.013200881890952587,
"learning_rate": 0.0002,
"loss": 0.0228,
"step": 8100
},
{
"epoch": 0.014749510660886289,
"grad_norm": 0.21043474972248077,
"learning_rate": 0.0002,
"loss": 0.2138,
"step": 8110
},
{
"epoch": 0.014767697480443485,
"grad_norm": 0.1000015065073967,
"learning_rate": 0.0002,
"loss": 0.0906,
"step": 8120
},
{
"epoch": 0.014785884300000681,
"grad_norm": 0.045657768845558167,
"learning_rate": 0.0002,
"loss": 0.0826,
"step": 8130
},
{
"epoch": 0.014804071119557878,
"grad_norm": 0.13545630872249603,
"learning_rate": 0.0002,
"loss": 0.066,
"step": 8140
},
{
"epoch": 0.014822257939115074,
"grad_norm": 0.01422254927456379,
"learning_rate": 0.0002,
"loss": 0.0179,
"step": 8150
},
{
"epoch": 0.014840444758672272,
"grad_norm": 0.12108676135540009,
"learning_rate": 0.0002,
"loss": 0.1717,
"step": 8160
},
{
"epoch": 0.014858631578229468,
"grad_norm": 0.10441934317350388,
"learning_rate": 0.0002,
"loss": 0.106,
"step": 8170
},
{
"epoch": 0.014876818397786665,
"grad_norm": 0.08105968683958054,
"learning_rate": 0.0002,
"loss": 0.0829,
"step": 8180
},
{
"epoch": 0.014895005217343861,
"grad_norm": 0.12230301648378372,
"learning_rate": 0.0002,
"loss": 0.0731,
"step": 8190
},
{
"epoch": 0.014913192036901057,
"grad_norm": 0.033857300877571106,
"learning_rate": 0.0002,
"loss": 0.029,
"step": 8200
},
{
"epoch": 0.014931378856458254,
"grad_norm": 0.04827893525362015,
"learning_rate": 0.0002,
"loss": 0.1369,
"step": 8210
},
{
"epoch": 0.01494956567601545,
"grad_norm": 0.056212421506643295,
"learning_rate": 0.0002,
"loss": 0.0879,
"step": 8220
},
{
"epoch": 0.014967752495572646,
"grad_norm": 0.03163846209645271,
"learning_rate": 0.0002,
"loss": 0.0698,
"step": 8230
},
{
"epoch": 0.014985939315129843,
"grad_norm": 0.09394920617341995,
"learning_rate": 0.0002,
"loss": 0.0688,
"step": 8240
},
{
"epoch": 0.015004126134687039,
"grad_norm": 0.024936649948358536,
"learning_rate": 0.0002,
"loss": 0.0211,
"step": 8250
},
{
"epoch": 0.015022312954244235,
"grad_norm": 4.499615669250488,
"learning_rate": 0.0002,
"loss": 2.7596,
"step": 8260
},
{
"epoch": 0.015040499773801431,
"grad_norm": 9.221298217773438,
"learning_rate": 0.0002,
"loss": 0.9135,
"step": 8270
},
{
"epoch": 0.015058686593358628,
"grad_norm": 0.5199778079986572,
"learning_rate": 0.0002,
"loss": 0.1441,
"step": 8280
},
{
"epoch": 0.015076873412915824,
"grad_norm": 0.07028087228536606,
"learning_rate": 0.0002,
"loss": 0.0793,
"step": 8290
},
{
"epoch": 0.01509506023247302,
"grad_norm": 0.0003307730657979846,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 8300
},
{
"epoch": 0.015113247052030217,
"grad_norm": 0.7940683960914612,
"learning_rate": 0.0002,
"loss": 0.7233,
"step": 8310
},
{
"epoch": 0.015131433871587413,
"grad_norm": 0.09774448722600937,
"learning_rate": 0.0002,
"loss": 0.1451,
"step": 8320
},
{
"epoch": 0.015149620691144611,
"grad_norm": 0.3088306188583374,
"learning_rate": 0.0002,
"loss": 0.0986,
"step": 8330
},
{
"epoch": 0.015167807510701807,
"grad_norm": 0.08629265427589417,
"learning_rate": 0.0002,
"loss": 0.0581,
"step": 8340
},
{
"epoch": 0.015185994330259004,
"grad_norm": 0.0011582528240978718,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 8350
},
{
"epoch": 0.0152041811498162,
"grad_norm": 0.48978063464164734,
"learning_rate": 0.0002,
"loss": 0.5111,
"step": 8360
},
{
"epoch": 0.015222367969373396,
"grad_norm": 0.2633112668991089,
"learning_rate": 0.0002,
"loss": 0.1354,
"step": 8370
},
{
"epoch": 0.015240554788930593,
"grad_norm": 0.058184925466775894,
"learning_rate": 0.0002,
"loss": 0.0963,
"step": 8380
},
{
"epoch": 0.015258741608487789,
"grad_norm": 0.397290974855423,
"learning_rate": 0.0002,
"loss": 0.0915,
"step": 8390
},
{
"epoch": 0.015276928428044985,
"grad_norm": 0.0013334077084437013,
"learning_rate": 0.0002,
"loss": 0.0117,
"step": 8400
},
{
"epoch": 0.015295115247602182,
"grad_norm": 3.2027626037597656,
"learning_rate": 0.0002,
"loss": 0.3642,
"step": 8410
},
{
"epoch": 0.015313302067159378,
"grad_norm": 0.4110456705093384,
"learning_rate": 0.0002,
"loss": 0.1347,
"step": 8420
},
{
"epoch": 0.015331488886716574,
"grad_norm": 0.19789688289165497,
"learning_rate": 0.0002,
"loss": 0.0946,
"step": 8430
},
{
"epoch": 0.01534967570627377,
"grad_norm": 0.15914630889892578,
"learning_rate": 0.0002,
"loss": 0.0619,
"step": 8440
},
{
"epoch": 0.015367862525830967,
"grad_norm": 0.004021051339805126,
"learning_rate": 0.0002,
"loss": 0.0081,
"step": 8450
},
{
"epoch": 0.015386049345388163,
"grad_norm": 0.25250542163848877,
"learning_rate": 0.0002,
"loss": 0.2409,
"step": 8460
},
{
"epoch": 0.01540423616494536,
"grad_norm": 0.16660314798355103,
"learning_rate": 0.0002,
"loss": 0.1041,
"step": 8470
},
{
"epoch": 0.015422422984502556,
"grad_norm": 0.09435573220252991,
"learning_rate": 0.0002,
"loss": 0.0838,
"step": 8480
},
{
"epoch": 0.015440609804059752,
"grad_norm": 0.1622086614370346,
"learning_rate": 0.0002,
"loss": 0.0648,
"step": 8490
},
{
"epoch": 0.015458796623616948,
"grad_norm": 0.002267120871692896,
"learning_rate": 0.0002,
"loss": 0.0057,
"step": 8500
},
{
"epoch": 0.015476983443174146,
"grad_norm": 0.11559420824050903,
"learning_rate": 0.0002,
"loss": 0.2994,
"step": 8510
},
{
"epoch": 0.015495170262731343,
"grad_norm": 0.18291179835796356,
"learning_rate": 0.0002,
"loss": 0.0908,
"step": 8520
},
{
"epoch": 0.015513357082288539,
"grad_norm": 0.14989323914051056,
"learning_rate": 0.0002,
"loss": 0.0912,
"step": 8530
},
{
"epoch": 0.015531543901845735,
"grad_norm": 0.09752708673477173,
"learning_rate": 0.0002,
"loss": 0.0586,
"step": 8540
},
{
"epoch": 0.015549730721402932,
"grad_norm": 0.0005314307054504752,
"learning_rate": 0.0002,
"loss": 0.0026,
"step": 8550
},
{
"epoch": 0.015567917540960128,
"grad_norm": 0.18309178948402405,
"learning_rate": 0.0002,
"loss": 0.3059,
"step": 8560
},
{
"epoch": 0.015586104360517324,
"grad_norm": 0.8144251108169556,
"learning_rate": 0.0002,
"loss": 0.1103,
"step": 8570
},
{
"epoch": 0.01560429118007452,
"grad_norm": 0.0331404022872448,
"learning_rate": 0.0002,
"loss": 0.0898,
"step": 8580
},
{
"epoch": 0.015622477999631717,
"grad_norm": 0.1460132598876953,
"learning_rate": 0.0002,
"loss": 0.0656,
"step": 8590
},
{
"epoch": 0.015640664819188913,
"grad_norm": 0.013606027700006962,
"learning_rate": 0.0002,
"loss": 0.0076,
"step": 8600
},
{
"epoch": 0.01565885163874611,
"grad_norm": 0.22224061191082,
"learning_rate": 0.0002,
"loss": 0.2609,
"step": 8610
},
{
"epoch": 0.015677038458303306,
"grad_norm": 0.22729800641536713,
"learning_rate": 0.0002,
"loss": 0.1028,
"step": 8620
},
{
"epoch": 0.015695225277860502,
"grad_norm": 0.0848810002207756,
"learning_rate": 0.0002,
"loss": 0.0871,
"step": 8630
},
{
"epoch": 0.0157134120974177,
"grad_norm": 0.17896370589733124,
"learning_rate": 0.0002,
"loss": 0.0636,
"step": 8640
},
{
"epoch": 0.015731598916974895,
"grad_norm": 0.006263076793402433,
"learning_rate": 0.0002,
"loss": 0.0068,
"step": 8650
},
{
"epoch": 0.01574978573653209,
"grad_norm": 0.29927679896354675,
"learning_rate": 0.0002,
"loss": 0.2761,
"step": 8660
},
{
"epoch": 0.015767972556089287,
"grad_norm": 0.05662700906395912,
"learning_rate": 0.0002,
"loss": 0.1029,
"step": 8670
},
{
"epoch": 0.015786159375646484,
"grad_norm": 0.09140895307064056,
"learning_rate": 0.0002,
"loss": 0.0854,
"step": 8680
},
{
"epoch": 0.01580434619520368,
"grad_norm": 0.21034927666187286,
"learning_rate": 0.0002,
"loss": 0.0691,
"step": 8690
},
{
"epoch": 0.015822533014760876,
"grad_norm": 0.0010229075560346246,
"learning_rate": 0.0002,
"loss": 0.0033,
"step": 8700
},
{
"epoch": 0.015840719834318073,
"grad_norm": 0.0626237690448761,
"learning_rate": 0.0002,
"loss": 0.3583,
"step": 8710
},
{
"epoch": 0.01585890665387527,
"grad_norm": 0.10027278959751129,
"learning_rate": 0.0002,
"loss": 0.0959,
"step": 8720
},
{
"epoch": 0.015877093473432465,
"grad_norm": 0.0870286151766777,
"learning_rate": 0.0002,
"loss": 0.0767,
"step": 8730
},
{
"epoch": 0.01589528029298966,
"grad_norm": 0.16106969118118286,
"learning_rate": 0.0002,
"loss": 0.0763,
"step": 8740
},
{
"epoch": 0.015913467112546858,
"grad_norm": 0.0022529088892042637,
"learning_rate": 0.0002,
"loss": 0.0108,
"step": 8750
},
{
"epoch": 0.015931653932104058,
"grad_norm": 0.06070050224661827,
"learning_rate": 0.0002,
"loss": 0.2606,
"step": 8760
},
{
"epoch": 0.015949840751661254,
"grad_norm": 0.09406338632106781,
"learning_rate": 0.0002,
"loss": 0.1062,
"step": 8770
},
{
"epoch": 0.01596802757121845,
"grad_norm": 0.1367248147726059,
"learning_rate": 0.0002,
"loss": 0.0757,
"step": 8780
},
{
"epoch": 0.015986214390775647,
"grad_norm": 0.26938319206237793,
"learning_rate": 0.0002,
"loss": 0.0733,
"step": 8790
},
{
"epoch": 0.016004401210332843,
"grad_norm": 0.011559409089386463,
"learning_rate": 0.0002,
"loss": 0.0176,
"step": 8800
},
{
"epoch": 0.01602258802989004,
"grad_norm": 0.12351766228675842,
"learning_rate": 0.0002,
"loss": 0.24,
"step": 8810
},
{
"epoch": 0.016040774849447235,
"grad_norm": 0.08965809643268585,
"learning_rate": 0.0002,
"loss": 0.0947,
"step": 8820
},
{
"epoch": 0.016058961669004432,
"grad_norm": 0.027005961164832115,
"learning_rate": 0.0002,
"loss": 0.0738,
"step": 8830
},
{
"epoch": 0.016077148488561628,
"grad_norm": 0.18656685948371887,
"learning_rate": 0.0002,
"loss": 0.0667,
"step": 8840
},
{
"epoch": 0.016095335308118824,
"grad_norm": 0.003148626768961549,
"learning_rate": 0.0002,
"loss": 0.0119,
"step": 8850
},
{
"epoch": 0.01611352212767602,
"grad_norm": 0.07959452271461487,
"learning_rate": 0.0002,
"loss": 0.275,
"step": 8860
},
{
"epoch": 0.016131708947233217,
"grad_norm": 0.19433775544166565,
"learning_rate": 0.0002,
"loss": 0.0872,
"step": 8870
},
{
"epoch": 0.016149895766790413,
"grad_norm": 0.1376393735408783,
"learning_rate": 0.0002,
"loss": 0.0779,
"step": 8880
},
{
"epoch": 0.01616808258634761,
"grad_norm": 0.18282419443130493,
"learning_rate": 0.0002,
"loss": 0.068,
"step": 8890
},
{
"epoch": 0.016186269405904806,
"grad_norm": 0.0112565653398633,
"learning_rate": 0.0002,
"loss": 0.0137,
"step": 8900
},
{
"epoch": 0.016204456225462002,
"grad_norm": 0.08975637704133987,
"learning_rate": 0.0002,
"loss": 0.23,
"step": 8910
},
{
"epoch": 0.0162226430450192,
"grad_norm": 0.19316238164901733,
"learning_rate": 0.0002,
"loss": 0.0975,
"step": 8920
},
{
"epoch": 0.016240829864576395,
"grad_norm": 0.1870724856853485,
"learning_rate": 0.0002,
"loss": 0.0862,
"step": 8930
},
{
"epoch": 0.01625901668413359,
"grad_norm": 0.19031721353530884,
"learning_rate": 0.0002,
"loss": 0.0735,
"step": 8940
},
{
"epoch": 0.016277203503690788,
"grad_norm": 0.015979783609509468,
"learning_rate": 0.0002,
"loss": 0.0249,
"step": 8950
},
{
"epoch": 0.016295390323247984,
"grad_norm": 0.09105712175369263,
"learning_rate": 0.0002,
"loss": 0.1573,
"step": 8960
},
{
"epoch": 0.01631357714280518,
"grad_norm": 0.13035650551319122,
"learning_rate": 0.0002,
"loss": 0.0958,
"step": 8970
},
{
"epoch": 0.016331763962362376,
"grad_norm": 0.18613573908805847,
"learning_rate": 0.0002,
"loss": 0.088,
"step": 8980
},
{
"epoch": 0.016349950781919573,
"grad_norm": 0.2518664300441742,
"learning_rate": 0.0002,
"loss": 0.0725,
"step": 8990
},
{
"epoch": 0.01636813760147677,
"grad_norm": 0.03324449062347412,
"learning_rate": 0.0002,
"loss": 0.0256,
"step": 9000
},
{
"epoch": 0.016386324421033965,
"grad_norm": 0.08766523003578186,
"learning_rate": 0.0002,
"loss": 0.1531,
"step": 9010
},
{
"epoch": 0.01640451124059116,
"grad_norm": 0.14177583158016205,
"learning_rate": 0.0002,
"loss": 0.0861,
"step": 9020
},
{
"epoch": 0.016422698060148358,
"grad_norm": 0.1354762315750122,
"learning_rate": 0.0002,
"loss": 0.0782,
"step": 9030
},
{
"epoch": 0.016440884879705554,
"grad_norm": 0.15894347429275513,
"learning_rate": 0.0002,
"loss": 0.073,
"step": 9040
},
{
"epoch": 0.01645907169926275,
"grad_norm": 0.02154761180281639,
"learning_rate": 0.0002,
"loss": 0.0156,
"step": 9050
},
{
"epoch": 0.016477258518819947,
"grad_norm": 0.06432317197322845,
"learning_rate": 0.0002,
"loss": 0.1384,
"step": 9060
},
{
"epoch": 0.016495445338377143,
"grad_norm": 0.12112505733966827,
"learning_rate": 0.0002,
"loss": 0.093,
"step": 9070
},
{
"epoch": 0.01651363215793434,
"grad_norm": 0.10628003627061844,
"learning_rate": 0.0002,
"loss": 0.0738,
"step": 9080
},
{
"epoch": 0.016531818977491536,
"grad_norm": 0.1930958330631256,
"learning_rate": 0.0002,
"loss": 0.0678,
"step": 9090
},
{
"epoch": 0.016550005797048732,
"grad_norm": 0.03878525644540787,
"learning_rate": 0.0002,
"loss": 0.0235,
"step": 9100
},
{
"epoch": 0.016568192616605932,
"grad_norm": 0.0920896977186203,
"learning_rate": 0.0002,
"loss": 0.1661,
"step": 9110
},
{
"epoch": 0.016586379436163128,
"grad_norm": 0.11687818914651871,
"learning_rate": 0.0002,
"loss": 0.0847,
"step": 9120
},
{
"epoch": 0.016604566255720325,
"grad_norm": 0.10511167347431183,
"learning_rate": 0.0002,
"loss": 0.0832,
"step": 9130
},
{
"epoch": 0.01662275307527752,
"grad_norm": 0.26365017890930176,
"learning_rate": 0.0002,
"loss": 0.0717,
"step": 9140
},
{
"epoch": 0.016640939894834717,
"grad_norm": 0.02445841394364834,
"learning_rate": 0.0002,
"loss": 0.0233,
"step": 9150
},
{
"epoch": 0.016659126714391913,
"grad_norm": 0.08213133364915848,
"learning_rate": 0.0002,
"loss": 0.1439,
"step": 9160
},
{
"epoch": 0.01667731353394911,
"grad_norm": 0.17025598883628845,
"learning_rate": 0.0002,
"loss": 0.0852,
"step": 9170
},
{
"epoch": 0.016695500353506306,
"grad_norm": 0.098059743642807,
"learning_rate": 0.0002,
"loss": 0.0761,
"step": 9180
},
{
"epoch": 0.016713687173063502,
"grad_norm": 0.18436011672019958,
"learning_rate": 0.0002,
"loss": 0.0674,
"step": 9190
},
{
"epoch": 0.0167318739926207,
"grad_norm": 0.011012010276317596,
"learning_rate": 0.0002,
"loss": 0.0221,
"step": 9200
},
{
"epoch": 0.016750060812177895,
"grad_norm": 0.07544030994176865,
"learning_rate": 0.0002,
"loss": 0.161,
"step": 9210
},
{
"epoch": 0.01676824763173509,
"grad_norm": 0.16041946411132812,
"learning_rate": 0.0002,
"loss": 0.0824,
"step": 9220
},
{
"epoch": 0.016786434451292288,
"grad_norm": 0.17295844852924347,
"learning_rate": 0.0002,
"loss": 0.0797,
"step": 9230
},
{
"epoch": 0.016804621270849484,
"grad_norm": 0.1818791776895523,
"learning_rate": 0.0002,
"loss": 0.0683,
"step": 9240
},
{
"epoch": 0.01682280809040668,
"grad_norm": 0.019515013322234154,
"learning_rate": 0.0002,
"loss": 0.0188,
"step": 9250
},
{
"epoch": 0.016840994909963877,
"grad_norm": 0.15059705078601837,
"learning_rate": 0.0002,
"loss": 0.1743,
"step": 9260
},
{
"epoch": 0.016859181729521073,
"grad_norm": 0.1481601595878601,
"learning_rate": 0.0002,
"loss": 0.0906,
"step": 9270
},
{
"epoch": 0.01687736854907827,
"grad_norm": 0.07433108985424042,
"learning_rate": 0.0002,
"loss": 0.08,
"step": 9280
},
{
"epoch": 0.016895555368635466,
"grad_norm": 0.1752692312002182,
"learning_rate": 0.0002,
"loss": 0.06,
"step": 9290
},
{
"epoch": 0.016913742188192662,
"grad_norm": 0.027612384408712387,
"learning_rate": 0.0002,
"loss": 0.0157,
"step": 9300
},
{
"epoch": 0.016931929007749858,
"grad_norm": 0.08575212955474854,
"learning_rate": 0.0002,
"loss": 0.1679,
"step": 9310
},
{
"epoch": 0.016950115827307054,
"grad_norm": 0.11127147823572159,
"learning_rate": 0.0002,
"loss": 0.0848,
"step": 9320
},
{
"epoch": 0.01696830264686425,
"grad_norm": 0.08989393711090088,
"learning_rate": 0.0002,
"loss": 0.0823,
"step": 9330
},
{
"epoch": 0.016986489466421447,
"grad_norm": 0.18898548185825348,
"learning_rate": 0.0002,
"loss": 0.0687,
"step": 9340
},
{
"epoch": 0.017004676285978643,
"grad_norm": 0.023646721616387367,
"learning_rate": 0.0002,
"loss": 0.0244,
"step": 9350
},
{
"epoch": 0.01702286310553584,
"grad_norm": 0.11511775106191635,
"learning_rate": 0.0002,
"loss": 0.1642,
"step": 9360
},
{
"epoch": 0.017041049925093036,
"grad_norm": 0.1458021104335785,
"learning_rate": 0.0002,
"loss": 0.084,
"step": 9370
},
{
"epoch": 0.017059236744650232,
"grad_norm": 0.060528095811605453,
"learning_rate": 0.0002,
"loss": 0.0809,
"step": 9380
},
{
"epoch": 0.01707742356420743,
"grad_norm": 0.16314280033111572,
"learning_rate": 0.0002,
"loss": 0.0661,
"step": 9390
},
{
"epoch": 0.017095610383764625,
"grad_norm": 0.03078557923436165,
"learning_rate": 0.0002,
"loss": 0.015,
"step": 9400
},
{
"epoch": 0.01711379720332182,
"grad_norm": 0.11488370597362518,
"learning_rate": 0.0002,
"loss": 0.1712,
"step": 9410
},
{
"epoch": 0.017131984022879018,
"grad_norm": 0.0972781702876091,
"learning_rate": 0.0002,
"loss": 0.0856,
"step": 9420
},
{
"epoch": 0.017150170842436214,
"grad_norm": 0.08523645251989365,
"learning_rate": 0.0002,
"loss": 0.0744,
"step": 9430
},
{
"epoch": 0.01716835766199341,
"grad_norm": 0.18629521131515503,
"learning_rate": 0.0002,
"loss": 0.0659,
"step": 9440
},
{
"epoch": 0.01718654448155061,
"grad_norm": 0.00908618327230215,
"learning_rate": 0.0002,
"loss": 0.0219,
"step": 9450
},
{
"epoch": 0.017204731301107806,
"grad_norm": 0.05552325397729874,
"learning_rate": 0.0002,
"loss": 0.1377,
"step": 9460
},
{
"epoch": 0.017222918120665003,
"grad_norm": 0.16133128106594086,
"learning_rate": 0.0002,
"loss": 0.0885,
"step": 9470
},
{
"epoch": 0.0172411049402222,
"grad_norm": 0.0965205654501915,
"learning_rate": 0.0002,
"loss": 0.0713,
"step": 9480
},
{
"epoch": 0.017259291759779395,
"grad_norm": 0.21675604581832886,
"learning_rate": 0.0002,
"loss": 0.0658,
"step": 9490
},
{
"epoch": 0.01727747857933659,
"grad_norm": 0.043898243457078934,
"learning_rate": 0.0002,
"loss": 0.0213,
"step": 9500
},
{
"epoch": 0.017295665398893788,
"grad_norm": 0.0968618243932724,
"learning_rate": 0.0002,
"loss": 0.1391,
"step": 9510
},
{
"epoch": 0.017313852218450984,
"grad_norm": 0.15061378479003906,
"learning_rate": 0.0002,
"loss": 0.0879,
"step": 9520
},
{
"epoch": 0.01733203903800818,
"grad_norm": 0.08481590449810028,
"learning_rate": 0.0002,
"loss": 0.0771,
"step": 9530
},
{
"epoch": 0.017350225857565377,
"grad_norm": 0.20935995876789093,
"learning_rate": 0.0002,
"loss": 0.0705,
"step": 9540
},
{
"epoch": 0.017368412677122573,
"grad_norm": 0.04010302573442459,
"learning_rate": 0.0002,
"loss": 0.0257,
"step": 9550
},
{
"epoch": 0.01738659949667977,
"grad_norm": 0.10532956570386887,
"learning_rate": 0.0002,
"loss": 0.1528,
"step": 9560
},
{
"epoch": 0.017404786316236966,
"grad_norm": 0.1484638750553131,
"learning_rate": 0.0002,
"loss": 0.0847,
"step": 9570
},
{
"epoch": 0.017422973135794162,
"grad_norm": 0.05873465910553932,
"learning_rate": 0.0002,
"loss": 0.0765,
"step": 9580
},
{
"epoch": 0.01744115995535136,
"grad_norm": 0.1689092516899109,
"learning_rate": 0.0002,
"loss": 0.0673,
"step": 9590
},
{
"epoch": 0.017459346774908555,
"grad_norm": 0.014237391762435436,
"learning_rate": 0.0002,
"loss": 0.0165,
"step": 9600
},
{
"epoch": 0.01747753359446575,
"grad_norm": 0.06250491738319397,
"learning_rate": 0.0002,
"loss": 0.1635,
"step": 9610
},
{
"epoch": 0.017495720414022947,
"grad_norm": 0.08895017951726913,
"learning_rate": 0.0002,
"loss": 0.0765,
"step": 9620
},
{
"epoch": 0.017513907233580144,
"grad_norm": 0.08614445477724075,
"learning_rate": 0.0002,
"loss": 0.0852,
"step": 9630
},
{
"epoch": 0.01753209405313734,
"grad_norm": 0.25440698862075806,
"learning_rate": 0.0002,
"loss": 0.0735,
"step": 9640
},
{
"epoch": 0.017550280872694536,
"grad_norm": 0.015447271056473255,
"learning_rate": 0.0002,
"loss": 0.0199,
"step": 9650
},
{
"epoch": 0.017568467692251732,
"grad_norm": 0.08685171604156494,
"learning_rate": 0.0002,
"loss": 0.1721,
"step": 9660
},
{
"epoch": 0.01758665451180893,
"grad_norm": 0.1007658839225769,
"learning_rate": 0.0002,
"loss": 0.0858,
"step": 9670
},
{
"epoch": 0.017604841331366125,
"grad_norm": 0.1291055977344513,
"learning_rate": 0.0002,
"loss": 0.0817,
"step": 9680
},
{
"epoch": 0.01762302815092332,
"grad_norm": 0.21103522181510925,
"learning_rate": 0.0002,
"loss": 0.0707,
"step": 9690
},
{
"epoch": 0.017641214970480518,
"grad_norm": 0.027955593541264534,
"learning_rate": 0.0002,
"loss": 0.0199,
"step": 9700
},
{
"epoch": 0.017659401790037714,
"grad_norm": 0.06710019707679749,
"learning_rate": 0.0002,
"loss": 0.1623,
"step": 9710
},
{
"epoch": 0.01767758860959491,
"grad_norm": 0.09083720296621323,
"learning_rate": 0.0002,
"loss": 0.0845,
"step": 9720
},
{
"epoch": 0.017695775429152107,
"grad_norm": 0.07230041921138763,
"learning_rate": 0.0002,
"loss": 0.0767,
"step": 9730
},
{
"epoch": 0.017713962248709303,
"grad_norm": 0.19016912579536438,
"learning_rate": 0.0002,
"loss": 0.0648,
"step": 9740
},
{
"epoch": 0.0177321490682665,
"grad_norm": 0.03999534249305725,
"learning_rate": 0.0002,
"loss": 0.0216,
"step": 9750
},
{
"epoch": 0.017750335887823696,
"grad_norm": 0.08057496696710587,
"learning_rate": 0.0002,
"loss": 0.1251,
"step": 9760
},
{
"epoch": 0.017768522707380892,
"grad_norm": 0.16494789719581604,
"learning_rate": 0.0002,
"loss": 0.0896,
"step": 9770
},
{
"epoch": 0.017786709526938088,
"grad_norm": 0.07119818776845932,
"learning_rate": 0.0002,
"loss": 0.0749,
"step": 9780
},
{
"epoch": 0.017804896346495288,
"grad_norm": 0.1790028065443039,
"learning_rate": 0.0002,
"loss": 0.0718,
"step": 9790
},
{
"epoch": 0.017823083166052484,
"grad_norm": 0.055643875151872635,
"learning_rate": 0.0002,
"loss": 0.0294,
"step": 9800
},
{
"epoch": 0.01784126998560968,
"grad_norm": 0.15530900657176971,
"learning_rate": 0.0002,
"loss": 0.1343,
"step": 9810
},
{
"epoch": 0.017859456805166877,
"grad_norm": 0.08989892899990082,
"learning_rate": 0.0002,
"loss": 0.0778,
"step": 9820
},
{
"epoch": 0.017877643624724073,
"grad_norm": 0.038054581731557846,
"learning_rate": 0.0002,
"loss": 0.0842,
"step": 9830
},
{
"epoch": 0.01789583044428127,
"grad_norm": 0.12264154851436615,
"learning_rate": 0.0002,
"loss": 0.065,
"step": 9840
},
{
"epoch": 0.017914017263838466,
"grad_norm": 0.03432893753051758,
"learning_rate": 0.0002,
"loss": 0.02,
"step": 9850
},
{
"epoch": 0.017932204083395662,
"grad_norm": 0.0516468770802021,
"learning_rate": 0.0002,
"loss": 0.1339,
"step": 9860
},
{
"epoch": 0.01795039090295286,
"grad_norm": 0.11306226998567581,
"learning_rate": 0.0002,
"loss": 0.0842,
"step": 9870
},
{
"epoch": 0.017968577722510055,
"grad_norm": 0.051579318940639496,
"learning_rate": 0.0002,
"loss": 0.0795,
"step": 9880
},
{
"epoch": 0.01798676454206725,
"grad_norm": 0.19050930440425873,
"learning_rate": 0.0002,
"loss": 0.0673,
"step": 9890
},
{
"epoch": 0.018004951361624447,
"grad_norm": 0.015286738984286785,
"learning_rate": 0.0002,
"loss": 0.0169,
"step": 9900
},
{
"epoch": 0.018023138181181644,
"grad_norm": 0.16055025160312653,
"learning_rate": 0.0002,
"loss": 0.1655,
"step": 9910
},
{
"epoch": 0.01804132500073884,
"grad_norm": 0.05445674806833267,
"learning_rate": 0.0002,
"loss": 0.0786,
"step": 9920
},
{
"epoch": 0.018059511820296036,
"grad_norm": 0.07221481204032898,
"learning_rate": 0.0002,
"loss": 0.0726,
"step": 9930
},
{
"epoch": 0.018077698639853233,
"grad_norm": 0.15800146758556366,
"learning_rate": 0.0002,
"loss": 0.0607,
"step": 9940
},
{
"epoch": 0.01809588545941043,
"grad_norm": 0.007713336031883955,
"learning_rate": 0.0002,
"loss": 0.0148,
"step": 9950
},
{
"epoch": 0.018114072278967625,
"grad_norm": 0.04677269607782364,
"learning_rate": 0.0002,
"loss": 0.1718,
"step": 9960
},
{
"epoch": 0.01813225909852482,
"grad_norm": 0.1699189841747284,
"learning_rate": 0.0002,
"loss": 0.0865,
"step": 9970
},
{
"epoch": 0.018150445918082018,
"grad_norm": 0.04046279937028885,
"learning_rate": 0.0002,
"loss": 0.0781,
"step": 9980
},
{
"epoch": 0.018168632737639214,
"grad_norm": 0.164504736661911,
"learning_rate": 0.0002,
"loss": 0.0645,
"step": 9990
},
{
"epoch": 0.01818681955719641,
"grad_norm": 0.014479747042059898,
"learning_rate": 0.0002,
"loss": 0.0186,
"step": 10000
},
{
"epoch": 0.018205006376753607,
"grad_norm": 0.051388438791036606,
"learning_rate": 0.0002,
"loss": 0.1414,
"step": 10010
},
{
"epoch": 0.018223193196310803,
"grad_norm": 0.11734543740749359,
"learning_rate": 0.0002,
"loss": 0.0894,
"step": 10020
},
{
"epoch": 0.018241380015868,
"grad_norm": 0.022312749177217484,
"learning_rate": 0.0002,
"loss": 0.0775,
"step": 10030
},
{
"epoch": 0.018259566835425196,
"grad_norm": 0.1579144448041916,
"learning_rate": 0.0002,
"loss": 0.0668,
"step": 10040
},
{
"epoch": 0.018277753654982392,
"grad_norm": 0.02757895737886429,
"learning_rate": 0.0002,
"loss": 0.0197,
"step": 10050
},
{
"epoch": 0.01829594047453959,
"grad_norm": 0.07557844370603561,
"learning_rate": 0.0002,
"loss": 0.1526,
"step": 10060
},
{
"epoch": 0.018314127294096785,
"grad_norm": 0.1216227188706398,
"learning_rate": 0.0002,
"loss": 0.0871,
"step": 10070
},
{
"epoch": 0.01833231411365398,
"grad_norm": 0.04201141744852066,
"learning_rate": 0.0002,
"loss": 0.0723,
"step": 10080
},
{
"epoch": 0.018350500933211177,
"grad_norm": 0.151902437210083,
"learning_rate": 0.0002,
"loss": 0.063,
"step": 10090
},
{
"epoch": 0.018368687752768374,
"grad_norm": 0.028730260208249092,
"learning_rate": 0.0002,
"loss": 0.0154,
"step": 10100
},
{
"epoch": 0.01838687457232557,
"grad_norm": 0.0815989300608635,
"learning_rate": 0.0002,
"loss": 0.1439,
"step": 10110
},
{
"epoch": 0.018405061391882766,
"grad_norm": 0.16359028220176697,
"learning_rate": 0.0002,
"loss": 0.0901,
"step": 10120
},
{
"epoch": 0.018423248211439962,
"grad_norm": 0.055030226707458496,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 10130
},
{
"epoch": 0.018441435030997162,
"grad_norm": 0.17064853012561798,
"learning_rate": 0.0002,
"loss": 0.0713,
"step": 10140
},
{
"epoch": 0.01845962185055436,
"grad_norm": 0.024902408942580223,
"learning_rate": 0.0002,
"loss": 0.0201,
"step": 10150
},
{
"epoch": 0.018477808670111555,
"grad_norm": 0.037377748638391495,
"learning_rate": 0.0002,
"loss": 0.1394,
"step": 10160
},
{
"epoch": 0.01849599548966875,
"grad_norm": 0.14072410762310028,
"learning_rate": 0.0002,
"loss": 0.088,
"step": 10170
},
{
"epoch": 0.018514182309225947,
"grad_norm": 0.07339414954185486,
"learning_rate": 0.0002,
"loss": 0.0739,
"step": 10180
},
{
"epoch": 0.018532369128783144,
"grad_norm": 0.166766956448555,
"learning_rate": 0.0002,
"loss": 0.0648,
"step": 10190
},
{
"epoch": 0.01855055594834034,
"grad_norm": 0.009605699218809605,
"learning_rate": 0.0002,
"loss": 0.0148,
"step": 10200
},
{
"epoch": 0.018568742767897536,
"grad_norm": 0.045747216790914536,
"learning_rate": 0.0002,
"loss": 0.1426,
"step": 10210
},
{
"epoch": 0.018586929587454733,
"grad_norm": 0.09927495568990707,
"learning_rate": 0.0002,
"loss": 0.0757,
"step": 10220
},
{
"epoch": 0.01860511640701193,
"grad_norm": 0.032050736248493195,
"learning_rate": 0.0002,
"loss": 0.0732,
"step": 10230
},
{
"epoch": 0.018623303226569125,
"grad_norm": 0.14915086328983307,
"learning_rate": 0.0002,
"loss": 0.0619,
"step": 10240
},
{
"epoch": 0.01864149004612632,
"grad_norm": 0.019674960523843765,
"learning_rate": 0.0002,
"loss": 0.0176,
"step": 10250
},
{
"epoch": 0.018659676865683518,
"grad_norm": 0.0990150198340416,
"learning_rate": 0.0002,
"loss": 0.156,
"step": 10260
},
{
"epoch": 0.018677863685240714,
"grad_norm": 0.1409665048122406,
"learning_rate": 0.0002,
"loss": 0.0843,
"step": 10270
},
{
"epoch": 0.01869605050479791,
"grad_norm": 0.0232121329754591,
"learning_rate": 0.0002,
"loss": 0.0712,
"step": 10280
},
{
"epoch": 0.018714237324355107,
"grad_norm": 0.14811532199382782,
"learning_rate": 0.0002,
"loss": 0.0649,
"step": 10290
},
{
"epoch": 0.018732424143912303,
"grad_norm": 0.025812385603785515,
"learning_rate": 0.0002,
"loss": 0.0191,
"step": 10300
},
{
"epoch": 0.0187506109634695,
"grad_norm": 0.03710811957716942,
"learning_rate": 0.0002,
"loss": 0.1323,
"step": 10310
},
{
"epoch": 0.018768797783026696,
"grad_norm": 0.16586032509803772,
"learning_rate": 0.0002,
"loss": 0.0868,
"step": 10320
},
{
"epoch": 0.018786984602583892,
"grad_norm": 0.09154761582612991,
"learning_rate": 0.0002,
"loss": 0.0762,
"step": 10330
},
{
"epoch": 0.01880517142214109,
"grad_norm": 0.20400644838809967,
"learning_rate": 0.0002,
"loss": 0.072,
"step": 10340
},
{
"epoch": 0.018823358241698285,
"grad_norm": 0.04426256939768791,
"learning_rate": 0.0002,
"loss": 0.0281,
"step": 10350
},
{
"epoch": 0.01884154506125548,
"grad_norm": 0.10118848830461502,
"learning_rate": 0.0002,
"loss": 0.116,
"step": 10360
},
{
"epoch": 0.018859731880812677,
"grad_norm": 0.11934473365545273,
"learning_rate": 0.0002,
"loss": 0.0779,
"step": 10370
},
{
"epoch": 0.018877918700369874,
"grad_norm": 0.04116957262158394,
"learning_rate": 0.0002,
"loss": 0.0811,
"step": 10380
},
{
"epoch": 0.01889610551992707,
"grad_norm": 0.16668827831745148,
"learning_rate": 0.0002,
"loss": 0.064,
"step": 10390
},
{
"epoch": 0.018914292339484266,
"grad_norm": 0.04703928530216217,
"learning_rate": 0.0002,
"loss": 0.0189,
"step": 10400
},
{
"epoch": 0.018932479159041463,
"grad_norm": 0.10670439153909683,
"learning_rate": 0.0002,
"loss": 0.1329,
"step": 10410
},
{
"epoch": 0.01895066597859866,
"grad_norm": 0.033486492931842804,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 10420
},
{
"epoch": 0.018968852798155855,
"grad_norm": 0.03778929263353348,
"learning_rate": 0.0002,
"loss": 0.0739,
"step": 10430
},
{
"epoch": 0.01898703961771305,
"grad_norm": 0.1499231606721878,
"learning_rate": 0.0002,
"loss": 0.0691,
"step": 10440
},
{
"epoch": 0.019005226437270248,
"grad_norm": 0.020496509969234467,
"learning_rate": 0.0002,
"loss": 0.0166,
"step": 10450
},
{
"epoch": 0.019023413256827444,
"grad_norm": 0.07973606884479523,
"learning_rate": 0.0002,
"loss": 0.1647,
"step": 10460
},
{
"epoch": 0.01904160007638464,
"grad_norm": 0.2187214344739914,
"learning_rate": 0.0002,
"loss": 0.0851,
"step": 10470
},
{
"epoch": 0.01905978689594184,
"grad_norm": 0.05374719575047493,
"learning_rate": 0.0002,
"loss": 0.0763,
"step": 10480
},
{
"epoch": 0.019077973715499037,
"grad_norm": 0.20388802886009216,
"learning_rate": 0.0002,
"loss": 0.0661,
"step": 10490
},
{
"epoch": 0.019096160535056233,
"grad_norm": 0.023114027455449104,
"learning_rate": 0.0002,
"loss": 0.0196,
"step": 10500
},
{
"epoch": 0.01911434735461343,
"grad_norm": 0.07263924926519394,
"learning_rate": 0.0002,
"loss": 0.1397,
"step": 10510
},
{
"epoch": 0.019132534174170625,
"grad_norm": 0.13590484857559204,
"learning_rate": 0.0002,
"loss": 0.0866,
"step": 10520
},
{
"epoch": 0.019150720993727822,
"grad_norm": 0.03279007971286774,
"learning_rate": 0.0002,
"loss": 0.0845,
"step": 10530
},
{
"epoch": 0.019168907813285018,
"grad_norm": 0.16929341852664948,
"learning_rate": 0.0002,
"loss": 0.0638,
"step": 10540
},
{
"epoch": 0.019187094632842214,
"grad_norm": 0.043504901230335236,
"learning_rate": 0.0002,
"loss": 0.0217,
"step": 10550
},
{
"epoch": 0.01920528145239941,
"grad_norm": 0.05582214519381523,
"learning_rate": 0.0002,
"loss": 0.1454,
"step": 10560
},
{
"epoch": 0.019223468271956607,
"grad_norm": 0.12112174928188324,
"learning_rate": 0.0002,
"loss": 0.0773,
"step": 10570
},
{
"epoch": 0.019241655091513803,
"grad_norm": 0.028584860265254974,
"learning_rate": 0.0002,
"loss": 0.0766,
"step": 10580
},
{
"epoch": 0.019259841911071,
"grad_norm": 0.14817841351032257,
"learning_rate": 0.0002,
"loss": 0.07,
"step": 10590
},
{
"epoch": 0.019278028730628196,
"grad_norm": 0.0354049950838089,
"learning_rate": 0.0002,
"loss": 0.0205,
"step": 10600
},
{
"epoch": 0.019296215550185392,
"grad_norm": 0.0580359622836113,
"learning_rate": 0.0002,
"loss": 0.126,
"step": 10610
},
{
"epoch": 0.01931440236974259,
"grad_norm": 0.1495518982410431,
"learning_rate": 0.0002,
"loss": 0.0759,
"step": 10620
},
{
"epoch": 0.019332589189299785,
"grad_norm": 0.029057197272777557,
"learning_rate": 0.0002,
"loss": 0.0751,
"step": 10630
},
{
"epoch": 0.01935077600885698,
"grad_norm": 0.17057828605175018,
"learning_rate": 0.0002,
"loss": 0.0675,
"step": 10640
},
{
"epoch": 0.019368962828414178,
"grad_norm": 0.029123524203896523,
"learning_rate": 0.0002,
"loss": 0.0207,
"step": 10650
},
{
"epoch": 0.019387149647971374,
"grad_norm": 0.06929099559783936,
"learning_rate": 0.0002,
"loss": 0.1272,
"step": 10660
},
{
"epoch": 0.01940533646752857,
"grad_norm": 0.0806749165058136,
"learning_rate": 0.0002,
"loss": 0.0825,
"step": 10670
},
{
"epoch": 0.019423523287085766,
"grad_norm": 0.025454839691519737,
"learning_rate": 0.0002,
"loss": 0.0786,
"step": 10680
},
{
"epoch": 0.019441710106642963,
"grad_norm": 0.1879327893257141,
"learning_rate": 0.0002,
"loss": 0.0664,
"step": 10690
},
{
"epoch": 0.01945989692620016,
"grad_norm": 0.03334587439894676,
"learning_rate": 0.0002,
"loss": 0.0217,
"step": 10700
},
{
"epoch": 0.019478083745757355,
"grad_norm": 0.05760979652404785,
"learning_rate": 0.0002,
"loss": 0.141,
"step": 10710
},
{
"epoch": 0.01949627056531455,
"grad_norm": 0.03565089777112007,
"learning_rate": 0.0002,
"loss": 0.0849,
"step": 10720
},
{
"epoch": 0.019514457384871748,
"grad_norm": 0.1484966278076172,
"learning_rate": 0.0002,
"loss": 0.0839,
"step": 10730
},
{
"epoch": 0.019532644204428944,
"grad_norm": 0.22200991213321686,
"learning_rate": 0.0002,
"loss": 0.0673,
"step": 10740
},
{
"epoch": 0.01955083102398614,
"grad_norm": 0.017915472388267517,
"learning_rate": 0.0002,
"loss": 0.0203,
"step": 10750
},
{
"epoch": 0.019569017843543337,
"grad_norm": 0.11213338375091553,
"learning_rate": 0.0002,
"loss": 0.126,
"step": 10760
},
{
"epoch": 0.019587204663100533,
"grad_norm": 0.1563912183046341,
"learning_rate": 0.0002,
"loss": 0.0803,
"step": 10770
},
{
"epoch": 0.01960539148265773,
"grad_norm": 0.02315036952495575,
"learning_rate": 0.0002,
"loss": 0.0801,
"step": 10780
},
{
"epoch": 0.019623578302214926,
"grad_norm": 0.14482071995735168,
"learning_rate": 0.0002,
"loss": 0.0701,
"step": 10790
},
{
"epoch": 0.019641765121772122,
"grad_norm": 0.0369495190680027,
"learning_rate": 0.0002,
"loss": 0.022,
"step": 10800
},
{
"epoch": 0.01965995194132932,
"grad_norm": 0.0659516304731369,
"learning_rate": 0.0002,
"loss": 0.1282,
"step": 10810
},
{
"epoch": 0.019678138760886515,
"grad_norm": 0.09046377241611481,
"learning_rate": 0.0002,
"loss": 0.0748,
"step": 10820
},
{
"epoch": 0.019696325580443715,
"grad_norm": 0.05669049918651581,
"learning_rate": 0.0002,
"loss": 0.0808,
"step": 10830
},
{
"epoch": 0.01971451240000091,
"grad_norm": 0.16696439683437347,
"learning_rate": 0.0002,
"loss": 0.0696,
"step": 10840
},
{
"epoch": 0.019732699219558107,
"grad_norm": 0.02596648782491684,
"learning_rate": 0.0002,
"loss": 0.0189,
"step": 10850
},
{
"epoch": 0.019750886039115303,
"grad_norm": 0.030568787828087807,
"learning_rate": 0.0002,
"loss": 0.1431,
"step": 10860
},
{
"epoch": 0.0197690728586725,
"grad_norm": 0.11519906669855118,
"learning_rate": 0.0002,
"loss": 0.0832,
"step": 10870
},
{
"epoch": 0.019787259678229696,
"grad_norm": 0.12018325924873352,
"learning_rate": 0.0002,
"loss": 0.0769,
"step": 10880
},
{
"epoch": 0.019805446497786892,
"grad_norm": 0.15875691175460815,
"learning_rate": 0.0002,
"loss": 0.0679,
"step": 10890
},
{
"epoch": 0.01982363331734409,
"grad_norm": 0.02812560275197029,
"learning_rate": 0.0002,
"loss": 0.0236,
"step": 10900
},
{
"epoch": 0.019841820136901285,
"grad_norm": 0.039342913776636124,
"learning_rate": 0.0002,
"loss": 0.1433,
"step": 10910
},
{
"epoch": 0.01986000695645848,
"grad_norm": 0.1218978762626648,
"learning_rate": 0.0002,
"loss": 0.0785,
"step": 10920
},
{
"epoch": 0.019878193776015678,
"grad_norm": 0.02437124028801918,
"learning_rate": 0.0002,
"loss": 0.081,
"step": 10930
},
{
"epoch": 0.019896380595572874,
"grad_norm": 0.16295987367630005,
"learning_rate": 0.0002,
"loss": 0.0615,
"step": 10940
},
{
"epoch": 0.01991456741513007,
"grad_norm": 0.03147517144680023,
"learning_rate": 0.0002,
"loss": 0.0167,
"step": 10950
},
{
"epoch": 0.019932754234687267,
"grad_norm": 0.051139310002326965,
"learning_rate": 0.0002,
"loss": 0.1486,
"step": 10960
},
{
"epoch": 0.019950941054244463,
"grad_norm": 0.10385333746671677,
"learning_rate": 0.0002,
"loss": 0.0835,
"step": 10970
},
{
"epoch": 0.01996912787380166,
"grad_norm": 0.029570510610938072,
"learning_rate": 0.0002,
"loss": 0.0808,
"step": 10980
},
{
"epoch": 0.019987314693358856,
"grad_norm": 0.1457994282245636,
"learning_rate": 0.0002,
"loss": 0.062,
"step": 10990
},
{
"epoch": 0.020005501512916052,
"grad_norm": 0.013582763262093067,
"learning_rate": 0.0002,
"loss": 0.0149,
"step": 11000
},
{
"epoch": 0.020023688332473248,
"grad_norm": 0.13736847043037415,
"learning_rate": 0.0002,
"loss": 0.164,
"step": 11010
},
{
"epoch": 0.020041875152030444,
"grad_norm": 0.146778866648674,
"learning_rate": 0.0002,
"loss": 0.0865,
"step": 11020
},
{
"epoch": 0.02006006197158764,
"grad_norm": 0.09848106652498245,
"learning_rate": 0.0002,
"loss": 0.0785,
"step": 11030
},
{
"epoch": 0.020078248791144837,
"grad_norm": 0.19981160759925842,
"learning_rate": 0.0002,
"loss": 0.0738,
"step": 11040
},
{
"epoch": 0.020096435610702033,
"grad_norm": 0.0248726736754179,
"learning_rate": 0.0002,
"loss": 0.0232,
"step": 11050
},
{
"epoch": 0.02011462243025923,
"grad_norm": 0.09688897430896759,
"learning_rate": 0.0002,
"loss": 0.1361,
"step": 11060
},
{
"epoch": 0.020132809249816426,
"grad_norm": 0.09953918308019638,
"learning_rate": 0.0002,
"loss": 0.0815,
"step": 11070
},
{
"epoch": 0.020150996069373622,
"grad_norm": 0.05801590532064438,
"learning_rate": 0.0002,
"loss": 0.0794,
"step": 11080
},
{
"epoch": 0.02016918288893082,
"grad_norm": 0.2029600441455841,
"learning_rate": 0.0002,
"loss": 0.0661,
"step": 11090
},
{
"epoch": 0.020187369708488015,
"grad_norm": 0.026677627116441727,
"learning_rate": 0.0002,
"loss": 0.022,
"step": 11100
},
{
"epoch": 0.02020555652804521,
"grad_norm": 0.054907217621803284,
"learning_rate": 0.0002,
"loss": 0.1356,
"step": 11110
},
{
"epoch": 0.020223743347602408,
"grad_norm": 0.16302120685577393,
"learning_rate": 0.0002,
"loss": 0.0721,
"step": 11120
},
{
"epoch": 0.020241930167159604,
"grad_norm": 0.03393812105059624,
"learning_rate": 0.0002,
"loss": 0.0733,
"step": 11130
},
{
"epoch": 0.0202601169867168,
"grad_norm": 0.16455304622650146,
"learning_rate": 0.0002,
"loss": 0.062,
"step": 11140
},
{
"epoch": 0.020278303806273996,
"grad_norm": 0.026239484548568726,
"learning_rate": 0.0002,
"loss": 0.0148,
"step": 11150
},
{
"epoch": 0.020296490625831193,
"grad_norm": 0.10048040002584457,
"learning_rate": 0.0002,
"loss": 0.1398,
"step": 11160
},
{
"epoch": 0.020314677445388393,
"grad_norm": 0.14221400022506714,
"learning_rate": 0.0002,
"loss": 0.0782,
"step": 11170
},
{
"epoch": 0.02033286426494559,
"grad_norm": 0.08432412147521973,
"learning_rate": 0.0002,
"loss": 0.0807,
"step": 11180
},
{
"epoch": 0.020351051084502785,
"grad_norm": 0.172295480966568,
"learning_rate": 0.0002,
"loss": 0.0655,
"step": 11190
},
{
"epoch": 0.02036923790405998,
"grad_norm": 0.023976756259799004,
"learning_rate": 0.0002,
"loss": 0.0218,
"step": 11200
},
{
"epoch": 0.020387424723617178,
"grad_norm": 0.03286349028348923,
"learning_rate": 0.0002,
"loss": 0.1441,
"step": 11210
},
{
"epoch": 0.020405611543174374,
"grad_norm": 0.04403531551361084,
"learning_rate": 0.0002,
"loss": 0.0825,
"step": 11220
},
{
"epoch": 0.02042379836273157,
"grad_norm": 0.0398452989757061,
"learning_rate": 0.0002,
"loss": 0.0755,
"step": 11230
},
{
"epoch": 0.020441985182288767,
"grad_norm": 0.15185104310512543,
"learning_rate": 0.0002,
"loss": 0.0591,
"step": 11240
},
{
"epoch": 0.020460172001845963,
"grad_norm": 0.005839187186211348,
"learning_rate": 0.0002,
"loss": 0.0172,
"step": 11250
},
{
"epoch": 0.02047835882140316,
"grad_norm": 0.031195368617773056,
"learning_rate": 0.0002,
"loss": 0.1594,
"step": 11260
},
{
"epoch": 0.020496545640960356,
"grad_norm": 0.1997426599264145,
"learning_rate": 0.0002,
"loss": 0.0797,
"step": 11270
},
{
"epoch": 0.020514732460517552,
"grad_norm": 0.03075752593576908,
"learning_rate": 0.0002,
"loss": 0.0798,
"step": 11280
},
{
"epoch": 0.02053291928007475,
"grad_norm": 0.17717675864696503,
"learning_rate": 0.0002,
"loss": 0.0654,
"step": 11290
},
{
"epoch": 0.020551106099631945,
"grad_norm": 0.036260057240724564,
"learning_rate": 0.0002,
"loss": 0.0196,
"step": 11300
},
{
"epoch": 0.02056929291918914,
"grad_norm": 0.11961262673139572,
"learning_rate": 0.0002,
"loss": 0.1313,
"step": 11310
},
{
"epoch": 0.020587479738746337,
"grad_norm": 0.12344212830066681,
"learning_rate": 0.0002,
"loss": 0.0798,
"step": 11320
},
{
"epoch": 0.020605666558303534,
"grad_norm": 0.12796273827552795,
"learning_rate": 0.0002,
"loss": 0.0693,
"step": 11330
},
{
"epoch": 0.02062385337786073,
"grad_norm": 0.12038332223892212,
"learning_rate": 0.0002,
"loss": 0.0637,
"step": 11340
},
{
"epoch": 0.020642040197417926,
"grad_norm": 0.013724497519433498,
"learning_rate": 0.0002,
"loss": 0.0134,
"step": 11350
},
{
"epoch": 0.020660227016975122,
"grad_norm": 0.030014917254447937,
"learning_rate": 0.0002,
"loss": 0.1355,
"step": 11360
},
{
"epoch": 0.02067841383653232,
"grad_norm": 0.05455614998936653,
"learning_rate": 0.0002,
"loss": 0.0824,
"step": 11370
},
{
"epoch": 0.020696600656089515,
"grad_norm": 0.09036605060100555,
"learning_rate": 0.0002,
"loss": 0.0712,
"step": 11380
},
{
"epoch": 0.02071478747564671,
"grad_norm": 0.15607796609401703,
"learning_rate": 0.0002,
"loss": 0.0613,
"step": 11390
},
{
"epoch": 0.020732974295203908,
"grad_norm": 0.029900453984737396,
"learning_rate": 0.0002,
"loss": 0.0216,
"step": 11400
},
{
"epoch": 0.020751161114761104,
"grad_norm": 0.06108042970299721,
"learning_rate": 0.0002,
"loss": 0.1223,
"step": 11410
},
{
"epoch": 0.0207693479343183,
"grad_norm": 0.052377600222826004,
"learning_rate": 0.0002,
"loss": 0.0795,
"step": 11420
},
{
"epoch": 0.020787534753875497,
"grad_norm": 0.063735231757164,
"learning_rate": 0.0002,
"loss": 0.0746,
"step": 11430
},
{
"epoch": 0.020805721573432693,
"grad_norm": 0.16977328062057495,
"learning_rate": 0.0002,
"loss": 0.0634,
"step": 11440
},
{
"epoch": 0.02082390839298989,
"grad_norm": 0.04451785981655121,
"learning_rate": 0.0002,
"loss": 0.0298,
"step": 11450
},
{
"epoch": 0.020842095212547086,
"grad_norm": 1.1584863662719727,
"learning_rate": 0.0002,
"loss": 0.1133,
"step": 11460
},
{
"epoch": 0.020860282032104282,
"grad_norm": 0.09867832064628601,
"learning_rate": 0.0002,
"loss": 0.0774,
"step": 11470
},
{
"epoch": 0.020878468851661478,
"grad_norm": 0.05493566766381264,
"learning_rate": 0.0002,
"loss": 0.0752,
"step": 11480
},
{
"epoch": 0.020896655671218674,
"grad_norm": 0.2149093896150589,
"learning_rate": 0.0002,
"loss": 0.0682,
"step": 11490
},
{
"epoch": 0.02091484249077587,
"grad_norm": 0.02243107184767723,
"learning_rate": 0.0002,
"loss": 0.0191,
"step": 11500
},
{
"epoch": 0.02093302931033307,
"grad_norm": 0.27817150950431824,
"learning_rate": 0.0002,
"loss": 0.1658,
"step": 11510
},
{
"epoch": 0.020951216129890267,
"grad_norm": 0.14467410743236542,
"learning_rate": 0.0002,
"loss": 0.083,
"step": 11520
},
{
"epoch": 0.020969402949447463,
"grad_norm": 0.1027064323425293,
"learning_rate": 0.0002,
"loss": 0.0825,
"step": 11530
},
{
"epoch": 0.02098758976900466,
"grad_norm": 0.2156657725572586,
"learning_rate": 0.0002,
"loss": 0.0694,
"step": 11540
},
{
"epoch": 0.021005776588561856,
"grad_norm": 0.023746902123093605,
"learning_rate": 0.0002,
"loss": 0.024,
"step": 11550
},
{
"epoch": 0.021023963408119052,
"grad_norm": 0.19738778471946716,
"learning_rate": 0.0002,
"loss": 0.1473,
"step": 11560
},
{
"epoch": 0.02104215022767625,
"grad_norm": 0.19759760797023773,
"learning_rate": 0.0002,
"loss": 0.078,
"step": 11570
},
{
"epoch": 0.021060337047233445,
"grad_norm": 9.88092041015625,
"learning_rate": 0.0002,
"loss": 0.0866,
"step": 11580
},
{
"epoch": 0.02107852386679064,
"grad_norm": 0.22301238775253296,
"learning_rate": 0.0002,
"loss": 0.0685,
"step": 11590
},
{
"epoch": 0.021096710686347837,
"grad_norm": 0.023191403597593307,
"learning_rate": 0.0002,
"loss": 0.0468,
"step": 11600
},
{
"epoch": 0.021114897505905034,
"grad_norm": 0.10442623496055603,
"learning_rate": 0.0002,
"loss": 0.2046,
"step": 11610
},
{
"epoch": 0.02113308432546223,
"grad_norm": 0.18771864473819733,
"learning_rate": 0.0002,
"loss": 0.0805,
"step": 11620
},
{
"epoch": 0.021151271145019426,
"grad_norm": 0.05516243353486061,
"learning_rate": 0.0002,
"loss": 0.0795,
"step": 11630
},
{
"epoch": 0.021169457964576623,
"grad_norm": 0.21308554708957672,
"learning_rate": 0.0002,
"loss": 0.0725,
"step": 11640
},
{
"epoch": 0.02118764478413382,
"grad_norm": 0.010607315227389336,
"learning_rate": 0.0002,
"loss": 0.0241,
"step": 11650
},
{
"epoch": 0.021205831603691015,
"grad_norm": 0.0542677640914917,
"learning_rate": 0.0002,
"loss": 0.1648,
"step": 11660
},
{
"epoch": 0.02122401842324821,
"grad_norm": 0.11239166557788849,
"learning_rate": 0.0002,
"loss": 0.0825,
"step": 11670
},
{
"epoch": 0.021242205242805408,
"grad_norm": 0.032700493931770325,
"learning_rate": 0.0002,
"loss": 0.0727,
"step": 11680
},
{
"epoch": 0.021260392062362604,
"grad_norm": 0.2005159705877304,
"learning_rate": 0.0002,
"loss": 0.0708,
"step": 11690
},
{
"epoch": 0.0212785788819198,
"grad_norm": 0.01741277053952217,
"learning_rate": 0.0002,
"loss": 0.0232,
"step": 11700
},
{
"epoch": 0.021296765701476997,
"grad_norm": 0.04048267379403114,
"learning_rate": 0.0002,
"loss": 0.1403,
"step": 11710
},
{
"epoch": 0.021314952521034193,
"grad_norm": 0.18796616792678833,
"learning_rate": 0.0002,
"loss": 0.0886,
"step": 11720
},
{
"epoch": 0.02133313934059139,
"grad_norm": 0.06360754370689392,
"learning_rate": 0.0002,
"loss": 0.0731,
"step": 11730
},
{
"epoch": 0.021351326160148586,
"grad_norm": 0.14168913662433624,
"learning_rate": 0.0002,
"loss": 0.0622,
"step": 11740
},
{
"epoch": 0.021369512979705782,
"grad_norm": 0.012988853268325329,
"learning_rate": 0.0002,
"loss": 0.0144,
"step": 11750
},
{
"epoch": 0.02138769979926298,
"grad_norm": 0.09176674485206604,
"learning_rate": 0.0002,
"loss": 0.1574,
"step": 11760
},
{
"epoch": 0.021405886618820175,
"grad_norm": 0.11934395134449005,
"learning_rate": 0.0002,
"loss": 0.079,
"step": 11770
},
{
"epoch": 0.02142407343837737,
"grad_norm": 0.11853605508804321,
"learning_rate": 0.0002,
"loss": 0.076,
"step": 11780
},
{
"epoch": 0.021442260257934567,
"grad_norm": 0.1625816971063614,
"learning_rate": 0.0002,
"loss": 0.0649,
"step": 11790
},
{
"epoch": 0.021460447077491764,
"grad_norm": 0.023221928626298904,
"learning_rate": 0.0002,
"loss": 0.0228,
"step": 11800
},
{
"epoch": 0.02147863389704896,
"grad_norm": 0.0494253933429718,
"learning_rate": 0.0002,
"loss": 0.1418,
"step": 11810
},
{
"epoch": 0.021496820716606156,
"grad_norm": 0.18250688910484314,
"learning_rate": 0.0002,
"loss": 0.0827,
"step": 11820
},
{
"epoch": 0.021515007536163352,
"grad_norm": 0.13340160250663757,
"learning_rate": 0.0002,
"loss": 0.0794,
"step": 11830
},
{
"epoch": 0.02153319435572055,
"grad_norm": 0.15497778356075287,
"learning_rate": 0.0002,
"loss": 0.0613,
"step": 11840
},
{
"epoch": 0.021551381175277745,
"grad_norm": 0.03259354829788208,
"learning_rate": 0.0002,
"loss": 0.023,
"step": 11850
},
{
"epoch": 0.021569567994834945,
"grad_norm": 0.09126435220241547,
"learning_rate": 0.0002,
"loss": 0.1235,
"step": 11860
},
{
"epoch": 0.02158775481439214,
"grad_norm": 0.13455496728420258,
"learning_rate": 0.0002,
"loss": 0.0806,
"step": 11870
},
{
"epoch": 0.021605941633949338,
"grad_norm": 0.10817539691925049,
"learning_rate": 0.0002,
"loss": 0.0829,
"step": 11880
},
{
"epoch": 0.021624128453506534,
"grad_norm": 0.1913878321647644,
"learning_rate": 0.0002,
"loss": 0.0636,
"step": 11890
},
{
"epoch": 0.02164231527306373,
"grad_norm": 0.025634530931711197,
"learning_rate": 0.0002,
"loss": 0.0216,
"step": 11900
},
{
"epoch": 0.021660502092620926,
"grad_norm": 0.10507725924253464,
"learning_rate": 0.0002,
"loss": 0.1326,
"step": 11910
},
{
"epoch": 0.021678688912178123,
"grad_norm": 0.09721452742815018,
"learning_rate": 0.0002,
"loss": 0.0857,
"step": 11920
},
{
"epoch": 0.02169687573173532,
"grad_norm": 0.028759269043803215,
"learning_rate": 0.0002,
"loss": 0.0751,
"step": 11930
},
{
"epoch": 0.021715062551292515,
"grad_norm": 0.17618104815483093,
"learning_rate": 0.0002,
"loss": 0.062,
"step": 11940
},
{
"epoch": 0.02173324937084971,
"grad_norm": 0.02503124624490738,
"learning_rate": 0.0002,
"loss": 0.0182,
"step": 11950
},
{
"epoch": 0.021751436190406908,
"grad_norm": 0.10976126044988632,
"learning_rate": 0.0002,
"loss": 0.1564,
"step": 11960
},
{
"epoch": 0.021769623009964104,
"grad_norm": 0.0833989605307579,
"learning_rate": 0.0002,
"loss": 0.0784,
"step": 11970
},
{
"epoch": 0.0217878098295213,
"grad_norm": 0.06359647959470749,
"learning_rate": 0.0002,
"loss": 0.0795,
"step": 11980
},
{
"epoch": 0.021805996649078497,
"grad_norm": 0.1677824705839157,
"learning_rate": 0.0002,
"loss": 0.0699,
"step": 11990
},
{
"epoch": 0.021824183468635693,
"grad_norm": 0.018009621649980545,
"learning_rate": 0.0002,
"loss": 0.0185,
"step": 12000
},
{
"epoch": 0.02184237028819289,
"grad_norm": 0.12256644666194916,
"learning_rate": 0.0002,
"loss": 0.1839,
"step": 12010
},
{
"epoch": 0.021860557107750086,
"grad_norm": 0.11677028983831406,
"learning_rate": 0.0002,
"loss": 0.0829,
"step": 12020
},
{
"epoch": 0.021878743927307282,
"grad_norm": 0.12885046005249023,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 12030
},
{
"epoch": 0.02189693074686448,
"grad_norm": 0.1394425481557846,
"learning_rate": 0.0002,
"loss": 0.0668,
"step": 12040
},
{
"epoch": 0.021915117566421675,
"grad_norm": 0.024974076077342033,
"learning_rate": 0.0002,
"loss": 0.0192,
"step": 12050
},
{
"epoch": 0.02193330438597887,
"grad_norm": 0.11284986138343811,
"learning_rate": 0.0002,
"loss": 0.1492,
"step": 12060
},
{
"epoch": 0.021951491205536067,
"grad_norm": 0.0605492927134037,
"learning_rate": 0.0002,
"loss": 0.0787,
"step": 12070
},
{
"epoch": 0.021969678025093264,
"grad_norm": 0.040298718959093094,
"learning_rate": 0.0002,
"loss": 0.0778,
"step": 12080
},
{
"epoch": 0.02198786484465046,
"grad_norm": 0.1555332988500595,
"learning_rate": 0.0002,
"loss": 0.0683,
"step": 12090
},
{
"epoch": 0.022006051664207656,
"grad_norm": 0.022474724799394608,
"learning_rate": 0.0002,
"loss": 0.0139,
"step": 12100
},
{
"epoch": 0.022024238483764853,
"grad_norm": 0.08212363719940186,
"learning_rate": 0.0002,
"loss": 0.1513,
"step": 12110
},
{
"epoch": 0.02204242530332205,
"grad_norm": 0.16297335922718048,
"learning_rate": 0.0002,
"loss": 0.087,
"step": 12120
},
{
"epoch": 0.022060612122879245,
"grad_norm": 0.026817265897989273,
"learning_rate": 0.0002,
"loss": 0.0763,
"step": 12130
},
{
"epoch": 0.02207879894243644,
"grad_norm": 0.15199647843837738,
"learning_rate": 0.0002,
"loss": 0.0632,
"step": 12140
},
{
"epoch": 0.022096985761993638,
"grad_norm": 0.021619049832224846,
"learning_rate": 0.0002,
"loss": 0.0221,
"step": 12150
},
{
"epoch": 0.022115172581550834,
"grad_norm": 0.071327805519104,
"learning_rate": 0.0002,
"loss": 0.138,
"step": 12160
},
{
"epoch": 0.02213335940110803,
"grad_norm": 0.07506705075502396,
"learning_rate": 0.0002,
"loss": 0.0802,
"step": 12170
},
{
"epoch": 0.022151546220665227,
"grad_norm": 0.05193526670336723,
"learning_rate": 0.0002,
"loss": 0.0726,
"step": 12180
},
{
"epoch": 0.022169733040222423,
"grad_norm": 0.125730961561203,
"learning_rate": 0.0002,
"loss": 0.0658,
"step": 12190
},
{
"epoch": 0.022187919859779623,
"grad_norm": 0.01939002424478531,
"learning_rate": 0.0002,
"loss": 0.0174,
"step": 12200
},
{
"epoch": 0.02220610667933682,
"grad_norm": 0.05645585432648659,
"learning_rate": 0.0002,
"loss": 0.1447,
"step": 12210
},
{
"epoch": 0.022224293498894016,
"grad_norm": 0.12416274845600128,
"learning_rate": 0.0002,
"loss": 0.0727,
"step": 12220
},
{
"epoch": 0.022242480318451212,
"grad_norm": 0.05618545040488243,
"learning_rate": 0.0002,
"loss": 0.0801,
"step": 12230
},
{
"epoch": 0.022260667138008408,
"grad_norm": 0.12334968894720078,
"learning_rate": 0.0002,
"loss": 0.0598,
"step": 12240
},
{
"epoch": 0.022278853957565604,
"grad_norm": 0.024331970140337944,
"learning_rate": 0.0002,
"loss": 0.0179,
"step": 12250
},
{
"epoch": 0.0222970407771228,
"grad_norm": 0.05856281518936157,
"learning_rate": 0.0002,
"loss": 0.126,
"step": 12260
},
{
"epoch": 0.022315227596679997,
"grad_norm": 0.07432300597429276,
"learning_rate": 0.0002,
"loss": 0.0839,
"step": 12270
},
{
"epoch": 0.022333414416237193,
"grad_norm": 0.07249715179204941,
"learning_rate": 0.0002,
"loss": 0.0815,
"step": 12280
},
{
"epoch": 0.02235160123579439,
"grad_norm": 0.14335612952709198,
"learning_rate": 0.0002,
"loss": 0.0605,
"step": 12290
},
{
"epoch": 0.022369788055351586,
"grad_norm": 0.03603110462427139,
"learning_rate": 0.0002,
"loss": 0.0185,
"step": 12300
},
{
"epoch": 0.022387974874908782,
"grad_norm": 0.08532091230154037,
"learning_rate": 0.0002,
"loss": 0.1339,
"step": 12310
},
{
"epoch": 0.02240616169446598,
"grad_norm": 0.13663236796855927,
"learning_rate": 0.0002,
"loss": 0.0761,
"step": 12320
},
{
"epoch": 0.022424348514023175,
"grad_norm": 0.10088011622428894,
"learning_rate": 0.0002,
"loss": 0.0737,
"step": 12330
},
{
"epoch": 0.02244253533358037,
"grad_norm": 0.17186152935028076,
"learning_rate": 0.0002,
"loss": 0.0661,
"step": 12340
},
{
"epoch": 0.022460722153137568,
"grad_norm": 0.01941334828734398,
"learning_rate": 0.0002,
"loss": 0.0135,
"step": 12350
},
{
"epoch": 0.022478908972694764,
"grad_norm": 0.12438862770795822,
"learning_rate": 0.0002,
"loss": 0.1474,
"step": 12360
},
{
"epoch": 0.02249709579225196,
"grad_norm": 0.08050791174173355,
"learning_rate": 0.0002,
"loss": 0.0792,
"step": 12370
},
{
"epoch": 0.022515282611809156,
"grad_norm": 0.04660952091217041,
"learning_rate": 0.0002,
"loss": 0.0826,
"step": 12380
},
{
"epoch": 0.022533469431366353,
"grad_norm": 0.16433311998844147,
"learning_rate": 0.0002,
"loss": 0.0691,
"step": 12390
},
{
"epoch": 0.02255165625092355,
"grad_norm": 0.04376552626490593,
"learning_rate": 0.0002,
"loss": 0.0219,
"step": 12400
},
{
"epoch": 0.022569843070480745,
"grad_norm": 0.06648654490709305,
"learning_rate": 0.0002,
"loss": 0.1346,
"step": 12410
},
{
"epoch": 0.02258802989003794,
"grad_norm": 0.11318199336528778,
"learning_rate": 0.0002,
"loss": 0.0736,
"step": 12420
},
{
"epoch": 0.022606216709595138,
"grad_norm": 0.0922408252954483,
"learning_rate": 0.0002,
"loss": 0.0819,
"step": 12430
},
{
"epoch": 0.022624403529152334,
"grad_norm": 0.1696896106004715,
"learning_rate": 0.0002,
"loss": 0.0642,
"step": 12440
},
{
"epoch": 0.02264259034870953,
"grad_norm": 0.03212421387434006,
"learning_rate": 0.0002,
"loss": 0.0247,
"step": 12450
},
{
"epoch": 0.022660777168266727,
"grad_norm": 0.12295889109373093,
"learning_rate": 0.0002,
"loss": 0.1504,
"step": 12460
},
{
"epoch": 0.022678963987823923,
"grad_norm": 0.10351194441318512,
"learning_rate": 0.0002,
"loss": 0.0757,
"step": 12470
},
{
"epoch": 0.02269715080738112,
"grad_norm": 0.022580118849873543,
"learning_rate": 0.0002,
"loss": 0.0756,
"step": 12480
},
{
"epoch": 0.022715337626938316,
"grad_norm": 0.16330066323280334,
"learning_rate": 0.0002,
"loss": 0.0645,
"step": 12490
},
{
"epoch": 0.022733524446495512,
"grad_norm": 0.021431026980280876,
"learning_rate": 0.0002,
"loss": 0.0224,
"step": 12500
},
{
"epoch": 0.02275171126605271,
"grad_norm": 0.053853604942560196,
"learning_rate": 0.0002,
"loss": 0.1304,
"step": 12510
},
{
"epoch": 0.022769898085609905,
"grad_norm": 0.129705548286438,
"learning_rate": 0.0002,
"loss": 0.0799,
"step": 12520
},
{
"epoch": 0.0227880849051671,
"grad_norm": 0.027473529800772667,
"learning_rate": 0.0002,
"loss": 0.0771,
"step": 12530
},
{
"epoch": 0.0228062717247243,
"grad_norm": 0.2045305222272873,
"learning_rate": 0.0002,
"loss": 0.0615,
"step": 12540
},
{
"epoch": 0.022824458544281497,
"grad_norm": 0.041042860597372055,
"learning_rate": 0.0002,
"loss": 0.026,
"step": 12550
},
{
"epoch": 0.022842645363838694,
"grad_norm": 0.05624527484178543,
"learning_rate": 0.0002,
"loss": 0.1327,
"step": 12560
},
{
"epoch": 0.02286083218339589,
"grad_norm": 0.09647081047296524,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 12570
},
{
"epoch": 0.022879019002953086,
"grad_norm": 0.03362264856696129,
"learning_rate": 0.0002,
"loss": 0.076,
"step": 12580
},
{
"epoch": 0.022897205822510282,
"grad_norm": 0.1459503322839737,
"learning_rate": 0.0002,
"loss": 0.0603,
"step": 12590
},
{
"epoch": 0.02291539264206748,
"grad_norm": 0.025729481130838394,
"learning_rate": 0.0002,
"loss": 0.0196,
"step": 12600
},
{
"epoch": 0.022933579461624675,
"grad_norm": 0.19940927624702454,
"learning_rate": 0.0002,
"loss": 0.1298,
"step": 12610
},
{
"epoch": 0.02295176628118187,
"grad_norm": 0.13796600699424744,
"learning_rate": 0.0002,
"loss": 0.086,
"step": 12620
},
{
"epoch": 0.022969953100739068,
"grad_norm": 0.08884158730506897,
"learning_rate": 0.0002,
"loss": 0.0808,
"step": 12630
},
{
"epoch": 0.022988139920296264,
"grad_norm": 0.15814751386642456,
"learning_rate": 0.0002,
"loss": 0.0658,
"step": 12640
},
{
"epoch": 0.02300632673985346,
"grad_norm": 0.03503837063908577,
"learning_rate": 0.0002,
"loss": 0.0232,
"step": 12650
},
{
"epoch": 0.023024513559410657,
"grad_norm": 0.09701854735612869,
"learning_rate": 0.0002,
"loss": 0.136,
"step": 12660
},
{
"epoch": 0.023042700378967853,
"grad_norm": 0.13909977674484253,
"learning_rate": 0.0002,
"loss": 0.0839,
"step": 12670
},
{
"epoch": 0.02306088719852505,
"grad_norm": 0.03152406960725784,
"learning_rate": 0.0002,
"loss": 0.0733,
"step": 12680
},
{
"epoch": 0.023079074018082246,
"grad_norm": 0.13872750103473663,
"learning_rate": 0.0002,
"loss": 0.0604,
"step": 12690
},
{
"epoch": 0.023097260837639442,
"grad_norm": 0.03626656159758568,
"learning_rate": 0.0002,
"loss": 0.0234,
"step": 12700
},
{
"epoch": 0.023115447657196638,
"grad_norm": 0.10111619532108307,
"learning_rate": 0.0002,
"loss": 0.1507,
"step": 12710
},
{
"epoch": 0.023133634476753834,
"grad_norm": 0.09038366377353668,
"learning_rate": 0.0002,
"loss": 0.0839,
"step": 12720
},
{
"epoch": 0.02315182129631103,
"grad_norm": 0.026116544380784035,
"learning_rate": 0.0002,
"loss": 0.0777,
"step": 12730
},
{
"epoch": 0.023170008115868227,
"grad_norm": 0.2067679613828659,
"learning_rate": 0.0002,
"loss": 0.0661,
"step": 12740
},
{
"epoch": 0.023188194935425423,
"grad_norm": 0.02005072310566902,
"learning_rate": 0.0002,
"loss": 0.0165,
"step": 12750
},
{
"epoch": 0.02320638175498262,
"grad_norm": 0.03261101245880127,
"learning_rate": 0.0002,
"loss": 0.159,
"step": 12760
},
{
"epoch": 0.023224568574539816,
"grad_norm": 0.1416555494070053,
"learning_rate": 0.0002,
"loss": 0.0856,
"step": 12770
},
{
"epoch": 0.023242755394097012,
"grad_norm": 0.09400717914104462,
"learning_rate": 0.0002,
"loss": 0.0745,
"step": 12780
},
{
"epoch": 0.02326094221365421,
"grad_norm": 0.17093195021152496,
"learning_rate": 0.0002,
"loss": 0.0691,
"step": 12790
},
{
"epoch": 0.023279129033211405,
"grad_norm": 0.0209200382232666,
"learning_rate": 0.0002,
"loss": 0.0168,
"step": 12800
},
{
"epoch": 0.0232973158527686,
"grad_norm": 0.10523302853107452,
"learning_rate": 0.0002,
"loss": 0.1628,
"step": 12810
},
{
"epoch": 0.023315502672325798,
"grad_norm": 0.06932856142520905,
"learning_rate": 0.0002,
"loss": 0.079,
"step": 12820
},
{
"epoch": 0.023333689491882994,
"grad_norm": 0.03244032710790634,
"learning_rate": 0.0002,
"loss": 0.0699,
"step": 12830
},
{
"epoch": 0.02335187631144019,
"grad_norm": 0.13403338193893433,
"learning_rate": 0.0002,
"loss": 0.0619,
"step": 12840
},
{
"epoch": 0.023370063130997386,
"grad_norm": 0.034033093601465225,
"learning_rate": 0.0002,
"loss": 0.0166,
"step": 12850
},
{
"epoch": 0.023388249950554583,
"grad_norm": 0.07277385890483856,
"learning_rate": 0.0002,
"loss": 0.1377,
"step": 12860
},
{
"epoch": 0.02340643677011178,
"grad_norm": 0.10873163491487503,
"learning_rate": 0.0002,
"loss": 0.0895,
"step": 12870
},
{
"epoch": 0.023424623589668975,
"grad_norm": 0.06244732066988945,
"learning_rate": 0.0002,
"loss": 0.0745,
"step": 12880
},
{
"epoch": 0.023442810409226175,
"grad_norm": 0.1937248259782791,
"learning_rate": 0.0002,
"loss": 0.0633,
"step": 12890
},
{
"epoch": 0.02346099722878337,
"grad_norm": 0.03432930260896683,
"learning_rate": 0.0002,
"loss": 0.0246,
"step": 12900
},
{
"epoch": 0.023479184048340568,
"grad_norm": 0.33358234167099,
"learning_rate": 0.0002,
"loss": 0.1249,
"step": 12910
},
{
"epoch": 0.023497370867897764,
"grad_norm": 0.12039615213871002,
"learning_rate": 0.0002,
"loss": 0.0734,
"step": 12920
},
{
"epoch": 0.02351555768745496,
"grad_norm": 0.02666555717587471,
"learning_rate": 0.0002,
"loss": 0.0849,
"step": 12930
},
{
"epoch": 0.023533744507012157,
"grad_norm": 0.128091961145401,
"learning_rate": 0.0002,
"loss": 0.0647,
"step": 12940
},
{
"epoch": 0.023551931326569353,
"grad_norm": 0.030916422605514526,
"learning_rate": 0.0002,
"loss": 0.0217,
"step": 12950
},
{
"epoch": 0.02357011814612655,
"grad_norm": 0.09280567616224289,
"learning_rate": 0.0002,
"loss": 0.1281,
"step": 12960
},
{
"epoch": 0.023588304965683746,
"grad_norm": 0.09032955765724182,
"learning_rate": 0.0002,
"loss": 0.0834,
"step": 12970
},
{
"epoch": 0.023606491785240942,
"grad_norm": 0.3660918176174164,
"learning_rate": 0.0002,
"loss": 0.0776,
"step": 12980
},
{
"epoch": 0.02362467860479814,
"grad_norm": 0.15715408325195312,
"learning_rate": 0.0002,
"loss": 0.0611,
"step": 12990
},
{
"epoch": 0.023642865424355335,
"grad_norm": 0.03867153823375702,
"learning_rate": 0.0002,
"loss": 0.0214,
"step": 13000
},
{
"epoch": 0.02366105224391253,
"grad_norm": 0.37568527460098267,
"learning_rate": 0.0002,
"loss": 0.2529,
"step": 13010
},
{
"epoch": 0.023679239063469727,
"grad_norm": 0.14888867735862732,
"learning_rate": 0.0002,
"loss": 0.0801,
"step": 13020
},
{
"epoch": 0.023697425883026924,
"grad_norm": 0.04271422699093819,
"learning_rate": 0.0002,
"loss": 0.0863,
"step": 13030
},
{
"epoch": 0.02371561270258412,
"grad_norm": 0.190608948469162,
"learning_rate": 0.0002,
"loss": 0.0773,
"step": 13040
},
{
"epoch": 0.023733799522141316,
"grad_norm": 0.020333535969257355,
"learning_rate": 0.0002,
"loss": 0.0201,
"step": 13050
},
{
"epoch": 0.023751986341698512,
"grad_norm": 0.143577441573143,
"learning_rate": 0.0002,
"loss": 0.1709,
"step": 13060
},
{
"epoch": 0.02377017316125571,
"grad_norm": 0.09225071966648102,
"learning_rate": 0.0002,
"loss": 0.0854,
"step": 13070
},
{
"epoch": 0.023788359980812905,
"grad_norm": 0.08655473589897156,
"learning_rate": 0.0002,
"loss": 0.0727,
"step": 13080
},
{
"epoch": 0.0238065468003701,
"grad_norm": 0.14465250074863434,
"learning_rate": 0.0002,
"loss": 0.0632,
"step": 13090
},
{
"epoch": 0.023824733619927298,
"grad_norm": 0.019399341195821762,
"learning_rate": 0.0002,
"loss": 0.0204,
"step": 13100
},
{
"epoch": 0.023842920439484494,
"grad_norm": 0.09221036732196808,
"learning_rate": 0.0002,
"loss": 0.1646,
"step": 13110
},
{
"epoch": 0.02386110725904169,
"grad_norm": 0.1308157742023468,
"learning_rate": 0.0002,
"loss": 0.089,
"step": 13120
},
{
"epoch": 0.023879294078598887,
"grad_norm": 0.04212506487965584,
"learning_rate": 0.0002,
"loss": 0.0776,
"step": 13130
},
{
"epoch": 0.023897480898156083,
"grad_norm": 0.13541243970394135,
"learning_rate": 0.0002,
"loss": 0.0694,
"step": 13140
},
{
"epoch": 0.02391566771771328,
"grad_norm": 0.016859933733940125,
"learning_rate": 0.0002,
"loss": 0.0191,
"step": 13150
},
{
"epoch": 0.023933854537270476,
"grad_norm": 0.1553143709897995,
"learning_rate": 0.0002,
"loss": 0.1653,
"step": 13160
},
{
"epoch": 0.023952041356827672,
"grad_norm": 0.07960142940282822,
"learning_rate": 0.0002,
"loss": 0.0938,
"step": 13170
},
{
"epoch": 0.023970228176384868,
"grad_norm": 0.0719163790345192,
"learning_rate": 0.0002,
"loss": 0.0767,
"step": 13180
},
{
"epoch": 0.023988414995942065,
"grad_norm": 0.14845407009124756,
"learning_rate": 0.0002,
"loss": 0.0642,
"step": 13190
},
{
"epoch": 0.02400660181549926,
"grad_norm": 0.01817360520362854,
"learning_rate": 0.0002,
"loss": 0.0229,
"step": 13200
},
{
"epoch": 0.024024788635056457,
"grad_norm": 0.03876543045043945,
"learning_rate": 0.0002,
"loss": 0.1377,
"step": 13210
},
{
"epoch": 0.024042975454613653,
"grad_norm": 0.05972164496779442,
"learning_rate": 0.0002,
"loss": 0.0802,
"step": 13220
},
{
"epoch": 0.024061162274170853,
"grad_norm": 0.09239703416824341,
"learning_rate": 0.0002,
"loss": 0.0816,
"step": 13230
},
{
"epoch": 0.02407934909372805,
"grad_norm": 0.15912885963916779,
"learning_rate": 0.0002,
"loss": 0.0598,
"step": 13240
},
{
"epoch": 0.024097535913285246,
"grad_norm": 0.024279551580548286,
"learning_rate": 0.0002,
"loss": 0.0235,
"step": 13250
},
{
"epoch": 0.024115722732842442,
"grad_norm": 0.06568270921707153,
"learning_rate": 0.0002,
"loss": 0.1255,
"step": 13260
},
{
"epoch": 0.02413390955239964,
"grad_norm": 0.04041383042931557,
"learning_rate": 0.0002,
"loss": 0.0718,
"step": 13270
},
{
"epoch": 0.024152096371956835,
"grad_norm": 0.046768829226493835,
"learning_rate": 0.0002,
"loss": 0.0741,
"step": 13280
},
{
"epoch": 0.02417028319151403,
"grad_norm": 0.21418194472789764,
"learning_rate": 0.0002,
"loss": 0.0683,
"step": 13290
},
{
"epoch": 0.024188470011071227,
"grad_norm": 0.04398053511977196,
"learning_rate": 0.0002,
"loss": 0.0262,
"step": 13300
},
{
"epoch": 0.024206656830628424,
"grad_norm": 0.1672079861164093,
"learning_rate": 0.0002,
"loss": 0.1408,
"step": 13310
},
{
"epoch": 0.02422484365018562,
"grad_norm": 0.05705881491303444,
"learning_rate": 0.0002,
"loss": 0.0773,
"step": 13320
},
{
"epoch": 0.024243030469742816,
"grad_norm": 0.0667627677321434,
"learning_rate": 0.0002,
"loss": 0.0823,
"step": 13330
},
{
"epoch": 0.024261217289300013,
"grad_norm": 0.16610710322856903,
"learning_rate": 0.0002,
"loss": 0.0682,
"step": 13340
},
{
"epoch": 0.02427940410885721,
"grad_norm": 0.028300171718001366,
"learning_rate": 0.0002,
"loss": 0.0185,
"step": 13350
},
{
"epoch": 0.024297590928414405,
"grad_norm": 0.10226302593946457,
"learning_rate": 0.0002,
"loss": 0.1406,
"step": 13360
},
{
"epoch": 0.0243157777479716,
"grad_norm": 0.0939667820930481,
"learning_rate": 0.0002,
"loss": 0.0755,
"step": 13370
},
{
"epoch": 0.024333964567528798,
"grad_norm": 0.029998745769262314,
"learning_rate": 0.0002,
"loss": 0.0748,
"step": 13380
},
{
"epoch": 0.024352151387085994,
"grad_norm": 0.1240144744515419,
"learning_rate": 0.0002,
"loss": 0.0639,
"step": 13390
},
{
"epoch": 0.02437033820664319,
"grad_norm": 0.017499787732958794,
"learning_rate": 0.0002,
"loss": 0.0156,
"step": 13400
},
{
"epoch": 0.024388525026200387,
"grad_norm": 0.11781036853790283,
"learning_rate": 0.0002,
"loss": 0.1385,
"step": 13410
},
{
"epoch": 0.024406711845757583,
"grad_norm": 0.09330960363149643,
"learning_rate": 0.0002,
"loss": 0.0789,
"step": 13420
},
{
"epoch": 0.02442489866531478,
"grad_norm": 0.03347505256533623,
"learning_rate": 0.0002,
"loss": 0.0742,
"step": 13430
},
{
"epoch": 0.024443085484871976,
"grad_norm": 0.18877847492694855,
"learning_rate": 0.0002,
"loss": 0.0701,
"step": 13440
},
{
"epoch": 0.024461272304429172,
"grad_norm": 0.03831986337900162,
"learning_rate": 0.0002,
"loss": 0.0243,
"step": 13450
},
{
"epoch": 0.02447945912398637,
"grad_norm": 0.07360157370567322,
"learning_rate": 0.0002,
"loss": 0.1237,
"step": 13460
},
{
"epoch": 0.024497645943543565,
"grad_norm": 0.0442088283598423,
"learning_rate": 0.0002,
"loss": 0.0742,
"step": 13470
},
{
"epoch": 0.02451583276310076,
"grad_norm": 0.07053640484809875,
"learning_rate": 0.0002,
"loss": 0.0793,
"step": 13480
},
{
"epoch": 0.024534019582657957,
"grad_norm": 0.20134539902210236,
"learning_rate": 0.0002,
"loss": 0.0621,
"step": 13490
},
{
"epoch": 0.024552206402215154,
"grad_norm": 0.016353536397218704,
"learning_rate": 0.0002,
"loss": 0.0204,
"step": 13500
},
{
"epoch": 0.02457039322177235,
"grad_norm": 0.15373657643795013,
"learning_rate": 0.0002,
"loss": 0.1446,
"step": 13510
},
{
"epoch": 0.024588580041329546,
"grad_norm": 2.457998037338257,
"learning_rate": 0.0002,
"loss": 0.0959,
"step": 13520
},
{
"epoch": 0.024606766860886743,
"grad_norm": 0.11631426215171814,
"learning_rate": 0.0002,
"loss": 0.0718,
"step": 13530
},
{
"epoch": 0.02462495368044394,
"grad_norm": 0.15928395092487335,
"learning_rate": 0.0002,
"loss": 0.0638,
"step": 13540
},
{
"epoch": 0.024643140500001135,
"grad_norm": 0.01724998839199543,
"learning_rate": 0.0002,
"loss": 0.0127,
"step": 13550
},
{
"epoch": 0.02466132731955833,
"grad_norm": 0.10434440523386002,
"learning_rate": 0.0002,
"loss": 0.1676,
"step": 13560
},
{
"epoch": 0.02467951413911553,
"grad_norm": 0.09029936045408249,
"learning_rate": 0.0002,
"loss": 0.0792,
"step": 13570
},
{
"epoch": 0.024697700958672728,
"grad_norm": 0.07413540780544281,
"learning_rate": 0.0002,
"loss": 0.074,
"step": 13580
},
{
"epoch": 0.024715887778229924,
"grad_norm": 0.15171368420124054,
"learning_rate": 0.0002,
"loss": 0.0646,
"step": 13590
},
{
"epoch": 0.02473407459778712,
"grad_norm": 0.03615165874361992,
"learning_rate": 0.0002,
"loss": 0.0253,
"step": 13600
},
{
"epoch": 0.024752261417344316,
"grad_norm": 0.08074207603931427,
"learning_rate": 0.0002,
"loss": 0.1251,
"step": 13610
},
{
"epoch": 0.024770448236901513,
"grad_norm": 0.12725302577018738,
"learning_rate": 0.0002,
"loss": 0.0868,
"step": 13620
},
{
"epoch": 0.02478863505645871,
"grad_norm": 0.02872832864522934,
"learning_rate": 0.0002,
"loss": 0.072,
"step": 13630
},
{
"epoch": 0.024806821876015905,
"grad_norm": 0.14573116600513458,
"learning_rate": 0.0002,
"loss": 0.06,
"step": 13640
},
{
"epoch": 0.0248250086955731,
"grad_norm": 0.039421938359737396,
"learning_rate": 0.0002,
"loss": 0.0259,
"step": 13650
},
{
"epoch": 0.024843195515130298,
"grad_norm": 0.08786037564277649,
"learning_rate": 0.0002,
"loss": 0.1255,
"step": 13660
},
{
"epoch": 0.024861382334687494,
"grad_norm": 0.7118334174156189,
"learning_rate": 0.0002,
"loss": 0.1096,
"step": 13670
},
{
"epoch": 0.02487956915424469,
"grad_norm": 0.05718977376818657,
"learning_rate": 0.0002,
"loss": 0.1057,
"step": 13680
},
{
"epoch": 0.024897755973801887,
"grad_norm": 0.19388055801391602,
"learning_rate": 0.0002,
"loss": 0.0668,
"step": 13690
},
{
"epoch": 0.024915942793359083,
"grad_norm": 0.02519839070737362,
"learning_rate": 0.0002,
"loss": 0.0182,
"step": 13700
},
{
"epoch": 0.02493412961291628,
"grad_norm": 0.15939857065677643,
"learning_rate": 0.0002,
"loss": 0.1685,
"step": 13710
},
{
"epoch": 0.024952316432473476,
"grad_norm": 0.07893367856740952,
"learning_rate": 0.0002,
"loss": 0.0781,
"step": 13720
},
{
"epoch": 0.024970503252030672,
"grad_norm": 0.0573757067322731,
"learning_rate": 0.0002,
"loss": 0.0819,
"step": 13730
},
{
"epoch": 0.02498869007158787,
"grad_norm": 0.1089317575097084,
"learning_rate": 0.0002,
"loss": 0.0645,
"step": 13740
},
{
"epoch": 0.025006876891145065,
"grad_norm": 0.03239568695425987,
"learning_rate": 0.0002,
"loss": 0.0199,
"step": 13750
},
{
"epoch": 0.02502506371070226,
"grad_norm": 0.04015114903450012,
"learning_rate": 0.0002,
"loss": 0.146,
"step": 13760
},
{
"epoch": 0.025043250530259457,
"grad_norm": 0.15218386054039001,
"learning_rate": 0.0002,
"loss": 0.0854,
"step": 13770
},
{
"epoch": 0.025061437349816654,
"grad_norm": 0.04461386427283287,
"learning_rate": 0.0002,
"loss": 0.0734,
"step": 13780
},
{
"epoch": 0.02507962416937385,
"grad_norm": 0.17443357408046722,
"learning_rate": 0.0002,
"loss": 0.0677,
"step": 13790
},
{
"epoch": 0.025097810988931046,
"grad_norm": 1.0899302959442139,
"learning_rate": 0.0002,
"loss": 0.0312,
"step": 13800
},
{
"epoch": 0.025115997808488243,
"grad_norm": 0.04115718603134155,
"learning_rate": 0.0002,
"loss": 0.1392,
"step": 13810
},
{
"epoch": 0.02513418462804544,
"grad_norm": 0.06605038046836853,
"learning_rate": 0.0002,
"loss": 0.0826,
"step": 13820
},
{
"epoch": 0.025152371447602635,
"grad_norm": 0.115416020154953,
"learning_rate": 0.0002,
"loss": 0.0709,
"step": 13830
},
{
"epoch": 0.02517055826715983,
"grad_norm": 0.1582881212234497,
"learning_rate": 0.0002,
"loss": 0.066,
"step": 13840
},
{
"epoch": 0.025188745086717028,
"grad_norm": 0.037643156945705414,
"learning_rate": 0.0002,
"loss": 0.0226,
"step": 13850
},
{
"epoch": 0.025206931906274224,
"grad_norm": 0.08343279361724854,
"learning_rate": 0.0002,
"loss": 0.1197,
"step": 13860
},
{
"epoch": 0.02522511872583142,
"grad_norm": 0.13482169806957245,
"learning_rate": 0.0002,
"loss": 0.0799,
"step": 13870
},
{
"epoch": 0.025243305545388617,
"grad_norm": 0.10373103618621826,
"learning_rate": 0.0002,
"loss": 0.075,
"step": 13880
},
{
"epoch": 0.025261492364945813,
"grad_norm": 0.1348303109407425,
"learning_rate": 0.0002,
"loss": 0.0603,
"step": 13890
},
{
"epoch": 0.02527967918450301,
"grad_norm": 0.058479245752096176,
"learning_rate": 0.0002,
"loss": 0.0252,
"step": 13900
},
{
"epoch": 0.025297866004060206,
"grad_norm": 0.19177350401878357,
"learning_rate": 0.0002,
"loss": 0.122,
"step": 13910
},
{
"epoch": 0.025316052823617406,
"grad_norm": 0.11044300347566605,
"learning_rate": 0.0002,
"loss": 0.0724,
"step": 13920
},
{
"epoch": 0.025334239643174602,
"grad_norm": 0.05279375612735748,
"learning_rate": 0.0002,
"loss": 0.0836,
"step": 13930
},
{
"epoch": 0.025352426462731798,
"grad_norm": 0.12162257730960846,
"learning_rate": 0.0002,
"loss": 0.0615,
"step": 13940
},
{
"epoch": 0.025370613282288994,
"grad_norm": 0.026728983968496323,
"learning_rate": 0.0002,
"loss": 0.0207,
"step": 13950
},
{
"epoch": 0.02538880010184619,
"grad_norm": 0.08440329879522324,
"learning_rate": 0.0002,
"loss": 0.1171,
"step": 13960
},
{
"epoch": 0.025406986921403387,
"grad_norm": 0.10090481489896774,
"learning_rate": 0.0002,
"loss": 0.0851,
"step": 13970
},
{
"epoch": 0.025425173740960583,
"grad_norm": 0.03063822351396084,
"learning_rate": 0.0002,
"loss": 0.0783,
"step": 13980
},
{
"epoch": 0.02544336056051778,
"grad_norm": 0.14754973351955414,
"learning_rate": 0.0002,
"loss": 0.0662,
"step": 13990
},
{
"epoch": 0.025461547380074976,
"grad_norm": 0.04844941198825836,
"learning_rate": 0.0002,
"loss": 0.0204,
"step": 14000
},
{
"epoch": 0.025479734199632172,
"grad_norm": 0.08291894942522049,
"learning_rate": 0.0002,
"loss": 0.13,
"step": 14010
},
{
"epoch": 0.02549792101918937,
"grad_norm": 0.05875542387366295,
"learning_rate": 0.0002,
"loss": 0.0732,
"step": 14020
},
{
"epoch": 0.025516107838746565,
"grad_norm": 0.04103298857808113,
"learning_rate": 0.0002,
"loss": 0.0796,
"step": 14030
},
{
"epoch": 0.02553429465830376,
"grad_norm": 0.20349934697151184,
"learning_rate": 0.0002,
"loss": 0.0672,
"step": 14040
},
{
"epoch": 0.025552481477860958,
"grad_norm": 0.05419473722577095,
"learning_rate": 0.0002,
"loss": 0.0231,
"step": 14050
},
{
"epoch": 0.025570668297418154,
"grad_norm": 0.05501960590481758,
"learning_rate": 0.0002,
"loss": 0.1281,
"step": 14060
},
{
"epoch": 0.02558885511697535,
"grad_norm": 0.07140739262104034,
"learning_rate": 0.0002,
"loss": 0.0746,
"step": 14070
},
{
"epoch": 0.025607041936532546,
"grad_norm": 0.04564960300922394,
"learning_rate": 0.0002,
"loss": 0.0746,
"step": 14080
},
{
"epoch": 0.025625228756089743,
"grad_norm": 0.16987308859825134,
"learning_rate": 0.0002,
"loss": 0.0642,
"step": 14090
},
{
"epoch": 0.02564341557564694,
"grad_norm": 0.017460890114307404,
"learning_rate": 0.0002,
"loss": 0.0218,
"step": 14100
},
{
"epoch": 0.025661602395204135,
"grad_norm": 0.15666340291500092,
"learning_rate": 0.0002,
"loss": 0.1572,
"step": 14110
},
{
"epoch": 0.02567978921476133,
"grad_norm": 0.06847309321165085,
"learning_rate": 0.0002,
"loss": 0.0744,
"step": 14120
},
{
"epoch": 0.025697976034318528,
"grad_norm": 0.03678276389837265,
"learning_rate": 0.0002,
"loss": 0.0718,
"step": 14130
},
{
"epoch": 0.025716162853875724,
"grad_norm": 0.1861123889684677,
"learning_rate": 0.0002,
"loss": 0.06,
"step": 14140
},
{
"epoch": 0.02573434967343292,
"grad_norm": 0.010294788517057896,
"learning_rate": 0.0002,
"loss": 0.0183,
"step": 14150
},
{
"epoch": 0.025752536492990117,
"grad_norm": 0.0643458440899849,
"learning_rate": 0.0002,
"loss": 0.1594,
"step": 14160
},
{
"epoch": 0.025770723312547313,
"grad_norm": 0.10639938712120056,
"learning_rate": 0.0002,
"loss": 0.0761,
"step": 14170
},
{
"epoch": 0.02578891013210451,
"grad_norm": 0.056529924273490906,
"learning_rate": 0.0002,
"loss": 0.082,
"step": 14180
},
{
"epoch": 0.025807096951661706,
"grad_norm": 0.18884658813476562,
"learning_rate": 0.0002,
"loss": 0.0683,
"step": 14190
},
{
"epoch": 0.025825283771218902,
"grad_norm": 0.035667784512043,
"learning_rate": 0.0002,
"loss": 0.0263,
"step": 14200
},
{
"epoch": 0.0258434705907761,
"grad_norm": 0.14650103449821472,
"learning_rate": 0.0002,
"loss": 0.1314,
"step": 14210
},
{
"epoch": 0.025861657410333295,
"grad_norm": 0.12219654768705368,
"learning_rate": 0.0002,
"loss": 0.0755,
"step": 14220
},
{
"epoch": 0.02587984422989049,
"grad_norm": 0.05271647870540619,
"learning_rate": 0.0002,
"loss": 0.0789,
"step": 14230
},
{
"epoch": 0.025898031049447687,
"grad_norm": 0.1669916957616806,
"learning_rate": 0.0002,
"loss": 0.0641,
"step": 14240
},
{
"epoch": 0.025916217869004884,
"grad_norm": 0.035175371915102005,
"learning_rate": 0.0002,
"loss": 0.0222,
"step": 14250
},
{
"epoch": 0.025934404688562084,
"grad_norm": 0.14658409357070923,
"learning_rate": 0.0002,
"loss": 0.1382,
"step": 14260
},
{
"epoch": 0.02595259150811928,
"grad_norm": 0.07525639981031418,
"learning_rate": 0.0002,
"loss": 0.0815,
"step": 14270
},
{
"epoch": 0.025970778327676476,
"grad_norm": 0.02428872510790825,
"learning_rate": 0.0002,
"loss": 0.076,
"step": 14280
},
{
"epoch": 0.025988965147233672,
"grad_norm": 0.1825665533542633,
"learning_rate": 0.0002,
"loss": 0.0652,
"step": 14290
},
{
"epoch": 0.02600715196679087,
"grad_norm": 0.033867619931697845,
"learning_rate": 0.0002,
"loss": 0.0206,
"step": 14300
},
{
"epoch": 0.026025338786348065,
"grad_norm": 0.051891107112169266,
"learning_rate": 0.0002,
"loss": 0.1576,
"step": 14310
},
{
"epoch": 0.02604352560590526,
"grad_norm": 0.1111353188753128,
"learning_rate": 0.0002,
"loss": 0.0889,
"step": 14320
},
{
"epoch": 0.026061712425462458,
"grad_norm": 0.04253942146897316,
"learning_rate": 0.0002,
"loss": 0.079,
"step": 14330
},
{
"epoch": 0.026079899245019654,
"grad_norm": 0.17151106894016266,
"learning_rate": 0.0002,
"loss": 0.0678,
"step": 14340
},
{
"epoch": 0.02609808606457685,
"grad_norm": 0.03877005726099014,
"learning_rate": 0.0002,
"loss": 0.0206,
"step": 14350
},
{
"epoch": 0.026116272884134047,
"grad_norm": 0.03517235442996025,
"learning_rate": 0.0002,
"loss": 0.1343,
"step": 14360
},
{
"epoch": 0.026134459703691243,
"grad_norm": 0.08157488703727722,
"learning_rate": 0.0002,
"loss": 0.0764,
"step": 14370
},
{
"epoch": 0.02615264652324844,
"grad_norm": 0.03245632350444794,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 14380
},
{
"epoch": 0.026170833342805636,
"grad_norm": 0.20079655945301056,
"learning_rate": 0.0002,
"loss": 0.0691,
"step": 14390
},
{
"epoch": 0.026189020162362832,
"grad_norm": 0.03477077558636665,
"learning_rate": 0.0002,
"loss": 0.0232,
"step": 14400
},
{
"epoch": 0.026207206981920028,
"grad_norm": 0.14853888750076294,
"learning_rate": 0.0002,
"loss": 0.1436,
"step": 14410
},
{
"epoch": 0.026225393801477224,
"grad_norm": 0.12416905164718628,
"learning_rate": 0.0002,
"loss": 0.0755,
"step": 14420
},
{
"epoch": 0.02624358062103442,
"grad_norm": 0.03126871958374977,
"learning_rate": 0.0002,
"loss": 0.0762,
"step": 14430
},
{
"epoch": 0.026261767440591617,
"grad_norm": 0.20726743340492249,
"learning_rate": 0.0002,
"loss": 0.0614,
"step": 14440
},
{
"epoch": 0.026279954260148813,
"grad_norm": 0.039617493748664856,
"learning_rate": 0.0002,
"loss": 0.0181,
"step": 14450
},
{
"epoch": 0.02629814107970601,
"grad_norm": 0.08146277070045471,
"learning_rate": 0.0002,
"loss": 0.132,
"step": 14460
},
{
"epoch": 0.026316327899263206,
"grad_norm": 0.07181694358587265,
"learning_rate": 0.0002,
"loss": 0.0706,
"step": 14470
},
{
"epoch": 0.026334514718820402,
"grad_norm": 0.04080040752887726,
"learning_rate": 0.0002,
"loss": 0.0792,
"step": 14480
},
{
"epoch": 0.0263527015383776,
"grad_norm": 0.1903056502342224,
"learning_rate": 0.0002,
"loss": 0.0647,
"step": 14490
},
{
"epoch": 0.026370888357934795,
"grad_norm": 0.027256207540631294,
"learning_rate": 0.0002,
"loss": 0.0202,
"step": 14500
},
{
"epoch": 0.02638907517749199,
"grad_norm": 0.1434287130832672,
"learning_rate": 0.0002,
"loss": 0.1262,
"step": 14510
},
{
"epoch": 0.026407261997049188,
"grad_norm": 0.06977452337741852,
"learning_rate": 0.0002,
"loss": 0.0722,
"step": 14520
},
{
"epoch": 0.026425448816606384,
"grad_norm": 0.03453589975833893,
"learning_rate": 0.0002,
"loss": 0.0778,
"step": 14530
},
{
"epoch": 0.02644363563616358,
"grad_norm": 0.1455768346786499,
"learning_rate": 0.0002,
"loss": 0.0678,
"step": 14540
},
{
"epoch": 0.026461822455720777,
"grad_norm": 0.02977900207042694,
"learning_rate": 0.0002,
"loss": 0.0227,
"step": 14550
},
{
"epoch": 0.026480009275277973,
"grad_norm": 0.06667467951774597,
"learning_rate": 0.0002,
"loss": 0.1345,
"step": 14560
},
{
"epoch": 0.02649819609483517,
"grad_norm": 0.05125528201460838,
"learning_rate": 0.0002,
"loss": 0.078,
"step": 14570
},
{
"epoch": 0.026516382914392365,
"grad_norm": 0.02796974405646324,
"learning_rate": 0.0002,
"loss": 0.0782,
"step": 14580
},
{
"epoch": 0.026534569733949562,
"grad_norm": 0.18518763780593872,
"learning_rate": 0.0002,
"loss": 0.0722,
"step": 14590
},
{
"epoch": 0.02655275655350676,
"grad_norm": 0.01827179454267025,
"learning_rate": 0.0002,
"loss": 0.0193,
"step": 14600
},
{
"epoch": 0.026570943373063958,
"grad_norm": 0.1146678775548935,
"learning_rate": 0.0002,
"loss": 0.1651,
"step": 14610
},
{
"epoch": 0.026589130192621154,
"grad_norm": 3.385193109512329,
"learning_rate": 0.0002,
"loss": 0.2165,
"step": 14620
},
{
"epoch": 0.02660731701217835,
"grad_norm": 0.3052279055118561,
"learning_rate": 0.0002,
"loss": 0.1489,
"step": 14630
},
{
"epoch": 0.026625503831735547,
"grad_norm": 0.12762853503227234,
"learning_rate": 0.0002,
"loss": 0.0693,
"step": 14640
},
{
"epoch": 0.026643690651292743,
"grad_norm": 0.003925936296582222,
"learning_rate": 0.0002,
"loss": 0.0078,
"step": 14650
},
{
"epoch": 0.02666187747084994,
"grad_norm": 0.28632932901382446,
"learning_rate": 0.0002,
"loss": 0.2533,
"step": 14660
},
{
"epoch": 0.026680064290407136,
"grad_norm": 0.037552788853645325,
"learning_rate": 0.0002,
"loss": 0.0852,
"step": 14670
},
{
"epoch": 0.026698251109964332,
"grad_norm": 0.0911126434803009,
"learning_rate": 0.0002,
"loss": 0.0751,
"step": 14680
},
{
"epoch": 0.02671643792952153,
"grad_norm": 0.18434865772724152,
"learning_rate": 0.0002,
"loss": 0.084,
"step": 14690
},
{
"epoch": 0.026734624749078725,
"grad_norm": 0.03813793510198593,
"learning_rate": 0.0002,
"loss": 0.0165,
"step": 14700
},
{
"epoch": 0.02675281156863592,
"grad_norm": 0.04764392226934433,
"learning_rate": 0.0002,
"loss": 0.1642,
"step": 14710
},
{
"epoch": 0.026770998388193117,
"grad_norm": 0.04611713066697121,
"learning_rate": 0.0002,
"loss": 0.088,
"step": 14720
},
{
"epoch": 0.026789185207750314,
"grad_norm": 0.07171179354190826,
"learning_rate": 0.0002,
"loss": 0.1417,
"step": 14730
},
{
"epoch": 0.02680737202730751,
"grad_norm": 0.14135649800300598,
"learning_rate": 0.0002,
"loss": 0.0692,
"step": 14740
},
{
"epoch": 0.026825558846864706,
"grad_norm": 0.004508219193667173,
"learning_rate": 0.0002,
"loss": 0.016,
"step": 14750
},
{
"epoch": 0.026843745666421902,
"grad_norm": 0.09732682257890701,
"learning_rate": 0.0002,
"loss": 0.2089,
"step": 14760
},
{
"epoch": 0.0268619324859791,
"grad_norm": 0.12676575779914856,
"learning_rate": 0.0002,
"loss": 0.0849,
"step": 14770
},
{
"epoch": 0.026880119305536295,
"grad_norm": 0.0696650817990303,
"learning_rate": 0.0002,
"loss": 0.08,
"step": 14780
},
{
"epoch": 0.02689830612509349,
"grad_norm": 0.17883484065532684,
"learning_rate": 0.0002,
"loss": 0.0682,
"step": 14790
},
{
"epoch": 0.026916492944650688,
"grad_norm": 0.0567975677549839,
"learning_rate": 0.0002,
"loss": 0.0149,
"step": 14800
},
{
"epoch": 0.026934679764207884,
"grad_norm": 0.4884565472602844,
"learning_rate": 0.0002,
"loss": 0.6381,
"step": 14810
},
{
"epoch": 0.02695286658376508,
"grad_norm": 0.0742981806397438,
"learning_rate": 0.0002,
"loss": 0.0845,
"step": 14820
},
{
"epoch": 0.026971053403322277,
"grad_norm": 0.030466781929135323,
"learning_rate": 0.0002,
"loss": 0.0818,
"step": 14830
},
{
"epoch": 0.026989240222879473,
"grad_norm": 0.13108357787132263,
"learning_rate": 0.0002,
"loss": 0.065,
"step": 14840
},
{
"epoch": 0.02700742704243667,
"grad_norm": 0.019065184518694878,
"learning_rate": 0.0002,
"loss": 0.0168,
"step": 14850
},
{
"epoch": 0.027025613861993866,
"grad_norm": 0.21891777217388153,
"learning_rate": 0.0002,
"loss": 0.1456,
"step": 14860
},
{
"epoch": 0.027043800681551062,
"grad_norm": 0.0836934968829155,
"learning_rate": 0.0002,
"loss": 0.0781,
"step": 14870
},
{
"epoch": 0.027061987501108258,
"grad_norm": 0.0643845945596695,
"learning_rate": 0.0002,
"loss": 0.0795,
"step": 14880
},
{
"epoch": 0.027080174320665455,
"grad_norm": 0.27108556032180786,
"learning_rate": 0.0002,
"loss": 0.0722,
"step": 14890
},
{
"epoch": 0.02709836114022265,
"grad_norm": 0.008289041928946972,
"learning_rate": 0.0002,
"loss": 0.0201,
"step": 14900
},
{
"epoch": 0.027116547959779847,
"grad_norm": 0.03284185752272606,
"learning_rate": 0.0002,
"loss": 0.1509,
"step": 14910
},
{
"epoch": 0.027134734779337043,
"grad_norm": 0.051129039376974106,
"learning_rate": 0.0002,
"loss": 0.0831,
"step": 14920
},
{
"epoch": 0.02715292159889424,
"grad_norm": 0.046401191502809525,
"learning_rate": 0.0002,
"loss": 0.0694,
"step": 14930
},
{
"epoch": 0.027171108418451436,
"grad_norm": 0.19945313036441803,
"learning_rate": 0.0002,
"loss": 0.0734,
"step": 14940
},
{
"epoch": 0.027189295238008636,
"grad_norm": 0.03877973556518555,
"learning_rate": 0.0002,
"loss": 0.026,
"step": 14950
},
{
"epoch": 0.027207482057565832,
"grad_norm": 0.19090695679187775,
"learning_rate": 0.0002,
"loss": 0.136,
"step": 14960
},
{
"epoch": 0.02722566887712303,
"grad_norm": 0.11352288722991943,
"learning_rate": 0.0002,
"loss": 0.0797,
"step": 14970
},
{
"epoch": 0.027243855696680225,
"grad_norm": 0.055218834429979324,
"learning_rate": 0.0002,
"loss": 0.0763,
"step": 14980
},
{
"epoch": 0.02726204251623742,
"grad_norm": 0.1060803234577179,
"learning_rate": 0.0002,
"loss": 0.059,
"step": 14990
},
{
"epoch": 0.027280229335794617,
"grad_norm": 0.03370797634124756,
"learning_rate": 0.0002,
"loss": 0.0172,
"step": 15000
},
{
"epoch": 0.027298416155351814,
"grad_norm": 0.19884982705116272,
"learning_rate": 0.0002,
"loss": 0.1408,
"step": 15010
},
{
"epoch": 0.02731660297490901,
"grad_norm": 0.1186273992061615,
"learning_rate": 0.0002,
"loss": 0.0769,
"step": 15020
},
{
"epoch": 0.027334789794466206,
"grad_norm": 0.0494297556579113,
"learning_rate": 0.0002,
"loss": 0.0818,
"step": 15030
},
{
"epoch": 0.027352976614023403,
"grad_norm": 0.17990480363368988,
"learning_rate": 0.0002,
"loss": 0.06,
"step": 15040
},
{
"epoch": 0.0273711634335806,
"grad_norm": 0.015269913710653782,
"learning_rate": 0.0002,
"loss": 0.0143,
"step": 15050
},
{
"epoch": 0.027389350253137795,
"grad_norm": 0.1387794464826584,
"learning_rate": 0.0002,
"loss": 0.171,
"step": 15060
},
{
"epoch": 0.02740753707269499,
"grad_norm": 0.11648393422365189,
"learning_rate": 0.0002,
"loss": 0.0769,
"step": 15070
},
{
"epoch": 0.027425723892252188,
"grad_norm": 0.04039733111858368,
"learning_rate": 0.0002,
"loss": 0.0707,
"step": 15080
},
{
"epoch": 0.027443910711809384,
"grad_norm": 0.19274230301380157,
"learning_rate": 0.0002,
"loss": 0.0657,
"step": 15090
},
{
"epoch": 0.02746209753136658,
"grad_norm": 0.03266929090023041,
"learning_rate": 0.0002,
"loss": 0.0155,
"step": 15100
},
{
"epoch": 0.027480284350923777,
"grad_norm": 0.44524702429771423,
"learning_rate": 0.0002,
"loss": 0.3075,
"step": 15110
},
{
"epoch": 0.027498471170480973,
"grad_norm": 0.15604422986507416,
"learning_rate": 0.0002,
"loss": 0.0874,
"step": 15120
},
{
"epoch": 0.02751665799003817,
"grad_norm": 0.043061114847660065,
"learning_rate": 0.0002,
"loss": 0.0814,
"step": 15130
},
{
"epoch": 0.027534844809595366,
"grad_norm": 0.2331482172012329,
"learning_rate": 0.0002,
"loss": 0.0638,
"step": 15140
},
{
"epoch": 0.027553031629152562,
"grad_norm": 0.011037157848477364,
"learning_rate": 0.0002,
"loss": 0.0197,
"step": 15150
},
{
"epoch": 0.02757121844870976,
"grad_norm": 0.0758776143193245,
"learning_rate": 0.0002,
"loss": 0.1481,
"step": 15160
},
{
"epoch": 0.027589405268266955,
"grad_norm": 0.18878699839115143,
"learning_rate": 0.0002,
"loss": 0.083,
"step": 15170
},
{
"epoch": 0.02760759208782415,
"grad_norm": 0.042469121515750885,
"learning_rate": 0.0002,
"loss": 0.0799,
"step": 15180
},
{
"epoch": 0.027625778907381347,
"grad_norm": 0.1603335440158844,
"learning_rate": 0.0002,
"loss": 0.0579,
"step": 15190
},
{
"epoch": 0.027643965726938544,
"grad_norm": 0.03533349186182022,
"learning_rate": 0.0002,
"loss": 0.0195,
"step": 15200
},
{
"epoch": 0.02766215254649574,
"grad_norm": 0.2014724314212799,
"learning_rate": 0.0002,
"loss": 0.1443,
"step": 15210
},
{
"epoch": 0.027680339366052936,
"grad_norm": 0.04604899883270264,
"learning_rate": 0.0002,
"loss": 0.0701,
"step": 15220
},
{
"epoch": 0.027698526185610133,
"grad_norm": 0.04726789519190788,
"learning_rate": 0.0002,
"loss": 0.078,
"step": 15230
},
{
"epoch": 0.02771671300516733,
"grad_norm": 0.16189764440059662,
"learning_rate": 0.0002,
"loss": 0.0686,
"step": 15240
},
{
"epoch": 0.027734899824724525,
"grad_norm": 0.018077973276376724,
"learning_rate": 0.0002,
"loss": 0.0155,
"step": 15250
},
{
"epoch": 0.02775308664428172,
"grad_norm": 0.09486963599920273,
"learning_rate": 0.0002,
"loss": 0.1695,
"step": 15260
},
{
"epoch": 0.027771273463838918,
"grad_norm": 0.19950449466705322,
"learning_rate": 0.0002,
"loss": 0.0784,
"step": 15270
},
{
"epoch": 0.027789460283396114,
"grad_norm": 0.03350493311882019,
"learning_rate": 0.0002,
"loss": 0.0797,
"step": 15280
},
{
"epoch": 0.027807647102953314,
"grad_norm": 0.14408868551254272,
"learning_rate": 0.0002,
"loss": 0.0624,
"step": 15290
},
{
"epoch": 0.02782583392251051,
"grad_norm": 0.03824521601200104,
"learning_rate": 0.0002,
"loss": 0.0182,
"step": 15300
},
{
"epoch": 0.027844020742067706,
"grad_norm": 0.051167964935302734,
"learning_rate": 0.0002,
"loss": 0.1342,
"step": 15310
},
{
"epoch": 0.027862207561624903,
"grad_norm": 0.08440420031547546,
"learning_rate": 0.0002,
"loss": 0.0775,
"step": 15320
},
{
"epoch": 0.0278803943811821,
"grad_norm": 0.05162487551569939,
"learning_rate": 0.0002,
"loss": 0.0824,
"step": 15330
},
{
"epoch": 0.027898581200739295,
"grad_norm": 0.1576220989227295,
"learning_rate": 0.0002,
"loss": 0.0607,
"step": 15340
},
{
"epoch": 0.02791676802029649,
"grad_norm": 0.03840797394514084,
"learning_rate": 0.0002,
"loss": 0.0197,
"step": 15350
},
{
"epoch": 0.027934954839853688,
"grad_norm": 0.1418246179819107,
"learning_rate": 0.0002,
"loss": 0.151,
"step": 15360
},
{
"epoch": 0.027953141659410884,
"grad_norm": 0.07326096296310425,
"learning_rate": 0.0002,
"loss": 0.0764,
"step": 15370
},
{
"epoch": 0.02797132847896808,
"grad_norm": 0.0582844614982605,
"learning_rate": 0.0002,
"loss": 0.0745,
"step": 15380
},
{
"epoch": 0.027989515298525277,
"grad_norm": 0.2234935164451599,
"learning_rate": 0.0002,
"loss": 0.0687,
"step": 15390
},
{
"epoch": 0.028007702118082473,
"grad_norm": 0.04384669288992882,
"learning_rate": 0.0002,
"loss": 0.023,
"step": 15400
},
{
"epoch": 0.02802588893763967,
"grad_norm": 0.14306089282035828,
"learning_rate": 0.0002,
"loss": 0.1477,
"step": 15410
},
{
"epoch": 0.028044075757196866,
"grad_norm": 0.1326105296611786,
"learning_rate": 0.0002,
"loss": 0.0784,
"step": 15420
},
{
"epoch": 0.028062262576754062,
"grad_norm": 0.05531894043087959,
"learning_rate": 0.0002,
"loss": 0.0813,
"step": 15430
},
{
"epoch": 0.02808044939631126,
"grad_norm": 0.14875297248363495,
"learning_rate": 0.0002,
"loss": 0.0622,
"step": 15440
},
{
"epoch": 0.028098636215868455,
"grad_norm": 0.03749268501996994,
"learning_rate": 0.0002,
"loss": 0.0181,
"step": 15450
},
{
"epoch": 0.02811682303542565,
"grad_norm": 0.05747106671333313,
"learning_rate": 0.0002,
"loss": 0.1157,
"step": 15460
},
{
"epoch": 0.028135009854982847,
"grad_norm": 0.06197863444685936,
"learning_rate": 0.0002,
"loss": 0.0779,
"step": 15470
},
{
"epoch": 0.028153196674540044,
"grad_norm": 0.09997677057981491,
"learning_rate": 0.0002,
"loss": 0.0784,
"step": 15480
},
{
"epoch": 0.02817138349409724,
"grad_norm": 0.18067684769630432,
"learning_rate": 0.0002,
"loss": 0.0728,
"step": 15490
},
{
"epoch": 0.028189570313654436,
"grad_norm": 0.03378088399767876,
"learning_rate": 0.0002,
"loss": 0.0252,
"step": 15500
},
{
"epoch": 0.028207757133211633,
"grad_norm": 0.14048723876476288,
"learning_rate": 0.0002,
"loss": 0.1392,
"step": 15510
},
{
"epoch": 0.02822594395276883,
"grad_norm": 0.09573493152856827,
"learning_rate": 0.0002,
"loss": 0.0751,
"step": 15520
},
{
"epoch": 0.028244130772326025,
"grad_norm": 0.11000777781009674,
"learning_rate": 0.0002,
"loss": 0.08,
"step": 15530
},
{
"epoch": 0.02826231759188322,
"grad_norm": 0.17712855339050293,
"learning_rate": 0.0002,
"loss": 0.0658,
"step": 15540
},
{
"epoch": 0.028280504411440418,
"grad_norm": 0.0183733981102705,
"learning_rate": 0.0002,
"loss": 0.0188,
"step": 15550
},
{
"epoch": 0.028298691230997614,
"grad_norm": 0.15027762949466705,
"learning_rate": 0.0002,
"loss": 0.1235,
"step": 15560
},
{
"epoch": 0.02831687805055481,
"grad_norm": 0.10586661100387573,
"learning_rate": 0.0002,
"loss": 0.0791,
"step": 15570
},
{
"epoch": 0.028335064870112007,
"grad_norm": 0.031083540990948677,
"learning_rate": 0.0002,
"loss": 0.0765,
"step": 15580
},
{
"epoch": 0.028353251689669203,
"grad_norm": 0.12294827401638031,
"learning_rate": 0.0002,
"loss": 0.0615,
"step": 15590
},
{
"epoch": 0.0283714385092264,
"grad_norm": 0.03652534633874893,
"learning_rate": 0.0002,
"loss": 0.0203,
"step": 15600
},
{
"epoch": 0.028389625328783596,
"grad_norm": 0.046638645231723785,
"learning_rate": 0.0002,
"loss": 0.1327,
"step": 15610
},
{
"epoch": 0.028407812148340792,
"grad_norm": 0.07200415432453156,
"learning_rate": 0.0002,
"loss": 0.0765,
"step": 15620
},
{
"epoch": 0.028425998967897992,
"grad_norm": 0.040679559111595154,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 15630
},
{
"epoch": 0.028444185787455188,
"grad_norm": 0.1572960615158081,
"learning_rate": 0.0002,
"loss": 0.0637,
"step": 15640
},
{
"epoch": 0.028462372607012384,
"grad_norm": 0.036091506481170654,
"learning_rate": 0.0002,
"loss": 0.0266,
"step": 15650
},
{
"epoch": 0.02848055942656958,
"grad_norm": 0.10555437207221985,
"learning_rate": 0.0002,
"loss": 0.1093,
"step": 15660
},
{
"epoch": 0.028498746246126777,
"grad_norm": 0.08854329586029053,
"learning_rate": 0.0002,
"loss": 0.0741,
"step": 15670
},
{
"epoch": 0.028516933065683973,
"grad_norm": 0.02908560261130333,
"learning_rate": 0.0002,
"loss": 0.0732,
"step": 15680
},
{
"epoch": 0.02853511988524117,
"grad_norm": 0.1568380743265152,
"learning_rate": 0.0002,
"loss": 0.0586,
"step": 15690
},
{
"epoch": 0.028553306704798366,
"grad_norm": 0.04985487833619118,
"learning_rate": 0.0002,
"loss": 0.0247,
"step": 15700
},
{
"epoch": 0.028571493524355562,
"grad_norm": 0.07582605630159378,
"learning_rate": 0.0002,
"loss": 0.1196,
"step": 15710
},
{
"epoch": 0.02858968034391276,
"grad_norm": 0.02401849813759327,
"learning_rate": 0.0002,
"loss": 0.075,
"step": 15720
},
{
"epoch": 0.028607867163469955,
"grad_norm": 0.032545965164899826,
"learning_rate": 0.0002,
"loss": 0.0761,
"step": 15730
},
{
"epoch": 0.02862605398302715,
"grad_norm": 0.1098649650812149,
"learning_rate": 0.0002,
"loss": 0.0599,
"step": 15740
},
{
"epoch": 0.028644240802584348,
"grad_norm": 0.021166007965803146,
"learning_rate": 0.0002,
"loss": 0.0169,
"step": 15750
},
{
"epoch": 0.028662427622141544,
"grad_norm": 0.0823541134595871,
"learning_rate": 0.0002,
"loss": 0.1337,
"step": 15760
},
{
"epoch": 0.02868061444169874,
"grad_norm": 0.1009572371840477,
"learning_rate": 0.0002,
"loss": 0.0779,
"step": 15770
},
{
"epoch": 0.028698801261255937,
"grad_norm": 0.09160738438367844,
"learning_rate": 0.0002,
"loss": 0.0766,
"step": 15780
},
{
"epoch": 0.028716988080813133,
"grad_norm": 0.14419673383235931,
"learning_rate": 0.0002,
"loss": 0.0594,
"step": 15790
},
{
"epoch": 0.02873517490037033,
"grad_norm": 0.01628550887107849,
"learning_rate": 0.0002,
"loss": 0.0218,
"step": 15800
},
{
"epoch": 0.028753361719927525,
"grad_norm": 0.15207678079605103,
"learning_rate": 0.0002,
"loss": 0.1262,
"step": 15810
},
{
"epoch": 0.028771548539484722,
"grad_norm": 0.14951761066913605,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 15820
},
{
"epoch": 0.028789735359041918,
"grad_norm": 0.028078215196728706,
"learning_rate": 0.0002,
"loss": 0.0783,
"step": 15830
},
{
"epoch": 0.028807922178599114,
"grad_norm": 0.16079741716384888,
"learning_rate": 0.0002,
"loss": 0.0633,
"step": 15840
},
{
"epoch": 0.02882610899815631,
"grad_norm": 0.04218870773911476,
"learning_rate": 0.0002,
"loss": 0.0217,
"step": 15850
},
{
"epoch": 0.028844295817713507,
"grad_norm": 0.13758492469787598,
"learning_rate": 0.0002,
"loss": 0.1358,
"step": 15860
},
{
"epoch": 0.028862482637270703,
"grad_norm": 0.10366559028625488,
"learning_rate": 0.0002,
"loss": 0.0787,
"step": 15870
},
{
"epoch": 0.0288806694568279,
"grad_norm": 0.04433147609233856,
"learning_rate": 0.0002,
"loss": 0.0781,
"step": 15880
},
{
"epoch": 0.028898856276385096,
"grad_norm": 0.16709402203559875,
"learning_rate": 0.0002,
"loss": 0.0684,
"step": 15890
},
{
"epoch": 0.028917043095942292,
"grad_norm": 0.03370310738682747,
"learning_rate": 0.0002,
"loss": 0.0191,
"step": 15900
},
{
"epoch": 0.02893522991549949,
"grad_norm": 0.15469267964363098,
"learning_rate": 0.0002,
"loss": 0.1487,
"step": 15910
},
{
"epoch": 0.028953416735056685,
"grad_norm": 0.19974654912948608,
"learning_rate": 0.0002,
"loss": 0.0769,
"step": 15920
},
{
"epoch": 0.02897160355461388,
"grad_norm": 0.04307623952627182,
"learning_rate": 0.0002,
"loss": 0.075,
"step": 15930
},
{
"epoch": 0.028989790374171077,
"grad_norm": 0.21828149259090424,
"learning_rate": 0.0002,
"loss": 0.0691,
"step": 15940
},
{
"epoch": 0.029007977193728274,
"grad_norm": 0.0268656387925148,
"learning_rate": 0.0002,
"loss": 0.022,
"step": 15950
},
{
"epoch": 0.02902616401328547,
"grad_norm": 0.11213699728250504,
"learning_rate": 0.0002,
"loss": 0.1326,
"step": 15960
},
{
"epoch": 0.029044350832842666,
"grad_norm": 0.2018963098526001,
"learning_rate": 0.0002,
"loss": 0.0772,
"step": 15970
},
{
"epoch": 0.029062537652399866,
"grad_norm": 0.06034110113978386,
"learning_rate": 0.0002,
"loss": 0.0712,
"step": 15980
},
{
"epoch": 0.029080724471957062,
"grad_norm": 0.1817707121372223,
"learning_rate": 0.0002,
"loss": 0.0692,
"step": 15990
},
{
"epoch": 0.02909891129151426,
"grad_norm": 0.03466440737247467,
"learning_rate": 0.0002,
"loss": 0.0205,
"step": 16000
},
{
"epoch": 0.029117098111071455,
"grad_norm": 0.1375580132007599,
"learning_rate": 0.0002,
"loss": 0.1499,
"step": 16010
},
{
"epoch": 0.02913528493062865,
"grad_norm": 0.14308910071849823,
"learning_rate": 0.0002,
"loss": 0.083,
"step": 16020
},
{
"epoch": 0.029153471750185848,
"grad_norm": 0.041022926568984985,
"learning_rate": 0.0002,
"loss": 0.0785,
"step": 16030
},
{
"epoch": 0.029171658569743044,
"grad_norm": 0.1701498180627823,
"learning_rate": 0.0002,
"loss": 0.0656,
"step": 16040
},
{
"epoch": 0.02918984538930024,
"grad_norm": 0.023075805976986885,
"learning_rate": 0.0002,
"loss": 0.0225,
"step": 16050
},
{
"epoch": 0.029208032208857437,
"grad_norm": 0.05303549766540527,
"learning_rate": 0.0002,
"loss": 0.1369,
"step": 16060
},
{
"epoch": 0.029226219028414633,
"grad_norm": 0.044178470969200134,
"learning_rate": 0.0002,
"loss": 0.0754,
"step": 16070
},
{
"epoch": 0.02924440584797183,
"grad_norm": 0.03951259329915047,
"learning_rate": 0.0002,
"loss": 0.0759,
"step": 16080
},
{
"epoch": 0.029262592667529026,
"grad_norm": 0.13762067258358002,
"learning_rate": 0.0002,
"loss": 0.0605,
"step": 16090
},
{
"epoch": 0.029280779487086222,
"grad_norm": 0.021227868273854256,
"learning_rate": 0.0002,
"loss": 0.0173,
"step": 16100
},
{
"epoch": 0.029298966306643418,
"grad_norm": 0.19493195414543152,
"learning_rate": 0.0002,
"loss": 0.1307,
"step": 16110
},
{
"epoch": 0.029317153126200615,
"grad_norm": 0.09980791062116623,
"learning_rate": 0.0002,
"loss": 0.0724,
"step": 16120
},
{
"epoch": 0.02933533994575781,
"grad_norm": 0.08762095868587494,
"learning_rate": 0.0002,
"loss": 0.0734,
"step": 16130
},
{
"epoch": 0.029353526765315007,
"grad_norm": 0.14261308312416077,
"learning_rate": 0.0002,
"loss": 0.071,
"step": 16140
},
{
"epoch": 0.029371713584872203,
"grad_norm": 0.033154651522636414,
"learning_rate": 0.0002,
"loss": 0.0238,
"step": 16150
},
{
"epoch": 0.0293899004044294,
"grad_norm": 0.1422877162694931,
"learning_rate": 0.0002,
"loss": 0.1285,
"step": 16160
},
{
"epoch": 0.029408087223986596,
"grad_norm": 0.1342266947031021,
"learning_rate": 0.0002,
"loss": 0.0765,
"step": 16170
},
{
"epoch": 0.029426274043543792,
"grad_norm": 0.031525906175374985,
"learning_rate": 0.0002,
"loss": 0.0772,
"step": 16180
},
{
"epoch": 0.02944446086310099,
"grad_norm": 0.14790122210979462,
"learning_rate": 0.0002,
"loss": 0.0627,
"step": 16190
},
{
"epoch": 0.029462647682658185,
"grad_norm": 0.025354932993650436,
"learning_rate": 0.0002,
"loss": 0.0212,
"step": 16200
},
{
"epoch": 0.02948083450221538,
"grad_norm": 0.1287624090909958,
"learning_rate": 0.0002,
"loss": 0.1457,
"step": 16210
},
{
"epoch": 0.029499021321772578,
"grad_norm": 0.1079782247543335,
"learning_rate": 0.0002,
"loss": 0.0819,
"step": 16220
},
{
"epoch": 0.029517208141329774,
"grad_norm": 0.04884497448801994,
"learning_rate": 0.0002,
"loss": 0.0843,
"step": 16230
},
{
"epoch": 0.02953539496088697,
"grad_norm": 0.14452646672725677,
"learning_rate": 0.0002,
"loss": 0.0664,
"step": 16240
},
{
"epoch": 0.029553581780444167,
"grad_norm": 0.029236188158392906,
"learning_rate": 0.0002,
"loss": 0.0182,
"step": 16250
},
{
"epoch": 0.029571768600001363,
"grad_norm": 0.18048252165317535,
"learning_rate": 0.0002,
"loss": 0.1382,
"step": 16260
},
{
"epoch": 0.02958995541955856,
"grad_norm": 0.08402508497238159,
"learning_rate": 0.0002,
"loss": 0.078,
"step": 16270
},
{
"epoch": 0.029608142239115755,
"grad_norm": 0.07740433514118195,
"learning_rate": 0.0002,
"loss": 0.0776,
"step": 16280
},
{
"epoch": 0.029626329058672952,
"grad_norm": 0.1414123773574829,
"learning_rate": 0.0002,
"loss": 0.0611,
"step": 16290
},
{
"epoch": 0.029644515878230148,
"grad_norm": 0.03296574577689171,
"learning_rate": 0.0002,
"loss": 0.0228,
"step": 16300
},
{
"epoch": 0.029662702697787344,
"grad_norm": 0.09312735497951508,
"learning_rate": 0.0002,
"loss": 0.1213,
"step": 16310
},
{
"epoch": 0.029680889517344544,
"grad_norm": 0.07857484370470047,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 16320
},
{
"epoch": 0.02969907633690174,
"grad_norm": 0.0680379793047905,
"learning_rate": 0.0002,
"loss": 0.0774,
"step": 16330
},
{
"epoch": 0.029717263156458937,
"grad_norm": 0.18506748974323273,
"learning_rate": 0.0002,
"loss": 0.0675,
"step": 16340
},
{
"epoch": 0.029735449976016133,
"grad_norm": 0.029233543202280998,
"learning_rate": 0.0002,
"loss": 0.0187,
"step": 16350
},
{
"epoch": 0.02975363679557333,
"grad_norm": 0.1133171021938324,
"learning_rate": 0.0002,
"loss": 0.1217,
"step": 16360
},
{
"epoch": 0.029771823615130526,
"grad_norm": 0.06985988467931747,
"learning_rate": 0.0002,
"loss": 0.0761,
"step": 16370
},
{
"epoch": 0.029790010434687722,
"grad_norm": 0.13158757984638214,
"learning_rate": 0.0002,
"loss": 0.0764,
"step": 16380
},
{
"epoch": 0.02980819725424492,
"grad_norm": 0.19751304388046265,
"learning_rate": 0.0002,
"loss": 0.0652,
"step": 16390
},
{
"epoch": 0.029826384073802115,
"grad_norm": 0.019567493349313736,
"learning_rate": 0.0002,
"loss": 0.0166,
"step": 16400
},
{
"epoch": 0.02984457089335931,
"grad_norm": 0.1859702467918396,
"learning_rate": 0.0002,
"loss": 0.1482,
"step": 16410
},
{
"epoch": 0.029862757712916507,
"grad_norm": 0.03211350366473198,
"learning_rate": 0.0002,
"loss": 0.073,
"step": 16420
},
{
"epoch": 0.029880944532473704,
"grad_norm": 0.10664219409227371,
"learning_rate": 0.0002,
"loss": 0.075,
"step": 16430
},
{
"epoch": 0.0298991313520309,
"grad_norm": 0.18254978954792023,
"learning_rate": 0.0002,
"loss": 0.0666,
"step": 16440
},
{
"epoch": 0.029917318171588096,
"grad_norm": 0.03076091594994068,
"learning_rate": 0.0002,
"loss": 0.0217,
"step": 16450
},
{
"epoch": 0.029935504991145293,
"grad_norm": 0.11172248423099518,
"learning_rate": 0.0002,
"loss": 0.1115,
"step": 16460
},
{
"epoch": 0.02995369181070249,
"grad_norm": 0.1121174767613411,
"learning_rate": 0.0002,
"loss": 0.0838,
"step": 16470
},
{
"epoch": 0.029971878630259685,
"grad_norm": 0.05544061213731766,
"learning_rate": 0.0002,
"loss": 0.0773,
"step": 16480
},
{
"epoch": 0.02999006544981688,
"grad_norm": 0.13899610936641693,
"learning_rate": 0.0002,
"loss": 0.0648,
"step": 16490
},
{
"epoch": 0.030008252269374078,
"grad_norm": 0.031017031520605087,
"learning_rate": 0.0002,
"loss": 0.0205,
"step": 16500
},
{
"epoch": 0.030026439088931274,
"grad_norm": 0.5919166803359985,
"learning_rate": 0.0002,
"loss": 0.1454,
"step": 16510
},
{
"epoch": 0.03004462590848847,
"grad_norm": 2.5127646923065186,
"learning_rate": 0.0002,
"loss": 0.0925,
"step": 16520
},
{
"epoch": 0.030062812728045667,
"grad_norm": 0.12587642669677734,
"learning_rate": 0.0002,
"loss": 0.0896,
"step": 16530
},
{
"epoch": 0.030080999547602863,
"grad_norm": 0.29352524876594543,
"learning_rate": 0.0002,
"loss": 0.0692,
"step": 16540
},
{
"epoch": 0.03009918636716006,
"grad_norm": 0.012585405260324478,
"learning_rate": 0.0002,
"loss": 0.021,
"step": 16550
},
{
"epoch": 0.030117373186717256,
"grad_norm": 2.432018756866455,
"learning_rate": 0.0002,
"loss": 0.239,
"step": 16560
},
{
"epoch": 0.030135560006274452,
"grad_norm": 0.09337054193019867,
"learning_rate": 0.0002,
"loss": 0.0859,
"step": 16570
},
{
"epoch": 0.030153746825831648,
"grad_norm": 0.05135548114776611,
"learning_rate": 0.0002,
"loss": 0.0794,
"step": 16580
},
{
"epoch": 0.030171933645388845,
"grad_norm": 0.15056684613227844,
"learning_rate": 0.0002,
"loss": 0.0697,
"step": 16590
},
{
"epoch": 0.03019012046494604,
"grad_norm": 5.883757694391534e-05,
"learning_rate": 0.0002,
"loss": 0.0085,
"step": 16600
},
{
"epoch": 0.030208307284503237,
"grad_norm": 1.0368543863296509,
"learning_rate": 0.0002,
"loss": 0.1861,
"step": 16610
},
{
"epoch": 0.030226494104060433,
"grad_norm": 0.07987317442893982,
"learning_rate": 0.0002,
"loss": 0.0938,
"step": 16620
},
{
"epoch": 0.03024468092361763,
"grad_norm": 0.02812887355685234,
"learning_rate": 0.0002,
"loss": 0.0753,
"step": 16630
},
{
"epoch": 0.030262867743174826,
"grad_norm": 0.24061231315135956,
"learning_rate": 0.0002,
"loss": 0.0653,
"step": 16640
},
{
"epoch": 0.030281054562732022,
"grad_norm": 0.0402507558465004,
"learning_rate": 0.0002,
"loss": 0.0266,
"step": 16650
},
{
"epoch": 0.030299241382289222,
"grad_norm": 0.13552093505859375,
"learning_rate": 0.0002,
"loss": 0.1709,
"step": 16660
},
{
"epoch": 0.03031742820184642,
"grad_norm": 0.6093604564666748,
"learning_rate": 0.0002,
"loss": 0.0857,
"step": 16670
},
{
"epoch": 0.030335615021403615,
"grad_norm": 0.11608528345823288,
"learning_rate": 0.0002,
"loss": 0.0874,
"step": 16680
},
{
"epoch": 0.03035380184096081,
"grad_norm": 0.23376339673995972,
"learning_rate": 0.0002,
"loss": 0.0688,
"step": 16690
},
{
"epoch": 0.030371988660518007,
"grad_norm": 0.03484225273132324,
"learning_rate": 0.0002,
"loss": 0.0172,
"step": 16700
},
{
"epoch": 0.030390175480075204,
"grad_norm": 0.30532532930374146,
"learning_rate": 0.0002,
"loss": 0.1686,
"step": 16710
},
{
"epoch": 0.0304083622996324,
"grad_norm": 0.05142231658101082,
"learning_rate": 0.0002,
"loss": 0.0766,
"step": 16720
},
{
"epoch": 0.030426549119189596,
"grad_norm": 0.08218207955360413,
"learning_rate": 0.0002,
"loss": 0.0839,
"step": 16730
},
{
"epoch": 0.030444735938746793,
"grad_norm": 0.15296520292758942,
"learning_rate": 0.0002,
"loss": 0.0717,
"step": 16740
},
{
"epoch": 0.03046292275830399,
"grad_norm": 0.009951476007699966,
"learning_rate": 0.0002,
"loss": 0.0103,
"step": 16750
},
{
"epoch": 0.030481109577861185,
"grad_norm": 0.18752850592136383,
"learning_rate": 0.0002,
"loss": 0.2382,
"step": 16760
},
{
"epoch": 0.03049929639741838,
"grad_norm": 0.1473335325717926,
"learning_rate": 0.0002,
"loss": 0.0975,
"step": 16770
},
{
"epoch": 0.030517483216975578,
"grad_norm": 0.04578230902552605,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 16780
},
{
"epoch": 0.030535670036532774,
"grad_norm": 0.2557182312011719,
"learning_rate": 0.0002,
"loss": 0.0691,
"step": 16790
},
{
"epoch": 0.03055385685608997,
"grad_norm": 1.473021388053894,
"learning_rate": 0.0002,
"loss": 0.2088,
"step": 16800
},
{
"epoch": 0.030572043675647167,
"grad_norm": 1.0227181911468506,
"learning_rate": 0.0002,
"loss": 0.7207,
"step": 16810
},
{
"epoch": 0.030590230495204363,
"grad_norm": 0.11395780742168427,
"learning_rate": 0.0002,
"loss": 0.0943,
"step": 16820
},
{
"epoch": 0.03060841731476156,
"grad_norm": 6.501937389373779,
"learning_rate": 0.0002,
"loss": 0.0871,
"step": 16830
},
{
"epoch": 0.030626604134318756,
"grad_norm": 0.17187578976154327,
"learning_rate": 0.0002,
"loss": 0.0672,
"step": 16840
},
{
"epoch": 0.030644790953875952,
"grad_norm": 0.03396519273519516,
"learning_rate": 0.0002,
"loss": 0.0224,
"step": 16850
},
{
"epoch": 0.03066297777343315,
"grad_norm": 3.397012948989868,
"learning_rate": 0.0002,
"loss": 0.1641,
"step": 16860
},
{
"epoch": 0.030681164592990345,
"grad_norm": 0.44838130474090576,
"learning_rate": 0.0002,
"loss": 0.0868,
"step": 16870
},
{
"epoch": 0.03069935141254754,
"grad_norm": 0.08598771691322327,
"learning_rate": 0.0002,
"loss": 0.0766,
"step": 16880
},
{
"epoch": 0.030717538232104737,
"grad_norm": 0.15339739620685577,
"learning_rate": 0.0002,
"loss": 0.0609,
"step": 16890
},
{
"epoch": 0.030735725051661934,
"grad_norm": 0.04086040332913399,
"learning_rate": 0.0002,
"loss": 0.0218,
"step": 16900
},
{
"epoch": 0.03075391187121913,
"grad_norm": 0.40313076972961426,
"learning_rate": 0.0002,
"loss": 0.2017,
"step": 16910
},
{
"epoch": 0.030772098690776326,
"grad_norm": 0.2068721503019333,
"learning_rate": 0.0002,
"loss": 0.0906,
"step": 16920
},
{
"epoch": 0.030790285510333523,
"grad_norm": 0.12770770490169525,
"learning_rate": 0.0002,
"loss": 0.0801,
"step": 16930
},
{
"epoch": 0.03080847232989072,
"grad_norm": 17.294641494750977,
"learning_rate": 0.0002,
"loss": 0.0701,
"step": 16940
},
{
"epoch": 0.030826659149447915,
"grad_norm": 0.04612286388874054,
"learning_rate": 0.0002,
"loss": 0.0287,
"step": 16950
},
{
"epoch": 0.03084484596900511,
"grad_norm": 0.10311487317085266,
"learning_rate": 0.0002,
"loss": 0.136,
"step": 16960
},
{
"epoch": 0.030863032788562308,
"grad_norm": 0.20878446102142334,
"learning_rate": 0.0002,
"loss": 0.0886,
"step": 16970
},
{
"epoch": 0.030881219608119504,
"grad_norm": 1.412353515625,
"learning_rate": 0.0002,
"loss": 0.0843,
"step": 16980
},
{
"epoch": 0.0308994064276767,
"grad_norm": 0.27046918869018555,
"learning_rate": 0.0002,
"loss": 0.0755,
"step": 16990
},
{
"epoch": 0.030917593247233897,
"grad_norm": 0.5227788090705872,
"learning_rate": 0.0002,
"loss": 0.0234,
"step": 17000
},
{
"epoch": 0.030935780066791096,
"grad_norm": 0.16006655991077423,
"learning_rate": 0.0002,
"loss": 0.183,
"step": 17010
},
{
"epoch": 0.030953966886348293,
"grad_norm": 0.1297607421875,
"learning_rate": 0.0002,
"loss": 0.0868,
"step": 17020
},
{
"epoch": 0.03097215370590549,
"grad_norm": 11.198999404907227,
"learning_rate": 0.0002,
"loss": 0.0998,
"step": 17030
},
{
"epoch": 0.030990340525462685,
"grad_norm": 0.39887136220932007,
"learning_rate": 0.0002,
"loss": 0.0898,
"step": 17040
},
{
"epoch": 0.03100852734501988,
"grad_norm": 0.009262642823159695,
"learning_rate": 0.0002,
"loss": 0.0215,
"step": 17050
},
{
"epoch": 0.031026714164577078,
"grad_norm": 0.15820527076721191,
"learning_rate": 0.0002,
"loss": 0.2017,
"step": 17060
},
{
"epoch": 0.031044900984134274,
"grad_norm": 0.11645558476448059,
"learning_rate": 0.0002,
"loss": 0.085,
"step": 17070
},
{
"epoch": 0.03106308780369147,
"grad_norm": 0.03981775790452957,
"learning_rate": 0.0002,
"loss": 0.0803,
"step": 17080
},
{
"epoch": 0.031081274623248667,
"grad_norm": 0.1584177166223526,
"learning_rate": 0.0002,
"loss": 0.0635,
"step": 17090
},
{
"epoch": 0.031099461442805863,
"grad_norm": 0.0005907397717237473,
"learning_rate": 0.0002,
"loss": 0.006,
"step": 17100
},
{
"epoch": 0.03111764826236306,
"grad_norm": 0.05344061553478241,
"learning_rate": 0.0002,
"loss": 0.3098,
"step": 17110
},
{
"epoch": 0.031135835081920256,
"grad_norm": 0.05249408632516861,
"learning_rate": 0.0002,
"loss": 0.1002,
"step": 17120
},
{
"epoch": 0.031154021901477452,
"grad_norm": 0.04177263006567955,
"learning_rate": 0.0002,
"loss": 0.0969,
"step": 17130
},
{
"epoch": 0.03117220872103465,
"grad_norm": 0.18396486341953278,
"learning_rate": 0.0002,
"loss": 0.0727,
"step": 17140
},
{
"epoch": 0.031190395540591845,
"grad_norm": 0.0019848416559398174,
"learning_rate": 0.0002,
"loss": 0.0092,
"step": 17150
},
{
"epoch": 0.03120858236014904,
"grad_norm": 0.23747271299362183,
"learning_rate": 0.0002,
"loss": 0.3243,
"step": 17160
},
{
"epoch": 0.031226769179706237,
"grad_norm": 0.2365376353263855,
"learning_rate": 0.0002,
"loss": 0.094,
"step": 17170
},
{
"epoch": 0.031244955999263434,
"grad_norm": 0.21784919500350952,
"learning_rate": 0.0002,
"loss": 0.0795,
"step": 17180
},
{
"epoch": 0.03126314281882063,
"grad_norm": 0.27253153920173645,
"learning_rate": 0.0002,
"loss": 0.0748,
"step": 17190
},
{
"epoch": 0.031281329638377826,
"grad_norm": 0.004298684187233448,
"learning_rate": 0.0002,
"loss": 0.014,
"step": 17200
},
{
"epoch": 0.03129951645793502,
"grad_norm": 0.267871230840683,
"learning_rate": 0.0002,
"loss": 0.2938,
"step": 17210
},
{
"epoch": 0.03131770327749222,
"grad_norm": 0.1428530067205429,
"learning_rate": 0.0002,
"loss": 0.0901,
"step": 17220
},
{
"epoch": 0.031335890097049415,
"grad_norm": 0.10623782873153687,
"learning_rate": 0.0002,
"loss": 0.0752,
"step": 17230
},
{
"epoch": 0.03135407691660661,
"grad_norm": 0.2869247496128082,
"learning_rate": 0.0002,
"loss": 0.0707,
"step": 17240
},
{
"epoch": 0.03137226373616381,
"grad_norm": 0.011321209371089935,
"learning_rate": 0.0002,
"loss": 0.0168,
"step": 17250
},
{
"epoch": 0.031390450555721004,
"grad_norm": 0.09432020783424377,
"learning_rate": 0.0002,
"loss": 0.2046,
"step": 17260
},
{
"epoch": 0.0314086373752782,
"grad_norm": 0.190867081284523,
"learning_rate": 0.0002,
"loss": 0.0866,
"step": 17270
},
{
"epoch": 0.0314268241948354,
"grad_norm": 0.14274829626083374,
"learning_rate": 0.0002,
"loss": 0.0796,
"step": 17280
},
{
"epoch": 0.03144501101439259,
"grad_norm": 0.29910504817962646,
"learning_rate": 0.0002,
"loss": 0.0711,
"step": 17290
},
{
"epoch": 0.03146319783394979,
"grad_norm": 0.031730011105537415,
"learning_rate": 0.0002,
"loss": 0.0217,
"step": 17300
},
{
"epoch": 0.031481384653506986,
"grad_norm": 0.23042625188827515,
"learning_rate": 0.0002,
"loss": 0.1491,
"step": 17310
},
{
"epoch": 0.03149957147306418,
"grad_norm": 0.15560220181941986,
"learning_rate": 0.0002,
"loss": 0.0761,
"step": 17320
},
{
"epoch": 0.03151775829262138,
"grad_norm": 0.051929160952568054,
"learning_rate": 0.0002,
"loss": 0.0893,
"step": 17330
},
{
"epoch": 0.031535945112178575,
"grad_norm": 0.16162756085395813,
"learning_rate": 0.0002,
"loss": 0.0623,
"step": 17340
},
{
"epoch": 0.03155413193173577,
"grad_norm": 0.019480068236589432,
"learning_rate": 0.0002,
"loss": 0.0137,
"step": 17350
},
{
"epoch": 0.03157231875129297,
"grad_norm": 0.24700693786144257,
"learning_rate": 0.0002,
"loss": 0.1481,
"step": 17360
},
{
"epoch": 0.031590505570850164,
"grad_norm": 0.17574873566627502,
"learning_rate": 0.0002,
"loss": 0.079,
"step": 17370
},
{
"epoch": 0.03160869239040736,
"grad_norm": 0.10368580371141434,
"learning_rate": 0.0002,
"loss": 0.0811,
"step": 17380
},
{
"epoch": 0.031626879209964556,
"grad_norm": 0.23330622911453247,
"learning_rate": 0.0002,
"loss": 0.0669,
"step": 17390
},
{
"epoch": 0.03164506602952175,
"grad_norm": 0.031393859535455704,
"learning_rate": 0.0002,
"loss": 0.0183,
"step": 17400
},
{
"epoch": 0.03166325284907895,
"grad_norm": 0.22080129384994507,
"learning_rate": 0.0002,
"loss": 0.1567,
"step": 17410
},
{
"epoch": 0.031681439668636145,
"grad_norm": 0.177025705575943,
"learning_rate": 0.0002,
"loss": 0.0798,
"step": 17420
},
{
"epoch": 0.03169962648819334,
"grad_norm": 0.054285600781440735,
"learning_rate": 0.0002,
"loss": 0.0709,
"step": 17430
},
{
"epoch": 0.03171781330775054,
"grad_norm": 0.20625421404838562,
"learning_rate": 0.0002,
"loss": 0.0592,
"step": 17440
},
{
"epoch": 0.031736000127307734,
"grad_norm": 0.042640089988708496,
"learning_rate": 0.0002,
"loss": 0.0199,
"step": 17450
},
{
"epoch": 0.03175418694686493,
"grad_norm": 0.2505437731742859,
"learning_rate": 0.0002,
"loss": 0.131,
"step": 17460
},
{
"epoch": 0.03177237376642213,
"grad_norm": 0.24848629534244537,
"learning_rate": 0.0002,
"loss": 0.0826,
"step": 17470
},
{
"epoch": 0.03179056058597932,
"grad_norm": 0.056854844093322754,
"learning_rate": 0.0002,
"loss": 0.0779,
"step": 17480
},
{
"epoch": 0.03180874740553652,
"grad_norm": 0.23022660613059998,
"learning_rate": 0.0002,
"loss": 0.0703,
"step": 17490
},
{
"epoch": 0.031826934225093716,
"grad_norm": 0.033501993864774704,
"learning_rate": 0.0002,
"loss": 0.0229,
"step": 17500
},
{
"epoch": 0.03184512104465091,
"grad_norm": 0.25061148405075073,
"learning_rate": 0.0002,
"loss": 0.1588,
"step": 17510
},
{
"epoch": 0.031863307864208115,
"grad_norm": 0.21534167230129242,
"learning_rate": 0.0002,
"loss": 0.079,
"step": 17520
},
{
"epoch": 0.03188149468376531,
"grad_norm": 0.04823959991335869,
"learning_rate": 0.0002,
"loss": 0.0826,
"step": 17530
},
{
"epoch": 0.03189968150332251,
"grad_norm": 0.23680952191352844,
"learning_rate": 0.0002,
"loss": 0.0617,
"step": 17540
},
{
"epoch": 0.031917868322879704,
"grad_norm": 0.016636351123452187,
"learning_rate": 0.0002,
"loss": 0.0143,
"step": 17550
},
{
"epoch": 0.0319360551424369,
"grad_norm": 0.3684225082397461,
"learning_rate": 0.0002,
"loss": 0.2011,
"step": 17560
},
{
"epoch": 0.0319542419619941,
"grad_norm": 0.07126643508672714,
"learning_rate": 0.0002,
"loss": 0.0792,
"step": 17570
},
{
"epoch": 0.03197242878155129,
"grad_norm": 0.05354290455579758,
"learning_rate": 0.0002,
"loss": 0.0831,
"step": 17580
},
{
"epoch": 0.03199061560110849,
"grad_norm": 0.20318995416164398,
"learning_rate": 0.0002,
"loss": 0.0617,
"step": 17590
},
{
"epoch": 0.032008802420665686,
"grad_norm": 0.021502351388335228,
"learning_rate": 0.0002,
"loss": 0.0137,
"step": 17600
},
{
"epoch": 0.03202698924022288,
"grad_norm": 0.3471545875072479,
"learning_rate": 0.0002,
"loss": 0.1823,
"step": 17610
},
{
"epoch": 0.03204517605978008,
"grad_norm": 0.23191972076892853,
"learning_rate": 0.0002,
"loss": 0.0837,
"step": 17620
},
{
"epoch": 0.032063362879337275,
"grad_norm": 0.0479818731546402,
"learning_rate": 0.0002,
"loss": 0.0845,
"step": 17630
},
{
"epoch": 0.03208154969889447,
"grad_norm": 0.2193339467048645,
"learning_rate": 0.0002,
"loss": 0.068,
"step": 17640
},
{
"epoch": 0.03209973651845167,
"grad_norm": 0.03661821037530899,
"learning_rate": 0.0002,
"loss": 0.0234,
"step": 17650
},
{
"epoch": 0.032117923338008864,
"grad_norm": 0.10396943986415863,
"learning_rate": 0.0002,
"loss": 0.1295,
"step": 17660
},
{
"epoch": 0.03213611015756606,
"grad_norm": 0.16999179124832153,
"learning_rate": 0.0002,
"loss": 0.0823,
"step": 17670
},
{
"epoch": 0.032154296977123256,
"grad_norm": 0.09069819748401642,
"learning_rate": 0.0002,
"loss": 0.0748,
"step": 17680
},
{
"epoch": 0.03217248379668045,
"grad_norm": 0.24210433661937714,
"learning_rate": 0.0002,
"loss": 0.0611,
"step": 17690
},
{
"epoch": 0.03219067061623765,
"grad_norm": 0.028281020000576973,
"learning_rate": 0.0002,
"loss": 0.018,
"step": 17700
},
{
"epoch": 0.032208857435794845,
"grad_norm": 0.4133516252040863,
"learning_rate": 0.0002,
"loss": 0.1704,
"step": 17710
},
{
"epoch": 0.03222704425535204,
"grad_norm": 0.20207400619983673,
"learning_rate": 0.0002,
"loss": 0.0804,
"step": 17720
},
{
"epoch": 0.03224523107490924,
"grad_norm": 0.043604232370853424,
"learning_rate": 0.0002,
"loss": 0.0929,
"step": 17730
},
{
"epoch": 0.032263417894466434,
"grad_norm": 0.1995580494403839,
"learning_rate": 0.0002,
"loss": 0.062,
"step": 17740
},
{
"epoch": 0.03228160471402363,
"grad_norm": 0.03241848200559616,
"learning_rate": 0.0002,
"loss": 0.0137,
"step": 17750
},
{
"epoch": 0.03229979153358083,
"grad_norm": 0.28819000720977783,
"learning_rate": 0.0002,
"loss": 0.1696,
"step": 17760
},
{
"epoch": 0.03231797835313802,
"grad_norm": 0.2625056803226471,
"learning_rate": 0.0002,
"loss": 0.0704,
"step": 17770
},
{
"epoch": 0.03233616517269522,
"grad_norm": 0.03986202925443649,
"learning_rate": 0.0002,
"loss": 0.0848,
"step": 17780
},
{
"epoch": 0.032354351992252416,
"grad_norm": 0.24770867824554443,
"learning_rate": 0.0002,
"loss": 0.0608,
"step": 17790
},
{
"epoch": 0.03237253881180961,
"grad_norm": 0.031353630125522614,
"learning_rate": 0.0002,
"loss": 0.0145,
"step": 17800
},
{
"epoch": 0.03239072563136681,
"grad_norm": 0.2273588478565216,
"learning_rate": 0.0002,
"loss": 0.1765,
"step": 17810
},
{
"epoch": 0.032408912450924005,
"grad_norm": 0.19741755723953247,
"learning_rate": 0.0002,
"loss": 0.0818,
"step": 17820
},
{
"epoch": 0.0324270992704812,
"grad_norm": 0.03193483129143715,
"learning_rate": 0.0002,
"loss": 0.0737,
"step": 17830
},
{
"epoch": 0.0324452860900384,
"grad_norm": 0.13962946832180023,
"learning_rate": 0.0002,
"loss": 0.0575,
"step": 17840
},
{
"epoch": 0.03246347290959559,
"grad_norm": 0.01755092851817608,
"learning_rate": 0.0002,
"loss": 0.0159,
"step": 17850
},
{
"epoch": 0.03248165972915279,
"grad_norm": 0.21713244915008545,
"learning_rate": 0.0002,
"loss": 0.1476,
"step": 17860
},
{
"epoch": 0.032499846548709986,
"grad_norm": 0.15362155437469482,
"learning_rate": 0.0002,
"loss": 0.0747,
"step": 17870
},
{
"epoch": 0.03251803336826718,
"grad_norm": 0.02643916755914688,
"learning_rate": 0.0002,
"loss": 0.0793,
"step": 17880
},
{
"epoch": 0.03253622018782438,
"grad_norm": 0.2702760100364685,
"learning_rate": 0.0002,
"loss": 0.0641,
"step": 17890
},
{
"epoch": 0.032554407007381575,
"grad_norm": 0.05910428613424301,
"learning_rate": 0.0002,
"loss": 0.022,
"step": 17900
},
{
"epoch": 0.03257259382693877,
"grad_norm": 0.17692551016807556,
"learning_rate": 0.0002,
"loss": 0.1407,
"step": 17910
},
{
"epoch": 0.03259078064649597,
"grad_norm": 0.19877870380878448,
"learning_rate": 0.0002,
"loss": 0.0798,
"step": 17920
},
{
"epoch": 0.032608967466053164,
"grad_norm": 0.06731924414634705,
"learning_rate": 0.0002,
"loss": 0.0798,
"step": 17930
},
{
"epoch": 0.03262715428561036,
"grad_norm": 0.20342952013015747,
"learning_rate": 0.0002,
"loss": 0.0571,
"step": 17940
},
{
"epoch": 0.03264534110516756,
"grad_norm": 0.06299301236867905,
"learning_rate": 0.0002,
"loss": 0.0154,
"step": 17950
},
{
"epoch": 0.03266352792472475,
"grad_norm": 0.30317986011505127,
"learning_rate": 0.0002,
"loss": 0.1496,
"step": 17960
},
{
"epoch": 0.03268171474428195,
"grad_norm": 0.2737327218055725,
"learning_rate": 0.0002,
"loss": 0.0777,
"step": 17970
},
{
"epoch": 0.032699901563839145,
"grad_norm": 0.03226702660322189,
"learning_rate": 0.0002,
"loss": 0.0799,
"step": 17980
},
{
"epoch": 0.03271808838339634,
"grad_norm": 0.20195341110229492,
"learning_rate": 0.0002,
"loss": 0.0654,
"step": 17990
},
{
"epoch": 0.03273627520295354,
"grad_norm": 0.03351292014122009,
"learning_rate": 0.0002,
"loss": 0.0194,
"step": 18000
},
{
"epoch": 0.032754462022510734,
"grad_norm": 0.2281372845172882,
"learning_rate": 0.0002,
"loss": 0.154,
"step": 18010
},
{
"epoch": 0.03277264884206793,
"grad_norm": 0.19263891875743866,
"learning_rate": 0.0002,
"loss": 0.0803,
"step": 18020
},
{
"epoch": 0.03279083566162513,
"grad_norm": 0.04183288663625717,
"learning_rate": 0.0002,
"loss": 0.0842,
"step": 18030
},
{
"epoch": 0.03280902248118232,
"grad_norm": 0.284759521484375,
"learning_rate": 0.0002,
"loss": 0.067,
"step": 18040
},
{
"epoch": 0.03282720930073952,
"grad_norm": 0.02972390688955784,
"learning_rate": 0.0002,
"loss": 0.016,
"step": 18050
},
{
"epoch": 0.032845396120296716,
"grad_norm": 0.28630614280700684,
"learning_rate": 0.0002,
"loss": 0.1866,
"step": 18060
},
{
"epoch": 0.03286358293985391,
"grad_norm": 0.16426514089107513,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 18070
},
{
"epoch": 0.03288176975941111,
"grad_norm": 0.05643441155552864,
"learning_rate": 0.0002,
"loss": 0.0773,
"step": 18080
},
{
"epoch": 0.032899956578968305,
"grad_norm": 0.19082742929458618,
"learning_rate": 0.0002,
"loss": 0.0582,
"step": 18090
},
{
"epoch": 0.0329181433985255,
"grad_norm": 0.017512233927845955,
"learning_rate": 0.0002,
"loss": 0.0174,
"step": 18100
},
{
"epoch": 0.0329363302180827,
"grad_norm": 0.22619640827178955,
"learning_rate": 0.0002,
"loss": 0.166,
"step": 18110
},
{
"epoch": 0.032954517037639894,
"grad_norm": 0.10430974513292313,
"learning_rate": 0.0002,
"loss": 0.0716,
"step": 18120
},
{
"epoch": 0.03297270385719709,
"grad_norm": 0.07371710985898972,
"learning_rate": 0.0002,
"loss": 0.0733,
"step": 18130
},
{
"epoch": 0.032990890676754286,
"grad_norm": 0.19163483381271362,
"learning_rate": 0.0002,
"loss": 0.0609,
"step": 18140
},
{
"epoch": 0.03300907749631148,
"grad_norm": 0.03743975609540939,
"learning_rate": 0.0002,
"loss": 0.017,
"step": 18150
},
{
"epoch": 0.03302726431586868,
"grad_norm": 0.19496546685695648,
"learning_rate": 0.0002,
"loss": 0.1622,
"step": 18160
},
{
"epoch": 0.033045451135425875,
"grad_norm": 0.13054883480072021,
"learning_rate": 0.0002,
"loss": 0.0728,
"step": 18170
},
{
"epoch": 0.03306363795498307,
"grad_norm": 0.10058756172657013,
"learning_rate": 0.0002,
"loss": 0.0738,
"step": 18180
},
{
"epoch": 0.03308182477454027,
"grad_norm": 0.220932736992836,
"learning_rate": 0.0002,
"loss": 0.063,
"step": 18190
},
{
"epoch": 0.033100011594097464,
"grad_norm": 0.04396356642246246,
"learning_rate": 0.0002,
"loss": 0.0207,
"step": 18200
},
{
"epoch": 0.03311819841365467,
"grad_norm": 0.23554326593875885,
"learning_rate": 0.0002,
"loss": 0.1484,
"step": 18210
},
{
"epoch": 0.033136385233211864,
"grad_norm": 0.11277181655168533,
"learning_rate": 0.0002,
"loss": 0.0763,
"step": 18220
},
{
"epoch": 0.03315457205276906,
"grad_norm": 0.05176365375518799,
"learning_rate": 0.0002,
"loss": 0.076,
"step": 18230
},
{
"epoch": 0.033172758872326256,
"grad_norm": 0.1521395444869995,
"learning_rate": 0.0002,
"loss": 0.0605,
"step": 18240
},
{
"epoch": 0.03319094569188345,
"grad_norm": 0.04682580381631851,
"learning_rate": 0.0002,
"loss": 0.0149,
"step": 18250
},
{
"epoch": 0.03320913251144065,
"grad_norm": 0.16890883445739746,
"learning_rate": 0.0002,
"loss": 0.1402,
"step": 18260
},
{
"epoch": 0.033227319330997845,
"grad_norm": 0.17221559584140778,
"learning_rate": 0.0002,
"loss": 0.0819,
"step": 18270
},
{
"epoch": 0.03324550615055504,
"grad_norm": 0.07434559613466263,
"learning_rate": 0.0002,
"loss": 0.0784,
"step": 18280
},
{
"epoch": 0.03326369297011224,
"grad_norm": 0.1912834346294403,
"learning_rate": 0.0002,
"loss": 0.0614,
"step": 18290
},
{
"epoch": 0.033281879789669434,
"grad_norm": 0.04286884889006615,
"learning_rate": 0.0002,
"loss": 0.0185,
"step": 18300
},
{
"epoch": 0.03330006660922663,
"grad_norm": 0.29059842228889465,
"learning_rate": 0.0002,
"loss": 0.1357,
"step": 18310
},
{
"epoch": 0.03331825342878383,
"grad_norm": 0.2289486825466156,
"learning_rate": 0.0002,
"loss": 0.0865,
"step": 18320
},
{
"epoch": 0.03333644024834102,
"grad_norm": 0.027094636112451553,
"learning_rate": 0.0002,
"loss": 0.0841,
"step": 18330
},
{
"epoch": 0.03335462706789822,
"grad_norm": 0.21263600885868073,
"learning_rate": 0.0002,
"loss": 0.0628,
"step": 18340
},
{
"epoch": 0.033372813887455416,
"grad_norm": 0.03497980535030365,
"learning_rate": 0.0002,
"loss": 0.0158,
"step": 18350
},
{
"epoch": 0.03339100070701261,
"grad_norm": 0.20155973732471466,
"learning_rate": 0.0002,
"loss": 0.1523,
"step": 18360
},
{
"epoch": 0.03340918752656981,
"grad_norm": 0.03746286779642105,
"learning_rate": 0.0002,
"loss": 0.0781,
"step": 18370
},
{
"epoch": 0.033427374346127005,
"grad_norm": 0.06747066229581833,
"learning_rate": 0.0002,
"loss": 0.0792,
"step": 18380
},
{
"epoch": 0.0334455611656842,
"grad_norm": 0.23699060082435608,
"learning_rate": 0.0002,
"loss": 0.0651,
"step": 18390
},
{
"epoch": 0.0334637479852414,
"grad_norm": 0.047832150012254715,
"learning_rate": 0.0002,
"loss": 0.0181,
"step": 18400
},
{
"epoch": 0.033481934804798594,
"grad_norm": 0.3178698420524597,
"learning_rate": 0.0002,
"loss": 0.1537,
"step": 18410
},
{
"epoch": 0.03350012162435579,
"grad_norm": 0.16258081793785095,
"learning_rate": 0.0002,
"loss": 0.0722,
"step": 18420
},
{
"epoch": 0.033518308443912986,
"grad_norm": 0.02807716652750969,
"learning_rate": 0.0002,
"loss": 0.0844,
"step": 18430
},
{
"epoch": 0.03353649526347018,
"grad_norm": 0.16596710681915283,
"learning_rate": 0.0002,
"loss": 0.0607,
"step": 18440
},
{
"epoch": 0.03355468208302738,
"grad_norm": 0.04448723793029785,
"learning_rate": 0.0002,
"loss": 0.0183,
"step": 18450
},
{
"epoch": 0.033572868902584575,
"grad_norm": 0.39318934082984924,
"learning_rate": 0.0002,
"loss": 0.1497,
"step": 18460
},
{
"epoch": 0.03359105572214177,
"grad_norm": 0.17387263476848602,
"learning_rate": 0.0002,
"loss": 0.0787,
"step": 18470
},
{
"epoch": 0.03360924254169897,
"grad_norm": 0.14859163761138916,
"learning_rate": 0.0002,
"loss": 0.0837,
"step": 18480
},
{
"epoch": 0.033627429361256164,
"grad_norm": 0.24148601293563843,
"learning_rate": 0.0002,
"loss": 0.0655,
"step": 18490
},
{
"epoch": 0.03364561618081336,
"grad_norm": 0.04743284359574318,
"learning_rate": 0.0002,
"loss": 0.0174,
"step": 18500
},
{
"epoch": 0.03366380300037056,
"grad_norm": 0.25396591424942017,
"learning_rate": 0.0002,
"loss": 0.1438,
"step": 18510
},
{
"epoch": 0.03368198981992775,
"grad_norm": 0.1759178638458252,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 18520
},
{
"epoch": 0.03370017663948495,
"grad_norm": 0.06611669808626175,
"learning_rate": 0.0002,
"loss": 0.0787,
"step": 18530
},
{
"epoch": 0.033718363459042146,
"grad_norm": 0.22699445486068726,
"learning_rate": 0.0002,
"loss": 0.0697,
"step": 18540
},
{
"epoch": 0.03373655027859934,
"grad_norm": 0.02634899877011776,
"learning_rate": 0.0002,
"loss": 0.0189,
"step": 18550
},
{
"epoch": 0.03375473709815654,
"grad_norm": 0.3238360285758972,
"learning_rate": 0.0002,
"loss": 0.1496,
"step": 18560
},
{
"epoch": 0.033772923917713735,
"grad_norm": 0.16044601798057556,
"learning_rate": 0.0002,
"loss": 0.076,
"step": 18570
},
{
"epoch": 0.03379111073727093,
"grad_norm": 0.029841836541891098,
"learning_rate": 0.0002,
"loss": 0.0718,
"step": 18580
},
{
"epoch": 0.03380929755682813,
"grad_norm": 0.21851007640361786,
"learning_rate": 0.0002,
"loss": 0.0656,
"step": 18590
},
{
"epoch": 0.033827484376385324,
"grad_norm": 0.02096417360007763,
"learning_rate": 0.0002,
"loss": 0.0173,
"step": 18600
},
{
"epoch": 0.03384567119594252,
"grad_norm": 0.29625844955444336,
"learning_rate": 0.0002,
"loss": 0.1716,
"step": 18610
},
{
"epoch": 0.033863858015499716,
"grad_norm": 0.1510130614042282,
"learning_rate": 0.0002,
"loss": 0.0792,
"step": 18620
},
{
"epoch": 0.03388204483505691,
"grad_norm": 0.04192917421460152,
"learning_rate": 0.0002,
"loss": 0.0717,
"step": 18630
},
{
"epoch": 0.03390023165461411,
"grad_norm": 0.23139427602291107,
"learning_rate": 0.0002,
"loss": 0.0609,
"step": 18640
},
{
"epoch": 0.033918418474171305,
"grad_norm": 0.03887970373034477,
"learning_rate": 0.0002,
"loss": 0.0127,
"step": 18650
},
{
"epoch": 0.0339366052937285,
"grad_norm": 0.1315147578716278,
"learning_rate": 0.0002,
"loss": 0.1434,
"step": 18660
},
{
"epoch": 0.0339547921132857,
"grad_norm": 0.13328243792057037,
"learning_rate": 0.0002,
"loss": 0.0673,
"step": 18670
},
{
"epoch": 0.033972978932842894,
"grad_norm": 0.07161080092191696,
"learning_rate": 0.0002,
"loss": 0.0692,
"step": 18680
},
{
"epoch": 0.03399116575240009,
"grad_norm": 0.16019296646118164,
"learning_rate": 0.0002,
"loss": 0.0641,
"step": 18690
},
{
"epoch": 0.03400935257195729,
"grad_norm": 0.042882539331912994,
"learning_rate": 0.0002,
"loss": 0.0196,
"step": 18700
},
{
"epoch": 0.03402753939151448,
"grad_norm": 0.15019817650318146,
"learning_rate": 0.0002,
"loss": 0.1239,
"step": 18710
},
{
"epoch": 0.03404572621107168,
"grad_norm": 0.140267476439476,
"learning_rate": 0.0002,
"loss": 0.0715,
"step": 18720
},
{
"epoch": 0.034063913030628876,
"grad_norm": 0.060760073363780975,
"learning_rate": 0.0002,
"loss": 0.079,
"step": 18730
},
{
"epoch": 0.03408209985018607,
"grad_norm": 0.1783122718334198,
"learning_rate": 0.0002,
"loss": 0.0616,
"step": 18740
},
{
"epoch": 0.03410028666974327,
"grad_norm": 0.023139121010899544,
"learning_rate": 0.0002,
"loss": 0.0171,
"step": 18750
},
{
"epoch": 0.034118473489300465,
"grad_norm": 0.2645978331565857,
"learning_rate": 0.0002,
"loss": 0.1355,
"step": 18760
},
{
"epoch": 0.03413666030885766,
"grad_norm": 0.21009914577007294,
"learning_rate": 0.0002,
"loss": 0.0757,
"step": 18770
},
{
"epoch": 0.03415484712841486,
"grad_norm": 0.13494494557380676,
"learning_rate": 0.0002,
"loss": 0.0774,
"step": 18780
},
{
"epoch": 0.034173033947972054,
"grad_norm": 0.19806784391403198,
"learning_rate": 0.0002,
"loss": 0.0636,
"step": 18790
},
{
"epoch": 0.03419122076752925,
"grad_norm": 0.020482519641518593,
"learning_rate": 0.0002,
"loss": 0.0194,
"step": 18800
},
{
"epoch": 0.034209407587086446,
"grad_norm": 0.34826937317848206,
"learning_rate": 0.0002,
"loss": 0.1521,
"step": 18810
},
{
"epoch": 0.03422759440664364,
"grad_norm": 0.1293957680463791,
"learning_rate": 0.0002,
"loss": 0.0742,
"step": 18820
},
{
"epoch": 0.03424578122620084,
"grad_norm": 0.06574539095163345,
"learning_rate": 0.0002,
"loss": 0.0792,
"step": 18830
},
{
"epoch": 0.034263968045758035,
"grad_norm": 0.2005399614572525,
"learning_rate": 0.0002,
"loss": 0.0618,
"step": 18840
},
{
"epoch": 0.03428215486531523,
"grad_norm": 0.04699913039803505,
"learning_rate": 0.0002,
"loss": 0.0176,
"step": 18850
},
{
"epoch": 0.03430034168487243,
"grad_norm": 0.2593109905719757,
"learning_rate": 0.0002,
"loss": 0.1709,
"step": 18860
},
{
"epoch": 0.034318528504429624,
"grad_norm": 0.587365448474884,
"learning_rate": 0.0002,
"loss": 0.0794,
"step": 18870
},
{
"epoch": 0.03433671532398682,
"grad_norm": 0.0371614433825016,
"learning_rate": 0.0002,
"loss": 0.076,
"step": 18880
},
{
"epoch": 0.03435490214354402,
"grad_norm": 0.2164178341627121,
"learning_rate": 0.0002,
"loss": 0.0577,
"step": 18890
},
{
"epoch": 0.03437308896310122,
"grad_norm": 0.028071587905287743,
"learning_rate": 0.0002,
"loss": 0.0184,
"step": 18900
},
{
"epoch": 0.034391275782658416,
"grad_norm": 0.25464126467704773,
"learning_rate": 0.0002,
"loss": 0.1616,
"step": 18910
},
{
"epoch": 0.03440946260221561,
"grad_norm": 0.2830415368080139,
"learning_rate": 0.0002,
"loss": 0.0795,
"step": 18920
},
{
"epoch": 0.03442764942177281,
"grad_norm": 0.07880273461341858,
"learning_rate": 0.0002,
"loss": 0.0717,
"step": 18930
},
{
"epoch": 0.034445836241330005,
"grad_norm": 0.19671671092510223,
"learning_rate": 0.0002,
"loss": 0.0625,
"step": 18940
},
{
"epoch": 0.0344640230608872,
"grad_norm": 0.038350027054548264,
"learning_rate": 0.0002,
"loss": 0.0172,
"step": 18950
},
{
"epoch": 0.0344822098804444,
"grad_norm": 0.196768656373024,
"learning_rate": 0.0002,
"loss": 0.1586,
"step": 18960
},
{
"epoch": 0.034500396700001594,
"grad_norm": 0.1861678808927536,
"learning_rate": 0.0002,
"loss": 0.0871,
"step": 18970
},
{
"epoch": 0.03451858351955879,
"grad_norm": 0.1074979305267334,
"learning_rate": 0.0002,
"loss": 0.0697,
"step": 18980
},
{
"epoch": 0.03453677033911599,
"grad_norm": 0.18214645981788635,
"learning_rate": 0.0002,
"loss": 0.0594,
"step": 18990
},
{
"epoch": 0.03455495715867318,
"grad_norm": 0.035948049277067184,
"learning_rate": 0.0002,
"loss": 0.0177,
"step": 19000
},
{
"epoch": 0.03457314397823038,
"grad_norm": 0.2434094399213791,
"learning_rate": 0.0002,
"loss": 0.1402,
"step": 19010
},
{
"epoch": 0.034591330797787576,
"grad_norm": 0.06897670775651932,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 19020
},
{
"epoch": 0.03460951761734477,
"grad_norm": 0.13107649981975555,
"learning_rate": 0.0002,
"loss": 0.0826,
"step": 19030
},
{
"epoch": 0.03462770443690197,
"grad_norm": 0.1787865310907364,
"learning_rate": 0.0002,
"loss": 0.0619,
"step": 19040
},
{
"epoch": 0.034645891256459165,
"grad_norm": 0.0460963137447834,
"learning_rate": 0.0002,
"loss": 0.0203,
"step": 19050
},
{
"epoch": 0.03466407807601636,
"grad_norm": 0.20582084357738495,
"learning_rate": 0.0002,
"loss": 0.1325,
"step": 19060
},
{
"epoch": 0.03468226489557356,
"grad_norm": 0.16120313107967377,
"learning_rate": 0.0002,
"loss": 0.08,
"step": 19070
},
{
"epoch": 0.03470045171513075,
"grad_norm": 0.04322347044944763,
"learning_rate": 0.0002,
"loss": 0.0753,
"step": 19080
},
{
"epoch": 0.03471863853468795,
"grad_norm": 0.1764109879732132,
"learning_rate": 0.0002,
"loss": 0.0618,
"step": 19090
},
{
"epoch": 0.034736825354245146,
"grad_norm": 0.04453815147280693,
"learning_rate": 0.0002,
"loss": 0.0172,
"step": 19100
},
{
"epoch": 0.03475501217380234,
"grad_norm": 0.32023972272872925,
"learning_rate": 0.0002,
"loss": 0.1394,
"step": 19110
},
{
"epoch": 0.03477319899335954,
"grad_norm": 0.09920009225606918,
"learning_rate": 0.0002,
"loss": 0.0788,
"step": 19120
},
{
"epoch": 0.034791385812916735,
"grad_norm": 0.047868456691503525,
"learning_rate": 0.0002,
"loss": 0.0745,
"step": 19130
},
{
"epoch": 0.03480957263247393,
"grad_norm": 0.219430074095726,
"learning_rate": 0.0002,
"loss": 0.063,
"step": 19140
},
{
"epoch": 0.03482775945203113,
"grad_norm": 0.04879681020975113,
"learning_rate": 0.0002,
"loss": 0.0161,
"step": 19150
},
{
"epoch": 0.034845946271588324,
"grad_norm": 0.21360138058662415,
"learning_rate": 0.0002,
"loss": 0.1602,
"step": 19160
},
{
"epoch": 0.03486413309114552,
"grad_norm": 0.1391269713640213,
"learning_rate": 0.0002,
"loss": 0.0798,
"step": 19170
},
{
"epoch": 0.03488231991070272,
"grad_norm": 0.06293737888336182,
"learning_rate": 0.0002,
"loss": 0.0717,
"step": 19180
},
{
"epoch": 0.03490050673025991,
"grad_norm": 0.20241963863372803,
"learning_rate": 0.0002,
"loss": 0.0612,
"step": 19190
},
{
"epoch": 0.03491869354981711,
"grad_norm": 0.06246611103415489,
"learning_rate": 0.0002,
"loss": 0.0148,
"step": 19200
},
{
"epoch": 0.034936880369374305,
"grad_norm": 0.16479995846748352,
"learning_rate": 0.0002,
"loss": 0.1611,
"step": 19210
},
{
"epoch": 0.0349550671889315,
"grad_norm": 0.12036983668804169,
"learning_rate": 0.0002,
"loss": 0.0724,
"step": 19220
},
{
"epoch": 0.0349732540084887,
"grad_norm": 0.03939517214894295,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 19230
},
{
"epoch": 0.034991440828045894,
"grad_norm": 0.17047277092933655,
"learning_rate": 0.0002,
"loss": 0.066,
"step": 19240
},
{
"epoch": 0.03500962764760309,
"grad_norm": 0.031782686710357666,
"learning_rate": 0.0002,
"loss": 0.0203,
"step": 19250
},
{
"epoch": 0.03502781446716029,
"grad_norm": 0.2545730471611023,
"learning_rate": 0.0002,
"loss": 0.1716,
"step": 19260
},
{
"epoch": 0.03504600128671748,
"grad_norm": 0.11225811392068863,
"learning_rate": 0.0002,
"loss": 0.0791,
"step": 19270
},
{
"epoch": 0.03506418810627468,
"grad_norm": 0.049140989780426025,
"learning_rate": 0.0002,
"loss": 0.0784,
"step": 19280
},
{
"epoch": 0.035082374925831876,
"grad_norm": 0.16942913830280304,
"learning_rate": 0.0002,
"loss": 0.0638,
"step": 19290
},
{
"epoch": 0.03510056174538907,
"grad_norm": 0.03836115077137947,
"learning_rate": 0.0002,
"loss": 0.0193,
"step": 19300
},
{
"epoch": 0.03511874856494627,
"grad_norm": 0.13004787266254425,
"learning_rate": 0.0002,
"loss": 0.1477,
"step": 19310
},
{
"epoch": 0.035136935384503465,
"grad_norm": 0.2054329216480255,
"learning_rate": 0.0002,
"loss": 0.0792,
"step": 19320
},
{
"epoch": 0.03515512220406066,
"grad_norm": 0.06592074781656265,
"learning_rate": 0.0002,
"loss": 0.0784,
"step": 19330
},
{
"epoch": 0.03517330902361786,
"grad_norm": 0.19228027760982513,
"learning_rate": 0.0002,
"loss": 0.067,
"step": 19340
},
{
"epoch": 0.035191495843175054,
"grad_norm": 0.04050719738006592,
"learning_rate": 0.0002,
"loss": 0.017,
"step": 19350
},
{
"epoch": 0.03520968266273225,
"grad_norm": 0.28715401887893677,
"learning_rate": 0.0002,
"loss": 0.1499,
"step": 19360
},
{
"epoch": 0.035227869482289446,
"grad_norm": 0.13954712450504303,
"learning_rate": 0.0002,
"loss": 0.0787,
"step": 19370
},
{
"epoch": 0.03524605630184664,
"grad_norm": 0.08851815015077591,
"learning_rate": 0.0002,
"loss": 0.0739,
"step": 19380
},
{
"epoch": 0.03526424312140384,
"grad_norm": 0.1788545697927475,
"learning_rate": 0.0002,
"loss": 0.0576,
"step": 19390
},
{
"epoch": 0.035282429940961035,
"grad_norm": 0.03644658252596855,
"learning_rate": 0.0002,
"loss": 0.0143,
"step": 19400
},
{
"epoch": 0.03530061676051823,
"grad_norm": 0.3140568137168884,
"learning_rate": 0.0002,
"loss": 0.1498,
"step": 19410
},
{
"epoch": 0.03531880358007543,
"grad_norm": 0.14550529420375824,
"learning_rate": 0.0002,
"loss": 0.0748,
"step": 19420
},
{
"epoch": 0.035336990399632624,
"grad_norm": 0.10995481163263321,
"learning_rate": 0.0002,
"loss": 0.0801,
"step": 19430
},
{
"epoch": 0.03535517721918982,
"grad_norm": 0.17238560318946838,
"learning_rate": 0.0002,
"loss": 0.0608,
"step": 19440
},
{
"epoch": 0.03537336403874702,
"grad_norm": 0.031363293528556824,
"learning_rate": 0.0002,
"loss": 0.0154,
"step": 19450
},
{
"epoch": 0.03539155085830421,
"grad_norm": 0.14145390689373016,
"learning_rate": 0.0002,
"loss": 0.1511,
"step": 19460
},
{
"epoch": 0.03540973767786141,
"grad_norm": 0.19073855876922607,
"learning_rate": 0.0002,
"loss": 0.0725,
"step": 19470
},
{
"epoch": 0.035427924497418606,
"grad_norm": 0.15639430284500122,
"learning_rate": 0.0002,
"loss": 0.0836,
"step": 19480
},
{
"epoch": 0.0354461113169758,
"grad_norm": 0.2566238045692444,
"learning_rate": 0.0002,
"loss": 0.0617,
"step": 19490
},
{
"epoch": 0.035464298136533,
"grad_norm": 0.055755820125341415,
"learning_rate": 0.0002,
"loss": 0.0178,
"step": 19500
},
{
"epoch": 0.035482484956090195,
"grad_norm": 0.2835562527179718,
"learning_rate": 0.0002,
"loss": 0.1306,
"step": 19510
},
{
"epoch": 0.03550067177564739,
"grad_norm": 0.2310812920331955,
"learning_rate": 0.0002,
"loss": 0.0766,
"step": 19520
},
{
"epoch": 0.03551885859520459,
"grad_norm": 0.1287071257829666,
"learning_rate": 0.0002,
"loss": 0.0791,
"step": 19530
},
{
"epoch": 0.035537045414761784,
"grad_norm": 0.21308869123458862,
"learning_rate": 0.0002,
"loss": 0.0584,
"step": 19540
},
{
"epoch": 0.03555523223431898,
"grad_norm": 0.0662735179066658,
"learning_rate": 0.0002,
"loss": 0.0207,
"step": 19550
},
{
"epoch": 0.035573419053876176,
"grad_norm": 0.21706523001194,
"learning_rate": 0.0002,
"loss": 0.1308,
"step": 19560
},
{
"epoch": 0.03559160587343337,
"grad_norm": 0.09376335144042969,
"learning_rate": 0.0002,
"loss": 0.0677,
"step": 19570
},
{
"epoch": 0.035609792692990576,
"grad_norm": 0.1093437597155571,
"learning_rate": 0.0002,
"loss": 0.0741,
"step": 19580
},
{
"epoch": 0.03562797951254777,
"grad_norm": 0.21057911217212677,
"learning_rate": 0.0002,
"loss": 0.0637,
"step": 19590
},
{
"epoch": 0.03564616633210497,
"grad_norm": 0.04383830726146698,
"learning_rate": 0.0002,
"loss": 0.019,
"step": 19600
},
{
"epoch": 0.035664353151662165,
"grad_norm": 0.3657427132129669,
"learning_rate": 0.0002,
"loss": 0.1421,
"step": 19610
},
{
"epoch": 0.03568253997121936,
"grad_norm": 0.17154265940189362,
"learning_rate": 0.0002,
"loss": 0.0779,
"step": 19620
},
{
"epoch": 0.03570072679077656,
"grad_norm": 0.041993435472249985,
"learning_rate": 0.0002,
"loss": 0.0768,
"step": 19630
},
{
"epoch": 0.035718913610333754,
"grad_norm": 0.1658252775669098,
"learning_rate": 0.0002,
"loss": 0.0602,
"step": 19640
},
{
"epoch": 0.03573710042989095,
"grad_norm": 0.028523078188300133,
"learning_rate": 0.0002,
"loss": 0.0151,
"step": 19650
},
{
"epoch": 0.035755287249448146,
"grad_norm": 0.2624453902244568,
"learning_rate": 0.0002,
"loss": 0.1355,
"step": 19660
},
{
"epoch": 0.03577347406900534,
"grad_norm": 0.12055794149637222,
"learning_rate": 0.0002,
"loss": 0.079,
"step": 19670
},
{
"epoch": 0.03579166088856254,
"grad_norm": 0.043441224843263626,
"learning_rate": 0.0002,
"loss": 0.0722,
"step": 19680
},
{
"epoch": 0.035809847708119735,
"grad_norm": 0.2464340627193451,
"learning_rate": 0.0002,
"loss": 0.0673,
"step": 19690
},
{
"epoch": 0.03582803452767693,
"grad_norm": 0.04004153981804848,
"learning_rate": 0.0002,
"loss": 0.0212,
"step": 19700
},
{
"epoch": 0.03584622134723413,
"grad_norm": 0.3159453570842743,
"learning_rate": 0.0002,
"loss": 0.1806,
"step": 19710
},
{
"epoch": 0.035864408166791324,
"grad_norm": 0.11327318102121353,
"learning_rate": 0.0002,
"loss": 0.0748,
"step": 19720
},
{
"epoch": 0.03588259498634852,
"grad_norm": 0.0980909988284111,
"learning_rate": 0.0002,
"loss": 0.0807,
"step": 19730
},
{
"epoch": 0.03590078180590572,
"grad_norm": 0.15508098900318146,
"learning_rate": 0.0002,
"loss": 0.0576,
"step": 19740
},
{
"epoch": 0.03591896862546291,
"grad_norm": 0.019624806940555573,
"learning_rate": 0.0002,
"loss": 0.0135,
"step": 19750
},
{
"epoch": 0.03593715544502011,
"grad_norm": 0.20336109399795532,
"learning_rate": 0.0002,
"loss": 0.1702,
"step": 19760
},
{
"epoch": 0.035955342264577306,
"grad_norm": 0.12767620384693146,
"learning_rate": 0.0002,
"loss": 0.0776,
"step": 19770
},
{
"epoch": 0.0359735290841345,
"grad_norm": 0.19050805270671844,
"learning_rate": 0.0002,
"loss": 0.0838,
"step": 19780
},
{
"epoch": 0.0359917159036917,
"grad_norm": 0.17471866309642792,
"learning_rate": 0.0002,
"loss": 0.0561,
"step": 19790
},
{
"epoch": 0.036009902723248895,
"grad_norm": 0.044348277151584625,
"learning_rate": 0.0002,
"loss": 0.0159,
"step": 19800
},
{
"epoch": 0.03602808954280609,
"grad_norm": 0.30847081542015076,
"learning_rate": 0.0002,
"loss": 0.1686,
"step": 19810
},
{
"epoch": 0.03604627636236329,
"grad_norm": 0.08963622897863388,
"learning_rate": 0.0002,
"loss": 0.078,
"step": 19820
},
{
"epoch": 0.036064463181920484,
"grad_norm": 0.0580587275326252,
"learning_rate": 0.0002,
"loss": 0.0741,
"step": 19830
},
{
"epoch": 0.03608265000147768,
"grad_norm": 0.1698184460401535,
"learning_rate": 0.0002,
"loss": 0.0631,
"step": 19840
},
{
"epoch": 0.036100836821034876,
"grad_norm": 0.025531867519021034,
"learning_rate": 0.0002,
"loss": 0.0166,
"step": 19850
},
{
"epoch": 0.03611902364059207,
"grad_norm": 0.3544731140136719,
"learning_rate": 0.0002,
"loss": 0.1886,
"step": 19860
},
{
"epoch": 0.03613721046014927,
"grad_norm": 0.2552841901779175,
"learning_rate": 0.0002,
"loss": 0.0859,
"step": 19870
},
{
"epoch": 0.036155397279706465,
"grad_norm": 0.07771942019462585,
"learning_rate": 0.0002,
"loss": 0.0859,
"step": 19880
},
{
"epoch": 0.03617358409926366,
"grad_norm": 0.15945585072040558,
"learning_rate": 0.0002,
"loss": 0.0609,
"step": 19890
},
{
"epoch": 0.03619177091882086,
"grad_norm": 0.04583865404129028,
"learning_rate": 0.0002,
"loss": 0.0196,
"step": 19900
},
{
"epoch": 0.036209957738378054,
"grad_norm": 0.2110920548439026,
"learning_rate": 0.0002,
"loss": 0.1305,
"step": 19910
},
{
"epoch": 0.03622814455793525,
"grad_norm": 0.22165755927562714,
"learning_rate": 0.0002,
"loss": 0.0767,
"step": 19920
},
{
"epoch": 0.03624633137749245,
"grad_norm": 0.0866742879152298,
"learning_rate": 0.0002,
"loss": 0.0785,
"step": 19930
},
{
"epoch": 0.03626451819704964,
"grad_norm": 0.19838224351406097,
"learning_rate": 0.0002,
"loss": 0.0663,
"step": 19940
},
{
"epoch": 0.03628270501660684,
"grad_norm": 0.05543521046638489,
"learning_rate": 0.0002,
"loss": 0.023,
"step": 19950
},
{
"epoch": 0.036300891836164036,
"grad_norm": 0.20800183713436127,
"learning_rate": 0.0002,
"loss": 0.1468,
"step": 19960
},
{
"epoch": 0.03631907865572123,
"grad_norm": 0.14951092004776,
"learning_rate": 0.0002,
"loss": 0.0698,
"step": 19970
},
{
"epoch": 0.03633726547527843,
"grad_norm": 0.10162603855133057,
"learning_rate": 0.0002,
"loss": 0.0841,
"step": 19980
},
{
"epoch": 0.036355452294835625,
"grad_norm": 0.24774019420146942,
"learning_rate": 0.0002,
"loss": 0.0658,
"step": 19990
},
{
"epoch": 0.03637363911439282,
"grad_norm": 0.02705777995288372,
"learning_rate": 0.0002,
"loss": 0.02,
"step": 20000
},
{
"epoch": 0.03639182593395002,
"grad_norm": 0.2509992718696594,
"learning_rate": 0.0002,
"loss": 0.1529,
"step": 20010
},
{
"epoch": 0.036410012753507214,
"grad_norm": 0.2126697599887848,
"learning_rate": 0.0002,
"loss": 0.0716,
"step": 20020
},
{
"epoch": 0.03642819957306441,
"grad_norm": 0.1463591754436493,
"learning_rate": 0.0002,
"loss": 0.076,
"step": 20030
},
{
"epoch": 0.036446386392621606,
"grad_norm": 0.21879518032073975,
"learning_rate": 0.0002,
"loss": 0.0677,
"step": 20040
},
{
"epoch": 0.0364645732121788,
"grad_norm": 0.028337355703115463,
"learning_rate": 0.0002,
"loss": 0.0131,
"step": 20050
},
{
"epoch": 0.036482760031736,
"grad_norm": 0.335788756608963,
"learning_rate": 0.0002,
"loss": 0.1693,
"step": 20060
},
{
"epoch": 0.036500946851293195,
"grad_norm": 0.17615728080272675,
"learning_rate": 0.0002,
"loss": 0.0791,
"step": 20070
},
{
"epoch": 0.03651913367085039,
"grad_norm": 0.034229181706905365,
"learning_rate": 0.0002,
"loss": 0.0774,
"step": 20080
},
{
"epoch": 0.03653732049040759,
"grad_norm": 0.20637790858745575,
"learning_rate": 0.0002,
"loss": 0.0544,
"step": 20090
},
{
"epoch": 0.036555507309964784,
"grad_norm": 0.033659741282463074,
"learning_rate": 0.0002,
"loss": 0.0128,
"step": 20100
},
{
"epoch": 0.03657369412952198,
"grad_norm": 0.18249601125717163,
"learning_rate": 0.0002,
"loss": 0.1939,
"step": 20110
},
{
"epoch": 0.03659188094907918,
"grad_norm": 0.18065877258777618,
"learning_rate": 0.0002,
"loss": 0.0816,
"step": 20120
},
{
"epoch": 0.03661006776863637,
"grad_norm": 0.4361811876296997,
"learning_rate": 0.0002,
"loss": 0.0978,
"step": 20130
},
{
"epoch": 0.03662825458819357,
"grad_norm": 0.24488002061843872,
"learning_rate": 0.0002,
"loss": 0.0742,
"step": 20140
},
{
"epoch": 0.036646441407750766,
"grad_norm": 0.023062752559781075,
"learning_rate": 0.0002,
"loss": 0.0196,
"step": 20150
},
{
"epoch": 0.03666462822730796,
"grad_norm": 0.22796255350112915,
"learning_rate": 0.0002,
"loss": 0.1457,
"step": 20160
},
{
"epoch": 0.03668281504686516,
"grad_norm": 0.16665758192539215,
"learning_rate": 0.0002,
"loss": 0.138,
"step": 20170
},
{
"epoch": 0.036701001866422354,
"grad_norm": 0.0503946952521801,
"learning_rate": 0.0002,
"loss": 0.079,
"step": 20180
},
{
"epoch": 0.03671918868597955,
"grad_norm": 0.1672963798046112,
"learning_rate": 0.0002,
"loss": 0.0621,
"step": 20190
},
{
"epoch": 0.03673737550553675,
"grad_norm": 0.06765859574079514,
"learning_rate": 0.0002,
"loss": 0.0171,
"step": 20200
},
{
"epoch": 0.03675556232509394,
"grad_norm": 0.6076682806015015,
"learning_rate": 0.0002,
"loss": 0.6804,
"step": 20210
},
{
"epoch": 0.03677374914465114,
"grad_norm": 0.04764563590288162,
"learning_rate": 0.0002,
"loss": 0.0965,
"step": 20220
},
{
"epoch": 0.036791935964208336,
"grad_norm": 0.6847806572914124,
"learning_rate": 0.0002,
"loss": 0.0784,
"step": 20230
},
{
"epoch": 0.03681012278376553,
"grad_norm": 0.2678837478160858,
"learning_rate": 0.0002,
"loss": 0.069,
"step": 20240
},
{
"epoch": 0.03682830960332273,
"grad_norm": 0.039824239909648895,
"learning_rate": 0.0002,
"loss": 0.0206,
"step": 20250
},
{
"epoch": 0.036846496422879925,
"grad_norm": 0.19583609700202942,
"learning_rate": 0.0002,
"loss": 0.1588,
"step": 20260
},
{
"epoch": 0.03686468324243713,
"grad_norm": 0.08613055944442749,
"learning_rate": 0.0002,
"loss": 0.0777,
"step": 20270
},
{
"epoch": 0.036882870061994324,
"grad_norm": 0.028818165883421898,
"learning_rate": 0.0002,
"loss": 0.0704,
"step": 20280
},
{
"epoch": 0.03690105688155152,
"grad_norm": 0.19514115154743195,
"learning_rate": 0.0002,
"loss": 0.0654,
"step": 20290
},
{
"epoch": 0.03691924370110872,
"grad_norm": 0.043222617357969284,
"learning_rate": 0.0002,
"loss": 0.0216,
"step": 20300
},
{
"epoch": 0.03693743052066591,
"grad_norm": 0.2490546703338623,
"learning_rate": 0.0002,
"loss": 0.1472,
"step": 20310
},
{
"epoch": 0.03695561734022311,
"grad_norm": 0.16989269852638245,
"learning_rate": 0.0002,
"loss": 0.081,
"step": 20320
},
{
"epoch": 0.036973804159780306,
"grad_norm": 0.09191739559173584,
"learning_rate": 0.0002,
"loss": 0.0733,
"step": 20330
},
{
"epoch": 0.0369919909793375,
"grad_norm": 0.18435023725032806,
"learning_rate": 0.0002,
"loss": 0.0654,
"step": 20340
},
{
"epoch": 0.0370101777988947,
"grad_norm": 0.031144114211201668,
"learning_rate": 0.0002,
"loss": 0.0226,
"step": 20350
},
{
"epoch": 0.037028364618451895,
"grad_norm": 0.3244694769382477,
"learning_rate": 0.0002,
"loss": 0.1304,
"step": 20360
},
{
"epoch": 0.03704655143800909,
"grad_norm": 0.13787488639354706,
"learning_rate": 0.0002,
"loss": 0.0811,
"step": 20370
},
{
"epoch": 0.03706473825756629,
"grad_norm": 0.058523450046777725,
"learning_rate": 0.0002,
"loss": 0.0806,
"step": 20380
},
{
"epoch": 0.037082925077123484,
"grad_norm": 0.3001325726509094,
"learning_rate": 0.0002,
"loss": 0.0694,
"step": 20390
},
{
"epoch": 0.03710111189668068,
"grad_norm": 0.04447292909026146,
"learning_rate": 0.0002,
"loss": 0.0218,
"step": 20400
},
{
"epoch": 0.037119298716237877,
"grad_norm": 0.25786396861076355,
"learning_rate": 0.0002,
"loss": 0.1499,
"step": 20410
},
{
"epoch": 0.03713748553579507,
"grad_norm": 0.11381134390830994,
"learning_rate": 0.0002,
"loss": 0.0822,
"step": 20420
},
{
"epoch": 0.03715567235535227,
"grad_norm": 0.022713568061590195,
"learning_rate": 0.0002,
"loss": 0.0785,
"step": 20430
},
{
"epoch": 0.037173859174909465,
"grad_norm": 0.15770909190177917,
"learning_rate": 0.0002,
"loss": 0.0625,
"step": 20440
},
{
"epoch": 0.03719204599446666,
"grad_norm": 0.021412041038274765,
"learning_rate": 0.0002,
"loss": 0.0126,
"step": 20450
},
{
"epoch": 0.03721023281402386,
"grad_norm": 0.24260753393173218,
"learning_rate": 0.0002,
"loss": 0.1777,
"step": 20460
},
{
"epoch": 0.037228419633581054,
"grad_norm": 0.10953031480312347,
"learning_rate": 0.0002,
"loss": 0.073,
"step": 20470
},
{
"epoch": 0.03724660645313825,
"grad_norm": 0.03975062072277069,
"learning_rate": 0.0002,
"loss": 0.0907,
"step": 20480
},
{
"epoch": 0.03726479327269545,
"grad_norm": 0.2025018036365509,
"learning_rate": 0.0002,
"loss": 0.0631,
"step": 20490
},
{
"epoch": 0.03728298009225264,
"grad_norm": 0.031849734485149384,
"learning_rate": 0.0002,
"loss": 0.0156,
"step": 20500
},
{
"epoch": 0.03730116691180984,
"grad_norm": 0.2650098502635956,
"learning_rate": 0.0002,
"loss": 0.1569,
"step": 20510
},
{
"epoch": 0.037319353731367036,
"grad_norm": 0.14113937318325043,
"learning_rate": 0.0002,
"loss": 0.0824,
"step": 20520
},
{
"epoch": 0.03733754055092423,
"grad_norm": 0.10276420414447784,
"learning_rate": 0.0002,
"loss": 0.0797,
"step": 20530
},
{
"epoch": 0.03735572737048143,
"grad_norm": 0.2258286476135254,
"learning_rate": 0.0002,
"loss": 0.0671,
"step": 20540
},
{
"epoch": 0.037373914190038625,
"grad_norm": 0.10343242436647415,
"learning_rate": 0.0002,
"loss": 0.0178,
"step": 20550
},
{
"epoch": 0.03739210100959582,
"grad_norm": 0.19423982501029968,
"learning_rate": 0.0002,
"loss": 0.1423,
"step": 20560
},
{
"epoch": 0.03741028782915302,
"grad_norm": 0.12046124786138535,
"learning_rate": 0.0002,
"loss": 0.0827,
"step": 20570
},
{
"epoch": 0.037428474648710214,
"grad_norm": 0.026751041412353516,
"learning_rate": 0.0002,
"loss": 0.0743,
"step": 20580
},
{
"epoch": 0.03744666146826741,
"grad_norm": 0.23576834797859192,
"learning_rate": 0.0002,
"loss": 0.0629,
"step": 20590
},
{
"epoch": 0.037464848287824606,
"grad_norm": 0.05146399885416031,
"learning_rate": 0.0002,
"loss": 0.0205,
"step": 20600
},
{
"epoch": 0.0374830351073818,
"grad_norm": 0.21750135719776154,
"learning_rate": 0.0002,
"loss": 0.1397,
"step": 20610
},
{
"epoch": 0.037501221926939,
"grad_norm": 0.08351115882396698,
"learning_rate": 0.0002,
"loss": 0.0801,
"step": 20620
},
{
"epoch": 0.037519408746496195,
"grad_norm": 0.07272092998027802,
"learning_rate": 0.0002,
"loss": 0.0881,
"step": 20630
},
{
"epoch": 0.03753759556605339,
"grad_norm": 0.23707769811153412,
"learning_rate": 0.0002,
"loss": 0.0706,
"step": 20640
},
{
"epoch": 0.03755578238561059,
"grad_norm": 0.05208323150873184,
"learning_rate": 0.0002,
"loss": 0.024,
"step": 20650
},
{
"epoch": 0.037573969205167784,
"grad_norm": 0.4163022041320801,
"learning_rate": 0.0002,
"loss": 0.159,
"step": 20660
},
{
"epoch": 0.03759215602472498,
"grad_norm": 0.1036575511097908,
"learning_rate": 0.0002,
"loss": 0.0814,
"step": 20670
},
{
"epoch": 0.03761034284428218,
"grad_norm": 0.09861626476049423,
"learning_rate": 0.0002,
"loss": 0.0828,
"step": 20680
},
{
"epoch": 0.03762852966383937,
"grad_norm": 0.1685744971036911,
"learning_rate": 0.0002,
"loss": 0.0597,
"step": 20690
},
{
"epoch": 0.03764671648339657,
"grad_norm": 0.02716050110757351,
"learning_rate": 0.0002,
"loss": 0.0164,
"step": 20700
},
{
"epoch": 0.037664903302953766,
"grad_norm": 0.46858713030815125,
"learning_rate": 0.0002,
"loss": 0.1596,
"step": 20710
},
{
"epoch": 0.03768309012251096,
"grad_norm": 0.15260715782642365,
"learning_rate": 0.0002,
"loss": 0.0835,
"step": 20720
},
{
"epoch": 0.03770127694206816,
"grad_norm": 0.2063397914171219,
"learning_rate": 0.0002,
"loss": 0.0845,
"step": 20730
},
{
"epoch": 0.037719463761625355,
"grad_norm": 0.16447599232196808,
"learning_rate": 0.0002,
"loss": 0.0595,
"step": 20740
},
{
"epoch": 0.03773765058118255,
"grad_norm": 0.020755184814333916,
"learning_rate": 0.0002,
"loss": 0.0164,
"step": 20750
},
{
"epoch": 0.03775583740073975,
"grad_norm": 0.23675021529197693,
"learning_rate": 0.0002,
"loss": 0.1634,
"step": 20760
},
{
"epoch": 0.037774024220296944,
"grad_norm": 0.08625516295433044,
"learning_rate": 0.0002,
"loss": 0.0685,
"step": 20770
},
{
"epoch": 0.03779221103985414,
"grad_norm": 0.043796882033348083,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 20780
},
{
"epoch": 0.037810397859411336,
"grad_norm": 0.20600435137748718,
"learning_rate": 0.0002,
"loss": 0.0651,
"step": 20790
},
{
"epoch": 0.03782858467896853,
"grad_norm": 0.04963940382003784,
"learning_rate": 0.0002,
"loss": 0.0202,
"step": 20800
},
{
"epoch": 0.03784677149852573,
"grad_norm": 0.34920167922973633,
"learning_rate": 0.0002,
"loss": 0.1494,
"step": 20810
},
{
"epoch": 0.037864958318082925,
"grad_norm": 0.18662041425704956,
"learning_rate": 0.0002,
"loss": 0.0823,
"step": 20820
},
{
"epoch": 0.03788314513764012,
"grad_norm": 0.12615887820720673,
"learning_rate": 0.0002,
"loss": 0.0856,
"step": 20830
},
{
"epoch": 0.03790133195719732,
"grad_norm": 0.1857282668352127,
"learning_rate": 0.0002,
"loss": 0.0676,
"step": 20840
},
{
"epoch": 0.037919518776754514,
"grad_norm": 0.05569197237491608,
"learning_rate": 0.0002,
"loss": 0.0181,
"step": 20850
},
{
"epoch": 0.03793770559631171,
"grad_norm": 0.29011765122413635,
"learning_rate": 0.0002,
"loss": 0.1418,
"step": 20860
},
{
"epoch": 0.03795589241586891,
"grad_norm": 0.14119744300842285,
"learning_rate": 0.0002,
"loss": 0.0812,
"step": 20870
},
{
"epoch": 0.0379740792354261,
"grad_norm": 0.039884984493255615,
"learning_rate": 0.0002,
"loss": 0.0781,
"step": 20880
},
{
"epoch": 0.0379922660549833,
"grad_norm": 0.23705685138702393,
"learning_rate": 0.0002,
"loss": 0.0621,
"step": 20890
},
{
"epoch": 0.038010452874540496,
"grad_norm": 0.07462739199399948,
"learning_rate": 0.0002,
"loss": 0.022,
"step": 20900
},
{
"epoch": 0.03802863969409769,
"grad_norm": 0.2610052824020386,
"learning_rate": 0.0002,
"loss": 0.1517,
"step": 20910
},
{
"epoch": 0.03804682651365489,
"grad_norm": 0.12775090336799622,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 20920
},
{
"epoch": 0.038065013333212085,
"grad_norm": 0.03661905974149704,
"learning_rate": 0.0002,
"loss": 0.0738,
"step": 20930
},
{
"epoch": 0.03808320015276928,
"grad_norm": 0.20907218754291534,
"learning_rate": 0.0002,
"loss": 0.0627,
"step": 20940
},
{
"epoch": 0.03810138697232648,
"grad_norm": 0.022804679349064827,
"learning_rate": 0.0002,
"loss": 0.0205,
"step": 20950
},
{
"epoch": 0.03811957379188368,
"grad_norm": 0.258284330368042,
"learning_rate": 0.0002,
"loss": 0.1428,
"step": 20960
},
{
"epoch": 0.03813776061144088,
"grad_norm": 0.1477317065000534,
"learning_rate": 0.0002,
"loss": 0.0789,
"step": 20970
},
{
"epoch": 0.03815594743099807,
"grad_norm": 0.0610325001180172,
"learning_rate": 0.0002,
"loss": 0.0836,
"step": 20980
},
{
"epoch": 0.03817413425055527,
"grad_norm": 0.18825507164001465,
"learning_rate": 0.0002,
"loss": 0.0621,
"step": 20990
},
{
"epoch": 0.038192321070112466,
"grad_norm": 0.03943372145295143,
"learning_rate": 0.0002,
"loss": 0.0185,
"step": 21000
},
{
"epoch": 0.03821050788966966,
"grad_norm": 0.34519344568252563,
"learning_rate": 0.0002,
"loss": 0.1345,
"step": 21010
},
{
"epoch": 0.03822869470922686,
"grad_norm": 0.09635084867477417,
"learning_rate": 0.0002,
"loss": 0.0753,
"step": 21020
},
{
"epoch": 0.038246881528784055,
"grad_norm": 0.032520972192287445,
"learning_rate": 0.0002,
"loss": 0.082,
"step": 21030
},
{
"epoch": 0.03826506834834125,
"grad_norm": 0.18068930506706238,
"learning_rate": 0.0002,
"loss": 0.0609,
"step": 21040
},
{
"epoch": 0.03828325516789845,
"grad_norm": 0.05550973862409592,
"learning_rate": 0.0002,
"loss": 0.0241,
"step": 21050
},
{
"epoch": 0.038301441987455644,
"grad_norm": 0.19561107456684113,
"learning_rate": 0.0002,
"loss": 0.1337,
"step": 21060
},
{
"epoch": 0.03831962880701284,
"grad_norm": 0.1852179914712906,
"learning_rate": 0.0002,
"loss": 0.0724,
"step": 21070
},
{
"epoch": 0.038337815626570036,
"grad_norm": 0.11915116757154465,
"learning_rate": 0.0002,
"loss": 0.0836,
"step": 21080
},
{
"epoch": 0.03835600244612723,
"grad_norm": 0.21116836369037628,
"learning_rate": 0.0002,
"loss": 0.0628,
"step": 21090
},
{
"epoch": 0.03837418926568443,
"grad_norm": 0.042745884507894516,
"learning_rate": 0.0002,
"loss": 0.0214,
"step": 21100
},
{
"epoch": 0.038392376085241625,
"grad_norm": 0.43089792132377625,
"learning_rate": 0.0002,
"loss": 0.1351,
"step": 21110
},
{
"epoch": 0.03841056290479882,
"grad_norm": 0.09607810527086258,
"learning_rate": 0.0002,
"loss": 0.0778,
"step": 21120
},
{
"epoch": 0.03842874972435602,
"grad_norm": 0.13603460788726807,
"learning_rate": 0.0002,
"loss": 0.0787,
"step": 21130
},
{
"epoch": 0.038446936543913214,
"grad_norm": 0.20110103487968445,
"learning_rate": 0.0002,
"loss": 0.067,
"step": 21140
},
{
"epoch": 0.03846512336347041,
"grad_norm": 0.042503997683525085,
"learning_rate": 0.0002,
"loss": 0.0194,
"step": 21150
},
{
"epoch": 0.03848331018302761,
"grad_norm": 0.2605084478855133,
"learning_rate": 0.0002,
"loss": 0.1374,
"step": 21160
},
{
"epoch": 0.0385014970025848,
"grad_norm": 0.09476794302463531,
"learning_rate": 0.0002,
"loss": 0.078,
"step": 21170
},
{
"epoch": 0.038519683822142,
"grad_norm": 0.03458428382873535,
"learning_rate": 0.0002,
"loss": 0.08,
"step": 21180
},
{
"epoch": 0.038537870641699196,
"grad_norm": 0.31196194887161255,
"learning_rate": 0.0002,
"loss": 0.0664,
"step": 21190
},
{
"epoch": 0.03855605746125639,
"grad_norm": 0.037113118916749954,
"learning_rate": 0.0002,
"loss": 0.0221,
"step": 21200
},
{
"epoch": 0.03857424428081359,
"grad_norm": 0.3699415922164917,
"learning_rate": 0.0002,
"loss": 0.1534,
"step": 21210
},
{
"epoch": 0.038592431100370785,
"grad_norm": 0.06454256922006607,
"learning_rate": 0.0002,
"loss": 0.0762,
"step": 21220
},
{
"epoch": 0.03861061791992798,
"grad_norm": 0.09858033806085587,
"learning_rate": 0.0002,
"loss": 0.0785,
"step": 21230
},
{
"epoch": 0.03862880473948518,
"grad_norm": 0.1482791304588318,
"learning_rate": 0.0002,
"loss": 0.062,
"step": 21240
},
{
"epoch": 0.038646991559042373,
"grad_norm": 0.031473588198423386,
"learning_rate": 0.0002,
"loss": 0.0163,
"step": 21250
},
{
"epoch": 0.03866517837859957,
"grad_norm": 0.09360513091087341,
"learning_rate": 0.0002,
"loss": 0.1397,
"step": 21260
},
{
"epoch": 0.038683365198156766,
"grad_norm": 0.10830901563167572,
"learning_rate": 0.0002,
"loss": 0.0789,
"step": 21270
},
{
"epoch": 0.03870155201771396,
"grad_norm": 0.08910014480352402,
"learning_rate": 0.0002,
"loss": 0.0758,
"step": 21280
},
{
"epoch": 0.03871973883727116,
"grad_norm": 0.21524523198604584,
"learning_rate": 0.0002,
"loss": 0.0628,
"step": 21290
},
{
"epoch": 0.038737925656828355,
"grad_norm": 0.03794678673148155,
"learning_rate": 0.0002,
"loss": 0.0229,
"step": 21300
},
{
"epoch": 0.03875611247638555,
"grad_norm": 0.46754345297813416,
"learning_rate": 0.0002,
"loss": 0.1291,
"step": 21310
},
{
"epoch": 0.03877429929594275,
"grad_norm": 0.07472983002662659,
"learning_rate": 0.0002,
"loss": 0.076,
"step": 21320
},
{
"epoch": 0.038792486115499944,
"grad_norm": 0.11820811778306961,
"learning_rate": 0.0002,
"loss": 0.0772,
"step": 21330
},
{
"epoch": 0.03881067293505714,
"grad_norm": 0.21140390634536743,
"learning_rate": 0.0002,
"loss": 0.0539,
"step": 21340
},
{
"epoch": 0.03882885975461434,
"grad_norm": 0.044819217175245285,
"learning_rate": 0.0002,
"loss": 0.0228,
"step": 21350
},
{
"epoch": 0.03884704657417153,
"grad_norm": 0.2267816811800003,
"learning_rate": 0.0002,
"loss": 0.1462,
"step": 21360
},
{
"epoch": 0.03886523339372873,
"grad_norm": 0.10087496787309647,
"learning_rate": 0.0002,
"loss": 0.0766,
"step": 21370
},
{
"epoch": 0.038883420213285926,
"grad_norm": 0.09982341527938843,
"learning_rate": 0.0002,
"loss": 0.0798,
"step": 21380
},
{
"epoch": 0.03890160703284312,
"grad_norm": 0.21729151904582977,
"learning_rate": 0.0002,
"loss": 0.0586,
"step": 21390
},
{
"epoch": 0.03891979385240032,
"grad_norm": 0.020691821351647377,
"learning_rate": 0.0002,
"loss": 0.0175,
"step": 21400
},
{
"epoch": 0.038937980671957514,
"grad_norm": 0.33531665802001953,
"learning_rate": 0.0002,
"loss": 0.149,
"step": 21410
},
{
"epoch": 0.03895616749151471,
"grad_norm": 0.11777795851230621,
"learning_rate": 0.0002,
"loss": 0.0736,
"step": 21420
},
{
"epoch": 0.03897435431107191,
"grad_norm": 0.07860718667507172,
"learning_rate": 0.0002,
"loss": 0.0788,
"step": 21430
},
{
"epoch": 0.0389925411306291,
"grad_norm": 0.16030597686767578,
"learning_rate": 0.0002,
"loss": 0.0581,
"step": 21440
},
{
"epoch": 0.0390107279501863,
"grad_norm": 0.01747356541454792,
"learning_rate": 0.0002,
"loss": 0.0185,
"step": 21450
},
{
"epoch": 0.039028914769743496,
"grad_norm": 0.2313859909772873,
"learning_rate": 0.0002,
"loss": 0.1383,
"step": 21460
},
{
"epoch": 0.03904710158930069,
"grad_norm": 0.14510080218315125,
"learning_rate": 0.0002,
"loss": 0.0805,
"step": 21470
},
{
"epoch": 0.03906528840885789,
"grad_norm": 0.04511871561408043,
"learning_rate": 0.0002,
"loss": 0.0793,
"step": 21480
},
{
"epoch": 0.039083475228415085,
"grad_norm": 0.24205265939235687,
"learning_rate": 0.0002,
"loss": 0.0624,
"step": 21490
},
{
"epoch": 0.03910166204797228,
"grad_norm": 0.08096791058778763,
"learning_rate": 0.0002,
"loss": 0.0208,
"step": 21500
},
{
"epoch": 0.03911984886752948,
"grad_norm": 0.14405490458011627,
"learning_rate": 0.0002,
"loss": 0.1189,
"step": 21510
},
{
"epoch": 0.039138035687086674,
"grad_norm": 0.06753374636173248,
"learning_rate": 0.0002,
"loss": 0.0772,
"step": 21520
},
{
"epoch": 0.03915622250664387,
"grad_norm": 0.029025042429566383,
"learning_rate": 0.0002,
"loss": 0.0761,
"step": 21530
},
{
"epoch": 0.039174409326201066,
"grad_norm": 0.2987070381641388,
"learning_rate": 0.0002,
"loss": 0.0656,
"step": 21540
},
{
"epoch": 0.03919259614575826,
"grad_norm": 0.04445091262459755,
"learning_rate": 0.0002,
"loss": 0.0241,
"step": 21550
},
{
"epoch": 0.03921078296531546,
"grad_norm": 0.34976306557655334,
"learning_rate": 0.0002,
"loss": 0.138,
"step": 21560
},
{
"epoch": 0.039228969784872655,
"grad_norm": 0.07521916925907135,
"learning_rate": 0.0002,
"loss": 0.0774,
"step": 21570
},
{
"epoch": 0.03924715660442985,
"grad_norm": 0.1445412039756775,
"learning_rate": 0.0002,
"loss": 0.087,
"step": 21580
},
{
"epoch": 0.03926534342398705,
"grad_norm": 0.2688128352165222,
"learning_rate": 0.0002,
"loss": 0.0712,
"step": 21590
},
{
"epoch": 0.039283530243544244,
"grad_norm": 0.05321233719587326,
"learning_rate": 0.0002,
"loss": 0.0245,
"step": 21600
},
{
"epoch": 0.03930171706310144,
"grad_norm": 0.44459134340286255,
"learning_rate": 0.0002,
"loss": 0.1524,
"step": 21610
},
{
"epoch": 0.03931990388265864,
"grad_norm": 0.13169553875923157,
"learning_rate": 0.0002,
"loss": 0.0726,
"step": 21620
},
{
"epoch": 0.03933809070221583,
"grad_norm": 0.0908237174153328,
"learning_rate": 0.0002,
"loss": 0.0785,
"step": 21630
},
{
"epoch": 0.03935627752177303,
"grad_norm": 0.18110623955726624,
"learning_rate": 0.0002,
"loss": 0.0606,
"step": 21640
},
{
"epoch": 0.03937446434133023,
"grad_norm": 0.021362677216529846,
"learning_rate": 0.0002,
"loss": 0.0175,
"step": 21650
},
{
"epoch": 0.03939265116088743,
"grad_norm": 0.27973899245262146,
"learning_rate": 0.0002,
"loss": 0.1641,
"step": 21660
},
{
"epoch": 0.039410837980444625,
"grad_norm": 0.09090718626976013,
"learning_rate": 0.0002,
"loss": 0.073,
"step": 21670
},
{
"epoch": 0.03942902480000182,
"grad_norm": 0.13408254086971283,
"learning_rate": 0.0002,
"loss": 0.0769,
"step": 21680
},
{
"epoch": 0.03944721161955902,
"grad_norm": 0.2530055046081543,
"learning_rate": 0.0002,
"loss": 0.0729,
"step": 21690
},
{
"epoch": 0.039465398439116214,
"grad_norm": 0.027523871511220932,
"learning_rate": 0.0002,
"loss": 0.017,
"step": 21700
},
{
"epoch": 0.03948358525867341,
"grad_norm": 0.2520642578601837,
"learning_rate": 0.0002,
"loss": 0.1804,
"step": 21710
},
{
"epoch": 0.03950177207823061,
"grad_norm": 0.11017465591430664,
"learning_rate": 0.0002,
"loss": 0.0767,
"step": 21720
},
{
"epoch": 0.0395199588977878,
"grad_norm": 0.05129052326083183,
"learning_rate": 0.0002,
"loss": 0.0723,
"step": 21730
},
{
"epoch": 0.039538145717345,
"grad_norm": 0.1846659779548645,
"learning_rate": 0.0002,
"loss": 0.0619,
"step": 21740
},
{
"epoch": 0.039556332536902196,
"grad_norm": 0.014305013231933117,
"learning_rate": 0.0002,
"loss": 0.0171,
"step": 21750
},
{
"epoch": 0.03957451935645939,
"grad_norm": 0.21667814254760742,
"learning_rate": 0.0002,
"loss": 0.157,
"step": 21760
},
{
"epoch": 0.03959270617601659,
"grad_norm": 0.21456903219223022,
"learning_rate": 0.0002,
"loss": 0.0803,
"step": 21770
},
{
"epoch": 0.039610892995573785,
"grad_norm": 0.03621416166424751,
"learning_rate": 0.0002,
"loss": 0.0796,
"step": 21780
},
{
"epoch": 0.03962907981513098,
"grad_norm": 0.20819205045700073,
"learning_rate": 0.0002,
"loss": 0.0633,
"step": 21790
},
{
"epoch": 0.03964726663468818,
"grad_norm": 0.06860963255167007,
"learning_rate": 0.0002,
"loss": 0.0172,
"step": 21800
},
{
"epoch": 0.039665453454245374,
"grad_norm": 0.2568039894104004,
"learning_rate": 0.0002,
"loss": 0.134,
"step": 21810
},
{
"epoch": 0.03968364027380257,
"grad_norm": 0.08747372031211853,
"learning_rate": 0.0002,
"loss": 0.0753,
"step": 21820
},
{
"epoch": 0.039701827093359766,
"grad_norm": 0.13403570652008057,
"learning_rate": 0.0002,
"loss": 0.0807,
"step": 21830
},
{
"epoch": 0.03972001391291696,
"grad_norm": 0.20756667852401733,
"learning_rate": 0.0002,
"loss": 0.0625,
"step": 21840
},
{
"epoch": 0.03973820073247416,
"grad_norm": 0.03678170591592789,
"learning_rate": 0.0002,
"loss": 0.019,
"step": 21850
},
{
"epoch": 0.039756387552031355,
"grad_norm": 0.1847693920135498,
"learning_rate": 0.0002,
"loss": 0.1385,
"step": 21860
},
{
"epoch": 0.03977457437158855,
"grad_norm": 0.1627635508775711,
"learning_rate": 0.0002,
"loss": 0.0765,
"step": 21870
},
{
"epoch": 0.03979276119114575,
"grad_norm": 0.0535571426153183,
"learning_rate": 0.0002,
"loss": 0.0741,
"step": 21880
},
{
"epoch": 0.039810948010702944,
"grad_norm": 0.3128276765346527,
"learning_rate": 0.0002,
"loss": 0.0598,
"step": 21890
},
{
"epoch": 0.03982913483026014,
"grad_norm": 0.03369860351085663,
"learning_rate": 0.0002,
"loss": 0.0217,
"step": 21900
},
{
"epoch": 0.03984732164981734,
"grad_norm": 0.1962599903345108,
"learning_rate": 0.0002,
"loss": 0.1319,
"step": 21910
},
{
"epoch": 0.03986550846937453,
"grad_norm": 0.1397421509027481,
"learning_rate": 0.0002,
"loss": 0.068,
"step": 21920
},
{
"epoch": 0.03988369528893173,
"grad_norm": 0.10252605378627777,
"learning_rate": 0.0002,
"loss": 0.0736,
"step": 21930
},
{
"epoch": 0.039901882108488926,
"grad_norm": 0.22179432213306427,
"learning_rate": 0.0002,
"loss": 0.0625,
"step": 21940
},
{
"epoch": 0.03992006892804612,
"grad_norm": 0.06068069487810135,
"learning_rate": 0.0002,
"loss": 0.0242,
"step": 21950
},
{
"epoch": 0.03993825574760332,
"grad_norm": 0.20243950188159943,
"learning_rate": 0.0002,
"loss": 0.143,
"step": 21960
},
{
"epoch": 0.039956442567160515,
"grad_norm": 0.11786511540412903,
"learning_rate": 0.0002,
"loss": 0.0779,
"step": 21970
},
{
"epoch": 0.03997462938671771,
"grad_norm": 0.08299421519041061,
"learning_rate": 0.0002,
"loss": 0.0774,
"step": 21980
},
{
"epoch": 0.03999281620627491,
"grad_norm": 0.2844075858592987,
"learning_rate": 0.0002,
"loss": 0.0711,
"step": 21990
},
{
"epoch": 0.040011003025832104,
"grad_norm": 0.034433312714099884,
"learning_rate": 0.0002,
"loss": 0.0217,
"step": 22000
},
{
"epoch": 0.0400291898453893,
"grad_norm": 0.3878481388092041,
"learning_rate": 0.0002,
"loss": 0.1525,
"step": 22010
},
{
"epoch": 0.040047376664946496,
"grad_norm": 0.16157971322536469,
"learning_rate": 0.0002,
"loss": 0.0788,
"step": 22020
},
{
"epoch": 0.04006556348450369,
"grad_norm": 0.10347063094377518,
"learning_rate": 0.0002,
"loss": 0.0809,
"step": 22030
},
{
"epoch": 0.04008375030406089,
"grad_norm": 0.20982638001441956,
"learning_rate": 0.0002,
"loss": 0.0662,
"step": 22040
},
{
"epoch": 0.040101937123618085,
"grad_norm": 5.856126308441162,
"learning_rate": 0.0002,
"loss": 0.0578,
"step": 22050
},
{
"epoch": 0.04012012394317528,
"grad_norm": 0.21289357542991638,
"learning_rate": 0.0002,
"loss": 0.1257,
"step": 22060
},
{
"epoch": 0.04013831076273248,
"grad_norm": 0.040848907083272934,
"learning_rate": 0.0002,
"loss": 0.0783,
"step": 22070
},
{
"epoch": 0.040156497582289674,
"grad_norm": 0.056517478078603745,
"learning_rate": 0.0002,
"loss": 0.0693,
"step": 22080
},
{
"epoch": 0.04017468440184687,
"grad_norm": 0.274312287569046,
"learning_rate": 0.0002,
"loss": 0.0685,
"step": 22090
},
{
"epoch": 0.04019287122140407,
"grad_norm": 0.06353340297937393,
"learning_rate": 0.0002,
"loss": 0.0263,
"step": 22100
},
{
"epoch": 0.04021105804096126,
"grad_norm": 0.287201464176178,
"learning_rate": 0.0002,
"loss": 0.1425,
"step": 22110
},
{
"epoch": 0.04022924486051846,
"grad_norm": 0.0990116223692894,
"learning_rate": 0.0002,
"loss": 0.0732,
"step": 22120
},
{
"epoch": 0.040247431680075656,
"grad_norm": 0.03471527248620987,
"learning_rate": 0.0002,
"loss": 0.0806,
"step": 22130
},
{
"epoch": 0.04026561849963285,
"grad_norm": 0.16411902010440826,
"learning_rate": 0.0002,
"loss": 0.0646,
"step": 22140
},
{
"epoch": 0.04028380531919005,
"grad_norm": 0.032927367836236954,
"learning_rate": 0.0002,
"loss": 0.0225,
"step": 22150
},
{
"epoch": 0.040301992138747245,
"grad_norm": 0.31128716468811035,
"learning_rate": 0.0002,
"loss": 0.1227,
"step": 22160
},
{
"epoch": 0.04032017895830444,
"grad_norm": 0.14056596159934998,
"learning_rate": 0.0002,
"loss": 0.0866,
"step": 22170
},
{
"epoch": 0.04033836577786164,
"grad_norm": 0.10555677115917206,
"learning_rate": 0.0002,
"loss": 0.0785,
"step": 22180
},
{
"epoch": 0.040356552597418834,
"grad_norm": 0.25597816705703735,
"learning_rate": 0.0002,
"loss": 0.0667,
"step": 22190
},
{
"epoch": 0.04037473941697603,
"grad_norm": 0.04694845899939537,
"learning_rate": 0.0002,
"loss": 0.021,
"step": 22200
},
{
"epoch": 0.040392926236533226,
"grad_norm": 0.2536766529083252,
"learning_rate": 0.0002,
"loss": 0.1485,
"step": 22210
},
{
"epoch": 0.04041111305609042,
"grad_norm": 0.0536673367023468,
"learning_rate": 0.0002,
"loss": 0.0737,
"step": 22220
},
{
"epoch": 0.04042929987564762,
"grad_norm": 0.13121111690998077,
"learning_rate": 0.0002,
"loss": 0.0793,
"step": 22230
},
{
"epoch": 0.040447486695204815,
"grad_norm": 0.23850645124912262,
"learning_rate": 0.0002,
"loss": 0.0698,
"step": 22240
},
{
"epoch": 0.04046567351476201,
"grad_norm": 0.04178560525178909,
"learning_rate": 0.0002,
"loss": 0.0212,
"step": 22250
},
{
"epoch": 0.04048386033431921,
"grad_norm": 0.42834579944610596,
"learning_rate": 0.0002,
"loss": 0.1352,
"step": 22260
},
{
"epoch": 0.040502047153876404,
"grad_norm": 0.050178542733192444,
"learning_rate": 0.0002,
"loss": 0.0853,
"step": 22270
},
{
"epoch": 0.0405202339734336,
"grad_norm": 0.042758237570524216,
"learning_rate": 0.0002,
"loss": 0.0709,
"step": 22280
},
{
"epoch": 0.0405384207929908,
"grad_norm": 0.2604416012763977,
"learning_rate": 0.0002,
"loss": 0.0643,
"step": 22290
},
{
"epoch": 0.04055660761254799,
"grad_norm": 0.06166388466954231,
"learning_rate": 0.0002,
"loss": 0.0236,
"step": 22300
},
{
"epoch": 0.04057479443210519,
"grad_norm": 0.2337518334388733,
"learning_rate": 0.0002,
"loss": 0.132,
"step": 22310
},
{
"epoch": 0.040592981251662386,
"grad_norm": 0.15794694423675537,
"learning_rate": 0.0002,
"loss": 0.0739,
"step": 22320
},
{
"epoch": 0.04061116807121959,
"grad_norm": 0.12059915065765381,
"learning_rate": 0.0002,
"loss": 0.0743,
"step": 22330
},
{
"epoch": 0.040629354890776785,
"grad_norm": 0.25351977348327637,
"learning_rate": 0.0002,
"loss": 0.065,
"step": 22340
},
{
"epoch": 0.04064754171033398,
"grad_norm": 0.03265364468097687,
"learning_rate": 0.0002,
"loss": 0.02,
"step": 22350
},
{
"epoch": 0.04066572852989118,
"grad_norm": 0.22959749400615692,
"learning_rate": 0.0002,
"loss": 0.1278,
"step": 22360
},
{
"epoch": 0.040683915349448374,
"grad_norm": 0.11381889134645462,
"learning_rate": 0.0002,
"loss": 0.0823,
"step": 22370
},
{
"epoch": 0.04070210216900557,
"grad_norm": 0.03541165217757225,
"learning_rate": 0.0002,
"loss": 0.0809,
"step": 22380
},
{
"epoch": 0.04072028898856277,
"grad_norm": 0.20604047179222107,
"learning_rate": 0.0002,
"loss": 0.0693,
"step": 22390
},
{
"epoch": 0.04073847580811996,
"grad_norm": 0.051576532423496246,
"learning_rate": 0.0002,
"loss": 0.0213,
"step": 22400
},
{
"epoch": 0.04075666262767716,
"grad_norm": 0.208265483379364,
"learning_rate": 0.0002,
"loss": 0.1203,
"step": 22410
},
{
"epoch": 0.040774849447234356,
"grad_norm": 0.14376410841941833,
"learning_rate": 0.0002,
"loss": 0.0832,
"step": 22420
},
{
"epoch": 0.04079303626679155,
"grad_norm": 0.0634629875421524,
"learning_rate": 0.0002,
"loss": 0.0797,
"step": 22430
},
{
"epoch": 0.04081122308634875,
"grad_norm": 0.22782418131828308,
"learning_rate": 0.0002,
"loss": 0.0594,
"step": 22440
},
{
"epoch": 0.040829409905905945,
"grad_norm": 0.034153662621974945,
"learning_rate": 0.0002,
"loss": 0.0197,
"step": 22450
},
{
"epoch": 0.04084759672546314,
"grad_norm": 0.22994177043437958,
"learning_rate": 0.0002,
"loss": 0.1276,
"step": 22460
},
{
"epoch": 0.04086578354502034,
"grad_norm": 0.37397289276123047,
"learning_rate": 0.0002,
"loss": 0.0794,
"step": 22470
},
{
"epoch": 0.040883970364577533,
"grad_norm": 0.03585643321275711,
"learning_rate": 0.0002,
"loss": 0.0765,
"step": 22480
},
{
"epoch": 0.04090215718413473,
"grad_norm": 0.2266087681055069,
"learning_rate": 0.0002,
"loss": 0.0661,
"step": 22490
},
{
"epoch": 0.040920344003691926,
"grad_norm": 0.03867397829890251,
"learning_rate": 0.0002,
"loss": 0.0241,
"step": 22500
},
{
"epoch": 0.04093853082324912,
"grad_norm": 0.23483702540397644,
"learning_rate": 0.0002,
"loss": 0.1442,
"step": 22510
},
{
"epoch": 0.04095671764280632,
"grad_norm": 0.11447428911924362,
"learning_rate": 0.0002,
"loss": 0.0759,
"step": 22520
},
{
"epoch": 0.040974904462363515,
"grad_norm": 0.1060417965054512,
"learning_rate": 0.0002,
"loss": 0.0792,
"step": 22530
},
{
"epoch": 0.04099309128192071,
"grad_norm": 0.1915966123342514,
"learning_rate": 0.0002,
"loss": 0.0679,
"step": 22540
},
{
"epoch": 0.04101127810147791,
"grad_norm": 0.05328527092933655,
"learning_rate": 0.0002,
"loss": 0.0211,
"step": 22550
},
{
"epoch": 0.041029464921035104,
"grad_norm": 0.31612515449523926,
"learning_rate": 0.0002,
"loss": 0.1395,
"step": 22560
},
{
"epoch": 0.0410476517405923,
"grad_norm": 0.1860841065645218,
"learning_rate": 0.0002,
"loss": 0.0704,
"step": 22570
},
{
"epoch": 0.0410658385601495,
"grad_norm": 0.11183702945709229,
"learning_rate": 0.0002,
"loss": 0.073,
"step": 22580
},
{
"epoch": 0.04108402537970669,
"grad_norm": 0.2028307020664215,
"learning_rate": 0.0002,
"loss": 0.0592,
"step": 22590
},
{
"epoch": 0.04110221219926389,
"grad_norm": 0.032915905117988586,
"learning_rate": 0.0002,
"loss": 0.0211,
"step": 22600
},
{
"epoch": 0.041120399018821086,
"grad_norm": 0.2932131588459015,
"learning_rate": 0.0002,
"loss": 0.1542,
"step": 22610
},
{
"epoch": 0.04113858583837828,
"grad_norm": 0.08883325010538101,
"learning_rate": 0.0002,
"loss": 0.079,
"step": 22620
},
{
"epoch": 0.04115677265793548,
"grad_norm": 0.07874555885791779,
"learning_rate": 0.0002,
"loss": 0.0801,
"step": 22630
},
{
"epoch": 0.041174959477492674,
"grad_norm": 0.13785040378570557,
"learning_rate": 0.0002,
"loss": 0.0636,
"step": 22640
},
{
"epoch": 0.04119314629704987,
"grad_norm": 0.0321812778711319,
"learning_rate": 0.0002,
"loss": 0.0208,
"step": 22650
},
{
"epoch": 0.04121133311660707,
"grad_norm": 0.142785906791687,
"learning_rate": 0.0002,
"loss": 0.1292,
"step": 22660
},
{
"epoch": 0.04122951993616426,
"grad_norm": 0.15572668612003326,
"learning_rate": 0.0002,
"loss": 0.0774,
"step": 22670
},
{
"epoch": 0.04124770675572146,
"grad_norm": 0.033191781491041183,
"learning_rate": 0.0002,
"loss": 0.0805,
"step": 22680
},
{
"epoch": 0.041265893575278656,
"grad_norm": 0.23840776085853577,
"learning_rate": 0.0002,
"loss": 0.06,
"step": 22690
},
{
"epoch": 0.04128408039483585,
"grad_norm": 0.05943412706255913,
"learning_rate": 0.0002,
"loss": 0.0215,
"step": 22700
},
{
"epoch": 0.04130226721439305,
"grad_norm": 0.05142183229327202,
"learning_rate": 0.0002,
"loss": 0.1181,
"step": 22710
},
{
"epoch": 0.041320454033950245,
"grad_norm": 0.1583058387041092,
"learning_rate": 0.0002,
"loss": 0.0822,
"step": 22720
},
{
"epoch": 0.04133864085350744,
"grad_norm": 0.035809941589832306,
"learning_rate": 0.0002,
"loss": 0.0723,
"step": 22730
},
{
"epoch": 0.04135682767306464,
"grad_norm": 0.24066607654094696,
"learning_rate": 0.0002,
"loss": 0.0621,
"step": 22740
},
{
"epoch": 0.041375014492621834,
"grad_norm": 0.0327225998044014,
"learning_rate": 0.0002,
"loss": 0.0264,
"step": 22750
},
{
"epoch": 0.04139320131217903,
"grad_norm": 0.16599033772945404,
"learning_rate": 0.0002,
"loss": 0.1082,
"step": 22760
},
{
"epoch": 0.041411388131736226,
"grad_norm": 0.18834830820560455,
"learning_rate": 0.0002,
"loss": 0.0767,
"step": 22770
},
{
"epoch": 0.04142957495129342,
"grad_norm": 0.04162973538041115,
"learning_rate": 0.0002,
"loss": 0.0821,
"step": 22780
},
{
"epoch": 0.04144776177085062,
"grad_norm": 0.21065399050712585,
"learning_rate": 0.0002,
"loss": 0.0591,
"step": 22790
},
{
"epoch": 0.041465948590407815,
"grad_norm": 0.03744394704699516,
"learning_rate": 0.0002,
"loss": 0.0199,
"step": 22800
},
{
"epoch": 0.04148413540996501,
"grad_norm": 0.30440911650657654,
"learning_rate": 0.0002,
"loss": 0.1321,
"step": 22810
},
{
"epoch": 0.04150232222952221,
"grad_norm": 0.07215052098035812,
"learning_rate": 0.0002,
"loss": 0.077,
"step": 22820
},
{
"epoch": 0.041520509049079404,
"grad_norm": 0.0822744220495224,
"learning_rate": 0.0002,
"loss": 0.0695,
"step": 22830
},
{
"epoch": 0.0415386958686366,
"grad_norm": 0.20610104501247406,
"learning_rate": 0.0002,
"loss": 0.0668,
"step": 22840
},
{
"epoch": 0.0415568826881938,
"grad_norm": 0.05089128017425537,
"learning_rate": 0.0002,
"loss": 0.0275,
"step": 22850
},
{
"epoch": 0.04157506950775099,
"grad_norm": 0.23365797102451324,
"learning_rate": 0.0002,
"loss": 0.1308,
"step": 22860
},
{
"epoch": 0.04159325632730819,
"grad_norm": 0.03983612358570099,
"learning_rate": 0.0002,
"loss": 0.0738,
"step": 22870
},
{
"epoch": 0.041611443146865386,
"grad_norm": 0.12472117692232132,
"learning_rate": 0.0002,
"loss": 0.082,
"step": 22880
},
{
"epoch": 0.04162962996642258,
"grad_norm": 0.19599118828773499,
"learning_rate": 0.0002,
"loss": 0.0614,
"step": 22890
},
{
"epoch": 0.04164781678597978,
"grad_norm": 0.04077763110399246,
"learning_rate": 0.0002,
"loss": 0.0298,
"step": 22900
},
{
"epoch": 0.041666003605536975,
"grad_norm": 0.3027828633785248,
"learning_rate": 0.0002,
"loss": 0.1294,
"step": 22910
},
{
"epoch": 0.04168419042509417,
"grad_norm": 0.1551598757505417,
"learning_rate": 0.0002,
"loss": 0.0716,
"step": 22920
},
{
"epoch": 0.04170237724465137,
"grad_norm": 0.06512947380542755,
"learning_rate": 0.0002,
"loss": 0.071,
"step": 22930
},
{
"epoch": 0.041720564064208564,
"grad_norm": 0.2486017346382141,
"learning_rate": 0.0002,
"loss": 0.0726,
"step": 22940
},
{
"epoch": 0.04173875088376576,
"grad_norm": 0.0658118799328804,
"learning_rate": 0.0002,
"loss": 0.0211,
"step": 22950
},
{
"epoch": 0.041756937703322956,
"grad_norm": 0.18327641487121582,
"learning_rate": 0.0002,
"loss": 0.1307,
"step": 22960
},
{
"epoch": 0.04177512452288015,
"grad_norm": 0.06218123063445091,
"learning_rate": 0.0002,
"loss": 0.0839,
"step": 22970
},
{
"epoch": 0.04179331134243735,
"grad_norm": 0.07085203379392624,
"learning_rate": 0.0002,
"loss": 0.0786,
"step": 22980
},
{
"epoch": 0.041811498161994545,
"grad_norm": 0.19552426040172577,
"learning_rate": 0.0002,
"loss": 0.0646,
"step": 22990
},
{
"epoch": 0.04182968498155174,
"grad_norm": 0.06710335612297058,
"learning_rate": 0.0002,
"loss": 0.0252,
"step": 23000
}
],
"logging_steps": 10,
"max_steps": 100000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0398767809662812e+19,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}