{
  "best_metric": 0.7127333519086096,
  "best_model_checkpoint": "Louis_Emotion_DF_Image_VIT_V1/checkpoint-5385",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 5385,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005571030640668524,
      "grad_norm": 1.948201060295105,
      "learning_rate": 4.990714948932219e-05,
      "loss": 1.8845,
      "step": 10
    },
    {
      "epoch": 0.011142061281337047,
      "grad_norm": 2.1474852561950684,
      "learning_rate": 4.981429897864439e-05,
      "loss": 1.7726,
      "step": 20
    },
    {
      "epoch": 0.016713091922005572,
      "grad_norm": 1.7504295110702515,
      "learning_rate": 4.972144846796657e-05,
      "loss": 1.7291,
      "step": 30
    },
    {
      "epoch": 0.022284122562674095,
      "grad_norm": 1.9806510210037231,
      "learning_rate": 4.962859795728877e-05,
      "loss": 1.631,
      "step": 40
    },
    {
      "epoch": 0.027855153203342618,
      "grad_norm": 2.3543717861175537,
      "learning_rate": 4.953574744661096e-05,
      "loss": 1.5524,
      "step": 50
    },
    {
      "epoch": 0.033426183844011144,
      "grad_norm": 2.5557103157043457,
      "learning_rate": 4.9442896935933144e-05,
      "loss": 1.5061,
      "step": 60
    },
    {
      "epoch": 0.03899721448467967,
      "grad_norm": 2.1124281883239746,
      "learning_rate": 4.9350046425255343e-05,
      "loss": 1.5135,
      "step": 70
    },
    {
      "epoch": 0.04456824512534819,
      "grad_norm": 2.636732578277588,
      "learning_rate": 4.925719591457753e-05,
      "loss": 1.4545,
      "step": 80
    },
    {
      "epoch": 0.05013927576601671,
      "grad_norm": 2.1990904808044434,
      "learning_rate": 4.916434540389973e-05,
      "loss": 1.3375,
      "step": 90
    },
    {
      "epoch": 0.055710306406685235,
      "grad_norm": 2.593843698501587,
      "learning_rate": 4.9071494893221914e-05,
      "loss": 1.3125,
      "step": 100
    },
    {
      "epoch": 0.06128133704735376,
      "grad_norm": 2.3658862113952637,
      "learning_rate": 4.897864438254411e-05,
      "loss": 1.2438,
      "step": 110
    },
    {
      "epoch": 0.06685236768802229,
      "grad_norm": 3.614569902420044,
      "learning_rate": 4.88857938718663e-05,
      "loss": 1.352,
      "step": 120
    },
    {
      "epoch": 0.07242339832869081,
      "grad_norm": 3.075983762741089,
      "learning_rate": 4.8792943361188485e-05,
      "loss": 1.3506,
      "step": 130
    },
    {
      "epoch": 0.07799442896935933,
      "grad_norm": 2.9833717346191406,
      "learning_rate": 4.8700092850510685e-05,
      "loss": 1.3486,
      "step": 140
    },
    {
      "epoch": 0.08356545961002786,
      "grad_norm": 4.222170829772949,
      "learning_rate": 4.860724233983287e-05,
      "loss": 1.3921,
      "step": 150
    },
    {
      "epoch": 0.08913649025069638,
      "grad_norm": 2.5788440704345703,
      "learning_rate": 4.851439182915506e-05,
      "loss": 1.3421,
      "step": 160
    },
    {
      "epoch": 0.0947075208913649,
      "grad_norm": 2.7408132553100586,
      "learning_rate": 4.8421541318477255e-05,
      "loss": 1.1812,
      "step": 170
    },
    {
      "epoch": 0.10027855153203342,
      "grad_norm": 2.91229510307312,
      "learning_rate": 4.832869080779944e-05,
      "loss": 1.1881,
      "step": 180
    },
    {
      "epoch": 0.10584958217270195,
      "grad_norm": 2.4319872856140137,
      "learning_rate": 4.823584029712164e-05,
      "loss": 1.3427,
      "step": 190
    },
    {
      "epoch": 0.11142061281337047,
      "grad_norm": 3.0504567623138428,
      "learning_rate": 4.8142989786443826e-05,
      "loss": 1.2334,
      "step": 200
    },
    {
      "epoch": 0.116991643454039,
      "grad_norm": 2.9363996982574463,
      "learning_rate": 4.805013927576602e-05,
      "loss": 1.1809,
      "step": 210
    },
    {
      "epoch": 0.12256267409470752,
      "grad_norm": 2.2370383739471436,
      "learning_rate": 4.795728876508821e-05,
      "loss": 1.1973,
      "step": 220
    },
    {
      "epoch": 0.12813370473537605,
      "grad_norm": 2.7246251106262207,
      "learning_rate": 4.7864438254410404e-05,
      "loss": 1.2224,
      "step": 230
    },
    {
      "epoch": 0.13370473537604458,
      "grad_norm": 3.5851166248321533,
      "learning_rate": 4.7771587743732597e-05,
      "loss": 1.0445,
      "step": 240
    },
    {
      "epoch": 0.1392757660167131,
      "grad_norm": 1.9959110021591187,
      "learning_rate": 4.767873723305478e-05,
      "loss": 1.1164,
      "step": 250
    },
    {
      "epoch": 0.14484679665738162,
      "grad_norm": 2.933548927307129,
      "learning_rate": 4.7585886722376975e-05,
      "loss": 1.1229,
      "step": 260
    },
    {
      "epoch": 0.15041782729805014,
      "grad_norm": 4.168135643005371,
      "learning_rate": 4.749303621169917e-05,
      "loss": 1.25,
      "step": 270
    },
    {
      "epoch": 0.15598885793871867,
      "grad_norm": 3.2790322303771973,
      "learning_rate": 4.740018570102136e-05,
      "loss": 1.177,
      "step": 280
    },
    {
      "epoch": 0.1615598885793872,
      "grad_norm": 4.025652885437012,
      "learning_rate": 4.7307335190343546e-05,
      "loss": 1.1185,
      "step": 290
    },
    {
      "epoch": 0.1671309192200557,
      "grad_norm": 2.921226978302002,
      "learning_rate": 4.721448467966574e-05,
      "loss": 1.1773,
      "step": 300
    },
    {
      "epoch": 0.17270194986072424,
      "grad_norm": 3.095017671585083,
      "learning_rate": 4.712163416898793e-05,
      "loss": 1.2818,
      "step": 310
    },
    {
      "epoch": 0.17827298050139276,
      "grad_norm": 2.596940279006958,
      "learning_rate": 4.702878365831012e-05,
      "loss": 1.0857,
      "step": 320
    },
    {
      "epoch": 0.18384401114206128,
      "grad_norm": 3.0986833572387695,
      "learning_rate": 4.6935933147632316e-05,
      "loss": 1.161,
      "step": 330
    },
    {
      "epoch": 0.1894150417827298,
      "grad_norm": 3.9067625999450684,
      "learning_rate": 4.68430826369545e-05,
      "loss": 1.126,
      "step": 340
    },
    {
      "epoch": 0.19498607242339833,
      "grad_norm": 2.4455080032348633,
      "learning_rate": 4.6750232126276694e-05,
      "loss": 1.0944,
      "step": 350
    },
    {
      "epoch": 0.20055710306406685,
      "grad_norm": 3.0159671306610107,
      "learning_rate": 4.665738161559889e-05,
      "loss": 1.0525,
      "step": 360
    },
    {
      "epoch": 0.20612813370473537,
      "grad_norm": 3.257310152053833,
      "learning_rate": 4.656453110492108e-05,
      "loss": 1.1423,
      "step": 370
    },
    {
      "epoch": 0.2116991643454039,
      "grad_norm": 2.324500322341919,
      "learning_rate": 4.647168059424327e-05,
      "loss": 1.0441,
      "step": 380
    },
    {
      "epoch": 0.21727019498607242,
      "grad_norm": 2.893946409225464,
      "learning_rate": 4.637883008356546e-05,
      "loss": 1.0956,
      "step": 390
    },
    {
      "epoch": 0.22284122562674094,
      "grad_norm": 3.661740779876709,
      "learning_rate": 4.628597957288766e-05,
      "loss": 1.0648,
      "step": 400
    },
    {
      "epoch": 0.22841225626740946,
      "grad_norm": 3.601794719696045,
      "learning_rate": 4.619312906220984e-05,
      "loss": 1.1556,
      "step": 410
    },
    {
      "epoch": 0.233983286908078,
      "grad_norm": 2.6910550594329834,
      "learning_rate": 4.6100278551532035e-05,
      "loss": 0.9603,
      "step": 420
    },
    {
      "epoch": 0.2395543175487465,
      "grad_norm": 2.952540636062622,
      "learning_rate": 4.600742804085423e-05,
      "loss": 1.1827,
      "step": 430
    },
    {
      "epoch": 0.24512534818941503,
      "grad_norm": 4.099498271942139,
      "learning_rate": 4.5914577530176414e-05,
      "loss": 1.0573,
      "step": 440
    },
    {
      "epoch": 0.25069637883008355,
      "grad_norm": 3.884427070617676,
      "learning_rate": 4.582172701949861e-05,
      "loss": 1.1742,
      "step": 450
    },
    {
      "epoch": 0.2562674094707521,
      "grad_norm": 3.0782787799835205,
      "learning_rate": 4.57288765088208e-05,
      "loss": 1.051,
      "step": 460
    },
    {
      "epoch": 0.2618384401114206,
      "grad_norm": 4.079422473907471,
      "learning_rate": 4.563602599814299e-05,
      "loss": 0.9732,
      "step": 470
    },
    {
      "epoch": 0.26740947075208915,
      "grad_norm": 3.0462193489074707,
      "learning_rate": 4.5543175487465184e-05,
      "loss": 1.0204,
      "step": 480
    },
    {
      "epoch": 0.27298050139275765,
      "grad_norm": 3.944453716278076,
      "learning_rate": 4.545032497678737e-05,
      "loss": 1.2336,
      "step": 490
    },
    {
      "epoch": 0.2785515320334262,
      "grad_norm": 3.276047945022583,
      "learning_rate": 4.535747446610957e-05,
      "loss": 1.1331,
      "step": 500
    },
    {
      "epoch": 0.2841225626740947,
      "grad_norm": 2.8910679817199707,
      "learning_rate": 4.5264623955431755e-05,
      "loss": 1.0437,
      "step": 510
    },
    {
      "epoch": 0.28969359331476324,
      "grad_norm": 2.332932472229004,
      "learning_rate": 4.5171773444753954e-05,
      "loss": 1.264,
      "step": 520
    },
    {
      "epoch": 0.29526462395543174,
      "grad_norm": 3.207519292831421,
      "learning_rate": 4.507892293407614e-05,
      "loss": 1.0362,
      "step": 530
    },
    {
      "epoch": 0.3008356545961003,
      "grad_norm": 3.792386770248413,
      "learning_rate": 4.4986072423398326e-05,
      "loss": 1.0799,
      "step": 540
    },
    {
      "epoch": 0.3064066852367688,
      "grad_norm": 3.6370913982391357,
      "learning_rate": 4.4893221912720525e-05,
      "loss": 1.0702,
      "step": 550
    },
    {
      "epoch": 0.31197771587743733,
      "grad_norm": 7.727144241333008,
      "learning_rate": 4.480037140204271e-05,
      "loss": 1.0313,
      "step": 560
    },
    {
      "epoch": 0.31754874651810583,
      "grad_norm": 3.0483336448669434,
      "learning_rate": 4.470752089136491e-05,
      "loss": 1.0816,
      "step": 570
    },
    {
      "epoch": 0.3231197771587744,
      "grad_norm": 4.045107841491699,
      "learning_rate": 4.4614670380687096e-05,
      "loss": 1.1036,
      "step": 580
    },
    {
      "epoch": 0.3286908077994429,
      "grad_norm": 2.4204320907592773,
      "learning_rate": 4.452181987000928e-05,
      "loss": 0.9899,
      "step": 590
    },
    {
      "epoch": 0.3342618384401114,
      "grad_norm": 1.9364632368087769,
      "learning_rate": 4.442896935933148e-05,
      "loss": 0.9319,
      "step": 600
    },
    {
      "epoch": 0.3398328690807799,
      "grad_norm": 2.769692897796631,
      "learning_rate": 4.433611884865367e-05,
      "loss": 0.9948,
      "step": 610
    },
    {
      "epoch": 0.34540389972144847,
      "grad_norm": 3.5920908451080322,
      "learning_rate": 4.4243268337975866e-05,
      "loss": 0.9915,
      "step": 620
    },
    {
      "epoch": 0.35097493036211697,
      "grad_norm": 3.9949071407318115,
      "learning_rate": 4.415041782729805e-05,
      "loss": 1.0652,
      "step": 630
    },
    {
      "epoch": 0.3565459610027855,
      "grad_norm": 3.3296191692352295,
      "learning_rate": 4.4057567316620244e-05,
      "loss": 1.0703,
      "step": 640
    },
    {
      "epoch": 0.362116991643454,
      "grad_norm": 5.59266996383667,
      "learning_rate": 4.396471680594244e-05,
      "loss": 1.0078,
      "step": 650
    },
    {
      "epoch": 0.36768802228412256,
      "grad_norm": 3.6050076484680176,
      "learning_rate": 4.387186629526462e-05,
      "loss": 0.9541,
      "step": 660
    },
    {
      "epoch": 0.3732590529247911,
      "grad_norm": 2.4195094108581543,
      "learning_rate": 4.377901578458682e-05,
      "loss": 0.8932,
      "step": 670
    },
    {
      "epoch": 0.3788300835654596,
      "grad_norm": 3.135007619857788,
      "learning_rate": 4.368616527390901e-05,
      "loss": 1.13,
      "step": 680
    },
    {
      "epoch": 0.38440111420612816,
      "grad_norm": 3.0735456943511963,
      "learning_rate": 4.35933147632312e-05,
      "loss": 0.9413,
      "step": 690
    },
    {
      "epoch": 0.38997214484679665,
      "grad_norm": 4.281991481781006,
      "learning_rate": 4.350046425255339e-05,
      "loss": 0.9636,
      "step": 700
    },
    {
      "epoch": 0.3955431754874652,
      "grad_norm": 3.6378397941589355,
      "learning_rate": 4.340761374187558e-05,
      "loss": 1.0549,
      "step": 710
    },
    {
      "epoch": 0.4011142061281337,
      "grad_norm": 2.570322036743164,
      "learning_rate": 4.331476323119778e-05,
      "loss": 1.03,
      "step": 720
    },
    {
      "epoch": 0.40668523676880225,
      "grad_norm": 3.7980899810791016,
      "learning_rate": 4.3221912720519964e-05,
      "loss": 0.9204,
      "step": 730
    },
    {
      "epoch": 0.41225626740947074,
      "grad_norm": 2.180097818374634,
      "learning_rate": 4.3129062209842156e-05,
      "loss": 0.9956,
      "step": 740
    },
    {
      "epoch": 0.4178272980501393,
      "grad_norm": 3.4004600048065186,
      "learning_rate": 4.303621169916435e-05,
      "loss": 1.0413,
      "step": 750
    },
    {
      "epoch": 0.4233983286908078,
      "grad_norm": 2.9130539894104004,
      "learning_rate": 4.2943361188486535e-05,
      "loss": 0.9712,
      "step": 760
    },
    {
      "epoch": 0.42896935933147634,
      "grad_norm": 2.5885844230651855,
      "learning_rate": 4.2850510677808734e-05,
      "loss": 1.0015,
      "step": 770
    },
    {
      "epoch": 0.43454038997214484,
      "grad_norm": 2.890307664871216,
      "learning_rate": 4.275766016713092e-05,
      "loss": 0.9238,
      "step": 780
    },
    {
      "epoch": 0.4401114206128134,
      "grad_norm": 2.586287498474121,
      "learning_rate": 4.266480965645311e-05,
      "loss": 1.1793,
      "step": 790
    },
    {
      "epoch": 0.4456824512534819,
      "grad_norm": 3.8789122104644775,
      "learning_rate": 4.2571959145775305e-05,
      "loss": 0.9671,
      "step": 800
    },
    {
      "epoch": 0.45125348189415043,
      "grad_norm": 2.513338804244995,
      "learning_rate": 4.24791086350975e-05,
      "loss": 1.1206,
      "step": 810
    },
    {
      "epoch": 0.4568245125348189,
      "grad_norm": 4.040161609649658,
      "learning_rate": 4.238625812441969e-05,
      "loss": 0.9741,
      "step": 820
    },
    {
      "epoch": 0.4623955431754875,
      "grad_norm": 3.625004768371582,
      "learning_rate": 4.2293407613741876e-05,
      "loss": 1.0511,
      "step": 830
    },
    {
      "epoch": 0.467966573816156,
      "grad_norm": 2.6117324829101562,
      "learning_rate": 4.220055710306407e-05,
      "loss": 0.973,
      "step": 840
    },
    {
      "epoch": 0.4735376044568245,
      "grad_norm": 1.8209770917892456,
      "learning_rate": 4.210770659238626e-05,
      "loss": 0.9672,
      "step": 850
    },
    {
      "epoch": 0.479108635097493,
      "grad_norm": 2.686767339706421,
      "learning_rate": 4.201485608170845e-05,
      "loss": 0.993,
      "step": 860
    },
    {
      "epoch": 0.48467966573816157,
      "grad_norm": 2.2788872718811035,
      "learning_rate": 4.192200557103064e-05,
      "loss": 0.9141,
      "step": 870
    },
    {
      "epoch": 0.49025069637883006,
      "grad_norm": 3.2874503135681152,
      "learning_rate": 4.182915506035283e-05,
      "loss": 1.0949,
      "step": 880
    },
    {
      "epoch": 0.4958217270194986,
      "grad_norm": 3.21272349357605,
      "learning_rate": 4.1736304549675024e-05,
      "loss": 0.9701,
      "step": 890
    },
    {
      "epoch": 0.5013927576601671,
      "grad_norm": 4.310153007507324,
      "learning_rate": 4.164345403899722e-05,
      "loss": 0.9595,
      "step": 900
    },
    {
      "epoch": 0.5069637883008357,
      "grad_norm": 2.943368434906006,
      "learning_rate": 4.155060352831941e-05,
      "loss": 1.0342,
      "step": 910
    },
    {
      "epoch": 0.5125348189415042,
      "grad_norm": 3.337778329849243,
      "learning_rate": 4.1457753017641595e-05,
      "loss": 0.8848,
      "step": 920
    },
    {
      "epoch": 0.5181058495821727,
      "grad_norm": 3.8520374298095703,
      "learning_rate": 4.1364902506963794e-05,
      "loss": 0.955,
      "step": 930
    },
    {
      "epoch": 0.5236768802228412,
      "grad_norm": 3.1856610774993896,
      "learning_rate": 4.127205199628598e-05,
      "loss": 0.9171,
      "step": 940
    },
    {
      "epoch": 0.5292479108635098,
      "grad_norm": 2.813004493713379,
      "learning_rate": 4.117920148560817e-05,
      "loss": 0.8912,
      "step": 950
    },
    {
      "epoch": 0.5348189415041783,
      "grad_norm": 3.73842716217041,
      "learning_rate": 4.1086350974930365e-05,
      "loss": 0.87,
      "step": 960
    },
    {
      "epoch": 0.5403899721448467,
      "grad_norm": 4.500996112823486,
      "learning_rate": 4.099350046425255e-05,
      "loss": 1.0216,
      "step": 970
    },
    {
      "epoch": 0.5459610027855153,
      "grad_norm": 2.80609393119812,
      "learning_rate": 4.090064995357475e-05,
      "loss": 0.9803,
      "step": 980
    },
    {
      "epoch": 0.5515320334261838,
      "grad_norm": 3.340052604675293,
      "learning_rate": 4.0807799442896936e-05,
      "loss": 0.8977,
      "step": 990
    },
    {
      "epoch": 0.5571030640668524,
      "grad_norm": 3.450836420059204,
      "learning_rate": 4.071494893221913e-05,
      "loss": 0.9845,
      "step": 1000
    },
    {
      "epoch": 0.5626740947075209,
      "grad_norm": 2.639638662338257,
      "learning_rate": 4.062209842154132e-05,
      "loss": 0.9415,
      "step": 1010
    },
    {
      "epoch": 0.5682451253481894,
      "grad_norm": 3.1470372676849365,
      "learning_rate": 4.052924791086351e-05,
      "loss": 1.0006,
      "step": 1020
    },
    {
      "epoch": 0.5738161559888579,
      "grad_norm": 3.556126832962036,
      "learning_rate": 4.0436397400185706e-05,
      "loss": 0.9859,
      "step": 1030
    },
    {
      "epoch": 0.5793871866295265,
      "grad_norm": 4.580634117126465,
      "learning_rate": 4.034354688950789e-05,
      "loss": 0.8674,
      "step": 1040
    },
    {
      "epoch": 0.584958217270195,
      "grad_norm": 3.796309232711792,
      "learning_rate": 4.0250696378830085e-05,
      "loss": 0.9327,
      "step": 1050
    },
    {
      "epoch": 0.5905292479108635,
      "grad_norm": 2.9451568126678467,
      "learning_rate": 4.015784586815228e-05,
      "loss": 0.9889,
      "step": 1060
    },
    {
      "epoch": 0.596100278551532,
      "grad_norm": 2.4490549564361572,
      "learning_rate": 4.006499535747446e-05,
      "loss": 0.9661,
      "step": 1070
    },
    {
      "epoch": 0.6016713091922006,
      "grad_norm": 3.1423044204711914,
      "learning_rate": 3.997214484679666e-05,
      "loss": 0.8991,
      "step": 1080
    },
    {
      "epoch": 0.6072423398328691,
      "grad_norm": 2.6744227409362793,
      "learning_rate": 3.987929433611885e-05,
      "loss": 0.9822,
      "step": 1090
    },
    {
      "epoch": 0.6128133704735376,
      "grad_norm": 2.410128593444824,
      "learning_rate": 3.978644382544105e-05,
      "loss": 0.87,
      "step": 1100
    },
    {
      "epoch": 0.6183844011142061,
      "grad_norm": 3.4056427478790283,
      "learning_rate": 3.969359331476323e-05,
      "loss": 0.9011,
      "step": 1110
    },
    {
      "epoch": 0.6239554317548747,
      "grad_norm": 3.5832958221435547,
      "learning_rate": 3.960074280408542e-05,
      "loss": 0.9179,
      "step": 1120
    },
    {
      "epoch": 0.6295264623955432,
      "grad_norm": 4.678562164306641,
      "learning_rate": 3.950789229340762e-05,
      "loss": 0.9475,
      "step": 1130
    },
    {
      "epoch": 0.6350974930362117,
      "grad_norm": 3.114361047744751,
      "learning_rate": 3.9415041782729804e-05,
      "loss": 0.95,
      "step": 1140
    },
    {
      "epoch": 0.6406685236768802,
      "grad_norm": 2.6210415363311768,
      "learning_rate": 3.9322191272052003e-05,
      "loss": 0.8933,
      "step": 1150
    },
    {
      "epoch": 0.6462395543175488,
      "grad_norm": 2.759856939315796,
      "learning_rate": 3.922934076137419e-05,
      "loss": 0.9336,
      "step": 1160
    },
    {
      "epoch": 0.6518105849582173,
      "grad_norm": 3.449307680130005,
      "learning_rate": 3.9136490250696375e-05,
      "loss": 0.9851,
      "step": 1170
    },
    {
      "epoch": 0.6573816155988857,
      "grad_norm": 4.323282718658447,
      "learning_rate": 3.9043639740018574e-05,
      "loss": 0.9735,
      "step": 1180
    },
    {
      "epoch": 0.6629526462395543,
      "grad_norm": 2.79763126373291,
      "learning_rate": 3.895078922934076e-05,
      "loss": 0.8509,
      "step": 1190
    },
    {
      "epoch": 0.6685236768802229,
      "grad_norm": 5.2140045166015625,
      "learning_rate": 3.885793871866296e-05,
      "loss": 1.0342,
      "step": 1200
    },
    {
      "epoch": 0.6740947075208914,
      "grad_norm": 3.2437567710876465,
      "learning_rate": 3.8765088207985145e-05,
      "loss": 0.9919,
      "step": 1210
    },
    {
      "epoch": 0.6796657381615598,
      "grad_norm": 3.9220008850097656,
      "learning_rate": 3.867223769730734e-05,
      "loss": 0.9809,
      "step": 1220
    },
    {
      "epoch": 0.6852367688022284,
      "grad_norm": 1.8723160028457642,
      "learning_rate": 3.857938718662953e-05,
      "loss": 0.7315,
      "step": 1230
    },
    {
      "epoch": 0.6908077994428969,
      "grad_norm": 5.3923211097717285,
      "learning_rate": 3.8486536675951716e-05,
      "loss": 0.9709,
      "step": 1240
    },
    {
      "epoch": 0.6963788300835655,
      "grad_norm": 3.8164379596710205,
      "learning_rate": 3.8393686165273915e-05,
      "loss": 1.0331,
      "step": 1250
    },
    {
      "epoch": 0.7019498607242339,
      "grad_norm": 4.3825836181640625,
      "learning_rate": 3.83008356545961e-05,
      "loss": 1.0042,
      "step": 1260
    },
    {
      "epoch": 0.7075208913649025,
      "grad_norm": 2.7108051776885986,
      "learning_rate": 3.8207985143918294e-05,
      "loss": 0.974,
      "step": 1270
    },
    {
      "epoch": 0.713091922005571,
      "grad_norm": 2.64860463142395,
      "learning_rate": 3.8115134633240486e-05,
      "loss": 0.8971,
      "step": 1280
    },
    {
      "epoch": 0.7186629526462396,
      "grad_norm": 3.869168758392334,
      "learning_rate": 3.802228412256267e-05,
      "loss": 0.8225,
      "step": 1290
    },
    {
      "epoch": 0.724233983286908,
      "grad_norm": 3.02199125289917,
      "learning_rate": 3.792943361188487e-05,
      "loss": 0.9492,
      "step": 1300
    },
    {
      "epoch": 0.7298050139275766,
      "grad_norm": 5.071811199188232,
      "learning_rate": 3.783658310120706e-05,
      "loss": 1.018,
      "step": 1310
    },
    {
      "epoch": 0.7353760445682451,
      "grad_norm": 3.2059361934661865,
      "learning_rate": 3.774373259052925e-05,
      "loss": 0.9163,
      "step": 1320
    },
    {
      "epoch": 0.7409470752089137,
      "grad_norm": 4.003905296325684,
      "learning_rate": 3.765088207985144e-05,
      "loss": 0.8783,
      "step": 1330
    },
    {
      "epoch": 0.7465181058495822,
      "grad_norm": 3.159921646118164,
      "learning_rate": 3.755803156917363e-05,
      "loss": 0.7841,
      "step": 1340
    },
    {
      "epoch": 0.7520891364902507,
      "grad_norm": 2.852421760559082,
      "learning_rate": 3.746518105849583e-05,
      "loss": 0.9545,
      "step": 1350
    },
    {
      "epoch": 0.7576601671309192,
      "grad_norm": 4.6871490478515625,
      "learning_rate": 3.737233054781801e-05,
      "loss": 0.9276,
      "step": 1360
    },
    {
      "epoch": 0.7632311977715878,
      "grad_norm": 1.9484212398529053,
      "learning_rate": 3.7279480037140206e-05,
      "loss": 0.9676,
      "step": 1370
    },
    {
      "epoch": 0.7688022284122563,
      "grad_norm": 4.423326015472412,
      "learning_rate": 3.71866295264624e-05,
      "loss": 1.0121,
      "step": 1380
    },
    {
      "epoch": 0.7743732590529248,
      "grad_norm": 3.569786310195923,
      "learning_rate": 3.709377901578459e-05,
      "loss": 0.9413,
      "step": 1390
    },
    {
      "epoch": 0.7799442896935933,
      "grad_norm": 1.8504449129104614,
      "learning_rate": 3.700092850510678e-05,
      "loss": 0.9478,
      "step": 1400
    },
    {
      "epoch": 0.7855153203342619,
      "grad_norm": 3.4679548740386963,
      "learning_rate": 3.690807799442897e-05,
      "loss": 0.88,
      "step": 1410
    },
    {
      "epoch": 0.7910863509749304,
      "grad_norm": 6.313425064086914,
      "learning_rate": 3.681522748375116e-05,
      "loss": 1.0565,
      "step": 1420
    },
    {
      "epoch": 0.7966573816155988,
      "grad_norm": 3.7939631938934326,
      "learning_rate": 3.6722376973073354e-05,
      "loss": 1.0129,
      "step": 1430
    },
    {
      "epoch": 0.8022284122562674,
      "grad_norm": 4.260021209716797,
      "learning_rate": 3.662952646239555e-05,
      "loss": 0.8527,
      "step": 1440
    },
    {
      "epoch": 0.807799442896936,
      "grad_norm": 3.4048142433166504,
      "learning_rate": 3.653667595171773e-05,
      "loss": 1.0715,
      "step": 1450
    },
    {
      "epoch": 0.8133704735376045,
      "grad_norm": 4.1536736488342285,
      "learning_rate": 3.6443825441039925e-05,
      "loss": 0.9155,
      "step": 1460
    },
    {
      "epoch": 0.8189415041782729,
      "grad_norm": 3.329630136489868,
      "learning_rate": 3.635097493036212e-05,
      "loss": 1.0557,
      "step": 1470
    },
    {
      "epoch": 0.8245125348189415,
      "grad_norm": 2.2894420623779297,
      "learning_rate": 3.625812441968431e-05,
      "loss": 0.9423,
      "step": 1480
    },
    {
      "epoch": 0.83008356545961,
      "grad_norm": 2.885784864425659,
      "learning_rate": 3.61652739090065e-05,
      "loss": 0.9669,
      "step": 1490
    },
    {
      "epoch": 0.8356545961002786,
      "grad_norm": 3.7429628372192383,
      "learning_rate": 3.607242339832869e-05,
      "loss": 0.9009,
      "step": 1500
    },
    {
      "epoch": 0.841225626740947,
      "grad_norm": 2.1827738285064697,
      "learning_rate": 3.597957288765089e-05,
      "loss": 0.9713,
      "step": 1510
    },
    {
      "epoch": 0.8467966573816156,
      "grad_norm": 1.2572903633117676,
      "learning_rate": 3.5886722376973074e-05,
      "loss": 0.9426,
      "step": 1520
    },
    {
      "epoch": 0.8523676880222841,
      "grad_norm": 4.154526710510254,
      "learning_rate": 3.5793871866295266e-05,
      "loss": 0.9636,
      "step": 1530
    },
    {
      "epoch": 0.8579387186629527,
      "grad_norm": 3.4728987216949463,
      "learning_rate": 3.570102135561746e-05,
      "loss": 1.1009,
      "step": 1540
    },
    {
      "epoch": 0.8635097493036211,
      "grad_norm": 1.9633491039276123,
      "learning_rate": 3.5608170844939645e-05,
      "loss": 0.9549,
      "step": 1550
    },
    {
      "epoch": 0.8690807799442897,
      "grad_norm": 3.696641206741333,
      "learning_rate": 3.5515320334261844e-05,
      "loss": 0.9211,
      "step": 1560
    },
    {
      "epoch": 0.8746518105849582,
      "grad_norm": 3.0027382373809814,
      "learning_rate": 3.542246982358403e-05,
      "loss": 0.8844,
      "step": 1570
    },
    {
      "epoch": 0.8802228412256268,
      "grad_norm": 1.705255150794983,
      "learning_rate": 3.532961931290622e-05,
      "loss": 0.8733,
      "step": 1580
    },
    {
      "epoch": 0.8857938718662952,
      "grad_norm": 4.757926940917969,
      "learning_rate": 3.5236768802228415e-05,
      "loss": 0.9165,
      "step": 1590
    },
    {
      "epoch": 0.8913649025069638,
      "grad_norm": 3.9741427898406982,
      "learning_rate": 3.51439182915506e-05,
      "loss": 0.8411,
      "step": 1600
    },
    {
      "epoch": 0.8969359331476323,
      "grad_norm": 3.1221024990081787,
      "learning_rate": 3.50510677808728e-05,
      "loss": 0.8529,
      "step": 1610
    },
    {
      "epoch": 0.9025069637883009,
      "grad_norm": 5.116636753082275,
      "learning_rate": 3.4958217270194986e-05,
      "loss": 1.0049,
      "step": 1620
    },
    {
      "epoch": 0.9080779944289693,
      "grad_norm": 2.5207858085632324,
      "learning_rate": 3.4865366759517185e-05,
      "loss": 0.8374,
      "step": 1630
    },
    {
      "epoch": 0.9136490250696379,
      "grad_norm": 4.013001918792725,
      "learning_rate": 3.477251624883937e-05,
      "loss": 0.8902,
      "step": 1640
    },
    {
      "epoch": 0.9192200557103064,
      "grad_norm": 5.2283935546875,
      "learning_rate": 3.4679665738161556e-05,
      "loss": 0.8788,
      "step": 1650
    },
    {
      "epoch": 0.924791086350975,
      "grad_norm": 2.6664016246795654,
      "learning_rate": 3.4586815227483756e-05,
      "loss": 0.9204,
      "step": 1660
    },
    {
      "epoch": 0.9303621169916435,
      "grad_norm": 4.3207292556762695,
      "learning_rate": 3.449396471680594e-05,
      "loss": 0.9589,
      "step": 1670
    },
    {
      "epoch": 0.935933147632312,
      "grad_norm": 5.634909152984619,
      "learning_rate": 3.440111420612814e-05,
      "loss": 0.8883,
      "step": 1680
    },
    {
      "epoch": 0.9415041782729805,
      "grad_norm": 4.487209320068359,
      "learning_rate": 3.430826369545033e-05,
      "loss": 0.9111,
      "step": 1690
    },
    {
      "epoch": 0.947075208913649,
      "grad_norm": 3.9250285625457764,
      "learning_rate": 3.421541318477251e-05,
      "loss": 0.8759,
      "step": 1700
    },
    {
      "epoch": 0.9526462395543176,
      "grad_norm": 4.271919250488281,
      "learning_rate": 3.412256267409471e-05,
      "loss": 1.0486,
      "step": 1710
    },
    {
      "epoch": 0.958217270194986,
      "grad_norm": 3.3821752071380615,
      "learning_rate": 3.40297121634169e-05,
      "loss": 0.8707,
      "step": 1720
    },
    {
      "epoch": 0.9637883008356546,
      "grad_norm": 2.2104811668395996,
      "learning_rate": 3.39368616527391e-05,
      "loss": 0.8422,
      "step": 1730
    },
    {
      "epoch": 0.9693593314763231,
      "grad_norm": 2.4628074169158936,
      "learning_rate": 3.384401114206128e-05,
      "loss": 0.8869,
      "step": 1740
    },
    {
      "epoch": 0.9749303621169917,
      "grad_norm": 4.211429595947266,
      "learning_rate": 3.375116063138347e-05,
      "loss": 0.8037,
      "step": 1750
    },
    {
      "epoch": 0.9805013927576601,
      "grad_norm": 2.9044151306152344,
      "learning_rate": 3.365831012070567e-05,
      "loss": 0.9166,
      "step": 1760
    },
    {
      "epoch": 0.9860724233983287,
      "grad_norm": 3.072744846343994,
      "learning_rate": 3.3565459610027854e-05,
      "loss": 0.9653,
      "step": 1770
    },
    {
      "epoch": 0.9916434540389972,
      "grad_norm": 3.8640897274017334,
      "learning_rate": 3.347260909935005e-05,
      "loss": 0.8946,
      "step": 1780
    },
    {
      "epoch": 0.9972144846796658,
      "grad_norm": 3.481308698654175,
      "learning_rate": 3.337975858867224e-05,
      "loss": 0.9613,
      "step": 1790
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6692672053496795,
      "eval_loss": 0.9019954800605774,
      "eval_runtime": 53.0161,
      "eval_samples_per_second": 67.696,
      "eval_steps_per_second": 4.244,
      "step": 1795
    },
    {
      "epoch": 1.0027855153203342,
      "grad_norm": 2.660780191421509,
      "learning_rate": 3.328690807799443e-05,
      "loss": 0.8447,
      "step": 1800
    },
    {
      "epoch": 1.0083565459610029,
      "grad_norm": 5.112563133239746,
      "learning_rate": 3.3194057567316624e-05,
      "loss": 0.7709,
      "step": 1810
    },
    {
      "epoch": 1.0139275766016713,
      "grad_norm": 3.217790126800537,
      "learning_rate": 3.310120705663881e-05,
      "loss": 0.6957,
      "step": 1820
    },
    {
      "epoch": 1.0194986072423398,
      "grad_norm": 2.386460065841675,
      "learning_rate": 3.300835654596101e-05,
      "loss": 0.5801,
      "step": 1830
    },
    {
      "epoch": 1.0250696378830084,
      "grad_norm": 3.742408514022827,
      "learning_rate": 3.2915506035283195e-05,
      "loss": 0.7494,
      "step": 1840
    },
    {
      "epoch": 1.0306406685236769,
      "grad_norm": 5.288496017456055,
      "learning_rate": 3.282265552460539e-05,
      "loss": 0.7163,
      "step": 1850
    },
    {
      "epoch": 1.0362116991643453,
      "grad_norm": 3.0935630798339844,
      "learning_rate": 3.272980501392758e-05,
      "loss": 0.8476,
      "step": 1860
    },
    {
      "epoch": 1.041782729805014,
      "grad_norm": 3.274470329284668,
      "learning_rate": 3.2636954503249766e-05,
      "loss": 0.7659,
      "step": 1870
    },
    {
      "epoch": 1.0473537604456824,
      "grad_norm": 4.337296485900879,
      "learning_rate": 3.2544103992571965e-05,
      "loss": 0.8371,
      "step": 1880
    },
    {
      "epoch": 1.052924791086351,
      "grad_norm": 3.3266661167144775,
      "learning_rate": 3.245125348189415e-05,
      "loss": 0.7201,
      "step": 1890
    },
    {
      "epoch": 1.0584958217270195,
      "grad_norm": 2.803584337234497,
      "learning_rate": 3.235840297121634e-05,
      "loss": 0.7094,
      "step": 1900
    },
    {
      "epoch": 1.064066852367688,
      "grad_norm": 2.7187163829803467,
      "learning_rate": 3.2265552460538536e-05,
      "loss": 0.7677,
      "step": 1910
    },
    {
      "epoch": 1.0696378830083566,
      "grad_norm": 3.310220956802368,
      "learning_rate": 3.217270194986073e-05,
      "loss": 0.8175,
      "step": 1920
    },
    {
      "epoch": 1.075208913649025,
      "grad_norm": 3.0058999061584473,
      "learning_rate": 3.207985143918292e-05,
      "loss": 0.6781,
      "step": 1930
    },
    {
      "epoch": 1.0807799442896937,
      "grad_norm": 5.092315673828125,
      "learning_rate": 3.1987000928505107e-05,
      "loss": 0.8581,
      "step": 1940
    },
    {
      "epoch": 1.0863509749303621,
      "grad_norm": 3.070526599884033,
      "learning_rate": 3.18941504178273e-05,
      "loss": 0.7415,
      "step": 1950
    },
    {
      "epoch": 1.0919220055710306,
      "grad_norm": 3.610179901123047,
      "learning_rate": 3.180129990714949e-05,
      "loss": 0.7457,
      "step": 1960
    },
    {
      "epoch": 1.0974930362116992,
      "grad_norm": 5.436527729034424,
      "learning_rate": 3.1708449396471684e-05,
      "loss": 0.8023,
      "step": 1970
    },
    {
      "epoch": 1.1030640668523677,
      "grad_norm": 2.607541084289551,
      "learning_rate": 3.161559888579388e-05,
      "loss": 0.7311,
      "step": 1980
    },
    {
      "epoch": 1.1086350974930361,
      "grad_norm": 3.960480213165283,
      "learning_rate": 3.152274837511606e-05,
      "loss": 0.825,
      "step": 1990
    },
    {
      "epoch": 1.1142061281337048,
      "grad_norm": 2.607386589050293,
      "learning_rate": 3.1429897864438255e-05,
      "loss": 0.7247,
      "step": 2000
    },
    {
      "epoch": 1.1197771587743732,
      "grad_norm": 4.60785436630249,
      "learning_rate": 3.133704735376045e-05,
      "loss": 0.6815,
      "step": 2010
    },
    {
      "epoch": 1.1253481894150417,
      "grad_norm": 2.857929229736328,
      "learning_rate": 3.124419684308264e-05,
      "loss": 0.8937,
      "step": 2020
    },
    {
      "epoch": 1.1309192200557103,
      "grad_norm": 3.8009235858917236,
      "learning_rate": 3.1151346332404826e-05,
      "loss": 0.663,
      "step": 2030
    },
    {
      "epoch": 1.1364902506963788,
      "grad_norm": 2.8590617179870605,
      "learning_rate": 3.105849582172702e-05,
      "loss": 0.7552,
      "step": 2040
    },
    {
      "epoch": 1.1420612813370474,
      "grad_norm": 2.73160719871521,
      "learning_rate": 3.096564531104921e-05,
      "loss": 0.6979,
      "step": 2050
    },
    {
      "epoch": 1.1476323119777159,
      "grad_norm": 3.1918087005615234,
      "learning_rate": 3.0872794800371404e-05,
      "loss": 0.7999,
      "step": 2060
    },
    {
      "epoch": 1.1532033426183843,
      "grad_norm": 1.336449384689331,
      "learning_rate": 3.0779944289693596e-05,
      "loss": 0.5635,
      "step": 2070
    },
    {
      "epoch": 1.158774373259053,
      "grad_norm": 3.852604627609253,
      "learning_rate": 3.068709377901578e-05,
      "loss": 0.7839,
      "step": 2080
    },
    {
      "epoch": 1.1643454038997214,
      "grad_norm": 3.373872995376587,
      "learning_rate": 3.059424326833798e-05,
      "loss": 0.6604,
      "step": 2090
    },
    {
      "epoch": 1.16991643454039,
      "grad_norm": 5.2556586265563965,
      "learning_rate": 3.050139275766017e-05,
      "loss": 0.7676,
      "step": 2100
    },
    {
      "epoch": 1.1754874651810585,
      "grad_norm": 4.4580912590026855,
      "learning_rate": 3.0408542246982356e-05,
      "loss": 0.7738,
      "step": 2110
    },
    {
      "epoch": 1.181058495821727,
      "grad_norm": 2.804713726043701,
      "learning_rate": 3.0315691736304552e-05,
      "loss": 0.6995,
      "step": 2120
    },
    {
      "epoch": 1.1866295264623956,
      "grad_norm": 3.6438372135162354,
      "learning_rate": 3.022284122562674e-05,
      "loss": 0.6511,
      "step": 2130
    },
    {
      "epoch": 1.192200557103064,
      "grad_norm": 2.212045907974243,
      "learning_rate": 3.0129990714948937e-05,
      "loss": 0.603,
      "step": 2140
    },
    {
      "epoch": 1.1977715877437327,
      "grad_norm": 3.458812713623047,
      "learning_rate": 3.0037140204271123e-05,
      "loss": 0.7317,
      "step": 2150
    },
    {
      "epoch": 1.2033426183844012,
      "grad_norm": 2.6540637016296387,
      "learning_rate": 2.9944289693593312e-05,
      "loss": 0.7921,
      "step": 2160
    },
    {
      "epoch": 1.2089136490250696,
      "grad_norm": 5.65538215637207,
      "learning_rate": 2.9851439182915508e-05,
      "loss": 0.8803,
      "step": 2170
    },
    {
      "epoch": 1.2144846796657383,
      "grad_norm": 3.8509607315063477,
      "learning_rate": 2.9758588672237697e-05,
      "loss": 0.7686,
      "step": 2180
    },
    {
      "epoch": 1.2200557103064067,
      "grad_norm": 4.367794990539551,
      "learning_rate": 2.9665738161559893e-05,
      "loss": 0.7243,
      "step": 2190
    },
    {
      "epoch": 1.2256267409470751,
      "grad_norm": 3.6232688426971436,
      "learning_rate": 2.957288765088208e-05,
      "loss": 0.6483,
      "step": 2200
    },
    {
      "epoch": 1.2311977715877438,
      "grad_norm": 3.4824819564819336,
      "learning_rate": 2.9480037140204275e-05,
      "loss": 0.8249,
      "step": 2210
    },
    {
      "epoch": 1.2367688022284122,
      "grad_norm": 5.086853981018066,
      "learning_rate": 2.9387186629526464e-05,
      "loss": 0.7303,
      "step": 2220
    },
    {
      "epoch": 1.2423398328690807,
      "grad_norm": 3.1858561038970947,
      "learning_rate": 2.9294336118848653e-05,
      "loss": 0.6699,
      "step": 2230
    },
    {
      "epoch": 1.2479108635097493,
      "grad_norm": 5.4035139083862305,
      "learning_rate": 2.920148560817085e-05,
      "loss": 0.8184,
      "step": 2240
    },
    {
      "epoch": 1.2534818941504178,
      "grad_norm": 4.2386345863342285,
      "learning_rate": 2.9108635097493035e-05,
      "loss": 0.783,
      "step": 2250
    },
    {
      "epoch": 1.2590529247910864,
      "grad_norm": 5.696313858032227,
      "learning_rate": 2.901578458681523e-05,
      "loss": 0.7901,
      "step": 2260
    },
    {
      "epoch": 1.2646239554317549,
      "grad_norm": 2.38380765914917,
      "learning_rate": 2.892293407613742e-05,
      "loss": 0.7271,
      "step": 2270
    },
    {
      "epoch": 1.2701949860724233,
      "grad_norm": 2.800536632537842,
      "learning_rate": 2.883008356545961e-05,
      "loss": 0.7253,
      "step": 2280
    },
    {
      "epoch": 1.275766016713092,
      "grad_norm": 5.40772008895874,
      "learning_rate": 2.8737233054781805e-05,
      "loss": 0.8461,
      "step": 2290
    },
    {
      "epoch": 1.2813370473537604,
      "grad_norm": 2.7099337577819824,
      "learning_rate": 2.864438254410399e-05,
      "loss": 0.7545,
      "step": 2300
    },
    {
      "epoch": 1.286908077994429,
      "grad_norm": 5.34475040435791,
      "learning_rate": 2.8551532033426187e-05,
      "loss": 0.6853,
      "step": 2310
    },
    {
      "epoch": 1.2924791086350975,
      "grad_norm": 3.6724138259887695,
      "learning_rate": 2.8458681522748376e-05,
      "loss": 0.6719,
      "step": 2320
    },
    {
      "epoch": 1.298050139275766,
      "grad_norm": 6.54721212387085,
      "learning_rate": 2.8365831012070565e-05,
      "loss": 0.6367,
      "step": 2330
    },
    {
      "epoch": 1.3036211699164346,
      "grad_norm": 4.257522106170654,
      "learning_rate": 2.827298050139276e-05,
      "loss": 0.8234,
      "step": 2340
    },
    {
      "epoch": 1.309192200557103,
      "grad_norm": 4.256605625152588,
      "learning_rate": 2.8180129990714947e-05,
      "loss": 0.8149,
      "step": 2350
    },
    {
      "epoch": 1.3147632311977717,
      "grad_norm": 3.181926965713501,
      "learning_rate": 2.8087279480037143e-05,
      "loss": 0.7522,
      "step": 2360
    },
    {
      "epoch": 1.3203342618384402,
      "grad_norm": 2.8374698162078857,
      "learning_rate": 2.7994428969359332e-05,
      "loss": 0.6871,
      "step": 2370
    },
    {
      "epoch": 1.3259052924791086,
      "grad_norm": 7.006384372711182,
      "learning_rate": 2.7901578458681528e-05,
      "loss": 0.7822,
      "step": 2380
    },
    {
      "epoch": 1.331476323119777,
      "grad_norm": 5.170847415924072,
      "learning_rate": 2.7808727948003717e-05,
      "loss": 0.8323,
      "step": 2390
    },
    {
      "epoch": 1.3370473537604457,
      "grad_norm": 4.078238010406494,
      "learning_rate": 2.7715877437325903e-05,
      "loss": 0.6984,
      "step": 2400
    },
    {
      "epoch": 1.3426183844011141,
      "grad_norm": 3.4335429668426514,
      "learning_rate": 2.76230269266481e-05,
      "loss": 0.6943,
      "step": 2410
    },
    {
      "epoch": 1.3481894150417828,
      "grad_norm": 3.247312545776367,
      "learning_rate": 2.7530176415970288e-05,
      "loss": 0.7341,
      "step": 2420
    },
    {
      "epoch": 1.3537604456824512,
      "grad_norm": 5.7520318031311035,
      "learning_rate": 2.7437325905292484e-05,
      "loss": 0.7103,
      "step": 2430
    },
    {
      "epoch": 1.3593314763231197,
      "grad_norm": 1.9237465858459473,
      "learning_rate": 2.735376044568245e-05,
      "loss": 0.7064,
      "step": 2440
    },
    {
      "epoch": 1.3649025069637883,
      "grad_norm": 3.3437535762786865,
      "learning_rate": 2.726090993500464e-05,
      "loss": 0.7222,
      "step": 2450
    },
    {
      "epoch": 1.3704735376044568,
      "grad_norm": 3.8634493350982666,
      "learning_rate": 2.7168059424326836e-05,
      "loss": 0.5842,
      "step": 2460
    },
    {
      "epoch": 1.3760445682451254,
      "grad_norm": 4.395915508270264,
      "learning_rate": 2.7075208913649025e-05,
      "loss": 0.8458,
      "step": 2470
    },
    {
      "epoch": 1.3816155988857939,
      "grad_norm": 4.998650074005127,
      "learning_rate": 2.6982358402971218e-05,
      "loss": 0.7941,
      "step": 2480
    },
    {
      "epoch": 1.3871866295264623,
      "grad_norm": 5.347882270812988,
      "learning_rate": 2.6889507892293407e-05,
      "loss": 0.728,
      "step": 2490
    },
    {
      "epoch": 1.392757660167131,
      "grad_norm": 4.277406692504883,
      "learning_rate": 2.6796657381615596e-05,
      "loss": 0.7203,
      "step": 2500
    },
    {
      "epoch": 1.3983286908077994,
      "grad_norm": 3.2961020469665527,
      "learning_rate": 2.6703806870937792e-05,
      "loss": 0.675,
      "step": 2510
    },
    {
      "epoch": 1.403899721448468,
      "grad_norm": 3.227165460586548,
      "learning_rate": 2.661095636025998e-05,
      "loss": 0.8199,
      "step": 2520
    },
    {
      "epoch": 1.4094707520891365,
      "grad_norm": 2.727691650390625,
      "learning_rate": 2.6518105849582174e-05,
      "loss": 0.6727,
      "step": 2530
    },
    {
      "epoch": 1.415041782729805,
      "grad_norm": 3.660712957382202,
      "learning_rate": 2.6425255338904363e-05,
      "loss": 0.7168,
      "step": 2540
    },
    {
      "epoch": 1.4206128133704734,
      "grad_norm": 4.9975266456604,
      "learning_rate": 2.633240482822656e-05,
      "loss": 0.7329,
      "step": 2550
    },
    {
      "epoch": 1.426183844011142,
      "grad_norm": 4.956869602203369,
      "learning_rate": 2.6239554317548748e-05,
      "loss": 0.7576,
      "step": 2560
    },
    {
      "epoch": 1.4317548746518105,
      "grad_norm": 3.2841885089874268,
      "learning_rate": 2.6146703806870937e-05,
      "loss": 0.687,
      "step": 2570
    },
    {
      "epoch": 1.4373259052924792,
      "grad_norm": 5.767863750457764,
      "learning_rate": 2.605385329619313e-05,
      "loss": 0.7346,
      "step": 2580
    },
    {
      "epoch": 1.4428969359331476,
      "grad_norm": 2.8011934757232666,
      "learning_rate": 2.596100278551532e-05,
      "loss": 0.7072,
      "step": 2590
    },
    {
      "epoch": 1.448467966573816,
      "grad_norm": 2.4750354290008545,
      "learning_rate": 2.5868152274837515e-05,
      "loss": 0.5924,
      "step": 2600
    },
    {
      "epoch": 1.4540389972144847,
      "grad_norm": 2.4504594802856445,
      "learning_rate": 2.5775301764159704e-05,
      "loss": 0.6229,
      "step": 2610
    },
    {
      "epoch": 1.4596100278551531,
      "grad_norm": 6.350492000579834,
      "learning_rate": 2.5682451253481893e-05,
      "loss": 0.7909,
      "step": 2620
    },
    {
      "epoch": 1.4651810584958218,
      "grad_norm": 3.0052127838134766,
      "learning_rate": 2.5589600742804086e-05,
      "loss": 0.6516,
      "step": 2630
    },
    {
      "epoch": 1.4707520891364902,
      "grad_norm": 3.0360071659088135,
      "learning_rate": 2.5496750232126275e-05,
      "loss": 0.7202,
      "step": 2640
    },
    {
      "epoch": 1.4763231197771587,
      "grad_norm": 5.270174026489258,
      "learning_rate": 2.540389972144847e-05,
      "loss": 0.7491,
      "step": 2650
    },
    {
      "epoch": 1.4818941504178273,
      "grad_norm": 6.691749572753906,
      "learning_rate": 2.531104921077066e-05,
      "loss": 0.6879,
      "step": 2660
    },
    {
      "epoch": 1.4874651810584958,
      "grad_norm": 3.4790680408477783,
      "learning_rate": 2.5218198700092853e-05,
      "loss": 0.7869,
      "step": 2670
    },
    {
      "epoch": 1.4930362116991645,
      "grad_norm": 4.1881914138793945,
      "learning_rate": 2.5125348189415042e-05,
      "loss": 0.7038,
      "step": 2680
    },
    {
      "epoch": 1.498607242339833,
      "grad_norm": 3.5954504013061523,
      "learning_rate": 2.503249767873723e-05,
      "loss": 0.8112,
      "step": 2690
    },
    {
      "epoch": 1.5041782729805013,
      "grad_norm": 5.3881354331970215,
      "learning_rate": 2.4939647168059427e-05,
      "loss": 0.8329,
      "step": 2700
    },
    {
      "epoch": 1.5097493036211698,
      "grad_norm": 2.5170273780822754,
      "learning_rate": 2.4846796657381616e-05,
      "loss": 0.5086,
      "step": 2710
    },
    {
      "epoch": 1.5153203342618384,
      "grad_norm": 2.370330333709717,
      "learning_rate": 2.475394614670381e-05,
      "loss": 0.6625,
      "step": 2720
    },
    {
      "epoch": 1.520891364902507,
      "grad_norm": 2.416132926940918,
      "learning_rate": 2.4661095636025998e-05,
      "loss": 0.7835,
      "step": 2730
    },
    {
      "epoch": 1.5264623955431755,
      "grad_norm": 4.824071407318115,
      "learning_rate": 2.456824512534819e-05,
      "loss": 0.6605,
      "step": 2740
    },
    {
      "epoch": 1.532033426183844,
      "grad_norm": 5.555083274841309,
      "learning_rate": 2.4475394614670383e-05,
      "loss": 0.675,
      "step": 2750
    },
    {
      "epoch": 1.5376044568245124,
      "grad_norm": 5.953351020812988,
      "learning_rate": 2.4382544103992576e-05,
      "loss": 0.6572,
      "step": 2760
    },
    {
      "epoch": 1.543175487465181,
      "grad_norm": 3.702897787094116,
      "learning_rate": 2.4289693593314765e-05,
      "loss": 0.7674,
      "step": 2770
    },
    {
      "epoch": 1.5487465181058497,
      "grad_norm": 3.995495080947876,
      "learning_rate": 2.4196843082636954e-05,
      "loss": 0.7461,
      "step": 2780
    },
    {
      "epoch": 1.5543175487465182,
      "grad_norm": 4.654627323150635,
      "learning_rate": 2.4103992571959146e-05,
      "loss": 0.7709,
      "step": 2790
    },
    {
      "epoch": 1.5598885793871866,
      "grad_norm": 4.607250690460205,
      "learning_rate": 2.401114206128134e-05,
      "loss": 0.8664,
      "step": 2800
    },
    {
      "epoch": 1.565459610027855,
      "grad_norm": 4.200578212738037,
      "learning_rate": 2.391829155060353e-05,
      "loss": 0.7295,
      "step": 2810
    },
    {
      "epoch": 1.5710306406685237,
      "grad_norm": 3.5812277793884277,
      "learning_rate": 2.382544103992572e-05,
      "loss": 0.7653,
      "step": 2820
    },
    {
      "epoch": 1.5766016713091922,
      "grad_norm": 3.6298224925994873,
      "learning_rate": 2.373259052924791e-05,
      "loss": 0.6548,
      "step": 2830
    },
    {
      "epoch": 1.5821727019498608,
      "grad_norm": 3.4680569171905518,
      "learning_rate": 2.3639740018570102e-05,
      "loss": 0.699,
      "step": 2840
    },
    {
      "epoch": 1.5877437325905293,
      "grad_norm": 3.139227867126465,
      "learning_rate": 2.3546889507892295e-05,
      "loss": 0.6505,
      "step": 2850
    },
    {
      "epoch": 1.5933147632311977,
      "grad_norm": 4.027279853820801,
      "learning_rate": 2.3454038997214488e-05,
      "loss": 0.7388,
      "step": 2860
    },
    {
      "epoch": 1.5988857938718661,
      "grad_norm": 3.547569990158081,
      "learning_rate": 2.3361188486536677e-05,
      "loss": 0.642,
      "step": 2870
    },
    {
      "epoch": 1.6044568245125348,
      "grad_norm": 5.281991004943848,
      "learning_rate": 2.3268337975858866e-05,
      "loss": 0.6009,
      "step": 2880
    },
    {
      "epoch": 1.6100278551532035,
      "grad_norm": 3.9563615322113037,
      "learning_rate": 2.317548746518106e-05,
      "loss": 0.695,
      "step": 2890
    },
    {
      "epoch": 1.615598885793872,
      "grad_norm": 5.508429050445557,
      "learning_rate": 2.308263695450325e-05,
      "loss": 0.7169,
      "step": 2900
    },
    {
      "epoch": 1.6211699164345403,
      "grad_norm": 4.158377647399902,
      "learning_rate": 2.2989786443825444e-05,
      "loss": 0.6337,
      "step": 2910
    },
    {
      "epoch": 1.6267409470752088,
      "grad_norm": 4.293817520141602,
      "learning_rate": 2.2896935933147633e-05,
      "loss": 0.6769,
      "step": 2920
    },
    {
      "epoch": 1.6323119777158774,
      "grad_norm": 3.0530028343200684,
      "learning_rate": 2.2804085422469825e-05,
      "loss": 0.6436,
      "step": 2930
    },
    {
      "epoch": 1.637883008356546,
      "grad_norm": 3.3768982887268066,
      "learning_rate": 2.2711234911792014e-05,
      "loss": 0.6443,
      "step": 2940
    },
    {
      "epoch": 1.6434540389972145,
      "grad_norm": 6.057602882385254,
      "learning_rate": 2.2618384401114207e-05,
      "loss": 0.6208,
      "step": 2950
    },
    {
      "epoch": 1.649025069637883,
      "grad_norm": 7.647638320922852,
      "learning_rate": 2.25255338904364e-05,
      "loss": 0.8125,
      "step": 2960
    },
    {
      "epoch": 1.6545961002785514,
      "grad_norm": 6.915018081665039,
      "learning_rate": 2.243268337975859e-05,
      "loss": 0.8279,
      "step": 2970
    },
    {
      "epoch": 1.66016713091922,
      "grad_norm": 3.073479413986206,
      "learning_rate": 2.233983286908078e-05,
      "loss": 0.5983,
      "step": 2980
    },
    {
      "epoch": 1.6657381615598887,
      "grad_norm": 6.27379035949707,
      "learning_rate": 2.2246982358402974e-05,
      "loss": 0.7413,
      "step": 2990
    },
    {
      "epoch": 1.6713091922005572,
      "grad_norm": 2.8790829181671143,
      "learning_rate": 2.2154131847725163e-05,
      "loss": 0.5785,
      "step": 3000
    },
    {
      "epoch": 1.6768802228412256,
      "grad_norm": 3.810025691986084,
      "learning_rate": 2.2061281337047355e-05,
      "loss": 0.6187,
      "step": 3010
    },
    {
      "epoch": 1.682451253481894,
      "grad_norm": 4.597643852233887,
      "learning_rate": 2.1968430826369545e-05,
      "loss": 0.7871,
      "step": 3020
    },
    {
      "epoch": 1.6880222841225627,
      "grad_norm": 4.470458507537842,
      "learning_rate": 2.1875580315691737e-05,
      "loss": 0.7397,
      "step": 3030
    },
    {
      "epoch": 1.6935933147632312,
      "grad_norm": 3.957707643508911,
      "learning_rate": 2.178272980501393e-05,
      "loss": 0.6893,
      "step": 3040
    },
    {
      "epoch": 1.6991643454038998,
      "grad_norm": 4.973650932312012,
      "learning_rate": 2.1689879294336122e-05,
      "loss": 0.601,
      "step": 3050
    },
    {
      "epoch": 1.7047353760445683,
      "grad_norm": 3.1551012992858887,
      "learning_rate": 2.159702878365831e-05,
      "loss": 0.6792,
      "step": 3060
    },
    {
      "epoch": 1.7103064066852367,
      "grad_norm": 21.842958450317383,
      "learning_rate": 2.15041782729805e-05,
      "loss": 0.7582,
      "step": 3070
    },
    {
      "epoch": 1.7158774373259051,
      "grad_norm": 6.199334144592285,
      "learning_rate": 2.1411327762302693e-05,
      "loss": 0.635,
      "step": 3080
    },
    {
      "epoch": 1.7214484679665738,
      "grad_norm": 5.763338088989258,
      "learning_rate": 2.1318477251624886e-05,
      "loss": 0.7636,
      "step": 3090
    },
    {
      "epoch": 1.7270194986072425,
      "grad_norm": 3.1481924057006836,
      "learning_rate": 2.1225626740947078e-05,
      "loss": 0.7613,
      "step": 3100
    },
    {
      "epoch": 1.732590529247911,
      "grad_norm": 2.0901215076446533,
      "learning_rate": 2.1132776230269267e-05,
      "loss": 0.6326,
      "step": 3110
    },
    {
      "epoch": 1.7381615598885793,
      "grad_norm": 3.8175764083862305,
      "learning_rate": 2.1039925719591457e-05,
      "loss": 0.668,
      "step": 3120
    },
    {
      "epoch": 1.7437325905292478,
      "grad_norm": 5.669551849365234,
      "learning_rate": 2.094707520891365e-05,
      "loss": 0.6897,
      "step": 3130
    },
    {
      "epoch": 1.7493036211699164,
      "grad_norm": 2.3246147632598877,
      "learning_rate": 2.085422469823584e-05,
      "loss": 0.9081,
      "step": 3140
    },
    {
      "epoch": 1.754874651810585,
      "grad_norm": 4.618974685668945,
      "learning_rate": 2.0761374187558034e-05,
      "loss": 0.7226,
      "step": 3150
    },
    {
      "epoch": 1.7604456824512535,
      "grad_norm": 3.3900840282440186,
      "learning_rate": 2.0668523676880223e-05,
      "loss": 0.719,
      "step": 3160
    },
    {
      "epoch": 1.766016713091922,
      "grad_norm": 6.443446159362793,
      "learning_rate": 2.0575673166202413e-05,
      "loss": 0.6676,
      "step": 3170
    },
    {
      "epoch": 1.7715877437325904,
      "grad_norm": 3.93987774848938,
      "learning_rate": 2.0482822655524605e-05,
      "loss": 0.6198,
      "step": 3180
    },
    {
      "epoch": 1.777158774373259,
      "grad_norm": 3.811434030532837,
      "learning_rate": 2.0389972144846798e-05,
      "loss": 0.7962,
      "step": 3190
    },
    {
      "epoch": 1.7827298050139275,
      "grad_norm": 6.011105537414551,
      "learning_rate": 2.029712163416899e-05,
      "loss": 0.6282,
      "step": 3200
    },
    {
      "epoch": 1.7883008356545962,
      "grad_norm": 4.521935939788818,
      "learning_rate": 2.020427112349118e-05,
      "loss": 0.6502,
      "step": 3210
    },
    {
      "epoch": 1.7938718662952646,
      "grad_norm": 6.527938365936279,
      "learning_rate": 2.0111420612813372e-05,
      "loss": 0.731,
      "step": 3220
    },
    {
      "epoch": 1.799442896935933,
      "grad_norm": 3.292177438735962,
      "learning_rate": 2.001857010213556e-05,
      "loss": 0.6674,
      "step": 3230
    },
    {
      "epoch": 1.8050139275766015,
      "grad_norm": 4.2430548667907715,
      "learning_rate": 1.9925719591457754e-05,
      "loss": 0.6192,
      "step": 3240
    },
    {
      "epoch": 1.8105849582172702,
      "grad_norm": 2.8606340885162354,
      "learning_rate": 1.9832869080779946e-05,
      "loss": 0.724,
      "step": 3250
    },
    {
      "epoch": 1.8161559888579388,
      "grad_norm": 5.849732398986816,
      "learning_rate": 1.9740018570102135e-05,
      "loss": 0.6134,
      "step": 3260
    },
    {
      "epoch": 1.8217270194986073,
      "grad_norm": 2.940549850463867,
      "learning_rate": 1.9647168059424328e-05,
      "loss": 0.5696,
      "step": 3270
    },
    {
      "epoch": 1.8272980501392757,
      "grad_norm": 5.734061241149902,
      "learning_rate": 1.955431754874652e-05,
      "loss": 0.6265,
      "step": 3280
    },
    {
      "epoch": 1.8328690807799441,
      "grad_norm": 4.678475379943848,
      "learning_rate": 1.946146703806871e-05,
      "loss": 0.543,
      "step": 3290
    },
    {
      "epoch": 1.8384401114206128,
      "grad_norm": 5.988973617553711,
      "learning_rate": 1.9368616527390902e-05,
      "loss": 0.5986,
      "step": 3300
    },
    {
      "epoch": 1.8440111420612815,
      "grad_norm": 5.648441314697266,
      "learning_rate": 1.927576601671309e-05,
      "loss": 0.7231,
      "step": 3310
    },
    {
      "epoch": 1.84958217270195,
      "grad_norm": 3.596468448638916,
      "learning_rate": 1.9182915506035284e-05,
      "loss": 0.7182,
      "step": 3320
    },
    {
      "epoch": 1.8551532033426184,
      "grad_norm": 3.6702165603637695,
      "learning_rate": 1.9090064995357476e-05,
      "loss": 0.6835,
      "step": 3330
    },
    {
      "epoch": 1.8607242339832868,
      "grad_norm": 5.897074222564697,
      "learning_rate": 1.899721448467967e-05,
      "loss": 0.6812,
      "step": 3340
    },
    {
      "epoch": 1.8662952646239555,
      "grad_norm": 5.111124515533447,
      "learning_rate": 1.8904363974001858e-05,
      "loss": 0.7924,
      "step": 3350
    },
    {
      "epoch": 1.8718662952646241,
      "grad_norm": 5.206398010253906,
      "learning_rate": 1.8811513463324047e-05,
      "loss": 0.6493,
      "step": 3360
    },
    {
      "epoch": 1.8774373259052926,
      "grad_norm": 3.4764597415924072,
      "learning_rate": 1.871866295264624e-05,
      "loss": 0.6969,
      "step": 3370
    },
    {
      "epoch": 1.883008356545961,
      "grad_norm": 3.6830670833587646,
      "learning_rate": 1.8625812441968432e-05,
      "loss": 0.6874,
      "step": 3380
    },
    {
      "epoch": 1.8885793871866294,
      "grad_norm": 5.803642272949219,
      "learning_rate": 1.8532961931290625e-05,
      "loss": 0.5756,
      "step": 3390
    },
    {
      "epoch": 1.894150417827298,
      "grad_norm": 5.948879718780518,
      "learning_rate": 1.8440111420612814e-05,
      "loss": 0.741,
      "step": 3400
    },
    {
      "epoch": 1.8997214484679665,
      "grad_norm": 5.844179630279541,
      "learning_rate": 1.8347260909935003e-05,
      "loss": 0.5393,
      "step": 3410
    },
    {
      "epoch": 1.9052924791086352,
      "grad_norm": 4.613275051116943,
      "learning_rate": 1.8254410399257196e-05,
      "loss": 0.8167,
      "step": 3420
    },
    {
      "epoch": 1.9108635097493036,
      "grad_norm": 4.484188079833984,
      "learning_rate": 1.816155988857939e-05,
      "loss": 0.6047,
      "step": 3430
    },
    {
      "epoch": 1.916434540389972,
      "grad_norm": 5.882885932922363,
      "learning_rate": 1.806870937790158e-05,
      "loss": 0.5126,
      "step": 3440
    },
    {
      "epoch": 1.9220055710306405,
      "grad_norm": 4.281798839569092,
      "learning_rate": 1.797585886722377e-05,
      "loss": 0.6512,
      "step": 3450
    },
    {
      "epoch": 1.9275766016713092,
      "grad_norm": 7.347241401672363,
      "learning_rate": 1.788300835654596e-05,
      "loss": 0.7092,
      "step": 3460
    },
    {
      "epoch": 1.9331476323119778,
      "grad_norm": 5.490599155426025,
      "learning_rate": 1.7790157845868152e-05,
      "loss": 0.7738,
      "step": 3470
    },
    {
      "epoch": 1.9387186629526463,
      "grad_norm": 4.461576461791992,
      "learning_rate": 1.7697307335190344e-05,
      "loss": 0.7116,
      "step": 3480
    },
    {
      "epoch": 1.9442896935933147,
      "grad_norm": 3.0554678440093994,
      "learning_rate": 1.7604456824512537e-05,
      "loss": 0.7397,
      "step": 3490
    },
    {
      "epoch": 1.9498607242339832,
      "grad_norm": 3.393239736557007,
      "learning_rate": 1.7511606313834726e-05,
      "loss": 0.7141,
      "step": 3500
    },
    {
      "epoch": 1.9554317548746518,
      "grad_norm": 3.1929807662963867,
      "learning_rate": 1.74280408542247e-05,
      "loss": 0.6955,
      "step": 3510
    },
    {
      "epoch": 1.9610027855153205,
      "grad_norm": 5.299395561218262,
      "learning_rate": 1.733519034354689e-05,
      "loss": 0.6491,
      "step": 3520
    },
    {
      "epoch": 1.966573816155989,
      "grad_norm": 3.9555628299713135,
      "learning_rate": 1.7242339832869082e-05,
      "loss": 0.6156,
      "step": 3530
    },
    {
      "epoch": 1.9721448467966574,
      "grad_norm": 4.251757621765137,
      "learning_rate": 1.7149489322191274e-05,
      "loss": 0.6155,
      "step": 3540
    },
    {
      "epoch": 1.9777158774373258,
      "grad_norm": 7.542901515960693,
      "learning_rate": 1.7056638811513463e-05,
      "loss": 0.7173,
      "step": 3550
    },
    {
      "epoch": 1.9832869080779945,
      "grad_norm": 4.958390712738037,
      "learning_rate": 1.6963788300835656e-05,
      "loss": 0.5184,
      "step": 3560
    },
    {
      "epoch": 1.988857938718663,
      "grad_norm": 4.884737491607666,
      "learning_rate": 1.687093779015785e-05,
      "loss": 0.6729,
      "step": 3570
    },
    {
      "epoch": 1.9944289693593316,
      "grad_norm": 4.648261547088623,
      "learning_rate": 1.6778087279480038e-05,
      "loss": 0.6181,
      "step": 3580
    },
    {
      "epoch": 2.0,
      "grad_norm": 6.863659381866455,
      "learning_rate": 1.668523676880223e-05,
      "loss": 0.5198,
      "step": 3590
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7071607690164391,
      "eval_loss": 0.817263662815094,
      "eval_runtime": 37.2879,
      "eval_samples_per_second": 96.251,
      "eval_steps_per_second": 6.034,
      "step": 3590
    },
    {
      "epoch": 2.0055710306406684,
      "grad_norm": 4.615173816680908,
      "learning_rate": 1.659238625812442e-05,
      "loss": 0.5817,
      "step": 3600
    },
    {
      "epoch": 2.011142061281337,
      "grad_norm": 2.830014944076538,
      "learning_rate": 1.6499535747446612e-05,
      "loss": 0.4535,
      "step": 3610
    },
    {
      "epoch": 2.0167130919220058,
      "grad_norm": 4.435030460357666,
      "learning_rate": 1.6406685236768805e-05,
      "loss": 0.4874,
      "step": 3620
    },
    {
      "epoch": 2.022284122562674,
      "grad_norm": 6.543429374694824,
      "learning_rate": 1.6313834726090994e-05,
      "loss": 0.5775,
      "step": 3630
    },
    {
      "epoch": 2.0278551532033426,
      "grad_norm": 4.525574684143066,
      "learning_rate": 1.6220984215413186e-05,
      "loss": 0.6027,
      "step": 3640
    },
    {
      "epoch": 2.033426183844011,
      "grad_norm": 10.019755363464355,
      "learning_rate": 1.6128133704735375e-05,
      "loss": 0.4922,
      "step": 3650
    },
    {
      "epoch": 2.0389972144846795,
      "grad_norm": 4.950735569000244,
| "learning_rate": 1.6035283194057568e-05, |
| "loss": 0.4555, |
| "step": 3660 |
| }, |
| { |
| "epoch": 2.0445682451253484, |
| "grad_norm": 2.9464316368103027, |
| "learning_rate": 1.594243268337976e-05, |
| "loss": 0.454, |
| "step": 3670 |
| }, |
| { |
| "epoch": 2.050139275766017, |
| "grad_norm": 10.776598930358887, |
| "learning_rate": 1.5849582172701953e-05, |
| "loss": 0.4013, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.0557103064066853, |
| "grad_norm": 4.557794094085693, |
| "learning_rate": 1.5756731662024142e-05, |
| "loss": 0.4761, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.0612813370473537, |
| "grad_norm": 4.7235107421875, |
| "learning_rate": 1.566388115134633e-05, |
| "loss": 0.4825, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.066852367688022, |
| "grad_norm": 3.367194414138794, |
| "learning_rate": 1.5571030640668524e-05, |
| "loss": 0.5587, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.0724233983286906, |
| "grad_norm": 4.634751319885254, |
| "learning_rate": 1.5478180129990717e-05, |
| "loss": 0.5894, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.0779944289693595, |
| "grad_norm": 3.5962271690368652, |
| "learning_rate": 1.538532961931291e-05, |
| "loss": 0.5022, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.083565459610028, |
| "grad_norm": 2.4621849060058594, |
| "learning_rate": 1.5292479108635098e-05, |
| "loss": 0.3771, |
| "step": 3740 |
| }, |
| { |
| "epoch": 2.0891364902506964, |
| "grad_norm": 0.884714663028717, |
| "learning_rate": 1.5199628597957289e-05, |
| "loss": 0.4954, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.094707520891365, |
| "grad_norm": 3.478480577468872, |
| "learning_rate": 1.510677808727948e-05, |
| "loss": 0.504, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.1002785515320332, |
| "grad_norm": 4.723543643951416, |
| "learning_rate": 1.5013927576601673e-05, |
| "loss": 0.4238, |
| "step": 3770 |
| }, |
| { |
| "epoch": 2.105849582172702, |
| "grad_norm": 6.865334510803223, |
| "learning_rate": 1.4921077065923863e-05, |
| "loss": 0.5369, |
| "step": 3780 |
| }, |
| { |
| "epoch": 2.1114206128133706, |
| "grad_norm": 3.348172664642334, |
| "learning_rate": 1.4828226555246056e-05, |
| "loss": 0.4709, |
| "step": 3790 |
| }, |
| { |
| "epoch": 2.116991643454039, |
| "grad_norm": 3.8873541355133057, |
| "learning_rate": 1.4735376044568247e-05, |
| "loss": 0.3504, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.1225626740947074, |
| "grad_norm": 6.476009845733643, |
| "learning_rate": 1.4642525533890436e-05, |
| "loss": 0.378, |
| "step": 3810 |
| }, |
| { |
| "epoch": 2.128133704735376, |
| "grad_norm": 4.0402445793151855, |
| "learning_rate": 1.4549675023212628e-05, |
| "loss": 0.4167, |
| "step": 3820 |
| }, |
| { |
| "epoch": 2.1337047353760448, |
| "grad_norm": 5.197554588317871, |
| "learning_rate": 1.445682451253482e-05, |
| "loss": 0.5053, |
| "step": 3830 |
| }, |
| { |
| "epoch": 2.139275766016713, |
| "grad_norm": 3.450927495956421, |
| "learning_rate": 1.4363974001857012e-05, |
| "loss": 0.4532, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.1448467966573816, |
| "grad_norm": 4.431534767150879, |
| "learning_rate": 1.4271123491179203e-05, |
| "loss": 0.5658, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.15041782729805, |
| "grad_norm": 3.2116923332214355, |
| "learning_rate": 1.4178272980501395e-05, |
| "loss": 0.3628, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.1559888579387185, |
| "grad_norm": 3.2377572059631348, |
| "learning_rate": 1.4085422469823584e-05, |
| "loss": 0.3513, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.1615598885793874, |
| "grad_norm": 6.522520065307617, |
| "learning_rate": 1.3992571959145775e-05, |
| "loss": 0.4474, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.167130919220056, |
| "grad_norm": 4.152089595794678, |
| "learning_rate": 1.3899721448467968e-05, |
| "loss": 0.4717, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.1727019498607243, |
| "grad_norm": 3.9194087982177734, |
| "learning_rate": 1.3806870937790159e-05, |
| "loss": 0.4681, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.1782729805013927, |
| "grad_norm": 3.955049991607666, |
| "learning_rate": 1.3714020427112351e-05, |
| "loss": 0.4825, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.183844011142061, |
| "grad_norm": 7.73003625869751, |
| "learning_rate": 1.362116991643454e-05, |
| "loss": 0.4505, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.1894150417827296, |
| "grad_norm": 7.201216220855713, |
| "learning_rate": 1.3528319405756731e-05, |
| "loss": 0.5176, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.1949860724233985, |
| "grad_norm": 4.639294624328613, |
| "learning_rate": 1.3435468895078924e-05, |
| "loss": 0.4535, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.200557103064067, |
| "grad_norm": 4.728760242462158, |
| "learning_rate": 1.3342618384401115e-05, |
| "loss": 0.5659, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.2061281337047354, |
| "grad_norm": 2.1953437328338623, |
| "learning_rate": 1.3249767873723307e-05, |
| "loss": 0.3706, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.211699164345404, |
| "grad_norm": 2.9534456729888916, |
| "learning_rate": 1.3156917363045498e-05, |
| "loss": 0.5308, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.2172701949860723, |
| "grad_norm": 6.660586833953857, |
| "learning_rate": 1.3064066852367687e-05, |
| "loss": 0.4657, |
| "step": 3980 |
| }, |
| { |
| "epoch": 2.222841225626741, |
| "grad_norm": 3.0846714973449707, |
| "learning_rate": 1.297121634168988e-05, |
| "loss": 0.4446, |
| "step": 3990 |
| }, |
| { |
| "epoch": 2.2284122562674096, |
| "grad_norm": 9.60133171081543, |
| "learning_rate": 1.287836583101207e-05, |
| "loss": 0.5949, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.233983286908078, |
| "grad_norm": 2.2764580249786377, |
| "learning_rate": 1.2785515320334263e-05, |
| "loss": 0.4514, |
| "step": 4010 |
| }, |
| { |
| "epoch": 2.2395543175487465, |
| "grad_norm": 2.598289728164673, |
| "learning_rate": 1.2692664809656454e-05, |
| "loss": 0.4929, |
| "step": 4020 |
| }, |
| { |
| "epoch": 2.245125348189415, |
| "grad_norm": 2.7438085079193115, |
| "learning_rate": 1.2599814298978647e-05, |
| "loss": 0.5142, |
| "step": 4030 |
| }, |
| { |
| "epoch": 2.2506963788300833, |
| "grad_norm": 2.9122653007507324, |
| "learning_rate": 1.2506963788300836e-05, |
| "loss": 0.4944, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.256267409470752, |
| "grad_norm": 4.093932151794434, |
| "learning_rate": 1.2414113277623028e-05, |
| "loss": 0.4334, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.2618384401114207, |
| "grad_norm": 5.076513290405273, |
| "learning_rate": 1.232126276694522e-05, |
| "loss": 0.4213, |
| "step": 4060 |
| }, |
| { |
| "epoch": 2.267409470752089, |
| "grad_norm": 3.2467005252838135, |
| "learning_rate": 1.222841225626741e-05, |
| "loss": 0.6094, |
| "step": 4070 |
| }, |
| { |
| "epoch": 2.2729805013927575, |
| "grad_norm": 2.402123212814331, |
| "learning_rate": 1.2135561745589603e-05, |
| "loss": 0.4469, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.2785515320334264, |
| "grad_norm": 4.679557800292969, |
| "learning_rate": 1.2042711234911792e-05, |
| "loss": 0.4029, |
| "step": 4090 |
| }, |
| { |
| "epoch": 2.284122562674095, |
| "grad_norm": 1.4909316301345825, |
| "learning_rate": 1.1949860724233984e-05, |
| "loss": 0.3351, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.2896935933147633, |
| "grad_norm": 6.870358467102051, |
| "learning_rate": 1.1857010213556175e-05, |
| "loss": 0.5222, |
| "step": 4110 |
| }, |
| { |
| "epoch": 2.2952646239554317, |
| "grad_norm": 1.7165311574935913, |
| "learning_rate": 1.1764159702878366e-05, |
| "loss": 0.326, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.3008356545961, |
| "grad_norm": 5.359385967254639, |
| "learning_rate": 1.1671309192200559e-05, |
| "loss": 0.36, |
| "step": 4130 |
| }, |
| { |
| "epoch": 2.3064066852367686, |
| "grad_norm": 4.74082612991333, |
| "learning_rate": 1.1578458681522748e-05, |
| "loss": 0.5068, |
| "step": 4140 |
| }, |
| { |
| "epoch": 2.3119777158774375, |
| "grad_norm": 2.6061348915100098, |
| "learning_rate": 1.148560817084494e-05, |
| "loss": 0.4599, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.317548746518106, |
| "grad_norm": 6.639983177185059, |
| "learning_rate": 1.1392757660167131e-05, |
| "loss": 0.5885, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.3231197771587744, |
| "grad_norm": 3.9153246879577637, |
| "learning_rate": 1.1299907149489322e-05, |
| "loss": 0.418, |
| "step": 4170 |
| }, |
| { |
| "epoch": 2.328690807799443, |
| "grad_norm": 7.114092826843262, |
| "learning_rate": 1.1207056638811515e-05, |
| "loss": 0.4293, |
| "step": 4180 |
| }, |
| { |
| "epoch": 2.3342618384401113, |
| "grad_norm": 3.978114366531372, |
| "learning_rate": 1.1114206128133705e-05, |
| "loss": 0.4194, |
| "step": 4190 |
| }, |
| { |
| "epoch": 2.33983286908078, |
| "grad_norm": 3.607412099838257, |
| "learning_rate": 1.1021355617455896e-05, |
| "loss": 0.4783, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.3454038997214486, |
| "grad_norm": 6.030869960784912, |
| "learning_rate": 1.0928505106778087e-05, |
| "loss": 0.421, |
| "step": 4210 |
| }, |
| { |
| "epoch": 2.350974930362117, |
| "grad_norm": 2.8264944553375244, |
| "learning_rate": 1.083565459610028e-05, |
| "loss": 0.4424, |
| "step": 4220 |
| }, |
| { |
| "epoch": 2.3565459610027855, |
| "grad_norm": 3.267326593399048, |
| "learning_rate": 1.074280408542247e-05, |
| "loss": 0.4451, |
| "step": 4230 |
| }, |
| { |
| "epoch": 2.362116991643454, |
| "grad_norm": 3.6425423622131348, |
| "learning_rate": 1.0649953574744661e-05, |
| "loss": 0.5231, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.3676880222841223, |
| "grad_norm": 3.3133020401000977, |
| "learning_rate": 1.0557103064066854e-05, |
| "loss": 0.5509, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.3732590529247912, |
| "grad_norm": 3.444788694381714, |
| "learning_rate": 1.0464252553389043e-05, |
| "loss": 0.2895, |
| "step": 4260 |
| }, |
| { |
| "epoch": 2.3788300835654597, |
| "grad_norm": 7.917379856109619, |
| "learning_rate": 1.0371402042711236e-05, |
| "loss": 0.5842, |
| "step": 4270 |
| }, |
| { |
| "epoch": 2.384401114206128, |
| "grad_norm": 4.689010143280029, |
| "learning_rate": 1.0278551532033427e-05, |
| "loss": 0.467, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.3899721448467965, |
| "grad_norm": 4.501764297485352, |
| "learning_rate": 1.0185701021355617e-05, |
| "loss": 0.4238, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.3955431754874654, |
| "grad_norm": 4.01366662979126, |
| "learning_rate": 1.009285051067781e-05, |
| "loss": 0.4781, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.401114206128134, |
| "grad_norm": 11.674251556396484, |
| "learning_rate": 1e-05, |
| "loss": 0.403, |
| "step": 4310 |
| }, |
| { |
| "epoch": 2.4066852367688023, |
| "grad_norm": 5.943558692932129, |
| "learning_rate": 9.907149489322192e-06, |
| "loss": 0.3621, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.4122562674094707, |
| "grad_norm": 1.9158827066421509, |
| "learning_rate": 9.814298978644383e-06, |
| "loss": 0.3801, |
| "step": 4330 |
| }, |
| { |
| "epoch": 2.417827298050139, |
| "grad_norm": 3.426053047180176, |
| "learning_rate": 9.721448467966575e-06, |
| "loss": 0.419, |
| "step": 4340 |
| }, |
| { |
| "epoch": 2.4233983286908076, |
| "grad_norm": 3.975104570388794, |
| "learning_rate": 9.628597957288766e-06, |
| "loss": 0.414, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.4289693593314765, |
| "grad_norm": 2.1982619762420654, |
| "learning_rate": 9.535747446610957e-06, |
| "loss": 0.4647, |
| "step": 4360 |
| }, |
| { |
| "epoch": 2.434540389972145, |
| "grad_norm": 4.273013591766357, |
| "learning_rate": 9.44289693593315e-06, |
| "loss": 0.5589, |
| "step": 4370 |
| }, |
| { |
| "epoch": 2.4401114206128134, |
| "grad_norm": 3.543038845062256, |
| "learning_rate": 9.350046425255339e-06, |
| "loss": 0.4927, |
| "step": 4380 |
| }, |
| { |
| "epoch": 2.445682451253482, |
| "grad_norm": 3.844630718231201, |
| "learning_rate": 9.257195914577531e-06, |
| "loss": 0.3665, |
| "step": 4390 |
| }, |
| { |
| "epoch": 2.4512534818941503, |
| "grad_norm": 5.011111736297607, |
| "learning_rate": 9.164345403899722e-06, |
| "loss": 0.4216, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.456824512534819, |
| "grad_norm": 5.382537841796875, |
| "learning_rate": 9.071494893221913e-06, |
| "loss": 0.4904, |
| "step": 4410 |
| }, |
| { |
| "epoch": 2.4623955431754876, |
| "grad_norm": 2.391098976135254, |
| "learning_rate": 8.978644382544105e-06, |
| "loss": 0.4759, |
| "step": 4420 |
| }, |
| { |
| "epoch": 2.467966573816156, |
| "grad_norm": 3.50219464302063, |
| "learning_rate": 8.885793871866294e-06, |
| "loss": 0.4777, |
| "step": 4430 |
| }, |
| { |
| "epoch": 2.4735376044568245, |
| "grad_norm": 4.964571475982666, |
| "learning_rate": 8.792943361188487e-06, |
| "loss": 0.3474, |
| "step": 4440 |
| }, |
| { |
| "epoch": 2.479108635097493, |
| "grad_norm": 5.0892510414123535, |
| "learning_rate": 8.700092850510678e-06, |
| "loss": 0.3845, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.4846796657381613, |
| "grad_norm": 6.2226362228393555, |
| "learning_rate": 8.607242339832869e-06, |
| "loss": 0.5149, |
| "step": 4460 |
| }, |
| { |
| "epoch": 2.4902506963788302, |
| "grad_norm": 5.676084995269775, |
| "learning_rate": 8.514391829155061e-06, |
| "loss": 0.3087, |
| "step": 4470 |
| }, |
| { |
| "epoch": 2.4958217270194987, |
| "grad_norm": 5.1531982421875, |
| "learning_rate": 8.421541318477252e-06, |
| "loss": 0.5312, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.501392757660167, |
| "grad_norm": 4.618714332580566, |
| "learning_rate": 8.328690807799443e-06, |
| "loss": 0.4738, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.5069637883008355, |
| "grad_norm": 3.6886181831359863, |
| "learning_rate": 8.235840297121634e-06, |
| "loss": 0.5054, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.5125348189415044, |
| "grad_norm": 6.665517330169678, |
| "learning_rate": 8.142989786443826e-06, |
| "loss": 0.4265, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.518105849582173, |
| "grad_norm": 4.075329303741455, |
| "learning_rate": 8.050139275766017e-06, |
| "loss": 0.414, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.5236768802228413, |
| "grad_norm": 4.333959579467773, |
| "learning_rate": 7.957288765088208e-06, |
| "loss": 0.4542, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.5292479108635098, |
| "grad_norm": 4.7477827072143555, |
| "learning_rate": 7.8644382544104e-06, |
| "loss": 0.4223, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.534818941504178, |
| "grad_norm": 4.310934066772461, |
| "learning_rate": 7.77158774373259e-06, |
| "loss": 0.342, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.5403899721448466, |
| "grad_norm": 3.5395619869232178, |
| "learning_rate": 7.678737233054782e-06, |
| "loss": 0.4755, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.545961002785515, |
| "grad_norm": 4.999756813049316, |
| "learning_rate": 7.585886722376974e-06, |
| "loss": 0.4619, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.551532033426184, |
| "grad_norm": 5.0525383949279785, |
| "learning_rate": 7.493036211699164e-06, |
| "loss": 0.4846, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.5571030640668524, |
| "grad_norm": 8.564767837524414, |
| "learning_rate": 7.400185701021356e-06, |
| "loss": 0.4261, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.562674094707521, |
| "grad_norm": 4.90235710144043, |
| "learning_rate": 7.3073351903435475e-06, |
| "loss": 0.435, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.5682451253481893, |
| "grad_norm": 5.1071672439575195, |
| "learning_rate": 7.214484679665738e-06, |
| "loss": 0.4378, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.573816155988858, |
| "grad_norm": 4.659866809844971, |
| "learning_rate": 7.12163416898793e-06, |
| "loss": 0.4755, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.5793871866295266, |
| "grad_norm": 6.740070343017578, |
| "learning_rate": 7.028783658310122e-06, |
| "loss": 0.5263, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.584958217270195, |
| "grad_norm": 7.245567321777344, |
| "learning_rate": 6.935933147632312e-06, |
| "loss": 0.4753, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.5905292479108635, |
| "grad_norm": 3.481055736541748, |
| "learning_rate": 6.8430826369545035e-06, |
| "loss": 0.334, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.596100278551532, |
| "grad_norm": 9.103506088256836, |
| "learning_rate": 6.750232126276695e-06, |
| "loss": 0.5612, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.6016713091922004, |
| "grad_norm": 2.4863336086273193, |
| "learning_rate": 6.657381615598886e-06, |
| "loss": 0.4572, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.6072423398328692, |
| "grad_norm": 3.1609058380126953, |
| "learning_rate": 6.564531104921078e-06, |
| "loss": 0.4082, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.6128133704735377, |
| "grad_norm": 4.247745037078857, |
| "learning_rate": 6.4716805942432695e-06, |
| "loss": 0.5278, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.618384401114206, |
| "grad_norm": 5.853703022003174, |
| "learning_rate": 6.3788300835654595e-06, |
| "loss": 0.4127, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.6239554317548746, |
| "grad_norm": 4.09820032119751, |
| "learning_rate": 6.285979572887651e-06, |
| "loss": 0.399, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.6295264623955434, |
| "grad_norm": 4.088954925537109, |
| "learning_rate": 6.193129062209842e-06, |
| "loss": 0.3394, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.635097493036212, |
| "grad_norm": 5.091104984283447, |
| "learning_rate": 6.100278551532034e-06, |
| "loss": 0.4218, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.6406685236768803, |
| "grad_norm": 4.649893760681152, |
| "learning_rate": 6.0074280408542255e-06, |
| "loss": 0.4801, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.6462395543175488, |
| "grad_norm": 6.470804691314697, |
| "learning_rate": 5.914577530176416e-06, |
| "loss": 0.5307, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.651810584958217, |
| "grad_norm": 3.143728017807007, |
| "learning_rate": 5.821727019498607e-06, |
| "loss": 0.3806, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.6573816155988856, |
| "grad_norm": 3.0394294261932373, |
| "learning_rate": 5.728876508820799e-06, |
| "loss": 0.4565, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.662952646239554, |
| "grad_norm": 4.775937557220459, |
| "learning_rate": 5.63602599814299e-06, |
| "loss": 0.3728, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.668523676880223, |
| "grad_norm": 3.5294461250305176, |
| "learning_rate": 5.5431754874651814e-06, |
| "loss": 0.4864, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.6740947075208914, |
| "grad_norm": 6.9017486572265625, |
| "learning_rate": 5.450324976787373e-06, |
| "loss": 0.4679, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.67966573816156, |
| "grad_norm": 2.8387668132781982, |
| "learning_rate": 5.357474466109564e-06, |
| "loss": 0.2643, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.6852367688022283, |
| "grad_norm": 5.770737171173096, |
| "learning_rate": 5.264623955431755e-06, |
| "loss": 0.4973, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.690807799442897, |
| "grad_norm": 2.5987281799316406, |
| "learning_rate": 5.171773444753946e-06, |
| "loss": 0.4445, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.6963788300835656, |
| "grad_norm": 5.727529048919678, |
| "learning_rate": 5.078922934076137e-06, |
| "loss": 0.4761, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.701949860724234, |
| "grad_norm": 3.2433037757873535, |
| "learning_rate": 4.986072423398329e-06, |
| "loss": 0.5198, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.7075208913649025, |
| "grad_norm": 2.264634847640991, |
| "learning_rate": 4.89322191272052e-06, |
| "loss": 0.3812, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.713091922005571, |
| "grad_norm": 3.4057681560516357, |
| "learning_rate": 4.800371402042712e-06, |
| "loss": 0.5236, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.7186629526462394, |
| "grad_norm": 5.159498691558838, |
| "learning_rate": 4.7075208913649025e-06, |
| "loss": 0.4401, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.724233983286908, |
| "grad_norm": 6.718446731567383, |
| "learning_rate": 4.614670380687093e-06, |
| "loss": 0.4825, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.7298050139275767, |
| "grad_norm": 4.028508186340332, |
| "learning_rate": 4.521819870009285e-06, |
| "loss": 0.3836, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.735376044568245, |
| "grad_norm": 2.027120351791382, |
| "learning_rate": 4.428969359331477e-06, |
| "loss": 0.3456, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.7409470752089136, |
| "grad_norm": 3.1550886631011963, |
| "learning_rate": 4.336118848653668e-06, |
| "loss": 0.3802, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.7465181058495824, |
| "grad_norm": 5.188984394073486, |
| "learning_rate": 4.243268337975859e-06, |
| "loss": 0.383, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.752089136490251, |
| "grad_norm": 6.997900009155273, |
| "learning_rate": 4.15041782729805e-06, |
| "loss": 0.4606, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.7576601671309193, |
| "grad_norm": 4.508044719696045, |
| "learning_rate": 4.057567316620241e-06, |
| "loss": 0.4017, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.7632311977715878, |
| "grad_norm": 6.631628036499023, |
| "learning_rate": 3.964716805942433e-06, |
| "loss": 0.398, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.768802228412256, |
| "grad_norm": 7.107889175415039, |
| "learning_rate": 3.8718662952646245e-06, |
| "loss": 0.4823, |
| "step": 4970 |
| }, |
| { |
| "epoch": 2.7743732590529246, |
| "grad_norm": 4.978562355041504, |
| "learning_rate": 3.7790157845868153e-06, |
| "loss": 0.3995, |
| "step": 4980 |
| }, |
| { |
| "epoch": 2.779944289693593, |
| "grad_norm": 5.335329055786133, |
| "learning_rate": 3.686165273909006e-06, |
| "loss": 0.39, |
| "step": 4990 |
| }, |
| { |
| "epoch": 2.785515320334262, |
| "grad_norm": 3.1053812503814697, |
| "learning_rate": 3.593314763231198e-06, |
| "loss": 0.3362, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.7910863509749304, |
| "grad_norm": 4.562804222106934, |
| "learning_rate": 3.500464252553389e-06, |
| "loss": 0.3663, |
| "step": 5010 |
| }, |
| { |
| "epoch": 2.796657381615599, |
| "grad_norm": 2.6694986820220947, |
| "learning_rate": 3.40761374187558e-06, |
| "loss": 0.3584, |
| "step": 5020 |
| }, |
| { |
| "epoch": 2.8022284122562673, |
| "grad_norm": 5.513758659362793, |
| "learning_rate": 3.3147632311977717e-06, |
| "loss": 0.4915, |
| "step": 5030 |
| }, |
| { |
| "epoch": 2.807799442896936, |
| "grad_norm": 4.651453971862793, |
| "learning_rate": 3.221912720519963e-06, |
| "loss": 0.364, |
| "step": 5040 |
| }, |
| { |
| "epoch": 2.8133704735376046, |
| "grad_norm": 4.250921249389648, |
| "learning_rate": 3.129062209842154e-06, |
| "loss": 0.3958, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.818941504178273, |
| "grad_norm": 3.3700807094573975, |
| "learning_rate": 3.0362116991643456e-06, |
| "loss": 0.2812, |
| "step": 5060 |
| }, |
| { |
| "epoch": 2.8245125348189415, |
| "grad_norm": 5.31024694442749, |
| "learning_rate": 2.943361188486537e-06, |
| "loss": 0.4218, |
| "step": 5070 |
| }, |
| { |
| "epoch": 2.83008356545961, |
| "grad_norm": 2.610002279281616, |
| "learning_rate": 2.850510677808728e-06, |
| "loss": 0.3653, |
| "step": 5080 |
| }, |
| { |
| "epoch": 2.8356545961002784, |
| "grad_norm": 5.629008769989014, |
| "learning_rate": 2.7576601671309194e-06, |
| "loss": 0.418, |
| "step": 5090 |
| }, |
| { |
| "epoch": 2.841225626740947, |
| "grad_norm": 3.8689396381378174, |
| "learning_rate": 2.6648096564531107e-06, |
| "loss": 0.3105, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.8467966573816157, |
| "grad_norm": 3.4457309246063232, |
| "learning_rate": 2.5719591457753016e-06, |
| "loss": 0.4024, |
| "step": 5110 |
| }, |
| { |
| "epoch": 2.852367688022284, |
| "grad_norm": 4.193653583526611, |
| "learning_rate": 2.4791086350974933e-06, |
| "loss": 0.4946, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.8579387186629526, |
| "grad_norm": 8.699234962463379, |
| "learning_rate": 2.3862581244196846e-06, |
| "loss": 0.4511, |
| "step": 5130 |
| }, |
| { |
| "epoch": 2.863509749303621, |
| "grad_norm": 4.196930408477783, |
| "learning_rate": 2.2934076137418754e-06, |
| "loss": 0.4671, |
| "step": 5140 |
| }, |
| { |
| "epoch": 2.86908077994429, |
| "grad_norm": 5.245868682861328, |
| "learning_rate": 2.200557103064067e-06, |
| "loss": 0.4111, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.8746518105849583, |
| "grad_norm": 4.189416885375977, |
| "learning_rate": 2.1077065923862584e-06, |
| "loss": 0.4043, |
| "step": 5160 |
| }, |
| { |
| "epoch": 2.8802228412256268, |
| "grad_norm": 2.8182320594787598, |
| "learning_rate": 2.0148560817084493e-06, |
| "loss": 0.3343, |
| "step": 5170 |
| }, |
| { |
| "epoch": 2.885793871866295, |
| "grad_norm": 7.292479038238525, |
| "learning_rate": 1.922005571030641e-06, |
| "loss": 0.3844, |
| "step": 5180 |
| }, |
| { |
| "epoch": 2.8913649025069637, |
| "grad_norm": 2.767341375350952, |
| "learning_rate": 1.8291550603528318e-06, |
| "loss": 0.3948, |
| "step": 5190 |
| }, |
| { |
| "epoch": 2.896935933147632, |
| "grad_norm": 5.170936107635498, |
| "learning_rate": 1.7363045496750233e-06, |
| "loss": 0.4235, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.902506963788301, |
| "grad_norm": 7.14333438873291, |
| "learning_rate": 1.6434540389972146e-06, |
| "loss": 0.3677, |
| "step": 5210 |
| }, |
| { |
| "epoch": 2.9080779944289694, |
| "grad_norm": 6.775882244110107, |
| "learning_rate": 1.5506035283194059e-06, |
| "loss": 0.486, |
| "step": 5220 |
| }, |
| { |
| "epoch": 2.913649025069638, |
| "grad_norm": 4.250217914581299, |
| "learning_rate": 1.4577530176415971e-06, |
| "loss": 0.5274, |
| "step": 5230 |
| }, |
| { |
| "epoch": 2.9192200557103063, |
| "grad_norm": 4.6204633712768555, |
| "learning_rate": 1.3649025069637884e-06, |
| "loss": 0.5229, |
| "step": 5240 |
| }, |
| { |
| "epoch": 2.924791086350975, |
| "grad_norm": 3.8870279788970947, |
| "learning_rate": 1.2720519962859795e-06, |
| "loss": 0.535, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.9303621169916436, |
| "grad_norm": 9.080047607421875, |
| "learning_rate": 1.179201485608171e-06, |
| "loss": 0.507, |
| "step": 5260 |
| }, |
| { |
| "epoch": 2.935933147632312, |
| "grad_norm": 10.549107551574707, |
| "learning_rate": 1.0863509749303623e-06, |
| "loss": 0.4738, |
| "step": 5270 |
| }, |
| { |
| "epoch": 2.9415041782729805, |
| "grad_norm": 3.7926011085510254, |
| "learning_rate": 9.935004642525533e-07, |
| "loss": 0.4498, |
| "step": 5280 |
| }, |
| { |
| "epoch": 2.947075208913649, |
| "grad_norm": 2.45975399017334, |
| "learning_rate": 9.006499535747446e-07, |
| "loss": 0.3481, |
| "step": 5290 |
| }, |
| { |
| "epoch": 2.9526462395543174, |
| "grad_norm": 4.364011764526367, |
| "learning_rate": 8.07799442896936e-07, |
| "loss": 0.4118, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.958217270194986, |
| "grad_norm": 5.1891255378723145, |
| "learning_rate": 7.149489322191273e-07, |
| "loss": 0.5408, |
| "step": 5310 |
| }, |
| { |
| "epoch": 2.9637883008356547, |
| "grad_norm": 5.055240631103516, |
| "learning_rate": 6.220984215413185e-07, |
| "loss": 0.3917, |
| "step": 5320 |
| }, |
| { |
| "epoch": 2.969359331476323, |
| "grad_norm": 4.434938907623291, |
| "learning_rate": 5.292479108635097e-07, |
| "loss": 0.3526, |
| "step": 5330 |
| }, |
| { |
| "epoch": 2.9749303621169916, |
| "grad_norm": 5.2575860023498535, |
| "learning_rate": 4.36397400185701e-07, |
| "loss": 0.3464, |
| "step": 5340 |
| }, |
| { |
| "epoch": 2.98050139275766, |
| "grad_norm": 8.497797966003418, |
| "learning_rate": 3.435468895078923e-07, |
| "loss": 0.5106, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.986072423398329, |
| "grad_norm": 4.38588285446167, |
| "learning_rate": 2.506963788300836e-07, |
| "loss": 0.3509, |
| "step": 5360 |
| }, |
| { |
| "epoch": 2.9916434540389973, |
| "grad_norm": 7.261599540710449, |
| "learning_rate": 1.5784586815227484e-07, |
| "loss": 0.5137, |
| "step": 5370 |
| }, |
| { |
| "epoch": 2.997214484679666, |
| "grad_norm": 7.3228936195373535, |
| "learning_rate": 6.499535747446612e-08, |
| "loss": 0.4838, |
| "step": 5380 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.7127333519086096, |
| "eval_loss": 0.8501355051994324, |
| "eval_runtime": 37.2414, |
| "eval_samples_per_second": 96.371, |
| "eval_steps_per_second": 6.042, |
| "step": 5385 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 5385, |
| "total_flos": 6.674451681129007e+18, |
| "train_loss": 0.7317446124697573, |
| "train_runtime": 2231.3604, |
| "train_samples_per_second": 38.598, |
| "train_steps_per_second": 2.413 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 5385, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.674451681129007e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
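
The file above is a standard `trainer_state.json` as written by the Hugging Face `Trainer`: per-10-step records carry `loss`, `grad_norm`, and `learning_rate`; the per-epoch records carry `eval_accuracy` and `eval_loss` (ending at 0.7127 accuracy at step 5385); a final summary record holds the aggregate `train_loss` and throughput numbers. A minimal sketch of inspecting such a file is below. Assumptions: the state file sits in a `checkpoint-5385` directory (per the Trainer's `checkpoint-<step>` naming and the `max_steps` of 5385 recorded here — the actual output directory path is a placeholder), and `matplotlib` is installed; neither is stated in the log itself.

```python
# Sketch: parse a Trainer state file and plot the training-loss curve.
import json

import matplotlib.pyplot as plt

with open("checkpoint-5385/trainer_state.json") as f:  # placeholder path
    state = json.load(f)

# Records with a "loss" key are the per-10-step training logs; records
# with "eval_accuracy" are the per-epoch evaluation blocks. The final
# summary record uses "train_loss", so it is excluded by both filters.
history = state["log_history"]
train = [(r["step"], r["loss"]) for r in history if "loss" in r]
evals = [(r["epoch"], r["eval_accuracy"], r["eval_loss"])
         for r in history if "eval_accuracy" in r]

for epoch, acc, loss in evals:
    print(f"epoch {epoch:.0f}: eval_accuracy={acc:.4f} eval_loss={loss:.4f}")

steps, losses = zip(*train)
plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.show()
```

Run against this log, the eval printout would show accuracy rising to 0.7127 by epoch 3 while `eval_loss` ticks up from 0.8173 to 0.8501 between epochs 2 and 3, the usual sign that training loss (down to ~0.35-0.5 by the end) is starting to outpace generalization.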
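Since the last epoch scored best (eval_accuracy 0.7127 at step 5385), the natural follow-up is loading that checkpoint for inference. A hedged sketch under the same placeholder-path assumption: the log does not record which model or processor classes were used, so the generic `transformers` auto classes stand in here, and it is assumed the checkpoint directory contains the saved image-processor config alongside the weights. `"face.jpg"` is a placeholder input.

```python
# Sketch: reload the final checkpoint and classify one image.
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

ckpt = "checkpoint-5385"  # placeholder path to the saved checkpoint
processor = AutoImageProcessor.from_pretrained(ckpt)
model = AutoModelForImageClassification.from_pretrained(ckpt)
model.eval()

image = Image.open("face.jpg").convert("RGB")  # placeholder image
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
# id2label comes from the checkpoint's config.json.
print(model.config.id2label[logits.argmax(-1).item()])
```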