Invalid JSON: Unexpected token 'I', ..."ad_norm": Infinity,
"... is not valid JSON
| { | |
| "best_global_step": 55000, | |
| "best_metric": 0.9045753492836575, | |
| "best_model_checkpoint": "./lang-ner-xlmr/checkpoint-55000", | |
| "epoch": 2.0, | |
| "eval_steps": 2500, | |
| "global_step": 55938, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0035753870356466087, | |
| "grad_norm": 4.756625652313232, | |
| "learning_rate": 4.991150917086775e-05, | |
| "loss": 4.384464111328125, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.007150774071293217, | |
| "grad_norm": 2.1916704177856445, | |
| "learning_rate": 4.982212449497658e-05, | |
| "loss": 0.6957355499267578, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.010726161106939826, | |
| "grad_norm": 3.089632987976074, | |
| "learning_rate": 4.973273981908542e-05, | |
| "loss": 0.3664897537231445, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.014301548142586435, | |
| "grad_norm": 3.1561801433563232, | |
| "learning_rate": 4.964335514319425e-05, | |
| "loss": 0.24268556594848634, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.017876935178233043, | |
| "grad_norm": 4.541702747344971, | |
| "learning_rate": 4.955397046730309e-05, | |
| "loss": 0.18832412719726563, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.021452322213879653, | |
| "grad_norm": 3.958519220352173, | |
| "learning_rate": 4.946458579141192e-05, | |
| "loss": 0.1659502410888672, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.025027709249526263, | |
| "grad_norm": 2.7777926921844482, | |
| "learning_rate": 4.9375201115520756e-05, | |
| "loss": 0.1568641757965088, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.02860309628517287, | |
| "grad_norm": 2.150230646133423, | |
| "learning_rate": 4.928581643962959e-05, | |
| "loss": 0.14989984512329102, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.032178483320819476, | |
| "grad_norm": 1.7589229345321655, | |
| "learning_rate": 4.9196431763738424e-05, | |
| "loss": 0.13244229316711426, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.035753870356466086, | |
| "grad_norm": 1.2541024684906006, | |
| "learning_rate": 4.910704708784726e-05, | |
| "loss": 0.1411085033416748, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.039329257392112696, | |
| "grad_norm": 1.043690800666809, | |
| "learning_rate": 4.901766241195609e-05, | |
| "loss": 0.12802630424499511, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.042904644427759306, | |
| "grad_norm": 1.5866156816482544, | |
| "learning_rate": 4.892827773606493e-05, | |
| "loss": 0.11472611427307129, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.046480031463405916, | |
| "grad_norm": 2.9468393325805664, | |
| "learning_rate": 4.883889306017377e-05, | |
| "loss": 0.11635817527770996, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.050055418499052526, | |
| "grad_norm": 2.623593330383301, | |
| "learning_rate": 4.87495083842826e-05, | |
| "loss": 0.11469986915588379, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.05363080553469913, | |
| "grad_norm": 1.0270402431488037, | |
| "learning_rate": 4.8660123708391436e-05, | |
| "loss": 0.10951638221740723, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.05720619257034574, | |
| "grad_norm": 0.6011027693748474, | |
| "learning_rate": 4.8570739032500274e-05, | |
| "loss": 0.1056843090057373, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.06078157960599235, | |
| "grad_norm": 1.5310850143432617, | |
| "learning_rate": 4.8481354356609104e-05, | |
| "loss": 0.10531362533569336, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.06435696664163895, | |
| "grad_norm": 1.9218846559524536, | |
| "learning_rate": 4.839196968071794e-05, | |
| "loss": 0.10761914253234864, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.06793235367728556, | |
| "grad_norm": 0.9941307306289673, | |
| "learning_rate": 4.830258500482677e-05, | |
| "loss": 0.10573001861572266, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.07150774071293217, | |
| "grad_norm": 1.5511739253997803, | |
| "learning_rate": 4.821320032893561e-05, | |
| "loss": 0.09843612670898437, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.07508312774857878, | |
| "grad_norm": 3.8423593044281006, | |
| "learning_rate": 4.812381565304445e-05, | |
| "loss": 0.09764796257019043, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.07865851478422539, | |
| "grad_norm": 2.3102476596832275, | |
| "learning_rate": 4.803443097715328e-05, | |
| "loss": 0.0996187973022461, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.082233901819872, | |
| "grad_norm": 0.8750975131988525, | |
| "learning_rate": 4.7945046301262116e-05, | |
| "loss": 0.09662745475769043, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.08580928885551861, | |
| "grad_norm": 1.4319772720336914, | |
| "learning_rate": 4.7855661625370954e-05, | |
| "loss": 0.09479823112487792, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.08938467589116522, | |
| "grad_norm": 1.1493583917617798, | |
| "learning_rate": 4.7766276949479785e-05, | |
| "loss": 0.09187865257263184, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.08938467589116522, | |
| "eval_accuracy": 0.97115720940899, | |
| "eval_f1": 0.7833422259443614, | |
| "eval_loss": 0.12425321340560913, | |
| "eval_precision": 0.7387798259806108, | |
| "eval_recall": 0.8336256217117557, | |
| "eval_runtime": 30.7236, | |
| "eval_samples_per_second": 732.336, | |
| "eval_steps_per_second": 20.343, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.09296006292681183, | |
| "grad_norm": 0.4979458749294281, | |
| "learning_rate": 4.767689227358862e-05, | |
| "loss": 0.0913974666595459, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.09653544996245844, | |
| "grad_norm": 0.9877503514289856, | |
| "learning_rate": 4.758750759769745e-05, | |
| "loss": 0.09267548561096191, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.10011083699810505, | |
| "grad_norm": 1.4115008115768433, | |
| "learning_rate": 4.749812292180629e-05, | |
| "loss": 0.09185708045959473, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.10368622403375165, | |
| "grad_norm": 1.2117033004760742, | |
| "learning_rate": 4.740873824591512e-05, | |
| "loss": 0.09133506774902343, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.10726161106939826, | |
| "grad_norm": 0.527315080165863, | |
| "learning_rate": 4.731935357002396e-05, | |
| "loss": 0.08854376792907714, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.11083699810504487, | |
| "grad_norm": 0.5725809931755066, | |
| "learning_rate": 4.722996889413279e-05, | |
| "loss": 0.08516644477844239, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.11441238514069148, | |
| "grad_norm": 1.4227476119995117, | |
| "learning_rate": 4.714058421824163e-05, | |
| "loss": 0.08871203422546386, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.11798777217633809, | |
| "grad_norm": 1.2104847431182861, | |
| "learning_rate": 4.705119954235046e-05, | |
| "loss": 0.0874100399017334, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.1215631592119847, | |
| "grad_norm": 1.4136381149291992, | |
| "learning_rate": 4.6961814866459295e-05, | |
| "loss": 0.09060199737548828, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.1251385462476313, | |
| "grad_norm": 1.7565488815307617, | |
| "learning_rate": 4.687243019056813e-05, | |
| "loss": 0.09233291625976563, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.1287139332832779, | |
| "grad_norm": 1.2004791498184204, | |
| "learning_rate": 4.6783045514676964e-05, | |
| "loss": 0.08300934791564941, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.1322893203189245, | |
| "grad_norm": 1.7836707830429077, | |
| "learning_rate": 4.66936608387858e-05, | |
| "loss": 0.09250588417053222, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.13586470735457112, | |
| "grad_norm": 1.83432137966156, | |
| "learning_rate": 4.660427616289463e-05, | |
| "loss": 0.08511058807373047, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.13944009439021773, | |
| "grad_norm": 1.1962814331054688, | |
| "learning_rate": 4.651489148700347e-05, | |
| "loss": 0.07956169128417968, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.14301548142586434, | |
| "grad_norm": 1.145377278327942, | |
| "learning_rate": 4.642550681111231e-05, | |
| "loss": 0.08179279327392579, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.14659086846151095, | |
| "grad_norm": 4.283623218536377, | |
| "learning_rate": 4.633612213522114e-05, | |
| "loss": 0.09118062019348144, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.15016625549715756, | |
| "grad_norm": 2.0267841815948486, | |
| "learning_rate": 4.6246737459329975e-05, | |
| "loss": 0.0859706974029541, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.15374164253280417, | |
| "grad_norm": 1.3412806987762451, | |
| "learning_rate": 4.615735278343881e-05, | |
| "loss": 0.07687939643859863, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.15731702956845078, | |
| "grad_norm": 1.2748081684112549, | |
| "learning_rate": 4.6067968107547644e-05, | |
| "loss": 0.0789797306060791, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.1608924166040974, | |
| "grad_norm": 0.8491079807281494, | |
| "learning_rate": 4.597858343165648e-05, | |
| "loss": 0.07809987068176269, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.164467803639744, | |
| "grad_norm": 1.1583634614944458, | |
| "learning_rate": 4.588919875576531e-05, | |
| "loss": 0.07350683212280273, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.1680431906753906, | |
| "grad_norm": 0.6579107642173767, | |
| "learning_rate": 4.579981407987415e-05, | |
| "loss": 0.07875243663787841, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.17161857771103722, | |
| "grad_norm": 0.9742094278335571, | |
| "learning_rate": 4.571042940398299e-05, | |
| "loss": 0.08122955322265625, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.17519396474668383, | |
| "grad_norm": 0.7365472912788391, | |
| "learning_rate": 4.562104472809182e-05, | |
| "loss": 0.07848617553710938, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.17876935178233044, | |
| "grad_norm": 4.312972545623779, | |
| "learning_rate": 4.5531660052200655e-05, | |
| "loss": 0.07981382846832276, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.17876935178233044, | |
| "eval_accuracy": 0.9774074625381929, | |
| "eval_f1": 0.8253539377731214, | |
| "eval_loss": 0.09500592201948166, | |
| "eval_precision": 0.7928386037396048, | |
| "eval_recall": 0.8606503176839261, | |
| "eval_runtime": 27.5737, | |
| "eval_samples_per_second": 815.994, | |
| "eval_steps_per_second": 22.666, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.18234473881797705, | |
| "grad_norm": 0.7737888097763062, | |
| "learning_rate": 4.544227537630949e-05, | |
| "loss": 0.07826550960540772, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.18592012585362366, | |
| "grad_norm": 1.3171430826187134, | |
| "learning_rate": 4.5352890700418324e-05, | |
| "loss": 0.07463918685913086, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.18949551288927027, | |
| "grad_norm": 1.445436716079712, | |
| "learning_rate": 4.526350602452716e-05, | |
| "loss": 0.07105834484100342, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.19307089992491688, | |
| "grad_norm": 1.4572588205337524, | |
| "learning_rate": 4.517412134863599e-05, | |
| "loss": 0.07838897705078125, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.1966462869605635, | |
| "grad_norm": 0.940371572971344, | |
| "learning_rate": 4.508473667274482e-05, | |
| "loss": 0.07499915599822998, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.2002216739962101, | |
| "grad_norm": 0.6899816393852234, | |
| "learning_rate": 4.499535199685366e-05, | |
| "loss": 0.07030135154724121, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.20379706103185669, | |
| "grad_norm": 1.0485793352127075, | |
| "learning_rate": 4.490596732096249e-05, | |
| "loss": 0.07988662719726562, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.2073724480675033, | |
| "grad_norm": 1.5643068552017212, | |
| "learning_rate": 4.481658264507133e-05, | |
| "loss": 0.06960040092468261, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.2109478351031499, | |
| "grad_norm": 0.5016098022460938, | |
| "learning_rate": 4.4727197969180166e-05, | |
| "loss": 0.07297846794128418, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.21452322213879652, | |
| "grad_norm": 1.0210011005401611, | |
| "learning_rate": 4.4637813293289e-05, | |
| "loss": 0.07059600353240966, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.21809860917444313, | |
| "grad_norm": 1.0845718383789062, | |
| "learning_rate": 4.4548428617397835e-05, | |
| "loss": 0.06686034202575683, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.22167399621008974, | |
| "grad_norm": 0.8528701663017273, | |
| "learning_rate": 4.445904394150667e-05, | |
| "loss": 0.06841277122497559, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.22524938324573635, | |
| "grad_norm": 1.8847771883010864, | |
| "learning_rate": 4.43696592656155e-05, | |
| "loss": 0.0757840919494629, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.22882477028138296, | |
| "grad_norm": 2.079796075820923, | |
| "learning_rate": 4.428027458972434e-05, | |
| "loss": 0.06774754524230957, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.23240015731702957, | |
| "grad_norm": 1.0023269653320312, | |
| "learning_rate": 4.419088991383317e-05, | |
| "loss": 0.07408513069152832, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.23597554435267618, | |
| "grad_norm": 1.2481714487075806, | |
| "learning_rate": 4.410150523794201e-05, | |
| "loss": 0.07167030811309814, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.23955093138832279, | |
| "grad_norm": 1.2812612056732178, | |
| "learning_rate": 4.4012120562050846e-05, | |
| "loss": 0.07096508502960205, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.2431263184239694, | |
| "grad_norm": 1.1778594255447388, | |
| "learning_rate": 4.392273588615968e-05, | |
| "loss": 0.06785487651824951, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.246701705459616, | |
| "grad_norm": 1.0923346281051636, | |
| "learning_rate": 4.3833351210268515e-05, | |
| "loss": 0.07081903457641602, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.2502770924952626, | |
| "grad_norm": 0.9252088069915771, | |
| "learning_rate": 4.374396653437735e-05, | |
| "loss": 0.0647373390197754, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.2538524795309092, | |
| "grad_norm": 2.192573070526123, | |
| "learning_rate": 4.365458185848618e-05, | |
| "loss": 0.0676526165008545, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.2574278665665558, | |
| "grad_norm": 1.6381704807281494, | |
| "learning_rate": 4.356519718259502e-05, | |
| "loss": 0.07402976512908936, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.26100325360220245, | |
| "grad_norm": 0.9283214807510376, | |
| "learning_rate": 4.347581250670385e-05, | |
| "loss": 0.06920474052429199, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.264578640637849, | |
| "grad_norm": 0.8774147033691406, | |
| "learning_rate": 4.338642783081269e-05, | |
| "loss": 0.07280929565429688, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.26815402767349567, | |
| "grad_norm": 1.8515883684158325, | |
| "learning_rate": 4.3297043154921526e-05, | |
| "loss": 0.07380086898803712, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.26815402767349567, | |
| "eval_accuracy": 0.9785046625741701, | |
| "eval_f1": 0.8438232328500399, | |
| "eval_loss": 0.08568981289863586, | |
| "eval_precision": 0.8172658575681245, | |
| "eval_recall": 0.8721645631486645, | |
| "eval_runtime": 27.0275, | |
| "eval_samples_per_second": 832.487, | |
| "eval_steps_per_second": 23.125, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.27172941470914225, | |
| "grad_norm": 0.7200068235397339, | |
| "learning_rate": 4.320765847903036e-05, | |
| "loss": 0.0730604887008667, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.2753048017447889, | |
| "grad_norm": 0.915267288684845, | |
| "learning_rate": 4.3118273803139195e-05, | |
| "loss": 0.07050428867340088, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.27888018878043547, | |
| "grad_norm": 0.7131528854370117, | |
| "learning_rate": 4.3028889127248025e-05, | |
| "loss": 0.0710810136795044, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.2824555758160821, | |
| "grad_norm": 1.1002038717269897, | |
| "learning_rate": 4.293950445135686e-05, | |
| "loss": 0.07342493057250976, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.2860309628517287, | |
| "grad_norm": 0.9407269954681396, | |
| "learning_rate": 4.2850119775465694e-05, | |
| "loss": 0.0673301601409912, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.2896063498873753, | |
| "grad_norm": 2.832193613052368, | |
| "learning_rate": 4.276073509957453e-05, | |
| "loss": 0.06240209102630615, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.2931817369230219, | |
| "grad_norm": 0.8768466114997864, | |
| "learning_rate": 4.267135042368336e-05, | |
| "loss": 0.06878421783447265, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.29675712395866854, | |
| "grad_norm": 2.6219418048858643, | |
| "learning_rate": 4.25819657477922e-05, | |
| "loss": 0.06775379657745362, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.3003325109943151, | |
| "grad_norm": 1.4696264266967773, | |
| "learning_rate": 4.249258107190103e-05, | |
| "loss": 0.06918183803558349, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.30390789802996176, | |
| "grad_norm": 0.3726998269557953, | |
| "learning_rate": 4.240319639600987e-05, | |
| "loss": 0.0662617588043213, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.30748328506560835, | |
| "grad_norm": 0.7445316314697266, | |
| "learning_rate": 4.2313811720118706e-05, | |
| "loss": 0.06442654609680176, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.311058672101255, | |
| "grad_norm": 1.971909761428833, | |
| "learning_rate": 4.2224427044227536e-05, | |
| "loss": 0.0726364278793335, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.31463405913690157, | |
| "grad_norm": 1.5563815832138062, | |
| "learning_rate": 4.2135042368336374e-05, | |
| "loss": 0.06712177753448487, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.3182094461725482, | |
| "grad_norm": 0.7900974154472351, | |
| "learning_rate": 4.204565769244521e-05, | |
| "loss": 0.058818936347961426, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.3217848332081948, | |
| "grad_norm": 1.3865457773208618, | |
| "learning_rate": 4.195627301655404e-05, | |
| "loss": 0.06370719909667968, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.32536022024384137, | |
| "grad_norm": 0.34235015511512756, | |
| "learning_rate": 4.186688834066288e-05, | |
| "loss": 0.06904962062835693, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.328935607279488, | |
| "grad_norm": 2.1909384727478027, | |
| "learning_rate": 4.177750366477171e-05, | |
| "loss": 0.06057620048522949, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.3325109943151346, | |
| "grad_norm": 1.308127760887146, | |
| "learning_rate": 4.168811898888055e-05, | |
| "loss": 0.06866058826446533, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.3360863813507812, | |
| "grad_norm": 0.6863975524902344, | |
| "learning_rate": 4.1598734312989386e-05, | |
| "loss": 0.06358649730682372, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.3396617683864278, | |
| "grad_norm": 1.1869947910308838, | |
| "learning_rate": 4.1509349637098216e-05, | |
| "loss": 0.06475292205810547, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.34323715542207445, | |
| "grad_norm": 1.4386121034622192, | |
| "learning_rate": 4.1419964961207054e-05, | |
| "loss": 0.06661314010620117, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.34681254245772103, | |
| "grad_norm": 0.48181113600730896, | |
| "learning_rate": 4.133058028531589e-05, | |
| "loss": 0.060897083282470704, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.35038792949336767, | |
| "grad_norm": 0.8885261416435242, | |
| "learning_rate": 4.124119560942472e-05, | |
| "loss": 0.06239647388458252, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.35396331652901425, | |
| "grad_norm": 1.2147257328033447, | |
| "learning_rate": 4.115181093353356e-05, | |
| "loss": 0.06007009029388428, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.3575387035646609, | |
| "grad_norm": 3.1831276416778564, | |
| "learning_rate": 4.106242625764239e-05, | |
| "loss": 0.06108261108398438, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.3575387035646609, | |
| "eval_accuracy": 0.9812439807847978, | |
| "eval_f1": 0.8498944390638173, | |
| "eval_loss": 0.07966496795415878, | |
| "eval_precision": 0.8246859491839411, | |
| "eval_recall": 0.8766926372078314, | |
| "eval_runtime": 27.6584, | |
| "eval_samples_per_second": 813.495, | |
| "eval_steps_per_second": 22.597, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.36111409060030747, | |
| "grad_norm": 1.2144405841827393, | |
| "learning_rate": 4.097304158175123e-05, | |
| "loss": 0.06152146816253662, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.3646894776359541, | |
| "grad_norm": 0.777988076210022, | |
| "learning_rate": 4.0883656905860066e-05, | |
| "loss": 0.06342405319213867, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.3682648646716007, | |
| "grad_norm": 0.6419842839241028, | |
| "learning_rate": 4.0794272229968896e-05, | |
| "loss": 0.055976643562316894, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.3718402517072473, | |
| "grad_norm": 0.45166343450546265, | |
| "learning_rate": 4.0704887554077734e-05, | |
| "loss": 0.07191664695739747, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.3754156387428939, | |
| "grad_norm": 0.5005468726158142, | |
| "learning_rate": 4.0615502878186565e-05, | |
| "loss": 0.06205938339233399, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.37899102577854055, | |
| "grad_norm": 0.6201260089874268, | |
| "learning_rate": 4.0526118202295396e-05, | |
| "loss": 0.061759543418884275, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.38256641281418713, | |
| "grad_norm": 0.4341242015361786, | |
| "learning_rate": 4.043673352640423e-05, | |
| "loss": 0.06618201732635498, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.38614179984983377, | |
| "grad_norm": 0.4003482460975647, | |
| "learning_rate": 4.034734885051307e-05, | |
| "loss": 0.06178065299987793, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.38971718688548035, | |
| "grad_norm": 1.0296162366867065, | |
| "learning_rate": 4.02579641746219e-05, | |
| "loss": 0.06249929904937744, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.393292573921127, | |
| "grad_norm": 1.362121820449829, | |
| "learning_rate": 4.016857949873074e-05, | |
| "loss": 0.05500233173370361, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.39686796095677357, | |
| "grad_norm": 0.7699733376502991, | |
| "learning_rate": 4.007919482283957e-05, | |
| "loss": 0.060595006942749025, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.4004433479924202, | |
| "grad_norm": 1.3927844762802124, | |
| "learning_rate": 3.998981014694841e-05, | |
| "loss": 0.05860278129577637, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.4040187350280668, | |
| "grad_norm": 0.5842928290367126, | |
| "learning_rate": 3.9900425471057245e-05, | |
| "loss": 0.062330193519592285, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.40759412206371337, | |
| "grad_norm": 1.231602430343628, | |
| "learning_rate": 3.9811040795166076e-05, | |
| "loss": 0.05743512153625488, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.41116950909936, | |
| "grad_norm": 0.33235710859298706, | |
| "learning_rate": 3.972165611927491e-05, | |
| "loss": 0.059948296546936036, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.4147448961350066, | |
| "grad_norm": 0.812560498714447, | |
| "learning_rate": 3.963227144338375e-05, | |
| "loss": 0.06013148784637451, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.41832028317065323, | |
| "grad_norm": 0.7160065174102783, | |
| "learning_rate": 3.954288676749258e-05, | |
| "loss": 0.0654984951019287, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.4218956702062998, | |
| "grad_norm": 0.959859311580658, | |
| "learning_rate": 3.945350209160142e-05, | |
| "loss": 0.061361746788024904, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.42547105724194645, | |
| "grad_norm": 0.661882758140564, | |
| "learning_rate": 3.936411741571025e-05, | |
| "loss": 0.05800935268402099, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.42904644427759303, | |
| "grad_norm": 1.3494808673858643, | |
| "learning_rate": 3.927473273981909e-05, | |
| "loss": 0.058743157386779786, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.43262183131323967, | |
| "grad_norm": 0.3964793384075165, | |
| "learning_rate": 3.9185348063927925e-05, | |
| "loss": 0.05860978603363037, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.43619721834888625, | |
| "grad_norm": 0.6984548568725586, | |
| "learning_rate": 3.9095963388036756e-05, | |
| "loss": 0.05355045795440674, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.4397726053845329, | |
| "grad_norm": 0.9193189144134521, | |
| "learning_rate": 3.900657871214559e-05, | |
| "loss": 0.05985400676727295, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.44334799242017947, | |
| "grad_norm": 2.1851706504821777, | |
| "learning_rate": 3.891719403625443e-05, | |
| "loss": 0.06027592182159424, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.4469233794558261, | |
| "grad_norm": 2.280050754547119, | |
| "learning_rate": 3.882780936036326e-05, | |
| "loss": 0.05881267070770264, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.4469233794558261, | |
| "eval_accuracy": 0.9822486078551317, | |
| "eval_f1": 0.8582031250000001, | |
| "eval_loss": 0.07316970080137253, | |
| "eval_precision": 0.8336179093151205, | |
| "eval_recall": 0.884282551821292, | |
| "eval_runtime": 27.7811, | |
| "eval_samples_per_second": 809.904, | |
| "eval_steps_per_second": 22.497, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.4504987664914727, | |
| "grad_norm": 0.9513980746269226, | |
| "learning_rate": 3.87384246844721e-05, | |
| "loss": 0.05991718769073486, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.45407415352711933, | |
| "grad_norm": 0.8513447046279907, | |
| "learning_rate": 3.8649040008580937e-05, | |
| "loss": 0.059436683654785154, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.4576495405627659, | |
| "grad_norm": 3.6959080696105957, | |
| "learning_rate": 3.855965533268977e-05, | |
| "loss": 0.06146327018737793, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.46122492759841255, | |
| "grad_norm": 0.4215289056301117, | |
| "learning_rate": 3.8470270656798605e-05, | |
| "loss": 0.051028499603271486, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.46480031463405913, | |
| "grad_norm": 0.553249716758728, | |
| "learning_rate": 3.8380885980907436e-05, | |
| "loss": 0.05888910293579101, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.46837570166970577, | |
| "grad_norm": 0.534638524055481, | |
| "learning_rate": 3.8291501305016266e-05, | |
| "loss": 0.056477956771850586, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.47195108870535235, | |
| "grad_norm": 0.5859609842300415, | |
| "learning_rate": 3.8202116629125104e-05, | |
| "loss": 0.05791654109954834, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.475526475740999, | |
| "grad_norm": 0.6610586047172546, | |
| "learning_rate": 3.8112731953233935e-05, | |
| "loss": 0.05362565040588379, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.47910186277664557, | |
| "grad_norm": 0.6048291325569153, | |
| "learning_rate": 3.802334727734277e-05, | |
| "loss": 0.057788271903991696, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.4826772498122922, | |
| "grad_norm": 0.7319697141647339, | |
| "learning_rate": 3.793396260145161e-05, | |
| "loss": 0.05477115631103516, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.4862526368479388, | |
| "grad_norm": 0.5771811008453369, | |
| "learning_rate": 3.784457792556044e-05, | |
| "loss": 0.059410476684570314, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.4898280238835854, | |
| "grad_norm": 1.9499260187149048, | |
| "learning_rate": 3.775519324966928e-05, | |
| "loss": 0.052494893074035646, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.493403410919232, | |
| "grad_norm": 0.8795179128646851, | |
| "learning_rate": 3.766580857377811e-05, | |
| "loss": 0.05528387546539307, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.4969787979548786, | |
| "grad_norm": 0.5892202258110046, | |
| "learning_rate": 3.7576423897886947e-05, | |
| "loss": 0.05600544452667236, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.5005541849905252, | |
| "grad_norm": 0.6402941346168518, | |
| "learning_rate": 3.7487039221995784e-05, | |
| "loss": 0.05357628345489502, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.5041295720261718, | |
| "grad_norm": 0.5255988836288452, | |
| "learning_rate": 3.7397654546104615e-05, | |
| "loss": 0.057103352546691896, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.5077049590618185, | |
| "grad_norm": 0.8301808834075928, | |
| "learning_rate": 3.730826987021345e-05, | |
| "loss": 0.0532010555267334, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.5112803460974651, | |
| "grad_norm": 0.6901052594184875, | |
| "learning_rate": 3.721888519432229e-05, | |
| "loss": 0.05516294002532959, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.5148557331331116, | |
| "grad_norm": 0.9628658890724182, | |
| "learning_rate": 3.712950051843112e-05, | |
| "loss": 0.06214995384216308, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.5184311201687583, | |
| "grad_norm": 1.3679792881011963, | |
| "learning_rate": 3.704011584253996e-05, | |
| "loss": 0.05541347503662109, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.5220065072044049, | |
| "grad_norm": 0.23267334699630737, | |
| "learning_rate": 3.695073116664879e-05, | |
| "loss": 0.0589248275756836, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.5255818942400515, | |
| "grad_norm": 0.6239579319953918, | |
| "learning_rate": 3.6861346490757627e-05, | |
| "loss": 0.053284521102905276, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.529157281275698, | |
| "grad_norm": 0.7674051523208618, | |
| "learning_rate": 3.6771961814866464e-05, | |
| "loss": 0.05738714218139648, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.5327326683113447, | |
| "grad_norm": 0.8594136834144592, | |
| "learning_rate": 3.6682577138975295e-05, | |
| "loss": 0.055074062347412106, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.5363080553469913, | |
| "grad_norm": 1.3505005836486816, | |
| "learning_rate": 3.659319246308413e-05, | |
| "loss": 0.05417671680450439, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.5363080553469913, | |
| "eval_accuracy": 0.9838496993745539, | |
| "eval_f1": 0.8737497800457504, | |
| "eval_loss": 0.06651480495929718, | |
| "eval_precision": 0.8560178736432719, | |
| "eval_recall": 0.8922318373918293, | |
| "eval_runtime": 27.3392, | |
| "eval_samples_per_second": 822.994, | |
| "eval_steps_per_second": 22.861, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.539883442382638, | |
| "grad_norm": 0.7868797779083252, | |
| "learning_rate": 3.650380778719297e-05, | |
| "loss": 0.060230064392089847, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.5434588294182845, | |
| "grad_norm": 0.3154486119747162, | |
| "learning_rate": 3.64144231113018e-05, | |
| "loss": 0.05918198108673096, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.5470342164539311, | |
| "grad_norm": 0.5093942284584045, | |
| "learning_rate": 3.632503843541064e-05, | |
| "loss": 0.05554147720336914, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.5506096034895778, | |
| "grad_norm": 1.080651044845581, | |
| "learning_rate": 3.623565375951947e-05, | |
| "loss": 0.05167547702789307, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.5541849905252244, | |
| "grad_norm": 1.2834564447402954, | |
| "learning_rate": 3.614626908362831e-05, | |
| "loss": 0.05269266128540039, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.5577603775608709, | |
| "grad_norm": 0.9456666707992554, | |
| "learning_rate": 3.605688440773714e-05, | |
| "loss": 0.05228121280670166, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.5613357645965176, | |
| "grad_norm": 1.931270718574524, | |
| "learning_rate": 3.5967499731845975e-05, | |
| "loss": 0.05532039642333984, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.5649111516321642, | |
| "grad_norm": 1.9416167736053467, | |
| "learning_rate": 3.5878115055954806e-05, | |
| "loss": 0.05132888793945312, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.5684865386678107, | |
| "grad_norm": 0.2992418110370636, | |
| "learning_rate": 3.578873038006364e-05, | |
| "loss": 0.05806799411773682, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.5720619257034574, | |
| "grad_norm": 0.7173650860786438, | |
| "learning_rate": 3.5699345704172474e-05, | |
| "loss": 0.05833985805511475, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.575637312739104, | |
| "grad_norm": 1.0283321142196655, | |
| "learning_rate": 3.560996102828131e-05, | |
| "loss": 0.05651096820831299, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.5792126997747507, | |
| "grad_norm": 0.43172529339790344, | |
| "learning_rate": 3.552057635239015e-05, | |
| "loss": 0.05330658435821533, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.5827880868103972, | |
| "grad_norm": 0.6333898901939392, | |
| "learning_rate": 3.543119167649898e-05, | |
| "loss": 0.053462224006652834, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.5863634738460438, | |
| "grad_norm": 0.8817270994186401, | |
| "learning_rate": 3.534180700060782e-05, | |
| "loss": 0.05549070358276367, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.5899388608816905, | |
| "grad_norm": 4.280094146728516, | |
| "learning_rate": 3.525242232471665e-05, | |
| "loss": 0.05985762119293213, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.5935142479173371, | |
| "grad_norm": 0.62297523021698, | |
| "learning_rate": 3.5163037648825486e-05, | |
| "loss": 0.05666534423828125, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.5970896349529836, | |
| "grad_norm": 0.29738688468933105, | |
| "learning_rate": 3.507365297293432e-05, | |
| "loss": 0.053336749076843264, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.6006650219886303, | |
| "grad_norm": 1.139436960220337, | |
| "learning_rate": 3.4984268297043154e-05, | |
| "loss": 0.05532379150390625, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.6042404090242769, | |
| "grad_norm": 0.37320244312286377, | |
| "learning_rate": 3.489488362115199e-05, | |
| "loss": 0.05435383796691894, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.6078157960599235, | |
| "grad_norm": 0.5908817648887634, | |
| "learning_rate": 3.480549894526083e-05, | |
| "loss": 0.052842388153076174, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.6113911830955701, | |
| "grad_norm": 0.4973529279232025, | |
| "learning_rate": 3.471611426936966e-05, | |
| "loss": 0.05500569343566895, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.6149665701312167, | |
| "grad_norm": 1.438362717628479, | |
| "learning_rate": 3.46267295934785e-05, | |
| "loss": 0.04875383853912354, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.6185419571668633, | |
| "grad_norm": 1.1460702419281006, | |
| "learning_rate": 3.4537344917587335e-05, | |
| "loss": 0.05489758968353271, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.62211734420251, | |
| "grad_norm": 0.359030157327652, | |
| "learning_rate": 3.4447960241696166e-05, | |
| "loss": 0.0537039852142334, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.6256927312381565, | |
| "grad_norm": 1.0160428285598755, | |
| "learning_rate": 3.4358575565805e-05, | |
| "loss": 0.05569758415222168, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.6256927312381565, | |
| "eval_accuracy": 0.9845403147375635, | |
| "eval_f1": 0.877477096546864, | |
| "eval_loss": 0.0613168403506279, | |
| "eval_precision": 0.8606879199270053, | |
| "eval_recall": 0.8949343069890464, | |
| "eval_runtime": 27.832, | |
| "eval_samples_per_second": 808.423, | |
| "eval_steps_per_second": 22.456, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.6292681182738031, | |
| "grad_norm": 0.9637561440467834, | |
| "learning_rate": 3.4269190889913834e-05, | |
| "loss": 0.05049953460693359, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.6328435053094498, | |
| "grad_norm": 0.4047839343547821, | |
| "learning_rate": 3.417980621402267e-05, | |
| "loss": 0.051105165481567384, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.6364188923450964, | |
| "grad_norm": 0.5562448501586914, | |
| "learning_rate": 3.409042153813151e-05, | |
| "loss": 0.04887496471405029, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.6399942793807429, | |
| "grad_norm": 0.7675971984863281, | |
| "learning_rate": 3.400103686224034e-05, | |
| "loss": 0.05429211139678955, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.6435696664163896, | |
| "grad_norm": 0.44871142506599426, | |
| "learning_rate": 3.391165218634918e-05, | |
| "loss": 0.04755040645599365, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.6471450534520362, | |
| "grad_norm": 0.4453502297401428, | |
| "learning_rate": 3.382226751045801e-05, | |
| "loss": 0.05987214088439941, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.6507204404876827, | |
| "grad_norm": 0.4004403352737427, | |
| "learning_rate": 3.373288283456684e-05, | |
| "loss": 0.054094972610473635, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.6542958275233294, | |
| "grad_norm": 0.8362923264503479, | |
| "learning_rate": 3.364349815867568e-05, | |
| "loss": 0.04843898296356201, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.657871214558976, | |
| "grad_norm": 0.6269751787185669, | |
| "learning_rate": 3.355411348278451e-05, | |
| "loss": 0.05007925033569336, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.6614466015946227, | |
| "grad_norm": 0.7181591987609863, | |
| "learning_rate": 3.3464728806893345e-05, | |
| "loss": 0.05742511749267578, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.6650219886302692, | |
| "grad_norm": 2.8255951404571533, | |
| "learning_rate": 3.337534413100218e-05, | |
| "loss": 0.050363807678222655, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.6685973756659158, | |
| "grad_norm": 1.1854428052902222, | |
| "learning_rate": 3.328595945511101e-05, | |
| "loss": 0.05580689430236816, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.6721727627015625, | |
| "grad_norm": 0.3564029335975647, | |
| "learning_rate": 3.319657477921985e-05, | |
| "loss": 0.04986191749572754, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.6757481497372091, | |
| "grad_norm": 0.9392517805099487, | |
| "learning_rate": 3.310719010332869e-05, | |
| "loss": 0.05029686450958252, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.6793235367728556, | |
| "grad_norm": 0.9811071157455444, | |
| "learning_rate": 3.301780542743752e-05, | |
| "loss": 0.05468404293060303, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.6828989238085023, | |
| "grad_norm": 2.1979386806488037, | |
| "learning_rate": 3.292842075154636e-05, | |
| "loss": 0.04795463562011719, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.6864743108441489, | |
| "grad_norm": 4.135185241699219, | |
| "learning_rate": 3.2839036075655194e-05, | |
| "loss": 0.051746668815612795, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.6900496978797955, | |
| "grad_norm": 0.611629843711853, | |
| "learning_rate": 3.2749651399764025e-05, | |
| "loss": 0.05136622428894043, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.6936250849154421, | |
| "grad_norm": 0.7905089259147644, | |
| "learning_rate": 3.266026672387286e-05, | |
| "loss": 0.0534757661819458, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.6972004719510887, | |
| "grad_norm": 0.3704472482204437, | |
| "learning_rate": 3.257088204798169e-05, | |
| "loss": 0.05190816879272461, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.7007758589867353, | |
| "grad_norm": 0.41257503628730774, | |
| "learning_rate": 3.248149737209053e-05, | |
| "loss": 0.05314404487609863, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.704351246022382, | |
| "grad_norm": 1.0130038261413574, | |
| "learning_rate": 3.239211269619937e-05, | |
| "loss": 0.051221070289611814, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.7079266330580285, | |
| "grad_norm": 0.44306495785713196, | |
| "learning_rate": 3.23027280203082e-05, | |
| "loss": 0.05151443004608154, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.7115020200936751, | |
| "grad_norm": 1.3375622034072876, | |
| "learning_rate": 3.221334334441704e-05, | |
| "loss": 0.051753206253051756, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.7150774071293218, | |
| "grad_norm": 0.48512154817581177, | |
| "learning_rate": 3.2123958668525874e-05, | |
| "loss": 0.04863485813140869, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.7150774071293218, | |
| "eval_accuracy": 0.9851494027728759, | |
| "eval_f1": 0.8755350929603205, | |
| "eval_loss": 0.05904531106352806, | |
| "eval_precision": 0.856669280182671, | |
| "eval_recall": 0.8952505534312739, | |
| "eval_runtime": 27.4851, | |
| "eval_samples_per_second": 818.624, | |
| "eval_steps_per_second": 22.74, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.7186527941649684, | |
| "grad_norm": 0.45322614908218384, | |
| "learning_rate": 3.2034573992634705e-05, | |
| "loss": 0.05499778270721436, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.7222281812006149, | |
| "grad_norm": 0.4665698707103729, | |
| "learning_rate": 3.194518931674354e-05, | |
| "loss": 0.05100120544433594, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.7258035682362616, | |
| "grad_norm": 0.7074053883552551, | |
| "learning_rate": 3.185580464085237e-05, | |
| "loss": 0.04919565200805664, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.7293789552719082, | |
| "grad_norm": 1.2581121921539307, | |
| "learning_rate": 3.176641996496121e-05, | |
| "loss": 0.05387771606445312, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.7329543423075547, | |
| "grad_norm": 0.3161942660808563, | |
| "learning_rate": 3.167703528907004e-05, | |
| "loss": 0.04680909633636474, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.7365297293432014, | |
| "grad_norm": 0.8641468286514282, | |
| "learning_rate": 3.158765061317888e-05, | |
| "loss": 0.04961400508880615, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.740105116378848, | |
| "grad_norm": 0.6563690304756165, | |
| "learning_rate": 3.149826593728771e-05, | |
| "loss": 0.05145148754119873, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.7436805034144947, | |
| "grad_norm": 0.3394390940666199, | |
| "learning_rate": 3.140888126139655e-05, | |
| "loss": 0.048502054214477536, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.7472558904501412, | |
| "grad_norm": 0.5382287502288818, | |
| "learning_rate": 3.131949658550538e-05, | |
| "loss": 0.052634720802307126, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.7508312774857878, | |
| "grad_norm": 0.5506078004837036, | |
| "learning_rate": 3.1230111909614216e-05, | |
| "loss": 0.05615939140319824, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.7544066645214345, | |
| "grad_norm": 0.4533487558364868, | |
| "learning_rate": 3.114072723372305e-05, | |
| "loss": 0.0571517276763916, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.7579820515570811, | |
| "grad_norm": 1.2659982442855835, | |
| "learning_rate": 3.1051342557831884e-05, | |
| "loss": 0.05127411842346191, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.7615574385927276, | |
| "grad_norm": 0.38378211855888367, | |
| "learning_rate": 3.096195788194072e-05, | |
| "loss": 0.04847681522369385, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.7651328256283743, | |
| "grad_norm": 0.2992658317089081, | |
| "learning_rate": 3.087257320604955e-05, | |
| "loss": 0.05205928325653076, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.7687082126640209, | |
| "grad_norm": 0.5818284749984741, | |
| "learning_rate": 3.078318853015839e-05, | |
| "loss": 0.04922466278076172, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.7722835996996675, | |
| "grad_norm": 0.41028082370758057, | |
| "learning_rate": 3.069380385426723e-05, | |
| "loss": 0.04695847034454346, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.7758589867353141, | |
| "grad_norm": 0.31596678495407104, | |
| "learning_rate": 3.060441917837606e-05, | |
| "loss": 0.049401440620422364, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.7794343737709607, | |
| "grad_norm": 0.39899763464927673, | |
| "learning_rate": 3.0515034502484896e-05, | |
| "loss": 0.0458904504776001, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.7830097608066073, | |
| "grad_norm": 4.016449928283691, | |
| "learning_rate": 3.0425649826593733e-05, | |
| "loss": 0.04808720588684082, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.786585147842254, | |
| "grad_norm": 1.8184044361114502, | |
| "learning_rate": 3.0336265150702564e-05, | |
| "loss": 0.050203371047973636, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.7901605348779005, | |
| "grad_norm": 0.47340500354766846, | |
| "learning_rate": 3.0246880474811402e-05, | |
| "loss": 0.04804760932922363, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.7937359219135471, | |
| "grad_norm": 1.306254506111145, | |
| "learning_rate": 3.0157495798920233e-05, | |
| "loss": 0.04765232563018799, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.7973113089491938, | |
| "grad_norm": 0.6133173704147339, | |
| "learning_rate": 3.006811112302907e-05, | |
| "loss": 0.04909511566162109, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.8008866959848404, | |
| "grad_norm": 1.063022494316101, | |
| "learning_rate": 2.9978726447137904e-05, | |
| "loss": 0.048132557868957516, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.8044620830204869, | |
| "grad_norm": 0.4442903697490692, | |
| "learning_rate": 2.988934177124674e-05, | |
| "loss": 0.04739914894104004, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.8044620830204869, | |
| "eval_accuracy": 0.9853803092042249, | |
| "eval_f1": 0.8812850838481906, | |
| "eval_loss": 0.060121480375528336, | |
| "eval_precision": 0.8660403280645027, | |
| "eval_recall": 0.8970761578932237, | |
| "eval_runtime": 27.7438, | |
| "eval_samples_per_second": 810.991, | |
| "eval_steps_per_second": 22.528, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.8080374700561336, | |
| "grad_norm": 0.8813098073005676, | |
| "learning_rate": 2.9799957095355573e-05, | |
| "loss": 0.05161878108978271, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.8116128570917802, | |
| "grad_norm": 0.7460477948188782, | |
| "learning_rate": 2.971057241946441e-05, | |
| "loss": 0.0515793514251709, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.8151882441274267, | |
| "grad_norm": 0.5062021613121033, | |
| "learning_rate": 2.962118774357324e-05, | |
| "loss": 0.04754622936248779, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.8187636311630734, | |
| "grad_norm": 0.7567230463027954, | |
| "learning_rate": 2.953180306768208e-05, | |
| "loss": 0.05149875164031983, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.82233901819872, | |
| "grad_norm": 0.7439789772033691, | |
| "learning_rate": 2.944241839179091e-05, | |
| "loss": 0.04974982738494873, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.8259144052343667, | |
| "grad_norm": 0.669979453086853, | |
| "learning_rate": 2.9353033715899747e-05, | |
| "loss": 0.04604334354400635, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.8294897922700132, | |
| "grad_norm": 1.005071759223938, | |
| "learning_rate": 2.9263649040008584e-05, | |
| "loss": 0.04706980228424072, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.8330651793056598, | |
| "grad_norm": 0.31772536039352417, | |
| "learning_rate": 2.9174264364117415e-05, | |
| "loss": 0.05056349754333496, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.8366405663413065, | |
| "grad_norm": 0.32514145970344543, | |
| "learning_rate": 2.9084879688226253e-05, | |
| "loss": 0.04744285106658935, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.8402159533769531, | |
| "grad_norm": 1.0965938568115234, | |
| "learning_rate": 2.899549501233509e-05, | |
| "loss": 0.04618396759033203, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.8437913404125996, | |
| "grad_norm": 0.6312568783760071, | |
| "learning_rate": 2.890611033644392e-05, | |
| "loss": 0.04719692230224609, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.8473667274482463, | |
| "grad_norm": 0.5469244122505188, | |
| "learning_rate": 2.881672566055276e-05, | |
| "loss": 0.04657519817352295, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.8509421144838929, | |
| "grad_norm": 0.9338961839675903, | |
| "learning_rate": 2.8727340984661593e-05, | |
| "loss": 0.04994749069213867, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.8545175015195395, | |
| "grad_norm": 0.6873934268951416, | |
| "learning_rate": 2.8637956308770423e-05, | |
| "loss": 0.04766389846801758, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.8580928885551861, | |
| "grad_norm": 1.3465129137039185, | |
| "learning_rate": 2.854857163287926e-05, | |
| "loss": 0.04612489223480225, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.8616682755908327, | |
| "grad_norm": 0.3835633397102356, | |
| "learning_rate": 2.8459186956988092e-05, | |
| "loss": 0.048950729370117185, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.8652436626264793, | |
| "grad_norm": 0.7884401082992554, | |
| "learning_rate": 2.836980228109693e-05, | |
| "loss": 0.046166911125183105, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.868819049662126, | |
| "grad_norm": 0.49389323592185974, | |
| "learning_rate": 2.8280417605205767e-05, | |
| "loss": 0.046818752288818356, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.8723944366977725, | |
| "grad_norm": 0.6339199542999268, | |
| "learning_rate": 2.8191032929314598e-05, | |
| "loss": 0.04933880805969238, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.8759698237334191, | |
| "grad_norm": 0.5761122703552246, | |
| "learning_rate": 2.8101648253423435e-05, | |
| "loss": 0.044534187316894534, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.8795452107690658, | |
| "grad_norm": 0.45685720443725586, | |
| "learning_rate": 2.8012263577532273e-05, | |
| "loss": 0.051560683250427244, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.8831205978047124, | |
| "grad_norm": 0.4419282078742981, | |
| "learning_rate": 2.7922878901641104e-05, | |
| "loss": 0.043671913146972656, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.8866959848403589, | |
| "grad_norm": 0.734449028968811, | |
| "learning_rate": 2.783349422574994e-05, | |
| "loss": 0.05153060913085938, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.8902713718760056, | |
| "grad_norm": 1.0401020050048828, | |
| "learning_rate": 2.7744109549858772e-05, | |
| "loss": 0.04694102287292481, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.8938467589116522, | |
| "grad_norm": 0.646715521812439, | |
| "learning_rate": 2.765472487396761e-05, | |
| "loss": 0.054542098045349124, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.8938467589116522, | |
| "eval_accuracy": 0.9856964248425865, | |
| "eval_f1": 0.8835749303424683, | |
| "eval_loss": 0.05743265897035599, | |
| "eval_precision": 0.8674635382761534, | |
| "eval_recall": 0.9002961216686313, | |
| "eval_runtime": 27.4328, | |
| "eval_samples_per_second": 820.186, | |
| "eval_steps_per_second": 22.783, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.8974221459472987, | |
| "grad_norm": 0.3341001570224762, | |
| "learning_rate": 2.7565340198076444e-05, | |
| "loss": 0.04484391689300537, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.9009975329829454, | |
| "grad_norm": 0.700167715549469, | |
| "learning_rate": 2.7475955522185278e-05, | |
| "loss": 0.04423677921295166, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.904572920018592, | |
| "grad_norm": 1.2379734516143799, | |
| "learning_rate": 2.7386570846294112e-05, | |
| "loss": 0.04488907337188721, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.9081483070542387, | |
| "grad_norm": 0.4145027697086334, | |
| "learning_rate": 2.729718617040295e-05, | |
| "loss": 0.04520434856414795, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.9117236940898852, | |
| "grad_norm": 0.3579607605934143, | |
| "learning_rate": 2.720780149451178e-05, | |
| "loss": 0.04551751613616943, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.9152990811255318, | |
| "grad_norm": 0.5503469705581665, | |
| "learning_rate": 2.7118416818620618e-05, | |
| "loss": 0.04752420425415039, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.9188744681611785, | |
| "grad_norm": 0.41558948159217834, | |
| "learning_rate": 2.702903214272945e-05, | |
| "loss": 0.05269415855407715, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.9224498551968251, | |
| "grad_norm": 1.5605533123016357, | |
| "learning_rate": 2.6939647466838286e-05, | |
| "loss": 0.0499528169631958, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.9260252422324716, | |
| "grad_norm": 0.6252946853637695, | |
| "learning_rate": 2.6850262790947124e-05, | |
| "loss": 0.04681193351745606, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.9296006292681183, | |
| "grad_norm": 0.4643714427947998, | |
| "learning_rate": 2.6760878115055954e-05, | |
| "loss": 0.04491585254669189, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.9331760163037649, | |
| "grad_norm": 1.0552211999893188, | |
| "learning_rate": 2.6671493439164792e-05, | |
| "loss": 0.050134167671203614, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.9367514033394115, | |
| "grad_norm": 0.2919712960720062, | |
| "learning_rate": 2.6582108763273626e-05, | |
| "loss": 0.045297045707702634, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.9403267903750581, | |
| "grad_norm": 0.5062688589096069, | |
| "learning_rate": 2.649272408738246e-05, | |
| "loss": 0.04247344017028808, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.9439021774107047, | |
| "grad_norm": 0.4406910538673401, | |
| "learning_rate": 2.6403339411491294e-05, | |
| "loss": 0.0437799072265625, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.9474775644463513, | |
| "grad_norm": 0.41486886143684387, | |
| "learning_rate": 2.6313954735600132e-05, | |
| "loss": 0.04669870376586914, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.951052951481998, | |
| "grad_norm": 0.6877465844154358, | |
| "learning_rate": 2.6224570059708963e-05, | |
| "loss": 0.04583415985107422, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.9546283385176445, | |
| "grad_norm": 0.6501809358596802, | |
| "learning_rate": 2.61351853838178e-05, | |
| "loss": 0.04593777179718018, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.9582037255532911, | |
| "grad_norm": 0.7312682271003723, | |
| "learning_rate": 2.604580070792663e-05, | |
| "loss": 0.050377216339111325, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.9617791125889378, | |
| "grad_norm": 0.8844775557518005, | |
| "learning_rate": 2.595641603203547e-05, | |
| "loss": 0.04860093593597412, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.9653544996245844, | |
| "grad_norm": 0.4647756814956665, | |
| "learning_rate": 2.5867031356144306e-05, | |
| "loss": 0.0445063066482544, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.9689298866602309, | |
| "grad_norm": 0.20223687589168549, | |
| "learning_rate": 2.5777646680253137e-05, | |
| "loss": 0.04691956520080567, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.9725052736958776, | |
| "grad_norm": 0.9210941195487976, | |
| "learning_rate": 2.5688262004361974e-05, | |
| "loss": 0.049297604560852054, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.9760806607315242, | |
| "grad_norm": 0.35992079973220825, | |
| "learning_rate": 2.5598877328470812e-05, | |
| "loss": 0.04701284408569336, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.9796560477671707, | |
| "grad_norm": 0.6507813334465027, | |
| "learning_rate": 2.5509492652579643e-05, | |
| "loss": 0.04716668605804444, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.9832314348028174, | |
| "grad_norm": 0.5909741520881653, | |
| "learning_rate": 2.542010797668848e-05, | |
| "loss": 0.048493666648864744, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.9832314348028174, | |
| "eval_accuracy": 0.9858305504462175, | |
| "eval_f1": 0.8868203247033212, | |
| "eval_loss": 0.05660928413271904, | |
| "eval_precision": 0.8723250413671315, | |
| "eval_recall": 0.9018054796883535, | |
| "eval_runtime": 27.7774, | |
| "eval_samples_per_second": 810.012, | |
| "eval_steps_per_second": 22.5, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.986806821838464, | |
| "grad_norm": 0.47291576862335205, | |
| "learning_rate": 2.533072330079731e-05, | |
| "loss": 0.04355491161346436, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.9903822088741107, | |
| "grad_norm": 0.4872467815876007, | |
| "learning_rate": 2.5241338624906145e-05, | |
| "loss": 0.0435347318649292, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.9939575959097572, | |
| "grad_norm": 1.711300015449524, | |
| "learning_rate": 2.5151953949014983e-05, | |
| "loss": 0.04561484336853027, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.9975329829454038, | |
| "grad_norm": 0.2917760908603668, | |
| "learning_rate": 2.5062569273123814e-05, | |
| "loss": 0.047463297843933105, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 1.0011083699810504, | |
| "grad_norm": 0.2678261697292328, | |
| "learning_rate": 2.497318459723265e-05, | |
| "loss": 0.04366901874542237, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.004683757016697, | |
| "grad_norm": 0.3751468062400818, | |
| "learning_rate": 2.4883799921341485e-05, | |
| "loss": 0.03846597194671631, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 1.0082591440523436, | |
| "grad_norm": 0.41662493348121643, | |
| "learning_rate": 2.4794415245450323e-05, | |
| "loss": 0.03653419733047485, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 1.0118345310879904, | |
| "grad_norm": 0.6062248945236206, | |
| "learning_rate": 2.4705030569559157e-05, | |
| "loss": 0.037252871990203856, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 1.015409918123637, | |
| "grad_norm": 0.7458221316337585, | |
| "learning_rate": 2.461564589366799e-05, | |
| "loss": 0.03445641756057739, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 1.0189853051592834, | |
| "grad_norm": 0.13679973781108856, | |
| "learning_rate": 2.4526261217776825e-05, | |
| "loss": 0.03599729061126709, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.0225606921949302, | |
| "grad_norm": 1.258949637413025, | |
| "learning_rate": 2.4436876541885663e-05, | |
| "loss": 0.037976634502410886, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 1.0261360792305767, | |
| "grad_norm": 0.27776288986206055, | |
| "learning_rate": 2.4347491865994497e-05, | |
| "loss": 0.03968371391296387, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 1.0297114662662232, | |
| "grad_norm": 0.34287697076797485, | |
| "learning_rate": 2.425810719010333e-05, | |
| "loss": 0.03572561502456665, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 1.03328685330187, | |
| "grad_norm": 0.5158637166023254, | |
| "learning_rate": 2.4168722514212165e-05, | |
| "loss": 0.036703295707702636, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 1.0368622403375165, | |
| "grad_norm": 0.8635151982307434, | |
| "learning_rate": 2.4079337838321e-05, | |
| "loss": 0.035954997539520264, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.040437627373163, | |
| "grad_norm": 0.6386840343475342, | |
| "learning_rate": 2.3989953162429834e-05, | |
| "loss": 0.039990205764770505, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 1.0440130144088098, | |
| "grad_norm": 0.2795710861682892, | |
| "learning_rate": 2.3900568486538668e-05, | |
| "loss": 0.03744415760040283, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 1.0475884014444563, | |
| "grad_norm": 0.674773097038269, | |
| "learning_rate": 2.3811183810647502e-05, | |
| "loss": 0.038765432834625246, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 1.051163788480103, | |
| "grad_norm": 0.5345519185066223, | |
| "learning_rate": 2.372179913475634e-05, | |
| "loss": 0.03793670177459717, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 1.0547391755157496, | |
| "grad_norm": 0.19475312530994415, | |
| "learning_rate": 2.3632414458865174e-05, | |
| "loss": 0.03510812759399414, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.058314562551396, | |
| "grad_norm": 0.6469267010688782, | |
| "learning_rate": 2.3543029782974008e-05, | |
| "loss": 0.03977480411529541, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 1.0618899495870429, | |
| "grad_norm": 0.3818305432796478, | |
| "learning_rate": 2.3453645107082842e-05, | |
| "loss": 0.03915615558624268, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 1.0654653366226894, | |
| "grad_norm": 0.7031393051147461, | |
| "learning_rate": 2.336426043119168e-05, | |
| "loss": 0.03701666355133057, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 1.069040723658336, | |
| "grad_norm": 0.34952452778816223, | |
| "learning_rate": 2.3274875755300514e-05, | |
| "loss": 0.03564514398574829, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 1.0726161106939827, | |
| "grad_norm": 0.5351042747497559, | |
| "learning_rate": 2.3185491079409348e-05, | |
| "loss": 0.04400619983673096, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.0726161106939827, | |
| "eval_accuracy": 0.9867462864302234, | |
| "eval_f1": 0.8903530810550676, | |
| "eval_loss": 0.05216454714536667, | |
| "eval_precision": 0.8769046324564705, | |
| "eval_recall": 0.9042204525199091, | |
| "eval_runtime": 27.3869, | |
| "eval_samples_per_second": 821.559, | |
| "eval_steps_per_second": 22.821, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.0761914977296292, | |
| "grad_norm": 0.6395847201347351, | |
| "learning_rate": 2.3096106403518182e-05, | |
| "loss": 0.03795994281768799, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 1.079766884765276, | |
| "grad_norm": 0.2738804221153259, | |
| "learning_rate": 2.3006721727627016e-05, | |
| "loss": 0.034112286567687986, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 1.0833422718009225, | |
| "grad_norm": 0.36416754126548767, | |
| "learning_rate": 2.291733705173585e-05, | |
| "loss": 0.03839835166931152, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 1.086917658836569, | |
| "grad_norm": 0.8902291059494019, | |
| "learning_rate": 2.2827952375844684e-05, | |
| "loss": 0.04109617233276367, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 1.0904930458722157, | |
| "grad_norm": 0.47186803817749023, | |
| "learning_rate": 2.2738567699953522e-05, | |
| "loss": 0.03920984029769897, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.0940684329078623, | |
| "grad_norm": 3.810819625854492, | |
| "learning_rate": 2.2649183024062356e-05, | |
| "loss": 0.0391163420677185, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 1.0976438199435088, | |
| "grad_norm": 0.8752216696739197, | |
| "learning_rate": 2.255979834817119e-05, | |
| "loss": 0.038404548168182374, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 1.1012192069791555, | |
| "grad_norm": 0.2776939570903778, | |
| "learning_rate": 2.2470413672280025e-05, | |
| "loss": 0.037470765113830566, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 1.104794594014802, | |
| "grad_norm": 0.549679160118103, | |
| "learning_rate": 2.2381028996388862e-05, | |
| "loss": 0.03804266691207886, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 1.1083699810504486, | |
| "grad_norm": 0.7605739235877991, | |
| "learning_rate": 2.2291644320497696e-05, | |
| "loss": 0.03416654348373413, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.1119453680860953, | |
| "grad_norm": 0.16704197227954865, | |
| "learning_rate": 2.220225964460653e-05, | |
| "loss": 0.034537038803100585, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 1.1155207551217419, | |
| "grad_norm": 0.5772648453712463, | |
| "learning_rate": 2.2112874968715365e-05, | |
| "loss": 0.03786729097366333, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.1190961421573886, | |
| "grad_norm": 0.3576936423778534, | |
| "learning_rate": 2.2023490292824202e-05, | |
| "loss": 0.04146803379058838, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 1.1226715291930351, | |
| "grad_norm": 0.24434928596019745, | |
| "learning_rate": 2.1934105616933033e-05, | |
| "loss": 0.03837924718856812, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.1262469162286817, | |
| "grad_norm": 0.8151653409004211, | |
| "learning_rate": 2.1844720941041867e-05, | |
| "loss": 0.03402991771697998, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.1298223032643284, | |
| "grad_norm": 0.803303062915802, | |
| "learning_rate": 2.17553362651507e-05, | |
| "loss": 0.03701550483703613, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.133397690299975, | |
| "grad_norm": 0.5276838541030884, | |
| "learning_rate": 2.166595158925954e-05, | |
| "loss": 0.037687735557556154, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.1369730773356217, | |
| "grad_norm": 1.2563331127166748, | |
| "learning_rate": 2.1576566913368373e-05, | |
| "loss": 0.04105483055114746, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.1405484643712682, | |
| "grad_norm": 2.2794508934020996, | |
| "learning_rate": 2.1487182237477207e-05, | |
| "loss": 0.03871995687484741, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.1441238514069147, | |
| "grad_norm": 0.5270197987556458, | |
| "learning_rate": 2.139779756158604e-05, | |
| "loss": 0.03748847007751465, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.1476992384425615, | |
| "grad_norm": 0.4776967763900757, | |
| "learning_rate": 2.130841288569488e-05, | |
| "loss": 0.04054388523101807, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 1.151274625478208, | |
| "grad_norm": 0.281562864780426, | |
| "learning_rate": 2.1219028209803713e-05, | |
| "loss": 0.03565767288208008, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 1.1548500125138546, | |
| "grad_norm": 0.986331582069397, | |
| "learning_rate": 2.1129643533912547e-05, | |
| "loss": 0.03515695333480835, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 1.1584253995495013, | |
| "grad_norm": 1.0339690446853638, | |
| "learning_rate": 2.104025885802138e-05, | |
| "loss": 0.03650200843811035, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 1.1620007865851478, | |
| "grad_norm": 0.6622812747955322, | |
| "learning_rate": 2.095087418213022e-05, | |
| "loss": 0.03963910102844238, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.1620007865851478, | |
| "eval_accuracy": 0.9872691132930045, | |
| "eval_f1": 0.8901178950048444, | |
| "eval_loss": 0.050942763686180115, | |
| "eval_precision": 0.8760841419442859, | |
| "eval_recall": 0.904608573153552, | |
| "eval_runtime": 27.7471, | |
| "eval_samples_per_second": 810.896, | |
| "eval_steps_per_second": 22.525, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.1655761736207944, | |
| "grad_norm": 0.4157122075557709, | |
| "learning_rate": 2.0861489506239053e-05, | |
| "loss": 0.03528056383132935, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 1.169151560656441, | |
| "grad_norm": 1.0833650827407837, | |
| "learning_rate": 2.0772104830347887e-05, | |
| "loss": 0.0338432765007019, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 1.1727269476920876, | |
| "grad_norm": 0.6234818696975708, | |
| "learning_rate": 2.068272015445672e-05, | |
| "loss": 0.03545186996459961, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 1.1763023347277342, | |
| "grad_norm": 0.46430152654647827, | |
| "learning_rate": 2.0593335478565555e-05, | |
| "loss": 0.03938552379608154, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 1.179877721763381, | |
| "grad_norm": 0.32441213726997375, | |
| "learning_rate": 2.050395080267439e-05, | |
| "loss": 0.03765884399414063, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.1834531087990274, | |
| "grad_norm": 0.5149340033531189, | |
| "learning_rate": 2.0414566126783224e-05, | |
| "loss": 0.04111374378204346, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 1.1870284958346742, | |
| "grad_norm": 0.6311440467834473, | |
| "learning_rate": 2.032518145089206e-05, | |
| "loss": 0.03235443115234375, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 1.1906038828703207, | |
| "grad_norm": 0.41769224405288696, | |
| "learning_rate": 2.0235796775000895e-05, | |
| "loss": 0.03542477607727051, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 1.1941792699059672, | |
| "grad_norm": 1.399487853050232, | |
| "learning_rate": 2.014641209910973e-05, | |
| "loss": 0.03973909854888916, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 1.197754656941614, | |
| "grad_norm": 0.44740626215934753, | |
| "learning_rate": 2.0057027423218564e-05, | |
| "loss": 0.03419320821762085, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.2013300439772605, | |
| "grad_norm": 0.7771443128585815, | |
| "learning_rate": 1.99676427473274e-05, | |
| "loss": 0.03688163042068481, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 1.2049054310129073, | |
| "grad_norm": 0.33263227343559265, | |
| "learning_rate": 1.9878258071436235e-05, | |
| "loss": 0.0361082911491394, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 1.2084808180485538, | |
| "grad_norm": 0.586033821105957, | |
| "learning_rate": 1.978887339554507e-05, | |
| "loss": 0.037032432556152343, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 1.2120562050842003, | |
| "grad_norm": 0.17661893367767334, | |
| "learning_rate": 1.9699488719653904e-05, | |
| "loss": 0.03797416687011719, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 1.215631592119847, | |
| "grad_norm": 0.6682581305503845, | |
| "learning_rate": 1.9610104043762738e-05, | |
| "loss": 0.03688710927963257, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.2192069791554936, | |
| "grad_norm": 0.33618828654289246, | |
| "learning_rate": 1.9520719367871572e-05, | |
| "loss": 0.03531041145324707, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 1.2227823661911401, | |
| "grad_norm": 0.2299039363861084, | |
| "learning_rate": 1.9431334691980406e-05, | |
| "loss": 0.037303669452667235, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 1.2263577532267869, | |
| "grad_norm": 0.38670745491981506, | |
| "learning_rate": 1.934195001608924e-05, | |
| "loss": 0.03624207735061646, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 1.2299331402624334, | |
| "grad_norm": 0.28273847699165344, | |
| "learning_rate": 1.9252565340198078e-05, | |
| "loss": 0.03737942218780518, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 1.23350852729808, | |
| "grad_norm": 0.1840369552373886, | |
| "learning_rate": 1.9163180664306912e-05, | |
| "loss": 0.04193697929382324, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.2370839143337267, | |
| "grad_norm": 0.3581949770450592, | |
| "learning_rate": 1.9073795988415746e-05, | |
| "loss": 0.03538564205169678, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 1.2406593013693732, | |
| "grad_norm": 0.47306036949157715, | |
| "learning_rate": 1.898441131252458e-05, | |
| "loss": 0.03894999265670776, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 1.24423468840502, | |
| "grad_norm": 0.961359977722168, | |
| "learning_rate": 1.8895026636633418e-05, | |
| "loss": 0.03768787622451782, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 1.2478100754406665, | |
| "grad_norm": 0.873396098613739, | |
| "learning_rate": 1.8805641960742252e-05, | |
| "loss": 0.03798648834228516, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 1.251385462476313, | |
| "grad_norm": 0.27755600214004517, | |
| "learning_rate": 1.8716257284851086e-05, | |
| "loss": 0.03826235771179199, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.251385462476313, | |
| "eval_accuracy": 0.9878845131214289, | |
| "eval_f1": 0.8920698296733638, | |
| "eval_loss": 0.04892827197909355, | |
| "eval_precision": 0.8788428276516208, | |
| "eval_recall": 0.9057010608630653, | |
| "eval_runtime": 27.2527, | |
| "eval_samples_per_second": 825.607, | |
| "eval_steps_per_second": 22.934, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.2549608495119597, | |
| "grad_norm": 0.19469444453716278, | |
| "learning_rate": 1.862687260895992e-05, | |
| "loss": 0.037444868087768556, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 1.2585362365476063, | |
| "grad_norm": 0.7563005685806274, | |
| "learning_rate": 1.8537487933068755e-05, | |
| "loss": 0.03585953950881958, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 1.262111623583253, | |
| "grad_norm": 0.748693585395813, | |
| "learning_rate": 1.844810325717759e-05, | |
| "loss": 0.036836111545562746, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 1.2656870106188995, | |
| "grad_norm": 0.2749057114124298, | |
| "learning_rate": 1.8358718581286423e-05, | |
| "loss": 0.035653345584869385, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 1.269262397654546, | |
| "grad_norm": 0.46990424394607544, | |
| "learning_rate": 1.826933390539526e-05, | |
| "loss": 0.038926541805267334, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.2728377846901928, | |
| "grad_norm": 0.5694590210914612, | |
| "learning_rate": 1.8179949229504095e-05, | |
| "loss": 0.041200418472290036, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 1.2764131717258393, | |
| "grad_norm": 0.44198593497276306, | |
| "learning_rate": 1.809056455361293e-05, | |
| "loss": 0.03306173086166382, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 1.2799885587614859, | |
| "grad_norm": 1.5265918970108032, | |
| "learning_rate": 1.8001179877721763e-05, | |
| "loss": 0.03929618358612061, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 1.2835639457971326, | |
| "grad_norm": 0.568000078201294, | |
| "learning_rate": 1.79117952018306e-05, | |
| "loss": 0.035215189456939695, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 1.2871393328327791, | |
| "grad_norm": 0.3256838619709015, | |
| "learning_rate": 1.7822410525939435e-05, | |
| "loss": 0.03693248510360718, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.2907147198684257, | |
| "grad_norm": 0.37276744842529297, | |
| "learning_rate": 1.773302585004827e-05, | |
| "loss": 0.038254330158233645, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 1.2942901069040724, | |
| "grad_norm": 0.9104180335998535, | |
| "learning_rate": 1.7643641174157103e-05, | |
| "loss": 0.03706887722015381, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 1.297865493939719, | |
| "grad_norm": 0.855074942111969, | |
| "learning_rate": 1.755425649826594e-05, | |
| "loss": 0.039341244697570804, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 1.3014408809753655, | |
| "grad_norm": 1.0919744968414307, | |
| "learning_rate": 1.7464871822374775e-05, | |
| "loss": 0.03796007394790649, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 1.3050162680110122, | |
| "grad_norm": 0.4765317142009735, | |
| "learning_rate": 1.737548714648361e-05, | |
| "loss": 0.03425301790237427, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.3085916550466588, | |
| "grad_norm": 0.28184378147125244, | |
| "learning_rate": 1.728610247059244e-05, | |
| "loss": 0.03511073589324951, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 1.3121670420823053, | |
| "grad_norm": 0.26926326751708984, | |
| "learning_rate": 1.7196717794701277e-05, | |
| "loss": 0.03917940616607666, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 1.315742429117952, | |
| "grad_norm": 2.2863128185272217, | |
| "learning_rate": 1.710733311881011e-05, | |
| "loss": 0.03748450517654419, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 1.3193178161535986, | |
| "grad_norm": 0.47158753871917725, | |
| "learning_rate": 1.7017948442918946e-05, | |
| "loss": 0.034841620922088624, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 1.3228932031892453, | |
| "grad_norm": 0.3611966371536255, | |
| "learning_rate": 1.692856376702778e-05, | |
| "loss": 0.03597846508026123, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.3264685902248918, | |
| "grad_norm": 0.19897930324077606, | |
| "learning_rate": 1.6839179091136617e-05, | |
| "loss": 0.0384373140335083, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 1.3300439772605386, | |
| "grad_norm": 0.4929654002189636, | |
| "learning_rate": 1.674979441524545e-05, | |
| "loss": 0.03474846363067627, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 1.333619364296185, | |
| "grad_norm": 1.4330233335494995, | |
| "learning_rate": 1.6660409739354286e-05, | |
| "loss": 0.03804588317871094, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 1.3371947513318316, | |
| "grad_norm": 0.7935028076171875, | |
| "learning_rate": 1.6571025063463123e-05, | |
| "loss": 0.036091580390930175, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 1.3407701383674784, | |
| "grad_norm": 0.6093057990074158, | |
| "learning_rate": 1.6481640387571957e-05, | |
| "loss": 0.036958491802215575, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.3407701383674784, | |
| "eval_accuracy": 0.9876767499314908, | |
| "eval_f1": 0.8963199795830114, | |
| "eval_loss": 0.048646602779626846, | |
| "eval_precision": 0.8842404151455387, | |
| "eval_recall": 0.9087341517407929, | |
| "eval_runtime": 27.7954, | |
| "eval_samples_per_second": 809.486, | |
| "eval_steps_per_second": 22.486, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.344345525403125, | |
| "grad_norm": 0.530693531036377, | |
| "learning_rate": 1.639225571168079e-05, | |
| "loss": 0.040179696083068844, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 1.3479209124387714, | |
| "grad_norm": 0.70650714635849, | |
| "learning_rate": 1.6302871035789626e-05, | |
| "loss": 0.03599003076553345, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 1.3514962994744182, | |
| "grad_norm": 0.673740029335022, | |
| "learning_rate": 1.621348635989846e-05, | |
| "loss": 0.03707956552505493, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 1.3550716865100647, | |
| "grad_norm": 0.28047823905944824, | |
| "learning_rate": 1.6124101684007294e-05, | |
| "loss": 0.034383256435394284, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 1.3586470735457112, | |
| "grad_norm": 0.4644497036933899, | |
| "learning_rate": 1.6034717008116128e-05, | |
| "loss": 0.039096081256866456, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.362222460581358, | |
| "grad_norm": 0.2905023992061615, | |
| "learning_rate": 1.5945332332224962e-05, | |
| "loss": 0.031935737133026124, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 1.3657978476170045, | |
| "grad_norm": 0.519289493560791, | |
| "learning_rate": 1.58559476563338e-05, | |
| "loss": 0.03160768747329712, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 1.369373234652651, | |
| "grad_norm": 0.4803026616573334, | |
| "learning_rate": 1.5766562980442634e-05, | |
| "loss": 0.03475278615951538, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 1.3729486216882978, | |
| "grad_norm": 0.2219659686088562, | |
| "learning_rate": 1.5677178304551468e-05, | |
| "loss": 0.03382747411727905, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 1.3765240087239443, | |
| "grad_norm": 0.9020390510559082, | |
| "learning_rate": 1.5587793628660302e-05, | |
| "loss": 0.03778740644454956, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.3800993957595908, | |
| "grad_norm": 0.4074041247367859, | |
| "learning_rate": 1.549840895276914e-05, | |
| "loss": 0.03417648077011108, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 1.3836747827952376, | |
| "grad_norm": 0.2950891852378845, | |
| "learning_rate": 1.5409024276877974e-05, | |
| "loss": 0.037335121631622316, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 1.3872501698308841, | |
| "grad_norm": 0.5112789869308472, | |
| "learning_rate": 1.5319639600986808e-05, | |
| "loss": 0.03443581342697143, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 1.3908255568665309, | |
| "grad_norm": 0.6883418560028076, | |
| "learning_rate": 1.523025492509564e-05, | |
| "loss": 0.03647557497024536, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 1.3944009439021774, | |
| "grad_norm": 0.22857694327831268, | |
| "learning_rate": 1.5140870249204478e-05, | |
| "loss": 0.03520648956298828, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.3979763309378241, | |
| "grad_norm": 1.4312663078308105, | |
| "learning_rate": 1.5051485573313312e-05, | |
| "loss": 0.031425106525421145, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 1.4015517179734707, | |
| "grad_norm": 0.7821195125579834, | |
| "learning_rate": 1.4962100897422146e-05, | |
| "loss": 0.03315335750579834, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 1.4051271050091172, | |
| "grad_norm": 0.27848535776138306, | |
| "learning_rate": 1.487271622153098e-05, | |
| "loss": 0.033316426277160645, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 1.408702492044764, | |
| "grad_norm": 0.6713240146636963, | |
| "learning_rate": 1.4783331545639816e-05, | |
| "loss": 0.033266935348510746, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 1.4122778790804105, | |
| "grad_norm": 3.596701145172119, | |
| "learning_rate": 1.469394686974865e-05, | |
| "loss": 0.03419414281845093, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.415853266116057, | |
| "grad_norm": 1.069840908050537, | |
| "learning_rate": 1.4604562193857485e-05, | |
| "loss": 0.035397300720214846, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 1.4194286531517037, | |
| "grad_norm": 0.2466162145137787, | |
| "learning_rate": 1.4515177517966322e-05, | |
| "loss": 0.0358107590675354, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 1.4230040401873503, | |
| "grad_norm": 0.5182567834854126, | |
| "learning_rate": 1.4425792842075156e-05, | |
| "loss": 0.03377439260482788, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 1.4265794272229968, | |
| "grad_norm": 0.8782963752746582, | |
| "learning_rate": 1.433640816618399e-05, | |
| "loss": 0.03737137794494629, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 1.4301548142586435, | |
| "grad_norm": 0.2662527561187744, | |
| "learning_rate": 1.4247023490292825e-05, | |
| "loss": 0.035046143531799315, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.4301548142586435, | |
| "eval_accuracy": 0.9874205963276935, | |
| "eval_f1": 0.8909038185431681, | |
| "eval_loss": 0.048918217420578, | |
| "eval_precision": 0.8769057265778372, | |
| "eval_recall": 0.9053560647442717, | |
| "eval_runtime": 27.308, | |
| "eval_samples_per_second": 823.934, | |
| "eval_steps_per_second": 22.887, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.43373020129429, | |
| "grad_norm": 4.632917404174805, | |
| "learning_rate": 1.415763881440166e-05, | |
| "loss": 0.03318638563156128, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 1.4373055883299366, | |
| "grad_norm": 0.34400591254234314, | |
| "learning_rate": 1.4068254138510495e-05, | |
| "loss": 0.03380630970001221, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 1.4408809753655833, | |
| "grad_norm": 0.3949352204799652, | |
| "learning_rate": 1.3978869462619329e-05, | |
| "loss": 0.035887646675109866, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 1.4444563624012299, | |
| "grad_norm": 0.21083228290081024, | |
| "learning_rate": 1.3889484786728163e-05, | |
| "loss": 0.02981067180633545, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 1.4480317494368766, | |
| "grad_norm": 0.5403398871421814, | |
| "learning_rate": 1.3800100110836999e-05, | |
| "loss": 0.03951683759689331, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.4516071364725232, | |
| "grad_norm": 0.37334415316581726, | |
| "learning_rate": 1.3710715434945833e-05, | |
| "loss": 0.03376241683959961, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 1.4551825235081697, | |
| "grad_norm": 0.6374111771583557, | |
| "learning_rate": 1.3621330759054667e-05, | |
| "loss": 0.035758087635040285, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 1.4587579105438164, | |
| "grad_norm": 0.4704621434211731, | |
| "learning_rate": 1.3531946083163501e-05, | |
| "loss": 0.03579946041107178, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 1.462333297579463, | |
| "grad_norm": 0.31890979409217834, | |
| "learning_rate": 1.3442561407272339e-05, | |
| "loss": 0.036891818046569824, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 1.4659086846151097, | |
| "grad_norm": 0.36003023386001587, | |
| "learning_rate": 1.3353176731381173e-05, | |
| "loss": 0.03722346544265747, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.4694840716507562, | |
| "grad_norm": 0.3868881165981293, | |
| "learning_rate": 1.3263792055490007e-05, | |
| "loss": 0.03188649654388428, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 1.4730594586864028, | |
| "grad_norm": 0.1989583820104599, | |
| "learning_rate": 1.3174407379598841e-05, | |
| "loss": 0.03385810136795044, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 1.4766348457220495, | |
| "grad_norm": 1.653865933418274, | |
| "learning_rate": 1.3085022703707677e-05, | |
| "loss": 0.033811585903167726, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 1.480210232757696, | |
| "grad_norm": 0.4005359709262848, | |
| "learning_rate": 1.2995638027816512e-05, | |
| "loss": 0.034179413318634035, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 1.4837856197933426, | |
| "grad_norm": 0.40698060393333435, | |
| "learning_rate": 1.2906253351925346e-05, | |
| "loss": 0.0344992733001709, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.4873610068289893, | |
| "grad_norm": 0.23063120245933533, | |
| "learning_rate": 1.281686867603418e-05, | |
| "loss": 0.036886801719665525, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 1.4909363938646358, | |
| "grad_norm": 0.36372461915016174, | |
| "learning_rate": 1.2727484000143017e-05, | |
| "loss": 0.03418808460235596, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 1.4945117809002824, | |
| "grad_norm": 3.4656498432159424, | |
| "learning_rate": 1.2638099324251852e-05, | |
| "loss": 0.035131211280822756, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 1.498087167935929, | |
| "grad_norm": 0.5397525429725647, | |
| "learning_rate": 1.2548714648360686e-05, | |
| "loss": 0.032310936450958255, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 1.5016625549715756, | |
| "grad_norm": 0.803663969039917, | |
| "learning_rate": 1.245932997246952e-05, | |
| "loss": 0.034068484306335446, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.5052379420072222, | |
| "grad_norm": 0.44578149914741516, | |
| "learning_rate": 1.2369945296578356e-05, | |
| "loss": 0.033568575382232665, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 1.508813329042869, | |
| "grad_norm": 0.3740385174751282, | |
| "learning_rate": 1.228056062068719e-05, | |
| "loss": 0.0316014552116394, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 1.5123887160785157, | |
| "grad_norm": 0.7885581254959106, | |
| "learning_rate": 1.2191175944796026e-05, | |
| "loss": 0.036092112064361574, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 1.515964103114162, | |
| "grad_norm": 0.2616823613643646, | |
| "learning_rate": 1.210179126890486e-05, | |
| "loss": 0.0364843225479126, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 1.5195394901498087, | |
| "grad_norm": 1.1933097839355469, | |
| "learning_rate": 1.2012406593013694e-05, | |
| "loss": 0.032956657409667967, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.5195394901498087, | |
| "eval_accuracy": 0.9878976626904123, | |
| "eval_f1": 0.8964901338171921, | |
| "eval_loss": 0.04782980680465698, | |
| "eval_precision": 0.8842314252957132, | |
| "eval_recall": 0.9090935226978697, | |
| "eval_runtime": 27.8912, | |
| "eval_samples_per_second": 806.706, | |
| "eval_steps_per_second": 22.409, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.5231148771854555, | |
| "grad_norm": 1.002236247062683, | |
| "learning_rate": 1.1923021917122528e-05, | |
| "loss": 0.03267708301544189, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 1.526690264221102, | |
| "grad_norm": 0.2965432405471802, | |
| "learning_rate": 1.1833637241231364e-05, | |
| "loss": 0.03969228982925415, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 1.5302656512567485, | |
| "grad_norm": 0.35980096459388733, | |
| "learning_rate": 1.1744252565340198e-05, | |
| "loss": 0.033807692527770994, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 1.5338410382923953, | |
| "grad_norm": 0.4036603271961212, | |
| "learning_rate": 1.1654867889449034e-05, | |
| "loss": 0.036050264835357664, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 1.5374164253280418, | |
| "grad_norm": 0.4341689348220825, | |
| "learning_rate": 1.1565483213557868e-05, | |
| "loss": 0.03344399690628052, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.5409918123636883, | |
| "grad_norm": 0.35666847229003906, | |
| "learning_rate": 1.1476098537666702e-05, | |
| "loss": 0.035790588855743405, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 1.544567199399335, | |
| "grad_norm": 2.009552001953125, | |
| "learning_rate": 1.1386713861775537e-05, | |
| "loss": 0.03580213069915771, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 1.5481425864349816, | |
| "grad_norm": 0.9199197888374329, | |
| "learning_rate": 1.1297329185884372e-05, | |
| "loss": 0.035557851791381836, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 1.5517179734706281, | |
| "grad_norm": 0.3379763662815094, | |
| "learning_rate": 1.1207944509993207e-05, | |
| "loss": 0.037502107620239256, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 1.5552933605062749, | |
| "grad_norm": 0.4002296030521393, | |
| "learning_rate": 1.1118559834102042e-05, | |
| "loss": 0.03514168262481689, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.5588687475419214, | |
| "grad_norm": 0.44335803389549255, | |
| "learning_rate": 1.1029175158210877e-05, | |
| "loss": 0.03210949659347534, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 1.562444134577568, | |
| "grad_norm": 0.3367313742637634, | |
| "learning_rate": 1.0939790482319712e-05, | |
| "loss": 0.03381946325302124, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 1.5660195216132147, | |
| "grad_norm": 0.3180839419364929, | |
| "learning_rate": 1.0850405806428547e-05, | |
| "loss": 0.033136572837829587, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 1.5695949086488612, | |
| "grad_norm": 0.49929025769233704, | |
| "learning_rate": 1.076102113053738e-05, | |
| "loss": 0.03284239530563354, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 1.5731702956845077, | |
| "grad_norm": 0.36956411600112915, | |
| "learning_rate": 1.0671636454646217e-05, | |
| "loss": 0.032391068935394285, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.5767456827201545, | |
| "grad_norm": 0.3806305527687073, | |
| "learning_rate": 1.058225177875505e-05, | |
| "loss": 0.03159698247909546, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 1.5803210697558012, | |
| "grad_norm": 0.24886535108089447, | |
| "learning_rate": 1.0492867102863887e-05, | |
| "loss": 0.03376968622207641, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 1.5838964567914475, | |
| "grad_norm": 0.8062007427215576, | |
| "learning_rate": 1.040348242697272e-05, | |
| "loss": 0.031125342845916747, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 1.5874718438270943, | |
| "grad_norm": 0.32632651925086975, | |
| "learning_rate": 1.0314097751081555e-05, | |
| "loss": 0.032405462265014645, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 1.591047230862741, | |
| "grad_norm": 0.9697968363761902, | |
| "learning_rate": 1.0224713075190389e-05, | |
| "loss": 0.0316835880279541, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 1.5946226178983876, | |
| "grad_norm": 0.7041149735450745, | |
| "learning_rate": 1.0135328399299225e-05, | |
| "loss": 0.03227449417114258, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 1.598198004934034, | |
| "grad_norm": 1.0169494152069092, | |
| "learning_rate": 1.0045943723408059e-05, | |
| "loss": 0.03636837244033814, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 1.6017733919696808, | |
| "grad_norm": 1.4278594255447388, | |
| "learning_rate": 9.956559047516895e-06, | |
| "loss": 0.036050994396209714, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 1.6053487790053274, | |
| "grad_norm": 0.21218614280223846, | |
| "learning_rate": 9.867174371625729e-06, | |
| "loss": 0.03155009746551514, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 1.6089241660409739, | |
| "grad_norm": 0.2901414930820465, | |
| "learning_rate": 9.777789695734563e-06, | |
| "loss": 0.030832624435424803, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.6089241660409739, | |
| "eval_accuracy": 0.9888076128640655, | |
| "eval_f1": 0.9007828635915198, | |
| "eval_loss": 0.04577971622347832, | |
| "eval_precision": 0.8896631009295218, | |
| "eval_recall": 0.9121841129287296, | |
| "eval_runtime": 27.4639, | |
| "eval_samples_per_second": 819.259, | |
| "eval_steps_per_second": 22.757, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.6124995530766206, | |
| "grad_norm": 0.6742628812789917, | |
| "learning_rate": 9.688405019843397e-06, | |
| "loss": 0.03396400213241577, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 1.6160749401122672, | |
| "grad_norm": 0.30497708916664124, | |
| "learning_rate": 9.599020343952233e-06, | |
| "loss": 0.030751326084136964, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 1.6196503271479137, | |
| "grad_norm": 0.33833158016204834, | |
| "learning_rate": 9.509635668061067e-06, | |
| "loss": 0.03044323444366455, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 1.6232257141835604, | |
| "grad_norm": 0.35390418767929077, | |
| "learning_rate": 9.420250992169903e-06, | |
| "loss": 0.03425618410110474, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 1.626801101219207, | |
| "grad_norm": 0.6008805632591248, | |
| "learning_rate": 9.330866316278737e-06, | |
| "loss": 0.03422411203384399, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.6303764882548535, | |
| "grad_norm": 0.7057814598083496, | |
| "learning_rate": 9.241481640387573e-06, | |
| "loss": 0.03580734968185425, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 1.6339518752905002, | |
| "grad_norm": 0.6222581267356873, | |
| "learning_rate": 9.152096964496407e-06, | |
| "loss": 0.03245258092880249, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 1.6375272623261468, | |
| "grad_norm": 0.19113455712795258, | |
| "learning_rate": 9.062712288605242e-06, | |
| "loss": 0.03314180135726929, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 1.6411026493617933, | |
| "grad_norm": 0.35139983892440796, | |
| "learning_rate": 8.973327612714076e-06, | |
| "loss": 0.03314854860305786, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 1.64467803639744, | |
| "grad_norm": 2.3638358116149902, | |
| "learning_rate": 8.883942936822912e-06, | |
| "loss": 0.03374920845031738, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.6482534234330868, | |
| "grad_norm": 0.3906150162220001, | |
| "learning_rate": 8.794558260931746e-06, | |
| "loss": 0.030902385711669922, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 1.651828810468733, | |
| "grad_norm": 1.5684771537780762, | |
| "learning_rate": 8.705173585040582e-06, | |
| "loss": 0.03261609077453613, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 1.6554041975043798, | |
| "grad_norm": 0.5489705801010132, | |
| "learning_rate": 8.615788909149416e-06, | |
| "loss": 0.032475869655609134, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 1.6589795845400266, | |
| "grad_norm": 0.4629211127758026, | |
| "learning_rate": 8.52640423325825e-06, | |
| "loss": 0.0343438458442688, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 1.6625549715756731, | |
| "grad_norm": 0.35416728258132935, | |
| "learning_rate": 8.437019557367086e-06, | |
| "loss": 0.030291988849639892, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.6661303586113196, | |
| "grad_norm": 0.3730672597885132, | |
| "learning_rate": 8.34763488147592e-06, | |
| "loss": 0.03114586353302002, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 1.6697057456469664, | |
| "grad_norm": 0.8023098111152649, | |
| "learning_rate": 8.258250205584756e-06, | |
| "loss": 0.031157519817352295, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 1.673281132682613, | |
| "grad_norm": 0.3616831600666046, | |
| "learning_rate": 8.16886552969359e-06, | |
| "loss": 0.03633548498153687, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 1.6768565197182594, | |
| "grad_norm": 0.2969978451728821, | |
| "learning_rate": 8.079480853802424e-06, | |
| "loss": 0.030888726711273195, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 1.6804319067539062, | |
| "grad_norm": 0.5954911708831787, | |
| "learning_rate": 7.990096177911258e-06, | |
| "loss": 0.02800543785095215, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.6840072937895527, | |
| "grad_norm": 0.28519004583358765, | |
| "learning_rate": 7.900711502020094e-06, | |
| "loss": 0.0348360013961792, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 1.6875826808251992, | |
| "grad_norm": 3.0812149047851562, | |
| "learning_rate": 7.811326826128928e-06, | |
| "loss": 0.03429551839828491, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 1.691158067860846, | |
| "grad_norm": 0.3664245903491974, | |
| "learning_rate": 7.721942150237764e-06, | |
| "loss": 0.03342988014221191, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 1.6947334548964925, | |
| "grad_norm": 0.4746117889881134, | |
| "learning_rate": 7.632557474346598e-06, | |
| "loss": 0.031046552658081053, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 1.698308841932139, | |
| "grad_norm": 0.26298218965530396, | |
| "learning_rate": 7.543172798455433e-06, | |
| "loss": 0.03168731689453125, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.698308841932139, | |
| "eval_accuracy": 0.9886813770018247, | |
| "eval_f1": 0.8991618091307493, | |
| "eval_loss": 0.04538652300834656, | |
| "eval_precision": 0.8872949672507418, | |
| "eval_recall": 0.9113503723083115, | |
| "eval_runtime": 27.7648, | |
| "eval_samples_per_second": 810.377, | |
| "eval_steps_per_second": 22.51, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.7018842289677858, | |
| "grad_norm": 1.6149009466171265, | |
| "learning_rate": 7.4537881225642675e-06, | |
| "loss": 0.035295097827911376, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 1.7054596160034323, | |
| "grad_norm": 0.37669169902801514, | |
| "learning_rate": 7.3644034466731025e-06, | |
| "loss": 0.03364665269851685, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 1.7090350030390788, | |
| "grad_norm": 0.5029271841049194, | |
| "learning_rate": 7.275018770781937e-06, | |
| "loss": 0.032778596878051756, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 1.7126103900747256, | |
| "grad_norm": 0.265184611082077, | |
| "learning_rate": 7.1856340948907725e-06, | |
| "loss": 0.033551807403564456, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 1.7161857771103723, | |
| "grad_norm": 0.5929502248764038, | |
| "learning_rate": 7.096249418999607e-06, | |
| "loss": 0.033371658325195314, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.7197611641460187, | |
| "grad_norm": 0.6151393055915833, | |
| "learning_rate": 7.006864743108442e-06, | |
| "loss": 0.034622840881347657, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 1.7233365511816654, | |
            "grad_norm": null,
| "learning_rate": 6.917480067217276e-06, | |
| "loss": 0.032550268173217774, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 1.7269119382173121, | |
| "grad_norm": 3.7852137088775635, | |
| "learning_rate": 6.828095391326112e-06, | |
| "loss": 0.03138866424560547, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 1.7304873252529587, | |
| "grad_norm": 0.1753600835800171, | |
| "learning_rate": 6.738710715434946e-06, | |
| "loss": 0.03186697244644165, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 1.7340627122886052, | |
| "grad_norm": 6.609533786773682, | |
| "learning_rate": 6.649326039543781e-06, | |
| "loss": 0.031199581623077392, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.737638099324252, | |
| "grad_norm": 1.9689279794692993, | |
| "learning_rate": 6.559941363652617e-06, | |
| "loss": 0.03473323583602905, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 1.7412134863598985, | |
| "grad_norm": 1.0971671342849731, | |
| "learning_rate": 6.47055668776145e-06, | |
| "loss": 0.031001167297363283, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 1.744788873395545, | |
| "grad_norm": 0.5941652655601501, | |
| "learning_rate": 6.381172011870286e-06, | |
| "loss": 0.03148573875427246, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 1.7483642604311918, | |
| "grad_norm": 1.0142033100128174, | |
| "learning_rate": 6.29178733597912e-06, | |
| "loss": 0.03321949720382691, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 1.7519396474668383, | |
| "grad_norm": 1.1377204656600952, | |
| "learning_rate": 6.202402660087954e-06, | |
| "loss": 0.03343360424041748, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.7555150345024848, | |
| "grad_norm": 0.5484851002693176, | |
| "learning_rate": 6.113017984196789e-06, | |
| "loss": 0.03009215831756592, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 1.7590904215381316, | |
| "grad_norm": 0.4845998287200928, | |
| "learning_rate": 6.023633308305624e-06, | |
| "loss": 0.03416025161743164, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 1.762665808573778, | |
| "grad_norm": 2.4999592304229736, | |
| "learning_rate": 5.934248632414459e-06, | |
| "loss": 0.03311382532119751, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 1.7662411956094246, | |
| "grad_norm": 0.8577232956886292, | |
| "learning_rate": 5.844863956523293e-06, | |
| "loss": 0.030206308364868165, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 1.7698165826450714, | |
| "grad_norm": 0.90534508228302, | |
| "learning_rate": 5.755479280632128e-06, | |
| "loss": 0.03304917335510254, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 1.7733919696807179, | |
| "grad_norm": 0.4702795445919037, | |
| "learning_rate": 5.666094604740963e-06, | |
| "loss": 0.03289535760879517, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 1.7769673567163644, | |
| "grad_norm": 0.3340344727039337, | |
| "learning_rate": 5.5767099288497984e-06, | |
| "loss": 0.03143750667572021, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 1.7805427437520112, | |
| "grad_norm": 0.8033680319786072, | |
| "learning_rate": 5.4873252529586334e-06, | |
| "loss": 0.03799154043197632, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 1.784118130787658, | |
| "grad_norm": 0.3498431444168091, | |
| "learning_rate": 5.3979405770674684e-06, | |
| "loss": 0.032227945327758786, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 1.7876935178233042, | |
| "grad_norm": 0.5044463276863098, | |
| "learning_rate": 5.308555901176303e-06, | |
| "loss": 0.03224561214447021, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.7876935178233042, | |
| "eval_accuracy": 0.9888107687606216, | |
| "eval_f1": 0.9007455797770362, | |
| "eval_loss": 0.04468328878283501, | |
| "eval_precision": 0.890014593623709, | |
| "eval_recall": 0.9117384929419544, | |
| "eval_runtime": 27.446, | |
| "eval_samples_per_second": 819.793, | |
| "eval_steps_per_second": 22.772, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.791268904858951, | |
| "grad_norm": 0.2692296504974365, | |
| "learning_rate": 5.219171225285138e-06, | |
| "loss": 0.03332348108291626, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 1.7948442918945977, | |
| "grad_norm": 0.29106396436691284, | |
| "learning_rate": 5.129786549393973e-06, | |
| "loss": 0.032147047519683836, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 1.7984196789302442, | |
| "grad_norm": 0.20724542438983917, | |
| "learning_rate": 5.040401873502807e-06, | |
| "loss": 0.02886124849319458, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 1.8019950659658908, | |
| "grad_norm": 0.7092130184173584, | |
| "learning_rate": 4.951017197611642e-06, | |
| "loss": 0.033159823417663575, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 1.8055704530015375, | |
| "grad_norm": 0.432674765586853, | |
| "learning_rate": 4.861632521720477e-06, | |
| "loss": 0.03299700260162353, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 1.809145840037184, | |
| "grad_norm": 0.9785314798355103, | |
| "learning_rate": 4.772247845829311e-06, | |
| "loss": 0.03019791841506958, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 1.8127212270728306, | |
| "grad_norm": 0.5002002120018005, | |
| "learning_rate": 4.682863169938146e-06, | |
| "loss": 0.035624983310699465, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 1.8162966141084773, | |
| "grad_norm": 0.765285313129425, | |
| "learning_rate": 4.593478494046981e-06, | |
| "loss": 0.02971407175064087, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 1.8198720011441238, | |
| "grad_norm": 0.534965991973877, | |
| "learning_rate": 4.504093818155815e-06, | |
| "loss": 0.03354018688201904, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 1.8234473881797704, | |
| "grad_norm": 0.7223150134086609, | |
| "learning_rate": 4.41470914226465e-06, | |
| "loss": 0.02953230619430542, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 1.8270227752154171, | |
| "grad_norm": 0.38850611448287964, | |
| "learning_rate": 4.325324466373485e-06, | |
| "loss": 0.030534558296203614, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 1.8305981622510636, | |
| "grad_norm": 0.36119019985198975, | |
| "learning_rate": 4.23593979048232e-06, | |
| "loss": 0.030811927318572997, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 1.8341735492867102, | |
| "grad_norm": 0.4112676978111267, | |
| "learning_rate": 4.146555114591154e-06, | |
| "loss": 0.036168689727783206, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 1.837748936322357, | |
| "grad_norm": 0.38200223445892334, | |
| "learning_rate": 4.057170438699989e-06, | |
| "loss": 0.03023934841156006, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 1.8413243233580037, | |
| "grad_norm": 0.22987698018550873, | |
| "learning_rate": 3.967785762808824e-06, | |
| "loss": 0.03280112981796265, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 1.84489971039365, | |
| "grad_norm": 0.5126951336860657, | |
| "learning_rate": 3.8784010869176585e-06, | |
| "loss": 0.032214133739471434, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 1.8484750974292967, | |
| "grad_norm": 0.3394624888896942, | |
| "learning_rate": 3.7890164110264935e-06, | |
| "loss": 0.0288789963722229, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 1.8520504844649435, | |
| "grad_norm": 0.8338372111320496, | |
| "learning_rate": 3.699631735135328e-06, | |
| "loss": 0.03252574443817138, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 1.8556258715005898, | |
| "grad_norm": 0.2515293061733246, | |
| "learning_rate": 3.6102470592441635e-06, | |
| "loss": 0.029772815704345704, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 1.8592012585362365, | |
| "grad_norm": 0.5206916332244873, | |
| "learning_rate": 3.5208623833529985e-06, | |
| "loss": 0.030335335731506347, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.8627766455718833, | |
| "grad_norm": 2.3129968643188477, | |
| "learning_rate": 3.431477707461833e-06, | |
| "loss": 0.032417423725128174, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 1.8663520326075298, | |
| "grad_norm": 1.627025842666626, | |
| "learning_rate": 3.342093031570668e-06, | |
| "loss": 0.03170029640197754, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 1.8699274196431763, | |
| "grad_norm": 1.4574371576309204, | |
| "learning_rate": 3.2527083556795027e-06, | |
| "loss": 0.03141381978988647, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 1.873502806678823, | |
| "grad_norm": 0.3863239288330078, | |
| "learning_rate": 3.1633236797883373e-06, | |
| "loss": 0.031075146198272705, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 1.8770781937144696, | |
| "grad_norm": 0.4181801676750183, | |
| "learning_rate": 3.0739390038971723e-06, | |
| "loss": 0.031000993251800536, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 1.8770781937144696, | |
| "eval_accuracy": 0.9888013010709535, | |
| "eval_f1": 0.9016717087789566, | |
| "eval_loss": 0.04389448091387749, | |
| "eval_precision": 0.8910285200988098, | |
| "eval_recall": 0.9125722335623724, | |
| "eval_runtime": 27.8666, | |
| "eval_samples_per_second": 807.418, | |
| "eval_steps_per_second": 22.428, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 1.8806535807501161, | |
| "grad_norm": 0.2707064151763916, | |
| "learning_rate": 2.984554328006007e-06, | |
| "loss": 0.03185615539550781, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 1.8842289677857629, | |
| "grad_norm": 0.5553069710731506, | |
| "learning_rate": 2.895169652114842e-06, | |
| "loss": 0.03002817392349243, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 1.8878043548214094, | |
| "grad_norm": 0.3491911292076111, | |
| "learning_rate": 2.8057849762236764e-06, | |
| "loss": 0.028789632320404053, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 1.891379741857056, | |
| "grad_norm": 0.25187739729881287, | |
| "learning_rate": 2.716400300332511e-06, | |
| "loss": 0.030605175495147706, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 1.8949551288927027, | |
| "grad_norm": 0.9672222137451172, | |
| "learning_rate": 2.627015624441346e-06, | |
| "loss": 0.026704788208007812, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 1.8985305159283492, | |
| "grad_norm": 0.20565390586853027, | |
| "learning_rate": 2.5376309485501806e-06, | |
| "loss": 0.03059121608734131, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 1.9021059029639957, | |
| "grad_norm": 0.28167805075645447, | |
| "learning_rate": 2.448246272659015e-06, | |
| "loss": 0.03177599668502808, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 1.9056812899996425, | |
| "grad_norm": 0.24386221170425415, | |
| "learning_rate": 2.35886159676785e-06, | |
| "loss": 0.029768753051757812, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 1.9092566770352892, | |
| "grad_norm": 3.4795925617218018, | |
| "learning_rate": 2.2694769208766848e-06, | |
| "loss": 0.030632736682891844, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 1.9128320640709355, | |
| "grad_norm": 0.28710371255874634, | |
| "learning_rate": 2.1800922449855198e-06, | |
| "loss": 0.03532270431518555, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 1.9164074511065823, | |
| "grad_norm": 1.0009117126464844, | |
| "learning_rate": 2.090707569094355e-06, | |
| "loss": 0.030157883167266846, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 1.919982838142229, | |
| "grad_norm": 0.8986654877662659, | |
| "learning_rate": 2.0013228932031894e-06, | |
| "loss": 0.02968831777572632, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 1.9235582251778756, | |
| "grad_norm": 0.4408089518547058, | |
| "learning_rate": 1.9119382173120244e-06, | |
| "loss": 0.031650230884552, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 1.927133612213522, | |
| "grad_norm": 0.44061407446861267, | |
| "learning_rate": 1.822553541420859e-06, | |
| "loss": 0.03314239501953125, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 1.9307089992491688, | |
| "grad_norm": 0.31529247760772705, | |
| "learning_rate": 1.7331688655296938e-06, | |
| "loss": 0.028174445629119874, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 1.9342843862848154, | |
| "grad_norm": 0.46949172019958496, | |
| "learning_rate": 1.6437841896385283e-06, | |
| "loss": 0.03205679178237915, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 1.9378597733204619, | |
| "grad_norm": 0.42985737323760986, | |
| "learning_rate": 1.5543995137473631e-06, | |
| "loss": 0.03423054218292236, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 1.9414351603561086, | |
| "grad_norm": 0.3582230806350708, | |
| "learning_rate": 1.465014837856198e-06, | |
| "loss": 0.036082537174224855, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 1.9450105473917552, | |
| "grad_norm": 0.2743465304374695, | |
| "learning_rate": 1.375630161965033e-06, | |
| "loss": 0.03133800745010376, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 1.9485859344274017, | |
| "grad_norm": 0.3252977728843689, | |
| "learning_rate": 1.2862454860738675e-06, | |
| "loss": 0.029351208209991455, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 1.9521613214630484, | |
| "grad_norm": 0.7166300415992737, | |
| "learning_rate": 1.1968608101827023e-06, | |
| "loss": 0.03286364078521729, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 1.955736708498695, | |
| "grad_norm": 0.4002815783023834, | |
| "learning_rate": 1.1074761342915371e-06, | |
| "loss": 0.03330163955688477, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 1.9593120955343415, | |
| "grad_norm": 0.6636976003646851, | |
| "learning_rate": 1.018091458400372e-06, | |
| "loss": 0.03203016996383667, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 1.9628874825699882, | |
| "grad_norm": 0.9583289623260498, | |
| "learning_rate": 9.287067825092066e-07, | |
| "loss": 0.03129979610443115, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 1.9664628696056348, | |
| "grad_norm": 0.31978148221969604, | |
| "learning_rate": 8.393221066180415e-07, | |
| "loss": 0.029429452419281008, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.9664628696056348, | |
| "eval_accuracy": 0.9891878983990663, | |
| "eval_f1": 0.9045753492836575, | |
| "eval_loss": 0.04267999157309532, | |
| "eval_precision": 0.8949478748997595, | |
| "eval_recall": 0.9144122128626053, | |
| "eval_runtime": 27.5433, | |
| "eval_samples_per_second": 816.897, | |
| "eval_steps_per_second": 22.692, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.9700382566412813, | |
| "grad_norm": 2.8054332733154297, | |
| "learning_rate": 7.499374307268763e-07, | |
| "loss": 0.03312858819961548, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 1.973613643676928, | |
| "grad_norm": 0.5224851369857788, | |
| "learning_rate": 6.60552754835711e-07, | |
| "loss": 0.028790268898010254, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 1.9771890307125748, | |
| "grad_norm": 0.26614582538604736, | |
| "learning_rate": 5.711680789445458e-07, | |
| "loss": 0.028711328506469725, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 1.980764417748221, | |
| "grad_norm": 0.7065221667289734, | |
| "learning_rate": 4.817834030533806e-07, | |
| "loss": 0.03409520626068115, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 1.9843398047838678, | |
| "grad_norm": 0.5520646572113037, | |
| "learning_rate": 3.923987271622153e-07, | |
| "loss": 0.030617287158966066, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 1.9879151918195146, | |
| "grad_norm": 0.8152151703834534, | |
| "learning_rate": 3.030140512710501e-07, | |
| "loss": 0.034760825634002686, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 1.9914905788551611, | |
| "grad_norm": 0.7719851136207581, | |
| "learning_rate": 2.136293753798849e-07, | |
| "loss": 0.033238520622253416, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 1.9950659658908076, | |
| "grad_norm": 0.3627885580062866, | |
| "learning_rate": 1.2424469948871967e-07, | |
| "loss": 0.029695370197296143, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 1.9986413529264544, | |
| "grad_norm": 1.9493422508239746, | |
| "learning_rate": 3.4860023597554434e-08, | |
| "loss": 0.032883105278015134, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 55938, | |
| "total_flos": 1.889848580814228e+18, | |
| "train_loss": 0.058781605476671404, | |
| "train_runtime": 18337.3113, | |
| "train_samples_per_second": 439.268, | |
| "train_steps_per_second": 3.051 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 55938, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 2500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.889848580814228e+18, | |
| "train_batch_size": 72, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |