{
  "best_metric": 0.6971784234046936,
  "best_model_checkpoint": "./cardiffnlp-twitter-xlmr-finetuned-txtnly-all-42/checkpoint-16500",
  "epoch": 3.787878787878788,
  "eval_steps": 500,
  "global_step": 31500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "grad_norm": 3.298647880554199,
      "learning_rate": 4.994023569023569e-05,
      "loss": 0.6122,
      "step": 500
    },
    {
      "epoch": 0.06,
      "eval_f1": 0.4840638597456899,
      "eval_loss": 0.854165256023407,
      "eval_precision": 0.6558887250350466,
      "eval_recall": 0.49045198529069495,
      "eval_runtime": 5.9285,
      "eval_samples_per_second": 148.099,
      "eval_steps_per_second": 9.277,
      "step": 500
    },
    {
      "epoch": 0.12,
      "grad_norm": 5.411099433898926,
      "learning_rate": 4.988011063011063e-05,
      "loss": 0.5497,
      "step": 1000
    },
    {
      "epoch": 0.12,
      "eval_f1": 0.6209225023342669,
      "eval_loss": 0.8037390112876892,
      "eval_precision": 0.704421745545341,
      "eval_recall": 0.6070083321696225,
      "eval_runtime": 6.1691,
      "eval_samples_per_second": 142.322,
      "eval_steps_per_second": 8.915,
      "step": 1000
    },
    {
      "epoch": 0.18,
      "grad_norm": 5.836483001708984,
      "learning_rate": 4.9820105820105825e-05,
      "loss": 0.5404,
      "step": 1500
    },
    {
      "epoch": 0.18,
      "eval_f1": 0.3652071944289921,
      "eval_loss": 0.9700150489807129,
      "eval_precision": 0.5591482310679367,
      "eval_recall": 0.4176288227901131,
      "eval_runtime": 5.8886,
      "eval_samples_per_second": 149.101,
      "eval_steps_per_second": 9.34,
      "step": 1500
    },
    {
      "epoch": 0.24,
      "grad_norm": 13.717193603515625,
      "learning_rate": 4.975998075998076e-05,
      "loss": 0.5165,
      "step": 2000
    },
    {
      "epoch": 0.24,
      "eval_f1": 0.5369027892847279,
      "eval_loss": 0.744874894618988,
      "eval_precision": 0.7349445049700448,
      "eval_recall": 0.529664385793418,
      "eval_runtime": 5.996,
      "eval_samples_per_second": 146.43,
      "eval_steps_per_second": 9.173,
      "step": 2000
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4534995555877686,
      "learning_rate": 4.969997594997595e-05,
      "loss": 0.5136,
      "step": 2500
    },
    {
      "epoch": 0.3,
      "eval_f1": 0.5001381202499963,
      "eval_loss": 0.7884698510169983,
      "eval_precision": 0.6766332095394413,
      "eval_recall": 0.5025275799469348,
      "eval_runtime": 5.9085,
      "eval_samples_per_second": 148.6,
      "eval_steps_per_second": 9.309,
      "step": 2500
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.195244550704956,
      "learning_rate": 4.963985088985089e-05,
      "loss": 0.5072,
      "step": 3000
    },
    {
      "epoch": 0.36,
      "eval_f1": 0.5917137619940201,
      "eval_loss": 0.8123684525489807,
      "eval_precision": 0.6076358199852175,
      "eval_recall": 0.6132374435600242,
      "eval_runtime": 6.1108,
      "eval_samples_per_second": 143.68,
      "eval_steps_per_second": 9.0,
      "step": 3000
    },
    {
      "epoch": 0.42,
      "grad_norm": 7.579603672027588,
      "learning_rate": 4.957972582972583e-05,
      "loss": 0.5011,
      "step": 3500
    },
    {
      "epoch": 0.42,
      "eval_f1": 0.578405909718061,
      "eval_loss": 0.8767459392547607,
      "eval_precision": 0.642659899090607,
      "eval_recall": 0.5987143322627193,
      "eval_runtime": 6.1563,
      "eval_samples_per_second": 142.618,
      "eval_steps_per_second": 8.934,
      "step": 3500
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.266787052154541,
      "learning_rate": 4.951960076960077e-05,
      "loss": 0.5021,
      "step": 4000
    },
    {
      "epoch": 0.48,
      "eval_f1": 0.6502990015105321,
      "eval_loss": 0.7957776784896851,
      "eval_precision": 0.6847923256926328,
      "eval_recall": 0.636192338127822,
      "eval_runtime": 6.5221,
      "eval_samples_per_second": 134.618,
      "eval_steps_per_second": 8.433,
      "step": 4000
    },
    {
      "epoch": 0.54,
      "grad_norm": 6.044332027435303,
      "learning_rate": 4.945959595959596e-05,
      "loss": 0.4946,
      "step": 4500
    },
    {
      "epoch": 0.54,
      "eval_f1": 0.4982912515017284,
      "eval_loss": 0.8045271039009094,
      "eval_precision": 0.7220405815528763,
      "eval_recall": 0.4968300516687614,
      "eval_runtime": 6.1928,
      "eval_samples_per_second": 141.778,
      "eval_steps_per_second": 8.881,
      "step": 4500
    },
    {
      "epoch": 0.6,
      "grad_norm": 5.152063846588135,
      "learning_rate": 4.93994708994709e-05,
      "loss": 0.4928,
      "step": 5000
    },
    {
      "epoch": 0.6,
      "eval_f1": 0.550273048506264,
      "eval_loss": 0.780342698097229,
      "eval_precision": 0.7581894624319455,
      "eval_recall": 0.5380887213145278,
      "eval_runtime": 6.123,
      "eval_samples_per_second": 143.395,
      "eval_steps_per_second": 8.983,
      "step": 5000
    },
    {
      "epoch": 0.66,
      "grad_norm": 4.54200553894043,
      "learning_rate": 4.933934583934584e-05,
      "loss": 0.5008,
      "step": 5500
    },
    {
      "epoch": 0.66,
      "eval_f1": 0.4594232264185665,
      "eval_loss": 0.7507085204124451,
      "eval_precision": 0.44070483572560937,
      "eval_recall": 0.47984452823162504,
      "eval_runtime": 5.932,
      "eval_samples_per_second": 148.011,
      "eval_steps_per_second": 9.272,
      "step": 5500
    },
    {
      "epoch": 0.72,
      "grad_norm": 4.075632095336914,
      "learning_rate": 4.927922077922078e-05,
      "loss": 0.4966,
      "step": 6000
    },
    {
      "epoch": 0.72,
      "eval_f1": 0.6310991936984806,
      "eval_loss": 0.8238988518714905,
      "eval_precision": 0.6139657275796522,
      "eval_recall": 0.6767434715821813,
      "eval_runtime": 5.8918,
      "eval_samples_per_second": 149.02,
      "eval_steps_per_second": 9.335,
      "step": 6000
    },
    {
      "epoch": 0.78,
      "grad_norm": 4.8725104331970215,
      "learning_rate": 4.921909571909572e-05,
      "loss": 0.4791,
      "step": 6500
    },
    {
      "epoch": 0.78,
      "eval_f1": 0.5412559573187593,
      "eval_loss": 0.7028306722640991,
      "eval_precision": 0.6567775474615866,
      "eval_recall": 0.520631196760229,
      "eval_runtime": 6.3113,
      "eval_samples_per_second": 139.116,
      "eval_steps_per_second": 8.715,
      "step": 6500
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4915893077850342,
      "learning_rate": 4.915897065897066e-05,
      "loss": 0.494,
      "step": 7000
    },
    {
      "epoch": 0.84,
      "eval_f1": 0.5227267406470947,
      "eval_loss": 0.8033522367477417,
      "eval_precision": 0.6660302960734323,
      "eval_recall": 0.5188623562817111,
      "eval_runtime": 6.1252,
      "eval_samples_per_second": 143.342,
      "eval_steps_per_second": 8.979,
      "step": 7000
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.151014804840088,
      "learning_rate": 4.90988455988456e-05,
      "loss": 0.4861,
      "step": 7500
    },
    {
      "epoch": 0.9,
      "eval_f1": 0.4541201667750796,
      "eval_loss": 0.900325357913971,
      "eval_precision": 0.5780562441152168,
      "eval_recall": 0.4784564539403249,
      "eval_runtime": 6.144,
      "eval_samples_per_second": 142.903,
      "eval_steps_per_second": 8.952,
      "step": 7500
    },
    {
      "epoch": 0.96,
      "grad_norm": 4.770496368408203,
      "learning_rate": 4.903872053872054e-05,
      "loss": 0.4804,
      "step": 8000
    },
    {
      "epoch": 0.96,
      "eval_f1": 0.5791890202588422,
      "eval_loss": 0.773960530757904,
      "eval_precision": 0.6238945275403609,
      "eval_recall": 0.5775003491132523,
      "eval_runtime": 6.556,
      "eval_samples_per_second": 133.923,
      "eval_steps_per_second": 8.389,
      "step": 8000
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.520460367202759,
      "learning_rate": 4.897859547859548e-05,
      "loss": 0.4614,
      "step": 8500
    },
    {
      "epoch": 1.02,
      "eval_f1": 0.6470888284841774,
      "eval_loss": 0.7397181391716003,
      "eval_precision": 0.6848151355984641,
      "eval_recall": 0.6312358609132803,
      "eval_runtime": 6.1813,
      "eval_samples_per_second": 142.042,
      "eval_steps_per_second": 8.898,
      "step": 8500
    },
    {
      "epoch": 1.08,
      "grad_norm": 4.375688552856445,
      "learning_rate": 4.891847041847042e-05,
      "loss": 0.4315,
      "step": 9000
    },
    {
      "epoch": 1.08,
      "eval_f1": 0.614857769662433,
      "eval_loss": 0.788919985294342,
      "eval_precision": 0.6641593406916259,
      "eval_recall": 0.6034743750872783,
      "eval_runtime": 6.1798,
      "eval_samples_per_second": 142.076,
      "eval_steps_per_second": 8.9,
      "step": 9000
    },
    {
      "epoch": 1.14,
      "grad_norm": 4.091088771820068,
      "learning_rate": 4.885834535834536e-05,
      "loss": 0.4506,
      "step": 9500
    },
    {
      "epoch": 1.14,
      "eval_f1": 0.4967964786589283,
      "eval_loss": 0.8783875703811646,
      "eval_precision": 0.6387377173091459,
      "eval_recall": 0.5016645719871526,
      "eval_runtime": 5.9164,
      "eval_samples_per_second": 148.401,
      "eval_steps_per_second": 9.296,
      "step": 9500
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.3903276920318604,
      "learning_rate": 4.87982202982203e-05,
      "loss": 0.4489,
      "step": 10000
    },
    {
      "epoch": 1.2,
      "eval_f1": 0.4949153076705755,
      "eval_loss": 0.7994188070297241,
      "eval_precision": 0.5340329579250159,
      "eval_recall": 0.49638597961178615,
      "eval_runtime": 5.9029,
      "eval_samples_per_second": 148.74,
      "eval_steps_per_second": 9.317,
      "step": 10000
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.929879903793335,
      "learning_rate": 4.8738095238095235e-05,
      "loss": 0.4466,
      "step": 10500
    },
    {
      "epoch": 1.26,
      "eval_f1": 0.44642812881455524,
      "eval_loss": 0.8109920024871826,
      "eval_precision": 0.5776119229607602,
      "eval_recall": 0.47351207931853095,
      "eval_runtime": 5.9766,
      "eval_samples_per_second": 146.907,
      "eval_steps_per_second": 9.203,
      "step": 10500
    },
    {
      "epoch": 1.32,
      "grad_norm": 6.443171501159668,
      "learning_rate": 4.8677970177970176e-05,
      "loss": 0.4319,
      "step": 11000
    },
    {
      "epoch": 1.32,
      "eval_f1": 0.5481427288492505,
      "eval_loss": 0.8068605661392212,
      "eval_precision": 0.6612496177619213,
      "eval_recall": 0.5399497276916632,
      "eval_runtime": 5.9001,
      "eval_samples_per_second": 148.811,
      "eval_steps_per_second": 9.322,
      "step": 11000
    },
    {
      "epoch": 1.38,
      "grad_norm": 7.633645057678223,
      "learning_rate": 4.8617845117845116e-05,
      "loss": 0.4243,
      "step": 11500
    },
    {
      "epoch": 1.38,
      "eval_f1": 0.5797306372413114,
      "eval_loss": 0.7941620349884033,
      "eval_precision": 0.5948358635007136,
      "eval_recall": 0.5704752595075175,
      "eval_runtime": 6.145,
      "eval_samples_per_second": 142.881,
      "eval_steps_per_second": 8.95,
      "step": 11500
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.275371789932251,
      "learning_rate": 4.8557720057720056e-05,
      "loss": 0.4398,
      "step": 12000
    },
    {
      "epoch": 1.44,
      "eval_f1": 0.5247242844808815,
      "eval_loss": 0.9738017916679382,
      "eval_precision": 0.5370369073777802,
      "eval_recall": 0.6070139179816599,
      "eval_runtime": 6.219,
      "eval_samples_per_second": 141.18,
      "eval_steps_per_second": 8.844,
      "step": 12000
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4162724018096924,
      "learning_rate": 4.8497594997595e-05,
      "loss": 0.4526,
      "step": 12500
    },
    {
      "epoch": 1.5,
      "eval_f1": 0.5589742980399895,
      "eval_loss": 0.7195601463317871,
      "eval_precision": 0.7046240283838195,
      "eval_recall": 0.5477959316668994,
      "eval_runtime": 6.3918,
      "eval_samples_per_second": 137.363,
      "eval_steps_per_second": 8.605,
      "step": 12500
    },
    {
      "epoch": 1.56,
      "grad_norm": 6.926381587982178,
      "learning_rate": 4.8437469937469944e-05,
      "loss": 0.4529,
      "step": 13000
    },
    {
      "epoch": 1.56,
      "eval_f1": 0.5863097712686139,
      "eval_loss": 0.8049713969230652,
      "eval_precision": 0.6419448505612538,
      "eval_recall": 0.5730605595121724,
      "eval_runtime": 6.3636,
      "eval_samples_per_second": 137.971,
      "eval_steps_per_second": 8.643,
      "step": 13000
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8420650959014893,
      "learning_rate": 4.837746512746513e-05,
      "loss": 0.446,
      "step": 13500
    },
    {
      "epoch": 1.62,
      "eval_f1": 0.6107236144330398,
      "eval_loss": 0.7564206719398499,
      "eval_precision": 0.6520992658162544,
      "eval_recall": 0.5912358609132803,
      "eval_runtime": 6.4128,
      "eval_samples_per_second": 136.914,
      "eval_steps_per_second": 8.577,
      "step": 13500
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.423569679260254,
      "learning_rate": 4.831746031746032e-05,
      "loss": 0.4315,
      "step": 14000
    },
    {
      "epoch": 1.68,
      "eval_f1": 0.621245910301715,
      "eval_loss": 0.751511812210083,
      "eval_precision": 0.6474767054531395,
      "eval_recall": 0.6069198901456967,
      "eval_runtime": 5.9833,
      "eval_samples_per_second": 146.741,
      "eval_steps_per_second": 9.192,
      "step": 14000
    },
    {
      "epoch": 1.74,
      "grad_norm": 6.773381233215332,
      "learning_rate": 4.825733525733526e-05,
      "loss": 0.4464,
      "step": 14500
    },
    {
      "epoch": 1.74,
      "eval_f1": 0.559868694735591,
      "eval_loss": 0.8307517170906067,
      "eval_precision": 0.627583612882644,
      "eval_recall": 0.5512991667830377,
      "eval_runtime": 6.1679,
      "eval_samples_per_second": 142.35,
      "eval_steps_per_second": 8.917,
      "step": 14500
    },
    {
      "epoch": 1.8,
      "grad_norm": 6.220128059387207,
      "learning_rate": 4.8197330447330455e-05,
      "loss": 0.4423,
      "step": 15000
    },
    {
      "epoch": 1.8,
      "eval_f1": 0.5991996711711277,
      "eval_loss": 0.798150360584259,
      "eval_precision": 0.6176196711770697,
      "eval_recall": 0.5936535865568123,
      "eval_runtime": 6.0738,
      "eval_samples_per_second": 144.556,
      "eval_steps_per_second": 9.055,
      "step": 15000
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.1065833568572998,
      "learning_rate": 4.8137205387205395e-05,
      "loss": 0.4551,
      "step": 15500
    },
    {
      "epoch": 1.86,
      "eval_f1": 0.6019748538222912,
      "eval_loss": 0.822293221950531,
      "eval_precision": 0.6355921902599784,
      "eval_recall": 0.5933528836754642,
      "eval_runtime": 6.1197,
      "eval_samples_per_second": 143.472,
      "eval_steps_per_second": 8.987,
      "step": 15500
    },
    {
      "epoch": 1.92,
      "grad_norm": 8.631648063659668,
      "learning_rate": 4.807708032708033e-05,
      "loss": 0.4408,
      "step": 16000
    },
    {
      "epoch": 1.92,
      "eval_f1": 0.5131249172090748,
      "eval_loss": 0.7691208124160767,
      "eval_precision": 0.608759764068229,
      "eval_recall": 0.5147484057161477,
      "eval_runtime": 6.3609,
      "eval_samples_per_second": 138.031,
      "eval_steps_per_second": 8.647,
      "step": 16000
    },
    {
      "epoch": 1.98,
      "grad_norm": 6.755849361419678,
      "learning_rate": 4.801695526695527e-05,
      "loss": 0.4389,
      "step": 16500
    },
    {
      "epoch": 1.98,
      "eval_f1": 0.6702519892656928,
      "eval_loss": 0.6971784234046936,
      "eval_precision": 0.6686766810877821,
      "eval_recall": 0.6729106735558349,
      "eval_runtime": 6.1341,
      "eval_samples_per_second": 143.134,
      "eval_steps_per_second": 8.966,
      "step": 16500
    },
    {
      "epoch": 2.04,
      "grad_norm": 19.813188552856445,
      "learning_rate": 4.795683020683021e-05,
      "loss": 0.3886,
      "step": 17000
    },
    {
      "epoch": 2.04,
      "eval_f1": 0.5543489692487942,
      "eval_loss": 0.7798230648040771,
      "eval_precision": 0.6125764375980934,
      "eval_recall": 0.543671740445934,
      "eval_runtime": 6.7491,
      "eval_samples_per_second": 130.09,
      "eval_steps_per_second": 8.149,
      "step": 17000
    },
    {
      "epoch": 2.1,
      "grad_norm": 7.927220821380615,
      "learning_rate": 4.789670514670515e-05,
      "loss": 0.3883,
      "step": 17500
    },
    {
      "epoch": 2.1,
      "eval_f1": 0.5978449313058904,
      "eval_loss": 0.8385018110275269,
      "eval_precision": 0.5948463716988197,
      "eval_recall": 0.6225499231950845,
      "eval_runtime": 6.122,
      "eval_samples_per_second": 143.416,
      "eval_steps_per_second": 8.984,
      "step": 17500
    },
    {
      "epoch": 2.16,
      "grad_norm": 6.237366199493408,
      "learning_rate": 4.783658008658009e-05,
      "loss": 0.4011,
      "step": 18000
    },
    {
      "epoch": 2.16,
      "eval_f1": 0.5914931472808443,
      "eval_loss": 0.7754688858985901,
      "eval_precision": 0.655128213311837,
      "eval_recall": 0.578716194200065,
      "eval_runtime": 6.558,
      "eval_samples_per_second": 133.882,
      "eval_steps_per_second": 8.387,
      "step": 18000
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3301048278808594,
      "learning_rate": 4.777645502645503e-05,
      "loss": 0.3992,
      "step": 18500
    },
    {
      "epoch": 2.22,
      "eval_f1": 0.5472455226037474,
      "eval_loss": 0.788632333278656,
      "eval_precision": 0.558195855728615,
      "eval_recall": 0.5519042964204254,
      "eval_runtime": 6.124,
      "eval_samples_per_second": 143.371,
      "eval_steps_per_second": 8.981,
      "step": 18500
    },
    {
      "epoch": 2.28,
      "grad_norm": 8.471348762512207,
      "learning_rate": 4.771645021645022e-05,
      "loss": 0.393,
      "step": 19000
    },
    {
      "epoch": 2.28,
      "eval_f1": 0.5889012942356766,
      "eval_loss": 0.7660124897956848,
      "eval_precision": 0.5901145289176211,
      "eval_recall": 0.592326956197924,
      "eval_runtime": 5.8572,
      "eval_samples_per_second": 149.9,
      "eval_steps_per_second": 9.39,
      "step": 19000
    },
    {
      "epoch": 2.34,
      "grad_norm": 15.840304374694824,
      "learning_rate": 4.765632515632516e-05,
      "loss": 0.3891,
      "step": 19500
    },
    {
      "epoch": 2.34,
      "eval_f1": 0.5354251462409856,
      "eval_loss": 0.7701670527458191,
      "eval_precision": 0.579215207029406,
      "eval_recall": 0.5330605595121725,
      "eval_runtime": 6.1187,
      "eval_samples_per_second": 143.495,
      "eval_steps_per_second": 8.989,
      "step": 19500
    },
    {
      "epoch": 2.41,
      "grad_norm": 1.6515294313430786,
      "learning_rate": 4.75962000962001e-05,
      "loss": 0.4119,
      "step": 20000
    },
    {
      "epoch": 2.41,
      "eval_f1": 0.5110658029804255,
      "eval_loss": 0.8545361161231995,
      "eval_precision": 0.5405823804957771,
      "eval_recall": 0.5243262114229856,
      "eval_runtime": 6.2418,
      "eval_samples_per_second": 140.665,
      "eval_steps_per_second": 8.812,
      "step": 20000
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.166147470474243,
      "learning_rate": 4.753607503607504e-05,
      "loss": 0.3981,
      "step": 20500
    },
    {
      "epoch": 2.47,
      "eval_f1": 0.53639943040752,
      "eval_loss": 0.864085853099823,
      "eval_precision": 0.5695344700259635,
      "eval_recall": 0.5536247265279522,
      "eval_runtime": 5.9635,
      "eval_samples_per_second": 147.229,
      "eval_steps_per_second": 9.223,
      "step": 20500
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.143538475036621,
      "learning_rate": 4.747594997594998e-05,
      "loss": 0.4,
      "step": 21000
    },
    {
      "epoch": 2.53,
      "eval_f1": 0.582186065915728,
      "eval_loss": 0.8044998049736023,
      "eval_precision": 0.5987904356270873,
      "eval_recall": 0.5844826141600334,
      "eval_runtime": 5.9156,
      "eval_samples_per_second": 148.422,
      "eval_steps_per_second": 9.298,
      "step": 21000
    },
    {
      "epoch": 2.59,
      "grad_norm": 5.849362850189209,
      "learning_rate": 4.741582491582492e-05,
      "loss": 0.4059,
      "step": 21500
    },
    {
      "epoch": 2.59,
      "eval_f1": 0.569600279809319,
      "eval_loss": 0.8023470044136047,
      "eval_precision": 0.6300909361955873,
      "eval_recall": 0.5548880510170833,
      "eval_runtime": 5.9073,
      "eval_samples_per_second": 148.629,
      "eval_steps_per_second": 9.31,
      "step": 21500
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.0296847820281982,
      "learning_rate": 4.735582010582011e-05,
      "loss": 0.3805,
      "step": 22000
    },
    {
      "epoch": 2.65,
      "eval_f1": 0.5387095557628462,
      "eval_loss": 0.8242425322532654,
      "eval_precision": 0.5632921859195318,
      "eval_recall": 0.536337569240795,
      "eval_runtime": 6.1681,
      "eval_samples_per_second": 142.345,
      "eval_steps_per_second": 8.917,
      "step": 22000
    },
    {
      "epoch": 2.71,
      "grad_norm": 5.022754192352295,
      "learning_rate": 4.729569504569505e-05,
      "loss": 0.4126,
      "step": 22500
    },
    {
      "epoch": 2.71,
      "eval_f1": 0.525337187977395,
      "eval_loss": 0.8866151571273804,
      "eval_precision": 0.563019122327633,
      "eval_recall": 0.5244211702276219,
      "eval_runtime": 6.5791,
      "eval_samples_per_second": 133.453,
      "eval_steps_per_second": 8.36,
      "step": 22500
    },
    {
      "epoch": 2.77,
      "grad_norm": 6.320919990539551,
      "learning_rate": 4.7235690235690236e-05,
      "loss": 0.3959,
      "step": 23000
    },
    {
      "epoch": 2.77,
      "eval_f1": 0.5715827904573106,
      "eval_loss": 0.922848641872406,
      "eval_precision": 0.6485667793604627,
      "eval_recall": 0.5569566634082763,
      "eval_runtime": 6.5486,
      "eval_samples_per_second": 134.075,
      "eval_steps_per_second": 8.399,
      "step": 23000
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2674639225006104,
      "learning_rate": 4.717556517556518e-05,
      "loss": 0.3972,
      "step": 23500
    },
    {
      "epoch": 2.83,
      "eval_f1": 0.6330230633421515,
      "eval_loss": 0.8297170400619507,
      "eval_precision": 0.64149542011954,
      "eval_recall": 0.633559558720849,
      "eval_runtime": 6.1502,
      "eval_samples_per_second": 142.759,
      "eval_steps_per_second": 8.943,
      "step": 23500
    },
    {
      "epoch": 2.89,
      "grad_norm": 5.248292922973633,
      "learning_rate": 4.711544011544012e-05,
      "loss": 0.3779,
      "step": 24000
    },
    {
      "epoch": 2.89,
      "eval_f1": 0.5897470753706388,
      "eval_loss": 0.8682935833930969,
      "eval_precision": 0.6023327508623889,
      "eval_recall": 0.5919508448540706,
      "eval_runtime": 6.3839,
      "eval_samples_per_second": 137.534,
      "eval_steps_per_second": 8.615,
      "step": 24000
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.1834635734558105,
      "learning_rate": 4.705531505531506e-05,
      "loss": 0.3951,
      "step": 24500
    },
    {
      "epoch": 2.95,
      "eval_f1": 0.5124969418380673,
      "eval_loss": 0.8628427982330322,
      "eval_precision": 0.5891878367677518,
      "eval_recall": 0.5116492110040497,
      "eval_runtime": 6.1272,
      "eval_samples_per_second": 143.295,
      "eval_steps_per_second": 8.976,
      "step": 24500
    },
    {
      "epoch": 3.01,
      "grad_norm": 12.86809253692627,
      "learning_rate": 4.699518999519e-05,
      "loss": 0.3916,
      "step": 25000
    },
    {
      "epoch": 3.01,
      "eval_f1": 0.5024144172335627,
      "eval_loss": 0.9203388094902039,
      "eval_precision": 0.6304846593419121,
      "eval_recall": 0.5026001955034213,
      "eval_runtime": 6.0613,
      "eval_samples_per_second": 144.854,
      "eval_steps_per_second": 9.074,
      "step": 25000
    },
    {
      "epoch": 3.07,
      "grad_norm": 3.2101404666900635,
      "learning_rate": 4.693506493506494e-05,
      "loss": 0.3524,
      "step": 25500
    },
    {
      "epoch": 3.07,
      "eval_f1": 0.5010573535401949,
      "eval_loss": 0.9825400710105896,
      "eval_precision": 0.6088672873311428,
      "eval_recall": 0.5039249639249639,
      "eval_runtime": 5.9279,
      "eval_samples_per_second": 148.113,
      "eval_steps_per_second": 9.278,
      "step": 25500
    },
    {
      "epoch": 3.13,
      "grad_norm": 16.025983810424805,
      "learning_rate": 4.687493987493988e-05,
      "loss": 0.3332,
      "step": 26000
    },
    {
      "epoch": 3.13,
      "eval_f1": 0.5814110917677252,
      "eval_loss": 0.8755331635475159,
      "eval_precision": 0.5979503457905185,
      "eval_recall": 0.5711502117953731,
      "eval_runtime": 6.5321,
      "eval_samples_per_second": 134.413,
      "eval_steps_per_second": 8.42,
      "step": 26000
    },
    {
      "epoch": 3.19,
      "grad_norm": 12.575716972351074,
      "learning_rate": 4.681481481481482e-05,
      "loss": 0.3517,
      "step": 26500
    },
    {
      "epoch": 3.19,
      "eval_f1": 0.6181463909269773,
      "eval_loss": 0.9921577572822571,
      "eval_precision": 0.6701390442386371,
      "eval_recall": 0.5940511101801424,
      "eval_runtime": 6.2002,
      "eval_samples_per_second": 141.609,
      "eval_steps_per_second": 8.871,
      "step": 26500
    },
    {
      "epoch": 3.25,
      "grad_norm": 2.219468355178833,
      "learning_rate": 4.675468975468976e-05,
      "loss": 0.3534,
      "step": 27000
    },
    {
      "epoch": 3.25,
      "eval_f1": 0.5242620258087817,
      "eval_loss": 0.9572548866271973,
      "eval_precision": 0.5652503976549385,
      "eval_recall": 0.5174640413350091,
      "eval_runtime": 6.4041,
      "eval_samples_per_second": 137.101,
      "eval_steps_per_second": 8.588,
      "step": 27000
    },
    {
      "epoch": 3.31,
      "grad_norm": 2.1716973781585693,
      "learning_rate": 4.6694684944684945e-05,
      "loss": 0.3544,
      "step": 27500
    },
    {
      "epoch": 3.31,
      "eval_f1": 0.5551290620723939,
      "eval_loss": 0.9826774001121521,
      "eval_precision": 0.5738657811880764,
      "eval_recall": 0.5531322440999861,
      "eval_runtime": 5.8897,
      "eval_samples_per_second": 149.075,
      "eval_steps_per_second": 9.338,
      "step": 27500
    },
    {
      "epoch": 3.37,
      "grad_norm": 5.642761707305908,
      "learning_rate": 4.6634559884559885e-05,
      "loss": 0.3526,
      "step": 28000
    },
    {
      "epoch": 3.37,
      "eval_f1": 0.46574966897620484,
      "eval_loss": 0.9517427682876587,
      "eval_precision": 0.6019158514451703,
      "eval_recall": 0.4737364427687008,
      "eval_runtime": 6.2232,
      "eval_samples_per_second": 141.086,
      "eval_steps_per_second": 8.838,
      "step": 28000
    },
    {
      "epoch": 3.43,
      "grad_norm": 8.693815231323242,
      "learning_rate": 4.6574434824434825e-05,
      "loss": 0.3448,
      "step": 28500
    },
    {
      "epoch": 3.43,
      "eval_f1": 0.5231658522131929,
      "eval_loss": 0.955856204032898,
      "eval_precision": 0.5743577178625582,
      "eval_recall": 0.5138062654191686,
      "eval_runtime": 6.2254,
      "eval_samples_per_second": 141.036,
      "eval_steps_per_second": 8.835,
      "step": 28500
    },
    {
      "epoch": 3.49,
      "grad_norm": 10.058433532714844,
      "learning_rate": 4.6514309764309766e-05,
      "loss": 0.3662,
      "step": 29000
    },
    {
      "epoch": 3.49,
      "eval_f1": 0.6173176500366803,
      "eval_loss": 0.8469758033752441,
      "eval_precision": 0.6416565078769693,
      "eval_recall": 0.6176418563515337,
      "eval_runtime": 6.1339,
      "eval_samples_per_second": 143.14,
      "eval_steps_per_second": 8.967,
      "step": 29000
    },
    {
      "epoch": 3.55,
      "grad_norm": 9.207432746887207,
      "learning_rate": 4.645466570466571e-05,
      "loss": 0.3502,
      "step": 29500
    },
    {
      "epoch": 3.55,
      "eval_f1": 0.5911826792863208,
      "eval_loss": 0.8524171113967896,
      "eval_precision": 0.6606129937002267,
      "eval_recall": 0.577619513103384,
      "eval_runtime": 5.9367,
      "eval_samples_per_second": 147.893,
      "eval_steps_per_second": 9.264,
      "step": 29500
    },
    {
      "epoch": 3.61,
      "grad_norm": 2.538233757019043,
      "learning_rate": 4.639454064454065e-05,
      "loss": 0.3733,
      "step": 30000
    },
    {
      "epoch": 3.61,
      "eval_f1": 0.5466184654496565,
      "eval_loss": 0.9210164546966553,
      "eval_precision": 0.5577658998711631,
      "eval_recall": 0.5554857329050877,
      "eval_runtime": 6.4254,
      "eval_samples_per_second": 136.645,
      "eval_steps_per_second": 8.56,
      "step": 30000
    },
    {
      "epoch": 3.67,
      "grad_norm": 2.017235279083252,
      "learning_rate": 4.633441558441559e-05,
      "loss": 0.3424,
      "step": 30500
    },
    {
      "epoch": 3.67,
      "eval_f1": 0.5809192439862544,
      "eval_loss": 0.9294881820678711,
      "eval_precision": 0.5863171312403235,
      "eval_recall": 0.6100302564818694,
      "eval_runtime": 6.2949,
      "eval_samples_per_second": 139.477,
      "eval_steps_per_second": 8.737,
      "step": 30500
    },
    {
      "epoch": 3.73,
      "grad_norm": 7.538774490356445,
      "learning_rate": 4.627429052429053e-05,
      "loss": 0.3591,
      "step": 31000
    },
    {
      "epoch": 3.73,
      "eval_f1": 0.4588251776601326,
      "eval_loss": 0.970705509185791,
      "eval_precision": 0.5827537007312288,
      "eval_recall": 0.4768803239770982,
      "eval_runtime": 6.0168,
      "eval_samples_per_second": 145.925,
      "eval_steps_per_second": 9.141,
      "step": 31000
    },
    {
      "epoch": 3.79,
      "grad_norm": 4.64936637878418,
      "learning_rate": 4.621416546416546e-05,
      "loss": 0.3634,
      "step": 31500
    },
    {
      "epoch": 3.79,
      "eval_f1": 0.575160103511553,
      "eval_loss": 0.8524229526519775,
      "eval_precision": 0.6136046998053873,
      "eval_recall": 0.5680603267700042,
      "eval_runtime": 6.5694,
      "eval_samples_per_second": 133.651,
      "eval_steps_per_second": 8.372,
      "step": 31500
    },
    {
      "epoch": 3.79,
      "step": 31500,
      "total_flos": 1.3260126913238016e+17,
      "train_loss": 0.42908321610708083,
      "train_runtime": 7590.5884,
      "train_samples_per_second": 876.388,
      "train_steps_per_second": 54.778
    }
  ],
  "logging_steps": 500,
  "max_steps": 415800,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "total_flos": 1.3260126913238016e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}