| { |
| "best_global_step": 3892, |
| "best_metric": 0.9585253456221198, |
| "best_model_checkpoint": "nb_bert_base_relevance_weighted/checkpoint-3892", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 7784, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.012846865364850977, |
| "grad_norm": 1.7052006721496582, |
| "learning_rate": 1.9874100719424462e-05, |
| "loss": 0.6012, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.025693730729701953, |
| "grad_norm": 32.50202941894531, |
| "learning_rate": 1.9745632065775954e-05, |
| "loss": 0.6445, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03854059609455293, |
| "grad_norm": 0.2601597011089325, |
| "learning_rate": 1.9617163412127443e-05, |
| "loss": 0.5417, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.051387461459403906, |
| "grad_norm": 0.4800266921520233, |
| "learning_rate": 1.948869475847893e-05, |
| "loss": 0.4754, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06423432682425488, |
| "grad_norm": 0.1330161839723587, |
| "learning_rate": 1.9360226104830423e-05, |
| "loss": 0.6532, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.07708119218910586, |
| "grad_norm": 15.620330810546875, |
| "learning_rate": 1.9231757451181915e-05, |
| "loss": 0.7903, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.08992805755395683, |
| "grad_norm": 0.08341807126998901, |
| "learning_rate": 1.9103288797533403e-05, |
| "loss": 0.3625, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.10277492291880781, |
| "grad_norm": 27.293317794799805, |
| "learning_rate": 1.8974820143884892e-05, |
| "loss": 0.5664, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1156217882836588, |
| "grad_norm": 0.09574569761753082, |
| "learning_rate": 1.8846351490236384e-05, |
| "loss": 0.4336, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.12846865364850976, |
| "grad_norm": 0.04544169455766678, |
| "learning_rate": 1.8717882836587876e-05, |
| "loss": 0.5034, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.14131551901336073, |
| "grad_norm": 43.972049713134766, |
| "learning_rate": 1.8589414182939364e-05, |
| "loss": 0.5966, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.15416238437821173, |
| "grad_norm": 4.7578582763671875, |
| "learning_rate": 1.8460945529290856e-05, |
| "loss": 0.2444, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.1670092497430627, |
| "grad_norm": 28.756206512451172, |
| "learning_rate": 1.8332476875642344e-05, |
| "loss": 0.4511, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.17985611510791366, |
| "grad_norm": 0.1027965322136879, |
| "learning_rate": 1.8204008221993833e-05, |
| "loss": 0.4211, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.19270298047276466, |
| "grad_norm": 0.023025257512927055, |
| "learning_rate": 1.8075539568345325e-05, |
| "loss": 0.4003, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.20554984583761562, |
| "grad_norm": 11.712437629699707, |
| "learning_rate": 1.7947070914696817e-05, |
| "loss": 0.5503, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2183967112024666, |
| "grad_norm": 0.0492803193628788, |
| "learning_rate": 1.7818602261048305e-05, |
| "loss": 0.336, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2312435765673176, |
| "grad_norm": 0.8247962594032288, |
| "learning_rate": 1.7690133607399797e-05, |
| "loss": 0.4743, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.24409044193216856, |
| "grad_norm": 0.03478659689426422, |
| "learning_rate": 1.7561664953751285e-05, |
| "loss": 0.4543, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.2569373072970195, |
| "grad_norm": 0.3004553020000458, |
| "learning_rate": 1.7433196300102777e-05, |
| "loss": 0.2296, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2697841726618705, |
| "grad_norm": 2.058995246887207, |
| "learning_rate": 1.7304727646454266e-05, |
| "loss": 0.3517, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.28263103802672146, |
| "grad_norm": 0.18223777413368225, |
| "learning_rate": 1.7176258992805758e-05, |
| "loss": 0.3628, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.2954779033915725, |
| "grad_norm": 30.207103729248047, |
| "learning_rate": 1.7047790339157246e-05, |
| "loss": 0.3983, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.30832476875642345, |
| "grad_norm": 0.0368342325091362, |
| "learning_rate": 1.6919321685508738e-05, |
| "loss": 0.5342, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3211716341212744, |
| "grad_norm": 0.28241753578186035, |
| "learning_rate": 1.679085303186023e-05, |
| "loss": 0.3943, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.3340184994861254, |
| "grad_norm": 0.033875174820423126, |
| "learning_rate": 1.6662384378211718e-05, |
| "loss": 0.2127, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.34686536485097635, |
| "grad_norm": 0.11910529434680939, |
| "learning_rate": 1.6533915724563207e-05, |
| "loss": 0.4309, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.3597122302158273, |
| "grad_norm": 0.5434423089027405, |
| "learning_rate": 1.64054470709147e-05, |
| "loss": 0.1101, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3725590955806783, |
| "grad_norm": 47.17951202392578, |
| "learning_rate": 1.627697841726619e-05, |
| "loss": 0.2418, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3854059609455293, |
| "grad_norm": 2.3193013668060303, |
| "learning_rate": 1.614850976361768e-05, |
| "loss": 0.2157, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.3982528263103803, |
| "grad_norm": 0.10629215836524963, |
| "learning_rate": 1.6020041109969167e-05, |
| "loss": 0.4207, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.41109969167523125, |
| "grad_norm": 0.027008764445781708, |
| "learning_rate": 1.589157245632066e-05, |
| "loss": 0.2517, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.4239465570400822, |
| "grad_norm": 0.026176316663622856, |
| "learning_rate": 1.5763103802672148e-05, |
| "loss": 0.2796, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.4367934224049332, |
| "grad_norm": 0.09446433931589127, |
| "learning_rate": 1.563463514902364e-05, |
| "loss": 0.2172, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.44964028776978415, |
| "grad_norm": 0.08241437375545502, |
| "learning_rate": 1.550616649537513e-05, |
| "loss": 0.3683, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.4624871531346352, |
| "grad_norm": 44.19976806640625, |
| "learning_rate": 1.537769784172662e-05, |
| "loss": 0.4662, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.47533401849948614, |
| "grad_norm": 0.03341331705451012, |
| "learning_rate": 1.524922918807811e-05, |
| "loss": 0.2785, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.4881808838643371, |
| "grad_norm": 0.13134817779064178, |
| "learning_rate": 1.5120760534429599e-05, |
| "loss": 0.3011, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.501027749229188, |
| "grad_norm": 0.09789072722196579, |
| "learning_rate": 1.4992291880781092e-05, |
| "loss": 0.132, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.513874614594039, |
| "grad_norm": 0.5926951766014099, |
| "learning_rate": 1.486382322713258e-05, |
| "loss": 0.3846, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5267214799588901, |
| "grad_norm": 0.016193868592381477, |
| "learning_rate": 1.473535457348407e-05, |
| "loss": 0.1759, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.539568345323741, |
| "grad_norm": 107.60330963134766, |
| "learning_rate": 1.4606885919835561e-05, |
| "loss": 0.329, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.552415210688592, |
| "grad_norm": 0.013422131538391113, |
| "learning_rate": 1.4478417266187053e-05, |
| "loss": 0.2515, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.5652620760534429, |
| "grad_norm": 0.015327083878219128, |
| "learning_rate": 1.4349948612538543e-05, |
| "loss": 0.2247, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.5781089414182939, |
| "grad_norm": 46.177696228027344, |
| "learning_rate": 1.4221479958890031e-05, |
| "loss": 0.3581, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.590955806783145, |
| "grad_norm": 0.16671152412891388, |
| "learning_rate": 1.4093011305241522e-05, |
| "loss": 0.2248, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6038026721479959, |
| "grad_norm": 0.021253060549497604, |
| "learning_rate": 1.3964542651593012e-05, |
| "loss": 0.0853, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.6166495375128469, |
| "grad_norm": 0.0710051879286766, |
| "learning_rate": 1.3836073997944504e-05, |
| "loss": 0.4464, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6294964028776978, |
| "grad_norm": 44.617759704589844, |
| "learning_rate": 1.3707605344295994e-05, |
| "loss": 0.261, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.6423432682425488, |
| "grad_norm": 0.012564734555780888, |
| "learning_rate": 1.3579136690647484e-05, |
| "loss": 0.0948, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.6551901336073997, |
| "grad_norm": 0.10313341021537781, |
| "learning_rate": 1.3450668036998972e-05, |
| "loss": 0.2493, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.6680369989722508, |
| "grad_norm": 0.16787320375442505, |
| "learning_rate": 1.3322199383350463e-05, |
| "loss": 0.1627, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.6808838643371018, |
| "grad_norm": 0.23290768265724182, |
| "learning_rate": 1.3193730729701954e-05, |
| "loss": 0.4074, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.6937307297019527, |
| "grad_norm": 0.15848670899868011, |
| "learning_rate": 1.3065262076053445e-05, |
| "loss": 0.2521, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.7065775950668037, |
| "grad_norm": 0.1227729544043541, |
| "learning_rate": 1.2936793422404935e-05, |
| "loss": 0.2727, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.7194244604316546, |
| "grad_norm": 14.59352970123291, |
| "learning_rate": 1.2808324768756423e-05, |
| "loss": 0.1938, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7322713257965057, |
| "grad_norm": 0.011683103628456593, |
| "learning_rate": 1.2679856115107915e-05, |
| "loss": 0.3774, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.7451181911613566, |
| "grad_norm": 0.15291182696819305, |
| "learning_rate": 1.2551387461459405e-05, |
| "loss": 0.414, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.7579650565262076, |
| "grad_norm": 0.008359256200492382, |
| "learning_rate": 1.2422918807810895e-05, |
| "loss": 0.1985, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.7708119218910586, |
| "grad_norm": 0.20358124375343323, |
| "learning_rate": 1.2294450154162386e-05, |
| "loss": 0.3415, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.7836587872559095, |
| "grad_norm": 0.07088713347911835, |
| "learning_rate": 1.2165981500513874e-05, |
| "loss": 0.3632, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.7965056526207606, |
| "grad_norm": 0.09244630485773087, |
| "learning_rate": 1.2037512846865368e-05, |
| "loss": 0.0333, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.8093525179856115, |
| "grad_norm": 0.012910844758152962, |
| "learning_rate": 1.1909044193216856e-05, |
| "loss": 0.3435, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.8221993833504625, |
| "grad_norm": 0.17528291046619415, |
| "learning_rate": 1.1780575539568346e-05, |
| "loss": 0.4858, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8350462487153134, |
| "grad_norm": 0.23448841273784637, |
| "learning_rate": 1.1652106885919836e-05, |
| "loss": 0.239, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.8478931140801644, |
| "grad_norm": 0.7074928879737854, |
| "learning_rate": 1.1523638232271327e-05, |
| "loss": 0.1674, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.8607399794450155, |
| "grad_norm": 0.1711515188217163, |
| "learning_rate": 1.1395169578622818e-05, |
| "loss": 0.4993, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.8735868448098664, |
| "grad_norm": 0.08787185698747635, |
| "learning_rate": 1.1266700924974307e-05, |
| "loss": 0.1309, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.8864337101747174, |
| "grad_norm": 0.12081218510866165, |
| "learning_rate": 1.1138232271325797e-05, |
| "loss": 0.3276, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.8992805755395683, |
| "grad_norm": 0.0644136592745781, |
| "learning_rate": 1.1009763617677287e-05, |
| "loss": 0.2722, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.9121274409044193, |
| "grad_norm": 0.10556616634130478, |
| "learning_rate": 1.0881294964028777e-05, |
| "loss": 0.4247, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.9249743062692704, |
| "grad_norm": 0.2076292783021927, |
| "learning_rate": 1.075282631038027e-05, |
| "loss": 0.3477, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.9378211716341213, |
| "grad_norm": 0.2808614671230316, |
| "learning_rate": 1.062435765673176e-05, |
| "loss": 0.4806, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.9506680369989723, |
| "grad_norm": 0.0461493581533432, |
| "learning_rate": 1.0495889003083248e-05, |
| "loss": 0.2213, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.9635149023638232, |
| "grad_norm": 15.367751121520996, |
| "learning_rate": 1.0367420349434738e-05, |
| "loss": 0.2639, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.9763617677286742, |
| "grad_norm": 0.3630225658416748, |
| "learning_rate": 1.023895169578623e-05, |
| "loss": 0.1761, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.9892086330935251, |
| "grad_norm": 0.12363607436418533, |
| "learning_rate": 1.011048304213772e-05, |
| "loss": 0.2654, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_f1": 0.6698872785829307, |
| "eval_loss": 0.14509662985801697, |
| "eval_precision": 0.5148514851485149, |
| "eval_recall": 0.9585253456221198, |
| "eval_runtime": 174.2843, |
| "eval_samples_per_second": 89.325, |
| "eval_steps_per_second": 2.794, |
| "step": 3892 |
| }, |
| { |
| "epoch": 1.002055498458376, |
| "grad_norm": 3.4445955753326416, |
| "learning_rate": 9.98201438848921e-06, |
| "loss": 0.1798, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.014902363823227, |
| "grad_norm": 0.015314252115786076, |
| "learning_rate": 9.853545734840699e-06, |
| "loss": 0.1287, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.027749229188078, |
| "grad_norm": 0.010747662745416164, |
| "learning_rate": 9.72507708119219e-06, |
| "loss": 0.203, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.0405960945529291, |
| "grad_norm": 0.06875327974557877, |
| "learning_rate": 9.59660842754368e-06, |
| "loss": 0.0572, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.0534429599177801, |
| "grad_norm": 0.10550885647535324, |
| "learning_rate": 9.468139773895171e-06, |
| "loss": 0.0816, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.066289825282631, |
| "grad_norm": 0.19790449738502502, |
| "learning_rate": 9.339671120246661e-06, |
| "loss": 0.2839, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.079136690647482, |
| "grad_norm": 0.18247967958450317, |
| "learning_rate": 9.21120246659815e-06, |
| "loss": 0.2604, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.091983556012333, |
| "grad_norm": 0.5019229650497437, |
| "learning_rate": 9.082733812949641e-06, |
| "loss": 0.2138, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.104830421377184, |
| "grad_norm": 0.19784913957118988, |
| "learning_rate": 8.954265159301132e-06, |
| "loss": 0.1053, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.117677286742035, |
| "grad_norm": 0.16224659979343414, |
| "learning_rate": 8.825796505652622e-06, |
| "loss": 0.1271, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.1305241521068858, |
| "grad_norm": 0.006647386122494936, |
| "learning_rate": 8.697327852004112e-06, |
| "loss": 0.0294, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.1433710174717369, |
| "grad_norm": 0.0799582228064537, |
| "learning_rate": 8.568859198355602e-06, |
| "loss": 0.3137, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.1562178828365879, |
| "grad_norm": 0.013868354260921478, |
| "learning_rate": 8.440390544707092e-06, |
| "loss": 0.1883, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.169064748201439, |
| "grad_norm": 0.008541465736925602, |
| "learning_rate": 8.311921891058582e-06, |
| "loss": 0.1162, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.1819116135662897, |
| "grad_norm": 0.07767793536186218, |
| "learning_rate": 8.183453237410073e-06, |
| "loss": 0.1677, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.1947584789311407, |
| "grad_norm": 0.014269966632127762, |
| "learning_rate": 8.054984583761563e-06, |
| "loss": 0.1206, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.2076053442959918, |
| "grad_norm": 52.39109420776367, |
| "learning_rate": 7.926515930113053e-06, |
| "loss": 0.2928, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.2204522096608428, |
| "grad_norm": 0.010533468797802925, |
| "learning_rate": 7.798047276464543e-06, |
| "loss": 0.1325, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.2332990750256938, |
| "grad_norm": 0.012057777494192123, |
| "learning_rate": 7.669578622816033e-06, |
| "loss": 0.1304, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.2461459403905448, |
| "grad_norm": 0.0717141404747963, |
| "learning_rate": 7.541109969167524e-06, |
| "loss": 0.1904, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.2589928057553956, |
| "grad_norm": 0.024251999333500862, |
| "learning_rate": 7.4126413155190135e-06, |
| "loss": 0.2172, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.2718396711202467, |
| "grad_norm": 0.19064459204673767, |
| "learning_rate": 7.2841726618705045e-06, |
| "loss": 0.1055, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.2846865364850977, |
| "grad_norm": 0.16635222733020782, |
| "learning_rate": 7.155704008221994e-06, |
| "loss": 0.1576, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.2975334018499485, |
| "grad_norm": 0.15081870555877686, |
| "learning_rate": 7.027235354573485e-06, |
| "loss": 0.2825, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.3103802672147995, |
| "grad_norm": 0.02650671824812889, |
| "learning_rate": 6.898766700924975e-06, |
| "loss": 0.2817, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.3232271325796505, |
| "grad_norm": 0.1502298265695572, |
| "learning_rate": 6.770298047276464e-06, |
| "loss": 0.1877, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.3360739979445015, |
| "grad_norm": 0.004524695686995983, |
| "learning_rate": 6.641829393627955e-06, |
| "loss": 0.0584, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.3489208633093526, |
| "grad_norm": 0.014236578717827797, |
| "learning_rate": 6.5133607399794455e-06, |
| "loss": 0.118, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.3617677286742036, |
| "grad_norm": 0.003478697268292308, |
| "learning_rate": 6.384892086330936e-06, |
| "loss": 0.0895, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.3746145940390544, |
| "grad_norm": 0.003573470050469041, |
| "learning_rate": 6.256423432682426e-06, |
| "loss": 0.1558, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.3874614594039054, |
| "grad_norm": 0.03104526177048683, |
| "learning_rate": 6.127954779033917e-06, |
| "loss": 0.1444, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.4003083247687564, |
| "grad_norm": 0.18471628427505493, |
| "learning_rate": 5.999486125385406e-06, |
| "loss": 0.0891, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.4131551901336075, |
| "grad_norm": 0.008260599337518215, |
| "learning_rate": 5.871017471736896e-06, |
| "loss": 0.1163, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.4260020554984583, |
| "grad_norm": 0.002708755899220705, |
| "learning_rate": 5.742548818088387e-06, |
| "loss": 0.1499, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.4388489208633093, |
| "grad_norm": 0.004611977841705084, |
| "learning_rate": 5.614080164439877e-06, |
| "loss": 0.1448, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.4516957862281603, |
| "grad_norm": 0.0890050157904625, |
| "learning_rate": 5.485611510791368e-06, |
| "loss": 0.3199, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.4645426515930113, |
| "grad_norm": 57.1250114440918, |
| "learning_rate": 5.357142857142857e-06, |
| "loss": 0.1571, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.4773895169578624, |
| "grad_norm": 0.01847957633435726, |
| "learning_rate": 5.228674203494348e-06, |
| "loss": 0.0781, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.4902363823227134, |
| "grad_norm": 0.003477481659501791, |
| "learning_rate": 5.100205549845838e-06, |
| "loss": 0.1001, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.5030832476875642, |
| "grad_norm": 0.12159717082977295, |
| "learning_rate": 4.971736896197328e-06, |
| "loss": 0.0512, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.5159301130524152, |
| "grad_norm": 0.15670272707939148, |
| "learning_rate": 4.8432682425488185e-06, |
| "loss": 0.1314, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.5287769784172662, |
| "grad_norm": 0.008123679086565971, |
| "learning_rate": 4.714799588900309e-06, |
| "loss": 0.1374, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.541623843782117, |
| "grad_norm": 0.5019196271896362, |
| "learning_rate": 4.586330935251799e-06, |
| "loss": 0.1858, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.554470709146968, |
| "grad_norm": 51.346214294433594, |
| "learning_rate": 4.457862281603289e-06, |
| "loss": 0.2563, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.567317574511819, |
| "grad_norm": 88.88087463378906, |
| "learning_rate": 4.329393627954779e-06, |
| "loss": 0.113, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.58016443987667, |
| "grad_norm": 42.9530143737793, |
| "learning_rate": 4.200924974306269e-06, |
| "loss": 0.3144, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.5930113052415211, |
| "grad_norm": 0.011328631080687046, |
| "learning_rate": 4.07245632065776e-06, |
| "loss": 0.1511, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.6058581706063721, |
| "grad_norm": 0.004166710190474987, |
| "learning_rate": 3.9439876670092506e-06, |
| "loss": 0.1997, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.6187050359712232, |
| "grad_norm": 0.005003561731427908, |
| "learning_rate": 3.81551901336074e-06, |
| "loss": 0.14, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.631551901336074, |
| "grad_norm": 0.05702698230743408, |
| "learning_rate": 3.6870503597122305e-06, |
| "loss": 0.2309, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.644398766700925, |
| "grad_norm": 0.005246581044048071, |
| "learning_rate": 3.5585817060637206e-06, |
| "loss": 0.1816, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.6572456320657758, |
| "grad_norm": 0.17065295577049255, |
| "learning_rate": 3.430113052415211e-06, |
| "loss": 0.1451, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.6700924974306268, |
| "grad_norm": 104.84506225585938, |
| "learning_rate": 3.3016443987667014e-06, |
| "loss": 0.1092, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.6829393627954778, |
| "grad_norm": 0.00523362448439002, |
| "learning_rate": 3.1731757451181915e-06, |
| "loss": 0.1821, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.6957862281603289, |
| "grad_norm": 0.18176575005054474, |
| "learning_rate": 3.0447070914696817e-06, |
| "loss": 0.2245, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.70863309352518, |
| "grad_norm": 0.013213906437158585, |
| "learning_rate": 2.9162384378211715e-06, |
| "loss": 0.1323, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.721479958890031, |
| "grad_norm": 0.07671088725328445, |
| "learning_rate": 2.787769784172662e-06, |
| "loss": 0.3215, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.734326824254882, |
| "grad_norm": 0.07234475016593933, |
| "learning_rate": 2.6593011305241522e-06, |
| "loss": 0.2066, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.7471736896197327, |
| "grad_norm": 0.008514286018908024, |
| "learning_rate": 2.530832476875643e-06, |
| "loss": 0.0692, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.7600205549845838, |
| "grad_norm": 0.04811855033040047, |
| "learning_rate": 2.402363823227133e-06, |
| "loss": 0.0172, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.7728674203494348, |
| "grad_norm": 0.34988638758659363, |
| "learning_rate": 2.273895169578623e-06, |
| "loss": 0.0573, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "grad_norm": 0.10103049874305725, |
| "learning_rate": 2.1454265159301133e-06, |
| "loss": 0.386, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.7985611510791366, |
| "grad_norm": 0.08461391180753708, |
| "learning_rate": 2.0169578622816035e-06, |
| "loss": 0.1131, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.8114080164439876, |
| "grad_norm": 0.039088811725378036, |
| "learning_rate": 1.8884892086330936e-06, |
| "loss": 0.2684, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.8242548818088387, |
| "grad_norm": 0.055630821734666824, |
| "learning_rate": 1.760020554984584e-06, |
| "loss": 0.0983, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.8371017471736897, |
| "grad_norm": 0.004123068414628506, |
| "learning_rate": 1.6315519013360742e-06, |
| "loss": 0.091, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.8499486125385407, |
| "grad_norm": 0.0713982954621315, |
| "learning_rate": 1.5030832476875643e-06, |
| "loss": 0.1892, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.8627954779033917, |
| "grad_norm": 0.0326739139854908, |
| "learning_rate": 1.3746145940390545e-06, |
| "loss": 0.2522, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.8756423432682425, |
| "grad_norm": 0.04678690433502197, |
| "learning_rate": 1.2461459403905449e-06, |
| "loss": 0.1552, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.8884892086330936, |
| "grad_norm": 0.008167228661477566, |
| "learning_rate": 1.117677286742035e-06, |
| "loss": 0.2152, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.9013360739979444, |
| "grad_norm": 76.49407958984375, |
| "learning_rate": 9.892086330935252e-07, |
| "loss": 0.336, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.9141829393627954, |
| "grad_norm": 0.060154203325510025, |
| "learning_rate": 8.607399794450155e-07, |
| "loss": 0.1207, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.9270298047276464, |
| "grad_norm": 0.006918394006788731, |
| "learning_rate": 7.322713257965057e-07, |
| "loss": 0.239, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.9398766700924974, |
| "grad_norm": 0.10576531291007996, |
| "learning_rate": 6.038026721479959e-07, |
| "loss": 0.0316, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.9527235354573484, |
| "grad_norm": 28.669675827026367, |
| "learning_rate": 4.753340184994862e-07, |
| "loss": 0.2856, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.9655704008221995, |
| "grad_norm": 0.02562900446355343, |
| "learning_rate": 3.468653648509764e-07, |
| "loss": 0.1881, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.9784172661870505, |
| "grad_norm": 0.11044430732727051, |
| "learning_rate": 2.1839671120246663e-07, |
| "loss": 0.1464, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.9912641315519013, |
| "grad_norm": 0.14684735238552094, |
| "learning_rate": 8.992805755395684e-08, |
| "loss": 0.0918, |
| "step": 7750 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_f1": 0.8341013824884793, |
| "eval_loss": 0.23306386172771454, |
| "eval_precision": 0.8341013824884793, |
| "eval_recall": 0.8341013824884793, |
| "eval_runtime": 176.4143, |
| "eval_samples_per_second": 88.247, |
| "eval_steps_per_second": 2.761, |
| "step": 7784 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 7784, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.638366230621184e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |