{
  "best_global_step": 6586,
  "best_metric": 0.847490661036219,
  "best_model_checkpoint": "outputs/final-run/checkpoint-6586",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 6586,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.015186028853454821,
      "grad_norm": 2.3480491638183594,
      "learning_rate": 9.999658552822536e-06,
      "loss": 0.6629,
      "step": 50
    },
    {
      "epoch": 0.030372057706909643,
      "grad_norm": 3.3713912963867188,
      "learning_rate": 9.998606244733398e-06,
      "loss": 0.5702,
      "step": 100
    },
    {
      "epoch": 0.04555808656036447,
      "grad_norm": 5.710732460021973,
      "learning_rate": 9.996843083169648e-06,
      "loss": 0.5245,
      "step": 150
    },
    {
      "epoch": 0.060744115413819286,
      "grad_norm": 5.55316686630249,
      "learning_rate": 9.994369318871088e-06,
      "loss": 0.4684,
      "step": 200
    },
    {
      "epoch": 0.07593014426727411,
      "grad_norm": 8.149177551269531,
      "learning_rate": 9.991185303632574e-06,
      "loss": 0.4783,
      "step": 250
    },
    {
      "epoch": 0.09111617312072894,
      "grad_norm": 7.209503650665283,
      "learning_rate": 9.987291490253976e-06,
      "loss": 0.4589,
      "step": 300
    },
    {
      "epoch": 0.10630220197418375,
      "grad_norm": 7.983670234680176,
      "learning_rate": 9.98268843247581e-06,
      "loss": 0.4623,
      "step": 350
    },
    {
      "epoch": 0.12148823082763857,
      "grad_norm": 4.2466206550598145,
      "learning_rate": 9.977376784900465e-06,
      "loss": 0.4694,
      "step": 400
    },
    {
      "epoch": 0.1366742596810934,
      "grad_norm": 6.11989164352417,
      "learning_rate": 9.971357302899133e-06,
      "loss": 0.4965,
      "step": 450
    },
    {
      "epoch": 0.15186028853454822,
      "grad_norm": 4.606278896331787,
      "learning_rate": 9.964630842504372e-06,
      "loss": 0.4919,
      "step": 500
    },
    {
      "epoch": 0.16704631738800305,
      "grad_norm": 5.91365909576416,
      "learning_rate": 9.957198360288374e-06,
      "loss": 0.4536,
      "step": 550
    },
    {
      "epoch": 0.18223234624145787,
      "grad_norm": 5.541100025177002,
      "learning_rate": 9.949060913226936e-06,
      "loss": 0.4719,
      "step": 600
    },
    {
      "epoch": 0.19741837509491267,
      "grad_norm": 4.289076805114746,
      "learning_rate": 9.94021965854914e-06,
      "loss": 0.4136,
      "step": 650
    },
    {
      "epoch": 0.2126044039483675,
      "grad_norm": 14.322737693786621,
      "learning_rate": 9.930675853572787e-06,
      "loss": 0.4705,
      "step": 700
    },
    {
      "epoch": 0.22779043280182232,
      "grad_norm": 6.375570297241211,
      "learning_rate": 9.920430855525589e-06,
      "loss": 0.4701,
      "step": 750
    },
    {
      "epoch": 0.24297646165527714,
      "grad_norm": 3.261223316192627,
      "learning_rate": 9.909486121352163e-06,
      "loss": 0.4528,
      "step": 800
    },
    {
      "epoch": 0.25816249050873197,
      "grad_norm": 3.854436159133911,
      "learning_rate": 9.89784320750684e-06,
      "loss": 0.4265,
      "step": 850
    },
    {
      "epoch": 0.2733485193621868,
      "grad_norm": 10.977453231811523,
      "learning_rate": 9.885503769732304e-06,
      "loss": 0.4329,
      "step": 900
    },
    {
      "epoch": 0.2885345482156416,
      "grad_norm": 6.016301155090332,
      "learning_rate": 9.872469562824157e-06,
      "loss": 0.4147,
      "step": 950
    },
    {
      "epoch": 0.30372057706909644,
      "grad_norm": 5.806178569793701,
      "learning_rate": 9.858742440381343e-06,
      "loss": 0.4718,
      "step": 1000
    },
    {
      "epoch": 0.31890660592255127,
      "grad_norm": 6.394021987915039,
      "learning_rate": 9.844324354542558e-06,
      "loss": 0.3912,
      "step": 1050
    },
    {
      "epoch": 0.3340926347760061,
      "grad_norm": 7.948565483093262,
      "learning_rate": 9.82921735570864e-06,
      "loss": 0.4223,
      "step": 1100
    },
    {
      "epoch": 0.3492786636294609,
      "grad_norm": 5.151625156402588,
      "learning_rate": 9.813423592250969e-06,
      "loss": 0.4079,
      "step": 1150
    },
    {
      "epoch": 0.36446469248291574,
      "grad_norm": 6.724940299987793,
      "learning_rate": 9.796945310205958e-06,
      "loss": 0.4306,
      "step": 1200
    },
    {
      "epoch": 0.37965072133637057,
      "grad_norm": 3.623936176300049,
      "learning_rate": 9.779784852955636e-06,
      "loss": 0.438,
      "step": 1250
    },
    {
      "epoch": 0.39483675018982534,
      "grad_norm": 6.648692607879639,
      "learning_rate": 9.761944660894397e-06,
      "loss": 0.4515,
      "step": 1300
    },
    {
      "epoch": 0.41002277904328016,
      "grad_norm": 5.989009380340576,
      "learning_rate": 9.743427271081954e-06,
      "loss": 0.3911,
      "step": 1350
    },
    {
      "epoch": 0.425208807896735,
      "grad_norm": 3.3359053134918213,
      "learning_rate": 9.724235316882537e-06,
      "loss": 0.4454,
      "step": 1400
    },
    {
      "epoch": 0.4403948367501898,
      "grad_norm": 11.017061233520508,
      "learning_rate": 9.704371527590404e-06,
      "loss": 0.4022,
      "step": 1450
    },
    {
      "epoch": 0.45558086560364464,
      "grad_norm": 7.159852981567383,
      "learning_rate": 9.68383872804171e-06,
      "loss": 0.4464,
      "step": 1500
    },
    {
      "epoch": 0.47076689445709946,
      "grad_norm": 4.259323596954346,
      "learning_rate": 9.662639838212781e-06,
      "loss": 0.3829,
      "step": 1550
    },
    {
      "epoch": 0.4859529233105543,
      "grad_norm": 7.917912483215332,
      "learning_rate": 9.640777872804868e-06,
      "loss": 0.4186,
      "step": 1600
    },
    {
      "epoch": 0.5011389521640092,
      "grad_norm": 8.7236967086792,
      "learning_rate": 9.61825594081542e-06,
      "loss": 0.3766,
      "step": 1650
    },
    {
      "epoch": 0.5163249810174639,
      "grad_norm": 4.761518478393555,
      "learning_rate": 9.595077245095959e-06,
      "loss": 0.4057,
      "step": 1700
    },
    {
      "epoch": 0.5315110098709187,
      "grad_norm": 2.5256729125976562,
      "learning_rate": 9.571245081896594e-06,
      "loss": 0.4321,
      "step": 1750
    },
    {
      "epoch": 0.5466970387243736,
      "grad_norm": 9.82975959777832,
      "learning_rate": 9.546762840397268e-06,
      "loss": 0.4067,
      "step": 1800
    },
    {
      "epoch": 0.5618830675778284,
      "grad_norm": 4.607714653015137,
      "learning_rate": 9.521634002225774e-06,
      "loss": 0.3834,
      "step": 1850
    },
    {
      "epoch": 0.5770690964312832,
      "grad_norm": 8.330415725708008,
      "learning_rate": 9.495862140962638e-06,
      "loss": 0.374,
      "step": 1900
    },
    {
      "epoch": 0.592255125284738,
      "grad_norm": 5.7992634773254395,
      "learning_rate": 9.469450921632912e-06,
      "loss": 0.3852,
      "step": 1950
    },
    {
      "epoch": 0.6074411541381929,
      "grad_norm": 5.298435211181641,
      "learning_rate": 9.44240410018498e-06,
      "loss": 0.4345,
      "step": 2000
    },
    {
      "epoch": 0.6226271829916477,
      "grad_norm": 6.483381271362305,
      "learning_rate": 9.414725522956414e-06,
      "loss": 0.407,
      "step": 2050
    },
    {
      "epoch": 0.6378132118451025,
      "grad_norm": 5.179783821105957,
      "learning_rate": 9.386419126126983e-06,
      "loss": 0.432,
      "step": 2100
    },
    {
      "epoch": 0.6529992406985573,
      "grad_norm": 5.316011428833008,
      "learning_rate": 9.357488935158897e-06,
      "loss": 0.4071,
      "step": 2150
    },
    {
      "epoch": 0.6681852695520122,
      "grad_norm": 10.58410930633545,
      "learning_rate": 9.327939064224346e-06,
      "loss": 0.3772,
      "step": 2200
    },
    {
      "epoch": 0.683371298405467,
      "grad_norm": 4.013734817504883,
      "learning_rate": 9.297773715620406e-06,
      "loss": 0.4064,
      "step": 2250
    },
    {
      "epoch": 0.6985573272589218,
      "grad_norm": 9.252372741699219,
      "learning_rate": 9.266997179171442e-06,
      "loss": 0.3911,
      "step": 2300
    },
    {
      "epoch": 0.7137433561123766,
      "grad_norm": 8.192291259765625,
      "learning_rate": 9.235613831619052e-06,
      "loss": 0.3816,
      "step": 2350
    },
    {
      "epoch": 0.7289293849658315,
      "grad_norm": 4.068896770477295,
      "learning_rate": 9.203628135999643e-06,
      "loss": 0.4304,
      "step": 2400
    },
    {
      "epoch": 0.7441154138192863,
      "grad_norm": 2.9444737434387207,
      "learning_rate": 9.171044641009741e-06,
      "loss": 0.4231,
      "step": 2450
    },
    {
      "epoch": 0.7593014426727411,
      "grad_norm": 4.700106620788574,
      "learning_rate": 9.137867980359126e-06,
      "loss": 0.3982,
      "step": 2500
    },
    {
      "epoch": 0.7744874715261959,
      "grad_norm": 14.975322723388672,
      "learning_rate": 9.104102872111858e-06,
      "loss": 0.4241,
      "step": 2550
    },
    {
      "epoch": 0.7896735003796507,
      "grad_norm": 4.325404644012451,
      "learning_rate": 9.069754118015339e-06,
      "loss": 0.3725,
      "step": 2600
    },
    {
      "epoch": 0.8048595292331056,
      "grad_norm": 3.829643964767456,
      "learning_rate": 9.034826602817433e-06,
      "loss": 0.4048,
      "step": 2650
    },
    {
      "epoch": 0.8200455580865603,
      "grad_norm": 6.086367607116699,
      "learning_rate": 8.99932529357182e-06,
      "loss": 0.4333,
      "step": 2700
    },
    {
      "epoch": 0.8352315869400152,
      "grad_norm": 4.058459758758545,
      "learning_rate": 8.963255238931623e-06,
      "loss": 0.4004,
      "step": 2750
    },
    {
      "epoch": 0.85041761579347,
      "grad_norm": 4.049592971801758,
      "learning_rate": 8.926621568431442e-06,
      "loss": 0.4126,
      "step": 2800
    },
    {
      "epoch": 0.8656036446469249,
      "grad_norm": 3.434569835662842,
      "learning_rate": 8.889429491757872e-06,
      "loss": 0.4134,
      "step": 2850
    },
    {
      "epoch": 0.8807896735003796,
      "grad_norm": 5.300995349884033,
      "learning_rate": 8.851684298008642e-06,
      "loss": 0.4224,
      "step": 2900
    },
    {
      "epoch": 0.8959757023538345,
      "grad_norm": 8.158344268798828,
      "learning_rate": 8.813391354940445e-06,
      "loss": 0.3538,
      "step": 2950
    },
    {
      "epoch": 0.9111617312072893,
      "grad_norm": 6.747292518615723,
      "learning_rate": 8.77455610820559e-06,
      "loss": 0.3907,
      "step": 3000
    },
    {
      "epoch": 0.9263477600607442,
      "grad_norm": 6.279948711395264,
      "learning_rate": 8.735184080577569e-06,
      "loss": 0.4344,
      "step": 3050
    },
    {
      "epoch": 0.9415337889141989,
      "grad_norm": 4.355826377868652,
      "learning_rate": 8.69528087116567e-06,
      "loss": 0.4082,
      "step": 3100
    },
    {
      "epoch": 0.9567198177676538,
      "grad_norm": 6.685491561889648,
      "learning_rate": 8.65485215461872e-06,
      "loss": 0.3851,
      "step": 3150
    },
    {
      "epoch": 0.9719058466211086,
      "grad_norm": 5.933023452758789,
      "learning_rate": 8.61390368031809e-06,
      "loss": 0.3734,
      "step": 3200
    },
    {
      "epoch": 0.9870918754745635,
      "grad_norm": 9.179722785949707,
      "learning_rate": 8.572441271560077e-06,
      "loss": 0.3934,
      "step": 3250
    },
    {
      "epoch": 1.0,
      "eval_f1": 0.8445935154128733,
      "eval_loss": 0.37980714440345764,
      "eval_runtime": 7.8494,
      "eval_samples_per_second": 745.663,
      "eval_steps_per_second": 23.314,
      "step": 3293
    },
    {
      "epoch": 1.0021260440394837,
      "grad_norm": 1.5981299877166748,
      "learning_rate": 8.53047082472777e-06,
      "loss": 0.3967,
      "step": 3300
    },
    {
      "epoch": 1.0173120728929386,
      "grad_norm": 5.159671783447266,
      "learning_rate": 8.487998308452525e-06,
      "loss": 0.3125,
      "step": 3350
    },
    {
      "epoch": 1.0324981017463932,
      "grad_norm": 8.904830932617188,
      "learning_rate": 8.445029762765159e-06,
      "loss": 0.3201,
      "step": 3400
    },
    {
      "epoch": 1.047684130599848,
      "grad_norm": 4.215548992156982,
      "learning_rate": 8.401571298237e-06,
      "loss": 0.3043,
      "step": 3450
    },
    {
      "epoch": 1.062870159453303,
      "grad_norm": 2.9603254795074463,
      "learning_rate": 8.357629095110906e-06,
      "loss": 0.307,
      "step": 3500
    },
    {
      "epoch": 1.0780561883067579,
      "grad_norm": 8.665258407592773,
      "learning_rate": 8.313209402422348e-06,
      "loss": 0.3081,
      "step": 3550
    },
    {
      "epoch": 1.0932422171602125,
      "grad_norm": 7.101922512054443,
      "learning_rate": 8.268318537110762e-06,
      "loss": 0.3536,
      "step": 3600
    },
    {
      "epoch": 1.1084282460136674,
      "grad_norm": 9.113100051879883,
      "learning_rate": 8.222962883121196e-06,
      "loss": 0.3557,
      "step": 3650
    },
    {
      "epoch": 1.1236142748671223,
      "grad_norm": 3.427243947982788,
      "learning_rate": 8.177148890496452e-06,
      "loss": 0.2984,
      "step": 3700
    },
    {
      "epoch": 1.138800303720577,
      "grad_norm": 6.6492695808410645,
      "learning_rate": 8.130883074459823e-06,
      "loss": 0.3407,
      "step": 3750
    },
    {
      "epoch": 1.1539863325740318,
      "grad_norm": 9.254618644714355,
      "learning_rate": 8.084172014488564e-06,
      "loss": 0.3487,
      "step": 3800
    },
    {
      "epoch": 1.1691723614274867,
      "grad_norm": 3.8507754802703857,
      "learning_rate": 8.037022353378218e-06,
      "loss": 0.3374,
      "step": 3850
    },
    {
      "epoch": 1.1843583902809416,
      "grad_norm": 18.62590217590332,
      "learning_rate": 7.989440796297943e-06,
      "loss": 0.3269,
      "step": 3900
    },
    {
      "epoch": 1.1995444191343965,
      "grad_norm": 14.359010696411133,
      "learning_rate": 7.941434109836968e-06,
      "loss": 0.3219,
      "step": 3950
    },
    {
      "epoch": 1.2147304479878511,
      "grad_norm": 8.173829078674316,
      "learning_rate": 7.893009121042314e-06,
      "loss": 0.2944,
      "step": 4000
    },
    {
      "epoch": 1.229916476841306,
      "grad_norm": 6.0913591384887695,
      "learning_rate": 7.844172716447918e-06,
      "loss": 0.366,
      "step": 4050
    },
    {
      "epoch": 1.2451025056947609,
      "grad_norm": 8.989174842834473,
      "learning_rate": 7.794931841095297e-06,
      "loss": 0.3223,
      "step": 4100
    },
    {
      "epoch": 1.2602885345482155,
      "grad_norm": 4.618454456329346,
      "learning_rate": 7.745293497545892e-06,
      "loss": 0.3718,
      "step": 4150
    },
    {
      "epoch": 1.2754745634016704,
      "grad_norm": 6.966646194458008,
      "learning_rate": 7.695264744885225e-06,
      "loss": 0.34,
      "step": 4200
    },
    {
      "epoch": 1.2906605922551253,
      "grad_norm": 8.476325988769531,
      "learning_rate": 7.64485269771903e-06,
      "loss": 0.309,
      "step": 4250
    },
    {
      "epoch": 1.3058466211085802,
      "grad_norm": 3.3412492275238037,
      "learning_rate": 7.594064525161487e-06,
      "loss": 0.3491,
      "step": 4300
    },
    {
      "epoch": 1.321032649962035,
      "grad_norm": 9.971606254577637,
      "learning_rate": 7.54290744981569e-06,
      "loss": 0.3097,
      "step": 4350
    },
    {
      "epoch": 1.3362186788154897,
      "grad_norm": 7.083515167236328,
      "learning_rate": 7.491388746746522e-06,
      "loss": 0.3446,
      "step": 4400
    },
    {
      "epoch": 1.3514047076689446,
      "grad_norm": 5.6028361320495605,
      "learning_rate": 7.439515742446065e-06,
      "loss": 0.3229,
      "step": 4450
    },
    {
      "epoch": 1.3665907365223995,
      "grad_norm": 9.373847961425781,
      "learning_rate": 7.387295813791705e-06,
      "loss": 0.3022,
      "step": 4500
    },
    {
      "epoch": 1.3817767653758541,
      "grad_norm": 5.378981590270996,
      "learning_rate": 7.334736386997049e-06,
      "loss": 0.2955,
      "step": 4550
    },
    {
      "epoch": 1.396962794229309,
      "grad_norm": 9.248358726501465,
      "learning_rate": 7.281844936555853e-06,
      "loss": 0.3562,
      "step": 4600
    },
    {
      "epoch": 1.412148823082764,
      "grad_norm": 6.579871654510498,
      "learning_rate": 7.228628984179068e-06,
      "loss": 0.3436,
      "step": 4650
    },
    {
      "epoch": 1.4273348519362186,
      "grad_norm": 2.5316176414489746,
      "learning_rate": 7.175096097725169e-06,
      "loss": 0.3464,
      "step": 4700
    },
    {
      "epoch": 1.4425208807896734,
      "grad_norm": 12.828206062316895,
      "learning_rate": 7.121253890123941e-06,
      "loss": 0.3333,
      "step": 4750
    },
    {
      "epoch": 1.4577069096431283,
      "grad_norm": 8.807774543762207,
      "learning_rate": 7.067110018293828e-06,
      "loss": 0.2955,
      "step": 4800
    },
    {
      "epoch": 1.4728929384965832,
      "grad_norm": 10.35312557220459,
      "learning_rate": 7.012672182053043e-06,
      "loss": 0.3321,
      "step": 4850
    },
    {
      "epoch": 1.488078967350038,
      "grad_norm": 2.2814652919769287,
      "learning_rate": 6.9579481230245835e-06,
      "loss": 0.3466,
      "step": 4900
    },
    {
      "epoch": 1.5032649962034927,
      "grad_norm": 5.442550182342529,
      "learning_rate": 6.9029456235352795e-06,
      "loss": 0.3321,
      "step": 4950
    },
    {
      "epoch": 1.5184510250569476,
      "grad_norm": 12.557025909423828,
      "learning_rate": 6.847672505509079e-06,
      "loss": 0.3429,
      "step": 5000
    },
    {
      "epoch": 1.5336370539104025,
      "grad_norm": 4.002285480499268,
      "learning_rate": 6.792136629354677e-06,
      "loss": 0.3274,
      "step": 5050
    },
    {
      "epoch": 1.5488230827638572,
      "grad_norm": 17.179048538208008,
      "learning_rate": 6.736345892847691e-06,
      "loss": 0.3472,
      "step": 5100
    },
    {
      "epoch": 1.564009111617312,
      "grad_norm": 8.354110717773438,
      "learning_rate": 6.680308230007521e-06,
      "loss": 0.3282,
      "step": 5150
    },
    {
      "epoch": 1.579195140470767,
      "grad_norm": 5.1743035316467285,
      "learning_rate": 6.624031609969036e-06,
      "loss": 0.3443,
      "step": 5200
    },
    {
      "epoch": 1.5943811693242216,
      "grad_norm": 6.959432601928711,
      "learning_rate": 6.567524035849293e-06,
      "loss": 0.35,
      "step": 5250
    },
    {
      "epoch": 1.6095671981776767,
      "grad_norm": 20.55417823791504,
      "learning_rate": 6.5107935436094076e-06,
      "loss": 0.3158,
      "step": 5300
    },
    {
      "epoch": 1.6247532270311313,
      "grad_norm": 14.025495529174805,
      "learning_rate": 6.453848200911752e-06,
      "loss": 0.3287,
      "step": 5350
    },
    {
      "epoch": 1.6399392558845862,
      "grad_norm": 12.094548225402832,
      "learning_rate": 6.396696105972655e-06,
      "loss": 0.3448,
      "step": 5400
    },
    {
      "epoch": 1.655125284738041,
      "grad_norm": 3.596747398376465,
      "learning_rate": 6.339345386410756e-06,
      "loss": 0.3544,
      "step": 5450
    },
    {
      "epoch": 1.6703113135914958,
      "grad_norm": 4.897212505340576,
      "learning_rate": 6.2818041980911635e-06,
      "loss": 0.3363,
      "step": 5500
    },
    {
      "epoch": 1.6854973424449506,
      "grad_norm": 2.7992074489593506,
      "learning_rate": 6.224080723965616e-06,
      "loss": 0.3405,
      "step": 5550
    },
    {
      "epoch": 1.7006833712984055,
      "grad_norm": 8.647635459899902,
      "learning_rate": 6.1661831729087705e-06,
      "loss": 0.3218,
      "step": 5600
    },
    {
      "epoch": 1.7158694001518602,
      "grad_norm": 16.2703800201416,
      "learning_rate": 6.1081197785508335e-06,
      "loss": 0.3569,
      "step": 5650
    },
    {
      "epoch": 1.731055429005315,
      "grad_norm": 9.62259578704834,
      "learning_rate": 6.049898798106636e-06,
      "loss": 0.3181,
      "step": 5700
    },
    {
      "epoch": 1.74624145785877,
      "grad_norm": 10.183274269104004,
      "learning_rate": 5.991528511201382e-06,
      "loss": 0.3191,
      "step": 5750
    },
    {
      "epoch": 1.7614274867122246,
      "grad_norm": 20.28440284729004,
      "learning_rate": 5.933017218693193e-06,
      "loss": 0.3162,
      "step": 5800
    },
    {
      "epoch": 1.7766135155656797,
      "grad_norm": 18.231319427490234,
      "learning_rate": 5.874373241492651e-06,
      "loss": 0.3788,
      "step": 5850
    },
    {
      "epoch": 1.7917995444191344,
      "grad_norm": 14.682201385498047,
      "learning_rate": 5.815604919379472e-06,
      "loss": 0.3242,
      "step": 5900
    },
    {
      "epoch": 1.8069855732725892,
      "grad_norm": 6.563547611236572,
      "learning_rate": 5.7567206098164965e-06,
      "loss": 0.3377,
      "step": 5950
    },
    {
      "epoch": 1.8221716021260441,
      "grad_norm": 8.406890869140625,
      "learning_rate": 5.697728686761189e-06,
      "loss": 0.3222,
      "step": 6000
    },
    {
      "epoch": 1.8373576309794988,
      "grad_norm": 5.706462860107422,
      "learning_rate": 5.638637539474758e-06,
      "loss": 0.3169,
      "step": 6050
    },
    {
      "epoch": 1.8525436598329537,
      "grad_norm": 3.566732883453369,
      "learning_rate": 5.579455571329128e-06,
      "loss": 0.2993,
      "step": 6100
    },
    {
      "epoch": 1.8677296886864085,
      "grad_norm": 21.842191696166992,
      "learning_rate": 5.520191198611883e-06,
      "loss": 0.3411,
      "step": 6150
    },
    {
      "epoch": 1.8829157175398632,
      "grad_norm": 7.155375957489014,
      "learning_rate": 5.460852849329394e-06,
      "loss": 0.3168,
      "step": 6200
    },
    {
      "epoch": 1.8981017463933183,
      "grad_norm": 5.166109085083008,
      "learning_rate": 5.401448962008262e-06,
      "loss": 0.3526,
      "step": 6250
    },
    {
      "epoch": 1.913287775246773,
      "grad_norm": 10.691755294799805,
      "learning_rate": 5.341987984495275e-06,
      "loss": 0.334,
      "step": 6300
    },
    {
      "epoch": 1.9284738041002278,
      "grad_norm": 1.8157846927642822,
      "learning_rate": 5.282478372756036e-06,
      "loss": 0.2981,
      "step": 6350
    },
    {
      "epoch": 1.9436598329536827,
      "grad_norm": 6.267528057098389,
      "learning_rate": 5.222928589672436e-06,
      "loss": 0.3443,
      "step": 6400
    },
    {
      "epoch": 1.9588458618071374,
      "grad_norm": 8.20384407043457,
      "learning_rate": 5.163347103839149e-06,
      "loss": 0.3196,
      "step": 6450
    },
    {
      "epoch": 1.9740318906605923,
      "grad_norm": 6.2834882736206055,
      "learning_rate": 5.10374238835931e-06,
      "loss": 0.3176,
      "step": 6500
    },
    {
      "epoch": 1.9892179195140471,
      "grad_norm": 7.512860298156738,
      "learning_rate": 5.0441229196395416e-06,
      "loss": 0.3216,
      "step": 6550
    },
    {
      "epoch": 2.0,
      "eval_f1": 0.847490661036219,
      "eval_loss": 0.39481809735298157,
      "eval_runtime": 7.8513,
      "eval_samples_per_second": 745.486,
      "eval_steps_per_second": 23.308,
      "step": 6586
    }
  ],
  "logging_steps": 50,
  "max_steps": 13172,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.771769723795456e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}