{
  "best_global_step": 716,
  "best_metric": 0.9046100241222194,
  "best_model_checkpoint": "../results\\checkpoint-716",
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1074,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.055865921787709494,
      "grad_norm": 3.396728038787842,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.7053,
      "step": 10
    },
    {
      "epoch": 0.11173184357541899,
      "grad_norm": 4.982423782348633,
      "learning_rate": 2.5333333333333338e-06,
      "loss": 0.6988,
      "step": 20
    },
    {
      "epoch": 0.16759776536312848,
      "grad_norm": 2.757077217102051,
      "learning_rate": 3.866666666666667e-06,
      "loss": 0.6758,
      "step": 30
    },
    {
      "epoch": 0.22346368715083798,
      "grad_norm": 2.6699273586273193,
      "learning_rate": 5.2e-06,
      "loss": 0.6315,
      "step": 40
    },
    {
      "epoch": 0.27932960893854747,
      "grad_norm": 4.097516059875488,
      "learning_rate": 6.533333333333334e-06,
      "loss": 0.6425,
      "step": 50
    },
    {
      "epoch": 0.33519553072625696,
      "grad_norm": 4.445736408233643,
      "learning_rate": 7.866666666666667e-06,
      "loss": 0.6255,
      "step": 60
    },
    {
      "epoch": 0.39106145251396646,
      "grad_norm": 4.867759704589844,
      "learning_rate": 9.200000000000002e-06,
      "loss": 0.5891,
      "step": 70
    },
    {
      "epoch": 0.44692737430167595,
      "grad_norm": 6.38022518157959,
      "learning_rate": 1.0533333333333333e-05,
      "loss": 0.4952,
      "step": 80
    },
    {
      "epoch": 0.5027932960893855,
      "grad_norm": 3.1345021724700928,
      "learning_rate": 1.186666666666667e-05,
      "loss": 0.5234,
      "step": 90
    },
    {
      "epoch": 0.5586592178770949,
      "grad_norm": 5.645432949066162,
      "learning_rate": 1.3200000000000002e-05,
      "loss": 0.5089,
      "step": 100
    },
    {
      "epoch": 0.6145251396648045,
      "grad_norm": 3.7119741439819336,
      "learning_rate": 1.4533333333333335e-05,
      "loss": 0.4735,
      "step": 110
    },
    {
      "epoch": 0.6703910614525139,
      "grad_norm": 8.215346336364746,
      "learning_rate": 1.586666666666667e-05,
      "loss": 0.5003,
      "step": 120
    },
    {
      "epoch": 0.7262569832402235,
      "grad_norm": 5.145977020263672,
      "learning_rate": 1.72e-05,
      "loss": 0.4147,
      "step": 130
    },
    {
      "epoch": 0.7821229050279329,
      "grad_norm": 2.537804126739502,
      "learning_rate": 1.8533333333333334e-05,
      "loss": 0.3685,
      "step": 140
    },
    {
      "epoch": 0.8379888268156425,
      "grad_norm": 4.58748722076416,
      "learning_rate": 1.9866666666666667e-05,
      "loss": 0.3349,
      "step": 150
    },
    {
      "epoch": 0.8938547486033519,
      "grad_norm": 5.2894134521484375,
      "learning_rate": 1.9836808703535813e-05,
      "loss": 0.294,
      "step": 160
    },
    {
      "epoch": 0.9497206703910615,
      "grad_norm": 19.09000015258789,
      "learning_rate": 1.9655485040797827e-05,
      "loss": 0.288,
      "step": 170
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8775510204081632,
      "eval_f1": 0.8773557452999963,
      "eval_loss": 0.30696821212768555,
      "eval_precision": 0.8774895500368822,
      "eval_recall": 0.8775510204081632,
      "eval_runtime": 44.9376,
      "eval_samples_per_second": 3.271,
      "eval_steps_per_second": 0.111,
      "step": 179
    },
    {
      "epoch": 1.005586592178771,
      "grad_norm": 5.151806354522705,
      "learning_rate": 1.9474161378059838e-05,
      "loss": 0.2949,
      "step": 180
    },
    {
      "epoch": 1.0614525139664805,
      "grad_norm": 5.022265911102295,
      "learning_rate": 1.9292837715321852e-05,
      "loss": 0.165,
      "step": 190
    },
    {
      "epoch": 1.1173184357541899,
      "grad_norm": 9.743254661560059,
      "learning_rate": 1.9111514052583863e-05,
      "loss": 0.2579,
      "step": 200
    },
    {
      "epoch": 1.1731843575418994,
      "grad_norm": 12.344402313232422,
      "learning_rate": 1.8930190389845877e-05,
      "loss": 0.283,
      "step": 210
    },
    {
      "epoch": 1.229050279329609,
      "grad_norm": 6.7589111328125,
      "learning_rate": 1.8748866727107888e-05,
      "loss": 0.2604,
      "step": 220
    },
    {
      "epoch": 1.2849162011173183,
      "grad_norm": 3.3086533546447754,
      "learning_rate": 1.8567543064369902e-05,
      "loss": 0.1972,
      "step": 230
    },
    {
      "epoch": 1.3407821229050279,
      "grad_norm": 5.3050923347473145,
      "learning_rate": 1.8386219401631916e-05,
      "loss": 0.2585,
      "step": 240
    },
    {
      "epoch": 1.3966480446927374,
      "grad_norm": 4.015077590942383,
      "learning_rate": 1.8204895738893927e-05,
      "loss": 0.2061,
      "step": 250
    },
    {
      "epoch": 1.452513966480447,
      "grad_norm": 3.9509942531585693,
      "learning_rate": 1.802357207615594e-05,
      "loss": 0.2497,
      "step": 260
    },
    {
      "epoch": 1.5083798882681565,
      "grad_norm": 5.380828857421875,
      "learning_rate": 1.784224841341795e-05,
      "loss": 0.2784,
      "step": 270
    },
    {
      "epoch": 1.564245810055866,
      "grad_norm": 3.400120973587036,
      "learning_rate": 1.7660924750679966e-05,
      "loss": 0.2659,
      "step": 280
    },
    {
      "epoch": 1.6201117318435754,
      "grad_norm": 1.753212809562683,
      "learning_rate": 1.7479601087941977e-05,
      "loss": 0.2076,
      "step": 290
    },
    {
      "epoch": 1.675977653631285,
      "grad_norm": 5.839695930480957,
      "learning_rate": 1.729827742520399e-05,
      "loss": 0.2168,
      "step": 300
    },
    {
      "epoch": 1.7318435754189943,
      "grad_norm": 3.75396466255188,
      "learning_rate": 1.7116953762466005e-05,
      "loss": 0.1925,
      "step": 310
    },
    {
      "epoch": 1.7877094972067038,
      "grad_norm": 4.7805986404418945,
      "learning_rate": 1.6935630099728016e-05,
      "loss": 0.2179,
      "step": 320
    },
    {
      "epoch": 1.8435754189944134,
      "grad_norm": 15.363788604736328,
      "learning_rate": 1.675430643699003e-05,
      "loss": 0.2328,
      "step": 330
    },
    {
      "epoch": 1.899441340782123,
      "grad_norm": 4.211331844329834,
      "learning_rate": 1.657298277425204e-05,
      "loss": 0.1723,
      "step": 340
    },
    {
      "epoch": 1.9553072625698324,
      "grad_norm": 2.37222957611084,
      "learning_rate": 1.6391659111514055e-05,
      "loss": 0.263,
      "step": 350
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9047619047619048,
      "eval_f1": 0.9036272534175698,
      "eval_loss": 0.2916606664657593,
      "eval_precision": 0.9113029827315541,
      "eval_recall": 0.9047619047619048,
      "eval_runtime": 43.0995,
      "eval_samples_per_second": 3.411,
      "eval_steps_per_second": 0.116,
      "step": 358
    },
    {
      "epoch": 2.011173184357542,
      "grad_norm": 3.767514228820801,
      "learning_rate": 1.6210335448776065e-05,
      "loss": 0.1492,
      "step": 360
    },
    {
      "epoch": 2.0670391061452515,
      "grad_norm": 6.10928201675415,
      "learning_rate": 1.602901178603808e-05,
      "loss": 0.164,
      "step": 370
    },
    {
      "epoch": 2.122905027932961,
      "grad_norm": 5.148931503295898,
      "learning_rate": 1.584768812330009e-05,
      "loss": 0.1319,
      "step": 380
    },
    {
      "epoch": 2.17877094972067,
      "grad_norm": 3.880173683166504,
      "learning_rate": 1.5666364460562104e-05,
      "loss": 0.1354,
      "step": 390
    },
    {
      "epoch": 2.2346368715083798,
      "grad_norm": 3.432164192199707,
      "learning_rate": 1.548504079782412e-05,
      "loss": 0.1028,
      "step": 400
    },
    {
      "epoch": 2.2905027932960893,
      "grad_norm": 8.787654876708984,
      "learning_rate": 1.530371713508613e-05,
      "loss": 0.1777,
      "step": 410
    },
    {
      "epoch": 2.346368715083799,
      "grad_norm": 4.742533206939697,
      "learning_rate": 1.5122393472348142e-05,
      "loss": 0.1124,
      "step": 420
    },
    {
      "epoch": 2.4022346368715084,
      "grad_norm": 4.186207294464111,
      "learning_rate": 1.4941069809610156e-05,
      "loss": 0.1848,
      "step": 430
    },
    {
      "epoch": 2.458100558659218,
      "grad_norm": 1.7014570236206055,
      "learning_rate": 1.4759746146872167e-05,
      "loss": 0.1311,
      "step": 440
    },
    {
      "epoch": 2.5139664804469275,
      "grad_norm": 5.445011615753174,
      "learning_rate": 1.4578422484134181e-05,
      "loss": 0.1633,
      "step": 450
    },
    {
      "epoch": 2.5698324022346366,
      "grad_norm": 5.768681049346924,
      "learning_rate": 1.4397098821396193e-05,
      "loss": 0.1426,
      "step": 460
    },
    {
      "epoch": 2.6256983240223466,
      "grad_norm": 9.130986213684082,
      "learning_rate": 1.4215775158658207e-05,
      "loss": 0.0882,
      "step": 470
    },
    {
      "epoch": 2.6815642458100557,
      "grad_norm": 2.656816244125366,
      "learning_rate": 1.4034451495920218e-05,
      "loss": 0.1253,
      "step": 480
    },
    {
      "epoch": 2.7374301675977653,
      "grad_norm": 4.939051628112793,
      "learning_rate": 1.3853127833182232e-05,
      "loss": 0.176,
      "step": 490
    },
    {
      "epoch": 2.793296089385475,
      "grad_norm": 8.688886642456055,
      "learning_rate": 1.3671804170444245e-05,
      "loss": 0.1189,
      "step": 500
    },
    {
      "epoch": 2.8491620111731844,
      "grad_norm": 4.239016056060791,
      "learning_rate": 1.3490480507706256e-05,
      "loss": 0.1324,
      "step": 510
    },
    {
      "epoch": 2.905027932960894,
      "grad_norm": 9.253652572631836,
      "learning_rate": 1.330915684496827e-05,
      "loss": 0.2247,
      "step": 520
    },
    {
      "epoch": 2.9608938547486034,
      "grad_norm": 8.102351188659668,
      "learning_rate": 1.3127833182230282e-05,
      "loss": 0.144,
      "step": 530
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8979591836734694,
      "eval_f1": 0.8978827235479027,
      "eval_loss": 0.30465707182884216,
      "eval_precision": 0.8978902630470574,
      "eval_recall": 0.8979591836734694,
      "eval_runtime": 42.2881,
      "eval_samples_per_second": 3.476,
      "eval_steps_per_second": 0.118,
      "step": 537
    },
    {
      "epoch": 3.016759776536313,
      "grad_norm": 2.3151211738586426,
      "learning_rate": 1.2946509519492295e-05,
      "loss": 0.0805,
      "step": 540
    },
    {
      "epoch": 3.0726256983240225,
      "grad_norm": 0.773265540599823,
      "learning_rate": 1.2765185856754307e-05,
      "loss": 0.0878,
      "step": 550
    },
    {
      "epoch": 3.1284916201117317,
      "grad_norm": 2.0086417198181152,
      "learning_rate": 1.2583862194016321e-05,
      "loss": 0.1024,
      "step": 560
    },
    {
      "epoch": 3.184357541899441,
      "grad_norm": 4.90833854675293,
      "learning_rate": 1.2402538531278332e-05,
      "loss": 0.0831,
      "step": 570
    },
    {
      "epoch": 3.2402234636871508,
      "grad_norm": 3.742814064025879,
      "learning_rate": 1.2221214868540346e-05,
      "loss": 0.1213,
      "step": 580
    },
    {
      "epoch": 3.2960893854748603,
      "grad_norm": 1.9768400192260742,
      "learning_rate": 1.2039891205802359e-05,
      "loss": 0.077,
      "step": 590
    },
    {
      "epoch": 3.35195530726257,
      "grad_norm": 0.833071768283844,
      "learning_rate": 1.185856754306437e-05,
      "loss": 0.094,
      "step": 600
    },
    {
      "epoch": 3.4078212290502794,
      "grad_norm": 1.4159188270568848,
      "learning_rate": 1.1677243880326384e-05,
      "loss": 0.1288,
      "step": 610
    },
    {
      "epoch": 3.463687150837989,
      "grad_norm": 7.14526891708374,
      "learning_rate": 1.1495920217588396e-05,
      "loss": 0.0656,
      "step": 620
    },
    {
      "epoch": 3.5195530726256985,
      "grad_norm": 6.060492038726807,
      "learning_rate": 1.131459655485041e-05,
      "loss": 0.0598,
      "step": 630
    },
    {
      "epoch": 3.5754189944134076,
      "grad_norm": 4.400679111480713,
      "learning_rate": 1.1133272892112421e-05,
      "loss": 0.0919,
      "step": 640
    },
    {
      "epoch": 3.631284916201117,
      "grad_norm": 9.306907653808594,
      "learning_rate": 1.0951949229374435e-05,
      "loss": 0.0754,
      "step": 650
    },
    {
      "epoch": 3.6871508379888267,
      "grad_norm": 1.2165876626968384,
      "learning_rate": 1.0770625566636447e-05,
      "loss": 0.1537,
      "step": 660
    },
    {
      "epoch": 3.7430167597765363,
      "grad_norm": 4.806360244750977,
      "learning_rate": 1.0589301903898458e-05,
      "loss": 0.0336,
      "step": 670
    },
    {
      "epoch": 3.798882681564246,
      "grad_norm": 4.607973098754883,
      "learning_rate": 1.0407978241160472e-05,
      "loss": 0.0667,
      "step": 680
    },
    {
      "epoch": 3.8547486033519553,
      "grad_norm": 6.711869239807129,
      "learning_rate": 1.0226654578422487e-05,
      "loss": 0.1215,
      "step": 690
    },
    {
      "epoch": 3.910614525139665,
      "grad_norm": 8.619525909423828,
      "learning_rate": 1.0045330915684497e-05,
      "loss": 0.0875,
      "step": 700
    },
    {
      "epoch": 3.9664804469273744,
      "grad_norm": 4.532635688781738,
      "learning_rate": 9.86400725294651e-06,
      "loss": 0.088,
      "step": 710
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9047619047619048,
      "eval_f1": 0.9046100241222194,
      "eval_loss": 0.34424835443496704,
      "eval_precision": 0.9047977624784854,
      "eval_recall": 0.9047619047619048,
      "eval_runtime": 42.5788,
      "eval_samples_per_second": 3.452,
      "eval_steps_per_second": 0.117,
      "step": 716
    },
    {
      "epoch": 4.022346368715084,
      "grad_norm": 3.24448561668396,
      "learning_rate": 9.682683590208522e-06,
      "loss": 0.1583,
      "step": 720
    },
    {
      "epoch": 4.078212290502793,
      "grad_norm": 8.842151641845703,
      "learning_rate": 9.501359927470536e-06,
      "loss": 0.0533,
      "step": 730
    },
    {
      "epoch": 4.134078212290503,
      "grad_norm": 1.76890230178833,
      "learning_rate": 9.320036264732549e-06,
      "loss": 0.0312,
      "step": 740
    },
    {
      "epoch": 4.189944134078212,
      "grad_norm": 2.4802823066711426,
      "learning_rate": 9.138712601994561e-06,
      "loss": 0.0176,
      "step": 750
    },
    {
      "epoch": 4.245810055865922,
      "grad_norm": 1.202589750289917,
      "learning_rate": 8.957388939256574e-06,
      "loss": 0.0912,
      "step": 760
    },
    {
      "epoch": 4.301675977653631,
      "grad_norm": 0.26222002506256104,
      "learning_rate": 8.776065276518586e-06,
      "loss": 0.0501,
      "step": 770
    },
    {
      "epoch": 4.35754189944134,
      "grad_norm": 12.392189979553223,
      "learning_rate": 8.5947416137806e-06,
      "loss": 0.0433,
      "step": 780
    },
    {
      "epoch": 4.41340782122905,
      "grad_norm": 8.532198905944824,
      "learning_rate": 8.413417951042611e-06,
      "loss": 0.096,
      "step": 790
    },
    {
      "epoch": 4.4692737430167595,
      "grad_norm": 3.229271411895752,
      "learning_rate": 8.232094288304623e-06,
      "loss": 0.0743,
      "step": 800
    },
    {
      "epoch": 4.5251396648044695,
      "grad_norm": 0.5314301252365112,
      "learning_rate": 8.050770625566638e-06,
      "loss": 0.0588,
      "step": 810
    },
    {
      "epoch": 4.581005586592179,
      "grad_norm": 12.809399604797363,
      "learning_rate": 7.86944696282865e-06,
      "loss": 0.0761,
      "step": 820
    },
    {
      "epoch": 4.636871508379889,
      "grad_norm": 9.645480155944824,
      "learning_rate": 7.688123300090663e-06,
      "loss": 0.0666,
      "step": 830
    },
    {
      "epoch": 4.692737430167598,
      "grad_norm": 2.1018457412719727,
      "learning_rate": 7.506799637352675e-06,
      "loss": 0.0437,
      "step": 840
    },
    {
      "epoch": 4.748603351955307,
      "grad_norm": 11.410074234008789,
      "learning_rate": 7.3254759746146875e-06,
      "loss": 0.0753,
      "step": 850
    },
    {
      "epoch": 4.804469273743017,
      "grad_norm": 8.368203163146973,
      "learning_rate": 7.144152311876701e-06,
      "loss": 0.0743,
      "step": 860
    },
    {
      "epoch": 4.860335195530726,
      "grad_norm": 4.603096008300781,
      "learning_rate": 6.962828649138713e-06,
      "loss": 0.058,
      "step": 870
    },
    {
      "epoch": 4.916201117318436,
      "grad_norm": 1.7863306999206543,
      "learning_rate": 6.781504986400726e-06,
      "loss": 0.0089,
      "step": 880
    },
    {
      "epoch": 4.972067039106145,
      "grad_norm": 5.33497953414917,
      "learning_rate": 6.600181323662739e-06,
      "loss": 0.0941,
      "step": 890
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8979591836734694,
      "eval_f1": 0.8978827235479027,
      "eval_loss": 0.4454677999019623,
      "eval_precision": 0.8978902630470574,
      "eval_recall": 0.8979591836734694,
      "eval_runtime": 44.1091,
      "eval_samples_per_second": 3.333,
      "eval_steps_per_second": 0.113,
      "step": 895
    },
    {
      "epoch": 5.027932960893855,
      "grad_norm": 13.331377029418945,
      "learning_rate": 6.418857660924751e-06,
      "loss": 0.0554,
      "step": 900
    },
    {
      "epoch": 5.083798882681564,
      "grad_norm": 43.700767517089844,
      "learning_rate": 6.237533998186765e-06,
      "loss": 0.0662,
      "step": 910
    },
    {
      "epoch": 5.139664804469274,
      "grad_norm": 3.050325393676758,
      "learning_rate": 6.056210335448776e-06,
      "loss": 0.0361,
      "step": 920
    },
    {
      "epoch": 5.195530726256983,
      "grad_norm": 3.3658792972564697,
      "learning_rate": 5.874886672710789e-06,
      "loss": 0.0293,
      "step": 930
    },
    {
      "epoch": 5.251396648044693,
      "grad_norm": 0.4701756238937378,
      "learning_rate": 5.693563009972802e-06,
      "loss": 0.0193,
      "step": 940
    },
    {
      "epoch": 5.307262569832402,
      "grad_norm": 0.5358762741088867,
      "learning_rate": 5.5122393472348145e-06,
      "loss": 0.0507,
      "step": 950
    },
    {
      "epoch": 5.363128491620111,
      "grad_norm": 2.983640193939209,
      "learning_rate": 5.330915684496827e-06,
      "loss": 0.0208,
      "step": 960
    },
    {
      "epoch": 5.418994413407821,
      "grad_norm": 18.32625389099121,
      "learning_rate": 5.14959202175884e-06,
      "loss": 0.0702,
      "step": 970
    },
    {
      "epoch": 5.4748603351955305,
      "grad_norm": 8.444658279418945,
      "learning_rate": 4.968268359020853e-06,
      "loss": 0.0374,
      "step": 980
    },
    {
      "epoch": 5.5307262569832405,
      "grad_norm": 0.7589412927627563,
      "learning_rate": 4.786944696282865e-06,
      "loss": 0.0412,
      "step": 990
    },
    {
      "epoch": 5.58659217877095,
      "grad_norm": 3.848323106765747,
      "learning_rate": 4.6056210335448785e-06,
      "loss": 0.0623,
      "step": 1000
    },
    {
      "epoch": 5.64245810055866,
      "grad_norm": 8.687700271606445,
      "learning_rate": 4.424297370806891e-06,
      "loss": 0.0595,
      "step": 1010
    },
    {
      "epoch": 5.698324022346369,
      "grad_norm": 0.09025302529335022,
      "learning_rate": 4.242973708068903e-06,
      "loss": 0.0087,
      "step": 1020
    },
    {
      "epoch": 5.754189944134078,
      "grad_norm": 0.11568494141101837,
      "learning_rate": 4.061650045330916e-06,
      "loss": 0.0241,
      "step": 1030
    },
    {
      "epoch": 5.810055865921788,
      "grad_norm": 1.0072994232177734,
      "learning_rate": 3.880326382592929e-06,
      "loss": 0.0196,
      "step": 1040
    },
    {
      "epoch": 5.865921787709497,
      "grad_norm": 2.54008150100708,
      "learning_rate": 3.6990027198549416e-06,
      "loss": 0.033,
      "step": 1050
    },
    {
      "epoch": 5.921787709497207,
      "grad_norm": 0.12731680274009705,
      "learning_rate": 3.517679057116954e-06,
      "loss": 0.0568,
      "step": 1060
    },
    {
      "epoch": 5.977653631284916,
      "grad_norm": 14.050686836242676,
      "learning_rate": 3.336355394378967e-06,
      "loss": 0.0479,
      "step": 1070
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8979591836734694,
      "eval_f1": 0.8978827235479027,
      "eval_loss": 0.4888676702976227,
      "eval_precision": 0.8978902630470574,
      "eval_recall": 0.8979591836734694,
      "eval_runtime": 43.2366,
      "eval_samples_per_second": 3.4,
      "eval_steps_per_second": 0.116,
      "step": 1074
    }
  ],
  "logging_steps": 10,
  "max_steps": 1253,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 7,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 2
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8699999323206960.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}