{
  "best_global_step": 1000,
  "best_metric": 0.8438986049887841,
  "best_model_checkpoint": "./results/checkpoint-1000",
  "epoch": 0.029189526267256648,
  "eval_steps": 100,
  "global_step": 1100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00026535932970233315,
      "grad_norm": 4.446621894836426,
      "learning_rate": 3.6e-07,
      "loss": 1.855,
      "step": 10
    },
    {
      "epoch": 0.0005307186594046663,
      "grad_norm": 6.435311317443848,
      "learning_rate": 7.6e-07,
      "loss": 1.8131,
      "step": 20
    },
    {
      "epoch": 0.0007960779891069995,
      "grad_norm": 3.5201945304870605,
      "learning_rate": 1.1600000000000001e-06,
      "loss": 1.7821,
      "step": 30
    },
    {
      "epoch": 0.0010614373188093326,
      "grad_norm": 5.47129487991333,
      "learning_rate": 1.56e-06,
      "loss": 1.8187,
      "step": 40
    },
    {
      "epoch": 0.001326796648511666,
      "grad_norm": 7.340343475341797,
      "learning_rate": 1.9600000000000003e-06,
      "loss": 1.7791,
      "step": 50
    },
    {
      "epoch": 0.001592155978213999,
      "grad_norm": 5.010741233825684,
      "learning_rate": 2.3600000000000003e-06,
      "loss": 1.7705,
      "step": 60
    },
    {
      "epoch": 0.0018575153079163323,
      "grad_norm": 4.706456184387207,
      "learning_rate": 2.7600000000000003e-06,
      "loss": 1.7616,
      "step": 70
    },
    {
      "epoch": 0.002122874637618665,
      "grad_norm": 3.865488290786743,
      "learning_rate": 3.1600000000000002e-06,
      "loss": 1.7265,
      "step": 80
    },
    {
      "epoch": 0.0023882339673209985,
      "grad_norm": 6.390440464019775,
      "learning_rate": 3.5600000000000002e-06,
      "loss": 1.7143,
      "step": 90
    },
    {
      "epoch": 0.002653593297023332,
      "grad_norm": 5.747302532196045,
      "learning_rate": 3.96e-06,
      "loss": 1.6675,
      "step": 100
    },
    {
      "epoch": 0.002653593297023332,
      "eval_accuracy": 0.3978160782284494,
      "eval_f1": 0.22667861021073907,
      "eval_loss": 1.7215721607208252,
      "eval_precision": 0.3277163462999534,
      "eval_recall": 0.3978160782284494,
      "eval_runtime": 1141.6234,
      "eval_samples_per_second": 66.019,
      "eval_steps_per_second": 8.253,
      "step": 100
    },
    {
      "epoch": 0.0029189526267256647,
      "grad_norm": 5.64985466003418,
      "learning_rate": 4.360000000000001e-06,
      "loss": 1.6701,
      "step": 110
    },
    {
      "epoch": 0.003184311956427998,
      "grad_norm": 7.48480224609375,
      "learning_rate": 4.76e-06,
      "loss": 1.6665,
      "step": 120
    },
    {
      "epoch": 0.0034496712861303313,
      "grad_norm": 6.59235954284668,
      "learning_rate": 5.1600000000000006e-06,
      "loss": 1.6049,
      "step": 130
    },
    {
      "epoch": 0.0037150306158326646,
      "grad_norm": 5.440073013305664,
      "learning_rate": 5.560000000000001e-06,
      "loss": 1.6174,
      "step": 140
    },
    {
      "epoch": 0.0039803899455349975,
      "grad_norm": 5.735574245452881,
      "learning_rate": 5.9600000000000005e-06,
      "loss": 1.5654,
      "step": 150
    },
    {
      "epoch": 0.00424574927523733,
      "grad_norm": 5.891671180725098,
      "learning_rate": 6.360000000000001e-06,
      "loss": 1.5445,
      "step": 160
    },
    {
      "epoch": 0.004511108604939664,
      "grad_norm": 7.2471089363098145,
      "learning_rate": 6.760000000000001e-06,
      "loss": 1.5545,
      "step": 170
    },
    {
      "epoch": 0.004776467934641997,
      "grad_norm": 8.074533462524414,
      "learning_rate": 7.16e-06,
      "loss": 1.5121,
      "step": 180
    },
    {
      "epoch": 0.00504182726434433,
      "grad_norm": 6.5745768547058105,
      "learning_rate": 7.5600000000000005e-06,
      "loss": 1.4951,
      "step": 190
    },
    {
      "epoch": 0.005307186594046664,
      "grad_norm": 7.1547112464904785,
      "learning_rate": 7.960000000000002e-06,
      "loss": 1.5225,
      "step": 200
    },
    {
      "epoch": 0.005307186594046664,
      "eval_accuracy": 0.5399965503058286,
      "eval_f1": 0.4954655997286682,
      "eval_loss": 1.4961189031600952,
      "eval_precision": 0.5230960318752841,
      "eval_recall": 0.5399965503058286,
      "eval_runtime": 1146.418,
      "eval_samples_per_second": 65.743,
      "eval_steps_per_second": 8.219,
      "step": 200
    },
    {
      "epoch": 0.0055725459237489965,
      "grad_norm": 10.55716609954834,
      "learning_rate": 8.36e-06,
      "loss": 1.486,
      "step": 210
    },
    {
      "epoch": 0.005837905253451329,
      "grad_norm": 10.896256446838379,
      "learning_rate": 8.76e-06,
      "loss": 1.3551,
      "step": 220
    },
    {
      "epoch": 0.006103264583153663,
      "grad_norm": 10.593475341796875,
      "learning_rate": 9.16e-06,
      "loss": 1.3457,
      "step": 230
    },
    {
      "epoch": 0.006368623912855996,
      "grad_norm": 12.37448787689209,
      "learning_rate": 9.56e-06,
      "loss": 1.4264,
      "step": 240
    },
    {
      "epoch": 0.006633983242558329,
      "grad_norm": 15.805830001831055,
      "learning_rate": 9.960000000000001e-06,
      "loss": 1.1405,
      "step": 250
    },
    {
      "epoch": 0.006899342572260663,
      "grad_norm": 7.878324031829834,
      "learning_rate": 1.036e-05,
      "loss": 1.3623,
      "step": 260
    },
    {
      "epoch": 0.0071647019019629955,
      "grad_norm": 9.763978004455566,
      "learning_rate": 1.0760000000000002e-05,
      "loss": 1.1918,
      "step": 270
    },
    {
      "epoch": 0.007430061231665329,
      "grad_norm": 8.449042320251465,
      "learning_rate": 1.1160000000000002e-05,
      "loss": 1.1437,
      "step": 280
    },
    {
      "epoch": 0.007695420561367662,
      "grad_norm": 10.577520370483398,
      "learning_rate": 1.156e-05,
      "loss": 1.0793,
      "step": 290
    },
    {
      "epoch": 0.007960779891069995,
      "grad_norm": 11.707535743713379,
      "learning_rate": 1.196e-05,
      "loss": 1.1895,
      "step": 300
    },
    {
      "epoch": 0.007960779891069995,
      "eval_accuracy": 0.6815534238214651,
      "eval_f1": 0.6624035468309406,
      "eval_loss": 1.228468894958496,
      "eval_precision": 0.6548079319665443,
      "eval_recall": 0.6815534238214651,
      "eval_runtime": 1147.8213,
      "eval_samples_per_second": 65.663,
      "eval_steps_per_second": 8.209,
      "step": 300
    },
    {
      "epoch": 0.008226139220772328,
      "grad_norm": 7.825327396392822,
      "learning_rate": 1.236e-05,
      "loss": 1.0,
      "step": 310
    },
    {
      "epoch": 0.00849149855047466,
      "grad_norm": 9.7120943069458,
      "learning_rate": 1.2760000000000001e-05,
      "loss": 1.2152,
      "step": 320
    },
    {
      "epoch": 0.008756857880176995,
      "grad_norm": 10.450387954711914,
      "learning_rate": 1.3160000000000001e-05,
      "loss": 1.2838,
      "step": 330
    },
    {
      "epoch": 0.009022217209879328,
      "grad_norm": 6.901065826416016,
      "learning_rate": 1.3560000000000002e-05,
      "loss": 1.1989,
      "step": 340
    },
    {
      "epoch": 0.009287576539581661,
      "grad_norm": 7.151400089263916,
      "learning_rate": 1.396e-05,
      "loss": 1.0571,
      "step": 350
    },
    {
      "epoch": 0.009552935869283994,
      "grad_norm": 7.359131813049316,
      "learning_rate": 1.4360000000000001e-05,
      "loss": 1.2923,
      "step": 360
    },
    {
      "epoch": 0.009818295198986327,
      "grad_norm": 6.762883186340332,
      "learning_rate": 1.4760000000000001e-05,
      "loss": 1.2465,
      "step": 370
    },
    {
      "epoch": 0.01008365452868866,
      "grad_norm": 7.525689125061035,
      "learning_rate": 1.516e-05,
      "loss": 0.889,
      "step": 380
    },
    {
      "epoch": 0.010349013858390994,
      "grad_norm": 12.986421585083008,
      "learning_rate": 1.556e-05,
      "loss": 1.2117,
      "step": 390
    },
    {
      "epoch": 0.010614373188093327,
      "grad_norm": 10.896193504333496,
      "learning_rate": 1.5960000000000003e-05,
      "loss": 1.0,
      "step": 400
    },
    {
      "epoch": 0.010614373188093327,
      "eval_accuracy": 0.7040295081532195,
      "eval_f1": 0.7118047953474329,
      "eval_loss": 1.004681944847107,
      "eval_precision": 0.7430775529469658,
      "eval_recall": 0.7040295081532195,
      "eval_runtime": 1149.1166,
      "eval_samples_per_second": 65.589,
      "eval_steps_per_second": 8.199,
      "step": 400
    },
    {
      "epoch": 0.01087973251779566,
      "grad_norm": 10.948391914367676,
      "learning_rate": 1.636e-05,
      "loss": 1.1142,
      "step": 410
    },
    {
      "epoch": 0.011145091847497993,
      "grad_norm": 9.782318115234375,
      "learning_rate": 1.6760000000000002e-05,
      "loss": 1.0815,
      "step": 420
    },
    {
      "epoch": 0.011410451177200326,
      "grad_norm": 15.20601749420166,
      "learning_rate": 1.7160000000000002e-05,
      "loss": 1.1052,
      "step": 430
    },
    {
      "epoch": 0.011675810506902659,
      "grad_norm": 13.752850532531738,
      "learning_rate": 1.756e-05,
      "loss": 0.9449,
      "step": 440
    },
    {
      "epoch": 0.011941169836604993,
      "grad_norm": 14.253933906555176,
      "learning_rate": 1.796e-05,
      "loss": 0.8107,
      "step": 450
    },
    {
      "epoch": 0.012206529166307326,
      "grad_norm": 10.736539840698242,
      "learning_rate": 1.8360000000000004e-05,
      "loss": 0.8507,
      "step": 460
    },
    {
      "epoch": 0.012471888496009659,
      "grad_norm": 19.90713119506836,
      "learning_rate": 1.876e-05,
      "loss": 0.8218,
      "step": 470
    },
    {
      "epoch": 0.012737247825711992,
      "grad_norm": 5.8942084312438965,
      "learning_rate": 1.916e-05,
      "loss": 0.8322,
      "step": 480
    },
    {
      "epoch": 0.013002607155414325,
      "grad_norm": 5.852909088134766,
      "learning_rate": 1.9560000000000002e-05,
      "loss": 0.72,
      "step": 490
    },
    {
      "epoch": 0.013267966485116658,
      "grad_norm": 8.721864700317383,
      "learning_rate": 1.9960000000000002e-05,
      "loss": 0.8702,
      "step": 500
    },
    {
      "epoch": 0.013267966485116658,
      "eval_accuracy": 0.7919303692499569,
      "eval_f1": 0.7952185444780413,
      "eval_loss": 0.7821776270866394,
      "eval_precision": 0.8109789187273935,
      "eval_recall": 0.7919303692499569,
      "eval_runtime": 1149.5055,
      "eval_samples_per_second": 65.566,
      "eval_steps_per_second": 8.197,
      "step": 500
    },
    {
      "epoch": 0.013533325814818992,
      "grad_norm": 7.045879364013672,
      "learning_rate": 1.9999042145593872e-05,
      "loss": 0.8162,
      "step": 510
    },
    {
      "epoch": 0.013798685144521325,
      "grad_norm": 12.590984344482422,
      "learning_rate": 1.9997977862920393e-05,
      "loss": 0.8281,
      "step": 520
    },
    {
      "epoch": 0.014064044474223658,
      "grad_norm": 4.697333812713623,
      "learning_rate": 1.9996913580246914e-05,
      "loss": 0.8597,
      "step": 530
    },
    {
      "epoch": 0.014329403803925991,
      "grad_norm": 7.0437188148498535,
      "learning_rate": 1.999584929757344e-05,
      "loss": 0.6957,
      "step": 540
    },
    {
      "epoch": 0.014594763133628324,
      "grad_norm": 12.778396606445312,
      "learning_rate": 1.999478501489996e-05,
      "loss": 0.8493,
      "step": 550
    },
    {
      "epoch": 0.014860122463330658,
      "grad_norm": 30.98528480529785,
      "learning_rate": 1.999372073222648e-05,
      "loss": 0.8598,
      "step": 560
    },
    {
      "epoch": 0.015125481793032991,
      "grad_norm": 6.842329502105713,
      "learning_rate": 1.9992656449553e-05,
      "loss": 0.7805,
      "step": 570
    },
    {
      "epoch": 0.015390841122735324,
      "grad_norm": 7.865843772888184,
      "learning_rate": 1.9991592166879526e-05,
      "loss": 0.6858,
      "step": 580
    },
    {
      "epoch": 0.01565620045243766,
      "grad_norm": 7.990331172943115,
      "learning_rate": 1.9990527884206047e-05,
      "loss": 0.8487,
      "step": 590
    },
    {
      "epoch": 0.01592155978213999,
      "grad_norm": 7.9941935539245605,
      "learning_rate": 1.9989463601532568e-05,
      "loss": 0.7483,
      "step": 600
    },
    {
      "epoch": 0.01592155978213999,
      "eval_accuracy": 0.8143533813636906,
      "eval_f1": 0.8091828053145604,
      "eval_loss": 0.7799807190895081,
      "eval_precision": 0.8135668963695762,
      "eval_recall": 0.8143533813636906,
      "eval_runtime": 1149.6535,
      "eval_samples_per_second": 65.558,
      "eval_steps_per_second": 8.196,
      "step": 600
    },
    {
      "epoch": 0.016186919111842325,
      "grad_norm": 7.86228609085083,
      "learning_rate": 1.9988399318859092e-05,
      "loss": 0.8267,
      "step": 610
    },
    {
      "epoch": 0.016452278441544656,
      "grad_norm": 17.805816650390625,
      "learning_rate": 1.9987335036185613e-05,
      "loss": 0.7449,
      "step": 620
    },
    {
      "epoch": 0.01671763777124699,
      "grad_norm": 20.419509887695312,
      "learning_rate": 1.9986270753512134e-05,
      "loss": 0.806,
      "step": 630
    },
    {
      "epoch": 0.01698299710094932,
      "grad_norm": 8.158143043518066,
      "learning_rate": 1.9985206470838655e-05,
      "loss": 0.5455,
      "step": 640
    },
    {
      "epoch": 0.017248356430651656,
      "grad_norm": 19.537343978881836,
      "learning_rate": 1.998414218816518e-05,
      "loss": 0.6334,
      "step": 650
    },
    {
      "epoch": 0.01751371576035399,
      "grad_norm": 13.77968692779541,
      "learning_rate": 1.99830779054917e-05,
      "loss": 0.6848,
      "step": 660
    },
    {
      "epoch": 0.017779075090056322,
      "grad_norm": 13.80045223236084,
      "learning_rate": 1.9982013622818222e-05,
      "loss": 0.8403,
      "step": 670
    },
    {
      "epoch": 0.018044434419758656,
      "grad_norm": 7.564460277557373,
      "learning_rate": 1.9980949340144743e-05,
      "loss": 0.8387,
      "step": 680
    },
    {
      "epoch": 0.018309793749460988,
      "grad_norm": 9.932668685913086,
      "learning_rate": 1.9979885057471267e-05,
      "loss": 0.6252,
      "step": 690
    },
    {
      "epoch": 0.018575153079163322,
      "grad_norm": 3.894618034362793,
      "learning_rate": 1.997882077479779e-05,
      "loss": 0.6915,
      "step": 700
    },
    {
      "epoch": 0.018575153079163322,
      "eval_accuracy": 0.8227255237564516,
      "eval_f1": 0.8164082076886476,
      "eval_loss": 0.7442639470100403,
      "eval_precision": 0.8239897054500639,
      "eval_recall": 0.8227255237564516,
      "eval_runtime": 1149.8124,
      "eval_samples_per_second": 65.549,
      "eval_steps_per_second": 8.194,
      "step": 700
    },
    {
      "epoch": 0.018840512408865657,
      "grad_norm": 8.771450996398926,
      "learning_rate": 1.997775649212431e-05,
      "loss": 0.5164,
      "step": 710
    },
    {
      "epoch": 0.019105871738567988,
      "grad_norm": 13.675606727600098,
      "learning_rate": 1.997669220945083e-05,
      "loss": 0.7917,
      "step": 720
    },
    {
      "epoch": 0.019371231068270323,
      "grad_norm": 7.2742462158203125,
      "learning_rate": 1.9975627926777355e-05,
      "loss": 0.5966,
      "step": 730
    },
    {
      "epoch": 0.019636590397972654,
      "grad_norm": 8.653703689575195,
      "learning_rate": 1.9974563644103876e-05,
      "loss": 0.7009,
      "step": 740
    },
    {
      "epoch": 0.01990194972767499,
      "grad_norm": 4.837522983551025,
      "learning_rate": 1.9973499361430397e-05,
      "loss": 0.6471,
      "step": 750
    },
    {
      "epoch": 0.02016730905737732,
      "grad_norm": 51.784481048583984,
      "learning_rate": 1.997243507875692e-05,
      "loss": 0.5877,
      "step": 760
    },
    {
      "epoch": 0.020432668387079654,
      "grad_norm": 10.597892761230469,
      "learning_rate": 1.9971370796083442e-05,
      "loss": 0.5925,
      "step": 770
    },
    {
      "epoch": 0.02069802771678199,
      "grad_norm": 13.237262725830078,
      "learning_rate": 1.9970306513409963e-05,
      "loss": 0.6298,
      "step": 780
    },
    {
      "epoch": 0.02096338704648432,
      "grad_norm": 9.751429557800293,
      "learning_rate": 1.9969242230736484e-05,
      "loss": 0.5414,
      "step": 790
    },
    {
      "epoch": 0.021228746376186654,
      "grad_norm": 15.811433792114258,
      "learning_rate": 1.996817794806301e-05,
      "loss": 0.7402,
      "step": 800
    },
    {
      "epoch": 0.021228746376186654,
      "eval_accuracy": 0.8336318645596996,
      "eval_f1": 0.8326606774493011,
      "eval_loss": 0.6281165480613708,
      "eval_precision": 0.8348071025847044,
      "eval_recall": 0.8336318645596996,
      "eval_runtime": 1150.0313,
      "eval_samples_per_second": 65.536,
      "eval_steps_per_second": 8.193,
      "step": 800
    },
    {
      "epoch": 0.021494105705888986,
      "grad_norm": 6.421304225921631,
      "learning_rate": 1.9967113665389526e-05,
      "loss": 0.5246,
      "step": 810
    },
    {
      "epoch": 0.02175946503559132,
      "grad_norm": 10.94848918914795,
      "learning_rate": 1.996604938271605e-05,
      "loss": 0.4998,
      "step": 820
    },
    {
      "epoch": 0.022024824365293655,
      "grad_norm": 18.42159080505371,
      "learning_rate": 1.9964985100042572e-05,
      "loss": 0.3855,
      "step": 830
    },
    {
      "epoch": 0.022290183694995986,
      "grad_norm": 6.594385623931885,
      "learning_rate": 1.9963920817369096e-05,
      "loss": 0.6205,
      "step": 840
    },
    {
      "epoch": 0.02255554302469832,
      "grad_norm": 15.90577220916748,
      "learning_rate": 1.9962856534695617e-05,
      "loss": 0.5837,
      "step": 850
    },
    {
      "epoch": 0.022820902354400652,
      "grad_norm": 13.169767379760742,
      "learning_rate": 1.996179225202214e-05,
      "loss": 0.6141,
      "step": 860
    },
    {
      "epoch": 0.023086261684102986,
      "grad_norm": 18.30284881591797,
      "learning_rate": 1.9960727969348663e-05,
      "loss": 0.5903,
      "step": 870
    },
    {
      "epoch": 0.023351621013805317,
      "grad_norm": 18.575279235839844,
      "learning_rate": 1.9959663686675184e-05,
      "loss": 0.6436,
      "step": 880
    },
    {
      "epoch": 0.023616980343507652,
      "grad_norm": 7.2494587898254395,
      "learning_rate": 1.9958599404001705e-05,
      "loss": 0.5912,
      "step": 890
    },
    {
      "epoch": 0.023882339673209987,
      "grad_norm": 27.128528594970703,
      "learning_rate": 1.9957535121328226e-05,
      "loss": 0.6165,
      "step": 900
    },
    {
      "epoch": 0.023882339673209987,
      "eval_accuracy": 0.8347198450291233,
      "eval_f1": 0.8366928520828808,
      "eval_loss": 0.6065123081207275,
      "eval_precision": 0.8420416556337017,
      "eval_recall": 0.8347198450291233,
      "eval_runtime": 1150.012,
      "eval_samples_per_second": 65.538,
      "eval_steps_per_second": 8.193,
      "step": 900
    },
    {
      "epoch": 0.024147699002912318,
      "grad_norm": 11.817070960998535,
      "learning_rate": 1.995647083865475e-05,
      "loss": 0.6235,
      "step": 910
    },
    {
      "epoch": 0.024413058332614652,
      "grad_norm": 13.93792724609375,
      "learning_rate": 1.9955406555981268e-05,
      "loss": 0.5442,
      "step": 920
    },
    {
      "epoch": 0.024678417662316984,
      "grad_norm": 12.457510948181152,
      "learning_rate": 1.9954342273307792e-05,
      "loss": 0.5218,
      "step": 930
    },
    {
      "epoch": 0.024943776992019318,
      "grad_norm": 19.165714263916016,
      "learning_rate": 1.9953277990634313e-05,
      "loss": 0.6517,
      "step": 940
    },
    {
      "epoch": 0.025209136321721653,
      "grad_norm": 16.57741355895996,
      "learning_rate": 1.9952213707960838e-05,
      "loss": 0.6778,
      "step": 950
    },
    {
      "epoch": 0.025474495651423984,
      "grad_norm": 10.39408016204834,
      "learning_rate": 1.995114942528736e-05,
      "loss": 0.7138,
      "step": 960
    },
    {
      "epoch": 0.02573985498112632,
      "grad_norm": 9.648213386535645,
      "learning_rate": 1.995008514261388e-05,
      "loss": 0.63,
      "step": 970
    },
    {
      "epoch": 0.02600521431082865,
      "grad_norm": 18.695945739746094,
      "learning_rate": 1.99490208599404e-05,
      "loss": 0.6557,
      "step": 980
    },
    {
      "epoch": 0.026270573640530984,
      "grad_norm": 16.400314331054688,
      "learning_rate": 1.9947956577266925e-05,
      "loss": 0.7228,
      "step": 990
    },
    {
      "epoch": 0.026535932970233315,
      "grad_norm": 6.129698753356934,
      "learning_rate": 1.9946892294593446e-05,
      "loss": 0.4626,
      "step": 1000
    },
    {
      "epoch": 0.026535932970233315,
      "eval_accuracy": 0.846382464939166,
      "eval_f1": 0.8438986049887841,
      "eval_loss": 0.6415271759033203,
      "eval_precision": 0.8464411216655519,
      "eval_recall": 0.846382464939166,
      "eval_runtime": 1149.903,
      "eval_samples_per_second": 65.544,
      "eval_steps_per_second": 8.194,
      "step": 1000
    },
    {
      "epoch": 0.02680129229993565,
      "grad_norm": 9.027094841003418,
      "learning_rate": 1.9945828011919967e-05,
      "loss": 0.7249,
      "step": 1010
    },
    {
      "epoch": 0.027066651629637985,
      "grad_norm": 14.91533374786377,
      "learning_rate": 1.9944763729246492e-05,
      "loss": 0.6765,
      "step": 1020
    },
    {
      "epoch": 0.027332010959340316,
      "grad_norm": 3.9888482093811035,
      "learning_rate": 1.994369944657301e-05,
      "loss": 0.7675,
      "step": 1030
    },
    {
      "epoch": 0.02759737028904265,
      "grad_norm": 7.831120491027832,
      "learning_rate": 1.9942635163899534e-05,
      "loss": 0.5764,
      "step": 1040
    },
    {
      "epoch": 0.02786272961874498,
      "grad_norm": 7.661794185638428,
      "learning_rate": 1.9941570881226055e-05,
      "loss": 0.6496,
      "step": 1050
    },
    {
      "epoch": 0.028128088948447316,
      "grad_norm": 5.446905136108398,
      "learning_rate": 1.994050659855258e-05,
      "loss": 0.7055,
      "step": 1060
    },
    {
      "epoch": 0.02839344827814965,
      "grad_norm": 11.230253219604492,
      "learning_rate": 1.9939442315879097e-05,
      "loss": 0.6096,
      "step": 1070
    },
    {
      "epoch": 0.028658807607851982,
      "grad_norm": 4.4712982177734375,
      "learning_rate": 1.993837803320562e-05,
      "loss": 0.603,
      "step": 1080
    },
    {
      "epoch": 0.028924166937554317,
      "grad_norm": 6.14396858215332,
      "learning_rate": 1.9937313750532142e-05,
      "loss": 0.7755,
      "step": 1090
    },
    {
      "epoch": 0.029189526267256648,
      "grad_norm": 15.914993286132812,
      "learning_rate": 1.9936249467858667e-05,
      "loss": 0.4619,
      "step": 1100
    },
    {
      "epoch": 0.029189526267256648,
      "eval_accuracy": 0.846621289920259,
      "eval_f1": 0.842526016669277,
      "eval_loss": 0.6318368911743164,
      "eval_precision": 0.8510012460868045,
      "eval_recall": 0.846621289920259,
      "eval_runtime": 1149.9706,
      "eval_samples_per_second": 65.54,
      "eval_steps_per_second": 8.193,
      "step": 1100
    }
  ],
  "logging_steps": 10,
  "max_steps": 188420,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4630920885043200.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}