{
  "best_global_step": 4180,
  "best_metric": 3.6224466271050915e-07,
  "best_model_checkpoint": "./code_corruptor_model_v2\\checkpoint-4180",
  "epoch": 12.0,
  "eval_steps": 500,
  "global_step": 4560,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13157894736842105,
      "grad_norm": 0.10753314197063446,
      "learning_rate": 4.948464912280702e-05,
      "loss": 0.4466,
      "step": 50
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 0.006062635686248541,
      "learning_rate": 4.8936403508771935e-05,
      "loss": 0.0037,
      "step": 100
    },
    {
      "epoch": 0.39473684210526316,
      "grad_norm": 0.008465762250125408,
      "learning_rate": 4.838815789473685e-05,
      "loss": 0.001,
      "step": 150
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 0.01755240559577942,
      "learning_rate": 4.7839912280701754e-05,
      "loss": 0.0015,
      "step": 200
    },
    {
      "epoch": 0.6578947368421053,
      "grad_norm": 0.042610205709934235,
      "learning_rate": 4.7291666666666666e-05,
      "loss": 0.0009,
      "step": 250
    },
    {
      "epoch": 0.7894736842105263,
      "grad_norm": 0.05834781005978584,
      "learning_rate": 4.674342105263158e-05,
      "loss": 0.0006,
      "step": 300
    },
    {
      "epoch": 0.9210526315789473,
      "grad_norm": 0.0006559228058904409,
      "learning_rate": 4.619517543859649e-05,
      "loss": 0.0005,
      "step": 350
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.6028698155423626e-05,
      "eval_runtime": 6.2183,
      "eval_samples_per_second": 24.444,
      "eval_steps_per_second": 12.222,
      "step": 380
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 0.055409740656614304,
      "learning_rate": 4.5646929824561405e-05,
      "loss": 0.0005,
      "step": 400
    },
    {
      "epoch": 1.1842105263157894,
      "grad_norm": 0.07685278356075287,
      "learning_rate": 4.509868421052632e-05,
      "loss": 0.0007,
      "step": 450
    },
    {
      "epoch": 1.3157894736842106,
      "grad_norm": 0.0034991835709661245,
      "learning_rate": 4.455043859649123e-05,
      "loss": 0.0004,
      "step": 500
    },
    {
      "epoch": 1.4473684210526316,
      "grad_norm": 0.0010067017283290625,
      "learning_rate": 4.400219298245614e-05,
      "loss": 0.0002,
      "step": 550
    },
    {
      "epoch": 1.5789473684210527,
      "grad_norm": 0.0004999448428861797,
      "learning_rate": 4.3453947368421056e-05,
      "loss": 0.0005,
      "step": 600
    },
    {
      "epoch": 1.7105263157894737,
      "grad_norm": 0.0048265825025737286,
      "learning_rate": 4.290570175438597e-05,
      "loss": 0.0001,
      "step": 650
    },
    {
      "epoch": 1.8421052631578947,
      "grad_norm": 0.0009590413537807763,
      "learning_rate": 4.235745614035088e-05,
      "loss": 0.0002,
      "step": 700
    },
    {
      "epoch": 1.973684210526316,
      "grad_norm": 0.0026596838142722845,
      "learning_rate": 4.180921052631579e-05,
      "loss": 0.0002,
      "step": 750
    },
    {
      "epoch": 2.0,
      "eval_loss": 9.181250788969919e-06,
      "eval_runtime": 6.0411,
      "eval_samples_per_second": 25.161,
      "eval_steps_per_second": 12.58,
      "step": 760
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 0.0004917697515338659,
      "learning_rate": 4.12609649122807e-05,
      "loss": 0.0003,
      "step": 800
    },
    {
      "epoch": 2.236842105263158,
      "grad_norm": 0.0010589464800432324,
      "learning_rate": 4.071271929824562e-05,
      "loss": 0.0005,
      "step": 850
    },
    {
      "epoch": 2.3684210526315788,
      "grad_norm": 0.001737129525281489,
      "learning_rate": 4.016447368421053e-05,
      "loss": 0.0004,
      "step": 900
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.00043353348155505955,
      "learning_rate": 3.9616228070175445e-05,
      "loss": 0.0002,
      "step": 950
    },
    {
      "epoch": 2.6315789473684212,
      "grad_norm": 0.000876229431014508,
      "learning_rate": 3.906798245614035e-05,
      "loss": 0.0001,
      "step": 1000
    },
    {
      "epoch": 2.763157894736842,
      "grad_norm": 0.006859931629151106,
      "learning_rate": 3.8519736842105264e-05,
      "loss": 0.0,
      "step": 1050
    },
    {
      "epoch": 2.8947368421052633,
      "grad_norm": 0.00029508452280424535,
      "learning_rate": 3.7971491228070176e-05,
      "loss": 0.0005,
      "step": 1100
    },
    {
      "epoch": 3.0,
      "eval_loss": 4.4037507905159146e-05,
      "eval_runtime": 6.0562,
      "eval_samples_per_second": 25.098,
      "eval_steps_per_second": 12.549,
      "step": 1140
    },
    {
      "epoch": 3.026315789473684,
      "grad_norm": 0.02006879821419716,
      "learning_rate": 3.742324561403509e-05,
      "loss": 0.001,
      "step": 1150
    },
    {
      "epoch": 3.1578947368421053,
      "grad_norm": 0.0015200282214209437,
      "learning_rate": 3.6875e-05,
      "loss": 0.0001,
      "step": 1200
    },
    {
      "epoch": 3.2894736842105265,
      "grad_norm": 0.0007221988635137677,
      "learning_rate": 3.6326754385964915e-05,
      "loss": 0.0,
      "step": 1250
    },
    {
      "epoch": 3.4210526315789473,
      "grad_norm": 0.0002022625703830272,
      "learning_rate": 3.577850877192983e-05,
      "loss": 0.0002,
      "step": 1300
    },
    {
      "epoch": 3.5526315789473686,
      "grad_norm": 0.2107187658548355,
      "learning_rate": 3.523026315789474e-05,
      "loss": 0.0009,
      "step": 1350
    },
    {
      "epoch": 3.6842105263157894,
      "grad_norm": 0.0004320333246141672,
      "learning_rate": 3.468201754385965e-05,
      "loss": 0.0009,
      "step": 1400
    },
    {
      "epoch": 3.8157894736842106,
      "grad_norm": 0.0014643239555880427,
      "learning_rate": 3.4133771929824566e-05,
      "loss": 0.0005,
      "step": 1450
    },
    {
      "epoch": 3.9473684210526314,
      "grad_norm": 0.0002134596143150702,
      "learning_rate": 3.358552631578947e-05,
      "loss": 0.0,
      "step": 1500
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.688454176473897e-05,
      "eval_runtime": 6.1697,
      "eval_samples_per_second": 24.637,
      "eval_steps_per_second": 12.318,
      "step": 1520
    },
    {
      "epoch": 4.078947368421052,
      "grad_norm": 0.007211349904537201,
      "learning_rate": 3.3037280701754384e-05,
      "loss": 0.0002,
      "step": 1550
    },
    {
      "epoch": 4.2105263157894735,
      "grad_norm": 0.00045391780440695584,
      "learning_rate": 3.24890350877193e-05,
      "loss": 0.0003,
      "step": 1600
    },
    {
      "epoch": 4.342105263157895,
      "grad_norm": 0.00023115136718843132,
      "learning_rate": 3.194078947368421e-05,
      "loss": 0.0001,
      "step": 1650
    },
    {
      "epoch": 4.473684210526316,
      "grad_norm": 0.14033561944961548,
      "learning_rate": 3.139254385964913e-05,
      "loss": 0.0001,
      "step": 1700
    },
    {
      "epoch": 4.605263157894737,
      "grad_norm": 0.0017103628488257527,
      "learning_rate": 3.0844298245614035e-05,
      "loss": 0.0001,
      "step": 1750
    },
    {
      "epoch": 4.7368421052631575,
      "grad_norm": 0.0032462095841765404,
      "learning_rate": 3.0296052631578948e-05,
      "loss": 0.0,
      "step": 1800
    },
    {
      "epoch": 4.868421052631579,
      "grad_norm": 0.002826864365488291,
      "learning_rate": 2.974780701754386e-05,
      "loss": 0.0001,
      "step": 1850
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.0006184170488268137,
      "learning_rate": 2.9199561403508774e-05,
      "loss": 0.0005,
      "step": 1900
    },
    {
      "epoch": 5.0,
      "eval_loss": 3.343406660860637e-06,
      "eval_runtime": 6.1148,
      "eval_samples_per_second": 24.858,
      "eval_steps_per_second": 12.429,
      "step": 1900
    },
    {
      "epoch": 5.131578947368421,
      "grad_norm": 0.0009092154796235263,
      "learning_rate": 2.8651315789473686e-05,
      "loss": 0.0,
      "step": 1950
    },
    {
      "epoch": 5.2631578947368425,
      "grad_norm": 0.0006162663921713829,
      "learning_rate": 2.81030701754386e-05,
      "loss": 0.0,
      "step": 2000
    },
    {
      "epoch": 5.394736842105263,
      "grad_norm": 0.0013979279901832342,
      "learning_rate": 2.755482456140351e-05,
      "loss": 0.0001,
      "step": 2050
    },
    {
      "epoch": 5.526315789473684,
      "grad_norm": 0.00045684297219850123,
      "learning_rate": 2.700657894736842e-05,
      "loss": 0.0,
      "step": 2100
    },
    {
      "epoch": 5.657894736842105,
      "grad_norm": 0.017059462144970894,
      "learning_rate": 2.6458333333333334e-05,
      "loss": 0.0,
      "step": 2150
    },
    {
      "epoch": 5.7894736842105265,
      "grad_norm": 9.451019286643714e-05,
      "learning_rate": 2.591008771929825e-05,
      "loss": 0.0001,
      "step": 2200
    },
    {
      "epoch": 5.921052631578947,
      "grad_norm": 0.0002531503851059824,
      "learning_rate": 2.5361842105263163e-05,
      "loss": 0.0002,
      "step": 2250
    },
    {
      "epoch": 6.0,
      "eval_loss": 8.532630317859002e-07,
      "eval_runtime": 6.1846,
      "eval_samples_per_second": 24.577,
      "eval_steps_per_second": 12.289,
      "step": 2280
    },
    {
      "epoch": 6.052631578947368,
      "grad_norm": 0.00012987718218937516,
      "learning_rate": 2.4813596491228072e-05,
      "loss": 0.0,
      "step": 2300
    },
    {
      "epoch": 6.184210526315789,
      "grad_norm": 8.179421274689957e-05,
      "learning_rate": 2.426535087719298e-05,
      "loss": 0.0,
      "step": 2350
    },
    {
      "epoch": 6.315789473684211,
      "grad_norm": 0.0004638760001398623,
      "learning_rate": 2.3717105263157898e-05,
      "loss": 0.0,
      "step": 2400
    },
    {
      "epoch": 6.447368421052632,
      "grad_norm": 0.0002866844297386706,
      "learning_rate": 2.316885964912281e-05,
      "loss": 0.0005,
      "step": 2450
    },
    {
      "epoch": 6.578947368421053,
      "grad_norm": 7.315115362871438e-05,
      "learning_rate": 2.262061403508772e-05,
      "loss": 0.0,
      "step": 2500
    },
    {
      "epoch": 6.7105263157894735,
      "grad_norm": 0.00015406313468702137,
      "learning_rate": 2.2072368421052632e-05,
      "loss": 0.0,
      "step": 2550
    },
    {
      "epoch": 6.842105263157895,
      "grad_norm": 8.98495563887991e-05,
      "learning_rate": 2.1524122807017545e-05,
      "loss": 0.0,
      "step": 2600
    },
    {
      "epoch": 6.973684210526316,
      "grad_norm": 0.00020581792341545224,
      "learning_rate": 2.0975877192982458e-05,
      "loss": 0.0,
      "step": 2650
    },
    {
      "epoch": 7.0,
      "eval_loss": 8.157680895237718e-07,
      "eval_runtime": 6.1011,
      "eval_samples_per_second": 24.914,
      "eval_steps_per_second": 12.457,
      "step": 2660
    },
    {
      "epoch": 7.105263157894737,
      "grad_norm": 0.00012650905409827828,
      "learning_rate": 2.042763157894737e-05,
      "loss": 0.0,
      "step": 2700
    },
    {
      "epoch": 7.2368421052631575,
      "grad_norm": 0.00016898708418011665,
      "learning_rate": 1.987938596491228e-05,
      "loss": 0.0,
      "step": 2750
    },
    {
      "epoch": 7.368421052631579,
      "grad_norm": 0.0003560652839951217,
      "learning_rate": 1.9331140350877193e-05,
      "loss": 0.0,
      "step": 2800
    },
    {
      "epoch": 7.5,
      "grad_norm": 8.456506475340575e-05,
      "learning_rate": 1.8782894736842105e-05,
      "loss": 0.0,
      "step": 2850
    },
    {
      "epoch": 7.631578947368421,
      "grad_norm": 0.00023642393352929503,
      "learning_rate": 1.8234649122807018e-05,
      "loss": 0.0,
      "step": 2900
    },
    {
      "epoch": 7.7631578947368425,
      "grad_norm": 8.907222945708781e-05,
      "learning_rate": 1.768640350877193e-05,
      "loss": 0.0,
      "step": 2950
    },
    {
      "epoch": 7.894736842105263,
      "grad_norm": 6.318661326076835e-05,
      "learning_rate": 1.7138157894736844e-05,
      "loss": 0.0,
      "step": 3000
    },
    {
      "epoch": 8.0,
      "eval_loss": 5.999586392135825e-07,
      "eval_runtime": 6.1117,
      "eval_samples_per_second": 24.87,
      "eval_steps_per_second": 12.435,
      "step": 3040
    },
    {
      "epoch": 8.026315789473685,
      "grad_norm": 0.04527832567691803,
      "learning_rate": 1.6589912280701756e-05,
      "loss": 0.0002,
      "step": 3050
    },
    {
      "epoch": 8.157894736842104,
      "grad_norm": 0.0002466822334099561,
      "learning_rate": 1.604166666666667e-05,
      "loss": 0.0,
      "step": 3100
    },
    {
      "epoch": 8.289473684210526,
      "grad_norm": 0.00023732382396701723,
      "learning_rate": 1.549342105263158e-05,
      "loss": 0.0,
      "step": 3150
    },
    {
      "epoch": 8.421052631578947,
      "grad_norm": 8.941477426560596e-05,
      "learning_rate": 1.4945175438596493e-05,
      "loss": 0.0001,
      "step": 3200
    },
    {
      "epoch": 8.552631578947368,
      "grad_norm": 0.00011901962716365233,
      "learning_rate": 1.4396929824561402e-05,
      "loss": 0.0,
      "step": 3250
    },
    {
      "epoch": 8.68421052631579,
      "grad_norm": 9.798636165214702e-05,
      "learning_rate": 1.3848684210526317e-05,
      "loss": 0.0,
      "step": 3300
    },
    {
      "epoch": 8.81578947368421,
      "grad_norm": 3.7345100281527266e-05,
      "learning_rate": 1.330043859649123e-05,
      "loss": 0.0,
      "step": 3350
    },
    {
      "epoch": 8.947368421052632,
      "grad_norm": 2.8350032152957283e-05,
      "learning_rate": 1.275219298245614e-05,
      "loss": 0.0,
      "step": 3400
    },
    {
      "epoch": 9.0,
      "eval_loss": 4.4665341647487367e-07,
      "eval_runtime": 6.1054,
      "eval_samples_per_second": 24.896,
      "eval_steps_per_second": 12.448,
      "step": 3420
    },
    {
      "epoch": 9.078947368421053,
      "grad_norm": 8.318301843246445e-05,
      "learning_rate": 1.2203947368421053e-05,
      "loss": 0.0,
      "step": 3450
    },
    {
      "epoch": 9.210526315789474,
      "grad_norm": 4.566019197227433e-05,
      "learning_rate": 1.1655701754385966e-05,
      "loss": 0.0,
      "step": 3500
    },
    {
      "epoch": 9.342105263157896,
      "grad_norm": 0.03128642588853836,
      "learning_rate": 1.1107456140350877e-05,
      "loss": 0.0,
      "step": 3550
    },
    {
      "epoch": 9.473684210526315,
      "grad_norm": 4.535232073976658e-05,
      "learning_rate": 1.055921052631579e-05,
      "loss": 0.0,
      "step": 3600
    },
    {
      "epoch": 9.605263157894736,
      "grad_norm": 7.957038906170055e-05,
      "learning_rate": 1.0010964912280703e-05,
      "loss": 0.0,
      "step": 3650
    },
    {
      "epoch": 9.736842105263158,
      "grad_norm": 5.397196946432814e-05,
      "learning_rate": 9.462719298245615e-06,
      "loss": 0.0,
      "step": 3700
    },
    {
      "epoch": 9.868421052631579,
      "grad_norm": 0.00015735568013042212,
      "learning_rate": 8.914473684210526e-06,
      "loss": 0.0,
      "step": 3750
    },
    {
      "epoch": 10.0,
      "grad_norm": 3.112037666141987e-05,
      "learning_rate": 8.36622807017544e-06,
      "loss": 0.0,
      "step": 3800
    },
    {
      "epoch": 10.0,
      "eval_loss": 3.8959919379522034e-07,
      "eval_runtime": 6.0027,
      "eval_samples_per_second": 25.322,
      "eval_steps_per_second": 12.661,
      "step": 3800
    },
    {
      "epoch": 10.131578947368421,
      "grad_norm": 2.3204265744425356e-05,
      "learning_rate": 7.81798245614035e-06,
      "loss": 0.0,
      "step": 3850
    },
    {
      "epoch": 10.263157894736842,
      "grad_norm": 9.128025703830644e-05,
      "learning_rate": 7.269736842105264e-06,
      "loss": 0.0,
      "step": 3900
    },
    {
      "epoch": 10.394736842105264,
      "grad_norm": 1.550123852211982e-05,
      "learning_rate": 6.721491228070176e-06,
      "loss": 0.0,
      "step": 3950
    },
    {
      "epoch": 10.526315789473685,
      "grad_norm": 0.00013698793190997094,
      "learning_rate": 6.173245614035088e-06,
      "loss": 0.0,
      "step": 4000
    },
    {
      "epoch": 10.657894736842106,
      "grad_norm": 3.800446938839741e-05,
      "learning_rate": 5.625e-06,
      "loss": 0.0,
      "step": 4050
    },
    {
      "epoch": 10.789473684210526,
      "grad_norm": 1.4189299690769985e-05,
      "learning_rate": 5.076754385964912e-06,
      "loss": 0.0,
      "step": 4100
    },
    {
      "epoch": 10.921052631578947,
      "grad_norm": 4.4892298319609836e-05,
      "learning_rate": 4.528508771929825e-06,
      "loss": 0.0,
      "step": 4150
    },
    {
      "epoch": 11.0,
      "eval_loss": 3.6224466271050915e-07,
      "eval_runtime": 6.0832,
      "eval_samples_per_second": 24.987,
      "eval_steps_per_second": 12.493,
      "step": 4180
    },
    {
      "epoch": 11.052631578947368,
      "grad_norm": 0.00010307016054866835,
      "learning_rate": 3.980263157894737e-06,
      "loss": 0.0,
      "step": 4200
    },
    {
      "epoch": 11.18421052631579,
      "grad_norm": 0.0009463855531066656,
      "learning_rate": 3.4320175438596496e-06,
      "loss": 0.0,
      "step": 4250
    },
    {
      "epoch": 11.31578947368421,
      "grad_norm": 3.068053410970606e-05,
      "learning_rate": 2.8837719298245615e-06,
      "loss": 0.0,
      "step": 4300
    },
    {
      "epoch": 11.447368421052632,
      "grad_norm": 0.0005556969554163516,
      "learning_rate": 2.335526315789474e-06,
      "loss": 0.0,
      "step": 4350
    },
    {
      "epoch": 11.578947368421053,
      "grad_norm": 2.4526205379515886e-05,
      "learning_rate": 1.787280701754386e-06,
      "loss": 0.0,
      "step": 4400
    },
    {
      "epoch": 11.710526315789474,
      "grad_norm": 6.120463513070717e-05,
      "learning_rate": 1.2390350877192983e-06,
      "loss": 0.0,
      "step": 4450
    },
    {
      "epoch": 11.842105263157894,
      "grad_norm": 0.00036876738886348903,
      "learning_rate": 6.907894736842105e-07,
      "loss": 0.0,
      "step": 4500
    },
    {
      "epoch": 11.973684210526315,
      "grad_norm": 4.0039503801381215e-05,
      "learning_rate": 1.4254385964912283e-07,
      "loss": 0.0,
      "step": 4550
    },
    {
      "epoch": 12.0,
      "eval_loss": 3.6446573403736693e-07,
      "eval_runtime": 6.0772,
      "eval_samples_per_second": 25.011,
      "eval_steps_per_second": 12.506,
      "step": 4560
    }
  ],
  "logging_steps": 50,
  "max_steps": 4560,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 12,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5546388467220480.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}