{
  "best_metric": 0.0003763487620744854,
  "best_model_checkpoint": "./vit-base-fruit-punch/checkpoint-1000",
  "epoch": 8.0,
  "eval_steps": 100,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "grad_norm": 100187.125,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 1.127,
      "step": 10
    },
    {
      "epoch": 0.16,
      "grad_norm": 96563.390625,
      "learning_rate": 4.9e-05,
      "loss": 0.7007,
      "step": 20
    },
    {
      "epoch": 0.24,
      "grad_norm": 64723.85546875,
      "learning_rate": 4.85e-05,
      "loss": 0.4167,
      "step": 30
    },
    {
      "epoch": 0.32,
      "grad_norm": 40675.4140625,
      "learning_rate": 4.8e-05,
      "loss": 0.2602,
      "step": 40
    },
    {
      "epoch": 0.4,
      "grad_norm": 28872.85546875,
      "learning_rate": 4.75e-05,
      "loss": 0.16,
      "step": 50
    },
    {
      "epoch": 0.48,
      "grad_norm": 26935.421875,
      "learning_rate": 4.7e-05,
      "loss": 0.1115,
      "step": 60
    },
    {
      "epoch": 0.56,
      "grad_norm": 19970.119140625,
      "learning_rate": 4.6500000000000005e-05,
      "loss": 0.0944,
      "step": 70
    },
    {
      "epoch": 0.64,
      "grad_norm": 19774.392578125,
      "learning_rate": 4.600000000000001e-05,
      "loss": 0.0761,
      "step": 80
    },
    {
      "epoch": 0.72,
      "grad_norm": 14233.1318359375,
      "learning_rate": 4.55e-05,
      "loss": 0.0588,
      "step": 90
    },
    {
      "epoch": 0.8,
      "grad_norm": 12306.6767578125,
      "learning_rate": 4.5e-05,
      "loss": 0.0488,
      "step": 100
    },
    {
      "epoch": 0.8,
      "eval_accuracy": 1.0,
      "eval_loss": 0.04701722040772438,
      "eval_runtime": 12.891,
      "eval_samples_per_second": 77.574,
      "eval_steps_per_second": 4.887,
      "step": 100
    },
    {
      "epoch": 0.88,
      "grad_norm": 11777.6708984375,
      "learning_rate": 4.4500000000000004e-05,
      "loss": 0.0429,
      "step": 110
    },
    {
      "epoch": 0.96,
      "grad_norm": 10813.1064453125,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 0.0458,
      "step": 120
    },
    {
      "epoch": 1.04,
      "grad_norm": 8880.1279296875,
      "learning_rate": 4.35e-05,
      "loss": 0.0384,
      "step": 130
    },
    {
      "epoch": 1.12,
      "grad_norm": 8182.60107421875,
      "learning_rate": 4.3e-05,
      "loss": 0.0309,
      "step": 140
    },
    {
      "epoch": 1.2,
      "grad_norm": 7128.07275390625,
      "learning_rate": 4.25e-05,
      "loss": 0.0281,
      "step": 150
    },
    {
      "epoch": 1.28,
      "grad_norm": 6803.498046875,
      "learning_rate": 4.2e-05,
      "loss": 0.0254,
      "step": 160
    },
    {
      "epoch": 1.3599999999999999,
      "grad_norm": 6198.88037109375,
      "learning_rate": 4.15e-05,
      "loss": 0.0232,
      "step": 170
    },
    {
      "epoch": 1.44,
      "grad_norm": 5394.99072265625,
      "learning_rate": 4.1e-05,
      "loss": 0.021,
      "step": 180
    },
    {
      "epoch": 1.52,
      "grad_norm": 5170.45458984375,
      "learning_rate": 4.05e-05,
      "loss": 0.0191,
      "step": 190
    },
    {
      "epoch": 1.6,
      "grad_norm": 4661.20263671875,
      "learning_rate": 4e-05,
      "loss": 0.0174,
      "step": 200
    },
    {
      "epoch": 1.6,
      "eval_accuracy": 1.0,
      "eval_loss": 0.017339378595352173,
      "eval_runtime": 13.5483,
      "eval_samples_per_second": 73.81,
      "eval_steps_per_second": 4.65,
      "step": 200
    },
    {
      "epoch": 1.6800000000000002,
      "grad_norm": 4417.46337890625,
      "learning_rate": 3.9500000000000005e-05,
      "loss": 0.016,
      "step": 210
    },
    {
      "epoch": 1.76,
      "grad_norm": 3967.432861328125,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 0.0146,
      "step": 220
    },
    {
      "epoch": 1.8399999999999999,
      "grad_norm": 3568.837646484375,
      "learning_rate": 3.85e-05,
      "loss": 0.0134,
      "step": 230
    },
    {
      "epoch": 1.92,
      "grad_norm": 3472.84716796875,
      "learning_rate": 3.8e-05,
      "loss": 0.0123,
      "step": 240
    },
    {
      "epoch": 2.0,
      "grad_norm": 3190.490966796875,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.0113,
      "step": 250
    },
    {
      "epoch": 2.08,
      "grad_norm": 2825.7802734375,
      "learning_rate": 3.7e-05,
      "loss": 0.0104,
      "step": 260
    },
    {
      "epoch": 2.16,
      "grad_norm": 2605.496337890625,
      "learning_rate": 3.65e-05,
      "loss": 0.0095,
      "step": 270
    },
    {
      "epoch": 2.24,
      "grad_norm": 2393.7314453125,
      "learning_rate": 3.6e-05,
      "loss": 0.0088,
      "step": 280
    },
    {
      "epoch": 2.32,
      "grad_norm": 2354.353515625,
      "learning_rate": 3.55e-05,
      "loss": 0.0081,
      "step": 290
    },
    {
      "epoch": 2.4,
      "grad_norm": 2027.639404296875,
      "learning_rate": 3.5e-05,
      "loss": 0.0074,
      "step": 300
    },
    {
      "epoch": 2.4,
      "eval_accuracy": 1.0,
      "eval_loss": 0.007476483471691608,
      "eval_runtime": 13.6387,
      "eval_samples_per_second": 73.321,
      "eval_steps_per_second": 4.619,
      "step": 300
    },
    {
      "epoch": 2.48,
      "grad_norm": 1890.090087890625,
      "learning_rate": 3.45e-05,
      "loss": 0.0068,
      "step": 310
    },
    {
      "epoch": 2.56,
      "grad_norm": 1713.1453857421875,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 0.0063,
      "step": 320
    },
    {
      "epoch": 2.64,
      "grad_norm": 1611.7166748046875,
      "learning_rate": 3.35e-05,
      "loss": 0.0059,
      "step": 330
    },
    {
      "epoch": 2.7199999999999998,
      "grad_norm": 1491.3282470703125,
      "learning_rate": 3.3e-05,
      "loss": 0.0054,
      "step": 340
    },
    {
      "epoch": 2.8,
      "grad_norm": 1385.7913818359375,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.005,
      "step": 350
    },
    {
      "epoch": 2.88,
      "grad_norm": 1317.3277587890625,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.0047,
      "step": 360
    },
    {
      "epoch": 2.96,
      "grad_norm": 1197.3973388671875,
      "learning_rate": 3.15e-05,
      "loss": 0.0043,
      "step": 370
    },
    {
      "epoch": 3.04,
      "grad_norm": 1110.10693359375,
      "learning_rate": 3.1e-05,
      "loss": 0.004,
      "step": 380
    },
    {
      "epoch": 3.12,
      "grad_norm": 1046.7801513671875,
      "learning_rate": 3.05e-05,
      "loss": 0.0037,
      "step": 390
    },
    {
      "epoch": 3.2,
      "grad_norm": 958.0781860351562,
      "learning_rate": 3e-05,
      "loss": 0.0035,
      "step": 400
    },
    {
      "epoch": 3.2,
      "eval_accuracy": 1.0,
      "eval_loss": 0.0034720886033028364,
      "eval_runtime": 13.6477,
      "eval_samples_per_second": 73.272,
      "eval_steps_per_second": 4.616,
      "step": 400
    },
    {
      "epoch": 3.2800000000000002,
      "grad_norm": 986.9517822265625,
      "learning_rate": 2.95e-05,
      "loss": 0.0032,
      "step": 410
    },
    {
      "epoch": 3.36,
      "grad_norm": 841.5371704101562,
      "learning_rate": 2.9e-05,
      "loss": 0.003,
      "step": 420
    },
    {
      "epoch": 3.44,
      "grad_norm": 797.5939331054688,
      "learning_rate": 2.8499999999999998e-05,
      "loss": 0.0028,
      "step": 430
    },
    {
      "epoch": 3.52,
      "grad_norm": 735.5321655273438,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 0.0026,
      "step": 440
    },
    {
      "epoch": 3.6,
      "grad_norm": 701.638427734375,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 0.0024,
      "step": 450
    },
    {
      "epoch": 3.68,
      "grad_norm": 647.36279296875,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 0.0023,
      "step": 460
    },
    {
      "epoch": 3.76,
      "grad_norm": 599.30126953125,
      "learning_rate": 2.6500000000000004e-05,
      "loss": 0.0021,
      "step": 470
    },
    {
      "epoch": 3.84,
      "grad_norm": 590.8321533203125,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 0.002,
      "step": 480
    },
    {
      "epoch": 3.92,
      "grad_norm": 546.0530395507812,
      "learning_rate": 2.5500000000000003e-05,
      "loss": 0.0019,
      "step": 490
    },
    {
      "epoch": 4.0,
      "grad_norm": 500.25738525390625,
      "learning_rate": 2.5e-05,
      "loss": 0.0018,
      "step": 500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.0022775332909077406,
      "eval_runtime": 13.3322,
      "eval_samples_per_second": 75.006,
      "eval_steps_per_second": 4.725,
      "step": 500
    },
    {
      "epoch": 4.08,
      "grad_norm": 494.0545349121094,
      "learning_rate": 2.45e-05,
      "loss": 0.0017,
      "step": 510
    },
    {
      "epoch": 4.16,
      "grad_norm": 452.84375,
      "learning_rate": 2.4e-05,
      "loss": 0.0016,
      "step": 520
    },
    {
      "epoch": 4.24,
      "grad_norm": 434.6565246582031,
      "learning_rate": 2.35e-05,
      "loss": 0.0015,
      "step": 530
    },
    {
      "epoch": 4.32,
      "grad_norm": 404.3072204589844,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 0.0014,
      "step": 540
    },
    {
      "epoch": 4.4,
      "grad_norm": 389.670166015625,
      "learning_rate": 2.25e-05,
      "loss": 0.0013,
      "step": 550
    },
    {
      "epoch": 4.48,
      "grad_norm": 387.8797607421875,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 0.0012,
      "step": 560
    },
    {
      "epoch": 4.5600000000000005,
      "grad_norm": 357.6133728027344,
      "learning_rate": 2.15e-05,
      "loss": 0.0012,
      "step": 570
    },
    {
      "epoch": 4.64,
      "grad_norm": 320.7620544433594,
      "learning_rate": 2.1e-05,
      "loss": 0.0011,
      "step": 580
    },
    {
      "epoch": 4.72,
      "grad_norm": 309.20062255859375,
      "learning_rate": 2.05e-05,
      "loss": 0.001,
      "step": 590
    },
    {
      "epoch": 4.8,
      "grad_norm": 292.0805358886719,
      "learning_rate": 2e-05,
      "loss": 0.001,
      "step": 600
    },
    {
      "epoch": 4.8,
      "eval_accuracy": 1.0,
      "eval_loss": 0.001073041232302785,
      "eval_runtime": 13.7046,
      "eval_samples_per_second": 72.968,
      "eval_steps_per_second": 4.597,
      "step": 600
    },
    {
      "epoch": 4.88,
      "grad_norm": 283.4959411621094,
      "learning_rate": 1.9500000000000003e-05,
      "loss": 0.0009,
      "step": 610
    },
    {
      "epoch": 4.96,
      "grad_norm": 261.8572998046875,
      "learning_rate": 1.9e-05,
      "loss": 0.0009,
      "step": 620
    },
    {
      "epoch": 5.04,
      "grad_norm": 252.91981506347656,
      "learning_rate": 1.85e-05,
      "loss": 0.0009,
      "step": 630
    },
    {
      "epoch": 5.12,
      "grad_norm": 239.09896850585938,
      "learning_rate": 1.8e-05,
      "loss": 0.0008,
      "step": 640
    },
    {
      "epoch": 5.2,
      "grad_norm": 232.7013397216797,
      "learning_rate": 1.75e-05,
      "loss": 0.0008,
      "step": 650
    },
    {
      "epoch": 5.28,
      "grad_norm": 220.06301879882812,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 0.0007,
      "step": 660
    },
    {
      "epoch": 5.36,
      "grad_norm": 219.54986572265625,
      "learning_rate": 1.65e-05,
      "loss": 0.0007,
      "step": 670
    },
    {
      "epoch": 5.44,
      "grad_norm": 206.40716552734375,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.0007,
      "step": 680
    },
    {
      "epoch": 5.52,
      "grad_norm": 196.642578125,
      "learning_rate": 1.55e-05,
      "loss": 0.0007,
      "step": 690
    },
    {
      "epoch": 5.6,
      "grad_norm": 187.69554138183594,
      "learning_rate": 1.5e-05,
      "loss": 0.0006,
      "step": 700
    },
    {
      "epoch": 5.6,
      "eval_accuracy": 1.0,
      "eval_loss": 0.0006605549133382738,
      "eval_runtime": 13.6953,
      "eval_samples_per_second": 73.018,
      "eval_steps_per_second": 4.6,
      "step": 700
    },
    {
      "epoch": 5.68,
      "grad_norm": 183.08045959472656,
      "learning_rate": 1.45e-05,
      "loss": 0.0006,
      "step": 710
    },
    {
      "epoch": 5.76,
      "grad_norm": 174.93222045898438,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 0.0006,
      "step": 720
    },
    {
      "epoch": 5.84,
      "grad_norm": 177.07530212402344,
      "learning_rate": 1.3500000000000001e-05,
      "loss": 0.0006,
      "step": 730
    },
    {
      "epoch": 5.92,
      "grad_norm": 166.14947509765625,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 0.0005,
      "step": 740
    },
    {
      "epoch": 6.0,
      "grad_norm": 165.67318725585938,
      "learning_rate": 1.25e-05,
      "loss": 0.0005,
      "step": 750
    },
    {
      "epoch": 6.08,
      "grad_norm": 158.77545166015625,
      "learning_rate": 1.2e-05,
      "loss": 0.0005,
      "step": 760
    },
    {
      "epoch": 6.16,
      "grad_norm": 149.71511840820312,
      "learning_rate": 1.1500000000000002e-05,
      "loss": 0.0005,
      "step": 770
    },
    {
      "epoch": 6.24,
      "grad_norm": 144.20770263671875,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 0.0005,
      "step": 780
    },
    {
      "epoch": 6.32,
      "grad_norm": 148.6312255859375,
      "learning_rate": 1.05e-05,
      "loss": 0.0005,
      "step": 790
    },
    {
      "epoch": 6.4,
      "grad_norm": 135.4142303466797,
      "learning_rate": 1e-05,
      "loss": 0.0005,
      "step": 800
    },
    {
      "epoch": 6.4,
      "eval_accuracy": 1.0,
      "eval_loss": 0.0004884201916866004,
      "eval_runtime": 13.5521,
      "eval_samples_per_second": 73.79,
      "eval_steps_per_second": 4.649,
      "step": 800
    },
    {
      "epoch": 6.48,
      "grad_norm": 133.83642578125,
      "learning_rate": 9.5e-06,
      "loss": 0.0004,
      "step": 810
    },
    {
      "epoch": 6.5600000000000005,
      "grad_norm": 138.82203674316406,
      "learning_rate": 9e-06,
      "loss": 0.0004,
      "step": 820
    },
    {
      "epoch": 6.64,
      "grad_norm": 127.43915557861328,
      "learning_rate": 8.500000000000002e-06,
      "loss": 0.0004,
      "step": 830
    },
    {
      "epoch": 6.72,
      "grad_norm": 126.12251281738281,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.0004,
      "step": 840
    },
    {
      "epoch": 6.8,
      "grad_norm": 121.66053771972656,
      "learning_rate": 7.5e-06,
      "loss": 0.0004,
      "step": 850
    },
    {
      "epoch": 6.88,
      "grad_norm": 121.25574493408203,
      "learning_rate": 7.000000000000001e-06,
      "loss": 0.0004,
      "step": 860
    },
    {
      "epoch": 6.96,
      "grad_norm": 126.3290023803711,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.0004,
      "step": 870
    },
    {
      "epoch": 7.04,
      "grad_norm": 117.99575805664062,
      "learning_rate": 6e-06,
      "loss": 0.0004,
      "step": 880
    },
    {
      "epoch": 7.12,
      "grad_norm": 116.10645294189453,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.0004,
      "step": 890
    },
    {
      "epoch": 7.2,
      "grad_norm": 113.11275482177734,
      "learning_rate": 5e-06,
      "loss": 0.0004,
      "step": 900
    },
    {
      "epoch": 7.2,
      "eval_accuracy": 1.0,
      "eval_loss": 0.0003921452153008431,
      "eval_runtime": 13.7013,
      "eval_samples_per_second": 72.986,
      "eval_steps_per_second": 4.598,
      "step": 900
    },
    {
      "epoch": 7.28,
      "grad_norm": 114.9211196899414,
      "learning_rate": 4.5e-06,
      "loss": 0.0004,
      "step": 910
    },
    {
      "epoch": 7.36,
      "grad_norm": 110.5498046875,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.0004,
      "step": 920
    },
    {
      "epoch": 7.44,
      "grad_norm": 110.70841979980469,
      "learning_rate": 3.5000000000000004e-06,
      "loss": 0.0004,
      "step": 930
    },
    {
      "epoch": 7.52,
      "grad_norm": 115.6305160522461,
      "learning_rate": 3e-06,
      "loss": 0.0004,
      "step": 940
    },
    {
      "epoch": 7.6,
      "grad_norm": 106.5681381225586,
      "learning_rate": 2.5e-06,
      "loss": 0.0004,
      "step": 950
    },
    {
      "epoch": 7.68,
      "grad_norm": 109.81066131591797,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.0003,
      "step": 960
    },
    {
      "epoch": 7.76,
      "grad_norm": 107.74824523925781,
      "learning_rate": 1.5e-06,
      "loss": 0.0003,
      "step": 970
    },
    {
      "epoch": 7.84,
      "grad_norm": 108.34854888916016,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.0003,
      "step": 980
    },
    {
      "epoch": 7.92,
      "grad_norm": 107.01416778564453,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.0003,
      "step": 990
    },
    {
      "epoch": 8.0,
      "grad_norm": 111.06143188476562,
      "learning_rate": 0.0,
      "loss": 0.0003,
      "step": 1000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 1.0,
      "eval_loss": 0.0003763487620744854,
      "eval_runtime": 13.1171,
      "eval_samples_per_second": 76.236,
      "eval_steps_per_second": 4.803,
      "step": 1000
    },
    {
      "epoch": 8.0,
      "step": 1000,
      "total_flos": 2.479168170953736e+18,
      "train_loss": 0.03565365221118554,
      "train_runtime": 1044.6113,
      "train_samples_per_second": 30.626,
      "train_steps_per_second": 0.957
    }
  ],
  "logging_steps": 10,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.479168170953736e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}