{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 1450,
  "global_step": 2900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 8.9224,
      "step": 10
    },
    {
      "epoch": 0.14,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 8.5406,
      "step": 20
    },
    {
      "epoch": 0.21,
      "learning_rate": 3e-06,
      "loss": 7.9663,
      "step": 30
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.000000000000001e-06,
      "loss": 8.388,
      "step": 40
    },
    {
      "epoch": 0.34,
      "learning_rate": 5e-06,
      "loss": 8.6117,
      "step": 50
    },
    {
      "epoch": 0.41,
      "learning_rate": 6e-06,
      "loss": 8.2034,
      "step": 60
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.000000000000001e-06,
      "loss": 7.7261,
      "step": 70
    },
    {
      "epoch": 0.55,
      "learning_rate": 8.000000000000001e-06,
      "loss": 8.6426,
      "step": 80
    },
    {
      "epoch": 0.62,
      "learning_rate": 9e-06,
      "loss": 8.3122,
      "step": 90
    },
    {
      "epoch": 0.69,
      "learning_rate": 1e-05,
      "loss": 7.9465,
      "step": 100
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 7.9357,
      "step": 110
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.2e-05,
      "loss": 8.333,
      "step": 120
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 8.0391,
      "step": 130
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 7.5063,
      "step": 140
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.5e-05,
      "loss": 7.9733,
      "step": 150
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 7.8903,
      "step": 160
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 7.6222,
      "step": 170
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.8e-05,
      "loss": 7.1372,
      "step": 180
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.9e-05,
      "loss": 8.0093,
      "step": 190
    },
    {
      "epoch": 1.38,
      "learning_rate": 2e-05,
      "loss": 7.4925,
      "step": 200
    },
    {
      "epoch": 1.45,
      "learning_rate": 2.1e-05,
      "loss": 7.0882,
      "step": 210
    },
    {
      "epoch": 1.52,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 7.3101,
      "step": 220
    },
    {
      "epoch": 1.59,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 7.3497,
      "step": 230
    },
    {
      "epoch": 1.66,
      "learning_rate": 2.4e-05,
      "loss": 6.957,
      "step": 240
    },
    {
      "epoch": 1.72,
      "learning_rate": 2.5e-05,
      "loss": 6.6912,
      "step": 250
    },
    {
      "epoch": 1.79,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 6.9221,
      "step": 260
    },
    {
      "epoch": 1.86,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 6.7065,
      "step": 270
    },
    {
      "epoch": 1.93,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 6.3174,
      "step": 280
    },
    {
      "epoch": 2.0,
      "learning_rate": 2.9e-05,
      "loss": 6.1904,
      "step": 290
    },
    {
      "epoch": 2.07,
      "learning_rate": 3e-05,
      "loss": 6.4006,
      "step": 300
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.1e-05,
      "loss": 6.0932,
      "step": 310
    },
    {
      "epoch": 2.21,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 5.717,
      "step": 320
    },
    {
      "epoch": 2.28,
      "learning_rate": 3.3e-05,
      "loss": 5.8356,
      "step": 330
    },
    {
      "epoch": 2.34,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 5.6073,
      "step": 340
    },
    {
      "epoch": 2.41,
      "learning_rate": 3.5e-05,
      "loss": 5.3597,
      "step": 350
    },
    {
      "epoch": 2.48,
      "learning_rate": 3.6e-05,
      "loss": 5.0843,
      "step": 360
    },
    {
      "epoch": 2.55,
      "learning_rate": 3.7e-05,
      "loss": 5.3473,
      "step": 370
    },
    {
      "epoch": 2.62,
      "learning_rate": 3.8e-05,
      "loss": 5.0528,
      "step": 380
    },
    {
      "epoch": 2.69,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 4.8183,
      "step": 390
    },
    {
      "epoch": 2.76,
      "learning_rate": 4e-05,
      "loss": 4.7668,
      "step": 400
    },
    {
      "epoch": 2.83,
      "learning_rate": 4.1e-05,
      "loss": 4.6995,
      "step": 410
    },
    {
      "epoch": 2.9,
      "learning_rate": 4.2e-05,
      "loss": 4.5144,
      "step": 420
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.3e-05,
      "loss": 4.3133,
      "step": 430
    },
    {
      "epoch": 3.03,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 4.3413,
      "step": 440
    },
    {
      "epoch": 3.1,
      "learning_rate": 4.5e-05,
      "loss": 4.1984,
      "step": 450
    },
    {
      "epoch": 3.17,
      "learning_rate": 4.600000000000001e-05,
      "loss": 4.0813,
      "step": 460
    },
    {
      "epoch": 3.24,
      "learning_rate": 4.7e-05,
      "loss": 4.0248,
      "step": 470
    },
    {
      "epoch": 3.31,
      "learning_rate": 4.8e-05,
      "loss": 4.0405,
      "step": 480
    },
    {
      "epoch": 3.38,
      "learning_rate": 4.9e-05,
      "loss": 3.9165,
      "step": 490
    },
    {
      "epoch": 3.45,
      "learning_rate": 5e-05,
      "loss": 3.8314,
      "step": 500
    },
    {
      "epoch": 3.52,
      "learning_rate": 5.1000000000000006e-05,
      "loss": 3.8916,
      "step": 510
    },
    {
      "epoch": 3.59,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 3.8167,
      "step": 520
    },
    {
      "epoch": 3.66,
      "learning_rate": 5.300000000000001e-05,
      "loss": 3.7126,
      "step": 530
    },
    {
      "epoch": 3.72,
      "learning_rate": 5.4000000000000005e-05,
      "loss": 3.6607,
      "step": 540
    },
    {
      "epoch": 3.79,
      "learning_rate": 5.500000000000001e-05,
      "loss": 3.7683,
      "step": 550
    },
    {
      "epoch": 3.86,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 3.6667,
      "step": 560
    },
    {
      "epoch": 3.93,
      "learning_rate": 5.6999999999999996e-05,
      "loss": 3.6181,
      "step": 570
    },
    {
      "epoch": 4.0,
      "learning_rate": 5.8e-05,
      "loss": 3.6345,
      "step": 580
    },
    {
      "epoch": 4.07,
      "learning_rate": 5.9e-05,
      "loss": 3.6936,
      "step": 590
    },
    {
      "epoch": 4.14,
      "learning_rate": 6e-05,
      "loss": 3.5988,
      "step": 600
    },
    {
      "epoch": 4.21,
      "learning_rate": 6.1e-05,
      "loss": 3.5657,
      "step": 610
    },
    {
      "epoch": 4.28,
      "learning_rate": 6.2e-05,
      "loss": 3.6574,
      "step": 620
    },
    {
      "epoch": 4.34,
      "learning_rate": 6.3e-05,
      "loss": 3.602,
      "step": 630
    },
    {
      "epoch": 4.41,
      "learning_rate": 6.400000000000001e-05,
      "loss": 3.5477,
      "step": 640
    },
    {
      "epoch": 4.48,
      "learning_rate": 6.500000000000001e-05,
      "loss": 3.5357,
      "step": 650
    },
    {
      "epoch": 4.55,
      "learning_rate": 6.6e-05,
      "loss": 3.6488,
      "step": 660
    },
    {
      "epoch": 4.62,
      "learning_rate": 6.7e-05,
      "loss": 3.5509,
      "step": 670
    },
    {
      "epoch": 4.69,
      "learning_rate": 6.800000000000001e-05,
      "loss": 3.5384,
      "step": 680
    },
    {
      "epoch": 4.76,
      "learning_rate": 6.9e-05,
      "loss": 3.5553,
      "step": 690
    },
    {
      "epoch": 4.83,
      "learning_rate": 7e-05,
      "loss": 3.5934,
      "step": 700
    },
    {
      "epoch": 4.9,
      "learning_rate": 7.1e-05,
      "loss": 3.5065,
      "step": 710
    },
    {
      "epoch": 4.97,
      "learning_rate": 7.2e-05,
      "loss": 3.473,
      "step": 720
    },
    {
      "epoch": 5.03,
      "learning_rate": 7.3e-05,
      "loss": 3.5895,
      "step": 730
    },
    {
      "epoch": 5.1,
      "learning_rate": 7.4e-05,
      "loss": 3.5227,
      "step": 740
    },
    {
      "epoch": 5.17,
      "learning_rate": 7.500000000000001e-05,
      "loss": 3.5018,
      "step": 750
    },
    {
      "epoch": 5.24,
      "learning_rate": 7.6e-05,
      "loss": 3.4822,
      "step": 760
    },
    {
      "epoch": 5.31,
      "learning_rate": 7.7e-05,
      "loss": 3.5872,
      "step": 770
    },
    {
      "epoch": 5.38,
      "learning_rate": 7.800000000000001e-05,
      "loss": 3.482,
      "step": 780
    },
    {
      "epoch": 5.45,
      "learning_rate": 7.900000000000001e-05,
      "loss": 3.4622,
      "step": 790
    },
    {
      "epoch": 5.52,
      "learning_rate": 8e-05,
      "loss": 3.4979,
      "step": 800
    },
    {
      "epoch": 5.59,
      "learning_rate": 8.1e-05,
      "loss": 3.5079,
      "step": 810
    },
    {
      "epoch": 5.66,
      "learning_rate": 8.2e-05,
      "loss": 3.4341,
      "step": 820
    },
    {
      "epoch": 5.72,
      "learning_rate": 8.3e-05,
      "loss": 3.4151,
      "step": 830
    },
    {
      "epoch": 5.79,
      "learning_rate": 8.4e-05,
      "loss": 3.5327,
      "step": 840
    },
    {
      "epoch": 5.86,
      "learning_rate": 8.5e-05,
      "loss": 3.4457,
      "step": 850
    },
    {
      "epoch": 5.93,
      "learning_rate": 8.6e-05,
      "loss": 3.4219,
      "step": 860
    },
    {
      "epoch": 6.0,
      "learning_rate": 8.7e-05,
      "loss": 3.434,
      "step": 870
    },
    {
      "epoch": 6.07,
      "learning_rate": 8.800000000000001e-05,
      "loss": 3.5263,
      "step": 880
    },
    {
      "epoch": 6.14,
      "learning_rate": 8.900000000000001e-05,
      "loss": 3.4327,
      "step": 890
    },
    {
      "epoch": 6.21,
      "learning_rate": 9e-05,
      "loss": 3.3951,
      "step": 900
    },
    {
      "epoch": 6.28,
      "learning_rate": 9.1e-05,
      "loss": 3.4945,
      "step": 910
    },
    {
      "epoch": 6.34,
      "learning_rate": 9.200000000000001e-05,
      "loss": 3.4294,
      "step": 920
    },
    {
      "epoch": 6.41,
      "learning_rate": 9.300000000000001e-05,
      "loss": 3.3794,
      "step": 930
    },
    {
      "epoch": 6.48,
      "learning_rate": 9.4e-05,
      "loss": 3.3755,
      "step": 940
    },
    {
      "epoch": 6.55,
      "learning_rate": 9.5e-05,
      "loss": 3.4776,
      "step": 950
    },
    {
      "epoch": 6.62,
      "learning_rate": 9.6e-05,
      "loss": 3.4096,
      "step": 960
    },
    {
      "epoch": 6.69,
      "learning_rate": 9.7e-05,
      "loss": 3.366,
      "step": 970
    },
    {
      "epoch": 6.76,
      "learning_rate": 9.8e-05,
      "loss": 3.3924,
      "step": 980
    },
    {
      "epoch": 6.83,
      "learning_rate": 9.900000000000001e-05,
      "loss": 3.4268,
      "step": 990
    },
    {
      "epoch": 6.9,
      "learning_rate": 0.0001,
      "loss": 3.3572,
      "step": 1000
    },
    {
      "epoch": 6.97,
      "learning_rate": 9.947368421052632e-05,
      "loss": 3.3574,
      "step": 1010
    },
    {
      "epoch": 7.03,
      "learning_rate": 9.894736842105263e-05,
      "loss": 3.4417,
      "step": 1020
    },
    {
      "epoch": 7.1,
      "learning_rate": 9.842105263157894e-05,
      "loss": 3.3858,
      "step": 1030
    },
    {
      "epoch": 7.17,
      "learning_rate": 9.789473684210527e-05,
      "loss": 3.3541,
      "step": 1040
    },
    {
      "epoch": 7.24,
      "learning_rate": 9.736842105263158e-05,
      "loss": 3.334,
      "step": 1050
    },
    {
      "epoch": 7.31,
      "learning_rate": 9.68421052631579e-05,
      "loss": 3.4488,
      "step": 1060
    },
    {
      "epoch": 7.38,
      "learning_rate": 9.631578947368421e-05,
      "loss": 3.3454,
      "step": 1070
    },
    {
      "epoch": 7.45,
      "learning_rate": 9.578947368421052e-05,
      "loss": 3.3057,
      "step": 1080
    },
    {
      "epoch": 7.52,
      "learning_rate": 9.526315789473685e-05,
      "loss": 3.3948,
      "step": 1090
    },
    {
      "epoch": 7.59,
      "learning_rate": 9.473684210526316e-05,
      "loss": 3.3869,
      "step": 1100
    },
    {
      "epoch": 7.66,
      "learning_rate": 9.421052631578949e-05,
      "loss": 3.3038,
      "step": 1110
    },
    {
      "epoch": 7.72,
      "learning_rate": 9.36842105263158e-05,
      "loss": 3.3054,
      "step": 1120
    },
    {
      "epoch": 7.79,
      "learning_rate": 9.315789473684211e-05,
      "loss": 3.4006,
      "step": 1130
    },
    {
      "epoch": 7.86,
      "learning_rate": 9.263157894736843e-05,
      "loss": 3.3313,
      "step": 1140
    },
    {
      "epoch": 7.93,
      "learning_rate": 9.210526315789474e-05,
      "loss": 3.2982,
      "step": 1150
    },
    {
      "epoch": 8.0,
      "learning_rate": 9.157894736842105e-05,
      "loss": 3.3071,
      "step": 1160
    },
    {
      "epoch": 8.07,
      "learning_rate": 9.105263157894738e-05,
      "loss": 3.3822,
      "step": 1170
    },
    {
      "epoch": 8.14,
      "learning_rate": 9.052631578947369e-05,
      "loss": 3.3196,
      "step": 1180
    },
    {
      "epoch": 8.21,
      "learning_rate": 9e-05,
      "loss": 3.2765,
      "step": 1190
    },
    {
      "epoch": 8.28,
      "learning_rate": 8.947368421052632e-05,
      "loss": 3.3805,
      "step": 1200
    },
    {
      "epoch": 8.34,
      "learning_rate": 8.894736842105263e-05,
      "loss": 3.3321,
      "step": 1210
    },
    {
      "epoch": 8.41,
      "learning_rate": 8.842105263157894e-05,
      "loss": 3.2837,
      "step": 1220
    },
    {
      "epoch": 8.48,
      "learning_rate": 8.789473684210526e-05,
      "loss": 3.2773,
      "step": 1230
    },
    {
      "epoch": 8.55,
      "learning_rate": 8.736842105263158e-05,
      "loss": 3.3566,
      "step": 1240
    },
    {
      "epoch": 8.62,
      "learning_rate": 8.68421052631579e-05,
      "loss": 3.2951,
      "step": 1250
    },
    {
      "epoch": 8.69,
      "learning_rate": 8.631578947368421e-05,
      "loss": 3.2632,
      "step": 1260
    },
    {
      "epoch": 8.76,
      "learning_rate": 8.578947368421054e-05,
      "loss": 3.3313,
      "step": 1270
    },
    {
      "epoch": 8.83,
      "learning_rate": 8.526315789473685e-05,
      "loss": 3.3523,
      "step": 1280
    },
    {
      "epoch": 8.9,
      "learning_rate": 8.473684210526316e-05,
      "loss": 3.2767,
      "step": 1290
    },
    {
      "epoch": 8.97,
      "learning_rate": 8.421052631578948e-05,
      "loss": 3.2448,
      "step": 1300
    },
    {
      "epoch": 9.03,
      "learning_rate": 8.36842105263158e-05,
      "loss": 3.3419,
      "step": 1310
    },
    {
      "epoch": 9.1,
      "learning_rate": 8.315789473684212e-05,
      "loss": 3.2813,
      "step": 1320
    },
    {
      "epoch": 9.17,
      "learning_rate": 8.263157894736843e-05,
      "loss": 3.2558,
      "step": 1330
    },
    {
      "epoch": 9.24,
      "learning_rate": 8.210526315789474e-05,
      "loss": 3.2502,
      "step": 1340
    },
    {
      "epoch": 9.31,
      "learning_rate": 8.157894736842105e-05,
      "loss": 3.3594,
      "step": 1350
    },
    {
      "epoch": 9.38,
      "learning_rate": 8.105263157894737e-05,
      "loss": 3.2739,
      "step": 1360
    },
    {
      "epoch": 9.45,
      "learning_rate": 8.052631578947368e-05,
      "loss": 3.2404,
      "step": 1370
    },
    {
      "epoch": 9.52,
      "learning_rate": 8e-05,
      "loss": 3.3528,
      "step": 1380
    },
    {
      "epoch": 9.59,
      "learning_rate": 7.947368421052632e-05,
      "loss": 3.3139,
      "step": 1390
    },
    {
      "epoch": 9.66,
      "learning_rate": 7.894736842105263e-05,
      "loss": 3.2333,
      "step": 1400
    },
    {
      "epoch": 9.72,
      "learning_rate": 7.842105263157895e-05,
      "loss": 3.225,
      "step": 1410
    },
    {
      "epoch": 9.79,
      "learning_rate": 7.789473684210526e-05,
      "loss": 3.3473,
      "step": 1420
    },
    {
      "epoch": 9.86,
      "learning_rate": 7.736842105263159e-05,
      "loss": 3.2565,
      "step": 1430
    },
    {
      "epoch": 9.93,
      "learning_rate": 7.68421052631579e-05,
      "loss": 3.2372,
      "step": 1440
    },
    {
      "epoch": 10.0,
      "learning_rate": 7.631578947368422e-05,
      "loss": 3.2725,
      "step": 1450
    },
    {
      "epoch": 10.0,
      "eval_loss": 3.4699134826660156,
      "eval_runtime": 8.1516,
      "eval_samples_per_second": 206.093,
      "eval_steps_per_second": 6.502,
      "eval_wer": 1.0005513059058646,
      "step": 1450
    },
    {
      "epoch": 10.07,
      "learning_rate": 7.578947368421054e-05,
      "loss": 3.3204,
      "step": 1460
    },
    {
      "epoch": 10.14,
      "learning_rate": 7.526315789473685e-05,
      "loss": 3.2301,
      "step": 1470
    },
    {
      "epoch": 10.21,
      "learning_rate": 7.473684210526316e-05,
      "loss": 3.2381,
      "step": 1480
    },
    {
      "epoch": 10.28,
      "learning_rate": 7.421052631578948e-05,
      "loss": 3.3093,
      "step": 1490
    },
    {
      "epoch": 10.34,
      "learning_rate": 7.368421052631579e-05,
      "loss": 3.2747,
      "step": 1500
    },
    {
      "epoch": 10.41,
      "learning_rate": 7.315789473684212e-05,
      "loss": 3.2268,
      "step": 1510
    },
    {
      "epoch": 10.48,
      "learning_rate": 7.263157894736843e-05,
      "loss": 3.2105,
      "step": 1520
    },
    {
      "epoch": 10.55,
      "learning_rate": 7.210526315789474e-05,
      "loss": 3.3174,
      "step": 1530
    },
    {
      "epoch": 10.62,
      "learning_rate": 7.157894736842105e-05,
      "loss": 3.2254,
      "step": 1540
    },
    {
      "epoch": 10.69,
      "learning_rate": 7.105263157894737e-05,
      "loss": 3.2547,
      "step": 1550
    },
    {
      "epoch": 10.76,
      "learning_rate": 7.052631578947368e-05,
      "loss": 3.2704,
      "step": 1560
    },
    {
      "epoch": 10.83,
      "learning_rate": 7e-05,
      "loss": 3.3047,
      "step": 1570
    },
    {
      "epoch": 10.9,
      "learning_rate": 6.947368421052632e-05,
      "loss": 3.2407,
      "step": 1580
    },
    {
      "epoch": 10.97,
      "learning_rate": 6.894736842105263e-05,
      "loss": 3.1856,
      "step": 1590
    },
    {
      "epoch": 11.03,
      "learning_rate": 6.842105263157895e-05,
      "loss": 3.3326,
      "step": 1600
    },
    {
      "epoch": 11.1,
      "learning_rate": 6.789473684210527e-05,
      "loss": 3.2289,
      "step": 1610
    },
    {
      "epoch": 11.17,
      "learning_rate": 6.736842105263159e-05,
      "loss": 3.229,
      "step": 1620
    },
    {
      "epoch": 11.24,
      "learning_rate": 6.68421052631579e-05,
      "loss": 3.2157,
      "step": 1630
    },
    {
      "epoch": 11.31,
      "learning_rate": 6.631578947368421e-05,
      "loss": 3.3323,
      "step": 1640
    },
    {
      "epoch": 11.38,
      "learning_rate": 6.578947368421054e-05,
      "loss": 3.2229,
      "step": 1650
    },
    {
      "epoch": 11.45,
      "learning_rate": 6.526315789473685e-05,
      "loss": 3.1919,
      "step": 1660
    },
    {
      "epoch": 11.52,
      "learning_rate": 6.473684210526316e-05,
      "loss": 3.2794,
      "step": 1670
    },
    {
      "epoch": 11.59,
      "learning_rate": 6.421052631578948e-05,
      "loss": 3.2455,
      "step": 1680
    },
    {
      "epoch": 11.66,
      "learning_rate": 6.368421052631579e-05,
      "loss": 3.1852,
      "step": 1690
    },
    {
      "epoch": 11.72,
      "learning_rate": 6.31578947368421e-05,
      "loss": 3.1948,
      "step": 1700
    },
    {
      "epoch": 11.79,
      "learning_rate": 6.263157894736842e-05,
      "loss": 3.3379,
      "step": 1710
    },
    {
      "epoch": 11.86,
      "learning_rate": 6.210526315789474e-05,
      "loss": 3.2203,
      "step": 1720
    },
    {
      "epoch": 11.93,
      "learning_rate": 6.157894736842106e-05,
      "loss": 3.2083,
      "step": 1730
    },
    {
      "epoch": 12.0,
      "learning_rate": 6.105263157894737e-05,
      "loss": 3.2124,
      "step": 1740
    },
    {
      "epoch": 12.07,
      "learning_rate": 6.052631578947369e-05,
      "loss": 3.2972,
      "step": 1750
    },
    {
      "epoch": 12.14,
      "learning_rate": 6e-05,
      "loss": 3.1723,
      "step": 1760
    },
    {
      "epoch": 12.21,
      "learning_rate": 5.9473684210526315e-05,
      "loss": 3.1838,
      "step": 1770
    },
    {
      "epoch": 12.28,
      "learning_rate": 5.894736842105263e-05,
      "loss": 3.2744,
      "step": 1780
    },
    {
      "epoch": 12.34,
      "learning_rate": 5.8421052631578954e-05,
      "loss": 3.257,
      "step": 1790
    },
    {
      "epoch": 12.41,
      "learning_rate": 5.789473684210527e-05,
      "loss": 3.2009,
      "step": 1800
    },
    {
      "epoch": 12.48,
      "learning_rate": 5.736842105263158e-05,
      "loss": 3.1939,
      "step": 1810
    },
    {
      "epoch": 12.55,
      "learning_rate": 5.68421052631579e-05,
      "loss": 3.3005,
      "step": 1820
    },
    {
      "epoch": 12.62,
      "learning_rate": 5.631578947368421e-05,
      "loss": 3.1852,
      "step": 1830
    },
    {
      "epoch": 12.69,
      "learning_rate": 5.5789473684210526e-05,
      "loss": 3.1796,
      "step": 1840
    },
    {
      "epoch": 12.76,
      "learning_rate": 5.526315789473685e-05,
      "loss": 3.2465,
      "step": 1850
    },
    {
      "epoch": 12.83,
      "learning_rate": 5.4736842105263165e-05,
      "loss": 3.2793,
      "step": 1860
    },
    {
      "epoch": 12.9,
      "learning_rate": 5.421052631578948e-05,
      "loss": 3.179,
      "step": 1870
    },
    {
      "epoch": 12.97,
      "learning_rate": 5.368421052631579e-05,
      "loss": 3.2026,
      "step": 1880
    },
    {
      "epoch": 13.03,
      "learning_rate": 5.3157894736842104e-05,
      "loss": 3.2608,
      "step": 1890
    },
    {
      "epoch": 13.1,
      "learning_rate": 5.2631578947368424e-05,
      "loss": 3.2105,
      "step": 1900
    },
    {
      "epoch": 13.17,
      "learning_rate": 5.210526315789474e-05,
      "loss": 3.1839,
      "step": 1910
    },
    {
      "epoch": 13.24,
      "learning_rate": 5.157894736842106e-05,
      "loss": 3.2157,
      "step": 1920
    },
    {
      "epoch": 13.31,
      "learning_rate": 5.1052631578947376e-05,
      "loss": 3.2742,
      "step": 1930
    },
    {
      "epoch": 13.38,
      "learning_rate": 5.052631578947369e-05,
      "loss": 3.1615,
      "step": 1940
    },
    {
      "epoch": 13.45,
      "learning_rate": 5e-05,
      "loss": 3.1736,
      "step": 1950
    },
    {
      "epoch": 13.52,
      "learning_rate": 4.9473684210526315e-05,
      "loss": 3.2578,
      "step": 1960
    },
    {
      "epoch": 13.59,
      "learning_rate": 4.8947368421052635e-05,
      "loss": 3.2397,
      "step": 1970
    },
    {
      "epoch": 13.66,
      "learning_rate": 4.842105263157895e-05,
      "loss": 3.1528,
      "step": 1980
    },
    {
      "epoch": 13.72,
      "learning_rate": 4.789473684210526e-05,
      "loss": 3.1764,
      "step": 1990
    },
    {
      "epoch": 13.79,
      "learning_rate": 4.736842105263158e-05,
      "loss": 3.2888,
      "step": 2000
    },
    {
      "epoch": 13.86,
      "learning_rate": 4.68421052631579e-05,
      "loss": 3.1991,
      "step": 2010
    },
    {
      "epoch": 13.93,
      "learning_rate": 4.6315789473684214e-05,
      "loss": 3.1734,
      "step": 2020
    },
    {
      "epoch": 14.0,
      "learning_rate": 4.5789473684210527e-05,
      "loss": 3.193,
      "step": 2030
    },
    {
      "epoch": 14.07,
      "learning_rate": 4.5263157894736846e-05,
      "loss": 3.2672,
      "step": 2040
    },
    {
      "epoch": 14.14,
      "learning_rate": 4.473684210526316e-05,
      "loss": 3.1536,
      "step": 2050
    },
    {
      "epoch": 14.21,
      "learning_rate": 4.421052631578947e-05,
      "loss": 3.1904,
      "step": 2060
    },
    {
      "epoch": 14.28,
      "learning_rate": 4.368421052631579e-05,
      "loss": 3.2782,
      "step": 2070
    },
    {
      "epoch": 14.34,
      "learning_rate": 4.3157894736842105e-05,
      "loss": 3.2108,
      "step": 2080
    },
    {
      "epoch": 14.41,
      "learning_rate": 4.2631578947368425e-05,
      "loss": 3.1988,
      "step": 2090
    },
    {
      "epoch": 14.48,
      "learning_rate": 4.210526315789474e-05,
      "loss": 3.1915,
      "step": 2100
    },
    {
      "epoch": 14.55,
      "learning_rate": 4.157894736842106e-05,
      "loss": 3.2633,
      "step": 2110
    },
    {
      "epoch": 14.62,
      "learning_rate": 4.105263157894737e-05,
      "loss": 3.1832,
      "step": 2120
    },
    {
      "epoch": 14.69,
      "learning_rate": 4.0526315789473684e-05,
      "loss": 3.1445,
      "step": 2130
    },
    {
      "epoch": 14.76,
      "learning_rate": 4e-05,
      "loss": 3.2,
      "step": 2140
    },
    {
      "epoch": 14.83,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 3.2517,
      "step": 2150
    },
    {
      "epoch": 14.9,
      "learning_rate": 3.894736842105263e-05,
      "loss": 3.1483,
      "step": 2160
    },
    {
      "epoch": 14.97,
      "learning_rate": 3.842105263157895e-05,
      "loss": 3.1521,
      "step": 2170
    },
    {
      "epoch": 15.03,
      "learning_rate": 3.789473684210527e-05,
      "loss": 3.273,
      "step": 2180
    },
    {
      "epoch": 15.1,
      "learning_rate": 3.736842105263158e-05,
      "loss": 3.2115,
      "step": 2190
    },
    {
      "epoch": 15.17,
      "learning_rate": 3.6842105263157895e-05,
      "loss": 3.1547,
      "step": 2200
    },
    {
      "epoch": 15.24,
      "learning_rate": 3.6315789473684214e-05,
      "loss": 3.1564,
      "step": 2210
    },
    {
      "epoch": 15.31,
      "learning_rate": 3.578947368421053e-05,
      "loss": 3.2829,
      "step": 2220
    },
    {
      "epoch": 15.38,
      "learning_rate": 3.526315789473684e-05,
      "loss": 3.1727,
      "step": 2230
    },
    {
      "epoch": 15.45,
      "learning_rate": 3.473684210526316e-05,
      "loss": 3.1617,
      "step": 2240
    },
    {
      "epoch": 15.52,
      "learning_rate": 3.421052631578947e-05,
      "loss": 3.2143,
      "step": 2250
    },
    {
      "epoch": 15.59,
      "learning_rate": 3.368421052631579e-05,
      "loss": 3.2045,
      "step": 2260
    },
    {
      "epoch": 15.66,
      "learning_rate": 3.3157894736842106e-05,
      "loss": 3.1499,
      "step": 2270
    },
    {
      "epoch": 15.72,
      "learning_rate": 3.2631578947368426e-05,
      "loss": 3.1733,
      "step": 2280
    },
    {
      "epoch": 15.79,
      "learning_rate": 3.210526315789474e-05,
      "loss": 3.2579,
      "step": 2290
    },
    {
      "epoch": 15.86,
      "learning_rate": 3.157894736842105e-05,
      "loss": 3.1808,
      "step": 2300
    },
    {
      "epoch": 15.93,
      "learning_rate": 3.105263157894737e-05,
      "loss": 3.1587,
      "step": 2310
    },
    {
      "epoch": 16.0,
      "learning_rate": 3.0526315789473684e-05,
      "loss": 3.229,
      "step": 2320
    },
    {
      "epoch": 16.07,
      "learning_rate": 3e-05,
      "loss": 3.2771,
      "step": 2330
    },
    {
      "epoch": 16.14,
      "learning_rate": 2.9473684210526314e-05,
      "loss": 3.178,
      "step": 2340
    },
    {
      "epoch": 16.21,
      "learning_rate": 2.8947368421052634e-05,
      "loss": 3.1357,
      "step": 2350
    },
    {
      "epoch": 16.28,
      "learning_rate": 2.842105263157895e-05,
      "loss": 3.2502,
      "step": 2360
    },
    {
      "epoch": 16.34,
      "learning_rate": 2.7894736842105263e-05,
      "loss": 3.1893,
      "step": 2370
    },
    {
      "epoch": 16.41,
      "learning_rate": 2.7368421052631583e-05,
      "loss": 3.1526,
      "step": 2380
    },
    {
      "epoch": 16.48,
      "learning_rate": 2.6842105263157896e-05,
      "loss": 3.1775,
      "step": 2390
    },
    {
      "epoch": 16.55,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 3.266,
      "step": 2400
    },
    {
      "epoch": 16.62,
      "learning_rate": 2.578947368421053e-05,
      "loss": 3.1632,
      "step": 2410
    },
    {
      "epoch": 16.69,
      "learning_rate": 2.5263157894736845e-05,
      "loss": 3.1558,
      "step": 2420
    },
    {
      "epoch": 16.76,
      "learning_rate": 2.4736842105263158e-05,
      "loss": 3.2264,
      "step": 2430
    },
    {
      "epoch": 16.83,
      "learning_rate": 2.4210526315789474e-05,
      "loss": 3.2213,
      "step": 2440
    },
    {
      "epoch": 16.9,
      "learning_rate": 2.368421052631579e-05,
      "loss": 3.1527,
      "step": 2450
    },
    {
      "epoch": 16.97,
      "learning_rate": 2.3157894736842107e-05,
      "loss": 3.1292,
      "step": 2460
    },
    {
      "epoch": 17.03,
      "learning_rate": 2.2631578947368423e-05,
      "loss": 3.251,
      "step": 2470
    },
    {
      "epoch": 17.1,
      "learning_rate": 2.2105263157894736e-05,
      "loss": 3.149,
      "step": 2480
    },
    {
      "epoch": 17.17,
      "learning_rate": 2.1578947368421053e-05,
      "loss": 3.1467,
      "step": 2490
    },
    {
      "epoch": 17.24,
      "learning_rate": 2.105263157894737e-05,
      "loss": 3.1744,
      "step": 2500
    },
    {
      "epoch": 17.31,
      "learning_rate": 2.0526315789473685e-05,
      "loss": 3.2617,
      "step": 2510
    },
    {
      "epoch": 17.38,
      "learning_rate": 2e-05,
      "loss": 3.1516,
      "step": 2520
    },
    {
      "epoch": 17.45,
      "learning_rate": 1.9473684210526315e-05,
      "loss": 3.1366,
      "step": 2530
    },
    {
      "epoch": 17.52,
      "learning_rate": 1.8947368421052634e-05,
      "loss": 3.2229,
      "step": 2540
    },
    {
      "epoch": 17.59,
      "learning_rate": 1.8421052631578947e-05,
      "loss": 3.195,
      "step": 2550
    },
    {
      "epoch": 17.66,
      "learning_rate": 1.7894736842105264e-05,
      "loss": 3.1612,
      "step": 2560
    },
    {
      "epoch": 17.72,
      "learning_rate": 1.736842105263158e-05,
      "loss": 3.1497,
      "step": 2570
    },
    {
      "epoch": 17.79,
      "learning_rate": 1.6842105263157896e-05,
      "loss": 3.2591,
      "step": 2580
    },
    {
      "epoch": 17.86,
      "learning_rate": 1.6315789473684213e-05,
      "loss": 3.1834,
      "step": 2590
    },
    {
      "epoch": 17.93,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 3.1799,
      "step": 2600
    },
    {
      "epoch": 18.0,
      "learning_rate": 1.5263157894736842e-05,
      "loss": 3.1597,
      "step": 2610
    },
    {
      "epoch": 18.07,
      "learning_rate": 1.4736842105263157e-05,
      "loss": 3.2753,
      "step": 2620
    },
    {
      "epoch": 18.14,
      "learning_rate": 1.4210526315789475e-05,
      "loss": 3.1269,
      "step": 2630
    },
    {
      "epoch": 18.21,
      "learning_rate": 1.3684210526315791e-05,
      "loss": 3.1321,
      "step": 2640
    },
    {
      "epoch": 18.28,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 3.2213,
      "step": 2650
    },
    {
      "epoch": 18.34,
      "learning_rate": 1.2631578947368422e-05,
      "loss": 3.1916,
      "step": 2660
    },
    {
      "epoch": 18.41,
      "learning_rate": 1.2105263157894737e-05,
      "loss": 3.1388,
      "step": 2670
    },
    {
      "epoch": 18.48,
      "learning_rate": 1.1578947368421053e-05,
      "loss": 3.1401,
      "step": 2680
    },
    {
      "epoch": 18.55,
      "learning_rate": 1.1052631578947368e-05,
      "loss": 3.2428,
      "step": 2690
    },
    {
      "epoch": 18.62,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 3.1645,
      "step": 2700
    },
    {
      "epoch": 18.69,
      "learning_rate": 1e-05,
      "loss": 3.1607,
      "step": 2710
    },
    {
      "epoch": 18.76,
      "learning_rate": 9.473684210526317e-06,
      "loss": 3.2464,
      "step": 2720
    },
    {
      "epoch": 18.83,
      "learning_rate": 8.947368421052632e-06,
      "loss": 3.2228,
      "step": 2730
    },
    {
      "epoch": 18.9,
      "learning_rate": 8.421052631578948e-06,
      "loss": 3.1488,
      "step": 2740
    },
    {
      "epoch": 18.97,
      "learning_rate": 7.894736842105263e-06,
      "loss": 3.162,
      "step": 2750
    },
    {
      "epoch": 19.03,
      "learning_rate": 7.3684210526315784e-06,
      "loss": 3.2708,
      "step": 2760
    },
    {
      "epoch": 19.1,
      "learning_rate": 6.842105263157896e-06,
      "loss": 3.1619,
      "step": 2770
    },
    {
      "epoch": 19.17,
      "learning_rate": 6.315789473684211e-06,
      "loss": 3.1504,
      "step": 2780
    },
    {
      "epoch": 19.24,
      "learning_rate": 5.789473684210527e-06,
      "loss": 3.1647,
      "step": 2790
    },
    {
      "epoch": 19.31,
      "learning_rate": 5.263157894736842e-06,
      "loss": 3.2402,
      "step": 2800
    },
    {
      "epoch": 19.38,
      "learning_rate": 4.736842105263159e-06,
      "loss": 3.1644,
      "step": 2810
    },
    {
      "epoch": 19.45,
      "learning_rate": 4.210526315789474e-06,
      "loss": 3.1178,
      "step": 2820
    },
    {
      "epoch": 19.52,
      "learning_rate": 3.6842105263157892e-06,
      "loss": 3.2254,
      "step": 2830
    },
    {
      "epoch": 19.59,
      "learning_rate": 3.1578947368421056e-06,
      "loss": 3.1963,
      "step": 2840
    },
    {
      "epoch": 19.66,
      "learning_rate": 2.631578947368421e-06,
      "loss": 3.1382,
      "step": 2850
    },
    {
      "epoch": 19.72,
      "learning_rate": 2.105263157894737e-06,
      "loss": 3.1406,
      "step": 2860
    },
    {
      "epoch": 19.79,
      "learning_rate": 1.5789473684210528e-06,
      "loss": 3.2676,
      "step": 2870
    },
    {
      "epoch": 19.86,
      "learning_rate": 1.0526315789473685e-06,
      "loss": 3.1524,
      "step": 2880
    },
    {
      "epoch": 19.93,
      "learning_rate": 5.263157894736843e-07,
      "loss": 3.1674,
      "step": 2890
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.0,
      "loss": 3.1682,
      "step": 2900
    },
    {
      "epoch": 20.0,
      "eval_loss": 3.362812042236328,
      "eval_runtime": 8.2276,
      "eval_samples_per_second": 204.191,
      "eval_steps_per_second": 6.442,
      "eval_wer": 0.9993108676176694,
      "step": 2900
    },
    {
      "epoch": 20.0,
      "step": 2900,
      "total_flos": 2.6569362344615726e+18,
      "train_loss": 3.8477164847275307,
      "train_runtime": 343.8146,
      "train_samples_per_second": 268.749,
      "train_steps_per_second": 8.435
    }
  ],
  "logging_steps": 10,
  "max_steps": 2900,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 1450,
  "total_flos": 2.6569362344615726e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}