| { |
| "best_metric": 0.15455523133277893, |
| "best_model_checkpoint": "/content/drive/My Drive/Colab Data/LLaMA-LoRA Tuner/lora_models/guitarGPT00/checkpoint-9900", |
| "epoch": 20.454545454545453, |
| "global_step": 9900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.9999999999999997e-05, |
| "loss": 0.7034, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 5.9999999999999995e-05, |
| "loss": 0.7056, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 8.999999999999999e-05, |
| "loss": 0.6413, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00011999999999999999, |
| "loss": 0.574, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00015, |
| "loss": 0.5307, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 0.4797, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00020999999999999998, |
| "loss": 0.4554, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00023999999999999998, |
| "loss": 0.4219, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00027, |
| "loss": 0.4163, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0003, |
| "loss": 0.4183, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00029987068965517237, |
| "loss": 0.3947, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0002997413793103448, |
| "loss": 0.413, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0002996120689655172, |
| "loss": 0.4028, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00029948275862068965, |
| "loss": 0.3807, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00029935344827586205, |
| "loss": 0.3982, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00029922413793103444, |
| "loss": 0.3907, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0002990948275862069, |
| "loss": 0.391, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0002989655172413793, |
| "loss": 0.3807, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0002988362068965517, |
| "loss": 0.3735, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0002987068965517241, |
| "loss": 0.4056, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0002985775862068965, |
| "loss": 0.3732, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00029844827586206896, |
| "loss": 0.3873, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00029831896551724135, |
| "loss": 0.3421, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00029818965517241375, |
| "loss": 0.3708, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0002980603448275862, |
| "loss": 0.371, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0002979310344827586, |
| "loss": 0.3747, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00029780172413793103, |
| "loss": 0.3608, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.0002976724137931034, |
| "loss": 0.3559, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0002975431034482758, |
| "loss": 0.3838, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00029741379310344827, |
| "loss": 0.3587, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_loss": 0.36829814314842224, |
| "eval_runtime": 676.778, |
| "eval_samples_per_second": 3.398, |
| "eval_steps_per_second": 0.426, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00029728448275862066, |
| "loss": 0.3585, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00029715517241379305, |
| "loss": 0.3536, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0002970258620689655, |
| "loss": 0.3721, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.0002968965517241379, |
| "loss": 0.37, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00029676724137931034, |
| "loss": 0.3738, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00029663793103448273, |
| "loss": 0.3542, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0002965086206896552, |
| "loss": 0.378, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00029637931034482757, |
| "loss": 0.3443, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00029624999999999996, |
| "loss": 0.3484, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00029612068965517236, |
| "loss": 0.35, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0002959913793103448, |
| "loss": 0.3622, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0002958620689655172, |
| "loss": 0.3479, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00029573275862068964, |
| "loss": 0.3535, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00029560344827586204, |
| "loss": 0.3547, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.0002954741379310345, |
| "loss": 0.365, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0002953448275862069, |
| "loss": 0.3292, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00029521551724137927, |
| "loss": 0.3377, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0002950862068965517, |
| "loss": 0.349, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.0002949568965517241, |
| "loss": 0.3638, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0002948275862068965, |
| "loss": 0.3488, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00029469827586206895, |
| "loss": 0.3438, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00029456896551724134, |
| "loss": 0.3367, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0002944396551724138, |
| "loss": 0.364, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.0002943103448275862, |
| "loss": 0.3557, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00029418103448275863, |
| "loss": 0.3513, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.000294051724137931, |
| "loss": 0.3519, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.0002939224137931034, |
| "loss": 0.341, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.0002937931034482758, |
| "loss": 0.3404, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00029366379310344826, |
| "loss": 0.3381, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00029353448275862065, |
| "loss": 0.347, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.29, |
| "eval_loss": 0.3440234959125519, |
| "eval_runtime": 676.8128, |
| "eval_samples_per_second": 3.398, |
| "eval_steps_per_second": 0.426, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.0002934051724137931, |
| "loss": 0.3377, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.0002932758620689655, |
| "loss": 0.3573, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00029314655172413794, |
| "loss": 0.3264, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00029301724137931033, |
| "loss": 0.3458, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0002928879310344827, |
| "loss": 0.3432, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.0002927586206896551, |
| "loss": 0.3321, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00029262931034482756, |
| "loss": 0.3396, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00029249999999999995, |
| "loss": 0.3314, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.0002923706896551724, |
| "loss": 0.3389, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.0002922413793103448, |
| "loss": 0.3194, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 0.00029211206896551724, |
| "loss": 0.3379, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.00029198275862068963, |
| "loss": 0.3546, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 0.0002918534482758621, |
| "loss": 0.3146, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 0.0002917241379310344, |
| "loss": 0.3381, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 0.00029159482758620687, |
| "loss": 0.3165, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.00029146551724137926, |
| "loss": 0.3372, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 0.0002913362068965517, |
| "loss": 0.3452, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.0002912068965517241, |
| "loss": 0.3549, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 0.00029107758620689655, |
| "loss": 0.3149, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.00029094827586206894, |
| "loss": 0.3278, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 0.0002908189655172414, |
| "loss": 0.3432, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 0.0002906896551724138, |
| "loss": 0.3475, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.00029056034482758617, |
| "loss": 0.3271, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 0.00029043103448275857, |
| "loss": 0.3298, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 0.000290301724137931, |
| "loss": 0.3247, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 0.0002901724137931034, |
| "loss": 0.324, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 0.00029004310344827585, |
| "loss": 0.3015, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 0.00028991379310344825, |
| "loss": 0.322, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 0.0002897844827586207, |
| "loss": 0.3127, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 0.0002896551724137931, |
| "loss": 0.3205, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.93, |
| "eval_loss": 0.33089256286621094, |
| "eval_runtime": 676.8619, |
| "eval_samples_per_second": 3.398, |
| "eval_steps_per_second": 0.425, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.00028952586206896553, |
| "loss": 0.3216, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 0.00028939655172413787, |
| "loss": 0.3227, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 0.0002892672413793103, |
| "loss": 0.3126, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 0.0002891379310344827, |
| "loss": 0.3202, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 0.00028900862068965516, |
| "loss": 0.3162, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 0.00028887931034482755, |
| "loss": 0.331, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 0.00028875, |
| "loss": 0.3201, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 0.0002886206896551724, |
| "loss": 0.3291, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 0.00028849137931034484, |
| "loss": 0.318, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 0.0002883620689655172, |
| "loss": 0.3331, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 0.0002882327586206896, |
| "loss": 0.3236, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 0.000288103448275862, |
| "loss": 0.3201, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 0.00028797413793103446, |
| "loss": 0.314, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 0.00028784482758620686, |
| "loss": 0.3027, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 0.0002877155172413793, |
| "loss": 0.3326, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 0.0002875862068965517, |
| "loss": 0.3305, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 0.00028745689655172414, |
| "loss": 0.315, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 0.00028732758620689654, |
| "loss": 0.3193, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 0.00028719827586206893, |
| "loss": 0.3239, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 0.0002870689655172413, |
| "loss": 0.3075, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 0.00028693965517241377, |
| "loss": 0.3204, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 0.00028681034482758616, |
| "loss": 0.3104, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 0.0002866810344827586, |
| "loss": 0.3393, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 0.000286551724137931, |
| "loss": 0.3235, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 0.00028642241379310345, |
| "loss": 0.322, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 0.00028629310344827584, |
| "loss": 0.3176, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 0.00028616379310344824, |
| "loss": 0.308, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 0.00028603448275862063, |
| "loss": 0.3223, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 0.0002859051724137931, |
| "loss": 0.321, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 0.00028577586206896547, |
| "loss": 0.3088, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.58, |
| "eval_loss": 0.3221810758113861, |
| "eval_runtime": 679.1093, |
| "eval_samples_per_second": 3.387, |
| "eval_steps_per_second": 0.424, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 0.0002856465517241379, |
| "loss": 0.3218, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 0.0002855172413793103, |
| "loss": 0.2987, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 0.00028538793103448276, |
| "loss": 0.3135, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 0.00028525862068965515, |
| "loss": 0.296, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 0.0002851293103448276, |
| "loss": 0.3006, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 0.000285, |
| "loss": 0.3192, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 0.0002848706896551724, |
| "loss": 0.3041, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 0.0002847413793103448, |
| "loss": 0.3106, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 0.0002846120689655172, |
| "loss": 0.3193, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 0.0002844827586206896, |
| "loss": 0.3111, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 0.00028435344827586206, |
| "loss": 0.3187, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 0.00028422413793103445, |
| "loss": 0.3125, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 0.0002840948275862069, |
| "loss": 0.3117, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 0.0002839655172413793, |
| "loss": 0.316, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 0.0002838362068965517, |
| "loss": 0.3213, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 0.0002837068965517241, |
| "loss": 0.3168, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 0.00028357758620689653, |
| "loss": 0.3107, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 0.0002834482758620689, |
| "loss": 0.2958, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 0.00028331896551724137, |
| "loss": 0.3153, |
| "step": 1390 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 0.00028318965517241376, |
| "loss": 0.3131, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 0.0002830603448275862, |
| "loss": 0.2915, |
| "step": 1410 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 0.0002829310344827586, |
| "loss": 0.3039, |
| "step": 1420 |
| }, |
| { |
| "epoch": 3.07, |
| "learning_rate": 0.000282801724137931, |
| "loss": 0.3074, |
| "step": 1430 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 0.00028267241379310344, |
| "loss": 0.3062, |
| "step": 1440 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 0.00028254310344827583, |
| "loss": 0.3005, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 0.0002824137931034482, |
| "loss": 0.3038, |
| "step": 1460 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 0.0002822844827586207, |
| "loss": 0.3078, |
| "step": 1470 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 0.00028215517241379307, |
| "loss": 0.3118, |
| "step": 1480 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 0.0002820258620689655, |
| "loss": 0.286, |
| "step": 1490 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 0.0002818965517241379, |
| "loss": 0.2966, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.22, |
| "eval_loss": 0.3142649531364441, |
| "eval_runtime": 678.9797, |
| "eval_samples_per_second": 3.387, |
| "eval_steps_per_second": 0.424, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.24, |
| "learning_rate": 0.0002817672413793103, |
| "loss": 0.3101, |
| "step": 1510 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 0.00028163793103448275, |
| "loss": 0.2922, |
| "step": 1520 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 0.00028150862068965514, |
| "loss": 0.3128, |
| "step": 1530 |
| }, |
| { |
| "epoch": 3.3, |
| "learning_rate": 0.00028137931034482753, |
| "loss": 0.2973, |
| "step": 1540 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 0.00028125, |
| "loss": 0.2921, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 0.00028112068965517237, |
| "loss": 0.3094, |
| "step": 1560 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 0.0002809913793103448, |
| "loss": 0.2876, |
| "step": 1570 |
| }, |
| { |
| "epoch": 3.39, |
| "learning_rate": 0.0002808620689655172, |
| "loss": 0.3019, |
| "step": 1580 |
| }, |
| { |
| "epoch": 3.41, |
| "learning_rate": 0.00028073275862068966, |
| "loss": 0.3127, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 0.00028060344827586205, |
| "loss": 0.3078, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 0.00028047413793103444, |
| "loss": 0.2967, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.48, |
| "learning_rate": 0.0002803448275862069, |
| "loss": 0.2984, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 0.0002802155172413793, |
| "loss": 0.2886, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 0.0002800862068965517, |
| "loss": 0.3113, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 0.0002799568965517241, |
| "loss": 0.3091, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.56, |
| "learning_rate": 0.0002798275862068965, |
| "loss": 0.2969, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 0.00027969827586206896, |
| "loss": 0.2985, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.61, |
| "learning_rate": 0.00027956896551724136, |
| "loss": 0.2863, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 0.00027943965517241375, |
| "loss": 0.2922, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 0.0002793103448275862, |
| "loss": 0.2753, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.67, |
| "learning_rate": 0.0002791810344827586, |
| "loss": 0.3047, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 0.000279051724137931, |
| "loss": 0.309, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 0.00027892241379310343, |
| "loss": 0.3058, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 0.0002787931034482758, |
| "loss": 0.3171, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 0.00027866379310344827, |
| "loss": 0.2826, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 0.00027853448275862066, |
| "loss": 0.3029, |
| "step": 1760 |
| }, |
| { |
| "epoch": 3.8, |
| "learning_rate": 0.00027840517241379306, |
| "loss": 0.297, |
| "step": 1770 |
| }, |
| { |
| "epoch": 3.82, |
| "learning_rate": 0.0002782758620689655, |
| "loss": 0.3007, |
| "step": 1780 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 0.0002781465517241379, |
| "loss": 0.3096, |
| "step": 1790 |
| }, |
| { |
| "epoch": 3.86, |
| "learning_rate": 0.00027801724137931034, |
| "loss": 0.3013, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.86, |
| "eval_loss": 0.3079967796802521, |
| "eval_runtime": 678.0738, |
| "eval_samples_per_second": 3.392, |
| "eval_steps_per_second": 0.425, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 0.00027788793103448274, |
| "loss": 0.2907, |
| "step": 1810 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 0.00027775862068965513, |
| "loss": 0.2792, |
| "step": 1820 |
| }, |
| { |
| "epoch": 3.93, |
| "learning_rate": 0.0002776293103448276, |
| "loss": 0.2911, |
| "step": 1830 |
| }, |
| { |
| "epoch": 3.95, |
| "learning_rate": 0.00027749999999999997, |
| "loss": 0.2841, |
| "step": 1840 |
| }, |
| { |
| "epoch": 3.97, |
| "learning_rate": 0.00027737068965517236, |
| "loss": 0.3033, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.99, |
| "learning_rate": 0.0002772413793103448, |
| "loss": 0.2975, |
| "step": 1860 |
| }, |
| { |
| "epoch": 4.01, |
| "learning_rate": 0.0002771120689655172, |
| "loss": 0.2823, |
| "step": 1870 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 0.00027698275862068965, |
| "loss": 0.2907, |
| "step": 1880 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 0.00027685344827586204, |
| "loss": 0.2747, |
| "step": 1890 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 0.0002767241379310345, |
| "loss": 0.2823, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 0.0002765948275862069, |
| "loss": 0.3116, |
| "step": 1910 |
| }, |
| { |
| "epoch": 4.12, |
| "learning_rate": 0.0002764655172413793, |
| "loss": 0.3064, |
| "step": 1920 |
| }, |
| { |
| "epoch": 4.14, |
| "learning_rate": 0.0002763362068965517, |
| "loss": 0.2753, |
| "step": 1930 |
| }, |
| { |
| "epoch": 4.16, |
| "learning_rate": 0.0002762068965517241, |
| "loss": 0.2974, |
| "step": 1940 |
| }, |
| { |
| "epoch": 4.18, |
| "learning_rate": 0.0002760775862068965, |
| "loss": 0.2912, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.21, |
| "learning_rate": 0.00027594827586206895, |
| "loss": 0.2968, |
| "step": 1960 |
| }, |
| { |
| "epoch": 4.23, |
| "learning_rate": 0.00027581896551724135, |
| "loss": 0.2768, |
| "step": 1970 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 0.0002756896551724138, |
| "loss": 0.3027, |
| "step": 1980 |
| }, |
| { |
| "epoch": 4.27, |
| "learning_rate": 0.0002755603448275862, |
| "loss": 0.2825, |
| "step": 1990 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 0.0002754310344827586, |
| "loss": 0.2946, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 0.00027530172413793103, |
| "loss": 0.2913, |
| "step": 2010 |
| }, |
| { |
| "epoch": 4.33, |
| "learning_rate": 0.0002751724137931034, |
| "loss": 0.282, |
| "step": 2020 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 0.0002750431034482758, |
| "loss": 0.2866, |
| "step": 2030 |
| }, |
| { |
| "epoch": 4.38, |
| "learning_rate": 0.00027491379310344826, |
| "loss": 0.2857, |
| "step": 2040 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 0.00027478448275862065, |
| "loss": 0.2883, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 0.0002746551724137931, |
| "loss": 0.2891, |
| "step": 2060 |
| }, |
| { |
| "epoch": 4.44, |
| "learning_rate": 0.0002745258620689655, |
| "loss": 0.2908, |
| "step": 2070 |
| }, |
| { |
| "epoch": 4.46, |
| "learning_rate": 0.00027439655172413794, |
| "loss": 0.2808, |
| "step": 2080 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 0.00027426724137931033, |
| "loss": 0.2848, |
| "step": 2090 |
| }, |
| { |
| "epoch": 4.51, |
| "learning_rate": 0.0002741379310344827, |
| "loss": 0.2944, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.51, |
| "eval_loss": 0.3032459020614624, |
| "eval_runtime": 676.6943, |
| "eval_samples_per_second": 3.399, |
| "eval_steps_per_second": 0.426, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 0.0002740086206896551, |
| "loss": 0.2835, |
| "step": 2110 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 0.00027387931034482757, |
| "loss": 0.292, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.57, |
| "learning_rate": 0.00027374999999999996, |
| "loss": 0.2992, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.59, |
| "learning_rate": 0.0002736206896551724, |
| "loss": 0.2874, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.61, |
| "learning_rate": 0.0002734913793103448, |
| "loss": 0.2827, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 0.00027336206896551725, |
| "loss": 0.2887, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.66, |
| "learning_rate": 0.00027323275862068964, |
| "loss": 0.2824, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 0.00027310344827586203, |
| "loss": 0.3099, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.7, |
| "learning_rate": 0.0002729741379310344, |
| "loss": 0.2931, |
| "step": 2190 |
| }, |
| { |
| "epoch": 4.72, |
| "learning_rate": 0.00027284482758620687, |
| "loss": 0.2983, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 0.00027271551724137926, |
| "loss": 0.2816, |
| "step": 2210 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 0.0002725862068965517, |
| "loss": 0.2888, |
| "step": 2220 |
| }, |
| { |
| "epoch": 4.79, |
| "learning_rate": 0.0002724568965517241, |
| "loss": 0.2769, |
| "step": 2230 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 0.00027232758620689655, |
| "loss": 0.2824, |
| "step": 2240 |
| }, |
| { |
| "epoch": 4.83, |
| "learning_rate": 0.00027219827586206894, |
| "loss": 0.2823, |
| "step": 2250 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 0.0002720689655172414, |
| "loss": 0.2934, |
| "step": 2260 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 0.0002719396551724138, |
| "loss": 0.2856, |
| "step": 2270 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 0.0002718103448275862, |
| "loss": 0.2867, |
| "step": 2280 |
| }, |
| { |
| "epoch": 4.91, |
| "learning_rate": 0.00027168103448275857, |
| "loss": 0.2873, |
| "step": 2290 |
| }, |
| { |
| "epoch": 4.94, |
| "learning_rate": 0.000271551724137931, |
| "loss": 0.2857, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.96, |
| "learning_rate": 0.0002714224137931034, |
| "loss": 0.2749, |
| "step": 2310 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 0.00027129310344827586, |
| "loss": 0.2775, |
| "step": 2320 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 0.00027116379310344825, |
| "loss": 0.2721, |
| "step": 2330 |
| }, |
| { |
| "epoch": 5.02, |
| "learning_rate": 0.0002710344827586207, |
| "loss": 0.2682, |
| "step": 2340 |
| }, |
| { |
| "epoch": 5.04, |
| "learning_rate": 0.0002709051724137931, |
| "loss": 0.2676, |
| "step": 2350 |
| }, |
| { |
| "epoch": 5.06, |
| "learning_rate": 0.0002707758620689655, |
| "loss": 0.2752, |
| "step": 2360 |
| }, |
| { |
| "epoch": 5.09, |
| "learning_rate": 0.0002706465517241379, |
| "loss": 0.2742, |
| "step": 2370 |
| }, |
| { |
| "epoch": 5.11, |
| "learning_rate": 0.0002705172413793103, |
| "loss": 0.2784, |
| "step": 2380 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 0.0002703879310344827, |
| "loss": 0.2739, |
| "step": 2390 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 0.00027025862068965516, |
| "loss": 0.2874, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.15, |
| "eval_loss": 0.2981237769126892, |
| "eval_runtime": 678.281, |
| "eval_samples_per_second": 3.391, |
| "eval_steps_per_second": 0.425, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.17, |
| "learning_rate": 0.00027012931034482756, |
| "loss": 0.2797, |
| "step": 2410 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 0.00027, |
| "loss": 0.294, |
| "step": 2420 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 0.0002698706896551724, |
| "loss": 0.2776, |
| "step": 2430 |
| }, |
| { |
| "epoch": 5.24, |
| "learning_rate": 0.00026974137931034484, |
| "loss": 0.2727, |
| "step": 2440 |
| }, |
| { |
| "epoch": 5.26, |
| "learning_rate": 0.0002696120689655172, |
| "loss": 0.2719, |
| "step": 2450 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 0.00026948275862068963, |
| "loss": 0.2664, |
| "step": 2460 |
| }, |
| { |
| "epoch": 5.3, |
| "learning_rate": 0.000269353448275862, |
| "loss": 0.2915, |
| "step": 2470 |
| }, |
| { |
| "epoch": 5.32, |
| "learning_rate": 0.00026922413793103447, |
| "loss": 0.2562, |
| "step": 2480 |
| }, |
| { |
| "epoch": 5.34, |
| "learning_rate": 0.00026909482758620686, |
| "loss": 0.264, |
| "step": 2490 |
| }, |
| { |
| "epoch": 5.36, |
| "learning_rate": 0.0002689655172413793, |
| "loss": 0.2758, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.39, |
| "learning_rate": 0.0002688362068965517, |
| "loss": 0.2764, |
| "step": 2510 |
| }, |
| { |
| "epoch": 5.41, |
| "learning_rate": 0.00026870689655172415, |
| "loss": 0.2697, |
| "step": 2520 |
| }, |
| { |
| "epoch": 5.43, |
| "learning_rate": 0.00026857758620689654, |
| "loss": 0.2795, |
| "step": 2530 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 0.00026844827586206893, |
| "loss": 0.2863, |
| "step": 2540 |
| }, |
| { |
| "epoch": 5.47, |
| "learning_rate": 0.00026831896551724133, |
| "loss": 0.2807, |
| "step": 2550 |
| }, |
| { |
| "epoch": 5.49, |
| "learning_rate": 0.0002681896551724138, |
| "loss": 0.272, |
| "step": 2560 |
| }, |
| { |
| "epoch": 5.52, |
| "learning_rate": 0.00026806034482758617, |
| "loss": 0.2522, |
| "step": 2570 |
| }, |
| { |
| "epoch": 5.54, |
| "learning_rate": 0.0002679310344827586, |
| "loss": 0.2706, |
| "step": 2580 |
| }, |
| { |
| "epoch": 5.56, |
| "learning_rate": 0.000267801724137931, |
| "loss": 0.2689, |
| "step": 2590 |
| }, |
| { |
| "epoch": 5.58, |
| "learning_rate": 0.00026767241379310345, |
| "loss": 0.2607, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 0.00026754310344827585, |
| "loss": 0.2543, |
| "step": 2610 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 0.00026741379310344824, |
| "loss": 0.288, |
| "step": 2620 |
| }, |
| { |
| "epoch": 5.64, |
| "learning_rate": 0.00026728448275862063, |
| "loss": 0.2698, |
| "step": 2630 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 0.0002671551724137931, |
| "loss": 0.2629, |
| "step": 2640 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 0.0002670258620689655, |
| "loss": 0.2798, |
| "step": 2650 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 0.0002668965517241379, |
| "loss": 0.2804, |
| "step": 2660 |
| }, |
| { |
| "epoch": 5.73, |
| "learning_rate": 0.0002667672413793103, |
| "loss": 0.2824, |
| "step": 2670 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 0.00026663793103448276, |
| "loss": 0.2797, |
| "step": 2680 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 0.00026650862068965515, |
| "loss": 0.2605, |
| "step": 2690 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 0.0002663793103448276, |
| "loss": 0.2766, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.79, |
| "eval_loss": 0.292289137840271, |
| "eval_runtime": 676.8375, |
| "eval_samples_per_second": 3.398, |
| "eval_steps_per_second": 0.426, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.82, |
| "learning_rate": 0.00026624999999999994, |
| "loss": 0.2546, |
| "step": 2710 |
| }, |
| { |
| "epoch": 5.84, |
| "learning_rate": 0.0002661206896551724, |
| "loss": 0.2561, |
| "step": 2720 |
| }, |
| { |
| "epoch": 5.86, |
| "learning_rate": 0.0002659913793103448, |
| "loss": 0.2653, |
| "step": 2730 |
| }, |
| { |
| "epoch": 5.88, |
| "learning_rate": 0.0002658620689655172, |
| "loss": 0.2647, |
| "step": 2740 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 0.0002657327586206896, |
| "loss": 0.2678, |
| "step": 2750 |
| }, |
| { |
| "epoch": 5.92, |
| "learning_rate": 0.00026560344827586207, |
| "loss": 0.2672, |
| "step": 2760 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 0.00026547413793103446, |
| "loss": 0.2639, |
| "step": 2770 |
| }, |
| { |
| "epoch": 5.97, |
| "learning_rate": 0.0002653448275862069, |
| "loss": 0.2529, |
| "step": 2780 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 0.00026521551724137925, |
| "loss": 0.2789, |
| "step": 2790 |
| }, |
| { |
| "epoch": 6.01, |
| "learning_rate": 0.0002650862068965517, |
| "loss": 0.2753, |
| "step": 2800 |
| }, |
| { |
| "epoch": 6.03, |
| "learning_rate": 0.0002649568965517241, |
| "loss": 0.2579, |
| "step": 2810 |
| }, |
| { |
| "epoch": 6.05, |
| "learning_rate": 0.00026482758620689653, |
| "loss": 0.2534, |
| "step": 2820 |
| }, |
| { |
| "epoch": 6.07, |
| "learning_rate": 0.0002646982758620689, |
| "loss": 0.2653, |
| "step": 2830 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 0.00026456896551724137, |
| "loss": 0.2669, |
| "step": 2840 |
| }, |
| { |
| "epoch": 6.12, |
| "learning_rate": 0.00026443965517241377, |
| "loss": 0.2464, |
| "step": 2850 |
| }, |
| { |
| "epoch": 6.14, |
| "learning_rate": 0.0002643103448275862, |
| "loss": 0.2466, |
| "step": 2860 |
| }, |
| { |
| "epoch": 6.16, |
| "learning_rate": 0.0002641810344827586, |
| "loss": 0.2494, |
| "step": 2870 |
| }, |
| { |
| "epoch": 6.18, |
| "learning_rate": 0.000264051724137931, |
| "loss": 0.2532, |
| "step": 2880 |
| }, |
| { |
| "epoch": 6.2, |
| "learning_rate": 0.0002639224137931034, |
| "loss": 0.2513, |
| "step": 2890 |
| }, |
| { |
| "epoch": 6.22, |
| "learning_rate": 0.00026379310344827584, |
| "loss": 0.2577, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.24, |
| "learning_rate": 0.00026366379310344823, |
| "loss": 0.2594, |
| "step": 2910 |
| }, |
| { |
| "epoch": 6.27, |
| "learning_rate": 0.0002635344827586207, |
| "loss": 0.2507, |
| "step": 2920 |
| }, |
| { |
| "epoch": 6.29, |
| "learning_rate": 0.00026340517241379307, |
| "loss": 0.2716, |
| "step": 2930 |
| }, |
| { |
| "epoch": 6.31, |
| "learning_rate": 0.0002632758620689655, |
| "loss": 0.2697, |
| "step": 2940 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 0.0002631465517241379, |
| "loss": 0.2516, |
| "step": 2950 |
| }, |
| { |
| "epoch": 6.35, |
| "learning_rate": 0.0002630172413793103, |
| "loss": 0.2691, |
| "step": 2960 |
| }, |
| { |
| "epoch": 6.37, |
| "learning_rate": 0.00026288793103448275, |
| "loss": 0.2675, |
| "step": 2970 |
| }, |
| { |
| "epoch": 6.39, |
| "learning_rate": 0.00026275862068965514, |
| "loss": 0.2628, |
| "step": 2980 |
| }, |
| { |
| "epoch": 6.42, |
| "learning_rate": 0.00026262931034482754, |
| "loss": 0.2627, |
| "step": 2990 |
| }, |
| { |
| "epoch": 6.44, |
| "learning_rate": 0.0002625, |
| "loss": 0.2603, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.44, |
| "eval_loss": 0.2889377772808075, |
| "eval_runtime": 697.121, |
| "eval_samples_per_second": 3.299, |
| "eval_steps_per_second": 0.413, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.46, |
| "learning_rate": 0.0002623706896551724, |
| "loss": 0.2323, |
| "step": 3010 |
| }, |
| { |
| "epoch": 6.48, |
| "learning_rate": 0.0002622413793103448, |
| "loss": 0.2548, |
| "step": 3020 |
| }, |
| { |
| "epoch": 6.5, |
| "learning_rate": 0.0002621120689655172, |
| "loss": 0.2591, |
| "step": 3030 |
| }, |
| { |
| "epoch": 6.52, |
| "learning_rate": 0.00026198275862068966, |
| "loss": 0.2568, |
| "step": 3040 |
| }, |
| { |
| "epoch": 6.55, |
| "learning_rate": 0.00026185344827586206, |
| "loss": 0.2473, |
| "step": 3050 |
| }, |
| { |
| "epoch": 6.57, |
| "learning_rate": 0.00026172413793103445, |
| "loss": 0.2512, |
| "step": 3060 |
| }, |
| { |
| "epoch": 6.59, |
| "learning_rate": 0.00026159482758620684, |
| "loss": 0.2635, |
| "step": 3070 |
| }, |
| { |
| "epoch": 6.61, |
| "learning_rate": 0.0002614655172413793, |
| "loss": 0.2509, |
| "step": 3080 |
| }, |
| { |
| "epoch": 6.63, |
| "learning_rate": 0.0002613362068965517, |
| "loss": 0.2648, |
| "step": 3090 |
| }, |
| { |
| "epoch": 6.65, |
| "learning_rate": 0.00026120689655172413, |
| "loss": 0.2544, |
| "step": 3100 |
| }, |
| { |
| "epoch": 6.67, |
| "learning_rate": 0.0002610775862068965, |
| "loss": 0.26, |
| "step": 3110 |
| }, |
| { |
| "epoch": 6.7, |
| "learning_rate": 0.00026094827586206897, |
| "loss": 0.2611, |
| "step": 3120 |
| }, |
| { |
| "epoch": 6.72, |
| "learning_rate": 0.00026081896551724136, |
| "loss": 0.2719, |
| "step": 3130 |
| }, |
| { |
| "epoch": 6.74, |
| "learning_rate": 0.00026068965517241376, |
| "loss": 0.2795, |
| "step": 3140 |
| }, |
| { |
| "epoch": 6.76, |
| "learning_rate": 0.0002605603448275862, |
| "loss": 0.2605, |
| "step": 3150 |
| }, |
| { |
| "epoch": 6.78, |
| "learning_rate": 0.0002604310344827586, |
| "loss": 0.2669, |
| "step": 3160 |
| }, |
| { |
| "epoch": 6.8, |
| "learning_rate": 0.000260301724137931, |
| "loss": 0.2407, |
| "step": 3170 |
| }, |
| { |
| "epoch": 6.82, |
| "learning_rate": 0.00026017241379310344, |
| "loss": 0.2578, |
| "step": 3180 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 0.00026004310344827583, |
| "loss": 0.2535, |
| "step": 3190 |
| }, |
| { |
| "epoch": 6.87, |
| "learning_rate": 0.0002599137931034483, |
| "loss": 0.2655, |
| "step": 3200 |
| }, |
| { |
| "epoch": 6.89, |
| "learning_rate": 0.00025978448275862067, |
| "loss": 0.27, |
| "step": 3210 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 0.00025965517241379306, |
| "loss": 0.2757, |
| "step": 3220 |
| }, |
| { |
| "epoch": 6.93, |
| "learning_rate": 0.0002595258620689655, |
| "loss": 0.2615, |
| "step": 3230 |
| }, |
| { |
| "epoch": 6.95, |
| "learning_rate": 0.0002593965517241379, |
| "loss": 0.2728, |
| "step": 3240 |
| }, |
| { |
| "epoch": 6.97, |
| "learning_rate": 0.0002592672413793103, |
| "loss": 0.2576, |
| "step": 3250 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 0.00025913793103448274, |
| "loss": 0.2554, |
| "step": 3260 |
| }, |
| { |
| "epoch": 7.02, |
| "learning_rate": 0.00025900862068965513, |
| "loss": 0.2266, |
| "step": 3270 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 0.0002588793103448276, |
| "loss": 0.2371, |
| "step": 3280 |
| }, |
| { |
| "epoch": 7.06, |
| "learning_rate": 0.00025875, |
| "loss": 0.2471, |
| "step": 3290 |
| }, |
| { |
| "epoch": 7.08, |
| "learning_rate": 0.00025862068965517237, |
| "loss": 0.2477, |
| "step": 3300 |
| }, |
| { |
| "epoch": 7.08, |
| "eval_loss": 0.2846280038356781, |
| "eval_runtime": 696.1493, |
| "eval_samples_per_second": 3.304, |
| "eval_steps_per_second": 0.414, |
| "step": 3300 |
| }, |
| { |
| "epoch": 7.1, |
| "learning_rate": 0.0002584913793103448, |
| "loss": 0.241, |
| "step": 3310 |
| }, |
| { |
| "epoch": 7.12, |
| "learning_rate": 0.0002583620689655172, |
| "loss": 0.2466, |
| "step": 3320 |
| }, |
| { |
| "epoch": 7.15, |
| "learning_rate": 0.00025823275862068965, |
| "loss": 0.2477, |
| "step": 3330 |
| }, |
| { |
| "epoch": 7.17, |
| "learning_rate": 0.00025810344827586205, |
| "loss": 0.2338, |
| "step": 3340 |
| }, |
| { |
| "epoch": 7.19, |
| "learning_rate": 0.00025797413793103444, |
| "loss": 0.2496, |
| "step": 3350 |
| }, |
| { |
| "epoch": 7.21, |
| "learning_rate": 0.0002578448275862069, |
| "loss": 0.247, |
| "step": 3360 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 0.0002577155172413793, |
| "loss": 0.2441, |
| "step": 3370 |
| }, |
| { |
| "epoch": 7.25, |
| "learning_rate": 0.0002575862068965517, |
| "loss": 0.241, |
| "step": 3380 |
| }, |
| { |
| "epoch": 7.27, |
| "learning_rate": 0.0002574568965517241, |
| "loss": 0.2464, |
| "step": 3390 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 0.0002573275862068965, |
| "loss": 0.2487, |
| "step": 3400 |
| }, |
| { |
| "epoch": 7.32, |
| "learning_rate": 0.00025719827586206896, |
| "loss": 0.2499, |
| "step": 3410 |
| }, |
| { |
| "epoch": 7.34, |
| "learning_rate": 0.00025706896551724135, |
| "loss": 0.2315, |
| "step": 3420 |
| }, |
| { |
| "epoch": 7.36, |
| "learning_rate": 0.00025693965517241375, |
| "loss": 0.2453, |
| "step": 3430 |
| }, |
| { |
| "epoch": 7.38, |
| "learning_rate": 0.0002568103448275862, |
| "loss": 0.2436, |
| "step": 3440 |
| }, |
| { |
| "epoch": 7.4, |
| "learning_rate": 0.0002566810344827586, |
| "loss": 0.2632, |
| "step": 3450 |
| }, |
| { |
| "epoch": 7.42, |
| "learning_rate": 0.00025655172413793103, |
| "loss": 0.2435, |
| "step": 3460 |
| }, |
| { |
| "epoch": 7.45, |
| "learning_rate": 0.0002564224137931034, |
| "loss": 0.2447, |
| "step": 3470 |
| }, |
| { |
| "epoch": 7.47, |
| "learning_rate": 0.0002562931034482758, |
| "loss": 0.2292, |
| "step": 3480 |
| }, |
| { |
| "epoch": 7.49, |
| "learning_rate": 0.00025616379310344827, |
| "loss": 0.2432, |
| "step": 3490 |
| }, |
| { |
| "epoch": 7.51, |
| "learning_rate": 0.00025603448275862066, |
| "loss": 0.2491, |
| "step": 3500 |
| }, |
| { |
| "epoch": 7.53, |
| "learning_rate": 0.0002559051724137931, |
| "loss": 0.2618, |
| "step": 3510 |
| }, |
| { |
| "epoch": 7.55, |
| "learning_rate": 0.0002557758620689655, |
| "loss": 0.2465, |
| "step": 3520 |
| }, |
| { |
| "epoch": 7.58, |
| "learning_rate": 0.0002556465517241379, |
| "loss": 0.2377, |
| "step": 3530 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 0.00025551724137931034, |
| "loss": 0.2529, |
| "step": 3540 |
| }, |
| { |
| "epoch": 7.62, |
| "learning_rate": 0.00025538793103448273, |
| "loss": 0.2377, |
| "step": 3550 |
| }, |
| { |
| "epoch": 7.64, |
| "learning_rate": 0.0002552586206896551, |
| "loss": 0.2479, |
| "step": 3560 |
| }, |
| { |
| "epoch": 7.66, |
| "learning_rate": 0.00025512931034482757, |
| "loss": 0.2459, |
| "step": 3570 |
| }, |
| { |
| "epoch": 7.68, |
| "learning_rate": 0.00025499999999999996, |
| "loss": 0.2248, |
| "step": 3580 |
| }, |
| { |
| "epoch": 7.7, |
| "learning_rate": 0.0002548706896551724, |
| "loss": 0.2528, |
| "step": 3590 |
| }, |
| { |
| "epoch": 7.73, |
| "learning_rate": 0.0002547413793103448, |
| "loss": 0.2517, |
| "step": 3600 |
| }, |
| { |
| "epoch": 7.73, |
| "eval_loss": 0.2782333195209503, |
| "eval_runtime": 697.1594, |
| "eval_samples_per_second": 3.299, |
| "eval_steps_per_second": 0.413, |
| "step": 3600 |
| }, |
| { |
| "epoch": 7.75, |
| "learning_rate": 0.00025461206896551725, |
| "loss": 0.2463, |
| "step": 3610 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 0.00025448275862068964, |
| "loss": 0.2488, |
| "step": 3620 |
| }, |
| { |
| "epoch": 7.79, |
| "learning_rate": 0.00025435344827586204, |
| "loss": 0.2433, |
| "step": 3630 |
| }, |
| { |
| "epoch": 7.81, |
| "learning_rate": 0.00025422413793103443, |
| "loss": 0.2435, |
| "step": 3640 |
| }, |
| { |
| "epoch": 7.83, |
| "learning_rate": 0.0002540948275862069, |
| "loss": 0.2518, |
| "step": 3650 |
| }, |
| { |
| "epoch": 7.85, |
| "learning_rate": 0.00025396551724137927, |
| "loss": 0.2242, |
| "step": 3660 |
| }, |
| { |
| "epoch": 7.88, |
| "learning_rate": 0.0002538362068965517, |
| "loss": 0.2347, |
| "step": 3670 |
| }, |
| { |
| "epoch": 7.9, |
| "learning_rate": 0.0002537068965517241, |
| "loss": 0.255, |
| "step": 3680 |
| }, |
| { |
| "epoch": 7.92, |
| "learning_rate": 0.00025357758620689656, |
| "loss": 0.2581, |
| "step": 3690 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 0.00025344827586206895, |
| "loss": 0.2508, |
| "step": 3700 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 0.00025331896551724134, |
| "loss": 0.2435, |
| "step": 3710 |
| }, |
| { |
| "epoch": 7.98, |
| "learning_rate": 0.0002531896551724138, |
| "loss": 0.2538, |
| "step": 3720 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 0.0002530603448275862, |
| "loss": 0.2446, |
| "step": 3730 |
| }, |
| { |
| "epoch": 8.03, |
| "learning_rate": 0.0002529310344827586, |
| "loss": 0.2321, |
| "step": 3740 |
| }, |
| { |
| "epoch": 8.05, |
| "learning_rate": 0.000252801724137931, |
| "loss": 0.2252, |
| "step": 3750 |
| }, |
| { |
| "epoch": 8.07, |
| "learning_rate": 0.0002526724137931034, |
| "loss": 0.2237, |
| "step": 3760 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 0.00025254310344827586, |
| "loss": 0.2128, |
| "step": 3770 |
| }, |
| { |
| "epoch": 8.11, |
| "learning_rate": 0.00025241379310344826, |
| "loss": 0.233, |
| "step": 3780 |
| }, |
| { |
| "epoch": 8.13, |
| "learning_rate": 0.0002522844827586207, |
| "loss": 0.25, |
| "step": 3790 |
| }, |
| { |
| "epoch": 8.15, |
| "learning_rate": 0.0002521551724137931, |
| "loss": 0.2375, |
| "step": 3800 |
| }, |
| { |
| "epoch": 8.18, |
| "learning_rate": 0.0002520258620689655, |
| "loss": 0.2222, |
| "step": 3810 |
| }, |
| { |
| "epoch": 8.2, |
| "learning_rate": 0.0002518965517241379, |
| "loss": 0.2345, |
| "step": 3820 |
| }, |
| { |
| "epoch": 8.22, |
| "learning_rate": 0.00025176724137931033, |
| "loss": 0.2341, |
| "step": 3830 |
| }, |
| { |
| "epoch": 8.24, |
| "learning_rate": 0.0002516379310344827, |
| "loss": 0.218, |
| "step": 3840 |
| }, |
| { |
| "epoch": 8.26, |
| "learning_rate": 0.00025150862068965517, |
| "loss": 0.2404, |
| "step": 3850 |
| }, |
| { |
| "epoch": 8.28, |
| "learning_rate": 0.00025137931034482756, |
| "loss": 0.224, |
| "step": 3860 |
| }, |
| { |
| "epoch": 8.3, |
| "learning_rate": 0.00025125, |
| "loss": 0.231, |
| "step": 3870 |
| }, |
| { |
| "epoch": 8.33, |
| "learning_rate": 0.0002511206896551724, |
| "loss": 0.2366, |
| "step": 3880 |
| }, |
| { |
| "epoch": 8.35, |
| "learning_rate": 0.0002509913793103448, |
| "loss": 0.2283, |
| "step": 3890 |
| }, |
| { |
| "epoch": 8.37, |
| "learning_rate": 0.0002508620689655172, |
| "loss": 0.2409, |
| "step": 3900 |
| }, |
| { |
| "epoch": 8.37, |
| "eval_loss": 0.27595165371894836, |
| "eval_runtime": 696.4728, |
| "eval_samples_per_second": 3.302, |
| "eval_steps_per_second": 0.414, |
| "step": 3900 |
| }, |
| { |
| "epoch": 8.39, |
| "learning_rate": 0.00025073275862068963, |
| "loss": 0.237, |
| "step": 3910 |
| }, |
| { |
| "epoch": 8.41, |
| "learning_rate": 0.00025060344827586203, |
| "loss": 0.2325, |
| "step": 3920 |
| }, |
| { |
| "epoch": 8.43, |
| "learning_rate": 0.0002504741379310345, |
| "loss": 0.2418, |
| "step": 3930 |
| }, |
| { |
| "epoch": 8.45, |
| "learning_rate": 0.00025034482758620687, |
| "loss": 0.2277, |
| "step": 3940 |
| }, |
| { |
| "epoch": 8.48, |
| "learning_rate": 0.0002502155172413793, |
| "loss": 0.227, |
| "step": 3950 |
| }, |
| { |
| "epoch": 8.5, |
| "learning_rate": 0.0002500862068965517, |
| "loss": 0.235, |
| "step": 3960 |
| }, |
| { |
| "epoch": 8.52, |
| "learning_rate": 0.00024995689655172415, |
| "loss": 0.2322, |
| "step": 3970 |
| }, |
| { |
| "epoch": 8.54, |
| "learning_rate": 0.00024982758620689655, |
| "loss": 0.231, |
| "step": 3980 |
| }, |
| { |
| "epoch": 8.56, |
| "learning_rate": 0.00024969827586206894, |
| "loss": 0.2249, |
| "step": 3990 |
| }, |
| { |
| "epoch": 8.58, |
| "learning_rate": 0.00024956896551724133, |
| "loss": 0.213, |
| "step": 4000 |
| }, |
| { |
| "epoch": 8.61, |
| "learning_rate": 0.0002494396551724138, |
| "loss": 0.2199, |
| "step": 4010 |
| }, |
| { |
| "epoch": 8.63, |
| "learning_rate": 0.0002493103448275862, |
| "loss": 0.2232, |
| "step": 4020 |
| }, |
| { |
| "epoch": 8.65, |
| "learning_rate": 0.0002491810344827586, |
| "loss": 0.2248, |
| "step": 4030 |
| }, |
| { |
| "epoch": 8.67, |
| "learning_rate": 0.000249051724137931, |
| "loss": 0.2211, |
| "step": 4040 |
| }, |
| { |
| "epoch": 8.69, |
| "learning_rate": 0.00024892241379310346, |
| "loss": 0.2295, |
| "step": 4050 |
| }, |
| { |
| "epoch": 8.71, |
| "learning_rate": 0.00024879310344827585, |
| "loss": 0.2374, |
| "step": 4060 |
| }, |
| { |
| "epoch": 8.73, |
| "learning_rate": 0.00024866379310344825, |
| "loss": 0.2287, |
| "step": 4070 |
| }, |
| { |
| "epoch": 8.76, |
| "learning_rate": 0.00024853448275862064, |
| "loss": 0.2402, |
| "step": 4080 |
| }, |
| { |
| "epoch": 8.78, |
| "learning_rate": 0.0002484051724137931, |
| "loss": 0.2258, |
| "step": 4090 |
| }, |
| { |
| "epoch": 8.8, |
| "learning_rate": 0.0002482758620689655, |
| "loss": 0.2312, |
| "step": 4100 |
| }, |
| { |
| "epoch": 8.82, |
| "learning_rate": 0.0002481465517241379, |
| "loss": 0.2474, |
| "step": 4110 |
| }, |
| { |
| "epoch": 8.84, |
| "learning_rate": 0.0002480172413793103, |
| "loss": 0.2429, |
| "step": 4120 |
| }, |
| { |
| "epoch": 8.86, |
| "learning_rate": 0.00024788793103448277, |
| "loss": 0.2287, |
| "step": 4130 |
| }, |
| { |
| "epoch": 8.88, |
| "learning_rate": 0.00024775862068965516, |
| "loss": 0.2269, |
| "step": 4140 |
| }, |
| { |
| "epoch": 8.91, |
| "learning_rate": 0.0002476293103448276, |
| "loss": 0.2428, |
| "step": 4150 |
| }, |
| { |
| "epoch": 8.93, |
| "learning_rate": 0.00024749999999999994, |
| "loss": 0.2282, |
| "step": 4160 |
| }, |
| { |
| "epoch": 8.95, |
| "learning_rate": 0.0002473706896551724, |
| "loss": 0.2236, |
| "step": 4170 |
| }, |
| { |
| "epoch": 8.97, |
| "learning_rate": 0.0002472413793103448, |
| "loss": 0.2284, |
| "step": 4180 |
| }, |
| { |
| "epoch": 8.99, |
| "learning_rate": 0.00024711206896551723, |
| "loss": 0.2365, |
| "step": 4190 |
| }, |
| { |
| "epoch": 9.01, |
| "learning_rate": 0.0002469827586206896, |
| "loss": 0.2369, |
| "step": 4200 |
| }, |
| { |
| "epoch": 9.01, |
| "eval_loss": 0.2714526355266571, |
| "eval_runtime": 696.7746, |
| "eval_samples_per_second": 3.301, |
| "eval_steps_per_second": 0.413, |
| "step": 4200 |
| }, |
| { |
| "epoch": 9.03, |
| "learning_rate": 0.00024685344827586207, |
| "loss": 0.2092, |
| "step": 4210 |
| }, |
| { |
| "epoch": 9.06, |
| "learning_rate": 0.00024672413793103446, |
| "loss": 0.2239, |
| "step": 4220 |
| }, |
| { |
| "epoch": 9.08, |
| "learning_rate": 0.0002465948275862069, |
| "loss": 0.2132, |
| "step": 4230 |
| }, |
| { |
| "epoch": 9.1, |
| "learning_rate": 0.00024646551724137925, |
| "loss": 0.1977, |
| "step": 4240 |
| }, |
| { |
| "epoch": 9.12, |
| "learning_rate": 0.0002463362068965517, |
| "loss": 0.2083, |
| "step": 4250 |
| }, |
| { |
| "epoch": 9.14, |
| "learning_rate": 0.0002462068965517241, |
| "loss": 0.2238, |
| "step": 4260 |
| }, |
| { |
| "epoch": 9.16, |
| "learning_rate": 0.00024607758620689654, |
| "loss": 0.2004, |
| "step": 4270 |
| }, |
| { |
| "epoch": 9.18, |
| "learning_rate": 0.00024594827586206893, |
| "loss": 0.2157, |
| "step": 4280 |
| }, |
| { |
| "epoch": 9.21, |
| "learning_rate": 0.0002458189655172414, |
| "loss": 0.2144, |
| "step": 4290 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 0.00024568965517241377, |
| "loss": 0.2082, |
| "step": 4300 |
| }, |
| { |
| "epoch": 9.25, |
| "learning_rate": 0.0002455603448275862, |
| "loss": 0.2243, |
| "step": 4310 |
| }, |
| { |
| "epoch": 9.27, |
| "learning_rate": 0.0002454310344827586, |
| "loss": 0.2091, |
| "step": 4320 |
| }, |
| { |
| "epoch": 9.29, |
| "learning_rate": 0.000245301724137931, |
| "loss": 0.2147, |
| "step": 4330 |
| }, |
| { |
| "epoch": 9.31, |
| "learning_rate": 0.0002451724137931034, |
| "loss": 0.2214, |
| "step": 4340 |
| }, |
| { |
| "epoch": 9.33, |
| "learning_rate": 0.00024504310344827584, |
| "loss": 0.233, |
| "step": 4350 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 0.00024491379310344824, |
| "loss": 0.2233, |
| "step": 4360 |
| }, |
| { |
| "epoch": 9.38, |
| "learning_rate": 0.0002447844827586207, |
| "loss": 0.2096, |
| "step": 4370 |
| }, |
| { |
| "epoch": 9.4, |
| "learning_rate": 0.0002446551724137931, |
| "loss": 0.2266, |
| "step": 4380 |
| }, |
| { |
| "epoch": 9.42, |
| "learning_rate": 0.0002445258620689655, |
| "loss": 0.2252, |
| "step": 4390 |
| }, |
| { |
| "epoch": 9.44, |
| "learning_rate": 0.0002443965517241379, |
| "loss": 0.2189, |
| "step": 4400 |
| }, |
| { |
| "epoch": 9.46, |
| "learning_rate": 0.0002442672413793103, |
| "loss": 0.2122, |
| "step": 4410 |
| }, |
| { |
| "epoch": 9.48, |
| "learning_rate": 0.00024413793103448273, |
| "loss": 0.2241, |
| "step": 4420 |
| }, |
| { |
| "epoch": 9.51, |
| "learning_rate": 0.00024400862068965515, |
| "loss": 0.2246, |
| "step": 4430 |
| }, |
| { |
| "epoch": 9.53, |
| "learning_rate": 0.00024387931034482757, |
| "loss": 0.2305, |
| "step": 4440 |
| }, |
| { |
| "epoch": 9.55, |
| "learning_rate": 0.00024375, |
| "loss": 0.215, |
| "step": 4450 |
| }, |
| { |
| "epoch": 9.57, |
| "learning_rate": 0.0002436206896551724, |
| "loss": 0.2259, |
| "step": 4460 |
| }, |
| { |
| "epoch": 9.59, |
| "learning_rate": 0.0002434913793103448, |
| "loss": 0.2256, |
| "step": 4470 |
| }, |
| { |
| "epoch": 9.61, |
| "learning_rate": 0.00024336206896551722, |
| "loss": 0.2202, |
| "step": 4480 |
| }, |
| { |
| "epoch": 9.64, |
| "learning_rate": 0.00024323275862068964, |
| "loss": 0.209, |
| "step": 4490 |
| }, |
| { |
| "epoch": 9.66, |
| "learning_rate": 0.00024310344827586203, |
| "loss": 0.2151, |
| "step": 4500 |
| }, |
| { |
| "epoch": 9.66, |
| "eval_loss": 0.267652302980423, |
| "eval_runtime": 696.7098, |
| "eval_samples_per_second": 3.301, |
| "eval_steps_per_second": 0.413, |
| "step": 4500 |
| }, |
| { |
| "epoch": 9.68, |
| "learning_rate": 0.00024297413793103445, |
| "loss": 0.2142, |
| "step": 4510 |
| }, |
| { |
| "epoch": 9.7, |
| "learning_rate": 0.00024284482758620687, |
| "loss": 0.2209, |
| "step": 4520 |
| }, |
| { |
| "epoch": 9.72, |
| "learning_rate": 0.0002427155172413793, |
| "loss": 0.2168, |
| "step": 4530 |
| }, |
| { |
| "epoch": 9.74, |
| "learning_rate": 0.00024258620689655171, |
| "loss": 0.2014, |
| "step": 4540 |
| }, |
| { |
| "epoch": 9.76, |
| "learning_rate": 0.00024245689655172413, |
| "loss": 0.2208, |
| "step": 4550 |
| }, |
| { |
| "epoch": 9.79, |
| "learning_rate": 0.00024232758620689653, |
| "loss": 0.2216, |
| "step": 4560 |
| }, |
| { |
| "epoch": 9.81, |
| "learning_rate": 0.00024219827586206895, |
| "loss": 0.22, |
| "step": 4570 |
| }, |
| { |
| "epoch": 9.83, |
| "learning_rate": 0.00024206896551724134, |
| "loss": 0.236, |
| "step": 4580 |
| }, |
| { |
| "epoch": 9.85, |
| "learning_rate": 0.00024193965517241376, |
| "loss": 0.2205, |
| "step": 4590 |
| }, |
| { |
| "epoch": 9.87, |
| "learning_rate": 0.00024181034482758618, |
| "loss": 0.2111, |
| "step": 4600 |
| }, |
| { |
| "epoch": 9.89, |
| "learning_rate": 0.0002416810344827586, |
| "loss": 0.2107, |
| "step": 4610 |
| }, |
| { |
| "epoch": 9.91, |
| "learning_rate": 0.00024155172413793102, |
| "loss": 0.2156, |
| "step": 4620 |
| }, |
| { |
| "epoch": 9.94, |
| "learning_rate": 0.00024142241379310344, |
| "loss": 0.2224, |
| "step": 4630 |
| }, |
| { |
| "epoch": 9.96, |
| "learning_rate": 0.00024129310344827586, |
| "loss": 0.2261, |
| "step": 4640 |
| }, |
| { |
| "epoch": 9.98, |
| "learning_rate": 0.00024116379310344825, |
| "loss": 0.2371, |
| "step": 4650 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 0.00024103448275862067, |
| "loss": 0.2169, |
| "step": 4660 |
| }, |
| { |
| "epoch": 10.02, |
| "learning_rate": 0.00024090517241379307, |
| "loss": 0.2099, |
| "step": 4670 |
| }, |
| { |
| "epoch": 10.04, |
| "learning_rate": 0.00024077586206896549, |
| "loss": 0.1993, |
| "step": 4680 |
| }, |
| { |
| "epoch": 10.06, |
| "learning_rate": 0.0002406465517241379, |
| "loss": 0.2091, |
| "step": 4690 |
| }, |
| { |
| "epoch": 10.09, |
| "learning_rate": 0.00024051724137931033, |
| "loss": 0.2007, |
| "step": 4700 |
| }, |
| { |
| "epoch": 10.11, |
| "learning_rate": 0.00024038793103448275, |
| "loss": 0.1941, |
| "step": 4710 |
| }, |
| { |
| "epoch": 10.13, |
| "learning_rate": 0.00024025862068965517, |
| "loss": 0.2048, |
| "step": 4720 |
| }, |
| { |
| "epoch": 10.15, |
| "learning_rate": 0.00024012931034482759, |
| "loss": 0.195, |
| "step": 4730 |
| }, |
| { |
| "epoch": 10.17, |
| "learning_rate": 0.00023999999999999998, |
| "loss": 0.2154, |
| "step": 4740 |
| }, |
| { |
| "epoch": 10.19, |
| "learning_rate": 0.00023987068965517237, |
| "loss": 0.2071, |
| "step": 4750 |
| }, |
| { |
| "epoch": 10.21, |
| "learning_rate": 0.0002397413793103448, |
| "loss": 0.2151, |
| "step": 4760 |
| }, |
| { |
| "epoch": 10.24, |
| "learning_rate": 0.0002396120689655172, |
| "loss": 0.1928, |
| "step": 4770 |
| }, |
| { |
| "epoch": 10.26, |
| "learning_rate": 0.00023948275862068963, |
| "loss": 0.2161, |
| "step": 4780 |
| }, |
| { |
| "epoch": 10.28, |
| "learning_rate": 0.00023935344827586205, |
| "loss": 0.1951, |
| "step": 4790 |
| }, |
| { |
| "epoch": 10.3, |
| "learning_rate": 0.00023922413793103447, |
| "loss": 0.2064, |
| "step": 4800 |
| }, |
| { |
| "epoch": 10.3, |
| "eval_loss": 0.2673773169517517, |
| "eval_runtime": 694.4441, |
| "eval_samples_per_second": 3.312, |
| "eval_steps_per_second": 0.415, |
| "step": 4800 |
| }, |
| { |
| "epoch": 10.32, |
| "learning_rate": 0.0002390948275862069, |
| "loss": 0.1987, |
| "step": 4810 |
| }, |
| { |
| "epoch": 10.34, |
| "learning_rate": 0.0002389655172413793, |
| "loss": 0.1996, |
| "step": 4820 |
| }, |
| { |
| "epoch": 10.36, |
| "learning_rate": 0.00023883620689655173, |
| "loss": 0.1964, |
| "step": 4830 |
| }, |
| { |
| "epoch": 10.39, |
| "learning_rate": 0.0002387068965517241, |
| "loss": 0.2069, |
| "step": 4840 |
| }, |
| { |
| "epoch": 10.41, |
| "learning_rate": 0.00023857758620689652, |
| "loss": 0.2126, |
| "step": 4850 |
| }, |
| { |
| "epoch": 10.43, |
| "learning_rate": 0.00023844827586206894, |
| "loss": 0.2194, |
| "step": 4860 |
| }, |
| { |
| "epoch": 10.45, |
| "learning_rate": 0.00023831896551724136, |
| "loss": 0.2165, |
| "step": 4870 |
| }, |
| { |
| "epoch": 10.47, |
| "learning_rate": 0.00023818965517241378, |
| "loss": 0.2031, |
| "step": 4880 |
| }, |
| { |
| "epoch": 10.49, |
| "learning_rate": 0.0002380603448275862, |
| "loss": 0.2023, |
| "step": 4890 |
| }, |
| { |
| "epoch": 10.52, |
| "learning_rate": 0.00023793103448275862, |
| "loss": 0.2235, |
| "step": 4900 |
| }, |
| { |
| "epoch": 10.54, |
| "learning_rate": 0.00023780172413793104, |
| "loss": 0.2144, |
| "step": 4910 |
| }, |
| { |
| "epoch": 10.56, |
| "learning_rate": 0.0002376724137931034, |
| "loss": 0.202, |
| "step": 4920 |
| }, |
| { |
| "epoch": 10.58, |
| "learning_rate": 0.00023754310344827582, |
| "loss": 0.2016, |
| "step": 4930 |
| }, |
| { |
| "epoch": 10.6, |
| "learning_rate": 0.00023741379310344824, |
| "loss": 0.1998, |
| "step": 4940 |
| }, |
| { |
| "epoch": 10.62, |
| "learning_rate": 0.00023728448275862066, |
| "loss": 0.2135, |
| "step": 4950 |
| }, |
| { |
| "epoch": 10.64, |
| "learning_rate": 0.00023715517241379308, |
| "loss": 0.2106, |
| "step": 4960 |
| }, |
| { |
| "epoch": 10.67, |
| "learning_rate": 0.0002370258620689655, |
| "loss": 0.2158, |
| "step": 4970 |
| }, |
| { |
| "epoch": 10.69, |
| "learning_rate": 0.00023689655172413792, |
| "loss": 0.206, |
| "step": 4980 |
| }, |
| { |
| "epoch": 10.71, |
| "learning_rate": 0.00023676724137931034, |
| "loss": 0.2224, |
| "step": 4990 |
| }, |
| { |
| "epoch": 10.73, |
| "learning_rate": 0.00023663793103448276, |
| "loss": 0.2159, |
| "step": 5000 |
| }, |
| { |
| "epoch": 10.75, |
| "learning_rate": 0.00023650862068965513, |
| "loss": 0.1939, |
| "step": 5010 |
| }, |
| { |
| "epoch": 10.77, |
| "learning_rate": 0.00023637931034482755, |
| "loss": 0.208, |
| "step": 5020 |
| }, |
| { |
| "epoch": 10.79, |
| "learning_rate": 0.00023624999999999997, |
| "loss": 0.1979, |
| "step": 5030 |
| }, |
| { |
| "epoch": 10.82, |
| "learning_rate": 0.0002361206896551724, |
| "loss": 0.203, |
| "step": 5040 |
| }, |
| { |
| "epoch": 10.84, |
| "learning_rate": 0.0002359913793103448, |
| "loss": 0.1821, |
| "step": 5050 |
| }, |
| { |
| "epoch": 10.86, |
| "learning_rate": 0.00023586206896551723, |
| "loss": 0.2111, |
| "step": 5060 |
| }, |
| { |
| "epoch": 10.88, |
| "learning_rate": 0.00023573275862068965, |
| "loss": 0.2065, |
| "step": 5070 |
| }, |
| { |
| "epoch": 10.9, |
| "learning_rate": 0.00023560344827586207, |
| "loss": 0.2195, |
| "step": 5080 |
| }, |
| { |
| "epoch": 10.92, |
| "learning_rate": 0.00023547413793103443, |
| "loss": 0.2047, |
| "step": 5090 |
| }, |
| { |
| "epoch": 10.94, |
| "learning_rate": 0.00023534482758620685, |
| "loss": 0.2156, |
| "step": 5100 |
| }, |
| { |
| "epoch": 10.94, |
| "eval_loss": 0.2624925971031189, |
| "eval_runtime": 695.4942, |
| "eval_samples_per_second": 3.307, |
| "eval_steps_per_second": 0.414, |
| "step": 5100 |
| }, |
| { |
| "epoch": 10.97, |
| "learning_rate": 0.00023521551724137927, |
| "loss": 0.1963, |
| "step": 5110 |
| }, |
| { |
| "epoch": 10.99, |
| "learning_rate": 0.0002350862068965517, |
| "loss": 0.2057, |
| "step": 5120 |
| }, |
| { |
| "epoch": 11.01, |
| "learning_rate": 0.00023495689655172411, |
| "loss": 0.2016, |
| "step": 5130 |
| }, |
| { |
| "epoch": 11.03, |
| "learning_rate": 0.00023482758620689653, |
| "loss": 0.1746, |
| "step": 5140 |
| }, |
| { |
| "epoch": 11.05, |
| "learning_rate": 0.00023469827586206895, |
| "loss": 0.1906, |
| "step": 5150 |
| }, |
| { |
| "epoch": 11.07, |
| "learning_rate": 0.00023456896551724137, |
| "loss": 0.2023, |
| "step": 5160 |
| }, |
| { |
| "epoch": 11.09, |
| "learning_rate": 0.0002344396551724138, |
| "loss": 0.1901, |
| "step": 5170 |
| }, |
| { |
| "epoch": 11.12, |
| "learning_rate": 0.00023431034482758616, |
| "loss": 0.198, |
| "step": 5180 |
| }, |
| { |
| "epoch": 11.14, |
| "learning_rate": 0.00023418103448275858, |
| "loss": 0.2133, |
| "step": 5190 |
| }, |
| { |
| "epoch": 11.16, |
| "learning_rate": 0.000234051724137931, |
| "loss": 0.1953, |
| "step": 5200 |
| }, |
| { |
| "epoch": 11.18, |
| "learning_rate": 0.00023392241379310342, |
| "loss": 0.201, |
| "step": 5210 |
| }, |
| { |
| "epoch": 11.2, |
| "learning_rate": 0.00023379310344827584, |
| "loss": 0.1881, |
| "step": 5220 |
| }, |
| { |
| "epoch": 11.22, |
| "learning_rate": 0.00023366379310344826, |
| "loss": 0.2097, |
| "step": 5230 |
| }, |
| { |
| "epoch": 11.24, |
| "learning_rate": 0.00023353448275862068, |
| "loss": 0.1912, |
| "step": 5240 |
| }, |
| { |
| "epoch": 11.27, |
| "learning_rate": 0.0002334051724137931, |
| "loss": 0.1907, |
| "step": 5250 |
| }, |
| { |
| "epoch": 11.29, |
| "learning_rate": 0.00023327586206896552, |
| "loss": 0.1999, |
| "step": 5260 |
| }, |
| { |
| "epoch": 11.31, |
| "learning_rate": 0.00023314655172413789, |
| "loss": 0.194, |
| "step": 5270 |
| }, |
| { |
| "epoch": 11.33, |
| "learning_rate": 0.0002330172413793103, |
| "loss": 0.181, |
| "step": 5280 |
| }, |
| { |
| "epoch": 11.35, |
| "learning_rate": 0.00023288793103448273, |
| "loss": 0.1823, |
| "step": 5290 |
| }, |
| { |
| "epoch": 11.37, |
| "learning_rate": 0.00023275862068965515, |
| "loss": 0.1928, |
| "step": 5300 |
| }, |
| { |
| "epoch": 11.39, |
| "learning_rate": 0.00023262931034482757, |
| "loss": 0.2028, |
| "step": 5310 |
| }, |
| { |
| "epoch": 11.42, |
| "learning_rate": 0.00023249999999999999, |
| "loss": 0.2061, |
| "step": 5320 |
| }, |
| { |
| "epoch": 11.44, |
| "learning_rate": 0.0002323706896551724, |
| "loss": 0.2075, |
| "step": 5330 |
| }, |
| { |
| "epoch": 11.46, |
| "learning_rate": 0.00023224137931034483, |
| "loss": 0.1822, |
| "step": 5340 |
| }, |
| { |
| "epoch": 11.48, |
| "learning_rate": 0.00023211206896551722, |
| "loss": 0.1974, |
| "step": 5350 |
| }, |
| { |
| "epoch": 11.5, |
| "learning_rate": 0.0002319827586206896, |
| "loss": 0.1843, |
| "step": 5360 |
| }, |
| { |
| "epoch": 11.52, |
| "learning_rate": 0.00023185344827586203, |
| "loss": 0.2105, |
| "step": 5370 |
| }, |
| { |
| "epoch": 11.55, |
| "learning_rate": 0.00023172413793103445, |
| "loss": 0.1977, |
| "step": 5380 |
| }, |
| { |
| "epoch": 11.57, |
| "learning_rate": 0.00023159482758620687, |
| "loss": 0.1923, |
| "step": 5390 |
| }, |
| { |
| "epoch": 11.59, |
| "learning_rate": 0.0002314655172413793, |
| "loss": 0.1871, |
| "step": 5400 |
| }, |
| { |
| "epoch": 11.59, |
| "eval_loss": 0.26061421632766724, |
| "eval_runtime": 697.8127, |
| "eval_samples_per_second": 3.296, |
| "eval_steps_per_second": 0.413, |
| "step": 5400 |
| }, |
| { |
| "epoch": 11.61, |
| "learning_rate": 0.0002313362068965517, |
| "loss": 0.1952, |
| "step": 5410 |
| }, |
| { |
| "epoch": 11.63, |
| "learning_rate": 0.00023120689655172413, |
| "loss": 0.1876, |
| "step": 5420 |
| }, |
| { |
| "epoch": 11.65, |
| "learning_rate": 0.00023107758620689655, |
| "loss": 0.181, |
| "step": 5430 |
| }, |
| { |
| "epoch": 11.67, |
| "learning_rate": 0.00023094827586206895, |
| "loss": 0.2049, |
| "step": 5440 |
| }, |
| { |
| "epoch": 11.7, |
| "learning_rate": 0.00023081896551724134, |
| "loss": 0.1924, |
| "step": 5450 |
| }, |
| { |
| "epoch": 11.72, |
| "learning_rate": 0.00023068965517241376, |
| "loss": 0.1945, |
| "step": 5460 |
| }, |
| { |
| "epoch": 11.74, |
| "learning_rate": 0.00023056034482758618, |
| "loss": 0.2041, |
| "step": 5470 |
| }, |
| { |
| "epoch": 11.76, |
| "learning_rate": 0.0002304310344827586, |
| "loss": 0.2127, |
| "step": 5480 |
| }, |
| { |
| "epoch": 11.78, |
| "learning_rate": 0.00023030172413793102, |
| "loss": 0.2006, |
| "step": 5490 |
| }, |
| { |
| "epoch": 11.8, |
| "learning_rate": 0.00023017241379310344, |
| "loss": 0.1861, |
| "step": 5500 |
| }, |
| { |
| "epoch": 11.82, |
| "learning_rate": 0.00023004310344827586, |
| "loss": 0.195, |
| "step": 5510 |
| }, |
| { |
| "epoch": 11.85, |
| "learning_rate": 0.00022991379310344825, |
| "loss": 0.1761, |
| "step": 5520 |
| }, |
| { |
| "epoch": 11.87, |
| "learning_rate": 0.00022978448275862067, |
| "loss": 0.2019, |
| "step": 5530 |
| }, |
| { |
| "epoch": 11.89, |
| "learning_rate": 0.00022965517241379306, |
| "loss": 0.194, |
| "step": 5540 |
| }, |
| { |
| "epoch": 11.91, |
| "learning_rate": 0.00022952586206896548, |
| "loss": 0.2097, |
| "step": 5550 |
| }, |
| { |
| "epoch": 11.93, |
| "learning_rate": 0.0002293965517241379, |
| "loss": 0.1994, |
| "step": 5560 |
| }, |
| { |
| "epoch": 11.95, |
| "learning_rate": 0.00022926724137931032, |
| "loss": 0.1912, |
| "step": 5570 |
| }, |
| { |
| "epoch": 11.97, |
| "learning_rate": 0.00022913793103448274, |
| "loss": 0.1982, |
| "step": 5580 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 0.00022900862068965516, |
| "loss": 0.2096, |
| "step": 5590 |
| }, |
| { |
| "epoch": 12.02, |
| "learning_rate": 0.00022887931034482758, |
| "loss": 0.1864, |
| "step": 5600 |
| }, |
| { |
| "epoch": 12.04, |
| "learning_rate": 0.00022874999999999998, |
| "loss": 0.1852, |
| "step": 5610 |
| }, |
| { |
| "epoch": 12.06, |
| "learning_rate": 0.0002286206896551724, |
| "loss": 0.1758, |
| "step": 5620 |
| }, |
| { |
| "epoch": 12.08, |
| "learning_rate": 0.0002284913793103448, |
| "loss": 0.1845, |
| "step": 5630 |
| }, |
| { |
| "epoch": 12.1, |
| "learning_rate": 0.0002283620689655172, |
| "loss": 0.1699, |
| "step": 5640 |
| }, |
| { |
| "epoch": 12.12, |
| "learning_rate": 0.00022823275862068963, |
| "loss": 0.1843, |
| "step": 5650 |
| }, |
| { |
| "epoch": 12.15, |
| "learning_rate": 0.00022810344827586205, |
| "loss": 0.1836, |
| "step": 5660 |
| }, |
| { |
| "epoch": 12.17, |
| "learning_rate": 0.00022797413793103447, |
| "loss": 0.1804, |
| "step": 5670 |
| }, |
| { |
| "epoch": 12.19, |
| "learning_rate": 0.0002278448275862069, |
| "loss": 0.169, |
| "step": 5680 |
| }, |
| { |
| "epoch": 12.21, |
| "learning_rate": 0.00022771551724137928, |
| "loss": 0.1882, |
| "step": 5690 |
| }, |
| { |
| "epoch": 12.23, |
| "learning_rate": 0.0002275862068965517, |
| "loss": 0.1849, |
| "step": 5700 |
| }, |
| { |
| "epoch": 12.23, |
| "eval_loss": 0.25843313336372375, |
| "eval_runtime": 692.9994, |
| "eval_samples_per_second": 3.319, |
| "eval_steps_per_second": 0.416, |
| "step": 5700 |
| }, |
| { |
| "epoch": 12.25, |
| "learning_rate": 0.00022745689655172412, |
| "loss": 0.1851, |
| "step": 5710 |
| }, |
| { |
| "epoch": 12.27, |
| "learning_rate": 0.00022732758620689652, |
| "loss": 0.1736, |
| "step": 5720 |
| }, |
| { |
| "epoch": 12.3, |
| "learning_rate": 0.00022719827586206894, |
| "loss": 0.1854, |
| "step": 5730 |
| }, |
| { |
| "epoch": 12.32, |
| "learning_rate": 0.00022706896551724136, |
| "loss": 0.169, |
| "step": 5740 |
| }, |
| { |
| "epoch": 12.34, |
| "learning_rate": 0.00022693965517241378, |
| "loss": 0.1715, |
| "step": 5750 |
| }, |
| { |
| "epoch": 12.36, |
| "learning_rate": 0.0002268103448275862, |
| "loss": 0.1883, |
| "step": 5760 |
| }, |
| { |
| "epoch": 12.38, |
| "learning_rate": 0.00022668103448275862, |
| "loss": 0.1848, |
| "step": 5770 |
| }, |
| { |
| "epoch": 12.4, |
| "learning_rate": 0.000226551724137931, |
| "loss": 0.2009, |
| "step": 5780 |
| }, |
| { |
| "epoch": 12.42, |
| "learning_rate": 0.00022642241379310343, |
| "loss": 0.1845, |
| "step": 5790 |
| }, |
| { |
| "epoch": 12.45, |
| "learning_rate": 0.00022629310344827585, |
| "loss": 0.1808, |
| "step": 5800 |
| }, |
| { |
| "epoch": 12.47, |
| "learning_rate": 0.00022616379310344824, |
| "loss": 0.1946, |
| "step": 5810 |
| }, |
| { |
| "epoch": 12.49, |
| "learning_rate": 0.00022603448275862066, |
| "loss": 0.179, |
| "step": 5820 |
| }, |
| { |
| "epoch": 12.51, |
| "learning_rate": 0.00022590517241379308, |
| "loss": 0.1908, |
| "step": 5830 |
| }, |
| { |
| "epoch": 12.53, |
| "learning_rate": 0.0002257758620689655, |
| "loss": 0.1677, |
| "step": 5840 |
| }, |
| { |
| "epoch": 12.55, |
| "learning_rate": 0.00022564655172413792, |
| "loss": 0.1872, |
| "step": 5850 |
| }, |
| { |
| "epoch": 12.58, |
| "learning_rate": 0.00022551724137931031, |
| "loss": 0.1771, |
| "step": 5860 |
| }, |
| { |
| "epoch": 12.6, |
| "learning_rate": 0.00022538793103448273, |
| "loss": 0.1729, |
| "step": 5870 |
| }, |
| { |
| "epoch": 12.62, |
| "learning_rate": 0.00022525862068965515, |
| "loss": 0.1847, |
| "step": 5880 |
| }, |
| { |
| "epoch": 12.64, |
| "learning_rate": 0.00022512931034482757, |
| "loss": 0.1833, |
| "step": 5890 |
| }, |
| { |
| "epoch": 12.66, |
| "learning_rate": 0.000225, |
| "loss": 0.1783, |
| "step": 5900 |
| }, |
| { |
| "epoch": 12.68, |
| "learning_rate": 0.0002248706896551724, |
| "loss": 0.1912, |
| "step": 5910 |
| }, |
| { |
| "epoch": 12.7, |
| "learning_rate": 0.0002247413793103448, |
| "loss": 0.1652, |
| "step": 5920 |
| }, |
| { |
| "epoch": 12.73, |
| "learning_rate": 0.00022461206896551723, |
| "loss": 0.1796, |
| "step": 5930 |
| }, |
| { |
| "epoch": 12.75, |
| "learning_rate": 0.00022448275862068965, |
| "loss": 0.1819, |
| "step": 5940 |
| }, |
| { |
| "epoch": 12.77, |
| "learning_rate": 0.00022435344827586204, |
| "loss": 0.1849, |
| "step": 5950 |
| }, |
| { |
| "epoch": 12.79, |
| "learning_rate": 0.00022422413793103446, |
| "loss": 0.1799, |
| "step": 5960 |
| }, |
| { |
| "epoch": 12.81, |
| "learning_rate": 0.00022409482758620688, |
| "loss": 0.1895, |
| "step": 5970 |
| }, |
| { |
| "epoch": 12.83, |
| "learning_rate": 0.0002239655172413793, |
| "loss": 0.1834, |
| "step": 5980 |
| }, |
| { |
| "epoch": 12.85, |
| "learning_rate": 0.00022383620689655172, |
| "loss": 0.1888, |
| "step": 5990 |
| }, |
| { |
| "epoch": 12.88, |
| "learning_rate": 0.0002237068965517241, |
| "loss": 0.1908, |
| "step": 6000 |
| }, |
| { |
| "epoch": 12.88, |
| "eval_loss": 0.25369590520858765, |
| "eval_runtime": 702.5811, |
| "eval_samples_per_second": 3.274, |
| "eval_steps_per_second": 0.41, |
| "step": 6000 |
| }, |
| { |
| "epoch": 12.9, |
| "learning_rate": 0.00022357758620689653, |
| "loss": 0.1805, |
| "step": 6010 |
| }, |
| { |
| "epoch": 12.92, |
| "learning_rate": 0.00022344827586206895, |
| "loss": 0.1754, |
| "step": 6020 |
| }, |
| { |
| "epoch": 12.94, |
| "learning_rate": 0.00022331896551724135, |
| "loss": 0.1816, |
| "step": 6030 |
| }, |
| { |
| "epoch": 12.96, |
| "learning_rate": 0.00022318965517241377, |
| "loss": 0.182, |
| "step": 6040 |
| }, |
| { |
| "epoch": 12.98, |
| "learning_rate": 0.00022306034482758619, |
| "loss": 0.1808, |
| "step": 6050 |
| }, |
| { |
| "epoch": 13.0, |
| "learning_rate": 0.0002229310344827586, |
| "loss": 0.1851, |
| "step": 6060 |
| }, |
| { |
| "epoch": 13.03, |
| "learning_rate": 0.00022280172413793103, |
| "loss": 0.1728, |
| "step": 6070 |
| }, |
| { |
| "epoch": 13.05, |
| "learning_rate": 0.00022267241379310345, |
| "loss": 0.1704, |
| "step": 6080 |
| }, |
| { |
| "epoch": 13.07, |
| "learning_rate": 0.00022254310344827584, |
| "loss": 0.1723, |
| "step": 6090 |
| }, |
| { |
| "epoch": 13.09, |
| "learning_rate": 0.00022241379310344826, |
| "loss": 0.1768, |
| "step": 6100 |
| }, |
| { |
| "epoch": 13.11, |
| "learning_rate": 0.00022228448275862068, |
| "loss": 0.1752, |
| "step": 6110 |
| }, |
| { |
| "epoch": 13.13, |
| "learning_rate": 0.00022215517241379307, |
| "loss": 0.1812, |
| "step": 6120 |
| }, |
| { |
| "epoch": 13.15, |
| "learning_rate": 0.0002220258620689655, |
| "loss": 0.1809, |
| "step": 6130 |
| }, |
| { |
| "epoch": 13.18, |
| "learning_rate": 0.0002218965517241379, |
| "loss": 0.1744, |
| "step": 6140 |
| }, |
| { |
| "epoch": 13.2, |
| "learning_rate": 0.00022176724137931033, |
| "loss": 0.1765, |
| "step": 6150 |
| }, |
| { |
| "epoch": 13.22, |
| "learning_rate": 0.00022163793103448275, |
| "loss": 0.1867, |
| "step": 6160 |
| }, |
| { |
| "epoch": 13.24, |
| "learning_rate": 0.00022150862068965517, |
| "loss": 0.1785, |
| "step": 6170 |
| }, |
| { |
| "epoch": 13.26, |
| "learning_rate": 0.00022137931034482756, |
| "loss": 0.1776, |
| "step": 6180 |
| }, |
| { |
| "epoch": 13.28, |
| "learning_rate": 0.00022124999999999998, |
| "loss": 0.1709, |
| "step": 6190 |
| }, |
| { |
| "epoch": 13.3, |
| "learning_rate": 0.00022112068965517238, |
| "loss": 0.1743, |
| "step": 6200 |
| }, |
| { |
| "epoch": 13.33, |
| "learning_rate": 0.0002209913793103448, |
| "loss": 0.188, |
| "step": 6210 |
| }, |
| { |
| "epoch": 13.35, |
| "learning_rate": 0.00022086206896551722, |
| "loss": 0.1724, |
| "step": 6220 |
| }, |
| { |
| "epoch": 13.37, |
| "learning_rate": 0.00022073275862068964, |
| "loss": 0.189, |
| "step": 6230 |
| }, |
| { |
| "epoch": 13.39, |
| "learning_rate": 0.00022060344827586206, |
| "loss": 0.1759, |
| "step": 6240 |
| }, |
| { |
| "epoch": 13.41, |
| "learning_rate": 0.00022047413793103448, |
| "loss": 0.1852, |
| "step": 6250 |
| }, |
| { |
| "epoch": 13.43, |
| "learning_rate": 0.0002203448275862069, |
| "loss": 0.1881, |
| "step": 6260 |
| }, |
| { |
| "epoch": 13.45, |
| "learning_rate": 0.0002202155172413793, |
| "loss": 0.1715, |
| "step": 6270 |
| }, |
| { |
| "epoch": 13.48, |
| "learning_rate": 0.0002200862068965517, |
| "loss": 0.1663, |
| "step": 6280 |
| }, |
| { |
| "epoch": 13.5, |
| "learning_rate": 0.0002199568965517241, |
| "loss": 0.1735, |
| "step": 6290 |
| }, |
| { |
| "epoch": 13.52, |
| "learning_rate": 0.00021982758620689652, |
| "loss": 0.1779, |
| "step": 6300 |
| }, |
| { |
| "epoch": 13.52, |
| "eval_loss": 0.2541274428367615, |
| "eval_runtime": 702.7934, |
| "eval_samples_per_second": 3.273, |
| "eval_steps_per_second": 0.41, |
| "step": 6300 |
| }, |
| { |
| "epoch": 13.54, |
| "learning_rate": 0.00021969827586206894, |
| "loss": 0.169, |
| "step": 6310 |
| }, |
| { |
| "epoch": 13.56, |
| "learning_rate": 0.00021956896551724136, |
| "loss": 0.1739, |
| "step": 6320 |
| }, |
| { |
| "epoch": 13.58, |
| "learning_rate": 0.00021943965517241378, |
| "loss": 0.1799, |
| "step": 6330 |
| }, |
| { |
| "epoch": 13.61, |
| "learning_rate": 0.0002193103448275862, |
| "loss": 0.1724, |
| "step": 6340 |
| }, |
| { |
| "epoch": 13.63, |
| "learning_rate": 0.00021918103448275862, |
| "loss": 0.183, |
| "step": 6350 |
| }, |
| { |
| "epoch": 13.65, |
| "learning_rate": 0.00021905172413793102, |
| "loss": 0.1722, |
| "step": 6360 |
| }, |
| { |
| "epoch": 13.67, |
| "learning_rate": 0.0002189224137931034, |
| "loss": 0.1711, |
| "step": 6370 |
| }, |
| { |
| "epoch": 13.69, |
| "learning_rate": 0.00021879310344827583, |
| "loss": 0.1682, |
| "step": 6380 |
| }, |
| { |
| "epoch": 13.71, |
| "learning_rate": 0.00021866379310344825, |
| "loss": 0.1718, |
| "step": 6390 |
| }, |
| { |
| "epoch": 13.73, |
| "learning_rate": 0.00021853448275862067, |
| "loss": 0.1858, |
| "step": 6400 |
| }, |
| { |
| "epoch": 13.76, |
| "learning_rate": 0.0002184051724137931, |
| "loss": 0.1746, |
| "step": 6410 |
| }, |
| { |
| "epoch": 13.78, |
| "learning_rate": 0.0002182758620689655, |
| "loss": 0.1685, |
| "step": 6420 |
| }, |
| { |
| "epoch": 13.8, |
| "learning_rate": 0.00021814655172413793, |
| "loss": 0.166, |
| "step": 6430 |
| }, |
| { |
| "epoch": 13.82, |
| "learning_rate": 0.00021801724137931035, |
| "loss": 0.1813, |
| "step": 6440 |
| }, |
| { |
| "epoch": 13.84, |
| "learning_rate": 0.00021788793103448274, |
| "loss": 0.1879, |
| "step": 6450 |
| }, |
| { |
| "epoch": 13.86, |
| "learning_rate": 0.00021775862068965513, |
| "loss": 0.18, |
| "step": 6460 |
| }, |
| { |
| "epoch": 13.88, |
| "learning_rate": 0.00021762931034482755, |
| "loss": 0.175, |
| "step": 6470 |
| }, |
| { |
| "epoch": 13.91, |
| "learning_rate": 0.00021749999999999997, |
| "loss": 0.1763, |
| "step": 6480 |
| }, |
| { |
| "epoch": 13.93, |
| "learning_rate": 0.0002173706896551724, |
| "loss": 0.1757, |
| "step": 6490 |
| }, |
| { |
| "epoch": 13.95, |
| "learning_rate": 0.00021724137931034481, |
| "loss": 0.1699, |
| "step": 6500 |
| }, |
| { |
| "epoch": 13.97, |
| "learning_rate": 0.00021711206896551723, |
| "loss": 0.1912, |
| "step": 6510 |
| }, |
| { |
| "epoch": 13.99, |
| "learning_rate": 0.00021698275862068965, |
| "loss": 0.1873, |
| "step": 6520 |
| }, |
| { |
| "epoch": 14.01, |
| "learning_rate": 0.00021685344827586207, |
| "loss": 0.181, |
| "step": 6530 |
| }, |
| { |
| "epoch": 14.03, |
| "learning_rate": 0.00021672413793103444, |
| "loss": 0.1639, |
| "step": 6540 |
| }, |
| { |
| "epoch": 14.06, |
| "learning_rate": 0.00021659482758620686, |
| "loss": 0.1635, |
| "step": 6550 |
| }, |
| { |
| "epoch": 14.08, |
| "learning_rate": 0.00021646551724137928, |
| "loss": 0.1701, |
| "step": 6560 |
| }, |
| { |
| "epoch": 14.1, |
| "learning_rate": 0.0002163362068965517, |
| "loss": 0.1679, |
| "step": 6570 |
| }, |
| { |
| "epoch": 14.12, |
| "learning_rate": 0.00021620689655172412, |
| "loss": 0.1491, |
| "step": 6580 |
| }, |
| { |
| "epoch": 14.14, |
| "learning_rate": 0.00021607758620689654, |
| "loss": 0.1556, |
| "step": 6590 |
| }, |
| { |
| "epoch": 14.16, |
| "learning_rate": 0.00021594827586206896, |
| "loss": 0.1688, |
| "step": 6600 |
| }, |
| { |
| "epoch": 14.16, |
| "eval_loss": 0.25178754329681396, |
| "eval_runtime": 701.9482, |
| "eval_samples_per_second": 3.277, |
| "eval_steps_per_second": 0.41, |
| "step": 6600 |
| }, |
| { |
| "epoch": 14.18, |
| "learning_rate": 0.00021581896551724138, |
| "loss": 0.1708, |
| "step": 6610 |
| }, |
| { |
| "epoch": 14.21, |
| "learning_rate": 0.0002156896551724138, |
| "loss": 0.162, |
| "step": 6620 |
| }, |
| { |
| "epoch": 14.23, |
| "learning_rate": 0.00021556034482758617, |
| "loss": 0.153, |
| "step": 6630 |
| }, |
| { |
| "epoch": 14.25, |
| "learning_rate": 0.00021543103448275859, |
| "loss": 0.1552, |
| "step": 6640 |
| }, |
| { |
| "epoch": 14.27, |
| "learning_rate": 0.000215301724137931, |
| "loss": 0.1685, |
| "step": 6650 |
| }, |
| { |
| "epoch": 14.29, |
| "learning_rate": 0.00021517241379310343, |
| "loss": 0.1716, |
| "step": 6660 |
| }, |
| { |
| "epoch": 14.31, |
| "learning_rate": 0.00021504310344827585, |
| "loss": 0.1685, |
| "step": 6670 |
| }, |
| { |
| "epoch": 14.33, |
| "learning_rate": 0.00021491379310344827, |
| "loss": 0.1709, |
| "step": 6680 |
| }, |
| { |
| "epoch": 14.36, |
| "learning_rate": 0.00021478448275862069, |
| "loss": 0.1819, |
| "step": 6690 |
| }, |
| { |
| "epoch": 14.38, |
| "learning_rate": 0.0002146551724137931, |
| "loss": 0.1629, |
| "step": 6700 |
| }, |
| { |
| "epoch": 14.4, |
| "learning_rate": 0.00021452586206896553, |
| "loss": 0.1647, |
| "step": 6710 |
| }, |
| { |
| "epoch": 14.42, |
| "learning_rate": 0.0002143965517241379, |
| "loss": 0.165, |
| "step": 6720 |
| }, |
| { |
| "epoch": 14.44, |
| "learning_rate": 0.0002142672413793103, |
| "loss": 0.1676, |
| "step": 6730 |
| }, |
| { |
| "epoch": 14.46, |
| "learning_rate": 0.00021413793103448273, |
| "loss": 0.1686, |
| "step": 6740 |
| }, |
| { |
| "epoch": 14.48, |
| "learning_rate": 0.00021400862068965515, |
| "loss": 0.1636, |
| "step": 6750 |
| }, |
| { |
| "epoch": 14.51, |
| "learning_rate": 0.00021387931034482757, |
| "loss": 0.1815, |
| "step": 6760 |
| }, |
| { |
| "epoch": 14.53, |
| "learning_rate": 0.00021375, |
| "loss": 0.1663, |
| "step": 6770 |
| }, |
| { |
| "epoch": 14.55, |
| "learning_rate": 0.0002136206896551724, |
| "loss": 0.1627, |
| "step": 6780 |
| }, |
| { |
| "epoch": 14.57, |
| "learning_rate": 0.00021349137931034483, |
| "loss": 0.1696, |
| "step": 6790 |
| }, |
| { |
| "epoch": 14.59, |
| "learning_rate": 0.0002133620689655172, |
| "loss": 0.1688, |
| "step": 6800 |
| }, |
| { |
| "epoch": 14.61, |
| "learning_rate": 0.00021323275862068962, |
| "loss": 0.1767, |
| "step": 6810 |
| }, |
| { |
| "epoch": 14.64, |
| "learning_rate": 0.00021310344827586204, |
| "loss": 0.1775, |
| "step": 6820 |
| }, |
| { |
| "epoch": 14.66, |
| "learning_rate": 0.00021297413793103446, |
| "loss": 0.1761, |
| "step": 6830 |
| }, |
| { |
| "epoch": 14.68, |
| "learning_rate": 0.00021284482758620688, |
| "loss": 0.173, |
| "step": 6840 |
| }, |
| { |
| "epoch": 14.7, |
| "learning_rate": 0.0002127155172413793, |
| "loss": 0.1685, |
| "step": 6850 |
| }, |
| { |
| "epoch": 14.72, |
| "learning_rate": 0.00021258620689655172, |
| "loss": 0.18, |
| "step": 6860 |
| }, |
| { |
| "epoch": 14.74, |
| "learning_rate": 0.00021245689655172414, |
| "loss": 0.1693, |
| "step": 6870 |
| }, |
| { |
| "epoch": 14.76, |
| "learning_rate": 0.00021232758620689656, |
| "loss": 0.1698, |
| "step": 6880 |
| }, |
| { |
| "epoch": 14.79, |
| "learning_rate": 0.00021219827586206892, |
| "loss": 0.1693, |
| "step": 6890 |
| }, |
| { |
| "epoch": 14.81, |
| "learning_rate": 0.00021206896551724134, |
| "loss": 0.1776, |
| "step": 6900 |
| }, |
| { |
| "epoch": 14.81, |
| "eval_loss": 0.24745148420333862, |
| "eval_runtime": 664.4524, |
| "eval_samples_per_second": 3.461, |
| "eval_steps_per_second": 0.433, |
| "step": 6900 |
| }, |
| { |
| "epoch": 14.83, |
| "learning_rate": 0.00021193965517241376, |
| "loss": 0.1721, |
| "step": 6910 |
| }, |
| { |
| "epoch": 14.85, |
| "learning_rate": 0.00021181034482758618, |
| "loss": 0.1668, |
| "step": 6920 |
| }, |
| { |
| "epoch": 14.87, |
| "learning_rate": 0.0002116810344827586, |
| "loss": 0.1688, |
| "step": 6930 |
| }, |
| { |
| "epoch": 14.89, |
| "learning_rate": 0.00021155172413793102, |
| "loss": 0.1877, |
| "step": 6940 |
| }, |
| { |
| "epoch": 14.91, |
| "learning_rate": 0.00021142241379310344, |
| "loss": 0.1643, |
| "step": 6950 |
| }, |
| { |
| "epoch": 14.94, |
| "learning_rate": 0.00021129310344827586, |
| "loss": 0.1651, |
| "step": 6960 |
| }, |
| { |
| "epoch": 14.96, |
| "learning_rate": 0.00021116379310344823, |
| "loss": 0.1672, |
| "step": 6970 |
| }, |
| { |
| "epoch": 14.98, |
| "learning_rate": 0.00021103448275862065, |
| "loss": 0.1697, |
| "step": 6980 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 0.00021090517241379307, |
| "loss": 0.1619, |
| "step": 6990 |
| }, |
| { |
| "epoch": 15.02, |
| "learning_rate": 0.0002107758620689655, |
| "loss": 0.1643, |
| "step": 7000 |
| }, |
| { |
| "epoch": 15.04, |
| "learning_rate": 0.0002106465517241379, |
| "loss": 0.1576, |
| "step": 7010 |
| }, |
| { |
| "epoch": 15.06, |
| "learning_rate": 0.00021051724137931033, |
| "loss": 0.1561, |
| "step": 7020 |
| }, |
| { |
| "epoch": 15.09, |
| "learning_rate": 0.00021038793103448275, |
| "loss": 0.1548, |
| "step": 7030 |
| }, |
| { |
| "epoch": 15.11, |
| "learning_rate": 0.00021025862068965517, |
| "loss": 0.149, |
| "step": 7040 |
| }, |
| { |
| "epoch": 15.13, |
| "learning_rate": 0.0002101293103448276, |
| "loss": 0.1555, |
| "step": 7050 |
| }, |
| { |
| "epoch": 15.15, |
| "learning_rate": 0.00020999999999999998, |
| "loss": 0.1436, |
| "step": 7060 |
| }, |
| { |
| "epoch": 15.17, |
| "learning_rate": 0.00020987068965517237, |
| "loss": 0.1544, |
| "step": 7070 |
| }, |
| { |
| "epoch": 15.19, |
| "learning_rate": 0.0002097413793103448, |
| "loss": 0.1596, |
| "step": 7080 |
| }, |
| { |
| "epoch": 15.21, |
| "learning_rate": 0.00020961206896551721, |
| "loss": 0.1535, |
| "step": 7090 |
| }, |
| { |
| "epoch": 15.24, |
| "learning_rate": 0.00020948275862068963, |
| "loss": 0.145, |
| "step": 7100 |
| }, |
| { |
| "epoch": 15.26, |
| "learning_rate": 0.00020935344827586205, |
| "loss": 0.1606, |
| "step": 7110 |
| }, |
| { |
| "epoch": 15.28, |
| "learning_rate": 0.00020922413793103447, |
| "loss": 0.1526, |
| "step": 7120 |
| }, |
| { |
| "epoch": 15.3, |
| "learning_rate": 0.0002090948275862069, |
| "loss": 0.1652, |
| "step": 7130 |
| }, |
| { |
| "epoch": 15.32, |
| "learning_rate": 0.0002089655172413793, |
| "loss": 0.1654, |
| "step": 7140 |
| }, |
| { |
| "epoch": 15.34, |
| "learning_rate": 0.0002088362068965517, |
| "loss": 0.1564, |
| "step": 7150 |
| }, |
| { |
| "epoch": 15.36, |
| "learning_rate": 0.0002087068965517241, |
| "loss": 0.1594, |
| "step": 7160 |
| }, |
| { |
| "epoch": 15.39, |
| "learning_rate": 0.00020857758620689652, |
| "loss": 0.1665, |
| "step": 7170 |
| }, |
| { |
| "epoch": 15.41, |
| "learning_rate": 0.00020844827586206894, |
| "loss": 0.1598, |
| "step": 7180 |
| }, |
| { |
| "epoch": 15.43, |
| "learning_rate": 0.00020831896551724136, |
| "loss": 0.1597, |
| "step": 7190 |
| }, |
| { |
| "epoch": 15.45, |
| "learning_rate": 0.00020818965517241378, |
| "loss": 0.1645, |
| "step": 7200 |
| }, |
| { |
| "epoch": 15.45, |
| "eval_loss": 0.24765853583812714, |
| "eval_runtime": 666.9815, |
| "eval_samples_per_second": 3.448, |
| "eval_steps_per_second": 0.432, |
| "step": 7200 |
| }, |
| { |
| "epoch": 15.47, |
| "learning_rate": 0.0002080603448275862, |
| "loss": 0.1484, |
| "step": 7210 |
| }, |
| { |
| "epoch": 15.49, |
| "learning_rate": 0.00020793103448275862, |
| "loss": 0.1556, |
| "step": 7220 |
| }, |
| { |
| "epoch": 15.52, |
| "learning_rate": 0.000207801724137931, |
| "loss": 0.1516, |
| "step": 7230 |
| }, |
| { |
| "epoch": 15.54, |
| "learning_rate": 0.00020767241379310343, |
| "loss": 0.1528, |
| "step": 7240 |
| }, |
| { |
| "epoch": 15.56, |
| "learning_rate": 0.00020754310344827583, |
| "loss": 0.1648, |
| "step": 7250 |
| }, |
| { |
| "epoch": 15.58, |
| "learning_rate": 0.00020741379310344825, |
| "loss": 0.1543, |
| "step": 7260 |
| }, |
| { |
| "epoch": 15.6, |
| "learning_rate": 0.00020728448275862067, |
| "loss": 0.1504, |
| "step": 7270 |
| }, |
| { |
| "epoch": 15.62, |
| "learning_rate": 0.00020715517241379309, |
| "loss": 0.1441, |
| "step": 7280 |
| }, |
| { |
| "epoch": 15.64, |
| "learning_rate": 0.0002070258620689655, |
| "loss": 0.162, |
| "step": 7290 |
| }, |
| { |
| "epoch": 15.67, |
| "learning_rate": 0.00020689655172413793, |
| "loss": 0.1444, |
| "step": 7300 |
| }, |
| { |
| "epoch": 15.69, |
| "learning_rate": 0.00020676724137931032, |
| "loss": 0.1478, |
| "step": 7310 |
| }, |
| { |
| "epoch": 15.71, |
| "learning_rate": 0.00020663793103448274, |
| "loss": 0.1672, |
| "step": 7320 |
| }, |
| { |
| "epoch": 15.73, |
| "learning_rate": 0.00020650862068965516, |
| "loss": 0.155, |
| "step": 7330 |
| }, |
| { |
| "epoch": 15.75, |
| "learning_rate": 0.00020637931034482755, |
| "loss": 0.1546, |
| "step": 7340 |
| }, |
| { |
| "epoch": 15.77, |
| "learning_rate": 0.00020624999999999997, |
| "loss": 0.1654, |
| "step": 7350 |
| }, |
| { |
| "epoch": 15.79, |
| "learning_rate": 0.0002061206896551724, |
| "loss": 0.1686, |
| "step": 7360 |
| }, |
| { |
| "epoch": 15.82, |
| "learning_rate": 0.0002059913793103448, |
| "loss": 0.1592, |
| "step": 7370 |
| }, |
| { |
| "epoch": 15.84, |
| "learning_rate": 0.00020586206896551723, |
| "loss": 0.1664, |
| "step": 7380 |
| }, |
| { |
| "epoch": 15.86, |
| "learning_rate": 0.00020573275862068965, |
| "loss": 0.1559, |
| "step": 7390 |
| }, |
| { |
| "epoch": 15.88, |
| "learning_rate": 0.00020560344827586204, |
| "loss": 0.1663, |
| "step": 7400 |
| }, |
| { |
| "epoch": 15.9, |
| "learning_rate": 0.00020547413793103446, |
| "loss": 0.1539, |
| "step": 7410 |
| }, |
| { |
| "epoch": 15.92, |
| "learning_rate": 0.00020534482758620688, |
| "loss": 0.1556, |
| "step": 7420 |
| }, |
| { |
| "epoch": 15.94, |
| "learning_rate": 0.00020521551724137928, |
| "loss": 0.1671, |
| "step": 7430 |
| }, |
| { |
| "epoch": 15.97, |
| "learning_rate": 0.0002050862068965517, |
| "loss": 0.1516, |
| "step": 7440 |
| }, |
| { |
| "epoch": 15.99, |
| "learning_rate": 0.00020495689655172412, |
| "loss": 0.1608, |
| "step": 7450 |
| }, |
| { |
| "epoch": 16.01, |
| "learning_rate": 0.00020482758620689654, |
| "loss": 0.1714, |
| "step": 7460 |
| }, |
| { |
| "epoch": 16.03, |
| "learning_rate": 0.00020469827586206896, |
| "loss": 0.1403, |
| "step": 7470 |
| }, |
| { |
| "epoch": 16.05, |
| "learning_rate": 0.00020456896551724135, |
| "loss": 0.142, |
| "step": 7480 |
| }, |
| { |
| "epoch": 16.07, |
| "learning_rate": 0.00020443965517241377, |
| "loss": 0.1429, |
| "step": 7490 |
| }, |
| { |
| "epoch": 16.09, |
| "learning_rate": 0.0002043103448275862, |
| "loss": 0.1586, |
| "step": 7500 |
| }, |
| { |
| "epoch": 16.09, |
| "eval_loss": 0.24779050052165985, |
| "eval_runtime": 701.4097, |
| "eval_samples_per_second": 3.279, |
| "eval_steps_per_second": 0.411, |
| "step": 7500 |
| }, |
| { |
| "epoch": 16.12, |
| "learning_rate": 0.0002041810344827586, |
| "loss": 0.1495, |
| "step": 7510 |
| }, |
| { |
| "epoch": 16.14, |
| "learning_rate": 0.000204051724137931, |
| "loss": 0.141, |
| "step": 7520 |
| }, |
| { |
| "epoch": 16.16, |
| "learning_rate": 0.00020392241379310342, |
| "loss": 0.1535, |
| "step": 7530 |
| }, |
| { |
| "epoch": 16.18, |
| "learning_rate": 0.00020379310344827584, |
| "loss": 0.1507, |
| "step": 7540 |
| }, |
| { |
| "epoch": 16.2, |
| "learning_rate": 0.00020366379310344826, |
| "loss": 0.153, |
| "step": 7550 |
| }, |
| { |
| "epoch": 16.22, |
| "learning_rate": 0.00020353448275862068, |
| "loss": 0.1403, |
| "step": 7560 |
| }, |
| { |
| "epoch": 16.24, |
| "learning_rate": 0.00020340517241379308, |
| "loss": 0.1719, |
| "step": 7570 |
| }, |
| { |
| "epoch": 16.27, |
| "learning_rate": 0.0002032758620689655, |
| "loss": 0.1544, |
| "step": 7580 |
| }, |
| { |
| "epoch": 16.29, |
| "learning_rate": 0.00020314655172413792, |
| "loss": 0.1517, |
| "step": 7590 |
| }, |
| { |
| "epoch": 16.31, |
| "learning_rate": 0.00020301724137931034, |
| "loss": 0.1559, |
| "step": 7600 |
| }, |
| { |
| "epoch": 16.33, |
| "learning_rate": 0.00020288793103448273, |
| "loss": 0.1535, |
| "step": 7610 |
| }, |
| { |
| "epoch": 16.35, |
| "learning_rate": 0.00020275862068965515, |
| "loss": 0.1444, |
| "step": 7620 |
| }, |
| { |
| "epoch": 16.37, |
| "learning_rate": 0.00020262931034482757, |
| "loss": 0.1718, |
| "step": 7630 |
| }, |
| { |
| "epoch": 16.39, |
| "learning_rate": 0.0002025, |
| "loss": 0.1462, |
| "step": 7640 |
| }, |
| { |
| "epoch": 16.42, |
| "learning_rate": 0.00020237068965517238, |
| "loss": 0.1567, |
| "step": 7650 |
| }, |
| { |
| "epoch": 16.44, |
| "learning_rate": 0.0002022413793103448, |
| "loss": 0.1608, |
| "step": 7660 |
| }, |
| { |
| "epoch": 16.46, |
| "learning_rate": 0.00020211206896551722, |
| "loss": 0.1713, |
| "step": 7670 |
| }, |
| { |
| "epoch": 16.48, |
| "learning_rate": 0.00020198275862068964, |
| "loss": 0.1564, |
| "step": 7680 |
| }, |
| { |
| "epoch": 16.5, |
| "learning_rate": 0.00020185344827586206, |
| "loss": 0.1566, |
| "step": 7690 |
| }, |
| { |
| "epoch": 16.52, |
| "learning_rate": 0.00020172413793103448, |
| "loss": 0.1384, |
| "step": 7700 |
| }, |
| { |
| "epoch": 16.55, |
| "learning_rate": 0.00020159482758620687, |
| "loss": 0.1522, |
| "step": 7710 |
| }, |
| { |
| "epoch": 16.57, |
| "learning_rate": 0.0002014655172413793, |
| "loss": 0.1599, |
| "step": 7720 |
| }, |
| { |
| "epoch": 16.59, |
| "learning_rate": 0.00020133620689655171, |
| "loss": 0.1548, |
| "step": 7730 |
| }, |
| { |
| "epoch": 16.61, |
| "learning_rate": 0.0002012068965517241, |
| "loss": 0.154, |
| "step": 7740 |
| }, |
| { |
| "epoch": 16.63, |
| "learning_rate": 0.00020107758620689653, |
| "loss": 0.1502, |
| "step": 7750 |
| }, |
| { |
| "epoch": 16.65, |
| "learning_rate": 0.00020094827586206895, |
| "loss": 0.1554, |
| "step": 7760 |
| }, |
| { |
| "epoch": 16.67, |
| "learning_rate": 0.00020081896551724137, |
| "loss": 0.1432, |
| "step": 7770 |
| }, |
| { |
| "epoch": 16.7, |
| "learning_rate": 0.0002006896551724138, |
| "loss": 0.1644, |
| "step": 7780 |
| }, |
| { |
| "epoch": 16.72, |
| "learning_rate": 0.0002005603448275862, |
| "loss": 0.1601, |
| "step": 7790 |
| }, |
| { |
| "epoch": 16.74, |
| "learning_rate": 0.0002004310344827586, |
| "loss": 0.1434, |
| "step": 7800 |
| }, |
| { |
| "epoch": 16.74, |
| "eval_loss": 0.24495865404605865, |
| "eval_runtime": 700.6557, |
| "eval_samples_per_second": 3.283, |
| "eval_steps_per_second": 0.411, |
| "step": 7800 |
| }, |
| { |
| "epoch": 16.76, |
| "learning_rate": 0.00020030172413793102, |
| "loss": 0.1649, |
| "step": 7810 |
| }, |
| { |
| "epoch": 16.78, |
| "learning_rate": 0.0002001724137931034, |
| "loss": 0.156, |
| "step": 7820 |
| }, |
| { |
| "epoch": 16.8, |
| "learning_rate": 0.00020004310344827583, |
| "loss": 0.1596, |
| "step": 7830 |
| }, |
| { |
| "epoch": 16.82, |
| "learning_rate": 0.00019991379310344825, |
| "loss": 0.1562, |
| "step": 7840 |
| }, |
| { |
| "epoch": 16.85, |
| "learning_rate": 0.00019978448275862067, |
| "loss": 0.1445, |
| "step": 7850 |
| }, |
| { |
| "epoch": 16.87, |
| "learning_rate": 0.0001996551724137931, |
| "loss": 0.165, |
| "step": 7860 |
| }, |
| { |
| "epoch": 16.89, |
| "learning_rate": 0.0001995258620689655, |
| "loss": 0.1407, |
| "step": 7870 |
| }, |
| { |
| "epoch": 16.91, |
| "learning_rate": 0.00019939655172413793, |
| "loss": 0.1613, |
| "step": 7880 |
| }, |
| { |
| "epoch": 16.93, |
| "learning_rate": 0.00019926724137931033, |
| "loss": 0.1726, |
| "step": 7890 |
| }, |
| { |
| "epoch": 16.95, |
| "learning_rate": 0.00019913793103448275, |
| "loss": 0.163, |
| "step": 7900 |
| }, |
| { |
| "epoch": 16.97, |
| "learning_rate": 0.00019900862068965514, |
| "loss": 0.1601, |
| "step": 7910 |
| }, |
| { |
| "epoch": 17.0, |
| "learning_rate": 0.00019887931034482756, |
| "loss": 0.1667, |
| "step": 7920 |
| }, |
| { |
| "epoch": 17.02, |
| "learning_rate": 0.00019874999999999998, |
| "loss": 0.1466, |
| "step": 7930 |
| }, |
| { |
| "epoch": 17.04, |
| "learning_rate": 0.0001986206896551724, |
| "loss": 0.1312, |
| "step": 7940 |
| }, |
| { |
| "epoch": 17.06, |
| "learning_rate": 0.00019849137931034482, |
| "loss": 0.1368, |
| "step": 7950 |
| }, |
| { |
| "epoch": 17.08, |
| "learning_rate": 0.00019836206896551724, |
| "loss": 0.1357, |
| "step": 7960 |
| }, |
| { |
| "epoch": 17.1, |
| "learning_rate": 0.00019823275862068966, |
| "loss": 0.1432, |
| "step": 7970 |
| }, |
| { |
| "epoch": 17.12, |
| "learning_rate": 0.00019810344827586205, |
| "loss": 0.1354, |
| "step": 7980 |
| }, |
| { |
| "epoch": 17.15, |
| "learning_rate": 0.00019797413793103444, |
| "loss": 0.1546, |
| "step": 7990 |
| }, |
| { |
| "epoch": 17.17, |
| "learning_rate": 0.00019784482758620686, |
| "loss": 0.1307, |
| "step": 8000 |
| }, |
| { |
| "epoch": 17.19, |
| "learning_rate": 0.00019771551724137928, |
| "loss": 0.1379, |
| "step": 8010 |
| }, |
| { |
| "epoch": 17.21, |
| "learning_rate": 0.0001975862068965517, |
| "loss": 0.1458, |
| "step": 8020 |
| }, |
| { |
| "epoch": 17.23, |
| "learning_rate": 0.00019745689655172412, |
| "loss": 0.1464, |
| "step": 8030 |
| }, |
| { |
| "epoch": 17.25, |
| "learning_rate": 0.00019732758620689654, |
| "loss": 0.1579, |
| "step": 8040 |
| }, |
| { |
| "epoch": 17.27, |
| "learning_rate": 0.00019719827586206896, |
| "loss": 0.1436, |
| "step": 8050 |
| }, |
| { |
| "epoch": 17.3, |
| "learning_rate": 0.00019706896551724138, |
| "loss": 0.1681, |
| "step": 8060 |
| }, |
| { |
| "epoch": 17.32, |
| "learning_rate": 0.00019693965517241378, |
| "loss": 0.1513, |
| "step": 8070 |
| }, |
| { |
| "epoch": 17.34, |
| "learning_rate": 0.00019681034482758617, |
| "loss": 0.1521, |
| "step": 8080 |
| }, |
| { |
| "epoch": 17.36, |
| "learning_rate": 0.0001966810344827586, |
| "loss": 0.1356, |
| "step": 8090 |
| }, |
| { |
| "epoch": 17.38, |
| "learning_rate": 0.000196551724137931, |
| "loss": 0.1468, |
| "step": 8100 |
| }, |
| { |
| "epoch": 17.38, |
| "eval_loss": 0.24360989034175873, |
| "eval_runtime": 701.7588, |
| "eval_samples_per_second": 3.277, |
| "eval_steps_per_second": 0.41, |
| "step": 8100 |
| }, |
| { |
| "epoch": 17.4, |
| "learning_rate": 0.00019642241379310343, |
| "loss": 0.1523, |
| "step": 8110 |
| }, |
| { |
| "epoch": 17.42, |
| "learning_rate": 0.00019629310344827585, |
| "loss": 0.1335, |
| "step": 8120 |
| }, |
| { |
| "epoch": 17.45, |
| "learning_rate": 0.00019616379310344827, |
| "loss": 0.1431, |
| "step": 8130 |
| }, |
| { |
| "epoch": 17.47, |
| "learning_rate": 0.0001960344827586207, |
| "loss": 0.152, |
| "step": 8140 |
| }, |
| { |
| "epoch": 17.49, |
| "learning_rate": 0.0001959051724137931, |
| "loss": 0.1443, |
| "step": 8150 |
| }, |
| { |
| "epoch": 17.51, |
| "learning_rate": 0.0001957758620689655, |
| "loss": 0.1586, |
| "step": 8160 |
| }, |
| { |
| "epoch": 17.53, |
| "learning_rate": 0.0001956465517241379, |
| "loss": 0.1423, |
| "step": 8170 |
| }, |
| { |
| "epoch": 17.55, |
| "learning_rate": 0.00019551724137931032, |
| "loss": 0.1314, |
| "step": 8180 |
| }, |
| { |
| "epoch": 17.58, |
| "learning_rate": 0.00019538793103448274, |
| "loss": 0.1356, |
| "step": 8190 |
| }, |
| { |
| "epoch": 17.6, |
| "learning_rate": 0.00019525862068965516, |
| "loss": 0.1508, |
| "step": 8200 |
| }, |
| { |
| "epoch": 17.62, |
| "learning_rate": 0.00019512931034482758, |
| "loss": 0.1655, |
| "step": 8210 |
| }, |
| { |
| "epoch": 17.64, |
| "learning_rate": 0.000195, |
| "loss": 0.1443, |
| "step": 8220 |
| }, |
| { |
| "epoch": 17.66, |
| "learning_rate": 0.00019487068965517242, |
| "loss": 0.1518, |
| "step": 8230 |
| }, |
| { |
| "epoch": 17.68, |
| "learning_rate": 0.00019474137931034484, |
| "loss": 0.1586, |
| "step": 8240 |
| }, |
| { |
| "epoch": 17.7, |
| "learning_rate": 0.0001946120689655172, |
| "loss": 0.1459, |
| "step": 8250 |
| }, |
| { |
| "epoch": 17.73, |
| "learning_rate": 0.00019448275862068962, |
| "loss": 0.1466, |
| "step": 8260 |
| }, |
| { |
| "epoch": 17.75, |
| "learning_rate": 0.00019435344827586204, |
| "loss": 0.1336, |
| "step": 8270 |
| }, |
| { |
| "epoch": 17.77, |
| "learning_rate": 0.00019422413793103446, |
| "loss": 0.1522, |
| "step": 8280 |
| }, |
| { |
| "epoch": 17.79, |
| "learning_rate": 0.00019409482758620688, |
| "loss": 0.1589, |
| "step": 8290 |
| }, |
| { |
| "epoch": 17.81, |
| "learning_rate": 0.0001939655172413793, |
| "loss": 0.1497, |
| "step": 8300 |
| }, |
| { |
| "epoch": 17.83, |
| "learning_rate": 0.00019383620689655172, |
| "loss": 0.1363, |
| "step": 8310 |
| }, |
| { |
| "epoch": 17.85, |
| "learning_rate": 0.00019370689655172414, |
| "loss": 0.1529, |
| "step": 8320 |
| }, |
| { |
| "epoch": 17.88, |
| "learning_rate": 0.00019357758620689656, |
| "loss": 0.1609, |
| "step": 8330 |
| }, |
| { |
| "epoch": 17.9, |
| "learning_rate": 0.00019344827586206893, |
| "loss": 0.1446, |
| "step": 8340 |
| }, |
| { |
| "epoch": 17.92, |
| "learning_rate": 0.00019331896551724135, |
| "loss": 0.1655, |
| "step": 8350 |
| }, |
| { |
| "epoch": 17.94, |
| "learning_rate": 0.00019318965517241377, |
| "loss": 0.1559, |
| "step": 8360 |
| }, |
| { |
| "epoch": 17.96, |
| "learning_rate": 0.0001930603448275862, |
| "loss": 0.1672, |
| "step": 8370 |
| }, |
| { |
| "epoch": 17.98, |
| "learning_rate": 0.0001929310344827586, |
| "loss": 0.1556, |
| "step": 8380 |
| }, |
| { |
| "epoch": 18.0, |
| "learning_rate": 0.00019280172413793103, |
| "loss": 0.1601, |
| "step": 8390 |
| }, |
| { |
| "epoch": 18.03, |
| "learning_rate": 0.00019267241379310345, |
| "loss": 0.1474, |
| "step": 8400 |
| }, |
| { |
| "epoch": 18.03, |
| "eval_loss": 0.24331876635551453, |
| "eval_runtime": 702.0821, |
| "eval_samples_per_second": 3.276, |
| "eval_steps_per_second": 0.41, |
| "step": 8400 |
| }, |
| { |
| "epoch": 18.05, |
| "learning_rate": 0.00019254310344827587, |
| "loss": 0.1252, |
| "step": 8410 |
| }, |
| { |
| "epoch": 18.07, |
| "learning_rate": 0.00019241379310344823, |
| "loss": 0.1316, |
| "step": 8420 |
| }, |
| { |
| "epoch": 18.09, |
| "learning_rate": 0.00019228448275862065, |
| "loss": 0.1449, |
| "step": 8430 |
| }, |
| { |
| "epoch": 18.11, |
| "learning_rate": 0.00019215517241379307, |
| "loss": 0.1487, |
| "step": 8440 |
| }, |
| { |
| "epoch": 18.13, |
| "learning_rate": 0.0001920258620689655, |
| "loss": 0.1426, |
| "step": 8450 |
| }, |
| { |
| "epoch": 18.15, |
| "learning_rate": 0.00019189655172413791, |
| "loss": 0.1359, |
| "step": 8460 |
| }, |
| { |
| "epoch": 18.18, |
| "learning_rate": 0.00019176724137931033, |
| "loss": 0.1491, |
| "step": 8470 |
| }, |
| { |
| "epoch": 18.2, |
| "learning_rate": 0.00019163793103448275, |
| "loss": 0.1315, |
| "step": 8480 |
| }, |
| { |
| "epoch": 18.22, |
| "learning_rate": 0.00019150862068965517, |
| "loss": 0.1515, |
| "step": 8490 |
| }, |
| { |
| "epoch": 18.24, |
| "learning_rate": 0.0001913793103448276, |
| "loss": 0.1311, |
| "step": 8500 |
| }, |
| { |
| "epoch": 18.26, |
| "learning_rate": 0.00019124999999999996, |
| "loss": 0.1258, |
| "step": 8510 |
| }, |
| { |
| "epoch": 18.28, |
| "learning_rate": 0.00019112068965517238, |
| "loss": 0.1348, |
| "step": 8520 |
| }, |
| { |
| "epoch": 18.3, |
| "learning_rate": 0.0001909913793103448, |
| "loss": 0.1434, |
| "step": 8530 |
| }, |
| { |
| "epoch": 18.33, |
| "learning_rate": 0.00019086206896551722, |
| "loss": 0.1474, |
| "step": 8540 |
| }, |
| { |
| "epoch": 18.35, |
| "learning_rate": 0.00019073275862068964, |
| "loss": 0.1474, |
| "step": 8550 |
| }, |
| { |
| "epoch": 18.37, |
| "learning_rate": 0.00019060344827586206, |
| "loss": 0.1537, |
| "step": 8560 |
| }, |
| { |
| "epoch": 18.39, |
| "learning_rate": 0.00019047413793103448, |
| "loss": 0.1423, |
| "step": 8570 |
| }, |
| { |
| "epoch": 18.41, |
| "learning_rate": 0.0001903448275862069, |
| "loss": 0.1582, |
| "step": 8580 |
| }, |
| { |
| "epoch": 18.43, |
| "learning_rate": 0.00019021551724137927, |
| "loss": 0.135, |
| "step": 8590 |
| }, |
| { |
| "epoch": 18.45, |
| "learning_rate": 0.00019008620689655169, |
| "loss": 0.137, |
| "step": 8600 |
| }, |
| { |
| "epoch": 18.48, |
| "learning_rate": 0.0001899568965517241, |
| "loss": 0.1336, |
| "step": 8610 |
| }, |
| { |
| "epoch": 18.5, |
| "learning_rate": 0.00018982758620689653, |
| "loss": 0.1551, |
| "step": 8620 |
| }, |
| { |
| "epoch": 18.52, |
| "learning_rate": 0.00018969827586206895, |
| "loss": 0.1365, |
| "step": 8630 |
| }, |
| { |
| "epoch": 18.54, |
| "learning_rate": 0.00018956896551724137, |
| "loss": 0.1297, |
| "step": 8640 |
| }, |
| { |
| "epoch": 18.56, |
| "learning_rate": 0.00018943965517241379, |
| "loss": 0.144, |
| "step": 8650 |
| }, |
| { |
| "epoch": 18.58, |
| "learning_rate": 0.0001893103448275862, |
| "loss": 0.1445, |
| "step": 8660 |
| }, |
| { |
| "epoch": 18.61, |
| "learning_rate": 0.00018918103448275863, |
| "loss": 0.1405, |
| "step": 8670 |
| }, |
| { |
| "epoch": 18.63, |
| "learning_rate": 0.000189051724137931, |
| "loss": 0.1427, |
| "step": 8680 |
| }, |
| { |
| "epoch": 18.65, |
| "learning_rate": 0.0001889224137931034, |
| "loss": 0.1319, |
| "step": 8690 |
| }, |
| { |
| "epoch": 18.67, |
| "learning_rate": 0.00018879310344827583, |
| "loss": 0.1425, |
| "step": 8700 |
| }, |
| { |
| "epoch": 18.67, |
| "eval_loss": 0.2412930428981781, |
| "eval_runtime": 699.0175, |
| "eval_samples_per_second": 3.29, |
| "eval_steps_per_second": 0.412, |
| "step": 8700 |
| }, |
| { |
| "epoch": 18.69, |
| "learning_rate": 0.00018866379310344825, |
| "loss": 0.146, |
| "step": 8710 |
| }, |
| { |
| "epoch": 18.71, |
| "learning_rate": 0.00018853448275862067, |
| "loss": 0.1516, |
| "step": 8720 |
| }, |
| { |
| "epoch": 18.73, |
| "learning_rate": 0.0001884051724137931, |
| "loss": 0.1346, |
| "step": 8730 |
| }, |
| { |
| "epoch": 18.76, |
| "learning_rate": 0.0001882758620689655, |
| "loss": 0.14, |
| "step": 8740 |
| }, |
| { |
| "epoch": 18.78, |
| "learning_rate": 0.00018814655172413793, |
| "loss": 0.1342, |
| "step": 8750 |
| }, |
| { |
| "epoch": 18.8, |
| "learning_rate": 0.00018801724137931032, |
| "loss": 0.1529, |
| "step": 8760 |
| }, |
| { |
| "epoch": 18.82, |
| "learning_rate": 0.00018788793103448274, |
| "loss": 0.1355, |
| "step": 8770 |
| }, |
| { |
| "epoch": 18.84, |
| "learning_rate": 0.00018775862068965514, |
| "loss": 0.1333, |
| "step": 8780 |
| }, |
| { |
| "epoch": 18.86, |
| "learning_rate": 0.00018762931034482756, |
| "loss": 0.1501, |
| "step": 8790 |
| }, |
| { |
| "epoch": 18.88, |
| "learning_rate": 0.00018749999999999998, |
| "loss": 0.1513, |
| "step": 8800 |
| }, |
| { |
| "epoch": 18.91, |
| "learning_rate": 0.0001873706896551724, |
| "loss": 0.1459, |
| "step": 8810 |
| }, |
| { |
| "epoch": 18.93, |
| "learning_rate": 0.00018724137931034482, |
| "loss": 0.1342, |
| "step": 8820 |
| }, |
| { |
| "epoch": 18.95, |
| "learning_rate": 0.00018711206896551724, |
| "loss": 0.1409, |
| "step": 8830 |
| }, |
| { |
| "epoch": 18.97, |
| "learning_rate": 0.00018698275862068966, |
| "loss": 0.1373, |
| "step": 8840 |
| }, |
| { |
| "epoch": 18.99, |
| "learning_rate": 0.00018685344827586205, |
| "loss": 0.1442, |
| "step": 8850 |
| }, |
| { |
| "epoch": 19.01, |
| "learning_rate": 0.00018672413793103447, |
| "loss": 0.1359, |
| "step": 8860 |
| }, |
| { |
| "epoch": 19.03, |
| "learning_rate": 0.00018659482758620686, |
| "loss": 0.1371, |
| "step": 8870 |
| }, |
| { |
| "epoch": 19.06, |
| "learning_rate": 0.00018646551724137928, |
| "loss": 0.124, |
| "step": 8880 |
| }, |
| { |
| "epoch": 19.08, |
| "learning_rate": 0.0001863362068965517, |
| "loss": 0.1314, |
| "step": 8890 |
| }, |
| { |
| "epoch": 19.1, |
| "learning_rate": 0.00018620689655172412, |
| "loss": 0.1369, |
| "step": 8900 |
| }, |
| { |
| "epoch": 19.12, |
| "learning_rate": 0.00018607758620689654, |
| "loss": 0.1416, |
| "step": 8910 |
| }, |
| { |
| "epoch": 19.14, |
| "learning_rate": 0.00018594827586206896, |
| "loss": 0.1432, |
| "step": 8920 |
| }, |
| { |
| "epoch": 19.16, |
| "learning_rate": 0.00018581896551724136, |
| "loss": 0.1359, |
| "step": 8930 |
| }, |
| { |
| "epoch": 19.18, |
| "learning_rate": 0.00018568965517241378, |
| "loss": 0.1262, |
| "step": 8940 |
| }, |
| { |
| "epoch": 19.21, |
| "learning_rate": 0.0001855603448275862, |
| "loss": 0.1113, |
| "step": 8950 |
| }, |
| { |
| "epoch": 19.23, |
| "learning_rate": 0.0001854310344827586, |
| "loss": 0.1287, |
| "step": 8960 |
| }, |
| { |
| "epoch": 19.25, |
| "learning_rate": 0.000185301724137931, |
| "loss": 0.1329, |
| "step": 8970 |
| }, |
| { |
| "epoch": 19.27, |
| "learning_rate": 0.00018517241379310343, |
| "loss": 0.1245, |
| "step": 8980 |
| }, |
| { |
| "epoch": 19.29, |
| "learning_rate": 0.00018504310344827585, |
| "loss": 0.1259, |
| "step": 8990 |
| }, |
| { |
| "epoch": 19.31, |
| "learning_rate": 0.00018491379310344827, |
| "loss": 0.1346, |
| "step": 9000 |
| }, |
| { |
| "epoch": 19.31, |
| "eval_loss": 0.24312053620815277, |
| "eval_runtime": 698.4404, |
| "eval_samples_per_second": 3.293, |
| "eval_steps_per_second": 0.412, |
| "step": 9000 |
| }, |
| { |
| "epoch": 19.33, |
| "learning_rate": 0.0001847844827586207, |
| "loss": 0.1292, |
| "step": 9010 |
| }, |
| { |
| "epoch": 19.36, |
| "learning_rate": 0.00018465517241379308, |
| "loss": 0.1301, |
| "step": 9020 |
| }, |
| { |
| "epoch": 19.38, |
| "learning_rate": 0.0001845258620689655, |
| "loss": 0.1402, |
| "step": 9030 |
| }, |
| { |
| "epoch": 19.4, |
| "learning_rate": 0.00018439655172413792, |
| "loss": 0.1383, |
| "step": 9040 |
| }, |
| { |
| "epoch": 19.42, |
| "learning_rate": 0.00018426724137931031, |
| "loss": 0.1343, |
| "step": 9050 |
| }, |
| { |
| "epoch": 19.44, |
| "learning_rate": 0.00018413793103448273, |
| "loss": 0.1299, |
| "step": 9060 |
| }, |
| { |
| "epoch": 19.46, |
| "learning_rate": 0.00018400862068965515, |
| "loss": 0.1326, |
| "step": 9070 |
| }, |
| { |
| "epoch": 19.48, |
| "learning_rate": 0.00018387931034482757, |
| "loss": 0.1439, |
| "step": 9080 |
| }, |
| { |
| "epoch": 19.51, |
| "learning_rate": 0.00018375, |
| "loss": 0.1357, |
| "step": 9090 |
| }, |
| { |
| "epoch": 19.53, |
| "learning_rate": 0.0001836206896551724, |
| "loss": 0.1316, |
| "step": 9100 |
| }, |
| { |
| "epoch": 19.55, |
| "learning_rate": 0.0001834913793103448, |
| "loss": 0.1471, |
| "step": 9110 |
| }, |
| { |
| "epoch": 19.57, |
| "learning_rate": 0.00018336206896551723, |
| "loss": 0.137, |
| "step": 9120 |
| }, |
| { |
| "epoch": 19.59, |
| "learning_rate": 0.00018323275862068965, |
| "loss": 0.1302, |
| "step": 9130 |
| }, |
| { |
| "epoch": 19.61, |
| "learning_rate": 0.00018310344827586204, |
| "loss": 0.146, |
| "step": 9140 |
| }, |
| { |
| "epoch": 19.64, |
| "learning_rate": 0.00018297413793103446, |
| "loss": 0.1427, |
| "step": 9150 |
| }, |
| { |
| "epoch": 19.66, |
| "learning_rate": 0.00018284482758620688, |
| "loss": 0.1431, |
| "step": 9160 |
| }, |
| { |
| "epoch": 19.68, |
| "learning_rate": 0.0001827155172413793, |
| "loss": 0.1535, |
| "step": 9170 |
| }, |
| { |
| "epoch": 19.7, |
| "learning_rate": 0.00018258620689655172, |
| "loss": 0.1573, |
| "step": 9180 |
| }, |
| { |
| "epoch": 19.72, |
| "learning_rate": 0.0001824568965517241, |
| "loss": 0.1412, |
| "step": 9190 |
| }, |
| { |
| "epoch": 19.74, |
| "learning_rate": 0.00018232758620689653, |
| "loss": 0.1377, |
| "step": 9200 |
| }, |
| { |
| "epoch": 19.76, |
| "learning_rate": 0.00018219827586206895, |
| "loss": 0.137, |
| "step": 9210 |
| }, |
| { |
| "epoch": 19.79, |
| "learning_rate": 0.00018206896551724137, |
| "loss": 0.1385, |
| "step": 9220 |
| }, |
| { |
| "epoch": 19.81, |
| "learning_rate": 0.00018193965517241377, |
| "loss": 0.1224, |
| "step": 9230 |
| }, |
| { |
| "epoch": 19.83, |
| "learning_rate": 0.00018181034482758619, |
| "loss": 0.1443, |
| "step": 9240 |
| }, |
| { |
| "epoch": 19.85, |
| "learning_rate": 0.0001816810344827586, |
| "loss": 0.1326, |
| "step": 9250 |
| }, |
| { |
| "epoch": 19.87, |
| "learning_rate": 0.00018155172413793103, |
| "loss": 0.1249, |
| "step": 9260 |
| }, |
| { |
| "epoch": 19.89, |
| "learning_rate": 0.00018142241379310342, |
| "loss": 0.1447, |
| "step": 9270 |
| }, |
| { |
| "epoch": 19.91, |
| "learning_rate": 0.00018129310344827584, |
| "loss": 0.1464, |
| "step": 9280 |
| }, |
| { |
| "epoch": 19.94, |
| "learning_rate": 0.00018116379310344826, |
| "loss": 0.1273, |
| "step": 9290 |
| }, |
| { |
| "epoch": 19.96, |
| "learning_rate": 0.00018103448275862068, |
| "loss": 0.1485, |
| "step": 9300 |
| }, |
| { |
| "epoch": 19.96, |
| "eval_loss": 0.240739107131958, |
| "eval_runtime": 698.1438, |
| "eval_samples_per_second": 3.294, |
| "eval_steps_per_second": 0.413, |
| "step": 9300 |
| }, |
| { |
| "epoch": 19.98, |
| "learning_rate": 0.0001809051724137931, |
| "loss": 0.1484, |
| "step": 9310 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 0.0001807758620689655, |
| "loss": 0.1398, |
| "step": 9320 |
| }, |
| { |
| "epoch": 20.02, |
| "learning_rate": 0.0001806465517241379, |
| "loss": 0.1239, |
| "step": 9330 |
| }, |
| { |
| "epoch": 20.04, |
| "learning_rate": 0.00018051724137931033, |
| "loss": 0.1296, |
| "step": 9340 |
| }, |
| { |
| "epoch": 20.06, |
| "learning_rate": 0.00018038793103448275, |
| "loss": 0.122, |
| "step": 9350 |
| }, |
| { |
| "epoch": 20.09, |
| "learning_rate": 0.00018025862068965514, |
| "loss": 0.1353, |
| "step": 9360 |
| }, |
| { |
| "epoch": 20.11, |
| "learning_rate": 0.00018012931034482756, |
| "loss": 0.1152, |
| "step": 9370 |
| }, |
| { |
| "epoch": 20.13, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 0.1367, |
| "step": 9380 |
| }, |
| { |
| "epoch": 20.15, |
| "learning_rate": 0.0001798706896551724, |
| "loss": 0.1367, |
| "step": 9390 |
| }, |
| { |
| "epoch": 20.17, |
| "learning_rate": 0.00017974137931034482, |
| "loss": 0.1274, |
| "step": 9400 |
| }, |
| { |
| "epoch": 20.19, |
| "learning_rate": 0.00017961206896551724, |
| "loss": 0.1285, |
| "step": 9410 |
| }, |
| { |
| "epoch": 20.21, |
| "learning_rate": 0.00017948275862068964, |
| "loss": 0.1322, |
| "step": 9420 |
| }, |
| { |
| "epoch": 20.24, |
| "learning_rate": 0.00017935344827586206, |
| "loss": 0.1453, |
| "step": 9430 |
| }, |
| { |
| "epoch": 20.26, |
| "learning_rate": 0.00017922413793103445, |
| "loss": 0.1247, |
| "step": 9440 |
| }, |
| { |
| "epoch": 20.28, |
| "learning_rate": 0.00017909482758620687, |
| "loss": 0.126, |
| "step": 9450 |
| }, |
| { |
| "epoch": 20.3, |
| "learning_rate": 0.0001789655172413793, |
| "loss": 0.1279, |
| "step": 9460 |
| }, |
| { |
| "epoch": 20.32, |
| "learning_rate": 0.0001788362068965517, |
| "loss": 0.1296, |
| "step": 9470 |
| }, |
| { |
| "epoch": 20.34, |
| "learning_rate": 0.00017870689655172413, |
| "loss": 0.1399, |
| "step": 9480 |
| }, |
| { |
| "epoch": 20.36, |
| "learning_rate": 0.00017857758620689655, |
| "loss": 0.1395, |
| "step": 9490 |
| }, |
| { |
| "epoch": 20.39, |
| "learning_rate": 0.00017844827586206897, |
| "loss": 0.1307, |
| "step": 9500 |
| }, |
| { |
| "epoch": 20.41, |
| "learning_rate": 0.00017831896551724136, |
| "loss": 0.1346, |
| "step": 9510 |
| }, |
| { |
| "epoch": 20.43, |
| "learning_rate": 0.00017818965517241378, |
| "loss": 0.1197, |
| "step": 9520 |
| }, |
| { |
| "epoch": 20.45, |
| "learning_rate": 0.00017806034482758618, |
| "loss": 0.1264, |
| "step": 9530 |
| }, |
| { |
| "epoch": 20.47, |
| "learning_rate": 0.0001779310344827586, |
| "loss": 0.1307, |
| "step": 9540 |
| }, |
| { |
| "epoch": 20.49, |
| "learning_rate": 0.00017780172413793102, |
| "loss": 0.1241, |
| "step": 9550 |
| }, |
| { |
| "epoch": 20.52, |
| "learning_rate": 0.00017767241379310344, |
| "loss": 0.142, |
| "step": 9560 |
| }, |
| { |
| "epoch": 20.54, |
| "learning_rate": 0.00017754310344827586, |
| "loss": 0.1208, |
| "step": 9570 |
| }, |
| { |
| "epoch": 20.56, |
| "learning_rate": 0.00017741379310344828, |
| "loss": 0.1388, |
| "step": 9580 |
| }, |
| { |
| "epoch": 20.58, |
| "learning_rate": 0.0001772844827586207, |
| "loss": 0.1401, |
| "step": 9590 |
| }, |
| { |
| "epoch": 20.6, |
| "learning_rate": 0.0001771551724137931, |
| "loss": 0.1369, |
| "step": 9600 |
| }, |
| { |
| "epoch": 20.6, |
| "eval_loss": 0.2398538887500763, |
| "eval_runtime": 728.8049, |
| "eval_samples_per_second": 3.156, |
| "eval_steps_per_second": 0.395, |
| "step": 9600 |
| }, |
| { |
| "epoch": 19.86, |
| "learning_rate": 0.00010214979195561719, |
| "loss": 0.166, |
| "step": 9610 |
| }, |
| { |
| "epoch": 19.88, |
| "learning_rate": 0.00010194174757281553, |
| "loss": 0.166, |
| "step": 9620 |
| }, |
| { |
| "epoch": 19.9, |
| "learning_rate": 0.00010173370319001386, |
| "loss": 0.1616, |
| "step": 9630 |
| }, |
| { |
| "epoch": 19.92, |
| "learning_rate": 0.00010154646324549237, |
| "loss": 0.1703, |
| "step": 9640 |
| }, |
| { |
| "epoch": 19.94, |
| "learning_rate": 0.0001013384188626907, |
| "loss": 0.1638, |
| "step": 9650 |
| }, |
| { |
| "epoch": 19.96, |
| "learning_rate": 0.00010113037447988902, |
| "loss": 0.1641, |
| "step": 9660 |
| }, |
| { |
| "epoch": 19.98, |
| "learning_rate": 0.00010092233009708737, |
| "loss": 0.1628, |
| "step": 9670 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 0.0001007142857142857, |
| "loss": 0.1576, |
| "step": 9680 |
| }, |
| { |
| "epoch": 20.02, |
| "learning_rate": 0.00010050624133148403, |
| "loss": 0.1587, |
| "step": 9690 |
| }, |
| { |
| "epoch": 20.04, |
| "learning_rate": 0.00010029819694868237, |
| "loss": 0.1625, |
| "step": 9700 |
| }, |
| { |
| "epoch": 20.06, |
| "learning_rate": 0.0001000901525658807, |
| "loss": 0.1456, |
| "step": 9710 |
| }, |
| { |
| "epoch": 20.08, |
| "learning_rate": 9.988210818307904e-05, |
| "loss": 0.1658, |
| "step": 9720 |
| }, |
| { |
| "epoch": 20.1, |
| "learning_rate": 9.967406380027738e-05, |
| "loss": 0.1501, |
| "step": 9730 |
| }, |
| { |
| "epoch": 20.12, |
| "learning_rate": 9.946601941747571e-05, |
| "loss": 0.1608, |
| "step": 9740 |
| }, |
| { |
| "epoch": 20.14, |
| "learning_rate": 9.925797503467404e-05, |
| "loss": 0.1573, |
| "step": 9750 |
| }, |
| { |
| "epoch": 20.17, |
| "learning_rate": 9.904993065187239e-05, |
| "loss": 0.1685, |
| "step": 9760 |
| }, |
| { |
| "epoch": 20.19, |
| "learning_rate": 9.884188626907072e-05, |
| "loss": 0.1479, |
| "step": 9770 |
| }, |
| { |
| "epoch": 20.21, |
| "learning_rate": 9.863384188626906e-05, |
| "loss": 0.1452, |
| "step": 9780 |
| }, |
| { |
| "epoch": 20.23, |
| "learning_rate": 9.842579750346739e-05, |
| "loss": 0.1647, |
| "step": 9790 |
| }, |
| { |
| "epoch": 20.25, |
| "learning_rate": 9.821775312066572e-05, |
| "loss": 0.1735, |
| "step": 9800 |
| }, |
| { |
| "epoch": 20.27, |
| "learning_rate": 9.800970873786407e-05, |
| "loss": 0.1359, |
| "step": 9810 |
| }, |
| { |
| "epoch": 20.29, |
| "learning_rate": 9.78016643550624e-05, |
| "loss": 0.1716, |
| "step": 9820 |
| }, |
| { |
| "epoch": 20.31, |
| "learning_rate": 9.759361997226073e-05, |
| "loss": 0.162, |
| "step": 9830 |
| }, |
| { |
| "epoch": 20.33, |
| "learning_rate": 9.738557558945907e-05, |
| "loss": 0.1539, |
| "step": 9840 |
| }, |
| { |
| "epoch": 20.35, |
| "learning_rate": 9.71775312066574e-05, |
| "loss": 0.1608, |
| "step": 9850 |
| }, |
| { |
| "epoch": 20.37, |
| "learning_rate": 9.696948682385574e-05, |
| "loss": 0.1459, |
| "step": 9860 |
| }, |
| { |
| "epoch": 20.39, |
| "learning_rate": 9.676144244105408e-05, |
| "loss": 0.1717, |
| "step": 9870 |
| }, |
| { |
| "epoch": 20.41, |
| "learning_rate": 9.655339805825241e-05, |
| "loss": 0.1604, |
| "step": 9880 |
| }, |
| { |
| "epoch": 20.43, |
| "learning_rate": 9.634535367545074e-05, |
| "loss": 0.1616, |
| "step": 9890 |
| }, |
| { |
| "epoch": 20.45, |
| "learning_rate": 9.613730929264909e-05, |
| "loss": 0.1675, |
| "step": 9900 |
| }, |
| { |
| "epoch": 20.45, |
| "eval_loss": 0.15455523133277893, |
| "eval_runtime": 743.7243, |
| "eval_samples_per_second": 3.093, |
| "eval_steps_per_second": 0.387, |
| "step": 9900 |
| } |
| ], |
| "max_steps": 14520, |
| "num_train_epochs": 30, |
| "total_flos": 1.2721442696103199e+19, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|