| { |
| "best_metric": 0.2889377772808075, |
| "best_model_checkpoint": "/content/drive/My Drive/Colab Data/LLaMA-LoRA Tuner/lora_models/guitarGPT0/checkpoint-3000", |
| "epoch": 6.437768240343348, |
| "global_step": 3000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.9999999999999997e-05, |
| "loss": 0.7034, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 5.9999999999999995e-05, |
| "loss": 0.7056, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 8.999999999999999e-05, |
| "loss": 0.6413, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00011999999999999999, |
| "loss": 0.574, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00015, |
| "loss": 0.5307, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 0.4797, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00020999999999999998, |
| "loss": 0.4554, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00023999999999999998, |
| "loss": 0.4219, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00027, |
| "loss": 0.4163, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0003, |
| "loss": 0.4183, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00029987068965517237, |
| "loss": 0.3947, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0002997413793103448, |
| "loss": 0.413, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0002996120689655172, |
| "loss": 0.4028, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00029948275862068965, |
| "loss": 0.3807, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00029935344827586205, |
| "loss": 0.3982, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00029922413793103444, |
| "loss": 0.3907, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0002990948275862069, |
| "loss": 0.391, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0002989655172413793, |
| "loss": 0.3807, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0002988362068965517, |
| "loss": 0.3735, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0002987068965517241, |
| "loss": 0.4056, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0002985775862068965, |
| "loss": 0.3732, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00029844827586206896, |
| "loss": 0.3873, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00029831896551724135, |
| "loss": 0.3421, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00029818965517241375, |
| "loss": 0.3708, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0002980603448275862, |
| "loss": 0.371, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0002979310344827586, |
| "loss": 0.3747, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00029780172413793103, |
| "loss": 0.3608, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.0002976724137931034, |
| "loss": 0.3559, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0002975431034482758, |
| "loss": 0.3838, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00029741379310344827, |
| "loss": 0.3587, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_loss": 0.36829814314842224, |
| "eval_runtime": 676.778, |
| "eval_samples_per_second": 3.398, |
| "eval_steps_per_second": 0.426, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00029728448275862066, |
| "loss": 0.3585, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00029715517241379305, |
| "loss": 0.3536, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0002970258620689655, |
| "loss": 0.3721, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.0002968965517241379, |
| "loss": 0.37, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00029676724137931034, |
| "loss": 0.3738, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00029663793103448273, |
| "loss": 0.3542, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0002965086206896552, |
| "loss": 0.378, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00029637931034482757, |
| "loss": 0.3443, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00029624999999999996, |
| "loss": 0.3484, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00029612068965517236, |
| "loss": 0.35, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0002959913793103448, |
| "loss": 0.3622, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0002958620689655172, |
| "loss": 0.3479, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00029573275862068964, |
| "loss": 0.3535, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00029560344827586204, |
| "loss": 0.3547, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.0002954741379310345, |
| "loss": 0.365, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0002953448275862069, |
| "loss": 0.3292, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00029521551724137927, |
| "loss": 0.3377, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0002950862068965517, |
| "loss": 0.349, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.0002949568965517241, |
| "loss": 0.3638, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0002948275862068965, |
| "loss": 0.3488, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00029469827586206895, |
| "loss": 0.3438, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00029456896551724134, |
| "loss": 0.3367, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0002944396551724138, |
| "loss": 0.364, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.0002943103448275862, |
| "loss": 0.3557, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00029418103448275863, |
| "loss": 0.3513, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.000294051724137931, |
| "loss": 0.3519, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.0002939224137931034, |
| "loss": 0.341, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.0002937931034482758, |
| "loss": 0.3404, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00029366379310344826, |
| "loss": 0.3381, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00029353448275862065, |
| "loss": 0.347, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.29, |
| "eval_loss": 0.3440234959125519, |
| "eval_runtime": 676.8128, |
| "eval_samples_per_second": 3.398, |
| "eval_steps_per_second": 0.426, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.0002934051724137931, |
| "loss": 0.3377, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.0002932758620689655, |
| "loss": 0.3573, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00029314655172413794, |
| "loss": 0.3264, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00029301724137931033, |
| "loss": 0.3458, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0002928879310344827, |
| "loss": 0.3432, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.0002927586206896551, |
| "loss": 0.3321, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00029262931034482756, |
| "loss": 0.3396, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00029249999999999995, |
| "loss": 0.3314, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.0002923706896551724, |
| "loss": 0.3389, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.0002922413793103448, |
| "loss": 0.3194, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 0.00029211206896551724, |
| "loss": 0.3379, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.00029198275862068963, |
| "loss": 0.3546, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 0.0002918534482758621, |
| "loss": 0.3146, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 0.0002917241379310344, |
| "loss": 0.3381, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 0.00029159482758620687, |
| "loss": 0.3165, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.00029146551724137926, |
| "loss": 0.3372, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 0.0002913362068965517, |
| "loss": 0.3452, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.0002912068965517241, |
| "loss": 0.3549, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 0.00029107758620689655, |
| "loss": 0.3149, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.00029094827586206894, |
| "loss": 0.3278, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 0.0002908189655172414, |
| "loss": 0.3432, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 0.0002906896551724138, |
| "loss": 0.3475, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.00029056034482758617, |
| "loss": 0.3271, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 0.00029043103448275857, |
| "loss": 0.3298, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 0.000290301724137931, |
| "loss": 0.3247, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 0.0002901724137931034, |
| "loss": 0.324, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 0.00029004310344827585, |
| "loss": 0.3015, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 0.00028991379310344825, |
| "loss": 0.322, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 0.0002897844827586207, |
| "loss": 0.3127, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 0.0002896551724137931, |
| "loss": 0.3205, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.93, |
| "eval_loss": 0.33089256286621094, |
| "eval_runtime": 676.8619, |
| "eval_samples_per_second": 3.398, |
| "eval_steps_per_second": 0.425, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.00028952586206896553, |
| "loss": 0.3216, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 0.00028939655172413787, |
| "loss": 0.3227, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 0.0002892672413793103, |
| "loss": 0.3126, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 0.0002891379310344827, |
| "loss": 0.3202, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 0.00028900862068965516, |
| "loss": 0.3162, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 0.00028887931034482755, |
| "loss": 0.331, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 0.00028875, |
| "loss": 0.3201, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 0.0002886206896551724, |
| "loss": 0.3291, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 0.00028849137931034484, |
| "loss": 0.318, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 0.0002883620689655172, |
| "loss": 0.3331, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 0.0002882327586206896, |
| "loss": 0.3236, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 0.000288103448275862, |
| "loss": 0.3201, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 0.00028797413793103446, |
| "loss": 0.314, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 0.00028784482758620686, |
| "loss": 0.3027, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 0.0002877155172413793, |
| "loss": 0.3326, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 0.0002875862068965517, |
| "loss": 0.3305, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 0.00028745689655172414, |
| "loss": 0.315, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 0.00028732758620689654, |
| "loss": 0.3193, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 0.00028719827586206893, |
| "loss": 0.3239, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 0.0002870689655172413, |
| "loss": 0.3075, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 0.00028693965517241377, |
| "loss": 0.3204, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 0.00028681034482758616, |
| "loss": 0.3104, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 0.0002866810344827586, |
| "loss": 0.3393, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 0.000286551724137931, |
| "loss": 0.3235, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 0.00028642241379310345, |
| "loss": 0.322, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 0.00028629310344827584, |
| "loss": 0.3176, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 0.00028616379310344824, |
| "loss": 0.308, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 0.00028603448275862063, |
| "loss": 0.3223, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 0.0002859051724137931, |
| "loss": 0.321, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 0.00028577586206896547, |
| "loss": 0.3088, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.58, |
| "eval_loss": 0.3221810758113861, |
| "eval_runtime": 679.1093, |
| "eval_samples_per_second": 3.387, |
| "eval_steps_per_second": 0.424, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 0.0002856465517241379, |
| "loss": 0.3218, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 0.0002855172413793103, |
| "loss": 0.2987, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 0.00028538793103448276, |
| "loss": 0.3135, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 0.00028525862068965515, |
| "loss": 0.296, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 0.0002851293103448276, |
| "loss": 0.3006, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 0.000285, |
| "loss": 0.3192, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 0.0002848706896551724, |
| "loss": 0.3041, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 0.0002847413793103448, |
| "loss": 0.3106, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 0.0002846120689655172, |
| "loss": 0.3193, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 0.0002844827586206896, |
| "loss": 0.3111, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 0.00028435344827586206, |
| "loss": 0.3187, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 0.00028422413793103445, |
| "loss": 0.3125, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 0.0002840948275862069, |
| "loss": 0.3117, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 0.0002839655172413793, |
| "loss": 0.316, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 0.0002838362068965517, |
| "loss": 0.3213, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 0.0002837068965517241, |
| "loss": 0.3168, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 0.00028357758620689653, |
| "loss": 0.3107, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 0.0002834482758620689, |
| "loss": 0.2958, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 0.00028331896551724137, |
| "loss": 0.3153, |
| "step": 1390 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 0.00028318965517241376, |
| "loss": 0.3131, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 0.0002830603448275862, |
| "loss": 0.2915, |
| "step": 1410 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 0.0002829310344827586, |
| "loss": 0.3039, |
| "step": 1420 |
| }, |
| { |
| "epoch": 3.07, |
| "learning_rate": 0.000282801724137931, |
| "loss": 0.3074, |
| "step": 1430 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 0.00028267241379310344, |
| "loss": 0.3062, |
| "step": 1440 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 0.00028254310344827583, |
| "loss": 0.3005, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 0.0002824137931034482, |
| "loss": 0.3038, |
| "step": 1460 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 0.0002822844827586207, |
| "loss": 0.3078, |
| "step": 1470 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 0.00028215517241379307, |
| "loss": 0.3118, |
| "step": 1480 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 0.0002820258620689655, |
| "loss": 0.286, |
| "step": 1490 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 0.0002818965517241379, |
| "loss": 0.2966, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.22, |
| "eval_loss": 0.3142649531364441, |
| "eval_runtime": 678.9797, |
| "eval_samples_per_second": 3.387, |
| "eval_steps_per_second": 0.424, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.24, |
| "learning_rate": 0.0002817672413793103, |
| "loss": 0.3101, |
| "step": 1510 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 0.00028163793103448275, |
| "loss": 0.2922, |
| "step": 1520 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 0.00028150862068965514, |
| "loss": 0.3128, |
| "step": 1530 |
| }, |
| { |
| "epoch": 3.3, |
| "learning_rate": 0.00028137931034482753, |
| "loss": 0.2973, |
| "step": 1540 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 0.00028125, |
| "loss": 0.2921, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 0.00028112068965517237, |
| "loss": 0.3094, |
| "step": 1560 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 0.0002809913793103448, |
| "loss": 0.2876, |
| "step": 1570 |
| }, |
| { |
| "epoch": 3.39, |
| "learning_rate": 0.0002808620689655172, |
| "loss": 0.3019, |
| "step": 1580 |
| }, |
| { |
| "epoch": 3.41, |
| "learning_rate": 0.00028073275862068966, |
| "loss": 0.3127, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 0.00028060344827586205, |
| "loss": 0.3078, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 0.00028047413793103444, |
| "loss": 0.2967, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.48, |
| "learning_rate": 0.0002803448275862069, |
| "loss": 0.2984, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 0.0002802155172413793, |
| "loss": 0.2886, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 0.0002800862068965517, |
| "loss": 0.3113, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 0.0002799568965517241, |
| "loss": 0.3091, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.56, |
| "learning_rate": 0.0002798275862068965, |
| "loss": 0.2969, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 0.00027969827586206896, |
| "loss": 0.2985, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.61, |
| "learning_rate": 0.00027956896551724136, |
| "loss": 0.2863, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 0.00027943965517241375, |
| "loss": 0.2922, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 0.0002793103448275862, |
| "loss": 0.2753, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.67, |
| "learning_rate": 0.0002791810344827586, |
| "loss": 0.3047, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 0.000279051724137931, |
| "loss": 0.309, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 0.00027892241379310343, |
| "loss": 0.3058, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 0.0002787931034482758, |
| "loss": 0.3171, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 0.00027866379310344827, |
| "loss": 0.2826, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 0.00027853448275862066, |
| "loss": 0.3029, |
| "step": 1760 |
| }, |
| { |
| "epoch": 3.8, |
| "learning_rate": 0.00027840517241379306, |
| "loss": 0.297, |
| "step": 1770 |
| }, |
| { |
| "epoch": 3.82, |
| "learning_rate": 0.0002782758620689655, |
| "loss": 0.3007, |
| "step": 1780 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 0.0002781465517241379, |
| "loss": 0.3096, |
| "step": 1790 |
| }, |
| { |
| "epoch": 3.86, |
| "learning_rate": 0.00027801724137931034, |
| "loss": 0.3013, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.86, |
| "eval_loss": 0.3079967796802521, |
| "eval_runtime": 678.0738, |
| "eval_samples_per_second": 3.392, |
| "eval_steps_per_second": 0.425, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 0.00027788793103448274, |
| "loss": 0.2907, |
| "step": 1810 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 0.00027775862068965513, |
| "loss": 0.2792, |
| "step": 1820 |
| }, |
| { |
| "epoch": 3.93, |
| "learning_rate": 0.0002776293103448276, |
| "loss": 0.2911, |
| "step": 1830 |
| }, |
| { |
| "epoch": 3.95, |
| "learning_rate": 0.00027749999999999997, |
| "loss": 0.2841, |
| "step": 1840 |
| }, |
| { |
| "epoch": 3.97, |
| "learning_rate": 0.00027737068965517236, |
| "loss": 0.3033, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.99, |
| "learning_rate": 0.0002772413793103448, |
| "loss": 0.2975, |
| "step": 1860 |
| }, |
| { |
| "epoch": 4.01, |
| "learning_rate": 0.0002771120689655172, |
| "loss": 0.2823, |
| "step": 1870 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 0.00027698275862068965, |
| "loss": 0.2907, |
| "step": 1880 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 0.00027685344827586204, |
| "loss": 0.2747, |
| "step": 1890 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 0.0002767241379310345, |
| "loss": 0.2823, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 0.0002765948275862069, |
| "loss": 0.3116, |
| "step": 1910 |
| }, |
| { |
| "epoch": 4.12, |
| "learning_rate": 0.0002764655172413793, |
| "loss": 0.3064, |
| "step": 1920 |
| }, |
| { |
| "epoch": 4.14, |
| "learning_rate": 0.0002763362068965517, |
| "loss": 0.2753, |
| "step": 1930 |
| }, |
| { |
| "epoch": 4.16, |
| "learning_rate": 0.0002762068965517241, |
| "loss": 0.2974, |
| "step": 1940 |
| }, |
| { |
| "epoch": 4.18, |
| "learning_rate": 0.0002760775862068965, |
| "loss": 0.2912, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.21, |
| "learning_rate": 0.00027594827586206895, |
| "loss": 0.2968, |
| "step": 1960 |
| }, |
| { |
| "epoch": 4.23, |
| "learning_rate": 0.00027581896551724135, |
| "loss": 0.2768, |
| "step": 1970 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 0.0002756896551724138, |
| "loss": 0.3027, |
| "step": 1980 |
| }, |
| { |
| "epoch": 4.27, |
| "learning_rate": 0.0002755603448275862, |
| "loss": 0.2825, |
| "step": 1990 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 0.0002754310344827586, |
| "loss": 0.2946, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 0.00027530172413793103, |
| "loss": 0.2913, |
| "step": 2010 |
| }, |
| { |
| "epoch": 4.33, |
| "learning_rate": 0.0002751724137931034, |
| "loss": 0.282, |
| "step": 2020 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 0.0002750431034482758, |
| "loss": 0.2866, |
| "step": 2030 |
| }, |
| { |
| "epoch": 4.38, |
| "learning_rate": 0.00027491379310344826, |
| "loss": 0.2857, |
| "step": 2040 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 0.00027478448275862065, |
| "loss": 0.2883, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 0.0002746551724137931, |
| "loss": 0.2891, |
| "step": 2060 |
| }, |
| { |
| "epoch": 4.44, |
| "learning_rate": 0.0002745258620689655, |
| "loss": 0.2908, |
| "step": 2070 |
| }, |
| { |
| "epoch": 4.46, |
| "learning_rate": 0.00027439655172413794, |
| "loss": 0.2808, |
| "step": 2080 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 0.00027426724137931033, |
| "loss": 0.2848, |
| "step": 2090 |
| }, |
| { |
| "epoch": 4.51, |
| "learning_rate": 0.0002741379310344827, |
| "loss": 0.2944, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.51, |
| "eval_loss": 0.3032459020614624, |
| "eval_runtime": 676.6943, |
| "eval_samples_per_second": 3.399, |
| "eval_steps_per_second": 0.426, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 0.0002740086206896551, |
| "loss": 0.2835, |
| "step": 2110 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 0.00027387931034482757, |
| "loss": 0.292, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.57, |
| "learning_rate": 0.00027374999999999996, |
| "loss": 0.2992, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.59, |
| "learning_rate": 0.0002736206896551724, |
| "loss": 0.2874, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.61, |
| "learning_rate": 0.0002734913793103448, |
| "loss": 0.2827, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 0.00027336206896551725, |
| "loss": 0.2887, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.66, |
| "learning_rate": 0.00027323275862068964, |
| "loss": 0.2824, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 0.00027310344827586203, |
| "loss": 0.3099, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.7, |
| "learning_rate": 0.0002729741379310344, |
| "loss": 0.2931, |
| "step": 2190 |
| }, |
| { |
| "epoch": 4.72, |
| "learning_rate": 0.00027284482758620687, |
| "loss": 0.2983, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 0.00027271551724137926, |
| "loss": 0.2816, |
| "step": 2210 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 0.0002725862068965517, |
| "loss": 0.2888, |
| "step": 2220 |
| }, |
| { |
| "epoch": 4.79, |
| "learning_rate": 0.0002724568965517241, |
| "loss": 0.2769, |
| "step": 2230 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 0.00027232758620689655, |
| "loss": 0.2824, |
| "step": 2240 |
| }, |
| { |
| "epoch": 4.83, |
| "learning_rate": 0.00027219827586206894, |
| "loss": 0.2823, |
| "step": 2250 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 0.0002720689655172414, |
| "loss": 0.2934, |
| "step": 2260 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 0.0002719396551724138, |
| "loss": 0.2856, |
| "step": 2270 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 0.0002718103448275862, |
| "loss": 0.2867, |
| "step": 2280 |
| }, |
| { |
| "epoch": 4.91, |
| "learning_rate": 0.00027168103448275857, |
| "loss": 0.2873, |
| "step": 2290 |
| }, |
| { |
| "epoch": 4.94, |
| "learning_rate": 0.000271551724137931, |
| "loss": 0.2857, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.96, |
| "learning_rate": 0.0002714224137931034, |
| "loss": 0.2749, |
| "step": 2310 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 0.00027129310344827586, |
| "loss": 0.2775, |
| "step": 2320 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 0.00027116379310344825, |
| "loss": 0.2721, |
| "step": 2330 |
| }, |
| { |
| "epoch": 5.02, |
| "learning_rate": 0.0002710344827586207, |
| "loss": 0.2682, |
| "step": 2340 |
| }, |
| { |
| "epoch": 5.04, |
| "learning_rate": 0.0002709051724137931, |
| "loss": 0.2676, |
| "step": 2350 |
| }, |
| { |
| "epoch": 5.06, |
| "learning_rate": 0.0002707758620689655, |
| "loss": 0.2752, |
| "step": 2360 |
| }, |
| { |
| "epoch": 5.09, |
| "learning_rate": 0.0002706465517241379, |
| "loss": 0.2742, |
| "step": 2370 |
| }, |
| { |
| "epoch": 5.11, |
| "learning_rate": 0.0002705172413793103, |
| "loss": 0.2784, |
| "step": 2380 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 0.0002703879310344827, |
| "loss": 0.2739, |
| "step": 2390 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 0.00027025862068965516, |
| "loss": 0.2874, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.15, |
| "eval_loss": 0.2981237769126892, |
| "eval_runtime": 678.281, |
| "eval_samples_per_second": 3.391, |
| "eval_steps_per_second": 0.425, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.17, |
| "learning_rate": 0.00027012931034482756, |
| "loss": 0.2797, |
| "step": 2410 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 0.00027, |
| "loss": 0.294, |
| "step": 2420 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 0.0002698706896551724, |
| "loss": 0.2776, |
| "step": 2430 |
| }, |
| { |
| "epoch": 5.24, |
| "learning_rate": 0.00026974137931034484, |
| "loss": 0.2727, |
| "step": 2440 |
| }, |
| { |
| "epoch": 5.26, |
| "learning_rate": 0.0002696120689655172, |
| "loss": 0.2719, |
| "step": 2450 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 0.00026948275862068963, |
| "loss": 0.2664, |
| "step": 2460 |
| }, |
| { |
| "epoch": 5.3, |
| "learning_rate": 0.000269353448275862, |
| "loss": 0.2915, |
| "step": 2470 |
| }, |
| { |
| "epoch": 5.32, |
| "learning_rate": 0.00026922413793103447, |
| "loss": 0.2562, |
| "step": 2480 |
| }, |
| { |
| "epoch": 5.34, |
| "learning_rate": 0.00026909482758620686, |
| "loss": 0.264, |
| "step": 2490 |
| }, |
| { |
| "epoch": 5.36, |
| "learning_rate": 0.0002689655172413793, |
| "loss": 0.2758, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.39, |
| "learning_rate": 0.0002688362068965517, |
| "loss": 0.2764, |
| "step": 2510 |
| }, |
| { |
| "epoch": 5.41, |
| "learning_rate": 0.00026870689655172415, |
| "loss": 0.2697, |
| "step": 2520 |
| }, |
| { |
| "epoch": 5.43, |
| "learning_rate": 0.00026857758620689654, |
| "loss": 0.2795, |
| "step": 2530 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 0.00026844827586206893, |
| "loss": 0.2863, |
| "step": 2540 |
| }, |
| { |
| "epoch": 5.47, |
| "learning_rate": 0.00026831896551724133, |
| "loss": 0.2807, |
| "step": 2550 |
| }, |
| { |
| "epoch": 5.49, |
| "learning_rate": 0.0002681896551724138, |
| "loss": 0.272, |
| "step": 2560 |
| }, |
| { |
| "epoch": 5.52, |
| "learning_rate": 0.00026806034482758617, |
| "loss": 0.2522, |
| "step": 2570 |
| }, |
| { |
| "epoch": 5.54, |
| "learning_rate": 0.0002679310344827586, |
| "loss": 0.2706, |
| "step": 2580 |
| }, |
| { |
| "epoch": 5.56, |
| "learning_rate": 0.000267801724137931, |
| "loss": 0.2689, |
| "step": 2590 |
| }, |
| { |
| "epoch": 5.58, |
| "learning_rate": 0.00026767241379310345, |
| "loss": 0.2607, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 0.00026754310344827585, |
| "loss": 0.2543, |
| "step": 2610 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 0.00026741379310344824, |
| "loss": 0.288, |
| "step": 2620 |
| }, |
| { |
| "epoch": 5.64, |
| "learning_rate": 0.00026728448275862063, |
| "loss": 0.2698, |
| "step": 2630 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 0.0002671551724137931, |
| "loss": 0.2629, |
| "step": 2640 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 0.0002670258620689655, |
| "loss": 0.2798, |
| "step": 2650 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 0.0002668965517241379, |
| "loss": 0.2804, |
| "step": 2660 |
| }, |
| { |
| "epoch": 5.73, |
| "learning_rate": 0.0002667672413793103, |
| "loss": 0.2824, |
| "step": 2670 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 0.00026663793103448276, |
| "loss": 0.2797, |
| "step": 2680 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 0.00026650862068965515, |
| "loss": 0.2605, |
| "step": 2690 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 0.0002663793103448276, |
| "loss": 0.2766, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.79, |
| "eval_loss": 0.292289137840271, |
| "eval_runtime": 676.8375, |
| "eval_samples_per_second": 3.398, |
| "eval_steps_per_second": 0.426, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.82, |
| "learning_rate": 0.00026624999999999994, |
| "loss": 0.2546, |
| "step": 2710 |
| }, |
| { |
| "epoch": 5.84, |
| "learning_rate": 0.0002661206896551724, |
| "loss": 0.2561, |
| "step": 2720 |
| }, |
| { |
| "epoch": 5.86, |
| "learning_rate": 0.0002659913793103448, |
| "loss": 0.2653, |
| "step": 2730 |
| }, |
| { |
| "epoch": 5.88, |
| "learning_rate": 0.0002658620689655172, |
| "loss": 0.2647, |
| "step": 2740 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 0.0002657327586206896, |
| "loss": 0.2678, |
| "step": 2750 |
| }, |
| { |
| "epoch": 5.92, |
| "learning_rate": 0.00026560344827586207, |
| "loss": 0.2672, |
| "step": 2760 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 0.00026547413793103446, |
| "loss": 0.2639, |
| "step": 2770 |
| }, |
| { |
| "epoch": 5.97, |
| "learning_rate": 0.0002653448275862069, |
| "loss": 0.2529, |
| "step": 2780 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 0.00026521551724137925, |
| "loss": 0.2789, |
| "step": 2790 |
| }, |
| { |
| "epoch": 6.01, |
| "learning_rate": 0.0002650862068965517, |
| "loss": 0.2753, |
| "step": 2800 |
| }, |
| { |
| "epoch": 6.03, |
| "learning_rate": 0.0002649568965517241, |
| "loss": 0.2579, |
| "step": 2810 |
| }, |
| { |
| "epoch": 6.05, |
| "learning_rate": 0.00026482758620689653, |
| "loss": 0.2534, |
| "step": 2820 |
| }, |
| { |
| "epoch": 6.07, |
| "learning_rate": 0.0002646982758620689, |
| "loss": 0.2653, |
| "step": 2830 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 0.00026456896551724137, |
| "loss": 0.2669, |
| "step": 2840 |
| }, |
| { |
| "epoch": 6.12, |
| "learning_rate": 0.00026443965517241377, |
| "loss": 0.2464, |
| "step": 2850 |
| }, |
| { |
| "epoch": 6.14, |
| "learning_rate": 0.0002643103448275862, |
| "loss": 0.2466, |
| "step": 2860 |
| }, |
| { |
| "epoch": 6.16, |
| "learning_rate": 0.0002641810344827586, |
| "loss": 0.2494, |
| "step": 2870 |
| }, |
| { |
| "epoch": 6.18, |
| "learning_rate": 0.000264051724137931, |
| "loss": 0.2532, |
| "step": 2880 |
| }, |
| { |
| "epoch": 6.2, |
| "learning_rate": 0.0002639224137931034, |
| "loss": 0.2513, |
| "step": 2890 |
| }, |
| { |
| "epoch": 6.22, |
| "learning_rate": 0.00026379310344827584, |
| "loss": 0.2577, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.24, |
| "learning_rate": 0.00026366379310344823, |
| "loss": 0.2594, |
| "step": 2910 |
| }, |
| { |
| "epoch": 6.27, |
| "learning_rate": 0.0002635344827586207, |
| "loss": 0.2507, |
| "step": 2920 |
| }, |
| { |
| "epoch": 6.29, |
| "learning_rate": 0.00026340517241379307, |
| "loss": 0.2716, |
| "step": 2930 |
| }, |
| { |
| "epoch": 6.31, |
| "learning_rate": 0.0002632758620689655, |
| "loss": 0.2697, |
| "step": 2940 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 0.0002631465517241379, |
| "loss": 0.2516, |
| "step": 2950 |
| }, |
| { |
| "epoch": 6.35, |
| "learning_rate": 0.0002630172413793103, |
| "loss": 0.2691, |
| "step": 2960 |
| }, |
| { |
| "epoch": 6.37, |
| "learning_rate": 0.00026288793103448275, |
| "loss": 0.2675, |
| "step": 2970 |
| }, |
| { |
| "epoch": 6.39, |
| "learning_rate": 0.00026275862068965514, |
| "loss": 0.2628, |
| "step": 2980 |
| }, |
| { |
| "epoch": 6.42, |
| "learning_rate": 0.00026262931034482754, |
| "loss": 0.2627, |
| "step": 2990 |
| }, |
| { |
| "epoch": 6.44, |
| "learning_rate": 0.0002625, |
| "loss": 0.2603, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.44, |
| "eval_loss": 0.2889377772808075, |
| "eval_runtime": 697.121, |
| "eval_samples_per_second": 3.299, |
| "eval_steps_per_second": 0.413, |
| "step": 3000 |
| } |
| ], |
| "max_steps": 23300, |
| "num_train_epochs": 50, |
| "total_flos": 3.848260350396334e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|