| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.110671936758893, |
| "eval_steps": 10, |
| "global_step": 1201, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00019999576610920983, |
| "loss": 3.0929, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 2.167867660522461, |
| "eval_runtime": 21.3399, |
| "eval_samples_per_second": 23.383, |
| "eval_steps_per_second": 5.858, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019998306479535586, |
| "loss": 1.8074, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 1.4092761278152466, |
| "eval_runtime": 21.3617, |
| "eval_samples_per_second": 23.36, |
| "eval_steps_per_second": 5.852, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019996189713395766, |
| "loss": 1.2188, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_loss": 1.1258704662322998, |
| "eval_runtime": 21.3588, |
| "eval_samples_per_second": 23.363, |
| "eval_steps_per_second": 5.852, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019993226491744662, |
| "loss": 1.0841, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_loss": 0.9605854153633118, |
| "eval_runtime": 21.3693, |
| "eval_samples_per_second": 23.351, |
| "eval_steps_per_second": 5.85, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019989417065501396, |
| "loss": 0.9594, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_loss": 0.8681771159172058, |
| "eval_runtime": 21.3662, |
| "eval_samples_per_second": 23.355, |
| "eval_steps_per_second": 5.85, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019984761757239875, |
| "loss": 0.8765, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_loss": 0.816562294960022, |
| "eval_runtime": 21.3405, |
| "eval_samples_per_second": 23.383, |
| "eval_steps_per_second": 5.857, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019979260961161427, |
| "loss": 0.852, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12, |
| "eval_loss": 0.7803006768226624, |
| "eval_runtime": 21.3622, |
| "eval_samples_per_second": 23.359, |
| "eval_steps_per_second": 5.851, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019972915143061455, |
| "loss": 0.8404, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_loss": 0.760216236114502, |
| "eval_runtime": 21.379, |
| "eval_samples_per_second": 23.341, |
| "eval_steps_per_second": 5.847, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0001996572484028997, |
| "loss": 0.8183, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_loss": 0.7222956418991089, |
| "eval_runtime": 21.3895, |
| "eval_samples_per_second": 23.329, |
| "eval_steps_per_second": 5.844, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00019957690661706108, |
| "loss": 0.7816, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_loss": 0.7133845686912537, |
| "eval_runtime": 21.3742, |
| "eval_samples_per_second": 23.346, |
| "eval_steps_per_second": 5.848, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00019948813287626563, |
| "loss": 0.7792, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_loss": 0.7233743667602539, |
| "eval_runtime": 21.3703, |
| "eval_samples_per_second": 23.35, |
| "eval_steps_per_second": 5.849, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0001993909346976798, |
| "loss": 0.7648, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_loss": 0.6882979273796082, |
| "eval_runtime": 21.3682, |
| "eval_samples_per_second": 23.352, |
| "eval_steps_per_second": 5.85, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0001992853203118331, |
| "loss": 0.8132, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.23, |
| "eval_loss": 0.7019714117050171, |
| "eval_runtime": 21.374, |
| "eval_samples_per_second": 23.346, |
| "eval_steps_per_second": 5.848, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.000199171298661921, |
| "loss": 0.7599, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_loss": 0.668350338935852, |
| "eval_runtime": 21.323, |
| "eval_samples_per_second": 23.402, |
| "eval_steps_per_second": 5.862, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0001990488794030478, |
| "loss": 0.7518, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_loss": 0.6716361045837402, |
| "eval_runtime": 21.3312, |
| "eval_samples_per_second": 23.393, |
| "eval_steps_per_second": 5.86, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00019891807290140892, |
| "loss": 0.7452, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_loss": 0.6634441018104553, |
| "eval_runtime": 21.3388, |
| "eval_samples_per_second": 23.385, |
| "eval_steps_per_second": 5.858, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00019877889023341323, |
| "loss": 0.7215, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3, |
| "eval_loss": 0.6609596610069275, |
| "eval_runtime": 21.3355, |
| "eval_samples_per_second": 23.388, |
| "eval_steps_per_second": 5.859, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00019863134318474503, |
| "loss": 0.7088, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.32, |
| "eval_loss": 0.659795880317688, |
| "eval_runtime": 21.3459, |
| "eval_samples_per_second": 23.377, |
| "eval_steps_per_second": 5.856, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0001984754442493662, |
| "loss": 0.7237, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_loss": 0.6469973921775818, |
| "eval_runtime": 21.3405, |
| "eval_samples_per_second": 23.383, |
| "eval_steps_per_second": 5.857, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.000198311206628458, |
| "loss": 0.7353, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.35, |
| "eval_loss": 0.6315197348594666, |
| "eval_runtime": 21.3347, |
| "eval_samples_per_second": 23.389, |
| "eval_steps_per_second": 5.859, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00019813864422930347, |
| "loss": 0.7111, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_loss": 0.6466153860092163, |
| "eval_runtime": 21.3369, |
| "eval_samples_per_second": 23.387, |
| "eval_steps_per_second": 5.858, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00019795777166410966, |
| "loss": 0.7136, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_loss": 0.632926344871521, |
| "eval_runtime": 21.3244, |
| "eval_samples_per_second": 23.4, |
| "eval_steps_per_second": 5.862, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00019776860424877032, |
| "loss": 0.7044, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_loss": 0.6356912851333618, |
| "eval_runtime": 21.3235, |
| "eval_samples_per_second": 23.401, |
| "eval_steps_per_second": 5.862, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.000197571158001569, |
| "loss": 0.7369, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_loss": 0.6214553713798523, |
| "eval_runtime": 21.3355, |
| "eval_samples_per_second": 23.388, |
| "eval_steps_per_second": 5.859, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00019736544964182268, |
| "loss": 0.6995, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.44, |
| "eval_loss": 0.6103290915489197, |
| "eval_runtime": 21.3392, |
| "eval_samples_per_second": 23.384, |
| "eval_steps_per_second": 5.858, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00019715149658846591, |
| "loss": 0.7027, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.46, |
| "eval_loss": 0.5964030027389526, |
| "eval_runtime": 21.3644, |
| "eval_samples_per_second": 23.357, |
| "eval_steps_per_second": 5.851, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.000196929316958576, |
| "loss": 0.6872, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.47, |
| "eval_loss": 0.60444176197052, |
| "eval_runtime": 21.3353, |
| "eval_samples_per_second": 23.389, |
| "eval_steps_per_second": 5.859, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00019669892956583867, |
| "loss": 0.7182, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_loss": 0.6127080917358398, |
| "eval_runtime": 21.3451, |
| "eval_samples_per_second": 23.378, |
| "eval_steps_per_second": 5.856, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00019646035391895512, |
| "loss": 0.6897, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_loss": 0.6016324758529663, |
| "eval_runtime": 21.324, |
| "eval_samples_per_second": 23.401, |
| "eval_steps_per_second": 5.862, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00019621361021999008, |
| "loss": 0.6824, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.53, |
| "eval_loss": 0.5880205631256104, |
| "eval_runtime": 21.3379, |
| "eval_samples_per_second": 23.386, |
| "eval_steps_per_second": 5.858, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.000195958719362661, |
| "loss": 0.673, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.54, |
| "eval_loss": 0.5902190804481506, |
| "eval_runtime": 21.3191, |
| "eval_samples_per_second": 23.406, |
| "eval_steps_per_second": 5.863, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00019569570293056894, |
| "loss": 0.6956, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.56, |
| "eval_loss": 0.5811321139335632, |
| "eval_runtime": 21.3365, |
| "eval_samples_per_second": 23.387, |
| "eval_steps_per_second": 5.859, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00019542458319537093, |
| "loss": 0.6889, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_loss": 0.5849844813346863, |
| "eval_runtime": 21.3231, |
| "eval_samples_per_second": 23.402, |
| "eval_steps_per_second": 5.862, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00019514538311489395, |
| "loss": 0.6773, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_loss": 0.5933501720428467, |
| "eval_runtime": 21.3446, |
| "eval_samples_per_second": 23.378, |
| "eval_steps_per_second": 5.856, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00019485812633119096, |
| "loss": 0.6782, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.61, |
| "eval_loss": 0.594153642654419, |
| "eval_runtime": 21.3347, |
| "eval_samples_per_second": 23.389, |
| "eval_steps_per_second": 5.859, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00019456283716853904, |
| "loss": 0.719, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.63, |
| "eval_loss": 0.5848734974861145, |
| "eval_runtime": 21.3162, |
| "eval_samples_per_second": 23.409, |
| "eval_steps_per_second": 5.864, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00019425954063137947, |
| "loss": 0.6809, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.65, |
| "eval_loss": 0.579924464225769, |
| "eval_runtime": 21.3339, |
| "eval_samples_per_second": 23.39, |
| "eval_steps_per_second": 5.859, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00019394826240220057, |
| "loss": 0.6412, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_loss": 0.5709846019744873, |
| "eval_runtime": 21.3459, |
| "eval_samples_per_second": 23.377, |
| "eval_steps_per_second": 5.856, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00019362902883936288, |
| "loss": 0.6411, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.69, |
| "eval_loss": 0.562785267829895, |
| "eval_runtime": 21.3375, |
| "eval_samples_per_second": 23.386, |
| "eval_steps_per_second": 5.858, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00019330186697486722, |
| "loss": 0.6519, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_loss": 0.5611785650253296, |
| "eval_runtime": 21.3506, |
| "eval_samples_per_second": 23.372, |
| "eval_steps_per_second": 5.855, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00019296680451206575, |
| "loss": 0.6446, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_loss": 0.5562126636505127, |
| "eval_runtime": 21.3482, |
| "eval_samples_per_second": 23.374, |
| "eval_steps_per_second": 5.855, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00019262386982331594, |
| "loss": 0.6574, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.74, |
| "eval_loss": 0.5644647479057312, |
| "eval_runtime": 21.3719, |
| "eval_samples_per_second": 23.348, |
| "eval_steps_per_second": 5.849, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.00019227309194757818, |
| "loss": 0.6633, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.76, |
| "eval_loss": 0.5663937926292419, |
| "eval_runtime": 21.3728, |
| "eval_samples_per_second": 23.347, |
| "eval_steps_per_second": 5.849, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00019191450058795683, |
| "loss": 0.6673, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_loss": 0.5483366847038269, |
| "eval_runtime": 21.352, |
| "eval_samples_per_second": 23.37, |
| "eval_steps_per_second": 5.854, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00019154812610918501, |
| "loss": 0.6466, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_loss": 0.554151713848114, |
| "eval_runtime": 21.4045, |
| "eval_samples_per_second": 23.313, |
| "eval_steps_per_second": 5.84, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00019117399953505335, |
| "loss": 0.653, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_loss": 0.5411431789398193, |
| "eval_runtime": 21.3349, |
| "eval_samples_per_second": 23.389, |
| "eval_steps_per_second": 5.859, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00019079215254578293, |
| "loss": 0.6384, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_loss": 0.5362362265586853, |
| "eval_runtime": 21.3477, |
| "eval_samples_per_second": 23.375, |
| "eval_steps_per_second": 5.855, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00019040261747534283, |
| "loss": 0.6287, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.84, |
| "eval_loss": 0.5452967286109924, |
| "eval_runtime": 21.3462, |
| "eval_samples_per_second": 23.377, |
| "eval_steps_per_second": 5.856, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00019000542730871197, |
| "loss": 0.661, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.86, |
| "eval_loss": 0.5644904971122742, |
| "eval_runtime": 21.3569, |
| "eval_samples_per_second": 23.365, |
| "eval_steps_per_second": 5.853, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0001896006156790861, |
| "loss": 0.608, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_loss": 0.5245234370231628, |
| "eval_runtime": 21.3459, |
| "eval_samples_per_second": 23.377, |
| "eval_steps_per_second": 5.856, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.00018918821686502989, |
| "loss": 0.6584, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_loss": 0.5376425385475159, |
| "eval_runtime": 21.3569, |
| "eval_samples_per_second": 23.365, |
| "eval_steps_per_second": 5.853, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.0001887682657875741, |
| "loss": 0.6416, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.91, |
| "eval_loss": 0.5471484661102295, |
| "eval_runtime": 21.3644, |
| "eval_samples_per_second": 23.357, |
| "eval_steps_per_second": 5.851, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00018834079800725872, |
| "loss": 0.6527, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_loss": 0.5425943732261658, |
| "eval_runtime": 21.3669, |
| "eval_samples_per_second": 23.354, |
| "eval_steps_per_second": 5.85, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00018790584972112174, |
| "loss": 0.6164, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.95, |
| "eval_loss": 0.5284227728843689, |
| "eval_runtime": 21.3751, |
| "eval_samples_per_second": 23.345, |
| "eval_steps_per_second": 5.848, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00018746345775963395, |
| "loss": 0.611, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.97, |
| "eval_loss": 0.5312528014183044, |
| "eval_runtime": 21.3628, |
| "eval_samples_per_second": 23.358, |
| "eval_steps_per_second": 5.851, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00018701365958358047, |
| "loss": 0.614, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.98, |
| "eval_loss": 0.5262718796730042, |
| "eval_runtime": 21.3578, |
| "eval_samples_per_second": 23.364, |
| "eval_steps_per_second": 5.853, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00018655649328088835, |
| "loss": 0.6382, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.5316660404205322, |
| "eval_runtime": 21.3512, |
| "eval_samples_per_second": 23.371, |
| "eval_steps_per_second": 5.854, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00018609199756340156, |
| "loss": 0.5804, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_loss": 0.5207402110099792, |
| "eval_runtime": 21.3663, |
| "eval_samples_per_second": 23.355, |
| "eval_steps_per_second": 5.85, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.0001856202117636029, |
| "loss": 0.6291, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.04, |
| "eval_loss": 0.5237697958946228, |
| "eval_runtime": 21.3922, |
| "eval_samples_per_second": 23.326, |
| "eval_steps_per_second": 5.843, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00018514117583128347, |
| "loss": 0.5911, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.05, |
| "eval_loss": 0.517393171787262, |
| "eval_runtime": 21.3498, |
| "eval_samples_per_second": 23.373, |
| "eval_steps_per_second": 5.855, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00018465493033015967, |
| "loss": 0.6111, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.07, |
| "eval_loss": 0.5281241536140442, |
| "eval_runtime": 21.3522, |
| "eval_samples_per_second": 23.37, |
| "eval_steps_per_second": 5.854, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0001841615164344385, |
| "loss": 0.5578, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.09, |
| "eval_loss": 0.5255175232887268, |
| "eval_runtime": 21.3637, |
| "eval_samples_per_second": 23.357, |
| "eval_steps_per_second": 5.851, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00018366097592533093, |
| "loss": 0.6055, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.11, |
| "eval_loss": 0.5177362561225891, |
| "eval_runtime": 21.3827, |
| "eval_samples_per_second": 23.337, |
| "eval_steps_per_second": 5.846, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00018315335118751396, |
| "loss": 0.6015, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.12, |
| "eval_loss": 0.5130926370620728, |
| "eval_runtime": 21.4348, |
| "eval_samples_per_second": 23.28, |
| "eval_steps_per_second": 5.832, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0001826386852055417, |
| "loss": 0.6072, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.14, |
| "eval_loss": 0.5168054103851318, |
| "eval_runtime": 21.3583, |
| "eval_samples_per_second": 23.363, |
| "eval_steps_per_second": 5.853, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.0001821170215602053, |
| "loss": 0.5956, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.16, |
| "eval_loss": 0.5168840289115906, |
| "eval_runtime": 21.3753, |
| "eval_samples_per_second": 23.345, |
| "eval_steps_per_second": 5.848, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.0001815884044248429, |
| "loss": 0.6099, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_loss": 0.5169732570648193, |
| "eval_runtime": 21.3472, |
| "eval_samples_per_second": 23.375, |
| "eval_steps_per_second": 5.856, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.0001810528785615989, |
| "loss": 0.6038, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_loss": 0.5055590867996216, |
| "eval_runtime": 21.3489, |
| "eval_samples_per_second": 23.374, |
| "eval_steps_per_second": 5.855, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018051048931763366, |
| "loss": 0.583, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.21, |
| "eval_loss": 0.5121394395828247, |
| "eval_runtime": 21.3436, |
| "eval_samples_per_second": 23.379, |
| "eval_steps_per_second": 5.857, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.0001799612826212837, |
| "loss": 0.5885, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.23, |
| "eval_loss": 0.523388683795929, |
| "eval_runtime": 21.3616, |
| "eval_samples_per_second": 23.36, |
| "eval_steps_per_second": 5.852, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00017940530497817254, |
| "loss": 0.5784, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_loss": 0.5028228163719177, |
| "eval_runtime": 21.3469, |
| "eval_samples_per_second": 23.376, |
| "eval_steps_per_second": 5.856, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00017884260346727254, |
| "loss": 0.5744, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.26, |
| "eval_loss": 0.5100187063217163, |
| "eval_runtime": 21.3603, |
| "eval_samples_per_second": 23.361, |
| "eval_steps_per_second": 5.852, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00017827322573691872, |
| "loss": 0.6014, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_loss": 0.5038166046142578, |
| "eval_runtime": 21.3431, |
| "eval_samples_per_second": 23.38, |
| "eval_steps_per_second": 5.857, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.0001776972200007735, |
| "loss": 0.6185, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.3, |
| "eval_loss": 0.5146144032478333, |
| "eval_runtime": 21.3367, |
| "eval_samples_per_second": 23.387, |
| "eval_steps_per_second": 5.858, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00017711463503374466, |
| "loss": 0.6184, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.32, |
| "eval_loss": 0.5316588282585144, |
| "eval_runtime": 21.3617, |
| "eval_samples_per_second": 23.36, |
| "eval_steps_per_second": 5.852, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.0001765255201678546, |
| "loss": 0.6141, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_loss": 0.5080065727233887, |
| "eval_runtime": 21.3913, |
| "eval_samples_per_second": 23.327, |
| "eval_steps_per_second": 5.844, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00017592992528806352, |
| "loss": 0.6146, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.35, |
| "eval_loss": 0.5165488719940186, |
| "eval_runtime": 21.3517, |
| "eval_samples_per_second": 23.371, |
| "eval_steps_per_second": 5.854, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.0001753279008280449, |
| "loss": 0.5721, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.37, |
| "eval_loss": 0.5040128231048584, |
| "eval_runtime": 21.3467, |
| "eval_samples_per_second": 23.376, |
| "eval_steps_per_second": 5.856, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00017471949776591504, |
| "loss": 0.5931, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.39, |
| "eval_loss": 0.49337631464004517, |
| "eval_runtime": 21.3473, |
| "eval_samples_per_second": 23.375, |
| "eval_steps_per_second": 5.856, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00017410476761991643, |
| "loss": 0.5944, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.41, |
| "eval_loss": 0.487575501203537, |
| "eval_runtime": 21.3451, |
| "eval_samples_per_second": 23.378, |
| "eval_steps_per_second": 5.856, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00017348376244405512, |
| "loss": 0.6002, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.42, |
| "eval_loss": 0.4929651618003845, |
| "eval_runtime": 21.3604, |
| "eval_samples_per_second": 23.361, |
| "eval_steps_per_second": 5.852, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.000172856534823693, |
| "loss": 0.5557, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.44, |
| "eval_loss": 0.4913093149662018, |
| "eval_runtime": 21.3626, |
| "eval_samples_per_second": 23.359, |
| "eval_steps_per_second": 5.851, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00017222313787109496, |
| "loss": 0.58, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.46, |
| "eval_loss": 0.4909800887107849, |
| "eval_runtime": 21.4163, |
| "eval_samples_per_second": 23.3, |
| "eval_steps_per_second": 5.837, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00017158362522093153, |
| "loss": 0.5459, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.48, |
| "eval_loss": 0.4883653223514557, |
| "eval_runtime": 21.3483, |
| "eval_samples_per_second": 23.374, |
| "eval_steps_per_second": 5.855, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.00017093805102573706, |
| "loss": 0.5871, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.49, |
| "eval_loss": 0.48601067066192627, |
| "eval_runtime": 21.3674, |
| "eval_samples_per_second": 23.353, |
| "eval_steps_per_second": 5.85, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00017028646995132435, |
| "loss": 0.5554, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.51, |
| "eval_loss": 0.4856807291507721, |
| "eval_runtime": 21.366, |
| "eval_samples_per_second": 23.355, |
| "eval_steps_per_second": 5.85, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 0.0001696289371721556, |
| "loss": 0.5819, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_loss": 0.4648899435997009, |
| "eval_runtime": 21.3516, |
| "eval_samples_per_second": 23.371, |
| "eval_steps_per_second": 5.854, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.00016896550836667035, |
| "loss": 0.5649, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.55, |
| "eval_loss": 0.47903972864151, |
| "eval_runtime": 21.3528, |
| "eval_samples_per_second": 23.369, |
| "eval_steps_per_second": 5.854, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.00016829623971257088, |
| "loss": 0.5779, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.56, |
| "eval_loss": 0.4807458519935608, |
| "eval_runtime": 21.377, |
| "eval_samples_per_second": 23.343, |
| "eval_steps_per_second": 5.847, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 0.00016762118788206487, |
| "loss": 0.5756, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_loss": 0.483437180519104, |
| "eval_runtime": 21.3913, |
| "eval_samples_per_second": 23.327, |
| "eval_steps_per_second": 5.843, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 0.000166940410037067, |
| "loss": 0.5563, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_loss": 0.49455228447914124, |
| "eval_runtime": 21.365, |
| "eval_samples_per_second": 23.356, |
| "eval_steps_per_second": 5.851, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 0.00016625396382435813, |
| "loss": 0.5393, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.62, |
| "eval_loss": 0.4847542643547058, |
| "eval_runtime": 21.3614, |
| "eval_samples_per_second": 23.36, |
| "eval_steps_per_second": 5.852, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.00016556190737070428, |
| "loss": 0.5551, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.63, |
| "eval_loss": 0.4845309257507324, |
| "eval_runtime": 21.4008, |
| "eval_samples_per_second": 23.317, |
| "eval_steps_per_second": 5.841, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 0.00016486429927793436, |
| "loss": 0.5687, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.65, |
| "eval_loss": 0.4806869626045227, |
| "eval_runtime": 21.374, |
| "eval_samples_per_second": 23.346, |
| "eval_steps_per_second": 5.848, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.00016416119861797796, |
| "loss": 0.5469, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.67, |
| "eval_loss": 0.4748505651950836, |
| "eval_runtime": 21.355, |
| "eval_samples_per_second": 23.367, |
| "eval_steps_per_second": 5.853, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 0.0001634526649278632, |
| "loss": 0.5771, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.69, |
| "eval_loss": 0.4859110414981842, |
| "eval_runtime": 21.3418, |
| "eval_samples_per_second": 23.381, |
| "eval_steps_per_second": 5.857, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 0.00016273875820467545, |
| "loss": 0.5689, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.7, |
| "eval_loss": 0.4734295606613159, |
| "eval_runtime": 21.3607, |
| "eval_samples_per_second": 23.361, |
| "eval_steps_per_second": 5.852, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.0001620195389004767, |
| "loss": 0.5741, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.72, |
| "eval_loss": 0.4881950914859772, |
| "eval_runtime": 21.3786, |
| "eval_samples_per_second": 23.341, |
| "eval_steps_per_second": 5.847, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 0.00016129506791718665, |
| "loss": 0.5643, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.74, |
| "eval_loss": 0.4815501570701599, |
| "eval_runtime": 21.3699, |
| "eval_samples_per_second": 23.351, |
| "eval_steps_per_second": 5.849, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 0.00016056540660142586, |
| "loss": 0.5603, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_loss": 0.46760401129722595, |
| "eval_runtime": 21.354, |
| "eval_samples_per_second": 23.368, |
| "eval_steps_per_second": 5.854, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 0.0001598306167393208, |
| "loss": 0.5925, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_loss": 0.46860912442207336, |
| "eval_runtime": 21.3958, |
| "eval_samples_per_second": 23.322, |
| "eval_steps_per_second": 5.842, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 0.00015909076055127202, |
| "loss": 0.5834, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.79, |
| "eval_loss": 0.47431066632270813, |
| "eval_runtime": 21.4034, |
| "eval_samples_per_second": 23.314, |
| "eval_steps_per_second": 5.84, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.00019360595357389735, |
| "loss": 0.5902, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.81, |
| "eval_loss": 0.49162757396698, |
| "eval_runtime": 38.6636, |
| "eval_samples_per_second": 12.906, |
| "eval_steps_per_second": 3.233, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 0.00019348256763960145, |
| "loss": 0.5777, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_loss": 0.47481468319892883, |
| "eval_runtime": 41.2268, |
| "eval_samples_per_second": 12.104, |
| "eval_steps_per_second": 3.032, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 0.00019335804264972018, |
| "loss": 0.5921, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.84, |
| "eval_loss": 0.48432889580726624, |
| "eval_runtime": 30.5847, |
| "eval_samples_per_second": 16.315, |
| "eval_steps_per_second": 4.087, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 0.00019323238012155123, |
| "loss": 0.5877, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.86, |
| "eval_loss": 0.47419798374176025, |
| "eval_runtime": 37.8731, |
| "eval_samples_per_second": 13.176, |
| "eval_steps_per_second": 3.3, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 0.00019310558158625285, |
| "loss": 0.5453, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.88, |
| "eval_loss": 0.4705266058444977, |
| "eval_runtime": 22.4824, |
| "eval_samples_per_second": 22.195, |
| "eval_steps_per_second": 2.802, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 0.00019297764858882514, |
| "loss": 0.5445, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.9, |
| "eval_loss": 0.4662667214870453, |
| "eval_runtime": 31.6864, |
| "eval_samples_per_second": 15.748, |
| "eval_steps_per_second": 1.988, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 0.00019284858268809137, |
| "loss": 0.5686, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_loss": 0.47445282340049744, |
| "eval_runtime": 27.1784, |
| "eval_samples_per_second": 18.36, |
| "eval_steps_per_second": 2.318, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 0.00019271838545667876, |
| "loss": 0.5712, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.93, |
| "eval_loss": 0.48884764313697815, |
| "eval_runtime": 37.118, |
| "eval_samples_per_second": 13.444, |
| "eval_steps_per_second": 1.697, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.0001925870584809995, |
| "loss": 0.6032, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.95, |
| "eval_loss": 0.48608502745628357, |
| "eval_runtime": 36.4351, |
| "eval_samples_per_second": 13.696, |
| "eval_steps_per_second": 1.729, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 0.00019245460336123134, |
| "loss": 0.5491, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_loss": 0.472098708152771, |
| "eval_runtime": 22.4827, |
| "eval_samples_per_second": 22.195, |
| "eval_steps_per_second": 2.802, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 0.00019232102171129811, |
| "loss": 0.5452, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.99, |
| "eval_loss": 0.4644794762134552, |
| "eval_runtime": 27.0746, |
| "eval_samples_per_second": 18.431, |
| "eval_steps_per_second": 2.327, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 0.00019218631515885006, |
| "loss": 0.5526, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.48768851161003113, |
| "eval_runtime": 32.1887, |
| "eval_samples_per_second": 15.502, |
| "eval_steps_per_second": 1.957, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 0.00019205048534524406, |
| "loss": 0.5443, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.02, |
| "eval_loss": 0.4716103971004486, |
| "eval_runtime": 30.3462, |
| "eval_samples_per_second": 16.444, |
| "eval_steps_per_second": 2.076, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 0.00019191353392552344, |
| "loss": 0.5103, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.04, |
| "eval_loss": 0.46319034695625305, |
| "eval_runtime": 23.7207, |
| "eval_samples_per_second": 21.037, |
| "eval_steps_per_second": 2.656, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 0.00019177546256839812, |
| "loss": 0.5202, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.06, |
| "eval_loss": 0.4802156984806061, |
| "eval_runtime": 24.1547, |
| "eval_samples_per_second": 20.658, |
| "eval_steps_per_second": 2.608, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 0.00019163627295622397, |
| "loss": 0.5436, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.07, |
| "eval_loss": 0.4681110680103302, |
| "eval_runtime": 26.9736, |
| "eval_samples_per_second": 18.5, |
| "eval_steps_per_second": 2.336, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 0.0001914959667849825, |
| "loss": 0.5454, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.09, |
| "eval_loss": 0.470931738615036, |
| "eval_runtime": 36.9982, |
| "eval_samples_per_second": 13.487, |
| "eval_steps_per_second": 1.703, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 0.0001913545457642601, |
| "loss": 0.5183, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.11, |
| "eval_loss": 0.47423675656318665, |
| "eval_runtime": 33.685, |
| "eval_samples_per_second": 14.814, |
| "eval_steps_per_second": 1.87, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.11, |
| "step": 1201, |
| "total_flos": 3.450559153050747e+17, |
| "train_loss": 0.000492346822768822, |
| "train_runtime": 4.9952, |
| "train_samples_per_second": 3843.723, |
| "train_steps_per_second": 240.233 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1200, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 10, |
| "total_flos": 3.450559153050747e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|