| { |
| "best_metric": 0.5067562460899353, |
| "best_model_checkpoint": "/scratch/mriyadh/llama_omni_asr_tts/exp/omni_stage_two_full/checkpoint-300", |
| "epoch": 59.171597633136095, |
| "eval_steps": 300, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.8875739644970414, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.6641, |
| "eval_samples_per_second": 36.821, |
| "eval_steps_per_second": 2.318, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.4792899408284024, |
| "grad_norm": 86.5, |
| "learning_rate": 0.0001666666666666667, |
| "loss": 11.072, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.7751479289940828, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8455, |
| "eval_samples_per_second": 36.64, |
| "eval_steps_per_second": 2.307, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.662721893491124, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.7706, |
| "eval_samples_per_second": 36.714, |
| "eval_steps_per_second": 2.312, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.9585798816568047, |
| "grad_norm": 32.75, |
| "learning_rate": 0.00019979028262377118, |
| "loss": 5.0622, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.5502958579881656, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9679, |
| "eval_samples_per_second": 36.518, |
| "eval_steps_per_second": 2.299, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.437869822485207, |
| "grad_norm": 31.0, |
| "learning_rate": 0.00019893981312363562, |
| "loss": 3.234, |
| "step": 1500 |
| }, |
| { |
| "epoch": 4.437869822485207, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.907, |
| "eval_samples_per_second": 36.578, |
| "eval_steps_per_second": 2.303, |
| "step": 1500 |
| }, |
| { |
| "epoch": 5.325443786982248, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8765, |
| "eval_samples_per_second": 36.609, |
| "eval_steps_per_second": 2.305, |
| "step": 1800 |
| }, |
| { |
| "epoch": 5.9171597633136095, |
| "grad_norm": 6.96875, |
| "learning_rate": 0.00019744105246469263, |
| "loss": 2.5031, |
| "step": 2000 |
| }, |
| { |
| "epoch": 6.21301775147929, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.7905, |
| "eval_samples_per_second": 36.694, |
| "eval_steps_per_second": 2.31, |
| "step": 2100 |
| }, |
| { |
| "epoch": 7.100591715976331, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8163, |
| "eval_samples_per_second": 36.669, |
| "eval_steps_per_second": 2.309, |
| "step": 2400 |
| }, |
| { |
| "epoch": 7.396449704142012, |
| "grad_norm": 6.5625, |
| "learning_rate": 0.0001953038210948861, |
| "loss": 2.0242, |
| "step": 2500 |
| }, |
| { |
| "epoch": 7.988165680473373, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9078, |
| "eval_samples_per_second": 36.578, |
| "eval_steps_per_second": 2.303, |
| "step": 2700 |
| }, |
| { |
| "epoch": 8.875739644970414, |
| "grad_norm": 10.875, |
| "learning_rate": 0.00019254212296427044, |
| "loss": 1.7989, |
| "step": 3000 |
| }, |
| { |
| "epoch": 8.875739644970414, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9591, |
| "eval_samples_per_second": 36.527, |
| "eval_steps_per_second": 2.3, |
| "step": 3000 |
| }, |
| { |
| "epoch": 9.763313609467456, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8178, |
| "eval_samples_per_second": 36.667, |
| "eval_steps_per_second": 2.309, |
| "step": 3300 |
| }, |
| { |
| "epoch": 10.355029585798816, |
| "grad_norm": 9.375, |
| "learning_rate": 0.00018917405376582145, |
| "loss": 1.5521, |
| "step": 3500 |
| }, |
| { |
| "epoch": 10.650887573964496, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.5472, |
| "eval_samples_per_second": 36.939, |
| "eval_steps_per_second": 2.326, |
| "step": 3600 |
| }, |
| { |
| "epoch": 11.538461538461538, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.7909, |
| "eval_samples_per_second": 36.694, |
| "eval_steps_per_second": 2.31, |
| "step": 3900 |
| }, |
| { |
| "epoch": 11.834319526627219, |
| "grad_norm": 5.875, |
| "learning_rate": 0.00018522168236559695, |
| "loss": 1.3787, |
| "step": 4000 |
| }, |
| { |
| "epoch": 12.42603550295858, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9259, |
| "eval_samples_per_second": 36.56, |
| "eval_steps_per_second": 2.302, |
| "step": 4200 |
| }, |
| { |
| "epoch": 13.31360946745562, |
| "grad_norm": 4.5625, |
| "learning_rate": 0.00018071090619916093, |
| "loss": 1.2505, |
| "step": 4500 |
| }, |
| { |
| "epoch": 13.31360946745562, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8178, |
| "eval_samples_per_second": 36.667, |
| "eval_steps_per_second": 2.309, |
| "step": 4500 |
| }, |
| { |
| "epoch": 14.201183431952662, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8898, |
| "eval_samples_per_second": 36.596, |
| "eval_steps_per_second": 2.304, |
| "step": 4800 |
| }, |
| { |
| "epoch": 14.792899408284024, |
| "grad_norm": 3.921875, |
| "learning_rate": 0.00017567128158176953, |
| "loss": 1.1568, |
| "step": 5000 |
| }, |
| { |
| "epoch": 15.088757396449704, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.6043, |
| "eval_samples_per_second": 36.881, |
| "eval_steps_per_second": 2.322, |
| "step": 5100 |
| }, |
| { |
| "epoch": 15.976331360946746, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8561, |
| "eval_samples_per_second": 36.629, |
| "eval_steps_per_second": 2.306, |
| "step": 5400 |
| }, |
| { |
| "epoch": 16.272189349112427, |
| "grad_norm": 2.984375, |
| "learning_rate": 0.00017013583004418993, |
| "loss": 1.0789, |
| "step": 5500 |
| }, |
| { |
| "epoch": 16.86390532544379, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.913, |
| "eval_samples_per_second": 36.572, |
| "eval_steps_per_second": 2.303, |
| "step": 5700 |
| }, |
| { |
| "epoch": 17.75147928994083, |
| "grad_norm": 3.53125, |
| "learning_rate": 0.000164140821963114, |
| "loss": 0.9769, |
| "step": 6000 |
| }, |
| { |
| "epoch": 17.75147928994083, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.802, |
| "eval_samples_per_second": 36.683, |
| "eval_steps_per_second": 2.31, |
| "step": 6000 |
| }, |
| { |
| "epoch": 18.63905325443787, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9543, |
| "eval_samples_per_second": 36.532, |
| "eval_steps_per_second": 2.3, |
| "step": 6300 |
| }, |
| { |
| "epoch": 19.23076923076923, |
| "grad_norm": 3.046875, |
| "learning_rate": 0.00015772553890390197, |
| "loss": 0.913, |
| "step": 6500 |
| }, |
| { |
| "epoch": 19.526627218934912, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 37.0561, |
| "eval_samples_per_second": 36.431, |
| "eval_steps_per_second": 2.294, |
| "step": 6600 |
| }, |
| { |
| "epoch": 20.414201183431953, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9, |
| "eval_samples_per_second": 36.585, |
| "eval_steps_per_second": 2.304, |
| "step": 6900 |
| }, |
| { |
| "epoch": 20.71005917159763, |
| "grad_norm": 3.1875, |
| "learning_rate": 0.00015093201623287631, |
| "loss": 0.8395, |
| "step": 7000 |
| }, |
| { |
| "epoch": 21.301775147928993, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8939, |
| "eval_samples_per_second": 36.591, |
| "eval_steps_per_second": 2.304, |
| "step": 7200 |
| }, |
| { |
| "epoch": 22.189349112426036, |
| "grad_norm": 2.765625, |
| "learning_rate": 0.00014380476768566824, |
| "loss": 0.7613, |
| "step": 7500 |
| }, |
| { |
| "epoch": 22.189349112426036, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 37.0429, |
| "eval_samples_per_second": 36.444, |
| "eval_steps_per_second": 2.295, |
| "step": 7500 |
| }, |
| { |
| "epoch": 23.076923076923077, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8494, |
| "eval_samples_per_second": 36.636, |
| "eval_steps_per_second": 2.307, |
| "step": 7800 |
| }, |
| { |
| "epoch": 23.668639053254438, |
| "grad_norm": 3.359375, |
| "learning_rate": 0.00013639049369634876, |
| "loss": 0.7133, |
| "step": 8000 |
| }, |
| { |
| "epoch": 23.964497041420117, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.6719, |
| "eval_samples_per_second": 36.813, |
| "eval_steps_per_second": 2.318, |
| "step": 8100 |
| }, |
| { |
| "epoch": 24.85207100591716, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8698, |
| "eval_samples_per_second": 36.615, |
| "eval_steps_per_second": 2.305, |
| "step": 8400 |
| }, |
| { |
| "epoch": 25.14792899408284, |
| "grad_norm": 4.1875, |
| "learning_rate": 0.00012873777539848283, |
| "loss": 0.6546, |
| "step": 8500 |
| }, |
| { |
| "epoch": 25.7396449704142, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.693, |
| "eval_samples_per_second": 36.792, |
| "eval_steps_per_second": 2.317, |
| "step": 8700 |
| }, |
| { |
| "epoch": 26.62721893491124, |
| "grad_norm": 2.40625, |
| "learning_rate": 0.00012089675630312754, |
| "loss": 0.5857, |
| "step": 9000 |
| }, |
| { |
| "epoch": 26.62721893491124, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.5163, |
| "eval_samples_per_second": 36.97, |
| "eval_steps_per_second": 2.328, |
| "step": 9000 |
| }, |
| { |
| "epoch": 27.514792899408285, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9675, |
| "eval_samples_per_second": 36.519, |
| "eval_steps_per_second": 2.299, |
| "step": 9300 |
| }, |
| { |
| "epoch": 28.106508875739646, |
| "grad_norm": 2.265625, |
| "learning_rate": 0.00011291881373954065, |
| "loss": 0.5462, |
| "step": 9500 |
| }, |
| { |
| "epoch": 28.402366863905325, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8575, |
| "eval_samples_per_second": 36.628, |
| "eval_steps_per_second": 2.306, |
| "step": 9600 |
| }, |
| { |
| "epoch": 29.28994082840237, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.7695, |
| "eval_samples_per_second": 36.715, |
| "eval_steps_per_second": 2.312, |
| "step": 9900 |
| }, |
| { |
| "epoch": 29.585798816568047, |
| "grad_norm": 1.921875, |
| "learning_rate": 0.00010485622221144484, |
| "loss": 0.4881, |
| "step": 10000 |
| }, |
| { |
| "epoch": 30.17751479289941, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8382, |
| "eval_samples_per_second": 36.647, |
| "eval_steps_per_second": 2.307, |
| "step": 10200 |
| }, |
| { |
| "epoch": 31.06508875739645, |
| "grad_norm": 2.140625, |
| "learning_rate": 9.676181087466444e-05, |
| "loss": 0.4573, |
| "step": 10500 |
| }, |
| { |
| "epoch": 31.06508875739645, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8178, |
| "eval_samples_per_second": 36.667, |
| "eval_steps_per_second": 2.309, |
| "step": 10500 |
| }, |
| { |
| "epoch": 31.952662721893493, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8459, |
| "eval_samples_per_second": 36.639, |
| "eval_steps_per_second": 2.307, |
| "step": 10800 |
| }, |
| { |
| "epoch": 32.544378698224854, |
| "grad_norm": 1.953125, |
| "learning_rate": 8.868861738047158e-05, |
| "loss": 0.4072, |
| "step": 11000 |
| }, |
| { |
| "epoch": 32.84023668639053, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9905, |
| "eval_samples_per_second": 36.496, |
| "eval_steps_per_second": 2.298, |
| "step": 11100 |
| }, |
| { |
| "epoch": 33.72781065088758, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 37.2072, |
| "eval_samples_per_second": 36.283, |
| "eval_steps_per_second": 2.285, |
| "step": 11400 |
| }, |
| { |
| "epoch": 34.023668639053255, |
| "grad_norm": 2.1875, |
| "learning_rate": 8.068954035279121e-05, |
| "loss": 0.3735, |
| "step": 11500 |
| }, |
| { |
| "epoch": 34.61538461538461, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8893, |
| "eval_samples_per_second": 36.596, |
| "eval_steps_per_second": 2.304, |
| "step": 11700 |
| }, |
| { |
| "epoch": 35.50295857988166, |
| "grad_norm": 1.8046875, |
| "learning_rate": 7.281699277636572e-05, |
| "loss": 0.3356, |
| "step": 12000 |
| }, |
| { |
| "epoch": 35.50295857988166, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8597, |
| "eval_samples_per_second": 36.625, |
| "eval_steps_per_second": 2.306, |
| "step": 12000 |
| }, |
| { |
| "epoch": 36.3905325443787, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 37.126, |
| "eval_samples_per_second": 36.363, |
| "eval_steps_per_second": 2.289, |
| "step": 12300 |
| }, |
| { |
| "epoch": 36.98224852071006, |
| "grad_norm": 1.7578125, |
| "learning_rate": 6.512255856701177e-05, |
| "loss": 0.3124, |
| "step": 12500 |
| }, |
| { |
| "epoch": 37.27810650887574, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.7155, |
| "eval_samples_per_second": 36.769, |
| "eval_steps_per_second": 2.315, |
| "step": 12600 |
| }, |
| { |
| "epoch": 38.16568047337278, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 37.3255, |
| "eval_samples_per_second": 36.168, |
| "eval_steps_per_second": 2.277, |
| "step": 12900 |
| }, |
| { |
| "epoch": 38.46153846153846, |
| "grad_norm": 1.25, |
| "learning_rate": 5.765665457425102e-05, |
| "loss": 0.2847, |
| "step": 13000 |
| }, |
| { |
| "epoch": 39.053254437869825, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8115, |
| "eval_samples_per_second": 36.673, |
| "eval_steps_per_second": 2.309, |
| "step": 13200 |
| }, |
| { |
| "epoch": 39.94082840236686, |
| "grad_norm": 1.90625, |
| "learning_rate": 5.0468200231001286e-05, |
| "loss": 0.2682, |
| "step": 13500 |
| }, |
| { |
| "epoch": 39.94082840236686, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8329, |
| "eval_samples_per_second": 36.652, |
| "eval_steps_per_second": 2.308, |
| "step": 13500 |
| }, |
| { |
| "epoch": 40.828402366863905, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9982, |
| "eval_samples_per_second": 36.488, |
| "eval_steps_per_second": 2.297, |
| "step": 13800 |
| }, |
| { |
| "epoch": 41.42011834319526, |
| "grad_norm": 0.79296875, |
| "learning_rate": 4.360429701490934e-05, |
| "loss": 0.2514, |
| "step": 14000 |
| }, |
| { |
| "epoch": 41.71597633136095, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9527, |
| "eval_samples_per_second": 36.533, |
| "eval_steps_per_second": 2.3, |
| "step": 14100 |
| }, |
| { |
| "epoch": 42.603550295857985, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8048, |
| "eval_samples_per_second": 36.68, |
| "eval_steps_per_second": 2.309, |
| "step": 14400 |
| }, |
| { |
| "epoch": 42.89940828402367, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.710991982161555e-05, |
| "loss": 0.2398, |
| "step": 14500 |
| }, |
| { |
| "epoch": 43.49112426035503, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.6719, |
| "eval_samples_per_second": 36.813, |
| "eval_steps_per_second": 2.318, |
| "step": 14700 |
| }, |
| { |
| "epoch": 44.37869822485207, |
| "grad_norm": 0.65234375, |
| "learning_rate": 3.102762227218957e-05, |
| "loss": 0.2324, |
| "step": 15000 |
| }, |
| { |
| "epoch": 44.37869822485207, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9077, |
| "eval_samples_per_second": 36.578, |
| "eval_steps_per_second": 2.303, |
| "step": 15000 |
| }, |
| { |
| "epoch": 45.26627218934911, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9609, |
| "eval_samples_per_second": 36.525, |
| "eval_steps_per_second": 2.3, |
| "step": 15300 |
| }, |
| { |
| "epoch": 45.857988165680474, |
| "grad_norm": 0.6796875, |
| "learning_rate": 2.5397257885675397e-05, |
| "loss": 0.2276, |
| "step": 15500 |
| }, |
| { |
| "epoch": 46.15384615384615, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9434, |
| "eval_samples_per_second": 36.542, |
| "eval_steps_per_second": 2.301, |
| "step": 15600 |
| }, |
| { |
| "epoch": 47.0414201183432, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 37.1485, |
| "eval_samples_per_second": 36.341, |
| "eval_steps_per_second": 2.288, |
| "step": 15900 |
| }, |
| { |
| "epoch": 47.337278106508876, |
| "grad_norm": 0.73046875, |
| "learning_rate": 2.025571894372794e-05, |
| "loss": 0.2244, |
| "step": 16000 |
| }, |
| { |
| "epoch": 47.928994082840234, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 37.0304, |
| "eval_samples_per_second": 36.457, |
| "eval_steps_per_second": 2.295, |
| "step": 16200 |
| }, |
| { |
| "epoch": 48.81656804733728, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.563669475839956e-05, |
| "loss": 0.2242, |
| "step": 16500 |
| }, |
| { |
| "epoch": 48.81656804733728, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8527, |
| "eval_samples_per_second": 36.632, |
| "eval_steps_per_second": 2.306, |
| "step": 16500 |
| }, |
| { |
| "epoch": 49.70414201183432, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 37.017, |
| "eval_samples_per_second": 36.47, |
| "eval_steps_per_second": 2.296, |
| "step": 16800 |
| }, |
| { |
| "epoch": 50.29585798816568, |
| "grad_norm": 0.71875, |
| "learning_rate": 1.1570450926997655e-05, |
| "loss": 0.222, |
| "step": 17000 |
| }, |
| { |
| "epoch": 50.59171597633136, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 37.0005, |
| "eval_samples_per_second": 36.486, |
| "eval_steps_per_second": 2.297, |
| "step": 17100 |
| }, |
| { |
| "epoch": 51.4792899408284, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 37.0483, |
| "eval_samples_per_second": 36.439, |
| "eval_steps_per_second": 2.294, |
| "step": 17400 |
| }, |
| { |
| "epoch": 51.77514792899408, |
| "grad_norm": 0.73046875, |
| "learning_rate": 8.083631020418791e-06, |
| "loss": 0.2219, |
| "step": 17500 |
| }, |
| { |
| "epoch": 52.366863905325445, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8489, |
| "eval_samples_per_second": 36.636, |
| "eval_steps_per_second": 2.307, |
| "step": 17700 |
| }, |
| { |
| "epoch": 53.25443786982248, |
| "grad_norm": 0.99609375, |
| "learning_rate": 5.199082004372957e-06, |
| "loss": 0.2215, |
| "step": 18000 |
| }, |
| { |
| "epoch": 53.25443786982248, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8278, |
| "eval_samples_per_second": 36.657, |
| "eval_steps_per_second": 2.308, |
| "step": 18000 |
| }, |
| { |
| "epoch": 54.142011834319526, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8191, |
| "eval_samples_per_second": 36.666, |
| "eval_steps_per_second": 2.309, |
| "step": 18300 |
| }, |
| { |
| "epoch": 54.73372781065089, |
| "grad_norm": 1.1328125, |
| "learning_rate": 2.9357045374040825e-06, |
| "loss": 0.2224, |
| "step": 18500 |
| }, |
| { |
| "epoch": 55.02958579881657, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.9293, |
| "eval_samples_per_second": 36.556, |
| "eval_steps_per_second": 2.302, |
| "step": 18600 |
| }, |
| { |
| "epoch": 55.917159763313606, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8842, |
| "eval_samples_per_second": 36.601, |
| "eval_steps_per_second": 2.305, |
| "step": 18900 |
| }, |
| { |
| "epoch": 56.21301775147929, |
| "grad_norm": 0.7421875, |
| "learning_rate": 1.30832912661093e-06, |
| "loss": 0.2211, |
| "step": 19000 |
| }, |
| { |
| "epoch": 56.80473372781065, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.681, |
| "eval_samples_per_second": 36.804, |
| "eval_steps_per_second": 2.317, |
| "step": 19200 |
| }, |
| { |
| "epoch": 57.69230769230769, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.2761895254306287e-07, |
| "loss": 0.2212, |
| "step": 19500 |
| }, |
| { |
| "epoch": 57.69230769230769, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.6389, |
| "eval_samples_per_second": 36.846, |
| "eval_steps_per_second": 2.32, |
| "step": 19500 |
| }, |
| { |
| "epoch": 58.57988165680474, |
| "eval_loss": 0.5067562460899353, |
| "eval_runtime": 36.8606, |
| "eval_samples_per_second": 36.624, |
| "eval_steps_per_second": 2.306, |
| "step": 19800 |
| }, |
| { |
| "epoch": 59.171597633136095, |
| "grad_norm": 0.78515625, |
| "learning_rate": 0.0, |
| "loss": 0.2214, |
| "step": 20000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 60, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.745192353814282e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|