| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9761904761904763, |
| "eval_steps": 50, |
| "global_step": 750, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0992063492063492, |
| "grad_norm": 2.4192488193511963, |
| "learning_rate": 4.7619047619047615e-06, |
| "loss": 1.3237, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1984126984126984, |
| "grad_norm": 0.8920113444328308, |
| "learning_rate": 9.722222222222223e-06, |
| "loss": 0.6721, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1984126984126984, |
| "eval_loss": 0.48523738980293274, |
| "eval_runtime": 17.2583, |
| "eval_samples_per_second": 4.867, |
| "eval_steps_per_second": 2.434, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2976190476190476, |
| "grad_norm": 0.5771058797836304, |
| "learning_rate": 1.4682539682539683e-05, |
| "loss": 0.3509, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.3968253968253968, |
| "grad_norm": 0.5407119393348694, |
| "learning_rate": 1.9642857142857145e-05, |
| "loss": 0.2735, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3968253968253968, |
| "eval_loss": 0.28276199102401733, |
| "eval_runtime": 17.2337, |
| "eval_samples_per_second": 4.874, |
| "eval_steps_per_second": 2.437, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.49603174603174605, |
| "grad_norm": 0.496040016412735, |
| "learning_rate": 2.4603174603174602e-05, |
| "loss": 0.2463, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5952380952380952, |
| "grad_norm": 0.4996989667415619, |
| "learning_rate": 2.9563492063492066e-05, |
| "loss": 0.2279, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5952380952380952, |
| "eval_loss": 0.24540141224861145, |
| "eval_runtime": 17.2092, |
| "eval_samples_per_second": 4.881, |
| "eval_steps_per_second": 2.441, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6944444444444444, |
| "grad_norm": 0.5580428838729858, |
| "learning_rate": 3.4523809523809526e-05, |
| "loss": 0.214, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7936507936507936, |
| "grad_norm": 0.4554564356803894, |
| "learning_rate": 3.9484126984126986e-05, |
| "loss": 0.2046, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7936507936507936, |
| "eval_loss": 0.22907117009162903, |
| "eval_runtime": 17.3315, |
| "eval_samples_per_second": 4.847, |
| "eval_steps_per_second": 2.423, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8928571428571429, |
| "grad_norm": 0.5286790132522583, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 0.1943, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.9920634920634921, |
| "grad_norm": 0.4137091040611267, |
| "learning_rate": 4.940476190476191e-05, |
| "loss": 0.1885, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9920634920634921, |
| "eval_loss": 0.20971575379371643, |
| "eval_runtime": 17.2857, |
| "eval_samples_per_second": 4.859, |
| "eval_steps_per_second": 2.43, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0912698412698412, |
| "grad_norm": 0.4036062955856323, |
| "learning_rate": 5.436507936507936e-05, |
| "loss": 0.1656, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.1904761904761905, |
| "grad_norm": 0.43942078948020935, |
| "learning_rate": 5.932539682539683e-05, |
| "loss": 0.1668, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.1904761904761905, |
| "eval_loss": 0.19860731065273285, |
| "eval_runtime": 17.4085, |
| "eval_samples_per_second": 4.825, |
| "eval_steps_per_second": 2.413, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.2896825396825398, |
| "grad_norm": 0.38786229491233826, |
| "learning_rate": 6.428571428571429e-05, |
| "loss": 0.1575, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "grad_norm": 0.3493235409259796, |
| "learning_rate": 6.924603174603174e-05, |
| "loss": 0.1619, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "eval_loss": 0.19389225542545319, |
| "eval_runtime": 17.2223, |
| "eval_samples_per_second": 4.877, |
| "eval_steps_per_second": 2.439, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.4880952380952381, |
| "grad_norm": 0.39436835050582886, |
| "learning_rate": 7.420634920634921e-05, |
| "loss": 0.1524, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.5873015873015874, |
| "grad_norm": 0.3463725745677948, |
| "learning_rate": 7.916666666666666e-05, |
| "loss": 0.1482, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.5873015873015874, |
| "eval_loss": 0.185992032289505, |
| "eval_runtime": 17.0685, |
| "eval_samples_per_second": 4.921, |
| "eval_steps_per_second": 2.461, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.6865079365079365, |
| "grad_norm": 0.3713228702545166, |
| "learning_rate": 8.412698412698413e-05, |
| "loss": 0.149, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "grad_norm": 0.30970633029937744, |
| "learning_rate": 8.90873015873016e-05, |
| "loss": 0.1486, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "eval_loss": 0.17747902870178223, |
| "eval_runtime": 17.1226, |
| "eval_samples_per_second": 4.906, |
| "eval_steps_per_second": 2.453, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.8849206349206349, |
| "grad_norm": 0.3172999918460846, |
| "learning_rate": 9.404761904761905e-05, |
| "loss": 0.147, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.9841269841269842, |
| "grad_norm": 0.3906099498271942, |
| "learning_rate": 9.900793650793652e-05, |
| "loss": 0.1427, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.9841269841269842, |
| "eval_loss": 0.17465326189994812, |
| "eval_runtime": 17.3304, |
| "eval_samples_per_second": 4.847, |
| "eval_steps_per_second": 2.423, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.0833333333333335, |
| "grad_norm": 0.402475506067276, |
| "learning_rate": 9.999520325413887e-05, |
| "loss": 0.1208, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.1825396825396823, |
| "grad_norm": 0.37534043192863464, |
| "learning_rate": 9.997571805142639e-05, |
| "loss": 0.1163, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.1825396825396823, |
| "eval_loss": 0.1724633127450943, |
| "eval_runtime": 17.1218, |
| "eval_samples_per_second": 4.906, |
| "eval_steps_per_second": 2.453, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.2817460317460316, |
| "grad_norm": 0.30866020917892456, |
| "learning_rate": 9.994125043229752e-05, |
| "loss": 0.1104, |
| "step": 575 |
| }, |
| { |
| "epoch": 2.380952380952381, |
| "grad_norm": 0.3464547395706177, |
| "learning_rate": 9.989181072993494e-05, |
| "loss": 0.1111, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.380952380952381, |
| "eval_loss": 0.16887226700782776, |
| "eval_runtime": 17.1039, |
| "eval_samples_per_second": 4.911, |
| "eval_steps_per_second": 2.456, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.4801587301587302, |
| "grad_norm": 0.3208065927028656, |
| "learning_rate": 9.982741376606078e-05, |
| "loss": 0.1153, |
| "step": 625 |
| }, |
| { |
| "epoch": 2.5793650793650795, |
| "grad_norm": 0.2699253559112549, |
| "learning_rate": 9.97480788464933e-05, |
| "loss": 0.106, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.5793650793650795, |
| "eval_loss": 0.16333948075771332, |
| "eval_runtime": 17.2826, |
| "eval_samples_per_second": 4.86, |
| "eval_steps_per_second": 2.43, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.678571428571429, |
| "grad_norm": 0.2834033668041229, |
| "learning_rate": 9.965382975535902e-05, |
| "loss": 0.1121, |
| "step": 675 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 0.2606027126312256, |
| "learning_rate": 9.954469474796241e-05, |
| "loss": 0.1127, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "eval_loss": 0.1618286520242691, |
| "eval_runtime": 17.132, |
| "eval_samples_per_second": 4.903, |
| "eval_steps_per_second": 2.452, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.876984126984127, |
| "grad_norm": 0.30125725269317627, |
| "learning_rate": 9.942070654231517e-05, |
| "loss": 0.1044, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.9761904761904763, |
| "grad_norm": 0.29861846566200256, |
| "learning_rate": 9.928190230932746e-05, |
| "loss": 0.1136, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.9761904761904763, |
| "eval_loss": 0.1563536822795868, |
| "eval_runtime": 17.1692, |
| "eval_samples_per_second": 4.892, |
| "eval_steps_per_second": 2.446, |
| "step": 750 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 5040, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.9601356198961e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|