| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.603550295857988, |
| "eval_steps": 5, |
| "global_step": 110, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.11834319526627218, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 2.6428, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.11834319526627218, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.5284, |
| "eval_samples_per_second": 0.606, |
| "eval_steps_per_second": 0.153, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.23668639053254437, |
| "grad_norm": 0.0, |
| "learning_rate": 1.992981096013517e-05, |
| "loss": 2.6411, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.23668639053254437, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.7203, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.35502958579881655, |
| "grad_norm": 0.0, |
| "learning_rate": 1.964635581908359e-05, |
| "loss": 2.6552, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.35502958579881655, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.7242, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.47337278106508873, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9151456172430186e-05, |
| "loss": 2.6293, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.47337278106508873, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.967, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.591715976331361, |
| "grad_norm": 0.0, |
| "learning_rate": 1.845596003501826e-05, |
| "loss": 2.6225, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.591715976331361, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 268.4624, |
| "eval_samples_per_second": 0.603, |
| "eval_steps_per_second": 0.153, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.7100591715976331, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7575112421616203e-05, |
| "loss": 2.6365, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.7100591715976331, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.8379, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8284023668639053, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6528221181905217e-05, |
| "loss": 2.6304, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.8284023668639053, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.5669, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.9467455621301775, |
| "grad_norm": 0.0, |
| "learning_rate": 1.533823377964791e-05, |
| "loss": 2.6426, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.9467455621301775, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.4996, |
| "eval_samples_per_second": 0.606, |
| "eval_steps_per_second": 0.153, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.0650887573964498, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4031234292879726e-05, |
| "loss": 2.6404, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.0650887573964498, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.7725, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.183431952662722, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2635871660690677e-05, |
| "loss": 2.6214, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.183431952662722, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.7285, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.301775147928994, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1182731709213658e-05, |
| "loss": 2.636, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.301775147928994, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.906, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.4201183431952662, |
| "grad_norm": 0.0, |
| "learning_rate": 9.703666721774403e-06, |
| "loss": 2.6574, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.4201183431952662, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.4678, |
| "eval_samples_per_second": 0.606, |
| "eval_steps_per_second": 0.153, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 8.231097248774273e-06, |
| "loss": 2.6211, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.8967, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.6568047337278107, |
| "grad_norm": 0.0, |
| "learning_rate": 6.797301461371626e-06, |
| "loss": 2.6171, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.6568047337278107, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.422, |
| "eval_samples_per_second": 0.606, |
| "eval_steps_per_second": 0.153, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.7751479289940828, |
| "grad_norm": 0.0, |
| "learning_rate": 5.43370762606287e-06, |
| "loss": 2.6375, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.7751479289940828, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 268.3368, |
| "eval_samples_per_second": 0.604, |
| "eval_steps_per_second": 0.153, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.893491124260355, |
| "grad_norm": 0.0, |
| "learning_rate": 4.170205208855281e-06, |
| "loss": 2.6548, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.893491124260355, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.6946, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.0118343195266273, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0344897093700333e-06, |
| "loss": 2.6684, |
| "step": 85 |
| }, |
| { |
| "epoch": 2.0118343195266273, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.6543, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 85 |
| }, |
| { |
| "epoch": 2.1301775147928996, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0514555858664663e-06, |
| "loss": 2.6305, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.1301775147928996, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.8601, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.2485207100591715, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2426505780436326e-06, |
| "loss": 2.6421, |
| "step": 95 |
| }, |
| { |
| "epoch": 2.2485207100591715, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.7705, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 95 |
| }, |
| { |
| "epoch": 2.366863905325444, |
| "grad_norm": 0.0, |
| "learning_rate": 6.258033886587911e-07, |
| "loss": 2.6469, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.366863905325444, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.5145, |
| "eval_samples_per_second": 0.606, |
| "eval_steps_per_second": 0.153, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.485207100591716, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1443507700495968e-07, |
| "loss": 2.6275, |
| "step": 105 |
| }, |
| { |
| "epoch": 2.485207100591716, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.7646, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 105 |
| }, |
| { |
| "epoch": 2.603550295857988, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7562682356786488e-08, |
| "loss": 2.6352, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.603550295857988, |
| "eval_loss": 2.5045294761657715, |
| "eval_runtime": 267.6496, |
| "eval_samples_per_second": 0.605, |
| "eval_steps_per_second": 0.153, |
| "step": 110 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 112, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 5, |
| "total_flos": 5.736198700007424e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|