| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9878260869565216, |
| "eval_steps": 500, |
| "global_step": 213, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06956521739130435, |
| "grad_norm": 1.1811336278915405, |
| "learning_rate": 4.99565044765711e-05, |
| "loss": 0.7566, |
| "num_input_tokens_seen": 137920, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.1391304347826087, |
| "grad_norm": 0.6466936469078064, |
| "learning_rate": 4.978006327248537e-05, |
| "loss": 0.6594, |
| "num_input_tokens_seen": 271824, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.20869565217391303, |
| "grad_norm": 0.7790480852127075, |
| "learning_rate": 4.946891632198452e-05, |
| "loss": 0.6611, |
| "num_input_tokens_seen": 411200, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.2782608695652174, |
| "grad_norm": 0.6762462258338928, |
| "learning_rate": 4.9024755036496795e-05, |
| "loss": 0.6368, |
| "num_input_tokens_seen": 544816, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 0.5914238095283508, |
| "learning_rate": 4.8449993900474187e-05, |
| "loss": 0.6475, |
| "num_input_tokens_seen": 682528, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.41739130434782606, |
| "grad_norm": 0.7006980776786804, |
| "learning_rate": 4.774775734612604e-05, |
| "loss": 0.6333, |
| "num_input_tokens_seen": 813456, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.48695652173913045, |
| "grad_norm": 0.6066564321517944, |
| "learning_rate": 4.6921862768838855e-05, |
| "loss": 0.581, |
| "num_input_tokens_seen": 949040, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.5565217391304348, |
| "grad_norm": 0.5768821835517883, |
| "learning_rate": 4.597679977561122e-05, |
| "loss": 0.5915, |
| "num_input_tokens_seen": 1085984, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.6260869565217392, |
| "grad_norm": 0.7307961583137512, |
| "learning_rate": 4.491770577931057e-05, |
| "loss": 0.5869, |
| "num_input_tokens_seen": 1217520, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 0.6931986808776855, |
| "learning_rate": 4.375033807142267e-05, |
| "loss": 0.5684, |
| "num_input_tokens_seen": 1353968, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7652173913043478, |
| "grad_norm": 0.5990815758705139, |
| "learning_rate": 4.2481042525107854e-05, |
| "loss": 0.5967, |
| "num_input_tokens_seen": 1491728, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.8347826086956521, |
| "grad_norm": 0.5957231521606445, |
| "learning_rate": 4.111671909869582e-05, |
| "loss": 0.5831, |
| "num_input_tokens_seen": 1625184, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.9043478260869565, |
| "grad_norm": 0.6507815718650818, |
| "learning_rate": 3.9664784327143955e-05, |
| "loss": 0.5782, |
| "num_input_tokens_seen": 1758384, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.9739130434782609, |
| "grad_norm": 0.6880366802215576, |
| "learning_rate": 3.813313100535747e-05, |
| "loss": 0.5842, |
| "num_input_tokens_seen": 1893088, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.0556521739130436, |
| "grad_norm": 0.6173655390739441, |
| "learning_rate": 3.653008528253509e-05, |
| "loss": 0.5629, |
| "num_input_tokens_seen": 2051200, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.1252173913043477, |
| "grad_norm": 0.7451260685920715, |
| "learning_rate": 3.486436140077764e-05, |
| "loss": 0.5549, |
| "num_input_tokens_seen": 2183136, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.1947826086956521, |
| "grad_norm": 0.6699721217155457, |
| "learning_rate": 3.3145014324002944e-05, |
| "loss": 0.5263, |
| "num_input_tokens_seen": 2317616, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.2643478260869565, |
| "grad_norm": 0.6695335507392883, |
| "learning_rate": 3.1381390514678696e-05, |
| "loss": 0.505, |
| "num_input_tokens_seen": 2457440, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.333913043478261, |
| "grad_norm": 0.7196683287620544, |
| "learning_rate": 2.9583077125953716e-05, |
| "loss": 0.555, |
| "num_input_tokens_seen": 2592320, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.4034782608695653, |
| "grad_norm": 0.6481020450592041, |
| "learning_rate": 2.775984988538175e-05, |
| "loss": 0.5266, |
| "num_input_tokens_seen": 2729856, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.4730434782608697, |
| "grad_norm": 0.8577863574028015, |
| "learning_rate": 2.592161995354479e-05, |
| "loss": 0.5364, |
| "num_input_tokens_seen": 2866464, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.542608695652174, |
| "grad_norm": 0.7868128418922424, |
| "learning_rate": 2.4078380046455222e-05, |
| "loss": 0.5215, |
| "num_input_tokens_seen": 3003392, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.6121739130434782, |
| "grad_norm": 0.8645579218864441, |
| "learning_rate": 2.224015011461826e-05, |
| "loss": 0.5354, |
| "num_input_tokens_seen": 3141984, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.6817391304347826, |
| "grad_norm": 0.6744593381881714, |
| "learning_rate": 2.0416922874046293e-05, |
| "loss": 0.5098, |
| "num_input_tokens_seen": 3281264, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.7513043478260868, |
| "grad_norm": 0.808380126953125, |
| "learning_rate": 1.8618609485321313e-05, |
| "loss": 0.4939, |
| "num_input_tokens_seen": 3418208, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.8208695652173912, |
| "grad_norm": 0.7624004483222961, |
| "learning_rate": 1.6854985675997066e-05, |
| "loss": 0.4919, |
| "num_input_tokens_seen": 3552624, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.8904347826086956, |
| "grad_norm": 0.8070163726806641, |
| "learning_rate": 1.5135638599222368e-05, |
| "loss": 0.5245, |
| "num_input_tokens_seen": 3686752, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.9934524297714233, |
| "learning_rate": 1.3469914717464916e-05, |
| "loss": 0.5566, |
| "num_input_tokens_seen": 3809216, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.0417391304347827, |
| "grad_norm": 0.855629563331604, |
| "learning_rate": 1.1866868994642535e-05, |
| "loss": 0.5672, |
| "num_input_tokens_seen": 3971664, |
| "step": 145 |
| }, |
| { |
| "epoch": 2.111304347826087, |
| "grad_norm": 0.8984305262565613, |
| "learning_rate": 1.0335215672856046e-05, |
| "loss": 0.5116, |
| "num_input_tokens_seen": 4108736, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.1808695652173915, |
| "grad_norm": 0.9083377718925476, |
| "learning_rate": 8.883280901304187e-06, |
| "loss": 0.4855, |
| "num_input_tokens_seen": 4246544, |
| "step": 155 |
| }, |
| { |
| "epoch": 2.2504347826086954, |
| "grad_norm": 0.8181976079940796, |
| "learning_rate": 7.518957474892149e-06, |
| "loss": 0.4634, |
| "num_input_tokens_seen": 4385600, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 0.8508033156394958, |
| "learning_rate": 6.2496619285773356e-06, |
| "loss": 0.4784, |
| "num_input_tokens_seen": 4515424, |
| "step": 165 |
| }, |
| { |
| "epoch": 2.3895652173913042, |
| "grad_norm": 0.813805103302002, |
| "learning_rate": 5.082294220689435e-06, |
| "loss": 0.4562, |
| "num_input_tokens_seen": 4656832, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.4591304347826086, |
| "grad_norm": 0.8060736656188965, |
| "learning_rate": 4.023200224388787e-06, |
| "loss": 0.4635, |
| "num_input_tokens_seen": 4787584, |
| "step": 175 |
| }, |
| { |
| "epoch": 2.528695652173913, |
| "grad_norm": 0.9117311239242554, |
| "learning_rate": 3.078137231161146e-06, |
| "loss": 0.5231, |
| "num_input_tokens_seen": 4920640, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.5982608695652174, |
| "grad_norm": 0.751384437084198, |
| "learning_rate": 2.2522426538739566e-06, |
| "loss": 0.4908, |
| "num_input_tokens_seen": 5061360, |
| "step": 185 |
| }, |
| { |
| "epoch": 2.667826086956522, |
| "grad_norm": 0.9320093989372253, |
| "learning_rate": 1.5500060995258137e-06, |
| "loss": 0.5252, |
| "num_input_tokens_seen": 5192048, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.737391304347826, |
| "grad_norm": 0.7602404356002808, |
| "learning_rate": 9.75244963503205e-07, |
| "loss": 0.4728, |
| "num_input_tokens_seen": 5317552, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.8069565217391306, |
| "grad_norm": 0.7692611813545227, |
| "learning_rate": 5.310836780154899e-07, |
| "loss": 0.4781, |
| "num_input_tokens_seen": 5456864, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.8765217391304345, |
| "grad_norm": 0.9036366939544678, |
| "learning_rate": 2.1993672751463579e-07, |
| "loss": 0.5074, |
| "num_input_tokens_seen": 5592320, |
| "step": 205 |
| }, |
| { |
| "epoch": 2.9460869565217394, |
| "grad_norm": 0.8938913345336914, |
| "learning_rate": 4.3495523428899174e-08, |
| "loss": 0.454, |
| "num_input_tokens_seen": 5728832, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.9878260869565216, |
| "num_input_tokens_seen": 5812176, |
| "step": 213, |
| "total_flos": 1.3025650857335194e+17, |
| "train_loss": 0.5454816756673821, |
| "train_runtime": 5250.0748, |
| "train_samples_per_second": 0.657, |
| "train_steps_per_second": 0.041 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 213, |
| "num_input_tokens_seen": 5812176, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3025650857335194e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |