| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.6777363605557438, |
| "eval_steps": 100, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03388681802778719, |
| "grad_norm": 0.2737605571746826, |
| "learning_rate": 9.6e-05, |
| "loss": 1.4843, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06777363605557438, |
| "grad_norm": 0.29454904794692993, |
| "learning_rate": 0.000196, |
| "loss": 1.3338, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10166045408336158, |
| "grad_norm": 0.4346487522125244, |
| "learning_rate": 0.00019663394109396915, |
| "loss": 1.21, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.13554727211114875, |
| "grad_norm": 0.45564034581184387, |
| "learning_rate": 0.00019312762973352034, |
| "loss": 1.2042, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.13554727211114875, |
| "eval_loss": 1.1878175735473633, |
| "eval_runtime": 596.4366, |
| "eval_samples_per_second": 8.796, |
| "eval_steps_per_second": 1.1, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.16943409013893596, |
| "grad_norm": 0.3995042145252228, |
| "learning_rate": 0.00018962131837307154, |
| "loss": 1.1788, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.20332090816672316, |
| "grad_norm": 0.4114416837692261, |
| "learning_rate": 0.00018611500701262273, |
| "loss": 1.1759, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.23720772619451033, |
| "grad_norm": 0.4253525733947754, |
| "learning_rate": 0.00018260869565217392, |
| "loss": 1.1666, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.2710945442222975, |
| "grad_norm": 0.3987375497817993, |
| "learning_rate": 0.00017910238429172512, |
| "loss": 1.1516, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2710945442222975, |
| "eval_loss": 1.1261389255523682, |
| "eval_runtime": 593.1768, |
| "eval_samples_per_second": 8.844, |
| "eval_steps_per_second": 1.106, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3049813622500847, |
| "grad_norm": 0.45106807351112366, |
| "learning_rate": 0.0001755960729312763, |
| "loss": 1.1174, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.3388681802778719, |
| "grad_norm": 0.46433955430984497, |
| "learning_rate": 0.0001720897615708275, |
| "loss": 1.1032, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3727549983056591, |
| "grad_norm": 0.48211586475372314, |
| "learning_rate": 0.0001685834502103787, |
| "loss": 1.123, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.4066418163334463, |
| "grad_norm": 0.4357963800430298, |
| "learning_rate": 0.0001650771388499299, |
| "loss": 1.094, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4066418163334463, |
| "eval_loss": 1.088568091392517, |
| "eval_runtime": 593.2602, |
| "eval_samples_per_second": 8.843, |
| "eval_steps_per_second": 1.106, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.44052863436123346, |
| "grad_norm": 0.5000167489051819, |
| "learning_rate": 0.00016157082748948106, |
| "loss": 1.0956, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.47441545238902066, |
| "grad_norm": 0.483510822057724, |
| "learning_rate": 0.00015806451612903225, |
| "loss": 1.0973, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5083022704168079, |
| "grad_norm": 0.6117793917655945, |
| "learning_rate": 0.00015455820476858344, |
| "loss": 1.1039, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.542189088444595, |
| "grad_norm": 0.5380231142044067, |
| "learning_rate": 0.00015105189340813466, |
| "loss": 1.1156, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.542189088444595, |
| "eval_loss": 1.0656226873397827, |
| "eval_runtime": 592.5891, |
| "eval_samples_per_second": 8.853, |
| "eval_steps_per_second": 1.107, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5760759064723823, |
| "grad_norm": 0.5856944918632507, |
| "learning_rate": 0.00014754558204768586, |
| "loss": 1.098, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.6099627245001694, |
| "grad_norm": 0.43336766958236694, |
| "learning_rate": 0.00014403927068723705, |
| "loss": 1.0713, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6438495425279567, |
| "grad_norm": 0.4466949701309204, |
| "learning_rate": 0.00014053295932678822, |
| "loss": 1.0524, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.6777363605557438, |
| "grad_norm": 0.6057330369949341, |
| "learning_rate": 0.0001370266479663394, |
| "loss": 1.0503, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6777363605557438, |
| "eval_loss": 1.0403542518615723, |
| "eval_runtime": 592.4391, |
| "eval_samples_per_second": 8.855, |
| "eval_steps_per_second": 1.107, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 1476, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.4299920826785792e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|