{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22586109542631283, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00564652738565782, "grad_norm": 0.0635828971862793, "learning_rate": 1.2531328320802006e-05, "loss": 1.3836, "step": 25 }, { "epoch": 0.01129305477131564, "grad_norm": 0.2005225569009781, "learning_rate": 2.506265664160401e-05, "loss": 1.6497, "step": 50 }, { "epoch": 0.01693958215697346, "grad_norm": 0.07235410064458847, "learning_rate": 3.759398496240601e-05, "loss": 1.1768, "step": 75 }, { "epoch": 0.02258610954263128, "grad_norm": 0.17648495733737946, "learning_rate": 5.012531328320802e-05, "loss": 1.2146, "step": 100 }, { "epoch": 0.028232636928289104, "grad_norm": 0.06953724473714828, "learning_rate": 6.265664160401002e-05, "loss": 0.8725, "step": 125 }, { "epoch": 0.03387916431394692, "grad_norm": 0.12059248238801956, "learning_rate": 7.518796992481203e-05, "loss": 0.7548, "step": 150 }, { "epoch": 0.039525691699604744, "grad_norm": 0.0689113661646843, "learning_rate": 8.771929824561403e-05, "loss": 0.7917, "step": 175 }, { "epoch": 0.04517221908526256, "grad_norm": 0.16621273756027222, "learning_rate": 0.00010025062656641604, "loss": 0.6985, "step": 200 }, { "epoch": 0.050818746470920384, "grad_norm": 0.07838872820138931, "learning_rate": 0.00011278195488721806, "loss": 0.7874, "step": 225 }, { "epoch": 0.05646527385657821, "grad_norm": 0.11714337766170502, "learning_rate": 0.00012531328320802005, "loss": 0.689, "step": 250 }, { "epoch": 0.062111801242236024, "grad_norm": 0.07283364981412888, "learning_rate": 0.00013784461152882208, "loss": 0.7719, "step": 275 }, { "epoch": 0.06775832862789384, "grad_norm": 0.1499466449022293, "learning_rate": 0.00015037593984962405, "loss": 0.655, "step": 300 }, { "epoch": 0.07340485601355166, "grad_norm": 0.08195521682500839, "learning_rate": 0.00016290726817042608, "loss": 0.7508, "step": 325 }, { "epoch": 0.07905138339920949, "grad_norm": 0.1490202099084854, "learning_rate": 0.00017543859649122806, "loss": 0.6372, "step": 350 }, { "epoch": 0.08469791078486731, "grad_norm": 0.1617508977651596, "learning_rate": 0.00018796992481203009, "loss": 0.7396, "step": 375 }, { "epoch": 0.09034443817052512, "grad_norm": 0.180181622505188, "learning_rate": 0.00019999999702625888, "loss": 0.6318, "step": 400 }, { "epoch": 0.09599096555618294, "grad_norm": 0.08272858709096909, "learning_rate": 0.00019999798975772924, "loss": 0.724, "step": 425 }, { "epoch": 0.10163749294184077, "grad_norm": 0.120403952896595, "learning_rate": 0.00019999226539902187, "loss": 0.6271, "step": 450 }, { "epoch": 0.10728402032749859, "grad_norm": 0.07971920073032379, "learning_rate": 0.00019998282416292055, "loss": 0.7256, "step": 475 }, { "epoch": 0.11293054771315642, "grad_norm": 0.1328658014535904, "learning_rate": 0.00019996966640037166, "loss": 0.6231, "step": 500 }, { "epoch": 0.11857707509881422, "grad_norm": 0.07475866377353668, "learning_rate": 0.00019995279260047092, "loss": 0.7251, "step": 525 }, { "epoch": 0.12422360248447205, "grad_norm": 0.1371573656797409, "learning_rate": 0.00019993220339044524, "loss": 0.5907, "step": 550 }, { "epoch": 0.12987012987012986, "grad_norm": 0.07268711924552917, "learning_rate": 0.00019990789953562961, "loss": 0.7304, "step": 575 }, { "epoch": 0.13551665725578768, "grad_norm": 0.14224526286125183, "learning_rate": 0.0001998798819394383, "loss": 0.5931, "step": 600 }, { "epoch": 0.1411631846414455, "grad_norm": 0.0803467407822609, "learning_rate": 0.00019984815164333163, "loss": 0.7076, "step": 625 }, { "epoch": 0.14680971202710333, "grad_norm": 0.12445727735757828, "learning_rate": 0.00019981270982677698, "loss": 0.5566, "step": 650 }, { "epoch": 0.15245623941276115, "grad_norm": 0.07665792107582092, "learning_rate": 0.00019977355780720514, "loss": 0.6985, "step": 675 }, { "epoch": 0.15810276679841898, "grad_norm": 0.13053999841213226, "learning_rate": 0.00019973069703996125, "loss": 0.5901, "step": 700 }, { "epoch": 0.1637492941840768, "grad_norm": 0.07512692362070084, "learning_rate": 0.00019968412911825067, "loss": 0.7184, "step": 725 }, { "epoch": 0.16939582156973462, "grad_norm": 0.12033283710479736, "learning_rate": 0.00019963385577307987, "loss": 0.6013, "step": 750 }, { "epoch": 0.17504234895539245, "grad_norm": 0.0809774249792099, "learning_rate": 0.000199579878873192, "loss": 0.7054, "step": 775 }, { "epoch": 0.18068887634105024, "grad_norm": 0.12490582466125488, "learning_rate": 0.0001995222004249974, "loss": 0.5714, "step": 800 }, { "epoch": 0.18633540372670807, "grad_norm": 0.08289226144552231, "learning_rate": 0.00019946082257249912, "loss": 0.7304, "step": 825 }, { "epoch": 0.1919819311123659, "grad_norm": 0.14385385811328888, "learning_rate": 0.00019939574759721316, "loss": 0.5639, "step": 850 }, { "epoch": 0.1976284584980237, "grad_norm": 0.07623141258955002, "learning_rate": 0.00019932697791808366, "loss": 0.7126, "step": 875 }, { "epoch": 0.20327498588368154, "grad_norm": 0.11150185018777847, "learning_rate": 0.000199254516091393, "loss": 0.5903, "step": 900 }, { "epoch": 0.20892151326933936, "grad_norm": 0.07193930447101593, "learning_rate": 0.00019917836481066675, "loss": 0.6952, "step": 925 }, { "epoch": 0.21456804065499718, "grad_norm": 0.11242598295211792, "learning_rate": 0.00019909852690657359, "loss": 0.5853, "step": 950 }, { "epoch": 0.220214568040655, "grad_norm": 0.07508910447359085, "learning_rate": 0.0001990150053468201, "loss": 0.6969, "step": 975 }, { "epoch": 0.22586109542631283, "grad_norm": 0.11998436599969864, "learning_rate": 0.00019892780323604035, "loss": 0.5791, "step": 1000 } ], "logging_steps": 25, "max_steps": 13281, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.919624163446989e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }