{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.22586109542631283,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00564652738565782,
      "grad_norm": 0.0635828971862793,
      "learning_rate": 1.2531328320802006e-05,
      "loss": 1.3836,
      "step": 25
    },
    {
      "epoch": 0.01129305477131564,
      "grad_norm": 0.2005225569009781,
      "learning_rate": 2.506265664160401e-05,
      "loss": 1.6497,
      "step": 50
    },
    {
      "epoch": 0.01693958215697346,
      "grad_norm": 0.07235410064458847,
      "learning_rate": 3.759398496240601e-05,
      "loss": 1.1768,
      "step": 75
    },
    {
      "epoch": 0.02258610954263128,
      "grad_norm": 0.17648495733737946,
      "learning_rate": 5.012531328320802e-05,
      "loss": 1.2146,
      "step": 100
    },
    {
      "epoch": 0.028232636928289104,
      "grad_norm": 0.06953724473714828,
      "learning_rate": 6.265664160401002e-05,
      "loss": 0.8725,
      "step": 125
    },
    {
      "epoch": 0.03387916431394692,
      "grad_norm": 0.12059248238801956,
      "learning_rate": 7.518796992481203e-05,
      "loss": 0.7548,
      "step": 150
    },
    {
      "epoch": 0.039525691699604744,
      "grad_norm": 0.0689113661646843,
      "learning_rate": 8.771929824561403e-05,
      "loss": 0.7917,
      "step": 175
    },
    {
      "epoch": 0.04517221908526256,
      "grad_norm": 0.16621273756027222,
      "learning_rate": 0.00010025062656641604,
      "loss": 0.6985,
      "step": 200
    },
    {
      "epoch": 0.050818746470920384,
      "grad_norm": 0.07838872820138931,
      "learning_rate": 0.00011278195488721806,
      "loss": 0.7874,
      "step": 225
    },
    {
      "epoch": 0.05646527385657821,
      "grad_norm": 0.11714337766170502,
      "learning_rate": 0.00012531328320802005,
      "loss": 0.689,
      "step": 250
    },
    {
      "epoch": 0.062111801242236024,
      "grad_norm": 0.07283364981412888,
      "learning_rate": 0.00013784461152882208,
      "loss": 0.7719,
      "step": 275
    },
    {
      "epoch": 0.06775832862789384,
      "grad_norm": 0.1499466449022293,
      "learning_rate": 0.00015037593984962405,
      "loss": 0.655,
      "step": 300
    },
    {
      "epoch": 0.07340485601355166,
      "grad_norm": 0.08195521682500839,
      "learning_rate": 0.00016290726817042608,
      "loss": 0.7508,
      "step": 325
    },
    {
      "epoch": 0.07905138339920949,
      "grad_norm": 0.1490202099084854,
      "learning_rate": 0.00017543859649122806,
      "loss": 0.6372,
      "step": 350
    },
    {
      "epoch": 0.08469791078486731,
      "grad_norm": 0.1617508977651596,
      "learning_rate": 0.00018796992481203009,
      "loss": 0.7396,
      "step": 375
    },
    {
      "epoch": 0.09034443817052512,
      "grad_norm": 0.180181622505188,
      "learning_rate": 0.00019999999702625888,
      "loss": 0.6318,
      "step": 400
    },
    {
      "epoch": 0.09599096555618294,
      "grad_norm": 0.08272858709096909,
      "learning_rate": 0.00019999798975772924,
      "loss": 0.724,
      "step": 425
    },
    {
      "epoch": 0.10163749294184077,
      "grad_norm": 0.120403952896595,
      "learning_rate": 0.00019999226539902187,
      "loss": 0.6271,
      "step": 450
    },
    {
      "epoch": 0.10728402032749859,
      "grad_norm": 0.07971920073032379,
      "learning_rate": 0.00019998282416292055,
      "loss": 0.7256,
      "step": 475
    },
    {
      "epoch": 0.11293054771315642,
      "grad_norm": 0.1328658014535904,
      "learning_rate": 0.00019996966640037166,
      "loss": 0.6231,
      "step": 500
    },
    {
      "epoch": 0.11857707509881422,
      "grad_norm": 0.07475866377353668,
      "learning_rate": 0.00019995279260047092,
      "loss": 0.7251,
      "step": 525
    },
    {
      "epoch": 0.12422360248447205,
      "grad_norm": 0.1371573656797409,
      "learning_rate": 0.00019993220339044524,
      "loss": 0.5907,
      "step": 550
    },
    {
      "epoch": 0.12987012987012986,
      "grad_norm": 0.07268711924552917,
      "learning_rate": 0.00019990789953562961,
      "loss": 0.7304,
      "step": 575
    },
    {
      "epoch": 0.13551665725578768,
      "grad_norm": 0.14224526286125183,
      "learning_rate": 0.0001998798819394383,
      "loss": 0.5931,
      "step": 600
    },
    {
      "epoch": 0.1411631846414455,
      "grad_norm": 0.0803467407822609,
      "learning_rate": 0.00019984815164333163,
      "loss": 0.7076,
      "step": 625
    },
    {
      "epoch": 0.14680971202710333,
      "grad_norm": 0.12445727735757828,
      "learning_rate": 0.00019981270982677698,
      "loss": 0.5566,
      "step": 650
    },
    {
      "epoch": 0.15245623941276115,
      "grad_norm": 0.07665792107582092,
      "learning_rate": 0.00019977355780720514,
      "loss": 0.6985,
      "step": 675
    },
    {
      "epoch": 0.15810276679841898,
      "grad_norm": 0.13053999841213226,
      "learning_rate": 0.00019973069703996125,
      "loss": 0.5901,
      "step": 700
    },
    {
      "epoch": 0.1637492941840768,
      "grad_norm": 0.07512692362070084,
      "learning_rate": 0.00019968412911825067,
      "loss": 0.7184,
      "step": 725
    },
    {
      "epoch": 0.16939582156973462,
      "grad_norm": 0.12033283710479736,
      "learning_rate": 0.00019963385577307987,
      "loss": 0.6013,
      "step": 750
    },
    {
      "epoch": 0.17504234895539245,
      "grad_norm": 0.0809774249792099,
      "learning_rate": 0.000199579878873192,
      "loss": 0.7054,
      "step": 775
    },
    {
      "epoch": 0.18068887634105024,
      "grad_norm": 0.12490582466125488,
      "learning_rate": 0.0001995222004249974,
      "loss": 0.5714,
      "step": 800
    },
    {
      "epoch": 0.18633540372670807,
      "grad_norm": 0.08289226144552231,
      "learning_rate": 0.00019946082257249912,
      "loss": 0.7304,
      "step": 825
    },
    {
      "epoch": 0.1919819311123659,
      "grad_norm": 0.14385385811328888,
      "learning_rate": 0.00019939574759721316,
      "loss": 0.5639,
      "step": 850
    },
    {
      "epoch": 0.1976284584980237,
      "grad_norm": 0.07623141258955002,
      "learning_rate": 0.00019932697791808366,
      "loss": 0.7126,
      "step": 875
    },
    {
      "epoch": 0.20327498588368154,
      "grad_norm": 0.11150185018777847,
      "learning_rate": 0.000199254516091393,
      "loss": 0.5903,
      "step": 900
    },
    {
      "epoch": 0.20892151326933936,
      "grad_norm": 0.07193930447101593,
      "learning_rate": 0.00019917836481066675,
      "loss": 0.6952,
      "step": 925
    },
    {
      "epoch": 0.21456804065499718,
      "grad_norm": 0.11242598295211792,
      "learning_rate": 0.00019909852690657359,
      "loss": 0.5853,
      "step": 950
    },
    {
      "epoch": 0.220214568040655,
      "grad_norm": 0.07508910447359085,
      "learning_rate": 0.0001990150053468201,
      "loss": 0.6969,
      "step": 975
    },
    {
      "epoch": 0.22586109542631283,
      "grad_norm": 0.11998436599969864,
      "learning_rate": 0.00019892780323604035,
      "loss": 0.5791,
      "step": 1000
    }
  ],
  "logging_steps": 25,
  "max_steps": 13281,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.919624163446989e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
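
A minimal sketch of how the `log_history` above can be consumed, assuming the state is saved as `trainer_state.json` in a checkpoint directory (the filename and path are assumptions, not given by the source). It extracts the step/loss series with only the Python standard library:

```python
import json

# Load the trainer state (path is an assumed example).
with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry carries step, epoch, loss, grad_norm, and learning_rate;
# keep only entries that actually logged a training loss.
points = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]

steps, losses = zip(*points)
print(f"{len(points)} logged points; loss {losses[0]} at step {steps[0]} "
      f"-> {losses[-1]} at step {steps[-1]} of {state['max_steps']}")
```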