{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.6821130676552363, "eval_steps": 50, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14828544949026876, "grad_norm": 3.0305959544719996, "learning_rate": 6.4285714285714295e-06, "loss": 0.662, "step": 10 }, { "epoch": 0.2965708989805375, "grad_norm": 2.7485654160854565, "learning_rate": 9.990735836893226e-06, "loss": 0.1743, "step": 20 }, { "epoch": 0.4448563484708063, "grad_norm": 1.1430530985128236, "learning_rate": 9.91682838414733e-06, "loss": 0.1469, "step": 30 }, { "epoch": 0.593141797961075, "grad_norm": 1.2712538596934821, "learning_rate": 9.770107968877004e-06, "loss": 0.1355, "step": 40 }, { "epoch": 0.7414272474513438, "grad_norm": 0.8986934874682488, "learning_rate": 9.552747363297172e-06, "loss": 0.1262, "step": 50 }, { "epoch": 0.7414272474513438, "eval_loss": 0.12907367944717407, "eval_runtime": 40.4419, "eval_samples_per_second": 6.404, "eval_steps_per_second": 0.42, "step": 50 }, { "epoch": 0.8897126969416126, "grad_norm": 1.2078337800655208, "learning_rate": 9.267965445186733e-06, "loss": 0.1183, "step": 60 }, { "epoch": 1.0296570898980537, "grad_norm": 0.7746195941421475, "learning_rate": 8.919979529756008e-06, "loss": 0.1103, "step": 70 }, { "epoch": 1.1779425393883225, "grad_norm": 0.9315687365661568, "learning_rate": 8.513942915725159e-06, "loss": 0.0967, "step": 80 }, { "epoch": 1.3262279888785913, "grad_norm": 0.9221104617429706, "learning_rate": 8.055868570489247e-06, "loss": 0.0921, "step": 90 }, { "epoch": 1.47451343836886, "grad_norm": 0.8320239593764249, "learning_rate": 7.552540084510896e-06, "loss": 0.0936, "step": 100 }, { "epoch": 1.47451343836886, "eval_loss": 0.10962820053100586, "eval_runtime": 39.3838, "eval_samples_per_second": 6.576, "eval_steps_per_second": 0.432, "step": 100 }, { "epoch": 1.6227988878591288, "grad_norm": 0.9524938303051578, "learning_rate": 7.011411213610663e-06, "loss": 0.0924, "step": 110 }, { "epoch": 1.7710843373493976, "grad_norm": 0.8096971233176471, "learning_rate": 6.440495496826189e-06, "loss": 0.0926, "step": 120 }, { "epoch": 1.9193697868396664, "grad_norm": 1.0892516777146075, "learning_rate": 5.848247584481424e-06, "loss": 0.0912, "step": 130 }, { "epoch": 2.0593141797961074, "grad_norm": 0.6756500246813129, "learning_rate": 5.243438033870126e-06, "loss": 0.079, "step": 140 }, { "epoch": 2.2075996292863764, "grad_norm": 0.6535243252037057, "learning_rate": 4.635023426695462e-06, "loss": 0.0649, "step": 150 }, { "epoch": 2.2075996292863764, "eval_loss": 0.09530726075172424, "eval_runtime": 39.7995, "eval_samples_per_second": 6.508, "eval_steps_per_second": 0.427, "step": 150 }, { "epoch": 2.355885078776645, "grad_norm": 0.7206259212079309, "learning_rate": 4.032013731687351e-06, "loss": 0.0662, "step": 160 }, { "epoch": 2.504170528266914, "grad_norm": 0.6535997772962888, "learning_rate": 3.443338876615092e-06, "loss": 0.0631, "step": 170 }, { "epoch": 2.6524559777571826, "grad_norm": 0.6645747980854602, "learning_rate": 2.8777165056209256e-06, "loss": 0.0654, "step": 180 }, { "epoch": 2.800741427247451, "grad_norm": 0.5993925035499262, "learning_rate": 2.343522880246734e-06, "loss": 0.0615, "step": 190 }, { "epoch": 2.94902687673772, "grad_norm": 1.1938299764947649, "learning_rate": 1.8486688359714567e-06, "loss": 0.0582, "step": 200 }, { "epoch": 2.94902687673772, "eval_loss": 0.09135068207979202, "eval_runtime": 40.159, "eval_samples_per_second": 6.449, "eval_steps_per_second": 0.423, "step": 200 }, { "epoch": 3.088971269694161, "grad_norm": 0.9541639227633548, "learning_rate": 1.4004826312100218e-06, "loss": 0.0474, "step": 210 }, { "epoch": 3.23725671918443, "grad_norm": 0.6148134757370654, "learning_rate": 1.0056014236546647e-06, "loss": 0.0403, "step": 220 }, { "epoch": 3.3855421686746987, "grad_norm": 0.7706323260120672, "learning_rate": 6.698729810778065e-07, "loss": 0.0399, "step": 230 }, { "epoch": 3.5338276181649677, "grad_norm": 0.7014591388584486, "learning_rate": 3.9826908215420344e-07, "loss": 0.0377, "step": 240 }, { "epoch": 3.6821130676552363, "grad_norm": 0.8460829330545679, "learning_rate": 1.9481188974346698e-07, "loss": 0.0379, "step": 250 }, { "epoch": 3.6821130676552363, "eval_loss": 0.0978400707244873, "eval_runtime": 39.5311, "eval_samples_per_second": 6.552, "eval_steps_per_second": 0.43, "step": 250 } ], "logging_steps": 10, "max_steps": 272, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 912772139319296.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }