| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.6821130676552363, |
| "eval_steps": 50, |
| "global_step": 250, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.14828544949026876, |
| "grad_norm": 3.0305959544719996, |
| "learning_rate": 6.4285714285714295e-06, |
| "loss": 0.662, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2965708989805375, |
| "grad_norm": 2.7485654160854565, |
| "learning_rate": 9.990735836893226e-06, |
| "loss": 0.1743, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.4448563484708063, |
| "grad_norm": 1.1430530985128236, |
| "learning_rate": 9.91682838414733e-06, |
| "loss": 0.1469, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.593141797961075, |
| "grad_norm": 1.2712538596934821, |
| "learning_rate": 9.770107968877004e-06, |
| "loss": 0.1355, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.7414272474513438, |
| "grad_norm": 0.8986934874682488, |
| "learning_rate": 9.552747363297172e-06, |
| "loss": 0.1262, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7414272474513438, |
| "eval_loss": 0.12907367944717407, |
| "eval_runtime": 40.4419, |
| "eval_samples_per_second": 6.404, |
| "eval_steps_per_second": 0.42, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.8897126969416126, |
| "grad_norm": 1.2078337800655208, |
| "learning_rate": 9.267965445186733e-06, |
| "loss": 0.1183, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.0296570898980537, |
| "grad_norm": 0.7746195941421475, |
| "learning_rate": 8.919979529756008e-06, |
| "loss": 0.1103, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.1779425393883225, |
| "grad_norm": 0.9315687365661568, |
| "learning_rate": 8.513942915725159e-06, |
| "loss": 0.0967, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.3262279888785913, |
| "grad_norm": 0.9221104617429706, |
| "learning_rate": 8.055868570489247e-06, |
| "loss": 0.0921, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.47451343836886, |
| "grad_norm": 0.8320239593764249, |
| "learning_rate": 7.552540084510896e-06, |
| "loss": 0.0936, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.47451343836886, |
| "eval_loss": 0.10962820053100586, |
| "eval_runtime": 39.3838, |
| "eval_samples_per_second": 6.576, |
| "eval_steps_per_second": 0.432, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.6227988878591288, |
| "grad_norm": 0.9524938303051578, |
| "learning_rate": 7.011411213610663e-06, |
| "loss": 0.0924, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.7710843373493976, |
| "grad_norm": 0.8096971233176471, |
| "learning_rate": 6.440495496826189e-06, |
| "loss": 0.0926, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.9193697868396664, |
| "grad_norm": 1.0892516777146075, |
| "learning_rate": 5.848247584481424e-06, |
| "loss": 0.0912, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.0593141797961074, |
| "grad_norm": 0.6756500246813129, |
| "learning_rate": 5.243438033870126e-06, |
| "loss": 0.079, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.2075996292863764, |
| "grad_norm": 0.6535243252037057, |
| "learning_rate": 4.635023426695462e-06, |
| "loss": 0.0649, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.2075996292863764, |
| "eval_loss": 0.09530726075172424, |
| "eval_runtime": 39.7995, |
| "eval_samples_per_second": 6.508, |
| "eval_steps_per_second": 0.427, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.355885078776645, |
| "grad_norm": 0.7206259212079309, |
| "learning_rate": 4.032013731687351e-06, |
| "loss": 0.0662, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.504170528266914, |
| "grad_norm": 0.6535997772962888, |
| "learning_rate": 3.443338876615092e-06, |
| "loss": 0.0631, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.6524559777571826, |
| "grad_norm": 0.6645747980854602, |
| "learning_rate": 2.8777165056209256e-06, |
| "loss": 0.0654, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.800741427247451, |
| "grad_norm": 0.5993925035499262, |
| "learning_rate": 2.343522880246734e-06, |
| "loss": 0.0615, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.94902687673772, |
| "grad_norm": 1.1938299764947649, |
| "learning_rate": 1.8486688359714567e-06, |
| "loss": 0.0582, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.94902687673772, |
| "eval_loss": 0.09135068207979202, |
| "eval_runtime": 40.159, |
| "eval_samples_per_second": 6.449, |
| "eval_steps_per_second": 0.423, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.088971269694161, |
| "grad_norm": 0.9541639227633548, |
| "learning_rate": 1.4004826312100218e-06, |
| "loss": 0.0474, |
| "step": 210 |
| }, |
| { |
| "epoch": 3.23725671918443, |
| "grad_norm": 0.6148134757370654, |
| "learning_rate": 1.0056014236546647e-06, |
| "loss": 0.0403, |
| "step": 220 |
| }, |
| { |
| "epoch": 3.3855421686746987, |
| "grad_norm": 0.7706323260120672, |
| "learning_rate": 6.698729810778065e-07, |
| "loss": 0.0399, |
| "step": 230 |
| }, |
| { |
| "epoch": 3.5338276181649677, |
| "grad_norm": 0.7014591388584486, |
| "learning_rate": 3.9826908215420344e-07, |
| "loss": 0.0377, |
| "step": 240 |
| }, |
| { |
| "epoch": 3.6821130676552363, |
| "grad_norm": 0.8460829330545679, |
| "learning_rate": 1.9481188974346698e-07, |
| "loss": 0.0379, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.6821130676552363, |
| "eval_loss": 0.0978400707244873, |
| "eval_runtime": 39.5311, |
| "eval_samples_per_second": 6.552, |
| "eval_steps_per_second": 0.43, |
| "step": 250 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 272, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 912772139319296.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|