| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9999347982004303, | |
| "eval_steps": 500, | |
| "global_step": 1917, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0521614396557345, | |
| "grad_norm": 6.860655307769775, | |
| "learning_rate": 9.895833333333334e-06, | |
| "loss": 0.2352, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.104322879311469, | |
| "grad_norm": 0.910341203212738, | |
| "learning_rate": 1.996521739130435e-05, | |
| "loss": 0.0854, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1564843189672035, | |
| "grad_norm": 15.565857887268066, | |
| "learning_rate": 1.8817391304347828e-05, | |
| "loss": 0.0579, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.208645758622938, | |
| "grad_norm": 8.692508697509766, | |
| "learning_rate": 1.7657971014492754e-05, | |
| "loss": 0.0662, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2608071982786725, | |
| "grad_norm": 0.031041253358125687, | |
| "learning_rate": 1.6510144927536233e-05, | |
| "loss": 0.0784, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.312968637934407, | |
| "grad_norm": 3.0286190509796143, | |
| "learning_rate": 1.535072463768116e-05, | |
| "loss": 0.0975, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3651300775901415, | |
| "grad_norm": 17.107019424438477, | |
| "learning_rate": 1.4191304347826087e-05, | |
| "loss": 0.1162, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.417291517245876, | |
| "grad_norm": 0.3376404047012329, | |
| "learning_rate": 1.3031884057971015e-05, | |
| "loss": 0.1132, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4694529569016105, | |
| "grad_norm": 0.49836966395378113, | |
| "learning_rate": 1.1872463768115942e-05, | |
| "loss": 0.192, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.521614396557345, | |
| "grad_norm": 0.1673300564289093, | |
| "learning_rate": 1.071304347826087e-05, | |
| "loss": 0.1107, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5737758362130795, | |
| "grad_norm": 1.4948456287384033, | |
| "learning_rate": 9.553623188405798e-06, | |
| "loss": 0.1355, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.625937275868814, | |
| "grad_norm": 2.7801311016082764, | |
| "learning_rate": 8.394202898550725e-06, | |
| "loss": 0.1275, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6780987155245485, | |
| "grad_norm": 5.762394428253174, | |
| "learning_rate": 7.234782608695653e-06, | |
| "loss": 0.1514, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.730260155180283, | |
| "grad_norm": 3.5544540882110596, | |
| "learning_rate": 6.07536231884058e-06, | |
| "loss": 0.165, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7824215948360175, | |
| "grad_norm": 15.184954643249512, | |
| "learning_rate": 4.915942028985508e-06, | |
| "loss": 0.1568, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.834583034491752, | |
| "grad_norm": 0.3603503108024597, | |
| "learning_rate": 3.7565217391304347e-06, | |
| "loss": 0.2338, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.8867444741474865, | |
| "grad_norm": 21.175050735473633, | |
| "learning_rate": 2.5971014492753625e-06, | |
| "loss": 0.2064, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.938905913803221, | |
| "grad_norm": 4.581367015838623, | |
| "learning_rate": 1.4376811594202899e-06, | |
| "loss": 0.3337, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.9910673534589555, | |
| "grad_norm": 627.7379760742188, | |
| "learning_rate": 2.7826086956521744e-07, | |
| "loss": 0.6769, | |
| "step": 1900 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 1917, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |