| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9901768172888015, |
| "eval_steps": 500, |
| "global_step": 254, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.07858546168958742, |
| "grad_norm": 0.9010041952133179, |
| "learning_rate": 2e-05, |
| "loss": 1.3369, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.15717092337917485, |
| "grad_norm": 0.9163873791694641, |
| "learning_rate": 4e-05, |
| "loss": 1.3095, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2357563850687623, |
| "grad_norm": 0.8760176301002502, |
| "learning_rate": 6e-05, |
| "loss": 1.2597, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.3143418467583497, |
| "grad_norm": 0.9276160001754761, |
| "learning_rate": 8e-05, |
| "loss": 1.1331, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3929273084479371, |
| "grad_norm": 0.9490565061569214, |
| "learning_rate": 0.0001, |
| "loss": 0.9676, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4715127701375246, |
| "grad_norm": 1.088897943496704, |
| "learning_rate": 9.509803921568627e-05, |
| "loss": 0.7618, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.550098231827112, |
| "grad_norm": 0.7143833041191101, |
| "learning_rate": 9.019607843137255e-05, |
| "loss": 0.6247, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6286836935166994, |
| "grad_norm": 0.6208414435386658, |
| "learning_rate": 8.529411764705883e-05, |
| "loss": 0.5592, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.7072691552062869, |
| "grad_norm": 0.5866327881813049, |
| "learning_rate": 8.039215686274511e-05, |
| "loss": 0.5452, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.7858546168958742, |
| "grad_norm": 0.48709139227867126, |
| "learning_rate": 7.549019607843137e-05, |
| "loss": 0.5323, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.8644400785854617, |
| "grad_norm": 0.45397838950157166, |
| "learning_rate": 7.058823529411765e-05, |
| "loss": 0.498, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.9430255402750491, |
| "grad_norm": 0.44005146622657776, |
| "learning_rate": 6.568627450980392e-05, |
| "loss": 0.5066, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.0157170923379175, |
| "grad_norm": 0.40322428941726685, |
| "learning_rate": 6.078431372549019e-05, |
| "loss": 0.4765, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.0943025540275049, |
| "grad_norm": 0.3713725805282593, |
| "learning_rate": 5.588235294117647e-05, |
| "loss": 0.4957, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.1728880157170924, |
| "grad_norm": 0.3928331732749939, |
| "learning_rate": 5.0980392156862745e-05, |
| "loss": 0.4857, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.2514734774066798, |
| "grad_norm": 0.5155062675476074, |
| "learning_rate": 4.607843137254902e-05, |
| "loss": 0.4885, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.3300589390962672, |
| "grad_norm": 0.43860283493995667, |
| "learning_rate": 4.11764705882353e-05, |
| "loss": 0.5031, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.4086444007858545, |
| "grad_norm": 0.3964126706123352, |
| "learning_rate": 3.627450980392157e-05, |
| "loss": 0.5079, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.487229862475442, |
| "grad_norm": 0.401035338640213, |
| "learning_rate": 3.137254901960784e-05, |
| "loss": 0.4938, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.5658153241650294, |
| "grad_norm": 0.4307032525539398, |
| "learning_rate": 2.647058823529412e-05, |
| "loss": 0.4737, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.644400785854617, |
| "grad_norm": 0.44220617413520813, |
| "learning_rate": 2.1568627450980395e-05, |
| "loss": 0.4633, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.7229862475442044, |
| "grad_norm": 0.4620322287082672, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.4723, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.8015717092337917, |
| "grad_norm": 0.41082972288131714, |
| "learning_rate": 1.1764705882352942e-05, |
| "loss": 0.4767, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.880157170923379, |
| "grad_norm": 0.5318537950515747, |
| "learning_rate": 6.862745098039216e-06, |
| "loss": 0.4725, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.9587426326129664, |
| "grad_norm": 0.3858027458190918, |
| "learning_rate": 1.96078431372549e-06, |
| "loss": 0.4795, |
| "step": 250 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 254, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6444092488679424.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|