{
  "best_global_step": 350,
  "best_metric": 0.6386752128601074,
  "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-350",
  "epoch": 1.0,
  "eval_steps": 50,
  "global_step": 396,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02527646129541864,
      "grad_norm": 14.924928665161133,
      "learning_rate": 4.5e-06,
      "loss": 1.6848,
      "step": 10
    },
    {
      "epoch": 0.05055292259083728,
      "grad_norm": 9.861122131347656,
      "learning_rate": 9.5e-06,
      "loss": 0.9777,
      "step": 20
    },
    {
      "epoch": 0.07582938388625593,
      "grad_norm": 8.668806076049805,
      "learning_rate": 1.45e-05,
      "loss": 0.8539,
      "step": 30
    },
    {
      "epoch": 0.10110584518167456,
      "grad_norm": 8.786724090576172,
      "learning_rate": 1.95e-05,
      "loss": 0.824,
      "step": 40
    },
    {
      "epoch": 0.1263823064770932,
      "grad_norm": 6.658519744873047,
      "learning_rate": 1.996847707779778e-05,
      "loss": 0.7459,
      "step": 50
    },
    {
      "epoch": 0.1263823064770932,
      "eval_loss": 0.7653215527534485,
      "eval_runtime": 413.3579,
      "eval_samples_per_second": 3.404,
      "eval_steps_per_second": 0.852,
      "step": 50
    },
    {
      "epoch": 0.15165876777251186,
      "grad_norm": 7.256345748901367,
      "learning_rate": 1.985976407793198e-05,
      "loss": 0.7691,
      "step": 60
    },
    {
      "epoch": 0.1769352290679305,
      "grad_norm": 6.2496724128723145,
      "learning_rate": 1.9674317682923535e-05,
      "loss": 0.6842,
      "step": 70
    },
    {
      "epoch": 0.20221169036334913,
      "grad_norm": 6.8075408935546875,
      "learning_rate": 1.941358112522644e-05,
      "loss": 0.7287,
      "step": 80
    },
    {
      "epoch": 0.22748815165876776,
      "grad_norm": 6.674585819244385,
      "learning_rate": 1.907958358131508e-05,
      "loss": 0.7173,
      "step": 90
    },
    {
      "epoch": 0.2527646129541864,
      "grad_norm": 6.967682838439941,
      "learning_rate": 1.867492437966334e-05,
      "loss": 0.6676,
      "step": 100
    },
    {
      "epoch": 0.2527646129541864,
      "eval_loss": 0.7142078876495361,
      "eval_runtime": 413.3775,
      "eval_samples_per_second": 3.404,
      "eval_steps_per_second": 0.852,
      "step": 100
    },
    {
      "epoch": 0.27804107424960506,
      "grad_norm": 6.488720893859863,
      "learning_rate": 1.820275277152846e-05,
      "loss": 0.6861,
      "step": 110
    },
    {
      "epoch": 0.3033175355450237,
      "grad_norm": 6.127718448638916,
      "learning_rate": 1.7666743421972986e-05,
      "loss": 0.681,
      "step": 120
    },
    {
      "epoch": 0.3285939968404423,
      "grad_norm": 5.501866340637207,
      "learning_rate": 1.7071067811865477e-05,
      "loss": 0.6915,
      "step": 130
    },
    {
      "epoch": 0.353870458135861,
      "grad_norm": 5.910053730010986,
      "learning_rate": 1.6420361773423205e-05,
      "loss": 0.677,
      "step": 140
    },
    {
      "epoch": 0.3791469194312796,
      "grad_norm": 6.499867916107178,
      "learning_rate": 1.571968941195081e-05,
      "loss": 0.7094,
      "step": 150
    },
    {
      "epoch": 0.3791469194312796,
      "eval_loss": 0.6856361031532288,
      "eval_runtime": 413.5831,
      "eval_samples_per_second": 3.402,
      "eval_steps_per_second": 0.851,
      "step": 150
    },
    {
      "epoch": 0.40442338072669826,
      "grad_norm": 5.411413669586182,
      "learning_rate": 1.4974503694553119e-05,
      "loss": 0.6963,
      "step": 160
    },
    {
      "epoch": 0.4296998420221169,
      "grad_norm": 6.184033393859863,
      "learning_rate": 1.4190604012539684e-05,
      "loss": 0.6738,
      "step": 170
    },
    {
      "epoch": 0.4549763033175355,
      "grad_norm": 6.484604358673096,
      "learning_rate": 1.3374091047790585e-05,
      "loss": 0.6558,
      "step": 180
    },
    {
      "epoch": 0.4802527646129542,
      "grad_norm": 5.717767238616943,
      "learning_rate": 1.2531319294335084e-05,
      "loss": 0.612,
      "step": 190
    },
    {
      "epoch": 0.5055292259083728,
      "grad_norm": 6.289947032928467,
      "learning_rate": 1.1668847604642861e-05,
      "loss": 0.6612,
      "step": 200
    },
    {
      "epoch": 0.5055292259083728,
      "eval_loss": 0.666141152381897,
      "eval_runtime": 413.6045,
      "eval_samples_per_second": 3.402,
      "eval_steps_per_second": 0.851,
      "step": 200
    },
    {
      "epoch": 0.5308056872037915,
      "grad_norm": 5.043293476104736,
      "learning_rate": 1.0793388145500199e-05,
      "loss": 0.6594,
      "step": 210
    },
    {
      "epoch": 0.5560821484992101,
      "grad_norm": 5.2453203201293945,
      "learning_rate": 9.911754160720924e-06,
      "loss": 0.6347,
      "step": 220
    },
    {
      "epoch": 0.5813586097946287,
      "grad_norm": 6.464652061462402,
      "learning_rate": 9.030806947227607e-06,
      "loss": 0.6543,
      "step": 230
    },
    {
      "epoch": 0.6066350710900474,
      "grad_norm": 5.646970272064209,
      "learning_rate": 8.157402457160539e-06,
      "loss": 0.6667,
      "step": 240
    },
    {
      "epoch": 0.631911532385466,
      "grad_norm": 5.6440534591674805,
      "learning_rate": 7.298337941582314e-06,
      "loss": 0.6349,
      "step": 250
    },
    {
      "epoch": 0.631911532385466,
      "eval_loss": 0.652662992477417,
      "eval_runtime": 413.1161,
      "eval_samples_per_second": 3.406,
      "eval_steps_per_second": 0.852,
      "step": 250
    },
    {
      "epoch": 0.6571879936808847,
      "grad_norm": 5.690296173095703,
      "learning_rate": 6.460299051022285e-06,
      "loss": 0.6293,
      "step": 260
    },
    {
      "epoch": 0.6824644549763034,
      "grad_norm": 5.673945903778076,
      "learning_rate": 5.649807804549663e-06,
      "loss": 0.6402,
      "step": 270
    },
    {
      "epoch": 0.707740916271722,
      "grad_norm": 6.384261131286621,
      "learning_rate": 4.873171832304852e-06,
      "loss": 0.6626,
      "step": 280
    },
    {
      "epoch": 0.7330173775671406,
      "grad_norm": 6.072003364562988,
      "learning_rate": 4.13643528650785e-06,
      "loss": 0.6121,
      "step": 290
    },
    {
      "epoch": 0.7582938388625592,
      "grad_norm": 5.644257545471191,
      "learning_rate": 3.4453318029777096e-06,
      "loss": 0.6486,
      "step": 300
    },
    {
      "epoch": 0.7582938388625592,
      "eval_loss": 0.6433162093162537,
      "eval_runtime": 413.2942,
      "eval_samples_per_second": 3.404,
      "eval_steps_per_second": 0.852,
      "step": 300
    },
    {
      "epoch": 0.7835703001579779,
      "grad_norm": 6.254457950592041,
      "learning_rate": 2.8052398792390155e-06,
      "loss": 0.6275,
      "step": 310
    },
    {
      "epoch": 0.8088467614533965,
      "grad_norm": 4.991596698760986,
      "learning_rate": 2.2211410164842605e-06,
      "loss": 0.6194,
      "step": 320
    },
    {
      "epoch": 0.8341232227488151,
      "grad_norm": 5.583685874938965,
      "learning_rate": 1.6975809511513352e-06,
      "loss": 0.5763,
      "step": 330
    },
    {
      "epoch": 0.8593996840442338,
      "grad_norm": 5.555745601654053,
      "learning_rate": 1.2386342778305993e-06,
      "loss": 0.6468,
      "step": 340
    },
    {
      "epoch": 0.8846761453396524,
      "grad_norm": 5.202249526977539,
      "learning_rate": 8.478727388228736e-07,
      "loss": 0.617,
      "step": 350
    },
    {
      "epoch": 0.8846761453396524,
      "eval_loss": 0.6386752128601074,
      "eval_runtime": 413.3068,
      "eval_samples_per_second": 3.404,
      "eval_steps_per_second": 0.852,
      "step": 350
    },
    {
      "epoch": 0.909952606635071,
      "grad_norm": 5.008814334869385,
      "learning_rate": 5.283374271342645e-07,
      "loss": 0.6197,
      "step": 360
    },
    {
      "epoch": 0.9352290679304898,
      "grad_norm": 6.082056999206543,
      "learning_rate": 2.8251511923731655e-07,
      "loss": 0.6289,
      "step": 370
    },
    {
      "epoch": 0.9605055292259084,
      "grad_norm": 5.0787553787231445,
      "learning_rate": 1.1231892178829474e-07,
      "loss": 0.6276,
      "step": 380
    },
    {
      "epoch": 0.985781990521327,
      "grad_norm": 5.158481121063232,
      "learning_rate": 1.9073382917097482e-08,
      "loss": 0.5999,
      "step": 390
    },
    {
      "epoch": 1.0,
      "step": 396,
      "total_flos": 9.150706299070054e+16,
      "train_loss": 0.701680494077278,
      "train_runtime": 17774.8464,
      "train_samples_per_second": 0.712,
      "train_steps_per_second": 0.022
    }
  ],
  "logging_steps": 10,
  "max_steps": 396,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.150706299070054e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}