{
  "best_metric": 0.6774637699127197,
  "best_model_checkpoint": "output/fine_tuning/checkpoints/Meta-Llama-3.1-8B-Instruct/sft/aixpa-ground-short-docs-checkpoint/checkpoint-340",
  "epoch": 1.8181818181818183,
  "eval_steps": 20,
  "global_step": 340,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10695187165775401,
      "grad_norm": 0.2608170211315155,
      "learning_rate": 1.4819165403057078e-05,
      "loss": 1.5177,
      "mean_token_accuracy": 0.6778935924172401,
      "step": 20
    },
    {
      "epoch": 0.10695187165775401,
      "eval_loss": 1.4139869213104248,
      "eval_mean_token_accuracy": 0.6904795635037306,
      "eval_runtime": 1035.6849,
      "eval_samples_per_second": 0.238,
      "eval_steps_per_second": 0.119,
      "step": 20
    },
    {
      "epoch": 0.21390374331550802,
      "grad_norm": 0.2079063504934311,
      "learning_rate": 1.8247997414535347e-05,
      "loss": 1.3019,
      "mean_token_accuracy": 0.7044360123574733,
      "step": 40
    },
    {
      "epoch": 0.21390374331550802,
      "eval_loss": 1.1822656393051147,
      "eval_mean_token_accuracy": 0.7299752424402934,
      "eval_runtime": 1036.3743,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 40
    },
    {
      "epoch": 0.32085561497326204,
      "grad_norm": 0.24613960087299347,
      "learning_rate": 2e-05,
      "loss": 1.1518,
      "mean_token_accuracy": 0.7350467927753925,
      "step": 60
    },
    {
      "epoch": 0.32085561497326204,
      "eval_loss": 1.0543467998504639,
      "eval_mean_token_accuracy": 0.7558261015550877,
      "eval_runtime": 1037.5825,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 60
    },
    {
      "epoch": 0.42780748663101603,
      "grad_norm": 0.21977411210536957,
      "learning_rate": 2e-05,
      "loss": 1.0451,
      "mean_token_accuracy": 0.758122804760933,
      "step": 80
    },
    {
      "epoch": 0.42780748663101603,
      "eval_loss": 0.9605845808982849,
      "eval_mean_token_accuracy": 0.7781746368098065,
      "eval_runtime": 1036.6763,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 80
    },
    {
      "epoch": 0.5347593582887701,
      "grad_norm": 0.29401177167892456,
      "learning_rate": 2e-05,
      "loss": 0.9781,
      "mean_token_accuracy": 0.7737521544098854,
      "step": 100
    },
    {
      "epoch": 0.5347593582887701,
      "eval_loss": 0.9001632332801819,
      "eval_mean_token_accuracy": 0.7922655029025504,
      "eval_runtime": 1037.6159,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 100
    },
    {
      "epoch": 0.6417112299465241,
      "grad_norm": 0.34717148542404175,
      "learning_rate": 2e-05,
      "loss": 0.9113,
      "mean_token_accuracy": 0.7882794156670571,
      "step": 120
    },
    {
      "epoch": 0.6417112299465241,
      "eval_loss": 0.8677236437797546,
      "eval_mean_token_accuracy": 0.7979402353123921,
      "eval_runtime": 1037.0163,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 120
    },
    {
      "epoch": 0.7486631016042781,
      "grad_norm": 0.3498166799545288,
      "learning_rate": 2e-05,
      "loss": 0.8725,
      "mean_token_accuracy": 0.7943845748901367,
      "step": 140
    },
    {
      "epoch": 0.7486631016042781,
      "eval_loss": 0.8351719379425049,
      "eval_mean_token_accuracy": 0.8040957174650053,
      "eval_runtime": 1036.2597,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 140
    },
    {
      "epoch": 0.8556149732620321,
      "grad_norm": 0.3868383467197418,
      "learning_rate": 2e-05,
      "loss": 0.8721,
      "mean_token_accuracy": 0.7931242920458317,
      "step": 160
    },
    {
      "epoch": 0.8556149732620321,
      "eval_loss": 0.8117150068283081,
      "eval_mean_token_accuracy": 0.808245500413383,
      "eval_runtime": 1036.3899,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 160
    },
    {
      "epoch": 0.9625668449197861,
      "grad_norm": 0.5154958367347717,
      "learning_rate": 2e-05,
      "loss": 0.83,
      "mean_token_accuracy": 0.8012300632894039,
      "step": 180
    },
    {
      "epoch": 0.9625668449197861,
      "eval_loss": 0.7896639108657837,
      "eval_mean_token_accuracy": 0.8132512133295943,
      "eval_runtime": 1036.5717,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 180
    },
    {
      "epoch": 1.0695187165775402,
      "grad_norm": 0.42563366889953613,
      "learning_rate": 2e-05,
      "loss": 0.8034,
      "mean_token_accuracy": 0.8062243178486824,
      "step": 200
    },
    {
      "epoch": 1.0695187165775402,
      "eval_loss": 0.7641515731811523,
      "eval_mean_token_accuracy": 0.8180098591781244,
      "eval_runtime": 1037.8429,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 200
    },
    {
      "epoch": 1.1764705882352942,
      "grad_norm": 0.39126402139663696,
      "learning_rate": 2e-05,
      "loss": 0.7637,
      "mean_token_accuracy": 0.8159952461719513,
      "step": 220
    },
    {
      "epoch": 1.1764705882352942,
      "eval_loss": 0.7506969571113586,
      "eval_mean_token_accuracy": 0.8211809416127399,
      "eval_runtime": 1036.9492,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 220
    },
    {
      "epoch": 1.2834224598930482,
      "grad_norm": 0.525314211845398,
      "learning_rate": 2e-05,
      "loss": 0.7014,
      "mean_token_accuracy": 0.8259521864354611,
      "step": 240
    },
    {
      "epoch": 1.2834224598930482,
      "eval_loss": 0.7359923124313354,
      "eval_mean_token_accuracy": 0.8241757876504727,
      "eval_runtime": 1037.3586,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 240
    },
    {
      "epoch": 1.3903743315508021,
      "grad_norm": 1.2710996866226196,
      "learning_rate": 2e-05,
      "loss": 0.7084,
      "mean_token_accuracy": 0.8261168003082275,
      "step": 260
    },
    {
      "epoch": 1.3903743315508021,
      "eval_loss": 0.7302640676498413,
      "eval_mean_token_accuracy": 0.8257462154559003,
      "eval_runtime": 1037.1577,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 260
    },
    {
      "epoch": 1.4973262032085561,
      "grad_norm": 0.5921723246574402,
      "learning_rate": 2e-05,
      "loss": 0.6984,
      "mean_token_accuracy": 0.8257287561893463,
      "step": 280
    },
    {
      "epoch": 1.4973262032085561,
      "eval_loss": 0.716602087020874,
      "eval_mean_token_accuracy": 0.8293129685448437,
      "eval_runtime": 1037.3171,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 280
    },
    {
      "epoch": 1.6042780748663101,
      "grad_norm": 0.6089026927947998,
      "learning_rate": 2e-05,
      "loss": 0.6591,
      "mean_token_accuracy": 0.8372392967343331,
      "step": 300
    },
    {
      "epoch": 1.6042780748663101,
      "eval_loss": 0.7121440768241882,
      "eval_mean_token_accuracy": 0.8315709296280775,
      "eval_runtime": 1038.1176,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.118,
      "step": 300
    },
    {
      "epoch": 1.7112299465240641,
      "grad_norm": 0.751674473285675,
      "learning_rate": 2e-05,
      "loss": 0.6036,
      "mean_token_accuracy": 0.8498695828020573,
      "step": 320
    },
    {
      "epoch": 1.7112299465240641,
      "eval_loss": 0.697968602180481,
      "eval_mean_token_accuracy": 0.8344496600027007,
      "eval_runtime": 1037.7814,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 320
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 1.045749545097351,
      "learning_rate": 2e-05,
      "loss": 0.6099,
      "mean_token_accuracy": 0.845644561946392,
      "step": 340
    },
    {
      "epoch": 1.8181818181818183,
      "eval_loss": 0.6774637699127197,
      "eval_mean_token_accuracy": 0.8406207769866881,
      "eval_runtime": 1037.5418,
      "eval_samples_per_second": 0.237,
      "eval_steps_per_second": 0.119,
      "step": 340
    }
  ],
  "logging_steps": 20,
  "max_steps": 1870,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 20,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.857740816294871e+18,
  "train_batch_size": 3,
  "trial_name": null,
  "trial_params": null
}