{ "best_global_step": 2000, "best_metric": 0.9388425925925926, "best_model_checkpoint": "myocardial_pubmedbert_mps_final_v3/checkpoint-2000", "epoch": 0.37037037037037035, "eval_steps": 1000, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.018518518518518517, "grad_norm": 2.475142478942871, "learning_rate": 9.9e-08, "loss": 0.1439, "step": 100 }, { "epoch": 0.037037037037037035, "grad_norm": 1.806380271911621, "learning_rate": 1.99e-07, "loss": 0.1511, "step": 200 }, { "epoch": 0.05555555555555555, "grad_norm": 1.5451908111572266, "learning_rate": 2.9899999999999996e-07, "loss": 0.1491, "step": 300 }, { "epoch": 0.07407407407407407, "grad_norm": 2.704113483428955, "learning_rate": 3.99e-07, "loss": 0.153, "step": 400 }, { "epoch": 0.09259259259259259, "grad_norm": 4.833273887634277, "learning_rate": 4.99e-07, "loss": 0.149, "step": 500 }, { "epoch": 0.1111111111111111, "grad_norm": 1.2810375690460205, "learning_rate": 5.989999999999999e-07, "loss": 0.1582, "step": 600 }, { "epoch": 0.12962962962962962, "grad_norm": 9.05034065246582, "learning_rate": 6.989999999999999e-07, "loss": 0.1711, "step": 700 }, { "epoch": 0.14814814814814814, "grad_norm": 7.097633361816406, "learning_rate": 7.99e-07, "loss": 0.1419, "step": 800 }, { "epoch": 0.16666666666666666, "grad_norm": 10.607206344604492, "learning_rate": 8.99e-07, "loss": 0.1298, "step": 900 }, { "epoch": 0.18518518518518517, "grad_norm": 1.5295910835266113, "learning_rate": 9.989999999999999e-07, "loss": 0.1401, "step": 1000 }, { "epoch": 0.18518518518518517, "eval_accuracy": 0.9386111111111111, "eval_loss": 0.1337883323431015, "eval_runtime": 906.5981, "eval_samples_per_second": 47.651, "eval_steps_per_second": 1.489, "step": 1000 }, { "epoch": 0.2037037037037037, "grad_norm": 7.718695163726807, "learning_rate": 9.98132075471698e-07, "loss": 0.1463, "step": 1100 }, { "epoch": 0.2222222222222222, "grad_norm": 2.2934257984161377, "learning_rate": 9.962452830188678e-07, "loss": 0.1505, "step": 1200 }, { "epoch": 0.24074074074074073, "grad_norm": 4.265007495880127, "learning_rate": 9.943584905660378e-07, "loss": 0.1451, "step": 1300 }, { "epoch": 0.25925925925925924, "grad_norm": 8.984698295593262, "learning_rate": 9.924716981132075e-07, "loss": 0.1412, "step": 1400 }, { "epoch": 0.2777777777777778, "grad_norm": 6.165067672729492, "learning_rate": 9.905849056603772e-07, "loss": 0.1354, "step": 1500 }, { "epoch": 0.2962962962962963, "grad_norm": 14.949434280395508, "learning_rate": 9.886981132075472e-07, "loss": 0.1305, "step": 1600 }, { "epoch": 0.3148148148148148, "grad_norm": 5.017131805419922, "learning_rate": 9.86811320754717e-07, "loss": 0.147, "step": 1700 }, { "epoch": 0.3333333333333333, "grad_norm": 6.54106330871582, "learning_rate": 9.849245283018867e-07, "loss": 0.1435, "step": 1800 }, { "epoch": 0.35185185185185186, "grad_norm": 10.726076126098633, "learning_rate": 9.830377358490567e-07, "loss": 0.1347, "step": 1900 }, { "epoch": 0.37037037037037035, "grad_norm": 7.49448299407959, "learning_rate": 9.811509433962264e-07, "loss": 0.1539, "step": 2000 }, { "epoch": 0.37037037037037035, "eval_accuracy": 0.9388425925925926, "eval_loss": 0.13230355083942413, "eval_runtime": 839.9501, "eval_samples_per_second": 51.432, "eval_steps_per_second": 1.607, "step": 2000 } ], "logging_steps": 100, "max_steps": 54000, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.6840165883904e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }