| { | |
| "best_metric": 0.7317561030375677, | |
| "best_model_checkpoint": "./results\\checkpoint-1690", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 1690, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.14792899408284024, | |
| "grad_norm": 3.1896114349365234, | |
| "learning_rate": 5.562130177514793e-06, | |
| "loss": 1.0881, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2958579881656805, | |
| "grad_norm": 3.5786380767822266, | |
| "learning_rate": 1.1479289940828404e-05, | |
| "loss": 1.0702, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4437869822485207, | |
| "grad_norm": 9.252333641052246, | |
| "learning_rate": 1.7396449704142012e-05, | |
| "loss": 0.9906, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.591715976331361, | |
| "grad_norm": 10.281839370727539, | |
| "learning_rate": 1.963182117028271e-05, | |
| "loss": 0.9185, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.7396449704142012, | |
| "grad_norm": 10.87905502319336, | |
| "learning_rate": 1.8974358974358975e-05, | |
| "loss": 0.8892, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.8875739644970414, | |
| "grad_norm": 7.557311058044434, | |
| "learning_rate": 1.8316896778435242e-05, | |
| "loss": 0.8507, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6605351170568562, | |
| "eval_loss": 0.7316408753395081, | |
| "eval_macro_f1": 0.6313443909417599, | |
| "eval_precision": 0.6433290821476324, | |
| "eval_recall": 0.6496354509341522, | |
| "eval_runtime": 1.4514, | |
| "eval_samples_per_second": 412.02, | |
| "eval_steps_per_second": 26.182, | |
| "eval_weighted_f1": 0.6689507482328343, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.0355029585798816, | |
| "grad_norm": 11.54389762878418, | |
| "learning_rate": 1.765943458251151e-05, | |
| "loss": 0.829, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.183431952662722, | |
| "grad_norm": 9.747392654418945, | |
| "learning_rate": 1.7001972386587773e-05, | |
| "loss": 0.7475, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.331360946745562, | |
| "grad_norm": 8.204669952392578, | |
| "learning_rate": 1.6344510190664036e-05, | |
| "loss": 0.7326, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.4792899408284024, | |
| "grad_norm": 5.944102764129639, | |
| "learning_rate": 1.5687047994740304e-05, | |
| "loss": 0.7375, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.6272189349112427, | |
| "grad_norm": 9.364075660705566, | |
| "learning_rate": 1.5029585798816569e-05, | |
| "loss": 0.7505, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.7751479289940828, | |
| "grad_norm": 7.409163951873779, | |
| "learning_rate": 1.4372123602892836e-05, | |
| "loss": 0.6668, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.9230769230769231, | |
| "grad_norm": 12.596030235290527, | |
| "learning_rate": 1.3714661406969102e-05, | |
| "loss": 0.67, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7408026755852842, | |
| "eval_loss": 0.6650733351707458, | |
| "eval_macro_f1": 0.7028637657616557, | |
| "eval_precision": 0.7033778322629879, | |
| "eval_recall": 0.7057446044459033, | |
| "eval_runtime": 1.3215, | |
| "eval_samples_per_second": 452.526, | |
| "eval_steps_per_second": 28.756, | |
| "eval_weighted_f1": 0.7397522777158159, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.0710059171597632, | |
| "grad_norm": 8.544220924377441, | |
| "learning_rate": 1.3057199211045365e-05, | |
| "loss": 0.6231, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.2189349112426036, | |
| "grad_norm": 20.543594360351562, | |
| "learning_rate": 1.239973701512163e-05, | |
| "loss": 0.6099, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.366863905325444, | |
| "grad_norm": 6.354947566986084, | |
| "learning_rate": 1.1742274819197896e-05, | |
| "loss": 0.5998, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.5147928994082838, | |
| "grad_norm": 11.728825569152832, | |
| "learning_rate": 1.1084812623274163e-05, | |
| "loss": 0.5539, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.662721893491124, | |
| "grad_norm": 6.896145343780518, | |
| "learning_rate": 1.0427350427350429e-05, | |
| "loss": 0.5705, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.8106508875739644, | |
| "grad_norm": 13.93707275390625, | |
| "learning_rate": 9.769888231426694e-06, | |
| "loss": 0.6028, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.9585798816568047, | |
| "grad_norm": 10.1209135055542, | |
| "learning_rate": 9.11242603550296e-06, | |
| "loss": 0.5902, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7357859531772575, | |
| "eval_loss": 0.668661892414093, | |
| "eval_macro_f1": 0.7024505567287104, | |
| "eval_precision": 0.7182104259645156, | |
| "eval_recall": 0.6973090479583987, | |
| "eval_runtime": 1.2508, | |
| "eval_samples_per_second": 478.109, | |
| "eval_steps_per_second": 30.382, | |
| "eval_weighted_f1": 0.7352357361433975, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 3.106508875739645, | |
| "grad_norm": 15.182376861572266, | |
| "learning_rate": 8.454963839579225e-06, | |
| "loss": 0.5552, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 3.2544378698224854, | |
| "grad_norm": 10.012931823730469, | |
| "learning_rate": 7.79750164365549e-06, | |
| "loss": 0.4979, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.4023668639053253, | |
| "grad_norm": 15.364001274108887, | |
| "learning_rate": 7.140039447731756e-06, | |
| "loss": 0.4272, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 3.5502958579881656, | |
| "grad_norm": 11.208831787109375, | |
| "learning_rate": 6.482577251808022e-06, | |
| "loss": 0.4183, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.698224852071006, | |
| "grad_norm": 12.957846641540527, | |
| "learning_rate": 5.825115055884288e-06, | |
| "loss": 0.4733, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 3.8461538461538463, | |
| "grad_norm": 9.980995178222656, | |
| "learning_rate": 5.167652859960552e-06, | |
| "loss": 0.4714, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.994082840236686, | |
| "grad_norm": 8.93154525756836, | |
| "learning_rate": 4.5101906640368185e-06, | |
| "loss": 0.4399, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.754180602006689, | |
| "eval_loss": 0.6718000173568726, | |
| "eval_macro_f1": 0.7221273105833076, | |
| "eval_precision": 0.7226927352058, | |
| "eval_recall": 0.7217587295509373, | |
| "eval_runtime": 1.243, | |
| "eval_samples_per_second": 481.084, | |
| "eval_steps_per_second": 30.571, | |
| "eval_weighted_f1": 0.7534201331590002, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 4.1420118343195265, | |
| "grad_norm": 15.335867881774902, | |
| "learning_rate": 3.852728468113084e-06, | |
| "loss": 0.3822, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 4.289940828402367, | |
| "grad_norm": 12.809405326843262, | |
| "learning_rate": 3.1952662721893497e-06, | |
| "loss": 0.3858, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 4.437869822485207, | |
| "grad_norm": 9.913019180297852, | |
| "learning_rate": 2.5378040762656148e-06, | |
| "loss": 0.3597, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.585798816568047, | |
| "grad_norm": 20.306541442871094, | |
| "learning_rate": 1.8803418803418804e-06, | |
| "loss": 0.3578, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 4.733727810650888, | |
| "grad_norm": 14.110806465148926, | |
| "learning_rate": 1.222879684418146e-06, | |
| "loss": 0.4013, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.881656804733728, | |
| "grad_norm": 10.495901107788086, | |
| "learning_rate": 5.654174884944117e-07, | |
| "loss": 0.3811, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7642140468227425, | |
| "eval_loss": 0.7175509333610535, | |
| "eval_macro_f1": 0.7317561030375677, | |
| "eval_precision": 0.7390880016128589, | |
| "eval_recall": 0.7268900710459153, | |
| "eval_runtime": 2.4767, | |
| "eval_samples_per_second": 241.453, | |
| "eval_steps_per_second": 15.343, | |
| "eval_weighted_f1": 0.762570091666699, | |
| "step": 1690 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 1690, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1773713327258880.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |