{ "best_global_step": 1497, "best_metric": 0.7813953488372093, "best_model_checkpoint": "./electra-small-heading-classifier-expanded\\checkpoint-1497", "epoch": 1.0, "eval_steps": 500, "global_step": 1497, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.033400133600534405, "grad_norm": 1.82759428024292, "learning_rate": 2.45e-05, "loss": 0.5801, "step": 50 }, { "epoch": 0.06680026720106881, "grad_norm": 0.7336680293083191, "learning_rate": 4.9500000000000004e-05, "loss": 0.3191, "step": 100 }, { "epoch": 0.10020040080160321, "grad_norm": 0.759293258190155, "learning_rate": 4.9583899456521745e-05, "loss": 0.2216, "step": 150 }, { "epoch": 0.13360053440213762, "grad_norm": 0.5498169660568237, "learning_rate": 4.915930706521739e-05, "loss": 0.2167, "step": 200 }, { "epoch": 0.16700066800267202, "grad_norm": 3.572784662246704, "learning_rate": 4.8734714673913044e-05, "loss": 0.1797, "step": 250 }, { "epoch": 0.20040080160320642, "grad_norm": 1.3190865516662598, "learning_rate": 4.83101222826087e-05, "loss": 0.2202, "step": 300 }, { "epoch": 0.23380093520374082, "grad_norm": 1.5133466720581055, "learning_rate": 4.788552989130435e-05, "loss": 0.1925, "step": 350 }, { "epoch": 0.26720106880427524, "grad_norm": 1.3208128213882446, "learning_rate": 4.7460937500000004e-05, "loss": 0.265, "step": 400 }, { "epoch": 0.30060120240480964, "grad_norm": 0.9725052118301392, "learning_rate": 4.703634510869566e-05, "loss": 0.2866, "step": 450 }, { "epoch": 0.33400133600534404, "grad_norm": 1.570516586303711, "learning_rate": 4.6611752717391304e-05, "loss": 0.1881, "step": 500 }, { "epoch": 0.36740146960587844, "grad_norm": 4.067606449127197, "learning_rate": 4.618716032608696e-05, "loss": 0.2258, "step": 550 }, { "epoch": 0.40080160320641284, "grad_norm": 0.2925869822502136, "learning_rate": 4.576256793478261e-05, "loss": 0.1488, "step": 600 }, { "epoch": 0.43420173680694724, "grad_norm": 0.9354973435401917, "learning_rate": 4.5337975543478264e-05, "loss": 0.1611, "step": 650 }, { "epoch": 0.46760187040748163, "grad_norm": 0.3285213112831116, "learning_rate": 4.491338315217392e-05, "loss": 0.1259, "step": 700 }, { "epoch": 0.501002004008016, "grad_norm": 13.97227954864502, "learning_rate": 4.448879076086957e-05, "loss": 0.1593, "step": 750 }, { "epoch": 0.5344021376085505, "grad_norm": 0.10469996929168701, "learning_rate": 4.406419836956522e-05, "loss": 0.0923, "step": 800 }, { "epoch": 0.5678022712090849, "grad_norm": 0.19770751893520355, "learning_rate": 4.363960597826087e-05, "loss": 0.1067, "step": 850 }, { "epoch": 0.6012024048096193, "grad_norm": 0.10104553401470184, "learning_rate": 4.321501358695652e-05, "loss": 0.1214, "step": 900 }, { "epoch": 0.6346025384101537, "grad_norm": 0.10949808359146118, "learning_rate": 4.279042119565218e-05, "loss": 0.1175, "step": 950 }, { "epoch": 0.6680026720106881, "grad_norm": 0.10336494445800781, "learning_rate": 4.236582880434783e-05, "loss": 0.0849, "step": 1000 }, { "epoch": 0.7014028056112225, "grad_norm": 0.12163177132606506, "learning_rate": 4.1941236413043476e-05, "loss": 0.1486, "step": 1050 }, { "epoch": 0.7348029392117569, "grad_norm": 0.0906616598367691, "learning_rate": 4.151664402173913e-05, "loss": 0.0671, "step": 1100 }, { "epoch": 0.7682030728122913, "grad_norm": 0.12954840064048767, "learning_rate": 4.109205163043478e-05, "loss": 0.072, "step": 1150 }, { "epoch": 0.8016032064128257, "grad_norm": 0.06860172003507614, "learning_rate": 4.0667459239130436e-05, "loss": 0.0735, "step": 1200 }, { "epoch": 0.8350033400133601, "grad_norm": 0.06437909603118896, "learning_rate": 4.024286684782609e-05, "loss": 0.16, "step": 1250 }, { "epoch": 0.8684034736138945, "grad_norm": 3.2551543712615967, "learning_rate": 3.981827445652174e-05, "loss": 0.0868, "step": 1300 }, { "epoch": 0.9018036072144289, "grad_norm": 0.12183782458305359, "learning_rate": 3.939368206521739e-05, "loss": 0.1346, "step": 1350 }, { "epoch": 0.9352037408149633, "grad_norm": 5.742549419403076, "learning_rate": 3.896908967391304e-05, "loss": 0.0977, "step": 1400 }, { "epoch": 0.9686038744154977, "grad_norm": 0.049525078386068344, "learning_rate": 3.85444972826087e-05, "loss": 0.0771, "step": 1450 }, { "epoch": 1.0, "eval_accuracy": 0.9777567439659253, "eval_f1": 0.7813953488372093, "eval_loss": 0.07437803596258163, "eval_runtime": 8.1317, "eval_samples_per_second": 259.846, "eval_steps_per_second": 32.588, "step": 1497 } ], "logging_steps": 50, "max_steps": 5988, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 352211940040704.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }