| { | |
| "best_global_step": 1497, | |
| "best_metric": 0.7813953488372093, | |
| "best_model_checkpoint": "./electra-small-heading-classifier-expanded\\checkpoint-1497", | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 1497, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.033400133600534405, | |
| "grad_norm": 1.82759428024292, | |
| "learning_rate": 2.45e-05, | |
| "loss": 0.5801, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06680026720106881, | |
| "grad_norm": 0.7336680293083191, | |
| "learning_rate": 4.9500000000000004e-05, | |
| "loss": 0.3191, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10020040080160321, | |
| "grad_norm": 0.759293258190155, | |
| "learning_rate": 4.9583899456521745e-05, | |
| "loss": 0.2216, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.13360053440213762, | |
| "grad_norm": 0.5498169660568237, | |
| "learning_rate": 4.915930706521739e-05, | |
| "loss": 0.2167, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.16700066800267202, | |
| "grad_norm": 3.572784662246704, | |
| "learning_rate": 4.8734714673913044e-05, | |
| "loss": 0.1797, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.20040080160320642, | |
| "grad_norm": 1.3190865516662598, | |
| "learning_rate": 4.83101222826087e-05, | |
| "loss": 0.2202, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23380093520374082, | |
| "grad_norm": 1.5133466720581055, | |
| "learning_rate": 4.788552989130435e-05, | |
| "loss": 0.1925, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.26720106880427524, | |
| "grad_norm": 1.3208128213882446, | |
| "learning_rate": 4.7460937500000004e-05, | |
| "loss": 0.265, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.30060120240480964, | |
| "grad_norm": 0.9725052118301392, | |
| "learning_rate": 4.703634510869566e-05, | |
| "loss": 0.2866, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.33400133600534404, | |
| "grad_norm": 1.570516586303711, | |
| "learning_rate": 4.6611752717391304e-05, | |
| "loss": 0.1881, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.36740146960587844, | |
| "grad_norm": 4.067606449127197, | |
| "learning_rate": 4.618716032608696e-05, | |
| "loss": 0.2258, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.40080160320641284, | |
| "grad_norm": 0.2925869822502136, | |
| "learning_rate": 4.576256793478261e-05, | |
| "loss": 0.1488, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.43420173680694724, | |
| "grad_norm": 0.9354973435401917, | |
| "learning_rate": 4.5337975543478264e-05, | |
| "loss": 0.1611, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.46760187040748163, | |
| "grad_norm": 0.3285213112831116, | |
| "learning_rate": 4.491338315217392e-05, | |
| "loss": 0.1259, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.501002004008016, | |
| "grad_norm": 13.97227954864502, | |
| "learning_rate": 4.448879076086957e-05, | |
| "loss": 0.1593, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5344021376085505, | |
| "grad_norm": 0.10469996929168701, | |
| "learning_rate": 4.406419836956522e-05, | |
| "loss": 0.0923, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5678022712090849, | |
| "grad_norm": 0.19770751893520355, | |
| "learning_rate": 4.363960597826087e-05, | |
| "loss": 0.1067, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6012024048096193, | |
| "grad_norm": 0.10104553401470184, | |
| "learning_rate": 4.321501358695652e-05, | |
| "loss": 0.1214, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6346025384101537, | |
| "grad_norm": 0.10949808359146118, | |
| "learning_rate": 4.279042119565218e-05, | |
| "loss": 0.1175, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6680026720106881, | |
| "grad_norm": 0.10336494445800781, | |
| "learning_rate": 4.236582880434783e-05, | |
| "loss": 0.0849, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7014028056112225, | |
| "grad_norm": 0.12163177132606506, | |
| "learning_rate": 4.1941236413043476e-05, | |
| "loss": 0.1486, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.7348029392117569, | |
| "grad_norm": 0.0906616598367691, | |
| "learning_rate": 4.151664402173913e-05, | |
| "loss": 0.0671, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7682030728122913, | |
| "grad_norm": 0.12954840064048767, | |
| "learning_rate": 4.109205163043478e-05, | |
| "loss": 0.072, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.8016032064128257, | |
| "grad_norm": 0.06860172003507614, | |
| "learning_rate": 4.0667459239130436e-05, | |
| "loss": 0.0735, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8350033400133601, | |
| "grad_norm": 0.06437909603118896, | |
| "learning_rate": 4.024286684782609e-05, | |
| "loss": 0.16, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.8684034736138945, | |
| "grad_norm": 3.2551543712615967, | |
| "learning_rate": 3.981827445652174e-05, | |
| "loss": 0.0868, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9018036072144289, | |
| "grad_norm": 0.12183782458305359, | |
| "learning_rate": 3.939368206521739e-05, | |
| "loss": 0.1346, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.9352037408149633, | |
| "grad_norm": 5.742549419403076, | |
| "learning_rate": 3.896908967391304e-05, | |
| "loss": 0.0977, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.9686038744154977, | |
| "grad_norm": 0.049525078386068344, | |
| "learning_rate": 3.85444972826087e-05, | |
| "loss": 0.0771, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9777567439659253, | |
| "eval_f1": 0.7813953488372093, | |
| "eval_loss": 0.07437803596258163, | |
| "eval_runtime": 8.1317, | |
| "eval_samples_per_second": 259.846, | |
| "eval_steps_per_second": 32.588, | |
| "step": 1497 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 5988, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 352211940040704.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |