{ "best_global_step": 2994, "best_metric": 0.8489208633093526, "best_model_checkpoint": "./electra-small-heading-classifier-expanded\\checkpoint-2994", "epoch": 2.0, "eval_steps": 500, "global_step": 2994, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.033400133600534405, "grad_norm": 1.82759428024292, "learning_rate": 2.45e-05, "loss": 0.5801, "step": 50 }, { "epoch": 0.06680026720106881, "grad_norm": 0.7336680293083191, "learning_rate": 4.9500000000000004e-05, "loss": 0.3191, "step": 100 }, { "epoch": 0.10020040080160321, "grad_norm": 0.759293258190155, "learning_rate": 4.9583899456521745e-05, "loss": 0.2216, "step": 150 }, { "epoch": 0.13360053440213762, "grad_norm": 0.5498169660568237, "learning_rate": 4.915930706521739e-05, "loss": 0.2167, "step": 200 }, { "epoch": 0.16700066800267202, "grad_norm": 3.572784662246704, "learning_rate": 4.8734714673913044e-05, "loss": 0.1797, "step": 250 }, { "epoch": 0.20040080160320642, "grad_norm": 1.3190865516662598, "learning_rate": 4.83101222826087e-05, "loss": 0.2202, "step": 300 }, { "epoch": 0.23380093520374082, "grad_norm": 1.5133466720581055, "learning_rate": 4.788552989130435e-05, "loss": 0.1925, "step": 350 }, { "epoch": 0.26720106880427524, "grad_norm": 1.3208128213882446, "learning_rate": 4.7460937500000004e-05, "loss": 0.265, "step": 400 }, { "epoch": 0.30060120240480964, "grad_norm": 0.9725052118301392, "learning_rate": 4.703634510869566e-05, "loss": 0.2866, "step": 450 }, { "epoch": 0.33400133600534404, "grad_norm": 1.570516586303711, "learning_rate": 4.6611752717391304e-05, "loss": 0.1881, "step": 500 }, { "epoch": 0.36740146960587844, "grad_norm": 4.067606449127197, "learning_rate": 4.618716032608696e-05, "loss": 0.2258, "step": 550 }, { "epoch": 0.40080160320641284, "grad_norm": 0.2925869822502136, "learning_rate": 4.576256793478261e-05, "loss": 0.1488, "step": 600 }, { "epoch": 0.43420173680694724, "grad_norm": 0.9354973435401917, "learning_rate": 4.5337975543478264e-05, "loss": 0.1611, "step": 650 }, { "epoch": 0.46760187040748163, "grad_norm": 0.3285213112831116, "learning_rate": 4.491338315217392e-05, "loss": 0.1259, "step": 700 }, { "epoch": 0.501002004008016, "grad_norm": 13.97227954864502, "learning_rate": 4.448879076086957e-05, "loss": 0.1593, "step": 750 }, { "epoch": 0.5344021376085505, "grad_norm": 0.10469996929168701, "learning_rate": 4.406419836956522e-05, "loss": 0.0923, "step": 800 }, { "epoch": 0.5678022712090849, "grad_norm": 0.19770751893520355, "learning_rate": 4.363960597826087e-05, "loss": 0.1067, "step": 850 }, { "epoch": 0.6012024048096193, "grad_norm": 0.10104553401470184, "learning_rate": 4.321501358695652e-05, "loss": 0.1214, "step": 900 }, { "epoch": 0.6346025384101537, "grad_norm": 0.10949808359146118, "learning_rate": 4.279042119565218e-05, "loss": 0.1175, "step": 950 }, { "epoch": 0.6680026720106881, "grad_norm": 0.10336494445800781, "learning_rate": 4.236582880434783e-05, "loss": 0.0849, "step": 1000 }, { "epoch": 0.7014028056112225, "grad_norm": 0.12163177132606506, "learning_rate": 4.1941236413043476e-05, "loss": 0.1486, "step": 1050 }, { "epoch": 0.7348029392117569, "grad_norm": 0.0906616598367691, "learning_rate": 4.151664402173913e-05, "loss": 0.0671, "step": 1100 }, { "epoch": 0.7682030728122913, "grad_norm": 0.12954840064048767, "learning_rate": 4.109205163043478e-05, "loss": 0.072, "step": 1150 }, { "epoch": 0.8016032064128257, "grad_norm": 0.06860172003507614, "learning_rate": 4.0667459239130436e-05, "loss": 0.0735, "step": 1200 }, { "epoch": 0.8350033400133601, "grad_norm": 0.06437909603118896, "learning_rate": 4.024286684782609e-05, "loss": 0.16, "step": 1250 }, { "epoch": 0.8684034736138945, "grad_norm": 3.2551543712615967, "learning_rate": 3.981827445652174e-05, "loss": 0.0868, "step": 1300 }, { "epoch": 0.9018036072144289, "grad_norm": 0.12183782458305359, "learning_rate": 3.939368206521739e-05, "loss": 0.1346, "step": 1350 }, { "epoch": 0.9352037408149633, "grad_norm": 5.742549419403076, "learning_rate": 3.896908967391304e-05, "loss": 0.0977, "step": 1400 }, { "epoch": 0.9686038744154977, "grad_norm": 0.049525078386068344, "learning_rate": 3.85444972826087e-05, "loss": 0.0771, "step": 1450 }, { "epoch": 1.0, "eval_accuracy": 0.9777567439659253, "eval_f1": 0.7813953488372093, "eval_loss": 0.07437803596258163, "eval_runtime": 8.1317, "eval_samples_per_second": 259.846, "eval_steps_per_second": 32.588, "step": 1497 }, { "epoch": 1.002004008016032, "grad_norm": 0.08354274183511734, "learning_rate": 3.811990489130435e-05, "loss": 0.1122, "step": 1500 }, { "epoch": 1.0354041416165665, "grad_norm": 0.07657646387815475, "learning_rate": 3.76953125e-05, "loss": 0.0776, "step": 1550 }, { "epoch": 1.0688042752171008, "grad_norm": 0.029183173552155495, "learning_rate": 3.7270720108695656e-05, "loss": 0.048, "step": 1600 }, { "epoch": 1.1022044088176353, "grad_norm": 0.6650001406669617, "learning_rate": 3.68461277173913e-05, "loss": 0.0651, "step": 1650 }, { "epoch": 1.1356045424181698, "grad_norm": 0.13291126489639282, "learning_rate": 3.6421535326086955e-05, "loss": 0.1035, "step": 1700 }, { "epoch": 1.169004676018704, "grad_norm": 0.14512984454631805, "learning_rate": 3.5996942934782615e-05, "loss": 0.0896, "step": 1750 }, { "epoch": 1.2024048096192386, "grad_norm": 0.09467575699090958, "learning_rate": 3.557235054347826e-05, "loss": 0.0763, "step": 1800 }, { "epoch": 1.2358049432197729, "grad_norm": 0.5623799562454224, "learning_rate": 3.5147758152173915e-05, "loss": 0.059, "step": 1850 }, { "epoch": 1.2692050768203074, "grad_norm": 0.16416147351264954, "learning_rate": 3.472316576086957e-05, "loss": 0.0483, "step": 1900 }, { "epoch": 1.3026052104208417, "grad_norm": 3.0964038372039795, "learning_rate": 3.4298573369565215e-05, "loss": 0.1058, "step": 1950 }, { "epoch": 1.3360053440213762, "grad_norm": 0.030443737283349037, "learning_rate": 3.3873980978260875e-05, "loss": 0.0517, "step": 2000 }, { "epoch": 1.3694054776219104, "grad_norm": 0.04263285547494888, "learning_rate": 3.344938858695653e-05, "loss": 0.0486, "step": 2050 }, { "epoch": 1.402805611222445, "grad_norm": 0.06713565438985825, "learning_rate": 3.3024796195652175e-05, "loss": 0.0494, "step": 2100 }, { "epoch": 1.4362057448229792, "grad_norm": 0.04323391616344452, "learning_rate": 3.260020380434783e-05, "loss": 0.0518, "step": 2150 }, { "epoch": 1.4696058784235138, "grad_norm": 0.9192191362380981, "learning_rate": 3.2175611413043474e-05, "loss": 0.0498, "step": 2200 }, { "epoch": 1.503006012024048, "grad_norm": 0.08719488233327866, "learning_rate": 3.1751019021739135e-05, "loss": 0.1155, "step": 2250 }, { "epoch": 1.5364061456245826, "grad_norm": 6.060549259185791, "learning_rate": 3.132642663043479e-05, "loss": 0.1349, "step": 2300 }, { "epoch": 1.569806279225117, "grad_norm": 0.09453130513429642, "learning_rate": 3.0901834239130434e-05, "loss": 0.0639, "step": 2350 }, { "epoch": 1.6032064128256514, "grad_norm": 0.02592223510146141, "learning_rate": 3.0477241847826088e-05, "loss": 0.0929, "step": 2400 }, { "epoch": 1.6366065464261856, "grad_norm": 0.027360519394278526, "learning_rate": 3.005264945652174e-05, "loss": 0.071, "step": 2450 }, { "epoch": 1.6700066800267201, "grad_norm": 0.14285916090011597, "learning_rate": 2.962805706521739e-05, "loss": 0.0768, "step": 2500 }, { "epoch": 1.7034068136272547, "grad_norm": 0.04874153807759285, "learning_rate": 2.9203464673913044e-05, "loss": 0.076, "step": 2550 }, { "epoch": 1.736806947227789, "grad_norm": 0.09328486025333405, "learning_rate": 2.8778872282608697e-05, "loss": 0.0505, "step": 2600 }, { "epoch": 1.7702070808283232, "grad_norm": 0.34670591354370117, "learning_rate": 2.8354279891304347e-05, "loss": 0.0763, "step": 2650 }, { "epoch": 1.8036072144288577, "grad_norm": 0.057789236307144165, "learning_rate": 2.79296875e-05, "loss": 0.0832, "step": 2700 }, { "epoch": 1.8370073480293923, "grad_norm": 0.059989944100379944, "learning_rate": 2.7505095108695657e-05, "loss": 0.062, "step": 2750 }, { "epoch": 1.8704074816299265, "grad_norm": 0.02221057377755642, "learning_rate": 2.7080502717391304e-05, "loss": 0.0649, "step": 2800 }, { "epoch": 1.9038076152304608, "grad_norm": 0.154799684882164, "learning_rate": 2.6655910326086957e-05, "loss": 0.144, "step": 2850 }, { "epoch": 1.9372077488309953, "grad_norm": 0.0422816276550293, "learning_rate": 2.6231317934782613e-05, "loss": 0.0531, "step": 2900 }, { "epoch": 1.9706078824315298, "grad_norm": 0.019839206710457802, "learning_rate": 2.580672554347826e-05, "loss": 0.0545, "step": 2950 }, { "epoch": 2.0, "eval_accuracy": 0.9801230477993375, "eval_f1": 0.8489208633093526, "eval_loss": 0.07019170373678207, "eval_runtime": 8.2123, "eval_samples_per_second": 257.298, "eval_steps_per_second": 32.269, "step": 2994 } ], "logging_steps": 50, "max_steps": 5988, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 704423880081408.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }