{
  "best_global_step": 2994,
  "best_metric": 0.8489208633093526,
  "best_model_checkpoint": "./electra-small-heading-classifier-expanded\\checkpoint-2994",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 2994,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.033400133600534405,
      "grad_norm": 1.82759428024292,
      "learning_rate": 2.45e-05,
      "loss": 0.5801,
      "step": 50
    },
    {
      "epoch": 0.06680026720106881,
      "grad_norm": 0.7336680293083191,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 0.3191,
      "step": 100
    },
    {
      "epoch": 0.10020040080160321,
      "grad_norm": 0.759293258190155,
      "learning_rate": 4.9583899456521745e-05,
      "loss": 0.2216,
      "step": 150
    },
    {
      "epoch": 0.13360053440213762,
      "grad_norm": 0.5498169660568237,
      "learning_rate": 4.915930706521739e-05,
      "loss": 0.2167,
      "step": 200
    },
    {
      "epoch": 0.16700066800267202,
      "grad_norm": 3.572784662246704,
      "learning_rate": 4.8734714673913044e-05,
      "loss": 0.1797,
      "step": 250
    },
    {
      "epoch": 0.20040080160320642,
      "grad_norm": 1.3190865516662598,
      "learning_rate": 4.83101222826087e-05,
      "loss": 0.2202,
      "step": 300
    },
    {
      "epoch": 0.23380093520374082,
      "grad_norm": 1.5133466720581055,
      "learning_rate": 4.788552989130435e-05,
      "loss": 0.1925,
      "step": 350
    },
    {
      "epoch": 0.26720106880427524,
      "grad_norm": 1.3208128213882446,
      "learning_rate": 4.7460937500000004e-05,
      "loss": 0.265,
      "step": 400
    },
    {
      "epoch": 0.30060120240480964,
      "grad_norm": 0.9725052118301392,
      "learning_rate": 4.703634510869566e-05,
      "loss": 0.2866,
      "step": 450
    },
    {
      "epoch": 0.33400133600534404,
      "grad_norm": 1.570516586303711,
      "learning_rate": 4.6611752717391304e-05,
      "loss": 0.1881,
      "step": 500
    },
    {
      "epoch": 0.36740146960587844,
      "grad_norm": 4.067606449127197,
      "learning_rate": 4.618716032608696e-05,
      "loss": 0.2258,
      "step": 550
    },
    {
      "epoch": 0.40080160320641284,
      "grad_norm": 0.2925869822502136,
      "learning_rate": 4.576256793478261e-05,
      "loss": 0.1488,
      "step": 600
    },
    {
      "epoch": 0.43420173680694724,
      "grad_norm": 0.9354973435401917,
      "learning_rate": 4.5337975543478264e-05,
      "loss": 0.1611,
      "step": 650
    },
    {
      "epoch": 0.46760187040748163,
      "grad_norm": 0.3285213112831116,
      "learning_rate": 4.491338315217392e-05,
      "loss": 0.1259,
      "step": 700
    },
    {
      "epoch": 0.501002004008016,
      "grad_norm": 13.97227954864502,
      "learning_rate": 4.448879076086957e-05,
      "loss": 0.1593,
      "step": 750
    },
    {
      "epoch": 0.5344021376085505,
      "grad_norm": 0.10469996929168701,
      "learning_rate": 4.406419836956522e-05,
      "loss": 0.0923,
      "step": 800
    },
    {
      "epoch": 0.5678022712090849,
      "grad_norm": 0.19770751893520355,
      "learning_rate": 4.363960597826087e-05,
      "loss": 0.1067,
      "step": 850
    },
    {
      "epoch": 0.6012024048096193,
      "grad_norm": 0.10104553401470184,
      "learning_rate": 4.321501358695652e-05,
      "loss": 0.1214,
      "step": 900
    },
    {
      "epoch": 0.6346025384101537,
      "grad_norm": 0.10949808359146118,
      "learning_rate": 4.279042119565218e-05,
      "loss": 0.1175,
      "step": 950
    },
    {
      "epoch": 0.6680026720106881,
      "grad_norm": 0.10336494445800781,
      "learning_rate": 4.236582880434783e-05,
      "loss": 0.0849,
      "step": 1000
    },
    {
      "epoch": 0.7014028056112225,
      "grad_norm": 0.12163177132606506,
      "learning_rate": 4.1941236413043476e-05,
      "loss": 0.1486,
      "step": 1050
    },
    {
      "epoch": 0.7348029392117569,
      "grad_norm": 0.0906616598367691,
      "learning_rate": 4.151664402173913e-05,
      "loss": 0.0671,
      "step": 1100
    },
    {
      "epoch": 0.7682030728122913,
      "grad_norm": 0.12954840064048767,
      "learning_rate": 4.109205163043478e-05,
      "loss": 0.072,
      "step": 1150
    },
    {
      "epoch": 0.8016032064128257,
      "grad_norm": 0.06860172003507614,
      "learning_rate": 4.0667459239130436e-05,
      "loss": 0.0735,
      "step": 1200
    },
    {
      "epoch": 0.8350033400133601,
      "grad_norm": 0.06437909603118896,
      "learning_rate": 4.024286684782609e-05,
      "loss": 0.16,
      "step": 1250
    },
    {
      "epoch": 0.8684034736138945,
      "grad_norm": 3.2551543712615967,
      "learning_rate": 3.981827445652174e-05,
      "loss": 0.0868,
      "step": 1300
    },
    {
      "epoch": 0.9018036072144289,
      "grad_norm": 0.12183782458305359,
      "learning_rate": 3.939368206521739e-05,
      "loss": 0.1346,
      "step": 1350
    },
    {
      "epoch": 0.9352037408149633,
      "grad_norm": 5.742549419403076,
      "learning_rate": 3.896908967391304e-05,
      "loss": 0.0977,
      "step": 1400
    },
    {
      "epoch": 0.9686038744154977,
      "grad_norm": 0.049525078386068344,
      "learning_rate": 3.85444972826087e-05,
      "loss": 0.0771,
      "step": 1450
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9777567439659253,
      "eval_f1": 0.7813953488372093,
      "eval_loss": 0.07437803596258163,
      "eval_runtime": 8.1317,
      "eval_samples_per_second": 259.846,
      "eval_steps_per_second": 32.588,
      "step": 1497
    },
    {
      "epoch": 1.002004008016032,
      "grad_norm": 0.08354274183511734,
      "learning_rate": 3.811990489130435e-05,
      "loss": 0.1122,
      "step": 1500
    },
    {
      "epoch": 1.0354041416165665,
      "grad_norm": 0.07657646387815475,
      "learning_rate": 3.76953125e-05,
      "loss": 0.0776,
      "step": 1550
    },
    {
      "epoch": 1.0688042752171008,
      "grad_norm": 0.029183173552155495,
      "learning_rate": 3.7270720108695656e-05,
      "loss": 0.048,
      "step": 1600
    },
    {
      "epoch": 1.1022044088176353,
      "grad_norm": 0.6650001406669617,
      "learning_rate": 3.68461277173913e-05,
      "loss": 0.0651,
      "step": 1650
    },
    {
      "epoch": 1.1356045424181698,
      "grad_norm": 0.13291126489639282,
      "learning_rate": 3.6421535326086955e-05,
      "loss": 0.1035,
      "step": 1700
    },
    {
      "epoch": 1.169004676018704,
      "grad_norm": 0.14512984454631805,
      "learning_rate": 3.5996942934782615e-05,
      "loss": 0.0896,
      "step": 1750
    },
    {
      "epoch": 1.2024048096192386,
      "grad_norm": 0.09467575699090958,
      "learning_rate": 3.557235054347826e-05,
      "loss": 0.0763,
      "step": 1800
    },
    {
      "epoch": 1.2358049432197729,
      "grad_norm": 0.5623799562454224,
      "learning_rate": 3.5147758152173915e-05,
      "loss": 0.059,
      "step": 1850
    },
    {
      "epoch": 1.2692050768203074,
      "grad_norm": 0.16416147351264954,
      "learning_rate": 3.472316576086957e-05,
      "loss": 0.0483,
      "step": 1900
    },
    {
      "epoch": 1.3026052104208417,
      "grad_norm": 3.0964038372039795,
      "learning_rate": 3.4298573369565215e-05,
      "loss": 0.1058,
      "step": 1950
    },
    {
      "epoch": 1.3360053440213762,
      "grad_norm": 0.030443737283349037,
      "learning_rate": 3.3873980978260875e-05,
      "loss": 0.0517,
      "step": 2000
    },
    {
      "epoch": 1.3694054776219104,
      "grad_norm": 0.04263285547494888,
      "learning_rate": 3.344938858695653e-05,
      "loss": 0.0486,
      "step": 2050
    },
    {
      "epoch": 1.402805611222445,
      "grad_norm": 0.06713565438985825,
      "learning_rate": 3.3024796195652175e-05,
      "loss": 0.0494,
      "step": 2100
    },
    {
      "epoch": 1.4362057448229792,
      "grad_norm": 0.04323391616344452,
      "learning_rate": 3.260020380434783e-05,
      "loss": 0.0518,
      "step": 2150
    },
    {
      "epoch": 1.4696058784235138,
      "grad_norm": 0.9192191362380981,
      "learning_rate": 3.2175611413043474e-05,
      "loss": 0.0498,
      "step": 2200
    },
    {
      "epoch": 1.503006012024048,
      "grad_norm": 0.08719488233327866,
      "learning_rate": 3.1751019021739135e-05,
      "loss": 0.1155,
      "step": 2250
    },
    {
      "epoch": 1.5364061456245826,
      "grad_norm": 6.060549259185791,
      "learning_rate": 3.132642663043479e-05,
      "loss": 0.1349,
      "step": 2300
    },
    {
      "epoch": 1.569806279225117,
      "grad_norm": 0.09453130513429642,
      "learning_rate": 3.0901834239130434e-05,
      "loss": 0.0639,
      "step": 2350
    },
    {
      "epoch": 1.6032064128256514,
      "grad_norm": 0.02592223510146141,
      "learning_rate": 3.0477241847826088e-05,
      "loss": 0.0929,
      "step": 2400
    },
    {
      "epoch": 1.6366065464261856,
      "grad_norm": 0.027360519394278526,
      "learning_rate": 3.005264945652174e-05,
      "loss": 0.071,
      "step": 2450
    },
    {
      "epoch": 1.6700066800267201,
      "grad_norm": 0.14285916090011597,
      "learning_rate": 2.962805706521739e-05,
      "loss": 0.0768,
      "step": 2500
    },
    {
      "epoch": 1.7034068136272547,
      "grad_norm": 0.04874153807759285,
      "learning_rate": 2.9203464673913044e-05,
      "loss": 0.076,
      "step": 2550
    },
    {
      "epoch": 1.736806947227789,
      "grad_norm": 0.09328486025333405,
      "learning_rate": 2.8778872282608697e-05,
      "loss": 0.0505,
      "step": 2600
    },
    {
      "epoch": 1.7702070808283232,
      "grad_norm": 0.34670591354370117,
      "learning_rate": 2.8354279891304347e-05,
      "loss": 0.0763,
      "step": 2650
    },
    {
      "epoch": 1.8036072144288577,
      "grad_norm": 0.057789236307144165,
      "learning_rate": 2.79296875e-05,
      "loss": 0.0832,
      "step": 2700
    },
    {
      "epoch": 1.8370073480293923,
      "grad_norm": 0.059989944100379944,
      "learning_rate": 2.7505095108695657e-05,
      "loss": 0.062,
      "step": 2750
    },
    {
      "epoch": 1.8704074816299265,
      "grad_norm": 0.02221057377755642,
      "learning_rate": 2.7080502717391304e-05,
      "loss": 0.0649,
      "step": 2800
    },
    {
      "epoch": 1.9038076152304608,
      "grad_norm": 0.154799684882164,
      "learning_rate": 2.6655910326086957e-05,
      "loss": 0.144,
      "step": 2850
    },
    {
      "epoch": 1.9372077488309953,
      "grad_norm": 0.0422816276550293,
      "learning_rate": 2.6231317934782613e-05,
      "loss": 0.0531,
      "step": 2900
    },
    {
      "epoch": 1.9706078824315298,
      "grad_norm": 0.019839206710457802,
      "learning_rate": 2.580672554347826e-05,
      "loss": 0.0545,
      "step": 2950
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9801230477993375,
      "eval_f1": 0.8489208633093526,
      "eval_loss": 0.07019170373678207,
      "eval_runtime": 8.2123,
      "eval_samples_per_second": 257.298,
      "eval_steps_per_second": 32.269,
      "step": 2994
    }
  ],
  "logging_steps": 50,
  "max_steps": 5988,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 704423880081408.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}