{
  "best_global_step": 5988,
  "best_metric": 0.9053497942386831,
  "best_model_checkpoint": "./electra-small-heading-classifier-expanded\\checkpoint-5988",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 5988,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.033400133600534405,
      "grad_norm": 1.82759428024292,
      "learning_rate": 2.45e-05,
      "loss": 0.5801,
      "step": 50
    },
    {
      "epoch": 0.06680026720106881,
      "grad_norm": 0.7336680293083191,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 0.3191,
      "step": 100
    },
    {
      "epoch": 0.10020040080160321,
      "grad_norm": 0.759293258190155,
      "learning_rate": 4.9583899456521745e-05,
      "loss": 0.2216,
      "step": 150
    },
    {
      "epoch": 0.13360053440213762,
      "grad_norm": 0.5498169660568237,
      "learning_rate": 4.915930706521739e-05,
      "loss": 0.2167,
      "step": 200
    },
    {
      "epoch": 0.16700066800267202,
      "grad_norm": 3.572784662246704,
      "learning_rate": 4.8734714673913044e-05,
      "loss": 0.1797,
      "step": 250
    },
    {
      "epoch": 0.20040080160320642,
      "grad_norm": 1.3190865516662598,
      "learning_rate": 4.83101222826087e-05,
      "loss": 0.2202,
      "step": 300
    },
    {
      "epoch": 0.23380093520374082,
      "grad_norm": 1.5133466720581055,
      "learning_rate": 4.788552989130435e-05,
      "loss": 0.1925,
      "step": 350
    },
    {
      "epoch": 0.26720106880427524,
      "grad_norm": 1.3208128213882446,
      "learning_rate": 4.7460937500000004e-05,
      "loss": 0.265,
      "step": 400
    },
    {
      "epoch": 0.30060120240480964,
      "grad_norm": 0.9725052118301392,
      "learning_rate": 4.703634510869566e-05,
      "loss": 0.2866,
      "step": 450
    },
    {
      "epoch": 0.33400133600534404,
      "grad_norm": 1.570516586303711,
      "learning_rate": 4.6611752717391304e-05,
      "loss": 0.1881,
      "step": 500
    },
    {
      "epoch": 0.36740146960587844,
      "grad_norm": 4.067606449127197,
      "learning_rate": 4.618716032608696e-05,
      "loss": 0.2258,
      "step": 550
    },
    {
      "epoch": 0.40080160320641284,
      "grad_norm": 0.2925869822502136,
      "learning_rate": 4.576256793478261e-05,
      "loss": 0.1488,
      "step": 600
    },
    {
      "epoch": 0.43420173680694724,
      "grad_norm": 0.9354973435401917,
      "learning_rate": 4.5337975543478264e-05,
      "loss": 0.1611,
      "step": 650
    },
    {
      "epoch": 0.46760187040748163,
      "grad_norm": 0.3285213112831116,
      "learning_rate": 4.491338315217392e-05,
      "loss": 0.1259,
      "step": 700
    },
    {
      "epoch": 0.501002004008016,
      "grad_norm": 13.97227954864502,
      "learning_rate": 4.448879076086957e-05,
      "loss": 0.1593,
      "step": 750
    },
    {
      "epoch": 0.5344021376085505,
      "grad_norm": 0.10469996929168701,
      "learning_rate": 4.406419836956522e-05,
      "loss": 0.0923,
      "step": 800
    },
    {
      "epoch": 0.5678022712090849,
      "grad_norm": 0.19770751893520355,
      "learning_rate": 4.363960597826087e-05,
      "loss": 0.1067,
      "step": 850
    },
    {
      "epoch": 0.6012024048096193,
      "grad_norm": 0.10104553401470184,
      "learning_rate": 4.321501358695652e-05,
      "loss": 0.1214,
      "step": 900
    },
    {
      "epoch": 0.6346025384101537,
      "grad_norm": 0.10949808359146118,
      "learning_rate": 4.279042119565218e-05,
      "loss": 0.1175,
      "step": 950
    },
    {
      "epoch": 0.6680026720106881,
      "grad_norm": 0.10336494445800781,
      "learning_rate": 4.236582880434783e-05,
      "loss": 0.0849,
      "step": 1000
    },
    {
      "epoch": 0.7014028056112225,
      "grad_norm": 0.12163177132606506,
      "learning_rate": 4.1941236413043476e-05,
      "loss": 0.1486,
      "step": 1050
    },
    {
      "epoch": 0.7348029392117569,
      "grad_norm": 0.0906616598367691,
      "learning_rate": 4.151664402173913e-05,
      "loss": 0.0671,
      "step": 1100
    },
    {
      "epoch": 0.7682030728122913,
      "grad_norm": 0.12954840064048767,
      "learning_rate": 4.109205163043478e-05,
      "loss": 0.072,
      "step": 1150
    },
    {
      "epoch": 0.8016032064128257,
      "grad_norm": 0.06860172003507614,
      "learning_rate": 4.0667459239130436e-05,
      "loss": 0.0735,
      "step": 1200
    },
    {
      "epoch": 0.8350033400133601,
      "grad_norm": 0.06437909603118896,
      "learning_rate": 4.024286684782609e-05,
      "loss": 0.16,
      "step": 1250
    },
    {
      "epoch": 0.8684034736138945,
      "grad_norm": 3.2551543712615967,
      "learning_rate": 3.981827445652174e-05,
      "loss": 0.0868,
      "step": 1300
    },
    {
      "epoch": 0.9018036072144289,
      "grad_norm": 0.12183782458305359,
      "learning_rate": 3.939368206521739e-05,
      "loss": 0.1346,
      "step": 1350
    },
    {
      "epoch": 0.9352037408149633,
      "grad_norm": 5.742549419403076,
      "learning_rate": 3.896908967391304e-05,
      "loss": 0.0977,
      "step": 1400
    },
    {
      "epoch": 0.9686038744154977,
      "grad_norm": 0.049525078386068344,
      "learning_rate": 3.85444972826087e-05,
      "loss": 0.0771,
      "step": 1450
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9777567439659253,
      "eval_f1": 0.7813953488372093,
      "eval_loss": 0.07437803596258163,
      "eval_runtime": 8.1317,
      "eval_samples_per_second": 259.846,
      "eval_steps_per_second": 32.588,
      "step": 1497
    },
    {
      "epoch": 1.002004008016032,
      "grad_norm": 0.08354274183511734,
      "learning_rate": 3.811990489130435e-05,
      "loss": 0.1122,
      "step": 1500
    },
    {
      "epoch": 1.0354041416165665,
      "grad_norm": 0.07657646387815475,
      "learning_rate": 3.76953125e-05,
      "loss": 0.0776,
      "step": 1550
    },
    {
      "epoch": 1.0688042752171008,
      "grad_norm": 0.029183173552155495,
      "learning_rate": 3.7270720108695656e-05,
      "loss": 0.048,
      "step": 1600
    },
    {
      "epoch": 1.1022044088176353,
      "grad_norm": 0.6650001406669617,
      "learning_rate": 3.68461277173913e-05,
      "loss": 0.0651,
      "step": 1650
    },
    {
      "epoch": 1.1356045424181698,
      "grad_norm": 0.13291126489639282,
      "learning_rate": 3.6421535326086955e-05,
      "loss": 0.1035,
      "step": 1700
    },
    {
      "epoch": 1.169004676018704,
      "grad_norm": 0.14512984454631805,
      "learning_rate": 3.5996942934782615e-05,
      "loss": 0.0896,
      "step": 1750
    },
    {
      "epoch": 1.2024048096192386,
      "grad_norm": 0.09467575699090958,
      "learning_rate": 3.557235054347826e-05,
      "loss": 0.0763,
      "step": 1800
    },
    {
      "epoch": 1.2358049432197729,
      "grad_norm": 0.5623799562454224,
      "learning_rate": 3.5147758152173915e-05,
      "loss": 0.059,
      "step": 1850
    },
    {
      "epoch": 1.2692050768203074,
      "grad_norm": 0.16416147351264954,
      "learning_rate": 3.472316576086957e-05,
      "loss": 0.0483,
      "step": 1900
    },
    {
      "epoch": 1.3026052104208417,
      "grad_norm": 3.0964038372039795,
      "learning_rate": 3.4298573369565215e-05,
      "loss": 0.1058,
      "step": 1950
    },
    {
      "epoch": 1.3360053440213762,
      "grad_norm": 0.030443737283349037,
      "learning_rate": 3.3873980978260875e-05,
      "loss": 0.0517,
      "step": 2000
    },
    {
      "epoch": 1.3694054776219104,
      "grad_norm": 0.04263285547494888,
      "learning_rate": 3.344938858695653e-05,
      "loss": 0.0486,
      "step": 2050
    },
    {
      "epoch": 1.402805611222445,
      "grad_norm": 0.06713565438985825,
      "learning_rate": 3.3024796195652175e-05,
      "loss": 0.0494,
      "step": 2100
    },
    {
      "epoch": 1.4362057448229792,
      "grad_norm": 0.04323391616344452,
      "learning_rate": 3.260020380434783e-05,
      "loss": 0.0518,
      "step": 2150
    },
    {
      "epoch": 1.4696058784235138,
      "grad_norm": 0.9192191362380981,
      "learning_rate": 3.2175611413043474e-05,
      "loss": 0.0498,
      "step": 2200
    },
    {
      "epoch": 1.503006012024048,
      "grad_norm": 0.08719488233327866,
      "learning_rate": 3.1751019021739135e-05,
      "loss": 0.1155,
      "step": 2250
    },
    {
      "epoch": 1.5364061456245826,
      "grad_norm": 6.060549259185791,
      "learning_rate": 3.132642663043479e-05,
      "loss": 0.1349,
      "step": 2300
    },
    {
      "epoch": 1.569806279225117,
      "grad_norm": 0.09453130513429642,
      "learning_rate": 3.0901834239130434e-05,
      "loss": 0.0639,
      "step": 2350
    },
    {
      "epoch": 1.6032064128256514,
      "grad_norm": 0.02592223510146141,
      "learning_rate": 3.0477241847826088e-05,
      "loss": 0.0929,
      "step": 2400
    },
    {
      "epoch": 1.6366065464261856,
      "grad_norm": 0.027360519394278526,
      "learning_rate": 3.005264945652174e-05,
      "loss": 0.071,
      "step": 2450
    },
    {
      "epoch": 1.6700066800267201,
      "grad_norm": 0.14285916090011597,
      "learning_rate": 2.962805706521739e-05,
      "loss": 0.0768,
      "step": 2500
    },
    {
      "epoch": 1.7034068136272547,
      "grad_norm": 0.04874153807759285,
      "learning_rate": 2.9203464673913044e-05,
      "loss": 0.076,
      "step": 2550
    },
    {
      "epoch": 1.736806947227789,
      "grad_norm": 0.09328486025333405,
      "learning_rate": 2.8778872282608697e-05,
      "loss": 0.0505,
      "step": 2600
    },
    {
      "epoch": 1.7702070808283232,
      "grad_norm": 0.34670591354370117,
      "learning_rate": 2.8354279891304347e-05,
      "loss": 0.0763,
      "step": 2650
    },
    {
      "epoch": 1.8036072144288577,
      "grad_norm": 0.057789236307144165,
      "learning_rate": 2.79296875e-05,
      "loss": 0.0832,
      "step": 2700
    },
    {
      "epoch": 1.8370073480293923,
      "grad_norm": 0.059989944100379944,
      "learning_rate": 2.7505095108695657e-05,
      "loss": 0.062,
      "step": 2750
    },
    {
      "epoch": 1.8704074816299265,
      "grad_norm": 0.02221057377755642,
      "learning_rate": 2.7080502717391304e-05,
      "loss": 0.0649,
      "step": 2800
    },
    {
      "epoch": 1.9038076152304608,
      "grad_norm": 0.154799684882164,
      "learning_rate": 2.6655910326086957e-05,
      "loss": 0.144,
      "step": 2850
    },
    {
      "epoch": 1.9372077488309953,
      "grad_norm": 0.0422816276550293,
      "learning_rate": 2.6231317934782613e-05,
      "loss": 0.0531,
      "step": 2900
    },
    {
      "epoch": 1.9706078824315298,
      "grad_norm": 0.019839206710457802,
      "learning_rate": 2.580672554347826e-05,
      "loss": 0.0545,
      "step": 2950
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9801230477993375,
      "eval_f1": 0.8489208633093526,
      "eval_loss": 0.07019170373678207,
      "eval_runtime": 8.2123,
      "eval_samples_per_second": 257.298,
      "eval_steps_per_second": 32.269,
      "step": 2994
    },
    {
      "epoch": 2.004008016032064,
      "grad_norm": 3.3859903812408447,
      "learning_rate": 2.5382133152173913e-05,
      "loss": 0.0891,
      "step": 3000
    },
    {
      "epoch": 2.0374081496325984,
      "grad_norm": 0.055893734097480774,
      "learning_rate": 2.4957540760869567e-05,
      "loss": 0.0498,
      "step": 3050
    },
    {
      "epoch": 2.070808283233133,
      "grad_norm": 0.030930357053875923,
      "learning_rate": 2.4532948369565216e-05,
      "loss": 0.0272,
      "step": 3100
    },
    {
      "epoch": 2.1042084168336674,
      "grad_norm": 13.285860061645508,
      "learning_rate": 2.4108355978260873e-05,
      "loss": 0.0327,
      "step": 3150
    },
    {
      "epoch": 2.1376085504342015,
      "grad_norm": 0.06732609122991562,
      "learning_rate": 2.3683763586956523e-05,
      "loss": 0.0366,
      "step": 3200
    },
    {
      "epoch": 2.171008684034736,
      "grad_norm": 0.042977988719940186,
      "learning_rate": 2.3259171195652173e-05,
      "loss": 0.0979,
      "step": 3250
    },
    {
      "epoch": 2.2044088176352705,
      "grad_norm": 0.2444009780883789,
      "learning_rate": 2.283457880434783e-05,
      "loss": 0.0185,
      "step": 3300
    },
    {
      "epoch": 2.237808951235805,
      "grad_norm": 0.013526637107133865,
      "learning_rate": 2.240998641304348e-05,
      "loss": 0.049,
      "step": 3350
    },
    {
      "epoch": 2.2712090848363395,
      "grad_norm": 0.034718696027994156,
      "learning_rate": 2.198539402173913e-05,
      "loss": 0.0512,
      "step": 3400
    },
    {
      "epoch": 2.3046092184368736,
      "grad_norm": 4.7193403244018555,
      "learning_rate": 2.1560801630434786e-05,
      "loss": 0.0479,
      "step": 3450
    },
    {
      "epoch": 2.338009352037408,
      "grad_norm": 0.019014885649085045,
      "learning_rate": 2.1136209239130436e-05,
      "loss": 0.0924,
      "step": 3500
    },
    {
      "epoch": 2.3714094856379426,
      "grad_norm": 0.03284880891442299,
      "learning_rate": 2.071161684782609e-05,
      "loss": 0.045,
      "step": 3550
    },
    {
      "epoch": 2.404809619238477,
      "grad_norm": 0.014006136916577816,
      "learning_rate": 2.028702445652174e-05,
      "loss": 0.0518,
      "step": 3600
    },
    {
      "epoch": 2.438209752839011,
      "grad_norm": 0.05049414187669754,
      "learning_rate": 1.9862432065217392e-05,
      "loss": 0.0482,
      "step": 3650
    },
    {
      "epoch": 2.4716098864395457,
      "grad_norm": 0.045781753957271576,
      "learning_rate": 1.9437839673913045e-05,
      "loss": 0.0143,
      "step": 3700
    },
    {
      "epoch": 2.50501002004008,
      "grad_norm": 0.009661749005317688,
      "learning_rate": 1.9013247282608695e-05,
      "loss": 0.0551,
      "step": 3750
    },
    {
      "epoch": 2.5384101536406147,
      "grad_norm": 0.009859848767518997,
      "learning_rate": 1.858865489130435e-05,
      "loss": 0.0495,
      "step": 3800
    },
    {
      "epoch": 2.5718102872411492,
      "grad_norm": 10.117756843566895,
      "learning_rate": 1.8164062500000002e-05,
      "loss": 0.0412,
      "step": 3850
    },
    {
      "epoch": 2.6052104208416833,
      "grad_norm": 0.015856023877859116,
      "learning_rate": 1.7739470108695652e-05,
      "loss": 0.0505,
      "step": 3900
    },
    {
      "epoch": 2.638610554442218,
      "grad_norm": 0.08077731728553772,
      "learning_rate": 1.7314877717391305e-05,
      "loss": 0.0458,
      "step": 3950
    },
    {
      "epoch": 2.6720106880427523,
      "grad_norm": 0.1191636249423027,
      "learning_rate": 1.689028532608696e-05,
      "loss": 0.0473,
      "step": 4000
    },
    {
      "epoch": 2.7054108216432864,
      "grad_norm": 0.012686469592154026,
      "learning_rate": 1.6465692934782608e-05,
      "loss": 0.0546,
      "step": 4050
    },
    {
      "epoch": 2.738810955243821,
      "grad_norm": 0.011968459002673626,
      "learning_rate": 1.604110054347826e-05,
      "loss": 0.0259,
      "step": 4100
    },
    {
      "epoch": 2.7722110888443554,
      "grad_norm": 0.013011530973017216,
      "learning_rate": 1.5616508152173915e-05,
      "loss": 0.0165,
      "step": 4150
    },
    {
      "epoch": 2.80561122244489,
      "grad_norm": 0.03517712280154228,
      "learning_rate": 1.5191915760869566e-05,
      "loss": 0.0291,
      "step": 4200
    },
    {
      "epoch": 2.8390113560454244,
      "grad_norm": 0.03560580685734749,
      "learning_rate": 1.4767323369565216e-05,
      "loss": 0.0686,
      "step": 4250
    },
    {
      "epoch": 2.8724114896459585,
      "grad_norm": 0.021690141409635544,
      "learning_rate": 1.4342730978260871e-05,
      "loss": 0.0482,
      "step": 4300
    },
    {
      "epoch": 2.905811623246493,
      "grad_norm": 0.015762945637106895,
      "learning_rate": 1.3918138586956523e-05,
      "loss": 0.0242,
      "step": 4350
    },
    {
      "epoch": 2.9392117568470275,
      "grad_norm": 0.12395244836807251,
      "learning_rate": 1.3493546195652174e-05,
      "loss": 0.0451,
      "step": 4400
    },
    {
      "epoch": 2.9726118904475616,
      "grad_norm": 0.012854584492743015,
      "learning_rate": 1.3068953804347828e-05,
      "loss": 0.0521,
      "step": 4450
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9862754377662092,
      "eval_f1": 0.888030888030888,
      "eval_loss": 0.056181490421295166,
      "eval_runtime": 8.2762,
      "eval_samples_per_second": 255.31,
      "eval_steps_per_second": 32.02,
      "step": 4491
    },
    {
      "epoch": 3.006012024048096,
      "grad_norm": 0.020279008895158768,
      "learning_rate": 1.264436141304348e-05,
      "loss": 0.09,
      "step": 4500
    },
    {
      "epoch": 3.0394121576486306,
      "grad_norm": 0.00978385005146265,
      "learning_rate": 1.2219769021739132e-05,
      "loss": 0.0353,
      "step": 4550
    },
    {
      "epoch": 3.072812291249165,
      "grad_norm": 0.16378536820411682,
      "learning_rate": 1.1795176630434782e-05,
      "loss": 0.0214,
      "step": 4600
    },
    {
      "epoch": 3.1062124248496996,
      "grad_norm": 0.00774746248498559,
      "learning_rate": 1.1370584239130436e-05,
      "loss": 0.0061,
      "step": 4650
    },
    {
      "epoch": 3.1396125584502337,
      "grad_norm": 0.03240759298205376,
      "learning_rate": 1.0945991847826087e-05,
      "loss": 0.0437,
      "step": 4700
    },
    {
      "epoch": 3.173012692050768,
      "grad_norm": 0.06572789698839188,
      "learning_rate": 1.052139945652174e-05,
      "loss": 0.0175,
      "step": 4750
    },
    {
      "epoch": 3.2064128256513027,
      "grad_norm": 0.00863126665353775,
      "learning_rate": 1.0096807065217392e-05,
      "loss": 0.0357,
      "step": 4800
    },
    {
      "epoch": 3.239812959251837,
      "grad_norm": 0.008642938919365406,
      "learning_rate": 9.672214673913044e-06,
      "loss": 0.0156,
      "step": 4850
    },
    {
      "epoch": 3.2732130928523713,
      "grad_norm": 0.01715974509716034,
      "learning_rate": 9.247622282608697e-06,
      "loss": 0.03,
      "step": 4900
    },
    {
      "epoch": 3.306613226452906,
      "grad_norm": 0.007449580822139978,
      "learning_rate": 8.823029891304348e-06,
      "loss": 0.0136,
      "step": 4950
    },
    {
      "epoch": 3.3400133600534403,
      "grad_norm": 0.011774774640798569,
      "learning_rate": 8.3984375e-06,
      "loss": 0.0213,
      "step": 5000
    },
    {
      "epoch": 3.373413493653975,
      "grad_norm": 0.006117586512118578,
      "learning_rate": 7.973845108695653e-06,
      "loss": 0.0438,
      "step": 5050
    },
    {
      "epoch": 3.406813627254509,
      "grad_norm": 0.08415533602237701,
      "learning_rate": 7.549252717391304e-06,
      "loss": 0.0375,
      "step": 5100
    },
    {
      "epoch": 3.4402137608550434,
      "grad_norm": 0.013756927102804184,
      "learning_rate": 7.124660326086957e-06,
      "loss": 0.0477,
      "step": 5150
    },
    {
      "epoch": 3.473613894455578,
      "grad_norm": 0.05258077010512352,
      "learning_rate": 6.700067934782608e-06,
      "loss": 0.0408,
      "step": 5200
    },
    {
      "epoch": 3.507014028056112,
      "grad_norm": 0.03193260356783867,
      "learning_rate": 6.275475543478261e-06,
      "loss": 0.0739,
      "step": 5250
    },
    {
      "epoch": 3.5404141616566465,
      "grad_norm": 0.0606362447142601,
      "learning_rate": 5.850883152173913e-06,
      "loss": 0.0445,
      "step": 5300
    },
    {
      "epoch": 3.573814295257181,
      "grad_norm": 0.009357443079352379,
      "learning_rate": 5.426290760869566e-06,
      "loss": 0.0363,
      "step": 5350
    },
    {
      "epoch": 3.6072144288577155,
      "grad_norm": 14.05784797668457,
      "learning_rate": 5.001698369565218e-06,
      "loss": 0.0227,
      "step": 5400
    },
    {
      "epoch": 3.64061456245825,
      "grad_norm": 0.07442031055688858,
      "learning_rate": 4.57710597826087e-06,
      "loss": 0.005,
      "step": 5450
    },
    {
      "epoch": 3.6740146960587845,
      "grad_norm": 0.007993385195732117,
      "learning_rate": 4.152513586956522e-06,
      "loss": 0.0121,
      "step": 5500
    },
    {
      "epoch": 3.7074148296593186,
      "grad_norm": 0.020146405324339867,
      "learning_rate": 3.7279211956521737e-06,
      "loss": 0.0092,
      "step": 5550
    },
    {
      "epoch": 3.740814963259853,
      "grad_norm": 0.0066772401332855225,
      "learning_rate": 3.3033288043478265e-06,
      "loss": 0.0322,
      "step": 5600
    },
    {
      "epoch": 3.7742150968603876,
      "grad_norm": 0.007795071694999933,
      "learning_rate": 2.8787364130434785e-06,
      "loss": 0.0018,
      "step": 5650
    },
    {
      "epoch": 3.8076152304609217,
      "grad_norm": 0.008354957215487957,
      "learning_rate": 2.4541440217391305e-06,
      "loss": 0.0221,
      "step": 5700
    },
    {
      "epoch": 3.841015364061456,
      "grad_norm": 1.7550073862075806,
      "learning_rate": 2.029551630434783e-06,
      "loss": 0.0331,
      "step": 5750
    },
    {
      "epoch": 3.8744154976619907,
      "grad_norm": 0.03482593223452568,
      "learning_rate": 1.604959239130435e-06,
      "loss": 0.0316,
      "step": 5800
    },
    {
      "epoch": 3.907815631262525,
      "grad_norm": 0.0070787896402180195,
      "learning_rate": 1.180366847826087e-06,
      "loss": 0.0354,
      "step": 5850
    },
    {
      "epoch": 3.9412157648630597,
      "grad_norm": 0.00712177250534296,
      "learning_rate": 7.557744565217392e-07,
      "loss": 0.0014,
      "step": 5900
    },
    {
      "epoch": 3.9746158984635938,
      "grad_norm": 0.008186141029000282,
      "learning_rate": 3.3118206521739133e-07,
      "loss": 0.006,
      "step": 5950
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9891150023663038,
      "eval_f1": 0.9053497942386831,
      "eval_loss": 0.054513588547706604,
      "eval_runtime": 8.2838,
      "eval_samples_per_second": 255.077,
      "eval_steps_per_second": 31.99,
      "step": 5988
    }
  ],
  "logging_steps": 50,
  "max_steps": 5988,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1408847760162816.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}