| { | |
| "best_metric": 0.32019519805908203, | |
| "best_model_checkpoint": "./deberta_multilabel_safetyattribution_run1/checkpoint-2172", | |
| "epoch": 8.0, | |
| "eval_steps": 500, | |
| "global_step": 5792, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06906077348066299, | |
| "grad_norm": 17.054950714111328, | |
| "learning_rate": 5.753739930955121e-07, | |
| "loss": 2.7202, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.13812154696132597, | |
| "grad_norm": 15.804380416870117, | |
| "learning_rate": 1.1507479861910242e-06, | |
| "loss": 2.4784, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.20718232044198895, | |
| "grad_norm": 4.005581378936768, | |
| "learning_rate": 1.7261219792865363e-06, | |
| "loss": 1.9331, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.27624309392265195, | |
| "grad_norm": 3.5140247344970703, | |
| "learning_rate": 2.3014959723820484e-06, | |
| "loss": 1.3647, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3453038674033149, | |
| "grad_norm": 10.396773338317871, | |
| "learning_rate": 2.8768699654775607e-06, | |
| "loss": 1.1814, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4143646408839779, | |
| "grad_norm": 4.15683126449585, | |
| "learning_rate": 3.4522439585730726e-06, | |
| "loss": 1.1349, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.48342541436464087, | |
| "grad_norm": 13.827107429504395, | |
| "learning_rate": 4.027617951668585e-06, | |
| "loss": 1.0992, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5524861878453039, | |
| "grad_norm": 5.466891765594482, | |
| "learning_rate": 4.602991944764097e-06, | |
| "loss": 1.0756, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6215469613259669, | |
| "grad_norm": 5.3988118171691895, | |
| "learning_rate": 5.1783659378596095e-06, | |
| "loss": 1.0385, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6906077348066298, | |
| "grad_norm": 6.582638740539551, | |
| "learning_rate": 5.753739930955121e-06, | |
| "loss": 0.9732, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7596685082872928, | |
| "grad_norm": 4.769979953765869, | |
| "learning_rate": 6.329113924050634e-06, | |
| "loss": 0.9272, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8287292817679558, | |
| "grad_norm": 7.325048923492432, | |
| "learning_rate": 6.904487917146145e-06, | |
| "loss": 0.9391, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8977900552486188, | |
| "grad_norm": 5.211851596832275, | |
| "learning_rate": 7.479861910241658e-06, | |
| "loss": 0.9031, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9668508287292817, | |
| "grad_norm": 8.523693084716797, | |
| "learning_rate": 8.05523590333717e-06, | |
| "loss": 0.8428, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_class_accuracy": { | |
| "label_0": 0.7745931503521982, | |
| "label_1": 0.9664804469273743, | |
| "label_2": 0.8299732815156667 | |
| }, | |
| "eval_class_f1": { | |
| "label_0": 0.5691736304549675, | |
| "label_1": 0.7523923444976076, | |
| "label_2": 0.6285815351963212 | |
| }, | |
| "eval_class_false_positive_rate": { | |
| "label_0": 0.06384009691095531, | |
| "label_1": 0.018147086914993647, | |
| "label_2": 0.0660200595172417 | |
| }, | |
| "eval_class_precision": { | |
| "label_0": 0.7772612003381234, | |
| "label_1": 0.7505966587112172, | |
| "label_2": 0.747895622895623 | |
| }, | |
| "eval_class_recall": { | |
| "label_0": 0.448974609375, | |
| "label_1": 0.7541966426858513, | |
| "label_2": 0.5420988407565589 | |
| }, | |
| "eval_loss": 0.4103058874607086, | |
| "eval_macro_f1": 0.6500491700496321, | |
| "eval_macro_precision": 0.7585844939816545, | |
| "eval_macro_recall": 0.5817566976058034, | |
| "eval_micro_f1": 0.6157528285465622, | |
| "eval_micro_precision": 0.760752688172043, | |
| "eval_micro_recall": 0.5171783625730995, | |
| "eval_runtime": 132.9806, | |
| "eval_samples_per_second": 92.878, | |
| "eval_steps_per_second": 1.451, | |
| "eval_subset_accuracy": 0.6513642620030766, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.0359116022099448, | |
| "grad_norm": 5.507997989654541, | |
| "learning_rate": 8.630609896432683e-06, | |
| "loss": 0.8297, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.1049723756906078, | |
| "grad_norm": 6.73417329788208, | |
| "learning_rate": 9.205983889528194e-06, | |
| "loss": 0.7722, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.1740331491712708, | |
| "grad_norm": 5.604831218719482, | |
| "learning_rate": 9.781357882623706e-06, | |
| "loss": 0.7892, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.2430939226519337, | |
| "grad_norm": 6.269744873046875, | |
| "learning_rate": 9.93703026609791e-06, | |
| "loss": 0.7489, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.3121546961325967, | |
| "grad_norm": 5.223318576812744, | |
| "learning_rate": 9.83546617915905e-06, | |
| "loss": 0.756, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.3812154696132597, | |
| "grad_norm": 6.4933180809021, | |
| "learning_rate": 9.733902092220193e-06, | |
| "loss": 0.706, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4502762430939227, | |
| "grad_norm": 4.81841516494751, | |
| "learning_rate": 9.632338005281333e-06, | |
| "loss": 0.7007, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.5193370165745856, | |
| "grad_norm": 4.895750999450684, | |
| "learning_rate": 9.530773918342476e-06, | |
| "loss": 0.7067, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.5883977900552486, | |
| "grad_norm": 15.617860794067383, | |
| "learning_rate": 9.429209831403616e-06, | |
| "loss": 0.6879, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.6574585635359116, | |
| "grad_norm": 5.868403911590576, | |
| "learning_rate": 9.327645744464759e-06, | |
| "loss": 0.6599, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.7265193370165746, | |
| "grad_norm": 5.1376423835754395, | |
| "learning_rate": 9.2260816575259e-06, | |
| "loss": 0.6524, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.7955801104972375, | |
| "grad_norm": 8.1726713180542, | |
| "learning_rate": 9.124517570587042e-06, | |
| "loss": 0.6301, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.8646408839779005, | |
| "grad_norm": 5.737952709197998, | |
| "learning_rate": 9.022953483648182e-06, | |
| "loss": 0.6554, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.9337016574585635, | |
| "grad_norm": 8.225056648254395, | |
| "learning_rate": 8.921389396709325e-06, | |
| "loss": 0.6457, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_class_accuracy": { | |
| "label_0": 0.8472998137802608, | |
| "label_1": 0.9697190510889806, | |
| "label_2": 0.8784713788357218 | |
| }, | |
| "eval_class_f1": { | |
| "label_0": 0.7341415280518748, | |
| "label_1": 0.7713936430317848, | |
| "label_2": 0.7481966113068277 | |
| }, | |
| "eval_class_false_positive_rate": { | |
| "label_0": 0.04772864930344667, | |
| "label_1": 0.014847616566812985, | |
| "label_2": 0.049928358866962425 | |
| }, | |
| "eval_class_precision": { | |
| "label_0": 0.8685790527018012, | |
| "label_1": 0.786783042394015, | |
| "label_2": 0.8311591502049944 | |
| }, | |
| "eval_class_recall": { | |
| "label_0": 0.6357421875, | |
| "label_1": 0.7565947242206235, | |
| "label_2": 0.6802928615009152 | |
| }, | |
| "eval_loss": 0.3337920904159546, | |
| "eval_macro_f1": 0.7512439274634959, | |
| "eval_macro_precision": 0.8288404151002702, | |
| "eval_macro_recall": 0.6908765910738461, | |
| "eval_micro_f1": 0.7439929208358859, | |
| "eval_micro_precision": 0.8429739318216874, | |
| "eval_micro_recall": 0.6658138401559455, | |
| "eval_runtime": 132.9461, | |
| "eval_samples_per_second": 92.902, | |
| "eval_steps_per_second": 1.452, | |
| "eval_subset_accuracy": 0.7538660837179175, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 2.0027624309392267, | |
| "grad_norm": 4.911665439605713, | |
| "learning_rate": 8.819825309770465e-06, | |
| "loss": 0.6339, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.0718232044198897, | |
| "grad_norm": 5.494454383850098, | |
| "learning_rate": 8.718261222831608e-06, | |
| "loss": 0.6068, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.1408839779005526, | |
| "grad_norm": 7.909505844116211, | |
| "learning_rate": 8.616697135892748e-06, | |
| "loss": 0.6289, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.2099447513812156, | |
| "grad_norm": 6.768340587615967, | |
| "learning_rate": 8.515133048953891e-06, | |
| "loss": 0.6021, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.2790055248618786, | |
| "grad_norm": 5.231354236602783, | |
| "learning_rate": 8.413568962015032e-06, | |
| "loss": 0.585, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.3480662983425415, | |
| "grad_norm": 3.701436758041382, | |
| "learning_rate": 8.312004875076174e-06, | |
| "loss": 0.5728, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.4171270718232045, | |
| "grad_norm": 3.403343915939331, | |
| "learning_rate": 8.210440788137315e-06, | |
| "loss": 0.5746, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.4861878453038675, | |
| "grad_norm": 4.147812843322754, | |
| "learning_rate": 8.108876701198457e-06, | |
| "loss": 0.6028, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.5552486187845305, | |
| "grad_norm": 8.234676361083984, | |
| "learning_rate": 8.007312614259598e-06, | |
| "loss": 0.5451, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.6243093922651934, | |
| "grad_norm": 3.46964955329895, | |
| "learning_rate": 7.90574852732074e-06, | |
| "loss": 0.591, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.6933701657458564, | |
| "grad_norm": 5.345615386962891, | |
| "learning_rate": 7.80418444038188e-06, | |
| "loss": 0.5685, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.7624309392265194, | |
| "grad_norm": 5.062536239624023, | |
| "learning_rate": 7.702620353443023e-06, | |
| "loss": 0.5786, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.8314917127071824, | |
| "grad_norm": 5.448482036590576, | |
| "learning_rate": 7.6010562665041645e-06, | |
| "loss": 0.5773, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.9005524861878453, | |
| "grad_norm": 7.135024547576904, | |
| "learning_rate": 7.499492179565306e-06, | |
| "loss": 0.5849, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.9696132596685083, | |
| "grad_norm": 4.267426490783691, | |
| "learning_rate": 7.3979280926264475e-06, | |
| "loss": 0.5663, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_class_accuracy": { | |
| "label_0": 0.8591207189701239, | |
| "label_1": 0.9690713302566594, | |
| "label_2": 0.8903732491296251 | |
| }, | |
| "eval_class_f1": { | |
| "label_0": 0.7654986522911051, | |
| "label_1": 0.7768691588785047, | |
| "label_2": 0.7841198979591837 | |
| }, | |
| "eval_class_false_positive_rate": { | |
| "label_0": 0.05863113264687357, | |
| "label_1": 0.01849439958322319, | |
| "label_2": 0.05896616334177681 | |
| }, | |
| "eval_class_precision": { | |
| "label_0": 0.8543922984356197, | |
| "label_1": 0.7574031890660592, | |
| "label_2": 0.821309285237141 | |
| }, | |
| "eval_class_recall": { | |
| "label_0": 0.693359375, | |
| "label_1": 0.7973621103117506, | |
| "label_2": 0.7501525320317267 | |
| }, | |
| "eval_loss": 0.32019519805908203, | |
| "eval_macro_f1": 0.775495903042931, | |
| "eval_macro_precision": 0.8110349242462732, | |
| "eval_macro_recall": 0.746958005781159, | |
| "eval_micro_f1": 0.7743443261490522, | |
| "eval_micro_precision": 0.8287937743190662, | |
| "eval_micro_recall": 0.7266081871345029, | |
| "eval_runtime": 132.8667, | |
| "eval_samples_per_second": 92.958, | |
| "eval_steps_per_second": 1.453, | |
| "eval_subset_accuracy": 0.7761314873289612, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 3.0386740331491713, | |
| "grad_norm": 3.6615378856658936, | |
| "learning_rate": 7.29636400568759e-06, | |
| "loss": 0.5629, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.1077348066298343, | |
| "grad_norm": 7.720168590545654, | |
| "learning_rate": 7.194799918748731e-06, | |
| "loss": 0.5422, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 3.1767955801104972, | |
| "grad_norm": 5.942800521850586, | |
| "learning_rate": 7.093235831809873e-06, | |
| "loss": 0.5198, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 3.24585635359116, | |
| "grad_norm": 4.374157428741455, | |
| "learning_rate": 6.991671744871014e-06, | |
| "loss": 0.528, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 3.314917127071823, | |
| "grad_norm": 4.527683734893799, | |
| "learning_rate": 6.890107657932156e-06, | |
| "loss": 0.5481, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.383977900552486, | |
| "grad_norm": 3.410897731781006, | |
| "learning_rate": 6.7885435709932975e-06, | |
| "loss": 0.531, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 3.453038674033149, | |
| "grad_norm": 3.105001449584961, | |
| "learning_rate": 6.686979484054439e-06, | |
| "loss": 0.5193, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.522099447513812, | |
| "grad_norm": 4.019840717315674, | |
| "learning_rate": 6.585415397115581e-06, | |
| "loss": 0.5435, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 3.591160220994475, | |
| "grad_norm": 2.8091235160827637, | |
| "learning_rate": 6.483851310176722e-06, | |
| "loss": 0.5028, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.660220994475138, | |
| "grad_norm": 6.829853057861328, | |
| "learning_rate": 6.382287223237864e-06, | |
| "loss": 0.5308, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 3.729281767955801, | |
| "grad_norm": 6.366962432861328, | |
| "learning_rate": 6.280723136299005e-06, | |
| "loss": 0.5431, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 3.798342541436464, | |
| "grad_norm": 7.169396877288818, | |
| "learning_rate": 6.1791590493601475e-06, | |
| "loss": 0.5297, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 3.867403314917127, | |
| "grad_norm": 4.60045051574707, | |
| "learning_rate": 6.077594962421288e-06, | |
| "loss": 0.5226, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 3.93646408839779, | |
| "grad_norm": 3.7710654735565186, | |
| "learning_rate": 5.9760308754824305e-06, | |
| "loss": 0.528, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_class_accuracy": { | |
| "label_0": 0.8668933689579791, | |
| "label_1": 0.9708525625455429, | |
| "label_2": 0.8956359808922354 | |
| }, | |
| "eval_class_f1": { | |
| "label_0": 0.7924242424242425, | |
| "label_1": 0.7904540162980209, | |
| "label_2": 0.7929982334992773 | |
| }, | |
| "eval_class_false_positive_rate": { | |
| "label_0": 0.08310115081767618, | |
| "label_1": 0.017799774246764103, | |
| "label_2": 0.05290422131598667 | |
| }, | |
| "eval_class_precision": { | |
| "label_0": 0.8206066945606695, | |
| "label_1": 0.7680995475113123, | |
| "label_2": 0.8372329603255341 | |
| }, | |
| "eval_class_recall": { | |
| "label_0": 0.76611328125, | |
| "label_1": 0.8141486810551559, | |
| "label_2": 0.7532031726662599 | |
| }, | |
| "eval_loss": 0.3253972828388214, | |
| "eval_macro_f1": 0.7919588307405135, | |
| "eval_macro_precision": 0.8086464007991719, | |
| "eval_macro_recall": 0.7778217116571385, | |
| "eval_micro_f1": 0.7924361802710369, | |
| "eval_micro_precision": 0.8209481520177615, | |
| "eval_micro_recall": 0.7658382066276803, | |
| "eval_runtime": 132.8554, | |
| "eval_samples_per_second": 92.966, | |
| "eval_steps_per_second": 1.453, | |
| "eval_subset_accuracy": 0.7909481013683103, | |
| "step": 2896 | |
| }, | |
| { | |
| "epoch": 4.005524861878453, | |
| "grad_norm": 6.861307621002197, | |
| "learning_rate": 5.874466788543571e-06, | |
| "loss": 0.5269, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 4.074585635359116, | |
| "grad_norm": 5.186521530151367, | |
| "learning_rate": 5.7729027016047135e-06, | |
| "loss": 0.5032, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 4.143646408839779, | |
| "grad_norm": 2.319528579711914, | |
| "learning_rate": 5.671338614665854e-06, | |
| "loss": 0.4947, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.212707182320442, | |
| "grad_norm": 7.0490403175354, | |
| "learning_rate": 5.5697745277269966e-06, | |
| "loss": 0.475, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 4.281767955801105, | |
| "grad_norm": 6.733311653137207, | |
| "learning_rate": 5.468210440788137e-06, | |
| "loss": 0.4758, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 4.350828729281768, | |
| "grad_norm": 5.118820667266846, | |
| "learning_rate": 5.36664635384928e-06, | |
| "loss": 0.5216, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 4.419889502762431, | |
| "grad_norm": 4.245698928833008, | |
| "learning_rate": 5.26508226691042e-06, | |
| "loss": 0.4965, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 4.488950276243094, | |
| "grad_norm": 6.310864448547363, | |
| "learning_rate": 5.163518179971563e-06, | |
| "loss": 0.4979, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 4.558011049723757, | |
| "grad_norm": 10.312039375305176, | |
| "learning_rate": 5.061954093032704e-06, | |
| "loss": 0.4812, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 4.62707182320442, | |
| "grad_norm": 5.925340175628662, | |
| "learning_rate": 4.960390006093846e-06, | |
| "loss": 0.4946, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 4.696132596685083, | |
| "grad_norm": 2.886112689971924, | |
| "learning_rate": 4.858825919154987e-06, | |
| "loss": 0.488, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 4.765193370165746, | |
| "grad_norm": 5.802464008331299, | |
| "learning_rate": 4.757261832216129e-06, | |
| "loss": 0.4914, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 4.834254143646409, | |
| "grad_norm": 3.5704684257507324, | |
| "learning_rate": 4.65569774527727e-06, | |
| "loss": 0.4581, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.903314917127072, | |
| "grad_norm": 4.317540168762207, | |
| "learning_rate": 4.554133658338412e-06, | |
| "loss": 0.4881, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 4.972375690607735, | |
| "grad_norm": 2.523470401763916, | |
| "learning_rate": 4.452569571399554e-06, | |
| "loss": 0.4949, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_class_accuracy": { | |
| "label_0": 0.8691603918711035, | |
| "label_1": 0.9713383531697838, | |
| "label_2": 0.8945024694356732 | |
| }, | |
| "eval_class_f1": { | |
| "label_0": 0.7941925624044829, | |
| "label_1": 0.787515006002401, | |
| "label_2": 0.7994458981068185 | |
| }, | |
| "eval_class_false_positive_rate": { | |
| "label_0": 0.07728649303451517, | |
| "label_1": 0.015281757402099914, | |
| "label_2": 0.0685550534552994 | |
| }, | |
| "eval_class_precision": { | |
| "label_0": 0.8301384451544196, | |
| "label_1": 0.7884615384615384, | |
| "label_2": 0.8067722895309102 | |
| }, | |
| "eval_class_recall": { | |
| "label_0": 0.76123046875, | |
| "label_1": 0.7865707434052758, | |
| "label_2": 0.7922513727882855 | |
| }, | |
| "eval_loss": 0.3214770257472992, | |
| "eval_macro_f1": 0.7937178221712342, | |
| "eval_macro_precision": 0.8084574243822894, | |
| "eval_macro_recall": 0.7800175283145204, | |
| "eval_micro_f1": 0.7956290977208866, | |
| "eval_micro_precision": 0.816062508005636, | |
| "eval_micro_recall": 0.7761939571150097, | |
| "eval_runtime": 133.0557, | |
| "eval_samples_per_second": 92.826, | |
| "eval_steps_per_second": 1.451, | |
| "eval_subset_accuracy": 0.791838717512752, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 5.041436464088398, | |
| "grad_norm": 5.695593357086182, | |
| "learning_rate": 4.351005484460696e-06, | |
| "loss": 0.4785, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 5.110497237569061, | |
| "grad_norm": 6.8877692222595215, | |
| "learning_rate": 4.249441397521837e-06, | |
| "loss": 0.4648, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 5.179558011049724, | |
| "grad_norm": 4.254743576049805, | |
| "learning_rate": 4.147877310582979e-06, | |
| "loss": 0.4543, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 5.248618784530387, | |
| "grad_norm": 4.990494251251221, | |
| "learning_rate": 4.04631322364412e-06, | |
| "loss": 0.4654, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 5.31767955801105, | |
| "grad_norm": 20.026336669921875, | |
| "learning_rate": 3.944749136705262e-06, | |
| "loss": 0.4667, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 5.386740331491713, | |
| "grad_norm": 5.121794700622559, | |
| "learning_rate": 3.843185049766403e-06, | |
| "loss": 0.4763, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 5.455801104972376, | |
| "grad_norm": 7.3256516456604, | |
| "learning_rate": 3.7416209628275447e-06, | |
| "loss": 0.4418, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 5.524861878453039, | |
| "grad_norm": 7.178898811340332, | |
| "learning_rate": 3.6400568758886863e-06, | |
| "loss": 0.467, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.593922651933702, | |
| "grad_norm": 7.906302452087402, | |
| "learning_rate": 3.5384927889498278e-06, | |
| "loss": 0.426, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 5.662983425414365, | |
| "grad_norm": 4.6435546875, | |
| "learning_rate": 3.4369287020109693e-06, | |
| "loss": 0.4585, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 5.732044198895028, | |
| "grad_norm": 6.0284104347229, | |
| "learning_rate": 3.335364615072111e-06, | |
| "loss": 0.441, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 5.801104972375691, | |
| "grad_norm": 4.433581829071045, | |
| "learning_rate": 3.2338005281332523e-06, | |
| "loss": 0.4645, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 5.870165745856354, | |
| "grad_norm": 4.757821083068848, | |
| "learning_rate": 3.132236441194394e-06, | |
| "loss": 0.4652, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 5.939226519337017, | |
| "grad_norm": 6.152480602264404, | |
| "learning_rate": 3.0306723542555354e-06, | |
| "loss": 0.4484, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_class_accuracy": { | |
| "label_0": 0.8723989960327099, | |
| "label_1": 0.9698000161930208, | |
| "label_2": 0.8949882600599142 | |
| }, | |
| "eval_class_f1": { | |
| "label_0": 0.7942021415513189, | |
| "label_1": 0.7852619458837076, | |
| "label_2": 0.7991949218145223 | |
| }, | |
| "eval_class_false_positive_rate": { | |
| "label_0": 0.0631132646880602, | |
| "label_1": 0.019189024919682277, | |
| "label_2": 0.06613027664498335 | |
| }, | |
| "eval_class_precision": { | |
| "label_0": 0.8537338573834924, | |
| "label_1": 0.7552602436323367, | |
| "label_2": 0.8113800691606413 | |
| }, | |
| "eval_class_recall": { | |
| "label_0": 0.742431640625, | |
| "label_1": 0.8177458033573142, | |
| "label_2": 0.7873703477730324 | |
| }, | |
| "eval_loss": 0.3344503939151764, | |
| "eval_macro_f1": 0.7928863364165163, | |
| "eval_macro_precision": 0.8067913900588235, | |
| "eval_macro_recall": 0.7825159305851156, | |
| "eval_micro_f1": 0.7952567175476221, | |
| "eval_micro_precision": 0.8244833900078472, | |
| "eval_micro_recall": 0.7680311890838206, | |
| "eval_runtime": 132.822, | |
| "eval_samples_per_second": 92.989, | |
| "eval_steps_per_second": 1.453, | |
| "eval_subset_accuracy": 0.792324508136993, | |
| "step": 4344 | |
| }, | |
| { | |
| "epoch": 6.00828729281768, | |
| "grad_norm": 7.724102973937988, | |
| "learning_rate": 2.929108267316677e-06, | |
| "loss": 0.4457, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 6.077348066298343, | |
| "grad_norm": 7.960653305053711, | |
| "learning_rate": 2.827544180377819e-06, | |
| "loss": 0.4507, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 6.1464088397790055, | |
| "grad_norm": 7.553245544433594, | |
| "learning_rate": 2.7259800934389603e-06, | |
| "loss": 0.4214, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 6.2154696132596685, | |
| "grad_norm": 13.122285842895508, | |
| "learning_rate": 2.624416006500102e-06, | |
| "loss": 0.4561, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 6.2845303867403315, | |
| "grad_norm": 5.380446910858154, | |
| "learning_rate": 2.5228519195612434e-06, | |
| "loss": 0.4195, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 6.3535911602209945, | |
| "grad_norm": 8.281150817871094, | |
| "learning_rate": 2.421287832622385e-06, | |
| "loss": 0.4411, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 6.422651933701657, | |
| "grad_norm": 5.860757350921631, | |
| "learning_rate": 2.3197237456835264e-06, | |
| "loss": 0.461, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 6.49171270718232, | |
| "grad_norm": 5.274202346801758, | |
| "learning_rate": 2.218159658744668e-06, | |
| "loss": 0.4469, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 6.560773480662983, | |
| "grad_norm": 3.1023948192596436, | |
| "learning_rate": 2.1165955718058095e-06, | |
| "loss": 0.4353, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 6.629834254143646, | |
| "grad_norm": 3.906625270843506, | |
| "learning_rate": 2.015031484866951e-06, | |
| "loss": 0.413, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 6.698895027624309, | |
| "grad_norm": 4.5051422119140625, | |
| "learning_rate": 1.9134673979280925e-06, | |
| "loss": 0.4255, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 6.767955801104972, | |
| "grad_norm": 6.132110118865967, | |
| "learning_rate": 1.8119033109892342e-06, | |
| "loss": 0.4412, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 6.837016574585635, | |
| "grad_norm": 3.9629147052764893, | |
| "learning_rate": 1.710339224050376e-06, | |
| "loss": 0.4081, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 6.906077348066298, | |
| "grad_norm": 8.018949508666992, | |
| "learning_rate": 1.6087751371115177e-06, | |
| "loss": 0.4497, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.975138121546961, | |
| "grad_norm": 4.3800177574157715, | |
| "learning_rate": 1.5072110501726592e-06, | |
| "loss": 0.4381, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_class_accuracy": { | |
| "label_0": 0.8718322403044287, | |
| "label_1": 0.9686665047364585, | |
| "label_2": 0.896445631932637 | |
| }, | |
| "eval_class_f1": { | |
| "label_0": 0.7970772977823356, | |
| "label_1": 0.7792355961209355, | |
| "label_2": 0.8016746782446891 | |
| }, | |
| "eval_class_false_positive_rate": { | |
| "label_0": 0.07219866747424927, | |
| "label_1": 0.020491447425543066, | |
| "label_2": 0.0645872368566004 | |
| }, | |
| "eval_class_precision": { | |
| "label_0": 0.839136302294197, | |
| "label_1": 0.7431991294885746, | |
| "label_2": 0.815200252286345 | |
| }, | |
| "eval_class_recall": { | |
| "label_0": 0.759033203125, | |
| "label_1": 0.8189448441247003, | |
| "label_2": 0.7885906040268457 | |
| }, | |
| "eval_loss": 0.34001022577285767, | |
| "eval_macro_f1": 0.79266252404932, | |
| "eval_macro_precision": 0.7991785613563721, | |
| "eval_macro_recall": 0.788856217092182, | |
| "eval_micro_f1": 0.7969755670811722, | |
| "eval_micro_precision": 0.8180885182809493, | |
| "eval_micro_recall": 0.7769249512670565, | |
| "eval_runtime": 133.0263, | |
| "eval_samples_per_second": 92.846, | |
| "eval_steps_per_second": 1.451, | |
| "eval_subset_accuracy": 0.7941867055299167, | |
| "step": 5068 | |
| }, | |
| { | |
| "epoch": 7.044198895027624, | |
| "grad_norm": 5.756816387176514, | |
| "learning_rate": 1.4056469632338007e-06, | |
| "loss": 0.4057, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 7.113259668508287, | |
| "grad_norm": 10.789379119873047, | |
| "learning_rate": 1.3040828762949422e-06, | |
| "loss": 0.4082, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 7.18232044198895, | |
| "grad_norm": 6.213741779327393, | |
| "learning_rate": 1.2025187893560838e-06, | |
| "loss": 0.4371, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 7.251381215469613, | |
| "grad_norm": 10.314269065856934, | |
| "learning_rate": 1.1009547024172255e-06, | |
| "loss": 0.414, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 7.320441988950276, | |
| "grad_norm": 4.766229152679443, | |
| "learning_rate": 9.99390615478367e-07, | |
| "loss": 0.4262, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 7.389502762430939, | |
| "grad_norm": 10.421786308288574, | |
| "learning_rate": 8.978265285395085e-07, | |
| "loss": 0.4089, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 7.458563535911602, | |
| "grad_norm": 7.079195976257324, | |
| "learning_rate": 7.9626244160065e-07, | |
| "loss": 0.4176, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 7.527624309392265, | |
| "grad_norm": 11.755300521850586, | |
| "learning_rate": 6.946983546617917e-07, | |
| "loss": 0.4175, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 7.596685082872928, | |
| "grad_norm": 5.82271146774292, | |
| "learning_rate": 5.931342677229332e-07, | |
| "loss": 0.4028, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 7.665745856353591, | |
| "grad_norm": 5.967957019805908, | |
| "learning_rate": 4.915701807840748e-07, | |
| "loss": 0.4248, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 7.734806629834254, | |
| "grad_norm": 8.050609588623047, | |
| "learning_rate": 3.900060938452164e-07, | |
| "loss": 0.4203, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 7.803867403314917, | |
| "grad_norm": 5.390321254730225, | |
| "learning_rate": 2.8844200690635795e-07, | |
| "loss": 0.4054, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 7.87292817679558, | |
| "grad_norm": 4.956524848937988, | |
| "learning_rate": 1.868779199674995e-07, | |
| "loss": 0.4327, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 7.941988950276243, | |
| "grad_norm": 3.822096347808838, | |
| "learning_rate": 8.531383302864108e-08, | |
| "loss": 0.4183, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_class_accuracy": { | |
| "label_0": 0.8722370658246296, | |
| "label_1": 0.9690713302566594, | |
| "label_2": 0.8970123876609182 | |
| }, | |
| "eval_class_f1": { | |
| "label_0": 0.797640420620672, | |
| "label_1": 0.7791907514450868, | |
| "label_2": 0.8029129222187791 | |
| }, | |
| "eval_class_false_positive_rate": { | |
| "label_0": 0.07171411265898586, | |
| "label_1": 0.019275853086739663, | |
| "label_2": 0.06447701972885876 | |
| }, | |
| "eval_class_precision": { | |
| "label_0": 0.8400864397622907, | |
| "label_1": 0.7522321428571429, | |
| "label_2": 0.8158060453400504 | |
| }, | |
| "eval_class_recall": { | |
| "label_0": 0.75927734375, | |
| "label_1": 0.8081534772182254, | |
| "label_2": 0.7904209884075656 | |
| }, | |
| "eval_loss": 0.34776991605758667, | |
| "eval_macro_f1": 0.7932480314281793, | |
| "eval_macro_precision": 0.8027082093198281, | |
| "eval_macro_recall": 0.7859506031252637, | |
| "eval_micro_f1": 0.7977724940558127, | |
| "eval_micro_precision": 0.8200411628505274, | |
| "eval_micro_recall": 0.7766812865497076, | |
| "eval_runtime": 132.8386, | |
| "eval_samples_per_second": 92.977, | |
| "eval_steps_per_second": 1.453, | |
| "eval_subset_accuracy": 0.7951582867783985, | |
| "step": 5792 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 5792, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.9496632912293888e+17, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |