| { |
| "best_global_step": 13206, |
| "best_metric": 0.9868791709426586, |
| "best_model_checkpoint": "/home/myid/bp67339/plant_disease/models/deberta_v3_base_textclf_phase3/checkpoint-13206", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 13206, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011358473421172195, |
| "grad_norm": 2.552668809890747, |
| "learning_rate": 1.9925791306981678e-05, |
| "loss": 0.1196, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02271694684234439, |
| "grad_norm": 0.47541937232017517, |
| "learning_rate": 1.985006815084053e-05, |
| "loss": 0.0812, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.034075420263516586, |
| "grad_norm": 7.045262813568115, |
| "learning_rate": 1.977434499469938e-05, |
| "loss": 0.0859, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04543389368468878, |
| "grad_norm": 0.03415582329034805, |
| "learning_rate": 1.9698621838558232e-05, |
| "loss": 0.0847, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05679236710586097, |
| "grad_norm": 0.05752035602927208, |
| "learning_rate": 1.9622898682417084e-05, |
| "loss": 0.0988, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.06815084052703317, |
| "grad_norm": 3.0614030361175537, |
| "learning_rate": 1.9547175526275936e-05, |
| "loss": 0.1058, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.07950931394820536, |
| "grad_norm": 1.2528300285339355, |
| "learning_rate": 1.947145237013479e-05, |
| "loss": 0.0842, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.09086778736937756, |
| "grad_norm": 8.556694984436035, |
| "learning_rate": 1.9395729213993642e-05, |
| "loss": 0.0777, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.10222626079054975, |
| "grad_norm": 20.690099716186523, |
| "learning_rate": 1.9320006057852494e-05, |
| "loss": 0.0764, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.11358473421172194, |
| "grad_norm": 4.0012030601501465, |
| "learning_rate": 1.9244282901711345e-05, |
| "loss": 0.0814, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.12494320763289414, |
| "grad_norm": 0.06608668714761734, |
| "learning_rate": 1.9168559745570197e-05, |
| "loss": 0.0637, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.13630168105406634, |
| "grad_norm": 5.306321620941162, |
| "learning_rate": 1.909283658942905e-05, |
| "loss": 0.0479, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.14766015447523853, |
| "grad_norm": 5.900157451629639, |
| "learning_rate": 1.90171134332879e-05, |
| "loss": 0.0892, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.15901862789641072, |
| "grad_norm": 2.73388934135437, |
| "learning_rate": 1.8941390277146755e-05, |
| "loss": 0.0721, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.1703771013175829, |
| "grad_norm": 6.770120143890381, |
| "learning_rate": 1.8865667121005603e-05, |
| "loss": 0.082, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.18173557473875512, |
| "grad_norm": 0.010230629704892635, |
| "learning_rate": 1.8789943964864458e-05, |
| "loss": 0.0604, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.1930940481599273, |
| "grad_norm": 11.530449867248535, |
| "learning_rate": 1.871422080872331e-05, |
| "loss": 0.0578, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2044525215810995, |
| "grad_norm": 0.014014088548719883, |
| "learning_rate": 1.863849765258216e-05, |
| "loss": 0.0574, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2158109950022717, |
| "grad_norm": 0.016662120819091797, |
| "learning_rate": 1.8562774496441013e-05, |
| "loss": 0.0654, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.22716946842344388, |
| "grad_norm": 1.2837393283843994, |
| "learning_rate": 1.8487051340299865e-05, |
| "loss": 0.0638, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2385279418446161, |
| "grad_norm": 0.45574066042900085, |
| "learning_rate": 1.8411328184158716e-05, |
| "loss": 0.0448, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.24988641526578828, |
| "grad_norm": 2.423413038253784, |
| "learning_rate": 1.8335605028017568e-05, |
| "loss": 0.0535, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.2612448886869605, |
| "grad_norm": 4.537928104400635, |
| "learning_rate": 1.8259881871876423e-05, |
| "loss": 0.0833, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.2726033621081327, |
| "grad_norm": 6.994387626647949, |
| "learning_rate": 1.8184158715735274e-05, |
| "loss": 0.06, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.2839618355293049, |
| "grad_norm": 0.01928258314728737, |
| "learning_rate": 1.8108435559594126e-05, |
| "loss": 0.0498, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.29532030895047706, |
| "grad_norm": 0.4064951241016388, |
| "learning_rate": 1.8032712403452977e-05, |
| "loss": 0.0491, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.30667878237164925, |
| "grad_norm": 2.6388704776763916, |
| "learning_rate": 1.795698924731183e-05, |
| "loss": 0.0327, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.31803725579282144, |
| "grad_norm": 2.0852885246276855, |
| "learning_rate": 1.788126609117068e-05, |
| "loss": 0.0978, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3293957292139936, |
| "grad_norm": 2.8278560638427734, |
| "learning_rate": 1.7805542935029532e-05, |
| "loss": 0.0521, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3407542026351658, |
| "grad_norm": 4.4425177574157715, |
| "learning_rate": 1.7729819778888387e-05, |
| "loss": 0.0532, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.352112676056338, |
| "grad_norm": 0.0768510028719902, |
| "learning_rate": 1.7654096622747235e-05, |
| "loss": 0.0486, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.36347114947751025, |
| "grad_norm": 0.4614756405353546, |
| "learning_rate": 1.757837346660609e-05, |
| "loss": 0.0419, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.37482962289868244, |
| "grad_norm": 0.5945758819580078, |
| "learning_rate": 1.7502650310464942e-05, |
| "loss": 0.055, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.3861880963198546, |
| "grad_norm": 0.5370413661003113, |
| "learning_rate": 1.7426927154323794e-05, |
| "loss": 0.0434, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.3975465697410268, |
| "grad_norm": 13.6658296585083, |
| "learning_rate": 1.7351203998182645e-05, |
| "loss": 0.0329, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.408905043162199, |
| "grad_norm": 0.9185870885848999, |
| "learning_rate": 1.72754808420415e-05, |
| "loss": 0.0584, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.4202635165833712, |
| "grad_norm": 0.3048367500305176, |
| "learning_rate": 1.719975768590035e-05, |
| "loss": 0.0561, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.4316219900045434, |
| "grad_norm": 3.6233348846435547, |
| "learning_rate": 1.71240345297592e-05, |
| "loss": 0.0568, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.44298046342571556, |
| "grad_norm": 7.301445007324219, |
| "learning_rate": 1.7048311373618055e-05, |
| "loss": 0.0545, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.45433893684688775, |
| "grad_norm": 12.261190414428711, |
| "learning_rate": 1.6972588217476907e-05, |
| "loss": 0.0347, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.46569741026806, |
| "grad_norm": 0.004870145116001368, |
| "learning_rate": 1.6896865061335758e-05, |
| "loss": 0.0297, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.4770558836892322, |
| "grad_norm": 0.08898526430130005, |
| "learning_rate": 1.682114190519461e-05, |
| "loss": 0.0368, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.4884143571104044, |
| "grad_norm": 0.007771740201860666, |
| "learning_rate": 1.674541874905346e-05, |
| "loss": 0.037, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.49977283053157656, |
| "grad_norm": 0.06338020414113998, |
| "learning_rate": 1.6669695592912313e-05, |
| "loss": 0.0351, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.5111313039527488, |
| "grad_norm": 0.2080918550491333, |
| "learning_rate": 1.6593972436771168e-05, |
| "loss": 0.0424, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.522489777373921, |
| "grad_norm": 0.005484211724251509, |
| "learning_rate": 1.651824928063002e-05, |
| "loss": 0.0498, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.5338482507950931, |
| "grad_norm": 1.5300865173339844, |
| "learning_rate": 1.6442526124488868e-05, |
| "loss": 0.0309, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.5452067242162654, |
| "grad_norm": 1.4512425661087036, |
| "learning_rate": 1.6366802968347723e-05, |
| "loss": 0.0397, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5565651976374375, |
| "grad_norm": 0.07551686465740204, |
| "learning_rate": 1.6291079812206574e-05, |
| "loss": 0.0368, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5679236710586097, |
| "grad_norm": 5.5924272537231445, |
| "learning_rate": 1.6215356656065426e-05, |
| "loss": 0.0169, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5792821444797819, |
| "grad_norm": 0.03456870838999748, |
| "learning_rate": 1.6139633499924277e-05, |
| "loss": 0.0397, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.5906406179009541, |
| "grad_norm": 1.605539321899414, |
| "learning_rate": 1.6063910343783132e-05, |
| "loss": 0.0353, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.6019990913221263, |
| "grad_norm": 2.287083864212036, |
| "learning_rate": 1.598818718764198e-05, |
| "loss": 0.0233, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.6133575647432985, |
| "grad_norm": 0.02498025633394718, |
| "learning_rate": 1.5912464031500836e-05, |
| "loss": 0.0389, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.6247160381644707, |
| "grad_norm": 0.009853340685367584, |
| "learning_rate": 1.5836740875359687e-05, |
| "loss": 0.0343, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.6360745115856429, |
| "grad_norm": 0.006630271207541227, |
| "learning_rate": 1.576101771921854e-05, |
| "loss": 0.0211, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.6474329850068151, |
| "grad_norm": 9.345346450805664, |
| "learning_rate": 1.568529456307739e-05, |
| "loss": 0.0374, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.6587914584279873, |
| "grad_norm": 6.142024040222168, |
| "learning_rate": 1.5609571406936242e-05, |
| "loss": 0.0551, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6701499318491595, |
| "grad_norm": 0.0021689136046916246, |
| "learning_rate": 1.5533848250795093e-05, |
| "loss": 0.0223, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.6815084052703316, |
| "grad_norm": 0.12782888114452362, |
| "learning_rate": 1.5458125094653945e-05, |
| "loss": 0.0397, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.6928668786915039, |
| "grad_norm": 0.021150466054677963, |
| "learning_rate": 1.53824019385128e-05, |
| "loss": 0.0465, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.704225352112676, |
| "grad_norm": 0.03622226044535637, |
| "learning_rate": 1.530667878237165e-05, |
| "loss": 0.0245, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.7155838255338483, |
| "grad_norm": 0.01021323911845684, |
| "learning_rate": 1.5230955626230503e-05, |
| "loss": 0.04, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.7269422989550205, |
| "grad_norm": 0.3106459677219391, |
| "learning_rate": 1.5155232470089353e-05, |
| "loss": 0.0257, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.7383007723761926, |
| "grad_norm": 0.00275537115521729, |
| "learning_rate": 1.5079509313948206e-05, |
| "loss": 0.0263, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.7496592457973649, |
| "grad_norm": 2.062044143676758, |
| "learning_rate": 1.5003786157807058e-05, |
| "loss": 0.0309, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.761017719218537, |
| "grad_norm": 9.71252155303955, |
| "learning_rate": 1.4928063001665911e-05, |
| "loss": 0.0398, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.7723761926397092, |
| "grad_norm": 0.1981002688407898, |
| "learning_rate": 1.4852339845524763e-05, |
| "loss": 0.0361, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.7837346660608814, |
| "grad_norm": 5.37298059463501, |
| "learning_rate": 1.4776616689383614e-05, |
| "loss": 0.028, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.7950931394820536, |
| "grad_norm": 0.01882290095090866, |
| "learning_rate": 1.4700893533242466e-05, |
| "loss": 0.0326, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.8064516129032258, |
| "grad_norm": 0.035887233912944794, |
| "learning_rate": 1.462517037710132e-05, |
| "loss": 0.0416, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.817810086324398, |
| "grad_norm": 13.781176567077637, |
| "learning_rate": 1.4549447220960171e-05, |
| "loss": 0.023, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.8291685597455702, |
| "grad_norm": 0.06990953534841537, |
| "learning_rate": 1.4473724064819024e-05, |
| "loss": 0.0213, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.8405270331667424, |
| "grad_norm": 0.6063719987869263, |
| "learning_rate": 1.4398000908677874e-05, |
| "loss": 0.0228, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.8518855065879146, |
| "grad_norm": 4.930353164672852, |
| "learning_rate": 1.4322277752536726e-05, |
| "loss": 0.0335, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.8632439800090868, |
| "grad_norm": 0.004565690644085407, |
| "learning_rate": 1.4246554596395579e-05, |
| "loss": 0.0214, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.874602453430259, |
| "grad_norm": 0.17241248488426208, |
| "learning_rate": 1.417083144025443e-05, |
| "loss": 0.0401, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.8859609268514311, |
| "grad_norm": 5.231073379516602, |
| "learning_rate": 1.4095108284113284e-05, |
| "loss": 0.0168, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.8973194002726034, |
| "grad_norm": 0.25602054595947266, |
| "learning_rate": 1.4019385127972135e-05, |
| "loss": 0.0399, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.9086778736937755, |
| "grad_norm": 0.04758666455745697, |
| "learning_rate": 1.3943661971830987e-05, |
| "loss": 0.0232, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.9200363471149478, |
| "grad_norm": 0.007415900472551584, |
| "learning_rate": 1.3867938815689839e-05, |
| "loss": 0.0254, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.93139482053612, |
| "grad_norm": 0.05941289663314819, |
| "learning_rate": 1.3792215659548692e-05, |
| "loss": 0.0182, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.9427532939572921, |
| "grad_norm": 0.2857076823711395, |
| "learning_rate": 1.3716492503407543e-05, |
| "loss": 0.015, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.9541117673784644, |
| "grad_norm": 6.7106852531433105, |
| "learning_rate": 1.3640769347266397e-05, |
| "loss": 0.0218, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.9654702407996365, |
| "grad_norm": 2.165870189666748, |
| "learning_rate": 1.3565046191125247e-05, |
| "loss": 0.0477, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.9768287142208087, |
| "grad_norm": 1.2232965230941772, |
| "learning_rate": 1.3489323034984098e-05, |
| "loss": 0.0227, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.9881871876419809, |
| "grad_norm": 6.756147384643555, |
| "learning_rate": 1.3413599878842951e-05, |
| "loss": 0.0273, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.9995456610631531, |
| "grad_norm": 0.07053057104349136, |
| "learning_rate": 1.3337876722701803e-05, |
| "loss": 0.0217, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9839739413680781, |
| "eval_f1_macro": 0.982934850082554, |
| "eval_loss": 0.08169186860322952, |
| "eval_runtime": 25.3611, |
| "eval_samples_per_second": 302.629, |
| "eval_steps_per_second": 4.732, |
| "step": 4402 |
| }, |
| { |
| "epoch": 1.0109041344843253, |
| "grad_norm": 0.005929551087319851, |
| "learning_rate": 1.3262153566560656e-05, |
| "loss": 0.0101, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.0222626079054975, |
| "grad_norm": 0.011584239080548286, |
| "learning_rate": 1.3186430410419508e-05, |
| "loss": 0.012, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.0336210813266697, |
| "grad_norm": 0.06345133483409882, |
| "learning_rate": 1.3110707254278358e-05, |
| "loss": 0.0295, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.044979554747842, |
| "grad_norm": 0.003701810259371996, |
| "learning_rate": 1.3034984098137211e-05, |
| "loss": 0.0096, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.056338028169014, |
| "grad_norm": 0.0013922312064096332, |
| "learning_rate": 1.2959260941996063e-05, |
| "loss": 0.0267, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.0676965015901863, |
| "grad_norm": 0.0026676456909626722, |
| "learning_rate": 1.2883537785854916e-05, |
| "loss": 0.0145, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.0790549750113585, |
| "grad_norm": 0.0031162630766630173, |
| "learning_rate": 1.2807814629713768e-05, |
| "loss": 0.0124, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.0904134484325307, |
| "grad_norm": 3.748222589492798, |
| "learning_rate": 1.273209147357262e-05, |
| "loss": 0.0074, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.1017719218537028, |
| "grad_norm": 0.0014477827353402972, |
| "learning_rate": 1.265636831743147e-05, |
| "loss": 0.0179, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.113130395274875, |
| "grad_norm": 4.411365985870361, |
| "learning_rate": 1.2580645161290324e-05, |
| "loss": 0.0181, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.1244888686960473, |
| "grad_norm": 0.05877981334924698, |
| "learning_rate": 1.2504922005149176e-05, |
| "loss": 0.013, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.1358473421172195, |
| "grad_norm": 0.999143123626709, |
| "learning_rate": 1.2429198849008029e-05, |
| "loss": 0.0114, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.1472058155383915, |
| "grad_norm": 0.0030792481265962124, |
| "learning_rate": 1.2353475692866879e-05, |
| "loss": 0.0114, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.1585642889595638, |
| "grad_norm": 0.0014463861007243395, |
| "learning_rate": 1.227775253672573e-05, |
| "loss": 0.0188, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.169922762380736, |
| "grad_norm": 0.0017361458158120513, |
| "learning_rate": 1.2202029380584584e-05, |
| "loss": 0.0234, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.1812812358019082, |
| "grad_norm": 13.90465259552002, |
| "learning_rate": 1.2126306224443435e-05, |
| "loss": 0.0078, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.1926397092230805, |
| "grad_norm": 5.497283935546875, |
| "learning_rate": 1.2050583068302289e-05, |
| "loss": 0.0124, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.2039981826442525, |
| "grad_norm": 3.7859766483306885, |
| "learning_rate": 1.197485991216114e-05, |
| "loss": 0.0319, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.2153566560654248, |
| "grad_norm": 0.0024443278089165688, |
| "learning_rate": 1.1899136756019992e-05, |
| "loss": 0.0177, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.226715129486597, |
| "grad_norm": 0.20258040726184845, |
| "learning_rate": 1.1823413599878843e-05, |
| "loss": 0.009, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.2380736029077692, |
| "grad_norm": 0.0014519239775836468, |
| "learning_rate": 1.1747690443737697e-05, |
| "loss": 0.0069, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.2494320763289415, |
| "grad_norm": 0.02450985088944435, |
| "learning_rate": 1.1671967287596548e-05, |
| "loss": 0.017, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.2607905497501135, |
| "grad_norm": 0.004507457371801138, |
| "learning_rate": 1.1596244131455401e-05, |
| "loss": 0.01, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.2721490231712858, |
| "grad_norm": 9.089266777038574, |
| "learning_rate": 1.1520520975314251e-05, |
| "loss": 0.0382, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.283507496592458, |
| "grad_norm": 5.648477077484131, |
| "learning_rate": 1.1444797819173103e-05, |
| "loss": 0.0165, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.2948659700136302, |
| "grad_norm": 1.2645410299301147, |
| "learning_rate": 1.1369074663031956e-05, |
| "loss": 0.0163, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.3062244434348025, |
| "grad_norm": 0.0017288923263549805, |
| "learning_rate": 1.1293351506890808e-05, |
| "loss": 0.0202, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.3175829168559745, |
| "grad_norm": 0.00359937222674489, |
| "learning_rate": 1.1217628350749661e-05, |
| "loss": 0.0135, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.3289413902771468, |
| "grad_norm": 4.165374279022217, |
| "learning_rate": 1.1141905194608513e-05, |
| "loss": 0.0132, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.340299863698319, |
| "grad_norm": 0.0018008551560342312, |
| "learning_rate": 1.1066182038467364e-05, |
| "loss": 0.0112, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.351658337119491, |
| "grad_norm": 0.5573896765708923, |
| "learning_rate": 1.0990458882326216e-05, |
| "loss": 0.0221, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.3630168105406633, |
| "grad_norm": 1.456154704093933, |
| "learning_rate": 1.091473572618507e-05, |
| "loss": 0.011, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.3743752839618355, |
| "grad_norm": 3.8196120262145996, |
| "learning_rate": 1.083901257004392e-05, |
| "loss": 0.0158, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.3857337573830077, |
| "grad_norm": 0.0017874937038868666, |
| "learning_rate": 1.0763289413902774e-05, |
| "loss": 0.0107, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.39709223080418, |
| "grad_norm": 0.0008583781309425831, |
| "learning_rate": 1.0687566257761624e-05, |
| "loss": 0.0031, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.408450704225352, |
| "grad_norm": 0.44672343134880066, |
| "learning_rate": 1.0611843101620476e-05, |
| "loss": 0.0223, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.4198091776465243, |
| "grad_norm": 0.009662467055022717, |
| "learning_rate": 1.0536119945479329e-05, |
| "loss": 0.0157, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.4311676510676965, |
| "grad_norm": 0.0007439041510224342, |
| "learning_rate": 1.046039678933818e-05, |
| "loss": 0.0165, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.4425261244888687, |
| "grad_norm": 0.001364586642012, |
| "learning_rate": 1.0384673633197034e-05, |
| "loss": 0.0054, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.453884597910041, |
| "grad_norm": 0.001398891326971352, |
| "learning_rate": 1.0308950477055884e-05, |
| "loss": 0.0087, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.465243071331213, |
| "grad_norm": 0.002500066999346018, |
| "learning_rate": 1.0233227320914735e-05, |
| "loss": 0.0117, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.4766015447523853, |
| "grad_norm": 0.0015706096310168505, |
| "learning_rate": 1.0157504164773588e-05, |
| "loss": 0.0173, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.4879600181735575, |
| "grad_norm": 0.0015295586781576276, |
| "learning_rate": 1.008178100863244e-05, |
| "loss": 0.0284, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.4993184915947297, |
| "grad_norm": 2.286747932434082, |
| "learning_rate": 1.0006057852491293e-05, |
| "loss": 0.011, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.510676965015902, |
| "grad_norm": 0.002311094431206584, |
| "learning_rate": 9.930334696350145e-06, |
| "loss": 0.0166, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.522035438437074, |
| "grad_norm": 0.0014053047634661198, |
| "learning_rate": 9.854611540208996e-06, |
| "loss": 0.0174, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.5333939118582463, |
| "grad_norm": 0.012581984512507915, |
| "learning_rate": 9.778888384067848e-06, |
| "loss": 0.0099, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.5447523852794185, |
| "grad_norm": 0.023738177493214607, |
| "learning_rate": 9.703165227926701e-06, |
| "loss": 0.0098, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.5561108587005905, |
| "grad_norm": 0.014806082472205162, |
| "learning_rate": 9.627442071785553e-06, |
| "loss": 0.017, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.567469332121763, |
| "grad_norm": 0.11848437041044235, |
| "learning_rate": 9.551718915644405e-06, |
| "loss": 0.0051, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.578827805542935, |
| "grad_norm": 2.107661485671997, |
| "learning_rate": 9.475995759503258e-06, |
| "loss": 0.0066, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.5901862789641072, |
| "grad_norm": 0.08532427251338959, |
| "learning_rate": 9.40027260336211e-06, |
| "loss": 0.0162, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.6015447523852795, |
| "grad_norm": 0.0039615873247385025, |
| "learning_rate": 9.324549447220961e-06, |
| "loss": 0.0043, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.6129032258064515, |
| "grad_norm": 0.0013789839576929808, |
| "learning_rate": 9.248826291079813e-06, |
| "loss": 0.0127, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.6242616992276238, |
| "grad_norm": 9.550993919372559, |
| "learning_rate": 9.173103134938664e-06, |
| "loss": 0.0106, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.635620172648796, |
| "grad_norm": 0.0011268022935837507, |
| "learning_rate": 9.097379978797517e-06, |
| "loss": 0.009, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.6469786460699682, |
| "grad_norm": 0.0006976621225476265, |
| "learning_rate": 9.021656822656369e-06, |
| "loss": 0.015, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.6583371194911405, |
| "grad_norm": 0.0014887871220707893, |
| "learning_rate": 8.94593366651522e-06, |
| "loss": 0.015, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.6696955929123125, |
| "grad_norm": 0.0009298041695728898, |
| "learning_rate": 8.870210510374074e-06, |
| "loss": 0.0155, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.6810540663334848, |
| "grad_norm": 0.287706583738327, |
| "learning_rate": 8.794487354232925e-06, |
| "loss": 0.0116, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.692412539754657, |
| "grad_norm": 0.051804594695568085, |
| "learning_rate": 8.718764198091777e-06, |
| "loss": 0.0086, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.703771013175829, |
| "grad_norm": 1.3675575256347656, |
| "learning_rate": 8.643041041950629e-06, |
| "loss": 0.023, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.7151294865970015, |
| "grad_norm": 0.0012162342900410295, |
| "learning_rate": 8.56731788580948e-06, |
| "loss": 0.0081, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.7264879600181735, |
| "grad_norm": 0.0013599529629573226, |
| "learning_rate": 8.491594729668334e-06, |
| "loss": 0.0075, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.7378464334393458, |
| "grad_norm": 0.0011247453512623906, |
| "learning_rate": 8.415871573527185e-06, |
| "loss": 0.0079, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.749204906860518, |
| "grad_norm": 0.003529078559949994, |
| "learning_rate": 8.340148417386037e-06, |
| "loss": 0.0053, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.76056338028169, |
| "grad_norm": 13.674657821655273, |
| "learning_rate": 8.26442526124489e-06, |
| "loss": 0.0058, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.7719218537028625, |
| "grad_norm": 0.0018937140703201294, |
| "learning_rate": 8.188702105103742e-06, |
| "loss": 0.0073, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.7832803271240345, |
| "grad_norm": 0.0012737379875034094, |
| "learning_rate": 8.112978948962593e-06, |
| "loss": 0.0067, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.7946388005452067, |
| "grad_norm": 0.0010197096271440387, |
| "learning_rate": 8.037255792821446e-06, |
| "loss": 0.0057, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.805997273966379, |
| "grad_norm": 0.009366828948259354, |
| "learning_rate": 7.961532636680296e-06, |
| "loss": 0.026, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.817355747387551, |
| "grad_norm": 0.012473216280341148, |
| "learning_rate": 7.88580948053915e-06, |
| "loss": 0.024, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.8287142208087233, |
| "grad_norm": 0.26876014471054077, |
| "learning_rate": 7.810086324398001e-06, |
| "loss": 0.0079, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.8400726942298955, |
| "grad_norm": 0.0026707984507083893, |
| "learning_rate": 7.734363168256853e-06, |
| "loss": 0.0149, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.8514311676510677, |
| "grad_norm": 0.036166541278362274, |
| "learning_rate": 7.658640012115706e-06, |
| "loss": 0.0062, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.86278964107224, |
| "grad_norm": 0.013438834808766842, |
| "learning_rate": 7.582916855974558e-06, |
| "loss": 0.0047, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.874148114493412, |
| "grad_norm": 0.0015038993442431092, |
| "learning_rate": 7.507193699833409e-06, |
| "loss": 0.0155, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.8855065879145843, |
| "grad_norm": 0.0006626308313570917, |
| "learning_rate": 7.431470543692262e-06, |
| "loss": 0.0014, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.8968650613357565, |
| "grad_norm": 0.0032690023072063923, |
| "learning_rate": 7.355747387551114e-06, |
| "loss": 0.0103, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.9082235347569285, |
| "grad_norm": 0.0008181309676729143, |
| "learning_rate": 7.280024231409966e-06, |
| "loss": 0.0184, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.919582008178101, |
| "grad_norm": 0.0026349183171987534, |
| "learning_rate": 7.204301075268818e-06, |
| "loss": 0.0074, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.930940481599273, |
| "grad_norm": 0.0025627773720771074, |
| "learning_rate": 7.12857791912767e-06, |
| "loss": 0.0061, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.9422989550204453, |
| "grad_norm": 0.000963448139373213, |
| "learning_rate": 7.052854762986522e-06, |
| "loss": 0.0049, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.9536574284416175, |
| "grad_norm": 0.0005609341314993799, |
| "learning_rate": 6.977131606845375e-06, |
| "loss": 0.0007, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.9650159018627895, |
| "grad_norm": 0.0011694286949932575, |
| "learning_rate": 6.901408450704225e-06, |
| "loss": 0.0017, |
| "step": 8650 |
| }, |
| { |
| "epoch": 1.976374375283962, |
| "grad_norm": 0.0010692242067307234, |
| "learning_rate": 6.825685294563078e-06, |
| "loss": 0.0115, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.987732848705134, |
| "grad_norm": 0.05440627783536911, |
| "learning_rate": 6.74996213842193e-06, |
| "loss": 0.0068, |
| "step": 8750 |
| }, |
| { |
| "epoch": 1.9990913221263062, |
| "grad_norm": 0.004324722569435835, |
| "learning_rate": 6.674238982280782e-06, |
| "loss": 0.0147, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9846254071661238, |
| "eval_f1_macro": 0.9835472402504412, |
| "eval_loss": 0.1140478253364563, |
| "eval_runtime": 24.9, |
| "eval_samples_per_second": 308.233, |
| "eval_steps_per_second": 4.819, |
| "step": 8804 |
| }, |
| { |
| "epoch": 2.0104497955474785, |
| "grad_norm": 0.000489537778776139, |
| "learning_rate": 6.598515826139634e-06, |
| "loss": 0.0007, |
| "step": 8850 |
| }, |
| { |
| "epoch": 2.0218082689686505, |
| "grad_norm": 0.0013550950679928064, |
| "learning_rate": 6.522792669998486e-06, |
| "loss": 0.0027, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.033166742389823, |
| "grad_norm": 0.0009673606837168336, |
| "learning_rate": 6.447069513857338e-06, |
| "loss": 0.0091, |
| "step": 8950 |
| }, |
| { |
| "epoch": 2.044525215810995, |
| "grad_norm": 0.0004022994253318757, |
| "learning_rate": 6.371346357716191e-06, |
| "loss": 0.0003, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.055883689232167, |
| "grad_norm": 0.0007892374997027218, |
| "learning_rate": 6.2956232015750415e-06, |
| "loss": 0.0024, |
| "step": 9050 |
| }, |
| { |
| "epoch": 2.0672421626533395, |
| "grad_norm": 0.012260637246072292, |
| "learning_rate": 6.219900045433894e-06, |
| "loss": 0.0016, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.0786006360745115, |
| "grad_norm": 0.002542909001931548, |
| "learning_rate": 6.144176889292746e-06, |
| "loss": 0.0001, |
| "step": 9150 |
| }, |
| { |
| "epoch": 2.089959109495684, |
| "grad_norm": 0.002916930941864848, |
| "learning_rate": 6.068453733151598e-06, |
| "loss": 0.0073, |
| "step": 9200 |
| }, |
| { |
| "epoch": 2.101317582916856, |
| "grad_norm": 0.0003816105308942497, |
| "learning_rate": 5.99273057701045e-06, |
| "loss": 0.0089, |
| "step": 9250 |
| }, |
| { |
| "epoch": 2.112676056338028, |
| "grad_norm": 0.0005522597930394113, |
| "learning_rate": 5.917007420869302e-06, |
| "loss": 0.0049, |
| "step": 9300 |
| }, |
| { |
| "epoch": 2.1240345297592005, |
| "grad_norm": 0.0011015296913683414, |
| "learning_rate": 5.841284264728154e-06, |
| "loss": 0.0066, |
| "step": 9350 |
| }, |
| { |
| "epoch": 2.1353930031803725, |
| "grad_norm": 0.00041281242738477886, |
| "learning_rate": 5.765561108587007e-06, |
| "loss": 0.0019, |
| "step": 9400 |
| }, |
| { |
| "epoch": 2.146751476601545, |
| "grad_norm": 0.0007952914456836879, |
| "learning_rate": 5.689837952445858e-06, |
| "loss": 0.0036, |
| "step": 9450 |
| }, |
| { |
| "epoch": 2.158109950022717, |
| "grad_norm": 0.0004673805378843099, |
| "learning_rate": 5.614114796304711e-06, |
| "loss": 0.0042, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.169468423443889, |
| "grad_norm": 0.00038412591675296426, |
| "learning_rate": 5.538391640163563e-06, |
| "loss": 0.0001, |
| "step": 9550 |
| }, |
| { |
| "epoch": 2.1808268968650615, |
| "grad_norm": 0.00038422050420194864, |
| "learning_rate": 5.462668484022414e-06, |
| "loss": 0.0093, |
| "step": 9600 |
| }, |
| { |
| "epoch": 2.1921853702862335, |
| "grad_norm": 0.0006754286587238312, |
| "learning_rate": 5.3869453278812665e-06, |
| "loss": 0.0028, |
| "step": 9650 |
| }, |
| { |
| "epoch": 2.2035438437074055, |
| "grad_norm": 0.0004887759569101036, |
| "learning_rate": 5.311222171740119e-06, |
| "loss": 0.0051, |
| "step": 9700 |
| }, |
| { |
| "epoch": 2.214902317128578, |
| "grad_norm": 0.05471280589699745, |
| "learning_rate": 5.2354990155989705e-06, |
| "loss": 0.0059, |
| "step": 9750 |
| }, |
| { |
| "epoch": 2.22626079054975, |
| "grad_norm": 0.0008195925038307905, |
| "learning_rate": 5.159775859457823e-06, |
| "loss": 0.0036, |
| "step": 9800 |
| }, |
| { |
| "epoch": 2.2376192639709225, |
| "grad_norm": 0.000433308829087764, |
| "learning_rate": 5.0840527033166745e-06, |
| "loss": 0.0022, |
| "step": 9850 |
| }, |
| { |
| "epoch": 2.2489777373920945, |
| "grad_norm": 0.012868039309978485, |
| "learning_rate": 5.008329547175527e-06, |
| "loss": 0.0051, |
| "step": 9900 |
| }, |
| { |
| "epoch": 2.2603362108132665, |
| "grad_norm": 0.5331919193267822, |
| "learning_rate": 4.9326063910343785e-06, |
| "loss": 0.0115, |
| "step": 9950 |
| }, |
| { |
| "epoch": 2.271694684234439, |
| "grad_norm": 0.0005174391553737223, |
| "learning_rate": 4.856883234893231e-06, |
| "loss": 0.0007, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.283053157655611, |
| "grad_norm": 0.00032180239213630557, |
| "learning_rate": 4.7811600787520825e-06, |
| "loss": 0.0095, |
| "step": 10050 |
| }, |
| { |
| "epoch": 2.294411631076783, |
| "grad_norm": 0.07410291582345963, |
| "learning_rate": 4.705436922610935e-06, |
| "loss": 0.008, |
| "step": 10100 |
| }, |
| { |
| "epoch": 2.3057701044979555, |
| "grad_norm": 0.024364503100514412, |
| "learning_rate": 4.6297137664697866e-06, |
| "loss": 0.0026, |
| "step": 10150 |
| }, |
| { |
| "epoch": 2.3171285779191275, |
| "grad_norm": 0.00038883680826984346, |
| "learning_rate": 4.553990610328639e-06, |
| "loss": 0.0026, |
| "step": 10200 |
| }, |
| { |
| "epoch": 2.3284870513403, |
| "grad_norm": 0.0038193499203771353, |
| "learning_rate": 4.478267454187491e-06, |
| "loss": 0.0001, |
| "step": 10250 |
| }, |
| { |
| "epoch": 2.339845524761472, |
| "grad_norm": 0.0023336990270763636, |
| "learning_rate": 4.402544298046343e-06, |
| "loss": 0.0084, |
| "step": 10300 |
| }, |
| { |
| "epoch": 2.351203998182644, |
| "grad_norm": 0.04238196834921837, |
| "learning_rate": 4.326821141905195e-06, |
| "loss": 0.0033, |
| "step": 10350 |
| }, |
| { |
| "epoch": 2.3625624716038165, |
| "grad_norm": 0.000569637690205127, |
| "learning_rate": 4.251097985764047e-06, |
| "loss": 0.0012, |
| "step": 10400 |
| }, |
| { |
| "epoch": 2.3739209450249885, |
| "grad_norm": 0.00034202905953861773, |
| "learning_rate": 4.1753748296228995e-06, |
| "loss": 0.0036, |
| "step": 10450 |
| }, |
| { |
| "epoch": 2.385279418446161, |
| "grad_norm": 0.00034673314075917006, |
| "learning_rate": 4.099651673481751e-06, |
| "loss": 0.0013, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.396637891867333, |
| "grad_norm": 0.0012820770498365164, |
| "learning_rate": 4.023928517340603e-06, |
| "loss": 0.0006, |
| "step": 10550 |
| }, |
| { |
| "epoch": 2.407996365288505, |
| "grad_norm": 0.0017663290491327643, |
| "learning_rate": 3.948205361199455e-06, |
| "loss": 0.0002, |
| "step": 10600 |
| }, |
| { |
| "epoch": 2.4193548387096775, |
| "grad_norm": 13.78622817993164, |
| "learning_rate": 3.8724822050583075e-06, |
| "loss": 0.0029, |
| "step": 10650 |
| }, |
| { |
| "epoch": 2.4307133121308495, |
| "grad_norm": 0.00041380742914043367, |
| "learning_rate": 3.796759048917159e-06, |
| "loss": 0.0051, |
| "step": 10700 |
| }, |
| { |
| "epoch": 2.442071785552022, |
| "grad_norm": 0.0015727384015917778, |
| "learning_rate": 3.721035892776011e-06, |
| "loss": 0.0002, |
| "step": 10750 |
| }, |
| { |
| "epoch": 2.453430258973194, |
| "grad_norm": 0.00039928112528286874, |
| "learning_rate": 3.6453127366348636e-06, |
| "loss": 0.0041, |
| "step": 10800 |
| }, |
| { |
| "epoch": 2.464788732394366, |
| "grad_norm": 0.0007231036433950067, |
| "learning_rate": 3.569589580493715e-06, |
| "loss": 0.0013, |
| "step": 10850 |
| }, |
| { |
| "epoch": 2.4761472058155385, |
| "grad_norm": 0.0004043302033096552, |
| "learning_rate": 3.493866424352567e-06, |
| "loss": 0.0038, |
| "step": 10900 |
| }, |
| { |
| "epoch": 2.4875056792367105, |
| "grad_norm": 0.00035005921381525695, |
| "learning_rate": 3.418143268211419e-06, |
| "loss": 0.0042, |
| "step": 10950 |
| }, |
| { |
| "epoch": 2.498864152657883, |
| "grad_norm": 13.098333358764648, |
| "learning_rate": 3.3424201120702716e-06, |
| "loss": 0.0039, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.510222626079055, |
| "grad_norm": 0.0030333756003528833, |
| "learning_rate": 3.2666969559291232e-06, |
| "loss": 0.0002, |
| "step": 11050 |
| }, |
| { |
| "epoch": 2.521581099500227, |
| "grad_norm": 0.0002801103692036122, |
| "learning_rate": 3.1909737997879752e-06, |
| "loss": 0.0003, |
| "step": 11100 |
| }, |
| { |
| "epoch": 2.5329395729213995, |
| "grad_norm": 0.5628141164779663, |
| "learning_rate": 3.1152506436468277e-06, |
| "loss": 0.0064, |
| "step": 11150 |
| }, |
| { |
| "epoch": 2.5442980463425715, |
| "grad_norm": 0.002491287887096405, |
| "learning_rate": 3.0395274875056797e-06, |
| "loss": 0.0037, |
| "step": 11200 |
| }, |
| { |
| "epoch": 2.555656519763744, |
| "grad_norm": 0.002808187622576952, |
| "learning_rate": 2.9638043313645317e-06, |
| "loss": 0.0028, |
| "step": 11250 |
| }, |
| { |
| "epoch": 2.567014993184916, |
| "grad_norm": 0.043582722544670105, |
| "learning_rate": 2.8880811752233833e-06, |
| "loss": 0.002, |
| "step": 11300 |
| }, |
| { |
| "epoch": 2.578373466606088, |
| "grad_norm": 0.00028100161580368876, |
| "learning_rate": 2.8123580190822357e-06, |
| "loss": 0.0072, |
| "step": 11350 |
| }, |
| { |
| "epoch": 2.5897319400272605, |
| "grad_norm": 0.0003854044189210981, |
| "learning_rate": 2.7366348629410877e-06, |
| "loss": 0.0085, |
| "step": 11400 |
| }, |
| { |
| "epoch": 2.6010904134484325, |
| "grad_norm": 41.185237884521484, |
| "learning_rate": 2.6609117067999397e-06, |
| "loss": 0.0031, |
| "step": 11450 |
| }, |
| { |
| "epoch": 2.612448886869605, |
| "grad_norm": 0.0006349310860969126, |
| "learning_rate": 2.5851885506587913e-06, |
| "loss": 0.0001, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.623807360290777, |
| "grad_norm": 0.0002767475671134889, |
| "learning_rate": 2.5094653945176438e-06, |
| "loss": 0.0065, |
| "step": 11550 |
| }, |
| { |
| "epoch": 2.635165833711949, |
| "grad_norm": 0.002331246854737401, |
| "learning_rate": 2.4337422383764958e-06, |
| "loss": 0.006, |
| "step": 11600 |
| }, |
| { |
| "epoch": 2.646524307133121, |
| "grad_norm": 0.0015208119293674827, |
| "learning_rate": 2.3580190822353478e-06, |
| "loss": 0.0037, |
| "step": 11650 |
| }, |
| { |
| "epoch": 2.6578827805542935, |
| "grad_norm": 0.026821589097380638, |
| "learning_rate": 2.2822959260942e-06, |
| "loss": 0.0093, |
| "step": 11700 |
| }, |
| { |
| "epoch": 2.669241253975466, |
| "grad_norm": 5.861845016479492, |
| "learning_rate": 2.206572769953052e-06, |
| "loss": 0.0003, |
| "step": 11750 |
| }, |
| { |
| "epoch": 2.680599727396638, |
| "grad_norm": 0.00030583186889998615, |
| "learning_rate": 2.130849613811904e-06, |
| "loss": 0.0015, |
| "step": 11800 |
| }, |
| { |
| "epoch": 2.69195820081781, |
| "grad_norm": 0.004688043612986803, |
| "learning_rate": 2.055126457670756e-06, |
| "loss": 0.0069, |
| "step": 11850 |
| }, |
| { |
| "epoch": 2.703316674238982, |
| "grad_norm": 0.0014823823003098369, |
| "learning_rate": 1.979403301529608e-06, |
| "loss": 0.0011, |
| "step": 11900 |
| }, |
| { |
| "epoch": 2.7146751476601545, |
| "grad_norm": 0.0003014960384462029, |
| "learning_rate": 1.9036801453884599e-06, |
| "loss": 0.0001, |
| "step": 11950 |
| }, |
| { |
| "epoch": 2.7260336210813265, |
| "grad_norm": 0.0012561273761093616, |
| "learning_rate": 1.827956989247312e-06, |
| "loss": 0.0029, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.737392094502499, |
| "grad_norm": 0.08546403795480728, |
| "learning_rate": 1.7522338331061639e-06, |
| "loss": 0.0027, |
| "step": 12050 |
| }, |
| { |
| "epoch": 2.748750567923671, |
| "grad_norm": 0.0004358178994152695, |
| "learning_rate": 1.676510676965016e-06, |
| "loss": 0.0002, |
| "step": 12100 |
| }, |
| { |
| "epoch": 2.760109041344843, |
| "grad_norm": 0.0006170666310936213, |
| "learning_rate": 1.600787520823868e-06, |
| "loss": 0.0035, |
| "step": 12150 |
| }, |
| { |
| "epoch": 2.7714675147660155, |
| "grad_norm": 0.00022272793285083026, |
| "learning_rate": 1.5250643646827201e-06, |
| "loss": 0.0005, |
| "step": 12200 |
| }, |
| { |
| "epoch": 2.7828259881871875, |
| "grad_norm": 0.0002821196976583451, |
| "learning_rate": 1.449341208541572e-06, |
| "loss": 0.0003, |
| "step": 12250 |
| }, |
| { |
| "epoch": 2.79418446160836, |
| "grad_norm": 0.0002386348060099408, |
| "learning_rate": 1.3736180524004241e-06, |
| "loss": 0.0009, |
| "step": 12300 |
| }, |
| { |
| "epoch": 2.805542935029532, |
| "grad_norm": 0.05056820809841156, |
| "learning_rate": 1.2978948962592762e-06, |
| "loss": 0.0016, |
| "step": 12350 |
| }, |
| { |
| "epoch": 2.816901408450704, |
| "grad_norm": 0.0051028188318014145, |
| "learning_rate": 1.2221717401181282e-06, |
| "loss": 0.0005, |
| "step": 12400 |
| }, |
| { |
| "epoch": 2.8282598818718765, |
| "grad_norm": 0.00031195359770208597, |
| "learning_rate": 1.1464485839769802e-06, |
| "loss": 0.0047, |
| "step": 12450 |
| }, |
| { |
| "epoch": 2.8396183552930485, |
| "grad_norm": 0.0005304196383804083, |
| "learning_rate": 1.0707254278358322e-06, |
| "loss": 0.0005, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.850976828714221, |
| "grad_norm": 0.004895700607448816, |
| "learning_rate": 9.950022716946842e-07, |
| "loss": 0.0023, |
| "step": 12550 |
| }, |
| { |
| "epoch": 2.862335302135393, |
| "grad_norm": 0.00021093177201692015, |
| "learning_rate": 9.192791155535363e-07, |
| "loss": 0.0002, |
| "step": 12600 |
| }, |
| { |
| "epoch": 2.873693775556565, |
| "grad_norm": 0.00027342038811184466, |
| "learning_rate": 8.435559594123883e-07, |
| "loss": 0.0002, |
| "step": 12650 |
| }, |
| { |
| "epoch": 2.8850522489777375, |
| "grad_norm": 0.0002328462287550792, |
| "learning_rate": 7.678328032712403e-07, |
| "loss": 0.0001, |
| "step": 12700 |
| }, |
| { |
| "epoch": 2.8964107223989095, |
| "grad_norm": 0.00026081790565513074, |
| "learning_rate": 6.921096471300924e-07, |
| "loss": 0.0008, |
| "step": 12750 |
| }, |
| { |
| "epoch": 2.907769195820082, |
| "grad_norm": 0.07156021147966385, |
| "learning_rate": 6.163864909889445e-07, |
| "loss": 0.0015, |
| "step": 12800 |
| }, |
| { |
| "epoch": 2.919127669241254, |
| "grad_norm": 0.002825228963047266, |
| "learning_rate": 5.406633348477965e-07, |
| "loss": 0.0001, |
| "step": 12850 |
| }, |
| { |
| "epoch": 2.930486142662426, |
| "grad_norm": 0.0006366794114001095, |
| "learning_rate": 4.6494017870664856e-07, |
| "loss": 0.0049, |
| "step": 12900 |
| }, |
| { |
| "epoch": 2.9418446160835985, |
| "grad_norm": 3.086198329925537, |
| "learning_rate": 3.8921702256550057e-07, |
| "loss": 0.0009, |
| "step": 12950 |
| }, |
| { |
| "epoch": 2.9532030895047705, |
| "grad_norm": 0.000288445531623438, |
| "learning_rate": 3.134938664243526e-07, |
| "loss": 0.0003, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.964561562925943, |
| "grad_norm": 0.00021304447727743536, |
| "learning_rate": 2.3777071028320462e-07, |
| "loss": 0.0006, |
| "step": 13050 |
| }, |
| { |
| "epoch": 2.975920036347115, |
| "grad_norm": 0.0010498821502551436, |
| "learning_rate": 1.6204755414205666e-07, |
| "loss": 0.0001, |
| "step": 13100 |
| }, |
| { |
| "epoch": 2.987278509768287, |
| "grad_norm": 0.00040187168633565307, |
| "learning_rate": 8.632439800090868e-08, |
| "loss": 0.0005, |
| "step": 13150 |
| }, |
| { |
| "epoch": 2.9986369831894595, |
| "grad_norm": 0.0003258317010477185, |
| "learning_rate": 1.0601241859760717e-08, |
| "loss": 0.0092, |
| "step": 13200 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9877524429967427, |
| "eval_f1_macro": 0.9868791709426586, |
| "eval_loss": 0.09756067395210266, |
| "eval_runtime": 25.8171, |
| "eval_samples_per_second": 297.284, |
| "eval_steps_per_second": 4.648, |
| "step": 13206 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 13206, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.341359158497128e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|