{ "best_global_step": 13206, "best_metric": 0.9868791709426586, "best_model_checkpoint": "/home/myid/bp67339/plant_disease/models/deberta_v3_base_textclf_phase3/checkpoint-13206", "epoch": 3.0, "eval_steps": 500, "global_step": 13206, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011358473421172195, "grad_norm": 2.552668809890747, "learning_rate": 1.9925791306981678e-05, "loss": 0.1196, "step": 50 }, { "epoch": 0.02271694684234439, "grad_norm": 0.47541937232017517, "learning_rate": 1.985006815084053e-05, "loss": 0.0812, "step": 100 }, { "epoch": 0.034075420263516586, "grad_norm": 7.045262813568115, "learning_rate": 1.977434499469938e-05, "loss": 0.0859, "step": 150 }, { "epoch": 0.04543389368468878, "grad_norm": 0.03415582329034805, "learning_rate": 1.9698621838558232e-05, "loss": 0.0847, "step": 200 }, { "epoch": 0.05679236710586097, "grad_norm": 0.05752035602927208, "learning_rate": 1.9622898682417084e-05, "loss": 0.0988, "step": 250 }, { "epoch": 0.06815084052703317, "grad_norm": 3.0614030361175537, "learning_rate": 1.9547175526275936e-05, "loss": 0.1058, "step": 300 }, { "epoch": 0.07950931394820536, "grad_norm": 1.2528300285339355, "learning_rate": 1.947145237013479e-05, "loss": 0.0842, "step": 350 }, { "epoch": 0.09086778736937756, "grad_norm": 8.556694984436035, "learning_rate": 1.9395729213993642e-05, "loss": 0.0777, "step": 400 }, { "epoch": 0.10222626079054975, "grad_norm": 20.690099716186523, "learning_rate": 1.9320006057852494e-05, "loss": 0.0764, "step": 450 }, { "epoch": 0.11358473421172194, "grad_norm": 4.0012030601501465, "learning_rate": 1.9244282901711345e-05, "loss": 0.0814, "step": 500 }, { "epoch": 0.12494320763289414, "grad_norm": 0.06608668714761734, "learning_rate": 1.9168559745570197e-05, "loss": 0.0637, "step": 550 }, { "epoch": 0.13630168105406634, "grad_norm": 5.306321620941162, "learning_rate": 1.909283658942905e-05, "loss": 0.0479, "step": 600 }, { "epoch": 0.14766015447523853, "grad_norm": 5.900157451629639, "learning_rate": 1.90171134332879e-05, "loss": 0.0892, "step": 650 }, { "epoch": 0.15901862789641072, "grad_norm": 2.73388934135437, "learning_rate": 1.8941390277146755e-05, "loss": 0.0721, "step": 700 }, { "epoch": 0.1703771013175829, "grad_norm": 6.770120143890381, "learning_rate": 1.8865667121005603e-05, "loss": 0.082, "step": 750 }, { "epoch": 0.18173557473875512, "grad_norm": 0.010230629704892635, "learning_rate": 1.8789943964864458e-05, "loss": 0.0604, "step": 800 }, { "epoch": 0.1930940481599273, "grad_norm": 11.530449867248535, "learning_rate": 1.871422080872331e-05, "loss": 0.0578, "step": 850 }, { "epoch": 0.2044525215810995, "grad_norm": 0.014014088548719883, "learning_rate": 1.863849765258216e-05, "loss": 0.0574, "step": 900 }, { "epoch": 0.2158109950022717, "grad_norm": 0.016662120819091797, "learning_rate": 1.8562774496441013e-05, "loss": 0.0654, "step": 950 }, { "epoch": 0.22716946842344388, "grad_norm": 1.2837393283843994, "learning_rate": 1.8487051340299865e-05, "loss": 0.0638, "step": 1000 }, { "epoch": 0.2385279418446161, "grad_norm": 0.45574066042900085, "learning_rate": 1.8411328184158716e-05, "loss": 0.0448, "step": 1050 }, { "epoch": 0.24988641526578828, "grad_norm": 2.423413038253784, "learning_rate": 1.8335605028017568e-05, "loss": 0.0535, "step": 1100 }, { "epoch": 0.2612448886869605, "grad_norm": 4.537928104400635, "learning_rate": 1.8259881871876423e-05, "loss": 0.0833, "step": 1150 }, { "epoch": 0.2726033621081327, "grad_norm": 6.994387626647949, "learning_rate": 1.8184158715735274e-05, "loss": 0.06, "step": 1200 }, { "epoch": 0.2839618355293049, "grad_norm": 0.01928258314728737, "learning_rate": 1.8108435559594126e-05, "loss": 0.0498, "step": 1250 }, { "epoch": 0.29532030895047706, "grad_norm": 0.4064951241016388, "learning_rate": 1.8032712403452977e-05, "loss": 0.0491, "step": 1300 }, { "epoch": 0.30667878237164925, "grad_norm": 2.6388704776763916, "learning_rate": 1.795698924731183e-05, "loss": 0.0327, "step": 1350 }, { "epoch": 0.31803725579282144, "grad_norm": 2.0852885246276855, "learning_rate": 1.788126609117068e-05, "loss": 0.0978, "step": 1400 }, { "epoch": 0.3293957292139936, "grad_norm": 2.8278560638427734, "learning_rate": 1.7805542935029532e-05, "loss": 0.0521, "step": 1450 }, { "epoch": 0.3407542026351658, "grad_norm": 4.4425177574157715, "learning_rate": 1.7729819778888387e-05, "loss": 0.0532, "step": 1500 }, { "epoch": 0.352112676056338, "grad_norm": 0.0768510028719902, "learning_rate": 1.7654096622747235e-05, "loss": 0.0486, "step": 1550 }, { "epoch": 0.36347114947751025, "grad_norm": 0.4614756405353546, "learning_rate": 1.757837346660609e-05, "loss": 0.0419, "step": 1600 }, { "epoch": 0.37482962289868244, "grad_norm": 0.5945758819580078, "learning_rate": 1.7502650310464942e-05, "loss": 0.055, "step": 1650 }, { "epoch": 0.3861880963198546, "grad_norm": 0.5370413661003113, "learning_rate": 1.7426927154323794e-05, "loss": 0.0434, "step": 1700 }, { "epoch": 0.3975465697410268, "grad_norm": 13.6658296585083, "learning_rate": 1.7351203998182645e-05, "loss": 0.0329, "step": 1750 }, { "epoch": 0.408905043162199, "grad_norm": 0.9185870885848999, "learning_rate": 1.72754808420415e-05, "loss": 0.0584, "step": 1800 }, { "epoch": 0.4202635165833712, "grad_norm": 0.3048367500305176, "learning_rate": 1.719975768590035e-05, "loss": 0.0561, "step": 1850 }, { "epoch": 0.4316219900045434, "grad_norm": 3.6233348846435547, "learning_rate": 1.71240345297592e-05, "loss": 0.0568, "step": 1900 }, { "epoch": 0.44298046342571556, "grad_norm": 7.301445007324219, "learning_rate": 1.7048311373618055e-05, "loss": 0.0545, "step": 1950 }, { "epoch": 0.45433893684688775, "grad_norm": 12.261190414428711, "learning_rate": 1.6972588217476907e-05, "loss": 0.0347, "step": 2000 }, { "epoch": 0.46569741026806, "grad_norm": 0.004870145116001368, "learning_rate": 1.6896865061335758e-05, "loss": 0.0297, "step": 2050 }, { "epoch": 0.4770558836892322, "grad_norm": 0.08898526430130005, "learning_rate": 1.682114190519461e-05, "loss": 0.0368, "step": 2100 }, { "epoch": 0.4884143571104044, "grad_norm": 0.007771740201860666, "learning_rate": 1.674541874905346e-05, "loss": 0.037, "step": 2150 }, { "epoch": 0.49977283053157656, "grad_norm": 0.06338020414113998, "learning_rate": 1.6669695592912313e-05, "loss": 0.0351, "step": 2200 }, { "epoch": 0.5111313039527488, "grad_norm": 0.2080918550491333, "learning_rate": 1.6593972436771168e-05, "loss": 0.0424, "step": 2250 }, { "epoch": 0.522489777373921, "grad_norm": 0.005484211724251509, "learning_rate": 1.651824928063002e-05, "loss": 0.0498, "step": 2300 }, { "epoch": 0.5338482507950931, "grad_norm": 1.5300865173339844, "learning_rate": 1.6442526124488868e-05, "loss": 0.0309, "step": 2350 }, { "epoch": 0.5452067242162654, "grad_norm": 1.4512425661087036, "learning_rate": 1.6366802968347723e-05, "loss": 0.0397, "step": 2400 }, { "epoch": 0.5565651976374375, "grad_norm": 0.07551686465740204, "learning_rate": 1.6291079812206574e-05, "loss": 0.0368, "step": 2450 }, { "epoch": 0.5679236710586097, "grad_norm": 5.5924272537231445, "learning_rate": 1.6215356656065426e-05, "loss": 0.0169, "step": 2500 }, { "epoch": 0.5792821444797819, "grad_norm": 0.03456870838999748, "learning_rate": 1.6139633499924277e-05, "loss": 0.0397, "step": 2550 }, { "epoch": 0.5906406179009541, "grad_norm": 1.605539321899414, "learning_rate": 1.6063910343783132e-05, "loss": 0.0353, "step": 2600 }, { "epoch": 0.6019990913221263, "grad_norm": 2.287083864212036, "learning_rate": 1.598818718764198e-05, "loss": 0.0233, "step": 2650 }, { "epoch": 0.6133575647432985, "grad_norm": 0.02498025633394718, "learning_rate": 1.5912464031500836e-05, "loss": 0.0389, "step": 2700 }, { "epoch": 0.6247160381644707, "grad_norm": 0.009853340685367584, "learning_rate": 1.5836740875359687e-05, "loss": 0.0343, "step": 2750 }, { "epoch": 0.6360745115856429, "grad_norm": 0.006630271207541227, "learning_rate": 1.576101771921854e-05, "loss": 0.0211, "step": 2800 }, { "epoch": 0.6474329850068151, "grad_norm": 9.345346450805664, "learning_rate": 1.568529456307739e-05, "loss": 0.0374, "step": 2850 }, { "epoch": 0.6587914584279873, "grad_norm": 6.142024040222168, "learning_rate": 1.5609571406936242e-05, "loss": 0.0551, "step": 2900 }, { "epoch": 0.6701499318491595, "grad_norm": 0.0021689136046916246, "learning_rate": 1.5533848250795093e-05, "loss": 0.0223, "step": 2950 }, { "epoch": 0.6815084052703316, "grad_norm": 0.12782888114452362, "learning_rate": 1.5458125094653945e-05, "loss": 0.0397, "step": 3000 }, { "epoch": 0.6928668786915039, "grad_norm": 0.021150466054677963, "learning_rate": 1.53824019385128e-05, "loss": 0.0465, "step": 3050 }, { "epoch": 0.704225352112676, "grad_norm": 0.03622226044535637, "learning_rate": 1.530667878237165e-05, "loss": 0.0245, "step": 3100 }, { "epoch": 0.7155838255338483, "grad_norm": 0.01021323911845684, "learning_rate": 1.5230955626230503e-05, "loss": 0.04, "step": 3150 }, { "epoch": 0.7269422989550205, "grad_norm": 0.3106459677219391, "learning_rate": 1.5155232470089353e-05, "loss": 0.0257, "step": 3200 }, { "epoch": 0.7383007723761926, "grad_norm": 0.00275537115521729, "learning_rate": 1.5079509313948206e-05, "loss": 0.0263, "step": 3250 }, { "epoch": 0.7496592457973649, "grad_norm": 2.062044143676758, "learning_rate": 1.5003786157807058e-05, "loss": 0.0309, "step": 3300 }, { "epoch": 0.761017719218537, "grad_norm": 9.71252155303955, "learning_rate": 1.4928063001665911e-05, "loss": 0.0398, "step": 3350 }, { "epoch": 0.7723761926397092, "grad_norm": 0.1981002688407898, "learning_rate": 1.4852339845524763e-05, "loss": 0.0361, "step": 3400 }, { "epoch": 0.7837346660608814, "grad_norm": 5.37298059463501, "learning_rate": 1.4776616689383614e-05, "loss": 0.028, "step": 3450 }, { "epoch": 0.7950931394820536, "grad_norm": 0.01882290095090866, "learning_rate": 1.4700893533242466e-05, "loss": 0.0326, "step": 3500 }, { "epoch": 0.8064516129032258, "grad_norm": 0.035887233912944794, "learning_rate": 1.462517037710132e-05, "loss": 0.0416, "step": 3550 }, { "epoch": 0.817810086324398, "grad_norm": 13.781176567077637, "learning_rate": 1.4549447220960171e-05, "loss": 0.023, "step": 3600 }, { "epoch": 0.8291685597455702, "grad_norm": 0.06990953534841537, "learning_rate": 1.4473724064819024e-05, "loss": 0.0213, "step": 3650 }, { "epoch": 0.8405270331667424, "grad_norm": 0.6063719987869263, "learning_rate": 1.4398000908677874e-05, "loss": 0.0228, "step": 3700 }, { "epoch": 0.8518855065879146, "grad_norm": 4.930353164672852, "learning_rate": 1.4322277752536726e-05, "loss": 0.0335, "step": 3750 }, { "epoch": 0.8632439800090868, "grad_norm": 0.004565690644085407, "learning_rate": 1.4246554596395579e-05, "loss": 0.0214, "step": 3800 }, { "epoch": 0.874602453430259, "grad_norm": 0.17241248488426208, "learning_rate": 1.417083144025443e-05, "loss": 0.0401, "step": 3850 }, { "epoch": 0.8859609268514311, "grad_norm": 5.231073379516602, "learning_rate": 1.4095108284113284e-05, "loss": 0.0168, "step": 3900 }, { "epoch": 0.8973194002726034, "grad_norm": 0.25602054595947266, "learning_rate": 1.4019385127972135e-05, "loss": 0.0399, "step": 3950 }, { "epoch": 0.9086778736937755, "grad_norm": 0.04758666455745697, "learning_rate": 1.3943661971830987e-05, "loss": 0.0232, "step": 4000 }, { "epoch": 0.9200363471149478, "grad_norm": 0.007415900472551584, "learning_rate": 1.3867938815689839e-05, "loss": 0.0254, "step": 4050 }, { "epoch": 0.93139482053612, "grad_norm": 0.05941289663314819, "learning_rate": 1.3792215659548692e-05, "loss": 0.0182, "step": 4100 }, { "epoch": 0.9427532939572921, "grad_norm": 0.2857076823711395, "learning_rate": 1.3716492503407543e-05, "loss": 0.015, "step": 4150 }, { "epoch": 0.9541117673784644, "grad_norm": 6.7106852531433105, "learning_rate": 1.3640769347266397e-05, "loss": 0.0218, "step": 4200 }, { "epoch": 0.9654702407996365, "grad_norm": 2.165870189666748, "learning_rate": 1.3565046191125247e-05, "loss": 0.0477, "step": 4250 }, { "epoch": 0.9768287142208087, "grad_norm": 1.2232965230941772, "learning_rate": 1.3489323034984098e-05, "loss": 0.0227, "step": 4300 }, { "epoch": 0.9881871876419809, "grad_norm": 6.756147384643555, "learning_rate": 1.3413599878842951e-05, "loss": 0.0273, "step": 4350 }, { "epoch": 0.9995456610631531, "grad_norm": 0.07053057104349136, "learning_rate": 1.3337876722701803e-05, "loss": 0.0217, "step": 4400 }, { "epoch": 1.0, "eval_accuracy": 0.9839739413680781, "eval_f1_macro": 0.982934850082554, "eval_loss": 0.08169186860322952, "eval_runtime": 25.3611, "eval_samples_per_second": 302.629, "eval_steps_per_second": 4.732, "step": 4402 }, { "epoch": 1.0109041344843253, "grad_norm": 0.005929551087319851, "learning_rate": 1.3262153566560656e-05, "loss": 0.0101, "step": 4450 }, { "epoch": 1.0222626079054975, "grad_norm": 0.011584239080548286, "learning_rate": 1.3186430410419508e-05, "loss": 0.012, "step": 4500 }, { "epoch": 1.0336210813266697, "grad_norm": 0.06345133483409882, "learning_rate": 1.3110707254278358e-05, "loss": 0.0295, "step": 4550 }, { "epoch": 1.044979554747842, "grad_norm": 0.003701810259371996, "learning_rate": 1.3034984098137211e-05, "loss": 0.0096, "step": 4600 }, { "epoch": 1.056338028169014, "grad_norm": 0.0013922312064096332, "learning_rate": 1.2959260941996063e-05, "loss": 0.0267, "step": 4650 }, { "epoch": 1.0676965015901863, "grad_norm": 0.0026676456909626722, "learning_rate": 1.2883537785854916e-05, "loss": 0.0145, "step": 4700 }, { "epoch": 1.0790549750113585, "grad_norm": 0.0031162630766630173, "learning_rate": 1.2807814629713768e-05, "loss": 0.0124, "step": 4750 }, { "epoch": 1.0904134484325307, "grad_norm": 3.748222589492798, "learning_rate": 1.273209147357262e-05, "loss": 0.0074, "step": 4800 }, { "epoch": 1.1017719218537028, "grad_norm": 0.0014477827353402972, "learning_rate": 1.265636831743147e-05, "loss": 0.0179, "step": 4850 }, { "epoch": 1.113130395274875, "grad_norm": 4.411365985870361, "learning_rate": 1.2580645161290324e-05, "loss": 0.0181, "step": 4900 }, { "epoch": 1.1244888686960473, "grad_norm": 0.05877981334924698, "learning_rate": 1.2504922005149176e-05, "loss": 0.013, "step": 4950 }, { "epoch": 1.1358473421172195, "grad_norm": 0.999143123626709, "learning_rate": 1.2429198849008029e-05, "loss": 0.0114, "step": 5000 }, { "epoch": 1.1472058155383915, "grad_norm": 0.0030792481265962124, "learning_rate": 1.2353475692866879e-05, "loss": 0.0114, "step": 5050 }, { "epoch": 1.1585642889595638, "grad_norm": 0.0014463861007243395, "learning_rate": 1.227775253672573e-05, "loss": 0.0188, "step": 5100 }, { "epoch": 1.169922762380736, "grad_norm": 0.0017361458158120513, "learning_rate": 1.2202029380584584e-05, "loss": 0.0234, "step": 5150 }, { "epoch": 1.1812812358019082, "grad_norm": 13.90465259552002, "learning_rate": 1.2126306224443435e-05, "loss": 0.0078, "step": 5200 }, { "epoch": 1.1926397092230805, "grad_norm": 5.497283935546875, "learning_rate": 1.2050583068302289e-05, "loss": 0.0124, "step": 5250 }, { "epoch": 1.2039981826442525, "grad_norm": 3.7859766483306885, "learning_rate": 1.197485991216114e-05, "loss": 0.0319, "step": 5300 }, { "epoch": 1.2153566560654248, "grad_norm": 0.0024443278089165688, "learning_rate": 1.1899136756019992e-05, "loss": 0.0177, "step": 5350 }, { "epoch": 1.226715129486597, "grad_norm": 0.20258040726184845, "learning_rate": 1.1823413599878843e-05, "loss": 0.009, "step": 5400 }, { "epoch": 1.2380736029077692, "grad_norm": 0.0014519239775836468, "learning_rate": 1.1747690443737697e-05, "loss": 0.0069, "step": 5450 }, { "epoch": 1.2494320763289415, "grad_norm": 0.02450985088944435, "learning_rate": 1.1671967287596548e-05, "loss": 0.017, "step": 5500 }, { "epoch": 1.2607905497501135, "grad_norm": 0.004507457371801138, "learning_rate": 1.1596244131455401e-05, "loss": 0.01, "step": 5550 }, { "epoch": 1.2721490231712858, "grad_norm": 9.089266777038574, "learning_rate": 1.1520520975314251e-05, "loss": 0.0382, "step": 5600 }, { "epoch": 1.283507496592458, "grad_norm": 5.648477077484131, "learning_rate": 1.1444797819173103e-05, "loss": 0.0165, "step": 5650 }, { "epoch": 1.2948659700136302, "grad_norm": 1.2645410299301147, "learning_rate": 1.1369074663031956e-05, "loss": 0.0163, "step": 5700 }, { "epoch": 1.3062244434348025, "grad_norm": 0.0017288923263549805, "learning_rate": 1.1293351506890808e-05, "loss": 0.0202, "step": 5750 }, { "epoch": 1.3175829168559745, "grad_norm": 0.00359937222674489, "learning_rate": 1.1217628350749661e-05, "loss": 0.0135, "step": 5800 }, { "epoch": 1.3289413902771468, "grad_norm": 4.165374279022217, "learning_rate": 1.1141905194608513e-05, "loss": 0.0132, "step": 5850 }, { "epoch": 1.340299863698319, "grad_norm": 0.0018008551560342312, "learning_rate": 1.1066182038467364e-05, "loss": 0.0112, "step": 5900 }, { "epoch": 1.351658337119491, "grad_norm": 0.5573896765708923, "learning_rate": 1.0990458882326216e-05, "loss": 0.0221, "step": 5950 }, { "epoch": 1.3630168105406633, "grad_norm": 1.456154704093933, "learning_rate": 1.091473572618507e-05, "loss": 0.011, "step": 6000 }, { "epoch": 1.3743752839618355, "grad_norm": 3.8196120262145996, "learning_rate": 1.083901257004392e-05, "loss": 0.0158, "step": 6050 }, { "epoch": 1.3857337573830077, "grad_norm": 0.0017874937038868666, "learning_rate": 1.0763289413902774e-05, "loss": 0.0107, "step": 6100 }, { "epoch": 1.39709223080418, "grad_norm": 0.0008583781309425831, "learning_rate": 1.0687566257761624e-05, "loss": 0.0031, "step": 6150 }, { "epoch": 1.408450704225352, "grad_norm": 0.44672343134880066, "learning_rate": 1.0611843101620476e-05, "loss": 0.0223, "step": 6200 }, { "epoch": 1.4198091776465243, "grad_norm": 0.009662467055022717, "learning_rate": 1.0536119945479329e-05, "loss": 0.0157, "step": 6250 }, { "epoch": 1.4311676510676965, "grad_norm": 0.0007439041510224342, "learning_rate": 1.046039678933818e-05, "loss": 0.0165, "step": 6300 }, { "epoch": 1.4425261244888687, "grad_norm": 0.001364586642012, "learning_rate": 1.0384673633197034e-05, "loss": 0.0054, "step": 6350 }, { "epoch": 1.453884597910041, "grad_norm": 0.001398891326971352, "learning_rate": 1.0308950477055884e-05, "loss": 0.0087, "step": 6400 }, { "epoch": 1.465243071331213, "grad_norm": 0.002500066999346018, "learning_rate": 1.0233227320914735e-05, "loss": 0.0117, "step": 6450 }, { "epoch": 1.4766015447523853, "grad_norm": 0.0015706096310168505, "learning_rate": 1.0157504164773588e-05, "loss": 0.0173, "step": 6500 }, { "epoch": 1.4879600181735575, "grad_norm": 0.0015295586781576276, "learning_rate": 1.008178100863244e-05, "loss": 0.0284, "step": 6550 }, { "epoch": 1.4993184915947297, "grad_norm": 2.286747932434082, "learning_rate": 1.0006057852491293e-05, "loss": 0.011, "step": 6600 }, { "epoch": 1.510676965015902, "grad_norm": 0.002311094431206584, "learning_rate": 9.930334696350145e-06, "loss": 0.0166, "step": 6650 }, { "epoch": 1.522035438437074, "grad_norm": 0.0014053047634661198, "learning_rate": 9.854611540208996e-06, "loss": 0.0174, "step": 6700 }, { "epoch": 1.5333939118582463, "grad_norm": 0.012581984512507915, "learning_rate": 9.778888384067848e-06, "loss": 0.0099, "step": 6750 }, { "epoch": 1.5447523852794185, "grad_norm": 0.023738177493214607, "learning_rate": 9.703165227926701e-06, "loss": 0.0098, "step": 6800 }, { "epoch": 1.5561108587005905, "grad_norm": 0.014806082472205162, "learning_rate": 9.627442071785553e-06, "loss": 0.017, "step": 6850 }, { "epoch": 1.567469332121763, "grad_norm": 0.11848437041044235, "learning_rate": 9.551718915644405e-06, "loss": 0.0051, "step": 6900 }, { "epoch": 1.578827805542935, "grad_norm": 2.107661485671997, "learning_rate": 9.475995759503258e-06, "loss": 0.0066, "step": 6950 }, { "epoch": 1.5901862789641072, "grad_norm": 0.08532427251338959, "learning_rate": 9.40027260336211e-06, "loss": 0.0162, "step": 7000 }, { "epoch": 1.6015447523852795, "grad_norm": 0.0039615873247385025, "learning_rate": 9.324549447220961e-06, "loss": 0.0043, "step": 7050 }, { "epoch": 1.6129032258064515, "grad_norm": 0.0013789839576929808, "learning_rate": 9.248826291079813e-06, "loss": 0.0127, "step": 7100 }, { "epoch": 1.6242616992276238, "grad_norm": 9.550993919372559, "learning_rate": 9.173103134938664e-06, "loss": 0.0106, "step": 7150 }, { "epoch": 1.635620172648796, "grad_norm": 0.0011268022935837507, "learning_rate": 9.097379978797517e-06, "loss": 0.009, "step": 7200 }, { "epoch": 1.6469786460699682, "grad_norm": 0.0006976621225476265, "learning_rate": 9.021656822656369e-06, "loss": 0.015, "step": 7250 }, { "epoch": 1.6583371194911405, "grad_norm": 0.0014887871220707893, "learning_rate": 8.94593366651522e-06, "loss": 0.015, "step": 7300 }, { "epoch": 1.6696955929123125, "grad_norm": 0.0009298041695728898, "learning_rate": 8.870210510374074e-06, "loss": 0.0155, "step": 7350 }, { "epoch": 1.6810540663334848, "grad_norm": 0.287706583738327, "learning_rate": 8.794487354232925e-06, "loss": 0.0116, "step": 7400 }, { "epoch": 1.692412539754657, "grad_norm": 0.051804594695568085, "learning_rate": 8.718764198091777e-06, "loss": 0.0086, "step": 7450 }, { "epoch": 1.703771013175829, "grad_norm": 1.3675575256347656, "learning_rate": 8.643041041950629e-06, "loss": 0.023, "step": 7500 }, { "epoch": 1.7151294865970015, "grad_norm": 0.0012162342900410295, "learning_rate": 8.56731788580948e-06, "loss": 0.0081, "step": 7550 }, { "epoch": 1.7264879600181735, "grad_norm": 0.0013599529629573226, "learning_rate": 8.491594729668334e-06, "loss": 0.0075, "step": 7600 }, { "epoch": 1.7378464334393458, "grad_norm": 0.0011247453512623906, "learning_rate": 8.415871573527185e-06, "loss": 0.0079, "step": 7650 }, { "epoch": 1.749204906860518, "grad_norm": 0.003529078559949994, "learning_rate": 8.340148417386037e-06, "loss": 0.0053, "step": 7700 }, { "epoch": 1.76056338028169, "grad_norm": 13.674657821655273, "learning_rate": 8.26442526124489e-06, "loss": 0.0058, "step": 7750 }, { "epoch": 1.7719218537028625, "grad_norm": 0.0018937140703201294, "learning_rate": 8.188702105103742e-06, "loss": 0.0073, "step": 7800 }, { "epoch": 1.7832803271240345, "grad_norm": 0.0012737379875034094, "learning_rate": 8.112978948962593e-06, "loss": 0.0067, "step": 7850 }, { "epoch": 1.7946388005452067, "grad_norm": 0.0010197096271440387, "learning_rate": 8.037255792821446e-06, "loss": 0.0057, "step": 7900 }, { "epoch": 1.805997273966379, "grad_norm": 0.009366828948259354, "learning_rate": 7.961532636680296e-06, "loss": 0.026, "step": 7950 }, { "epoch": 1.817355747387551, "grad_norm": 0.012473216280341148, "learning_rate": 7.88580948053915e-06, "loss": 0.024, "step": 8000 }, { "epoch": 1.8287142208087233, "grad_norm": 0.26876014471054077, "learning_rate": 7.810086324398001e-06, "loss": 0.0079, "step": 8050 }, { "epoch": 1.8400726942298955, "grad_norm": 0.0026707984507083893, "learning_rate": 7.734363168256853e-06, "loss": 0.0149, "step": 8100 }, { "epoch": 1.8514311676510677, "grad_norm": 0.036166541278362274, "learning_rate": 7.658640012115706e-06, "loss": 0.0062, "step": 8150 }, { "epoch": 1.86278964107224, "grad_norm": 0.013438834808766842, "learning_rate": 7.582916855974558e-06, "loss": 0.0047, "step": 8200 }, { "epoch": 1.874148114493412, "grad_norm": 0.0015038993442431092, "learning_rate": 7.507193699833409e-06, "loss": 0.0155, "step": 8250 }, { "epoch": 1.8855065879145843, "grad_norm": 0.0006626308313570917, "learning_rate": 7.431470543692262e-06, "loss": 0.0014, "step": 8300 }, { "epoch": 1.8968650613357565, "grad_norm": 0.0032690023072063923, "learning_rate": 7.355747387551114e-06, "loss": 0.0103, "step": 8350 }, { "epoch": 1.9082235347569285, "grad_norm": 0.0008181309676729143, "learning_rate": 7.280024231409966e-06, "loss": 0.0184, "step": 8400 }, { "epoch": 1.919582008178101, "grad_norm": 0.0026349183171987534, "learning_rate": 7.204301075268818e-06, "loss": 0.0074, "step": 8450 }, { "epoch": 1.930940481599273, "grad_norm": 0.0025627773720771074, "learning_rate": 7.12857791912767e-06, "loss": 0.0061, "step": 8500 }, { "epoch": 1.9422989550204453, "grad_norm": 0.000963448139373213, "learning_rate": 7.052854762986522e-06, "loss": 0.0049, "step": 8550 }, { "epoch": 1.9536574284416175, "grad_norm": 0.0005609341314993799, "learning_rate": 6.977131606845375e-06, "loss": 0.0007, "step": 8600 }, { "epoch": 1.9650159018627895, "grad_norm": 0.0011694286949932575, "learning_rate": 6.901408450704225e-06, "loss": 0.0017, "step": 8650 }, { "epoch": 1.976374375283962, "grad_norm": 0.0010692242067307234, "learning_rate": 6.825685294563078e-06, "loss": 0.0115, "step": 8700 }, { "epoch": 1.987732848705134, "grad_norm": 0.05440627783536911, "learning_rate": 6.74996213842193e-06, "loss": 0.0068, "step": 8750 }, { "epoch": 1.9990913221263062, "grad_norm": 0.004324722569435835, "learning_rate": 6.674238982280782e-06, "loss": 0.0147, "step": 8800 }, { "epoch": 2.0, "eval_accuracy": 0.9846254071661238, "eval_f1_macro": 0.9835472402504412, "eval_loss": 0.1140478253364563, "eval_runtime": 24.9, "eval_samples_per_second": 308.233, "eval_steps_per_second": 4.819, "step": 8804 }, { "epoch": 2.0104497955474785, "grad_norm": 0.000489537778776139, "learning_rate": 6.598515826139634e-06, "loss": 0.0007, "step": 8850 }, { "epoch": 2.0218082689686505, "grad_norm": 0.0013550950679928064, "learning_rate": 6.522792669998486e-06, "loss": 0.0027, "step": 8900 }, { "epoch": 2.033166742389823, "grad_norm": 0.0009673606837168336, "learning_rate": 6.447069513857338e-06, "loss": 0.0091, "step": 8950 }, { "epoch": 2.044525215810995, "grad_norm": 0.0004022994253318757, "learning_rate": 6.371346357716191e-06, "loss": 0.0003, "step": 9000 }, { "epoch": 2.055883689232167, "grad_norm": 0.0007892374997027218, "learning_rate": 6.2956232015750415e-06, "loss": 0.0024, "step": 9050 }, { "epoch": 2.0672421626533395, "grad_norm": 0.012260637246072292, "learning_rate": 6.219900045433894e-06, "loss": 0.0016, "step": 9100 }, { "epoch": 2.0786006360745115, "grad_norm": 0.002542909001931548, "learning_rate": 6.144176889292746e-06, "loss": 0.0001, "step": 9150 }, { "epoch": 2.089959109495684, "grad_norm": 0.002916930941864848, "learning_rate": 6.068453733151598e-06, "loss": 0.0073, "step": 9200 }, { "epoch": 2.101317582916856, "grad_norm": 0.0003816105308942497, "learning_rate": 5.99273057701045e-06, "loss": 0.0089, "step": 9250 }, { "epoch": 2.112676056338028, "grad_norm": 0.0005522597930394113, "learning_rate": 5.917007420869302e-06, "loss": 0.0049, "step": 9300 }, { "epoch": 2.1240345297592005, "grad_norm": 0.0011015296913683414, "learning_rate": 5.841284264728154e-06, "loss": 0.0066, "step": 9350 }, { "epoch": 2.1353930031803725, "grad_norm": 0.00041281242738477886, "learning_rate": 5.765561108587007e-06, "loss": 0.0019, "step": 9400 }, { "epoch": 2.146751476601545, "grad_norm": 0.0007952914456836879, "learning_rate": 5.689837952445858e-06, "loss": 0.0036, "step": 9450 }, { "epoch": 2.158109950022717, "grad_norm": 0.0004673805378843099, "learning_rate": 5.614114796304711e-06, "loss": 0.0042, "step": 9500 }, { "epoch": 2.169468423443889, "grad_norm": 0.00038412591675296426, "learning_rate": 5.538391640163563e-06, "loss": 0.0001, "step": 9550 }, { "epoch": 2.1808268968650615, "grad_norm": 0.00038422050420194864, "learning_rate": 5.462668484022414e-06, "loss": 0.0093, "step": 9600 }, { "epoch": 2.1921853702862335, "grad_norm": 0.0006754286587238312, "learning_rate": 5.3869453278812665e-06, "loss": 0.0028, "step": 9650 }, { "epoch": 2.2035438437074055, "grad_norm": 0.0004887759569101036, "learning_rate": 5.311222171740119e-06, "loss": 0.0051, "step": 9700 }, { "epoch": 2.214902317128578, "grad_norm": 0.05471280589699745, "learning_rate": 5.2354990155989705e-06, "loss": 0.0059, "step": 9750 }, { "epoch": 2.22626079054975, "grad_norm": 0.0008195925038307905, "learning_rate": 5.159775859457823e-06, "loss": 0.0036, "step": 9800 }, { "epoch": 2.2376192639709225, "grad_norm": 0.000433308829087764, "learning_rate": 5.0840527033166745e-06, "loss": 0.0022, "step": 9850 }, { "epoch": 2.2489777373920945, "grad_norm": 0.012868039309978485, "learning_rate": 5.008329547175527e-06, "loss": 0.0051, "step": 9900 }, { "epoch": 2.2603362108132665, "grad_norm": 0.5331919193267822, "learning_rate": 4.9326063910343785e-06, "loss": 0.0115, "step": 9950 }, { "epoch": 2.271694684234439, "grad_norm": 0.0005174391553737223, "learning_rate": 4.856883234893231e-06, "loss": 0.0007, "step": 10000 }, { "epoch": 2.283053157655611, "grad_norm": 0.00032180239213630557, "learning_rate": 4.7811600787520825e-06, "loss": 0.0095, "step": 10050 }, { "epoch": 2.294411631076783, "grad_norm": 0.07410291582345963, "learning_rate": 4.705436922610935e-06, "loss": 0.008, "step": 10100 }, { "epoch": 2.3057701044979555, "grad_norm": 0.024364503100514412, "learning_rate": 4.6297137664697866e-06, "loss": 0.0026, "step": 10150 }, { "epoch": 2.3171285779191275, "grad_norm": 0.00038883680826984346, "learning_rate": 4.553990610328639e-06, "loss": 0.0026, "step": 10200 }, { "epoch": 2.3284870513403, "grad_norm": 0.0038193499203771353, "learning_rate": 4.478267454187491e-06, "loss": 0.0001, "step": 10250 }, { "epoch": 2.339845524761472, "grad_norm": 0.0023336990270763636, "learning_rate": 4.402544298046343e-06, "loss": 0.0084, "step": 10300 }, { "epoch": 2.351203998182644, "grad_norm": 0.04238196834921837, "learning_rate": 4.326821141905195e-06, "loss": 0.0033, "step": 10350 }, { "epoch": 2.3625624716038165, "grad_norm": 0.000569637690205127, "learning_rate": 4.251097985764047e-06, "loss": 0.0012, "step": 10400 }, { "epoch": 2.3739209450249885, "grad_norm": 0.00034202905953861773, "learning_rate": 4.1753748296228995e-06, "loss": 0.0036, "step": 10450 }, { "epoch": 2.385279418446161, "grad_norm": 0.00034673314075917006, "learning_rate": 4.099651673481751e-06, "loss": 0.0013, "step": 10500 }, { "epoch": 2.396637891867333, "grad_norm": 0.0012820770498365164, "learning_rate": 4.023928517340603e-06, "loss": 0.0006, "step": 10550 }, { "epoch": 2.407996365288505, "grad_norm": 0.0017663290491327643, "learning_rate": 3.948205361199455e-06, "loss": 0.0002, "step": 10600 }, { "epoch": 2.4193548387096775, "grad_norm": 13.78622817993164, "learning_rate": 3.8724822050583075e-06, "loss": 0.0029, "step": 10650 }, { "epoch": 2.4307133121308495, "grad_norm": 0.00041380742914043367, "learning_rate": 3.796759048917159e-06, "loss": 0.0051, "step": 10700 }, { "epoch": 2.442071785552022, "grad_norm": 0.0015727384015917778, "learning_rate": 3.721035892776011e-06, "loss": 0.0002, "step": 10750 }, { "epoch": 2.453430258973194, "grad_norm": 0.00039928112528286874, "learning_rate": 3.6453127366348636e-06, "loss": 0.0041, "step": 10800 }, { "epoch": 2.464788732394366, "grad_norm": 0.0007231036433950067, "learning_rate": 3.569589580493715e-06, "loss": 0.0013, "step": 10850 }, { "epoch": 2.4761472058155385, "grad_norm": 0.0004043302033096552, "learning_rate": 3.493866424352567e-06, "loss": 0.0038, "step": 10900 }, { "epoch": 2.4875056792367105, "grad_norm": 0.00035005921381525695, "learning_rate": 3.418143268211419e-06, "loss": 0.0042, "step": 10950 }, { "epoch": 2.498864152657883, "grad_norm": 13.098333358764648, "learning_rate": 3.3424201120702716e-06, "loss": 0.0039, "step": 11000 }, { "epoch": 2.510222626079055, "grad_norm": 0.0030333756003528833, "learning_rate": 3.2666969559291232e-06, "loss": 0.0002, "step": 11050 }, { "epoch": 2.521581099500227, "grad_norm": 0.0002801103692036122, "learning_rate": 3.1909737997879752e-06, "loss": 0.0003, "step": 11100 }, { "epoch": 2.5329395729213995, "grad_norm": 0.5628141164779663, "learning_rate": 3.1152506436468277e-06, "loss": 0.0064, "step": 11150 }, { "epoch": 2.5442980463425715, "grad_norm": 0.002491287887096405, "learning_rate": 3.0395274875056797e-06, "loss": 0.0037, "step": 11200 }, { "epoch": 2.555656519763744, "grad_norm": 0.002808187622576952, "learning_rate": 2.9638043313645317e-06, "loss": 0.0028, "step": 11250 }, { "epoch": 2.567014993184916, "grad_norm": 0.043582722544670105, "learning_rate": 2.8880811752233833e-06, "loss": 0.002, "step": 11300 }, { "epoch": 2.578373466606088, "grad_norm": 0.00028100161580368876, "learning_rate": 2.8123580190822357e-06, "loss": 0.0072, "step": 11350 }, { "epoch": 2.5897319400272605, "grad_norm": 0.0003854044189210981, "learning_rate": 2.7366348629410877e-06, "loss": 0.0085, "step": 11400 }, { "epoch": 2.6010904134484325, "grad_norm": 41.185237884521484, "learning_rate": 2.6609117067999397e-06, "loss": 0.0031, "step": 11450 }, { "epoch": 2.612448886869605, "grad_norm": 0.0006349310860969126, "learning_rate": 2.5851885506587913e-06, "loss": 0.0001, "step": 11500 }, { "epoch": 2.623807360290777, "grad_norm": 0.0002767475671134889, "learning_rate": 2.5094653945176438e-06, "loss": 0.0065, "step": 11550 }, { "epoch": 2.635165833711949, "grad_norm": 0.002331246854737401, "learning_rate": 2.4337422383764958e-06, "loss": 0.006, "step": 11600 }, { "epoch": 2.646524307133121, "grad_norm": 0.0015208119293674827, "learning_rate": 2.3580190822353478e-06, "loss": 0.0037, "step": 11650 }, { "epoch": 2.6578827805542935, "grad_norm": 0.026821589097380638, "learning_rate": 2.2822959260942e-06, "loss": 0.0093, "step": 11700 }, { "epoch": 2.669241253975466, "grad_norm": 5.861845016479492, "learning_rate": 2.206572769953052e-06, "loss": 0.0003, "step": 11750 }, { "epoch": 2.680599727396638, "grad_norm": 0.00030583186889998615, "learning_rate": 2.130849613811904e-06, "loss": 0.0015, "step": 11800 }, { "epoch": 2.69195820081781, "grad_norm": 0.004688043612986803, "learning_rate": 2.055126457670756e-06, "loss": 0.0069, "step": 11850 }, { "epoch": 2.703316674238982, "grad_norm": 0.0014823823003098369, "learning_rate": 1.979403301529608e-06, "loss": 0.0011, "step": 11900 }, { "epoch": 2.7146751476601545, "grad_norm": 0.0003014960384462029, "learning_rate": 1.9036801453884599e-06, "loss": 0.0001, "step": 11950 }, { "epoch": 2.7260336210813265, "grad_norm": 0.0012561273761093616, "learning_rate": 1.827956989247312e-06, "loss": 0.0029, "step": 12000 }, { "epoch": 2.737392094502499, "grad_norm": 0.08546403795480728, "learning_rate": 1.7522338331061639e-06, "loss": 0.0027, "step": 12050 }, { "epoch": 2.748750567923671, "grad_norm": 0.0004358178994152695, "learning_rate": 1.676510676965016e-06, "loss": 0.0002, "step": 12100 }, { "epoch": 2.760109041344843, "grad_norm": 0.0006170666310936213, "learning_rate": 1.600787520823868e-06, "loss": 0.0035, "step": 12150 }, { "epoch": 2.7714675147660155, "grad_norm": 0.00022272793285083026, "learning_rate": 1.5250643646827201e-06, "loss": 0.0005, "step": 12200 }, { "epoch": 2.7828259881871875, "grad_norm": 0.0002821196976583451, "learning_rate": 1.449341208541572e-06, "loss": 0.0003, "step": 12250 }, { "epoch": 2.79418446160836, "grad_norm": 0.0002386348060099408, "learning_rate": 1.3736180524004241e-06, "loss": 0.0009, "step": 12300 }, { "epoch": 2.805542935029532, "grad_norm": 0.05056820809841156, "learning_rate": 1.2978948962592762e-06, "loss": 0.0016, "step": 12350 }, { "epoch": 2.816901408450704, "grad_norm": 0.0051028188318014145, "learning_rate": 1.2221717401181282e-06, "loss": 0.0005, "step": 12400 }, { "epoch": 2.8282598818718765, "grad_norm": 0.00031195359770208597, "learning_rate": 1.1464485839769802e-06, "loss": 0.0047, "step": 12450 }, { "epoch": 2.8396183552930485, "grad_norm": 0.0005304196383804083, "learning_rate": 1.0707254278358322e-06, "loss": 0.0005, "step": 12500 }, { "epoch": 2.850976828714221, "grad_norm": 0.004895700607448816, "learning_rate": 9.950022716946842e-07, "loss": 0.0023, "step": 12550 }, { "epoch": 2.862335302135393, "grad_norm": 0.00021093177201692015, "learning_rate": 9.192791155535363e-07, "loss": 0.0002, "step": 12600 }, { "epoch": 2.873693775556565, "grad_norm": 0.00027342038811184466, "learning_rate": 8.435559594123883e-07, "loss": 0.0002, "step": 12650 }, { "epoch": 2.8850522489777375, "grad_norm": 0.0002328462287550792, "learning_rate": 7.678328032712403e-07, "loss": 0.0001, "step": 12700 }, { "epoch": 2.8964107223989095, "grad_norm": 0.00026081790565513074, "learning_rate": 6.921096471300924e-07, "loss": 0.0008, "step": 12750 }, { "epoch": 2.907769195820082, "grad_norm": 0.07156021147966385, "learning_rate": 6.163864909889445e-07, "loss": 0.0015, "step": 12800 }, { "epoch": 2.919127669241254, "grad_norm": 0.002825228963047266, "learning_rate": 5.406633348477965e-07, "loss": 0.0001, "step": 12850 }, { "epoch": 2.930486142662426, "grad_norm": 0.0006366794114001095, "learning_rate": 4.6494017870664856e-07, "loss": 0.0049, "step": 12900 }, { "epoch": 2.9418446160835985, "grad_norm": 3.086198329925537, "learning_rate": 3.8921702256550057e-07, "loss": 0.0009, "step": 12950 }, { "epoch": 2.9532030895047705, "grad_norm": 0.000288445531623438, "learning_rate": 3.134938664243526e-07, "loss": 0.0003, "step": 13000 }, { "epoch": 2.964561562925943, "grad_norm": 0.00021304447727743536, "learning_rate": 2.3777071028320462e-07, "loss": 0.0006, "step": 13050 }, { "epoch": 2.975920036347115, "grad_norm": 0.0010498821502551436, "learning_rate": 1.6204755414205666e-07, "loss": 0.0001, "step": 13100 }, { "epoch": 2.987278509768287, "grad_norm": 0.00040187168633565307, "learning_rate": 8.632439800090868e-08, "loss": 0.0005, "step": 13150 }, { "epoch": 2.9986369831894595, "grad_norm": 0.0003258317010477185, "learning_rate": 1.0601241859760717e-08, "loss": 0.0092, "step": 13200 }, { "epoch": 3.0, "eval_accuracy": 0.9877524429967427, "eval_f1_macro": 0.9868791709426586, "eval_loss": 0.09756067395210266, "eval_runtime": 25.8171, "eval_samples_per_second": 297.284, "eval_steps_per_second": 4.648, "step": 13206 } ], "logging_steps": 50, "max_steps": 13206, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.341359158497128e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }