anishdhandore's picture
Upload folder using huggingface_hub
e3b04e1 verified
{
"best_global_step": 18484,
"best_metric": 0.30577707290649414,
"best_model_checkpoint": "/content/drive/MyDrive/DanceAI/roberta_goemotions_6cat/checkpoint-18484",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 18484,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005410084397316598,
"grad_norm": 0.5793700218200684,
"learning_rate": 1.997349058645315e-05,
"loss": 0.4819,
"step": 50
},
{
"epoch": 0.010820168794633196,
"grad_norm": 2.736356735229492,
"learning_rate": 1.9946440164466567e-05,
"loss": 0.4086,
"step": 100
},
{
"epoch": 0.016230253191949793,
"grad_norm": 2.994725227355957,
"learning_rate": 1.9919389742479985e-05,
"loss": 0.3826,
"step": 150
},
{
"epoch": 0.02164033758926639,
"grad_norm": 2.1226229667663574,
"learning_rate": 1.98923393204934e-05,
"loss": 0.3733,
"step": 200
},
{
"epoch": 0.02705042198658299,
"grad_norm": 2.5911431312561035,
"learning_rate": 1.986528889850682e-05,
"loss": 0.3587,
"step": 250
},
{
"epoch": 0.032460506383899586,
"grad_norm": 2.090728282928467,
"learning_rate": 1.9838238476520234e-05,
"loss": 0.3521,
"step": 300
},
{
"epoch": 0.037870590781216185,
"grad_norm": 2.1891307830810547,
"learning_rate": 1.9811188054533653e-05,
"loss": 0.3435,
"step": 350
},
{
"epoch": 0.04328067517853278,
"grad_norm": 2.6659998893737793,
"learning_rate": 1.9784137632547068e-05,
"loss": 0.3556,
"step": 400
},
{
"epoch": 0.04869075957584938,
"grad_norm": 2.140326499938965,
"learning_rate": 1.9757087210560487e-05,
"loss": 0.3345,
"step": 450
},
{
"epoch": 0.05410084397316598,
"grad_norm": 2.83417010307312,
"learning_rate": 1.9730036788573902e-05,
"loss": 0.3542,
"step": 500
},
{
"epoch": 0.05951092837048258,
"grad_norm": 2.66451358795166,
"learning_rate": 1.970298636658732e-05,
"loss": 0.3409,
"step": 550
},
{
"epoch": 0.06492101276779917,
"grad_norm": 3.015265941619873,
"learning_rate": 1.9675935944600736e-05,
"loss": 0.3515,
"step": 600
},
{
"epoch": 0.07033109716511578,
"grad_norm": 1.8967560529708862,
"learning_rate": 1.9648885522614155e-05,
"loss": 0.3372,
"step": 650
},
{
"epoch": 0.07574118156243237,
"grad_norm": 1.8910878896713257,
"learning_rate": 1.962183510062757e-05,
"loss": 0.3364,
"step": 700
},
{
"epoch": 0.08115126595974897,
"grad_norm": 1.7993358373641968,
"learning_rate": 1.959478467864099e-05,
"loss": 0.3415,
"step": 750
},
{
"epoch": 0.08656135035706557,
"grad_norm": 4.3601484298706055,
"learning_rate": 1.9567734256654404e-05,
"loss": 0.3106,
"step": 800
},
{
"epoch": 0.09197143475438217,
"grad_norm": 3.3065075874328613,
"learning_rate": 1.9540683834667822e-05,
"loss": 0.3251,
"step": 850
},
{
"epoch": 0.09738151915169876,
"grad_norm": 2.635791301727295,
"learning_rate": 1.9513633412681238e-05,
"loss": 0.3308,
"step": 900
},
{
"epoch": 0.10279160354901537,
"grad_norm": 2.1672942638397217,
"learning_rate": 1.9486582990694656e-05,
"loss": 0.3242,
"step": 950
},
{
"epoch": 0.10820168794633196,
"grad_norm": 1.772702693939209,
"learning_rate": 1.945953256870807e-05,
"loss": 0.3445,
"step": 1000
},
{
"epoch": 0.11361177234364857,
"grad_norm": 2.092458963394165,
"learning_rate": 1.943248214672149e-05,
"loss": 0.3317,
"step": 1050
},
{
"epoch": 0.11902185674096516,
"grad_norm": 3.1785027980804443,
"learning_rate": 1.9405431724734905e-05,
"loss": 0.3366,
"step": 1100
},
{
"epoch": 0.12443194113828175,
"grad_norm": 1.6550190448760986,
"learning_rate": 1.9378381302748324e-05,
"loss": 0.3245,
"step": 1150
},
{
"epoch": 0.12984202553559834,
"grad_norm": 2.842160701751709,
"learning_rate": 1.9351871889201474e-05,
"loss": 0.3141,
"step": 1200
},
{
"epoch": 0.13525210993291495,
"grad_norm": 3.7268848419189453,
"learning_rate": 1.932482146721489e-05,
"loss": 0.3217,
"step": 1250
},
{
"epoch": 0.14066219433023155,
"grad_norm": 2.2874679565429688,
"learning_rate": 1.9297771045228308e-05,
"loss": 0.313,
"step": 1300
},
{
"epoch": 0.14607227872754816,
"grad_norm": 2.965384006500244,
"learning_rate": 1.9270720623241723e-05,
"loss": 0.3112,
"step": 1350
},
{
"epoch": 0.15148236312486474,
"grad_norm": 2.0067362785339355,
"learning_rate": 1.924367020125514e-05,
"loss": 0.3207,
"step": 1400
},
{
"epoch": 0.15689244752218134,
"grad_norm": 1.780565619468689,
"learning_rate": 1.9216619779268557e-05,
"loss": 0.3186,
"step": 1450
},
{
"epoch": 0.16230253191949795,
"grad_norm": 1.802378535270691,
"learning_rate": 1.9189569357281975e-05,
"loss": 0.3236,
"step": 1500
},
{
"epoch": 0.16771261631681456,
"grad_norm": 2.4041481018066406,
"learning_rate": 1.916251893529539e-05,
"loss": 0.3316,
"step": 1550
},
{
"epoch": 0.17312270071413113,
"grad_norm": 2.3465633392333984,
"learning_rate": 1.913546851330881e-05,
"loss": 0.3288,
"step": 1600
},
{
"epoch": 0.17853278511144774,
"grad_norm": 2.623065948486328,
"learning_rate": 1.9108418091322228e-05,
"loss": 0.33,
"step": 1650
},
{
"epoch": 0.18394286950876435,
"grad_norm": 1.6979801654815674,
"learning_rate": 1.9081367669335643e-05,
"loss": 0.3234,
"step": 1700
},
{
"epoch": 0.18935295390608092,
"grad_norm": 2.876146078109741,
"learning_rate": 1.905431724734906e-05,
"loss": 0.3139,
"step": 1750
},
{
"epoch": 0.19476303830339753,
"grad_norm": 1.7591594457626343,
"learning_rate": 1.9027266825362477e-05,
"loss": 0.333,
"step": 1800
},
{
"epoch": 0.20017312270071413,
"grad_norm": 2.460012674331665,
"learning_rate": 1.9000216403375896e-05,
"loss": 0.3197,
"step": 1850
},
{
"epoch": 0.20558320709803074,
"grad_norm": 2.852691411972046,
"learning_rate": 1.897316598138931e-05,
"loss": 0.3177,
"step": 1900
},
{
"epoch": 0.21099329149534732,
"grad_norm": 2.61527419090271,
"learning_rate": 1.894611555940273e-05,
"loss": 0.3243,
"step": 1950
},
{
"epoch": 0.21640337589266392,
"grad_norm": 2.6071314811706543,
"learning_rate": 1.8919065137416145e-05,
"loss": 0.3098,
"step": 2000
},
{
"epoch": 0.22181346028998053,
"grad_norm": 3.4292609691619873,
"learning_rate": 1.8892014715429563e-05,
"loss": 0.3172,
"step": 2050
},
{
"epoch": 0.22722354468729714,
"grad_norm": 2.155167579650879,
"learning_rate": 1.886496429344298e-05,
"loss": 0.323,
"step": 2100
},
{
"epoch": 0.2326336290846137,
"grad_norm": 1.8653080463409424,
"learning_rate": 1.8837913871456397e-05,
"loss": 0.3273,
"step": 2150
},
{
"epoch": 0.23804371348193032,
"grad_norm": 2.2988572120666504,
"learning_rate": 1.8810863449469812e-05,
"loss": 0.3099,
"step": 2200
},
{
"epoch": 0.24345379787924692,
"grad_norm": 2.6169512271881104,
"learning_rate": 1.878381302748323e-05,
"loss": 0.3168,
"step": 2250
},
{
"epoch": 0.2488638822765635,
"grad_norm": 1.9101380109786987,
"learning_rate": 1.8756762605496646e-05,
"loss": 0.32,
"step": 2300
},
{
"epoch": 0.25427396667388014,
"grad_norm": 1.495621919631958,
"learning_rate": 1.8729712183510065e-05,
"loss": 0.3333,
"step": 2350
},
{
"epoch": 0.2596840510711967,
"grad_norm": 1.646639347076416,
"learning_rate": 1.870266176152348e-05,
"loss": 0.3238,
"step": 2400
},
{
"epoch": 0.2650941354685133,
"grad_norm": 2.967376708984375,
"learning_rate": 1.86756113395369e-05,
"loss": 0.3113,
"step": 2450
},
{
"epoch": 0.2705042198658299,
"grad_norm": 3.208625316619873,
"learning_rate": 1.8648560917550317e-05,
"loss": 0.3165,
"step": 2500
},
{
"epoch": 0.2759143042631465,
"grad_norm": 1.583409070968628,
"learning_rate": 1.8621510495563733e-05,
"loss": 0.3135,
"step": 2550
},
{
"epoch": 0.2813243886604631,
"grad_norm": 2.1211490631103516,
"learning_rate": 1.859446007357715e-05,
"loss": 0.3226,
"step": 2600
},
{
"epoch": 0.2867344730577797,
"grad_norm": 2.71584415435791,
"learning_rate": 1.8567409651590566e-05,
"loss": 0.3151,
"step": 2650
},
{
"epoch": 0.2921445574550963,
"grad_norm": 1.1264439821243286,
"learning_rate": 1.8540359229603985e-05,
"loss": 0.3015,
"step": 2700
},
{
"epoch": 0.29755464185241287,
"grad_norm": 2.2039103507995605,
"learning_rate": 1.85133088076174e-05,
"loss": 0.3199,
"step": 2750
},
{
"epoch": 0.3029647262497295,
"grad_norm": 2.2126784324645996,
"learning_rate": 1.848625838563082e-05,
"loss": 0.3084,
"step": 2800
},
{
"epoch": 0.3083748106470461,
"grad_norm": 1.9607175588607788,
"learning_rate": 1.8459207963644234e-05,
"loss": 0.3293,
"step": 2850
},
{
"epoch": 0.3137848950443627,
"grad_norm": 2.560570240020752,
"learning_rate": 1.8432157541657653e-05,
"loss": 0.3108,
"step": 2900
},
{
"epoch": 0.3191949794416793,
"grad_norm": 2.3421413898468018,
"learning_rate": 1.8405107119671068e-05,
"loss": 0.3171,
"step": 2950
},
{
"epoch": 0.3246050638389959,
"grad_norm": 2.5096020698547363,
"learning_rate": 1.8378056697684487e-05,
"loss": 0.3191,
"step": 3000
},
{
"epoch": 0.3300151482363125,
"grad_norm": 2.1862289905548096,
"learning_rate": 1.8351006275697902e-05,
"loss": 0.3191,
"step": 3050
},
{
"epoch": 0.3354252326336291,
"grad_norm": 2.0171096324920654,
"learning_rate": 1.832395585371132e-05,
"loss": 0.3148,
"step": 3100
},
{
"epoch": 0.34083531703094566,
"grad_norm": 2.438683032989502,
"learning_rate": 1.8296905431724736e-05,
"loss": 0.3046,
"step": 3150
},
{
"epoch": 0.34624540142826227,
"grad_norm": 1.8642085790634155,
"learning_rate": 1.8269855009738154e-05,
"loss": 0.3174,
"step": 3200
},
{
"epoch": 0.3516554858255789,
"grad_norm": 1.5657634735107422,
"learning_rate": 1.824280458775157e-05,
"loss": 0.3188,
"step": 3250
},
{
"epoch": 0.3570655702228955,
"grad_norm": 1.6534936428070068,
"learning_rate": 1.8215754165764988e-05,
"loss": 0.3071,
"step": 3300
},
{
"epoch": 0.3624756546202121,
"grad_norm": 2.6682286262512207,
"learning_rate": 1.8188703743778404e-05,
"loss": 0.323,
"step": 3350
},
{
"epoch": 0.3678857390175287,
"grad_norm": 2.3177874088287354,
"learning_rate": 1.8161653321791822e-05,
"loss": 0.3265,
"step": 3400
},
{
"epoch": 0.3732958234148453,
"grad_norm": 1.8575387001037598,
"learning_rate": 1.8134602899805237e-05,
"loss": 0.3235,
"step": 3450
},
{
"epoch": 0.37870590781216185,
"grad_norm": 2.2886006832122803,
"learning_rate": 1.8107552477818656e-05,
"loss": 0.3235,
"step": 3500
},
{
"epoch": 0.38411599220947845,
"grad_norm": 3.049358606338501,
"learning_rate": 1.808050205583207e-05,
"loss": 0.3121,
"step": 3550
},
{
"epoch": 0.38952607660679506,
"grad_norm": 1.3371657133102417,
"learning_rate": 1.805345163384549e-05,
"loss": 0.3136,
"step": 3600
},
{
"epoch": 0.39493616100411166,
"grad_norm": 2.506653308868408,
"learning_rate": 1.8026401211858905e-05,
"loss": 0.3203,
"step": 3650
},
{
"epoch": 0.40034624540142827,
"grad_norm": 2.045966148376465,
"learning_rate": 1.7999350789872324e-05,
"loss": 0.3289,
"step": 3700
},
{
"epoch": 0.4057563297987449,
"grad_norm": 2.645087957382202,
"learning_rate": 1.797230036788574e-05,
"loss": 0.3078,
"step": 3750
},
{
"epoch": 0.4111664141960615,
"grad_norm": 1.7792373895645142,
"learning_rate": 1.7945249945899158e-05,
"loss": 0.3109,
"step": 3800
},
{
"epoch": 0.41657649859337803,
"grad_norm": 2.439561128616333,
"learning_rate": 1.7918199523912573e-05,
"loss": 0.3116,
"step": 3850
},
{
"epoch": 0.42198658299069464,
"grad_norm": 1.7365974187850952,
"learning_rate": 1.789114910192599e-05,
"loss": 0.3195,
"step": 3900
},
{
"epoch": 0.42739666738801124,
"grad_norm": 1.4129964113235474,
"learning_rate": 1.7864098679939407e-05,
"loss": 0.3179,
"step": 3950
},
{
"epoch": 0.43280675178532785,
"grad_norm": 2.3082122802734375,
"learning_rate": 1.7837048257952825e-05,
"loss": 0.3006,
"step": 4000
},
{
"epoch": 0.43821683618264445,
"grad_norm": 2.0384387969970703,
"learning_rate": 1.780999783596624e-05,
"loss": 0.3022,
"step": 4050
},
{
"epoch": 0.44362692057996106,
"grad_norm": 3.2233986854553223,
"learning_rate": 1.778294741397966e-05,
"loss": 0.3185,
"step": 4100
},
{
"epoch": 0.44903700497727767,
"grad_norm": 3.3285160064697266,
"learning_rate": 1.7755896991993074e-05,
"loss": 0.3149,
"step": 4150
},
{
"epoch": 0.45444708937459427,
"grad_norm": 2.251983165740967,
"learning_rate": 1.7728846570006493e-05,
"loss": 0.3165,
"step": 4200
},
{
"epoch": 0.4598571737719108,
"grad_norm": 2.060551404953003,
"learning_rate": 1.770179614801991e-05,
"loss": 0.3141,
"step": 4250
},
{
"epoch": 0.4652672581692274,
"grad_norm": 1.4859846830368042,
"learning_rate": 1.7674745726033327e-05,
"loss": 0.3181,
"step": 4300
},
{
"epoch": 0.47067734256654403,
"grad_norm": 2.066725730895996,
"learning_rate": 1.7647695304046742e-05,
"loss": 0.3147,
"step": 4350
},
{
"epoch": 0.47608742696386064,
"grad_norm": 1.7268898487091064,
"learning_rate": 1.762064488206016e-05,
"loss": 0.3117,
"step": 4400
},
{
"epoch": 0.48149751136117724,
"grad_norm": 2.5234525203704834,
"learning_rate": 1.7593594460073576e-05,
"loss": 0.3081,
"step": 4450
},
{
"epoch": 0.48690759575849385,
"grad_norm": 2.0419929027557373,
"learning_rate": 1.7566544038086995e-05,
"loss": 0.3315,
"step": 4500
},
{
"epoch": 0.49231768015581046,
"grad_norm": 1.9306811094284058,
"learning_rate": 1.7539493616100413e-05,
"loss": 0.3149,
"step": 4550
},
{
"epoch": 0.497727764553127,
"grad_norm": 1.1427522897720337,
"learning_rate": 1.7512443194113832e-05,
"loss": 0.3214,
"step": 4600
},
{
"epoch": 0.5031378489504437,
"grad_norm": 2.0656473636627197,
"learning_rate": 1.7485392772127247e-05,
"loss": 0.3226,
"step": 4650
},
{
"epoch": 0.5085479333477603,
"grad_norm": 2.1577677726745605,
"learning_rate": 1.7458342350140666e-05,
"loss": 0.3098,
"step": 4700
},
{
"epoch": 0.5139580177450769,
"grad_norm": 2.430352210998535,
"learning_rate": 1.743129192815408e-05,
"loss": 0.3103,
"step": 4750
},
{
"epoch": 0.5193681021423934,
"grad_norm": 2.0063912868499756,
"learning_rate": 1.74042415061675e-05,
"loss": 0.3062,
"step": 4800
},
{
"epoch": 0.52477818653971,
"grad_norm": 2.357673168182373,
"learning_rate": 1.7377191084180915e-05,
"loss": 0.3172,
"step": 4850
},
{
"epoch": 0.5301882709370266,
"grad_norm": 2.1735177040100098,
"learning_rate": 1.7350140662194334e-05,
"loss": 0.3051,
"step": 4900
},
{
"epoch": 0.5355983553343432,
"grad_norm": 2.143653154373169,
"learning_rate": 1.732309024020775e-05,
"loss": 0.3264,
"step": 4950
},
{
"epoch": 0.5410084397316598,
"grad_norm": 3.2294411659240723,
"learning_rate": 1.7296039818221167e-05,
"loss": 0.3168,
"step": 5000
},
{
"epoch": 0.5464185241289764,
"grad_norm": 3.9202592372894287,
"learning_rate": 1.7268989396234583e-05,
"loss": 0.3038,
"step": 5050
},
{
"epoch": 0.551828608526293,
"grad_norm": 2.067411422729492,
"learning_rate": 1.7241938974248e-05,
"loss": 0.3094,
"step": 5100
},
{
"epoch": 0.5572386929236096,
"grad_norm": 3.1209259033203125,
"learning_rate": 1.7214888552261416e-05,
"loss": 0.2853,
"step": 5150
},
{
"epoch": 0.5626487773209262,
"grad_norm": 2.2904303073883057,
"learning_rate": 1.7187838130274835e-05,
"loss": 0.334,
"step": 5200
},
{
"epoch": 0.5680588617182428,
"grad_norm": 2.144474983215332,
"learning_rate": 1.716078770828825e-05,
"loss": 0.3206,
"step": 5250
},
{
"epoch": 0.5734689461155594,
"grad_norm": 1.682051181793213,
"learning_rate": 1.71342782947414e-05,
"loss": 0.3098,
"step": 5300
},
{
"epoch": 0.578879030512876,
"grad_norm": 2.1887052059173584,
"learning_rate": 1.7107227872754815e-05,
"loss": 0.3051,
"step": 5350
},
{
"epoch": 0.5842891149101926,
"grad_norm": 1.5054807662963867,
"learning_rate": 1.7080177450768234e-05,
"loss": 0.3205,
"step": 5400
},
{
"epoch": 0.5896991993075092,
"grad_norm": 1.9038208723068237,
"learning_rate": 1.705312702878165e-05,
"loss": 0.3196,
"step": 5450
},
{
"epoch": 0.5951092837048257,
"grad_norm": 2.6153712272644043,
"learning_rate": 1.7026076606795068e-05,
"loss": 0.3096,
"step": 5500
},
{
"epoch": 0.6005193681021423,
"grad_norm": 1.7780612707138062,
"learning_rate": 1.6999026184808483e-05,
"loss": 0.3167,
"step": 5550
},
{
"epoch": 0.605929452499459,
"grad_norm": 1.6894149780273438,
"learning_rate": 1.6971975762821902e-05,
"loss": 0.3156,
"step": 5600
},
{
"epoch": 0.6113395368967756,
"grad_norm": 1.0445067882537842,
"learning_rate": 1.694492534083532e-05,
"loss": 0.3029,
"step": 5650
},
{
"epoch": 0.6167496212940922,
"grad_norm": 2.145226001739502,
"learning_rate": 1.6917874918848736e-05,
"loss": 0.3147,
"step": 5700
},
{
"epoch": 0.6221597056914088,
"grad_norm": 2.216933250427246,
"learning_rate": 1.6890824496862154e-05,
"loss": 0.3169,
"step": 5750
},
{
"epoch": 0.6275697900887254,
"grad_norm": 1.8254783153533936,
"learning_rate": 1.686377407487557e-05,
"loss": 0.2997,
"step": 5800
},
{
"epoch": 0.632979874486042,
"grad_norm": 2.0882210731506348,
"learning_rate": 1.6836723652888988e-05,
"loss": 0.3348,
"step": 5850
},
{
"epoch": 0.6383899588833586,
"grad_norm": 1.6735379695892334,
"learning_rate": 1.6809673230902403e-05,
"loss": 0.3073,
"step": 5900
},
{
"epoch": 0.6438000432806752,
"grad_norm": 2.022500514984131,
"learning_rate": 1.6782622808915822e-05,
"loss": 0.3188,
"step": 5950
},
{
"epoch": 0.6492101276779918,
"grad_norm": 1.9945366382598877,
"learning_rate": 1.6755572386929237e-05,
"loss": 0.2968,
"step": 6000
},
{
"epoch": 0.6546202120753084,
"grad_norm": 2.076066493988037,
"learning_rate": 1.6729062973382387e-05,
"loss": 0.3111,
"step": 6050
},
{
"epoch": 0.660030296472625,
"grad_norm": 2.083757162094116,
"learning_rate": 1.6702012551395802e-05,
"loss": 0.3083,
"step": 6100
},
{
"epoch": 0.6654403808699416,
"grad_norm": 1.9437183141708374,
"learning_rate": 1.667496212940922e-05,
"loss": 0.328,
"step": 6150
},
{
"epoch": 0.6708504652672582,
"grad_norm": 1.824617862701416,
"learning_rate": 1.6647911707422636e-05,
"loss": 0.3042,
"step": 6200
},
{
"epoch": 0.6762605496645747,
"grad_norm": 1.78602135181427,
"learning_rate": 1.6620861285436055e-05,
"loss": 0.3167,
"step": 6250
},
{
"epoch": 0.6816706340618913,
"grad_norm": 2.4211480617523193,
"learning_rate": 1.659381086344947e-05,
"loss": 0.3009,
"step": 6300
},
{
"epoch": 0.6870807184592079,
"grad_norm": 1.5942953824996948,
"learning_rate": 1.656676044146289e-05,
"loss": 0.3073,
"step": 6350
},
{
"epoch": 0.6924908028565245,
"grad_norm": 1.8902363777160645,
"learning_rate": 1.6539710019476304e-05,
"loss": 0.3289,
"step": 6400
},
{
"epoch": 0.6979008872538411,
"grad_norm": 1.8132948875427246,
"learning_rate": 1.6512659597489722e-05,
"loss": 0.3142,
"step": 6450
},
{
"epoch": 0.7033109716511577,
"grad_norm": 2.8677127361297607,
"learning_rate": 1.6485609175503138e-05,
"loss": 0.3035,
"step": 6500
},
{
"epoch": 0.7087210560484744,
"grad_norm": 1.7309463024139404,
"learning_rate": 1.6458558753516556e-05,
"loss": 0.3149,
"step": 6550
},
{
"epoch": 0.714131140445791,
"grad_norm": 1.587862253189087,
"learning_rate": 1.643150833152997e-05,
"loss": 0.3137,
"step": 6600
},
{
"epoch": 0.7195412248431076,
"grad_norm": 2.0411460399627686,
"learning_rate": 1.640445790954339e-05,
"loss": 0.3188,
"step": 6650
},
{
"epoch": 0.7249513092404242,
"grad_norm": 2.4019126892089844,
"learning_rate": 1.6377407487556805e-05,
"loss": 0.3053,
"step": 6700
},
{
"epoch": 0.7303613936377408,
"grad_norm": 1.4498906135559082,
"learning_rate": 1.6350357065570224e-05,
"loss": 0.3203,
"step": 6750
},
{
"epoch": 0.7357714780350574,
"grad_norm": 1.8330546617507935,
"learning_rate": 1.632330664358364e-05,
"loss": 0.3131,
"step": 6800
},
{
"epoch": 0.741181562432374,
"grad_norm": 2.0546889305114746,
"learning_rate": 1.6296256221597058e-05,
"loss": 0.3162,
"step": 6850
},
{
"epoch": 0.7465916468296906,
"grad_norm": 2.014270544052124,
"learning_rate": 1.6269205799610473e-05,
"loss": 0.3123,
"step": 6900
},
{
"epoch": 0.7520017312270071,
"grad_norm": 2.8432347774505615,
"learning_rate": 1.6242155377623892e-05,
"loss": 0.3155,
"step": 6950
},
{
"epoch": 0.7574118156243237,
"grad_norm": 1.6952821016311646,
"learning_rate": 1.6215104955637307e-05,
"loss": 0.3014,
"step": 7000
},
{
"epoch": 0.7628219000216403,
"grad_norm": 1.657174825668335,
"learning_rate": 1.6188054533650726e-05,
"loss": 0.2904,
"step": 7050
},
{
"epoch": 0.7682319844189569,
"grad_norm": 2.069336175918579,
"learning_rate": 1.616100411166414e-05,
"loss": 0.3098,
"step": 7100
},
{
"epoch": 0.7736420688162735,
"grad_norm": 2.974534749984741,
"learning_rate": 1.613395368967756e-05,
"loss": 0.3103,
"step": 7150
},
{
"epoch": 0.7790521532135901,
"grad_norm": 1.8722079992294312,
"learning_rate": 1.6106903267690978e-05,
"loss": 0.3181,
"step": 7200
},
{
"epoch": 0.7844622376109067,
"grad_norm": 1.8573060035705566,
"learning_rate": 1.6079852845704393e-05,
"loss": 0.3095,
"step": 7250
},
{
"epoch": 0.7898723220082233,
"grad_norm": 1.5203043222427368,
"learning_rate": 1.6052802423717812e-05,
"loss": 0.3062,
"step": 7300
},
{
"epoch": 0.7952824064055399,
"grad_norm": 1.3434637784957886,
"learning_rate": 1.6025752001731227e-05,
"loss": 0.315,
"step": 7350
},
{
"epoch": 0.8006924908028565,
"grad_norm": 2.0199999809265137,
"learning_rate": 1.5998701579744646e-05,
"loss": 0.301,
"step": 7400
},
{
"epoch": 0.8061025752001731,
"grad_norm": 1.8482146263122559,
"learning_rate": 1.597165115775806e-05,
"loss": 0.2947,
"step": 7450
},
{
"epoch": 0.8115126595974897,
"grad_norm": 2.2450520992279053,
"learning_rate": 1.594460073577148e-05,
"loss": 0.3098,
"step": 7500
},
{
"epoch": 0.8169227439948064,
"grad_norm": 2.0248937606811523,
"learning_rate": 1.59175503137849e-05,
"loss": 0.3125,
"step": 7550
},
{
"epoch": 0.822332828392123,
"grad_norm": 1.8997951745986938,
"learning_rate": 1.5890499891798314e-05,
"loss": 0.3054,
"step": 7600
},
{
"epoch": 0.8277429127894396,
"grad_norm": 1.7810888290405273,
"learning_rate": 1.5863449469811732e-05,
"loss": 0.3054,
"step": 7650
},
{
"epoch": 0.8331529971867561,
"grad_norm": 1.5169132947921753,
"learning_rate": 1.5836399047825147e-05,
"loss": 0.3051,
"step": 7700
},
{
"epoch": 0.8385630815840727,
"grad_norm": 1.8637396097183228,
"learning_rate": 1.5809348625838566e-05,
"loss": 0.3002,
"step": 7750
},
{
"epoch": 0.8439731659813893,
"grad_norm": 2.1024274826049805,
"learning_rate": 1.578229820385198e-05,
"loss": 0.2988,
"step": 7800
},
{
"epoch": 0.8493832503787059,
"grad_norm": 2.098560333251953,
"learning_rate": 1.57552477818654e-05,
"loss": 0.3231,
"step": 7850
},
{
"epoch": 0.8547933347760225,
"grad_norm": 2.609898328781128,
"learning_rate": 1.5728197359878815e-05,
"loss": 0.3076,
"step": 7900
},
{
"epoch": 0.8602034191733391,
"grad_norm": 1.3566176891326904,
"learning_rate": 1.5701146937892234e-05,
"loss": 0.3052,
"step": 7950
},
{
"epoch": 0.8656135035706557,
"grad_norm": 2.245142936706543,
"learning_rate": 1.567409651590565e-05,
"loss": 0.3079,
"step": 8000
},
{
"epoch": 0.8710235879679723,
"grad_norm": 2.1084117889404297,
"learning_rate": 1.5647046093919068e-05,
"loss": 0.3096,
"step": 8050
},
{
"epoch": 0.8764336723652889,
"grad_norm": 1.445786714553833,
"learning_rate": 1.5619995671932483e-05,
"loss": 0.3006,
"step": 8100
},
{
"epoch": 0.8818437567626055,
"grad_norm": 1.5979697704315186,
"learning_rate": 1.55929452499459e-05,
"loss": 0.3138,
"step": 8150
},
{
"epoch": 0.8872538411599221,
"grad_norm": 1.7282668352127075,
"learning_rate": 1.5565894827959317e-05,
"loss": 0.3102,
"step": 8200
},
{
"epoch": 0.8926639255572387,
"grad_norm": 1.5071444511413574,
"learning_rate": 1.5538844405972735e-05,
"loss": 0.2989,
"step": 8250
},
{
"epoch": 0.8980740099545553,
"grad_norm": 2.033233880996704,
"learning_rate": 1.551179398398615e-05,
"loss": 0.3162,
"step": 8300
},
{
"epoch": 0.9034840943518719,
"grad_norm": 1.9847067594528198,
"learning_rate": 1.548474356199957e-05,
"loss": 0.3208,
"step": 8350
},
{
"epoch": 0.9088941787491885,
"grad_norm": 1.9450207948684692,
"learning_rate": 1.5457693140012985e-05,
"loss": 0.2982,
"step": 8400
},
{
"epoch": 0.914304263146505,
"grad_norm": 2.0153732299804688,
"learning_rate": 1.5430642718026403e-05,
"loss": 0.2965,
"step": 8450
},
{
"epoch": 0.9197143475438216,
"grad_norm": 1.7769889831542969,
"learning_rate": 1.540359229603982e-05,
"loss": 0.3325,
"step": 8500
},
{
"epoch": 0.9251244319411382,
"grad_norm": 1.949591040611267,
"learning_rate": 1.5376541874053237e-05,
"loss": 0.3019,
"step": 8550
},
{
"epoch": 0.9305345163384549,
"grad_norm": 1.7794471979141235,
"learning_rate": 1.5349491452066652e-05,
"loss": 0.3156,
"step": 8600
},
{
"epoch": 0.9359446007357715,
"grad_norm": 1.8910231590270996,
"learning_rate": 1.532244103008007e-05,
"loss": 0.3186,
"step": 8650
},
{
"epoch": 0.9413546851330881,
"grad_norm": 1.9011621475219727,
"learning_rate": 1.5295390608093486e-05,
"loss": 0.3123,
"step": 8700
},
{
"epoch": 0.9467647695304047,
"grad_norm": 2.2883121967315674,
"learning_rate": 1.5268340186106905e-05,
"loss": 0.3039,
"step": 8750
},
{
"epoch": 0.9521748539277213,
"grad_norm": 1.4748516082763672,
"learning_rate": 1.5241289764120322e-05,
"loss": 0.3132,
"step": 8800
},
{
"epoch": 0.9575849383250379,
"grad_norm": 2.059274673461914,
"learning_rate": 1.5214239342133739e-05,
"loss": 0.3051,
"step": 8850
},
{
"epoch": 0.9629950227223545,
"grad_norm": 1.7099647521972656,
"learning_rate": 1.5187188920147156e-05,
"loss": 0.3168,
"step": 8900
},
{
"epoch": 0.9684051071196711,
"grad_norm": 1.8200836181640625,
"learning_rate": 1.5160138498160572e-05,
"loss": 0.3029,
"step": 8950
},
{
"epoch": 0.9738151915169877,
"grad_norm": 1.5641443729400635,
"learning_rate": 1.513308807617399e-05,
"loss": 0.2976,
"step": 9000
},
{
"epoch": 0.9792252759143043,
"grad_norm": 2.299715757369995,
"learning_rate": 1.5106037654187406e-05,
"loss": 0.3085,
"step": 9050
},
{
"epoch": 0.9846353603116209,
"grad_norm": 2.057692289352417,
"learning_rate": 1.5078987232200823e-05,
"loss": 0.3134,
"step": 9100
},
{
"epoch": 0.9900454447089374,
"grad_norm": 3.2328195571899414,
"learning_rate": 1.505193681021424e-05,
"loss": 0.3082,
"step": 9150
},
{
"epoch": 0.995455529106254,
"grad_norm": 1.7043819427490234,
"learning_rate": 1.5024886388227657e-05,
"loss": 0.3054,
"step": 9200
},
{
"epoch": 1.0,
"eval_accuracy": 0.8663521020073223,
"eval_hamming_loss": 0.1336478979926777,
"eval_jaccard_score": 0.460505302360813,
"eval_loss": 0.3095310926437378,
"eval_runtime": 65.5546,
"eval_samples_per_second": 483.322,
"eval_steps_per_second": 30.219,
"step": 9242
},
{
"epoch": 1.0008656135035707,
"grad_norm": 1.3396296501159668,
"learning_rate": 1.4997835966241074e-05,
"loss": 0.3116,
"step": 9250
},
{
"epoch": 1.0062756979008873,
"grad_norm": 1.8290706872940063,
"learning_rate": 1.4970785544254491e-05,
"loss": 0.2928,
"step": 9300
},
{
"epoch": 1.011685782298204,
"grad_norm": 1.7000977993011475,
"learning_rate": 1.4943735122267908e-05,
"loss": 0.302,
"step": 9350
},
{
"epoch": 1.0170958666955205,
"grad_norm": 1.5283267498016357,
"learning_rate": 1.4916684700281325e-05,
"loss": 0.304,
"step": 9400
},
{
"epoch": 1.0225059510928372,
"grad_norm": 2.1828360557556152,
"learning_rate": 1.4889634278294742e-05,
"loss": 0.2936,
"step": 9450
},
{
"epoch": 1.0279160354901538,
"grad_norm": 1.8878413438796997,
"learning_rate": 1.4862583856308159e-05,
"loss": 0.2837,
"step": 9500
},
{
"epoch": 1.0333261198874701,
"grad_norm": 2.093980073928833,
"learning_rate": 1.4835533434321576e-05,
"loss": 0.2787,
"step": 9550
},
{
"epoch": 1.0387362042847867,
"grad_norm": 2.5507097244262695,
"learning_rate": 1.4808483012334993e-05,
"loss": 0.2896,
"step": 9600
},
{
"epoch": 1.0441462886821034,
"grad_norm": 2.2995388507843018,
"learning_rate": 1.478143259034841e-05,
"loss": 0.3053,
"step": 9650
},
{
"epoch": 1.04955637307942,
"grad_norm": 2.930950880050659,
"learning_rate": 1.4754382168361826e-05,
"loss": 0.2963,
"step": 9700
},
{
"epoch": 1.0549664574767366,
"grad_norm": 2.29239821434021,
"learning_rate": 1.4727331746375243e-05,
"loss": 0.2886,
"step": 9750
},
{
"epoch": 1.0603765418740532,
"grad_norm": 2.4556353092193604,
"learning_rate": 1.470028132438866e-05,
"loss": 0.292,
"step": 9800
},
{
"epoch": 1.0657866262713698,
"grad_norm": 2.0122013092041016,
"learning_rate": 1.4673230902402077e-05,
"loss": 0.2896,
"step": 9850
},
{
"epoch": 1.0711967106686864,
"grad_norm": 2.613096237182617,
"learning_rate": 1.4646180480415494e-05,
"loss": 0.2939,
"step": 9900
},
{
"epoch": 1.076606795066003,
"grad_norm": 1.474997639656067,
"learning_rate": 1.4619130058428911e-05,
"loss": 0.307,
"step": 9950
},
{
"epoch": 1.0820168794633196,
"grad_norm": 2.1981139183044434,
"learning_rate": 1.459207963644233e-05,
"loss": 0.2986,
"step": 10000
},
{
"epoch": 1.0874269638606362,
"grad_norm": 1.849653959274292,
"learning_rate": 1.4565029214455747e-05,
"loss": 0.2939,
"step": 10050
},
{
"epoch": 1.0928370482579528,
"grad_norm": 1.8995155096054077,
"learning_rate": 1.4537978792469164e-05,
"loss": 0.3063,
"step": 10100
},
{
"epoch": 1.0982471326552694,
"grad_norm": 2.691397190093994,
"learning_rate": 1.451092837048258e-05,
"loss": 0.3049,
"step": 10150
},
{
"epoch": 1.103657217052586,
"grad_norm": 1.8928948640823364,
"learning_rate": 1.4483877948495997e-05,
"loss": 0.3065,
"step": 10200
},
{
"epoch": 1.1090673014499026,
"grad_norm": 3.0470404624938965,
"learning_rate": 1.4456827526509414e-05,
"loss": 0.2972,
"step": 10250
},
{
"epoch": 1.1144773858472192,
"grad_norm": 1.8250367641448975,
"learning_rate": 1.4429777104522831e-05,
"loss": 0.2881,
"step": 10300
},
{
"epoch": 1.1198874702445358,
"grad_norm": 2.1483049392700195,
"learning_rate": 1.4402726682536248e-05,
"loss": 0.2972,
"step": 10350
},
{
"epoch": 1.1252975546418524,
"grad_norm": 1.9604085683822632,
"learning_rate": 1.4375676260549665e-05,
"loss": 0.2895,
"step": 10400
},
{
"epoch": 1.130707639039169,
"grad_norm": 1.5873773097991943,
"learning_rate": 1.4348625838563082e-05,
"loss": 0.3044,
"step": 10450
},
{
"epoch": 1.1361177234364856,
"grad_norm": 3.0002996921539307,
"learning_rate": 1.43215754165765e-05,
"loss": 0.3158,
"step": 10500
},
{
"epoch": 1.1415278078338023,
"grad_norm": 1.7767727375030518,
"learning_rate": 1.4294524994589918e-05,
"loss": 0.2872,
"step": 10550
},
{
"epoch": 1.1469378922311189,
"grad_norm": 2.038928270339966,
"learning_rate": 1.4267474572603335e-05,
"loss": 0.2894,
"step": 10600
},
{
"epoch": 1.1523479766284355,
"grad_norm": 2.048238515853882,
"learning_rate": 1.4240424150616752e-05,
"loss": 0.296,
"step": 10650
},
{
"epoch": 1.157758061025752,
"grad_norm": 1.646410346031189,
"learning_rate": 1.4213373728630169e-05,
"loss": 0.2903,
"step": 10700
},
{
"epoch": 1.1631681454230687,
"grad_norm": 1.8049273490905762,
"learning_rate": 1.4186323306643585e-05,
"loss": 0.2963,
"step": 10750
},
{
"epoch": 1.1685782298203853,
"grad_norm": 2.3281192779541016,
"learning_rate": 1.4159272884657002e-05,
"loss": 0.3096,
"step": 10800
},
{
"epoch": 1.173988314217702,
"grad_norm": 2.3356218338012695,
"learning_rate": 1.413222246267042e-05,
"loss": 0.3096,
"step": 10850
},
{
"epoch": 1.1793983986150183,
"grad_norm": 2.3634541034698486,
"learning_rate": 1.4105172040683836e-05,
"loss": 0.2944,
"step": 10900
},
{
"epoch": 1.1848084830123349,
"grad_norm": 2.3536300659179688,
"learning_rate": 1.4078121618697253e-05,
"loss": 0.2938,
"step": 10950
},
{
"epoch": 1.1902185674096515,
"grad_norm": 2.2869951725006104,
"learning_rate": 1.4051612205150403e-05,
"loss": 0.298,
"step": 11000
},
{
"epoch": 1.195628651806968,
"grad_norm": 2.1792991161346436,
"learning_rate": 1.402456178316382e-05,
"loss": 0.2931,
"step": 11050
},
{
"epoch": 1.2010387362042847,
"grad_norm": 1.900748372077942,
"learning_rate": 1.3997511361177237e-05,
"loss": 0.2837,
"step": 11100
},
{
"epoch": 1.2064488206016013,
"grad_norm": 1.2853705883026123,
"learning_rate": 1.3970460939190654e-05,
"loss": 0.3129,
"step": 11150
},
{
"epoch": 1.211858904998918,
"grad_norm": 2.5928916931152344,
"learning_rate": 1.394341051720407e-05,
"loss": 0.2912,
"step": 11200
},
{
"epoch": 1.2172689893962345,
"grad_norm": 1.8576390743255615,
"learning_rate": 1.3916360095217488e-05,
"loss": 0.2999,
"step": 11250
},
{
"epoch": 1.2226790737935511,
"grad_norm": 1.708588719367981,
"learning_rate": 1.3889309673230905e-05,
"loss": 0.2923,
"step": 11300
},
{
"epoch": 1.2280891581908677,
"grad_norm": 2.3297269344329834,
"learning_rate": 1.3862259251244321e-05,
"loss": 0.2894,
"step": 11350
},
{
"epoch": 1.2334992425881843,
"grad_norm": 1.9831498861312866,
"learning_rate": 1.3835208829257738e-05,
"loss": 0.3088,
"step": 11400
},
{
"epoch": 1.238909326985501,
"grad_norm": 1.94819974899292,
"learning_rate": 1.3808158407271155e-05,
"loss": 0.28,
"step": 11450
},
{
"epoch": 1.2443194113828175,
"grad_norm": 2.235166549682617,
"learning_rate": 1.3781107985284572e-05,
"loss": 0.2992,
"step": 11500
},
{
"epoch": 1.2497294957801341,
"grad_norm": 3.5507967472076416,
"learning_rate": 1.375405756329799e-05,
"loss": 0.3057,
"step": 11550
},
{
"epoch": 1.2551395801774508,
"grad_norm": 2.4944968223571777,
"learning_rate": 1.3727007141311406e-05,
"loss": 0.2928,
"step": 11600
},
{
"epoch": 1.2605496645747674,
"grad_norm": 2.121217727661133,
"learning_rate": 1.3699956719324823e-05,
"loss": 0.2912,
"step": 11650
},
{
"epoch": 1.265959748972084,
"grad_norm": 1.9403678178787231,
"learning_rate": 1.367290629733824e-05,
"loss": 0.2787,
"step": 11700
},
{
"epoch": 1.2713698333694006,
"grad_norm": 2.8676600456237793,
"learning_rate": 1.3645855875351657e-05,
"loss": 0.2966,
"step": 11750
},
{
"epoch": 1.2767799177667172,
"grad_norm": 1.6059439182281494,
"learning_rate": 1.3618805453365074e-05,
"loss": 0.2953,
"step": 11800
},
{
"epoch": 1.2821900021640338,
"grad_norm": 1.5992461442947388,
"learning_rate": 1.359175503137849e-05,
"loss": 0.2858,
"step": 11850
},
{
"epoch": 1.2876000865613504,
"grad_norm": 2.614466667175293,
"learning_rate": 1.3564704609391908e-05,
"loss": 0.2905,
"step": 11900
},
{
"epoch": 1.293010170958667,
"grad_norm": 2.2092373371124268,
"learning_rate": 1.3537654187405325e-05,
"loss": 0.2939,
"step": 11950
},
{
"epoch": 1.2984202553559836,
"grad_norm": 1.8631094694137573,
"learning_rate": 1.3510603765418742e-05,
"loss": 0.3166,
"step": 12000
},
{
"epoch": 1.3038303397533002,
"grad_norm": 2.1259829998016357,
"learning_rate": 1.3483553343432159e-05,
"loss": 0.2941,
"step": 12050
},
{
"epoch": 1.3092404241506168,
"grad_norm": 2.443293333053589,
"learning_rate": 1.3456502921445575e-05,
"loss": 0.2841,
"step": 12100
},
{
"epoch": 1.3146505085479334,
"grad_norm": 1.8442476987838745,
"learning_rate": 1.3429452499458992e-05,
"loss": 0.2988,
"step": 12150
},
{
"epoch": 1.32006059294525,
"grad_norm": 2.4069666862487793,
"learning_rate": 1.340240207747241e-05,
"loss": 0.2897,
"step": 12200
},
{
"epoch": 1.3254706773425666,
"grad_norm": 3.02299427986145,
"learning_rate": 1.3375351655485826e-05,
"loss": 0.3033,
"step": 12250
},
{
"epoch": 1.3308807617398832,
"grad_norm": 1.7643568515777588,
"learning_rate": 1.3348301233499243e-05,
"loss": 0.285,
"step": 12300
},
{
"epoch": 1.3362908461371998,
"grad_norm": 1.788694977760315,
"learning_rate": 1.332125081151266e-05,
"loss": 0.2863,
"step": 12350
},
{
"epoch": 1.3417009305345164,
"grad_norm": 1.921799898147583,
"learning_rate": 1.3294200389526077e-05,
"loss": 0.3018,
"step": 12400
},
{
"epoch": 1.347111014931833,
"grad_norm": 1.9117213487625122,
"learning_rate": 1.3267149967539494e-05,
"loss": 0.2934,
"step": 12450
},
{
"epoch": 1.3525210993291497,
"grad_norm": 1.407914400100708,
"learning_rate": 1.3240099545552911e-05,
"loss": 0.2976,
"step": 12500
},
{
"epoch": 1.3579311837264663,
"grad_norm": 1.9407671689987183,
"learning_rate": 1.3213049123566328e-05,
"loss": 0.3091,
"step": 12550
},
{
"epoch": 1.3633412681237829,
"grad_norm": 2.697124481201172,
"learning_rate": 1.3185998701579745e-05,
"loss": 0.2967,
"step": 12600
},
{
"epoch": 1.3687513525210993,
"grad_norm": 2.1249263286590576,
"learning_rate": 1.3158948279593162e-05,
"loss": 0.3008,
"step": 12650
},
{
"epoch": 1.3741614369184159,
"grad_norm": 2.2134153842926025,
"learning_rate": 1.3131897857606579e-05,
"loss": 0.2971,
"step": 12700
},
{
"epoch": 1.3795715213157325,
"grad_norm": 2.4941091537475586,
"learning_rate": 1.3104847435619996e-05,
"loss": 0.2909,
"step": 12750
},
{
"epoch": 1.384981605713049,
"grad_norm": 2.3787550926208496,
"learning_rate": 1.3077797013633413e-05,
"loss": 0.3048,
"step": 12800
},
{
"epoch": 1.3903916901103657,
"grad_norm": 1.608185887336731,
"learning_rate": 1.305074659164683e-05,
"loss": 0.2942,
"step": 12850
},
{
"epoch": 1.3958017745076823,
"grad_norm": 2.492053270339966,
"learning_rate": 1.3023696169660246e-05,
"loss": 0.2814,
"step": 12900
},
{
"epoch": 1.4012118589049989,
"grad_norm": 1.7174736261367798,
"learning_rate": 1.2996645747673663e-05,
"loss": 0.3046,
"step": 12950
},
{
"epoch": 1.4066219433023155,
"grad_norm": 1.7538783550262451,
"learning_rate": 1.296959532568708e-05,
"loss": 0.2958,
"step": 13000
},
{
"epoch": 1.412032027699632,
"grad_norm": 2.1111483573913574,
"learning_rate": 1.2942544903700497e-05,
"loss": 0.295,
"step": 13050
},
{
"epoch": 1.4174421120969487,
"grad_norm": 1.6928726434707642,
"learning_rate": 1.2915494481713914e-05,
"loss": 0.2899,
"step": 13100
},
{
"epoch": 1.4228521964942653,
"grad_norm": 2.2727274894714355,
"learning_rate": 1.2888444059727333e-05,
"loss": 0.309,
"step": 13150
},
{
"epoch": 1.428262280891582,
"grad_norm": 1.9953638315200806,
"learning_rate": 1.286139363774075e-05,
"loss": 0.2911,
"step": 13200
},
{
"epoch": 1.4336723652888985,
"grad_norm": 1.7269879579544067,
"learning_rate": 1.2834343215754167e-05,
"loss": 0.3023,
"step": 13250
},
{
"epoch": 1.4390824496862151,
"grad_norm": 2.24790620803833,
"learning_rate": 1.2807292793767584e-05,
"loss": 0.2737,
"step": 13300
},
{
"epoch": 1.4444925340835317,
"grad_norm": 1.5782350301742554,
"learning_rate": 1.2780242371781e-05,
"loss": 0.2814,
"step": 13350
},
{
"epoch": 1.4499026184808483,
"grad_norm": 1.7679520845413208,
"learning_rate": 1.2753191949794417e-05,
"loss": 0.2947,
"step": 13400
},
{
"epoch": 1.455312702878165,
"grad_norm": 2.2966043949127197,
"learning_rate": 1.2726141527807834e-05,
"loss": 0.3022,
"step": 13450
},
{
"epoch": 1.4607227872754815,
"grad_norm": 2.3050730228424072,
"learning_rate": 1.2699091105821253e-05,
"loss": 0.2854,
"step": 13500
},
{
"epoch": 1.4661328716727982,
"grad_norm": 2.056730270385742,
"learning_rate": 1.267204068383467e-05,
"loss": 0.304,
"step": 13550
},
{
"epoch": 1.4715429560701148,
"grad_norm": 1.7711883783340454,
"learning_rate": 1.2644990261848087e-05,
"loss": 0.2874,
"step": 13600
},
{
"epoch": 1.4769530404674314,
"grad_norm": 2.4947686195373535,
"learning_rate": 1.2617939839861504e-05,
"loss": 0.2863,
"step": 13650
},
{
"epoch": 1.482363124864748,
"grad_norm": 2.20076060295105,
"learning_rate": 1.259088941787492e-05,
"loss": 0.2902,
"step": 13700
},
{
"epoch": 1.4877732092620644,
"grad_norm": 1.9857763051986694,
"learning_rate": 1.2563838995888338e-05,
"loss": 0.2976,
"step": 13750
},
{
"epoch": 1.493183293659381,
"grad_norm": 1.9671763181686401,
"learning_rate": 1.2536788573901755e-05,
"loss": 0.2987,
"step": 13800
},
{
"epoch": 1.4985933780566976,
"grad_norm": 2.057361602783203,
"learning_rate": 1.2509738151915172e-05,
"loss": 0.2935,
"step": 13850
},
{
"epoch": 1.5040034624540142,
"grad_norm": 1.8263319730758667,
"learning_rate": 1.2482687729928588e-05,
"loss": 0.2981,
"step": 13900
},
{
"epoch": 1.5094135468513308,
"grad_norm": 1.7946516275405884,
"learning_rate": 1.2455637307942005e-05,
"loss": 0.3111,
"step": 13950
},
{
"epoch": 1.5148236312486474,
"grad_norm": 1.6943495273590088,
"learning_rate": 1.2428586885955422e-05,
"loss": 0.2949,
"step": 14000
},
{
"epoch": 1.520233715645964,
"grad_norm": 1.7670172452926636,
"learning_rate": 1.240153646396884e-05,
"loss": 0.285,
"step": 14050
},
{
"epoch": 1.5256438000432806,
"grad_norm": 2.126786470413208,
"learning_rate": 1.2375027050421989e-05,
"loss": 0.2937,
"step": 14100
},
{
"epoch": 1.5310538844405972,
"grad_norm": 1.107203483581543,
"learning_rate": 1.2347976628435406e-05,
"loss": 0.2831,
"step": 14150
},
{
"epoch": 1.5364639688379138,
"grad_norm": 1.985102891921997,
"learning_rate": 1.2320926206448823e-05,
"loss": 0.3039,
"step": 14200
},
{
"epoch": 1.5418740532352304,
"grad_norm": 2.21325945854187,
"learning_rate": 1.229387578446224e-05,
"loss": 0.289,
"step": 14250
},
{
"epoch": 1.547284137632547,
"grad_norm": 1.7797402143478394,
"learning_rate": 1.2266825362475657e-05,
"loss": 0.2999,
"step": 14300
},
{
"epoch": 1.5526942220298636,
"grad_norm": 1.8672326803207397,
"learning_rate": 1.2239774940489074e-05,
"loss": 0.2992,
"step": 14350
},
{
"epoch": 1.5581043064271802,
"grad_norm": 1.652946949005127,
"learning_rate": 1.221272451850249e-05,
"loss": 0.3027,
"step": 14400
},
{
"epoch": 1.5635143908244968,
"grad_norm": 2.3132338523864746,
"learning_rate": 1.2185674096515908e-05,
"loss": 0.298,
"step": 14450
},
{
"epoch": 1.5689244752218134,
"grad_norm": 1.5479793548583984,
"learning_rate": 1.2158623674529324e-05,
"loss": 0.3063,
"step": 14500
},
{
"epoch": 1.57433455961913,
"grad_norm": 2.814436912536621,
"learning_rate": 1.2131573252542741e-05,
"loss": 0.2853,
"step": 14550
},
{
"epoch": 1.5797446440164467,
"grad_norm": 1.8235867023468018,
"learning_rate": 1.2104522830556158e-05,
"loss": 0.2854,
"step": 14600
},
{
"epoch": 1.5851547284137633,
"grad_norm": 2.052896499633789,
"learning_rate": 1.2077472408569575e-05,
"loss": 0.2952,
"step": 14650
},
{
"epoch": 1.5905648128110799,
"grad_norm": 1.8663949966430664,
"learning_rate": 1.2050421986582992e-05,
"loss": 0.2934,
"step": 14700
},
{
"epoch": 1.5959748972083965,
"grad_norm": 1.9430421590805054,
"learning_rate": 1.202337156459641e-05,
"loss": 0.294,
"step": 14750
},
{
"epoch": 1.601384981605713,
"grad_norm": 2.4206387996673584,
"learning_rate": 1.1996321142609826e-05,
"loss": 0.3056,
"step": 14800
},
{
"epoch": 1.6067950660030297,
"grad_norm": 2.826404333114624,
"learning_rate": 1.1969270720623243e-05,
"loss": 0.2945,
"step": 14850
},
{
"epoch": 1.6122051504003463,
"grad_norm": 1.83290433883667,
"learning_rate": 1.194222029863666e-05,
"loss": 0.3009,
"step": 14900
},
{
"epoch": 1.617615234797663,
"grad_norm": 1.7646769285202026,
"learning_rate": 1.1915169876650077e-05,
"loss": 0.283,
"step": 14950
},
{
"epoch": 1.6230253191949795,
"grad_norm": 1.6220637559890747,
"learning_rate": 1.1888119454663494e-05,
"loss": 0.3089,
"step": 15000
},
{
"epoch": 1.628435403592296,
"grad_norm": 1.905791163444519,
"learning_rate": 1.186106903267691e-05,
"loss": 0.289,
"step": 15050
},
{
"epoch": 1.6338454879896127,
"grad_norm": 1.5982850790023804,
"learning_rate": 1.1834018610690328e-05,
"loss": 0.2913,
"step": 15100
},
{
"epoch": 1.6392555723869293,
"grad_norm": 2.2029902935028076,
"learning_rate": 1.1806968188703745e-05,
"loss": 0.283,
"step": 15150
},
{
"epoch": 1.644665656784246,
"grad_norm": 2.1125001907348633,
"learning_rate": 1.1779917766717162e-05,
"loss": 0.2924,
"step": 15200
},
{
"epoch": 1.6500757411815625,
"grad_norm": 1.6996169090270996,
"learning_rate": 1.1752867344730578e-05,
"loss": 0.3027,
"step": 15250
},
{
"epoch": 1.6554858255788791,
"grad_norm": 1.54236900806427,
"learning_rate": 1.1725816922743995e-05,
"loss": 0.306,
"step": 15300
},
{
"epoch": 1.6608959099761957,
"grad_norm": 2.4978113174438477,
"learning_rate": 1.1698766500757412e-05,
"loss": 0.3037,
"step": 15350
},
{
"epoch": 1.6663059943735123,
"grad_norm": 1.9665488004684448,
"learning_rate": 1.167171607877083e-05,
"loss": 0.3019,
"step": 15400
},
{
"epoch": 1.671716078770829,
"grad_norm": 1.758878231048584,
"learning_rate": 1.1644665656784246e-05,
"loss": 0.3005,
"step": 15450
},
{
"epoch": 1.6771261631681456,
"grad_norm": 1.5586014986038208,
"learning_rate": 1.1617615234797663e-05,
"loss": 0.3004,
"step": 15500
},
{
"epoch": 1.6825362475654622,
"grad_norm": 1.176145315170288,
"learning_rate": 1.159056481281108e-05,
"loss": 0.2877,
"step": 15550
},
{
"epoch": 1.6879463319627788,
"grad_norm": 1.9588565826416016,
"learning_rate": 1.1563514390824497e-05,
"loss": 0.297,
"step": 15600
},
{
"epoch": 1.6933564163600954,
"grad_norm": 2.419602870941162,
"learning_rate": 1.1536463968837914e-05,
"loss": 0.303,
"step": 15650
},
{
"epoch": 1.698766500757412,
"grad_norm": 2.2711267471313477,
"learning_rate": 1.1509413546851331e-05,
"loss": 0.2947,
"step": 15700
},
{
"epoch": 1.7041765851547284,
"grad_norm": 2.223388433456421,
"learning_rate": 1.1482363124864748e-05,
"loss": 0.2741,
"step": 15750
},
{
"epoch": 1.709586669552045,
"grad_norm": 3.3137338161468506,
"learning_rate": 1.1455312702878165e-05,
"loss": 0.2979,
"step": 15800
},
{
"epoch": 1.7149967539493616,
"grad_norm": 1.5524086952209473,
"learning_rate": 1.1428262280891582e-05,
"loss": 0.2848,
"step": 15850
},
{
"epoch": 1.7204068383466782,
"grad_norm": 2.8329715728759766,
"learning_rate": 1.1401211858904999e-05,
"loss": 0.2835,
"step": 15900
},
{
"epoch": 1.7258169227439948,
"grad_norm": 1.8086459636688232,
"learning_rate": 1.1374161436918416e-05,
"loss": 0.2967,
"step": 15950
},
{
"epoch": 1.7312270071413114,
"grad_norm": 1.848944902420044,
"learning_rate": 1.1347652023371565e-05,
"loss": 0.2906,
"step": 16000
},
{
"epoch": 1.736637091538628,
"grad_norm": 1.8679676055908203,
"learning_rate": 1.1320601601384982e-05,
"loss": 0.308,
"step": 16050
},
{
"epoch": 1.7420471759359446,
"grad_norm": 2.902830123901367,
"learning_rate": 1.12935511793984e-05,
"loss": 0.2892,
"step": 16100
},
{
"epoch": 1.7474572603332612,
"grad_norm": 2.772718906402588,
"learning_rate": 1.1266500757411816e-05,
"loss": 0.2995,
"step": 16150
},
{
"epoch": 1.7528673447305778,
"grad_norm": 2.3687069416046143,
"learning_rate": 1.1239450335425233e-05,
"loss": 0.2875,
"step": 16200
},
{
"epoch": 1.7582774291278944,
"grad_norm": 2.0232176780700684,
"learning_rate": 1.121239991343865e-05,
"loss": 0.2951,
"step": 16250
},
{
"epoch": 1.763687513525211,
"grad_norm": 1.7384998798370361,
"learning_rate": 1.1185349491452067e-05,
"loss": 0.3026,
"step": 16300
},
{
"epoch": 1.7690975979225276,
"grad_norm": 2.728248357772827,
"learning_rate": 1.1158299069465484e-05,
"loss": 0.2839,
"step": 16350
},
{
"epoch": 1.7745076823198442,
"grad_norm": 2.3606762886047363,
"learning_rate": 1.11312486474789e-05,
"loss": 0.3013,
"step": 16400
},
{
"epoch": 1.7799177667171608,
"grad_norm": 1.9604283571243286,
"learning_rate": 1.1104198225492318e-05,
"loss": 0.2878,
"step": 16450
},
{
"epoch": 1.7853278511144772,
"grad_norm": 2.1552507877349854,
"learning_rate": 1.1077147803505735e-05,
"loss": 0.3008,
"step": 16500
},
{
"epoch": 1.7907379355117938,
"grad_norm": 1.8902772665023804,
"learning_rate": 1.1050097381519153e-05,
"loss": 0.2961,
"step": 16550
},
{
"epoch": 1.7961480199091104,
"grad_norm": 3.5575530529022217,
"learning_rate": 1.102304695953257e-05,
"loss": 0.291,
"step": 16600
},
{
"epoch": 1.801558104306427,
"grad_norm": 1.5154167413711548,
"learning_rate": 1.0995996537545987e-05,
"loss": 0.2858,
"step": 16650
},
{
"epoch": 1.8069681887037436,
"grad_norm": 2.2316699028015137,
"learning_rate": 1.0968946115559404e-05,
"loss": 0.2928,
"step": 16700
},
{
"epoch": 1.8123782731010603,
"grad_norm": 2.4230222702026367,
"learning_rate": 1.0941895693572821e-05,
"loss": 0.2979,
"step": 16750
},
{
"epoch": 1.8177883574983769,
"grad_norm": 2.080579996109009,
"learning_rate": 1.0914845271586238e-05,
"loss": 0.2957,
"step": 16800
},
{
"epoch": 1.8231984418956935,
"grad_norm": 1.679219365119934,
"learning_rate": 1.0887794849599655e-05,
"loss": 0.311,
"step": 16850
},
{
"epoch": 1.82860852629301,
"grad_norm": 3.401188611984253,
"learning_rate": 1.0860744427613072e-05,
"loss": 0.2906,
"step": 16900
},
{
"epoch": 1.8340186106903267,
"grad_norm": 2.529232978820801,
"learning_rate": 1.0833694005626489e-05,
"loss": 0.2924,
"step": 16950
},
{
"epoch": 1.8394286950876433,
"grad_norm": 2.5550222396850586,
"learning_rate": 1.0806643583639906e-05,
"loss": 0.3036,
"step": 17000
},
{
"epoch": 1.84483877948496,
"grad_norm": 2.1273419857025146,
"learning_rate": 1.0779593161653323e-05,
"loss": 0.2959,
"step": 17050
},
{
"epoch": 1.8502488638822765,
"grad_norm": 2.560131788253784,
"learning_rate": 1.075254273966674e-05,
"loss": 0.2903,
"step": 17100
},
{
"epoch": 1.855658948279593,
"grad_norm": 2.8354437351226807,
"learning_rate": 1.0725492317680156e-05,
"loss": 0.2945,
"step": 17150
},
{
"epoch": 1.8610690326769097,
"grad_norm": 1.8256155252456665,
"learning_rate": 1.0698441895693575e-05,
"loss": 0.3005,
"step": 17200
},
{
"epoch": 1.8664791170742263,
"grad_norm": 2.123786211013794,
"learning_rate": 1.0671391473706992e-05,
"loss": 0.2916,
"step": 17250
},
{
"epoch": 1.871889201471543,
"grad_norm": 2.604180097579956,
"learning_rate": 1.0644341051720409e-05,
"loss": 0.2676,
"step": 17300
},
{
"epoch": 1.8772992858688595,
"grad_norm": 2.563234806060791,
"learning_rate": 1.0617290629733826e-05,
"loss": 0.2913,
"step": 17350
},
{
"epoch": 1.8827093702661761,
"grad_norm": 1.5203748941421509,
"learning_rate": 1.0590240207747243e-05,
"loss": 0.2963,
"step": 17400
},
{
"epoch": 1.8881194546634927,
"grad_norm": 2.2248854637145996,
"learning_rate": 1.056318978576066e-05,
"loss": 0.2863,
"step": 17450
},
{
"epoch": 1.8935295390608093,
"grad_norm": 2.470536708831787,
"learning_rate": 1.0536139363774077e-05,
"loss": 0.2908,
"step": 17500
},
{
"epoch": 1.898939623458126,
"grad_norm": 2.862772226333618,
"learning_rate": 1.0509088941787494e-05,
"loss": 0.2777,
"step": 17550
},
{
"epoch": 1.9043497078554426,
"grad_norm": 1.9825083017349243,
"learning_rate": 1.048203851980091e-05,
"loss": 0.2876,
"step": 17600
},
{
"epoch": 1.9097597922527592,
"grad_norm": 2.3272595405578613,
"learning_rate": 1.0454988097814327e-05,
"loss": 0.2976,
"step": 17650
},
{
"epoch": 1.9151698766500758,
"grad_norm": 1.9145753383636475,
"learning_rate": 1.0427937675827744e-05,
"loss": 0.2906,
"step": 17700
},
{
"epoch": 1.9205799610473924,
"grad_norm": 2.9724373817443848,
"learning_rate": 1.0400887253841161e-05,
"loss": 0.2955,
"step": 17750
},
{
"epoch": 1.925990045444709,
"grad_norm": 2.3501229286193848,
"learning_rate": 1.0373836831854578e-05,
"loss": 0.3014,
"step": 17800
},
{
"epoch": 1.9314001298420256,
"grad_norm": 2.1227500438690186,
"learning_rate": 1.0346786409867995e-05,
"loss": 0.3032,
"step": 17850
},
{
"epoch": 1.9368102142393422,
"grad_norm": 2.2915961742401123,
"learning_rate": 1.0319735987881412e-05,
"loss": 0.3016,
"step": 17900
},
{
"epoch": 1.9422202986366588,
"grad_norm": 2.912987232208252,
"learning_rate": 1.0292685565894829e-05,
"loss": 0.3037,
"step": 17950
},
{
"epoch": 1.9476303830339754,
"grad_norm": 2.616238832473755,
"learning_rate": 1.0265635143908246e-05,
"loss": 0.3021,
"step": 18000
},
{
"epoch": 1.953040467431292,
"grad_norm": 2.370420455932617,
"learning_rate": 1.0239125730361394e-05,
"loss": 0.2975,
"step": 18050
},
{
"epoch": 1.9584505518286086,
"grad_norm": 3.267179012298584,
"learning_rate": 1.0212075308374811e-05,
"loss": 0.2798,
"step": 18100
},
{
"epoch": 1.9638606362259252,
"grad_norm": 3.6033174991607666,
"learning_rate": 1.0185024886388228e-05,
"loss": 0.2867,
"step": 18150
},
{
"epoch": 1.9692707206232418,
"grad_norm": 2.4550583362579346,
"learning_rate": 1.0157974464401645e-05,
"loss": 0.3088,
"step": 18200
},
{
"epoch": 1.9746808050205584,
"grad_norm": 1.8558053970336914,
"learning_rate": 1.0130924042415062e-05,
"loss": 0.2727,
"step": 18250
},
{
"epoch": 1.980090889417875,
"grad_norm": 2.0998899936676025,
"learning_rate": 1.0103873620428479e-05,
"loss": 0.2905,
"step": 18300
},
{
"epoch": 1.9855009738151916,
"grad_norm": 2.2105298042297363,
"learning_rate": 1.0076823198441896e-05,
"loss": 0.2912,
"step": 18350
},
{
"epoch": 1.9909110582125082,
"grad_norm": 1.970096230506897,
"learning_rate": 1.0049772776455313e-05,
"loss": 0.3149,
"step": 18400
},
{
"epoch": 1.9963211426098249,
"grad_norm": 2.061622381210327,
"learning_rate": 1.002272235446873e-05,
"loss": 0.2945,
"step": 18450
},
{
"epoch": 2.0,
"eval_accuracy": 0.8672147876951564,
"eval_hamming_loss": 0.13278521230484366,
"eval_jaccard_score": 0.5006501704330262,
"eval_loss": 0.30577707290649414,
"eval_runtime": 64.9813,
"eval_samples_per_second": 487.586,
"eval_steps_per_second": 30.486,
"step": 18484
}
],
"logging_steps": 50,
"max_steps": 36968,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.945210424147251e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}