turner_ml / ml_models /ner_model /trainer_state.json
aamirtaymoor's picture
Upload 17 files
f06a730 verified
{
"best_metric": 0.591158390045166,
"best_model_checkpoint": "/home/ubuntu/lf_output_V12_locked_warmup/checkpoint-15189",
"epoch": 90.0,
"global_step": 455670,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 2.48e-07,
"loss": 0.3185,
"step": 500
},
{
"epoch": 0.2,
"learning_rate": 4.98e-07,
"loss": 0.3178,
"step": 1000
},
{
"epoch": 0.3,
"learning_rate": 7.480000000000001e-07,
"loss": 0.3206,
"step": 1500
},
{
"epoch": 0.4,
"learning_rate": 9.98e-07,
"loss": 0.3145,
"step": 2000
},
{
"epoch": 0.49,
"learning_rate": 1.2475000000000001e-06,
"loss": 0.3127,
"step": 2500
},
{
"epoch": 0.59,
"learning_rate": 1.4975e-06,
"loss": 0.3226,
"step": 3000
},
{
"epoch": 0.69,
"learning_rate": 1.7470000000000002e-06,
"loss": 0.3161,
"step": 3500
},
{
"epoch": 0.79,
"learning_rate": 1.997e-06,
"loss": 0.3184,
"step": 4000
},
{
"epoch": 0.89,
"learning_rate": 2.2470000000000003e-06,
"loss": 0.3248,
"step": 4500
},
{
"epoch": 0.99,
"learning_rate": 2.4970000000000004e-06,
"loss": 0.3276,
"step": 5000
},
{
"epoch": 1.0,
"eval_accuracy": 0.8403498029009913,
"eval_f1": 0.4677493043153217,
"eval_loss": 0.5944358706474304,
"eval_precision": 0.4114724150529777,
"eval_recall": 0.5418591224018475,
"eval_runtime": 23.8244,
"eval_samples_per_second": 127.978,
"eval_steps_per_second": 16.034,
"step": 5063
},
{
"epoch": 1.09,
"learning_rate": 2.4975364053461002e-06,
"loss": 0.3306,
"step": 5500
},
{
"epoch": 1.19,
"learning_rate": 2.4950428884899264e-06,
"loss": 0.3286,
"step": 6000
},
{
"epoch": 1.28,
"learning_rate": 2.4925493716337525e-06,
"loss": 0.3257,
"step": 6500
},
{
"epoch": 1.38,
"learning_rate": 2.4900558547775786e-06,
"loss": 0.3242,
"step": 7000
},
{
"epoch": 1.48,
"learning_rate": 2.4875623379214043e-06,
"loss": 0.3223,
"step": 7500
},
{
"epoch": 1.58,
"learning_rate": 2.4850738080989428e-06,
"loss": 0.3276,
"step": 8000
},
{
"epoch": 1.68,
"learning_rate": 2.482580291242769e-06,
"loss": 0.3321,
"step": 8500
},
{
"epoch": 1.78,
"learning_rate": 2.480086774386595e-06,
"loss": 0.3232,
"step": 9000
},
{
"epoch": 1.88,
"learning_rate": 2.477593257530421e-06,
"loss": 0.3214,
"step": 9500
},
{
"epoch": 1.98,
"learning_rate": 2.4751047277079596e-06,
"loss": 0.3293,
"step": 10000
},
{
"epoch": 2.0,
"eval_accuracy": 0.8365149833518313,
"eval_f1": 0.4657225216192505,
"eval_loss": 0.6071960926055908,
"eval_precision": 0.4115171650055371,
"eval_recall": 0.5363741339491916,
"eval_runtime": 24.2984,
"eval_samples_per_second": 125.482,
"eval_steps_per_second": 15.721,
"step": 10126
},
{
"epoch": 2.07,
"learning_rate": 2.472616197885498e-06,
"loss": 0.3365,
"step": 10500
},
{
"epoch": 2.17,
"learning_rate": 2.4701226810293237e-06,
"loss": 0.3266,
"step": 11000
},
{
"epoch": 2.27,
"learning_rate": 2.4676291641731503e-06,
"loss": 0.3354,
"step": 11500
},
{
"epoch": 2.37,
"learning_rate": 2.465135647316976e-06,
"loss": 0.3345,
"step": 12000
},
{
"epoch": 2.47,
"learning_rate": 2.462642130460802e-06,
"loss": 0.3273,
"step": 12500
},
{
"epoch": 2.57,
"learning_rate": 2.4601536006383405e-06,
"loss": 0.328,
"step": 13000
},
{
"epoch": 2.67,
"learning_rate": 2.4576600837821667e-06,
"loss": 0.3278,
"step": 13500
},
{
"epoch": 2.77,
"learning_rate": 2.455166566925993e-06,
"loss": 0.316,
"step": 14000
},
{
"epoch": 2.86,
"learning_rate": 2.4526730500698185e-06,
"loss": 0.3277,
"step": 14500
},
{
"epoch": 2.96,
"learning_rate": 2.4501795332136446e-06,
"loss": 0.3325,
"step": 15000
},
{
"epoch": 3.0,
"eval_accuracy": 0.8391633816831873,
"eval_f1": 0.4654475777898113,
"eval_loss": 0.591158390045166,
"eval_precision": 0.41035622475211164,
"eval_recall": 0.5376250962278676,
"eval_runtime": 24.0176,
"eval_samples_per_second": 126.949,
"eval_steps_per_second": 15.905,
"step": 15189
},
{
"epoch": 3.06,
"learning_rate": 2.4476860163574707e-06,
"loss": 0.3239,
"step": 15500
},
{
"epoch": 3.16,
"learning_rate": 2.445192499501297e-06,
"loss": 0.3314,
"step": 16000
},
{
"epoch": 3.26,
"learning_rate": 2.4427039696788353e-06,
"loss": 0.3241,
"step": 16500
},
{
"epoch": 3.36,
"learning_rate": 2.4402104528226614e-06,
"loss": 0.3135,
"step": 17000
},
{
"epoch": 3.46,
"learning_rate": 2.437716935966487e-06,
"loss": 0.3267,
"step": 17500
},
{
"epoch": 3.56,
"learning_rate": 2.4352234191103133e-06,
"loss": 0.3215,
"step": 18000
},
{
"epoch": 3.65,
"learning_rate": 2.4327299022541394e-06,
"loss": 0.315,
"step": 18500
},
{
"epoch": 3.75,
"learning_rate": 2.4302363853979655e-06,
"loss": 0.323,
"step": 19000
},
{
"epoch": 3.85,
"learning_rate": 2.427742868541791e-06,
"loss": 0.3224,
"step": 19500
},
{
"epoch": 3.95,
"learning_rate": 2.4252493516856178e-06,
"loss": 0.3183,
"step": 20000
},
{
"epoch": 4.0,
"eval_accuracy": 0.8383711584829117,
"eval_f1": 0.4724905501889962,
"eval_loss": 0.5957902669906616,
"eval_precision": 0.4192129974660903,
"eval_recall": 0.5412817551963048,
"eval_runtime": 23.7397,
"eval_samples_per_second": 128.435,
"eval_steps_per_second": 16.091,
"step": 20252
},
{
"epoch": 4.05,
"learning_rate": 2.4227658088968685e-06,
"loss": 0.3154,
"step": 20500
},
{
"epoch": 4.15,
"learning_rate": 2.4202722920406942e-06,
"loss": 0.319,
"step": 21000
},
{
"epoch": 4.25,
"learning_rate": 2.4177787751845203e-06,
"loss": 0.3196,
"step": 21500
},
{
"epoch": 4.35,
"learning_rate": 2.4152852583283465e-06,
"loss": 0.3197,
"step": 22000
},
{
"epoch": 4.44,
"learning_rate": 2.4127917414721726e-06,
"loss": 0.3256,
"step": 22500
},
{
"epoch": 4.54,
"learning_rate": 2.4102982246159987e-06,
"loss": 0.3183,
"step": 23000
},
{
"epoch": 4.64,
"learning_rate": 2.407809694793537e-06,
"loss": 0.3209,
"step": 23500
},
{
"epoch": 4.74,
"learning_rate": 2.405316177937363e-06,
"loss": 0.3317,
"step": 24000
},
{
"epoch": 4.84,
"learning_rate": 2.402822661081189e-06,
"loss": 0.3144,
"step": 24500
},
{
"epoch": 4.94,
"learning_rate": 2.400329144225015e-06,
"loss": 0.312,
"step": 25000
},
{
"epoch": 5.0,
"eval_accuracy": 0.8374870833173869,
"eval_f1": 0.4686698121016472,
"eval_loss": 0.6015240550041199,
"eval_precision": 0.4111837327523602,
"eval_recall": 0.5448421862971516,
"eval_runtime": 24.0516,
"eval_samples_per_second": 126.769,
"eval_steps_per_second": 15.883,
"step": 25315
},
{
"epoch": 5.04,
"learning_rate": 2.3978356273688412e-06,
"loss": 0.323,
"step": 25500
},
{
"epoch": 5.14,
"learning_rate": 2.3953421105126674e-06,
"loss": 0.325,
"step": 26000
},
{
"epoch": 5.23,
"learning_rate": 2.392848593656493e-06,
"loss": 0.3243,
"step": 26500
},
{
"epoch": 5.33,
"learning_rate": 2.390360063834032e-06,
"loss": 0.3281,
"step": 27000
},
{
"epoch": 5.43,
"learning_rate": 2.3878665469778576e-06,
"loss": 0.3126,
"step": 27500
},
{
"epoch": 5.53,
"learning_rate": 2.3853730301216838e-06,
"loss": 0.3271,
"step": 28000
},
{
"epoch": 5.63,
"learning_rate": 2.38287951326551e-06,
"loss": 0.3153,
"step": 28500
},
{
"epoch": 5.73,
"learning_rate": 2.380385996409336e-06,
"loss": 0.3243,
"step": 29000
},
{
"epoch": 5.83,
"learning_rate": 2.3778924795531617e-06,
"loss": 0.3068,
"step": 29500
},
{
"epoch": 5.93,
"learning_rate": 2.3753989626969883e-06,
"loss": 0.3108,
"step": 30000
},
{
"epoch": 6.0,
"eval_accuracy": 0.8403842473879597,
"eval_f1": 0.47207586933614337,
"eval_loss": 0.60483717918396,
"eval_precision": 0.4200105002625066,
"eval_recall": 0.5388760585065435,
"eval_runtime": 24.2316,
"eval_samples_per_second": 125.828,
"eval_steps_per_second": 15.765,
"step": 30378
},
{
"epoch": 6.02,
"learning_rate": 2.372905445840814e-06,
"loss": 0.3109,
"step": 30500
},
{
"epoch": 6.12,
"learning_rate": 2.3704169160183524e-06,
"loss": 0.308,
"step": 31000
},
{
"epoch": 6.22,
"learning_rate": 2.367928386195891e-06,
"loss": 0.3135,
"step": 31500
},
{
"epoch": 6.32,
"learning_rate": 2.365434869339717e-06,
"loss": 0.3175,
"step": 32000
},
{
"epoch": 6.42,
"learning_rate": 2.362941352483543e-06,
"loss": 0.3083,
"step": 32500
},
{
"epoch": 6.52,
"learning_rate": 2.3604478356273692e-06,
"loss": 0.3176,
"step": 33000
},
{
"epoch": 6.62,
"learning_rate": 2.3579593058049072e-06,
"loss": 0.3176,
"step": 33500
},
{
"epoch": 6.72,
"learning_rate": 2.3554657889487334e-06,
"loss": 0.3123,
"step": 34000
},
{
"epoch": 6.81,
"learning_rate": 2.3529722720925595e-06,
"loss": 0.317,
"step": 34500
},
{
"epoch": 6.91,
"learning_rate": 2.3504787552363856e-06,
"loss": 0.3041,
"step": 35000
},
{
"epoch": 7.0,
"eval_accuracy": 0.8368594282215164,
"eval_f1": 0.4661911316898177,
"eval_loss": 0.617274284362793,
"eval_precision": 0.4114571828289522,
"eval_recall": 0.537721324095458,
"eval_runtime": 23.4998,
"eval_samples_per_second": 129.746,
"eval_steps_per_second": 16.255,
"step": 35441
},
{
"epoch": 7.01,
"learning_rate": 2.3479852383802117e-06,
"loss": 0.3207,
"step": 35500
},
{
"epoch": 7.11,
"learning_rate": 2.345491721524038e-06,
"loss": 0.3163,
"step": 36000
},
{
"epoch": 7.21,
"learning_rate": 2.3429982046678636e-06,
"loss": 0.3053,
"step": 36500
},
{
"epoch": 7.31,
"learning_rate": 2.34050468781169e-06,
"loss": 0.3114,
"step": 37000
},
{
"epoch": 7.41,
"learning_rate": 2.338011170955516e-06,
"loss": 0.3071,
"step": 37500
},
{
"epoch": 7.51,
"learning_rate": 2.335517654099342e-06,
"loss": 0.3131,
"step": 38000
},
{
"epoch": 7.6,
"learning_rate": 2.333024137243168e-06,
"loss": 0.3149,
"step": 38500
},
{
"epoch": 7.7,
"learning_rate": 2.330530620386994e-06,
"loss": 0.3185,
"step": 39000
},
{
"epoch": 7.8,
"learning_rate": 2.32803710353082e-06,
"loss": 0.3135,
"step": 39500
},
{
"epoch": 7.9,
"learning_rate": 2.3255535607420706e-06,
"loss": 0.3119,
"step": 40000
},
{
"epoch": 8.0,
"learning_rate": 2.3230600438858968e-06,
"loss": 0.3154,
"step": 40500
},
{
"epoch": 8.0,
"eval_accuracy": 0.834505721612002,
"eval_f1": 0.46665559338925333,
"eval_loss": 0.6168600916862488,
"eval_precision": 0.41044558071585097,
"eval_recall": 0.5407043879907621,
"eval_runtime": 24.2307,
"eval_samples_per_second": 125.832,
"eval_steps_per_second": 15.765,
"step": 40504
},
{
"epoch": 8.1,
"learning_rate": 2.320566527029723e-06,
"loss": 0.3061,
"step": 41000
},
{
"epoch": 8.2,
"learning_rate": 2.318073010173549e-06,
"loss": 0.3099,
"step": 41500
},
{
"epoch": 8.3,
"learning_rate": 2.315579493317375e-06,
"loss": 0.3216,
"step": 42000
},
{
"epoch": 8.39,
"learning_rate": 2.313085976461201e-06,
"loss": 0.3064,
"step": 42500
},
{
"epoch": 8.49,
"learning_rate": 2.3105924596050274e-06,
"loss": 0.3075,
"step": 43000
},
{
"epoch": 8.59,
"learning_rate": 2.308098942748853e-06,
"loss": 0.3126,
"step": 43500
},
{
"epoch": 8.69,
"learning_rate": 2.3056104129263915e-06,
"loss": 0.3141,
"step": 44000
},
{
"epoch": 8.79,
"learning_rate": 2.3031168960702177e-06,
"loss": 0.3132,
"step": 44500
},
{
"epoch": 8.89,
"learning_rate": 2.300623379214044e-06,
"loss": 0.3156,
"step": 45000
},
{
"epoch": 8.99,
"learning_rate": 2.298134849391582e-06,
"loss": 0.313,
"step": 45500
},
{
"epoch": 9.0,
"eval_accuracy": 0.8388112824830648,
"eval_f1": 0.467983144674874,
"eval_loss": 0.6226583123207092,
"eval_precision": 0.41001882148544955,
"eval_recall": 0.5450346420323325,
"eval_runtime": 23.4782,
"eval_samples_per_second": 129.865,
"eval_steps_per_second": 16.27,
"step": 45567
},
{
"epoch": 9.09,
"learning_rate": 2.2956413325354084e-06,
"loss": 0.3038,
"step": 46000
},
{
"epoch": 9.18,
"learning_rate": 2.293147815679234e-06,
"loss": 0.3101,
"step": 46500
},
{
"epoch": 9.28,
"learning_rate": 2.29065429882306e-06,
"loss": 0.3245,
"step": 47000
},
{
"epoch": 9.38,
"learning_rate": 2.2881607819668863e-06,
"loss": 0.3215,
"step": 47500
},
{
"epoch": 9.48,
"learning_rate": 2.2856672651107124e-06,
"loss": 0.316,
"step": 48000
},
{
"epoch": 9.58,
"learning_rate": 2.2831737482545386e-06,
"loss": 0.3109,
"step": 48500
},
{
"epoch": 9.68,
"learning_rate": 2.2806802313983647e-06,
"loss": 0.3119,
"step": 49000
},
{
"epoch": 9.78,
"learning_rate": 2.2781867145421904e-06,
"loss": 0.3092,
"step": 49500
},
{
"epoch": 9.88,
"learning_rate": 2.275703171753441e-06,
"loss": 0.3061,
"step": 50000
},
{
"epoch": 9.97,
"learning_rate": 2.2732096548972673e-06,
"loss": 0.3205,
"step": 50500
},
{
"epoch": 10.0,
"eval_accuracy": 0.8380420222741016,
"eval_f1": 0.4691952972456483,
"eval_loss": 0.607540488243103,
"eval_precision": 0.4128225747496162,
"eval_recall": 0.5433987682832948,
"eval_runtime": 23.5452,
"eval_samples_per_second": 129.496,
"eval_steps_per_second": 16.224,
"step": 50630
},
{
"epoch": 10.07,
"learning_rate": 2.2707161380410934e-06,
"loss": 0.2944,
"step": 51000
},
{
"epoch": 10.17,
"learning_rate": 2.268222621184919e-06,
"loss": 0.3022,
"step": 51500
},
{
"epoch": 10.27,
"learning_rate": 2.2657291043287456e-06,
"loss": 0.3006,
"step": 52000
},
{
"epoch": 10.37,
"learning_rate": 2.2632355874725713e-06,
"loss": 0.3047,
"step": 52500
},
{
"epoch": 10.47,
"learning_rate": 2.2607420706163975e-06,
"loss": 0.3105,
"step": 53000
},
{
"epoch": 10.57,
"learning_rate": 2.2582485537602236e-06,
"loss": 0.3173,
"step": 53500
},
{
"epoch": 10.67,
"learning_rate": 2.2557550369040497e-06,
"loss": 0.3052,
"step": 54000
},
{
"epoch": 10.76,
"learning_rate": 2.253261520047876e-06,
"loss": 0.2972,
"step": 54500
},
{
"epoch": 10.86,
"learning_rate": 2.2507779772591266e-06,
"loss": 0.299,
"step": 55000
},
{
"epoch": 10.96,
"learning_rate": 2.2482844604029523e-06,
"loss": 0.3055,
"step": 55500
},
{
"epoch": 11.0,
"eval_accuracy": 0.8372344904129512,
"eval_f1": 0.4717340848806366,
"eval_loss": 0.6190317869186401,
"eval_precision": 0.41431275480489227,
"eval_recall": 0.5476327944572749,
"eval_runtime": 24.5177,
"eval_samples_per_second": 124.359,
"eval_steps_per_second": 15.581,
"step": 55693
},
{
"epoch": 11.06,
"learning_rate": 2.245790943546779e-06,
"loss": 0.3127,
"step": 56000
},
{
"epoch": 11.16,
"learning_rate": 2.2432974266906046e-06,
"loss": 0.3065,
"step": 56500
},
{
"epoch": 11.26,
"learning_rate": 2.2408039098344307e-06,
"loss": 0.3004,
"step": 57000
},
{
"epoch": 11.36,
"learning_rate": 2.238310392978257e-06,
"loss": 0.2948,
"step": 57500
},
{
"epoch": 11.46,
"learning_rate": 2.235816876122083e-06,
"loss": 0.2973,
"step": 58000
},
{
"epoch": 11.55,
"learning_rate": 2.2333233592659086e-06,
"loss": 0.3115,
"step": 58500
},
{
"epoch": 11.65,
"learning_rate": 2.2308298424097348e-06,
"loss": 0.3011,
"step": 59000
},
{
"epoch": 11.75,
"learning_rate": 2.228341312587273e-06,
"loss": 0.3096,
"step": 59500
},
{
"epoch": 11.85,
"learning_rate": 2.2258527827648116e-06,
"loss": 0.3055,
"step": 60000
},
{
"epoch": 11.95,
"learning_rate": 2.2233592659086378e-06,
"loss": 0.3054,
"step": 60500
},
{
"epoch": 12.0,
"eval_accuracy": 0.8388572084656895,
"eval_f1": 0.4739519825085145,
"eval_loss": 0.627370297908783,
"eval_precision": 0.4208796953177507,
"eval_recall": 0.5423402617397999,
"eval_runtime": 24.2182,
"eval_samples_per_second": 125.897,
"eval_steps_per_second": 15.773,
"step": 60756
},
{
"epoch": 12.05,
"learning_rate": 2.220865749052464e-06,
"loss": 0.2957,
"step": 61000
},
{
"epoch": 12.15,
"learning_rate": 2.2183722321962896e-06,
"loss": 0.3049,
"step": 61500
},
{
"epoch": 12.25,
"learning_rate": 2.215878715340116e-06,
"loss": 0.2989,
"step": 62000
},
{
"epoch": 12.34,
"learning_rate": 2.213385198483942e-06,
"loss": 0.3047,
"step": 62500
},
{
"epoch": 12.44,
"learning_rate": 2.210891681627768e-06,
"loss": 0.3031,
"step": 63000
},
{
"epoch": 12.54,
"learning_rate": 2.208398164771594e-06,
"loss": 0.2882,
"step": 63500
},
{
"epoch": 12.64,
"learning_rate": 2.2059046479154202e-06,
"loss": 0.3107,
"step": 64000
},
{
"epoch": 12.74,
"learning_rate": 2.2034161180929582e-06,
"loss": 0.3006,
"step": 64500
},
{
"epoch": 12.84,
"learning_rate": 2.2009226012367848e-06,
"loss": 0.297,
"step": 65000
},
{
"epoch": 12.94,
"learning_rate": 2.1984290843806105e-06,
"loss": 0.3023,
"step": 65500
},
{
"epoch": 13.0,
"eval_accuracy": 0.8370890581346396,
"eval_f1": 0.47168787815564367,
"eval_loss": 0.6190006732940674,
"eval_precision": 0.4164149414278347,
"eval_recall": 0.5438799076212472,
"eval_runtime": 24.4283,
"eval_samples_per_second": 124.814,
"eval_steps_per_second": 15.638,
"step": 65819
},
{
"epoch": 13.04,
"learning_rate": 2.195940554558149e-06,
"loss": 0.3061,
"step": 66000
},
{
"epoch": 13.13,
"learning_rate": 2.193447037701975e-06,
"loss": 0.3053,
"step": 66500
},
{
"epoch": 13.23,
"learning_rate": 2.190953520845801e-06,
"loss": 0.3003,
"step": 67000
},
{
"epoch": 13.33,
"learning_rate": 2.1884600039896273e-06,
"loss": 0.2951,
"step": 67500
},
{
"epoch": 13.43,
"learning_rate": 2.1859664871334534e-06,
"loss": 0.2988,
"step": 68000
},
{
"epoch": 13.53,
"learning_rate": 2.183472970277279e-06,
"loss": 0.2967,
"step": 68500
},
{
"epoch": 13.63,
"learning_rate": 2.1809794534211052e-06,
"loss": 0.2943,
"step": 69000
},
{
"epoch": 13.73,
"learning_rate": 2.1784909235986437e-06,
"loss": 0.293,
"step": 69500
},
{
"epoch": 13.83,
"learning_rate": 2.17599740674247e-06,
"loss": 0.2929,
"step": 70000
},
{
"epoch": 13.92,
"learning_rate": 2.173503889886296e-06,
"loss": 0.3002,
"step": 70500
},
{
"epoch": 14.0,
"eval_accuracy": 0.8366718971257989,
"eval_f1": 0.46755174404221983,
"eval_loss": 0.6290284991264343,
"eval_precision": 0.40903188573077476,
"eval_recall": 0.5456120092378753,
"eval_runtime": 24.3972,
"eval_samples_per_second": 124.973,
"eval_steps_per_second": 15.658,
"step": 70882
},
{
"epoch": 14.02,
"learning_rate": 2.1710103730301216e-06,
"loss": 0.3033,
"step": 71000
},
{
"epoch": 14.12,
"learning_rate": 2.1685168561739478e-06,
"loss": 0.2906,
"step": 71500
},
{
"epoch": 14.22,
"learning_rate": 2.166023339317774e-06,
"loss": 0.294,
"step": 72000
},
{
"epoch": 14.32,
"learning_rate": 2.1635298224616e-06,
"loss": 0.2994,
"step": 72500
},
{
"epoch": 14.42,
"learning_rate": 2.161036305605426e-06,
"loss": 0.2927,
"step": 73000
},
{
"epoch": 14.52,
"learning_rate": 2.1585427887492523e-06,
"loss": 0.3221,
"step": 73500
},
{
"epoch": 14.62,
"learning_rate": 2.156049271893078e-06,
"loss": 0.294,
"step": 74000
},
{
"epoch": 14.71,
"learning_rate": 2.1535607420706164e-06,
"loss": 0.292,
"step": 74500
},
{
"epoch": 14.81,
"learning_rate": 2.1510672252144425e-06,
"loss": 0.2976,
"step": 75000
},
{
"epoch": 14.91,
"learning_rate": 2.1485737083582687e-06,
"loss": 0.2959,
"step": 75500
},
{
"epoch": 15.0,
"eval_accuracy": 0.8372536262390448,
"eval_f1": 0.4673331397519393,
"eval_loss": 0.6437515020370483,
"eval_precision": 0.4107181917608458,
"eval_recall": 0.5420515781370285,
"eval_runtime": 24.1488,
"eval_samples_per_second": 126.259,
"eval_steps_per_second": 15.819,
"step": 75945
},
{
"epoch": 15.01,
"learning_rate": 2.1460801915020948e-06,
"loss": 0.2922,
"step": 76000
},
{
"epoch": 15.11,
"learning_rate": 2.1435916616796332e-06,
"loss": 0.2853,
"step": 76500
},
{
"epoch": 15.21,
"learning_rate": 2.141098144823459e-06,
"loss": 0.2995,
"step": 77000
},
{
"epoch": 15.31,
"learning_rate": 2.1386046279672855e-06,
"loss": 0.2983,
"step": 77500
},
{
"epoch": 15.41,
"learning_rate": 2.136111111111111e-06,
"loss": 0.2869,
"step": 78000
},
{
"epoch": 15.5,
"learning_rate": 2.1336225812886496e-06,
"loss": 0.2956,
"step": 78500
},
{
"epoch": 15.6,
"learning_rate": 2.1311290644324757e-06,
"loss": 0.2963,
"step": 79000
},
{
"epoch": 15.7,
"learning_rate": 2.128635547576302e-06,
"loss": 0.2911,
"step": 79500
},
{
"epoch": 15.8,
"learning_rate": 2.1261420307201276e-06,
"loss": 0.2931,
"step": 80000
},
{
"epoch": 15.9,
"learning_rate": 2.1236535008976664e-06,
"loss": 0.2867,
"step": 80500
},
{
"epoch": 16.0,
"learning_rate": 2.121159984041492e-06,
"loss": 0.3072,
"step": 81000
},
{
"epoch": 16.0,
"eval_accuracy": 0.8390447395614069,
"eval_f1": 0.4751165113182424,
"eval_loss": 0.6363312602043152,
"eval_precision": 0.41854838709677417,
"eval_recall": 0.549364896073903,
"eval_runtime": 24.5371,
"eval_samples_per_second": 124.261,
"eval_steps_per_second": 15.568,
"step": 81008
},
{
"epoch": 16.1,
"learning_rate": 2.1186714542190306e-06,
"loss": 0.3032,
"step": 81500
},
{
"epoch": 16.2,
"learning_rate": 2.1161779373628567e-06,
"loss": 0.2825,
"step": 82000
},
{
"epoch": 16.29,
"learning_rate": 2.113684420506683e-06,
"loss": 0.3021,
"step": 82500
},
{
"epoch": 16.39,
"learning_rate": 2.1111958906842213e-06,
"loss": 0.2898,
"step": 83000
},
{
"epoch": 16.49,
"learning_rate": 2.1087023738280474e-06,
"loss": 0.2983,
"step": 83500
},
{
"epoch": 16.59,
"learning_rate": 2.1062088569718735e-06,
"loss": 0.2914,
"step": 84000
},
{
"epoch": 16.69,
"learning_rate": 2.1037153401156992e-06,
"loss": 0.2878,
"step": 84500
},
{
"epoch": 16.79,
"learning_rate": 2.1012218232595253e-06,
"loss": 0.2899,
"step": 85000
},
{
"epoch": 16.89,
"learning_rate": 2.0987283064033515e-06,
"loss": 0.2821,
"step": 85500
},
{
"epoch": 16.99,
"learning_rate": 2.0962347895471776e-06,
"loss": 0.2922,
"step": 86000
},
{
"epoch": 17.0,
"eval_accuracy": 0.8359906617168663,
"eval_f1": 0.47037298848184955,
"eval_loss": 0.6440777778625488,
"eval_precision": 0.4141465914915428,
"eval_recall": 0.544264819091609,
"eval_runtime": 23.6318,
"eval_samples_per_second": 129.021,
"eval_steps_per_second": 16.165,
"step": 86071
},
{
"epoch": 17.08,
"learning_rate": 2.0937412726910037e-06,
"loss": 0.2905,
"step": 86500
},
{
"epoch": 17.18,
"learning_rate": 2.0912477558348294e-06,
"loss": 0.2984,
"step": 87000
},
{
"epoch": 17.28,
"learning_rate": 2.0887542389786555e-06,
"loss": 0.2914,
"step": 87500
},
{
"epoch": 17.38,
"learning_rate": 2.0862607221224817e-06,
"loss": 0.2974,
"step": 88000
},
{
"epoch": 17.48,
"learning_rate": 2.083767205266308e-06,
"loss": 0.2833,
"step": 88500
},
{
"epoch": 17.58,
"learning_rate": 2.081273688410134e-06,
"loss": 0.2928,
"step": 89000
},
{
"epoch": 17.68,
"learning_rate": 2.07878017155396e-06,
"loss": 0.2865,
"step": 89500
},
{
"epoch": 17.78,
"learning_rate": 2.076291641731498e-06,
"loss": 0.2916,
"step": 90000
},
{
"epoch": 17.87,
"learning_rate": 2.0737981248753246e-06,
"loss": 0.29,
"step": 90500
},
{
"epoch": 17.97,
"learning_rate": 2.0713046080191503e-06,
"loss": 0.2917,
"step": 91000
},
{
"epoch": 18.0,
"eval_accuracy": 0.8377128860652915,
"eval_f1": 0.47028940245679784,
"eval_loss": 0.6375299096107483,
"eval_precision": 0.4145195625045878,
"eval_recall": 0.5433987682832948,
"eval_runtime": 23.5063,
"eval_samples_per_second": 129.71,
"eval_steps_per_second": 16.251,
"step": 91134
},
{
"epoch": 18.07,
"learning_rate": 2.0688110911629764e-06,
"loss": 0.2896,
"step": 91500
},
{
"epoch": 18.17,
"learning_rate": 2.066322561340515e-06,
"loss": 0.2799,
"step": 92000
},
{
"epoch": 18.27,
"learning_rate": 2.063829044484341e-06,
"loss": 0.2841,
"step": 92500
},
{
"epoch": 18.37,
"learning_rate": 2.0613355276281667e-06,
"loss": 0.2844,
"step": 93000
},
{
"epoch": 18.47,
"learning_rate": 2.0588420107719933e-06,
"loss": 0.29,
"step": 93500
},
{
"epoch": 18.57,
"learning_rate": 2.056348493915819e-06,
"loss": 0.2793,
"step": 94000
},
{
"epoch": 18.66,
"learning_rate": 2.0538599640933574e-06,
"loss": 0.2849,
"step": 94500
},
{
"epoch": 18.76,
"learning_rate": 2.0513664472371835e-06,
"loss": 0.3024,
"step": 95000
},
{
"epoch": 18.86,
"learning_rate": 2.0488729303810097e-06,
"loss": 0.2812,
"step": 95500
},
{
"epoch": 18.96,
"learning_rate": 2.0463794135248354e-06,
"loss": 0.2902,
"step": 96000
},
{
"epoch": 19.0,
"eval_accuracy": 0.8378315281870719,
"eval_f1": 0.473609487222315,
"eval_loss": 0.6511485576629639,
"eval_precision": 0.41833874299203305,
"eval_recall": 0.5457082371054658,
"eval_runtime": 24.4979,
"eval_samples_per_second": 124.459,
"eval_steps_per_second": 15.593,
"step": 96197
},
{
"epoch": 19.06,
"learning_rate": 2.043885896668662e-06,
"loss": 0.3001,
"step": 96500
},
{
"epoch": 19.16,
"learning_rate": 2.0413973668462e-06,
"loss": 0.2863,
"step": 97000
},
{
"epoch": 19.26,
"learning_rate": 2.038903849990026e-06,
"loss": 0.2861,
"step": 97500
},
{
"epoch": 19.36,
"learning_rate": 2.036410333133852e-06,
"loss": 0.2863,
"step": 98000
},
{
"epoch": 19.45,
"learning_rate": 2.0339168162776783e-06,
"loss": 0.2827,
"step": 98500
},
{
"epoch": 19.55,
"learning_rate": 2.031423299421504e-06,
"loss": 0.2832,
"step": 99000
},
{
"epoch": 19.65,
"learning_rate": 2.0289297825653305e-06,
"loss": 0.2847,
"step": 99500
},
{
"epoch": 19.75,
"learning_rate": 2.0264412527428686e-06,
"loss": 0.2889,
"step": 100000
},
{
"epoch": 19.85,
"learning_rate": 2.0239477358866947e-06,
"loss": 0.2835,
"step": 100500
},
{
"epoch": 19.95,
"learning_rate": 2.021454219030521e-06,
"loss": 0.2878,
"step": 101000
},
{
"epoch": 20.0,
"eval_accuracy": 0.8355620192123694,
"eval_f1": 0.46679081315952825,
"eval_loss": 0.6431704163551331,
"eval_precision": 0.4094968416467001,
"eval_recall": 0.5427251732101617,
"eval_runtime": 23.6323,
"eval_samples_per_second": 129.018,
"eval_steps_per_second": 16.164,
"step": 101260
},
{
"epoch": 20.05,
"learning_rate": 2.018960702174347e-06,
"loss": 0.2854,
"step": 101500
},
{
"epoch": 20.15,
"learning_rate": 2.016467185318173e-06,
"loss": 0.2831,
"step": 102000
},
{
"epoch": 20.24,
"learning_rate": 2.0139786554957115e-06,
"loss": 0.2859,
"step": 102500
},
{
"epoch": 20.34,
"learning_rate": 2.011485138639537e-06,
"loss": 0.2892,
"step": 103000
},
{
"epoch": 20.44,
"learning_rate": 2.0089916217833638e-06,
"loss": 0.2886,
"step": 103500
},
{
"epoch": 20.54,
"learning_rate": 2.0064981049271895e-06,
"loss": 0.2792,
"step": 104000
},
{
"epoch": 20.64,
"learning_rate": 2.0040045880710156e-06,
"loss": 0.2864,
"step": 104500
},
{
"epoch": 20.74,
"learning_rate": 2.0015110712148417e-06,
"loss": 0.274,
"step": 105000
},
{
"epoch": 20.84,
"learning_rate": 1.999017554358668e-06,
"loss": 0.2821,
"step": 105500
},
{
"epoch": 20.94,
"learning_rate": 1.9965240375024935e-06,
"loss": 0.2755,
"step": 106000
},
{
"epoch": 21.0,
"eval_accuracy": 0.8393623942745608,
"eval_f1": 0.47563336419493313,
"eval_loss": 0.6629786491394043,
"eval_precision": 0.4226626776364996,
"eval_recall": 0.5437836797536567,
"eval_runtime": 24.3778,
"eval_samples_per_second": 125.073,
"eval_steps_per_second": 15.67,
"step": 106323
},
{
"epoch": 21.03,
"learning_rate": 1.9940305206463197e-06,
"loss": 0.2888,
"step": 106500
},
{
"epoch": 21.13,
"learning_rate": 1.9915370037901458e-06,
"loss": 0.2889,
"step": 107000
},
{
"epoch": 21.23,
"learning_rate": 1.9890484739676842e-06,
"loss": 0.2746,
"step": 107500
},
{
"epoch": 21.33,
"learning_rate": 1.9865599441452227e-06,
"loss": 0.2697,
"step": 108000
},
{
"epoch": 21.43,
"learning_rate": 1.984066427289049e-06,
"loss": 0.2843,
"step": 108500
},
{
"epoch": 21.53,
"learning_rate": 1.9815729104328745e-06,
"loss": 0.2823,
"step": 109000
},
{
"epoch": 21.63,
"learning_rate": 1.9790793935767006e-06,
"loss": 0.2864,
"step": 109500
},
{
"epoch": 21.73,
"learning_rate": 1.9765858767205267e-06,
"loss": 0.2849,
"step": 110000
},
{
"epoch": 21.83,
"learning_rate": 1.974092359864353e-06,
"loss": 0.2831,
"step": 110500
},
{
"epoch": 21.92,
"learning_rate": 1.971598843008179e-06,
"loss": 0.2842,
"step": 111000
},
{
"epoch": 22.0,
"eval_accuracy": 0.8373914041869187,
"eval_f1": 0.47138215446138465,
"eval_loss": 0.64870285987854,
"eval_precision": 0.41560158660202734,
"eval_recall": 0.5444572748267898,
"eval_runtime": 23.5627,
"eval_samples_per_second": 129.399,
"eval_steps_per_second": 16.212,
"step": 111386
},
{
"epoch": 22.02,
"learning_rate": 1.9691053261520047e-06,
"loss": 0.2736,
"step": 111500
},
{
"epoch": 22.12,
"learning_rate": 1.966616796329543e-06,
"loss": 0.2822,
"step": 112000
},
{
"epoch": 22.22,
"learning_rate": 1.9641232794733693e-06,
"loss": 0.2809,
"step": 112500
},
{
"epoch": 22.32,
"learning_rate": 1.9616297626171954e-06,
"loss": 0.2829,
"step": 113000
},
{
"epoch": 22.42,
"learning_rate": 1.959141232794734e-06,
"loss": 0.2886,
"step": 113500
},
{
"epoch": 22.52,
"learning_rate": 1.95664771593856e-06,
"loss": 0.2802,
"step": 114000
},
{
"epoch": 22.62,
"learning_rate": 1.954154199082386e-06,
"loss": 0.2795,
"step": 114500
},
{
"epoch": 22.71,
"learning_rate": 1.951665669259924e-06,
"loss": 0.2797,
"step": 115000
},
{
"epoch": 22.81,
"learning_rate": 1.9491721524037506e-06,
"loss": 0.2828,
"step": 115500
},
{
"epoch": 22.91,
"learning_rate": 1.9466786355475763e-06,
"loss": 0.2845,
"step": 116000
},
{
"epoch": 23.0,
"eval_accuracy": 0.8386811588656282,
"eval_f1": 0.47448125836680055,
"eval_loss": 0.6472445726394653,
"eval_precision": 0.4197010065127294,
"eval_recall": 0.5457082371054658,
"eval_runtime": 23.7562,
"eval_samples_per_second": 128.345,
"eval_steps_per_second": 16.08,
"step": 116449
},
{
"epoch": 23.01,
"learning_rate": 1.9441851186914025e-06,
"loss": 0.282,
"step": 116500
},
{
"epoch": 23.11,
"learning_rate": 1.9416916018352286e-06,
"loss": 0.2757,
"step": 117000
},
{
"epoch": 23.21,
"learning_rate": 1.9391980849790547e-06,
"loss": 0.2837,
"step": 117500
},
{
"epoch": 23.31,
"learning_rate": 1.936704568122881e-06,
"loss": 0.2737,
"step": 118000
},
{
"epoch": 23.41,
"learning_rate": 1.9342110512667065e-06,
"loss": 0.2774,
"step": 118500
},
{
"epoch": 23.5,
"learning_rate": 1.9317175344105327e-06,
"loss": 0.2779,
"step": 119000
},
{
"epoch": 23.6,
"learning_rate": 1.929229004588071e-06,
"loss": 0.2776,
"step": 119500
},
{
"epoch": 23.7,
"learning_rate": 1.9267354877318972e-06,
"loss": 0.289,
"step": 120000
},
{
"epoch": 23.8,
"learning_rate": 1.9242419708757234e-06,
"loss": 0.2731,
"step": 120500
},
{
"epoch": 23.9,
"learning_rate": 1.9217484540195495e-06,
"loss": 0.2738,
"step": 121000
},
{
"epoch": 24.0,
"learning_rate": 1.919259924197088e-06,
"loss": 0.2877,
"step": 121500
},
{
"epoch": 24.0,
"eval_accuracy": 0.8382027632132879,
"eval_f1": 0.47804428816610156,
"eval_loss": 0.6589922308921814,
"eval_precision": 0.4230569756242128,
"eval_recall": 0.5494611239414935,
"eval_runtime": 23.7626,
"eval_samples_per_second": 128.311,
"eval_steps_per_second": 16.076,
"step": 121512
},
{
"epoch": 24.1,
"learning_rate": 1.9167664073409136e-06,
"loss": 0.2805,
"step": 122000
},
{
"epoch": 24.2,
"learning_rate": 1.9142728904847398e-06,
"loss": 0.2735,
"step": 122500
},
{
"epoch": 24.29,
"learning_rate": 1.911779373628566e-06,
"loss": 0.2688,
"step": 123000
},
{
"epoch": 24.39,
"learning_rate": 1.909285856772392e-06,
"loss": 0.2881,
"step": 123500
},
{
"epoch": 24.49,
"learning_rate": 1.9067923399162181e-06,
"loss": 0.2755,
"step": 124000
},
{
"epoch": 24.59,
"learning_rate": 1.904298823060044e-06,
"loss": 0.2799,
"step": 124500
},
{
"epoch": 24.69,
"learning_rate": 1.9018053062038702e-06,
"loss": 0.2716,
"step": 125000
},
{
"epoch": 24.79,
"learning_rate": 1.899311789347696e-06,
"loss": 0.2837,
"step": 125500
},
{
"epoch": 24.89,
"learning_rate": 1.8968182724915222e-06,
"loss": 0.2855,
"step": 126000
},
{
"epoch": 24.99,
"learning_rate": 1.8943247556353483e-06,
"loss": 0.2745,
"step": 126500
},
{
"epoch": 25.0,
"eval_accuracy": 0.8376172069348233,
"eval_f1": 0.47353063776573573,
"eval_loss": 0.6596588492393494,
"eval_precision": 0.41770848654213855,
"eval_recall": 0.5465742879137798,
"eval_runtime": 23.6288,
"eval_samples_per_second": 129.037,
"eval_steps_per_second": 16.167,
"step": 126575
},
{
"epoch": 25.08,
"learning_rate": 1.8918312387791742e-06,
"loss": 0.2715,
"step": 127000
},
{
"epoch": 25.18,
"learning_rate": 1.8893377219230001e-06,
"loss": 0.2714,
"step": 127500
},
{
"epoch": 25.28,
"learning_rate": 1.8868442050668265e-06,
"loss": 0.2827,
"step": 128000
},
{
"epoch": 25.38,
"learning_rate": 1.8843506882106524e-06,
"loss": 0.2745,
"step": 128500
},
{
"epoch": 25.48,
"learning_rate": 1.8818621583881908e-06,
"loss": 0.2717,
"step": 129000
},
{
"epoch": 25.58,
"learning_rate": 1.879368641532017e-06,
"loss": 0.2747,
"step": 129500
},
{
"epoch": 25.68,
"learning_rate": 1.8768751246758429e-06,
"loss": 0.269,
"step": 130000
},
{
"epoch": 25.78,
"learning_rate": 1.8743816078196688e-06,
"loss": 0.2766,
"step": 130500
},
{
"epoch": 25.87,
"learning_rate": 1.8718880909634951e-06,
"loss": 0.2684,
"step": 131000
},
{
"epoch": 25.97,
"learning_rate": 1.8693995611410334e-06,
"loss": 0.2714,
"step": 131500
},
{
"epoch": 26.0,
"eval_accuracy": 0.8359294270733667,
"eval_f1": 0.4756838269174381,
"eval_loss": 0.6624502539634705,
"eval_precision": 0.42118537200504413,
"eval_recall": 0.5463818321785989,
"eval_runtime": 24.4351,
"eval_samples_per_second": 124.78,
"eval_steps_per_second": 15.633,
"step": 131638
},
{
"epoch": 26.07,
"learning_rate": 1.8669060442848595e-06,
"loss": 0.2782,
"step": 132000
},
{
"epoch": 26.17,
"learning_rate": 1.8644125274286856e-06,
"loss": 0.2704,
"step": 132500
},
{
"epoch": 26.27,
"learning_rate": 1.8619190105725115e-06,
"loss": 0.2747,
"step": 133000
},
{
"epoch": 26.37,
"learning_rate": 1.8594254937163379e-06,
"loss": 0.2756,
"step": 133500
},
{
"epoch": 26.47,
"learning_rate": 1.8569319768601638e-06,
"loss": 0.2747,
"step": 134000
},
{
"epoch": 26.57,
"learning_rate": 1.8544484340714145e-06,
"loss": 0.2793,
"step": 134500
},
{
"epoch": 26.66,
"learning_rate": 1.8519549172152404e-06,
"loss": 0.2729,
"step": 135000
},
{
"epoch": 26.76,
"learning_rate": 1.8494614003590666e-06,
"loss": 0.2852,
"step": 135500
},
{
"epoch": 26.86,
"learning_rate": 1.8469678835028925e-06,
"loss": 0.2648,
"step": 136000
},
{
"epoch": 26.96,
"learning_rate": 1.8444743666467188e-06,
"loss": 0.2696,
"step": 136500
},
{
"epoch": 27.0,
"eval_accuracy": 0.8369244900302346,
"eval_f1": 0.47798795129965876,
"eval_loss": 0.6801736354827881,
"eval_precision": 0.42510303484451106,
"eval_recall": 0.5459006928406467,
"eval_runtime": 23.7599,
"eval_samples_per_second": 128.325,
"eval_steps_per_second": 16.077,
"step": 136701
},
{
"epoch": 27.06,
"learning_rate": 1.8419808497905447e-06,
"loss": 0.2712,
"step": 137000
},
{
"epoch": 27.16,
"learning_rate": 1.8394923199680832e-06,
"loss": 0.2618,
"step": 137500
},
{
"epoch": 27.26,
"learning_rate": 1.8369988031119093e-06,
"loss": 0.2696,
"step": 138000
},
{
"epoch": 27.36,
"learning_rate": 1.8345052862557352e-06,
"loss": 0.2811,
"step": 138500
},
{
"epoch": 27.45,
"learning_rate": 1.8320117693995611e-06,
"loss": 0.2694,
"step": 139000
},
{
"epoch": 27.55,
"learning_rate": 1.8295182525433875e-06,
"loss": 0.2752,
"step": 139500
},
{
"epoch": 27.65,
"learning_rate": 1.8270247356872134e-06,
"loss": 0.2695,
"step": 140000
},
{
"epoch": 27.75,
"learning_rate": 1.8245312188310393e-06,
"loss": 0.2798,
"step": 140500
},
{
"epoch": 27.85,
"learning_rate": 1.8220377019748656e-06,
"loss": 0.2761,
"step": 141000
},
{
"epoch": 27.95,
"learning_rate": 1.8195491721524039e-06,
"loss": 0.2731,
"step": 141500
},
{
"epoch": 28.0,
"eval_accuracy": 0.8383022695089747,
"eval_f1": 0.4802879383945761,
"eval_loss": 0.6660904884338379,
"eval_precision": 0.4249740779143831,
"eval_recall": 0.5521555042340262,
"eval_runtime": 24.3606,
"eval_samples_per_second": 125.161,
"eval_steps_per_second": 15.681,
"step": 141764
},
{
"epoch": 28.05,
"learning_rate": 1.8170556552962298e-06,
"loss": 0.2698,
"step": 142000
},
{
"epoch": 28.15,
"learning_rate": 1.8145621384400561e-06,
"loss": 0.264,
"step": 142500
},
{
"epoch": 28.24,
"learning_rate": 1.812068621583882e-06,
"loss": 0.2612,
"step": 143000
},
{
"epoch": 28.34,
"learning_rate": 1.809575104727708e-06,
"loss": 0.2699,
"step": 143500
},
{
"epoch": 28.44,
"learning_rate": 1.8070815878715343e-06,
"loss": 0.2689,
"step": 144000
},
{
"epoch": 28.54,
"learning_rate": 1.8045930580490725e-06,
"loss": 0.2776,
"step": 144500
},
{
"epoch": 28.64,
"learning_rate": 1.802104528226611e-06,
"loss": 0.2742,
"step": 145000
},
{
"epoch": 28.74,
"learning_rate": 1.799611011370437e-06,
"loss": 0.2666,
"step": 145500
},
{
"epoch": 28.84,
"learning_rate": 1.797117494514263e-06,
"loss": 0.2659,
"step": 146000
},
{
"epoch": 28.94,
"learning_rate": 1.7946239776580889e-06,
"loss": 0.2701,
"step": 146500
},
{
"epoch": 29.0,
"eval_accuracy": 0.8383520226568181,
"eval_f1": 0.47666694694358025,
"eval_loss": 0.6661925315856934,
"eval_precision": 0.42324921606689564,
"eval_recall": 0.5455157813702848,
"eval_runtime": 23.5786,
"eval_samples_per_second": 129.312,
"eval_steps_per_second": 16.201,
"step": 146827
},
{
"epoch": 29.03,
"learning_rate": 1.7921304608019152e-06,
"loss": 0.2604,
"step": 147000
},
{
"epoch": 29.13,
"learning_rate": 1.7896369439457411e-06,
"loss": 0.2697,
"step": 147500
},
{
"epoch": 29.23,
"learning_rate": 1.7871434270895675e-06,
"loss": 0.2711,
"step": 148000
},
{
"epoch": 29.33,
"learning_rate": 1.7846499102333934e-06,
"loss": 0.2653,
"step": 148500
},
{
"epoch": 29.43,
"learning_rate": 1.7821563933772193e-06,
"loss": 0.2611,
"step": 149000
},
{
"epoch": 29.53,
"learning_rate": 1.779667863554758e-06,
"loss": 0.2751,
"step": 149500
},
{
"epoch": 29.63,
"learning_rate": 1.7771743466985839e-06,
"loss": 0.2696,
"step": 150000
},
{
"epoch": 29.73,
"learning_rate": 1.7746808298424098e-06,
"loss": 0.265,
"step": 150500
},
{
"epoch": 29.82,
"learning_rate": 1.7721873129862361e-06,
"loss": 0.2761,
"step": 151000
},
{
"epoch": 29.92,
"learning_rate": 1.769693796130062e-06,
"loss": 0.2653,
"step": 151500
},
{
"epoch": 30.0,
"eval_accuracy": 0.8400933828313368,
"eval_f1": 0.48114688043432163,
"eval_loss": 0.67595374584198,
"eval_precision": 0.43018581721653393,
"eval_recall": 0.5458044649730562,
"eval_runtime": 24.4081,
"eval_samples_per_second": 124.917,
"eval_steps_per_second": 15.651,
"step": 151890
},
{
"epoch": 30.02,
"learning_rate": 1.7672052663076003e-06,
"loss": 0.2619,
"step": 152000
},
{
"epoch": 30.12,
"learning_rate": 1.7647117494514266e-06,
"loss": 0.2608,
"step": 152500
},
{
"epoch": 30.22,
"learning_rate": 1.7622182325952525e-06,
"loss": 0.2693,
"step": 153000
},
{
"epoch": 30.32,
"learning_rate": 1.7597247157390784e-06,
"loss": 0.2671,
"step": 153500
},
{
"epoch": 30.42,
"learning_rate": 1.7572311988829048e-06,
"loss": 0.2735,
"step": 154000
},
{
"epoch": 30.52,
"learning_rate": 1.754742669060443e-06,
"loss": 0.2573,
"step": 154500
},
{
"epoch": 30.61,
"learning_rate": 1.752249152204269e-06,
"loss": 0.2622,
"step": 155000
},
{
"epoch": 30.71,
"learning_rate": 1.7497556353480952e-06,
"loss": 0.2791,
"step": 155500
},
{
"epoch": 30.81,
"learning_rate": 1.7472621184919212e-06,
"loss": 0.2673,
"step": 156000
},
{
"epoch": 30.91,
"learning_rate": 1.7447735886694594e-06,
"loss": 0.2547,
"step": 156500
},
{
"epoch": 31.0,
"eval_accuracy": 0.8380305407784454,
"eval_f1": 0.4841168996188056,
"eval_loss": 0.6825248003005981,
"eval_precision": 0.43236495687698595,
"eval_recall": 0.5499422632794457,
"eval_runtime": 23.8749,
"eval_samples_per_second": 127.707,
"eval_steps_per_second": 16.0,
"step": 156953
},
{
"epoch": 31.01,
"learning_rate": 1.742285058846998e-06,
"loss": 0.2735,
"step": 157000
},
{
"epoch": 31.11,
"learning_rate": 1.739791541990824e-06,
"loss": 0.2532,
"step": 157500
},
{
"epoch": 31.21,
"learning_rate": 1.7372980251346499e-06,
"loss": 0.2748,
"step": 158000
},
{
"epoch": 31.31,
"learning_rate": 1.7348045082784762e-06,
"loss": 0.2593,
"step": 158500
},
{
"epoch": 31.4,
"learning_rate": 1.7323109914223021e-06,
"loss": 0.2634,
"step": 159000
},
{
"epoch": 31.5,
"learning_rate": 1.729817474566128e-06,
"loss": 0.2645,
"step": 159500
},
{
"epoch": 31.6,
"learning_rate": 1.7273239577099544e-06,
"loss": 0.2637,
"step": 160000
},
{
"epoch": 31.7,
"learning_rate": 1.7248304408537803e-06,
"loss": 0.2652,
"step": 160500
},
{
"epoch": 31.8,
"learning_rate": 1.7223369239976064e-06,
"loss": 0.2757,
"step": 161000
},
{
"epoch": 31.9,
"learning_rate": 1.7198434071414325e-06,
"loss": 0.2655,
"step": 161500
},
{
"epoch": 32.0,
"learning_rate": 1.7173498902852584e-06,
"loss": 0.2682,
"step": 162000
},
{
"epoch": 32.0,
"eval_accuracy": 0.8379693061349458,
"eval_f1": 0.48521260841971653,
"eval_loss": 0.6861287355422974,
"eval_precision": 0.4329834629615646,
"eval_recall": 0.5517705927636644,
"eval_runtime": 23.7494,
"eval_samples_per_second": 128.382,
"eval_steps_per_second": 16.085,
"step": 162016
},
{
"epoch": 32.1,
"learning_rate": 1.714861360462797e-06,
"loss": 0.2703,
"step": 162500
},
{
"epoch": 32.19,
"learning_rate": 1.712367843606623e-06,
"loss": 0.2646,
"step": 163000
},
{
"epoch": 32.29,
"learning_rate": 1.709874326750449e-06,
"loss": 0.2559,
"step": 163500
},
{
"epoch": 32.39,
"learning_rate": 1.707380809894275e-06,
"loss": 0.2658,
"step": 164000
},
{
"epoch": 32.49,
"learning_rate": 1.7048872930381012e-06,
"loss": 0.2594,
"step": 164500
},
{
"epoch": 32.59,
"learning_rate": 1.702393776181927e-06,
"loss": 0.2635,
"step": 165000
},
{
"epoch": 32.69,
"learning_rate": 1.6999052463594655e-06,
"loss": 0.2641,
"step": 165500
},
{
"epoch": 32.79,
"learning_rate": 1.6974117295032917e-06,
"loss": 0.2594,
"step": 166000
},
{
"epoch": 32.89,
"learning_rate": 1.6949182126471176e-06,
"loss": 0.264,
"step": 166500
},
{
"epoch": 32.98,
"learning_rate": 1.6924246957909437e-06,
"loss": 0.2579,
"step": 167000
},
{
"epoch": 33.0,
"eval_accuracy": 0.8380075777871331,
"eval_f1": 0.4819972826086957,
"eval_loss": 0.6779205203056335,
"eval_precision": 0.43130699088145896,
"eval_recall": 0.546189376443418,
"eval_runtime": 23.6072,
"eval_samples_per_second": 129.156,
"eval_steps_per_second": 16.182,
"step": 167079
},
{
"epoch": 33.08,
"learning_rate": 1.6899361659684821e-06,
"loss": 0.2686,
"step": 167500
},
{
"epoch": 33.18,
"learning_rate": 1.687442649112308e-06,
"loss": 0.2653,
"step": 168000
},
{
"epoch": 33.28,
"learning_rate": 1.6849491322561342e-06,
"loss": 0.254,
"step": 168500
},
{
"epoch": 33.38,
"learning_rate": 1.6824556153999603e-06,
"loss": 0.2575,
"step": 169000
},
{
"epoch": 33.48,
"learning_rate": 1.6799620985437862e-06,
"loss": 0.2601,
"step": 169500
},
{
"epoch": 33.58,
"learning_rate": 1.6774735687213249e-06,
"loss": 0.2525,
"step": 170000
},
{
"epoch": 33.68,
"learning_rate": 1.6749800518651508e-06,
"loss": 0.2604,
"step": 170500
},
{
"epoch": 33.77,
"learning_rate": 1.6724865350089767e-06,
"loss": 0.2631,
"step": 171000
},
{
"epoch": 33.87,
"learning_rate": 1.6699980051865153e-06,
"loss": 0.2613,
"step": 171500
},
{
"epoch": 33.97,
"learning_rate": 1.6675044883303413e-06,
"loss": 0.2682,
"step": 172000
},
{
"epoch": 34.0,
"eval_accuracy": 0.8394504190745915,
"eval_f1": 0.488143978164449,
"eval_loss": 0.686414361000061,
"eval_precision": 0.43834252450980393,
"eval_recall": 0.5507120862201693,
"eval_runtime": 23.3121,
"eval_samples_per_second": 130.791,
"eval_steps_per_second": 16.386,
"step": 172142
},
{
"epoch": 34.07,
"learning_rate": 1.6650109714741672e-06,
"loss": 0.2538,
"step": 172500
},
{
"epoch": 34.17,
"learning_rate": 1.6625174546179935e-06,
"loss": 0.2737,
"step": 173000
},
{
"epoch": 34.27,
"learning_rate": 1.6600239377618194e-06,
"loss": 0.262,
"step": 173500
},
{
"epoch": 34.37,
"learning_rate": 1.6575304209056455e-06,
"loss": 0.2621,
"step": 174000
},
{
"epoch": 34.47,
"learning_rate": 1.6550369040494715e-06,
"loss": 0.2641,
"step": 174500
},
{
"epoch": 34.56,
"learning_rate": 1.6525433871932976e-06,
"loss": 0.269,
"step": 175000
},
{
"epoch": 34.66,
"learning_rate": 1.6500498703371237e-06,
"loss": 0.2574,
"step": 175500
},
{
"epoch": 34.76,
"learning_rate": 1.6475563534809496e-06,
"loss": 0.2641,
"step": 176000
},
{
"epoch": 34.86,
"learning_rate": 1.6450628366247755e-06,
"loss": 0.2542,
"step": 176500
},
{
"epoch": 34.96,
"learning_rate": 1.6425693197686019e-06,
"loss": 0.2576,
"step": 177000
},
{
"epoch": 35.0,
"eval_accuracy": 0.8376593057522294,
"eval_f1": 0.48530031168393556,
"eval_loss": 0.6970050930976868,
"eval_precision": 0.43153558052434454,
"eval_recall": 0.5543687451886066,
"eval_runtime": 24.2014,
"eval_samples_per_second": 125.984,
"eval_steps_per_second": 15.784,
"step": 177205
},
{
"epoch": 35.06,
"learning_rate": 1.6400758029124278e-06,
"loss": 0.2672,
"step": 177500
},
{
"epoch": 35.16,
"learning_rate": 1.6375972471573909e-06,
"loss": 0.2669,
"step": 178000
},
{
"epoch": 35.26,
"learning_rate": 1.6351037303012172e-06,
"loss": 0.2566,
"step": 178500
},
{
"epoch": 35.35,
"learning_rate": 1.6326102134450431e-06,
"loss": 0.261,
"step": 179000
},
{
"epoch": 35.45,
"learning_rate": 1.630116696588869e-06,
"loss": 0.2639,
"step": 179500
},
{
"epoch": 35.55,
"learning_rate": 1.6276231797326951e-06,
"loss": 0.271,
"step": 180000
},
{
"epoch": 35.65,
"learning_rate": 1.6251296628765213e-06,
"loss": 0.2673,
"step": 180500
},
{
"epoch": 35.75,
"learning_rate": 1.6226361460203472e-06,
"loss": 0.2645,
"step": 181000
},
{
"epoch": 35.85,
"learning_rate": 1.6201426291641733e-06,
"loss": 0.2517,
"step": 181500
},
{
"epoch": 35.95,
"learning_rate": 1.6176491123079992e-06,
"loss": 0.2671,
"step": 182000
},
{
"epoch": 36.0,
"eval_accuracy": 0.8383175781698496,
"eval_f1": 0.4893308296346876,
"eval_loss": 0.6999177932739258,
"eval_precision": 0.43964723926380367,
"eval_recall": 0.5516743648960739,
"eval_runtime": 23.6795,
"eval_samples_per_second": 128.761,
"eval_steps_per_second": 16.132,
"step": 182268
},
{
"epoch": 36.05,
"learning_rate": 1.6151605824855377e-06,
"loss": 0.2671,
"step": 182500
},
{
"epoch": 36.14,
"learning_rate": 1.6126670656293638e-06,
"loss": 0.2579,
"step": 183000
},
{
"epoch": 36.24,
"learning_rate": 1.61017354877319e-06,
"loss": 0.2638,
"step": 183500
},
{
"epoch": 36.34,
"learning_rate": 1.6076800319170158e-06,
"loss": 0.2613,
"step": 184000
},
{
"epoch": 36.44,
"learning_rate": 1.605186515060842e-06,
"loss": 0.2675,
"step": 184500
},
{
"epoch": 36.54,
"learning_rate": 1.6026929982046679e-06,
"loss": 0.2473,
"step": 185000
},
{
"epoch": 36.64,
"learning_rate": 1.6002044683822063e-06,
"loss": 0.2561,
"step": 185500
},
{
"epoch": 36.74,
"learning_rate": 1.5977109515260324e-06,
"loss": 0.2563,
"step": 186000
},
{
"epoch": 36.84,
"learning_rate": 1.5952174346698586e-06,
"loss": 0.2556,
"step": 186500
},
{
"epoch": 36.93,
"learning_rate": 1.5927239178136847e-06,
"loss": 0.2556,
"step": 187000
},
{
"epoch": 37.0,
"eval_accuracy": 0.8364690573692066,
"eval_f1": 0.48543032353563115,
"eval_loss": 0.7063097357749939,
"eval_precision": 0.4330340300309364,
"eval_recall": 0.5522517321016166,
"eval_runtime": 23.9877,
"eval_samples_per_second": 127.107,
"eval_steps_per_second": 15.925,
"step": 187331
},
{
"epoch": 37.03,
"learning_rate": 1.5902304009575106e-06,
"loss": 0.2535,
"step": 187500
},
{
"epoch": 37.13,
"learning_rate": 1.5877368841013365e-06,
"loss": 0.2611,
"step": 188000
},
{
"epoch": 37.23,
"learning_rate": 1.5852483542788752e-06,
"loss": 0.2493,
"step": 188500
},
{
"epoch": 37.33,
"learning_rate": 1.582754837422701e-06,
"loss": 0.249,
"step": 189000
},
{
"epoch": 37.43,
"learning_rate": 1.580261320566527e-06,
"loss": 0.2495,
"step": 189500
},
{
"epoch": 37.53,
"learning_rate": 1.5777678037103533e-06,
"loss": 0.2591,
"step": 190000
},
{
"epoch": 37.63,
"learning_rate": 1.5752792738878916e-06,
"loss": 0.244,
"step": 190500
},
{
"epoch": 37.72,
"learning_rate": 1.5727857570317177e-06,
"loss": 0.258,
"step": 191000
},
{
"epoch": 37.82,
"learning_rate": 1.5702922401755438e-06,
"loss": 0.2564,
"step": 191500
},
{
"epoch": 37.92,
"learning_rate": 1.5677987233193697e-06,
"loss": 0.2557,
"step": 192000
},
{
"epoch": 38.0,
"eval_accuracy": 0.837223008917295,
"eval_f1": 0.48226590572817585,
"eval_loss": 0.7124492526054382,
"eval_precision": 0.4300686119279198,
"eval_recall": 0.5488837567359507,
"eval_runtime": 24.7192,
"eval_samples_per_second": 123.346,
"eval_steps_per_second": 15.454,
"step": 192394
},
{
"epoch": 38.02,
"learning_rate": 1.5653052064631956e-06,
"loss": 0.2488,
"step": 192500
},
{
"epoch": 38.12,
"learning_rate": 1.5628166766407343e-06,
"loss": 0.2465,
"step": 193000
},
{
"epoch": 38.22,
"learning_rate": 1.5603231597845602e-06,
"loss": 0.2465,
"step": 193500
},
{
"epoch": 38.32,
"learning_rate": 1.5578346299620986e-06,
"loss": 0.2622,
"step": 194000
},
{
"epoch": 38.42,
"learning_rate": 1.5553411131059248e-06,
"loss": 0.2587,
"step": 194500
},
{
"epoch": 38.51,
"learning_rate": 1.5528475962497509e-06,
"loss": 0.2482,
"step": 195000
},
{
"epoch": 38.61,
"learning_rate": 1.5503540793935768e-06,
"loss": 0.2426,
"step": 195500
},
{
"epoch": 38.71,
"learning_rate": 1.5478655495711153e-06,
"loss": 0.2529,
"step": 196000
},
{
"epoch": 38.81,
"learning_rate": 1.5453720327149414e-06,
"loss": 0.2564,
"step": 196500
},
{
"epoch": 38.91,
"learning_rate": 1.5428785158587673e-06,
"loss": 0.2518,
"step": 197000
},
{
"epoch": 39.0,
"eval_accuracy": 0.8392246163266868,
"eval_f1": 0.487758945386064,
"eval_loss": 0.7146723866462708,
"eval_precision": 0.43925377736663584,
"eval_recall": 0.548306389530408,
"eval_runtime": 23.4197,
"eval_samples_per_second": 130.189,
"eval_steps_per_second": 16.311,
"step": 197457
},
{
"epoch": 39.01,
"learning_rate": 1.5403849990025934e-06,
"loss": 0.2478,
"step": 197500
},
{
"epoch": 39.11,
"learning_rate": 1.5378914821464193e-06,
"loss": 0.2541,
"step": 198000
},
{
"epoch": 39.21,
"learning_rate": 1.5353979652902454e-06,
"loss": 0.2546,
"step": 198500
},
{
"epoch": 39.3,
"learning_rate": 1.5329044484340716e-06,
"loss": 0.2498,
"step": 199000
},
{
"epoch": 39.4,
"learning_rate": 1.5304109315778975e-06,
"loss": 0.2499,
"step": 199500
},
{
"epoch": 39.5,
"learning_rate": 1.5279174147217238e-06,
"loss": 0.2615,
"step": 200000
},
{
"epoch": 39.6,
"learning_rate": 1.525428884899262e-06,
"loss": 0.2469,
"step": 200500
},
{
"epoch": 39.7,
"learning_rate": 1.522935368043088e-06,
"loss": 0.247,
"step": 201000
},
{
"epoch": 39.8,
"learning_rate": 1.5204418511869143e-06,
"loss": 0.25,
"step": 201500
},
{
"epoch": 39.9,
"learning_rate": 1.5179483343307402e-06,
"loss": 0.2564,
"step": 202000
},
{
"epoch": 40.0,
"learning_rate": 1.5154548174745661e-06,
"loss": 0.2515,
"step": 202500
},
{
"epoch": 40.0,
"eval_accuracy": 0.8385012821003482,
"eval_f1": 0.4914221993480871,
"eval_loss": 0.7163015604019165,
"eval_precision": 0.4432838130609718,
"eval_recall": 0.5512894534257121,
"eval_runtime": 24.4401,
"eval_samples_per_second": 124.754,
"eval_steps_per_second": 15.63,
"step": 202520
},
{
"epoch": 40.09,
"learning_rate": 1.5129613006183925e-06,
"loss": 0.2662,
"step": 203000
},
{
"epoch": 40.19,
"learning_rate": 1.5104677837622184e-06,
"loss": 0.2548,
"step": 203500
},
{
"epoch": 40.29,
"learning_rate": 1.5079742669060443e-06,
"loss": 0.251,
"step": 204000
},
{
"epoch": 40.39,
"learning_rate": 1.5054807500498706e-06,
"loss": 0.2498,
"step": 204500
},
{
"epoch": 40.49,
"learning_rate": 1.5029872331936965e-06,
"loss": 0.2461,
"step": 205000
},
{
"epoch": 40.59,
"learning_rate": 1.5004937163375225e-06,
"loss": 0.2457,
"step": 205500
},
{
"epoch": 40.69,
"learning_rate": 1.4980001994813486e-06,
"loss": 0.2554,
"step": 206000
},
{
"epoch": 40.79,
"learning_rate": 1.495511669658887e-06,
"loss": 0.2522,
"step": 206500
},
{
"epoch": 40.88,
"learning_rate": 1.493018152802713e-06,
"loss": 0.2436,
"step": 207000
},
{
"epoch": 40.98,
"learning_rate": 1.4905246359465393e-06,
"loss": 0.243,
"step": 207500
},
{
"epoch": 41.0,
"eval_accuracy": 0.8361093038386467,
"eval_f1": 0.4854713868798376,
"eval_loss": 0.7213875651359558,
"eval_precision": 0.43309938872537923,
"eval_recall": 0.5522517321016166,
"eval_runtime": 23.8024,
"eval_samples_per_second": 128.096,
"eval_steps_per_second": 16.049,
"step": 207583
},
{
"epoch": 41.08,
"learning_rate": 1.4880311190903652e-06,
"loss": 0.2445,
"step": 208000
},
{
"epoch": 41.18,
"learning_rate": 1.4855425892679034e-06,
"loss": 0.2505,
"step": 208500
},
{
"epoch": 41.28,
"learning_rate": 1.4830490724117297e-06,
"loss": 0.2508,
"step": 209000
},
{
"epoch": 41.38,
"learning_rate": 1.4805555555555557e-06,
"loss": 0.2499,
"step": 209500
},
{
"epoch": 41.48,
"learning_rate": 1.4780620386993818e-06,
"loss": 0.2459,
"step": 210000
},
{
"epoch": 41.58,
"learning_rate": 1.4755735088769202e-06,
"loss": 0.2502,
"step": 210500
},
{
"epoch": 41.67,
"learning_rate": 1.4730799920207461e-06,
"loss": 0.2509,
"step": 211000
},
{
"epoch": 41.77,
"learning_rate": 1.4705864751645723e-06,
"loss": 0.2568,
"step": 211500
},
{
"epoch": 41.87,
"learning_rate": 1.4680929583083984e-06,
"loss": 0.2475,
"step": 212000
},
{
"epoch": 41.97,
"learning_rate": 1.4656044284859366e-06,
"loss": 0.2504,
"step": 212500
},
{
"epoch": 42.0,
"eval_accuracy": 0.8370967124650771,
"eval_f1": 0.4893995929443691,
"eval_loss": 0.7146164178848267,
"eval_precision": 0.4374620982413584,
"eval_recall": 0.5553310238645112,
"eval_runtime": 24.4183,
"eval_samples_per_second": 124.865,
"eval_steps_per_second": 15.644,
"step": 212646
},
{
"epoch": 42.07,
"learning_rate": 1.463110911629763e-06,
"loss": 0.2393,
"step": 213000
},
{
"epoch": 42.17,
"learning_rate": 1.4606173947735889e-06,
"loss": 0.2512,
"step": 213500
},
{
"epoch": 42.27,
"learning_rate": 1.4581238779174148e-06,
"loss": 0.2452,
"step": 214000
},
{
"epoch": 42.37,
"learning_rate": 1.455630361061241e-06,
"loss": 0.2527,
"step": 214500
},
{
"epoch": 42.46,
"learning_rate": 1.453136844205067e-06,
"loss": 0.2433,
"step": 215000
},
{
"epoch": 42.56,
"learning_rate": 1.450643327348893e-06,
"loss": 0.2451,
"step": 215500
},
{
"epoch": 42.66,
"learning_rate": 1.448149810492719e-06,
"loss": 0.2452,
"step": 216000
},
{
"epoch": 42.76,
"learning_rate": 1.445656293636545e-06,
"loss": 0.2595,
"step": 216500
},
{
"epoch": 42.86,
"learning_rate": 1.4431677638140834e-06,
"loss": 0.2398,
"step": 217000
},
{
"epoch": 42.96,
"learning_rate": 1.4406742469579096e-06,
"loss": 0.2467,
"step": 217500
},
{
"epoch": 43.0,
"eval_accuracy": 0.8372115274216388,
"eval_f1": 0.4870404878875148,
"eval_loss": 0.7194024324417114,
"eval_precision": 0.434947049924357,
"eval_recall": 0.5533102386451116,
"eval_runtime": 24.5076,
"eval_samples_per_second": 124.41,
"eval_steps_per_second": 15.587,
"step": 217709
},
{
"epoch": 43.06,
"learning_rate": 1.4381807301017357e-06,
"loss": 0.2496,
"step": 218000
},
{
"epoch": 43.16,
"learning_rate": 1.4356872132455616e-06,
"loss": 0.246,
"step": 218500
},
{
"epoch": 43.25,
"learning_rate": 1.4331936963893877e-06,
"loss": 0.2455,
"step": 219000
},
{
"epoch": 43.35,
"learning_rate": 1.4307051665669262e-06,
"loss": 0.2452,
"step": 219500
},
{
"epoch": 43.45,
"learning_rate": 1.428211649710752e-06,
"loss": 0.2481,
"step": 220000
},
{
"epoch": 43.55,
"learning_rate": 1.4257181328545782e-06,
"loss": 0.2471,
"step": 220500
},
{
"epoch": 43.65,
"learning_rate": 1.4232246159984043e-06,
"loss": 0.2448,
"step": 221000
},
{
"epoch": 43.75,
"learning_rate": 1.4207360861759426e-06,
"loss": 0.2474,
"step": 221500
},
{
"epoch": 43.85,
"learning_rate": 1.4182425693197687e-06,
"loss": 0.2453,
"step": 222000
},
{
"epoch": 43.95,
"learning_rate": 1.4157490524635948e-06,
"loss": 0.2485,
"step": 222500
},
{
"epoch": 44.0,
"eval_accuracy": 0.8376210341000421,
"eval_f1": 0.4923129484113427,
"eval_loss": 0.7221771478652954,
"eval_precision": 0.44256756756756754,
"eval_recall": 0.554657428791378,
"eval_runtime": 23.7235,
"eval_samples_per_second": 128.523,
"eval_steps_per_second": 16.102,
"step": 222772
},
{
"epoch": 44.05,
"learning_rate": 1.413255535607421e-06,
"loss": 0.2506,
"step": 223000
},
{
"epoch": 44.14,
"learning_rate": 1.4107620187512468e-06,
"loss": 0.2426,
"step": 223500
},
{
"epoch": 44.24,
"learning_rate": 1.4082734889287853e-06,
"loss": 0.2462,
"step": 224000
},
{
"epoch": 44.34,
"learning_rate": 1.4057799720726112e-06,
"loss": 0.2574,
"step": 224500
},
{
"epoch": 44.44,
"learning_rate": 1.4032864552164373e-06,
"loss": 0.25,
"step": 225000
},
{
"epoch": 44.54,
"learning_rate": 1.4007929383602634e-06,
"loss": 0.2494,
"step": 225500
},
{
"epoch": 44.64,
"learning_rate": 1.3983044085378017e-06,
"loss": 0.2491,
"step": 226000
},
{
"epoch": 44.74,
"learning_rate": 1.395810891681628e-06,
"loss": 0.2412,
"step": 226500
},
{
"epoch": 44.84,
"learning_rate": 1.393317374825454e-06,
"loss": 0.2578,
"step": 227000
},
{
"epoch": 44.93,
"learning_rate": 1.39082385796928e-06,
"loss": 0.2407,
"step": 227500
},
{
"epoch": 45.0,
"eval_accuracy": 0.8363848597343947,
"eval_f1": 0.4902906406043276,
"eval_loss": 0.7273271083831787,
"eval_precision": 0.44052458010583634,
"eval_recall": 0.552732871439569,
"eval_runtime": 24.6123,
"eval_samples_per_second": 123.881,
"eval_steps_per_second": 15.521,
"step": 227835
},
{
"epoch": 45.03,
"learning_rate": 1.388330341113106e-06,
"loss": 0.2457,
"step": 228000
},
{
"epoch": 45.13,
"learning_rate": 1.385836824256932e-06,
"loss": 0.246,
"step": 228500
},
{
"epoch": 45.23,
"learning_rate": 1.3833433074007582e-06,
"loss": 0.2406,
"step": 229000
},
{
"epoch": 45.33,
"learning_rate": 1.3808497905445841e-06,
"loss": 0.2444,
"step": 229500
},
{
"epoch": 45.43,
"learning_rate": 1.3783612607221226e-06,
"loss": 0.241,
"step": 230000
},
{
"epoch": 45.53,
"learning_rate": 1.3758677438659487e-06,
"loss": 0.2425,
"step": 230500
},
{
"epoch": 45.63,
"learning_rate": 1.3733742270097746e-06,
"loss": 0.2333,
"step": 231000
},
{
"epoch": 45.72,
"learning_rate": 1.3708807101536007e-06,
"loss": 0.2361,
"step": 231500
},
{
"epoch": 45.82,
"learning_rate": 1.3683871932974269e-06,
"loss": 0.2418,
"step": 232000
},
{
"epoch": 45.92,
"learning_rate": 1.3658986634749653e-06,
"loss": 0.2508,
"step": 232500
},
{
"epoch": 46.0,
"eval_accuracy": 0.8403995560488347,
"eval_f1": 0.49452783665700567,
"eval_loss": 0.7349366545677185,
"eval_precision": 0.4491944990176817,
"eval_recall": 0.5500384911470362,
"eval_runtime": 24.4005,
"eval_samples_per_second": 124.956,
"eval_steps_per_second": 15.655,
"step": 232898
},
{
"epoch": 46.02,
"learning_rate": 1.3634051466187912e-06,
"loss": 0.2456,
"step": 233000
},
{
"epoch": 46.12,
"learning_rate": 1.3609116297626173e-06,
"loss": 0.2381,
"step": 233500
},
{
"epoch": 46.22,
"learning_rate": 1.3584181129064432e-06,
"loss": 0.2434,
"step": 234000
},
{
"epoch": 46.32,
"learning_rate": 1.3559245960502696e-06,
"loss": 0.2546,
"step": 234500
},
{
"epoch": 46.42,
"learning_rate": 1.3534360662278078e-06,
"loss": 0.2551,
"step": 235000
},
{
"epoch": 46.51,
"learning_rate": 1.3509425493716337e-06,
"loss": 0.2374,
"step": 235500
},
{
"epoch": 46.61,
"learning_rate": 1.3484490325154599e-06,
"loss": 0.2452,
"step": 236000
},
{
"epoch": 46.71,
"learning_rate": 1.345955515659286e-06,
"loss": 0.2417,
"step": 236500
},
{
"epoch": 46.81,
"learning_rate": 1.3434669858368244e-06,
"loss": 0.2434,
"step": 237000
},
{
"epoch": 46.91,
"learning_rate": 1.3409734689806503e-06,
"loss": 0.2407,
"step": 237500
},
{
"epoch": 47.0,
"eval_accuracy": 0.8388304183091584,
"eval_f1": 0.4877632813166198,
"eval_loss": 0.7191833257675171,
"eval_precision": 0.4379114990047466,
"eval_recall": 0.550423402617398,
"eval_runtime": 23.4257,
"eval_samples_per_second": 130.156,
"eval_steps_per_second": 16.307,
"step": 237961
},
{
"epoch": 47.01,
"learning_rate": 1.3384799521244765e-06,
"loss": 0.2404,
"step": 238000
},
{
"epoch": 47.11,
"learning_rate": 1.3359864352683024e-06,
"loss": 0.2394,
"step": 238500
},
{
"epoch": 47.21,
"learning_rate": 1.3334929184121287e-06,
"loss": 0.2508,
"step": 239000
},
{
"epoch": 47.3,
"learning_rate": 1.331004388589667e-06,
"loss": 0.2454,
"step": 239500
},
{
"epoch": 47.4,
"learning_rate": 1.328510871733493e-06,
"loss": 0.2387,
"step": 240000
},
{
"epoch": 47.5,
"learning_rate": 1.3260173548773192e-06,
"loss": 0.2426,
"step": 240500
},
{
"epoch": 47.6,
"learning_rate": 1.323523838021145e-06,
"loss": 0.2313,
"step": 241000
},
{
"epoch": 47.7,
"learning_rate": 1.321030321164971e-06,
"loss": 0.2434,
"step": 241500
},
{
"epoch": 47.8,
"learning_rate": 1.3185368043087973e-06,
"loss": 0.2422,
"step": 242000
},
{
"epoch": 47.9,
"learning_rate": 1.3160432874526233e-06,
"loss": 0.2391,
"step": 242500
},
{
"epoch": 48.0,
"learning_rate": 1.3135497705964492e-06,
"loss": 0.2397,
"step": 243000
},
{
"epoch": 48.0,
"eval_accuracy": 0.8379080714914463,
"eval_f1": 0.48708299860471016,
"eval_loss": 0.7313714623451233,
"eval_precision": 0.4344219021042311,
"eval_recall": 0.5542725173210161,
"eval_runtime": 23.7407,
"eval_samples_per_second": 128.429,
"eval_steps_per_second": 16.091,
"step": 243024
},
{
"epoch": 48.09,
"learning_rate": 1.3110612407739878e-06,
"loss": 0.2454,
"step": 243500
},
{
"epoch": 48.19,
"learning_rate": 1.3085677239178137e-06,
"loss": 0.2403,
"step": 244000
},
{
"epoch": 48.29,
"learning_rate": 1.3060742070616397e-06,
"loss": 0.2336,
"step": 244500
},
{
"epoch": 48.39,
"learning_rate": 1.3035856772391783e-06,
"loss": 0.2453,
"step": 245000
},
{
"epoch": 48.49,
"learning_rate": 1.3010921603830042e-06,
"loss": 0.2378,
"step": 245500
},
{
"epoch": 48.59,
"learning_rate": 1.2985986435268301e-06,
"loss": 0.2423,
"step": 246000
},
{
"epoch": 48.69,
"learning_rate": 1.2961051266706565e-06,
"loss": 0.2421,
"step": 246500
},
{
"epoch": 48.79,
"learning_rate": 1.2936116098144824e-06,
"loss": 0.2428,
"step": 247000
},
{
"epoch": 48.88,
"learning_rate": 1.2911180929583083e-06,
"loss": 0.2423,
"step": 247500
},
{
"epoch": 48.98,
"learning_rate": 1.288629563135847e-06,
"loss": 0.237,
"step": 248000
},
{
"epoch": 49.0,
"eval_accuracy": 0.8377014045696353,
"eval_f1": 0.4901437548420418,
"eval_loss": 0.7322969436645508,
"eval_precision": 0.44338888023672324,
"eval_recall": 0.5479214780600462,
"eval_runtime": 23.821,
"eval_samples_per_second": 127.996,
"eval_steps_per_second": 16.036,
"step": 248087
},
{
"epoch": 49.08,
"learning_rate": 1.2861360462796729e-06,
"loss": 0.2419,
"step": 248500
},
{
"epoch": 49.18,
"learning_rate": 1.2836425294234988e-06,
"loss": 0.2345,
"step": 249000
},
{
"epoch": 49.28,
"learning_rate": 1.2811490125673251e-06,
"loss": 0.2374,
"step": 249500
},
{
"epoch": 49.38,
"learning_rate": 1.278655495711151e-06,
"loss": 0.241,
"step": 250000
},
{
"epoch": 49.48,
"learning_rate": 1.2761669658886895e-06,
"loss": 0.2362,
"step": 250500
},
{
"epoch": 49.58,
"learning_rate": 1.2736734490325156e-06,
"loss": 0.2375,
"step": 251000
},
{
"epoch": 49.67,
"learning_rate": 1.2711799321763415e-06,
"loss": 0.2377,
"step": 251500
},
{
"epoch": 49.77,
"learning_rate": 1.2686864153201678e-06,
"loss": 0.2387,
"step": 252000
},
{
"epoch": 49.87,
"learning_rate": 1.266197885497706e-06,
"loss": 0.2382,
"step": 252500
},
{
"epoch": 49.97,
"learning_rate": 1.263704368641532e-06,
"loss": 0.2419,
"step": 253000
},
{
"epoch": 50.0,
"eval_accuracy": 0.8384898006046921,
"eval_f1": 0.4932128634436689,
"eval_loss": 0.7357666492462158,
"eval_precision": 0.44433299899699097,
"eval_recall": 0.5541762894534257,
"eval_runtime": 23.5559,
"eval_samples_per_second": 129.437,
"eval_steps_per_second": 16.217,
"step": 253150
},
{
"epoch": 50.07,
"learning_rate": 1.2612108517853583e-06,
"loss": 0.2507,
"step": 253500
},
{
"epoch": 50.17,
"learning_rate": 1.2587173349291842e-06,
"loss": 0.2388,
"step": 254000
},
{
"epoch": 50.27,
"learning_rate": 1.2562238180730102e-06,
"loss": 0.2325,
"step": 254500
},
{
"epoch": 50.37,
"learning_rate": 1.2537352882505488e-06,
"loss": 0.2406,
"step": 255000
},
{
"epoch": 50.46,
"learning_rate": 1.2512417713943747e-06,
"loss": 0.2246,
"step": 255500
},
{
"epoch": 50.56,
"learning_rate": 1.2487482545382008e-06,
"loss": 0.2405,
"step": 256000
},
{
"epoch": 50.66,
"learning_rate": 1.2462547376820268e-06,
"loss": 0.2297,
"step": 256500
},
{
"epoch": 50.76,
"learning_rate": 1.2437612208258529e-06,
"loss": 0.2303,
"step": 257000
},
{
"epoch": 50.86,
"learning_rate": 1.2412726910033913e-06,
"loss": 0.2514,
"step": 257500
},
{
"epoch": 50.96,
"learning_rate": 1.2387791741472172e-06,
"loss": 0.248,
"step": 258000
},
{
"epoch": 51.0,
"eval_accuracy": 0.8372612805694822,
"eval_f1": 0.48591877355693897,
"eval_loss": 0.7367214560508728,
"eval_precision": 0.43393086755918614,
"eval_recall": 0.5520592763664357,
"eval_runtime": 23.3101,
"eval_samples_per_second": 130.802,
"eval_steps_per_second": 16.388,
"step": 258213
},
{
"epoch": 51.06,
"learning_rate": 1.2362856572910434e-06,
"loss": 0.2515,
"step": 258500
},
{
"epoch": 51.16,
"learning_rate": 1.2337921404348695e-06,
"loss": 0.2386,
"step": 259000
},
{
"epoch": 51.25,
"learning_rate": 1.2312986235786954e-06,
"loss": 0.2333,
"step": 259500
},
{
"epoch": 51.35,
"learning_rate": 1.2288100937562338e-06,
"loss": 0.2427,
"step": 260000
},
{
"epoch": 51.45,
"learning_rate": 1.22631657690006e-06,
"loss": 0.238,
"step": 260500
},
{
"epoch": 51.55,
"learning_rate": 1.2238230600438859e-06,
"loss": 0.2393,
"step": 261000
},
{
"epoch": 51.65,
"learning_rate": 1.221329543187712e-06,
"loss": 0.2428,
"step": 261500
},
{
"epoch": 51.75,
"learning_rate": 1.2188360263315381e-06,
"loss": 0.2339,
"step": 262000
},
{
"epoch": 51.85,
"learning_rate": 1.2163474965090764e-06,
"loss": 0.2446,
"step": 262500
},
{
"epoch": 51.95,
"learning_rate": 1.2138539796529025e-06,
"loss": 0.2447,
"step": 263000
},
{
"epoch": 52.0,
"eval_accuracy": 0.8375712809521987,
"eval_f1": 0.4877675840978593,
"eval_loss": 0.7343346476554871,
"eval_precision": 0.4365875912408759,
"eval_recall": 0.5525404157043879,
"eval_runtime": 23.403,
"eval_samples_per_second": 130.282,
"eval_steps_per_second": 16.323,
"step": 263276
},
{
"epoch": 52.04,
"learning_rate": 1.2113604627967286e-06,
"loss": 0.2364,
"step": 263500
},
{
"epoch": 52.14,
"learning_rate": 1.2088669459405547e-06,
"loss": 0.2466,
"step": 264000
},
{
"epoch": 52.24,
"learning_rate": 1.2063834031518055e-06,
"loss": 0.2397,
"step": 264500
},
{
"epoch": 52.34,
"learning_rate": 1.2038898862956314e-06,
"loss": 0.2418,
"step": 265000
},
{
"epoch": 52.44,
"learning_rate": 1.2013963694394575e-06,
"loss": 0.2417,
"step": 265500
},
{
"epoch": 52.54,
"learning_rate": 1.1989028525832834e-06,
"loss": 0.2397,
"step": 266000
},
{
"epoch": 52.64,
"learning_rate": 1.1964093357271096e-06,
"loss": 0.2353,
"step": 266500
},
{
"epoch": 52.74,
"learning_rate": 1.1939158188709357e-06,
"loss": 0.2339,
"step": 267000
},
{
"epoch": 52.83,
"learning_rate": 1.1914223020147616e-06,
"loss": 0.2397,
"step": 267500
},
{
"epoch": 52.93,
"learning_rate": 1.1889287851585877e-06,
"loss": 0.2365,
"step": 268000
},
{
"epoch": 53.0,
"eval_accuracy": 0.8388954801178767,
"eval_f1": 0.4917008898015058,
"eval_loss": 0.7243747711181641,
"eval_precision": 0.44262168823166975,
"eval_recall": 0.5530215550423403,
"eval_runtime": 23.8942,
"eval_samples_per_second": 127.604,
"eval_steps_per_second": 15.987,
"step": 268339
},
{
"epoch": 53.03,
"learning_rate": 1.1864352683024139e-06,
"loss": 0.2441,
"step": 268500
},
{
"epoch": 53.13,
"learning_rate": 1.183946738479952e-06,
"loss": 0.2346,
"step": 269000
},
{
"epoch": 53.23,
"learning_rate": 1.1814532216237782e-06,
"loss": 0.2338,
"step": 269500
},
{
"epoch": 53.33,
"learning_rate": 1.1789597047676043e-06,
"loss": 0.2317,
"step": 270000
},
{
"epoch": 53.43,
"learning_rate": 1.1764661879114305e-06,
"loss": 0.2358,
"step": 270500
},
{
"epoch": 53.53,
"learning_rate": 1.1739726710552564e-06,
"loss": 0.2344,
"step": 271000
},
{
"epoch": 53.62,
"learning_rate": 1.1714791541990825e-06,
"loss": 0.2296,
"step": 271500
},
{
"epoch": 53.72,
"learning_rate": 1.168990624376621e-06,
"loss": 0.2459,
"step": 272000
},
{
"epoch": 53.82,
"learning_rate": 1.1664971075204469e-06,
"loss": 0.2351,
"step": 272500
},
{
"epoch": 53.92,
"learning_rate": 1.164003590664273e-06,
"loss": 0.239,
"step": 273000
},
{
"epoch": 54.0,
"eval_accuracy": 0.8386275785525661,
"eval_f1": 0.4864291772688719,
"eval_loss": 0.7417691946029663,
"eval_precision": 0.4348650288140734,
"eval_recall": 0.5518668206312548,
"eval_runtime": 24.0372,
"eval_samples_per_second": 126.845,
"eval_steps_per_second": 15.892,
"step": 273402
},
{
"epoch": 54.02,
"learning_rate": 1.1615100738080991e-06,
"loss": 0.2333,
"step": 273500
},
{
"epoch": 54.12,
"learning_rate": 1.159016556951925e-06,
"loss": 0.2399,
"step": 274000
},
{
"epoch": 54.22,
"learning_rate": 1.1565280271294635e-06,
"loss": 0.2508,
"step": 274500
},
{
"epoch": 54.32,
"learning_rate": 1.1540345102732896e-06,
"loss": 0.2289,
"step": 275000
},
{
"epoch": 54.41,
"learning_rate": 1.1515409934171155e-06,
"loss": 0.2352,
"step": 275500
},
{
"epoch": 54.51,
"learning_rate": 1.1490474765609416e-06,
"loss": 0.2317,
"step": 276000
},
{
"epoch": 54.61,
"learning_rate": 1.1465539597047677e-06,
"loss": 0.2275,
"step": 276500
},
{
"epoch": 54.71,
"learning_rate": 1.144065429882306e-06,
"loss": 0.2359,
"step": 277000
},
{
"epoch": 54.81,
"learning_rate": 1.1415719130261321e-06,
"loss": 0.2288,
"step": 277500
},
{
"epoch": 54.91,
"learning_rate": 1.1390783961699582e-06,
"loss": 0.2273,
"step": 278000
},
{
"epoch": 55.0,
"eval_accuracy": 0.8372651077347009,
"eval_f1": 0.4906031936298643,
"eval_loss": 0.7420364618301392,
"eval_precision": 0.44189095396005246,
"eval_recall": 0.5513856812933026,
"eval_runtime": 23.2371,
"eval_samples_per_second": 131.213,
"eval_steps_per_second": 16.439,
"step": 278465
},
{
"epoch": 55.01,
"learning_rate": 1.1365848793137844e-06,
"loss": 0.2417,
"step": 278500
},
{
"epoch": 55.11,
"learning_rate": 1.1340913624576103e-06,
"loss": 0.2337,
"step": 279000
},
{
"epoch": 55.2,
"learning_rate": 1.1315978456014364e-06,
"loss": 0.2307,
"step": 279500
},
{
"epoch": 55.3,
"learning_rate": 1.1291143028126872e-06,
"loss": 0.2269,
"step": 280000
},
{
"epoch": 55.4,
"learning_rate": 1.126620785956513e-06,
"loss": 0.2289,
"step": 280500
},
{
"epoch": 55.5,
"learning_rate": 1.1241272691003392e-06,
"loss": 0.2377,
"step": 281000
},
{
"epoch": 55.6,
"learning_rate": 1.1216337522441653e-06,
"loss": 0.2288,
"step": 281500
},
{
"epoch": 55.7,
"learning_rate": 1.1191402353879912e-06,
"loss": 0.2312,
"step": 282000
},
{
"epoch": 55.8,
"learning_rate": 1.1166467185318174e-06,
"loss": 0.2358,
"step": 282500
},
{
"epoch": 55.9,
"learning_rate": 1.1141532016756435e-06,
"loss": 0.2381,
"step": 283000
},
{
"epoch": 55.99,
"learning_rate": 1.1116596848194696e-06,
"loss": 0.2383,
"step": 283500
},
{
"epoch": 56.0,
"eval_accuracy": 0.837800910865322,
"eval_f1": 0.48603658794203386,
"eval_loss": 0.7458629012107849,
"eval_precision": 0.433230398433381,
"eval_recall": 0.5535026943802925,
"eval_runtime": 23.9922,
"eval_samples_per_second": 127.083,
"eval_steps_per_second": 15.922,
"step": 283528
},
{
"epoch": 56.09,
"learning_rate": 1.1091661679632955e-06,
"loss": 0.2293,
"step": 284000
},
{
"epoch": 56.19,
"learning_rate": 1.1066726511071216e-06,
"loss": 0.237,
"step": 284500
},
{
"epoch": 56.29,
"learning_rate": 1.10418412128466e-06,
"loss": 0.2326,
"step": 285000
},
{
"epoch": 56.39,
"learning_rate": 1.101690604428486e-06,
"loss": 0.232,
"step": 285500
},
{
"epoch": 56.49,
"learning_rate": 1.0992020746060244e-06,
"loss": 0.233,
"step": 286000
},
{
"epoch": 56.59,
"learning_rate": 1.0967085577498504e-06,
"loss": 0.2384,
"step": 286500
},
{
"epoch": 56.69,
"learning_rate": 1.0942150408936765e-06,
"loss": 0.2327,
"step": 287000
},
{
"epoch": 56.78,
"learning_rate": 1.0917215240375026e-06,
"loss": 0.2289,
"step": 287500
},
{
"epoch": 56.88,
"learning_rate": 1.0892280071813287e-06,
"loss": 0.2313,
"step": 288000
},
{
"epoch": 56.98,
"learning_rate": 1.0867344903251546e-06,
"loss": 0.2301,
"step": 288500
},
{
"epoch": 57.0,
"eval_accuracy": 0.8362585632821768,
"eval_f1": 0.4886406904429496,
"eval_loss": 0.745151937007904,
"eval_precision": 0.4360135900339751,
"eval_recall": 0.555715935334873,
"eval_runtime": 23.3249,
"eval_samples_per_second": 130.719,
"eval_steps_per_second": 16.377,
"step": 288591
},
{
"epoch": 57.08,
"learning_rate": 1.0842409734689808e-06,
"loss": 0.2353,
"step": 289000
},
{
"epoch": 57.18,
"learning_rate": 1.0817474566128069e-06,
"loss": 0.2281,
"step": 289500
},
{
"epoch": 57.28,
"learning_rate": 1.0792539397566328e-06,
"loss": 0.2427,
"step": 290000
},
{
"epoch": 57.38,
"learning_rate": 1.076760422900459e-06,
"loss": 0.2285,
"step": 290500
},
{
"epoch": 57.48,
"learning_rate": 1.0742718930779974e-06,
"loss": 0.2288,
"step": 291000
},
{
"epoch": 57.57,
"learning_rate": 1.0717783762218235e-06,
"loss": 0.2317,
"step": 291500
},
{
"epoch": 57.67,
"learning_rate": 1.0692848593656494e-06,
"loss": 0.2247,
"step": 292000
},
{
"epoch": 57.77,
"learning_rate": 1.0667913425094755e-06,
"loss": 0.2336,
"step": 292500
},
{
"epoch": 57.87,
"learning_rate": 1.0642978256533014e-06,
"loss": 0.2302,
"step": 293000
},
{
"epoch": 57.97,
"learning_rate": 1.0618043087971276e-06,
"loss": 0.2382,
"step": 293500
},
{
"epoch": 58.0,
"eval_accuracy": 0.8389949864135635,
"eval_f1": 0.49114762450643235,
"eval_loss": 0.7444238662719727,
"eval_precision": 0.4394802826532938,
"eval_recall": 0.5565819861431871,
"eval_runtime": 23.2287,
"eval_samples_per_second": 131.26,
"eval_steps_per_second": 16.445,
"step": 293654
},
{
"epoch": 58.07,
"learning_rate": 1.0593107919409535e-06,
"loss": 0.2279,
"step": 294000
},
{
"epoch": 58.17,
"learning_rate": 1.0568172750847796e-06,
"loss": 0.2375,
"step": 294500
},
{
"epoch": 58.27,
"learning_rate": 1.0543337322960304e-06,
"loss": 0.2264,
"step": 295000
},
{
"epoch": 58.36,
"learning_rate": 1.0518402154398565e-06,
"loss": 0.2357,
"step": 295500
},
{
"epoch": 58.46,
"learning_rate": 1.0493466985836826e-06,
"loss": 0.2345,
"step": 296000
},
{
"epoch": 58.56,
"learning_rate": 1.0468531817275085e-06,
"loss": 0.2256,
"step": 296500
},
{
"epoch": 58.66,
"learning_rate": 1.0443596648713347e-06,
"loss": 0.234,
"step": 297000
},
{
"epoch": 58.76,
"learning_rate": 1.041871135048873e-06,
"loss": 0.2261,
"step": 297500
},
{
"epoch": 58.86,
"learning_rate": 1.039377618192699e-06,
"loss": 0.2392,
"step": 298000
},
{
"epoch": 58.96,
"learning_rate": 1.0368841013365251e-06,
"loss": 0.2324,
"step": 298500
},
{
"epoch": 59.0,
"eval_accuracy": 0.8388265911439396,
"eval_f1": 0.4904518329070759,
"eval_loss": 0.7444677948951721,
"eval_precision": 0.44023569023569026,
"eval_recall": 0.553598922247883,
"eval_runtime": 24.0702,
"eval_samples_per_second": 126.671,
"eval_steps_per_second": 15.87,
"step": 298717
},
{
"epoch": 59.06,
"learning_rate": 1.0343905844803513e-06,
"loss": 0.241,
"step": 299000
},
{
"epoch": 59.15,
"learning_rate": 1.0318970676241772e-06,
"loss": 0.2239,
"step": 299500
},
{
"epoch": 59.25,
"learning_rate": 1.0294035507680033e-06,
"loss": 0.227,
"step": 300000
},
{
"epoch": 59.35,
"learning_rate": 1.0269150209455417e-06,
"loss": 0.2331,
"step": 300500
},
{
"epoch": 59.45,
"learning_rate": 1.0244215040893679e-06,
"loss": 0.2204,
"step": 301000
},
{
"epoch": 59.55,
"learning_rate": 1.0219279872331938e-06,
"loss": 0.2362,
"step": 301500
},
{
"epoch": 59.65,
"learning_rate": 1.01943447037702e-06,
"loss": 0.2263,
"step": 302000
},
{
"epoch": 59.75,
"learning_rate": 1.0169459405545583e-06,
"loss": 0.2359,
"step": 302500
},
{
"epoch": 59.85,
"learning_rate": 1.0144524236983843e-06,
"loss": 0.2358,
"step": 303000
},
{
"epoch": 59.94,
"learning_rate": 1.0119589068422104e-06,
"loss": 0.2377,
"step": 303500
},
{
"epoch": 60.0,
"eval_accuracy": 0.8369665888476405,
"eval_f1": 0.48964250629991884,
"eval_loss": 0.752280056476593,
"eval_precision": 0.4402119652868443,
"eval_recall": 0.5515781370284835,
"eval_runtime": 23.2481,
"eval_samples_per_second": 131.15,
"eval_steps_per_second": 16.431,
"step": 303780
},
{
"epoch": 60.04,
"learning_rate": 1.0094653899860365e-06,
"loss": 0.2355,
"step": 304000
},
{
"epoch": 60.14,
"learning_rate": 1.0069718731298624e-06,
"loss": 0.2269,
"step": 304500
},
{
"epoch": 60.24,
"learning_rate": 1.0044833433074009e-06,
"loss": 0.2294,
"step": 305000
},
{
"epoch": 60.34,
"learning_rate": 1.001989826451227e-06,
"loss": 0.2317,
"step": 305500
},
{
"epoch": 60.44,
"learning_rate": 9.99496309595053e-07,
"loss": 0.2251,
"step": 306000
},
{
"epoch": 60.54,
"learning_rate": 9.97002792738879e-07,
"loss": 0.2348,
"step": 306500
},
{
"epoch": 60.64,
"learning_rate": 9.945142629164175e-07,
"loss": 0.2337,
"step": 307000
},
{
"epoch": 60.73,
"learning_rate": 9.920207460602436e-07,
"loss": 0.2243,
"step": 307500
},
{
"epoch": 60.83,
"learning_rate": 9.895272292040695e-07,
"loss": 0.2304,
"step": 308000
},
{
"epoch": 60.93,
"learning_rate": 9.870337123478956e-07,
"loss": 0.22,
"step": 308500
},
{
"epoch": 61.0,
"eval_accuracy": 0.8390600482222818,
"eval_f1": 0.49161582113751756,
"eval_loss": 0.7522650957107544,
"eval_precision": 0.44162514373323114,
"eval_recall": 0.5543687451886066,
"eval_runtime": 24.048,
"eval_samples_per_second": 126.788,
"eval_steps_per_second": 15.885,
"step": 308843
},
{
"epoch": 61.03,
"learning_rate": 9.84545182525434e-07,
"loss": 0.2371,
"step": 309000
},
{
"epoch": 61.13,
"learning_rate": 9.8205166566926e-07,
"loss": 0.2343,
"step": 309500
},
{
"epoch": 61.23,
"learning_rate": 9.795581488130861e-07,
"loss": 0.2277,
"step": 310000
},
{
"epoch": 61.33,
"learning_rate": 9.770646319569122e-07,
"loss": 0.222,
"step": 310500
},
{
"epoch": 61.43,
"learning_rate": 9.745711151007382e-07,
"loss": 0.2267,
"step": 311000
},
{
"epoch": 61.52,
"learning_rate": 9.720825852782766e-07,
"loss": 0.2287,
"step": 311500
},
{
"epoch": 61.62,
"learning_rate": 9.695890684221027e-07,
"loss": 0.2382,
"step": 312000
},
{
"epoch": 61.72,
"learning_rate": 9.670955515659286e-07,
"loss": 0.2295,
"step": 312500
},
{
"epoch": 61.82,
"learning_rate": 9.646020347097548e-07,
"loss": 0.2211,
"step": 313000
},
{
"epoch": 61.92,
"learning_rate": 9.621085178535809e-07,
"loss": 0.2241,
"step": 313500
},
{
"epoch": 62.0,
"eval_accuracy": 0.8374909104826055,
"eval_f1": 0.48761904761904756,
"eval_loss": 0.7565447688102722,
"eval_precision": 0.4352754477442757,
"eval_recall": 0.5542725173210161,
"eval_runtime": 23.4085,
"eval_samples_per_second": 130.252,
"eval_steps_per_second": 16.319,
"step": 313906
},
{
"epoch": 62.02,
"learning_rate": 9.596150009974068e-07,
"loss": 0.2265,
"step": 314000
},
{
"epoch": 62.12,
"learning_rate": 9.57121484141233e-07,
"loss": 0.2231,
"step": 314500
},
{
"epoch": 62.22,
"learning_rate": 9.546279672850588e-07,
"loss": 0.2279,
"step": 315000
},
{
"epoch": 62.31,
"learning_rate": 9.521394374625974e-07,
"loss": 0.2293,
"step": 315500
},
{
"epoch": 62.41,
"learning_rate": 9.496459206064233e-07,
"loss": 0.2265,
"step": 316000
},
{
"epoch": 62.51,
"learning_rate": 9.471573907839618e-07,
"loss": 0.2177,
"step": 316500
},
{
"epoch": 62.61,
"learning_rate": 9.446638739277879e-07,
"loss": 0.2271,
"step": 317000
},
{
"epoch": 62.71,
"learning_rate": 9.421703570716139e-07,
"loss": 0.2298,
"step": 317500
},
{
"epoch": 62.81,
"learning_rate": 9.396768402154399e-07,
"loss": 0.2224,
"step": 318000
},
{
"epoch": 62.91,
"learning_rate": 9.371883103929783e-07,
"loss": 0.2294,
"step": 318500
},
{
"epoch": 63.0,
"eval_accuracy": 0.8375751081174174,
"eval_f1": 0.49037727850784235,
"eval_loss": 0.7564020156860352,
"eval_precision": 0.43824821942718595,
"eval_recall": 0.5565819861431871,
"eval_runtime": 23.5821,
"eval_samples_per_second": 129.293,
"eval_steps_per_second": 16.199,
"step": 318969
},
{
"epoch": 63.01,
"learning_rate": 9.346947935368044e-07,
"loss": 0.2259,
"step": 319000
},
{
"epoch": 63.1,
"learning_rate": 9.322012766806304e-07,
"loss": 0.2192,
"step": 319500
},
{
"epoch": 63.2,
"learning_rate": 9.297127468581689e-07,
"loss": 0.2248,
"step": 320000
},
{
"epoch": 63.3,
"learning_rate": 9.272192300019948e-07,
"loss": 0.2301,
"step": 320500
},
{
"epoch": 63.4,
"learning_rate": 9.24725713145821e-07,
"loss": 0.2351,
"step": 321000
},
{
"epoch": 63.5,
"learning_rate": 9.22232196289647e-07,
"loss": 0.2335,
"step": 321500
},
{
"epoch": 63.6,
"learning_rate": 9.197386794334731e-07,
"loss": 0.2288,
"step": 322000
},
{
"epoch": 63.7,
"learning_rate": 9.17245162577299e-07,
"loss": 0.2216,
"step": 322500
},
{
"epoch": 63.8,
"learning_rate": 9.147516457211251e-07,
"loss": 0.225,
"step": 323000
},
{
"epoch": 63.89,
"learning_rate": 9.122581288649513e-07,
"loss": 0.2375,
"step": 323500
},
{
"epoch": 63.99,
"learning_rate": 9.097646120087772e-07,
"loss": 0.2271,
"step": 324000
},
{
"epoch": 64.0,
"eval_accuracy": 0.8373033793868881,
"eval_f1": 0.49073640274287667,
"eval_loss": 0.7552084922790527,
"eval_precision": 0.44020783984106365,
"eval_recall": 0.5543687451886066,
"eval_runtime": 24.2365,
"eval_samples_per_second": 125.802,
"eval_steps_per_second": 15.761,
"step": 324032
},
{
"epoch": 64.09,
"learning_rate": 9.072710951526033e-07,
"loss": 0.2197,
"step": 324500
},
{
"epoch": 64.19,
"learning_rate": 9.047825653301418e-07,
"loss": 0.2251,
"step": 325000
},
{
"epoch": 64.29,
"learning_rate": 9.022890484739677e-07,
"loss": 0.2232,
"step": 325500
},
{
"epoch": 64.39,
"learning_rate": 8.997955316177938e-07,
"loss": 0.2338,
"step": 326000
},
{
"epoch": 64.49,
"learning_rate": 8.973020147616199e-07,
"loss": 0.2304,
"step": 326500
},
{
"epoch": 64.59,
"learning_rate": 8.948134849391583e-07,
"loss": 0.2264,
"step": 327000
},
{
"epoch": 64.68,
"learning_rate": 8.923199680829843e-07,
"loss": 0.2274,
"step": 327500
},
{
"epoch": 64.78,
"learning_rate": 8.898264512268104e-07,
"loss": 0.232,
"step": 328000
},
{
"epoch": 64.88,
"learning_rate": 8.873329343706365e-07,
"loss": 0.2282,
"step": 328500
},
{
"epoch": 64.98,
"learning_rate": 8.848394175144624e-07,
"loss": 0.2273,
"step": 329000
},
{
"epoch": 65.0,
"eval_accuracy": 0.8390409123961882,
"eval_f1": 0.49424991417782355,
"eval_loss": 0.7529916763305664,
"eval_precision": 0.44601920693928127,
"eval_recall": 0.5541762894534257,
"eval_runtime": 23.3862,
"eval_samples_per_second": 130.376,
"eval_steps_per_second": 16.334,
"step": 329095
},
{
"epoch": 65.08,
"learning_rate": 8.823508876920009e-07,
"loss": 0.2284,
"step": 329500
},
{
"epoch": 65.18,
"learning_rate": 8.79857370835827e-07,
"loss": 0.2289,
"step": 330000
},
{
"epoch": 65.28,
"learning_rate": 8.773638539796529e-07,
"loss": 0.2342,
"step": 330500
},
{
"epoch": 65.38,
"learning_rate": 8.748753241571914e-07,
"loss": 0.2251,
"step": 331000
},
{
"epoch": 65.48,
"learning_rate": 8.723818073010175e-07,
"loss": 0.2293,
"step": 331500
},
{
"epoch": 65.57,
"learning_rate": 8.698882904448434e-07,
"loss": 0.2346,
"step": 332000
},
{
"epoch": 65.67,
"learning_rate": 8.673947735886695e-07,
"loss": 0.2329,
"step": 332500
},
{
"epoch": 65.77,
"learning_rate": 8.649012567324956e-07,
"loss": 0.2259,
"step": 333000
},
{
"epoch": 65.87,
"learning_rate": 8.624077398763217e-07,
"loss": 0.2227,
"step": 333500
},
{
"epoch": 65.97,
"learning_rate": 8.599142230201477e-07,
"loss": 0.2217,
"step": 334000
},
{
"epoch": 66.0,
"eval_accuracy": 0.8372995522216694,
"eval_f1": 0.4910174542358451,
"eval_loss": 0.7608162760734558,
"eval_precision": 0.44029622843182165,
"eval_recall": 0.5549461123941494,
"eval_runtime": 24.1639,
"eval_samples_per_second": 126.18,
"eval_steps_per_second": 15.809,
"step": 334158
},
{
"epoch": 66.07,
"learning_rate": 8.574207061639737e-07,
"loss": 0.2267,
"step": 334500
},
{
"epoch": 66.17,
"learning_rate": 8.549271893077998e-07,
"loss": 0.2262,
"step": 335000
},
{
"epoch": 66.27,
"learning_rate": 8.524386594853382e-07,
"loss": 0.2277,
"step": 335500
},
{
"epoch": 66.36,
"learning_rate": 8.499451426291643e-07,
"loss": 0.2242,
"step": 336000
},
{
"epoch": 66.46,
"learning_rate": 8.474516257729903e-07,
"loss": 0.2329,
"step": 336500
},
{
"epoch": 66.56,
"learning_rate": 8.449581089168163e-07,
"loss": 0.227,
"step": 337000
},
{
"epoch": 66.66,
"learning_rate": 8.424645920606423e-07,
"loss": 0.2201,
"step": 337500
},
{
"epoch": 66.76,
"learning_rate": 8.399760622381809e-07,
"loss": 0.2354,
"step": 338000
},
{
"epoch": 66.86,
"learning_rate": 8.374825453820068e-07,
"loss": 0.227,
"step": 338500
},
{
"epoch": 66.96,
"learning_rate": 8.349890285258329e-07,
"loss": 0.2236,
"step": 339000
},
{
"epoch": 67.0,
"eval_accuracy": 0.8383099238394122,
"eval_f1": 0.4886561954624782,
"eval_loss": 0.7648459076881409,
"eval_precision": 0.4381344935501107,
"eval_recall": 0.552347959969207,
"eval_runtime": 24.0499,
"eval_samples_per_second": 126.778,
"eval_steps_per_second": 15.884,
"step": 339221
},
{
"epoch": 67.06,
"learning_rate": 8.324955116696589e-07,
"loss": 0.2249,
"step": 339500
},
{
"epoch": 67.15,
"learning_rate": 8.300019948134851e-07,
"loss": 0.2288,
"step": 340000
},
{
"epoch": 67.25,
"learning_rate": 8.275134649910234e-07,
"loss": 0.2279,
"step": 340500
},
{
"epoch": 67.35,
"learning_rate": 8.250199481348494e-07,
"loss": 0.2237,
"step": 341000
},
{
"epoch": 67.45,
"learning_rate": 8.225264312786756e-07,
"loss": 0.2217,
"step": 341500
},
{
"epoch": 67.55,
"learning_rate": 8.200329144225016e-07,
"loss": 0.2337,
"step": 342000
},
{
"epoch": 67.65,
"learning_rate": 8.1754438460004e-07,
"loss": 0.2227,
"step": 342500
},
{
"epoch": 67.75,
"learning_rate": 8.15050867743866e-07,
"loss": 0.2251,
"step": 343000
},
{
"epoch": 67.85,
"learning_rate": 8.12557350887692e-07,
"loss": 0.2152,
"step": 343500
},
{
"epoch": 67.94,
"learning_rate": 8.100638340315181e-07,
"loss": 0.2186,
"step": 344000
},
{
"epoch": 68.0,
"eval_accuracy": 0.8376669600826667,
"eval_f1": 0.48827047893776776,
"eval_loss": 0.7637174129486084,
"eval_precision": 0.43661330703285034,
"eval_recall": 0.5537913779830639,
"eval_runtime": 23.9648,
"eval_samples_per_second": 127.228,
"eval_steps_per_second": 15.94,
"step": 344284
},
{
"epoch": 68.04,
"learning_rate": 8.075753042090566e-07,
"loss": 0.2306,
"step": 344500
},
{
"epoch": 68.14,
"learning_rate": 8.05086774386595e-07,
"loss": 0.2247,
"step": 345000
},
{
"epoch": 68.24,
"learning_rate": 8.02593257530421e-07,
"loss": 0.2277,
"step": 345500
},
{
"epoch": 68.34,
"learning_rate": 8.000997406742471e-07,
"loss": 0.2197,
"step": 346000
},
{
"epoch": 68.44,
"learning_rate": 7.97606223818073e-07,
"loss": 0.2357,
"step": 346500
},
{
"epoch": 68.54,
"learning_rate": 7.951127069618991e-07,
"loss": 0.2294,
"step": 347000
},
{
"epoch": 68.64,
"learning_rate": 7.926191901057253e-07,
"loss": 0.2169,
"step": 347500
},
{
"epoch": 68.73,
"learning_rate": 7.901256732495512e-07,
"loss": 0.2302,
"step": 348000
},
{
"epoch": 68.83,
"learning_rate": 7.876321563933773e-07,
"loss": 0.2298,
"step": 348500
},
{
"epoch": 68.93,
"learning_rate": 7.851386395372033e-07,
"loss": 0.2138,
"step": 349000
},
{
"epoch": 69.0,
"eval_accuracy": 0.8371770829346703,
"eval_f1": 0.48854312800238064,
"eval_loss": 0.7689042091369629,
"eval_precision": 0.43759043484883103,
"eval_recall": 0.5529253271747498,
"eval_runtime": 25.1434,
"eval_samples_per_second": 121.265,
"eval_steps_per_second": 15.193,
"step": 349347
},
{
"epoch": 69.03,
"learning_rate": 7.826501097147417e-07,
"loss": 0.2283,
"step": 349500
},
{
"epoch": 69.13,
"learning_rate": 7.801565928585678e-07,
"loss": 0.2206,
"step": 350000
},
{
"epoch": 69.23,
"learning_rate": 7.776630760023938e-07,
"loss": 0.2242,
"step": 350500
},
{
"epoch": 69.33,
"learning_rate": 7.751745461799321e-07,
"loss": 0.2172,
"step": 351000
},
{
"epoch": 69.43,
"learning_rate": 7.726810293237583e-07,
"loss": 0.2189,
"step": 351500
},
{
"epoch": 69.52,
"learning_rate": 7.701875124675844e-07,
"loss": 0.2224,
"step": 352000
},
{
"epoch": 69.62,
"learning_rate": 7.676939956114104e-07,
"loss": 0.2122,
"step": 352500
},
{
"epoch": 69.72,
"learning_rate": 7.652004787552364e-07,
"loss": 0.2306,
"step": 353000
},
{
"epoch": 69.82,
"learning_rate": 7.627069618990624e-07,
"loss": 0.2245,
"step": 353500
},
{
"epoch": 69.92,
"learning_rate": 7.602134450428886e-07,
"loss": 0.2353,
"step": 354000
},
{
"epoch": 70.0,
"eval_accuracy": 0.8379654789697272,
"eval_f1": 0.49071425521922896,
"eval_loss": 0.7559919953346252,
"eval_precision": 0.44102524748676236,
"eval_recall": 0.5530215550423403,
"eval_runtime": 23.4919,
"eval_samples_per_second": 129.789,
"eval_steps_per_second": 16.261,
"step": 354410
},
{
"epoch": 70.02,
"learning_rate": 7.577199281867147e-07,
"loss": 0.2265,
"step": 354500
},
{
"epoch": 70.12,
"learning_rate": 7.552264113305406e-07,
"loss": 0.2252,
"step": 355000
},
{
"epoch": 70.22,
"learning_rate": 7.52737881508079e-07,
"loss": 0.2197,
"step": 355500
},
{
"epoch": 70.31,
"learning_rate": 7.502443646519052e-07,
"loss": 0.2233,
"step": 356000
},
{
"epoch": 70.41,
"learning_rate": 7.477508477957311e-07,
"loss": 0.223,
"step": 356500
},
{
"epoch": 70.51,
"learning_rate": 7.452573309395572e-07,
"loss": 0.2363,
"step": 357000
},
{
"epoch": 70.61,
"learning_rate": 7.427638140833833e-07,
"loss": 0.2181,
"step": 357500
},
{
"epoch": 70.71,
"learning_rate": 7.402702972272092e-07,
"loss": 0.2208,
"step": 358000
},
{
"epoch": 70.81,
"learning_rate": 7.377767803710354e-07,
"loss": 0.2221,
"step": 358500
},
{
"epoch": 70.91,
"learning_rate": 7.352832635148614e-07,
"loss": 0.2245,
"step": 359000
},
{
"epoch": 71.0,
"eval_accuracy": 0.839415974587623,
"eval_f1": 0.489203721089016,
"eval_loss": 0.7731532454490662,
"eval_precision": 0.4395031436896182,
"eval_recall": 0.5515781370284835,
"eval_runtime": 23.9793,
"eval_samples_per_second": 127.151,
"eval_steps_per_second": 15.93,
"step": 359473
},
{
"epoch": 71.01,
"learning_rate": 7.327947336923997e-07,
"loss": 0.2226,
"step": 359500
},
{
"epoch": 71.1,
"learning_rate": 7.303062038699383e-07,
"loss": 0.2242,
"step": 360000
},
{
"epoch": 71.2,
"learning_rate": 7.278176740474765e-07,
"loss": 0.221,
"step": 360500
},
{
"epoch": 71.3,
"learning_rate": 7.253241571913026e-07,
"loss": 0.2187,
"step": 361000
},
{
"epoch": 71.4,
"learning_rate": 7.228306403351288e-07,
"loss": 0.2279,
"step": 361500
},
{
"epoch": 71.5,
"learning_rate": 7.203371234789548e-07,
"loss": 0.2328,
"step": 362000
},
{
"epoch": 71.6,
"learning_rate": 7.178436066227808e-07,
"loss": 0.2199,
"step": 362500
},
{
"epoch": 71.7,
"learning_rate": 7.153500897666068e-07,
"loss": 0.2272,
"step": 363000
},
{
"epoch": 71.8,
"learning_rate": 7.128565729104329e-07,
"loss": 0.227,
"step": 363500
},
{
"epoch": 71.89,
"learning_rate": 7.103680430879713e-07,
"loss": 0.2213,
"step": 364000
},
{
"epoch": 71.99,
"learning_rate": 7.078745262317974e-07,
"loss": 0.2239,
"step": 364500
},
{
"epoch": 72.0,
"eval_accuracy": 0.8369130085345784,
"eval_f1": 0.48701106072805866,
"eval_loss": 0.7701214551925659,
"eval_precision": 0.43513820522529345,
"eval_recall": 0.5529253271747498,
"eval_runtime": 23.5415,
"eval_samples_per_second": 129.516,
"eval_steps_per_second": 16.227,
"step": 364536
},
{
"epoch": 72.09,
"learning_rate": 7.053810093756234e-07,
"loss": 0.2288,
"step": 365000
},
{
"epoch": 72.19,
"learning_rate": 7.028874925194495e-07,
"loss": 0.2218,
"step": 365500
},
{
"epoch": 72.29,
"learning_rate": 7.003939756632755e-07,
"loss": 0.2138,
"step": 366000
},
{
"epoch": 72.39,
"learning_rate": 6.979004588071016e-07,
"loss": 0.225,
"step": 366500
},
{
"epoch": 72.49,
"learning_rate": 6.954069419509277e-07,
"loss": 0.2252,
"step": 367000
},
{
"epoch": 72.59,
"learning_rate": 6.929134250947537e-07,
"loss": 0.2271,
"step": 367500
},
{
"epoch": 72.68,
"learning_rate": 6.904248952722921e-07,
"loss": 0.2195,
"step": 368000
},
{
"epoch": 72.78,
"learning_rate": 6.879313784161182e-07,
"loss": 0.226,
"step": 368500
},
{
"epoch": 72.88,
"learning_rate": 6.854378615599443e-07,
"loss": 0.2214,
"step": 369000
},
{
"epoch": 72.98,
"learning_rate": 6.829443447037702e-07,
"loss": 0.2129,
"step": 369500
},
{
"epoch": 73.0,
"eval_accuracy": 0.8376707872478855,
"eval_f1": 0.4907186648501363,
"eval_loss": 0.7788528203964233,
"eval_precision": 0.44005803298717167,
"eval_recall": 0.5545612009237876,
"eval_runtime": 23.966,
"eval_samples_per_second": 127.222,
"eval_steps_per_second": 15.939,
"step": 369599
},
{
"epoch": 73.08,
"learning_rate": 6.804558148813087e-07,
"loss": 0.2357,
"step": 370000
},
{
"epoch": 73.18,
"learning_rate": 6.77967285058847e-07,
"loss": 0.216,
"step": 370500
},
{
"epoch": 73.28,
"learning_rate": 6.754737682026731e-07,
"loss": 0.2102,
"step": 371000
},
{
"epoch": 73.38,
"learning_rate": 6.729802513464991e-07,
"loss": 0.2182,
"step": 371500
},
{
"epoch": 73.47,
"learning_rate": 6.704867344903252e-07,
"loss": 0.2189,
"step": 372000
},
{
"epoch": 73.57,
"learning_rate": 6.679932176341512e-07,
"loss": 0.2218,
"step": 372500
},
{
"epoch": 73.67,
"learning_rate": 6.654997007779773e-07,
"loss": 0.2262,
"step": 373000
},
{
"epoch": 73.77,
"learning_rate": 6.630061839218034e-07,
"loss": 0.2245,
"step": 373500
},
{
"epoch": 73.87,
"learning_rate": 6.605126670656293e-07,
"loss": 0.2231,
"step": 374000
},
{
"epoch": 73.97,
"learning_rate": 6.580241372431678e-07,
"loss": 0.2207,
"step": 374500
},
{
"epoch": 74.0,
"eval_accuracy": 0.838684986030847,
"eval_f1": 0.49589123607107516,
"eval_loss": 0.78349369764328,
"eval_precision": 0.44844759162711073,
"eval_recall": 0.5545612009237876,
"eval_runtime": 24.1896,
"eval_samples_per_second": 126.046,
"eval_steps_per_second": 15.792,
"step": 374662
},
{
"epoch": 74.07,
"learning_rate": 6.555306203869939e-07,
"loss": 0.2211,
"step": 375000
},
{
"epoch": 74.17,
"learning_rate": 6.530371035308198e-07,
"loss": 0.2198,
"step": 375500
},
{
"epoch": 74.26,
"learning_rate": 6.50543586674646e-07,
"loss": 0.2185,
"step": 376000
},
{
"epoch": 74.36,
"learning_rate": 6.480500698184721e-07,
"loss": 0.2228,
"step": 376500
},
{
"epoch": 74.46,
"learning_rate": 6.455615399960104e-07,
"loss": 0.2249,
"step": 377000
},
{
"epoch": 74.56,
"learning_rate": 6.430680231398364e-07,
"loss": 0.2134,
"step": 377500
},
{
"epoch": 74.66,
"learning_rate": 6.405745062836626e-07,
"loss": 0.2162,
"step": 378000
},
{
"epoch": 74.76,
"learning_rate": 6.380809894274887e-07,
"loss": 0.2149,
"step": 378500
},
{
"epoch": 74.86,
"learning_rate": 6.355874725713146e-07,
"loss": 0.214,
"step": 379000
},
{
"epoch": 74.96,
"learning_rate": 6.331039297825655e-07,
"loss": 0.2218,
"step": 379500
},
{
"epoch": 75.0,
"eval_accuracy": 0.8382793065176624,
"eval_f1": 0.4926268861454046,
"eval_loss": 0.7711018919944763,
"eval_precision": 0.44418676561533704,
"eval_recall": 0.5529253271747498,
"eval_runtime": 24.2584,
"eval_samples_per_second": 125.688,
"eval_steps_per_second": 15.747,
"step": 379725
},
{
"epoch": 75.05,
"learning_rate": 6.306104129263914e-07,
"loss": 0.2316,
"step": 380000
},
{
"epoch": 75.15,
"learning_rate": 6.281168960702175e-07,
"loss": 0.2168,
"step": 380500
},
{
"epoch": 75.25,
"learning_rate": 6.256233792140435e-07,
"loss": 0.2174,
"step": 381000
},
{
"epoch": 75.35,
"learning_rate": 6.231298623578695e-07,
"loss": 0.2229,
"step": 381500
},
{
"epoch": 75.45,
"learning_rate": 6.20641332535408e-07,
"loss": 0.2168,
"step": 382000
},
{
"epoch": 75.55,
"learning_rate": 6.181478156792341e-07,
"loss": 0.2184,
"step": 382500
},
{
"epoch": 75.65,
"learning_rate": 6.156542988230601e-07,
"loss": 0.2099,
"step": 383000
},
{
"epoch": 75.75,
"learning_rate": 6.131607819668861e-07,
"loss": 0.2153,
"step": 383500
},
{
"epoch": 75.84,
"learning_rate": 6.106722521444246e-07,
"loss": 0.2214,
"step": 384000
},
{
"epoch": 75.94,
"learning_rate": 6.081787352882506e-07,
"loss": 0.2257,
"step": 384500
},
{
"epoch": 76.0,
"eval_accuracy": 0.837069922308546,
"eval_f1": 0.4922721239885259,
"eval_loss": 0.7748467922210693,
"eval_precision": 0.4434246047049749,
"eval_recall": 0.5532140107775212,
"eval_runtime": 23.4467,
"eval_samples_per_second": 130.04,
"eval_steps_per_second": 16.292,
"step": 384788
},
{
"epoch": 76.04,
"learning_rate": 6.056852184320766e-07,
"loss": 0.2208,
"step": 385000
},
{
"epoch": 76.14,
"learning_rate": 6.031917015759027e-07,
"loss": 0.23,
"step": 385500
},
{
"epoch": 76.24,
"learning_rate": 6.006981847197288e-07,
"loss": 0.2093,
"step": 386000
},
{
"epoch": 76.34,
"learning_rate": 5.982046678635548e-07,
"loss": 0.2182,
"step": 386500
},
{
"epoch": 76.44,
"learning_rate": 5.957111510073808e-07,
"loss": 0.2226,
"step": 387000
},
{
"epoch": 76.54,
"learning_rate": 5.932176341512069e-07,
"loss": 0.2185,
"step": 387500
},
{
"epoch": 76.63,
"learning_rate": 5.90724117295033e-07,
"loss": 0.2246,
"step": 388000
},
{
"epoch": 76.73,
"learning_rate": 5.882355874725714e-07,
"loss": 0.2287,
"step": 388500
},
{
"epoch": 76.83,
"learning_rate": 5.857420706163974e-07,
"loss": 0.2206,
"step": 389000
},
{
"epoch": 76.93,
"learning_rate": 5.832535407939359e-07,
"loss": 0.231,
"step": 389500
},
{
"epoch": 77.0,
"eval_accuracy": 0.8387041218569405,
"eval_f1": 0.49053553958058616,
"eval_loss": 0.7716959118843079,
"eval_precision": 0.4395822215445605,
"eval_recall": 0.5548498845265589,
"eval_runtime": 24.1315,
"eval_samples_per_second": 126.349,
"eval_steps_per_second": 15.83,
"step": 389851
},
{
"epoch": 77.03,
"learning_rate": 5.807600239377619e-07,
"loss": 0.2233,
"step": 390000
},
{
"epoch": 77.13,
"learning_rate": 5.782665070815879e-07,
"loss": 0.2144,
"step": 390500
},
{
"epoch": 77.23,
"learning_rate": 5.757729902254139e-07,
"loss": 0.223,
"step": 391000
},
{
"epoch": 77.33,
"learning_rate": 5.7327947336924e-07,
"loss": 0.2235,
"step": 391500
},
{
"epoch": 77.42,
"learning_rate": 5.707859565130661e-07,
"loss": 0.2127,
"step": 392000
},
{
"epoch": 77.52,
"learning_rate": 5.682924396568922e-07,
"loss": 0.2161,
"step": 392500
},
{
"epoch": 77.62,
"learning_rate": 5.658039098344305e-07,
"loss": 0.2258,
"step": 393000
},
{
"epoch": 77.72,
"learning_rate": 5.633103929782565e-07,
"loss": 0.2197,
"step": 393500
},
{
"epoch": 77.82,
"learning_rate": 5.608168761220827e-07,
"loss": 0.2291,
"step": 394000
},
{
"epoch": 77.92,
"learning_rate": 5.583233592659087e-07,
"loss": 0.2187,
"step": 394500
},
{
"epoch": 78.0,
"eval_accuracy": 0.8378353553522906,
"eval_f1": 0.49114893617021277,
"eval_loss": 0.7635823488235474,
"eval_precision": 0.44026548672566373,
"eval_recall": 0.5553310238645112,
"eval_runtime": 24.4109,
"eval_samples_per_second": 124.903,
"eval_steps_per_second": 15.649,
"step": 394914
},
{
"epoch": 78.02,
"learning_rate": 5.558298424097348e-07,
"loss": 0.2156,
"step": 395000
},
{
"epoch": 78.12,
"learning_rate": 5.533363255535608e-07,
"loss": 0.2141,
"step": 395500
},
{
"epoch": 78.21,
"learning_rate": 5.508428086973868e-07,
"loss": 0.216,
"step": 396000
},
{
"epoch": 78.31,
"learning_rate": 5.483492918412129e-07,
"loss": 0.2186,
"step": 396500
},
{
"epoch": 78.41,
"learning_rate": 5.458557749850389e-07,
"loss": 0.2144,
"step": 397000
},
{
"epoch": 78.51,
"learning_rate": 5.43362258128865e-07,
"loss": 0.221,
"step": 397500
},
{
"epoch": 78.61,
"learning_rate": 5.408737283064034e-07,
"loss": 0.226,
"step": 398000
},
{
"epoch": 78.71,
"learning_rate": 5.383802114502295e-07,
"loss": 0.2218,
"step": 398500
},
{
"epoch": 78.81,
"learning_rate": 5.358866945940555e-07,
"loss": 0.2271,
"step": 399000
},
{
"epoch": 78.91,
"learning_rate": 5.333931777378815e-07,
"loss": 0.2165,
"step": 399500
},
{
"epoch": 79.0,
"eval_accuracy": 0.8372153545868575,
"eval_f1": 0.4910901243536601,
"eval_loss": 0.7801523804664612,
"eval_precision": 0.4416942116995926,
"eval_recall": 0.5529253271747498,
"eval_runtime": 24.1131,
"eval_samples_per_second": 126.446,
"eval_steps_per_second": 15.842,
"step": 399977
},
{
"epoch": 79.0,
"learning_rate": 5.309046479154199e-07,
"loss": 0.2287,
"step": 400000
},
{
"epoch": 79.1,
"learning_rate": 5.284111310592461e-07,
"loss": 0.2196,
"step": 400500
},
{
"epoch": 79.2,
"learning_rate": 5.259176142030721e-07,
"loss": 0.2266,
"step": 401000
},
{
"epoch": 79.3,
"learning_rate": 5.234290843806104e-07,
"loss": 0.2199,
"step": 401500
},
{
"epoch": 79.4,
"learning_rate": 5.209355675244365e-07,
"loss": 0.2189,
"step": 402000
},
{
"epoch": 79.5,
"learning_rate": 5.184420506682626e-07,
"loss": 0.2273,
"step": 402500
},
{
"epoch": 79.6,
"learning_rate": 5.159535208458009e-07,
"loss": 0.2125,
"step": 403000
},
{
"epoch": 79.7,
"learning_rate": 5.13460003989627e-07,
"loss": 0.2241,
"step": 403500
},
{
"epoch": 79.79,
"learning_rate": 5.10966487133453e-07,
"loss": 0.2177,
"step": 404000
},
{
"epoch": 79.89,
"learning_rate": 5.084729702772792e-07,
"loss": 0.2159,
"step": 404500
},
{
"epoch": 79.99,
"learning_rate": 5.059794534211052e-07,
"loss": 0.2173,
"step": 405000
},
{
"epoch": 80.0,
"eval_accuracy": 0.8373301695434192,
"eval_f1": 0.49182712879760376,
"eval_loss": 0.7780812978744507,
"eval_precision": 0.44282632146709816,
"eval_recall": 0.5530215550423403,
"eval_runtime": 23.2524,
"eval_samples_per_second": 131.126,
"eval_steps_per_second": 16.428,
"step": 405040
},
{
"epoch": 80.09,
"learning_rate": 5.034859365649312e-07,
"loss": 0.2145,
"step": 405500
},
{
"epoch": 80.19,
"learning_rate": 5.009924197087572e-07,
"loss": 0.2144,
"step": 406000
},
{
"epoch": 80.29,
"learning_rate": 4.984989028525834e-07,
"loss": 0.2121,
"step": 406500
},
{
"epoch": 80.39,
"learning_rate": 4.960053859964094e-07,
"loss": 0.2188,
"step": 407000
},
{
"epoch": 80.49,
"learning_rate": 4.935118691402354e-07,
"loss": 0.2163,
"step": 407500
},
{
"epoch": 80.58,
"learning_rate": 4.910183522840615e-07,
"loss": 0.2056,
"step": 408000
},
{
"epoch": 80.68,
"learning_rate": 4.885248354278875e-07,
"loss": 0.2233,
"step": 408500
},
{
"epoch": 80.78,
"learning_rate": 4.860363056054259e-07,
"loss": 0.2178,
"step": 409000
},
{
"epoch": 80.88,
"learning_rate": 4.83542788749252e-07,
"loss": 0.2088,
"step": 409500
},
{
"epoch": 80.98,
"learning_rate": 4.81049271893078e-07,
"loss": 0.2251,
"step": 410000
},
{
"epoch": 81.0,
"eval_accuracy": 0.8379425159784147,
"eval_f1": 0.49120115897567007,
"eval_loss": 0.7737105488777161,
"eval_precision": 0.4407738778007188,
"eval_recall": 0.554657428791378,
"eval_runtime": 23.4259,
"eval_samples_per_second": 130.155,
"eval_steps_per_second": 16.307,
"step": 410103
},
{
"epoch": 81.08,
"learning_rate": 4.785557550369041e-07,
"loss": 0.2209,
"step": 410500
},
{
"epoch": 81.18,
"learning_rate": 4.760622381807301e-07,
"loss": 0.2155,
"step": 411000
},
{
"epoch": 81.28,
"learning_rate": 4.7356872132455623e-07,
"loss": 0.2093,
"step": 411500
},
{
"epoch": 81.37,
"learning_rate": 4.710801915020946e-07,
"loss": 0.2196,
"step": 412000
},
{
"epoch": 81.47,
"learning_rate": 4.6858667464592064e-07,
"loss": 0.2227,
"step": 412500
},
{
"epoch": 81.57,
"learning_rate": 4.660931577897467e-07,
"loss": 0.2134,
"step": 413000
},
{
"epoch": 81.67,
"learning_rate": 4.6359964093357273e-07,
"loss": 0.2152,
"step": 413500
},
{
"epoch": 81.77,
"learning_rate": 4.611061240773988e-07,
"loss": 0.2141,
"step": 414000
},
{
"epoch": 81.87,
"learning_rate": 4.5861759425493724e-07,
"loss": 0.2238,
"step": 414500
},
{
"epoch": 81.97,
"learning_rate": 4.5612407739876326e-07,
"loss": 0.2195,
"step": 415000
},
{
"epoch": 82.0,
"eval_accuracy": 0.8383596769872556,
"eval_f1": 0.4898150515639649,
"eval_loss": 0.7843115925788879,
"eval_precision": 0.43957472846871654,
"eval_recall": 0.5530215550423403,
"eval_runtime": 24.3105,
"eval_samples_per_second": 125.419,
"eval_steps_per_second": 15.713,
"step": 415166
},
{
"epoch": 82.07,
"learning_rate": 4.5363056054258933e-07,
"loss": 0.2136,
"step": 415500
},
{
"epoch": 82.16,
"learning_rate": 4.5113704368641535e-07,
"loss": 0.2171,
"step": 416000
},
{
"epoch": 82.26,
"learning_rate": 4.486435268302414e-07,
"loss": 0.2173,
"step": 416500
},
{
"epoch": 82.36,
"learning_rate": 4.4615000997406744e-07,
"loss": 0.2093,
"step": 417000
},
{
"epoch": 82.46,
"learning_rate": 4.4366148015160583e-07,
"loss": 0.2203,
"step": 417500
},
{
"epoch": 82.56,
"learning_rate": 4.4116796329543196e-07,
"loss": 0.2237,
"step": 418000
},
{
"epoch": 82.66,
"learning_rate": 4.38674446439258e-07,
"loss": 0.2147,
"step": 418500
},
{
"epoch": 82.76,
"learning_rate": 4.36180929583084e-07,
"loss": 0.2145,
"step": 419000
},
{
"epoch": 82.86,
"learning_rate": 4.3368741272691007e-07,
"loss": 0.2124,
"step": 419500
},
{
"epoch": 82.95,
"learning_rate": 4.3119888290444846e-07,
"loss": 0.2208,
"step": 420000
},
{
"epoch": 83.0,
"eval_accuracy": 0.8378506640131654,
"eval_f1": 0.4901736066895878,
"eval_loss": 0.7780388593673706,
"eval_precision": 0.4385205437837017,
"eval_recall": 0.5556197074672825,
"eval_runtime": 24.1182,
"eval_samples_per_second": 126.419,
"eval_steps_per_second": 15.839,
"step": 420229
},
{
"epoch": 83.05,
"learning_rate": 4.287053660482745e-07,
"loss": 0.2155,
"step": 420500
},
{
"epoch": 83.15,
"learning_rate": 4.262118491921006e-07,
"loss": 0.2045,
"step": 421000
},
{
"epoch": 83.25,
"learning_rate": 4.237183323359266e-07,
"loss": 0.2212,
"step": 421500
},
{
"epoch": 83.35,
"learning_rate": 4.21229802513465e-07,
"loss": 0.2137,
"step": 422000
},
{
"epoch": 83.45,
"learning_rate": 4.187362856572911e-07,
"loss": 0.2226,
"step": 422500
},
{
"epoch": 83.55,
"learning_rate": 4.162427688011171e-07,
"loss": 0.2136,
"step": 423000
},
{
"epoch": 83.65,
"learning_rate": 4.1374925194494317e-07,
"loss": 0.2184,
"step": 423500
},
{
"epoch": 83.74,
"learning_rate": 4.1125573508876924e-07,
"loss": 0.2121,
"step": 424000
},
{
"epoch": 83.84,
"learning_rate": 4.0876720526630763e-07,
"loss": 0.2127,
"step": 424500
},
{
"epoch": 83.94,
"learning_rate": 4.062736884101337e-07,
"loss": 0.2144,
"step": 425000
},
{
"epoch": 84.0,
"eval_accuracy": 0.8380764667610701,
"eval_f1": 0.49133415662394075,
"eval_loss": 0.7888113856315613,
"eval_precision": 0.44068443969139104,
"eval_recall": 0.5551385681293303,
"eval_runtime": 24.19,
"eval_samples_per_second": 126.044,
"eval_steps_per_second": 15.792,
"step": 425292
},
{
"epoch": 84.04,
"learning_rate": 4.037801715539597e-07,
"loss": 0.2187,
"step": 425500
},
{
"epoch": 84.14,
"learning_rate": 4.012866546977858e-07,
"loss": 0.2165,
"step": 426000
},
{
"epoch": 84.24,
"learning_rate": 3.987931378416118e-07,
"loss": 0.2147,
"step": 426500
},
{
"epoch": 84.34,
"learning_rate": 3.9629962098543794e-07,
"loss": 0.2092,
"step": 427000
},
{
"epoch": 84.44,
"learning_rate": 3.9381109116297633e-07,
"loss": 0.2179,
"step": 427500
},
{
"epoch": 84.53,
"learning_rate": 3.9131757430680235e-07,
"loss": 0.2123,
"step": 428000
},
{
"epoch": 84.63,
"learning_rate": 3.888240574506284e-07,
"loss": 0.2127,
"step": 428500
},
{
"epoch": 84.73,
"learning_rate": 3.8633054059445444e-07,
"loss": 0.2192,
"step": 429000
},
{
"epoch": 84.83,
"learning_rate": 3.8383702373828046e-07,
"loss": 0.2166,
"step": 429500
},
{
"epoch": 84.93,
"learning_rate": 3.813435068821066e-07,
"loss": 0.2123,
"step": 430000
},
{
"epoch": 85.0,
"eval_accuracy": 0.8388112824830648,
"eval_f1": 0.4910889400528694,
"eval_loss": 0.7885710000991821,
"eval_precision": 0.4408972592252335,
"eval_recall": 0.5541762894534257,
"eval_runtime": 24.1837,
"eval_samples_per_second": 126.077,
"eval_steps_per_second": 15.796,
"step": 430355
},
{
"epoch": 85.03,
"learning_rate": 3.788499900259326e-07,
"loss": 0.2197,
"step": 430500
},
{
"epoch": 85.13,
"learning_rate": 3.76361460203471e-07,
"loss": 0.2193,
"step": 431000
},
{
"epoch": 85.23,
"learning_rate": 3.7386794334729706e-07,
"loss": 0.213,
"step": 431500
},
{
"epoch": 85.32,
"learning_rate": 3.713744264911231e-07,
"loss": 0.216,
"step": 432000
},
{
"epoch": 85.42,
"learning_rate": 3.6888090963494915e-07,
"loss": 0.2137,
"step": 432500
},
{
"epoch": 85.52,
"learning_rate": 3.6639237981248754e-07,
"loss": 0.2204,
"step": 433000
},
{
"epoch": 85.62,
"learning_rate": 3.638988629563136e-07,
"loss": 0.2121,
"step": 433500
},
{
"epoch": 85.72,
"learning_rate": 3.61410333133852e-07,
"loss": 0.2184,
"step": 434000
},
{
"epoch": 85.82,
"learning_rate": 3.589168162776781e-07,
"loss": 0.2083,
"step": 434500
},
{
"epoch": 85.92,
"learning_rate": 3.564232994215041e-07,
"loss": 0.2195,
"step": 435000
},
{
"epoch": 86.0,
"eval_accuracy": 0.8386696773699721,
"eval_f1": 0.49464025383757826,
"eval_loss": 0.7830442786216736,
"eval_precision": 0.4460943542150039,
"eval_recall": 0.5550423402617398,
"eval_runtime": 23.4037,
"eval_samples_per_second": 130.278,
"eval_steps_per_second": 16.322,
"step": 435418
},
{
"epoch": 86.02,
"learning_rate": 3.5392978256533017e-07,
"loss": 0.2097,
"step": 435500
},
{
"epoch": 86.11,
"learning_rate": 3.514362657091562e-07,
"loss": 0.2071,
"step": 436000
},
{
"epoch": 86.21,
"learning_rate": 3.489477358866946e-07,
"loss": 0.2152,
"step": 436500
},
{
"epoch": 86.31,
"learning_rate": 3.464542190305207e-07,
"loss": 0.2152,
"step": 437000
},
{
"epoch": 86.41,
"learning_rate": 3.439607021743467e-07,
"loss": 0.218,
"step": 437500
},
{
"epoch": 86.51,
"learning_rate": 3.414671853181728e-07,
"loss": 0.2185,
"step": 438000
},
{
"epoch": 86.61,
"learning_rate": 3.389736684619988e-07,
"loss": 0.2211,
"step": 438500
},
{
"epoch": 86.71,
"learning_rate": 3.3648015160582493e-07,
"loss": 0.2207,
"step": 439000
},
{
"epoch": 86.81,
"learning_rate": 3.3398663474965095e-07,
"loss": 0.2272,
"step": 439500
},
{
"epoch": 86.9,
"learning_rate": 3.31493117893477e-07,
"loss": 0.2105,
"step": 440000
},
{
"epoch": 87.0,
"eval_accuracy": 0.8389414061005014,
"eval_f1": 0.492742453436095,
"eval_loss": 0.7846280932426453,
"eval_precision": 0.44387873177505205,
"eval_recall": 0.5536951501154734,
"eval_runtime": 23.9106,
"eval_samples_per_second": 127.517,
"eval_steps_per_second": 15.976,
"step": 440481
},
{
"epoch": 87.0,
"learning_rate": 3.2899960103730304e-07,
"loss": 0.2167,
"step": 440500
},
{
"epoch": 87.1,
"learning_rate": 3.2651107121484143e-07,
"loss": 0.2137,
"step": 441000
},
{
"epoch": 87.2,
"learning_rate": 3.240175543586675e-07,
"loss": 0.2162,
"step": 441500
},
{
"epoch": 87.3,
"learning_rate": 3.215240375024935e-07,
"loss": 0.2167,
"step": 442000
},
{
"epoch": 87.4,
"learning_rate": 3.190355076800319e-07,
"loss": 0.2144,
"step": 442500
},
{
"epoch": 87.5,
"learning_rate": 3.16541990823858e-07,
"loss": 0.2148,
"step": 443000
},
{
"epoch": 87.6,
"learning_rate": 3.1404847396768406e-07,
"loss": 0.218,
"step": 443500
},
{
"epoch": 87.7,
"learning_rate": 3.1155495711151013e-07,
"loss": 0.223,
"step": 444000
},
{
"epoch": 87.79,
"learning_rate": 3.0906144025533615e-07,
"loss": 0.2204,
"step": 444500
},
{
"epoch": 87.89,
"learning_rate": 3.065679233991622e-07,
"loss": 0.2136,
"step": 445000
},
{
"epoch": 87.99,
"learning_rate": 3.040744065429883e-07,
"loss": 0.2203,
"step": 445500
},
{
"epoch": 88.0,
"eval_accuracy": 0.8378238738566344,
"eval_f1": 0.4915982473305824,
"eval_loss": 0.7851070761680603,
"eval_precision": 0.4405642394205109,
"eval_recall": 0.5560046189376443,
"eval_runtime": 23.4737,
"eval_samples_per_second": 129.89,
"eval_steps_per_second": 16.274,
"step": 445544
},
{
"epoch": 88.09,
"learning_rate": 3.015808896868143e-07,
"loss": 0.2159,
"step": 446000
},
{
"epoch": 88.19,
"learning_rate": 2.990923598643527e-07,
"loss": 0.2045,
"step": 446500
},
{
"epoch": 88.29,
"learning_rate": 2.9659884300817877e-07,
"loss": 0.2064,
"step": 447000
},
{
"epoch": 88.39,
"learning_rate": 2.9410532615200484e-07,
"loss": 0.2214,
"step": 447500
},
{
"epoch": 88.49,
"learning_rate": 2.9161180929583086e-07,
"loss": 0.2136,
"step": 448000
},
{
"epoch": 88.58,
"learning_rate": 2.891182924396569e-07,
"loss": 0.2092,
"step": 448500
},
{
"epoch": 88.68,
"learning_rate": 2.8662477558348295e-07,
"loss": 0.2104,
"step": 449000
},
{
"epoch": 88.78,
"learning_rate": 2.84131258727309e-07,
"loss": 0.2084,
"step": 449500
},
{
"epoch": 88.88,
"learning_rate": 2.816427289048474e-07,
"loss": 0.2156,
"step": 450000
},
{
"epoch": 88.98,
"learning_rate": 2.791492120486735e-07,
"loss": 0.2079,
"step": 450500
},
{
"epoch": 89.0,
"eval_accuracy": 0.8380956025871636,
"eval_f1": 0.49355966902669957,
"eval_loss": 0.7890189290046692,
"eval_precision": 0.443235789796231,
"eval_recall": 0.556774441878368,
"eval_runtime": 23.2357,
"eval_samples_per_second": 131.22,
"eval_steps_per_second": 16.44,
"step": 450607
},
{
"epoch": 89.08,
"learning_rate": 2.766556951924995e-07,
"loss": 0.2051,
"step": 451000
},
{
"epoch": 89.18,
"learning_rate": 2.741621783363256e-07,
"loss": 0.2181,
"step": 451500
},
{
"epoch": 89.28,
"learning_rate": 2.7166866148015165e-07,
"loss": 0.2126,
"step": 452000
},
{
"epoch": 89.37,
"learning_rate": 2.6918013165769004e-07,
"loss": 0.2175,
"step": 452500
},
{
"epoch": 89.47,
"learning_rate": 2.6668661480151606e-07,
"loss": 0.2105,
"step": 453000
},
{
"epoch": 89.57,
"learning_rate": 2.6419309794534213e-07,
"loss": 0.207,
"step": 453500
},
{
"epoch": 89.67,
"learning_rate": 2.616995810891682e-07,
"loss": 0.2101,
"step": 454000
},
{
"epoch": 89.77,
"learning_rate": 2.592110512667066e-07,
"loss": 0.2084,
"step": 454500
},
{
"epoch": 89.87,
"learning_rate": 2.5671753441053266e-07,
"loss": 0.208,
"step": 455000
},
{
"epoch": 89.97,
"learning_rate": 2.542240175543587e-07,
"loss": 0.2109,
"step": 455500
},
{
"epoch": 90.0,
"eval_accuracy": 0.8384974549351295,
"eval_f1": 0.49219216656711323,
"eval_loss": 0.7881253957748413,
"eval_precision": 0.44212785528131227,
"eval_recall": 0.5550423402617398,
"eval_runtime": 23.2564,
"eval_samples_per_second": 131.104,
"eval_steps_per_second": 16.426,
"step": 455670
}
],
"max_steps": 506300,
"num_train_epochs": 100,
"total_flos": 6.575301079423942e+17,
"trial_name": null,
"trial_params": null
}