{ "best_metric": 0.591158390045166, "best_model_checkpoint": "/home/ubuntu/lf_output_V12_locked_warmup/checkpoint-15189", "epoch": 90.0, "global_step": 455670, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 2.48e-07, "loss": 0.3185, "step": 500 }, { "epoch": 0.2, "learning_rate": 4.98e-07, "loss": 0.3178, "step": 1000 }, { "epoch": 0.3, "learning_rate": 7.480000000000001e-07, "loss": 0.3206, "step": 1500 }, { "epoch": 0.4, "learning_rate": 9.98e-07, "loss": 0.3145, "step": 2000 }, { "epoch": 0.49, "learning_rate": 1.2475000000000001e-06, "loss": 0.3127, "step": 2500 }, { "epoch": 0.59, "learning_rate": 1.4975e-06, "loss": 0.3226, "step": 3000 }, { "epoch": 0.69, "learning_rate": 1.7470000000000002e-06, "loss": 0.3161, "step": 3500 }, { "epoch": 0.79, "learning_rate": 1.997e-06, "loss": 0.3184, "step": 4000 }, { "epoch": 0.89, "learning_rate": 2.2470000000000003e-06, "loss": 0.3248, "step": 4500 }, { "epoch": 0.99, "learning_rate": 2.4970000000000004e-06, "loss": 0.3276, "step": 5000 }, { "epoch": 1.0, "eval_accuracy": 0.8403498029009913, "eval_f1": 0.4677493043153217, "eval_loss": 0.5944358706474304, "eval_precision": 0.4114724150529777, "eval_recall": 0.5418591224018475, "eval_runtime": 23.8244, "eval_samples_per_second": 127.978, "eval_steps_per_second": 16.034, "step": 5063 }, { "epoch": 1.09, "learning_rate": 2.4975364053461002e-06, "loss": 0.3306, "step": 5500 }, { "epoch": 1.19, "learning_rate": 2.4950428884899264e-06, "loss": 0.3286, "step": 6000 }, { "epoch": 1.28, "learning_rate": 2.4925493716337525e-06, "loss": 0.3257, "step": 6500 }, { "epoch": 1.38, "learning_rate": 2.4900558547775786e-06, "loss": 0.3242, "step": 7000 }, { "epoch": 1.48, "learning_rate": 2.4875623379214043e-06, "loss": 0.3223, "step": 7500 }, { "epoch": 1.58, "learning_rate": 2.4850738080989428e-06, "loss": 0.3276, "step": 8000 }, { "epoch": 1.68, "learning_rate": 2.482580291242769e-06, "loss": 0.3321, "step": 8500 }, { "epoch": 1.78, "learning_rate": 2.480086774386595e-06, "loss": 0.3232, "step": 9000 }, { "epoch": 1.88, "learning_rate": 2.477593257530421e-06, "loss": 0.3214, "step": 9500 }, { "epoch": 1.98, "learning_rate": 2.4751047277079596e-06, "loss": 0.3293, "step": 10000 }, { "epoch": 2.0, "eval_accuracy": 0.8365149833518313, "eval_f1": 0.4657225216192505, "eval_loss": 0.6071960926055908, "eval_precision": 0.4115171650055371, "eval_recall": 0.5363741339491916, "eval_runtime": 24.2984, "eval_samples_per_second": 125.482, "eval_steps_per_second": 15.721, "step": 10126 }, { "epoch": 2.07, "learning_rate": 2.472616197885498e-06, "loss": 0.3365, "step": 10500 }, { "epoch": 2.17, "learning_rate": 2.4701226810293237e-06, "loss": 0.3266, "step": 11000 }, { "epoch": 2.27, "learning_rate": 2.4676291641731503e-06, "loss": 0.3354, "step": 11500 }, { "epoch": 2.37, "learning_rate": 2.465135647316976e-06, "loss": 0.3345, "step": 12000 }, { "epoch": 2.47, "learning_rate": 2.462642130460802e-06, "loss": 0.3273, "step": 12500 }, { "epoch": 2.57, "learning_rate": 2.4601536006383405e-06, "loss": 0.328, "step": 13000 }, { "epoch": 2.67, "learning_rate": 2.4576600837821667e-06, "loss": 0.3278, "step": 13500 }, { "epoch": 2.77, "learning_rate": 2.455166566925993e-06, "loss": 0.316, "step": 14000 }, { "epoch": 2.86, "learning_rate": 2.4526730500698185e-06, "loss": 0.3277, "step": 14500 }, { "epoch": 2.96, "learning_rate": 2.4501795332136446e-06, "loss": 0.3325, "step": 15000 }, { "epoch": 3.0, "eval_accuracy": 0.8391633816831873, "eval_f1": 0.4654475777898113, "eval_loss": 0.591158390045166, "eval_precision": 0.41035622475211164, "eval_recall": 0.5376250962278676, "eval_runtime": 24.0176, "eval_samples_per_second": 126.949, "eval_steps_per_second": 15.905, "step": 15189 }, { "epoch": 3.06, "learning_rate": 2.4476860163574707e-06, "loss": 0.3239, "step": 15500 }, { "epoch": 3.16, "learning_rate": 2.445192499501297e-06, "loss": 0.3314, "step": 16000 }, { "epoch": 3.26, "learning_rate": 2.4427039696788353e-06, "loss": 0.3241, "step": 16500 }, { "epoch": 3.36, "learning_rate": 2.4402104528226614e-06, "loss": 0.3135, "step": 17000 }, { "epoch": 3.46, "learning_rate": 2.437716935966487e-06, "loss": 0.3267, "step": 17500 }, { "epoch": 3.56, "learning_rate": 2.4352234191103133e-06, "loss": 0.3215, "step": 18000 }, { "epoch": 3.65, "learning_rate": 2.4327299022541394e-06, "loss": 0.315, "step": 18500 }, { "epoch": 3.75, "learning_rate": 2.4302363853979655e-06, "loss": 0.323, "step": 19000 }, { "epoch": 3.85, "learning_rate": 2.427742868541791e-06, "loss": 0.3224, "step": 19500 }, { "epoch": 3.95, "learning_rate": 2.4252493516856178e-06, "loss": 0.3183, "step": 20000 }, { "epoch": 4.0, "eval_accuracy": 0.8383711584829117, "eval_f1": 0.4724905501889962, "eval_loss": 0.5957902669906616, "eval_precision": 0.4192129974660903, "eval_recall": 0.5412817551963048, "eval_runtime": 23.7397, "eval_samples_per_second": 128.435, "eval_steps_per_second": 16.091, "step": 20252 }, { "epoch": 4.05, "learning_rate": 2.4227658088968685e-06, "loss": 0.3154, "step": 20500 }, { "epoch": 4.15, "learning_rate": 2.4202722920406942e-06, "loss": 0.319, "step": 21000 }, { "epoch": 4.25, "learning_rate": 2.4177787751845203e-06, "loss": 0.3196, "step": 21500 }, { "epoch": 4.35, "learning_rate": 2.4152852583283465e-06, "loss": 0.3197, "step": 22000 }, { "epoch": 4.44, "learning_rate": 2.4127917414721726e-06, "loss": 0.3256, "step": 22500 }, { "epoch": 4.54, "learning_rate": 2.4102982246159987e-06, "loss": 0.3183, "step": 23000 }, { "epoch": 4.64, "learning_rate": 2.407809694793537e-06, "loss": 0.3209, "step": 23500 }, { "epoch": 4.74, "learning_rate": 2.405316177937363e-06, "loss": 0.3317, "step": 24000 }, { "epoch": 4.84, "learning_rate": 2.402822661081189e-06, "loss": 0.3144, "step": 24500 }, { "epoch": 4.94, "learning_rate": 2.400329144225015e-06, "loss": 0.312, "step": 25000 }, { "epoch": 5.0, "eval_accuracy": 0.8374870833173869, "eval_f1": 0.4686698121016472, "eval_loss": 0.6015240550041199, "eval_precision": 0.4111837327523602, "eval_recall": 0.5448421862971516, "eval_runtime": 24.0516, "eval_samples_per_second": 126.769, "eval_steps_per_second": 15.883, "step": 25315 }, { "epoch": 5.04, "learning_rate": 2.3978356273688412e-06, "loss": 0.323, "step": 25500 }, { "epoch": 5.14, "learning_rate": 2.3953421105126674e-06, "loss": 0.325, "step": 26000 }, { "epoch": 5.23, "learning_rate": 2.392848593656493e-06, "loss": 0.3243, "step": 26500 }, { "epoch": 5.33, "learning_rate": 2.390360063834032e-06, "loss": 0.3281, "step": 27000 }, { "epoch": 5.43, "learning_rate": 2.3878665469778576e-06, "loss": 0.3126, "step": 27500 }, { "epoch": 5.53, "learning_rate": 2.3853730301216838e-06, "loss": 0.3271, "step": 28000 }, { "epoch": 5.63, "learning_rate": 2.38287951326551e-06, "loss": 0.3153, "step": 28500 }, { "epoch": 5.73, "learning_rate": 2.380385996409336e-06, "loss": 0.3243, "step": 29000 }, { "epoch": 5.83, "learning_rate": 2.3778924795531617e-06, "loss": 0.3068, "step": 29500 }, { "epoch": 5.93, "learning_rate": 2.3753989626969883e-06, "loss": 0.3108, "step": 30000 }, { "epoch": 6.0, "eval_accuracy": 0.8403842473879597, "eval_f1": 0.47207586933614337, "eval_loss": 0.60483717918396, "eval_precision": 0.4200105002625066, "eval_recall": 0.5388760585065435, "eval_runtime": 24.2316, "eval_samples_per_second": 125.828, "eval_steps_per_second": 15.765, "step": 30378 }, { "epoch": 6.02, "learning_rate": 2.372905445840814e-06, "loss": 0.3109, "step": 30500 }, { "epoch": 6.12, "learning_rate": 2.3704169160183524e-06, "loss": 0.308, "step": 31000 }, { "epoch": 6.22, "learning_rate": 2.367928386195891e-06, "loss": 0.3135, "step": 31500 }, { "epoch": 6.32, "learning_rate": 2.365434869339717e-06, "loss": 0.3175, "step": 32000 }, { "epoch": 6.42, "learning_rate": 2.362941352483543e-06, "loss": 0.3083, "step": 32500 }, { "epoch": 6.52, "learning_rate": 2.3604478356273692e-06, "loss": 0.3176, "step": 33000 }, { "epoch": 6.62, "learning_rate": 2.3579593058049072e-06, "loss": 0.3176, "step": 33500 }, { "epoch": 6.72, "learning_rate": 2.3554657889487334e-06, "loss": 0.3123, "step": 34000 }, { "epoch": 6.81, "learning_rate": 2.3529722720925595e-06, "loss": 0.317, "step": 34500 }, { "epoch": 6.91, "learning_rate": 2.3504787552363856e-06, "loss": 0.3041, "step": 35000 }, { "epoch": 7.0, "eval_accuracy": 0.8368594282215164, "eval_f1": 0.4661911316898177, "eval_loss": 0.617274284362793, "eval_precision": 0.4114571828289522, "eval_recall": 0.537721324095458, "eval_runtime": 23.4998, "eval_samples_per_second": 129.746, "eval_steps_per_second": 16.255, "step": 35441 }, { "epoch": 7.01, "learning_rate": 2.3479852383802117e-06, "loss": 0.3207, "step": 35500 }, { "epoch": 7.11, "learning_rate": 2.345491721524038e-06, "loss": 0.3163, "step": 36000 }, { "epoch": 7.21, "learning_rate": 2.3429982046678636e-06, "loss": 0.3053, "step": 36500 }, { "epoch": 7.31, "learning_rate": 2.34050468781169e-06, "loss": 0.3114, "step": 37000 }, { "epoch": 7.41, "learning_rate": 2.338011170955516e-06, "loss": 0.3071, "step": 37500 }, { "epoch": 7.51, "learning_rate": 2.335517654099342e-06, "loss": 0.3131, "step": 38000 }, { "epoch": 7.6, "learning_rate": 2.333024137243168e-06, "loss": 0.3149, "step": 38500 }, { "epoch": 7.7, "learning_rate": 2.330530620386994e-06, "loss": 0.3185, "step": 39000 }, { "epoch": 7.8, "learning_rate": 2.32803710353082e-06, "loss": 0.3135, "step": 39500 }, { "epoch": 7.9, "learning_rate": 2.3255535607420706e-06, "loss": 0.3119, "step": 40000 }, { "epoch": 8.0, "learning_rate": 2.3230600438858968e-06, "loss": 0.3154, "step": 40500 }, { "epoch": 8.0, "eval_accuracy": 0.834505721612002, "eval_f1": 0.46665559338925333, "eval_loss": 0.6168600916862488, "eval_precision": 0.41044558071585097, "eval_recall": 0.5407043879907621, "eval_runtime": 24.2307, "eval_samples_per_second": 125.832, "eval_steps_per_second": 15.765, "step": 40504 }, { "epoch": 8.1, "learning_rate": 2.320566527029723e-06, "loss": 0.3061, "step": 41000 }, { "epoch": 8.2, "learning_rate": 2.318073010173549e-06, "loss": 0.3099, "step": 41500 }, { "epoch": 8.3, "learning_rate": 2.315579493317375e-06, "loss": 0.3216, "step": 42000 }, { "epoch": 8.39, "learning_rate": 2.313085976461201e-06, "loss": 0.3064, "step": 42500 }, { "epoch": 8.49, "learning_rate": 2.3105924596050274e-06, "loss": 0.3075, "step": 43000 }, { "epoch": 8.59, "learning_rate": 2.308098942748853e-06, "loss": 0.3126, "step": 43500 }, { "epoch": 8.69, "learning_rate": 2.3056104129263915e-06, "loss": 0.3141, "step": 44000 }, { "epoch": 8.79, "learning_rate": 2.3031168960702177e-06, "loss": 0.3132, "step": 44500 }, { "epoch": 8.89, "learning_rate": 2.300623379214044e-06, "loss": 0.3156, "step": 45000 }, { "epoch": 8.99, "learning_rate": 2.298134849391582e-06, "loss": 0.313, "step": 45500 }, { "epoch": 9.0, "eval_accuracy": 0.8388112824830648, "eval_f1": 0.467983144674874, "eval_loss": 0.6226583123207092, "eval_precision": 0.41001882148544955, "eval_recall": 0.5450346420323325, "eval_runtime": 23.4782, "eval_samples_per_second": 129.865, "eval_steps_per_second": 16.27, "step": 45567 }, { "epoch": 9.09, "learning_rate": 2.2956413325354084e-06, "loss": 0.3038, "step": 46000 }, { "epoch": 9.18, "learning_rate": 2.293147815679234e-06, "loss": 0.3101, "step": 46500 }, { "epoch": 9.28, "learning_rate": 2.29065429882306e-06, "loss": 0.3245, "step": 47000 }, { "epoch": 9.38, "learning_rate": 2.2881607819668863e-06, "loss": 0.3215, "step": 47500 }, { "epoch": 9.48, "learning_rate": 2.2856672651107124e-06, "loss": 0.316, "step": 48000 }, { "epoch": 9.58, "learning_rate": 2.2831737482545386e-06, "loss": 0.3109, "step": 48500 }, { "epoch": 9.68, "learning_rate": 2.2806802313983647e-06, "loss": 0.3119, "step": 49000 }, { "epoch": 9.78, "learning_rate": 2.2781867145421904e-06, "loss": 0.3092, "step": 49500 }, { "epoch": 9.88, "learning_rate": 2.275703171753441e-06, "loss": 0.3061, "step": 50000 }, { "epoch": 9.97, "learning_rate": 2.2732096548972673e-06, "loss": 0.3205, "step": 50500 }, { "epoch": 10.0, "eval_accuracy": 0.8380420222741016, "eval_f1": 0.4691952972456483, "eval_loss": 0.607540488243103, "eval_precision": 0.4128225747496162, "eval_recall": 0.5433987682832948, "eval_runtime": 23.5452, "eval_samples_per_second": 129.496, "eval_steps_per_second": 16.224, "step": 50630 }, { "epoch": 10.07, "learning_rate": 2.2707161380410934e-06, "loss": 0.2944, "step": 51000 }, { "epoch": 10.17, "learning_rate": 2.268222621184919e-06, "loss": 0.3022, "step": 51500 }, { "epoch": 10.27, "learning_rate": 2.2657291043287456e-06, "loss": 0.3006, "step": 52000 }, { "epoch": 10.37, "learning_rate": 2.2632355874725713e-06, "loss": 0.3047, "step": 52500 }, { "epoch": 10.47, "learning_rate": 2.2607420706163975e-06, "loss": 0.3105, "step": 53000 }, { "epoch": 10.57, "learning_rate": 2.2582485537602236e-06, "loss": 0.3173, "step": 53500 }, { "epoch": 10.67, "learning_rate": 2.2557550369040497e-06, "loss": 0.3052, "step": 54000 }, { "epoch": 10.76, "learning_rate": 2.253261520047876e-06, "loss": 0.2972, "step": 54500 }, { "epoch": 10.86, "learning_rate": 2.2507779772591266e-06, "loss": 0.299, "step": 55000 }, { "epoch": 10.96, "learning_rate": 2.2482844604029523e-06, "loss": 0.3055, "step": 55500 }, { "epoch": 11.0, "eval_accuracy": 0.8372344904129512, "eval_f1": 0.4717340848806366, "eval_loss": 0.6190317869186401, "eval_precision": 0.41431275480489227, "eval_recall": 0.5476327944572749, "eval_runtime": 24.5177, "eval_samples_per_second": 124.359, "eval_steps_per_second": 15.581, "step": 55693 }, { "epoch": 11.06, "learning_rate": 2.245790943546779e-06, "loss": 0.3127, "step": 56000 }, { "epoch": 11.16, "learning_rate": 2.2432974266906046e-06, "loss": 0.3065, "step": 56500 }, { "epoch": 11.26, "learning_rate": 2.2408039098344307e-06, "loss": 0.3004, "step": 57000 }, { "epoch": 11.36, "learning_rate": 2.238310392978257e-06, "loss": 0.2948, "step": 57500 }, { "epoch": 11.46, "learning_rate": 2.235816876122083e-06, "loss": 0.2973, "step": 58000 }, { "epoch": 11.55, "learning_rate": 2.2333233592659086e-06, "loss": 0.3115, "step": 58500 }, { "epoch": 11.65, "learning_rate": 2.2308298424097348e-06, "loss": 0.3011, "step": 59000 }, { "epoch": 11.75, "learning_rate": 2.228341312587273e-06, "loss": 0.3096, "step": 59500 }, { "epoch": 11.85, "learning_rate": 2.2258527827648116e-06, "loss": 0.3055, "step": 60000 }, { "epoch": 11.95, "learning_rate": 2.2233592659086378e-06, "loss": 0.3054, "step": 60500 }, { "epoch": 12.0, "eval_accuracy": 0.8388572084656895, "eval_f1": 0.4739519825085145, "eval_loss": 0.627370297908783, "eval_precision": 0.4208796953177507, "eval_recall": 0.5423402617397999, "eval_runtime": 24.2182, "eval_samples_per_second": 125.897, "eval_steps_per_second": 15.773, "step": 60756 }, { "epoch": 12.05, "learning_rate": 2.220865749052464e-06, "loss": 0.2957, "step": 61000 }, { "epoch": 12.15, "learning_rate": 2.2183722321962896e-06, "loss": 0.3049, "step": 61500 }, { "epoch": 12.25, "learning_rate": 2.215878715340116e-06, "loss": 0.2989, "step": 62000 }, { "epoch": 12.34, "learning_rate": 2.213385198483942e-06, "loss": 0.3047, "step": 62500 }, { "epoch": 12.44, "learning_rate": 2.210891681627768e-06, "loss": 0.3031, "step": 63000 }, { "epoch": 12.54, "learning_rate": 2.208398164771594e-06, "loss": 0.2882, "step": 63500 }, { "epoch": 12.64, "learning_rate": 2.2059046479154202e-06, "loss": 0.3107, "step": 64000 }, { "epoch": 12.74, "learning_rate": 2.2034161180929582e-06, "loss": 0.3006, "step": 64500 }, { "epoch": 12.84, "learning_rate": 2.2009226012367848e-06, "loss": 0.297, "step": 65000 }, { "epoch": 12.94, "learning_rate": 2.1984290843806105e-06, "loss": 0.3023, "step": 65500 }, { "epoch": 13.0, "eval_accuracy": 0.8370890581346396, "eval_f1": 0.47168787815564367, "eval_loss": 0.6190006732940674, "eval_precision": 0.4164149414278347, "eval_recall": 0.5438799076212472, "eval_runtime": 24.4283, "eval_samples_per_second": 124.814, "eval_steps_per_second": 15.638, "step": 65819 }, { "epoch": 13.04, "learning_rate": 2.195940554558149e-06, "loss": 0.3061, "step": 66000 }, { "epoch": 13.13, "learning_rate": 2.193447037701975e-06, "loss": 0.3053, "step": 66500 }, { "epoch": 13.23, "learning_rate": 2.190953520845801e-06, "loss": 0.3003, "step": 67000 }, { "epoch": 13.33, "learning_rate": 2.1884600039896273e-06, "loss": 0.2951, "step": 67500 }, { "epoch": 13.43, "learning_rate": 2.1859664871334534e-06, "loss": 0.2988, "step": 68000 }, { "epoch": 13.53, "learning_rate": 2.183472970277279e-06, "loss": 0.2967, "step": 68500 }, { "epoch": 13.63, "learning_rate": 2.1809794534211052e-06, "loss": 0.2943, "step": 69000 }, { "epoch": 13.73, "learning_rate": 2.1784909235986437e-06, "loss": 0.293, "step": 69500 }, { "epoch": 13.83, "learning_rate": 2.17599740674247e-06, "loss": 0.2929, "step": 70000 }, { "epoch": 13.92, "learning_rate": 2.173503889886296e-06, "loss": 0.3002, "step": 70500 }, { "epoch": 14.0, "eval_accuracy": 0.8366718971257989, "eval_f1": 0.46755174404221983, "eval_loss": 0.6290284991264343, "eval_precision": 0.40903188573077476, "eval_recall": 0.5456120092378753, "eval_runtime": 24.3972, "eval_samples_per_second": 124.973, "eval_steps_per_second": 15.658, "step": 70882 }, { "epoch": 14.02, "learning_rate": 2.1710103730301216e-06, "loss": 0.3033, "step": 71000 }, { "epoch": 14.12, "learning_rate": 2.1685168561739478e-06, "loss": 0.2906, "step": 71500 }, { "epoch": 14.22, "learning_rate": 2.166023339317774e-06, "loss": 0.294, "step": 72000 }, { "epoch": 14.32, "learning_rate": 2.1635298224616e-06, "loss": 0.2994, "step": 72500 }, { "epoch": 14.42, "learning_rate": 2.161036305605426e-06, "loss": 0.2927, "step": 73000 }, { "epoch": 14.52, "learning_rate": 2.1585427887492523e-06, "loss": 0.3221, "step": 73500 }, { "epoch": 14.62, "learning_rate": 2.156049271893078e-06, "loss": 0.294, "step": 74000 }, { "epoch": 14.71, "learning_rate": 2.1535607420706164e-06, "loss": 0.292, "step": 74500 }, { "epoch": 14.81, "learning_rate": 2.1510672252144425e-06, "loss": 0.2976, "step": 75000 }, { "epoch": 14.91, "learning_rate": 2.1485737083582687e-06, "loss": 0.2959, "step": 75500 }, { "epoch": 15.0, "eval_accuracy": 0.8372536262390448, "eval_f1": 0.4673331397519393, "eval_loss": 0.6437515020370483, "eval_precision": 0.4107181917608458, "eval_recall": 0.5420515781370285, "eval_runtime": 24.1488, "eval_samples_per_second": 126.259, "eval_steps_per_second": 15.819, "step": 75945 }, { "epoch": 15.01, "learning_rate": 2.1460801915020948e-06, "loss": 0.2922, "step": 76000 }, { "epoch": 15.11, "learning_rate": 2.1435916616796332e-06, "loss": 0.2853, "step": 76500 }, { "epoch": 15.21, "learning_rate": 2.141098144823459e-06, "loss": 0.2995, "step": 77000 }, { "epoch": 15.31, "learning_rate": 2.1386046279672855e-06, "loss": 0.2983, "step": 77500 }, { "epoch": 15.41, "learning_rate": 2.136111111111111e-06, "loss": 0.2869, "step": 78000 }, { "epoch": 15.5, "learning_rate": 2.1336225812886496e-06, "loss": 0.2956, "step": 78500 }, { "epoch": 15.6, "learning_rate": 2.1311290644324757e-06, "loss": 0.2963, "step": 79000 }, { "epoch": 15.7, "learning_rate": 2.128635547576302e-06, "loss": 0.2911, "step": 79500 }, { "epoch": 15.8, "learning_rate": 2.1261420307201276e-06, "loss": 0.2931, "step": 80000 }, { "epoch": 15.9, "learning_rate": 2.1236535008976664e-06, "loss": 0.2867, "step": 80500 }, { "epoch": 16.0, "learning_rate": 2.121159984041492e-06, "loss": 0.3072, "step": 81000 }, { "epoch": 16.0, "eval_accuracy": 0.8390447395614069, "eval_f1": 0.4751165113182424, "eval_loss": 0.6363312602043152, "eval_precision": 0.41854838709677417, "eval_recall": 0.549364896073903, "eval_runtime": 24.5371, "eval_samples_per_second": 124.261, "eval_steps_per_second": 15.568, "step": 81008 }, { "epoch": 16.1, "learning_rate": 2.1186714542190306e-06, "loss": 0.3032, "step": 81500 }, { "epoch": 16.2, "learning_rate": 2.1161779373628567e-06, "loss": 0.2825, "step": 82000 }, { "epoch": 16.29, "learning_rate": 2.113684420506683e-06, "loss": 0.3021, "step": 82500 }, { "epoch": 16.39, "learning_rate": 2.1111958906842213e-06, "loss": 0.2898, "step": 83000 }, { "epoch": 16.49, "learning_rate": 2.1087023738280474e-06, "loss": 0.2983, "step": 83500 }, { "epoch": 16.59, "learning_rate": 2.1062088569718735e-06, "loss": 0.2914, "step": 84000 }, { "epoch": 16.69, "learning_rate": 2.1037153401156992e-06, "loss": 0.2878, "step": 84500 }, { "epoch": 16.79, "learning_rate": 2.1012218232595253e-06, "loss": 0.2899, "step": 85000 }, { "epoch": 16.89, "learning_rate": 2.0987283064033515e-06, "loss": 0.2821, "step": 85500 }, { "epoch": 16.99, "learning_rate": 2.0962347895471776e-06, "loss": 0.2922, "step": 86000 }, { "epoch": 17.0, "eval_accuracy": 0.8359906617168663, "eval_f1": 0.47037298848184955, "eval_loss": 0.6440777778625488, "eval_precision": 0.4141465914915428, "eval_recall": 0.544264819091609, "eval_runtime": 23.6318, "eval_samples_per_second": 129.021, "eval_steps_per_second": 16.165, "step": 86071 }, { "epoch": 17.08, "learning_rate": 2.0937412726910037e-06, "loss": 0.2905, "step": 86500 }, { "epoch": 17.18, "learning_rate": 2.0912477558348294e-06, "loss": 0.2984, "step": 87000 }, { "epoch": 17.28, "learning_rate": 2.0887542389786555e-06, "loss": 0.2914, "step": 87500 }, { "epoch": 17.38, "learning_rate": 2.0862607221224817e-06, "loss": 0.2974, "step": 88000 }, { "epoch": 17.48, "learning_rate": 2.083767205266308e-06, "loss": 0.2833, "step": 88500 }, { "epoch": 17.58, "learning_rate": 2.081273688410134e-06, "loss": 0.2928, "step": 89000 }, { "epoch": 17.68, "learning_rate": 2.07878017155396e-06, "loss": 0.2865, "step": 89500 }, { "epoch": 17.78, "learning_rate": 2.076291641731498e-06, "loss": 0.2916, "step": 90000 }, { "epoch": 17.87, "learning_rate": 2.0737981248753246e-06, "loss": 0.29, "step": 90500 }, { "epoch": 17.97, "learning_rate": 2.0713046080191503e-06, "loss": 0.2917, "step": 91000 }, { "epoch": 18.0, "eval_accuracy": 0.8377128860652915, "eval_f1": 0.47028940245679784, "eval_loss": 0.6375299096107483, "eval_precision": 0.4145195625045878, "eval_recall": 0.5433987682832948, "eval_runtime": 23.5063, "eval_samples_per_second": 129.71, "eval_steps_per_second": 16.251, "step": 91134 }, { "epoch": 18.07, "learning_rate": 2.0688110911629764e-06, "loss": 0.2896, "step": 91500 }, { "epoch": 18.17, "learning_rate": 2.066322561340515e-06, "loss": 0.2799, "step": 92000 }, { "epoch": 18.27, "learning_rate": 2.063829044484341e-06, "loss": 0.2841, "step": 92500 }, { "epoch": 18.37, "learning_rate": 2.0613355276281667e-06, "loss": 0.2844, "step": 93000 }, { "epoch": 18.47, "learning_rate": 2.0588420107719933e-06, "loss": 0.29, "step": 93500 }, { "epoch": 18.57, "learning_rate": 2.056348493915819e-06, "loss": 0.2793, "step": 94000 }, { "epoch": 18.66, "learning_rate": 2.0538599640933574e-06, "loss": 0.2849, "step": 94500 }, { "epoch": 18.76, "learning_rate": 2.0513664472371835e-06, "loss": 0.3024, "step": 95000 }, { "epoch": 18.86, "learning_rate": 2.0488729303810097e-06, "loss": 0.2812, "step": 95500 }, { "epoch": 18.96, "learning_rate": 2.0463794135248354e-06, "loss": 0.2902, "step": 96000 }, { "epoch": 19.0, "eval_accuracy": 0.8378315281870719, "eval_f1": 0.473609487222315, "eval_loss": 0.6511485576629639, "eval_precision": 0.41833874299203305, "eval_recall": 0.5457082371054658, "eval_runtime": 24.4979, "eval_samples_per_second": 124.459, "eval_steps_per_second": 15.593, "step": 96197 }, { "epoch": 19.06, "learning_rate": 2.043885896668662e-06, "loss": 0.3001, "step": 96500 }, { "epoch": 19.16, "learning_rate": 2.0413973668462e-06, "loss": 0.2863, "step": 97000 }, { "epoch": 19.26, "learning_rate": 2.038903849990026e-06, "loss": 0.2861, "step": 97500 }, { "epoch": 19.36, "learning_rate": 2.036410333133852e-06, "loss": 0.2863, "step": 98000 }, { "epoch": 19.45, "learning_rate": 2.0339168162776783e-06, "loss": 0.2827, "step": 98500 }, { "epoch": 19.55, "learning_rate": 2.031423299421504e-06, "loss": 0.2832, "step": 99000 }, { "epoch": 19.65, "learning_rate": 2.0289297825653305e-06, "loss": 0.2847, "step": 99500 }, { "epoch": 19.75, "learning_rate": 2.0264412527428686e-06, "loss": 0.2889, "step": 100000 }, { "epoch": 19.85, "learning_rate": 2.0239477358866947e-06, "loss": 0.2835, "step": 100500 }, { "epoch": 19.95, "learning_rate": 2.021454219030521e-06, "loss": 0.2878, "step": 101000 }, { "epoch": 20.0, "eval_accuracy": 0.8355620192123694, "eval_f1": 0.46679081315952825, "eval_loss": 0.6431704163551331, "eval_precision": 0.4094968416467001, "eval_recall": 0.5427251732101617, "eval_runtime": 23.6323, "eval_samples_per_second": 129.018, "eval_steps_per_second": 16.164, "step": 101260 }, { "epoch": 20.05, "learning_rate": 2.018960702174347e-06, "loss": 0.2854, "step": 101500 }, { "epoch": 20.15, "learning_rate": 2.016467185318173e-06, "loss": 0.2831, "step": 102000 }, { "epoch": 20.24, "learning_rate": 2.0139786554957115e-06, "loss": 0.2859, "step": 102500 }, { "epoch": 20.34, "learning_rate": 2.011485138639537e-06, "loss": 0.2892, "step": 103000 }, { "epoch": 20.44, "learning_rate": 2.0089916217833638e-06, "loss": 0.2886, "step": 103500 }, { "epoch": 20.54, "learning_rate": 2.0064981049271895e-06, "loss": 0.2792, "step": 104000 }, { "epoch": 20.64, "learning_rate": 2.0040045880710156e-06, "loss": 0.2864, "step": 104500 }, { "epoch": 20.74, "learning_rate": 2.0015110712148417e-06, "loss": 0.274, "step": 105000 }, { "epoch": 20.84, "learning_rate": 1.999017554358668e-06, "loss": 0.2821, "step": 105500 }, { "epoch": 20.94, "learning_rate": 1.9965240375024935e-06, "loss": 0.2755, "step": 106000 }, { "epoch": 21.0, "eval_accuracy": 0.8393623942745608, "eval_f1": 0.47563336419493313, "eval_loss": 0.6629786491394043, "eval_precision": 0.4226626776364996, "eval_recall": 0.5437836797536567, "eval_runtime": 24.3778, "eval_samples_per_second": 125.073, "eval_steps_per_second": 15.67, "step": 106323 }, { "epoch": 21.03, "learning_rate": 1.9940305206463197e-06, "loss": 0.2888, "step": 106500 }, { "epoch": 21.13, "learning_rate": 1.9915370037901458e-06, "loss": 0.2889, "step": 107000 }, { "epoch": 21.23, "learning_rate": 1.9890484739676842e-06, "loss": 0.2746, "step": 107500 }, { "epoch": 21.33, "learning_rate": 1.9865599441452227e-06, "loss": 0.2697, "step": 108000 }, { "epoch": 21.43, "learning_rate": 1.984066427289049e-06, "loss": 0.2843, "step": 108500 }, { "epoch": 21.53, "learning_rate": 1.9815729104328745e-06, "loss": 0.2823, "step": 109000 }, { "epoch": 21.63, "learning_rate": 1.9790793935767006e-06, "loss": 0.2864, "step": 109500 }, { "epoch": 21.73, "learning_rate": 1.9765858767205267e-06, "loss": 0.2849, "step": 110000 }, { "epoch": 21.83, "learning_rate": 1.974092359864353e-06, "loss": 0.2831, "step": 110500 }, { "epoch": 21.92, "learning_rate": 1.971598843008179e-06, "loss": 0.2842, "step": 111000 }, { "epoch": 22.0, "eval_accuracy": 0.8373914041869187, "eval_f1": 0.47138215446138465, "eval_loss": 0.64870285987854, "eval_precision": 0.41560158660202734, "eval_recall": 0.5444572748267898, "eval_runtime": 23.5627, "eval_samples_per_second": 129.399, "eval_steps_per_second": 16.212, "step": 111386 }, { "epoch": 22.02, "learning_rate": 1.9691053261520047e-06, "loss": 0.2736, "step": 111500 }, { "epoch": 22.12, "learning_rate": 1.966616796329543e-06, "loss": 0.2822, "step": 112000 }, { "epoch": 22.22, "learning_rate": 1.9641232794733693e-06, "loss": 0.2809, "step": 112500 }, { "epoch": 22.32, "learning_rate": 1.9616297626171954e-06, "loss": 0.2829, "step": 113000 }, { "epoch": 22.42, "learning_rate": 1.959141232794734e-06, "loss": 0.2886, "step": 113500 }, { "epoch": 22.52, "learning_rate": 1.95664771593856e-06, "loss": 0.2802, "step": 114000 }, { "epoch": 22.62, "learning_rate": 1.954154199082386e-06, "loss": 0.2795, "step": 114500 }, { "epoch": 22.71, "learning_rate": 1.951665669259924e-06, "loss": 0.2797, "step": 115000 }, { "epoch": 22.81, "learning_rate": 1.9491721524037506e-06, "loss": 0.2828, "step": 115500 }, { "epoch": 22.91, "learning_rate": 1.9466786355475763e-06, "loss": 0.2845, "step": 116000 }, { "epoch": 23.0, "eval_accuracy": 0.8386811588656282, "eval_f1": 0.47448125836680055, "eval_loss": 0.6472445726394653, "eval_precision": 0.4197010065127294, "eval_recall": 0.5457082371054658, "eval_runtime": 23.7562, "eval_samples_per_second": 128.345, "eval_steps_per_second": 16.08, "step": 116449 }, { "epoch": 23.01, "learning_rate": 1.9441851186914025e-06, "loss": 0.282, "step": 116500 }, { "epoch": 23.11, "learning_rate": 1.9416916018352286e-06, "loss": 0.2757, "step": 117000 }, { "epoch": 23.21, "learning_rate": 1.9391980849790547e-06, "loss": 0.2837, "step": 117500 }, { "epoch": 23.31, "learning_rate": 1.936704568122881e-06, "loss": 0.2737, "step": 118000 }, { "epoch": 23.41, "learning_rate": 1.9342110512667065e-06, "loss": 0.2774, "step": 118500 }, { "epoch": 23.5, "learning_rate": 1.9317175344105327e-06, "loss": 0.2779, "step": 119000 }, { "epoch": 23.6, "learning_rate": 1.929229004588071e-06, "loss": 0.2776, "step": 119500 }, { "epoch": 23.7, "learning_rate": 1.9267354877318972e-06, "loss": 0.289, "step": 120000 }, { "epoch": 23.8, "learning_rate": 1.9242419708757234e-06, "loss": 0.2731, "step": 120500 }, { "epoch": 23.9, "learning_rate": 1.9217484540195495e-06, "loss": 0.2738, "step": 121000 }, { "epoch": 24.0, "learning_rate": 1.919259924197088e-06, "loss": 0.2877, "step": 121500 }, { "epoch": 24.0, "eval_accuracy": 0.8382027632132879, "eval_f1": 0.47804428816610156, "eval_loss": 0.6589922308921814, "eval_precision": 0.4230569756242128, "eval_recall": 0.5494611239414935, "eval_runtime": 23.7626, "eval_samples_per_second": 128.311, "eval_steps_per_second": 16.076, "step": 121512 }, { "epoch": 24.1, "learning_rate": 1.9167664073409136e-06, "loss": 0.2805, "step": 122000 }, { "epoch": 24.2, "learning_rate": 1.9142728904847398e-06, "loss": 0.2735, "step": 122500 }, { "epoch": 24.29, "learning_rate": 1.911779373628566e-06, "loss": 0.2688, "step": 123000 }, { "epoch": 24.39, "learning_rate": 1.909285856772392e-06, "loss": 0.2881, "step": 123500 }, { "epoch": 24.49, "learning_rate": 1.9067923399162181e-06, "loss": 0.2755, "step": 124000 }, { "epoch": 24.59, "learning_rate": 1.904298823060044e-06, "loss": 0.2799, "step": 124500 }, { "epoch": 24.69, "learning_rate": 1.9018053062038702e-06, "loss": 0.2716, "step": 125000 }, { "epoch": 24.79, "learning_rate": 1.899311789347696e-06, "loss": 0.2837, "step": 125500 }, { "epoch": 24.89, "learning_rate": 1.8968182724915222e-06, "loss": 0.2855, "step": 126000 }, { "epoch": 24.99, "learning_rate": 1.8943247556353483e-06, "loss": 0.2745, "step": 126500 }, { "epoch": 25.0, "eval_accuracy": 0.8376172069348233, "eval_f1": 0.47353063776573573, "eval_loss": 0.6596588492393494, "eval_precision": 0.41770848654213855, "eval_recall": 0.5465742879137798, "eval_runtime": 23.6288, "eval_samples_per_second": 129.037, "eval_steps_per_second": 16.167, "step": 126575 }, { "epoch": 25.08, "learning_rate": 1.8918312387791742e-06, "loss": 0.2715, "step": 127000 }, { "epoch": 25.18, "learning_rate": 1.8893377219230001e-06, "loss": 0.2714, "step": 127500 }, { "epoch": 25.28, "learning_rate": 1.8868442050668265e-06, "loss": 0.2827, "step": 128000 }, { "epoch": 25.38, "learning_rate": 1.8843506882106524e-06, "loss": 0.2745, "step": 128500 }, { "epoch": 25.48, "learning_rate": 1.8818621583881908e-06, "loss": 0.2717, "step": 129000 }, { "epoch": 25.58, "learning_rate": 1.879368641532017e-06, "loss": 0.2747, "step": 129500 }, { "epoch": 25.68, "learning_rate": 1.8768751246758429e-06, "loss": 0.269, "step": 130000 }, { "epoch": 25.78, "learning_rate": 1.8743816078196688e-06, "loss": 0.2766, "step": 130500 }, { "epoch": 25.87, "learning_rate": 1.8718880909634951e-06, "loss": 0.2684, "step": 131000 }, { "epoch": 25.97, "learning_rate": 1.8693995611410334e-06, "loss": 0.2714, "step": 131500 }, { "epoch": 26.0, "eval_accuracy": 0.8359294270733667, "eval_f1": 0.4756838269174381, "eval_loss": 0.6624502539634705, "eval_precision": 0.42118537200504413, "eval_recall": 0.5463818321785989, "eval_runtime": 24.4351, "eval_samples_per_second": 124.78, "eval_steps_per_second": 15.633, "step": 131638 }, { "epoch": 26.07, "learning_rate": 1.8669060442848595e-06, "loss": 0.2782, "step": 132000 }, { "epoch": 26.17, "learning_rate": 1.8644125274286856e-06, "loss": 0.2704, "step": 132500 }, { "epoch": 26.27, "learning_rate": 1.8619190105725115e-06, "loss": 0.2747, "step": 133000 }, { "epoch": 26.37, "learning_rate": 1.8594254937163379e-06, "loss": 0.2756, "step": 133500 }, { "epoch": 26.47, "learning_rate": 1.8569319768601638e-06, "loss": 0.2747, "step": 134000 }, { "epoch": 26.57, "learning_rate": 1.8544484340714145e-06, "loss": 0.2793, "step": 134500 }, { "epoch": 26.66, "learning_rate": 1.8519549172152404e-06, "loss": 0.2729, "step": 135000 }, { "epoch": 26.76, "learning_rate": 1.8494614003590666e-06, "loss": 0.2852, "step": 135500 }, { "epoch": 26.86, "learning_rate": 1.8469678835028925e-06, "loss": 0.2648, "step": 136000 }, { "epoch": 26.96, "learning_rate": 1.8444743666467188e-06, "loss": 0.2696, "step": 136500 }, { "epoch": 27.0, "eval_accuracy": 0.8369244900302346, "eval_f1": 0.47798795129965876, "eval_loss": 0.6801736354827881, "eval_precision": 0.42510303484451106, "eval_recall": 0.5459006928406467, "eval_runtime": 23.7599, "eval_samples_per_second": 128.325, "eval_steps_per_second": 16.077, "step": 136701 }, { "epoch": 27.06, "learning_rate": 1.8419808497905447e-06, "loss": 0.2712, "step": 137000 }, { "epoch": 27.16, "learning_rate": 1.8394923199680832e-06, "loss": 0.2618, "step": 137500 }, { "epoch": 27.26, "learning_rate": 1.8369988031119093e-06, "loss": 0.2696, "step": 138000 }, { "epoch": 27.36, "learning_rate": 1.8345052862557352e-06, "loss": 0.2811, "step": 138500 }, { "epoch": 27.45, "learning_rate": 1.8320117693995611e-06, "loss": 0.2694, "step": 139000 }, { "epoch": 27.55, "learning_rate": 1.8295182525433875e-06, "loss": 0.2752, "step": 139500 }, { "epoch": 27.65, "learning_rate": 1.8270247356872134e-06, "loss": 0.2695, "step": 140000 }, { "epoch": 27.75, "learning_rate": 1.8245312188310393e-06, "loss": 0.2798, "step": 140500 }, { "epoch": 27.85, "learning_rate": 1.8220377019748656e-06, "loss": 0.2761, "step": 141000 }, { "epoch": 27.95, "learning_rate": 1.8195491721524039e-06, "loss": 0.2731, "step": 141500 }, { "epoch": 28.0, "eval_accuracy": 0.8383022695089747, "eval_f1": 0.4802879383945761, "eval_loss": 0.6660904884338379, "eval_precision": 0.4249740779143831, "eval_recall": 0.5521555042340262, "eval_runtime": 24.3606, "eval_samples_per_second": 125.161, "eval_steps_per_second": 15.681, "step": 141764 }, { "epoch": 28.05, "learning_rate": 1.8170556552962298e-06, "loss": 0.2698, "step": 142000 }, { "epoch": 28.15, "learning_rate": 1.8145621384400561e-06, "loss": 0.264, "step": 142500 }, { "epoch": 28.24, "learning_rate": 1.812068621583882e-06, "loss": 0.2612, "step": 143000 }, { "epoch": 28.34, "learning_rate": 1.809575104727708e-06, "loss": 0.2699, "step": 143500 }, { "epoch": 28.44, "learning_rate": 1.8070815878715343e-06, "loss": 0.2689, "step": 144000 }, { "epoch": 28.54, "learning_rate": 1.8045930580490725e-06, "loss": 0.2776, "step": 144500 }, { "epoch": 28.64, "learning_rate": 1.802104528226611e-06, "loss": 0.2742, "step": 145000 }, { "epoch": 28.74, "learning_rate": 1.799611011370437e-06, "loss": 0.2666, "step": 145500 }, { "epoch": 28.84, "learning_rate": 1.797117494514263e-06, "loss": 0.2659, "step": 146000 }, { "epoch": 28.94, "learning_rate": 1.7946239776580889e-06, "loss": 0.2701, "step": 146500 }, { "epoch": 29.0, "eval_accuracy": 0.8383520226568181, "eval_f1": 0.47666694694358025, "eval_loss": 0.6661925315856934, "eval_precision": 0.42324921606689564, "eval_recall": 0.5455157813702848, "eval_runtime": 23.5786, "eval_samples_per_second": 129.312, "eval_steps_per_second": 16.201, "step": 146827 }, { "epoch": 29.03, "learning_rate": 1.7921304608019152e-06, "loss": 0.2604, "step": 147000 }, { "epoch": 29.13, "learning_rate": 1.7896369439457411e-06, "loss": 0.2697, "step": 147500 }, { "epoch": 29.23, "learning_rate": 1.7871434270895675e-06, "loss": 0.2711, "step": 148000 }, { "epoch": 29.33, "learning_rate": 1.7846499102333934e-06, "loss": 0.2653, "step": 148500 }, { "epoch": 29.43, "learning_rate": 1.7821563933772193e-06, "loss": 0.2611, "step": 149000 }, { "epoch": 29.53, "learning_rate": 1.779667863554758e-06, "loss": 0.2751, "step": 149500 }, { "epoch": 29.63, "learning_rate": 1.7771743466985839e-06, "loss": 0.2696, "step": 150000 }, { "epoch": 29.73, "learning_rate": 1.7746808298424098e-06, "loss": 0.265, "step": 150500 }, { "epoch": 29.82, "learning_rate": 1.7721873129862361e-06, "loss": 0.2761, "step": 151000 }, { "epoch": 29.92, "learning_rate": 1.769693796130062e-06, "loss": 0.2653, "step": 151500 }, { "epoch": 30.0, "eval_accuracy": 0.8400933828313368, "eval_f1": 0.48114688043432163, "eval_loss": 0.67595374584198, "eval_precision": 0.43018581721653393, "eval_recall": 0.5458044649730562, "eval_runtime": 24.4081, "eval_samples_per_second": 124.917, "eval_steps_per_second": 15.651, "step": 151890 }, { "epoch": 30.02, "learning_rate": 1.7672052663076003e-06, "loss": 0.2619, "step": 152000 }, { "epoch": 30.12, "learning_rate": 1.7647117494514266e-06, "loss": 0.2608, "step": 152500 }, { "epoch": 30.22, "learning_rate": 1.7622182325952525e-06, "loss": 0.2693, "step": 153000 }, { "epoch": 30.32, "learning_rate": 1.7597247157390784e-06, "loss": 0.2671, "step": 153500 }, { "epoch": 30.42, "learning_rate": 1.7572311988829048e-06, "loss": 0.2735, "step": 154000 }, { "epoch": 30.52, "learning_rate": 1.754742669060443e-06, "loss": 0.2573, "step": 154500 }, { "epoch": 30.61, "learning_rate": 1.752249152204269e-06, "loss": 0.2622, "step": 155000 }, { "epoch": 30.71, "learning_rate": 1.7497556353480952e-06, "loss": 0.2791, "step": 155500 }, { "epoch": 30.81, "learning_rate": 1.7472621184919212e-06, "loss": 0.2673, "step": 156000 }, { "epoch": 30.91, "learning_rate": 1.7447735886694594e-06, "loss": 0.2547, "step": 156500 }, { "epoch": 31.0, "eval_accuracy": 0.8380305407784454, "eval_f1": 0.4841168996188056, "eval_loss": 0.6825248003005981, "eval_precision": 0.43236495687698595, "eval_recall": 0.5499422632794457, "eval_runtime": 23.8749, "eval_samples_per_second": 127.707, "eval_steps_per_second": 16.0, "step": 156953 }, { "epoch": 31.01, "learning_rate": 1.742285058846998e-06, "loss": 0.2735, "step": 157000 }, { "epoch": 31.11, "learning_rate": 1.739791541990824e-06, "loss": 0.2532, "step": 157500 }, { "epoch": 31.21, "learning_rate": 1.7372980251346499e-06, "loss": 0.2748, "step": 158000 }, { "epoch": 31.31, "learning_rate": 1.7348045082784762e-06, "loss": 0.2593, "step": 158500 }, { "epoch": 31.4, "learning_rate": 1.7323109914223021e-06, "loss": 0.2634, "step": 159000 }, { "epoch": 31.5, "learning_rate": 1.729817474566128e-06, "loss": 0.2645, "step": 159500 }, { "epoch": 31.6, "learning_rate": 1.7273239577099544e-06, "loss": 0.2637, "step": 160000 }, { "epoch": 31.7, "learning_rate": 1.7248304408537803e-06, "loss": 0.2652, "step": 160500 }, { "epoch": 31.8, "learning_rate": 1.7223369239976064e-06, "loss": 0.2757, "step": 161000 }, { "epoch": 31.9, "learning_rate": 1.7198434071414325e-06, "loss": 0.2655, "step": 161500 }, { "epoch": 32.0, "learning_rate": 1.7173498902852584e-06, "loss": 0.2682, "step": 162000 }, { "epoch": 32.0, "eval_accuracy": 0.8379693061349458, "eval_f1": 0.48521260841971653, "eval_loss": 0.6861287355422974, "eval_precision": 0.4329834629615646, "eval_recall": 0.5517705927636644, "eval_runtime": 23.7494, "eval_samples_per_second": 128.382, "eval_steps_per_second": 16.085, "step": 162016 }, { "epoch": 32.1, "learning_rate": 1.714861360462797e-06, "loss": 0.2703, "step": 162500 }, { "epoch": 32.19, "learning_rate": 1.712367843606623e-06, "loss": 0.2646, "step": 163000 }, { "epoch": 32.29, "learning_rate": 1.709874326750449e-06, "loss": 0.2559, "step": 163500 }, { "epoch": 32.39, "learning_rate": 1.707380809894275e-06, "loss": 0.2658, "step": 164000 }, { "epoch": 32.49, "learning_rate": 1.7048872930381012e-06, "loss": 0.2594, "step": 164500 }, { "epoch": 32.59, "learning_rate": 1.702393776181927e-06, "loss": 0.2635, "step": 165000 }, { "epoch": 32.69, "learning_rate": 1.6999052463594655e-06, "loss": 0.2641, "step": 165500 }, { "epoch": 32.79, "learning_rate": 1.6974117295032917e-06, "loss": 0.2594, "step": 166000 }, { "epoch": 32.89, "learning_rate": 1.6949182126471176e-06, "loss": 0.264, "step": 166500 }, { "epoch": 32.98, "learning_rate": 1.6924246957909437e-06, "loss": 0.2579, "step": 167000 }, { "epoch": 33.0, "eval_accuracy": 0.8380075777871331, "eval_f1": 0.4819972826086957, "eval_loss": 0.6779205203056335, "eval_precision": 0.43130699088145896, "eval_recall": 0.546189376443418, "eval_runtime": 23.6072, "eval_samples_per_second": 129.156, "eval_steps_per_second": 16.182, "step": 167079 }, { "epoch": 33.08, "learning_rate": 1.6899361659684821e-06, "loss": 0.2686, "step": 167500 }, { "epoch": 33.18, "learning_rate": 1.687442649112308e-06, "loss": 0.2653, "step": 168000 }, { "epoch": 33.28, "learning_rate": 1.6849491322561342e-06, "loss": 0.254, "step": 168500 }, { "epoch": 33.38, "learning_rate": 1.6824556153999603e-06, "loss": 0.2575, "step": 169000 }, { "epoch": 33.48, "learning_rate": 1.6799620985437862e-06, "loss": 0.2601, "step": 169500 }, { "epoch": 33.58, "learning_rate": 1.6774735687213249e-06, "loss": 0.2525, "step": 170000 }, { "epoch": 33.68, "learning_rate": 1.6749800518651508e-06, "loss": 0.2604, "step": 170500 }, { "epoch": 33.77, "learning_rate": 1.6724865350089767e-06, "loss": 0.2631, "step": 171000 }, { "epoch": 33.87, "learning_rate": 1.6699980051865153e-06, "loss": 0.2613, "step": 171500 }, { "epoch": 33.97, "learning_rate": 1.6675044883303413e-06, "loss": 0.2682, "step": 172000 }, { "epoch": 34.0, "eval_accuracy": 0.8394504190745915, "eval_f1": 0.488143978164449, "eval_loss": 0.686414361000061, "eval_precision": 0.43834252450980393, "eval_recall": 0.5507120862201693, "eval_runtime": 23.3121, "eval_samples_per_second": 130.791, "eval_steps_per_second": 16.386, "step": 172142 }, { "epoch": 34.07, "learning_rate": 1.6650109714741672e-06, "loss": 0.2538, "step": 172500 }, { "epoch": 34.17, "learning_rate": 1.6625174546179935e-06, "loss": 0.2737, "step": 173000 }, { "epoch": 34.27, "learning_rate": 1.6600239377618194e-06, "loss": 0.262, "step": 173500 }, { "epoch": 34.37, "learning_rate": 1.6575304209056455e-06, "loss": 0.2621, "step": 174000 }, { "epoch": 34.47, "learning_rate": 1.6550369040494715e-06, "loss": 0.2641, "step": 174500 }, { "epoch": 34.56, "learning_rate": 1.6525433871932976e-06, "loss": 0.269, "step": 175000 }, { "epoch": 34.66, "learning_rate": 1.6500498703371237e-06, "loss": 0.2574, "step": 175500 }, { "epoch": 34.76, "learning_rate": 1.6475563534809496e-06, "loss": 0.2641, "step": 176000 }, { "epoch": 34.86, "learning_rate": 1.6450628366247755e-06, "loss": 0.2542, "step": 176500 }, { "epoch": 34.96, "learning_rate": 1.6425693197686019e-06, "loss": 0.2576, "step": 177000 }, { "epoch": 35.0, "eval_accuracy": 0.8376593057522294, "eval_f1": 0.48530031168393556, "eval_loss": 0.6970050930976868, "eval_precision": 0.43153558052434454, "eval_recall": 0.5543687451886066, "eval_runtime": 24.2014, "eval_samples_per_second": 125.984, "eval_steps_per_second": 15.784, "step": 177205 }, { "epoch": 35.06, "learning_rate": 1.6400758029124278e-06, "loss": 0.2672, "step": 177500 }, { "epoch": 35.16, "learning_rate": 1.6375972471573909e-06, "loss": 0.2669, "step": 178000 }, { "epoch": 35.26, "learning_rate": 1.6351037303012172e-06, "loss": 0.2566, "step": 178500 }, { "epoch": 35.35, "learning_rate": 1.6326102134450431e-06, "loss": 0.261, "step": 179000 }, { "epoch": 35.45, "learning_rate": 1.630116696588869e-06, "loss": 0.2639, "step": 179500 }, { "epoch": 35.55, "learning_rate": 1.6276231797326951e-06, "loss": 0.271, "step": 180000 }, { "epoch": 35.65, "learning_rate": 1.6251296628765213e-06, "loss": 0.2673, "step": 180500 }, { "epoch": 35.75, "learning_rate": 1.6226361460203472e-06, "loss": 0.2645, "step": 181000 }, { "epoch": 35.85, "learning_rate": 1.6201426291641733e-06, "loss": 0.2517, "step": 181500 }, { "epoch": 35.95, "learning_rate": 1.6176491123079992e-06, "loss": 0.2671, "step": 182000 }, { "epoch": 36.0, "eval_accuracy": 0.8383175781698496, "eval_f1": 0.4893308296346876, "eval_loss": 0.6999177932739258, "eval_precision": 0.43964723926380367, "eval_recall": 0.5516743648960739, "eval_runtime": 23.6795, "eval_samples_per_second": 128.761, "eval_steps_per_second": 16.132, "step": 182268 }, { "epoch": 36.05, "learning_rate": 1.6151605824855377e-06, "loss": 0.2671, "step": 182500 }, { "epoch": 36.14, "learning_rate": 1.6126670656293638e-06, "loss": 0.2579, "step": 183000 }, { "epoch": 36.24, "learning_rate": 1.61017354877319e-06, "loss": 0.2638, "step": 183500 }, { "epoch": 36.34, "learning_rate": 1.6076800319170158e-06, "loss": 0.2613, "step": 184000 }, { "epoch": 36.44, "learning_rate": 1.605186515060842e-06, "loss": 0.2675, "step": 184500 }, { "epoch": 36.54, "learning_rate": 1.6026929982046679e-06, "loss": 0.2473, "step": 185000 }, { "epoch": 36.64, "learning_rate": 1.6002044683822063e-06, "loss": 0.2561, "step": 185500 }, { "epoch": 36.74, "learning_rate": 1.5977109515260324e-06, "loss": 0.2563, "step": 186000 }, { "epoch": 36.84, "learning_rate": 1.5952174346698586e-06, "loss": 0.2556, "step": 186500 }, { "epoch": 36.93, "learning_rate": 1.5927239178136847e-06, "loss": 0.2556, "step": 187000 }, { "epoch": 37.0, "eval_accuracy": 0.8364690573692066, "eval_f1": 0.48543032353563115, "eval_loss": 0.7063097357749939, "eval_precision": 0.4330340300309364, "eval_recall": 0.5522517321016166, "eval_runtime": 23.9877, "eval_samples_per_second": 127.107, "eval_steps_per_second": 15.925, "step": 187331 }, { "epoch": 37.03, "learning_rate": 1.5902304009575106e-06, "loss": 0.2535, "step": 187500 }, { "epoch": 37.13, "learning_rate": 1.5877368841013365e-06, "loss": 0.2611, "step": 188000 }, { "epoch": 37.23, "learning_rate": 1.5852483542788752e-06, "loss": 0.2493, "step": 188500 }, { "epoch": 37.33, "learning_rate": 1.582754837422701e-06, "loss": 0.249, "step": 189000 }, { "epoch": 37.43, "learning_rate": 1.580261320566527e-06, "loss": 0.2495, "step": 189500 }, { "epoch": 37.53, "learning_rate": 1.5777678037103533e-06, "loss": 0.2591, "step": 190000 }, { "epoch": 37.63, "learning_rate": 1.5752792738878916e-06, "loss": 0.244, "step": 190500 }, { "epoch": 37.72, "learning_rate": 1.5727857570317177e-06, "loss": 0.258, "step": 191000 }, { "epoch": 37.82, "learning_rate": 1.5702922401755438e-06, "loss": 0.2564, "step": 191500 }, { "epoch": 37.92, "learning_rate": 1.5677987233193697e-06, "loss": 0.2557, "step": 192000 }, { "epoch": 38.0, "eval_accuracy": 0.837223008917295, "eval_f1": 0.48226590572817585, "eval_loss": 0.7124492526054382, "eval_precision": 0.4300686119279198, "eval_recall": 0.5488837567359507, "eval_runtime": 24.7192, "eval_samples_per_second": 123.346, "eval_steps_per_second": 15.454, "step": 192394 }, { "epoch": 38.02, "learning_rate": 1.5653052064631956e-06, "loss": 0.2488, "step": 192500 }, { "epoch": 38.12, "learning_rate": 1.5628166766407343e-06, "loss": 0.2465, "step": 193000 }, { "epoch": 38.22, "learning_rate": 1.5603231597845602e-06, "loss": 0.2465, "step": 193500 }, { "epoch": 38.32, "learning_rate": 1.5578346299620986e-06, "loss": 0.2622, "step": 194000 }, { "epoch": 38.42, "learning_rate": 1.5553411131059248e-06, "loss": 0.2587, "step": 194500 }, { "epoch": 38.51, "learning_rate": 1.5528475962497509e-06, "loss": 0.2482, "step": 195000 }, { "epoch": 38.61, "learning_rate": 1.5503540793935768e-06, "loss": 0.2426, "step": 195500 }, { "epoch": 38.71, "learning_rate": 1.5478655495711153e-06, "loss": 0.2529, "step": 196000 }, { "epoch": 38.81, "learning_rate": 1.5453720327149414e-06, "loss": 0.2564, "step": 196500 }, { "epoch": 38.91, "learning_rate": 1.5428785158587673e-06, "loss": 0.2518, "step": 197000 }, { "epoch": 39.0, "eval_accuracy": 0.8392246163266868, "eval_f1": 0.487758945386064, "eval_loss": 0.7146723866462708, "eval_precision": 0.43925377736663584, "eval_recall": 0.548306389530408, "eval_runtime": 23.4197, "eval_samples_per_second": 130.189, "eval_steps_per_second": 16.311, "step": 197457 }, { "epoch": 39.01, "learning_rate": 1.5403849990025934e-06, "loss": 0.2478, "step": 197500 }, { "epoch": 39.11, "learning_rate": 1.5378914821464193e-06, "loss": 0.2541, "step": 198000 }, { "epoch": 39.21, "learning_rate": 1.5353979652902454e-06, "loss": 0.2546, "step": 198500 }, { "epoch": 39.3, "learning_rate": 1.5329044484340716e-06, "loss": 0.2498, "step": 199000 }, { "epoch": 39.4, "learning_rate": 1.5304109315778975e-06, "loss": 0.2499, "step": 199500 }, { "epoch": 39.5, "learning_rate": 1.5279174147217238e-06, "loss": 0.2615, "step": 200000 }, { "epoch": 39.6, "learning_rate": 1.525428884899262e-06, "loss": 0.2469, "step": 200500 }, { "epoch": 39.7, "learning_rate": 1.522935368043088e-06, "loss": 0.247, "step": 201000 }, { "epoch": 39.8, "learning_rate": 1.5204418511869143e-06, "loss": 0.25, "step": 201500 }, { "epoch": 39.9, "learning_rate": 1.5179483343307402e-06, "loss": 0.2564, "step": 202000 }, { "epoch": 40.0, "learning_rate": 1.5154548174745661e-06, "loss": 0.2515, "step": 202500 }, { "epoch": 40.0, "eval_accuracy": 0.8385012821003482, "eval_f1": 0.4914221993480871, "eval_loss": 0.7163015604019165, "eval_precision": 0.4432838130609718, "eval_recall": 0.5512894534257121, "eval_runtime": 24.4401, "eval_samples_per_second": 124.754, "eval_steps_per_second": 15.63, "step": 202520 }, { "epoch": 40.09, "learning_rate": 1.5129613006183925e-06, "loss": 0.2662, "step": 203000 }, { "epoch": 40.19, "learning_rate": 1.5104677837622184e-06, "loss": 0.2548, "step": 203500 }, { "epoch": 40.29, "learning_rate": 1.5079742669060443e-06, "loss": 0.251, "step": 204000 }, { "epoch": 40.39, "learning_rate": 1.5054807500498706e-06, "loss": 0.2498, "step": 204500 }, { "epoch": 40.49, "learning_rate": 1.5029872331936965e-06, "loss": 0.2461, "step": 205000 }, { "epoch": 40.59, "learning_rate": 1.5004937163375225e-06, "loss": 0.2457, "step": 205500 }, { "epoch": 40.69, "learning_rate": 1.4980001994813486e-06, "loss": 0.2554, "step": 206000 }, { "epoch": 40.79, "learning_rate": 1.495511669658887e-06, "loss": 0.2522, "step": 206500 }, { "epoch": 40.88, "learning_rate": 1.493018152802713e-06, "loss": 0.2436, "step": 207000 }, { "epoch": 40.98, "learning_rate": 1.4905246359465393e-06, "loss": 0.243, "step": 207500 }, { "epoch": 41.0, "eval_accuracy": 0.8361093038386467, "eval_f1": 0.4854713868798376, "eval_loss": 0.7213875651359558, "eval_precision": 0.43309938872537923, "eval_recall": 0.5522517321016166, "eval_runtime": 23.8024, "eval_samples_per_second": 128.096, "eval_steps_per_second": 16.049, "step": 207583 }, { "epoch": 41.08, "learning_rate": 1.4880311190903652e-06, "loss": 0.2445, "step": 208000 }, { "epoch": 41.18, "learning_rate": 1.4855425892679034e-06, "loss": 0.2505, "step": 208500 }, { "epoch": 41.28, "learning_rate": 1.4830490724117297e-06, "loss": 0.2508, "step": 209000 }, { "epoch": 41.38, "learning_rate": 1.4805555555555557e-06, "loss": 0.2499, "step": 209500 }, { "epoch": 41.48, "learning_rate": 1.4780620386993818e-06, "loss": 0.2459, "step": 210000 }, { "epoch": 41.58, "learning_rate": 1.4755735088769202e-06, "loss": 0.2502, "step": 210500 }, { "epoch": 41.67, "learning_rate": 1.4730799920207461e-06, "loss": 0.2509, "step": 211000 }, { "epoch": 41.77, "learning_rate": 1.4705864751645723e-06, "loss": 0.2568, "step": 211500 }, { "epoch": 41.87, "learning_rate": 1.4680929583083984e-06, "loss": 0.2475, "step": 212000 }, { "epoch": 41.97, "learning_rate": 1.4656044284859366e-06, "loss": 0.2504, "step": 212500 }, { "epoch": 42.0, "eval_accuracy": 0.8370967124650771, "eval_f1": 0.4893995929443691, "eval_loss": 0.7146164178848267, "eval_precision": 0.4374620982413584, "eval_recall": 0.5553310238645112, "eval_runtime": 24.4183, "eval_samples_per_second": 124.865, "eval_steps_per_second": 15.644, "step": 212646 }, { "epoch": 42.07, "learning_rate": 1.463110911629763e-06, "loss": 0.2393, "step": 213000 }, { "epoch": 42.17, "learning_rate": 1.4606173947735889e-06, "loss": 0.2512, "step": 213500 }, { "epoch": 42.27, "learning_rate": 1.4581238779174148e-06, "loss": 0.2452, "step": 214000 }, { "epoch": 42.37, "learning_rate": 1.455630361061241e-06, "loss": 0.2527, "step": 214500 }, { "epoch": 42.46, "learning_rate": 1.453136844205067e-06, "loss": 0.2433, "step": 215000 }, { "epoch": 42.56, "learning_rate": 1.450643327348893e-06, "loss": 0.2451, "step": 215500 }, { "epoch": 42.66, "learning_rate": 1.448149810492719e-06, "loss": 0.2452, "step": 216000 }, { "epoch": 42.76, "learning_rate": 1.445656293636545e-06, "loss": 0.2595, "step": 216500 }, { "epoch": 42.86, "learning_rate": 1.4431677638140834e-06, "loss": 0.2398, "step": 217000 }, { "epoch": 42.96, "learning_rate": 1.4406742469579096e-06, "loss": 0.2467, "step": 217500 }, { "epoch": 43.0, "eval_accuracy": 0.8372115274216388, "eval_f1": 0.4870404878875148, "eval_loss": 0.7194024324417114, "eval_precision": 0.434947049924357, "eval_recall": 0.5533102386451116, "eval_runtime": 24.5076, "eval_samples_per_second": 124.41, "eval_steps_per_second": 15.587, "step": 217709 }, { "epoch": 43.06, "learning_rate": 1.4381807301017357e-06, "loss": 0.2496, "step": 218000 }, { "epoch": 43.16, "learning_rate": 1.4356872132455616e-06, "loss": 0.246, "step": 218500 }, { "epoch": 43.25, "learning_rate": 1.4331936963893877e-06, "loss": 0.2455, "step": 219000 }, { "epoch": 43.35, "learning_rate": 1.4307051665669262e-06, "loss": 0.2452, "step": 219500 }, { "epoch": 43.45, "learning_rate": 1.428211649710752e-06, "loss": 0.2481, "step": 220000 }, { "epoch": 43.55, "learning_rate": 1.4257181328545782e-06, "loss": 0.2471, "step": 220500 }, { "epoch": 43.65, "learning_rate": 1.4232246159984043e-06, "loss": 0.2448, "step": 221000 }, { "epoch": 43.75, "learning_rate": 1.4207360861759426e-06, "loss": 0.2474, "step": 221500 }, { "epoch": 43.85, "learning_rate": 1.4182425693197687e-06, "loss": 0.2453, "step": 222000 }, { "epoch": 43.95, "learning_rate": 1.4157490524635948e-06, "loss": 0.2485, "step": 222500 }, { "epoch": 44.0, "eval_accuracy": 0.8376210341000421, "eval_f1": 0.4923129484113427, "eval_loss": 0.7221771478652954, "eval_precision": 0.44256756756756754, "eval_recall": 0.554657428791378, "eval_runtime": 23.7235, "eval_samples_per_second": 128.523, "eval_steps_per_second": 16.102, "step": 222772 }, { "epoch": 44.05, "learning_rate": 1.413255535607421e-06, "loss": 0.2506, "step": 223000 }, { "epoch": 44.14, "learning_rate": 1.4107620187512468e-06, "loss": 0.2426, "step": 223500 }, { "epoch": 44.24, "learning_rate": 1.4082734889287853e-06, "loss": 0.2462, "step": 224000 }, { "epoch": 44.34, "learning_rate": 1.4057799720726112e-06, "loss": 0.2574, "step": 224500 }, { "epoch": 44.44, "learning_rate": 1.4032864552164373e-06, "loss": 0.25, "step": 225000 }, { "epoch": 44.54, "learning_rate": 1.4007929383602634e-06, "loss": 0.2494, "step": 225500 }, { "epoch": 44.64, "learning_rate": 1.3983044085378017e-06, "loss": 0.2491, "step": 226000 }, { "epoch": 44.74, "learning_rate": 1.395810891681628e-06, "loss": 0.2412, "step": 226500 }, { "epoch": 44.84, "learning_rate": 1.393317374825454e-06, "loss": 0.2578, "step": 227000 }, { "epoch": 44.93, "learning_rate": 1.39082385796928e-06, "loss": 0.2407, "step": 227500 }, { "epoch": 45.0, "eval_accuracy": 0.8363848597343947, "eval_f1": 0.4902906406043276, "eval_loss": 0.7273271083831787, "eval_precision": 0.44052458010583634, "eval_recall": 0.552732871439569, "eval_runtime": 24.6123, "eval_samples_per_second": 123.881, "eval_steps_per_second": 15.521, "step": 227835 }, { "epoch": 45.03, "learning_rate": 1.388330341113106e-06, "loss": 0.2457, "step": 228000 }, { "epoch": 45.13, "learning_rate": 1.385836824256932e-06, "loss": 0.246, "step": 228500 }, { "epoch": 45.23, "learning_rate": 1.3833433074007582e-06, "loss": 0.2406, "step": 229000 }, { "epoch": 45.33, "learning_rate": 1.3808497905445841e-06, "loss": 0.2444, "step": 229500 }, { "epoch": 45.43, "learning_rate": 1.3783612607221226e-06, "loss": 0.241, "step": 230000 }, { "epoch": 45.53, "learning_rate": 1.3758677438659487e-06, "loss": 0.2425, "step": 230500 }, { "epoch": 45.63, "learning_rate": 1.3733742270097746e-06, "loss": 0.2333, "step": 231000 }, { "epoch": 45.72, "learning_rate": 1.3708807101536007e-06, "loss": 0.2361, "step": 231500 }, { "epoch": 45.82, "learning_rate": 1.3683871932974269e-06, "loss": 0.2418, "step": 232000 }, { "epoch": 45.92, "learning_rate": 1.3658986634749653e-06, "loss": 0.2508, "step": 232500 }, { "epoch": 46.0, "eval_accuracy": 0.8403995560488347, "eval_f1": 0.49452783665700567, "eval_loss": 0.7349366545677185, "eval_precision": 0.4491944990176817, "eval_recall": 0.5500384911470362, "eval_runtime": 24.4005, "eval_samples_per_second": 124.956, "eval_steps_per_second": 15.655, "step": 232898 }, { "epoch": 46.02, "learning_rate": 1.3634051466187912e-06, "loss": 0.2456, "step": 233000 }, { "epoch": 46.12, "learning_rate": 1.3609116297626173e-06, "loss": 0.2381, "step": 233500 }, { "epoch": 46.22, "learning_rate": 1.3584181129064432e-06, "loss": 0.2434, "step": 234000 }, { "epoch": 46.32, "learning_rate": 1.3559245960502696e-06, "loss": 0.2546, "step": 234500 }, { "epoch": 46.42, "learning_rate": 1.3534360662278078e-06, "loss": 0.2551, "step": 235000 }, { "epoch": 46.51, "learning_rate": 1.3509425493716337e-06, "loss": 0.2374, "step": 235500 }, { "epoch": 46.61, "learning_rate": 1.3484490325154599e-06, "loss": 0.2452, "step": 236000 }, { "epoch": 46.71, "learning_rate": 1.345955515659286e-06, "loss": 0.2417, "step": 236500 }, { "epoch": 46.81, "learning_rate": 1.3434669858368244e-06, "loss": 0.2434, "step": 237000 }, { "epoch": 46.91, "learning_rate": 1.3409734689806503e-06, "loss": 0.2407, "step": 237500 }, { "epoch": 47.0, "eval_accuracy": 0.8388304183091584, "eval_f1": 0.4877632813166198, "eval_loss": 0.7191833257675171, "eval_precision": 0.4379114990047466, "eval_recall": 0.550423402617398, "eval_runtime": 23.4257, "eval_samples_per_second": 130.156, "eval_steps_per_second": 16.307, "step": 237961 }, { "epoch": 47.01, "learning_rate": 1.3384799521244765e-06, "loss": 0.2404, "step": 238000 }, { "epoch": 47.11, "learning_rate": 1.3359864352683024e-06, "loss": 0.2394, "step": 238500 }, { "epoch": 47.21, "learning_rate": 1.3334929184121287e-06, "loss": 0.2508, "step": 239000 }, { "epoch": 47.3, "learning_rate": 1.331004388589667e-06, "loss": 0.2454, "step": 239500 }, { "epoch": 47.4, "learning_rate": 1.328510871733493e-06, "loss": 0.2387, "step": 240000 }, { "epoch": 47.5, "learning_rate": 1.3260173548773192e-06, "loss": 0.2426, "step": 240500 }, { "epoch": 47.6, "learning_rate": 1.323523838021145e-06, "loss": 0.2313, "step": 241000 }, { "epoch": 47.7, "learning_rate": 1.321030321164971e-06, "loss": 0.2434, "step": 241500 }, { "epoch": 47.8, "learning_rate": 1.3185368043087973e-06, "loss": 0.2422, "step": 242000 }, { "epoch": 47.9, "learning_rate": 1.3160432874526233e-06, "loss": 0.2391, "step": 242500 }, { "epoch": 48.0, "learning_rate": 1.3135497705964492e-06, "loss": 0.2397, "step": 243000 }, { "epoch": 48.0, "eval_accuracy": 0.8379080714914463, "eval_f1": 0.48708299860471016, "eval_loss": 0.7313714623451233, "eval_precision": 0.4344219021042311, "eval_recall": 0.5542725173210161, "eval_runtime": 23.7407, "eval_samples_per_second": 128.429, "eval_steps_per_second": 16.091, "step": 243024 }, { "epoch": 48.09, "learning_rate": 1.3110612407739878e-06, "loss": 0.2454, "step": 243500 }, { "epoch": 48.19, "learning_rate": 1.3085677239178137e-06, "loss": 0.2403, "step": 244000 }, { "epoch": 48.29, "learning_rate": 1.3060742070616397e-06, "loss": 0.2336, "step": 244500 }, { "epoch": 48.39, "learning_rate": 1.3035856772391783e-06, "loss": 0.2453, "step": 245000 }, { "epoch": 48.49, "learning_rate": 1.3010921603830042e-06, "loss": 0.2378, "step": 245500 }, { "epoch": 48.59, "learning_rate": 1.2985986435268301e-06, "loss": 0.2423, "step": 246000 }, { "epoch": 48.69, "learning_rate": 1.2961051266706565e-06, "loss": 0.2421, "step": 246500 }, { "epoch": 48.79, "learning_rate": 1.2936116098144824e-06, "loss": 0.2428, "step": 247000 }, { "epoch": 48.88, "learning_rate": 1.2911180929583083e-06, "loss": 0.2423, "step": 247500 }, { "epoch": 48.98, "learning_rate": 1.288629563135847e-06, "loss": 0.237, "step": 248000 }, { "epoch": 49.0, "eval_accuracy": 0.8377014045696353, "eval_f1": 0.4901437548420418, "eval_loss": 0.7322969436645508, "eval_precision": 0.44338888023672324, "eval_recall": 0.5479214780600462, "eval_runtime": 23.821, "eval_samples_per_second": 127.996, "eval_steps_per_second": 16.036, "step": 248087 }, { "epoch": 49.08, "learning_rate": 1.2861360462796729e-06, "loss": 0.2419, "step": 248500 }, { "epoch": 49.18, "learning_rate": 1.2836425294234988e-06, "loss": 0.2345, "step": 249000 }, { "epoch": 49.28, "learning_rate": 1.2811490125673251e-06, "loss": 0.2374, "step": 249500 }, { "epoch": 49.38, "learning_rate": 1.278655495711151e-06, "loss": 0.241, "step": 250000 }, { "epoch": 49.48, "learning_rate": 1.2761669658886895e-06, "loss": 0.2362, "step": 250500 }, { "epoch": 49.58, "learning_rate": 1.2736734490325156e-06, "loss": 0.2375, "step": 251000 }, { "epoch": 49.67, "learning_rate": 1.2711799321763415e-06, "loss": 0.2377, "step": 251500 }, { "epoch": 49.77, "learning_rate": 1.2686864153201678e-06, "loss": 0.2387, "step": 252000 }, { "epoch": 49.87, "learning_rate": 1.266197885497706e-06, "loss": 0.2382, "step": 252500 }, { "epoch": 49.97, "learning_rate": 1.263704368641532e-06, "loss": 0.2419, "step": 253000 }, { "epoch": 50.0, "eval_accuracy": 0.8384898006046921, "eval_f1": 0.4932128634436689, "eval_loss": 0.7357666492462158, "eval_precision": 0.44433299899699097, "eval_recall": 0.5541762894534257, "eval_runtime": 23.5559, "eval_samples_per_second": 129.437, "eval_steps_per_second": 16.217, "step": 253150 }, { "epoch": 50.07, "learning_rate": 1.2612108517853583e-06, "loss": 0.2507, "step": 253500 }, { "epoch": 50.17, "learning_rate": 1.2587173349291842e-06, "loss": 0.2388, "step": 254000 }, { "epoch": 50.27, "learning_rate": 1.2562238180730102e-06, "loss": 0.2325, "step": 254500 }, { "epoch": 50.37, "learning_rate": 1.2537352882505488e-06, "loss": 0.2406, "step": 255000 }, { "epoch": 50.46, "learning_rate": 1.2512417713943747e-06, "loss": 0.2246, "step": 255500 }, { "epoch": 50.56, "learning_rate": 1.2487482545382008e-06, "loss": 0.2405, "step": 256000 }, { "epoch": 50.66, "learning_rate": 1.2462547376820268e-06, "loss": 0.2297, "step": 256500 }, { "epoch": 50.76, "learning_rate": 1.2437612208258529e-06, "loss": 0.2303, "step": 257000 }, { "epoch": 50.86, "learning_rate": 1.2412726910033913e-06, "loss": 0.2514, "step": 257500 }, { "epoch": 50.96, "learning_rate": 1.2387791741472172e-06, "loss": 0.248, "step": 258000 }, { "epoch": 51.0, "eval_accuracy": 0.8372612805694822, "eval_f1": 0.48591877355693897, "eval_loss": 0.7367214560508728, "eval_precision": 0.43393086755918614, "eval_recall": 0.5520592763664357, "eval_runtime": 23.3101, "eval_samples_per_second": 130.802, "eval_steps_per_second": 16.388, "step": 258213 }, { "epoch": 51.06, "learning_rate": 1.2362856572910434e-06, "loss": 0.2515, "step": 258500 }, { "epoch": 51.16, "learning_rate": 1.2337921404348695e-06, "loss": 0.2386, "step": 259000 }, { "epoch": 51.25, "learning_rate": 1.2312986235786954e-06, "loss": 0.2333, "step": 259500 }, { "epoch": 51.35, "learning_rate": 1.2288100937562338e-06, "loss": 0.2427, "step": 260000 }, { "epoch": 51.45, "learning_rate": 1.22631657690006e-06, "loss": 0.238, "step": 260500 }, { "epoch": 51.55, "learning_rate": 1.2238230600438859e-06, "loss": 0.2393, "step": 261000 }, { "epoch": 51.65, "learning_rate": 1.221329543187712e-06, "loss": 0.2428, "step": 261500 }, { "epoch": 51.75, "learning_rate": 1.2188360263315381e-06, "loss": 0.2339, "step": 262000 }, { "epoch": 51.85, "learning_rate": 1.2163474965090764e-06, "loss": 0.2446, "step": 262500 }, { "epoch": 51.95, "learning_rate": 1.2138539796529025e-06, "loss": 0.2447, "step": 263000 }, { "epoch": 52.0, "eval_accuracy": 0.8375712809521987, "eval_f1": 0.4877675840978593, "eval_loss": 0.7343346476554871, "eval_precision": 0.4365875912408759, "eval_recall": 0.5525404157043879, "eval_runtime": 23.403, "eval_samples_per_second": 130.282, "eval_steps_per_second": 16.323, "step": 263276 }, { "epoch": 52.04, "learning_rate": 1.2113604627967286e-06, "loss": 0.2364, "step": 263500 }, { "epoch": 52.14, "learning_rate": 1.2088669459405547e-06, "loss": 0.2466, "step": 264000 }, { "epoch": 52.24, "learning_rate": 1.2063834031518055e-06, "loss": 0.2397, "step": 264500 }, { "epoch": 52.34, "learning_rate": 1.2038898862956314e-06, "loss": 0.2418, "step": 265000 }, { "epoch": 52.44, "learning_rate": 1.2013963694394575e-06, "loss": 0.2417, "step": 265500 }, { "epoch": 52.54, "learning_rate": 1.1989028525832834e-06, "loss": 0.2397, "step": 266000 }, { "epoch": 52.64, "learning_rate": 1.1964093357271096e-06, "loss": 0.2353, "step": 266500 }, { "epoch": 52.74, "learning_rate": 1.1939158188709357e-06, "loss": 0.2339, "step": 267000 }, { "epoch": 52.83, "learning_rate": 1.1914223020147616e-06, "loss": 0.2397, "step": 267500 }, { "epoch": 52.93, "learning_rate": 1.1889287851585877e-06, "loss": 0.2365, "step": 268000 }, { "epoch": 53.0, "eval_accuracy": 0.8388954801178767, "eval_f1": 0.4917008898015058, "eval_loss": 0.7243747711181641, "eval_precision": 0.44262168823166975, "eval_recall": 0.5530215550423403, "eval_runtime": 23.8942, "eval_samples_per_second": 127.604, "eval_steps_per_second": 15.987, "step": 268339 }, { "epoch": 53.03, "learning_rate": 1.1864352683024139e-06, "loss": 0.2441, "step": 268500 }, { "epoch": 53.13, "learning_rate": 1.183946738479952e-06, "loss": 0.2346, "step": 269000 }, { "epoch": 53.23, "learning_rate": 1.1814532216237782e-06, "loss": 0.2338, "step": 269500 }, { "epoch": 53.33, "learning_rate": 1.1789597047676043e-06, "loss": 0.2317, "step": 270000 }, { "epoch": 53.43, "learning_rate": 1.1764661879114305e-06, "loss": 0.2358, "step": 270500 }, { "epoch": 53.53, "learning_rate": 1.1739726710552564e-06, "loss": 0.2344, "step": 271000 }, { "epoch": 53.62, "learning_rate": 1.1714791541990825e-06, "loss": 0.2296, "step": 271500 }, { "epoch": 53.72, "learning_rate": 1.168990624376621e-06, "loss": 0.2459, "step": 272000 }, { "epoch": 53.82, "learning_rate": 1.1664971075204469e-06, "loss": 0.2351, "step": 272500 }, { "epoch": 53.92, "learning_rate": 1.164003590664273e-06, "loss": 0.239, "step": 273000 }, { "epoch": 54.0, "eval_accuracy": 0.8386275785525661, "eval_f1": 0.4864291772688719, "eval_loss": 0.7417691946029663, "eval_precision": 0.4348650288140734, "eval_recall": 0.5518668206312548, "eval_runtime": 24.0372, "eval_samples_per_second": 126.845, "eval_steps_per_second": 15.892, "step": 273402 }, { "epoch": 54.02, "learning_rate": 1.1615100738080991e-06, "loss": 0.2333, "step": 273500 }, { "epoch": 54.12, "learning_rate": 1.159016556951925e-06, "loss": 0.2399, "step": 274000 }, { "epoch": 54.22, "learning_rate": 1.1565280271294635e-06, "loss": 0.2508, "step": 274500 }, { "epoch": 54.32, "learning_rate": 1.1540345102732896e-06, "loss": 0.2289, "step": 275000 }, { "epoch": 54.41, "learning_rate": 1.1515409934171155e-06, "loss": 0.2352, "step": 275500 }, { "epoch": 54.51, "learning_rate": 1.1490474765609416e-06, "loss": 0.2317, "step": 276000 }, { "epoch": 54.61, "learning_rate": 1.1465539597047677e-06, "loss": 0.2275, "step": 276500 }, { "epoch": 54.71, "learning_rate": 1.144065429882306e-06, "loss": 0.2359, "step": 277000 }, { "epoch": 54.81, "learning_rate": 1.1415719130261321e-06, "loss": 0.2288, "step": 277500 }, { "epoch": 54.91, "learning_rate": 1.1390783961699582e-06, "loss": 0.2273, "step": 278000 }, { "epoch": 55.0, "eval_accuracy": 0.8372651077347009, "eval_f1": 0.4906031936298643, "eval_loss": 0.7420364618301392, "eval_precision": 0.44189095396005246, "eval_recall": 0.5513856812933026, "eval_runtime": 23.2371, "eval_samples_per_second": 131.213, "eval_steps_per_second": 16.439, "step": 278465 }, { "epoch": 55.01, "learning_rate": 1.1365848793137844e-06, "loss": 0.2417, "step": 278500 }, { "epoch": 55.11, "learning_rate": 1.1340913624576103e-06, "loss": 0.2337, "step": 279000 }, { "epoch": 55.2, "learning_rate": 1.1315978456014364e-06, "loss": 0.2307, "step": 279500 }, { "epoch": 55.3, "learning_rate": 1.1291143028126872e-06, "loss": 0.2269, "step": 280000 }, { "epoch": 55.4, "learning_rate": 1.126620785956513e-06, "loss": 0.2289, "step": 280500 }, { "epoch": 55.5, "learning_rate": 1.1241272691003392e-06, "loss": 0.2377, "step": 281000 }, { "epoch": 55.6, "learning_rate": 1.1216337522441653e-06, "loss": 0.2288, "step": 281500 }, { "epoch": 55.7, "learning_rate": 1.1191402353879912e-06, "loss": 0.2312, "step": 282000 }, { "epoch": 55.8, "learning_rate": 1.1166467185318174e-06, "loss": 0.2358, "step": 282500 }, { "epoch": 55.9, "learning_rate": 1.1141532016756435e-06, "loss": 0.2381, "step": 283000 }, { "epoch": 55.99, "learning_rate": 1.1116596848194696e-06, "loss": 0.2383, "step": 283500 }, { "epoch": 56.0, "eval_accuracy": 0.837800910865322, "eval_f1": 0.48603658794203386, "eval_loss": 0.7458629012107849, "eval_precision": 0.433230398433381, "eval_recall": 0.5535026943802925, "eval_runtime": 23.9922, "eval_samples_per_second": 127.083, "eval_steps_per_second": 15.922, "step": 283528 }, { "epoch": 56.09, "learning_rate": 1.1091661679632955e-06, "loss": 0.2293, "step": 284000 }, { "epoch": 56.19, "learning_rate": 1.1066726511071216e-06, "loss": 0.237, "step": 284500 }, { "epoch": 56.29, "learning_rate": 1.10418412128466e-06, "loss": 0.2326, "step": 285000 }, { "epoch": 56.39, "learning_rate": 1.101690604428486e-06, "loss": 0.232, "step": 285500 }, { "epoch": 56.49, "learning_rate": 1.0992020746060244e-06, "loss": 0.233, "step": 286000 }, { "epoch": 56.59, "learning_rate": 1.0967085577498504e-06, "loss": 0.2384, "step": 286500 }, { "epoch": 56.69, "learning_rate": 1.0942150408936765e-06, "loss": 0.2327, "step": 287000 }, { "epoch": 56.78, "learning_rate": 1.0917215240375026e-06, "loss": 0.2289, "step": 287500 }, { "epoch": 56.88, "learning_rate": 1.0892280071813287e-06, "loss": 0.2313, "step": 288000 }, { "epoch": 56.98, "learning_rate": 1.0867344903251546e-06, "loss": 0.2301, "step": 288500 }, { "epoch": 57.0, "eval_accuracy": 0.8362585632821768, "eval_f1": 0.4886406904429496, "eval_loss": 0.745151937007904, "eval_precision": 0.4360135900339751, "eval_recall": 0.555715935334873, "eval_runtime": 23.3249, "eval_samples_per_second": 130.719, "eval_steps_per_second": 16.377, "step": 288591 }, { "epoch": 57.08, "learning_rate": 1.0842409734689808e-06, "loss": 0.2353, "step": 289000 }, { "epoch": 57.18, "learning_rate": 1.0817474566128069e-06, "loss": 0.2281, "step": 289500 }, { "epoch": 57.28, "learning_rate": 1.0792539397566328e-06, "loss": 0.2427, "step": 290000 }, { "epoch": 57.38, "learning_rate": 1.076760422900459e-06, "loss": 0.2285, "step": 290500 }, { "epoch": 57.48, "learning_rate": 1.0742718930779974e-06, "loss": 0.2288, "step": 291000 }, { "epoch": 57.57, "learning_rate": 1.0717783762218235e-06, "loss": 0.2317, "step": 291500 }, { "epoch": 57.67, "learning_rate": 1.0692848593656494e-06, "loss": 0.2247, "step": 292000 }, { "epoch": 57.77, "learning_rate": 1.0667913425094755e-06, "loss": 0.2336, "step": 292500 }, { "epoch": 57.87, "learning_rate": 1.0642978256533014e-06, "loss": 0.2302, "step": 293000 }, { "epoch": 57.97, "learning_rate": 1.0618043087971276e-06, "loss": 0.2382, "step": 293500 }, { "epoch": 58.0, "eval_accuracy": 0.8389949864135635, "eval_f1": 0.49114762450643235, "eval_loss": 0.7444238662719727, "eval_precision": 0.4394802826532938, "eval_recall": 0.5565819861431871, "eval_runtime": 23.2287, "eval_samples_per_second": 131.26, "eval_steps_per_second": 16.445, "step": 293654 }, { "epoch": 58.07, "learning_rate": 1.0593107919409535e-06, "loss": 0.2279, "step": 294000 }, { "epoch": 58.17, "learning_rate": 1.0568172750847796e-06, "loss": 0.2375, "step": 294500 }, { "epoch": 58.27, "learning_rate": 1.0543337322960304e-06, "loss": 0.2264, "step": 295000 }, { "epoch": 58.36, "learning_rate": 1.0518402154398565e-06, "loss": 0.2357, "step": 295500 }, { "epoch": 58.46, "learning_rate": 1.0493466985836826e-06, "loss": 0.2345, "step": 296000 }, { "epoch": 58.56, "learning_rate": 1.0468531817275085e-06, "loss": 0.2256, "step": 296500 }, { "epoch": 58.66, "learning_rate": 1.0443596648713347e-06, "loss": 0.234, "step": 297000 }, { "epoch": 58.76, "learning_rate": 1.041871135048873e-06, "loss": 0.2261, "step": 297500 }, { "epoch": 58.86, "learning_rate": 1.039377618192699e-06, "loss": 0.2392, "step": 298000 }, { "epoch": 58.96, "learning_rate": 1.0368841013365251e-06, "loss": 0.2324, "step": 298500 }, { "epoch": 59.0, "eval_accuracy": 0.8388265911439396, "eval_f1": 0.4904518329070759, "eval_loss": 0.7444677948951721, "eval_precision": 0.44023569023569026, "eval_recall": 0.553598922247883, "eval_runtime": 24.0702, "eval_samples_per_second": 126.671, "eval_steps_per_second": 15.87, "step": 298717 }, { "epoch": 59.06, "learning_rate": 1.0343905844803513e-06, "loss": 0.241, "step": 299000 }, { "epoch": 59.15, "learning_rate": 1.0318970676241772e-06, "loss": 0.2239, "step": 299500 }, { "epoch": 59.25, "learning_rate": 1.0294035507680033e-06, "loss": 0.227, "step": 300000 }, { "epoch": 59.35, "learning_rate": 1.0269150209455417e-06, "loss": 0.2331, "step": 300500 }, { "epoch": 59.45, "learning_rate": 1.0244215040893679e-06, "loss": 0.2204, "step": 301000 }, { "epoch": 59.55, "learning_rate": 1.0219279872331938e-06, "loss": 0.2362, "step": 301500 }, { "epoch": 59.65, "learning_rate": 1.01943447037702e-06, "loss": 0.2263, "step": 302000 }, { "epoch": 59.75, "learning_rate": 1.0169459405545583e-06, "loss": 0.2359, "step": 302500 }, { "epoch": 59.85, "learning_rate": 1.0144524236983843e-06, "loss": 0.2358, "step": 303000 }, { "epoch": 59.94, "learning_rate": 1.0119589068422104e-06, "loss": 0.2377, "step": 303500 }, { "epoch": 60.0, "eval_accuracy": 0.8369665888476405, "eval_f1": 0.48964250629991884, "eval_loss": 0.752280056476593, "eval_precision": 0.4402119652868443, "eval_recall": 0.5515781370284835, "eval_runtime": 23.2481, "eval_samples_per_second": 131.15, "eval_steps_per_second": 16.431, "step": 303780 }, { "epoch": 60.04, "learning_rate": 1.0094653899860365e-06, "loss": 0.2355, "step": 304000 }, { "epoch": 60.14, "learning_rate": 1.0069718731298624e-06, "loss": 0.2269, "step": 304500 }, { "epoch": 60.24, "learning_rate": 1.0044833433074009e-06, "loss": 0.2294, "step": 305000 }, { "epoch": 60.34, "learning_rate": 1.001989826451227e-06, "loss": 0.2317, "step": 305500 }, { "epoch": 60.44, "learning_rate": 9.99496309595053e-07, "loss": 0.2251, "step": 306000 }, { "epoch": 60.54, "learning_rate": 9.97002792738879e-07, "loss": 0.2348, "step": 306500 }, { "epoch": 60.64, "learning_rate": 9.945142629164175e-07, "loss": 0.2337, "step": 307000 }, { "epoch": 60.73, "learning_rate": 9.920207460602436e-07, "loss": 0.2243, "step": 307500 }, { "epoch": 60.83, "learning_rate": 9.895272292040695e-07, "loss": 0.2304, "step": 308000 }, { "epoch": 60.93, "learning_rate": 9.870337123478956e-07, "loss": 0.22, "step": 308500 }, { "epoch": 61.0, "eval_accuracy": 0.8390600482222818, "eval_f1": 0.49161582113751756, "eval_loss": 0.7522650957107544, "eval_precision": 0.44162514373323114, "eval_recall": 0.5543687451886066, "eval_runtime": 24.048, "eval_samples_per_second": 126.788, "eval_steps_per_second": 15.885, "step": 308843 }, { "epoch": 61.03, "learning_rate": 9.84545182525434e-07, "loss": 0.2371, "step": 309000 }, { "epoch": 61.13, "learning_rate": 9.8205166566926e-07, "loss": 0.2343, "step": 309500 }, { "epoch": 61.23, "learning_rate": 9.795581488130861e-07, "loss": 0.2277, "step": 310000 }, { "epoch": 61.33, "learning_rate": 9.770646319569122e-07, "loss": 0.222, "step": 310500 }, { "epoch": 61.43, "learning_rate": 9.745711151007382e-07, "loss": 0.2267, "step": 311000 }, { "epoch": 61.52, "learning_rate": 9.720825852782766e-07, "loss": 0.2287, "step": 311500 }, { "epoch": 61.62, "learning_rate": 9.695890684221027e-07, "loss": 0.2382, "step": 312000 }, { "epoch": 61.72, "learning_rate": 9.670955515659286e-07, "loss": 0.2295, "step": 312500 }, { "epoch": 61.82, "learning_rate": 9.646020347097548e-07, "loss": 0.2211, "step": 313000 }, { "epoch": 61.92, "learning_rate": 9.621085178535809e-07, "loss": 0.2241, "step": 313500 }, { "epoch": 62.0, "eval_accuracy": 0.8374909104826055, "eval_f1": 0.48761904761904756, "eval_loss": 0.7565447688102722, "eval_precision": 0.4352754477442757, "eval_recall": 0.5542725173210161, "eval_runtime": 23.4085, "eval_samples_per_second": 130.252, "eval_steps_per_second": 16.319, "step": 313906 }, { "epoch": 62.02, "learning_rate": 9.596150009974068e-07, "loss": 0.2265, "step": 314000 }, { "epoch": 62.12, "learning_rate": 9.57121484141233e-07, "loss": 0.2231, "step": 314500 }, { "epoch": 62.22, "learning_rate": 9.546279672850588e-07, "loss": 0.2279, "step": 315000 }, { "epoch": 62.31, "learning_rate": 9.521394374625974e-07, "loss": 0.2293, "step": 315500 }, { "epoch": 62.41, "learning_rate": 9.496459206064233e-07, "loss": 0.2265, "step": 316000 }, { "epoch": 62.51, "learning_rate": 9.471573907839618e-07, "loss": 0.2177, "step": 316500 }, { "epoch": 62.61, "learning_rate": 9.446638739277879e-07, "loss": 0.2271, "step": 317000 }, { "epoch": 62.71, "learning_rate": 9.421703570716139e-07, "loss": 0.2298, "step": 317500 }, { "epoch": 62.81, "learning_rate": 9.396768402154399e-07, "loss": 0.2224, "step": 318000 }, { "epoch": 62.91, "learning_rate": 9.371883103929783e-07, "loss": 0.2294, "step": 318500 }, { "epoch": 63.0, "eval_accuracy": 0.8375751081174174, "eval_f1": 0.49037727850784235, "eval_loss": 0.7564020156860352, "eval_precision": 0.43824821942718595, "eval_recall": 0.5565819861431871, "eval_runtime": 23.5821, "eval_samples_per_second": 129.293, "eval_steps_per_second": 16.199, "step": 318969 }, { "epoch": 63.01, "learning_rate": 9.346947935368044e-07, "loss": 0.2259, "step": 319000 }, { "epoch": 63.1, "learning_rate": 9.322012766806304e-07, "loss": 0.2192, "step": 319500 }, { "epoch": 63.2, "learning_rate": 9.297127468581689e-07, "loss": 0.2248, "step": 320000 }, { "epoch": 63.3, "learning_rate": 9.272192300019948e-07, "loss": 0.2301, "step": 320500 }, { "epoch": 63.4, "learning_rate": 9.24725713145821e-07, "loss": 0.2351, "step": 321000 }, { "epoch": 63.5, "learning_rate": 9.22232196289647e-07, "loss": 0.2335, "step": 321500 }, { "epoch": 63.6, "learning_rate": 9.197386794334731e-07, "loss": 0.2288, "step": 322000 }, { "epoch": 63.7, "learning_rate": 9.17245162577299e-07, "loss": 0.2216, "step": 322500 }, { "epoch": 63.8, "learning_rate": 9.147516457211251e-07, "loss": 0.225, "step": 323000 }, { "epoch": 63.89, "learning_rate": 9.122581288649513e-07, "loss": 0.2375, "step": 323500 }, { "epoch": 63.99, "learning_rate": 9.097646120087772e-07, "loss": 0.2271, "step": 324000 }, { "epoch": 64.0, "eval_accuracy": 0.8373033793868881, "eval_f1": 0.49073640274287667, "eval_loss": 0.7552084922790527, "eval_precision": 0.44020783984106365, "eval_recall": 0.5543687451886066, "eval_runtime": 24.2365, "eval_samples_per_second": 125.802, "eval_steps_per_second": 15.761, "step": 324032 }, { "epoch": 64.09, "learning_rate": 9.072710951526033e-07, "loss": 0.2197, "step": 324500 }, { "epoch": 64.19, "learning_rate": 9.047825653301418e-07, "loss": 0.2251, "step": 325000 }, { "epoch": 64.29, "learning_rate": 9.022890484739677e-07, "loss": 0.2232, "step": 325500 }, { "epoch": 64.39, "learning_rate": 8.997955316177938e-07, "loss": 0.2338, "step": 326000 }, { "epoch": 64.49, "learning_rate": 8.973020147616199e-07, "loss": 0.2304, "step": 326500 }, { "epoch": 64.59, "learning_rate": 8.948134849391583e-07, "loss": 0.2264, "step": 327000 }, { "epoch": 64.68, "learning_rate": 8.923199680829843e-07, "loss": 0.2274, "step": 327500 }, { "epoch": 64.78, "learning_rate": 8.898264512268104e-07, "loss": 0.232, "step": 328000 }, { "epoch": 64.88, "learning_rate": 8.873329343706365e-07, "loss": 0.2282, "step": 328500 }, { "epoch": 64.98, "learning_rate": 8.848394175144624e-07, "loss": 0.2273, "step": 329000 }, { "epoch": 65.0, "eval_accuracy": 0.8390409123961882, "eval_f1": 0.49424991417782355, "eval_loss": 0.7529916763305664, "eval_precision": 0.44601920693928127, "eval_recall": 0.5541762894534257, "eval_runtime": 23.3862, "eval_samples_per_second": 130.376, "eval_steps_per_second": 16.334, "step": 329095 }, { "epoch": 65.08, "learning_rate": 8.823508876920009e-07, "loss": 0.2284, "step": 329500 }, { "epoch": 65.18, "learning_rate": 8.79857370835827e-07, "loss": 0.2289, "step": 330000 }, { "epoch": 65.28, "learning_rate": 8.773638539796529e-07, "loss": 0.2342, "step": 330500 }, { "epoch": 65.38, "learning_rate": 8.748753241571914e-07, "loss": 0.2251, "step": 331000 }, { "epoch": 65.48, "learning_rate": 8.723818073010175e-07, "loss": 0.2293, "step": 331500 }, { "epoch": 65.57, "learning_rate": 8.698882904448434e-07, "loss": 0.2346, "step": 332000 }, { "epoch": 65.67, "learning_rate": 8.673947735886695e-07, "loss": 0.2329, "step": 332500 }, { "epoch": 65.77, "learning_rate": 8.649012567324956e-07, "loss": 0.2259, "step": 333000 }, { "epoch": 65.87, "learning_rate": 8.624077398763217e-07, "loss": 0.2227, "step": 333500 }, { "epoch": 65.97, "learning_rate": 8.599142230201477e-07, "loss": 0.2217, "step": 334000 }, { "epoch": 66.0, "eval_accuracy": 0.8372995522216694, "eval_f1": 0.4910174542358451, "eval_loss": 0.7608162760734558, "eval_precision": 0.44029622843182165, "eval_recall": 0.5549461123941494, "eval_runtime": 24.1639, "eval_samples_per_second": 126.18, "eval_steps_per_second": 15.809, "step": 334158 }, { "epoch": 66.07, "learning_rate": 8.574207061639737e-07, "loss": 0.2267, "step": 334500 }, { "epoch": 66.17, "learning_rate": 8.549271893077998e-07, "loss": 0.2262, "step": 335000 }, { "epoch": 66.27, "learning_rate": 8.524386594853382e-07, "loss": 0.2277, "step": 335500 }, { "epoch": 66.36, "learning_rate": 8.499451426291643e-07, "loss": 0.2242, "step": 336000 }, { "epoch": 66.46, "learning_rate": 8.474516257729903e-07, "loss": 0.2329, "step": 336500 }, { "epoch": 66.56, "learning_rate": 8.449581089168163e-07, "loss": 0.227, "step": 337000 }, { "epoch": 66.66, "learning_rate": 8.424645920606423e-07, "loss": 0.2201, "step": 337500 }, { "epoch": 66.76, "learning_rate": 8.399760622381809e-07, "loss": 0.2354, "step": 338000 }, { "epoch": 66.86, "learning_rate": 8.374825453820068e-07, "loss": 0.227, "step": 338500 }, { "epoch": 66.96, "learning_rate": 8.349890285258329e-07, "loss": 0.2236, "step": 339000 }, { "epoch": 67.0, "eval_accuracy": 0.8383099238394122, "eval_f1": 0.4886561954624782, "eval_loss": 0.7648459076881409, "eval_precision": 0.4381344935501107, "eval_recall": 0.552347959969207, "eval_runtime": 24.0499, "eval_samples_per_second": 126.778, "eval_steps_per_second": 15.884, "step": 339221 }, { "epoch": 67.06, "learning_rate": 8.324955116696589e-07, "loss": 0.2249, "step": 339500 }, { "epoch": 67.15, "learning_rate": 8.300019948134851e-07, "loss": 0.2288, "step": 340000 }, { "epoch": 67.25, "learning_rate": 8.275134649910234e-07, "loss": 0.2279, "step": 340500 }, { "epoch": 67.35, "learning_rate": 8.250199481348494e-07, "loss": 0.2237, "step": 341000 }, { "epoch": 67.45, "learning_rate": 8.225264312786756e-07, "loss": 0.2217, "step": 341500 }, { "epoch": 67.55, "learning_rate": 8.200329144225016e-07, "loss": 0.2337, "step": 342000 }, { "epoch": 67.65, "learning_rate": 8.1754438460004e-07, "loss": 0.2227, "step": 342500 }, { "epoch": 67.75, "learning_rate": 8.15050867743866e-07, "loss": 0.2251, "step": 343000 }, { "epoch": 67.85, "learning_rate": 8.12557350887692e-07, "loss": 0.2152, "step": 343500 }, { "epoch": 67.94, "learning_rate": 8.100638340315181e-07, "loss": 0.2186, "step": 344000 }, { "epoch": 68.0, "eval_accuracy": 0.8376669600826667, "eval_f1": 0.48827047893776776, "eval_loss": 0.7637174129486084, "eval_precision": 0.43661330703285034, "eval_recall": 0.5537913779830639, "eval_runtime": 23.9648, "eval_samples_per_second": 127.228, "eval_steps_per_second": 15.94, "step": 344284 }, { "epoch": 68.04, "learning_rate": 8.075753042090566e-07, "loss": 0.2306, "step": 344500 }, { "epoch": 68.14, "learning_rate": 8.05086774386595e-07, "loss": 0.2247, "step": 345000 }, { "epoch": 68.24, "learning_rate": 8.02593257530421e-07, "loss": 0.2277, "step": 345500 }, { "epoch": 68.34, "learning_rate": 8.000997406742471e-07, "loss": 0.2197, "step": 346000 }, { "epoch": 68.44, "learning_rate": 7.97606223818073e-07, "loss": 0.2357, "step": 346500 }, { "epoch": 68.54, "learning_rate": 7.951127069618991e-07, "loss": 0.2294, "step": 347000 }, { "epoch": 68.64, "learning_rate": 7.926191901057253e-07, "loss": 0.2169, "step": 347500 }, { "epoch": 68.73, "learning_rate": 7.901256732495512e-07, "loss": 0.2302, "step": 348000 }, { "epoch": 68.83, "learning_rate": 7.876321563933773e-07, "loss": 0.2298, "step": 348500 }, { "epoch": 68.93, "learning_rate": 7.851386395372033e-07, "loss": 0.2138, "step": 349000 }, { "epoch": 69.0, "eval_accuracy": 0.8371770829346703, "eval_f1": 0.48854312800238064, "eval_loss": 0.7689042091369629, "eval_precision": 0.43759043484883103, "eval_recall": 0.5529253271747498, "eval_runtime": 25.1434, "eval_samples_per_second": 121.265, "eval_steps_per_second": 15.193, "step": 349347 }, { "epoch": 69.03, "learning_rate": 7.826501097147417e-07, "loss": 0.2283, "step": 349500 }, { "epoch": 69.13, "learning_rate": 7.801565928585678e-07, "loss": 0.2206, "step": 350000 }, { "epoch": 69.23, "learning_rate": 7.776630760023938e-07, "loss": 0.2242, "step": 350500 }, { "epoch": 69.33, "learning_rate": 7.751745461799321e-07, "loss": 0.2172, "step": 351000 }, { "epoch": 69.43, "learning_rate": 7.726810293237583e-07, "loss": 0.2189, "step": 351500 }, { "epoch": 69.52, "learning_rate": 7.701875124675844e-07, "loss": 0.2224, "step": 352000 }, { "epoch": 69.62, "learning_rate": 7.676939956114104e-07, "loss": 0.2122, "step": 352500 }, { "epoch": 69.72, "learning_rate": 7.652004787552364e-07, "loss": 0.2306, "step": 353000 }, { "epoch": 69.82, "learning_rate": 7.627069618990624e-07, "loss": 0.2245, "step": 353500 }, { "epoch": 69.92, "learning_rate": 7.602134450428886e-07, "loss": 0.2353, "step": 354000 }, { "epoch": 70.0, "eval_accuracy": 0.8379654789697272, "eval_f1": 0.49071425521922896, "eval_loss": 0.7559919953346252, "eval_precision": 0.44102524748676236, "eval_recall": 0.5530215550423403, "eval_runtime": 23.4919, "eval_samples_per_second": 129.789, "eval_steps_per_second": 16.261, "step": 354410 }, { "epoch": 70.02, "learning_rate": 7.577199281867147e-07, "loss": 0.2265, "step": 354500 }, { "epoch": 70.12, "learning_rate": 7.552264113305406e-07, "loss": 0.2252, "step": 355000 }, { "epoch": 70.22, "learning_rate": 7.52737881508079e-07, "loss": 0.2197, "step": 355500 }, { "epoch": 70.31, "learning_rate": 7.502443646519052e-07, "loss": 0.2233, "step": 356000 }, { "epoch": 70.41, "learning_rate": 7.477508477957311e-07, "loss": 0.223, "step": 356500 }, { "epoch": 70.51, "learning_rate": 7.452573309395572e-07, "loss": 0.2363, "step": 357000 }, { "epoch": 70.61, "learning_rate": 7.427638140833833e-07, "loss": 0.2181, "step": 357500 }, { "epoch": 70.71, "learning_rate": 7.402702972272092e-07, "loss": 0.2208, "step": 358000 }, { "epoch": 70.81, "learning_rate": 7.377767803710354e-07, "loss": 0.2221, "step": 358500 }, { "epoch": 70.91, "learning_rate": 7.352832635148614e-07, "loss": 0.2245, "step": 359000 }, { "epoch": 71.0, "eval_accuracy": 0.839415974587623, "eval_f1": 0.489203721089016, "eval_loss": 0.7731532454490662, "eval_precision": 0.4395031436896182, "eval_recall": 0.5515781370284835, "eval_runtime": 23.9793, "eval_samples_per_second": 127.151, "eval_steps_per_second": 15.93, "step": 359473 }, { "epoch": 71.01, "learning_rate": 7.327947336923997e-07, "loss": 0.2226, "step": 359500 }, { "epoch": 71.1, "learning_rate": 7.303062038699383e-07, "loss": 0.2242, "step": 360000 }, { "epoch": 71.2, "learning_rate": 7.278176740474765e-07, "loss": 0.221, "step": 360500 }, { "epoch": 71.3, "learning_rate": 7.253241571913026e-07, "loss": 0.2187, "step": 361000 }, { "epoch": 71.4, "learning_rate": 7.228306403351288e-07, "loss": 0.2279, "step": 361500 }, { "epoch": 71.5, "learning_rate": 7.203371234789548e-07, "loss": 0.2328, "step": 362000 }, { "epoch": 71.6, "learning_rate": 7.178436066227808e-07, "loss": 0.2199, "step": 362500 }, { "epoch": 71.7, "learning_rate": 7.153500897666068e-07, "loss": 0.2272, "step": 363000 }, { "epoch": 71.8, "learning_rate": 7.128565729104329e-07, "loss": 0.227, "step": 363500 }, { "epoch": 71.89, "learning_rate": 7.103680430879713e-07, "loss": 0.2213, "step": 364000 }, { "epoch": 71.99, "learning_rate": 7.078745262317974e-07, "loss": 0.2239, "step": 364500 }, { "epoch": 72.0, "eval_accuracy": 0.8369130085345784, "eval_f1": 0.48701106072805866, "eval_loss": 0.7701214551925659, "eval_precision": 0.43513820522529345, "eval_recall": 0.5529253271747498, "eval_runtime": 23.5415, "eval_samples_per_second": 129.516, "eval_steps_per_second": 16.227, "step": 364536 }, { "epoch": 72.09, "learning_rate": 7.053810093756234e-07, "loss": 0.2288, "step": 365000 }, { "epoch": 72.19, "learning_rate": 7.028874925194495e-07, "loss": 0.2218, "step": 365500 }, { "epoch": 72.29, "learning_rate": 7.003939756632755e-07, "loss": 0.2138, "step": 366000 }, { "epoch": 72.39, "learning_rate": 6.979004588071016e-07, "loss": 0.225, "step": 366500 }, { "epoch": 72.49, "learning_rate": 6.954069419509277e-07, "loss": 0.2252, "step": 367000 }, { "epoch": 72.59, "learning_rate": 6.929134250947537e-07, "loss": 0.2271, "step": 367500 }, { "epoch": 72.68, "learning_rate": 6.904248952722921e-07, "loss": 0.2195, "step": 368000 }, { "epoch": 72.78, "learning_rate": 6.879313784161182e-07, "loss": 0.226, "step": 368500 }, { "epoch": 72.88, "learning_rate": 6.854378615599443e-07, "loss": 0.2214, "step": 369000 }, { "epoch": 72.98, "learning_rate": 6.829443447037702e-07, "loss": 0.2129, "step": 369500 }, { "epoch": 73.0, "eval_accuracy": 0.8376707872478855, "eval_f1": 0.4907186648501363, "eval_loss": 0.7788528203964233, "eval_precision": 0.44005803298717167, "eval_recall": 0.5545612009237876, "eval_runtime": 23.966, "eval_samples_per_second": 127.222, "eval_steps_per_second": 15.939, "step": 369599 }, { "epoch": 73.08, "learning_rate": 6.804558148813087e-07, "loss": 0.2357, "step": 370000 }, { "epoch": 73.18, "learning_rate": 6.77967285058847e-07, "loss": 0.216, "step": 370500 }, { "epoch": 73.28, "learning_rate": 6.754737682026731e-07, "loss": 0.2102, "step": 371000 }, { "epoch": 73.38, "learning_rate": 6.729802513464991e-07, "loss": 0.2182, "step": 371500 }, { "epoch": 73.47, "learning_rate": 6.704867344903252e-07, "loss": 0.2189, "step": 372000 }, { "epoch": 73.57, "learning_rate": 6.679932176341512e-07, "loss": 0.2218, "step": 372500 }, { "epoch": 73.67, "learning_rate": 6.654997007779773e-07, "loss": 0.2262, "step": 373000 }, { "epoch": 73.77, "learning_rate": 6.630061839218034e-07, "loss": 0.2245, "step": 373500 }, { "epoch": 73.87, "learning_rate": 6.605126670656293e-07, "loss": 0.2231, "step": 374000 }, { "epoch": 73.97, "learning_rate": 6.580241372431678e-07, "loss": 0.2207, "step": 374500 }, { "epoch": 74.0, "eval_accuracy": 0.838684986030847, "eval_f1": 0.49589123607107516, "eval_loss": 0.78349369764328, "eval_precision": 0.44844759162711073, "eval_recall": 0.5545612009237876, "eval_runtime": 24.1896, "eval_samples_per_second": 126.046, "eval_steps_per_second": 15.792, "step": 374662 }, { "epoch": 74.07, "learning_rate": 6.555306203869939e-07, "loss": 0.2211, "step": 375000 }, { "epoch": 74.17, "learning_rate": 6.530371035308198e-07, "loss": 0.2198, "step": 375500 }, { "epoch": 74.26, "learning_rate": 6.50543586674646e-07, "loss": 0.2185, "step": 376000 }, { "epoch": 74.36, "learning_rate": 6.480500698184721e-07, "loss": 0.2228, "step": 376500 }, { "epoch": 74.46, "learning_rate": 6.455615399960104e-07, "loss": 0.2249, "step": 377000 }, { "epoch": 74.56, "learning_rate": 6.430680231398364e-07, "loss": 0.2134, "step": 377500 }, { "epoch": 74.66, "learning_rate": 6.405745062836626e-07, "loss": 0.2162, "step": 378000 }, { "epoch": 74.76, "learning_rate": 6.380809894274887e-07, "loss": 0.2149, "step": 378500 }, { "epoch": 74.86, "learning_rate": 6.355874725713146e-07, "loss": 0.214, "step": 379000 }, { "epoch": 74.96, "learning_rate": 6.331039297825655e-07, "loss": 0.2218, "step": 379500 }, { "epoch": 75.0, "eval_accuracy": 0.8382793065176624, "eval_f1": 0.4926268861454046, "eval_loss": 0.7711018919944763, "eval_precision": 0.44418676561533704, "eval_recall": 0.5529253271747498, "eval_runtime": 24.2584, "eval_samples_per_second": 125.688, "eval_steps_per_second": 15.747, "step": 379725 }, { "epoch": 75.05, "learning_rate": 6.306104129263914e-07, "loss": 0.2316, "step": 380000 }, { "epoch": 75.15, "learning_rate": 6.281168960702175e-07, "loss": 0.2168, "step": 380500 }, { "epoch": 75.25, "learning_rate": 6.256233792140435e-07, "loss": 0.2174, "step": 381000 }, { "epoch": 75.35, "learning_rate": 6.231298623578695e-07, "loss": 0.2229, "step": 381500 }, { "epoch": 75.45, "learning_rate": 6.20641332535408e-07, "loss": 0.2168, "step": 382000 }, { "epoch": 75.55, "learning_rate": 6.181478156792341e-07, "loss": 0.2184, "step": 382500 }, { "epoch": 75.65, "learning_rate": 6.156542988230601e-07, "loss": 0.2099, "step": 383000 }, { "epoch": 75.75, "learning_rate": 6.131607819668861e-07, "loss": 0.2153, "step": 383500 }, { "epoch": 75.84, "learning_rate": 6.106722521444246e-07, "loss": 0.2214, "step": 384000 }, { "epoch": 75.94, "learning_rate": 6.081787352882506e-07, "loss": 0.2257, "step": 384500 }, { "epoch": 76.0, "eval_accuracy": 0.837069922308546, "eval_f1": 0.4922721239885259, "eval_loss": 0.7748467922210693, "eval_precision": 0.4434246047049749, "eval_recall": 0.5532140107775212, "eval_runtime": 23.4467, "eval_samples_per_second": 130.04, "eval_steps_per_second": 16.292, "step": 384788 }, { "epoch": 76.04, "learning_rate": 6.056852184320766e-07, "loss": 0.2208, "step": 385000 }, { "epoch": 76.14, "learning_rate": 6.031917015759027e-07, "loss": 0.23, "step": 385500 }, { "epoch": 76.24, "learning_rate": 6.006981847197288e-07, "loss": 0.2093, "step": 386000 }, { "epoch": 76.34, "learning_rate": 5.982046678635548e-07, "loss": 0.2182, "step": 386500 }, { "epoch": 76.44, "learning_rate": 5.957111510073808e-07, "loss": 0.2226, "step": 387000 }, { "epoch": 76.54, "learning_rate": 5.932176341512069e-07, "loss": 0.2185, "step": 387500 }, { "epoch": 76.63, "learning_rate": 5.90724117295033e-07, "loss": 0.2246, "step": 388000 }, { "epoch": 76.73, "learning_rate": 5.882355874725714e-07, "loss": 0.2287, "step": 388500 }, { "epoch": 76.83, "learning_rate": 5.857420706163974e-07, "loss": 0.2206, "step": 389000 }, { "epoch": 76.93, "learning_rate": 5.832535407939359e-07, "loss": 0.231, "step": 389500 }, { "epoch": 77.0, "eval_accuracy": 0.8387041218569405, "eval_f1": 0.49053553958058616, "eval_loss": 0.7716959118843079, "eval_precision": 0.4395822215445605, "eval_recall": 0.5548498845265589, "eval_runtime": 24.1315, "eval_samples_per_second": 126.349, "eval_steps_per_second": 15.83, "step": 389851 }, { "epoch": 77.03, "learning_rate": 5.807600239377619e-07, "loss": 0.2233, "step": 390000 }, { "epoch": 77.13, "learning_rate": 5.782665070815879e-07, "loss": 0.2144, "step": 390500 }, { "epoch": 77.23, "learning_rate": 5.757729902254139e-07, "loss": 0.223, "step": 391000 }, { "epoch": 77.33, "learning_rate": 5.7327947336924e-07, "loss": 0.2235, "step": 391500 }, { "epoch": 77.42, "learning_rate": 5.707859565130661e-07, "loss": 0.2127, "step": 392000 }, { "epoch": 77.52, "learning_rate": 5.682924396568922e-07, "loss": 0.2161, "step": 392500 }, { "epoch": 77.62, "learning_rate": 5.658039098344305e-07, "loss": 0.2258, "step": 393000 }, { "epoch": 77.72, "learning_rate": 5.633103929782565e-07, "loss": 0.2197, "step": 393500 }, { "epoch": 77.82, "learning_rate": 5.608168761220827e-07, "loss": 0.2291, "step": 394000 }, { "epoch": 77.92, "learning_rate": 5.583233592659087e-07, "loss": 0.2187, "step": 394500 }, { "epoch": 78.0, "eval_accuracy": 0.8378353553522906, "eval_f1": 0.49114893617021277, "eval_loss": 0.7635823488235474, "eval_precision": 0.44026548672566373, "eval_recall": 0.5553310238645112, "eval_runtime": 24.4109, "eval_samples_per_second": 124.903, "eval_steps_per_second": 15.649, "step": 394914 }, { "epoch": 78.02, "learning_rate": 5.558298424097348e-07, "loss": 0.2156, "step": 395000 }, { "epoch": 78.12, "learning_rate": 5.533363255535608e-07, "loss": 0.2141, "step": 395500 }, { "epoch": 78.21, "learning_rate": 5.508428086973868e-07, "loss": 0.216, "step": 396000 }, { "epoch": 78.31, "learning_rate": 5.483492918412129e-07, "loss": 0.2186, "step": 396500 }, { "epoch": 78.41, "learning_rate": 5.458557749850389e-07, "loss": 0.2144, "step": 397000 }, { "epoch": 78.51, "learning_rate": 5.43362258128865e-07, "loss": 0.221, "step": 397500 }, { "epoch": 78.61, "learning_rate": 5.408737283064034e-07, "loss": 0.226, "step": 398000 }, { "epoch": 78.71, "learning_rate": 5.383802114502295e-07, "loss": 0.2218, "step": 398500 }, { "epoch": 78.81, "learning_rate": 5.358866945940555e-07, "loss": 0.2271, "step": 399000 }, { "epoch": 78.91, "learning_rate": 5.333931777378815e-07, "loss": 0.2165, "step": 399500 }, { "epoch": 79.0, "eval_accuracy": 0.8372153545868575, "eval_f1": 0.4910901243536601, "eval_loss": 0.7801523804664612, "eval_precision": 0.4416942116995926, "eval_recall": 0.5529253271747498, "eval_runtime": 24.1131, "eval_samples_per_second": 126.446, "eval_steps_per_second": 15.842, "step": 399977 }, { "epoch": 79.0, "learning_rate": 5.309046479154199e-07, "loss": 0.2287, "step": 400000 }, { "epoch": 79.1, "learning_rate": 5.284111310592461e-07, "loss": 0.2196, "step": 400500 }, { "epoch": 79.2, "learning_rate": 5.259176142030721e-07, "loss": 0.2266, "step": 401000 }, { "epoch": 79.3, "learning_rate": 5.234290843806104e-07, "loss": 0.2199, "step": 401500 }, { "epoch": 79.4, "learning_rate": 5.209355675244365e-07, "loss": 0.2189, "step": 402000 }, { "epoch": 79.5, "learning_rate": 5.184420506682626e-07, "loss": 0.2273, "step": 402500 }, { "epoch": 79.6, "learning_rate": 5.159535208458009e-07, "loss": 0.2125, "step": 403000 }, { "epoch": 79.7, "learning_rate": 5.13460003989627e-07, "loss": 0.2241, "step": 403500 }, { "epoch": 79.79, "learning_rate": 5.10966487133453e-07, "loss": 0.2177, "step": 404000 }, { "epoch": 79.89, "learning_rate": 5.084729702772792e-07, "loss": 0.2159, "step": 404500 }, { "epoch": 79.99, "learning_rate": 5.059794534211052e-07, "loss": 0.2173, "step": 405000 }, { "epoch": 80.0, "eval_accuracy": 0.8373301695434192, "eval_f1": 0.49182712879760376, "eval_loss": 0.7780812978744507, "eval_precision": 0.44282632146709816, "eval_recall": 0.5530215550423403, "eval_runtime": 23.2524, "eval_samples_per_second": 131.126, "eval_steps_per_second": 16.428, "step": 405040 }, { "epoch": 80.09, "learning_rate": 5.034859365649312e-07, "loss": 0.2145, "step": 405500 }, { "epoch": 80.19, "learning_rate": 5.009924197087572e-07, "loss": 0.2144, "step": 406000 }, { "epoch": 80.29, "learning_rate": 4.984989028525834e-07, "loss": 0.2121, "step": 406500 }, { "epoch": 80.39, "learning_rate": 4.960053859964094e-07, "loss": 0.2188, "step": 407000 }, { "epoch": 80.49, "learning_rate": 4.935118691402354e-07, "loss": 0.2163, "step": 407500 }, { "epoch": 80.58, "learning_rate": 4.910183522840615e-07, "loss": 0.2056, "step": 408000 }, { "epoch": 80.68, "learning_rate": 4.885248354278875e-07, "loss": 0.2233, "step": 408500 }, { "epoch": 80.78, "learning_rate": 4.860363056054259e-07, "loss": 0.2178, "step": 409000 }, { "epoch": 80.88, "learning_rate": 4.83542788749252e-07, "loss": 0.2088, "step": 409500 }, { "epoch": 80.98, "learning_rate": 4.81049271893078e-07, "loss": 0.2251, "step": 410000 }, { "epoch": 81.0, "eval_accuracy": 0.8379425159784147, "eval_f1": 0.49120115897567007, "eval_loss": 0.7737105488777161, "eval_precision": 0.4407738778007188, "eval_recall": 0.554657428791378, "eval_runtime": 23.4259, "eval_samples_per_second": 130.155, "eval_steps_per_second": 16.307, "step": 410103 }, { "epoch": 81.08, "learning_rate": 4.785557550369041e-07, "loss": 0.2209, "step": 410500 }, { "epoch": 81.18, "learning_rate": 4.760622381807301e-07, "loss": 0.2155, "step": 411000 }, { "epoch": 81.28, "learning_rate": 4.7356872132455623e-07, "loss": 0.2093, "step": 411500 }, { "epoch": 81.37, "learning_rate": 4.710801915020946e-07, "loss": 0.2196, "step": 412000 }, { "epoch": 81.47, "learning_rate": 4.6858667464592064e-07, "loss": 0.2227, "step": 412500 }, { "epoch": 81.57, "learning_rate": 4.660931577897467e-07, "loss": 0.2134, "step": 413000 }, { "epoch": 81.67, "learning_rate": 4.6359964093357273e-07, "loss": 0.2152, "step": 413500 }, { "epoch": 81.77, "learning_rate": 4.611061240773988e-07, "loss": 0.2141, "step": 414000 }, { "epoch": 81.87, "learning_rate": 4.5861759425493724e-07, "loss": 0.2238, "step": 414500 }, { "epoch": 81.97, "learning_rate": 4.5612407739876326e-07, "loss": 0.2195, "step": 415000 }, { "epoch": 82.0, "eval_accuracy": 0.8383596769872556, "eval_f1": 0.4898150515639649, "eval_loss": 0.7843115925788879, "eval_precision": 0.43957472846871654, "eval_recall": 0.5530215550423403, "eval_runtime": 24.3105, "eval_samples_per_second": 125.419, "eval_steps_per_second": 15.713, "step": 415166 }, { "epoch": 82.07, "learning_rate": 4.5363056054258933e-07, "loss": 0.2136, "step": 415500 }, { "epoch": 82.16, "learning_rate": 4.5113704368641535e-07, "loss": 0.2171, "step": 416000 }, { "epoch": 82.26, "learning_rate": 4.486435268302414e-07, "loss": 0.2173, "step": 416500 }, { "epoch": 82.36, "learning_rate": 4.4615000997406744e-07, "loss": 0.2093, "step": 417000 }, { "epoch": 82.46, "learning_rate": 4.4366148015160583e-07, "loss": 0.2203, "step": 417500 }, { "epoch": 82.56, "learning_rate": 4.4116796329543196e-07, "loss": 0.2237, "step": 418000 }, { "epoch": 82.66, "learning_rate": 4.38674446439258e-07, "loss": 0.2147, "step": 418500 }, { "epoch": 82.76, "learning_rate": 4.36180929583084e-07, "loss": 0.2145, "step": 419000 }, { "epoch": 82.86, "learning_rate": 4.3368741272691007e-07, "loss": 0.2124, "step": 419500 }, { "epoch": 82.95, "learning_rate": 4.3119888290444846e-07, "loss": 0.2208, "step": 420000 }, { "epoch": 83.0, "eval_accuracy": 0.8378506640131654, "eval_f1": 0.4901736066895878, "eval_loss": 0.7780388593673706, "eval_precision": 0.4385205437837017, "eval_recall": 0.5556197074672825, "eval_runtime": 24.1182, "eval_samples_per_second": 126.419, "eval_steps_per_second": 15.839, "step": 420229 }, { "epoch": 83.05, "learning_rate": 4.287053660482745e-07, "loss": 0.2155, "step": 420500 }, { "epoch": 83.15, "learning_rate": 4.262118491921006e-07, "loss": 0.2045, "step": 421000 }, { "epoch": 83.25, "learning_rate": 4.237183323359266e-07, "loss": 0.2212, "step": 421500 }, { "epoch": 83.35, "learning_rate": 4.21229802513465e-07, "loss": 0.2137, "step": 422000 }, { "epoch": 83.45, "learning_rate": 4.187362856572911e-07, "loss": 0.2226, "step": 422500 }, { "epoch": 83.55, "learning_rate": 4.162427688011171e-07, "loss": 0.2136, "step": 423000 }, { "epoch": 83.65, "learning_rate": 4.1374925194494317e-07, "loss": 0.2184, "step": 423500 }, { "epoch": 83.74, "learning_rate": 4.1125573508876924e-07, "loss": 0.2121, "step": 424000 }, { "epoch": 83.84, "learning_rate": 4.0876720526630763e-07, "loss": 0.2127, "step": 424500 }, { "epoch": 83.94, "learning_rate": 4.062736884101337e-07, "loss": 0.2144, "step": 425000 }, { "epoch": 84.0, "eval_accuracy": 0.8380764667610701, "eval_f1": 0.49133415662394075, "eval_loss": 0.7888113856315613, "eval_precision": 0.44068443969139104, "eval_recall": 0.5551385681293303, "eval_runtime": 24.19, "eval_samples_per_second": 126.044, "eval_steps_per_second": 15.792, "step": 425292 }, { "epoch": 84.04, "learning_rate": 4.037801715539597e-07, "loss": 0.2187, "step": 425500 }, { "epoch": 84.14, "learning_rate": 4.012866546977858e-07, "loss": 0.2165, "step": 426000 }, { "epoch": 84.24, "learning_rate": 3.987931378416118e-07, "loss": 0.2147, "step": 426500 }, { "epoch": 84.34, "learning_rate": 3.9629962098543794e-07, "loss": 0.2092, "step": 427000 }, { "epoch": 84.44, "learning_rate": 3.9381109116297633e-07, "loss": 0.2179, "step": 427500 }, { "epoch": 84.53, "learning_rate": 3.9131757430680235e-07, "loss": 0.2123, "step": 428000 }, { "epoch": 84.63, "learning_rate": 3.888240574506284e-07, "loss": 0.2127, "step": 428500 }, { "epoch": 84.73, "learning_rate": 3.8633054059445444e-07, "loss": 0.2192, "step": 429000 }, { "epoch": 84.83, "learning_rate": 3.8383702373828046e-07, "loss": 0.2166, "step": 429500 }, { "epoch": 84.93, "learning_rate": 3.813435068821066e-07, "loss": 0.2123, "step": 430000 }, { "epoch": 85.0, "eval_accuracy": 0.8388112824830648, "eval_f1": 0.4910889400528694, "eval_loss": 0.7885710000991821, "eval_precision": 0.4408972592252335, "eval_recall": 0.5541762894534257, "eval_runtime": 24.1837, "eval_samples_per_second": 126.077, "eval_steps_per_second": 15.796, "step": 430355 }, { "epoch": 85.03, "learning_rate": 3.788499900259326e-07, "loss": 0.2197, "step": 430500 }, { "epoch": 85.13, "learning_rate": 3.76361460203471e-07, "loss": 0.2193, "step": 431000 }, { "epoch": 85.23, "learning_rate": 3.7386794334729706e-07, "loss": 0.213, "step": 431500 }, { "epoch": 85.32, "learning_rate": 3.713744264911231e-07, "loss": 0.216, "step": 432000 }, { "epoch": 85.42, "learning_rate": 3.6888090963494915e-07, "loss": 0.2137, "step": 432500 }, { "epoch": 85.52, "learning_rate": 3.6639237981248754e-07, "loss": 0.2204, "step": 433000 }, { "epoch": 85.62, "learning_rate": 3.638988629563136e-07, "loss": 0.2121, "step": 433500 }, { "epoch": 85.72, "learning_rate": 3.61410333133852e-07, "loss": 0.2184, "step": 434000 }, { "epoch": 85.82, "learning_rate": 3.589168162776781e-07, "loss": 0.2083, "step": 434500 }, { "epoch": 85.92, "learning_rate": 3.564232994215041e-07, "loss": 0.2195, "step": 435000 }, { "epoch": 86.0, "eval_accuracy": 0.8386696773699721, "eval_f1": 0.49464025383757826, "eval_loss": 0.7830442786216736, "eval_precision": 0.4460943542150039, "eval_recall": 0.5550423402617398, "eval_runtime": 23.4037, "eval_samples_per_second": 130.278, "eval_steps_per_second": 16.322, "step": 435418 }, { "epoch": 86.02, "learning_rate": 3.5392978256533017e-07, "loss": 0.2097, "step": 435500 }, { "epoch": 86.11, "learning_rate": 3.514362657091562e-07, "loss": 0.2071, "step": 436000 }, { "epoch": 86.21, "learning_rate": 3.489477358866946e-07, "loss": 0.2152, "step": 436500 }, { "epoch": 86.31, "learning_rate": 3.464542190305207e-07, "loss": 0.2152, "step": 437000 }, { "epoch": 86.41, "learning_rate": 3.439607021743467e-07, "loss": 0.218, "step": 437500 }, { "epoch": 86.51, "learning_rate": 3.414671853181728e-07, "loss": 0.2185, "step": 438000 }, { "epoch": 86.61, "learning_rate": 3.389736684619988e-07, "loss": 0.2211, "step": 438500 }, { "epoch": 86.71, "learning_rate": 3.3648015160582493e-07, "loss": 0.2207, "step": 439000 }, { "epoch": 86.81, "learning_rate": 3.3398663474965095e-07, "loss": 0.2272, "step": 439500 }, { "epoch": 86.9, "learning_rate": 3.31493117893477e-07, "loss": 0.2105, "step": 440000 }, { "epoch": 87.0, "eval_accuracy": 0.8389414061005014, "eval_f1": 0.492742453436095, "eval_loss": 0.7846280932426453, "eval_precision": 0.44387873177505205, "eval_recall": 0.5536951501154734, "eval_runtime": 23.9106, "eval_samples_per_second": 127.517, "eval_steps_per_second": 15.976, "step": 440481 }, { "epoch": 87.0, "learning_rate": 3.2899960103730304e-07, "loss": 0.2167, "step": 440500 }, { "epoch": 87.1, "learning_rate": 3.2651107121484143e-07, "loss": 0.2137, "step": 441000 }, { "epoch": 87.2, "learning_rate": 3.240175543586675e-07, "loss": 0.2162, "step": 441500 }, { "epoch": 87.3, "learning_rate": 3.215240375024935e-07, "loss": 0.2167, "step": 442000 }, { "epoch": 87.4, "learning_rate": 3.190355076800319e-07, "loss": 0.2144, "step": 442500 }, { "epoch": 87.5, "learning_rate": 3.16541990823858e-07, "loss": 0.2148, "step": 443000 }, { "epoch": 87.6, "learning_rate": 3.1404847396768406e-07, "loss": 0.218, "step": 443500 }, { "epoch": 87.7, "learning_rate": 3.1155495711151013e-07, "loss": 0.223, "step": 444000 }, { "epoch": 87.79, "learning_rate": 3.0906144025533615e-07, "loss": 0.2204, "step": 444500 }, { "epoch": 87.89, "learning_rate": 3.065679233991622e-07, "loss": 0.2136, "step": 445000 }, { "epoch": 87.99, "learning_rate": 3.040744065429883e-07, "loss": 0.2203, "step": 445500 }, { "epoch": 88.0, "eval_accuracy": 0.8378238738566344, "eval_f1": 0.4915982473305824, "eval_loss": 0.7851070761680603, "eval_precision": 0.4405642394205109, "eval_recall": 0.5560046189376443, "eval_runtime": 23.4737, "eval_samples_per_second": 129.89, "eval_steps_per_second": 16.274, "step": 445544 }, { "epoch": 88.09, "learning_rate": 3.015808896868143e-07, "loss": 0.2159, "step": 446000 }, { "epoch": 88.19, "learning_rate": 2.990923598643527e-07, "loss": 0.2045, "step": 446500 }, { "epoch": 88.29, "learning_rate": 2.9659884300817877e-07, "loss": 0.2064, "step": 447000 }, { "epoch": 88.39, "learning_rate": 2.9410532615200484e-07, "loss": 0.2214, "step": 447500 }, { "epoch": 88.49, "learning_rate": 2.9161180929583086e-07, "loss": 0.2136, "step": 448000 }, { "epoch": 88.58, "learning_rate": 2.891182924396569e-07, "loss": 0.2092, "step": 448500 }, { "epoch": 88.68, "learning_rate": 2.8662477558348295e-07, "loss": 0.2104, "step": 449000 }, { "epoch": 88.78, "learning_rate": 2.84131258727309e-07, "loss": 0.2084, "step": 449500 }, { "epoch": 88.88, "learning_rate": 2.816427289048474e-07, "loss": 0.2156, "step": 450000 }, { "epoch": 88.98, "learning_rate": 2.791492120486735e-07, "loss": 0.2079, "step": 450500 }, { "epoch": 89.0, "eval_accuracy": 0.8380956025871636, "eval_f1": 0.49355966902669957, "eval_loss": 0.7890189290046692, "eval_precision": 0.443235789796231, "eval_recall": 0.556774441878368, "eval_runtime": 23.2357, "eval_samples_per_second": 131.22, "eval_steps_per_second": 16.44, "step": 450607 }, { "epoch": 89.08, "learning_rate": 2.766556951924995e-07, "loss": 0.2051, "step": 451000 }, { "epoch": 89.18, "learning_rate": 2.741621783363256e-07, "loss": 0.2181, "step": 451500 }, { "epoch": 89.28, "learning_rate": 2.7166866148015165e-07, "loss": 0.2126, "step": 452000 }, { "epoch": 89.37, "learning_rate": 2.6918013165769004e-07, "loss": 0.2175, "step": 452500 }, { "epoch": 89.47, "learning_rate": 2.6668661480151606e-07, "loss": 0.2105, "step": 453000 }, { "epoch": 89.57, "learning_rate": 2.6419309794534213e-07, "loss": 0.207, "step": 453500 }, { "epoch": 89.67, "learning_rate": 2.616995810891682e-07, "loss": 0.2101, "step": 454000 }, { "epoch": 89.77, "learning_rate": 2.592110512667066e-07, "loss": 0.2084, "step": 454500 }, { "epoch": 89.87, "learning_rate": 2.5671753441053266e-07, "loss": 0.208, "step": 455000 }, { "epoch": 89.97, "learning_rate": 2.542240175543587e-07, "loss": 0.2109, "step": 455500 }, { "epoch": 90.0, "eval_accuracy": 0.8384974549351295, "eval_f1": 0.49219216656711323, "eval_loss": 0.7881253957748413, "eval_precision": 0.44212785528131227, "eval_recall": 0.5550423402617398, "eval_runtime": 23.2564, "eval_samples_per_second": 131.104, "eval_steps_per_second": 16.426, "step": 455670 } ], "max_steps": 506300, "num_train_epochs": 100, "total_flos": 6.575301079423942e+17, "trial_name": null, "trial_params": null }