| { |
| "best_metric": 0.8990160346845362, |
| "best_model_checkpoint": "./save_models/qqp/roberta-base_lr1e-05_run0/checkpoint-204670", |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 204670, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.0713296962788053e-07, |
| "loss": 0.6914, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 8.142659392557611e-07, |
| "loss": 0.6479, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.2213989088836414e-06, |
| "loss": 0.5224, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.6285318785115221e-06, |
| "loss": 0.4532, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 2.0356648481394024e-06, |
| "loss": 0.4179, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 2.442797817767283e-06, |
| "loss": 0.4075, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.8499307873951637e-06, |
| "loss": 0.3777, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 3.2570637570230442e-06, |
| "loss": 0.3735, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 3.6641967266509243e-06, |
| "loss": 0.3609, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.071329696278805e-06, |
| "loss": 0.3624, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.478462665906685e-06, |
| "loss": 0.3574, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.885595635534566e-06, |
| "loss": 0.3509, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 5.292728605162446e-06, |
| "loss": 0.3387, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 5.6998615747903275e-06, |
| "loss": 0.335, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 6.106994544418208e-06, |
| "loss": 0.3242, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 6.5141275140460884e-06, |
| "loss": 0.3317, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 6.921260483673968e-06, |
| "loss": 0.3182, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 7.3283934533018485e-06, |
| "loss": 0.3197, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 7.73552642292973e-06, |
| "loss": 0.3213, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 8.14265939255761e-06, |
| "loss": 0.3204, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 8.54979236218549e-06, |
| "loss": 0.3271, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 8.95692533181337e-06, |
| "loss": 0.3107, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 9.364058301441251e-06, |
| "loss": 0.31, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 9.771191271069131e-06, |
| "loss": 0.3089, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.988616812811544e-06, |
| "loss": 0.2948, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 9.962627800965753e-06, |
| "loss": 0.3019, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 9.936638789119961e-06, |
| "loss": 0.3093, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.91064977727417e-06, |
| "loss": 0.3032, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 9.884660765428378e-06, |
| "loss": 0.3014, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 9.858671753582586e-06, |
| "loss": 0.3039, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 9.832682741736795e-06, |
| "loss": 0.2994, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 9.806693729891003e-06, |
| "loss": 0.2954, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.78070471804521e-06, |
| "loss": 0.2907, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 9.75471570619942e-06, |
| "loss": 0.2887, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.728726694353629e-06, |
| "loss": 0.299, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.702737682507836e-06, |
| "loss": 0.2789, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 9.676748670662046e-06, |
| "loss": 0.2883, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 9.650759658816253e-06, |
| "loss": 0.2873, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.624770646970461e-06, |
| "loss": 0.2948, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.59878163512467e-06, |
| "loss": 0.2746, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8817094956712931, |
| "eval_averaged_scores": 0.8643799788235642, |
| "eval_f1": 0.8470504619758352, |
| "eval_loss": 0.2860792577266693, |
| "eval_runtime": 36.8913, |
| "eval_samples_per_second": 986.277, |
| "eval_steps_per_second": 61.668, |
| "step": 20467 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.572792623278878e-06, |
| "loss": 0.2906, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 9.546803611433086e-06, |
| "loss": 0.2525, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 9.520814599587295e-06, |
| "loss": 0.2551, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 9.494825587741504e-06, |
| "loss": 0.259, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 9.468836575895712e-06, |
| "loss": 0.2459, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 9.442847564049921e-06, |
| "loss": 0.256, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 9.416858552204129e-06, |
| "loss": 0.2573, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 9.390869540358337e-06, |
| "loss": 0.2465, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 9.364880528512546e-06, |
| "loss": 0.2519, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 9.338891516666754e-06, |
| "loss": 0.2457, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 9.312902504820961e-06, |
| "loss": 0.2502, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 9.28691349297517e-06, |
| "loss": 0.253, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 9.26092448112938e-06, |
| "loss": 0.2453, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 9.234935469283588e-06, |
| "loss": 0.2576, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 9.208946457437797e-06, |
| "loss": 0.2505, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 9.182957445592005e-06, |
| "loss": 0.252, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 9.156968433746212e-06, |
| "loss": 0.2536, |
| "step": 28500 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 9.130979421900422e-06, |
| "loss": 0.2369, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 9.10499041005463e-06, |
| "loss": 0.2459, |
| "step": 29500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 9.079001398208837e-06, |
| "loss": 0.2446, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 9.053012386363046e-06, |
| "loss": 0.2434, |
| "step": 30500 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 9.027023374517256e-06, |
| "loss": 0.2493, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.001034362671463e-06, |
| "loss": 0.2506, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 8.975045350825672e-06, |
| "loss": 0.2421, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 8.94905633897988e-06, |
| "loss": 0.2467, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 8.923067327134088e-06, |
| "loss": 0.2439, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 8.897078315288297e-06, |
| "loss": 0.2423, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 8.871089303442505e-06, |
| "loss": 0.25, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.845100291596712e-06, |
| "loss": 0.2367, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.819111279750922e-06, |
| "loss": 0.2387, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.793122267905131e-06, |
| "loss": 0.2455, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 8.767133256059339e-06, |
| "loss": 0.2439, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 8.741144244213548e-06, |
| "loss": 0.2332, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 8.715155232367756e-06, |
| "loss": 0.2446, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 8.689166220521963e-06, |
| "loss": 0.2389, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 8.663177208676173e-06, |
| "loss": 0.2346, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 8.63718819683038e-06, |
| "loss": 0.2305, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 8.61119918498459e-06, |
| "loss": 0.2331, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 8.585210173138797e-06, |
| "loss": 0.2393, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 8.559221161293007e-06, |
| "loss": 0.2371, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 8.533232149447214e-06, |
| "loss": 0.2291, |
| "step": 40500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8982272914662636, |
| "eval_averaged_scores": 0.8808907930119556, |
| "eval_f1": 0.8635542945576477, |
| "eval_loss": 0.27606385946273804, |
| "eval_runtime": 36.4573, |
| "eval_samples_per_second": 998.018, |
| "eval_steps_per_second": 62.402, |
| "step": 40934 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 8.507243137601424e-06, |
| "loss": 0.2293, |
| "step": 41000 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 8.481254125755631e-06, |
| "loss": 0.1943, |
| "step": 41500 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 8.455265113909839e-06, |
| "loss": 0.2058, |
| "step": 42000 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 8.429276102064048e-06, |
| "loss": 0.2089, |
| "step": 42500 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 8.403287090218256e-06, |
| "loss": 0.2043, |
| "step": 43000 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 8.377298078372465e-06, |
| "loss": 0.2005, |
| "step": 43500 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 8.351309066526673e-06, |
| "loss": 0.2061, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 8.325320054680882e-06, |
| "loss": 0.2058, |
| "step": 44500 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 8.29933104283509e-06, |
| "loss": 0.2114, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 8.2733420309893e-06, |
| "loss": 0.2008, |
| "step": 45500 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 8.247353019143507e-06, |
| "loss": 0.2198, |
| "step": 46000 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 8.221364007297714e-06, |
| "loss": 0.2153, |
| "step": 46500 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 8.195374995451924e-06, |
| "loss": 0.2116, |
| "step": 47000 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 8.169385983606131e-06, |
| "loss": 0.1998, |
| "step": 47500 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 8.14339697176034e-06, |
| "loss": 0.2122, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 8.117407959914548e-06, |
| "loss": 0.2036, |
| "step": 48500 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 8.091418948068758e-06, |
| "loss": 0.2065, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 8.065429936222965e-06, |
| "loss": 0.2038, |
| "step": 49500 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 8.039440924377175e-06, |
| "loss": 0.207, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 8.013451912531382e-06, |
| "loss": 0.2046, |
| "step": 50500 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 7.98746290068559e-06, |
| "loss": 0.2116, |
| "step": 51000 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 7.9614738888398e-06, |
| "loss": 0.2072, |
| "step": 51500 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 7.935484876994007e-06, |
| "loss": 0.2117, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 7.909495865148216e-06, |
| "loss": 0.2197, |
| "step": 52500 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 7.883506853302424e-06, |
| "loss": 0.2037, |
| "step": 53000 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 7.857517841456633e-06, |
| "loss": 0.207, |
| "step": 53500 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 7.831528829610841e-06, |
| "loss": 0.219, |
| "step": 54000 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 7.80553981776505e-06, |
| "loss": 0.2082, |
| "step": 54500 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 7.779550805919258e-06, |
| "loss": 0.201, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 7.753561794073467e-06, |
| "loss": 0.2127, |
| "step": 55500 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 7.727572782227675e-06, |
| "loss": 0.2165, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 7.701583770381883e-06, |
| "loss": 0.203, |
| "step": 56500 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 7.675594758536092e-06, |
| "loss": 0.2147, |
| "step": 57000 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 7.6496057466903e-06, |
| "loss": 0.2006, |
| "step": 57500 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 7.623616734844508e-06, |
| "loss": 0.2063, |
| "step": 58000 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 7.597627722998717e-06, |
| "loss": 0.2046, |
| "step": 58500 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 7.571638711152926e-06, |
| "loss": 0.1942, |
| "step": 59000 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 7.5456496993071335e-06, |
| "loss": 0.1976, |
| "step": 59500 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 7.519660687461342e-06, |
| "loss": 0.2047, |
| "step": 60000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 7.4936716756155505e-06, |
| "loss": 0.2047, |
| "step": 60500 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 7.467682663769759e-06, |
| "loss": 0.2035, |
| "step": 61000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9074618661536348, |
| "eval_averaged_scores": 0.8925016186560136, |
| "eval_f1": 0.8775413711583925, |
| "eval_loss": 0.2975204288959503, |
| "eval_runtime": 36.3008, |
| "eval_samples_per_second": 1002.319, |
| "eval_steps_per_second": 62.671, |
| "step": 61401 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 7.441693651923967e-06, |
| "loss": 0.1955, |
| "step": 61500 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 7.415704640078175e-06, |
| "loss": 0.1745, |
| "step": 62000 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 7.389715628232384e-06, |
| "loss": 0.1855, |
| "step": 62500 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 7.363726616386593e-06, |
| "loss": 0.1743, |
| "step": 63000 |
| }, |
| { |
| "epoch": 3.1, |
| "learning_rate": 7.337737604540801e-06, |
| "loss": 0.1829, |
| "step": 63500 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 7.311748592695009e-06, |
| "loss": 0.1898, |
| "step": 64000 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 7.2857595808492175e-06, |
| "loss": 0.1878, |
| "step": 64500 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 7.259770569003426e-06, |
| "loss": 0.1886, |
| "step": 65000 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 7.2337815571576345e-06, |
| "loss": 0.1941, |
| "step": 65500 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 7.207792545311842e-06, |
| "loss": 0.1901, |
| "step": 66000 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 7.181803533466051e-06, |
| "loss": 0.1813, |
| "step": 66500 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 7.155814521620259e-06, |
| "loss": 0.1947, |
| "step": 67000 |
| }, |
| { |
| "epoch": 3.3, |
| "learning_rate": 7.1298255097744685e-06, |
| "loss": 0.1787, |
| "step": 67500 |
| }, |
| { |
| "epoch": 3.32, |
| "learning_rate": 7.103836497928677e-06, |
| "loss": 0.1927, |
| "step": 68000 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 7.077847486082885e-06, |
| "loss": 0.1961, |
| "step": 68500 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 7.051858474237093e-06, |
| "loss": 0.1785, |
| "step": 69000 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 7.025869462391302e-06, |
| "loss": 0.1892, |
| "step": 69500 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 6.99988045054551e-06, |
| "loss": 0.1853, |
| "step": 70000 |
| }, |
| { |
| "epoch": 3.44, |
| "learning_rate": 6.973891438699718e-06, |
| "loss": 0.1892, |
| "step": 70500 |
| }, |
| { |
| "epoch": 3.47, |
| "learning_rate": 6.947902426853926e-06, |
| "loss": 0.1908, |
| "step": 71000 |
| }, |
| { |
| "epoch": 3.49, |
| "learning_rate": 6.921913415008135e-06, |
| "loss": 0.1859, |
| "step": 71500 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 6.895924403162344e-06, |
| "loss": 0.1855, |
| "step": 72000 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 6.8699353913165525e-06, |
| "loss": 0.1763, |
| "step": 72500 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 6.84394637947076e-06, |
| "loss": 0.1886, |
| "step": 73000 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 6.817957367624969e-06, |
| "loss": 0.1935, |
| "step": 73500 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 6.791968355779177e-06, |
| "loss": 0.1889, |
| "step": 74000 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 6.765979343933386e-06, |
| "loss": 0.1837, |
| "step": 74500 |
| }, |
| { |
| "epoch": 3.66, |
| "learning_rate": 6.739990332087593e-06, |
| "loss": 0.1782, |
| "step": 75000 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 6.714001320241802e-06, |
| "loss": 0.1738, |
| "step": 75500 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 6.68801230839601e-06, |
| "loss": 0.1833, |
| "step": 76000 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 6.66202329655022e-06, |
| "loss": 0.1826, |
| "step": 76500 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 6.636034284704428e-06, |
| "loss": 0.1856, |
| "step": 77000 |
| }, |
| { |
| "epoch": 3.79, |
| "learning_rate": 6.610045272858637e-06, |
| "loss": 0.1843, |
| "step": 77500 |
| }, |
| { |
| "epoch": 3.81, |
| "learning_rate": 6.584056261012844e-06, |
| "loss": 0.1948, |
| "step": 78000 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 6.558067249167053e-06, |
| "loss": 0.1764, |
| "step": 78500 |
| }, |
| { |
| "epoch": 3.86, |
| "learning_rate": 6.532078237321261e-06, |
| "loss": 0.179, |
| "step": 79000 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 6.50608922547547e-06, |
| "loss": 0.1906, |
| "step": 79500 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 6.480100213629677e-06, |
| "loss": 0.1853, |
| "step": 80000 |
| }, |
| { |
| "epoch": 3.93, |
| "learning_rate": 6.454111201783886e-06, |
| "loss": 0.1918, |
| "step": 80500 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 6.428122189938095e-06, |
| "loss": 0.1769, |
| "step": 81000 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 6.402133178092304e-06, |
| "loss": 0.1969, |
| "step": 81500 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9094407035866429, |
| "eval_averaged_scores": 0.8949438560203089, |
| "eval_f1": 0.8804470084539749, |
| "eval_loss": 0.3116997480392456, |
| "eval_runtime": 36.2351, |
| "eval_samples_per_second": 1004.137, |
| "eval_steps_per_second": 62.784, |
| "step": 81868 |
| }, |
| { |
| "epoch": 4.01, |
| "learning_rate": 6.376144166246512e-06, |
| "loss": 0.1737, |
| "step": 82000 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 6.35015515440072e-06, |
| "loss": 0.1467, |
| "step": 82500 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 6.324166142554928e-06, |
| "loss": 0.1557, |
| "step": 83000 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 6.298177130709137e-06, |
| "loss": 0.1533, |
| "step": 83500 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 6.272188118863345e-06, |
| "loss": 0.1573, |
| "step": 84000 |
| }, |
| { |
| "epoch": 4.13, |
| "learning_rate": 6.246199107017553e-06, |
| "loss": 0.1602, |
| "step": 84500 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 6.220210095171761e-06, |
| "loss": 0.1598, |
| "step": 85000 |
| }, |
| { |
| "epoch": 4.18, |
| "learning_rate": 6.194221083325971e-06, |
| "loss": 0.162, |
| "step": 85500 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 6.168232071480179e-06, |
| "loss": 0.1668, |
| "step": 86000 |
| }, |
| { |
| "epoch": 4.23, |
| "learning_rate": 6.142243059634388e-06, |
| "loss": 0.1673, |
| "step": 86500 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 6.116254047788595e-06, |
| "loss": 0.159, |
| "step": 87000 |
| }, |
| { |
| "epoch": 4.28, |
| "learning_rate": 6.090265035942804e-06, |
| "loss": 0.1615, |
| "step": 87500 |
| }, |
| { |
| "epoch": 4.3, |
| "learning_rate": 6.064276024097012e-06, |
| "loss": 0.1742, |
| "step": 88000 |
| }, |
| { |
| "epoch": 4.32, |
| "learning_rate": 6.038287012251221e-06, |
| "loss": 0.1663, |
| "step": 88500 |
| }, |
| { |
| "epoch": 4.35, |
| "learning_rate": 6.0122980004054285e-06, |
| "loss": 0.1699, |
| "step": 89000 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 5.986308988559637e-06, |
| "loss": 0.1625, |
| "step": 89500 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 5.9603199767138455e-06, |
| "loss": 0.158, |
| "step": 90000 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 5.934330964868055e-06, |
| "loss": 0.1636, |
| "step": 90500 |
| }, |
| { |
| "epoch": 4.45, |
| "learning_rate": 5.908341953022263e-06, |
| "loss": 0.1568, |
| "step": 91000 |
| }, |
| { |
| "epoch": 4.47, |
| "learning_rate": 5.882352941176471e-06, |
| "loss": 0.165, |
| "step": 91500 |
| }, |
| { |
| "epoch": 4.5, |
| "learning_rate": 5.856363929330679e-06, |
| "loss": 0.1687, |
| "step": 92000 |
| }, |
| { |
| "epoch": 4.52, |
| "learning_rate": 5.830374917484888e-06, |
| "loss": 0.1595, |
| "step": 92500 |
| }, |
| { |
| "epoch": 4.54, |
| "learning_rate": 5.804385905639096e-06, |
| "loss": 0.1769, |
| "step": 93000 |
| }, |
| { |
| "epoch": 4.57, |
| "learning_rate": 5.778396893793304e-06, |
| "loss": 0.1567, |
| "step": 93500 |
| }, |
| { |
| "epoch": 4.59, |
| "learning_rate": 5.7524078819475125e-06, |
| "loss": 0.1724, |
| "step": 94000 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 5.726418870101721e-06, |
| "loss": 0.1745, |
| "step": 94500 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 5.70042985825593e-06, |
| "loss": 0.167, |
| "step": 95000 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 5.674440846410139e-06, |
| "loss": 0.1632, |
| "step": 95500 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 5.6484518345643465e-06, |
| "loss": 0.1737, |
| "step": 96000 |
| }, |
| { |
| "epoch": 4.71, |
| "learning_rate": 5.622462822718555e-06, |
| "loss": 0.1625, |
| "step": 96500 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 5.5964738108727635e-06, |
| "loss": 0.1582, |
| "step": 97000 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 5.570484799026972e-06, |
| "loss": 0.1691, |
| "step": 97500 |
| }, |
| { |
| "epoch": 4.79, |
| "learning_rate": 5.54449578718118e-06, |
| "loss": 0.1652, |
| "step": 98000 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 5.518506775335388e-06, |
| "loss": 0.1813, |
| "step": 98500 |
| }, |
| { |
| "epoch": 4.84, |
| "learning_rate": 5.4925177634895966e-06, |
| "loss": 0.1738, |
| "step": 99000 |
| }, |
| { |
| "epoch": 4.86, |
| "learning_rate": 5.466528751643806e-06, |
| "loss": 0.1717, |
| "step": 99500 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 5.440539739798014e-06, |
| "loss": 0.1623, |
| "step": 100000 |
| }, |
| { |
| "epoch": 4.91, |
| "learning_rate": 5.414550727952223e-06, |
| "loss": 0.1771, |
| "step": 100500 |
| }, |
| { |
| "epoch": 4.93, |
| "learning_rate": 5.3885617161064305e-06, |
| "loss": 0.157, |
| "step": 101000 |
| }, |
| { |
| "epoch": 4.96, |
| "learning_rate": 5.362572704260639e-06, |
| "loss": 0.1654, |
| "step": 101500 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 5.3365836924148475e-06, |
| "loss": 0.165, |
| "step": 102000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9078741239521781, |
| "eval_averaged_scores": 0.893031472907116, |
| "eval_f1": 0.8781888218620539, |
| "eval_loss": 0.3768274188041687, |
| "eval_runtime": 36.2704, |
| "eval_samples_per_second": 1003.16, |
| "eval_steps_per_second": 62.723, |
| "step": 102335 |
| }, |
| { |
| "epoch": 5.01, |
| "learning_rate": 5.310594680569056e-06, |
| "loss": 0.1649, |
| "step": 102500 |
| }, |
| { |
| "epoch": 5.03, |
| "learning_rate": 5.284605668723264e-06, |
| "loss": 0.1318, |
| "step": 103000 |
| }, |
| { |
| "epoch": 5.06, |
| "learning_rate": 5.258616656877472e-06, |
| "loss": 0.1436, |
| "step": 103500 |
| }, |
| { |
| "epoch": 5.08, |
| "learning_rate": 5.2326276450316815e-06, |
| "loss": 0.1509, |
| "step": 104000 |
| }, |
| { |
| "epoch": 5.11, |
| "learning_rate": 5.20663863318589e-06, |
| "loss": 0.1363, |
| "step": 104500 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 5.1806496213400985e-06, |
| "loss": 0.1321, |
| "step": 105000 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 5.154660609494306e-06, |
| "loss": 0.1412, |
| "step": 105500 |
| }, |
| { |
| "epoch": 5.18, |
| "learning_rate": 5.128671597648515e-06, |
| "loss": 0.1347, |
| "step": 106000 |
| }, |
| { |
| "epoch": 5.2, |
| "learning_rate": 5.102682585802723e-06, |
| "loss": 0.1455, |
| "step": 106500 |
| }, |
| { |
| "epoch": 5.23, |
| "learning_rate": 5.0766935739569316e-06, |
| "loss": 0.1356, |
| "step": 107000 |
| }, |
| { |
| "epoch": 5.25, |
| "learning_rate": 5.050704562111139e-06, |
| "loss": 0.1446, |
| "step": 107500 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 5.024715550265348e-06, |
| "loss": 0.1403, |
| "step": 108000 |
| }, |
| { |
| "epoch": 5.3, |
| "learning_rate": 4.998726538419556e-06, |
| "loss": 0.1339, |
| "step": 108500 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 4.972737526573765e-06, |
| "loss": 0.1342, |
| "step": 109000 |
| }, |
| { |
| "epoch": 5.35, |
| "learning_rate": 4.946748514727974e-06, |
| "loss": 0.1281, |
| "step": 109500 |
| }, |
| { |
| "epoch": 5.37, |
| "learning_rate": 4.920759502882182e-06, |
| "loss": 0.1373, |
| "step": 110000 |
| }, |
| { |
| "epoch": 5.4, |
| "learning_rate": 4.89477049103639e-06, |
| "loss": 0.1349, |
| "step": 110500 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 4.868781479190599e-06, |
| "loss": 0.1572, |
| "step": 111000 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 4.842792467344807e-06, |
| "loss": 0.1316, |
| "step": 111500 |
| }, |
| { |
| "epoch": 5.47, |
| "learning_rate": 4.816803455499016e-06, |
| "loss": 0.1287, |
| "step": 112000 |
| }, |
| { |
| "epoch": 5.5, |
| "learning_rate": 4.790814443653224e-06, |
| "loss": 0.1416, |
| "step": 112500 |
| }, |
| { |
| "epoch": 5.52, |
| "learning_rate": 4.764825431807433e-06, |
| "loss": 0.1419, |
| "step": 113000 |
| }, |
| { |
| "epoch": 5.55, |
| "learning_rate": 4.73883641996164e-06, |
| "loss": 0.1397, |
| "step": 113500 |
| }, |
| { |
| "epoch": 5.57, |
| "learning_rate": 4.712847408115849e-06, |
| "loss": 0.1466, |
| "step": 114000 |
| }, |
| { |
| "epoch": 5.59, |
| "learning_rate": 4.686858396270057e-06, |
| "loss": 0.143, |
| "step": 114500 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 4.660869384424266e-06, |
| "loss": 0.1428, |
| "step": 115000 |
| }, |
| { |
| "epoch": 5.64, |
| "learning_rate": 4.634880372578474e-06, |
| "loss": 0.1413, |
| "step": 115500 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 4.608891360732683e-06, |
| "loss": 0.1479, |
| "step": 116000 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 4.582902348886891e-06, |
| "loss": 0.1454, |
| "step": 116500 |
| }, |
| { |
| "epoch": 5.72, |
| "learning_rate": 4.5569133370411e-06, |
| "loss": 0.1332, |
| "step": 117000 |
| }, |
| { |
| "epoch": 5.74, |
| "learning_rate": 4.530924325195308e-06, |
| "loss": 0.1473, |
| "step": 117500 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 4.504935313349516e-06, |
| "loss": 0.1353, |
| "step": 118000 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 4.478946301503724e-06, |
| "loss": 0.1557, |
| "step": 118500 |
| }, |
| { |
| "epoch": 5.81, |
| "learning_rate": 4.452957289657933e-06, |
| "loss": 0.1452, |
| "step": 119000 |
| }, |
| { |
| "epoch": 5.84, |
| "learning_rate": 4.426968277812141e-06, |
| "loss": 0.1469, |
| "step": 119500 |
| }, |
| { |
| "epoch": 5.86, |
| "learning_rate": 4.40097926596635e-06, |
| "loss": 0.1394, |
| "step": 120000 |
| }, |
| { |
| "epoch": 5.89, |
| "learning_rate": 4.374990254120558e-06, |
| "loss": 0.1525, |
| "step": 120500 |
| }, |
| { |
| "epoch": 5.91, |
| "learning_rate": 4.349001242274767e-06, |
| "loss": 0.1448, |
| "step": 121000 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 4.323012230428975e-06, |
| "loss": 0.146, |
| "step": 121500 |
| }, |
| { |
| "epoch": 5.96, |
| "learning_rate": 4.297023218583184e-06, |
| "loss": 0.1365, |
| "step": 122000 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 4.271034206737391e-06, |
| "loss": 0.1467, |
| "step": 122500 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.9117768311117218, |
| "eval_averaged_scores": 0.8970713531108301, |
| "eval_f1": 0.8823658751099384, |
| "eval_loss": 0.44876691699028015, |
| "eval_runtime": 36.3498, |
| "eval_samples_per_second": 1000.969, |
| "eval_steps_per_second": 62.586, |
| "step": 122802 |
| }, |
| { |
| "epoch": 6.01, |
| "learning_rate": 4.2450451948916e-06, |
| "loss": 0.1278, |
| "step": 123000 |
| }, |
| { |
| "epoch": 6.03, |
| "learning_rate": 4.219056183045809e-06, |
| "loss": 0.1097, |
| "step": 123500 |
| }, |
| { |
| "epoch": 6.06, |
| "learning_rate": 4.193067171200017e-06, |
| "loss": 0.118, |
| "step": 124000 |
| }, |
| { |
| "epoch": 6.08, |
| "learning_rate": 4.167078159354225e-06, |
| "loss": 0.1108, |
| "step": 124500 |
| }, |
| { |
| "epoch": 6.11, |
| "learning_rate": 4.141089147508434e-06, |
| "loss": 0.1124, |
| "step": 125000 |
| }, |
| { |
| "epoch": 6.13, |
| "learning_rate": 4.115100135662642e-06, |
| "loss": 0.1161, |
| "step": 125500 |
| }, |
| { |
| "epoch": 6.16, |
| "learning_rate": 4.089111123816851e-06, |
| "loss": 0.1159, |
| "step": 126000 |
| }, |
| { |
| "epoch": 6.18, |
| "learning_rate": 4.063122111971059e-06, |
| "loss": 0.1152, |
| "step": 126500 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 4.037133100125267e-06, |
| "loss": 0.1125, |
| "step": 127000 |
| }, |
| { |
| "epoch": 6.23, |
| "learning_rate": 4.011144088279475e-06, |
| "loss": 0.1126, |
| "step": 127500 |
| }, |
| { |
| "epoch": 6.25, |
| "learning_rate": 3.985155076433685e-06, |
| "loss": 0.1233, |
| "step": 128000 |
| }, |
| { |
| "epoch": 6.28, |
| "learning_rate": 3.959166064587892e-06, |
| "loss": 0.1183, |
| "step": 128500 |
| }, |
| { |
| "epoch": 6.3, |
| "learning_rate": 3.933177052742101e-06, |
| "loss": 0.1207, |
| "step": 129000 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 3.907188040896309e-06, |
| "loss": 0.1114, |
| "step": 129500 |
| }, |
| { |
| "epoch": 6.35, |
| "learning_rate": 3.881199029050518e-06, |
| "loss": 0.1164, |
| "step": 130000 |
| }, |
| { |
| "epoch": 6.38, |
| "learning_rate": 3.855210017204726e-06, |
| "loss": 0.1179, |
| "step": 130500 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 3.829221005358935e-06, |
| "loss": 0.1115, |
| "step": 131000 |
| }, |
| { |
| "epoch": 6.42, |
| "learning_rate": 3.803231993513143e-06, |
| "loss": 0.1124, |
| "step": 131500 |
| }, |
| { |
| "epoch": 6.45, |
| "learning_rate": 3.777242981667351e-06, |
| "loss": 0.1228, |
| "step": 132000 |
| }, |
| { |
| "epoch": 6.47, |
| "learning_rate": 3.75125396982156e-06, |
| "loss": 0.1314, |
| "step": 132500 |
| }, |
| { |
| "epoch": 6.5, |
| "learning_rate": 3.7252649579757684e-06, |
| "loss": 0.1076, |
| "step": 133000 |
| }, |
| { |
| "epoch": 6.52, |
| "learning_rate": 3.6992759461299764e-06, |
| "loss": 0.1189, |
| "step": 133500 |
| }, |
| { |
| "epoch": 6.55, |
| "learning_rate": 3.673286934284185e-06, |
| "loss": 0.1234, |
| "step": 134000 |
| }, |
| { |
| "epoch": 6.57, |
| "learning_rate": 3.647297922438393e-06, |
| "loss": 0.1199, |
| "step": 134500 |
| }, |
| { |
| "epoch": 6.6, |
| "learning_rate": 3.621308910592602e-06, |
| "loss": 0.1295, |
| "step": 135000 |
| }, |
| { |
| "epoch": 6.62, |
| "learning_rate": 3.5953198987468104e-06, |
| "loss": 0.1282, |
| "step": 135500 |
| }, |
| { |
| "epoch": 6.64, |
| "learning_rate": 3.5693308869010185e-06, |
| "loss": 0.124, |
| "step": 136000 |
| }, |
| { |
| "epoch": 6.67, |
| "learning_rate": 3.543341875055227e-06, |
| "loss": 0.1292, |
| "step": 136500 |
| }, |
| { |
| "epoch": 6.69, |
| "learning_rate": 3.5173528632094355e-06, |
| "loss": 0.112, |
| "step": 137000 |
| }, |
| { |
| "epoch": 6.72, |
| "learning_rate": 3.491363851363644e-06, |
| "loss": 0.1225, |
| "step": 137500 |
| }, |
| { |
| "epoch": 6.74, |
| "learning_rate": 3.465374839517852e-06, |
| "loss": 0.1262, |
| "step": 138000 |
| }, |
| { |
| "epoch": 6.77, |
| "learning_rate": 3.4393858276720605e-06, |
| "loss": 0.1174, |
| "step": 138500 |
| }, |
| { |
| "epoch": 6.79, |
| "learning_rate": 3.4133968158262686e-06, |
| "loss": 0.1219, |
| "step": 139000 |
| }, |
| { |
| "epoch": 6.82, |
| "learning_rate": 3.3874078039804775e-06, |
| "loss": 0.1172, |
| "step": 139500 |
| }, |
| { |
| "epoch": 6.84, |
| "learning_rate": 3.361418792134686e-06, |
| "loss": 0.126, |
| "step": 140000 |
| }, |
| { |
| "epoch": 6.86, |
| "learning_rate": 3.335429780288894e-06, |
| "loss": 0.1206, |
| "step": 140500 |
| }, |
| { |
| "epoch": 6.89, |
| "learning_rate": 3.3094407684431025e-06, |
| "loss": 0.1216, |
| "step": 141000 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 3.283451756597311e-06, |
| "loss": 0.1215, |
| "step": 141500 |
| }, |
| { |
| "epoch": 6.94, |
| "learning_rate": 3.2574627447515195e-06, |
| "loss": 0.12, |
| "step": 142000 |
| }, |
| { |
| "epoch": 6.96, |
| "learning_rate": 3.2314737329057276e-06, |
| "loss": 0.1198, |
| "step": 142500 |
| }, |
| { |
| "epoch": 6.99, |
| "learning_rate": 3.205484721059936e-06, |
| "loss": 0.1093, |
| "step": 143000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9125738628555724, |
| "eval_averaged_scores": 0.8981269870097512, |
| "eval_f1": 0.8836801111639301, |
| "eval_loss": 0.48590707778930664, |
| "eval_runtime": 36.5067, |
| "eval_samples_per_second": 996.667, |
| "eval_steps_per_second": 62.317, |
| "step": 143269 |
| }, |
| { |
| "epoch": 7.01, |
| "learning_rate": 3.179495709214144e-06, |
| "loss": 0.1072, |
| "step": 143500 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 3.153506697368353e-06, |
| "loss": 0.0934, |
| "step": 144000 |
| }, |
| { |
| "epoch": 7.06, |
| "learning_rate": 3.1275176855225615e-06, |
| "loss": 0.0963, |
| "step": 144500 |
| }, |
| { |
| "epoch": 7.08, |
| "learning_rate": 3.1015286736767696e-06, |
| "loss": 0.0962, |
| "step": 145000 |
| }, |
| { |
| "epoch": 7.11, |
| "learning_rate": 3.075539661830978e-06, |
| "loss": 0.0964, |
| "step": 145500 |
| }, |
| { |
| "epoch": 7.13, |
| "learning_rate": 3.049550649985186e-06, |
| "loss": 0.0983, |
| "step": 146000 |
| }, |
| { |
| "epoch": 7.16, |
| "learning_rate": 3.023561638139395e-06, |
| "loss": 0.0942, |
| "step": 146500 |
| }, |
| { |
| "epoch": 7.18, |
| "learning_rate": 2.9975726262936036e-06, |
| "loss": 0.1016, |
| "step": 147000 |
| }, |
| { |
| "epoch": 7.21, |
| "learning_rate": 2.9715836144478116e-06, |
| "loss": 0.0806, |
| "step": 147500 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 2.94559460260202e-06, |
| "loss": 0.1037, |
| "step": 148000 |
| }, |
| { |
| "epoch": 7.26, |
| "learning_rate": 2.9196055907562286e-06, |
| "loss": 0.1013, |
| "step": 148500 |
| }, |
| { |
| "epoch": 7.28, |
| "learning_rate": 2.893616578910437e-06, |
| "loss": 0.0929, |
| "step": 149000 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 2.867627567064645e-06, |
| "loss": 0.1034, |
| "step": 149500 |
| }, |
| { |
| "epoch": 7.33, |
| "learning_rate": 2.8416385552188536e-06, |
| "loss": 0.0955, |
| "step": 150000 |
| }, |
| { |
| "epoch": 7.35, |
| "learning_rate": 2.8156495433730617e-06, |
| "loss": 0.0857, |
| "step": 150500 |
| }, |
| { |
| "epoch": 7.38, |
| "learning_rate": 2.7896605315272706e-06, |
| "loss": 0.106, |
| "step": 151000 |
| }, |
| { |
| "epoch": 7.4, |
| "learning_rate": 2.763671519681479e-06, |
| "loss": 0.0914, |
| "step": 151500 |
| }, |
| { |
| "epoch": 7.43, |
| "learning_rate": 2.737682507835687e-06, |
| "loss": 0.0979, |
| "step": 152000 |
| }, |
| { |
| "epoch": 7.45, |
| "learning_rate": 2.7116934959898957e-06, |
| "loss": 0.0986, |
| "step": 152500 |
| }, |
| { |
| "epoch": 7.48, |
| "learning_rate": 2.685704484144104e-06, |
| "loss": 0.1024, |
| "step": 153000 |
| }, |
| { |
| "epoch": 7.5, |
| "learning_rate": 2.6597154722983127e-06, |
| "loss": 0.0968, |
| "step": 153500 |
| }, |
| { |
| "epoch": 7.52, |
| "learning_rate": 2.6337264604525207e-06, |
| "loss": 0.0964, |
| "step": 154000 |
| }, |
| { |
| "epoch": 7.55, |
| "learning_rate": 2.607737448606729e-06, |
| "loss": 0.1112, |
| "step": 154500 |
| }, |
| { |
| "epoch": 7.57, |
| "learning_rate": 2.5817484367609373e-06, |
| "loss": 0.0961, |
| "step": 155000 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 2.555759424915146e-06, |
| "loss": 0.0994, |
| "step": 155500 |
| }, |
| { |
| "epoch": 7.62, |
| "learning_rate": 2.5297704130693547e-06, |
| "loss": 0.0928, |
| "step": 156000 |
| }, |
| { |
| "epoch": 7.65, |
| "learning_rate": 2.5037814012235627e-06, |
| "loss": 0.0964, |
| "step": 156500 |
| }, |
| { |
| "epoch": 7.67, |
| "learning_rate": 2.4777923893777712e-06, |
| "loss": 0.0817, |
| "step": 157000 |
| }, |
| { |
| "epoch": 7.7, |
| "learning_rate": 2.4518033775319797e-06, |
| "loss": 0.1125, |
| "step": 157500 |
| }, |
| { |
| "epoch": 7.72, |
| "learning_rate": 2.425814365686188e-06, |
| "loss": 0.0903, |
| "step": 158000 |
| }, |
| { |
| "epoch": 7.74, |
| "learning_rate": 2.3998253538403967e-06, |
| "loss": 0.0947, |
| "step": 158500 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 2.3738363419946048e-06, |
| "loss": 0.0947, |
| "step": 159000 |
| }, |
| { |
| "epoch": 7.79, |
| "learning_rate": 2.3478473301488133e-06, |
| "loss": 0.0958, |
| "step": 159500 |
| }, |
| { |
| "epoch": 7.82, |
| "learning_rate": 2.3218583183030217e-06, |
| "loss": 0.1071, |
| "step": 160000 |
| }, |
| { |
| "epoch": 7.84, |
| "learning_rate": 2.2958693064572302e-06, |
| "loss": 0.0878, |
| "step": 160500 |
| }, |
| { |
| "epoch": 7.87, |
| "learning_rate": 2.2698802946114383e-06, |
| "loss": 0.0985, |
| "step": 161000 |
| }, |
| { |
| "epoch": 7.89, |
| "learning_rate": 2.243891282765647e-06, |
| "loss": 0.0942, |
| "step": 161500 |
| }, |
| { |
| "epoch": 7.92, |
| "learning_rate": 2.2179022709198553e-06, |
| "loss": 0.1043, |
| "step": 162000 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 2.1919132590740634e-06, |
| "loss": 0.0908, |
| "step": 162500 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 2.1659242472282723e-06, |
| "loss": 0.0969, |
| "step": 163000 |
| }, |
| { |
| "epoch": 7.99, |
| "learning_rate": 2.1399352353824803e-06, |
| "loss": 0.1005, |
| "step": 163500 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9120241857908479, |
| "eval_averaged_scores": 0.897542067688103, |
| "eval_f1": 0.8830599495853579, |
| "eval_loss": 0.543989360332489, |
| "eval_runtime": 36.3255, |
| "eval_samples_per_second": 1001.637, |
| "eval_steps_per_second": 62.628, |
| "step": 163736 |
| }, |
| { |
| "epoch": 8.01, |
| "learning_rate": 2.113946223536689e-06, |
| "loss": 0.0745, |
| "step": 164000 |
| }, |
| { |
| "epoch": 8.04, |
| "learning_rate": 2.0879572116908973e-06, |
| "loss": 0.0656, |
| "step": 164500 |
| }, |
| { |
| "epoch": 8.06, |
| "learning_rate": 2.061968199845106e-06, |
| "loss": 0.0775, |
| "step": 165000 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 2.035979187999314e-06, |
| "loss": 0.0877, |
| "step": 165500 |
| }, |
| { |
| "epoch": 8.11, |
| "learning_rate": 2.0099901761535224e-06, |
| "loss": 0.0785, |
| "step": 166000 |
| }, |
| { |
| "epoch": 8.14, |
| "learning_rate": 1.984001164307731e-06, |
| "loss": 0.07, |
| "step": 166500 |
| }, |
| { |
| "epoch": 8.16, |
| "learning_rate": 1.9580121524619393e-06, |
| "loss": 0.0866, |
| "step": 167000 |
| }, |
| { |
| "epoch": 8.18, |
| "learning_rate": 1.932023140616148e-06, |
| "loss": 0.0714, |
| "step": 167500 |
| }, |
| { |
| "epoch": 8.21, |
| "learning_rate": 1.9060341287703559e-06, |
| "loss": 0.0878, |
| "step": 168000 |
| }, |
| { |
| "epoch": 8.23, |
| "learning_rate": 1.8800451169245646e-06, |
| "loss": 0.083, |
| "step": 168500 |
| }, |
| { |
| "epoch": 8.26, |
| "learning_rate": 1.8540561050787729e-06, |
| "loss": 0.081, |
| "step": 169000 |
| }, |
| { |
| "epoch": 8.28, |
| "learning_rate": 1.8280670932329814e-06, |
| "loss": 0.0726, |
| "step": 169500 |
| }, |
| { |
| "epoch": 8.31, |
| "learning_rate": 1.8020780813871896e-06, |
| "loss": 0.0764, |
| "step": 170000 |
| }, |
| { |
| "epoch": 8.33, |
| "learning_rate": 1.776089069541398e-06, |
| "loss": 0.0739, |
| "step": 170500 |
| }, |
| { |
| "epoch": 8.35, |
| "learning_rate": 1.7501000576956064e-06, |
| "loss": 0.0881, |
| "step": 171000 |
| }, |
| { |
| "epoch": 8.38, |
| "learning_rate": 1.7241110458498147e-06, |
| "loss": 0.0763, |
| "step": 171500 |
| }, |
| { |
| "epoch": 8.4, |
| "learning_rate": 1.6981220340040234e-06, |
| "loss": 0.0754, |
| "step": 172000 |
| }, |
| { |
| "epoch": 8.43, |
| "learning_rate": 1.6721330221582317e-06, |
| "loss": 0.0842, |
| "step": 172500 |
| }, |
| { |
| "epoch": 8.45, |
| "learning_rate": 1.6461440103124402e-06, |
| "loss": 0.0804, |
| "step": 173000 |
| }, |
| { |
| "epoch": 8.48, |
| "learning_rate": 1.6201549984666484e-06, |
| "loss": 0.0871, |
| "step": 173500 |
| }, |
| { |
| "epoch": 8.5, |
| "learning_rate": 1.5941659866208567e-06, |
| "loss": 0.0784, |
| "step": 174000 |
| }, |
| { |
| "epoch": 8.53, |
| "learning_rate": 1.5681769747750652e-06, |
| "loss": 0.0769, |
| "step": 174500 |
| }, |
| { |
| "epoch": 8.55, |
| "learning_rate": 1.5421879629292735e-06, |
| "loss": 0.0786, |
| "step": 175000 |
| }, |
| { |
| "epoch": 8.57, |
| "learning_rate": 1.5161989510834822e-06, |
| "loss": 0.0787, |
| "step": 175500 |
| }, |
| { |
| "epoch": 8.6, |
| "learning_rate": 1.4902099392376905e-06, |
| "loss": 0.0735, |
| "step": 176000 |
| }, |
| { |
| "epoch": 8.62, |
| "learning_rate": 1.464220927391899e-06, |
| "loss": 0.0853, |
| "step": 176500 |
| }, |
| { |
| "epoch": 8.65, |
| "learning_rate": 1.4382319155461072e-06, |
| "loss": 0.0775, |
| "step": 177000 |
| }, |
| { |
| "epoch": 8.67, |
| "learning_rate": 1.4122429037003157e-06, |
| "loss": 0.0794, |
| "step": 177500 |
| }, |
| { |
| "epoch": 8.7, |
| "learning_rate": 1.386253891854524e-06, |
| "loss": 0.0814, |
| "step": 178000 |
| }, |
| { |
| "epoch": 8.72, |
| "learning_rate": 1.3602648800087323e-06, |
| "loss": 0.0792, |
| "step": 178500 |
| }, |
| { |
| "epoch": 8.75, |
| "learning_rate": 1.3342758681629408e-06, |
| "loss": 0.0857, |
| "step": 179000 |
| }, |
| { |
| "epoch": 8.77, |
| "learning_rate": 1.308286856317149e-06, |
| "loss": 0.086, |
| "step": 179500 |
| }, |
| { |
| "epoch": 8.79, |
| "learning_rate": 1.2822978444713577e-06, |
| "loss": 0.0811, |
| "step": 180000 |
| }, |
| { |
| "epoch": 8.82, |
| "learning_rate": 1.256308832625566e-06, |
| "loss": 0.077, |
| "step": 180500 |
| }, |
| { |
| "epoch": 8.84, |
| "learning_rate": 1.2303198207797743e-06, |
| "loss": 0.0813, |
| "step": 181000 |
| }, |
| { |
| "epoch": 8.87, |
| "learning_rate": 1.2043308089339828e-06, |
| "loss": 0.0726, |
| "step": 181500 |
| }, |
| { |
| "epoch": 8.89, |
| "learning_rate": 1.1783417970881913e-06, |
| "loss": 0.0813, |
| "step": 182000 |
| }, |
| { |
| "epoch": 8.92, |
| "learning_rate": 1.1523527852423996e-06, |
| "loss": 0.081, |
| "step": 182500 |
| }, |
| { |
| "epoch": 8.94, |
| "learning_rate": 1.126363773396608e-06, |
| "loss": 0.0774, |
| "step": 183000 |
| }, |
| { |
| "epoch": 8.97, |
| "learning_rate": 1.1003747615508165e-06, |
| "loss": 0.0806, |
| "step": 183500 |
| }, |
| { |
| "epoch": 8.99, |
| "learning_rate": 1.0743857497050248e-06, |
| "loss": 0.0671, |
| "step": 184000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.9127112821217535, |
| "eval_averaged_scores": 0.8981017892633713, |
| "eval_f1": 0.883492296404989, |
| "eval_loss": 0.5674276351928711, |
| "eval_runtime": 36.287, |
| "eval_samples_per_second": 1002.701, |
| "eval_steps_per_second": 62.695, |
| "step": 184203 |
| }, |
| { |
| "epoch": 9.01, |
| "learning_rate": 1.048396737859233e-06, |
| "loss": 0.073, |
| "step": 184500 |
| }, |
| { |
| "epoch": 9.04, |
| "learning_rate": 1.0224077260134416e-06, |
| "loss": 0.0623, |
| "step": 185000 |
| }, |
| { |
| "epoch": 9.06, |
| "learning_rate": 9.9641871416765e-07, |
| "loss": 0.0562, |
| "step": 185500 |
| }, |
| { |
| "epoch": 9.09, |
| "learning_rate": 9.704297023218584e-07, |
| "loss": 0.062, |
| "step": 186000 |
| }, |
| { |
| "epoch": 9.11, |
| "learning_rate": 9.444406904760668e-07, |
| "loss": 0.0753, |
| "step": 186500 |
| }, |
| { |
| "epoch": 9.14, |
| "learning_rate": 9.184516786302752e-07, |
| "loss": 0.0624, |
| "step": 187000 |
| }, |
| { |
| "epoch": 9.16, |
| "learning_rate": 8.924626667844836e-07, |
| "loss": 0.0593, |
| "step": 187500 |
| }, |
| { |
| "epoch": 9.19, |
| "learning_rate": 8.664736549386919e-07, |
| "loss": 0.0758, |
| "step": 188000 |
| }, |
| { |
| "epoch": 9.21, |
| "learning_rate": 8.404846430929004e-07, |
| "loss": 0.0766, |
| "step": 188500 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 8.144956312471088e-07, |
| "loss": 0.0681, |
| "step": 189000 |
| }, |
| { |
| "epoch": 9.26, |
| "learning_rate": 7.885066194013171e-07, |
| "loss": 0.0778, |
| "step": 189500 |
| }, |
| { |
| "epoch": 9.28, |
| "learning_rate": 7.625176075555256e-07, |
| "loss": 0.0596, |
| "step": 190000 |
| }, |
| { |
| "epoch": 9.31, |
| "learning_rate": 7.36528595709734e-07, |
| "loss": 0.0702, |
| "step": 190500 |
| }, |
| { |
| "epoch": 9.33, |
| "learning_rate": 7.105395838639424e-07, |
| "loss": 0.0658, |
| "step": 191000 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 6.845505720181508e-07, |
| "loss": 0.0729, |
| "step": 191500 |
| }, |
| { |
| "epoch": 9.38, |
| "learning_rate": 6.585615601723591e-07, |
| "loss": 0.0656, |
| "step": 192000 |
| }, |
| { |
| "epoch": 9.41, |
| "learning_rate": 6.325725483265676e-07, |
| "loss": 0.0592, |
| "step": 192500 |
| }, |
| { |
| "epoch": 9.43, |
| "learning_rate": 6.065835364807759e-07, |
| "loss": 0.0653, |
| "step": 193000 |
| }, |
| { |
| "epoch": 9.45, |
| "learning_rate": 5.805945246349843e-07, |
| "loss": 0.0637, |
| "step": 193500 |
| }, |
| { |
| "epoch": 9.48, |
| "learning_rate": 5.546055127891928e-07, |
| "loss": 0.0597, |
| "step": 194000 |
| }, |
| { |
| "epoch": 9.5, |
| "learning_rate": 5.286165009434012e-07, |
| "loss": 0.074, |
| "step": 194500 |
| }, |
| { |
| "epoch": 9.53, |
| "learning_rate": 5.026274890976096e-07, |
| "loss": 0.0531, |
| "step": 195000 |
| }, |
| { |
| "epoch": 9.55, |
| "learning_rate": 4.7663847725181796e-07, |
| "loss": 0.0644, |
| "step": 195500 |
| }, |
| { |
| "epoch": 9.58, |
| "learning_rate": 4.5064946540602635e-07, |
| "loss": 0.0585, |
| "step": 196000 |
| }, |
| { |
| "epoch": 9.6, |
| "learning_rate": 4.246604535602348e-07, |
| "loss": 0.0624, |
| "step": 196500 |
| }, |
| { |
| "epoch": 9.63, |
| "learning_rate": 3.986714417144431e-07, |
| "loss": 0.0615, |
| "step": 197000 |
| }, |
| { |
| "epoch": 9.65, |
| "learning_rate": 3.7268242986865155e-07, |
| "loss": 0.07, |
| "step": 197500 |
| }, |
| { |
| "epoch": 9.67, |
| "learning_rate": 3.4669341802286e-07, |
| "loss": 0.0612, |
| "step": 198000 |
| }, |
| { |
| "epoch": 9.7, |
| "learning_rate": 3.207044061770684e-07, |
| "loss": 0.0703, |
| "step": 198500 |
| }, |
| { |
| "epoch": 9.72, |
| "learning_rate": 2.9471539433127676e-07, |
| "loss": 0.0748, |
| "step": 199000 |
| }, |
| { |
| "epoch": 9.75, |
| "learning_rate": 2.6872638248548514e-07, |
| "loss": 0.0602, |
| "step": 199500 |
| }, |
| { |
| "epoch": 9.77, |
| "learning_rate": 2.427373706396936e-07, |
| "loss": 0.063, |
| "step": 200000 |
| }, |
| { |
| "epoch": 9.8, |
| "learning_rate": 2.1674835879390194e-07, |
| "loss": 0.0722, |
| "step": 200500 |
| }, |
| { |
| "epoch": 9.82, |
| "learning_rate": 1.9075934694811035e-07, |
| "loss": 0.0528, |
| "step": 201000 |
| }, |
| { |
| "epoch": 9.85, |
| "learning_rate": 1.6477033510231873e-07, |
| "loss": 0.0783, |
| "step": 201500 |
| }, |
| { |
| "epoch": 9.87, |
| "learning_rate": 1.3878132325652717e-07, |
| "loss": 0.0623, |
| "step": 202000 |
| }, |
| { |
| "epoch": 9.89, |
| "learning_rate": 1.1279231141073555e-07, |
| "loss": 0.0745, |
| "step": 202500 |
| }, |
| { |
| "epoch": 9.92, |
| "learning_rate": 8.680329956494395e-08, |
| "loss": 0.0507, |
| "step": 203000 |
| }, |
| { |
| "epoch": 9.94, |
| "learning_rate": 6.081428771915235e-08, |
| "loss": 0.0633, |
| "step": 203500 |
| }, |
| { |
| "epoch": 9.97, |
| "learning_rate": 3.4825275873360744e-08, |
| "loss": 0.0659, |
| "step": 204000 |
| }, |
| { |
| "epoch": 9.99, |
| "learning_rate": 8.836264027569144e-09, |
| "loss": 0.0669, |
| "step": 204500 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.913398378452659, |
| "eval_averaged_scores": 0.8990160346845362, |
| "eval_f1": 0.8846336909164134, |
| "eval_loss": 0.611711859703064, |
| "eval_runtime": 36.3229, |
| "eval_samples_per_second": 1001.71, |
| "eval_steps_per_second": 62.633, |
| "step": 204670 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 204670, |
| "total_flos": 1.0022032836134272e+17, |
| "train_loss": 0.16490516577327513, |
| "train_runtime": 10899.0474, |
| "train_samples_per_second": 300.449, |
| "train_steps_per_second": 18.779 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 204670, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "total_flos": 1.0022032836134272e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|