| { | |
| "best_metric": 0.029198406264185905, | |
| "best_model_checkpoint": "doc-topic-model/checkpoint-33000", | |
| "epoch": 4.561706324744175, | |
| "eval_steps": 1000, | |
| "global_step": 37000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06164468006411047, | |
| "grad_norm": 0.338105708360672, | |
| "learning_rate": 1.987671063987178e-05, | |
| "loss": 0.1634, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12328936012822093, | |
| "grad_norm": 0.36255261301994324, | |
| "learning_rate": 1.9753421279743558e-05, | |
| "loss": 0.0913, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12328936012822093, | |
| "eval_accuracy": 0.9814654995273908, | |
| "eval_f1": 0.0, | |
| "eval_loss": 0.08827067911624908, | |
| "eval_precision": 0.0, | |
| "eval_recall": 0.0, | |
| "eval_runtime": 26.1724, | |
| "eval_samples_per_second": 309.906, | |
| "eval_steps_per_second": 19.372, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1849340401923314, | |
| "grad_norm": 0.32189494371414185, | |
| "learning_rate": 1.9630131919615338e-05, | |
| "loss": 0.0857, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.24657872025644187, | |
| "grad_norm": 0.3812316358089447, | |
| "learning_rate": 1.9506842559487118e-05, | |
| "loss": 0.0762, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.24657872025644187, | |
| "eval_accuracy": 0.9814654995273908, | |
| "eval_f1": 0.003808363165511463, | |
| "eval_loss": 0.06931016594171524, | |
| "eval_precision": 0.5, | |
| "eval_recall": 0.0019114611208808012, | |
| "eval_runtime": 26.3139, | |
| "eval_samples_per_second": 308.24, | |
| "eval_steps_per_second": 19.267, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.30822340032055234, | |
| "grad_norm": 0.37316232919692993, | |
| "learning_rate": 1.9383553199358898e-05, | |
| "loss": 0.0684, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.3698680803846628, | |
| "grad_norm": 0.38068899512290955, | |
| "learning_rate": 1.9260263839230674e-05, | |
| "loss": 0.0631, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.3698680803846628, | |
| "eval_accuracy": 0.9820110903739352, | |
| "eval_f1": 0.07126134035703834, | |
| "eval_loss": 0.057068832218647, | |
| "eval_precision": 0.8268251273344652, | |
| "eval_recall": 0.03723526263475801, | |
| "eval_runtime": 27.0378, | |
| "eval_samples_per_second": 299.987, | |
| "eval_steps_per_second": 18.751, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.4315127604487733, | |
| "grad_norm": 0.3704540431499481, | |
| "learning_rate": 1.9136974479102454e-05, | |
| "loss": 0.0573, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.49315744051288374, | |
| "grad_norm": 0.36780521273612976, | |
| "learning_rate": 1.9013685118974234e-05, | |
| "loss": 0.055, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.49315744051288374, | |
| "eval_accuracy": 0.9837343071775665, | |
| "eval_f1": 0.28778853313477293, | |
| "eval_loss": 0.04972168058156967, | |
| "eval_precision": 0.7635824827132038, | |
| "eval_recall": 0.17730713357290312, | |
| "eval_runtime": 26.1019, | |
| "eval_samples_per_second": 310.744, | |
| "eval_steps_per_second": 19.424, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5548021205769942, | |
| "grad_norm": 0.35589325428009033, | |
| "learning_rate": 1.8890395758846014e-05, | |
| "loss": 0.0523, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.6164468006411047, | |
| "grad_norm": 0.4649755358695984, | |
| "learning_rate": 1.876710639871779e-05, | |
| "loss": 0.0485, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.6164468006411047, | |
| "eval_accuracy": 0.9850139657085525, | |
| "eval_f1": 0.4096466253558868, | |
| "eval_loss": 0.04525615647435188, | |
| "eval_precision": 0.7589987587918908, | |
| "eval_recall": 0.2805260341004664, | |
| "eval_runtime": 27.21, | |
| "eval_samples_per_second": 298.089, | |
| "eval_steps_per_second": 18.633, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.6780914807052152, | |
| "grad_norm": 0.3579052984714508, | |
| "learning_rate": 1.864381703858957e-05, | |
| "loss": 0.0467, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.7397361607693256, | |
| "grad_norm": 0.373826801776886, | |
| "learning_rate": 1.852052767846135e-05, | |
| "loss": 0.0455, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.7397361607693256, | |
| "eval_accuracy": 0.9859691039697757, | |
| "eval_f1": 0.48744629083190977, | |
| "eval_loss": 0.042106639593839645, | |
| "eval_precision": 0.7547290798332799, | |
| "eval_recall": 0.3599663582842725, | |
| "eval_runtime": 26.1757, | |
| "eval_samples_per_second": 309.867, | |
| "eval_steps_per_second": 19.369, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.8013808408334361, | |
| "grad_norm": 0.3768202066421509, | |
| "learning_rate": 1.839723831833313e-05, | |
| "loss": 0.0443, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.8630255208975466, | |
| "grad_norm": 0.4180874526500702, | |
| "learning_rate": 1.827394895820491e-05, | |
| "loss": 0.0427, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.8630255208975466, | |
| "eval_accuracy": 0.9864466731003873, | |
| "eval_f1": 0.5342812621737437, | |
| "eval_loss": 0.03985895588994026, | |
| "eval_precision": 0.735684591658844, | |
| "eval_recall": 0.41945102836608306, | |
| "eval_runtime": 26.179, | |
| "eval_samples_per_second": 309.828, | |
| "eval_steps_per_second": 19.367, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.924670200961657, | |
| "grad_norm": 0.3766914904117584, | |
| "learning_rate": 1.8150659598076686e-05, | |
| "loss": 0.0426, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.9863148810257675, | |
| "grad_norm": 0.3016286790370941, | |
| "learning_rate": 1.8027370237948466e-05, | |
| "loss": 0.0415, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.9863148810257675, | |
| "eval_accuracy": 0.9869695900416208, | |
| "eval_f1": 0.560279278848453, | |
| "eval_loss": 0.037820931524038315, | |
| "eval_precision": 0.7479570990806946, | |
| "eval_recall": 0.44789356984478934, | |
| "eval_runtime": 26.4209, | |
| "eval_samples_per_second": 306.992, | |
| "eval_steps_per_second": 19.189, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.047959561089878, | |
| "grad_norm": 0.3540589213371277, | |
| "learning_rate": 1.7904080877820246e-05, | |
| "loss": 0.0392, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.1096042411539884, | |
| "grad_norm": 0.27701956033706665, | |
| "learning_rate": 1.7780791517692026e-05, | |
| "loss": 0.0378, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.1096042411539884, | |
| "eval_accuracy": 0.9872600994534172, | |
| "eval_f1": 0.5714966634890372, | |
| "eval_loss": 0.03682604432106018, | |
| "eval_precision": 0.7587647133274269, | |
| "eval_recall": 0.45836837678721615, | |
| "eval_runtime": 26.3246, | |
| "eval_samples_per_second": 308.114, | |
| "eval_steps_per_second": 19.26, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.171248921218099, | |
| "grad_norm": 0.5498178601264954, | |
| "learning_rate": 1.7657502157563805e-05, | |
| "loss": 0.0374, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.2328936012822094, | |
| "grad_norm": 0.5045135021209717, | |
| "learning_rate": 1.7534212797435582e-05, | |
| "loss": 0.038, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.2328936012822094, | |
| "eval_accuracy": 0.9875761170086883, | |
| "eval_f1": 0.5866377481257956, | |
| "eval_loss": 0.03590531274676323, | |
| "eval_precision": 0.765190651906519, | |
| "eval_recall": 0.4756479853199786, | |
| "eval_runtime": 26.7199, | |
| "eval_samples_per_second": 303.556, | |
| "eval_steps_per_second": 18.975, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.2945382813463198, | |
| "grad_norm": 0.3471441864967346, | |
| "learning_rate": 1.7410923437307362e-05, | |
| "loss": 0.0375, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.3561829614104304, | |
| "grad_norm": 0.43855801224708557, | |
| "learning_rate": 1.728763407717914e-05, | |
| "loss": 0.0369, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.3561829614104304, | |
| "eval_accuracy": 0.9877617596084216, | |
| "eval_f1": 0.601660516605166, | |
| "eval_loss": 0.034964971244335175, | |
| "eval_precision": 0.7582839204743634, | |
| "eval_recall": 0.4986619772153834, | |
| "eval_runtime": 26.523, | |
| "eval_samples_per_second": 305.81, | |
| "eval_steps_per_second": 19.115, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.4178276414745408, | |
| "grad_norm": 0.2650409936904907, | |
| "learning_rate": 1.716434471705092e-05, | |
| "loss": 0.0369, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.4794723215386512, | |
| "grad_norm": 0.3710925579071045, | |
| "learning_rate": 1.70410553569227e-05, | |
| "loss": 0.0368, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.4794723215386512, | |
| "eval_accuracy": 0.9880607717347096, | |
| "eval_f1": 0.6121979286536249, | |
| "eval_loss": 0.03410067781805992, | |
| "eval_precision": 0.7691417996761508, | |
| "eval_recall": 0.5084486581542932, | |
| "eval_runtime": 26.738, | |
| "eval_samples_per_second": 303.351, | |
| "eval_steps_per_second": 18.962, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.5411170016027618, | |
| "grad_norm": 0.4056677222251892, | |
| "learning_rate": 1.6917765996794478e-05, | |
| "loss": 0.0358, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.6027616816668722, | |
| "grad_norm": 0.4474567770957947, | |
| "learning_rate": 1.6794476636666258e-05, | |
| "loss": 0.0348, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.6027616816668722, | |
| "eval_accuracy": 0.9879474022081549, | |
| "eval_f1": 0.6121927864666454, | |
| "eval_loss": 0.03413223475217819, | |
| "eval_precision": 0.758359692724808, | |
| "eval_recall": 0.5132655401789128, | |
| "eval_runtime": 26.2727, | |
| "eval_samples_per_second": 308.724, | |
| "eval_steps_per_second": 19.298, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.6644063617309826, | |
| "grad_norm": 0.38684606552124023, | |
| "learning_rate": 1.6671187276538038e-05, | |
| "loss": 0.0344, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.7260510417950932, | |
| "grad_norm": 0.26073306798934937, | |
| "learning_rate": 1.6547897916409814e-05, | |
| "loss": 0.0348, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.7260510417950932, | |
| "eval_accuracy": 0.9882265746672959, | |
| "eval_f1": 0.6325194621372965, | |
| "eval_loss": 0.03322906419634819, | |
| "eval_precision": 0.7503410641200545, | |
| "eval_recall": 0.5466778805719091, | |
| "eval_runtime": 26.3689, | |
| "eval_samples_per_second": 307.597, | |
| "eval_steps_per_second": 19.227, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.7876957218592036, | |
| "grad_norm": 0.46767064929008484, | |
| "learning_rate": 1.6424608556281594e-05, | |
| "loss": 0.035, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.849340401923314, | |
| "grad_norm": 0.6348612308502197, | |
| "learning_rate": 1.6301319196153374e-05, | |
| "loss": 0.0331, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.849340401923314, | |
| "eval_accuracy": 0.9885312552699116, | |
| "eval_f1": 0.6380428462811396, | |
| "eval_loss": 0.03276761621236801, | |
| "eval_precision": 0.7686422413793104, | |
| "eval_recall": 0.5453780870097102, | |
| "eval_runtime": 26.5681, | |
| "eval_samples_per_second": 305.291, | |
| "eval_steps_per_second": 19.083, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.9109850819874246, | |
| "grad_norm": 0.3464473783969879, | |
| "learning_rate": 1.6178029836025154e-05, | |
| "loss": 0.0338, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.972629762051535, | |
| "grad_norm": 0.2982787489891052, | |
| "learning_rate": 1.605474047589693e-05, | |
| "loss": 0.0347, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.972629762051535, | |
| "eval_accuracy": 0.9886233680102373, | |
| "eval_f1": 0.6415111190497456, | |
| "eval_loss": 0.0323067270219326, | |
| "eval_precision": 0.7711218464841654, | |
| "eval_recall": 0.5492010092514719, | |
| "eval_runtime": 26.354, | |
| "eval_samples_per_second": 307.771, | |
| "eval_steps_per_second": 19.238, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.0342744421156453, | |
| "grad_norm": 0.4357479512691498, | |
| "learning_rate": 1.593145111576871e-05, | |
| "loss": 0.0322, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.095919122179756, | |
| "grad_norm": 0.40149009227752686, | |
| "learning_rate": 1.580816175564049e-05, | |
| "loss": 0.0309, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.095919122179756, | |
| "eval_accuracy": 0.9887041437979075, | |
| "eval_f1": 0.6466285410293922, | |
| "eval_loss": 0.03258202597498894, | |
| "eval_precision": 0.7694661320953787, | |
| "eval_recall": 0.5576114381833474, | |
| "eval_runtime": 27.0385, | |
| "eval_samples_per_second": 299.979, | |
| "eval_steps_per_second": 18.751, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.1575638022438666, | |
| "grad_norm": 0.5995791554450989, | |
| "learning_rate": 1.568487239551227e-05, | |
| "loss": 0.0305, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.2192084823079767, | |
| "grad_norm": 0.47302648425102234, | |
| "learning_rate": 1.5561583035384046e-05, | |
| "loss": 0.0308, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.2192084823079767, | |
| "eval_accuracy": 0.9887707483947583, | |
| "eval_f1": 0.6517841448409211, | |
| "eval_loss": 0.0319632813334465, | |
| "eval_precision": 0.766353208639041, | |
| "eval_recall": 0.5670158268980809, | |
| "eval_runtime": 26.7148, | |
| "eval_samples_per_second": 303.615, | |
| "eval_steps_per_second": 18.978, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.2808531623720874, | |
| "grad_norm": 0.47022199630737305, | |
| "learning_rate": 1.5438293675255826e-05, | |
| "loss": 0.0313, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.342497842436198, | |
| "grad_norm": 0.2270212471485138, | |
| "learning_rate": 1.5315004315127606e-05, | |
| "loss": 0.0307, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.342497842436198, | |
| "eval_accuracy": 0.9889209630174434, | |
| "eval_f1": 0.6525642165140876, | |
| "eval_loss": 0.03187329322099686, | |
| "eval_precision": 0.7791573808765786, | |
| "eval_recall": 0.5613579019802737, | |
| "eval_runtime": 26.2719, | |
| "eval_samples_per_second": 308.733, | |
| "eval_steps_per_second": 19.298, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.404142522500308, | |
| "grad_norm": 0.3840814232826233, | |
| "learning_rate": 1.5191714954999386e-05, | |
| "loss": 0.0306, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.4657872025644187, | |
| "grad_norm": 0.27500978112220764, | |
| "learning_rate": 1.5068425594871164e-05, | |
| "loss": 0.0305, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.4657872025644187, | |
| "eval_accuracy": 0.9888160962053802, | |
| "eval_f1": 0.6618680377035132, | |
| "eval_loss": 0.03158288821578026, | |
| "eval_precision": 0.7527531429685216, | |
| "eval_recall": 0.5905650279073323, | |
| "eval_runtime": 26.2735, | |
| "eval_samples_per_second": 308.714, | |
| "eval_steps_per_second": 19.297, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.5274318826285294, | |
| "grad_norm": 0.37221288681030273, | |
| "learning_rate": 1.4945136234742942e-05, | |
| "loss": 0.0307, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.5890765626926395, | |
| "grad_norm": 0.37331074476242065, | |
| "learning_rate": 1.4821846874614722e-05, | |
| "loss": 0.031, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.5890765626926395, | |
| "eval_accuracy": 0.9891434507133069, | |
| "eval_f1": 0.6675346092088703, | |
| "eval_loss": 0.030963797122240067, | |
| "eval_precision": 0.7718787635487756, | |
| "eval_recall": 0.5880418992277697, | |
| "eval_runtime": 27.4023, | |
| "eval_samples_per_second": 295.997, | |
| "eval_steps_per_second": 18.502, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.65072124275675, | |
| "grad_norm": 0.40730220079421997, | |
| "learning_rate": 1.46985575144865e-05, | |
| "loss": 0.0301, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.7123659228208608, | |
| "grad_norm": 0.36121124029159546, | |
| "learning_rate": 1.457526815435828e-05, | |
| "loss": 0.0308, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.7123659228208608, | |
| "eval_accuracy": 0.9891108569744225, | |
| "eval_f1": 0.669448507270068, | |
| "eval_loss": 0.0308150053024292, | |
| "eval_precision": 0.7653191698632832, | |
| "eval_recall": 0.5949231592629406, | |
| "eval_runtime": 27.2748, | |
| "eval_samples_per_second": 297.38, | |
| "eval_steps_per_second": 18.589, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.774010602884971, | |
| "grad_norm": 0.4057904779911041, | |
| "learning_rate": 1.445197879423006e-05, | |
| "loss": 0.0311, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.8356552829490815, | |
| "grad_norm": 0.42312464118003845, | |
| "learning_rate": 1.432868943410184e-05, | |
| "loss": 0.03, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.8356552829490815, | |
| "eval_accuracy": 0.9892851626215002, | |
| "eval_f1": 0.6722157194260199, | |
| "eval_loss": 0.030529338866472244, | |
| "eval_precision": 0.776231477773328, | |
| "eval_recall": 0.592782322807554, | |
| "eval_runtime": 27.2694, | |
| "eval_samples_per_second": 297.439, | |
| "eval_steps_per_second": 18.592, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.897299963013192, | |
| "grad_norm": 0.44624337553977966, | |
| "learning_rate": 1.4205400073973616e-05, | |
| "loss": 0.029, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.9589446430773023, | |
| "grad_norm": 0.19307683408260345, | |
| "learning_rate": 1.4082110713845396e-05, | |
| "loss": 0.0295, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.9589446430773023, | |
| "eval_accuracy": 0.9891717930949455, | |
| "eval_f1": 0.6669862715188494, | |
| "eval_loss": 0.030626777559518814, | |
| "eval_precision": 0.7755929454692885, | |
| "eval_recall": 0.5850600198791956, | |
| "eval_runtime": 26.6782, | |
| "eval_samples_per_second": 304.031, | |
| "eval_steps_per_second": 19.004, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.020589323141413, | |
| "grad_norm": 0.15014292299747467, | |
| "learning_rate": 1.3958821353717176e-05, | |
| "loss": 0.0282, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 3.0822340032055235, | |
| "grad_norm": 0.36681485176086426, | |
| "learning_rate": 1.3835531993588956e-05, | |
| "loss": 0.0271, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.0822340032055235, | |
| "eval_accuracy": 0.989312087884057, | |
| "eval_f1": 0.6787904599659285, | |
| "eval_loss": 0.030400587245821953, | |
| "eval_precision": 0.7661763291991155, | |
| "eval_recall": 0.6092973468919642, | |
| "eval_runtime": 26.2103, | |
| "eval_samples_per_second": 309.458, | |
| "eval_steps_per_second": 19.344, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.1438786832696337, | |
| "grad_norm": 0.539164125919342, | |
| "learning_rate": 1.3712242633460732e-05, | |
| "loss": 0.0272, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 3.2055233633337443, | |
| "grad_norm": 0.36777427792549133, | |
| "learning_rate": 1.3588953273332512e-05, | |
| "loss": 0.0275, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.2055233633337443, | |
| "eval_accuracy": 0.9893092536458932, | |
| "eval_f1": 0.6747995516854902, | |
| "eval_loss": 0.03040589578449726, | |
| "eval_precision": 0.7734954046842574, | |
| "eval_recall": 0.5984402477253613, | |
| "eval_runtime": 27.3631, | |
| "eval_samples_per_second": 296.421, | |
| "eval_steps_per_second": 18.529, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.267168043397855, | |
| "grad_norm": 0.24132761359214783, | |
| "learning_rate": 1.3465663913204292e-05, | |
| "loss": 0.0271, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 3.328812723461965, | |
| "grad_norm": 0.40002453327178955, | |
| "learning_rate": 1.334237455307607e-05, | |
| "loss": 0.0273, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.328812723461965, | |
| "eval_accuracy": 0.9894750565784793, | |
| "eval_f1": 0.678136511375948, | |
| "eval_loss": 0.030225617811083794, | |
| "eval_precision": 0.7827130852340937, | |
| "eval_recall": 0.5982108723908556, | |
| "eval_runtime": 26.2863, | |
| "eval_samples_per_second": 308.564, | |
| "eval_steps_per_second": 19.288, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.3904574035260757, | |
| "grad_norm": 0.3788999021053314, | |
| "learning_rate": 1.3219085192947848e-05, | |
| "loss": 0.0271, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 3.4521020835901863, | |
| "grad_norm": 0.41983020305633545, | |
| "learning_rate": 1.3095795832819628e-05, | |
| "loss": 0.0267, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.4521020835901863, | |
| "eval_accuracy": 0.9894623025067419, | |
| "eval_f1": 0.68241223199795, | |
| "eval_loss": 0.029832862317562103, | |
| "eval_precision": 0.7730043541364296, | |
| "eval_recall": 0.6108265157886689, | |
| "eval_runtime": 28.4628, | |
| "eval_samples_per_second": 284.968, | |
| "eval_steps_per_second": 17.813, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.5137467636542965, | |
| "grad_norm": 0.2937909960746765, | |
| "learning_rate": 1.2972506472691408e-05, | |
| "loss": 0.0269, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 3.575391443718407, | |
| "grad_norm": 0.46021705865859985, | |
| "learning_rate": 1.2849217112563186e-05, | |
| "loss": 0.0272, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.575391443718407, | |
| "eval_accuracy": 0.9895558323661495, | |
| "eval_f1": 0.6867029416765856, | |
| "eval_loss": 0.029792238026857376, | |
| "eval_precision": 0.7732886548587841, | |
| "eval_recall": 0.6175548589341693, | |
| "eval_runtime": 26.2698, | |
| "eval_samples_per_second": 308.757, | |
| "eval_steps_per_second": 19.3, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.6370361237825177, | |
| "grad_norm": 0.24183115363121033, | |
| "learning_rate": 1.2725927752434966e-05, | |
| "loss": 0.0276, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 3.698680803846628, | |
| "grad_norm": 0.2443745732307434, | |
| "learning_rate": 1.2602638392306746e-05, | |
| "loss": 0.0268, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.698680803846628, | |
| "eval_accuracy": 0.9894552169113323, | |
| "eval_f1": 0.6908728345311786, | |
| "eval_loss": 0.02983880043029785, | |
| "eval_precision": 0.7564592430858806, | |
| "eval_recall": 0.6357519688049545, | |
| "eval_runtime": 26.5951, | |
| "eval_samples_per_second": 304.981, | |
| "eval_steps_per_second": 19.064, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.7603254839107385, | |
| "grad_norm": 0.4146226942539215, | |
| "learning_rate": 1.2479349032178524e-05, | |
| "loss": 0.0266, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.821970163974849, | |
| "grad_norm": 0.2495744824409485, | |
| "learning_rate": 1.2356059672050302e-05, | |
| "loss": 0.0265, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.821970163974849, | |
| "eval_accuracy": 0.9894934791265445, | |
| "eval_f1": 0.6934843724160741, | |
| "eval_loss": 0.029959995299577713, | |
| "eval_precision": 0.7549734449545413, | |
| "eval_recall": 0.6412569768330912, | |
| "eval_runtime": 26.4331, | |
| "eval_samples_per_second": 306.851, | |
| "eval_steps_per_second": 19.181, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.8836148440389593, | |
| "grad_norm": 0.24122150242328644, | |
| "learning_rate": 1.2232770311922082e-05, | |
| "loss": 0.0267, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 3.94525952410307, | |
| "grad_norm": 0.39071714878082275, | |
| "learning_rate": 1.2109480951793862e-05, | |
| "loss": 0.0274, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.94525952410307, | |
| "eval_accuracy": 0.9894438799586768, | |
| "eval_f1": 0.6884436822953699, | |
| "eval_loss": 0.02959488332271576, | |
| "eval_precision": 0.7599261311172668, | |
| "eval_recall": 0.6292530009939598, | |
| "eval_runtime": 26.3935, | |
| "eval_samples_per_second": 307.31, | |
| "eval_steps_per_second": 19.209, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 4.0069042041671805, | |
| "grad_norm": 0.43818220496177673, | |
| "learning_rate": 1.1986191591665642e-05, | |
| "loss": 0.0267, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 4.068548884231291, | |
| "grad_norm": 0.440415620803833, | |
| "learning_rate": 1.1862902231537418e-05, | |
| "loss": 0.0242, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 4.068548884231291, | |
| "eval_accuracy": 0.9897754858238492, | |
| "eval_f1": 0.695839129884912, | |
| "eval_loss": 0.029198406264185905, | |
| "eval_precision": 0.77551212178162, | |
| "eval_recall": 0.6310115452251701, | |
| "eval_runtime": 26.505, | |
| "eval_samples_per_second": 306.017, | |
| "eval_steps_per_second": 19.128, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 4.130193564295402, | |
| "grad_norm": 0.30030354857444763, | |
| "learning_rate": 1.1739612871409198e-05, | |
| "loss": 0.0241, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 4.191838244359512, | |
| "grad_norm": 0.3251016139984131, | |
| "learning_rate": 1.1616323511280978e-05, | |
| "loss": 0.0245, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 4.191838244359512, | |
| "eval_accuracy": 0.9897669831093576, | |
| "eval_f1": 0.6934669100479688, | |
| "eval_loss": 0.029462898150086403, | |
| "eval_precision": 0.7795380797862187, | |
| "eval_recall": 0.6245125774141754, | |
| "eval_runtime": 26.3655, | |
| "eval_samples_per_second": 307.637, | |
| "eval_steps_per_second": 19.23, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 4.253482924423622, | |
| "grad_norm": 0.42990005016326904, | |
| "learning_rate": 1.1493034151152756e-05, | |
| "loss": 0.0242, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 4.315127604487733, | |
| "grad_norm": 0.4957457184791565, | |
| "learning_rate": 1.1369744791024534e-05, | |
| "loss": 0.024, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 4.315127604487733, | |
| "eval_accuracy": 0.9897088812269984, | |
| "eval_f1": 0.6963539053353404, | |
| "eval_loss": 0.029602373018860817, | |
| "eval_precision": 0.7683860847097905, | |
| "eval_recall": 0.6366694701429773, | |
| "eval_runtime": 27.9882, | |
| "eval_samples_per_second": 289.801, | |
| "eval_steps_per_second": 18.115, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 4.376772284551843, | |
| "grad_norm": 0.44469302892684937, | |
| "learning_rate": 1.1246455430896314e-05, | |
| "loss": 0.025, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 4.4384169646159535, | |
| "grad_norm": 0.6926087737083435, | |
| "learning_rate": 1.1123166070768094e-05, | |
| "loss": 0.0241, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 4.4384169646159535, | |
| "eval_accuracy": 0.989632356796574, | |
| "eval_f1": 0.6952174637560407, | |
| "eval_loss": 0.02927555702626705, | |
| "eval_precision": 0.7637528604118993, | |
| "eval_recall": 0.6379692637051763, | |
| "eval_runtime": 26.5512, | |
| "eval_samples_per_second": 305.485, | |
| "eval_steps_per_second": 19.095, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 4.5000616446800645, | |
| "grad_norm": 0.3777313530445099, | |
| "learning_rate": 1.0999876710639872e-05, | |
| "loss": 0.0233, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 4.561706324744175, | |
| "grad_norm": 0.43268105387687683, | |
| "learning_rate": 1.0876587350511652e-05, | |
| "loss": 0.0239, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 4.561706324744175, | |
| "eval_accuracy": 0.9896252712011643, | |
| "eval_f1": 0.6914485607114258, | |
| "eval_loss": 0.029605692252516747, | |
| "eval_precision": 0.7703794139744553, | |
| "eval_recall": 0.6271886229834085, | |
| "eval_runtime": 26.473, | |
| "eval_samples_per_second": 306.388, | |
| "eval_steps_per_second": 19.152, | |
| "step": 37000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 81110, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 1000, | |
| "total_flos": 836798674117788.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |