| { | |
| "best_metric": 0.4315283000469208, | |
| "best_model_checkpoint": "xblock-large-patch2-224/checkpoint-5181", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 5181, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 19.092004776000977, | |
| "learning_rate": 2.2157996146435453e-06, | |
| "loss": 2.5421, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 11.274575233459473, | |
| "learning_rate": 4.624277456647399e-06, | |
| "loss": 1.9399, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 10.260701179504395, | |
| "learning_rate": 7.032755298651253e-06, | |
| "loss": 1.5011, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 9.719381332397461, | |
| "learning_rate": 9.441233140655107e-06, | |
| "loss": 1.3912, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 10.234914779663086, | |
| "learning_rate": 1.184971098265896e-05, | |
| "loss": 1.2166, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 13.722230911254883, | |
| "learning_rate": 1.4258188824662813e-05, | |
| "loss": 1.1751, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 17.890995025634766, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 1.1862, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 13.72986125946045, | |
| "learning_rate": 1.907514450867052e-05, | |
| "loss": 0.8828, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 12.903642654418945, | |
| "learning_rate": 2.1483622350674377e-05, | |
| "loss": 0.9541, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 8.937955856323242, | |
| "learning_rate": 2.3892100192678228e-05, | |
| "loss": 0.9032, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 10.743949890136719, | |
| "learning_rate": 2.6300578034682083e-05, | |
| "loss": 1.0891, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 9.766569137573242, | |
| "learning_rate": 2.8709055876685937e-05, | |
| "loss": 0.8951, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 10.973127365112305, | |
| "learning_rate": 3.111753371868979e-05, | |
| "loss": 1.0709, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 13.805391311645508, | |
| "learning_rate": 3.352601156069364e-05, | |
| "loss": 1.1141, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 7.355842113494873, | |
| "learning_rate": 3.59344894026975e-05, | |
| "loss": 1.0031, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 10.688377380371094, | |
| "learning_rate": 3.834296724470135e-05, | |
| "loss": 1.0344, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 8.896137237548828, | |
| "learning_rate": 4.07514450867052e-05, | |
| "loss": 1.0991, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 11.027874946594238, | |
| "learning_rate": 4.3159922928709055e-05, | |
| "loss": 1.0221, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 27.792613983154297, | |
| "learning_rate": 4.556840077071291e-05, | |
| "loss": 0.8256, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 8.005478858947754, | |
| "learning_rate": 4.7976878612716764e-05, | |
| "loss": 0.9862, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 11.962843894958496, | |
| "learning_rate": 4.995709995709996e-05, | |
| "loss": 1.0462, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 7.288177013397217, | |
| "learning_rate": 4.9688974688974696e-05, | |
| "loss": 0.9999, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 3.769800901412964, | |
| "learning_rate": 4.9420849420849425e-05, | |
| "loss": 1.1262, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 8.510008811950684, | |
| "learning_rate": 4.9152724152724154e-05, | |
| "loss": 0.944, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 8.264263153076172, | |
| "learning_rate": 4.888459888459889e-05, | |
| "loss": 0.9652, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 10.604584693908691, | |
| "learning_rate": 4.861647361647362e-05, | |
| "loss": 0.9246, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 5.483927249908447, | |
| "learning_rate": 4.834834834834835e-05, | |
| "loss": 1.0557, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 10.221104621887207, | |
| "learning_rate": 4.808022308022308e-05, | |
| "loss": 0.7906, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 5.725340843200684, | |
| "learning_rate": 4.781209781209782e-05, | |
| "loss": 0.9, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 7.057939052581787, | |
| "learning_rate": 4.754397254397255e-05, | |
| "loss": 0.8744, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 9.354517936706543, | |
| "learning_rate": 4.727584727584728e-05, | |
| "loss": 1.0611, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 4.2830491065979, | |
| "learning_rate": 4.700772200772201e-05, | |
| "loss": 0.8894, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 10.702705383300781, | |
| "learning_rate": 4.673959673959674e-05, | |
| "loss": 1.0246, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 4.7863359451293945, | |
| "learning_rate": 4.647147147147147e-05, | |
| "loss": 0.7967, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 7.841278553009033, | |
| "learning_rate": 4.62033462033462e-05, | |
| "loss": 0.8616, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 6.256266117095947, | |
| "learning_rate": 4.593522093522094e-05, | |
| "loss": 0.9735, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 6.177362442016602, | |
| "learning_rate": 4.566709566709567e-05, | |
| "loss": 0.8287, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 6.448288917541504, | |
| "learning_rate": 4.53989703989704e-05, | |
| "loss": 1.0062, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 7.912018299102783, | |
| "learning_rate": 4.513084513084513e-05, | |
| "loss": 1.0174, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 9.055561065673828, | |
| "learning_rate": 4.486271986271987e-05, | |
| "loss": 0.9616, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 7.433628559112549, | |
| "learning_rate": 4.4594594594594596e-05, | |
| "loss": 0.9309, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 3.5334954261779785, | |
| "learning_rate": 4.4326469326469325e-05, | |
| "loss": 0.807, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 5.620259761810303, | |
| "learning_rate": 4.405834405834406e-05, | |
| "loss": 0.8042, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 7.67726469039917, | |
| "learning_rate": 4.379021879021879e-05, | |
| "loss": 0.7394, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 9.430630683898926, | |
| "learning_rate": 4.3522093522093526e-05, | |
| "loss": 0.7895, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 9.241034507751465, | |
| "learning_rate": 4.3253968253968256e-05, | |
| "loss": 0.8032, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 7.471988201141357, | |
| "learning_rate": 4.298584298584299e-05, | |
| "loss": 0.7669, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 3.161353588104248, | |
| "learning_rate": 4.271771771771772e-05, | |
| "loss": 0.8795, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 6.6813201904296875, | |
| "learning_rate": 4.244959244959245e-05, | |
| "loss": 0.819, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 16.08786392211914, | |
| "learning_rate": 4.2181467181467186e-05, | |
| "loss": 0.8779, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 3.753849506378174, | |
| "learning_rate": 4.1913341913341915e-05, | |
| "loss": 0.9255, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 5.4661431312561035, | |
| "learning_rate": 4.1645216645216644e-05, | |
| "loss": 0.8028, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 6.559650897979736, | |
| "learning_rate": 4.137709137709138e-05, | |
| "loss": 0.7556, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 5.7179341316223145, | |
| "learning_rate": 4.1108966108966116e-05, | |
| "loss": 0.7076, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 12.687734603881836, | |
| "learning_rate": 4.0840840840840845e-05, | |
| "loss": 0.8583, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 5.1677398681640625, | |
| "learning_rate": 4.0572715572715574e-05, | |
| "loss": 0.8944, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 11.766656875610352, | |
| "learning_rate": 4.03045903045903e-05, | |
| "loss": 0.5638, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 4.12522554397583, | |
| "learning_rate": 4.003646503646504e-05, | |
| "loss": 0.6883, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 2.586186408996582, | |
| "learning_rate": 3.976833976833977e-05, | |
| "loss": 0.8784, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 4.075995445251465, | |
| "learning_rate": 3.95002145002145e-05, | |
| "loss": 0.7596, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 12.722098350524902, | |
| "learning_rate": 3.923208923208923e-05, | |
| "loss": 0.6316, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 12.86962890625, | |
| "learning_rate": 3.896396396396397e-05, | |
| "loss": 0.7721, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 10.640520095825195, | |
| "learning_rate": 3.86958386958387e-05, | |
| "loss": 0.711, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 7.080173015594482, | |
| "learning_rate": 3.842771342771343e-05, | |
| "loss": 0.8145, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 2.814232587814331, | |
| "learning_rate": 3.815958815958816e-05, | |
| "loss": 0.8509, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 12.209416389465332, | |
| "learning_rate": 3.789146289146289e-05, | |
| "loss": 0.8037, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 3.5646421909332275, | |
| "learning_rate": 3.762333762333762e-05, | |
| "loss": 0.8347, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 8.00243854522705, | |
| "learning_rate": 3.735521235521236e-05, | |
| "loss": 0.7534, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 5.955112934112549, | |
| "learning_rate": 3.708708708708709e-05, | |
| "loss": 0.9062, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7815393518518519, | |
| "eval_f1_macro": 0.36763187534206965, | |
| "eval_f1_micro": 0.7815393518518519, | |
| "eval_f1_weighted": 0.7649698649865229, | |
| "eval_loss": 0.6848556399345398, | |
| "eval_precision_macro": 0.4527657259795588, | |
| "eval_precision_micro": 0.7815393518518519, | |
| "eval_precision_weighted": 0.7691072277887989, | |
| "eval_recall_macro": 0.3815255183458625, | |
| "eval_recall_micro": 0.7815393518518519, | |
| "eval_recall_weighted": 0.7815393518518519, | |
| "eval_runtime": 3330.8434, | |
| "eval_samples_per_second": 1.038, | |
| "eval_steps_per_second": 0.065, | |
| "step": 1727 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 3.4409847259521484, | |
| "learning_rate": 3.681896181896182e-05, | |
| "loss": 0.8201, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 7.564268589019775, | |
| "learning_rate": 3.655083655083655e-05, | |
| "loss": 0.548, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 10.243828773498535, | |
| "learning_rate": 3.628271128271129e-05, | |
| "loss": 0.6348, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 14.57152271270752, | |
| "learning_rate": 3.6014586014586017e-05, | |
| "loss": 0.66, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 4.4091973304748535, | |
| "learning_rate": 3.5746460746460746e-05, | |
| "loss": 0.8169, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 5.423861503601074, | |
| "learning_rate": 3.547833547833548e-05, | |
| "loss": 0.7353, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 6.510718822479248, | |
| "learning_rate": 3.521021021021021e-05, | |
| "loss": 0.7008, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 2.5035436153411865, | |
| "learning_rate": 3.4942084942084947e-05, | |
| "loss": 0.5975, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 15.364286422729492, | |
| "learning_rate": 3.4673959673959676e-05, | |
| "loss": 0.8198, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 16.03240966796875, | |
| "learning_rate": 3.440583440583441e-05, | |
| "loss": 0.7089, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 8.521039962768555, | |
| "learning_rate": 3.413770913770914e-05, | |
| "loss": 0.6514, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 5.206024169921875, | |
| "learning_rate": 3.386958386958387e-05, | |
| "loss": 0.7545, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 7.756472110748291, | |
| "learning_rate": 3.36014586014586e-05, | |
| "loss": 0.8055, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 17.274944305419922, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.7212, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 5.303420543670654, | |
| "learning_rate": 3.3065208065208064e-05, | |
| "loss": 0.7299, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 5.07370138168335, | |
| "learning_rate": 3.27970827970828e-05, | |
| "loss": 0.5858, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 6.2755937576293945, | |
| "learning_rate": 3.252895752895753e-05, | |
| "loss": 0.7687, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 12.160276412963867, | |
| "learning_rate": 3.227155727155727e-05, | |
| "loss": 0.727, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 3.9984447956085205, | |
| "learning_rate": 3.2003432003432e-05, | |
| "loss": 0.6697, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 5.756568908691406, | |
| "learning_rate": 3.173530673530674e-05, | |
| "loss": 0.7912, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 8.783411026000977, | |
| "learning_rate": 3.1467181467181466e-05, | |
| "loss": 0.7035, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 5.451704502105713, | |
| "learning_rate": 3.1199056199056196e-05, | |
| "loss": 0.7144, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 5.039503574371338, | |
| "learning_rate": 3.093093093093093e-05, | |
| "loss": 0.7893, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 12.280179023742676, | |
| "learning_rate": 3.066280566280567e-05, | |
| "loss": 0.5903, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 6.999240398406982, | |
| "learning_rate": 3.0394680394680397e-05, | |
| "loss": 0.651, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 7.355953216552734, | |
| "learning_rate": 3.012655512655513e-05, | |
| "loss": 0.878, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 12.737029075622559, | |
| "learning_rate": 2.9858429858429858e-05, | |
| "loss": 0.731, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 4.184784889221191, | |
| "learning_rate": 2.959030459030459e-05, | |
| "loss": 0.8775, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 9.188583374023438, | |
| "learning_rate": 2.9322179322179323e-05, | |
| "loss": 0.7259, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 12.98018741607666, | |
| "learning_rate": 2.906477906477907e-05, | |
| "loss": 0.6219, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 12.084989547729492, | |
| "learning_rate": 2.87966537966538e-05, | |
| "loss": 0.5074, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 5.283312797546387, | |
| "learning_rate": 2.852852852852853e-05, | |
| "loss": 0.6606, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 19.32860565185547, | |
| "learning_rate": 2.826040326040326e-05, | |
| "loss": 0.7651, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 4.0794901847839355, | |
| "learning_rate": 2.7992277992277993e-05, | |
| "loss": 0.6737, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 33.1405029296875, | |
| "learning_rate": 2.7724152724152726e-05, | |
| "loss": 0.654, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 4.921344757080078, | |
| "learning_rate": 2.7456027456027455e-05, | |
| "loss": 0.7194, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 5.453707695007324, | |
| "learning_rate": 2.7187902187902187e-05, | |
| "loss": 0.6851, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 8.766169548034668, | |
| "learning_rate": 2.6919776919776923e-05, | |
| "loss": 0.8228, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 8.957389831542969, | |
| "learning_rate": 2.6651651651651656e-05, | |
| "loss": 0.6645, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 5.715158939361572, | |
| "learning_rate": 2.6383526383526385e-05, | |
| "loss": 0.6905, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 6.306962490081787, | |
| "learning_rate": 2.6115401115401117e-05, | |
| "loss": 0.6924, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 8.548517227172852, | |
| "learning_rate": 2.5847275847275846e-05, | |
| "loss": 0.8402, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 7.5719895362854, | |
| "learning_rate": 2.557915057915058e-05, | |
| "loss": 0.7758, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 8.910326957702637, | |
| "learning_rate": 2.531102531102531e-05, | |
| "loss": 0.5645, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 8.304277420043945, | |
| "learning_rate": 2.504290004290004e-05, | |
| "loss": 0.7066, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 13.99254035949707, | |
| "learning_rate": 2.4774774774774777e-05, | |
| "loss": 0.7396, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 3.806931257247925, | |
| "learning_rate": 2.4506649506649506e-05, | |
| "loss": 0.6334, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 6.550988674163818, | |
| "learning_rate": 2.423852423852424e-05, | |
| "loss": 0.9251, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 3.1442198753356934, | |
| "learning_rate": 2.397039897039897e-05, | |
| "loss": 0.546, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 5.249305248260498, | |
| "learning_rate": 2.3702273702273703e-05, | |
| "loss": 0.6419, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 5.300810813903809, | |
| "learning_rate": 2.3434148434148436e-05, | |
| "loss": 0.535, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 8.079426765441895, | |
| "learning_rate": 2.3166023166023168e-05, | |
| "loss": 0.8142, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 6.737719535827637, | |
| "learning_rate": 2.28978978978979e-05, | |
| "loss": 0.5974, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 5.037626266479492, | |
| "learning_rate": 2.262977262977263e-05, | |
| "loss": 0.7068, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 3.4523470401763916, | |
| "learning_rate": 2.2361647361647362e-05, | |
| "loss": 0.5756, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 2.2966675758361816, | |
| "learning_rate": 2.2093522093522095e-05, | |
| "loss": 0.4941, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 11.497820854187012, | |
| "learning_rate": 2.1825396825396827e-05, | |
| "loss": 0.8353, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 11.813599586486816, | |
| "learning_rate": 2.1557271557271557e-05, | |
| "loss": 0.8303, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 2.504293203353882, | |
| "learning_rate": 2.128914628914629e-05, | |
| "loss": 0.5574, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 8.983193397521973, | |
| "learning_rate": 2.102102102102102e-05, | |
| "loss": 0.6033, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 7.629824161529541, | |
| "learning_rate": 2.0752895752895754e-05, | |
| "loss": 0.6305, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 10.86919116973877, | |
| "learning_rate": 2.0484770484770487e-05, | |
| "loss": 0.6045, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 6.0854387283325195, | |
| "learning_rate": 2.0216645216645216e-05, | |
| "loss": 0.6208, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 6.228011131286621, | |
| "learning_rate": 1.994851994851995e-05, | |
| "loss": 0.6249, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 7.076812744140625, | |
| "learning_rate": 1.968039468039468e-05, | |
| "loss": 0.6176, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 7.893978595733643, | |
| "learning_rate": 1.9412269412269413e-05, | |
| "loss": 0.7779, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 7.72683048248291, | |
| "learning_rate": 1.9144144144144142e-05, | |
| "loss": 0.669, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 3.820025682449341, | |
| "learning_rate": 1.887601887601888e-05, | |
| "loss": 0.6182, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 5.642152309417725, | |
| "learning_rate": 1.8607893607893607e-05, | |
| "loss": 0.6453, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8454861111111112, | |
| "eval_f1_macro": 0.5038698715266201, | |
| "eval_f1_micro": 0.845486111111111, | |
| "eval_f1_weighted": 0.830657390454042, | |
| "eval_loss": 0.4813511371612549, | |
| "eval_precision_macro": 0.7578927995388053, | |
| "eval_precision_micro": 0.8454861111111112, | |
| "eval_precision_weighted": 0.8347843050246918, | |
| "eval_recall_macro": 0.456096431265928, | |
| "eval_recall_micro": 0.8454861111111112, | |
| "eval_recall_weighted": 0.8454861111111112, | |
| "eval_runtime": 3281.605, | |
| "eval_samples_per_second": 1.053, | |
| "eval_steps_per_second": 0.066, | |
| "step": 3454 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 8.240702629089355, | |
| "learning_rate": 1.833976833976834e-05, | |
| "loss": 0.4854, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 7.087810039520264, | |
| "learning_rate": 1.8071643071643072e-05, | |
| "loss": 0.5459, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.7334815859794617, | |
| "learning_rate": 1.7803517803517805e-05, | |
| "loss": 0.5212, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 6.095980167388916, | |
| "learning_rate": 1.7535392535392538e-05, | |
| "loss": 0.5191, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 10.246546745300293, | |
| "learning_rate": 1.7267267267267267e-05, | |
| "loss": 0.6645, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 3.6809566020965576, | |
| "learning_rate": 1.6999141999142e-05, | |
| "loss": 0.5629, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 13.609752655029297, | |
| "learning_rate": 1.673101673101673e-05, | |
| "loss": 0.6598, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 5.554472923278809, | |
| "learning_rate": 1.6462891462891464e-05, | |
| "loss": 0.4935, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 16.72881317138672, | |
| "learning_rate": 1.6205491205491204e-05, | |
| "loss": 0.379, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 9.573266983032227, | |
| "learning_rate": 1.593736593736594e-05, | |
| "loss": 0.7141, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 5.245655059814453, | |
| "learning_rate": 1.566924066924067e-05, | |
| "loss": 0.7518, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 13.11945915222168, | |
| "learning_rate": 1.54011154011154e-05, | |
| "loss": 0.6588, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 6.219137191772461, | |
| "learning_rate": 1.5132990132990132e-05, | |
| "loss": 0.5884, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 6.516097545623779, | |
| "learning_rate": 1.4864864864864867e-05, | |
| "loss": 0.4572, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 4.23282527923584, | |
| "learning_rate": 1.4596739596739597e-05, | |
| "loss": 0.6028, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 2.6169815063476562, | |
| "learning_rate": 1.4328614328614328e-05, | |
| "loss": 0.6198, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 4.720090389251709, | |
| "learning_rate": 1.4060489060489059e-05, | |
| "loss": 0.556, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 7.367048263549805, | |
| "learning_rate": 1.3792363792363793e-05, | |
| "loss": 0.5812, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 7.3934173583984375, | |
| "learning_rate": 1.3524238524238526e-05, | |
| "loss": 0.6141, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 10.528743743896484, | |
| "learning_rate": 1.3256113256113257e-05, | |
| "loss": 0.6047, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 7.282771110534668, | |
| "learning_rate": 1.2987987987987987e-05, | |
| "loss": 0.6281, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 11.970826148986816, | |
| "learning_rate": 1.2719862719862722e-05, | |
| "loss": 0.4103, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 6.620480537414551, | |
| "learning_rate": 1.2451737451737452e-05, | |
| "loss": 0.5386, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 5.699476718902588, | |
| "learning_rate": 1.2183612183612183e-05, | |
| "loss": 0.6507, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 2.477766275405884, | |
| "learning_rate": 1.1915486915486916e-05, | |
| "loss": 0.524, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 6.517852306365967, | |
| "learning_rate": 1.1647361647361647e-05, | |
| "loss": 0.6979, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 4.506691932678223, | |
| "learning_rate": 1.1379236379236379e-05, | |
| "loss": 0.4651, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 6.522432804107666, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.6845, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 12.015291213989258, | |
| "learning_rate": 1.0842985842985844e-05, | |
| "loss": 0.5348, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 7.297937393188477, | |
| "learning_rate": 1.0574860574860575e-05, | |
| "loss": 0.5412, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 13.665657997131348, | |
| "learning_rate": 1.0306735306735307e-05, | |
| "loss": 0.5137, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 11.743260383605957, | |
| "learning_rate": 1.0038610038610038e-05, | |
| "loss": 0.5738, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 10.24691104888916, | |
| "learning_rate": 9.77048477048477e-06, | |
| "loss": 0.5134, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 4.543239116668701, | |
| "learning_rate": 9.502359502359502e-06, | |
| "loss": 0.6055, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 7.915064811706543, | |
| "learning_rate": 9.234234234234234e-06, | |
| "loss": 0.5153, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 8.37210750579834, | |
| "learning_rate": 8.966108966108967e-06, | |
| "loss": 0.4754, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 7.3417487144470215, | |
| "learning_rate": 8.6979836979837e-06, | |
| "loss": 0.6285, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 9.027023315429688, | |
| "learning_rate": 8.42985842985843e-06, | |
| "loss": 0.4925, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 7.813179016113281, | |
| "learning_rate": 8.161733161733163e-06, | |
| "loss": 0.5089, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 13.45531940460205, | |
| "learning_rate": 7.893607893607893e-06, | |
| "loss": 0.4717, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 7.09887170791626, | |
| "learning_rate": 7.625482625482626e-06, | |
| "loss": 0.6506, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 4.6297383308410645, | |
| "learning_rate": 7.357357357357357e-06, | |
| "loss": 0.4828, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 8.980986595153809, | |
| "learning_rate": 7.089232089232089e-06, | |
| "loss": 0.4233, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 26.76249885559082, | |
| "learning_rate": 6.821106821106821e-06, | |
| "loss": 0.5748, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 8.72842788696289, | |
| "learning_rate": 6.552981552981553e-06, | |
| "loss": 0.6565, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 9.191315650939941, | |
| "learning_rate": 6.284856284856284e-06, | |
| "loss": 0.5332, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 7.631181240081787, | |
| "learning_rate": 6.016731016731017e-06, | |
| "loss": 0.4692, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 7.811351299285889, | |
| "learning_rate": 5.748605748605749e-06, | |
| "loss": 0.6485, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 8.997116088867188, | |
| "learning_rate": 5.480480480480481e-06, | |
| "loss": 0.4207, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 9.758033752441406, | |
| "learning_rate": 5.212355212355213e-06, | |
| "loss": 0.5205, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 5.98590612411499, | |
| "learning_rate": 4.944229944229944e-06, | |
| "loss": 0.6115, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 11.506319046020508, | |
| "learning_rate": 4.676104676104676e-06, | |
| "loss": 0.4449, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 7.969517230987549, | |
| "learning_rate": 4.4079794079794084e-06, | |
| "loss": 0.5384, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 0.8463253974914551, | |
| "learning_rate": 4.13985413985414e-06, | |
| "loss": 0.4981, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 12.771890640258789, | |
| "learning_rate": 3.871728871728872e-06, | |
| "loss": 0.4786, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 0.6047688126564026, | |
| "learning_rate": 3.603603603603604e-06, | |
| "loss": 0.4913, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 7.117040157318115, | |
| "learning_rate": 3.3354783354783355e-06, | |
| "loss": 0.5386, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 5.262890338897705, | |
| "learning_rate": 3.0673530673530676e-06, | |
| "loss": 0.6115, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 0.5500399470329285, | |
| "learning_rate": 2.7992277992277993e-06, | |
| "loss": 0.5285, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 2.5653154850006104, | |
| "learning_rate": 2.531102531102531e-06, | |
| "loss": 0.3621, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 5.71751594543457, | |
| "learning_rate": 2.262977262977263e-06, | |
| "loss": 0.6007, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 12.138904571533203, | |
| "learning_rate": 1.9948519948519947e-06, | |
| "loss": 0.5797, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 8.795024871826172, | |
| "learning_rate": 1.7267267267267268e-06, | |
| "loss": 0.5585, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 3.7619569301605225, | |
| "learning_rate": 1.4586014586014587e-06, | |
| "loss": 0.3951, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 2.763073205947876, | |
| "learning_rate": 1.1904761904761904e-06, | |
| "loss": 0.287, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 7.337412357330322, | |
| "learning_rate": 9.223509223509224e-07, | |
| "loss": 0.4416, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 4.266438961029053, | |
| "learning_rate": 6.542256542256542e-07, | |
| "loss": 0.5275, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 8.12879753112793, | |
| "learning_rate": 3.8610038610038613e-07, | |
| "loss": 0.4297, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 6.199108123779297, | |
| "learning_rate": 1.1797511797511798e-07, | |
| "loss": 0.4389, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8602430555555556, | |
| "eval_f1_macro": 0.6149830093941424, | |
| "eval_f1_micro": 0.8602430555555556, | |
| "eval_f1_weighted": 0.8515059109185544, | |
| "eval_loss": 0.4315283000469208, | |
| "eval_precision_macro": 0.7610988679415244, | |
| "eval_precision_micro": 0.8602430555555556, | |
| "eval_precision_weighted": 0.8532444856848228, | |
| "eval_recall_macro": 0.5527145295483504, | |
| "eval_recall_micro": 0.8602430555555556, | |
| "eval_recall_weighted": 0.8602430555555556, | |
| "eval_runtime": 3373.7409, | |
| "eval_samples_per_second": 1.024, | |
| "eval_steps_per_second": 0.064, | |
| "step": 5181 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 5181, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 1.135272556528692e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |