| { |
| "best_global_step": 750, |
| "best_metric": 0.26988574862480164, |
| "best_model_checkpoint": "models/qwen-0.5b-distilled/checkpoint-750", |
| "epoch": 0.9829619921363041, |
| "eval_steps": 25, |
| "global_step": 750, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.001310615989515072, |
| "grad_norm": 9.375, |
| "learning_rate": 0.0, |
| "loss": 0.548, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01310615989515072, |
| "grad_norm": 6.375, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.43, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02621231979030144, |
| "grad_norm": 9.25, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.4675, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0327653997378768, |
| "eval_loss": 0.4063957631587982, |
| "eval_runtime": 4.2148, |
| "eval_samples_per_second": 7.355, |
| "eval_steps_per_second": 0.949, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.039318479685452164, |
| "grad_norm": 4.84375, |
| "learning_rate": 5.8e-06, |
| "loss": 0.3877, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05242463958060288, |
| "grad_norm": 5.125, |
| "learning_rate": 7.800000000000002e-06, |
| "loss": 0.3403, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0655307994757536, |
| "grad_norm": 6.375, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 0.3379, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0655307994757536, |
| "eval_loss": 0.3531549572944641, |
| "eval_runtime": 3.6193, |
| "eval_samples_per_second": 8.565, |
| "eval_steps_per_second": 1.105, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07863695937090433, |
| "grad_norm": 4.75, |
| "learning_rate": 1.18e-05, |
| "loss": 0.3865, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09174311926605505, |
| "grad_norm": 5.34375, |
| "learning_rate": 1.38e-05, |
| "loss": 0.3487, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0982961992136304, |
| "eval_loss": 0.3395631015300751, |
| "eval_runtime": 3.6271, |
| "eval_samples_per_second": 8.547, |
| "eval_steps_per_second": 1.103, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.10484927916120576, |
| "grad_norm": 4.1875, |
| "learning_rate": 1.58e-05, |
| "loss": 0.3037, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.11795543905635648, |
| "grad_norm": 7.46875, |
| "learning_rate": 1.7800000000000002e-05, |
| "loss": 0.3588, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1310615989515072, |
| "grad_norm": 5.9375, |
| "learning_rate": 1.98e-05, |
| "loss": 0.3423, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1310615989515072, |
| "eval_loss": 0.3489256501197815, |
| "eval_runtime": 3.6228, |
| "eval_samples_per_second": 8.557, |
| "eval_steps_per_second": 1.104, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.14416775884665792, |
| "grad_norm": 5.65625, |
| "learning_rate": 1.9873772791023845e-05, |
| "loss": 0.344, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.15727391874180865, |
| "grad_norm": 6.71875, |
| "learning_rate": 1.973352033660589e-05, |
| "loss": 0.3335, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.16382699868938402, |
| "eval_loss": 0.32981202006340027, |
| "eval_runtime": 3.631, |
| "eval_samples_per_second": 8.538, |
| "eval_steps_per_second": 1.102, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.17038007863695936, |
| "grad_norm": 4.25, |
| "learning_rate": 1.959326788218794e-05, |
| "loss": 0.3455, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1834862385321101, |
| "grad_norm": 4.40625, |
| "learning_rate": 1.9453015427769988e-05, |
| "loss": 0.3583, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1965923984272608, |
| "grad_norm": 8.5625, |
| "learning_rate": 1.9312762973352036e-05, |
| "loss": 0.3583, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1965923984272608, |
| "eval_loss": 0.32496821880340576, |
| "eval_runtime": 3.6332, |
| "eval_samples_per_second": 8.532, |
| "eval_steps_per_second": 1.101, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.20969855832241152, |
| "grad_norm": 3.921875, |
| "learning_rate": 1.9172510518934082e-05, |
| "loss": 0.3653, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.22280471821756226, |
| "grad_norm": 5.96875, |
| "learning_rate": 1.903225806451613e-05, |
| "loss": 0.3176, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.22935779816513763, |
| "eval_loss": 0.31902241706848145, |
| "eval_runtime": 3.6232, |
| "eval_samples_per_second": 8.556, |
| "eval_steps_per_second": 1.104, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.23591087811271297, |
| "grad_norm": 5.0625, |
| "learning_rate": 1.889200561009818e-05, |
| "loss": 0.3316, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2490170380078637, |
| "grad_norm": 5.03125, |
| "learning_rate": 1.8751753155680224e-05, |
| "loss": 0.3265, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2621231979030144, |
| "grad_norm": 4.1875, |
| "learning_rate": 1.8611500701262273e-05, |
| "loss": 0.3195, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2621231979030144, |
| "eval_loss": 0.314563125371933, |
| "eval_runtime": 3.6254, |
| "eval_samples_per_second": 8.551, |
| "eval_steps_per_second": 1.103, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.27522935779816515, |
| "grad_norm": 4.6875, |
| "learning_rate": 1.8471248246844322e-05, |
| "loss": 0.3281, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.28833551769331583, |
| "grad_norm": 3.421875, |
| "learning_rate": 1.833099579242637e-05, |
| "loss": 0.2814, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2948885976408912, |
| "eval_loss": 0.3087914288043976, |
| "eval_runtime": 3.6237, |
| "eval_samples_per_second": 8.555, |
| "eval_steps_per_second": 1.104, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.30144167758846657, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.8190743338008416e-05, |
| "loss": 0.3419, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3145478374836173, |
| "grad_norm": 6.28125, |
| "learning_rate": 1.8050490883590464e-05, |
| "loss": 0.3428, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.32765399737876805, |
| "grad_norm": 6.28125, |
| "learning_rate": 1.7910238429172513e-05, |
| "loss": 0.3723, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.32765399737876805, |
| "eval_loss": 0.31134819984436035, |
| "eval_runtime": 3.6258, |
| "eval_samples_per_second": 8.55, |
| "eval_steps_per_second": 1.103, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.34076015727391873, |
| "grad_norm": 6.96875, |
| "learning_rate": 1.776998597475456e-05, |
| "loss": 0.3545, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.35386631716906947, |
| "grad_norm": 5.375, |
| "learning_rate": 1.7629733520336607e-05, |
| "loss": 0.3441, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.36041939711664484, |
| "eval_loss": 0.30370065569877625, |
| "eval_runtime": 3.6249, |
| "eval_samples_per_second": 8.552, |
| "eval_steps_per_second": 1.103, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.3669724770642202, |
| "grad_norm": 4.5625, |
| "learning_rate": 1.7489481065918656e-05, |
| "loss": 0.3107, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3800786369593709, |
| "grad_norm": 3.203125, |
| "learning_rate": 1.7349228611500704e-05, |
| "loss": 0.2933, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3931847968545216, |
| "grad_norm": 3.75, |
| "learning_rate": 1.720897615708275e-05, |
| "loss": 0.291, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3931847968545216, |
| "eval_loss": 0.30050498247146606, |
| "eval_runtime": 3.6252, |
| "eval_samples_per_second": 8.551, |
| "eval_steps_per_second": 1.103, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.40629095674967236, |
| "grad_norm": 4.125, |
| "learning_rate": 1.70687237026648e-05, |
| "loss": 0.3167, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.41939711664482304, |
| "grad_norm": 4.90625, |
| "learning_rate": 1.6928471248246844e-05, |
| "loss": 0.3104, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.4259501965923984, |
| "eval_loss": 0.2980906069278717, |
| "eval_runtime": 3.6274, |
| "eval_samples_per_second": 8.546, |
| "eval_steps_per_second": 1.103, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.4325032765399738, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.6788218793828896e-05, |
| "loss": 0.3122, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4456094364351245, |
| "grad_norm": 5.21875, |
| "learning_rate": 1.664796633941094e-05, |
| "loss": 0.3053, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.45871559633027525, |
| "grad_norm": 3.09375, |
| "learning_rate": 1.650771388499299e-05, |
| "loss": 0.2632, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.45871559633027525, |
| "eval_loss": 0.29447969794273376, |
| "eval_runtime": 3.6275, |
| "eval_samples_per_second": 8.546, |
| "eval_steps_per_second": 1.103, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.47182175622542594, |
| "grad_norm": 4.40625, |
| "learning_rate": 1.6367461430575035e-05, |
| "loss": 0.2968, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4849279161205767, |
| "grad_norm": 3.78125, |
| "learning_rate": 1.6227208976157084e-05, |
| "loss": 0.3154, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.49148099606815204, |
| "eval_loss": 0.29292187094688416, |
| "eval_runtime": 3.6253, |
| "eval_samples_per_second": 8.551, |
| "eval_steps_per_second": 1.103, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.4980340760157274, |
| "grad_norm": 3.984375, |
| "learning_rate": 1.6086956521739132e-05, |
| "loss": 0.2635, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5111402359108781, |
| "grad_norm": 3.5, |
| "learning_rate": 1.5946704067321178e-05, |
| "loss": 0.3126, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5242463958060288, |
| "grad_norm": 5.40625, |
| "learning_rate": 1.5806451612903226e-05, |
| "loss": 0.3126, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5242463958060288, |
| "eval_loss": 0.2929743826389313, |
| "eval_runtime": 3.6233, |
| "eval_samples_per_second": 8.556, |
| "eval_steps_per_second": 1.104, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5373525557011796, |
| "grad_norm": 3.984375, |
| "learning_rate": 1.5666199158485275e-05, |
| "loss": 0.279, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5504587155963303, |
| "grad_norm": 4.5, |
| "learning_rate": 1.5525946704067324e-05, |
| "loss": 0.2817, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5570117955439057, |
| "eval_loss": 0.2881828844547272, |
| "eval_runtime": 3.6309, |
| "eval_samples_per_second": 8.538, |
| "eval_steps_per_second": 1.102, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.563564875491481, |
| "grad_norm": 3.671875, |
| "learning_rate": 1.538569424964937e-05, |
| "loss": 0.2967, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.5766710353866317, |
| "grad_norm": 4.3125, |
| "learning_rate": 1.5245441795231418e-05, |
| "loss": 0.3231, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5897771952817824, |
| "grad_norm": 3.65625, |
| "learning_rate": 1.5105189340813466e-05, |
| "loss": 0.3371, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5897771952817824, |
| "eval_loss": 0.285811185836792, |
| "eval_runtime": 3.6291, |
| "eval_samples_per_second": 8.542, |
| "eval_steps_per_second": 1.102, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6028833551769331, |
| "grad_norm": 3.328125, |
| "learning_rate": 1.4964936886395513e-05, |
| "loss": 0.3081, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6159895150720839, |
| "grad_norm": 3.984375, |
| "learning_rate": 1.482468443197756e-05, |
| "loss": 0.2821, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.6225425950196593, |
| "eval_loss": 0.28545138239860535, |
| "eval_runtime": 3.6247, |
| "eval_samples_per_second": 8.553, |
| "eval_steps_per_second": 1.104, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.6290956749672346, |
| "grad_norm": 3.65625, |
| "learning_rate": 1.4684431977559607e-05, |
| "loss": 0.3088, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6422018348623854, |
| "grad_norm": 3.4375, |
| "learning_rate": 1.4544179523141658e-05, |
| "loss": 0.3111, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6553079947575361, |
| "grad_norm": 3.65625, |
| "learning_rate": 1.4403927068723705e-05, |
| "loss": 0.2942, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6553079947575361, |
| "eval_loss": 0.28335702419281006, |
| "eval_runtime": 3.6367, |
| "eval_samples_per_second": 8.524, |
| "eval_steps_per_second": 1.1, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6684141546526867, |
| "grad_norm": 3.671875, |
| "learning_rate": 1.4263674614305752e-05, |
| "loss": 0.2872, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.6815203145478375, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.4123422159887799e-05, |
| "loss": 0.3081, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.6880733944954128, |
| "eval_loss": 0.2832925617694855, |
| "eval_runtime": 3.6251, |
| "eval_samples_per_second": 8.551, |
| "eval_steps_per_second": 1.103, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.6946264744429882, |
| "grad_norm": 5.03125, |
| "learning_rate": 1.3983169705469847e-05, |
| "loss": 0.3289, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7077326343381389, |
| "grad_norm": 3.75, |
| "learning_rate": 1.3842917251051894e-05, |
| "loss": 0.2875, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7208387942332897, |
| "grad_norm": 2.78125, |
| "learning_rate": 1.3702664796633941e-05, |
| "loss": 0.2218, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.7208387942332897, |
| "eval_loss": 0.28089821338653564, |
| "eval_runtime": 3.6305, |
| "eval_samples_per_second": 8.539, |
| "eval_steps_per_second": 1.102, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.7339449541284404, |
| "grad_norm": 4.4375, |
| "learning_rate": 1.356241234221599e-05, |
| "loss": 0.264, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.747051114023591, |
| "grad_norm": 3.140625, |
| "learning_rate": 1.3422159887798039e-05, |
| "loss": 0.3062, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.7536041939711664, |
| "eval_loss": 0.2809857130050659, |
| "eval_runtime": 3.6291, |
| "eval_samples_per_second": 8.542, |
| "eval_steps_per_second": 1.102, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.7601572739187418, |
| "grad_norm": 4.125, |
| "learning_rate": 1.3281907433380086e-05, |
| "loss": 0.2615, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.7732634338138925, |
| "grad_norm": 3.53125, |
| "learning_rate": 1.3141654978962133e-05, |
| "loss": 0.2747, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.7863695937090432, |
| "grad_norm": 3.34375, |
| "learning_rate": 1.300140252454418e-05, |
| "loss": 0.2837, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7863695937090432, |
| "eval_loss": 0.276317834854126, |
| "eval_runtime": 3.6322, |
| "eval_samples_per_second": 8.535, |
| "eval_steps_per_second": 1.101, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.799475753604194, |
| "grad_norm": 5.0, |
| "learning_rate": 1.286115007012623e-05, |
| "loss": 0.3009, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.8125819134993447, |
| "grad_norm": 3.46875, |
| "learning_rate": 1.2720897615708277e-05, |
| "loss": 0.3129, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8191349934469201, |
| "eval_loss": 0.27676212787628174, |
| "eval_runtime": 3.6276, |
| "eval_samples_per_second": 8.546, |
| "eval_steps_per_second": 1.103, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.8256880733944955, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.2580645161290324e-05, |
| "loss": 0.2404, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.8387942332896461, |
| "grad_norm": 3.09375, |
| "learning_rate": 1.2440392706872371e-05, |
| "loss": 0.2858, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.8519003931847968, |
| "grad_norm": 3.90625, |
| "learning_rate": 1.230014025245442e-05, |
| "loss": 0.2947, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.8519003931847968, |
| "eval_loss": 0.2739439606666565, |
| "eval_runtime": 3.6305, |
| "eval_samples_per_second": 8.539, |
| "eval_steps_per_second": 1.102, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.8650065530799476, |
| "grad_norm": 3.15625, |
| "learning_rate": 1.2159887798036467e-05, |
| "loss": 0.291, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8781127129750983, |
| "grad_norm": 3.828125, |
| "learning_rate": 1.2019635343618514e-05, |
| "loss": 0.3276, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.8846657929226737, |
| "eval_loss": 0.2710360586643219, |
| "eval_runtime": 3.6251, |
| "eval_samples_per_second": 8.551, |
| "eval_steps_per_second": 1.103, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.891218872870249, |
| "grad_norm": 4.75, |
| "learning_rate": 1.187938288920056e-05, |
| "loss": 0.289, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.9043250327653998, |
| "grad_norm": 3.53125, |
| "learning_rate": 1.1739130434782611e-05, |
| "loss": 0.2442, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.9174311926605505, |
| "grad_norm": 3.0625, |
| "learning_rate": 1.1598877980364658e-05, |
| "loss": 0.3024, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9174311926605505, |
| "eval_loss": 0.2719181180000305, |
| "eval_runtime": 3.625, |
| "eval_samples_per_second": 8.552, |
| "eval_steps_per_second": 1.103, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9305373525557011, |
| "grad_norm": 3.15625, |
| "learning_rate": 1.1458625525946705e-05, |
| "loss": 0.2883, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.9436435124508519, |
| "grad_norm": 4.0625, |
| "learning_rate": 1.1318373071528752e-05, |
| "loss": 0.2915, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.9501965923984272, |
| "eval_loss": 0.27032172679901123, |
| "eval_runtime": 3.6246, |
| "eval_samples_per_second": 8.553, |
| "eval_steps_per_second": 1.104, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.9567496723460026, |
| "grad_norm": 3.09375, |
| "learning_rate": 1.11781206171108e-05, |
| "loss": 0.2846, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.9698558322411533, |
| "grad_norm": 3.859375, |
| "learning_rate": 1.103786816269285e-05, |
| "loss": 0.2954, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.9829619921363041, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.0897615708274896e-05, |
| "loss": 0.2825, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.9829619921363041, |
| "eval_loss": 0.26988574862480164, |
| "eval_runtime": 3.6297, |
| "eval_samples_per_second": 8.541, |
| "eval_steps_per_second": 1.102, |
| "step": 750 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1526, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 25, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9744231345598464.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|