{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9958330532965927, "eval_steps": 100, "global_step": 464, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004301364339001277, "grad_norm": 73.13473510742188, "learning_rate": 1.0638297872340425e-08, "logits/chosen": -0.302734375, "logits/rejected": -0.3828125, "logps/chosen": -202.0, "logps/rejected": -172.0, "loss": 44.25, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.04301364339001277, "grad_norm": 71.8135986328125, "learning_rate": 1.0638297872340425e-07, "logits/chosen": -0.3203125, "logits/rejected": -0.337890625, "logps/chosen": -198.0, "logps/rejected": -165.0, "loss": 44.3581, "rewards/accuracies": 0.2395833283662796, "rewards/chosen": -0.0023651123046875, "rewards/margins": -0.00262451171875, "rewards/rejected": 0.0002613067626953125, "step": 10 }, { "epoch": 0.08602728678002554, "grad_norm": 65.34407806396484, "learning_rate": 2.127659574468085e-07, "logits/chosen": -0.296875, "logits/rejected": -0.3359375, "logps/chosen": -192.0, "logps/rejected": -167.0, "loss": 44.3793, "rewards/accuracies": 0.29218751192092896, "rewards/chosen": -0.00238037109375, "rewards/margins": -0.00157928466796875, "rewards/rejected": -0.000804901123046875, "step": 20 }, { "epoch": 0.12904093017003831, "grad_norm": 79.51785278320312, "learning_rate": 3.1914893617021275e-07, "logits/chosen": -0.3125, "logits/rejected": -0.337890625, "logps/chosen": -206.0, "logps/rejected": -159.0, "loss": 44.3758, "rewards/accuracies": 0.28593748807907104, "rewards/chosen": 0.00067138671875, "rewards/margins": -0.00144195556640625, "rewards/rejected": 0.002105712890625, "step": 30 }, { "epoch": 0.17205457356005108, "grad_norm": 73.9058837890625, "learning_rate": 4.25531914893617e-07, "logits/chosen": -0.33984375, "logits/rejected": -0.361328125, "logps/chosen": -197.0, "logps/rejected": -159.0, "loss": 44.3793, "rewards/accuracies": 0.2953124940395355, "rewards/chosen": -0.00061798095703125, "rewards/margins": -0.001556396484375, "rewards/rejected": 0.000934600830078125, "step": 40 }, { "epoch": 0.21506821695006384, "grad_norm": 90.93192291259766, "learning_rate": 4.999361498869529e-07, "logits/chosen": -0.3359375, "logits/rejected": -0.36328125, "logps/chosen": -196.0, "logps/rejected": -166.0, "loss": 44.3191, "rewards/accuracies": 0.30781251192092896, "rewards/chosen": 0.001434326171875, "rewards/margins": 0.00069427490234375, "rewards/rejected": 0.000743865966796875, "step": 50 }, { "epoch": 0.25808186034007663, "grad_norm": 69.464599609375, "learning_rate": 4.988019438437758e-07, "logits/chosen": -0.3046875, "logits/rejected": -0.318359375, "logps/chosen": -189.0, "logps/rejected": -168.0, "loss": 44.3687, "rewards/accuracies": 0.29374998807907104, "rewards/chosen": 0.0023193359375, "rewards/margins": -0.0009918212890625, "rewards/rejected": 0.0033111572265625, "step": 60 }, { "epoch": 0.30109550373008936, "grad_norm": 73.38098907470703, "learning_rate": 4.962562537324176e-07, "logits/chosen": -0.30859375, "logits/rejected": -0.333984375, "logps/chosen": -189.0, "logps/rejected": -167.0, "loss": 44.2961, "rewards/accuracies": 0.3125, "rewards/chosen": 0.00762939453125, "rewards/margins": 0.00148773193359375, "rewards/rejected": 0.006134033203125, "step": 70 }, { "epoch": 0.34410914712010215, "grad_norm": 78.60209655761719, "learning_rate": 4.923135215663896e-07, "logits/chosen": -0.302734375, "logits/rejected": -0.322265625, "logps/chosen": -190.0, "logps/rejected": -164.0, "loss": 44.2723, "rewards/accuracies": 0.3031249940395355, "rewards/chosen": 0.00958251953125, "rewards/margins": 0.00165557861328125, "rewards/rejected": 0.0079345703125, "step": 80 }, { "epoch": 0.38712279051011494, "grad_norm": 72.0979232788086, "learning_rate": 4.8699611495083e-07, "logits/chosen": -0.298828125, "logits/rejected": -0.326171875, "logps/chosen": -192.0, "logps/rejected": -160.0, "loss": 44.318, "rewards/accuracies": 0.3296875059604645, "rewards/chosen": 0.01177978515625, "rewards/margins": 0.000476837158203125, "rewards/rejected": 0.01129150390625, "step": 90 }, { "epoch": 0.4301364339001277, "grad_norm": 80.73914337158203, "learning_rate": 4.803342001883246e-07, "logits/chosen": -0.3125, "logits/rejected": -0.33203125, "logps/chosen": -199.0, "logps/rejected": -173.0, "loss": 44.2637, "rewards/accuracies": 0.3109374940395355, "rewards/chosen": 0.0164794921875, "rewards/margins": 0.0026397705078125, "rewards/rejected": 0.01385498046875, "step": 100 }, { "epoch": 0.4301364339001277, "eval_logits/chosen": -0.30078125, "eval_logits/rejected": -0.302734375, "eval_logps/chosen": -184.0, "eval_logps/rejected": -174.0, "eval_loss": 0.6918657422065735, "eval_rewards/accuracies": 0.3381776511669159, "eval_rewards/chosen": 0.01611328125, "eval_rewards/margins": 0.00162506103515625, "eval_rewards/rejected": 0.0145263671875, "eval_runtime": 2092.6368, "eval_samples_per_second": 2.925, "eval_steps_per_second": 0.732, "step": 100 }, { "epoch": 0.47315007729014047, "grad_norm": 75.33040618896484, "learning_rate": 4.72365571141757e-07, "logits/chosen": -0.341796875, "logits/rejected": -0.359375, "logps/chosen": -193.0, "logps/rejected": -170.0, "loss": 44.3273, "rewards/accuracies": 0.34687501192092896, "rewards/chosen": 0.017333984375, "rewards/margins": 0.00176239013671875, "rewards/rejected": 0.0155029296875, "step": 110 }, { "epoch": 0.5161637206801533, "grad_norm": 87.9056625366211, "learning_rate": 4.6313543482507056e-07, "logits/chosen": -0.34375, "logits/rejected": -0.34375, "logps/chosen": -187.0, "logps/rejected": -163.0, "loss": 44.2031, "rewards/accuracies": 0.3531250059604645, "rewards/chosen": 0.01953125, "rewards/margins": 0.00360107421875, "rewards/rejected": 0.015869140625, "step": 120 }, { "epoch": 0.559177364070166, "grad_norm": 80.84737396240234, "learning_rate": 4.526961549383108e-07, "logits/chosen": -0.314453125, "logits/rejected": -0.326171875, "logps/chosen": -203.0, "logps/rejected": -166.0, "loss": 44.1418, "rewards/accuracies": 0.3765625059604645, "rewards/chosen": 0.024658203125, "rewards/margins": 0.005950927734375, "rewards/rejected": 0.0186767578125, "step": 130 }, { "epoch": 0.6021910074601787, "grad_norm": 76.54241180419922, "learning_rate": 4.4110695480190597e-07, "logits/chosen": -0.3359375, "logits/rejected": -0.357421875, "logps/chosen": -189.0, "logps/rejected": -161.0, "loss": 44.0926, "rewards/accuracies": 0.40312498807907104, "rewards/chosen": 0.029296875, "rewards/margins": 0.00738525390625, "rewards/rejected": 0.02197265625, "step": 140 }, { "epoch": 0.6452046508501915, "grad_norm": 69.73096466064453, "learning_rate": 4.284335813754769e-07, "logits/chosen": -0.318359375, "logits/rejected": -0.341796875, "logps/chosen": -194.0, "logps/rejected": -168.0, "loss": 44.1789, "rewards/accuracies": 0.40312498807907104, "rewards/chosen": 0.031982421875, "rewards/margins": 0.00555419921875, "rewards/rejected": 0.0264892578125, "step": 150 }, { "epoch": 0.6882182942402043, "grad_norm": 79.0284423828125, "learning_rate": 4.1474793226723825e-07, "logits/chosen": -0.314453125, "logits/rejected": -0.333984375, "logps/chosen": -182.0, "logps/rejected": -164.0, "loss": 44.3586, "rewards/accuracies": 0.375, "rewards/chosen": 0.0306396484375, "rewards/margins": -0.00011539459228515625, "rewards/rejected": 0.03076171875, "step": 160 }, { "epoch": 0.7312319376302171, "grad_norm": 77.1414794921875, "learning_rate": 4.001276478500126e-07, "logits/chosen": -0.330078125, "logits/rejected": -0.345703125, "logps/chosen": -194.0, "logps/rejected": -165.0, "loss": 44.2418, "rewards/accuracies": 0.421875, "rewards/chosen": 0.03662109375, "rewards/margins": 0.004119873046875, "rewards/rejected": 0.032470703125, "step": 170 }, { "epoch": 0.7742455810202299, "grad_norm": 98.8071517944336, "learning_rate": 3.846556707978337e-07, "logits/chosen": -0.3359375, "logits/rejected": -0.357421875, "logps/chosen": -187.0, "logps/rejected": -154.0, "loss": 44.0305, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": 0.044921875, "rewards/margins": 0.0101318359375, "rewards/rejected": 0.034912109375, "step": 180 }, { "epoch": 0.8172592244102426, "grad_norm": 72.15998840332031, "learning_rate": 3.684197755419419e-07, "logits/chosen": -0.3203125, "logits/rejected": -0.369140625, "logps/chosen": -193.0, "logps/rejected": -166.0, "loss": 44.027, "rewards/accuracies": 0.4468750059604645, "rewards/chosen": 0.047119140625, "rewards/margins": 0.01068115234375, "rewards/rejected": 0.036376953125, "step": 190 }, { "epoch": 0.8602728678002554, "grad_norm": 86.6330795288086, "learning_rate": 3.5151207031562633e-07, "logits/chosen": -0.32421875, "logits/rejected": -0.333984375, "logps/chosen": -197.0, "logps/rejected": -166.0, "loss": 44.0617, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": 0.0556640625, "rewards/margins": 0.00958251953125, "rewards/rejected": 0.046142578125, "step": 200 }, { "epoch": 0.8602728678002554, "eval_logits/chosen": -0.298828125, "eval_logits/rejected": -0.302734375, "eval_logps/chosen": -184.0, "eval_logps/rejected": -173.0, "eval_loss": 0.6901950240135193, "eval_rewards/accuracies": 0.42161986231803894, "eval_rewards/chosen": 0.055419921875, "eval_rewards/margins": 0.00567626953125, "eval_rewards/rejected": 0.049560546875, "eval_runtime": 2098.8198, "eval_samples_per_second": 2.916, "eval_steps_per_second": 0.729, "step": 200 }, { "epoch": 0.9032865111902681, "grad_norm": 77.22724151611328, "learning_rate": 3.34028474612874e-07, "logits/chosen": -0.302734375, "logits/rejected": -0.345703125, "logps/chosen": -197.0, "logps/rejected": -172.0, "loss": 44.2062, "rewards/accuracies": 0.3968749940395355, "rewards/chosen": 0.059814453125, "rewards/margins": 0.00506591796875, "rewards/rejected": 0.0546875, "step": 210 }, { "epoch": 0.9463001545802809, "grad_norm": 74.18938446044922, "learning_rate": 3.1606817502526736e-07, "logits/chosen": -0.302734375, "logits/rejected": -0.318359375, "logps/chosen": -193.0, "logps/rejected": -168.0, "loss": 44.0441, "rewards/accuracies": 0.43437498807907104, "rewards/chosen": 0.064453125, "rewards/margins": 0.0096435546875, "rewards/rejected": 0.0546875, "step": 220 }, { "epoch": 0.9893137979702937, "grad_norm": 111.19681549072266, "learning_rate": 2.9773306254423513e-07, "logits/chosen": -0.3203125, "logits/rejected": -0.33984375, "logps/chosen": -199.0, "logps/rejected": -168.0, "loss": 44.1539, "rewards/accuracies": 0.4375, "rewards/chosen": 0.06884765625, "rewards/margins": 0.0081787109375, "rewards/rejected": 0.060791015625, "step": 230 }, { "epoch": 1.0323274413603065, "grad_norm": 85.9101333618164, "learning_rate": 2.791271545209101e-07, "logits/chosen": -0.333984375, "logits/rejected": -0.3515625, "logps/chosen": -203.0, "logps/rejected": -173.0, "loss": 44.1109, "rewards/accuracies": 0.4390625059604645, "rewards/chosen": 0.0712890625, "rewards/margins": 0.00860595703125, "rewards/rejected": 0.06298828125, "step": 240 }, { "epoch": 1.0753410847503193, "grad_norm": 75.81817626953125, "learning_rate": 2.603560045628857e-07, "logits/chosen": -0.322265625, "logits/rejected": -0.337890625, "logps/chosen": -184.0, "logps/rejected": -160.0, "loss": 43.9852, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.07373046875, "rewards/margins": 0.0123291015625, "rewards/rejected": 0.061279296875, "step": 250 }, { "epoch": 1.118354728140332, "grad_norm": 86.1444320678711, "learning_rate": 2.4152610371560093e-07, "logits/chosen": -0.33203125, "logits/rejected": -0.357421875, "logps/chosen": -183.0, "logps/rejected": -160.0, "loss": 43.9813, "rewards/accuracies": 0.45781248807907104, "rewards/chosen": 0.07666015625, "rewards/margins": 0.0120849609375, "rewards/rejected": 0.064453125, "step": 260 }, { "epoch": 1.1613683715303447, "grad_norm": 82.82176208496094, "learning_rate": 2.2274427632552503e-07, "logits/chosen": -0.330078125, "logits/rejected": -0.333984375, "logps/chosen": -199.0, "logps/rejected": -169.0, "loss": 43.8387, "rewards/accuracies": 0.5078125, "rewards/chosen": 0.083984375, "rewards/margins": 0.01611328125, "rewards/rejected": 0.06787109375, "step": 270 }, { "epoch": 1.2043820149203575, "grad_norm": 78.16357421875, "learning_rate": 2.0411707401248403e-07, "logits/chosen": -0.318359375, "logits/rejected": -0.330078125, "logps/chosen": -181.0, "logps/rejected": -164.0, "loss": 44.0895, "rewards/accuracies": 0.4453125, "rewards/chosen": 0.08349609375, "rewards/margins": 0.0096435546875, "rewards/rejected": 0.07373046875, "step": 280 }, { "epoch": 1.2473956583103702, "grad_norm": 83.6078109741211, "learning_rate": 1.8575017118919928e-07, "logits/chosen": -0.310546875, "logits/rejected": -0.32421875, "logps/chosen": -202.0, "logps/rejected": -170.0, "loss": 44.0609, "rewards/accuracies": 0.4546875059604645, "rewards/chosen": 0.0869140625, "rewards/margins": 0.01104736328125, "rewards/rejected": 0.076171875, "step": 290 }, { "epoch": 1.290409301700383, "grad_norm": 69.07476043701172, "learning_rate": 1.6774776555733028e-07, "logits/chosen": -0.3125, "logits/rejected": -0.33203125, "logps/chosen": -188.0, "logps/rejected": -167.0, "loss": 43.8176, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": 0.09228515625, "rewards/margins": 0.0174560546875, "rewards/rejected": 0.07470703125, "step": 300 }, { "epoch": 1.290409301700383, "eval_logits/chosen": -0.298828125, "eval_logits/rejected": -0.30078125, "eval_logps/chosen": -183.0, "eval_logps/rejected": -173.0, "eval_loss": 0.6887055039405823, "eval_rewards/accuracies": 0.45101240277290344, "eval_rewards/chosen": 0.09033203125, "eval_rewards/margins": 0.009033203125, "eval_rewards/rejected": 0.0810546875, "eval_runtime": 2095.2015, "eval_samples_per_second": 2.921, "eval_steps_per_second": 0.731, "step": 300 }, { "epoch": 1.3334229450903958, "grad_norm": 82.81778717041016, "learning_rate": 1.5021198698108036e-07, "logits/chosen": -0.310546875, "logits/rejected": -0.3125, "logps/chosen": -204.0, "logps/rejected": -178.0, "loss": 43.9469, "rewards/accuracies": 0.43437498807907104, "rewards/chosen": 0.0966796875, "rewards/margins": 0.012939453125, "rewards/rejected": 0.083984375, "step": 310 }, { "epoch": 1.3764365884804086, "grad_norm": 94.56529998779297, "learning_rate": 1.3324231809189983e-07, "logits/chosen": -0.31640625, "logits/rejected": -0.33984375, "logps/chosen": -204.0, "logps/rejected": -176.0, "loss": 43.9461, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.09765625, "rewards/margins": 0.0133056640625, "rewards/rejected": 0.083984375, "step": 320 }, { "epoch": 1.4194502318704214, "grad_norm": 80.80192565917969, "learning_rate": 1.1693502991126608e-07, "logits/chosen": -0.30859375, "logits/rejected": -0.3515625, "logps/chosen": -193.0, "logps/rejected": -161.0, "loss": 43.9762, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": 0.09765625, "rewards/margins": 0.012939453125, "rewards/rejected": 0.08447265625, "step": 330 }, { "epoch": 1.4624638752604342, "grad_norm": 74.29733276367188, "learning_rate": 1.0138263569332267e-07, "logits/chosen": -0.302734375, "logits/rejected": -0.326171875, "logps/chosen": -200.0, "logps/rejected": -161.0, "loss": 43.7793, "rewards/accuracies": 0.5140625238418579, "rewards/chosen": 0.10595703125, "rewards/margins": 0.0184326171875, "rewards/rejected": 0.08740234375, "step": 340 }, { "epoch": 1.505477518650447, "grad_norm": 77.55677795410156, "learning_rate": 8.667336608579487e-08, "logits/chosen": -0.333984375, "logits/rejected": -0.365234375, "logps/chosen": -187.0, "logps/rejected": -160.0, "loss": 43.8336, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.1015625, "rewards/margins": 0.01708984375, "rewards/rejected": 0.08447265625, "step": 350 }, { "epoch": 1.5484911620404596, "grad_norm": 79.69268798828125, "learning_rate": 7.28906685866599e-08, "logits/chosen": -0.32421875, "logits/rejected": -0.345703125, "logps/chosen": -192.0, "logps/rejected": -169.0, "loss": 43.9348, "rewards/accuracies": 0.47968751192092896, "rewards/chosen": 0.1015625, "rewards/margins": 0.01409912109375, "rewards/rejected": 0.08740234375, "step": 360 }, { "epoch": 1.5915048054304726, "grad_norm": 74.55278015136719, "learning_rate": 6.01127341362138e-08, "logits/chosen": -0.3046875, "logits/rejected": -0.33203125, "logps/chosen": -185.0, "logps/rejected": -160.0, "loss": 43.8746, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": 0.10205078125, "rewards/margins": 0.0162353515625, "rewards/rejected": 0.0859375, "step": 370 }, { "epoch": 1.6345184488204851, "grad_norm": 73.96065521240234, "learning_rate": 4.841205353023714e-08, "logits/chosen": -0.318359375, "logits/rejected": -0.33984375, "logps/chosen": -185.0, "logps/rejected": -165.0, "loss": 43.8695, "rewards/accuracies": 0.504687488079071, "rewards/chosen": 0.10498046875, "rewards/margins": 0.0164794921875, "rewards/rejected": 0.0888671875, "step": 380 }, { "epoch": 1.6775320922104981, "grad_norm": 72.94511413574219, "learning_rate": 3.785500617078424e-08, "logits/chosen": -0.298828125, "logits/rejected": -0.33203125, "logps/chosen": -191.0, "logps/rejected": -159.0, "loss": 43.8547, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.10498046875, "rewards/margins": 0.016845703125, "rewards/rejected": 0.087890625, "step": 390 }, { "epoch": 1.7205457356005107, "grad_norm": 72.55449676513672, "learning_rate": 2.850148348765921e-08, "logits/chosen": -0.337890625, "logits/rejected": -0.361328125, "logps/chosen": -183.0, "logps/rejected": -159.0, "loss": 43.9145, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": 0.1064453125, "rewards/margins": 0.0150146484375, "rewards/rejected": 0.09130859375, "step": 400 }, { "epoch": 1.7205457356005107, "eval_logits/chosen": -0.296875, "eval_logits/rejected": -0.30078125, "eval_logps/chosen": -183.0, "eval_logps/rejected": -172.0, "eval_loss": 0.6879779696464539, "eval_rewards/accuracies": 0.4557478725910187, "eval_rewards/chosen": 0.10498046875, "eval_rewards/margins": 0.010986328125, "eval_rewards/rejected": 0.09423828125, "eval_runtime": 2088.408, "eval_samples_per_second": 2.931, "eval_steps_per_second": 0.733, "step": 400 }, { "epoch": 1.7635593789905235, "grad_norm": 80.52165985107422, "learning_rate": 2.0404549166959718e-08, "logits/chosen": -0.30078125, "logits/rejected": -0.330078125, "logps/chosen": -196.0, "logps/rejected": -158.0, "loss": 43.7961, "rewards/accuracies": 0.503125011920929, "rewards/chosen": 0.1083984375, "rewards/margins": 0.01806640625, "rewards/rejected": 0.09033203125, "step": 410 }, { "epoch": 1.8065730223805363, "grad_norm": 75.35971069335938, "learning_rate": 1.3610138114250519e-08, "logits/chosen": -0.333984375, "logits/rejected": -0.373046875, "logps/chosen": -185.0, "logps/rejected": -165.0, "loss": 43.9035, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": 0.1064453125, "rewards/margins": 0.0159912109375, "rewards/rejected": 0.0908203125, "step": 420 }, { "epoch": 1.849586665770549, "grad_norm": 78.83647155761719, "learning_rate": 8.156795860187027e-09, "logits/chosen": -0.302734375, "logits/rejected": -0.328125, "logps/chosen": -197.0, "logps/rejected": -168.0, "loss": 44.0375, "rewards/accuracies": 0.4609375, "rewards/chosen": 0.10595703125, "rewards/margins": 0.01165771484375, "rewards/rejected": 0.09423828125, "step": 430 }, { "epoch": 1.8926003091605619, "grad_norm": 78.35073852539062, "learning_rate": 4.075459886973082e-09, "logits/chosen": -0.318359375, "logits/rejected": -0.3515625, "logps/chosen": -192.0, "logps/rejected": -154.0, "loss": 43.7273, "rewards/accuracies": 0.5140625238418579, "rewards/chosen": 0.109375, "rewards/margins": 0.0205078125, "rewards/rejected": 0.0888671875, "step": 440 }, { "epoch": 1.9356139525505747, "grad_norm": 71.64038848876953, "learning_rate": 1.3892841162143899e-09, "logits/chosen": -0.29296875, "logits/rejected": -0.326171875, "logps/chosen": -196.0, "logps/rejected": -157.0, "loss": 43.7586, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": 0.10888671875, "rewards/margins": 0.020263671875, "rewards/rejected": 0.08837890625, "step": 450 }, { "epoch": 1.9786275959405875, "grad_norm": 77.12970733642578, "learning_rate": 1.1350755386951849e-10, "logits/chosen": -0.3203125, "logits/rejected": -0.34765625, "logps/chosen": -194.0, "logps/rejected": -165.0, "loss": 43.8836, "rewards/accuracies": 0.48906248807907104, "rewards/chosen": 0.107421875, "rewards/margins": 0.0159912109375, "rewards/rejected": 0.09130859375, "step": 460 }, { "epoch": 1.9958330532965927, "step": 464, "total_flos": 0.0, "train_loss": 44.06969793911638, "train_runtime": 23064.0827, "train_samples_per_second": 1.29, "train_steps_per_second": 0.02 } ], "logging_steps": 10, "max_steps": 464, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }