{ "best_metric": 0.2909594774246216, "best_model_checkpoint": "/data2/ckpts/GenRM/qwen-2.5-math-instruct/full/GenPRM-78k-train-5:5-decontamination/checkpoint-1200", "epoch": 0.9995883079456567, "eval_steps": 100, "global_step": 1214, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016467682173734045, "grad_norm": 2.2536449432373047, "learning_rate": 2.702702702702703e-06, "loss": 0.9706, "step": 20 }, { "epoch": 0.03293536434746809, "grad_norm": 0.7532587051391602, "learning_rate": 4.999919851200522e-06, "loss": 0.5433, "step": 40 }, { "epoch": 0.04940304652120214, "grad_norm": 0.6202924847602844, "learning_rate": 4.995290485881111e-06, "loss": 0.4319, "step": 60 }, { "epoch": 0.06587072869493618, "grad_norm": 0.5724588632583618, "learning_rate": 4.983551854245604e-06, "loss": 0.3985, "step": 80 }, { "epoch": 0.08233841086867023, "grad_norm": 0.6023426651954651, "learning_rate": 4.9647374005198125e-06, "loss": 0.3951, "step": 100 }, { "epoch": 0.08233841086867023, "eval_loss": 0.37706664204597473, "eval_runtime": 38.4153, "eval_samples_per_second": 52.063, "eval_steps_per_second": 6.508, "step": 100 }, { "epoch": 0.09880609304240429, "grad_norm": 0.6422224640846252, "learning_rate": 4.938900728467664e-06, "loss": 0.381, "step": 120 }, { "epoch": 0.11527377521613832, "grad_norm": 0.5755812525749207, "learning_rate": 4.9061154486701204e-06, "loss": 0.3649, "step": 140 }, { "epoch": 0.13174145738987236, "grad_norm": 0.5953928232192993, "learning_rate": 4.866474968803222e-06, "loss": 0.3605, "step": 160 }, { "epoch": 0.14820913956360643, "grad_norm": 0.608504593372345, "learning_rate": 4.820092227512736e-06, "loss": 0.3514, "step": 180 }, { "epoch": 0.16467682173734047, "grad_norm": 0.6068744659423828, "learning_rate": 4.767099372643641e-06, "loss": 0.3471, "step": 200 }, { "epoch": 0.16467682173734047, "eval_loss": 0.3430534303188324, "eval_runtime": 38.4427, "eval_samples_per_second": 52.026, "eval_steps_per_second": 6.503, "step": 200 }, { "epoch": 0.1811445039110745, "grad_norm": 0.6543622612953186, "learning_rate": 4.707647384741187e-06, "loss": 0.3494, "step": 220 }, { "epoch": 0.19761218608480857, "grad_norm": 0.6283196210861206, "learning_rate": 4.641905646896205e-06, "loss": 0.3382, "step": 240 }, { "epoch": 0.2140798682585426, "grad_norm": 0.6050875186920166, "learning_rate": 4.570061462160209e-06, "loss": 0.3382, "step": 260 }, { "epoch": 0.23054755043227665, "grad_norm": 0.6354455947875977, "learning_rate": 4.492319519905217e-06, "loss": 0.3319, "step": 280 }, { "epoch": 0.24701523260601072, "grad_norm": 0.5852451920509338, "learning_rate": 4.408901312648652e-06, "loss": 0.3295, "step": 300 }, { "epoch": 0.24701523260601072, "eval_loss": 0.3265990614891052, "eval_runtime": 38.4536, "eval_samples_per_second": 52.011, "eval_steps_per_second": 6.501, "step": 300 }, { "epoch": 0.2634829147797447, "grad_norm": 0.6466448307037354, "learning_rate": 4.3200445050048545e-06, "loss": 0.3322, "step": 320 }, { "epoch": 0.2799505969534788, "grad_norm": 0.6878635287284851, "learning_rate": 4.226002256561101e-06, "loss": 0.3336, "step": 340 }, { "epoch": 0.29641827912721286, "grad_norm": 0.6180429458618164, "learning_rate": 4.127042500607298e-06, "loss": 0.3287, "step": 360 }, { "epoch": 0.31288596130094687, "grad_norm": 0.6696975231170654, "learning_rate": 4.023447180774308e-06, "loss": 0.3208, "step": 380 }, { "epoch": 0.32935364347468093, "grad_norm": 0.6044342517852783, "learning_rate": 3.915511447755793e-06, "loss": 0.3162, "step": 400 }, { "epoch": 0.32935364347468093, "eval_loss": 0.3160727918148041, "eval_runtime": 38.4863, "eval_samples_per_second": 51.967, "eval_steps_per_second": 6.496, "step": 400 }, { "epoch": 0.345821325648415, "grad_norm": 0.6135950088500977, "learning_rate": 3.803542818402154e-06, "loss": 0.3167, "step": 420 }, { "epoch": 0.362289007822149, "grad_norm": 0.6029672026634216, "learning_rate": 3.687860299582381e-06, "loss": 0.3221, "step": 440 }, { "epoch": 0.3787566899958831, "grad_norm": 0.6770069003105164, "learning_rate": 3.568793479309998e-06, "loss": 0.319, "step": 460 }, { "epoch": 0.39522437216961714, "grad_norm": 0.6067743897438049, "learning_rate": 3.4466815877225456e-06, "loss": 0.3122, "step": 480 }, { "epoch": 0.41169205434335115, "grad_norm": 0.5844372510910034, "learning_rate": 3.3218725305899402e-06, "loss": 0.3143, "step": 500 }, { "epoch": 0.41169205434335115, "eval_loss": 0.3084495961666107, "eval_runtime": 38.5709, "eval_samples_per_second": 51.853, "eval_steps_per_second": 6.482, "step": 500 }, { "epoch": 0.4281597365170852, "grad_norm": 0.6176360249519348, "learning_rate": 3.194721898105323e-06, "loss": 0.3128, "step": 520 }, { "epoch": 0.4446274186908193, "grad_norm": 0.6112409830093384, "learning_rate": 3.0655919517824167e-06, "loss": 0.315, "step": 540 }, { "epoch": 0.4610951008645533, "grad_norm": 0.5909674763679504, "learning_rate": 2.9348505923458008e-06, "loss": 0.3179, "step": 560 }, { "epoch": 0.47756278303828736, "grad_norm": 0.5776228904724121, "learning_rate": 2.802870311554661e-06, "loss": 0.3109, "step": 580 }, { "epoch": 0.49403046521202143, "grad_norm": 0.5729939937591553, "learning_rate": 2.6700271309463397e-06, "loss": 0.3054, "step": 600 }, { "epoch": 0.49403046521202143, "eval_loss": 0.3028527796268463, "eval_runtime": 38.5353, "eval_samples_per_second": 51.9, "eval_steps_per_second": 6.488, "step": 600 }, { "epoch": 0.5104981473857555, "grad_norm": 0.6428977847099304, "learning_rate": 2.536699530523292e-06, "loss": 0.3051, "step": 620 }, { "epoch": 0.5269658295594895, "grad_norm": 0.652348518371582, "learning_rate": 2.4032673704356912e-06, "loss": 0.3006, "step": 640 }, { "epoch": 0.5434335117332235, "grad_norm": 0.665675699710846, "learning_rate": 2.2701108087318935e-06, "loss": 0.3068, "step": 660 }, { "epoch": 0.5599011939069576, "grad_norm": 0.568688154220581, "learning_rate": 2.1376092182601694e-06, "loss": 0.3079, "step": 680 }, { "epoch": 0.5763688760806917, "grad_norm": 0.6417534351348877, "learning_rate": 2.006140105807523e-06, "loss": 0.3031, "step": 700 }, { "epoch": 0.5763688760806917, "eval_loss": 0.29845306277275085, "eval_runtime": 38.6545, "eval_samples_per_second": 51.74, "eval_steps_per_second": 6.468, "step": 700 }, { "epoch": 0.5928365582544257, "grad_norm": 0.6650199294090271, "learning_rate": 1.8760780365550654e-06, "loss": 0.304, "step": 720 }, { "epoch": 0.6093042404281598, "grad_norm": 0.6189924478530884, "learning_rate": 1.7477935669142319e-06, "loss": 0.3034, "step": 740 }, { "epoch": 0.6257719226018937, "grad_norm": 0.64354008436203, "learning_rate": 1.6216521887842863e-06, "loss": 0.306, "step": 760 }, { "epoch": 0.6422396047756278, "grad_norm": 0.5726205110549927, "learning_rate": 1.4980132882389836e-06, "loss": 0.2985, "step": 780 }, { "epoch": 0.6587072869493619, "grad_norm": 0.6365756988525391, "learning_rate": 1.3772291216091954e-06, "loss": 0.2988, "step": 800 }, { "epoch": 0.6587072869493619, "eval_loss": 0.2953297197818756, "eval_runtime": 38.4039, "eval_samples_per_second": 52.078, "eval_steps_per_second": 6.51, "step": 800 }, { "epoch": 0.6751749691230959, "grad_norm": 0.6121465563774109, "learning_rate": 1.2596438118786732e-06, "loss": 0.3006, "step": 820 }, { "epoch": 0.69164265129683, "grad_norm": 0.601615309715271, "learning_rate": 1.1455923682523476e-06, "loss": 0.298, "step": 840 }, { "epoch": 0.7081103334705641, "grad_norm": 0.5637004971504211, "learning_rate": 1.0353997316904204e-06, "loss": 0.3021, "step": 860 }, { "epoch": 0.724578015644298, "grad_norm": 0.6115686893463135, "learning_rate": 9.293798491276612e-07, "loss": 0.3007, "step": 880 }, { "epoch": 0.7410456978180321, "grad_norm": 0.56075119972229, "learning_rate": 8.278347790154595e-07, "loss": 0.2965, "step": 900 }, { "epoch": 0.7410456978180321, "eval_loss": 0.2931554615497589, "eval_runtime": 38.421, "eval_samples_per_second": 52.055, "eval_steps_per_second": 6.507, "step": 900 }, { "epoch": 0.7575133799917662, "grad_norm": 0.5739229917526245, "learning_rate": 7.310538307350684e-07, "loss": 0.2949, "step": 920 }, { "epoch": 0.7739810621655002, "grad_norm": 0.6038621664047241, "learning_rate": 6.393127403338714e-07, "loss": 0.2999, "step": 940 }, { "epoch": 0.7904487443392343, "grad_norm": 0.6664382815361023, "learning_rate": 5.528728849330817e-07, "loss": 0.3014, "step": 960 }, { "epoch": 0.8069164265129684, "grad_norm": 0.5887951850891113, "learning_rate": 4.719805380450804e-07, "loss": 0.2979, "step": 980 }, { "epoch": 0.8233841086867023, "grad_norm": 0.584530234336853, "learning_rate": 3.9686616792204677e-07, "loss": 0.2935, "step": 1000 }, { "epoch": 0.8233841086867023, "eval_loss": 0.29177162051200867, "eval_runtime": 38.3967, "eval_samples_per_second": 52.088, "eval_steps_per_second": 6.511, "step": 1000 }, { "epoch": 0.8398517908604364, "grad_norm": 0.520577609539032, "learning_rate": 3.2774378093494067e-07, "loss": 0.2974, "step": 1020 }, { "epoch": 0.8563194730341704, "grad_norm": 0.60713130235672, "learning_rate": 2.6481031185358995e-07, "loss": 0.3022, "step": 1040 }, { "epoch": 0.8727871552079045, "grad_norm": 0.655536413192749, "learning_rate": 2.0824506276503898e-07, "loss": 0.2961, "step": 1060 }, { "epoch": 0.8892548373816386, "grad_norm": 0.5735917091369629, "learning_rate": 1.5820919222869325e-07, "loss": 0.2983, "step": 1080 }, { "epoch": 0.9057225195553725, "grad_norm": 0.5977383852005005, "learning_rate": 1.1484525612372372e-07, "loss": 0.2975, "step": 1100 }, { "epoch": 0.9057225195553725, "eval_loss": 0.2911008894443512, "eval_runtime": 38.7911, "eval_samples_per_second": 51.558, "eval_steps_per_second": 6.445, "step": 1100 }, { "epoch": 0.9221902017291066, "grad_norm": 0.602390468120575, "learning_rate": 7.827680149686879e-08, "loss": 0.299, "step": 1120 }, { "epoch": 0.9386578839028407, "grad_norm": 0.5963403582572937, "learning_rate": 4.86080145678014e-08, "loss": 0.2954, "step": 1140 }, { "epoch": 0.9551255660765747, "grad_norm": 0.5622375011444092, "learning_rate": 2.5923423894919365e-08, "loss": 0.2968, "step": 1160 }, { "epoch": 0.9715932482503088, "grad_norm": 0.5845054984092712, "learning_rate": 1.02876595472573e-08, "loss": 0.292, "step": 1180 }, { "epoch": 0.9880609304240429, "grad_norm": 0.6160269975662231, "learning_rate": 1.7452689686631164e-09, "loss": 0.304, "step": 1200 }, { "epoch": 0.9880609304240429, "eval_loss": 0.2909594774246216, "eval_runtime": 38.9593, "eval_samples_per_second": 51.336, "eval_steps_per_second": 6.417, "step": 1200 }, { "epoch": 0.9995883079456567, "step": 1214, "total_flos": 5.587165226987422e+18, "train_loss": 0.332974739875982, "train_runtime": 7444.239, "train_samples_per_second": 10.439, "train_steps_per_second": 0.163 } ], "logging_steps": 20, "max_steps": 1214, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.587165226987422e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }