{
  "best_metric": 0.2909594774246216,
  "best_model_checkpoint": "/data2/ckpts/GenRM/qwen-2.5-math-instruct/full/GenPRM-78k-train-5:5-decontamination/checkpoint-1200",
  "epoch": 0.9995883079456567,
  "eval_steps": 100,
  "global_step": 1214,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.016467682173734045, "grad_norm": 2.2536449432373047, "learning_rate": 2.702702702702703e-06, "loss": 0.9706, "step": 20 },
    { "epoch": 0.03293536434746809, "grad_norm": 0.7532587051391602, "learning_rate": 4.999919851200522e-06, "loss": 0.5433, "step": 40 },
    { "epoch": 0.04940304652120214, "grad_norm": 0.6202924847602844, "learning_rate": 4.995290485881111e-06, "loss": 0.4319, "step": 60 },
    { "epoch": 0.06587072869493618, "grad_norm": 0.5724588632583618, "learning_rate": 4.983551854245604e-06, "loss": 0.3985, "step": 80 },
    { "epoch": 0.08233841086867023, "grad_norm": 0.6023426651954651, "learning_rate": 4.9647374005198125e-06, "loss": 0.3951, "step": 100 },
    { "epoch": 0.08233841086867023, "eval_loss": 0.37706664204597473, "eval_runtime": 38.4153, "eval_samples_per_second": 52.063, "eval_steps_per_second": 6.508, "step": 100 },
    { "epoch": 0.09880609304240429, "grad_norm": 0.6422224640846252, "learning_rate": 4.938900728467664e-06, "loss": 0.381, "step": 120 },
    { "epoch": 0.11527377521613832, "grad_norm": 0.5755812525749207, "learning_rate": 4.9061154486701204e-06, "loss": 0.3649, "step": 140 },
    { "epoch": 0.13174145738987236, "grad_norm": 0.5953928232192993, "learning_rate": 4.866474968803222e-06, "loss": 0.3605, "step": 160 },
    { "epoch": 0.14820913956360643, "grad_norm": 0.608504593372345, "learning_rate": 4.820092227512736e-06, "loss": 0.3514, "step": 180 },
    { "epoch": 0.16467682173734047, "grad_norm": 0.6068744659423828, "learning_rate": 4.767099372643641e-06, "loss": 0.3471, "step": 200 },
    { "epoch": 0.16467682173734047, "eval_loss": 0.3430534303188324, "eval_runtime": 38.4427, "eval_samples_per_second": 52.026, "eval_steps_per_second": 6.503, "step": 200 },
    { "epoch": 0.1811445039110745, "grad_norm": 0.6543622612953186, "learning_rate": 4.707647384741187e-06, "loss": 0.3494, "step": 220 },
    { "epoch": 0.19761218608480857, "grad_norm": 0.6283196210861206, "learning_rate": 4.641905646896205e-06, "loss": 0.3382, "step": 240 },
    { "epoch": 0.2140798682585426, "grad_norm": 0.6050875186920166, "learning_rate": 4.570061462160209e-06, "loss": 0.3382, "step": 260 },
    { "epoch": 0.23054755043227665, "grad_norm": 0.6354455947875977, "learning_rate": 4.492319519905217e-06, "loss": 0.3319, "step": 280 },
    { "epoch": 0.24701523260601072, "grad_norm": 0.5852451920509338, "learning_rate": 4.408901312648652e-06, "loss": 0.3295, "step": 300 },
    { "epoch": 0.24701523260601072, "eval_loss": 0.3265990614891052, "eval_runtime": 38.4536, "eval_samples_per_second": 52.011, "eval_steps_per_second": 6.501, "step": 300 },
    { "epoch": 0.2634829147797447, "grad_norm": 0.6466448307037354, "learning_rate": 4.3200445050048545e-06, "loss": 0.3322, "step": 320 },
    { "epoch": 0.2799505969534788, "grad_norm": 0.6878635287284851, "learning_rate": 4.226002256561101e-06, "loss": 0.3336, "step": 340 },
    { "epoch": 0.29641827912721286, "grad_norm": 0.6180429458618164, "learning_rate": 4.127042500607298e-06, "loss": 0.3287, "step": 360 },
    { "epoch": 0.31288596130094687, "grad_norm": 0.6696975231170654, "learning_rate": 4.023447180774308e-06, "loss": 0.3208, "step": 380 },
    { "epoch": 0.32935364347468093, "grad_norm": 0.6044342517852783, "learning_rate": 3.915511447755793e-06, "loss": 0.3162, "step": 400 },
    { "epoch": 0.32935364347468093, "eval_loss": 0.3160727918148041, "eval_runtime": 38.4863, "eval_samples_per_second": 51.967, "eval_steps_per_second": 6.496, "step": 400 },
    { "epoch": 0.345821325648415, "grad_norm": 0.6135950088500977, "learning_rate": 3.803542818402154e-06, "loss": 0.3167, "step": 420 },
    { "epoch": 0.362289007822149, "grad_norm": 0.6029672026634216, "learning_rate": 3.687860299582381e-06, "loss": 0.3221, "step": 440 },
    { "epoch": 0.3787566899958831, "grad_norm": 0.6770069003105164, "learning_rate": 3.568793479309998e-06, "loss": 0.319, "step": 460 },
    { "epoch": 0.39522437216961714, "grad_norm": 0.6067743897438049, "learning_rate": 3.4466815877225456e-06, "loss": 0.3122, "step": 480 },
    { "epoch": 0.41169205434335115, "grad_norm": 0.5844372510910034, "learning_rate": 3.3218725305899402e-06, "loss": 0.3143, "step": 500 },
    { "epoch": 0.41169205434335115, "eval_loss": 0.3084495961666107, "eval_runtime": 38.5709, "eval_samples_per_second": 51.853, "eval_steps_per_second": 6.482, "step": 500 },
    { "epoch": 0.4281597365170852, "grad_norm": 0.6176360249519348, "learning_rate": 3.194721898105323e-06, "loss": 0.3128, "step": 520 },
    { "epoch": 0.4446274186908193, "grad_norm": 0.6112409830093384, "learning_rate": 3.0655919517824167e-06, "loss": 0.315, "step": 540 },
    { "epoch": 0.4610951008645533, "grad_norm": 0.5909674763679504, "learning_rate": 2.9348505923458008e-06, "loss": 0.3179, "step": 560 },
    { "epoch": 0.47756278303828736, "grad_norm": 0.5776228904724121, "learning_rate": 2.802870311554661e-06, "loss": 0.3109, "step": 580 },
    { "epoch": 0.49403046521202143, "grad_norm": 0.5729939937591553, "learning_rate": 2.6700271309463397e-06, "loss": 0.3054, "step": 600 },
    { "epoch": 0.49403046521202143, "eval_loss": 0.3028527796268463, "eval_runtime": 38.5353, "eval_samples_per_second": 51.9, "eval_steps_per_second": 6.488, "step": 600 },
    { "epoch": 0.5104981473857555, "grad_norm": 0.6428977847099304, "learning_rate": 2.536699530523292e-06, "loss": 0.3051, "step": 620 },
    { "epoch": 0.5269658295594895, "grad_norm": 0.652348518371582, "learning_rate": 2.4032673704356912e-06, "loss": 0.3006, "step": 640 },
    { "epoch": 0.5434335117332235, "grad_norm": 0.665675699710846, "learning_rate": 2.2701108087318935e-06, "loss": 0.3068, "step": 660 },
    { "epoch": 0.5599011939069576, "grad_norm": 0.568688154220581, "learning_rate": 2.1376092182601694e-06, "loss": 0.3079, "step": 680 },
    { "epoch": 0.5763688760806917, "grad_norm": 0.6417534351348877, "learning_rate": 2.006140105807523e-06, "loss": 0.3031, "step": 700 },
    { "epoch": 0.5763688760806917, "eval_loss": 0.29845306277275085, "eval_runtime": 38.6545, "eval_samples_per_second": 51.74, "eval_steps_per_second": 6.468, "step": 700 },
    { "epoch": 0.5928365582544257, "grad_norm": 0.6650199294090271, "learning_rate": 1.8760780365550654e-06, "loss": 0.304, "step": 720 },
    { "epoch": 0.6093042404281598, "grad_norm": 0.6189924478530884, "learning_rate": 1.7477935669142319e-06, "loss": 0.3034, "step": 740 },
    { "epoch": 0.6257719226018937, "grad_norm": 0.64354008436203, "learning_rate": 1.6216521887842863e-06, "loss": 0.306, "step": 760 },
    { "epoch": 0.6422396047756278, "grad_norm": 0.5726205110549927, "learning_rate": 1.4980132882389836e-06, "loss": 0.2985, "step": 780 },
    { "epoch": 0.6587072869493619, "grad_norm": 0.6365756988525391, "learning_rate": 1.3772291216091954e-06, "loss": 0.2988, "step": 800 },
    { "epoch": 0.6587072869493619, "eval_loss": 0.2953297197818756, "eval_runtime": 38.4039, "eval_samples_per_second": 52.078, "eval_steps_per_second": 6.51, "step": 800 },
    { "epoch": 0.6751749691230959, "grad_norm": 0.6121465563774109, "learning_rate": 1.2596438118786732e-06, "loss": 0.3006, "step": 820 },
    { "epoch": 0.69164265129683, "grad_norm": 0.601615309715271, "learning_rate": 1.1455923682523476e-06, "loss": 0.298, "step": 840 },
    { "epoch": 0.7081103334705641, "grad_norm": 0.5637004971504211, "learning_rate": 1.0353997316904204e-06, "loss": 0.3021, "step": 860 },
    { "epoch": 0.724578015644298, "grad_norm": 0.6115686893463135, "learning_rate": 9.293798491276612e-07, "loss": 0.3007, "step": 880 },
    { "epoch": 0.7410456978180321, "grad_norm": 0.56075119972229, "learning_rate": 8.278347790154595e-07, "loss": 0.2965, "step": 900 },
    { "epoch": 0.7410456978180321, "eval_loss": 0.2931554615497589, "eval_runtime": 38.421, "eval_samples_per_second": 52.055, "eval_steps_per_second": 6.507, "step": 900 },
    { "epoch": 0.7575133799917662, "grad_norm": 0.5739229917526245, "learning_rate": 7.310538307350684e-07, "loss": 0.2949, "step": 920 },
    { "epoch": 0.7739810621655002, "grad_norm": 0.6038621664047241, "learning_rate": 6.393127403338714e-07, "loss": 0.2999, "step": 940 },
    { "epoch": 0.7904487443392343, "grad_norm": 0.6664382815361023, "learning_rate": 5.528728849330817e-07, "loss": 0.3014, "step": 960 },
    { "epoch": 0.8069164265129684, "grad_norm": 0.5887951850891113, "learning_rate": 4.719805380450804e-07, "loss": 0.2979, "step": 980 },
    { "epoch": 0.8233841086867023, "grad_norm": 0.584530234336853, "learning_rate": 3.9686616792204677e-07, "loss": 0.2935, "step": 1000 },
    { "epoch": 0.8233841086867023, "eval_loss": 0.29177162051200867, "eval_runtime": 38.3967, "eval_samples_per_second": 52.088, "eval_steps_per_second": 6.511, "step": 1000 },
    { "epoch": 0.8398517908604364, "grad_norm": 0.520577609539032, "learning_rate": 3.2774378093494067e-07, "loss": 0.2974, "step": 1020 },
    { "epoch": 0.8563194730341704, "grad_norm": 0.60713130235672, "learning_rate": 2.6481031185358995e-07, "loss": 0.3022, "step": 1040 },
    { "epoch": 0.8727871552079045, "grad_norm": 0.655536413192749, "learning_rate": 2.0824506276503898e-07, "loss": 0.2961, "step": 1060 },
    { "epoch": 0.8892548373816386, "grad_norm": 0.5735917091369629, "learning_rate": 1.5820919222869325e-07, "loss": 0.2983, "step": 1080 },
    { "epoch": 0.9057225195553725, "grad_norm": 0.5977383852005005, "learning_rate": 1.1484525612372372e-07, "loss": 0.2975, "step": 1100 },
    { "epoch": 0.9057225195553725, "eval_loss": 0.2911008894443512, "eval_runtime": 38.7911, "eval_samples_per_second": 51.558, "eval_steps_per_second": 6.445, "step": 1100 },
    { "epoch": 0.9221902017291066, "grad_norm": 0.602390468120575, "learning_rate": 7.827680149686879e-08, "loss": 0.299, "step": 1120 },
    { "epoch": 0.9386578839028407, "grad_norm": 0.5963403582572937, "learning_rate": 4.86080145678014e-08, "loss": 0.2954, "step": 1140 },
    { "epoch": 0.9551255660765747, "grad_norm": 0.5622375011444092, "learning_rate": 2.5923423894919365e-08, "loss": 0.2968, "step": 1160 },
    { "epoch": 0.9715932482503088, "grad_norm": 0.5845054984092712, "learning_rate": 1.02876595472573e-08, "loss": 0.292, "step": 1180 },
    { "epoch": 0.9880609304240429, "grad_norm": 0.6160269975662231, "learning_rate": 1.7452689686631164e-09, "loss": 0.304, "step": 1200 },
    { "epoch": 0.9880609304240429, "eval_loss": 0.2909594774246216, "eval_runtime": 38.9593, "eval_samples_per_second": 51.336, "eval_steps_per_second": 6.417, "step": 1200 },
    { "epoch": 0.9995883079456567, "step": 1214, "total_flos": 5.587165226987422e+18, "train_loss": 0.332974739875982, "train_runtime": 7444.239, "train_samples_per_second": 10.439, "train_steps_per_second": 0.163 }
  ],
  "logging_steps": 20,
  "max_steps": 1214,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.587165226987422e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}