| { |
| "best_global_step": 100, |
| "best_metric": 0.4209205210208893, |
| "best_model_checkpoint": "results/Qwen2.5-3B-Instruct-SFT/checkpoint-100", |
| "epoch": 0.9941582580987786, |
| "eval_steps": 50, |
| "global_step": 117, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0, |
| "eval_loss": 1.0300904512405396, |
| "eval_num_tokens": 0.0, |
| "eval_runtime": 5.1949, |
| "eval_samples_per_second": 3.85, |
| "eval_steps_per_second": 1.925, |
| "step": 0 |
| }, |
| { |
| "epoch": 0, |
| "eval_reward_final_answer": 0.0, |
| "step": 0 |
| }, |
| { |
| "epoch": 0, |
| "eval_reward_response_format": 0.0, |
| "step": 0 |
| }, |
| { |
| "epoch": 0, |
| "eval_reward_interaction": 0.0, |
| "step": 0 |
| }, |
| { |
| "epoch": 0.008497079129049389, |
| "grad_norm": 2.155670404434204, |
| "learning_rate": 0.0, |
| "loss": 0.9504, |
| "num_tokens": 221701.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.016994158258098777, |
| "grad_norm": 2.175981044769287, |
| "learning_rate": 5e-06, |
| "loss": 0.9609, |
| "num_tokens": 443374.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.025491237387148168, |
| "grad_norm": 2.1068813800811768, |
| "learning_rate": 1e-05, |
| "loss": 0.9455, |
| "num_tokens": 667777.0, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.033988316516197555, |
| "grad_norm": 2.1638681888580322, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.9521, |
| "num_tokens": 887347.0, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.04248539564524695, |
| "grad_norm": 2.1004698276519775, |
| "learning_rate": 2e-05, |
| "loss": 0.9523, |
| "num_tokens": 1111957.0, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.050982474774296335, |
| "grad_norm": 2.0184059143066406, |
| "learning_rate": 1.9996582763224565e-05, |
| "loss": 0.9102, |
| "num_tokens": 1340874.0, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.05947955390334572, |
| "grad_norm": 2.0365166664123535, |
| "learning_rate": 1.9986333647899847e-05, |
| "loss": 0.9047, |
| "num_tokens": 1553400.0, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.06797663303239511, |
| "grad_norm": 1.7037795782089233, |
| "learning_rate": 1.9969260437060028e-05, |
| "loss": 0.8468, |
| "num_tokens": 1779751.0, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.07647371216144451, |
| "grad_norm": 1.5123393535614014, |
| "learning_rate": 1.9945376095861546e-05, |
| "loss": 0.818, |
| "num_tokens": 1996995.0, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0849707912904939, |
| "grad_norm": 1.132192611694336, |
| "learning_rate": 1.991469876173753e-05, |
| "loss": 0.7753, |
| "num_tokens": 2228097.0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09346787041954328, |
| "grad_norm": 0.8642380237579346, |
| "learning_rate": 1.9877251730624504e-05, |
| "loss": 0.7495, |
| "num_tokens": 2451216.0, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.10196494954859267, |
| "grad_norm": 0.6592623591423035, |
| "learning_rate": 1.983306343927176e-05, |
| "loss": 0.7353, |
| "num_tokens": 2671449.0, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.11046202867764206, |
| "grad_norm": 0.5095056295394897, |
| "learning_rate": 1.978216744364692e-05, |
| "loss": 0.7199, |
| "num_tokens": 2887598.0, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.11895910780669144, |
| "grad_norm": 0.43245795369148254, |
| "learning_rate": 1.9724602393453976e-05, |
| "loss": 0.7074, |
| "num_tokens": 3102297.0, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.12745618693574085, |
| "grad_norm": 0.3649689853191376, |
| "learning_rate": 1.9660412002783254e-05, |
| "loss": 0.6916, |
| "num_tokens": 3322488.0, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.13595326606479022, |
| "grad_norm": 0.32840588688850403, |
| "learning_rate": 1.958964501691557e-05, |
| "loss": 0.6907, |
| "num_tokens": 3552428.0, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.14445034519383962, |
| "grad_norm": 0.30777254700660706, |
| "learning_rate": 1.9512355175305713e-05, |
| "loss": 0.6805, |
| "num_tokens": 3779989.0, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.15294742432288902, |
| "grad_norm": 0.2842954695224762, |
| "learning_rate": 1.9428601170773492e-05, |
| "loss": 0.6681, |
| "num_tokens": 4019860.0, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.1614445034519384, |
| "grad_norm": 0.28378936648368835, |
| "learning_rate": 1.9338446604933218e-05, |
| "loss": 0.6591, |
| "num_tokens": 4243770.0, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.1699415825809878, |
| "grad_norm": 0.2757631242275238, |
| "learning_rate": 1.9241959939895518e-05, |
| "loss": 0.6637, |
| "num_tokens": 4471904.0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.17843866171003717, |
| "grad_norm": 0.2628677487373352, |
| "learning_rate": 1.91392144462782e-05, |
| "loss": 0.6455, |
| "num_tokens": 4707414.0, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.18693574083908657, |
| "grad_norm": 0.2680025100708008, |
| "learning_rate": 1.9030288147565547e-05, |
| "loss": 0.6415, |
| "num_tokens": 4934312.0, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.19543281996813594, |
| "grad_norm": 0.26622337102890015, |
| "learning_rate": 1.89152637608584e-05, |
| "loss": 0.635, |
| "num_tokens": 5154540.0, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.20392989909718534, |
| "grad_norm": 0.25727367401123047, |
| "learning_rate": 1.879422863405995e-05, |
| "loss": 0.6281, |
| "num_tokens": 5381802.0, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.21242697822623474, |
| "grad_norm": 0.2600906491279602, |
| "learning_rate": 1.8667274679544944e-05, |
| "loss": 0.6198, |
| "num_tokens": 5600149.0, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.22092405735528411, |
| "grad_norm": 0.24757230281829834, |
| "learning_rate": 1.8534498304362758e-05, |
| "loss": 0.6131, |
| "num_tokens": 5827367.0, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.22942113648433352, |
| "grad_norm": 0.2472054362297058, |
| "learning_rate": 1.8396000337027208e-05, |
| "loss": 0.6062, |
| "num_tokens": 6047147.0, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.2379182156133829, |
| "grad_norm": 0.23867134749889374, |
| "learning_rate": 1.8251885950948805e-05, |
| "loss": 0.5917, |
| "num_tokens": 6267160.0, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.2464152947424323, |
| "grad_norm": 0.23080606758594513, |
| "learning_rate": 1.8102264584567543e-05, |
| "loss": 0.588, |
| "num_tokens": 6501872.0, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.2549123738714817, |
| "grad_norm": 0.22549813985824585, |
| "learning_rate": 1.7947249858246888e-05, |
| "loss": 0.5868, |
| "num_tokens": 6739822.0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2634094530005311, |
| "grad_norm": 0.22806097567081451, |
| "learning_rate": 1.7786959487992068e-05, |
| "loss": 0.5726, |
| "num_tokens": 6969353.0, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.27190653212958044, |
| "grad_norm": 0.23609699308872223, |
| "learning_rate": 1.7621515196058188e-05, |
| "loss": 0.5695, |
| "num_tokens": 7196669.0, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.28040361125862984, |
| "grad_norm": 0.2319885790348053, |
| "learning_rate": 1.7451042618516063e-05, |
| "loss": 0.5592, |
| "num_tokens": 7427812.0, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.28890069038767924, |
| "grad_norm": 0.2277105748653412, |
| "learning_rate": 1.727567120984596e-05, |
| "loss": 0.5453, |
| "num_tokens": 7664616.0, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.29739776951672864, |
| "grad_norm": 0.24143275618553162, |
| "learning_rate": 1.709553414463167e-05, |
| "loss": 0.5381, |
| "num_tokens": 7892289.0, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.30589484864577804, |
| "grad_norm": 0.23762071132659912, |
| "learning_rate": 1.6910768216429613e-05, |
| "loss": 0.5447, |
| "num_tokens": 8125714.0, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.3143919277748274, |
| "grad_norm": 0.2455436736345291, |
| "learning_rate": 1.6721513733889716e-05, |
| "loss": 0.5234, |
| "num_tokens": 8354957.0, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.3228890069038768, |
| "grad_norm": 0.25508517026901245, |
| "learning_rate": 1.6527914414207012e-05, |
| "loss": 0.5173, |
| "num_tokens": 8575672.0, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.3313860860329262, |
| "grad_norm": 0.256209135055542, |
| "learning_rate": 1.6330117273984822e-05, |
| "loss": 0.5232, |
| "num_tokens": 8795680.0, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.3398831651619756, |
| "grad_norm": 0.2554221451282501, |
| "learning_rate": 1.6128272517592397e-05, |
| "loss": 0.5019, |
| "num_tokens": 9017987.0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.348380244291025, |
| "grad_norm": 0.24708497524261475, |
| "learning_rate": 1.5922533423101843e-05, |
| "loss": 0.4874, |
| "num_tokens": 9251233.0, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.35687732342007433, |
| "grad_norm": 0.2547791302204132, |
| "learning_rate": 1.5713056225890904e-05, |
| "loss": 0.4929, |
| "num_tokens": 9483146.0, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.36537440254912373, |
| "grad_norm": 0.26357918977737427, |
| "learning_rate": 1.55e-05, |
| "loss": 0.4735, |
| "num_tokens": 9707559.0, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.37387148167817313, |
| "grad_norm": 0.272932767868042, |
| "learning_rate": 1.5283526537333664e-05, |
| "loss": 0.4728, |
| "num_tokens": 9927096.0, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.38236856080722254, |
| "grad_norm": 0.2784619629383087, |
| "learning_rate": 1.5063800224798007e-05, |
| "loss": 0.4537, |
| "num_tokens": 10146036.0, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.3908656399362719, |
| "grad_norm": 0.28873109817504883, |
| "learning_rate": 1.4840987919467634e-05, |
| "loss": 0.4438, |
| "num_tokens": 10360134.0, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.3993627190653213, |
| "grad_norm": 0.2874409854412079, |
| "learning_rate": 1.4615258821876728e-05, |
| "loss": 0.4411, |
| "num_tokens": 10580887.0, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.4078597981943707, |
| "grad_norm": 0.28669029474258423, |
| "learning_rate": 1.4386784347530522e-05, |
| "loss": 0.4284, |
| "num_tokens": 10803846.0, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.4163568773234201, |
| "grad_norm": 0.28335171937942505, |
| "learning_rate": 1.4155737996734791e-05, |
| "loss": 0.4254, |
| "num_tokens": 11035018.0, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "grad_norm": 0.29582643508911133, |
| "learning_rate": 1.3922295222842153e-05, |
| "loss": 0.4183, |
| "num_tokens": 11259327.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_loss": 0.44191795587539673, |
| "eval_num_tokens": 11259327.0, |
| "eval_runtime": 6.3535, |
| "eval_samples_per_second": 3.148, |
| "eval_steps_per_second": 1.574, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_reward_final_answer": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_reward_response_format": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_reward_interaction": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_loss": 0.44191795587539673, |
| "eval_num_tokens": 0.0, |
| "eval_runtime": 5.1228, |
| "eval_samples_per_second": 3.904, |
| "eval_steps_per_second": 1.952, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_reward_final_answer": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_reward_response_format": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_reward_interaction": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_diagnostic/format_valid_ratio": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_diagnostic/tool_parse_success_ratio": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_diagnostic/answer_attempted_ratio": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_diagnostic/avg_turns_taken": 1.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "eval_diagnostic/stop_reason/extraction_failed": 1.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.43335103558151883, |
| "grad_norm": 0.29690828919410706, |
| "learning_rate": 1.3686633299015254e-05, |
| "loss": 0.4135, |
| "num_tokens": 227214.0, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.44184811471056823, |
| "grad_norm": 0.3105921745300293, |
| "learning_rate": 1.3448931183608016e-05, |
| "loss": 0.4018, |
| "num_tokens": 448367.0, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.45034519383961763, |
| "grad_norm": 0.30604615807533264, |
| "learning_rate": 1.3209369384267194e-05, |
| "loss": 0.396, |
| "num_tokens": 675175.0, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.45884227296866703, |
| "grad_norm": 0.3152115046977997, |
| "learning_rate": 1.2968129820857384e-05, |
| "loss": 0.3767, |
| "num_tokens": 900784.0, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.46733935209771643, |
| "grad_norm": 0.3190907835960388, |
| "learning_rate": 1.2725395687313646e-05, |
| "loss": 0.3731, |
| "num_tokens": 1128575.0, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.4758364312267658, |
| "grad_norm": 0.3275493383407593, |
| "learning_rate": 1.2481351312526606e-05, |
| "loss": 0.3668, |
| "num_tokens": 1347830.0, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.4843335103558152, |
| "grad_norm": 0.30037736892700195, |
| "learning_rate": 1.2236182020365675e-05, |
| "loss": 0.368, |
| "num_tokens": 1584214.0, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.4928305894848646, |
| "grad_norm": 0.30363041162490845, |
| "learning_rate": 1.1990073988946716e-05, |
| "loss": 0.3584, |
| "num_tokens": 1806122.0, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.501327668613914, |
| "grad_norm": 0.27165499329566956, |
| "learning_rate": 1.1743214109250994e-05, |
| "loss": 0.3474, |
| "num_tokens": 2038933.0, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.5098247477429634, |
| "grad_norm": 0.26867446303367615, |
| "learning_rate": 1.1495789843202792e-05, |
| "loss": 0.3432, |
| "num_tokens": 2257233.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5183218268720128, |
| "grad_norm": 0.22571073472499847, |
| "learning_rate": 1.124798908131346e-05, |
| "loss": 0.3497, |
| "num_tokens": 2502206.0, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.5268189060010622, |
| "grad_norm": 0.22096557915210724, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.3357, |
| "num_tokens": 2733328.0, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.5353159851301115, |
| "grad_norm": 0.21603769063949585, |
| "learning_rate": 1.0752010918686544e-05, |
| "loss": 0.3323, |
| "num_tokens": 2958525.0, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.5438130642591609, |
| "grad_norm": 0.2027578502893448, |
| "learning_rate": 1.050421015679721e-05, |
| "loss": 0.3346, |
| "num_tokens": 3189000.0, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.5523101433882103, |
| "grad_norm": 0.20277895033359528, |
| "learning_rate": 1.0256785890749011e-05, |
| "loss": 0.3126, |
| "num_tokens": 3407547.0, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5608072225172597, |
| "grad_norm": 0.187447190284729, |
| "learning_rate": 1.0009926011053289e-05, |
| "loss": 0.3175, |
| "num_tokens": 3638073.0, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.5693043016463091, |
| "grad_norm": 0.18870113790035248, |
| "learning_rate": 9.763817979634327e-06, |
| "loss": 0.3053, |
| "num_tokens": 3857870.0, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.5778013807753585, |
| "grad_norm": 0.18314893543720245, |
| "learning_rate": 9.518648687473396e-06, |
| "loss": 0.3087, |
| "num_tokens": 4081907.0, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.5862984599044079, |
| "grad_norm": 0.17281264066696167, |
| "learning_rate": 9.274604312686356e-06, |
| "loss": 0.3167, |
| "num_tokens": 4318189.0, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.5947955390334573, |
| "grad_norm": 0.1761389523744583, |
| "learning_rate": 9.031870179142619e-06, |
| "loss": 0.2964, |
| "num_tokens": 4542756.0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6032926181625067, |
| "grad_norm": 0.17463359236717224, |
| "learning_rate": 8.790630615732809e-06, |
| "loss": 0.2915, |
| "num_tokens": 4764901.0, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.6117896972915561, |
| "grad_norm": 0.1684163361787796, |
| "learning_rate": 8.551068816391984e-06, |
| "loss": 0.292, |
| "num_tokens": 4989831.0, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.6202867764206054, |
| "grad_norm": 0.17157095670700073, |
| "learning_rate": 8.313366700984753e-06, |
| "loss": 0.2779, |
| "num_tokens": 5202719.0, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.6287838555496548, |
| "grad_norm": 0.16972655057907104, |
| "learning_rate": 8.07770477715785e-06, |
| "loss": 0.285, |
| "num_tokens": 5424906.0, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.6372809346787042, |
| "grad_norm": 0.16237466037273407, |
| "learning_rate": 7.844262003265214e-06, |
| "loss": 0.2903, |
| "num_tokens": 5653610.0, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.6457780138077536, |
| "grad_norm": 0.1564113050699234, |
| "learning_rate": 7.613215652469481e-06, |
| "loss": 0.298, |
| "num_tokens": 5886275.0, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.654275092936803, |
| "grad_norm": 0.1518831104040146, |
| "learning_rate": 7.384741178123278e-06, |
| "loss": 0.29, |
| "num_tokens": 6127331.0, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.6627721720658524, |
| "grad_norm": 0.1514195203781128, |
| "learning_rate": 7.159012080532368e-06, |
| "loss": 0.2808, |
| "num_tokens": 6350243.0, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.6712692511949018, |
| "grad_norm": 0.14839230477809906, |
| "learning_rate": 6.936199775201998e-06, |
| "loss": 0.2759, |
| "num_tokens": 6574494.0, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.6797663303239512, |
| "grad_norm": 0.1434764415025711, |
| "learning_rate": 6.7164734626663384e-06, |
| "loss": 0.2812, |
| "num_tokens": 6805646.0, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6882634094530006, |
| "grad_norm": 0.14427417516708374, |
| "learning_rate": 6.500000000000003e-06, |
| "loss": 0.2708, |
| "num_tokens": 7029262.0, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.69676048858205, |
| "grad_norm": 0.14127525687217712, |
| "learning_rate": 6.2869437741091e-06, |
| "loss": 0.2813, |
| "num_tokens": 7257816.0, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.7052575677110993, |
| "grad_norm": 0.1428772211074829, |
| "learning_rate": 6.077466576898161e-06, |
| "loss": 0.2711, |
| "num_tokens": 7481399.0, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.7137546468401487, |
| "grad_norm": 0.14086274802684784, |
| "learning_rate": 5.871727482407605e-06, |
| "loss": 0.2679, |
| "num_tokens": 7703455.0, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.7222517259691981, |
| "grad_norm": 0.13953223824501038, |
| "learning_rate": 5.669882726015181e-06, |
| "loss": 0.2692, |
| "num_tokens": 7924077.0, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.7307488050982475, |
| "grad_norm": 0.14064429700374603, |
| "learning_rate": 5.47208558579299e-06, |
| "loss": 0.259, |
| "num_tokens": 8143604.0, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.7392458842272969, |
| "grad_norm": 0.13626375794410706, |
| "learning_rate": 5.27848626611029e-06, |
| "loss": 0.2674, |
| "num_tokens": 8370430.0, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.7477429633563463, |
| "grad_norm": 0.13688842952251434, |
| "learning_rate": 5.089231783570392e-06, |
| "loss": 0.2763, |
| "num_tokens": 8603383.0, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.7562400424853957, |
| "grad_norm": 0.13918966054916382, |
| "learning_rate": 4.904465855368333e-06, |
| "loss": 0.2742, |
| "num_tokens": 8830677.0, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.7647371216144451, |
| "grad_norm": 0.14204055070877075, |
| "learning_rate": 4.724328790154042e-06, |
| "loss": 0.2525, |
| "num_tokens": 9049384.0, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.7732342007434945, |
| "grad_norm": 0.13645566999912262, |
| "learning_rate": 4.548957381483941e-06, |
| "loss": 0.2752, |
| "num_tokens": 9283178.0, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.7817312798725438, |
| "grad_norm": 0.14060115814208984, |
| "learning_rate": 4.378484803941816e-06, |
| "loss": 0.2561, |
| "num_tokens": 9507128.0, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.7902283590015932, |
| "grad_norm": 0.140924334526062, |
| "learning_rate": 4.2130405120079356e-06, |
| "loss": 0.2615, |
| "num_tokens": 9733410.0, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.7987254381306426, |
| "grad_norm": 0.1394304782152176, |
| "learning_rate": 4.052750141753112e-06, |
| "loss": 0.2616, |
| "num_tokens": 9960712.0, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.807222517259692, |
| "grad_norm": 0.1327814757823944, |
| "learning_rate": 3.89773541543246e-06, |
| "loss": 0.2618, |
| "num_tokens": 10189564.0, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.8157195963887414, |
| "grad_norm": 0.13691458106040955, |
| "learning_rate": 3.748114049051197e-06, |
| "loss": 0.2528, |
| "num_tokens": 10412249.0, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.8242166755177908, |
| "grad_norm": 0.13488516211509705, |
| "learning_rate": 3.603999662972795e-06, |
| "loss": 0.2524, |
| "num_tokens": 10633903.0, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.8327137546468402, |
| "grad_norm": 0.12742717564105988, |
| "learning_rate": 3.4655016956372432e-06, |
| "loss": 0.2646, |
| "num_tokens": 10864574.0, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.8412108337758896, |
| "grad_norm": 0.12795040011405945, |
| "learning_rate": 3.332725320455058e-06, |
| "loss": 0.2442, |
| "num_tokens": 11083990.0, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "grad_norm": 0.12140467017889023, |
| "learning_rate": 3.205771365940052e-06, |
| "loss": 0.2523, |
| "num_tokens": 11315846.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "eval_loss": 0.4209205210208893, |
| "eval_num_tokens": 11315846.0, |
| "eval_runtime": 4.5682, |
| "eval_samples_per_second": 4.378, |
| "eval_steps_per_second": 2.189, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "eval_reward_final_answer": 0.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "eval_reward_response_format": 0.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "eval_reward_interaction": 0.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "eval_diagnostic/format_valid_ratio": 0.1, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "eval_diagnostic/tool_parse_success_ratio": 0.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "eval_diagnostic/answer_attempted_ratio": 0.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "eval_diagnostic/avg_turns_taken": 1.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "eval_diagnostic/stop_reason/extraction_failed": 0.9, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "eval_diagnostic/stop_reason/no_action": 0.1, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.8582049920339884, |
| "grad_norm": 0.12342186272144318, |
| "learning_rate": 3.0847362391415995e-06, |
| "loss": 0.2586, |
| "num_tokens": 11540477.0, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.8667020711630377, |
| "grad_norm": 0.1200730949640274, |
| "learning_rate": 2.969711852434454e-06, |
| "loss": 0.2604, |
| "num_tokens": 11771582.0, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.8751991502920871, |
| "grad_norm": 0.12449000030755997, |
| "learning_rate": 2.860785553721803e-06, |
| "loss": 0.2453, |
| "num_tokens": 11992209.0, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.8836962294211365, |
| "grad_norm": 0.11838784068822861, |
| "learning_rate": 2.7580400601044825e-06, |
| "loss": 0.2523, |
| "num_tokens": 12214354.0, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.8921933085501859, |
| "grad_norm": 0.11962386220693588, |
| "learning_rate": 2.6615533950667844e-06, |
| "loss": 0.2456, |
| "num_tokens": 12433134.0, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.9006903876792353, |
| "grad_norm": 0.11446674913167953, |
| "learning_rate": 2.5713988292265084e-06, |
| "loss": 0.2656, |
| "num_tokens": 12672477.0, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.9091874668082847, |
| "grad_norm": 0.1163739487528801, |
| "learning_rate": 2.4876448246942884e-06, |
| "loss": 0.2462, |
| "num_tokens": 12894135.0, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.9176845459373341, |
| "grad_norm": 0.11075320094823837, |
| "learning_rate": 2.4103549830844328e-06, |
| "loss": 0.2641, |
| "num_tokens": 13129073.0, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.9261816250663835, |
| "grad_norm": 0.11537513881921768, |
| "learning_rate": 2.3395879972167464e-06, |
| "loss": 0.2474, |
| "num_tokens": 13348255.0, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.9346787041954329, |
| "grad_norm": 0.10961325466632843, |
| "learning_rate": 2.275397606546027e-06, |
| "loss": 0.2697, |
| "num_tokens": 13592454.0, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.9431757833244823, |
| "grad_norm": 0.11164919286966324, |
| "learning_rate": 2.21783255635308e-06, |
| "loss": 0.2552, |
| "num_tokens": 13819727.0, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.9516728624535316, |
| "grad_norm": 0.10987438261508942, |
| "learning_rate": 2.1669365607282396e-06, |
| "loss": 0.2548, |
| "num_tokens": 14051729.0, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.960169941582581, |
| "grad_norm": 0.11316878348588943, |
| "learning_rate": 2.1227482693754995e-06, |
| "loss": 0.2492, |
| "num_tokens": 14274432.0, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.9686670207116304, |
| "grad_norm": 0.10757338255643845, |
| "learning_rate": 2.085301238262471e-06, |
| "loss": 0.2531, |
| "num_tokens": 14498986.0, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.9771640998406798, |
| "grad_norm": 0.10984344780445099, |
| "learning_rate": 2.054623904138455e-06, |
| "loss": 0.2535, |
| "num_tokens": 14724914.0, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.9856611789697292, |
| "grad_norm": 0.10751090943813324, |
| "learning_rate": 2.0307395629399716e-06, |
| "loss": 0.2517, |
| "num_tokens": 14957559.0, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.9941582580987786, |
| "grad_norm": 0.10235580801963806, |
| "learning_rate": 2.0136663521001547e-06, |
| "loss": 0.2592, |
| "num_tokens": 15193005.0, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.9941582580987786, |
| "step": 117, |
| "total_flos": 5.5251981578153165e+17, |
| "train_loss": 0.16806216436064142, |
| "train_runtime": 6592.4366, |
| "train_samples_per_second": 1.142, |
| "train_steps_per_second": 0.018 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 118, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.5251981578153165e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|