| { |
| "best_global_step": 383, |
| "best_metric": 0.3293873369693756, |
| "best_model_checkpoint": "./lora_qwen32b_cpp_abdiff_v1/checkpoint-383", |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 383, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02611818478615736, |
| "grad_norm": 0.6525485515594482, |
| "learning_rate": 2.347826086956522e-06, |
| "loss": 1.5822, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05223636957231472, |
| "grad_norm": 0.832341730594635, |
| "learning_rate": 4.956521739130435e-06, |
| "loss": 1.5516, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07835455435847209, |
| "grad_norm": 0.9468288421630859, |
| "learning_rate": 7.5652173913043475e-06, |
| "loss": 1.5544, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10447273914462944, |
| "grad_norm": 0.6425743103027344, |
| "learning_rate": 1.017391304347826e-05, |
| "loss": 1.3415, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1305909239307868, |
| "grad_norm": 0.6995680332183838, |
| "learning_rate": 1.2782608695652173e-05, |
| "loss": 1.2298, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.15670910871694418, |
| "grad_norm": 0.559788703918457, |
| "learning_rate": 1.5391304347826088e-05, |
| "loss": 1.0121, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.18282729350310153, |
| "grad_norm": 0.565027117729187, |
| "learning_rate": 1.8e-05, |
| "loss": 0.7633, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.20894547828925888, |
| "grad_norm": 0.2943669557571411, |
| "learning_rate": 2.0608695652173913e-05, |
| "loss": 0.6203, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.23506366307541626, |
| "grad_norm": 0.2539921998977661, |
| "learning_rate": 2.3217391304347826e-05, |
| "loss": 0.5356, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2611818478615736, |
| "grad_norm": 0.32755884528160095, |
| "learning_rate": 2.582608695652174e-05, |
| "loss": 0.5063, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.28730003264773096, |
| "grad_norm": 0.2826889753341675, |
| "learning_rate": 2.8434782608695652e-05, |
| "loss": 0.4491, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.31341821743388837, |
| "grad_norm": 0.26905569434165955, |
| "learning_rate": 2.988394584139265e-05, |
| "loss": 0.3921, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.3395364022200457, |
| "grad_norm": 0.23731912672519684, |
| "learning_rate": 2.9593810444874276e-05, |
| "loss": 0.3865, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.36565458700620307, |
| "grad_norm": 0.34480321407318115, |
| "learning_rate": 2.93036750483559e-05, |
| "loss": 0.37, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3917727717923604, |
| "grad_norm": 0.33365756273269653, |
| "learning_rate": 2.9013539651837528e-05, |
| "loss": 0.3918, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.41789095657851777, |
| "grad_norm": 0.24923010170459747, |
| "learning_rate": 2.872340425531915e-05, |
| "loss": 0.3652, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4440091413646752, |
| "grad_norm": 0.2503843605518341, |
| "learning_rate": 2.8433268858800773e-05, |
| "loss": 0.3506, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4701273261508325, |
| "grad_norm": 0.2814203202724457, |
| "learning_rate": 2.81431334622824e-05, |
| "loss": 0.3173, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4962455109369899, |
| "grad_norm": 0.22126232087612152, |
| "learning_rate": 2.785299806576402e-05, |
| "loss": 0.3291, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5223636957231472, |
| "grad_norm": 0.3136242926120758, |
| "learning_rate": 2.7562862669245647e-05, |
| "loss": 0.3456, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5484818805093046, |
| "grad_norm": 0.26128554344177246, |
| "learning_rate": 2.7272727272727273e-05, |
| "loss": 0.2983, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5746000652954619, |
| "grad_norm": 0.21475030481815338, |
| "learning_rate": 2.69825918762089e-05, |
| "loss": 0.3252, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6007182500816193, |
| "grad_norm": 0.20808374881744385, |
| "learning_rate": 2.669245647969052e-05, |
| "loss": 0.3057, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6268364348677767, |
| "grad_norm": 0.3940744698047638, |
| "learning_rate": 2.6402321083172148e-05, |
| "loss": 0.3157, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6529546196539341, |
| "grad_norm": 0.28545427322387695, |
| "learning_rate": 2.6112185686653773e-05, |
| "loss": 0.3109, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6790728044400914, |
| "grad_norm": 0.2335038036108017, |
| "learning_rate": 2.5822050290135396e-05, |
| "loss": 0.3023, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7051909892262488, |
| "grad_norm": 0.23341821134090424, |
| "learning_rate": 2.5531914893617022e-05, |
| "loss": 0.2742, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7313091740124061, |
| "grad_norm": 0.2653687000274658, |
| "learning_rate": 2.5241779497098648e-05, |
| "loss": 0.2947, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7574273587985635, |
| "grad_norm": 0.21947993338108063, |
| "learning_rate": 2.495164410058027e-05, |
| "loss": 0.2753, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7835455435847208, |
| "grad_norm": 0.2334894835948944, |
| "learning_rate": 2.4661508704061896e-05, |
| "loss": 0.2558, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8096637283708782, |
| "grad_norm": 0.2747754752635956, |
| "learning_rate": 2.4371373307543522e-05, |
| "loss": 0.2852, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8357819131570355, |
| "grad_norm": 0.2383272498846054, |
| "learning_rate": 2.408123791102515e-05, |
| "loss": 0.3035, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.861900097943193, |
| "grad_norm": 0.2756940722465515, |
| "learning_rate": 2.379110251450677e-05, |
| "loss": 0.2706, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8880182827293504, |
| "grad_norm": 0.4882807731628418, |
| "learning_rate": 2.3500967117988397e-05, |
| "loss": 0.2538, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9141364675155077, |
| "grad_norm": 0.2678774893283844, |
| "learning_rate": 2.321083172147002e-05, |
| "loss": 0.2715, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.940254652301665, |
| "grad_norm": 0.2795256972312927, |
| "learning_rate": 2.2920696324951642e-05, |
| "loss": 0.2563, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9663728370878224, |
| "grad_norm": 0.29648348689079285, |
| "learning_rate": 2.2630560928433268e-05, |
| "loss": 0.2649, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9924910218739798, |
| "grad_norm": 0.2755793333053589, |
| "learning_rate": 2.2340425531914894e-05, |
| "loss": 0.2495, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.3293873369693756, |
| "eval_runtime": 167.8172, |
| "eval_samples_per_second": 5.47, |
| "eval_steps_per_second": 2.735, |
| "step": 383 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1149, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.667970512056578e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|