{
  "best_global_step": 383,
  "best_metric": 0.3038630783557892,
  "best_model_checkpoint": "./lora_qwen7b_cpp_abdiff_v1/checkpoint-383",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 383,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02611818478615736,
      "grad_norm": 0.272135853767395,
      "learning_rate": 2.347826086956522e-06,
      "loss": 0.747,
      "step": 10
    },
    {
      "epoch": 0.05223636957231472,
      "grad_norm": 0.2994903028011322,
      "learning_rate": 4.956521739130435e-06,
      "loss": 0.729,
      "step": 20
    },
    {
      "epoch": 0.07835455435847209,
      "grad_norm": 0.3347424566745758,
      "learning_rate": 7.5652173913043475e-06,
      "loss": 0.7694,
      "step": 30
    },
    {
      "epoch": 0.10447273914462944,
      "grad_norm": 0.24008171260356903,
      "learning_rate": 1.017391304347826e-05,
      "loss": 0.6931,
      "step": 40
    },
    {
      "epoch": 0.1305909239307868,
      "grad_norm": 0.31324484944343567,
      "learning_rate": 1.2782608695652173e-05,
      "loss": 0.6601,
      "step": 50
    },
    {
      "epoch": 0.15670910871694418,
      "grad_norm": 0.3652394115924835,
      "learning_rate": 1.5391304347826088e-05,
      "loss": 0.4995,
      "step": 60
    },
    {
      "epoch": 0.18282729350310153,
      "grad_norm": 0.18758858740329742,
      "learning_rate": 1.8e-05,
      "loss": 0.3874,
      "step": 70
    },
    {
      "epoch": 0.20894547828925888,
      "grad_norm": 0.11186491698026657,
      "learning_rate": 2.0608695652173913e-05,
      "loss": 0.3579,
      "step": 80
    },
    {
      "epoch": 0.23506366307541626,
      "grad_norm": 0.0821973979473114,
      "learning_rate": 2.3217391304347826e-05,
      "loss": 0.3375,
      "step": 90
    },
    {
      "epoch": 0.2611818478615736,
      "grad_norm": 0.10780682414770126,
      "learning_rate": 2.582608695652174e-05,
      "loss": 0.3286,
      "step": 100
    },
    {
      "epoch": 0.28730003264773096,
      "grad_norm": 0.09552709758281708,
      "learning_rate": 2.8434782608695652e-05,
      "loss": 0.3135,
      "step": 110
    },
    {
      "epoch": 0.31341821743388837,
      "grad_norm": 0.1099899634718895,
      "learning_rate": 2.988394584139265e-05,
      "loss": 0.2828,
      "step": 120
    },
    {
      "epoch": 0.3395364022200457,
      "grad_norm": 0.08481885492801666,
      "learning_rate": 2.9593810444874276e-05,
      "loss": 0.2884,
      "step": 130
    },
    {
      "epoch": 0.36565458700620307,
      "grad_norm": 0.0872187465429306,
      "learning_rate": 2.93036750483559e-05,
      "loss": 0.2856,
      "step": 140
    },
    {
      "epoch": 0.3917727717923604,
      "grad_norm": 0.09415256232023239,
      "learning_rate": 2.9013539651837528e-05,
      "loss": 0.2958,
      "step": 150
    },
    {
      "epoch": 0.41789095657851777,
      "grad_norm": 0.09529490023851395,
      "learning_rate": 2.872340425531915e-05,
      "loss": 0.2832,
      "step": 160
    },
    {
      "epoch": 0.4440091413646752,
      "grad_norm": 0.12695518136024475,
      "learning_rate": 2.8433268858800773e-05,
      "loss": 0.2852,
      "step": 170
    },
    {
      "epoch": 0.4701273261508325,
      "grad_norm": 0.09822621941566467,
      "learning_rate": 2.81431334622824e-05,
      "loss": 0.2647,
      "step": 180
    },
    {
      "epoch": 0.4962455109369899,
      "grad_norm": 0.11181768029928207,
      "learning_rate": 2.785299806576402e-05,
      "loss": 0.2718,
      "step": 190
    },
    {
      "epoch": 0.5223636957231472,
      "grad_norm": 0.0999976173043251,
      "learning_rate": 2.7562862669245647e-05,
      "loss": 0.287,
      "step": 200
    },
    {
      "epoch": 0.5484818805093046,
      "grad_norm": 0.11232498288154602,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 0.2657,
      "step": 210
    },
    {
      "epoch": 0.5746000652954619,
      "grad_norm": 0.09556713700294495,
      "learning_rate": 2.69825918762089e-05,
      "loss": 0.2728,
      "step": 220
    },
    {
      "epoch": 0.6007182500816193,
      "grad_norm": 0.09838665276765823,
      "learning_rate": 2.669245647969052e-05,
      "loss": 0.2679,
      "step": 230
    },
    {
      "epoch": 0.6268364348677767,
      "grad_norm": 0.12542720139026642,
      "learning_rate": 2.6402321083172148e-05,
      "loss": 0.2717,
      "step": 240
    },
    {
      "epoch": 0.6529546196539341,
      "grad_norm": 0.12866875529289246,
      "learning_rate": 2.6112185686653773e-05,
      "loss": 0.2722,
      "step": 250
    },
    {
      "epoch": 0.6790728044400914,
      "grad_norm": 0.12063395977020264,
      "learning_rate": 2.5822050290135396e-05,
      "loss": 0.2665,
      "step": 260
    },
    {
      "epoch": 0.7051909892262488,
      "grad_norm": 0.12060956656932831,
      "learning_rate": 2.5531914893617022e-05,
      "loss": 0.2498,
      "step": 270
    },
    {
      "epoch": 0.7313091740124061,
      "grad_norm": 0.1269434541463852,
      "learning_rate": 2.5241779497098648e-05,
      "loss": 0.2685,
      "step": 280
    },
    {
      "epoch": 0.7574273587985635,
      "grad_norm": 0.13224200904369354,
      "learning_rate": 2.495164410058027e-05,
      "loss": 0.2562,
      "step": 290
    },
    {
      "epoch": 0.7835455435847208,
      "grad_norm": 0.12492657452821732,
      "learning_rate": 2.4661508704061896e-05,
      "loss": 0.2436,
      "step": 300
    },
    {
      "epoch": 0.8096637283708782,
      "grad_norm": 0.19281449913978577,
      "learning_rate": 2.4371373307543522e-05,
      "loss": 0.2593,
      "step": 310
    },
    {
      "epoch": 0.8357819131570355,
      "grad_norm": 0.13644647598266602,
      "learning_rate": 2.408123791102515e-05,
      "loss": 0.2678,
      "step": 320
    },
    {
      "epoch": 0.861900097943193,
      "grad_norm": 0.13078241050243378,
      "learning_rate": 2.379110251450677e-05,
      "loss": 0.2586,
      "step": 330
    },
    {
      "epoch": 0.8880182827293504,
      "grad_norm": 0.16267353296279907,
      "learning_rate": 2.3500967117988397e-05,
      "loss": 0.2441,
      "step": 340
    },
    {
      "epoch": 0.9141364675155077,
      "grad_norm": 0.14218953251838684,
      "learning_rate": 2.321083172147002e-05,
      "loss": 0.2561,
      "step": 350
    },
    {
      "epoch": 0.940254652301665,
      "grad_norm": 0.14463242888450623,
      "learning_rate": 2.2920696324951642e-05,
      "loss": 0.2437,
      "step": 360
    },
    {
      "epoch": 0.9663728370878224,
      "grad_norm": 0.15065905451774597,
      "learning_rate": 2.2630560928433268e-05,
      "loss": 0.2448,
      "step": 370
    },
    {
      "epoch": 0.9924910218739798,
      "grad_norm": 0.1440647393465042,
      "learning_rate": 2.2340425531914894e-05,
      "loss": 0.2407,
      "step": 380
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.3038630783557892,
      "eval_runtime": 46.9955,
      "eval_samples_per_second": 19.534,
      "eval_steps_per_second": 9.767,
      "step": 383
    }
  ],
  "logging_steps": 10,
  "max_steps": 1149,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.6853700492339405e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}