{ "best_global_step": 2075, "best_metric": 0.5185689926147461, "best_model_checkpoint": "./mcqa_qwen3_letter_alex/checkpoint-2075", "epoch": 1.0, "eval_steps": 500, "global_step": 2075, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.024102193299590263, "grad_norm": 70.03743743896484, "learning_rate": 2.6506024096385546e-07, "loss": 0.8067, "step": 50 }, { "epoch": 0.048204386599180526, "grad_norm": 58.46215057373047, "learning_rate": 5.662650602409639e-07, "loss": 0.6862, "step": 100 }, { "epoch": 0.07230657989877079, "grad_norm": 81.54195404052734, "learning_rate": 8.674698795180723e-07, "loss": 0.6603, "step": 150 }, { "epoch": 0.09640877319836105, "grad_norm": 100.12779235839844, "learning_rate": 1.1686746987951808e-06, "loss": 0.604, "step": 200 }, { "epoch": 0.12051096649795132, "grad_norm": 74.84662628173828, "learning_rate": 1.4698795180722893e-06, "loss": 0.6487, "step": 250 }, { "epoch": 0.14461315979754158, "grad_norm": 167.1442108154297, "learning_rate": 1.7710843373493978e-06, "loss": 0.5321, "step": 300 }, { "epoch": 0.16871535309713184, "grad_norm": 60.8509407043457, "learning_rate": 2.0722891566265063e-06, "loss": 0.5429, "step": 350 }, { "epoch": 0.1928175463967221, "grad_norm": 89.475830078125, "learning_rate": 2.373493975903615e-06, "loss": 0.6072, "step": 400 }, { "epoch": 0.21691973969631237, "grad_norm": 64.44173431396484, "learning_rate": 2.674698795180723e-06, "loss": 0.7013, "step": 450 }, { "epoch": 0.24102193299590263, "grad_norm": 34.119449615478516, "learning_rate": 2.975903614457832e-06, "loss": 0.6217, "step": 500 }, { "epoch": 0.26512412629549287, "grad_norm": 74.39945983886719, "learning_rate": 3.2771084337349403e-06, "loss": 0.6821, "step": 550 }, { "epoch": 0.28922631959508316, "grad_norm": 119.9459457397461, "learning_rate": 3.5783132530120484e-06, "loss": 0.6658, "step": 600 }, { "epoch": 0.3133285128946734, "grad_norm": 72.3450698852539, "learning_rate": 3.879518072289157e-06, "loss": 0.6301, "step": 650 }, { "epoch": 0.3374307061942637, "grad_norm": 43.53814697265625, "learning_rate": 4.180722891566266e-06, "loss": 0.6853, "step": 700 }, { "epoch": 0.3615328994938539, "grad_norm": 47.84103012084961, "learning_rate": 4.481927710843374e-06, "loss": 0.6084, "step": 750 }, { "epoch": 0.3856350927934442, "grad_norm": 115.1520767211914, "learning_rate": 4.783132530120482e-06, "loss": 0.6315, "step": 800 }, { "epoch": 0.40973728609303445, "grad_norm": 64.09156799316406, "learning_rate": 4.990624162871685e-06, "loss": 0.642, "step": 850 }, { "epoch": 0.43383947939262474, "grad_norm": 85.29337310791016, "learning_rate": 4.95713903027056e-06, "loss": 0.6143, "step": 900 }, { "epoch": 0.457941672692215, "grad_norm": 53.96143341064453, "learning_rate": 4.9236538976694355e-06, "loss": 0.6186, "step": 950 }, { "epoch": 0.48204386599180526, "grad_norm": 121.21419525146484, "learning_rate": 4.89016876506831e-06, "loss": 0.5645, "step": 1000 }, { "epoch": 0.5061460592913956, "grad_norm": 53.390052795410156, "learning_rate": 4.856683632467185e-06, "loss": 0.6374, "step": 1050 }, { "epoch": 0.5302482525909857, "grad_norm": 40.36591720581055, "learning_rate": 4.823198499866059e-06, "loss": 0.6469, "step": 1100 }, { "epoch": 0.554350445890576, "grad_norm": 64.5780258178711, "learning_rate": 4.789713367264935e-06, "loss": 0.6051, "step": 1150 }, { "epoch": 0.5784526391901663, "grad_norm": 73.48707580566406, "learning_rate": 4.7562282346638096e-06, "loss": 0.5213, "step": 1200 }, { "epoch": 0.6025548324897566, "grad_norm": 52.3371467590332, "learning_rate": 4.722743102062684e-06, "loss": 0.6024, "step": 1250 }, { "epoch": 0.6266570257893468, "grad_norm": 37.63548278808594, "learning_rate": 4.689257969461559e-06, "loss": 0.6795, "step": 1300 }, { "epoch": 0.6507592190889371, "grad_norm": 54.38778305053711, "learning_rate": 4.655772836860434e-06, "loss": 0.6296, "step": 1350 }, { "epoch": 0.6748614123885274, "grad_norm": 72.60975646972656, "learning_rate": 4.62228770425931e-06, "loss": 0.6312, "step": 1400 }, { "epoch": 0.6989636056881177, "grad_norm": 89.08965301513672, "learning_rate": 4.588802571658184e-06, "loss": 0.5056, "step": 1450 }, { "epoch": 0.7230657989877078, "grad_norm": 64.65660095214844, "learning_rate": 4.555317439057059e-06, "loss": 0.5618, "step": 1500 }, { "epoch": 0.7471679922872981, "grad_norm": 72.32450103759766, "learning_rate": 4.521832306455934e-06, "loss": 0.5906, "step": 1550 }, { "epoch": 0.7712701855868884, "grad_norm": 53.48661804199219, "learning_rate": 4.488347173854809e-06, "loss": 0.5037, "step": 1600 }, { "epoch": 0.7953723788864787, "grad_norm": 112.4849624633789, "learning_rate": 4.454862041253684e-06, "loss": 0.5712, "step": 1650 }, { "epoch": 0.8194745721860689, "grad_norm": 120.62342071533203, "learning_rate": 4.421376908652559e-06, "loss": 0.5657, "step": 1700 }, { "epoch": 0.8435767654856592, "grad_norm": 32.52986145019531, "learning_rate": 4.387891776051433e-06, "loss": 0.605, "step": 1750 }, { "epoch": 0.8676789587852495, "grad_norm": 48.4597282409668, "learning_rate": 4.354406643450309e-06, "loss": 0.4997, "step": 1800 }, { "epoch": 0.8917811520848398, "grad_norm": 44.31755828857422, "learning_rate": 4.3209215108491835e-06, "loss": 0.5591, "step": 1850 }, { "epoch": 0.91588334538443, "grad_norm": 19.54684829711914, "learning_rate": 4.287436378248058e-06, "loss": 0.5647, "step": 1900 }, { "epoch": 0.9399855386840202, "grad_norm": 56.949459075927734, "learning_rate": 4.253951245646933e-06, "loss": 0.5271, "step": 1950 }, { "epoch": 0.9640877319836105, "grad_norm": 87.43113708496094, "learning_rate": 4.220466113045808e-06, "loss": 0.5798, "step": 2000 }, { "epoch": 0.9881899252832008, "grad_norm": 109.82182312011719, "learning_rate": 4.187650683096705e-06, "loss": 0.5794, "step": 2050 }, { "epoch": 1.0, "eval_loss": 0.5185689926147461, "eval_runtime": 68.6783, "eval_samples_per_second": 45.298, "eval_steps_per_second": 5.664, "step": 2075 } ], "logging_steps": 50, "max_steps": 8296, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7893370617200640.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }