{
  "best_global_step": 2075,
  "best_metric": 0.5185689926147461,
  "best_model_checkpoint": "./mcqa_qwen3_letter_alex/checkpoint-2075",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 2075,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.024102193299590263,
      "grad_norm": 70.03743743896484,
      "learning_rate": 2.6506024096385546e-07,
      "loss": 0.8067,
      "step": 50
    },
    {
      "epoch": 0.048204386599180526,
      "grad_norm": 58.46215057373047,
      "learning_rate": 5.662650602409639e-07,
      "loss": 0.6862,
      "step": 100
    },
    {
      "epoch": 0.07230657989877079,
      "grad_norm": 81.54195404052734,
      "learning_rate": 8.674698795180723e-07,
      "loss": 0.6603,
      "step": 150
    },
    {
      "epoch": 0.09640877319836105,
      "grad_norm": 100.12779235839844,
      "learning_rate": 1.1686746987951808e-06,
      "loss": 0.604,
      "step": 200
    },
    {
      "epoch": 0.12051096649795132,
      "grad_norm": 74.84662628173828,
      "learning_rate": 1.4698795180722893e-06,
      "loss": 0.6487,
      "step": 250
    },
    {
      "epoch": 0.14461315979754158,
      "grad_norm": 167.1442108154297,
      "learning_rate": 1.7710843373493978e-06,
      "loss": 0.5321,
      "step": 300
    },
    {
      "epoch": 0.16871535309713184,
      "grad_norm": 60.8509407043457,
      "learning_rate": 2.0722891566265063e-06,
      "loss": 0.5429,
      "step": 350
    },
    {
      "epoch": 0.1928175463967221,
      "grad_norm": 89.475830078125,
      "learning_rate": 2.373493975903615e-06,
      "loss": 0.6072,
      "step": 400
    },
    {
      "epoch": 0.21691973969631237,
      "grad_norm": 64.44173431396484,
      "learning_rate": 2.674698795180723e-06,
      "loss": 0.7013,
      "step": 450
    },
    {
      "epoch": 0.24102193299590263,
      "grad_norm": 34.119449615478516,
      "learning_rate": 2.975903614457832e-06,
      "loss": 0.6217,
      "step": 500
    },
    {
      "epoch": 0.26512412629549287,
      "grad_norm": 74.39945983886719,
      "learning_rate": 3.2771084337349403e-06,
      "loss": 0.6821,
      "step": 550
    },
    {
      "epoch": 0.28922631959508316,
      "grad_norm": 119.9459457397461,
      "learning_rate": 3.5783132530120484e-06,
      "loss": 0.6658,
      "step": 600
    },
    {
      "epoch": 0.3133285128946734,
      "grad_norm": 72.3450698852539,
      "learning_rate": 3.879518072289157e-06,
      "loss": 0.6301,
      "step": 650
    },
    {
      "epoch": 0.3374307061942637,
      "grad_norm": 43.53814697265625,
      "learning_rate": 4.180722891566266e-06,
      "loss": 0.6853,
      "step": 700
    },
    {
      "epoch": 0.3615328994938539,
      "grad_norm": 47.84103012084961,
      "learning_rate": 4.481927710843374e-06,
      "loss": 0.6084,
      "step": 750
    },
    {
      "epoch": 0.3856350927934442,
      "grad_norm": 115.1520767211914,
      "learning_rate": 4.783132530120482e-06,
      "loss": 0.6315,
      "step": 800
    },
    {
      "epoch": 0.40973728609303445,
      "grad_norm": 64.09156799316406,
      "learning_rate": 4.990624162871685e-06,
      "loss": 0.642,
      "step": 850
    },
    {
      "epoch": 0.43383947939262474,
      "grad_norm": 85.29337310791016,
      "learning_rate": 4.95713903027056e-06,
      "loss": 0.6143,
      "step": 900
    },
    {
      "epoch": 0.457941672692215,
      "grad_norm": 53.96143341064453,
      "learning_rate": 4.9236538976694355e-06,
      "loss": 0.6186,
      "step": 950
    },
    {
      "epoch": 0.48204386599180526,
      "grad_norm": 121.21419525146484,
      "learning_rate": 4.89016876506831e-06,
      "loss": 0.5645,
      "step": 1000
    },
    {
      "epoch": 0.5061460592913956,
      "grad_norm": 53.390052795410156,
      "learning_rate": 4.856683632467185e-06,
      "loss": 0.6374,
      "step": 1050
    },
    {
      "epoch": 0.5302482525909857,
      "grad_norm": 40.36591720581055,
      "learning_rate": 4.823198499866059e-06,
      "loss": 0.6469,
      "step": 1100
    },
    {
      "epoch": 0.554350445890576,
      "grad_norm": 64.5780258178711,
      "learning_rate": 4.789713367264935e-06,
      "loss": 0.6051,
      "step": 1150
    },
    {
      "epoch": 0.5784526391901663,
      "grad_norm": 73.48707580566406,
      "learning_rate": 4.7562282346638096e-06,
      "loss": 0.5213,
      "step": 1200
    },
    {
      "epoch": 0.6025548324897566,
      "grad_norm": 52.3371467590332,
      "learning_rate": 4.722743102062684e-06,
      "loss": 0.6024,
      "step": 1250
    },
    {
      "epoch": 0.6266570257893468,
      "grad_norm": 37.63548278808594,
      "learning_rate": 4.689257969461559e-06,
      "loss": 0.6795,
      "step": 1300
    },
    {
      "epoch": 0.6507592190889371,
      "grad_norm": 54.38778305053711,
      "learning_rate": 4.655772836860434e-06,
      "loss": 0.6296,
      "step": 1350
    },
    {
      "epoch": 0.6748614123885274,
      "grad_norm": 72.60975646972656,
      "learning_rate": 4.62228770425931e-06,
      "loss": 0.6312,
      "step": 1400
    },
    {
      "epoch": 0.6989636056881177,
      "grad_norm": 89.08965301513672,
      "learning_rate": 4.588802571658184e-06,
      "loss": 0.5056,
      "step": 1450
    },
    {
      "epoch": 0.7230657989877078,
      "grad_norm": 64.65660095214844,
      "learning_rate": 4.555317439057059e-06,
      "loss": 0.5618,
      "step": 1500
    },
    {
      "epoch": 0.7471679922872981,
      "grad_norm": 72.32450103759766,
      "learning_rate": 4.521832306455934e-06,
      "loss": 0.5906,
      "step": 1550
    },
    {
      "epoch": 0.7712701855868884,
      "grad_norm": 53.48661804199219,
      "learning_rate": 4.488347173854809e-06,
      "loss": 0.5037,
      "step": 1600
    },
    {
      "epoch": 0.7953723788864787,
      "grad_norm": 112.4849624633789,
      "learning_rate": 4.454862041253684e-06,
      "loss": 0.5712,
      "step": 1650
    },
    {
      "epoch": 0.8194745721860689,
      "grad_norm": 120.62342071533203,
      "learning_rate": 4.421376908652559e-06,
      "loss": 0.5657,
      "step": 1700
    },
    {
      "epoch": 0.8435767654856592,
      "grad_norm": 32.52986145019531,
      "learning_rate": 4.387891776051433e-06,
      "loss": 0.605,
      "step": 1750
    },
    {
      "epoch": 0.8676789587852495,
      "grad_norm": 48.4597282409668,
      "learning_rate": 4.354406643450309e-06,
      "loss": 0.4997,
      "step": 1800
    },
    {
      "epoch": 0.8917811520848398,
      "grad_norm": 44.31755828857422,
      "learning_rate": 4.3209215108491835e-06,
      "loss": 0.5591,
      "step": 1850
    },
    {
      "epoch": 0.91588334538443,
      "grad_norm": 19.54684829711914,
      "learning_rate": 4.287436378248058e-06,
      "loss": 0.5647,
      "step": 1900
    },
    {
      "epoch": 0.9399855386840202,
      "grad_norm": 56.949459075927734,
      "learning_rate": 4.253951245646933e-06,
      "loss": 0.5271,
      "step": 1950
    },
    {
      "epoch": 0.9640877319836105,
      "grad_norm": 87.43113708496094,
      "learning_rate": 4.220466113045808e-06,
      "loss": 0.5798,
      "step": 2000
    },
    {
      "epoch": 0.9881899252832008,
      "grad_norm": 109.82182312011719,
      "learning_rate": 4.187650683096705e-06,
      "loss": 0.5794,
      "step": 2050
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.5185689926147461,
      "eval_runtime": 68.6783,
      "eval_samples_per_second": 45.298,
      "eval_steps_per_second": 5.664,
      "step": 2075
    }
  ],
  "logging_steps": 50,
  "max_steps": 8296,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7893370617200640.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}