{ "best_global_step": 4954, "best_metric": 1.776762843132019, "best_model_checkpoint": "./mcqa_qwen3_letter_best/checkpoint-4954", "epoch": 1.0, "eval_steps": 500, "global_step": 4954, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010092854259184497, "grad_norm": 48.55782699584961, "learning_rate": 8.879919273461152e-07, "loss": 2.3985, "step": 50 }, { "epoch": 0.020185708518368994, "grad_norm": 25.696617126464844, "learning_rate": 1.8970736629667005e-06, "loss": 2.053, "step": 100 }, { "epoch": 0.030278562777553492, "grad_norm": 27.860021591186523, "learning_rate": 2.906155398587286e-06, "loss": 1.9305, "step": 150 }, { "epoch": 0.04037141703673799, "grad_norm": 17.68500518798828, "learning_rate": 3.915237134207871e-06, "loss": 1.9294, "step": 200 }, { "epoch": 0.050464271295922486, "grad_norm": 26.112218856811523, "learning_rate": 4.924318869828457e-06, "loss": 1.8834, "step": 250 }, { "epoch": 0.060557125555106985, "grad_norm": 25.835376739501953, "learning_rate": 5.933400605449042e-06, "loss": 1.8517, "step": 300 }, { "epoch": 0.07064997981429148, "grad_norm": 22.44589614868164, "learning_rate": 6.942482341069627e-06, "loss": 1.8978, "step": 350 }, { "epoch": 0.08074283407347597, "grad_norm": 32.82951354980469, "learning_rate": 7.951564076690212e-06, "loss": 1.8867, "step": 400 }, { "epoch": 0.09083568833266048, "grad_norm": 35.665794372558594, "learning_rate": 8.960645812310798e-06, "loss": 1.9055, "step": 450 }, { "epoch": 0.10092854259184497, "grad_norm": 22.500865936279297, "learning_rate": 9.969727547931384e-06, "loss": 1.8755, "step": 500 }, { "epoch": 0.11102139685102948, "grad_norm": 40.59410095214844, "learning_rate": 1.0978809283551967e-05, "loss": 1.8881, "step": 550 }, { "epoch": 0.12111425111021397, "grad_norm": 28.769454956054688, "learning_rate": 1.1987891019172555e-05, "loss": 1.8713, "step": 600 }, { "epoch": 0.13120710536939847, "grad_norm": 17.596820831298828, "learning_rate": 1.299697275479314e-05, "loss": 1.8694, "step": 650 }, { "epoch": 0.14129995962858297, "grad_norm": 17.149999618530273, "learning_rate": 1.4006054490413725e-05, "loss": 1.8809, "step": 700 }, { "epoch": 0.15139281388776746, "grad_norm": 19.181955337524414, "learning_rate": 1.5015136226034311e-05, "loss": 1.8697, "step": 750 }, { "epoch": 0.16148566814695195, "grad_norm": 24.227073669433594, "learning_rate": 1.6024217961654894e-05, "loss": 1.9201, "step": 800 }, { "epoch": 0.17157852240613647, "grad_norm": 18.42403221130371, "learning_rate": 1.703329969727548e-05, "loss": 1.8876, "step": 850 }, { "epoch": 0.18167137666532096, "grad_norm": 21.015230178833008, "learning_rate": 1.8042381432896066e-05, "loss": 1.8697, "step": 900 }, { "epoch": 0.19176423092450545, "grad_norm": 16.02488899230957, "learning_rate": 1.905146316851665e-05, "loss": 1.9102, "step": 950 }, { "epoch": 0.20185708518368994, "grad_norm": 25.045923233032227, "learning_rate": 1.9993271279578333e-05, "loss": 1.9121, "step": 1000 }, { "epoch": 0.21194993944287444, "grad_norm": 17.414430618286133, "learning_rate": 1.9881125939217227e-05, "loss": 1.9449, "step": 1050 }, { "epoch": 0.22204279370205895, "grad_norm": 15.37423324584961, "learning_rate": 1.976898059885612e-05, "loss": 1.9139, "step": 1100 }, { "epoch": 0.23213564796124345, "grad_norm": 20.543489456176758, "learning_rate": 1.965683525849501e-05, "loss": 1.92, "step": 1150 }, { "epoch": 0.24222850222042794, "grad_norm": 12.01870346069336, "learning_rate": 1.9544689918133902e-05, "loss": 1.8962, "step": 1200 }, { "epoch": 0.25232135647961246, "grad_norm": 15.475773811340332, "learning_rate": 1.9432544577772796e-05, "loss": 1.9483, "step": 1250 }, { "epoch": 0.26241421073879695, "grad_norm": 11.753213882446289, "learning_rate": 1.9320399237411686e-05, "loss": 1.919, "step": 1300 }, { "epoch": 0.27250706499798144, "grad_norm": 14.90489673614502, "learning_rate": 1.920825389705058e-05, "loss": 1.8742, "step": 1350 }, { "epoch": 0.28259991925716593, "grad_norm": 12.925189971923828, "learning_rate": 1.909610855668947e-05, "loss": 1.8822, "step": 1400 }, { "epoch": 0.2926927735163504, "grad_norm": 17.215579986572266, "learning_rate": 1.898396321632836e-05, "loss": 1.8796, "step": 1450 }, { "epoch": 0.3027856277755349, "grad_norm": 16.483861923217773, "learning_rate": 1.8871817875967255e-05, "loss": 1.8442, "step": 1500 }, { "epoch": 0.3128784820347194, "grad_norm": 18.10808753967285, "learning_rate": 1.875967253560615e-05, "loss": 1.9131, "step": 1550 }, { "epoch": 0.3229713362939039, "grad_norm": 14.261265754699707, "learning_rate": 1.864752719524504e-05, "loss": 1.7602, "step": 1600 }, { "epoch": 0.3330641905530884, "grad_norm": 16.223392486572266, "learning_rate": 1.8535381854883933e-05, "loss": 1.8392, "step": 1650 }, { "epoch": 0.34315704481227294, "grad_norm": 14.012106895446777, "learning_rate": 1.8423236514522824e-05, "loss": 1.8335, "step": 1700 }, { "epoch": 0.35324989907145743, "grad_norm": 13.234374046325684, "learning_rate": 1.8311091174161714e-05, "loss": 1.8501, "step": 1750 }, { "epoch": 0.3633427533306419, "grad_norm": 11.787166595458984, "learning_rate": 1.8198945833800608e-05, "loss": 1.8704, "step": 1800 }, { "epoch": 0.3734356075898264, "grad_norm": 15.64974308013916, "learning_rate": 1.80868004934395e-05, "loss": 1.85, "step": 1850 }, { "epoch": 0.3835284618490109, "grad_norm": 13.893998146057129, "learning_rate": 1.7974655153078392e-05, "loss": 1.8807, "step": 1900 }, { "epoch": 0.3936213161081954, "grad_norm": 15.42603588104248, "learning_rate": 1.7862509812717283e-05, "loss": 1.8124, "step": 1950 }, { "epoch": 0.4037141703673799, "grad_norm": 12.293023109436035, "learning_rate": 1.7750364472356173e-05, "loss": 1.8112, "step": 2000 }, { "epoch": 0.4138070246265644, "grad_norm": 17.576618194580078, "learning_rate": 1.7638219131995067e-05, "loss": 1.8468, "step": 2050 }, { "epoch": 0.42389987888574887, "grad_norm": 36.62916946411133, "learning_rate": 1.752607379163396e-05, "loss": 1.8563, "step": 2100 }, { "epoch": 0.43399273314493336, "grad_norm": 12.232354164123535, "learning_rate": 1.741392845127285e-05, "loss": 1.8643, "step": 2150 }, { "epoch": 0.4440855874041179, "grad_norm": 9.772968292236328, "learning_rate": 1.7301783110911742e-05, "loss": 1.8686, "step": 2200 }, { "epoch": 0.4541784416633024, "grad_norm": 13.78654956817627, "learning_rate": 1.7189637770550636e-05, "loss": 1.8477, "step": 2250 }, { "epoch": 0.4642712959224869, "grad_norm": 14.448091506958008, "learning_rate": 1.7077492430189526e-05, "loss": 1.828, "step": 2300 }, { "epoch": 0.4743641501816714, "grad_norm": 10.872529983520508, "learning_rate": 1.696534708982842e-05, "loss": 1.7916, "step": 2350 }, { "epoch": 0.4844570044408559, "grad_norm": 14.716806411743164, "learning_rate": 1.685320174946731e-05, "loss": 1.7982, "step": 2400 }, { "epoch": 0.49454985870004037, "grad_norm": 15.155656814575195, "learning_rate": 1.67410564091062e-05, "loss": 1.8422, "step": 2450 }, { "epoch": 0.5046427129592249, "grad_norm": 11.369612693786621, "learning_rate": 1.6628911068745095e-05, "loss": 1.8217, "step": 2500 }, { "epoch": 0.5147355672184094, "grad_norm": 15.491066932678223, "learning_rate": 1.651676572838399e-05, "loss": 1.8487, "step": 2550 }, { "epoch": 0.5248284214775939, "grad_norm": 12.249984741210938, "learning_rate": 1.640462038802288e-05, "loss": 1.7951, "step": 2600 }, { "epoch": 0.5349212757367784, "grad_norm": 14.075465202331543, "learning_rate": 1.629247504766177e-05, "loss": 1.8115, "step": 2650 }, { "epoch": 0.5450141299959629, "grad_norm": 9.785154342651367, "learning_rate": 1.6180329707300664e-05, "loss": 1.8576, "step": 2700 }, { "epoch": 0.5551069842551474, "grad_norm": 14.559487342834473, "learning_rate": 1.6068184366939554e-05, "loss": 1.8263, "step": 2750 }, { "epoch": 0.5651998385143319, "grad_norm": 15.150165557861328, "learning_rate": 1.5956039026578448e-05, "loss": 1.8029, "step": 2800 }, { "epoch": 0.5752926927735164, "grad_norm": 13.863632202148438, "learning_rate": 1.584389368621734e-05, "loss": 1.7863, "step": 2850 }, { "epoch": 0.5853855470327008, "grad_norm": 9.358270645141602, "learning_rate": 1.573174834585623e-05, "loss": 1.806, "step": 2900 }, { "epoch": 0.5954784012918853, "grad_norm": 12.770975112915039, "learning_rate": 1.5619603005495123e-05, "loss": 1.7417, "step": 2950 }, { "epoch": 0.6055712555510698, "grad_norm": 12.026569366455078, "learning_rate": 1.5507457665134017e-05, "loss": 1.7623, "step": 3000 }, { "epoch": 0.6156641098102543, "grad_norm": 9.8405122756958, "learning_rate": 1.5395312324772907e-05, "loss": 1.7941, "step": 3050 }, { "epoch": 0.6257569640694388, "grad_norm": 13.649519920349121, "learning_rate": 1.5283166984411798e-05, "loss": 1.7499, "step": 3100 }, { "epoch": 0.6358498183286233, "grad_norm": 13.303316116333008, "learning_rate": 1.5171021644050692e-05, "loss": 1.7821, "step": 3150 }, { "epoch": 0.6459426725878078, "grad_norm": 14.893158912658691, "learning_rate": 1.5058876303689582e-05, "loss": 1.8423, "step": 3200 }, { "epoch": 0.6560355268469923, "grad_norm": 14.434380531311035, "learning_rate": 1.4946730963328474e-05, "loss": 1.8138, "step": 3250 }, { "epoch": 0.6661283811061768, "grad_norm": 9.59044075012207, "learning_rate": 1.4834585622967368e-05, "loss": 1.7734, "step": 3300 }, { "epoch": 0.6762212353653613, "grad_norm": 12.524561882019043, "learning_rate": 1.4722440282606259e-05, "loss": 1.8246, "step": 3350 }, { "epoch": 0.6863140896245459, "grad_norm": 13.521296501159668, "learning_rate": 1.4610294942245151e-05, "loss": 1.7847, "step": 3400 }, { "epoch": 0.6964069438837304, "grad_norm": 10.999866485595703, "learning_rate": 1.4498149601884043e-05, "loss": 1.8027, "step": 3450 }, { "epoch": 0.7064997981429149, "grad_norm": 15.364250183105469, "learning_rate": 1.4386004261522934e-05, "loss": 1.7802, "step": 3500 }, { "epoch": 0.7165926524020994, "grad_norm": 13.141353607177734, "learning_rate": 1.4273858921161828e-05, "loss": 1.7464, "step": 3550 }, { "epoch": 0.7266855066612838, "grad_norm": 9.018637657165527, "learning_rate": 1.4161713580800718e-05, "loss": 1.7553, "step": 3600 }, { "epoch": 0.7367783609204683, "grad_norm": 11.081124305725098, "learning_rate": 1.404956824043961e-05, "loss": 1.7922, "step": 3650 }, { "epoch": 0.7468712151796528, "grad_norm": 10.0188627243042, "learning_rate": 1.3937422900078504e-05, "loss": 1.7769, "step": 3700 }, { "epoch": 0.7569640694388373, "grad_norm": 10.286458015441895, "learning_rate": 1.3825277559717395e-05, "loss": 1.7696, "step": 3750 }, { "epoch": 0.7670569236980218, "grad_norm": 11.746405601501465, "learning_rate": 1.3713132219356287e-05, "loss": 1.7188, "step": 3800 }, { "epoch": 0.7771497779572063, "grad_norm": 11.215723991394043, "learning_rate": 1.3600986878995179e-05, "loss": 1.6803, "step": 3850 }, { "epoch": 0.7872426322163908, "grad_norm": 8.982596397399902, "learning_rate": 1.348884153863407e-05, "loss": 1.7696, "step": 3900 }, { "epoch": 0.7973354864755753, "grad_norm": 12.450457572937012, "learning_rate": 1.3376696198272963e-05, "loss": 1.8021, "step": 3950 }, { "epoch": 0.8074283407347598, "grad_norm": 10.87128734588623, "learning_rate": 1.3264550857911855e-05, "loss": 1.7492, "step": 4000 }, { "epoch": 0.8175211949939443, "grad_norm": 11.78647518157959, "learning_rate": 1.3152405517550746e-05, "loss": 1.7883, "step": 4050 }, { "epoch": 0.8276140492531288, "grad_norm": 12.425263404846191, "learning_rate": 1.3040260177189638e-05, "loss": 1.7546, "step": 4100 }, { "epoch": 0.8377069035123133, "grad_norm": 11.663323402404785, "learning_rate": 1.2928114836828532e-05, "loss": 1.8018, "step": 4150 }, { "epoch": 0.8477997577714977, "grad_norm": 17.913087844848633, "learning_rate": 1.2815969496467423e-05, "loss": 1.7827, "step": 4200 }, { "epoch": 0.8578926120306822, "grad_norm": 9.219327926635742, "learning_rate": 1.2703824156106315e-05, "loss": 1.7245, "step": 4250 }, { "epoch": 0.8679854662898667, "grad_norm": 11.107460021972656, "learning_rate": 1.2591678815745207e-05, "loss": 1.7264, "step": 4300 }, { "epoch": 0.8780783205490512, "grad_norm": 10.487607955932617, "learning_rate": 1.2479533475384097e-05, "loss": 1.753, "step": 4350 }, { "epoch": 0.8881711748082358, "grad_norm": 13.2865571975708, "learning_rate": 1.2367388135022991e-05, "loss": 1.7317, "step": 4400 }, { "epoch": 0.8982640290674203, "grad_norm": 10.927115440368652, "learning_rate": 1.2255242794661883e-05, "loss": 1.7651, "step": 4450 }, { "epoch": 0.9083568833266048, "grad_norm": 10.536073684692383, "learning_rate": 1.2143097454300774e-05, "loss": 1.7578, "step": 4500 }, { "epoch": 0.9184497375857893, "grad_norm": 13.544109344482422, "learning_rate": 1.2030952113939666e-05, "loss": 1.7505, "step": 4550 }, { "epoch": 0.9285425918449738, "grad_norm": 9.343710899353027, "learning_rate": 1.1921049680385782e-05, "loss": 1.6865, "step": 4600 }, { "epoch": 0.9386354461041583, "grad_norm": 11.518623352050781, "learning_rate": 1.1808904340024674e-05, "loss": 1.7203, "step": 4650 }, { "epoch": 0.9487283003633428, "grad_norm": 7.897172927856445, "learning_rate": 1.1696758999663564e-05, "loss": 1.7201, "step": 4700 }, { "epoch": 0.9588211546225273, "grad_norm": 11.530837059020996, "learning_rate": 1.1584613659302457e-05, "loss": 1.8117, "step": 4750 }, { "epoch": 0.9689140088817118, "grad_norm": 11.721019744873047, "learning_rate": 1.147246831894135e-05, "loss": 1.7663, "step": 4800 }, { "epoch": 0.9790068631408962, "grad_norm": 11.470191955566406, "learning_rate": 1.1360322978580241e-05, "loss": 1.7655, "step": 4850 }, { "epoch": 0.9890997174000807, "grad_norm": 12.892107009887695, "learning_rate": 1.1248177638219133e-05, "loss": 1.759, "step": 4900 }, { "epoch": 0.9991925716592652, "grad_norm": 13.869138717651367, "learning_rate": 1.1136032297858025e-05, "loss": 1.7831, "step": 4950 }, { "epoch": 1.0, "eval_loss": 1.776762843132019, "eval_runtime": 226.5804, "eval_samples_per_second": 16.396, "eval_steps_per_second": 2.052, "step": 4954 } ], "logging_steps": 50, "max_steps": 9908, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.681334714807091e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }