| { | |
| "best_metric": 0.57263505, | |
| "best_model_checkpoint": "/export/home2/zli/kc/mm_rag/Qwen2.5-14B-Instruct_lora/checkpoint-256", | |
| "epoch": 0.9978075517661389, | |
| "eval_steps": 100, | |
| "global_step": 256, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00389768574908648, | |
| "grad_norm": 0.3756321966648102, | |
| "learning_rate": 7.692307692307694e-06, | |
| "loss": 0.760678231716156, | |
| "memory(GiB)": 64.79, | |
| "step": 1, | |
| "token_acc": 0.794377180381695, | |
| "train_speed(iter/s)": 0.008942 | |
| }, | |
| { | |
| "epoch": 0.0194884287454324, | |
| "grad_norm": 0.4006075859069824, | |
| "learning_rate": 3.846153846153846e-05, | |
| "loss": 0.8511521816253662, | |
| "memory(GiB)": 69.21, | |
| "step": 5, | |
| "token_acc": 0.7628462851792697, | |
| "train_speed(iter/s)": 0.009262 | |
| }, | |
| { | |
| "epoch": 0.0389768574908648, | |
| "grad_norm": 0.40541699528694153, | |
| "learning_rate": 7.692307692307693e-05, | |
| "loss": 0.8799286842346191, | |
| "memory(GiB)": 69.21, | |
| "step": 10, | |
| "token_acc": 0.755640420079393, | |
| "train_speed(iter/s)": 0.009459 | |
| }, | |
| { | |
| "epoch": 0.058465286236297195, | |
| "grad_norm": 0.28694042563438416, | |
| "learning_rate": 9.998328666948438e-05, | |
| "loss": 0.7268682479858398, | |
| "memory(GiB)": 69.21, | |
| "step": 15, | |
| "token_acc": 0.7857048253940223, | |
| "train_speed(iter/s)": 0.009584 | |
| }, | |
| { | |
| "epoch": 0.0779537149817296, | |
| "grad_norm": 0.29239076375961304, | |
| "learning_rate": 9.979538999730047e-05, | |
| "loss": 0.6990458011627197, | |
| "memory(GiB)": 69.21, | |
| "step": 20, | |
| "token_acc": 0.7923689929968607, | |
| "train_speed(iter/s)": 0.009633 | |
| }, | |
| { | |
| "epoch": 0.09744214372716199, | |
| "grad_norm": 0.25992509722709656, | |
| "learning_rate": 9.939949247384046e-05, | |
| "loss": 0.7058870792388916, | |
| "memory(GiB)": 71.47, | |
| "step": 25, | |
| "token_acc": 0.7879081525173794, | |
| "train_speed(iter/s)": 0.009549 | |
| }, | |
| { | |
| "epoch": 0.11693057247259439, | |
| "grad_norm": 0.1924816370010376, | |
| "learning_rate": 9.879724780684519e-05, | |
| "loss": 0.7370085716247559, | |
| "memory(GiB)": 71.47, | |
| "step": 30, | |
| "token_acc": 0.7818104986383698, | |
| "train_speed(iter/s)": 0.009632 | |
| }, | |
| { | |
| "epoch": 0.1364190012180268, | |
| "grad_norm": 0.2375946342945099, | |
| "learning_rate": 9.799117163889559e-05, | |
| "loss": 0.6951110363006592, | |
| "memory(GiB)": 71.47, | |
| "step": 35, | |
| "token_acc": 0.7915360501567398, | |
| "train_speed(iter/s)": 0.009685 | |
| }, | |
| { | |
| "epoch": 0.1559074299634592, | |
| "grad_norm": 0.19938509166240692, | |
| "learning_rate": 9.698463103929542e-05, | |
| "loss": 0.634189510345459, | |
| "memory(GiB)": 71.47, | |
| "step": 40, | |
| "token_acc": 0.8057028291378926, | |
| "train_speed(iter/s)": 0.009727 | |
| }, | |
| { | |
| "epoch": 0.1753958587088916, | |
| "grad_norm": 0.2671877145767212, | |
| "learning_rate": 9.57818304394503e-05, | |
| "loss": 0.6699491500854492, | |
| "memory(GiB)": 71.47, | |
| "step": 45, | |
| "token_acc": 0.8010264675941635, | |
| "train_speed(iter/s)": 0.009776 | |
| }, | |
| { | |
| "epoch": 0.19488428745432398, | |
| "grad_norm": 0.3463566303253174, | |
| "learning_rate": 9.438779407049281e-05, | |
| "loss": 0.6760294437408447, | |
| "memory(GiB)": 71.47, | |
| "step": 50, | |
| "token_acc": 0.793778801843318, | |
| "train_speed(iter/s)": 0.009808 | |
| }, | |
| { | |
| "epoch": 0.2143727161997564, | |
| "grad_norm": 0.22926518321037292, | |
| "learning_rate": 9.280834497651334e-05, | |
| "loss": 0.6952155113220215, | |
| "memory(GiB)": 71.47, | |
| "step": 55, | |
| "token_acc": 0.7856186209763081, | |
| "train_speed(iter/s)": 0.009819 | |
| }, | |
| { | |
| "epoch": 0.23386114494518878, | |
| "grad_norm": 0.26200923323631287, | |
| "learning_rate": 9.105008069106093e-05, | |
| "loss": 0.6922572135925293, | |
| "memory(GiB)": 71.47, | |
| "step": 60, | |
| "token_acc": 0.784200469035004, | |
| "train_speed(iter/s)": 0.009839 | |
| }, | |
| { | |
| "epoch": 0.25334957369062117, | |
| "grad_norm": 0.2608520984649658, | |
| "learning_rate": 8.912034567851599e-05, | |
| "loss": 0.6162422180175782, | |
| "memory(GiB)": 71.47, | |
| "step": 65, | |
| "token_acc": 0.8122029543994862, | |
| "train_speed(iter/s)": 0.009864 | |
| }, | |
| { | |
| "epoch": 0.2728380024360536, | |
| "grad_norm": 0.23958414793014526, | |
| "learning_rate": 8.702720065545024e-05, | |
| "loss": 0.6201987266540527, | |
| "memory(GiB)": 71.47, | |
| "step": 70, | |
| "token_acc": 0.807689317368994, | |
| "train_speed(iter/s)": 0.00982 | |
| }, | |
| { | |
| "epoch": 0.292326431181486, | |
| "grad_norm": 0.24707700312137604, | |
| "learning_rate": 8.47793889201221e-05, | |
| "loss": 0.6751769542694092, | |
| "memory(GiB)": 71.47, | |
| "step": 75, | |
| "token_acc": 0.7933965994464215, | |
| "train_speed(iter/s)": 0.009843 | |
| }, | |
| { | |
| "epoch": 0.3118148599269184, | |
| "grad_norm": 0.280536413192749, | |
| "learning_rate": 8.238629983075294e-05, | |
| "loss": 0.6537846088409424, | |
| "memory(GiB)": 71.47, | |
| "step": 80, | |
| "token_acc": 0.800564772326156, | |
| "train_speed(iter/s)": 0.00983 | |
| }, | |
| { | |
| "epoch": 0.3313032886723508, | |
| "grad_norm": 0.29046493768692017, | |
| "learning_rate": 7.985792958513931e-05, | |
| "loss": 0.6765205383300781, | |
| "memory(GiB)": 71.47, | |
| "step": 85, | |
| "token_acc": 0.7913814331445457, | |
| "train_speed(iter/s)": 0.009828 | |
| }, | |
| { | |
| "epoch": 0.3507917174177832, | |
| "grad_norm": 0.2879982888698578, | |
| "learning_rate": 7.720483946542914e-05, | |
| "loss": 0.6516193866729736, | |
| "memory(GiB)": 71.47, | |
| "step": 90, | |
| "token_acc": 0.7983723394009439, | |
| "train_speed(iter/s)": 0.009817 | |
| }, | |
| { | |
| "epoch": 0.3702801461632156, | |
| "grad_norm": 0.28098541498184204, | |
| "learning_rate": 7.443811172247821e-05, | |
| "loss": 0.6455563545227051, | |
| "memory(GiB)": 71.47, | |
| "step": 95, | |
| "token_acc": 0.8004983012457532, | |
| "train_speed(iter/s)": 0.009833 | |
| }, | |
| { | |
| "epoch": 0.38976857490864797, | |
| "grad_norm": 0.2407151460647583, | |
| "learning_rate": 7.156930328406268e-05, | |
| "loss": 0.6356846332550049, | |
| "memory(GiB)": 71.47, | |
| "step": 100, | |
| "token_acc": 0.8042028604018602, | |
| "train_speed(iter/s)": 0.009804 | |
| }, | |
| { | |
| "epoch": 0.38976857490864797, | |
| "eval_loss": 0.598721981048584, | |
| "eval_runtime": 15.1098, | |
| "eval_samples_per_second": 0.265, | |
| "eval_steps_per_second": 0.132, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4092570036540804, | |
| "grad_norm": 0.2836802899837494, | |
| "learning_rate": 6.861039748031351e-05, | |
| "loss": 0.6598537921905517, | |
| "memory(GiB)": 77.63, | |
| "step": 105, | |
| "token_acc": 0.7957189390414147, | |
| "train_speed(iter/s)": 0.009789 | |
| }, | |
| { | |
| "epoch": 0.4287454323995128, | |
| "grad_norm": 0.26613113284111023, | |
| "learning_rate": 6.557375398802123e-05, | |
| "loss": 0.6290350437164307, | |
| "memory(GiB)": 77.63, | |
| "step": 110, | |
| "token_acc": 0.8080845540515483, | |
| "train_speed(iter/s)": 0.009743 | |
| }, | |
| { | |
| "epoch": 0.4482338611449452, | |
| "grad_norm": 0.3952692747116089, | |
| "learning_rate": 6.247205720289907e-05, | |
| "loss": 0.6128222942352295, | |
| "memory(GiB)": 77.63, | |
| "step": 115, | |
| "token_acc": 0.8121368342872122, | |
| "train_speed(iter/s)": 0.009719 | |
| }, | |
| { | |
| "epoch": 0.46772228989037756, | |
| "grad_norm": 0.29315847158432007, | |
| "learning_rate": 5.9318263255459116e-05, | |
| "loss": 0.6240291595458984, | |
| "memory(GiB)": 77.63, | |
| "step": 120, | |
| "token_acc": 0.8027493895269965, | |
| "train_speed(iter/s)": 0.009694 | |
| }, | |
| { | |
| "epoch": 0.48721071863581, | |
| "grad_norm": 0.2671997547149658, | |
| "learning_rate": 5.6125545891822274e-05, | |
| "loss": 0.6649426460266114, | |
| "memory(GiB)": 77.63, | |
| "step": 125, | |
| "token_acc": 0.7922591927838373, | |
| "train_speed(iter/s)": 0.009676 | |
| }, | |
| { | |
| "epoch": 0.5066991473812423, | |
| "grad_norm": 0.26974523067474365, | |
| "learning_rate": 5.290724144552379e-05, | |
| "loss": 0.6172929763793945, | |
| "memory(GiB)": 77.63, | |
| "step": 130, | |
| "token_acc": 0.8085726351351351, | |
| "train_speed(iter/s)": 0.009667 | |
| }, | |
| { | |
| "epoch": 0.5261875761266748, | |
| "grad_norm": 0.2798590660095215, | |
| "learning_rate": 4.967679313017303e-05, | |
| "loss": 0.6145929336547852, | |
| "memory(GiB)": 77.63, | |
| "step": 135, | |
| "token_acc": 0.8118081180811808, | |
| "train_speed(iter/s)": 0.009683 | |
| }, | |
| { | |
| "epoch": 0.5456760048721072, | |
| "grad_norm": 0.27218812704086304, | |
| "learning_rate": 4.6447694885663514e-05, | |
| "loss": 0.618541955947876, | |
| "memory(GiB)": 77.63, | |
| "step": 140, | |
| "token_acc": 0.8050674649659422, | |
| "train_speed(iter/s)": 0.009706 | |
| }, | |
| { | |
| "epoch": 0.5651644336175395, | |
| "grad_norm": 0.29498329758644104, | |
| "learning_rate": 4.323343501249346e-05, | |
| "loss": 0.6341890335083008, | |
| "memory(GiB)": 77.63, | |
| "step": 145, | |
| "token_acc": 0.801550316061459, | |
| "train_speed(iter/s)": 0.009735 | |
| }, | |
| { | |
| "epoch": 0.584652862362972, | |
| "grad_norm": 0.2799496352672577, | |
| "learning_rate": 4.004743982964298e-05, | |
| "loss": 0.6000133037567139, | |
| "memory(GiB)": 77.63, | |
| "step": 150, | |
| "token_acc": 0.8125188083057479, | |
| "train_speed(iter/s)": 0.009742 | |
| }, | |
| { | |
| "epoch": 0.6041412911084044, | |
| "grad_norm": 0.28971585631370544, | |
| "learning_rate": 3.6903017591354706e-05, | |
| "loss": 0.6476753711700439, | |
| "memory(GiB)": 77.63, | |
| "step": 155, | |
| "token_acc": 0.8055159053833605, | |
| "train_speed(iter/s)": 0.009734 | |
| }, | |
| { | |
| "epoch": 0.6236297198538368, | |
| "grad_norm": 0.2888241708278656, | |
| "learning_rate": 3.381330289708396e-05, | |
| "loss": 0.6604040622711181, | |
| "memory(GiB)": 77.63, | |
| "step": 160, | |
| "token_acc": 0.7972270363951474, | |
| "train_speed(iter/s)": 0.009743 | |
| }, | |
| { | |
| "epoch": 0.6431181485992692, | |
| "grad_norm": 0.31141144037246704, | |
| "learning_rate": 3.079120182682412e-05, | |
| "loss": 0.5943418025970459, | |
| "memory(GiB)": 77.63, | |
| "step": 165, | |
| "token_acc": 0.8125655328607977, | |
| "train_speed(iter/s)": 0.009733 | |
| }, | |
| { | |
| "epoch": 0.6626065773447016, | |
| "grad_norm": 0.28593260049819946, | |
| "learning_rate": 2.7849338030983257e-05, | |
| "loss": 0.6077968120574951, | |
| "memory(GiB)": 77.63, | |
| "step": 170, | |
| "token_acc": 0.811592125382263, | |
| "train_speed(iter/s)": 0.009731 | |
| }, | |
| { | |
| "epoch": 0.682095006090134, | |
| "grad_norm": 0.3223704993724823, | |
| "learning_rate": 2.500000000000001e-05, | |
| "loss": 0.6201191425323487, | |
| "memory(GiB)": 77.63, | |
| "step": 175, | |
| "token_acc": 0.806087783666259, | |
| "train_speed(iter/s)": 0.009728 | |
| }, | |
| { | |
| "epoch": 0.7015834348355664, | |
| "grad_norm": 0.3051086962223053, | |
| "learning_rate": 2.225508973396016e-05, | |
| "loss": 0.6003672122955322, | |
| "memory(GiB)": 77.63, | |
| "step": 180, | |
| "token_acc": 0.8096822043421585, | |
| "train_speed(iter/s)": 0.009738 | |
| }, | |
| { | |
| "epoch": 0.7210718635809987, | |
| "grad_norm": 0.33248814940452576, | |
| "learning_rate": 1.9626073026625818e-05, | |
| "loss": 0.6649184226989746, | |
| "memory(GiB)": 77.63, | |
| "step": 185, | |
| "token_acc": 0.7927942418271744, | |
| "train_speed(iter/s)": 0.009754 | |
| }, | |
| { | |
| "epoch": 0.7405602923264312, | |
| "grad_norm": 0.32766395807266235, | |
| "learning_rate": 1.7123931571546827e-05, | |
| "loss": 0.620220422744751, | |
| "memory(GiB)": 77.63, | |
| "step": 190, | |
| "token_acc": 0.8047315233083407, | |
| "train_speed(iter/s)": 0.009761 | |
| }, | |
| { | |
| "epoch": 0.7600487210718636, | |
| "grad_norm": 0.3414992392063141, | |
| "learning_rate": 1.4759117090312197e-05, | |
| "loss": 0.622031831741333, | |
| "memory(GiB)": 77.63, | |
| "step": 195, | |
| "token_acc": 0.807596541264883, | |
| "train_speed(iter/s)": 0.009765 | |
| }, | |
| { | |
| "epoch": 0.7795371498172959, | |
| "grad_norm": 0.32977092266082764, | |
| "learning_rate": 1.25415076745532e-05, | |
| "loss": 0.6251283168792725, | |
| "memory(GiB)": 77.63, | |
| "step": 200, | |
| "token_acc": 0.8072234762979684, | |
| "train_speed(iter/s)": 0.009771 | |
| }, | |
| { | |
| "epoch": 0.7795371498172959, | |
| "eval_loss": 0.5760576725006104, | |
| "eval_runtime": 14.2053, | |
| "eval_samples_per_second": 0.282, | |
| "eval_steps_per_second": 0.141, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.7990255785627284, | |
| "grad_norm": 0.2861402630805969, | |
| "learning_rate": 1.0480366524062042e-05, | |
| "loss": 0.5975393772125244, | |
| "memory(GiB)": 77.63, | |
| "step": 205, | |
| "token_acc": 0.8152735624641129, | |
| "train_speed(iter/s)": 0.009771 | |
| }, | |
| { | |
| "epoch": 0.8185140073081608, | |
| "grad_norm": 0.30515122413635254, | |
| "learning_rate": 8.584303253381847e-06, | |
| "loss": 0.6246855735778809, | |
| "memory(GiB)": 77.63, | |
| "step": 210, | |
| "token_acc": 0.8046837508398119, | |
| "train_speed(iter/s)": 0.009789 | |
| }, | |
| { | |
| "epoch": 0.8380024360535931, | |
| "grad_norm": 0.3188154101371765, | |
| "learning_rate": 6.861237928494579e-06, | |
| "loss": 0.5521413803100585, | |
| "memory(GiB)": 77.63, | |
| "step": 215, | |
| "token_acc": 0.8276072283910298, | |
| "train_speed(iter/s)": 0.009786 | |
| }, | |
| { | |
| "epoch": 0.8574908647990256, | |
| "grad_norm": 0.3368200361728668, | |
| "learning_rate": 5.318367983829392e-06, | |
| "loss": 0.5954047679901123, | |
| "memory(GiB)": 77.63, | |
| "step": 220, | |
| "token_acc": 0.8130686517783292, | |
| "train_speed(iter/s)": 0.009788 | |
| }, | |
| { | |
| "epoch": 0.876979293544458, | |
| "grad_norm": 0.3377918601036072, | |
| "learning_rate": 3.962138157783085e-06, | |
| "loss": 0.6312428951263428, | |
| "memory(GiB)": 77.63, | |
| "step": 225, | |
| "token_acc": 0.8009544725061238, | |
| "train_speed(iter/s)": 0.009783 | |
| }, | |
| { | |
| "epoch": 0.8964677222898904, | |
| "grad_norm": 0.34851112961769104, | |
| "learning_rate": 2.798213572335001e-06, | |
| "loss": 0.5601920127868653, | |
| "memory(GiB)": 77.63, | |
| "step": 230, | |
| "token_acc": 0.8258236434108527, | |
| "train_speed(iter/s)": 0.009778 | |
| }, | |
| { | |
| "epoch": 0.9159561510353228, | |
| "grad_norm": 0.3354482650756836, | |
| "learning_rate": 1.8314560692059835e-06, | |
| "loss": 0.5771200656890869, | |
| "memory(GiB)": 77.63, | |
| "step": 235, | |
| "token_acc": 0.8201089968594125, | |
| "train_speed(iter/s)": 0.009786 | |
| }, | |
| { | |
| "epoch": 0.9354445797807551, | |
| "grad_norm": 0.29914259910583496, | |
| "learning_rate": 1.0659039014077944e-06, | |
| "loss": 0.6100361347198486, | |
| "memory(GiB)": 77.63, | |
| "step": 240, | |
| "token_acc": 0.8085165989634403, | |
| "train_speed(iter/s)": 0.009798 | |
| }, | |
| { | |
| "epoch": 0.9549330085261876, | |
| "grad_norm": 0.36409541964530945, | |
| "learning_rate": 5.047548650136513e-07, | |
| "loss": 0.6194799423217774, | |
| "memory(GiB)": 77.63, | |
| "step": 245, | |
| "token_acc": 0.807660813392776, | |
| "train_speed(iter/s)": 0.009801 | |
| }, | |
| { | |
| "epoch": 0.97442143727162, | |
| "grad_norm": 0.40651243925094604, | |
| "learning_rate": 1.503529416103988e-07, | |
| "loss": 0.5671232223510743, | |
| "memory(GiB)": 77.63, | |
| "step": 250, | |
| "token_acc": 0.8216745683871545, | |
| "train_speed(iter/s)": 0.009809 | |
| }, | |
| { | |
| "epoch": 0.9939098660170523, | |
| "grad_norm": 0.3596270978450775, | |
| "learning_rate": 4.178507228136397e-09, | |
| "loss": 0.6255767822265625, | |
| "memory(GiB)": 77.63, | |
| "step": 255, | |
| "token_acc": 0.8013068013068013, | |
| "train_speed(iter/s)": 0.009788 | |
| }, | |
| { | |
| "epoch": 0.9978075517661389, | |
| "eval_loss": 0.5726350545883179, | |
| "eval_runtime": 14.0103, | |
| "eval_samples_per_second": 0.286, | |
| "eval_steps_per_second": 0.143, | |
| "step": 256 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 256, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.3517329026916086e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |