{ "best_metric": 0.65715516, "best_model_checkpoint": "Qwen2.5-7B-Instruct_lora/checkpoint-128", "epoch": 0.997564539698003, "eval_steps": 100, "global_step": 128, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007793472966390648, "grad_norm": 0.4758923351764679, "learning_rate": 1.4285714285714285e-05, "loss": 0.82492995262146, "memory(GiB)": 77.98, "step": 1, "token_acc": 0.7762530413625304, "train_speed(iter/s)": 0.01272 }, { "epoch": 0.03896736483195324, "grad_norm": 0.5201897621154785, "learning_rate": 7.142857142857143e-05, "loss": 0.9614015817642212, "memory(GiB)": 77.98, "step": 5, "token_acc": 0.7459884860362568, "train_speed(iter/s)": 0.013169 }, { "epoch": 0.07793472966390648, "grad_norm": 0.3151409327983856, "learning_rate": 9.984840253435568e-05, "loss": 0.8129005432128906, "memory(GiB)": 77.98, "step": 10, "token_acc": 0.7703662699511025, "train_speed(iter/s)": 0.013385 }, { "epoch": 0.11690209449585971, "grad_norm": 0.22351491451263428, "learning_rate": 9.892530053338909e-05, "loss": 0.8129632949829102, "memory(GiB)": 78.1, "step": 15, "token_acc": 0.7659050029978016, "train_speed(iter/s)": 0.013457 }, { "epoch": 0.15586945932781296, "grad_norm": 0.20141544938087463, "learning_rate": 9.71788333255991e-05, "loss": 0.737860107421875, "memory(GiB)": 78.1, "step": 20, "token_acc": 0.780184425765231, "train_speed(iter/s)": 0.013638 }, { "epoch": 0.1948368241597662, "grad_norm": 0.19052022695541382, "learning_rate": 9.46383922548932e-05, "loss": 0.7453306198120118, "memory(GiB)": 78.1, "step": 25, "token_acc": 0.7835497835497836, "train_speed(iter/s)": 0.013751 }, { "epoch": 0.23380418899171942, "grad_norm": 0.19930361211299896, "learning_rate": 9.134673047280645e-05, "loss": 0.7681457042694092, "memory(GiB)": 78.1, "step": 30, "token_acc": 0.7709214619843775, "train_speed(iter/s)": 0.013864 }, { "epoch": 0.2727715538236727, "grad_norm": 0.1565362960100174, "learning_rate": 8.735924344455732e-05, "loss": 0.684519624710083, "memory(GiB)": 78.1, "step": 35, "token_acc": 0.7952077263187921, "train_speed(iter/s)": 0.013849 }, { "epoch": 0.3117389186556259, "grad_norm": 0.1997351050376892, "learning_rate": 8.274303669726426e-05, "loss": 0.7397434711456299, "memory(GiB)": 78.1, "step": 40, "token_acc": 0.7784489429149091, "train_speed(iter/s)": 0.013865 }, { "epoch": 0.35070628348757915, "grad_norm": 0.23139981925487518, "learning_rate": 7.757579649921354e-05, "loss": 0.7341322422027587, "memory(GiB)": 78.1, "step": 45, "token_acc": 0.7790655206836941, "train_speed(iter/s)": 0.013866 }, { "epoch": 0.3896736483195324, "grad_norm": 0.19183649122714996, "learning_rate": 7.19444824755478e-05, "loss": 0.7072115421295166, "memory(GiB)": 78.1, "step": 50, "token_acc": 0.7878044975372752, "train_speed(iter/s)": 0.013846 }, { "epoch": 0.4286410131514856, "grad_norm": 0.22789426147937775, "learning_rate": 6.594386416238094e-05, "loss": 0.7093594074249268, "memory(GiB)": 78.1, "step": 55, "token_acc": 0.7885790362075934, "train_speed(iter/s)": 0.013798 }, { "epoch": 0.46760837798343885, "grad_norm": 0.21530242264270782, "learning_rate": 5.967492612770999e-05, "loss": 0.6775043964385986, "memory(GiB)": 78.1, "step": 60, "token_acc": 0.7929026953758639, "train_speed(iter/s)": 0.01371 }, { "epoch": 0.5065757428153921, "grad_norm": 0.20659631490707397, "learning_rate": 5.324316849938715e-05, "loss": 0.7007603645324707, "memory(GiB)": 78.1, "step": 65, "token_acc": 0.787200775176727, "train_speed(iter/s)": 0.013663 }, { "epoch": 0.5455431076473454, "grad_norm": 0.2197224497795105, "learning_rate": 4.675683150061285e-05, "loss": 0.6781778335571289, "memory(GiB)": 78.1, "step": 70, "token_acc": 0.7972211791212918, "train_speed(iter/s)": 0.013694 }, { "epoch": 0.5845104724792985, "grad_norm": 0.24823956191539764, "learning_rate": 4.032507387229002e-05, "loss": 0.6829503536224365, "memory(GiB)": 78.1, "step": 75, "token_acc": 0.7936122659873591, "train_speed(iter/s)": 0.013751 }, { "epoch": 0.6234778373112518, "grad_norm": 0.20789459347724915, "learning_rate": 3.4056135837619074e-05, "loss": 0.7214729309082031, "memory(GiB)": 78.1, "step": 80, "token_acc": 0.7854131534569984, "train_speed(iter/s)": 0.013775 }, { "epoch": 0.662445202143205, "grad_norm": 0.22102876007556915, "learning_rate": 2.805551752445222e-05, "loss": 0.6610813617706299, "memory(GiB)": 78.1, "step": 85, "token_acc": 0.7997587724196467, "train_speed(iter/s)": 0.013766 }, { "epoch": 0.7014125669751583, "grad_norm": 0.24639996886253357, "learning_rate": 2.2424203500786474e-05, "loss": 0.6775108814239502, "memory(GiB)": 78.1, "step": 90, "token_acc": 0.7919411605450771, "train_speed(iter/s)": 0.013768 }, { "epoch": 0.7403799318071116, "grad_norm": 0.26909467577934265, "learning_rate": 1.725696330273575e-05, "loss": 0.7113357543945312, "memory(GiB)": 78.1, "step": 95, "token_acc": 0.7827240947736395, "train_speed(iter/s)": 0.013806 }, { "epoch": 0.7793472966390648, "grad_norm": 0.2722283601760864, "learning_rate": 1.2640756555442684e-05, "loss": 0.6915526390075684, "memory(GiB)": 78.1, "step": 100, "token_acc": 0.7925359976491331, "train_speed(iter/s)": 0.013811 }, { "epoch": 0.7793472966390648, "eval_loss": 0.6591519713401794, "eval_runtime": 3.54, "eval_samples_per_second": 1.13, "eval_steps_per_second": 0.565, "step": 100 }, { "epoch": 0.8183146614710181, "grad_norm": 0.24852623045444489, "learning_rate": 8.65326952719357e-06, "loss": 0.6732324600219727, "memory(GiB)": 78.1, "step": 105, "token_acc": 0.7969924812030075, "train_speed(iter/s)": 0.013826 }, { "epoch": 0.8572820263029712, "grad_norm": 0.2657414972782135, "learning_rate": 5.361607745106817e-06, "loss": 0.6405797481536866, "memory(GiB)": 78.1, "step": 110, "token_acc": 0.8046042213240682, "train_speed(iter/s)": 0.013822 }, { "epoch": 0.8962493911349245, "grad_norm": 0.23174554109573364, "learning_rate": 2.8211666744009047e-06, "loss": 0.6503153800964355, "memory(GiB)": 78.1, "step": 115, "token_acc": 0.7991786632970802, "train_speed(iter/s)": 0.013813 }, { "epoch": 0.9352167559668777, "grad_norm": 0.24391594529151917, "learning_rate": 1.0746994666109234e-06, "loss": 0.6571870803833008, "memory(GiB)": 78.1, "step": 120, "token_acc": 0.799995356288746, "train_speed(iter/s)": 0.013833 }, { "epoch": 0.974184120798831, "grad_norm": 0.25083476305007935, "learning_rate": 1.5159746564433198e-07, "loss": 0.6554246425628663, "memory(GiB)": 78.1, "step": 125, "token_acc": 0.7995940460081191, "train_speed(iter/s)": 0.013845 }, { "epoch": 0.997564539698003, "eval_loss": 0.6571551561355591, "eval_runtime": 3.9227, "eval_samples_per_second": 1.02, "eval_steps_per_second": 0.51, "step": 128 } ], "logging_steps": 5, "max_steps": 128, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.054032501501809e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }