| { | |
| "best_metric": 0.65715516, | |
| "best_model_checkpoint": "Qwen2.5-7B-Instruct_lora/checkpoint-128", | |
| "epoch": 0.997564539698003, | |
| "eval_steps": 100, | |
| "global_step": 128, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007793472966390648, | |
| "grad_norm": 0.4758923351764679, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 0.82492995262146, | |
| "memory(GiB)": 77.98, | |
| "step": 1, | |
| "token_acc": 0.7762530413625304, | |
| "train_speed(iter/s)": 0.01272 | |
| }, | |
| { | |
| "epoch": 0.03896736483195324, | |
| "grad_norm": 0.5201897621154785, | |
| "learning_rate": 7.142857142857143e-05, | |
| "loss": 0.9614015817642212, | |
| "memory(GiB)": 77.98, | |
| "step": 5, | |
| "token_acc": 0.7459884860362568, | |
| "train_speed(iter/s)": 0.013169 | |
| }, | |
| { | |
| "epoch": 0.07793472966390648, | |
| "grad_norm": 0.3151409327983856, | |
| "learning_rate": 9.984840253435568e-05, | |
| "loss": 0.8129005432128906, | |
| "memory(GiB)": 77.98, | |
| "step": 10, | |
| "token_acc": 0.7703662699511025, | |
| "train_speed(iter/s)": 0.013385 | |
| }, | |
| { | |
| "epoch": 0.11690209449585971, | |
| "grad_norm": 0.22351491451263428, | |
| "learning_rate": 9.892530053338909e-05, | |
| "loss": 0.8129632949829102, | |
| "memory(GiB)": 78.1, | |
| "step": 15, | |
| "token_acc": 0.7659050029978016, | |
| "train_speed(iter/s)": 0.013457 | |
| }, | |
| { | |
| "epoch": 0.15586945932781296, | |
| "grad_norm": 0.20141544938087463, | |
| "learning_rate": 9.71788333255991e-05, | |
| "loss": 0.737860107421875, | |
| "memory(GiB)": 78.1, | |
| "step": 20, | |
| "token_acc": 0.780184425765231, | |
| "train_speed(iter/s)": 0.013638 | |
| }, | |
| { | |
| "epoch": 0.1948368241597662, | |
| "grad_norm": 0.19052022695541382, | |
| "learning_rate": 9.46383922548932e-05, | |
| "loss": 0.7453306198120118, | |
| "memory(GiB)": 78.1, | |
| "step": 25, | |
| "token_acc": 0.7835497835497836, | |
| "train_speed(iter/s)": 0.013751 | |
| }, | |
| { | |
| "epoch": 0.23380418899171942, | |
| "grad_norm": 0.19930361211299896, | |
| "learning_rate": 9.134673047280645e-05, | |
| "loss": 0.7681457042694092, | |
| "memory(GiB)": 78.1, | |
| "step": 30, | |
| "token_acc": 0.7709214619843775, | |
| "train_speed(iter/s)": 0.013864 | |
| }, | |
| { | |
| "epoch": 0.2727715538236727, | |
| "grad_norm": 0.1565362960100174, | |
| "learning_rate": 8.735924344455732e-05, | |
| "loss": 0.684519624710083, | |
| "memory(GiB)": 78.1, | |
| "step": 35, | |
| "token_acc": 0.7952077263187921, | |
| "train_speed(iter/s)": 0.013849 | |
| }, | |
| { | |
| "epoch": 0.3117389186556259, | |
| "grad_norm": 0.1997351050376892, | |
| "learning_rate": 8.274303669726426e-05, | |
| "loss": 0.7397434711456299, | |
| "memory(GiB)": 78.1, | |
| "step": 40, | |
| "token_acc": 0.7784489429149091, | |
| "train_speed(iter/s)": 0.013865 | |
| }, | |
| { | |
| "epoch": 0.35070628348757915, | |
| "grad_norm": 0.23139981925487518, | |
| "learning_rate": 7.757579649921354e-05, | |
| "loss": 0.7341322422027587, | |
| "memory(GiB)": 78.1, | |
| "step": 45, | |
| "token_acc": 0.7790655206836941, | |
| "train_speed(iter/s)": 0.013866 | |
| }, | |
| { | |
| "epoch": 0.3896736483195324, | |
| "grad_norm": 0.19183649122714996, | |
| "learning_rate": 7.19444824755478e-05, | |
| "loss": 0.7072115421295166, | |
| "memory(GiB)": 78.1, | |
| "step": 50, | |
| "token_acc": 0.7878044975372752, | |
| "train_speed(iter/s)": 0.013846 | |
| }, | |
| { | |
| "epoch": 0.4286410131514856, | |
| "grad_norm": 0.22789426147937775, | |
| "learning_rate": 6.594386416238094e-05, | |
| "loss": 0.7093594074249268, | |
| "memory(GiB)": 78.1, | |
| "step": 55, | |
| "token_acc": 0.7885790362075934, | |
| "train_speed(iter/s)": 0.013798 | |
| }, | |
| { | |
| "epoch": 0.46760837798343885, | |
| "grad_norm": 0.21530242264270782, | |
| "learning_rate": 5.967492612770999e-05, | |
| "loss": 0.6775043964385986, | |
| "memory(GiB)": 78.1, | |
| "step": 60, | |
| "token_acc": 0.7929026953758639, | |
| "train_speed(iter/s)": 0.01371 | |
| }, | |
| { | |
| "epoch": 0.5065757428153921, | |
| "grad_norm": 0.20659631490707397, | |
| "learning_rate": 5.324316849938715e-05, | |
| "loss": 0.7007603645324707, | |
| "memory(GiB)": 78.1, | |
| "step": 65, | |
| "token_acc": 0.787200775176727, | |
| "train_speed(iter/s)": 0.013663 | |
| }, | |
| { | |
| "epoch": 0.5455431076473454, | |
| "grad_norm": 0.2197224497795105, | |
| "learning_rate": 4.675683150061285e-05, | |
| "loss": 0.6781778335571289, | |
| "memory(GiB)": 78.1, | |
| "step": 70, | |
| "token_acc": 0.7972211791212918, | |
| "train_speed(iter/s)": 0.013694 | |
| }, | |
| { | |
| "epoch": 0.5845104724792985, | |
| "grad_norm": 0.24823956191539764, | |
| "learning_rate": 4.032507387229002e-05, | |
| "loss": 0.6829503536224365, | |
| "memory(GiB)": 78.1, | |
| "step": 75, | |
| "token_acc": 0.7936122659873591, | |
| "train_speed(iter/s)": 0.013751 | |
| }, | |
| { | |
| "epoch": 0.6234778373112518, | |
| "grad_norm": 0.20789459347724915, | |
| "learning_rate": 3.4056135837619074e-05, | |
| "loss": 0.7214729309082031, | |
| "memory(GiB)": 78.1, | |
| "step": 80, | |
| "token_acc": 0.7854131534569984, | |
| "train_speed(iter/s)": 0.013775 | |
| }, | |
| { | |
| "epoch": 0.662445202143205, | |
| "grad_norm": 0.22102876007556915, | |
| "learning_rate": 2.805551752445222e-05, | |
| "loss": 0.6610813617706299, | |
| "memory(GiB)": 78.1, | |
| "step": 85, | |
| "token_acc": 0.7997587724196467, | |
| "train_speed(iter/s)": 0.013766 | |
| }, | |
| { | |
| "epoch": 0.7014125669751583, | |
| "grad_norm": 0.24639996886253357, | |
| "learning_rate": 2.2424203500786474e-05, | |
| "loss": 0.6775108814239502, | |
| "memory(GiB)": 78.1, | |
| "step": 90, | |
| "token_acc": 0.7919411605450771, | |
| "train_speed(iter/s)": 0.013768 | |
| }, | |
| { | |
| "epoch": 0.7403799318071116, | |
| "grad_norm": 0.26909467577934265, | |
| "learning_rate": 1.725696330273575e-05, | |
| "loss": 0.7113357543945312, | |
| "memory(GiB)": 78.1, | |
| "step": 95, | |
| "token_acc": 0.7827240947736395, | |
| "train_speed(iter/s)": 0.013806 | |
| }, | |
| { | |
| "epoch": 0.7793472966390648, | |
| "grad_norm": 0.2722283601760864, | |
| "learning_rate": 1.2640756555442684e-05, | |
| "loss": 0.6915526390075684, | |
| "memory(GiB)": 78.1, | |
| "step": 100, | |
| "token_acc": 0.7925359976491331, | |
| "train_speed(iter/s)": 0.013811 | |
| }, | |
| { | |
| "epoch": 0.7793472966390648, | |
| "eval_loss": 0.6591519713401794, | |
| "eval_runtime": 3.54, | |
| "eval_samples_per_second": 1.13, | |
| "eval_steps_per_second": 0.565, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.8183146614710181, | |
| "grad_norm": 0.24852623045444489, | |
| "learning_rate": 8.65326952719357e-06, | |
| "loss": 0.6732324600219727, | |
| "memory(GiB)": 78.1, | |
| "step": 105, | |
| "token_acc": 0.7969924812030075, | |
| "train_speed(iter/s)": 0.013826 | |
| }, | |
| { | |
| "epoch": 0.8572820263029712, | |
| "grad_norm": 0.2657414972782135, | |
| "learning_rate": 5.361607745106817e-06, | |
| "loss": 0.6405797481536866, | |
| "memory(GiB)": 78.1, | |
| "step": 110, | |
| "token_acc": 0.8046042213240682, | |
| "train_speed(iter/s)": 0.013822 | |
| }, | |
| { | |
| "epoch": 0.8962493911349245, | |
| "grad_norm": 0.23174554109573364, | |
| "learning_rate": 2.8211666744009047e-06, | |
| "loss": 0.6503153800964355, | |
| "memory(GiB)": 78.1, | |
| "step": 115, | |
| "token_acc": 0.7991786632970802, | |
| "train_speed(iter/s)": 0.013813 | |
| }, | |
| { | |
| "epoch": 0.9352167559668777, | |
| "grad_norm": 0.24391594529151917, | |
| "learning_rate": 1.0746994666109234e-06, | |
| "loss": 0.6571870803833008, | |
| "memory(GiB)": 78.1, | |
| "step": 120, | |
| "token_acc": 0.799995356288746, | |
| "train_speed(iter/s)": 0.013833 | |
| }, | |
| { | |
| "epoch": 0.974184120798831, | |
| "grad_norm": 0.25083476305007935, | |
| "learning_rate": 1.5159746564433198e-07, | |
| "loss": 0.6554246425628663, | |
| "memory(GiB)": 78.1, | |
| "step": 125, | |
| "token_acc": 0.7995940460081191, | |
| "train_speed(iter/s)": 0.013845 | |
| }, | |
| { | |
| "epoch": 0.997564539698003, | |
| "eval_loss": 0.6571551561355591, | |
| "eval_runtime": 3.9227, | |
| "eval_samples_per_second": 1.02, | |
| "eval_steps_per_second": 0.51, | |
| "step": 128 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 128, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.054032501501809e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |