| { |
| "best_global_step": 70, |
| "best_metric": 0.08522226, |
| "best_model_checkpoint": "/fsx-neo/dedicated-fsx-data-repo-neo-us-east-1/kayleexl/tree_reasoning/logical-reasoning/ms-swift/output/v2-20260217-151608/checkpoint-70", |
| "epoch": 1.9628318584070796, |
| "eval_steps": 50, |
| "global_step": 70, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02831858407079646, |
| "grad_norm": 5.061161041259766, |
| "learning_rate": 2.5e-05, |
| "loss": 1.2367206811904907, |
| "memory(GiB)": 124.74, |
| "step": 1, |
| "token_acc": 0.8723252496433667, |
| "train_speed(iter/s)": 0.002208 |
| }, |
| { |
| "epoch": 0.1415929203539823, |
| "grad_norm": 2.7257096767425537, |
| "learning_rate": 9.99433669591504e-05, |
| "loss": 1.09165358543396, |
| "memory(GiB)": 124.93, |
| "step": 5, |
| "token_acc": 0.8669217247440802, |
| "train_speed(iter/s)": 0.002356 |
| }, |
| { |
| "epoch": 0.2831858407079646, |
| "grad_norm": 0.3928062915802002, |
| "learning_rate": 9.797464868072488e-05, |
| "loss": 0.29025247097015383, |
| "memory(GiB)": 158.54, |
| "step": 10, |
| "token_acc": 0.9363323320389773, |
| "train_speed(iter/s)": 0.002404 |
| }, |
| { |
| "epoch": 0.4247787610619469, |
| "grad_norm": 0.2691526412963867, |
| "learning_rate": 9.330127018922194e-05, |
| "loss": 0.1598877191543579, |
| "memory(GiB)": 158.54, |
| "step": 15, |
| "token_acc": 0.9668136632035279, |
| "train_speed(iter/s)": 0.00243 |
| }, |
| { |
| "epoch": 0.5663716814159292, |
| "grad_norm": 0.21604329347610474, |
| "learning_rate": 8.618670190525352e-05, |
| "loss": 0.11259323358535767, |
| "memory(GiB)": 158.55, |
| "step": 20, |
| "token_acc": 0.973859138098439, |
| "train_speed(iter/s)": 0.002451 |
| }, |
| { |
| "epoch": 0.7079646017699115, |
| "grad_norm": 0.23480446636676788, |
| "learning_rate": 7.703204087277988e-05, |
| "loss": 0.12505736351013183, |
| "memory(GiB)": 158.55, |
| "step": 25, |
| "token_acc": 0.9689548639891493, |
| "train_speed(iter/s)": 0.002465 |
| }, |
| { |
| "epoch": 0.8495575221238938, |
| "grad_norm": 0.24651798605918884, |
| "learning_rate": 6.635339816587109e-05, |
| "loss": 0.08011133074760438, |
| "memory(GiB)": 158.55, |
| "step": 30, |
| "token_acc": 0.9791823804495399, |
| "train_speed(iter/s)": 0.002473 |
| }, |
| { |
| "epoch": 0.9911504424778761, |
| "grad_norm": 0.17473125457763672, |
| "learning_rate": 5.475280216520913e-05, |
| "loss": 0.0713156819343567, |
| "memory(GiB)": 158.55, |
| "step": 35, |
| "token_acc": 0.9800586032882956, |
| "train_speed(iter/s)": 0.002476 |
| }, |
| { |
| "epoch": 1.1132743362831858, |
| "grad_norm": 0.2975701093673706, |
| "learning_rate": 4.288425808633575e-05, |
| "loss": 0.08693617582321167, |
| "memory(GiB)": 158.55, |
| "step": 40, |
| "token_acc": 0.9789600758816935, |
| "train_speed(iter/s)": 0.002523 |
| }, |
| { |
| "epoch": 1.2548672566371681, |
| "grad_norm": 0.39325493574142456, |
| "learning_rate": 3.141687721698363e-05, |
| "loss": 0.10126246213912964, |
| "memory(GiB)": 158.55, |
| "step": 45, |
| "token_acc": 0.9747304582210242, |
| "train_speed(iter/s)": 0.002509 |
| }, |
| { |
| "epoch": 1.3964601769911504, |
| "grad_norm": 0.10699591040611267, |
| "learning_rate": 2.09971545214401e-05, |
| "loss": 0.0974161148071289, |
| "memory(GiB)": 158.55, |
| "step": 50, |
| "token_acc": 0.9751442521083, |
| "train_speed(iter/s)": 0.002503 |
| }, |
| { |
| "epoch": 1.3964601769911504, |
| "eval_loss": 0.08729944378137589, |
| "eval_runtime": 33.267, |
| "eval_samples_per_second": 0.15, |
| "eval_steps_per_second": 0.15, |
| "eval_token_acc": 0.97632058287796, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.5380530973451327, |
| "grad_norm": 0.1755068004131317, |
| "learning_rate": 1.2212521282287092e-05, |
| "loss": 0.06404162645339966, |
| "memory(GiB)": 158.55, |
| "step": 55, |
| "token_acc": 0.9836145586389625, |
| "train_speed(iter/s)": 0.002493 |
| }, |
| { |
| "epoch": 1.679646017699115, |
| "grad_norm": 0.3299904763698578, |
| "learning_rate": 5.558227567253832e-06, |
| "loss": 0.08525026440620423, |
| "memory(GiB)": 158.55, |
| "step": 60, |
| "token_acc": 0.9771212972625832, |
| "train_speed(iter/s)": 0.002487 |
| }, |
| { |
| "epoch": 1.8212389380530973, |
| "grad_norm": 0.08729847520589828, |
| "learning_rate": 1.4094215838229176e-06, |
| "loss": 0.08384163379669189, |
| "memory(GiB)": 158.55, |
| "step": 65, |
| "token_acc": 0.9795660573636499, |
| "train_speed(iter/s)": 0.002484 |
| }, |
| { |
| "epoch": 1.9628318584070796, |
| "grad_norm": 0.1614108830690384, |
| "learning_rate": 0.0, |
| "loss": 0.07610564231872559, |
| "memory(GiB)": 158.55, |
| "step": 70, |
| "token_acc": 0.979417594145449, |
| "train_speed(iter/s)": 0.002476 |
| }, |
| { |
| "epoch": 1.9628318584070796, |
| "eval_loss": 0.0852222591638565, |
| "eval_runtime": 34.8529, |
| "eval_samples_per_second": 0.143, |
| "eval_steps_per_second": 0.143, |
| "eval_token_acc": 0.97632058287796, |
| "step": 70 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 70, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.662882357433631e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|