| { |
| "best_global_step": 5000, |
| "best_metric": 0.538067638874054, |
| "best_model_checkpoint": "./banking-sms-parser-v10/checkpoint-5000", |
| "epoch": 1.4814814814814814, |
| "eval_steps": 500, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05925925925925926, |
| "grad_norm": 0.47084560990333557, |
| "learning_rate": 0.0002955777777777778, |
| "loss": 2.0436, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.11851851851851852, |
| "grad_norm": 0.40365636348724365, |
| "learning_rate": 0.0002911333333333333, |
| "loss": 1.3594, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.14814814814814814, |
| "eval_loss": 1.1683876514434814, |
| "eval_runtime": 29.5095, |
| "eval_samples_per_second": 101.662, |
| "eval_steps_per_second": 12.708, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 0.5327613353729248, |
| "learning_rate": 0.0002866888888888889, |
| "loss": 1.2262, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.23703703703703705, |
| "grad_norm": 0.5959504246711731, |
| "learning_rate": 0.0002822444444444444, |
| "loss": 1.1381, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "grad_norm": 0.4996171295642853, |
| "learning_rate": 0.0002778, |
| "loss": 1.066, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "eval_loss": 0.9456237554550171, |
| "eval_runtime": 29.4299, |
| "eval_samples_per_second": 101.937, |
| "eval_steps_per_second": 12.742, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 0.47130197286605835, |
| "learning_rate": 0.0002733555555555555, |
| "loss": 1.0067, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4148148148148148, |
| "grad_norm": 0.693696916103363, |
| "learning_rate": 0.0002689111111111111, |
| "loss": 0.969, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "eval_loss": 0.8003639578819275, |
| "eval_runtime": 29.3553, |
| "eval_samples_per_second": 102.196, |
| "eval_steps_per_second": 12.775, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4740740740740741, |
| "grad_norm": 0.5786986351013184, |
| "learning_rate": 0.0002644666666666666, |
| "loss": 0.9081, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 0.5209688544273376, |
| "learning_rate": 0.0002600222222222222, |
| "loss": 0.8801, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 0.7497162818908691, |
| "learning_rate": 0.0002555777777777778, |
| "loss": 0.8183, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "eval_loss": 0.7105592489242554, |
| "eval_runtime": 29.7273, |
| "eval_samples_per_second": 100.917, |
| "eval_steps_per_second": 12.615, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6518518518518519, |
| "grad_norm": 0.7358675599098206, |
| "learning_rate": 0.00025113333333333333, |
| "loss": 0.809, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 0.5220760107040405, |
| "learning_rate": 0.0002466888888888889, |
| "loss": 0.7799, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.7407407407407407, |
| "eval_loss": 0.6572133898735046, |
| "eval_runtime": 29.3661, |
| "eval_samples_per_second": 102.158, |
| "eval_steps_per_second": 12.77, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.7703703703703704, |
| "grad_norm": 0.4607163965702057, |
| "learning_rate": 0.00024224444444444443, |
| "loss": 0.7557, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8296296296296296, |
| "grad_norm": 0.5450932383537292, |
| "learning_rate": 0.00023779999999999998, |
| "loss": 0.7264, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.5149521231651306, |
| "learning_rate": 0.00023335555555555556, |
| "loss": 0.7054, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "eval_loss": 0.6189373731613159, |
| "eval_runtime": 29.6913, |
| "eval_samples_per_second": 101.04, |
| "eval_steps_per_second": 12.63, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9481481481481482, |
| "grad_norm": 0.5345056056976318, |
| "learning_rate": 0.0002289111111111111, |
| "loss": 0.7026, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0074074074074073, |
| "grad_norm": 0.5385560989379883, |
| "learning_rate": 0.00022446666666666666, |
| "loss": 0.6888, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.037037037037037, |
| "eval_loss": 0.5899662971496582, |
| "eval_runtime": 29.4218, |
| "eval_samples_per_second": 101.965, |
| "eval_steps_per_second": 12.746, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 0.5609071850776672, |
| "learning_rate": 0.0002200222222222222, |
| "loss": 0.6671, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.125925925925926, |
| "grad_norm": 0.5616484880447388, |
| "learning_rate": 0.00021557777777777776, |
| "loss": 0.6636, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.1851851851851851, |
| "grad_norm": 0.5596534609794617, |
| "learning_rate": 0.0002111333333333333, |
| "loss": 0.6515, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.1851851851851851, |
| "eval_loss": 0.5668333172798157, |
| "eval_runtime": 29.3385, |
| "eval_samples_per_second": 102.255, |
| "eval_steps_per_second": 12.782, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.2444444444444445, |
| "grad_norm": 0.5900132656097412, |
| "learning_rate": 0.00020668888888888888, |
| "loss": 0.6409, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.3037037037037038, |
| "grad_norm": 0.6175922751426697, |
| "learning_rate": 0.00020224444444444443, |
| "loss": 0.641, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "eval_loss": 0.5480825304985046, |
| "eval_runtime": 29.457, |
| "eval_samples_per_second": 101.843, |
| "eval_steps_per_second": 12.73, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.362962962962963, |
| "grad_norm": 0.42820265889167786, |
| "learning_rate": 0.00019779999999999998, |
| "loss": 0.6212, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.4222222222222223, |
| "grad_norm": 0.7179502248764038, |
| "learning_rate": 0.00019335555555555553, |
| "loss": 0.6268, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.4814814814814814, |
| "grad_norm": 0.4201723635196686, |
| "learning_rate": 0.00018891111111111108, |
| "loss": 0.6083, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.4814814814814814, |
| "eval_loss": 0.538067638874054, |
| "eval_runtime": 29.3858, |
| "eval_samples_per_second": 102.09, |
| "eval_steps_per_second": 12.761, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 200, |
| "max_steps": 13500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3328494796800000.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|