| { | |
| "best_metric": 0.00030898803379386663, | |
| "best_model_checkpoint": "AlexWang99/byt5_add/checkpoint-1275", | |
| "epoch": 51.0, | |
| "eval_steps": 500, | |
| "global_step": 1275, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.747314691543579, | |
| "eval_runtime": 11.1213, | |
| "eval_samples_per_second": 899.172, | |
| "eval_steps_per_second": 1.169, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 1.6477937698364258, | |
| "eval_runtime": 10.7967, | |
| "eval_samples_per_second": 926.209, | |
| "eval_steps_per_second": 1.204, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 1.5999796390533447, | |
| "eval_runtime": 10.8098, | |
| "eval_samples_per_second": 925.089, | |
| "eval_steps_per_second": 1.203, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 1.4885144233703613, | |
| "eval_runtime": 10.8417, | |
| "eval_samples_per_second": 922.367, | |
| "eval_steps_per_second": 1.199, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 1.3953396081924438, | |
| "eval_runtime": 10.8419, | |
| "eval_samples_per_second": 922.345, | |
| "eval_steps_per_second": 1.199, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 1.2306207418441772, | |
| "eval_runtime": 10.8327, | |
| "eval_samples_per_second": 923.131, | |
| "eval_steps_per_second": 1.2, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 1.0172127485275269, | |
| "eval_runtime": 10.8404, | |
| "eval_samples_per_second": 922.478, | |
| "eval_steps_per_second": 1.199, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.7508996725082397, | |
| "eval_runtime": 10.867, | |
| "eval_samples_per_second": 920.221, | |
| "eval_steps_per_second": 1.196, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.5204245448112488, | |
| "eval_runtime": 10.837, | |
| "eval_samples_per_second": 922.761, | |
| "eval_steps_per_second": 1.2, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.3563512861728668, | |
| "eval_runtime": 11.004, | |
| "eval_samples_per_second": 908.763, | |
| "eval_steps_per_second": 1.181, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 0.3062296211719513, | |
| "eval_runtime": 10.8369, | |
| "eval_samples_per_second": 922.772, | |
| "eval_steps_per_second": 1.2, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 0.23057429492473602, | |
| "eval_runtime": 10.799, | |
| "eval_samples_per_second": 926.015, | |
| "eval_steps_per_second": 1.204, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 0.17026692628860474, | |
| "eval_runtime": 10.8267, | |
| "eval_samples_per_second": 923.643, | |
| "eval_steps_per_second": 1.201, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 0.14094401895999908, | |
| "eval_runtime": 10.8171, | |
| "eval_samples_per_second": 924.461, | |
| "eval_steps_per_second": 1.202, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 0.11562483012676239, | |
| "eval_runtime": 10.8544, | |
| "eval_samples_per_second": 921.286, | |
| "eval_steps_per_second": 1.198, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 0.1076672226190567, | |
| "eval_runtime": 10.7997, | |
| "eval_samples_per_second": 925.947, | |
| "eval_steps_per_second": 1.204, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 0.07891710102558136, | |
| "eval_runtime": 10.8355, | |
| "eval_samples_per_second": 922.895, | |
| "eval_steps_per_second": 1.2, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 0.07825633883476257, | |
| "eval_runtime": 10.9577, | |
| "eval_samples_per_second": 912.598, | |
| "eval_steps_per_second": 1.186, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 0.053240709006786346, | |
| "eval_runtime": 10.8407, | |
| "eval_samples_per_second": 922.45, | |
| "eval_steps_per_second": 1.199, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.8626, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.03896724432706833, | |
| "eval_runtime": 10.8919, | |
| "eval_samples_per_second": 918.117, | |
| "eval_steps_per_second": 1.194, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 0.0326126404106617, | |
| "eval_runtime": 10.9808, | |
| "eval_samples_per_second": 910.682, | |
| "eval_steps_per_second": 1.184, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.026844095438718796, | |
| "eval_runtime": 10.8647, | |
| "eval_samples_per_second": 920.415, | |
| "eval_steps_per_second": 1.197, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 0.022708676755428314, | |
| "eval_runtime": 11.1211, | |
| "eval_samples_per_second": 899.191, | |
| "eval_steps_per_second": 1.169, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.020555635914206505, | |
| "eval_runtime": 10.8169, | |
| "eval_samples_per_second": 924.48, | |
| "eval_steps_per_second": 1.202, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.016072452068328857, | |
| "eval_runtime": 10.8195, | |
| "eval_samples_per_second": 924.261, | |
| "eval_steps_per_second": 1.202, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.015775442123413086, | |
| "eval_runtime": 11.0521, | |
| "eval_samples_per_second": 904.809, | |
| "eval_steps_per_second": 1.176, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 0.010050756856799126, | |
| "eval_runtime": 10.96, | |
| "eval_samples_per_second": 912.407, | |
| "eval_steps_per_second": 1.186, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 0.009800990112125874, | |
| "eval_runtime": 10.8085, | |
| "eval_samples_per_second": 925.196, | |
| "eval_steps_per_second": 1.203, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 0.0077048842795193195, | |
| "eval_runtime": 10.9528, | |
| "eval_samples_per_second": 913.005, | |
| "eval_steps_per_second": 1.187, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 0.005685885436832905, | |
| "eval_runtime": 10.9631, | |
| "eval_samples_per_second": 912.147, | |
| "eval_steps_per_second": 1.186, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_loss": 0.006655455566942692, | |
| "eval_runtime": 10.8367, | |
| "eval_samples_per_second": 922.788, | |
| "eval_steps_per_second": 1.2, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 0.004621443338692188, | |
| "eval_runtime": 10.8165, | |
| "eval_samples_per_second": 924.51, | |
| "eval_steps_per_second": 1.202, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_loss": 0.0033882376737892628, | |
| "eval_runtime": 10.9293, | |
| "eval_samples_per_second": 914.976, | |
| "eval_steps_per_second": 1.189, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 0.0038037376943975687, | |
| "eval_runtime": 10.7973, | |
| "eval_samples_per_second": 926.155, | |
| "eval_steps_per_second": 1.204, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_loss": 0.003371346276253462, | |
| "eval_runtime": 10.834, | |
| "eval_samples_per_second": 923.021, | |
| "eval_steps_per_second": 1.2, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 0.0024659824557602406, | |
| "eval_runtime": 10.7902, | |
| "eval_samples_per_second": 926.766, | |
| "eval_steps_per_second": 1.205, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_loss": 0.0022366114426404238, | |
| "eval_runtime": 10.8096, | |
| "eval_samples_per_second": 925.1, | |
| "eval_steps_per_second": 1.203, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 0.0022026619408279657, | |
| "eval_runtime": 10.8109, | |
| "eval_samples_per_second": 924.992, | |
| "eval_steps_per_second": 1.202, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_loss": 0.0024010157212615013, | |
| "eval_runtime": 11.1034, | |
| "eval_samples_per_second": 900.623, | |
| "eval_steps_per_second": 1.171, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 4e-05, | |
| "loss": 0.0919, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 0.0013342766324058175, | |
| "eval_runtime": 10.7511, | |
| "eval_samples_per_second": 930.139, | |
| "eval_steps_per_second": 1.209, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_loss": 0.0016493805451318622, | |
| "eval_runtime": 10.7987, | |
| "eval_samples_per_second": 926.034, | |
| "eval_steps_per_second": 1.204, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 0.001088765449821949, | |
| "eval_runtime": 10.8106, | |
| "eval_samples_per_second": 925.017, | |
| "eval_steps_per_second": 1.203, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_loss": 0.0009081660537049174, | |
| "eval_runtime": 10.7945, | |
| "eval_samples_per_second": 926.398, | |
| "eval_steps_per_second": 1.204, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_loss": 0.0007170450408011675, | |
| "eval_runtime": 10.9388, | |
| "eval_samples_per_second": 914.174, | |
| "eval_steps_per_second": 1.188, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_loss": 0.0006850157515145838, | |
| "eval_runtime": 10.8231, | |
| "eval_samples_per_second": 923.948, | |
| "eval_steps_per_second": 1.201, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_loss": 0.0007588361040689051, | |
| "eval_runtime": 10.9442, | |
| "eval_samples_per_second": 913.729, | |
| "eval_steps_per_second": 1.188, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_loss": 0.0007894792361184955, | |
| "eval_runtime": 10.9394, | |
| "eval_samples_per_second": 914.125, | |
| "eval_steps_per_second": 1.188, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_loss": 0.0004850537225138396, | |
| "eval_runtime": 10.8141, | |
| "eval_samples_per_second": 924.722, | |
| "eval_steps_per_second": 1.202, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_loss": 0.0003986251540482044, | |
| "eval_runtime": 10.7964, | |
| "eval_samples_per_second": 926.231, | |
| "eval_steps_per_second": 1.204, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 0.0005350292194634676, | |
| "eval_runtime": 10.9488, | |
| "eval_samples_per_second": 913.343, | |
| "eval_steps_per_second": 1.187, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_loss": 0.00030898803379386663, | |
| "eval_runtime": 10.8156, | |
| "eval_samples_per_second": 924.594, | |
| "eval_steps_per_second": 1.202, | |
| "step": 1275 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 5000, | |
| "num_train_epochs": 200, | |
| "save_steps": 500, | |
| "total_flos": 2.928514277376e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |