{
  "best_metric": 0.8317757009345794,
  "best_model_checkpoint": "BEiT-RHS-NDA\\checkpoint-272",
  "epoch": 40.0,
  "eval_steps": 500,
  "global_step": 320,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 0.6851425170898438,
      "eval_runtime": 2.6682,
      "eval_samples_per_second": 40.103,
      "eval_steps_per_second": 2.624,
      "step": 8
    },
    {
      "epoch": 1.25,
      "learning_rate": 3.125e-05,
      "loss": 0.6911,
      "step": 10
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 0.6720580458641052,
      "eval_runtime": 2.054,
      "eval_samples_per_second": 52.093,
      "eval_steps_per_second": 3.408,
      "step": 16
    },
    {
      "epoch": 2.5,
      "learning_rate": 4.9342105263157894e-05,
      "loss": 0.6739,
      "step": 20
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 0.6504218578338623,
      "eval_runtime": 2.4524,
      "eval_samples_per_second": 43.631,
      "eval_steps_per_second": 2.854,
      "step": 24
    },
    {
      "epoch": 3.75,
      "learning_rate": 4.769736842105263e-05,
      "loss": 0.6595,
      "step": 30
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 0.6432350873947144,
      "eval_runtime": 2.1328,
      "eval_samples_per_second": 50.169,
      "eval_steps_per_second": 3.282,
      "step": 32
    },
    {
      "epoch": 5.0,
      "learning_rate": 4.605263157894737e-05,
      "loss": 0.646,
      "step": 40
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6822429906542056,
      "eval_loss": 0.6316895484924316,
      "eval_runtime": 2.4096,
      "eval_samples_per_second": 44.405,
      "eval_steps_per_second": 2.905,
      "step": 40
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6915887850467289,
      "eval_loss": 0.617514431476593,
      "eval_runtime": 2.0335,
      "eval_samples_per_second": 52.617,
      "eval_steps_per_second": 3.442,
      "step": 48
    },
    {
      "epoch": 6.25,
      "learning_rate": 4.440789473684211e-05,
      "loss": 0.6142,
      "step": 50
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.6915887850467289,
      "eval_loss": 0.6269640326499939,
      "eval_runtime": 2.0515,
      "eval_samples_per_second": 52.156,
      "eval_steps_per_second": 3.412,
      "step": 56
    },
    {
      "epoch": 7.5,
      "learning_rate": 4.2763157894736847e-05,
      "loss": 0.608,
      "step": 60
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.6915887850467289,
      "eval_loss": 0.6617878675460815,
      "eval_runtime": 2.0447,
      "eval_samples_per_second": 52.331,
      "eval_steps_per_second": 3.424,
      "step": 64
    },
    {
      "epoch": 8.75,
      "learning_rate": 4.111842105263158e-05,
      "loss": 0.5927,
      "step": 70
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.6915887850467289,
      "eval_loss": 0.5347260236740112,
      "eval_runtime": 2.139,
      "eval_samples_per_second": 50.023,
      "eval_steps_per_second": 3.273,
      "step": 72
    },
    {
      "epoch": 10.0,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 0.5333,
      "step": 80
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.6448598130841121,
      "eval_loss": 0.5743899941444397,
      "eval_runtime": 2.1225,
      "eval_samples_per_second": 50.412,
      "eval_steps_per_second": 3.298,
      "step": 80
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7476635514018691,
      "eval_loss": 0.4974236786365509,
      "eval_runtime": 2.059,
      "eval_samples_per_second": 51.967,
      "eval_steps_per_second": 3.4,
      "step": 88
    },
    {
      "epoch": 11.25,
      "learning_rate": 3.7828947368421054e-05,
      "loss": 0.4987,
      "step": 90
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.6448598130841121,
      "eval_loss": 0.5970269441604614,
      "eval_runtime": 2.1492,
      "eval_samples_per_second": 49.787,
      "eval_steps_per_second": 3.257,
      "step": 96
    },
    {
      "epoch": 12.5,
      "learning_rate": 3.618421052631579e-05,
      "loss": 0.5421,
      "step": 100
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7383177570093458,
      "eval_loss": 0.5137068629264832,
      "eval_runtime": 2.4494,
      "eval_samples_per_second": 43.684,
      "eval_steps_per_second": 2.858,
      "step": 104
    },
    {
      "epoch": 13.75,
      "learning_rate": 3.4539473684210524e-05,
      "loss": 0.4881,
      "step": 110
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7663551401869159,
      "eval_loss": 0.47269827127456665,
      "eval_runtime": 2.4181,
      "eval_samples_per_second": 44.249,
      "eval_steps_per_second": 2.895,
      "step": 112
    },
    {
      "epoch": 15.0,
      "learning_rate": 3.289473684210527e-05,
      "loss": 0.4408,
      "step": 120
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7663551401869159,
      "eval_loss": 0.5161357522010803,
      "eval_runtime": 2.086,
      "eval_samples_per_second": 51.295,
      "eval_steps_per_second": 3.356,
      "step": 120
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.6915887850467289,
      "eval_loss": 0.6732468008995056,
      "eval_runtime": 2.0757,
      "eval_samples_per_second": 51.548,
      "eval_steps_per_second": 3.372,
      "step": 128
    },
    {
      "epoch": 16.25,
      "learning_rate": 3.125e-05,
      "loss": 0.4923,
      "step": 130
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7009345794392523,
      "eval_loss": 0.6567767262458801,
      "eval_runtime": 2.0563,
      "eval_samples_per_second": 52.036,
      "eval_steps_per_second": 3.404,
      "step": 136
    },
    {
      "epoch": 17.5,
      "learning_rate": 2.9605263157894735e-05,
      "loss": 0.4135,
      "step": 140
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7009345794392523,
      "eval_loss": 0.665261447429657,
      "eval_runtime": 2.454,
      "eval_samples_per_second": 43.602,
      "eval_steps_per_second": 2.852,
      "step": 144
    },
    {
      "epoch": 18.75,
      "learning_rate": 2.7960526315789477e-05,
      "loss": 0.4308,
      "step": 150
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.719626168224299,
      "eval_loss": 0.6031992435455322,
      "eval_runtime": 2.0319,
      "eval_samples_per_second": 52.66,
      "eval_steps_per_second": 3.445,
      "step": 152
    },
    {
      "epoch": 20.0,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.3837,
      "step": 160
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.8037383177570093,
      "eval_loss": 0.44923561811447144,
      "eval_runtime": 2.1355,
      "eval_samples_per_second": 50.106,
      "eval_steps_per_second": 3.278,
      "step": 160
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.454855740070343,
      "eval_runtime": 2.1041,
      "eval_samples_per_second": 50.854,
      "eval_steps_per_second": 3.327,
      "step": 168
    },
    {
      "epoch": 21.25,
      "learning_rate": 2.4671052631578947e-05,
      "loss": 0.3297,
      "step": 170
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7663551401869159,
      "eval_loss": 0.5525509715080261,
      "eval_runtime": 2.3723,
      "eval_samples_per_second": 45.104,
      "eval_steps_per_second": 2.951,
      "step": 176
    },
    {
      "epoch": 22.5,
      "learning_rate": 2.3026315789473685e-05,
      "loss": 0.3264,
      "step": 180
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.5171772241592407,
      "eval_runtime": 2.1842,
      "eval_samples_per_second": 48.989,
      "eval_steps_per_second": 3.205,
      "step": 184
    },
    {
      "epoch": 23.75,
      "learning_rate": 2.1381578947368423e-05,
      "loss": 0.3487,
      "step": 190
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7663551401869159,
      "eval_loss": 0.5104933381080627,
      "eval_runtime": 2.1764,
      "eval_samples_per_second": 49.164,
      "eval_steps_per_second": 3.216,
      "step": 192
    },
    {
      "epoch": 25.0,
      "learning_rate": 1.9736842105263158e-05,
      "loss": 0.2892,
      "step": 200
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7757009345794392,
      "eval_loss": 0.4565769135951996,
      "eval_runtime": 2.1452,
      "eval_samples_per_second": 49.879,
      "eval_steps_per_second": 3.263,
      "step": 200
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.523303747177124,
      "eval_runtime": 2.1458,
      "eval_samples_per_second": 49.865,
      "eval_steps_per_second": 3.262,
      "step": 208
    },
    {
      "epoch": 26.25,
      "learning_rate": 1.8092105263157896e-05,
      "loss": 0.2505,
      "step": 210
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.4817139804363251,
      "eval_runtime": 2.0456,
      "eval_samples_per_second": 52.308,
      "eval_steps_per_second": 3.422,
      "step": 216
    },
    {
      "epoch": 27.5,
      "learning_rate": 1.6447368421052635e-05,
      "loss": 0.2542,
      "step": 220
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.8037383177570093,
      "eval_loss": 0.5034652948379517,
      "eval_runtime": 2.3168,
      "eval_samples_per_second": 46.184,
      "eval_steps_per_second": 3.021,
      "step": 224
    },
    {
      "epoch": 28.75,
      "learning_rate": 1.4802631578947368e-05,
      "loss": 0.2285,
      "step": 230
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.5281862616539001,
      "eval_runtime": 2.1165,
      "eval_samples_per_second": 50.556,
      "eval_steps_per_second": 3.307,
      "step": 232
    },
    {
      "epoch": 30.0,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 0.2053,
      "step": 240
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.8130841121495327,
      "eval_loss": 0.5637905597686768,
      "eval_runtime": 2.1396,
      "eval_samples_per_second": 50.009,
      "eval_steps_per_second": 3.272,
      "step": 240
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.7570093457943925,
      "eval_loss": 0.6189974546432495,
      "eval_runtime": 2.2612,
      "eval_samples_per_second": 47.32,
      "eval_steps_per_second": 3.096,
      "step": 248
    },
    {
      "epoch": 31.25,
      "learning_rate": 1.1513157894736843e-05,
      "loss": 0.2205,
      "step": 250
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7850467289719626,
      "eval_loss": 0.614178478717804,
      "eval_runtime": 2.5358,
      "eval_samples_per_second": 42.196,
      "eval_steps_per_second": 2.761,
      "step": 256
    },
    {
      "epoch": 32.5,
      "learning_rate": 9.868421052631579e-06,
      "loss": 0.2081,
      "step": 260
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.7850467289719626,
      "eval_loss": 0.575212836265564,
      "eval_runtime": 2.0662,
      "eval_samples_per_second": 51.787,
      "eval_steps_per_second": 3.388,
      "step": 264
    },
    {
      "epoch": 33.75,
      "learning_rate": 8.223684210526317e-06,
      "loss": 0.2075,
      "step": 270
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.8317757009345794,
      "eval_loss": 0.5321738719940186,
      "eval_runtime": 2.1157,
      "eval_samples_per_second": 50.573,
      "eval_steps_per_second": 3.309,
      "step": 272
    },
    {
      "epoch": 35.0,
      "learning_rate": 6.578947368421053e-06,
      "loss": 0.2286,
      "step": 280
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.5312566161155701,
      "eval_runtime": 2.4167,
      "eval_samples_per_second": 44.276,
      "eval_steps_per_second": 2.897,
      "step": 280
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.8130841121495327,
      "eval_loss": 0.5189207792282104,
      "eval_runtime": 2.2397,
      "eval_samples_per_second": 47.773,
      "eval_steps_per_second": 3.125,
      "step": 288
    },
    {
      "epoch": 36.25,
      "learning_rate": 4.9342105263157895e-06,
      "loss": 0.2008,
      "step": 290
    },
    {
      "epoch": 37.0,
      "eval_accuracy": 0.7850467289719626,
      "eval_loss": 0.5589626431465149,
      "eval_runtime": 2.5429,
      "eval_samples_per_second": 42.078,
      "eval_steps_per_second": 2.753,
      "step": 296
    },
    {
      "epoch": 37.5,
      "learning_rate": 3.2894736842105265e-06,
      "loss": 0.1884,
      "step": 300
    },
    {
      "epoch": 38.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.5488373041152954,
      "eval_runtime": 2.042,
      "eval_samples_per_second": 52.399,
      "eval_steps_per_second": 3.428,
      "step": 304
    },
    {
      "epoch": 38.75,
      "learning_rate": 1.6447368421052632e-06,
      "loss": 0.1819,
      "step": 310
    },
    {
      "epoch": 39.0,
      "eval_accuracy": 0.8037383177570093,
      "eval_loss": 0.556251585483551,
      "eval_runtime": 2.015,
      "eval_samples_per_second": 53.102,
      "eval_steps_per_second": 3.474,
      "step": 312
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.0,
      "loss": 0.1698,
      "step": 320
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.5678603053092957,
      "eval_runtime": 2.1445,
      "eval_samples_per_second": 49.894,
      "eval_steps_per_second": 3.264,
      "step": 320
    },
    {
      "epoch": 40.0,
      "step": 320,
      "total_flos": 1.5429806632629043e+18,
      "train_loss": 0.3920826520770788,
      "train_runtime": 766.5439,
      "train_samples_per_second": 25.987,
      "train_steps_per_second": 0.417
    }
  ],
  "logging_steps": 10,
  "max_steps": 320,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 40,
  "save_steps": 500,
  "total_flos": 1.5429806632629043e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}