{
  "best_metric": 0.9115511551155115,
  "best_model_checkpoint": "food-image-classification/checkpoint-35000",
  "epoch": 54.91024287222809,
  "eval_steps": 1000,
  "global_step": 52000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.06,
      "learning_rate": 1.0559662090813095e-06,
      "loss": 4.6112,
      "step": 1000
    },
    {
      "epoch": 1.06,
      "eval_accuracy": 0.034851485148514855,
      "eval_loss": 4.575930118560791,
      "eval_runtime": 157.053,
      "eval_samples_per_second": 96.464,
      "eval_steps_per_second": 6.03,
      "step": 1000
    },
    {
      "epoch": 2.11,
      "learning_rate": 2.111932418162619e-06,
      "loss": 4.4899,
      "step": 2000
    },
    {
      "epoch": 2.11,
      "eval_accuracy": 0.3103630363036304,
      "eval_loss": 4.3788862228393555,
      "eval_runtime": 154.3948,
      "eval_samples_per_second": 98.125,
      "eval_steps_per_second": 6.134,
      "step": 2000
    },
    {
      "epoch": 3.17,
      "learning_rate": 3.167898627243928e-06,
      "loss": 4.2111,
      "step": 3000
    },
    {
      "epoch": 3.17,
      "eval_accuracy": 0.5498349834983498,
      "eval_loss": 4.030922889709473,
      "eval_runtime": 155.2257,
      "eval_samples_per_second": 97.6,
      "eval_steps_per_second": 6.101,
      "step": 3000
    },
    {
      "epoch": 4.22,
      "learning_rate": 4.223864836325238e-06,
      "loss": 3.8257,
      "step": 4000
    },
    {
      "epoch": 4.22,
      "eval_accuracy": 0.6111551155115511,
      "eval_loss": 3.634243965148926,
      "eval_runtime": 156.2293,
      "eval_samples_per_second": 96.973,
      "eval_steps_per_second": 6.062,
      "step": 4000
    },
    {
      "epoch": 5.28,
      "learning_rate": 5.279831045406547e-06,
      "loss": 3.4182,
      "step": 5000
    },
    {
      "epoch": 5.28,
      "eval_accuracy": 0.6514851485148515,
      "eval_loss": 3.225186586380005,
      "eval_runtime": 154.6911,
      "eval_samples_per_second": 97.937,
      "eval_steps_per_second": 6.122,
      "step": 5000
    },
    {
      "epoch": 6.34,
      "learning_rate": 6.335797254487856e-06,
      "loss": 2.9962,
      "step": 6000
    },
    {
      "epoch": 6.34,
      "eval_accuracy": 0.687062706270627,
      "eval_loss": 2.805878162384033,
      "eval_runtime": 154.6826,
      "eval_samples_per_second": 97.943,
      "eval_steps_per_second": 6.122,
      "step": 6000
    },
    {
      "epoch": 7.39,
      "learning_rate": 7.3917634635691666e-06,
      "loss": 2.5605,
      "step": 7000
    },
    {
      "epoch": 7.39,
      "eval_accuracy": 0.7070627062706271,
      "eval_loss": 2.382246494293213,
      "eval_runtime": 155.5967,
      "eval_samples_per_second": 97.367,
      "eval_steps_per_second": 6.086,
      "step": 7000
    },
    {
      "epoch": 8.45,
      "learning_rate": 8.447729672650476e-06,
      "loss": 2.1397,
      "step": 8000
    },
    {
      "epoch": 8.45,
      "eval_accuracy": 0.7335973597359736,
      "eval_loss": 1.975380539894104,
      "eval_runtime": 156.8793,
      "eval_samples_per_second": 96.571,
      "eval_steps_per_second": 6.036,
      "step": 8000
    },
    {
      "epoch": 9.5,
      "learning_rate": 9.503695881731786e-06,
      "loss": 1.7383,
      "step": 9000
    },
    {
      "epoch": 9.5,
      "eval_accuracy": 0.7576897689768977,
      "eval_loss": 1.608676552772522,
      "eval_runtime": 154.5661,
      "eval_samples_per_second": 98.016,
      "eval_steps_per_second": 6.127,
      "step": 9000
    },
    {
      "epoch": 10.56,
      "learning_rate": 1.0559662090813093e-05,
      "loss": 1.3909,
      "step": 10000
    },
    {
      "epoch": 10.56,
      "eval_accuracy": 0.7758415841584159,
      "eval_loss": 1.3203929662704468,
      "eval_runtime": 155.6061,
      "eval_samples_per_second": 97.361,
      "eval_steps_per_second": 6.086,
      "step": 10000
    },
    {
      "epoch": 11.62,
      "learning_rate": 1.1615628299894405e-05,
      "loss": 1.1223,
      "step": 11000
    },
    {
      "epoch": 11.62,
      "eval_accuracy": 0.7831683168316832,
      "eval_loss": 1.1283260583877563,
      "eval_runtime": 153.8564,
      "eval_samples_per_second": 98.468,
      "eval_steps_per_second": 6.155,
      "step": 11000
    },
    {
      "epoch": 12.67,
      "learning_rate": 1.2671594508975712e-05,
      "loss": 0.9312,
      "step": 12000
    },
    {
      "epoch": 12.67,
      "eval_accuracy": 0.7946534653465347,
      "eval_loss": 0.9766868352890015,
      "eval_runtime": 155.0819,
      "eval_samples_per_second": 97.69,
      "eval_steps_per_second": 6.106,
      "step": 12000
    },
    {
      "epoch": 13.73,
      "learning_rate": 1.3727560718057022e-05,
      "loss": 0.7817,
      "step": 13000
    },
    {
      "epoch": 13.73,
      "eval_accuracy": 0.7984158415841585,
      "eval_loss": 0.8917332887649536,
      "eval_runtime": 154.0819,
      "eval_samples_per_second": 98.324,
      "eval_steps_per_second": 6.146,
      "step": 13000
    },
    {
      "epoch": 14.78,
      "learning_rate": 1.4783526927138333e-05,
      "loss": 0.697,
      "step": 14000
    },
    {
      "epoch": 14.78,
      "eval_accuracy": 0.8043564356435644,
      "eval_loss": 0.8234559297561646,
      "eval_runtime": 153.6348,
      "eval_samples_per_second": 98.61,
      "eval_steps_per_second": 6.164,
      "step": 14000
    },
    {
      "epoch": 15.84,
      "learning_rate": 1.583949313621964e-05,
      "loss": 0.6281,
      "step": 15000
    },
    {
      "epoch": 15.84,
      "eval_accuracy": 0.8075247524752476,
      "eval_loss": 0.7959182858467102,
      "eval_runtime": 153.9402,
      "eval_samples_per_second": 98.415,
      "eval_steps_per_second": 6.152,
      "step": 15000
    },
    {
      "epoch": 16.9,
      "learning_rate": 1.6895459345300952e-05,
      "loss": 0.5659,
      "step": 16000
    },
    {
      "epoch": 16.9,
      "eval_accuracy": 0.8158415841584158,
      "eval_loss": 0.7547946572303772,
      "eval_runtime": 155.6477,
      "eval_samples_per_second": 97.335,
      "eval_steps_per_second": 6.084,
      "step": 16000
    },
    {
      "epoch": 17.95,
      "learning_rate": 1.795142555438226e-05,
      "loss": 0.5198,
      "step": 17000
    },
    {
      "epoch": 17.95,
      "eval_accuracy": 0.8104950495049505,
      "eval_loss": 0.7739244103431702,
      "eval_runtime": 155.4026,
      "eval_samples_per_second": 97.489,
      "eval_steps_per_second": 6.094,
      "step": 17000
    },
    {
      "epoch": 19.01,
      "learning_rate": 1.900739176346357e-05,
      "loss": 0.4951,
      "step": 18000
    },
    {
      "epoch": 19.01,
      "eval_accuracy": 0.8151815181518152,
      "eval_loss": 0.7517885565757751,
      "eval_runtime": 153.2443,
      "eval_samples_per_second": 98.862,
      "eval_steps_per_second": 6.18,
      "step": 18000
    },
    {
      "epoch": 20.06,
      "learning_rate": 2.006335797254488e-05,
      "loss": 0.4656,
      "step": 19000
    },
    {
      "epoch": 20.06,
      "eval_accuracy": 0.8158415841584158,
      "eval_loss": 0.7491214275360107,
      "eval_runtime": 155.8688,
      "eval_samples_per_second": 97.197,
      "eval_steps_per_second": 6.076,
      "step": 19000
    },
    {
      "epoch": 21.12,
      "learning_rate": 2.1119324181626187e-05,
      "loss": 0.4385,
      "step": 20000
    },
    {
      "epoch": 21.12,
      "eval_accuracy": 0.8151815181518152,
      "eval_loss": 0.7403990030288696,
      "eval_runtime": 154.5972,
      "eval_samples_per_second": 97.997,
      "eval_steps_per_second": 6.126,
      "step": 20000
    },
    {
      "epoch": 22.18,
      "learning_rate": 2.21752903907075e-05,
      "loss": 0.4148,
      "step": 21000
    },
    {
      "epoch": 22.18,
      "eval_accuracy": 0.8112871287128713,
      "eval_loss": 0.7465632557868958,
      "eval_runtime": 155.3899,
      "eval_samples_per_second": 97.497,
      "eval_steps_per_second": 6.094,
      "step": 21000
    },
    {
      "epoch": 23.23,
      "learning_rate": 2.323125659978881e-05,
      "loss": 0.3926,
      "step": 22000
    },
    {
      "epoch": 23.23,
      "eval_accuracy": 0.8201980198019801,
      "eval_loss": 0.7243059873580933,
      "eval_runtime": 153.8982,
      "eval_samples_per_second": 98.442,
      "eval_steps_per_second": 6.153,
      "step": 22000
    },
    {
      "epoch": 24.29,
      "learning_rate": 2.4287222808870115e-05,
      "loss": 0.3785,
      "step": 23000
    },
    {
      "epoch": 24.29,
      "eval_accuracy": 0.8103630363036304,
      "eval_loss": 0.7593609690666199,
      "eval_runtime": 155.7313,
      "eval_samples_per_second": 97.283,
      "eval_steps_per_second": 6.081,
      "step": 23000
    },
    {
      "epoch": 25.34,
      "learning_rate": 2.5343189017951425e-05,
      "loss": 0.3574,
      "step": 24000
    },
    {
      "epoch": 25.34,
      "eval_accuracy": 0.815973597359736,
      "eval_loss": 0.7465734481811523,
      "eval_runtime": 154.8193,
      "eval_samples_per_second": 97.856,
      "eval_steps_per_second": 6.117,
      "step": 24000
    },
    {
      "epoch": 26.4,
      "learning_rate": 2.6399155227032734e-05,
      "loss": 0.3438,
      "step": 25000
    },
    {
      "epoch": 26.4,
      "eval_accuracy": 0.8155775577557756,
      "eval_loss": 0.7651433944702148,
      "eval_runtime": 154.1129,
      "eval_samples_per_second": 98.305,
      "eval_steps_per_second": 6.145,
      "step": 25000
    },
    {
      "epoch": 27.46,
      "learning_rate": 2.7455121436114044e-05,
      "loss": 0.3274,
      "step": 26000
    },
    {
      "epoch": 27.46,
      "eval_accuracy": 0.8148514851485148,
      "eval_loss": 0.760901927947998,
      "eval_runtime": 155.6779,
      "eval_samples_per_second": 97.316,
      "eval_steps_per_second": 6.083,
      "step": 26000
    },
    {
      "epoch": 28.51,
      "learning_rate": 1.0559662090813095e-06,
      "loss": 0.3793,
      "step": 27000
    },
    {
      "epoch": 28.51,
      "eval_accuracy": 0.9097029702970297,
      "eval_loss": 0.3658629059791565,
      "eval_runtime": 156.004,
      "eval_samples_per_second": 97.113,
      "eval_steps_per_second": 6.07,
      "step": 27000
    },
    {
      "epoch": 29.57,
      "learning_rate": 2.111932418162619e-06,
      "loss": 0.355,
      "step": 28000
    },
    {
      "epoch": 29.57,
      "eval_accuracy": 0.9113531353135313,
      "eval_loss": 0.35932043194770813,
      "eval_runtime": 156.9177,
      "eval_samples_per_second": 96.547,
      "eval_steps_per_second": 6.035,
      "step": 28000
    },
    {
      "epoch": 30.62,
      "learning_rate": 3.167898627243928e-06,
      "loss": 0.3494,
      "step": 29000
    },
    {
      "epoch": 30.62,
      "eval_accuracy": 0.9111551155115512,
      "eval_loss": 0.3594682812690735,
      "eval_runtime": 154.85,
      "eval_samples_per_second": 97.837,
      "eval_steps_per_second": 6.116,
      "step": 29000
    },
    {
      "epoch": 31.68,
      "learning_rate": 4.223864836325238e-06,
      "loss": 0.3297,
      "step": 30000
    },
    {
      "epoch": 31.68,
      "eval_accuracy": 0.9101650165016502,
      "eval_loss": 0.35688260197639465,
      "eval_runtime": 155.9707,
      "eval_samples_per_second": 97.134,
      "eval_steps_per_second": 6.072,
      "step": 30000
    },
    {
      "epoch": 32.73,
      "learning_rate": 5.279831045406547e-06,
      "loss": 0.3252,
      "step": 31000
    },
    {
      "epoch": 32.73,
      "eval_accuracy": 0.9083168316831683,
      "eval_loss": 0.3627123236656189,
      "eval_runtime": 156.6773,
      "eval_samples_per_second": 96.696,
      "eval_steps_per_second": 6.044,
      "step": 31000
    },
    {
      "epoch": 33.79,
      "learning_rate": 6.335797254487856e-06,
      "loss": 0.3189,
      "step": 32000
    },
    {
      "epoch": 33.79,
      "eval_accuracy": 0.9108910891089109,
      "eval_loss": 0.35579344630241394,
      "eval_runtime": 155.255,
      "eval_samples_per_second": 97.581,
      "eval_steps_per_second": 6.1,
      "step": 32000
    },
    {
      "epoch": 34.85,
      "learning_rate": 7.3917634635691666e-06,
      "loss": 0.3064,
      "step": 33000
    },
    {
      "epoch": 34.85,
      "eval_accuracy": 0.9067326732673268,
      "eval_loss": 0.3623407185077667,
      "eval_runtime": 155.6758,
      "eval_samples_per_second": 97.318,
      "eval_steps_per_second": 6.083,
      "step": 33000
    },
    {
      "epoch": 35.9,
      "learning_rate": 8.447729672650476e-06,
      "loss": 0.3,
      "step": 34000
    },
    {
      "epoch": 35.9,
      "eval_accuracy": 0.9073927392739274,
      "eval_loss": 0.36413270235061646,
      "eval_runtime": 156.8691,
      "eval_samples_per_second": 96.577,
      "eval_steps_per_second": 6.037,
      "step": 34000
    },
    {
      "epoch": 36.96,
      "learning_rate": 9.503695881731786e-06,
      "loss": 0.289,
      "step": 35000
    },
    {
      "epoch": 36.96,
      "eval_accuracy": 0.9115511551155115,
      "eval_loss": 0.34877872467041016,
      "eval_runtime": 156.4541,
      "eval_samples_per_second": 96.834,
      "eval_steps_per_second": 6.053,
      "step": 35000
    },
    {
      "epoch": 38.01,
      "learning_rate": 1.0559662090813093e-05,
      "loss": 0.2811,
      "step": 36000
    },
    {
      "epoch": 38.01,
      "eval_accuracy": 0.90996699669967,
      "eval_loss": 0.3593011796474457,
      "eval_runtime": 155.6871,
      "eval_samples_per_second": 97.311,
      "eval_steps_per_second": 6.083,
      "step": 36000
    },
    {
      "epoch": 39.07,
      "learning_rate": 1.1615628299894405e-05,
      "loss": 0.2674,
      "step": 37000
    },
    {
      "epoch": 39.07,
      "eval_accuracy": 0.9037623762376238,
      "eval_loss": 0.37522387504577637,
      "eval_runtime": 155.8581,
      "eval_samples_per_second": 97.204,
      "eval_steps_per_second": 6.076,
      "step": 37000
    },
    {
      "epoch": 40.13,
      "learning_rate": 1.2671594508975712e-05,
      "loss": 0.2644,
      "step": 38000
    },
    {
      "epoch": 40.13,
      "eval_accuracy": 0.9054785478547854,
      "eval_loss": 0.3814030885696411,
      "eval_runtime": 155.1819,
      "eval_samples_per_second": 97.627,
      "eval_steps_per_second": 6.103,
      "step": 38000
    },
    {
      "epoch": 41.18,
      "learning_rate": 1.3727560718057022e-05,
      "loss": 0.2585,
      "step": 39000
    },
    {
      "epoch": 41.18,
      "eval_accuracy": 0.9052805280528052,
      "eval_loss": 0.3803286850452423,
      "eval_runtime": 154.4972,
      "eval_samples_per_second": 98.06,
      "eval_steps_per_second": 6.13,
      "step": 39000
    },
    {
      "epoch": 42.24,
      "learning_rate": 1.4783526927138333e-05,
      "loss": 0.2581,
      "step": 40000
    },
    {
      "epoch": 42.24,
      "eval_accuracy": 0.9038283828382838,
      "eval_loss": 0.37817618250846863,
      "eval_runtime": 156.2669,
      "eval_samples_per_second": 96.95,
      "eval_steps_per_second": 6.06,
      "step": 40000
    },
    {
      "epoch": 43.29,
      "learning_rate": 1.583949313621964e-05,
      "loss": 0.2516,
      "step": 41000
    },
    {
      "epoch": 43.29,
      "eval_accuracy": 0.9011221122112211,
      "eval_loss": 0.39773184061050415,
      "eval_runtime": 155.7863,
      "eval_samples_per_second": 97.249,
      "eval_steps_per_second": 6.079,
      "step": 41000
    },
    {
      "epoch": 44.35,
      "learning_rate": 1.6895459345300952e-05,
      "loss": 0.2431,
      "step": 42000
    },
    {
      "epoch": 44.35,
      "eval_accuracy": 0.8992079207920792,
      "eval_loss": 0.40550053119659424,
      "eval_runtime": 154.9661,
      "eval_samples_per_second": 97.763,
      "eval_steps_per_second": 6.111,
      "step": 42000
    },
    {
      "epoch": 45.41,
      "learning_rate": 1.795142555438226e-05,
      "loss": 0.2429,
      "step": 43000
    },
    {
      "epoch": 45.41,
      "eval_accuracy": 0.8975577557755775,
      "eval_loss": 0.4172586500644684,
      "eval_runtime": 154.5817,
      "eval_samples_per_second": 98.006,
      "eval_steps_per_second": 6.126,
      "step": 43000
    },
    {
      "epoch": 46.46,
      "learning_rate": 1.900739176346357e-05,
      "loss": 0.2406,
      "step": 44000
    },
    {
      "epoch": 46.46,
      "eval_accuracy": 0.893993399339934,
      "eval_loss": 0.4206344783306122,
      "eval_runtime": 156.2155,
      "eval_samples_per_second": 96.981,
      "eval_steps_per_second": 6.062,
      "step": 44000
    },
    {
      "epoch": 47.52,
      "learning_rate": 2.006335797254488e-05,
      "loss": 0.2351,
      "step": 45000
    },
    {
      "epoch": 47.52,
      "eval_accuracy": 0.8926732673267327,
      "eval_loss": 0.4330624043941498,
      "eval_runtime": 154.5274,
      "eval_samples_per_second": 98.041,
      "eval_steps_per_second": 6.128,
      "step": 45000
    },
    {
      "epoch": 48.57,
      "learning_rate": 2.1119324181626187e-05,
      "loss": 0.2333,
      "step": 46000
    },
    {
      "epoch": 48.57,
      "eval_accuracy": 0.8938613861386139,
      "eval_loss": 0.43345457315444946,
      "eval_runtime": 155.962,
      "eval_samples_per_second": 97.139,
      "eval_steps_per_second": 6.072,
      "step": 46000
    },
    {
      "epoch": 49.63,
      "learning_rate": 2.21752903907075e-05,
      "loss": 0.2231,
      "step": 47000
    },
    {
      "epoch": 49.63,
      "eval_accuracy": 0.8863366336633663,
      "eval_loss": 0.45071929693222046,
      "eval_runtime": 156.5229,
      "eval_samples_per_second": 96.791,
      "eval_steps_per_second": 6.05,
      "step": 47000
    },
    {
      "epoch": 50.69,
      "learning_rate": 2.323125659978881e-05,
      "loss": 0.2247,
      "step": 48000
    },
    {
      "epoch": 50.69,
      "eval_accuracy": 0.8875907590759076,
      "eval_loss": 0.4481562674045563,
      "eval_runtime": 155.8513,
      "eval_samples_per_second": 97.208,
      "eval_steps_per_second": 6.076,
      "step": 48000
    },
    {
      "epoch": 51.74,
      "learning_rate": 2.4287222808870115e-05,
      "loss": 0.2201,
      "step": 49000
    },
    {
      "epoch": 51.74,
      "eval_accuracy": 0.8867326732673267,
      "eval_loss": 0.45674923062324524,
      "eval_runtime": 154.5848,
      "eval_samples_per_second": 98.004,
      "eval_steps_per_second": 6.126,
      "step": 49000
    },
    {
      "epoch": 52.8,
      "learning_rate": 2.5343189017951425e-05,
      "loss": 0.2166,
      "step": 50000
    },
    {
      "epoch": 52.8,
      "eval_accuracy": 0.8831683168316832,
      "eval_loss": 0.460601806640625,
      "eval_runtime": 156.293,
      "eval_samples_per_second": 96.933,
      "eval_steps_per_second": 6.059,
      "step": 50000
    },
    {
      "epoch": 53.85,
      "learning_rate": 2.6399155227032734e-05,
      "loss": 0.2174,
      "step": 51000
    },
    {
      "epoch": 53.85,
      "eval_accuracy": 0.8804620462046204,
      "eval_loss": 0.4750025272369385,
      "eval_runtime": 155.6344,
      "eval_samples_per_second": 97.344,
      "eval_steps_per_second": 6.085,
      "step": 51000
    },
    {
      "epoch": 54.91,
      "learning_rate": 2.7455121436114044e-05,
      "loss": 0.2164,
      "step": 52000
    },
    {
      "epoch": 54.91,
      "eval_accuracy": 0.8831023102310231,
      "eval_loss": 0.4645076394081116,
      "eval_runtime": 156.6057,
      "eval_samples_per_second": 96.74,
      "eval_steps_per_second": 6.047,
      "step": 52000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 473500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 500,
  "save_steps": 1000,
  "total_flos": 2.5808866542217573e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}