{ "best_metric": 0.26617246866226196, "best_model_checkpoint": "./apple/mobilevit-small-garbage/checkpoint-500", "epoch": 5.0, "global_step": 555, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 0.0001963963963963964, "loss": 1.7495, "step": 10 }, { "epoch": 0.18, "learning_rate": 0.00019279279279279282, "loss": 1.6591, "step": 20 }, { "epoch": 0.27, "learning_rate": 0.0001891891891891892, "loss": 1.5418, "step": 30 }, { "epoch": 0.36, "learning_rate": 0.00018594594594594597, "loss": 1.3934, "step": 40 }, { "epoch": 0.45, "learning_rate": 0.00018234234234234235, "loss": 1.2802, "step": 50 }, { "epoch": 0.54, "learning_rate": 0.00017873873873873876, "loss": 1.18, "step": 60 }, { "epoch": 0.63, "learning_rate": 0.00017513513513513516, "loss": 0.9854, "step": 70 }, { "epoch": 0.72, "learning_rate": 0.00017153153153153154, "loss": 0.9527, "step": 80 }, { "epoch": 0.81, "learning_rate": 0.00016792792792792794, "loss": 0.8574, "step": 90 }, { "epoch": 0.9, "learning_rate": 0.00016432432432432435, "loss": 0.8589, "step": 100 }, { "epoch": 0.9, "eval_accuracy": 0.8079268292682927, "eval_loss": 0.661000669002533, "eval_runtime": 4.4994, "eval_samples_per_second": 72.898, "eval_steps_per_second": 9.112, "step": 100 }, { "epoch": 0.99, "learning_rate": 0.0001610810810810811, "loss": 0.7726, "step": 110 }, { "epoch": 1.08, "learning_rate": 0.00015747747747747747, "loss": 0.74, "step": 120 }, { "epoch": 1.17, "learning_rate": 0.00015387387387387388, "loss": 0.6325, "step": 130 }, { "epoch": 1.26, "learning_rate": 0.00015063063063063063, "loss": 0.5792, "step": 140 }, { "epoch": 1.35, "learning_rate": 0.00014702702702702703, "loss": 0.5582, "step": 150 }, { "epoch": 1.44, "learning_rate": 0.00014342342342342344, "loss": 0.5644, "step": 160 }, { "epoch": 1.53, "learning_rate": 0.00013981981981981982, "loss": 0.4776, "step": 170 }, { "epoch": 1.62, "learning_rate": 0.00013621621621621622, "loss": 0.4537, "step": 180 }, { "epoch": 1.71, "learning_rate": 0.00013261261261261263, "loss": 0.4813, "step": 190 }, { "epoch": 1.8, "learning_rate": 0.000129009009009009, "loss": 0.4676, "step": 200 }, { "epoch": 1.8, "eval_accuracy": 0.8597560975609756, "eval_loss": 0.4162694811820984, "eval_runtime": 4.3992, "eval_samples_per_second": 74.559, "eval_steps_per_second": 9.32, "step": 200 }, { "epoch": 1.89, "learning_rate": 0.0001254054054054054, "loss": 0.5374, "step": 210 }, { "epoch": 1.98, "learning_rate": 0.0001218018018018018, "loss": 0.4006, "step": 220 }, { "epoch": 2.07, "learning_rate": 0.00011855855855855858, "loss": 0.4236, "step": 230 }, { "epoch": 2.16, "learning_rate": 0.00011495495495495497, "loss": 0.4494, "step": 240 }, { "epoch": 2.25, "learning_rate": 0.00011135135135135135, "loss": 0.3139, "step": 250 }, { "epoch": 2.34, "learning_rate": 0.00010774774774774776, "loss": 0.3151, "step": 260 }, { "epoch": 2.43, "learning_rate": 0.00010414414414414416, "loss": 0.2666, "step": 270 }, { "epoch": 2.52, "learning_rate": 0.00010054054054054053, "loss": 0.404, "step": 280 }, { "epoch": 2.61, "learning_rate": 9.693693693693694e-05, "loss": 0.3745, "step": 290 }, { "epoch": 2.7, "learning_rate": 9.333333333333334e-05, "loss": 0.3111, "step": 300 }, { "epoch": 2.7, "eval_accuracy": 0.8932926829268293, "eval_loss": 0.3442399501800537, "eval_runtime": 4.2748, "eval_samples_per_second": 76.729, "eval_steps_per_second": 9.591, "step": 300 }, { "epoch": 2.79, "learning_rate": 8.972972972972973e-05, "loss": 0.3372, "step": 310 }, { "epoch": 2.88, "learning_rate": 8.612612612612613e-05, "loss": 0.4432, "step": 320 }, { "epoch": 2.97, "learning_rate": 8.252252252252253e-05, "loss": 0.3007, "step": 330 }, { "epoch": 3.06, "learning_rate": 7.891891891891892e-05, "loss": 0.2942, "step": 340 }, { "epoch": 3.15, "learning_rate": 7.531531531531531e-05, "loss": 0.2356, "step": 350 }, { "epoch": 3.24, "learning_rate": 7.171171171171172e-05, "loss": 0.2971, "step": 360 }, { "epoch": 3.33, "learning_rate": 6.810810810810811e-05, "loss": 0.2004, "step": 370 }, { "epoch": 3.42, "learning_rate": 6.45045045045045e-05, "loss": 0.3405, "step": 380 }, { "epoch": 3.51, "learning_rate": 6.09009009009009e-05, "loss": 0.3609, "step": 390 }, { "epoch": 3.6, "learning_rate": 5.7297297297297305e-05, "loss": 0.3709, "step": 400 }, { "epoch": 3.6, "eval_accuracy": 0.9146341463414634, "eval_loss": 0.3073720932006836, "eval_runtime": 4.3122, "eval_samples_per_second": 76.064, "eval_steps_per_second": 9.508, "step": 400 }, { "epoch": 3.69, "learning_rate": 5.369369369369369e-05, "loss": 0.3094, "step": 410 }, { "epoch": 3.78, "learning_rate": 5.009009009009009e-05, "loss": 0.2294, "step": 420 }, { "epoch": 3.87, "learning_rate": 4.648648648648649e-05, "loss": 0.3965, "step": 430 }, { "epoch": 3.96, "learning_rate": 4.2882882882882885e-05, "loss": 0.2148, "step": 440 }, { "epoch": 4.05, "learning_rate": 3.927927927927928e-05, "loss": 0.3167, "step": 450 }, { "epoch": 4.14, "learning_rate": 3.567567567567568e-05, "loss": 0.1351, "step": 460 }, { "epoch": 4.23, "learning_rate": 3.207207207207207e-05, "loss": 0.275, "step": 470 }, { "epoch": 4.32, "learning_rate": 2.8468468468468467e-05, "loss": 0.1743, "step": 480 }, { "epoch": 4.41, "learning_rate": 2.486486486486487e-05, "loss": 0.1459, "step": 490 }, { "epoch": 4.5, "learning_rate": 2.126126126126126e-05, "loss": 0.1942, "step": 500 }, { "epoch": 4.5, "eval_accuracy": 0.9115853658536586, "eval_loss": 0.26617246866226196, "eval_runtime": 4.2639, "eval_samples_per_second": 76.925, "eval_steps_per_second": 9.616, "step": 500 }, { "epoch": 4.59, "learning_rate": 1.765765765765766e-05, "loss": 0.1843, "step": 510 }, { "epoch": 4.68, "learning_rate": 1.4054054054054055e-05, "loss": 0.3644, "step": 520 }, { "epoch": 4.77, "learning_rate": 1.0450450450450452e-05, "loss": 0.1587, "step": 530 }, { "epoch": 4.86, "learning_rate": 6.846846846846847e-06, "loss": 0.1534, "step": 540 }, { "epoch": 4.95, "learning_rate": 3.2432432432432437e-06, "loss": 0.1954, "step": 550 }, { "epoch": 5.0, "step": 555, "total_flos": 5.153016901533696e+16, "train_loss": 0.5183452816696854, "train_runtime": 159.3173, "train_samples_per_second": 55.487, "train_steps_per_second": 3.484 } ], "max_steps": 555, "num_train_epochs": 5, "total_flos": 5.153016901533696e+16, "trial_name": null, "trial_params": null }