| { | |
| "best_metric": 2.5673868656158447, | |
| "best_model_checkpoint": "./robot22/checkpoint-400", | |
| "epoch": 1.0, | |
| "global_step": 430, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019534883720930232, | |
| "loss": 4.9495, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019069767441860466, | |
| "loss": 4.8535, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.000186046511627907, | |
| "loss": 4.6491, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0001813953488372093, | |
| "loss": 4.5284, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00017674418604651164, | |
| "loss": 4.4311, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00017209302325581395, | |
| "loss": 4.2551, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00016744186046511629, | |
| "loss": 4.2248, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00016279069767441862, | |
| "loss": 4.0917, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00015813953488372093, | |
| "loss": 3.964, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00015348837209302327, | |
| "loss": 3.9154, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.22130177514792898, | |
| "eval_loss": 3.8417413234710693, | |
| "eval_runtime": 33.9476, | |
| "eval_samples_per_second": 49.783, | |
| "eval_steps_per_second": 6.245, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00014883720930232558, | |
| "loss": 3.8266, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00014418604651162791, | |
| "loss": 3.721, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00013953488372093025, | |
| "loss": 3.5688, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00013488372093023256, | |
| "loss": 3.5956, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001302325581395349, | |
| "loss": 3.5324, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001255813953488372, | |
| "loss": 3.4582, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00012093023255813953, | |
| "loss": 3.4678, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00011627906976744187, | |
| "loss": 3.3492, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00011162790697674419, | |
| "loss": 3.4026, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00010697674418604651, | |
| "loss": 3.1764, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.3201183431952663, | |
| "eval_loss": 3.2242767810821533, | |
| "eval_runtime": 31.328, | |
| "eval_samples_per_second": 53.945, | |
| "eval_steps_per_second": 6.767, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00010232558139534885, | |
| "loss": 3.1029, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 9.767441860465116e-05, | |
| "loss": 3.144, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.30232558139535e-05, | |
| "loss": 3.092, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.837209302325582e-05, | |
| "loss": 3.1839, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.372093023255814e-05, | |
| "loss": 3.0839, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.906976744186047e-05, | |
| "loss": 2.9506, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.441860465116279e-05, | |
| "loss": 2.8607, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.976744186046513e-05, | |
| "loss": 3.0074, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 6.511627906976745e-05, | |
| "loss": 2.8763, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 6.0465116279069765e-05, | |
| "loss": 2.8186, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.4284023668639053, | |
| "eval_loss": 2.7973387241363525, | |
| "eval_runtime": 30.8947, | |
| "eval_samples_per_second": 54.702, | |
| "eval_steps_per_second": 6.862, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 5.5813953488372095e-05, | |
| "loss": 2.8303, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 5.1162790697674425e-05, | |
| "loss": 2.8085, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.651162790697675e-05, | |
| "loss": 2.8264, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.186046511627907e-05, | |
| "loss": 2.7512, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.7209302325581394e-05, | |
| "loss": 2.6599, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.2558139534883724e-05, | |
| "loss": 2.5858, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 2.7906976744186048e-05, | |
| "loss": 2.6807, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 2.3255813953488374e-05, | |
| "loss": 2.6653, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.8604651162790697e-05, | |
| "loss": 2.4515, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.3953488372093024e-05, | |
| "loss": 2.632, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.5076923076923077, | |
| "eval_loss": 2.5673868656158447, | |
| "eval_runtime": 33.1328, | |
| "eval_samples_per_second": 51.007, | |
| "eval_steps_per_second": 6.398, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.302325581395349e-06, | |
| "loss": 2.6872, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.651162790697674e-06, | |
| "loss": 2.556, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0, | |
| "loss": 2.5929, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 430, | |
| "total_flos": 5.338473146715341e+17, | |
| "train_loss": 3.3443971855695858, | |
| "train_runtime": 396.8195, | |
| "train_samples_per_second": 17.338, | |
| "train_steps_per_second": 1.084 | |
| } | |
| ], | |
| "max_steps": 430, | |
| "num_train_epochs": 1, | |
| "total_flos": 5.338473146715341e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |