| { | |
| "best_metric": 3.468397378921509, | |
| "best_model_checkpoint": "/datadrive/disk1/pierpaolo/llm/itlangadapt/bloom-1b3_it/checkpoint-50000", | |
| "epoch": 5.236680193742637, | |
| "global_step": 50000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00098, | |
| "loss": 3.4734, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00096, | |
| "loss": 3.466, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00094, | |
| "loss": 3.461, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00092, | |
| "loss": 3.4725, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0009000000000000001, | |
| "loss": 3.4614, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_loss": 3.469589948654175, | |
| "eval_runtime": 358.4452, | |
| "eval_samples_per_second": 10.624, | |
| "eval_steps_per_second": 5.312, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00088, | |
| "loss": 3.4653, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00086, | |
| "loss": 3.4618, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00084, | |
| "loss": 3.4707, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00082002, | |
| "loss": 3.4706, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0008000199999999999, | |
| "loss": 3.4694, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_loss": 3.4690916538238525, | |
| "eval_runtime": 358.2981, | |
| "eval_samples_per_second": 10.628, | |
| "eval_steps_per_second": 5.314, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0007800200000000001, | |
| "loss": 3.4686, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00076002, | |
| "loss": 3.4636, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.0007400400000000001, | |
| "loss": 3.463, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.00072006, | |
| "loss": 3.4694, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.0007000600000000001, | |
| "loss": 3.4709, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_loss": 3.4688496589660645, | |
| "eval_runtime": 358.397, | |
| "eval_samples_per_second": 10.625, | |
| "eval_steps_per_second": 5.313, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00068006, | |
| "loss": 3.4627, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.0006600800000000001, | |
| "loss": 3.4658, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.00064008, | |
| "loss": 3.4661, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 0.00062008, | |
| "loss": 3.4615, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 0.0006001, | |
| "loss": 3.481, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_loss": 3.468804359436035, | |
| "eval_runtime": 359.4028, | |
| "eval_samples_per_second": 10.595, | |
| "eval_steps_per_second": 5.298, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 0.0005801, | |
| "loss": 3.465, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 0.00056012, | |
| "loss": 3.4656, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 0.00054012, | |
| "loss": 3.472, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 0.00052012, | |
| "loss": 3.4574, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 0.0005001200000000001, | |
| "loss": 3.4638, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_loss": 3.468665599822998, | |
| "eval_runtime": 358.6249, | |
| "eval_samples_per_second": 10.618, | |
| "eval_steps_per_second": 5.309, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 0.00048012, | |
| "loss": 3.4611, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 0.00046012, | |
| "loss": 3.4631, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 0.00044014, | |
| "loss": 3.4626, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 0.00042014, | |
| "loss": 3.469, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 0.00040018, | |
| "loss": 3.4704, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "eval_loss": 3.468623399734497, | |
| "eval_runtime": 359.7336, | |
| "eval_samples_per_second": 10.586, | |
| "eval_steps_per_second": 5.293, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 0.00038018000000000004, | |
| "loss": 3.4659, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 0.00036018, | |
| "loss": 3.4706, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 0.00034018, | |
| "loss": 3.4659, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 0.00032018000000000004, | |
| "loss": 3.4669, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 0.00030018, | |
| "loss": 3.4617, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "eval_loss": 3.4684813022613525, | |
| "eval_runtime": 361.1986, | |
| "eval_samples_per_second": 10.543, | |
| "eval_steps_per_second": 5.271, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 0.00028020000000000003, | |
| "loss": 3.4703, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 0.00026024, | |
| "loss": 3.4606, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 0.00024026, | |
| "loss": 3.4549, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 0.00022026, | |
| "loss": 3.4701, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 0.00020026, | |
| "loss": 3.4636, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "eval_loss": 3.468475103378296, | |
| "eval_runtime": 358.6453, | |
| "eval_samples_per_second": 10.618, | |
| "eval_steps_per_second": 5.309, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 0.00018026, | |
| "loss": 3.4541, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 0.00016026000000000001, | |
| "loss": 3.4724, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 0.00014026, | |
| "loss": 3.4683, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 0.00012026, | |
| "loss": 3.463, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 0.00010026, | |
| "loss": 3.4668, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "eval_loss": 3.4684269428253174, | |
| "eval_runtime": 358.1004, | |
| "eval_samples_per_second": 10.634, | |
| "eval_steps_per_second": 5.317, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 8.028000000000001e-05, | |
| "loss": 3.4696, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 6.028e-05, | |
| "loss": 3.4605, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 4.028000000000001e-05, | |
| "loss": 3.4645, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 2.028e-05, | |
| "loss": 3.4704, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 2.7999999999999997e-07, | |
| "loss": 3.4623, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "eval_loss": 3.468397378921509, | |
| "eval_runtime": 358.1322, | |
| "eval_samples_per_second": 10.633, | |
| "eval_steps_per_second": 5.316, | |
| "step": 50000 | |
| } | |
| ], | |
| "max_steps": 50000, | |
| "num_train_epochs": 6, | |
| "total_flos": 2.9704946454626304e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |