{ "best_metric": 42.9095, "best_model_checkpoint": "opus_base_adapt_wce_gloss_train-sampled_precision_3_ubweight_2.0/checkpoint-80000", "epoch": 3.943217665615142, "global_step": 80000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753918572555207e-05, "loss": 0.213, "step": 4000 }, { "epoch": 0.2, "eval_bleu": 32.1992, "eval_gen_len": 43.558, "eval_loss": 0.10932592302560806, "eval_runtime": 185.4985, "eval_samples_per_second": 5.623, "eval_steps_per_second": 0.178, "step": 4000 }, { "epoch": 0.39, "learning_rate": 1.950746746845426e-05, "loss": 0.1788, "step": 8000 }, { "epoch": 0.39, "eval_bleu": 40.1195, "eval_gen_len": 36.4458, "eval_loss": 0.10684426873922348, "eval_runtime": 173.1992, "eval_samples_per_second": 6.022, "eval_steps_per_second": 0.191, "step": 8000 }, { "epoch": 0.59, "learning_rate": 1.9261016364353314e-05, "loss": 0.1716, "step": 12000 }, { "epoch": 0.59, "eval_bleu": 41.5118, "eval_gen_len": 34.5973, "eval_loss": 0.1055770218372345, "eval_runtime": 177.753, "eval_samples_per_second": 5.868, "eval_steps_per_second": 0.186, "step": 12000 }, { "epoch": 0.79, "learning_rate": 1.9014565260252367e-05, "loss": 0.1659, "step": 16000 }, { "epoch": 0.79, "eval_bleu": 41.3846, "eval_gen_len": 34.2397, "eval_loss": 0.10453091561794281, "eval_runtime": 131.5252, "eval_samples_per_second": 7.93, "eval_steps_per_second": 0.251, "step": 16000 }, { "epoch": 0.99, "learning_rate": 1.876811415615142e-05, "loss": 0.1634, "step": 20000 }, { "epoch": 0.99, "eval_bleu": 41.5931, "eval_gen_len": 35.1074, "eval_loss": 0.10396245121955872, "eval_runtime": 178.097, "eval_samples_per_second": 5.856, "eval_steps_per_second": 0.185, "step": 20000 }, { "epoch": 1.18, "learning_rate": 1.8521786277602524e-05, "loss": 0.1513, "step": 24000 }, { "epoch": 1.18, "eval_bleu": 41.2579, "eval_gen_len": 36.1802, "eval_loss": 0.10382075607776642, "eval_runtime": 152.5475, "eval_samples_per_second": 6.837, "eval_steps_per_second": 0.216, "step": 24000 }, { "epoch": 1.38, "learning_rate": 1.8275520011829655e-05, "loss": 0.151, "step": 28000 }, { "epoch": 1.38, "eval_bleu": 40.956, "eval_gen_len": 36.1687, "eval_loss": 0.10313059389591217, "eval_runtime": 206.7763, "eval_samples_per_second": 5.044, "eval_steps_per_second": 0.16, "step": 28000 }, { "epoch": 1.58, "learning_rate": 1.8029130520504733e-05, "loss": 0.1506, "step": 32000 }, { "epoch": 1.58, "eval_bleu": 41.8294, "eval_gen_len": 35.3432, "eval_loss": 0.10248984396457672, "eval_runtime": 140.6628, "eval_samples_per_second": 7.415, "eval_steps_per_second": 0.235, "step": 32000 }, { "epoch": 1.77, "learning_rate": 1.7782741029179815e-05, "loss": 0.149, "step": 36000 }, { "epoch": 1.77, "eval_bleu": 40.6541, "eval_gen_len": 37.2934, "eval_loss": 0.10204267501831055, "eval_runtime": 116.2311, "eval_samples_per_second": 8.973, "eval_steps_per_second": 0.284, "step": 36000 }, { "epoch": 1.97, "learning_rate": 1.7536413150630914e-05, "loss": 0.1482, "step": 40000 }, { "epoch": 1.97, "eval_bleu": 42.167, "eval_gen_len": 35.0, "eval_loss": 0.10223417729139328, "eval_runtime": 120.9127, "eval_samples_per_second": 8.626, "eval_steps_per_second": 0.273, "step": 40000 }, { "epoch": 2.17, "learning_rate": 1.7290023659305996e-05, "loss": 0.1404, "step": 44000 }, { "epoch": 2.17, "eval_bleu": 42.2093, "eval_gen_len": 35.4257, "eval_loss": 0.10255546122789383, "eval_runtime": 152.5867, "eval_samples_per_second": 6.835, "eval_steps_per_second": 0.216, "step": 44000 }, { "epoch": 2.37, "learning_rate": 1.7043634167981074e-05, "loss": 0.1394, "step": 48000 }, { "epoch": 2.37, "eval_bleu": 42.2705, "eval_gen_len": 35.6999, "eval_loss": 0.10232062637805939, "eval_runtime": 127.5651, "eval_samples_per_second": 8.176, "eval_steps_per_second": 0.259, "step": 48000 }, { "epoch": 2.56, "learning_rate": 1.6797306289432177e-05, "loss": 0.1384, "step": 52000 }, { "epoch": 2.56, "eval_bleu": 42.0109, "eval_gen_len": 35.4017, "eval_loss": 0.10198231786489487, "eval_runtime": 144.4455, "eval_samples_per_second": 7.221, "eval_steps_per_second": 0.228, "step": 52000 }, { "epoch": 2.76, "learning_rate": 1.6550978410883283e-05, "loss": 0.1386, "step": 56000 }, { "epoch": 2.76, "eval_bleu": 41.651, "eval_gen_len": 36.2234, "eval_loss": 0.10184463113546371, "eval_runtime": 164.0439, "eval_samples_per_second": 6.358, "eval_steps_per_second": 0.201, "step": 56000 }, { "epoch": 2.96, "learning_rate": 1.6304650532334386e-05, "loss": 0.139, "step": 60000 }, { "epoch": 2.96, "eval_bleu": 42.5275, "eval_gen_len": 34.4708, "eval_loss": 0.10123319178819656, "eval_runtime": 170.2072, "eval_samples_per_second": 6.128, "eval_steps_per_second": 0.194, "step": 60000 }, { "epoch": 3.15, "learning_rate": 1.605832265378549e-05, "loss": 0.1323, "step": 64000 }, { "epoch": 3.15, "eval_bleu": 42.2691, "eval_gen_len": 35.162, "eval_loss": 0.10197851061820984, "eval_runtime": 153.8385, "eval_samples_per_second": 6.78, "eval_steps_per_second": 0.215, "step": 64000 }, { "epoch": 3.35, "learning_rate": 1.581205638801262e-05, "loss": 0.1318, "step": 68000 }, { "epoch": 3.35, "eval_bleu": 42.6981, "eval_gen_len": 34.2464, "eval_loss": 0.10211456567049026, "eval_runtime": 119.2355, "eval_samples_per_second": 8.747, "eval_steps_per_second": 0.277, "step": 68000 }, { "epoch": 3.55, "learning_rate": 1.5565605283911674e-05, "loss": 0.1306, "step": 72000 }, { "epoch": 3.55, "eval_bleu": 42.3809, "eval_gen_len": 34.6769, "eval_loss": 0.10217085480690002, "eval_runtime": 86.738, "eval_samples_per_second": 12.025, "eval_steps_per_second": 0.38, "step": 72000 }, { "epoch": 3.75, "learning_rate": 1.5319277405362777e-05, "loss": 0.131, "step": 76000 }, { "epoch": 3.75, "eval_bleu": 42.1038, "eval_gen_len": 35.7728, "eval_loss": 0.10192063450813293, "eval_runtime": 96.8001, "eval_samples_per_second": 10.775, "eval_steps_per_second": 0.341, "step": 76000 }, { "epoch": 3.94, "learning_rate": 1.507294952681388e-05, "loss": 0.1318, "step": 80000 }, { "epoch": 3.94, "eval_bleu": 42.9095, "eval_gen_len": 34.2752, "eval_loss": 0.10165005922317505, "eval_runtime": 104.8741, "eval_samples_per_second": 9.945, "eval_steps_per_second": 0.315, "step": 80000 } ], "max_steps": 324608, "num_train_epochs": 16, "total_flos": 1.3796012702564352e+17, "trial_name": null, "trial_params": null }