{
  "best_metric": 42.9095,
  "best_model_checkpoint": "opus_base_adapt_wce_gloss_train-sampled_precision_3_ubweight_2.0/checkpoint-80000",
  "epoch": 3.943217665615142,
  "global_step": 80000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "learning_rate": 1.9753918572555207e-05,
      "loss": 0.213,
      "step": 4000
    },
    {
      "epoch": 0.2,
      "eval_bleu": 32.1992,
      "eval_gen_len": 43.558,
      "eval_loss": 0.10932592302560806,
      "eval_runtime": 185.4985,
      "eval_samples_per_second": 5.623,
      "eval_steps_per_second": 0.178,
      "step": 4000
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.950746746845426e-05,
      "loss": 0.1788,
      "step": 8000
    },
    {
      "epoch": 0.39,
      "eval_bleu": 40.1195,
      "eval_gen_len": 36.4458,
      "eval_loss": 0.10684426873922348,
      "eval_runtime": 173.1992,
      "eval_samples_per_second": 6.022,
      "eval_steps_per_second": 0.191,
      "step": 8000
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.9261016364353314e-05,
      "loss": 0.1716,
      "step": 12000
    },
    {
      "epoch": 0.59,
      "eval_bleu": 41.5118,
      "eval_gen_len": 34.5973,
      "eval_loss": 0.1055770218372345,
      "eval_runtime": 177.753,
      "eval_samples_per_second": 5.868,
      "eval_steps_per_second": 0.186,
      "step": 12000
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.9014565260252367e-05,
      "loss": 0.1659,
      "step": 16000
    },
    {
      "epoch": 0.79,
      "eval_bleu": 41.3846,
      "eval_gen_len": 34.2397,
      "eval_loss": 0.10453091561794281,
      "eval_runtime": 131.5252,
      "eval_samples_per_second": 7.93,
      "eval_steps_per_second": 0.251,
      "step": 16000
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.876811415615142e-05,
      "loss": 0.1634,
      "step": 20000
    },
    {
      "epoch": 0.99,
      "eval_bleu": 41.5931,
      "eval_gen_len": 35.1074,
      "eval_loss": 0.10396245121955872,
      "eval_runtime": 178.097,
      "eval_samples_per_second": 5.856,
      "eval_steps_per_second": 0.185,
      "step": 20000
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.8521786277602524e-05,
      "loss": 0.1513,
      "step": 24000
    },
    {
      "epoch": 1.18,
      "eval_bleu": 41.2579,
      "eval_gen_len": 36.1802,
      "eval_loss": 0.10382075607776642,
      "eval_runtime": 152.5475,
      "eval_samples_per_second": 6.837,
      "eval_steps_per_second": 0.216,
      "step": 24000
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.8275520011829655e-05,
      "loss": 0.151,
      "step": 28000
    },
    {
      "epoch": 1.38,
      "eval_bleu": 40.956,
      "eval_gen_len": 36.1687,
      "eval_loss": 0.10313059389591217,
      "eval_runtime": 206.7763,
      "eval_samples_per_second": 5.044,
      "eval_steps_per_second": 0.16,
      "step": 28000
    },
    {
      "epoch": 1.58,
      "learning_rate": 1.8029130520504733e-05,
      "loss": 0.1506,
      "step": 32000
    },
    {
      "epoch": 1.58,
      "eval_bleu": 41.8294,
      "eval_gen_len": 35.3432,
      "eval_loss": 0.10248984396457672,
      "eval_runtime": 140.6628,
      "eval_samples_per_second": 7.415,
      "eval_steps_per_second": 0.235,
      "step": 32000
    },
    {
      "epoch": 1.77,
      "learning_rate": 1.7782741029179815e-05,
      "loss": 0.149,
      "step": 36000
    },
    {
      "epoch": 1.77,
      "eval_bleu": 40.6541,
      "eval_gen_len": 37.2934,
      "eval_loss": 0.10204267501831055,
      "eval_runtime": 116.2311,
      "eval_samples_per_second": 8.973,
      "eval_steps_per_second": 0.284,
      "step": 36000
    },
    {
      "epoch": 1.97,
      "learning_rate": 1.7536413150630914e-05,
      "loss": 0.1482,
      "step": 40000
    },
    {
      "epoch": 1.97,
      "eval_bleu": 42.167,
      "eval_gen_len": 35.0,
      "eval_loss": 0.10223417729139328,
      "eval_runtime": 120.9127,
      "eval_samples_per_second": 8.626,
      "eval_steps_per_second": 0.273,
      "step": 40000
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.7290023659305996e-05,
      "loss": 0.1404,
      "step": 44000
    },
    {
      "epoch": 2.17,
      "eval_bleu": 42.2093,
      "eval_gen_len": 35.4257,
      "eval_loss": 0.10255546122789383,
      "eval_runtime": 152.5867,
      "eval_samples_per_second": 6.835,
      "eval_steps_per_second": 0.216,
      "step": 44000
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.7043634167981074e-05,
      "loss": 0.1394,
      "step": 48000
    },
    {
      "epoch": 2.37,
      "eval_bleu": 42.2705,
      "eval_gen_len": 35.6999,
      "eval_loss": 0.10232062637805939,
      "eval_runtime": 127.5651,
      "eval_samples_per_second": 8.176,
      "eval_steps_per_second": 0.259,
      "step": 48000
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.6797306289432177e-05,
      "loss": 0.1384,
      "step": 52000
    },
    {
      "epoch": 2.56,
      "eval_bleu": 42.0109,
      "eval_gen_len": 35.4017,
      "eval_loss": 0.10198231786489487,
      "eval_runtime": 144.4455,
      "eval_samples_per_second": 7.221,
      "eval_steps_per_second": 0.228,
      "step": 52000
    },
    {
      "epoch": 2.76,
      "learning_rate": 1.6550978410883283e-05,
      "loss": 0.1386,
      "step": 56000
    },
    {
      "epoch": 2.76,
      "eval_bleu": 41.651,
      "eval_gen_len": 36.2234,
      "eval_loss": 0.10184463113546371,
      "eval_runtime": 164.0439,
      "eval_samples_per_second": 6.358,
      "eval_steps_per_second": 0.201,
      "step": 56000
    },
    {
      "epoch": 2.96,
      "learning_rate": 1.6304650532334386e-05,
      "loss": 0.139,
      "step": 60000
    },
    {
      "epoch": 2.96,
      "eval_bleu": 42.5275,
      "eval_gen_len": 34.4708,
      "eval_loss": 0.10123319178819656,
      "eval_runtime": 170.2072,
      "eval_samples_per_second": 6.128,
      "eval_steps_per_second": 0.194,
      "step": 60000
    },
    {
      "epoch": 3.15,
      "learning_rate": 1.605832265378549e-05,
      "loss": 0.1323,
      "step": 64000
    },
    {
      "epoch": 3.15,
      "eval_bleu": 42.2691,
      "eval_gen_len": 35.162,
      "eval_loss": 0.10197851061820984,
      "eval_runtime": 153.8385,
      "eval_samples_per_second": 6.78,
      "eval_steps_per_second": 0.215,
      "step": 64000
    },
    {
      "epoch": 3.35,
      "learning_rate": 1.581205638801262e-05,
      "loss": 0.1318,
      "step": 68000
    },
    {
      "epoch": 3.35,
      "eval_bleu": 42.6981,
      "eval_gen_len": 34.2464,
      "eval_loss": 0.10211456567049026,
      "eval_runtime": 119.2355,
      "eval_samples_per_second": 8.747,
      "eval_steps_per_second": 0.277,
      "step": 68000
    },
    {
      "epoch": 3.55,
      "learning_rate": 1.5565605283911674e-05,
      "loss": 0.1306,
      "step": 72000
    },
    {
      "epoch": 3.55,
      "eval_bleu": 42.3809,
      "eval_gen_len": 34.6769,
      "eval_loss": 0.10217085480690002,
      "eval_runtime": 86.738,
      "eval_samples_per_second": 12.025,
      "eval_steps_per_second": 0.38,
      "step": 72000
    },
    {
      "epoch": 3.75,
      "learning_rate": 1.5319277405362777e-05,
      "loss": 0.131,
      "step": 76000
    },
    {
      "epoch": 3.75,
      "eval_bleu": 42.1038,
      "eval_gen_len": 35.7728,
      "eval_loss": 0.10192063450813293,
      "eval_runtime": 96.8001,
      "eval_samples_per_second": 10.775,
      "eval_steps_per_second": 0.341,
      "step": 76000
    },
    {
      "epoch": 3.94,
      "learning_rate": 1.507294952681388e-05,
      "loss": 0.1318,
      "step": 80000
    },
    {
      "epoch": 3.94,
      "eval_bleu": 42.9095,
      "eval_gen_len": 34.2752,
      "eval_loss": 0.10165005922317505,
      "eval_runtime": 104.8741,
      "eval_samples_per_second": 9.945,
      "eval_steps_per_second": 0.315,
      "step": 80000
    }
  ],
  "max_steps": 324608,
  "num_train_epochs": 16,
  "total_flos": 1.3796012702564352e+17,
  "trial_name": null,
  "trial_params": null
}