| { |
| "best_metric": 42.9095, |
| "best_model_checkpoint": "opus_base_adapt_wce_gloss_train-sampled_precision_3_ubweight_2.0/checkpoint-80000", |
| "epoch": 3.943217665615142, |
| "global_step": 80000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9753918572555207e-05, |
| "loss": 0.213, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_bleu": 32.1992, |
| "eval_gen_len": 43.558, |
| "eval_loss": 0.10932592302560806, |
| "eval_runtime": 185.4985, |
| "eval_samples_per_second": 5.623, |
| "eval_steps_per_second": 0.178, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.950746746845426e-05, |
| "loss": 0.1788, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_bleu": 40.1195, |
| "eval_gen_len": 36.4458, |
| "eval_loss": 0.10684426873922348, |
| "eval_runtime": 173.1992, |
| "eval_samples_per_second": 6.022, |
| "eval_steps_per_second": 0.191, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.9261016364353314e-05, |
| "loss": 0.1716, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_bleu": 41.5118, |
| "eval_gen_len": 34.5973, |
| "eval_loss": 0.1055770218372345, |
| "eval_runtime": 177.753, |
| "eval_samples_per_second": 5.868, |
| "eval_steps_per_second": 0.186, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.9014565260252367e-05, |
| "loss": 0.1659, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_bleu": 41.3846, |
| "eval_gen_len": 34.2397, |
| "eval_loss": 0.10453091561794281, |
| "eval_runtime": 131.5252, |
| "eval_samples_per_second": 7.93, |
| "eval_steps_per_second": 0.251, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.876811415615142e-05, |
| "loss": 0.1634, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_bleu": 41.5931, |
| "eval_gen_len": 35.1074, |
| "eval_loss": 0.10396245121955872, |
| "eval_runtime": 178.097, |
| "eval_samples_per_second": 5.856, |
| "eval_steps_per_second": 0.185, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.8521786277602524e-05, |
| "loss": 0.1513, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_bleu": 41.2579, |
| "eval_gen_len": 36.1802, |
| "eval_loss": 0.10382075607776642, |
| "eval_runtime": 152.5475, |
| "eval_samples_per_second": 6.837, |
| "eval_steps_per_second": 0.216, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.8275520011829655e-05, |
| "loss": 0.151, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_bleu": 40.956, |
| "eval_gen_len": 36.1687, |
| "eval_loss": 0.10313059389591217, |
| "eval_runtime": 206.7763, |
| "eval_samples_per_second": 5.044, |
| "eval_steps_per_second": 0.16, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.8029130520504733e-05, |
| "loss": 0.1506, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_bleu": 41.8294, |
| "eval_gen_len": 35.3432, |
| "eval_loss": 0.10248984396457672, |
| "eval_runtime": 140.6628, |
| "eval_samples_per_second": 7.415, |
| "eval_steps_per_second": 0.235, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.7782741029179815e-05, |
| "loss": 0.149, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_bleu": 40.6541, |
| "eval_gen_len": 37.2934, |
| "eval_loss": 0.10204267501831055, |
| "eval_runtime": 116.2311, |
| "eval_samples_per_second": 8.973, |
| "eval_steps_per_second": 0.284, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7536413150630914e-05, |
| "loss": 0.1482, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_bleu": 42.167, |
| "eval_gen_len": 35.0, |
| "eval_loss": 0.10223417729139328, |
| "eval_runtime": 120.9127, |
| "eval_samples_per_second": 8.626, |
| "eval_steps_per_second": 0.273, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.7290023659305996e-05, |
| "loss": 0.1404, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.17, |
| "eval_bleu": 42.2093, |
| "eval_gen_len": 35.4257, |
| "eval_loss": 0.10255546122789383, |
| "eval_runtime": 152.5867, |
| "eval_samples_per_second": 6.835, |
| "eval_steps_per_second": 0.216, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.7043634167981074e-05, |
| "loss": 0.1394, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.37, |
| "eval_bleu": 42.2705, |
| "eval_gen_len": 35.6999, |
| "eval_loss": 0.10232062637805939, |
| "eval_runtime": 127.5651, |
| "eval_samples_per_second": 8.176, |
| "eval_steps_per_second": 0.259, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.6797306289432177e-05, |
| "loss": 0.1384, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_bleu": 42.0109, |
| "eval_gen_len": 35.4017, |
| "eval_loss": 0.10198231786489487, |
| "eval_runtime": 144.4455, |
| "eval_samples_per_second": 7.221, |
| "eval_steps_per_second": 0.228, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 1.6550978410883283e-05, |
| "loss": 0.1386, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.76, |
| "eval_bleu": 41.651, |
| "eval_gen_len": 36.2234, |
| "eval_loss": 0.10184463113546371, |
| "eval_runtime": 164.0439, |
| "eval_samples_per_second": 6.358, |
| "eval_steps_per_second": 0.201, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 1.6304650532334386e-05, |
| "loss": 0.139, |
| "step": 60000 |
| }, |
| { |
| "epoch": 2.96, |
| "eval_bleu": 42.5275, |
| "eval_gen_len": 34.4708, |
| "eval_loss": 0.10123319178819656, |
| "eval_runtime": 170.2072, |
| "eval_samples_per_second": 6.128, |
| "eval_steps_per_second": 0.194, |
| "step": 60000 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 1.605832265378549e-05, |
| "loss": 0.1323, |
| "step": 64000 |
| }, |
| { |
| "epoch": 3.15, |
| "eval_bleu": 42.2691, |
| "eval_gen_len": 35.162, |
| "eval_loss": 0.10197851061820984, |
| "eval_runtime": 153.8385, |
| "eval_samples_per_second": 6.78, |
| "eval_steps_per_second": 0.215, |
| "step": 64000 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 1.581205638801262e-05, |
| "loss": 0.1318, |
| "step": 68000 |
| }, |
| { |
| "epoch": 3.35, |
| "eval_bleu": 42.6981, |
| "eval_gen_len": 34.2464, |
| "eval_loss": 0.10211456567049026, |
| "eval_runtime": 119.2355, |
| "eval_samples_per_second": 8.747, |
| "eval_steps_per_second": 0.277, |
| "step": 68000 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 1.5565605283911674e-05, |
| "loss": 0.1306, |
| "step": 72000 |
| }, |
| { |
| "epoch": 3.55, |
| "eval_bleu": 42.3809, |
| "eval_gen_len": 34.6769, |
| "eval_loss": 0.10217085480690002, |
| "eval_runtime": 86.738, |
| "eval_samples_per_second": 12.025, |
| "eval_steps_per_second": 0.38, |
| "step": 72000 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 1.5319277405362777e-05, |
| "loss": 0.131, |
| "step": 76000 |
| }, |
| { |
| "epoch": 3.75, |
| "eval_bleu": 42.1038, |
| "eval_gen_len": 35.7728, |
| "eval_loss": 0.10192063450813293, |
| "eval_runtime": 96.8001, |
| "eval_samples_per_second": 10.775, |
| "eval_steps_per_second": 0.341, |
| "step": 76000 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 1.507294952681388e-05, |
| "loss": 0.1318, |
| "step": 80000 |
| }, |
| { |
| "epoch": 3.94, |
| "eval_bleu": 42.9095, |
| "eval_gen_len": 34.2752, |
| "eval_loss": 0.10165005922317505, |
| "eval_runtime": 104.8741, |
| "eval_samples_per_second": 9.945, |
| "eval_steps_per_second": 0.315, |
| "step": 80000 |
| } |
| ], |
| "max_steps": 324608, |
| "num_train_epochs": 16, |
| "total_flos": 1.3796012702564352e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|