| { |
| "best_metric": 42.7722, |
| "best_model_checkpoint": "opus_base_adapt_wce_gloss_unsampled_precision_3_ubweight_1.25/checkpoint-80000", |
| "epoch": 3.943217665615142, |
| "global_step": 80000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9753918572555207e-05, |
| "loss": 0.2076, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_bleu": 35.6766, |
| "eval_gen_len": 39.8754, |
| "eval_loss": 0.10901036113500595, |
| "eval_runtime": 190.0619, |
| "eval_samples_per_second": 5.488, |
| "eval_steps_per_second": 0.174, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.950746746845426e-05, |
| "loss": 0.1743, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_bleu": 37.712, |
| "eval_gen_len": 37.931, |
| "eval_loss": 0.10654148459434509, |
| "eval_runtime": 187.1025, |
| "eval_samples_per_second": 5.574, |
| "eval_steps_per_second": 0.176, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.9261016364353314e-05, |
| "loss": 0.1672, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_bleu": 41.1339, |
| "eval_gen_len": 34.9118, |
| "eval_loss": 0.10531440377235413, |
| "eval_runtime": 148.6317, |
| "eval_samples_per_second": 7.017, |
| "eval_steps_per_second": 0.222, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.9014565260252367e-05, |
| "loss": 0.1618, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_bleu": 41.355, |
| "eval_gen_len": 35.2416, |
| "eval_loss": 0.10421621054410934, |
| "eval_runtime": 124.1237, |
| "eval_samples_per_second": 8.403, |
| "eval_steps_per_second": 0.266, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.876811415615142e-05, |
| "loss": 0.1594, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_bleu": 41.6651, |
| "eval_gen_len": 35.4228, |
| "eval_loss": 0.10366757214069366, |
| "eval_runtime": 159.7806, |
| "eval_samples_per_second": 6.528, |
| "eval_steps_per_second": 0.207, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.8521786277602524e-05, |
| "loss": 0.1477, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_bleu": 41.9522, |
| "eval_gen_len": 35.0451, |
| "eval_loss": 0.10360125452280045, |
| "eval_runtime": 138.0407, |
| "eval_samples_per_second": 7.556, |
| "eval_steps_per_second": 0.239, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.827545839905363e-05, |
| "loss": 0.1475, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_bleu": 40.2121, |
| "eval_gen_len": 36.4669, |
| "eval_loss": 0.10278471559286118, |
| "eval_runtime": 128.7397, |
| "eval_samples_per_second": 8.102, |
| "eval_steps_per_second": 0.256, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.8029130520504733e-05, |
| "loss": 0.147, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_bleu": 41.9375, |
| "eval_gen_len": 35.2301, |
| "eval_loss": 0.10222012549638748, |
| "eval_runtime": 132.4166, |
| "eval_samples_per_second": 7.877, |
| "eval_steps_per_second": 0.249, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.7782802641955836e-05, |
| "loss": 0.1456, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_bleu": 40.9742, |
| "eval_gen_len": 37.1965, |
| "eval_loss": 0.10183104127645493, |
| "eval_runtime": 119.5559, |
| "eval_samples_per_second": 8.724, |
| "eval_steps_per_second": 0.276, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7536474763406942e-05, |
| "loss": 0.1448, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_bleu": 41.7528, |
| "eval_gen_len": 35.7248, |
| "eval_loss": 0.10195966809988022, |
| "eval_runtime": 123.0307, |
| "eval_samples_per_second": 8.478, |
| "eval_steps_per_second": 0.268, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.7290146884858045e-05, |
| "loss": 0.1372, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.17, |
| "eval_bleu": 42.1909, |
| "eval_gen_len": 35.2924, |
| "eval_loss": 0.1022593304514885, |
| "eval_runtime": 159.0132, |
| "eval_samples_per_second": 6.559, |
| "eval_steps_per_second": 0.208, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.7043757393533123e-05, |
| "loss": 0.1363, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.37, |
| "eval_bleu": 42.6162, |
| "eval_gen_len": 34.9866, |
| "eval_loss": 0.10216603428125381, |
| "eval_runtime": 116.2982, |
| "eval_samples_per_second": 8.968, |
| "eval_steps_per_second": 0.284, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.679742951498423e-05, |
| "loss": 0.1352, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_bleu": 42.4525, |
| "eval_gen_len": 34.8888, |
| "eval_loss": 0.10181364417076111, |
| "eval_runtime": 134.9256, |
| "eval_samples_per_second": 7.73, |
| "eval_steps_per_second": 0.245, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 1.6551101636435333e-05, |
| "loss": 0.1355, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.76, |
| "eval_bleu": 41.9729, |
| "eval_gen_len": 35.9051, |
| "eval_loss": 0.10166899114847183, |
| "eval_runtime": 135.9018, |
| "eval_samples_per_second": 7.675, |
| "eval_steps_per_second": 0.243, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 1.6304773757886436e-05, |
| "loss": 0.1358, |
| "step": 60000 |
| }, |
| { |
| "epoch": 2.96, |
| "eval_bleu": 42.3275, |
| "eval_gen_len": 34.8514, |
| "eval_loss": 0.10106752812862396, |
| "eval_runtime": 160.3679, |
| "eval_samples_per_second": 6.504, |
| "eval_steps_per_second": 0.206, |
| "step": 60000 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 1.6058445879337542e-05, |
| "loss": 0.1294, |
| "step": 64000 |
| }, |
| { |
| "epoch": 3.15, |
| "eval_bleu": 42.2988, |
| "eval_gen_len": 34.8188, |
| "eval_loss": 0.10183101147413254, |
| "eval_runtime": 137.4561, |
| "eval_samples_per_second": 7.588, |
| "eval_steps_per_second": 0.24, |
| "step": 64000 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 1.5812118000788645e-05, |
| "loss": 0.1289, |
| "step": 68000 |
| }, |
| { |
| "epoch": 3.35, |
| "eval_bleu": 42.7527, |
| "eval_gen_len": 34.7766, |
| "eval_loss": 0.10201847553253174, |
| "eval_runtime": 136.3856, |
| "eval_samples_per_second": 7.647, |
| "eval_steps_per_second": 0.242, |
| "step": 68000 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 1.5565728509463723e-05, |
| "loss": 0.1277, |
| "step": 72000 |
| }, |
| { |
| "epoch": 3.55, |
| "eval_bleu": 42.3528, |
| "eval_gen_len": 35.2416, |
| "eval_loss": 0.10199479013681412, |
| "eval_runtime": 110.1197, |
| "eval_samples_per_second": 9.472, |
| "eval_steps_per_second": 0.3, |
| "step": 72000 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 1.5319400630914826e-05, |
| "loss": 0.1282, |
| "step": 76000 |
| }, |
| { |
| "epoch": 3.75, |
| "eval_bleu": 42.4538, |
| "eval_gen_len": 35.4746, |
| "eval_loss": 0.10173474997282028, |
| "eval_runtime": 126.659, |
| "eval_samples_per_second": 8.235, |
| "eval_steps_per_second": 0.261, |
| "step": 76000 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 1.5073072752365931e-05, |
| "loss": 0.129, |
| "step": 80000 |
| }, |
| { |
| "epoch": 3.94, |
| "eval_bleu": 42.7722, |
| "eval_gen_len": 34.8581, |
| "eval_loss": 0.10149160027503967, |
| "eval_runtime": 151.6781, |
| "eval_samples_per_second": 6.876, |
| "eval_steps_per_second": 0.218, |
| "step": 80000 |
| } |
| ], |
| "max_steps": 324608, |
| "num_train_epochs": 16, |
| "total_flos": 1.3796012702564352e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|