| { |
| "best_metric": 43.0826, |
| "best_model_checkpoint": "opus_base_adapt_wce_gloss_train-sampled_prop_0.6_weight_1.5/checkpoint-80000", |
| "epoch": 3.943217665615142, |
| "global_step": 80000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9753918572555207e-05, |
| "loss": 0.2059, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_bleu": 35.3446, |
| "eval_gen_len": 40.1457, |
| "eval_loss": 0.10910351574420929, |
| "eval_runtime": 196.1214, |
| "eval_samples_per_second": 5.318, |
| "eval_steps_per_second": 0.168, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.950746746845426e-05, |
| "loss": 0.1726, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_bleu": 37.4693, |
| "eval_gen_len": 38.4343, |
| "eval_loss": 0.10661692172288895, |
| "eval_runtime": 185.2791, |
| "eval_samples_per_second": 5.629, |
| "eval_steps_per_second": 0.178, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.9261016364353314e-05, |
| "loss": 0.1657, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_bleu": 38.9285, |
| "eval_gen_len": 37.1179, |
| "eval_loss": 0.10535775870084763, |
| "eval_runtime": 179.0918, |
| "eval_samples_per_second": 5.824, |
| "eval_steps_per_second": 0.184, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.9014565260252367e-05, |
| "loss": 0.1602, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_bleu": 41.3026, |
| "eval_gen_len": 35.4842, |
| "eval_loss": 0.10433036834001541, |
| "eval_runtime": 135.9487, |
| "eval_samples_per_second": 7.672, |
| "eval_steps_per_second": 0.243, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.876811415615142e-05, |
| "loss": 0.1579, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_bleu": 41.5544, |
| "eval_gen_len": 35.0029, |
| "eval_loss": 0.10376698523759842, |
| "eval_runtime": 170.9924, |
| "eval_samples_per_second": 6.1, |
| "eval_steps_per_second": 0.193, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.8521786277602524e-05, |
| "loss": 0.1462, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_bleu": 41.856, |
| "eval_gen_len": 35.0441, |
| "eval_loss": 0.10364395380020142, |
| "eval_runtime": 145.061, |
| "eval_samples_per_second": 7.19, |
| "eval_steps_per_second": 0.227, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.827545839905363e-05, |
| "loss": 0.146, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_bleu": 41.8961, |
| "eval_gen_len": 34.8571, |
| "eval_loss": 0.10288944095373154, |
| "eval_runtime": 217.7773, |
| "eval_samples_per_second": 4.789, |
| "eval_steps_per_second": 0.152, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.8029130520504733e-05, |
| "loss": 0.1456, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_bleu": 42.0089, |
| "eval_gen_len": 35.3337, |
| "eval_loss": 0.10227638483047485, |
| "eval_runtime": 225.0758, |
| "eval_samples_per_second": 4.634, |
| "eval_steps_per_second": 0.147, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.7782802641955836e-05, |
| "loss": 0.1442, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_bleu": 40.2595, |
| "eval_gen_len": 37.6098, |
| "eval_loss": 0.10185742378234863, |
| "eval_runtime": 135.0722, |
| "eval_samples_per_second": 7.722, |
| "eval_steps_per_second": 0.244, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7536413150630914e-05, |
| "loss": 0.1434, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_bleu": 42.1927, |
| "eval_gen_len": 34.373, |
| "eval_loss": 0.10200384259223938, |
| "eval_runtime": 102.6237, |
| "eval_samples_per_second": 10.163, |
| "eval_steps_per_second": 0.322, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.729008527208202e-05, |
| "loss": 0.1359, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.17, |
| "eval_bleu": 40.111, |
| "eval_gen_len": 37.2148, |
| "eval_loss": 0.10229738801717758, |
| "eval_runtime": 164.8297, |
| "eval_samples_per_second": 6.328, |
| "eval_steps_per_second": 0.2, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.70436957807571e-05, |
| "loss": 0.135, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.37, |
| "eval_bleu": 42.1318, |
| "eval_gen_len": 35.3394, |
| "eval_loss": 0.1021457388997078, |
| "eval_runtime": 151.08, |
| "eval_samples_per_second": 6.904, |
| "eval_steps_per_second": 0.218, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.679742951498423e-05, |
| "loss": 0.1339, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_bleu": 42.4231, |
| "eval_gen_len": 34.3615, |
| "eval_loss": 0.10178423672914505, |
| "eval_runtime": 148.054, |
| "eval_samples_per_second": 7.045, |
| "eval_steps_per_second": 0.223, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 1.6550978410883283e-05, |
| "loss": 0.1342, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.76, |
| "eval_bleu": 42.1116, |
| "eval_gen_len": 35.9003, |
| "eval_loss": 0.10168300569057465, |
| "eval_runtime": 163.2182, |
| "eval_samples_per_second": 6.39, |
| "eval_steps_per_second": 0.202, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 1.6304650532334386e-05, |
| "loss": 0.1345, |
| "step": 60000 |
| }, |
| { |
| "epoch": 2.96, |
| "eval_bleu": 42.5261, |
| "eval_gen_len": 34.0211, |
| "eval_loss": 0.10101277381181717, |
| "eval_runtime": 154.3841, |
| "eval_samples_per_second": 6.756, |
| "eval_steps_per_second": 0.214, |
| "step": 60000 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 1.605832265378549e-05, |
| "loss": 0.1282, |
| "step": 64000 |
| }, |
| { |
| "epoch": 3.15, |
| "eval_bleu": 42.2183, |
| "eval_gen_len": 35.2656, |
| "eval_loss": 0.10183349996805191, |
| "eval_runtime": 155.1849, |
| "eval_samples_per_second": 6.721, |
| "eval_steps_per_second": 0.213, |
| "step": 64000 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 1.581205638801262e-05, |
| "loss": 0.1276, |
| "step": 68000 |
| }, |
| { |
| "epoch": 3.35, |
| "eval_bleu": 42.8197, |
| "eval_gen_len": 34.7641, |
| "eval_loss": 0.10200904309749603, |
| "eval_runtime": 140.3278, |
| "eval_samples_per_second": 7.433, |
| "eval_steps_per_second": 0.235, |
| "step": 68000 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 1.5565605283911674e-05, |
| "loss": 0.1265, |
| "step": 72000 |
| }, |
| { |
| "epoch": 3.55, |
| "eval_bleu": 42.3004, |
| "eval_gen_len": 35.209, |
| "eval_loss": 0.10207869112491608, |
| "eval_runtime": 105.974, |
| "eval_samples_per_second": 9.842, |
| "eval_steps_per_second": 0.311, |
| "step": 72000 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 1.5319277405362777e-05, |
| "loss": 0.1269, |
| "step": 76000 |
| }, |
| { |
| "epoch": 3.75, |
| "eval_bleu": 42.4185, |
| "eval_gen_len": 35.6155, |
| "eval_loss": 0.10173621028661728, |
| "eval_runtime": 126.6351, |
| "eval_samples_per_second": 8.236, |
| "eval_steps_per_second": 0.261, |
| "step": 76000 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 1.507294952681388e-05, |
| "loss": 0.1277, |
| "step": 80000 |
| }, |
| { |
| "epoch": 3.94, |
| "eval_bleu": 43.0826, |
| "eval_gen_len": 34.3269, |
| "eval_loss": 0.10156024992465973, |
| "eval_runtime": 123.2072, |
| "eval_samples_per_second": 8.465, |
| "eval_steps_per_second": 0.268, |
| "step": 80000 |
| } |
| ], |
| "max_steps": 324608, |
| "num_train_epochs": 16, |
| "total_flos": 1.3796012702564352e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|