José Ángel González committed on
Commit
35fd7c5
·
1 Parent(s): 841bbde
Files changed (7) hide show
  1. README.md +19 -19
  2. all_results.json +15 -15
  3. eval_results.json +10 -10
  4. pytorch_model.bin +1 -1
  5. train_results.json +5 -5
  6. trainer_state.json +3511 -211
  7. training_args.bin +1 -1
README.md CHANGED
@@ -13,7 +13,7 @@ model-index:
13
  metrics:
14
  - name: Rouge1
15
  type: rouge
16
- value: 23.9947
17
  ---
18
 
19
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -23,12 +23,12 @@ should probably proofread and complete it, then remove this comment. -->
23
 
24
  This model is a fine-tuned version of [facebook/bart-large](https://huggingface.co/facebook/bart-large) on an unknown dataset.
25
  It achieves the following results on the evaluation set:
26
- - Loss: 4.6366
27
- - Rouge1: 23.9947
28
- - Rouge2: 5.3034
29
- - Rougel: 16.3635
30
- - Rougelsum: 19.7575
31
- - Gen Len: 55.25
32
 
33
  ## Model description
34
 
@@ -59,18 +59,18 @@ The following hyperparameters were used during training:
59
 
60
  ### Training results
61
 
62
- | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
63
- |:-------------:|:-----:|:----:|:---------------:|:-------:|:------:|:-------:|:---------:|:-------:|
64
- | 3.6608 | 1.0 | 125 | 3.3274 | 15.2816 | 4.0723 | 12.0873 | 13.6532 | 19.23 |
65
- | 3.3135 | 2.0 | 250 | 3.3133 | 16.5105 | 4.4915 | 13.8639 | 14.8435 | 19.31 |
66
- | 2.7732 | 3.0 | 375 | 3.3856 | 16.8687 | 4.904 | 13.4615 | 14.7432 | 19.93 |
67
- | 2.2759 | 4.0 | 500 | 3.5508 | 15.4023 | 4.1761 | 12.5635 | 13.7679 | 19.27 |
68
- | 1.8199 | 5.0 | 625 | 3.7591 | 17.7686 | 4.532 | 13.9086 | 15.3305 | 19.93 |
69
- | 1.4575 | 6.0 | 750 | 3.9726 | 16.4133 | 4.359 | 13.4621 | 14.5896 | 19.92 |
70
- | 1.126 | 7.0 | 875 | 4.2964 | 17.3934 | 3.6935 | 13.7934 | 14.9719 | 19.43 |
71
- | 0.9073 | 8.0 | 1000 | 4.4205 | 17.4328 | 3.8734 | 13.4282 | 14.7105 | 19.83 |
72
- | 0.7925 | 9.0 | 1125 | 4.5501 | 17.3798 | 3.9775 | 13.3141 | 14.7692 | 19.87 |
73
- | 0.6844 | 10.0 | 1250 | 4.6366 | 17.3867 | 4.2671 | 13.7924 | 15.1543 | 19.81 |
74
 
75
 
76
  ### Framework versions
 
13
  metrics:
14
  - name: Rouge1
15
  type: rouge
16
+ value: 24.5193
17
  ---
18
 
19
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
23
 
24
  This model is a fine-tuned version of [facebook/bart-large](https://huggingface.co/facebook/bart-large) on an unknown dataset.
25
  It achieves the following results on the evaluation set:
26
+ - Loss: 3.7900
27
+ - Rouge1: 24.5193
28
+ - Rouge2: 6.267
29
+ - Rougel: 17.4389
30
+ - Rougelsum: 20.5821
31
+ - Gen Len: 56.8235
32
 
33
  ## Model description
34
 
 
59
 
60
  ### Training results
61
 
62
+ | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
63
+ |:-------------:|:-----:|:-----:|:---------------:|:-------:|:------:|:-------:|:---------:|:-------:|
64
+ | 3.2899 | 1.0 | 2875 | 3.0328 | 16.185 | 4.0368 | 12.9047 | 14.0748 | 19.3457 |
65
+ | 3.0916 | 2.0 | 5750 | 3.0548 | 16.2962 | 3.9567 | 13.0426 | 14.2023 | 19.3427 |
66
+ | 2.8345 | 3.0 | 8625 | 3.0645 | 16.4597 | 4.2017 | 13.3787 | 14.5527 | 19.9707 |
67
+ | 2.5522 | 4.0 | 11500 | 3.0988 | 16.8388 | 4.3742 | 13.5688 | 14.7003 | 19.9324 |
68
+ | 2.2307 | 5.0 | 14375 | 3.2058 | 16.4764 | 4.2906 | 13.3875 | 14.5223 | 19.8502 |
69
+ | 1.8381 | 6.0 | 17250 | 3.3179 | 16.6764 | 4.4834 | 13.5489 | 14.6173 | 19.9681 |
70
+ | 1.6203 | 7.0 | 20125 | 3.4763 | 17.0434 | 4.5045 | 13.8329 | 14.9286 | 19.9105 |
71
+ | 1.4982 | 8.0 | 23000 | 3.6031 | 17.0044 | 4.7727 | 13.8743 | 14.9683 | 19.9539 |
72
+ | 1.3385 | 9.0 | 25875 | 3.7051 | 17.0903 | 4.5413 | 13.8897 | 15.0091 | 19.8291 |
73
+ | 1.2211 | 10.0 | 28750 | 3.7900 | 16.7843 | 4.4907 | 13.6418 | 14.7366 | 19.9066 |
74
 
75
 
76
  ### Framework versions
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_gen_len": 55.25,
4
- "eval_loss": 4.636623382568359,
5
- "eval_rouge1": 23.9947,
6
- "eval_rouge2": 5.3034,
7
- "eval_rougeL": 16.3635,
8
- "eval_rougeLsum": 19.7575,
9
- "eval_runtime": 30.2029,
10
- "eval_samples": 100,
11
- "eval_samples_per_second": 3.311,
12
- "eval_steps_per_second": 0.43,
13
- "train_loss": 1.877090069580078,
14
- "train_runtime": 1007.5303,
15
- "train_samples": 995,
16
- "train_samples_per_second": 9.876,
17
- "train_steps_per_second": 1.241
18
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_gen_len": 56.8235,
4
+ "eval_loss": 3.790048360824585,
5
+ "eval_rouge1": 24.5193,
6
+ "eval_rouge2": 6.267,
7
+ "eval_rougeL": 17.4389,
8
+ "eval_rougeLsum": 20.5821,
9
+ "eval_runtime": 686.4492,
10
+ "eval_samples": 2323,
11
+ "eval_samples_per_second": 3.384,
12
+ "eval_steps_per_second": 0.424,
13
+ "train_loss": 2.145213280321204,
14
+ "train_runtime": 23488.7209,
15
+ "train_samples": 23000,
16
+ "train_samples_per_second": 9.792,
17
+ "train_steps_per_second": 1.224
18
  }
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_gen_len": 55.25,
4
- "eval_loss": 4.636623382568359,
5
- "eval_rouge1": 23.9947,
6
- "eval_rouge2": 5.3034,
7
- "eval_rougeL": 16.3635,
8
- "eval_rougeLsum": 19.7575,
9
- "eval_runtime": 30.2029,
10
- "eval_samples": 100,
11
- "eval_samples_per_second": 3.311,
12
- "eval_steps_per_second": 0.43
13
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_gen_len": 56.8235,
4
+ "eval_loss": 3.790048360824585,
5
+ "eval_rouge1": 24.5193,
6
+ "eval_rouge2": 6.267,
7
+ "eval_rougeL": 17.4389,
8
+ "eval_rougeLsum": 20.5821,
9
+ "eval_runtime": 686.4492,
10
+ "eval_samples": 2323,
11
+ "eval_samples_per_second": 3.384,
12
+ "eval_steps_per_second": 0.424
13
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90eee62817bf936a7493476fc3439c5f8844d7965c7e38dcfe0ac1244ea91248
3
  size 1625569391
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a56a4efb7f95a9c07a59b2460261edeea7485fe431c55e8fe740b2c1f9028e41
3
  size 1625569391
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 10.0,
3
- "train_loss": 1.877090069580078,
4
- "train_runtime": 1007.5303,
5
- "train_samples": 995,
6
- "train_samples_per_second": 9.876,
7
- "train_steps_per_second": 1.241
8
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "train_loss": 2.145213280321204,
4
+ "train_runtime": 23488.7209,
5
+ "train_samples": 23000,
6
+ "train_samples_per_second": 9.792,
7
+ "train_steps_per_second": 1.224
8
  }
trainer_state.json CHANGED
@@ -2,304 +2,3604 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 10.0,
5
- "global_step": 1250,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.4,
12
- "learning_rate": 1.223404255319149e-05,
13
- "loss": 3.8907,
14
  "step": 50
15
  },
16
  {
17
- "epoch": 0.8,
18
- "learning_rate": 2.5265957446808515e-05,
19
- "loss": 3.6608,
20
- "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  },
22
  {
23
- "epoch": 1.0,
24
- "eval_gen_len": 19.23,
25
- "eval_loss": 3.32743763923645,
26
- "eval_rouge1": 15.2816,
27
- "eval_rouge2": 4.0723,
28
- "eval_rougeL": 12.0873,
29
- "eval_rougeLsum": 13.6532,
30
- "eval_runtime": 11.1064,
31
- "eval_samples_per_second": 9.004,
32
- "eval_steps_per_second": 1.17,
33
- "step": 125
34
  },
35
  {
36
- "epoch": 1.2,
37
- "learning_rate": 3.8563829787234045e-05,
38
- "loss": 3.4017,
39
- "step": 150
40
  },
41
  {
42
- "epoch": 1.6,
43
- "learning_rate": 4.967043314500942e-05,
44
- "loss": 3.3923,
45
- "step": 200
46
  },
47
  {
48
- "epoch": 2.0,
49
- "learning_rate": 4.7316384180790966e-05,
50
- "loss": 3.3135,
51
- "step": 250
52
  },
53
  {
54
- "epoch": 2.0,
55
- "eval_gen_len": 19.31,
56
- "eval_loss": 3.313326358795166,
57
- "eval_rouge1": 16.5105,
58
- "eval_rouge2": 4.4915,
59
- "eval_rougeL": 13.8639,
60
- "eval_rougeLsum": 14.8435,
61
- "eval_runtime": 10.7765,
62
- "eval_samples_per_second": 9.279,
63
- "eval_steps_per_second": 1.206,
64
- "step": 250
65
  },
66
  {
67
- "epoch": 2.4,
68
- "learning_rate": 4.4962335216572505e-05,
69
- "loss": 2.7445,
70
- "step": 300
71
  },
72
  {
73
- "epoch": 2.8,
74
- "learning_rate": 4.260828625235405e-05,
75
- "loss": 2.7732,
76
- "step": 350
77
  },
78
  {
79
- "epoch": 3.0,
80
- "eval_gen_len": 19.93,
81
- "eval_loss": 3.3855550289154053,
82
- "eval_rouge1": 16.8687,
83
- "eval_rouge2": 4.904,
84
- "eval_rougeL": 13.4615,
85
- "eval_rougeLsum": 14.7432,
86
- "eval_runtime": 10.8555,
87
- "eval_samples_per_second": 9.212,
88
- "eval_steps_per_second": 1.198,
89
- "step": 375
90
  },
91
  {
92
- "epoch": 3.2,
93
- "learning_rate": 4.025423728813559e-05,
94
- "loss": 2.5055,
95
- "step": 400
96
  },
97
  {
98
- "epoch": 3.6,
99
- "learning_rate": 3.790018832391714e-05,
100
- "loss": 2.1993,
101
- "step": 450
102
  },
103
  {
104
- "epoch": 4.0,
105
- "learning_rate": 3.554613935969868e-05,
106
- "loss": 2.2759,
107
- "step": 500
108
  },
109
  {
110
- "epoch": 4.0,
111
- "eval_gen_len": 19.27,
112
- "eval_loss": 3.5507638454437256,
113
- "eval_rouge1": 15.4023,
114
- "eval_rouge2": 4.1761,
115
- "eval_rougeL": 12.5635,
116
- "eval_rougeLsum": 13.7679,
117
- "eval_runtime": 10.8402,
118
- "eval_samples_per_second": 9.225,
119
- "eval_steps_per_second": 1.199,
120
- "step": 500
121
  },
122
  {
123
- "epoch": 4.4,
124
- "learning_rate": 3.319209039548023e-05,
125
- "loss": 1.7219,
126
- "step": 550
127
  },
128
  {
129
- "epoch": 4.8,
130
- "learning_rate": 3.0838041431261774e-05,
131
- "loss": 1.8199,
132
- "step": 600
133
  },
134
  {
135
- "epoch": 5.0,
136
- "eval_gen_len": 19.93,
137
- "eval_loss": 3.7590889930725098,
138
- "eval_rouge1": 17.7686,
139
- "eval_rouge2": 4.532,
140
- "eval_rougeL": 13.9086,
141
- "eval_rougeLsum": 15.3305,
142
- "eval_runtime": 10.8632,
143
- "eval_samples_per_second": 9.205,
144
- "eval_steps_per_second": 1.197,
145
- "step": 625
146
  },
147
  {
148
- "epoch": 5.2,
149
- "learning_rate": 2.8483992467043313e-05,
150
- "loss": 1.5748,
151
- "step": 650
152
  },
153
  {
154
- "epoch": 5.6,
155
- "learning_rate": 2.612994350282486e-05,
156
- "loss": 1.3965,
157
- "step": 700
158
  },
159
  {
160
- "epoch": 6.0,
161
- "learning_rate": 2.3775894538606405e-05,
162
- "loss": 1.4575,
163
- "step": 750
164
  },
165
  {
166
- "epoch": 6.0,
167
- "eval_gen_len": 19.92,
168
- "eval_loss": 3.9725918769836426,
169
- "eval_rouge1": 16.4133,
170
- "eval_rouge2": 4.359,
171
- "eval_rougeL": 13.4621,
172
- "eval_rougeLsum": 14.5896,
173
- "eval_runtime": 10.8642,
174
- "eval_samples_per_second": 9.205,
175
- "eval_steps_per_second": 1.197,
176
- "step": 750
177
  },
178
  {
179
- "epoch": 6.4,
180
- "learning_rate": 2.1421845574387948e-05,
181
- "loss": 1.1037,
182
- "step": 800
183
  },
184
  {
185
- "epoch": 6.8,
186
- "learning_rate": 1.906779661016949e-05,
187
- "loss": 1.126,
188
- "step": 850
189
  },
190
  {
191
- "epoch": 7.0,
192
- "eval_gen_len": 19.43,
193
- "eval_loss": 4.29640007019043,
194
- "eval_rouge1": 17.3934,
195
- "eval_rouge2": 3.6935,
196
- "eval_rougeL": 13.7934,
197
- "eval_rougeLsum": 14.9719,
198
- "eval_runtime": 10.8699,
199
- "eval_samples_per_second": 9.2,
200
- "eval_steps_per_second": 1.196,
201
- "step": 875
202
  },
203
  {
204
- "epoch": 7.2,
205
- "learning_rate": 1.6713747645951036e-05,
206
- "loss": 1.0553,
207
- "step": 900
208
  },
209
  {
210
- "epoch": 7.6,
211
- "learning_rate": 1.435969868173258e-05,
212
- "loss": 0.9473,
213
- "step": 950
214
  },
215
  {
216
- "epoch": 8.0,
217
- "learning_rate": 1.2005649717514125e-05,
218
- "loss": 0.9073,
219
- "step": 1000
220
  },
221
  {
222
- "epoch": 8.0,
223
- "eval_gen_len": 19.83,
224
- "eval_loss": 4.420531749725342,
225
- "eval_rouge1": 17.4328,
226
- "eval_rouge2": 3.8734,
227
- "eval_rougeL": 13.4282,
228
- "eval_rougeLsum": 14.7105,
229
- "eval_runtime": 10.9454,
230
- "eval_samples_per_second": 9.136,
231
- "eval_steps_per_second": 1.188,
232
- "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  },
234
  {
235
  "epoch": 8.4,
236
- "learning_rate": 9.65160075329567e-06,
237
- "loss": 0.7636,
238
- "step": 1050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  },
240
  {
241
  "epoch": 8.8,
242
- "learning_rate": 7.297551789077213e-06,
243
- "loss": 0.7925,
244
- "step": 1100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  },
246
  {
247
  "epoch": 9.0,
248
- "eval_gen_len": 19.87,
249
- "eval_loss": 4.550107955932617,
250
- "eval_rouge1": 17.3798,
251
- "eval_rouge2": 3.9775,
252
- "eval_rougeL": 13.3141,
253
- "eval_rougeLsum": 14.7692,
254
- "eval_runtime": 10.8417,
255
- "eval_samples_per_second": 9.224,
256
- "eval_steps_per_second": 1.199,
257
- "step": 1125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  },
259
  {
260
  "epoch": 9.2,
261
- "learning_rate": 4.990583804143127e-06,
262
- "loss": 0.7396,
263
- "step": 1150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  },
265
  {
266
  "epoch": 9.6,
267
- "learning_rate": 2.6365348399246707e-06,
268
- "loss": 0.6792,
269
- "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  },
271
  {
272
  "epoch": 10.0,
273
- "learning_rate": 2.8248587570621473e-07,
274
- "loss": 0.6844,
275
- "step": 1250
276
  },
277
  {
278
  "epoch": 10.0,
279
- "eval_gen_len": 19.81,
280
- "eval_loss": 4.636623382568359,
281
- "eval_rouge1": 17.3867,
282
- "eval_rouge2": 4.2671,
283
- "eval_rougeL": 13.7924,
284
- "eval_rougeLsum": 15.1543,
285
- "eval_runtime": 10.8893,
286
- "eval_samples_per_second": 9.183,
287
- "eval_steps_per_second": 1.194,
288
- "step": 1250
289
  },
290
  {
291
  "epoch": 10.0,
292
- "step": 1250,
293
- "total_flos": 2.278181410848768e+16,
294
- "train_loss": 1.877090069580078,
295
- "train_runtime": 1007.5303,
296
- "train_samples_per_second": 9.876,
297
- "train_steps_per_second": 1.241
298
  }
299
  ],
300
- "max_steps": 1250,
301
  "num_train_epochs": 10,
302
- "total_flos": 2.278181410848768e+16,
303
  "trial_name": null,
304
  "trial_params": null
305
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 10.0,
5
+ "global_step": 28750,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.02,
12
+ "learning_rate": 5.216786459540924e-07,
13
+ "loss": 4.1832,
14
  "step": 50
15
  },
16
  {
17
+ "epoch": 0.03,
18
+ "learning_rate": 1.1013215859030839e-06,
19
+ "loss": 3.9055,
20
+ "step": 100
21
+ },
22
+ {
23
+ "epoch": 0.05,
24
+ "learning_rate": 1.6809645258520752e-06,
25
+ "loss": 3.653,
26
+ "step": 150
27
+ },
28
+ {
29
+ "epoch": 0.07,
30
+ "learning_rate": 2.260607465801067e-06,
31
+ "loss": 3.5949,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 0.09,
36
+ "learning_rate": 2.8286575469510783e-06,
37
+ "loss": 3.5274,
38
+ "step": 250
39
+ },
40
+ {
41
+ "epoch": 0.1,
42
+ "learning_rate": 3.40830048690007e-06,
43
+ "loss": 3.4424,
44
+ "step": 300
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "learning_rate": 3.987943426849061e-06,
49
+ "loss": 3.5065,
50
+ "step": 350
51
+ },
52
+ {
53
+ "epoch": 0.14,
54
+ "learning_rate": 4.567586366798052e-06,
55
+ "loss": 3.4948,
56
+ "step": 400
57
+ },
58
+ {
59
+ "epoch": 0.16,
60
+ "learning_rate": 5.147229306747045e-06,
61
+ "loss": 3.4344,
62
+ "step": 450
63
+ },
64
+ {
65
+ "epoch": 0.17,
66
+ "learning_rate": 5.7268722466960354e-06,
67
+ "loss": 3.5227,
68
+ "step": 500
69
+ },
70
+ {
71
+ "epoch": 0.19,
72
+ "learning_rate": 6.306515186645028e-06,
73
+ "loss": 3.3376,
74
+ "step": 550
75
+ },
76
+ {
77
+ "epoch": 0.21,
78
+ "learning_rate": 6.8861581265940184e-06,
79
+ "loss": 3.3889,
80
+ "step": 600
81
+ },
82
+ {
83
+ "epoch": 0.23,
84
+ "learning_rate": 7.46580106654301e-06,
85
+ "loss": 3.262,
86
+ "step": 650
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "learning_rate": 8.045444006492002e-06,
91
+ "loss": 3.3727,
92
+ "step": 700
93
+ },
94
+ {
95
+ "epoch": 0.26,
96
+ "learning_rate": 8.625086946440993e-06,
97
+ "loss": 3.3277,
98
+ "step": 750
99
+ },
100
+ {
101
+ "epoch": 0.28,
102
+ "learning_rate": 9.204729886389984e-06,
103
+ "loss": 3.3311,
104
+ "step": 800
105
+ },
106
+ {
107
+ "epoch": 0.3,
108
+ "learning_rate": 9.784372826338974e-06,
109
+ "loss": 3.3332,
110
+ "step": 850
111
+ },
112
+ {
113
+ "epoch": 0.31,
114
+ "learning_rate": 1.0364015766287967e-05,
115
+ "loss": 3.3371,
116
+ "step": 900
117
+ },
118
+ {
119
+ "epoch": 0.33,
120
+ "learning_rate": 1.0943658706236959e-05,
121
+ "loss": 3.3538,
122
+ "step": 950
123
+ },
124
+ {
125
+ "epoch": 0.35,
126
+ "learning_rate": 1.152330164618595e-05,
127
+ "loss": 3.2633,
128
+ "step": 1000
129
+ },
130
+ {
131
+ "epoch": 0.37,
132
+ "learning_rate": 1.2102944586134942e-05,
133
+ "loss": 3.3787,
134
+ "step": 1050
135
+ },
136
+ {
137
+ "epoch": 0.38,
138
+ "learning_rate": 1.2682587526083933e-05,
139
+ "loss": 3.2775,
140
+ "step": 1100
141
+ },
142
+ {
143
+ "epoch": 0.4,
144
+ "learning_rate": 1.3262230466032923e-05,
145
+ "loss": 3.2566,
146
+ "step": 1150
147
+ },
148
+ {
149
+ "epoch": 0.42,
150
+ "learning_rate": 1.3841873405981917e-05,
151
+ "loss": 3.3364,
152
+ "step": 1200
153
+ },
154
+ {
155
+ "epoch": 0.43,
156
+ "learning_rate": 1.4421516345930908e-05,
157
+ "loss": 3.2883,
158
+ "step": 1250
159
+ },
160
+ {
161
+ "epoch": 0.45,
162
+ "learning_rate": 1.5001159285879899e-05,
163
+ "loss": 3.3018,
164
+ "step": 1300
165
+ },
166
+ {
167
+ "epoch": 0.47,
168
+ "learning_rate": 1.5580802225828888e-05,
169
+ "loss": 3.3564,
170
+ "step": 1350
171
+ },
172
+ {
173
+ "epoch": 0.49,
174
+ "learning_rate": 1.6160445165777882e-05,
175
+ "loss": 3.2463,
176
+ "step": 1400
177
+ },
178
+ {
179
+ "epoch": 0.5,
180
+ "learning_rate": 1.6740088105726872e-05,
181
+ "loss": 3.3455,
182
+ "step": 1450
183
+ },
184
+ {
185
+ "epoch": 0.52,
186
+ "learning_rate": 1.7319731045675863e-05,
187
+ "loss": 3.3171,
188
+ "step": 1500
189
+ },
190
+ {
191
+ "epoch": 0.54,
192
+ "learning_rate": 1.7899373985624857e-05,
193
+ "loss": 3.2919,
194
+ "step": 1550
195
+ },
196
+ {
197
+ "epoch": 0.56,
198
+ "learning_rate": 1.8479016925573848e-05,
199
+ "loss": 3.292,
200
+ "step": 1600
201
+ },
202
+ {
203
+ "epoch": 0.57,
204
+ "learning_rate": 1.905865986552284e-05,
205
+ "loss": 3.3646,
206
+ "step": 1650
207
+ },
208
+ {
209
+ "epoch": 0.59,
210
+ "learning_rate": 1.9638302805471832e-05,
211
+ "loss": 3.2056,
212
+ "step": 1700
213
+ },
214
+ {
215
+ "epoch": 0.61,
216
+ "learning_rate": 2.0217945745420823e-05,
217
+ "loss": 3.2624,
218
+ "step": 1750
219
+ },
220
+ {
221
+ "epoch": 0.63,
222
+ "learning_rate": 2.079758868536981e-05,
223
+ "loss": 3.2349,
224
+ "step": 1800
225
+ },
226
+ {
227
+ "epoch": 0.64,
228
+ "learning_rate": 2.1377231625318804e-05,
229
+ "loss": 3.2517,
230
+ "step": 1850
231
+ },
232
+ {
233
+ "epoch": 0.66,
234
+ "learning_rate": 2.1956874565267795e-05,
235
+ "loss": 3.2804,
236
+ "step": 1900
237
+ },
238
+ {
239
+ "epoch": 0.68,
240
+ "learning_rate": 2.2536517505216786e-05,
241
+ "loss": 3.165,
242
+ "step": 1950
243
+ },
244
+ {
245
+ "epoch": 0.7,
246
+ "learning_rate": 2.311616044516578e-05,
247
+ "loss": 3.2733,
248
+ "step": 2000
249
+ },
250
+ {
251
+ "epoch": 0.71,
252
+ "learning_rate": 2.369580338511477e-05,
253
+ "loss": 3.2974,
254
+ "step": 2050
255
+ },
256
+ {
257
+ "epoch": 0.73,
258
+ "learning_rate": 2.427544632506376e-05,
259
+ "loss": 3.4127,
260
+ "step": 2100
261
+ },
262
+ {
263
+ "epoch": 0.75,
264
+ "learning_rate": 2.4855089265012755e-05,
265
+ "loss": 3.21,
266
+ "step": 2150
267
+ },
268
+ {
269
+ "epoch": 0.77,
270
+ "learning_rate": 2.5434732204961743e-05,
271
+ "loss": 3.2733,
272
+ "step": 2200
273
+ },
274
+ {
275
+ "epoch": 0.78,
276
+ "learning_rate": 2.6014375144910737e-05,
277
+ "loss": 3.2862,
278
+ "step": 2250
279
+ },
280
+ {
281
+ "epoch": 0.8,
282
+ "learning_rate": 2.6594018084859727e-05,
283
+ "loss": 3.2508,
284
+ "step": 2300
285
+ },
286
+ {
287
+ "epoch": 0.82,
288
+ "learning_rate": 2.7173661024808718e-05,
289
+ "loss": 3.2311,
290
+ "step": 2350
291
+ },
292
+ {
293
+ "epoch": 0.83,
294
+ "learning_rate": 2.775330396475771e-05,
295
+ "loss": 3.2441,
296
+ "step": 2400
297
+ },
298
+ {
299
+ "epoch": 0.85,
300
+ "learning_rate": 2.8332946904706703e-05,
301
+ "loss": 3.32,
302
+ "step": 2450
303
+ },
304
+ {
305
+ "epoch": 0.87,
306
+ "learning_rate": 2.891258984465569e-05,
307
+ "loss": 3.2646,
308
+ "step": 2500
309
+ },
310
+ {
311
+ "epoch": 0.89,
312
+ "learning_rate": 2.9492232784604684e-05,
313
+ "loss": 3.2483,
314
+ "step": 2550
315
+ },
316
+ {
317
+ "epoch": 0.9,
318
+ "learning_rate": 3.0071875724553678e-05,
319
+ "loss": 3.2918,
320
+ "step": 2600
321
+ },
322
+ {
323
+ "epoch": 0.92,
324
+ "learning_rate": 3.0651518664502665e-05,
325
+ "loss": 3.3011,
326
+ "step": 2650
327
+ },
328
+ {
329
+ "epoch": 0.94,
330
+ "learning_rate": 3.1231161604451656e-05,
331
+ "loss": 3.2694,
332
+ "step": 2700
333
+ },
334
+ {
335
+ "epoch": 0.96,
336
+ "learning_rate": 3.181080454440065e-05,
337
+ "loss": 3.2271,
338
+ "step": 2750
339
+ },
340
+ {
341
+ "epoch": 0.97,
342
+ "learning_rate": 3.239044748434964e-05,
343
+ "loss": 3.2236,
344
+ "step": 2800
345
+ },
346
+ {
347
+ "epoch": 0.99,
348
+ "learning_rate": 3.2970090424298635e-05,
349
+ "loss": 3.2899,
350
+ "step": 2850
351
+ },
352
+ {
353
+ "epoch": 1.0,
354
+ "eval_gen_len": 19.3457,
355
+ "eval_loss": 3.032784938812256,
356
+ "eval_rouge1": 16.185,
357
+ "eval_rouge2": 4.0368,
358
+ "eval_rougeL": 12.9047,
359
+ "eval_rougeLsum": 14.0748,
360
+ "eval_runtime": 251.1607,
361
+ "eval_samples_per_second": 9.249,
362
+ "eval_steps_per_second": 1.159,
363
+ "step": 2875
364
+ },
365
+ {
366
+ "epoch": 1.01,
367
+ "learning_rate": 3.3549733364247625e-05,
368
+ "loss": 3.159,
369
+ "step": 2900
370
+ },
371
+ {
372
+ "epoch": 1.03,
373
+ "learning_rate": 3.4129376304196616e-05,
374
+ "loss": 3.1081,
375
+ "step": 2950
376
+ },
377
+ {
378
+ "epoch": 1.04,
379
+ "learning_rate": 3.470901924414561e-05,
380
+ "loss": 3.085,
381
+ "step": 3000
382
+ },
383
+ {
384
+ "epoch": 1.06,
385
+ "learning_rate": 3.5288662184094604e-05,
386
+ "loss": 3.0179,
387
+ "step": 3050
388
+ },
389
+ {
390
+ "epoch": 1.08,
391
+ "learning_rate": 3.586830512404359e-05,
392
+ "loss": 3.0466,
393
+ "step": 3100
394
+ },
395
+ {
396
+ "epoch": 1.1,
397
+ "learning_rate": 3.644794806399258e-05,
398
+ "loss": 3.0898,
399
+ "step": 3150
400
+ },
401
+ {
402
+ "epoch": 1.11,
403
+ "learning_rate": 3.7027591003941576e-05,
404
+ "loss": 3.1355,
405
+ "step": 3200
406
+ },
407
+ {
408
+ "epoch": 1.13,
409
+ "learning_rate": 3.760723394389056e-05,
410
+ "loss": 3.0855,
411
+ "step": 3250
412
+ },
413
+ {
414
+ "epoch": 1.15,
415
+ "learning_rate": 3.818687688383956e-05,
416
+ "loss": 3.146,
417
+ "step": 3300
418
+ },
419
+ {
420
+ "epoch": 1.17,
421
+ "learning_rate": 3.876651982378855e-05,
422
+ "loss": 3.0868,
423
+ "step": 3350
424
+ },
425
+ {
426
+ "epoch": 1.18,
427
+ "learning_rate": 3.934616276373754e-05,
428
+ "loss": 3.1014,
429
+ "step": 3400
430
+ },
431
+ {
432
+ "epoch": 1.2,
433
+ "learning_rate": 3.992580570368653e-05,
434
+ "loss": 3.0761,
435
+ "step": 3450
436
+ },
437
+ {
438
+ "epoch": 1.22,
439
+ "learning_rate": 4.050544864363553e-05,
440
+ "loss": 3.121,
441
+ "step": 3500
442
+ },
443
+ {
444
+ "epoch": 1.23,
445
+ "learning_rate": 4.108509158358451e-05,
446
+ "loss": 3.1405,
447
+ "step": 3550
448
+ },
449
+ {
450
+ "epoch": 1.25,
451
+ "learning_rate": 4.166473452353351e-05,
452
+ "loss": 3.2121,
453
+ "step": 3600
454
+ },
455
+ {
456
+ "epoch": 1.27,
457
+ "learning_rate": 4.22443774634825e-05,
458
+ "loss": 3.1119,
459
+ "step": 3650
460
+ },
461
+ {
462
+ "epoch": 1.29,
463
+ "learning_rate": 4.282402040343149e-05,
464
+ "loss": 3.1303,
465
+ "step": 3700
466
+ },
467
+ {
468
+ "epoch": 1.3,
469
+ "learning_rate": 4.340366334338048e-05,
470
+ "loss": 3.1476,
471
+ "step": 3750
472
+ },
473
+ {
474
+ "epoch": 1.32,
475
+ "learning_rate": 4.398330628332947e-05,
476
+ "loss": 3.1053,
477
+ "step": 3800
478
+ },
479
+ {
480
+ "epoch": 1.34,
481
+ "learning_rate": 4.456294922327846e-05,
482
+ "loss": 3.1109,
483
+ "step": 3850
484
+ },
485
+ {
486
+ "epoch": 1.36,
487
+ "learning_rate": 4.514259216322745e-05,
488
+ "loss": 3.2268,
489
+ "step": 3900
490
+ },
491
+ {
492
+ "epoch": 1.37,
493
+ "learning_rate": 4.572223510317645e-05,
494
+ "loss": 3.2088,
495
+ "step": 3950
496
+ },
497
+ {
498
+ "epoch": 1.39,
499
+ "learning_rate": 4.6301878043125433e-05,
500
+ "loss": 3.175,
501
+ "step": 4000
502
+ },
503
+ {
504
+ "epoch": 1.41,
505
+ "learning_rate": 4.688152098307443e-05,
506
+ "loss": 3.1848,
507
+ "step": 4050
508
+ },
509
+ {
510
+ "epoch": 1.43,
511
+ "learning_rate": 4.744957106422444e-05,
512
+ "loss": 3.1789,
513
+ "step": 4100
514
+ },
515
+ {
516
+ "epoch": 1.44,
517
+ "learning_rate": 4.8029214004173436e-05,
518
+ "loss": 3.2485,
519
+ "step": 4150
520
+ },
521
+ {
522
+ "epoch": 1.46,
523
+ "learning_rate": 4.860885694412242e-05,
524
+ "loss": 3.1635,
525
+ "step": 4200
526
+ },
527
+ {
528
+ "epoch": 1.48,
529
+ "learning_rate": 4.918849988407142e-05,
530
+ "loss": 3.1433,
531
+ "step": 4250
532
+ },
533
+ {
534
+ "epoch": 1.5,
535
+ "learning_rate": 4.976814282402041e-05,
536
+ "loss": 3.3169,
537
+ "step": 4300
538
+ },
539
+ {
540
+ "epoch": 1.51,
541
+ "learning_rate": 4.993861766992675e-05,
542
+ "loss": 3.2209,
543
+ "step": 4350
544
+ },
545
+ {
546
+ "epoch": 1.53,
547
+ "learning_rate": 4.9836313786471336e-05,
548
+ "loss": 3.1356,
549
+ "step": 4400
550
+ },
551
+ {
552
+ "epoch": 1.55,
553
+ "learning_rate": 4.973400990301592e-05,
554
+ "loss": 3.1549,
555
+ "step": 4450
556
+ },
557
+ {
558
+ "epoch": 1.57,
559
+ "learning_rate": 4.96317060195605e-05,
560
+ "loss": 3.161,
561
+ "step": 4500
562
+ },
563
+ {
564
+ "epoch": 1.58,
565
+ "learning_rate": 4.952940213610509e-05,
566
+ "loss": 3.2009,
567
+ "step": 4550
568
+ },
569
+ {
570
+ "epoch": 1.6,
571
+ "learning_rate": 4.9427098252649675e-05,
572
+ "loss": 3.2533,
573
+ "step": 4600
574
+ },
575
+ {
576
+ "epoch": 1.62,
577
+ "learning_rate": 4.9324794369194254e-05,
578
+ "loss": 3.1573,
579
+ "step": 4650
580
+ },
581
+ {
582
+ "epoch": 1.63,
583
+ "learning_rate": 4.922249048573884e-05,
584
+ "loss": 3.1616,
585
+ "step": 4700
586
+ },
587
+ {
588
+ "epoch": 1.65,
589
+ "learning_rate": 4.912018660228343e-05,
590
+ "loss": 3.1886,
591
+ "step": 4750
592
+ },
593
+ {
594
+ "epoch": 1.67,
595
+ "learning_rate": 4.901788271882801e-05,
596
+ "loss": 3.1398,
597
+ "step": 4800
598
+ },
599
+ {
600
+ "epoch": 1.69,
601
+ "learning_rate": 4.891557883537259e-05,
602
+ "loss": 3.2065,
603
+ "step": 4850
604
+ },
605
+ {
606
+ "epoch": 1.7,
607
+ "learning_rate": 4.881327495191718e-05,
608
+ "loss": 3.151,
609
+ "step": 4900
610
+ },
611
+ {
612
+ "epoch": 1.72,
613
+ "learning_rate": 4.8710971068461766e-05,
614
+ "loss": 3.1184,
615
+ "step": 4950
616
+ },
617
+ {
618
+ "epoch": 1.74,
619
+ "learning_rate": 4.8608667185006345e-05,
620
+ "loss": 3.1874,
621
+ "step": 5000
622
+ },
623
+ {
624
+ "epoch": 1.76,
625
+ "learning_rate": 4.8506363301550925e-05,
626
+ "loss": 3.1453,
627
+ "step": 5050
628
+ },
629
+ {
630
+ "epoch": 1.77,
631
+ "learning_rate": 4.840405941809552e-05,
632
+ "loss": 3.1359,
633
+ "step": 5100
634
+ },
635
+ {
636
+ "epoch": 1.79,
637
+ "learning_rate": 4.83017555346401e-05,
638
+ "loss": 3.1124,
639
+ "step": 5150
640
+ },
641
+ {
642
+ "epoch": 1.81,
643
+ "learning_rate": 4.819945165118468e-05,
644
+ "loss": 3.2292,
645
+ "step": 5200
646
+ },
647
+ {
648
+ "epoch": 1.83,
649
+ "learning_rate": 4.809714776772927e-05,
650
+ "loss": 3.0806,
651
+ "step": 5250
652
+ },
653
+ {
654
+ "epoch": 1.84,
655
+ "learning_rate": 4.799484388427385e-05,
656
+ "loss": 3.163,
657
+ "step": 5300
658
+ },
659
+ {
660
+ "epoch": 1.86,
661
+ "learning_rate": 4.789254000081843e-05,
662
+ "loss": 3.1863,
663
+ "step": 5350
664
+ },
665
+ {
666
+ "epoch": 1.88,
667
+ "learning_rate": 4.7790236117363015e-05,
668
+ "loss": 3.1359,
669
+ "step": 5400
670
+ },
671
+ {
672
+ "epoch": 1.9,
673
+ "learning_rate": 4.76879322339076e-05,
674
+ "loss": 3.1654,
675
+ "step": 5450
676
+ },
677
+ {
678
+ "epoch": 1.91,
679
+ "learning_rate": 4.758562835045218e-05,
680
+ "loss": 3.0909,
681
+ "step": 5500
682
+ },
683
+ {
684
+ "epoch": 1.93,
685
+ "learning_rate": 4.748332446699677e-05,
686
+ "loss": 3.1349,
687
+ "step": 5550
688
+ },
689
+ {
690
+ "epoch": 1.95,
691
+ "learning_rate": 4.7381020583541354e-05,
692
+ "loss": 3.0526,
693
+ "step": 5600
694
+ },
695
+ {
696
+ "epoch": 1.97,
697
+ "learning_rate": 4.727871670008594e-05,
698
+ "loss": 3.2346,
699
+ "step": 5650
700
+ },
701
+ {
702
+ "epoch": 1.98,
703
+ "learning_rate": 4.717641281663052e-05,
704
+ "loss": 3.2078,
705
+ "step": 5700
706
+ },
707
+ {
708
+ "epoch": 2.0,
709
+ "learning_rate": 4.7074108933175106e-05,
710
+ "loss": 3.0916,
711
+ "step": 5750
712
+ },
713
+ {
714
+ "epoch": 2.0,
715
+ "eval_gen_len": 19.3427,
716
+ "eval_loss": 3.0548317432403564,
717
+ "eval_rouge1": 16.2962,
718
+ "eval_rouge2": 3.9567,
719
+ "eval_rougeL": 13.0426,
720
+ "eval_rougeLsum": 14.2023,
721
+ "eval_runtime": 251.2192,
722
+ "eval_samples_per_second": 9.247,
723
+ "eval_steps_per_second": 1.158,
724
+ "step": 5750
725
+ },
726
+ {
727
+ "epoch": 2.02,
728
+ "learning_rate": 4.697180504971969e-05,
729
+ "loss": 2.7401,
730
+ "step": 5800
731
+ },
732
+ {
733
+ "epoch": 2.03,
734
+ "learning_rate": 4.686950116626427e-05,
735
+ "loss": 2.6917,
736
+ "step": 5850
737
+ },
738
+ {
739
+ "epoch": 2.05,
740
+ "learning_rate": 4.676719728280886e-05,
741
+ "loss": 2.7485,
742
+ "step": 5900
743
+ },
744
+ {
745
+ "epoch": 2.07,
746
+ "learning_rate": 4.6664893399353445e-05,
747
+ "loss": 2.8236,
748
+ "step": 5950
749
+ },
750
+ {
751
+ "epoch": 2.09,
752
+ "learning_rate": 4.6562589515898024e-05,
753
+ "loss": 2.7402,
754
+ "step": 6000
755
+ },
756
+ {
757
+ "epoch": 2.1,
758
+ "learning_rate": 4.646028563244261e-05,
759
+ "loss": 2.7964,
760
+ "step": 6050
761
+ },
762
+ {
763
+ "epoch": 2.12,
764
+ "learning_rate": 4.63579817489872e-05,
765
+ "loss": 2.6982,
766
+ "step": 6100
767
+ },
768
+ {
769
+ "epoch": 2.14,
770
+ "learning_rate": 4.6255677865531776e-05,
771
+ "loss": 2.7642,
772
+ "step": 6150
773
+ },
774
+ {
775
+ "epoch": 2.16,
776
+ "learning_rate": 4.615337398207636e-05,
777
+ "loss": 2.7334,
778
+ "step": 6200
779
+ },
780
+ {
781
+ "epoch": 2.17,
782
+ "learning_rate": 4.605107009862095e-05,
783
+ "loss": 2.8078,
784
+ "step": 6250
785
+ },
786
+ {
787
+ "epoch": 2.19,
788
+ "learning_rate": 4.594876621516553e-05,
789
+ "loss": 2.7167,
790
+ "step": 6300
791
+ },
792
+ {
793
+ "epoch": 2.21,
794
+ "learning_rate": 4.5846462331710115e-05,
795
+ "loss": 2.8431,
796
+ "step": 6350
797
+ },
798
+ {
799
+ "epoch": 2.23,
800
+ "learning_rate": 4.57441584482547e-05,
801
+ "loss": 2.7301,
802
+ "step": 6400
803
+ },
804
+ {
805
+ "epoch": 2.24,
806
+ "learning_rate": 4.564185456479928e-05,
807
+ "loss": 2.8079,
808
+ "step": 6450
809
+ },
810
+ {
811
+ "epoch": 2.26,
812
+ "learning_rate": 4.553955068134387e-05,
813
+ "loss": 2.8141,
814
+ "step": 6500
815
+ },
816
+ {
817
+ "epoch": 2.28,
818
+ "learning_rate": 4.5437246797888446e-05,
819
+ "loss": 2.8138,
820
+ "step": 6550
821
+ },
822
+ {
823
+ "epoch": 2.3,
824
+ "learning_rate": 4.533494291443303e-05,
825
+ "loss": 2.8127,
826
+ "step": 6600
827
+ },
828
+ {
829
+ "epoch": 2.31,
830
+ "learning_rate": 4.523263903097762e-05,
831
+ "loss": 2.8158,
832
+ "step": 6650
833
+ },
834
+ {
835
+ "epoch": 2.33,
836
+ "learning_rate": 4.51303351475222e-05,
837
+ "loss": 2.7652,
838
+ "step": 6700
839
+ },
840
+ {
841
+ "epoch": 2.35,
842
+ "learning_rate": 4.5028031264066785e-05,
843
+ "loss": 2.8055,
844
+ "step": 6750
845
+ },
846
+ {
847
+ "epoch": 2.37,
848
+ "learning_rate": 4.492572738061137e-05,
849
+ "loss": 2.7853,
850
+ "step": 6800
851
+ },
852
+ {
853
+ "epoch": 2.38,
854
+ "learning_rate": 4.482342349715595e-05,
855
+ "loss": 2.7547,
856
+ "step": 6850
857
+ },
858
+ {
859
+ "epoch": 2.4,
860
+ "learning_rate": 4.4721119613700544e-05,
861
+ "loss": 2.7961,
862
+ "step": 6900
863
+ },
864
+ {
865
+ "epoch": 2.42,
866
+ "learning_rate": 4.4618815730245123e-05,
867
+ "loss": 2.817,
868
+ "step": 6950
869
+ },
870
+ {
871
+ "epoch": 2.43,
872
+ "learning_rate": 4.45165118467897e-05,
873
+ "loss": 2.8016,
874
+ "step": 7000
875
+ },
876
+ {
877
+ "epoch": 2.45,
878
+ "learning_rate": 4.441420796333429e-05,
879
+ "loss": 2.7794,
880
+ "step": 7050
881
+ },
882
+ {
883
+ "epoch": 2.47,
884
+ "learning_rate": 4.4311904079878876e-05,
885
+ "loss": 2.8091,
886
+ "step": 7100
887
+ },
888
+ {
889
+ "epoch": 2.49,
890
+ "learning_rate": 4.4209600196423455e-05,
891
+ "loss": 2.8352,
892
+ "step": 7150
893
+ },
894
+ {
895
+ "epoch": 2.5,
896
+ "learning_rate": 4.410729631296804e-05,
897
+ "loss": 2.8756,
898
+ "step": 7200
899
+ },
900
+ {
901
+ "epoch": 2.52,
902
+ "learning_rate": 4.400499242951263e-05,
903
+ "loss": 2.7531,
904
+ "step": 7250
905
+ },
906
+ {
907
+ "epoch": 2.54,
908
+ "learning_rate": 4.390268854605721e-05,
909
+ "loss": 2.8505,
910
+ "step": 7300
911
+ },
912
+ {
913
+ "epoch": 2.56,
914
+ "learning_rate": 4.3800384662601794e-05,
915
+ "loss": 2.8014,
916
+ "step": 7350
917
+ },
918
+ {
919
+ "epoch": 2.57,
920
+ "learning_rate": 4.369808077914638e-05,
921
+ "loss": 2.7784,
922
+ "step": 7400
923
+ },
924
+ {
925
+ "epoch": 2.59,
926
+ "learning_rate": 4.3595776895690966e-05,
927
+ "loss": 2.7593,
928
+ "step": 7450
929
+ },
930
+ {
931
+ "epoch": 2.61,
932
+ "learning_rate": 4.3493473012235546e-05,
933
+ "loss": 2.7198,
934
+ "step": 7500
935
+ },
936
+ {
937
+ "epoch": 2.63,
938
+ "learning_rate": 4.339321520644924e-05,
939
+ "loss": 2.7886,
940
+ "step": 7550
941
+ },
942
+ {
943
+ "epoch": 2.64,
944
+ "learning_rate": 4.329091132299382e-05,
945
+ "loss": 2.7781,
946
+ "step": 7600
947
+ },
948
+ {
949
+ "epoch": 2.66,
950
+ "learning_rate": 4.318860743953841e-05,
951
+ "loss": 2.7815,
952
+ "step": 7650
953
+ },
954
+ {
955
+ "epoch": 2.68,
956
+ "learning_rate": 4.3086303556082994e-05,
957
+ "loss": 2.8369,
958
+ "step": 7700
959
+ },
960
+ {
961
+ "epoch": 2.7,
962
+ "learning_rate": 4.2983999672627574e-05,
963
+ "loss": 2.7718,
964
+ "step": 7750
965
+ },
966
+ {
967
+ "epoch": 2.71,
968
+ "learning_rate": 4.288169578917216e-05,
969
+ "loss": 2.7706,
970
+ "step": 7800
971
+ },
972
+ {
973
+ "epoch": 2.73,
974
+ "learning_rate": 4.277939190571674e-05,
975
+ "loss": 2.7951,
976
+ "step": 7850
977
+ },
978
+ {
979
+ "epoch": 2.75,
980
+ "learning_rate": 4.2677088022261326e-05,
981
+ "loss": 2.8291,
982
+ "step": 7900
983
+ },
984
+ {
985
+ "epoch": 2.77,
986
+ "learning_rate": 4.257478413880591e-05,
987
+ "loss": 2.8511,
988
+ "step": 7950
989
+ },
990
+ {
991
+ "epoch": 2.78,
992
+ "learning_rate": 4.247248025535049e-05,
993
+ "loss": 2.8155,
994
+ "step": 8000
995
+ },
996
+ {
997
+ "epoch": 2.8,
998
+ "learning_rate": 4.2370176371895085e-05,
999
+ "loss": 2.8248,
1000
+ "step": 8050
1001
+ },
1002
+ {
1003
+ "epoch": 2.82,
1004
+ "learning_rate": 4.2267872488439664e-05,
1005
+ "loss": 2.8097,
1006
+ "step": 8100
1007
+ },
1008
+ {
1009
+ "epoch": 2.83,
1010
+ "learning_rate": 4.2165568604984244e-05,
1011
+ "loss": 2.8237,
1012
+ "step": 8150
1013
+ },
1014
+ {
1015
+ "epoch": 2.85,
1016
+ "learning_rate": 4.206326472152883e-05,
1017
+ "loss": 2.8494,
1018
+ "step": 8200
1019
+ },
1020
+ {
1021
+ "epoch": 2.87,
1022
+ "learning_rate": 4.1960960838073416e-05,
1023
+ "loss": 2.8382,
1024
+ "step": 8250
1025
+ },
1026
+ {
1027
+ "epoch": 2.89,
1028
+ "learning_rate": 4.1858656954617996e-05,
1029
+ "loss": 2.8271,
1030
+ "step": 8300
1031
+ },
1032
+ {
1033
+ "epoch": 2.9,
1034
+ "learning_rate": 4.175635307116258e-05,
1035
+ "loss": 2.8145,
1036
+ "step": 8350
1037
+ },
1038
+ {
1039
+ "epoch": 2.92,
1040
+ "learning_rate": 4.165404918770717e-05,
1041
+ "loss": 2.8698,
1042
+ "step": 8400
1043
+ },
1044
+ {
1045
+ "epoch": 2.94,
1046
+ "learning_rate": 4.155174530425175e-05,
1047
+ "loss": 2.8377,
1048
+ "step": 8450
1049
+ },
1050
+ {
1051
+ "epoch": 2.96,
1052
+ "learning_rate": 4.1449441420796334e-05,
1053
+ "loss": 2.7838,
1054
+ "step": 8500
1055
+ },
1056
+ {
1057
+ "epoch": 2.97,
1058
+ "learning_rate": 4.134713753734092e-05,
1059
+ "loss": 2.7709,
1060
+ "step": 8550
1061
+ },
1062
+ {
1063
+ "epoch": 2.99,
1064
+ "learning_rate": 4.124483365388551e-05,
1065
+ "loss": 2.8345,
1066
+ "step": 8600
1067
+ },
1068
+ {
1069
+ "epoch": 3.0,
1070
+ "eval_gen_len": 19.9707,
1071
+ "eval_loss": 3.0645270347595215,
1072
+ "eval_rouge1": 16.4597,
1073
+ "eval_rouge2": 4.2017,
1074
+ "eval_rougeL": 13.3787,
1075
+ "eval_rougeLsum": 14.5527,
1076
+ "eval_runtime": 250.7875,
1077
+ "eval_samples_per_second": 9.263,
1078
+ "eval_steps_per_second": 1.16,
1079
+ "step": 8625
1080
+ },
1081
+ {
1082
+ "epoch": 3.01,
1083
+ "learning_rate": 4.114252977043009e-05,
1084
+ "loss": 2.5888,
1085
+ "step": 8650
1086
+ },
1087
+ {
1088
+ "epoch": 3.03,
1089
+ "learning_rate": 4.104022588697467e-05,
1090
+ "loss": 2.3788,
1091
+ "step": 8700
1092
+ },
1093
+ {
1094
+ "epoch": 3.04,
1095
+ "learning_rate": 4.093792200351926e-05,
1096
+ "loss": 2.4263,
1097
+ "step": 8750
1098
+ },
1099
+ {
1100
+ "epoch": 3.06,
1101
+ "learning_rate": 4.083561812006384e-05,
1102
+ "loss": 2.3851,
1103
+ "step": 8800
1104
+ },
1105
+ {
1106
+ "epoch": 3.08,
1107
+ "learning_rate": 4.0733314236608425e-05,
1108
+ "loss": 2.378,
1109
+ "step": 8850
1110
+ },
1111
+ {
1112
+ "epoch": 3.1,
1113
+ "learning_rate": 4.063101035315301e-05,
1114
+ "loss": 2.3572,
1115
+ "step": 8900
1116
+ },
1117
+ {
1118
+ "epoch": 3.11,
1119
+ "learning_rate": 4.052870646969759e-05,
1120
+ "loss": 2.4259,
1121
+ "step": 8950
1122
+ },
1123
+ {
1124
+ "epoch": 3.13,
1125
+ "learning_rate": 4.042640258624217e-05,
1126
+ "loss": 2.4093,
1127
+ "step": 9000
1128
+ },
1129
+ {
1130
+ "epoch": 3.15,
1131
+ "learning_rate": 4.0324098702786764e-05,
1132
+ "loss": 2.3708,
1133
+ "step": 9050
1134
+ },
1135
+ {
1136
+ "epoch": 3.17,
1137
+ "learning_rate": 4.022179481933134e-05,
1138
+ "loss": 2.3737,
1139
+ "step": 9100
1140
+ },
1141
+ {
1142
+ "epoch": 3.18,
1143
+ "learning_rate": 4.011949093587592e-05,
1144
+ "loss": 2.4567,
1145
+ "step": 9150
1146
+ },
1147
+ {
1148
+ "epoch": 3.2,
1149
+ "learning_rate": 4.0017187052420516e-05,
1150
+ "loss": 2.4837,
1151
+ "step": 9200
1152
+ },
1153
+ {
1154
+ "epoch": 3.22,
1155
+ "learning_rate": 3.9914883168965095e-05,
1156
+ "loss": 2.4936,
1157
+ "step": 9250
1158
+ },
1159
+ {
1160
+ "epoch": 3.23,
1161
+ "learning_rate": 3.981257928550968e-05,
1162
+ "loss": 2.4992,
1163
+ "step": 9300
1164
+ },
1165
+ {
1166
+ "epoch": 3.25,
1167
+ "learning_rate": 3.971027540205426e-05,
1168
+ "loss": 2.4514,
1169
+ "step": 9350
1170
+ },
1171
+ {
1172
+ "epoch": 3.27,
1173
+ "learning_rate": 3.960797151859885e-05,
1174
+ "loss": 2.4341,
1175
+ "step": 9400
1176
+ },
1177
+ {
1178
+ "epoch": 3.29,
1179
+ "learning_rate": 3.9505667635143434e-05,
1180
+ "loss": 2.4617,
1181
+ "step": 9450
1182
+ },
1183
+ {
1184
+ "epoch": 3.3,
1185
+ "learning_rate": 3.940336375168801e-05,
1186
+ "loss": 2.489,
1187
+ "step": 9500
1188
+ },
1189
+ {
1190
+ "epoch": 3.32,
1191
+ "learning_rate": 3.93010598682326e-05,
1192
+ "loss": 2.4343,
1193
+ "step": 9550
1194
+ },
1195
+ {
1196
+ "epoch": 3.34,
1197
+ "learning_rate": 3.9198755984777186e-05,
1198
+ "loss": 2.4264,
1199
+ "step": 9600
1200
+ },
1201
+ {
1202
+ "epoch": 3.36,
1203
+ "learning_rate": 3.9096452101321766e-05,
1204
+ "loss": 2.4492,
1205
+ "step": 9650
1206
+ },
1207
+ {
1208
+ "epoch": 3.37,
1209
+ "learning_rate": 3.899414821786635e-05,
1210
+ "loss": 2.4625,
1211
+ "step": 9700
1212
+ },
1213
+ {
1214
+ "epoch": 3.39,
1215
+ "learning_rate": 3.889184433441094e-05,
1216
+ "loss": 2.4601,
1217
+ "step": 9750
1218
+ },
1219
+ {
1220
+ "epoch": 3.41,
1221
+ "learning_rate": 3.878954045095552e-05,
1222
+ "loss": 2.4262,
1223
+ "step": 9800
1224
+ },
1225
+ {
1226
+ "epoch": 3.43,
1227
+ "learning_rate": 3.8687236567500104e-05,
1228
+ "loss": 2.4918,
1229
+ "step": 9850
1230
+ },
1231
+ {
1232
+ "epoch": 3.44,
1233
+ "learning_rate": 3.858493268404469e-05,
1234
+ "loss": 2.4677,
1235
+ "step": 9900
1236
+ },
1237
+ {
1238
+ "epoch": 3.46,
1239
+ "learning_rate": 3.848262880058927e-05,
1240
+ "loss": 2.4299,
1241
+ "step": 9950
1242
+ },
1243
+ {
1244
+ "epoch": 3.48,
1245
+ "learning_rate": 3.8380324917133856e-05,
1246
+ "loss": 2.4229,
1247
+ "step": 10000
1248
+ },
1249
+ {
1250
+ "epoch": 3.5,
1251
+ "learning_rate": 3.827802103367844e-05,
1252
+ "loss": 2.4598,
1253
+ "step": 10050
1254
+ },
1255
+ {
1256
+ "epoch": 3.51,
1257
+ "learning_rate": 3.817571715022302e-05,
1258
+ "loss": 2.4382,
1259
+ "step": 10100
1260
+ },
1261
+ {
1262
+ "epoch": 3.53,
1263
+ "learning_rate": 3.807341326676761e-05,
1264
+ "loss": 2.4227,
1265
+ "step": 10150
1266
+ },
1267
+ {
1268
+ "epoch": 3.55,
1269
+ "learning_rate": 3.7971109383312195e-05,
1270
+ "loss": 2.4821,
1271
+ "step": 10200
1272
+ },
1273
+ {
1274
+ "epoch": 3.57,
1275
+ "learning_rate": 3.7868805499856774e-05,
1276
+ "loss": 2.4532,
1277
+ "step": 10250
1278
+ },
1279
+ {
1280
+ "epoch": 3.58,
1281
+ "learning_rate": 3.776650161640136e-05,
1282
+ "loss": 2.4879,
1283
+ "step": 10300
1284
+ },
1285
+ {
1286
+ "epoch": 3.6,
1287
+ "learning_rate": 3.766419773294595e-05,
1288
+ "loss": 2.4945,
1289
+ "step": 10350
1290
+ },
1291
+ {
1292
+ "epoch": 3.62,
1293
+ "learning_rate": 3.7561893849490526e-05,
1294
+ "loss": 2.5232,
1295
+ "step": 10400
1296
+ },
1297
+ {
1298
+ "epoch": 3.63,
1299
+ "learning_rate": 3.745958996603511e-05,
1300
+ "loss": 2.5075,
1301
+ "step": 10450
1302
+ },
1303
+ {
1304
+ "epoch": 3.65,
1305
+ "learning_rate": 3.735728608257969e-05,
1306
+ "loss": 2.4174,
1307
+ "step": 10500
1308
+ },
1309
+ {
1310
+ "epoch": 3.67,
1311
+ "learning_rate": 3.7254982199124285e-05,
1312
+ "loss": 2.4856,
1313
+ "step": 10550
1314
+ },
1315
+ {
1316
+ "epoch": 3.69,
1317
+ "learning_rate": 3.7152678315668865e-05,
1318
+ "loss": 2.4594,
1319
+ "step": 10600
1320
+ },
1321
+ {
1322
+ "epoch": 3.7,
1323
+ "learning_rate": 3.7050374432213444e-05,
1324
+ "loss": 2.4923,
1325
+ "step": 10650
1326
+ },
1327
+ {
1328
+ "epoch": 3.72,
1329
+ "learning_rate": 3.694807054875804e-05,
1330
+ "loss": 2.3824,
1331
+ "step": 10700
1332
+ },
1333
+ {
1334
+ "epoch": 3.74,
1335
+ "learning_rate": 3.684576666530262e-05,
1336
+ "loss": 2.4486,
1337
+ "step": 10750
1338
+ },
1339
+ {
1340
+ "epoch": 3.76,
1341
+ "learning_rate": 3.67434627818472e-05,
1342
+ "loss": 2.4735,
1343
+ "step": 10800
1344
+ },
1345
+ {
1346
+ "epoch": 3.77,
1347
+ "learning_rate": 3.664115889839179e-05,
1348
+ "loss": 2.4633,
1349
+ "step": 10850
1350
+ },
1351
+ {
1352
+ "epoch": 3.79,
1353
+ "learning_rate": 3.653885501493637e-05,
1354
+ "loss": 2.5318,
1355
+ "step": 10900
1356
+ },
1357
+ {
1358
+ "epoch": 3.81,
1359
+ "learning_rate": 3.643655113148095e-05,
1360
+ "loss": 2.4771,
1361
+ "step": 10950
1362
+ },
1363
+ {
1364
+ "epoch": 3.83,
1365
+ "learning_rate": 3.6334247248025535e-05,
1366
+ "loss": 2.5253,
1367
+ "step": 11000
1368
+ },
1369
+ {
1370
+ "epoch": 3.84,
1371
+ "learning_rate": 3.623194336457012e-05,
1372
+ "loss": 2.4974,
1373
+ "step": 11050
1374
+ },
1375
+ {
1376
+ "epoch": 3.86,
1377
+ "learning_rate": 3.61296394811147e-05,
1378
+ "loss": 2.4386,
1379
+ "step": 11100
1380
+ },
1381
+ {
1382
+ "epoch": 3.88,
1383
+ "learning_rate": 3.602733559765929e-05,
1384
+ "loss": 2.4687,
1385
+ "step": 11150
1386
+ },
1387
+ {
1388
+ "epoch": 3.9,
1389
+ "learning_rate": 3.5925031714203874e-05,
1390
+ "loss": 2.4287,
1391
+ "step": 11200
1392
+ },
1393
+ {
1394
+ "epoch": 3.91,
1395
+ "learning_rate": 3.582272783074846e-05,
1396
+ "loss": 2.5498,
1397
+ "step": 11250
1398
+ },
1399
+ {
1400
+ "epoch": 3.93,
1401
+ "learning_rate": 3.572042394729304e-05,
1402
+ "loss": 2.5195,
1403
+ "step": 11300
1404
+ },
1405
+ {
1406
+ "epoch": 3.95,
1407
+ "learning_rate": 3.5618120063837626e-05,
1408
+ "loss": 2.5423,
1409
+ "step": 11350
1410
+ },
1411
+ {
1412
+ "epoch": 3.97,
1413
+ "learning_rate": 3.551581618038221e-05,
1414
+ "loss": 2.4865,
1415
+ "step": 11400
1416
+ },
1417
+ {
1418
+ "epoch": 3.98,
1419
+ "learning_rate": 3.541351229692679e-05,
1420
+ "loss": 2.4827,
1421
+ "step": 11450
1422
+ },
1423
+ {
1424
+ "epoch": 4.0,
1425
+ "learning_rate": 3.531120841347138e-05,
1426
+ "loss": 2.5522,
1427
+ "step": 11500
1428
+ },
1429
+ {
1430
+ "epoch": 4.0,
1431
+ "eval_gen_len": 19.9324,
1432
+ "eval_loss": 3.0988194942474365,
1433
+ "eval_rouge1": 16.8388,
1434
+ "eval_rouge2": 4.3742,
1435
+ "eval_rougeL": 13.5688,
1436
+ "eval_rougeLsum": 14.7003,
1437
+ "eval_runtime": 251.1664,
1438
+ "eval_samples_per_second": 9.249,
1439
+ "eval_steps_per_second": 1.159,
1440
+ "step": 11500
1441
+ },
1442
+ {
1443
+ "epoch": 4.02,
1444
+ "learning_rate": 3.5208904530015964e-05,
1445
+ "loss": 2.1091,
1446
+ "step": 11550
1447
+ },
1448
+ {
1449
+ "epoch": 4.03,
1450
+ "learning_rate": 3.5106600646560544e-05,
1451
+ "loss": 2.0875,
1452
+ "step": 11600
1453
+ },
1454
+ {
1455
+ "epoch": 4.05,
1456
+ "learning_rate": 3.500429676310512e-05,
1457
+ "loss": 2.1041,
1458
+ "step": 11650
1459
+ },
1460
+ {
1461
+ "epoch": 4.07,
1462
+ "learning_rate": 3.4901992879649716e-05,
1463
+ "loss": 2.1667,
1464
+ "step": 11700
1465
+ },
1466
+ {
1467
+ "epoch": 4.09,
1468
+ "learning_rate": 3.4799688996194296e-05,
1469
+ "loss": 2.0991,
1470
+ "step": 11750
1471
+ },
1472
+ {
1473
+ "epoch": 4.1,
1474
+ "learning_rate": 3.469738511273888e-05,
1475
+ "loss": 2.1036,
1476
+ "step": 11800
1477
+ },
1478
+ {
1479
+ "epoch": 4.12,
1480
+ "learning_rate": 3.459508122928347e-05,
1481
+ "loss": 2.0709,
1482
+ "step": 11850
1483
+ },
1484
+ {
1485
+ "epoch": 4.14,
1486
+ "learning_rate": 3.449277734582805e-05,
1487
+ "loss": 2.1313,
1488
+ "step": 11900
1489
+ },
1490
+ {
1491
+ "epoch": 4.16,
1492
+ "learning_rate": 3.4390473462372634e-05,
1493
+ "loss": 2.1212,
1494
+ "step": 11950
1495
+ },
1496
+ {
1497
+ "epoch": 4.17,
1498
+ "learning_rate": 3.428816957891722e-05,
1499
+ "loss": 2.1679,
1500
+ "step": 12000
1501
+ },
1502
+ {
1503
+ "epoch": 4.19,
1504
+ "learning_rate": 3.41858656954618e-05,
1505
+ "loss": 2.1259,
1506
+ "step": 12050
1507
+ },
1508
+ {
1509
+ "epoch": 4.21,
1510
+ "learning_rate": 3.408356181200639e-05,
1511
+ "loss": 2.0973,
1512
+ "step": 12100
1513
+ },
1514
+ {
1515
+ "epoch": 4.23,
1516
+ "learning_rate": 3.3981257928550966e-05,
1517
+ "loss": 2.1457,
1518
+ "step": 12150
1519
+ },
1520
+ {
1521
+ "epoch": 4.24,
1522
+ "learning_rate": 3.387895404509555e-05,
1523
+ "loss": 2.1776,
1524
+ "step": 12200
1525
+ },
1526
+ {
1527
+ "epoch": 4.26,
1528
+ "learning_rate": 3.377665016164014e-05,
1529
+ "loss": 2.202,
1530
+ "step": 12250
1531
+ },
1532
+ {
1533
+ "epoch": 4.28,
1534
+ "learning_rate": 3.367639235585383e-05,
1535
+ "loss": 2.1676,
1536
+ "step": 12300
1537
+ },
1538
+ {
1539
+ "epoch": 4.3,
1540
+ "learning_rate": 3.3574088472398414e-05,
1541
+ "loss": 2.1375,
1542
+ "step": 12350
1543
+ },
1544
+ {
1545
+ "epoch": 4.31,
1546
+ "learning_rate": 3.3471784588943e-05,
1547
+ "loss": 2.1265,
1548
+ "step": 12400
1549
+ },
1550
+ {
1551
+ "epoch": 4.33,
1552
+ "learning_rate": 3.336948070548758e-05,
1553
+ "loss": 2.1044,
1554
+ "step": 12450
1555
+ },
1556
+ {
1557
+ "epoch": 4.35,
1558
+ "learning_rate": 3.326717682203217e-05,
1559
+ "loss": 2.1207,
1560
+ "step": 12500
1561
+ },
1562
+ {
1563
+ "epoch": 4.37,
1564
+ "learning_rate": 3.316487293857675e-05,
1565
+ "loss": 2.1651,
1566
+ "step": 12550
1567
+ },
1568
+ {
1569
+ "epoch": 4.38,
1570
+ "learning_rate": 3.306256905512133e-05,
1571
+ "loss": 2.1117,
1572
+ "step": 12600
1573
+ },
1574
+ {
1575
+ "epoch": 4.4,
1576
+ "learning_rate": 3.296026517166592e-05,
1577
+ "loss": 2.1279,
1578
+ "step": 12650
1579
+ },
1580
+ {
1581
+ "epoch": 4.42,
1582
+ "learning_rate": 3.2857961288210505e-05,
1583
+ "loss": 2.1459,
1584
+ "step": 12700
1585
+ },
1586
+ {
1587
+ "epoch": 4.43,
1588
+ "learning_rate": 3.2755657404755085e-05,
1589
+ "loss": 2.1566,
1590
+ "step": 12750
1591
+ },
1592
+ {
1593
+ "epoch": 4.45,
1594
+ "learning_rate": 3.265335352129967e-05,
1595
+ "loss": 2.1036,
1596
+ "step": 12800
1597
+ },
1598
+ {
1599
+ "epoch": 4.47,
1600
+ "learning_rate": 3.255104963784426e-05,
1601
+ "loss": 2.1034,
1602
+ "step": 12850
1603
+ },
1604
+ {
1605
+ "epoch": 4.49,
1606
+ "learning_rate": 3.244874575438884e-05,
1607
+ "loss": 2.096,
1608
+ "step": 12900
1609
+ },
1610
+ {
1611
+ "epoch": 4.5,
1612
+ "learning_rate": 3.234644187093342e-05,
1613
+ "loss": 2.123,
1614
+ "step": 12950
1615
+ },
1616
+ {
1617
+ "epoch": 4.52,
1618
+ "learning_rate": 3.224413798747801e-05,
1619
+ "loss": 2.1869,
1620
+ "step": 13000
1621
+ },
1622
+ {
1623
+ "epoch": 4.54,
1624
+ "learning_rate": 3.214183410402259e-05,
1625
+ "loss": 2.1858,
1626
+ "step": 13050
1627
+ },
1628
+ {
1629
+ "epoch": 4.56,
1630
+ "learning_rate": 3.2039530220567175e-05,
1631
+ "loss": 2.2084,
1632
+ "step": 13100
1633
+ },
1634
+ {
1635
+ "epoch": 4.57,
1636
+ "learning_rate": 3.193722633711176e-05,
1637
+ "loss": 2.1973,
1638
+ "step": 13150
1639
+ },
1640
+ {
1641
+ "epoch": 4.59,
1642
+ "learning_rate": 3.183492245365634e-05,
1643
+ "loss": 2.1373,
1644
+ "step": 13200
1645
+ },
1646
+ {
1647
+ "epoch": 4.61,
1648
+ "learning_rate": 3.173261857020093e-05,
1649
+ "loss": 2.1881,
1650
+ "step": 13250
1651
+ },
1652
+ {
1653
+ "epoch": 4.63,
1654
+ "learning_rate": 3.163031468674551e-05,
1655
+ "loss": 2.2398,
1656
+ "step": 13300
1657
+ },
1658
+ {
1659
+ "epoch": 4.64,
1660
+ "learning_rate": 3.152801080329009e-05,
1661
+ "loss": 2.1807,
1662
+ "step": 13350
1663
+ },
1664
+ {
1665
+ "epoch": 4.66,
1666
+ "learning_rate": 3.142570691983468e-05,
1667
+ "loss": 2.1782,
1668
+ "step": 13400
1669
+ },
1670
+ {
1671
+ "epoch": 4.68,
1672
+ "learning_rate": 3.132340303637926e-05,
1673
+ "loss": 2.215,
1674
+ "step": 13450
1675
+ },
1676
+ {
1677
+ "epoch": 4.7,
1678
+ "learning_rate": 3.1221099152923846e-05,
1679
+ "loss": 2.131,
1680
+ "step": 13500
1681
+ },
1682
+ {
1683
+ "epoch": 4.71,
1684
+ "learning_rate": 3.111879526946843e-05,
1685
+ "loss": 2.2367,
1686
+ "step": 13550
1687
+ },
1688
+ {
1689
+ "epoch": 4.73,
1690
+ "learning_rate": 3.101649138601301e-05,
1691
+ "loss": 2.2064,
1692
+ "step": 13600
1693
+ },
1694
+ {
1695
+ "epoch": 4.75,
1696
+ "learning_rate": 3.09141875025576e-05,
1697
+ "loss": 2.224,
1698
+ "step": 13650
1699
+ },
1700
+ {
1701
+ "epoch": 4.77,
1702
+ "learning_rate": 3.0811883619102184e-05,
1703
+ "loss": 2.1554,
1704
+ "step": 13700
1705
+ },
1706
+ {
1707
+ "epoch": 4.78,
1708
+ "learning_rate": 3.0709579735646764e-05,
1709
+ "loss": 2.1551,
1710
+ "step": 13750
1711
+ },
1712
+ {
1713
+ "epoch": 4.8,
1714
+ "learning_rate": 3.060727585219135e-05,
1715
+ "loss": 2.1932,
1716
+ "step": 13800
1717
+ },
1718
+ {
1719
+ "epoch": 4.82,
1720
+ "learning_rate": 3.0504971968735936e-05,
1721
+ "loss": 2.2028,
1722
+ "step": 13850
1723
+ },
1724
+ {
1725
+ "epoch": 4.83,
1726
+ "learning_rate": 3.0402668085280516e-05,
1727
+ "loss": 2.1582,
1728
+ "step": 13900
1729
+ },
1730
+ {
1731
+ "epoch": 4.85,
1732
+ "learning_rate": 3.0300364201825105e-05,
1733
+ "loss": 2.2122,
1734
+ "step": 13950
1735
+ },
1736
+ {
1737
+ "epoch": 4.87,
1738
+ "learning_rate": 3.0198060318369685e-05,
1739
+ "loss": 2.2249,
1740
+ "step": 14000
1741
+ },
1742
+ {
1743
+ "epoch": 4.89,
1744
+ "learning_rate": 3.0095756434914268e-05,
1745
+ "loss": 2.1797,
1746
+ "step": 14050
1747
+ },
1748
+ {
1749
+ "epoch": 4.9,
1750
+ "learning_rate": 2.9993452551458858e-05,
1751
+ "loss": 2.186,
1752
+ "step": 14100
1753
+ },
1754
+ {
1755
+ "epoch": 4.92,
1756
+ "learning_rate": 2.9891148668003437e-05,
1757
+ "loss": 2.2225,
1758
+ "step": 14150
1759
+ },
1760
+ {
1761
+ "epoch": 4.94,
1762
+ "learning_rate": 2.9788844784548027e-05,
1763
+ "loss": 2.1545,
1764
+ "step": 14200
1765
+ },
1766
+ {
1767
+ "epoch": 4.96,
1768
+ "learning_rate": 2.9686540901092606e-05,
1769
+ "loss": 2.2314,
1770
+ "step": 14250
1771
+ },
1772
+ {
1773
+ "epoch": 4.97,
1774
+ "learning_rate": 2.958423701763719e-05,
1775
+ "loss": 2.1955,
1776
+ "step": 14300
1777
+ },
1778
+ {
1779
+ "epoch": 4.99,
1780
+ "learning_rate": 2.9481933134181776e-05,
1781
+ "loss": 2.2307,
1782
+ "step": 14350
1783
+ },
1784
+ {
1785
+ "epoch": 5.0,
1786
+ "eval_gen_len": 19.8502,
1787
+ "eval_loss": 3.2058229446411133,
1788
+ "eval_rouge1": 16.4764,
1789
+ "eval_rouge2": 4.2906,
1790
+ "eval_rougeL": 13.3875,
1791
+ "eval_rougeLsum": 14.5223,
1792
+ "eval_runtime": 251.2531,
1793
+ "eval_samples_per_second": 9.246,
1794
+ "eval_steps_per_second": 1.158,
1795
+ "step": 14375
1796
+ },
1797
+ {
1798
+ "epoch": 5.01,
1799
+ "learning_rate": 2.937962925072636e-05,
1800
+ "loss": 2.0556,
1801
+ "step": 14400
1802
+ },
1803
+ {
1804
+ "epoch": 5.03,
1805
+ "learning_rate": 2.927732536727094e-05,
1806
+ "loss": 1.7699,
1807
+ "step": 14450
1808
+ },
1809
+ {
1810
+ "epoch": 5.04,
1811
+ "learning_rate": 2.9175021483815528e-05,
1812
+ "loss": 1.8145,
1813
+ "step": 14500
1814
+ },
1815
+ {
1816
+ "epoch": 5.06,
1817
+ "learning_rate": 2.907271760036011e-05,
1818
+ "loss": 1.8895,
1819
+ "step": 14550
1820
+ },
1821
+ {
1822
+ "epoch": 5.08,
1823
+ "learning_rate": 2.8970413716904694e-05,
1824
+ "loss": 1.8408,
1825
+ "step": 14600
1826
+ },
1827
+ {
1828
+ "epoch": 5.1,
1829
+ "learning_rate": 2.886810983344928e-05,
1830
+ "loss": 1.8626,
1831
+ "step": 14650
1832
+ },
1833
+ {
1834
+ "epoch": 5.11,
1835
+ "learning_rate": 2.8765805949993863e-05,
1836
+ "loss": 1.8605,
1837
+ "step": 14700
1838
+ },
1839
+ {
1840
+ "epoch": 5.13,
1841
+ "learning_rate": 2.8663502066538446e-05,
1842
+ "loss": 1.8738,
1843
+ "step": 14750
1844
+ },
1845
+ {
1846
+ "epoch": 5.15,
1847
+ "learning_rate": 2.8561198183083032e-05,
1848
+ "loss": 1.8194,
1849
+ "step": 14800
1850
+ },
1851
+ {
1852
+ "epoch": 5.17,
1853
+ "learning_rate": 2.8458894299627615e-05,
1854
+ "loss": 1.8558,
1855
+ "step": 14850
1856
+ },
1857
+ {
1858
+ "epoch": 5.18,
1859
+ "learning_rate": 2.83565904161722e-05,
1860
+ "loss": 1.8444,
1861
+ "step": 14900
1862
+ },
1863
+ {
1864
+ "epoch": 5.2,
1865
+ "learning_rate": 2.8254286532716784e-05,
1866
+ "loss": 1.8405,
1867
+ "step": 14950
1868
+ },
1869
+ {
1870
+ "epoch": 5.22,
1871
+ "learning_rate": 2.8151982649261367e-05,
1872
+ "loss": 1.8181,
1873
+ "step": 15000
1874
+ },
1875
+ {
1876
+ "epoch": 5.23,
1877
+ "learning_rate": 2.8049678765805954e-05,
1878
+ "loss": 1.8897,
1879
+ "step": 15050
1880
+ },
1881
+ {
1882
+ "epoch": 5.25,
1883
+ "learning_rate": 2.7947374882350537e-05,
1884
+ "loss": 1.8484,
1885
+ "step": 15100
1886
+ },
1887
+ {
1888
+ "epoch": 5.27,
1889
+ "learning_rate": 2.7845070998895116e-05,
1890
+ "loss": 1.8372,
1891
+ "step": 15150
1892
+ },
1893
+ {
1894
+ "epoch": 5.29,
1895
+ "learning_rate": 2.7742767115439706e-05,
1896
+ "loss": 1.9071,
1897
+ "step": 15200
1898
+ },
1899
+ {
1900
+ "epoch": 5.3,
1901
+ "learning_rate": 2.764046323198429e-05,
1902
+ "loss": 1.9168,
1903
+ "step": 15250
1904
+ },
1905
+ {
1906
+ "epoch": 5.32,
1907
+ "learning_rate": 2.7538159348528868e-05,
1908
+ "loss": 1.9397,
1909
+ "step": 15300
1910
+ },
1911
+ {
1912
+ "epoch": 5.34,
1913
+ "learning_rate": 2.7435855465073458e-05,
1914
+ "loss": 1.9043,
1915
+ "step": 15350
1916
+ },
1917
+ {
1918
+ "epoch": 5.36,
1919
+ "learning_rate": 2.7333551581618038e-05,
1920
+ "loss": 1.8516,
1921
+ "step": 15400
1922
+ },
1923
+ {
1924
+ "epoch": 5.37,
1925
+ "learning_rate": 2.7231247698162627e-05,
1926
+ "loss": 1.8666,
1927
+ "step": 15450
1928
+ },
1929
+ {
1930
+ "epoch": 5.39,
1931
+ "learning_rate": 2.7128943814707207e-05,
1932
+ "loss": 1.8822,
1933
+ "step": 15500
1934
+ },
1935
+ {
1936
+ "epoch": 5.41,
1937
+ "learning_rate": 2.702663993125179e-05,
1938
+ "loss": 1.9649,
1939
+ "step": 15550
1940
+ },
1941
+ {
1942
+ "epoch": 5.43,
1943
+ "learning_rate": 2.692433604779638e-05,
1944
+ "loss": 1.9319,
1945
+ "step": 15600
1946
+ },
1947
+ {
1948
+ "epoch": 5.44,
1949
+ "learning_rate": 2.682203216434096e-05,
1950
+ "loss": 1.8705,
1951
+ "step": 15650
1952
+ },
1953
+ {
1954
+ "epoch": 5.46,
1955
+ "learning_rate": 2.6719728280885542e-05,
1956
+ "loss": 1.8545,
1957
+ "step": 15700
1958
+ },
1959
+ {
1960
+ "epoch": 5.48,
1961
+ "learning_rate": 2.6617424397430128e-05,
1962
+ "loss": 1.8977,
1963
+ "step": 15750
1964
+ },
1965
+ {
1966
+ "epoch": 5.5,
1967
+ "learning_rate": 2.651512051397471e-05,
1968
+ "loss": 1.9078,
1969
+ "step": 15800
1970
+ },
1971
+ {
1972
+ "epoch": 5.51,
1973
+ "learning_rate": 2.6412816630519294e-05,
1974
+ "loss": 1.907,
1975
+ "step": 15850
1976
+ },
1977
+ {
1978
+ "epoch": 5.53,
1979
+ "learning_rate": 2.631051274706388e-05,
1980
+ "loss": 1.9019,
1981
+ "step": 15900
1982
+ },
1983
+ {
1984
+ "epoch": 5.55,
1985
+ "learning_rate": 2.6208208863608463e-05,
1986
+ "loss": 1.8915,
1987
+ "step": 15950
1988
+ },
1989
+ {
1990
+ "epoch": 5.57,
1991
+ "learning_rate": 2.6105904980153046e-05,
1992
+ "loss": 1.9043,
1993
+ "step": 16000
1994
+ },
1995
+ {
1996
+ "epoch": 5.58,
1997
+ "learning_rate": 2.6003601096697633e-05,
1998
+ "loss": 1.8562,
1999
+ "step": 16050
2000
+ },
2001
+ {
2002
+ "epoch": 5.6,
2003
+ "learning_rate": 2.5901297213242215e-05,
2004
+ "loss": 1.8799,
2005
+ "step": 16100
2006
+ },
2007
+ {
2008
+ "epoch": 5.62,
2009
+ "learning_rate": 2.5798993329786802e-05,
2010
+ "loss": 1.9325,
2011
+ "step": 16150
2012
+ },
2013
+ {
2014
+ "epoch": 5.63,
2015
+ "learning_rate": 2.5696689446331385e-05,
2016
+ "loss": 1.85,
2017
+ "step": 16200
2018
+ },
2019
+ {
2020
+ "epoch": 5.65,
2021
+ "learning_rate": 2.5594385562875968e-05,
2022
+ "loss": 1.9304,
2023
+ "step": 16250
2024
+ },
2025
+ {
2026
+ "epoch": 5.67,
2027
+ "learning_rate": 2.5492081679420554e-05,
2028
+ "loss": 1.9084,
2029
+ "step": 16300
2030
+ },
2031
+ {
2032
+ "epoch": 5.69,
2033
+ "learning_rate": 2.5389777795965137e-05,
2034
+ "loss": 1.9046,
2035
+ "step": 16350
2036
+ },
2037
+ {
2038
+ "epoch": 5.7,
2039
+ "learning_rate": 2.528747391250972e-05,
2040
+ "loss": 1.9901,
2041
+ "step": 16400
2042
+ },
2043
+ {
2044
+ "epoch": 5.72,
2045
+ "learning_rate": 2.5185170029054306e-05,
2046
+ "loss": 1.9568,
2047
+ "step": 16450
2048
+ },
2049
+ {
2050
+ "epoch": 5.74,
2051
+ "learning_rate": 2.5084912223268e-05,
2052
+ "loss": 1.8857,
2053
+ "step": 16500
2054
+ },
2055
+ {
2056
+ "epoch": 5.76,
2057
+ "learning_rate": 2.498260833981258e-05,
2058
+ "loss": 1.8836,
2059
+ "step": 16550
2060
+ },
2061
+ {
2062
+ "epoch": 5.77,
2063
+ "learning_rate": 2.4880304456357165e-05,
2064
+ "loss": 1.8819,
2065
+ "step": 16600
2066
+ },
2067
+ {
2068
+ "epoch": 5.79,
2069
+ "learning_rate": 2.477800057290175e-05,
2070
+ "loss": 1.926,
2071
+ "step": 16650
2072
+ },
2073
+ {
2074
+ "epoch": 5.81,
2075
+ "learning_rate": 2.467569668944633e-05,
2076
+ "loss": 1.8706,
2077
+ "step": 16700
2078
+ },
2079
+ {
2080
+ "epoch": 5.83,
2081
+ "learning_rate": 2.4573392805990917e-05,
2082
+ "loss": 1.9216,
2083
+ "step": 16750
2084
+ },
2085
+ {
2086
+ "epoch": 5.84,
2087
+ "learning_rate": 2.44710889225355e-05,
2088
+ "loss": 1.9101,
2089
+ "step": 16800
2090
+ },
2091
+ {
2092
+ "epoch": 5.86,
2093
+ "learning_rate": 2.4368785039080086e-05,
2094
+ "loss": 1.9195,
2095
+ "step": 16850
2096
+ },
2097
+ {
2098
+ "epoch": 5.88,
2099
+ "learning_rate": 2.426648115562467e-05,
2100
+ "loss": 1.9217,
2101
+ "step": 16900
2102
+ },
2103
+ {
2104
+ "epoch": 5.9,
2105
+ "learning_rate": 2.4164177272169252e-05,
2106
+ "loss": 1.9095,
2107
+ "step": 16950
2108
+ },
2109
+ {
2110
+ "epoch": 5.91,
2111
+ "learning_rate": 2.4061873388713838e-05,
2112
+ "loss": 1.9033,
2113
+ "step": 17000
2114
+ },
2115
+ {
2116
+ "epoch": 5.93,
2117
+ "learning_rate": 2.395956950525842e-05,
2118
+ "loss": 1.9233,
2119
+ "step": 17050
2120
+ },
2121
+ {
2122
+ "epoch": 5.95,
2123
+ "learning_rate": 2.3857265621803004e-05,
2124
+ "loss": 1.9185,
2125
+ "step": 17100
2126
+ },
2127
+ {
2128
+ "epoch": 5.97,
2129
+ "learning_rate": 2.375496173834759e-05,
2130
+ "loss": 1.9486,
2131
+ "step": 17150
2132
+ },
2133
+ {
2134
+ "epoch": 5.98,
2135
+ "learning_rate": 2.3652657854892173e-05,
2136
+ "loss": 1.9232,
2137
+ "step": 17200
2138
+ },
2139
+ {
2140
+ "epoch": 6.0,
2141
+ "learning_rate": 2.3550353971436756e-05,
2142
+ "loss": 1.8381,
2143
+ "step": 17250
2144
+ },
2145
+ {
2146
+ "epoch": 6.0,
2147
+ "eval_gen_len": 19.9681,
2148
+ "eval_loss": 3.3179376125335693,
2149
+ "eval_rouge1": 16.6764,
2150
+ "eval_rouge2": 4.4834,
2151
+ "eval_rougeL": 13.5489,
2152
+ "eval_rougeLsum": 14.6173,
2153
+ "eval_runtime": 251.1063,
2154
+ "eval_samples_per_second": 9.251,
2155
+ "eval_steps_per_second": 1.159,
2156
+ "step": 17250
2157
+ },
2158
+ {
2159
+ "epoch": 6.02,
2160
+ "learning_rate": 2.344805008798134e-05,
2161
+ "loss": 1.636,
2162
+ "step": 17300
2163
+ },
2164
+ {
2165
+ "epoch": 6.03,
2166
+ "learning_rate": 2.3345746204525926e-05,
2167
+ "loss": 1.6805,
2168
+ "step": 17350
2169
+ },
2170
+ {
2171
+ "epoch": 6.05,
2172
+ "learning_rate": 2.324344232107051e-05,
2173
+ "loss": 1.6332,
2174
+ "step": 17400
2175
+ },
2176
+ {
2177
+ "epoch": 6.07,
2178
+ "learning_rate": 2.314113843761509e-05,
2179
+ "loss": 1.6295,
2180
+ "step": 17450
2181
+ },
2182
+ {
2183
+ "epoch": 6.09,
2184
+ "learning_rate": 2.3038834554159678e-05,
2185
+ "loss": 1.6199,
2186
+ "step": 17500
2187
+ },
2188
+ {
2189
+ "epoch": 6.1,
2190
+ "learning_rate": 2.293653067070426e-05,
2191
+ "loss": 1.6603,
2192
+ "step": 17550
2193
+ },
2194
+ {
2195
+ "epoch": 6.12,
2196
+ "learning_rate": 2.2834226787248844e-05,
2197
+ "loss": 1.6277,
2198
+ "step": 17600
2199
+ },
2200
+ {
2201
+ "epoch": 6.14,
2202
+ "learning_rate": 2.273192290379343e-05,
2203
+ "loss": 1.5951,
2204
+ "step": 17650
2205
+ },
2206
+ {
2207
+ "epoch": 6.16,
2208
+ "learning_rate": 2.2629619020338013e-05,
2209
+ "loss": 1.627,
2210
+ "step": 17700
2211
+ },
2212
+ {
2213
+ "epoch": 6.17,
2214
+ "learning_rate": 2.2527315136882596e-05,
2215
+ "loss": 1.6449,
2216
+ "step": 17750
2217
+ },
2218
+ {
2219
+ "epoch": 6.19,
2220
+ "learning_rate": 2.2425011253427182e-05,
2221
+ "loss": 1.685,
2222
+ "step": 17800
2223
+ },
2224
+ {
2225
+ "epoch": 6.21,
2226
+ "learning_rate": 2.2322707369971765e-05,
2227
+ "loss": 1.6253,
2228
+ "step": 17850
2229
+ },
2230
+ {
2231
+ "epoch": 6.23,
2232
+ "learning_rate": 2.222040348651635e-05,
2233
+ "loss": 1.6401,
2234
+ "step": 17900
2235
+ },
2236
+ {
2237
+ "epoch": 6.24,
2238
+ "learning_rate": 2.211809960306093e-05,
2239
+ "loss": 1.6131,
2240
+ "step": 17950
2241
+ },
2242
+ {
2243
+ "epoch": 6.26,
2244
+ "learning_rate": 2.2015795719605517e-05,
2245
+ "loss": 1.6889,
2246
+ "step": 18000
2247
+ },
2248
+ {
2249
+ "epoch": 6.28,
2250
+ "learning_rate": 2.1913491836150104e-05,
2251
+ "loss": 1.6918,
2252
+ "step": 18050
2253
+ },
2254
+ {
2255
+ "epoch": 6.3,
2256
+ "learning_rate": 2.1811187952694686e-05,
2257
+ "loss": 1.6414,
2258
+ "step": 18100
2259
+ },
2260
+ {
2261
+ "epoch": 6.31,
2262
+ "learning_rate": 2.170888406923927e-05,
2263
+ "loss": 1.6895,
2264
+ "step": 18150
2265
+ },
2266
+ {
2267
+ "epoch": 6.33,
2268
+ "learning_rate": 2.1606580185783852e-05,
2269
+ "loss": 1.6829,
2270
+ "step": 18200
2271
+ },
2272
+ {
2273
+ "epoch": 6.35,
2274
+ "learning_rate": 2.150427630232844e-05,
2275
+ "loss": 1.6401,
2276
+ "step": 18250
2277
+ },
2278
+ {
2279
+ "epoch": 6.37,
2280
+ "learning_rate": 2.140197241887302e-05,
2281
+ "loss": 1.6438,
2282
+ "step": 18300
2283
+ },
2284
+ {
2285
+ "epoch": 6.38,
2286
+ "learning_rate": 2.1299668535417604e-05,
2287
+ "loss": 1.6432,
2288
+ "step": 18350
2289
+ },
2290
+ {
2291
+ "epoch": 6.4,
2292
+ "learning_rate": 2.119736465196219e-05,
2293
+ "loss": 1.6894,
2294
+ "step": 18400
2295
+ },
2296
+ {
2297
+ "epoch": 6.42,
2298
+ "learning_rate": 2.1095060768506774e-05,
2299
+ "loss": 1.6283,
2300
+ "step": 18450
2301
+ },
2302
+ {
2303
+ "epoch": 6.43,
2304
+ "learning_rate": 2.0992756885051357e-05,
2305
+ "loss": 1.6865,
2306
+ "step": 18500
2307
+ },
2308
+ {
2309
+ "epoch": 6.45,
2310
+ "learning_rate": 2.0890453001595943e-05,
2311
+ "loss": 1.6672,
2312
+ "step": 18550
2313
+ },
2314
+ {
2315
+ "epoch": 6.47,
2316
+ "learning_rate": 2.0790195195809632e-05,
2317
+ "loss": 1.6886,
2318
+ "step": 18600
2319
+ },
2320
+ {
2321
+ "epoch": 6.49,
2322
+ "learning_rate": 2.068789131235422e-05,
2323
+ "loss": 1.6576,
2324
+ "step": 18650
2325
+ },
2326
+ {
2327
+ "epoch": 6.5,
2328
+ "learning_rate": 2.0585587428898805e-05,
2329
+ "loss": 1.6552,
2330
+ "step": 18700
2331
+ },
2332
+ {
2333
+ "epoch": 6.52,
2334
+ "learning_rate": 2.0483283545443384e-05,
2335
+ "loss": 1.6455,
2336
+ "step": 18750
2337
+ },
2338
+ {
2339
+ "epoch": 6.54,
2340
+ "learning_rate": 2.038097966198797e-05,
2341
+ "loss": 1.6223,
2342
+ "step": 18800
2343
+ },
2344
+ {
2345
+ "epoch": 6.56,
2346
+ "learning_rate": 2.0278675778532554e-05,
2347
+ "loss": 1.6833,
2348
+ "step": 18850
2349
+ },
2350
+ {
2351
+ "epoch": 6.57,
2352
+ "learning_rate": 2.0176371895077137e-05,
2353
+ "loss": 1.6283,
2354
+ "step": 18900
2355
+ },
2356
+ {
2357
+ "epoch": 6.59,
2358
+ "learning_rate": 2.0074068011621723e-05,
2359
+ "loss": 1.6394,
2360
+ "step": 18950
2361
+ },
2362
+ {
2363
+ "epoch": 6.61,
2364
+ "learning_rate": 1.9971764128166306e-05,
2365
+ "loss": 1.7302,
2366
+ "step": 19000
2367
+ },
2368
+ {
2369
+ "epoch": 6.63,
2370
+ "learning_rate": 1.9869460244710892e-05,
2371
+ "loss": 1.6423,
2372
+ "step": 19050
2373
+ },
2374
+ {
2375
+ "epoch": 6.64,
2376
+ "learning_rate": 1.9767156361255472e-05,
2377
+ "loss": 1.6277,
2378
+ "step": 19100
2379
+ },
2380
+ {
2381
+ "epoch": 6.66,
2382
+ "learning_rate": 1.9664852477800058e-05,
2383
+ "loss": 1.6844,
2384
+ "step": 19150
2385
+ },
2386
+ {
2387
+ "epoch": 6.68,
2388
+ "learning_rate": 1.9562548594344644e-05,
2389
+ "loss": 1.6526,
2390
+ "step": 19200
2391
+ },
2392
+ {
2393
+ "epoch": 6.7,
2394
+ "learning_rate": 1.9460244710889224e-05,
2395
+ "loss": 1.6732,
2396
+ "step": 19250
2397
+ },
2398
+ {
2399
+ "epoch": 6.71,
2400
+ "learning_rate": 1.935794082743381e-05,
2401
+ "loss": 1.6854,
2402
+ "step": 19300
2403
+ },
2404
+ {
2405
+ "epoch": 6.73,
2406
+ "learning_rate": 1.9255636943978393e-05,
2407
+ "loss": 1.6902,
2408
+ "step": 19350
2409
+ },
2410
+ {
2411
+ "epoch": 6.75,
2412
+ "learning_rate": 1.915333306052298e-05,
2413
+ "loss": 1.6522,
2414
+ "step": 19400
2415
+ },
2416
+ {
2417
+ "epoch": 6.77,
2418
+ "learning_rate": 1.9051029177067562e-05,
2419
+ "loss": 1.7403,
2420
+ "step": 19450
2421
+ },
2422
+ {
2423
+ "epoch": 6.78,
2424
+ "learning_rate": 1.8948725293612145e-05,
2425
+ "loss": 1.6806,
2426
+ "step": 19500
2427
+ },
2428
+ {
2429
+ "epoch": 6.8,
2430
+ "learning_rate": 1.884642141015673e-05,
2431
+ "loss": 1.6889,
2432
+ "step": 19550
2433
+ },
2434
+ {
2435
+ "epoch": 6.82,
2436
+ "learning_rate": 1.8744117526701315e-05,
2437
+ "loss": 1.6665,
2438
+ "step": 19600
2439
+ },
2440
+ {
2441
+ "epoch": 6.83,
2442
+ "learning_rate": 1.8641813643245897e-05,
2443
+ "loss": 1.6874,
2444
+ "step": 19650
2445
+ },
2446
+ {
2447
+ "epoch": 6.85,
2448
+ "learning_rate": 1.8539509759790484e-05,
2449
+ "loss": 1.6686,
2450
+ "step": 19700
2451
+ },
2452
+ {
2453
+ "epoch": 6.87,
2454
+ "learning_rate": 1.8437205876335067e-05,
2455
+ "loss": 1.6768,
2456
+ "step": 19750
2457
+ },
2458
+ {
2459
+ "epoch": 6.89,
2460
+ "learning_rate": 1.833490199287965e-05,
2461
+ "loss": 1.6844,
2462
+ "step": 19800
2463
+ },
2464
+ {
2465
+ "epoch": 6.9,
2466
+ "learning_rate": 1.8232598109424236e-05,
2467
+ "loss": 1.6395,
2468
+ "step": 19850
2469
+ },
2470
+ {
2471
+ "epoch": 6.92,
2472
+ "learning_rate": 1.813029422596882e-05,
2473
+ "loss": 1.6785,
2474
+ "step": 19900
2475
+ },
2476
+ {
2477
+ "epoch": 6.94,
2478
+ "learning_rate": 1.8027990342513405e-05,
2479
+ "loss": 1.705,
2480
+ "step": 19950
2481
+ },
2482
+ {
2483
+ "epoch": 6.96,
2484
+ "learning_rate": 1.7925686459057985e-05,
2485
+ "loss": 1.6897,
2486
+ "step": 20000
2487
+ },
2488
+ {
2489
+ "epoch": 6.97,
2490
+ "learning_rate": 1.782338257560257e-05,
2491
+ "loss": 1.6934,
2492
+ "step": 20050
2493
+ },
2494
+ {
2495
+ "epoch": 6.99,
2496
+ "learning_rate": 1.7721078692147154e-05,
2497
+ "loss": 1.6203,
2498
+ "step": 20100
2499
+ },
2500
+ {
2501
+ "epoch": 7.0,
2502
+ "eval_gen_len": 19.9105,
2503
+ "eval_loss": 3.476348638534546,
2504
+ "eval_rouge1": 17.0434,
2505
+ "eval_rouge2": 4.5045,
2506
+ "eval_rougeL": 13.8329,
2507
+ "eval_rougeLsum": 14.9286,
2508
+ "eval_runtime": 251.3557,
2509
+ "eval_samples_per_second": 9.242,
2510
+ "eval_steps_per_second": 1.158,
2511
+ "step": 20125
2512
+ },
2513
+ {
2514
+ "epoch": 7.01,
2515
+ "learning_rate": 1.7618774808691737e-05,
2516
+ "loss": 1.5313,
2517
+ "step": 20150
2518
+ },
2519
+ {
2520
+ "epoch": 7.03,
2521
+ "learning_rate": 1.7516470925236323e-05,
2522
+ "loss": 1.4404,
2523
+ "step": 20200
2524
+ },
2525
+ {
2526
+ "epoch": 7.04,
2527
+ "learning_rate": 1.7414167041780906e-05,
2528
+ "loss": 1.4907,
2529
+ "step": 20250
2530
+ },
2531
+ {
2532
+ "epoch": 7.06,
2533
+ "learning_rate": 1.7311863158325493e-05,
2534
+ "loss": 1.4881,
2535
+ "step": 20300
2536
+ },
2537
+ {
2538
+ "epoch": 7.08,
2539
+ "learning_rate": 1.7209559274870075e-05,
2540
+ "loss": 1.4657,
2541
+ "step": 20350
2542
+ },
2543
+ {
2544
+ "epoch": 7.1,
2545
+ "learning_rate": 1.710725539141466e-05,
2546
+ "loss": 1.4654,
2547
+ "step": 20400
2548
+ },
2549
+ {
2550
+ "epoch": 7.11,
2551
+ "learning_rate": 1.7004951507959245e-05,
2552
+ "loss": 1.465,
2553
+ "step": 20450
2554
+ },
2555
+ {
2556
+ "epoch": 7.13,
2557
+ "learning_rate": 1.6902647624503824e-05,
2558
+ "loss": 1.4669,
2559
+ "step": 20500
2560
+ },
2561
+ {
2562
+ "epoch": 7.15,
2563
+ "learning_rate": 1.680034374104841e-05,
2564
+ "loss": 1.4409,
2565
+ "step": 20550
2566
+ },
2567
+ {
2568
+ "epoch": 7.17,
2569
+ "learning_rate": 1.6698039857592997e-05,
2570
+ "loss": 1.4553,
2571
+ "step": 20600
2572
+ },
2573
+ {
2574
+ "epoch": 7.18,
2575
+ "learning_rate": 1.659573597413758e-05,
2576
+ "loss": 1.4388,
2577
+ "step": 20650
2578
+ },
2579
+ {
2580
+ "epoch": 7.2,
2581
+ "learning_rate": 1.6493432090682163e-05,
2582
+ "loss": 1.4827,
2583
+ "step": 20700
2584
+ },
2585
+ {
2586
+ "epoch": 7.22,
2587
+ "learning_rate": 1.6391128207226746e-05,
2588
+ "loss": 1.4422,
2589
+ "step": 20750
2590
+ },
2591
+ {
2592
+ "epoch": 7.23,
2593
+ "learning_rate": 1.629087040144044e-05,
2594
+ "loss": 1.5225,
2595
+ "step": 20800
2596
+ },
2597
+ {
2598
+ "epoch": 7.25,
2599
+ "learning_rate": 1.6188566517985025e-05,
2600
+ "loss": 1.4795,
2601
+ "step": 20850
2602
+ },
2603
+ {
2604
+ "epoch": 7.27,
2605
+ "learning_rate": 1.6086262634529608e-05,
2606
+ "loss": 1.4375,
2607
+ "step": 20900
2608
+ },
2609
+ {
2610
+ "epoch": 7.29,
2611
+ "learning_rate": 1.598395875107419e-05,
2612
+ "loss": 1.4506,
2613
+ "step": 20950
2614
+ },
2615
+ {
2616
+ "epoch": 7.3,
2617
+ "learning_rate": 1.5881654867618777e-05,
2618
+ "loss": 1.4734,
2619
+ "step": 21000
2620
+ },
2621
+ {
2622
+ "epoch": 7.32,
2623
+ "learning_rate": 1.577935098416336e-05,
2624
+ "loss": 1.483,
2625
+ "step": 21050
2626
+ },
2627
+ {
2628
+ "epoch": 7.34,
2629
+ "learning_rate": 1.5677047100707943e-05,
2630
+ "loss": 1.4546,
2631
+ "step": 21100
2632
+ },
2633
+ {
2634
+ "epoch": 7.36,
2635
+ "learning_rate": 1.5574743217252526e-05,
2636
+ "loss": 1.4732,
2637
+ "step": 21150
2638
+ },
2639
+ {
2640
+ "epoch": 7.37,
2641
+ "learning_rate": 1.5472439333797112e-05,
2642
+ "loss": 1.4479,
2643
+ "step": 21200
2644
+ },
2645
+ {
2646
+ "epoch": 7.39,
2647
+ "learning_rate": 1.5370135450341698e-05,
2648
+ "loss": 1.4968,
2649
+ "step": 21250
2650
+ },
2651
+ {
2652
+ "epoch": 7.41,
2653
+ "learning_rate": 1.5267831566886278e-05,
2654
+ "loss": 1.5115,
2655
+ "step": 21300
2656
+ },
2657
+ {
2658
+ "epoch": 7.43,
2659
+ "learning_rate": 1.5165527683430864e-05,
2660
+ "loss": 1.4405,
2661
+ "step": 21350
2662
+ },
2663
+ {
2664
+ "epoch": 7.44,
2665
+ "learning_rate": 1.5063223799975449e-05,
2666
+ "loss": 1.4919,
2667
+ "step": 21400
2668
+ },
2669
+ {
2670
+ "epoch": 7.46,
2671
+ "learning_rate": 1.4960919916520033e-05,
2672
+ "loss": 1.4872,
2673
+ "step": 21450
2674
+ },
2675
+ {
2676
+ "epoch": 7.48,
2677
+ "learning_rate": 1.4858616033064615e-05,
2678
+ "loss": 1.4497,
2679
+ "step": 21500
2680
+ },
2681
+ {
2682
+ "epoch": 7.5,
2683
+ "learning_rate": 1.47563121496092e-05,
2684
+ "loss": 1.5039,
2685
+ "step": 21550
2686
+ },
2687
+ {
2688
+ "epoch": 7.51,
2689
+ "learning_rate": 1.4654008266153786e-05,
2690
+ "loss": 1.4613,
2691
+ "step": 21600
2692
+ },
2693
+ {
2694
+ "epoch": 7.53,
2695
+ "learning_rate": 1.4551704382698367e-05,
2696
+ "loss": 1.441,
2697
+ "step": 21650
2698
+ },
2699
+ {
2700
+ "epoch": 7.55,
2701
+ "learning_rate": 1.4449400499242951e-05,
2702
+ "loss": 1.4831,
2703
+ "step": 21700
2704
+ },
2705
+ {
2706
+ "epoch": 7.57,
2707
+ "learning_rate": 1.4347096615787536e-05,
2708
+ "loss": 1.4237,
2709
+ "step": 21750
2710
+ },
2711
+ {
2712
+ "epoch": 7.58,
2713
+ "learning_rate": 1.424479273233212e-05,
2714
+ "loss": 1.4776,
2715
+ "step": 21800
2716
+ },
2717
+ {
2718
+ "epoch": 7.6,
2719
+ "learning_rate": 1.4142488848876704e-05,
2720
+ "loss": 1.4167,
2721
+ "step": 21850
2722
+ },
2723
+ {
2724
+ "epoch": 7.62,
2725
+ "learning_rate": 1.4040184965421288e-05,
2726
+ "loss": 1.4623,
2727
+ "step": 21900
2728
+ },
2729
+ {
2730
+ "epoch": 7.63,
2731
+ "learning_rate": 1.3937881081965873e-05,
2732
+ "loss": 1.4778,
2733
+ "step": 21950
2734
+ },
2735
+ {
2736
+ "epoch": 7.65,
2737
+ "learning_rate": 1.3835577198510454e-05,
2738
+ "loss": 1.4511,
2739
+ "step": 22000
2740
+ },
2741
+ {
2742
+ "epoch": 7.67,
2743
+ "learning_rate": 1.373327331505504e-05,
2744
+ "loss": 1.465,
2745
+ "step": 22050
2746
+ },
2747
+ {
2748
+ "epoch": 7.69,
2749
+ "learning_rate": 1.3630969431599625e-05,
2750
+ "loss": 1.4614,
2751
+ "step": 22100
2752
+ },
2753
+ {
2754
+ "epoch": 7.7,
2755
+ "learning_rate": 1.352866554814421e-05,
2756
+ "loss": 1.4325,
2757
+ "step": 22150
2758
+ },
2759
+ {
2760
+ "epoch": 7.72,
2761
+ "learning_rate": 1.342636166468879e-05,
2762
+ "loss": 1.4482,
2763
+ "step": 22200
2764
+ },
2765
+ {
2766
+ "epoch": 7.74,
2767
+ "learning_rate": 1.3324057781233375e-05,
2768
+ "loss": 1.5127,
2769
+ "step": 22250
2770
+ },
2771
+ {
2772
+ "epoch": 7.76,
2773
+ "learning_rate": 1.322175389777796e-05,
2774
+ "loss": 1.4358,
2775
+ "step": 22300
2776
  },
2777
  {
2778
+ "epoch": 7.77,
2779
+ "learning_rate": 1.3119450014322543e-05,
2780
+ "loss": 1.457,
2781
+ "step": 22350
 
 
 
 
 
 
 
2782
  },
2783
  {
2784
+ "epoch": 7.79,
2785
+ "learning_rate": 1.3017146130867128e-05,
2786
+ "loss": 1.5029,
2787
+ "step": 22400
2788
  },
2789
  {
2790
+ "epoch": 7.81,
2791
+ "learning_rate": 1.2914842247411712e-05,
2792
+ "loss": 1.4424,
2793
+ "step": 22450
2794
  },
2795
  {
2796
+ "epoch": 7.83,
2797
+ "learning_rate": 1.2812538363956297e-05,
2798
+ "loss": 1.5116,
2799
+ "step": 22500
2800
  },
2801
  {
2802
+ "epoch": 7.84,
2803
+ "learning_rate": 1.271023448050088e-05,
2804
+ "loss": 1.4955,
2805
+ "step": 22550
 
 
 
 
 
 
 
2806
  },
2807
  {
2808
+ "epoch": 7.86,
2809
+ "learning_rate": 1.2607930597045464e-05,
2810
+ "loss": 1.5251,
2811
+ "step": 22600
2812
  },
2813
  {
2814
+ "epoch": 7.88,
2815
+ "learning_rate": 1.2505626713590049e-05,
2816
+ "loss": 1.5159,
2817
+ "step": 22650
2818
  },
2819
  {
2820
+ "epoch": 7.9,
2821
+ "learning_rate": 1.2403322830134632e-05,
2822
+ "loss": 1.4945,
2823
+ "step": 22700
 
 
 
 
 
 
 
2824
  },
2825
  {
2826
+ "epoch": 7.91,
2827
+ "learning_rate": 1.2301018946679217e-05,
2828
+ "loss": 1.4622,
2829
+ "step": 22750
2830
  },
2831
  {
2832
+ "epoch": 7.93,
2833
+ "learning_rate": 1.2198715063223801e-05,
2834
+ "loss": 1.4704,
2835
+ "step": 22800
2836
  },
2837
  {
2838
+ "epoch": 7.95,
2839
+ "learning_rate": 1.2096411179768384e-05,
2840
+ "loss": 1.548,
2841
+ "step": 22850
2842
  },
2843
  {
2844
+ "epoch": 7.97,
2845
+ "learning_rate": 1.1994107296312969e-05,
2846
+ "loss": 1.4293,
2847
+ "step": 22900
 
 
 
 
 
 
 
2848
  },
2849
  {
2850
+ "epoch": 7.98,
2851
+ "learning_rate": 1.1891803412857552e-05,
2852
+ "loss": 1.4121,
2853
+ "step": 22950
2854
  },
2855
  {
2856
+ "epoch": 8.0,
2857
+ "learning_rate": 1.1791545607071244e-05,
2858
+ "loss": 1.4982,
2859
+ "step": 23000
2860
  },
2861
  {
2862
+ "epoch": 8.0,
2863
+ "eval_gen_len": 19.9539,
2864
+ "eval_loss": 3.6031477451324463,
2865
+ "eval_rouge1": 17.0044,
2866
+ "eval_rouge2": 4.7727,
2867
+ "eval_rougeL": 13.8743,
2868
+ "eval_rougeLsum": 14.9683,
2869
+ "eval_runtime": 251.1177,
2870
+ "eval_samples_per_second": 9.251,
2871
+ "eval_steps_per_second": 1.159,
2872
+ "step": 23000
2873
  },
2874
  {
2875
+ "epoch": 8.02,
2876
+ "learning_rate": 1.1689241723615829e-05,
2877
+ "loss": 1.3034,
2878
+ "step": 23050
2879
  },
2880
  {
2881
+ "epoch": 8.03,
2882
+ "learning_rate": 1.1586937840160412e-05,
2883
+ "loss": 1.2528,
2884
+ "step": 23100
2885
  },
2886
  {
2887
+ "epoch": 8.05,
2888
+ "learning_rate": 1.1484633956704997e-05,
2889
+ "loss": 1.3111,
2890
+ "step": 23150
2891
  },
2892
  {
2893
+ "epoch": 8.07,
2894
+ "learning_rate": 1.1382330073249581e-05,
2895
+ "loss": 1.2992,
2896
+ "step": 23200
 
 
 
 
 
 
 
2897
  },
2898
  {
2899
+ "epoch": 8.09,
2900
+ "learning_rate": 1.1280026189794166e-05,
2901
+ "loss": 1.3479,
2902
+ "step": 23250
2903
  },
2904
  {
2905
+ "epoch": 8.1,
2906
+ "learning_rate": 1.1177722306338749e-05,
2907
+ "loss": 1.2799,
2908
+ "step": 23300
2909
  },
2910
  {
2911
+ "epoch": 8.12,
2912
+ "learning_rate": 1.1075418422883333e-05,
2913
+ "loss": 1.3115,
2914
+ "step": 23350
 
 
 
 
 
 
 
2915
  },
2916
  {
2917
+ "epoch": 8.14,
2918
+ "learning_rate": 1.0973114539427918e-05,
2919
+ "loss": 1.3176,
2920
+ "step": 23400
2921
  },
2922
  {
2923
+ "epoch": 8.16,
2924
+ "learning_rate": 1.0870810655972501e-05,
2925
+ "loss": 1.325,
2926
+ "step": 23450
2927
  },
2928
  {
2929
+ "epoch": 8.17,
2930
+ "learning_rate": 1.0768506772517086e-05,
2931
+ "loss": 1.3137,
2932
+ "step": 23500
2933
  },
2934
  {
2935
+ "epoch": 8.19,
2936
+ "learning_rate": 1.0666202889061668e-05,
2937
+ "loss": 1.3652,
2938
+ "step": 23550
2939
+ },
2940
+ {
2941
+ "epoch": 8.21,
2942
+ "learning_rate": 1.0563899005606253e-05,
2943
+ "loss": 1.2918,
2944
+ "step": 23600
2945
+ },
2946
+ {
2947
+ "epoch": 8.23,
2948
+ "learning_rate": 1.0461595122150838e-05,
2949
+ "loss": 1.3667,
2950
+ "step": 23650
2951
+ },
2952
+ {
2953
+ "epoch": 8.24,
2954
+ "learning_rate": 1.0359291238695422e-05,
2955
+ "loss": 1.3175,
2956
+ "step": 23700
2957
+ },
2958
+ {
2959
+ "epoch": 8.26,
2960
+ "learning_rate": 1.0256987355240005e-05,
2961
+ "loss": 1.32,
2962
+ "step": 23750
2963
+ },
2964
+ {
2965
+ "epoch": 8.28,
2966
+ "learning_rate": 1.015468347178459e-05,
2967
+ "loss": 1.3254,
2968
+ "step": 23800
2969
+ },
2970
+ {
2971
+ "epoch": 8.3,
2972
+ "learning_rate": 1.0054425665998283e-05,
2973
+ "loss": 1.3535,
2974
+ "step": 23850
2975
+ },
2976
+ {
2977
+ "epoch": 8.31,
2978
+ "learning_rate": 9.952121782542866e-06,
2979
+ "loss": 1.2927,
2980
+ "step": 23900
2981
+ },
2982
+ {
2983
+ "epoch": 8.33,
2984
+ "learning_rate": 9.84981789908745e-06,
2985
+ "loss": 1.3337,
2986
+ "step": 23950
2987
+ },
2988
+ {
2989
+ "epoch": 8.35,
2990
+ "learning_rate": 9.747514015632033e-06,
2991
+ "loss": 1.3132,
2992
+ "step": 24000
2993
+ },
2994
+ {
2995
+ "epoch": 8.37,
2996
+ "learning_rate": 9.645210132176618e-06,
2997
+ "loss": 1.3396,
2998
+ "step": 24050
2999
+ },
3000
+ {
3001
+ "epoch": 8.38,
3002
+ "learning_rate": 9.542906248721202e-06,
3003
+ "loss": 1.3197,
3004
+ "step": 24100
3005
  },
3006
  {
3007
  "epoch": 8.4,
3008
+ "learning_rate": 9.440602365265785e-06,
3009
+ "loss": 1.2531,
3010
+ "step": 24150
3011
+ },
3012
+ {
3013
+ "epoch": 8.42,
3014
+ "learning_rate": 9.33829848181037e-06,
3015
+ "loss": 1.315,
3016
+ "step": 24200
3017
+ },
3018
+ {
3019
+ "epoch": 8.43,
3020
+ "learning_rate": 9.235994598354955e-06,
3021
+ "loss": 1.3129,
3022
+ "step": 24250
3023
+ },
3024
+ {
3025
+ "epoch": 8.45,
3026
+ "learning_rate": 9.133690714899539e-06,
3027
+ "loss": 1.3048,
3028
+ "step": 24300
3029
+ },
3030
+ {
3031
+ "epoch": 8.47,
3032
+ "learning_rate": 9.031386831444122e-06,
3033
+ "loss": 1.3205,
3034
+ "step": 24350
3035
+ },
3036
+ {
3037
+ "epoch": 8.49,
3038
+ "learning_rate": 8.929082947988707e-06,
3039
+ "loss": 1.3407,
3040
+ "step": 24400
3041
+ },
3042
+ {
3043
+ "epoch": 8.5,
3044
+ "learning_rate": 8.82677906453329e-06,
3045
+ "loss": 1.3455,
3046
+ "step": 24450
3047
+ },
3048
+ {
3049
+ "epoch": 8.52,
3050
+ "learning_rate": 8.724475181077874e-06,
3051
+ "loss": 1.3333,
3052
+ "step": 24500
3053
+ },
3054
+ {
3055
+ "epoch": 8.54,
3056
+ "learning_rate": 8.622171297622459e-06,
3057
+ "loss": 1.3236,
3058
+ "step": 24550
3059
+ },
3060
+ {
3061
+ "epoch": 8.56,
3062
+ "learning_rate": 8.519867414167042e-06,
3063
+ "loss": 1.3537,
3064
+ "step": 24600
3065
+ },
3066
+ {
3067
+ "epoch": 8.57,
3068
+ "learning_rate": 8.417563530711626e-06,
3069
+ "loss": 1.302,
3070
+ "step": 24650
3071
+ },
3072
+ {
3073
+ "epoch": 8.59,
3074
+ "learning_rate": 8.31525964725621e-06,
3075
+ "loss": 1.2704,
3076
+ "step": 24700
3077
+ },
3078
+ {
3079
+ "epoch": 8.61,
3080
+ "learning_rate": 8.212955763800794e-06,
3081
+ "loss": 1.2953,
3082
+ "step": 24750
3083
+ },
3084
+ {
3085
+ "epoch": 8.63,
3086
+ "learning_rate": 8.110651880345379e-06,
3087
+ "loss": 1.3311,
3088
+ "step": 24800
3089
+ },
3090
+ {
3091
+ "epoch": 8.64,
3092
+ "learning_rate": 8.008347996889963e-06,
3093
+ "loss": 1.2794,
3094
+ "step": 24850
3095
+ },
3096
+ {
3097
+ "epoch": 8.66,
3098
+ "learning_rate": 7.906044113434546e-06,
3099
+ "loss": 1.3408,
3100
+ "step": 24900
3101
+ },
3102
+ {
3103
+ "epoch": 8.68,
3104
+ "learning_rate": 7.803740229979129e-06,
3105
+ "loss": 1.298,
3106
+ "step": 24950
3107
+ },
3108
+ {
3109
+ "epoch": 8.7,
3110
+ "learning_rate": 7.701436346523715e-06,
3111
+ "loss": 1.2529,
3112
+ "step": 25000
3113
+ },
3114
+ {
3115
+ "epoch": 8.71,
3116
+ "learning_rate": 7.599132463068298e-06,
3117
+ "loss": 1.3221,
3118
+ "step": 25050
3119
+ },
3120
+ {
3121
+ "epoch": 8.73,
3122
+ "learning_rate": 7.496828579612883e-06,
3123
+ "loss": 1.3254,
3124
+ "step": 25100
3125
+ },
3126
+ {
3127
+ "epoch": 8.75,
3128
+ "learning_rate": 7.394524696157467e-06,
3129
+ "loss": 1.2636,
3130
+ "step": 25150
3131
+ },
3132
+ {
3133
+ "epoch": 8.77,
3134
+ "learning_rate": 7.292220812702051e-06,
3135
+ "loss": 1.4183,
3136
+ "step": 25200
3137
+ },
3138
+ {
3139
+ "epoch": 8.78,
3140
+ "learning_rate": 7.189916929246634e-06,
3141
+ "loss": 1.3473,
3142
+ "step": 25250
3143
  },
3144
  {
3145
  "epoch": 8.8,
3146
+ "learning_rate": 7.087613045791218e-06,
3147
+ "loss": 1.3512,
3148
+ "step": 25300
3149
+ },
3150
+ {
3151
+ "epoch": 8.82,
3152
+ "learning_rate": 6.985309162335803e-06,
3153
+ "loss": 1.3251,
3154
+ "step": 25350
3155
+ },
3156
+ {
3157
+ "epoch": 8.83,
3158
+ "learning_rate": 6.8830052788803864e-06,
3159
+ "loss": 1.3251,
3160
+ "step": 25400
3161
+ },
3162
+ {
3163
+ "epoch": 8.85,
3164
+ "learning_rate": 6.780701395424971e-06,
3165
+ "loss": 1.2679,
3166
+ "step": 25450
3167
+ },
3168
+ {
3169
+ "epoch": 8.87,
3170
+ "learning_rate": 6.678397511969554e-06,
3171
+ "loss": 1.2999,
3172
+ "step": 25500
3173
+ },
3174
+ {
3175
+ "epoch": 8.89,
3176
+ "learning_rate": 6.5760936285141395e-06,
3177
+ "loss": 1.3313,
3178
+ "step": 25550
3179
+ },
3180
+ {
3181
+ "epoch": 8.9,
3182
+ "learning_rate": 6.473789745058722e-06,
3183
+ "loss": 1.2963,
3184
+ "step": 25600
3185
+ },
3186
+ {
3187
+ "epoch": 8.92,
3188
+ "learning_rate": 6.371485861603307e-06,
3189
+ "loss": 1.2849,
3190
+ "step": 25650
3191
+ },
3192
+ {
3193
+ "epoch": 8.94,
3194
+ "learning_rate": 6.269181978147891e-06,
3195
+ "loss": 1.3351,
3196
+ "step": 25700
3197
+ },
3198
+ {
3199
+ "epoch": 8.96,
3200
+ "learning_rate": 6.1668780946924746e-06,
3201
+ "loss": 1.3182,
3202
+ "step": 25750
3203
+ },
3204
+ {
3205
+ "epoch": 8.97,
3206
+ "learning_rate": 6.064574211237059e-06,
3207
+ "loss": 1.3212,
3208
+ "step": 25800
3209
+ },
3210
+ {
3211
+ "epoch": 8.99,
3212
+ "learning_rate": 5.962270327781643e-06,
3213
+ "loss": 1.3385,
3214
+ "step": 25850
3215
  },
3216
  {
3217
  "epoch": 9.0,
3218
+ "eval_gen_len": 19.8291,
3219
+ "eval_loss": 3.7051403522491455,
3220
+ "eval_rouge1": 17.0903,
3221
+ "eval_rouge2": 4.5413,
3222
+ "eval_rougeL": 13.8897,
3223
+ "eval_rougeLsum": 15.0091,
3224
+ "eval_runtime": 251.3799,
3225
+ "eval_samples_per_second": 9.241,
3226
+ "eval_steps_per_second": 1.158,
3227
+ "step": 25875
3228
+ },
3229
+ {
3230
+ "epoch": 9.01,
3231
+ "learning_rate": 5.859966444326227e-06,
3232
+ "loss": 1.2874,
3233
+ "step": 25900
3234
+ },
3235
+ {
3236
+ "epoch": 9.03,
3237
+ "learning_rate": 5.7576625608708105e-06,
3238
+ "loss": 1.2224,
3239
+ "step": 25950
3240
+ },
3241
+ {
3242
+ "epoch": 9.04,
3243
+ "learning_rate": 5.655358677415395e-06,
3244
+ "loss": 1.1688,
3245
+ "step": 26000
3246
+ },
3247
+ {
3248
+ "epoch": 9.06,
3249
+ "learning_rate": 5.553054793959979e-06,
3250
+ "loss": 1.1576,
3251
+ "step": 26050
3252
+ },
3253
+ {
3254
+ "epoch": 9.08,
3255
+ "learning_rate": 5.450750910504563e-06,
3256
+ "loss": 1.2135,
3257
+ "step": 26100
3258
+ },
3259
+ {
3260
+ "epoch": 9.1,
3261
+ "learning_rate": 5.348447027049147e-06,
3262
+ "loss": 1.2315,
3263
+ "step": 26150
3264
+ },
3265
+ {
3266
+ "epoch": 9.11,
3267
+ "learning_rate": 5.246143143593731e-06,
3268
+ "loss": 1.1559,
3269
+ "step": 26200
3270
+ },
3271
+ {
3272
+ "epoch": 9.13,
3273
+ "learning_rate": 5.143839260138316e-06,
3274
+ "loss": 1.2484,
3275
+ "step": 26250
3276
+ },
3277
+ {
3278
+ "epoch": 9.15,
3279
+ "learning_rate": 5.0415353766828995e-06,
3280
+ "loss": 1.2101,
3281
+ "step": 26300
3282
+ },
3283
+ {
3284
+ "epoch": 9.17,
3285
+ "learning_rate": 4.939231493227483e-06,
3286
+ "loss": 1.2117,
3287
+ "step": 26350
3288
+ },
3289
+ {
3290
+ "epoch": 9.18,
3291
+ "learning_rate": 4.836927609772067e-06,
3292
+ "loss": 1.2629,
3293
+ "step": 26400
3294
  },
3295
  {
3296
  "epoch": 9.2,
3297
+ "learning_rate": 4.734623726316651e-06,
3298
+ "loss": 1.2024,
3299
+ "step": 26450
3300
+ },
3301
+ {
3302
+ "epoch": 9.22,
3303
+ "learning_rate": 4.6323198428612354e-06,
3304
+ "loss": 1.1946,
3305
+ "step": 26500
3306
+ },
3307
+ {
3308
+ "epoch": 9.23,
3309
+ "learning_rate": 4.530015959405819e-06,
3310
+ "loss": 1.2109,
3311
+ "step": 26550
3312
+ },
3313
+ {
3314
+ "epoch": 9.25,
3315
+ "learning_rate": 4.427712075950404e-06,
3316
+ "loss": 1.2544,
3317
+ "step": 26600
3318
+ },
3319
+ {
3320
+ "epoch": 9.27,
3321
+ "learning_rate": 4.325408192494988e-06,
3322
+ "loss": 1.1986,
3323
+ "step": 26650
3324
+ },
3325
+ {
3326
+ "epoch": 9.29,
3327
+ "learning_rate": 4.223104309039571e-06,
3328
+ "loss": 1.2237,
3329
+ "step": 26700
3330
+ },
3331
+ {
3332
+ "epoch": 9.3,
3333
+ "learning_rate": 4.120800425584155e-06,
3334
+ "loss": 1.1966,
3335
+ "step": 26750
3336
+ },
3337
+ {
3338
+ "epoch": 9.32,
3339
+ "learning_rate": 4.018496542128739e-06,
3340
+ "loss": 1.247,
3341
+ "step": 26800
3342
+ },
3343
+ {
3344
+ "epoch": 9.34,
3345
+ "learning_rate": 3.9161926586733236e-06,
3346
+ "loss": 1.2958,
3347
+ "step": 26850
3348
+ },
3349
+ {
3350
+ "epoch": 9.36,
3351
+ "learning_rate": 3.8138887752179073e-06,
3352
+ "loss": 1.1975,
3353
+ "step": 26900
3354
+ },
3355
+ {
3356
+ "epoch": 9.37,
3357
+ "learning_rate": 3.7115848917624915e-06,
3358
+ "loss": 1.2126,
3359
+ "step": 26950
3360
+ },
3361
+ {
3362
+ "epoch": 9.39,
3363
+ "learning_rate": 3.6092810083070757e-06,
3364
+ "loss": 1.177,
3365
+ "step": 27000
3366
+ },
3367
+ {
3368
+ "epoch": 9.41,
3369
+ "learning_rate": 3.50697712485166e-06,
3370
+ "loss": 1.2372,
3371
+ "step": 27050
3372
+ },
3373
+ {
3374
+ "epoch": 9.43,
3375
+ "learning_rate": 3.4046732413962437e-06,
3376
+ "loss": 1.1955,
3377
+ "step": 27100
3378
+ },
3379
+ {
3380
+ "epoch": 9.44,
3381
+ "learning_rate": 3.302369357940827e-06,
3382
+ "loss": 1.226,
3383
+ "step": 27150
3384
+ },
3385
+ {
3386
+ "epoch": 9.46,
3387
+ "learning_rate": 3.2000654744854113e-06,
3388
+ "loss": 1.2704,
3389
+ "step": 27200
3390
+ },
3391
+ {
3392
+ "epoch": 9.48,
3393
+ "learning_rate": 3.0977615910299955e-06,
3394
+ "loss": 1.1625,
3395
+ "step": 27250
3396
+ },
3397
+ {
3398
+ "epoch": 9.5,
3399
+ "learning_rate": 2.9954577075745797e-06,
3400
+ "loss": 1.2163,
3401
+ "step": 27300
3402
+ },
3403
+ {
3404
+ "epoch": 9.51,
3405
+ "learning_rate": 2.893153824119164e-06,
3406
+ "loss": 1.1937,
3407
+ "step": 27350
3408
+ },
3409
+ {
3410
+ "epoch": 9.53,
3411
+ "learning_rate": 2.7908499406637476e-06,
3412
+ "loss": 1.1698,
3413
+ "step": 27400
3414
+ },
3415
+ {
3416
+ "epoch": 9.55,
3417
+ "learning_rate": 2.6885460572083314e-06,
3418
+ "loss": 1.1571,
3419
+ "step": 27450
3420
+ },
3421
+ {
3422
+ "epoch": 9.57,
3423
+ "learning_rate": 2.5862421737529156e-06,
3424
+ "loss": 1.1881,
3425
+ "step": 27500
3426
+ },
3427
+ {
3428
+ "epoch": 9.58,
3429
+ "learning_rate": 2.4839382902975e-06,
3430
+ "loss": 1.25,
3431
+ "step": 27550
3432
  },
3433
  {
3434
  "epoch": 9.6,
3435
+ "learning_rate": 2.381634406842084e-06,
3436
+ "loss": 1.1645,
3437
+ "step": 27600
3438
+ },
3439
+ {
3440
+ "epoch": 9.62,
3441
+ "learning_rate": 2.2793305233866682e-06,
3442
+ "loss": 1.1695,
3443
+ "step": 27650
3444
+ },
3445
+ {
3446
+ "epoch": 9.63,
3447
+ "learning_rate": 2.177026639931252e-06,
3448
+ "loss": 1.1629,
3449
+ "step": 27700
3450
+ },
3451
+ {
3452
+ "epoch": 9.65,
3453
+ "learning_rate": 2.0747227564758358e-06,
3454
+ "loss": 1.2161,
3455
+ "step": 27750
3456
+ },
3457
+ {
3458
+ "epoch": 9.67,
3459
+ "learning_rate": 1.97241887302042e-06,
3460
+ "loss": 1.2465,
3461
+ "step": 27800
3462
+ },
3463
+ {
3464
+ "epoch": 9.69,
3465
+ "learning_rate": 1.8701149895650042e-06,
3466
+ "loss": 1.1754,
3467
+ "step": 27850
3468
+ },
3469
+ {
3470
+ "epoch": 9.7,
3471
+ "learning_rate": 1.767811106109588e-06,
3472
+ "loss": 1.1823,
3473
+ "step": 27900
3474
+ },
3475
+ {
3476
+ "epoch": 9.72,
3477
+ "learning_rate": 1.665507222654172e-06,
3478
+ "loss": 1.2259,
3479
+ "step": 27950
3480
+ },
3481
+ {
3482
+ "epoch": 9.74,
3483
+ "learning_rate": 1.5632033391987561e-06,
3484
+ "loss": 1.2193,
3485
+ "step": 28000
3486
+ },
3487
+ {
3488
+ "epoch": 9.76,
3489
+ "learning_rate": 1.4608994557433401e-06,
3490
+ "loss": 1.2087,
3491
+ "step": 28050
3492
+ },
3493
+ {
3494
+ "epoch": 9.77,
3495
+ "learning_rate": 1.3585955722879241e-06,
3496
+ "loss": 1.2028,
3497
+ "step": 28100
3498
+ },
3499
+ {
3500
+ "epoch": 9.79,
3501
+ "learning_rate": 1.256291688832508e-06,
3502
+ "loss": 1.2146,
3503
+ "step": 28150
3504
+ },
3505
+ {
3506
+ "epoch": 9.81,
3507
+ "learning_rate": 1.1560338830462006e-06,
3508
+ "loss": 1.226,
3509
+ "step": 28200
3510
+ },
3511
+ {
3512
+ "epoch": 9.83,
3513
+ "learning_rate": 1.0537299995907844e-06,
3514
+ "loss": 1.1566,
3515
+ "step": 28250
3516
+ },
3517
+ {
3518
+ "epoch": 9.84,
3519
+ "learning_rate": 9.514261161353686e-07,
3520
+ "loss": 1.1818,
3521
+ "step": 28300
3522
+ },
3523
+ {
3524
+ "epoch": 9.86,
3525
+ "learning_rate": 8.491222326799525e-07,
3526
+ "loss": 1.1817,
3527
+ "step": 28350
3528
+ },
3529
+ {
3530
+ "epoch": 9.88,
3531
+ "learning_rate": 7.468183492245366e-07,
3532
+ "loss": 1.2089,
3533
+ "step": 28400
3534
+ },
3535
+ {
3536
+ "epoch": 9.9,
3537
+ "learning_rate": 6.445144657691206e-07,
3538
+ "loss": 1.199,
3539
+ "step": 28450
3540
+ },
3541
+ {
3542
+ "epoch": 9.91,
3543
+ "learning_rate": 5.422105823137046e-07,
3544
+ "loss": 1.2147,
3545
+ "step": 28500
3546
+ },
3547
+ {
3548
+ "epoch": 9.93,
3549
+ "learning_rate": 4.399066988582887e-07,
3550
+ "loss": 1.2246,
3551
+ "step": 28550
3552
+ },
3553
+ {
3554
+ "epoch": 9.95,
3555
+ "learning_rate": 3.376028154028727e-07,
3556
+ "loss": 1.1782,
3557
+ "step": 28600
3558
+ },
3559
+ {
3560
+ "epoch": 9.97,
3561
+ "learning_rate": 2.3529893194745673e-07,
3562
+ "loss": 1.2261,
3563
+ "step": 28650
3564
+ },
3565
+ {
3566
+ "epoch": 9.98,
3567
+ "learning_rate": 1.3299504849204075e-07,
3568
+ "loss": 1.1608,
3569
+ "step": 28700
3570
  },
3571
  {
3572
  "epoch": 10.0,
3573
+ "learning_rate": 3.069116503662479e-08,
3574
+ "loss": 1.2211,
3575
+ "step": 28750
3576
  },
3577
  {
3578
  "epoch": 10.0,
3579
+ "eval_gen_len": 19.9066,
3580
+ "eval_loss": 3.790048360824585,
3581
+ "eval_rouge1": 16.7843,
3582
+ "eval_rouge2": 4.4907,
3583
+ "eval_rougeL": 13.6418,
3584
+ "eval_rougeLsum": 14.7366,
3585
+ "eval_runtime": 251.7779,
3586
+ "eval_samples_per_second": 9.226,
3587
+ "eval_steps_per_second": 1.156,
3588
+ "step": 28750
3589
  },
3590
  {
3591
  "epoch": 10.0,
3592
+ "step": 28750,
3593
+ "total_flos": 5.4143051427446784e+17,
3594
+ "train_loss": 2.145213280321204,
3595
+ "train_runtime": 23488.7209,
3596
+ "train_samples_per_second": 9.792,
3597
+ "train_steps_per_second": 1.224
3598
  }
3599
  ],
3600
+ "max_steps": 28750,
3601
  "num_train_epochs": 10,
3602
+ "total_flos": 5.4143051427446784e+17,
3603
  "trial_name": null,
3604
  "trial_params": null
3605
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70f5cbd5c3fdf1dff1e73912d6b5f5e3bf6950bf344cc4feff7de3eec81290ac
3
  size 2799
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b18db3123e11bca3b4df4077ecd882047ff189fe4bd30dc91442fdc0308aa5b0
3
  size 2799