dq158 committed on
Commit
1435cc4
·
1 Parent(s): f25e7a0

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46abff71e7026647c05fbaf7f699e8ee421c0a2e584b794f2fb0c530d4b1f4b4
3
  size 1980859973
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6050f38120b0e933aecb28cd28fb9c7925ebc96441d3fad1965b4adf303d89fd
3
  size 1980859973
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95635d8117e24e5efeb681934dc5313da771c7b857de60f1ac43e8245d9484ab
3
  size 990408885
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a7994a97f599475aa5874b645c4945821ebf2eb2b2f90ad0971c97bb6cef71b
3
  size 990408885
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4844a5d656111932c3ceada711e9cbb272e49c0cfedd858f395737c72b4c6992
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d79d8043ba95baf61ac1e0e9419f144c2c0f76428640c86753d23d96ba746f8
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5371d45631fd7dd6529335dfaa6166f363409fc22e7123a04a9a24603e02dcb6
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9508142e5058d0d70d25ee8e25948935ec2c9917db374dac6856edde91f4a56
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.6461355686187744,
3
- "best_model_checkpoint": "dq158/morbius/checkpoint-6084",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 6084,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -117,13 +117,68 @@
117
  "eval_steps_per_second": 1.822,
118
  "eval_translation_length": 22165,
119
  "step": 6084
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  }
121
  ],
122
  "logging_steps": 500,
123
  "max_steps": 15210,
124
  "num_train_epochs": 5,
125
  "save_steps": 500,
126
- "total_flos": 4.998043915517952e+16,
127
  "trial_name": null,
128
  "trial_params": null
129
  }
 
1
  {
2
+ "best_metric": 2.596039056777954,
3
+ "best_model_checkpoint": "dq158/morbius/checkpoint-9126",
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 9126,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
117
  "eval_steps_per_second": 1.822,
118
  "eval_translation_length": 22165,
119
  "step": 6084
120
+ },
121
+ {
122
+ "epoch": 2.14,
123
+ "learning_rate": 2.863247863247863e-05,
124
+ "loss": 2.6801,
125
+ "step": 6500
126
+ },
127
+ {
128
+ "epoch": 2.3,
129
+ "learning_rate": 2.69888231426693e-05,
130
+ "loss": 2.6995,
131
+ "step": 7000
132
+ },
133
+ {
134
+ "epoch": 2.47,
135
+ "learning_rate": 2.5345167652859964e-05,
136
+ "loss": 2.6345,
137
+ "step": 7500
138
+ },
139
+ {
140
+ "epoch": 2.63,
141
+ "learning_rate": 2.3701512163050626e-05,
142
+ "loss": 2.6573,
143
+ "step": 8000
144
+ },
145
+ {
146
+ "epoch": 2.79,
147
+ "learning_rate": 2.205785667324129e-05,
148
+ "loss": 2.629,
149
+ "step": 8500
150
+ },
151
+ {
152
+ "epoch": 2.96,
153
+ "learning_rate": 2.0414201183431952e-05,
154
+ "loss": 2.6619,
155
+ "step": 9000
156
+ },
157
+ {
158
+ "epoch": 3.0,
159
+ "eval_bleu": 0.02507600348121833,
160
+ "eval_brevity_penalty": 0.3107319508436834,
161
+ "eval_length_ratio": 0.4610792338199308,
162
+ "eval_loss": 2.596039056777954,
163
+ "eval_precisions": [
164
+ 0.19751109484375715,
165
+ 0.10842087523172855,
166
+ 0.04568934157875591,
167
+ 0.043348126741665806
168
+ ],
169
+ "eval_reference_length": 47404,
170
+ "eval_runtime": 182.5266,
171
+ "eval_samples_per_second": 22.221,
172
+ "eval_steps_per_second": 1.852,
173
+ "eval_translation_length": 21857,
174
+ "step": 9126
175
  }
176
  ],
177
  "logging_steps": 500,
178
  "max_steps": 15210,
179
  "num_train_epochs": 5,
180
  "save_steps": 500,
181
+ "total_flos": 7.497065873276928e+16,
182
  "trial_name": null,
183
  "trial_params": null
184
  }