Yahiael1 committed on
Commit
cc73a11
·
1 Parent(s): bb1173a

Training in progress, epoch 2

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77ee03a1d0311a29793f120677b5bfc69e1e8781bc96853c8e9c88c8166c16e4
3
  size 1115579397
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1351bd3bf566cfe6d40151bfcbbf3bccee762d4f23ee53f6d2d73d2a5f861a64
3
  size 1115579397
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64abc4de3991cbdfd5ff0c316e2fac01273b5b67804a7c101f49a609279180f1
3
  size 557971229
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e95e8c522521c337928967e212665cfb3206fe4eb5ec06a2bb1372b913d6fc0
3
  size 557971229
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a45b7b77b12af6e89720c582cf4778d8f486d6946f3e82947292c4d5038406f7
3
  size 15597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:154076bccd841f8ac822307d4f056a539bae87f1cf1cd71150f9a588ea1896b8
3
  size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03c25bfe5cf11a212342871a1fefb1e71d639bd1297dd40b8d896a9d5808de6b
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fe54476cab8566f02dbdb424711234a968a086cef865765d8cdb7f900b7dbb6
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0801195a6d147316da17a810626a833a27ae6da4b2ee1058dbb32488537477c
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81fe517ff574cd498c420225231c23c454eca4639d3d380b67b10c0d2f673ad4
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
- "global_step": 5534,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -98,11 +98,116 @@
98
  "learning_rate": 0.00015035236718467654,
99
  "loss": 2.0776,
100
  "step": 5500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  }
102
  ],
103
  "max_steps": 22136,
104
  "num_train_epochs": 4,
105
- "total_flos": 6.744738452594688e+16,
106
  "trial_name": null,
107
  "trial_params": null
108
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "global_step": 11068,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
98
  "learning_rate": 0.00015035236718467654,
99
  "loss": 2.0776,
100
  "step": 5500
101
+ },
102
+ {
103
+ "epoch": 1.08,
104
+ "learning_rate": 0.0001458348391760029,
105
+ "loss": 1.7518,
106
+ "step": 6000
107
+ },
108
+ {
109
+ "epoch": 1.08,
110
+ "eval_gen_len": 19.6188,
111
+ "eval_loss": 1.6170521974563599,
112
+ "eval_rouge1": 1.4387,
113
+ "eval_rouge2": 1.2076,
114
+ "eval_rougeL": 1.4167,
115
+ "eval_rougeLsum": 1.4222,
116
+ "eval_runtime": 3886.2514,
117
+ "eval_samples_per_second": 7.119,
118
+ "eval_steps_per_second": 0.356,
119
+ "step": 6000
120
+ },
121
+ {
122
+ "epoch": 1.17,
123
+ "learning_rate": 0.00014131731116732925,
124
+ "loss": 1.7539,
125
+ "step": 6500
126
+ },
127
+ {
128
+ "epoch": 1.26,
129
+ "learning_rate": 0.00013680881821467292,
130
+ "loss": 1.7379,
131
+ "step": 7000
132
+ },
133
+ {
134
+ "epoch": 1.36,
135
+ "learning_rate": 0.0001322912902059993,
136
+ "loss": 1.7458,
137
+ "step": 7500
138
+ },
139
+ {
140
+ "epoch": 1.45,
141
+ "learning_rate": 0.00012777376219732562,
142
+ "loss": 1.7498,
143
+ "step": 8000
144
+ },
145
+ {
146
+ "epoch": 1.45,
147
+ "eval_gen_len": 19.549,
148
+ "eval_loss": 1.5690149068832397,
149
+ "eval_rouge1": 1.6999,
150
+ "eval_rouge2": 1.4397,
151
+ "eval_rougeL": 1.6688,
152
+ "eval_rougeLsum": 1.6791,
153
+ "eval_runtime": 3827.5328,
154
+ "eval_samples_per_second": 7.228,
155
+ "eval_steps_per_second": 0.362,
156
+ "step": 8000
157
+ },
158
+ {
159
+ "epoch": 1.54,
160
+ "learning_rate": 0.00012325623418865198,
161
+ "loss": 1.7506,
162
+ "step": 8500
163
+ },
164
+ {
165
+ "epoch": 1.63,
166
+ "learning_rate": 0.00011873870617997833,
167
+ "loss": 1.7461,
168
+ "step": 9000
169
+ },
170
+ {
171
+ "epoch": 1.72,
172
+ "learning_rate": 0.00011422117817130467,
173
+ "loss": 1.7558,
174
+ "step": 9500
175
+ },
176
+ {
177
+ "epoch": 1.81,
178
+ "learning_rate": 0.00010970365016263103,
179
+ "loss": 1.7326,
180
+ "step": 10000
181
+ },
182
+ {
183
+ "epoch": 1.81,
184
+ "eval_gen_len": 19.4797,
185
+ "eval_loss": 1.5457514524459839,
186
+ "eval_rouge1": 1.6783,
187
+ "eval_rouge2": 1.3966,
188
+ "eval_rougeL": 1.6484,
189
+ "eval_rougeLsum": 1.658,
190
+ "eval_runtime": 3817.3069,
191
+ "eval_samples_per_second": 7.247,
192
+ "eval_steps_per_second": 0.363,
193
+ "step": 10000
194
+ },
195
+ {
196
+ "epoch": 1.9,
197
+ "learning_rate": 0.00010518612215395735,
198
+ "loss": 1.7658,
199
+ "step": 10500
200
+ },
201
+ {
202
+ "epoch": 1.99,
203
+ "learning_rate": 0.00010067762920130106,
204
+ "loss": 1.7348,
205
+ "step": 11000
206
  }
207
  ],
208
  "max_steps": 22136,
209
  "num_train_epochs": 4,
210
+ "total_flos": 1.348859886087168e+17,
211
  "trial_name": null,
212
  "trial_params": null
213
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64abc4de3991cbdfd5ff0c316e2fac01273b5b67804a7c101f49a609279180f1
3
  size 557971229
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e95e8c522521c337928967e212665cfb3206fe4eb5ec06a2bb1372b913d6fc0
3
  size 557971229