jflotz commited on
Commit
26be54c
·
1 Parent(s): d0a731f

Training in progress, step 850000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2f81c933b26cfeb60d53ba82d975294e2c7358973e2715677db9ca7fd31945d
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e84ef55f585409a7eb44e502edc32a6f98749bb849f76b8b16d08a3dc37d8ae
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80793dabaaad0486a9c6c7d32363ba477ae4d785ddabb640496bf016359dc491
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a461e16ba87e1e69720d854eafd25f8ca62064d88d546c07f46eddd6113b782
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c09ff3d1788e565e5a086e252ccf0ede212b045e4e5f4392a44c6ea6f0987dd6
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ac44739d68ab8d92ea75cbaa95e05b08c8692ab1a77edef6cb58ac613b56151
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e058e2238a38cee98eacc9109fd883ace95c4833f253ace4bd37e2704c0fe5af
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1df68ed20ea8916573dab6835f47431337c9ff47e9296153749cafb56a0237a7
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9431567146b6a803c38f6863bbd8c9115e688967dc8f725b32605962fde389b3
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77f291c8b37b0aa8ca2df1a663842fb970bdeff54c0b2afea5e2500f1ec3c566
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65f8725dca368138ac60071ebf1967a52a0bdc41ecaaff24531fe8b99b9ccb52
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21eb46d9347a0d809f89248620dfe17276a72e359d7ca7fa7d35b0bd105aa105
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2eda6d197ffa3fe6958e94125c5fa0490a4afe5ac2f8a51ad2a4931b09364f04
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:351c206e5b3b4ed149381ec4e0dbf48f45d70d99336ae5535af82fc3c087c11c
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa3a1be70ad51e1c8a5b547f1989a2e92d51a9ec27c3a4490875ff9354ff3dda
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aa383878cd306edcfd23540112b984f51a1f84d02ae2848e2f7b0c8c85af935
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bc51de7c0af3e5027c4a852a232459cf39ee9a71ea51b7603a1f5327ee5a020
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63948dedd46aa19ac12a8709dee2f12fa168b63bee773e75612f1919ff39b2f0
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4a63b1c76ff6ed1a203a2dff4664ff326fc59ea9cbb507ef4f3897d7810fb84
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ec45b14f87f8f42299fc33d4fabbcc588674c41ff3399499af6ac36481288d5
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90777e10c619a178822dbd35785dbd74396ff21ef94c6855b7e97b44a2c700b9
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:208c2830fa39459dcf60f5b94c27151cbc48e3dc8fdf884186b8d5be230917b9
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2192265686657473,
5
- "global_step": 840000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10086,11 +10086,131 @@
10086
  "learning_rate": 1.9572046332969825e-05,
10087
  "loss": 0.2881,
10088
  "step": 840000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10089
  }
10090
  ],
10091
  "max_steps": 1000000,
10092
  "num_train_epochs": 2,
10093
- "total_flos": 5.678984079326211e+22,
10094
  "trial_name": null,
10095
  "trial_params": null
10096
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2391562567262697,
5
+ "global_step": 850000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10086
  "learning_rate": 1.9572046332969825e-05,
10087
  "loss": 0.2881,
10088
  "step": 840000
10089
+ },
10090
+ {
10091
+ "epoch": 1.22,
10092
+ "learning_rate": 1.95137059427344e-05,
10093
+ "loss": 0.2884,
10094
+ "step": 840500
10095
+ },
10096
+ {
10097
+ "epoch": 1.22,
10098
+ "learning_rate": 1.945553091971727e-05,
10099
+ "loss": 0.2883,
10100
+ "step": 841000
10101
+ },
10102
+ {
10103
+ "epoch": 1.22,
10104
+ "learning_rate": 1.93975214229667e-05,
10105
+ "loss": 0.2885,
10106
+ "step": 841500
10107
+ },
10108
+ {
10109
+ "epoch": 1.22,
10110
+ "learning_rate": 1.933967761107847e-05,
10111
+ "loss": 0.2877,
10112
+ "step": 842000
10113
+ },
10114
+ {
10115
+ "epoch": 1.22,
10116
+ "learning_rate": 1.928199964219533e-05,
10117
+ "loss": 0.2876,
10118
+ "step": 842500
10119
+ },
10120
+ {
10121
+ "epoch": 1.23,
10122
+ "learning_rate": 1.9224487674006694e-05,
10123
+ "loss": 0.2873,
10124
+ "step": 843000
10125
+ },
10126
+ {
10127
+ "epoch": 1.23,
10128
+ "learning_rate": 1.9167141863748015e-05,
10129
+ "loss": 0.288,
10130
+ "step": 843500
10131
+ },
10132
+ {
10133
+ "epoch": 1.23,
10134
+ "learning_rate": 1.9109962368200602e-05,
10135
+ "loss": 0.2874,
10136
+ "step": 844000
10137
+ },
10138
+ {
10139
+ "epoch": 1.23,
10140
+ "learning_rate": 1.9052949343690977e-05,
10141
+ "loss": 0.2884,
10142
+ "step": 844500
10143
+ },
10144
+ {
10145
+ "epoch": 1.23,
10146
+ "learning_rate": 1.8996102946090586e-05,
10147
+ "loss": 0.2874,
10148
+ "step": 845000
10149
+ },
10150
+ {
10151
+ "epoch": 1.23,
10152
+ "learning_rate": 1.8939423330815345e-05,
10153
+ "loss": 0.2879,
10154
+ "step": 845500
10155
+ },
10156
+ {
10157
+ "epoch": 1.23,
10158
+ "learning_rate": 1.888291065282509e-05,
10159
+ "loss": 0.2872,
10160
+ "step": 846000
10161
+ },
10162
+ {
10163
+ "epoch": 1.23,
10164
+ "learning_rate": 1.882656506662338e-05,
10165
+ "loss": 0.2882,
10166
+ "step": 846500
10167
+ },
10168
+ {
10169
+ "epoch": 1.23,
10170
+ "learning_rate": 1.8770386726256865e-05,
10171
+ "loss": 0.2875,
10172
+ "step": 847000
10173
+ },
10174
+ {
10175
+ "epoch": 1.23,
10176
+ "learning_rate": 1.8714375785315006e-05,
10177
+ "loss": 0.2871,
10178
+ "step": 847500
10179
+ },
10180
+ {
10181
+ "epoch": 1.24,
10182
+ "learning_rate": 1.8658532396929565e-05,
10183
+ "loss": 0.2872,
10184
+ "step": 848000
10185
+ },
10186
+ {
10187
+ "epoch": 1.24,
10188
+ "learning_rate": 1.8602856713774208e-05,
10189
+ "loss": 0.2874,
10190
+ "step": 848500
10191
+ },
10192
+ {
10193
+ "epoch": 1.24,
10194
+ "learning_rate": 1.8547348888064178e-05,
10195
+ "loss": 0.2869,
10196
+ "step": 849000
10197
+ },
10198
+ {
10199
+ "epoch": 1.24,
10200
+ "learning_rate": 1.8492009071555703e-05,
10201
+ "loss": 0.2873,
10202
+ "step": 849500
10203
+ },
10204
+ {
10205
+ "epoch": 1.24,
10206
+ "learning_rate": 1.8436837415545772e-05,
10207
+ "loss": 0.2874,
10208
+ "step": 850000
10209
  }
10210
  ],
10211
  "max_steps": 1000000,
10212
  "num_train_epochs": 2,
10213
+ "total_flos": 5.746589779150674e+22,
10214
  "trial_name": null,
10215
  "trial_params": null
10216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80793dabaaad0486a9c6c7d32363ba477ae4d785ddabb640496bf016359dc491
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a461e16ba87e1e69720d854eafd25f8ca62064d88d546c07f46eddd6113b782
3
  size 449450757