jflotz committed on
Commit
e00ada0
·
1 Parent(s): 27b8fd7

Training in progress, step 910000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15d8bbdd8e9ea2eabe38aad0708eba010797097fc97de2bc108dfae7de70eae1
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286afc89953ac85c207ddee00ff74005ad2f262dcd53bfa64635a387b524e2f5
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:083d2e25ea0ac8e88690f7365c08a3344ba965c5f4549f033918008711bc1651
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b52f3b8fcfa70b1731fae94d573cc6b63207a962d882488f83af9b17655c7c7
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa624c96f92eaad028a188cf78acc34c38cdc88db165dfecd04176965e65555e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68bd93c5df85b9ef6b8dfb004005413abc49b194d979c692716ee25211f1498f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.68044789935648,
5
- "global_step": 900000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -18006,11 +18006,211 @@
18006
  "eval_samples_per_second": 854.803,
18007
  "eval_steps_per_second": 13.397,
18008
  "step": 900000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18009
  }
18010
  ],
18011
  "max_steps": 1000000,
18012
  "num_train_epochs": 12,
18013
- "total_flos": 6.3089742047036024e+22,
18014
  "trial_name": null,
18015
  "trial_params": null
18016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.791976623579403,
5
+ "global_step": 910000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
18006
  "eval_samples_per_second": 854.803,
18007
  "eval_steps_per_second": 13.397,
18008
  "step": 900000
18009
+ },
18010
+ {
18011
+ "epoch": 9.69,
18012
+ "learning_rate": 1.3755301948017599e-05,
18013
+ "loss": 0.1822,
18014
+ "step": 900500
18015
+ },
18016
+ {
18017
+ "epoch": 9.69,
18018
+ "learning_rate": 1.3717991925554562e-05,
18019
+ "loss": 0.1821,
18020
+ "step": 901000
18021
+ },
18022
+ {
18023
+ "epoch": 9.69,
18024
+ "eval_loss": 0.17332369089126587,
18025
+ "eval_runtime": 2.5587,
18026
+ "eval_samples_per_second": 897.731,
18027
+ "eval_steps_per_second": 14.07,
18028
+ "step": 901000
18029
+ },
18030
+ {
18031
+ "epoch": 9.7,
18032
+ "learning_rate": 1.368086311557062e-05,
18033
+ "loss": 0.1821,
18034
+ "step": 901500
18035
+ },
18036
+ {
18037
+ "epoch": 9.7,
18038
+ "learning_rate": 1.3643915619574529e-05,
18039
+ "loss": 0.1821,
18040
+ "step": 902000
18041
+ },
18042
+ {
18043
+ "epoch": 9.7,
18044
+ "eval_loss": 0.1713598072528839,
18045
+ "eval_runtime": 2.7303,
18046
+ "eval_samples_per_second": 841.295,
18047
+ "eval_steps_per_second": 13.185,
18048
+ "step": 902000
18049
+ },
18050
+ {
18051
+ "epoch": 9.71,
18052
+ "learning_rate": 1.3607149538579341e-05,
18053
+ "loss": 0.182,
18054
+ "step": 902500
18055
+ },
18056
+ {
18057
+ "epoch": 9.71,
18058
+ "learning_rate": 1.35705649731021e-05,
18059
+ "loss": 0.1816,
18060
+ "step": 903000
18061
+ },
18062
+ {
18063
+ "epoch": 9.71,
18064
+ "eval_loss": 0.17178992927074432,
18065
+ "eval_runtime": 2.6316,
18066
+ "eval_samples_per_second": 872.853,
18067
+ "eval_steps_per_second": 13.68,
18068
+ "step": 903000
18069
+ },
18070
+ {
18071
+ "epoch": 9.72,
18072
+ "learning_rate": 1.3534162023163642e-05,
18073
+ "loss": 0.1821,
18074
+ "step": 903500
18075
+ },
18076
+ {
18077
+ "epoch": 9.73,
18078
+ "learning_rate": 1.3497940788288195e-05,
18079
+ "loss": 0.182,
18080
+ "step": 904000
18081
+ },
18082
+ {
18083
+ "epoch": 9.73,
18084
+ "eval_loss": 0.17401227355003357,
18085
+ "eval_runtime": 2.6109,
18086
+ "eval_samples_per_second": 879.785,
18087
+ "eval_steps_per_second": 13.789,
18088
+ "step": 904000
18089
+ },
18090
+ {
18091
+ "epoch": 9.73,
18092
+ "learning_rate": 1.3461901367503262e-05,
18093
+ "loss": 0.1816,
18094
+ "step": 904500
18095
+ },
18096
+ {
18097
+ "epoch": 9.74,
18098
+ "learning_rate": 1.3426043859339253e-05,
18099
+ "loss": 0.1822,
18100
+ "step": 905000
18101
+ },
18102
+ {
18103
+ "epoch": 9.74,
18104
+ "eval_loss": 0.16998076438903809,
18105
+ "eval_runtime": 2.6306,
18106
+ "eval_samples_per_second": 873.188,
18107
+ "eval_steps_per_second": 13.685,
18108
+ "step": 905000
18109
+ },
18110
+ {
18111
+ "epoch": 9.74,
18112
+ "learning_rate": 1.3390368361829197e-05,
18113
+ "loss": 0.1818,
18114
+ "step": 905500
18115
+ },
18116
+ {
18117
+ "epoch": 9.75,
18118
+ "learning_rate": 1.3354874972508582e-05,
18119
+ "loss": 0.1815,
18120
+ "step": 906000
18121
+ },
18122
+ {
18123
+ "epoch": 9.75,
18124
+ "eval_loss": 0.17435437440872192,
18125
+ "eval_runtime": 2.7558,
18126
+ "eval_samples_per_second": 833.52,
18127
+ "eval_steps_per_second": 13.063,
18128
+ "step": 906000
18129
+ },
18130
+ {
18131
+ "epoch": 9.75,
18132
+ "learning_rate": 1.3319563788414934e-05,
18133
+ "loss": 0.182,
18134
+ "step": 906500
18135
+ },
18136
+ {
18137
+ "epoch": 9.76,
18138
+ "learning_rate": 1.3284434906087695e-05,
18139
+ "loss": 0.1823,
18140
+ "step": 907000
18141
+ },
18142
+ {
18143
+ "epoch": 9.76,
18144
+ "eval_loss": 0.17397646605968475,
18145
+ "eval_runtime": 2.6707,
18146
+ "eval_samples_per_second": 860.067,
18147
+ "eval_steps_per_second": 13.479,
18148
+ "step": 907000
18149
+ },
18150
+ {
18151
+ "epoch": 9.76,
18152
+ "learning_rate": 1.3249488421567911e-05,
18153
+ "loss": 0.1818,
18154
+ "step": 907500
18155
+ },
18156
+ {
18157
+ "epoch": 9.77,
18158
+ "learning_rate": 1.3214724430397915e-05,
18159
+ "loss": 0.1817,
18160
+ "step": 908000
18161
+ },
18162
+ {
18163
+ "epoch": 9.77,
18164
+ "eval_loss": 0.17298473417758942,
18165
+ "eval_runtime": 2.659,
18166
+ "eval_samples_per_second": 863.865,
18167
+ "eval_steps_per_second": 13.539,
18168
+ "step": 908000
18169
+ },
18170
+ {
18171
+ "epoch": 9.78,
18172
+ "learning_rate": 1.3180143027621145e-05,
18173
+ "loss": 0.1819,
18174
+ "step": 908500
18175
+ },
18176
+ {
18177
+ "epoch": 9.78,
18178
+ "learning_rate": 1.314574430778182e-05,
18179
+ "loss": 0.1817,
18180
+ "step": 909000
18181
+ },
18182
+ {
18183
+ "epoch": 9.78,
18184
+ "eval_loss": 0.17148981988430023,
18185
+ "eval_runtime": 2.7078,
18186
+ "eval_samples_per_second": 848.287,
18187
+ "eval_steps_per_second": 13.295,
18188
+ "step": 909000
18189
+ },
18190
+ {
18191
+ "epoch": 9.79,
18192
+ "learning_rate": 1.311152836492473e-05,
18193
+ "loss": 0.1817,
18194
+ "step": 909500
18195
+ },
18196
+ {
18197
+ "epoch": 9.79,
18198
+ "learning_rate": 1.3077495292594966e-05,
18199
+ "loss": 0.1817,
18200
+ "step": 910000
18201
+ },
18202
+ {
18203
+ "epoch": 9.79,
18204
+ "eval_loss": 0.17322474718093872,
18205
+ "eval_runtime": 2.6678,
18206
+ "eval_samples_per_second": 861.024,
18207
+ "eval_steps_per_second": 13.495,
18208
+ "step": 910000
18209
  }
18210
  ],
18211
  "max_steps": 1000000,
18212
  "num_train_epochs": 12,
18213
+ "total_flos": 6.3790744657339496e+22,
18214
  "trial_name": null,
18215
  "trial_params": null
18216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:083d2e25ea0ac8e88690f7365c08a3344ba965c5f4549f033918008711bc1651
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b52f3b8fcfa70b1731fae94d573cc6b63207a962d882488f83af9b17655c7c7
3
  size 449471589