jflotz commited on
Commit
89c8e94
·
1 Parent(s): 966ea8f

Training in progress, step 930000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85d7ab430aa8c54263516e5c658c95ef97b9bc952ec5cf5e5365b30e31306997
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46ff1d8d7d94b8fc5390751564ccb419f91a9858db9bdfd9f11bebe5113b944a
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:117dc2055f7a26952ace82c418351bb27b2cbd09036de7085a25da82b27081c1
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d2bac07d166f73980c3c9cce6825ce7a1c1f5f22b97d0264ce5e7ba42eeb3df
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e2032da347399ee9901067558672d5410fcaa09ee842d38aba65b094b37a736
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523ad7de15419f9770d9e7768264983ffe3ed3b6b2e41d3411de9307e3568dda
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b09f2ef3a0afa1ae47cb3dd253fe80e29739af6753655094c50bdfd3d8ca9758
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f02b76320d64a6c1fa36467fd16fb842d38967c6960f680698d2b6c46fa9ddb
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8eac4809b99b679dbbfdc011b63659991a28862c1d22f4f38d4dcdd1375d8975
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb4ba7042a42dc2dddfda89081c338c759af082271bf3d6b2249d374dfb5549f
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac2c7ce896174bcaadeefd72a00d81f719974e36a24b565ddbefaaf2ca16bb3f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d492de85ba9ca1e7e895ebf249111dbc1e669c10034dc3697642d6b066377bc6
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a667e3ff228e320fd542f2c9810b24c5de42e01882b1675c0130a2b28728da55
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87bf88b63314cf7e6bc76f627a1120e94df3871d06e2e6a10b31a22c360f0003
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29a7714196dcafb8ce267314792ddd65aaf3d14c2545f4d588e79110e3a9dbdd
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b861dd3cac874664255fa5c36d008a64ba119b36b6461f68fec95ff34294b32
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:286d7db0b43398757bd21b81f8821f59e9aec9025866e41358d6eb3d2324815f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b64737ac04d5942009955008d940bd1d1005db5a2c22b7982f258d011e0ad8cb
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e4dda75c800acb3eea07d56b537e9a8a3fcee9f9f4cb599f95d990328e10c0f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bb15e8b762074a6cf94106e13146d7507cbb10c383c4eadc6b62f549780de27
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cf0117689e0aa6f4558cab47b77e379a54989becb8f5536e911f2ec4fd9adb7
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8bf7fa4ff5edd25c6aca18d0779a09bdac81be773b8a02595b2a01362d92297
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3786640731499271,
5
- "global_step": 920000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -11046,11 +11046,131 @@
11046
  "learning_rate": 1.24353762805856e-05,
11047
  "loss": 0.2847,
11048
  "step": 920000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11049
  }
11050
  ],
11051
  "max_steps": 1000000,
11052
  "num_train_epochs": 2,
11053
- "total_flos": 6.219855439259451e+22,
11054
  "trial_name": null,
11055
  "trial_params": null
11056
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3985937612104495,
5
+ "global_step": 930000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
11046
  "learning_rate": 1.24353762805856e-05,
11047
  "loss": 0.2847,
11048
  "step": 920000
11049
+ },
11050
+ {
11051
+ "epoch": 1.38,
11052
+ "learning_rate": 1.240520421060586e-05,
11053
+ "loss": 0.2843,
11054
+ "step": 920500
11055
+ },
11056
+ {
11057
+ "epoch": 1.38,
11058
+ "learning_rate": 1.2375216942216713e-05,
11059
+ "loss": 0.2848,
11060
+ "step": 921000
11061
+ },
11062
+ {
11063
+ "epoch": 1.38,
11064
+ "learning_rate": 1.2345414557402198e-05,
11065
+ "loss": 0.2848,
11066
+ "step": 921500
11067
+ },
11068
+ {
11069
+ "epoch": 1.38,
11070
+ "learning_rate": 1.2315797137640906e-05,
11071
+ "loss": 0.2843,
11072
+ "step": 922000
11073
+ },
11074
+ {
11075
+ "epoch": 1.38,
11076
+ "learning_rate": 1.2286364763905723e-05,
11077
+ "loss": 0.2847,
11078
+ "step": 922500
11079
+ },
11080
+ {
11081
+ "epoch": 1.38,
11082
+ "learning_rate": 1.225711751666363e-05,
11083
+ "loss": 0.2841,
11084
+ "step": 923000
11085
+ },
11086
+ {
11087
+ "epoch": 1.39,
11088
+ "learning_rate": 1.2228055475875488e-05,
11089
+ "loss": 0.285,
11090
+ "step": 923500
11091
+ },
11092
+ {
11093
+ "epoch": 1.39,
11094
+ "learning_rate": 1.2199178720995825e-05,
11095
+ "loss": 0.2845,
11096
+ "step": 924000
11097
+ },
11098
+ {
11099
+ "epoch": 1.39,
11100
+ "learning_rate": 1.217048733097256e-05,
11101
+ "loss": 0.2843,
11102
+ "step": 924500
11103
+ },
11104
+ {
11105
+ "epoch": 1.39,
11106
+ "learning_rate": 1.2141981384246874e-05,
11107
+ "loss": 0.2845,
11108
+ "step": 925000
11109
+ },
11110
+ {
11111
+ "epoch": 1.39,
11112
+ "learning_rate": 1.211366095875293e-05,
11113
+ "loss": 0.2849,
11114
+ "step": 925500
11115
+ },
11116
+ {
11117
+ "epoch": 1.39,
11118
+ "learning_rate": 1.2085526131917685e-05,
11119
+ "loss": 0.2847,
11120
+ "step": 926000
11121
+ },
11122
+ {
11123
+ "epoch": 1.39,
11124
+ "learning_rate": 1.2057576980660691e-05,
11125
+ "loss": 0.2841,
11126
+ "step": 926500
11127
+ },
11128
+ {
11129
+ "epoch": 1.39,
11130
+ "learning_rate": 1.2029813581393866e-05,
11131
+ "loss": 0.2847,
11132
+ "step": 927000
11133
+ },
11134
+ {
11135
+ "epoch": 1.39,
11136
+ "learning_rate": 1.2002236010021269e-05,
11137
+ "loss": 0.2849,
11138
+ "step": 927500
11139
+ },
11140
+ {
11141
+ "epoch": 1.39,
11142
+ "learning_rate": 1.197484434193893e-05,
11143
+ "loss": 0.2842,
11144
+ "step": 928000
11145
+ },
11146
+ {
11147
+ "epoch": 1.4,
11148
+ "learning_rate": 1.1947638652034617e-05,
11149
+ "loss": 0.2843,
11150
+ "step": 928500
11151
+ },
11152
+ {
11153
+ "epoch": 1.4,
11154
+ "learning_rate": 1.192061901468768e-05,
11155
+ "loss": 0.2844,
11156
+ "step": 929000
11157
+ },
11158
+ {
11159
+ "epoch": 1.4,
11160
+ "learning_rate": 1.1893785503768736e-05,
11161
+ "loss": 0.2838,
11162
+ "step": 929500
11163
+ },
11164
+ {
11165
+ "epoch": 1.4,
11166
+ "learning_rate": 1.1867138192639601e-05,
11167
+ "loss": 0.2842,
11168
+ "step": 930000
11169
  }
11170
  ],
11171
  "max_steps": 1000000,
11172
  "num_train_epochs": 2,
11173
+ "total_flos": 6.287461350629951e+22,
11174
  "trial_name": null,
11175
  "trial_params": null
11176
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:117dc2055f7a26952ace82c418351bb27b2cbd09036de7085a25da82b27081c1
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d2bac07d166f73980c3c9cce6825ce7a1c1f5f22b97d0264ce5e7ba42eeb3df
3
  size 449450757