jflotz commited on
Commit
966ea8f
·
1 Parent(s): 82e7d63

Training in progress, step 920000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:833419c3e72d7f9d08d84324dff99c4df5021ccb89fb201b7ceab844fd1fe7de
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85d7ab430aa8c54263516e5c658c95ef97b9bc952ec5cf5e5365b30e31306997
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:178bde3b89657b90235cfe9d9ea492b4c3952edf2d20ecb0ed27ebbb80775260
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:117dc2055f7a26952ace82c418351bb27b2cbd09036de7085a25da82b27081c1
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44cbeb264fb2a81130e0bac112a18c08ec1d410c1c39e23c023c635c67938a81
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2032da347399ee9901067558672d5410fcaa09ee842d38aba65b094b37a736
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dbfbe0d14b5c95909c74387495649181894d175bc5bdbfa086f89dc9ba7b8e5
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09f2ef3a0afa1ae47cb3dd253fe80e29739af6753655094c50bdfd3d8ca9758
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61414351d7a2b424bf69a03902f1cdb6a014111a0704f27525f615bba9831275
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eac4809b99b679dbbfdc011b63659991a28862c1d22f4f38d4dcdd1375d8975
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8736baa79faca75f1de8fbb0415f2f8cedffbb08cddfa2e1e02291f66849ffd
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac2c7ce896174bcaadeefd72a00d81f719974e36a24b565ddbefaaf2ca16bb3f
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43bc53bd3230780ced8543e570f3a1576c876dc5bb9da2beb20be127155a27a1
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a667e3ff228e320fd542f2c9810b24c5de42e01882b1675c0130a2b28728da55
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:425858a9ea3774cea4e80f4044a1828fb43f83f6100a0fd4aa4ea1985d9d8afe
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29a7714196dcafb8ce267314792ddd65aaf3d14c2545f4d588e79110e3a9dbdd
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e99df6b45e45b719db86866dd43b0e8f476f0d4749f66d2f42f7183415384c8
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286d7db0b43398757bd21b81f8821f59e9aec9025866e41358d6eb3d2324815f
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ffd4b3aa19562ff729663fcb259b8c71f90c0398d12b236c6885ec68255bc91
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e4dda75c800acb3eea07d56b537e9a8a3fcee9f9f4cb599f95d990328e10c0f
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa4db73f02c4f7c376b4a473b74af0d764f1f4d822f644308482e4f6b4c6cad2
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf0117689e0aa6f4558cab47b77e379a54989becb8f5536e911f2ec4fd9adb7
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3587343850894045,
5
- "global_step": 910000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10926,11 +10926,131 @@
10926
  "learning_rate": 1.3077495292594966e-05,
10927
  "loss": 0.285,
10928
  "step": 910000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10929
  }
10930
  ],
10931
  "max_steps": 1000000,
10932
  "num_train_epochs": 2,
10933
- "total_flos": 6.152247724689881e+22,
10934
  "trial_name": null,
10935
  "trial_params": null
10936
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3786640731499271,
5
+ "global_step": 920000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10926
  "learning_rate": 1.3077495292594966e-05,
10927
  "loss": 0.285,
10928
  "step": 910000
10929
+ },
10930
+ {
10931
+ "epoch": 1.36,
10932
+ "learning_rate": 1.3043645183837645e-05,
10933
+ "loss": 0.2843,
10934
+ "step": 910500
10935
+ },
10936
+ {
10937
+ "epoch": 1.36,
10938
+ "learning_rate": 1.3009978131197669e-05,
10939
+ "loss": 0.2853,
10940
+ "step": 911000
10941
+ },
10942
+ {
10943
+ "epoch": 1.36,
10944
+ "learning_rate": 1.297649422671947e-05,
10945
+ "loss": 0.2848,
10946
+ "step": 911500
10947
+ },
10948
+ {
10949
+ "epoch": 1.36,
10950
+ "learning_rate": 1.2943193561946762e-05,
10951
+ "loss": 0.2854,
10952
+ "step": 912000
10953
+ },
10954
+ {
10955
+ "epoch": 1.36,
10956
+ "learning_rate": 1.291007622792231e-05,
10957
+ "loss": 0.2845,
10958
+ "step": 912500
10959
+ },
10960
+ {
10961
+ "epoch": 1.36,
10962
+ "learning_rate": 1.2877142315187628e-05,
10963
+ "loss": 0.2849,
10964
+ "step": 913000
10965
+ },
10966
+ {
10967
+ "epoch": 1.37,
10968
+ "learning_rate": 1.2844391913782773e-05,
10969
+ "loss": 0.2852,
10970
+ "step": 913500
10971
+ },
10972
+ {
10973
+ "epoch": 1.37,
10974
+ "learning_rate": 1.28118251132461e-05,
10975
+ "loss": 0.2845,
10976
+ "step": 914000
10977
+ },
10978
+ {
10979
+ "epoch": 1.37,
10980
+ "learning_rate": 1.2779442002613984e-05,
10981
+ "loss": 0.2844,
10982
+ "step": 914500
10983
+ },
10984
+ {
10985
+ "epoch": 1.37,
10986
+ "learning_rate": 1.274724267042063e-05,
10987
+ "loss": 0.285,
10988
+ "step": 915000
10989
+ },
10990
+ {
10991
+ "epoch": 1.37,
10992
+ "learning_rate": 1.2715227204697775e-05,
10993
+ "loss": 0.2848,
10994
+ "step": 915500
10995
+ },
10996
+ {
10997
+ "epoch": 1.37,
10998
+ "learning_rate": 1.2683395692974472e-05,
10999
+ "loss": 0.2843,
11000
+ "step": 916000
11001
+ },
11002
+ {
11003
+ "epoch": 1.37,
11004
+ "learning_rate": 1.2651748222276879e-05,
11005
+ "loss": 0.284,
11006
+ "step": 916500
11007
+ },
11008
+ {
11009
+ "epoch": 1.37,
11010
+ "learning_rate": 1.2620284879127947e-05,
11011
+ "loss": 0.2846,
11012
+ "step": 917000
11013
+ },
11014
+ {
11015
+ "epoch": 1.37,
11016
+ "learning_rate": 1.2589005749547281e-05,
11017
+ "loss": 0.2848,
11018
+ "step": 917500
11019
+ },
11020
+ {
11021
+ "epoch": 1.37,
11022
+ "learning_rate": 1.2557910919050803e-05,
11023
+ "loss": 0.2845,
11024
+ "step": 918000
11025
+ },
11026
+ {
11027
+ "epoch": 1.38,
11028
+ "learning_rate": 1.2527000472650597e-05,
11029
+ "loss": 0.2842,
11030
+ "step": 918500
11031
+ },
11032
+ {
11033
+ "epoch": 1.38,
11034
+ "learning_rate": 1.2496274494854666e-05,
11035
+ "loss": 0.285,
11036
+ "step": 919000
11037
+ },
11038
+ {
11039
+ "epoch": 1.38,
11040
+ "learning_rate": 1.2465733069666629e-05,
11041
+ "loss": 0.2848,
11042
+ "step": 919500
11043
+ },
11044
+ {
11045
+ "epoch": 1.38,
11046
+ "learning_rate": 1.24353762805856e-05,
11047
+ "loss": 0.2847,
11048
+ "step": 920000
11049
  }
11050
  ],
11051
  "max_steps": 1000000,
11052
  "num_train_epochs": 2,
11053
+ "total_flos": 6.219855439259451e+22,
11054
  "trial_name": null,
11055
  "trial_params": null
11056
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:178bde3b89657b90235cfe9d9ea492b4c3952edf2d20ecb0ed27ebbb80775260
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:117dc2055f7a26952ace82c418351bb27b2cbd09036de7085a25da82b27081c1
3
  size 449450757