Training in progress, step 920000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85d7ab430aa8c54263516e5c658c95ef97b9bc952ec5cf5e5365b30e31306997
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:117dc2055f7a26952ace82c418351bb27b2cbd09036de7085a25da82b27081c1
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e2032da347399ee9901067558672d5410fcaa09ee842d38aba65b094b37a736
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b09f2ef3a0afa1ae47cb3dd253fe80e29739af6753655094c50bdfd3d8ca9758
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8eac4809b99b679dbbfdc011b63659991a28862c1d22f4f38d4dcdd1375d8975
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac2c7ce896174bcaadeefd72a00d81f719974e36a24b565ddbefaaf2ca16bb3f
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a667e3ff228e320fd542f2c9810b24c5de42e01882b1675c0130a2b28728da55
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29a7714196dcafb8ce267314792ddd65aaf3d14c2545f4d588e79110e3a9dbdd
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:286d7db0b43398757bd21b81f8821f59e9aec9025866e41358d6eb3d2324815f
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e4dda75c800acb3eea07d56b537e9a8a3fcee9f9f4cb599f95d990328e10c0f
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cf0117689e0aa6f4558cab47b77e379a54989becb8f5536e911f2ec4fd9adb7
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -10926,11 +10926,131 @@
|
|
| 10926 |
"learning_rate": 1.3077495292594966e-05,
|
| 10927 |
"loss": 0.285,
|
| 10928 |
"step": 910000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10929 |
}
|
| 10930 |
],
|
| 10931 |
"max_steps": 1000000,
|
| 10932 |
"num_train_epochs": 2,
|
| 10933 |
-
"total_flos": 6.
|
| 10934 |
"trial_name": null,
|
| 10935 |
"trial_params": null
|
| 10936 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.3786640731499271,
|
| 5 |
+
"global_step": 920000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 10926 |
"learning_rate": 1.3077495292594966e-05,
|
| 10927 |
"loss": 0.285,
|
| 10928 |
"step": 910000
|
| 10929 |
+
},
|
| 10930 |
+
{
|
| 10931 |
+
"epoch": 1.36,
|
| 10932 |
+
"learning_rate": 1.3043645183837645e-05,
|
| 10933 |
+
"loss": 0.2843,
|
| 10934 |
+
"step": 910500
|
| 10935 |
+
},
|
| 10936 |
+
{
|
| 10937 |
+
"epoch": 1.36,
|
| 10938 |
+
"learning_rate": 1.3009978131197669e-05,
|
| 10939 |
+
"loss": 0.2853,
|
| 10940 |
+
"step": 911000
|
| 10941 |
+
},
|
| 10942 |
+
{
|
| 10943 |
+
"epoch": 1.36,
|
| 10944 |
+
"learning_rate": 1.297649422671947e-05,
|
| 10945 |
+
"loss": 0.2848,
|
| 10946 |
+
"step": 911500
|
| 10947 |
+
},
|
| 10948 |
+
{
|
| 10949 |
+
"epoch": 1.36,
|
| 10950 |
+
"learning_rate": 1.2943193561946762e-05,
|
| 10951 |
+
"loss": 0.2854,
|
| 10952 |
+
"step": 912000
|
| 10953 |
+
},
|
| 10954 |
+
{
|
| 10955 |
+
"epoch": 1.36,
|
| 10956 |
+
"learning_rate": 1.291007622792231e-05,
|
| 10957 |
+
"loss": 0.2845,
|
| 10958 |
+
"step": 912500
|
| 10959 |
+
},
|
| 10960 |
+
{
|
| 10961 |
+
"epoch": 1.36,
|
| 10962 |
+
"learning_rate": 1.2877142315187628e-05,
|
| 10963 |
+
"loss": 0.2849,
|
| 10964 |
+
"step": 913000
|
| 10965 |
+
},
|
| 10966 |
+
{
|
| 10967 |
+
"epoch": 1.37,
|
| 10968 |
+
"learning_rate": 1.2844391913782773e-05,
|
| 10969 |
+
"loss": 0.2852,
|
| 10970 |
+
"step": 913500
|
| 10971 |
+
},
|
| 10972 |
+
{
|
| 10973 |
+
"epoch": 1.37,
|
| 10974 |
+
"learning_rate": 1.28118251132461e-05,
|
| 10975 |
+
"loss": 0.2845,
|
| 10976 |
+
"step": 914000
|
| 10977 |
+
},
|
| 10978 |
+
{
|
| 10979 |
+
"epoch": 1.37,
|
| 10980 |
+
"learning_rate": 1.2779442002613984e-05,
|
| 10981 |
+
"loss": 0.2844,
|
| 10982 |
+
"step": 914500
|
| 10983 |
+
},
|
| 10984 |
+
{
|
| 10985 |
+
"epoch": 1.37,
|
| 10986 |
+
"learning_rate": 1.274724267042063e-05,
|
| 10987 |
+
"loss": 0.285,
|
| 10988 |
+
"step": 915000
|
| 10989 |
+
},
|
| 10990 |
+
{
|
| 10991 |
+
"epoch": 1.37,
|
| 10992 |
+
"learning_rate": 1.2715227204697775e-05,
|
| 10993 |
+
"loss": 0.2848,
|
| 10994 |
+
"step": 915500
|
| 10995 |
+
},
|
| 10996 |
+
{
|
| 10997 |
+
"epoch": 1.37,
|
| 10998 |
+
"learning_rate": 1.2683395692974472e-05,
|
| 10999 |
+
"loss": 0.2843,
|
| 11000 |
+
"step": 916000
|
| 11001 |
+
},
|
| 11002 |
+
{
|
| 11003 |
+
"epoch": 1.37,
|
| 11004 |
+
"learning_rate": 1.2651748222276879e-05,
|
| 11005 |
+
"loss": 0.284,
|
| 11006 |
+
"step": 916500
|
| 11007 |
+
},
|
| 11008 |
+
{
|
| 11009 |
+
"epoch": 1.37,
|
| 11010 |
+
"learning_rate": 1.2620284879127947e-05,
|
| 11011 |
+
"loss": 0.2846,
|
| 11012 |
+
"step": 917000
|
| 11013 |
+
},
|
| 11014 |
+
{
|
| 11015 |
+
"epoch": 1.37,
|
| 11016 |
+
"learning_rate": 1.2589005749547281e-05,
|
| 11017 |
+
"loss": 0.2848,
|
| 11018 |
+
"step": 917500
|
| 11019 |
+
},
|
| 11020 |
+
{
|
| 11021 |
+
"epoch": 1.37,
|
| 11022 |
+
"learning_rate": 1.2557910919050803e-05,
|
| 11023 |
+
"loss": 0.2845,
|
| 11024 |
+
"step": 918000
|
| 11025 |
+
},
|
| 11026 |
+
{
|
| 11027 |
+
"epoch": 1.38,
|
| 11028 |
+
"learning_rate": 1.2527000472650597e-05,
|
| 11029 |
+
"loss": 0.2842,
|
| 11030 |
+
"step": 918500
|
| 11031 |
+
},
|
| 11032 |
+
{
|
| 11033 |
+
"epoch": 1.38,
|
| 11034 |
+
"learning_rate": 1.2496274494854666e-05,
|
| 11035 |
+
"loss": 0.285,
|
| 11036 |
+
"step": 919000
|
| 11037 |
+
},
|
| 11038 |
+
{
|
| 11039 |
+
"epoch": 1.38,
|
| 11040 |
+
"learning_rate": 1.2465733069666629e-05,
|
| 11041 |
+
"loss": 0.2848,
|
| 11042 |
+
"step": 919500
|
| 11043 |
+
},
|
| 11044 |
+
{
|
| 11045 |
+
"epoch": 1.38,
|
| 11046 |
+
"learning_rate": 1.24353762805856e-05,
|
| 11047 |
+
"loss": 0.2847,
|
| 11048 |
+
"step": 920000
|
| 11049 |
}
|
| 11050 |
],
|
| 11051 |
"max_steps": 1000000,
|
| 11052 |
"num_train_epochs": 2,
|
| 11053 |
+
"total_flos": 6.219855439259451e+22,
|
| 11054 |
"trial_name": null,
|
| 11055 |
"trial_params": null
|
| 11056 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:117dc2055f7a26952ace82c418351bb27b2cbd09036de7085a25da82b27081c1
|
| 3 |
size 449450757
|