Upload 14 files
Browse files- latest +1 -1
- pytorch_model.bin +2 -2
- rng_state_0.pth +1 -1
- rng_state_1.pth +1 -1
- rng_state_2.pth +1 -1
- rng_state_3.pth +1 -1
- trainer_state.json +3 -162
latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step8000
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d990b51ce673b6fd2abc89e19c8de3396176bd4b7629d4afe284e263eeb25b50
|
| 3 |
+
size 23657822141
|
rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e3c5cb412e12159a59afe5657ce4b5e0a06e7fb420bedbb5228fe1245702762
|
| 3 |
size 14583
|
rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:741230672078323886b763e522c728741456a587860909fc529ce815a7aca5ec
|
| 3 |
size 14583
|
rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ea587886b41579993bb5d20c79047b968ae2d71d22ba4c739b07ce31d7486a6
|
| 3 |
size 14583
|
rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ab727740f74dd67e60283d27b4339609a1dda888b067cc06520e2f1d7dc17db
|
| 3 |
size 14583
|
trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 2.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -48006,170 +48006,11 @@
|
|
| 48006 |
"learning_rate": 2e-05,
|
| 48007 |
"loss": 0.4429,
|
| 48008 |
"step": 8000
|
| 48009 |
-
},
|
| 48010 |
-
{
|
| 48011 |
-
"epoch": 2.99,
|
| 48012 |
-
"learning_rate": 2e-05,
|
| 48013 |
-
"loss": 0.5603,
|
| 48014 |
-
"step": 8001
|
| 48015 |
-
},
|
| 48016 |
-
{
|
| 48017 |
-
"epoch": 2.99,
|
| 48018 |
-
"learning_rate": 2e-05,
|
| 48019 |
-
"loss": 0.5119,
|
| 48020 |
-
"step": 8002
|
| 48021 |
-
},
|
| 48022 |
-
{
|
| 48023 |
-
"epoch": 2.99,
|
| 48024 |
-
"learning_rate": 2e-05,
|
| 48025 |
-
"loss": 0.3574,
|
| 48026 |
-
"step": 8003
|
| 48027 |
-
},
|
| 48028 |
-
{
|
| 48029 |
-
"epoch": 2.99,
|
| 48030 |
-
"learning_rate": 2e-05,
|
| 48031 |
-
"loss": 0.4055,
|
| 48032 |
-
"step": 8004
|
| 48033 |
-
},
|
| 48034 |
-
{
|
| 48035 |
-
"epoch": 2.99,
|
| 48036 |
-
"learning_rate": 2e-05,
|
| 48037 |
-
"loss": 0.6877,
|
| 48038 |
-
"step": 8005
|
| 48039 |
-
},
|
| 48040 |
-
{
|
| 48041 |
-
"epoch": 2.99,
|
| 48042 |
-
"learning_rate": 2e-05,
|
| 48043 |
-
"loss": 0.3634,
|
| 48044 |
-
"step": 8006
|
| 48045 |
-
},
|
| 48046 |
-
{
|
| 48047 |
-
"epoch": 2.99,
|
| 48048 |
-
"learning_rate": 2e-05,
|
| 48049 |
-
"loss": 0.4054,
|
| 48050 |
-
"step": 8007
|
| 48051 |
-
},
|
| 48052 |
-
{
|
| 48053 |
-
"epoch": 2.99,
|
| 48054 |
-
"learning_rate": 2e-05,
|
| 48055 |
-
"loss": 0.3723,
|
| 48056 |
-
"step": 8008
|
| 48057 |
-
},
|
| 48058 |
-
{
|
| 48059 |
-
"epoch": 2.99,
|
| 48060 |
-
"learning_rate": 2e-05,
|
| 48061 |
-
"loss": 0.4081,
|
| 48062 |
-
"step": 8009
|
| 48063 |
-
},
|
| 48064 |
-
{
|
| 48065 |
-
"epoch": 2.99,
|
| 48066 |
-
"learning_rate": 2e-05,
|
| 48067 |
-
"loss": 0.4419,
|
| 48068 |
-
"step": 8010
|
| 48069 |
-
},
|
| 48070 |
-
{
|
| 48071 |
-
"epoch": 2.99,
|
| 48072 |
-
"learning_rate": 2e-05,
|
| 48073 |
-
"loss": 0.6377,
|
| 48074 |
-
"step": 8011
|
| 48075 |
-
},
|
| 48076 |
-
{
|
| 48077 |
-
"epoch": 2.99,
|
| 48078 |
-
"learning_rate": 2e-05,
|
| 48079 |
-
"loss": 0.5082,
|
| 48080 |
-
"step": 8012
|
| 48081 |
-
},
|
| 48082 |
-
{
|
| 48083 |
-
"epoch": 2.99,
|
| 48084 |
-
"learning_rate": 2e-05,
|
| 48085 |
-
"loss": 0.5274,
|
| 48086 |
-
"step": 8013
|
| 48087 |
-
},
|
| 48088 |
-
{
|
| 48089 |
-
"epoch": 3.0,
|
| 48090 |
-
"learning_rate": 2e-05,
|
| 48091 |
-
"loss": 0.4954,
|
| 48092 |
-
"step": 8014
|
| 48093 |
-
},
|
| 48094 |
-
{
|
| 48095 |
-
"epoch": 3.0,
|
| 48096 |
-
"learning_rate": 2e-05,
|
| 48097 |
-
"loss": 0.617,
|
| 48098 |
-
"step": 8015
|
| 48099 |
-
},
|
| 48100 |
-
{
|
| 48101 |
-
"epoch": 3.0,
|
| 48102 |
-
"learning_rate": 2e-05,
|
| 48103 |
-
"loss": 0.4943,
|
| 48104 |
-
"step": 8016
|
| 48105 |
-
},
|
| 48106 |
-
{
|
| 48107 |
-
"epoch": 3.0,
|
| 48108 |
-
"learning_rate": 2e-05,
|
| 48109 |
-
"loss": 0.3116,
|
| 48110 |
-
"step": 8017
|
| 48111 |
-
},
|
| 48112 |
-
{
|
| 48113 |
-
"epoch": 3.0,
|
| 48114 |
-
"learning_rate": 2e-05,
|
| 48115 |
-
"loss": 0.4602,
|
| 48116 |
-
"step": 8018
|
| 48117 |
-
},
|
| 48118 |
-
{
|
| 48119 |
-
"epoch": 3.0,
|
| 48120 |
-
"learning_rate": 2e-05,
|
| 48121 |
-
"loss": 0.4009,
|
| 48122 |
-
"step": 8019
|
| 48123 |
-
},
|
| 48124 |
-
{
|
| 48125 |
-
"epoch": 3.0,
|
| 48126 |
-
"learning_rate": 2e-05,
|
| 48127 |
-
"loss": 0.4631,
|
| 48128 |
-
"step": 8020
|
| 48129 |
-
},
|
| 48130 |
-
{
|
| 48131 |
-
"epoch": 3.0,
|
| 48132 |
-
"learning_rate": 2e-05,
|
| 48133 |
-
"loss": 0.3465,
|
| 48134 |
-
"step": 8021
|
| 48135 |
-
},
|
| 48136 |
-
{
|
| 48137 |
-
"epoch": 3.0,
|
| 48138 |
-
"learning_rate": 2e-05,
|
| 48139 |
-
"loss": 0.6339,
|
| 48140 |
-
"step": 8022
|
| 48141 |
-
},
|
| 48142 |
-
{
|
| 48143 |
-
"epoch": 3.0,
|
| 48144 |
-
"learning_rate": 2e-05,
|
| 48145 |
-
"loss": 0.3831,
|
| 48146 |
-
"step": 8023
|
| 48147 |
-
},
|
| 48148 |
-
{
|
| 48149 |
-
"epoch": 3.0,
|
| 48150 |
-
"learning_rate": 2e-05,
|
| 48151 |
-
"loss": 0.4769,
|
| 48152 |
-
"step": 8024
|
| 48153 |
-
},
|
| 48154 |
-
{
|
| 48155 |
-
"epoch": 3.0,
|
| 48156 |
-
"learning_rate": 2e-05,
|
| 48157 |
-
"loss": 0.5142,
|
| 48158 |
-
"step": 8025
|
| 48159 |
-
},
|
| 48160 |
-
{
|
| 48161 |
-
"epoch": 3.0,
|
| 48162 |
-
"step": 8025,
|
| 48163 |
-
"total_flos": 1027959387144192.0,
|
| 48164 |
-
"train_loss": 1.0478906600497593,
|
| 48165 |
-
"train_runtime": 551322.7584,
|
| 48166 |
-
"train_samples_per_second": 0.466,
|
| 48167 |
-
"train_steps_per_second": 0.015
|
| 48168 |
}
|
| 48169 |
],
|
| 48170 |
"max_steps": 8025,
|
| 48171 |
"num_train_epochs": 3,
|
| 48172 |
-
"total_flos":
|
| 48173 |
"trial_name": null,
|
| 48174 |
"trial_params": null
|
| 48175 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.989955617846298,
|
| 5 |
+
"global_step": 8000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 48006 |
"learning_rate": 2e-05,
|
| 48007 |
"loss": 0.4429,
|
| 48008 |
"step": 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48009 |
}
|
| 48010 |
],
|
| 48011 |
"max_steps": 8025,
|
| 48012 |
"num_train_epochs": 3,
|
| 48013 |
+
"total_flos": 1024787635126272.0,
|
| 48014 |
"trial_name": null,
|
| 48015 |
"trial_params": null
|
| 48016 |
}
|