Training in progress, step 360000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +2 -2
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e16c64f372a1e3d1a1bc418a8e836e1cdfb665e54de868c2b11e7665a8124bd
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc2fe2e5c738c8106278780fab9f7f87cb208c3a8d2ade89df93aa0343c89bdb
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:005b2e75d9554e018ed97e7633dc38306c4b6264c09e33e752be796ffbb52bec
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f90d85a554f4240712d12f4bc1ca42d7122c1ea252e7027b0c918e99e4e1de85
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:795aa2456aa093dc231a0ad24cc817827fd817979cb85490c41b919c7e2ac93d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3a87c23715b7e025f85b63ac84fed0695e2db4c59ea40d605d467271ee7eaff
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2870379f6b63b47a9122657f592b0d575a5671da1a1fb7b494f10475da4ee08
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccc684958440988665bb83e21073c7e935f44d96d06218e7d486fc15f417721c
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7790e1f5d1d539cc24a5c7f36951077137e9c4173691b395bbb8021eb1098267
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:296d7ff2fcb06c6f8604bd8a18f2173bb33ea1cf17b1b7e10c614ffd53051e05
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6e54422706a010aa16b679660182e5a0c0f546c43656852cb88a82c1d45dccf
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7006,11 +7006,211 @@
|
|
| 7006 |
"eval_samples_per_second": 1982.304,
|
| 7007 |
"eval_steps_per_second": 31.717,
|
| 7008 |
"step": 350000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7009 |
}
|
| 7010 |
],
|
| 7011 |
"max_steps": 500000,
|
| 7012 |
"num_train_epochs": 16,
|
| 7013 |
-
"total_flos": 1.
|
| 7014 |
"trial_name": null,
|
| 7015 |
"trial_params": null
|
| 7016 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 11.029749685958516,
|
| 5 |
+
"global_step": 360000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7006 |
"eval_samples_per_second": 1982.304,
|
| 7007 |
"eval_steps_per_second": 31.717,
|
| 7008 |
"step": 350000
|
| 7009 |
+
},
|
| 7010 |
+
{
|
| 7011 |
+
"epoch": 10.74,
|
| 7012 |
+
"learning_rate": 7.529152489465592e-05,
|
| 7013 |
+
"loss": 0.3237,
|
| 7014 |
+
"step": 350500
|
| 7015 |
+
},
|
| 7016 |
+
{
|
| 7017 |
+
"epoch": 10.75,
|
| 7018 |
+
"learning_rate": 7.489140439617708e-05,
|
| 7019 |
+
"loss": 0.3239,
|
| 7020 |
+
"step": 351000
|
| 7021 |
+
},
|
| 7022 |
+
{
|
| 7023 |
+
"epoch": 10.75,
|
| 7024 |
+
"eval_loss": 0.7802942991256714,
|
| 7025 |
+
"eval_runtime": 0.4971,
|
| 7026 |
+
"eval_samples_per_second": 2011.848,
|
| 7027 |
+
"eval_steps_per_second": 32.19,
|
| 7028 |
+
"step": 351000
|
| 7029 |
+
},
|
| 7030 |
+
{
|
| 7031 |
+
"epoch": 10.77,
|
| 7032 |
+
"learning_rate": 7.449215995246522e-05,
|
| 7033 |
+
"loss": 0.3236,
|
| 7034 |
+
"step": 351500
|
| 7035 |
+
},
|
| 7036 |
+
{
|
| 7037 |
+
"epoch": 10.78,
|
| 7038 |
+
"learning_rate": 7.409379592959367e-05,
|
| 7039 |
+
"loss": 0.3237,
|
| 7040 |
+
"step": 352000
|
| 7041 |
+
},
|
| 7042 |
+
{
|
| 7043 |
+
"epoch": 10.78,
|
| 7044 |
+
"eval_loss": 0.7798171043395996,
|
| 7045 |
+
"eval_runtime": 0.501,
|
| 7046 |
+
"eval_samples_per_second": 1995.947,
|
| 7047 |
+
"eval_steps_per_second": 31.935,
|
| 7048 |
+
"step": 352000
|
| 7049 |
+
},
|
| 7050 |
+
{
|
| 7051 |
+
"epoch": 10.8,
|
| 7052 |
+
"learning_rate": 7.369631668400746e-05,
|
| 7053 |
+
"loss": 0.3234,
|
| 7054 |
+
"step": 352500
|
| 7055 |
+
},
|
| 7056 |
+
{
|
| 7057 |
+
"epoch": 10.82,
|
| 7058 |
+
"learning_rate": 7.3299726562476e-05,
|
| 7059 |
+
"loss": 0.3231,
|
| 7060 |
+
"step": 353000
|
| 7061 |
+
},
|
| 7062 |
+
{
|
| 7063 |
+
"epoch": 10.82,
|
| 7064 |
+
"eval_loss": 0.7781672477722168,
|
| 7065 |
+
"eval_runtime": 0.5053,
|
| 7066 |
+
"eval_samples_per_second": 1979.072,
|
| 7067 |
+
"eval_steps_per_second": 31.665,
|
| 7068 |
+
"step": 353000
|
| 7069 |
+
},
|
| 7070 |
+
{
|
| 7071 |
+
"epoch": 10.83,
|
| 7072 |
+
"learning_rate": 7.290402990204531e-05,
|
| 7073 |
+
"loss": 0.3233,
|
| 7074 |
+
"step": 353500
|
| 7075 |
+
},
|
| 7076 |
+
{
|
| 7077 |
+
"epoch": 10.85,
|
| 7078 |
+
"learning_rate": 7.250923102999073e-05,
|
| 7079 |
+
"loss": 0.3234,
|
| 7080 |
+
"step": 354000
|
| 7081 |
+
},
|
| 7082 |
+
{
|
| 7083 |
+
"epoch": 10.85,
|
| 7084 |
+
"eval_loss": 0.7746726870536804,
|
| 7085 |
+
"eval_runtime": 0.5021,
|
| 7086 |
+
"eval_samples_per_second": 1991.707,
|
| 7087 |
+
"eval_steps_per_second": 31.867,
|
| 7088 |
+
"step": 354000
|
| 7089 |
+
},
|
| 7090 |
+
{
|
| 7091 |
+
"epoch": 10.86,
|
| 7092 |
+
"learning_rate": 7.211533426376934e-05,
|
| 7093 |
+
"loss": 0.3234,
|
| 7094 |
+
"step": 354500
|
| 7095 |
+
},
|
| 7096 |
+
{
|
| 7097 |
+
"epoch": 10.88,
|
| 7098 |
+
"learning_rate": 7.172234391097317e-05,
|
| 7099 |
+
"loss": 0.3232,
|
| 7100 |
+
"step": 355000
|
| 7101 |
+
},
|
| 7102 |
+
{
|
| 7103 |
+
"epoch": 10.88,
|
| 7104 |
+
"eval_loss": 0.7761996984481812,
|
| 7105 |
+
"eval_runtime": 0.5166,
|
| 7106 |
+
"eval_samples_per_second": 1935.585,
|
| 7107 |
+
"eval_steps_per_second": 30.969,
|
| 7108 |
+
"step": 355000
|
| 7109 |
+
},
|
| 7110 |
+
{
|
| 7111 |
+
"epoch": 10.89,
|
| 7112 |
+
"learning_rate": 7.133026426928173e-05,
|
| 7113 |
+
"loss": 0.3231,
|
| 7114 |
+
"step": 355500
|
| 7115 |
+
},
|
| 7116 |
+
{
|
| 7117 |
+
"epoch": 10.91,
|
| 7118 |
+
"learning_rate": 7.093909962641514e-05,
|
| 7119 |
+
"loss": 0.3254,
|
| 7120 |
+
"step": 356000
|
| 7121 |
+
},
|
| 7122 |
+
{
|
| 7123 |
+
"epoch": 10.91,
|
| 7124 |
+
"eval_loss": 0.7848865985870361,
|
| 7125 |
+
"eval_runtime": 0.5114,
|
| 7126 |
+
"eval_samples_per_second": 1955.496,
|
| 7127 |
+
"eval_steps_per_second": 31.288,
|
| 7128 |
+
"step": 356000
|
| 7129 |
+
},
|
| 7130 |
+
{
|
| 7131 |
+
"epoch": 10.92,
|
| 7132 |
+
"learning_rate": 7.054885426008737e-05,
|
| 7133 |
+
"loss": 0.3229,
|
| 7134 |
+
"step": 356500
|
| 7135 |
+
},
|
| 7136 |
+
{
|
| 7137 |
+
"epoch": 10.94,
|
| 7138 |
+
"learning_rate": 7.015953243795907e-05,
|
| 7139 |
+
"loss": 0.3229,
|
| 7140 |
+
"step": 357000
|
| 7141 |
+
},
|
| 7142 |
+
{
|
| 7143 |
+
"epoch": 10.94,
|
| 7144 |
+
"eval_loss": 0.7789940237998962,
|
| 7145 |
+
"eval_runtime": 0.5055,
|
| 7146 |
+
"eval_samples_per_second": 1978.178,
|
| 7147 |
+
"eval_steps_per_second": 31.651,
|
| 7148 |
+
"step": 357000
|
| 7149 |
+
},
|
| 7150 |
+
{
|
| 7151 |
+
"epoch": 10.95,
|
| 7152 |
+
"learning_rate": 6.97711384175914e-05,
|
| 7153 |
+
"loss": 0.3244,
|
| 7154 |
+
"step": 357500
|
| 7155 |
+
},
|
| 7156 |
+
{
|
| 7157 |
+
"epoch": 10.97,
|
| 7158 |
+
"learning_rate": 6.938367644639911e-05,
|
| 7159 |
+
"loss": 0.3227,
|
| 7160 |
+
"step": 358000
|
| 7161 |
+
},
|
| 7162 |
+
{
|
| 7163 |
+
"epoch": 10.97,
|
| 7164 |
+
"eval_loss": 0.7808487415313721,
|
| 7165 |
+
"eval_runtime": 0.5081,
|
| 7166 |
+
"eval_samples_per_second": 1968.006,
|
| 7167 |
+
"eval_steps_per_second": 31.488,
|
| 7168 |
+
"step": 358000
|
| 7169 |
+
},
|
| 7170 |
+
{
|
| 7171 |
+
"epoch": 10.98,
|
| 7172 |
+
"learning_rate": 6.899715076160425e-05,
|
| 7173 |
+
"loss": 0.3226,
|
| 7174 |
+
"step": 358500
|
| 7175 |
+
},
|
| 7176 |
+
{
|
| 7177 |
+
"epoch": 11.0,
|
| 7178 |
+
"learning_rate": 6.861156559018986e-05,
|
| 7179 |
+
"loss": 0.323,
|
| 7180 |
+
"step": 359000
|
| 7181 |
+
},
|
| 7182 |
+
{
|
| 7183 |
+
"epoch": 11.0,
|
| 7184 |
+
"eval_loss": 0.7747591137886047,
|
| 7185 |
+
"eval_runtime": 0.493,
|
| 7186 |
+
"eval_samples_per_second": 2028.496,
|
| 7187 |
+
"eval_steps_per_second": 32.456,
|
| 7188 |
+
"step": 359000
|
| 7189 |
+
},
|
| 7190 |
+
{
|
| 7191 |
+
"epoch": 11.01,
|
| 7192 |
+
"learning_rate": 6.822692514885346e-05,
|
| 7193 |
+
"loss": 0.3225,
|
| 7194 |
+
"step": 359500
|
| 7195 |
+
},
|
| 7196 |
+
{
|
| 7197 |
+
"epoch": 11.03,
|
| 7198 |
+
"learning_rate": 6.784323364396135e-05,
|
| 7199 |
+
"loss": 0.3224,
|
| 7200 |
+
"step": 360000
|
| 7201 |
+
},
|
| 7202 |
+
{
|
| 7203 |
+
"epoch": 11.03,
|
| 7204 |
+
"eval_loss": 0.7760407328605652,
|
| 7205 |
+
"eval_runtime": 0.5204,
|
| 7206 |
+
"eval_samples_per_second": 1921.599,
|
| 7207 |
+
"eval_steps_per_second": 30.746,
|
| 7208 |
+
"step": 360000
|
| 7209 |
}
|
| 7210 |
],
|
| 7211 |
"max_steps": 500000,
|
| 7212 |
"num_train_epochs": 16,
|
| 7213 |
+
"total_flos": 1.1501449128410266e+22,
|
| 7214 |
"trial_name": null,
|
| 7215 |
"trial_params": null
|
| 7216 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc2fe2e5c738c8106278780fab9f7f87cb208c3a8d2ade89df93aa0343c89bdb
|
| 3 |
size 102501541
|