Training in progress, step 40500, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 448472762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d03259d2e256ae1843dd9d93c96f13ae033fa50104b7b3335fc98eda0124f7a5
|
| 3 |
size 448472762
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 151589028
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15854e4e6f02157f1526c4c8d47e876fa37ca90091d0530a30a41ceb5b08bcec
|
| 3 |
size 151589028
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30b82ff92adb88680c27be8e5a2b5c9da63ae08090ef3a7c14508b2164f79186
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf6d32c22f27c022798bfaaaca4ba2cb9286d958e4d3a8bc21674e25ce2e9897
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bad3a95014e8c014b1b0e1d2c7e862e00a64dd0395000ae8357a96077edd14c5
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce9750971f49c876ed40cfbeda660585c3f668d39961f483bf1708ae57b0f2eb
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b16d6b3e5a3f4efdb62ade99a44d77fcf809fff9d006debbc9f917125a34ca7
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -28008,6 +28008,356 @@
|
|
| 28008 |
"learning_rate": 0.00048715881405557025,
|
| 28009 |
"loss": 2.0898,
|
| 28010 |
"step": 40000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28011 |
}
|
| 28012 |
],
|
| 28013 |
"logging_steps": 10,
|
|
@@ -28027,7 +28377,7 @@
|
|
| 28027 |
"attributes": {}
|
| 28028 |
}
|
| 28029 |
},
|
| 28030 |
-
"total_flos": 1.
|
| 28031 |
"train_batch_size": 48,
|
| 28032 |
"trial_name": null,
|
| 28033 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.07897392398028577,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 40500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 28008 |
"learning_rate": 0.00048715881405557025,
|
| 28009 |
"loss": 2.0898,
|
| 28010 |
"step": 40000
|
| 28011 |
+
},
|
| 28012 |
+
{
|
| 28013 |
+
"epoch": 0.07801843699879589,
|
| 28014 |
+
"grad_norm": 0.37890625,
|
| 28015 |
+
"learning_rate": 0.0004871555630401156,
|
| 28016 |
+
"loss": 2.0975,
|
| 28017 |
+
"step": 40010
|
| 28018 |
+
},
|
| 28019 |
+
{
|
| 28020 |
+
"epoch": 0.07803793673311202,
|
| 28021 |
+
"grad_norm": 0.416015625,
|
| 28022 |
+
"learning_rate": 0.0004871523120246609,
|
| 28023 |
+
"loss": 2.0899,
|
| 28024 |
+
"step": 40020
|
| 28025 |
+
},
|
| 28026 |
+
{
|
| 28027 |
+
"epoch": 0.07805743646742813,
|
| 28028 |
+
"grad_norm": 0.4453125,
|
| 28029 |
+
"learning_rate": 0.00048714906100920624,
|
| 28030 |
+
"loss": 2.1008,
|
| 28031 |
+
"step": 40030
|
| 28032 |
+
},
|
| 28033 |
+
{
|
| 28034 |
+
"epoch": 0.07807693620174425,
|
| 28035 |
+
"grad_norm": 0.490234375,
|
| 28036 |
+
"learning_rate": 0.00048714580999375157,
|
| 28037 |
+
"loss": 2.0912,
|
| 28038 |
+
"step": 40040
|
| 28039 |
+
},
|
| 28040 |
+
{
|
| 28041 |
+
"epoch": 0.07809643593606037,
|
| 28042 |
+
"grad_norm": 0.39453125,
|
| 28043 |
+
"learning_rate": 0.0004871425589782969,
|
| 28044 |
+
"loss": 2.0913,
|
| 28045 |
+
"step": 40050
|
| 28046 |
+
},
|
| 28047 |
+
{
|
| 28048 |
+
"epoch": 0.07811593567037649,
|
| 28049 |
+
"grad_norm": 0.396484375,
|
| 28050 |
+
"learning_rate": 0.0004871393079628422,
|
| 28051 |
+
"loss": 2.0787,
|
| 28052 |
+
"step": 40060
|
| 28053 |
+
},
|
| 28054 |
+
{
|
| 28055 |
+
"epoch": 0.07813543540469262,
|
| 28056 |
+
"grad_norm": 0.369140625,
|
| 28057 |
+
"learning_rate": 0.00048713605694738755,
|
| 28058 |
+
"loss": 2.1002,
|
| 28059 |
+
"step": 40070
|
| 28060 |
+
},
|
| 28061 |
+
{
|
| 28062 |
+
"epoch": 0.07815493513900873,
|
| 28063 |
+
"grad_norm": 0.396484375,
|
| 28064 |
+
"learning_rate": 0.0004871328059319329,
|
| 28065 |
+
"loss": 2.1055,
|
| 28066 |
+
"step": 40080
|
| 28067 |
+
},
|
| 28068 |
+
{
|
| 28069 |
+
"epoch": 0.07817443487332486,
|
| 28070 |
+
"grad_norm": 0.392578125,
|
| 28071 |
+
"learning_rate": 0.00048712955491647816,
|
| 28072 |
+
"loss": 2.0943,
|
| 28073 |
+
"step": 40090
|
| 28074 |
+
},
|
| 28075 |
+
{
|
| 28076 |
+
"epoch": 0.07819393460764097,
|
| 28077 |
+
"grad_norm": 0.412109375,
|
| 28078 |
+
"learning_rate": 0.0004871263039010235,
|
| 28079 |
+
"loss": 2.096,
|
| 28080 |
+
"step": 40100
|
| 28081 |
+
},
|
| 28082 |
+
{
|
| 28083 |
+
"epoch": 0.07821343434195709,
|
| 28084 |
+
"grad_norm": 0.462890625,
|
| 28085 |
+
"learning_rate": 0.0004871230528855688,
|
| 28086 |
+
"loss": 2.0891,
|
| 28087 |
+
"step": 40110
|
| 28088 |
+
},
|
| 28089 |
+
{
|
| 28090 |
+
"epoch": 0.07823293407627321,
|
| 28091 |
+
"grad_norm": 0.3828125,
|
| 28092 |
+
"learning_rate": 0.00048711980187011414,
|
| 28093 |
+
"loss": 2.0926,
|
| 28094 |
+
"step": 40120
|
| 28095 |
+
},
|
| 28096 |
+
{
|
| 28097 |
+
"epoch": 0.07825243381058933,
|
| 28098 |
+
"grad_norm": 0.45703125,
|
| 28099 |
+
"learning_rate": 0.00048711655085465947,
|
| 28100 |
+
"loss": 2.0898,
|
| 28101 |
+
"step": 40130
|
| 28102 |
+
},
|
| 28103 |
+
{
|
| 28104 |
+
"epoch": 0.07827193354490546,
|
| 28105 |
+
"grad_norm": 0.37109375,
|
| 28106 |
+
"learning_rate": 0.00048711329983920474,
|
| 28107 |
+
"loss": 2.0886,
|
| 28108 |
+
"step": 40140
|
| 28109 |
+
},
|
| 28110 |
+
{
|
| 28111 |
+
"epoch": 0.07829143327922157,
|
| 28112 |
+
"grad_norm": 0.373046875,
|
| 28113 |
+
"learning_rate": 0.00048711004882375007,
|
| 28114 |
+
"loss": 2.0855,
|
| 28115 |
+
"step": 40150
|
| 28116 |
+
},
|
| 28117 |
+
{
|
| 28118 |
+
"epoch": 0.07831093301353768,
|
| 28119 |
+
"grad_norm": 0.55859375,
|
| 28120 |
+
"learning_rate": 0.0004871067978082954,
|
| 28121 |
+
"loss": 2.1028,
|
| 28122 |
+
"step": 40160
|
| 28123 |
+
},
|
| 28124 |
+
{
|
| 28125 |
+
"epoch": 0.07833043274785381,
|
| 28126 |
+
"grad_norm": 0.42578125,
|
| 28127 |
+
"learning_rate": 0.00048710354679284073,
|
| 28128 |
+
"loss": 2.0743,
|
| 28129 |
+
"step": 40170
|
| 28130 |
+
},
|
| 28131 |
+
{
|
| 28132 |
+
"epoch": 0.07834993248216993,
|
| 28133 |
+
"grad_norm": 0.53125,
|
| 28134 |
+
"learning_rate": 0.00048710029577738606,
|
| 28135 |
+
"loss": 2.0989,
|
| 28136 |
+
"step": 40180
|
| 28137 |
+
},
|
| 28138 |
+
{
|
| 28139 |
+
"epoch": 0.07836943221648605,
|
| 28140 |
+
"grad_norm": 0.39453125,
|
| 28141 |
+
"learning_rate": 0.0004870970447619314,
|
| 28142 |
+
"loss": 2.0705,
|
| 28143 |
+
"step": 40190
|
| 28144 |
+
},
|
| 28145 |
+
{
|
| 28146 |
+
"epoch": 0.07838893195080217,
|
| 28147 |
+
"grad_norm": 0.3515625,
|
| 28148 |
+
"learning_rate": 0.0004870937937464767,
|
| 28149 |
+
"loss": 2.0708,
|
| 28150 |
+
"step": 40200
|
| 28151 |
+
},
|
| 28152 |
+
{
|
| 28153 |
+
"epoch": 0.0784084316851183,
|
| 28154 |
+
"grad_norm": 0.48828125,
|
| 28155 |
+
"learning_rate": 0.00048709054273102204,
|
| 28156 |
+
"loss": 2.0809,
|
| 28157 |
+
"step": 40210
|
| 28158 |
+
},
|
| 28159 |
+
{
|
| 28160 |
+
"epoch": 0.07842793141943441,
|
| 28161 |
+
"grad_norm": 0.43359375,
|
| 28162 |
+
"learning_rate": 0.00048708729171556737,
|
| 28163 |
+
"loss": 2.0944,
|
| 28164 |
+
"step": 40220
|
| 28165 |
+
},
|
| 28166 |
+
{
|
| 28167 |
+
"epoch": 0.07844743115375052,
|
| 28168 |
+
"grad_norm": 0.349609375,
|
| 28169 |
+
"learning_rate": 0.0004870840407001127,
|
| 28170 |
+
"loss": 2.0886,
|
| 28171 |
+
"step": 40230
|
| 28172 |
+
},
|
| 28173 |
+
{
|
| 28174 |
+
"epoch": 0.07846693088806665,
|
| 28175 |
+
"grad_norm": 0.71875,
|
| 28176 |
+
"learning_rate": 0.00048708078968465803,
|
| 28177 |
+
"loss": 2.0778,
|
| 28178 |
+
"step": 40240
|
| 28179 |
+
},
|
| 28180 |
+
{
|
| 28181 |
+
"epoch": 0.07848643062238277,
|
| 28182 |
+
"grad_norm": 0.39453125,
|
| 28183 |
+
"learning_rate": 0.00048707753866920336,
|
| 28184 |
+
"loss": 2.0816,
|
| 28185 |
+
"step": 40250
|
| 28186 |
+
},
|
| 28187 |
+
{
|
| 28188 |
+
"epoch": 0.0785059303566989,
|
| 28189 |
+
"grad_norm": 0.38671875,
|
| 28190 |
+
"learning_rate": 0.0004870742876537487,
|
| 28191 |
+
"loss": 2.095,
|
| 28192 |
+
"step": 40260
|
| 28193 |
+
},
|
| 28194 |
+
{
|
| 28195 |
+
"epoch": 0.07852543009101501,
|
| 28196 |
+
"grad_norm": 0.357421875,
|
| 28197 |
+
"learning_rate": 0.000487071036638294,
|
| 28198 |
+
"loss": 2.0847,
|
| 28199 |
+
"step": 40270
|
| 28200 |
+
},
|
| 28201 |
+
{
|
| 28202 |
+
"epoch": 0.07854492982533114,
|
| 28203 |
+
"grad_norm": 0.3984375,
|
| 28204 |
+
"learning_rate": 0.00048706778562283934,
|
| 28205 |
+
"loss": 2.095,
|
| 28206 |
+
"step": 40280
|
| 28207 |
+
},
|
| 28208 |
+
{
|
| 28209 |
+
"epoch": 0.07856442955964725,
|
| 28210 |
+
"grad_norm": 0.408203125,
|
| 28211 |
+
"learning_rate": 0.00048706453460738467,
|
| 28212 |
+
"loss": 2.0868,
|
| 28213 |
+
"step": 40290
|
| 28214 |
+
},
|
| 28215 |
+
{
|
| 28216 |
+
"epoch": 0.07858392929396336,
|
| 28217 |
+
"grad_norm": 0.375,
|
| 28218 |
+
"learning_rate": 0.00048706128359192995,
|
| 28219 |
+
"loss": 2.0935,
|
| 28220 |
+
"step": 40300
|
| 28221 |
+
},
|
| 28222 |
+
{
|
| 28223 |
+
"epoch": 0.07860342902827949,
|
| 28224 |
+
"grad_norm": 0.43359375,
|
| 28225 |
+
"learning_rate": 0.0004870580325764753,
|
| 28226 |
+
"loss": 2.0762,
|
| 28227 |
+
"step": 40310
|
| 28228 |
+
},
|
| 28229 |
+
{
|
| 28230 |
+
"epoch": 0.0786229287625956,
|
| 28231 |
+
"grad_norm": 0.384765625,
|
| 28232 |
+
"learning_rate": 0.0004870547815610206,
|
| 28233 |
+
"loss": 2.0939,
|
| 28234 |
+
"step": 40320
|
| 28235 |
+
},
|
| 28236 |
+
{
|
| 28237 |
+
"epoch": 0.07864242849691173,
|
| 28238 |
+
"grad_norm": 0.431640625,
|
| 28239 |
+
"learning_rate": 0.0004870515305455659,
|
| 28240 |
+
"loss": 2.091,
|
| 28241 |
+
"step": 40330
|
| 28242 |
+
},
|
| 28243 |
+
{
|
| 28244 |
+
"epoch": 0.07866192823122785,
|
| 28245 |
+
"grad_norm": 0.44921875,
|
| 28246 |
+
"learning_rate": 0.0004870482795301112,
|
| 28247 |
+
"loss": 2.1065,
|
| 28248 |
+
"step": 40340
|
| 28249 |
+
},
|
| 28250 |
+
{
|
| 28251 |
+
"epoch": 0.07868142796554398,
|
| 28252 |
+
"grad_norm": 0.42578125,
|
| 28253 |
+
"learning_rate": 0.00048704502851465653,
|
| 28254 |
+
"loss": 2.0833,
|
| 28255 |
+
"step": 40350
|
| 28256 |
+
},
|
| 28257 |
+
{
|
| 28258 |
+
"epoch": 0.07870092769986009,
|
| 28259 |
+
"grad_norm": 0.390625,
|
| 28260 |
+
"learning_rate": 0.00048704177749920186,
|
| 28261 |
+
"loss": 2.0815,
|
| 28262 |
+
"step": 40360
|
| 28263 |
+
},
|
| 28264 |
+
{
|
| 28265 |
+
"epoch": 0.0787204274341762,
|
| 28266 |
+
"grad_norm": 0.431640625,
|
| 28267 |
+
"learning_rate": 0.0004870385264837472,
|
| 28268 |
+
"loss": 2.0821,
|
| 28269 |
+
"step": 40370
|
| 28270 |
+
},
|
| 28271 |
+
{
|
| 28272 |
+
"epoch": 0.07873992716849233,
|
| 28273 |
+
"grad_norm": 0.416015625,
|
| 28274 |
+
"learning_rate": 0.0004870352754682925,
|
| 28275 |
+
"loss": 2.0873,
|
| 28276 |
+
"step": 40380
|
| 28277 |
+
},
|
| 28278 |
+
{
|
| 28279 |
+
"epoch": 0.07875942690280845,
|
| 28280 |
+
"grad_norm": 0.404296875,
|
| 28281 |
+
"learning_rate": 0.00048703202445283785,
|
| 28282 |
+
"loss": 2.0744,
|
| 28283 |
+
"step": 40390
|
| 28284 |
+
},
|
| 28285 |
+
{
|
| 28286 |
+
"epoch": 0.07877892663712457,
|
| 28287 |
+
"grad_norm": 0.392578125,
|
| 28288 |
+
"learning_rate": 0.0004870287734373832,
|
| 28289 |
+
"loss": 2.0897,
|
| 28290 |
+
"step": 40400
|
| 28291 |
+
},
|
| 28292 |
+
{
|
| 28293 |
+
"epoch": 0.07879842637144069,
|
| 28294 |
+
"grad_norm": 0.46484375,
|
| 28295 |
+
"learning_rate": 0.0004870255224219285,
|
| 28296 |
+
"loss": 2.0874,
|
| 28297 |
+
"step": 40410
|
| 28298 |
+
},
|
| 28299 |
+
{
|
| 28300 |
+
"epoch": 0.0788179261057568,
|
| 28301 |
+
"grad_norm": 0.435546875,
|
| 28302 |
+
"learning_rate": 0.00048702227140647383,
|
| 28303 |
+
"loss": 2.0957,
|
| 28304 |
+
"step": 40420
|
| 28305 |
+
},
|
| 28306 |
+
{
|
| 28307 |
+
"epoch": 0.07883742584007293,
|
| 28308 |
+
"grad_norm": 0.419921875,
|
| 28309 |
+
"learning_rate": 0.00048701902039101916,
|
| 28310 |
+
"loss": 2.0726,
|
| 28311 |
+
"step": 40430
|
| 28312 |
+
},
|
| 28313 |
+
{
|
| 28314 |
+
"epoch": 0.07885692557438904,
|
| 28315 |
+
"grad_norm": 0.4140625,
|
| 28316 |
+
"learning_rate": 0.0004870157693755645,
|
| 28317 |
+
"loss": 2.0802,
|
| 28318 |
+
"step": 40440
|
| 28319 |
+
},
|
| 28320 |
+
{
|
| 28321 |
+
"epoch": 0.07887642530870517,
|
| 28322 |
+
"grad_norm": 0.53515625,
|
| 28323 |
+
"learning_rate": 0.00048701251836010976,
|
| 28324 |
+
"loss": 2.0906,
|
| 28325 |
+
"step": 40450
|
| 28326 |
+
},
|
| 28327 |
+
{
|
| 28328 |
+
"epoch": 0.07889592504302129,
|
| 28329 |
+
"grad_norm": 0.384765625,
|
| 28330 |
+
"learning_rate": 0.0004870092673446551,
|
| 28331 |
+
"loss": 2.0789,
|
| 28332 |
+
"step": 40460
|
| 28333 |
+
},
|
| 28334 |
+
{
|
| 28335 |
+
"epoch": 0.07891542477733741,
|
| 28336 |
+
"grad_norm": 0.421875,
|
| 28337 |
+
"learning_rate": 0.0004870060163292004,
|
| 28338 |
+
"loss": 2.0869,
|
| 28339 |
+
"step": 40470
|
| 28340 |
+
},
|
| 28341 |
+
{
|
| 28342 |
+
"epoch": 0.07893492451165353,
|
| 28343 |
+
"grad_norm": 0.396484375,
|
| 28344 |
+
"learning_rate": 0.00048700276531374575,
|
| 28345 |
+
"loss": 2.0973,
|
| 28346 |
+
"step": 40480
|
| 28347 |
+
},
|
| 28348 |
+
{
|
| 28349 |
+
"epoch": 0.07895442424596964,
|
| 28350 |
+
"grad_norm": 0.39453125,
|
| 28351 |
+
"learning_rate": 0.0004869995142982911,
|
| 28352 |
+
"loss": 2.0851,
|
| 28353 |
+
"step": 40490
|
| 28354 |
+
},
|
| 28355 |
+
{
|
| 28356 |
+
"epoch": 0.07897392398028577,
|
| 28357 |
+
"grad_norm": 0.40234375,
|
| 28358 |
+
"learning_rate": 0.0004869962632828364,
|
| 28359 |
+
"loss": 2.0964,
|
| 28360 |
+
"step": 40500
|
| 28361 |
}
|
| 28362 |
],
|
| 28363 |
"logging_steps": 10,
|
|
|
|
| 28377 |
"attributes": {}
|
| 28378 |
}
|
| 28379 |
},
|
| 28380 |
+
"total_flos": 1.3244470339766845e+19,
|
| 28381 |
"train_batch_size": 48,
|
| 28382 |
"trial_name": null,
|
| 28383 |
"trial_params": null
|