Upload folder using huggingface_hub
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +156 -6
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 369133600
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35a10693e1787343ac352742f8056750f0ac3a12868daaf18b0918e27b34fa1d
|
| 3 |
size 369133600
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 738413771
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c242f7f75f01bdd5efda8f041fb44b200816c61d4350bc0f7d14d85cae7feb68
|
| 3 |
size 738413771
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4485c82a37971afa1989881a7c670bc00a34ca98f97cf8ff55f29491f3f7f15f
|
| 3 |
size 14645
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6a0ceb4b88e220a982844159be33a142d353b4617ed2e8cbafc7dcba9bd25b9
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-
|
| 5 |
-
"epoch": 7.
|
| 6 |
"eval_steps": 1,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2108,6 +2108,156 @@
|
|
| 2108 |
"eval_samples_per_second": 8.9,
|
| 2109 |
"eval_steps_per_second": 1.187,
|
| 2110 |
"step": 140
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2111 |
}
|
| 2112 |
],
|
| 2113 |
"logging_steps": 1,
|
|
@@ -2127,7 +2277,7 @@
|
|
| 2127 |
"attributes": {}
|
| 2128 |
}
|
| 2129 |
},
|
| 2130 |
-
"total_flos":
|
| 2131 |
"train_batch_size": 1,
|
| 2132 |
"trial_name": null,
|
| 2133 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 150,
|
| 3 |
+
"best_metric": 0.02963736467063427,
|
| 4 |
+
"best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-150",
|
| 5 |
+
"epoch": 7.894736842105263,
|
| 6 |
"eval_steps": 1,
|
| 7 |
+
"global_step": 150,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2108 |
"eval_samples_per_second": 8.9,
|
| 2109 |
"eval_steps_per_second": 1.187,
|
| 2110 |
"step": 140
|
| 2111 |
+
},
|
| 2112 |
+
{
|
| 2113 |
+
"epoch": 7.421052631578947,
|
| 2114 |
+
"grad_norm": 0.6074717044830322,
|
| 2115 |
+
"learning_rate": 0.0002249760770939754,
|
| 2116 |
+
"loss": 0.0494,
|
| 2117 |
+
"step": 141
|
| 2118 |
+
},
|
| 2119 |
+
{
|
| 2120 |
+
"epoch": 7.421052631578947,
|
| 2121 |
+
"eval_loss": 0.06366048008203506,
|
| 2122 |
+
"eval_runtime": 3.3644,
|
| 2123 |
+
"eval_samples_per_second": 8.917,
|
| 2124 |
+
"eval_steps_per_second": 1.189,
|
| 2125 |
+
"step": 141
|
| 2126 |
+
},
|
| 2127 |
+
{
|
| 2128 |
+
"epoch": 7.473684210526316,
|
| 2129 |
+
"grad_norm": 0.5519073009490967,
|
| 2130 |
+
"learning_rate": 0.0002216530770986795,
|
| 2131 |
+
"loss": 0.0569,
|
| 2132 |
+
"step": 142
|
| 2133 |
+
},
|
| 2134 |
+
{
|
| 2135 |
+
"epoch": 7.473684210526316,
|
| 2136 |
+
"eval_loss": 0.060868822038173676,
|
| 2137 |
+
"eval_runtime": 3.3772,
|
| 2138 |
+
"eval_samples_per_second": 8.883,
|
| 2139 |
+
"eval_steps_per_second": 1.184,
|
| 2140 |
+
"step": 142
|
| 2141 |
+
},
|
| 2142 |
+
{
|
| 2143 |
+
"epoch": 7.526315789473684,
|
| 2144 |
+
"grad_norm": 0.5936044454574585,
|
| 2145 |
+
"learning_rate": 0.0002183351430834358,
|
| 2146 |
+
"loss": 0.0529,
|
| 2147 |
+
"step": 143
|
| 2148 |
+
},
|
| 2149 |
+
{
|
| 2150 |
+
"epoch": 7.526315789473684,
|
| 2151 |
+
"eval_loss": 0.060183968394994736,
|
| 2152 |
+
"eval_runtime": 3.3842,
|
| 2153 |
+
"eval_samples_per_second": 8.865,
|
| 2154 |
+
"eval_steps_per_second": 1.182,
|
| 2155 |
+
"step": 143
|
| 2156 |
+
},
|
| 2157 |
+
{
|
| 2158 |
+
"epoch": 7.578947368421053,
|
| 2159 |
+
"grad_norm": 0.4775611162185669,
|
| 2160 |
+
"learning_rate": 0.0002150228680081079,
|
| 2161 |
+
"loss": 0.0606,
|
| 2162 |
+
"step": 144
|
| 2163 |
+
},
|
| 2164 |
+
{
|
| 2165 |
+
"epoch": 7.578947368421053,
|
| 2166 |
+
"eval_loss": 0.060147836804389954,
|
| 2167 |
+
"eval_runtime": 3.377,
|
| 2168 |
+
"eval_samples_per_second": 8.884,
|
| 2169 |
+
"eval_steps_per_second": 1.184,
|
| 2170 |
+
"step": 144
|
| 2171 |
+
},
|
| 2172 |
+
{
|
| 2173 |
+
"epoch": 7.631578947368421,
|
| 2174 |
+
"grad_norm": 0.5507313013076782,
|
| 2175 |
+
"learning_rate": 0.00021171684382123,
|
| 2176 |
+
"loss": 0.0515,
|
| 2177 |
+
"step": 145
|
| 2178 |
+
},
|
| 2179 |
+
{
|
| 2180 |
+
"epoch": 7.631578947368421,
|
| 2181 |
+
"eval_loss": 0.05933203548192978,
|
| 2182 |
+
"eval_runtime": 3.3739,
|
| 2183 |
+
"eval_samples_per_second": 8.892,
|
| 2184 |
+
"eval_steps_per_second": 1.186,
|
| 2185 |
+
"step": 145
|
| 2186 |
+
},
|
| 2187 |
+
{
|
| 2188 |
+
"epoch": 7.684210526315789,
|
| 2189 |
+
"grad_norm": 0.6271359324455261,
|
| 2190 |
+
"learning_rate": 0.0002084176613542175,
|
| 2191 |
+
"loss": 0.0768,
|
| 2192 |
+
"step": 146
|
| 2193 |
+
},
|
| 2194 |
+
{
|
| 2195 |
+
"epoch": 7.684210526315789,
|
| 2196 |
+
"eval_loss": 0.05137402191758156,
|
| 2197 |
+
"eval_runtime": 3.3705,
|
| 2198 |
+
"eval_samples_per_second": 8.901,
|
| 2199 |
+
"eval_steps_per_second": 1.187,
|
| 2200 |
+
"step": 146
|
| 2201 |
+
},
|
| 2202 |
+
{
|
| 2203 |
+
"epoch": 7.7368421052631575,
|
| 2204 |
+
"grad_norm": 0.511416494846344,
|
| 2205 |
+
"learning_rate": 0.00020512591021577773,
|
| 2206 |
+
"loss": 0.0559,
|
| 2207 |
+
"step": 147
|
| 2208 |
+
},
|
| 2209 |
+
{
|
| 2210 |
+
"epoch": 7.7368421052631575,
|
| 2211 |
+
"eval_loss": 0.04617203772068024,
|
| 2212 |
+
"eval_runtime": 3.3685,
|
| 2213 |
+
"eval_samples_per_second": 8.906,
|
| 2214 |
+
"eval_steps_per_second": 1.187,
|
| 2215 |
+
"step": 147
|
| 2216 |
+
},
|
| 2217 |
+
{
|
| 2218 |
+
"epoch": 7.7894736842105265,
|
| 2219 |
+
"grad_norm": 0.43504372239112854,
|
| 2220 |
+
"learning_rate": 0.00020184217868653867,
|
| 2221 |
+
"loss": 0.0495,
|
| 2222 |
+
"step": 148
|
| 2223 |
+
},
|
| 2224 |
+
{
|
| 2225 |
+
"epoch": 7.7894736842105265,
|
| 2226 |
+
"eval_loss": 0.039726559072732925,
|
| 2227 |
+
"eval_runtime": 3.3679,
|
| 2228 |
+
"eval_samples_per_second": 8.907,
|
| 2229 |
+
"eval_steps_per_second": 1.188,
|
| 2230 |
+
"step": 148
|
| 2231 |
+
},
|
| 2232 |
+
{
|
| 2233 |
+
"epoch": 7.842105263157895,
|
| 2234 |
+
"grad_norm": 0.5811814069747925,
|
| 2235 |
+
"learning_rate": 0.0001985670536139151,
|
| 2236 |
+
"loss": 0.0741,
|
| 2237 |
+
"step": 149
|
| 2238 |
+
},
|
| 2239 |
+
{
|
| 2240 |
+
"epoch": 7.842105263157895,
|
| 2241 |
+
"eval_loss": 0.03227859362959862,
|
| 2242 |
+
"eval_runtime": 3.3641,
|
| 2243 |
+
"eval_samples_per_second": 8.918,
|
| 2244 |
+
"eval_steps_per_second": 1.189,
|
| 2245 |
+
"step": 149
|
| 2246 |
+
},
|
| 2247 |
+
{
|
| 2248 |
+
"epoch": 7.894736842105263,
|
| 2249 |
+
"grad_norm": 0.41861817240715027,
|
| 2250 |
+
"learning_rate": 0.0001953011203072312,
|
| 2251 |
+
"loss": 0.0477,
|
| 2252 |
+
"step": 150
|
| 2253 |
+
},
|
| 2254 |
+
{
|
| 2255 |
+
"epoch": 7.894736842105263,
|
| 2256 |
+
"eval_loss": 0.02963736467063427,
|
| 2257 |
+
"eval_runtime": 3.3713,
|
| 2258 |
+
"eval_samples_per_second": 8.899,
|
| 2259 |
+
"eval_steps_per_second": 1.186,
|
| 2260 |
+
"step": 150
|
| 2261 |
}
|
| 2262 |
],
|
| 2263 |
"logging_steps": 1,
|
|
|
|
| 2277 |
"attributes": {}
|
| 2278 |
}
|
| 2279 |
},
|
| 2280 |
+
"total_flos": 6215871296870400.0,
|
| 2281 |
"train_batch_size": 1,
|
| 2282 |
"trial_name": null,
|
| 2283 |
"trial_params": null
|