robertou2 committed on
Commit
d504359
·
verified ·
1 Parent(s): ff8beda

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60cddb0b8a51baae7ac39f5c131c6e5d5ca1923905a0722867d9c0ecf9871b58
3
  size 369133600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35a10693e1787343ac352742f8056750f0ac3a12868daaf18b0918e27b34fa1d
3
  size 369133600
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9588fde0e17ea6c2c5918e5ed6b3717a4c29046a1c9c3d836cd39d4bb1689c37
3
  size 738413771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c242f7f75f01bdd5efda8f041fb44b200816c61d4350bc0f7d14d85cae7feb68
3
  size 738413771
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dac12bb05bd98136abee56099db4e75eadb476f00ee51aa0f7db3f3de1cccf51
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4485c82a37971afa1989881a7c670bc00a34ca98f97cf8ff55f29491f3f7f15f
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:432fe31cc8feaadc988ff87816d3eb23d869c5008676f20a3367d6de19e5cf4c
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6a0ceb4b88e220a982844159be33a142d353b4617ed2e8cbafc7dcba9bd25b9
3
  size 1465
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 140,
3
- "best_metric": 0.06147347763180733,
4
- "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-140",
5
- "epoch": 7.368421052631579,
6
  "eval_steps": 1,
7
- "global_step": 140,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2108,6 +2108,156 @@
2108
  "eval_samples_per_second": 8.9,
2109
  "eval_steps_per_second": 1.187,
2110
  "step": 140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2111
  }
2112
  ],
2113
  "logging_steps": 1,
@@ -2127,7 +2277,7 @@
2127
  "attributes": {}
2128
  }
2129
  },
2130
- "total_flos": 5806655876229120.0,
2131
  "train_batch_size": 1,
2132
  "trial_name": null,
2133
  "trial_params": null
 
1
  {
2
+ "best_global_step": 150,
3
+ "best_metric": 0.02963736467063427,
4
+ "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-150",
5
+ "epoch": 7.894736842105263,
6
  "eval_steps": 1,
7
+ "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2108
  "eval_samples_per_second": 8.9,
2109
  "eval_steps_per_second": 1.187,
2110
  "step": 140
2111
+ },
2112
+ {
2113
+ "epoch": 7.421052631578947,
2114
+ "grad_norm": 0.6074717044830322,
2115
+ "learning_rate": 0.0002249760770939754,
2116
+ "loss": 0.0494,
2117
+ "step": 141
2118
+ },
2119
+ {
2120
+ "epoch": 7.421052631578947,
2121
+ "eval_loss": 0.06366048008203506,
2122
+ "eval_runtime": 3.3644,
2123
+ "eval_samples_per_second": 8.917,
2124
+ "eval_steps_per_second": 1.189,
2125
+ "step": 141
2126
+ },
2127
+ {
2128
+ "epoch": 7.473684210526316,
2129
+ "grad_norm": 0.5519073009490967,
2130
+ "learning_rate": 0.0002216530770986795,
2131
+ "loss": 0.0569,
2132
+ "step": 142
2133
+ },
2134
+ {
2135
+ "epoch": 7.473684210526316,
2136
+ "eval_loss": 0.060868822038173676,
2137
+ "eval_runtime": 3.3772,
2138
+ "eval_samples_per_second": 8.883,
2139
+ "eval_steps_per_second": 1.184,
2140
+ "step": 142
2141
+ },
2142
+ {
2143
+ "epoch": 7.526315789473684,
2144
+ "grad_norm": 0.5936044454574585,
2145
+ "learning_rate": 0.0002183351430834358,
2146
+ "loss": 0.0529,
2147
+ "step": 143
2148
+ },
2149
+ {
2150
+ "epoch": 7.526315789473684,
2151
+ "eval_loss": 0.060183968394994736,
2152
+ "eval_runtime": 3.3842,
2153
+ "eval_samples_per_second": 8.865,
2154
+ "eval_steps_per_second": 1.182,
2155
+ "step": 143
2156
+ },
2157
+ {
2158
+ "epoch": 7.578947368421053,
2159
+ "grad_norm": 0.4775611162185669,
2160
+ "learning_rate": 0.0002150228680081079,
2161
+ "loss": 0.0606,
2162
+ "step": 144
2163
+ },
2164
+ {
2165
+ "epoch": 7.578947368421053,
2166
+ "eval_loss": 0.060147836804389954,
2167
+ "eval_runtime": 3.377,
2168
+ "eval_samples_per_second": 8.884,
2169
+ "eval_steps_per_second": 1.184,
2170
+ "step": 144
2171
+ },
2172
+ {
2173
+ "epoch": 7.631578947368421,
2174
+ "grad_norm": 0.5507313013076782,
2175
+ "learning_rate": 0.00021171684382123,
2176
+ "loss": 0.0515,
2177
+ "step": 145
2178
+ },
2179
+ {
2180
+ "epoch": 7.631578947368421,
2181
+ "eval_loss": 0.05933203548192978,
2182
+ "eval_runtime": 3.3739,
2183
+ "eval_samples_per_second": 8.892,
2184
+ "eval_steps_per_second": 1.186,
2185
+ "step": 145
2186
+ },
2187
+ {
2188
+ "epoch": 7.684210526315789,
2189
+ "grad_norm": 0.6271359324455261,
2190
+ "learning_rate": 0.0002084176613542175,
2191
+ "loss": 0.0768,
2192
+ "step": 146
2193
+ },
2194
+ {
2195
+ "epoch": 7.684210526315789,
2196
+ "eval_loss": 0.05137402191758156,
2197
+ "eval_runtime": 3.3705,
2198
+ "eval_samples_per_second": 8.901,
2199
+ "eval_steps_per_second": 1.187,
2200
+ "step": 146
2201
+ },
2202
+ {
2203
+ "epoch": 7.7368421052631575,
2204
+ "grad_norm": 0.511416494846344,
2205
+ "learning_rate": 0.00020512591021577773,
2206
+ "loss": 0.0559,
2207
+ "step": 147
2208
+ },
2209
+ {
2210
+ "epoch": 7.7368421052631575,
2211
+ "eval_loss": 0.04617203772068024,
2212
+ "eval_runtime": 3.3685,
2213
+ "eval_samples_per_second": 8.906,
2214
+ "eval_steps_per_second": 1.187,
2215
+ "step": 147
2216
+ },
2217
+ {
2218
+ "epoch": 7.7894736842105265,
2219
+ "grad_norm": 0.43504372239112854,
2220
+ "learning_rate": 0.00020184217868653867,
2221
+ "loss": 0.0495,
2222
+ "step": 148
2223
+ },
2224
+ {
2225
+ "epoch": 7.7894736842105265,
2226
+ "eval_loss": 0.039726559072732925,
2227
+ "eval_runtime": 3.3679,
2228
+ "eval_samples_per_second": 8.907,
2229
+ "eval_steps_per_second": 1.188,
2230
+ "step": 148
2231
+ },
2232
+ {
2233
+ "epoch": 7.842105263157895,
2234
+ "grad_norm": 0.5811814069747925,
2235
+ "learning_rate": 0.0001985670536139151,
2236
+ "loss": 0.0741,
2237
+ "step": 149
2238
+ },
2239
+ {
2240
+ "epoch": 7.842105263157895,
2241
+ "eval_loss": 0.03227859362959862,
2242
+ "eval_runtime": 3.3641,
2243
+ "eval_samples_per_second": 8.918,
2244
+ "eval_steps_per_second": 1.189,
2245
+ "step": 149
2246
+ },
2247
+ {
2248
+ "epoch": 7.894736842105263,
2249
+ "grad_norm": 0.41861817240715027,
2250
+ "learning_rate": 0.0001953011203072312,
2251
+ "loss": 0.0477,
2252
+ "step": 150
2253
+ },
2254
+ {
2255
+ "epoch": 7.894736842105263,
2256
+ "eval_loss": 0.02963736467063427,
2257
+ "eval_runtime": 3.3713,
2258
+ "eval_samples_per_second": 8.899,
2259
+ "eval_steps_per_second": 1.186,
2260
+ "step": 150
2261
  }
2262
  ],
2263
  "logging_steps": 1,
 
2277
  "attributes": {}
2278
  }
2279
  },
2280
+ "total_flos": 6215871296870400.0,
2281
  "train_batch_size": 1,
2282
  "trial_name": null,
2283
  "trial_params": null