SystemAdmin123 committed on
Commit
13d44d1
·
verified ·
1 Parent(s): a217fff

Training in progress, step 3160, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccc31b325ea5e97de26962f9c363d36c8f7bcc9b2c483acfd04d8b25473fb030
3
  size 778096664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c78c8e751c48b040746199c31e75d4feef561d8684dffb5fcab27976fb39c540
3
  size 778096664
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6714b7aef17f2794052715fb1d2a91d01de4d8f1caf070386b5c9090d1706b18
3
  size 395561780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c99fe2c15f38fcdc70c5b58aeb7a136875edf29121b459165ec343854635f422
3
  size 395561780
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27aa5a04d0e71b37e8a012539a9ef09b35c45f10549e842573fcadd937bdbda6
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3928ccf2665d8a8f3c5df0c8d51a1eb19cd2344599c97853e6f5d312bbb405f
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:935ce3c1f9a7c99c1a2b10dd4d0a70e52329d509a97359b0a5e2dd28db2a0c7e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7592e1462fd4ef7cdf5cfb2001145c35bf845f4d92dab45be159a6b9ee3ebede
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d4fa2b8adc56b8f314255d4d4174130680b175366a89e510095e6ebf1d963bb
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08890ef963c10a9a7237efd117c8a94e5bd2fcf389061bead891541b89553427
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9f58c145b7dd21968ed9e8d6c7e6466a650ad919a266febaa45d71dd3b08d06
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3913c6f063ebbf405426bd1b09789ed429fc38cb3d4f60751f4a61b49df23be0
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28319ed5c95ef1c1c3a582aaf767f4eabd6950a5f91539e533e9bb21d7f70a19
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51411024d8c787642cc454881d33eeea730ff1d5a51ba77cdf08351edff87c13
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.23509933774834438,
5
  "eval_steps": 320,
6
- "global_step": 2840,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2067,6 +2067,238 @@
2067
  "learning_rate": 3.420445597436056e-06,
2068
  "loss": 1.4729,
2069
  "step": 2840
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2070
  }
2071
  ],
2072
  "logging_steps": 10,
@@ -2086,7 +2318,7 @@
2086
  "attributes": {}
2087
  }
2088
  },
2089
- "total_flos": 1.2618773495157883e+18,
2090
  "train_batch_size": 3,
2091
  "trial_name": null,
2092
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.26158940397350994,
5
  "eval_steps": 320,
6
+ "global_step": 3160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2067
  "learning_rate": 3.420445597436056e-06,
2068
  "loss": 1.4729,
2069
  "step": 2840
2070
+ },
2071
+ {
2072
+ "epoch": 0.2359271523178808,
2073
+ "grad_norm": 0.680798351764679,
2074
+ "learning_rate": 3.2351082041623125e-06,
2075
+ "loss": 1.5296,
2076
+ "step": 2850
2077
+ },
2078
+ {
2079
+ "epoch": 0.2367549668874172,
2080
+ "grad_norm": 0.7416069507598877,
2081
+ "learning_rate": 3.054765042128521e-06,
2082
+ "loss": 1.4074,
2083
+ "step": 2860
2084
+ },
2085
+ {
2086
+ "epoch": 0.23758278145695363,
2087
+ "grad_norm": 1.013679027557373,
2088
+ "learning_rate": 2.8794353709848178e-06,
2089
+ "loss": 1.5031,
2090
+ "step": 2870
2091
+ },
2092
+ {
2093
+ "epoch": 0.23841059602649006,
2094
+ "grad_norm": 0.684421956539154,
2095
+ "learning_rate": 2.7091379149682685e-06,
2096
+ "loss": 1.4008,
2097
+ "step": 2880
2098
+ },
2099
+ {
2100
+ "epoch": 0.23841059602649006,
2101
+ "eval_loss": 1.778387188911438,
2102
+ "eval_runtime": 379.3466,
2103
+ "eval_samples_per_second": 45.721,
2104
+ "eval_steps_per_second": 3.812,
2105
+ "step": 2880
2106
+ },
2107
+ {
2108
+ "epoch": 0.2392384105960265,
2109
+ "grad_norm": 0.6681047081947327,
2110
+ "learning_rate": 2.5438908609032698e-06,
2111
+ "loss": 1.37,
2112
+ "step": 2890
2113
+ },
2114
+ {
2115
+ "epoch": 0.24006622516556292,
2116
+ "grad_norm": 0.5961506366729736,
2117
+ "learning_rate": 2.3837118562592797e-06,
2118
+ "loss": 1.4046,
2119
+ "step": 2900
2120
+ },
2121
+ {
2122
+ "epoch": 0.24089403973509935,
2123
+ "grad_norm": 0.5972995758056641,
2124
+ "learning_rate": 2.22861800726617e-06,
2125
+ "loss": 1.5898,
2126
+ "step": 2910
2127
+ },
2128
+ {
2129
+ "epoch": 0.24172185430463577,
2130
+ "grad_norm": 0.7544158697128296,
2131
+ "learning_rate": 2.0786258770873647e-06,
2132
+ "loss": 1.4093,
2133
+ "step": 2920
2134
+ },
2135
+ {
2136
+ "epoch": 0.24254966887417218,
2137
+ "grad_norm": 0.5568986535072327,
2138
+ "learning_rate": 1.933751484051027e-06,
2139
+ "loss": 1.5618,
2140
+ "step": 2930
2141
+ },
2142
+ {
2143
+ "epoch": 0.2433774834437086,
2144
+ "grad_norm": 0.6362126469612122,
2145
+ "learning_rate": 1.7940102999393194e-06,
2146
+ "loss": 1.4395,
2147
+ "step": 2940
2148
+ },
2149
+ {
2150
+ "epoch": 0.24420529801324503,
2151
+ "grad_norm": 0.6483564376831055,
2152
+ "learning_rate": 1.6594172483361758e-06,
2153
+ "loss": 1.3861,
2154
+ "step": 2950
2155
+ },
2156
+ {
2157
+ "epoch": 0.24503311258278146,
2158
+ "grad_norm": 0.6191815137863159,
2159
+ "learning_rate": 1.5299867030334814e-06,
2160
+ "loss": 1.4288,
2161
+ "step": 2960
2162
+ },
2163
+ {
2164
+ "epoch": 0.2458609271523179,
2165
+ "grad_norm": 0.7167540788650513,
2166
+ "learning_rate": 1.4057324864960975e-06,
2167
+ "loss": 1.4678,
2168
+ "step": 2970
2169
+ },
2170
+ {
2171
+ "epoch": 0.24668874172185432,
2172
+ "grad_norm": 0.7405688762664795,
2173
+ "learning_rate": 1.286667868385627e-06,
2174
+ "loss": 1.3775,
2175
+ "step": 2980
2176
+ },
2177
+ {
2178
+ "epoch": 0.24751655629139072,
2179
+ "grad_norm": 0.7301118969917297,
2180
+ "learning_rate": 1.172805564143359e-06,
2181
+ "loss": 1.5456,
2182
+ "step": 2990
2183
+ },
2184
+ {
2185
+ "epoch": 0.24834437086092714,
2186
+ "grad_norm": 0.8688226938247681,
2187
+ "learning_rate": 1.064157733632276e-06,
2188
+ "loss": 1.4586,
2189
+ "step": 3000
2190
+ },
2191
+ {
2192
+ "epoch": 0.24917218543046357,
2193
+ "grad_norm": 0.7074631452560425,
2194
+ "learning_rate": 9.607359798384785e-07,
2195
+ "loss": 1.4873,
2196
+ "step": 3010
2197
+ },
2198
+ {
2199
+ "epoch": 0.25,
2200
+ "grad_norm": 0.7328817844390869,
2201
+ "learning_rate": 8.62551347632029e-07,
2202
+ "loss": 1.4613,
2203
+ "step": 3020
2204
+ },
2205
+ {
2206
+ "epoch": 0.2508278145695364,
2207
+ "grad_norm": 0.7635940909385681,
2208
+ "learning_rate": 7.696143225874475e-07,
2209
+ "loss": 1.4208,
2210
+ "step": 3030
2211
+ },
2212
+ {
2213
+ "epoch": 0.25165562913907286,
2214
+ "grad_norm": 0.6220166683197021,
2215
+ "learning_rate": 6.819348298638839e-07,
2216
+ "loss": 1.4609,
2217
+ "step": 3040
2218
+ },
2219
+ {
2220
+ "epoch": 0.25248344370860926,
2221
+ "grad_norm": 0.7133238911628723,
2222
+ "learning_rate": 5.995222331451722e-07,
2223
+ "loss": 1.4993,
2224
+ "step": 3050
2225
+ },
2226
+ {
2227
+ "epoch": 0.2533112582781457,
2228
+ "grad_norm": 0.697382390499115,
2229
+ "learning_rate": 5.223853336398632e-07,
2230
+ "loss": 1.4244,
2231
+ "step": 3060
2232
+ },
2233
+ {
2234
+ "epoch": 0.2541390728476821,
2235
+ "grad_norm": 0.7835958003997803,
2236
+ "learning_rate": 4.505323691412711e-07,
2237
+ "loss": 1.5133,
2238
+ "step": 3070
2239
+ },
2240
+ {
2241
+ "epoch": 0.25496688741721857,
2242
+ "grad_norm": 0.6556974053382874,
2243
+ "learning_rate": 3.839710131477492e-07,
2244
+ "loss": 1.4107,
2245
+ "step": 3080
2246
+ },
2247
+ {
2248
+ "epoch": 0.25579470198675497,
2249
+ "grad_norm": 0.8099976181983948,
2250
+ "learning_rate": 3.2270837404318464e-07,
2251
+ "loss": 1.4708,
2252
+ "step": 3090
2253
+ },
2254
+ {
2255
+ "epoch": 0.25662251655629137,
2256
+ "grad_norm": 0.7530120015144348,
2257
+ "learning_rate": 2.667509943378721e-07,
2258
+ "loss": 1.4437,
2259
+ "step": 3100
2260
+ },
2261
+ {
2262
+ "epoch": 0.2574503311258278,
2263
+ "grad_norm": 0.6962845921516418,
2264
+ "learning_rate": 2.161048499698115e-07,
2265
+ "loss": 1.504,
2266
+ "step": 3110
2267
+ },
2268
+ {
2269
+ "epoch": 0.2582781456953642,
2270
+ "grad_norm": 0.6753128170967102,
2271
+ "learning_rate": 1.7077534966650766e-07,
2272
+ "loss": 1.4186,
2273
+ "step": 3120
2274
+ },
2275
+ {
2276
+ "epoch": 0.2591059602649007,
2277
+ "grad_norm": 0.6573652625083923,
2278
+ "learning_rate": 1.3076733436734322e-07,
2279
+ "loss": 1.4241,
2280
+ "step": 3130
2281
+ },
2282
+ {
2283
+ "epoch": 0.2599337748344371,
2284
+ "grad_norm": 0.6068715453147888,
2285
+ "learning_rate": 9.60850767065924e-08,
2286
+ "loss": 1.3682,
2287
+ "step": 3140
2288
+ },
2289
+ {
2290
+ "epoch": 0.26076158940397354,
2291
+ "grad_norm": 0.7726837396621704,
2292
+ "learning_rate": 6.673228055715241e-08,
2293
+ "loss": 1.5453,
2294
+ "step": 3150
2295
+ },
2296
+ {
2297
+ "epoch": 0.26158940397350994,
2298
+ "grad_norm": 0.6201027035713196,
2299
+ "learning_rate": 4.2712080634949024e-08,
2300
+ "loss": 1.4545,
2301
+ "step": 3160
2302
  }
2303
  ],
2304
  "logging_steps": 10,
 
2318
  "attributes": {}
2319
  }
2320
  },
2321
+ "total_flos": 1.404060712841511e+18,
2322
  "train_batch_size": 3,
2323
  "trial_name": null,
2324
  "trial_params": null