NBAmine committed on
Commit
f58ec24
·
verified ·
1 Parent(s): 9f8bab3

Training in progress, step 2100, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -29,13 +29,13 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "k_proj",
33
- "down_proj",
34
- "o_proj",
35
- "q_proj",
36
  "v_proj",
 
37
  "gate_proj",
38
- "up_proj"
 
 
 
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
 
 
 
32
  "v_proj",
33
+ "k_proj",
34
  "gate_proj",
35
+ "o_proj",
36
+ "up_proj",
37
+ "down_proj",
38
+ "q_proj"
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:875e11864c60557b1ce9d0f4a3628b1921ba20dcfcb047f1194317ca21dd647e
3
  size 228140600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9e46ae2720088669da0e7f9e660e9df21b3f13cd814ef2c054173a76a40c0a8
3
  size 228140600
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de1ef9fce3501f8a10d1279e16882931ece02414376645b57e1c3a181bf8a440
3
  size 117931203
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76b3121a237388d42068dd86668509dc36abd8695d8ccbfd6fb7b924e1a73d7f
3
  size 117931203
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9eb46347e03fd2a32788474d53b64aa40655ea04df926d70dd4416068652168
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6df16b3659f33d85607b74fb7cdd42ccb03ca1d0dc5313a9352883e092924860
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61bb68517c2e5d425f2cd920b30f02d4e60fd1e393f4dd6c263b9f530746bef3
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ed5fdd6f9fe5f0de5d43635eeeee3253ccf660833d7fe6d9be640b40bec6bbe
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cbbe8c194b3272da66f1fba8ab4ba395d75f317a59ad44137b928cbb13dbc0e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc835731ce73222513c24c9953cdc95225ff0e18509f3befa431f270d3d03450
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 750,
3
  "best_metric": 0.5089643597602844,
4
  "best_model_checkpoint": "./adapter-phase1/checkpoint-750",
5
- "epoch": 3.2,
6
  "eval_steps": 300,
7
- "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2140,6 +2140,118 @@
2140
  "eval_samples_per_second": 2.036,
2141
  "eval_steps_per_second": 0.509,
2142
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2143
  }
2144
  ],
2145
  "logging_steps": 10,
@@ -2159,7 +2271,7 @@
2159
  "attributes": {}
2160
  }
2161
  },
2162
- "total_flos": 3.452158742886605e+17,
2163
  "train_batch_size": 1,
2164
  "trial_name": null,
2165
  "trial_params": null
 
2
  "best_global_step": 750,
3
  "best_metric": 0.5089643597602844,
4
  "best_model_checkpoint": "./adapter-phase1/checkpoint-750",
5
+ "epoch": 3.36,
6
  "eval_steps": 300,
7
+ "global_step": 2100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2140
  "eval_samples_per_second": 2.036,
2141
  "eval_steps_per_second": 0.509,
2142
  "step": 2000
2143
+ },
2144
+ {
2145
+ "entropy": 0.3617474281229079,
2146
+ "epoch": 3.216,
2147
+ "grad_norm": 0.7705036997795105,
2148
+ "learning_rate": 3.5744e-05,
2149
+ "loss": 0.3175,
2150
+ "mean_token_accuracy": 0.9062783475965261,
2151
+ "num_tokens": 20779.0,
2152
+ "step": 2010
2153
+ },
2154
+ {
2155
+ "entropy": 0.3887558562681079,
2156
+ "epoch": 3.232,
2157
+ "grad_norm": 0.9926668405532837,
2158
+ "learning_rate": 3.5424e-05,
2159
+ "loss": 0.3243,
2160
+ "mean_token_accuracy": 0.9048940639942884,
2161
+ "num_tokens": 37039.0,
2162
+ "step": 2020
2163
+ },
2164
+ {
2165
+ "entropy": 0.36308987056836484,
2166
+ "epoch": 3.248,
2167
+ "grad_norm": 0.5336251258850098,
2168
+ "learning_rate": 3.5104e-05,
2169
+ "loss": 0.3286,
2170
+ "mean_token_accuracy": 0.9028704173862934,
2171
+ "num_tokens": 66230.0,
2172
+ "step": 2030
2173
+ },
2174
+ {
2175
+ "entropy": 0.3100855226628482,
2176
+ "epoch": 3.2640000000000002,
2177
+ "grad_norm": 0.6235008239746094,
2178
+ "learning_rate": 3.4784e-05,
2179
+ "loss": 0.3026,
2180
+ "mean_token_accuracy": 0.9074051853269338,
2181
+ "num_tokens": 98315.0,
2182
+ "step": 2040
2183
+ },
2184
+ {
2185
+ "entropy": 0.33463340234011413,
2186
+ "epoch": 3.2800000000000002,
2187
+ "grad_norm": 0.6380220651626587,
2188
+ "learning_rate": 3.4464e-05,
2189
+ "loss": 0.3058,
2190
+ "mean_token_accuracy": 0.9115277793258428,
2191
+ "num_tokens": 123538.0,
2192
+ "step": 2050
2193
+ },
2194
+ {
2195
+ "entropy": 0.3619419479742646,
2196
+ "epoch": 3.296,
2197
+ "grad_norm": 0.7604582905769348,
2198
+ "learning_rate": 3.4144000000000004e-05,
2199
+ "loss": 0.3112,
2200
+ "mean_token_accuracy": 0.9084025923162699,
2201
+ "num_tokens": 143855.0,
2202
+ "step": 2060
2203
+ },
2204
+ {
2205
+ "entropy": 0.3980453579686582,
2206
+ "epoch": 3.312,
2207
+ "grad_norm": 0.8576037883758545,
2208
+ "learning_rate": 3.3824e-05,
2209
+ "loss": 0.3267,
2210
+ "mean_token_accuracy": 0.9037791218608617,
2211
+ "num_tokens": 159314.0,
2212
+ "step": 2070
2213
+ },
2214
+ {
2215
+ "entropy": 0.35077386572957037,
2216
+ "epoch": 3.328,
2217
+ "grad_norm": 0.5504621863365173,
2218
+ "learning_rate": 3.3504e-05,
2219
+ "loss": 0.3004,
2220
+ "mean_token_accuracy": 0.9084354028105736,
2221
+ "num_tokens": 187464.0,
2222
+ "step": 2080
2223
+ },
2224
+ {
2225
+ "entropy": 0.28209723997861147,
2226
+ "epoch": 3.344,
2227
+ "grad_norm": 0.8361979126930237,
2228
+ "learning_rate": 3.3184000000000006e-05,
2229
+ "loss": 0.2903,
2230
+ "mean_token_accuracy": 0.9112230580300092,
2231
+ "num_tokens": 219657.0,
2232
+ "step": 2090
2233
+ },
2234
+ {
2235
+ "entropy": 0.3153431011363864,
2236
+ "epoch": 3.36,
2237
+ "grad_norm": 0.6275749802589417,
2238
+ "learning_rate": 3.2864e-05,
2239
+ "loss": 0.2894,
2240
+ "mean_token_accuracy": 0.9114996068179607,
2241
+ "num_tokens": 245396.0,
2242
+ "step": 2100
2243
+ },
2244
+ {
2245
+ "epoch": 3.36,
2246
+ "eval_accuracy": 0.026501569905019107,
2247
+ "eval_entropy": 0.4113759865760803,
2248
+ "eval_loss": 0.541074275970459,
2249
+ "eval_mean_token_accuracy": 0.8583663606643677,
2250
+ "eval_num_tokens": 245396.0,
2251
+ "eval_runtime": 869.6626,
2252
+ "eval_samples_per_second": 2.3,
2253
+ "eval_steps_per_second": 0.575,
2254
+ "step": 2100
2255
  }
2256
  ],
2257
  "logging_steps": 10,
 
2271
  "attributes": {}
2272
  }
2273
  },
2274
+ "total_flos": 3.6234506980141056e+17,
2275
  "train_batch_size": 1,
2276
  "trial_name": null,
2277
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1429883e942c6b0b337507f32bd2f9e9b227b047e8eeab1d3295f008840498d1
3
  size 6353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc0c9c43aae96575e8afc416e967ac5674d13cc1a38c487b69cd4534aafef005
3
  size 6353