a-ord19 committed on
Commit
f4ea563
·
verified ·
1 Parent(s): 5f9c26c

End of training

Browse files
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 2.2506963788300833,
3
- "eval_entropy": 0.4378464198247953,
4
- "eval_loss": 0.43301182985305786,
5
- "eval_mean_token_accuracy": 0.8880459434026248,
6
- "eval_num_tokens": 14684160.0,
7
- "eval_runtime": 1046.0416,
8
- "eval_samples_per_second": 2.353,
9
  "eval_steps_per_second": 0.294,
10
- "total_flos": 6.266922890113843e+17,
11
- "train_loss": 0.04690098939118562,
12
- "train_runtime": 4798.2939,
13
- "train_samples_per_second": 7.18,
14
  "train_steps_per_second": 0.15
15
  }
 
1
  {
2
+ "epoch": 2.501392757660167,
3
+ "eval_entropy": 0.43805627868353547,
4
+ "eval_loss": 0.4330117106437683,
5
+ "eval_mean_token_accuracy": 0.887996860912868,
6
+ "eval_num_tokens": 16158720.0,
7
+ "eval_runtime": 1046.2012,
8
+ "eval_samples_per_second": 2.352,
9
  "eval_steps_per_second": 0.294,
10
+ "total_flos": 6.896237322593894e+17,
11
+ "train_loss": 0.04248945871988932,
12
+ "train_runtime": 4807.6761,
13
+ "train_samples_per_second": 7.166,
14
  "train_steps_per_second": 0.15
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 2.2506963788300833,
3
- "eval_entropy": 0.4378464198247953,
4
- "eval_loss": 0.43301182985305786,
5
- "eval_mean_token_accuracy": 0.8880459434026248,
6
- "eval_num_tokens": 14684160.0,
7
- "eval_runtime": 1046.0416,
8
- "eval_samples_per_second": 2.353,
9
  "eval_steps_per_second": 0.294
10
  }
 
1
  {
2
+ "epoch": 2.501392757660167,
3
+ "eval_entropy": 0.43805627868353547,
4
+ "eval_loss": 0.4330117106437683,
5
+ "eval_mean_token_accuracy": 0.887996860912868,
6
+ "eval_num_tokens": 16158720.0,
7
+ "eval_runtime": 1046.2012,
8
+ "eval_samples_per_second": 2.352,
9
  "eval_steps_per_second": 0.294
10
  }
runs/Oct27_22-21-52_7e534512710f/events.out.tfevents.1761658635.7e534512710f.2586.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d2587d42020e9994d2db4b9b718fd697cad9b722ed51835f797fc7f5ad3c9bc
3
+ size 527
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.2506963788300833,
3
- "total_flos": 6.266922890113843e+17,
4
- "train_loss": 0.04690098939118562,
5
- "train_runtime": 4798.2939,
6
- "train_samples_per_second": 7.18,
7
  "train_steps_per_second": 0.15
8
  }
 
1
  {
2
+ "epoch": 2.501392757660167,
3
+ "total_flos": 6.896237322593894e+17,
4
+ "train_loss": 0.04248945871988932,
5
+ "train_runtime": 4807.6761,
6
+ "train_samples_per_second": 7.166,
7
  "train_steps_per_second": 0.15
8
  }
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 540,
3
  "best_metric": 0.43301182985305786,
4
  "best_model_checkpoint": "Mistral-7B-v0.1/r4/checkpoint-540",
5
- "epoch": 2.2506963788300833,
6
  "eval_steps": 60,
7
- "global_step": 540,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -199,12 +199,33 @@
199
  "step": 540
200
  },
201
  {
202
- "epoch": 2.2506963788300833,
203
- "step": 540,
204
- "total_flos": 6.266922890113843e+17,
205
- "train_loss": 0.04690098939118562,
206
- "train_runtime": 4798.2939,
207
- "train_samples_per_second": 7.18,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  "train_steps_per_second": 0.15
209
  }
210
  ],
@@ -220,7 +241,7 @@
220
  "early_stopping_threshold": 0.01
221
  },
222
  "attributes": {
223
- "early_stopping_patience_counter": 7
224
  }
225
  },
226
  "TrainerControl": {
@@ -234,7 +255,7 @@
234
  "attributes": {}
235
  }
236
  },
237
- "total_flos": 6.266922890113843e+17,
238
  "train_batch_size": 8,
239
  "trial_name": null,
240
  "trial_params": null
 
2
  "best_global_step": 540,
3
  "best_metric": 0.43301182985305786,
4
  "best_model_checkpoint": "Mistral-7B-v0.1/r4/checkpoint-540",
5
+ "epoch": 2.501392757660167,
6
  "eval_steps": 60,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
199
  "step": 540
200
  },
201
  {
202
+ "entropy": 0.4391553503357702,
203
+ "epoch": 2.501392757660167,
204
+ "grad_norm": 0.630813479423523,
205
+ "learning_rate": 8.903840820084096e-06,
206
+ "loss": 0.4249,
207
+ "mean_token_accuracy": 0.8895656656887796,
208
+ "num_tokens": 16158720.0,
209
+ "step": 600
210
+ },
211
+ {
212
+ "epoch": 2.501392757660167,
213
+ "eval_entropy": 0.4278067753880055,
214
+ "eval_loss": 0.4333657920360565,
215
+ "eval_mean_token_accuracy": 0.8881023328799706,
216
+ "eval_num_tokens": 16158720.0,
217
+ "eval_runtime": 1045.4866,
218
+ "eval_samples_per_second": 2.354,
219
+ "eval_steps_per_second": 0.295,
220
+ "step": 600
221
+ },
222
+ {
223
+ "epoch": 2.501392757660167,
224
+ "step": 600,
225
+ "total_flos": 6.896237322593894e+17,
226
+ "train_loss": 0.04248945871988932,
227
+ "train_runtime": 4807.6761,
228
+ "train_samples_per_second": 7.166,
229
  "train_steps_per_second": 0.15
230
  }
231
  ],
 
241
  "early_stopping_threshold": 0.01
242
  },
243
  "attributes": {
244
+ "early_stopping_patience_counter": 9
245
  }
246
  },
247
  "TrainerControl": {
 
255
  "attributes": {}
256
  }
257
  },
258
+ "total_flos": 6.896237322593894e+17,
259
  "train_batch_size": 8,
260
  "trial_name": null,
261
  "trial_params": null