RantiRepo committed on
Commit
567e03a
·
verified ·
1 Parent(s): f12bf2e

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +45 -3
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.903954802259887,
6
  "eval_steps": 10,
7
- "global_step": 80,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -176,6 +176,48 @@
176
  "eval_samples_per_second": 4.363,
177
  "eval_steps_per_second": 1.091,
178
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  }
180
  ],
181
  "logging_steps": 10,
@@ -195,7 +237,7 @@
195
  "attributes": {}
196
  }
197
  },
198
- "total_flos": 9.734138912925696e+16,
199
  "train_batch_size": 4,
200
  "trial_name": null,
201
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.1242937853107344,
6
  "eval_steps": 10,
7
+ "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
176
  "eval_samples_per_second": 4.363,
177
  "eval_steps_per_second": 1.091,
178
  "step": 80
179
+ },
180
+ {
181
+ "entropy": 6.3696688413619995,
182
+ "epoch": 1.0112994350282485,
183
+ "grad_norm": 0.64453125,
184
+ "learning_rate": 0.00010348837209302327,
185
+ "loss": 6.255178451538086,
186
+ "mean_token_accuracy": 0.24344109077202647,
187
+ "num_tokens": 1511764.0,
188
+ "step": 90
189
+ },
190
+ {
191
+ "epoch": 1.0112994350282485,
192
+ "eval_entropy": 6.723124821980794,
193
+ "eval_loss": 6.457315921783447,
194
+ "eval_mean_token_accuracy": 0.20087979889164367,
195
+ "eval_num_tokens": 1511764.0,
196
+ "eval_runtime": 43.8306,
197
+ "eval_samples_per_second": 4.381,
198
+ "eval_steps_per_second": 1.095,
199
+ "step": 90
200
+ },
201
+ {
202
+ "entropy": 6.43809232711792,
203
+ "epoch": 1.1242937853107344,
204
+ "grad_norm": 0.55859375,
205
+ "learning_rate": 9.186046511627907e-05,
206
+ "loss": 6.314236068725586,
207
+ "mean_token_accuracy": 0.23132331417873503,
208
+ "num_tokens": 1682798.0,
209
+ "step": 100
210
+ },
211
+ {
212
+ "epoch": 1.1242937853107344,
213
+ "eval_entropy": 6.687405467033386,
214
+ "eval_loss": 6.43255090713501,
215
+ "eval_mean_token_accuracy": 0.20094856123129526,
216
+ "eval_num_tokens": 1682798.0,
217
+ "eval_runtime": 44.0723,
218
+ "eval_samples_per_second": 4.356,
219
+ "eval_steps_per_second": 1.089,
220
+ "step": 100
221
  }
222
  ],
223
  "logging_steps": 10,
 
237
  "attributes": {}
238
  }
239
  },
240
+ "total_flos": 1.2023018700050227e+17,
241
  "train_batch_size": 4,
242
  "trial_name": null,
243
  "trial_params": null