RantiRepo commited on
Commit
f77e207
·
verified ·
1 Parent(s): ace217a

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +45 -3
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.6779661016949152,
6
  "eval_steps": 10,
7
- "global_step": 60,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -134,6 +134,48 @@
134
  "eval_samples_per_second": 4.392,
135
  "eval_steps_per_second": 1.098,
136
  "step": 60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  }
138
  ],
139
  "logging_steps": 10,
@@ -153,7 +195,7 @@
153
  "attributes": {}
154
  }
155
  },
156
- "total_flos": 7.239544391631667e+16,
157
  "train_batch_size": 4,
158
  "trial_name": null,
159
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.903954802259887,
6
  "eval_steps": 10,
7
+ "global_step": 80,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
134
  "eval_samples_per_second": 4.392,
135
  "eval_steps_per_second": 1.098,
136
  "step": 60
137
+ },
138
+ {
139
+ "entropy": 6.680863696336746,
140
+ "epoch": 0.7909604519774012,
141
+ "grad_norm": 1.34375,
142
+ "learning_rate": 0.00012674418604651164,
143
+ "loss": 6.45898666381836,
144
+ "mean_token_accuracy": 0.21339080817997455,
145
+ "num_tokens": 1181787.0,
146
+ "step": 70
147
+ },
148
+ {
149
+ "epoch": 0.7909604519774012,
150
+ "eval_entropy": 6.870167553424835,
151
+ "eval_loss": 6.5776286125183105,
152
+ "eval_mean_token_accuracy": 0.19155203737318516,
153
+ "eval_num_tokens": 1181787.0,
154
+ "eval_runtime": 43.9254,
155
+ "eval_samples_per_second": 4.371,
156
+ "eval_steps_per_second": 1.093,
157
+ "step": 70
158
+ },
159
+ {
160
+ "entropy": 6.659492689371109,
161
+ "epoch": 0.903954802259887,
162
+ "grad_norm": 0.82421875,
163
+ "learning_rate": 0.00011511627906976746,
164
+ "loss": 6.499990081787109,
165
+ "mean_token_accuracy": 0.21302505303174257,
166
+ "num_tokens": 1360714.0,
167
+ "step": 80
168
+ },
169
+ {
170
+ "epoch": 0.903954802259887,
171
+ "eval_entropy": 6.760685175657272,
172
+ "eval_loss": 6.4918060302734375,
173
+ "eval_mean_token_accuracy": 0.20070527338733277,
174
+ "eval_num_tokens": 1360714.0,
175
+ "eval_runtime": 44.0105,
176
+ "eval_samples_per_second": 4.363,
177
+ "eval_steps_per_second": 1.091,
178
+ "step": 80
179
  }
180
  ],
181
  "logging_steps": 10,
 
195
  "attributes": {}
196
  }
197
  },
198
+ "total_flos": 9.734138912925696e+16,
199
  "train_batch_size": 4,
200
  "trial_name": null,
201
  "trial_params": null