moos124 committed on
Commit
7cf7642
·
verified ·
1 Parent(s): 8669218

Training in progress, step 170, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71055204d112a0352eaeb730e04d7cada71c54e2db4c9895168aaf6bac161f5f
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d74edd1550f3054e6c6139caf7d16e4fba4e83f275ff94e54f95511292c6eef0
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9300105f69645a78e8cafbfae6bdebb729a1caf9e718790492c3f348d7a3a3ad
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80f329ce846ef5208cb4db50950c0159fb4339bbdfa9deca86421fcb3a3e22fe
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdfb0a36845787ad84ee4edb14ff27f7311542eb37712c2ce4e2eb1fe67e1be4
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ed2df097c924d834b0115a6978c7176f079a672434526c0cd830d074b234e6
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:385d9b2ff8eb3417d1dd96e916d06043c00fb3770964c1dfd252f37e6ba8752a
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e24a40a25924ca0c915ae0e90b167f67b5630f335c6534762576ecaa3d8bf48
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.034133333333333335,
6
  "eval_steps": 500,
7
- "global_step": 160,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -178,6 +178,16 @@
178
  "mean_token_accuracy": 0.7412141926586628,
179
  "num_tokens": 751635.0,
180
  "step": 160
 
 
 
 
 
 
 
 
 
 
181
  }
182
  ],
183
  "logging_steps": 10,
@@ -197,7 +207,7 @@
197
  "attributes": {}
198
  }
199
  },
200
- "total_flos": 3594717258968064.0,
201
  "train_batch_size": 4,
202
  "trial_name": null,
203
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.03626666666666667,
6
  "eval_steps": 500,
7
+ "global_step": 170,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
178
  "mean_token_accuracy": 0.7412141926586628,
179
  "num_tokens": 751635.0,
180
  "step": 160
181
+ },
182
+ {
183
+ "entropy": 1.0946420103311538,
184
+ "epoch": 0.03626666666666667,
185
+ "grad_norm": 0.2150825560092926,
186
+ "learning_rate": 5.633333333333334e-05,
187
+ "loss": 1.1224396705627442,
188
+ "mean_token_accuracy": 0.7382922798395157,
189
+ "num_tokens": 794488.0,
190
+ "step": 170
191
  }
192
  ],
193
  "logging_steps": 10,
 
207
  "attributes": {}
208
  }
209
  },
210
+ "total_flos": 3810848905328640.0,
211
  "train_batch_size": 4,
212
  "trial_name": null,
213
  "trial_params": null