moos124 committed on
Commit
cefab9a
·
verified ·
1 Parent(s): 373b5ed

Training in progress, step 120, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:624f7058064d76847152a7aff03ccc76f9687853b2fe9892d721cbe8fffcd2c4
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:770a3ac44a65aeab36defa9a73c90b3349f1f0ce5face580eed165902b649903
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:065f308e48674fbaead7fbe7336d030bb7aff58e7b4632d7a51e4ace7e66a063
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ef4b3eae44ab844f50deb0c675fb8968aec3602ceed5a0f3f20aef8c00b79a
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:503c8a7590e7af91329256ad3aad149e4b5fc6f9829d279e0510c95425b510ab
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb0752c5e5cc21f8ee10d67074562d11bb46466fdbff03f1ff2c1b5680879cb
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:255c2565ea9ef2b84420f28b6a4b847239969551451003011398fa9bc80fd20c
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d02b805ae3a79cb34b0073f6931b6897f9b2e3b2ee710e22dd1d03d4cd7017b
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.023466666666666667,
6
  "eval_steps": 500,
7
- "global_step": 110,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -128,6 +128,16 @@
128
  "mean_token_accuracy": 0.7173117578029633,
129
  "num_tokens": 508883.0,
130
  "step": 110
 
 
 
 
 
 
 
 
 
 
131
  }
132
  ],
133
  "logging_steps": 10,
@@ -147,7 +157,7 @@
147
  "attributes": {}
148
  }
149
  },
150
- "total_flos": 2463835883056128.0,
151
  "train_batch_size": 4,
152
  "trial_name": null,
153
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0256,
6
  "eval_steps": 500,
7
+ "global_step": 120,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
128
  "mean_token_accuracy": 0.7173117578029633,
129
  "num_tokens": 508883.0,
130
  "step": 110
131
+ },
132
+ {
133
+ "entropy": 1.2709181517362595,
134
+ "epoch": 0.0256,
135
+ "grad_norm": 0.5542399883270264,
136
+ "learning_rate": 3.966666666666667e-05,
137
+ "loss": 1.249708652496338,
138
+ "mean_token_accuracy": 0.727140337228775,
139
+ "num_tokens": 555373.0,
140
+ "step": 120
141
  }
142
  ],
143
  "logging_steps": 10,
 
157
  "attributes": {}
158
  }
159
  },
160
+ "total_flos": 2673316770336768.0,
161
  "train_batch_size": 4,
162
  "trial_name": null,
163
  "trial_params": null