moos124 committed on
Commit
ff87f3a
·
verified ·
1 Parent(s): 9a8046c

Training in progress, step 130, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:770a3ac44a65aeab36defa9a73c90b3349f1f0ce5face580eed165902b649903
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbfebfb020a6922ac7c187ec43e6f2446a7f9d245a376eb38de42ed46b1590e1
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7ef4b3eae44ab844f50deb0c675fb8968aec3602ceed5a0f3f20aef8c00b79a
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b904d73ce7aa570ef7776576e060395a7cc4149d14b35769204c250bd553f164
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cb0752c5e5cc21f8ee10d67074562d11bb46466fdbff03f1ff2c1b5680879cb
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09728db8e28af893fcd6887250b0e05bc3ac00085eefbd783ad496394a300ca7
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d02b805ae3a79cb34b0073f6931b6897f9b2e3b2ee710e22dd1d03d4cd7017b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0d97838d356de43e090e5798f1ba163245c9f1fea822a25f9209f3bf9aa6e5c
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0256,
6
  "eval_steps": 500,
7
- "global_step": 120,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -138,6 +138,16 @@
138
  "mean_token_accuracy": 0.727140337228775,
139
  "num_tokens": 555373.0,
140
  "step": 120
 
 
 
 
 
 
 
 
 
 
141
  }
142
  ],
143
  "logging_steps": 10,
@@ -157,7 +167,7 @@
157
  "attributes": {}
158
  }
159
  },
160
- "total_flos": 2673316770336768.0,
161
  "train_batch_size": 4,
162
  "trial_name": null,
163
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.027733333333333332,
6
  "eval_steps": 500,
7
+ "global_step": 130,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
138
  "mean_token_accuracy": 0.727140337228775,
139
  "num_tokens": 555373.0,
140
  "step": 120
141
+ },
142
+ {
143
+ "entropy": 1.3233605474233627,
144
+ "epoch": 0.027733333333333332,
145
+ "grad_norm": 0.4590514004230499,
146
+ "learning_rate": 4.3e-05,
147
+ "loss": 1.3480740547180177,
148
+ "mean_token_accuracy": 0.7144808873534203,
149
+ "num_tokens": 599533.0,
150
+ "step": 130
151
  }
152
  ],
153
  "logging_steps": 10,
 
167
  "attributes": {}
168
  }
169
  },
170
+ "total_flos": 2892863914924032.0,
171
  "train_batch_size": 4,
172
  "trial_name": null,
173
  "trial_params": null