amos1088 committed on
Commit
3a20545
·
verified ·
1 Parent(s): 27430f6

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:368e648350fa88e97ed24bf06f585bb7f7097580946a4a9480bef2318de437fd
3
  size 18257035
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fbf29aa08f41e0f0601f4765837411287c24654fa0450a669320cc88dc0d507
3
  size 18257035
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f92bb13c8c261c5bbacd52e4713611a2458ef3c2d47986ab438b3233a082b5f
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f39368c97fa07f86c85b709084202463eb1ea663fda6349cc12642c32aea74f1
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4393a84a3109995aa1202073b039b12062e3189ed89aa0b94ef0510ba843009
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2b5ed8d4c0db2e24674d7f125356981e2c73273d96a8f3eabaf284b99f24856
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a68b30ede80d2355469409876134f9fb0aa83dcc5c47c7d77b992bd6fb65d11f
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.020057665789143787,
6
  "eval_steps": 500,
7
- "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -98,6 +98,96 @@
98
  "mean_token_accuracy": 0.0,
99
  "num_tokens": 371196.0,
100
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  }
102
  ],
103
  "logging_steps": 10,
@@ -117,7 +207,7 @@
117
  "attributes": {}
118
  }
119
  },
120
- "total_flos": 8310689070243840.0,
121
  "train_batch_size": 1,
122
  "trial_name": null,
123
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.040115331578287575,
6
  "eval_steps": 500,
7
+ "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
98
  "mean_token_accuracy": 0.0,
99
  "num_tokens": 371196.0,
100
  "step": 100
101
+ },
102
+ {
103
+ "epoch": 0.022063432368058168,
104
+ "grad_norm": 0.0,
105
+ "learning_rate": 3.643048128342246e-06,
106
+ "loss": 0.0,
107
+ "mean_token_accuracy": 0.0,
108
+ "num_tokens": 408430.0,
109
+ "step": 110
110
+ },
111
+ {
112
+ "epoch": 0.024069198946972545,
113
+ "grad_norm": 0.0,
114
+ "learning_rate": 3.9772727272727275e-06,
115
+ "loss": 0.0,
116
+ "mean_token_accuracy": 0.0,
117
+ "num_tokens": 445095.0,
118
+ "step": 120
119
+ },
120
+ {
121
+ "epoch": 0.026074965525886926,
122
+ "grad_norm": 0.0,
123
+ "learning_rate": 4.311497326203208e-06,
124
+ "loss": 0.0,
125
+ "mean_token_accuracy": 0.0,
126
+ "num_tokens": 483443.0,
127
+ "step": 130
128
+ },
129
+ {
130
+ "epoch": 0.028080732104801304,
131
+ "grad_norm": 0.0,
132
+ "learning_rate": 4.64572192513369e-06,
133
+ "loss": 0.0,
134
+ "mean_token_accuracy": 0.0,
135
+ "num_tokens": 519939.0,
136
+ "step": 140
137
+ },
138
+ {
139
+ "epoch": 0.03008649868371568,
140
+ "grad_norm": 0.0,
141
+ "learning_rate": 4.979946524064171e-06,
142
+ "loss": 0.0,
143
+ "mean_token_accuracy": 0.0,
144
+ "num_tokens": 557291.0,
145
+ "step": 150
146
+ },
147
+ {
148
+ "epoch": 0.03209226526263006,
149
+ "grad_norm": 0.0,
150
+ "learning_rate": 5.314171122994652e-06,
151
+ "loss": 0.0,
152
+ "mean_token_accuracy": 0.0,
153
+ "num_tokens": 594842.0,
154
+ "step": 160
155
+ },
156
+ {
157
+ "epoch": 0.03409803184154444,
158
+ "grad_norm": 0.0,
159
+ "learning_rate": 5.648395721925134e-06,
160
+ "loss": 0.0,
161
+ "mean_token_accuracy": 0.0,
162
+ "num_tokens": 630588.0,
163
+ "step": 170
164
+ },
165
+ {
166
+ "epoch": 0.03610379842045882,
167
+ "grad_norm": 0.0,
168
+ "learning_rate": 5.982620320855615e-06,
169
+ "loss": 0.0,
170
+ "mean_token_accuracy": 0.0,
171
+ "num_tokens": 666902.0,
172
+ "step": 180
173
+ },
174
+ {
175
+ "epoch": 0.0381095649993732,
176
+ "grad_norm": 0.0,
177
+ "learning_rate": 6.316844919786097e-06,
178
+ "loss": 0.0,
179
+ "mean_token_accuracy": 0.0,
180
+ "num_tokens": 704018.0,
181
+ "step": 190
182
+ },
183
+ {
184
+ "epoch": 0.040115331578287575,
185
+ "grad_norm": 0.0,
186
+ "learning_rate": 6.651069518716578e-06,
187
+ "loss": 0.0,
188
+ "mean_token_accuracy": 0.0,
189
+ "num_tokens": 741862.0,
190
+ "step": 200
191
  }
192
  ],
193
  "logging_steps": 10,
 
207
  "attributes": {}
208
  }
209
  },
210
+ "total_flos": 1.660951199643648e+16,
211
  "train_batch_size": 1,
212
  "trial_name": null,
213
  "trial_params": null