tokhey committed on
Commit
cbebcc9
·
verified ·
1 Parent(s): 3a31eca

Training in progress, step 120, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4404b24076cc63d331e67d7ba3a7fe9520f7a0e8f6d4b52270bfbfafc9664489
3
  size 140815952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2832e178fbfc4fe2fb481e79a8d919fe407032c53cea12f332a1381234b6f1c4
3
  size 140815952
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d619ba6794210f6129f2e991bb4cca6fb744cc687fb23c48cd2119fe42dacaae
3
  size 281829907
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbcd49ec20bf55444feca0578bd6f0012b99c1072af990df00376b5d58929f87
3
  size 281829907
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:417227e46f0a581722968d781d8ea2ba8688c7892e0d69749745f185230daf6a
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:994855e9f3bb291ce54dbed25621bf0ab9b11111a5f95765ed4ae5933e96cbd7
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2f9c1a66b0f67830f363d7aa55a784bfca1310ea07876b83a0f2f54a5722f96
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cea2e56a8bde12e649a1d7cae49ed9d38ef81a2916cebf53379e47210c432c6
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.0666666666666667,
6
  "eval_steps": 100,
7
- "global_step": 80,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -120,6 +120,70 @@
120
  "learning_rate": 0.0004110276028625994,
121
  "loss": 0.4856,
122
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  }
124
  ],
125
  "logging_steps": 5,
@@ -139,7 +203,7 @@
139
  "attributes": {}
140
  }
141
  },
142
- "total_flos": 2644590082129920.0,
143
  "train_batch_size": 1,
144
  "trial_name": null,
145
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.6,
6
  "eval_steps": 100,
7
+ "global_step": 120,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
120
  "learning_rate": 0.0004110276028625994,
121
  "loss": 0.4856,
122
  "step": 80
123
+ },
124
+ {
125
+ "epoch": 1.1333333333333333,
126
+ "grad_norm": 0.1001599058508873,
127
+ "learning_rate": 0.00039568525901157527,
128
+ "loss": 0.4632,
129
+ "step": 85
130
+ },
131
+ {
132
+ "epoch": 1.2,
133
+ "grad_norm": 0.09839078038930893,
134
+ "learning_rate": 0.0003794624062245813,
135
+ "loss": 0.5097,
136
+ "step": 90
137
+ },
138
+ {
139
+ "epoch": 1.2666666666666666,
140
+ "grad_norm": 0.102094367146492,
141
+ "learning_rate": 0.000362457094001909,
142
+ "loss": 0.4476,
143
+ "step": 95
144
+ },
145
+ {
146
+ "epoch": 1.3333333333333333,
147
+ "grad_norm": 0.11247370392084122,
148
+ "learning_rate": 0.00034477210096009486,
149
+ "loss": 0.4919,
150
+ "step": 100
151
+ },
152
+ {
153
+ "epoch": 1.3333333333333333,
154
+ "eval_loss": 0.506523609161377,
155
+ "eval_runtime": 45.0985,
156
+ "eval_samples_per_second": 0.377,
157
+ "eval_steps_per_second": 0.377,
158
+ "step": 100
159
+ },
160
+ {
161
+ "epoch": 1.4,
162
+ "grad_norm": 0.10984684526920319,
163
+ "learning_rate": 0.00032651431364697186,
164
+ "loss": 0.3816,
165
+ "step": 105
166
+ },
167
+ {
168
+ "epoch": 1.4666666666666668,
169
+ "grad_norm": 0.10393229871988297,
170
+ "learning_rate": 0.0003077940805287425,
171
+ "loss": 0.4784,
172
+ "step": 110
173
+ },
174
+ {
175
+ "epoch": 1.5333333333333332,
176
+ "grad_norm": 0.11296918988227844,
177
+ "learning_rate": 0.00028872454505352117,
178
+ "loss": 0.4422,
179
+ "step": 115
180
+ },
181
+ {
182
+ "epoch": 1.6,
183
+ "grad_norm": 0.10333701968193054,
184
+ "learning_rate": 0.0002694209618222516,
185
+ "loss": 0.4828,
186
+ "step": 120
187
  }
188
  ],
189
  "logging_steps": 5,
 
203
  "attributes": {}
204
  }
205
  },
206
+ "total_flos": 3966885123194880.0,
207
  "train_batch_size": 1,
208
  "trial_name": null,
209
  "trial_params": null