fguryel commited on
Commit
d90d90c
·
verified ·
1 Parent(s): 7ba2b3f

Upload folder using huggingface_hub

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12b20d2726037c98f2ce4ae7731f490bd4c05574f16b52fedf610ca2f299fbf8
3
  size 4991037968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a0fb435857da39fb3ac4a6250cba4532cdfeca14f1ceb94ac4c0859ff87c986
3
  size 4991037968
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:338de36ed0bd740de5ef2e61b137c64e2bd91321bb50de1a705521395bcfd53b
3
  size 1610725592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a3ebf4fcc9f98c653aafde81046f3489c2c8df911dcf954681a07bda8f5ad06
3
  size 1610725592
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2bac92613552e39b1793e6ae133fd0d27096b237be00533069cc8a7dbcb0bb3
3
  size 13203690391
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ab574ed84635fe62ae45a58e5af8787fc4f025fd04fcdc4fcf32df00d050037
3
  size 13203690391
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f196323d7423b60f8e4ceb7dbf8715ee326c0d068e5ff164f13c63b279b9f1a0
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea11996454b5587fcf33ae0ab5cf14b2031bf5f53f8c2ed5a48e87de31e29c84
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e92a7052d2a7c01a1b09ea2ba38f44479d548f97aef62e831f459b08d633005
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c085c9e30c4332cb31b5d70b86d33eec0e6be6c0ce92c99e20a2795c064a4205
3
  size 1465
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 1000,
3
- "best_metric": 1.2828963994979858,
4
- "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-1000",
5
- "epoch": 4.976947040498443,
6
  "eval_steps": 500,
7
- "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -164,6 +164,84 @@
164
  "eval_samples_per_second": 11.524,
165
  "eval_steps_per_second": 1.481,
166
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  }
168
  ],
169
  "logging_steps": 50,
@@ -183,7 +261,7 @@
183
  "attributes": {}
184
  }
185
  },
186
- "total_flos": 2.7667850240773325e+17,
187
  "train_batch_size": 1,
188
  "trial_name": null,
189
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1500,
3
+ "best_metric": 1.2759937047958374,
4
+ "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-1500",
5
+ "epoch": 7.463551401869159,
6
  "eval_steps": 500,
7
+ "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
164
  "eval_samples_per_second": 11.524,
165
  "eval_steps_per_second": 1.481,
166
  "step": 1000
167
+ },
168
+ {
169
+ "epoch": 5.224299065420561,
170
+ "grad_norm": 0.92578125,
171
+ "learning_rate": 9.231474733040436e-06,
172
+ "loss": 1.2206,
173
+ "step": 1050
174
+ },
175
+ {
176
+ "epoch": 5.473520249221184,
177
+ "grad_norm": 0.9609375,
178
+ "learning_rate": 9.111881511288579e-06,
179
+ "loss": 1.2081,
180
+ "step": 1100
181
+ },
182
+ {
183
+ "epoch": 5.722741433021807,
184
+ "grad_norm": 1.0703125,
185
+ "learning_rate": 8.984538766023024e-06,
186
+ "loss": 1.2224,
187
+ "step": 1150
188
+ },
189
+ {
190
+ "epoch": 5.97196261682243,
191
+ "grad_norm": 1.0625,
192
+ "learning_rate": 8.849686495793349e-06,
193
+ "loss": 1.2105,
194
+ "step": 1200
195
+ },
196
+ {
197
+ "epoch": 6.219314641744548,
198
+ "grad_norm": 1.1015625,
199
+ "learning_rate": 8.707578852095928e-06,
200
+ "loss": 1.2096,
201
+ "step": 1250
202
+ },
203
+ {
204
+ "epoch": 6.468535825545171,
205
+ "grad_norm": 1.3203125,
206
+ "learning_rate": 8.558483660383245e-06,
207
+ "loss": 1.2097,
208
+ "step": 1300
209
+ },
210
+ {
211
+ "epoch": 6.717757009345794,
212
+ "grad_norm": 1.1875,
213
+ "learning_rate": 8.402681915302344e-06,
214
+ "loss": 1.2227,
215
+ "step": 1350
216
+ },
217
+ {
218
+ "epoch": 6.966978193146417,
219
+ "grad_norm": 1.2265625,
220
+ "learning_rate": 8.240467251113762e-06,
221
+ "loss": 1.2102,
222
+ "step": 1400
223
+ },
224
+ {
225
+ "epoch": 7.214330218068536,
226
+ "grad_norm": 1.1015625,
227
+ "learning_rate": 8.072145388289002e-06,
228
+ "loss": 1.185,
229
+ "step": 1450
230
+ },
231
+ {
232
+ "epoch": 7.463551401869159,
233
+ "grad_norm": 1.1953125,
234
+ "learning_rate": 7.898033557329536e-06,
235
+ "loss": 1.2051,
236
+ "step": 1500
237
+ },
238
+ {
239
+ "epoch": 7.463551401869159,
240
+ "eval_loss": 1.2759937047958374,
241
+ "eval_runtime": 15.5082,
242
+ "eval_samples_per_second": 11.542,
243
+ "eval_steps_per_second": 1.483,
244
+ "step": 1500
245
  }
246
  ],
247
  "logging_steps": 50,
 
261
  "attributes": {}
262
  }
263
  },
264
+ "total_flos": 4.149138433077412e+17,
265
  "train_batch_size": 1,
266
  "trial_name": null,
267
  "trial_params": null