fguryel commited on
Commit
b332165
·
verified ·
1 Parent(s): d90d90c

Upload folder using huggingface_hub

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a0fb435857da39fb3ac4a6250cba4532cdfeca14f1ceb94ac4c0859ff87c986
3
  size 4991037968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:532b785101e9a0144716c91a267717e1bf9c98f2026889ca401be8536ab800af
3
  size 4991037968
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a3ebf4fcc9f98c653aafde81046f3489c2c8df911dcf954681a07bda8f5ad06
3
  size 1610725592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e9f0e287277c4c9b13e7806196ccadd02de3ceda54b649e85019dabf8193091
3
  size 1610725592
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ab574ed84635fe62ae45a58e5af8787fc4f025fd04fcdc4fcf32df00d050037
3
  size 13203690391
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:076eba10f086da4576d3906878882df2c7186b2daa7ce0f6a7f0437b019a6acc
3
  size 13203690391
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea11996454b5587fcf33ae0ab5cf14b2031bf5f53f8c2ed5a48e87de31e29c84
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20ea3a198ff666cb4ace1c684b598fe43fc7c3c276b83efc553a1b787e12a304
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c085c9e30c4332cb31b5d70b86d33eec0e6be6c0ce92c99e20a2795c064a4205
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:355a4437a5f06b625275e18af63d950b8d65ee72e9119d148de55df74a553f3b
3
  size 1465
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 1500,
3
- "best_metric": 1.2759937047958374,
4
- "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-1500",
5
- "epoch": 7.463551401869159,
6
  "eval_steps": 500,
7
- "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -242,6 +242,84 @@
242
  "eval_samples_per_second": 11.542,
243
  "eval_steps_per_second": 1.483,
244
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  }
246
  ],
247
  "logging_steps": 50,
@@ -261,7 +339,7 @@
261
  "attributes": {}
262
  }
263
  },
264
- "total_flos": 4.149138433077412e+17,
265
  "train_batch_size": 1,
266
  "trial_name": null,
267
  "trial_params": null
 
1
  {
2
+ "best_global_step": 2000,
3
+ "best_metric": 1.2739386558532715,
4
+ "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-2000",
5
+ "epoch": 9.95202492211838,
6
  "eval_steps": 500,
7
+ "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
242
  "eval_samples_per_second": 11.542,
243
  "eval_steps_per_second": 1.483,
244
  "step": 1500
245
+ },
246
+ {
247
+ "epoch": 7.712772585669782,
248
+ "grad_norm": 0.9921875,
249
+ "learning_rate": 7.718459900893254e-06,
250
+ "loss": 1.2148,
251
+ "step": 1550
252
+ },
253
+ {
254
+ "epoch": 7.961993769470405,
255
+ "grad_norm": 1.46875,
256
+ "learning_rate": 7.533762855355126e-06,
257
+ "loss": 1.2196,
258
+ "step": 1600
259
+ },
260
+ {
261
+ "epoch": 8.209345794392524,
262
+ "grad_norm": 1.6328125,
263
+ "learning_rate": 7.344290512967664e-06,
264
+ "loss": 1.1935,
265
+ "step": 1650
266
+ },
267
+ {
268
+ "epoch": 8.458566978193147,
269
+ "grad_norm": 1.3203125,
270
+ "learning_rate": 7.150399965823252e-06,
271
+ "loss": 1.208,
272
+ "step": 1700
273
+ },
274
+ {
275
+ "epoch": 8.70778816199377,
276
+ "grad_norm": 1.484375,
277
+ "learning_rate": 6.952456632854821e-06,
278
+ "loss": 1.1997,
279
+ "step": 1750
280
+ },
281
+ {
282
+ "epoch": 8.957009345794393,
283
+ "grad_norm": 1.171875,
284
+ "learning_rate": 6.750833571143174e-06,
285
+ "loss": 1.1962,
286
+ "step": 1800
287
+ },
288
+ {
289
+ "epoch": 9.20436137071651,
290
+ "grad_norm": 1.015625,
291
+ "learning_rate": 6.5459107728289784e-06,
292
+ "loss": 1.216,
293
+ "step": 1850
294
+ },
295
+ {
296
+ "epoch": 9.453582554517133,
297
+ "grad_norm": 1.2109375,
298
+ "learning_rate": 6.338074448954472e-06,
299
+ "loss": 1.1888,
300
+ "step": 1900
301
+ },
302
+ {
303
+ "epoch": 9.702803738317757,
304
+ "grad_norm": 1.140625,
305
+ "learning_rate": 6.127716301584618e-06,
306
+ "loss": 1.1998,
307
+ "step": 1950
308
+ },
309
+ {
310
+ "epoch": 9.95202492211838,
311
+ "grad_norm": 1.25,
312
+ "learning_rate": 5.915232785579527e-06,
313
+ "loss": 1.2089,
314
+ "step": 2000
315
+ },
316
+ {
317
+ "epoch": 9.95202492211838,
318
+ "eval_loss": 1.2739386558532715,
319
+ "eval_runtime": 15.5272,
320
+ "eval_samples_per_second": 11.528,
321
+ "eval_steps_per_second": 1.481,
322
+ "step": 2000
323
  }
324
  ],
325
  "logging_steps": 50,
 
339
  "attributes": {}
340
  }
341
  },
342
+ "total_flos": 5.532530945116078e+17,
343
  "train_batch_size": 1,
344
  "trial_name": null,
345
  "trial_params": null