fguryel commited on
Commit
fecf95a
·
verified ·
1 Parent(s): 5db0525

Upload folder using huggingface_hub

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:563ad1a53ff9968c23e661fbde0971b00547c40f4d286b6a81f100ad3a953090
3
  size 4991037968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3245d78a13e7ddea92cfe2329a3cd9082ce81c727a2cc2606ce1f900c202e0a6
3
  size 4991037968
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cae00f32d5364a2e287e59819cabad01b83cc639d9fa338d8f4a929c62f6b094
3
  size 1610725592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0c547788cdb8c419c8cb9f5d95e756653d92acc015aa99d43bf59eb40ae716f
3
  size 1610725592
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c768a039ece165c81adc7ffc56067dfc1c0ba40916bd4dd23a66b7db5284ff5
3
  size 13203690391
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60cec7fca8de21c83b496fb8b64d9ea7326c87883c23fc24097cace6df9ef4fe
3
  size 13203690391
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9445552595536daf5bd8731be4eabb308bd26e76a3f4f0c20c4aa55fcf9ea202
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d8b6c71ec5842a7f720763e6312f0db9384dc999ad47a74f64d26d1c1cb7ce
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5db160d0d863329e76a33fba4aeedbc2d728eee05bd8deed47128275fa20fabd
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec90f171ad210047003950ef1fd5a4adde21307241897cd52f196d25f3f85ab7
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 2500,
3
  "best_metric": 1.2259057760238647,
4
  "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-2500",
5
- "epoch": 12.44361370716511,
6
  "eval_steps": 500,
7
- "global_step": 2500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -398,6 +398,84 @@
398
  "eval_samples_per_second": 11.596,
399
  "eval_steps_per_second": 1.49,
400
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  }
402
  ],
403
  "logging_steps": 50,
@@ -417,7 +495,7 @@
417
  "attributes": {}
418
  }
419
  },
420
- "total_flos": 6.917655295552389e+17,
421
  "train_batch_size": 1,
422
  "trial_name": null,
423
  "trial_params": null
 
2
  "best_global_step": 2500,
3
  "best_metric": 1.2259057760238647,
4
  "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-2500",
5
+ "epoch": 14.93208722741433,
6
  "eval_steps": 500,
7
+ "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
398
  "eval_samples_per_second": 11.596,
399
  "eval_steps_per_second": 1.49,
400
  "step": 2500
401
+ },
402
+ {
403
+ "epoch": 12.692834890965733,
404
+ "grad_norm": 1.40625,
405
+ "learning_rate": 3.55351105100606e-06,
406
+ "loss": 1.1879,
407
+ "step": 2550
408
+ },
409
+ {
410
+ "epoch": 12.942056074766356,
411
+ "grad_norm": 0.89453125,
412
+ "learning_rate": 3.3471412060573944e-06,
413
+ "loss": 1.215,
414
+ "step": 2600
415
+ },
416
+ {
417
+ "epoch": 13.189408099688473,
418
+ "grad_norm": 1.15625,
419
+ "learning_rate": 3.1438864479742693e-06,
420
+ "loss": 1.2105,
421
+ "step": 2650
422
+ },
423
+ {
424
+ "epoch": 13.438629283489096,
425
+ "grad_norm": 1.2109375,
426
+ "learning_rate": 2.9441298441001165e-06,
427
+ "loss": 1.1956,
428
+ "step": 2700
429
+ },
430
+ {
431
+ "epoch": 13.687850467289719,
432
+ "grad_norm": 1.15625,
433
+ "learning_rate": 2.7482478689258733e-06,
434
+ "loss": 1.2139,
435
+ "step": 2750
436
+ },
437
+ {
438
+ "epoch": 13.937071651090342,
439
+ "grad_norm": 1.640625,
440
+ "learning_rate": 2.556609694561273e-06,
441
+ "loss": 1.1846,
442
+ "step": 2800
443
+ },
444
+ {
445
+ "epoch": 14.184423676012461,
446
+ "grad_norm": 0.98828125,
447
+ "learning_rate": 2.3695764949687234e-06,
448
+ "loss": 1.1978,
449
+ "step": 2850
450
+ },
451
+ {
452
+ "epoch": 14.433644859813084,
453
+ "grad_norm": 1.2890625,
454
+ "learning_rate": 2.1875007652709768e-06,
455
+ "loss": 1.199,
456
+ "step": 2900
457
+ },
458
+ {
459
+ "epoch": 14.682866043613707,
460
+ "grad_norm": 0.9140625,
461
+ "learning_rate": 2.0107256574155564e-06,
462
+ "loss": 1.2097,
463
+ "step": 2950
464
+ },
465
+ {
466
+ "epoch": 14.93208722741433,
467
+ "grad_norm": 1.0625,
468
+ "learning_rate": 1.8395843334479125e-06,
469
+ "loss": 1.2051,
470
+ "step": 3000
471
+ },
472
+ {
473
+ "epoch": 14.93208722741433,
474
+ "eval_loss": 1.2260087728500366,
475
+ "eval_runtime": 15.4261,
476
+ "eval_samples_per_second": 11.604,
477
+ "eval_steps_per_second": 1.491,
478
+ "step": 3000
479
  }
480
  ],
481
  "logging_steps": 50,
 
495
  "attributes": {}
496
  }
497
  },
498
+ "total_flos": 8.301047807591055e+17,
499
  "train_batch_size": 1,
500
  "trial_name": null,
501
  "trial_params": null