fguryel commited on
Commit
7ba2b3f
·
verified ·
1 Parent(s): 931eaf5

Upload folder using huggingface_hub

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33a77d12adc3c90a6d03d30b2ebb684027e4f21c9ca19ce87864192c1cbdd7b8
3
  size 4991037968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b20d2726037c98f2ce4ae7731f490bd4c05574f16b52fedf610ca2f299fbf8
3
  size 4991037968
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16b197f53c7eb5f953acb0917626be62d881b989a1678589016fddbdd2e620b8
3
  size 1610725592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:338de36ed0bd740de5ef2e61b137c64e2bd91321bb50de1a705521395bcfd53b
3
  size 1610725592
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:709540f11f094ab7fcb18f525b097ef780a66646213b0f225b0cec2172f4c781
3
  size 13203690391
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2bac92613552e39b1793e6ae133fd0d27096b237be00533069cc8a7dbcb0bb3
3
  size 13203690391
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8e2011629d8bed3ef560fa11175cac55684c4e12a72634bb24abf767b6c7399
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f196323d7423b60f8e4ceb7dbf8715ee326c0d068e5ff164f13c63b279b9f1a0
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3159b8255e3ba63ecfbf9ad9882d37c8b55d7643e07b70fee54fef23e5ee0ce
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e92a7052d2a7c01a1b09ea2ba38f44479d548f97aef62e831f459b08d633005
3
  size 1465
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 500,
3
- "best_metric": 1.3274219036102295,
4
- "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-500",
5
- "epoch": 2.4884735202492214,
6
  "eval_steps": 500,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -86,6 +86,84 @@
86
  "eval_samples_per_second": 11.532,
87
  "eval_steps_per_second": 1.482,
88
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  }
90
  ],
91
  "logging_steps": 50,
@@ -105,7 +183,7 @@
105
  "attributes": {}
106
  }
107
  },
108
- "total_flos": 1.3833925120386662e+17,
109
  "train_batch_size": 1,
110
  "trial_name": null,
111
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1000,
3
+ "best_metric": 1.2828963994979858,
4
+ "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-1000",
5
+ "epoch": 4.976947040498443,
6
  "eval_steps": 500,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
86
  "eval_samples_per_second": 11.532,
87
  "eval_steps_per_second": 1.482,
88
  "step": 500
89
+ },
90
+ {
91
+ "epoch": 2.7376947040498445,
92
+ "grad_norm": 0.87109375,
93
+ "learning_rate": 9.95932312693483e-06,
94
+ "loss": 1.2695,
95
+ "step": 550
96
+ },
97
+ {
98
+ "epoch": 2.986915887850467,
99
+ "grad_norm": 1.3984375,
100
+ "learning_rate": 9.927024711991988e-06,
101
+ "loss": 1.2801,
102
+ "step": 600
103
+ },
104
+ {
105
+ "epoch": 3.2342679127725855,
106
+ "grad_norm": 0.91015625,
107
+ "learning_rate": 9.885440500813695e-06,
108
+ "loss": 1.2495,
109
+ "step": 650
110
+ },
111
+ {
112
+ "epoch": 3.4834890965732086,
113
+ "grad_norm": 0.9375,
114
+ "learning_rate": 9.834648865751254e-06,
115
+ "loss": 1.2456,
116
+ "step": 700
117
+ },
118
+ {
119
+ "epoch": 3.7327102803738317,
120
+ "grad_norm": 1.078125,
121
+ "learning_rate": 9.774745532075235e-06,
122
+ "loss": 1.2472,
123
+ "step": 750
124
+ },
125
+ {
126
+ "epoch": 3.9819314641744548,
127
+ "grad_norm": 0.89453125,
128
+ "learning_rate": 9.705843397565304e-06,
129
+ "loss": 1.2243,
130
+ "step": 800
131
+ },
132
+ {
133
+ "epoch": 4.229283489096574,
134
+ "grad_norm": 1.1484375,
135
+ "learning_rate": 9.628072319735607e-06,
136
+ "loss": 1.2273,
137
+ "step": 850
138
+ },
139
+ {
140
+ "epoch": 4.478504672897197,
141
+ "grad_norm": 1.0,
142
+ "learning_rate": 9.541578871096728e-06,
143
+ "loss": 1.2192,
144
+ "step": 900
145
+ },
146
+ {
147
+ "epoch": 4.72772585669782,
148
+ "grad_norm": 0.85546875,
149
+ "learning_rate": 9.446526062915449e-06,
150
+ "loss": 1.2295,
151
+ "step": 950
152
+ },
153
+ {
154
+ "epoch": 4.976947040498443,
155
+ "grad_norm": 1.2265625,
156
+ "learning_rate": 9.343093037992946e-06,
157
+ "loss": 1.2226,
158
+ "step": 1000
159
+ },
160
+ {
161
+ "epoch": 4.976947040498443,
162
+ "eval_loss": 1.2828963994979858,
163
+ "eval_runtime": 15.5322,
164
+ "eval_samples_per_second": 11.524,
165
+ "eval_steps_per_second": 1.481,
166
+ "step": 1000
167
  }
168
  ],
169
  "logging_steps": 50,
 
183
  "attributes": {}
184
  }
185
  },
186
+ "total_flos": 2.7667850240773325e+17,
187
  "train_batch_size": 1,
188
  "trial_name": null,
189
  "trial_params": null