训练结束,上传最终模型
Browse files- README.md +23 -23
- all_results.json +5 -5
- config.json +1 -1
- generation_config.json +10 -18
- model.safetensors +2 -2
- train_results.json +5 -5
- trainer_state.json +0 -0
README.md
CHANGED
|
@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 16 |
|
| 17 |
This model was trained from scratch on an unknown dataset.
|
| 18 |
It achieves the following results on the evaluation set:
|
| 19 |
-
- Loss:
|
| 20 |
-
- Wer:
|
| 21 |
-
- Cer:
|
| 22 |
|
| 23 |
## Model description
|
| 24 |
|
|
@@ -53,26 +53,26 @@ The following hyperparameters were used during training:
|
|
| 53 |
|
| 54 |
| Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
|
| 55 |
|:-------------:|:-------:|:-----:|:---------------:|:-------:|:-------:|
|
| 56 |
-
|
|
| 57 |
-
| 0.
|
| 58 |
-
| 0.
|
| 59 |
-
| 0.
|
| 60 |
-
| 0.
|
| 61 |
-
| 0.
|
| 62 |
-
| 0.
|
| 63 |
-
| 0.
|
| 64 |
-
| 0.
|
| 65 |
-
| 0.
|
| 66 |
-
| 0.
|
| 67 |
-
| 0.
|
| 68 |
-
| 0.
|
| 69 |
-
| 0.
|
| 70 |
-
| 0.
|
| 71 |
-
| 0.
|
| 72 |
-
| 0.
|
| 73 |
-
| 0.
|
| 74 |
-
| 0.
|
| 75 |
-
| 0.
|
| 76 |
|
| 77 |
|
| 78 |
### Framework versions
|
|
|
|
| 16 |
|
| 17 |
This model was trained from scratch on an unknown dataset.
|
| 18 |
It achieves the following results on the evaluation set:
|
| 19 |
+
- Loss: 0.8947
|
| 20 |
+
- Wer: 82.3479
|
| 21 |
+
- Cer: 22.6268
|
| 22 |
|
| 23 |
## Model description
|
| 24 |
|
|
|
|
| 53 |
|
| 54 |
| Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
|
| 55 |
|:-------------:|:-------:|:-----:|:---------------:|:-------:|:-------:|
|
| 56 |
+
| 1.0854 | 1.1013 | 1000 | 1.2534 | 97.5672 | 52.7088 |
|
| 57 |
+
| 0.5859 | 2.2026 | 2000 | 0.8996 | 90.9477 | 48.1097 |
|
| 58 |
+
| 0.3373 | 3.3040 | 3000 | 0.7766 | 87.7699 | 29.9950 |
|
| 59 |
+
| 0.2445 | 4.4053 | 4000 | 0.7662 | 86.6761 | 28.1264 |
|
| 60 |
+
| 0.1548 | 5.5066 | 5000 | 0.7709 | 86.6007 | 27.8748 |
|
| 61 |
+
| 0.1102 | 6.6079 | 6000 | 0.7889 | 86.3178 | 26.2934 |
|
| 62 |
+
| 0.0682 | 7.7093 | 7000 | 0.7991 | 84.4507 | 27.3578 |
|
| 63 |
+
| 0.0647 | 8.8106 | 8000 | 0.8132 | 84.6488 | 25.6262 |
|
| 64 |
+
| 0.0343 | 9.9119 | 9000 | 0.8282 | 84.8279 | 24.6948 |
|
| 65 |
+
| 0.0181 | 11.0132 | 10000 | 0.8396 | 83.8001 | 24.3618 |
|
| 66 |
+
| 0.0117 | 12.1145 | 11000 | 0.8592 | 84.1584 | 24.0030 |
|
| 67 |
+
| 0.0111 | 13.2159 | 12000 | 0.8610 | 83.8378 | 24.3537 |
|
| 68 |
+
| 0.0088 | 14.3172 | 13000 | 0.8743 | 84.0924 | 24.6323 |
|
| 69 |
+
| 0.0112 | 15.4185 | 14000 | 0.8769 | 84.1867 | 24.9344 |
|
| 70 |
+
| 0.0109 | 16.5198 | 15000 | 0.8774 | 84.6770 | 24.6214 |
|
| 71 |
+
| 0.0032 | 17.6211 | 16000 | 0.8810 | 82.6591 | 23.3174 |
|
| 72 |
+
| 0.0017 | 18.7225 | 17000 | 0.8870 | 82.9986 | 22.8532 |
|
| 73 |
+
| 0.0019 | 19.8238 | 18000 | 0.8900 | 82.5083 | 22.6634 |
|
| 74 |
+
| 0.0008 | 20.9251 | 19000 | 0.8924 | 82.4800 | 22.5878 |
|
| 75 |
+
| 0.0006 | 22.0264 | 20000 | 0.8947 | 82.3479 | 22.6268 |
|
| 76 |
|
| 77 |
|
| 78 |
### Framework versions
|
all_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 22.026431718061673,
|
| 3 |
-
"total_flos": 3.
|
| 4 |
-
"train_loss": 0.
|
| 5 |
-
"train_runtime":
|
| 6 |
-
"train_samples_per_second":
|
| 7 |
-
"train_steps_per_second": 0.
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 22.026431718061673,
|
| 3 |
+
"total_flos": 3.4362863729801953e+20,
|
| 4 |
+
"train_loss": 0.2743150826841593,
|
| 5 |
+
"train_runtime": 135533.426,
|
| 6 |
+
"train_samples_per_second": 4.722,
|
| 7 |
+
"train_steps_per_second": 0.148
|
| 8 |
}
|
config.json
CHANGED
|
@@ -53,7 +53,7 @@
|
|
| 53 |
"num_mel_bins": 80,
|
| 54 |
"pad_token_id": 50257,
|
| 55 |
"scale_embedding": false,
|
| 56 |
-
"torch_dtype": "
|
| 57 |
"transformers_version": "4.48.3",
|
| 58 |
"use_cache": true,
|
| 59 |
"use_weighted_layer_sum": false,
|
|
|
|
| 53 |
"num_mel_bins": 80,
|
| 54 |
"pad_token_id": 50257,
|
| 55 |
"scale_embedding": false,
|
| 56 |
+
"torch_dtype": "float16",
|
| 57 |
"transformers_version": "4.48.3",
|
| 58 |
"use_cache": true,
|
| 59 |
"use_weighted_layer_sum": false,
|
generation_config.json
CHANGED
|
@@ -1,36 +1,28 @@
|
|
| 1 |
{
|
| 2 |
"alignment_heads": [
|
| 3 |
[
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
],
|
| 7 |
-
[
|
| 8 |
-
4,
|
| 9 |
-
2
|
| 10 |
],
|
| 11 |
[
|
| 12 |
-
|
| 13 |
-
|
| 14 |
],
|
| 15 |
[
|
| 16 |
-
|
| 17 |
-
|
| 18 |
],
|
| 19 |
[
|
| 20 |
-
|
| 21 |
1
|
| 22 |
],
|
| 23 |
[
|
| 24 |
-
|
| 25 |
-
|
| 26 |
],
|
| 27 |
[
|
| 28 |
-
|
| 29 |
4
|
| 30 |
-
],
|
| 31 |
-
[
|
| 32 |
-
5,
|
| 33 |
-
6
|
| 34 |
]
|
| 35 |
],
|
| 36 |
"begin_suppress_tokens": [
|
|
|
|
| 1 |
{
|
| 2 |
"alignment_heads": [
|
| 3 |
[
|
| 4 |
+
13,
|
| 5 |
+
15
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
],
|
| 7 |
[
|
| 8 |
+
15,
|
| 9 |
+
4
|
| 10 |
],
|
| 11 |
[
|
| 12 |
+
15,
|
| 13 |
+
15
|
| 14 |
],
|
| 15 |
[
|
| 16 |
+
16,
|
| 17 |
1
|
| 18 |
],
|
| 19 |
[
|
| 20 |
+
20,
|
| 21 |
+
0
|
| 22 |
],
|
| 23 |
[
|
| 24 |
+
23,
|
| 25 |
4
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
]
|
| 27 |
],
|
| 28 |
"begin_suppress_tokens": [
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80896de1056c7d3f5720efafc618f3b726ef158f417676f3a48761e98510dc75
|
| 3 |
+
size 962204752
|
train_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 22.026431718061673,
|
| 3 |
-
"total_flos": 3.
|
| 4 |
-
"train_loss": 0.
|
| 5 |
-
"train_runtime":
|
| 6 |
-
"train_samples_per_second":
|
| 7 |
-
"train_steps_per_second": 0.
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 22.026431718061673,
|
| 3 |
+
"total_flos": 3.4362863729801953e+20,
|
| 4 |
+
"train_loss": 0.2743150826841593,
|
| 5 |
+
"train_runtime": 135533.426,
|
| 6 |
+
"train_samples_per_second": 4.722,
|
| 7 |
+
"train_steps_per_second": 0.148
|
| 8 |
}
|
trainer_state.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|