kiritan committed on
Commit
965a4ed
·
verified ·
1 Parent(s): 417c0ce

训练结束,上传最终模型

Browse files
README.md CHANGED
@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 1.0545
20
- - Wer: 96.5771
21
- - Cer: 54.8789
22
 
23
  ## Model description
24
 
@@ -53,26 +53,26 @@ The following hyperparameters were used during training:
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
55
  |:-------------:|:-------:|:-----:|:---------------:|:-------:|:-------:|
56
- | 0.9609 | 1.1013 | 1000 | 1.1190 | 99.2079 | 98.0002 |
57
- | 0.6483 | 2.2026 | 2000 | 0.8776 | 98.7082 | 82.7038 |
58
- | 0.4333 | 3.3040 | 3000 | 0.8139 | 97.8689 | 67.8442 |
59
- | 0.3453 | 4.4053 | 4000 | 0.7950 | 98.0481 | 68.7091 |
60
- | 0.2517 | 5.5066 | 5000 | 0.8068 | 96.9448 | 64.5243 |
61
- | 0.1854 | 6.6079 | 6000 | 0.8310 | 97.9915 | 73.3730 |
62
- | 0.1173 | 7.7093 | 7000 | 0.8566 | 97.6426 | 64.1145 |
63
- | 0.1049 | 8.8106 | 8000 | 0.8806 | 97.7275 | 70.6504 |
64
- | 0.0566 | 9.9119 | 9000 | 0.9025 | 97.7935 | 66.4983 |
65
- | 0.037 | 11.0132 | 10000 | 0.9284 | 97.5389 | 63.1154 |
66
- | 0.0139 | 12.1145 | 11000 | 0.9458 | 97.0297 | 60.9058 |
67
- | 0.013 | 13.2159 | 12000 | 0.9624 | 96.8223 | 57.8806 |
68
- | 0.008 | 14.3172 | 13000 | 0.9800 | 96.7185 | 57.1280 |
69
- | 0.0062 | 15.4185 | 14000 | 0.9948 | 96.6714 | 55.3007 |
70
- | 0.0044 | 16.5198 | 15000 | 1.0088 | 96.6808 | 57.2599 |
71
- | 0.0034 | 17.6211 | 16000 | 1.0242 | 96.5959 | 55.2440 |
72
- | 0.0029 | 18.7225 | 17000 | 1.0367 | 96.5865 | 55.6945 |
73
- | 0.0022 | 19.8238 | 18000 | 1.0447 | 96.6148 | 55.5518 |
74
- | 0.0021 | 20.9251 | 19000 | 1.0507 | 96.5771 | 55.5891 |
75
- | 0.0017 | 22.0264 | 20000 | 1.0545 | 96.5771 | 54.8789 |
76
 
77
 
78
  ### Framework versions
 
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.8947
20
+ - Wer: 82.3479
21
+ - Cer: 22.6268
22
 
23
  ## Model description
24
 
 
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
55
  |:-------------:|:-------:|:-----:|:---------------:|:-------:|:-------:|
56
+ | 1.0854 | 1.1013 | 1000 | 1.2534 | 97.5672 | 52.7088 |
57
+ | 0.5859 | 2.2026 | 2000 | 0.8996 | 90.9477 | 48.1097 |
58
+ | 0.3373 | 3.3040 | 3000 | 0.7766 | 87.7699 | 29.9950 |
59
+ | 0.2445 | 4.4053 | 4000 | 0.7662 | 86.6761 | 28.1264 |
60
+ | 0.1548 | 5.5066 | 5000 | 0.7709 | 86.6007 | 27.8748 |
61
+ | 0.1102 | 6.6079 | 6000 | 0.7889 | 86.3178 | 26.2934 |
62
+ | 0.0682 | 7.7093 | 7000 | 0.7991 | 84.4507 | 27.3578 |
63
+ | 0.0647 | 8.8106 | 8000 | 0.8132 | 84.6488 | 25.6262 |
64
+ | 0.0343 | 9.9119 | 9000 | 0.8282 | 84.8279 | 24.6948 |
65
+ | 0.0181 | 11.0132 | 10000 | 0.8396 | 83.8001 | 24.3618 |
66
+ | 0.0117 | 12.1145 | 11000 | 0.8592 | 84.1584 | 24.0030 |
67
+ | 0.0111 | 13.2159 | 12000 | 0.8610 | 83.8378 | 24.3537 |
68
+ | 0.0088 | 14.3172 | 13000 | 0.8743 | 84.0924 | 24.6323 |
69
+ | 0.0112 | 15.4185 | 14000 | 0.8769 | 84.1867 | 24.9344 |
70
+ | 0.0109 | 16.5198 | 15000 | 0.8774 | 84.6770 | 24.6214 |
71
+ | 0.0032 | 17.6211 | 16000 | 0.8810 | 82.6591 | 23.3174 |
72
+ | 0.0017 | 18.7225 | 17000 | 0.8870 | 82.9986 | 22.8532 |
73
+ | 0.0019 | 19.8238 | 18000 | 0.8900 | 82.5083 | 22.6634 |
74
+ | 0.0008 | 20.9251 | 19000 | 0.8924 | 82.4800 | 22.5878 |
75
+ | 0.0006 | 22.0264 | 20000 | 0.8947 | 82.3479 | 22.6268 |
76
 
77
 
78
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 22.026431718061673,
3
- "total_flos": 3.376341480070185e+19,
4
- "train_loss": 0.2651471851706505,
5
- "train_runtime": 28635.2259,
6
- "train_samples_per_second": 22.35,
7
- "train_steps_per_second": 0.698
8
  }
 
1
  {
2
  "epoch": 22.026431718061673,
3
+ "total_flos": 3.4362863729801953e+20,
4
+ "train_loss": 0.2743150826841593,
5
+ "train_runtime": 135533.426,
6
+ "train_samples_per_second": 4.722,
7
+ "train_steps_per_second": 0.148
8
  }
config.json CHANGED
@@ -53,7 +53,7 @@
53
  "num_mel_bins": 80,
54
  "pad_token_id": 50257,
55
  "scale_embedding": false,
56
- "torch_dtype": "bfloat16",
57
  "transformers_version": "4.48.3",
58
  "use_cache": true,
59
  "use_weighted_layer_sum": false,
 
53
  "num_mel_bins": 80,
54
  "pad_token_id": 50257,
55
  "scale_embedding": false,
56
+ "torch_dtype": "float16",
57
  "transformers_version": "4.48.3",
58
  "use_cache": true,
59
  "use_weighted_layer_sum": false,
generation_config.json CHANGED
@@ -1,36 +1,28 @@
1
  {
2
  "alignment_heads": [
3
  [
4
- 3,
5
- 1
6
- ],
7
- [
8
- 4,
9
- 2
10
  ],
11
  [
12
- 4,
13
- 3
14
  ],
15
  [
16
- 4,
17
- 7
18
  ],
19
  [
20
- 5,
21
  1
22
  ],
23
  [
24
- 5,
25
- 2
26
  ],
27
  [
28
- 5,
29
  4
30
- ],
31
- [
32
- 5,
33
- 6
34
  ]
35
  ],
36
  "begin_suppress_tokens": [
 
1
  {
2
  "alignment_heads": [
3
  [
4
+ 13,
5
+ 15
 
 
 
 
6
  ],
7
  [
8
+ 15,
9
+ 4
10
  ],
11
  [
12
+ 15,
13
+ 15
14
  ],
15
  [
16
+ 16,
17
  1
18
  ],
19
  [
20
+ 20,
21
+ 0
22
  ],
23
  [
24
+ 23,
25
  4
 
 
 
 
26
  ]
27
  ],
28
  "begin_suppress_tokens": [
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:069fbc2b96ff55558de2b6621d0406b4fbcbc7edffe8d2472bb8b992e0abdb14
3
- size 962205216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80896de1056c7d3f5720efafc618f3b726ef158f417676f3a48761e98510dc75
3
+ size 962204752
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 22.026431718061673,
3
- "total_flos": 3.376341480070185e+19,
4
- "train_loss": 0.2651471851706505,
5
- "train_runtime": 28635.2259,
6
- "train_samples_per_second": 22.35,
7
- "train_steps_per_second": 0.698
8
  }
 
1
  {
2
  "epoch": 22.026431718061673,
3
+ "total_flos": 3.4362863729801953e+20,
4
+ "train_loss": 0.2743150826841593,
5
+ "train_runtime": 135533.426,
6
+ "train_samples_per_second": 4.722,
7
+ "train_steps_per_second": 0.148
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff