训练结束，上传最终模型

Browse files

Files changed (7) hide show

README.md +23 -23
all_results.json +5 -5
config.json +1 -1
generation_config.json +10 -18
model.safetensors +2 -2
train_results.json +5 -5
trainer_state.json +0 -0

README.md CHANGED Viewed

@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
 This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.0545
-- Wer: 96.5771
-- Cer: 54.8789
 ## Model description
@@ -53,26 +53,26 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch   | Step  | Validation Loss | Wer     | Cer     |
 |:-------------:|:-------:|:-----:|:---------------:|:-------:|:-------:|
-| 0.9609        | 1.1013  | 1000  | 1.1190          | 99.2079 | 98.0002 |
-| 0.6483        | 2.2026  | 2000  | 0.8776          | 98.7082 | 82.7038 |
-| 0.4333        | 3.3040  | 3000  | 0.8139          | 97.8689 | 67.8442 |
-| 0.3453        | 4.4053  | 4000  | 0.7950          | 98.0481 | 68.7091 |
-| 0.2517        | 5.5066  | 5000  | 0.8068          | 96.9448 | 64.5243 |
-| 0.1854        | 6.6079  | 6000  | 0.8310          | 97.9915 | 73.3730 |
-| 0.1173        | 7.7093  | 7000  | 0.8566          | 97.6426 | 64.1145 |
-| 0.1049        | 8.8106  | 8000  | 0.8806          | 97.7275 | 70.6504 |
-| 0.0566        | 9.9119  | 9000  | 0.9025          | 97.7935 | 66.4983 |
-| 0.037         | 11.0132 | 10000 | 0.9284          | 97.5389 | 63.1154 |
-| 0.0139        | 12.1145 | 11000 | 0.9458          | 97.0297 | 60.9058 |
-| 0.013         | 13.2159 | 12000 | 0.9624          | 96.8223 | 57.8806 |
-| 0.008         | 14.3172 | 13000 | 0.9800          | 96.7185 | 57.1280 |
-| 0.0062        | 15.4185 | 14000 | 0.9948          | 96.6714 | 55.3007 |
-| 0.0044        | 16.5198 | 15000 | 1.0088          | 96.6808 | 57.2599 |
-| 0.0034        | 17.6211 | 16000 | 1.0242          | 96.5959 | 55.2440 |
-| 0.0029        | 18.7225 | 17000 | 1.0367          | 96.5865 | 55.6945 |
-| 0.0022        | 19.8238 | 18000 | 1.0447          | 96.6148 | 55.5518 |
-| 0.0021        | 20.9251 | 19000 | 1.0507          | 96.5771 | 55.5891 |
-| 0.0017        | 22.0264 | 20000 | 1.0545          | 96.5771 | 54.8789 |
 ### Framework versions

 This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.8947
+- Wer: 82.3479
+- Cer: 22.6268
 ## Model description
 | Training Loss | Epoch   | Step  | Validation Loss | Wer     | Cer     |
 |:-------------:|:-------:|:-----:|:---------------:|:-------:|:-------:|
+| 1.0854        | 1.1013  | 1000  | 1.2534          | 97.5672 | 52.7088 |
+| 0.5859        | 2.2026  | 2000  | 0.8996          | 90.9477 | 48.1097 |
+| 0.3373        | 3.3040  | 3000  | 0.7766          | 87.7699 | 29.9950 |
+| 0.2445        | 4.4053  | 4000  | 0.7662          | 86.6761 | 28.1264 |
+| 0.1548        | 5.5066  | 5000  | 0.7709          | 86.6007 | 27.8748 |
+| 0.1102        | 6.6079  | 6000  | 0.7889          | 86.3178 | 26.2934 |
+| 0.0682        | 7.7093  | 7000  | 0.7991          | 84.4507 | 27.3578 |
+| 0.0647        | 8.8106  | 8000  | 0.8132          | 84.6488 | 25.6262 |
+| 0.0343        | 9.9119  | 9000  | 0.8282          | 84.8279 | 24.6948 |
+| 0.0181        | 11.0132 | 10000 | 0.8396          | 83.8001 | 24.3618 |
+| 0.0117        | 12.1145 | 11000 | 0.8592          | 84.1584 | 24.0030 |
+| 0.0111        | 13.2159 | 12000 | 0.8610          | 83.8378 | 24.3537 |
+| 0.0088        | 14.3172 | 13000 | 0.8743          | 84.0924 | 24.6323 |
+| 0.0112        | 15.4185 | 14000 | 0.8769          | 84.1867 | 24.9344 |
+| 0.0109        | 16.5198 | 15000 | 0.8774          | 84.6770 | 24.6214 |
+| 0.0032        | 17.6211 | 16000 | 0.8810          | 82.6591 | 23.3174 |
+| 0.0017        | 18.7225 | 17000 | 0.8870          | 82.9986 | 22.8532 |
+| 0.0019        | 19.8238 | 18000 | 0.8900          | 82.5083 | 22.6634 |
+| 0.0008        | 20.9251 | 19000 | 0.8924          | 82.4800 | 22.5878 |
+| 0.0006        | 22.0264 | 20000 | 0.8947          | 82.3479 | 22.6268 |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 22.026431718061673,
-    "total_flos": 3.376341480070185e+19,
-    "train_loss": 0.2651471851706505,
-    "train_runtime": 28635.2259,
-    "train_samples_per_second": 22.35,
-    "train_steps_per_second": 0.698
 }

 {
     "epoch": 22.026431718061673,
+    "total_flos": 3.4362863729801953e+20,
+    "train_loss": 0.2743150826841593,
+    "train_runtime": 135533.426,
+    "train_samples_per_second": 4.722,
+    "train_steps_per_second": 0.148
 }

config.json CHANGED Viewed

@@ -53,7 +53,7 @@
   "num_mel_bins": 80,
   "pad_token_id": 50257,
   "scale_embedding": false,
-  "torch_dtype": "bfloat16",
   "transformers_version": "4.48.3",
   "use_cache": true,
   "use_weighted_layer_sum": false,

   "num_mel_bins": 80,
   "pad_token_id": 50257,
   "scale_embedding": false,
+  "torch_dtype": "float16",
   "transformers_version": "4.48.3",
   "use_cache": true,
   "use_weighted_layer_sum": false,

generation_config.json CHANGED Viewed

@@ -1,36 +1,28 @@
 {
   "alignment_heads": [
     [
-      3,
-      1
-    ],
-    [
-      4,
-      2
     ],
     [
-      4,
-      3
     ],
     [
-      4,
-      7
     ],
     [
-      5,
       1
     ],
     [
-      5,
-      2
     ],
     [
-      5,
       4
-    ],
-    [
-      5,
-      6
     ]
   ],
   "begin_suppress_tokens": [

 {
   "alignment_heads": [
     [
+      13,
+      15
     ],
     [
+      15,
+      4
     ],
     [
+      15,
+      15
     ],
     [
+      16,
       1
     ],
     [
+      20,
+      0
     ],
     [
+      23,
       4
     ]
   ],
   "begin_suppress_tokens": [

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:069fbc2b96ff55558de2b6621d0406b4fbcbc7edffe8d2472bb8b992e0abdb14
-size 962205216

 version https://git-lfs.github.com/spec/v1
+oid sha256:80896de1056c7d3f5720efafc618f3b726ef158f417676f3a48761e98510dc75
+size 962204752

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 22.026431718061673,
-    "total_flos": 3.376341480070185e+19,
-    "train_loss": 0.2651471851706505,
-    "train_runtime": 28635.2259,
-    "train_samples_per_second": 22.35,
-    "train_steps_per_second": 0.698
 }

 {
     "epoch": 22.026431718061673,
+    "total_flos": 3.4362863729801953e+20,
+    "train_loss": 0.2743150826841593,
+    "train_runtime": 135533.426,
+    "train_samples_per_second": 4.722,
+    "train_steps_per_second": 0.148
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff