niobures committed on
Commit
abd0a25
·
verified ·
1 Parent(s): 1bd3c1d

Fish Speech, OpenAudio

Browse files
Files changed (36) hide show
  1. .gitattributes +11 -0
  2. fish-speech-gui/releases/v1.4.5/fish-amd64.bin +3 -0
  3. fish-speech-gui/releases/v1.4.5/fish-speech-gui-v1.4.5.zip +3 -0
  4. fish-speech-gui/releases/v1.4.5/fish.exe +3 -0
  5. fish-speech.rs/releases/fish-speech.rs-v0.2.0.zip +3 -0
  6. fish-speech.rs/releases/fish-speech.rs-v0.2.1.zip +3 -0
  7. fish-speech.rs/releases/fish-speech.rs-v0.2.2.1.zip +3 -0
  8. fish-speech.rs/releases/fish-speech.rs-v0.2.2.zip +3 -0
  9. fish-speech.rs/releases/fish-speech.rs-v0.2.3.zip +3 -0
  10. fish-speech.rs/releases/fish-speech.rs-v0.3.0.1.zip +3 -0
  11. fish-speech.rs/releases/fish-speech.rs-v0.3.0.zip +3 -0
  12. fish-speech/releases/fish-speech-v1.4.0.zip +3 -0
  13. fish-speech/releases/fish-speech-v1.4.1.zip +3 -0
  14. fish-speech/releases/fish-speech-v1.4.2.zip +3 -0
  15. fish-speech/releases/fish-speech-v1.4.3.zip +3 -0
  16. fish-speech/releases/fish-speech-v1.5.0.zip +3 -0
  17. fish-speech/releases/fish-speech-v1.5.1.zip +3 -0
  18. openaudio-gguf/.gitattributes +45 -0
  19. openaudio-gguf/README.md +46 -0
  20. openaudio-gguf/codec-bf16.gguf +3 -0
  21. openaudio-gguf/codec-f16.gguf +3 -0
  22. openaudio-gguf/codec-f32.gguf +3 -0
  23. openaudio-gguf/codec-q2_k.gguf +3 -0
  24. openaudio-gguf/codec-q3_k_m.gguf +3 -0
  25. openaudio-gguf/codec-q4_k_m.gguf +3 -0
  26. openaudio-gguf/codec-q5_k_m.gguf +3 -0
  27. openaudio-gguf/codec-q6_k.gguf +3 -0
  28. openaudio-gguf/samples/audio1.wav +3 -0
  29. openaudio-gguf/samples/audio2.wav +3 -0
  30. openaudio-s1-mini/.gitattributes +35 -0
  31. openaudio-s1-mini/README.md +92 -0
  32. openaudio-s1-mini/codec.pth +3 -0
  33. openaudio-s1-mini/config.json +32 -0
  34. openaudio-s1-mini/model.pth +3 -0
  35. openaudio-s1-mini/special_tokens.json +0 -0
  36. openaudio-s1-mini/tokenizer.tiktoken +0 -0
.gitattributes CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ fish-speech-gui/releases/v1.4.5/fish.exe filter=lfs diff=lfs merge=lfs -text
37
+ openaudio-gguf/codec-bf16.gguf filter=lfs diff=lfs merge=lfs -text
38
+ openaudio-gguf/codec-f16.gguf filter=lfs diff=lfs merge=lfs -text
39
+ openaudio-gguf/codec-f32.gguf filter=lfs diff=lfs merge=lfs -text
40
+ openaudio-gguf/codec-q2_k.gguf filter=lfs diff=lfs merge=lfs -text
41
+ openaudio-gguf/codec-q3_k_m.gguf filter=lfs diff=lfs merge=lfs -text
42
+ openaudio-gguf/codec-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text
43
+ openaudio-gguf/codec-q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text
44
+ openaudio-gguf/codec-q6_k.gguf filter=lfs diff=lfs merge=lfs -text
45
+ openaudio-gguf/samples/audio1.wav filter=lfs diff=lfs merge=lfs -text
46
+ openaudio-gguf/samples/audio2.wav filter=lfs diff=lfs merge=lfs -text
fish-speech-gui/releases/v1.4.5/fish-amd64.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70735219758ba10f206907d0d56cac07ebba8dfdb34ed36a19adbfe5611e93d2
3
+ size 73944982
fish-speech-gui/releases/v1.4.5/fish-speech-gui-v1.4.5.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ee695e5c1f8ac0d1cf90dcce24f8bdbf24ac4d2d767ef1783836cb62019cf44
3
+ size 5790712
fish-speech-gui/releases/v1.4.5/fish.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3857a9c96bbf01909d18c466591ee4e897c6840cb8038f759eb8ac126656f9f0
3
+ size 46259712
fish-speech.rs/releases/fish-speech.rs-v0.2.0.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2e42861abc754f5900bf72300926d2eceedbaf7a10e72664e38212b73ede2f9
3
+ size 2056531
fish-speech.rs/releases/fish-speech.rs-v0.2.1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a108c7b6160291f61ec5fa15125e6cc74128bca25073ab64bb11fa5bea45e75b
3
+ size 2057262
fish-speech.rs/releases/fish-speech.rs-v0.2.2.1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7daf3f8f62ade125de38edf24c980eee569ad9e6859d891403425a53098649b5
3
+ size 2057828
fish-speech.rs/releases/fish-speech.rs-v0.2.2.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:579e57584b2b208cba19baa37aa05714e1135cb07d541ff003202f8f80a33cf3
3
+ size 2057268
fish-speech.rs/releases/fish-speech.rs-v0.2.3.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a92987a8c4bf511be15344a4fe918c9fff48fc4f0915916ca49d02fa6e5414d3
3
+ size 2057976
fish-speech.rs/releases/fish-speech.rs-v0.3.0.1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaa3813bc3f53113d7ef2e648ed13c17cdaa53af1fdb42017912571c17e2424a
3
+ size 2058697
fish-speech.rs/releases/fish-speech.rs-v0.3.0.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa614a8186c40b226bf51460eaed45feaf17143050628329c8f078dd1074886
3
+ size 2057318
fish-speech/releases/fish-speech-v1.4.0.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be6c9466ad5ffc7e334fda81442c8818fd8090ffef47285b9dae15dcd4dcf480
3
+ size 606436
fish-speech/releases/fish-speech-v1.4.1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beeac30cdc5ceef826faa445c5a5fe3813ba48b6fcf5f35a632b6c4ed92ce6d1
3
+ size 608016
fish-speech/releases/fish-speech-v1.4.2.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c3e85969e21f1d1cf2789e6a16c6afd6fd0960724f1214ce7907f4c059e608
3
+ size 627081
fish-speech/releases/fish-speech-v1.4.3.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bb033c62c63298d0d357f35bf34d184b8284ee35baf4ff2a6c7427440ef6a3f
3
+ size 926563
fish-speech/releases/fish-speech-v1.5.0.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3022b240d5be41943bb300ad5d7bd8e9fa32e81aa5e37ad94f5a51aa532276ec
3
+ size 935374
fish-speech/releases/fish-speech-v1.5.1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0161ab223786f0ac31dbdddeb4b423d19fe027be6dcfe7033c16843df0a320b7
3
+ size 887692
openaudio-gguf/.gitattributes ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ codec-bf16.gguf filter=lfs diff=lfs merge=lfs -text
37
+ codec-f16.gguf filter=lfs diff=lfs merge=lfs -text
38
+ codec-f32.gguf filter=lfs diff=lfs merge=lfs -text
39
+ codec-q2_k.gguf filter=lfs diff=lfs merge=lfs -text
40
+ codec-q3_k_m.gguf filter=lfs diff=lfs merge=lfs -text
41
+ codec-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text
42
+ codec-q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text
43
+ codec-q6_k.gguf filter=lfs diff=lfs merge=lfs -text
44
+ samples/audio1.wav filter=lfs diff=lfs merge=lfs -text
45
+ samples/audio2.wav filter=lfs diff=lfs merge=lfs -text
openaudio-gguf/README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-sa-4.0
3
+ base_model:
4
+ - fishaudio/openaudio-s1-mini
5
+ pipeline_tag: text-to-speech
6
+ tags:
7
+ - gguf-connector
8
+ ---
9
+ ## gguf quantized version of openaudio
10
+ - base model from [fishaudio](https://huggingface.co/fishaudio)
11
+ - text-to-speech synthesis
12
+
13
+ ### **run it with gguf-connector**
14
+ ```
15
+ ggc o2
16
+ ```
17
+
18
+ ![screenshot](https://raw.githubusercontent.com/calcuis/text-to-speech-synthesis-2/master/demo.png)
19
+
20
+ | Prompt | Audio Sample |
21
+ |--------|---------------|
22
+ |`Hey Connector, why your appearance looks so stupid?`<br/>`Oh, really? maybe I ate too much smart beans.`<br/>`Wow. Amazing (laughing).`<br/>`Let's go to get some more smart beans and you will become stupid as well.`<br/> | 🎧 **audio-sample-1**<br><audio controls src="https://huggingface.co/calcuis/openaudio-gguf/resolve/main/samples/audio1.wav"></audio> |
23
+ |`Suddenly the plane's engines began failing, and the pilot says there isn't much time, and he'll keep the plane in the air as long as he can, and told his two passengers to take the only two parachutes on board and bail out. The world's smartest man immediately took a parachute and said "I'm the world's smartest man! The world needs me, so I can't die here!", and then jumped out of the plane. The pilot tells the hippie to hurry up and take the other parachute, because there aren't any more. And the hippie says "Relax man. We'll be fine. The world's smartest man took my backpack."`<br/> | 🎧 **audio-sample-2**<br><audio controls src="https://huggingface.co/calcuis/openaudio-gguf/resolve/main/samples/audio2.wav"></audio> |
24
+
25
+ ### **review/reference**
26
+ - simply execute the command (`ggc o2`) above in console/terminal
27
+ - select a `codec` gguf file in the current directory to interact with (see example below)
28
+
29
+ >
30
+ >GGUF file(s) available. Select which one for codec:
31
+ >
32
+ >1. codec-bf16.gguf
33
+ >2. codec-f16.gguf
34
+ >3. codec-f32.gguf
35
+ >4. codec-q2_k.gguf
36
+ >5. codec-q3_k_m.gguf
37
+ >6. codec-q4_k_m.gguf
38
+ >7. codec-q5_k_m.gguf
39
+ >8. codec-q6_k.gguf
40
+ >
41
+ >Enter your choice (1 to 8): _
42
+ >
43
+
44
+ - note: tokenizer and model will be pulled to `models/fish` automatically during the first launch
45
+ - then run it entirely offline; i.e., from local URL: http://127.0.0.1:7860 with lazy webui
46
+ - gguf-connector ([pypi](https://pypi.org/project/gguf-connector))
openaudio-gguf/codec-bf16.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:261394a822e17e24dd24a222c4511fb560e72efa3125efc1760a7577d31a0bd2
3
+ size 1390445280
openaudio-gguf/codec-f16.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc386742876414b1810baa790fb2b545960b8932d9f0ba27396575fece7f4503
3
+ size 1390445280
openaudio-gguf/codec-f32.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fae3c0fa7bd162dd45751502929da4fc7e6800aa38e43cd816f294ff9f15348
3
+ size 2780022496
openaudio-gguf/codec-q2_k.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:046b2dc7ddc9292929210e886a7009ca7007b924a99ed9efebc1499fe0a64a1e
3
+ size 878543584
openaudio-gguf/codec-q3_k_m.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da0575a707e08b74d2c4a2c1dd1647cf86de4adf2e66e461d27ac5dc76aa35f5
3
+ size 909640416
openaudio-gguf/codec-q4_k_m.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16109c1c80cfeda62e78ec55d1f69c947a99119b5dba95eba0186c06a61d8984
3
+ size 950305504
openaudio-gguf/codec-q5_k_m.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86fcb3dd9b5193b6ba06843ed2beddcd3534a84d751bb464fbb2a1f87131fad4
3
+ size 988578528
openaudio-gguf/codec-q6_k.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12c1dee9265853be3fceed63b33d25dd9eb4f2496d2f6e8fea5a0788e499e5cd
3
+ size 1029243616
openaudio-gguf/samples/audio1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:958951b601f18b74a4a896cdfac27ba00359600cf33756792d1297811e89df7d
3
+ size 835628
openaudio-gguf/samples/audio2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6fa0bc4cf6a49d45b264532850239f41ef9bed407fd3a154b6d79c8c5f5a393
3
+ size 2396204
openaudio-s1-mini/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
openaudio-s1-mini/README.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - text-to-speech
4
+ license: cc-by-nc-sa-4.0
5
+ language:
6
+ - zh
7
+ - en
8
+ - de
9
+ - ja
10
+ - fr
11
+ - es
12
+ - ko
13
+ - ar
14
+ - nl
15
+ - ru
16
+ - it
17
+ - pl
18
+ - pt
19
+ pipeline_tag: text-to-speech
20
+ inference: false
21
+ extra_gated_prompt: >-
22
+ You agree to not use the model to generate contents that violate DMCA or local
23
+ laws.
24
+ extra_gated_fields:
25
+ Country: country
26
+ Specific date: date_picker
27
+ I agree to use this model for non-commercial use ONLY: checkbox
28
+ ---
29
+
30
+
31
+ # OpenAudio S1
32
+
33
+ **OpenAudio S1** is a leading text-to-speech (TTS) model trained on more than 2 million hours of audio data in multiple languages.
34
+
35
+ Supported languages:
36
+ - English (en)
37
+ - Chinese (zh)
38
+ - Japanese (ja)
39
+ - German (de)
40
+ - French (fr)
41
+ - Spanish (es)
42
+ - Korean (ko)
43
+ - Arabic (ar)
44
+ - Russian (ru)
45
+ - Dutch (nl)
46
+ - Italian (it)
47
+ - Polish (pl)
48
+ - Portuguese (pt)
49
+
50
+ Please refer to [Fish Speech Github](https://github.com/fishaudio/fish-speech) for more info.
51
+ Demo available at [Fish Audio Playground](https://fish.audio).
52
+ Visit the [OpenAudio website](https://openaudio.com) for blog & tech report.
53
+
54
+ ## Emotion and Tone Support
55
+
56
+ OpenAudio S1 supports a variety of emotional, tone, and special markers to enhance speech synthesis:
57
+
58
+ **1. Emotional markers:**
59
+ (angry) (sad) (disdainful) (excited) (surprised) (satisfied) (unhappy) (anxious) (hysterical) (delighted) (scared) (worried) (indifferent) (upset) (impatient) (nervous) (guilty) (scornful) (frustrated) (depressed) (panicked) (furious) (empathetic) (embarrassed) (reluctant) (disgusted) (keen) (moved) (proud) (relaxed) (grateful) (confident) (interested) (curious) (confused) (joyful) (disapproving) (negative) (denying) (astonished) (serious) (sarcastic) (conciliative) (comforting) (sincere) (sneering) (hesitating) (yielding) (painful) (awkward) (amused)
60
+
61
+ **2. Tone markers:**
62
+ (in a hurry tone) (shouting) (screaming) (whispering) (soft tone)
63
+
64
+ **3. Special markers:**
65
+ (laughing) (chuckling) (sobbing) (crying loudly) (sighing) (panting) (groaning) (crowd laughing) (background laughter) (audience laughing)
66
+
67
+ **Special markers with corresponding onomatopoeia:**
68
+ - Laughing: Ha,ha,ha
69
+ - Chuckling: Hmm,hmm
70
+
71
+ ## Model Variants and Performance
72
+
73
+ OpenAudio S1 includes the following models:
74
+ - **S1 (4B, proprietary):** The full-sized model.
75
+ - **S1-mini (0.5B):** A distilled version of S1.
76
+
77
+ Both S1 and S1-mini incorporate online Reinforcement Learning from Human Feedback (RLHF).
78
+
79
+ **Seed TTS Eval Metrics (English, auto eval, based on OpenAI gpt-4o-transcribe, speaker distance using Revai/pyannote-wespeaker-voxceleb-resnet34-LM):**
80
+
81
+ - **S1:**
82
+ - WER (Word Error Rate): **0.008**
83
+ - CER (Character Error Rate): **0.004**
84
+ - Distance: **0.332**
85
+ - **S1-mini:**
86
+ - WER (Word Error Rate): **0.011**
87
+ - CER (Character Error Rate): **0.005**
88
+ - Distance: **0.380**
89
+
90
+ ## License
91
+
92
+ This model is licensed under the CC-BY-NC-SA-4.0 license (attribution required, non-commercial use only, share-alike).
openaudio-s1-mini/codec.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74fc41c5a7151c6f350af8bd7e5d6e3accfcc7f3dfbfac23afd35af07052bb2f
3
+ size 1871099728
openaudio-s1-mini/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_o_bias": false,
3
+ "attention_qk_norm": true,
4
+ "attention_qkv_bias": false,
5
+ "codebook_size": 4096,
6
+ "dim": 1024,
7
+ "dropout": 0.0,
8
+ "fast_attention_o_bias": false,
9
+ "fast_attention_qk_norm": false,
10
+ "fast_attention_qkv_bias": false,
11
+ "fast_dim": 1024,
12
+ "fast_head_dim": 64,
13
+ "fast_intermediate_size": 3072,
14
+ "fast_n_head": 16,
15
+ "fast_n_local_heads": 8,
16
+ "head_dim": 128,
17
+ "initializer_range": 0.03125,
18
+ "intermediate_size": 3072,
19
+ "max_seq_len": 8192,
20
+ "model_type": "dual_ar",
21
+ "n_fast_layer": 4,
22
+ "n_head": 16,
23
+ "n_layer": 28,
24
+ "n_local_heads": 8,
25
+ "norm_eps": 1e-06,
26
+ "num_codebooks": 10,
27
+ "rope_base": 1000000,
28
+ "scale_codebook_embeddings": true,
29
+ "tie_word_embeddings": false,
30
+ "use_gradient_checkpointing": true,
31
+ "vocab_size": 155776
32
+ }
openaudio-s1-mini/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e59be7dc6714040dce3cde1f41e730c2f0daa5339785b1cd3b60041208c35e6
3
+ size 1735122974
openaudio-s1-mini/special_tokens.json ADDED
The diff for this file is too large to render. See raw diff
 
openaudio-s1-mini/tokenizer.tiktoken ADDED
The diff for this file is too large to render. See raw diff