lainlives committed on
Commit
c515baa
·
verified ·
1 Parent(s): a29b3ac

Upload 327 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. bark/__init__.py +2 -0
  3. bark/__main__.py +3 -0
  4. bark/__pycache__/__init__.cpython-313.pyc +0 -0
  5. bark/__pycache__/api.cpython-313.pyc +0 -0
  6. bark/__pycache__/generation.cpython-313.pyc +0 -0
  7. bark/__pycache__/model.cpython-313.pyc +0 -0
  8. bark/__pycache__/model_fine.cpython-313.pyc +0 -0
  9. bark/__pycache__/settings.cpython-313.pyc +0 -0
  10. bark/api.py +158 -0
  11. bark/assets/prompts/announcer.npz +3 -0
  12. bark/assets/prompts/custom/evil.npz +3 -0
  13. bark/assets/prompts/custom/readme.md +1 -0
  14. bark/assets/prompts/de_speaker_0.npz +3 -0
  15. bark/assets/prompts/de_speaker_1.npz +3 -0
  16. bark/assets/prompts/de_speaker_2.npz +3 -0
  17. bark/assets/prompts/de_speaker_3.npz +3 -0
  18. bark/assets/prompts/de_speaker_4.npz +3 -0
  19. bark/assets/prompts/de_speaker_5.npz +3 -0
  20. bark/assets/prompts/de_speaker_6.npz +3 -0
  21. bark/assets/prompts/de_speaker_7.npz +3 -0
  22. bark/assets/prompts/de_speaker_8.npz +3 -0
  23. bark/assets/prompts/de_speaker_9.npz +3 -0
  24. bark/assets/prompts/en_speaker_0.npz +3 -0
  25. bark/assets/prompts/en_speaker_1.npz +3 -0
  26. bark/assets/prompts/en_speaker_2.npz +3 -0
  27. bark/assets/prompts/en_speaker_3.npz +3 -0
  28. bark/assets/prompts/en_speaker_4.npz +3 -0
  29. bark/assets/prompts/en_speaker_5.npz +3 -0
  30. bark/assets/prompts/en_speaker_6.npz +3 -0
  31. bark/assets/prompts/en_speaker_7.npz +3 -0
  32. bark/assets/prompts/en_speaker_8.npz +3 -0
  33. bark/assets/prompts/en_speaker_9.npz +3 -0
  34. bark/assets/prompts/es_speaker_0.npz +3 -0
  35. bark/assets/prompts/es_speaker_1.npz +3 -0
  36. bark/assets/prompts/es_speaker_2.npz +3 -0
  37. bark/assets/prompts/es_speaker_3.npz +3 -0
  38. bark/assets/prompts/es_speaker_4.npz +3 -0
  39. bark/assets/prompts/es_speaker_5.npz +3 -0
  40. bark/assets/prompts/es_speaker_6.npz +3 -0
  41. bark/assets/prompts/es_speaker_7.npz +3 -0
  42. bark/assets/prompts/es_speaker_8.npz +3 -0
  43. bark/assets/prompts/es_speaker_9.npz +3 -0
  44. bark/assets/prompts/fr_speaker_0.npz +3 -0
  45. bark/assets/prompts/fr_speaker_1.npz +3 -0
  46. bark/assets/prompts/fr_speaker_2.npz +3 -0
  47. bark/assets/prompts/fr_speaker_3.npz +3 -0
  48. bark/assets/prompts/fr_speaker_4.npz +3 -0
  49. bark/assets/prompts/fr_speaker_5.npz +3 -0
  50. bark/assets/prompts/fr_speaker_6.npz +3 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ outputs/02-25-2026/final_21-53-52_s1.wav filter=lfs diff=lfs merge=lfs -text
37
+ outputs/02-25-2026/final_21-57-01_s1.wav filter=lfs diff=lfs merge=lfs -text
38
+ outputs/02-25-2026/final_22-01-33_s1.wav filter=lfs diff=lfs merge=lfs -text
39
+ outputs/02-25-2026/final_22-05-17_s1.wav filter=lfs diff=lfs merge=lfs -text
bark/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .api import generate_audio, text_to_semantic, semantic_to_waveform, save_as_prompt
2
+ from .generation import SAMPLE_RATE, preload_models
bark/__main__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .cli import cli
2
+
3
+ cli()
bark/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (340 Bytes). View file
 
bark/__pycache__/api.cpython-313.pyc ADDED
Binary file (4.3 kB). View file
 
bark/__pycache__/generation.cpython-313.pyc ADDED
Binary file (37.2 kB). View file
 
bark/__pycache__/model.cpython-313.pyc ADDED
Binary file (14 kB). View file
 
bark/__pycache__/model_fine.cpython-313.pyc ADDED
Binary file (10.3 kB). View file
 
bark/__pycache__/settings.cpython-313.pyc ADDED
Binary file (743 Bytes). View file
 
bark/api.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Optional, Union
2
+
3
+ import numpy as np
4
+
5
+ from .generation import codec_decode, generate_coarse, generate_fine, generate_text_semantic
6
+
7
+
8
def generate_with_settings(text_prompt, semantic_temp=0.6, eos_p=0.2, coarse_temp=0.7, fine_temp=0.5, voice_name=None, output_full=False):
    """Run the semantic, coarse and fine stages with a separate setting per stage.

    Args:
        text_prompt: text handed to `generate_text_semantic`
        semantic_temp: `temp` used for the semantic stage
        eos_p: forwarded to the semantic stage as `min_eos_p`
        coarse_temp: `temp` used for `generate_coarse`
        fine_temp: `temp` used for `generate_fine`
        voice_name: forwarded to all three stages as `history_prompt`
        output_full: also return the tokens produced by every stage

    Returns:
        the output of `codec_decode` for the fine tokens; when `output_full`
        is set, a `(full_generation, decoded)` tuple whose first item is a dict
        with the keys `semantic_prompt`, `coarse_prompt` and `fine_prompt`
    """
    # Stage 1: text -> semantic tokens.
    semantic_tokens = generate_text_semantic(
        text_prompt,
        history_prompt=voice_name,
        temp=semantic_temp,
        min_eos_p=eos_p,
        use_kv_caching=True,
    )
    # Stage 2: semantic -> coarse tokens.
    coarse_tokens = generate_coarse(
        semantic_tokens,
        history_prompt=voice_name,
        temp=coarse_temp,
        use_kv_caching=True,
    )
    # Stage 3: coarse -> fine tokens.
    fine_tokens = generate_fine(
        coarse_tokens,
        history_prompt=voice_name,
        temp=fine_temp,
    )

    decoded = codec_decode(fine_tokens)
    if not output_full:
        return decoded

    stage_outputs = {
        'semantic_prompt': semantic_tokens,
        'coarse_prompt': coarse_tokens,
        'fine_prompt': fine_tokens,
    }
    return stage_outputs, decoded
39
+
40
+
41
def text_to_semantic(
    text: str,
    history_prompt: Optional[Union[Dict, str]] = None,
    temp: float = 0.7,
    silent: bool = False,
):
    """Turn text into a semantic token array.

    Thin wrapper around `generate_text_semantic` that always enables
    `use_kv_caching`.

    Args:
        text: text to be turned into audio
        history_prompt: history choice for audio cloning
        temp: generation temperature (1.0 more diverse, 0.0 more conservative)
        silent: disable progress bar

    Returns:
        numpy semantic array to be fed into `semantic_to_waveform`
    """
    semantic_options = dict(
        history_prompt=history_prompt,
        temp=temp,
        silent=silent,
        use_kv_caching=True,
    )
    return generate_text_semantic(text, **semantic_options)
66
+
67
+
68
def semantic_to_waveform(
    semantic_tokens: np.ndarray,
    history_prompt: Optional[Union[Dict, str]] = None,
    temp: float = 0.7,
    silent: bool = False,
    output_full: bool = False,
    fine_temp: float = 0.5,
):
    """Generate audio array from semantic input.

    Args:
        semantic_tokens: semantic token output from `text_to_semantic`
        history_prompt: history choice for audio cloning
        temp: generation temperature of the coarse stage
            (1.0 more diverse, 0.0 more conservative)
        silent: disable progress bar (forwarded to the coarse stage only)
        output_full: return full generation to be used as a history prompt
        fine_temp: generation temperature of the fine stage; the default of
            0.5 is the value that used to be hard-coded here

    Returns:
        numpy audio array at sample frequency 24khz; when `output_full` is
        set, a `(full_generation, audio_arr)` tuple whose first item is a dict
        with the keys `semantic_prompt`, `coarse_prompt` and `fine_prompt`
    """
    coarse_tokens = generate_coarse(
        semantic_tokens,
        history_prompt=history_prompt,
        temp=temp,
        silent=silent,
        use_kv_caching=True,
    )
    # The fine stage has its own temperature, independent of `temp`.
    fine_tokens = generate_fine(
        coarse_tokens,
        history_prompt=history_prompt,
        temp=fine_temp,
    )
    audio_arr = codec_decode(fine_tokens)
    if output_full:
        full_generation = {
            "semantic_prompt": semantic_tokens,
            "coarse_prompt": coarse_tokens,
            "fine_prompt": fine_tokens,
        }
        return full_generation, audio_arr
    return audio_arr
108
+
109
+
110
def save_as_prompt(filepath, full_generation):
    """Save a full generation as an `.npz` archive.

    Args:
        filepath: destination path (str or `os.PathLike`); must end in ".npz"
        full_generation: dict holding at least the keys `semantic_prompt`,
            `coarse_prompt` and `fine_prompt`; every entry is written to the
            archive under its key

    Raises:
        ValueError: if `filepath` does not end in ".npz" or a required key is
            missing from `full_generation`
        TypeError: if `full_generation` is not a dict
    """
    import os  # local import: only needed to normalise path-like arguments

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    path = os.fspath(filepath)
    if not path.endswith(".npz"):
        raise ValueError(f"filepath must end with '.npz', got {path!r}")
    if not isinstance(full_generation, dict):
        raise TypeError(
            f"full_generation must be a dict, got {type(full_generation).__name__}"
        )
    required_keys = ("semantic_prompt", "coarse_prompt", "fine_prompt")
    missing = [key for key in required_keys if key not in full_generation]
    if missing:
        raise ValueError(f"full_generation is missing required keys: {missing}")
    np.savez(path, **full_generation)
117
+
118
+
119
def generate_audio(
    text: str,
    history_prompt: Optional[Union[Dict, str]] = None,
    text_temp: float = 0.7,
    waveform_temp: float = 0.7,
    silent: bool = False,
    output_full: bool = False,
):
    """Generate audio array from input text.

    Chains `text_to_semantic` and `semantic_to_waveform`, passing the same
    `history_prompt` and `silent` values to both.

    Args:
        text: text to be turned into audio
        history_prompt: history choice for audio cloning
        text_temp: temperature passed to `text_to_semantic`
            (1.0 more diverse, 0.0 more conservative)
        waveform_temp: temperature passed to `semantic_to_waveform`
            (1.0 more diverse, 0.0 more conservative)
        silent: disable progress bar
        output_full: return full generation to be used as a history prompt

    Returns:
        numpy audio array at sample frequency 24khz, or a
        `(full_generation, audio_arr)` tuple when `output_full` is set
    """
    semantic_tokens = text_to_semantic(
        text,
        history_prompt=history_prompt,
        temp=text_temp,
        silent=silent,
    )
    result = semantic_to_waveform(
        semantic_tokens,
        history_prompt=history_prompt,
        temp=waveform_temp,
        silent=silent,
        output_full=output_full,
    )
    if not output_full:
        return result
    # With output_full the waveform stage yields a (full_generation, audio) pair.
    full_generation, audio_arr = result
    return full_generation, audio_arr
bark/assets/prompts/announcer.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f2d1a9e3b6fe453cf5fc8191de26cbfae6276c5b0f7c376c6a0f3c35867f83
3
+ size 16794
bark/assets/prompts/custom/evil.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0474629d1ca65c36ff054a335dc4c790766985c76f27e20d264167753c6726e0
3
+ size 1379260
bark/assets/prompts/custom/readme.md ADDED
@@ -0,0 +1 @@
 
 
1
+ For convenience, place your custom prompts here...
bark/assets/prompts/de_speaker_0.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:008d7f3d0a52305a80c1abce26ccf4120181554a24055a0581894819b14f998d
3
+ size 31940
bark/assets/prompts/de_speaker_1.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5bb2ac34fa466f5d6804f48f51658d7b7d8d91ce7139d34c717c917578858fb
3
+ size 31940
bark/assets/prompts/de_speaker_2.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dedc8ab1949653480223f0c0cf3ebd20406d39b52e19908d32275eb8cfaf4b9
3
+ size 23516
bark/assets/prompts/de_speaker_3.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5abe325e6306a7f96725fcc6186c0eb147d2f068ce14b863e086cbf52b1986e
3
+ size 29060
bark/assets/prompts/de_speaker_4.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91d102ad045aabc996f487d0d4f0b3fd289ef2da200d1df289cf5da298d23796
3
+ size 20316
bark/assets/prompts/de_speaker_5.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aa116b450c74c60ef43d1fd141fe961e23ebeafdcb57991b22ae4a08c62cf44
3
+ size 35084
bark/assets/prompts/de_speaker_6.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f95bd28bc7382b7294c0bb187b18873aa9c050b3fe5793166c547200c8e2da9
3
+ size 31724
bark/assets/prompts/de_speaker_7.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:332c5aee851c0544e1ad587fbc477b8d4eb28e852192fcd969d97c894b028a2b
3
+ size 59348
bark/assets/prompts/de_speaker_8.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0eefea2a0d702177f44df4b218b950119726c041cb505e1df36ab0fc0651018
3
+ size 25116
bark/assets/prompts/de_speaker_9.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:189e941a182411853351c56e422d51a4a8fad20f1f8b8f396042bb2ada3cceb2
3
+ size 22180
bark/assets/prompts/en_speaker_0.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb130b14872cc53381bdb867cee71c26a6d116af81dbf2542f3f44d11b8aaf3f
3
+ size 22396
bark/assets/prompts/en_speaker_1.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cdc113954acb3839e9112437a029d482925236bce91294803a42e3f1f493aea
3
+ size 18396
bark/assets/prompts/en_speaker_2.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c27653e7db430ba4518cb5306c62a228329f928bfa566f68334545f0949b5eea
3
+ size 33860
bark/assets/prompts/en_speaker_3.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22de48d9414836a5337e483b256ed916d51ece916c36669371d9e92b1323047b
3
+ size 38124
bark/assets/prompts/en_speaker_4.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3481fe27c9ffc73b68783ebe122934e0430a888c199ade914e97433df73038c1
3
+ size 21220
bark/assets/prompts/en_speaker_5.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b661d1573ab2df0d89b4b51e79d727dd5bfccfe8d740a84594de4028e1a23057
3
+ size 15516
bark/assets/prompts/en_speaker_6.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d8f92a1ea0383453614d1c20c8cfbeaf9ad28d9f5778f718bf0e54eb18c0245
3
+ size 13436
bark/assets/prompts/en_speaker_7.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fdbb2c04efb4e81d179369b614678adba1cac9da8cc76fe6c40396da681b3a3
3
+ size 35084
bark/assets/prompts/en_speaker_8.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4233571cfc24030c9c2ed823f6393d8f3c99e26fef20d744a2e5ff59b93f086
3
+ size 18980
bark/assets/prompts/en_speaker_9.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb86c2ec884fcc906cb0d7342a9d84657f6d9abeac3c88c7b1bbfd1207ec09ca
3
+ size 35940
bark/assets/prompts/es_speaker_0.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a4849970528104040e0ed6a96f9c705b58c72b5eee538baed1fa2283873b331
3
+ size 27620
bark/assets/prompts/es_speaker_1.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c41ca11134138c1cb6108f643c686f0d0c72f376a13576cd9490721a0916d07a
3
+ size 25436
bark/assets/prompts/es_speaker_2.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9a6406ce99291a80f81bef895e1fd3d13b5204143d656cf0aa30c013f2974bd
3
+ size 27620
bark/assets/prompts/es_speaker_3.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9e43586d2a185df543444fe3f7e604bfe56c9f1364f59c9671be75e88b14d02
3
+ size 26500
bark/assets/prompts/es_speaker_4.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52b4c89d19199265d9347ff83550ceeb5bead49c2552df776ef292f851d3de33
3
+ size 24420
bark/assets/prompts/es_speaker_5.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c57dddcdf54e8e97813e887dc2e066efde628d17e10fad2a9824b552af485b2
3
+ size 24900
bark/assets/prompts/es_speaker_6.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22b2dc4980a17c3dcd5f2833cc0eaab5dec06e7233520885fa792f618606dc68
3
+ size 34820
bark/assets/prompts/es_speaker_7.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c848b3561977abaed30f38fcda853283ae04c11457483347c8baaa2d5a5f94d3
3
+ size 21596
bark/assets/prompts/es_speaker_8.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:691b4a12bbfd8f0e04df1ed793de2a4ada97ae04a7546e3bee12aaa094b7e156
3
+ size 18660
bark/assets/prompts/es_speaker_9.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dda9f490517edf9447e2f02de3bec3877515a086e9668d7f0abb0d800d82ab6
3
+ size 22660
bark/assets/prompts/fr_speaker_0.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f483b271820be529ffc95968a1b7cd5e5f63137c30649192b1e10a935a8b846c
3
+ size 30604
bark/assets/prompts/fr_speaker_1.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba3805ef05a285f8501762900b1919631b2fd4274ee8d7cf4b4c432afd6a7635
3
+ size 29324
bark/assets/prompts/fr_speaker_2.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3e7654e74d80a7068745838b1640c72d3616fbb2fa8f88de997d252139f7b74
3
+ size 51084
bark/assets/prompts/fr_speaker_3.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e67de23fa486d091eaea3d276dcf640ed0d34079fc5e78ae9e4ab0f758341af2
3
+ size 31460
bark/assets/prompts/fr_speaker_4.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0e02e7b5f98b834968a47b1dbbb7acb18b681152461ae08e16c4b5ee93cbbcd
3
+ size 36364
bark/assets/prompts/fr_speaker_5.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f771bcf5db66f2865a8023874291a6d706154853c9c9bdecd0ab0aeae3bd0a59
3
+ size 44044
bark/assets/prompts/fr_speaker_6.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21906f0c2dc2578662cdc6359a03a96e02aa296c02d0cd3c50cb9dca4379ae9a
3
+ size 43564