bark

Running

App Files Files Community

lainlives committed 7 days ago

Commit

c515baa

verified ·

1 Parent(s): a29b3ac

Upload 327 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +4 -0
bark/__init__.py +2 -0
bark/__main__.py +3 -0
bark/__pycache__/__init__.cpython-313.pyc +0 -0
bark/__pycache__/api.cpython-313.pyc +0 -0
bark/__pycache__/generation.cpython-313.pyc +0 -0
bark/__pycache__/model.cpython-313.pyc +0 -0
bark/__pycache__/model_fine.cpython-313.pyc +0 -0
bark/__pycache__/settings.cpython-313.pyc +0 -0
bark/api.py +158 -0
bark/assets/prompts/announcer.npz +3 -0
bark/assets/prompts/custom/evil.npz +3 -0
bark/assets/prompts/custom/readme.md +1 -0
bark/assets/prompts/de_speaker_0.npz +3 -0
bark/assets/prompts/de_speaker_1.npz +3 -0
bark/assets/prompts/de_speaker_2.npz +3 -0
bark/assets/prompts/de_speaker_3.npz +3 -0
bark/assets/prompts/de_speaker_4.npz +3 -0
bark/assets/prompts/de_speaker_5.npz +3 -0
bark/assets/prompts/de_speaker_6.npz +3 -0
bark/assets/prompts/de_speaker_7.npz +3 -0
bark/assets/prompts/de_speaker_8.npz +3 -0
bark/assets/prompts/de_speaker_9.npz +3 -0
bark/assets/prompts/en_speaker_0.npz +3 -0
bark/assets/prompts/en_speaker_1.npz +3 -0
bark/assets/prompts/en_speaker_2.npz +3 -0
bark/assets/prompts/en_speaker_3.npz +3 -0
bark/assets/prompts/en_speaker_4.npz +3 -0
bark/assets/prompts/en_speaker_5.npz +3 -0
bark/assets/prompts/en_speaker_6.npz +3 -0
bark/assets/prompts/en_speaker_7.npz +3 -0
bark/assets/prompts/en_speaker_8.npz +3 -0
bark/assets/prompts/en_speaker_9.npz +3 -0
bark/assets/prompts/es_speaker_0.npz +3 -0
bark/assets/prompts/es_speaker_1.npz +3 -0
bark/assets/prompts/es_speaker_2.npz +3 -0
bark/assets/prompts/es_speaker_3.npz +3 -0
bark/assets/prompts/es_speaker_4.npz +3 -0
bark/assets/prompts/es_speaker_5.npz +3 -0
bark/assets/prompts/es_speaker_6.npz +3 -0
bark/assets/prompts/es_speaker_7.npz +3 -0
bark/assets/prompts/es_speaker_8.npz +3 -0
bark/assets/prompts/es_speaker_9.npz +3 -0
bark/assets/prompts/fr_speaker_0.npz +3 -0
bark/assets/prompts/fr_speaker_1.npz +3 -0
bark/assets/prompts/fr_speaker_2.npz +3 -0
bark/assets/prompts/fr_speaker_3.npz +3 -0
bark/assets/prompts/fr_speaker_4.npz +3 -0
bark/assets/prompts/fr_speaker_5.npz +3 -0
bark/assets/prompts/fr_speaker_6.npz +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+outputs/02-25-2026/final_21-53-52_s1.wav filter=lfs diff=lfs merge=lfs -text
+outputs/02-25-2026/final_21-57-01_s1.wav filter=lfs diff=lfs merge=lfs -text
+outputs/02-25-2026/final_22-01-33_s1.wav filter=lfs diff=lfs merge=lfs -text
+outputs/02-25-2026/final_22-05-17_s1.wav filter=lfs diff=lfs merge=lfs -text

bark/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .api import generate_audio, text_to_semantic, semantic_to_waveform, save_as_prompt
2	+ from .generation import SAMPLE_RATE, preload_models

bark/__main__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .cli import cli
2	+
3	+ cli()

bark/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (340 Bytes). View file

bark/__pycache__/api.cpython-313.pyc ADDED Viewed

Binary file (4.3 kB). View file

bark/__pycache__/generation.cpython-313.pyc ADDED Viewed

Binary file (37.2 kB). View file

bark/__pycache__/model.cpython-313.pyc ADDED Viewed

Binary file (14 kB). View file

bark/__pycache__/model_fine.cpython-313.pyc ADDED Viewed

Binary file (10.3 kB). View file

bark/__pycache__/settings.cpython-313.pyc ADDED Viewed

Binary file (743 Bytes). View file

bark/api.py ADDED Viewed

	@@ -0,0 +1,158 @@

+from typing import Dict, Optional, Union
+import numpy as np
+from .generation import codec_decode, generate_coarse, generate_fine, generate_text_semantic
+# Run the text -> semantic -> coarse -> fine -> decode pipeline with a
+# separate sampling temperature for each stage.
+#
+# Args:
+#     text_prompt: text passed to `generate_text_semantic`.
+#     semantic_temp: `temp` used for the semantic stage.
+#     eos_p: forwarded as `min_eos_p` to the semantic stage.
+#     coarse_temp: `temp` used for the coarse stage.
+#     fine_temp: `temp` used for the fine stage.
+#     voice_name: forwarded as `history_prompt` to all three stages.
+#     output_full: when true, also return the intermediate stage outputs.
+# Returns:
+#     `codec_decode(x_fine_gen)` (presumably the decoded audio array --
+#     confirm against `codec_decode`), or the tuple
+#     `(full_generation, decoded)` when `output_full` is true.
+def generate_with_settings(text_prompt, semantic_temp=0.6, eos_p=0.2, coarse_temp=0.7, fine_temp=0.5, voice_name=None, output_full=False):
+    # generation with more control
+    # Stage 1: text -> semantic output (use_kv_caching is always True here).
+    x_semantic = generate_text_semantic(
+        text_prompt,
+        history_prompt=voice_name,
+        temp=semantic_temp,
+        min_eos_p = eos_p,
+        use_kv_caching=True
+    )
+    # Stage 2: semantic -> coarse output, conditioned on the same history prompt.
+    x_coarse_gen = generate_coarse(
+        x_semantic,
+        history_prompt=voice_name,
+        temp=coarse_temp,
+        use_kv_caching=True
+    )
+    # Stage 3: coarse -> fine output.
+    x_fine_gen = generate_fine(
+        x_coarse_gen,
+        history_prompt=voice_name,
+        temp=fine_temp,
+    )
+    if output_full:
+        # Same three keys that `save_as_prompt` requires, so this dict can be
+        # passed to it directly.
+        full_generation = {
+            'semantic_prompt': x_semantic,
+            'coarse_prompt': x_coarse_gen,
+            'fine_prompt': x_fine_gen
+        }
+        return full_generation, codec_decode(x_fine_gen)
+    return codec_decode(x_fine_gen)
+# Thin wrapper around `generate_text_semantic`: forwards `text`,
+# `history_prompt`, `temp` and `silent` unchanged and returns its result.
+def text_to_semantic(
+    text: str,
+    history_prompt: Optional[Union[Dict, str]] = None,
+    temp: float = 0.7,
+    silent: bool = False,
+):
+    """Generate semantic array from text.
+    Args:
+        text: text to be turned into audio
+        history_prompt: history choice for audio cloning
+        temp: generation temperature (1.0 more diverse, 0.0 more conservative)
+        silent: disable progress bar
+    Returns:
+        numpy semantic array to be fed into `semantic_to_waveform`
+    """
+    # `use_kv_caching=True` is hard-wired; callers cannot turn it off here.
+    x_semantic = generate_text_semantic(
+        text,
+        history_prompt=history_prompt,
+        temp=temp,
+        silent=silent,
+        use_kv_caching=True
+    )
+    return x_semantic
+# Turn semantic output into audio in three steps: `generate_coarse`,
+# `generate_fine`, then `codec_decode`.
+# Return shape depends on `output_full`: the decoded audio alone, or the
+# tuple `(full_generation, audio_arr)` when `output_full` is true.
+def semantic_to_waveform(
+    semantic_tokens: np.ndarray,
+    history_prompt: Optional[Union[Dict, str]] = None,
+    temp: float = 0.7,
+    silent: bool = False,
+    output_full: bool = False,
+):
+    """Generate audio array from semantic input.
+    Args:
+        semantic_tokens: semantic token output from `text_to_semantic`
+        history_prompt: history choice for audio cloning
+        temp: generation temperature (1.0 more diverse, 0.0 more conservative)
+        silent: disable progress bar
+        output_full: return full generation to be used as a history prompt
+    Returns:
+        numpy audio array at sample frequency 24khz
+    """
+    # `temp` and `silent` are applied to the coarse stage only.
+    coarse_tokens = generate_coarse(
+        semantic_tokens,
+        history_prompt=history_prompt,
+        temp=temp,
+        silent=silent,
+        use_kv_caching=True
+    )
+    # The fine stage always runs with a fixed temp of 0.5, and `silent` is
+    # not forwarded to it.
+    fine_tokens = generate_fine(
+        coarse_tokens,
+        history_prompt=history_prompt,
+        temp=0.5,
+    )
+    audio_arr = codec_decode(fine_tokens)
+    if output_full:
+        # Bundle the outputs of all three stages under the keys that
+        # `save_as_prompt` checks for.
+        full_generation = {
+            "semantic_prompt": semantic_tokens,
+            "coarse_prompt": coarse_tokens,
+            "fine_prompt": fine_tokens,
+        }
+        return full_generation, audio_arr
+    return audio_arr
+# Save a full generation dict to `filepath` with `np.savez`.
+#
+# Args:
+#     filepath: destination path; must end with ".npz".
+#     full_generation: dict containing at least the keys "semantic_prompt",
+#         "coarse_prompt" and "fine_prompt"; every entry is passed to
+#         `np.savez` as a keyword argument.
+# Raises:
+#     AssertionError: if any of the checks below fails.
+# NOTE(review): validation relies on `assert`, which Python removes when run
+# with -O, so these checks do not execute in optimized mode.
+def save_as_prompt(filepath, full_generation):
+    assert(filepath.endswith(".npz"))
+    assert(isinstance(full_generation, dict))
+    assert("semantic_prompt" in full_generation)
+    assert("coarse_prompt" in full_generation)
+    assert("fine_prompt" in full_generation)
+    # Each dict key becomes the name of an array in the saved archive.
+    np.savez(filepath, **full_generation)
+# End-to-end entry point: chains `text_to_semantic` and
+# `semantic_to_waveform`, passing the same `history_prompt` and `silent`
+# to both.
+# Return shape depends on `output_full`: the audio array alone, or the
+# tuple `(full_generation, audio_arr)` when `output_full` is true.
+def generate_audio(
+    text: str,
+    history_prompt: Optional[Union[Dict, str]] = None,
+    text_temp: float = 0.7,
+    waveform_temp: float = 0.7,
+    silent: bool = False,
+    output_full: bool = False,
+):
+    """Generate audio array from input text.
+    Args:
+        text: text to be turned into audio
+        history_prompt: history choice for audio cloning
+        text_temp: generation temperature (1.0 more diverse, 0.0 more conservative)
+        waveform_temp: generation temperature (1.0 more diverse, 0.0 more conservative)
+        silent: disable progress bar
+        output_full: return full generation to be used as a history prompt
+    Returns:
+        numpy audio array at sample frequency 24khz
+    """
+    # `text_temp` drives the text -> semantic step.
+    semantic_tokens = text_to_semantic(
+        text,
+        history_prompt=history_prompt,
+        temp=text_temp,
+        silent=silent,
+    )
+    # `waveform_temp` is handed to `semantic_to_waveform` as its `temp`.
+    out = semantic_to_waveform(
+        semantic_tokens,
+        history_prompt=history_prompt,
+        temp=waveform_temp,
+        silent=silent,
+        output_full=output_full,
+    )
+    if output_full:
+        # `out` is a (full_generation, audio_arr) pair in this mode.
+        full_generation, audio_arr = out
+        return full_generation, audio_arr
+    else:
+        audio_arr = out
+    return audio_arr

bark/assets/prompts/announcer.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26f2d1a9e3b6fe453cf5fc8191de26cbfae6276c5b0f7c376c6a0f3c35867f83
+size 16794

bark/assets/prompts/custom/evil.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0474629d1ca65c36ff054a335dc4c790766985c76f27e20d264167753c6726e0
+size 1379260

bark/assets/prompts/custom/readme.md ADDED Viewed

	@@ -0,0 +1 @@


1	+ For convenience, place your custom prompts here...

bark/assets/prompts/de_speaker_0.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:008d7f3d0a52305a80c1abce26ccf4120181554a24055a0581894819b14f998d
+size 31940

bark/assets/prompts/de_speaker_1.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5bb2ac34fa466f5d6804f48f51658d7b7d8d91ce7139d34c717c917578858fb
+size 31940

bark/assets/prompts/de_speaker_2.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1dedc8ab1949653480223f0c0cf3ebd20406d39b52e19908d32275eb8cfaf4b9
+size 23516

bark/assets/prompts/de_speaker_3.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c5abe325e6306a7f96725fcc6186c0eb147d2f068ce14b863e086cbf52b1986e
+size 29060

bark/assets/prompts/de_speaker_4.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91d102ad045aabc996f487d0d4f0b3fd289ef2da200d1df289cf5da298d23796
+size 20316

bark/assets/prompts/de_speaker_5.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8aa116b450c74c60ef43d1fd141fe961e23ebeafdcb57991b22ae4a08c62cf44
+size 35084

bark/assets/prompts/de_speaker_6.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f95bd28bc7382b7294c0bb187b18873aa9c050b3fe5793166c547200c8e2da9
+size 31724

bark/assets/prompts/de_speaker_7.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:332c5aee851c0544e1ad587fbc477b8d4eb28e852192fcd969d97c894b028a2b
+size 59348

bark/assets/prompts/de_speaker_8.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a0eefea2a0d702177f44df4b218b950119726c041cb505e1df36ab0fc0651018
+size 25116

bark/assets/prompts/de_speaker_9.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:189e941a182411853351c56e422d51a4a8fad20f1f8b8f396042bb2ada3cceb2
+size 22180

bark/assets/prompts/en_speaker_0.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb130b14872cc53381bdb867cee71c26a6d116af81dbf2542f3f44d11b8aaf3f
+size 22396

bark/assets/prompts/en_speaker_1.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3cdc113954acb3839e9112437a029d482925236bce91294803a42e3f1f493aea
+size 18396

bark/assets/prompts/en_speaker_2.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c27653e7db430ba4518cb5306c62a228329f928bfa566f68334545f0949b5eea
+size 33860

bark/assets/prompts/en_speaker_3.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22de48d9414836a5337e483b256ed916d51ece916c36669371d9e92b1323047b
+size 38124

bark/assets/prompts/en_speaker_4.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3481fe27c9ffc73b68783ebe122934e0430a888c199ade914e97433df73038c1
+size 21220

bark/assets/prompts/en_speaker_5.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b661d1573ab2df0d89b4b51e79d727dd5bfccfe8d740a84594de4028e1a23057
+size 15516

bark/assets/prompts/en_speaker_6.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d8f92a1ea0383453614d1c20c8cfbeaf9ad28d9f5778f718bf0e54eb18c0245
+size 13436

bark/assets/prompts/en_speaker_7.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6fdbb2c04efb4e81d179369b614678adba1cac9da8cc76fe6c40396da681b3a3
+size 35084

bark/assets/prompts/en_speaker_8.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4233571cfc24030c9c2ed823f6393d8f3c99e26fef20d744a2e5ff59b93f086
+size 18980

bark/assets/prompts/en_speaker_9.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb86c2ec884fcc906cb0d7342a9d84657f6d9abeac3c88c7b1bbfd1207ec09ca
+size 35940

bark/assets/prompts/es_speaker_0.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a4849970528104040e0ed6a96f9c705b58c72b5eee538baed1fa2283873b331
+size 27620

bark/assets/prompts/es_speaker_1.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c41ca11134138c1cb6108f643c686f0d0c72f376a13576cd9490721a0916d07a
+size 25436

bark/assets/prompts/es_speaker_2.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d9a6406ce99291a80f81bef895e1fd3d13b5204143d656cf0aa30c013f2974bd
+size 27620

bark/assets/prompts/es_speaker_3.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f9e43586d2a185df543444fe3f7e604bfe56c9f1364f59c9671be75e88b14d02
+size 26500

bark/assets/prompts/es_speaker_4.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52b4c89d19199265d9347ff83550ceeb5bead49c2552df776ef292f851d3de33
+size 24420

bark/assets/prompts/es_speaker_5.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c57dddcdf54e8e97813e887dc2e066efde628d17e10fad2a9824b552af485b2
+size 24900

bark/assets/prompts/es_speaker_6.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22b2dc4980a17c3dcd5f2833cc0eaab5dec06e7233520885fa792f618606dc68
+size 34820

bark/assets/prompts/es_speaker_7.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c848b3561977abaed30f38fcda853283ae04c11457483347c8baaa2d5a5f94d3
+size 21596

bark/assets/prompts/es_speaker_8.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:691b4a12bbfd8f0e04df1ed793de2a4ada97ae04a7546e3bee12aaa094b7e156
+size 18660

bark/assets/prompts/es_speaker_9.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5dda9f490517edf9447e2f02de3bec3877515a086e9668d7f0abb0d800d82ab6
+size 22660

bark/assets/prompts/fr_speaker_0.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f483b271820be529ffc95968a1b7cd5e5f63137c30649192b1e10a935a8b846c
+size 30604

bark/assets/prompts/fr_speaker_1.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba3805ef05a285f8501762900b1919631b2fd4274ee8d7cf4b4c432afd6a7635
+size 29324

bark/assets/prompts/fr_speaker_2.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b3e7654e74d80a7068745838b1640c72d3616fbb2fa8f88de997d252139f7b74
+size 51084

bark/assets/prompts/fr_speaker_3.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e67de23fa486d091eaea3d276dcf640ed0d34079fc5e78ae9e4ab0f758341af2
+size 31460

bark/assets/prompts/fr_speaker_4.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0e02e7b5f98b834968a47b1dbbb7acb18b681152461ae08e16c4b5ee93cbbcd
+size 36364

bark/assets/prompts/fr_speaker_5.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f771bcf5db66f2865a8023874291a6d706154853c9c9bdecd0ab0aeae3bd0a59
+size 44044

bark/assets/prompts/fr_speaker_6.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21906f0c2dc2578662cdc6359a03a96e02aa296c02d0cd3c50cb9dca4379ae9a
+size 43564