wham

Sleeping

App Files Files Community

orrp committed on Mar 10

Commit

e823eac

1 Parent(s): 1932c0c

Switching to pyproject.toml

Browse files

Files changed (5) hide show

pyproject.toml +61 -0
requirements.txt +0 -16
setup.py +0 -33
vampnet/.pre-commit-config.yaml +13 -14
vampnet/app.py +62 -88

pyproject.toml ADDED Viewed

	@@ -0,0 +1,61 @@

+[build-system]
+requires = ["setuptools>=61.0", "wheel", "Cython"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "wham"
+version = "0.0.1"
+description = "Towards A Translative Model of Sperm Whale Vocalization"
+readme = "README.md"
+authors = [
+    { name = "Project CETI" }
+]
+license = { text = "MIT" }
+requires-python = ">=3.9"
+dependencies = [
+    "torch",
+    "gradio",
+    "argbind>=0.3.2",
+    "numpy<1.24",
+    "pydantic>=2.0,<3",
+    "huggingface_hub",
+    "loralib",
+    "torch_pitch_shift",
+    "soundfile",
+    "pydub",
+    "tqdm",
+    "Cython",
+    "pandas",
+    "pathlib",
+    "ffmpeg-python",
+    "scikit-learn",
+    "wandb",
+    "gdown",
+    "transformers",
+    "fadtk",
+    "urllib3==2.0",
+    "plotly",
+    "pyharp",
+    # Git-based dependencies
+    "wavebeat @ git+https://github.com/hugofloresgarcia/wavebeat.git",
+    "lac @ git+https://github.com/hugofloresgarcia/lac.git",
+    "descript-audiotools @ git+https://github.com/hugofloresgarcia/audiotools.git"
+]
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["wham*", "vampnet*"]
+[tool.ruff]
+# Target Python 3.9+
+target-version = "py39"
+line-length = 88
+[tool.ruff.lint]
+# Enable Pyflakes (F), pycodestyle (E, W), and isort (I)
+select = ["E", "F", "W", "I"]
+ignore = []
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"

requirements.txt DELETED Viewed

@@ -1,16 +0,0 @@
-torch
-gradio
-argbind>=0.3.2
-numpy<1.24
-pydantic>=2.0,<3
-huggingface_hub
-loralib
-torch_pitch_shift
-soundfile
-pydub
-tqdm
-Cython
-wavebeat @ git+https://github.com/hugofloresgarcia/wavebeat
-lac @ git+https://github.com/hugofloresgarcia/lac.git
-descript-audiotools @ git+https://github.com/hugofloresgarcia/audiotools.git
-pyharp

setup.py DELETED Viewed

@@ -1,33 +0,0 @@
-from setuptools import setup, find_packages
-with open("README.md") as f:
-    long_description = f.read()
-setup(
-    name="wham",
-    version="0.0.1",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    url="https://github.com/orrp/wam",
-    license="MIT",
-    packages=find_packages(),
-    package_dir={},
-    install_requires=[
-        "descript-audiotools @ git+https://github.com/hugofloresgarcia/audiotools.git",
-        "argbind",
-        "pandas",
-        "pathlib",
-        "pydub",
-        "ffmpeg-python",
-        "tqdm",
-        "scikit-learn",
-        "wandb",
-        "gdown",  # For fetching large files from Google Drive
-        "soundfile",
-        "transformers",
-        "torch",
-        "Cython",
-        "fadtk",
-        "urllib3==2.0"
-    ],
-)

vampnet/.pre-commit-config.yaml CHANGED Viewed

@@ -1,15 +1,14 @@
 repos:
-- repo: https://github.com/asottile/reorder_python_imports
-  rev: v2.5.0
-  hooks:
-    - id: reorder-python-imports
-- repo: https://github.com/psf/black
-  rev: 23.1.0
-  hooks:
-    - id: black
-      language_version: python3
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.0.1
-  hooks:
-    - id: end-of-file-fixer
-    - id: trailing-whitespace

 repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.3.0 # Use the latest version available
+    hooks:
+      # Run the linter and import sorter
+      - id: ruff
+        args: [--fix]
+      # Run the formatter (replaces black)
+      - id: ruff-format
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: end-of-file-fixer
+      - id: trailing-whitespace

vampnet/app.py CHANGED Viewed

@@ -1,36 +1,12 @@
 import os
 import sys
-try:
-    import spaces
-    ZERO_GPU = True
-except ImportError:
-    ZERO_GPU = False
-def gpu(fn):
-    if ZERO_GPU:
-        return spaces.GPU(fn)
-    return fn
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-if ZERO_GPU:
-    from huggingface_hub import hf_hub_download
-    MODEL_DIR = os.path.join(SCRIPT_DIR, "models")
-    os.makedirs(MODEL_DIR, exist_ok=True)
-    MODEL_REPO = "anvitax/wham-weights"
-    for fname in ["coarse.pth", "c2f.pth", "codec.pth", "wavebeat.pth"]:
-        target = os.path.join(MODEL_DIR, fname)
-        if not os.path.exists(target):
-            print(f"Downloading {fname} from {MODEL_REPO}...")
-            hf_hub_download(repo_id=MODEL_REPO, filename=fname, local_dir=MODEL_DIR)
-        else:
-            print(f"Found {fname}")
 os.chdir(SCRIPT_DIR)
 import torch
-device = "cpu" if ZERO_GPU else ("cuda" if torch.cuda.is_available() else "cpu")
 sys.argv = ["app.py", "--args.load", "conf/interface.yml", "--Interface.device", device]
 from pathlib import Path
@@ -56,8 +32,8 @@ conf = argbind.parse_args()
 from torch_pitch_shift import pitch_shift, get_fast_shifts
 def shift_pitch(signal, interval: int):
     signal.samples = pitch_shift(
-        signal.samples,
-        shift=interval,
         sample_rate=signal.sample_rate
     )
     return signal
@@ -83,7 +59,7 @@ def load_audio(file):
     print(file)
     filepath = file.name
     sig = at.AudioSignal.salient_excerpt(
-        filepath,
         duration=interface.coarse.chunk_size_s
     )
     sig = interface.preprocess(sig)
@@ -98,7 +74,6 @@ def load_example_audio():
     return "./assets/example.wav"
-@gpu
 def _vamp(
     _input_audio,
     _num_steps,
@@ -167,7 +142,7 @@ def _vamp(
     if _beat_mask_width > 0:
         beat_mask = interface.make_beat_mask(
             sig,
-            after_beat_s=(_beat_mask_width/1000),
             mask_upbeats=not _beat_mask_downbeats,
         )
         mask = pmask.mask_and(mask, beat_mask)
@@ -203,29 +178,29 @@ def _vamp(
     _seed_val = _seed if _seed > 0 else None
     zv, mask_z = interface.coarse_vamp(
-        z,
         mask=mask,
         sampling_steps=_num_steps,
         mask_temperature=_masktemp*10,
         sampling_temperature=_sampletemp,
-        return_mask=True,
-        typical_filtering=_typical_filtering,
-        typical_mass=_typical_mass,
-        typical_min_tokens=_typical_min_tokens,
         top_p=_top_p_val,
         gen_fn=interface.coarse.generate,
         seed=_seed_val,
         sample_cutoff=_sample_cutoff,
     )
-    if _use_coarse2fine:
         zv = interface.coarse_to_fine(
-            zv,
-            mask_temperature=_masktemp*10,
             sampling_temperature=_sampletemp,
             mask=mask,
             sampling_steps=_num_steps,
-            sample_cutoff=_sample_cutoff,
             seed=_seed_val,
         )
@@ -233,7 +208,7 @@ def _vamp(
     print("done")
     print(f"output loudness is {sig.loudness()}")
-    sig = sig.normalize(loudness)
     print(f"normalized loudness is {sig.loudness()}")
     sig.write(out_dir / "output.wav")
@@ -280,7 +255,7 @@ def vamp(data):
 def api_vamp(data):
     return _extract_and_call_vamp(data, return_mask=False)
 def save_vamp(data):
     out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
     out_dir.mkdir(parents=True, exist_ok=True)
@@ -290,7 +265,7 @@ def save_vamp(data):
     sig_in.write(out_dir / "input.wav")
     sig_out.write(out_dir / "output.wav")
     _data = {
         "masktemp": data[masktemp],
         "sampletemp": data[sampletemp],
@@ -302,7 +277,7 @@ def save_vamp(data):
         "notes": data[notes_text],
         "periodic_period": data[periodic_p],
         "periodic_width": data[periodic_w],
-        "n_conditioning_codebooks": data[n_conditioning_codebooks],
         "use_coarse2fine": data[use_coarse2fine],
         "stretch_factor": data[stretch_factor],
         "seed": data[seed],
@@ -322,7 +297,6 @@ def save_vamp(data):
     return f"saved! your save code is {out_dir.stem}", zip_path
-@gpu
 def harp_vamp(_input_audio, _beat_mask_width, _sampletemp):
     interface.to("cuda")
@@ -338,22 +312,22 @@ def harp_vamp(_input_audio, _beat_mask_width, _sampletemp):
     if _beat_mask_width > 0:
         beat_mask = interface.make_beat_mask(
             sig,
-            after_beat_s=(_beat_mask_width/1000),
         )
         mask = pmask.mask_and(mask, beat_mask)
     # save the mask as a txt file
     zv, mask_z = interface.coarse_vamp(
-        z,
         mask=mask,
         sampling_temperature=_sampletemp,
-        return_mask=True,
         gen_fn=interface.coarse.generate,
     )
     zv = interface.coarse_to_fine(
-        zv,
         sampling_temperature=_sampletemp,
         mask=mask,
     )
@@ -371,17 +345,17 @@ with gr.Blocks() as demo:
         with gr.Column():
             gr.Markdown("# VampNet Audio Vamping")
             gr.Markdown("""## Description:
-            This is a demo of the VampNet, a generative audio model that transforms the input audio based on the chosen settings.
-            You can control the extent and nature of variation with a set of manual controls and presets.
             Use this interface to experiment with different mask settings and explore the audio outputs.
             """)
             gr.Markdown("""
             ## Instructions:
-            1. You can start by uploading some audio, or by loading the example audio.
-            2. Choose a preset for the vamp operation, or manually adjust the controls to customize the mask settings.
             3. Click the "generate (vamp)!!!" button to apply the vamp operation. Listen to the output audio.
-            4. Optionally, you can add some notes and save the result.
             5. You can also use the output as the new input and continue experimenting!
             """)
     with gr.Row():
@@ -396,13 +370,13 @@ with gr.Blocks() as demo:
             input_audio = gr.Audio(
                 label="input audio",
-                interactive=False,
                 type="filepath",
             )
             audio_mask = gr.Audio(
                 label="audio mask (listen to this to hear the mask hints)",
-                interactive=False,
                 type="filepath",
             )
@@ -418,7 +392,7 @@ with gr.Blocks() as demo:
                 inputs=[manual_audio_upload],
                 outputs=[ input_audio]
             )
         # mask settings
         with gr.Column():
@@ -429,7 +403,7 @@ with gr.Blocks() as demo:
                         "onset_mask_width": 0,
                         "beat_mask_width": 0,
                         "beat_mask_downbeats": False,
-                    },
                     "slight periodic variation": {
                         "periodic_p": 5,
                         "onset_mask_width": 5,
@@ -475,7 +449,7 @@ with gr.Blocks() as demo:
                 }
             preset = gr.Dropdown(
-                label="preset",
                 choices=list(presets.keys()),
                 value="strong periodic variation",
             )
@@ -485,9 +459,9 @@ with gr.Blocks() as demo:
                 periodic_p = gr.Slider(
                     label="periodic prompt  (0 - unconditional, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
                     minimum=0,
-                    maximum=128,
                     step=1,
-                    value=3,
                 )
@@ -506,7 +480,7 @@ with gr.Blocks() as demo:
                     value=0,
                 )
                 beat_mask_downbeats = gr.Checkbox(
-                    label="beat mask downbeats only?",
                     value=False
                 )
@@ -540,7 +514,7 @@ with gr.Blocks() as demo:
                         value=1,
                     )
                     n_conditioning_codebooks = gr.Number(
-                        label="number of conditioning codebooks. probably 0",
                         value=0,
                         precision=0,
                     )
@@ -548,14 +522,14 @@ with gr.Blocks() as demo:
                     stretch_factor = gr.Slider(
                         label="time stretch factor",
                         minimum=0,
-                        maximum=64,
                         step=1,
-                        value=1,
                     )
             preset_outputs = {
-                periodic_p,
-                onset_mask_width,
                 beat_mask_width,
                 beat_mask_downbeats,
             }
@@ -594,10 +568,10 @@ with gr.Blocks() as demo:
                 label="sample temperature",
                 minimum=0.1,
                 maximum=10.0,
-                value=1.0,
                 step=0.001
             )
             with gr.Accordion("sampling settings", open=False):
@@ -611,7 +585,7 @@ with gr.Blocks() as demo:
                     label="typical filtering ",
                     value=False
                 )
-                typical_mass = gr.Slider(
                     label="typical mass (should probably stay between 0.1 and 0.5)",
                     minimum=0.01,
                     maximum=0.99,
@@ -628,13 +602,13 @@ with gr.Blocks() as demo:
                     label="sample cutoff",
                     minimum=0.0,
                     maximum=1.0,
-                    value=0.5,
                     step=0.01
                 )
             use_coarse2fine = gr.Checkbox(
                 label="use coarse2fine",
-                value=True,
                 visible=False
             )
@@ -667,9 +641,9 @@ with gr.Blocks() as demo:
         with gr.Column():
             # lora_choice = gr.Dropdown(
-            #     label="lora choice",
             #     choices=list(loras.keys()),
-            #     value=LORA_NONE,
             #     visible=False
             # )
@@ -681,7 +655,7 @@ with gr.Blocks() as demo:
             )
             notes_text = gr.Textbox(
-                label="type any notes about the generated audio here",
                 value="",
                 interactive=True
             )
@@ -691,48 +665,48 @@ with gr.Blocks() as demo:
                 interactive=False
             )
             use_as_input_button = gr.Button("use output as input")
             thank_you = gr.Markdown("")
     _inputs = {
-            input_audio,
             num_steps,
             masktemp,
             sampletemp,
             top_p,
-            prefix_s, suffix_s,
-            rand_mask_intensity,
             periodic_p, periodic_w,
-            n_conditioning_codebooks,
             dropout,
-            use_coarse2fine,
-            stretch_factor,
-            onset_mask_width,
             typical_filtering,
             typical_mass,
             typical_min_tokens,
             beat_mask_width,
             beat_mask_downbeats,
-            seed,
             # lora_choice,
             n_mask_codebooks,
-            pitch_shift_amt,
             sample_cutoff
         }
     # connect widgets
     vamp_button.click(
         fn=vamp,
         inputs=_inputs,
-        outputs=[output_audio, audio_mask],
     )
     api_vamp_button = gr.Button("api vamp", visible=False)
     api_vamp_button.click(
         fn=api_vamp,
-        inputs=_inputs,
-        outputs=[output_audio],
         api_name="vamp"
     )

 import os
 import sys
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 os.chdir(SCRIPT_DIR)
 import torch
+device = "cuda" if torch.cuda.is_available() else "cpu"  # fall back to CPU without CUDA
 sys.argv = ["app.py", "--args.load", "conf/interface.yml", "--Interface.device", device]
 from pathlib import Path
 from torch_pitch_shift import pitch_shift, get_fast_shifts
 def shift_pitch(signal, interval: int):
     signal.samples = pitch_shift(
+        signal.samples,
+        shift=interval,
         sample_rate=signal.sample_rate
     )
     return signal
     print(file)
     filepath = file.name
     sig = at.AudioSignal.salient_excerpt(
+        filepath,
         duration=interface.coarse.chunk_size_s
     )
     sig = interface.preprocess(sig)
     return "./assets/example.wav"
 def _vamp(
     _input_audio,
     _num_steps,
     if _beat_mask_width > 0:
         beat_mask = interface.make_beat_mask(
             sig,
+            after_beat_s=(_beat_mask_width/1000),
             mask_upbeats=not _beat_mask_downbeats,
         )
         mask = pmask.mask_and(mask, beat_mask)
     _seed_val = _seed if _seed > 0 else None
     zv, mask_z = interface.coarse_vamp(
+        z,
         mask=mask,
         sampling_steps=_num_steps,
         mask_temperature=_masktemp*10,
         sampling_temperature=_sampletemp,
+        return_mask=True,
+        typical_filtering=_typical_filtering,
+        typical_mass=_typical_mass,
+        typical_min_tokens=_typical_min_tokens,
         top_p=_top_p_val,
         gen_fn=interface.coarse.generate,
         seed=_seed_val,
         sample_cutoff=_sample_cutoff,
     )
+    if _use_coarse2fine:
         zv = interface.coarse_to_fine(
+            zv,
+            mask_temperature=_masktemp*10,
             sampling_temperature=_sampletemp,
             mask=mask,
             sampling_steps=_num_steps,
+            sample_cutoff=_sample_cutoff,
             seed=_seed_val,
         )
     print("done")
     print(f"output loudness is {sig.loudness()}")
+    sig = sig.normalize(loudness)
     print(f"normalized loudness is {sig.loudness()}")
     sig.write(out_dir / "output.wav")
 def api_vamp(data):
     return _extract_and_call_vamp(data, return_mask=False)
 def save_vamp(data):
     out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
     out_dir.mkdir(parents=True, exist_ok=True)
     sig_in.write(out_dir / "input.wav")
     sig_out.write(out_dir / "output.wav")
     _data = {
         "masktemp": data[masktemp],
         "sampletemp": data[sampletemp],
         "notes": data[notes_text],
         "periodic_period": data[periodic_p],
         "periodic_width": data[periodic_w],
+        "n_conditioning_codebooks": data[n_conditioning_codebooks],
         "use_coarse2fine": data[use_coarse2fine],
         "stretch_factor": data[stretch_factor],
         "seed": data[seed],
     return f"saved! your save code is {out_dir.stem}", zip_path
 def harp_vamp(_input_audio, _beat_mask_width, _sampletemp):
     interface.to("cuda")
     if _beat_mask_width > 0:
         beat_mask = interface.make_beat_mask(
             sig,
+            after_beat_s=(_beat_mask_width/1000),
         )
         mask = pmask.mask_and(mask, beat_mask)
     # save the mask as a txt file
     zv, mask_z = interface.coarse_vamp(
+        z,
         mask=mask,
         sampling_temperature=_sampletemp,
+        return_mask=True,
         gen_fn=interface.coarse.generate,
     )
     zv = interface.coarse_to_fine(
+        zv,
         sampling_temperature=_sampletemp,
         mask=mask,
     )
         with gr.Column():
             gr.Markdown("# VampNet Audio Vamping")
             gr.Markdown("""## Description:
+            This is a demo of the VampNet, a generative audio model that transforms the input audio based on the chosen settings.
+            You can control the extent and nature of variation with a set of manual controls and presets.
             Use this interface to experiment with different mask settings and explore the audio outputs.
             """)
             gr.Markdown("""
             ## Instructions:
+            1. You can start by uploading some audio, or by loading the example audio.
+            2. Choose a preset for the vamp operation, or manually adjust the controls to customize the mask settings.
             3. Click the "generate (vamp)!!!" button to apply the vamp operation. Listen to the output audio.
+            4. Optionally, you can add some notes and save the result.
             5. You can also use the output as the new input and continue experimenting!
             """)
     with gr.Row():
             input_audio = gr.Audio(
                 label="input audio",
+                interactive=False,
                 type="filepath",
             )
             audio_mask = gr.Audio(
                 label="audio mask (listen to this to hear the mask hints)",
+                interactive=False,
                 type="filepath",
             )
                 inputs=[manual_audio_upload],
                 outputs=[ input_audio]
             )
         # mask settings
         with gr.Column():
                         "onset_mask_width": 0,
                         "beat_mask_width": 0,
                         "beat_mask_downbeats": False,
+                    },
                     "slight periodic variation": {
                         "periodic_p": 5,
                         "onset_mask_width": 5,
                 }
             preset = gr.Dropdown(
+                label="preset",
                 choices=list(presets.keys()),
                 value="strong periodic variation",
             )
                 periodic_p = gr.Slider(
                     label="periodic prompt  (0 - unconditional, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
                     minimum=0,
+                    maximum=128,
                     step=1,
+                    value=3,
                 )
                     value=0,
                 )
                 beat_mask_downbeats = gr.Checkbox(
+                    label="beat mask downbeats only?",
                     value=False
                 )
                         value=1,
                     )
                     n_conditioning_codebooks = gr.Number(
+                        label="number of conditioning codebooks. probably 0",
                         value=0,
                         precision=0,
                     )
                     stretch_factor = gr.Slider(
                         label="time stretch factor",
                         minimum=0,
+                        maximum=64,
                         step=1,
+                        value=1,
                     )
             preset_outputs = {
+                periodic_p,
+                onset_mask_width,
                 beat_mask_width,
                 beat_mask_downbeats,
             }
                 label="sample temperature",
                 minimum=0.1,
                 maximum=10.0,
+                value=1.0,
                 step=0.001
             )
             with gr.Accordion("sampling settings", open=False):
                     label="typical filtering ",
                     value=False
                 )
+                typical_mass = gr.Slider(
                     label="typical mass (should probably stay between 0.1 and 0.5)",
                     minimum=0.01,
                     maximum=0.99,
                     label="sample cutoff",
                     minimum=0.0,
                     maximum=1.0,
+                    value=0.5,
                     step=0.01
                 )
             use_coarse2fine = gr.Checkbox(
                 label="use coarse2fine",
+                value=True,
                 visible=False
             )
         with gr.Column():
             # lora_choice = gr.Dropdown(
+            #     label="lora choice",
             #     choices=list(loras.keys()),
+            #     value=LORA_NONE,
             #     visible=False
             # )
             )
             notes_text = gr.Textbox(
+                label="type any notes about the generated audio here",
                 value="",
                 interactive=True
             )
                 interactive=False
             )
             use_as_input_button = gr.Button("use output as input")
             thank_you = gr.Markdown("")
     _inputs = {
+            input_audio,
             num_steps,
             masktemp,
             sampletemp,
             top_p,
+            prefix_s, suffix_s,
+            rand_mask_intensity,
             periodic_p, periodic_w,
+            n_conditioning_codebooks,
             dropout,
+            use_coarse2fine,
+            stretch_factor,
+            onset_mask_width,
             typical_filtering,
             typical_mass,
             typical_min_tokens,
             beat_mask_width,
             beat_mask_downbeats,
+            seed,
             # lora_choice,
             n_mask_codebooks,
+            pitch_shift_amt,
             sample_cutoff
         }
     # connect widgets
     vamp_button.click(
         fn=vamp,
         inputs=_inputs,
+        outputs=[output_audio, audio_mask],
     )
     api_vamp_button = gr.Button("api vamp", visible=False)
     api_vamp_button.click(
         fn=api_vamp,
+        inputs=_inputs,
+        outputs=[output_audio],
         api_name="vamp"
     )