Spaces:

cwitkowitz
/

timbre-trap

Runtime error

App Files Files Community

cwitkowitz committed on May 4, 2024

Commit

c1112a0

1 Parent(s): 106218a

Updated app in accordance with timbre-trap updates and chunk-based processing.

Browse files

Files changed (3) hide show

app.py +22 -16
tt-demo.pt → models/tt-orig.pt +2 -2
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from pyharp import ModelCard, build_endpoint
 import gradio as gr
@@ -5,7 +6,19 @@ import torchaudio
 import torch
 import os
-timbre_trap = torch.load('tt-demo.pt', map_location='cpu')
 card = ModelCard(
     name='Timbre-Trap',
@@ -26,28 +39,20 @@ def process_fn(audio_path, de_timbre):
     audio = audio.unsqueeze(0)
     # Determine original number of samples
     n_samples = audio.size(-1)
-    # Pad audio to next multiple of block length
-    audio = timbre_trap.sliCQ.pad_to_block_length(audio)
-    # Encode raw audio into latent vectors
-    latents, embeddings, _ = timbre_trap.encode(audio)
-    # Apply skip connections if they are turned on
-    embeddings = timbre_trap.apply_skip_connections(embeddings)
     # Obtain transcription or reconstructed spectral coefficients
-    coefficients = timbre_trap.decode(latents, embeddings, de_timbre)
-    # Invert reconstructed spectral coefficients
-    audio = timbre_trap.sliCQ.decode(coefficients)
     # Trim to original number of samples
     audio = audio[..., :n_samples]
     # Remove batch dimension
     audio = audio.squeeze(0)
-    if de_timbre and audio.abs().max():
-        # Low-pass filter the audio to remove ringing
-        audio = torchaudio.functional.lowpass_biquad(audio, 22050, 8000)
-        # Normalize audio to [-1, 1]
-        audio /= audio.abs().max()
     # Resample audio back to the original sampling rate
     audio = torchaudio.functional.resample(audio, 22050, fs)
@@ -62,6 +67,7 @@ def process_fn(audio_path, de_timbre):
     return save_path
 with gr.Blocks() as demo:
     inputs = [
         gr.Audio(
@@ -81,8 +87,8 @@ with gr.Blocks() as demo:
         )
     ]
     output = gr.Audio(label='Audio Output', type='filepath')
     widgets = build_endpoint(inputs, output, process_fn, card)
 demo.queue()

+from timbre_trap.framework.modules import TimbreTrap
 from pyharp import ModelCard, build_endpoint
 import gradio as gr
 import torch
 import os
+model = TimbreTrap(sample_rate=22050,
+                   n_octaves=9,
+                   bins_per_octave=60,
+                   secs_per_block=3,
+                   latent_size=128,
+                   model_complexity=2,
+                   skip_connections=False)
+model.eval()
+model_path_orig = os.path.join('models', 'tt-orig.pt')
+tt_weights_orig = torch.load(model_path_orig, map_location='cpu')
+model.load_state_dict(tt_weights_orig)
 card = ModelCard(
     name='Timbre-Trap',
     audio = audio.unsqueeze(0)
     # Determine original number of samples
     n_samples = audio.size(-1)
     # Obtain transcription or reconstructed spectral coefficients
+    coefficients = model.chunked_inference(audio, de_timbre)
+    #coefficients = model.inference(audio, de_timbre)
+    # Invert coefficients to produce audio
+    audio = model.sliCQ.decode(coefficients)
     # Trim to original number of samples
     audio = audio[..., :n_samples]
     # Remove batch dimension
     audio = audio.squeeze(0)
+    # Low-pass filter the audio in an attempt to remove artifacts
+    audio = torchaudio.functional.lowpass_biquad(audio, 22050, 8000)
     # Resample audio back to the original sampling rate
     audio = torchaudio.functional.resample(audio, 22050, fs)
     return save_path
+# Build Gradio endpoint
 with gr.Blocks() as demo:
     inputs = [
         gr.Audio(
         )
     ]
+    # Build endpoint
     output = gr.Audio(label='Audio Output', type='filepath')
     widgets = build_endpoint(inputs, output, process_fn, card)
 demo.queue()

tt-demo.pt → models/tt-orig.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f4575c6642348eda3d2e7ff280eece5036e5922e0dacfd25e8dfeb10fd52842
-size 11399295

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c3bafd33a831d61e8ee9051d6c5b4c5d483e6a7669ca9df85ac6ab304cb9fe3
+size 11353410

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 -e git+https://github.com/audacitorch/pyharp.git#egg=pyharp
--e git+https://github.com/sony/timbre-trap.git@release#egg=timbre-trap
 torchaudio
 torch
 cqt_pytorch

 -e git+https://github.com/audacitorch/pyharp.git#egg=pyharp
+-e git+https://github.com/sony/timbre-trap.git@updates#egg=timbre-trap
 torchaudio
 torch
 cqt_pytorch