Spaces:

TechAudio
/

audiosr

Runtime error

App Files Files Community

j committed on Jan 30, 2024

Commit

564c686

1 Parent(s): 13e5909

initial commit

Browse files

Files changed (3) hide show

README.md +2 -1
app.py +113 -0
requirements.txt +16 -0

README.md CHANGED Viewed

@@ -1,8 +1,9 @@
 ---
-title: Audiosr
 emoji: 🏢
 colorFrom: yellow
 colorTo: yellow
 sdk: gradio
 sdk_version: 4.16.0
 app_file: app.py

 ---
+title: Versatile Audio Super-resolution HARP plugin
 emoji: 🏢
 colorFrom: yellow
 colorTo: yellow
+python_version: 3.9
 sdk: gradio
 sdk_version: 4.16.0
 app_file: app.py

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+#!/usr/bin/python3
+import os
+import torch
+from audiosr import super_resolution, build_model, save_wave, get_time, read_list
+from pyharp import ModelCard, build_endpoint
+from audiotools import AudioSignal
+import scipy
+import torch
+import gradio as gr
+# Metadata describing this model; it is passed to build_endpoint() together
+# with the Gradio widgets and process_fn further down.
+card = ModelCard(
+    name='Versatile Audio Super Resolution',
+    description='Upsample audio and predict upper spectrum.',
+    author='Team Audio',
+    tags=['AudioSR', 'Diffusion', 'Super Resolution', 'Upsampling', 'Sample Rate Conversion']
+)
+# Process-wide settings, applied before the model is built below.
+# NOTE(review): TOKENIZERS_PARALLELISM is presumably read by the Hugging Face
+# tokenizers library -- confirm that "true" is the intended value here.
+os.environ["TOKENIZERS_PARALLELISM"] = "true"
+torch.set_float32_matmul_precision("high")
+# Forwarded unchanged to super_resolution() by process_fn.
+# NOTE(review): the original author was unsure about this value -- verify it
+# against the AudioSR documentation before relying on it.
+latent_t_per_second=12.8
+# The model is built once, at import time, and reused by every process_fn call.
+audiosr = build_model(model_name="basic", device="auto")
+def process_fn(input_audio_path, seed, guidance_scale, num_inference_steps):
+    """
+    This function defines the audio processing steps
+    Args:
+        input_audio_path (str): the audio filepath to be processed.
+        <YOUR_KWARGS>: additional keyword arguments necessary for processing.
+            NOTE: These should correspond to and match order of UI elements defined below.
+    Returns:
+        output_audio_path (str): the filepath of the processed audio.
+    """
+    sig = AudioSignal(input_audio_path)
+    outfile = "./output.wav"
+    audio_concat = None
+    total_length = sig.duration
+    num_segs = int(total_length / 10) #10 second segments
+    remainder = total_length % 10 # duration of last segment
+    for audio_segment in range(num_segs):
+        start = audio_segment * 10
+        if audio_segment == num_segs - 1:
+            end = start + remainder
+        else:
+            end = start + 10
+        # get segment of audio from original file
+        sig_seg = sig[start:end]
+        sig_seg.write("temp.wav")
+        audio = super_resolution(
+            audiosr,
+            "temp.wav",
+            seed=seed,
+            guidance_scale=guidance_scale,
+            ddim_steps=num_inference_steps,
+            latent_t_per_second=latent_t_per_second
+        )
+        #save_wave(waveform, output_dir, name=name, samplerate=sig.sample_rate)
+        if audio_concat is None:
+            audio_concat = audio
+            #audio_concat = audio[0]
+        else:
+            audio_concat = scipy.concatenate((audio_concat, audio))
+    scipy.io.wavfile.write(outfile, rate=sig.sample_rate, data=audio_concat)
+    return outfile
+# Build the endpoint
+with gr.Blocks() as webapp:
+    # Define your Gradio interface
+    inputs = [
+        gr.Audio(
+            label="Audio Input",
+            type="filepath"
+        ),
+        gr.Slider(
+            label="seed",
+            minimum="0",
+            maximum="65535",
+            value="0",
+            step="1"
+        ),
+        gr.Slider(
+            minimum=0, maximum=10,
+            value=3.5,
+            label="Guidance Scale"
+        ),
+        gr.Slider(
+            minimum=1, maximum=500,
+            step=1, value=50,
+            label="Inference Steps"
+        ),
+    ]
+    # make an output audio widget
+    output = gr.Audio(label="Audio Output", type="filepath")
+    # Build the endpoint
+    ctrls_data, ctrls_button, process_button, cancel_button = build_endpoint(inputs, output, process_fn, card)
+#webapp.queue()
+webapp.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+# Extra package index used for the CUDA 11.8 builds of torch/torchvision/torchaudio pinned below.
+--extra-index-url https://download.pytorch.org/whl/cu118
+git+https://github.com/huggingface/diffusers.git
+# NOTE(review): transformers is requested twice (git main here, ==4.30.2
+# further down); pip may reject the conflicting requirements -- keep one.
+git+https://github.com/huggingface/transformers.git
+# CUDA wheels everywhere except macOS, which gets the default builds.
+torch==2.0.1+cu118; sys_platform != 'darwin'
+torch==2.0.1; sys_platform == 'darwin'
+torchvision==0.15.2+cu118; sys_platform != 'darwin'
+torchvision==0.15.2; sys_platform == 'darwin'
+torchaudio==2.0.2+cu118; sys_platform != 'darwin'
+torchaudio==2.0.2; sys_platform == 'darwin'
+huggingface_hub
+transformers==4.30.2
+-e git+https://github.com/audacitorch/pyharp.git#egg=pyharp
+descript-audiotools
+scipy
+# NOTE(review): "datetime" on PyPI is presumably the Zope DateTime package,
+# not the standard-library module -- likely unnecessary; confirm.
+datetime
+gradio
+# NOTE(review): app.py imports `audiosr`, but no audiosr requirement is listed
+# in this file -- confirm it is installed some other way.