niobures committed on Oct 25, 2025

Commit

b508693

verified ·

1 Parent(s): 8b6325d

AudioSep (code, models, paper)

Browse files

Files changed (29) hide show

.gitattributes +5 -0
Separate Anything You Describe.pdf +3 -0
Separate What You Describe. Language-Queried Audio Source Separation.pdf +3 -0
code/AudioSep.zip +3 -0
models/AudioSep (audo)/.gitattributes +35 -0
models/AudioSep (audo)/audiosep_base_4M_steps.ckpt +3 -0
models/AudioSep (audo)/audioset_textmap.npy +3 -0
models/AudioSep (audo)/bpe_simple_vocab_16e6.txt.gz +3 -0
models/AudioSep (audo)/music_speech_audioset_epoch_15_esc_89.98.pt +3 -0
models/AudioSep (audo)/source.txt +1 -0
models/ailia-models/audiosep_resunet.onnx +3 -0
models/ailia-models/audiosep_resunet.onnx.prototxt +0 -0
models/ailia-models/audiosep_text.onnx +3 -0
models/ailia-models/audiosep_text.onnx.prototxt +0 -0
models/ailia-models/code/LICENSE.txt +21 -0
models/ailia-models/code/README.md +65 -0
models/ailia-models/code/audiosep.py +283 -0
models/ailia-models/code/input.wav +3 -0
models/ailia-models/code/output_thunder.wav +3 -0
models/ailia-models/code/output_waterdrops.wav +3 -0
models/ailia-models/code/tokenizer/merges.txt +0 -0
models/ailia-models/code/tokenizer/tokenizer.json +0 -0
models/ailia-models/code/tokenizer/tokenizer_config.json +1 -0
models/ailia-models/code/tokenizer/vocab.json +0 -0
models/ailia-models/source.txt +7 -0
models/audiosep-demo/.gitattributes +35 -0
models/audiosep-demo/README.md +9 -0
models/audiosep-demo/pytorch_model.bin +3 -0
models/audiosep-demo/source.txt +1 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+models/ailia-models/code/input.wav filter=lfs diff=lfs merge=lfs -text
+models/ailia-models/code/output_thunder.wav filter=lfs diff=lfs merge=lfs -text
+models/ailia-models/code/output_waterdrops.wav filter=lfs diff=lfs merge=lfs -text
+Separate[[:space:]]Anything[[:space:]]You[[:space:]]Describe.pdf filter=lfs diff=lfs merge=lfs -text
+Separate[[:space:]]What[[:space:]]You[[:space:]]Describe.[[:space:]]Language-Queried[[:space:]]Audio[[:space:]]Source[[:space:]]Separation.pdf filter=lfs diff=lfs merge=lfs -text

Separate Anything You Describe.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9df16e42f8b2d68e6b14b15979189334a8fc05e63b52a1cba6b8da893dd6711a
+size 13520281

Separate What You Describe. Language-Queried Audio Source Separation.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:298516c10117a2e9f0657219daf66ad1cb6892ff57965fb7605aa61aedcfc883
+size 7601966

code/AudioSep.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0680f2680e3ee93cc1451e24eb26e6a04fc17b5f75404248e0c7399dc9486350
+size 20054078

models/AudioSep (audo)/.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

models/AudioSep (audo)/audiosep_base_4M_steps.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f8cda01bfd0ebd141eef45d41db7a3ada23a56568465840d3cff04b8010ce82c
+size 1264844076

models/AudioSep (audo)/audioset_textmap.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bada103070d92f9eadd33e1b4f45ec8583f59080ef218c966b43294bd4c86d5b
+size 84448

models/AudioSep (audo)/bpe_simple_vocab_16e6.txt.gz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:924691ac288e54409236115652ad4aa250f48203de50a9e4722a6ecd48d6804a
+size 1356917

models/AudioSep (audo)/music_speech_audioset_epoch_15_esc_89.98.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51c68f12f9d7ea25fdaaccf741ec7f81e93ee594455410f3bca4f47f88d8e006
+size 2352471003

models/AudioSep (audo)/source.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ https://huggingface.co/audo/AudioSep

models/ailia-models/audiosep_resunet.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:030b1ff2af237e753ed5441c3a11358defaf2a706d7e037af681e4282b0ec179
+size 102583411

models/ailia-models/audiosep_resunet.onnx.prototxt ADDED Viewed

The diff for this file is too large to render. See raw diff

models/ailia-models/audiosep_text.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3cb68423900435937ff5c139f1c867ef8d1b0c05e5023c64f3446a24c08d3de
+size 501433356

models/ailia-models/audiosep_text.onnx.prototxt ADDED Viewed

The diff for this file is too large to render. See raw diff

models/ailia-models/code/LICENSE.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+    MIT License
+    Copyright (c) Xubo Liu
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE

models/ailia-models/code/README.md ADDED Viewed

	@@ -0,0 +1,65 @@

+# AudioSep: Separate Anything You Describe
+## Input
+* **Mixed audio file**
+Audio file in wav format with mixed sources. [input.wav](./input.wav)
+https://github.com/axinc-ai/ailia-models/assets/53651931/4b761212-a1c7-46dc-b598-a08e4c5ab7ff
+This audio file was adapted from the [official audiosep implementation](https://github.com/Audio-AGI/AudioSep)
+https://audio-agi.github.io/Separate-Anything-You-Describe/demos/exp31_water/drops_mixture.wav
+* **Text condition**
+Text description of the sound source you want to separate.
+## Output
+* **Audio file**
+Separated audio source according to the text query.
+Saves to ```./output.wav``` by default; a different output path can be given with the ```-s``` option, as in the usage examples below.
+## Usage
+Internet connection is required when running the script for the first time, as the model files will be automatically downloaded.
+Running this script will separate sound sources from the original input audio file, according to the language query.
+#### Example1: Extract sound of thunder
+```bash
+$ python3 audiosep.py -p "thunder" -i input.wav -s output_thunder.wav
+```
+https://github.com/axinc-ai/ailia-models/assets/53651931/d0d016dd-a808-4eb6-a4b5-9791f8f1bd2f
+#### Example2: Extract sound of waterdrops
+```bash
+$ python3 audiosep.py -p "water drops" -i input.wav -s output_waterdrops.wav
+```
+https://github.com/axinc-ai/ailia-models/assets/53651931/7710b6c9-49dc-4d2a-8489-ccbf7fb45591
+A ```.wav``` file containing the sound source separated from the original mixture will be created in both cases.
+## Reference
+* [AudioSep](https://github.com/Audio-AGI/AudioSep)
+* [Separate Anything You Describe](https://audio-agi.github.io/Separate-Anything-You-Describe/)
+## Framework
+PyTorch
+## Model Format
+ONNX opset=11
+## Netron
+* [audiosep_text.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/audiosep/audiosep_text.onnx.prototxt)
+* [audiosep_resunet.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/audiosep/audiosep_resunet.onnx.prototxt)

models/ailia-models/code/audiosep.py ADDED Viewed

	@@ -0,0 +1,283 @@

+import sys
+import time
+import math
+from logging import getLogger
+import scipy
+import librosa
+import numpy as np
+import ailia
+# import original modules
+sys.path.append('../../util')
+from arg_utils import get_base_parser, update_parser, get_savepath  # noqa
+from model_utils import check_and_download_models  # noqa
+logger = getLogger(__name__)
+# ======================
+# Parameters
+# ======================
+# File names of the ONNX weights for the text-query net and the separation net.
+# main() hands them to check_and_download_models() and to ailia.Net().
+QUERY_WEIGHT_PATH = 'audiosep_text.onnx'
+SEPNET_WEIGHT_PATH = 'audiosep_resunet.onnx'
+# Matching .prototxt file names; main() only passes these to
+# check_and_download_models() (ailia.Net() is given None in their place).
+QUERY_MODEL_PATH = 'audiosep_text.onnx.prototxt'
+SEPNET_MODEL_PATH = 'audiosep_resunet.onnx.prototxt'
+# Base URL passed to check_and_download_models() together with the names above.
+REMOTE_PATH = "https://storage.googleapis.com/ailia-models/audiosep/"
+# ======================
+# Argument Parser Config
+# ======================
+# NOTE(review): the three positional arguments look like (description,
+# default input, default save path) -- confirm against arg_utils.get_base_parser.
+parser = get_base_parser(
+    'audiosep', "input.wav", None
+)
+# Text description of the source to extract; read as args.prompt in split_audio().
+parser.add_argument(
+    "-p", "--prompt", metavar="TEXT", type=str,
+    default="water drops",
+    help="Text query."
+)
+# When set, main() loads transformers' RobertaTokenizer instead of ailia_tokenizer.
+parser.add_argument(
+    '--disable_ailia_tokenizer',
+    action='store_true',
+    help='disable ailia tokenizer.'
+)
+# NOTE(review): check_input_type=False presumably skips the helper's input-type
+# validation (the input here is audio) -- confirm in arg_utils.update_parser.
+args = update_parser(parser, check_input_type=False)
+# ======================
+# Helper functions
+# ======================
+"""
+Functions below are taken from https://github.com/Audio-AGI/AudioSep, which was released under MIT license.
+Modified to be run with numpy arrays instead of torch tensors
+"""
+def preprocess_mag(mag):
+    #batch normalize self.bn0 = nn.BatchNorm2d(window_size // 2 + 1, momentum=momentum)
+    mag = np.transpose(mag, (0,3,2,1))
+    mag = (mag - np.mean(mag, axis=(2,3), keepdims=True)) / (np.std(mag, axis=(2,3), keepdims=True) + 1e-5)
+    mag = np.transpose(mag, (0,3,2,1))
+    p = math.ceil(mag.shape[2] / 2**5) * 2**5 - mag.shape[2]
+    mag = np.pad(mag, ((0,0),(0,0),(0,p),(0,0)))
+    mag = mag[:,:,:,0:mag.shape[-1]-1]
+    return mag
+def spectrogram_phase(input, eps=0.):
+    D = librosa.stft(
+         input,
+         n_fft=2048,
+         hop_length=320,
+         win_length=2048,
+         window='hann',
+         center=True,
+         pad_mode='reflect'
+    )
+    real = np.real(D)
+    imag = np.imag(D)
+    mag = np.clip(real ** 2 + imag ** 2, eps, np.inf) ** 0.5
+    cos = real / mag# normalize
+    sin = imag / mag# normalize
+    return mag, cos, sin
+def wav_to_spectrogram(input, eps=1e-10):
+        """Waveform to spectrogram.
+        Args:
+          input: (batch_size, segment_samples, channels_num)
+        Outputs:
+          output: (batch_size, channels_num, time_steps, freq_bins)
+        """
+        sp_list = []
+        cos_list = []
+        sin_list = []
+        channels_num = input.shape[1]
+        for channel in range(channels_num):
+            mag, cos, sin = spectrogram_phase(input[:, channel, :], eps=eps)
+            sp_list.append(mag)
+            cos_list.append(cos)
+            sin_list.append(sin)
+        sps = np.concatenate(sp_list, axis=1)
+        coss = np.concatenate(cos_list, axis=1)
+        sins = np.concatenate(sin_list, axis=1)
+        return sps, coss, sins
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+def feature_maps_to_wav(
+        input_tensor,
+        sp,
+        sin_in,
+        cos_in,
+        audio_length,
+    ):
+        batch_size, _, time_steps, freq_bins = input_tensor.shape
+        x = input_tensor.reshape(
+            batch_size,
+            1,
+            1,
+            3,
+            time_steps,
+            freq_bins,
+        )
+        # x: (batch_size, target_sources_num, output_channels, self.K, time_steps, freq_bins)
+        mask_mag = sigmoid(x[:, :, :, 0, :, :])
+        _mask_real = np.tanh(x[:, :, :, 1, :, :])
+        _mask_imag = np.tanh(x[:, :, :, 2, :, :])
+        # linear_mag = torch.tanh(x[:, :, :, 3, :, :])
+        _, phase = librosa.magphase(_mask_real + 1j*_mask_imag)
+        #norm = (np.real(phase)**2 + np.imag(phase)**2)**0.5
+        mask_cos = np.real(phase)
+        mask_sin = np.imag(phase)
+        # Y = |Y|cos∠Y + j|Y|sin∠Y
+        #   = |Y|cos(∠X + ∠M) + j|Y|sin(∠X + ∠M)
+        #   = |Y|(cos∠X cos∠M - sin∠X sin∠M) + j|Y|(sin∠X cos∠M + cos∠X sin∠M)
+        out_cos = (
+            cos_in[:, None, :, :, :] * mask_cos - sin_in[:, None, :, :, :] * mask_sin
+        )
+        out_sin = (
+            sin_in[:, None, :, :, :] * mask_cos + cos_in[:, None, :, :, :] * mask_sin
+        )
+        # out_cos: (batch_size, target_sources_num, output_channels, time_steps, freq_bins)
+        # out_sin: (batch_size, target_sources_num, output_channels, time_steps, freq_bins)
+        # Calculate |Y|.
+        out_mag = np.max(sp[:, None, :, :, :] * mask_mag, 0)
+        # out_mag = F.relu_(sp[:, None, :, :, :] * mask_mag + linear_mag)
+        # out_mag: (batch_size, target_sources_num, output_channels, time_steps, freq_bins)
+        # Calculate Y_{real} and Y_{imag} for ISTFT.
+        out_real = out_mag * out_cos
+        out_imag = out_mag * out_sin
+        # out_real, out_imag: (batch_size, target_sources_num, output_channels, time_steps, freq_bins)
+        # Reformat shape to (N, 1, time_steps, freq_bins) for ISTFT where
+        # N = batch_size * target_sources_num * output_channels
+        shape = (
+            batch_size,
+            1,
+            time_steps,
+            freq_bins,
+        )
+        out_real = out_real.reshape(shape)
+        out_imag = out_imag.reshape(shape)
+        x = librosa.istft(
+             (out_real +  1j * out_imag)[0,0].astype('complex64').transpose((1,0)),
+             n_fft = 2048,
+             hop_length = 320,
+             win_length = 2048,
+             window = 'hann',
+             center = True,
+             length = audio_length,
+            )
+        return x
+# ======================
+# Main functions
+# ======================
+def inference(model, input_text, input_wav):
+    # tokenize
+    tokenizer = model['tokenizer']
+    text_prompt_tkn = dict(tokenizer(input_text, return_tensors = 'np', padding = True))
+    text_prompt_tkn = (text_prompt_tkn['input_ids'], text_prompt_tkn['attention_mask'])
+    # prepare audio input
+    mag, cosin, sinin = wav_to_spectrogram(input_wav)
+    orig_len = mag.shape[-1]
+    mag = mag.transpose((0,2,1))[None]
+    cosin = cosin.transpose((0,2,1))[None]
+    sinin = sinin.transpose((0,2,1))[None]
+    # preprocess
+    mag_in = preprocess_mag(mag)
+    # inference
+    query = model['querynet'].predict(text_prompt_tkn)[0]
+    output = model['sepnet'].predict((query, mag_in))[0]
+    # postprocess
+    output = output[:,:,:orig_len,:]# trim to original length
+    output_wav = feature_maps_to_wav(output, mag, sinin, cosin, input_wav.shape[-1])
+    return output_wav
+def split_audio(model):
+    input_text = args.prompt
+    input_wav = librosa.load(args.input[0], sr=32000, mono=True)[0][None,None,:]
+    logger.info("input_text: %s" % input_text)
+    # inference
+    logger.info('inference has started...')
+    if args.benchmark:
+        logger.info('BENCHMARK mode')
+        total_time_estimation = 0
+        for i in range(args.benchmark_count):
+            start = int(round(time.time() * 1000))
+            output = inference(model, input_text, input_wav)
+            end = int(round(time.time() * 1000))
+            estimation_time = (end - start)
+            # Logging
+            logger.info(f'\tailia processing estimation time {estimation_time} ms')
+            if i != 0:
+                total_time_estimation = total_time_estimation + estimation_time
+        logger.info(f'\taverage time estimation {total_time_estimation / (args.benchmark_count - 1)} ms')
+    else:
+        output = inference(model, input_text, input_wav)
+    # save output
+    if args.savepath is None:
+        sp = 'output.wav'
+    else:
+        sp = args.savepath
+    scipy.io.wavfile.write(sp, 32000, np.round(output * 32767).astype(np.int16))
+    logger.info(f"Separated audio has been saved to {sp}")
+    logger.info('Script finished successfully.')
+def main():
+    # model files check and download
+    check_and_download_models(QUERY_WEIGHT_PATH, QUERY_MODEL_PATH, REMOTE_PATH)
+    check_and_download_models(SEPNET_WEIGHT_PATH, SEPNET_MODEL_PATH, REMOTE_PATH)
+    env_id = args.env_id
+    # initialize
+    querynet = ailia.Net(None, QUERY_WEIGHT_PATH, env_id=env_id)
+    sepnet = ailia.Net(None, SEPNET_WEIGHT_PATH)
+    if args.disable_ailia_tokenizer:
+        from transformers import RobertaTokenizer
+        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
+    else:
+        import ailia_tokenizer
+        tokenizer = ailia_tokenizer.RobertaTokenizer.from_pretrained('./tokenizer/')
+    model = {
+        'querynet': querynet,
+        'sepnet':sepnet,
+        'tokenizer':tokenizer
+    }
+    split_audio(model)
+if __name__ == '__main__':
+    main()

models/ailia-models/code/input.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:228089ef063480d966c1003b9c4fd363c972d92e28e8ae9f229b3ac5a293c25a
+size 320044

models/ailia-models/code/output_thunder.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9588903b7d37571be22cdd2aa7ebe92ca101a9336a34e43dce7505f6cd133ef
+size 320044

models/ailia-models/code/output_waterdrops.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf50edbdce182145010a6fbe88f61231d468e2d71375a2ec39499159eb852c3b
+size 320044

models/ailia-models/code/tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

models/ailia-models/code/tokenizer/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/ailia-models/code/tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"model_max_length": 512}

models/ailia-models/code/tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/ailia-models/source.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+https://github.com/axinc-ai/ailia-models/tree/master/audio_processing/audiosep
+https://storage.googleapis.com/ailia-models/audiosep/audiosep_text.onnx
+https://storage.googleapis.com/ailia-models/audiosep/audiosep_text.onnx.prototxt
+https://storage.googleapis.com/ailia-models/audiosep/audiosep_resunet.onnx
+https://storage.googleapis.com/ailia-models/audiosep/audiosep_resunet.onnx.prototxt

models/audiosep-demo/.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

models/audiosep-demo/README.md ADDED Viewed

	@@ -0,0 +1,9 @@

+---
+license: apache-2.0
+---
+## AudioSep model
+This model was proposed in ["Separate Anything You Describe"](https://audio-agi.github.io/Separate-Anything-You-Describe/).
+![image/png](https://cdn-uploads.huggingface.co/production/uploads/5f1158120c833276f61f1a84/X1Src5PFpfPvcEpj9yVih.png)

models/audiosep-demo/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37f1691fb067e2575f1ad1cfbfe44b7b3da18e52f33fcb2b0937b72952f11ba1
+size 957134817

models/audiosep-demo/source.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ https://huggingface.co/nielsr/audiosep-demo