Spaces:

bianxing77
/

Hive

Sleeping

App Files Files Community

bianxing77 committed on Feb 27

Commit

1dad4d3

verified ·

1 Parent(s): 1d887a7

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +11 -0
app.py +349 -0
audiosep/__pycache__/utils.cpython-310.pyc +0 -0
audiosep/__pycache__/utils.cpython-312.pyc +0 -0
audiosep/config/audiosep_base.yaml +41 -0
audiosep/models/CLAP/__init__.py +0 -0
audiosep/models/CLAP/__pycache__/__init__.cpython-310.pyc +0 -0
audiosep/models/CLAP/__pycache__/__init__.cpython-311.pyc +0 -0
audiosep/models/CLAP/__pycache__/__init__.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__init__.py +25 -0
audiosep/models/CLAP/open_clip/__pycache__/__init__.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/__init__.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/__init__.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/factory.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/factory.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/factory.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/feature_fusion.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/feature_fusion.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/feature_fusion.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/htsat.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/htsat.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/htsat.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/loss.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/loss.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/loss.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/model.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/model.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/model.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/openai.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/openai.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/openai.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/pann_model.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/pann_model.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/pann_model.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/pretrained.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/pretrained.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/pretrained.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/timm_model.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/timm_model.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/timm_model.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/tokenizer.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/tokenizer.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/tokenizer.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/transform.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/transform.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/transform.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/utils.cpython-310.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/utils.cpython-311.pyc +0 -0
audiosep/models/CLAP/open_clip/__pycache__/utils.cpython-312.pyc +0 -0
audiosep/models/CLAP/open_clip/bert.py +40 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples/acoustic_guitar.wav filter=lfs diff=lfs merge=lfs -text
+examples/laughing.wav filter=lfs diff=lfs merge=lfs -text
+examples/ticktok_piano.wav filter=lfs diff=lfs merge=lfs -text
+examples/water_drops.wav filter=lfs diff=lfs merge=lfs -text
+flowsep/bigvgan/g_01000000 filter=lfs diff=lfs merge=lfs -text
+flowsep/latent_diffusion/modules/losses/panns_distance/model/__pycache__/models.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+flowsep/latent_diffusion/modules/losses/panns_distance/model/__pycache__/models.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text
+temp_result/acoustic_guitar.wav filter=lfs diff=lfs merge=lfs -text
+temp_result/laughing.wav filter=lfs diff=lfs merge=lfs -text
+temp_result/mixed/acoustic_guitar.wav filter=lfs diff=lfs merge=lfs -text
+temp_result/mixed/laughing.wav filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,349 @@

+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "audiosep"))
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "flowsep"))
+import gradio as gr
+import torch
+import numpy as np
+import torchaudio
+import librosa
+import yaml
+from huggingface_hub import hf_hub_download
+from pytorch_lightning import seed_everything
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+device = "cpu"  # inference is pinned to CPU; the CUDA auto-detect line above is commented out
+_audiosep_model = None  # cache filled on first call to load_audiosep()
+_flowsep_model = None  # cache filled on first call to load_flowsep()
+_flowsep_preprocessor = None  # cached together with _flowsep_model by load_flowsep()
+class FlowSepPreprocessor:
+    def __init__(self, config):
+        import utilities.audio as Audio
+        self.sampling_rate = config["preprocessing"]["audio"]["sampling_rate"]
+        self.duration = config["preprocessing"]["audio"]["duration"]
+        self.hopsize = config["preprocessing"]["stft"]["hop_length"]
+        self.target_length = int(self.duration * self.sampling_rate / self.hopsize)
+        self.STFT = Audio.stft.TacotronSTFT(
+            config["preprocessing"]["stft"]["filter_length"],
+            config["preprocessing"]["stft"]["hop_length"],
+            config["preprocessing"]["stft"]["win_length"],
+            config["preprocessing"]["mel"]["n_mel_channels"],
+            config["preprocessing"]["audio"]["sampling_rate"],
+            config["preprocessing"]["mel"]["mel_fmin"],
+            config["preprocessing"]["mel"]["mel_fmax"],
+        )
+    def read_wav_file(self, filename):
+        waveform, sr = torchaudio.load(filename)
+        target_length = int(sr * self.duration)
+        if waveform.shape[-1] > target_length:
+            waveform = waveform[:, :target_length]
+        if sr != self.sampling_rate:
+            waveform = torchaudio.functional.resample(waveform, sr, self.sampling_rate)
+        waveform = waveform.numpy()[0, ...]
+        waveform = waveform - np.mean(waveform)
+        waveform = waveform / (np.max(np.abs(waveform)) + 1e-8)
+        waveform = waveform * 0.5
+        waveform = waveform[None, ...]
+        target_samples = int(self.sampling_rate * self.duration)
+        if waveform.shape[-1] < target_samples:
+            temp_wav = np.zeros((1, target_samples), dtype=np.float32)
+            temp_wav[:, :waveform.shape[-1]] = waveform
+            waveform = temp_wav
+        return waveform
+    def wav_feature_extraction(self, waveform):
+        import utilities.audio as Audio
+        waveform = waveform[0, ...]
+        waveform = torch.FloatTensor(waveform)
+        log_mel_spec, stft, energy = Audio.tools.get_mel_from_wav(waveform, self.STFT)
+        log_mel_spec = torch.FloatTensor(log_mel_spec.T)
+        stft = torch.FloatTensor(stft.T)
+        log_mel_spec = self._pad_spec(log_mel_spec)
+        stft = self._pad_spec(stft)
+        return log_mel_spec, stft
+    def _pad_spec(self, log_mel_spec):
+        n_frames = log_mel_spec.shape[0]
+        p = self.target_length - n_frames
+        if p > 0:
+            m = torch.nn.ZeroPad2d((0, 0, 0, p))
+            log_mel_spec = m(log_mel_spec)
+        elif p < 0:
+            log_mel_spec = log_mel_spec[:self.target_length, :]
+        if log_mel_spec.size(-1) % 2 != 0:
+            log_mel_spec = log_mel_spec[..., :-1]
+        return log_mel_spec
+    def load_full_audio(self, filename):
+        waveform, sr = torchaudio.load(filename)
+        if sr != self.sampling_rate:
+            waveform = torchaudio.functional.resample(waveform, sr, self.sampling_rate)
+        waveform = waveform.numpy()[0, ...]
+        return waveform
+    def preprocess_chunk(self, chunk):
+        chunk = chunk - np.mean(chunk)
+        chunk = chunk / (np.max(np.abs(chunk)) + 1e-8)
+        chunk = chunk * 0.5
+        return chunk
+def load_audiosep():
+    global _audiosep_model
+    if _audiosep_model is not None:
+        return _audiosep_model
+    from models.clap_encoder import CLAP_Encoder
+    from utils import parse_yaml, load_ss_model
+    clap_ckpt = hf_hub_download(repo_id="bianxing77/AudioSep-hive", filename="music_speech_audioset_epoch_15_esc_89.98.pt")
+    query_encoder = CLAP_Encoder(pretrained_path=clap_ckpt).eval()
+    config_file = hf_hub_download(repo_id="bianxing77/AudioSep-hive", filename="config.yaml")
+    checkpoint_file = hf_hub_download(repo_id="bianxing77/AudioSep-hive", filename="audiosep_hive.ckpt")
+    configs = parse_yaml(config_file)
+    model = load_ss_model(configs=configs, checkpoint_path=checkpoint_file, query_encoder=query_encoder)
+    model = model.to(device).eval()
+    _audiosep_model = model
+    return model
+def load_flowsep():
+    global _flowsep_model, _flowsep_preprocessor
+    if _flowsep_model is not None:
+        return _flowsep_model, _flowsep_preprocessor
+    seed_everything(0)
+    from latent_diffusion.util import instantiate_from_config
+    config_file = hf_hub_download(repo_id="bianxing77/FlowSep-hive", filename="config.yaml")
+    model_file = hf_hub_download(repo_id="bianxing77/FlowSep-hive", filename="flowsep_hive.ckpt")
+    configs = yaml.load(open(config_file, 'r'), Loader=yaml.FullLoader)
+    configs["model"]["params"]["first_stage_config"]["params"]["reload_from_ckpt"] = None
+    preprocessor = FlowSepPreprocessor(configs)
+    model = instantiate_from_config(configs["model"]).to(device)
+    try:
+        ckpt = torch.load(model_file, map_location=device, weights_only=False)["state_dict"]
+    except TypeError:
+        ckpt = torch.load(model_file, map_location=device)["state_dict"]
+    model.load_state_dict(ckpt, strict=True)
+    model.eval()
+    _flowsep_model = model
+    _flowsep_preprocessor = preprocessor
+    return model, preprocessor
+AUDIOSEP_SR = 32000  # sample rate (Hz) used to load the mixture for AudioSep and returned with its output
+FLOWSEP_CHUNK_IN = 163840  # samples fed to FlowSep per chunk (10.24 s at FLOWSEP_SR)
+FLOWSEP_CHUNK_OUT = 160000  # samples kept from each FlowSep chunk output (10 s at FLOWSEP_SR); also the chunk stride
+FLOWSEP_SR = 16000  # sample rate (Hz) reported for FlowSep output
+def separate_audiosep(audio_path, text):
+    model = load_audiosep()
+    mixture, _ = librosa.load(audio_path, sr=AUDIOSEP_SR, mono=True)
+    input_len = mixture.shape[0]
+    with torch.no_grad():
+        conditions = model.query_encoder.get_query_embed(
+            modality='text', text=[text], device=device
+        )
+        input_dict = {
+            "mixture": torch.Tensor(mixture)[None, None, :].to(device),
+            "condition": conditions,
+        }
+        if input_len > AUDIOSEP_SR * 10:
+            sep_audio = model.ss_model.chunk_inference(input_dict)
+            sep_audio = sep_audio.squeeze()
+        else:
+            sep_segment = model.ss_model(input_dict)["waveform"]
+            sep_audio = sep_segment.squeeze(0).squeeze(0).data.cpu().numpy()
+        sep_audio = sep_audio[:input_len]
+    return (AUDIOSEP_SR, sep_audio)
+def _flowsep_process_chunk(model, preprocessor, chunk_wav, text):
+    chunk_wav = preprocessor.preprocess_chunk(chunk_wav)
+    if len(chunk_wav) < FLOWSEP_CHUNK_IN:
+        pad = np.zeros(FLOWSEP_CHUNK_IN - len(chunk_wav), dtype=np.float32)
+        chunk_wav = np.concatenate([chunk_wav, pad])
+    chunk_wav = chunk_wav[:FLOWSEP_CHUNK_IN]
+    mixed_mel, stft = preprocessor.wav_feature_extraction(chunk_wav.reshape(1, -1))
+    batch = {
+        "fname": ["temp"],
+        "text": [text],
+        "caption": [text],
+        "waveform": torch.rand(1, 1, FLOWSEP_CHUNK_IN).to(device),
+        "log_mel_spec": torch.rand(1, 1024, 64).to(device),
+        "sampling_rate": torch.tensor([FLOWSEP_SR]).to(device),
+        "label_vector": torch.rand(1, 527).to(device),
+        "stft": torch.rand(1, 1024, 512).to(device),
+        "mixed_waveform": torch.from_numpy(chunk_wav.reshape(1, 1, FLOWSEP_CHUNK_IN)).to(device),
+        "mixed_mel": mixed_mel.reshape(1, mixed_mel.shape[0], mixed_mel.shape[1]).to(device),
+    }
+    result = model.generate_sample(
+        [batch],
+        name="temp_result",
+        unconditional_guidance_scale=1.0,
+        ddim_steps=20,
+        n_gen=1,
+        save=False,
+        save_mixed=False,
+    )
+    if isinstance(result, np.ndarray):
+        out = result.squeeze()
+    else:
+        out = result.squeeze().cpu().numpy()
+    return out[:FLOWSEP_CHUNK_OUT]
+def separate_flowsep(audio_path, text):
+    model, preprocessor = load_flowsep()
+    full_wav = preprocessor.load_full_audio(audio_path)
+    input_len = full_wav.shape[0]
+    with torch.no_grad():
+        if input_len <= FLOWSEP_CHUNK_IN:
+            sep_audio = _flowsep_process_chunk(model, preprocessor, full_wav.copy(), text)
+        else:
+            out_list = []
+            start = 0
+            while start < input_len:
+                end = min(start + FLOWSEP_CHUNK_IN, input_len)
+                chunk = full_wav[start:end]
+                out_chunk = _flowsep_process_chunk(model, preprocessor, chunk.copy(), text)
+                need = min(FLOWSEP_CHUNK_OUT, input_len - start)
+                out_list.append(out_chunk[:need])
+                start += FLOWSEP_CHUNK_OUT
+            sep_audio = np.concatenate(out_list)
+        if len(sep_audio) > input_len:
+            sep_audio = sep_audio[:input_len]
+        elif len(sep_audio) < input_len:
+            sep_audio = np.pad(sep_audio, (0, input_len - len(sep_audio)), mode="constant", constant_values=0)
+    return (FLOWSEP_SR, sep_audio)
+def inference(audio, text, model_choice):
+    if audio is None:
+        raise gr.Error("Please upload an audio file / 请上传音频文件")
+    if not text or not text.strip():
+        raise gr.Error("Please enter a text query / 请输入文本描述")
+    if model_choice == "AudioSep-hive":
+        return separate_audiosep(audio, text)
+    else:
+        return separate_flowsep(audio, text)
+DESCRIPTION = """
+# Universal Sound Separation on HIVE
+**Hive** is a high-quality synthetic dataset (2k hours) built via an automated pipeline that mines high-purity single-event segments and synthesizes semantically consistent mixtures. Despite using only ~0.2% of the data scale of million-hour baselines, models trained on Hive achieve competitive separation accuracy and strong zero-shot generalization.
+This space provides two separation models trained on Hive:
+- **AudioSep**: A foundation model for open-domain sound separation with natural language queries, based on [AudioSep](https://github.com/Audio-AGI/AudioSep).
+- **FlowSep**: A flow-matching based separation model with text conditioning, based on [FlowSep](https://github.com/Audio-AGI/FlowSep).
+**How to use:**
+1. Upload an audio file (mix of sounds)
+2. Describe what you want to separate (e.g., "piano", "speech", "dog barking")
+3. Select a model and click Separate
+[[Paper]](https://arxiv.org/abs/2601.22599) | [[Code]](https://github.com/ShandaAI/Hive) | [[Hive Dataset]](https://huggingface.co/datasets/ShandaAI/Hive) | [[Demo Page]](https://shandaai.github.io/Hive/)
+"""
+EXAMPLES = [  # [audio path, text query] pairs passed to gr.Examples
+    ["examples/acoustic_guitar.wav", "acoustic guitar"],
+    ["examples/laughing.wav", "laughing"],
+    ["examples/ticktok_piano.wav", "A ticktock sound playing at the same rhythm with piano"],
+    ["examples/water_drops.wav", "water drops"],
+    ["examples/noisy_speech.wav", "speech"],  # NOTE(review): not among the example wavs given LFS rules in .gitattributes -- confirm the file exists
+]
+with gr.Blocks(  # top-level Gradio layout, bound to `demo`
+    theme=gr.themes.Soft(),
+    title="Universal Sound Separation on HIVE",
+) as demo:
+    gr.Markdown(DESCRIPTION)
+    with gr.Row():
+        with gr.Column():  # first column: inputs and the submit button
+            audio_input = gr.Audio(label="Input Mixture Audio", type="filepath")  # its value is passed on to inference()
+            text_input = gr.Textbox(
+                label="Text Query",
+                placeholder='e.g. "dog barking", "piano playing"',
+            )
+            model_choice = gr.Dropdown(
+                choices=["AudioSep-hive", "FlowSep-hive"],
+                value="AudioSep-hive",  # AudioSep is preselected
+                label="Select Model",
+            )
+            submit_btn = gr.Button("Separate", variant="primary")
+        with gr.Column():  # second column: separated result
+            audio_output = gr.Audio(label="Separated Audio")
+    submit_btn.click(  # run inference(audio, text, model_choice) and show its result in audio_output
+        fn=inference,
+        inputs=[audio_input, text_input, model_choice],
+        outputs=audio_output,
+    )
+    gr.Markdown("## Examples")
+    gr.Examples(examples=EXAMPLES, inputs=[audio_input, text_input])  # examples fill only the audio and text inputs
+DEBUG = False  # set to True to run run_debug() once before the UI is launched
+def run_debug():
+    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
+    test_path = os.path.join(examples_dir, "acoustic_guitar.wav")
+    test_text = "acoustic guitar"
+    print("\n" + "=" * 50)
+    print("[DEBUG] Starting inference test for both models")
+    print("=" * 50)
+    if not os.path.exists(test_path):
+        print(f"[DEBUG] Skip: {test_path} not found")
+        return
+    print(f"\n[DEBUG] Using test audio: {test_path}")
+    print("\n" + "-" * 40)
+    print("[DEBUG] AudioSep inference")
+    print("-" * 40)
+    print("[DEBUG] Loading AudioSep model...")
+    out_audiosep = separate_audiosep(test_path, test_text)
+    print(f"[DEBUG] AudioSep done. Output sr={out_audiosep[0]}, shape={np.array(out_audiosep[1]).shape}")
+    print("\n" + "-" * 40)
+    print("[DEBUG] FlowSep inference")
+    print("-" * 40)
+    print("[DEBUG] Loading FlowSep model...")
+    out_flowsep = separate_flowsep(test_path, test_text)
+    print(f"[DEBUG] FlowSep done. Output sr={out_flowsep[0]}, shape={np.array(out_flowsep[1]).shape}")
+    print("\n" + "=" * 50)
+    print("[DEBUG] Both models passed inference test")
+    print("=" * 50 + "\n")
+if DEBUG:  # optional smoke test, runs at import time before the UI starts
+    run_debug()
+demo.launch()  # start the Gradio app; executed whenever this module is run or imported

audiosep/__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (9.65 kB). View file

audiosep/__pycache__/utils.cpython-312.pyc ADDED Viewed

Binary file (15.4 kB). View file

audiosep/config/audiosep_base.yaml ADDED Viewed

	@@ -0,0 +1,41 @@

+---
+task_name: AudioSep
+data:
+    datafiles:
+        - 'datafiles/template.json'
+    sampling_rate: 32000
+    segment_seconds: 5
+    loudness_norm:
+        lower_db: -10
+        higher_db: 10
+    max_mix_num: 2
+model:
+    query_net: CLAP
+    condition_size: 512
+    model_type: ResUNet30
+    input_channels: 1
+    output_channels: 1
+    resume_checkpoint: ""
+    use_text_ratio: 1.0
+train:
+    optimizer:
+        optimizer_type: AdamW
+        learning_rate: 1e-3
+        warm_up_steps: 10000
+        reduce_lr_steps: 1000000
+        lr_lambda_type: constant_warm_up
+    num_nodes: 1
+    num_workers: 6
+    loss_type: l1_wav
+    sync_batchnorm: True
+    batch_size_per_device: 12
+    steps_per_epoch: 10000  # Every 10000 steps is called an `epoch`.
+    evaluate_step_frequency: 10000     # Evaluate every #evaluate_step_frequency steps.
+    save_step_frequency: 20000  # Save every #save_step_frequency steps.
+    early_stop_steps: 10000001
+    random_seed: 1234

audiosep/models/CLAP/__init__.py ADDED Viewed

File without changes

audiosep/models/CLAP/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (199 Bytes). View file

audiosep/models/CLAP/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (232 Bytes). View file

audiosep/models/CLAP/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (209 Bytes). View file

audiosep/models/CLAP/open_clip/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from .factory import (
+    list_models,
+    create_model,
+    create_model_and_transforms,
+    add_model_config,
+)
+from .loss import ClipLoss, gather_features, LPLoss, lp_gather_features, LPMetrics
+from .model import (
+    CLAP,
+    CLAPTextCfg,
+    CLAPVisionCfg,
+    CLAPAudioCfp,
+    convert_weights_to_fp16,
+    trace_model,
+)
+from .openai import load_openai_model, list_openai_models
+from .pretrained import (
+    list_pretrained,
+    list_pretrained_tag_models,
+    list_pretrained_model_tags,
+    get_pretrained_url,
+    download_pretrained,
+)
+from .tokenizer import SimpleTokenizer, tokenize
+from .transform import image_transform

audiosep/models/CLAP/open_clip/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (1.01 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (1.35 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (1.06 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/factory.cpython-310.pyc ADDED Viewed

Binary file (6.71 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/factory.cpython-311.pyc ADDED Viewed

Binary file (13.5 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/factory.cpython-312.pyc ADDED Viewed

Binary file (11.3 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/feature_fusion.cpython-310.pyc ADDED Viewed

Binary file (4.16 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/feature_fusion.cpython-311.pyc ADDED Viewed

Binary file (9.94 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/feature_fusion.cpython-312.pyc ADDED Viewed

Binary file (9.12 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/htsat.cpython-310.pyc ADDED Viewed

Binary file (30.8 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/htsat.cpython-311.pyc ADDED Viewed

Binary file (57.8 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/htsat.cpython-312.pyc ADDED Viewed

Binary file (54.1 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/loss.cpython-310.pyc ADDED Viewed

Binary file (8.01 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/loss.cpython-311.pyc ADDED Viewed

Binary file (17.8 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/loss.cpython-312.pyc ADDED Viewed

Binary file (16.1 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/model.cpython-310.pyc ADDED Viewed

Binary file (24.2 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/model.cpython-311.pyc ADDED Viewed

Binary file (48.2 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/model.cpython-312.pyc ADDED Viewed

Binary file (45.4 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/openai.cpython-310.pyc ADDED Viewed

Binary file (4.56 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/openai.cpython-311.pyc ADDED Viewed

Binary file (8.46 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/openai.cpython-312.pyc ADDED Viewed

Binary file (7.38 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/pann_model.cpython-310.pyc ADDED Viewed

Binary file (13.1 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/pann_model.cpython-311.pyc ADDED Viewed

Binary file (30 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/pann_model.cpython-312.pyc ADDED Viewed

Binary file (27.2 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/pretrained.cpython-310.pyc ADDED Viewed

Binary file (5.08 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/pretrained.cpython-311.pyc ADDED Viewed

Binary file (8.33 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/pretrained.cpython-312.pyc ADDED Viewed

Binary file (7.14 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/timm_model.cpython-310.pyc ADDED Viewed

Binary file (3.48 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/timm_model.cpython-311.pyc ADDED Viewed

Binary file (5.82 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/timm_model.cpython-312.pyc ADDED Viewed

Binary file (5.05 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/tokenizer.cpython-310.pyc ADDED Viewed

Binary file (7.4 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/tokenizer.cpython-311.pyc ADDED Viewed

Binary file (13.9 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/tokenizer.cpython-312.pyc ADDED Viewed

Binary file (11.1 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/transform.cpython-310.pyc ADDED Viewed

Binary file (1.02 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/transform.cpython-311.pyc ADDED Viewed

Binary file (1.6 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/transform.cpython-312.pyc ADDED Viewed

Binary file (1.36 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (10.5 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/utils.cpython-311.pyc ADDED Viewed

Binary file (19.9 kB). View file

audiosep/models/CLAP/open_clip/__pycache__/utils.cpython-312.pyc ADDED Viewed

Binary file (16.8 kB). View file

audiosep/models/CLAP/open_clip/bert.py ADDED Viewed

	@@ -0,0 +1,40 @@

+from transformers import BertTokenizer, BertModel
+tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+model = BertModel.from_pretrained("bert-base-uncased")
+text = "Replace me by any text you'd like."
+def bert_embeddings(text):
+    # text = "Replace me by any text you'd like."
+    encoded_input = tokenizer(text, return_tensors="pt")
+    output = model(**encoded_input)
+    return output
+from transformers import RobertaTokenizer, RobertaModel
+tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+model = RobertaModel.from_pretrained("roberta-base")
+text = "Replace me by any text you'd like."
+def Roberta_embeddings(text):
+    # text = "Replace me by any text you'd like."
+    encoded_input = tokenizer(text, return_tensors="pt")
+    output = model(**encoded_input)
+    return output
+from transformers import BartTokenizer, BartModel
+tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
+model = BartModel.from_pretrained("facebook/bart-base")
+text = "Replace me by any text you'd like."
+def bart_embeddings(text):
+    # text = "Replace me by any text you'd like."
+    encoded_input = tokenizer(text, return_tensors="pt")
+    output = model(**encoded_input)
+    return output