Upload folder using huggingface_hub
Browse files- .gitattributes +8 -0
- LICENSE +21 -0
- README.md +224 -0
- assets/human_eval.png +3 -0
- assets/longcat_video_avatar_logo.svg +0 -0
- assets/teaser.png +3 -0
- avatar_multi/config.json +37 -0
- avatar_multi/diffusion_pytorch_model.safetensors +3 -0
- avatar_single/config.json +37 -0
- avatar_single/diffusion_pytorch_model.safetensors +3 -0
- chinese-wav2vec2-base/.gitattributes +27 -0
- chinese-wav2vec2-base/README.md +61 -0
- chinese-wav2vec2-base/chinese-wav2vec2-base-fairseq-ckpt.pt +3 -0
- chinese-wav2vec2-base/config.json +105 -0
- chinese-wav2vec2-base/preprocessor_config.json +8 -0
- chinese-wav2vec2-base/pytorch_model.bin +3 -0
- config.json +3 -0
- model_index.json +3 -0
- vocal_separator/Kim_Vocal_2.onnx +3 -0
- vocal_separator/download_checks.json +289 -0
- vocal_separator/mdx_model_data.json +482 -0
- vocal_separator/vr_model_data.json +137 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
assets/actor1.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
assets/actor2.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
assets/human_eval.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
assets/postcad1.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
assets/sale1.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
assets/singer1.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
assets/singer2.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
assets/teaser.png filter=lfs diff=lfs merge=lfs -text
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 Meituan
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in
|
| 13 |
+
all copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
- zh
|
| 6 |
+
library_name: diffusers
|
| 7 |
+
tags:
|
| 8 |
+
- audio-text-to-video
|
| 9 |
+
- audio-image-text-to-video
|
| 10 |
+
- audio-driven-video-continuation
|
| 11 |
+
- diffusers
|
| 12 |
+
- transformers
|
| 13 |
+
- avatar
|
| 14 |
+
- video-generation
|
| 15 |
+
---
|
| 16 |
+
# LongCat-Video-Avatar
|
| 17 |
+
|
| 18 |
+
<div align="center">
|
| 19 |
+
<img src="assets/longcat_video_avatar_logo.svg" width="45%" alt="LongCat-Video" />
|
| 20 |
+
</div>
|
| 21 |
+
<hr>
|
| 22 |
+
|
| 23 |
+
<div align="center" style="line-height: 1;">
|
| 24 |
+
<a href='https://meigen-ai.github.io/LongCat-Video-Avatar/'><img src='https://img.shields.io/badge/Project-Page-green'></a>
|
| 25 |
+
<a href='https://github.com/meituan-longcat/LongCat-Video/blob/main/assets/LongCat-Video-Avatar-Tech-Report.pdf'><img src='https://img.shields.io/badge/Technique-Report-red'></a>
|
| 26 |
+
<a href='https://huggingface.co/meituan-longcat/LongCat-Video-Avatar'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue'></a>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
<div align="center" style="line-height: 1;">
|
| 30 |
+
<a href='https://github.com/meituan-longcat/LongCat-Flash-Chat/blob/main/figures/wechat_official_accounts.png'><img src='https://img.shields.io/badge/WeChat-LongCat-brightgreen?logo=wechat&logoColor=white'></a>
|
| 31 |
+
<a href='https://x.com/Meituan_LongCat'><img src='https://img.shields.io/badge/Twitter-LongCat-white?logo=x&logoColor=white'></a>
|
| 32 |
+
</div>
|
| 33 |
+
|
| 34 |
+
<div align="center" style="line-height: 1;">
|
| 35 |
+
<a href='LICENSE'><img src='https://img.shields.io/badge/License-MIT-f5de53?&color=f5de53'></a>
|
| 36 |
+
</div>
|
| 37 |
+
|
| 38 |
+
## 🚀 Model Introduction
|
| 39 |
+
We are excited to announce the release of LongCat-Video-Avatar, a unified model that delivers expressive and highly dynamic audio-driven character animation, supporting native tasks including Audio-Text-to-Video, Audio-Text-Image-to-Video, and Video Continuation with seamless compatibility for both single-stream and multi-stream audio inputs.
|
| 40 |
+
|
| 41 |
+
### Key Features
|
| 42 |
+
- 🌟 **Support Multiple Generation Modes**: One unified model can be used for *audio-text-to-video (AT2V)* generation, *audio-text-image-to-video (ATI2V)* generation, and *Video Continuation*.
|
| 43 |
+
- 🌟 **Natural Human Dynamics**: The disentangled unconditional guidance is designed to effectively decouple speech signals from motion dynamics for natural behavior.
|
| 44 |
+
- 🌟 **Avoid Repetitive Content**: The reference skip attention is adopted to strategically incorporate reference cues to preserve identity while preventing excessive conditional image leakage.
|
| 45 |
+
- 🌟 **Alleviate Error Accumulation from VAE**: Cross-Chunk Latent Stitching is designed to eliminate redundant VAE decode-encode cycles to reduce pixel degradation in long sequences.
|
| 46 |
+
|
| 47 |
+
For more details, please refer to the comprehensive [***LongCat-Video-Avatar Technical Report***](https://huggingface.co/meituan-longcat/LongCat-Video-Avatar).
|
| 48 |
+
|
| 49 |
+
<div align="center">
|
| 50 |
+
<img src="assets/teaser.png" width="90%" alt="LongCat-Video" />
|
| 51 |
+
</div>
|
| 52 |
+
|
| 53 |
+
## 🌀 Preview Gallery
|
| 54 |
+
<!-- <div align="center">
|
| 55 |
+
<video src="https://github.com/user-attachments/assets/00fa63f0-9c4e-461a-a79e-c662ad596d7d" width="2264" height="384"> </video>
|
| 56 |
+
</div> -->
|
| 57 |
+
The following videos showcase example generations from our model and have been compressed for easier viewing.
|
| 58 |
+
<table align="center" border="1" cellspacing="0" cellpadding="5">
|
| 59 |
+
<tr>
|
| 60 |
+
<td>
|
| 61 |
+
<video src="https://github.com/user-attachments/assets/2628acfa-36e3-4729-8030-413387c3ccf4" width="300" controls>
|
| 62 |
+
</video>
|
| 63 |
+
</td>
|
| 64 |
+
<td>
|
| 65 |
+
<video src="https://github.com/user-attachments/assets/618c01bd-b1f6-4256-a300-be8ce6bfe167" width="300" controls>
|
| 66 |
+
</video>
|
| 67 |
+
</td>
|
| 68 |
+
</tr>
|
| 69 |
+
<tr>
|
| 70 |
+
<td>
|
| 71 |
+
<video src="https://github.com/user-attachments/assets/1ecf9e0b-aaa0-481a-8b06-8de181b928a1" width="300" controls>
|
| 72 |
+
</video>
|
| 73 |
+
</td>
|
| 74 |
+
<td>
|
| 75 |
+
<video src="https://github.com/user-attachments/assets/384cf13b-ddd9-4dc4-a04d-282f8618f702" width="300" controls>
|
| 76 |
+
</video>
|
| 77 |
+
</td>
|
| 78 |
+
</tr>
|
| 79 |
+
<tr>
|
| 80 |
+
<td>
|
| 81 |
+
<video src="https://github.com/user-attachments/assets/7407b82d-7e8e-40d2-b65d-906926c80250" width="300" controls>
|
| 82 |
+
</video>
|
| 83 |
+
</td>
|
| 84 |
+
<td>
|
| 85 |
+
<video src="https://github.com/user-attachments/assets/03cca3e0-86ed-4a0a-a14f-6636d9177d0e" width="300" controls>
|
| 86 |
+
</video>
|
| 87 |
+
</td>
|
| 88 |
+
</tr>
|
| 89 |
+
</table>
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
## 📊 Human Evaluation
|
| 93 |
+
Human evaluation of the naturalness and realism of the synthesized videos. The EvalTalker benchmark [1] contains more than 400 test samples of varying difficulty for evaluating single-person and multi-person video generation.
|
| 94 |
+
<div align="center">
|
| 95 |
+
<img src="assets/human_eval.png" width="90%" alt="LongCat-Video-Avatar" />
|
| 96 |
+
</div>
|
| 97 |
+
|
| 98 |
+
<p style="font-size:0.9em; color:gray;">
|
| 99 |
+
Reference:<br>
|
| 100 |
+
[1] Zhou Y, Zhu X, Ren S, et al. EvalTalker: Learning to Evaluate Real-Portrait-Driven Multi-Subject Talking Humans[J]. arXiv preprint arXiv:2512.01340, 2025.
|
| 101 |
+
</p>
|
| 102 |
+
|
| 103 |
+
## 💡 Quick Start
|
| 104 |
+
Clone the repo
|
| 105 |
+
|
| 106 |
+
```shell
|
| 107 |
+
git clone --single-branch --branch main https://github.com/meituan-longcat/LongCat-Video
|
| 108 |
+
cd LongCat-Video
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
Install dependencies
|
| 112 |
+
|
| 113 |
+
```shell
|
| 114 |
+
# create conda environment
|
| 115 |
+
conda create -n longcat-video python=3.10
|
| 116 |
+
conda activate longcat-video
|
| 117 |
+
|
| 118 |
+
# install torch (configure according to your CUDA version)
|
| 119 |
+
pip install torch==2.6.0+cu124 torchvision==0.21.0+cu124 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124
|
| 120 |
+
|
| 121 |
+
# install flash-attn-2
|
| 122 |
+
pip install ninja
|
| 123 |
+
pip install psutil
|
| 124 |
+
pip install packaging
|
| 125 |
+
pip install flash_attn==2.7.4.post1
|
| 126 |
+
|
| 127 |
+
# install other requirements
|
| 128 |
+
pip install -r requirements.txt
|
| 129 |
+
|
| 130 |
+
# install longcat-video-avatar requirements
|
| 131 |
+
conda install -c conda-forge librosa
|
| 132 |
+
conda install -c conda-forge ffmpeg
|
| 133 |
+
pip install -r requirements_avatar.txt
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
FlashAttention-2 is enabled in the model config by default; you can also change the model config ("./weights/LongCat-Video-Avatar/avatar_multi/config.json" and "./weights/LongCat-Video-Avatar/avatar_single/config.json") to use FlashAttention-3 or xformers once installed.
|
| 137 |
+
|
| 138 |
+
### ⛽️ Model Download
|
| 139 |
+
|
| 140 |
+
| Models | Description | Download Link |
|
| 141 |
+
| --- | --- | --- |
|
| 142 |
+
| LongCat-Video | foundational video generation | 🤗 [Huggingface](https://huggingface.co/meituan-longcat/LongCat-Video) |
|
| 143 |
+
| LongCat-Video-Avatar-Single | single-character audio-driven video generation | 🤗 [Huggingface](https://huggingface.co/meituan-longcat/LongCat-Video-Avatar) |
|
| 144 |
+
| LongCat-Video-Avatar-Multi | multi-character audio-driven video generation | 🤗 [Huggingface](https://huggingface.co/meituan-longcat/LongCat-Video-Avatar) |
|
| 145 |
+
|
| 146 |
+
Download models using huggingface-cli:
|
| 147 |
+
```shell
|
| 148 |
+
pip install "huggingface_hub[cli]"
|
| 149 |
+
huggingface-cli download meituan-longcat/LongCat-Video --local-dir ./weights/LongCat-Video
|
| 150 |
+
huggingface-cli download meituan-longcat/LongCat-Video-Avatar --local-dir ./weights/LongCat-Video-Avatar
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
+
### 🔑 Quick Inference
|
| 154 |
+
Usage Tips
|
| 155 |
+
> - Lip synchronization accuracy: Audio CFG works optimally between 3–5. Increase the audio CFG value for better synchronization.
|
| 156 |
+
> - Prompt Enhancement: Include clear verbal-action cues (e.g., talking, speaking) in the prompt to achieve more natural lip movements.
|
| 157 |
+
> - Mitigate repeated actions: Setting the reference image index (--ref_img_index, defaults to 10) between 0 and 24 ensures better consistency, while selecting values outside this range (e.g., -10 or 30) helps reduce repeated actions. Additionally, increasing the mask frame range (--mask_frame_range, defaults to 3) can further help mitigate repeated actions, but excessively large values may introduce artifacts.
|
| 158 |
+
> - Super resolution: Our model is compatible with both 480P and 720P, which can be controlled via --resolution.
|
| 159 |
+
> - Dual-Audio Modes: Merge mode (set audio_type to para) requires two audio clips of equal length, and the resulting audio is obtained by summing the two clips; Concatenation mode (set audio_type to add) does not require equal-length inputs, and the resulting audio is formed by sequentially concatenating the two clips with silence padding for any gaps, where by default person1 speaks first and person2 speaks afterward.
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
#### Single-Person Animation
|
| 163 |
+
```shell
|
| 164 |
+
# Audio-Text-to-Video
|
| 165 |
+
torchrun --nproc_per_node=2 run_demo_avatar_single_audio_to_video.py --context_parallel_size=2 --checkpoint_dir=./weights/LongCat-Video-Avatar --stage_1=at2v --input_json=assets/avatar/single_example_1.json
|
| 166 |
+
|
| 167 |
+
# Audio-Image-to-Video
|
| 168 |
+
torchrun --nproc_per_node=2 run_demo_avatar_single_audio_to_video.py --context_parallel_size=2 --checkpoint_dir=./weights/LongCat-Video-Avatar --stage_1=ai2v --input_json=assets/avatar/single_example_1.json
|
| 169 |
+
|
| 170 |
+
# Audio-Text-to-Video and Video-Continuation
|
| 171 |
+
torchrun --nproc_per_node=2 run_demo_avatar_single_audio_to_video.py --context_parallel_size=2 --checkpoint_dir=./weights/LongCat-Video-Avatar --stage_1=at2v --input_json=assets/avatar/single_example_1.json --num_segments=5 --ref_img_index=10 --mask_frame_range=3
|
| 172 |
+
|
| 173 |
+
# Audio-Image-to-Video and Video-Continuation
|
| 174 |
+
torchrun --nproc_per_node=2 run_demo_avatar_single_audio_to_video.py --context_parallel_size=2 --checkpoint_dir=./weights/LongCat-Video-Avatar --stage_1=ai2v --input_json=assets/avatar/single_example_1.json --num_segments=5 --ref_img_index=10 --mask_frame_range=3
|
| 175 |
+
```
|
| 176 |
+
|
| 177 |
+
#### Multi-Person Animation
|
| 178 |
+
```shell
|
| 179 |
+
# Audio-Image-to-Video
|
| 180 |
+
torchrun --nproc_per_node=2 run_demo_avatar_multi_audio_to_video.py --context_parallel_size=2 --checkpoint_dir=./weights/LongCat-Video-Avatar --input_json=assets/avatar/multi_example_1.json
|
| 181 |
+
|
| 182 |
+
# Audio-Image-to-Video and Video-Continuation
|
| 183 |
+
torchrun --nproc_per_node=2 run_demo_avatar_multi_audio_to_video.py --context_parallel_size=2 --checkpoint_dir=./weights/LongCat-Video-Avatar --input_json=assets/avatar/multi_example_1.json --num_segments=5 --ref_img_index=10 --mask_frame_range=3
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
## ⚖️ License Agreement
|
| 187 |
+
|
| 188 |
+
The **model weights** are released under the **MIT License**.
|
| 189 |
+
|
| 190 |
+
Any contributions to this repository are licensed under the MIT License, unless otherwise stated. This license does not grant any rights to use Meituan trademarks or patents.
|
| 191 |
+
|
| 192 |
+
See the [LICENSE](LICENSE) file for the full license text.
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
## 🧠 Usage Considerations
|
| 196 |
+
This model has not been specifically designed or comprehensively evaluated for every possible downstream application.
|
| 197 |
+
|
| 198 |
+
Developers should take into account the known limitations of generative models, including performance variations across different languages, and carefully assess accuracy, safety, and fairness before deploying the model in sensitive or high-risk scenarios.
|
| 199 |
+
It is the responsibility of developers and downstream users to understand and comply with all applicable laws and regulations relevant to their use case, including but not limited to data protection, privacy, and content safety requirements.
|
| 200 |
+
|
| 201 |
+
Nothing in this Model Card should be interpreted as altering or restricting the terms of the MIT License under which the model is released.
|
| 202 |
+
|
| 203 |
+
## 📖 Citation
|
| 204 |
+
We kindly encourage citation of our work if you find it useful.
|
| 205 |
+
|
| 206 |
+
```
|
| 207 |
+
@misc{meituanlongcatteam2025longcatvideoavatartechnicalreport,
|
| 208 |
+
title={LongCat-Video-Avatar Technical Report},
|
| 209 |
+
author={Meituan LongCat Team},
|
| 210 |
+
year={2025},
|
| 211 |
+
eprint={},
|
| 212 |
+
archivePrefix={arXiv},
|
| 213 |
+
primaryClass={cs.CV},
|
| 214 |
+
url={},
|
| 215 |
+
}
|
| 216 |
+
```
|
| 217 |
+
|
| 218 |
+
## 🙏 Acknowledgements
|
| 219 |
+
|
| 220 |
+
We would like to thank the contributors to the [Wan](https://huggingface.co/Wan-AI), [UMT5-XXL](https://huggingface.co/google/umt5-xxl), [Diffusers](https://github.com/huggingface/diffusers) and [HuggingFace](https://huggingface.co) repositories, for their open research.
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
## 📞 Contact
|
| 224 |
+
Please contact us at <a href="mailto:longcat-team@meituan.com">longcat-team@meituan.com</a> or join our WeChat Group if you have any questions.
|
assets/human_eval.png
ADDED
|
|
Git LFS Details
|
assets/longcat_video_avatar_logo.svg
ADDED
|
|
assets/teaser.png
ADDED
|
|
Git LFS Details
|
avatar_multi/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "LongCatVideoAvatarTransformer3DModel",
|
| 3 |
+
"_diffusers_version": "0.32.0",
|
| 4 |
+
"adaln_tembed_dim": 512,
|
| 5 |
+
"bsa_params":{
|
| 6 |
+
"sparsity": 0.9375,
|
| 7 |
+
"chunk_3d_shape_q": [4, 4, 4],
|
| 8 |
+
"chunk_3d_shape_k": [4, 4, 4]
|
| 9 |
+
},
|
| 10 |
+
"caption_channels": 4096,
|
| 11 |
+
"cp_split_hw": null,
|
| 12 |
+
"depth": 48,
|
| 13 |
+
"enable_bsa": false,
|
| 14 |
+
"enable_flashattn3": false,
|
| 15 |
+
"enable_flashattn2": true,
|
| 16 |
+
"enable_xformers": false,
|
| 17 |
+
"frequency_embedding_size": 256,
|
| 18 |
+
"hidden_size": 4096,
|
| 19 |
+
"in_channels": 16,
|
| 20 |
+
"text_tokens_zero_pad": true,
|
| 21 |
+
"mlp_ratio": 4,
|
| 22 |
+
"num_heads": 32,
|
| 23 |
+
"out_channels": 16,
|
| 24 |
+
"patch_size": [
|
| 25 |
+
1,
|
| 26 |
+
2,
|
| 27 |
+
2
|
| 28 |
+
],
|
| 29 |
+
"audio_window": 5,
|
| 30 |
+
"intermediate_dim": 512,
|
| 31 |
+
"output_dim": 768,
|
| 32 |
+
"context_tokens": 32,
|
| 33 |
+
"vae_scale": 4,
|
| 34 |
+
"audio_prenorm": false,
|
| 35 |
+
"class_range": 24,
|
| 36 |
+
"class_interval": 4
|
| 37 |
+
}
|
avatar_multi/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0003ee67645295087f7cc0e569174ec7201d3900ed9c7cc69ad0b817d27bad8f
|
| 3 |
+
size 31743497896
|
avatar_single/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "LongCatVideoAvatarTransformer3DModel",
|
| 3 |
+
"_diffusers_version": "0.32.0",
|
| 4 |
+
"adaln_tembed_dim": 512,
|
| 5 |
+
"bsa_params":{
|
| 6 |
+
"sparsity": 0.9375,
|
| 7 |
+
"chunk_3d_shape_q": [4, 4, 4],
|
| 8 |
+
"chunk_3d_shape_k": [4, 4, 4]
|
| 9 |
+
},
|
| 10 |
+
"caption_channels": 4096,
|
| 11 |
+
"cp_split_hw": null,
|
| 12 |
+
"depth": 48,
|
| 13 |
+
"enable_bsa": false,
|
| 14 |
+
"enable_flashattn3": false,
|
| 15 |
+
"enable_flashattn2": true,
|
| 16 |
+
"enable_xformers": false,
|
| 17 |
+
"frequency_embedding_size": 256,
|
| 18 |
+
"hidden_size": 4096,
|
| 19 |
+
"in_channels": 16,
|
| 20 |
+
"text_tokens_zero_pad": true,
|
| 21 |
+
"mlp_ratio": 4,
|
| 22 |
+
"num_heads": 32,
|
| 23 |
+
"out_channels": 16,
|
| 24 |
+
"patch_size": [
|
| 25 |
+
1,
|
| 26 |
+
2,
|
| 27 |
+
2
|
| 28 |
+
],
|
| 29 |
+
"audio_window": 5,
|
| 30 |
+
"intermediate_dim": 512,
|
| 31 |
+
"output_dim": 768,
|
| 32 |
+
"context_tokens": 32,
|
| 33 |
+
"vae_scale": 4,
|
| 34 |
+
"audio_prenorm": false,
|
| 35 |
+
"class_range": 24,
|
| 36 |
+
"class_interval": 4
|
| 37 |
+
}
|
avatar_single/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05e49cf1e2dbd67b4128e70e26283eb0df1047e755f614e0d92b9342eae030c0
|
| 3 |
+
size 31743497896
|
chinese-wav2vec2-base/.gitattributes
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
chinese-wav2vec2-base/README.md
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
---
|
| 4 |
+
Pretrained on the 10k-hour WenetSpeech L subset. More details in [TencentGameMate/chinese_speech_pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
|
| 5 |
+
|
| 6 |
+
This model does not have a tokenizer as it was pretrained on audio alone.
|
| 7 |
+
In order to use this model for speech recognition, a tokenizer should be created and the model should be fine-tuned on labeled text data.
|
| 8 |
+
|
| 9 |
+
python package:
|
| 10 |
+
transformers==4.16.2
|
| 11 |
+
|
| 12 |
+
```python
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
import torch
|
| 16 |
+
import torch.nn.functional as F
|
| 17 |
+
import soundfile as sf
|
| 18 |
+
from fairseq import checkpoint_utils
|
| 19 |
+
|
| 20 |
+
from transformers import (
|
| 21 |
+
Wav2Vec2FeatureExtractor,
|
| 22 |
+
Wav2Vec2ForPreTraining,
|
| 23 |
+
Wav2Vec2Model,
|
| 24 |
+
)
|
| 25 |
+
from transformers.models.wav2vec2.modeling_wav2vec2 import _compute_mask_indices
|
| 26 |
+
|
| 27 |
+
model_path=""
|
| 28 |
+
wav_path=""
|
| 29 |
+
mask_prob=0.0
|
| 30 |
+
mask_length=10
|
| 31 |
+
|
| 32 |
+
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
|
| 33 |
+
model = Wav2Vec2Model.from_pretrained(model_path)
|
| 34 |
+
|
| 35 |
+
# for pretrain: Wav2Vec2ForPreTraining
|
| 36 |
+
# model = Wav2Vec2ForPreTraining.from_pretrained(model_path)
|
| 37 |
+
|
| 38 |
+
model = model.to(device)
|
| 39 |
+
model = model.half()
|
| 40 |
+
model.eval()
|
| 41 |
+
|
| 42 |
+
wav, sr = sf.read(wav_path)
|
| 43 |
+
input_values = feature_extractor(wav, return_tensors="pt").input_values
|
| 44 |
+
input_values = input_values.half()
|
| 45 |
+
input_values = input_values.to(device)
|
| 46 |
+
|
| 47 |
+
# for Wav2Vec2ForPreTraining
|
| 48 |
+
# batch_size, raw_sequence_length = input_values.shape
|
| 49 |
+
# sequence_length = model._get_feat_extract_output_lengths(raw_sequence_length)
|
| 50 |
+
# mask_time_indices = _compute_mask_indices((batch_size, sequence_length), mask_prob=0.0, mask_length=2)
|
| 51 |
+
# mask_time_indices = torch.tensor(mask_time_indices, device=input_values.device, dtype=torch.long)
|
| 52 |
+
|
| 53 |
+
with torch.no_grad():
|
| 54 |
+
outputs = model(input_values)
|
| 55 |
+
last_hidden_state = outputs.last_hidden_state
|
| 56 |
+
|
| 57 |
+
# for Wav2Vec2ForPreTraining
|
| 58 |
+
# outputs = model(input_values, mask_time_indices=mask_time_indices, output_hidden_states=True)
|
| 59 |
+
# last_hidden_state = outputs.hidden_states[-1]
|
| 60 |
+
|
| 61 |
+
```
|
chinese-wav2vec2-base/chinese-wav2vec2-base-fairseq-ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a75e04e426977dd399415b7f586b18978bc6836a3e8514ae1bb29e468fb17184
|
| 3 |
+
size 1140632401
|
chinese-wav2vec2-base/config.json
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"activation_dropout": 0.1,
|
| 3 |
+
"adapter_kernel_size": 3,
|
| 4 |
+
"adapter_stride": 2,
|
| 5 |
+
"add_adapter": false,
|
| 6 |
+
"apply_spec_augment": true,
|
| 7 |
+
"architectures": [
|
| 8 |
+
"Wav2Vec2ForPreTraining"
|
| 9 |
+
],
|
| 10 |
+
"attention_dropout": 0.1,
|
| 11 |
+
"bos_token_id": 1,
|
| 12 |
+
"classifier_proj_size": 256,
|
| 13 |
+
"codevector_dim": 256,
|
| 14 |
+
"contrastive_logits_temperature": 0.1,
|
| 15 |
+
"conv_bias": false,
|
| 16 |
+
"conv_dim": [
|
| 17 |
+
512,
|
| 18 |
+
512,
|
| 19 |
+
512,
|
| 20 |
+
512,
|
| 21 |
+
512,
|
| 22 |
+
512,
|
| 23 |
+
512
|
| 24 |
+
],
|
| 25 |
+
"conv_kernel": [
|
| 26 |
+
10,
|
| 27 |
+
3,
|
| 28 |
+
3,
|
| 29 |
+
3,
|
| 30 |
+
3,
|
| 31 |
+
2,
|
| 32 |
+
2
|
| 33 |
+
],
|
| 34 |
+
"conv_stride": [
|
| 35 |
+
5,
|
| 36 |
+
2,
|
| 37 |
+
2,
|
| 38 |
+
2,
|
| 39 |
+
2,
|
| 40 |
+
2,
|
| 41 |
+
2
|
| 42 |
+
],
|
| 43 |
+
"ctc_loss_reduction": "sum",
|
| 44 |
+
"ctc_zero_infinity": false,
|
| 45 |
+
"diversity_loss_weight": 0.1,
|
| 46 |
+
"do_stable_layer_norm": false,
|
| 47 |
+
"eos_token_id": 2,
|
| 48 |
+
"feat_extract_activation": "gelu",
|
| 49 |
+
"feat_extract_norm": "group",
|
| 50 |
+
"feat_proj_dropout": 0.0,
|
| 51 |
+
"feat_quantizer_dropout": 0.0,
|
| 52 |
+
"final_dropout": 0.1,
|
| 53 |
+
"hidden_act": "gelu",
|
| 54 |
+
"hidden_dropout": 0.1,
|
| 55 |
+
"hidden_size": 768,
|
| 56 |
+
"initializer_range": 0.02,
|
| 57 |
+
"intermediate_size": 3072,
|
| 58 |
+
"layer_norm_eps": 1e-05,
|
| 59 |
+
"layerdrop": 0.1,
|
| 60 |
+
"mask_feature_length": 10,
|
| 61 |
+
"mask_feature_min_masks": 0,
|
| 62 |
+
"mask_feature_prob": 0.0,
|
| 63 |
+
"mask_time_length": 10,
|
| 64 |
+
"mask_time_min_masks": 2,
|
| 65 |
+
"mask_time_prob": 0.05,
|
| 66 |
+
"model_type": "wav2vec2",
|
| 67 |
+
"num_adapter_layers": 3,
|
| 68 |
+
"num_attention_heads": 12,
|
| 69 |
+
"num_codevector_groups": 2,
|
| 70 |
+
"num_codevectors_per_group": 320,
|
| 71 |
+
"num_conv_pos_embedding_groups": 16,
|
| 72 |
+
"num_conv_pos_embeddings": 128,
|
| 73 |
+
"num_feat_extract_layers": 7,
|
| 74 |
+
"num_hidden_layers": 12,
|
| 75 |
+
"num_negatives": 100,
|
| 76 |
+
"output_hidden_size": 768,
|
| 77 |
+
"pad_token_id": 0,
|
| 78 |
+
"proj_codevector_dim": 256,
|
| 79 |
+
"tdnn_dilation": [
|
| 80 |
+
1,
|
| 81 |
+
2,
|
| 82 |
+
3,
|
| 83 |
+
1,
|
| 84 |
+
1
|
| 85 |
+
],
|
| 86 |
+
"tdnn_dim": [
|
| 87 |
+
512,
|
| 88 |
+
512,
|
| 89 |
+
512,
|
| 90 |
+
512,
|
| 91 |
+
1500
|
| 92 |
+
],
|
| 93 |
+
"tdnn_kernel": [
|
| 94 |
+
5,
|
| 95 |
+
3,
|
| 96 |
+
3,
|
| 97 |
+
1,
|
| 98 |
+
1
|
| 99 |
+
],
|
| 100 |
+
"torch_dtype": "float32",
|
| 101 |
+
"transformers_version": "4.16.2",
|
| 102 |
+
"use_weighted_layer_sum": false,
|
| 103 |
+
"vocab_size": 32,
|
| 104 |
+
"xvector_output_dim": 512
|
| 105 |
+
}
|
chinese-wav2vec2-base/preprocessor_config.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_normalize": false,
|
| 3 |
+
"feature_size": 1,
|
| 4 |
+
"padding_side": "right",
|
| 5 |
+
"padding_value": 0.0,
|
| 6 |
+
"return_attention_mask": false,
|
| 7 |
+
"sampling_rate": 16000
|
| 8 |
+
}
|
chinese-wav2vec2-base/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be2da40c9e7ae26bfc904a3ed79ebb9e8f060bec6dba85d6a6ae86114bc38901
|
| 3 |
+
size 380261837
|
config.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_name": "LongCat-Video-Avatar"
|
| 3 |
+
}
|
model_index.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_name": "LongCat-Video-Avatar"
|
| 3 |
+
}
|
vocal_separator/Kim_Vocal_2.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce74ef3b6a6024ce44211a07be9cf8bc6d87728cc852a68ab34eb8e58cde9c8b
|
| 3 |
+
size 66759214
|
vocal_separator/download_checks.json
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"current_version": "UVR_Patch_10_6_23_4_27",
|
| 3 |
+
"current_version_ocl": "UVR_Patch_10_6_23_4_27",
|
| 4 |
+
"current_version_mac": "UVR_Patch_10_6_23_4_27",
|
| 5 |
+
"current_version_linux": "UVR_Patch_10_6_23_4_27",
|
| 6 |
+
"vr_download_list": {
|
| 7 |
+
"VR Arch Single Model v5: 1_HP-UVR": "1_HP-UVR.pth",
|
| 8 |
+
"VR Arch Single Model v5: 2_HP-UVR": "2_HP-UVR.pth",
|
| 9 |
+
"VR Arch Single Model v5: 3_HP-Vocal-UVR": "3_HP-Vocal-UVR.pth",
|
| 10 |
+
"VR Arch Single Model v5: 4_HP-Vocal-UVR": "4_HP-Vocal-UVR.pth",
|
| 11 |
+
"VR Arch Single Model v5: 5_HP-Karaoke-UVR": "5_HP-Karaoke-UVR.pth",
|
| 12 |
+
"VR Arch Single Model v5: 6_HP-Karaoke-UVR": "6_HP-Karaoke-UVR.pth",
|
| 13 |
+
"VR Arch Single Model v5: 7_HP2-UVR": "7_HP2-UVR.pth",
|
| 14 |
+
"VR Arch Single Model v5: 8_HP2-UVR": "8_HP2-UVR.pth",
|
| 15 |
+
"VR Arch Single Model v5: 9_HP2-UVR": "9_HP2-UVR.pth",
|
| 16 |
+
"VR Arch Single Model v5: 10_SP-UVR-2B-32000-1": "10_SP-UVR-2B-32000-1.pth",
|
| 17 |
+
"VR Arch Single Model v5: 11_SP-UVR-2B-32000-2": "11_SP-UVR-2B-32000-2.pth",
|
| 18 |
+
"VR Arch Single Model v5: 12_SP-UVR-3B-44100": "12_SP-UVR-3B-44100.pth",
|
| 19 |
+
"VR Arch Single Model v5: 13_SP-UVR-4B-44100-1": "13_SP-UVR-4B-44100-1.pth",
|
| 20 |
+
"VR Arch Single Model v5: 14_SP-UVR-4B-44100-2": "14_SP-UVR-4B-44100-2.pth",
|
| 21 |
+
"VR Arch Single Model v5: 15_SP-UVR-MID-44100-1": "15_SP-UVR-MID-44100-1.pth",
|
| 22 |
+
"VR Arch Single Model v5: 16_SP-UVR-MID-44100-2": "16_SP-UVR-MID-44100-2.pth",
|
| 23 |
+
"VR Arch Single Model v5: 17_HP-Wind_Inst-UVR": "17_HP-Wind_Inst-UVR.pth",
|
| 24 |
+
"VR Arch Single Model v5: UVR-De-Echo-Aggressive by FoxJoy": "UVR-De-Echo-Aggressive.pth",
|
| 25 |
+
"VR Arch Single Model v5: UVR-De-Echo-Normal by FoxJoy": "UVR-De-Echo-Normal.pth",
|
| 26 |
+
"VR Arch Single Model v5: UVR-DeEcho-DeReverb by FoxJoy": "UVR-DeEcho-DeReverb.pth",
|
| 27 |
+
"VR Arch Single Model v5: UVR-DeNoise-Lite by FoxJoy": "UVR-DeNoise-Lite.pth",
|
| 28 |
+
"VR Arch Single Model v5: UVR-DeNoise by FoxJoy": "UVR-DeNoise.pth",
|
| 29 |
+
"VR Arch Single Model v5: UVR-BVE-4B_SN-44100-1": "UVR-BVE-4B_SN-44100-1.pth",
|
| 30 |
+
"VR Arch Single Model v4: MGM_HIGHEND_v4": "MGM_HIGHEND_v4.pth",
|
| 31 |
+
"VR Arch Single Model v4: MGM_LOWEND_A_v4": "MGM_LOWEND_A_v4.pth",
|
| 32 |
+
"VR Arch Single Model v4: MGM_LOWEND_B_v4": "MGM_LOWEND_B_v4.pth",
|
| 33 |
+
"VR Arch Single Model v4: MGM_MAIN_v4": "MGM_MAIN_v4.pth"
|
| 34 |
+
},
|
| 35 |
+
|
| 36 |
+
"mdx_download_list": {
|
| 37 |
+
"MDX-Net Model: UVR-MDX-NET Inst HQ 1": "UVR-MDX-NET-Inst_HQ_1.onnx",
|
| 38 |
+
"MDX-Net Model: UVR-MDX-NET Inst HQ 2": "UVR-MDX-NET-Inst_HQ_2.onnx",
|
| 39 |
+
"MDX-Net Model: UVR-MDX-NET Inst HQ 3": "UVR-MDX-NET-Inst_HQ_3.onnx",
|
| 40 |
+
"MDX-Net Model: UVR-MDX-NET Inst HQ 4": "UVR-MDX-NET-Inst_HQ_4.onnx",
|
| 41 |
+
"MDX-Net Model: UVR-MDX-NET Inst HQ 5": "UVR-MDX-NET-Inst_HQ_5.onnx",
|
| 42 |
+
"MDX-Net Model: UVR-MDX-NET Main": "UVR_MDXNET_Main.onnx",
|
| 43 |
+
"MDX-Net Model: UVR-MDX-NET Inst Main": "UVR-MDX-NET-Inst_Main.onnx",
|
| 44 |
+
"MDX-Net Model: UVR-MDX-NET 1": "UVR_MDXNET_1_9703.onnx",
|
| 45 |
+
"MDX-Net Model: UVR-MDX-NET 2": "UVR_MDXNET_2_9682.onnx",
|
| 46 |
+
"MDX-Net Model: UVR-MDX-NET 3": "UVR_MDXNET_3_9662.onnx",
|
| 47 |
+
"MDX-Net Model: UVR-MDX-NET Inst 1": "UVR-MDX-NET-Inst_1.onnx",
|
| 48 |
+
"MDX-Net Model: UVR-MDX-NET Inst 2": "UVR-MDX-NET-Inst_2.onnx",
|
| 49 |
+
"MDX-Net Model: UVR-MDX-NET Inst 3": "UVR-MDX-NET-Inst_3.onnx",
|
| 50 |
+
"MDX-Net Model: UVR-MDX-NET Karaoke": "UVR_MDXNET_KARA.onnx",
|
| 51 |
+
"MDX-Net Model: UVR-MDX-NET Karaoke 2": "UVR_MDXNET_KARA_2.onnx",
|
| 52 |
+
"MDX-Net Model: UVR_MDXNET_9482": "UVR_MDXNET_9482.onnx",
|
| 53 |
+
"MDX-Net Model: UVR-MDX-NET Voc FT": "UVR-MDX-NET-Voc_FT.onnx",
|
| 54 |
+
"MDX-Net Model: Kim Vocal 1": "Kim_Vocal_1.onnx",
|
| 55 |
+
"MDX-Net Model: Kim Vocal 2": "Kim_Vocal_2.onnx",
|
| 56 |
+
"MDX-Net Model: Kim Inst": "Kim_Inst.onnx",
|
| 57 |
+
"MDX-Net Model: Reverb HQ By FoxJoy": "Reverb_HQ_By_FoxJoy.onnx",
|
| 58 |
+
"MDX-Net Model: UVR-MDX-NET Crowd HQ 1 By Aufr33": "UVR-MDX-NET_Crowd_HQ_1.onnx",
|
| 59 |
+
"MDX-Net Model: kuielab_a_vocals": "kuielab_a_vocals.onnx",
|
| 60 |
+
"MDX-Net Model: kuielab_a_other": "kuielab_a_other.onnx",
|
| 61 |
+
"MDX-Net Model: kuielab_a_bass": "kuielab_a_bass.onnx",
|
| 62 |
+
"MDX-Net Model: kuielab_a_drums": "kuielab_a_drums.onnx",
|
| 63 |
+
"MDX-Net Model: kuielab_b_vocals": "kuielab_b_vocals.onnx",
|
| 64 |
+
"MDX-Net Model: kuielab_b_other": "kuielab_b_other.onnx",
|
| 65 |
+
"MDX-Net Model: kuielab_b_bass": "kuielab_b_bass.onnx",
|
| 66 |
+
"MDX-Net Model: kuielab_b_drums": "kuielab_b_drums.onnx"
|
| 67 |
+
},
|
| 68 |
+
|
| 69 |
+
"demucs_download_list":{
|
| 70 |
+
|
| 71 |
+
"Demucs v4: htdemucs_ft":{
|
| 72 |
+
"f7e0c4bc-ba3fe64a.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/f7e0c4bc-ba3fe64a.th",
|
| 73 |
+
"d12395a8-e57c48e6.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/d12395a8-e57c48e6.th",
|
| 74 |
+
"92cfc3b6-ef3bcb9c.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/92cfc3b6-ef3bcb9c.th",
|
| 75 |
+
"04573f0d-f3cf25b2.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/04573f0d-f3cf25b2.th",
|
| 76 |
+
"htdemucs_ft.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/htdemucs_ft.yaml"
|
| 77 |
+
},
|
| 78 |
+
|
| 79 |
+
"Demucs v4: htdemucs":{
|
| 80 |
+
"955717e8-8726e21a.th": "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/955717e8-8726e21a.th",
|
| 81 |
+
"htdemucs.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/htdemucs.yaml"
|
| 82 |
+
},
|
| 83 |
+
|
| 84 |
+
"Demucs v4: hdemucs_mmi":{
|
| 85 |
+
"75fc33f5-1941ce65.th": "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/75fc33f5-1941ce65.th",
|
| 86 |
+
"hdemucs_mmi.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/hdemucs_mmi.yaml"
|
| 87 |
+
},
|
| 88 |
+
"Demucs v4: htdemucs_6s":{
|
| 89 |
+
"5c90dfd2-34c22ccb.th": "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/5c90dfd2-34c22ccb.th",
|
| 90 |
+
"htdemucs_6s.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/htdemucs_6s.yaml"
|
| 91 |
+
},
|
| 92 |
+
"Demucs v3: mdx":{
|
| 93 |
+
"0d19c1c6-0f06f20e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/0d19c1c6-0f06f20e.th",
|
| 94 |
+
"7ecf8ec1-70f50cc9.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/7ecf8ec1-70f50cc9.th",
|
| 95 |
+
"c511e2ab-fe698775.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/c511e2ab-fe698775.th",
|
| 96 |
+
"7d865c68-3d5dd56b.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/7d865c68-3d5dd56b.th",
|
| 97 |
+
"mdx.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx.yaml"
|
| 98 |
+
},
|
| 99 |
+
|
| 100 |
+
"Demucs v3: mdx_q":{
|
| 101 |
+
"6b9c2ca1-3fd82607.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/6b9c2ca1-3fd82607.th",
|
| 102 |
+
"b72baf4e-8778635e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/b72baf4e-8778635e.th",
|
| 103 |
+
"42e558d4-196e0e1b.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/42e558d4-196e0e1b.th",
|
| 104 |
+
"305bc58f-18378783.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/305bc58f-18378783.th",
|
| 105 |
+
"mdx_q.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx_q.yaml"
|
| 106 |
+
},
|
| 107 |
+
|
| 108 |
+
"Demucs v3: mdx_extra":{
|
| 109 |
+
"e51eebcc-c1b80bdd.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/e51eebcc-c1b80bdd.th",
|
| 110 |
+
"a1d90b5c-ae9d2452.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/a1d90b5c-ae9d2452.th",
|
| 111 |
+
"5d2d6c55-db83574e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/5d2d6c55-db83574e.th",
|
| 112 |
+
"cfa93e08-61801ae1.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/cfa93e08-61801ae1.th",
|
| 113 |
+
"mdx_extra.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx_extra.yaml"
|
| 114 |
+
},
|
| 115 |
+
|
| 116 |
+
"Demucs v3: mdx_extra_q": {
|
| 117 |
+
"83fc094f-4a16d450.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/83fc094f-4a16d450.th",
|
| 118 |
+
"464b36d7-e5a9386e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/464b36d7-e5a9386e.th",
|
| 119 |
+
"14fc6a69-a89dd0ee.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/14fc6a69-a89dd0ee.th",
|
| 120 |
+
"7fd6ef75-a905dd85.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/7fd6ef75-a905dd85.th",
|
| 121 |
+
"mdx_extra_q.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx_extra_q.yaml"
|
| 122 |
+
},
|
| 123 |
+
|
| 124 |
+
"Demucs v3: UVR Model":{
|
| 125 |
+
"ebf34a2db.th": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/ebf34a2db.th",
|
| 126 |
+
"UVR_Demucs_Model_1.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/UVR_Demucs_Model_1.yaml"
|
| 127 |
+
},
|
| 128 |
+
|
| 129 |
+
"Demucs v3: repro_mdx_a":{
|
| 130 |
+
"9a6b4851-03af0aa6.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/9a6b4851-03af0aa6.th",
|
| 131 |
+
"1ef250f1-592467ce.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/1ef250f1-592467ce.th",
|
| 132 |
+
"fa0cb7f9-100d8bf4.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/fa0cb7f9-100d8bf4.th",
|
| 133 |
+
"902315c2-b39ce9c9.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/902315c2-b39ce9c9.th",
|
| 134 |
+
"repro_mdx_a.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/repro_mdx_a.yaml"
|
| 135 |
+
},
|
| 136 |
+
|
| 137 |
+
"Demucs v3: repro_mdx_a_time_only":{
|
| 138 |
+
"9a6b4851-03af0aa6.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/9a6b4851-03af0aa6.th",
|
| 139 |
+
"1ef250f1-592467ce.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/1ef250f1-592467ce.th",
|
| 140 |
+
"repro_mdx_a_time_only.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/repro_mdx_a_time_only.yaml"
|
| 141 |
+
},
|
| 142 |
+
|
| 143 |
+
"Demucs v3: repro_mdx_a_hybrid_only":{
|
| 144 |
+
"fa0cb7f9-100d8bf4.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/fa0cb7f9-100d8bf4.th",
|
| 145 |
+
"902315c2-b39ce9c9.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/902315c2-b39ce9c9.th",
|
| 146 |
+
"repro_mdx_a_hybrid_only.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/repro_mdx_a_hybrid_only.yaml"
|
| 147 |
+
},
|
| 148 |
+
|
| 149 |
+
"Demucs v2: demucs": {
|
| 150 |
+
"demucs-e07c671f.th": "https://dl.fbaipublicfiles.com/demucs/v3.0/demucs-e07c671f.th"
|
| 151 |
+
},
|
| 152 |
+
|
| 153 |
+
"Demucs v2: demucs_extra": {
|
| 154 |
+
"demucs_extra-3646af93.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/demucs_extra-3646af93.th"
|
| 155 |
+
},
|
| 156 |
+
|
| 157 |
+
"Demucs v2: demucs48_hq": {
|
| 158 |
+
"demucs48_hq-28a1282c.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/demucs48_hq-28a1282c.th"
|
| 159 |
+
},
|
| 160 |
+
|
| 161 |
+
"Demucs v2: tasnet": {
|
| 162 |
+
"tasnet-beb46fac.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/tasnet-beb46fac.th"
|
| 163 |
+
},
|
| 164 |
+
|
| 165 |
+
"Demucs v2: tasnet_extra": {
|
| 166 |
+
"tasnet_extra-df3777b2.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/tasnet_extra-df3777b2.th"
|
| 167 |
+
},
|
| 168 |
+
|
| 169 |
+
"Demucs v2: demucs_unittest": {
|
| 170 |
+
"demucs_unittest-09ebc15f.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/demucs_unittest-09ebc15f.th"
|
| 171 |
+
},
|
| 172 |
+
|
| 173 |
+
"Demucs v1: demucs": {
|
| 174 |
+
"demucs.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/demucs.th"
|
| 175 |
+
},
|
| 176 |
+
|
| 177 |
+
"Demucs v1: demucs_extra": {
|
| 178 |
+
"demucs_extra.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/demucs_extra.th"
|
| 179 |
+
},
|
| 180 |
+
|
| 181 |
+
"Demucs v1: light": {
|
| 182 |
+
"light.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/light.th"
|
| 183 |
+
},
|
| 184 |
+
|
| 185 |
+
"Demucs v1: light_extra": {
|
| 186 |
+
"light_extra.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/light_extra.th"
|
| 187 |
+
},
|
| 188 |
+
|
| 189 |
+
"Demucs v1: tasnet": {
|
| 190 |
+
"tasnet.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/tasnet.th"
|
| 191 |
+
},
|
| 192 |
+
|
| 193 |
+
"Demucs v1: tasnet_extra": {
|
| 194 |
+
"tasnet_extra.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/tasnet_extra.th"
|
| 195 |
+
}
|
| 196 |
+
},
|
| 197 |
+
|
| 198 |
+
"mdx_download_vip_list": {
|
| 199 |
+
"MDX-Net Model VIP: UVR-MDX-NET_Main_340": "UVR-MDX-NET_Main_340.onnx",
|
| 200 |
+
"MDX-Net Model VIP: UVR-MDX-NET_Main_390": "UVR-MDX-NET_Main_390.onnx",
|
| 201 |
+
"MDX-Net Model VIP: UVR-MDX-NET_Main_406": "UVR-MDX-NET_Main_406.onnx",
|
| 202 |
+
"MDX-Net Model VIP: UVR-MDX-NET_Main_427": "UVR-MDX-NET_Main_427.onnx",
|
| 203 |
+
"MDX-Net Model VIP: UVR-MDX-NET_Main_438": "UVR-MDX-NET_Main_438.onnx",
|
| 204 |
+
"MDX-Net Model VIP: UVR-MDX-NET_Inst_82_beta": "UVR-MDX-NET_Inst_82_beta.onnx",
|
| 205 |
+
"MDX-Net Model VIP: UVR-MDX-NET_Inst_90_beta": "UVR-MDX-NET_Inst_90_beta.onnx",
|
| 206 |
+
"MDX-Net Model VIP: UVR-MDX-NET_Inst_187_beta": "UVR-MDX-NET_Inst_187_beta.onnx",
|
| 207 |
+
"MDX-Net Model VIP: UVR-MDX-NET-Inst_full_292": "UVR-MDX-NET-Inst_full_292.onnx"
|
| 208 |
+
},
|
| 209 |
+
|
| 210 |
+
"mdx23_download_list": {
|
| 211 |
+
"MDX23C Model: MDX23C_D1581": {"MDX23C_D1581.ckpt":"model_2_stem_061321.yaml"}
|
| 212 |
+
},
|
| 213 |
+
|
| 214 |
+
"mdx23c_download_list": {
|
| 215 |
+
"MDX23C Model: MDX23C-InstVoc HQ": {"MDX23C-8KFFT-InstVoc_HQ.ckpt":"model_2_stem_full_band_8k.yaml"}
|
| 216 |
+
},
|
| 217 |
+
|
| 218 |
+
"roformer_download_list": {
|
| 219 |
+
"Roformer Model: BS-Roformer-Viperx-1297": {"model_bs_roformer_ep_317_sdr_12.9755.ckpt":"model_bs_roformer_ep_317_sdr_12.9755.yaml"},
|
| 220 |
+
"Roformer Model: BS-Roformer-Viperx-1296": {"model_bs_roformer_ep_368_sdr_12.9628.ckpt":"model_bs_roformer_ep_368_sdr_12.9628.yaml"},
|
| 221 |
+
"Roformer Model: BS-Roformer-Viperx-1053": {"model_bs_roformer_ep_937_sdr_10.5309.ckpt":"model_bs_roformer_ep_937_sdr_10.5309.yaml"},
|
| 222 |
+
"Roformer Model: Mel-Roformer-Viperx-1143": {"model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt":"model_mel_band_roformer_ep_3005_sdr_11.4360.yaml"},
|
| 223 |
+
"Roformer Model: MelBand Roformer Kim | Inst V1 by Unwa": {"melband_roformer_inst_v1.ckpt":"config_melbandroformer_inst.yaml"},
|
| 224 |
+
"Roformer Model: MelBand Roformer Kim | Inst V2 by Unwa": {"melband_roformer_inst_v2.ckpt":"config_melbandroformer_inst_v2.yaml"},
|
| 225 |
+
"Roformer Model: MelBand Roformer Kim | InstVoc Duality V1 by Unwa": {"melband_roformer_instvoc_duality_v1.ckpt":"config_melbandroformer_instvoc_duality.yaml"},
|
| 226 |
+
"Roformer Model: MelBand Roformer Kim | InstVoc Duality V2 by Unwa": {"melband_roformer_instvox_duality_v2.ckpt":"config_melbandroformer_instvoc_duality.yaml"}
|
| 227 |
+
},
|
| 228 |
+
|
| 229 |
+
"other_network_list": {
|
| 230 |
+
"Roformer Model: BS-Roformer-Viperx-1297": {"model_bs_roformer_ep_317_sdr_12.9755.ckpt":"https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/model_bs_roformer_ep_317_sdr_12.9755.ckpt",
|
| 231 |
+
"model_bs_roformer_ep_317_sdr_12.9755.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/model_bs_roformer_ep_317_sdr_12.9755.yaml"},
|
| 232 |
+
"Roformer Model: BS-Roformer-Viperx-1296": {"model_bs_roformer_ep_368_sdr_12.9628.ckpt":"https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/model_bs_roformer_ep_368_sdr_12.9628.ckpt",
|
| 233 |
+
"model_bs_roformer_ep_368_sdr_12.9628.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/model_bs_roformer_ep_368_sdr_12.9628.yaml"},
|
| 234 |
+
"Roformer Model: BS-Roformer-Viperx-1053": {"model_bs_roformer_ep_937_sdr_10.5309.ckpt":"https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/model_bs_roformer_ep_937_sdr_10.5309.ckpt",
|
| 235 |
+
"model_bs_roformer_ep_937_sdr_10.5309.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/model_bs_roformer_ep_937_sdr_10.5309.yaml"},
|
| 236 |
+
"Roformer Model: Mel-Roformer-Viperx-1143": {"model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt":"https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt",
|
| 237 |
+
"model_mel_band_roformer_ep_3005_sdr_11.4360.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml"},
|
| 238 |
+
"Roformer Model: MelBand Roformer Kim | Inst V1 by Unwa": {"melband_roformer_inst_v1.ckpt":"https://huggingface.co/pcunwa/Mel-Band-Roformer-Inst/resolve/main/melband_roformer_inst_v1.ckpt",
|
| 239 |
+
"config_melbandroformer_inst.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_melbandroformer_inst.yaml"},
|
| 240 |
+
"Roformer Model: MelBand Roformer Kim | Inst V2 by Unwa": {"melband_roformer_inst_v2.ckpt":"https://huggingface.co/pcunwa/Mel-Band-Roformer-Inst/resolve/main/melband_roformer_inst_v2.ckpt",
|
| 241 |
+
"config_melbandroformer_inst_v2.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_melbandroformer_inst_v2.yaml"},
|
| 242 |
+
"Roformer Model: MelBand Roformer Kim | InstVoc Duality V1 by Unwa": {"melband_roformer_instvoc_duality_v1.ckpt":"https://huggingface.co/pcunwa/Mel-Band-Roformer-InstVoc-Duality/resolve/main/melband_roformer_instvoc_duality_v1.ckpt",
|
| 243 |
+
"config_melbandroformer_instvoc_duality.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_melbandroformer_instvoc_duality.yaml"},
|
| 244 |
+
"Roformer Model: MelBand Roformer Kim | InstVoc Duality V2 by Unwa": {"melband_roformer_instvox_duality_v2.ckpt":"https://huggingface.co/pcunwa/Mel-Band-Roformer-InstVoc-Duality/resolve/main/melband_roformer_instvox_duality_v2.ckpt",
|
| 245 |
+
"config_melbandroformer_instvoc_duality.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_melbandroformer_instvoc_duality.yaml"},
|
| 246 |
+
"Roformer Model: MelBand Roformer Kim | Inst V1 (E) by Unwa": {"inst_v1e.ckpt":"https://huggingface.co/pcunwa/Mel-Band-Roformer-Inst/resolve/main/inst_v1e.ckpt",
|
| 247 |
+
"config_melbandroformer_inst.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_melbandroformer_inst.yaml"},
|
| 248 |
+
"Roformer Model: MelBand Roformer Kim": {"MelBandRoformer.ckpt":"https://huggingface.co/KimberleyJSN/melbandroformer/resolve/main/MelBandRoformer.ckpt",
|
| 249 |
+
"config_vocals_mel_band_roformer_kim.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_vocals_mel_band_roformer_kim.yaml"}
|
| 250 |
+
},
|
| 251 |
+
|
| 252 |
+
"other_network_list_new": {
|
| 253 |
+
|
| 254 |
+
"Roformer Model: BS-Roformer-Inst-EXP-Value-Residual | (by Unwa)": {"BS_Inst_EXP_VRL.ckpt":"https://huggingface.co/pcunwa/BS-Roformer-Inst-EXP-Value-Residual/resolve/main/BS_Inst_EXP_VRL.ckpt",
|
| 255 |
+
"BS_Inst_EXP_VRL.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/BS_Inst_EXP_VRL.yaml"},
|
| 256 |
+
"Roformer Model: Karaoke MelBand Roformer | (by aufr33 & viperx)": {"mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt":"https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt",
|
| 257 |
+
"config_mel_band_roformer_karaoke.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_mel_band_roformer_karaoke.yaml"},
|
| 258 |
+
"Roformer Model: BS Roformer Dereverb | (anvuew edition)": {"deverb_bs_roformer_8_256dim_8depth.ckpt":"https://huggingface.co/anvuew/deverb_bs_roformer/resolve/main/deverb_bs_roformer_8_256dim_8depth.ckpt",
|
| 259 |
+
"deverb_bs_roformer_8_256dim_8depth.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/deverb_bs_roformer_8_256dim_8depth.yaml"},
|
| 260 |
+
"Bandit Plus: Cinematic Bandit Plus | (by kwatcharasupat)": {"model_bandit_plus_dnr_sdr_11.47.ckpt":"https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/model_bandit_plus_dnr_sdr_11.47.ckpt",
|
| 261 |
+
"config_dnr_bandit_bsrnn_multi_mus64.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_dnr_bandit_bsrnn_multi_mus64.yaml"},
|
| 262 |
+
"Bandit v2: Cinematic Bandit v2 Multilang | (by kwatcharasupat)": {"checkpoint-multi_fixed.ckpt":"https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/checkpoint-multi_fixed.ckpt",
|
| 263 |
+
"config_dnr_bandit_v2_mus64.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_dnr_bandit_v2_mus64.yaml"},
|
| 264 |
+
"SCnet: 4-stems SCNet_MUSDB18 | (by starrytong)": {"scnet_checkpoint_musdb18.ckpt":"https://github.com/ZFTurbo/Music-Source-Separation-Training/releases/download/v.1.0.6/scnet_checkpoint_musdb18.ckpt",
|
| 265 |
+
"config_musdb18_scnet.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_musdb18_scnet.yaml"},
|
| 266 |
+
"SCnet: 4-stems SCNet Large | (by starrytong)": {"SCNet-large_starrytong_fixed.ckpt":"https://github.com/ZFTurbo/Music-Source-Separation-Training/releases/download/v1.0.9/SCNet-large_starrytong_fixed.ckpt",
|
| 267 |
+
"config_musdb18_scnet_large_starrytong.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_musdb18_scnet_large_starrytong.yaml"},
|
| 268 |
+
"SCnet: 4-stems SCNet Large": {"model_scnet_sdr_9.3244.ckpt":"https://github.com/ZFTurbo/Music-Source-Separation-Training/releases/download/v1.0.8/model_scnet_sdr_9.3244.ckpt",
|
| 269 |
+
"config_musdb18_scnet_large.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_musdb18_scnet_large.yaml"},
|
| 270 |
+
"SCnet: 4-stems SCNet-XL": {"model_scnet_ep_54_sdr_9.8051.ckpt":"https://github.com/ZFTurbo/Music-Source-Separation-Training/releases/download/v1.0.13/model_scnet_ep_54_sdr_9.8051.ckpt",
|
| 271 |
+
"config_musdb18_scnet_xl.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_musdb18_scnet_xl.yaml"},
|
| 272 |
+
"MDX23C Model: DrumSep 6stem | (by aufr33 & jarredou)": {"aufr33-jarredou_DrumSep_model_mdx23c_ep_141_sdr_10.8059.ckpt":"https://github.com/jarredou/models/releases/download/aufr33-jarredou_MDX23C_DrumSep_model_v0.1/aufr33-jarredou_DrumSep_model_mdx23c_ep_141_sdr_10.8059.ckpt",
|
| 273 |
+
"aufr33-jarredou_DrumSep_model_mdx23c_ep_141_sdr_10.8059.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/aufr33-jarredou_DrumSep_model_mdx23c_ep_141_sdr_10.8059.yaml"},
|
| 274 |
+
"MDX23C Model: Phantom Centre extraction | (by wesleyr36)": {"model_mdx23c_ep_271_l1_freq_72.2383.ckpt":"https://github.com/ZFTurbo/Music-Source-Separation-Training/releases/download/v1.0.10/model_mdx23c_ep_271_l1_freq_72.2383.ckpt",
|
| 275 |
+
"config_mdx23c_similarity.yaml":"https://raw.githubusercontent.com/TRvlvr/application_data/main/mdx_model_data/mdx_c_configs/config_mdx23c_similarity.yaml"}
|
| 276 |
+
},
|
| 277 |
+
|
| 278 |
+
"mdx23c_download_vip_list": {
|
| 279 |
+
"MDX23C Model VIP: MDX23C_D1581": {"MDX23C_D1581.ckpt":"model_2_stem_061321.yaml"},
|
| 280 |
+
"MDX23C Model VIP: MDX23C-InstVoc HQ 2": {"MDX23C-8KFFT-InstVoc_HQ_2.ckpt":"model_2_stem_full_band_8k.yaml"}
|
| 281 |
+
},
|
| 282 |
+
|
| 283 |
+
"roll_back_win_url": "https://github.com/Anjok07/ultimatevocalremovergui/releases/download/v5.6/UVR_v5.6.0_setup.exe",
|
| 284 |
+
"roll_back_macos_x86_64_url": "https://github.com/Anjok07/ultimatevocalremovergui/releases/download/v5.6/Ultimate_Vocal_Remover_v5_6_MacOS_x86_64.dmg",
|
| 285 |
+
"roll_back_macos_arm64_url": "https://github.com/Anjok07/ultimatevocalremovergui/releases/download/v5.6/Ultimate_Vocal_Remover_v5_6_MacOS_arm64.dmg",
|
| 286 |
+
|
| 287 |
+
"vr_download_vip_list": [],
|
| 288 |
+
"demucs_download_vip_list": []
|
| 289 |
+
}
|
vocal_separator/mdx_model_data.json
ADDED
|
@@ -0,0 +1,482 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"0ddfc0eb5792638ad5dc27850236c246": {
|
| 3 |
+
"compensate": 1.035,
|
| 4 |
+
"mdx_dim_f_set": 2048,
|
| 5 |
+
"mdx_dim_t_set": 8,
|
| 6 |
+
"mdx_n_fft_scale_set": 6144,
|
| 7 |
+
"primary_stem": "Vocals"
|
| 8 |
+
},
|
| 9 |
+
"26d308f91f3423a67dc69a6d12a8793d": {
|
| 10 |
+
"compensate": 1.035,
|
| 11 |
+
"mdx_dim_f_set": 2048,
|
| 12 |
+
"mdx_dim_t_set": 9,
|
| 13 |
+
"mdx_n_fft_scale_set": 8192,
|
| 14 |
+
"primary_stem": "Other"
|
| 15 |
+
},
|
| 16 |
+
"2cdd429caac38f0194b133884160f2c6": {
|
| 17 |
+
"compensate": 1.045,
|
| 18 |
+
"mdx_dim_f_set": 3072,
|
| 19 |
+
"mdx_dim_t_set": 8,
|
| 20 |
+
"mdx_n_fft_scale_set": 7680,
|
| 21 |
+
"primary_stem": "Instrumental"
|
| 22 |
+
},
|
| 23 |
+
"2f5501189a2f6db6349916fabe8c90de": {
|
| 24 |
+
"compensate": 1.035,
|
| 25 |
+
"mdx_dim_f_set": 2048,
|
| 26 |
+
"mdx_dim_t_set": 8,
|
| 27 |
+
"mdx_n_fft_scale_set": 6144,
|
| 28 |
+
"primary_stem": "Vocals",
|
| 29 |
+
"is_karaoke": true
|
| 30 |
+
},
|
| 31 |
+
"398580b6d5d973af3120df54cee6759d": {
|
| 32 |
+
"compensate": 1.75,
|
| 33 |
+
"mdx_dim_f_set": 3072,
|
| 34 |
+
"mdx_dim_t_set": 8,
|
| 35 |
+
"mdx_n_fft_scale_set": 7680,
|
| 36 |
+
"primary_stem": "Vocals"
|
| 37 |
+
},
|
| 38 |
+
"488b3e6f8bd3717d9d7c428476be2d75": {
|
| 39 |
+
"compensate": 1.035,
|
| 40 |
+
"mdx_dim_f_set": 3072,
|
| 41 |
+
"mdx_dim_t_set": 8,
|
| 42 |
+
"mdx_n_fft_scale_set": 7680,
|
| 43 |
+
"primary_stem": "Instrumental"
|
| 44 |
+
},
|
| 45 |
+
"4910e7827f335048bdac11fa967772f9": {
|
| 46 |
+
"compensate": 1.035,
|
| 47 |
+
"mdx_dim_f_set": 2048,
|
| 48 |
+
"mdx_dim_t_set": 7,
|
| 49 |
+
"mdx_n_fft_scale_set": 4096,
|
| 50 |
+
"primary_stem": "Drums"
|
| 51 |
+
},
|
| 52 |
+
"53c4baf4d12c3e6c3831bb8f5b532b93": {
|
| 53 |
+
"compensate": 1.043,
|
| 54 |
+
"mdx_dim_f_set": 3072,
|
| 55 |
+
"mdx_dim_t_set": 8,
|
| 56 |
+
"mdx_n_fft_scale_set": 7680,
|
| 57 |
+
"primary_stem": "Vocals"
|
| 58 |
+
},
|
| 59 |
+
"5d343409ef0df48c7d78cce9f0106781": {
|
| 60 |
+
"compensate": 1.075,
|
| 61 |
+
"mdx_dim_f_set": 3072,
|
| 62 |
+
"mdx_dim_t_set": 8,
|
| 63 |
+
"mdx_n_fft_scale_set": 7680,
|
| 64 |
+
"primary_stem": "Vocals"
|
| 65 |
+
},
|
| 66 |
+
"5f6483271e1efb9bfb59e4a3e6d4d098": {
|
| 67 |
+
"compensate": 1.035,
|
| 68 |
+
"mdx_dim_f_set": 2048,
|
| 69 |
+
"mdx_dim_t_set": 9,
|
| 70 |
+
"mdx_n_fft_scale_set": 6144,
|
| 71 |
+
"primary_stem": "Vocals"
|
| 72 |
+
},
|
| 73 |
+
"65ab5919372a128e4167f5e01a8fda85": {
|
| 74 |
+
"compensate": 1.035,
|
| 75 |
+
"mdx_dim_f_set": 2048,
|
| 76 |
+
"mdx_dim_t_set": 8,
|
| 77 |
+
"mdx_n_fft_scale_set": 8192,
|
| 78 |
+
"primary_stem": "Other"
|
| 79 |
+
},
|
| 80 |
+
"6703e39f36f18aa7855ee1047765621d": {
|
| 81 |
+
"compensate": 1.035,
|
| 82 |
+
"mdx_dim_f_set": 2048,
|
| 83 |
+
"mdx_dim_t_set": 9,
|
| 84 |
+
"mdx_n_fft_scale_set": 16384,
|
| 85 |
+
"primary_stem": "Bass"
|
| 86 |
+
},
|
| 87 |
+
"6b31de20e84392859a3d09d43f089515": {
|
| 88 |
+
"compensate": 1.035,
|
| 89 |
+
"mdx_dim_f_set": 2048,
|
| 90 |
+
"mdx_dim_t_set": 8,
|
| 91 |
+
"mdx_n_fft_scale_set": 6144,
|
| 92 |
+
"primary_stem": "Vocals"
|
| 93 |
+
},
|
| 94 |
+
"867595e9de46f6ab699008295df62798": {
|
| 95 |
+
"compensate": 1.03,
|
| 96 |
+
"mdx_dim_f_set": 3072,
|
| 97 |
+
"mdx_dim_t_set": 8,
|
| 98 |
+
"mdx_n_fft_scale_set": 7680,
|
| 99 |
+
"primary_stem": "Vocals"
|
| 100 |
+
},
|
| 101 |
+
"a3cd63058945e777505c01d2507daf37": {
|
| 102 |
+
"compensate": 1.03,
|
| 103 |
+
"mdx_dim_f_set": 2048,
|
| 104 |
+
"mdx_dim_t_set": 8,
|
| 105 |
+
"mdx_n_fft_scale_set": 6144,
|
| 106 |
+
"primary_stem": "Vocals"
|
| 107 |
+
},
|
| 108 |
+
"b33d9b3950b6cbf5fe90a32608924700": {
|
| 109 |
+
"compensate": 1.03,
|
| 110 |
+
"mdx_dim_f_set": 3072,
|
| 111 |
+
"mdx_dim_t_set": 8,
|
| 112 |
+
"mdx_n_fft_scale_set": 7680,
|
| 113 |
+
"primary_stem": "Vocals"
|
| 114 |
+
},
|
| 115 |
+
"c3b29bdce8c4fa17ec609e16220330ab": {
|
| 116 |
+
"compensate": 1.035,
|
| 117 |
+
"mdx_dim_f_set": 2048,
|
| 118 |
+
"mdx_dim_t_set": 8,
|
| 119 |
+
"mdx_n_fft_scale_set": 16384,
|
| 120 |
+
"primary_stem": "Bass"
|
| 121 |
+
},
|
| 122 |
+
"ceed671467c1f64ebdfac8a2490d0d52": {
|
| 123 |
+
"compensate": 1.035,
|
| 124 |
+
"mdx_dim_f_set": 3072,
|
| 125 |
+
"mdx_dim_t_set": 8,
|
| 126 |
+
"mdx_n_fft_scale_set": 7680,
|
| 127 |
+
"primary_stem": "Instrumental"
|
| 128 |
+
},
|
| 129 |
+
"d2a1376f310e4f7fa37fb9b5774eb701": {
|
| 130 |
+
"compensate": 1.035,
|
| 131 |
+
"mdx_dim_f_set": 3072,
|
| 132 |
+
"mdx_dim_t_set": 8,
|
| 133 |
+
"mdx_n_fft_scale_set": 7680,
|
| 134 |
+
"primary_stem": "Instrumental"
|
| 135 |
+
},
|
| 136 |
+
"d7bff498db9324db933d913388cba6be": {
|
| 137 |
+
"compensate": 1.035,
|
| 138 |
+
"mdx_dim_f_set": 2048,
|
| 139 |
+
"mdx_dim_t_set": 8,
|
| 140 |
+
"mdx_n_fft_scale_set": 6144,
|
| 141 |
+
"primary_stem": "Vocals"
|
| 142 |
+
},
|
| 143 |
+
"d94058f8c7f1fae4164868ae8ae66b20": {
|
| 144 |
+
"compensate": 1.035,
|
| 145 |
+
"mdx_dim_f_set": 2048,
|
| 146 |
+
"mdx_dim_t_set": 8,
|
| 147 |
+
"mdx_n_fft_scale_set": 6144,
|
| 148 |
+
"primary_stem": "Vocals"
|
| 149 |
+
},
|
| 150 |
+
"dc41ede5961d50f277eb846db17f5319": {
|
| 151 |
+
"compensate": 1.035,
|
| 152 |
+
"mdx_dim_f_set": 2048,
|
| 153 |
+
"mdx_dim_t_set": 9,
|
| 154 |
+
"mdx_n_fft_scale_set": 4096,
|
| 155 |
+
"primary_stem": "Drums"
|
| 156 |
+
},
|
| 157 |
+
"e5572e58abf111f80d8241d2e44e7fa4": {
|
| 158 |
+
"compensate": 1.028,
|
| 159 |
+
"mdx_dim_f_set": 3072,
|
| 160 |
+
"mdx_dim_t_set": 8,
|
| 161 |
+
"mdx_n_fft_scale_set": 7680,
|
| 162 |
+
"primary_stem": "Instrumental"
|
| 163 |
+
},
|
| 164 |
+
"e7324c873b1f615c35c1967f912db92a": {
|
| 165 |
+
"compensate": 1.03,
|
| 166 |
+
"mdx_dim_f_set": 3072,
|
| 167 |
+
"mdx_dim_t_set": 8,
|
| 168 |
+
"mdx_n_fft_scale_set": 7680,
|
| 169 |
+
"primary_stem": "Vocals"
|
| 170 |
+
},
|
| 171 |
+
"1c56ec0224f1d559c42fd6fd2a67b154": {
|
| 172 |
+
"compensate": 1.025,
|
| 173 |
+
"mdx_dim_f_set": 2048,
|
| 174 |
+
"mdx_dim_t_set": 8,
|
| 175 |
+
"mdx_n_fft_scale_set": 5120,
|
| 176 |
+
"primary_stem": "Instrumental"
|
| 177 |
+
},
|
| 178 |
+
"f2df6d6863d8f435436d8b561594ff49": {
|
| 179 |
+
"compensate": 1.035,
|
| 180 |
+
"mdx_dim_f_set": 3072,
|
| 181 |
+
"mdx_dim_t_set": 8,
|
| 182 |
+
"mdx_n_fft_scale_set": 7680,
|
| 183 |
+
"primary_stem": "Instrumental"
|
| 184 |
+
},
|
| 185 |
+
"b06327a00d5e5fbc7d96e1781bbdb596": {
|
| 186 |
+
"compensate": 1.035,
|
| 187 |
+
"mdx_dim_f_set": 3072,
|
| 188 |
+
"mdx_dim_t_set": 8,
|
| 189 |
+
"mdx_n_fft_scale_set": 6144,
|
| 190 |
+
"primary_stem": "Instrumental"
|
| 191 |
+
},
|
| 192 |
+
"94ff780b977d3ca07c7a343dab2e25dd": {
|
| 193 |
+
"compensate": 1.039,
|
| 194 |
+
"mdx_dim_f_set": 3072,
|
| 195 |
+
"mdx_dim_t_set": 8,
|
| 196 |
+
"mdx_n_fft_scale_set": 6144,
|
| 197 |
+
"primary_stem": "Instrumental"
|
| 198 |
+
},
|
| 199 |
+
"73492b58195c3b52d34590d5474452f6": {
|
| 200 |
+
"compensate": 1.043,
|
| 201 |
+
"mdx_dim_f_set": 3072,
|
| 202 |
+
"mdx_dim_t_set": 8,
|
| 203 |
+
"mdx_n_fft_scale_set": 7680,
|
| 204 |
+
"primary_stem": "Vocals"
|
| 205 |
+
},
|
| 206 |
+
"970b3f9492014d18fefeedfe4773cb42": {
|
| 207 |
+
"compensate": 1.009,
|
| 208 |
+
"mdx_dim_f_set": 3072,
|
| 209 |
+
"mdx_dim_t_set": 8,
|
| 210 |
+
"mdx_n_fft_scale_set": 7680,
|
| 211 |
+
"primary_stem": "Vocals"
|
| 212 |
+
},
|
| 213 |
+
"1d64a6d2c30f709b8c9b4ce1366d96ee": {
|
| 214 |
+
"compensate": 1.065,
|
| 215 |
+
"mdx_dim_f_set": 2048,
|
| 216 |
+
"mdx_dim_t_set": 8,
|
| 217 |
+
"mdx_n_fft_scale_set": 5120,
|
| 218 |
+
"primary_stem": "Instrumental",
|
| 219 |
+
"is_karaoke": true
|
| 220 |
+
},
|
| 221 |
+
"203f2a3955221b64df85a41af87cf8f0": {
|
| 222 |
+
"compensate": 1.035,
|
| 223 |
+
"mdx_dim_f_set": 3072,
|
| 224 |
+
"mdx_dim_t_set": 8,
|
| 225 |
+
"mdx_n_fft_scale_set": 6144,
|
| 226 |
+
"primary_stem": "Instrumental"
|
| 227 |
+
},
|
| 228 |
+
"291c2049608edb52648b96e27eb80e95": {
|
| 229 |
+
"compensate": 1.035,
|
| 230 |
+
"mdx_dim_f_set": 3072,
|
| 231 |
+
"mdx_dim_t_set": 8,
|
| 232 |
+
"mdx_n_fft_scale_set": 6144,
|
| 233 |
+
"primary_stem": "Instrumental"
|
| 234 |
+
},
|
| 235 |
+
"ead8d05dab12ec571d67549b3aab03fc": {
|
| 236 |
+
"compensate": 1.035,
|
| 237 |
+
"mdx_dim_f_set": 3072,
|
| 238 |
+
"mdx_dim_t_set": 8,
|
| 239 |
+
"mdx_n_fft_scale_set": 6144,
|
| 240 |
+
"primary_stem": "Instrumental"
|
| 241 |
+
},
|
| 242 |
+
"cc63408db3d80b4d85b0287d1d7c9632": {
|
| 243 |
+
"compensate": 1.033,
|
| 244 |
+
"mdx_dim_f_set": 3072,
|
| 245 |
+
"mdx_dim_t_set": 8,
|
| 246 |
+
"mdx_n_fft_scale_set": 6144,
|
| 247 |
+
"primary_stem": "Instrumental"
|
| 248 |
+
},
|
| 249 |
+
"cd5b2989ad863f116c855db1dfe24e39": {
|
| 250 |
+
"compensate": 1.035,
|
| 251 |
+
"mdx_dim_f_set": 3072,
|
| 252 |
+
"mdx_dim_t_set": 9,
|
| 253 |
+
"mdx_n_fft_scale_set": 6144,
|
| 254 |
+
"primary_stem": "Reverb"
|
| 255 |
+
},
|
| 256 |
+
"55657dd70583b0fedfba5f67df11d711": {
|
| 257 |
+
"compensate": 1.022,
|
| 258 |
+
"mdx_dim_f_set": 3072,
|
| 259 |
+
"mdx_dim_t_set": 8,
|
| 260 |
+
"mdx_n_fft_scale_set": 6144,
|
| 261 |
+
"primary_stem": "Instrumental"
|
| 262 |
+
},
|
| 263 |
+
"b6bccda408a436db8500083ef3491e8b": {
|
| 264 |
+
"compensate": 1.02,
|
| 265 |
+
"mdx_dim_f_set": 3072,
|
| 266 |
+
"mdx_dim_t_set": 8,
|
| 267 |
+
"mdx_n_fft_scale_set": 7680,
|
| 268 |
+
"primary_stem": "Instrumental"
|
| 269 |
+
},
|
| 270 |
+
"8a88db95c7fb5dbe6a095ff2ffb428b1": {
|
| 271 |
+
"compensate": 1.026,
|
| 272 |
+
"mdx_dim_f_set": 2048,
|
| 273 |
+
"mdx_dim_t_set": 8,
|
| 274 |
+
"mdx_n_fft_scale_set": 5120,
|
| 275 |
+
"primary_stem": "Instrumental"
|
| 276 |
+
},
|
| 277 |
+
"b78da4afc6512f98e4756f5977f5c6b9": {
|
| 278 |
+
"compensate": 1.021,
|
| 279 |
+
"mdx_dim_f_set": 3072,
|
| 280 |
+
"mdx_dim_t_set": 8,
|
| 281 |
+
"mdx_n_fft_scale_set": 7680,
|
| 282 |
+
"primary_stem": "Instrumental"
|
| 283 |
+
},
|
| 284 |
+
"77d07b2667ddf05b9e3175941b4454a0": {
|
| 285 |
+
"compensate": 1.021,
|
| 286 |
+
"mdx_dim_f_set": 3072,
|
| 287 |
+
"mdx_dim_t_set": 8,
|
| 288 |
+
"mdx_n_fft_scale_set": 7680,
|
| 289 |
+
"primary_stem": "Vocals"
|
| 290 |
+
},
|
| 291 |
+
"0f2a6bc5b49d87d64728ee40e23bceb1": {
|
| 292 |
+
"compensate": 1.019,
|
| 293 |
+
"mdx_dim_f_set": 2560,
|
| 294 |
+
"mdx_dim_t_set": 8,
|
| 295 |
+
"mdx_n_fft_scale_set": 5120,
|
| 296 |
+
"primary_stem": "Instrumental"
|
| 297 |
+
},
|
| 298 |
+
"cb790d0c913647ced70fc6b38f5bea1a": {
|
| 299 |
+
"compensate": 1.01,
|
| 300 |
+
"mdx_dim_f_set": 2560,
|
| 301 |
+
"mdx_dim_t_set": 8,
|
| 302 |
+
"mdx_n_fft_scale_set": 5120,
|
| 303 |
+
"primary_stem": "Instrumental"
|
| 304 |
+
},
|
| 305 |
+
"b02be2d198d4968a121030cf8950b492": {
|
| 306 |
+
"compensate": 1.02,
|
| 307 |
+
"mdx_dim_f_set": 2560,
|
| 308 |
+
"mdx_dim_t_set": 8,
|
| 309 |
+
"mdx_n_fft_scale_set": 5120,
|
| 310 |
+
"primary_stem": "No Crowd"
|
| 311 |
+
},
|
| 312 |
+
"2154254ee89b2945b97a7efed6e88820": {
|
| 313 |
+
"config_yaml": "model_2_stem_061321.yaml"
|
| 314 |
+
},
|
| 315 |
+
"063aadd735d58150722926dcbf5852a9": {
|
| 316 |
+
"config_yaml": "model_2_stem_061321.yaml"
|
| 317 |
+
},
|
| 318 |
+
"c09f714d978b41d718facfe3427e6001": {
|
| 319 |
+
"config_yaml": "model_2_stem_061321.yaml"
|
| 320 |
+
},
|
| 321 |
+
"fe96801369f6a148df2720f5ced88c19": {
|
| 322 |
+
"config_yaml": "model3.yaml"
|
| 323 |
+
},
|
| 324 |
+
"02e8b226f85fb566e5db894b9931c640": {
|
| 325 |
+
"config_yaml": "model2.yaml"
|
| 326 |
+
},
|
| 327 |
+
"e3de6d861635ab9c1d766149edd680d6": {
|
| 328 |
+
"config_yaml": "model1.yaml"
|
| 329 |
+
},
|
| 330 |
+
"3f2936c554ab73ce2e396d54636bd373": {
|
| 331 |
+
"config_yaml": "modelB.yaml"
|
| 332 |
+
},
|
| 333 |
+
"890d0f6f82d7574bca741a9e8bcb8168": {
|
| 334 |
+
"config_yaml": "modelB.yaml"
|
| 335 |
+
},
|
| 336 |
+
"63a3cb8c37c474681049be4ad1ba8815": {
|
| 337 |
+
"config_yaml": "modelB.yaml"
|
| 338 |
+
},
|
| 339 |
+
"a7fc5d719743c7fd6b61bd2b4d48b9f0": {
|
| 340 |
+
"config_yaml": "modelA.yaml"
|
| 341 |
+
},
|
| 342 |
+
"3567f3dee6e77bf366fcb1c7b8bc3745": {
|
| 343 |
+
"config_yaml": "modelA.yaml"
|
| 344 |
+
},
|
| 345 |
+
"a28f4d717bd0d34cd2ff7a3b0a3d065e": {
|
| 346 |
+
"config_yaml": "modelA.yaml"
|
| 347 |
+
},
|
| 348 |
+
"c9971a18da20911822593dc81caa8be9": {
|
| 349 |
+
"config_yaml": "sndfx.yaml"
|
| 350 |
+
},
|
| 351 |
+
"57d94d5ed705460d21c75a5ac829a605": {
|
| 352 |
+
"config_yaml": "sndfx.yaml"
|
| 353 |
+
},
|
| 354 |
+
"e7a25f8764f25a52c1b96c4946e66ba2": {
|
| 355 |
+
"config_yaml": "sndfx.yaml"
|
| 356 |
+
},
|
| 357 |
+
"104081d24e37217086ce5fde09147ee1": {
|
| 358 |
+
"config_yaml": "model_2_stem_061321.yaml"
|
| 359 |
+
},
|
| 360 |
+
"1e6165b601539f38d0a9330f3facffeb": {
|
| 361 |
+
"config_yaml": "model_2_stem_061321.yaml"
|
| 362 |
+
},
|
| 363 |
+
"fe0108464ce0d8271be5ab810891bd7c": {
|
| 364 |
+
"config_yaml": "model_2_stem_full_band.yaml"
|
| 365 |
+
},
|
| 366 |
+
"e9b82ec90ee56c507a3a982f1555714c": {
|
| 367 |
+
"config_yaml": "model_2_stem_full_band_2.yaml"
|
| 368 |
+
},
|
| 369 |
+
"99b6ceaae542265a3b6d657bf9fde79f": {
|
| 370 |
+
"config_yaml": "model_2_stem_full_band_8k.yaml"
|
| 371 |
+
},
|
| 372 |
+
"116f6f9dabb907b53d847ed9f7a9475f": {
|
| 373 |
+
"config_yaml": "model_2_stem_full_band_8k.yaml"
|
| 374 |
+
},
|
| 375 |
+
"53f707017bfcbb56f5e1bfac420d6732": {
|
| 376 |
+
"config_yaml": "model_bs_roformer_ep_317_sdr_12.9755.yaml",
|
| 377 |
+
"is_roformer": true
|
| 378 |
+
},
|
| 379 |
+
"63e41acc264bf681a73aa9f7e5f606cc": {
|
| 380 |
+
"config_yaml": "model_mel_band_roformer_ep_3005_sdr_11.4360.yaml",
|
| 381 |
+
"is_roformer": true
|
| 382 |
+
},
|
| 383 |
+
"e733736763234047587931fc35322fd9": {
|
| 384 |
+
"config_yaml": "model_bs_roformer_ep_937_sdr_10.5309.yaml",
|
| 385 |
+
"is_roformer": true
|
| 386 |
+
},
|
| 387 |
+
"d7a256bee3e7c620f554bceaab2f68f6": {
|
| 388 |
+
"config_yaml": "config_melbandroformer_inst.yaml",
|
| 389 |
+
"is_roformer": true
|
| 390 |
+
},
|
| 391 |
+
"365ccfa0e04b31ac2e24bbb935142a81": {
|
| 392 |
+
"config_yaml": "config_melbandroformer_inst.yaml",
|
| 393 |
+
"is_roformer": true
|
| 394 |
+
},
|
| 395 |
+
"3c15abf122d8eccc4a0eb97bf84a3e58": {
|
| 396 |
+
"config_yaml": "config_melbandroformer_instvoc_duality.yaml",
|
| 397 |
+
"is_roformer": true
|
| 398 |
+
},
|
| 399 |
+
"9fb197af219c5172ea38703a33aceb79": {
|
| 400 |
+
"config_yaml": "config_melbandroformer_instvoc_duality.yaml",
|
| 401 |
+
"is_roformer": true
|
| 402 |
+
},
|
| 403 |
+
"d789065adfd747d6f585b27b495bcdae": {
|
| 404 |
+
"config_yaml": "model_bs_roformer_ep_368_sdr_12.9628.yaml",
|
| 405 |
+
"is_roformer": true
|
| 406 |
+
},
|
| 407 |
+
"e4ca75912fcff3224a19058e55facfbf": {
|
| 408 |
+
"config_yaml": "config_vocals_mel_band_roformer_kim.yaml",
|
| 409 |
+
"is_roformer": true
|
| 410 |
+
},
|
| 411 |
+
"951f8ef420a941a395a9919f5d55cce9": {
|
| 412 |
+
"config_yaml": "config_melbandroformer_inst_v2.yaml",
|
| 413 |
+
"is_roformer": true
|
| 414 |
+
},
|
| 415 |
+
"8d306349be961bd1de6b02ee598920b3": {
|
| 416 |
+
"config_yaml": "config_mel_band_roformer_karaoke.yaml",
|
| 417 |
+
"is_roformer": true,
|
| 418 |
+
"model_type": "MelBand-Roformer",
|
| 419 |
+
"is_karaoke": true
|
| 420 |
+
},
|
| 421 |
+
"b1e6014529f71b38dae30aec978c8eb4": {
|
| 422 |
+
"config_yaml": "aufr33-jarredou_DrumSep_model_mdx23c_ep_141_sdr_10.8059.yaml",
|
| 423 |
+
"is_roformer": false,
|
| 424 |
+
"model_type": "MDX23C",
|
| 425 |
+
"is_karaoke": false
|
| 426 |
+
},
|
| 427 |
+
"490993ba65cd8b61a97a7c61650ddb7d": {
|
| 428 |
+
"config_yaml": "BS_Inst_EXP_VRL.yaml",
|
| 429 |
+
"is_roformer": true,
|
| 430 |
+
"model_type": "BS-Roformer v2",
|
| 431 |
+
"is_karaoke": false
|
| 432 |
+
},
|
| 433 |
+
"cfa9cee352938476c31e92656f1b9074": {
|
| 434 |
+
"config_yaml": "config_dnr_bandit_v2_mus64.yaml",
|
| 435 |
+
"is_roformer": true,
|
| 436 |
+
"model_type": "Bandit v2",
|
| 437 |
+
"is_karaoke": false
|
| 438 |
+
},
|
| 439 |
+
"8318a54fe1278ddcf78aad32145c0a6f": {
|
| 440 |
+
"config_yaml": "deverb_bs_roformer_8_256dim_8depth.yaml",
|
| 441 |
+
"is_roformer": true,
|
| 442 |
+
"model_type": "BS-Roformer",
|
| 443 |
+
"is_karaoke": false
|
| 444 |
+
},
|
| 445 |
+
"3bff56e6709357854e71cb2e7802733a": {
|
| 446 |
+
"config_yaml": "config_dnr_bandit_bsrnn_multi_mus64.yaml",
|
| 447 |
+
"is_roformer": true,
|
| 448 |
+
"model_type": "Bandit",
|
| 449 |
+
"is_karaoke": false
|
| 450 |
+
},
|
| 451 |
+
"c7500d7fdb1c0fc24b14b698515462d2": {
|
| 452 |
+
"config_yaml": "config_mdx23c_similarity.yaml",
|
| 453 |
+
"is_roformer": false,
|
| 454 |
+
"model_type": "MDX23C",
|
| 455 |
+
"is_karaoke": false
|
| 456 |
+
},
|
| 457 |
+
"77dd1942c0feb5c04ad0b4effa34fbc6": {
|
| 458 |
+
"config_yaml": "config_musdb18_scnet_large.yaml",
|
| 459 |
+
"is_roformer": true,
|
| 460 |
+
"model_type": "SCNet",
|
| 461 |
+
"is_karaoke": false
|
| 462 |
+
},
|
| 463 |
+
"6e749f5803b8baa7988a10393bb96df1": {
|
| 464 |
+
"config_yaml": "config_musdb18_scnet.yaml",
|
| 465 |
+
"is_roformer": true,
|
| 466 |
+
"model_type": "SCNet",
|
| 467 |
+
"is_karaoke": false
|
| 468 |
+
},
|
| 469 |
+
"bb845b04a3cb0d7a32cdb1f2ce1e007c": {
|
| 470 |
+
"config_yaml": "config_musdb18_scnet_large_starrytong.yaml",
|
| 471 |
+
"is_roformer": true,
|
| 472 |
+
"model_type": "SCNet",
|
| 473 |
+
"is_karaoke": false
|
| 474 |
+
},
|
| 475 |
+
"85b1955d57c0719e94e832eaaeceb0a8": {
|
| 476 |
+
"config_yaml": "config_musdb18_scnet_xl.yaml",
|
| 477 |
+
"is_roformer": true,
|
| 478 |
+
"model_type": "SCNet",
|
| 479 |
+
"is_karaoke": false
|
| 480 |
+
}
|
| 481 |
+
|
| 482 |
+
}
|
vocal_separator/vr_model_data.json
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"0d0e6d143046b0eecc41a22e60224582": {
|
| 3 |
+
"vr_model_param": "3band_44100_mid",
|
| 4 |
+
"primary_stem": "Instrumental"
|
| 5 |
+
},
|
| 6 |
+
"18b52f873021a0af556fb4ecd552bb8e": {
|
| 7 |
+
"vr_model_param": "2band_32000",
|
| 8 |
+
"primary_stem": "Instrumental"
|
| 9 |
+
},
|
| 10 |
+
"1fc66027c82b499c7d8f55f79e64cadc": {
|
| 11 |
+
"vr_model_param": "2band_32000",
|
| 12 |
+
"primary_stem": "Instrumental"
|
| 13 |
+
},
|
| 14 |
+
"2aa34fbc01f8e6d2bf509726481e7142": {
|
| 15 |
+
"vr_model_param": "4band_44100",
|
| 16 |
+
"primary_stem": "No Piano"
|
| 17 |
+
},
|
| 18 |
+
"3e18f639b11abea7361db1a4a91c2559": {
|
| 19 |
+
"vr_model_param": "4band_44100",
|
| 20 |
+
"primary_stem": "Instrumental"
|
| 21 |
+
},
|
| 22 |
+
"570b5f50054609a17741369a35007ddd": {
|
| 23 |
+
"vr_model_param": "4band_v3",
|
| 24 |
+
"primary_stem": "Instrumental"
|
| 25 |
+
},
|
| 26 |
+
"5a6e24c1b530f2dab045a522ef89b751": {
|
| 27 |
+
"vr_model_param": "1band_sr44100_hl512",
|
| 28 |
+
"primary_stem": "Instrumental"
|
| 29 |
+
},
|
| 30 |
+
"6b5916069a49be3fe29d4397ecfd73fa": {
|
| 31 |
+
"vr_model_param": "3band_44100_msb2",
|
| 32 |
+
"primary_stem": "Instrumental",
|
| 33 |
+
"is_karaoke": true
|
| 34 |
+
},
|
| 35 |
+
"74b3bc5fa2b69f29baf7839b858bc679": {
|
| 36 |
+
"vr_model_param": "4band_44100",
|
| 37 |
+
"primary_stem": "Instrumental"
|
| 38 |
+
},
|
| 39 |
+
"827213b316df36b52a1f3d04fec89369": {
|
| 40 |
+
"vr_model_param": "4band_44100",
|
| 41 |
+
"primary_stem": "Instrumental"
|
| 42 |
+
},
|
| 43 |
+
"911d4048eee7223eca4ee0efb7d29256": {
|
| 44 |
+
"vr_model_param": "4band_44100",
|
| 45 |
+
"primary_stem": "Vocals"
|
| 46 |
+
},
|
| 47 |
+
"941f3f7f0b0341f12087aacdfef644b1": {
|
| 48 |
+
"vr_model_param": "4band_v2",
|
| 49 |
+
"primary_stem": "Instrumental"
|
| 50 |
+
},
|
| 51 |
+
"a02827cf69d75781a35c0e8a327f3195": {
|
| 52 |
+
"vr_model_param": "1band_sr33075_hl384",
|
| 53 |
+
"primary_stem": "Instrumental"
|
| 54 |
+
},
|
| 55 |
+
"b165fbff113c959dba5303b74c6484bc": {
|
| 56 |
+
"vr_model_param": "3band_44100",
|
| 57 |
+
"primary_stem": "Instrumental"
|
| 58 |
+
},
|
| 59 |
+
"b5f988cd3e891dca7253bf5f0f3427c7": {
|
| 60 |
+
"vr_model_param": "4band_44100",
|
| 61 |
+
"primary_stem": "Instrumental"
|
| 62 |
+
},
|
| 63 |
+
"b99c35723bc35cb11ed14a4780006a80": {
|
| 64 |
+
"vr_model_param": "1band_sr44100_hl1024",
|
| 65 |
+
"primary_stem": "Instrumental"
|
| 66 |
+
},
|
| 67 |
+
"ba02fd25b71d620eebbdb49e18e4c336": {
|
| 68 |
+
"vr_model_param": "3band_44100_mid",
|
| 69 |
+
"primary_stem": "Instrumental"
|
| 70 |
+
},
|
| 71 |
+
"c4476ef424d8cba65f38d8d04e8514e2": {
|
| 72 |
+
"vr_model_param": "3band_44100_msb2",
|
| 73 |
+
"primary_stem": "Instrumental"
|
| 74 |
+
},
|
| 75 |
+
"da2d37b8be2972e550a409bae08335aa": {
|
| 76 |
+
"vr_model_param": "4band_44100",
|
| 77 |
+
"primary_stem": "Vocals"
|
| 78 |
+
},
|
| 79 |
+
"db57205d3133e39df8e050b435a78c80": {
|
| 80 |
+
"vr_model_param": "4band_44100",
|
| 81 |
+
"primary_stem": "Instrumental"
|
| 82 |
+
},
|
| 83 |
+
"ea83b08e32ec2303456fe50659035f69": {
|
| 84 |
+
"vr_model_param": "4band_v3",
|
| 85 |
+
"primary_stem": "Instrumental"
|
| 86 |
+
},
|
| 87 |
+
"f6ea8473ff86017b5ebd586ccacf156b": {
|
| 88 |
+
"vr_model_param": "4band_v2_sn",
|
| 89 |
+
"primary_stem": "Instrumental",
|
| 90 |
+
"is_karaoke": true
|
| 91 |
+
},
|
| 92 |
+
"fd297a61eafc9d829033f8b987c39a3d": {
|
| 93 |
+
"vr_model_param": "1band_sr32000_hl512",
|
| 94 |
+
"primary_stem": "Instrumental"
|
| 95 |
+
},
|
| 96 |
+
"0ec76fd9e65f81d8b4fbd13af4826ed8": {
|
| 97 |
+
"vr_model_param": "4band_v3",
|
| 98 |
+
"primary_stem": "No Woodwinds"
|
| 99 |
+
},
|
| 100 |
+
"0fb9249ffe4ffc38d7b16243f394c0ff": {
|
| 101 |
+
"vr_model_param": "4band_v3",
|
| 102 |
+
"primary_stem": "No Reverb"
|
| 103 |
+
},
|
| 104 |
+
"6857b2972e1754913aad0c9a1678c753": {
|
| 105 |
+
"vr_model_param": "4band_v3",
|
| 106 |
+
"primary_stem": "No Echo",
|
| 107 |
+
"nout": 48,
|
| 108 |
+
"nout_lstm": 128
|
| 109 |
+
},
|
| 110 |
+
"f200a145434efc7dcf0cd093f517ed52": {
|
| 111 |
+
"vr_model_param": "4band_v3",
|
| 112 |
+
"primary_stem": "No Echo",
|
| 113 |
+
"nout": 48,
|
| 114 |
+
"nout_lstm": 128
|
| 115 |
+
},
|
| 116 |
+
"44c55d8b5d2e3edea98c2b2bf93071c7": {
|
| 117 |
+
"vr_model_param": "4band_v3",
|
| 118 |
+
"primary_stem": "Noise",
|
| 119 |
+
"nout": 48,
|
| 120 |
+
"nout_lstm": 128
|
| 121 |
+
},
|
| 122 |
+
"51ea8c43a6928ed3c10ef5cb2707d57b": {
|
| 123 |
+
"vr_model_param": "1band_sr44100_hl1024",
|
| 124 |
+
"primary_stem": "Noise",
|
| 125 |
+
"nout": 16,
|
| 126 |
+
"nout_lstm": 128
|
| 127 |
+
},
|
| 128 |
+
"944950a9c5963a5eb70b445d67b7068a": {
|
| 129 |
+
"vr_model_param": "4band_v3_sn",
|
| 130 |
+
"primary_stem": "Vocals",
|
| 131 |
+
"nout": 64,
|
| 132 |
+
"nout_lstm": 128,
|
| 133 |
+
"is_karaoke": false,
|
| 134 |
+
"is_bv_model": true,
|
| 135 |
+
"is_bv_model_rebalanced": 0.9
|
| 136 |
+
}
|
| 137 |
+
}
|