| { |
| "schemaVersion": "1.0.0", |
| "organization": "schismaudio", |
| "models": [ |
| { |
| "expertId": "melroformer-full", |
| "expertIds": [ |
| "melroformer-full" |
| ], |
| "name": "MelRoFormer Full", |
| "repo": "schismaudio/melroformer-full", |
| "version": "1.0.0", |
| "tasks": [ |
| "vocalSeparation" |
| ], |
| "qualityTier": "high", |
| "estimatedRTF": 0.6, |
| "peakMemoryMB": 850, |
| "inputKind": "mix", |
| "outputKind": "stem", |
| "description": "Full Mel-Band RoFormer (~72M params) for high-quality vocal separation", |
| "fileSizeMB": 436 |
| }, |
| { |
| "expertId": "melroformer-lightweight", |
| "expertIds": [ |
| "melroformer-lightweight" |
| ], |
| "name": "MelRoFormer Lightweight", |
| "repo": "schismaudio/melroformer-lightweight", |
| "version": "1.0.0", |
| "tasks": [ |
| "vocalSeparation" |
| ], |
| "qualityTier": "medium", |
| "estimatedRTF": 0.3, |
| "peakMemoryMB": 256, |
| "inputKind": "mix", |
| "outputKind": "stem", |
| "description": "Lightweight Mel-Band RoFormer (~34M params) for fast vocal separation", |
| "fileSizeMB": 65 |
| }, |
| { |
| "expertId": "scnet-base", |
| "expertIds": [ |
| "scnet-base" |
| ], |
| "name": "SCNet Base", |
| "repo": "schismaudio/scnet-base", |
| "version": "1.0.0", |
| "tasks": [ |
| "stemSeparation", |
| "vocalSeparation" |
| ], |
| "qualityTier": "medium", |
| "estimatedRTF": 0.6, |
| "peakMemoryMB": 1024, |
| "inputKind": "mix", |
| "outputKind": "stems", |
| "description": "SCNet Base (~10M params) for 4-stem separation", |
| "outputStems": [ |
| "drums", |
| "bass", |
| "other", |
| "vocals" |
| ], |
| "fileSizeMB": 44 |
| }, |
| { |
| "expertId": "scnet-large", |
| "expertIds": [ |
| "scnet-large" |
| ], |
| "name": "SCNet Large", |
| "repo": "schismaudio/scnet-large", |
| "version": "1.0.0", |
| "tasks": [ |
| "stemSeparation", |
| "vocalSeparation" |
| ], |
| "qualityTier": "high", |
| "estimatedRTF": 0.8, |
| "peakMemoryMB": 1600, |
| "inputKind": "mix", |
| "outputKind": "stems", |
| "description": "SCNet Large (~41M params) for higher-quality 4-stem separation", |
| "outputStems": [ |
| "drums", |
| "bass", |
| "other", |
| "vocals" |
| ], |
| "fileSizeMB": 165 |
| }, |
| { |
| "expertId": "apollo", |
| "expertIds": [ |
| "apollo" |
| ], |
| "name": "Apollo Enhancement", |
| "repo": "schismaudio/apollo", |
| "version": "1.0.0", |
| "tasks": [ |
| "audioEnhancement" |
| ], |
| "qualityTier": "high", |
| "estimatedRTF": 1.6, |
| "peakMemoryMB": 128, |
| "inputKind": "any", |
| "outputKind": "any", |
| "description": "Apollo audio enhancement model for post-separation cleanup", |
| "fileSizeMB": 33 |
| }, |
| { |
| "expertId": "bsroformer-base", |
| "expertIds": [ |
| "bsroformer-base" |
| ], |
| "name": "BS-RoFormer Base", |
| "repo": "schismaudio/bsroformer-base", |
| "version": "1.0.0", |
| "tasks": [ |
| "stemSeparation", |
| "vocalSeparation" |
| ], |
| "qualityTier": "high", |
| "estimatedRTF": 1.0, |
| "peakMemoryMB": 330, |
| "inputKind": "mix", |
| "outputKind": "stems", |
| "description": "Band-Split RoFormer for high-quality 4-stem separation", |
| "outputStems": [ |
| "drums", |
| "bass", |
| "other", |
| "vocals" |
| ], |
| "fileSizeMB": 253 |
| }, |
| { |
| "expertId": "dttnet", |
| "expertIds": [ |
| "dttnet-vocals", |
| "dttnet-drums", |
| "dttnet-bass", |
| "dttnet-other" |
| ], |
| "name": "DTTNet (All Stems)", |
| "repo": "schismaudio/dttnet", |
| "version": "1.0.0", |
| "tasks": [ |
| "vocalSeparation", |
| "stemSeparation" |
| ], |
| "qualityTier": "high", |
| "estimatedRTF": 0.4, |
| "peakMemoryMB": 384, |
| "inputKind": "mix", |
| "outputKind": "stem", |
| "description": "Dual Time-Frequency Transformer per-stem models (vocals, drums, bass, other)", |
| "fileSizeMB": 75 |
| }, |
| { |
| "expertId": "htdemucs-ft", |
| "expertIds": [ |
| "htdemucs-ft-vocals", |
| "htdemucs-ft-drums", |
| "htdemucs-ft-bass", |
| "htdemucs-ft-other" |
| ], |
| "name": "HTDemucs Fine-Tuned (All Stems)", |
| "repo": "schismaudio/htdemucs-ft", |
| "version": "1.0.0", |
| "tasks": [ |
| "vocalSeparation", |
| "stemSeparation" |
| ], |
| "qualityTier": "high", |
| "estimatedRTF": 1.0, |
| "peakMemoryMB": 250, |
| "inputKind": "mix", |
| "outputKind": "stem", |
| "description": "Hybrid Transformer Demucs fine-tuned per-stem models", |
| "fileSizeMB": 821 |
| }, |
| { |
| "expertId": "htdemucs-6s", |
| "expertIds": [ |
| "htdemucs-6s" |
| ], |
| "name": "HTDemucs 6-Source", |
| "repo": "schismaudio/htdemucs-6s", |
| "version": "1.0.0", |
| "tasks": [ |
| "stemSeparation" |
| ], |
| "qualityTier": "medium", |
| "estimatedRTF": 1.0, |
| "peakMemoryMB": 500, |
| "inputKind": "mix", |
| "outputKind": "stems", |
| "description": "HTDemucs 6-source model for vocals/drums/bass/other/guitar/piano separation", |
| "outputStems": [ |
| "drums", |
| "bass", |
| "other", |
| "vocals", |
| "guitar", |
| "piano" |
| ], |
| "fileSizeMB": 146 |
| }, |
| { |
| "expertId": "banquet", |
| "expertIds": [ |
| "banquet-vocals", |
| "banquet-drums", |
| "banquet-bass", |
| "banquet-other", |
| "banquet-guitar", |
| "banquet-piano", |
| "banquet-query" |
| ], |
| "name": "Banquet (Query-Based Separation)", |
| "repo": "schismaudio/banquet", |
| "version": "1.0.0", |
| "tasks": [ |
| "vocalSeparation", |
| "stemSeparation" |
| ], |
| "qualityTier": "medium", |
| "estimatedRTF": 2.5, |
| "peakMemoryMB": 450, |
| "inputKind": "mix", |
| "outputKind": "stem", |
| "description": "Query-based multi-stem separation with PaSST encoder (6 stems + subcategories)", |
| "fileSizeMB": 322 |
| }, |
| { |
| "expertId": "audiosep", |
| "expertIds": [ |
| "audiosep-base" |
| ], |
| "name": "AudioSep (Text-Guided Separation)", |
| "repo": "schismaudio/audiosep", |
| "version": "1.0.0", |
| "tasks": [ |
| "textGuidedSeparation" |
| ], |
| "qualityTier": "medium", |
| "estimatedRTF": 0.1, |
| "peakMemoryMB": 600, |
| "inputKind": "mix", |
| "outputKind": "stem", |
| "description": "Text-guided universal sound separation \u2014 describe what to extract in natural language", |
| "fileSizeMB": 578 |
| }, |
| { |
| "expertId": "deepfilternet3", |
| "expertIds": [ |
| "deepfilternet" |
| ], |
| "name": "DeepFilterNet3 Enhancement", |
| "repo": "schismaudio/deepfilternet3", |
| "version": "1.0.0", |
| "tasks": [ |
| "audioEnhancement" |
| ], |
| "qualityTier": "medium", |
| "estimatedRTF": 0.1, |
| "peakMemoryMB": 50, |
| "inputKind": "any", |
| "outputKind": "any", |
| "description": "DeepFilterNet3 speech enhancement (~2.3M params) \u2014 ERB spectral masking + deep filtering with learned complex FIR filters", |
| "fileSizeMB": 9 |
| }, |
| { |
| "expertId": "tfctdfunet", |
| "expertIds": [ |
| "tfctdfunet-vocals", |
| "tfctdfunet-drums", |
| "tfctdfunet-bass", |
| "tfctdfunet-other" |
| ], |
| "name": "TFC-TDF-UNet v3 (MDX23)", |
| "repo": "schismaudio/tfctdfunet", |
| "version": "1.0.0", |
| "tasks": [ |
| "vocalSeparation", |
| "stemSeparation" |
| ], |
| "qualityTier": "high", |
| "estimatedRTF": 0.6, |
| "peakMemoryMB": 350, |
| "inputKind": "mix", |
| "outputKind": "stem", |
| "description": "MDX23 Model-A (TFC-TDF-UNet v3) multi-source separation with subbands", |
| "fileSizeMB": 115 |
| } |
| ] |
| } |
|
|