| { | |
| "schemaVersion": "1.0.0", | |
| "organization": "schismaudio", | |
| "models": [ | |
| { | |
| "expertId": "melroformer-full", | |
| "expertIds": [ | |
| "melroformer-full" | |
| ], | |
| "name": "MelRoFormer Full", | |
| "repo": "schismaudio/melroformer-full", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "vocalSeparation" | |
| ], | |
| "qualityTier": "high", | |
| "estimatedRTF": 0.6, | |
| "peakMemoryMB": 850, | |
| "inputKind": "mix", | |
| "outputKind": "stem", | |
| "description": "Full Mel-Band RoFormer (~72M params) for high-quality vocal separation", | |
| "fileSizeMB": 436 | |
| }, | |
| { | |
| "expertId": "melroformer-lightweight", | |
| "expertIds": [ | |
| "melroformer-lightweight" | |
| ], | |
| "name": "MelRoFormer Lightweight", | |
| "repo": "schismaudio/melroformer-lightweight", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "vocalSeparation" | |
| ], | |
| "qualityTier": "medium", | |
| "estimatedRTF": 0.3, | |
| "peakMemoryMB": 256, | |
| "inputKind": "mix", | |
| "outputKind": "stem", | |
| "description": "Lightweight Mel-Band RoFormer (~34M params) for fast vocal separation", | |
| "fileSizeMB": 65 | |
| }, | |
| { | |
| "expertId": "scnet-base", | |
| "expertIds": [ | |
| "scnet-base" | |
| ], | |
| "name": "SCNet Base", | |
| "repo": "schismaudio/scnet-base", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "stemSeparation", | |
| "vocalSeparation" | |
| ], | |
| "qualityTier": "medium", | |
| "estimatedRTF": 0.6, | |
| "peakMemoryMB": 1024, | |
| "inputKind": "mix", | |
| "outputKind": "stems", | |
| "description": "SCNet Base (~10M params) for 4-stem separation", | |
| "outputStems": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "fileSizeMB": 44 | |
| }, | |
| { | |
| "expertId": "scnet-large", | |
| "expertIds": [ | |
| "scnet-large" | |
| ], | |
| "name": "SCNet Large", | |
| "repo": "schismaudio/scnet-large", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "stemSeparation", | |
| "vocalSeparation" | |
| ], | |
| "qualityTier": "high", | |
| "estimatedRTF": 0.8, | |
| "peakMemoryMB": 1600, | |
| "inputKind": "mix", | |
| "outputKind": "stems", | |
| "description": "SCNet Large (~41M params) for higher-quality 4-stem separation", | |
| "outputStems": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "fileSizeMB": 165 | |
| }, | |
| { | |
| "expertId": "apollo", | |
| "expertIds": [ | |
| "apollo" | |
| ], | |
| "name": "Apollo Enhancement", | |
| "repo": "schismaudio/apollo", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "audioEnhancement" | |
| ], | |
| "qualityTier": "high", | |
| "estimatedRTF": 1.6, | |
| "peakMemoryMB": 128, | |
| "inputKind": "any", | |
| "outputKind": "any", | |
| "description": "Apollo audio enhancement model for post-separation cleanup", | |
| "fileSizeMB": 33 | |
| }, | |
| { | |
| "expertId": "bsroformer-base", | |
| "expertIds": [ | |
| "bsroformer-base" | |
| ], | |
| "name": "BS-RoFormer Base", | |
| "repo": "schismaudio/bsroformer-base", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "stemSeparation", | |
| "vocalSeparation" | |
| ], | |
| "qualityTier": "high", | |
| "estimatedRTF": 1.0, | |
| "peakMemoryMB": 330, | |
| "inputKind": "mix", | |
| "outputKind": "stems", | |
| "description": "Band-Split RoFormer for high-quality 4-stem separation", | |
| "outputStems": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "fileSizeMB": 253 | |
| }, | |
| { | |
| "expertId": "dttnet", | |
| "expertIds": [ | |
| "dttnet-vocals", | |
| "dttnet-drums", | |
| "dttnet-bass", | |
| "dttnet-other" | |
| ], | |
| "name": "DTTNet (All Stems)", | |
| "repo": "schismaudio/dttnet", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "vocalSeparation", | |
| "stemSeparation" | |
| ], | |
| "qualityTier": "high", | |
| "estimatedRTF": 0.4, | |
| "peakMemoryMB": 384, | |
| "inputKind": "mix", | |
| "outputKind": "stem", | |
| "description": "Dual-Path TFC-TDF UNet per-stem models (vocals, drums, bass, other)", | |
| "fileSizeMB": 75 | |
| }, | |
| { | |
| "expertId": "htdemucs-ft", | |
| "expertIds": [ | |
| "htdemucs-ft-vocals", | |
| "htdemucs-ft-drums", | |
| "htdemucs-ft-bass", | |
| "htdemucs-ft-other" | |
| ], | |
| "name": "HTDemucs Fine-Tuned (All Stems)", | |
| "repo": "schismaudio/htdemucs-ft", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "vocalSeparation", | |
| "stemSeparation" | |
| ], | |
| "qualityTier": "high", | |
| "estimatedRTF": 1.0, | |
| "peakMemoryMB": 250, | |
| "inputKind": "mix", | |
| "outputKind": "stem", | |
| "description": "Hybrid Transformer Demucs fine-tuned per-stem models", | |
| "fileSizeMB": 821 | |
| }, | |
| { | |
| "expertId": "htdemucs-6s", | |
| "expertIds": [ | |
| "htdemucs-6s" | |
| ], | |
| "name": "HTDemucs 6-Source", | |
| "repo": "schismaudio/htdemucs-6s", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "stemSeparation" | |
| ], | |
| "qualityTier": "medium", | |
| "estimatedRTF": 1.0, | |
| "peakMemoryMB": 500, | |
| "inputKind": "mix", | |
| "outputKind": "stems", | |
| "description": "HTDemucs 6-source model for vocals/drums/bass/other/guitar/piano separation", | |
| "outputStems": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals", | |
| "guitar", | |
| "piano" | |
| ], | |
| "fileSizeMB": 146 | |
| }, | |
| { | |
| "expertId": "banquet", | |
| "expertIds": [ | |
| "banquet-vocals", | |
| "banquet-drums", | |
| "banquet-bass", | |
| "banquet-other", | |
| "banquet-guitar", | |
| "banquet-piano", | |
| "banquet-query" | |
| ], | |
| "name": "Banquet (Query-Based Separation)", | |
| "repo": "schismaudio/banquet", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "vocalSeparation", | |
| "stemSeparation" | |
| ], | |
| "qualityTier": "medium", | |
| "estimatedRTF": 2.5, | |
| "peakMemoryMB": 450, | |
| "inputKind": "mix", | |
| "outputKind": "stem", | |
| "description": "Query-based multi-stem separation with PaSST encoder (6 stems + subcategories)", | |
| "fileSizeMB": 322 | |
| }, | |
| { | |
| "expertId": "audiosep", | |
| "expertIds": [ | |
| "audiosep-base" | |
| ], | |
| "name": "AudioSep (Text-Guided Separation)", | |
| "repo": "schismaudio/audiosep", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "textGuidedSeparation" | |
| ], | |
| "qualityTier": "medium", | |
| "estimatedRTF": 0.1, | |
| "peakMemoryMB": 600, | |
| "inputKind": "mix", | |
| "outputKind": "stem", | |
| "description": "Text-guided universal sound separation \u2014 describe what to extract in natural language", | |
| "fileSizeMB": 578 | |
| }, | |
| { | |
| "expertId": "deepfilternet3", | |
| "expertIds": [ | |
| "deepfilternet" | |
| ], | |
| "name": "DeepFilterNet3 Enhancement", | |
| "repo": "schismaudio/deepfilternet3", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "audioEnhancement" | |
| ], | |
| "qualityTier": "medium", | |
| "estimatedRTF": 0.1, | |
| "peakMemoryMB": 50, | |
| "inputKind": "any", | |
| "outputKind": "any", | |
| "description": "DeepFilterNet3 speech enhancement (~2.3M params) \u2014 ERB spectral masking + deep filtering with learned complex FIR filters", | |
| "fileSizeMB": 9 | |
| }, | |
| { | |
| "expertId": "tfctdfunet", | |
| "expertIds": [ | |
| "tfctdfunet-vocals", | |
| "tfctdfunet-drums", | |
| "tfctdfunet-bass", | |
| "tfctdfunet-other" | |
| ], | |
| "name": "TFC-TDF-UNet v3 (MDX23)", | |
| "repo": "schismaudio/tfctdfunet", | |
| "version": "1.0.0", | |
| "tasks": [ | |
| "vocalSeparation", | |
| "stemSeparation" | |
| ], | |
| "qualityTier": "high", | |
| "estimatedRTF": 0.6, | |
| "peakMemoryMB": 350, | |
| "inputKind": "mix", | |
| "outputKind": "stem", | |
| "description": "MDX23 Model-A (TFC-TDF-UNet v3) multi-source separation with subbands", | |
| "fileSizeMB": 115 | |
| } | |
| ] | |
| } | |