Delete _mdx_c_configs

Browse files

Files changed (6) hide show

_mdx_c_configs/config_aspiration_mel_band_roformer.yaml +0 -77
_mdx_c_configs/config_mel_band_roformer_karaoke.yaml +0 -71
_mdx_c_configs/config_melbandroformer_big.yaml +0 -48
_mdx_c_configs/config_vocals_mdx23c.yaml +0 -96
_mdx_c_configs/config_vocals_mel_band_roformer_kj.yaml +0 -69
_mdx_c_configs/model_mel_band_roformer_denoise.yaml +0 -71

_mdx_c_configs/config_aspiration_mel_band_roformer.yaml DELETED Viewed

@@ -1,77 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 801 # don't work (use in model)
-  hop_length: 441 # don't work (use in model)
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 256
-  depth: 8
-  stereo: true
-  num_stems: 2
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  linear_transformer_depth: 0
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0.1
-  ff_dropout: 0.1
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 8
-  grad_clip: 0
-  instruments:
-  - aspiration
-  - other
-  lr: 4.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: null
-  num_epochs: 1000
-  num_steps: 1000
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-augmentations:
-  enable: true # enable or disable all augmentations (to fast disable if needed)
-  loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
-  loudness_min: 0.5
-  loudness_max: 1.5
-  mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
-  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
-    - 0.2
-    - 0.02
-  mixup_loudness_min: 0.5
-  mixup_loudness_max: 1.5
-inference:
-  batch_size: 4
-  dim_t: 1101
-  num_overlap: 2

_mdx_c_configs/config_mel_band_roformer_karaoke.yaml DELETED Viewed

@@ -1,71 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 256
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 4
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - karaoke
-  - other
-  lr: 1.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: karaoke
-  num_epochs: 1000
-  num_steps: 2000
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: null
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: false # Mix several stems of the same type with some probability
-  augmentation_loudness: false # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0
-  augmentation_loudness_max: 0
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-inference:
-  batch_size: 1
-  dim_t: 1101
-  num_overlap: 2

_mdx_c_configs/config_melbandroformer_big.yaml DELETED Viewed

@@ -1,48 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 1101
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.0
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: true
-  dim_freqs_in: 1025
-  sample_rate: 44100
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: false
-  mask_estimator_depth: 3
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-    - 4096
-    - 2048
-    - 1024
-    - 512
-    - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: false
-training:
-  instruments:
-    - vocals
-    - other
-  target_instrument: vocals
-  use_amp: true
-inference:
-  batch_size: 1
-  dim_t: 1101
-  num_overlap: 2

_mdx_c_configs/config_vocals_mdx23c.yaml DELETED Viewed

@@ -1,96 +0,0 @@
-audio:
-  chunk_size: 261120
-  dim_f: 4096
-  dim_t: 256
-  hop_length: 1024
-  n_fft: 8192
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.001
-model:
-  act: gelu
-  bottleneck_factor: 4
-  growth: 128
-  norm: InstanceNorm
-  num_blocks_per_scale: 2
-  num_channels: 128
-  num_scales: 5
-  num_subbands: 4
-  scale:
-  - 2
-  - 2
-training:
-  batch_size: 6
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - vocals
-  - other
-  lr: 9.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: null
-  num_epochs: 1000
-  num_steps: 1000
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adam
-  read_metadata_procs: 8 # Number of processes to use during metadata reading for dataset. Can speed up metadata generation
-  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-augmentations:
-  enable: true # enable or disable all augmentations (to fast disable if needed)
-  loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
-  loudness_min: 0.5
-  loudness_max: 1.5
-  mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
-  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
-    - 0.2
-    - 0.02
-  mixup_loudness_min: 0.5
-  mixup_loudness_max: 1.5
-  # apply mp3 compression to mixture only (emulate downloading mp3 from internet)
-  mp3_compression_on_mixture: 0.01
-  mp3_compression_on_mixture_bitrate_min: 32
-  mp3_compression_on_mixture_bitrate_max: 320
-  mp3_compression_on_mixture_backend: "lameenc"
-  all:
-    channel_shuffle: 0.5 # Set 0 or lower to disable
-    random_inverse: 0.1 # inverse track (better lower probability)
-    random_polarity: 0.5 # polarity change (multiply waveform to -1)
-    mp3_compression: 0.01
-    mp3_compression_min_bitrate: 32
-    mp3_compression_max_bitrate: 320
-    mp3_compression_backend: "lameenc"
-  vocals:
-    pitch_shift: 0.1
-    pitch_shift_min_semitones: -5
-    pitch_shift_max_semitones: 5
-    seven_band_parametric_eq: 0.25
-    seven_band_parametric_eq_min_gain_db: -9
-    seven_band_parametric_eq_max_gain_db: 9
-    tanh_distortion: 0.1
-    tanh_distortion_min: 0.1
-    tanh_distortion_max: 0.7
-  other:
-    pitch_shift: 0.1
-    pitch_shift_min_semitones: -4
-    pitch_shift_max_semitones: 4
-    gaussian_noise: 0.1
-    gaussian_noise_min_amplitude: 0.001
-    gaussian_noise_max_amplitude: 0.015
-    time_stretch: 0.01
-    time_stretch_min_rate: 0.8
-    time_stretch_max_rate: 1.25
-inference:
-  batch_size: 1
-  dim_t: 512
-  num_overlap: 8

_mdx_c_configs/config_vocals_mel_band_roformer_kj.yaml DELETED Viewed

@@ -1,69 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 256
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.0
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: true
-  dim_freqs_in: 1025
-  sample_rate: 44100
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: false
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-    - 4096
-    - 2048
-    - 1024
-    - 512
-    - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: false
-training:
-  batch_size: 4
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-    - vocals
-    - other
-  lr: 1.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: vocals
-  num_epochs: 1000
-  num_steps: 1000
-  augmentation: false
-  augmentation_type: null
-  use_mp3_compress: false
-  augmentation_mix: false
-  augmentation_loudness: false
-  augmentation_loudness_type: 1
-  augmentation_loudness_min: 0
-  augmentation_loudness_max: 0
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: true
-  use_amp: true
-inference:
-  batch_size: 4
-  dim_t: 1101
-  num_overlap: 2

_mdx_c_configs/model_mel_band_roformer_denoise.yaml DELETED Viewed

@@ -1,71 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 256
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 2
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - dry
-  - other
-  lr: 1.0e-05
-  patience: 8
-  reduce_factor: 0.95
-  target_instrument: dry
-  num_epochs: 1000
-  num_steps: 4032
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: null
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: false # Mix several stems of the same type with some probability
-  augmentation_loudness: false # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0
-  augmentation_loudness_max: 0
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-inference:
-  batch_size: 2
-  dim_t: 1101
-  num_overlap: 4