Uploaded checkpoint + yaml

Full release of my rifforge model

Characteristics:
This is a dimension 512 depth 24 model (so fairely large file size at 1.9 GB!), with a sdr of 14.2436.
finetuned from an older Melband Roformer checkpoint with an sdr of 13.7.
Model can have some quirks (just like most models) but its all around clean for me to release.

Metrics from my own validation data :
sdr: 14.2436
l1_frequency: 21.6864
bleedless: 57.1301
fullness: 33.3656

Files changed (2) hide show

config_rifforge_full_mesk.yaml +74 -0
rifforge_full_sdr_14.2436.ckpt +3 -0

config_rifforge_full_mesk.yaml ADDED Viewed

	@@ -0,0 +1,74 @@

+audio:
+  chunk_size: 733824
+  dim_f: 1024
+  dim_t: 1665
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+model:
+  dim: 512
+  depth: 24
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+  use_torch_checkpoint: true
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  lr: 1.0e-05
+  patience: 1000
+  reduce_factor: 0.95
+  target_instrument: other
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adamw8bit
+  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+  use_torch_checkpoint: true
+inference:
+  batch_size: 2
+  dim_t: 1665
+  num_overlap: 2

rifforge_full_sdr_14.2436.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22f4eb72181d43336a1437b5d85079fa678d7b39d293283db56b3cb48908d5d5
+size 2011438773