oulianov committed on
Commit e5583b6 · verified · 1 parent: cd90e62

Upload folder using huggingface_hub
README.md ADDED
---
license: cc-by-nc-sa-4.0
---
Apollo official GitHub: https://github.com/JusperLee/Apollo

Apollo is a novel music restoration method designed to address distortions and artefacts caused by audio codecs, especially at low bitrates. Operating in the frequency domain, Apollo uses a frequency band-split module, band-sequence modeling, and frequency band reconstruction to restore the audio quality of MP3-compressed music. It divides the spectrogram into sub-bands, extracts gain-shape representations, and models both sub-band and temporal information for high-quality audio recovery. Trained with a Generative Adversarial Network (GAN), Apollo outperforms existing SR-GAN models on the MUSDB18-HQ and MoisesDB datasets, excelling in complex multi-instrument and vocal scenarios while maintaining efficiency.
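The band-split step described above can be pictured as slicing the spectrogram along the frequency axis into sub-bands of predefined widths, each of which is then modeled as a sequence. A toy NumPy sketch of that slicing (the band widths here are illustrative, not the ones Apollo actually uses):

```python
import numpy as np

def band_split(spec: np.ndarray, widths: list[int]) -> list[np.ndarray]:
    """Slice a (freq, time) spectrogram into sub-bands along the frequency axis.

    `widths` must sum to the number of frequency bins in `spec`.
    """
    assert sum(widths) == spec.shape[0], "band widths must cover all frequency bins"
    bands, start = [], 0
    for w in widths:
        bands.append(spec[start:start + w])  # shape: (w, time)
        start += w
    return bands

# Toy example: 16 frequency bins, 10 frames, coarser bands toward high frequencies.
spec = np.random.randn(16, 10) + 1j * np.random.randn(16, 10)
bands = band_split(spec, [2, 2, 4, 8])
```

Each returned sub-band would then feed the band-sequence modeling stage; the real model additionally normalizes each band into a gain-shape representation.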
The open-sourced content includes models for inference via https://github.com/ZFTurbo/Music-Source-Separation-Training, as well as the original weights from fewer training steps. Training was conducted with sucial's project at https://github.com/SUC-DriverOld/Apollo-Training and ran for 1 million steps on a 92-hour high-quality vocal dataset.
![image/png](https://cdn-uploads.huggingface.co/production/uploads/65ef5331b46c5c72e374a3dd/uRJGmwdu--qhKlkMy5HO6.png)
config.yaml ADDED
exp:
  dir: ./Exps
  name: ApolloVoice
datas:
  _target_: look2hear.datas.MusdbMoisesdbDataModule
  train_dir: ./dataset/restoration/train
  eval_dir: ./dataset/restoration/test
  codec_type: mp3
  codec_options:
    bitrate: random
    compression: random
    complexity: random
    vbr: random
  sr: 44100
  segments: 5.4
  num_stems: 8
  snr_range:
    - -10
    - 10
  num_samples: 3000
  batch_size: 1
  num_workers: 8
model:
  _target_: look2hear.models.apollo.Apollo
  sr: 44100
  win: 20
  feature_dim: 384
  layer: 8
discriminator:
  _target_: look2hear.discriminators.frequencydis.MultiFrequencyDiscriminator
  nch: 2
  window:
    - 32
    - 64
    - 128
    - 256
    - 512
    - 1024
    - 2048
optimizer_g:
  _target_: bitsandbytes.optim.AdamW8bit
  lr: 0.001
  weight_decay: 0.01
optimizer_d:
  _target_: bitsandbytes.optim.AdamW8bit
  lr: 0.0001
  weight_decay: 0.01
  betas:
    - 0.5
    - 0.99
scheduler_g:
  _target_: torch.optim.lr_scheduler.StepLR
  step_size: 4
  gamma: 0.98
scheduler_d:
  _target_: torch.optim.lr_scheduler.StepLR
  step_size: 4
  gamma: 0.98
loss_g:
  _target_: look2hear.losses.gan_losses.MultiFrequencyGenLoss
  eps: 2.0e-08
loss_d:
  _target_: look2hear.losses.gan_losses.MultiFrequencyDisLoss
  eps: 2.0e-08
metrics:
  _target_: look2hear.losses.MultiSrcNegSDR
  sdr_type: sisdr
system:
  _target_: look2hear.system.audio_litmodule.AudioLightningModule
early_stopping:
  _target_: pytorch_lightning.callbacks.EarlyStopping
  monitor: val_loss
  patience: 50
  mode: min
  verbose: true
checkpoint:
  _target_: pytorch_lightning.callbacks.ModelCheckpoint
  dirpath: ${exp.dir}/${exp.name}/checkpoints
  monitor: val_loss
  mode: min
  verbose: true
  save_top_k: 5
  save_last: true
  filename: "{epoch}-{val_loss:.4f}"
logger:
  _target_: pytorch_lightning.loggers.WandbLogger
  name: ${exp.name}
  save_dir: ${exp.dir}/${exp.name}/logs
  offline: true
  project: Audio-Restoration
trainer:
  _target_: pytorch_lightning.Trainer
  devices:
    - 0
  max_epochs: 500
  sync_batchnorm: true
  default_root_dir: ${exp.dir}/${exp.name}/
  accelerator: cuda
  limit_train_batches: 1.0
  fast_dev_run: false
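The `_target_` keys in the config follow the Hydra instantiation convention: each node names a class to construct, and the remaining keys become constructor arguments. A minimal sketch of how such a node can be resolved, using a standard-library class as a stand-in (since the `look2hear` package may not be installed):

```python
import importlib

def instantiate(node: dict):
    """Resolve a Hydra-style config node: import the class named by
    `_target_` and call it with the remaining keys as keyword arguments."""
    module_path, _, class_name = node["_target_"].rpartition(".")
    cls = getattr(importlib.import_module(module_path), class_name)
    kwargs = {k: v for k, v in node.items() if k != "_target_"}
    return cls(**kwargs)

# Stand-in node; the real config would target e.g. look2hear.models.apollo.Apollo.
node = {"_target_": "fractions.Fraction", "numerator": 3, "denominator": 4}
obj = instantiate(node)
```

The real `hydra.utils.instantiate` additionally handles nested nodes and interpolation (the `${exp.dir}`-style references above); this sketch shows only the core import-and-call step.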
config_apollo_vocals_ep_54.yaml ADDED
audio:
  chunk_size: 441000
  min_mean_abs: 0.0
  num_channels: 2
  sample_rate: 44100
augmentations:
  enable: false
inference:
  batch_size: 1
  num_overlap: 4
model:
  feature_dim: 384
  layer: 8
  sr: 44100
  win: 20
training:
  batch_size: 1
  coarse_loss_clip: true
  grad_clip: 0
  instruments:
    - restored
    - addition
  lr: 1.0
  num_epochs: 1000
  num_steps: 1000
  optimizer: prodigy
  patience: 2
  q: 0.95
  reduce_factor: 0.95
  target_instrument: restored
  use_amp: true
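In this config, `chunk_size: 441000` is 10 seconds of audio at `sample_rate: 44100`, and `num_overlap: 4` implies the inference window advances by a quarter of the chunk length, with overlapping outputs blended. A rough NumPy sketch of that chunked, overlap-averaged processing (the actual inference code in the linked repositories may blend differently, e.g. with windowed cross-fades):

```python
import numpy as np

def chunked_process(audio, chunk_size, num_overlap, process):
    """Run `process` on overlapping chunks of `audio` and average the overlaps."""
    hop = chunk_size // num_overlap          # e.g. 441000 // 4 = 110250 samples
    out = np.zeros_like(audio, dtype=float)
    weight = np.zeros_like(audio, dtype=float)
    for start in range(0, len(audio), hop):
        chunk = audio[start:start + chunk_size]
        out[start:start + len(chunk)] += process(chunk)
        weight[start:start + len(chunk)] += 1.0
    # Average where chunks overlapped (weight is >= 1 everywhere).
    return out / np.maximum(weight, 1.0)

# Identity "model": the blended output should reproduce the input exactly.
x = np.random.randn(44100)
y = chunked_process(x, chunk_size=8192, num_overlap=4, process=lambda c: c)
```

Overlapping chunks matter because the model sees limited context at chunk edges; averaging several shifted passes suppresses boundary artefacts.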
epoch=54-val_loss=-17.6221.ckpt ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:b7158aac7d9fb886b986ea62beb4c050526b3951f8d9c11bacba1984a6b5c74f
size 615867827
model_apollo_vocals_ep_54.ckpt ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:59e1311f93e1f0fde6d5d11fa69d97e41cdee39be38f0cf4ccb80cfce34b2a2b
size 194278526