Spaces:
Sleeping
Sleeping
Upload 55 files
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- configs/callbacks/default.yaml +20 -0
- configs/callbacks/none.yaml +0 -0
- configs/callbacks/wandb.yaml +31 -0
- configs/config.yaml +44 -0
- configs/datamodule/musdb18_hq.yaml +50 -0
- configs/datamodule/musdb_dev14.yaml +28 -0
- configs/evaluation.yaml +43 -0
- configs/experiment/bass_dis.yaml +38 -0
- configs/experiment/drums_dis.yaml +38 -0
- configs/experiment/multigpu_default.yaml +26 -0
- configs/experiment/other_dis.yaml +38 -0
- configs/experiment/vocals_dis.yaml +38 -0
- configs/hydra/default.yaml +16 -0
- configs/infer.yaml +26 -0
- configs/logger/csv.yaml +8 -0
- configs/logger/many_loggers.yaml +10 -0
- configs/logger/neptune.yaml +11 -0
- configs/logger/none.yaml +0 -0
- configs/logger/tensorboard.yaml +10 -0
- configs/logger/wandb.yaml +15 -0
- configs/model/bass.yaml +28 -0
- configs/model/drums.yaml +28 -0
- configs/model/other.yaml +28 -0
- configs/model/vocals.yaml +28 -0
- configs/paths/default.yaml +18 -0
- configs/trainer/ddp.yaml +13 -0
- configs/trainer/default.yaml +19 -0
- configs/trainer/minimal.yaml +21 -0
- src/__init__.py +0 -0
- src/callbacks/__init__.py +0 -0
- src/callbacks/onnx_callback.py +49 -0
- src/callbacks/wandb_callbacks.py +280 -0
- src/datamodules/__init__.py +0 -0
- src/datamodules/datasets/__init__.py +0 -0
- src/datamodules/datasets/musdb.py +174 -0
- src/datamodules/musdb_datamodule.py +117 -0
- src/dp_tdf/__init__.py +0 -0
- src/dp_tdf/abstract.py +204 -0
- src/dp_tdf/bandsequence.py +136 -0
- src/dp_tdf/dp_tdf_net.py +118 -0
- src/dp_tdf/modules.py +158 -0
- src/evaluation/eval.py +120 -0
- src/evaluation/eval_demo.py +71 -0
- src/evaluation/separate.py +193 -0
- src/layers/__init__.py +2 -0
- src/layers/batch_norm.py +201 -0
- src/layers/chunk_size.py +53 -0
- src/train.py +152 -0
- src/utils/__init__.py +3 -0
- src/utils/data_augmentation.py +128 -0
configs/callbacks/default.yaml
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_checkpoint:
|
| 2 |
+
_target_: pytorch_lightning.callbacks.ModelCheckpoint
|
| 3 |
+
monitor: "val/usdr" # name of the logged metric which determines when model is improving
|
| 4 |
+
save_top_k: 5 # save k best models (determined by above metric)
|
| 5 |
+
save_last: True # additionaly always save model from last epoch
|
| 6 |
+
mode: "max" # can be "max" or "min"
|
| 7 |
+
verbose: False
|
| 8 |
+
dirpath: "checkpoints/"
|
| 9 |
+
filename: "{epoch:02d}-{step}"
|
| 10 |
+
#
|
| 11 |
+
#early_stopping:
|
| 12 |
+
# _target_: pytorch_lightning.callbacks.EarlyStopping
|
| 13 |
+
# monitor: "val/sdr" # name of the logged metric which determines when model is improving
|
| 14 |
+
# patience: 300 # how many epochs of not improving until training stops
|
| 15 |
+
# mode: "max" # can be "max" or "min"
|
| 16 |
+
# min_delta: 0.05 # minimum change in the monitored metric needed to qualify as an improvement
|
| 17 |
+
|
| 18 |
+
#make_onnx:
|
| 19 |
+
# _target_: src.callbacks.onnx_callback.MakeONNXCallback
|
| 20 |
+
# dirpath: "onnx/"
|
configs/callbacks/none.yaml
ADDED
|
File without changes
|
configs/callbacks/wandb.yaml
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defaults:
|
| 2 |
+
- default.yaml
|
| 3 |
+
|
| 4 |
+
watch_model:
|
| 5 |
+
_target_: src.callbacks.wandb_callbacks.WatchModel
|
| 6 |
+
log: "all"
|
| 7 |
+
log_freq: 100
|
| 8 |
+
|
| 9 |
+
#upload_valid_track:
|
| 10 |
+
# _target_: src.callbacks.wandb_callbacks.UploadValidTrack
|
| 11 |
+
# crop: 3
|
| 12 |
+
# upload_after_n_epoch: -1
|
| 13 |
+
|
| 14 |
+
#upload_code_as_artifact:
|
| 15 |
+
# _target_: src.callbacks.wandb_callbacks.UploadCodeAsArtifact
|
| 16 |
+
# code_dir: ${work_dir}/src
|
| 17 |
+
#
|
| 18 |
+
#upload_ckpts_as_artifact:
|
| 19 |
+
# _target_: src.callbacks.wandb_callbacks.UploadCheckpointsAsArtifact
|
| 20 |
+
# ckpt_dir: "checkpoints/"
|
| 21 |
+
# upload_best_only: True
|
| 22 |
+
#
|
| 23 |
+
#log_f1_precision_recall_heatmap:
|
| 24 |
+
# _target_: src.callbacks.wandb_callbacks.LogF1PrecRecHeatmap
|
| 25 |
+
#
|
| 26 |
+
#log_confusion_matrix:
|
| 27 |
+
# _target_: src.callbacks.wandb_callbacks.LogConfusionMatrix
|
| 28 |
+
#
|
| 29 |
+
#log_image_predictions:
|
| 30 |
+
# _target_: src.callbacks.wandb_callbacks.LogImagePredictions
|
| 31 |
+
# num_samples: 8
|
configs/config.yaml
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# @package _global_
|
| 2 |
+
|
| 3 |
+
# specify here default training configuration
|
| 4 |
+
defaults:
|
| 5 |
+
- datamodule: musdb18_hq
|
| 6 |
+
- model: null
|
| 7 |
+
- callbacks: default # set this to null if you don't want to use callbacks
|
| 8 |
+
- logger: null # set logger here or use command line (e.g. `python run.py logger=wandb`)
|
| 9 |
+
- trainer: default
|
| 10 |
+
- hparams_search: null
|
| 11 |
+
- paths: default.yaml
|
| 12 |
+
|
| 13 |
+
- hydra: default
|
| 14 |
+
|
| 15 |
+
- experiment: null
|
| 16 |
+
|
| 17 |
+
# enable color logging
|
| 18 |
+
- override hydra/hydra_logging: colorlog
|
| 19 |
+
- override hydra/job_logging: colorlog
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# path to original working directory
|
| 23 |
+
# hydra hijacks working directory by changing it to the current log directory,
|
| 24 |
+
# so it's useful to have this path as a special variable
|
| 25 |
+
# learn more here: https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory
|
| 26 |
+
#work_dir: ${hydra:runtime.cwd}
|
| 27 |
+
#output_dir: ${hydra:runtime.output_dir}
|
| 28 |
+
|
| 29 |
+
# path to folder with data
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# use `python run.py debug=true` for easy debugging!
|
| 33 |
+
# this will run 1 train, val and test loop with only 1 batch
|
| 34 |
+
# equivalent to running `python run.py trainer.fast_dev_run=true`
|
| 35 |
+
# (this is placed here just for easier access from command line)
|
| 36 |
+
debug: False
|
| 37 |
+
|
| 38 |
+
# pretty print config at the start of the run using Rich library
|
| 39 |
+
print_config: True
|
| 40 |
+
|
| 41 |
+
# disable python warnings if they annoy you
|
| 42 |
+
ignore_warnings: True
|
| 43 |
+
|
| 44 |
+
wandb_api_key: ${oc.env:wandb_api_key}
|
configs/datamodule/musdb18_hq.yaml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_target_: src.datamodules.musdb_datamodule.MusdbDataModule
|
| 2 |
+
|
| 3 |
+
# data_dir is specified in config.yaml
|
| 4 |
+
data_dir: null
|
| 5 |
+
|
| 6 |
+
single_channel: False
|
| 7 |
+
|
| 8 |
+
# chunk_size = (hop_length * (dim_t - 1) / sample_rate) secs
|
| 9 |
+
sample_rate: 44100
|
| 10 |
+
hop_length: ${model.hop_length} # stft hop_length
|
| 11 |
+
dim_t: ${model.dim_t} # number of stft frames
|
| 12 |
+
|
| 13 |
+
# number of overlapping wave samples between chunks when separating a whole track
|
| 14 |
+
overlap: ${model.overlap}
|
| 15 |
+
|
| 16 |
+
source_names:
|
| 17 |
+
- bass
|
| 18 |
+
- drums
|
| 19 |
+
- other
|
| 20 |
+
- vocals
|
| 21 |
+
target_name: ${model.target_name}
|
| 22 |
+
|
| 23 |
+
external_datasets: null
|
| 24 |
+
#external_datasets:
|
| 25 |
+
# - test
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
batch_size: 8
|
| 29 |
+
num_workers: 0
|
| 30 |
+
pin_memory: False
|
| 31 |
+
|
| 32 |
+
aug_params:
|
| 33 |
+
- 2 # maximum pitch shift in semitones (-x < shift param < x)
|
| 34 |
+
- 20 # maximum time stretch percentage (-x < stretch param < x)
|
| 35 |
+
|
| 36 |
+
validation_set:
|
| 37 |
+
- Actions - One Minute Smile
|
| 38 |
+
- Clara Berry And Wooldog - Waltz For My Victims
|
| 39 |
+
- Johnny Lokke - Promises & Lies
|
| 40 |
+
- Patrick Talbot - A Reason To Leave
|
| 41 |
+
- Triviul - Angelsaint
|
| 42 |
+
# - Alexander Ross - Goodbye Bolero
|
| 43 |
+
# - Fergessen - Nos Palpitants
|
| 44 |
+
# - Leaf - Summerghost
|
| 45 |
+
# - Skelpolu - Human Mistakes
|
| 46 |
+
# - Young Griffo - Pennies
|
| 47 |
+
# - ANiMAL - Rockshow
|
| 48 |
+
# - James May - On The Line
|
| 49 |
+
# - Meaxic - Take A Step
|
| 50 |
+
# - Traffic Experiment - Sirens
|
configs/datamodule/musdb_dev14.yaml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
defaults:
|
| 3 |
+
- musdb18_hq
|
| 4 |
+
|
| 5 |
+
data_dir: ${oc.env:data_dir}
|
| 6 |
+
|
| 7 |
+
has_split_structure: True
|
| 8 |
+
|
| 9 |
+
validation_set:
|
| 10 |
+
# - Meaxic - Take A Step
|
| 11 |
+
# - Skelpolu - Human Mistakes
|
| 12 |
+
- Actions - One Minute Smile
|
| 13 |
+
- Clara Berry And Wooldog - Waltz For My Victims
|
| 14 |
+
- Johnny Lokke - Promises & Lies
|
| 15 |
+
- Patrick Talbot - A Reason To Leave
|
| 16 |
+
- Triviul - Angelsaint
|
| 17 |
+
- Alexander Ross - Goodbye Bolero
|
| 18 |
+
- Fergessen - Nos Palpitants
|
| 19 |
+
- Leaf - Summerghost
|
| 20 |
+
- Skelpolu - Human Mistakes
|
| 21 |
+
- Young Griffo - Pennies
|
| 22 |
+
- ANiMAL - Rockshow
|
| 23 |
+
- James May - On The Line
|
| 24 |
+
- Meaxic - Take A Step
|
| 25 |
+
- Traffic Experiment - Sirens
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
mode: musdb18hq
|
configs/evaluation.yaml
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# @package _global_
|
| 2 |
+
|
| 3 |
+
# specify here default training configuration
|
| 4 |
+
defaults:
|
| 5 |
+
- model: ConvTDFNet_vocals
|
| 6 |
+
- logger:
|
| 7 |
+
- wandb
|
| 8 |
+
- tensorboard
|
| 9 |
+
- paths: default.yaml
|
| 10 |
+
# enable color logging
|
| 11 |
+
- override hydra/hydra_logging: colorlog
|
| 12 |
+
- override hydra/job_logging: colorlog
|
| 13 |
+
|
| 14 |
+
hydra:
|
| 15 |
+
run:
|
| 16 |
+
dir: ${get_eval_log_dir:${ckpt_path}}
|
| 17 |
+
|
| 18 |
+
#ckpt_path: "G:\\Experiments\\KLRef\\vocals.onnx"
|
| 19 |
+
ckpt_path: ${oc.env:ckpt_path}
|
| 20 |
+
|
| 21 |
+
split: 'test'
|
| 22 |
+
batch_size: 4
|
| 23 |
+
device: 'cuda:0'
|
| 24 |
+
bss: fast # fast or official
|
| 25 |
+
single: False # for debug investigation, only run the model on 1 single song
|
| 26 |
+
|
| 27 |
+
#data_dir: ${oc.env:data_dir}
|
| 28 |
+
eval_dir: ${oc.env:data_dir}
|
| 29 |
+
wandb_api_key: ${oc.env:wandb_api_key}
|
| 30 |
+
|
| 31 |
+
logger:
|
| 32 |
+
wandb:
|
| 33 |
+
# project: mdx_eval_${split}
|
| 34 |
+
project: new_eval_order
|
| 35 |
+
name: ${get_eval_log_dir:${ckpt_path}}
|
| 36 |
+
|
| 37 |
+
pool_workers: 8
|
| 38 |
+
double_chunk: False
|
| 39 |
+
|
| 40 |
+
overlap_add:
|
| 41 |
+
overlap_rate: 0.5
|
| 42 |
+
tmp_root: ${paths.root_dir}/tmp # for saving temp chunks, since we use ffmpeg and will need io to disk
|
| 43 |
+
samplerate: 44100
|
configs/experiment/bass_dis.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# @package _global_
|
| 2 |
+
|
| 3 |
+
# to execute this experiment run:
|
| 4 |
+
# python run.py experiment=example_simple.yaml
|
| 5 |
+
|
| 6 |
+
defaults:
|
| 7 |
+
- multigpu_default
|
| 8 |
+
- override /model: bass.yaml
|
| 9 |
+
|
| 10 |
+
seed: 2021
|
| 11 |
+
|
| 12 |
+
exp_name: bass_g32
|
| 13 |
+
|
| 14 |
+
# the name inside project
|
| 15 |
+
logger:
|
| 16 |
+
wandb:
|
| 17 |
+
name: ${exp_name}
|
| 18 |
+
|
| 19 |
+
model:
|
| 20 |
+
lr: 0.0002
|
| 21 |
+
optimizer: adamW
|
| 22 |
+
bn_norm: syncBN
|
| 23 |
+
audio_ch: 2 # datamodule.single_channel
|
| 24 |
+
g: 32
|
| 25 |
+
|
| 26 |
+
trainer:
|
| 27 |
+
devices: 2 # int or list
|
| 28 |
+
sync_batchnorm: True
|
| 29 |
+
track_grad_norm: 2
|
| 30 |
+
# gradient_clip_val: 5
|
| 31 |
+
|
| 32 |
+
datamodule:
|
| 33 |
+
batch_size: 8
|
| 34 |
+
num_workers: ${oc.decode:${oc.env:NUM_WORKERS}}
|
| 35 |
+
pin_memory: False
|
| 36 |
+
overlap: ${model.overlap}
|
| 37 |
+
audio_ch: ${model.audio_ch}
|
| 38 |
+
epoch_size:
|
configs/experiment/drums_dis.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# @package _global_
|
| 2 |
+
|
| 3 |
+
# to execute this experiment run:
|
| 4 |
+
# python run.py experiment=example_simple.yaml
|
| 5 |
+
|
| 6 |
+
defaults:
|
| 7 |
+
- multigpu_default
|
| 8 |
+
- override /model: drums.yaml
|
| 9 |
+
|
| 10 |
+
seed: 2021
|
| 11 |
+
|
| 12 |
+
exp_name: drums_g32
|
| 13 |
+
|
| 14 |
+
# the name inside project
|
| 15 |
+
logger:
|
| 16 |
+
wandb:
|
| 17 |
+
name: ${exp_name}
|
| 18 |
+
|
| 19 |
+
model:
|
| 20 |
+
lr: 0.0002
|
| 21 |
+
optimizer: adamW
|
| 22 |
+
bn_norm: syncBN
|
| 23 |
+
audio_ch: 2 # datamodule.single_channel
|
| 24 |
+
g: 32
|
| 25 |
+
|
| 26 |
+
trainer:
|
| 27 |
+
devices: 2 # int or list
|
| 28 |
+
sync_batchnorm: True
|
| 29 |
+
track_grad_norm: 2
|
| 30 |
+
# gradient_clip_val: 5
|
| 31 |
+
|
| 32 |
+
datamodule:
|
| 33 |
+
batch_size: 8
|
| 34 |
+
num_workers: ${oc.decode:${oc.env:NUM_WORKERS}}
|
| 35 |
+
pin_memory: False
|
| 36 |
+
overlap: ${model.overlap}
|
| 37 |
+
audio_ch: ${model.audio_ch}
|
| 38 |
+
epoch_size:
|
configs/experiment/multigpu_default.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# @package _global_
|
| 2 |
+
|
| 3 |
+
# to execute this experiment run:
|
| 4 |
+
# python run.py experiment=example_simple.yaml
|
| 5 |
+
|
| 6 |
+
defaults:
|
| 7 |
+
- override /callbacks: default
|
| 8 |
+
- override /logger:
|
| 9 |
+
- wandb
|
| 10 |
+
- tensorboard
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
#callbacks:
|
| 14 |
+
# early_stopping:
|
| 15 |
+
# patience: 1000000
|
| 16 |
+
|
| 17 |
+
#datamodule:
|
| 18 |
+
# external_datasets:
|
| 19 |
+
# - test
|
| 20 |
+
|
| 21 |
+
trainer:
|
| 22 |
+
max_epochs: 1000000
|
| 23 |
+
accelerator: cuda
|
| 24 |
+
amp_backend: native
|
| 25 |
+
precision: 16
|
| 26 |
+
track_grad_norm: -1
|
configs/experiment/other_dis.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# @package _global_
|
| 2 |
+
|
| 3 |
+
# to execute this experiment run:
|
| 4 |
+
# python run.py experiment=example_simple.yaml
|
| 5 |
+
|
| 6 |
+
defaults:
|
| 7 |
+
- multigpu_default
|
| 8 |
+
- override /model: other.yaml
|
| 9 |
+
|
| 10 |
+
seed: 2021
|
| 11 |
+
|
| 12 |
+
exp_name: other_g32
|
| 13 |
+
|
| 14 |
+
# the name inside project
|
| 15 |
+
logger:
|
| 16 |
+
wandb:
|
| 17 |
+
name: ${exp_name}
|
| 18 |
+
|
| 19 |
+
model:
|
| 20 |
+
lr: 0.0002
|
| 21 |
+
optimizer: adamW
|
| 22 |
+
bn_norm: syncBN
|
| 23 |
+
audio_ch: 2 # datamodule.single_channel
|
| 24 |
+
g: 32
|
| 25 |
+
|
| 26 |
+
trainer:
|
| 27 |
+
devices: 2 # int or list
|
| 28 |
+
sync_batchnorm: True
|
| 29 |
+
track_grad_norm: 2
|
| 30 |
+
# gradient_clip_val: 5
|
| 31 |
+
|
| 32 |
+
datamodule:
|
| 33 |
+
batch_size: 8
|
| 34 |
+
num_workers: ${oc.decode:${oc.env:NUM_WORKERS}}
|
| 35 |
+
pin_memory: False
|
| 36 |
+
overlap: ${model.overlap}
|
| 37 |
+
audio_ch: ${model.audio_ch}
|
| 38 |
+
epoch_size:
|
configs/experiment/vocals_dis.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# @package _global_
|
| 2 |
+
|
| 3 |
+
# to execute this experiment run:
|
| 4 |
+
# python run.py experiment=example_simple.yaml
|
| 5 |
+
|
| 6 |
+
defaults:
|
| 7 |
+
- multigpu_default
|
| 8 |
+
- override /model: vocals.yaml
|
| 9 |
+
|
| 10 |
+
seed: 2021
|
| 11 |
+
|
| 12 |
+
exp_name: vocals_g32
|
| 13 |
+
|
| 14 |
+
# the name inside project
|
| 15 |
+
logger:
|
| 16 |
+
wandb:
|
| 17 |
+
name: ${exp_name}
|
| 18 |
+
|
| 19 |
+
model:
|
| 20 |
+
lr: 0.0002
|
| 21 |
+
optimizer: adamW
|
| 22 |
+
bn_norm: syncBN
|
| 23 |
+
audio_ch: 2 # datamodule.single_channel
|
| 24 |
+
g: 32
|
| 25 |
+
|
| 26 |
+
trainer:
|
| 27 |
+
devices: 2 # int or list
|
| 28 |
+
sync_batchnorm: True
|
| 29 |
+
track_grad_norm: 2
|
| 30 |
+
# gradient_clip_val: 5
|
| 31 |
+
|
| 32 |
+
datamodule:
|
| 33 |
+
batch_size: 8
|
| 34 |
+
num_workers: ${oc.decode:${oc.env:NUM_WORKERS}}
|
| 35 |
+
pin_memory: False
|
| 36 |
+
overlap: ${model.overlap}
|
| 37 |
+
audio_ch: ${model.audio_ch}
|
| 38 |
+
epoch_size:
|
configs/hydra/default.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# output paths for hydra logs
|
| 2 |
+
run:
|
| 3 |
+
# dir: logs/runs/${datamodule.target_name}_${exp_name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
| 4 |
+
dir: ${get_train_log_dir:${datamodule.target_name},${exp_name}}
|
| 5 |
+
|
| 6 |
+
sweep:
|
| 7 |
+
# dir: logs/multiruns/${now:%Y-%m-%d_%H-%M-%S}
|
| 8 |
+
dir: ${get_sweep_log_dir:${datamodule.target_name},${exp_name}}
|
| 9 |
+
subdir: ${hydra.job.num}
|
| 10 |
+
|
| 11 |
+
# you can set here environment variables that are universal for all users
|
| 12 |
+
# for system specific variables (like data paths) it's better to use .env file!
|
| 13 |
+
job:
|
| 14 |
+
env_set:
|
| 15 |
+
EXAMPLE_VAR: "example_value"
|
| 16 |
+
|
configs/infer.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# @package _global_
|
| 2 |
+
|
| 3 |
+
# specify here default training configuration
|
| 4 |
+
defaults:
|
| 5 |
+
- model: vocals
|
| 6 |
+
- paths: default.yaml
|
| 7 |
+
# enable color logging
|
| 8 |
+
- override hydra/hydra_logging: colorlog
|
| 9 |
+
- override hydra/job_logging: colorlog
|
| 10 |
+
|
| 11 |
+
#hydra:
|
| 12 |
+
# run:
|
| 13 |
+
# dir: ${get_eval_log_dir:${ckpt_path}}
|
| 14 |
+
|
| 15 |
+
#ckpt_path: "G:\\Experiments\\KLRef\\vocals.onnx"
|
| 16 |
+
ckpt_path:
|
| 17 |
+
mixture_path:
|
| 18 |
+
batch_size: 4
|
| 19 |
+
device: 'cuda:0'
|
| 20 |
+
|
| 21 |
+
double_chunk: False
|
| 22 |
+
|
| 23 |
+
overlap_add:
|
| 24 |
+
overlap_rate: 0.5
|
| 25 |
+
tmp_root: ${paths.root_dir}/tmp # for saving temp chunks, since we use ffmpeg and will need io to disk
|
| 26 |
+
samplerate: 44100
|
configs/logger/csv.yaml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# csv logger built in lightning
|
| 2 |
+
|
| 3 |
+
csv:
|
| 4 |
+
_target_: pytorch_lightning.loggers.csv_logs.CSVLogger
|
| 5 |
+
save_dir: "."
|
| 6 |
+
name: "csv/"
|
| 7 |
+
version: null
|
| 8 |
+
prefix: ""
|
configs/logger/many_loggers.yaml
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# train with many loggers at once
|
| 2 |
+
|
| 3 |
+
defaults:
|
| 4 |
+
# - aim.yaml
|
| 5 |
+
# - comet.yaml
|
| 6 |
+
- csv.yaml
|
| 7 |
+
# - mlflow.yaml
|
| 8 |
+
# - neptune.yaml
|
| 9 |
+
# - tensorboard.yaml
|
| 10 |
+
- wandb.yaml
|
configs/logger/neptune.yaml
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://neptune.ai
|
| 2 |
+
|
| 3 |
+
neptune:
|
| 4 |
+
_target_: pytorch_lightning.loggers.neptune.NeptuneLogger
|
| 5 |
+
api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is laoded from environment variable
|
| 6 |
+
project_name: your_name/template-tests
|
| 7 |
+
close_after_fit: True
|
| 8 |
+
offline_mode: False
|
| 9 |
+
experiment_name: null
|
| 10 |
+
experiment_id: null
|
| 11 |
+
prefix: ""
|
configs/logger/none.yaml
ADDED
|
File without changes
|
configs/logger/tensorboard.yaml
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://www.tensorflow.org/tensorboard/
|
| 2 |
+
|
| 3 |
+
tensorboard:
|
| 4 |
+
_target_: pytorch_lightning.loggers.tensorboard.TensorBoardLogger
|
| 5 |
+
save_dir: "tensorboard/"
|
| 6 |
+
name: "default"
|
| 7 |
+
version: null
|
| 8 |
+
log_graph: False
|
| 9 |
+
default_hp_metric: True
|
| 10 |
+
prefix: ""
|
configs/logger/wandb.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://wandb.ai
|
| 2 |
+
|
| 3 |
+
wandb:
|
| 4 |
+
_target_: pytorch_lightning.loggers.wandb.WandbLogger
|
| 5 |
+
project: dtt_${model.target_name}
|
| 6 |
+
name: null
|
| 7 |
+
save_dir: ${hydra:run.dir}
|
| 8 |
+
offline: False # set True to store all logs only locally
|
| 9 |
+
id: null # pass correct id to resume experiment!
|
| 10 |
+
# entity: "" # set to name of your wandb team or just remove it
|
| 11 |
+
log_model: False
|
| 12 |
+
prefix: ""
|
| 13 |
+
job_type: "train"
|
| 14 |
+
group: ""
|
| 15 |
+
tags: []
|
configs/model/bass.yaml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_target_: src.dp_tdf.dp_tdf_net.DPTDFNet
|
| 2 |
+
|
| 3 |
+
# abstract parent class
|
| 4 |
+
target_name: 'bass'
|
| 5 |
+
lr: 0.0001
|
| 6 |
+
optimizer: adamW
|
| 7 |
+
|
| 8 |
+
dim_f: 864
|
| 9 |
+
dim_t: 256
|
| 10 |
+
n_fft: 6144
|
| 11 |
+
hop_length: 1024
|
| 12 |
+
overlap: 3072
|
| 13 |
+
|
| 14 |
+
audio_ch: 2
|
| 15 |
+
|
| 16 |
+
block_type: TFC_TDF_Res2
|
| 17 |
+
num_blocks: 5
|
| 18 |
+
l: 3
|
| 19 |
+
g: 32
|
| 20 |
+
k: 3
|
| 21 |
+
bn: 2
|
| 22 |
+
bias: False
|
| 23 |
+
bn_norm: BN
|
| 24 |
+
bandsequence:
|
| 25 |
+
rnn_type: LSTM
|
| 26 |
+
bidirectional: True
|
| 27 |
+
num_layers: 4
|
| 28 |
+
n_heads: 2
|
configs/model/drums.yaml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_target_: src.dp_tdf.dp_tdf_net.DPTDFNet
|
| 2 |
+
|
| 3 |
+
# abstract parent class
|
| 4 |
+
target_name: 'drums'
|
| 5 |
+
lr: 0.0001
|
| 6 |
+
optimizer: adamW
|
| 7 |
+
|
| 8 |
+
dim_f: 2048
|
| 9 |
+
dim_t: 256
|
| 10 |
+
n_fft: 6144
|
| 11 |
+
hop_length: 1024
|
| 12 |
+
overlap: 3072
|
| 13 |
+
|
| 14 |
+
audio_ch: 2
|
| 15 |
+
|
| 16 |
+
block_type: TFC_TDF_Res2
|
| 17 |
+
num_blocks: 5
|
| 18 |
+
l: 3
|
| 19 |
+
g: 32
|
| 20 |
+
k: 3
|
| 21 |
+
bn: 8
|
| 22 |
+
bias: False
|
| 23 |
+
bn_norm: BN
|
| 24 |
+
bandsequence:
|
| 25 |
+
rnn_type: LSTM
|
| 26 |
+
bidirectional: True
|
| 27 |
+
num_layers: 4
|
| 28 |
+
n_heads: 2
|
configs/model/other.yaml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_target_: src.dp_tdf.dp_tdf_net.DPTDFNet
|
| 2 |
+
|
| 3 |
+
# abstract parent class
|
| 4 |
+
target_name: 'other'
|
| 5 |
+
lr: 0.0001
|
| 6 |
+
optimizer: adamW
|
| 7 |
+
|
| 8 |
+
dim_f: 2048
|
| 9 |
+
dim_t: 256
|
| 10 |
+
n_fft: 6144
|
| 11 |
+
hop_length: 1024
|
| 12 |
+
overlap: 3072
|
| 13 |
+
|
| 14 |
+
audio_ch: 2
|
| 15 |
+
|
| 16 |
+
block_type: TFC_TDF_Res2
|
| 17 |
+
num_blocks: 5
|
| 18 |
+
l: 3
|
| 19 |
+
g: 32
|
| 20 |
+
k: 3
|
| 21 |
+
bn: 8
|
| 22 |
+
bias: False
|
| 23 |
+
bn_norm: BN
|
| 24 |
+
bandsequence:
|
| 25 |
+
rnn_type: LSTM
|
| 26 |
+
bidirectional: True
|
| 27 |
+
num_layers: 4
|
| 28 |
+
n_heads: 2
|
configs/model/vocals.yaml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_target_: src.dp_tdf.dp_tdf_net.DPTDFNet
|
| 2 |
+
|
| 3 |
+
# abstract parent class
|
| 4 |
+
target_name: 'vocals'
|
| 5 |
+
lr: 0.0001
|
| 6 |
+
optimizer: adamW
|
| 7 |
+
|
| 8 |
+
dim_f: 2048
|
| 9 |
+
dim_t: 256
|
| 10 |
+
n_fft: 6144
|
| 11 |
+
hop_length: 1024
|
| 12 |
+
overlap: 3072
|
| 13 |
+
|
| 14 |
+
audio_ch: 2
|
| 15 |
+
|
| 16 |
+
block_type: TFC_TDF_Res2
|
| 17 |
+
num_blocks: 5
|
| 18 |
+
l: 3
|
| 19 |
+
g: 32
|
| 20 |
+
k: 3
|
| 21 |
+
bn: 8
|
| 22 |
+
bias: False
|
| 23 |
+
bn_norm: BN
|
| 24 |
+
bandsequence:
|
| 25 |
+
rnn_type: LSTM
|
| 26 |
+
bidirectional: True
|
| 27 |
+
num_layers: 4
|
| 28 |
+
n_heads: 2
|
configs/paths/default.yaml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# path to root directory
|
| 2 |
+
# this requires PROJECT_ROOT environment variable to exist
|
| 3 |
+
# you can replace it with "." if you want the root to be the current working directory
|
| 4 |
+
root_dir: ${oc.env:PROJECT_ROOT}
|
| 5 |
+
|
| 6 |
+
# path to data directory
|
| 7 |
+
data_dir: ${paths.root_dir}/data/
|
| 8 |
+
|
| 9 |
+
# path to logging directory
|
| 10 |
+
log_dir: ${oc.env:LOG_DIR}
|
| 11 |
+
|
| 12 |
+
# path to output directory, created dynamically by hydra
|
| 13 |
+
# path generation pattern is specified in `configs/hydra/default.yaml`
|
| 14 |
+
# use it to store all files generated during the run, like ckpts and metrics
|
| 15 |
+
output_dir: ${hydra:runtime.output_dir}
|
| 16 |
+
|
| 17 |
+
# path to working directory
|
| 18 |
+
work_dir: ${hydra:runtime.cwd}
|
configs/trainer/ddp.yaml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defaults:
|
| 2 |
+
- default.yaml
|
| 3 |
+
|
| 4 |
+
# use "ddp_spawn" instead of "ddp",
|
| 5 |
+
# it's slower but normal "ddp" currently doesn't work ideally with hydra
|
| 6 |
+
# https://github.com/facebookresearch/hydra/issues/2070
|
| 7 |
+
# https://pytorch-lightning.readthedocs.io/en/latest/accelerators/gpu_intermediate.html#distributed-data-parallel-spawn
|
| 8 |
+
strategy: ddp_spawn
|
| 9 |
+
|
| 10 |
+
accelerator: gpu
|
| 11 |
+
devices: 2
|
| 12 |
+
num_nodes: 1
|
| 13 |
+
sync_batchnorm: True
|
configs/trainer/default.yaml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_target_: pytorch_lightning.Trainer
|
| 2 |
+
|
| 3 |
+
default_root_dir: ${paths.output_dir}
|
| 4 |
+
|
| 5 |
+
min_epochs: 1 # prevents early stopping
|
| 6 |
+
max_epochs: 10
|
| 7 |
+
|
| 8 |
+
accelerator: cpu
|
| 9 |
+
devices: 1
|
| 10 |
+
|
| 11 |
+
# mixed precision for extra speed-up
|
| 12 |
+
# precision: 16
|
| 13 |
+
|
| 14 |
+
# perform a validation loop every N training epochs
|
| 15 |
+
check_val_every_n_epoch: 1
|
| 16 |
+
|
| 17 |
+
# set True to to ensure deterministic results
|
| 18 |
+
# makes training slower but gives more reproducibility than just setting seeds
|
| 19 |
+
deterministic: False
|
configs/trainer/minimal.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_target_: pytorch_lightning.Trainer
|
| 2 |
+
|
| 3 |
+
defaults:
|
| 4 |
+
- default
|
| 5 |
+
|
| 6 |
+
devices: 4
|
| 7 |
+
|
| 8 |
+
resume_from_checkpoint:
|
| 9 |
+
auto_lr_find: False
|
| 10 |
+
deterministic: True
|
| 11 |
+
accelerator: dp
|
| 12 |
+
sync_batchnorm: False
|
| 13 |
+
|
| 14 |
+
max_epochs: 3000
|
| 15 |
+
min_epochs: 1
|
| 16 |
+
check_val_every_n_epoch: 10
|
| 17 |
+
num_sanity_val_steps: 1
|
| 18 |
+
|
| 19 |
+
precision: 16
|
| 20 |
+
amp_backend: "native"
|
| 21 |
+
amp_level: "O2"
|
src/__init__.py
ADDED
|
File without changes
|
src/callbacks/__init__.py
ADDED
|
File without changes
|
src/callbacks/onnx_callback.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os.path
|
| 2 |
+
from typing import Dict, Any
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
from pytorch_lightning import Callback
|
| 6 |
+
import pytorch_lightning as pl
|
| 7 |
+
import inspect
|
| 8 |
+
from src.models.mdxnet import AbstractMDXNet
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class MakeONNXCallback(Callback):
|
| 12 |
+
"""Upload all *.py files to wandb as an artifact, at the beginning of the run."""
|
| 13 |
+
|
| 14 |
+
def __init__(self, dirpath: str):
|
| 15 |
+
self.dirpath = dirpath
|
| 16 |
+
if not os.path.exists(self.dirpath):
|
| 17 |
+
os.mkdir(self.dirpath)
|
| 18 |
+
|
| 19 |
+
def on_save_checkpoint(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule',
|
| 20 |
+
checkpoint: Dict[str, Any]) -> dict:
|
| 21 |
+
res = super().on_save_checkpoint(trainer, pl_module, checkpoint)
|
| 22 |
+
|
| 23 |
+
var = inspect.signature(pl_module.__init__).parameters
|
| 24 |
+
model = pl_module.__class__(**dict((name, pl_module.__dict__[name]) for name in var))
|
| 25 |
+
model.load_state_dict(pl_module.state_dict())
|
| 26 |
+
|
| 27 |
+
target_dir = '{}epoch_{}'.format(self.dirpath, pl_module.current_epoch)
|
| 28 |
+
|
| 29 |
+
try:
|
| 30 |
+
if not os.path.exists(target_dir):
|
| 31 |
+
os.mkdir(target_dir)
|
| 32 |
+
|
| 33 |
+
with torch.no_grad():
|
| 34 |
+
torch.onnx.export(model,
|
| 35 |
+
torch.zeros(model.inference_chunk_shape),
|
| 36 |
+
'{}/{}.onnx'.format(target_dir, model.target_name),
|
| 37 |
+
export_params=True, # store the trained parameter weights inside the model file
|
| 38 |
+
opset_version=13, # the ONNX version to export the model to
|
| 39 |
+
do_constant_folding=True, # whether to execute constant folding for optimization
|
| 40 |
+
input_names=['input'], # the model's input names
|
| 41 |
+
output_names=['output'], # the model's output names
|
| 42 |
+
dynamic_axes={'input': {0: 'batch_size'}, # variable length axes
|
| 43 |
+
'output': {0: 'batch_size'}})
|
| 44 |
+
except:
|
| 45 |
+
print('onnx error')
|
| 46 |
+
finally:
|
| 47 |
+
del model
|
| 48 |
+
|
| 49 |
+
return res
|
src/callbacks/wandb_callbacks.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import glob
|
| 2 |
+
import os
|
| 3 |
+
from typing import List, Optional, Any
|
| 4 |
+
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
import seaborn as sn
|
| 7 |
+
import torch
|
| 8 |
+
import wandb
|
| 9 |
+
from pytorch_lightning import Callback, Trainer
|
| 10 |
+
from pytorch_lightning.loggers import WandbLogger
|
| 11 |
+
from pytorch_lightning.utilities.types import STEP_OUTPUT
|
| 12 |
+
from sklearn import metrics
|
| 13 |
+
from sklearn.metrics import f1_score, precision_score, recall_score
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def get_wandb_logger(trainer: Trainer) -> WandbLogger:
    """Safely get Weights&Biases logger from Trainer."""

    # Single-logger setup: the primary logger is already the one we want.
    if isinstance(trainer.logger, WandbLogger):
        return trainer.logger

    # Multi-logger setup: scan the list for the first wandb logger.
    if isinstance(trainer.loggers, list):
        wandb_loggers = [lg for lg in trainer.loggers if isinstance(lg, WandbLogger)]
        if wandb_loggers:
            return wandb_loggers[0]

    raise Exception(
        "You are using wandb related callback, but WandbLogger was not found for some reason..."
    )
| 31 |
+
|
| 32 |
+
class UploadValidTrack(Callback):
    """Upload a centered crop of each validation output track to wandb as audio."""

    def __init__(self, crop: int, upload_after_n_epoch: int):
        # Total number of samples to upload (`crop` seconds at 44.1 kHz).
        self.sample_length = crop * 44100
        self.upload_after_n_epoch = upload_after_n_epoch
        # The crop extends half the window to each side of the track midpoint.
        self.len_left_window = self.len_right_window = self.sample_length // 2

    def on_validation_batch_end(
        self,
        trainer: 'pl.Trainer',
        pl_module: 'pl.LightningModule',
        outputs: Optional[STEP_OUTPUT],
        batch: Any,
        batch_idx: int,
        dataloader_idx: int,
    ) -> None:
        if outputs is None:
            return

        track_id, track = outputs['track_id'], outputs['track']

        # Resolve the wandb experiment first (raises if no WandbLogger),
        # then skip uploads for early epochs.
        experiment = get_wandb_logger(trainer=trainer).experiment
        if pl_module.current_epoch < self.upload_after_n_epoch:
            return None

        midpoint = track.shape[-1] // 2
        clip = track[:, midpoint - self.len_left_window:midpoint + self.len_right_window]

        key = 'track={}_epoch={}'.format(track_id, pl_module.current_epoch)
        experiment.log({key: [wandb.Audio(clip.T, sample_rate=44100)]})
|
| 63 |
+
|
| 64 |
+
class WatchModel(Callback):
    """Make wandb watch model at the beginning of the run."""

    def __init__(self, log: str = "gradients", log_freq: int = 100):
        # What to watch ("gradients", "parameters", ...) and logging cadence.
        self.log = log
        self.log_freq = log_freq

    def on_train_start(self, trainer, pl_module):
        wandb_logger = get_wandb_logger(trainer=trainer)
        wandb_logger.watch(model=trainer.model, log=self.log, log_freq=self.log_freq)
|
| 75 |
+
|
| 76 |
+
class UploadCodeAsArtifact(Callback):
    """Upload all *.py files to wandb as an artifact, at the beginning of the run."""

    def __init__(self, code_dir: str):
        self.code_dir = code_dir

    def on_train_start(self, trainer, pl_module):
        experiment = get_wandb_logger(trainer=trainer).experiment

        artifact = wandb.Artifact("project-source", type="code")
        py_files = glob.glob(os.path.join(self.code_dir, "**/*.py"), recursive=True)
        for py_file in py_files:
            artifact.add_file(py_file)

        experiment.use_artifact(artifact)
|
| 92 |
+
|
| 93 |
+
class UploadCheckpointsAsArtifact(Callback):
    """Upload checkpoints to wandb as an artifact, at the end of run."""

    def __init__(self, ckpt_dir: str = "checkpoints/", upload_best_only: bool = False):
        self.ckpt_dir = ckpt_dir
        self.upload_best_only = upload_best_only

    def on_train_end(self, trainer, pl_module):
        experiment = get_wandb_logger(trainer=trainer).experiment

        artifact = wandb.Artifact("experiment-ckpts", type="checkpoints")

        if self.upload_best_only:
            # Only the best-scoring checkpoint tracked by Lightning.
            artifact.add_file(trainer.checkpoint_callback.best_model_path)
        else:
            ckpt_pattern = os.path.join(self.ckpt_dir, "**/*.ckpt")
            for ckpt_path in glob.glob(ckpt_pattern, recursive=True):
                artifact.add_file(ckpt_path)

        experiment.use_artifact(artifact)
|
| 114 |
+
|
| 115 |
+
class LogConfusionMatrix(Callback):
    """Generate confusion matrix every epoch and send it to wandb.
    Expects validation step to return predictions and targets.
    """

    def __init__(self):
        # Per-epoch accumulators; cleared after each confusion matrix is logged.
        self.preds = []
        self.targets = []
        self.ready = True

    def on_sanity_check_start(self, trainer, pl_module) -> None:
        # Don't accumulate during Lightning's validation sanity check.
        self.ready = False

    def on_sanity_check_end(self, trainer, pl_module):
        """Start executing this callback only after all validation sanity checks end."""
        self.ready = True

    def on_validation_batch_end(
        self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx
    ):
        """Gather data from single batch."""
        if self.ready:
            self.preds.append(outputs["preds"])
            self.targets.append(outputs["targets"])

    def on_validation_epoch_end(self, trainer, pl_module):
        """Generate confusion matrix."""
        if self.ready:
            logger = get_wandb_logger(trainer)
            experiment = logger.experiment

            preds = torch.cat(self.preds).cpu().numpy()
            targets = torch.cat(self.targets).cpu().numpy()

            confusion_matrix = metrics.confusion_matrix(y_true=targets, y_pred=preds)

            # set figure size
            plt.figure(figsize=(14, 8))

            # set labels size
            sn.set(font_scale=1.4)

            # set font size
            sn.heatmap(confusion_matrix, annot=True, annot_kws={"size": 8}, fmt="g")

            # names should be uniqe or else charts from different experiments in wandb will overlap
            experiment.log({f"confusion_matrix/{experiment.name}": wandb.Image(plt)}, commit=False)

            # according to wandb docs this should also work but it crashes
            # experiment.log(f{"confusion_matrix/{experiment.name}": plt})

            # reset plot
            plt.clf()

            self.preds.clear()
            self.targets.clear()
|
| 172 |
+
|
| 173 |
+
class LogF1PrecRecHeatmap(Callback):
    """Generate f1, precision, recall heatmap every epoch and send it to wandb.
    Expects validation step to return predictions and targets.
    """

    def __init__(self, class_names: List[str] = None):
        # Per-epoch accumulators; cleared after each heatmap is logged.
        # (class_names is currently unused but kept for interface compatibility.)
        self.preds = []
        self.targets = []
        self.ready = True

    def on_sanity_check_start(self, trainer, pl_module):
        # Don't accumulate during Lightning's validation sanity check.
        self.ready = False

    def on_sanity_check_end(self, trainer, pl_module):
        """Start executing this callback only after all validation sanity checks end."""
        self.ready = True

    def on_validation_batch_end(
        self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx
    ):
        """Gather data from single batch."""
        if self.ready:
            self.preds.append(outputs["preds"])
            self.targets.append(outputs["targets"])

    def on_validation_epoch_end(self, trainer, pl_module):
        """Generate f1, precision and recall heatmap."""
        if self.ready:
            logger = get_wandb_logger(trainer=trainer)
            experiment = logger.experiment

            preds = torch.cat(self.preds).cpu().numpy()
            targets = torch.cat(self.targets).cpu().numpy()
            # BUGFIX: sklearn metrics take (y_true, y_pred). The original
            # passed (preds, targets), which swapped the meaning of the
            # precision and recall rows in the logged heatmap.
            f1 = f1_score(targets, preds, average=None)
            r = recall_score(targets, preds, average=None)
            p = precision_score(targets, preds, average=None)
            data = [f1, p, r]

            # set figure size
            plt.figure(figsize=(14, 3))

            # set labels size
            sn.set(font_scale=1.2)

            # set font size
            sn.heatmap(
                data,
                annot=True,
                annot_kws={"size": 10},
                fmt=".3f",
                yticklabels=["F1", "Precision", "Recall"],
            )

            # names should be uniqe or else charts from different experiments in wandb will overlap
            experiment.log({f"f1_p_r_heatmap/{experiment.name}": wandb.Image(plt)}, commit=False)

            # reset plot
            plt.clf()

            self.preds.clear()
            self.targets.clear()
+
|
| 235 |
+
|
| 236 |
+
class LogImagePredictions(Callback):
    """Logs a validation batch and their predictions to wandb.
    Example adapted from:
        https://wandb.ai/wandb/wandb-lightning/reports/Image-Classification-using-PyTorch-Lightning--VmlldzoyODk1NzY
    """

    def __init__(self, num_samples: int = 8):
        super().__init__()
        # Number of images from the first validation batch to log.
        self.num_samples = num_samples
        self.ready = True

    def on_sanity_check_start(self, trainer, pl_module):
        # Don't log during Lightning's validation sanity check.
        self.ready = False

    def on_sanity_check_end(self, trainer, pl_module):
        """Start executing this callback only after all validation sanity checks end."""
        self.ready = True

    def on_validation_epoch_end(self, trainer, pl_module):
        if self.ready:
            logger = get_wandb_logger(trainer=trainer)
            experiment = logger.experiment

            # get a validation batch from the validation dat loader
            # NOTE(review): assumes the datamodule yields (images, labels)
            # pairs — this callback does not fit the audio datamodule in
            # this project; verify before enabling it.
            val_samples = next(iter(trainer.datamodule.val_dataloader()))
            val_imgs, val_labels = val_samples

            # run the batch through the network
            val_imgs = val_imgs.to(device=pl_module.device)
            logits = pl_module(val_imgs)
            preds = torch.argmax(logits, axis=-1)

            # log the images as wandb Image
            experiment.log(
                {
                    f"Images/{experiment.name}": [
                        wandb.Image(x, caption=f"Pred:{pred}, Label:{y}")
                        for x, pred, y in zip(
                            val_imgs[: self.num_samples],
                            preds[: self.num_samples],
                            val_labels[: self.num_samples],
                        )
                    ]
                }
            )
src/datamodules/__init__.py
ADDED
|
File without changes
|
src/datamodules/datasets/__init__.py
ADDED
|
File without changes
|
src/datamodules/datasets/musdb.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from abc import ABCMeta, ABC
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
import soundfile
|
| 6 |
+
from torch.utils.data import Dataset
|
| 7 |
+
import torch
|
| 8 |
+
import numpy as np
|
| 9 |
+
import random
|
| 10 |
+
from tqdm import tqdm
|
| 11 |
+
|
| 12 |
+
from src.utils.utils import load_wav
|
| 13 |
+
from src import utils
|
| 14 |
+
import numpy as np
|
| 15 |
+
|
| 16 |
+
log = utils.get_pylogger(__name__)
|
| 17 |
+
|
| 18 |
+
def check_target_name(target_name, source_names):
    """Validate the configured target source name; print an error and exit on failure.

    Uses plain conditionals instead of the original try/assert pattern so the
    validation still runs under ``python -O`` (asserts are stripped there).

    Args:
        target_name: the source to separate, one of ``source_names`` or ``'all'``.
        source_names: list of valid stem names.
    """
    if target_name is None:
        print('[ERROR] please identify target name. ex) +datamodule.target_name="vocals"')
        exit(-1)
    if target_name not in source_names and target_name != 'all':
        print('[ERROR] target name should one of "bass", "drums", "other", "vocals", "all"')
        exit(-1)
|
| 30 |
+
|
| 31 |
+
def check_sample_rate(sr, sample_track):
    """Verify that an audio file's sample rate matches the config; exit on mismatch.

    Reads the file once (the original re-read it on failure) and avoids
    ``assert`` so the check survives ``python -O``.

    Args:
        sr: sample rate expected by the configuration.
        sample_track: path of an audio file to probe.
    """
    sample_rate = soundfile.read(sample_track)[1]
    if sample_rate != sr:
        print('[ERROR] sampling rate mismatched')
        print('\t=> sr in Config file: {}, but sr of data: {}'.format(sr, sample_rate))
        exit(-1)
|
| 41 |
+
|
| 42 |
+
class MusdbDataset(Dataset):
    """Base dataset: holds the stem list, chunk size and MUSDB root path."""

    __metaclass__ = ABCMeta

    def __init__(self, data_dir, chunk_size):
        # The four MUSDB18-HQ stems, in canonical alphabetical order.
        self.source_names = ['bass', 'drums', 'other', 'vocals']
        self.chunk_size = chunk_size
        self.musdb_path = Path(data_dir)
+
|
| 51 |
+
class MusdbTrainDataset(MusdbDataset):
    """Training dataset: every access draws random chunks of each stem from
    random tracks (inter-track mixing) and returns (mixture, target)."""

    def __init__(self, data_dir, chunk_size, target_name, aug_params, external_datasets, single_channel, epoch_size):
        super(MusdbTrainDataset, self).__init__(data_dir, chunk_size)

        self.single_channel = single_channel
        # Stems other than the target (kept for reference; not used below).
        self.neg_lst = [x for x in self.source_names if x != target_name]

        self.target_name = target_name
        check_target_name(self.target_name, self.source_names)

        # Metadata caches (pickled track lists with lengths) live here.
        if not self.musdb_path.joinpath('metadata').exists():
            os.mkdir(self.musdb_path.joinpath('metadata'))

        splits = ['train']
        if external_datasets is not None:
            splits += external_datasets

        # collect paths for datasets and metadata (track names and duration)
        datasets, metadata_caches = [], []
        raw_datasets = []  # un-augmented datasets
        for split in splits:
            raw_datasets.append(self.musdb_path.joinpath(split))
            max_pitch, max_tempo = aug_params
            # Pitch/tempo-augmented copies are sibling directories named
            # '<split>_p=<p>_t=<t>'; p == t == 0 is the raw split itself.
            for p in range(-max_pitch, max_pitch+1):
                for t in range(-max_tempo, max_tempo+1, 10):
                    aug_split = split if p==t==0 else split + f'_p={p}_t={t}'
                    datasets.append(self.musdb_path.joinpath(aug_split))
                    metadata_caches.append(self.musdb_path.joinpath('metadata').joinpath(aug_split + '.pkl'))

        # collect all track names and their duration
        self.metadata = []
        raw_track_lengths = []  # for calculating epoch size
        for i, (dataset, metadata_cache) in enumerate(tqdm(zip(datasets, metadata_caches))):
            try:
                # Load the cached (path, length) list to avoid re-scanning wavs.
                metadata = torch.load(metadata_cache)
            except FileNotFoundError:
                print('creating metadata for', dataset)
                metadata = []
                for track_name in sorted(os.listdir(dataset)):
                    track_path = dataset.joinpath(track_name)
                    # Length measured on vocals.wav — assumes all stems of a
                    # track have identical length (TODO confirm for externals).
                    track_length = load_wav(track_path.joinpath('vocals.wav')).shape[-1]
                    metadata.append((track_path, track_length))
                torch.save(metadata, metadata_cache)

            self.metadata += metadata
            if dataset in raw_datasets:
                raw_track_lengths += [length for path, length in metadata]

        # One epoch ≈ total un-augmented audio divided into chunks, unless an
        # explicit epoch_size is configured.
        self.epoch_size = sum(raw_track_lengths) // self.chunk_size if epoch_size is None else epoch_size
        log.info(f'epoch size: {self.epoch_size}')

    def __getitem__(self, _):
        # Index is ignored: each call is an independent random draw.
        sources = []
        for source_name in self.source_names:
            track_path, track_length = random.choice(self.metadata)  # random mixing between tracks
            source = load_wav(track_path.joinpath(source_name + '.wav'),
                              track_length=track_length, chunk_size=self.chunk_size)  # (2, times)
            sources.append(source)

        # The mixture is simply the sum of the independently drawn stems.
        mix = sum(sources)

        if self.target_name == 'all':
            # Targets for models that separate all four sources (ex. Demucs).
            # This adds additional 'source' dimension => batch_shape=[batch, source, channel, time]
            target = sources
        else:
            target = sources[self.source_names.index(self.target_name)]

        mix, target = torch.tensor(mix), torch.tensor(target)
        if self.single_channel:
            # Downmix stereo to mono by averaging channels.
            mix = torch.mean(mix, dim=0, keepdim=True)
            target = torch.mean(target, dim=0, keepdim=True)
        return mix, target

    def __len__(self):
        return self.epoch_size
| 127 |
+
|
| 128 |
+
|
| 129 |
+
class MusdbValidDataset(MusdbDataset):
    """Validation dataset: yields one whole track per index, pre-split into
    overlapping chunk batches ready for model-side aggregation."""

    def __init__(self, data_dir, chunk_size, target_name, overlap, batch_size, single_channel):
        super(MusdbValidDataset, self).__init__(data_dir, chunk_size)

        self.target_name = target_name
        check_target_name(self.target_name, self.source_names)

        self.overlap = overlap
        self.batch_size = batch_size
        self.single_channel = single_channel

        musdb_valid_path = self.musdb_path.joinpath('valid')
        self.track_paths = [musdb_valid_path.joinpath(track_name)
                            for track_name in os.listdir(musdb_valid_path)]

    def __getitem__(self, index):
        mix = load_wav(self.track_paths[index].joinpath('mixture.wav'))  # (2, time)

        if self.target_name == 'all':
            # Targets for models that separate all four sources (ex. Demucs).
            # This adds additional 'source' dimension => batch_shape=[batch, source, channel, time]
            target = [load_wav(self.track_paths[index].joinpath(source_name + '.wav'))
                      for source_name in self.source_names]
        else:
            target = load_wav(self.track_paths[index].joinpath(self.target_name + '.wav'))

        # Each chunk contributes chunk_output_size samples once the model
        # trims `overlap` samples from each side; pad so chunks tile exactly.
        chunk_output_size = self.chunk_size - 2 * self.overlap
        left_pad = np.zeros([2, self.overlap])
        right_pad = np.zeros([2, self.overlap + chunk_output_size - (mix.shape[-1] % chunk_output_size)])
        mix_padded = np.concatenate([left_pad, mix, right_pad], 1)

        # Sliding windows of chunk_size samples, stepping by chunk_output_size.
        num_chunks = mix_padded.shape[-1] // chunk_output_size
        mix_chunks = np.array([mix_padded[:, i * chunk_output_size: i * chunk_output_size + self.chunk_size]
                               for i in range(num_chunks)])
        mix_chunk_batches = torch.tensor(mix_chunks, dtype=torch.float32).split(self.batch_size)
        target = torch.tensor(target)

        if self.single_channel:
            # Downmix stereo to mono by averaging channels.
            mix_chunk_batches = [torch.mean(t, dim=1, keepdim=True) for t in mix_chunk_batches]
            target = torch.mean(target, dim=0, keepdim=True)

        return mix_chunk_batches, target

    def __len__(self):
        return len(self.track_paths)
|
src/datamodules/musdb_datamodule.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from os.path import exists, join
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Optional, Tuple
|
| 5 |
+
|
| 6 |
+
from pytorch_lightning import LightningDataModule
|
| 7 |
+
from torch.utils.data import ConcatDataset, DataLoader, Dataset, random_split
|
| 8 |
+
|
| 9 |
+
from src.datamodules.datasets.musdb import MusdbTrainDataset, MusdbValidDataset
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class MusdbDataModule(LightningDataModule):
    """
    LightningDataModule for Musdb18-HQ dataset.
    A DataModule implements 5 key methods:
        - prepare_data (things to do on 1 GPU/TPU, not on every GPU/TPU in distributed mode)
        - setup (things to do on every accelerator in distributed mode)
        - train_dataloader (the training dataloader)
        - val_dataloader (the validation dataloader(s))
        - test_dataloader (the test dataloader(s))
    This allows you to share a full dataset without explaining how to download,
    split, transform and process the data
    Read the docs:
        https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html
    """

    def __init__(
        self,
        data_dir: str,
        aug_params,
        target_name: str,
        overlap: int,
        hop_length: int,
        dim_t: int,
        sample_rate: int,
        batch_size: int,
        num_workers: int,
        pin_memory: bool,
        external_datasets,
        audio_ch: int,
        epoch_size,
        **kwargs,
    ):
        super().__init__()

        self.data_dir = Path(data_dir)
        self.target_name = target_name
        self.aug_params = aug_params
        self.external_datasets = external_datasets

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.pin_memory = pin_memory

        # audio-related
        self.hop_length = hop_length
        self.sample_rate = sample_rate
        self.single_channel = audio_ch == 1

        # derived
        # Chunk length in samples such that the STFT yields exactly dim_t frames.
        self.chunk_size = hop_length * (dim_t - 1)
        self.overlap = overlap

        self.epoch_size = epoch_size

        self.data_train: Optional[Dataset] = None
        self.data_val: Optional[Dataset] = None
        self.data_test: Optional[Dataset] = None

        trainset_path = self.data_dir.joinpath('train')
        validset_path = self.data_dir.joinpath('valid')

        # create validation split
        # NOTE(review): this mutates the dataset directory at construction
        # time, moving kwargs['validation_set'] tracks out of train/.
        if not exists(validset_path):
            from shutil import move
            os.mkdir(validset_path)
            for track in kwargs['validation_set']:
                if trainset_path.joinpath(track).exists():
                    move(trainset_path.joinpath(track), validset_path.joinpath(track))
        else:
            # Guard against a stale valid/ split left over from another config.
            valid_files = os.listdir(validset_path)
            assert set(valid_files) == set(kwargs['validation_set'])

    def setup(self, stage: Optional[str] = None):
        """Load data. Set variables: self.data_train, self.data_val, self.data_test."""
        self.data_train = MusdbTrainDataset(self.data_dir,
                                            self.chunk_size,
                                            self.target_name,
                                            self.aug_params,
                                            self.external_datasets,
                                            self.single_channel,
                                            self.epoch_size)

        self.data_val = MusdbValidDataset(self.data_dir,
                                          self.chunk_size,
                                          self.target_name,
                                          self.overlap,
                                          self.batch_size,
                                          self.single_channel)

    def train_dataloader(self):
        return DataLoader(
            dataset=self.data_train,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory,
            shuffle=True,
        )

    def val_dataloader(self):
        # batch_size=1: validation operates on whole tracks (see MusdbValidDataset).
        return DataLoader(
            dataset=self.data_val,
            batch_size=1,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory,
            shuffle=False,
        )
src/dp_tdf/__init__.py
ADDED
|
File without changes
|
src/dp_tdf/abstract.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABCMeta
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
import torch.nn.functional as F
|
| 8 |
+
|
| 9 |
+
from pytorch_lightning import LightningModule
|
| 10 |
+
from pytorch_lightning.utilities.types import STEP_OUTPUT
|
| 11 |
+
|
| 12 |
+
from src.utils.utils import sdr, simplified_msseval
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class AbstractModel(LightningModule):
|
| 16 |
+
__metaclass__ = ABCMeta
|
| 17 |
+
|
| 18 |
+
    def __init__(self, target_name,
                 lr, optimizer,
                 dim_f, dim_t, n_fft, hop_length, overlap,
                 audio_ch,
                 **kwargs):
        """Store STFT/optimizer hyper-parameters and precompute STFT buffers."""
        super().__init__()
        self.target_name = target_name
        self.lr = lr
        self.optimizer = optimizer
        # Real + imaginary parts per audio channel.
        self.dim_c_in = audio_ch * 2
        self.dim_c_out = audio_ch * 2
        self.dim_f = dim_f
        self.dim_t = dim_t
        self.n_fft = n_fft
        self.n_bins = n_fft // 2 + 1
        self.hop_length = hop_length
        self.audio_ch = audio_ch

        # Training chunk covers dim_t STFT frames; inference uses double.
        self.chunk_size = hop_length * (self.dim_t - 1)
        self.inference_chunk_size = hop_length * (self.dim_t*2 - 1)
        self.overlap = overlap
        # Registered as non-trainable Parameters so they move with .to(device)
        # and are saved with the state dict.
        self.window = nn.Parameter(torch.hann_window(window_length=self.n_fft, periodic=True), requires_grad=False)
        # Zero pad for the frequency bins above dim_f when restoring a full spectrum.
        self.freq_pad = nn.Parameter(torch.zeros([1, self.dim_c_out, self.n_bins - self.dim_f, 1]), requires_grad=False)
        # Shape probe used by the ONNX export callback; relies on self.stft,
        # which is defined outside this view (subclass/mixin) — verify there.
        self.inference_chunk_shape = (self.stft(torch.zeros([1, audio_ch, self.inference_chunk_size]))).shape
| 43 |
+
|
| 44 |
+
def configure_optimizers(self):
|
| 45 |
+
if self.optimizer == 'rmsprop':
|
| 46 |
+
print("Using RMSprop optimizer")
|
| 47 |
+
return torch.optim.RMSprop(self.parameters(), self.lr)
|
| 48 |
+
elif self.optimizer == 'adamW':
|
| 49 |
+
print("Using AdamW optimizer")
|
| 50 |
+
return torch.optim.AdamW(self.parameters(), self.lr)
|
| 51 |
+
|
| 52 |
+
    def comp_loss(self, pred_detail, target_wave):
        """L1 loss between the inverse-STFT of the prediction and the target waveform.

        Also logs the epoch-aggregated value as "train/comp_loss".
        """
        # Back to the time domain before comparing against the target wave.
        pred_detail = self.istft(pred_detail)

        comp_loss = F.l1_loss(pred_detail, target_wave)

        self.log("train/comp_loss", comp_loss, sync_dist=True, on_step=False, on_epoch=True, prog_bar=False)

        return comp_loss
|
| 61 |
+
|
| 62 |
+
    def training_step(self, *args, **kwargs) -> STEP_OUTPUT:
        """One optimization step: STFT the mixture, run the net, L1 loss vs target wave."""
        mix_wave, target_wave = args[0]  # (batch, c, 261120)

        # input 1
        stft_44k = self.stft(mix_wave)  # (batch, c*2, 1044, 256)
        # forward
        t_est_stft = self(stft_44k)  # (batch, c, 1044, 256)

        # comp_loss applies the inverse STFT and compares waveforms.
        loss = self.comp_loss(t_est_stft, target_wave)

        self.log("train/loss", loss, sync_dist=True, on_step=True, on_epoch=True, prog_bar=True)

        return {"loss": loss}
|
| 76 |
+
|
| 77 |
+
# Validation SDR is calculated on whole tracks and not chunks since
|
| 78 |
+
# short inputs have high possibility of being silent (all-zero signal)
|
| 79 |
+
# which leads to very low sdr values regardless of the model.
|
| 80 |
+
# A natural procedure would be to split a track into chunk batches and
|
| 81 |
+
# load them on multiple gpus, but aggregation was too difficult.
|
| 82 |
+
# So instead we load one whole track on a single device (data_loader batch_size should always be 1)
|
| 83 |
+
# and do all the batch splitting and aggregation on a single device.
|
| 84 |
+
def validation_step(self, *args, **kwargs) -> Optional[STEP_OUTPUT]:
    """Separate one whole validation track and score it.

    ``args[0]`` is ``(mix_chunk_batches, target)`` where
    ``mix_chunk_batches`` is a list of chunk mini-batches covering the
    track and ``target`` the full-length reference wave; the data loader
    batch size must be 1 (its leading dim is stripped below).

    Returns:
        dict with the track-level 'song' SDR (scalar) and per-chunk
        'chunk' SDR array from ``simplified_msseval``.
    """
    mix_chunk_batches, target = args[0]

    # remove data_loader batch dimension
    # [(b, c, time)], (c, all_times)
    mix_chunk_batches, target = [batch[0] for batch in mix_chunk_batches], target[0]

    # process whole track in batches of chunks
    target_hat_chunks = []
    for batch in mix_chunk_batches:
        # input
        stft_44k = self.stft(batch)  # (batch, c*2, dim_f, frames)
        pred_detail = self(stft_44k)  # predicted spectrogram for the target source
        pred_detail = self.istft(pred_detail)

        # drop the overlapped margin on both sides of each chunk
        target_hat_chunks.append(pred_detail[..., self.overlap:-self.overlap])
    target_hat_chunks = torch.cat(target_hat_chunks)  # (b*len(ls),c,t)

    # concat all output chunks into (c, all_times), trimming tail padding
    target_hat = target_hat_chunks.transpose(0, 1).reshape(self.audio_ch, -1)[..., :target.shape[-1]]

    ests = target_hat.detach().cpu().numpy()  # (c, all_times)
    references = target.cpu().numpy()
    score = sdr(ests, references)

    # chunk-wise SDR over 44100-sample windows; msseval wants (src, t, c)
    SDR = simplified_msseval(np.expand_dims(references.T, axis=0), np.expand_dims(ests.T, axis=0), chunk_size=44100)
    # self.log("val/sdr", score, sync_dist=True, on_step=False, on_epoch=True, logger=True)

    return {'song': score, 'chunk': SDR}
|
| 114 |
+
|
| 115 |
+
def validation_epoch_end(self, outputs) -> None:
    """Aggregate validation metrics: mean per-song SDR and median per-chunk SDR."""
    song_scores = torch.Tensor([out['song'] for out in outputs])
    self.log("val/usdr", song_scores.mean(), sync_dist=True, on_step=False, on_epoch=True, logger=True)

    # first row of each track's chunk-SDR matrix, joined into one array
    chunk_rows = np.concatenate([out['chunk'][0, :] for out in outputs], axis=0)
    median_chunk_sdr = float(np.nanmedian(chunk_rows.flatten(), axis=0))
    self.log("val/csdr", median_chunk_sdr, sync_dist=True, on_step=False, on_epoch=True, logger=True)
|
| 125 |
+
|
| 126 |
+
def stft(self, x):
    '''
    Waveform -> stacked real/imaginary spectrogram.

    Args:
        x: (batch, c, time) waveform batch.
    Returns:
        (batch, c*2, dim_f, frames): real and imaginary parts stacked
        along the channel axis, cropped to the lowest dim_f frequency bins.
    '''
    dim_b = x.shape[0]
    # fold channels into the batch so a single stft call covers them all
    x = x.reshape([dim_b * self.audio_ch, -1])  # (batch*c, time)
    # NOTE(review): torch.stft without return_complex relies on the legacy
    # real-valued (..., 2) output, deprecated in recent PyTorch — confirm
    # the pinned torch version still supports it.
    x = torch.stft(x, n_fft=self.n_fft, hop_length=self.hop_length, window=self.window, center=True)  # (batch*c, n_bins, frames, 2)
    x = x.permute([0, 3, 1, 2])  # (batch*c, 2, n_bins, frames)
    x = x.reshape([dim_b, self.audio_ch, 2, self.n_bins, -1]).reshape([dim_b, self.audio_ch * 2, self.n_bins, -1])  # (batch, c*2, n_bins, frames)
    return x[:, :, :self.dim_f]  # crop high bins: (batch, c*2, dim_f, frames)
|
| 137 |
+
|
| 138 |
+
def istft(self, x):
    '''
    Stacked real/imaginary spectrogram -> waveform (inverse of self.stft).

    Args:
        x: (batch, c*2, dim_f, frames)
    Returns:
        (batch, c, time)
    '''
    dim_b = x.shape[0]
    # restore the n_bins - dim_f cropped high-frequency bins with zeros
    x = torch.cat([x, self.freq_pad.repeat([x.shape[0], 1, 1, x.shape[-1]])], -2)  # (batch, c*2, n_bins, frames)
    x = x.reshape([dim_b, self.audio_ch, 2, self.n_bins, -1]).reshape([dim_b * self.audio_ch, 2, self.n_bins, -1])  # (batch*c, 2, n_bins, frames)
    x = x.permute([0, 2, 3, 1])  # legacy real/imag layout: (batch*c, n_bins, frames, 2)
    x = torch.istft(x, n_fft=self.n_fft, hop_length=self.hop_length, window=self.window, center=True)  # (batch*c, time)
    return x.reshape([dim_b, self.audio_ch, -1])  # (batch, c, time)
|
| 149 |
+
|
| 150 |
+
def demix(self, mix, inf_chunk_size, batch_size=5, inf_overf=4):
    '''
    Overlap-add inference over a full track on the GPU.

    Args:
        mix: (C, L) full mixture (numpy array or tensor).
        inf_chunk_size: samples per inference chunk.
        batch_size: chunks per forward pass.
        inf_overf: overlap factor; hop = inf_chunk_size // inf_overf.
    Returns:
        est: (src, C, L) estimated source(s); src is 1 for this model.
    '''
    # self.instruments = ['bass', 'drums', 'other', 'vocals']
    num_instruments = 1  # this model predicts a single target source

    # fix: channel count was hard-coded to 2 (stereo), which broke the
    # mono (audio_ch == 1) configuration supported elsewhere
    n_ch = mix.shape[0]

    inf_hop = inf_chunk_size // inf_overf  # hop size
    L = mix.shape[1]
    pad_size = inf_hop - (L - inf_chunk_size) % inf_hop
    # zero-pad both ends so every sample is covered by inf_overf chunks
    mix = torch.cat([torch.zeros(n_ch, inf_chunk_size - inf_hop), torch.Tensor(mix), torch.zeros(n_ch, pad_size + inf_chunk_size - inf_hop)], 1)
    mix = mix.cuda()

    # slice the padded track into overlapping chunks
    chunks = []
    i = 0
    while i + inf_chunk_size <= mix.shape[1]:
        chunks.append(mix[:, i:i + inf_chunk_size])
        i += inf_hop
    chunks = torch.stack(chunks)

    # group chunks into mini-batches for the forward pass
    batches = []
    i = 0
    while i < len(chunks):
        batches.append(chunks[i:i + batch_size])
        i = i + batch_size

    # running overlap-add buffer (src, c, t); grows by inf_hop per chunk
    X = torch.zeros(num_instruments, n_ch, inf_chunk_size - inf_hop)
    X = X.cuda()
    with torch.cuda.amp.autocast():
        with torch.no_grad():
            for batch in batches:
                x = self.stft(batch)
                x = self(x)
                x = self.istft(x)  # (batch, c, time)
                # insert a source axis; the model only predicts one source
                x = x[:, None, ...]  # (batch, 1, c, time)
                x = x.repeat([1, num_instruments, 1, 1])  # (batch, src, c, time)
                for w in x:  # overlap-add each chunk of the batch onto X
                    a = X[..., :-(inf_chunk_size - inf_hop)]
                    b = X[..., -(inf_chunk_size - inf_hop):] + w[..., :(inf_chunk_size - inf_hop)]
                    c = w[..., (inf_chunk_size - inf_hop):]
                    X = torch.cat([a, b, c], -1)

    # strip padding and normalise for the inf_overf-fold overlap
    estimated_sources = X[..., inf_chunk_size - inf_hop:-(pad_size + inf_chunk_size - inf_hop)] / inf_overf

    assert L == estimated_sources.shape[-1]

    return estimated_sources
|
| 204 |
+
|
src/dp_tdf/bandsequence.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
# Original code from https://github.com/amanteur/BandSplitRNN-Pytorch
|
| 5 |
+
class RNNModule(nn.Module):
    """RNN submodule of the BandSequence module.

    Normalises the feature dim, runs a (bi)RNN over dim 2, projects back,
    adds a residual connection, then swaps dims 1 and 2 so the next
    RNNModule processes the other axis (time <-> subbands).
    """

    def __init__(
            self,
            group_num: int,
            input_dim_size: int,
            hidden_dim_size: int,
            rnn_type: str = 'LSTM',  # fix: was 'lstm'; getattr(nn, 'lstm') raises AttributeError
            bidirectional: bool = True
    ):
        """
        Args:
            group_num: groups for the GroupNorm over the feature dim.
            input_dim_size: feature size N of the input.
            hidden_dim_size: RNN hidden size (doubled at the output if bidirectional).
            rnn_type: attribute name on torch.nn, e.g. 'LSTM' or 'GRU'.
            bidirectional: whether the RNN runs in both directions.
        """
        super(RNNModule, self).__init__()
        self.groupnorm = nn.GroupNorm(group_num, input_dim_size)
        self.rnn = getattr(nn, rnn_type)(
            input_dim_size, hidden_dim_size, batch_first=True, bidirectional=bidirectional
        )
        # project the (possibly doubled) hidden size back to the input size
        self.fc = nn.Linear(
            hidden_dim_size * 2 if bidirectional else hidden_dim_size,
            input_dim_size
        )

    def forward(
            self,
            x: torch.Tensor
    ):
        """
        Input shape:
            across T - [batch_size, k_subbands, time, n_features]
            OR
            across K - [batch_size, time, k_subbands, n_features]
        Output: same tensor with dims 1 and 2 transposed.
        """
        B, K, T, N = x.shape  # across T / across K (keep in mind T->K, K->T)

        out = x.view(B * K, T, N)  # [BK, T, N] / [BT, K, N]

        # GroupNorm expects channels in dim 1, hence the transposes
        out = self.groupnorm(
            out.transpose(-1, -2)
        ).transpose(-1, -2)  # [BK, T, N] / [BT, K, N]
        out = self.rnn(out)[0]  # [BK, T, H]; last dim holds features
        out = self.fc(out)  # [BK, T, N] / [BT, K, N]

        x = out.view(B, K, T, N) + x  # residual connection

        x = x.permute(0, 2, 1, 3).contiguous()  # swap the two sequence axes
        return x
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class BandSequenceModelModule(nn.Module):
    """
    BandSequence (2nd) Module of BandSplitRNN.

    Applies ``num_layers`` pairs of RNNModules, alternating between the
    time axis and the subband axis, with the channel dimension split into
    ``n_heads`` independent heads folded into the batch.
    """

    def __init__(
            self,
            # group_num,
            input_dim_size: int,
            hidden_dim_size: int,
            rnn_type: str = 'lstm',
            bidirectional: bool = True,
            num_layers: int = 12,
            n_heads: int = 4,
    ):
        super(BandSequenceModelModule, self).__init__()

        self.bsrnn = nn.ModuleList([])
        self.n_heads = n_heads

        # each head works on an equal slice of the channel / hidden dims
        input_dim_size = input_dim_size // n_heads
        hidden_dim_size = hidden_dim_size // n_heads
        group_num = input_dim_size // 16  # GroupNorm groups derived from head width

        for _ in range(num_layers):
            across_time = RNNModule(
                group_num, input_dim_size, hidden_dim_size, rnn_type, bidirectional
            )
            across_bands = RNNModule(
                group_num, input_dim_size, hidden_dim_size, rnn_type, bidirectional
            )
            self.bsrnn.append(nn.Sequential(across_time, across_bands))

    def forward(self, x: torch.Tensor):
        """
        Input shape:  [batch_size, channels, time, n_subbands]
        Output shape: [batch_size, channels, time, n_subbands]
        """
        b, c, t, f = x.shape
        # fold the heads into the batch dimension
        x = x.view(b * self.n_heads, c // self.n_heads, t, f)

        x = x.permute(0, 3, 2, 1).contiguous()  # [b*heads, f, t, c//heads]
        for layer in self.bsrnn:
            x = layer(x)

        x = x.permute(0, 3, 2, 1).contiguous()  # [b*heads, c//heads, t, f]
        return x.view(b, c, t, f)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
if __name__ == '__main__':
    # Smoke test: push random features through the module and print shapes.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # forward expects [batch, channels, time, subbands] where channels ==
    # input_dim_size (fix: the original passed k_subbands in the channel slot,
    # which is neither divisible by n_heads nor equal to input_dim_size)
    batch_size, k_subbands, t_timesteps, input_dim = 4, 41, 512, 128
    in_features = torch.rand(batch_size, input_dim, t_timesteps, k_subbands).to(device)

    cfg = {
        # "t_timesteps": t_timesteps,
        # fix: "group_num" removed — BandSequenceModelModule derives it
        # internally (input_dim_size // n_heads // 16) and passing it
        # raised TypeError: unexpected keyword argument.
        "input_dim_size": 128,
        "hidden_dim_size": 256,
        "rnn_type": "LSTM",
        "bidirectional": True,
        "num_layers": 1
    }
    model = BandSequenceModelModule(**cfg).to(device)
    _ = model.eval()

    with torch.no_grad():
        out_features = model(in_features)

    print(f"In: {in_features.shape}\nOut: {out_features.shape}")
    print(f"Total number of parameters: {sum([p.numel() for p in model.parameters()])}")
|
src/dp_tdf/dp_tdf_net.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch.nn as nn
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from src.dp_tdf.modules import TFC_TDF, TFC_TDF_Res1, TFC_TDF_Res2
|
| 5 |
+
from src.dp_tdf.bandsequence import BandSequenceModelModule
|
| 6 |
+
|
| 7 |
+
from src.layers import (get_norm)
|
| 8 |
+
from src.dp_tdf.abstract import AbstractModel
|
| 9 |
+
|
| 10 |
+
class DPTDFNet(AbstractModel):
    """U-Net over spectrograms built from TFC/TDF blocks, with a
    BandSequence (dual-path RNN) bottleneck."""

    def __init__(self, num_blocks, l, g, k, bn, bias, bn_norm, bandsequence, block_type, **kwargs):
        """
        Args:
            num_blocks: total conv blocks; the U-Net has num_blocks // 2 scales.
            l: conv layers per TFC block.
            g: channel growth per scale (also the first conv's output channels).
            k: conv kernel size inside TFC blocks.
            bn: TDF bottleneck factor (None disables TDF, 0 = single full-width linear).
            bias: bias flag for the TDF linear layers.
            bn_norm: normalisation spec understood by get_norm.
            bandsequence: kwargs dict for BandSequenceModelModule.
            block_type: one of 'TFC_TDF', 'TFC_TDF_Res1', 'TFC_TDF_Res2'.
            **kwargs: forwarded to AbstractModel (provides dim_c_in, dim_c_out, dim_f, ...).
        """

        super(DPTDFNet, self).__init__(**kwargs)
        # self.save_hyperparameters()

        self.num_blocks = num_blocks
        self.l = l
        self.g = g
        self.k = k
        self.bn = bn
        self.bias = bias

        self.n = num_blocks // 2
        scale = (2, 2)  # each scale halves (doubles) time and frequency

        if block_type == "TFC_TDF":
            T_BLOCK = TFC_TDF
        elif block_type == "TFC_TDF_Res1":
            T_BLOCK = TFC_TDF_Res1
        elif block_type == "TFC_TDF_Res2":
            T_BLOCK = TFC_TDF_Res2
        else:
            raise ValueError(f"Unknown block type {block_type}")

        # 1x1 conv lifting the stacked real/imag channels to g feature maps
        self.first_conv = nn.Sequential(
            nn.Conv2d(in_channels=self.dim_c_in, out_channels=g, kernel_size=(1, 1)),
            get_norm(bn_norm, g),
            nn.ReLU(),
        )

        # f tracks the current frequency size, c the current channel count
        f = self.dim_f
        c = g
        self.encoding_blocks = nn.ModuleList()
        self.ds = nn.ModuleList()

        for i in range(self.n):
            c_in = c

            self.encoding_blocks.append(T_BLOCK(c_in, c, l, f, k, bn, bn_norm, bias=bias))
            # strided conv downsamples by 2 and adds g channels
            self.ds.append(
                nn.Sequential(
                    nn.Conv2d(in_channels=c, out_channels=c + g, kernel_size=scale, stride=scale),
                    get_norm(bn_norm, c + g),
                    nn.ReLU()
                )
            )
            f = f // 2
            c += g

        self.bottleneck_block1 = T_BLOCK(c, c, l, f, k, bn, bn_norm, bias=bias)
        # dual-path RNN over the bottleneck's time/subband axes
        self.bottleneck_block2 = BandSequenceModelModule(
            **bandsequence,
            input_dim_size=c,
            hidden_dim_size=2*c
        )

        self.decoding_blocks = nn.ModuleList()
        self.us = nn.ModuleList()
        for i in range(self.n):
            # transposed conv upsamples by 2 and removes g channels
            self.us.append(
                nn.Sequential(
                    nn.ConvTranspose2d(in_channels=c, out_channels=c - g, kernel_size=scale, stride=scale),
                    get_norm(bn_norm, c - g),
                    nn.ReLU()
                )
            )

            f = f * 2
            c -= g

            self.decoding_blocks.append(T_BLOCK(c, c, l, f, k, bn, bn_norm, bias=bias))

        # 1x1 conv back to the output spectrogram channel count
        self.final_conv = nn.Sequential(
            nn.Conv2d(in_channels=c, out_channels=self.dim_c_out, kernel_size=(1, 1)),
        )

    def forward(self, x):
        '''
        Args:
            x: (batch, c*2, dim_f, frames) stacked real/imag spectrogram.
        Returns:
            (batch, dim_c_out, dim_f, frames) predicted spectrogram.
        '''
        x = self.first_conv(x)

        # blocks operate with time before frequency; swap the last two dims
        x = x.transpose(-1, -2)

        ds_outputs = []
        for i in range(self.n):
            x = self.encoding_blocks[i](x)
            ds_outputs.append(x)
            x = self.ds[i](x)

        x = self.bottleneck_block1(x)
        x = self.bottleneck_block2(x)

        for i in range(self.n):
            x = self.us[i](x)
            # multiplicative (gating) skip connection with the matching encoder output
            x = x * ds_outputs[-i - 1]
            x = self.decoding_blocks[i](x)

        x = x.transpose(-1, -2)

        x = self.final_conv(x)

        return x
|
src/dp_tdf/modules.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
from src.layers import (get_norm)
|
| 5 |
+
|
| 6 |
+
class TFC(nn.Module):
    """Time-Frequency Convolutions: a stack of ``l`` Conv2d + norm + ReLU layers."""

    def __init__(self, c_in, c_out, l, k, bn_norm):
        """
        Args:
            c_in: input channels of the first conv layer.
            c_out: output channels of every conv layer.
            l: number of conv layers.
            k: square kernel size; padding k // 2 keeps spatial dims for odd k.
            bn_norm: normalisation spec understood by get_norm.
        """
        super(TFC, self).__init__()

        self.H = nn.ModuleList()
        for i in range(l):
            # only the first layer adapts the channel count (idiom cleanup of
            # the original's no-op `c_in = c_in` branch)
            in_ch = c_in if i == 0 else c_out
            self.H.append(
                nn.Sequential(
                    nn.Conv2d(in_channels=in_ch, out_channels=c_out, kernel_size=k, stride=1, padding=k // 2),
                    get_norm(bn_norm, c_out),
                    nn.ReLU(),
                )
            )

    def forward(self, x):
        for h in self.H:
            x = h(x)
        return x
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class DenseTFC(nn.Module):
    """Densely-connected TFC: each layer sees the concat of all previous outputs.

    Bug fix: the original built every conv with in_channels=c_in, but the
    forward pass concatenates each layer's output onto its input, so layer i
    actually receives c_in + i * c_out channels. For any l > 1 the original
    crashed with a channel-count mismatch at runtime.
    """

    def __init__(self, c_in, c_out, l, k, bn_norm):
        super(DenseTFC, self).__init__()

        self.conv = nn.ModuleList()
        for i in range(l):
            self.conv.append(
                nn.Sequential(
                    # layer i input: original c_in plus the i previous c_out outputs
                    nn.Conv2d(in_channels=c_in + i * c_out, out_channels=c_out, kernel_size=k, stride=1, padding=k // 2),
                    get_norm(bn_norm, c_out),
                    nn.ReLU(),
                )
            )

    def forward(self, x):
        # all layers but the last grow the feature stack; the last reduces it
        for layer in self.conv[:-1]:
            x = torch.cat([layer(x), x], 1)
        return self.conv[-1](x)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class TFC_TDF(nn.Module):
    """TFC followed by an optional TDF (time-distributed fully-connected)
    bottleneck over the frequency axis, added residually in forward()."""

    def __init__(self, c_in, c_out, l, f, k, bn, bn_norm, dense=False, bias=True):
        super(TFC_TDF, self).__init__()

        # bn is the TDF bottleneck factor: None disables the TDF branch,
        # 0 selects a single full-width linear layer.
        self.use_tdf = bn is not None

        tfc_cls = DenseTFC if dense else TFC
        self.tfc = tfc_cls(c_in, c_out, l, k, bn_norm)

        if self.use_tdf:
            if bn == 0:
                tdf_layers = [nn.Linear(f, f, bias=bias), get_norm(bn_norm, c_out), nn.ReLU()]
            else:
                tdf_layers = [
                    nn.Linear(f, f // bn, bias=bias), get_norm(bn_norm, c_out), nn.ReLU(),
                    nn.Linear(f // bn, f, bias=bias), get_norm(bn_norm, c_out), nn.ReLU(),
                ]
            self.tdf = nn.Sequential(*tdf_layers)

    def forward(self, x):
        x = self.tfc(x)
        if self.use_tdf:
            return x + self.tdf(x)
        return x
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
class TFC_TDF_Res1(nn.Module):
    """TFC_TDF variant with an extra single-layer TFC shortcut around the
    main TFC stack."""

    def __init__(self, c_in, c_out, l, f, k, bn, bn_norm, dense=False, bias=True):
        super(TFC_TDF_Res1, self).__init__()

        # bn: TDF bottleneck factor (None = no TDF branch, 0 = full-width linear)
        self.use_tdf = bn is not None

        tfc_cls = DenseTFC if dense else TFC
        self.tfc = tfc_cls(c_in, c_out, l, k, bn_norm)

        # one-layer conv path carrying the input around the main stack
        self.res = TFC(c_in, c_out, 1, k, bn_norm)

        if self.use_tdf:
            if bn == 0:
                tdf_layers = [nn.Linear(f, f, bias=bias), get_norm(bn_norm, c_out), nn.ReLU()]
            else:
                tdf_layers = [
                    nn.Linear(f, f // bn, bias=bias), get_norm(bn_norm, c_out), nn.ReLU(),
                    nn.Linear(f // bn, f, bias=bias), get_norm(bn_norm, c_out), nn.ReLU(),
                ]
            self.tdf = nn.Sequential(*tdf_layers)

    def forward(self, x):
        shortcut = self.res(x)
        x = self.tfc(x) + shortcut
        if self.use_tdf:
            return x + self.tdf(x)
        return x
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
class TFC_TDF_Res2(nn.Module):
    """Two TFC stacks with an optional TDF between them and a one-layer
    TFC shortcut around the whole block."""

    def __init__(self, c_in, c_out, l, f, k, bn, bn_norm, dense=False, bias=True):
        super(TFC_TDF_Res2, self).__init__()

        # bn: TDF bottleneck factor (None = no TDF branch, 0 = full-width linear)
        self.use_tdf = bn is not None

        self.tfc1 = TFC(c_in, c_out, l, k, bn_norm)
        self.tfc2 = TFC(c_in, c_out, l, k, bn_norm)

        # one-layer conv path carrying the input around the whole block
        self.res = TFC(c_in, c_out, 1, k, bn_norm)

        if self.use_tdf:
            if bn == 0:
                tdf_layers = [nn.Linear(f, f, bias=bias), get_norm(bn_norm, c_out), nn.ReLU()]
            else:
                tdf_layers = [
                    nn.Linear(f, f // bn, bias=bias), get_norm(bn_norm, c_out), nn.ReLU(),
                    nn.Linear(f // bn, f, bias=bias), get_norm(bn_norm, c_out), nn.ReLU(),
                ]
            self.tdf = nn.Sequential(*tdf_layers)

    def forward(self, x):
        shortcut = self.res(x)
        x = self.tfc1(x)
        if self.use_tdf:
            x = x + self.tdf(x)
        x = self.tfc2(x)
        return x + shortcut
|
src/evaluation/eval.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from os import listdir
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Optional, List
|
| 4 |
+
|
| 5 |
+
from concurrent import futures
|
| 6 |
+
import hydra
|
| 7 |
+
import wandb
|
| 8 |
+
import os
|
| 9 |
+
import shutil
|
| 10 |
+
from omegaconf import DictConfig
|
| 11 |
+
from pytorch_lightning import LightningDataModule, LightningModule
|
| 12 |
+
from pytorch_lightning.loggers import Logger, WandbLogger
|
| 13 |
+
import soundfile as sf
|
| 14 |
+
|
| 15 |
+
from tqdm import tqdm
|
| 16 |
+
import numpy as np
|
| 17 |
+
from src.callbacks.wandb_callbacks import get_wandb_logger
|
| 18 |
+
from src.evaluation.separate import separate_with_onnx_TDF, separate_with_ckpt_TDF
|
| 19 |
+
from src.utils import utils
|
| 20 |
+
from src.utils.utils import load_wav, sdr, get_median_csdr, save_results, get_metrics
|
| 21 |
+
|
| 22 |
+
from src.utils import pylogger
|
| 23 |
+
|
| 24 |
+
log = pylogger.get_pylogger(__name__)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def evaluation(config: DictConfig):
    """Evaluate a separation checkpoint on every track of one MUSDB split.

    Separates each track's mixture with the checkpoint in ``config.ckpt_path``
    (ONNX or Lightning ckpt), scores it with BSS metrics in a process pool,
    logs per-song and aggregate metrics to the configured loggers, and saves
    results to the current working directory.

    Returns:
        (cSDR, uSDR): median chunk-level SDR and mean song-level SDR.
    """

    assert config.split in ['train', 'valid', 'test']

    data_dir = Path(config.get('eval_dir')).joinpath(config['split'])
    assert data_dir.exists()

    # Init Lightning loggers
    loggers: List[Logger] = []
    if "logger" in config:
        for _, lg_conf in config.logger.items():
            if "_target_" in lg_conf:
                log.info(f"Instantiating logger <{lg_conf._target_}>")
                loggers.append(hydra.utils.instantiate(lg_conf))

    if any([isinstance(l, WandbLogger) for l in loggers]):
        utils.wandb_login(key=config.wandb_api_key)

    model = hydra.utils.instantiate(config.model)
    target_name = model.target_name
    ckpt_path = Path(config.ckpt_path)
    # '.onnx' checkpoints take the onnxruntime separation path below
    is_onnx = os.path.split(ckpt_path)[-1].split('.')[-1] == 'onnx'
    shutil.copy(ckpt_path,os.getcwd()) # copy model

    ssdrs = []
    bss_lst = []
    bss_perms = []
    num_tracks = len(listdir(data_dir))
    target_list = [config.model.target_name,"complement"]


    # NOTE(review): the executor class is deliberately rebound to the pool
    # instance by the `with` statement below — confusing but harmless.
    pool = futures.ProcessPoolExecutor
    with pool(config.pool_workers) as pool:
        datas = sorted(listdir(data_dir))
        if len(datas) > 27: # if not debugging
            # move idx 27 to head
            datas = [datas[27]] + datas[:27] + datas[28:]
        # iterate datas in batches of pool_workers tracks
        for k in range(0, len(datas), config.pool_workers):
            batch = datas[k:k + config.pool_workers]
            pendings = []
            for i, track in tqdm(enumerate(batch)):
                folder_name = track
                track = data_dir.joinpath(track)
                mixture = load_wav(track.joinpath('mixture.wav')) # (c, t)
                target = load_wav(track.joinpath(target_name + '.wav'))

                if model.audio_ch == 1:
                    # mono model: collapse stereo to one averaged channel
                    mixture = np.mean(mixture, axis=0, keepdims=True)
                    target = np.mean(target, axis=0, keepdims=True)
                #target_hat = {source: separate(config['batch_size'], models[source], onnxs[source], mixture) for source in sources}
                if is_onnx:
                    target_hat = separate_with_onnx_TDF(config.batch_size, model, ckpt_path, mixture)
                else:
                    target_hat = separate_with_ckpt_TDF(config.batch_size, model, ckpt_path, mixture, config.device, config.double_chunk, config.overlap_add)


                # metric computation is slow, so it is farmed out to the pool
                pendings.append((folder_name, pool.submit(
                    get_metrics, target_hat, target, mixture, sr=44100,version=config.bss)))

                for wandb_logger in [logger for logger in loggers if isinstance(logger, WandbLogger)]:
                    # log a 6-second excerpt from the middle of the estimate
                    # (note: `track` is rebound here from Path to ndarray)
                    mid = mixture.shape[-1] // 2
                    track = target_hat[:, mid - 44100 * 3:mid + 44100 * 3]
                    wandb_logger.experiment.log(
                        {f'track={k+i}_target={target_name}': [wandb.Audio(track.T, sample_rate=44100)]})


            # collect this batch's pool results in submission order
            for i, (track_name, pending) in tqdm(enumerate(pendings)):
                pending = pending.result()
                bssmetrics, perms, ssdr = pending
                bss_lst.append(bssmetrics)
                bss_perms.append(perms)
                ssdrs.append(ssdr)

                for logger in loggers:
                    logger.log_metrics({'song/ssdr': ssdr}, k+i)
                    logger.log_metrics({'song/csdr': get_median_csdr([bssmetrics])}, k+i)

    log_dir = os.getcwd()
    save_results(log_dir, bss_lst, target_list, bss_perms, ssdrs)

    cSDR = get_median_csdr(bss_lst)
    uSDR = sum(ssdrs)/num_tracks
    for logger in loggers:
        logger.log_metrics({'metrics/mean_sdr_' + target_name: sum(ssdrs)/num_tracks})
        logger.log_metrics({'metrics/median_csdr_' + target_name: get_median_csdr(bss_lst)})
        # get the path of the log dir
        if not isinstance(logger, WandbLogger):
            logger.experiment.close()

    if any([isinstance(logger, WandbLogger) for logger in loggers]):
        wandb.finish()

    return cSDR, uSDR
|
src/evaluation/eval_demo.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from os import listdir
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Optional, List
|
| 4 |
+
|
| 5 |
+
from concurrent import futures
|
| 6 |
+
import hydra
|
| 7 |
+
import wandb
|
| 8 |
+
import os
|
| 9 |
+
import shutil
|
| 10 |
+
from omegaconf import DictConfig
|
| 11 |
+
from pytorch_lightning import LightningDataModule, LightningModule
|
| 12 |
+
from pytorch_lightning.loggers import Logger, WandbLogger
|
| 13 |
+
|
| 14 |
+
from tqdm import tqdm
|
| 15 |
+
import numpy as np
|
| 16 |
+
from src.callbacks.wandb_callbacks import get_wandb_logger
|
| 17 |
+
from src.evaluation.separate import separate_with_onnx_TDF, separate_with_ckpt_TDF
|
| 18 |
+
from src.utils import utils
|
| 19 |
+
from src.utils.utils import load_wav, sdr, get_median_csdr, save_results, get_metrics
|
| 20 |
+
|
| 21 |
+
from src.utils import pylogger
|
| 22 |
+
import soundfile as sf
|
| 23 |
+
log = pylogger.get_pylogger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def evaluation(config: DictConfig, idx):
    """Separate and score a single track (index ``idx``) of the eval split.

    Loads the checkpoint named in ``config.ckpt_path``, separates the idx-th
    track (after the idx-27 reordering used by eval.py), computes BSS metrics
    and pickles them to 'bssmetrics.pkl' in the current working directory.

    Returns:
        the BSS metrics object for the track.
    """

    assert config.split in ['train', 'valid', 'test']

    data_dir = Path(config.get('eval_dir')).joinpath(config['split'])
    assert data_dir.exists()

    model = hydra.utils.instantiate(config.model)
    target_name = model.target_name
    ckpt_path = Path(config.ckpt_path)
    # '.onnx' checkpoints take the onnxruntime separation path below
    is_onnx = os.path.split(ckpt_path)[-1].split('.')[-1] == 'onnx'
    shutil.copy(ckpt_path,os.getcwd()) # copy model

    datas = sorted(listdir(data_dir))
    if len(datas) > 27: # if not debugging
        # move idx 27 to head (same reordering as eval.py so indices match)
        datas = [datas[27]] + datas[:27] + datas[28:]


    track = datas[idx]
    track = data_dir.joinpath(track)
    print(track)
    mixture = load_wav(track.joinpath('mixture.wav')) # (c, t)
    target = load_wav(track.joinpath(target_name + '.wav'))
    if model.audio_ch == 1:
        # mono model: collapse stereo to one averaged channel
        mixture = np.mean(mixture, axis=0, keepdims=True)
        target = np.mean(target, axis=0, keepdims=True)
    #target_hat = {source: separate(config['batch_size'], models[source], onnxs[source], mixture) for source in sources}
    if is_onnx:
        target_hat = separate_with_onnx_TDF(config.batch_size, model, ckpt_path, mixture)
    else:
        # NOTE(review): passes overlap_factor=config.overlap_factor while
        # eval.py passes config.overlap_add positionally — confirm both
        # configs and the separate_with_ckpt_TDF signature agree.
        target_hat = separate_with_ckpt_TDF(config.batch_size, model, ckpt_path, mixture, config.device, config.double_chunk, overlap_factor=config.overlap_factor)

    bssmetrics, perms, ssdr = get_metrics(target_hat, target, mixture, sr=44100,version=config.bss)
    # dump bssmetrics into pkl
    import pickle
    with open(os.path.join(os.getcwd(),'bssmetrics.pkl'),'wb') as f:
        pickle.dump(bssmetrics,f)

    return bssmetrics
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
|
src/evaluation/separate.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from os import listdir
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import numpy as np
|
| 6 |
+
import onnxruntime as ort
|
| 7 |
+
import math
|
| 8 |
+
import os
|
| 9 |
+
from src.utils.utils import split_nparray_with_overlap, join_chunks
|
| 10 |
+
|
| 11 |
+
def separate_with_onnx(batch_size, model, onnx_path: Path, mix):
    """Separate a stereo mixture with an exported ONNX model, chunk by chunk.

    Args:
        batch_size: number of chunks per inference batch.
        model: provides n_fft, sampling_size, stft() and istft().
        onnx_path: path to the exported .onnx graph.
        mix: (2, t) mixture waveform.

    Returns:
        (2, t) separated waveform as a numpy array.
    """
    n_sample = mix.shape[1]

    trim = model.n_fft // 2
    gen_size = model.sampling_size - 2 * trim
    pad = gen_size - n_sample % gen_size
    # Zero-pad so every chunk is full-sized; `trim` samples on each side of a
    # chunk are discarded after inference to hide window edge artifacts.
    mix_p = np.concatenate((np.zeros((2, trim)), mix, np.zeros((2, pad)), np.zeros((2, trim))), 1)

    chunks = []
    i = 0
    while i < n_sample + pad:
        chunks.append(np.array(mix_p[:, i:i + model.sampling_size], dtype=np.float32))
        i += gen_size
    # Stack first: torch.tensor on a list of ndarrays copies element-wise and
    # is far slower (and emits a warning on recent torch versions).
    mix_waves_batched = torch.from_numpy(np.stack(chunks)).split(batch_size)

    tar_signals = []

    with torch.no_grad():
        _ort = ort.InferenceSession(str(onnx_path))
        for mix_waves in mix_waves_batched:  # no longer shadows the chunk list
            tar_waves = model.istft(torch.tensor(
                _ort.run(None, {'input': model.stft(mix_waves).numpy()})[0]
            ))
            tar_signals.append(tar_waves[:, :, trim:-trim].transpose(0, 1).reshape(2, -1).numpy())
    tar_signal = np.concatenate(tar_signals, axis=-1)[:, :-pad]

    return tar_signal
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def separate_with_ckpt(batch_size, model, ckpt_path: Path, mix, device, double_chunk):
    """Separate a stereo mixture with a Lightning checkpoint, chunk by chunk.

    Args:
        batch_size: number of chunks per inference batch.
        model: LightningModule class/instance exposing load_from_checkpoint,
            stft/istft, trim, sampling_size and inference_chunk_size.
        ckpt_path: path to the .ckpt file.
        mix: (2, t) mixture waveform.
        device: torch device for inference.
        double_chunk: use the larger inference_chunk_size instead of
            sampling_size.

    Returns:
        (2, t) separated waveform as a numpy array.
    """
    model = model.load_from_checkpoint(ckpt_path).to(device)
    if double_chunk:
        inf_ck = model.inference_chunk_size
    else:
        inf_ck = model.sampling_size
    true_samples = inf_ck - 2 * model.trim

    right_pad = true_samples + model.trim - ((mix.shape[-1]) % true_samples)
    # NOTE(review): the channel count is hard-coded to 2 here, while
    # no_overlap_inference uses model.audio_ch — confirm stereo-only is intended.
    mixture = np.concatenate((np.zeros((2, model.trim), dtype='float32'),
                              mix,
                              np.zeros((2, right_pad), dtype='float32')),
                             1)
    num_chunks = mixture.shape[-1] // true_samples
    chunks = [mixture[:, i * true_samples: i * true_samples + inf_ck]
              for i in range(num_chunks)]
    # Stack first: torch.tensor on a list of ndarrays copies element-wise and
    # is far slower (and emits a warning on recent torch versions).
    mix_waves_batched = torch.as_tensor(np.stack(chunks), dtype=torch.float32).split(batch_size)

    target_wav_hats = []

    with torch.no_grad():
        model.eval()
        for mixture_wav in mix_waves_batched:
            mix_spec = model.stft(mixture_wav.to(device))
            spec_hat = model(mix_spec)
            target_wav_hat = model.istft(spec_hat)
            target_wav_hats.append(target_wav_hat.cpu().detach().numpy())

    # Drop the trimmed margins, then lay the chunks end to end and cut the pad.
    target_wav_hat = np.vstack(target_wav_hats)[:, :, model.trim:-model.trim]
    target_wav_hat = np.concatenate(target_wav_hat, axis=-1)[:, :mix.shape[-1]]
    return target_wav_hat
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def separate_with_onnx_TDF(batch_size, model, onnx_path: Path, mix):
    """Separate a stereo mixture with an exported TDF ONNX model (CUDA EP).

    Args:
        batch_size: number of chunks per inference batch.
        model: provides n_fft, inference_chunk_size, stft() and istft().
        onnx_path: path to the exported .onnx graph.
        mix: (2, t) mixture waveform.

    Returns:
        (2, t) separated waveform as a numpy array.
    """
    n_sample = mix.shape[1]

    overlap = model.n_fft // 2
    gen_size = model.inference_chunk_size - 2 * overlap
    pad = gen_size - n_sample % gen_size
    # Zero-pad so every chunk is full-sized; `overlap` samples on each side of
    # a chunk are discarded after inference to hide window edge artifacts.
    mix_p = np.concatenate((np.zeros((2, overlap)), mix, np.zeros((2, pad)), np.zeros((2, overlap))), 1)

    chunks = []
    i = 0
    while i < n_sample + pad:
        chunks.append(np.array(mix_p[:, i:i + model.inference_chunk_size], dtype=np.float32))
        i += gen_size
    # Stack first: torch.tensor on a list of ndarrays copies element-wise and
    # is far slower (and emits a warning on recent torch versions).
    mix_waves_batched = torch.from_numpy(np.stack(chunks)).split(batch_size)

    tar_signals = []

    with torch.no_grad():
        _ort = ort.InferenceSession(str(onnx_path), providers=['CUDAExecutionProvider'])
        for mix_waves in mix_waves_batched:  # no longer shadows the chunk list
            tar_waves = model.istft(torch.tensor(
                _ort.run(None, {'input': model.stft(mix_waves).numpy()})[0]
            ))
            tar_signals.append(tar_waves[:, :, overlap:-overlap].transpose(0, 1).reshape(2, -1).numpy())
    tar_signal = np.concatenate(tar_signals, axis=-1)[:, :-pad]

    return tar_signal
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def separate_with_ckpt_TDF(batch_size, model, ckpt_path: Path, mix, device, double_chunk, overlap_add):
    '''
    Load a checkpoint into `model` and separate `mix`, dispatching to plain
    chunked inference or overlap-add inference.

    Args:
        batch_size: the inference batch size
        model: the model to be used
        ckpt_path: the path to the checkpoint
        mix: (c, t)
        device: the device to be used
        double_chunk: whether to use double chunk size
        overlap_add: None for plain chunking, otherwise a config object with
            overlap_rate, tmp_root and samplerate for overlap-add inference
    Returns:
        target_wav_hat: (c, t)
    '''
    # map_location lets a GPU-trained checkpoint load on a CPU-only host too.
    checkpoint = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(checkpoint["state_dict"])
    model = model.to(device)
    # model = model.load_from_checkpoint(ckpt_path).to(device)
    if double_chunk:
        inf_ck = model.inference_chunk_size
    else:
        inf_ck = model.chunk_size

    if overlap_add is None:
        target_wav_hat = no_overlap_inference(model, mix, device, batch_size, inf_ck)
    else:
        # exist_ok avoids the check-then-create race of an explicit exists() test
        os.makedirs(overlap_add.tmp_root, exist_ok=True)
        target_wav_hat = overlap_inference(model, mix, device, batch_size, inf_ck,
                                           overlap_add.overlap_rate, overlap_add.tmp_root,
                                           overlap_add.samplerate)

    return target_wav_hat
|
| 137 |
+
|
| 138 |
+
def no_overlap_inference(model, mix, device, batch_size, inf_ck):
    """Chunked inference without overlap-add blending.

    The waveform is padded with `model.overlap` zeros on each side, cut into
    chunks of `inf_ck` samples stepping by `inf_ck - 2*model.overlap`, run
    through the model batch-wise, and reassembled after trimming the margins.

    Args:
        model: provides overlap, audio_ch, stft(), istft() and __call__.
        mix: (c, t) mixture waveform.
        device: torch device for inference.
        batch_size: number of chunks per inference batch.
        inf_ck: chunk size fed to the model (includes both margins).

    Returns:
        (c, t) separated waveform as a numpy array.
    """
    true_samples = inf_ck - 2 * model.overlap

    right_pad = true_samples + model.overlap - ((mix.shape[-1]) % true_samples)
    mixture = np.concatenate((np.zeros((model.audio_ch, model.overlap), dtype='float32'),
                              mix,
                              np.zeros((model.audio_ch, right_pad), dtype='float32')),
                             1)
    num_chunks = mixture.shape[-1] // true_samples
    chunks = [mixture[:, i * true_samples: i * true_samples + inf_ck]
              for i in range(num_chunks)]
    # Stack first: torch.tensor on a list of ndarrays copies element-wise and
    # is far slower (and emits a warning on recent torch versions).
    mix_waves_batched = torch.as_tensor(np.stack(chunks), dtype=torch.float32).split(batch_size)

    target_wav_hats = []

    with torch.no_grad():
        model.eval()
        for mixture_wav in mix_waves_batched:
            mix_spec = model.stft(mixture_wav.to(device))
            spec_hat = model(mix_spec)
            target_wav_hat = model.istft(spec_hat)
            target_wav_hats.append(target_wav_hat.cpu().detach().numpy())  # (b, c, t)

    # Drop the trimmed margins, then lay the chunks end to end and cut the pad.
    target_wav_hat = np.vstack(target_wav_hats)[:, :, model.overlap:-model.overlap]  # (sum(b), c, t)
    target_wav_hat = np.concatenate(target_wav_hat, axis=-1)[:, :mix.shape[-1]]
    return target_wav_hat
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def overlap_inference(model, mix, device, batch_size, inf_ck, overlap_rate, tmp_root, samplerate):
    '''
    Chunked inference with overlap-add: chunks overlap by `overlap_rate` and
    are blended back together by `join_chunks` (which writes temporary files
    under `tmp_root` at `samplerate`).

    Args:
        mix: (c, t)
        overlap_rate: fraction of each chunk shared with its neighbour (0..1)
        tmp_root: scratch directory used by join_chunks
        samplerate: sample rate passed to join_chunks
    Returns:
        (c, t) separated waveform
    '''
    # hop between chunk starts; ceil keeps the hop positive for high overlap rates
    hop_length = math.ceil((1 - overlap_rate) * inf_ck)
    overlap_size = inf_ck - hop_length
    step_t = mix.shape[1]
    # NOTE(review): mix.T is (t, c), so chunks presumably come out (b, t, c),
    # not (b, c, t) as the comment below says — confirm against
    # split_nparray_with_overlap and model.stft's expected layout.
    mix_waves_batched = split_nparray_with_overlap(mix.T, hop_length, overlap_size)

    mix_waves_batched = torch.tensor(mix_waves_batched, dtype=torch.float32).split(batch_size) # [(b, c, t)]

    target_wav_hats = []

    with torch.no_grad():
        model.eval()  # disable dropout/batchnorm updates for inference
        for mixture_wav in mix_waves_batched:
            mix_spec = model.stft(mixture_wav.to(device))
            spec_hat = model(mix_spec)
            target_wav_hat = model.istft(spec_hat)
            target_wav_hat = target_wav_hat.cpu().detach().numpy()
            target_wav_hats.append(target_wav_hat) # (b, c, t)

    target_wav_hat = np.vstack(target_wav_hats) # (sum(b), c, t)
    target_wav_hat = np.transpose(target_wav_hat, (0, 2, 1)) # (sum(b), t, c)
    # join_chunks cross-fades the overlapping regions and returns one waveform
    target_wav_hat = join_chunks(tmp_root, target_wav_hat, samplerate, overlap_size) # (t, c)
    return target_wav_hat[:step_t].T # (c, t)
|
src/layers/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from .batch_norm import *
|
src/layers/batch_norm.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
from torch import nn
|
| 6 |
+
|
| 7 |
+
__all__ = ["IBN", "get_norm"]
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class BatchNorm(nn.BatchNorm2d):
    """BatchNorm2d with constant parameter init and optional freezing.

    `weight_init` / `bias_init` set the affine parameters to a constant
    (pass None to keep PyTorch's defaults); `weight_freeze` / `bias_freeze`
    disable their gradients.
    """

    def __init__(self, num_features, eps=1e-05, momentum=0.1, weight_freeze=False, bias_freeze=False, weight_init=1.0,
                 bias_init=0.0, **kwargs):
        super().__init__(num_features, eps=eps, momentum=momentum)
        # Constant-initialise the affine parameters unless the caller opts out.
        if weight_init is not None:
            nn.init.constant_(self.weight, weight_init)
        if bias_init is not None:
            nn.init.constant_(self.bias, bias_init)
        # Freezing only disables gradients; running stats still update in train mode.
        self.weight.requires_grad_(not weight_freeze)
        self.bias.requires_grad_(not bias_freeze)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class SyncBatchNorm(nn.SyncBatchNorm):
    """Cross-process SyncBatchNorm with constant init and optional freezing.

    Mirrors `BatchNorm` above: constant init for weight/bias (None keeps
    defaults) and per-parameter gradient freezing.
    """

    def __init__(self, num_features, eps=1e-05, momentum=0.1, weight_freeze=False, bias_freeze=False, weight_init=1.0,
                 bias_init=0.0):
        super().__init__(num_features, eps=eps, momentum=momentum)
        # Constant-initialise the affine parameters unless the caller opts out.
        if weight_init is not None:
            nn.init.constant_(self.weight, weight_init)
        if bias_init is not None:
            nn.init.constant_(self.bias, bias_init)
        # Freezing only disables gradients; running stats still update in train mode.
        self.weight.requires_grad_(not weight_freeze)
        self.bias.requires_grad_(not bias_freeze)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class IBN(nn.Module):
    """Instance-Batch Normalization layer.

    InstanceNorm is applied to the first half of the channels and the
    configured batch-norm variant (via `get_norm`) to the remaining half;
    the two results are concatenated back along the channel axis.
    """

    def __init__(self, planes, bn_norm, **kwargs):
        super(IBN, self).__init__()
        in_channels = planes // 2
        self.half = in_channels
        self.IN = nn.InstanceNorm2d(in_channels, affine=True)
        self.BN = get_norm(bn_norm, planes - in_channels, **kwargs)

    def forward(self, x):
        # Split channel-wise, normalise each part, then stitch back together.
        parts = torch.split(x, self.half, 1)
        out_in = self.IN(parts[0].contiguous())
        out_bn = self.BN(parts[1].contiguous())
        return torch.cat((out_in, out_bn), 1)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class GhostBatchNorm(BatchNorm):
    """BatchNorm computed over `num_splits` virtual ("ghost") sub-batches.

    In training, the batch is reshaped so statistics are computed per
    sub-batch; the tiled running stats are then averaged back into a single
    (num_features,) buffer after each forward pass.
    """

    def __init__(self, num_features, num_splits=1, **kwargs):
        super().__init__(num_features, **kwargs)
        self.num_splits = num_splits
        # Re-register the running stats so they start from fresh zero/one
        # buffers of shape (num_features,), replacing the parent's.
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))

    def forward(self, input):
        N, C, H, W = input.shape
        if self.training or not self.track_running_stats:
            # Tile the running stats so each ghost batch normalises with its
            # own copy, run a training-mode batch_norm over the reshaped
            # input, then average the updated stats back to one set.
            self.running_mean = self.running_mean.repeat(self.num_splits)
            self.running_var = self.running_var.repeat(self.num_splits)
            outputs = F.batch_norm(
                input.view(-1, C * self.num_splits, H, W), self.running_mean, self.running_var,
                self.weight.repeat(self.num_splits), self.bias.repeat(self.num_splits),
                True, self.momentum, self.eps).view(N, C, H, W)
            self.running_mean = torch.mean(self.running_mean.view(self.num_splits, self.num_features), dim=0)
            self.running_var = torch.mean(self.running_var.view(self.num_splits, self.num_features), dim=0)
            return outputs
        else:
            # Eval mode: plain batch_norm with the averaged running stats.
            return F.batch_norm(
                input, self.running_mean, self.running_var,
                self.weight, self.bias, False, self.momentum, self.eps)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class FrozenBatchNorm(nn.Module):
    """
    BatchNorm2d where the batch statistics and the affine parameters are fixed.
    It contains non-trainable buffers called
    "weight" and "bias", "running_mean", "running_var",
    initialized to perform identity transformation.
    The pre-trained backbone models from Caffe2 only contain "weight" and "bias",
    which are computed from the original four parameters of BN.
    The affine transform `x * weight + bias` will perform the equivalent
    computation of `(x - running_mean) / sqrt(running_var) * weight + bias`.
    When loading a backbone model from Caffe2, "running_mean" and "running_var"
    will be left unchanged as identity transformation.
    Other pre-trained backbone models may contain all 4 parameters.
    The forward is implemented by `F.batch_norm(..., training=False)`.
    """

    # Bumped when the serialized format changes; see _load_from_state_dict.
    _version = 3

    def __init__(self, num_features, eps=1e-5, **kwargs):
        super().__init__()
        self.num_features = num_features
        self.eps = eps
        self.register_buffer("weight", torch.ones(num_features))
        self.register_buffer("bias", torch.zeros(num_features))
        self.register_buffer("running_mean", torch.zeros(num_features))
        # Stored as (var - eps) so that forward's (running_var + eps) is an
        # exact identity for a freshly constructed layer.
        self.register_buffer("running_var", torch.ones(num_features) - eps)

    def forward(self, x):
        if x.requires_grad:
            # When gradients are needed, F.batch_norm will use extra memory
            # because its backward op computes gradients for weight/bias as well.
            scale = self.weight * (self.running_var + self.eps).rsqrt()
            bias = self.bias - self.running_mean * scale
            scale = scale.reshape(1, -1, 1, 1)
            bias = bias.reshape(1, -1, 1, 1)
            return x * scale + bias
        else:
            # When gradients are not needed, F.batch_norm is a single fused op
            # and provide more optimization opportunities.
            return F.batch_norm(
                x,
                self.running_mean,
                self.running_var,
                self.weight,
                self.bias,
                training=False,
                eps=self.eps,
            )

    def _load_from_state_dict(
        self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
    ):
        # Migrate checkpoints produced by older versions of this class.
        version = local_metadata.get("version", None)

        if version is None or version < 2:
            # No running_mean/var in early versions
            # This will silent the warnings
            if prefix + "running_mean" not in state_dict:
                state_dict[prefix + "running_mean"] = torch.zeros_like(self.running_mean)
            if prefix + "running_var" not in state_dict:
                state_dict[prefix + "running_var"] = torch.ones_like(self.running_var)

        if version is not None and version < 3:
            logger = logging.getLogger(__name__)
            logger.info("FrozenBatchNorm {} is upgraded to version 3.".format(prefix.rstrip(".")))
            # In version < 3, running_var are used without +eps.
            state_dict[prefix + "running_var"] -= self.eps

        super()._load_from_state_dict(
            state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
        )

    def __repr__(self):
        return "FrozenBatchNorm2d(num_features={}, eps={})".format(self.num_features, self.eps)

    @classmethod
    def convert_frozen_batchnorm(cls, module):
        """
        Convert BatchNorm/SyncBatchNorm in module into FrozenBatchNorm.
        Args:
            module (torch.nn.Module):
        Returns:
            If module is BatchNorm/SyncBatchNorm, returns a new module.
            Otherwise, in-place convert module and return it.
        Similar to convert_sync_batchnorm in
        https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py
        """
        bn_module = nn.modules.batchnorm
        bn_module = (bn_module.BatchNorm2d, bn_module.SyncBatchNorm)
        res = module
        if isinstance(module, bn_module):
            # Leaf BN layer: copy its parameters/stats into a frozen replacement.
            res = cls(module.num_features)
            if module.affine:
                res.weight.data = module.weight.data.clone().detach()
                res.bias.data = module.bias.data.clone().detach()
            res.running_mean.data = module.running_mean.data
            res.running_var.data = module.running_var.data
            res.eps = module.eps
        else:
            # Container: recurse and swap converted children in place.
            for name, child in module.named_children():
                new_child = cls.convert_frozen_batchnorm(child)
                if new_child is not child:
                    res.add_module(name, new_child)
        return res
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def get_norm(norm, out_channels, **kwargs):
    """
    Args:
        norm (str or callable): one of "BN", "syncBN", "GhostBN", "FrozenBN",
            "GN"; or a callable that takes a channel number and returns
            the normalization layer as a nn.Module
        out_channels: number of channels for normalization layer

    Returns:
        nn.Module or None: the normalization layer (None for an empty string)
    """
    if isinstance(norm, str):
        if not norm:
            return None
        # Map the symbolic name onto its layer factory.
        registry = {
            "BN": BatchNorm,
            "syncBN": SyncBatchNorm,
            "GhostBN": GhostBatchNorm,
            "FrozenBN": FrozenBatchNorm,
            "GN": lambda channels, **args: nn.GroupNorm(32, channels),
        }
        norm = registry[norm]
    return norm(out_channels, **kwargs)
|
src/layers/chunk_size.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#%%
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def wave_to_batches(mix, inf_ck, overlap, batch_size):
    '''
    Args:
        mix: (2, N) numpy array
        inf_ck: int, the chunk size as the model input (contains 2*overlap)
            inf_ck = overlap + true_samples + overlap
        overlap: int, the discarded samples at each side
        batch_size: chunks per batch
    Returns:
        a tuple of batches, each batch is a (batch, 2, inf_ck) torch tensor
    '''
    true_samples = inf_ck - 2 * overlap
    n_ch = mix.shape[0]
    n_samples = mix.shape[-1]

    # Pad with `overlap` leading zeros plus enough trailing zeros that the
    # final chunk ends exactly at the padded signal's last sample.
    right_pad = true_samples + overlap - (n_samples % true_samples)
    padded = np.concatenate(
        (np.zeros((n_ch, overlap), dtype='float32'),
         mix,
         np.zeros((n_ch, right_pad), dtype='float32')),
        1)

    num_chunks = padded.shape[-1] // true_samples
    starts = [idx * true_samples for idx in range(num_chunks)]
    stacked = np.array([padded[:, s: s + inf_ck] for s in starts])  # (x, 2, inf_ck)
    return torch.as_tensor(stacked, dtype=torch.float32).split(batch_size)
|
| 30 |
+
|
| 31 |
+
def batches_to_wave(target_hat_chunks, overlap, org_len):
    '''
    Args:
        target_hat_chunks: a list of (batch, 2, inf_ck) torch tensors
        overlap: int, the discarded samples at each side
        org_len: int, the original length of the mixture
    Returns:
        (2, N) numpy array
    '''
    # Cut the overlap margins off every chunk, then merge all batches.
    trimmed = torch.cat([chunk[..., overlap:-overlap] for chunk in target_hat_chunks])

    # Flatten chunk-by-chunk along time (channels first), then crop the
    # right-padding back to the original length.
    joined = trimmed.transpose(0, 1).reshape(2, -1)
    return joined[..., :org_len].detach().cpu().numpy()
|
| 45 |
+
|
| 46 |
+
if __name__ == '__main__':
    # Smoke test: chunk a random stereo waveform and reassemble it.
    mix = np.random.rand(2, 14318640)
    inf_ck = 261120
    overlap = 3072
    batch_size = 8
    out = wave_to_batches(mix, inf_ck, overlap, batch_size)
    # Reassembly should restore the original (2, N) shape; values pass
    # through unchanged here because no model is applied in between.
    in_wav = batches_to_wave(out, overlap, mix.shape[-1])
    print(in_wav.shape)
|
src/train.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Optional
|
| 2 |
+
|
| 3 |
+
import hydra
|
| 4 |
+
import pytorch_lightning as pl
|
| 5 |
+
import pyrootutils
|
| 6 |
+
import torch
|
| 7 |
+
import os
|
| 8 |
+
import shutil
|
| 9 |
+
from omegaconf import DictConfig
|
| 10 |
+
from pytorch_lightning import (
|
| 11 |
+
Callback,
|
| 12 |
+
LightningDataModule,
|
| 13 |
+
LightningModule,
|
| 14 |
+
Trainer,
|
| 15 |
+
seed_everything,
|
| 16 |
+
)
|
| 17 |
+
from pytorch_lightning.loggers import WandbLogger
|
| 18 |
+
from hydra.core.hydra_config import HydraConfig
|
| 19 |
+
|
| 20 |
+
from src import utils
|
| 21 |
+
|
| 22 |
+
pyrootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
| 23 |
+
|
| 24 |
+
log = utils.get_pylogger(__name__)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@utils.task_wrapper
def train(cfg: DictConfig) -> Optional[float]:
    """Contains training pipeline.
    Instantiates all PyTorch Lightning objects from config.

    Steps: enforce a fixed seed, build datamodule/model/callbacks/loggers from
    the Hydra config, optionally restore logger state next to a resumed
    checkpoint, build the trainer (gloo backend if USE_GLOO is set), log
    hyperparameters and run trainer.fit.

    Args:
        cfg (DictConfig): Configuration composed by Hydra.

    Returns:
        Optional[float]: Metric score for hyperparameter optimization.
    """

    # Set seed for random number generators in pytorch, numpy and python.random
    # NOTE(review): ModuleNotFoundError is (ab)used here as a local control-flow
    # signal for "seed missing from config" — it has nothing to do with imports.
    try:
        if "seed" in cfg:
            # set seed for random number generators in pytorch, numpy and python.random
            if cfg.get("seed"):
                pl.seed_everything(cfg.seed, workers=True)

        else:
            raise ModuleNotFoundError

    except ModuleNotFoundError:
        print('[Error] seed should be fixed for reproducibility \n=> e.g. python run.py +seed=$SEED')
        exit(-1)

    # Init Lightning datamodule
    log.info(f"Instantiating datamodule <{cfg.datamodule._target_}>")
    datamodule: LightningDataModule = hydra.utils.instantiate(cfg.datamodule)

    # Init Lightning model
    log.info(f"Instantiating model <{cfg.model._target_}>")
    model: LightningModule = hydra.utils.instantiate(cfg.model)

    # Init Lightning callbacks
    callbacks: List[Callback] = []
    if "callbacks" in cfg:
        for _, cb_conf in cfg["callbacks"].items():
            if "_target_" in cb_conf:
                log.info(f"Instantiating callback <{cb_conf._target_}>")
                callbacks.append(hydra.utils.instantiate(cb_conf))

    # When resuming, carry the previous run's logger state (tensorboard/wandb
    # directories next to the checkpoint) into the new Hydra run directory.
    if "resume_from_checkpoint" in cfg.trainer:
        ckpt_path = cfg.trainer.resume_from_checkpoint
        # get the parent directory of the checkpoint path
        log_dir = os.path.dirname(os.path.dirname(ckpt_path))
        tensorboard_dir = os.path.join(log_dir, "tensorboard")
        if os.path.exists(tensorboard_dir):
            # copy tensorboard dir to the parent directory of the checkpoint path
            # HydraConfig.get().run.dir returns new dir so do not use it! (now fixed)
            shutil.copytree(tensorboard_dir,os.path.join(os.getcwd(),"tensorboard"))

        wandb_dir = os.path.join(log_dir, "wandb")
        if os.path.exists(wandb_dir):
            shutil.copytree(wandb_dir,os.path.join(os.getcwd(),"wandb"))

    # Init Lightning loggers
    logger: List = []
    if "logger" in cfg:
        for _, lg_conf in cfg["logger"].items():
            if "_target_" in lg_conf:
                log.info(f"Instantiating logger <{lg_conf._target_}>")
                logger.append(hydra.utils.instantiate(lg_conf))

    # Log in to wandb once if any WandbLogger is configured.
    for wandb_logger in [l for l in logger if isinstance(l, WandbLogger)]:
        utils.wandb_login(key=cfg.wandb_api_key)
        # utils.wandb_watch_all(wandb_logger, model) # TODO buggy
        break

    # Init Lightning trainer
    log.info(f"Instantiating trainer <{cfg.trainer._target_}>")
    # get env variable use_gloo
    use_gloo = os.environ.get("USE_GLOO", False)
    if use_gloo:
        # gloo backend for hosts where NCCL is unavailable
        from pytorch_lightning.strategies import DDPStrategy
        ddp = DDPStrategy(process_group_backend='gloo')
        trainer: Trainer = hydra.utils.instantiate(
            cfg.trainer, strategy=ddp, callbacks=callbacks, logger=logger, _convert_="partial"
        )
    else:
        trainer: Trainer = hydra.utils.instantiate(
            cfg.trainer, callbacks=callbacks, logger=logger, _convert_="partial"
        )

    # Send some parameters from config to all lightning loggers
    log.info("Logging hyperparameters!")
    utils.log_hyperparameters(
        dict(
            cfg=cfg,
            model=model,
            datamodule=datamodule,
            trainer=trainer,
            callbacks=callbacks,
            logger=logger,
        )
    )

    # Train the model
    log.info("Starting training!")
    trainer.fit(model=model, datamodule=datamodule)

    # Evaluate model on test set after training
    # if not cfg.trainer.get("fast_dev_run"):
    #     log.info("Starting testing!")
    #     trainer.test()

    # Make sure everything closed properly
    log.info("Finalizing!")
    # utils.finish(
    #     config=cfg,
    #     model=model,
    #     datamodule=datamodule,
    #     trainer=trainer,
    #     callbacks=callbacks,
    #     logger=logger,
    # )

    # Print path to best checkpoint
    # log.info(f"Best checkpoint path:\n{trainer.checkpoint_callback.best_model_path}")

    # Return metric score for hyperparameter optimization
    # optimized_metric = cfg.get("optimized_metric")
    # if optimized_metric:
    #     return trainer.callback_metrics[optimized_metric]
    # NOTE(review): returns a 2-tuple although the annotation says
    # Optional[float] — presumably utils.task_wrapper expects (metric, extras);
    # confirm against task_wrapper's contract.
    return None, None
|
src/utils/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.utils.pylogger import get_pylogger
|
| 2 |
+
from src.utils.rich_utils import enforce_tags, print_config_tree
|
| 3 |
+
from src.utils.utils import *
|
src/utils/data_augmentation.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import subprocess as sp
|
| 3 |
+
import tempfile
|
| 4 |
+
import warnings
|
| 5 |
+
from argparse import ArgumentParser
|
| 6 |
+
from concurrent import futures
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
import soundfile as sf
|
| 10 |
+
import torch
|
| 11 |
+
from tqdm import tqdm
|
| 12 |
+
|
| 13 |
+
warnings.simplefilter(action='ignore', category=Warning)
|
| 14 |
+
source_names = ['vocals', 'drums', 'bass', 'other']
|
| 15 |
+
sample_rate = 44100
|
| 16 |
+
|
| 17 |
+
def main(args):
    """Create pitch-shifted / time-stretched copies of the dataset splits.

    For every (pitch, tempo) pair in P x T except the identity (0, 0),
    writes an augmented copy of each selected split next to the original,
    e.g. ``train_p=-1_t=20/``.

    Args:
        args: parsed CLI namespace with ``data_dir`` (dataset root ending
            in a path separator) and boolean flags ``train``, ``valid``,
            ``test`` selecting which splits to augment.
    """
    data_root = args.data_dir
    # Split sub-directory -> whether the user asked for it.
    split_flags = {
        'train/': args.train,
        'valid/': args.valid,
        'test/': args.test,
    }
    print(f"train={args.train}, test={args.test}, valid={args.valid}")

    pitch_shifts = [-3, -2, -1, 0, 1, 2, 3]      # pitch shift amounts (in semitones)
    tempo_deltas = [-30, -20, -10, 0, 10, 20, 30]  # time stretch amounts (10 means 10% slower)

    # Submit every selected split to the worker pool.  The original code
    # parallelized only the train split and processed valid/test serially
    # inside the pool context for no reason; all splits are now uniform.
    pendings = []
    with futures.ProcessPoolExecutor(max_workers=13) as executor:
        for p in pitch_shifts:
            for t in tempo_deltas:
                if p == 0 and t == 0:
                    continue  # identity transform: nothing to generate
                for subdir, enabled in split_flags.items():
                    if enabled:
                        pendings.append(
                            executor.submit(save_shifted_dataset, p, t, data_root + subdir))
        # Block until done and re-raise any worker exception here.
        for pending in pendings:
            pending.result()
|
| 50 |
+
|
| 51 |
+
def shift(wav, pitch, tempo, voice=False, quick=False, samplerate=44100, tmp_dir=None):
    """Pitch-shift and time-stretch a waveform via the ``soundstretch`` CLI.

    ``tempo`` is a relative delta in percentage, so tempo=10 means tempo at
    110%!  ``pitch`` is in semi tones.  Requires ``soundstretch`` to be
    installed, see https://www.surina.net/soundtouch/soundstretch.html

    Args:
        wav: torch tensor shaped (channels, samples); int16 PCM passes
            through, anything else is treated as float in [-1, 1].
        pitch: semitone shift (may be negative).
        tempo: tempo delta in percent.
        voice: pass ``-speech`` to soundstretch (tuned for vocals).
        quick: pass ``-quick`` (faster, lower quality).
        samplerate: sample rate of ``wav``; asserted against the output file.
        tmp_dir: directory for scratch WAV files.  Defaults to the system
            temp directory; the previous hard-coded ``/root/autodl-tmp/tmp``
            only existed on one machine and broke everywhere else.

    Returns:
        numpy float32 array as read back by ``soundfile`` (samples, channels).

    Raises:
        RuntimeError: if soundstretch exits with a non-zero status.
    """

    def i16_pcm(x):
        # Float in [-1, 1] -> int16 PCM; int16 input passes through untouched.
        # (The old f32_pcm helper was dead code and used the removed np.float.)
        if x.dtype == np.int16:
            return x
        return (x * 2 ** 15).clamp_(-2 ** 15, 2 ** 15 - 1).short()

    # Context managers guarantee both scratch files are removed even when
    # soundstretch fails.
    with tempfile.NamedTemporaryFile(dir=tmp_dir, suffix=".wav") as inputfile, \
            tempfile.NamedTemporaryFile(dir=tmp_dir, suffix=".wav") as outfile:
        sf.write(inputfile.name, data=i16_pcm(wav).t().numpy(),
                 samplerate=samplerate, format='WAV')
        command = [
            "soundstretch",
            inputfile.name,
            outfile.name,
            f"-pitch={pitch}",
            f"-tempo={tempo:.6f}",
        ]
        if quick:
            command += ["-quick"]
        if voice:
            command += ["-speech"]
        try:
            sp.run(command, capture_output=True, check=True)
        except sp.CalledProcessError as error:
            # Chain the cause so the soundstretch stderr and the original
            # traceback both survive.
            raise RuntimeError(
                f"Could not change bpm because {error.stderr.decode('utf-8')}") from error
        out, sr = sf.read(outfile.name, dtype='float32')
    assert sr == samplerate
    return out
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def save_shifted_dataset(delta_pitch, delta_tempo, data_path):
    """Write a pitch/tempo-shifted copy of every track under ``data_path``.

    The augmented split lands next to the original as
    ``<data_path without trailing '/'>_p=<delta_pitch>_t=<delta_tempo>/``,
    one WAV per source per track.

    Args:
        delta_pitch: semitone shift applied to every source.
        delta_tempo: tempo delta in percent (see ``shift``).
        data_path: split directory, expected to end with '/'.
    """
    # rstrip is robust even if the caller forgot the trailing slash
    # (the old data_path[:-1] would silently chop a real character).
    out_path = data_path.rstrip('/') + f'_p={delta_pitch}_t={delta_tempo}/'
    os.makedirs(out_path, exist_ok=True)  # idempotent; replaces mkdir+except
    track_names = sorted(
        name for name in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, name)))
    for track_name in tqdm(track_names):
        os.makedirs(os.path.join(out_path, track_name), exist_ok=True)
        for s_name in source_names:
            source = load_wav(f'{data_path}/{track_name}/{s_name}.wav')
            shifted = shift(
                torch.tensor(source),
                delta_pitch,
                delta_tempo,
                voice=s_name == 'vocals')
            sf.write(f'{out_path}/{track_name}/{s_name}.wav', shifted,
                     samplerate=sample_rate, format='WAV')
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def load_wav(path, sr=None):
    """Read an audio file as float32 and return it shaped (channels, samples)."""
    data, _ = sf.read(path, samplerate=sr, dtype='float32')
    return data.T
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
if __name__ == '__main__':
    parser = ArgumentParser()
    # data_dir is dereferenced unconditionally in main(); fail fast here
    # instead of with a TypeError on `None + 'train/'`.
    parser.add_argument('--data_dir', type=str, required=True,
                        help="dataset root containing train/ (and optionally valid/, test/)")

    # argparse's type=bool is a footgun: bool('False') is True, so
    # `--train False` still enabled the flag.  Parse truthy spellings
    # explicitly; the defaults are unchanged.
    def _str2bool(value):
        return str(value).strip().lower() in ('1', 'true', 'yes', 'y')

    parser.add_argument('--train', type=_str2bool, default=True)
    parser.add_argument('--valid', type=_str2bool, default=False)
    parser.add_argument('--test', type=_str2bool, default=False)

    main(parser.parse_args())
|