swc2 committed on Jun 28, 2025

Commit

36e02be

1 Parent(s): bcc11b4

update model

Browse files

Files changed (27) hide show

Sepformer/results/sepformer_4mix/1234/env.log +0 -90
Sepformer/results/sepformer_4mix/1234/hyperparams.yaml +0 -198
Sepformer/results/sepformer_4mix/1234/log.txt +0 -762
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/CKPT.yaml +0 -4
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/brain.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/counter.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/dataloader-TRAIN.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/decoder.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/encoder.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/masknet.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/optimizer.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/scaler.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/CKPT.yaml +0 -4
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/brain.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/counter.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/dataloader-TRAIN.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/decoder.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/encoder.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/masknet.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/optimizer.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/scaler.ckpt +0 -3
Sepformer/results/sepformer_4mix/1234/save/record_tr.csv +0 -0
Sepformer/results/sepformer_4mix/1234/save/record_val.csv +0 -0
Sepformer/results/sepformer_4mix/1234/save/test_data.csv +0 -0
Sepformer/results/sepformer_4mix/1234/test_results.csv +0 -6
Sepformer/results/sepformer_4mix/1234/train.py +0 -666
Sepformer/results/sepformer_4mix/1234/train_log.txt +0 -1

Sepformer/results/sepformer_4mix/1234/env.log DELETED Viewed

@@ -1,90 +0,0 @@
-SpeechBrain system description
-==============================
-Python version:
-3.11.13 (main, Jun  5 2025, 13:12:00) [GCC 11.2.0]
-==============================
-Installed Python packages:
-black==24.3.0
-certifi==2025.6.15
-cfgv==3.4.0
-charset-normalizer==3.4.2
-click==8.1.7
-distlib==0.3.9
-docstring_parser_fork==0.0.12
-filelock==3.18.0
-flake8==7.0.0
-fsspec==2025.5.1
-future==1.0.0
-hf-xet==1.1.5
-huggingface-hub==0.33.1
-HyperPyYAML==1.2.2
-identify==2.6.12
-idna==3.10
-iniconfig==2.1.0
-isort==5.13.2
-Jinja2==3.1.6
-joblib==1.5.1
-MarkupSafe==3.0.2
-mccabe==0.7.0
-mir_eval==0.6
-mpmath==1.3.0
-mypy_extensions==1.1.0
-networkx==3.5
-nodeenv==1.9.1
-numpy==2.3.1
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-cufile-cu12==1.11.1.6
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.26.2
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-packaging==25.0
-pandas==2.3.0
-pathspec==0.12.1
-platformdirs==4.3.8
-pluggy==1.6.0
-pre_commit==4.2.0
-pycodestyle==2.11.0
-pydoclint==0.4.1
-pyflakes==3.2.0
-pygtrie==2.5.0
-pyloudnorm==0.1.1
-pytest==7.4.0
-python-dateutil==2.9.0.post0
-pytz==2025.2
-PyYAML==6.0.2
-regex==2024.11.6
-requests==2.32.4
-ruamel.yaml==0.18.14
-ruamel.yaml.clib==0.2.12
-safetensors==0.5.3
-scipy==1.16.0
-sentencepiece==0.2.0
-six==1.17.0
-speechbrain==1.0.3
-sympy==1.14.0
-tokenizers==0.21.2
-torch==2.7.1
-torchaudio==2.7.1
-tqdm==4.67.1
-transformers==4.53.0
-triton==3.3.1
-typing_extensions==4.14.0
-tzdata==2025.2
-urllib3==2.5.0
-virtualenv==20.31.2
-yamllint==1.35.1
-==============================
-Git revision:
-476ac4f
-==============================
-CUDA version:
-12.6

Sepformer/results/sepformer_4mix/1234/hyperparams.yaml DELETED Viewed

@@ -1,198 +0,0 @@
-# Generated 2025-06-27 from:
-# /home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/hparams/sepformer_4mix.yaml
-# yamllint disable
-# ################################
-# Model: SepFormer for source separation
-# https://arxiv.org/abs/2010.13154
-# ################################
-#
-# Basic parameters
-# Seed needs to be set at top of yaml, before objects with parameters are made
-#
-seed: 1234
-__set_seed: !apply:speechbrain.utils.seed_everything [1234]
-# Data params
-# e.g. '/yourpath/Libri3Mix/train-clean-360/'
-# the data folder is needed even if dynamic mixing is applied
-data_folder: /data/
-# This is needed only if dynamic mixing is applied
-base_folder_dm: /yourpath/
-experiment_name: sepformer_4mix
-output_folder: results/sepformer_4mix/1234
-train_log: results/sepformer_4mix/1234/train_log.txt
-save_folder: results/sepformer_4mix/1234/save
-train_data: results/sepformer_4mix/1234/save/record_tr.csv
-valid_data: results/sepformer_4mix/1234/save/record_val.csv
-test_data: results/sepformer_4mix/1234/save/test_data.csv
-skip_prep: false
-ckpt_interval_minutes: 60
-# Experiment params
-precision: fp16 # One of bf16, fp16 or fp32 (a string, not a boolean); fp16/bf16 select mixed-precision training
-num_spks: 4
-noprogressbar: false
-save_audio: false # Save estimated sources on disk
-sample_rate: 16000
-####################### Training Parameters ####################################
-N_epochs: 200
-batch_size: 1
-lr: 0.00015
-clip_grad_norm: 5
-loss_upper_lim: 999999  # this is the upper limit for an acceptable loss
-# if True, the training sequences are cut to a specified length
-limit_training_signal_len: true
-# this is the length of sequences if we choose to limit
-# the signal length of training sequences
-training_signal_len: 64000000
-# Set it to True to dynamically create mixtures at training time
-dynamic_mixing: false
-use_wham_noise: false
-# Parameters for data augmentation
-use_wavedrop: false
-use_speedperturb: true
-use_rand_shift: false
-min_shift: -8000
-max_shift: 8000
-# Speed perturbation
-speed_changes: &id001 [95, 100, 105]
-# Speed perturbation object (uses the speeds listed above).
-# Frequency drop (parameters and object follow below): randomly drops a number of frequency bands to zero.
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-  orig_freq: 16000
-  speeds: *id001
-drop_freq_low: 0  # Lowest frequency that can be dropped, as a fraction of Nyquist (sample_rate / 2)
-drop_freq_high: 1  # Highest frequency that can be dropped, as a fraction of Nyquist (sample_rate / 2)
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-  drop_freq_low: 0
-  drop_freq_high: 1
-  drop_freq_count_low: 1
-  drop_freq_count_high: 3
-  drop_freq_width: 0.05
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-  drop_length_low: 1000
-  drop_length_high: 2000
-  drop_count_low: 1
-  drop_count_high: 5
-# loss thresholding -- this thresholds the training loss
-threshold_byloss: true
-threshold: -30
-# Encoder parameters
-N_encoder_out: 256
-out_channels: 256
-kernel_size: 32
-kernel_stride: 16
-d_ffn: 1024
-# Dataloader options
-dataloader_opts:
-  batch_size: 1
-  num_workers: 3
-# Specifying the network
-Encoder: &id004 !new:speechbrain.lobes.models.dual_path.Encoder
-  kernel_size: 32
-  out_channels: 256
-SBtfintra: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
-  num_layers: 8
-  d_model: 256
-  nhead: 8
-  d_ffn: 1024
-  dropout: 0
-  use_positional_encoding: true
-  norm_before: true
-SBtfinter: &id003 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
-  num_layers: 8
-  d_model: 256
-  nhead: 8
-  d_ffn: 1024
-  dropout: 0
-  use_positional_encoding: true
-  norm_before: true
-MaskNet: &id006 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
-  num_spks: 4
-  in_channels: 256
-  out_channels: 256
-  num_layers: 2
-  K: 250
-  intra_model: *id002
-  inter_model: *id003
-  norm: ln
-  linear_layer_after_inter_intra: false
-  skip_around_intra: true
-Decoder: &id005 !new:speechbrain.lobes.models.dual_path.Decoder
-  in_channels: 256
-  out_channels: 1
-  kernel_size: 32
-  stride: 16
-  bias: false
-optimizer: !name:torch.optim.Adam
-  lr: 0.00015
-  weight_decay: 0
-loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
-lr_scheduler: !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
-  factor: 0.5
-  patience: 2
-  dont_halve_until_epoch: 5
-epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
-        # lr_scheduler: !ref <lr_scheduler>
-  limit: 200
-modules:
-  encoder: *id004
-  decoder: *id005
-  masknet: *id006
-checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
-  checkpoints_dir: results/sepformer_4mix/1234/save
-  recoverables:
-    encoder: *id004
-    decoder: *id005
-    masknet: *id006
-    counter: *id007
-train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
-  save_file: results/sepformer_4mix/1234/train_log.txt
-# If you do not want to use the pretrained separator, simply delete the pretrained_separator field.
-# pretrained_separator: !new:speechbrain.utils.parameter_transfer.Pretrainer
-#     collect_in: !ref <save_folder>
-#     loadables:
-#         encoder: !ref <Encoder>
-#         decoder: !ref <Decoder>
-#         masknet: !ref <MaskNet>
-#     paths:
-#         encoder: speechbrain/sepformer-wsj03mix/encoder.ckpt
-#         decoder: speechbrain/sepformer-wsj03mix/decoder.ckpt
-#         masknet: speechbrain/sepformer-wsj03mix/masknet.ckpt

Sepformer/results/sepformer_4mix/1234/log.txt DELETED Viewed

@@ -1,762 +0,0 @@
-2025-06-27 17:13:10,582 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
-2025-06-27 17:13:10,583 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
-2025-06-27 17:13:10,583 - speechbrain.core - INFO - Beginning experiment!
-2025-06-27 17:13:10,583 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
-2025-06-27 17:13:10,831 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
-certifi==2025.6.15
-cfgv==3.4.0
-charset-normalizer==3.4.2
-click==8.1.7
-distlib==0.3.9
-docstring_parser_fork==0.0.12
-filelock==3.18.0
-flake8==7.0.0
-fsspec==2025.5.1
-future==1.0.0
-hf-xet==1.1.5
-huggingface-hub==0.33.0
-HyperPyYAML==1.2.2
-identify==2.6.12
-idna==3.10
-iniconfig==2.1.0
-isort==5.13.2
-Jinja2==3.1.6
-joblib==1.5.1
-MarkupSafe==3.0.2
-mccabe==0.7.0
-mir_eval==0.6
-mpmath==1.3.0
-mypy_extensions==1.1.0
-networkx==3.5
-nodeenv==1.9.1
-numpy==2.3.1
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-cufile-cu12==1.11.1.6
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.26.2
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-packaging==25.0
-pandas==2.3.0
-pathspec==0.12.1
-platformdirs==4.3.8
-pluggy==1.6.0
-pre_commit==4.2.0
-pycodestyle==2.11.0
-pydoclint==0.4.1
-pyflakes==3.2.0
-Pygments==2.19.2
-pygtrie==2.5.0
-pyloudnorm==0.1.1
-pytest==7.4.0
-python-dateutil==2.9.0.post0
-pytz==2025.2
-PyYAML==6.0.2
-regex==2024.11.6
-requests==2.32.4
-ruamel.yaml==0.18.14
-ruamel.yaml.clib==0.2.12
-safetensors==0.5.3
-scipy==1.16.0
-sentencepiece==0.2.0
-six==1.17.0
-speechbrain==1.0.3
-sympy==1.14.0
-tokenizers==0.21.2
-torch==2.7.1
-torchaudio==2.7.1
-tqdm==4.67.1
-transformers==4.52.4
-triton==3.3.1
-typing_extensions==4.14.0
-tzdata==2025.2
-urllib3==2.5.0
-virtualenv==20.31.2
-yamllint==1.35.1
-2025-06-27 17:13:10,836 - speechbrain.core - ERROR - Exception:
-Traceback (most recent call last):
-  File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 634, in <module>
-    train_data, valid_data, test_data = dataio_prep(hparams)
-                                        ^^^^^^^^^^^^^^^^^^^^
-  File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 446, in dataio_prep
-    train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
-                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataset.py", line 417, in from_csv
-    data = load_data_csv(csv_path, replacements)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataio.py", line 138, in load_data_csv
-    with open(csv_path, newline="", encoding="utf-8") as csvfile:
-         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-FileNotFoundError: [Errno 2] No such file or directory: 'results/sepformer_4mix/1234/save/record_tr.csv'
-2025-06-27 17:17:17,084 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [allow_tf32, disable_jit_profiling]
-2025-06-27 17:17:17,085 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
-2025-06-27 17:17:17,085 - speechbrain.core - INFO - Beginning experiment!
-2025-06-27 17:17:17,085 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
-2025-06-27 17:17:17,318 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
-certifi==2025.6.15
-cfgv==3.4.0
-charset-normalizer==3.4.2
-click==8.1.7
-distlib==0.3.9
-docstring_parser_fork==0.0.12
-filelock==3.18.0
-flake8==7.0.0
-fsspec==2025.5.1
-future==1.0.0
-hf-xet==1.1.5
-huggingface-hub==0.33.0
-HyperPyYAML==1.2.2
-identify==2.6.12
-idna==3.10
-iniconfig==2.1.0
-isort==5.13.2
-Jinja2==3.1.6
-joblib==1.5.1
-MarkupSafe==3.0.2
-mccabe==0.7.0
-mir_eval==0.6
-mpmath==1.3.0
-mypy_extensions==1.1.0
-networkx==3.5
-nodeenv==1.9.1
-numpy==2.3.1
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-cufile-cu12==1.11.1.6
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.26.2
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-packaging==25.0
-pandas==2.3.0
-pathspec==0.12.1
-platformdirs==4.3.8
-pluggy==1.6.0
-pre_commit==4.2.0
-pycodestyle==2.11.0
-pydoclint==0.4.1
-pyflakes==3.2.0
-Pygments==2.19.2
-pygtrie==2.5.0
-pyloudnorm==0.1.1
-pytest==7.4.0
-python-dateutil==2.9.0.post0
-pytz==2025.2
-PyYAML==6.0.2
-regex==2024.11.6
-requests==2.32.4
-ruamel.yaml==0.18.14
-ruamel.yaml.clib==0.2.12
-safetensors==0.5.3
-scipy==1.16.0
-sentencepiece==0.2.0
-six==1.17.0
-speechbrain==1.0.3
-sympy==1.14.0
-tokenizers==0.21.2
-torch==2.7.1
-torchaudio==2.7.1
-tqdm==4.67.1
-transformers==4.52.4
-triton==3.3.1
-typing_extensions==4.14.0
-tzdata==2025.2
-urllib3==2.5.0
-virtualenv==20.31.2
-yamllint==1.35.1
-2025-06-27 17:17:17,325 - speechbrain.core - ERROR - Exception:
-Traceback (most recent call last):
-  File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 634, in <module>
-    train_data, valid_data, test_data = dataio_prep(hparams)
-                                        ^^^^^^^^^^^^^^^^^^^^
-  File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 446, in dataio_prep
-    train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
-                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataset.py", line 417, in from_csv
-    data = load_data_csv(csv_path, replacements)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataio.py", line 138, in load_data_csv
-    with open(csv_path, newline="", encoding="utf-8") as csvfile:
-         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-FileNotFoundError: [Errno 2] No such file or directory: 'results/sepformer_4mix/1234/save/record_tr.csv'
-2025-06-27 17:18:04,558 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
-2025-06-27 17:18:04,559 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
-2025-06-27 17:18:04,559 - speechbrain.core - INFO - Beginning experiment!
-2025-06-27 17:18:04,559 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
-2025-06-27 17:18:04,806 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
-certifi==2025.6.15
-cfgv==3.4.0
-charset-normalizer==3.4.2
-click==8.1.7
-distlib==0.3.9
-docstring_parser_fork==0.0.12
-filelock==3.18.0
-flake8==7.0.0
-fsspec==2025.5.1
-future==1.0.0
-hf-xet==1.1.5
-huggingface-hub==0.33.0
-HyperPyYAML==1.2.2
-identify==2.6.12
-idna==3.10
-iniconfig==2.1.0
-isort==5.13.2
-Jinja2==3.1.6
-joblib==1.5.1
-MarkupSafe==3.0.2
-mccabe==0.7.0
-mir_eval==0.6
-mpmath==1.3.0
-mypy_extensions==1.1.0
-networkx==3.5
-nodeenv==1.9.1
-numpy==2.3.1
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-cufile-cu12==1.11.1.6
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.26.2
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-packaging==25.0
-pandas==2.3.0
-pathspec==0.12.1
-platformdirs==4.3.8
-pluggy==1.6.0
-pre_commit==4.2.0
-pycodestyle==2.11.0
-pydoclint==0.4.1
-pyflakes==3.2.0
-Pygments==2.19.2
-pygtrie==2.5.0
-pyloudnorm==0.1.1
-pytest==7.4.0
-python-dateutil==2.9.0.post0
-pytz==2025.2
-PyYAML==6.0.2
-regex==2024.11.6
-requests==2.32.4
-ruamel.yaml==0.18.14
-ruamel.yaml.clib==0.2.12
-safetensors==0.5.3
-scipy==1.16.0
-sentencepiece==0.2.0
-six==1.17.0
-speechbrain==1.0.3
-sympy==1.14.0
-tokenizers==0.21.2
-torch==2.7.1
-torchaudio==2.7.1
-tqdm==4.67.1
-transformers==4.52.4
-triton==3.3.1
-typing_extensions==4.14.0
-tzdata==2025.2
-urllib3==2.5.0
-virtualenv==20.31.2
-yamllint==1.35.1
-2025-06-27 17:18:05,007 - speechbrain.core - INFO - Info: precision arg from hparam file is used
-2025-06-27 17:18:05,008 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
-2025-06-27 17:18:05,008 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
-2025-06-27 17:18:05,221 - speechbrain.core - INFO - Gradscaler enabled: `True`
-2025-06-27 17:18:05,221 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
-2025-06-27 17:18:05,221 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
-2025-06-27 17:18:05,222 - speechbrain.core - INFO - Separation Model Statistics:
-* Total Number of Trainable Parameters: 25.8M
-* Total Number of Parameters: 25.8M
-* Trainable Parameters represent 100.0000% of the total size.
-2025-06-27 17:18:06,855 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00
-2025-06-27 17:18:07,209 - speechbrain.utils.epoch_loop - INFO - Going into epoch 163
-2025-06-27 17:18:10,757 - speechbrain.core - ERROR - Exception:
-Traceback (most recent call last):
-  File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 656, in <module>
-    separator.fit(
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1575, in fit
-    self._fit_train(train_set=train_set, epoch=epoch, enable=enable)
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1400, in _fit_train
-    loss = self.fit_batch(batch)
-           ^^^^^^^^^^^^^^^^^^^^^
-  File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 148, in fit_batch
-    self.scaler.scale(loss).backward()
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/_tensor.py", line 648, in backward
-    torch.autograd.backward(
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/autograd/__init__.py", line 353, in backward
-    _engine_run_backward(
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/autograd/graph.py", line 824, in _engine_run_backward
-    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-KeyboardInterrupt
-2025-06-27 17:24:05,950 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
-2025-06-27 17:24:05,951 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
-2025-06-27 17:24:05,951 - speechbrain.core - INFO - Beginning experiment!
-2025-06-27 17:24:05,951 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
-2025-06-27 17:24:06,192 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
-certifi==2025.6.15
-cfgv==3.4.0
-charset-normalizer==3.4.2
-click==8.1.7
-distlib==0.3.9
-docstring_parser_fork==0.0.12
-filelock==3.18.0
-flake8==7.0.0
-fsspec==2025.5.1
-future==1.0.0
-hf-xet==1.1.5
-huggingface-hub==0.33.0
-HyperPyYAML==1.2.2
-identify==2.6.12
-idna==3.10
-iniconfig==2.1.0
-isort==5.13.2
-Jinja2==3.1.6
-joblib==1.5.1
-MarkupSafe==3.0.2
-mccabe==0.7.0
-mir_eval==0.6
-mpmath==1.3.0
-mypy_extensions==1.1.0
-networkx==3.5
-nodeenv==1.9.1
-numpy==2.3.1
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-cufile-cu12==1.11.1.6
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.26.2
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-packaging==25.0
-pandas==2.3.0
-pathspec==0.12.1
-platformdirs==4.3.8
-pluggy==1.6.0
-pre_commit==4.2.0
-pycodestyle==2.11.0
-pydoclint==0.4.1
-pyflakes==3.2.0
-Pygments==2.19.2
-pygtrie==2.5.0
-pyloudnorm==0.1.1
-pytest==7.4.0
-python-dateutil==2.9.0.post0
-pytz==2025.2
-PyYAML==6.0.2
-regex==2024.11.6
-requests==2.32.4
-ruamel.yaml==0.18.14
-ruamel.yaml.clib==0.2.12
-safetensors==0.5.3
-scipy==1.16.0
-sentencepiece==0.2.0
-six==1.17.0
-speechbrain==1.0.3
-sympy==1.14.0
-tokenizers==0.21.2
-torch==2.7.1
-torchaudio==2.7.1
-tqdm==4.67.1
-transformers==4.52.4
-triton==3.3.1
-typing_extensions==4.14.0
-tzdata==2025.2
-urllib3==2.5.0
-virtualenv==20.31.2
-yamllint==1.35.1
-2025-06-27 17:24:06,389 - speechbrain.core - INFO - Info: precision arg from hparam file is used
-2025-06-27 17:24:06,390 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
-2025-06-27 17:24:06,390 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
-2025-06-27 17:24:06,573 - speechbrain.core - INFO - Gradscaler enabled: `True`
-2025-06-27 17:24:06,573 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
-2025-06-27 17:24:06,573 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
-2025-06-27 17:24:06,574 - speechbrain.core - INFO - Separation Model Statistics:
-* Total Number of Trainable Parameters: 25.8M
-* Total Number of Parameters: 25.8M
-* Trainable Parameters represent 100.0000% of the total size.
-2025-06-27 17:24:08,245 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00
-2025-06-27 17:24:08,608 - speechbrain.utils.epoch_loop - INFO - Going into epoch 163
-2025-06-27 17:24:11,017 - speechbrain.core - ERROR - Exception:
-Traceback (most recent call last):
-  File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 656, in <module>
-    separator.fit(
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1575, in fit
-    self._fit_train(train_set=train_set, epoch=epoch, enable=enable)
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1400, in _fit_train
-    loss = self.fit_batch(batch)
-           ^^^^^^^^^^^^^^^^^^^^^
-  File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 133, in fit_batch
-    predictions, targets = self.compute_forward(
-                           ^^^^^^^^^^^^^^^^^^^^^
-  File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 87, in compute_forward
-    est_mask = self.hparams.MaskNet(mix_w)
-               ^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
-    return forward_call(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 1067, in forward
-    x = self.dual_mdl[i](x)
-        ^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
-    return forward_call(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 918, in forward
-    inter = self.inter_mdl(inter)
-            ^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
-    return forward_call(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 638, in forward
-    return self.mdl(x + pos_enc)[0]
-           ^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _wrapped_call_impl
-    def _wrapped_call_impl(self, *args, **kwargs):
-KeyboardInterrupt
-2025-06-27 21:10:07,131 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
-2025-06-27 21:10:07,133 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
-2025-06-27 21:10:07,133 - speechbrain.core - INFO - Beginning experiment!
-2025-06-27 21:10:07,133 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
-2025-06-27 21:10:07,389 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
-certifi==2025.6.15
-cfgv==3.4.0
-charset-normalizer==3.4.2
-click==8.1.7
-distlib==0.3.9
-docstring_parser_fork==0.0.12
-filelock==3.18.0
-flake8==7.0.0
-fsspec==2025.5.1
-future==1.0.0
-hf-xet==1.1.5
-huggingface-hub==0.33.1
-HyperPyYAML==1.2.2
-identify==2.6.12
-idna==3.10
-iniconfig==2.1.0
-isort==5.13.2
-Jinja2==3.1.6
-joblib==1.5.1
-MarkupSafe==3.0.2
-mccabe==0.7.0
-mir_eval==0.6
-mpmath==1.3.0
-mypy_extensions==1.1.0
-networkx==3.5
-nodeenv==1.9.1
-numpy==2.3.1
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-cufile-cu12==1.11.1.6
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.26.2
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-packaging==25.0
-pandas==2.3.0
-pathspec==0.12.1
-platformdirs==4.3.8
-pluggy==1.6.0
-pre_commit==4.2.0
-pycodestyle==2.11.0
-pydoclint==0.4.1
-pyflakes==3.2.0
-pygtrie==2.5.0
-pyloudnorm==0.1.1
-pytest==7.4.0
-python-dateutil==2.9.0.post0
-pytz==2025.2
-PyYAML==6.0.2
-regex==2024.11.6
-requests==2.32.4
-ruamel.yaml==0.18.14
-ruamel.yaml.clib==0.2.12
-safetensors==0.5.3
-scipy==1.16.0
-sentencepiece==0.2.0
-six==1.17.0
-speechbrain==1.0.3
-sympy==1.14.0
-tokenizers==0.21.2
-torch==2.7.1
-torchaudio==2.7.1
-tqdm==4.67.1
-transformers==4.53.0
-triton==3.3.1
-typing_extensions==4.14.0
-tzdata==2025.2
-urllib3==2.5.0
-virtualenv==20.31.2
-yamllint==1.35.1
-2025-06-27 21:10:07,393 - speechbrain.utils.superpowers - DEBUG - 476ac4f
-2025-06-27 21:10:07,996 - speechbrain.core - INFO - Info: precision arg from hparam file is used
-2025-06-27 21:10:07,997 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
-2025-06-27 21:10:07,997 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
-2025-06-27 21:10:08,035 - speechbrain.core - INFO - Gradscaler enabled: `True`
-2025-06-27 21:10:08,035 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
-2025-06-27 21:10:08,035 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
-2025-06-27 21:10:08,036 - speechbrain.core - INFO - Separation Model Statistics:
-* Total Number of Trainable Parameters: 25.8M
-* Total Number of Parameters: 25.8M
-* Trainable Parameters represent 100.0000% of the total size.
-2025-06-27 21:10:09,782 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00
-2025-06-27 21:10:10,160 - speechbrain.utils.epoch_loop - INFO - Going into epoch 163
-2025-06-27 21:10:17,953 - speechbrain.core - ERROR - Exception:
-Traceback (most recent call last):
-  File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 656, in <module>
-    separator.fit(
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/core.py", line 1575, in fit
-    self._fit_train(train_set=train_set, epoch=epoch, enable=enable)
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/core.py", line 1400, in _fit_train
-    loss = self.fit_batch(batch)
-           ^^^^^^^^^^^^^^^^^^^^^
-  File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 133, in fit_batch
-    predictions, targets = self.compute_forward(
-                           ^^^^^^^^^^^^^^^^^^^^^
-  File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 87, in compute_forward
-    est_mask = self.hparams.MaskNet(mix_w)
-               ^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
-    return forward_call(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 1067, in forward
-    x = self.dual_mdl[i](x)
-        ^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
-    return forward_call(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 918, in forward
-    inter = self.inter_mdl(inter)
-            ^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
-    return forward_call(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 638, in forward
-    return self.mdl(x + pos_enc)[0]
-           ^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
-    return forward_call(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/transformer/Transformer.py", line 639, in forward
-    output, attention = enc_layer(
-                        ^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
-    return forward_call(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/transformer/Transformer.py", line 457, in forward
-    output, self_attn = self.self_att(
-                        ^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
-    return forward_call(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/nnet/attention.py", line 865, in forward
-    output, attention_weights = self.att(
-                                ^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
-    return forward_call(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/activation.py", line 1373, in forward
-    attn_output, attn_output_weights = F.multi_head_attention_forward(
-                                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/functional.py", line 6230, in multi_head_attention_forward
-    q, k, v = _in_projection_packed(query, key, value, in_proj_weight, in_proj_bias)
-              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/functional.py", line 5648, in _in_projection_packed
-    return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
-           ^^^^^^^^^^^^^^^^^^^
-KeyboardInterrupt
-2025-06-27 21:10:50,985 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
-2025-06-27 21:10:50,986 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
-2025-06-27 21:10:50,986 - speechbrain.core - INFO - Beginning experiment!
-2025-06-27 21:10:50,986 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
-2025-06-27 21:10:51,243 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
-certifi==2025.6.15
-cfgv==3.4.0
-charset-normalizer==3.4.2
-click==8.1.7
-distlib==0.3.9
-docstring_parser_fork==0.0.12
-filelock==3.18.0
-flake8==7.0.0
-fsspec==2025.5.1
-future==1.0.0
-hf-xet==1.1.5
-huggingface-hub==0.33.1
-HyperPyYAML==1.2.2
-identify==2.6.12
-idna==3.10
-iniconfig==2.1.0
-isort==5.13.2
-Jinja2==3.1.6
-joblib==1.5.1
-MarkupSafe==3.0.2
-mccabe==0.7.0
-mir_eval==0.6
-mpmath==1.3.0
-mypy_extensions==1.1.0
-networkx==3.5
-nodeenv==1.9.1
-numpy==2.3.1
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-cufile-cu12==1.11.1.6
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.26.2
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-packaging==25.0
-pandas==2.3.0
-pathspec==0.12.1
-platformdirs==4.3.8
-pluggy==1.6.0
-pre_commit==4.2.0
-pycodestyle==2.11.0
-pydoclint==0.4.1
-pyflakes==3.2.0
-pygtrie==2.5.0
-pyloudnorm==0.1.1
-pytest==7.4.0
-python-dateutil==2.9.0.post0
-pytz==2025.2
-PyYAML==6.0.2
-regex==2024.11.6
-requests==2.32.4
-ruamel.yaml==0.18.14
-ruamel.yaml.clib==0.2.12
-safetensors==0.5.3
-scipy==1.16.0
-sentencepiece==0.2.0
-six==1.17.0
-speechbrain==1.0.3
-sympy==1.14.0
-tokenizers==0.21.2
-torch==2.7.1
-torchaudio==2.7.1
-tqdm==4.67.1
-transformers==4.53.0
-triton==3.3.1
-typing_extensions==4.14.0
-tzdata==2025.2
-urllib3==2.5.0
-virtualenv==20.31.2
-yamllint==1.35.1
-2025-06-27 21:10:51,249 - speechbrain.utils.superpowers - DEBUG - 476ac4f
-2025-06-27 21:10:51,876 - speechbrain.core - INFO - Info: precision arg from hparam file is used
-2025-06-27 21:10:51,876 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
-2025-06-27 21:10:51,876 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
-2025-06-27 21:10:51,915 - speechbrain.core - INFO - Gradscaler enabled: `True`
-2025-06-27 21:10:51,915 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
-2025-06-27 21:10:51,915 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
-2025-06-27 21:10:51,917 - speechbrain.core - INFO - Separation Model Statistics:
-* Total Number of Trainable Parameters: 25.8M
-* Total Number of Parameters: 25.8M
-* Trainable Parameters represent 100.0000% of the total size.
-2025-06-27 21:10:52,857 - speechbrain.core - INFO - Test only mode, skipping training and validation stages.
-2025-06-27 21:10:52,859 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00
-2025-06-27 21:11:31,648 - speechbrain.utils.train_logger - INFO - Epoch loaded: 48 - test si-snr: 20.60
-2025-06-27 21:12:49,750 - speechbrain.core - ERROR - Exception:
-Traceback (most recent call last):
-  File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 666, in <module>
-    separator.save_results(test_data)
-  File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 367, in save_results
-    sdr_baseline, _, _, _ = bss_eval_sources(
-                            ^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/mir_eval/separation.py", line 210, in bss_eval_sources
-    _bss_decomp_mtifilt(reference_sources,
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/mir_eval/separation.py", line 623, in _bss_decomp_mtifilt
-    e_interf = _project(reference_sources,
-               ^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/mir_eval/separation.py", line 715, in _project
-    C = np.linalg.solve(G, D).reshape(flen, nsrc, order='F')
-        ^^^^^^^^^^^^^^^^^^^^^
-  File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/numpy/linalg/_linalg.py", line 471, in solve
-    r = gufunc(a, b, signature=signature)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-KeyboardInterrupt

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/CKPT.yaml DELETED Viewed

@@ -1,4 +0,0 @@
-# yamllint disable
-end-of-epoch: true
-si-snr: 22.403992604029355
-unixtime: 1750946214.2858236

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/brain.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:33809a026a2c1febce7b03c8aafaee4ddfc851b2c70f180f8c06bf1017f4df5c
-size 46

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/counter.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:98010bd9270f9b100b6214a21754fd33bdc8d41b2bc9f9dd16ff54d3c34ffd71
-size 2

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/dataloader-TRAIN.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d253d7b7ace4e06589dd90003f047380ddfdcfb29007b4e815caf48ff09b498b
-size 4

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/decoder.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6ab49185bb3560f75ce4c18769157375a051f6b3a36e0c35d027574ca9c29e42
-size 34409

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/encoder.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:95ee4117e13cc2fb383208925edb71d86947024a9dd2be3da1ea25aca5ae8adf
-size 34473

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/masknet.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e2c23ccb34b361feb8eeb630d4947815533cfb7dcfd54402e97edc82e032479b
-size 113629889

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/optimizer.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fd09ff01fca43d3985535808946f8dcd75488e1da097ed30b148cb5c3b9114d5
-size 206898874

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/scaler.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:060762594d7f2f0162569b71f7b3ab95a021d06848d3088c63366abf8b98f80c
-size 1383

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/CKPT.yaml DELETED Viewed

@@ -1,4 +0,0 @@
-# yamllint disable
-end-of-epoch: true
-si-snr: 22.415829142613383
-unixtime: 1750994609.9935129

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/brain.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:33809a026a2c1febce7b03c8aafaee4ddfc851b2c70f180f8c06bf1017f4df5c
-size 46

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/counter.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:79d6eaa2676189eb927f2e16a70091474078e2117c3fc607d35cdc6b591ef355
-size 3

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/dataloader-TRAIN.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d253d7b7ace4e06589dd90003f047380ddfdcfb29007b4e815caf48ff09b498b
-size 4

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/decoder.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:96810d4644ba93e03d448330d0be5de5a3befc453f07b0c61f13aeca7464b2c5
-size 34409

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/encoder.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:76f73bc7bdde7f931679475847d79af49d687d5eb52011f17d6a37024a222558
-size 34473

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/masknet.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:30262fd31537e9349c1c1071bbd86c9a89e359ea11d5d50c48a05da03bc26e0e
-size 113629889

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/optimizer.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ee4c807b50d7f9af8606acf172b2713c218ca53faf4aaa3e614e0c0a6fbac5bd
-size 206898874

Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/scaler.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:91cd3440b19e568449ff6d8fbb6df704d57d37fc7ad77f05146ac1de7310cded
-size 1383

Sepformer/results/sepformer_4mix/1234/save/record_tr.csv DELETED Viewed

The diff for this file is too large to render. See raw diff

Sepformer/results/sepformer_4mix/1234/save/record_val.csv DELETED Viewed

The diff for this file is too large to render. See raw diff

Sepformer/results/sepformer_4mix/1234/save/test_data.csv DELETED Viewed

The diff for this file is too large to render. See raw diff

Sepformer/results/sepformer_4mix/1234/test_results.csv DELETED Viewed

@@ -1,6 +0,0 @@
-snt_id,sdr,sdr_i,si-snr,si-snr_i
-0,-0.7558969463329976,9.341916369626974,-15.376874923706055,5.828725814819336
-1,-1.0032419873910463,9.161513886510548,-15.261016845703125,5.546741485595703
-2,-5.201884601728196,4.524667155561474,-15.91262149810791,6.88614559173584
-3,-3.896726451280197,4.951401911520096,-15.797815322875977,5.112443923950195
-4,-3.685112954805245,6.856254410975225,-22.656341552734375,13.753952026367188

Sepformer/results/sepformer_4mix/1234/train.py DELETED Viewed

@@ -1,666 +0,0 @@
-#!/usr/bin/env/python3
-"""Recipe for training a neural speech separation system on Libri2/3Mix datasets.
-The system employs an encoder, a decoder, and a masking network.
-To run this recipe, do the following:
-> python train.py hparams/sepformer-libri2mix.yaml
-> python train.py hparams/sepformer-libri3mix.yaml
-The experiment file is flexible enough to support different neural
-networks. By properly changing the parameter files, you can try
-different architectures. The script supports both libri2mix and
-libri3mix.
-# 4-mix 主要根据 num_spks 修改 train.py 和 config
-Authors
- * Cem Subakan 2020
- * Mirco Ravanelli 2020
- * Samuele Cornell 2020
- * Mirko Bronzi 2020
- * Jianyuan Zhong 2020
-"""
-import csv
-import os
-import sys
-import numpy as np
-import torch
-import torch.nn.functional as F
-import torchaudio
-from hyperpyyaml import load_hyperpyyaml
-from tqdm import tqdm
-import speechbrain as sb
-import speechbrain.nnet.schedulers as schedulers
-from speechbrain.utils.distributed import run_on_main
-from speechbrain.utils.logger import get_logger
-logger = get_logger(__name__)
-# Define training procedure
-class Separation(sb.Brain):
-    def compute_forward(self, mix, targets, stage, noise=None):
-        """Forward computations from the mixture to the separated signals."""
-        # Unpack lists and put tensors in the right device
-        mix, mix_lens = mix
-        mix, mix_lens = mix.to(self.device), mix_lens.to(self.device)
-        # Convert targets to tensor
-        targets = torch.cat(
-            [targets[i][0].unsqueeze(-1) for i in range(self.hparams.num_spks)],
-            dim=-1,
-        ).to(self.device)
-        # Add speech distortions
-        if stage == sb.Stage.TRAIN:
-            with torch.no_grad():
-                if self.hparams.use_speedperturb or self.hparams.use_rand_shift:
-                    mix, targets = self.add_speed_perturb(targets, mix_lens)
-                    mix = targets.sum(-1)
-                    if self.hparams.use_wham_noise:
-                        noise = noise.to(self.device)
-                        len_noise = noise.shape[1]
-                        len_mix = mix.shape[1]
-                        min_len = min(len_noise, len_mix)
-                        # add the noise
-                        mix = mix[:, :min_len] + noise[:, :min_len]
-                        # fix the length of targets also
-                        targets = targets[:, :min_len, :]
-                if self.hparams.use_wavedrop:
-                    mix = self.hparams.drop_chunk(mix, mix_lens)
-                    mix = self.hparams.drop_freq(mix)
-                if self.hparams.limit_training_signal_len:
-                    mix, targets = self.cut_signals(mix, targets)
-        # Separation
-        mix_w = self.hparams.Encoder(mix)
-        est_mask = self.hparams.MaskNet(mix_w)
-        mix_w = torch.stack([mix_w] * self.hparams.num_spks)
-        sep_h = mix_w * est_mask
-        # Decoding
-        est_source = torch.cat(
-            [
-                self.hparams.Decoder(sep_h[i]).unsqueeze(-1)
-                for i in range(self.hparams.num_spks)
-            ],
-            dim=-1,
-        )
-        # T changed after conv1d in encoder, fix it here
-        T_origin = mix.size(1)
-        T_est = est_source.size(1)
-        if T_origin > T_est:
-            est_source = F.pad(est_source, (0, 0, 0, T_origin - T_est))
-        else:
-            est_source = est_source[:, :T_origin, :]
-        return est_source, targets
-    def compute_objectives(self, predictions, targets):
-        """Computes the si-snr loss"""
-        return self.hparams.loss(targets, predictions)
-    def fit_batch(self, batch):
-        """Trains one batch"""
-        # Unpacking batch list
-        mixture = batch.mix_sig
-        targets = [batch.s1_sig, batch.s2_sig]
-        if self.hparams.use_wham_noise:
-            noise = batch.noise_sig[0]
-        else:
-            noise = None
-        if self.hparams.num_spks == 3:
-            targets.append(batch.s3_sig)
-        if self.hparams.num_spks == 4:
-            targets.append(batch.s3_sig)
-            targets.append(batch.s4_sig)
-        with self.training_ctx:
-            predictions, targets = self.compute_forward(
-                mixture, targets, sb.Stage.TRAIN, noise
-            )
-            loss = self.compute_objectives(predictions, targets)
-            # hard threshold the easy dataitems
-            if self.hparams.threshold_byloss:
-                th = self.hparams.threshold
-                loss = loss[loss > th]
-                if loss.nelement() > 0:
-                    loss = loss.mean()
-            else:
-                loss = loss.mean()
-        if loss.nelement() > 0 and loss < self.hparams.loss_upper_lim:
-            self.scaler.scale(loss).backward()
-            if self.hparams.clip_grad_norm >= 0:
-                self.scaler.unscale_(self.optimizer)
-                torch.nn.utils.clip_grad_norm_(
-                    self.modules.parameters(),
-                    self.hparams.clip_grad_norm,
-                )
-            self.scaler.step(self.optimizer)
-            self.scaler.update()
-        else:
-            self.nonfinite_count += 1
-            logger.info(
-                "infinite loss or empty loss! it happened {} times so far - skipping this batch".format(
-                    self.nonfinite_count
-                )
-            )
-            loss.data = torch.tensor(0.0).to(self.device)
-        self.optimizer.zero_grad()
-        return loss.detach().cpu()
-    def evaluate_batch(self, batch, stage):
-        """Computations needed for validation/test batches"""
-        snt_id = batch.id
-        mixture = batch.mix_sig
-        targets = [batch.s1_sig, batch.s2_sig]
-        if self.hparams.num_spks == 3:
-            targets.append(batch.s3_sig)
-        if self.hparams.num_spks == 4:
-            targets.append(batch.s3_sig)
-            targets.append(batch.s4_sig)
-        with torch.no_grad():
-            predictions, targets = self.compute_forward(mixture, targets, stage)
-            loss = self.compute_objectives(predictions, targets)
-        # Manage audio file saving
-        if stage == sb.Stage.TEST and self.hparams.save_audio:
-            if hasattr(self.hparams, "n_audio_to_save"):
-                if self.hparams.n_audio_to_save > 0:
-                    self.save_audio(snt_id[0], mixture, targets, predictions)
-                    self.hparams.n_audio_to_save += -1
-            else:
-                self.save_audio(snt_id[0], mixture, targets, predictions)
-        return loss.mean().detach()
-    def on_stage_end(self, stage, stage_loss, epoch):
-        """Gets called at the end of a epoch."""
-        # Compute/store important stats
-        stage_stats = {"si-snr": stage_loss}
-        if stage == sb.Stage.TRAIN:
-            self.train_stats = stage_stats
-        # Perform end-of-iteration things, like annealing, logging, etc.
-        if stage == sb.Stage.VALID:
-            # Learning rate annealing
-            if isinstance(
-                self.hparams.lr_scheduler, schedulers.ReduceLROnPlateau
-            ):
-                current_lr, next_lr = self.hparams.lr_scheduler(
-                    [self.optimizer], epoch, stage_loss
-                )
-                schedulers.update_learning_rate(self.optimizer, next_lr)
-            else:
-                # if we do not use the reducelronplateau, we do not change the lr
-                current_lr = self.hparams.optimizer.optim.param_groups[0]["lr"]
-            self.hparams.train_logger.log_stats(
-                stats_meta={"epoch": epoch, "lr": current_lr},
-                train_stats=self.train_stats,
-                valid_stats=stage_stats,
-            )
-            self.checkpointer.save_and_keep_only(
-                meta={"si-snr": stage_stats["si-snr"]},
-                min_keys=["si-snr"],
-            )
-        elif stage == sb.Stage.TEST:
-            self.hparams.train_logger.log_stats(
-                stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
-                test_stats=stage_stats,
-            )
-    def add_speed_perturb(self, targets, targ_lens):
-        """Adds speed perturbation and random_shift to the input signals"""
-        min_len = -1
-        recombine = False
-        if self.hparams.use_speedperturb:
-            # Performing speed change (independently on each source)
-            new_targets = []
-            recombine = True
-            for i in range(targets.shape[-1]):
-                new_target = self.hparams.speed_perturb(targets[:, :, i])
-                new_targets.append(new_target)
-                if i == 0:
-                    min_len = new_target.shape[-1]
-                else:
-                    if new_target.shape[-1] < min_len:
-                        min_len = new_target.shape[-1]
-            if self.hparams.use_rand_shift:
-                # Performing random_shift (independently on each source)
-                recombine = True
-                for i in range(targets.shape[-1]):
-                    rand_shift = torch.randint(
-                        self.hparams.min_shift, self.hparams.max_shift, (1,)
-                    )
-                    new_targets[i] = new_targets[i].to(self.device)
-                    new_targets[i] = torch.roll(
-                        new_targets[i], shifts=(rand_shift[0],), dims=1
-                    )
-            # Re-combination
-            if recombine:
-                if self.hparams.use_speedperturb:
-                    targets = torch.zeros(
-                        targets.shape[0],
-                        min_len,
-                        targets.shape[-1],
-                        device=targets.device,
-                        dtype=torch.float,
-                    )
-                for i, new_target in enumerate(new_targets):
-                    targets[:, :, i] = new_targets[i][:, 0:min_len]
-        mix = targets.sum(-1)
-        return mix, targets
-    def cut_signals(self, mixture, targets):
-        """This function selects a random segment of a given length within the mixture.
-        The corresponding targets are selected accordingly"""
-        randstart = torch.randint(
-            0,
-            1 + max(0, mixture.shape[1] - self.hparams.training_signal_len),
-            (1,),
-        ).item()
-        targets = targets[
-            :, randstart : randstart + self.hparams.training_signal_len, :
-        ]
-        mixture = mixture[
-            :, randstart : randstart + self.hparams.training_signal_len
-        ]
-        return mixture, targets
-    def reset_layer_recursively(self, layer):
-        """Reinitializes the parameters of the neural networks"""
-        if hasattr(layer, "reset_parameters"):
-            layer.reset_parameters()
-        for child_layer in layer.modules():
-            if layer != child_layer:
-                self.reset_layer_recursively(child_layer)
-    def save_results(self, test_data):
-        """This script computes the SDR and SI-SNR metrics and saves
-        them into a csv file"""
-        # This package is required for SDR computation
-        from mir_eval.separation import bss_eval_sources
-        # Create folders where to store audio
-        save_file = os.path.join(self.hparams.output_folder, "test_results.csv")
-        # Variable init
-        all_sdrs = []
-        all_sdrs_i = []
-        all_sisnrs = []
-        all_sisnrs_i = []
-        csv_columns = ["snt_id", "sdr", "sdr_i", "si-snr", "si-snr_i"]
-        test_loader = sb.dataio.dataloader.make_dataloader(
-            test_data, **self.hparams.dataloader_opts
-        )
-        with open(save_file, "w", newline="", encoding="utf-8") as results_csv:
-            writer = csv.DictWriter(results_csv, fieldnames=csv_columns)
-            writer.writeheader()
-            # Loop over all test sentence
-            with tqdm(test_loader, dynamic_ncols=True) as t:
-                for i, batch in enumerate(t):
-                    # Apply Separation
-                    mixture, mix_len = batch.mix_sig
-                    snt_id = batch.id
-                    targets = [batch.s1_sig, batch.s2_sig]
-                    if self.hparams.num_spks == 3:
-                        targets.append(batch.s3_sig)
-                    if self.hparams.num_spks == 4:
-                        targets.append(batch.s3_sig)
-                        targets.append(batch.s4_sig)
-                    with torch.no_grad():
-                        predictions, targets = self.compute_forward(
-                            batch.mix_sig, targets, sb.Stage.TEST
-                        )
-                    # Compute SI-SNR
-                    sisnr = self.compute_objectives(predictions, targets)
-                    # Compute SI-SNR improvement
-                    mixture_signal = torch.stack(
-                        [mixture] * self.hparams.num_spks, dim=-1
-                    )
-                    mixture_signal = mixture_signal.to(targets.device)
-                    sisnr_baseline = self.compute_objectives(
-                        mixture_signal, targets
-                    )
-                    sisnr_i = sisnr - sisnr_baseline
-                    # Compute SDR
-                    sdr, _, _, _ = bss_eval_sources(
-                        targets[0].t().cpu().numpy(),
-                        predictions[0].t().detach().cpu().numpy(),
-                    )
-                    sdr_baseline, _, _, _ = bss_eval_sources(
-                        targets[0].t().cpu().numpy(),
-                        mixture_signal[0].t().detach().cpu().numpy(),
-                    )
-                    sdr_i = sdr.mean() - sdr_baseline.mean()
-                    # Saving on a csv file
-                    row = {
-                        "snt_id": snt_id[0],
-                        "sdr": sdr.mean(),
-                        "sdr_i": sdr_i,
-                        "si-snr": -sisnr.item(),
-                        "si-snr_i": -sisnr_i.item(),
-                    }
-                    writer.writerow(row)
-                    # Metric Accumulation
-                    all_sdrs.append(sdr.mean())
-                    all_sdrs_i.append(sdr_i.mean())
-                    all_sisnrs.append(-sisnr.item())
-                    all_sisnrs_i.append(-sisnr_i.item())
-                row = {
-                    "snt_id": "avg",
-                    "sdr": np.array(all_sdrs).mean(),
-                    "sdr_i": np.array(all_sdrs_i).mean(),
-                    "si-snr": np.array(all_sisnrs).mean(),
-                    "si-snr_i": np.array(all_sisnrs_i).mean(),
-                }
-                writer.writerow(row)
-        logger.info("Mean SISNR is {}".format(np.array(all_sisnrs).mean()))
-        logger.info("Mean SISNRi is {}".format(np.array(all_sisnrs_i).mean()))
-        logger.info("Mean SDR is {}".format(np.array(all_sdrs).mean()))
-        logger.info("Mean SDRi is {}".format(np.array(all_sdrs_i).mean()))
-    def save_audio(self, snt_id, mixture, targets, predictions):
-        "saves the test audio (mixture, targets, and estimated sources) on disk"
-        # Create output folder
-        save_path = os.path.join(self.hparams.save_folder, "audio_results")
-        if not os.path.exists(save_path):
-            os.mkdir(save_path)
-        for ns in range(self.hparams.num_spks):
-            # Estimated source
-            signal = predictions[0, :, ns]
-            signal = signal / signal.abs().max()
-            save_file = os.path.join(
-                save_path, "item{}_source{}hat.wav".format(snt_id, ns + 1)
-            )
-            torchaudio.save(
-                save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
-            )
-            # Original source
-            signal = targets[0, :, ns]
-            signal = signal / signal.abs().max()
-            save_file = os.path.join(
-                save_path, "item{}_source{}.wav".format(snt_id, ns + 1)
-            )
-            torchaudio.save(
-                save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
-            )
-        # Mixture
-        signal = mixture[0][0, :]
-        signal = signal / signal.abs().max()
-        save_file = os.path.join(save_path, "item{}_mix.wav".format(snt_id))
-        torchaudio.save(
-            save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
-        )
-def dataio_prep(hparams):
-    """Creates data processing pipeline"""
-    # 1. Define datasets
-    train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
-        csv_path=hparams["train_data"],
-        replacements={"data_root": hparams["data_folder"]},
-    )
-    valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
-        csv_path=hparams["valid_data"],
-        replacements={"data_root": hparams["data_folder"]},
-    )
-    test_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
-        csv_path=hparams["test_data"],
-        replacements={"data_root": hparams["data_folder"]},
-    )
-    datasets = [train_data, valid_data, test_data]
-    # 2. Provide audio pipelines
-    @sb.utils.data_pipeline.takes("mix_wav")
-    @sb.utils.data_pipeline.provides("mix_sig")
-    def audio_pipeline_mix(mix_wav):
-        mix_sig = sb.dataio.dataio.read_audio(mix_wav)
-        return mix_sig
-    @sb.utils.data_pipeline.takes("s1_wav")
-    @sb.utils.data_pipeline.provides("s1_sig")
-    def audio_pipeline_s1(s1_wav):
-        s1_sig = sb.dataio.dataio.read_audio(s1_wav)
-        return s1_sig
-    @sb.utils.data_pipeline.takes("s2_wav")
-    @sb.utils.data_pipeline.provides("s2_sig")
-    def audio_pipeline_s2(s2_wav):
-        s2_sig = sb.dataio.dataio.read_audio(s2_wav)
-        return s2_sig
-    # --- 如果说话人 >= 3，定义第 3 路 ---
-    if hparams["num_spks"] >= 3:
-        @sb.utils.data_pipeline.takes("s3_wav")
-        @sb.utils.data_pipeline.provides("s3_sig")
-        def audio_pipeline_s3(s3_wav):
-            return sb.dataio.dataio.read_audio(s3_wav)
-    # --- 如果说话人 == 4，定义第 4 路 ---
-    if hparams["num_spks"] == 4:
-        @sb.utils.data_pipeline.takes("s4_wav")
-        @sb.utils.data_pipeline.provides("s4_sig")
-        def audio_pipeline_s4(s4_wav):
-            return sb.dataio.dataio.read_audio(s4_wav)
-    if hparams["use_wham_noise"]:
-        @sb.utils.data_pipeline.takes("noise_wav")
-        @sb.utils.data_pipeline.provides("noise_sig")
-        def audio_pipeline_noise(noise_wav):
-            noise_sig = sb.dataio.dataio.read_audio(noise_wav)
-            return noise_sig
-    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_mix)
-    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s1)
-    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s2)
-    if hparams["num_spks"] == 3:
-        sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s3)
-    if hparams["num_spks"] == 4:
-        sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s3)
-        sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s4)
-    if hparams["use_wham_noise"]:
-        print("Using the WHAM! noise in the data pipeline")
-        sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_noise)
-    if (hparams["num_spks"] == 2) and hparams["use_wham_noise"]:
-        sb.dataio.dataset.set_output_keys(
-            datasets, ["id", "mix_sig", "s1_sig", "s2_sig", "noise_sig"]
-        )
-    elif (hparams["num_spks"] == 3) and hparams["use_wham_noise"]:
-        sb.dataio.dataset.set_output_keys(
-            datasets,
-            ["id", "mix_sig", "s1_sig", "s2_sig", "s3_sig", "noise_sig"],
-        )
-    elif (hparams["num_spks"] == 4) and hparams["use_wham_noise"]:
-        sb.dataio.dataset.set_output_keys(
-            datasets,
-            ["id", "mix_sig", "s1_sig", "s2_sig", "s3_sig", "s4_sig", "noise_sig"],
-        )
-    elif (hparams["num_spks"] == 4) and not hparams["use_wham_noise"]:
-        sb.dataio.dataset.set_output_keys(
-            datasets,
-            ["id", "mix_sig", "s1_sig", "s2_sig", "s3_sig", "s4_sig"],
-        )
-    elif (hparams["num_spks"] == 2) and not hparams["use_wham_noise"]:
-        sb.dataio.dataset.set_output_keys(
-            datasets, ["id", "mix_sig", "s1_sig", "s2_sig"]
-        )
-    else:
-        sb.dataio.dataset.set_output_keys(
-            datasets, ["id", "mix_sig", "s1_sig", "s2_sig", "s3_sig"]
-        )
-    return train_data, valid_data, test_data
-if __name__ == "__main__":
-    # Load hyperparameters file with command-line overrides
-    hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
-    with open(hparams_file, encoding="utf-8") as fin:
-        hparams = load_hyperpyyaml(fin, overrides)
-    # Initialize ddp (useful only for multi-GPU DDP training)
-    sb.utils.distributed.ddp_init_group(run_opts)
-    # Create experiment directory
-    sb.create_experiment_directory(
-        experiment_directory=hparams["output_folder"],
-        hyperparams_to_save=hparams_file,
-        overrides=overrides,
-    )
-    # Check if wsj0_tr is set with dynamic mixing
-    if hparams["dynamic_mixing"] and not os.path.exists(
-        hparams["base_folder_dm"]
-    ):
-        raise ValueError(
-            "Please, specify a valid base_folder_dm folder when using dynamic mixing"
-        )
-    # Update precision to bf16 if the device is CPU and precision is fp16
-    if run_opts.get("device") == "cpu" and hparams.get("precision") == "fp16":
-        hparams["precision"] = "bf16"
-    # Create dataset objects
-    if hparams["dynamic_mixing"]:
-        from dynamic_mixing import (
-            dynamic_mix_data_prep_librimix as dynamic_mix_data_prep,
-        )
-        # if the base_folder for dm is not processed, preprocess them
-        if "processed" not in hparams["base_folder_dm"]:
-            # if the processed folder already exists we just use it otherwise we do the preprocessing
-            if not os.path.exists(
-                os.path.normpath(hparams["base_folder_dm"]) + "_processed"
-            ):
-                from recipes.LibriMix.meta.preprocess_dynamic_mixing import (
-                    resample_folder,
-                )
-                print("Resampling the base folder")
-                run_on_main(
-                    resample_folder,
-                    kwargs={
-                        "input_folder": hparams["base_folder_dm"],
-                        "output_folder": os.path.normpath(
-                            hparams["base_folder_dm"]
-                        )
-                        + "_processed",
-                        "fs": hparams["sample_rate"],
-                        "regex": "**/*.flac",
-                    },
-                )
-                # adjust the base_folder_dm path
-                hparams["base_folder_dm"] = (
-                    os.path.normpath(hparams["base_folder_dm"]) + "_processed"
-                )
-            else:
-                print(
-                    "Using the existing processed folder on the same directory as base_folder_dm"
-                )
-                hparams["base_folder_dm"] = (
-                    os.path.normpath(hparams["base_folder_dm"]) + "_processed"
-                )
-        dm_hparams = {
-            "train_data": hparams["train_data"],
-            "data_folder": hparams["data_folder"],
-            "base_folder_dm": hparams["base_folder_dm"],
-            "sample_rate": hparams["sample_rate"],
-            "num_spks": hparams["num_spks"],
-            "training_signal_len": hparams["training_signal_len"],
-            "dataloader_opts": hparams["dataloader_opts"],
-        }
-        train_data = dynamic_mix_data_prep(dm_hparams)
-        _, valid_data, test_data = dataio_prep(hparams)
-    else:
-        train_data, valid_data, test_data = dataio_prep(hparams)
-    # Load pretrained model if pretrained_separator is present in the yaml
-    if "pretrained_separator" in hparams:
-        run_on_main(hparams["pretrained_separator"].collect_files)
-        hparams["pretrained_separator"].load_collected()
-    # Brain class initialization
-    separator = Separation(
-        modules=hparams["modules"],
-        opt_class=hparams["optimizer"],
-        hparams=hparams,
-        run_opts=run_opts,
-        checkpointer=hparams["checkpointer"],
-    )
-    # re-initialize the parameters if we don't use a pretrained model
-    if "pretrained_separator" not in hparams:
-        for module in separator.modules.values():
-            separator.reset_layer_recursively(module)
-    # Training
-    separator.fit(
-        separator.hparams.epoch_counter,
-        train_data,
-        valid_data,
-        train_loader_kwargs=hparams["dataloader_opts"],
-        valid_loader_kwargs=hparams["dataloader_opts"],
-    )
-    # Eval
-    separator.evaluate(test_data, min_key="si-snr")
-    separator.save_results(test_data)

Sepformer/results/sepformer_4mix/1234/train_log.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- Epoch loaded: 48 - test si-snr: 20.60