swc2 committed on
Commit
36e02be
·
1 Parent(s): bcc11b4

update model

Browse files
Files changed (27) hide show
  1. Sepformer/results/sepformer_4mix/1234/env.log +0 -90
  2. Sepformer/results/sepformer_4mix/1234/hyperparams.yaml +0 -198
  3. Sepformer/results/sepformer_4mix/1234/log.txt +0 -762
  4. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/CKPT.yaml +0 -4
  5. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/brain.ckpt +0 -3
  6. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/counter.ckpt +0 -3
  7. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/dataloader-TRAIN.ckpt +0 -3
  8. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/decoder.ckpt +0 -3
  9. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/encoder.ckpt +0 -3
  10. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/masknet.ckpt +0 -3
  11. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/optimizer.ckpt +0 -3
  12. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/scaler.ckpt +0 -3
  13. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/CKPT.yaml +0 -4
  14. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/brain.ckpt +0 -3
  15. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/counter.ckpt +0 -3
  16. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/dataloader-TRAIN.ckpt +0 -3
  17. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/decoder.ckpt +0 -3
  18. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/encoder.ckpt +0 -3
  19. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/masknet.ckpt +0 -3
  20. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/optimizer.ckpt +0 -3
  21. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/scaler.ckpt +0 -3
  22. Sepformer/results/sepformer_4mix/1234/save/record_tr.csv +0 -0
  23. Sepformer/results/sepformer_4mix/1234/save/record_val.csv +0 -0
  24. Sepformer/results/sepformer_4mix/1234/save/test_data.csv +0 -0
  25. Sepformer/results/sepformer_4mix/1234/test_results.csv +0 -6
  26. Sepformer/results/sepformer_4mix/1234/train.py +0 -666
  27. Sepformer/results/sepformer_4mix/1234/train_log.txt +0 -1
Sepformer/results/sepformer_4mix/1234/env.log DELETED
@@ -1,90 +0,0 @@
1
- SpeechBrain system description
2
- ==============================
3
- Python version:
4
- 3.11.13 (main, Jun 5 2025, 13:12:00) [GCC 11.2.0]
5
- ==============================
6
- Installed Python packages:
7
- black==24.3.0
8
- certifi==2025.6.15
9
- cfgv==3.4.0
10
- charset-normalizer==3.4.2
11
- click==8.1.7
12
- distlib==0.3.9
13
- docstring_parser_fork==0.0.12
14
- filelock==3.18.0
15
- flake8==7.0.0
16
- fsspec==2025.5.1
17
- future==1.0.0
18
- hf-xet==1.1.5
19
- huggingface-hub==0.33.1
20
- HyperPyYAML==1.2.2
21
- identify==2.6.12
22
- idna==3.10
23
- iniconfig==2.1.0
24
- isort==5.13.2
25
- Jinja2==3.1.6
26
- joblib==1.5.1
27
- MarkupSafe==3.0.2
28
- mccabe==0.7.0
29
- mir_eval==0.6
30
- mpmath==1.3.0
31
- mypy_extensions==1.1.0
32
- networkx==3.5
33
- nodeenv==1.9.1
34
- numpy==2.3.1
35
- nvidia-cublas-cu12==12.6.4.1
36
- nvidia-cuda-cupti-cu12==12.6.80
37
- nvidia-cuda-nvrtc-cu12==12.6.77
38
- nvidia-cuda-runtime-cu12==12.6.77
39
- nvidia-cudnn-cu12==9.5.1.17
40
- nvidia-cufft-cu12==11.3.0.4
41
- nvidia-cufile-cu12==1.11.1.6
42
- nvidia-curand-cu12==10.3.7.77
43
- nvidia-cusolver-cu12==11.7.1.2
44
- nvidia-cusparse-cu12==12.5.4.2
45
- nvidia-cusparselt-cu12==0.6.3
46
- nvidia-nccl-cu12==2.26.2
47
- nvidia-nvjitlink-cu12==12.6.85
48
- nvidia-nvtx-cu12==12.6.77
49
- packaging==25.0
50
- pandas==2.3.0
51
- pathspec==0.12.1
52
- platformdirs==4.3.8
53
- pluggy==1.6.0
54
- pre_commit==4.2.0
55
- pycodestyle==2.11.0
56
- pydoclint==0.4.1
57
- pyflakes==3.2.0
58
- pygtrie==2.5.0
59
- pyloudnorm==0.1.1
60
- pytest==7.4.0
61
- python-dateutil==2.9.0.post0
62
- pytz==2025.2
63
- PyYAML==6.0.2
64
- regex==2024.11.6
65
- requests==2.32.4
66
- ruamel.yaml==0.18.14
67
- ruamel.yaml.clib==0.2.12
68
- safetensors==0.5.3
69
- scipy==1.16.0
70
- sentencepiece==0.2.0
71
- six==1.17.0
72
- speechbrain==1.0.3
73
- sympy==1.14.0
74
- tokenizers==0.21.2
75
- torch==2.7.1
76
- torchaudio==2.7.1
77
- tqdm==4.67.1
78
- transformers==4.53.0
79
- triton==3.3.1
80
- typing_extensions==4.14.0
81
- tzdata==2025.2
82
- urllib3==2.5.0
83
- virtualenv==20.31.2
84
- yamllint==1.35.1
85
- ==============================
86
- Git revision:
87
- 476ac4f
88
- ==============================
89
- CUDA version:
90
- 12.6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Sepformer/results/sepformer_4mix/1234/hyperparams.yaml DELETED
@@ -1,198 +0,0 @@
1
- # Generated 2025-06-27 from:
2
- # /home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/hparams/sepformer_4mix.yaml
3
- # yamllint disable
4
- # ################################
5
- # Model: SepFormer for source separation
6
- # https://arxiv.org/abs/2010.13154
7
- # ################################
8
- #
9
- # Basic parameters
10
- # Seed needs to be set at top of yaml, before objects with parameters are made
11
- #
12
- seed: 1234
13
- __set_seed: !apply:speechbrain.utils.seed_everything [1234]
14
-
15
- # Data params
16
-
17
- # e.g. '/yourpath/Libri3Mix/train-clean-360/'
18
- # the data folder is needed even if dynamic mixing is applied
19
- data_folder: /data/
20
-
21
- # This is needed only if dynamic mixing is applied
22
- base_folder_dm: /yourpath/
23
-
24
- experiment_name: sepformer_4mix
25
- output_folder: results/sepformer_4mix/1234
26
- train_log: results/sepformer_4mix/1234/train_log.txt
27
- save_folder: results/sepformer_4mix/1234/save
28
- train_data: results/sepformer_4mix/1234/save/record_tr.csv
29
- valid_data: results/sepformer_4mix/1234/save/record_val.csv
30
- test_data: results/sepformer_4mix/1234/save/test_data.csv
31
- skip_prep: false
32
-
33
- ckpt_interval_minutes: 60
34
-
35
- # Experiment params
36
- precision: fp16 # One of bf16, fp16 or fp32 (a precision name, not a boolean); fp16/bf16 select mixed-precision training
37
- num_spks: 4
38
- noprogressbar: false
39
- save_audio: false # Save estimated sources on disk
40
- sample_rate: 16000
41
-
42
- ####################### Training Parameters ####################################
43
- N_epochs: 200
44
- batch_size: 1
45
- lr: 0.00015
46
- clip_grad_norm: 5
47
- loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
48
- # if True, the training sequences are cut to a specified length
49
- limit_training_signal_len: true
50
- # this is the length of sequences if we choose to limit
51
- # the signal length of training sequences
52
- training_signal_len: 64000000
53
-
54
- # Set it to True to dynamically create mixtures at training time
55
- dynamic_mixing: false
56
- use_wham_noise: false
57
-
58
- # Parameters for data augmentation
59
- use_wavedrop: false
60
- use_speedperturb: true
61
- use_rand_shift: false
62
- min_shift: -8000
63
- max_shift: 8000
64
-
65
- # Speed perturbation
66
- speed_changes: &id001 [95, 100, 105]
67
-
68
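- # Speed perturbation object (uses the speed_changes list above). Frequency drop (randomly drops a number of frequency bands to zero) is configured by the drop_freq_* settings that follow.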
69
- speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
70
- orig_freq: 16000
71
- speeds: *id001
72
- drop_freq_low: 0 # Min frequency band dropout probability
73
- drop_freq_high: 1 # Max frequency band dropout probability
74
- drop_freq_count_low: 1 # Min number of frequency bands to drop
75
- drop_freq_count_high: 3 # Max number of frequency bands to drop
76
- drop_freq_width: 0.05 # Width of frequency bands to drop
77
-
78
- drop_freq: !new:speechbrain.augment.time_domain.DropFreq
79
- drop_freq_low: 0
80
- drop_freq_high: 1
81
- drop_freq_count_low: 1
82
- drop_freq_count_high: 3
83
- drop_freq_width: 0.05
84
-
85
- # Time drop: randomly drops a number of temporal chunks.
86
- drop_chunk_count_low: 1 # Min number of audio chunks to drop
87
- drop_chunk_count_high: 5 # Max number of audio chunks to drop
88
- drop_chunk_length_low: 1000 # Min length of audio chunks to drop
89
- drop_chunk_length_high: 2000 # Max length of audio chunks to drop
90
-
91
- drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
92
- drop_length_low: 1000
93
- drop_length_high: 2000
94
- drop_count_low: 1
95
- drop_count_high: 5
96
-
97
- # loss thresholding -- this thresholds the training loss
98
- threshold_byloss: true
99
- threshold: -30
100
-
101
- # Encoder parameters
102
- N_encoder_out: 256
103
- out_channels: 256
104
- kernel_size: 32
105
- kernel_stride: 16
106
- d_ffn: 1024
107
-
108
- # Dataloader options
109
- dataloader_opts:
110
- batch_size: 1
111
- num_workers: 3
112
-
113
-
114
- # Specifying the network
115
- Encoder: &id004 !new:speechbrain.lobes.models.dual_path.Encoder
116
- kernel_size: 32
117
- out_channels: 256
118
-
119
-
120
- SBtfintra: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
121
- num_layers: 8
122
- d_model: 256
123
- nhead: 8
124
- d_ffn: 1024
125
- dropout: 0
126
- use_positional_encoding: true
127
- norm_before: true
128
-
129
- SBtfinter: &id003 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
130
- num_layers: 8
131
- d_model: 256
132
- nhead: 8
133
- d_ffn: 1024
134
- dropout: 0
135
- use_positional_encoding: true
136
- norm_before: true
137
-
138
- MaskNet: &id006 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
139
-
140
- num_spks: 4
141
- in_channels: 256
142
- out_channels: 256
143
- num_layers: 2
144
- K: 250
145
- intra_model: *id002
146
- inter_model: *id003
147
- norm: ln
148
- linear_layer_after_inter_intra: false
149
- skip_around_intra: true
150
-
151
- Decoder: &id005 !new:speechbrain.lobes.models.dual_path.Decoder
152
- in_channels: 256
153
- out_channels: 1
154
- kernel_size: 32
155
- stride: 16
156
- bias: false
157
-
158
- optimizer: !name:torch.optim.Adam
159
- lr: 0.00015
160
- weight_decay: 0
161
-
162
- loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
163
-
164
- lr_scheduler: !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
165
- factor: 0.5
166
- patience: 2
167
- dont_halve_until_epoch: 5
168
-
169
- epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
170
- # lr_scheduler: !ref <lr_scheduler>
171
-
172
- limit: 200
173
-
174
- modules:
175
- encoder: *id004
176
- decoder: *id005
177
- masknet: *id006
178
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
179
- checkpoints_dir: results/sepformer_4mix/1234/save
180
- recoverables:
181
- encoder: *id004
182
- decoder: *id005
183
- masknet: *id006
184
- counter: *id007
185
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
186
- save_file: results/sepformer_4mix/1234/train_log.txt
187
-
188
- # # If you do not want to use the pretrained separator, you can simply delete the pretrained_separator field.
189
- # pretrained_separator: !new:speechbrain.utils.parameter_transfer.Pretrainer
190
- # collect_in: !ref <save_folder>
191
- # loadables:
192
- # encoder: !ref <Encoder>
193
- # decoder: !ref <Decoder>
194
- # masknet: !ref <MaskNet>
195
- # paths:
196
- # encoder: speechbrain/sepformer-wsj03mix/encoder.ckpt
197
- # decoder: speechbrain/sepformer-wsj03mix/decoder.ckpt
198
- # masknet: speechbrain/sepformer-wsj03mix/masknet.ckpt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Sepformer/results/sepformer_4mix/1234/log.txt DELETED
@@ -1,762 +0,0 @@
1
- 2025-06-27 17:13:10,582 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
2
- 2025-06-27 17:13:10,583 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
3
- 2025-06-27 17:13:10,583 - speechbrain.core - INFO - Beginning experiment!
4
- 2025-06-27 17:13:10,583 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
5
- 2025-06-27 17:13:10,831 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
6
- certifi==2025.6.15
7
- cfgv==3.4.0
8
- charset-normalizer==3.4.2
9
- click==8.1.7
10
- distlib==0.3.9
11
- docstring_parser_fork==0.0.12
12
- filelock==3.18.0
13
- flake8==7.0.0
14
- fsspec==2025.5.1
15
- future==1.0.0
16
- hf-xet==1.1.5
17
- huggingface-hub==0.33.0
18
- HyperPyYAML==1.2.2
19
- identify==2.6.12
20
- idna==3.10
21
- iniconfig==2.1.0
22
- isort==5.13.2
23
- Jinja2==3.1.6
24
- joblib==1.5.1
25
- MarkupSafe==3.0.2
26
- mccabe==0.7.0
27
- mir_eval==0.6
28
- mpmath==1.3.0
29
- mypy_extensions==1.1.0
30
- networkx==3.5
31
- nodeenv==1.9.1
32
- numpy==2.3.1
33
- nvidia-cublas-cu12==12.6.4.1
34
- nvidia-cuda-cupti-cu12==12.6.80
35
- nvidia-cuda-nvrtc-cu12==12.6.77
36
- nvidia-cuda-runtime-cu12==12.6.77
37
- nvidia-cudnn-cu12==9.5.1.17
38
- nvidia-cufft-cu12==11.3.0.4
39
- nvidia-cufile-cu12==1.11.1.6
40
- nvidia-curand-cu12==10.3.7.77
41
- nvidia-cusolver-cu12==11.7.1.2
42
- nvidia-cusparse-cu12==12.5.4.2
43
- nvidia-cusparselt-cu12==0.6.3
44
- nvidia-nccl-cu12==2.26.2
45
- nvidia-nvjitlink-cu12==12.6.85
46
- nvidia-nvtx-cu12==12.6.77
47
- packaging==25.0
48
- pandas==2.3.0
49
- pathspec==0.12.1
50
- platformdirs==4.3.8
51
- pluggy==1.6.0
52
- pre_commit==4.2.0
53
- pycodestyle==2.11.0
54
- pydoclint==0.4.1
55
- pyflakes==3.2.0
56
- Pygments==2.19.2
57
- pygtrie==2.5.0
58
- pyloudnorm==0.1.1
59
- pytest==7.4.0
60
- python-dateutil==2.9.0.post0
61
- pytz==2025.2
62
- PyYAML==6.0.2
63
- regex==2024.11.6
64
- requests==2.32.4
65
- ruamel.yaml==0.18.14
66
- ruamel.yaml.clib==0.2.12
67
- safetensors==0.5.3
68
- scipy==1.16.0
69
- sentencepiece==0.2.0
70
- six==1.17.0
71
- speechbrain==1.0.3
72
- sympy==1.14.0
73
- tokenizers==0.21.2
74
- torch==2.7.1
75
- torchaudio==2.7.1
76
- tqdm==4.67.1
77
- transformers==4.52.4
78
- triton==3.3.1
79
- typing_extensions==4.14.0
80
- tzdata==2025.2
81
- urllib3==2.5.0
82
- virtualenv==20.31.2
83
- yamllint==1.35.1
84
-
85
-
86
- 2025-06-27 17:13:10,836 - speechbrain.core - ERROR - Exception:
87
- Traceback (most recent call last):
88
- File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 634, in <module>
89
- train_data, valid_data, test_data = dataio_prep(hparams)
90
- ^^^^^^^^^^^^^^^^^^^^
91
- File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 446, in dataio_prep
92
- train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
93
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
94
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataset.py", line 417, in from_csv
95
- data = load_data_csv(csv_path, replacements)
96
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
97
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataio.py", line 138, in load_data_csv
98
- with open(csv_path, newline="", encoding="utf-8") as csvfile:
99
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
100
- FileNotFoundError: [Errno 2] No such file or directory: 'results/sepformer_4mix/1234/save/record_tr.csv'
101
- 2025-06-27 17:17:17,084 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [allow_tf32, disable_jit_profiling]
102
- 2025-06-27 17:17:17,085 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
103
- 2025-06-27 17:17:17,085 - speechbrain.core - INFO - Beginning experiment!
104
- 2025-06-27 17:17:17,085 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
105
- 2025-06-27 17:17:17,318 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
106
- certifi==2025.6.15
107
- cfgv==3.4.0
108
- charset-normalizer==3.4.2
109
- click==8.1.7
110
- distlib==0.3.9
111
- docstring_parser_fork==0.0.12
112
- filelock==3.18.0
113
- flake8==7.0.0
114
- fsspec==2025.5.1
115
- future==1.0.0
116
- hf-xet==1.1.5
117
- huggingface-hub==0.33.0
118
- HyperPyYAML==1.2.2
119
- identify==2.6.12
120
- idna==3.10
121
- iniconfig==2.1.0
122
- isort==5.13.2
123
- Jinja2==3.1.6
124
- joblib==1.5.1
125
- MarkupSafe==3.0.2
126
- mccabe==0.7.0
127
- mir_eval==0.6
128
- mpmath==1.3.0
129
- mypy_extensions==1.1.0
130
- networkx==3.5
131
- nodeenv==1.9.1
132
- numpy==2.3.1
133
- nvidia-cublas-cu12==12.6.4.1
134
- nvidia-cuda-cupti-cu12==12.6.80
135
- nvidia-cuda-nvrtc-cu12==12.6.77
136
- nvidia-cuda-runtime-cu12==12.6.77
137
- nvidia-cudnn-cu12==9.5.1.17
138
- nvidia-cufft-cu12==11.3.0.4
139
- nvidia-cufile-cu12==1.11.1.6
140
- nvidia-curand-cu12==10.3.7.77
141
- nvidia-cusolver-cu12==11.7.1.2
142
- nvidia-cusparse-cu12==12.5.4.2
143
- nvidia-cusparselt-cu12==0.6.3
144
- nvidia-nccl-cu12==2.26.2
145
- nvidia-nvjitlink-cu12==12.6.85
146
- nvidia-nvtx-cu12==12.6.77
147
- packaging==25.0
148
- pandas==2.3.0
149
- pathspec==0.12.1
150
- platformdirs==4.3.8
151
- pluggy==1.6.0
152
- pre_commit==4.2.0
153
- pycodestyle==2.11.0
154
- pydoclint==0.4.1
155
- pyflakes==3.2.0
156
- Pygments==2.19.2
157
- pygtrie==2.5.0
158
- pyloudnorm==0.1.1
159
- pytest==7.4.0
160
- python-dateutil==2.9.0.post0
161
- pytz==2025.2
162
- PyYAML==6.0.2
163
- regex==2024.11.6
164
- requests==2.32.4
165
- ruamel.yaml==0.18.14
166
- ruamel.yaml.clib==0.2.12
167
- safetensors==0.5.3
168
- scipy==1.16.0
169
- sentencepiece==0.2.0
170
- six==1.17.0
171
- speechbrain==1.0.3
172
- sympy==1.14.0
173
- tokenizers==0.21.2
174
- torch==2.7.1
175
- torchaudio==2.7.1
176
- tqdm==4.67.1
177
- transformers==4.52.4
178
- triton==3.3.1
179
- typing_extensions==4.14.0
180
- tzdata==2025.2
181
- urllib3==2.5.0
182
- virtualenv==20.31.2
183
- yamllint==1.35.1
184
-
185
-
186
- 2025-06-27 17:17:17,325 - speechbrain.core - ERROR - Exception:
187
- Traceback (most recent call last):
188
- File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 634, in <module>
189
- train_data, valid_data, test_data = dataio_prep(hparams)
190
- ^^^^^^^^^^^^^^^^^^^^
191
- File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 446, in dataio_prep
192
- train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
193
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
194
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataset.py", line 417, in from_csv
195
- data = load_data_csv(csv_path, replacements)
196
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
197
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataio.py", line 138, in load_data_csv
198
- with open(csv_path, newline="", encoding="utf-8") as csvfile:
199
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
200
- FileNotFoundError: [Errno 2] No such file or directory: 'results/sepformer_4mix/1234/save/record_tr.csv'
201
- 2025-06-27 17:18:04,558 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
202
- 2025-06-27 17:18:04,559 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
203
- 2025-06-27 17:18:04,559 - speechbrain.core - INFO - Beginning experiment!
204
- 2025-06-27 17:18:04,559 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
205
- 2025-06-27 17:18:04,806 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
206
- certifi==2025.6.15
207
- cfgv==3.4.0
208
- charset-normalizer==3.4.2
209
- click==8.1.7
210
- distlib==0.3.9
211
- docstring_parser_fork==0.0.12
212
- filelock==3.18.0
213
- flake8==7.0.0
214
- fsspec==2025.5.1
215
- future==1.0.0
216
- hf-xet==1.1.5
217
- huggingface-hub==0.33.0
218
- HyperPyYAML==1.2.2
219
- identify==2.6.12
220
- idna==3.10
221
- iniconfig==2.1.0
222
- isort==5.13.2
223
- Jinja2==3.1.6
224
- joblib==1.5.1
225
- MarkupSafe==3.0.2
226
- mccabe==0.7.0
227
- mir_eval==0.6
228
- mpmath==1.3.0
229
- mypy_extensions==1.1.0
230
- networkx==3.5
231
- nodeenv==1.9.1
232
- numpy==2.3.1
233
- nvidia-cublas-cu12==12.6.4.1
234
- nvidia-cuda-cupti-cu12==12.6.80
235
- nvidia-cuda-nvrtc-cu12==12.6.77
236
- nvidia-cuda-runtime-cu12==12.6.77
237
- nvidia-cudnn-cu12==9.5.1.17
238
- nvidia-cufft-cu12==11.3.0.4
239
- nvidia-cufile-cu12==1.11.1.6
240
- nvidia-curand-cu12==10.3.7.77
241
- nvidia-cusolver-cu12==11.7.1.2
242
- nvidia-cusparse-cu12==12.5.4.2
243
- nvidia-cusparselt-cu12==0.6.3
244
- nvidia-nccl-cu12==2.26.2
245
- nvidia-nvjitlink-cu12==12.6.85
246
- nvidia-nvtx-cu12==12.6.77
247
- packaging==25.0
248
- pandas==2.3.0
249
- pathspec==0.12.1
250
- platformdirs==4.3.8
251
- pluggy==1.6.0
252
- pre_commit==4.2.0
253
- pycodestyle==2.11.0
254
- pydoclint==0.4.1
255
- pyflakes==3.2.0
256
- Pygments==2.19.2
257
- pygtrie==2.5.0
258
- pyloudnorm==0.1.1
259
- pytest==7.4.0
260
- python-dateutil==2.9.0.post0
261
- pytz==2025.2
262
- PyYAML==6.0.2
263
- regex==2024.11.6
264
- requests==2.32.4
265
- ruamel.yaml==0.18.14
266
- ruamel.yaml.clib==0.2.12
267
- safetensors==0.5.3
268
- scipy==1.16.0
269
- sentencepiece==0.2.0
270
- six==1.17.0
271
- speechbrain==1.0.3
272
- sympy==1.14.0
273
- tokenizers==0.21.2
274
- torch==2.7.1
275
- torchaudio==2.7.1
276
- tqdm==4.67.1
277
- transformers==4.52.4
278
- triton==3.3.1
279
- typing_extensions==4.14.0
280
- tzdata==2025.2
281
- urllib3==2.5.0
282
- virtualenv==20.31.2
283
- yamllint==1.35.1
284
-
285
-
286
- 2025-06-27 17:18:05,007 - speechbrain.core - INFO - Info: precision arg from hparam file is used
287
- 2025-06-27 17:18:05,008 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
288
- 2025-06-27 17:18:05,008 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
289
- 2025-06-27 17:18:05,221 - speechbrain.core - INFO - Gradscaler enabled: `True`
290
- 2025-06-27 17:18:05,221 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
291
- 2025-06-27 17:18:05,221 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
292
- 2025-06-27 17:18:05,222 - speechbrain.core - INFO - Separation Model Statistics:
293
- * Total Number of Trainable Parameters: 25.8M
294
- * Total Number of Parameters: 25.8M
295
- * Trainable Parameters represent 100.0000% of the total size.
296
- 2025-06-27 17:18:06,855 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00
297
- 2025-06-27 17:18:07,209 - speechbrain.utils.epoch_loop - INFO - Going into epoch 163
298
- 2025-06-27 17:18:10,757 - speechbrain.core - ERROR - Exception:
299
- Traceback (most recent call last):
300
- File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 656, in <module>
301
- separator.fit(
302
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1575, in fit
303
- self._fit_train(train_set=train_set, epoch=epoch, enable=enable)
304
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1400, in _fit_train
305
- loss = self.fit_batch(batch)
306
- ^^^^^^^^^^^^^^^^^^^^^
307
- File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 148, in fit_batch
308
- self.scaler.scale(loss).backward()
309
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/_tensor.py", line 648, in backward
310
- torch.autograd.backward(
311
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/autograd/__init__.py", line 353, in backward
312
- _engine_run_backward(
313
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/autograd/graph.py", line 824, in _engine_run_backward
314
- return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
315
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
316
- KeyboardInterrupt
317
- 2025-06-27 17:24:05,950 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
318
- 2025-06-27 17:24:05,951 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
319
- 2025-06-27 17:24:05,951 - speechbrain.core - INFO - Beginning experiment!
320
- 2025-06-27 17:24:05,951 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
321
- 2025-06-27 17:24:06,192 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
322
- certifi==2025.6.15
323
- cfgv==3.4.0
324
- charset-normalizer==3.4.2
325
- click==8.1.7
326
- distlib==0.3.9
327
- docstring_parser_fork==0.0.12
328
- filelock==3.18.0
329
- flake8==7.0.0
330
- fsspec==2025.5.1
331
- future==1.0.0
332
- hf-xet==1.1.5
333
- huggingface-hub==0.33.0
334
- HyperPyYAML==1.2.2
335
- identify==2.6.12
336
- idna==3.10
337
- iniconfig==2.1.0
338
- isort==5.13.2
339
- Jinja2==3.1.6
340
- joblib==1.5.1
341
- MarkupSafe==3.0.2
342
- mccabe==0.7.0
343
- mir_eval==0.6
344
- mpmath==1.3.0
345
- mypy_extensions==1.1.0
346
- networkx==3.5
347
- nodeenv==1.9.1
348
- numpy==2.3.1
349
- nvidia-cublas-cu12==12.6.4.1
350
- nvidia-cuda-cupti-cu12==12.6.80
351
- nvidia-cuda-nvrtc-cu12==12.6.77
352
- nvidia-cuda-runtime-cu12==12.6.77
353
- nvidia-cudnn-cu12==9.5.1.17
354
- nvidia-cufft-cu12==11.3.0.4
355
- nvidia-cufile-cu12==1.11.1.6
356
- nvidia-curand-cu12==10.3.7.77
357
- nvidia-cusolver-cu12==11.7.1.2
358
- nvidia-cusparse-cu12==12.5.4.2
359
- nvidia-cusparselt-cu12==0.6.3
360
- nvidia-nccl-cu12==2.26.2
361
- nvidia-nvjitlink-cu12==12.6.85
362
- nvidia-nvtx-cu12==12.6.77
363
- packaging==25.0
364
- pandas==2.3.0
365
- pathspec==0.12.1
366
- platformdirs==4.3.8
367
- pluggy==1.6.0
368
- pre_commit==4.2.0
369
- pycodestyle==2.11.0
370
- pydoclint==0.4.1
371
- pyflakes==3.2.0
372
- Pygments==2.19.2
373
- pygtrie==2.5.0
374
- pyloudnorm==0.1.1
375
- pytest==7.4.0
376
- python-dateutil==2.9.0.post0
377
- pytz==2025.2
378
- PyYAML==6.0.2
379
- regex==2024.11.6
380
- requests==2.32.4
381
- ruamel.yaml==0.18.14
382
- ruamel.yaml.clib==0.2.12
383
- safetensors==0.5.3
384
- scipy==1.16.0
385
- sentencepiece==0.2.0
386
- six==1.17.0
387
- speechbrain==1.0.3
388
- sympy==1.14.0
389
- tokenizers==0.21.2
390
- torch==2.7.1
391
- torchaudio==2.7.1
392
- tqdm==4.67.1
393
- transformers==4.52.4
394
- triton==3.3.1
395
- typing_extensions==4.14.0
396
- tzdata==2025.2
397
- urllib3==2.5.0
398
- virtualenv==20.31.2
399
- yamllint==1.35.1
400
-
401
-
402
- 2025-06-27 17:24:06,389 - speechbrain.core - INFO - Info: precision arg from hparam file is used
403
- 2025-06-27 17:24:06,390 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
404
- 2025-06-27 17:24:06,390 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
405
- 2025-06-27 17:24:06,573 - speechbrain.core - INFO - Gradscaler enabled: `True`
406
- 2025-06-27 17:24:06,573 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
407
- 2025-06-27 17:24:06,573 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
408
- 2025-06-27 17:24:06,574 - speechbrain.core - INFO - Separation Model Statistics:
409
- * Total Number of Trainable Parameters: 25.8M
410
- * Total Number of Parameters: 25.8M
411
- * Trainable Parameters represent 100.0000% of the total size.
412
- 2025-06-27 17:24:08,245 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00
413
- 2025-06-27 17:24:08,608 - speechbrain.utils.epoch_loop - INFO - Going into epoch 163
414
- 2025-06-27 17:24:11,017 - speechbrain.core - ERROR - Exception:
415
- Traceback (most recent call last):
416
- File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 656, in <module>
417
- separator.fit(
418
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1575, in fit
419
- self._fit_train(train_set=train_set, epoch=epoch, enable=enable)
420
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1400, in _fit_train
421
- loss = self.fit_batch(batch)
422
- ^^^^^^^^^^^^^^^^^^^^^
423
- File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 133, in fit_batch
424
- predictions, targets = self.compute_forward(
425
- ^^^^^^^^^^^^^^^^^^^^^
426
- File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 87, in compute_forward
427
- est_mask = self.hparams.MaskNet(mix_w)
428
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^
429
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
430
- return self._call_impl(*args, **kwargs)
431
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
432
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
433
- return forward_call(*args, **kwargs)
434
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
435
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 1067, in forward
436
- x = self.dual_mdl[i](x)
437
- ^^^^^^^^^^^^^^^^^^^
438
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
439
- return self._call_impl(*args, **kwargs)
440
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
441
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
442
- return forward_call(*args, **kwargs)
443
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
444
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 918, in forward
445
- inter = self.inter_mdl(inter)
446
- ^^^^^^^^^^^^^^^^^^^^^
447
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
448
- return self._call_impl(*args, **kwargs)
449
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
450
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
451
- return forward_call(*args, **kwargs)
452
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
453
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 638, in forward
454
- return self.mdl(x + pos_enc)[0]
455
- ^^^^^^^^^^^^^^^^^^^^^
456
- File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _wrapped_call_impl
457
- def _wrapped_call_impl(self, *args, **kwargs):
458
-
459
- KeyboardInterrupt
460
- 2025-06-27 21:10:07,131 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
461
- 2025-06-27 21:10:07,133 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
462
- 2025-06-27 21:10:07,133 - speechbrain.core - INFO - Beginning experiment!
463
- 2025-06-27 21:10:07,133 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
464
- 2025-06-27 21:10:07,389 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
465
- certifi==2025.6.15
466
- cfgv==3.4.0
467
- charset-normalizer==3.4.2
468
- click==8.1.7
469
- distlib==0.3.9
470
- docstring_parser_fork==0.0.12
471
- filelock==3.18.0
472
- flake8==7.0.0
473
- fsspec==2025.5.1
474
- future==1.0.0
475
- hf-xet==1.1.5
476
- huggingface-hub==0.33.1
477
- HyperPyYAML==1.2.2
478
- identify==2.6.12
479
- idna==3.10
480
- iniconfig==2.1.0
481
- isort==5.13.2
482
- Jinja2==3.1.6
483
- joblib==1.5.1
484
- MarkupSafe==3.0.2
485
- mccabe==0.7.0
486
- mir_eval==0.6
487
- mpmath==1.3.0
488
- mypy_extensions==1.1.0
489
- networkx==3.5
490
- nodeenv==1.9.1
491
- numpy==2.3.1
492
- nvidia-cublas-cu12==12.6.4.1
493
- nvidia-cuda-cupti-cu12==12.6.80
494
- nvidia-cuda-nvrtc-cu12==12.6.77
495
- nvidia-cuda-runtime-cu12==12.6.77
496
- nvidia-cudnn-cu12==9.5.1.17
497
- nvidia-cufft-cu12==11.3.0.4
498
- nvidia-cufile-cu12==1.11.1.6
499
- nvidia-curand-cu12==10.3.7.77
500
- nvidia-cusolver-cu12==11.7.1.2
501
- nvidia-cusparse-cu12==12.5.4.2
502
- nvidia-cusparselt-cu12==0.6.3
503
- nvidia-nccl-cu12==2.26.2
504
- nvidia-nvjitlink-cu12==12.6.85
505
- nvidia-nvtx-cu12==12.6.77
506
- packaging==25.0
507
- pandas==2.3.0
508
- pathspec==0.12.1
509
- platformdirs==4.3.8
510
- pluggy==1.6.0
511
- pre_commit==4.2.0
512
- pycodestyle==2.11.0
513
- pydoclint==0.4.1
514
- pyflakes==3.2.0
515
- pygtrie==2.5.0
516
- pyloudnorm==0.1.1
517
- pytest==7.4.0
518
- python-dateutil==2.9.0.post0
519
- pytz==2025.2
520
- PyYAML==6.0.2
521
- regex==2024.11.6
522
- requests==2.32.4
523
- ruamel.yaml==0.18.14
524
- ruamel.yaml.clib==0.2.12
525
- safetensors==0.5.3
526
- scipy==1.16.0
527
- sentencepiece==0.2.0
528
- six==1.17.0
529
- speechbrain==1.0.3
530
- sympy==1.14.0
531
- tokenizers==0.21.2
532
- torch==2.7.1
533
- torchaudio==2.7.1
534
- tqdm==4.67.1
535
- transformers==4.53.0
536
- triton==3.3.1
537
- typing_extensions==4.14.0
538
- tzdata==2025.2
539
- urllib3==2.5.0
540
- virtualenv==20.31.2
541
- yamllint==1.35.1
542
-
543
-
544
- 2025-06-27 21:10:07,393 - speechbrain.utils.superpowers - DEBUG - 476ac4f
545
-
546
-
547
- 2025-06-27 21:10:07,996 - speechbrain.core - INFO - Info: precision arg from hparam file is used
548
- 2025-06-27 21:10:07,997 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
549
- 2025-06-27 21:10:07,997 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
550
- 2025-06-27 21:10:08,035 - speechbrain.core - INFO - Gradscaler enabled: `True`
551
- 2025-06-27 21:10:08,035 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
552
- 2025-06-27 21:10:08,035 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
553
- 2025-06-27 21:10:08,036 - speechbrain.core - INFO - Separation Model Statistics:
554
- * Total Number of Trainable Parameters: 25.8M
555
- * Total Number of Parameters: 25.8M
556
- * Trainable Parameters represent 100.0000% of the total size.
557
- 2025-06-27 21:10:09,782 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00
558
- 2025-06-27 21:10:10,160 - speechbrain.utils.epoch_loop - INFO - Going into epoch 163
559
- 2025-06-27 21:10:17,953 - speechbrain.core - ERROR - Exception:
560
- Traceback (most recent call last):
561
- File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 656, in <module>
562
- separator.fit(
563
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/core.py", line 1575, in fit
564
- self._fit_train(train_set=train_set, epoch=epoch, enable=enable)
565
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/core.py", line 1400, in _fit_train
566
- loss = self.fit_batch(batch)
567
- ^^^^^^^^^^^^^^^^^^^^^
568
- File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 133, in fit_batch
569
- predictions, targets = self.compute_forward(
570
- ^^^^^^^^^^^^^^^^^^^^^
571
- File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 87, in compute_forward
572
- est_mask = self.hparams.MaskNet(mix_w)
573
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^
574
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
575
- return self._call_impl(*args, **kwargs)
576
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
577
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
578
- return forward_call(*args, **kwargs)
579
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
580
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 1067, in forward
581
- x = self.dual_mdl[i](x)
582
- ^^^^^^^^^^^^^^^^^^^
583
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
584
- return self._call_impl(*args, **kwargs)
585
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
586
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
587
- return forward_call(*args, **kwargs)
588
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
589
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 918, in forward
590
- inter = self.inter_mdl(inter)
591
- ^^^^^^^^^^^^^^^^^^^^^
592
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
593
- return self._call_impl(*args, **kwargs)
594
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
595
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
596
- return forward_call(*args, **kwargs)
597
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
598
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 638, in forward
599
- return self.mdl(x + pos_enc)[0]
600
- ^^^^^^^^^^^^^^^^^^^^^
601
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
602
- return self._call_impl(*args, **kwargs)
603
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
604
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
605
- return forward_call(*args, **kwargs)
606
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
607
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/transformer/Transformer.py", line 639, in forward
608
- output, attention = enc_layer(
609
- ^^^^^^^^^^
610
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
611
- return self._call_impl(*args, **kwargs)
612
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
613
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
614
- return forward_call(*args, **kwargs)
615
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
616
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/transformer/Transformer.py", line 457, in forward
617
- output, self_attn = self.self_att(
618
- ^^^^^^^^^^^^^^
619
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
620
- return self._call_impl(*args, **kwargs)
621
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
622
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
623
- return forward_call(*args, **kwargs)
624
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
625
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/nnet/attention.py", line 865, in forward
626
- output, attention_weights = self.att(
627
- ^^^^^^^^^
628
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
629
- return self._call_impl(*args, **kwargs)
630
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
631
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
632
- return forward_call(*args, **kwargs)
633
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
634
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/activation.py", line 1373, in forward
635
- attn_output, attn_output_weights = F.multi_head_attention_forward(
636
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
637
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/functional.py", line 6230, in multi_head_attention_forward
638
- q, k, v = _in_projection_packed(query, key, value, in_proj_weight, in_proj_bias)
639
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
640
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/functional.py", line 5648, in _in_projection_packed
641
- return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
642
- ^^^^^^^^^^^^^^^^^^^
643
- KeyboardInterrupt
644
- 2025-06-27 21:10:50,985 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
645
- 2025-06-27 21:10:50,986 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
646
- 2025-06-27 21:10:50,986 - speechbrain.core - INFO - Beginning experiment!
647
- 2025-06-27 21:10:50,986 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
648
- 2025-06-27 21:10:51,243 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
649
- certifi==2025.6.15
650
- cfgv==3.4.0
651
- charset-normalizer==3.4.2
652
- click==8.1.7
653
- distlib==0.3.9
654
- docstring_parser_fork==0.0.12
655
- filelock==3.18.0
656
- flake8==7.0.0
657
- fsspec==2025.5.1
658
- future==1.0.0
659
- hf-xet==1.1.5
660
- huggingface-hub==0.33.1
661
- HyperPyYAML==1.2.2
662
- identify==2.6.12
663
- idna==3.10
664
- iniconfig==2.1.0
665
- isort==5.13.2
666
- Jinja2==3.1.6
667
- joblib==1.5.1
668
- MarkupSafe==3.0.2
669
- mccabe==0.7.0
670
- mir_eval==0.6
671
- mpmath==1.3.0
672
- mypy_extensions==1.1.0
673
- networkx==3.5
674
- nodeenv==1.9.1
675
- numpy==2.3.1
676
- nvidia-cublas-cu12==12.6.4.1
677
- nvidia-cuda-cupti-cu12==12.6.80
678
- nvidia-cuda-nvrtc-cu12==12.6.77
679
- nvidia-cuda-runtime-cu12==12.6.77
680
- nvidia-cudnn-cu12==9.5.1.17
681
- nvidia-cufft-cu12==11.3.0.4
682
- nvidia-cufile-cu12==1.11.1.6
683
- nvidia-curand-cu12==10.3.7.77
684
- nvidia-cusolver-cu12==11.7.1.2
685
- nvidia-cusparse-cu12==12.5.4.2
686
- nvidia-cusparselt-cu12==0.6.3
687
- nvidia-nccl-cu12==2.26.2
688
- nvidia-nvjitlink-cu12==12.6.85
689
- nvidia-nvtx-cu12==12.6.77
690
- packaging==25.0
691
- pandas==2.3.0
692
- pathspec==0.12.1
693
- platformdirs==4.3.8
694
- pluggy==1.6.0
695
- pre_commit==4.2.0
696
- pycodestyle==2.11.0
697
- pydoclint==0.4.1
698
- pyflakes==3.2.0
699
- pygtrie==2.5.0
700
- pyloudnorm==0.1.1
701
- pytest==7.4.0
702
- python-dateutil==2.9.0.post0
703
- pytz==2025.2
704
- PyYAML==6.0.2
705
- regex==2024.11.6
706
- requests==2.32.4
707
- ruamel.yaml==0.18.14
708
- ruamel.yaml.clib==0.2.12
709
- safetensors==0.5.3
710
- scipy==1.16.0
711
- sentencepiece==0.2.0
712
- six==1.17.0
713
- speechbrain==1.0.3
714
- sympy==1.14.0
715
- tokenizers==0.21.2
716
- torch==2.7.1
717
- torchaudio==2.7.1
718
- tqdm==4.67.1
719
- transformers==4.53.0
720
- triton==3.3.1
721
- typing_extensions==4.14.0
722
- tzdata==2025.2
723
- urllib3==2.5.0
724
- virtualenv==20.31.2
725
- yamllint==1.35.1
726
-
727
-
728
- 2025-06-27 21:10:51,249 - speechbrain.utils.superpowers - DEBUG - 476ac4f
729
-
730
-
731
- 2025-06-27 21:10:51,876 - speechbrain.core - INFO - Info: precision arg from hparam file is used
732
- 2025-06-27 21:10:51,876 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
733
- 2025-06-27 21:10:51,876 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
734
- 2025-06-27 21:10:51,915 - speechbrain.core - INFO - Gradscaler enabled: `True`
735
- 2025-06-27 21:10:51,915 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
736
- 2025-06-27 21:10:51,915 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
737
- 2025-06-27 21:10:51,917 - speechbrain.core - INFO - Separation Model Statistics:
738
- * Total Number of Trainable Parameters: 25.8M
739
- * Total Number of Parameters: 25.8M
740
- * Trainable Parameters represent 100.0000% of the total size.
741
- 2025-06-27 21:10:52,857 - speechbrain.core - INFO - Test only mode, skipping training and validation stages.
742
- 2025-06-27 21:10:52,859 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00
743
- 2025-06-27 21:11:31,648 - speechbrain.utils.train_logger - INFO - Epoch loaded: 48 - test si-snr: 20.60
744
- 2025-06-27 21:12:49,750 - speechbrain.core - ERROR - Exception:
745
- Traceback (most recent call last):
746
- File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 666, in <module>
747
- separator.save_results(test_data)
748
- File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 367, in save_results
749
- sdr_baseline, _, _, _ = bss_eval_sources(
750
- ^^^^^^^^^^^^^^^^^
751
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/mir_eval/separation.py", line 210, in bss_eval_sources
752
- _bss_decomp_mtifilt(reference_sources,
753
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/mir_eval/separation.py", line 623, in _bss_decomp_mtifilt
754
- e_interf = _project(reference_sources,
755
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^
756
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/mir_eval/separation.py", line 715, in _project
757
- C = np.linalg.solve(G, D).reshape(flen, nsrc, order='F')
758
- ^^^^^^^^^^^^^^^^^^^^^
759
- File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/numpy/linalg/_linalg.py", line 471, in solve
760
- r = gufunc(a, b, signature=signature)
761
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
762
- KeyboardInterrupt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/CKPT.yaml DELETED
@@ -1,4 +0,0 @@
1
- # yamllint disable
2
- end-of-epoch: true
3
- si-snr: 22.403992604029355
4
- unixtime: 1750946214.2858236
 
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/brain.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:33809a026a2c1febce7b03c8aafaee4ddfc851b2c70f180f8c06bf1017f4df5c
3
- size 46
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/counter.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:98010bd9270f9b100b6214a21754fd33bdc8d41b2bc9f9dd16ff54d3c34ffd71
3
- size 2
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/dataloader-TRAIN.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d253d7b7ace4e06589dd90003f047380ddfdcfb29007b4e815caf48ff09b498b
3
- size 4
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/decoder.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ab49185bb3560f75ce4c18769157375a051f6b3a36e0c35d027574ca9c29e42
3
- size 34409
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/encoder.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:95ee4117e13cc2fb383208925edb71d86947024a9dd2be3da1ea25aca5ae8adf
3
- size 34473
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/masknet.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2c23ccb34b361feb8eeb630d4947815533cfb7dcfd54402e97edc82e032479b
3
- size 113629889
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/optimizer.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd09ff01fca43d3985535808946f8dcd75488e1da097ed30b148cb5c3b9114d5
3
- size 206898874
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/scaler.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:060762594d7f2f0162569b71f7b3ab95a021d06848d3088c63366abf8b98f80c
3
- size 1383
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/CKPT.yaml DELETED
@@ -1,4 +0,0 @@
1
- # yamllint disable
2
- end-of-epoch: true
3
- si-snr: 22.415829142613383
4
- unixtime: 1750994609.9935129
 
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/brain.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:33809a026a2c1febce7b03c8aafaee4ddfc851b2c70f180f8c06bf1017f4df5c
3
- size 46
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/counter.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:79d6eaa2676189eb927f2e16a70091474078e2117c3fc607d35cdc6b591ef355
3
- size 3
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/dataloader-TRAIN.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d253d7b7ace4e06589dd90003f047380ddfdcfb29007b4e815caf48ff09b498b
3
- size 4
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/decoder.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:96810d4644ba93e03d448330d0be5de5a3befc453f07b0c61f13aeca7464b2c5
3
- size 34409
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/encoder.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:76f73bc7bdde7f931679475847d79af49d687d5eb52011f17d6a37024a222558
3
- size 34473
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/masknet.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:30262fd31537e9349c1c1071bbd86c9a89e359ea11d5d50c48a05da03bc26e0e
3
- size 113629889
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/optimizer.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee4c807b50d7f9af8606acf172b2713c218ca53faf4aaa3e614e0c0a6fbac5bd
3
- size 206898874
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/scaler.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:91cd3440b19e568449ff6d8fbb6df704d57d37fc7ad77f05146ac1de7310cded
3
- size 1383
 
 
 
 
Sepformer/results/sepformer_4mix/1234/save/record_tr.csv DELETED
The diff for this file is too large to render. See raw diff
 
Sepformer/results/sepformer_4mix/1234/save/record_val.csv DELETED
The diff for this file is too large to render. See raw diff
 
Sepformer/results/sepformer_4mix/1234/save/test_data.csv DELETED
The diff for this file is too large to render. See raw diff
 
Sepformer/results/sepformer_4mix/1234/test_results.csv DELETED
@@ -1,6 +0,0 @@
1
- snt_id,sdr,sdr_i,si-snr,si-snr_i
2
- 0,-0.7558969463329976,9.341916369626974,-15.376874923706055,5.828725814819336
3
- 1,-1.0032419873910463,9.161513886510548,-15.261016845703125,5.546741485595703
4
- 2,-5.201884601728196,4.524667155561474,-15.91262149810791,6.88614559173584
5
- 3,-3.896726451280197,4.951401911520096,-15.797815322875977,5.112443923950195
6
- 4,-3.685112954805245,6.856254410975225,-22.656341552734375,13.753952026367188
 
 
 
 
 
 
 
Sepformer/results/sepformer_4mix/1234/train.py DELETED
@@ -1,666 +0,0 @@
1
- #!/usr/bin/env/python3
2
- """Recipe for training a neural speech separation system on Libri2/3Mix datasets.
3
- The system employs an encoder, a decoder, and a masking network.
4
-
5
- To run this recipe, do the following:
6
- > python train.py hparams/sepformer-libri2mix.yaml
7
- > python train.py hparams/sepformer-libri3mix.yaml
8
-
9
-
10
- The experiment file is flexible enough to support different neural
11
- networks. By properly changing the parameter files, you can try
12
- different architectures. The script supports both libri2mix and
13
- libri3mix.
14
-
15
- # 4-mix 主要根据 num_spks 修改 train.py 和 config
16
- Authors
17
- * Cem Subakan 2020
18
- * Mirco Ravanelli 2020
19
- * Samuele Cornell 2020
20
- * Mirko Bronzi 2020
21
- * Jianyuan Zhong 2020
22
- """
23
-
24
- import csv
25
- import os
26
- import sys
27
-
28
- import numpy as np
29
- import torch
30
- import torch.nn.functional as F
31
- import torchaudio
32
- from hyperpyyaml import load_hyperpyyaml
33
- from tqdm import tqdm
34
-
35
- import speechbrain as sb
36
- import speechbrain.nnet.schedulers as schedulers
37
- from speechbrain.utils.distributed import run_on_main
38
- from speechbrain.utils.logger import get_logger
39
-
40
- logger = get_logger(__name__)
41
-
42
-
43
- # Define training procedure
44
- class Separation(sb.Brain):
45
- def compute_forward(self, mix, targets, stage, noise=None):
46
- """Forward computations from the mixture to the separated signals."""
47
-
48
- # Unpack lists and put tensors in the right device
49
- mix, mix_lens = mix
50
- mix, mix_lens = mix.to(self.device), mix_lens.to(self.device)
51
-
52
- # Convert targets to tensor
53
- targets = torch.cat(
54
- [targets[i][0].unsqueeze(-1) for i in range(self.hparams.num_spks)],
55
- dim=-1,
56
- ).to(self.device)
57
-
58
- # Add speech distortions
59
- if stage == sb.Stage.TRAIN:
60
- with torch.no_grad():
61
- if self.hparams.use_speedperturb or self.hparams.use_rand_shift:
62
- mix, targets = self.add_speed_perturb(targets, mix_lens)
63
-
64
- mix = targets.sum(-1)
65
-
66
- if self.hparams.use_wham_noise:
67
- noise = noise.to(self.device)
68
- len_noise = noise.shape[1]
69
- len_mix = mix.shape[1]
70
- min_len = min(len_noise, len_mix)
71
-
72
- # add the noise
73
- mix = mix[:, :min_len] + noise[:, :min_len]
74
-
75
- # fix the length of targets also
76
- targets = targets[:, :min_len, :]
77
-
78
- if self.hparams.use_wavedrop:
79
- mix = self.hparams.drop_chunk(mix, mix_lens)
80
- mix = self.hparams.drop_freq(mix)
81
-
82
- if self.hparams.limit_training_signal_len:
83
- mix, targets = self.cut_signals(mix, targets)
84
-
85
- # Separation
86
- mix_w = self.hparams.Encoder(mix)
87
- est_mask = self.hparams.MaskNet(mix_w)
88
- mix_w = torch.stack([mix_w] * self.hparams.num_spks)
89
- sep_h = mix_w * est_mask
90
-
91
- # Decoding
92
- est_source = torch.cat(
93
- [
94
- self.hparams.Decoder(sep_h[i]).unsqueeze(-1)
95
- for i in range(self.hparams.num_spks)
96
- ],
97
- dim=-1,
98
- )
99
-
100
- # T changed after conv1d in encoder, fix it here
101
- T_origin = mix.size(1)
102
- T_est = est_source.size(1)
103
- if T_origin > T_est:
104
- est_source = F.pad(est_source, (0, 0, 0, T_origin - T_est))
105
- else:
106
- est_source = est_source[:, :T_origin, :]
107
-
108
- return est_source, targets
109
-
110
- def compute_objectives(self, predictions, targets):
111
- """Computes the si-snr loss"""
112
- return self.hparams.loss(targets, predictions)
113
-
114
- def fit_batch(self, batch):
115
- """Trains one batch"""
116
-
117
- # Unpacking batch list
118
- mixture = batch.mix_sig
119
- targets = [batch.s1_sig, batch.s2_sig]
120
- if self.hparams.use_wham_noise:
121
- noise = batch.noise_sig[0]
122
- else:
123
- noise = None
124
-
125
- if self.hparams.num_spks == 3:
126
- targets.append(batch.s3_sig)
127
-
128
- if self.hparams.num_spks == 4:
129
- targets.append(batch.s3_sig)
130
- targets.append(batch.s4_sig)
131
-
132
- with self.training_ctx:
133
- predictions, targets = self.compute_forward(
134
- mixture, targets, sb.Stage.TRAIN, noise
135
- )
136
- loss = self.compute_objectives(predictions, targets)
137
-
138
- # hard threshold the easy dataitems
139
- if self.hparams.threshold_byloss:
140
- th = self.hparams.threshold
141
- loss = loss[loss > th]
142
- if loss.nelement() > 0:
143
- loss = loss.mean()
144
- else:
145
- loss = loss.mean()
146
-
147
- if loss.nelement() > 0 and loss < self.hparams.loss_upper_lim:
148
- self.scaler.scale(loss).backward()
149
- if self.hparams.clip_grad_norm >= 0:
150
- self.scaler.unscale_(self.optimizer)
151
- torch.nn.utils.clip_grad_norm_(
152
- self.modules.parameters(),
153
- self.hparams.clip_grad_norm,
154
- )
155
- self.scaler.step(self.optimizer)
156
- self.scaler.update()
157
- else:
158
- self.nonfinite_count += 1
159
- logger.info(
160
- "infinite loss or empty loss! it happened {} times so far - skipping this batch".format(
161
- self.nonfinite_count
162
- )
163
- )
164
- loss.data = torch.tensor(0.0).to(self.device)
165
- self.optimizer.zero_grad()
166
-
167
- return loss.detach().cpu()
168
-
169
- def evaluate_batch(self, batch, stage):
170
- """Computations needed for validation/test batches"""
171
- snt_id = batch.id
172
- mixture = batch.mix_sig
173
- targets = [batch.s1_sig, batch.s2_sig]
174
- if self.hparams.num_spks == 3:
175
- targets.append(batch.s3_sig)
176
-
177
- if self.hparams.num_spks == 4:
178
- targets.append(batch.s3_sig)
179
- targets.append(batch.s4_sig)
180
-
181
- with torch.no_grad():
182
- predictions, targets = self.compute_forward(mixture, targets, stage)
183
- loss = self.compute_objectives(predictions, targets)
184
-
185
- # Manage audio file saving
186
- if stage == sb.Stage.TEST and self.hparams.save_audio:
187
- if hasattr(self.hparams, "n_audio_to_save"):
188
- if self.hparams.n_audio_to_save > 0:
189
- self.save_audio(snt_id[0], mixture, targets, predictions)
190
- self.hparams.n_audio_to_save += -1
191
- else:
192
- self.save_audio(snt_id[0], mixture, targets, predictions)
193
-
194
- return loss.mean().detach()
195
-
196
- def on_stage_end(self, stage, stage_loss, epoch):
197
- """Gets called at the end of a epoch."""
198
- # Compute/store important stats
199
- stage_stats = {"si-snr": stage_loss}
200
- if stage == sb.Stage.TRAIN:
201
- self.train_stats = stage_stats
202
-
203
- # Perform end-of-iteration things, like annealing, logging, etc.
204
- if stage == sb.Stage.VALID:
205
- # Learning rate annealing
206
- if isinstance(
207
- self.hparams.lr_scheduler, schedulers.ReduceLROnPlateau
208
- ):
209
- current_lr, next_lr = self.hparams.lr_scheduler(
210
- [self.optimizer], epoch, stage_loss
211
- )
212
- schedulers.update_learning_rate(self.optimizer, next_lr)
213
- else:
214
- # if we do not use the reducelronplateau, we do not change the lr
215
- current_lr = self.hparams.optimizer.optim.param_groups[0]["lr"]
216
-
217
- self.hparams.train_logger.log_stats(
218
- stats_meta={"epoch": epoch, "lr": current_lr},
219
- train_stats=self.train_stats,
220
- valid_stats=stage_stats,
221
- )
222
- self.checkpointer.save_and_keep_only(
223
- meta={"si-snr": stage_stats["si-snr"]},
224
- min_keys=["si-snr"],
225
- )
226
- elif stage == sb.Stage.TEST:
227
- self.hparams.train_logger.log_stats(
228
- stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
229
- test_stats=stage_stats,
230
- )
231
-
232
- def add_speed_perturb(self, targets, targ_lens):
233
- """Adds speed perturbation and random_shift to the input signals"""
234
-
235
- min_len = -1
236
- recombine = False
237
-
238
- if self.hparams.use_speedperturb:
239
- # Performing speed change (independently on each source)
240
- new_targets = []
241
- recombine = True
242
-
243
- for i in range(targets.shape[-1]):
244
- new_target = self.hparams.speed_perturb(targets[:, :, i])
245
- new_targets.append(new_target)
246
- if i == 0:
247
- min_len = new_target.shape[-1]
248
- else:
249
- if new_target.shape[-1] < min_len:
250
- min_len = new_target.shape[-1]
251
-
252
- if self.hparams.use_rand_shift:
253
- # Performing random_shift (independently on each source)
254
- recombine = True
255
- for i in range(targets.shape[-1]):
256
- rand_shift = torch.randint(
257
- self.hparams.min_shift, self.hparams.max_shift, (1,)
258
- )
259
- new_targets[i] = new_targets[i].to(self.device)
260
- new_targets[i] = torch.roll(
261
- new_targets[i], shifts=(rand_shift[0],), dims=1
262
- )
263
-
264
- # Re-combination
265
- if recombine:
266
- if self.hparams.use_speedperturb:
267
- targets = torch.zeros(
268
- targets.shape[0],
269
- min_len,
270
- targets.shape[-1],
271
- device=targets.device,
272
- dtype=torch.float,
273
- )
274
- for i, new_target in enumerate(new_targets):
275
- targets[:, :, i] = new_targets[i][:, 0:min_len]
276
-
277
- mix = targets.sum(-1)
278
- return mix, targets
279
-
280
- def cut_signals(self, mixture, targets):
281
- """This function selects a random segment of a given length within the mixture.
282
- The corresponding targets are selected accordingly"""
283
- randstart = torch.randint(
284
- 0,
285
- 1 + max(0, mixture.shape[1] - self.hparams.training_signal_len),
286
- (1,),
287
- ).item()
288
- targets = targets[
289
- :, randstart : randstart + self.hparams.training_signal_len, :
290
- ]
291
- mixture = mixture[
292
- :, randstart : randstart + self.hparams.training_signal_len
293
- ]
294
- return mixture, targets
295
-
296
- def reset_layer_recursively(self, layer):
297
- """Reinitializes the parameters of the neural networks"""
298
- if hasattr(layer, "reset_parameters"):
299
- layer.reset_parameters()
300
- for child_layer in layer.modules():
301
- if layer != child_layer:
302
- self.reset_layer_recursively(child_layer)
303
-
304
- def save_results(self, test_data):
305
- """This script computes the SDR and SI-SNR metrics and saves
306
- them into a csv file"""
307
-
308
- # This package is required for SDR computation
309
- from mir_eval.separation import bss_eval_sources
310
-
311
- # Create folders where to store audio
312
- save_file = os.path.join(self.hparams.output_folder, "test_results.csv")
313
-
314
- # Variable init
315
- all_sdrs = []
316
- all_sdrs_i = []
317
- all_sisnrs = []
318
- all_sisnrs_i = []
319
- csv_columns = ["snt_id", "sdr", "sdr_i", "si-snr", "si-snr_i"]
320
-
321
- test_loader = sb.dataio.dataloader.make_dataloader(
322
- test_data, **self.hparams.dataloader_opts
323
- )
324
-
325
- with open(save_file, "w", newline="", encoding="utf-8") as results_csv:
326
- writer = csv.DictWriter(results_csv, fieldnames=csv_columns)
327
- writer.writeheader()
328
-
329
- # Loop over all test sentence
330
- with tqdm(test_loader, dynamic_ncols=True) as t:
331
- for i, batch in enumerate(t):
332
- # Apply Separation
333
- mixture, mix_len = batch.mix_sig
334
- snt_id = batch.id
335
- targets = [batch.s1_sig, batch.s2_sig]
336
- if self.hparams.num_spks == 3:
337
- targets.append(batch.s3_sig)
338
-
339
- if self.hparams.num_spks == 4:
340
- targets.append(batch.s3_sig)
341
- targets.append(batch.s4_sig)
342
-
343
- with torch.no_grad():
344
- predictions, targets = self.compute_forward(
345
- batch.mix_sig, targets, sb.Stage.TEST
346
- )
347
-
348
- # Compute SI-SNR
349
- sisnr = self.compute_objectives(predictions, targets)
350
-
351
- # Compute SI-SNR improvement
352
- mixture_signal = torch.stack(
353
- [mixture] * self.hparams.num_spks, dim=-1
354
- )
355
- mixture_signal = mixture_signal.to(targets.device)
356
- sisnr_baseline = self.compute_objectives(
357
- mixture_signal, targets
358
- )
359
- sisnr_i = sisnr - sisnr_baseline
360
-
361
- # Compute SDR
362
- sdr, _, _, _ = bss_eval_sources(
363
- targets[0].t().cpu().numpy(),
364
- predictions[0].t().detach().cpu().numpy(),
365
- )
366
-
367
- sdr_baseline, _, _, _ = bss_eval_sources(
368
- targets[0].t().cpu().numpy(),
369
- mixture_signal[0].t().detach().cpu().numpy(),
370
- )
371
-
372
- sdr_i = sdr.mean() - sdr_baseline.mean()
373
-
374
- # Saving on a csv file
375
- row = {
376
- "snt_id": snt_id[0],
377
- "sdr": sdr.mean(),
378
- "sdr_i": sdr_i,
379
- "si-snr": -sisnr.item(),
380
- "si-snr_i": -sisnr_i.item(),
381
- }
382
- writer.writerow(row)
383
-
384
- # Metric Accumulation
385
- all_sdrs.append(sdr.mean())
386
- all_sdrs_i.append(sdr_i.mean())
387
- all_sisnrs.append(-sisnr.item())
388
- all_sisnrs_i.append(-sisnr_i.item())
389
-
390
- row = {
391
- "snt_id": "avg",
392
- "sdr": np.array(all_sdrs).mean(),
393
- "sdr_i": np.array(all_sdrs_i).mean(),
394
- "si-snr": np.array(all_sisnrs).mean(),
395
- "si-snr_i": np.array(all_sisnrs_i).mean(),
396
- }
397
- writer.writerow(row)
398
-
399
- logger.info("Mean SISNR is {}".format(np.array(all_sisnrs).mean()))
400
- logger.info("Mean SISNRi is {}".format(np.array(all_sisnrs_i).mean()))
401
- logger.info("Mean SDR is {}".format(np.array(all_sdrs).mean()))
402
- logger.info("Mean SDRi is {}".format(np.array(all_sdrs_i).mean()))
403
-
404
- def save_audio(self, snt_id, mixture, targets, predictions):
405
- "saves the test audio (mixture, targets, and estimated sources) on disk"
406
-
407
- # Create output folder
408
- save_path = os.path.join(self.hparams.save_folder, "audio_results")
409
- if not os.path.exists(save_path):
410
- os.mkdir(save_path)
411
-
412
- for ns in range(self.hparams.num_spks):
413
- # Estimated source
414
- signal = predictions[0, :, ns]
415
- signal = signal / signal.abs().max()
416
- save_file = os.path.join(
417
- save_path, "item{}_source{}hat.wav".format(snt_id, ns + 1)
418
- )
419
- torchaudio.save(
420
- save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
421
- )
422
-
423
- # Original source
424
- signal = targets[0, :, ns]
425
- signal = signal / signal.abs().max()
426
- save_file = os.path.join(
427
- save_path, "item{}_source{}.wav".format(snt_id, ns + 1)
428
- )
429
- torchaudio.save(
430
- save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
431
- )
432
-
433
- # Mixture
434
- signal = mixture[0][0, :]
435
- signal = signal / signal.abs().max()
436
- save_file = os.path.join(save_path, "item{}_mix.wav".format(snt_id))
437
- torchaudio.save(
438
- save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
439
- )
440
-
441
-
442
- def dataio_prep(hparams):
443
- """Creates data processing pipeline"""
444
-
445
- # 1. Define datasets
446
- train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
447
- csv_path=hparams["train_data"],
448
- replacements={"data_root": hparams["data_folder"]},
449
- )
450
-
451
- valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
452
- csv_path=hparams["valid_data"],
453
- replacements={"data_root": hparams["data_folder"]},
454
- )
455
-
456
- test_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
457
- csv_path=hparams["test_data"],
458
- replacements={"data_root": hparams["data_folder"]},
459
- )
460
-
461
- datasets = [train_data, valid_data, test_data]
462
-
463
- # 2. Provide audio pipelines
464
-
465
- @sb.utils.data_pipeline.takes("mix_wav")
466
- @sb.utils.data_pipeline.provides("mix_sig")
467
- def audio_pipeline_mix(mix_wav):
468
- mix_sig = sb.dataio.dataio.read_audio(mix_wav)
469
- return mix_sig
470
-
471
- @sb.utils.data_pipeline.takes("s1_wav")
472
- @sb.utils.data_pipeline.provides("s1_sig")
473
- def audio_pipeline_s1(s1_wav):
474
- s1_sig = sb.dataio.dataio.read_audio(s1_wav)
475
- return s1_sig
476
-
477
- @sb.utils.data_pipeline.takes("s2_wav")
478
- @sb.utils.data_pipeline.provides("s2_sig")
479
- def audio_pipeline_s2(s2_wav):
480
- s2_sig = sb.dataio.dataio.read_audio(s2_wav)
481
- return s2_sig
482
-
483
- # --- 如果说话人 >= 3,定义第 3 路 ---
484
- if hparams["num_spks"] >= 3:
485
- @sb.utils.data_pipeline.takes("s3_wav")
486
- @sb.utils.data_pipeline.provides("s3_sig")
487
- def audio_pipeline_s3(s3_wav):
488
- return sb.dataio.dataio.read_audio(s3_wav)
489
-
490
- # --- 如果说话人 == 4,定义第 4 路 ---
491
- if hparams["num_spks"] == 4:
492
- @sb.utils.data_pipeline.takes("s4_wav")
493
- @sb.utils.data_pipeline.provides("s4_sig")
494
- def audio_pipeline_s4(s4_wav):
495
- return sb.dataio.dataio.read_audio(s4_wav)
496
-
497
- if hparams["use_wham_noise"]:
498
-
499
- @sb.utils.data_pipeline.takes("noise_wav")
500
- @sb.utils.data_pipeline.provides("noise_sig")
501
- def audio_pipeline_noise(noise_wav):
502
- noise_sig = sb.dataio.dataio.read_audio(noise_wav)
503
- return noise_sig
504
-
505
- sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_mix)
506
- sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s1)
507
- sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s2)
508
- if hparams["num_spks"] == 3:
509
- sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s3)
510
- if hparams["num_spks"] == 4:
511
- sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s3)
512
- sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s4)
513
-
514
-
515
- if hparams["use_wham_noise"]:
516
- print("Using the WHAM! noise in the data pipeline")
517
- sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_noise)
518
-
519
- if (hparams["num_spks"] == 2) and hparams["use_wham_noise"]:
520
- sb.dataio.dataset.set_output_keys(
521
- datasets, ["id", "mix_sig", "s1_sig", "s2_sig", "noise_sig"]
522
- )
523
- elif (hparams["num_spks"] == 3) and hparams["use_wham_noise"]:
524
- sb.dataio.dataset.set_output_keys(
525
- datasets,
526
- ["id", "mix_sig", "s1_sig", "s2_sig", "s3_sig", "noise_sig"],
527
- )
528
- elif (hparams["num_spks"] == 4) and hparams["use_wham_noise"]:
529
- sb.dataio.dataset.set_output_keys(
530
- datasets,
531
- ["id", "mix_sig", "s1_sig", "s2_sig", "s3_sig", "s4_sig", "noise_sig"],
532
- )
533
- elif (hparams["num_spks"] == 4) and not hparams["use_wham_noise"]:
534
-
535
- sb.dataio.dataset.set_output_keys(
536
- datasets,
537
- ["id", "mix_sig", "s1_sig", "s2_sig", "s3_sig", "s4_sig"],
538
- )
539
- elif (hparams["num_spks"] == 2) and not hparams["use_wham_noise"]:
540
- sb.dataio.dataset.set_output_keys(
541
- datasets, ["id", "mix_sig", "s1_sig", "s2_sig"]
542
- )
543
- else:
544
- sb.dataio.dataset.set_output_keys(
545
- datasets, ["id", "mix_sig", "s1_sig", "s2_sig", "s3_sig"]
546
- )
547
-
548
- return train_data, valid_data, test_data
549
-
550
-
551
- if __name__ == "__main__":
552
- # Load hyperparameters file with command-line overrides
553
- hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
554
- with open(hparams_file, encoding="utf-8") as fin:
555
- hparams = load_hyperpyyaml(fin, overrides)
556
-
557
- # Initialize ddp (useful only for multi-GPU DDP training)
558
- sb.utils.distributed.ddp_init_group(run_opts)
559
-
560
- # Create experiment directory
561
- sb.create_experiment_directory(
562
- experiment_directory=hparams["output_folder"],
563
- hyperparams_to_save=hparams_file,
564
- overrides=overrides,
565
- )
566
-
567
- # Check if wsj0_tr is set with dynamic mixing
568
- if hparams["dynamic_mixing"] and not os.path.exists(
569
- hparams["base_folder_dm"]
570
- ):
571
- raise ValueError(
572
- "Please, specify a valid base_folder_dm folder when using dynamic mixing"
573
- )
574
-
575
- # Update precision to bf16 if the device is CPU and precision is fp16
576
- if run_opts.get("device") == "cpu" and hparams.get("precision") == "fp16":
577
- hparams["precision"] = "bf16"
578
-
579
-
580
- # Create dataset objects
581
- if hparams["dynamic_mixing"]:
582
- from dynamic_mixing import (
583
- dynamic_mix_data_prep_librimix as dynamic_mix_data_prep,
584
- )
585
-
586
- # if the base_folder for dm is not processed, preprocess them
587
- if "processed" not in hparams["base_folder_dm"]:
588
- # if the processed folder already exists we just use it otherwise we do the preprocessing
589
- if not os.path.exists(
590
- os.path.normpath(hparams["base_folder_dm"]) + "_processed"
591
- ):
592
- from recipes.LibriMix.meta.preprocess_dynamic_mixing import (
593
- resample_folder,
594
- )
595
-
596
- print("Resampling the base folder")
597
- run_on_main(
598
- resample_folder,
599
- kwargs={
600
- "input_folder": hparams["base_folder_dm"],
601
- "output_folder": os.path.normpath(
602
- hparams["base_folder_dm"]
603
- )
604
- + "_processed",
605
- "fs": hparams["sample_rate"],
606
- "regex": "**/*.flac",
607
- },
608
- )
609
- # adjust the base_folder_dm path
610
- hparams["base_folder_dm"] = (
611
- os.path.normpath(hparams["base_folder_dm"]) + "_processed"
612
- )
613
- else:
614
- print(
615
- "Using the existing processed folder on the same directory as base_folder_dm"
616
- )
617
- hparams["base_folder_dm"] = (
618
- os.path.normpath(hparams["base_folder_dm"]) + "_processed"
619
- )
620
-
621
- dm_hparams = {
622
- "train_data": hparams["train_data"],
623
- "data_folder": hparams["data_folder"],
624
- "base_folder_dm": hparams["base_folder_dm"],
625
- "sample_rate": hparams["sample_rate"],
626
- "num_spks": hparams["num_spks"],
627
- "training_signal_len": hparams["training_signal_len"],
628
- "dataloader_opts": hparams["dataloader_opts"],
629
- }
630
-
631
- train_data = dynamic_mix_data_prep(dm_hparams)
632
- _, valid_data, test_data = dataio_prep(hparams)
633
- else:
634
- train_data, valid_data, test_data = dataio_prep(hparams)
635
-
636
- # Load pretrained model if pretrained_separator is present in the yaml
637
- if "pretrained_separator" in hparams:
638
- run_on_main(hparams["pretrained_separator"].collect_files)
639
- hparams["pretrained_separator"].load_collected()
640
-
641
- # Brain class initialization
642
- separator = Separation(
643
- modules=hparams["modules"],
644
- opt_class=hparams["optimizer"],
645
- hparams=hparams,
646
- run_opts=run_opts,
647
- checkpointer=hparams["checkpointer"],
648
- )
649
-
650
- # re-initialize the parameters if we don't use a pretrained model
651
- if "pretrained_separator" not in hparams:
652
- for module in separator.modules.values():
653
- separator.reset_layer_recursively(module)
654
-
655
- # Training
656
- separator.fit(
657
- separator.hparams.epoch_counter,
658
- train_data,
659
- valid_data,
660
- train_loader_kwargs=hparams["dataloader_opts"],
661
- valid_loader_kwargs=hparams["dataloader_opts"],
662
- )
663
-
664
- # Eval
665
- separator.evaluate(test_data, min_key="si-snr")
666
- separator.save_results(test_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Sepformer/results/sepformer_4mix/1234/train_log.txt DELETED
@@ -1 +0,0 @@
1
- Epoch loaded: 48 - test si-snr: 20.60