swc2 commited on
Commit
bcc11b4
·
verified ·
1 Parent(s): 5773b31

upload model

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Conv-Tasnet/results/convtasnet_4-mix/1234/env.log +93 -0
  2. Conv-Tasnet/results/convtasnet_4-mix/1234/hyperparams.yaml +179 -0
  3. Conv-Tasnet/results/convtasnet_4-mix/1234/log.txt +0 -0
  4. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/CKPT.yaml +4 -0
  5. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/brain.ckpt +3 -0
  6. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/counter.ckpt +3 -0
  7. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/dataloader-TRAIN.ckpt +3 -0
  8. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/decoder.ckpt +3 -0
  9. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/encoder.ckpt +3 -0
  10. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/lr_scheduler.ckpt +3 -0
  11. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/masknet.ckpt +3 -0
  12. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/optimizer.ckpt +3 -0
  13. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/CKPT.yaml +4 -0
  14. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/brain.ckpt +3 -0
  15. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/counter.ckpt +3 -0
  16. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/dataloader-TRAIN.ckpt +3 -0
  17. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/decoder.ckpt +3 -0
  18. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/encoder.ckpt +3 -0
  19. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/lr_scheduler.ckpt +3 -0
  20. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/masknet.ckpt +3 -0
  21. Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/optimizer.ckpt +3 -0
  22. Conv-Tasnet/results/convtasnet_4-mix/1234/save/record_tr.csv +0 -0
  23. Conv-Tasnet/results/convtasnet_4-mix/1234/save/record_val.csv +0 -0
  24. Conv-Tasnet/results/convtasnet_4-mix/1234/save/test_data.csv +0 -0
  25. Conv-Tasnet/results/convtasnet_4-mix/1234/test.py +628 -0
  26. Conv-Tasnet/results/convtasnet_4-mix/1234/test_results.csv +1 -0
  27. Conv-Tasnet/results/convtasnet_4-mix/1234/train.py +628 -0
  28. Conv-Tasnet/results/convtasnet_4-mix/1234/train_log.txt +242 -0
  29. Sepformer/results/sepformer_4mix/1234/env.log +90 -0
  30. Sepformer/results/sepformer_4mix/1234/hyperparams.yaml +198 -0
  31. Sepformer/results/sepformer_4mix/1234/log.txt +762 -0
  32. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/CKPT.yaml +4 -0
  33. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/brain.ckpt +3 -0
  34. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/counter.ckpt +3 -0
  35. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/dataloader-TRAIN.ckpt +3 -0
  36. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/decoder.ckpt +3 -0
  37. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/encoder.ckpt +3 -0
  38. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/masknet.ckpt +3 -0
  39. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/optimizer.ckpt +3 -0
  40. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/scaler.ckpt +3 -0
  41. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/CKPT.yaml +4 -0
  42. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/brain.ckpt +3 -0
  43. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/counter.ckpt +3 -0
  44. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/dataloader-TRAIN.ckpt +3 -0
  45. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/decoder.ckpt +3 -0
  46. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/encoder.ckpt +3 -0
  47. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/masknet.ckpt +3 -0
  48. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/optimizer.ckpt +3 -0
  49. Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/scaler.ckpt +3 -0
  50. Sepformer/results/sepformer_4mix/1234/save/record_tr.csv +0 -0
Conv-Tasnet/results/convtasnet_4-mix/1234/env.log ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SpeechBrain system description
2
+ ==============================
3
+ Python version:
4
+ 3.11.13 (main, Jun 5 2025, 13:12:00) [GCC 11.2.0]
5
+ ==============================
6
+ Installed Python packages:
7
+ black==24.3.0
8
+ certifi==2025.6.15
9
+ cffi==1.17.1
10
+ cfgv==3.4.0
11
+ charset-normalizer==3.4.2
12
+ click==8.1.7
13
+ distlib==0.3.9
14
+ docstring_parser_fork==0.0.12
15
+ filelock==3.18.0
16
+ flake8==7.0.0
17
+ fsspec==2025.5.1
18
+ future==1.0.0
19
+ hf-xet==1.1.5
20
+ huggingface-hub==0.33.0
21
+ HyperPyYAML==1.2.2
22
+ identify==2.6.12
23
+ idna==3.10
24
+ iniconfig==2.1.0
25
+ isort==5.13.2
26
+ Jinja2==3.1.6
27
+ joblib==1.5.1
28
+ MarkupSafe==3.0.2
29
+ mccabe==0.7.0
30
+ mir_eval==0.6
31
+ mpmath==1.3.0
32
+ mypy_extensions==1.1.0
33
+ networkx==3.5
34
+ nodeenv==1.9.1
35
+ numpy==2.3.1
36
+ nvidia-cublas-cu12==12.6.4.1
37
+ nvidia-cuda-cupti-cu12==12.6.80
38
+ nvidia-cuda-nvrtc-cu12==12.6.77
39
+ nvidia-cuda-runtime-cu12==12.6.77
40
+ nvidia-cudnn-cu12==9.5.1.17
41
+ nvidia-cufft-cu12==11.3.0.4
42
+ nvidia-cufile-cu12==1.11.1.6
43
+ nvidia-curand-cu12==10.3.7.77
44
+ nvidia-cusolver-cu12==11.7.1.2
45
+ nvidia-cusparse-cu12==12.5.4.2
46
+ nvidia-cusparselt-cu12==0.6.3
47
+ nvidia-nccl-cu12==2.26.2
48
+ nvidia-nvjitlink-cu12==12.6.85
49
+ nvidia-nvtx-cu12==12.6.77
50
+ packaging==25.0
51
+ pandas==2.3.0
52
+ pathspec==0.12.1
53
+ platformdirs==4.3.8
54
+ pluggy==1.6.0
55
+ pre_commit==4.2.0
56
+ pycodestyle==2.11.0
57
+ pycparser==2.22
58
+ pydoclint==0.4.1
59
+ pyflakes==3.2.0
60
+ pyloudnorm==0.1.1
61
+ pytest==7.4.0
62
+ python-dateutil==2.9.0.post0
63
+ pytz==2025.2
64
+ PyYAML==6.0.2
65
+ regex==2024.11.6
66
+ requests==2.32.4
67
+ ruamel.yaml==0.18.14
68
+ ruamel.yaml.clib==0.2.12
69
+ safetensors==0.5.3
70
+ scipy==1.16.0
71
+ sentencepiece==0.2.0
72
+ six==1.17.0
73
+ soundfile==0.13.1
74
+ sox==1.5.0
75
+ -e git+ssh://git@github.com/speechbrain/speechbrain.git@c75ab5489431fd0a2a7d21160bc37677801cb506#egg=speechbrain
76
+ sympy==1.14.0
77
+ tokenizers==0.21.2
78
+ torch==2.7.1
79
+ torchaudio==2.7.1
80
+ tqdm==4.67.1
81
+ transformers==4.52.4
82
+ triton==3.3.1
83
+ typing_extensions==4.14.0
84
+ tzdata==2025.2
85
+ urllib3==2.5.0
86
+ virtualenv==20.31.2
87
+ yamllint==1.35.1
88
+ ==============================
89
+ Git revision:
90
+ c75ab5489
91
+ ==============================
92
+ CUDA version:
93
+ 12.6
Conv-Tasnet/results/convtasnet_4-mix/1234/hyperparams.yaml ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated 2025-06-26 from:
2
+ # /work106/youzhenghai/project/speechbrain/myegs/FORHUAWEI_TASNET/separation/hparams/convtasnet_4mix.yaml
3
+ # yamllint disable
4
+ # ################################
5
+ # Model: SepFormer for source separation
6
+ # https://arxiv.org/abs/2010.13154
7
+ # Dataset : WSJ0-2mix and WSJ0-3mix
8
+ # ################################
9
+ # Basic parameters
10
+ # Seed needs to be set at top of yaml, before objects with parameters are made
11
+ #
12
+ seed: 1234
13
+ __set_seed: !apply:speechbrain.utils.seed_everything [1234]
14
+
15
+ # Data params
16
+
17
+ # e.g. '/yourpath/wsj0-mix/2speakers'
18
+ # end with 2speakers for wsj0-2mix or 3speakers for wsj0-3mix
19
+ data_folder: /work105/youzhenghai/data/wsj0_2mix
20
+
21
+ # the path for wsj0/si_tr_s/ folder -- only needed if dynamic mixing is used
22
+ # e.g. /yourpath/wsj0-processed/si_tr_s/
23
+ base_folder_dm: /yourpath/wsj0-processed/si_tr_s/
24
+
25
+ experiment_name: convtasnet_4-mix
26
+ output_folder: results/convtasnet_4-mix/1234
27
+ train_log: results/convtasnet_4-mix/1234/train_log.txt
28
+ save_folder: results/convtasnet_4-mix/1234/save
29
+ train_data: results/convtasnet_4-mix/1234/save/record_tr.csv
30
+ valid_data: results/convtasnet_4-mix/1234/save/record_val.csv
31
+ test_data: results/convtasnet_4-mix/1234/save/test_data.csv
32
+ skip_prep: false
33
+
34
+
35
+ # Experiment params
36
+ precision: fp32 # bf16, fp16 or fp32
37
+ num_spks: 4 # set to 3 for wsj0-3mix
38
+ noprogressbar: false
39
+ save_audio: false # Save estimated sources on disk
40
+ sample_rate: 16000
41
+
42
+ ####################### Training Parameters ####################################
43
+ N_epochs: 200
44
+ batch_size: 2
45
+ lr: 0.00015
46
+ clip_grad_norm: 5
47
+ loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
48
+ # if True, the training sequences are cut to a specified length
49
+ limit_training_signal_len: true
50
+ # this is the length of sequences if we choose to limit
51
+ # the signal length of training sequences
52
+ training_signal_len: 64000000
53
+
54
+ # Set it to True to dynamically create mixtures at training time
55
+ dynamic_mixing: false
56
+
57
+ # Parameters for data augmentation
58
+ use_wavedrop: false
59
+ use_speedperturb: true
60
+ use_rand_shift: false
61
+ min_shift: -8000
62
+ max_shift: 8000
63
+
64
+ # Speed perturbation
65
+ speed_changes: &id001 [95, 100, 105]
66
+
67
+ # Frequency drop: randomly drops a number of frequency bands to zero.
68
+ speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
69
+ orig_freq: 16000
70
+ speeds: *id001
71
+ drop_freq_low: 0 # Min frequency band dropout probability
72
+ drop_freq_high: 1 # Max frequency band dropout probability
73
+ drop_freq_count_low: 1 # Min number of frequency bands to drop
74
+ drop_freq_count_high: 3 # Max number of frequency bands to drop
75
+ drop_freq_width: 0.05 # Width of frequency bands to drop
76
+
77
+ drop_freq: !new:speechbrain.augment.time_domain.DropFreq
78
+ drop_freq_low: 0
79
+ drop_freq_high: 1
80
+ drop_freq_count_low: 1
81
+ drop_freq_count_high: 3
82
+ drop_freq_width: 0.05
83
+
84
+ # Time drop: randomly drops a number of temporal chunks.
85
+ drop_chunk_count_low: 1 # Min number of audio chunks to drop
86
+ drop_chunk_count_high: 5 # Max number of audio chunks to drop
87
+ drop_chunk_length_low: 1000 # Min length of audio chunks to drop
88
+ drop_chunk_length_high: 2000 # Max length of audio chunks to drop
89
+
90
+ drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
91
+ drop_length_low: 1000
92
+ drop_length_high: 2000
93
+ drop_count_low: 1
94
+ drop_count_high: 5
95
+
96
+ # loss thresholding -- this thresholds the training loss
97
+ threshold_byloss: true
98
+ threshold: -30
99
+
100
+ # Encoder parameters
101
+ N_encoder_out: 256
102
+ # out_channels: 256
103
+ kernel_size: 32
104
+ kernel_stride: 16
105
+
106
+ # Dataloader options
107
+ dataloader_opts:
108
+ batch_size: 2
109
+ num_workers: 3
110
+
111
+
112
+ # Specifying the network
113
+ Encoder: &id002 !new:speechbrain.lobes.models.dual_path.Encoder
114
+ kernel_size: 32
115
+ out_channels: 256
116
+
117
+ # intra: !new:speechbrain.lobes.models.dual_path.SBRNNBlock
118
+ # num_layers: 1
119
+ # input_size: !ref <out_channels>
120
+ # hidden_channels: !ref <out_channels>
121
+ # dropout: 0
122
+ # bidirectional: True
123
+
124
+ # inter: !new:speechbrain.lobes.models.dual_path.SBRNNBlock
125
+ # num_layers: 1
126
+ # input_size: !ref <out_channels>
127
+ # hidden_channels: !ref <out_channels>
128
+ # dropout: 0
129
+ # bidirectional: True
130
+
131
+ MaskNet: &id004 !new:speechbrain.lobes.models.conv_tasnet.MaskNet
132
+
133
+ N: 256
134
+ B: 256
135
+ H: 512
136
+ P: 3
137
+ X: 6
138
+ R: 4
139
+ C: 4
140
+ norm_type: gLN
141
+ causal: true
142
+ mask_nonlinear: relu
143
+
144
+ Decoder: &id003 !new:speechbrain.lobes.models.dual_path.Decoder
145
+ in_channels: 256
146
+ out_channels: 1
147
+ kernel_size: 32
148
+ stride: 16
149
+ bias: false
150
+
151
+ optimizer: !name:torch.optim.Adam
152
+ lr: 0.00015
153
+ weight_decay: 0
154
+
155
+ loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
156
+
157
+ lr_scheduler: &id006 !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
158
+
159
+ factor: 0.5
160
+ patience: 2
161
+ dont_halve_until_epoch: 85
162
+
163
+ epoch_counter: &id005 !new:speechbrain.utils.epoch_loop.EpochCounter
164
+ limit: 200
165
+
166
+ modules:
167
+ encoder: *id002
168
+ decoder: *id003
169
+ masknet: *id004
170
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
171
+ checkpoints_dir: results/convtasnet_4-mix/1234/save
172
+ recoverables:
173
+ encoder: *id002
174
+ decoder: *id003
175
+ masknet: *id004
176
+ counter: *id005
177
+ lr_scheduler: *id006
178
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
179
+ save_file: results/convtasnet_4-mix/1234/train_log.txt
Conv-Tasnet/results/convtasnet_4-mix/1234/log.txt ADDED
The diff for this file is too large to render. See raw diff
 
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/CKPT.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # yamllint disable
2
+ end-of-epoch: true
3
+ si-snr: 22.240427712364045
4
+ unixtime: 1750961143.069555
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/brain.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33809a026a2c1febce7b03c8aafaee4ddfc851b2c70f180f8c06bf1017f4df5c
3
+ size 46
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/counter.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef6fdf32513aa7cd11f72beccf132b9224d33f271471fff402742887a171edf
3
+ size 3
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/dataloader-TRAIN.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c344ba7044815dd03c3448028a43e5b9c16074cb5a6a19c7ae86165c149735f
3
+ size 3
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/decoder.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b89e695d01ef7a5aeb76f5000f70959a078e4ea1cf97ae978a2a4dc2121c7f29
3
+ size 34409
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/encoder.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5ef4fe38605072559dbf12b09643423c4649460c0f803f34f047e92f9358f39
3
+ size 34473
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/lr_scheduler.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f02f6900fea06c469d975f48c9b3f4d40868d5fb6e6758baf76c4e68c4785dd1
3
+ size 2251
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/masknet.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:100869f60d27f540b6d23e4a811cff04541c67e6ff4639776645069f841f0db5
3
+ size 26926023
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+02-05-43+00/optimizer.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c05ce1c793e4f0bae4a6905774bbfc8360e4450103008c838ea195f4a146452c
3
+ size 53964363
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/CKPT.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # yamllint disable
2
+ end-of-epoch: true
3
+ si-snr: 22.256136728080673
4
+ unixtime: 1750994220.6695538
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/brain.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33809a026a2c1febce7b03c8aafaee4ddfc851b2c70f180f8c06bf1017f4df5c
3
+ size 46
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/counter.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27badc983df1780b60c2b3fa9d3a19a00e46aac798451f0febdca52920faaddf
3
+ size 3
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/dataloader-TRAIN.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c344ba7044815dd03c3448028a43e5b9c16074cb5a6a19c7ae86165c149735f
3
+ size 3
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/decoder.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ba5891c2436cdefe57f4ca4b87bfa8267f927948330ea482d9cd6fadcd14163
3
+ size 34409
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/encoder.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:533c6dfe50d9c410e8c0e4907efaf95679ca2fe85f0ceab9aa0ede0c817d58d8
3
+ size 34473
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/lr_scheduler.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8689d8fb8de14a5995a161e50181134543321bbd431f774ce20f507239669ce3
3
+ size 3147
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/masknet.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3acd263841af684db0cf622b77e83a807e969390661e115b89a8139f8785aa64
3
+ size 26926023
Conv-Tasnet/results/convtasnet_4-mix/1234/save/CKPT+2025-06-27+11-17-00+00/optimizer.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89593e2f633757a61883ef5aeb48a9e79ec4b09565d470c5571ee16edcb51c5c
3
+ size 53964363
Conv-Tasnet/results/convtasnet_4-mix/1234/save/record_tr.csv ADDED
The diff for this file is too large to render. See raw diff
 
Conv-Tasnet/results/convtasnet_4-mix/1234/save/record_val.csv ADDED
The diff for this file is too large to render. See raw diff
 
Conv-Tasnet/results/convtasnet_4-mix/1234/save/test_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
Conv-Tasnet/results/convtasnet_4-mix/1234/test.py ADDED
@@ -0,0 +1,628 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env/python3
2
+ """Recipe for training a neural speech separation system on the wsjmix
3
+ dataset. The system employs an encoder, a decoder, and a masking network.
4
+
5
+ To run this recipe, do the following:
6
+ > python train.py hparams/sepformer.yaml
7
+ > python train.py hparams/dualpath_rnn.yaml
8
+ > python train.py hparams/convtasnet.yaml
9
+
10
+ The experiment file is flexible enough to support different neural
11
+ networks. By properly changing the parameter files, you can try
12
+ different architectures. The script supports both wsj2mix and
13
+ wsj3mix.
14
+
15
+ # 4-mix 主要根据 num_spks 修改 train.py 和 config
16
+ Authors
17
+ * Cem Subakan 2020
18
+ * Mirco Ravanelli 2020
19
+ * Samuele Cornell 2020
20
+ * Mirko Bronzi 2020
21
+ * Jianyuan Zhong 2020
22
+ """
23
+
24
+ import csv
25
+ import os
26
+ import sys
27
+
28
+ import numpy as np
29
+ import torch
30
+ import torch.nn.functional as F
31
+ import torchaudio
32
+ from hyperpyyaml import load_hyperpyyaml
33
+ from tqdm import tqdm
34
+
35
+ import speechbrain as sb
36
+ import speechbrain.nnet.schedulers as schedulers
37
+ from speechbrain.utils.distributed import run_on_main
38
+ from speechbrain.utils.logger import get_logger
39
+
40
+
41
+ # Define training procedure
42
+ class Separation(sb.Brain):
43
+ def compute_forward(self, mix, targets, stage, noise=None):
44
+ """Forward computations from the mixture to the separated signals."""
45
+
46
+ # Unpack lists and put tensors in the right device
47
+ mix, mix_lens = mix
48
+ mix, mix_lens = mix.to(self.device), mix_lens.to(self.device)
49
+
50
+ # Convert targets to tensor
51
+ targets = torch.cat(
52
+ [targets[i][0].unsqueeze(-1) for i in range(self.hparams.num_spks)],
53
+ dim=-1,
54
+ ).to(self.device)
55
+
56
+ # Add speech distortions
57
+ if stage == sb.Stage.TRAIN:
58
+ with torch.no_grad():
59
+ if self.hparams.use_speedperturb:
60
+ mix, targets = self.add_speed_perturb(targets, mix_lens)
61
+
62
+ mix = targets.sum(-1)
63
+
64
+ if self.hparams.use_wavedrop:
65
+ mix = self.hparams.drop_chunk(mix, mix_lens)
66
+ mix = self.hparams.drop_freq(mix)
67
+
68
+ if self.hparams.limit_training_signal_len:
69
+ mix, targets = self.cut_signals(mix, targets)
70
+
71
+ # Separation
72
+ mix_w = self.hparams.Encoder(mix)
73
+ est_mask = self.hparams.MaskNet(mix_w)
74
+ mix_w = torch.stack([mix_w] * self.hparams.num_spks)
75
+ sep_h = mix_w * est_mask
76
+
77
+ # Decoding
78
+ est_source = torch.cat(
79
+ [
80
+ self.hparams.Decoder(sep_h[i]).unsqueeze(-1)
81
+ for i in range(self.hparams.num_spks)
82
+ ],
83
+ dim=-1,
84
+ )
85
+
86
+ # T changed after conv1d in encoder, fix it here
87
+ T_origin = mix.size(1)
88
+ T_est = est_source.size(1)
89
+ if T_origin > T_est:
90
+ est_source = F.pad(est_source, (0, 0, 0, T_origin - T_est))
91
+ else:
92
+ est_source = est_source[:, :T_origin, :]
93
+
94
+ return est_source, targets
95
+
96
+ def compute_objectives(self, predictions, targets):
97
+ """Computes the sinr loss"""
98
+ return self.hparams.loss(targets, predictions)
99
+
100
+ def fit_batch(self, batch):
101
+ """Trains one batch"""
102
+
103
+ # Unpacking batch list
104
+ mixture = batch.mix_sig
105
+ targets = [batch.s1_sig, batch.s2_sig]
106
+
107
+ if self.hparams.num_spks == 3:
108
+ targets.append(batch.s3_sig)
109
+
110
+ if self.hparams.num_spks == 4:
111
+ targets.append(batch.s3_sig)
112
+ targets.append(batch.s4_sig)
113
+
114
+ with self.training_ctx:
115
+ predictions, targets = self.compute_forward(
116
+ mixture, targets, sb.Stage.TRAIN
117
+ )
118
+ loss = self.compute_objectives(predictions, targets)
119
+
120
+ # hard threshold the easy dataitems
121
+ if self.hparams.threshold_byloss:
122
+ th = self.hparams.threshold
123
+ loss = loss[loss > th]
124
+ if loss.nelement() > 0:
125
+ loss = loss.mean()
126
+ else:
127
+ loss = loss.mean()
128
+
129
+ if loss.nelement() > 0 and loss < self.hparams.loss_upper_lim:
130
+ self.scaler.scale(loss).backward()
131
+ if self.hparams.clip_grad_norm >= 0:
132
+ self.scaler.unscale_(self.optimizer)
133
+ torch.nn.utils.clip_grad_norm_(
134
+ self.modules.parameters(),
135
+ self.hparams.clip_grad_norm,
136
+ )
137
+ self.scaler.step(self.optimizer)
138
+ self.scaler.update()
139
+ else:
140
+ self.nonfinite_count += 1
141
+ logger.info(
142
+ "infinite loss or empty loss! it happened {} times so far - skipping this batch".format(
143
+ self.nonfinite_count
144
+ )
145
+ )
146
+ loss.data = torch.tensor(0.0).to(self.device)
147
+ self.optimizer.zero_grad()
148
+
149
+ return loss.detach().cpu()
150
+
151
+ def evaluate_batch(self, batch, stage):
152
+ """Computations needed for validation/test batches"""
153
+ snt_id = batch.id
154
+ mixture = batch.mix_sig
155
+ targets = [batch.s1_sig, batch.s2_sig]
156
+ if self.hparams.num_spks == 3:
157
+ targets.append(batch.s3_sig)
158
+
159
+ if self.hparams.num_spks == 4:
160
+ targets.append(batch.s3_sig)
161
+ targets.append(batch.s4_sig)
162
+
163
+ with torch.no_grad():
164
+ predictions, targets = self.compute_forward(mixture, targets, stage)
165
+ loss = self.compute_objectives(predictions, targets)
166
+
167
+ # Manage audio file saving
168
+ if stage == sb.Stage.TEST and self.hparams.save_audio:
169
+ if hasattr(self.hparams, "n_audio_to_save"):
170
+ if self.hparams.n_audio_to_save > 0:
171
+ self.save_audio(snt_id[0], mixture, targets, predictions)
172
+ self.hparams.n_audio_to_save += -1
173
+ else:
174
+ self.save_audio(snt_id[0], mixture, targets, predictions)
175
+
176
+ return loss.mean().detach()
177
+
178
+ def on_stage_end(self, stage, stage_loss, epoch):
179
+ """Gets called at the end of a epoch."""
180
+ # Compute/store important stats
181
+ stage_stats = {"si-snr": stage_loss}
182
+ if stage == sb.Stage.TRAIN:
183
+ self.train_stats = stage_stats
184
+
185
+ # Perform end-of-iteration things, like annealing, logging, etc.
186
+ if stage == sb.Stage.VALID:
187
+ # Learning rate annealing
188
+ if isinstance(
189
+ self.hparams.lr_scheduler, schedulers.ReduceLROnPlateau
190
+ ):
191
+ current_lr, next_lr = self.hparams.lr_scheduler(
192
+ [self.optimizer], epoch, stage_loss
193
+ )
194
+ schedulers.update_learning_rate(self.optimizer, next_lr)
195
+ else:
196
+ # if we do not use the reducelronplateau, we do not change the lr
197
+ current_lr = self.hparams.optimizer.optim.param_groups[0]["lr"]
198
+
199
+ self.hparams.train_logger.log_stats(
200
+ stats_meta={"epoch": epoch, "lr": current_lr},
201
+ train_stats=self.train_stats,
202
+ valid_stats=stage_stats,
203
+ )
204
+ self.checkpointer.save_and_keep_only(
205
+ meta={"si-snr": stage_stats["si-snr"]}, min_keys=["si-snr"]
206
+ )
207
+ elif stage == sb.Stage.TEST:
208
+ self.hparams.train_logger.log_stats(
209
+ stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
210
+ test_stats=stage_stats,
211
+ )
212
+
213
+ def add_speed_perturb(self, targets, targ_lens):
214
+ """Adds speed perturbation and random_shift to the input signals"""
215
+
216
+ min_len = -1
217
+ recombine = False
218
+
219
+ if self.hparams.use_speedperturb or self.hparams.use_rand_shift:
220
+ # Performing speed change (independently on each source)
221
+ new_targets = []
222
+ recombine = True
223
+
224
+ for i in range(targets.shape[-1]):
225
+ new_target = self.hparams.speed_perturb(targets[:, :, i])
226
+ new_targets.append(new_target)
227
+ if i == 0:
228
+ min_len = new_target.shape[-1]
229
+ else:
230
+ if new_target.shape[-1] < min_len:
231
+ min_len = new_target.shape[-1]
232
+
233
+ if self.hparams.use_rand_shift:
234
+ # Performing random_shift (independently on each source)
235
+ recombine = True
236
+ for i in range(targets.shape[-1]):
237
+ rand_shift = torch.randint(
238
+ self.hparams.min_shift, self.hparams.max_shift, (1,)
239
+ )
240
+ new_targets[i] = new_targets[i].to(self.device)
241
+ new_targets[i] = torch.roll(
242
+ new_targets[i], shifts=(rand_shift[0],), dims=1
243
+ )
244
+
245
+ # Re-combination
246
+ if recombine:
247
+ if self.hparams.use_speedperturb:
248
+ targets = torch.zeros(
249
+ targets.shape[0],
250
+ min_len,
251
+ targets.shape[-1],
252
+ device=targets.device,
253
+ dtype=torch.float,
254
+ )
255
+ for i, new_target in enumerate(new_targets):
256
+ targets[:, :, i] = new_targets[i][:, 0:min_len]
257
+
258
+ mix = targets.sum(-1)
259
+ return mix, targets
260
+
261
+ def cut_signals(self, mixture, targets):
262
+ """This function selects a random segment of a given length within the mixture.
263
+ The corresponding targets are selected accordingly"""
264
+ randstart = torch.randint(
265
+ 0,
266
+ 1 + max(0, mixture.shape[1] - self.hparams.training_signal_len),
267
+ (1,),
268
+ ).item()
269
+ targets = targets[
270
+ :, randstart : randstart + self.hparams.training_signal_len, :
271
+ ]
272
+ mixture = mixture[
273
+ :, randstart : randstart + self.hparams.training_signal_len
274
+ ]
275
+ return mixture, targets
276
+
277
+ def reset_layer_recursively(self, layer):
278
+ """Reinitializes the parameters of the neural networks"""
279
+ if hasattr(layer, "reset_parameters"):
280
+ layer.reset_parameters()
281
+ for child_layer in layer.modules():
282
+ if layer != child_layer:
283
+ self.reset_layer_recursively(child_layer)
284
+
285
+ def save_results(self, test_data):
286
+ """This script computes the SDR and SI-SNR metrics and saves
287
+ them into a csv file"""
288
+
289
+ # This package is required for SDR computation
290
+ from mir_eval.separation import bss_eval_sources
291
+
292
+ # Create folders where to store audio
293
+ save_file = os.path.join(self.hparams.output_folder, "test_results.csv")
294
+
295
+ # Variable init
296
+ all_sdrs = []
297
+ all_sdrs_i = []
298
+ all_sisnrs = []
299
+ all_sisnrs_i = []
300
+ csv_columns = ["snt_id", "sdr", "sdr_i", "si-snr", "si-snr_i"]
301
+
302
+ test_loader = sb.dataio.dataloader.make_dataloader(
303
+ test_data, **self.hparams.dataloader_opts
304
+ )
305
+
306
+ with open(save_file, "w", newline="", encoding="utf-8") as results_csv:
307
+ writer = csv.DictWriter(results_csv, fieldnames=csv_columns)
308
+ writer.writeheader()
309
+
310
+ # Loop over all test sentence
311
+ with tqdm(test_loader, dynamic_ncols=True) as t:
312
+ for i, batch in enumerate(t):
313
+ # Apply Separation
314
+ mixture, mix_len = batch.mix_sig
315
+ snt_id = batch.id
316
+ targets = [batch.s1_sig, batch.s2_sig]
317
+ if self.hparams.num_spks == 3:
318
+ targets.append(batch.s3_sig)
319
+
320
+ if self.hparams.num_spks == 4:
321
+ targets.append(batch.s3_sig)
322
+ targets.append(batch.s4_sig)
323
+
324
+ with torch.no_grad():
325
+ predictions, targets = self.compute_forward(
326
+ batch.mix_sig, targets, sb.Stage.TEST
327
+ )
328
+
329
+ # Compute SI-SNR
330
+ sisnr = self.compute_objectives(predictions, targets)
331
+
332
+ # Compute SI-SNR improvement
333
+ mixture_signal = torch.stack(
334
+ [mixture] * self.hparams.num_spks, dim=-1
335
+ )
336
+ mixture_signal = mixture_signal.to(targets.device)
337
+ sisnr_baseline = self.compute_objectives(
338
+ mixture_signal, targets
339
+ )
340
+ sisnr_i = sisnr - sisnr_baseline
341
+
342
+ # Compute SDR
343
+ sdr, _, _, _ = bss_eval_sources(
344
+ targets[0].t().cpu().numpy(),
345
+ predictions[0].t().detach().cpu().numpy(),
346
+ )
347
+
348
+ sdr_baseline, _, _, _ = bss_eval_sources(
349
+ targets[0].t().cpu().numpy(),
350
+ mixture_signal[0].t().detach().cpu().numpy(),
351
+ )
352
+
353
+ sdr_i = sdr.mean() - sdr_baseline.mean()
354
+
355
+ # Saving on a csv file
356
+ row = {
357
+ "snt_id": snt_id[0],
358
+ "sdr": sdr.mean(),
359
+ "sdr_i": sdr_i,
360
+ "si-snr": -sisnr.item(),
361
+ "si-snr_i": -sisnr_i.item(),
362
+ }
363
+ writer.writerow(row)
364
+
365
+ # Metric Accumulation
366
+ all_sdrs.append(sdr.mean())
367
+ all_sdrs_i.append(sdr_i.mean())
368
+ all_sisnrs.append(-sisnr.item())
369
+ all_sisnrs_i.append(-sisnr_i.item())
370
+
371
+ row = {
372
+ "snt_id": "avg",
373
+ "sdr": np.array(all_sdrs).mean(),
374
+ "sdr_i": np.array(all_sdrs_i).mean(),
375
+ "si-snr": np.array(all_sisnrs).mean(),
376
+ "si-snr_i": np.array(all_sisnrs_i).mean(),
377
+ }
378
+ writer.writerow(row)
379
+
380
+ logger.info("Mean SISNR is {}".format(np.array(all_sisnrs).mean()))
381
+ logger.info("Mean SISNRi is {}".format(np.array(all_sisnrs_i).mean()))
382
+ logger.info("Mean SDR is {}".format(np.array(all_sdrs).mean()))
383
+ logger.info("Mean SDRi is {}".format(np.array(all_sdrs_i).mean()))
384
+
385
+ def save_audio(self, snt_id, mixture, targets, predictions):
386
+ "saves the test audio (mixture, targets, and estimated sources) on disk"
387
+
388
+ # Create output folder
389
+ save_path = os.path.join(self.hparams.save_folder, "audio_results")
390
+ if not os.path.exists(save_path):
391
+ os.mkdir(save_path)
392
+
393
+ for ns in range(self.hparams.num_spks):
394
+ # Estimated source
395
+ signal = predictions[0, :, ns]
396
+ signal = signal / signal.abs().max()
397
+ save_file = os.path.join(
398
+ save_path, "item{}_source{}hat.wav".format(snt_id, ns + 1)
399
+ )
400
+ torchaudio.save(
401
+ save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
402
+ )
403
+
404
+ # Original source
405
+ signal = targets[0, :, ns]
406
+ signal = signal / signal.abs().max()
407
+ save_file = os.path.join(
408
+ save_path, "item{}_source{}.wav".format(snt_id, ns + 1)
409
+ )
410
+ torchaudio.save(
411
+ save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
412
+ )
413
+
414
+ # Mixture
415
+ signal = mixture[0][0, :]
416
+ signal = signal / signal.abs().max()
417
+ save_file = os.path.join(save_path, "item{}_mix.wav".format(snt_id))
418
+ torchaudio.save(
419
+ save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
420
+ )
421
+
422
+
423
def dataio_prep(hparams):
    """Creates the data processing pipeline and the three datasets.

    Arguments
    ---------
    hparams : dict
        Loaded hyperparameters; must provide ``train_data`` / ``valid_data`` /
        ``test_data`` csv paths, ``data_folder`` and ``num_spks``.

    Returns
    -------
    (train_data, valid_data, test_data) : tuple of DynamicItemDataset
    """
    num_spks = hparams["num_spks"]

    # 1. One dataset per split, resolving the data_root placeholder in the csv
    splits = {}
    for name in ("train", "valid", "test"):
        splits[name] = sb.dataio.dataset.DynamicItemDataset.from_csv(
            csv_path=hparams[name + "_data"],
            replacements={"data_root": hparams["data_folder"]},
        )
    datasets = [splits["train"], splits["valid"], splits["test"]]

    # 2. Audio-loading pipelines (one per signal)
    @sb.utils.data_pipeline.takes("mix_wav")
    @sb.utils.data_pipeline.provides("mix_sig")
    def audio_pipeline_mix(mix_wav):
        return sb.dataio.dataio.read_audio(mix_wav)

    @sb.utils.data_pipeline.takes("s1_wav")
    @sb.utils.data_pipeline.provides("s1_sig")
    def audio_pipeline_s1(s1_wav):
        return sb.dataio.dataio.read_audio(s1_wav)

    @sb.utils.data_pipeline.takes("s2_wav")
    @sb.utils.data_pipeline.provides("s2_sig")
    def audio_pipeline_s2(s2_wav):
        return sb.dataio.dataio.read_audio(s2_wav)

    # Third source pipeline, needed for both 3- and 4-speaker setups
    if num_spks >= 3:
        @sb.utils.data_pipeline.takes("s3_wav")
        @sb.utils.data_pipeline.provides("s3_sig")
        def audio_pipeline_s3(s3_wav):
            return sb.dataio.dataio.read_audio(s3_wav)

    # Fourth source pipeline, only for the 4-speaker setup
    if num_spks == 4:
        @sb.utils.data_pipeline.takes("s4_wav")
        @sb.utils.data_pipeline.provides("s4_sig")
        def audio_pipeline_s4(s4_wav):
            return sb.dataio.dataio.read_audio(s4_wav)

    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_mix)
    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s1)
    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s2)

    # 3. Register extra sources and declare the batch output keys
    output_keys = ["id", "mix_sig", "s1_sig", "s2_sig"]
    if num_spks == 3:
        sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s3)
        output_keys.append("s3_sig")
    elif num_spks == 4:
        sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s3)
        sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s4)
        output_keys += ["s3_sig", "s4_sig"]
    sb.dataio.dataset.set_output_keys(datasets, output_keys)

    return splits["train"], splits["valid"], splits["test"]
499
+
500
+
501
if __name__ == "__main__":
    # Parse CLI arguments and load hyperparameters (with overrides)
    hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
    with open(hparams_file, encoding="utf-8") as fin:
        hparams = load_hyperpyyaml(fin, overrides)

    # Initialize ddp (useful only for multi-GPU DDP training)
    sb.utils.distributed.ddp_init_group(run_opts)

    # Module-level logger used by the Brain subclass
    logger = get_logger(__name__)

    # Create experiment directory
    sb.create_experiment_directory(
        experiment_directory=hparams["output_folder"],
        hyperparams_to_save=hparams_file,
        overrides=overrides,
    )

    # fp16 autocast is not supported on CPU; fall back to bf16
    if run_opts.get("device") == "cpu" and hparams.get("precision") == "fp16":
        hparams["precision"] = "bf16"

    # Dynamic mixing requires a valid base folder of source speech
    if hparams["dynamic_mixing"] and not os.path.exists(
        hparams["base_folder_dm"]
    ):
        raise ValueError(
            "Please, specify a valid base_folder_dm folder when using dynamic mixing"
        )

    # Data preparation
    from prepare_data import prepare_wsjmix  # noqa

    # NOTE(review): the prepare_wsjmix call is intentionally disabled in this
    # run; the csv manifests are assumed to already exist.

    # Create dataset objects
    if hparams["dynamic_mixing"]:
        from dynamic_mixing import dynamic_mix_data_prep

        processed_folder = (
            os.path.normpath(hparams["base_folder_dm"]) + "_processed"
        )
        # If the dm base folder is not yet processed, resample it first
        if "processed" not in hparams["base_folder_dm"]:
            if not os.path.exists(processed_folder):
                from preprocess_dynamic_mixing import resample_folder

                print("Resampling the base folder")
                run_on_main(
                    resample_folder,
                    kwargs={
                        "input_folder": hparams["base_folder_dm"],
                        "output_folder": processed_folder,
                        "fs": hparams["sample_rate"],
                        "regex": "**/*.wav",
                    },
                )
            else:
                print(
                    "Using the existing processed folder on the same directory as base_folder_dm"
                )
            # Point dynamic mixing at the resampled data
            hparams["base_folder_dm"] = processed_folder

        # Collect the hparams needed for dynamic mixing
        dm_hparams = {
            key: hparams[key]
            for key in (
                "train_data",
                "data_folder",
                "base_folder_dm",
                "sample_rate",
                "num_spks",
                "training_signal_len",
                "dataloader_opts",
            )
        }
        train_data = dynamic_mix_data_prep(dm_hparams)
        _, valid_data, test_data = dataio_prep(hparams)
    else:
        train_data, valid_data, test_data = dataio_prep(hparams)

    # Load pretrained model if pretrained_separator is present in the yaml
    if "pretrained_separator" in hparams:
        run_on_main(hparams["pretrained_separator"].collect_files)
        hparams["pretrained_separator"].load_collected()

    # Brain class initialization
    separator = Separation(
        modules=hparams["modules"],
        opt_class=hparams["optimizer"],
        hparams=hparams,
        run_opts=run_opts,
        checkpointer=hparams["checkpointer"],
    )

    # Re-initialize the parameters if we don't use a pretrained model
    if "pretrained_separator" not in hparams:
        for module in separator.modules.values():
            separator.reset_layer_recursively(module)

    # Training is disabled in this run (evaluation only):
    # separator.fit(
    #     separator.hparams.epoch_counter,
    #     train_data,
    #     valid_data,
    #     train_loader_kwargs=hparams["dataloader_opts"],
    #     valid_loader_kwargs=hparams["dataloader_opts"],
    # )

    # Eval
    separator.evaluate(test_data, min_key="si-snr")
    separator.save_results(test_data)
Conv-Tasnet/results/convtasnet_4-mix/1234/test_results.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ snt_id,sdr,sdr_i,si-snr,si-snr_i
Conv-Tasnet/results/convtasnet_4-mix/1234/train.py ADDED
@@ -0,0 +1,628 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env/python3
2
+ """Recipe for training a neural speech separation system on the wsjmix
3
+ dataset. The system employs an encoder, a decoder, and a masking network.
4
+
5
+ To run this recipe, do the following:
6
+ > python train.py hparams/sepformer.yaml
7
+ > python train.py hparams/dualpath_rnn.yaml
8
+ > python train.py hparams/convtasnet.yaml
9
+
10
+ The experiment file is flexible enough to support different neural
11
+ networks. By properly changing the parameter files, you can try
12
+ different architectures. The script supports both wsj2mix and
13
+ wsj3mix.
14
+
15
+ # 4-mix: mainly adapt train.py and the config according to num_spks
16
+ Authors
17
+ * Cem Subakan 2020
18
+ * Mirco Ravanelli 2020
19
+ * Samuele Cornell 2020
20
+ * Mirko Bronzi 2020
21
+ * Jianyuan Zhong 2020
22
+ """
23
+
24
+ import csv
25
+ import os
26
+ import sys
27
+
28
+ import numpy as np
29
+ import torch
30
+ import torch.nn.functional as F
31
+ import torchaudio
32
+ from hyperpyyaml import load_hyperpyyaml
33
+ from tqdm import tqdm
34
+
35
+ import speechbrain as sb
36
+ import speechbrain.nnet.schedulers as schedulers
37
+ from speechbrain.utils.distributed import run_on_main
38
+ from speechbrain.utils.logger import get_logger
39
+
40
+
41
+ # Define training procedure
42
class Separation(sb.Brain):
    """Brain subclass for training/evaluating the source-separation model.

    The model is encoder -> masking network -> decoder; the loss is
    (negative) SI-SNR computed by ``self.hparams.loss``.
    """

    def compute_forward(self, mix, targets, stage, noise=None):
        """Forward computations from the mixture to the separated signals.

        Arguments
        ---------
        mix : tuple
            (mix_signal, mix_lengths) as produced by the dataloader.
        targets : list
            One (signal, lengths) pair per speaker.
        stage : sb.Stage
            TRAIN / VALID / TEST; augmentations run only during TRAIN.
        noise : optional
            Unused; kept for interface compatibility.

        Returns
        -------
        est_source, targets : torch.Tensor
            Both shaped (batch, time, num_spks).
        """
        # Unpack lists and put tensors on the right device
        mix, mix_lens = mix
        mix, mix_lens = mix.to(self.device), mix_lens.to(self.device)

        # Stack per-speaker targets into one (batch, time, num_spks) tensor
        targets = torch.cat(
            [targets[i][0].unsqueeze(-1) for i in range(self.hparams.num_spks)],
            dim=-1,
        ).to(self.device)

        # Add speech distortions (training only)
        if stage == sb.Stage.TRAIN:
            with torch.no_grad():
                if self.hparams.use_speedperturb:
                    mix, targets = self.add_speed_perturb(targets, mix_lens)
                    # Rebuild the mixture from the perturbed sources
                    mix = targets.sum(-1)

                if self.hparams.use_wavedrop:
                    mix = self.hparams.drop_chunk(mix, mix_lens)
                    mix = self.hparams.drop_freq(mix)

                if self.hparams.limit_training_signal_len:
                    mix, targets = self.cut_signals(mix, targets)

        # Separation: encode, estimate one mask per speaker, apply masks
        mix_w = self.hparams.Encoder(mix)
        est_mask = self.hparams.MaskNet(mix_w)
        mix_w = torch.stack([mix_w] * self.hparams.num_spks)
        sep_h = mix_w * est_mask

        # Decoding each masked representation back to the time domain
        est_source = torch.cat(
            [
                self.hparams.Decoder(sep_h[i]).unsqueeze(-1)
                for i in range(self.hparams.num_spks)
            ],
            dim=-1,
        )

        # T changed after conv1d in encoder; pad/trim back to input length
        T_origin = mix.size(1)
        T_est = est_source.size(1)
        if T_origin > T_est:
            est_source = F.pad(est_source, (0, 0, 0, T_origin - T_est))
        else:
            est_source = est_source[:, :T_origin, :]

        return est_source, targets

    def compute_objectives(self, predictions, targets):
        """Computes the SI-SNR loss (negative SI-SNR; lower is better)."""
        return self.hparams.loss(targets, predictions)

    def _collect_targets(self, batch):
        """Collects the per-speaker target pairs for the configured num_spks.

        Shared by fit_batch / evaluate_batch / save_results so the speaker
        count handling cannot drift between them.
        """
        targets = [batch.s1_sig, batch.s2_sig]
        if self.hparams.num_spks >= 3:
            targets.append(batch.s3_sig)
        if self.hparams.num_spks == 4:
            targets.append(batch.s4_sig)
        return targets

    def fit_batch(self, batch):
        """Trains one batch and returns the detached loss."""
        mixture = batch.mix_sig
        targets = self._collect_targets(batch)

        with self.training_ctx:
            predictions, targets = self.compute_forward(
                mixture, targets, sb.Stage.TRAIN
            )
            loss = self.compute_objectives(predictions, targets)

            # Hard-threshold the easy data items: only back-propagate
            # through samples whose loss is above the threshold
            if self.hparams.threshold_byloss:
                th = self.hparams.threshold
                loss = loss[loss > th]
                if loss.nelement() > 0:
                    loss = loss.mean()
            else:
                loss = loss.mean()

        if loss.nelement() > 0 and loss < self.hparams.loss_upper_lim:
            self.scaler.scale(loss).backward()
            if self.hparams.clip_grad_norm >= 0:
                self.scaler.unscale_(self.optimizer)
                torch.nn.utils.clip_grad_norm_(
                    self.modules.parameters(),
                    self.hparams.clip_grad_norm,
                )
            self.scaler.step(self.optimizer)
            self.scaler.update()
        else:
            self.nonfinite_count += 1
            logger.info(
                "infinite loss or empty loss! it happened {} times so far - skipping this batch".format(
                    self.nonfinite_count
                )
            )
            loss.data = torch.tensor(0.0).to(self.device)
        # BUGFIX: zero gradients after *every* batch. Previously this ran
        # only on the skip path, so gradients accumulated across successful
        # batches (the upstream SpeechBrain recipe zeroes unconditionally).
        self.optimizer.zero_grad()

        return loss.detach().cpu()

    def evaluate_batch(self, batch, stage):
        """Computations needed for validation/test batches."""
        snt_id = batch.id
        mixture = batch.mix_sig
        targets = self._collect_targets(batch)

        with torch.no_grad():
            predictions, targets = self.compute_forward(mixture, targets, stage)
            loss = self.compute_objectives(predictions, targets)

        # Manage audio file saving (optionally capped by n_audio_to_save)
        if stage == sb.Stage.TEST and self.hparams.save_audio:
            if hasattr(self.hparams, "n_audio_to_save"):
                if self.hparams.n_audio_to_save > 0:
                    self.save_audio(snt_id[0], mixture, targets, predictions)
                    self.hparams.n_audio_to_save += -1
            else:
                self.save_audio(snt_id[0], mixture, targets, predictions)

        return loss.mean().detach()

    def on_stage_end(self, stage, stage_loss, epoch):
        """Gets called at the end of an epoch: LR annealing, logging, checkpointing."""
        stage_stats = {"si-snr": stage_loss}
        if stage == sb.Stage.TRAIN:
            self.train_stats = stage_stats

        if stage == sb.Stage.VALID:
            # Learning rate annealing
            if isinstance(
                self.hparams.lr_scheduler, schedulers.ReduceLROnPlateau
            ):
                current_lr, next_lr = self.hparams.lr_scheduler(
                    [self.optimizer], epoch, stage_loss
                )
                schedulers.update_learning_rate(self.optimizer, next_lr)
            else:
                # Without ReduceLROnPlateau the lr is left unchanged
                current_lr = self.hparams.optimizer.optim.param_groups[0]["lr"]

            self.hparams.train_logger.log_stats(
                stats_meta={"epoch": epoch, "lr": current_lr},
                train_stats=self.train_stats,
                valid_stats=stage_stats,
            )
            # Keep only the checkpoint with the lowest validation si-snr loss
            self.checkpointer.save_and_keep_only(
                meta={"si-snr": stage_stats["si-snr"]}, min_keys=["si-snr"]
            )
        elif stage == sb.Stage.TEST:
            self.hparams.train_logger.log_stats(
                stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
                test_stats=stage_stats,
            )

    def add_speed_perturb(self, targets, targ_lens):
        """Adds speed perturbation and random_shift to the input signals.

        Returns the recombined (mix, targets) where the mixture is the sum
        of the (possibly perturbed) sources.
        """
        min_len = -1
        recombine = False

        if self.hparams.use_speedperturb or self.hparams.use_rand_shift:
            # Performing speed change (independently on each source)
            new_targets = []
            recombine = True

            for i in range(targets.shape[-1]):
                new_target = self.hparams.speed_perturb(targets[:, :, i])
                new_targets.append(new_target)
                # Track the shortest perturbed source so all can be cropped
                if i == 0:
                    min_len = new_target.shape[-1]
                else:
                    if new_target.shape[-1] < min_len:
                        min_len = new_target.shape[-1]

        if self.hparams.use_rand_shift:
            # Performing random_shift (independently on each source)
            recombine = True
            for i in range(targets.shape[-1]):
                rand_shift = torch.randint(
                    self.hparams.min_shift, self.hparams.max_shift, (1,)
                )
                new_targets[i] = new_targets[i].to(self.device)
                new_targets[i] = torch.roll(
                    new_targets[i], shifts=(rand_shift[0],), dims=1
                )

        # Re-combination: crop every perturbed source to the common length
        if recombine:
            if self.hparams.use_speedperturb:
                targets = torch.zeros(
                    targets.shape[0],
                    min_len,
                    targets.shape[-1],
                    device=targets.device,
                    dtype=torch.float,
                )
                for i, new_target in enumerate(new_targets):
                    targets[:, :, i] = new_target[:, 0:min_len]

        mix = targets.sum(-1)
        return mix, targets

    def cut_signals(self, mixture, targets):
        """Selects a random segment of training_signal_len samples within the
        mixture; the targets are cut at the same offset."""
        randstart = torch.randint(
            0,
            1 + max(0, mixture.shape[1] - self.hparams.training_signal_len),
            (1,),
        ).item()
        targets = targets[
            :, randstart : randstart + self.hparams.training_signal_len, :
        ]
        mixture = mixture[
            :, randstart : randstart + self.hparams.training_signal_len
        ]
        return mixture, targets

    def reset_layer_recursively(self, layer):
        """Reinitializes the parameters of the given module and all its children."""
        if hasattr(layer, "reset_parameters"):
            layer.reset_parameters()
        for child_layer in layer.modules():
            if layer != child_layer:
                self.reset_layer_recursively(child_layer)

    def save_results(self, test_data):
        """Computes the SDR and SI-SNR metrics on test_data and saves them
        into test_results.csv in the output folder."""
        # This package is required for SDR computation
        from mir_eval.separation import bss_eval_sources

        save_file = os.path.join(self.hparams.output_folder, "test_results.csv")

        # Metric accumulators (one entry per test utterance)
        all_sdrs = []
        all_sdrs_i = []
        all_sisnrs = []
        all_sisnrs_i = []
        csv_columns = ["snt_id", "sdr", "sdr_i", "si-snr", "si-snr_i"]

        test_loader = sb.dataio.dataloader.make_dataloader(
            test_data, **self.hparams.dataloader_opts
        )

        with open(save_file, "w", newline="", encoding="utf-8") as results_csv:
            writer = csv.DictWriter(results_csv, fieldnames=csv_columns)
            writer.writeheader()

            # Loop over all test sentences
            with tqdm(test_loader, dynamic_ncols=True) as t:
                for i, batch in enumerate(t):
                    # Apply separation
                    mixture, mix_len = batch.mix_sig
                    snt_id = batch.id
                    targets = self._collect_targets(batch)

                    with torch.no_grad():
                        predictions, targets = self.compute_forward(
                            batch.mix_sig, targets, sb.Stage.TEST
                        )

                    # SI-SNR (loss value is negative SI-SNR)
                    sisnr = self.compute_objectives(predictions, targets)

                    # SI-SNR improvement over the unprocessed mixture
                    mixture_signal = torch.stack(
                        [mixture] * self.hparams.num_spks, dim=-1
                    )
                    mixture_signal = mixture_signal.to(targets.device)
                    sisnr_baseline = self.compute_objectives(
                        mixture_signal, targets
                    )
                    sisnr_i = sisnr - sisnr_baseline

                    # SDR via mir_eval (first batch element only)
                    sdr, _, _, _ = bss_eval_sources(
                        targets[0].t().cpu().numpy(),
                        predictions[0].t().detach().cpu().numpy(),
                    )
                    sdr_baseline, _, _, _ = bss_eval_sources(
                        targets[0].t().cpu().numpy(),
                        mixture_signal[0].t().detach().cpu().numpy(),
                    )
                    sdr_i = sdr.mean() - sdr_baseline.mean()

                    # Per-utterance csv row
                    writer.writerow(
                        {
                            "snt_id": snt_id[0],
                            "sdr": sdr.mean(),
                            "sdr_i": sdr_i,
                            "si-snr": -sisnr.item(),
                            "si-snr_i": -sisnr_i.item(),
                        }
                    )

                    # Metric accumulation
                    all_sdrs.append(sdr.mean())
                    all_sdrs_i.append(sdr_i.mean())
                    all_sisnrs.append(-sisnr.item())
                    all_sisnrs_i.append(-sisnr_i.item())

            # Final average row
            writer.writerow(
                {
                    "snt_id": "avg",
                    "sdr": np.array(all_sdrs).mean(),
                    "sdr_i": np.array(all_sdrs_i).mean(),
                    "si-snr": np.array(all_sisnrs).mean(),
                    "si-snr_i": np.array(all_sisnrs_i).mean(),
                }
            )

        logger.info("Mean SISNR is {}".format(np.array(all_sisnrs).mean()))
        logger.info("Mean SISNRi is {}".format(np.array(all_sisnrs_i).mean()))
        logger.info("Mean SDR is {}".format(np.array(all_sdrs).mean()))
        logger.info("Mean SDRi is {}".format(np.array(all_sdrs_i).mean()))

    def save_audio(self, snt_id, mixture, targets, predictions):
        """Saves the test audio (mixture, targets, and estimated sources) on disk."""
        # Create output folder. makedirs + exist_ok is race-safe and also
        # works when intermediate directories are missing (os.mkdir was not).
        save_path = os.path.join(self.hparams.save_folder, "audio_results")
        os.makedirs(save_path, exist_ok=True)

        for ns in range(self.hparams.num_spks):
            # Estimated source (peak-normalized before writing)
            signal = predictions[0, :, ns]
            signal = signal / signal.abs().max()
            save_file = os.path.join(
                save_path, "item{}_source{}hat.wav".format(snt_id, ns + 1)
            )
            torchaudio.save(
                save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
            )

            # Original source
            signal = targets[0, :, ns]
            signal = signal / signal.abs().max()
            save_file = os.path.join(
                save_path, "item{}_source{}.wav".format(snt_id, ns + 1)
            )
            torchaudio.save(
                save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
            )

        # Mixture
        signal = mixture[0][0, :]
        signal = signal / signal.abs().max()
        save_file = os.path.join(save_path, "item{}_mix.wav".format(snt_id))
        torchaudio.save(
            save_file, signal.unsqueeze(0).cpu(), self.hparams.sample_rate
        )
423
def dataio_prep(hparams):
    """Creates the data processing pipeline and the three datasets.

    Arguments
    ---------
    hparams : dict
        Loaded hyperparameters; must provide ``train_data`` / ``valid_data`` /
        ``test_data`` csv paths, ``data_folder`` and ``num_spks``.

    Returns
    -------
    (train_data, valid_data, test_data) : tuple of DynamicItemDataset
    """
    num_spks = hparams["num_spks"]

    # 1. One dataset per split, resolving the data_root placeholder in the csv
    splits = {}
    for name in ("train", "valid", "test"):
        splits[name] = sb.dataio.dataset.DynamicItemDataset.from_csv(
            csv_path=hparams[name + "_data"],
            replacements={"data_root": hparams["data_folder"]},
        )
    datasets = [splits["train"], splits["valid"], splits["test"]]

    # 2. Audio-loading pipelines (one per signal)
    @sb.utils.data_pipeline.takes("mix_wav")
    @sb.utils.data_pipeline.provides("mix_sig")
    def audio_pipeline_mix(mix_wav):
        return sb.dataio.dataio.read_audio(mix_wav)

    @sb.utils.data_pipeline.takes("s1_wav")
    @sb.utils.data_pipeline.provides("s1_sig")
    def audio_pipeline_s1(s1_wav):
        return sb.dataio.dataio.read_audio(s1_wav)

    @sb.utils.data_pipeline.takes("s2_wav")
    @sb.utils.data_pipeline.provides("s2_sig")
    def audio_pipeline_s2(s2_wav):
        return sb.dataio.dataio.read_audio(s2_wav)

    # Third source pipeline, needed for both 3- and 4-speaker setups
    if num_spks >= 3:
        @sb.utils.data_pipeline.takes("s3_wav")
        @sb.utils.data_pipeline.provides("s3_sig")
        def audio_pipeline_s3(s3_wav):
            return sb.dataio.dataio.read_audio(s3_wav)

    # Fourth source pipeline, only for the 4-speaker setup
    if num_spks == 4:
        @sb.utils.data_pipeline.takes("s4_wav")
        @sb.utils.data_pipeline.provides("s4_sig")
        def audio_pipeline_s4(s4_wav):
            return sb.dataio.dataio.read_audio(s4_wav)

    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_mix)
    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s1)
    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s2)

    # 3. Register extra sources and declare the batch output keys
    output_keys = ["id", "mix_sig", "s1_sig", "s2_sig"]
    if num_spks == 3:
        sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s3)
        output_keys.append("s3_sig")
    elif num_spks == 4:
        sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s3)
        sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline_s4)
        output_keys += ["s3_sig", "s4_sig"]
    sb.dataio.dataset.set_output_keys(datasets, output_keys)

    return splits["train"], splits["valid"], splits["test"]
499
+
500
+
501
if __name__ == "__main__":
    # Parse CLI arguments and load hyperparameters (with overrides)
    hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
    with open(hparams_file, encoding="utf-8") as fin:
        hparams = load_hyperpyyaml(fin, overrides)

    # Initialize ddp (useful only for multi-GPU DDP training)
    sb.utils.distributed.ddp_init_group(run_opts)

    # Module-level logger used by the Brain subclass
    logger = get_logger(__name__)

    # Create experiment directory
    sb.create_experiment_directory(
        experiment_directory=hparams["output_folder"],
        hyperparams_to_save=hparams_file,
        overrides=overrides,
    )

    # fp16 autocast is not supported on CPU; fall back to bf16
    if run_opts.get("device") == "cpu" and hparams.get("precision") == "fp16":
        hparams["precision"] = "bf16"

    # Dynamic mixing requires a valid base folder of source speech
    if hparams["dynamic_mixing"] and not os.path.exists(
        hparams["base_folder_dm"]
    ):
        raise ValueError(
            "Please, specify a valid base_folder_dm folder when using dynamic mixing"
        )

    # Data preparation
    from prepare_data import prepare_wsjmix  # noqa

    # NOTE(review): the prepare_wsjmix call is intentionally disabled in this
    # run; the csv manifests are assumed to already exist.

    # Create dataset objects
    if hparams["dynamic_mixing"]:
        from dynamic_mixing import dynamic_mix_data_prep

        processed_folder = (
            os.path.normpath(hparams["base_folder_dm"]) + "_processed"
        )
        # If the dm base folder is not yet processed, resample it first
        if "processed" not in hparams["base_folder_dm"]:
            if not os.path.exists(processed_folder):
                from preprocess_dynamic_mixing import resample_folder

                print("Resampling the base folder")
                run_on_main(
                    resample_folder,
                    kwargs={
                        "input_folder": hparams["base_folder_dm"],
                        "output_folder": processed_folder,
                        "fs": hparams["sample_rate"],
                        "regex": "**/*.wav",
                    },
                )
            else:
                print(
                    "Using the existing processed folder on the same directory as base_folder_dm"
                )
            # Point dynamic mixing at the resampled data
            hparams["base_folder_dm"] = processed_folder

        # Collect the hparams needed for dynamic mixing
        dm_hparams = {
            key: hparams[key]
            for key in (
                "train_data",
                "data_folder",
                "base_folder_dm",
                "sample_rate",
                "num_spks",
                "training_signal_len",
                "dataloader_opts",
            )
        }
        train_data = dynamic_mix_data_prep(dm_hparams)
        _, valid_data, test_data = dataio_prep(hparams)
    else:
        train_data, valid_data, test_data = dataio_prep(hparams)

    # Load pretrained model if pretrained_separator is present in the yaml
    if "pretrained_separator" in hparams:
        run_on_main(hparams["pretrained_separator"].collect_files)
        hparams["pretrained_separator"].load_collected()

    # Brain class initialization
    separator = Separation(
        modules=hparams["modules"],
        opt_class=hparams["optimizer"],
        hparams=hparams,
        run_opts=run_opts,
        checkpointer=hparams["checkpointer"],
    )

    # Re-initialize the parameters if we don't use a pretrained model
    if "pretrained_separator" not in hparams:
        for module in separator.modules.values():
            separator.reset_layer_recursively(module)

    # Training
    separator.fit(
        separator.hparams.epoch_counter,
        train_data,
        valid_data,
        train_loader_kwargs=hparams["dataloader_opts"],
        valid_loader_kwargs=hparams["dataloader_opts"],
    )

    # Eval
    separator.evaluate(test_data, min_key="si-snr")
    separator.save_results(test_data)
Conv-Tasnet/results/convtasnet_4-mix/1234/train_log.txt ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch: 1, lr: 1.50e-04 - train si-snr: 2.76 - valid si-snr: 12.09
2
+ epoch: 2, lr: 1.50e-04 - train si-snr: 2.15 - valid si-snr: 11.84
3
+ epoch: 3, lr: 1.50e-04 - train si-snr: 1.96 - valid si-snr: 11.70
4
+ epoch: 4, lr: 1.50e-04 - train si-snr: 1.70 - valid si-snr: 11.63
5
+ epoch: 5, lr: 1.50e-04 - train si-snr: 1.58 - valid si-snr: 11.57
6
+ epoch: 6, lr: 1.50e-04 - train si-snr: 1.45 - valid si-snr: 11.55
7
+ epoch: 7, lr: 1.50e-04 - train si-snr: 1.33 - valid si-snr: 11.45
8
+ epoch: 8, lr: 1.50e-04 - train si-snr: 1.20 - valid si-snr: 11.33
9
+ epoch: 9, lr: 1.50e-04 - train si-snr: 1.10 - valid si-snr: 11.35
10
+ epoch: 10, lr: 1.50e-04 - train si-snr: 1.01 - valid si-snr: 11.30
11
+ epoch: 11, lr: 1.50e-04 - train si-snr: 9.25e-01 - valid si-snr: 11.33
12
+ epoch: 12, lr: 1.50e-04 - train si-snr: 7.83e-01 - valid si-snr: 11.16
13
+ epoch: 13, lr: 1.50e-04 - train si-snr: 7.61e-01 - valid si-snr: 11.19
14
+ epoch: 14, lr: 1.50e-04 - train si-snr: 6.87e-01 - valid si-snr: 11.13
15
+ epoch: 15, lr: 1.50e-04 - train si-snr: 6.31e-01 - valid si-snr: 11.13
16
+ epoch: 16, lr: 1.50e-04 - train si-snr: 5.54e-01 - valid si-snr: 11.10
17
+ epoch: 17, lr: 1.50e-04 - train si-snr: 4.47e-01 - valid si-snr: 11.02
18
+ epoch: 18, lr: 1.50e-04 - train si-snr: 4.65e-01 - valid si-snr: 11.04
19
+ epoch: 19, lr: 1.50e-04 - train si-snr: 3.32e-01 - valid si-snr: 11.01
20
+ epoch: 20, lr: 1.50e-04 - train si-snr: 3.27e-01 - valid si-snr: 10.95
21
+ epoch: 21, lr: 1.50e-04 - train si-snr: 2.78e-01 - valid si-snr: 10.97
22
+ epoch: 22, lr: 1.50e-04 - train si-snr: 2.18e-01 - valid si-snr: 10.88
23
+ epoch: 23, lr: 1.50e-04 - train si-snr: 1.74e-01 - valid si-snr: 10.87
24
+ epoch: 24, lr: 1.50e-04 - train si-snr: 1.03e-01 - valid si-snr: 10.95
25
+ epoch: 25, lr: 1.50e-04 - train si-snr: 6.04e-02 - valid si-snr: 10.84
26
+ epoch: 26, lr: 1.50e-04 - train si-snr: -2.94e-02 - valid si-snr: 10.79
27
+ epoch: 27, lr: 1.50e-04 - train si-snr: -5.32e-02 - valid si-snr: 10.77
28
+ epoch: 28, lr: 1.50e-04 - train si-snr: -5.68e-02 - valid si-snr: 10.74
29
+ epoch: 29, lr: 1.50e-04 - train si-snr: -1.04e-01 - valid si-snr: 10.79
30
+ epoch: 30, lr: 1.50e-04 - train si-snr: -1.57e-01 - valid si-snr: 10.73
31
+ epoch: 31, lr: 1.50e-04 - train si-snr: -1.64e-01 - valid si-snr: 10.67
32
+ epoch: 32, lr: 1.50e-04 - train si-snr: -2.11e-01 - valid si-snr: 10.71
33
+ epoch: 33, lr: 1.50e-04 - train si-snr: -2.48e-01 - valid si-snr: 10.73
34
+ epoch: 34, lr: 1.50e-04 - train si-snr: -2.79e-01 - valid si-snr: 10.69
35
+ epoch: 35, lr: 1.50e-04 - train si-snr: -3.55e-01 - valid si-snr: 10.69
36
+ epoch: 36, lr: 1.50e-04 - train si-snr: -3.32e-01 - valid si-snr: 10.64
37
+ epoch: 37, lr: 1.50e-04 - train si-snr: -3.97e-01 - valid si-snr: 10.63
38
+ epoch: 38, lr: 1.50e-04 - train si-snr: -4.11e-01 - valid si-snr: 10.71
39
+ epoch: 39, lr: 1.50e-04 - train si-snr: -4.18e-01 - valid si-snr: 10.56
40
+ epoch: 40, lr: 1.50e-04 - train si-snr: -4.74e-01 - valid si-snr: 10.55
41
+ epoch: 41, lr: 1.50e-04 - train si-snr: -4.71e-01 - valid si-snr: 10.52
42
+ epoch: 1, lr: 1.50e-04 - train si-snr: 6.31 - valid si-snr: 23.11
43
+ epoch: 2, lr: 1.50e-04 - train si-snr: 4.85 - valid si-snr: 23.05
44
+ epoch: 3, lr: 1.50e-04 - train si-snr: 4.79 - valid si-snr: 22.98
45
+ epoch: 4, lr: 1.50e-04 - train si-snr: 4.56 - valid si-snr: 22.79
46
+ epoch: 5, lr: 1.50e-04 - train si-snr: 4.28 - valid si-snr: 23.05
47
+ epoch: 6, lr: 1.50e-04 - train si-snr: 4.27 - valid si-snr: 22.88
48
+ epoch: 7, lr: 1.50e-04 - train si-snr: 4.11 - valid si-snr: 22.86
49
+ epoch: 8, lr: 1.50e-04 - train si-snr: 4.11 - valid si-snr: 22.80
50
+ epoch: 9, lr: 1.50e-04 - train si-snr: 3.96 - valid si-snr: 22.80
51
+ epoch: 10, lr: 1.50e-04 - train si-snr: 3.91 - valid si-snr: 22.75
52
+ epoch: 11, lr: 1.50e-04 - train si-snr: 3.76 - valid si-snr: 22.72
53
+ epoch: 12, lr: 1.50e-04 - train si-snr: 3.82 - valid si-snr: 22.69
54
+ epoch: 13, lr: 1.50e-04 - train si-snr: 3.71 - valid si-snr: 22.86
55
+ epoch: 14, lr: 1.50e-04 - train si-snr: 3.64 - valid si-snr: 22.71
56
+ epoch: 15, lr: 1.50e-04 - train si-snr: 3.59 - valid si-snr: 22.89
57
+ epoch: 16, lr: 1.50e-04 - train si-snr: 3.39 - valid si-snr: 22.79
58
+ epoch: 17, lr: 1.50e-04 - train si-snr: 3.30 - valid si-snr: 22.69
59
+ epoch: 18, lr: 1.50e-04 - train si-snr: 3.29 - valid si-snr: 22.82
60
+ epoch: 19, lr: 1.50e-04 - train si-snr: 3.32 - valid si-snr: 22.75
61
+ epoch: 20, lr: 1.50e-04 - train si-snr: 3.14 - valid si-snr: 22.49
62
+ epoch: 21, lr: 1.50e-04 - train si-snr: 3.11 - valid si-snr: 22.83
63
+ epoch: 22, lr: 1.50e-04 - train si-snr: 3.12 - valid si-snr: 22.69
64
+ epoch: 23, lr: 1.50e-04 - train si-snr: 2.93 - valid si-snr: 22.66
65
+ epoch: 24, lr: 1.50e-04 - train si-snr: 2.96 - valid si-snr: 22.72
66
+ epoch: 25, lr: 1.50e-04 - train si-snr: 2.96 - valid si-snr: 22.83
67
+ epoch: 26, lr: 1.50e-04 - train si-snr: 2.88 - valid si-snr: 22.61
68
+ epoch: 27, lr: 1.50e-04 - train si-snr: 2.86 - valid si-snr: 22.83
69
+ epoch: 28, lr: 1.50e-04 - train si-snr: 2.80 - valid si-snr: 22.67
70
+ epoch: 29, lr: 1.50e-04 - train si-snr: 2.73 - valid si-snr: 22.67
71
+ epoch: 30, lr: 1.50e-04 - train si-snr: 2.65 - valid si-snr: 22.62
72
+ epoch: 31, lr: 1.50e-04 - train si-snr: 2.62 - valid si-snr: 22.63
73
+ epoch: 32, lr: 1.50e-04 - train si-snr: 2.61 - valid si-snr: 22.61
74
+ epoch: 33, lr: 1.50e-04 - train si-snr: 2.44 - valid si-snr: 22.55
75
+ epoch: 34, lr: 1.50e-04 - train si-snr: 2.50 - valid si-snr: 22.55
76
+ epoch: 35, lr: 1.50e-04 - train si-snr: 2.47 - valid si-snr: 22.60
77
+ epoch: 36, lr: 1.50e-04 - train si-snr: 2.44 - valid si-snr: 22.66
78
+ epoch: 37, lr: 1.50e-04 - train si-snr: 2.24 - valid si-snr: 22.64
79
+ epoch: 38, lr: 1.50e-04 - train si-snr: 2.28 - valid si-snr: 22.66
80
+ epoch: 39, lr: 1.50e-04 - train si-snr: 2.15 - valid si-snr: 22.62
81
+ epoch: 40, lr: 1.50e-04 - train si-snr: 2.19 - valid si-snr: 22.48
82
+ epoch: 41, lr: 1.50e-04 - train si-snr: 2.26 - valid si-snr: 22.66
83
+ epoch: 42, lr: 1.50e-04 - train si-snr: 2.09 - valid si-snr: 22.57
84
+ epoch: 43, lr: 1.50e-04 - train si-snr: 2.15 - valid si-snr: 22.47
85
+ epoch: 44, lr: 1.50e-04 - train si-snr: 2.00 - valid si-snr: 22.63
86
+ epoch: 45, lr: 1.50e-04 - train si-snr: 2.13 - valid si-snr: 22.52
87
+ epoch: 46, lr: 1.50e-04 - train si-snr: 2.00 - valid si-snr: 22.57
88
+ epoch: 47, lr: 1.50e-04 - train si-snr: 1.90 - valid si-snr: 22.50
89
+ epoch: 48, lr: 1.50e-04 - train si-snr: 1.89 - valid si-snr: 22.49
90
+ epoch: 49, lr: 1.50e-04 - train si-snr: 1.94 - valid si-snr: 22.54
91
+ epoch: 50, lr: 1.50e-04 - train si-snr: 1.89 - valid si-snr: 22.50
92
+ epoch: 51, lr: 1.50e-04 - train si-snr: 1.85 - valid si-snr: 22.55
93
+ epoch: 52, lr: 1.50e-04 - train si-snr: 1.66 - valid si-snr: 22.51
94
+ epoch: 53, lr: 1.50e-04 - train si-snr: 1.65 - valid si-snr: 22.52
95
+ epoch: 54, lr: 1.50e-04 - train si-snr: 1.77 - valid si-snr: 22.45
96
+ epoch: 55, lr: 1.50e-04 - train si-snr: 1.62 - valid si-snr: 22.45
97
+ epoch: 56, lr: 1.50e-04 - train si-snr: 1.52 - valid si-snr: 22.42
98
+ epoch: 57, lr: 1.50e-04 - train si-snr: 1.53 - valid si-snr: 22.39
99
+ epoch: 58, lr: 1.50e-04 - train si-snr: 1.52 - valid si-snr: 22.40
100
+ epoch: 59, lr: 1.50e-04 - train si-snr: 1.55 - valid si-snr: 22.43
101
+ epoch: 60, lr: 1.50e-04 - train si-snr: 1.64 - valid si-snr: 22.43
102
+ epoch: 61, lr: 1.50e-04 - train si-snr: 1.42 - valid si-snr: 22.38
103
+ epoch: 62, lr: 1.50e-04 - train si-snr: 1.50 - valid si-snr: 22.29
104
+ epoch: 63, lr: 1.50e-04 - train si-snr: 1.34 - valid si-snr: 22.51
105
+ epoch: 64, lr: 1.50e-04 - train si-snr: 1.25 - valid si-snr: 22.55
106
+ epoch: 65, lr: 1.50e-04 - train si-snr: 1.43 - valid si-snr: 22.35
107
+ epoch: 66, lr: 1.50e-04 - train si-snr: 1.33 - valid si-snr: 22.54
108
+ epoch: 67, lr: 1.50e-04 - train si-snr: 1.35 - valid si-snr: 22.44
109
+ epoch: 68, lr: 1.50e-04 - train si-snr: 1.35 - valid si-snr: 22.33
110
+ epoch: 69, lr: 1.50e-04 - train si-snr: 1.13 - valid si-snr: 22.38
111
+ epoch: 70, lr: 1.50e-04 - train si-snr: 1.18 - valid si-snr: 22.37
112
+ epoch: 71, lr: 1.50e-04 - train si-snr: 1.04 - valid si-snr: 22.35
113
+ epoch: 72, lr: 1.50e-04 - train si-snr: 1.24 - valid si-snr: 22.49
114
+ epoch: 73, lr: 1.50e-04 - train si-snr: 1.25 - valid si-snr: 22.35
115
+ epoch: 74, lr: 1.50e-04 - train si-snr: 1.07 - valid si-snr: 22.37
116
+ epoch: 75, lr: 1.50e-04 - train si-snr: 1.04 - valid si-snr: 22.37
117
+ epoch: 76, lr: 1.50e-04 - train si-snr: 1.11 - valid si-snr: 22.48
118
+ epoch: 77, lr: 1.50e-04 - train si-snr: 1.03 - valid si-snr: 22.46
119
+ epoch: 78, lr: 1.50e-04 - train si-snr: 9.65e-01 - valid si-snr: 22.31
120
+ epoch: 79, lr: 1.50e-04 - train si-snr: 1.06 - valid si-snr: 22.34
121
+ epoch: 80, lr: 1.50e-04 - train si-snr: 1.03 - valid si-snr: 22.32
122
+ epoch: 81, lr: 1.50e-04 - train si-snr: 8.12e-01 - valid si-snr: 22.32
123
+ epoch: 82, lr: 1.50e-04 - train si-snr: 8.76e-01 - valid si-snr: 22.33
124
+ epoch: 83, lr: 1.50e-04 - train si-snr: 8.91e-01 - valid si-snr: 22.32
125
+ epoch: 84, lr: 1.50e-04 - train si-snr: 9.11e-01 - valid si-snr: 22.34
126
+ epoch: 85, lr: 1.50e-04 - train si-snr: 7.24e-01 - valid si-snr: 22.39
127
+ epoch: 86, lr: 1.50e-04 - train si-snr: 7.65e-01 - valid si-snr: 22.34
128
+ epoch: 87, lr: 1.50e-04 - train si-snr: 7.10e-01 - valid si-snr: 22.29
129
+ epoch: 88, lr: 1.50e-04 - train si-snr: 7.65e-01 - valid si-snr: 22.42
130
+ epoch: 89, lr: 1.50e-04 - train si-snr: 7.09e-01 - valid si-snr: 22.35
131
+ epoch: 90, lr: 1.50e-04 - train si-snr: 8.13e-01 - valid si-snr: 22.38
132
+ epoch: 91, lr: 7.50e-05 - train si-snr: 5.81e-01 - valid si-snr: 22.24
133
+ epoch: 92, lr: 7.50e-05 - train si-snr: 3.71e-01 - valid si-snr: 22.33
134
+ epoch: 93, lr: 7.50e-05 - train si-snr: 3.21e-01 - valid si-snr: 22.33
135
+ epoch: 94, lr: 7.50e-05 - train si-snr: 3.48e-01 - valid si-snr: 22.29
136
+ epoch: 95, lr: 3.75e-05 - train si-snr: 4.08e-01 - valid si-snr: 22.34
137
+ epoch: 96, lr: 3.75e-05 - train si-snr: 2.29e-01 - valid si-snr: 22.33
138
+ epoch: 97, lr: 3.75e-05 - train si-snr: 2.27e-01 - valid si-snr: 22.29
139
+ epoch: 98, lr: 1.87e-05 - train si-snr: 1.28e-01 - valid si-snr: 22.27
140
+ epoch: 99, lr: 1.87e-05 - train si-snr: 3.17e-02 - valid si-snr: 22.27
141
+ epoch: 100, lr: 1.87e-05 - train si-snr: 6.84e-02 - valid si-snr: 22.24
142
+ epoch: 101, lr: 1.87e-05 - train si-snr: 6.90e-02 - valid si-snr: 22.25
143
+ epoch: 102, lr: 1.87e-05 - train si-snr: 1.53e-01 - valid si-snr: 22.28
144
+ epoch: 103, lr: 1.87e-05 - train si-snr: 4.23e-02 - valid si-snr: 22.28
145
+ epoch: 104, lr: 9.37e-06 - train si-snr: 7.48e-02 - valid si-snr: 22.24
146
+ epoch: 105, lr: 9.37e-06 - train si-snr: 8.28e-02 - valid si-snr: 22.27
147
+ epoch: 106, lr: 9.37e-06 - train si-snr: -1.19e-01 - valid si-snr: 22.26
148
+ epoch: 107, lr: 9.37e-06 - train si-snr: 2.27e-02 - valid si-snr: 22.26
149
+ epoch: 108, lr: 4.69e-06 - train si-snr: -9.19e-02 - valid si-snr: 22.24
150
+ epoch: 109, lr: 4.69e-06 - train si-snr: -1.86e-02 - valid si-snr: 22.26
151
+ epoch: 110, lr: 4.69e-06 - train si-snr: -1.29e-01 - valid si-snr: 22.26
152
+ epoch: 111, lr: 2.34e-06 - train si-snr: -1.28e-01 - valid si-snr: 22.26
153
+ epoch: 112, lr: 2.34e-06 - train si-snr: 1.96e-02 - valid si-snr: 22.26
154
+ epoch: 113, lr: 2.34e-06 - train si-snr: -8.82e-02 - valid si-snr: 22.26
155
+ epoch: 114, lr: 1.17e-06 - train si-snr: -2.95e-02 - valid si-snr: 22.25
156
+ epoch: 115, lr: 1.17e-06 - train si-snr: 1.44e-02 - valid si-snr: 22.26
157
+ epoch: 116, lr: 1.17e-06 - train si-snr: -2.01e-02 - valid si-snr: 22.25
158
+ epoch: 117, lr: 5.86e-07 - train si-snr: -6.14e-02 - valid si-snr: 22.25
159
+ epoch: 118, lr: 5.86e-07 - train si-snr: 1.49e-02 - valid si-snr: 22.25
160
+ epoch: 119, lr: 5.86e-07 - train si-snr: -2.11e-02 - valid si-snr: 22.25
161
+ epoch: 120, lr: 2.93e-07 - train si-snr: -8.56e-02 - valid si-snr: 22.25
162
+ epoch: 121, lr: 2.93e-07 - train si-snr: 3.46e-02 - valid si-snr: 22.25
163
+ epoch: 122, lr: 2.93e-07 - train si-snr: -4.48e-02 - valid si-snr: 22.26
164
+ epoch: 123, lr: 1.46e-07 - train si-snr: -4.78e-02 - valid si-snr: 22.25
165
+ epoch: 124, lr: 1.46e-07 - train si-snr: 4.87e-02 - valid si-snr: 22.26
166
+ epoch: 125, lr: 1.46e-07 - train si-snr: -8.55e-02 - valid si-snr: 22.25
167
+ epoch: 126, lr: 7.32e-08 - train si-snr: 4.56e-02 - valid si-snr: 22.25
168
+ epoch: 127, lr: 7.32e-08 - train si-snr: -7.29e-02 - valid si-snr: 22.25
169
+ epoch: 128, lr: 7.32e-08 - train si-snr: -4.80e-02 - valid si-snr: 22.26
170
+ epoch: 129, lr: 3.66e-08 - train si-snr: -6.66e-02 - valid si-snr: 22.26
171
+ epoch: 130, lr: 3.66e-08 - train si-snr: 6.62e-03 - valid si-snr: 22.26
172
+ epoch: 131, lr: 3.66e-08 - train si-snr: -1.94e-02 - valid si-snr: 22.26
173
+ epoch: 132, lr: 1.83e-08 - train si-snr: 1.16e-02 - valid si-snr: 22.26
174
+ epoch: 133, lr: 1.83e-08 - train si-snr: -1.09e-01 - valid si-snr: 22.26
175
+ epoch: 134, lr: 1.83e-08 - train si-snr: -1.16e-01 - valid si-snr: 22.26
176
+ epoch: 135, lr: 1.00e-08 - train si-snr: 2.68e-02 - valid si-snr: 22.26
177
+ epoch: 136, lr: 1.00e-08 - train si-snr: 3.10e-03 - valid si-snr: 22.26
178
+ epoch: 137, lr: 1.00e-08 - train si-snr: -4.31e-02 - valid si-snr: 22.26
179
+ epoch: 138, lr: 1.00e-08 - train si-snr: 7.30e-02 - valid si-snr: 22.26
180
+ epoch: 139, lr: 1.00e-08 - train si-snr: -9.77e-02 - valid si-snr: 22.26
181
+ epoch: 140, lr: 1.00e-08 - train si-snr: -1.41e-01 - valid si-snr: 22.26
182
+ epoch: 141, lr: 1.00e-08 - train si-snr: -1.82e-02 - valid si-snr: 22.26
183
+ epoch: 142, lr: 1.00e-08 - train si-snr: -5.03e-02 - valid si-snr: 22.26
184
+ epoch: 143, lr: 1.00e-08 - train si-snr: -9.63e-02 - valid si-snr: 22.26
185
+ epoch: 144, lr: 1.00e-08 - train si-snr: -1.29e-02 - valid si-snr: 22.26
186
+ epoch: 145, lr: 1.00e-08 - train si-snr: -3.77e-02 - valid si-snr: 22.26
187
+ epoch: 146, lr: 1.00e-08 - train si-snr: -1.36e-01 - valid si-snr: 22.26
188
+ epoch: 147, lr: 1.00e-08 - train si-snr: -1.02e-01 - valid si-snr: 22.26
189
+ epoch: 148, lr: 1.00e-08 - train si-snr: 1.05e-01 - valid si-snr: 22.26
190
+ epoch: 149, lr: 1.00e-08 - train si-snr: -1.08e-01 - valid si-snr: 22.26
191
+ epoch: 150, lr: 1.00e-08 - train si-snr: 1.28e-02 - valid si-snr: 22.26
192
+ epoch: 151, lr: 1.00e-08 - train si-snr: -8.94e-02 - valid si-snr: 22.26
193
+ epoch: 152, lr: 1.00e-08 - train si-snr: -9.64e-02 - valid si-snr: 22.26
194
+ epoch: 153, lr: 1.00e-08 - train si-snr: -1.32e-01 - valid si-snr: 22.26
195
+ epoch: 154, lr: 1.00e-08 - train si-snr: 2.86e-02 - valid si-snr: 22.26
196
+ epoch: 155, lr: 1.00e-08 - train si-snr: -2.50e-02 - valid si-snr: 22.26
197
+ epoch: 156, lr: 1.00e-08 - train si-snr: -1.44e-02 - valid si-snr: 22.26
198
+ epoch: 157, lr: 1.00e-08 - train si-snr: 9.09e-02 - valid si-snr: 22.26
199
+ epoch: 158, lr: 1.00e-08 - train si-snr: 6.12e-03 - valid si-snr: 22.26
200
+ epoch: 159, lr: 1.00e-08 - train si-snr: -3.80e-02 - valid si-snr: 22.26
201
+ epoch: 160, lr: 1.00e-08 - train si-snr: 4.51e-02 - valid si-snr: 22.26
202
+ epoch: 161, lr: 1.00e-08 - train si-snr: -2.98e-02 - valid si-snr: 22.26
203
+ epoch: 162, lr: 1.00e-08 - train si-snr: -2.20e-03 - valid si-snr: 22.26
204
+ epoch: 163, lr: 1.00e-08 - train si-snr: -1.64e-01 - valid si-snr: 22.26
205
+ epoch: 164, lr: 1.00e-08 - train si-snr: -3.20e-02 - valid si-snr: 22.26
206
+ epoch: 165, lr: 1.00e-08 - train si-snr: 3.47e-03 - valid si-snr: 22.26
207
+ epoch: 166, lr: 1.00e-08 - train si-snr: -8.60e-02 - valid si-snr: 22.26
208
+ epoch: 167, lr: 1.00e-08 - train si-snr: 6.45e-03 - valid si-snr: 22.26
209
+ epoch: 168, lr: 1.00e-08 - train si-snr: 1.17e-02 - valid si-snr: 22.26
210
+ epoch: 169, lr: 1.00e-08 - train si-snr: -4.05e-02 - valid si-snr: 22.26
211
+ epoch: 170, lr: 1.00e-08 - train si-snr: -1.26e-01 - valid si-snr: 22.26
212
+ epoch: 171, lr: 1.00e-08 - train si-snr: -1.06e-01 - valid si-snr: 22.26
213
+ epoch: 172, lr: 1.00e-08 - train si-snr: -1.26e-01 - valid si-snr: 22.26
214
+ epoch: 173, lr: 1.00e-08 - train si-snr: -7.41e-02 - valid si-snr: 22.26
215
+ epoch: 174, lr: 1.00e-08 - train si-snr: 1.57e-02 - valid si-snr: 22.26
216
+ epoch: 175, lr: 1.00e-08 - train si-snr: -1.48e-02 - valid si-snr: 22.26
217
+ epoch: 176, lr: 1.00e-08 - train si-snr: 6.87e-02 - valid si-snr: 22.26
218
+ epoch: 177, lr: 1.00e-08 - train si-snr: -6.77e-02 - valid si-snr: 22.26
219
+ epoch: 178, lr: 1.00e-08 - train si-snr: -1.75e-01 - valid si-snr: 22.26
220
+ epoch: 179, lr: 1.00e-08 - train si-snr: -8.73e-02 - valid si-snr: 22.26
221
+ epoch: 180, lr: 1.00e-08 - train si-snr: -7.13e-02 - valid si-snr: 22.26
222
+ epoch: 181, lr: 1.00e-08 - train si-snr: -1.28e-01 - valid si-snr: 22.26
223
+ epoch: 182, lr: 1.00e-08 - train si-snr: 2.53e-02 - valid si-snr: 22.26
224
+ epoch: 183, lr: 1.00e-08 - train si-snr: 5.30e-02 - valid si-snr: 22.26
225
+ epoch: 184, lr: 1.00e-08 - train si-snr: -6.50e-02 - valid si-snr: 22.26
226
+ epoch: 185, lr: 1.00e-08 - train si-snr: -7.48e-02 - valid si-snr: 22.26
227
+ epoch: 186, lr: 1.00e-08 - train si-snr: -6.33e-02 - valid si-snr: 22.26
228
+ epoch: 187, lr: 1.00e-08 - train si-snr: -5.01e-02 - valid si-snr: 22.26
229
+ epoch: 188, lr: 1.00e-08 - train si-snr: -2.82e-03 - valid si-snr: 22.26
230
+ epoch: 189, lr: 1.00e-08 - train si-snr: -1.37e-01 - valid si-snr: 22.26
231
+ epoch: 190, lr: 1.00e-08 - train si-snr: -3.86e-02 - valid si-snr: 22.26
232
+ epoch: 191, lr: 1.00e-08 - train si-snr: -4.23e-02 - valid si-snr: 22.26
233
+ epoch: 192, lr: 1.00e-08 - train si-snr: -7.80e-02 - valid si-snr: 22.26
234
+ epoch: 193, lr: 1.00e-08 - train si-snr: -2.90e-02 - valid si-snr: 22.26
235
+ epoch: 194, lr: 1.00e-08 - train si-snr: -1.21e-01 - valid si-snr: 22.26
236
+ epoch: 195, lr: 1.00e-08 - train si-snr: 8.91e-03 - valid si-snr: 22.26
237
+ epoch: 196, lr: 1.00e-08 - train si-snr: -5.28e-02 - valid si-snr: 22.26
238
+ epoch: 197, lr: 1.00e-08 - train si-snr: 9.40e-02 - valid si-snr: 22.26
239
+ epoch: 198, lr: 1.00e-08 - train si-snr: -4.55e-02 - valid si-snr: 22.26
240
+ epoch: 199, lr: 1.00e-08 - train si-snr: -6.24e-02 - valid si-snr: 22.26
241
+ epoch: 200, lr: 1.00e-08 - train si-snr: 5.69e-03 - valid si-snr: 22.26
242
+ Epoch loaded: 104 - test si-snr: 20.22
Sepformer/results/sepformer_4mix/1234/env.log ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SpeechBrain system description
2
+ ==============================
3
+ Python version:
4
+ 3.11.13 (main, Jun 5 2025, 13:12:00) [GCC 11.2.0]
5
+ ==============================
6
+ Installed Python packages:
7
+ black==24.3.0
8
+ certifi==2025.6.15
9
+ cfgv==3.4.0
10
+ charset-normalizer==3.4.2
11
+ click==8.1.7
12
+ distlib==0.3.9
13
+ docstring_parser_fork==0.0.12
14
+ filelock==3.18.0
15
+ flake8==7.0.0
16
+ fsspec==2025.5.1
17
+ future==1.0.0
18
+ hf-xet==1.1.5
19
+ huggingface-hub==0.33.1
20
+ HyperPyYAML==1.2.2
21
+ identify==2.6.12
22
+ idna==3.10
23
+ iniconfig==2.1.0
24
+ isort==5.13.2
25
+ Jinja2==3.1.6
26
+ joblib==1.5.1
27
+ MarkupSafe==3.0.2
28
+ mccabe==0.7.0
29
+ mir_eval==0.6
30
+ mpmath==1.3.0
31
+ mypy_extensions==1.1.0
32
+ networkx==3.5
33
+ nodeenv==1.9.1
34
+ numpy==2.3.1
35
+ nvidia-cublas-cu12==12.6.4.1
36
+ nvidia-cuda-cupti-cu12==12.6.80
37
+ nvidia-cuda-nvrtc-cu12==12.6.77
38
+ nvidia-cuda-runtime-cu12==12.6.77
39
+ nvidia-cudnn-cu12==9.5.1.17
40
+ nvidia-cufft-cu12==11.3.0.4
41
+ nvidia-cufile-cu12==1.11.1.6
42
+ nvidia-curand-cu12==10.3.7.77
43
+ nvidia-cusolver-cu12==11.7.1.2
44
+ nvidia-cusparse-cu12==12.5.4.2
45
+ nvidia-cusparselt-cu12==0.6.3
46
+ nvidia-nccl-cu12==2.26.2
47
+ nvidia-nvjitlink-cu12==12.6.85
48
+ nvidia-nvtx-cu12==12.6.77
49
+ packaging==25.0
50
+ pandas==2.3.0
51
+ pathspec==0.12.1
52
+ platformdirs==4.3.8
53
+ pluggy==1.6.0
54
+ pre_commit==4.2.0
55
+ pycodestyle==2.11.0
56
+ pydoclint==0.4.1
57
+ pyflakes==3.2.0
58
+ pygtrie==2.5.0
59
+ pyloudnorm==0.1.1
60
+ pytest==7.4.0
61
+ python-dateutil==2.9.0.post0
62
+ pytz==2025.2
63
+ PyYAML==6.0.2
64
+ regex==2024.11.6
65
+ requests==2.32.4
66
+ ruamel.yaml==0.18.14
67
+ ruamel.yaml.clib==0.2.12
68
+ safetensors==0.5.3
69
+ scipy==1.16.0
70
+ sentencepiece==0.2.0
71
+ six==1.17.0
72
+ speechbrain==1.0.3
73
+ sympy==1.14.0
74
+ tokenizers==0.21.2
75
+ torch==2.7.1
76
+ torchaudio==2.7.1
77
+ tqdm==4.67.1
78
+ transformers==4.53.0
79
+ triton==3.3.1
80
+ typing_extensions==4.14.0
81
+ tzdata==2025.2
82
+ urllib3==2.5.0
83
+ virtualenv==20.31.2
84
+ yamllint==1.35.1
85
+ ==============================
86
+ Git revision:
87
+ 476ac4f
88
+ ==============================
89
+ CUDA version:
90
+ 12.6
Sepformer/results/sepformer_4mix/1234/hyperparams.yaml ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated 2025-06-27 from:
2
+ # /home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/hparams/sepformer_4mix.yaml
3
+ # yamllint disable
4
+ # ################################
5
+ # Model: SepFormer for source separation
6
+ # https://arxiv.org/abs/2010.13154
7
+ # ################################
8
+ #
9
+ # Basic parameters
10
+ # Seed needs to be set at top of yaml, before objects with parameters are made
11
+ #
12
+ seed: 1234
13
+ __set_seed: !apply:speechbrain.utils.seed_everything [1234]
14
+
15
+ # Data params
16
+
17
+ # e.g. '/yourpath/Libri3Mix/train-clean-360/'
18
+ # the data folder is needed even if dynamic mixing is applied
19
+ data_folder: /data/
20
+
21
+ # This is needed only if dynamic mixing is applied
22
+ base_folder_dm: /yourpath/
23
+
24
+ experiment_name: sepformer_4mix
25
+ output_folder: results/sepformer_4mix/1234
26
+ train_log: results/sepformer_4mix/1234/train_log.txt
27
+ save_folder: results/sepformer_4mix/1234/save
28
+ train_data: results/sepformer_4mix/1234/save/record_tr.csv
29
+ valid_data: results/sepformer_4mix/1234/save/record_val.csv
30
+ test_data: results/sepformer_4mix/1234/save/test_data.csv
31
+ skip_prep: false
32
+
33
+ ckpt_interval_minutes: 60
34
+
35
+ # Experiment params
36
+ precision: fp16 # bf16, fp16 or fp32 # Set it to True for mixed precision
37
+ num_spks: 4
38
+ noprogressbar: false
39
+ save_audio: false # Save estimated sources on disk
40
+ sample_rate: 16000
41
+
42
+ ####################### Training Parameters ####################################
43
+ N_epochs: 200
44
+ batch_size: 1
45
+ lr: 0.00015
46
+ clip_grad_norm: 5
47
+ loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
48
+ # if True, the training sequences are cut to a specified length
49
+ limit_training_signal_len: true
50
+ # this is the length of sequences if we choose to limit
51
+ # the signal length of training sequences
52
+ training_signal_len: 64000000
53
+
54
+ # Set it to True to dynamically create mixtures at training time
55
+ dynamic_mixing: false
56
+ use_wham_noise: false
57
+
58
+ # Parameters for data augmentation
59
+ use_wavedrop: false
60
+ use_speedperturb: true
61
+ use_rand_shift: false
62
+ min_shift: -8000
63
+ max_shift: 8000
64
+
65
+ # Speed perturbation
66
+ speed_changes: &id001 [95, 100, 105]
67
+
68
+ # Frequency drop: randomly drops a number of frequency bands to zero.
69
+ speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
70
+ orig_freq: 16000
71
+ speeds: *id001
72
+ drop_freq_low: 0 # Min frequency band dropout probability
73
+ drop_freq_high: 1 # Max frequency band dropout probability
74
+ drop_freq_count_low: 1 # Min number of frequency bands to drop
75
+ drop_freq_count_high: 3 # Max number of frequency bands to drop
76
+ drop_freq_width: 0.05 # Width of frequency bands to drop
77
+
78
+ drop_freq: !new:speechbrain.augment.time_domain.DropFreq
79
+ drop_freq_low: 0
80
+ drop_freq_high: 1
81
+ drop_freq_count_low: 1
82
+ drop_freq_count_high: 3
83
+ drop_freq_width: 0.05
84
+
85
+ # Time drop: randomly drops a number of temporal chunks.
86
+ drop_chunk_count_low: 1 # Min number of audio chunks to drop
87
+ drop_chunk_count_high: 5 # Max number of audio chunks to drop
88
+ drop_chunk_length_low: 1000 # Min length of audio chunks to drop
89
+ drop_chunk_length_high: 2000 # Max length of audio chunks to drop
90
+
91
+ drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
92
+ drop_length_low: 1000
93
+ drop_length_high: 2000
94
+ drop_count_low: 1
95
+ drop_count_high: 5
96
+
97
+ # loss thresholding -- this thresholds the training loss
98
+ threshold_byloss: true
99
+ threshold: -30
100
+
101
+ # Encoder parameters
102
+ N_encoder_out: 256
103
+ out_channels: 256
104
+ kernel_size: 32
105
+ kernel_stride: 16
106
+ d_ffn: 1024
107
+
108
+ # Dataloader options
109
+ dataloader_opts:
110
+ batch_size: 1
111
+ num_workers: 3
112
+
113
+
114
+ # Specifying the network
115
+ Encoder: &id004 !new:speechbrain.lobes.models.dual_path.Encoder
116
+ kernel_size: 32
117
+ out_channels: 256
118
+
119
+
120
+ SBtfintra: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
121
+ num_layers: 8
122
+ d_model: 256
123
+ nhead: 8
124
+ d_ffn: 1024
125
+ dropout: 0
126
+ use_positional_encoding: true
127
+ norm_before: true
128
+
129
+ SBtfinter: &id003 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
130
+ num_layers: 8
131
+ d_model: 256
132
+ nhead: 8
133
+ d_ffn: 1024
134
+ dropout: 0
135
+ use_positional_encoding: true
136
+ norm_before: true
137
+
138
+ MaskNet: &id006 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
139
+
140
+ num_spks: 4
141
+ in_channels: 256
142
+ out_channels: 256
143
+ num_layers: 2
144
+ K: 250
145
+ intra_model: *id002
146
+ inter_model: *id003
147
+ norm: ln
148
+ linear_layer_after_inter_intra: false
149
+ skip_around_intra: true
150
+
151
+ Decoder: &id005 !new:speechbrain.lobes.models.dual_path.Decoder
152
+ in_channels: 256
153
+ out_channels: 1
154
+ kernel_size: 32
155
+ stride: 16
156
+ bias: false
157
+
158
+ optimizer: !name:torch.optim.Adam
159
+ lr: 0.00015
160
+ weight_decay: 0
161
+
162
+ loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
163
+
164
+ lr_scheduler: !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
165
+ factor: 0.5
166
+ patience: 2
167
+ dont_halve_until_epoch: 5
168
+
169
+ epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
170
+ # lr_scheduler: !ref <lr_scheduler>
171
+
172
+ limit: 200
173
+
174
+ modules:
175
+ encoder: *id004
176
+ decoder: *id005
177
+ masknet: *id006
178
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
179
+ checkpoints_dir: results/sepformer_4mix/1234/save
180
+ recoverables:
181
+ encoder: *id004
182
+ decoder: *id005
183
+ masknet: *id006
184
+ counter: *id007
185
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
186
+ save_file: results/sepformer_4mix/1234/train_log.txt
187
+
188
+ # # If you do not want to use the pretrained separator you can simply delete pretrained_separator field.
189
+ # pretrained_separator: !new:speechbrain.utils.parameter_transfer.Pretrainer
190
+ # collect_in: !ref <save_folder>
191
+ # loadables:
192
+ # encoder: !ref <Encoder>
193
+ # decoder: !ref <Decoder>
194
+ # masknet: !ref <MaskNet>
195
+ # paths:
196
+ # encoder: speechbrain/sepformer-wsj03mix/encoder.ckpt
197
+ # decoder: speechbrain/sepformer-wsj03mix/decoder.ckpt
198
+ # masknet: speechbrain/sepformer-wsj03mix/masknet.ckpt
Sepformer/results/sepformer_4mix/1234/log.txt ADDED
@@ -0,0 +1,762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-27 17:13:10,582 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
2
+ 2025-06-27 17:13:10,583 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
3
+ 2025-06-27 17:13:10,583 - speechbrain.core - INFO - Beginning experiment!
4
+ 2025-06-27 17:13:10,583 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
5
+ 2025-06-27 17:13:10,831 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
6
+ certifi==2025.6.15
7
+ cfgv==3.4.0
8
+ charset-normalizer==3.4.2
9
+ click==8.1.7
10
+ distlib==0.3.9
11
+ docstring_parser_fork==0.0.12
12
+ filelock==3.18.0
13
+ flake8==7.0.0
14
+ fsspec==2025.5.1
15
+ future==1.0.0
16
+ hf-xet==1.1.5
17
+ huggingface-hub==0.33.0
18
+ HyperPyYAML==1.2.2
19
+ identify==2.6.12
20
+ idna==3.10
21
+ iniconfig==2.1.0
22
+ isort==5.13.2
23
+ Jinja2==3.1.6
24
+ joblib==1.5.1
25
+ MarkupSafe==3.0.2
26
+ mccabe==0.7.0
27
+ mir_eval==0.6
28
+ mpmath==1.3.0
29
+ mypy_extensions==1.1.0
30
+ networkx==3.5
31
+ nodeenv==1.9.1
32
+ numpy==2.3.1
33
+ nvidia-cublas-cu12==12.6.4.1
34
+ nvidia-cuda-cupti-cu12==12.6.80
35
+ nvidia-cuda-nvrtc-cu12==12.6.77
36
+ nvidia-cuda-runtime-cu12==12.6.77
37
+ nvidia-cudnn-cu12==9.5.1.17
38
+ nvidia-cufft-cu12==11.3.0.4
39
+ nvidia-cufile-cu12==1.11.1.6
40
+ nvidia-curand-cu12==10.3.7.77
41
+ nvidia-cusolver-cu12==11.7.1.2
42
+ nvidia-cusparse-cu12==12.5.4.2
43
+ nvidia-cusparselt-cu12==0.6.3
44
+ nvidia-nccl-cu12==2.26.2
45
+ nvidia-nvjitlink-cu12==12.6.85
46
+ nvidia-nvtx-cu12==12.6.77
47
+ packaging==25.0
48
+ pandas==2.3.0
49
+ pathspec==0.12.1
50
+ platformdirs==4.3.8
51
+ pluggy==1.6.0
52
+ pre_commit==4.2.0
53
+ pycodestyle==2.11.0
54
+ pydoclint==0.4.1
55
+ pyflakes==3.2.0
56
+ Pygments==2.19.2
57
+ pygtrie==2.5.0
58
+ pyloudnorm==0.1.1
59
+ pytest==7.4.0
60
+ python-dateutil==2.9.0.post0
61
+ pytz==2025.2
62
+ PyYAML==6.0.2
63
+ regex==2024.11.6
64
+ requests==2.32.4
65
+ ruamel.yaml==0.18.14
66
+ ruamel.yaml.clib==0.2.12
67
+ safetensors==0.5.3
68
+ scipy==1.16.0
69
+ sentencepiece==0.2.0
70
+ six==1.17.0
71
+ speechbrain==1.0.3
72
+ sympy==1.14.0
73
+ tokenizers==0.21.2
74
+ torch==2.7.1
75
+ torchaudio==2.7.1
76
+ tqdm==4.67.1
77
+ transformers==4.52.4
78
+ triton==3.3.1
79
+ typing_extensions==4.14.0
80
+ tzdata==2025.2
81
+ urllib3==2.5.0
82
+ virtualenv==20.31.2
83
+ yamllint==1.35.1
84
+
85
+
86
+ 2025-06-27 17:13:10,836 - speechbrain.core - ERROR - Exception:
87
+ Traceback (most recent call last):
88
+ File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 634, in <module>
89
+ train_data, valid_data, test_data = dataio_prep(hparams)
90
+ ^^^^^^^^^^^^^^^^^^^^
91
+ File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 446, in dataio_prep
92
+ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
93
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
94
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataset.py", line 417, in from_csv
95
+ data = load_data_csv(csv_path, replacements)
96
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
97
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataio.py", line 138, in load_data_csv
98
+ with open(csv_path, newline="", encoding="utf-8") as csvfile:
99
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
100
+ FileNotFoundError: [Errno 2] No such file or directory: 'results/sepformer_4mix/1234/save/record_tr.csv'
101
+ 2025-06-27 17:17:17,084 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [allow_tf32, disable_jit_profiling]
102
+ 2025-06-27 17:17:17,085 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
103
+ 2025-06-27 17:17:17,085 - speechbrain.core - INFO - Beginning experiment!
104
+ 2025-06-27 17:17:17,085 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
105
+ 2025-06-27 17:17:17,318 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
106
+ certifi==2025.6.15
107
+ cfgv==3.4.0
108
+ charset-normalizer==3.4.2
109
+ click==8.1.7
110
+ distlib==0.3.9
111
+ docstring_parser_fork==0.0.12
112
+ filelock==3.18.0
113
+ flake8==7.0.0
114
+ fsspec==2025.5.1
115
+ future==1.0.0
116
+ hf-xet==1.1.5
117
+ huggingface-hub==0.33.0
118
+ HyperPyYAML==1.2.2
119
+ identify==2.6.12
120
+ idna==3.10
121
+ iniconfig==2.1.0
122
+ isort==5.13.2
123
+ Jinja2==3.1.6
124
+ joblib==1.5.1
125
+ MarkupSafe==3.0.2
126
+ mccabe==0.7.0
127
+ mir_eval==0.6
128
+ mpmath==1.3.0
129
+ mypy_extensions==1.1.0
130
+ networkx==3.5
131
+ nodeenv==1.9.1
132
+ numpy==2.3.1
133
+ nvidia-cublas-cu12==12.6.4.1
134
+ nvidia-cuda-cupti-cu12==12.6.80
135
+ nvidia-cuda-nvrtc-cu12==12.6.77
136
+ nvidia-cuda-runtime-cu12==12.6.77
137
+ nvidia-cudnn-cu12==9.5.1.17
138
+ nvidia-cufft-cu12==11.3.0.4
139
+ nvidia-cufile-cu12==1.11.1.6
140
+ nvidia-curand-cu12==10.3.7.77
141
+ nvidia-cusolver-cu12==11.7.1.2
142
+ nvidia-cusparse-cu12==12.5.4.2
143
+ nvidia-cusparselt-cu12==0.6.3
144
+ nvidia-nccl-cu12==2.26.2
145
+ nvidia-nvjitlink-cu12==12.6.85
146
+ nvidia-nvtx-cu12==12.6.77
147
+ packaging==25.0
148
+ pandas==2.3.0
149
+ pathspec==0.12.1
150
+ platformdirs==4.3.8
151
+ pluggy==1.6.0
152
+ pre_commit==4.2.0
153
+ pycodestyle==2.11.0
154
+ pydoclint==0.4.1
155
+ pyflakes==3.2.0
156
+ Pygments==2.19.2
157
+ pygtrie==2.5.0
158
+ pyloudnorm==0.1.1
159
+ pytest==7.4.0
160
+ python-dateutil==2.9.0.post0
161
+ pytz==2025.2
162
+ PyYAML==6.0.2
163
+ regex==2024.11.6
164
+ requests==2.32.4
165
+ ruamel.yaml==0.18.14
166
+ ruamel.yaml.clib==0.2.12
167
+ safetensors==0.5.3
168
+ scipy==1.16.0
169
+ sentencepiece==0.2.0
170
+ six==1.17.0
171
+ speechbrain==1.0.3
172
+ sympy==1.14.0
173
+ tokenizers==0.21.2
174
+ torch==2.7.1
175
+ torchaudio==2.7.1
176
+ tqdm==4.67.1
177
+ transformers==4.52.4
178
+ triton==3.3.1
179
+ typing_extensions==4.14.0
180
+ tzdata==2025.2
181
+ urllib3==2.5.0
182
+ virtualenv==20.31.2
183
+ yamllint==1.35.1
184
+
185
+
186
+ 2025-06-27 17:17:17,325 - speechbrain.core - ERROR - Exception:
187
+ Traceback (most recent call last):
188
+ File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 634, in <module>
189
+ train_data, valid_data, test_data = dataio_prep(hparams)
190
+ ^^^^^^^^^^^^^^^^^^^^
191
+ File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 446, in dataio_prep
192
+ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
193
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
194
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataset.py", line 417, in from_csv
195
+ data = load_data_csv(csv_path, replacements)
196
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
197
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/dataio/dataio.py", line 138, in load_data_csv
198
+ with open(csv_path, newline="", encoding="utf-8") as csvfile:
199
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
200
+ FileNotFoundError: [Errno 2] No such file or directory: 'results/sepformer_4mix/1234/save/record_tr.csv'
201
+ 2025-06-27 17:18:04,558 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
202
+ 2025-06-27 17:18:04,559 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
203
+ 2025-06-27 17:18:04,559 - speechbrain.core - INFO - Beginning experiment!
204
+ 2025-06-27 17:18:04,559 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
205
+ 2025-06-27 17:18:04,806 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
206
+ certifi==2025.6.15
207
+ cfgv==3.4.0
208
+ charset-normalizer==3.4.2
209
+ click==8.1.7
210
+ distlib==0.3.9
211
+ docstring_parser_fork==0.0.12
212
+ filelock==3.18.0
213
+ flake8==7.0.0
214
+ fsspec==2025.5.1
215
+ future==1.0.0
216
+ hf-xet==1.1.5
217
+ huggingface-hub==0.33.0
218
+ HyperPyYAML==1.2.2
219
+ identify==2.6.12
220
+ idna==3.10
221
+ iniconfig==2.1.0
222
+ isort==5.13.2
223
+ Jinja2==3.1.6
224
+ joblib==1.5.1
225
+ MarkupSafe==3.0.2
226
+ mccabe==0.7.0
227
+ mir_eval==0.6
228
+ mpmath==1.3.0
229
+ mypy_extensions==1.1.0
230
+ networkx==3.5
231
+ nodeenv==1.9.1
232
+ numpy==2.3.1
233
+ nvidia-cublas-cu12==12.6.4.1
234
+ nvidia-cuda-cupti-cu12==12.6.80
235
+ nvidia-cuda-nvrtc-cu12==12.6.77
236
+ nvidia-cuda-runtime-cu12==12.6.77
237
+ nvidia-cudnn-cu12==9.5.1.17
238
+ nvidia-cufft-cu12==11.3.0.4
239
+ nvidia-cufile-cu12==1.11.1.6
240
+ nvidia-curand-cu12==10.3.7.77
241
+ nvidia-cusolver-cu12==11.7.1.2
242
+ nvidia-cusparse-cu12==12.5.4.2
243
+ nvidia-cusparselt-cu12==0.6.3
244
+ nvidia-nccl-cu12==2.26.2
245
+ nvidia-nvjitlink-cu12==12.6.85
246
+ nvidia-nvtx-cu12==12.6.77
247
+ packaging==25.0
248
+ pandas==2.3.0
249
+ pathspec==0.12.1
250
+ platformdirs==4.3.8
251
+ pluggy==1.6.0
252
+ pre_commit==4.2.0
253
+ pycodestyle==2.11.0
254
+ pydoclint==0.4.1
255
+ pyflakes==3.2.0
256
+ Pygments==2.19.2
257
+ pygtrie==2.5.0
258
+ pyloudnorm==0.1.1
259
+ pytest==7.4.0
260
+ python-dateutil==2.9.0.post0
261
+ pytz==2025.2
262
+ PyYAML==6.0.2
263
+ regex==2024.11.6
264
+ requests==2.32.4
265
+ ruamel.yaml==0.18.14
266
+ ruamel.yaml.clib==0.2.12
267
+ safetensors==0.5.3
268
+ scipy==1.16.0
269
+ sentencepiece==0.2.0
270
+ six==1.17.0
271
+ speechbrain==1.0.3
272
+ sympy==1.14.0
273
+ tokenizers==0.21.2
274
+ torch==2.7.1
275
+ torchaudio==2.7.1
276
+ tqdm==4.67.1
277
+ transformers==4.52.4
278
+ triton==3.3.1
279
+ typing_extensions==4.14.0
280
+ tzdata==2025.2
281
+ urllib3==2.5.0
282
+ virtualenv==20.31.2
283
+ yamllint==1.35.1
284
+
285
+
286
+ 2025-06-27 17:18:05,007 - speechbrain.core - INFO - Info: precision arg from hparam file is used
287
+ 2025-06-27 17:18:05,008 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
288
+ 2025-06-27 17:18:05,008 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
289
+ 2025-06-27 17:18:05,221 - speechbrain.core - INFO - Gradscaler enabled: `True`
290
+ 2025-06-27 17:18:05,221 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
291
+ 2025-06-27 17:18:05,221 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
292
+ 2025-06-27 17:18:05,222 - speechbrain.core - INFO - Separation Model Statistics:
293
+ * Total Number of Trainable Parameters: 25.8M
294
+ * Total Number of Parameters: 25.8M
295
+ * Trainable Parameters represent 100.0000% of the total size.
296
+ 2025-06-27 17:18:06,855 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00
297
+ 2025-06-27 17:18:07,209 - speechbrain.utils.epoch_loop - INFO - Going into epoch 163
298
+ 2025-06-27 17:18:10,757 - speechbrain.core - ERROR - Exception:
299
+ Traceback (most recent call last):
300
+ File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 656, in <module>
301
+ separator.fit(
302
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1575, in fit
303
+ self._fit_train(train_set=train_set, epoch=epoch, enable=enable)
304
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1400, in _fit_train
305
+ loss = self.fit_batch(batch)
306
+ ^^^^^^^^^^^^^^^^^^^^^
307
+ File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 148, in fit_batch
308
+ self.scaler.scale(loss).backward()
309
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/_tensor.py", line 648, in backward
310
+ torch.autograd.backward(
311
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/autograd/__init__.py", line 353, in backward
312
+ _engine_run_backward(
313
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/autograd/graph.py", line 824, in _engine_run_backward
314
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
315
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
316
+ KeyboardInterrupt
317
+ 2025-06-27 17:24:05,950 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
318
+ 2025-06-27 17:24:05,951 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
319
+ 2025-06-27 17:24:05,951 - speechbrain.core - INFO - Beginning experiment!
320
+ 2025-06-27 17:24:05,951 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
321
+ 2025-06-27 17:24:06,192 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
322
+ certifi==2025.6.15
323
+ cfgv==3.4.0
324
+ charset-normalizer==3.4.2
325
+ click==8.1.7
326
+ distlib==0.3.9
327
+ docstring_parser_fork==0.0.12
328
+ filelock==3.18.0
329
+ flake8==7.0.0
330
+ fsspec==2025.5.1
331
+ future==1.0.0
332
+ hf-xet==1.1.5
333
+ huggingface-hub==0.33.0
334
+ HyperPyYAML==1.2.2
335
+ identify==2.6.12
336
+ idna==3.10
337
+ iniconfig==2.1.0
338
+ isort==5.13.2
339
+ Jinja2==3.1.6
340
+ joblib==1.5.1
341
+ MarkupSafe==3.0.2
342
+ mccabe==0.7.0
343
+ mir_eval==0.6
344
+ mpmath==1.3.0
345
+ mypy_extensions==1.1.0
346
+ networkx==3.5
347
+ nodeenv==1.9.1
348
+ numpy==2.3.1
349
+ nvidia-cublas-cu12==12.6.4.1
350
+ nvidia-cuda-cupti-cu12==12.6.80
351
+ nvidia-cuda-nvrtc-cu12==12.6.77
352
+ nvidia-cuda-runtime-cu12==12.6.77
353
+ nvidia-cudnn-cu12==9.5.1.17
354
+ nvidia-cufft-cu12==11.3.0.4
355
+ nvidia-cufile-cu12==1.11.1.6
356
+ nvidia-curand-cu12==10.3.7.77
357
+ nvidia-cusolver-cu12==11.7.1.2
358
+ nvidia-cusparse-cu12==12.5.4.2
359
+ nvidia-cusparselt-cu12==0.6.3
360
+ nvidia-nccl-cu12==2.26.2
361
+ nvidia-nvjitlink-cu12==12.6.85
362
+ nvidia-nvtx-cu12==12.6.77
363
+ packaging==25.0
364
+ pandas==2.3.0
365
+ pathspec==0.12.1
366
+ platformdirs==4.3.8
367
+ pluggy==1.6.0
368
+ pre_commit==4.2.0
369
+ pycodestyle==2.11.0
370
+ pydoclint==0.4.1
371
+ pyflakes==3.2.0
372
+ Pygments==2.19.2
373
+ pygtrie==2.5.0
374
+ pyloudnorm==0.1.1
375
+ pytest==7.4.0
376
+ python-dateutil==2.9.0.post0
377
+ pytz==2025.2
378
+ PyYAML==6.0.2
379
+ regex==2024.11.6
380
+ requests==2.32.4
381
+ ruamel.yaml==0.18.14
382
+ ruamel.yaml.clib==0.2.12
383
+ safetensors==0.5.3
384
+ scipy==1.16.0
385
+ sentencepiece==0.2.0
386
+ six==1.17.0
387
+ speechbrain==1.0.3
388
+ sympy==1.14.0
389
+ tokenizers==0.21.2
390
+ torch==2.7.1
391
+ torchaudio==2.7.1
392
+ tqdm==4.67.1
393
+ transformers==4.52.4
394
+ triton==3.3.1
395
+ typing_extensions==4.14.0
396
+ tzdata==2025.2
397
+ urllib3==2.5.0
398
+ virtualenv==20.31.2
399
+ yamllint==1.35.1
400
+
401
+
402
+ 2025-06-27 17:24:06,389 - speechbrain.core - INFO - Info: precision arg from hparam file is used
403
+ 2025-06-27 17:24:06,390 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
404
+ 2025-06-27 17:24:06,390 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
405
+ 2025-06-27 17:24:06,573 - speechbrain.core - INFO - Gradscaler enabled: `True`
406
+ 2025-06-27 17:24:06,573 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
407
+ 2025-06-27 17:24:06,573 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
408
+ 2025-06-27 17:24:06,574 - speechbrain.core - INFO - Separation Model Statistics:
409
+ * Total Number of Trainable Parameters: 25.8M
410
+ * Total Number of Parameters: 25.8M
411
+ * Trainable Parameters represent 100.0000% of the total size.
412
+ 2025-06-27 17:24:08,245 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00
413
+ 2025-06-27 17:24:08,608 - speechbrain.utils.epoch_loop - INFO - Going into epoch 163
414
+ 2025-06-27 17:24:11,017 - speechbrain.core - ERROR - Exception:
415
+ Traceback (most recent call last):
416
+ File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 656, in <module>
417
+ separator.fit(
418
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1575, in fit
419
+ self._fit_train(train_set=train_set, epoch=epoch, enable=enable)
420
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/core.py", line 1400, in _fit_train
421
+ loss = self.fit_batch(batch)
422
+ ^^^^^^^^^^^^^^^^^^^^^
423
+ File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 133, in fit_batch
424
+ predictions, targets = self.compute_forward(
425
+ ^^^^^^^^^^^^^^^^^^^^^
426
+ File "/home/youzhenghai/github/Vocal-Separartion/Sepformer/separation/train.py", line 87, in compute_forward
427
+ est_mask = self.hparams.MaskNet(mix_w)
428
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^
429
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
430
+ return self._call_impl(*args, **kwargs)
431
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
432
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
433
+ return forward_call(*args, **kwargs)
434
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
435
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 1067, in forward
436
+ x = self.dual_mdl[i](x)
437
+ ^^^^^^^^^^^^^^^^^^^
438
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
439
+ return self._call_impl(*args, **kwargs)
440
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
441
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
442
+ return forward_call(*args, **kwargs)
443
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
444
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 918, in forward
445
+ inter = self.inter_mdl(inter)
446
+ ^^^^^^^^^^^^^^^^^^^^^
447
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
448
+ return self._call_impl(*args, **kwargs)
449
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
450
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
451
+ return forward_call(*args, **kwargs)
452
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
453
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 638, in forward
454
+ return self.mdl(x + pos_enc)[0]
455
+ ^^^^^^^^^^^^^^^^^^^^^
456
+ File "/work/youzhenghai/anaconda3/envs/sb/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _wrapped_call_impl
457
+ def _wrapped_call_impl(self, *args, **kwargs):
458
+
459
+ KeyboardInterrupt
460
+ 2025-06-27 21:10:07,131 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
461
+ 2025-06-27 21:10:07,133 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
462
+ 2025-06-27 21:10:07,133 - speechbrain.core - INFO - Beginning experiment!
463
+ 2025-06-27 21:10:07,133 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
464
+ 2025-06-27 21:10:07,389 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
465
+ certifi==2025.6.15
466
+ cfgv==3.4.0
467
+ charset-normalizer==3.4.2
468
+ click==8.1.7
469
+ distlib==0.3.9
470
+ docstring_parser_fork==0.0.12
471
+ filelock==3.18.0
472
+ flake8==7.0.0
473
+ fsspec==2025.5.1
474
+ future==1.0.0
475
+ hf-xet==1.1.5
476
+ huggingface-hub==0.33.1
477
+ HyperPyYAML==1.2.2
478
+ identify==2.6.12
479
+ idna==3.10
480
+ iniconfig==2.1.0
481
+ isort==5.13.2
482
+ Jinja2==3.1.6
483
+ joblib==1.5.1
484
+ MarkupSafe==3.0.2
485
+ mccabe==0.7.0
486
+ mir_eval==0.6
487
+ mpmath==1.3.0
488
+ mypy_extensions==1.1.0
489
+ networkx==3.5
490
+ nodeenv==1.9.1
491
+ numpy==2.3.1
492
+ nvidia-cublas-cu12==12.6.4.1
493
+ nvidia-cuda-cupti-cu12==12.6.80
494
+ nvidia-cuda-nvrtc-cu12==12.6.77
495
+ nvidia-cuda-runtime-cu12==12.6.77
496
+ nvidia-cudnn-cu12==9.5.1.17
497
+ nvidia-cufft-cu12==11.3.0.4
498
+ nvidia-cufile-cu12==1.11.1.6
499
+ nvidia-curand-cu12==10.3.7.77
500
+ nvidia-cusolver-cu12==11.7.1.2
501
+ nvidia-cusparse-cu12==12.5.4.2
502
+ nvidia-cusparselt-cu12==0.6.3
503
+ nvidia-nccl-cu12==2.26.2
504
+ nvidia-nvjitlink-cu12==12.6.85
505
+ nvidia-nvtx-cu12==12.6.77
506
+ packaging==25.0
507
+ pandas==2.3.0
508
+ pathspec==0.12.1
509
+ platformdirs==4.3.8
510
+ pluggy==1.6.0
511
+ pre_commit==4.2.0
512
+ pycodestyle==2.11.0
513
+ pydoclint==0.4.1
514
+ pyflakes==3.2.0
515
+ pygtrie==2.5.0
516
+ pyloudnorm==0.1.1
517
+ pytest==7.4.0
518
+ python-dateutil==2.9.0.post0
519
+ pytz==2025.2
520
+ PyYAML==6.0.2
521
+ regex==2024.11.6
522
+ requests==2.32.4
523
+ ruamel.yaml==0.18.14
524
+ ruamel.yaml.clib==0.2.12
525
+ safetensors==0.5.3
526
+ scipy==1.16.0
527
+ sentencepiece==0.2.0
528
+ six==1.17.0
529
+ speechbrain==1.0.3
530
+ sympy==1.14.0
531
+ tokenizers==0.21.2
532
+ torch==2.7.1
533
+ torchaudio==2.7.1
534
+ tqdm==4.67.1
535
+ transformers==4.53.0
536
+ triton==3.3.1
537
+ typing_extensions==4.14.0
538
+ tzdata==2025.2
539
+ urllib3==2.5.0
540
+ virtualenv==20.31.2
541
+ yamllint==1.35.1
542
+
543
+
544
+ 2025-06-27 21:10:07,393 - speechbrain.utils.superpowers - DEBUG - 476ac4f
545
+
546
+
547
+ 2025-06-27 21:10:07,996 - speechbrain.core - INFO - Info: precision arg from hparam file is used
548
+ 2025-06-27 21:10:07,997 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
549
+ 2025-06-27 21:10:07,997 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
550
+ 2025-06-27 21:10:08,035 - speechbrain.core - INFO - Gradscaler enabled: `True`
551
+ 2025-06-27 21:10:08,035 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
552
+ 2025-06-27 21:10:08,035 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
553
+ 2025-06-27 21:10:08,036 - speechbrain.core - INFO - Separation Model Statistics:
554
+ * Total Number of Trainable Parameters: 25.8M
555
+ * Total Number of Parameters: 25.8M
556
+ * Trainable Parameters represent 100.0000% of the total size.
557
+ 2025-06-27 21:10:09,782 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00
558
+ 2025-06-27 21:10:10,160 - speechbrain.utils.epoch_loop - INFO - Going into epoch 163
559
+ 2025-06-27 21:10:17,953 - speechbrain.core - ERROR - Exception:
560
+ Traceback (most recent call last):
561
+ File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 656, in <module>
562
+ separator.fit(
563
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/core.py", line 1575, in fit
564
+ self._fit_train(train_set=train_set, epoch=epoch, enable=enable)
565
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/core.py", line 1400, in _fit_train
566
+ loss = self.fit_batch(batch)
567
+ ^^^^^^^^^^^^^^^^^^^^^
568
+ File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 133, in fit_batch
569
+ predictions, targets = self.compute_forward(
570
+ ^^^^^^^^^^^^^^^^^^^^^
571
+ File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 87, in compute_forward
572
+ est_mask = self.hparams.MaskNet(mix_w)
573
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^
574
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
575
+ return self._call_impl(*args, **kwargs)
576
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
577
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
578
+ return forward_call(*args, **kwargs)
579
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
580
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 1067, in forward
581
+ x = self.dual_mdl[i](x)
582
+ ^^^^^^^^^^^^^^^^^^^
583
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
584
+ return self._call_impl(*args, **kwargs)
585
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
586
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
587
+ return forward_call(*args, **kwargs)
588
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
589
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 918, in forward
590
+ inter = self.inter_mdl(inter)
591
+ ^^^^^^^^^^^^^^^^^^^^^
592
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
593
+ return self._call_impl(*args, **kwargs)
594
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
595
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
596
+ return forward_call(*args, **kwargs)
597
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
598
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/dual_path.py", line 638, in forward
599
+ return self.mdl(x + pos_enc)[0]
600
+ ^^^^^^^^^^^^^^^^^^^^^
601
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
602
+ return self._call_impl(*args, **kwargs)
603
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
604
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
605
+ return forward_call(*args, **kwargs)
606
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
607
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/transformer/Transformer.py", line 639, in forward
608
+ output, attention = enc_layer(
609
+ ^^^^^^^^^^
610
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
611
+ return self._call_impl(*args, **kwargs)
612
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
613
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
614
+ return forward_call(*args, **kwargs)
615
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
616
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/lobes/models/transformer/Transformer.py", line 457, in forward
617
+ output, self_attn = self.self_att(
618
+ ^^^^^^^^^^^^^^
619
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
620
+ return self._call_impl(*args, **kwargs)
621
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
622
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
623
+ return forward_call(*args, **kwargs)
624
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
625
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/speechbrain/nnet/attention.py", line 865, in forward
626
+ output, attention_weights = self.att(
627
+ ^^^^^^^^^
628
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
629
+ return self._call_impl(*args, **kwargs)
630
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
631
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
632
+ return forward_call(*args, **kwargs)
633
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
634
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/modules/activation.py", line 1373, in forward
635
+ attn_output, attn_output_weights = F.multi_head_attention_forward(
636
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
637
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/functional.py", line 6230, in multi_head_attention_forward
638
+ q, k, v = _in_projection_packed(query, key, value, in_proj_weight, in_proj_bias)
639
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
640
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/torch/nn/functional.py", line 5648, in _in_projection_packed
641
+ return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
642
+ ^^^^^^^^^^^^^^^^^^^
643
+ KeyboardInterrupt
644
+ 2025-06-27 21:10:50,985 - speechbrain.utils.quirks - INFO - Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
645
+ 2025-06-27 21:10:50,986 - speechbrain.utils.quirks - INFO - Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
646
+ 2025-06-27 21:10:50,986 - speechbrain.core - INFO - Beginning experiment!
647
+ 2025-06-27 21:10:50,986 - speechbrain.core - INFO - Experiment folder: results/sepformer_4mix/1234
648
+ 2025-06-27 21:10:51,243 - speechbrain.utils.superpowers - DEBUG - black==24.3.0
649
+ certifi==2025.6.15
650
+ cfgv==3.4.0
651
+ charset-normalizer==3.4.2
652
+ click==8.1.7
653
+ distlib==0.3.9
654
+ docstring_parser_fork==0.0.12
655
+ filelock==3.18.0
656
+ flake8==7.0.0
657
+ fsspec==2025.5.1
658
+ future==1.0.0
659
+ hf-xet==1.1.5
660
+ huggingface-hub==0.33.1
661
+ HyperPyYAML==1.2.2
662
+ identify==2.6.12
663
+ idna==3.10
664
+ iniconfig==2.1.0
665
+ isort==5.13.2
666
+ Jinja2==3.1.6
667
+ joblib==1.5.1
668
+ MarkupSafe==3.0.2
669
+ mccabe==0.7.0
670
+ mir_eval==0.6
671
+ mpmath==1.3.0
672
+ mypy_extensions==1.1.0
673
+ networkx==3.5
674
+ nodeenv==1.9.1
675
+ numpy==2.3.1
676
+ nvidia-cublas-cu12==12.6.4.1
677
+ nvidia-cuda-cupti-cu12==12.6.80
678
+ nvidia-cuda-nvrtc-cu12==12.6.77
679
+ nvidia-cuda-runtime-cu12==12.6.77
680
+ nvidia-cudnn-cu12==9.5.1.17
681
+ nvidia-cufft-cu12==11.3.0.4
682
+ nvidia-cufile-cu12==1.11.1.6
683
+ nvidia-curand-cu12==10.3.7.77
684
+ nvidia-cusolver-cu12==11.7.1.2
685
+ nvidia-cusparse-cu12==12.5.4.2
686
+ nvidia-cusparselt-cu12==0.6.3
687
+ nvidia-nccl-cu12==2.26.2
688
+ nvidia-nvjitlink-cu12==12.6.85
689
+ nvidia-nvtx-cu12==12.6.77
690
+ packaging==25.0
691
+ pandas==2.3.0
692
+ pathspec==0.12.1
693
+ platformdirs==4.3.8
694
+ pluggy==1.6.0
695
+ pre_commit==4.2.0
696
+ pycodestyle==2.11.0
697
+ pydoclint==0.4.1
698
+ pyflakes==3.2.0
699
+ pygtrie==2.5.0
700
+ pyloudnorm==0.1.1
701
+ pytest==7.4.0
702
+ python-dateutil==2.9.0.post0
703
+ pytz==2025.2
704
+ PyYAML==6.0.2
705
+ regex==2024.11.6
706
+ requests==2.32.4
707
+ ruamel.yaml==0.18.14
708
+ ruamel.yaml.clib==0.2.12
709
+ safetensors==0.5.3
710
+ scipy==1.16.0
711
+ sentencepiece==0.2.0
712
+ six==1.17.0
713
+ speechbrain==1.0.3
714
+ sympy==1.14.0
715
+ tokenizers==0.21.2
716
+ torch==2.7.1
717
+ torchaudio==2.7.1
718
+ tqdm==4.67.1
719
+ transformers==4.53.0
720
+ triton==3.3.1
721
+ typing_extensions==4.14.0
722
+ tzdata==2025.2
723
+ urllib3==2.5.0
724
+ virtualenv==20.31.2
725
+ yamllint==1.35.1
726
+
727
+
728
+ 2025-06-27 21:10:51,249 - speechbrain.utils.superpowers - DEBUG - 476ac4f
729
+
730
+
731
+ 2025-06-27 21:10:51,876 - speechbrain.core - INFO - Info: precision arg from hparam file is used
732
+ 2025-06-27 21:10:51,876 - speechbrain.core - INFO - Info: noprogressbar arg from hparam file is used
733
+ 2025-06-27 21:10:51,876 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
734
+ 2025-06-27 21:10:51,915 - speechbrain.core - INFO - Gradscaler enabled: `True`
735
+ 2025-06-27 21:10:51,915 - speechbrain.core - INFO - Using training precision: `--precision=fp16`
736
+ 2025-06-27 21:10:51,915 - speechbrain.core - INFO - Using evaluation precision: `--eval_precision=fp32`
737
+ 2025-06-27 21:10:51,917 - speechbrain.core - INFO - Separation Model Statistics:
738
+ * Total Number of Trainable Parameters: 25.8M
739
+ * Total Number of Parameters: 25.8M
740
+ * Trainable Parameters represent 100.0000% of the total size.
741
+ 2025-06-27 21:10:52,857 - speechbrain.core - INFO - Test only mode, skipping training and validation stages.
742
+ 2025-06-27 21:10:52,859 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00
743
+ 2025-06-27 21:11:31,648 - speechbrain.utils.train_logger - INFO - Epoch loaded: 48 - test si-snr: 20.60
744
+ 2025-06-27 21:12:49,750 - speechbrain.core - ERROR - Exception:
745
+ Traceback (most recent call last):
746
+ File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 666, in <module>
747
+ separator.save_results(test_data)
748
+ File "/home/youzhenghai/github/Vocal-Separartion-Baseline/Sepformer/separation/train.py", line 367, in save_results
749
+ sdr_baseline, _, _, _ = bss_eval_sources(
750
+ ^^^^^^^^^^^^^^^^^
751
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/mir_eval/separation.py", line 210, in bss_eval_sources
752
+ _bss_decomp_mtifilt(reference_sources,
753
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/mir_eval/separation.py", line 623, in _bss_decomp_mtifilt
754
+ e_interf = _project(reference_sources,
755
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^
756
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/mir_eval/separation.py", line 715, in _project
757
+ C = np.linalg.solve(G, D).reshape(flen, nsrc, order='F')
758
+ ^^^^^^^^^^^^^^^^^^^^^
759
+ File "/work/youzhenghai/anaconda3/envs/sb_sep/lib/python3.11/site-packages/numpy/linalg/_linalg.py", line 471, in solve
760
+ r = gufunc(a, b, signature=signature)
761
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
762
+ KeyboardInterrupt
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/CKPT.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # yamllint disable
2
+ end-of-epoch: true
3
+ si-snr: 22.403992604029355
4
+ unixtime: 1750946214.2858236
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/brain.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33809a026a2c1febce7b03c8aafaee4ddfc851b2c70f180f8c06bf1017f4df5c
3
+ size 46
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/counter.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98010bd9270f9b100b6214a21754fd33bdc8d41b2bc9f9dd16ff54d3c34ffd71
3
+ size 2
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/dataloader-TRAIN.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d253d7b7ace4e06589dd90003f047380ddfdcfb29007b4e815caf48ff09b498b
3
+ size 4
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/decoder.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ab49185bb3560f75ce4c18769157375a051f6b3a36e0c35d027574ca9c29e42
3
+ size 34409
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/encoder.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95ee4117e13cc2fb383208925edb71d86947024a9dd2be3da1ea25aca5ae8adf
3
+ size 34473
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/masknet.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c23ccb34b361feb8eeb630d4947815533cfb7dcfd54402e97edc82e032479b
3
+ size 113629889
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/optimizer.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd09ff01fca43d3985535808946f8dcd75488e1da097ed30b148cb5c3b9114d5
3
+ size 206898874
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-26+21-56-54+00/scaler.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:060762594d7f2f0162569b71f7b3ab95a021d06848d3088c63366abf8b98f80c
3
+ size 1383
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/CKPT.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # yamllint disable
2
+ end-of-epoch: true
3
+ si-snr: 22.415829142613383
4
+ unixtime: 1750994609.9935129
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/brain.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33809a026a2c1febce7b03c8aafaee4ddfc851b2c70f180f8c06bf1017f4df5c
3
+ size 46
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/counter.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79d6eaa2676189eb927f2e16a70091474078e2117c3fc607d35cdc6b591ef355
3
+ size 3
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/dataloader-TRAIN.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d253d7b7ace4e06589dd90003f047380ddfdcfb29007b4e815caf48ff09b498b
3
+ size 4
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/decoder.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96810d4644ba93e03d448330d0be5de5a3befc453f07b0c61f13aeca7464b2c5
3
+ size 34409
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/encoder.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76f73bc7bdde7f931679475847d79af49d687d5eb52011f17d6a37024a222558
3
+ size 34473
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/masknet.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30262fd31537e9349c1c1071bbd86c9a89e359ea11d5d50c48a05da03bc26e0e
3
+ size 113629889
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/optimizer.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee4c807b50d7f9af8606acf172b2713c218ca53faf4aaa3e614e0c0a6fbac5bd
3
+ size 206898874
Sepformer/results/sepformer_4mix/1234/save/CKPT+2025-06-27+11-23-29+00/scaler.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91cd3440b19e568449ff6d8fbb6df704d57d37fc7ad77f05146ac1de7310cded
3
+ size 1383
Sepformer/results/sepformer_4mix/1234/save/record_tr.csv ADDED
The diff for this file is too large to render. See raw diff