zl389 committed on
Commit
7ea777f
·
verified ·
1 Parent(s): bd36143

Upload 3 files

Browse files
Files changed (3) hide show
  1. config/v1/s1.yaml +81 -0
  2. config/v1/s2.yaml +78 -0
  3. config/v1/s3.yaml +76 -0
config/v1/s1.yaml ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 24
2
+ __set_seed: !apply:deeplab.utils.misc.set_random_seed [!ref <seed>]
3
+
4
+ use_amp: true
5
+ use_gradient_clipping: true
6
+ gradient_accumulation: 1
7
+ cudnn_benchmark: false
8
+
9
+ optimizer: !name:torch.optim.AdamW
10
+ lr: 0.0001
11
+ weight_decay: 0.0001
12
+
13
+ scheduler: !name:deeplab.core.scheduler.WarmupLR_withStepDecay
14
+ warmup_step: 5
15
+ decay_step: 10
16
+ gamma: 0.1
17
+
18
+ num_epochs: 25
19
+ max_iters_per_epoch: null
20
+ batch_size: 192
21
+ valid_batch_size: 1
22
+ training_loop: 1
23
+ num_workers: 16
24
+ output_dir: results/
25
+
26
+ items_save: false
27
+ item_save_steps: 500
28
+
29
+ dur_range: [2, 3]
30
+ max_valid_dur: 60
31
+ speed_perturbation: []
32
+ data_aug: true
33
+ embd_dim: 256
34
+
35
+ sample_rate: 16000
36
+
37
+ corpus_dir: /work/zl389/AudioData
38
+
39
+ musan_path: !apply:os.path.join [!ref <corpus_dir>, 'musan']
40
+ rirs_path: !apply:os.path.join [!ref <corpus_dir>, 'rirs_noise']
41
+
42
+ train_data:
43
+ - !name:deeplab.utils.corpus.load_audio_corpus
44
+ ['/work/zl389/workspace/LLM_ASV/data/vox2dev', ['dev']] # 5994
45
+ - !name:deeplab.utils.corpus.load_audio_corpus
46
+ ['/work/zl389/workspace/LLM_ASV/data/voxblink2', ['audio']] # 111284
47
+
48
+ valid_data:
49
+ - scp_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/wav_copy.scp'
50
+ trial_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/trials'
51
+
52
+ peft_config: !apply:deeplab.pretrained.audio2vector.api.create_lora_config
53
+ model_type: 'w2v-bert'
54
+ r: 64
55
+ lora_alpha: 128
56
+ target_modules: ["linear_q", "linear_v"]
57
+ lora_dropout: 0.0
58
+ bias: 'none'
59
+
60
+ spk_model: !new:local.spk_model.Audio2Vec_based_Adapter
61
+ model_name: 'facebook/w2v-bert-2.0'
62
+ frozen_encoder: true
63
+ bnb_config: null
64
+ peft_config: !ref <peft_config>
65
+ encoder_config: 'config_prune_tea.json'
66
+ n_mfa_layers: -1
67
+ pooling_layer: 'ASP'
68
+ embd_dim: !ref <embd_dim>
69
+ adapter_dim: 128
70
+ dropout: 0.0
71
+
72
+
73
+ classifier: !new:local.spk_classifier.ArcFace
74
+ in_features: !ref <embd_dim>
75
+ out_features: 117278 # 111284 (voxblink2) + 5994 (vox2dev), per the counts noted in train_data
76
+ s: 32
77
+ m: 0.2
78
+
79
+ modules:
80
+ spk_model: !ref <spk_model>
81
+ classifier: !ref <classifier>
config/v1/s2.yaml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 24
2
+ __set_seed: !apply:deeplab.utils.misc.set_random_seed [!ref <seed>]
3
+
4
+ use_amp: true
5
+ use_gradient_clipping: true
6
+ gradient_accumulation: 1
7
+ cudnn_benchmark: false
8
+
9
+ optimizer: !name:torch.optim.AdamW
10
+ lr: 0.00001
11
+ weight_decay: 0.0001
12
+
13
+ scheduler_lmft: !name:deeplab.core.scheduler.WarmupCosineScheduler
14
+ min_lr: 0.000005
15
+ max_lr: 0.00001
16
+ warmup_epoch: 0
17
+ fix_epoch: 2
18
+
19
+ num_epochs: 4
20
+ max_iters_per_epoch: null
21
+ batch_size: 192
22
+ valid_batch_size: 1
23
+ training_loop: 1
24
+ num_workers: 16
25
+ output_dir: results/
26
+
27
+ items_save: true
28
+ item_save_steps: 2000
29
+
30
+ dur_range: [2, 3]
31
+ max_valid_dur: 60
32
+ speed_perturbation: []
33
+ data_aug: true
34
+ embd_dim: 256
35
+
36
+ sample_rate: 16000
37
+
38
+ corpus_dir: /work/zl389/AudioData
39
+
40
+ musan_path: !apply:os.path.join [!ref <corpus_dir>, 'musan']
41
+ rirs_path: !apply:os.path.join [!ref <corpus_dir>, 'rirs_noise']
42
+
43
+ train_data:
44
+ - !name:deeplab.utils.corpus.load_audio_corpus
45
+ ['/work/zl389/workspace/LLM_ASV/data/vox2dev', ['dev']] # 5994
46
+ - !name:deeplab.utils.corpus.load_audio_corpus
47
+ ['/work/zl389/workspace/LLM_ASV/data/voxblink2', ['audio']] # 111284
48
+
49
+
50
+ valid_data:
51
+ - scp_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/wav_copy.scp'
52
+ trial_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/trials'
53
+
54
+
55
+ peft_config: null
56
+
57
+ spk_model: !new:local.spk_model.Audio2Vec_based_Adapter
58
+ model_name: 'facebook/w2v-bert-2.0'
59
+ frozen_encoder: false
60
+ bnb_config: null
61
+ peft_config: !ref <peft_config>
62
+ encoder_config: 'config_prune_tea.json'
63
+ n_mfa_layers: -1
64
+ pooling_layer: 'ASP'
65
+ embd_dim: !ref <embd_dim>
66
+ adapter_dim: 128
67
+ dropout: 0.0
68
+
69
+
70
+ classifier: !new:local.spk_classifier.ArcFace
71
+ in_features: !ref <embd_dim>
72
+ out_features: 117278 # 111284 (voxblink2) + 5994 (vox2dev), per the counts noted in train_data
73
+ s: 32
74
+ m: 0.2
75
+
76
+ modules:
77
+ spk_model: !ref <spk_model>
78
+ classifier: !ref <classifier>
config/v1/s3.yaml ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 24
2
+ __set_seed: !apply:deeplab.utils.misc.set_random_seed [!ref <seed>]
3
+
4
+ use_amp: true
5
+ use_gradient_clipping: true
6
+ gradient_accumulation: 1
7
+ cudnn_benchmark: false
8
+
9
+ optimizer: !name:torch.optim.AdamW
10
+ lr: 0.00001
11
+ weight_decay: 0.0001
12
+
13
+ scheduler_lmft: !name:deeplab.core.scheduler.WarmupCosineScheduler
14
+ min_lr: 0.000005
15
+ max_lr: 0.00001
16
+ warmup_epoch: 0
17
+ fix_epoch: 1
18
+
19
+ num_epochs: 2
20
+ max_iters_per_epoch: null
21
+ batch_size: 32
22
+ valid_batch_size: 1
23
+ training_loop: 1
24
+ num_workers: 16
25
+ output_dir: results/
26
+
27
+ items_save: true
28
+ item_save_steps: 500
29
+
30
+ dur_range: [5, 6]
31
+ max_valid_dur: 60
32
+ speed_perturbation: []
33
+ data_aug: false
34
+ embd_dim: 256
35
+
36
+ sample_rate: 16000
37
+
38
+ corpus_dir: /work/zl389/AudioData
39
+
40
+ musan_path: !apply:os.path.join [!ref <corpus_dir>, 'musan']
41
+ rirs_path: !apply:os.path.join [!ref <corpus_dir>, 'rirs_noise']
42
+
43
+ train_data:
44
+ - !name:deeplab.utils.corpus.load_audio_corpus
45
+ ['/work/zl389/workspace/LLM_ASV/data/vox2dev', ['dev']] # 5994
46
+
47
+
48
+ valid_data:
49
+ - scp_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/wav_copy.scp'
50
+ trial_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/trials'
51
+
52
+
53
+ peft_config: null
54
+
55
+ spk_model: !new:local.spk_model.Audio2Vec_based_Adapter
56
+ model_name: 'facebook/w2v-bert-2.0'
57
+ frozen_encoder: false
58
+ bnb_config: null
59
+ peft_config: !ref <peft_config>
60
+ encoder_config: 'config_prune_tea.json'
61
+ n_mfa_layers: -1
62
+ pooling_layer: 'ASP'
63
+ embd_dim: !ref <embd_dim>
64
+ adapter_dim: 128
65
+ dropout: 0.0
66
+
67
+
68
+ classifier: !new:local.spk_classifier.ArcFace
69
+ in_features: !ref <embd_dim>
70
+ out_features: 5994 # matches the vox2dev count noted in train_data (the only corpus listed in this config)
71
+ s: 32
72
+ m: 0.5
73
+
74
+ modules:
75
+ spk_model: !ref <spk_model>
76
+ classifier: !ref <classifier>