Upload 3 files
Browse files
- config/v1/s1.yaml +81 -0
- config/v1/s2.yaml +78 -0
- config/v1/s3.yaml +76 -0
config/v1/s1.yaml
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
seed: 24
|
| 2 |
+
__set_seed: !apply:deeplab.utils.misc.set_random_seed [!ref <seed>]
|
| 3 |
+
|
| 4 |
+
use_amp: true
|
| 5 |
+
use_gradient_clipping: true
|
| 6 |
+
gradient_accumulation: 1
|
| 7 |
+
cudnn_benchmark: false
|
| 8 |
+
|
| 9 |
+
optimizer: !name:torch.optim.AdamW
|
| 10 |
+
lr: 0.0001
|
| 11 |
+
weight_decay: 0.0001
|
| 12 |
+
|
| 13 |
+
scheduler: !name:deeplab.core.scheduler.WarmupLR_withStepDecay
|
| 14 |
+
warmup_step: 5
|
| 15 |
+
decay_step: 10
|
| 16 |
+
gamma: 0.1
|
| 17 |
+
|
| 18 |
+
num_epochs: 25
|
| 19 |
+
max_iters_per_epoch: null
|
| 20 |
+
batch_size: 192
|
| 21 |
+
valid_batch_size: 1
|
| 22 |
+
training_loop: 1
|
| 23 |
+
num_workers: 16
|
| 24 |
+
output_dir: results/
|
| 25 |
+
|
| 26 |
+
items_save: False
|
| 27 |
+
item_save_steps: 500
|
| 28 |
+
|
| 29 |
+
dur_range: [2, 3]
|
| 30 |
+
max_valid_dur: 60
|
| 31 |
+
speed_perturbation: []
|
| 32 |
+
data_aug: true
|
| 33 |
+
embd_dim: 256
|
| 34 |
+
|
| 35 |
+
sample_rate: 16000
|
| 36 |
+
|
| 37 |
+
corpus_dir: /work/zl389/AudioData
|
| 38 |
+
|
| 39 |
+
musan_path: !apply:os.path.join [!ref <corpus_dir>, 'musan']
|
| 40 |
+
rirs_path: !apply:os.path.join [!ref <corpus_dir>, 'rirs_noise']
|
| 41 |
+
|
| 42 |
+
train_data:
|
| 43 |
+
- !name:deeplab.utils.corpus.load_audio_corpus
|
| 44 |
+
['/work/zl389/workspace/LLM_ASV/data/vox2dev', ['dev']] # 5994 speakers
|
| 45 |
+
- !name:deeplab.utils.corpus.load_audio_corpus
|
| 46 |
+
['/work/zl389/workspace/LLM_ASV/data/voxblink2', ['audio']] # 111284 speakers
|
| 47 |
+
|
| 48 |
+
valid_data:
|
| 49 |
+
- scp_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/wav_copy.scp'
|
| 50 |
+
trial_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/trials'
|
| 51 |
+
|
| 52 |
+
peft_config: !apply:deeplab.pretrained.audio2vector.api.create_lora_config
|
| 53 |
+
model_type: 'w2v-bert'
|
| 54 |
+
r: 64
|
| 55 |
+
lora_alpha: 128
|
| 56 |
+
target_modules: ["linear_q", "linear_v"]
|
| 57 |
+
lora_dropout: 0.0
|
| 58 |
+
bias: 'none'
|
| 59 |
+
|
| 60 |
+
spk_model: !new:local.spk_model.Audio2Vec_based_Adapter
|
| 61 |
+
model_name: 'facebook/w2v-bert-2.0'
|
| 62 |
+
frozen_encoder: true
|
| 63 |
+
bnb_config: null
|
| 64 |
+
peft_config: !ref <peft_config>
|
| 65 |
+
encoder_config: 'config_prune_tea.json'
|
| 66 |
+
n_mfa_layers: -1
|
| 67 |
+
pooling_layer: 'ASP'
|
| 68 |
+
embd_dim: !ref <embd_dim>
|
| 69 |
+
adapter_dim: 128
|
| 70 |
+
dropout: 0.0
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
classifier: !new:local.spk_classifier.ArcFace
|
| 74 |
+
in_features: !ref <embd_dim>
|
| 75 |
+
out_features: 117278 # 111284 (voxblink2) + 5994 (vox2dev)
|
| 76 |
+
s: 32
|
| 77 |
+
m: 0.2
|
| 78 |
+
|
| 79 |
+
modules:
|
| 80 |
+
spk_model: !ref <spk_model>
|
| 81 |
+
classifier: !ref <classifier>
|
config/v1/s2.yaml
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
seed: 24
|
| 2 |
+
__set_seed: !apply:deeplab.utils.misc.set_random_seed [!ref <seed>]
|
| 3 |
+
|
| 4 |
+
use_amp: true
|
| 5 |
+
use_gradient_clipping: true
|
| 6 |
+
gradient_accumulation: 1
|
| 7 |
+
cudnn_benchmark: false
|
| 8 |
+
|
| 9 |
+
optimizer: !name:torch.optim.AdamW
|
| 10 |
+
lr: 0.00001
|
| 11 |
+
weight_decay: 0.0001
|
| 12 |
+
|
| 13 |
+
scheduler_lmft: !name:deeplab.core.scheduler.WarmupCosineScheduler
|
| 14 |
+
min_lr: 0.000005
|
| 15 |
+
max_lr: 0.00001
|
| 16 |
+
warmup_epoch: 0
|
| 17 |
+
fix_epoch: 2
|
| 18 |
+
|
| 19 |
+
num_epochs: 4
|
| 20 |
+
max_iters_per_epoch: null
|
| 21 |
+
batch_size: 192
|
| 22 |
+
valid_batch_size: 1
|
| 23 |
+
training_loop: 1
|
| 24 |
+
num_workers: 16
|
| 25 |
+
output_dir: results/
|
| 26 |
+
|
| 27 |
+
items_save: true
|
| 28 |
+
item_save_steps: 2000
|
| 29 |
+
|
| 30 |
+
dur_range: [2, 3]
|
| 31 |
+
max_valid_dur: 60
|
| 32 |
+
speed_perturbation: []
|
| 33 |
+
data_aug: true
|
| 34 |
+
embd_dim: 256
|
| 35 |
+
|
| 36 |
+
sample_rate: 16000
|
| 37 |
+
|
| 38 |
+
corpus_dir: /work/zl389/AudioData
|
| 39 |
+
|
| 40 |
+
musan_path: !apply:os.path.join [!ref <corpus_dir>, 'musan']
|
| 41 |
+
rirs_path: !apply:os.path.join [!ref <corpus_dir>, 'rirs_noise']
|
| 42 |
+
|
| 43 |
+
train_data:
|
| 44 |
+
- !name:deeplab.utils.corpus.load_audio_corpus
|
| 45 |
+
['/work/zl389/workspace/LLM_ASV/data/vox2dev', ['dev']] # 5994 speakers
|
| 46 |
+
- !name:deeplab.utils.corpus.load_audio_corpus
|
| 47 |
+
['/work/zl389/workspace/LLM_ASV/data/voxblink2', ['audio']] # 111284 speakers
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
valid_data:
|
| 51 |
+
- scp_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/wav_copy.scp'
|
| 52 |
+
trial_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/trials'
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
peft_config: null
|
| 56 |
+
|
| 57 |
+
spk_model: !new:local.spk_model.Audio2Vec_based_Adapter
|
| 58 |
+
model_name: 'facebook/w2v-bert-2.0'
|
| 59 |
+
frozen_encoder: false
|
| 60 |
+
bnb_config: null
|
| 61 |
+
peft_config: !ref <peft_config>
|
| 62 |
+
encoder_config: 'config_prune_tea.json'
|
| 63 |
+
n_mfa_layers: -1
|
| 64 |
+
pooling_layer: 'ASP'
|
| 65 |
+
embd_dim: !ref <embd_dim>
|
| 66 |
+
adapter_dim: 128
|
| 67 |
+
dropout: 0.0
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
classifier: !new:local.spk_classifier.ArcFace
|
| 71 |
+
in_features: !ref <embd_dim>
|
| 72 |
+
out_features: 117278 # 111284 (voxblink2) + 5994 (vox2dev)
|
| 73 |
+
s: 32
|
| 74 |
+
m: 0.2
|
| 75 |
+
|
| 76 |
+
modules:
|
| 77 |
+
spk_model: !ref <spk_model>
|
| 78 |
+
classifier: !ref <classifier>
|
config/v1/s3.yaml
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
seed: 24
|
| 2 |
+
__set_seed: !apply:deeplab.utils.misc.set_random_seed [!ref <seed>]
|
| 3 |
+
|
| 4 |
+
use_amp: true
|
| 5 |
+
use_gradient_clipping: true
|
| 6 |
+
gradient_accumulation: 1
|
| 7 |
+
cudnn_benchmark: false
|
| 8 |
+
|
| 9 |
+
optimizer: !name:torch.optim.AdamW
|
| 10 |
+
lr: 0.00001
|
| 11 |
+
weight_decay: 0.0001
|
| 12 |
+
|
| 13 |
+
scheduler_lmft: !name:deeplab.core.scheduler.WarmupCosineScheduler
|
| 14 |
+
min_lr: 0.000005
|
| 15 |
+
max_lr: 0.00001
|
| 16 |
+
warmup_epoch: 0
|
| 17 |
+
fix_epoch: 1
|
| 18 |
+
|
| 19 |
+
num_epochs: 2
|
| 20 |
+
max_iters_per_epoch: null
|
| 21 |
+
batch_size: 32
|
| 22 |
+
valid_batch_size: 1
|
| 23 |
+
training_loop: 1
|
| 24 |
+
num_workers: 16
|
| 25 |
+
output_dir: results/
|
| 26 |
+
|
| 27 |
+
items_save: true
|
| 28 |
+
item_save_steps: 500
|
| 29 |
+
|
| 30 |
+
dur_range: [5, 6]
|
| 31 |
+
max_valid_dur: 60
|
| 32 |
+
speed_perturbation: []
|
| 33 |
+
data_aug: false
|
| 34 |
+
embd_dim: 256
|
| 35 |
+
|
| 36 |
+
sample_rate: 16000
|
| 37 |
+
|
| 38 |
+
corpus_dir: /work/zl389/AudioData
|
| 39 |
+
|
| 40 |
+
musan_path: !apply:os.path.join [!ref <corpus_dir>, 'musan']
|
| 41 |
+
rirs_path: !apply:os.path.join [!ref <corpus_dir>, 'rirs_noise']
|
| 42 |
+
|
| 43 |
+
train_data:
|
| 44 |
+
- !name:deeplab.utils.corpus.load_audio_corpus
|
| 45 |
+
['/work/zl389/workspace/LLM_ASV/data/vox2dev', ['dev']] # 5994 speakers
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
valid_data:
|
| 49 |
+
- scp_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/wav_copy.scp'
|
| 50 |
+
trial_path: '/work/zl389/workspace/LLM_ASV/data/test_vox/vox1-o/trials'
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
peft_config: null
|
| 54 |
+
|
| 55 |
+
spk_model: !new:local.spk_model.Audio2Vec_based_Adapter
|
| 56 |
+
model_name: 'facebook/w2v-bert-2.0'
|
| 57 |
+
frozen_encoder: false
|
| 58 |
+
bnb_config: null
|
| 59 |
+
peft_config: !ref <peft_config>
|
| 60 |
+
encoder_config: 'config_prune_tea.json'
|
| 61 |
+
n_mfa_layers: -1
|
| 62 |
+
pooling_layer: 'ASP'
|
| 63 |
+
embd_dim: !ref <embd_dim>
|
| 64 |
+
adapter_dim: 128
|
| 65 |
+
dropout: 0.0
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
classifier: !new:local.spk_classifier.ArcFace
|
| 69 |
+
in_features: !ref <embd_dim>
|
| 70 |
+
out_features: 5994 # vox2dev only
|
| 71 |
+
s: 32
|
| 72 |
+
m: 0.5
|
| 73 |
+
|
| 74 |
+
modules:
|
| 75 |
+
spk_model: !ref <spk_model>
|
| 76 |
+
classifier: !ref <classifier>
|