# NeMo/examples/speaker_tasks/recognition/conf/SpeakerNet_verification_3x2x256.yaml
#
# thanks to NVIDIA ❤
#
# 7934b29 about 3 years ago
#
# 2.99 kB

	name: &name "SpeakerNet"
	sample_rate: &sample_rate 16000
	repeat: &rep 2
	dropout: &drop 0.5
	separable: &separable True
	n_filters: &n_filters 256

	model:
	train_ds:
	manifest_filepath: ???
	sample_rate: 16000
	labels: null
	batch_size: 64
	shuffle: True
	is_tarred: False
	tarred_audio_filepaths: null
	tarred_shard_strategy: "scatter"

	validation_ds:
	manifest_filepath: ???
	sample_rate: 16000
	labels: null
	batch_size: 128
	shuffle: False

	preprocessor:
	_target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
	normalize: "per_feature"
	window_size: 0.02
	sample_rate: *sample_rate
	window_stride: 0.01
	window: "hann"
	features: &n_mels 64
	n_fft: 512
	frame_splicing: 1
	dither: 0.00001

	encoder:
	_target_: nemo.collections.asr.modules.ConvASREncoder
	feat_in: *n_mels
	activation: relu
	conv_mask: true

	jasper:
	- filters: *n_filters
	repeat: 1
	kernel: [3]
	stride: [1]
	dilation: [1]
	dropout: *drop
	residual: true
	separable: *separable

	- filters: *n_filters
	repeat: *rep
	kernel: [7]
	stride: [1]
	dilation: [1]
	dropout: *drop
	residual: true
	separable: *separable

	- filters: *n_filters
	repeat: *rep
	kernel: [11]
	stride: [1]
	dilation: [1]
	dropout: *drop
	residual: true
	separable: *separable

	- filters: *n_filters
	repeat: *rep
	kernel: [15]
	stride: [1]
	dilation: [1]
	dropout: *drop
	residual: true
	separable: *separable

	- filters: &enc_feat_out 1500
	repeat: 1
	kernel: [1]
	stride: [1]
	dilation: [1]
	dropout: 0.0
	residual: false
	separable: *separable

	decoder:
	_target_: nemo.collections.asr.modules.SpeakerDecoder
	feat_in: *enc_feat_out
	num_classes: 7205
	pool_mode: 'xvector'
	emb_sizes: 256

	loss:
	_target_: nemo.collections.asr.losses.angularloss.AngularSoftmaxLoss # you could also use cross-entrophy loss
	scale: 30
	margin: 0.2

	optim:
	name: sgd
	lr: .006
	weight_decay: 0.001
	momentum: 0.9

	# scheduler setup
	sched:
	name: CosineAnnealing
	warmup_ratio: 0.1
	min_lr: 0.0

	trainer:
	devices: 1 # number of gpus
	max_epochs: 200
	max_steps: -1 # computed at runtime if not set
	num_nodes: 1
	accelerator: gpu
	strategy: ddp
	accumulate_grad_batches: 1
	deterministic: True
	enable_checkpointing: False
	logger: False
	log_every_n_steps: 1 # Interval of logging.
	val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations
	gradient_clip_val: 1.0

	exp_manager:
	exp_dir: null
	name: *name
	create_tensorboard_logger: True
	create_checkpoint_callback: True