cheoljun95
/

Speech-Articulatory-Coding

Model card Files Files and versions

Speech-Articulatory-Coding / model_multiling.yaml

cheoljun95's picture

Upload 4 files

2e6a07d verified about 1 year ago

history blame contribute delete

1.11 kB

	## Base audio configs
	normalize: true # zscore input waveforms
	sr: 16000
	ft_sr: 50
	## Source feature configs
	crepe_model: full
	device: cuda
	fmax: 550
	fmin: 50
	pitch_q: 2
	periodicity_threshold: 0.0
	reflect_loudness: false
	loudness_threshold: 0.05
	use_penn: false
	## Articulatory Inversion configs
	speech_model: microsoft/wavlm-large
	spk_ft_size: 1024
	target_layer: 9
	freqcut: 10
	## Hifi-GAN configs
	generator_configs:
	bias: true
	channels: 512
	in_channels: 14
	kernel_size: 7
	nonlinear_activation: LeakyReLU
	nonlinear_activation_params:
	negative_slope: 0.1
	out_channels: 1
	resblock_dilations:
	- - 1
	- 3
	- 5
	- - 1
	- 3
	- 5
	- - 1
	- 3
	- 5
	resblock_kernel_sizes:
	- 3
	- 7
	- 11
	spk_emb_size: 64
	upsample_kernel_sizes:
	- 16
	- 10
	- 8
	- 4
	upsample_scales:
	- 8
	- 5
	- 4
	- 2
	use_additional_convs: true
	use_weight_norm: true
	pitch_offset: 50
	pitch_rescale: 0.01
	pitch_axis: 12
	## Speaker encoder configs
	spk_emb_size: 64
	spk_target_layer: 0
	## Checkpoint Info
	all_ckpt: null
	linear_model_path: null
	generator_ckpt: null
	spk_ft_ckpt: null