namin72
/

SenseVoice_Finetune

Model card Files Files and versions

SenseVoice_Finetune / config.yaml

namin72's picture

Upload 2 files

d73aad3 verified 4 months ago

history blame contribute delete

2.46 kB

	encoder: SenseVoiceEncoderSmall
	encoder_conf:
	output_size: 512
	attention_heads: 4
	linear_units: 2048
	num_blocks: 50
	tp_blocks: 20
	dropout_rate: 0.1
	positional_dropout_rate: 0.1
	attention_dropout_rate: 0.1
	input_layer: pe
	pos_enc_class: SinusoidalPositionEncoder
	normalize_before: true
	kernel_size: 11
	sanm_shfit: 0
	selfattention_layer_type: sanm
	model: SenseVoiceSmall
	model_conf:
	length_normalized_loss: true
	sos: 1
	eos: 2
	ignore_id: -1
	tokenizer: SentencepiecesTokenizer
	tokenizer_conf:
	bpemodel: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/chn_jpn_yue_eng_ko_spectok.bpe.model
	unk_symbol: <unk>
	split_with_space: true
	frontend: WavFrontend
	frontend_conf:
	fs: 16000
	window: hamming
	n_mels: 80
	frame_length: 25
	frame_shift: 10
	lfr_m: 7
	lfr_n: 6
	cmvn_file: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/am.mvn
	dataset: SenseVoiceCTCDataset
	dataset_conf:
	index_ds: IndexDSJsonl
	batch_sampler: BatchSampler
	data_split_num: 1
	batch_type: token
	batch_size: 1200
	max_token_length: 2000
	min_token_length: 60
	max_source_length: 2000
	min_source_length: 60
	max_target_length: 200
	min_target_length: 0
	shuffle: true
	num_workers: 2
	sos: 1
	eos: 2
	IndexDSJsonl: IndexDSJsonl
	retry: 20
	sort_size: 1024
	train_conf:
	accum_grad: 1
	grad_clip: 5
	max_epoch: 500
	keep_nbest_models: 1
	avg_nbest_model: 0
	log_interval: 1
	resume: true
	validate_interval: 2000
	save_checkpoint_interval: 2000
	use_deepspeed: false
	deepspeed_config: /home/ubuntu/work/SenseVoice/deepspeed_conf/ds_stage1.json
	optim: adamw
	optim_conf:
	lr: 0.0002
	scheduler: warmuplr
	scheduler_conf:
	warmup_steps: 25000
	specaug: SpecAugLFR
	specaug_conf:
	apply_time_warp: false
	time_warp_window: 5
	time_warp_mode: bicubic
	apply_freq_mask: true
	freq_mask_width_range:
	- 0
	- 30
	lfr_rate: 6
	num_freq_mask: 1
	apply_time_mask: true
	time_mask_width_range:
	- 0
	- 12
	num_time_mask: 1
	init_param: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/model.pt
	config: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/config.yaml
	is_training: true
	trust_remote_code: true
	train_data_set_list: /home/ubuntu/work/SenseVoice/dataset/train_split.jsonl
	valid_data_set_list: /home/ubuntu/work/SenseVoice/dataset/val.jsonl
	output_dir: ./outputs_ep500
	model_path: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall
	device: cpu