jcmc
/

speechbrain-ic-slu

intent classification

Eval Results (legacy)

Model card Files Files and versions

speechbrain-ic-slu / fluent-speech-commands /Tokenizer /hparams /tokenizer_bpe51.yaml

jcmc's picture

Upload speechbrain IC model

9b333dd about 4 years ago

history blame contribute delete

1.16 kB

	# ############################################################################
	# Tokenizer: SentencePiece subword model (unigram), vocabulary size 51
	# Training: Fluent Speech Commands
	# Authors: Abdel Heba 2021
	# ############################################################################

	# Output locations. Everything produced by this recipe is placed under
	# `output_folder`; the value already ends with "/", so paths built as
	# `<output_folder>/...` contain a double slash.
	output_folder: !ref results/tokenizer_bpe51/
	# Training log file, located inside `output_folder`.
	train_log: !ref <output_folder>/train_log.txt

	# Data files
	# `data_folder` has no default (!PLACEHOLDER): it must be supplied by the user.
	data_folder: !PLACEHOLDER # e.g. /localscratch/fluent_speech_commands_dataset
	# The CSV manifests are referenced under `output_folder`, not `data_folder`.
	train_csv: !ref <output_folder>/train.csv
	valid_csv: !ref <output_folder>/valid.csv
	# NOTE(review): presumably skips the data-preparation step when True --
	# confirm against the training script that consumes this file.
	skip_prep: False

	# Training parameters
	# These values are forwarded to the `tokenizer` entry further down
	# (as model_type, vocab_size, character_coverage, num_sequences and
	# annotation_read respectively).
	token_type: unigram # ["unigram", "bpe", "char"]
	token_output: 51 # index(blank/eos/bos/unk) = 0
	character_coverage: 1.0
	num_sequences: 10000
	# NOTE(review): presumably the name of the CSV column holding the text to
	# tokenize -- confirm against the generated train.csv.
	csv_read: semantics


	# SentencePiece tokenizer definition.
	# The indented keys are the keyword arguments attached to the `!name:`
	# object. They must be nested under `tokenizer`: left at the top level,
	# `character_coverage` and `num_sequences` would duplicate (and reference
	# themselves instead of) the training parameters of the same name, and the
	# tokenizer would receive no arguments at all.
	tokenizer: !name:speechbrain.tokenizers.SentencePiece.SentencePiece
	    model_dir: !ref <output_folder>
	    vocab_size: !ref <token_output>
	    annotation_train: !ref <train_csv>
	    annotation_read: !ref <csv_read>
	    model_type: !ref <token_type> # ["unigram", "bpe", "char"]
	    character_coverage: !ref <character_coverage>
	    num_sequences: !ref <num_sequences>
	    # Both manifests are passed for checking, not only the training one.
	    annotation_list_to_check: [!ref <train_csv>, !ref <valid_csv>]