# Uploaded by jcmc: "Upload speechbrain IC model" (commit 9b333dd)
# ############################################################################
# Tokenizer: SentencePiece subword tokenizer (unigram model, 51 tokens)
# Training: Fluent Speech Commands
# Authors: Abdel Heba 2021
# ############################################################################
# Output locations: everything derived below is written under <output_folder>.
output_folder: !ref results/tokenizer_bpe51/
train_log: !ref <output_folder>/train_log.txt
# Data files
# data_folder has no default and must be supplied by the user.
data_folder: !PLACEHOLDER # e.g. /localscratch/fluent_speech_commands_dataset
# The CSV manifests are placed inside <output_folder>, not inside <data_folder>.
train_csv: !ref <output_folder>/train.csv
valid_csv: !ref <output_folder>/valid.csv
# NOTE(review): presumably skips the dataset preparation step when True; this
# key is consumed outside this file -- confirm against the training script.
skip_prep: False
# Training parameters
# The values below are forwarded to the `tokenizer` entry further down:
# token_type -> model_type, token_output -> vocab_size, csv_read -> annotation_read.
token_type: unigram # ["unigram", "bpe", "char"]
token_output: 51 # index(blank/eos/bos/unk) = 0
character_coverage: 1.0
num_sequences: 10000
# Name of the annotation field the tokenizer reads (passed as annotation_read).
csv_read: semantics
# SentencePiece tokenizer definition.
# The indented keys are the keyword arguments bound to the `!name:` target;
# they must be nested under `tokenizer`, otherwise they are parsed as
# top-level keys (duplicating `character_coverage` / `num_sequences` above)
# and the tokenizer receives no arguments at all.
tokenizer: !name:speechbrain.tokenizers.SentencePiece.SentencePiece
    # Directory where the trained tokenizer model is stored.
    model_dir: !ref <output_folder>
    vocab_size: !ref <token_output>
    # CSV used to train the tokenizer, and the field read from it.
    annotation_train: !ref <train_csv>
    annotation_read: !ref <csv_read>
    model_type: !ref <token_type> # ["unigram", "bpe", "char"]
    character_coverage: !ref <character_coverage>
    num_sequences: !ref <num_sequences>
    # CSV files listed for checking once the tokenizer has been trained.
    annotation_list_to_check: [!ref <train_csv>, !ref <valid_csv>]