qfuxa
/

canary-mlx

speech-recognition

Model card Files Files and versions

canary-mlx / config.json

qfuxa's picture

Upload folder using huggingface_hub

d05f6ea verified 20 days ago

history blame contribute delete

1.31 kB

	{
	"preprocessor": {
	"sample_rate": 16000,
	"normalize": "per_feature",
	"window_size": 0.025,
	"window_stride": 0.01,
	"window": "hann",
	"features": 128,
	"n_fft": 512,
	"dither": 1e-05,
	"pad_to": 0,
	"pad_value": 0.0,
	"preemph": 0.97,
	"mag_power": 2.0
	},
	"encoder": {
	"feat_in": 128,
	"n_layers": 32,
	"d_model": 1024,
	"n_heads": 8,
	"ff_expansion_factor": 4,
	"subsampling_factor": 8,
	"self_attention_model": "rel_pos",
	"subsampling": "dw_striding",
	"conv_kernel_size": 9,
	"subsampling_conv_channels": 256,
	"pos_emb_max_len": 5000,
	"causal_downsampling": false,
	"use_bias": true,
	"xscaling": false,
	"subsampling_conv_chunking_factor": 1,
	"att_context_size": [
	-1,
	-1
	]
	},
	"transf_decoder": {
	"vocab_size": 16384,
	"hidden_size": 1024,
	"inner_size": 4096,
	"num_layers": 8,
	"num_attention_heads": 8,
	"pre_ln": true,
	"hidden_act": "relu",
	"pre_ln_final_layer_norm": true,
	"learn_positional_encodings": false,
	"max_sequence_length": 1024
	},
	"head": {
	"num_layers": 1,
	"hidden_size": 1024,
	"num_classes": 16384
	},
	"prompt_format": "canary2",
	"tokenizer": {
	"type": "sentencepiece",
	"model_path": "tokenizer.model"
	}
	}