preprocessor.json · Reza2kn/Shenava-Rizeh-v1.0-ONNX-fp16 at main

add onnx

d393dce verified 10 days ago

1.06 kB

	{
	"schema": "visualears-preprocessor-v2-nemo-fbank",
	"description": "NeMo AudioToMelSpectrogramPreprocessor-compatible feature pipeline for shenava-32m-v5.",
	"sample_rate": 16000,
	"n_fft": 512,
	"win_length": 400,
	"hop_length": 160,
	"n_mels": 80,
	"window": "hann_periodic_false",
	"center": true,
	"center_pad": 256,
	"pad_mode": "reflect",
	"preemphasis": 0.97,
	"mel_scale": "slaney/librosa.filters.mel(htk=False,norm='slaney')",
	"mel_filters_file": "mel_filters_slaney_80x257.json",
	"spectrum": "magnitude_power_2_no_fft_normalization",
	"log": "natural",
	"log_zero_guard_type": "add",
	"log_zero_guard_value": 5.960464477539063e-08,
	"normalize": "NA",
	"fixed_frames": 2005,
	"pad_value": 0.0,
	"frame_count_formula": "max(1, min(fixed_frames, floor(num_samples / hop_length) + 1))",
	"output_stride": 8,
	"usable_steps_formula": "min(encoded_lengths[0], logits_steps)",
	"ms_per_output_step": 80,
	"blank_id": 1024,
	"ctc_decode": "greedy argmax; drop blank, repeats, SentencePiece specials; join; '▁' -> space"
	}