Automatic Speech Recognition
NeMo
Finnish
asr
speech-recognition
canary-v2
kenlm
finnish
Eval Results (legacy)
Instructions to use RASMUS/Finnish-ASR-Canary-v2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- NeMo
How to use RASMUS/Finnish-ASR-Canary-v2 with NeMo:
import nemo.collections.asr as nemo_asr
asr_model = nemo_asr.models.ASRModel.from_pretrained("RASMUS/Finnish-ASR-Canary-v2")
transcriptions = asr_model.transcribe(["file.wav"])
- Notebooks
- Google Colab
- Kaggle
File size: 2,146 Bytes
from nemo.collections.asr.models import EncDecMultiTaskModel
from omegaconf import OmegaConf
import os
import argparse
def main(argv=None):
    """Transcribe one audio file with a fine-tuned Finnish Canary model.

    Parses the command line, restores the ``.nemo`` checkpoint, optionally
    switches the model to beam-search decoding with a KenLM n-gram model,
    and prints the transcription of the given audio file.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so calling ``main()`` with no
            arguments keeps working as a script entry point.

    Returns:
        Process exit status: 0 on success, 1 when the model or the audio
        file does not exist.
    """
    parser = argparse.ArgumentParser(description="Finnish ASR Inference Example")
    parser.add_argument("--audio", type=str, required=True, help="Path to the audio file (.wav)")
    parser.add_argument("--model", type=str, default="models/canary-finnish.nemo", help="Path to the finetuned .nemo model")
    parser.add_argument("--kenlm", type=str, default="models/kenlm_5M.nemo", help="Path to the KenLM model")
    parser.add_argument("--beam_size", type=int, default=4, help="Beam size for decoding")
    parser.add_argument("--pnc", type=str, default="yes", help="Enable Punctuation and Capitalization (yes/no)")
    args = parser.parse_args(argv)

    # 1. Validate both input paths up front, so a wrong --audio path fails
    #    immediately instead of after the expensive model load.
    if not os.path.exists(args.model):
        print(f"Error: Model not found at {args.model}")
        return 1
    if not os.path.exists(args.audio):
        print(f"Error: Audio sample not found at {args.audio}")
        return 1

    # 2. Load Model and KenLM Bundle
    print(f"Loading model from {args.model}...")
    model = EncDecMultiTaskModel.restore_from(args.model)

    # Configure KenLM if provided; a missing KenLM file is not an error,
    # the model simply keeps its default decoding strategy.
    if args.kenlm and os.path.exists(args.kenlm):
        print(f"Configuring decoding strategy with KenLM from {args.kenlm}...")
        model.change_decoding_strategy(
            decoding_cfg=OmegaConf.create({
                'strategy': 'beam',
                'beam': {
                    'beam_size': args.beam_size,
                    'ngram_lm_model': args.kenlm,
                    'ngram_lm_alpha': 0.2,
                },
                'batch_size': 1
            })
        )
    else:
        print("Using greedy decoding (no KenLM found or specified).")

    # 3. Transcribe with Finnish Prompts
    print(f"Transcribing {args.audio}...")
    transcription = model.transcribe(
        audio=[args.audio],
        taskname="asr",
        source_lang="fi",
        target_lang="fi",
        pnc=args.pnc
    )

    # Depending on the NeMo release, transcribe() yields plain strings or
    # Hypothesis-style objects carrying the text in a ``.text`` attribute;
    # print the text in either case rather than an object repr.
    first_result = transcription[0]
    text = getattr(first_result, "text", first_result)

    print("-" * 30)
    print(f"Result: {text}")
    print("-" * 30)
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so shell callers can detect failures.
    raise SystemExit(main())
|