Automatic Speech Recognition
NeMo
Finnish
asr
speech-recognition
canary-v2
kenlm
finnish
Eval Results (legacy)
Instructions to use RASMUS/Finnish-ASR-Canary-v2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- NeMo
How to use RASMUS/Finnish-ASR-Canary-v2 with NeMo:
```python
import nemo.collections.asr as nemo_asr

asr_model = nemo_asr.models.ASRModel.from_pretrained("RASMUS/Finnish-ASR-Canary-v2")
transcriptions = asr_model.transcribe(["file.wav"])
```
- Notebooks
- Google Colab
- Kaggle
| # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| import argparse | |
| import json | |
| import signal | |
| import subprocess | |
| import requests | |
| from nemo.collections.llm import api | |
| from nemo.collections.llm.evaluation.base import wait_for_fastapi_server | |
| from nemo.utils import logging | |
logging.setLevel(logging.INFO)

# Module-level state shared between load_model(), get_response() and main().
# Everything starts as None and is assigned by load_model(): the handle of the
# deployment subprocess, the server base URL, the chat-completions URL, and
# the model name sent with every request.
deploy_process = base_url = chat_url = model_name = None
def parse_args(argv=None):
    """Parse the command-line options of the evaluation script.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse reads the arguments from ``sys.argv[1:]``,
            which is what the script entry point relies on.

    Returns:
        argparse.Namespace with ``checkpoint_path``, ``dataset``,
        ``output_prefix`` and ``max_tokens`` attributes.
    """
    # Single source of truth for the accepted dataset names, so the help text
    # cannot drift away from the values argparse actually accepts.
    dataset_choices = ['gpqa_main', 'mmlu', 'gpqa_diamond']

    parser = argparse.ArgumentParser(description='Evaluate model on benchmark dataset')
    parser.add_argument('--checkpoint_path', type=str, required=True, help='Path to the model checkpoint')
    parser.add_argument(
        '--dataset',
        type=str,
        required=True,
        choices=dataset_choices,
        help=f"Dataset to evaluate on ({', '.join(dataset_choices)})",
    )
    parser.add_argument(
        '--output_prefix', type=str, default='evaluation_results', help='Prefix for the output file name'
    )
    parser.add_argument(
        '--max_tokens', type=int, default=2048, help='Maximum number of tokens to generate in the response'
    )
    return parser.parse_args(argv)
def create_benchmark_prompt(question, choice1, choice2, choice3, choice4):
    """Build the multiple-choice prompt sent to the model.

    The prompt has three lines: a fixed instruction, the question followed by
    its four candidate answers labelled A-D, and fixed guidance on how the
    model must phrase its final answer.
    """
    instruction = (
        "Given the following question and four candidate answers (A, B, C and D), choose the best answer."
    )
    question_line = f"Question: {question} A. {choice1} B. {choice2} C. {choice3} D. {choice4}"
    answer_format = (
        "For simple problems, directly provide the answer with minimal explanation. "
        "For complex problems, use step-by-step format. "
        "Always conclude with: The final answer is [the_answer_letter], "
        "where the [the_answer_letter] is one of A, B, C or D."
    )
    return "\n".join((instruction, question_line, answer_format))
def load_model(checkpoint_path):
    """Start the deployment server for a checkpoint and wait until it is ready.

    Launches the NeMo deploy script as a child process and records the
    connection details in the module-level globals ``deploy_process``,
    ``base_url``, ``chat_url`` and ``model_name`` so that get_response() and
    main() can use them.

    Args:
        checkpoint_path: Path passed to the deploy script as ``--nemo_checkpoint``.
    """
    global deploy_process, base_url, chat_url, model_name

    SCRIPTS_PATH = "/opt/NeMo/scripts"
    deploy_script = f"{SCRIPTS_PATH}/deploy/nlp/deploy_in_fw_oai_server_eval.py"

    # The handle is stored globally so that main() can signal the server to
    # stop once the evaluation is over.
    deploy_process = subprocess.Popen(
        ['python', deploy_script, '--nemo_checkpoint', checkpoint_path],
    )

    base_url = "http://0.0.0.0:8886"
    model_name = "triton_model"
    chat_url = f"{base_url}/v1/chat/completions/"

    # Block until the server answers; retry budget and interval are the values
    # passed here (600 retries, interval 10).
    wait_for_fastapi_server(base_url=base_url, max_retries=600, retry_interval=10)
    logging.info("Model loaded and server is ready for inference")
def get_response(prompt, max_tokens):
    """Send one prompt to the chat-completions endpoint and return the reply body.

    Uses the module-level ``chat_url`` and ``model_name`` set by load_model().

    Args:
        prompt: Text sent as the user message.
        max_tokens: Value forwarded as ``max_tokens`` in the request payload.

    Returns:
        The raw HTTP response body decoded to ``str`` (not parsed as JSON).
    """
    system_message = {"role": "system", "content": "detailed thinking on"}
    user_message = {"role": "user", "content": prompt}
    reply = requests.post(
        chat_url,
        json={
            "messages": [system_message, user_message],
            "model": model_name,
            "max_tokens": max_tokens,
        },
    )
    return reply.content.decode()
def main():
    """Evaluate a checkpoint on the selected benchmark and save the raw responses.

    Reads the problems of the chosen dataset from its JSONL file, starts the
    inference server through load_model(), queries it once per problem and
    writes one JSON object per problem to
    ``<output_prefix>_<dataset>_evaluation.jsonl``. Each problem record is
    expected to provide the keys ``Question``, ``Choice 1`` .. ``Choice 4``
    and ``Answer``.

    Any exception is reported on stdout rather than propagated; the server
    process, if one was started, is always sent SIGINT at the end.
    """
    args = parse_args()

    # Determine dataset file and output file based on dataset selection
    dataset_files = {
        'gpqa_main': 'gpqa_dataset.jsonl',
        'mmlu': 'mmlu_dataset_test.jsonl',
        'gpqa_diamond': 'gpqa_diamond_dataset.jsonl',
    }
    dataset_file = dataset_files[args.dataset]
    output_file = f"{args.output_prefix}_{args.dataset}_evaluation.jsonl"

    try:
        # Read as UTF-8 regardless of the locale, and skip blank lines so a
        # trailing newline in the JSONL file does not break json.loads.
        with open(dataset_file, "r", encoding="utf-8") as f:
            problems = [json.loads(line) for line in f if line.strip()]

        load_model(args.checkpoint_path)

        # Open output file once before the loop
        with open(output_file, "w") as f:
            for i, problem in enumerate(problems):
                print(f"\n{'='*70}")
                print(f"Problem {i+1}/{len(problems)}")

                prompt = create_benchmark_prompt(
                    problem['Question'],
                    problem['Choice 1'],
                    problem['Choice 2'],
                    problem['Choice 3'],
                    problem['Choice 4'],
                )
                response = get_response(prompt, args.max_tokens)

                # Create result entry
                result = {
                    "question": problem['Question'],
                    "choices": {
                        "A": problem['Choice 1'],
                        "B": problem['Choice 2'],
                        "C": problem['Choice 3'],
                        "D": problem['Choice 4'],
                    },
                    "expected_answer": problem['Answer'],
                    "model_response": response,
                }

                # Write to JSONL file
                f.write(json.dumps(result) + "\n")

        print(f"All results written to {output_file}")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # deploy_process is still None when the failure happened before
        # load_model() launched the server (e.g. missing dataset file);
        # signalling None would raise AttributeError out of this cleanup.
        if deploy_process is not None:
            print("Killing the server...")
            deploy_process.send_signal(signal.SIGINT)
# Run the evaluation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()