| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
|
|
| import os |
| import subprocess |
| import sys |
| from pathlib import Path |
|
|
| import pytest |
|
|
| from nemo.collections.asr.parts.utils.transcribe_utils import TextProcessingConfig |
|
|
| sys.path.append(str(Path(__file__).parents[2] / 'examples' / 'asr')) |
| import speech_to_text_eval |
|
|
|
|
@pytest.mark.parametrize(
    'build_args',
    [
        "ensemble.0.model=stt_es_conformer_ctc_large ensemble.1.model=stt_it_conformer_ctc_large",
        "ensemble.0.model=stt_es_conformer_transducer_large ensemble.1.model=stt_it_conformer_transducer_large",
        (
            "ensemble.0.model=stt_es_fastconformer_hybrid_large_pc ensemble.1.model=stt_it_fastconformer_hybrid_large_pc "
            "confidence.method_cfg.alpha=0.33 confidence.method_cfg.entropy_norm=exp "
        ),
        (
            "ensemble.0.model=stt_es_fastconformer_hybrid_large_pc "
            "ensemble.1.model=stt_it_fastconformer_hybrid_large_pc "
            "transcription.decoder_type=ctc "
        ),
        "ensemble.0.model=stt_es_conformer_ctc_large ensemble.1.model=stt_it_conformer_transducer_large",
        (
            "ensemble.0.model=stt_es_conformer_ctc_large "
            "ensemble.1.model=stt_it_conformer_ctc_large "
            f"ensemble.0.dev_manifest={Path(os.getenv('TEST_DATA_PATH', '')) / 'es' / 'dev_manifest.json'} "
            f"ensemble.1.dev_manifest={Path(os.getenv('TEST_DATA_PATH', '')) / 'it' / 'dev_manifest.json'} "
            "tune_confidence=True "
        ),
        (
            "ensemble.0.model=stt_es_conformer_transducer_large "
            "ensemble.1.model=stt_it_conformer_transducer_large "
            f"ensemble.0.dev_manifest={Path(os.getenv('TEST_DATA_PATH', '')) / 'es' / 'dev_manifest.json'} "
            f"ensemble.1.dev_manifest={Path(os.getenv('TEST_DATA_PATH', '')) / 'it' / 'dev_manifest.json'} "
            "tune_confidence=True "
        ),
    ],
    ids=[
        "CTC models",
        "Transducer models",
        "Hybrid models (Transducer mode)",
        "Hybrid models (CTC mode)",
        "CTC + Transducer",
        "CTC models + confidence tuning",
        "Transducer models + confidence tuning",
    ],
)
def test_confidence_ensemble(tmp_path, build_args):
    """Integration tests for confidence-ensembles.

    Tests building ensemble and running inference with the model.
    To use, make sure to define TEST_DATA_PATH env variable with path to
    the test data. The following structure is assumed:

        $TEST_DATA_PATH
        ├── es
        │   ├── dev
        │   ├── dev_manifest.json
        │   ├── test
        │   ├── train
        │   └── train_manifest.json
        ├── it
        │   ├── dev
        │   ├── dev_manifest.json
        │   ├── test
        │   ├── test_manifest.json
        │   ├── train
        │   └── train_manifest.json
        └── test_manifest.json

    Args:
        tmp_path: pytest's per-test temporary directory; the built
            ``ensemble.nemo`` and the evaluation ``output.json`` are
            written there.
        build_args: whitespace-separated command-line overrides that are
            appended to the ``build_ensemble.py`` invocation.

    Raises:
        ValueError: if the TEST_DATA_PATH env variable is unset or empty.
        subprocess.CalledProcessError: if ``build_ensemble.py`` exits with
            a non-zero status.
    """
    # Fail right away with a clear message instead of deep inside the build
    # script when the test data location has not been configured.
    if not os.getenv("TEST_DATA_PATH"):
        raise ValueError("TEST_DATA_PATH env variable has to be defined!")

    test_data_path = Path(os.environ['TEST_DATA_PATH'])
    ensemble_path = tmp_path / 'ensemble.nemo'

    # Pass an argv list (no shell) so that paths containing spaces or shell
    # metacharacters are forwarded verbatim, and use sys.executable so the
    # build script runs under the same interpreter/environment as this test.
    build_ensemble_cmd = [
        sys.executable,
        str(Path(__file__).parent / 'build_ensemble.py'),
        '--config-name=ensemble_config.yaml',
        f'output_path={ensemble_path}',
        # build_args is a single space-separated string of overrides.
        *build_args.split(),
    ]
    subprocess.run(build_ensemble_cmd, check=True)

    # Evaluate the freshly built ensemble on the combined test manifest.
    eval_cfg = speech_to_text_eval.EvaluationConfig(
        dataset_manifest=str(test_data_path / 'test_manifest.json'),
        output_filename=str(tmp_path / 'output.json'),
        model_path=str(ensemble_path),
        text_processing=TextProcessingConfig(punctuation_marks=".,?", do_lowercase=True, rm_punctuation=True),
    )

    results = speech_to_text_eval.main(eval_cfg)
    # Threshold on the metric reported by the evaluation script
    # (presumably an error rate such as WER, lower is better; confirm
    # against speech_to_text_eval).
    assert results.metric_value < 0.20
|
|