niobures committed on Jun 14, 2025

Commit

74e8c79

verified ·

1 Parent(s): c17ac2b

GigaAM

Browse files

Files changed (24) hide show

.gitattributes +35 -35
Examples/Dockerfile +13 -0
Examples/README.md +126 -0
Examples/ctc_inference.py +83 -0
Examples/ctc_longform_inference.py +201 -0
Examples/emo_inference.py +81 -0
Examples/notebooks/GigaAM_CTC_Model_Usage_Example.ipynb +0 -0
Examples/notebooks/GigaAM_Emo_Model_Usage_Example.ipynb +955 -0
Examples/notebooks/GigaAM_Model_Usage_Example.ipynb +881 -0
Examples/notebooks/GigaAM_RNNT_Model_Usage_Example.ipynb +0 -0
Examples/rnnt_inference.py +98 -0
Examples/rnnt_longform_inference.py +210 -0
Examples/ssl_inference.py +55 -0
GigaAM-CTC/ctc_model_config.yaml +271 -0
GigaAM-CTC/ctc_model_weights.ckpt +3 -0
GigaAM-Emo/emo_model_config.yaml +38 -0
GigaAM-Emo/emo_model_weights.ckpt +3 -0
GigaAM-RNNT/rnnt_model_config.yaml +109 -0
GigaAM-RNNT/rnnt_model_weights.ckpt +3 -0
GigaAM/encoder_config.yaml +27 -0
GigaAM/ssl_model_weights.ckpt +3 -0
README.md +84 -0
README_ru.md +83 -0
gigaam_scheme.svg +0 -0

.gitattributes CHANGED Viewed

@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

Examples/Dockerfile ADDED Viewed

	@@ -0,0 +1,13 @@

+# Base image: NVIDIA NeMo container (release 23.10).
+FROM nvcr.io/nvidia/nemo:23.10
+# The download step below relies on {a,b,c} brace expansion, which is a bash
+# feature that POSIX sh does not perform; run all following RUN steps in bash.
+SHELL ["/bin/bash", "-c"]
+RUN mkdir -p /workspace/data
+WORKDIR /workspace/data
+# Fetch model weights, configs, the tokenizer archive and the example audio files.
+RUN wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/{ssl_model_weights.ckpt,emo_model_weights.ckpt,ctc_model_weights.ckpt,rnnt_model_weights.ckpt,ctc_model_config.yaml,emo_model_config.yaml,encoder_config.yaml,rnnt_model_config.yaml,tokenizer_all_sets.tar,example.wav,long_example.wav}
+RUN tar -xf tokenizer_all_sets.tar && rm tokenizer_all_sets.tar
+RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+RUN pip install Cython
+# NeMo is pinned to a specific commit.
+RUN pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]
+RUN pip install -U soundfile
+RUN pip install pyannote.audio==3.2.0

Examples/README.md ADDED Viewed

	@@ -0,0 +1,126 @@

+* [Virtual environment](#virtual-environment)
+* [Docker](#docker)
+* For long-form inference:
+  * generate [Hugging Face API token](https://huggingface.co/docs/hub/security-tokens)
+  * accept the conditions to access [pyannote/voice-activity-detection](https://huggingface.co/pyannote/voice-activity-detection) files and content
+  * accept the conditions to access [pyannote/segmentation](https://huggingface.co/pyannote/segmentation) files and content
+## Virtual environment
+```bash
+apt install python3-dev
+apt install python3-venv
+apt install ffmpeg libavcodec-extra
+```
+```bash
+python3.10 -m venv venv && . venv/bin/activate
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+pip install Cython
+pip install -U wheel
+pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]
+pip install pyannote.audio==3.2.0
+mkdir ./data
+wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/{ssl_model_weights.ckpt,emo_model_weights.ckpt,ctc_model_weights.ckpt,rnnt_model_weights.ckpt,ctc_model_config.yaml,emo_model_config.yaml,encoder_config.yaml,rnnt_model_config.yaml,tokenizer_all_sets.tar,example.wav,long_example.wav} -P ./data && tar -xf ./data/tokenizer_all_sets.tar --directory ./data/ && rm ./data/tokenizer_all_sets.tar
+# GigaAM
+python ssl_inference.py --encoder_config ./data/encoder_config.yaml \
+    --model_weights ./data/ssl_model_weights.ckpt --device cuda --audio_path ./data/example.wav
+# encoded signal shape: torch.Size([1, 768, 283])
+# GigaAM-CTC
+python ctc_inference.py --model_config ./data/ctc_model_config.yaml \
+    --model_weights ./data/ctc_model_weights.ckpt --device cuda --audio_path ./data/example.wav
+# transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
+# GigaAM-CTC long-form
+python ctc_longform_inference.py --model_config ./data/ctc_model_config.yaml \
+    --model_weights ./data/ctc_model_weights.ckpt --device cuda \
+    --audio_path ./data/long_example.wav --hf_token <YOUR_HF_TOKEN>
+# [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам эгумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
+# [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темную живопись икон и возлощенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вашине старинный свод
+# ...
+# GigaAM-RNNT
+python rnnt_inference.py --model_config ./data/rnnt_model_config.yaml \
+    --model_weights ./data/rnnt_model_weights.ckpt --tokenizer_path ./data/tokenizer_all_sets \
+    --device cuda --audio_path ./data/example.wav
+# transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
+# GigaAM-RNNT long-form
+python rnnt_longform_inference.py --model_config ./data/rnnt_model_config.yaml \
+    --model_weights ./data/rnnt_model_weights.ckpt --tokenizer_path ./data/tokenizer_all_sets \
+    --device cuda --audio_path ./data/long_example.wav --hf_token <YOUR_HF_TOKEN>
+# [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам игумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
+# [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темну живопись икон и возлащенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вышине старинный свод
+# ...
+# GigaAM-Emo
+python emo_inference.py --model_config ./data/emo_model_config.yaml \
+    --model_weights ./data/emo_model_weights.ckpt --device cuda --audio_path ./data/example.wav
+# angry: 0.000, sad: 0.002, neutral: 0.923, positive: 0.074
+```
+## Docker
+```bash
+docker build -t gigaam_image .
+# GigaAM
+docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
+    python /workspace/gigaam/ssl_inference.py --encoder_config /workspace/data/encoder_config.yaml \
+    --model_weights /workspace/data/ssl_model_weights.ckpt \
+    --device cuda --audio_path /workspace/data/example.wav
+# encoded signal shape: torch.Size([1, 768, 283])
+# GigaAM-CTC
+docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
+    python /workspace/gigaam/ctc_inference.py --model_config /workspace/data/ctc_model_config.yaml \
+    --model_weights /workspace/data/ctc_model_weights.ckpt \
+    --device cuda --audio_path /workspace/data/example.wav
+# transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
+# GigaAM-CTC long-form
+docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
+    python /workspace/gigaam/ctc_longform_inference.py --model_config /workspace/data/ctc_model_config.yaml \
+    --model_weights /workspace/data/ctc_model_weights.ckpt --device cuda \
+    --audio_path /workspace/data/long_example.wav --hf_token <YOUR_HF_TOKEN>
+# [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам эгумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
+# [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темную живопись икон и возлощенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вашине старинный свод
+# ...
+# GigaAM-RNNT
+docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
+    python /workspace/gigaam/rnnt_inference.py --model_config /workspace/data/rnnt_model_config.yaml \
+    --model_weights /workspace/data/rnnt_model_weights.ckpt --tokenizer_path /workspace/data/tokenizer_all_sets \
+    --device cuda --audio_path /workspace/data/example.wav
+# transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
+# GigaAM-RNNT long-form
+docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
+    python /workspace/gigaam/rnnt_longform_inference.py --model_config /workspace/data/rnnt_model_config.yaml \
+    --model_weights /workspace/data/rnnt_model_weights.ckpt --tokenizer_path /workspace/data/tokenizer_all_sets \
+    --device cuda --audio_path /workspace/data/long_example.wav --hf_token <YOUR_HF_TOKEN>
+# [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам игумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
+# [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темну живопись икон и возлащенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вышине старинный свод
+# ...
+# GigaAM-Emo
+docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
+    python /workspace/gigaam/emo_inference.py --model_config /workspace/data/emo_model_config.yaml \
+    --model_weights /workspace/data/emo_model_weights.ckpt \
+    --device cuda --audio_path /workspace/data/example.wav
+# angry: 0.000, sad: 0.002, neutral: 0.923, positive: 0.074
+```

Examples/ctc_inference.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import argparse
+import torch
+import torchaudio
+from nemo.collections.asr.models import EncDecCTCModel
+from nemo.collections.asr.modules.audio_preprocessing import (
+    AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
+)
+from nemo.collections.asr.parts.preprocessing.features import (
+    FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
+)
+class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
+    """NeMo torchaudio filterbank featurizer with a selectable mel scale.
+    After the parent is initialised, its ``_mel_spec_extractor`` is replaced by
+    a ``torchaudio.transforms.MelSpectrogram`` built with ``mel_scale``.
+    """
+    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
+        # These two options are removed before the parent constructor is called.
+        for dropped in ("window_size", "window_stride"):
+            kwargs.pop(dropped, None)
+        super().__init__(**kwargs)
+        # Collected in the same order the extractor arguments were evaluated before.
+        mel_options = dict(
+            sample_rate=self._sample_rate,
+            win_length=self.win_length,
+            hop_length=self.hop_length,
+            n_mels=kwargs["nfilt"],
+            window_fn=self.torch_windows[kwargs["window"]],
+            mel_scale=mel_scale,
+            norm=kwargs["mel_norm"],
+            n_fft=kwargs["n_fft"],
+            f_max=kwargs.get("highfreq", None),
+            f_min=kwargs.get("lowfreq", 0),
+            wkwargs=wkwargs,
+        )
+        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(**mel_options)
+class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
+    """NeMo mel-spectrogram preprocessor whose featurizer honours ``mel_scale``.
+    The parent is built from the unmodified keyword arguments; the featurizer is
+    then replaced by the local ``FilterbankFeaturesTA``.
+    """
+    def __init__(self, mel_scale: str = "htk", **kwargs):
+        super().__init__(**kwargs)
+        # The featurizer receives the value of ``features`` under the name ``nfilt``.
+        kwargs["nfilt"] = kwargs.pop("features")
+        # Deprecated arguments; kept for config compatibility
+        self.featurizer = FilterbankFeaturesTA(mel_scale=mel_scale, **kwargs)
+def _parse_args():
+    """Parse the command-line options of the GigaAM-CTC inference script."""
+    cli = argparse.ArgumentParser(
+        description="Run inference using GigaAM-CTC checkpoint"
+    )
+    # (flag, help text) pairs, registered in this order.
+    options = (
+        ("--model_config", "Path to GigaAM-CTC config file (.yaml)"),
+        ("--model_weights", "Path to GigaAM-CTC checkpoint file (.ckpt)"),
+        ("--audio_path", "Path to audio signal"),
+        ("--device", "Device: cpu / cuda"),
+    )
+    for flag, help_text in options:
+        cli.add_argument(flag, help=help_text)
+    return cli.parse_args()
+def main(model_config: str, model_weights: str, device: str, audio_path: str):
+    """Build GigaAM-CTC from a config, load its weights, transcribe one file and print the text."""
+    asr_model = EncDecCTCModel.from_config_file(model_config)
+    # Weights are first loaded on CPU and moved to the target device with the model.
+    state_dict = torch.load(model_weights, map_location="cpu")
+    asr_model.load_state_dict(state_dict, strict=False)
+    asr_model = asr_model.to(device)
+    asr_model.eval()
+    results = asr_model.transcribe([audio_path])
+    transcription = results[0]
+    print(f"transcription: {transcription}")
+if __name__ == "__main__":
+    # The parsed option names match main()'s parameters one-to-one.
+    main(**vars(_parse_args()))

Examples/ctc_longform_inference.py ADDED Viewed

	@@ -0,0 +1,201 @@

+import argparse
+from io import BytesIO
+from typing import List, Tuple
+import numpy as np
+import torch
+import torchaudio
+from nemo.collections.asr.models import EncDecCTCModel
+from nemo.collections.asr.modules.audio_preprocessing import (
+    AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
+)
+from nemo.collections.asr.parts.preprocessing.features import (
+    FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
+)
+from pyannote.audio import Pipeline
+from pydub import AudioSegment
+class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
+    """NeMo torchaudio filterbank featurizer with a selectable mel scale.
+    After the parent is initialised, its ``_mel_spec_extractor`` is replaced by
+    a ``torchaudio.transforms.MelSpectrogram`` built with ``mel_scale``.
+    """
+    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
+        # These two options are removed before the parent constructor is called.
+        for dropped in ("window_size", "window_stride"):
+            kwargs.pop(dropped, None)
+        super().__init__(**kwargs)
+        # Collected in the same order the extractor arguments were evaluated before.
+        mel_options = dict(
+            sample_rate=self._sample_rate,
+            win_length=self.win_length,
+            hop_length=self.hop_length,
+            n_mels=kwargs["nfilt"],
+            window_fn=self.torch_windows[kwargs["window"]],
+            mel_scale=mel_scale,
+            norm=kwargs["mel_norm"],
+            n_fft=kwargs["n_fft"],
+            f_max=kwargs.get("highfreq", None),
+            f_min=kwargs.get("lowfreq", 0),
+            wkwargs=wkwargs,
+        )
+        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(**mel_options)
+class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
+    """NeMo mel-spectrogram preprocessor whose featurizer honours ``mel_scale``.
+    The parent is built from the unmodified keyword arguments; the featurizer is
+    then replaced by the local ``FilterbankFeaturesTA``.
+    """
+    def __init__(self, mel_scale: str = "htk", **kwargs):
+        super().__init__(**kwargs)
+        # The featurizer receives the value of ``features`` under the name ``nfilt``.
+        kwargs["nfilt"] = kwargs.pop("features")
+        # Deprecated arguments; kept for config compatibility
+        self.featurizer = FilterbankFeaturesTA(mel_scale=mel_scale, **kwargs)
+def audiosegment_to_numpy(audiosegment: AudioSegment) -> np.ndarray:
+    """Convert an AudioSegment to float32 samples scaled into [-1.0, 1.0).
+    Multi-channel audio is returned with shape (frames, channels); mono audio
+    stays one-dimensional. The scale follows the segment's sample width instead
+    of assuming 16-bit PCM, so wider or narrower samples are normalised as well.
+    """
+    samples = np.array(audiosegment.get_array_of_samples())
+    channels = audiosegment.channels
+    if channels > 1:
+        # One column per channel (previously only the two-channel case was reshaped).
+        samples = samples.reshape((-1, channels))
+    # Full-scale magnitude of a signed integer that is `sample_width` bytes wide
+    # (2 bytes -> 32768.0). Falls back to 16-bit when the attribute is missing.
+    sample_width = getattr(audiosegment, "sample_width", 2)
+    full_scale = float(1 << (8 * sample_width - 1))
+    return samples.astype(np.float32, order="C") / full_scale
+def format_time(seconds: float) -> str:
+    """Render a duration in seconds as ``MM:SS:hh``, or ``HH:MM:SS:hh`` from one hour up.
+    The last field is hundredths of a second, truncated rather than rounded.
+    """
+    hours = int(seconds // 3600)
+    minutes = int((seconds % 3600) // 60)
+    within_minute = seconds % 60
+    whole_seconds = int(within_minute)
+    hundredths = int((within_minute - whole_seconds) * 100)
+    fields = [minutes, whole_seconds, hundredths]
+    if hours > 0:
+        # The hour field is only shown when it is non-zero.
+        fields.insert(0, hours)
+    return ":".join(f"{field:02}" for field in fields)
+def segment_audio(
+    audio_path: str,
+    pipeline: Pipeline,
+    max_duration: float = 22.0,
+    min_duration: float = 15.0,
+    new_chunk_threshold: float = 0.2,
+) -> Tuple[List[np.ndarray], List[List[float]]]:
+    """Cut a WAV file into speech chunks for ASR using a voice-activity pipeline.
+    Speech regions reported by ``pipeline`` are merged into chunks. A chunk is
+    closed when it is longer than ``min_duration`` and the pause before the next
+    region exceeds ``new_chunk_threshold``, or when adding the next region would
+    push it past ``max_duration`` (all values in seconds).
+    Returns a pair ``(segments, boundaries)``: the sample array of every chunk
+    and the matching ``[start, end]`` times in seconds. Zero-length chunks are
+    never emitted.
+    """
+    # Prepare audio for pyannote vad pipeline
+    audio = AudioSegment.from_wav(audio_path)
+    audio_bytes = BytesIO()
+    audio.export(audio_bytes, format="wav")
+    audio_bytes.seek(0)
+    # Process audio with pipeline to obtain segments with speech activity
+    sad_segments = pipeline({"uri": "filename", "audio": audio_bytes})
+    # len(audio) is divided by 1000 to compare it with the region times below.
+    audio_seconds = len(audio) / 1000
+    segments = []
+    boundaries = []
+    curr_start = 0
+    curr_end = 0
+    def flush_chunk():
+        # Emit the chunk accumulated so far. Empty chunks are skipped: without
+        # this guard a first speech region ending after max_duration produced
+        # a bogus [0, 0] chunk with no samples.
+        if curr_end > curr_start:
+            segments.append(
+                audiosegment_to_numpy(audio[curr_start * 1000 : curr_end * 1000])
+            )
+            boundaries.append([curr_start, curr_end])
+    # Concat segments from pipeline into chunks for asr according to max/min duration
+    for segment in sad_segments.get_timeline().support():
+        start = max(0, segment.start)
+        end = min(audio_seconds, segment.end)
+        curr_duration = curr_end - curr_start
+        pause_after_long_chunk = (
+            curr_duration > min_duration and start - curr_end > new_chunk_threshold
+        )
+        would_exceed_max = curr_duration + (end - curr_end) > max_duration
+        if pause_after_long_chunk or would_exceed_max:
+            flush_chunk()
+            curr_start = start
+        curr_end = end
+    # Emit whatever is left after the last region.
+    flush_chunk()
+    return segments, boundaries
+def _parse_args():
+    """Parse the command-line options of the GigaAM-CTC long-form inference script.
+    ``--fp16`` is parsed as a boolean (true/false, yes/no, 1/0, on/off) and
+    ``--batch_size`` as an integer, so values given on the command line have
+    the same types as the defaults.
+    """
+    def _str2bool(value):
+        # argparse hands over raw strings, and any non-empty string (including
+        # "False") is truthy, so the text has to be interpreted explicitly.
+        if isinstance(value, bool):
+            return value
+        lowered = str(value).strip().lower()
+        if lowered in ("1", "true", "t", "yes", "y", "on"):
+            return True
+        if lowered in ("0", "false", "f", "no", "n", "off"):
+            return False
+        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")
+    parser = argparse.ArgumentParser(
+        description="Run long-form inference using GigaAM-CTC checkpoint"
+    )
+    parser.add_argument("--model_config", help="Path to GigaAM-CTC config file (.yaml)")
+    parser.add_argument(
+        "--model_weights", help="Path to GigaAM-CTC checkpoint file (.ckpt)"
+    )
+    parser.add_argument("--audio_path", help="Path to audio signal")
+    parser.add_argument(
+        "--hf_token", help="HuggingFace token for using pyannote Pipeline"
+    )
+    parser.add_argument("--device", help="Device: cpu / cuda")
+    parser.add_argument(
+        "--fp16", help="Run in FP16 mode", type=_str2bool, default=True
+    )
+    parser.add_argument(
+        "--batch_size",
+        help="Batch size for acoustic model inference",
+        type=int,
+        default=10,
+    )
+    return parser.parse_args()
+def main(
+    model_config: str,
+    model_weights: str,
+    device: str,
+    audio_path: str,
+    hf_token: str,
+    fp16: bool,
+    batch_size: int = 10,
+):
+    """Transcribe a long recording chunk by chunk and print each chunk with its time span.
+    The audio is split with the pyannote voice-activity pipeline, every chunk is
+    transcribed by GigaAM-CTC, and one "[start - end]: text" line is printed per chunk.
+    """
+    # Half precision is only used off-CPU and when requested.
+    use_fp16 = device != "cpu" and fp16
+    # Initialize model
+    asr_model = EncDecCTCModel.from_config_file(model_config)
+    state_dict = torch.load(model_weights, map_location="cpu")
+    asr_model.load_state_dict(state_dict, strict=False)
+    asr_model = asr_model.to(device)
+    if use_fp16:
+        asr_model = asr_model.half()
+        # The preprocessor is converted back to float32 after the model is halved.
+        asr_model.preprocessor = asr_model.preprocessor.float()
+    asr_model.eval()
+    # Initialize pyannote pipeline
+    vad_pipeline = Pipeline.from_pretrained(
+        "pyannote/voice-activity-detection", use_auth_token=hf_token
+    ).to(torch.device(device))
+    # Segment audio
+    segments, boundaries = segment_audio(audio_path, vad_pipeline)
+    # Transcribe segments
+    if use_fp16:
+        with torch.autocast(device_type="cuda", dtype=torch.float16):
+            transcriptions = asr_model.transcribe(segments, batch_size=batch_size)
+    else:
+        transcriptions = asr_model.transcribe(segments, batch_size=batch_size)
+    for text, (chunk_start, chunk_end) in zip(transcriptions, boundaries):
+        print(
+            f"[{format_time(chunk_start)} - {format_time(chunk_end)}]: {text}\n"
+        )
+if __name__ == "__main__":
+    # The parsed option names match main()'s parameters one-to-one.
+    main(**vars(_parse_args()))

Examples/emo_inference.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import argparse
+from typing import List, Union
+import hydra
+import soundfile
+import torch
+from omegaconf import DictConfig, ListConfig, OmegaConf
+class SpecScaler(torch.nn.Module):
+    """Clamp a tensor to [1e-9, 1e9] and return its natural logarithm.
+    The clamp is applied in place, so the input tensor itself is modified.
+    """
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        clamped = x.clamp_(min=1e-9, max=1e9)
+        return clamped.log()
+class GigaAMEmo(torch.nn.Module):
+    """Emotion classifier assembled from a config: feature extractor, encoder, linear head.
+    The three sub-modules are instantiated with hydra from ``conf.feature_extractor``,
+    ``conf.encoder`` and ``conf.classification_head``; ``conf.id2name`` is kept
+    as ``id2name`` for labelling the outputs.
+    """
+    def __init__(self, conf: Union[DictConfig, ListConfig]):
+        super().__init__()
+        self.id2name = conf.id2name
+        self.feature_extractor = hydra.utils.instantiate(conf.feature_extractor)
+        self.conformer = hydra.utils.instantiate(conf.encoder)
+        self.linear_head = hydra.utils.instantiate(conf.classification_head)
+    @property
+    def device(self):
+        """Device of the first model parameter."""
+        return next(self.parameters()).device
+    def forward(self, features, features_length=None):
+        """Return the classification-head logits for ``features``.
+        A 2-D ``features`` tensor gets a leading dimension of size one. When
+        ``features_length`` is None, every item is given the full size of the
+        last dimension of ``features`` as its length.
+        """
+        if features.dim() == 2:
+            features = features.unsqueeze(0)
+        # Compare against None explicitly: `not tensor` raises for tensors with
+        # more than one element and would treat a single zero length as missing.
+        if features_length is None:
+            features_length = torch.ones(features.shape[0], device=self.device) * features.shape[-1]
+        encoded, _ = self.conformer(audio_signal=features, length=features_length)
+        # Average the encoder output over its whole last dimension.
+        encoded_pooled = torch.nn.functional.avg_pool1d(
+            encoded, kernel_size=encoded.shape[-1]
+        ).squeeze(-1)
+        logits = self.linear_head(encoded_pooled)
+        return logits
+    def get_probs(self, audio_path: str) -> List[List[float]]:
+        """Read an audio file and return the softmax (over dim 1) of its logits as nested lists."""
+        audio_signal, _ = soundfile.read(audio_path, dtype="float32")
+        audio_tensor = torch.tensor(audio_signal).float().to(self.device)
+        features = self.feature_extractor(audio_tensor)
+        logits = self.forward(features)
+        probs = torch.nn.functional.softmax(logits, dim=1).detach().tolist()
+        return probs
+def _parse_args():
+    """Parse the command-line options of the GigaAM-Emo inference script."""
+    cli = argparse.ArgumentParser(
+        description="Run inference using GigaAM-Emo checkpoint"
+    )
+    # (flag, help text) pairs, registered in this order.
+    options = (
+        ("--model_config", "Path to GigaAM-Emo config file (.yaml)"),
+        ("--model_weights", "Path to GigaAM-Emo checkpoint file (.ckpt)"),
+        ("--audio_path", "Path to audio signal"),
+        ("--device", "Device: cpu / cuda"),
+    )
+    for flag, help_text in options:
+        cli.add_argument(flag, help=help_text)
+    return cli.parse_args()
+def main(model_config: str, model_weights: str, device: str, audio_path: str):
+    """Load GigaAM-Emo, score one audio file and print "name: probability" pairs."""
+    emo_model = GigaAMEmo(OmegaConf.load(model_config))
+    # Weights are first loaded on CPU and moved to the target device with the model.
+    state_dict = torch.load(model_weights, map_location="cpu")
+    emo_model.load_state_dict(state_dict, strict=False)
+    emo_model = emo_model.to(device)
+    emo_model.eval()
+    with torch.no_grad():
+        all_probs = emo_model.get_probs(audio_path)
+        probs = all_probs[0]
+    report = [
+        f"{emo_model.id2name[i]}: {p:.3f}" for i, p in enumerate(probs)
+    ]
+    print(", ".join(report))
+if __name__ == "__main__":
+    # The parsed option names match main()'s parameters one-to-one.
+    main(**vars(_parse_args()))

Examples/notebooks/GigaAM_CTC_Model_Usage_Example.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

Examples/notebooks/GigaAM_Emo_Model_Usage_Example.ipynb ADDED Viewed

	@@ -0,0 +1,955 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "oREzT-effoFr"
+   },
+   "source": [
+    "### Installing and importing dependencies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "yxU1SOPfWxab",
+    "outputId": "e9b2c73a-d3d4-4ba9-8ce1-3527c95c4d3f"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting wget\n",
+      "  Downloading wget-3.2.zip (10 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Building wheels for collected packages: wget\n",
+      "  Building wheel for wget (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9656 sha256=2e82f0e3a185ee764cf0a1eef86b3f525139a342d3630e878d05860de80d6dee\n",
+      "  Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769\n",
+      "Successfully built wget\n",
+      "Installing collected packages: wget\n",
+      "Successfully installed wget-3.2\n",
+      "Reading package lists... Done\n",
+      "Building dependency tree... Done\n",
+      "Reading state information... Done\n",
+      "libsndfile1 is already the newest version (1.0.31-2ubuntu0.1).\n",
+      "ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).\n",
+      "The following additional packages will be installed:\n",
+      "  libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1\n",
+      "Suggested packages:\n",
+      "  libsox-fmt-all\n",
+      "The following NEW packages will be installed:\n",
+      "  libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1 sox\n",
+      "0 upgraded, 7 newly installed, 0 to remove and 45 not upgraded.\n",
+      "Need to get 617 kB of archives.\n",
+      "After this operation, 1,764 kB of additional disk space will be used.\n",
+      "Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrnb0 amd64 0.1.5-1 [94.8 kB]\n",
+      "Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrwb0 amd64 0.1.5-1 [49.1 kB]\n",
+      "Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox3 amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [240 kB]\n",
+      "Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-alsa amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [11.2 kB]\n",
+      "Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 libwavpack1 amd64 5.4.0-1build2 [83.7 kB]\n",
+      "Get:6 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-base amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [33.7 kB]\n",
+      "Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 sox amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [104 kB]\n",
+      "Fetched 617 kB in 0s (2,444 kB/s)\n",
+      "Selecting previously unselected package libopencore-amrnb0:amd64.\n",
+      "(Reading database ... 121918 files and directories currently installed.)\n",
+      "Preparing to unpack .../0-libopencore-amrnb0_0.1.5-1_amd64.deb ...\n",
+      "Unpacking libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
+      "Selecting previously unselected package libopencore-amrwb0:amd64.\n",
+      "Preparing to unpack .../1-libopencore-amrwb0_0.1.5-1_amd64.deb ...\n",
+      "Unpacking libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
+      "Selecting previously unselected package libsox3:amd64.\n",
+      "Preparing to unpack .../2-libsox3_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
+      "Unpacking libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Selecting previously unselected package libsox-fmt-alsa:amd64.\n",
+      "Preparing to unpack .../3-libsox-fmt-alsa_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
+      "Unpacking libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Selecting previously unselected package libwavpack1:amd64.\n",
+      "Preparing to unpack .../4-libwavpack1_5.4.0-1build2_amd64.deb ...\n",
+      "Unpacking libwavpack1:amd64 (5.4.0-1build2) ...\n",
+      "Selecting previously unselected package libsox-fmt-base:amd64.\n",
+      "Preparing to unpack .../5-libsox-fmt-base_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
+      "Unpacking libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Selecting previously unselected package sox.\n",
+      "Preparing to unpack .../6-sox_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
+      "Unpacking sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Setting up libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Setting up libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
+      "Setting up libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Setting up libwavpack1:amd64 (5.4.0-1build2) ...\n",
+      "Setting up libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
+      "Setting up libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Setting up sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Processing triggers for man-db (2.10.2-1) ...\n",
+      "Processing triggers for libc-bin (2.35-0ubuntu3.4) ...\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n",
+      "\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n",
+      "\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n",
+      "\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n",
+      "\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n",
+      "\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n",
+      "\n",
+      "\u001b[33mDEPRECATION: git+https://github.com/NVIDIA/NeMo.git#egg=nemo_toolkit[all] contains an egg fragment with a non-PEP 508 name pip 25.0 will enforce this behaviour change. A possible replacement is to use the req @ url syntax, and remove the egg fragment. Discussion can be found at https://github.com/pypa/pip/issues/11617\u001b[0m\u001b[33m\n",
+      "\u001b[0mCollecting nemo_toolkit[all]\n",
+      "  Cloning https://github.com/NVIDIA/NeMo.git to /tmp/pip-install-unbwo6dj/nemo-toolkit_de6e0e6e28ce411cafb3187496bb4905\n",
+      "  Running command git clone --filter=blob:none --quiet https://github.com/NVIDIA/NeMo.git /tmp/pip-install-unbwo6dj/nemo-toolkit_de6e0e6e28ce411cafb3187496bb4905\n",
+      "  Resolved https://github.com/NVIDIA/NeMo.git to commit 1fa961ba03ab5f8c91b278640e29807079373372\n",
+      "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+      "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+      "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting fiddle (from nemo_toolkit[all])\n",
+      "  Downloading fiddle-0.3.0-py3-none-any.whl (419 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m419.8/419.8 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: huggingface-hub>=0.20.3 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.23.1)\n",
+      "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.58.1)\n",
+      "Requirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.25.2)\n",
+      "Collecting onnx>=1.7.0 (from nemo_toolkit[all])\n",
+      "  Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m34.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.8.2)\n",
+      "Collecting ruamel.yaml (from nemo_toolkit[all])\n",
+      "  Downloading ruamel.yaml-0.18.6-py3-none-any.whl (117 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.8/117.8 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.2.2)\n",
+      "Requirement already satisfied: setuptools>=65.5.1 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (67.7.2)\n",
+      "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.15.2)\n",
+      "Requirement already satisfied: text-unidecode in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.3)\n",
+      "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.3.0+cu121)\n",
+      "Requirement already satisfied: tqdm>=4.41.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (4.66.4)\n",
+      "Requirement already satisfied: wget in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.2)\n",
+      "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.14.1)\n",
+      "Collecting black~=24.3 (from nemo_toolkit[all])\n",
+      "  Downloading black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting click==8.0.2 (from nemo_toolkit[all])\n",
+      "  Downloading click-8.0.2-py3-none-any.whl (97 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.6/97.6 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting isort<6.0.0,>5.1.0 (from nemo_toolkit[all])\n",
+      "  Downloading isort-5.13.2-py3-none-any.whl (92 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.3/92.3 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting parameterized (from nemo_toolkit[all])\n",
+      "  Downloading parameterized-0.9.0-py2.py3-none-any.whl (20 kB)\n",
+      "Requirement already satisfied: pytest in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.4.4)\n",
+      "Collecting pytest-mock (from nemo_toolkit[all])\n",
+      "  Downloading pytest_mock-3.14.0-py3-none-any.whl (9.9 kB)\n",
+      "Collecting pytest-runner (from nemo_toolkit[all])\n",
+      "  Downloading pytest_runner-6.0.1-py3-none-any.whl (7.2 kB)\n",
+      "Requirement already satisfied: sphinx in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.0.2)\n",
+      "Collecting sphinxcontrib-bibtex (from nemo_toolkit[all])\n",
+      "  Downloading sphinxcontrib_bibtex-2.6.2-py3-none-any.whl (40 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting wandb (from nemo_toolkit[all])\n",
+      "  Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.7/6.7 MB\u001b[0m \u001b[31m59.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.2.1)\n",
+      "Collecting hydra-core<=1.3.2,>1.3 (from nemo_toolkit[all])\n",
+      "  Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.5/154.5 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting omegaconf<=2.3 (from nemo_toolkit[all])\n",
+      "  Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pytorch-lightning>=2.2.1 (from nemo_toolkit[all])\n",
+      "  Downloading pytorch_lightning-2.2.5-py3-none-any.whl (802 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m802.3/802.3 kB\u001b[0m \u001b[31m45.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting torchmetrics>=0.11.0 (from nemo_toolkit[all])\n",
+      "  Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m868.8/868.8 kB\u001b[0m \u001b[31m58.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting transformers<=4.40.2,>=4.36.0 (from nemo_toolkit[all])\n",
+      "  Downloading transformers-4.40.2-py3-none-any.whl (9.0 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.0/9.0 MB\u001b[0m \u001b[31m76.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting webdataset>=0.2.86 (from nemo_toolkit[all])\n",
+      "  Downloading webdataset-0.2.86-py3-none-any.whl (70 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.4/70.4 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting datasets (from nemo_toolkit[all])\n",
+      "  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m542.0/542.0 kB\u001b[0m \u001b[31m46.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.0.0)\n",
+      "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.0.3)\n",
+      "Collecting sacremoses>=0.0.43 (from nemo_toolkit[all])\n",
+      "  Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m897.5/897.5 kB\u001b[0m \u001b[31m58.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: sentencepiece<1.0.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.99)\n",
+      "Collecting braceexpand (from nemo_toolkit[all])\n",
+      "  Downloading braceexpand-0.1.7-py2.py3-none-any.whl (5.9 kB)\n",
+      "Requirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.6.2)\n",
+      "Collecting einops (from nemo_toolkit[all])\n",
+      "  Downloading einops-0.8.0-py3-none-any.whl (43 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.2/43.2 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting g2p-en (from nemo_toolkit[all])\n",
+      "  Downloading g2p_en-2.1.0-py3-none-any.whl (3.1 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m78.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.7.1)\n",
+      "Collecting jiwer (from nemo_toolkit[all])\n",
+      "  Downloading jiwer-3.0.4-py3-none-any.whl (21 kB)\n",
+      "Collecting kaldi-python-io (from nemo_toolkit[all])\n",
+      "  Downloading kaldi-python-io-1.2.2.tar.gz (8.8 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting kaldiio (from nemo_toolkit[all])\n",
+      "  Downloading kaldiio-2.18.0-py3-none-any.whl (28 kB)\n",
+      "Collecting lhotse>=1.22.0 (from nemo_toolkit[all])\n",
+      "  Downloading lhotse-1.23.0-py3-none-any.whl (772 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m772.4/772.4 kB\u001b[0m \u001b[31m46.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: librosa>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.10.2.post1)\n",
+      "Collecting marshmallow (from nemo_toolkit[all])\n",
+      "  Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.7.1)\n",
+      "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (24.0)\n",
+      "Collecting pyannote.core (from nemo_toolkit[all])\n",
+      "  Downloading pyannote.core-5.0.0-py3-none-any.whl (58 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.5/58.5 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pyannote.metrics (from nemo_toolkit[all])\n",
+      "  Downloading pyannote.metrics-3.2.1-py3-none-any.whl (51 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.4/51.4 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pydub (from nemo_toolkit[all])\n",
+      "  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
+      "Collecting pyloudnorm (from nemo_toolkit[all])\n",
+      "  Downloading pyloudnorm-0.1.1-py3-none-any.whl (9.6 kB)\n",
+      "Collecting resampy (from nemo_toolkit[all])\n",
+      "  Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m82.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.11.4)\n",
+      "Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.12.1)\n",
+      "Collecting sox (from nemo_toolkit[all])\n",
+      "  Downloading sox-1.5.0.tar.gz (63 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.9/63.9 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting texterrors (from nemo_toolkit[all])\n",
+      "  Downloading texterrors-0.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m64.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting accelerated-scan (from nemo_toolkit[all])\n",
+      "  Downloading accelerated_scan-0.2.0-py3-none-any.whl (11 kB)\n",
+      "Collecting boto3 (from nemo_toolkit[all])\n",
+      "  Downloading boto3-1.34.113-py3-none-any.whl (139 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.3/139.3 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting causal-conv1d>=1.2.0 (from nemo_toolkit[all])\n",
+      "  Downloading causal_conv1d-1.2.2.post1.tar.gz (7.2 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting faiss-cpu (from nemo_toolkit[all])\n",
+      "  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m45.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting fasttext (from nemo_toolkit[all])\n",
+      "  Downloading fasttext-0.9.2.tar.gz (68 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m68.8/68.8 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting flask-restful (from nemo_toolkit[all])\n",
+      "  Downloading Flask_RESTful-0.3.10-py2.py3-none-any.whl (26 kB)\n",
+      "Collecting ftfy (from nemo_toolkit[all])\n",
+      "  Downloading ftfy-6.2.0-py3-none-any.whl (54 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.4/54.4 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: gdown in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.1.0)\n",
+      "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.9.0)\n",
+      "Collecting ijson (from nemo_toolkit[all])\n",
+      "  Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.8/111.8 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.42.1)\n",
+      "Collecting markdown2 (from nemo_toolkit[all])\n",
+      "  Downloading markdown2-2.4.13-py2.py3-none-any.whl (41 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: nltk>=3.6.5 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.8.1)\n",
+      "Collecting opencc<1.1.7 (from nemo_toolkit[all])\n",
+      "  Downloading OpenCC-1.1.6-cp310-cp310-manylinux1_x86_64.whl (778 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m778.3/778.3 kB\u001b[0m \u001b[31m54.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pangu (from nemo_toolkit[all])\n",
+      "  Downloading pangu-4.0.6.1-py3-none-any.whl (6.4 kB)\n",
+      "Collecting rapidfuzz (from nemo_toolkit[all])\n",
+      "  Downloading rapidfuzz-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m83.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting rouge-score (from nemo_toolkit[all])\n",
+      "  Downloading rouge_score-0.1.2.tar.gz (17 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting sacrebleu (from nemo_toolkit[all])\n",
+      "  Downloading sacrebleu-2.4.2-py3-none-any.whl (106 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.7/106.7 kB\u001b[0m \u001b[31m11.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting sentence-transformers (from nemo_toolkit[all])\n",
+      "  Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: tensorstore<0.1.46 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.45)\n",
+      "Collecting zarr (from nemo_toolkit[all])\n",
+      "  Downloading zarr-2.18.2-py3-none-any.whl (210 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m210.2/210.2 kB\u001b[0m \u001b[31m22.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting attrdict (from nemo_toolkit[all])\n",
+      "  Downloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)\n",
+      "Collecting kornia (from nemo_toolkit[all])\n",
+      "  Downloading kornia-0.7.2-py2.py3-none-any.whl (825 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m825.4/825.4 kB\u001b[0m \u001b[31m57.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pypinyin (from nemo_toolkit[all])\n",
+      "  Downloading pypinyin-0.51.0-py2.py3-none-any.whl (1.4 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m61.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pypinyin-dict (from nemo_toolkit[all])\n",
+      "  Downloading pypinyin_dict-0.8.0-py2.py3-none-any.whl (9.5 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.5/9.5 MB\u001b[0m \u001b[31m88.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting progress>=1.5 (from nemo_toolkit[all])\n",
+      "  Downloading progress-1.6.tar.gz (7.8 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Requirement already satisfied: tabulate>=0.8.7 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.9.0)\n",
+      "Collecting textdistance>=4.1.5 (from nemo_toolkit[all])\n",
+      "  Downloading textdistance-4.6.2-py3-none-any.whl (31 kB)\n",
+      "Collecting addict (from nemo_toolkit[all])\n",
+      "  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
+      "Collecting clip (from nemo_toolkit[all])\n",
+      "  Downloading clip-0.2.0.tar.gz (5.5 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting decord (from nemo_toolkit[all])\n",
+      "  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m74.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting diffusers>=0.19.3 (from nemo_toolkit[all])\n",
+      "  Downloading diffusers-0.28.0-py3-none-any.whl (2.2 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m77.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting einops-exts (from nemo_toolkit[all])\n",
+      "  Downloading einops_exts-0.0.4-py3-none-any.whl (3.9 kB)\n",
+      "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.31.6)\n",
+      "Collecting nerfacc>=0.5.3 (from nemo_toolkit[all])\n",
+      "  Downloading nerfacc-0.5.3-py3-none-any.whl (54 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.6/54.6 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting open-clip-torch (from nemo_toolkit[all])\n",
+      "  Downloading open_clip_torch-2.24.0-py3-none-any.whl (1.5 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m74.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting PyMCubes (from nemo_toolkit[all])\n",
+      "  Downloading PyMCubes-0.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (274 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.3/274.3 kB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting taming-transformers (from nemo_toolkit[all])\n",
+      "  Downloading taming_transformers-0.0.1-py3-none-any.whl (45 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.6/45.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting torchdiffeq (from nemo_toolkit[all])\n",
+      "  Downloading torchdiffeq-0.2.3-py3-none-any.whl (31 kB)\n",
+      "Collecting torchsde (from nemo_toolkit[all])\n",
+      "  Downloading torchsde-0.2.6-py3-none-any.whl (61 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting trimesh (from nemo_toolkit[all])\n",
+      "  Downloading trimesh-4.4.0-py3-none-any.whl (694 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m694.6/694.6 kB\u001b[0m \u001b[31m52.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting nemo-text-processing (from nemo_toolkit[all])\n",
+      "  Downloading nemo_text_processing-1.0.2-py3-none-any.whl (2.6 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m79.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting mypy-extensions>=0.4.3 (from black~=24.3->nemo_toolkit[all])\n",
+      "  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
+      "Collecting pathspec>=0.9.0 (from black~=24.3->nemo_toolkit[all])\n",
+      "  Downloading pathspec-0.12.1-py3-none-any.whl (31 kB)\n",
+      "Requirement already satisfied: platformdirs>=2 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.2.2)\n",
+      "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (2.0.1)\n",
+      "Requirement already satisfied: typing-extensions>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.11.0)\n",
+      "Collecting ninja (from causal-conv1d>=1.2.0->nemo_toolkit[all])\n",
+      "  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (7.1.0)\n",
+      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (3.14.0)\n",
+      "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2023.12.25)\n",
+      "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2.31.0)\n",
+      "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (0.4.3)\n",
+      "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (9.4.0)\n",
+      "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (2023.6.0)\n",
+      "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (6.0.1)\n",
+      "Collecting antlr4-python3-runtime==4.9.* (from hydra-core<=1.3.2,>1.3->nemo_toolkit[all])\n",
+      "  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "INFO: pip is looking at multiple versions of jiwer to determine which version is compatible with other requirements. This could take a while.\n",
+      "Collecting jiwer (from nemo_toolkit[all])\n",
+      "  Downloading jiwer-3.0.3-py3-none-any.whl (21 kB)\n",
+      "  Downloading jiwer-3.0.2-py3-none-any.whl (21 kB)\n",
+      "  Downloading jiwer-3.0.1-py3-none-any.whl (21 kB)\n",
+      "  Downloading jiwer-3.0.0-py3-none-any.whl (21 kB)\n",
+      "  Downloading jiwer-2.6.0-py3-none-any.whl (20 kB)\n",
+      "  Downloading jiwer-2.5.2-py3-none-any.whl (15 kB)\n",
+      "Collecting rapidfuzz (from nemo_toolkit[all])\n",
+      "  Downloading rapidfuzz-2.13.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m62.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from lhotse>=1.22.0->nemo_toolkit[all]) (3.0.1)\n",
+      "Collecting cytoolz>=0.10.1 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
+      "  Downloading cytoolz-0.12.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting intervaltree>=3.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
+      "  Downloading intervaltree-3.1.0.tar.gz (32 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting lilcom>=1.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
+      "  Downloading lilcom-1.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (87 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.1/87.1 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.4.2)\n",
+      "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (4.4.2)\n",
+      "Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.8.1)\n",
+      "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.3.7)\n",
+      "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.4)\n",
+      "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.0.8)\n",
+      "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.2.1)\n",
+      "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (0.12.1)\n",
+      "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (4.51.0)\n",
+      "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.4.5)\n",
+      "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (3.1.2)\n",
+      "Requirement already satisfied: rich>=12 in /usr/local/lib/python3.10/dist-packages (from nerfacc>=0.5.3->nemo_toolkit[all]) (13.7.1)\n",
+      "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->nemo_toolkit[all]) (0.41.1)\n",
+      "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.10/dist-packages (from onnx>=1.7.0->nemo_toolkit[all]) (3.20.3)\n",
+      "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil->nemo_toolkit[all]) (1.16.0)\n",
+      "Collecting lightning-utilities>=0.8.0 (from pytorch-lightning>=2.2.1->nemo_toolkit[all])\n",
+      "  Downloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)\n",
+      "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->nemo_toolkit[all]) (3.5.0)\n",
+      "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->nemo_toolkit[all]) (1.16.0)\n",
+      "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (1.12)\n",
+      "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.3)\n",
+      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.1.4)\n",
+      "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
+      "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
+      "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
+      "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
+      "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
+      "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
+      "Collecting nvidia-curand-cu12==10.3.2.106 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
+      "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
+      "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
+      "Collecting nvidia-nccl-cu12==2.20.5 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
+      "Collecting nvidia-nvtx-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
+      "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (2.3.0)\n",
+      "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch->nemo_toolkit[all])\n",
+      "  Downloading nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m57.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers<=4.40.2,>=4.36.0->nemo_toolkit[all]) (0.19.1)\n",
+      "Collecting botocore<1.35.0,>=1.34.113 (from boto3->nemo_toolkit[all])\n",
+      "  Downloading botocore-1.34.113-py3-none-any.whl (12.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m78.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3->nemo_toolkit[all])\n",
+      "  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n",
+      "Collecting s3transfer<0.11.0,>=0.10.0 (from boto3->nemo_toolkit[all])\n",
+      "  Downloading s3transfer-0.10.1-py3-none-any.whl (82 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.2/82.2 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (14.0.2)\n",
+      "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (0.6)\n",
+      "Collecting dill<0.3.9,>=0.3.0 (from datasets->nemo_toolkit[all])\n",
+      "  Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting xxhash (from datasets->nemo_toolkit[all])\n",
+      "  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting multiprocess (from datasets->nemo_toolkit[all])\n",
+      "  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m15.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (3.9.5)\n",
+      "Collecting pybind11>=2.2 (from fasttext->nemo_toolkit[all])\n",
+      "  Using cached pybind11-2.12.0-py3-none-any.whl (234 kB)\n",
+      "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (1.4.0)\n",
+      "Requirement already satisfied: graphviz in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (0.20.3)\n",
+      "Collecting libcst (from fiddle->nemo_toolkit[all])\n",
+      "  Downloading libcst-1.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m65.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting aniso8601>=0.82 (from flask-restful->nemo_toolkit[all])\n",
+      "  Downloading aniso8601-9.0.1-py2.py3-none-any.whl (52 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.8/52.8 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: Flask>=0.8 in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2.2.5)\n",
+      "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2023.4)\n",
+      "Requirement already satisfied: wcwidth<0.3.0,>=0.2.12 in /usr/local/lib/python3.10/dist-packages (from ftfy->nemo_toolkit[all]) (0.2.13)\n",
+      "Collecting distance>=0.1.3 (from g2p-en->nemo_toolkit[all])\n",
+      "  Downloading Distance-0.1.3.tar.gz (180 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m180.3/180.3 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Requirement already satisfied: pydantic>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from inflect->nemo_toolkit[all]) (2.7.1)\n",
+      "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown->nemo_toolkit[all]) (4.12.3)\n",
+      "Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.5.6)\n",
+      "Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
+      "Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.7.1)\n",
+      "Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.6.6)\n",
+      "Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (7.34.0)\n",
+      "Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.0.10)\n",
+      "Collecting kornia-rs>=0.1.0 (from kornia->nemo_toolkit[all])\n",
+      "  Downloading kornia_rs-0.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m89.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting cdifflib (from nemo-text-processing->nemo_toolkit[all])\n",
+      "  Downloading cdifflib-1.2.6.tar.gz (11 kB)\n",
+      "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+      "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+      "  Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
+      "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting pynini==2.1.5 (from nemo-text-processing->nemo_toolkit[all])\n",
+      "  Downloading pynini-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m161.3/161.3 MB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: Cython>=0.29 in /usr/local/lib/python3.10/dist-packages (from pynini==2.1.5->nemo-text-processing->nemo_toolkit[all]) (3.0.10)\n",
+      "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from open-clip-torch->nemo_toolkit[all]) (0.18.0+cu121)\n",
+      "Collecting timm (from open-clip-torch->nemo_toolkit[all])\n",
+      "  Downloading timm-1.0.3-py3-none-any.whl (2.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m85.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->nemo_toolkit[all]) (2024.1)\n",
+      "Requirement already satisfied: sortedcontainers>=2.0.4 in /usr/local/lib/python3.10/dist-packages (from pyannote.core->nemo_toolkit[all]) (2.4.0)\n",
+      "Collecting pyannote.database>=4.0.1 (from pyannote.metrics->nemo_toolkit[all])\n",
+      "  Downloading pyannote.database-5.1.0-py3-none-any.whl (48 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.1/48.1 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting docopt>=0.6.2 (from pyannote.metrics->nemo_toolkit[all])\n",
+      "  Downloading docopt-0.6.2.tar.gz (25 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Requirement already satisfied: future>=0.16.0 in /usr/local/lib/python3.10/dist-packages (from pyloudnorm->nemo_toolkit[all]) (0.18.3)\n",
+      "Requirement already satisfied: iniconfig in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (2.0.0)\n",
+      "Requirement already satisfied: pluggy<2.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.5.0)\n",
+      "Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.2.1)\n",
+      "Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml->nemo_toolkit[all])\n",
+      "  Downloading ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (526 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.7/526.7 kB\u001b[0m \u001b[31m48.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting portalocker (from sacrebleu->nemo_toolkit[all])\n",
+      "  Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n",
+      "Collecting colorama (from sacrebleu->nemo_toolkit[all])\n",
+      "  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
+      "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from sacrebleu->nemo_toolkit[all]) (4.9.4)\n",
+      "Requirement already satisfied: sphinxcontrib-applehelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.8)\n",
+      "Requirement already satisfied: sphinxcontrib-devhelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.6)\n",
+      "Requirement already satisfied: sphinxcontrib-jsmath in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.1)\n",
+      "Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.0.5)\n",
+      "Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.5 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.1.10)\n",
+      "Requirement already satisfied: sphinxcontrib-qthelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.7)\n",
+      "Requirement already satisfied: Pygments>=2.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.16.1)\n",
+      "Requirement already satisfied: docutils<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.18.1)\n",
+      "Requirement already satisfied: snowballstemmer>=1.1 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.2.0)\n",
+      "Requirement already satisfied: babel>=1.3 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.15.0)\n",
+      "Requirement already satisfied: alabaster<0.8,>=0.7 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.7.16)\n",
+      "Requirement already satisfied: imagesize in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.4.1)\n",
+      "Collecting docutils<0.19,>=0.14 (from sphinx->nemo_toolkit[all])\n",
+      "  Downloading docutils-0.17.1-py2.py3-none-any.whl (575 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m575.5/575.5 kB\u001b[0m \u001b[31m48.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pybtex>=0.24 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
+      "  Downloading pybtex-0.24.0-py2.py3-none-any.whl (561 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m561.4/561.4 kB\u001b[0m \u001b[31m50.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pybtex-docutils>=1.0.0 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
+      "  Downloading pybtex_docutils-1.0.3-py3-none-any.whl (6.4 kB)\n",
+      "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.64.0)\n",
+      "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (2.27.0)\n",
+      "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.2.0)\n",
+      "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.6)\n",
+      "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (0.7.2)\n",
+      "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.0.3)\n",
+      "Collecting plac (from texterrors->nemo_toolkit[all])\n",
+      "  Downloading plac-1.4.3-py2.py3-none-any.whl (22 kB)\n",
+      "Collecting loguru (from texterrors->nemo_toolkit[all])\n",
+      "  Downloading loguru-0.7.2-py3-none-any.whl (62 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from texterrors->nemo_toolkit[all]) (2.4.0)\n",
+      "Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
+      "  Downloading Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting trampoline>=0.1.2 (from torchsde->nemo_toolkit[all])\n",
+      "  Downloading trampoline-0.1.2-py3-none-any.whl (5.2 kB)\n",
+      "Collecting docker-pycreds>=0.4.0 (from wandb->nemo_toolkit[all])\n",
+      "  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
+      "Collecting gitpython!=3.1.29,>=1.0.0 (from wandb->nemo_toolkit[all])\n",
+      "  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.3/207.3 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb->nemo_toolkit[all]) (5.9.5)\n",
+      "Collecting sentry-sdk>=1.0.0 (from wandb->nemo_toolkit[all])\n",
+      "  Downloading sentry_sdk-2.3.1-py2.py3-none-any.whl (289 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m289.0/289.0 kB\u001b[0m \u001b[31m28.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting setproctitle (from wandb->nemo_toolkit[all])\n",
+      "  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
+      "Collecting asciitree (from zarr->nemo_toolkit[all])\n",
+      "  Downloading asciitree-0.3.3.tar.gz (4.0 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting numcodecs>=0.10.0 (from zarr->nemo_toolkit[all])\n",
+      "  Downloading numcodecs-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m63.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting fasteners (from zarr->nemo_toolkit[all])\n",
+      "  Downloading fasteners-0.19-py3-none-any.whl (18 kB)\n",
+      "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.113->boto3->nemo_toolkit[all]) (2.0.7)\n",
+      "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->nemo_toolkit[all]) (2.22)\n",
+      "Requirement already satisfied: toolz>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from cytoolz>=0.10.1->lhotse>=1.22.0->nemo_toolkit[all]) (0.12.1)\n",
+      "Requirement already satisfied: itsdangerous>=2.0 in /usr/local/lib/python3.10/dist-packages (from Flask>=0.8->flask-restful->nemo_toolkit[all]) (2.2.0)\n",
+      "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.3.1)\n",
+      "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (23.2.0)\n",
+      "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.4.1)\n",
+      "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (6.0.5)\n",
+      "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.9.4)\n",
+      "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (4.0.3)\n",
+      "Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
+      "  Downloading gitdb-4.0.11-py3-none-any.whl (62 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (5.3.3)\n",
+      "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.4.0)\n",
+      "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (4.9)\n",
+      "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (1.3.1)\n",
+      "Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.1.12)\n",
+      "Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.3.3)\n",
+      "Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all])\n",
+      "  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m73.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.5)\n",
+      "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (3.0.43)\n",
+      "Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
+      "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.1.7)\n",
+      "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (4.9.0)\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->nemo_toolkit[all]) (2.1.5)\n",
+      "Collecting typer>=0.12.1 (from pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
+      "  Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting latexcodec>=1.0.4 (from pybtex>=0.24->sphinxcontrib-bibtex->nemo_toolkit[all])\n",
+      "  Downloading latexcodec-3.0.0-py3-none-any.whl (18 kB)\n",
+      "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (0.7.0)\n",
+      "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (2.18.2)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.3.2)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.7)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (2024.2.2)\n",
+      "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (3.0.0)\n",
+      "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->nemo_toolkit[all]) (1.3.0)\n",
+      "Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.5)\n",
+      "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown->nemo_toolkit[all]) (2.5)\n",
+      "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata->diffusers>=0.19.3->nemo_toolkit[all]) (3.18.2)\n",
+      "INFO: pip is looking at multiple versions of levenshtein to determine which version is compatible with other requirements. This could take a while.\n",
+      "Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
+      "  Downloading Levenshtein-0.25.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Downloading Levenshtein-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Downloading Levenshtein-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (169 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.4/169.4 kB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Downloading Levenshtein-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (172 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m172.9/172.9 kB\u001b[0m \u001b[31m19.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (1.7.1)\n",
+      "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
+      "  Downloading smmap-5.0.1-py3-none-any.whl (24 kB)\n",
+      "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
+      "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (0.1.2)\n",
+      "Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (24.0.1)\n",
+      "Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (23.1.0)\n",
+      "Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.7.2)\n",
+      "Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.10.4)\n",
+      "Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.4)\n",
+      "Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.6.0)\n",
+      "Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.3)\n",
+      "Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
+      "Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.20.0)\n",
+      "Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.0.0)\n",
+      "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.0)\n",
+      "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.6.0)\n",
+      "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (3.2.2)\n",
+      "Collecting shellingham>=1.3.0 (from typer>=0.12.1->pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
+      "  Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n",
+      "Requirement already satisfied: jupyter-server>=1.8 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.24.0)\n",
+      "Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.2.4)\n",
+      "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.1.0)\n",
+      "Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.7.1)\n",
+      "Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.4)\n",
+      "Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.3.0)\n",
+      "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
+      "Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.10.0)\n",
+      "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.5.1)\n",
+      "Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.0)\n",
+      "Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2.19.1)\n",
+      "Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (4.19.2)\n",
+      "Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (21.2.0)\n",
+      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2023.12.1)\n",
+      "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.35.1)\n",
+      "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
+      "Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (3.7.1)\n",
+      "Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.0)\n",
+      "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.5.1)\n",
+      "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.1)\n",
+      "Building wheels for collected packages: causal-conv1d, antlr4-python3-runtime, progress, clip, fasttext, kaldi-python-io, nemo_toolkit, rouge-score, sox, distance, docopt, intervaltree, asciitree, cdifflib\n",
+      "  Building wheel for causal-conv1d (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for causal-conv1d: filename=causal_conv1d-1.2.2.post1-cp310-cp310-linux_x86_64.whl size=103643300 sha256=2bba8823ae89bd79c2d067978e0e533fab8298f69855bfc5d199828b278cf66c\n",
+      "  Stored in directory: /root/.cache/pip/wheels/22/a7/db/0c9482dec3707ad23181b0eb2da40e4b8f26aaed49752fc49f\n",
+      "  Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=d26518c128f80048ec70721551489517353867c7668a281f27cf1a20b9acd114\n",
+      "  Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n",
+      "  Building wheel for progress (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for progress: filename=progress-1.6-py3-none-any.whl size=9614 sha256=87c634c79d4e56e317499682766011b5d0e28953e43f6a3754957d0f4fd3633a\n",
+      "  Stored in directory: /root/.cache/pip/wheels/a2/68/5f/c339b20a41659d856c93ccdce6a33095493eb82c3964aac5a1\n",
+      "  Building wheel for clip (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for clip: filename=clip-0.2.0-py3-none-any.whl size=6989 sha256=3e9ac01ba0eff273ea70feaf80d486a07683956515496b6dfeeafe81c9caae24\n",
+      "  Stored in directory: /root/.cache/pip/wheels/7f/5c/e6/2c0fdb453a3569188864b17e9676bea8b3b7e160c037117869\n",
+      "  Building wheel for fasttext (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for fasttext: filename=fasttext-0.9.2-cp310-cp310-linux_x86_64.whl size=4227140 sha256=708a73871f9ae384ea66b706bb0b73b6c624f23ce0d19882b6711b31abed8091\n",
+      "  Stored in directory: /root/.cache/pip/wheels/a5/13/75/f811c84a8ab36eedbaef977a6a58a98990e8e0f1967f98f394\n",
+      "  Building wheel for kaldi-python-io (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for kaldi-python-io: filename=kaldi_python_io-1.2.2-py3-none-any.whl size=8949 sha256=5399346b043c1ae3d7431729bbd34a5206a1bbe26c41b5ba69d2b45879740d55\n",
+      "  Stored in directory: /root/.cache/pip/wheels/b7/23/5f/49d3a826be576faf61d84e8028e1914bb36a5586ee2613b087\n",
+      "  Building wheel for nemo_toolkit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for nemo_toolkit: filename=nemo_toolkit-2.0.0rc1-py3-none-any.whl size=3709778 sha256=3eb9e4278cef98370e97bf7cc0f009cdebbdaaf8fac7a6584289fdd8abfbd8c8\n",
+      "  Stored in directory: /tmp/pip-ephem-wheel-cache-sdhc6zr1/wheels/c3/4e/45/ab3d29aa73df619f27b371cacf809d5330a18f794879163c1b\n",
+      "  Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=78515a9f3e94b274e69e68c059af462dc7cc1b10b51c1b6d419704ea6b4cffe5\n",
+      "  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n",
+      "  Building wheel for sox (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for sox: filename=sox-1.5.0-py3-none-any.whl size=40038 sha256=1c48c5456291b6b4859918dc81caa53229a5114b68c2772f1a5518f6c6a21254\n",
+      "  Stored in directory: /root/.cache/pip/wheels/74/e7/7b/8033be3ec5e4994595d01269fc9657c8fd83a0dcbf8536666a\n",
+      "  Building wheel for distance (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for distance: filename=Distance-0.1.3-py3-none-any.whl size=16258 sha256=ade70730449fb839934e857bdcddc6de204e5eaab05db259da2f85be3fc099d0\n",
+      "  Stored in directory: /root/.cache/pip/wheels/e8/bb/de/f71bf63559ea9a921059a5405806f7ff6ed612a9231c4a9309\n",
+      "  Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=1aaae6b0427604326f67708418c3010e0f969b8a82ae512f79307f3978f09f52\n",
+      "  Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac\n",
+      "  Building wheel for intervaltree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for intervaltree: filename=intervaltree-3.1.0-py2.py3-none-any.whl size=26096 sha256=f306547725eb9ea7e52d4b78e8d49734164aa4dc43faee9a74ebd91087a42b68\n",
+      "  Stored in directory: /root/.cache/pip/wheels/fa/80/8c/43488a924a046b733b64de3fac99252674c892a4c3801c0a61\n",
+      "  Building wheel for asciitree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for asciitree: filename=asciitree-0.3.3-py3-none-any.whl size=5034 sha256=0251fcc8a18991f12d3209f3acf225199c31dee41236f40b77b69fde95038da9\n",
+      "  Stored in directory: /root/.cache/pip/wheels/7f/4e/be/1171b40f43b918087657ec57cf3b81fa1a2e027d8755baa184\n",
+      "  Building wheel for cdifflib (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for cdifflib: filename=cdifflib-1.2.6-cp310-cp310-linux_x86_64.whl size=27681 sha256=7c7f9bf50579f19875573df405249c5c962045cdeb20eea6469b5c2d2defb0ce\n",
+      "  Stored in directory: /root/.cache/pip/wheels/87/a7/fd/8061e24ed08689045cb6d1ca303768dc463b20a5a338174841\n",
+      "Successfully built causal-conv1d antlr4-python3-runtime progress clip fasttext kaldi-python-io nemo_toolkit rouge-score sox distance docopt intervaltree asciitree cdifflib\n",
+      "Installing collected packages: trampoline, pydub, progress, plac, pangu, opencc, ninja, ijson, docopt, distance, clip, braceexpand, asciitree, antlr4-python3-runtime, aniso8601, addict, xxhash, webdataset, trimesh, textdistance, sox, smmap, shellingham, setproctitle, sentry-sdk, ruamel.yaml.clib, rapidfuzz, pytest-runner, pypinyin, pynini, pybind11, portalocker, pathspec, parameterized, onnx, omegaconf, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numcodecs, mypy-extensions, marshmallow, markdown2, loguru, lilcom, lightning-utilities, libcst, latexcodec, kornia-rs, kaldiio, kaldi-python-io, jmespath, jedi, isort, intervaltree, ftfy, fasteners, faiss-cpu, einops, docutils, docker-pycreds, dill, decord, cytoolz, colorama, click, cdifflib, attrdict, zarr, sacremoses, sacrebleu, ruamel.yaml, resampy, pytest-mock, pypinyin-dict, PyMCubes, pyloudnorm, pybtex, pyannote.core, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, Levenshtein, jiwer, hydra-core, gitdb, fiddle, fasttext, einops-exts, botocore, black, typer, texterrors, s3transfer, rouge-score, pybtex-docutils, nvidia-cusolver-cu12, lhotse, gitpython, flask-restful, diffusers, wandb, transformers, sphinxcontrib-bibtex, pyannote.database, g2p-en, datasets, boto3, torchsde, torchmetrics, torchdiffeq, sentence-transformers, pyannote.metrics, nerfacc, nemo_toolkit, nemo-text-processing, kornia, causal-conv1d, accelerated-scan, timm, pytorch-lightning, taming-transformers, open-clip-torch\n",
+      "  Attempting uninstall: docutils\n",
+      "    Found existing installation: docutils 0.18.1\n",
+      "    Uninstalling docutils-0.18.1:\n",
+      "      Successfully uninstalled docutils-0.18.1\n",
+      "  Attempting uninstall: click\n",
+      "    Found existing installation: click 8.1.7\n",
+      "    Uninstalling click-8.1.7:\n",
+      "      Successfully uninstalled click-8.1.7\n",
+      "  Attempting uninstall: typer\n",
+      "    Found existing installation: typer 0.9.4\n",
+      "    Uninstalling typer-0.9.4:\n",
+      "      Successfully uninstalled typer-0.9.4\n",
+      "  Attempting uninstall: transformers\n",
+      "    Found existing installation: transformers 4.41.0\n",
+      "    Uninstalling transformers-4.41.0:\n",
+      "      Successfully uninstalled transformers-4.41.0\n",
+      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+      "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
+      "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n",
+      "\u001b[0mSuccessfully installed Levenshtein-0.22.0 PyMCubes-0.1.4 accelerated-scan-0.2.0 addict-2.4.0 aniso8601-9.0.1 antlr4-python3-runtime-4.9.3 asciitree-0.3.3 attrdict-2.0.1 black-24.4.2 boto3-1.34.113 botocore-1.34.113 braceexpand-0.1.7 causal-conv1d-1.2.2.post1 cdifflib-1.2.6 click-8.0.2 clip-0.2.0 colorama-0.4.6 cytoolz-0.12.3 datasets-2.19.1 decord-0.6.0 diffusers-0.28.0 dill-0.3.8 distance-0.1.3 docker-pycreds-0.4.0 docopt-0.6.2 docutils-0.17.1 einops-0.8.0 einops-exts-0.0.4 faiss-cpu-1.8.0 fasteners-0.19 fasttext-0.9.2 fiddle-0.3.0 flask-restful-0.3.10 ftfy-6.2.0 g2p-en-2.1.0 gitdb-4.0.11 gitpython-3.1.43 hydra-core-1.3.2 ijson-3.2.3 intervaltree-3.1.0 isort-5.13.2 jedi-0.19.1 jiwer-2.5.2 jmespath-1.0.1 kaldi-python-io-1.2.2 kaldiio-2.18.0 kornia-0.7.2 kornia-rs-0.1.3 latexcodec-3.0.0 lhotse-1.23.0 libcst-1.4.0 lightning-utilities-0.11.2 lilcom-1.7 loguru-0.7.2 markdown2-2.4.13 marshmallow-3.21.2 multiprocess-0.70.16 mypy-extensions-1.0.0 nemo-text-processing-1.0.2 nemo_toolkit-2.0.0rc1 nerfacc-0.5.3 ninja-1.11.1.1 numcodecs-0.12.1 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.40 nvidia-nvtx-cu12-12.1.105 omegaconf-2.3.0 onnx-1.16.1 open-clip-torch-2.24.0 opencc-1.1.6 pangu-4.0.6.1 parameterized-0.9.0 pathspec-0.12.1 plac-1.4.3 portalocker-2.8.2 progress-1.6 pyannote.core-5.0.0 pyannote.database-5.1.0 pyannote.metrics-3.2.1 pybind11-2.12.0 pybtex-0.24.0 pybtex-docutils-1.0.3 pydub-0.25.1 pyloudnorm-0.1.1 pynini-2.1.5 pypinyin-0.51.0 pypinyin-dict-0.8.0 pytest-mock-3.14.0 pytest-runner-6.0.1 pytorch-lightning-2.2.5 rapidfuzz-2.13.7 resampy-0.4.3 rouge-score-0.1.2 ruamel.yaml-0.18.6 ruamel.yaml.clib-0.2.8 s3transfer-0.10.1 sacrebleu-2.4.2 sacremoses-0.1.1 
sentence-transformers-2.7.0 sentry-sdk-2.3.1 setproctitle-1.3.3 shellingham-1.5.4 smmap-5.0.1 sox-1.5.0 sphinxcontrib-bibtex-2.6.2 taming-transformers-0.0.1 textdistance-4.6.2 texterrors-0.4.4 timm-1.0.3 torchdiffeq-0.2.3 torchmetrics-1.4.0.post0 torchsde-0.2.6 trampoline-0.1.2 transformers-4.40.2 trimesh-4.4.0 typer-0.12.3 wandb-0.17.0 webdataset-0.2.86 xxhash-3.4.1 zarr-2.18.2\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install wget\n",
+    "!apt-get install sox libsndfile1 ffmpeg\n",
+    "!pip install 'matplotlib>=3.3.2'\n",
+    "\n",
+    "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "_Utv8kLRW9Js"
+   },
+   "outputs": [],
+   "source": [
+    "from typing import List, Union\n",
+    "\n",
+    "import hydra\n",
+    "import soundfile as sf\n",
+    "import torch\n",
+    "from omegaconf import DictConfig, ListConfig, OmegaConf"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "ZLslfbEfXQIE"
+   },
+   "source": [
+    "### Model for emotions classification"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "paEKSFFVXPqC"
+   },
+   "outputs": [],
+   "source": [
+    "class SpecScaler(torch.nn.Module):\n",
+    "    def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
+    "        return torch.log(x.clamp_(1e-9, 1e9))\n",
+    "\n",
+    "\n",
+    "class GigaAMEmo(torch.nn.Module):\n",
+    "    def __init__(self, conf: Union[DictConfig, ListConfig]):\n",
+    "        super().__init__()\n",
+    "        self.id2name = conf.id2name\n",
+    "        self.feature_extractor = hydra.utils.instantiate(conf.feature_extractor)\n",
+    "        self.conformer = hydra.utils.instantiate(conf.encoder)\n",
+    "        self.linear_head = hydra.utils.instantiate(conf.classification_head)\n",
+    "\n",
+    "    def forward(self, features, features_length=None):\n",
+    "        if features.dim() == 2:\n",
+    "            features = features.unsqueeze(0)\n",
+    "        if features_length is None:\n",
+    "            features_length = torch.ones(features.shape[0]) * features.shape[-1]\n",
+    "            features_length = features_length.to(features.device)\n",
+    "        encoded, _ = self.conformer(audio_signal=features, length=features_length)\n",
+    "        encoded_pooled = torch.nn.functional.avg_pool1d(\n",
+    "            encoded, kernel_size=encoded.shape[-1]\n",
+    "        ).squeeze(-1)\n",
+    "\n",
+    "        logits = self.linear_head(encoded_pooled)\n",
+    "        return logits\n",
+    "\n",
+    "    def get_probs(self, audio_path: str) -> List[List[float]]:\n",
+    "        audio_signal, _ = sf.read(audio_path, dtype=\"float32\")\n",
+    "        features = self.feature_extractor(\n",
+    "            torch.tensor(audio_signal).float().to(next(self.parameters()).device)\n",
+    "        )\n",
+    "        logits = self.forward(features)\n",
+    "        probs = torch.nn.functional.softmax(logits, dim=-1).detach().tolist()\n",
+    "        return probs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "7UFpN0Ghc244"
+   },
+   "source": [
+    "### Downloading config, weights and audio example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "jFZJGISjcmHW",
+    "outputId": "74a2a71e-2dba-4551-c2cb-737eaa35bfa4"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--2024-05-28 07:10:07--  https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt\n",
+      "Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
+      "Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
+      "HTTP request sent, awaiting response... 200 OK\n",
+      "Length: 968409626 (924M) [application/octet-stream]\n",
+      "Saving to: ‘emo_model_weights.ckpt’\n",
+      "\n",
+      "emo_model_weights.c 100%[===================>] 923.55M  7.48MB/s    in 1m 45s  \n",
+      "\n",
+      "2024-05-28 07:11:53 (8.82 MB/s) - ‘emo_model_weights.ckpt’ saved [968409626/968409626]\n",
+      "\n",
+      "--2024-05-28 07:11:54--  https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml\n",
+      "Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
+      "Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
+      "HTTP request sent, awaiting response... 200 OK\n",
+      "Length: 765 [application/octet-stream]\n",
+      "Saving to: ‘emo_model_config.yaml’\n",
+      "\n",
+      "emo_model_config.ya 100%[===================>]     765  --.-KB/s    in 0s      \n",
+      "\n",
+      "2024-05-28 07:11:54 (252 MB/s) - ‘emo_model_config.yaml’ saved [765/765]\n",
+      "\n",
+      "--2024-05-28 07:11:54--  https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav\n",
+      "Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
+      "Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
+      "HTTP request sent, awaiting response... 200 OK\n",
+      "Length: 361324 (353K) [application/octet-stream]\n",
+      "Saving to: ‘example.wav’\n",
+      "\n",
+      "example.wav         100%[===================>] 352.86K   715KB/s    in 0.5s    \n",
+      "\n",
+      "2024-05-28 07:11:56 (715 KB/s) - ‘example.wav’ saved [361324/361324]\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "import locale\n",
+    "\n",
+    "locale.getpreferredencoding = lambda: \"UTF-8\"\n",
+    "\n",
+    "# Loading weights, config and example wav for the emotion model\n",
+    "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt\n",
+    "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml\n",
+    "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "NZ6-O2M0fxDY"
+   },
+   "source": [
+    "### Model instantiating and inference"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "plXt8297d5km",
+    "outputId": "537acb90-f6a1-4a73-ea66-0d3fe6fd9a3a"
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[NeMo W 2024-05-28 07:20:12 nemo_logging:349] <ipython-input-3-06a5deda234a>:32: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
+      "      probs = torch.nn.functional.softmax(logits).detach().tolist()\n",
+      "    \n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "angry: 0.000, sad: 0.002, neutral: 0.923, positive: 0.074\n"
+     ]
+    }
+   ],
+   "source": [
+    "model_config = \"emo_model_config.yaml\"\n",
+    "model_weights = \"emo_model_weights.ckpt\"\n",
+    "audio_path = \"example.wav\"\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "\n",
+    "conf = OmegaConf.load(model_config)\n",
+    "model = GigaAMEmo(conf)\n",
+    "ckpt = torch.load(model_weights, map_location=\"cpu\")\n",
+    "model.load_state_dict(ckpt, strict=False)\n",
+    "model = model.to(device)\n",
+    "model.eval()\n",
+    "with torch.no_grad():\n",
+    "    probs = model.get_probs(audio_path)[0]\n",
+    "print(\", \".join([f\"{model.id2name[i]}: {p:.3f}\" for i, p in enumerate(probs)]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "nmFvC_GfkasV"
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}

Examples/notebooks/GigaAM_Model_Usage_Example.ipynb ADDED Viewed

	@@ -0,0 +1,881 @@

+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+  "colab": {
+   "provenance": [],
+   "gpuType": "T4"
+  },
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3"
+  },
+  "language_info": {
+   "name": "python"
+  },
+  "accelerator": "GPU"
+ },
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Installing and importing dependencies"
+   ],
+   "metadata": {
+    "id": "aqymJFVQhere"
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "mJ5zzajTbzRX",
+    "outputId": "a6e8f1cc-5ef7-43e2-824e-39133c8f3f98"
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Collecting wget\n",
+      "  Downloading wget-3.2.zip (10 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Building wheels for collected packages: wget\n",
+      "  Building wheel for wget (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9656 sha256=fb233af0965c5da90b8babdcb0fbd51095c2a135ec877618013ed9078dced85b\n",
+      "  Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769\n",
+      "Successfully built wget\n",
+      "Installing collected packages: wget\n",
+      "Successfully installed wget-3.2\n",
+      "Reading package lists... Done\n",
+      "Building dependency tree... Done\n",
+      "Reading state information... Done\n",
+      "libsndfile1 is already the newest version (1.0.31-2ubuntu0.1).\n",
+      "ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).\n",
+      "The following additional packages will be installed:\n",
+      "  libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1\n",
+      "Suggested packages:\n",
+      "  libsox-fmt-all\n",
+      "The following NEW packages will be installed:\n",
+      "  libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1 sox\n",
+      "0 upgraded, 7 newly installed, 0 to remove and 45 not upgraded.\n",
+      "Need to get 617 kB of archives.\n",
+      "After this operation, 1,764 kB of additional disk space will be used.\n",
+      "Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrnb0 amd64 0.1.5-1 [94.8 kB]\n",
+      "Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrwb0 amd64 0.1.5-1 [49.1 kB]\n",
+      "Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox3 amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [240 kB]\n",
+      "Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-alsa amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [11.2 kB]\n",
+      "Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 libwavpack1 amd64 5.4.0-1build2 [83.7 kB]\n",
+      "Get:6 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-base amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [33.7 kB]\n",
+      "Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 sox amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [104 kB]\n",
+      "Fetched 617 kB in 0s (2,171 kB/s)\n",
+      "Selecting previously unselected package libopencore-amrnb0:amd64.\n",
+      "(Reading database ... 121918 files and directories currently installed.)\n",
+      "Preparing to unpack .../0-libopencore-amrnb0_0.1.5-1_amd64.deb ...\n",
+      "Unpacking libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
+      "Selecting previously unselected package libopencore-amrwb0:amd64.\n",
+      "Preparing to unpack .../1-libopencore-amrwb0_0.1.5-1_amd64.deb ...\n",
+      "Unpacking libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
+      "Selecting previously unselected package libsox3:amd64.\n",
+      "Preparing to unpack .../2-libsox3_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
+      "Unpacking libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Selecting previously unselected package libsox-fmt-alsa:amd64.\n",
+      "Preparing to unpack .../3-libsox-fmt-alsa_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
+      "Unpacking libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Selecting previously unselected package libwavpack1:amd64.\n",
+      "Preparing to unpack .../4-libwavpack1_5.4.0-1build2_amd64.deb ...\n",
+      "Unpacking libwavpack1:amd64 (5.4.0-1build2) ...\n",
+      "Selecting previously unselected package libsox-fmt-base:amd64.\n",
+      "Preparing to unpack .../5-libsox-fmt-base_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
+      "Unpacking libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Selecting previously unselected package sox.\n",
+      "Preparing to unpack .../6-sox_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
+      "Unpacking sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Setting up libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Setting up libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
+      "Setting up libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Setting up libwavpack1:amd64 (5.4.0-1build2) ...\n",
+      "Setting up libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
+      "Setting up libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Setting up sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
+      "Processing triggers for man-db (2.10.2-1) ...\n",
+      "Processing triggers for libc-bin (2.35-0ubuntu3.4) ...\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n",
+      "\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n",
+      "\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n",
+      "\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n",
+      "\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n",
+      "\n",
+      "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n",
+      "\n",
+      "\u001b[33mDEPRECATION: git+https://github.com/NVIDIA/NeMo.git#egg=nemo_toolkit[all] contains an egg fragment with a non-PEP 508 name pip 25.0 will enforce this behaviour change. A possible replacement is to use the req @ url syntax, and remove the egg fragment. Discussion can be found at https://github.com/pypa/pip/issues/11617\u001b[0m\u001b[33m\n",
+      "\u001b[0mCollecting nemo_toolkit[all]\n",
+      "  Cloning https://github.com/NVIDIA/NeMo.git to /tmp/pip-install-rgi4yev1/nemo-toolkit_1a843c4761a042a6a6b3b0dfbb81352c\n",
+      "  Running command git clone --filter=blob:none --quiet https://github.com/NVIDIA/NeMo.git /tmp/pip-install-rgi4yev1/nemo-toolkit_1a843c4761a042a6a6b3b0dfbb81352c\n",
+      "  Resolved https://github.com/NVIDIA/NeMo.git to commit 1fa961ba03ab5f8c91b278640e29807079373372\n",
+      "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+      "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+      "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting fiddle (from nemo_toolkit[all])\n",
+      "  Downloading fiddle-0.3.0-py3-none-any.whl (419 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m419.8/419.8 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: huggingface-hub>=0.20.3 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.23.1)\n",
+      "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.58.1)\n",
+      "Requirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.25.2)\n",
+      "Collecting onnx>=1.7.0 (from nemo_toolkit[all])\n",
+      "  Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m64.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.8.2)\n",
+      "Collecting ruamel.yaml (from nemo_toolkit[all])\n",
+      "  Downloading ruamel.yaml-0.18.6-py3-none-any.whl (117 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.8/117.8 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.2.2)\n",
+      "Requirement already satisfied: setuptools>=65.5.1 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (67.7.2)\n",
+      "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.15.2)\n",
+      "Requirement already satisfied: text-unidecode in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.3)\n",
+      "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.3.0+cu121)\n",
+      "Requirement already satisfied: tqdm>=4.41.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (4.66.4)\n",
+      "Requirement already satisfied: wget in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.2)\n",
+      "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.14.1)\n",
+      "Collecting black~=24.3 (from nemo_toolkit[all])\n",
+      "  Downloading black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m56.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting click==8.0.2 (from nemo_toolkit[all])\n",
+      "  Downloading click-8.0.2-py3-none-any.whl (97 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.6/97.6 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting isort<6.0.0,>5.1.0 (from nemo_toolkit[all])\n",
+      "  Downloading isort-5.13.2-py3-none-any.whl (92 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.3/92.3 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting parameterized (from nemo_toolkit[all])\n",
+      "  Downloading parameterized-0.9.0-py2.py3-none-any.whl (20 kB)\n",
+      "Requirement already satisfied: pytest in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.4.4)\n",
+      "Collecting pytest-mock (from nemo_toolkit[all])\n",
+      "  Downloading pytest_mock-3.14.0-py3-none-any.whl (9.9 kB)\n",
+      "Collecting pytest-runner (from nemo_toolkit[all])\n",
+      "  Downloading pytest_runner-6.0.1-py3-none-any.whl (7.2 kB)\n",
+      "Requirement already satisfied: sphinx in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.0.2)\n",
+      "Collecting sphinxcontrib-bibtex (from nemo_toolkit[all])\n",
+      "  Downloading sphinxcontrib_bibtex-2.6.2-py3-none-any.whl (40 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting wandb (from nemo_toolkit[all])\n",
+      "  Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.7/6.7 MB\u001b[0m \u001b[31m57.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.2.1)\n",
+      "Collecting hydra-core<=1.3.2,>1.3 (from nemo_toolkit[all])\n",
+      "  Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.5/154.5 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting omegaconf<=2.3 (from nemo_toolkit[all])\n",
+      "  Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pytorch-lightning>=2.2.1 (from nemo_toolkit[all])\n",
+      "  Downloading pytorch_lightning-2.2.5-py3-none-any.whl (802 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m802.3/802.3 kB\u001b[0m \u001b[31m47.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting torchmetrics>=0.11.0 (from nemo_toolkit[all])\n",
+      "  Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m868.8/868.8 kB\u001b[0m \u001b[31m46.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting transformers<=4.40.2,>=4.36.0 (from nemo_toolkit[all])\n",
+      "  Downloading transformers-4.40.2-py3-none-any.whl (9.0 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.0/9.0 MB\u001b[0m \u001b[31m70.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting webdataset>=0.2.86 (from nemo_toolkit[all])\n",
+      "  Downloading webdataset-0.2.86-py3-none-any.whl (70 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.4/70.4 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting datasets (from nemo_toolkit[all])\n",
+      "  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m542.0/542.0 kB\u001b[0m \u001b[31m40.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.0.0)\n",
+      "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.0.3)\n",
+      "Collecting sacremoses>=0.0.43 (from nemo_toolkit[all])\n",
+      "  Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m897.5/897.5 kB\u001b[0m \u001b[31m56.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: sentencepiece<1.0.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.99)\n",
+      "Collecting braceexpand (from nemo_toolkit[all])\n",
+      "  Downloading braceexpand-0.1.7-py2.py3-none-any.whl (5.9 kB)\n",
+      "Requirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.6.2)\n",
+      "Collecting einops (from nemo_toolkit[all])\n",
+      "  Downloading einops-0.8.0-py3-none-any.whl (43 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.2/43.2 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting g2p-en (from nemo_toolkit[all])\n",
+      "  Downloading g2p_en-2.1.0-py3-none-any.whl (3.1 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m76.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.7.1)\n",
+      "Collecting jiwer (from nemo_toolkit[all])\n",
+      "  Downloading jiwer-3.0.4-py3-none-any.whl (21 kB)\n",
+      "Collecting kaldi-python-io (from nemo_toolkit[all])\n",
+      "  Downloading kaldi-python-io-1.2.2.tar.gz (8.8 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting kaldiio (from nemo_toolkit[all])\n",
+      "  Downloading kaldiio-2.18.0-py3-none-any.whl (28 kB)\n",
+      "Collecting lhotse>=1.22.0 (from nemo_toolkit[all])\n",
+      "  Downloading lhotse-1.23.0-py3-none-any.whl (772 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m772.4/772.4 kB\u001b[0m \u001b[31m50.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: librosa>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.10.2.post1)\n",
+      "Collecting marshmallow (from nemo_toolkit[all])\n",
+      "  Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.7.1)\n",
+      "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (24.0)\n",
+      "Collecting pyannote.core (from nemo_toolkit[all])\n",
+      "  Downloading pyannote.core-5.0.0-py3-none-any.whl (58 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.5/58.5 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pyannote.metrics (from nemo_toolkit[all])\n",
+      "  Downloading pyannote.metrics-3.2.1-py3-none-any.whl (51 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.4/51.4 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pydub (from nemo_toolkit[all])\n",
+      "  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
+      "Collecting pyloudnorm (from nemo_toolkit[all])\n",
+      "  Downloading pyloudnorm-0.1.1-py3-none-any.whl (9.6 kB)\n",
+      "Collecting resampy (from nemo_toolkit[all])\n",
+      "  Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m82.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.11.4)\n",
+      "Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.12.1)\n",
+      "Collecting sox (from nemo_toolkit[all])\n",
+      "  Downloading sox-1.5.0.tar.gz (63 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.9/63.9 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting texterrors (from nemo_toolkit[all])\n",
+      "  Downloading texterrors-0.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m58.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting accelerated-scan (from nemo_toolkit[all])\n",
+      "  Downloading accelerated_scan-0.2.0-py3-none-any.whl (11 kB)\n",
+      "Collecting boto3 (from nemo_toolkit[all])\n",
+      "  Downloading boto3-1.34.113-py3-none-any.whl (139 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.3/139.3 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting causal-conv1d>=1.2.0 (from nemo_toolkit[all])\n",
+      "  Downloading causal_conv1d-1.2.2.post1.tar.gz (7.2 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting faiss-cpu (from nemo_toolkit[all])\n",
+      "  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m41.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting fasttext (from nemo_toolkit[all])\n",
+      "  Downloading fasttext-0.9.2.tar.gz (68 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m68.8/68.8 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting flask-restful (from nemo_toolkit[all])\n",
+      "  Downloading Flask_RESTful-0.3.10-py2.py3-none-any.whl (26 kB)\n",
+      "Collecting ftfy (from nemo_toolkit[all])\n",
+      "  Downloading ftfy-6.2.0-py3-none-any.whl (54 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.4/54.4 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: gdown in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.1.0)\n",
+      "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.9.0)\n",
+      "Collecting ijson (from nemo_toolkit[all])\n",
+      "  Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.8/111.8 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.42.1)\n",
+      "Collecting markdown2 (from nemo_toolkit[all])\n",
+      "  Downloading markdown2-2.4.13-py2.py3-none-any.whl (41 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: nltk>=3.6.5 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.8.1)\n",
+      "Collecting opencc<1.1.7 (from nemo_toolkit[all])\n",
+      "  Downloading OpenCC-1.1.6-cp310-cp310-manylinux1_x86_64.whl (778 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m778.3/778.3 kB\u001b[0m \u001b[31m43.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pangu (from nemo_toolkit[all])\n",
+      "  Downloading pangu-4.0.6.1-py3-none-any.whl (6.4 kB)\n",
+      "Collecting rapidfuzz (from nemo_toolkit[all])\n",
+      "  Downloading rapidfuzz-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m76.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting rouge-score (from nemo_toolkit[all])\n",
+      "  Downloading rouge_score-0.1.2.tar.gz (17 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting sacrebleu (from nemo_toolkit[all])\n",
+      "  Downloading sacrebleu-2.4.2-py3-none-any.whl (106 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.7/106.7 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting sentence-transformers (from nemo_toolkit[all])\n",
+      "  Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: tensorstore<0.1.46 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.45)\n",
+      "Collecting zarr (from nemo_toolkit[all])\n",
+      "  Downloading zarr-2.18.2-py3-none-any.whl (210 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m210.2/210.2 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting attrdict (from nemo_toolkit[all])\n",
+      "  Downloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)\n",
+      "Collecting kornia (from nemo_toolkit[all])\n",
+      "  Downloading kornia-0.7.2-py2.py3-none-any.whl (825 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m825.4/825.4 kB\u001b[0m \u001b[31m54.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pypinyin (from nemo_toolkit[all])\n",
+      "  Downloading pypinyin-0.51.0-py2.py3-none-any.whl (1.4 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m62.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pypinyin-dict (from nemo_toolkit[all])\n",
+      "  Downloading pypinyin_dict-0.8.0-py2.py3-none-any.whl (9.5 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.5/9.5 MB\u001b[0m \u001b[31m92.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting progress>=1.5 (from nemo_toolkit[all])\n",
+      "  Downloading progress-1.6.tar.gz (7.8 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Requirement already satisfied: tabulate>=0.8.7 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.9.0)\n",
+      "Collecting textdistance>=4.1.5 (from nemo_toolkit[all])\n",
+      "  Downloading textdistance-4.6.2-py3-none-any.whl (31 kB)\n",
+      "Collecting addict (from nemo_toolkit[all])\n",
+      "  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
+      "Collecting clip (from nemo_toolkit[all])\n",
+      "  Downloading clip-0.2.0.tar.gz (5.5 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting decord (from nemo_toolkit[all])\n",
+      "  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m74.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting diffusers>=0.19.3 (from nemo_toolkit[all])\n",
+      "  Downloading diffusers-0.28.0-py3-none-any.whl (2.2 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting einops-exts (from nemo_toolkit[all])\n",
+      "  Downloading einops_exts-0.0.4-py3-none-any.whl (3.9 kB)\n",
+      "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.31.6)\n",
+      "Collecting nerfacc>=0.5.3 (from nemo_toolkit[all])\n",
+      "  Downloading nerfacc-0.5.3-py3-none-any.whl (54 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.6/54.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting open-clip-torch (from nemo_toolkit[all])\n",
+      "  Downloading open_clip_torch-2.24.0-py3-none-any.whl (1.5 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m58.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting PyMCubes (from nemo_toolkit[all])\n",
+      "  Downloading PyMCubes-0.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (274 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.3/274.3 kB\u001b[0m \u001b[31m19.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting taming-transformers (from nemo_toolkit[all])\n",
+      "  Downloading taming_transformers-0.0.1-py3-none-any.whl (45 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.6/45.6 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting torchdiffeq (from nemo_toolkit[all])\n",
+      "  Downloading torchdiffeq-0.2.3-py3-none-any.whl (31 kB)\n",
+      "Collecting torchsde (from nemo_toolkit[all])\n",
+      "  Downloading torchsde-0.2.6-py3-none-any.whl (61 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting trimesh (from nemo_toolkit[all])\n",
+      "  Downloading trimesh-4.4.0-py3-none-any.whl (694 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m694.6/694.6 kB\u001b[0m \u001b[31m39.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting nemo-text-processing (from nemo_toolkit[all])\n",
+      "  Downloading nemo_text_processing-1.0.2-py3-none-any.whl (2.6 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m44.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting mypy-extensions>=0.4.3 (from black~=24.3->nemo_toolkit[all])\n",
+      "  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
+      "Collecting pathspec>=0.9.0 (from black~=24.3->nemo_toolkit[all])\n",
+      "  Downloading pathspec-0.12.1-py3-none-any.whl (31 kB)\n",
+      "Requirement already satisfied: platformdirs>=2 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.2.2)\n",
+      "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (2.0.1)\n",
+      "Requirement already satisfied: typing-extensions>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.11.0)\n",
+      "Collecting ninja (from causal-conv1d>=1.2.0->nemo_toolkit[all])\n",
+      "  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m25.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (7.1.0)\n",
+      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (3.14.0)\n",
+      "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2023.12.25)\n",
+      "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2.31.0)\n",
+      "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (0.4.3)\n",
+      "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (9.4.0)\n",
+      "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (2023.6.0)\n",
+      "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (6.0.1)\n",
+      "Collecting antlr4-python3-runtime==4.9.* (from hydra-core<=1.3.2,>1.3->nemo_toolkit[all])\n",
+      "  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "INFO: pip is looking at multiple versions of jiwer to determine which version is compatible with other requirements. This could take a while.\n",
+      "Collecting jiwer (from nemo_toolkit[all])\n",
+      "  Downloading jiwer-3.0.3-py3-none-any.whl (21 kB)\n",
+      "  Downloading jiwer-3.0.2-py3-none-any.whl (21 kB)\n",
+      "  Downloading jiwer-3.0.1-py3-none-any.whl (21 kB)\n",
+      "  Downloading jiwer-3.0.0-py3-none-any.whl (21 kB)\n",
+      "  Downloading jiwer-2.6.0-py3-none-any.whl (20 kB)\n",
+      "  Downloading jiwer-2.5.2-py3-none-any.whl (15 kB)\n",
+      "Collecting rapidfuzz (from nemo_toolkit[all])\n",
+      "  Downloading rapidfuzz-2.13.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m63.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from lhotse>=1.22.0->nemo_toolkit[all]) (3.0.1)\n",
+      "Collecting cytoolz>=0.10.1 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
+      "  Downloading cytoolz-0.12.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m63.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting intervaltree>=3.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
+      "  Downloading intervaltree-3.1.0.tar.gz (32 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting lilcom>=1.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
+      "  Downloading lilcom-1.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (87 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.1/87.1 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.4.2)\n",
+      "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (4.4.2)\n",
+      "Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.8.1)\n",
+      "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.3.7)\n",
+      "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.4)\n",
+      "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.0.8)\n",
+      "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.2.1)\n",
+      "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (0.12.1)\n",
+      "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (4.51.0)\n",
+      "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.4.5)\n",
+      "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (3.1.2)\n",
+      "Requirement already satisfied: rich>=12 in /usr/local/lib/python3.10/dist-packages (from nerfacc>=0.5.3->nemo_toolkit[all]) (13.7.1)\n",
+      "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->nemo_toolkit[all]) (0.41.1)\n",
+      "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.10/dist-packages (from onnx>=1.7.0->nemo_toolkit[all]) (3.20.3)\n",
+      "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil->nemo_toolkit[all]) (1.16.0)\n",
+      "Collecting lightning-utilities>=0.8.0 (from pytorch-lightning>=2.2.1->nemo_toolkit[all])\n",
+      "  Downloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)\n",
+      "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->nemo_toolkit[all]) (3.5.0)\n",
+      "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->nemo_toolkit[all]) (1.16.0)\n",
+      "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (1.12)\n",
+      "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.3)\n",
+      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.1.4)\n",
+      "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
+      "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
+      "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
+      "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
+      "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
+      "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
+      "Collecting nvidia-curand-cu12==10.3.2.106 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
+      "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
+      "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
+      "Collecting nvidia-nccl-cu12==2.20.5 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
+      "Collecting nvidia-nvtx-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
+      "  Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
+      "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (2.3.0)\n",
+      "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch->nemo_toolkit[all])\n",
+      "  Downloading nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m57.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers<=4.40.2,>=4.36.0->nemo_toolkit[all]) (0.19.1)\n",
+      "Collecting botocore<1.35.0,>=1.34.113 (from boto3->nemo_toolkit[all])\n",
+      "  Downloading botocore-1.34.113-py3-none-any.whl (12.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m63.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3->nemo_toolkit[all])\n",
+      "  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n",
+      "Collecting s3transfer<0.11.0,>=0.10.0 (from boto3->nemo_toolkit[all])\n",
+      "  Downloading s3transfer-0.10.1-py3-none-any.whl (82 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.2/82.2 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (14.0.2)\n",
+      "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (0.6)\n",
+      "Collecting dill<0.3.9,>=0.3.0 (from datasets->nemo_toolkit[all])\n",
+      "  Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting xxhash (from datasets->nemo_toolkit[all])\n",
+      "  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting multiprocess (from datasets->nemo_toolkit[all])\n",
+      "  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (3.9.5)\n",
+      "Collecting pybind11>=2.2 (from fasttext->nemo_toolkit[all])\n",
+      "  Using cached pybind11-2.12.0-py3-none-any.whl (234 kB)\n",
+      "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (1.4.0)\n",
+      "Requirement already satisfied: graphviz in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (0.20.3)\n",
+      "Collecting libcst (from fiddle->nemo_toolkit[all])\n",
+      "  Downloading libcst-1.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m72.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting aniso8601>=0.82 (from flask-restful->nemo_toolkit[all])\n",
+      "  Downloading aniso8601-9.0.1-py2.py3-none-any.whl (52 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.8/52.8 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: Flask>=0.8 in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2.2.5)\n",
+      "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2023.4)\n",
+      "Requirement already satisfied: wcwidth<0.3.0,>=0.2.12 in /usr/local/lib/python3.10/dist-packages (from ftfy->nemo_toolkit[all]) (0.2.13)\n",
+      "Collecting distance>=0.1.3 (from g2p-en->nemo_toolkit[all])\n",
+      "  Downloading Distance-0.1.3.tar.gz (180 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m180.3/180.3 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Requirement already satisfied: pydantic>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from inflect->nemo_toolkit[all]) (2.7.1)\n",
+      "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown->nemo_toolkit[all]) (4.12.3)\n",
+      "Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.5.6)\n",
+      "Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
+      "Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.7.1)\n",
+      "Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.6.6)\n",
+      "Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (7.34.0)\n",
+      "Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.0.10)\n",
+      "Collecting kornia-rs>=0.1.0 (from kornia->nemo_toolkit[all])\n",
+      "  Downloading kornia_rs-0.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m75.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting cdifflib (from nemo-text-processing->nemo_toolkit[all])\n",
+      "  Downloading cdifflib-1.2.6.tar.gz (11 kB)\n",
+      "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+      "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+      "  Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
+      "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting pynini==2.1.5 (from nemo-text-processing->nemo_toolkit[all])\n",
+      "  Downloading pynini-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m161.3/161.3 MB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: Cython>=0.29 in /usr/local/lib/python3.10/dist-packages (from pynini==2.1.5->nemo-text-processing->nemo_toolkit[all]) (3.0.10)\n",
+      "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from open-clip-torch->nemo_toolkit[all]) (0.18.0+cu121)\n",
+      "Collecting timm (from open-clip-torch->nemo_toolkit[all])\n",
+      "  Downloading timm-1.0.3-py3-none-any.whl (2.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m58.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->nemo_toolkit[all]) (2024.1)\n",
+      "Requirement already satisfied: sortedcontainers>=2.0.4 in /usr/local/lib/python3.10/dist-packages (from pyannote.core->nemo_toolkit[all]) (2.4.0)\n",
+      "Collecting pyannote.database>=4.0.1 (from pyannote.metrics->nemo_toolkit[all])\n",
+      "  Downloading pyannote.database-5.1.0-py3-none-any.whl (48 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.1/48.1 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting docopt>=0.6.2 (from pyannote.metrics->nemo_toolkit[all])\n",
+      "  Downloading docopt-0.6.2.tar.gz (25 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Requirement already satisfied: future>=0.16.0 in /usr/local/lib/python3.10/dist-packages (from pyloudnorm->nemo_toolkit[all]) (0.18.3)\n",
+      "Requirement already satisfied: iniconfig in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (2.0.0)\n",
+      "Requirement already satisfied: pluggy<2.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.5.0)\n",
+      "Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.2.1)\n",
+      "Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml->nemo_toolkit[all])\n",
+      "  Downloading ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (526 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.7/526.7 kB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting portalocker (from sacrebleu->nemo_toolkit[all])\n",
+      "  Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n",
+      "Collecting colorama (from sacrebleu->nemo_toolkit[all])\n",
+      "  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
+      "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from sacrebleu->nemo_toolkit[all]) (4.9.4)\n",
+      "Requirement already satisfied: sphinxcontrib-applehelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.8)\n",
+      "Requirement already satisfied: sphinxcontrib-devhelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.6)\n",
+      "Requirement already satisfied: sphinxcontrib-jsmath in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.1)\n",
+      "Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.0.5)\n",
+      "Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.5 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.1.10)\n",
+      "Requirement already satisfied: sphinxcontrib-qthelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.7)\n",
+      "Requirement already satisfied: Pygments>=2.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.16.1)\n",
+      "Requirement already satisfied: docutils<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.18.1)\n",
+      "Requirement already satisfied: snowballstemmer>=1.1 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.2.0)\n",
+      "Requirement already satisfied: babel>=1.3 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.15.0)\n",
+      "Requirement already satisfied: alabaster<0.8,>=0.7 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.7.16)\n",
+      "Requirement already satisfied: imagesize in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.4.1)\n",
+      "Collecting docutils<0.19,>=0.14 (from sphinx->nemo_toolkit[all])\n",
+      "  Downloading docutils-0.17.1-py2.py3-none-any.whl (575 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m575.5/575.5 kB\u001b[0m \u001b[31m34.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pybtex>=0.24 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
+      "  Downloading pybtex-0.24.0-py2.py3-none-any.whl (561 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m561.4/561.4 kB\u001b[0m \u001b[31m32.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting pybtex-docutils>=1.0.0 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
+      "  Downloading pybtex_docutils-1.0.3-py3-none-any.whl (6.4 kB)\n",
+      "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.64.0)\n",
+      "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (2.27.0)\n",
+      "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.2.0)\n",
+      "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.6)\n",
+      "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (0.7.2)\n",
+      "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.0.3)\n",
+      "Collecting plac (from texterrors->nemo_toolkit[all])\n",
+      "  Downloading plac-1.4.3-py2.py3-none-any.whl (22 kB)\n",
+      "Collecting loguru (from texterrors->nemo_toolkit[all])\n",
+      "  Downloading loguru-0.7.2-py3-none-any.whl (62 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from texterrors->nemo_toolkit[all]) (2.4.0)\n",
+      "Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
+      "  Downloading Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting trampoline>=0.1.2 (from torchsde->nemo_toolkit[all])\n",
+      "  Downloading trampoline-0.1.2-py3-none-any.whl (5.2 kB)\n",
+      "Collecting docker-pycreds>=0.4.0 (from wandb->nemo_toolkit[all])\n",
+      "  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
+      "Collecting gitpython!=3.1.29,>=1.0.0 (from wandb->nemo_toolkit[all])\n",
+      "  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.3/207.3 kB\u001b[0m \u001b[31m19.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb->nemo_toolkit[all]) (5.9.5)\n",
+      "Collecting sentry-sdk>=1.0.0 (from wandb->nemo_toolkit[all])\n",
+      "  Downloading sentry_sdk-2.3.1-py2.py3-none-any.whl (289 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m289.0/289.0 kB\u001b[0m \u001b[31m25.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting setproctitle (from wandb->nemo_toolkit[all])\n",
+      "  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
+      "Collecting asciitree (from zarr->nemo_toolkit[all])\n",
+      "  Downloading asciitree-0.3.3.tar.gz (4.0 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "Collecting numcodecs>=0.10.0 (from zarr->nemo_toolkit[all])\n",
+      "  Downloading numcodecs-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m86.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting fasteners (from zarr->nemo_toolkit[all])\n",
+      "  Downloading fasteners-0.19-py3-none-any.whl (18 kB)\n",
+      "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.113->boto3->nemo_toolkit[all]) (2.0.7)\n",
+      "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->nemo_toolkit[all]) (2.22)\n",
+      "Requirement already satisfied: toolz>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from cytoolz>=0.10.1->lhotse>=1.22.0->nemo_toolkit[all]) (0.12.1)\n",
+      "Requirement already satisfied: itsdangerous>=2.0 in /usr/local/lib/python3.10/dist-packages (from Flask>=0.8->flask-restful->nemo_toolkit[all]) (2.2.0)\n",
+      "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.3.1)\n",
+      "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (23.2.0)\n",
+      "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.4.1)\n",
+      "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (6.0.5)\n",
+      "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.9.4)\n",
+      "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (4.0.3)\n",
+      "Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
+      "  Downloading gitdb-4.0.11-py3-none-any.whl (62 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (5.3.3)\n",
+      "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.4.0)\n",
+      "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (4.9)\n",
+      "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (1.3.1)\n",
+      "Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.1.12)\n",
+      "Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.3.3)\n",
+      "Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all])\n",
+      "  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m64.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.5)\n",
+      "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (3.0.43)\n",
+      "Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
+      "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.1.7)\n",
+      "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (4.9.0)\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->nemo_toolkit[all]) (2.1.5)\n",
+      "Collecting typer>=0.12.1 (from pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
+      "  Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting latexcodec>=1.0.4 (from pybtex>=0.24->sphinxcontrib-bibtex->nemo_toolkit[all])\n",
+      "  Downloading latexcodec-3.0.0-py3-none-any.whl (18 kB)\n",
+      "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (0.7.0)\n",
+      "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (2.18.2)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.3.2)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.7)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (2024.2.2)\n",
+      "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (3.0.0)\n",
+      "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->nemo_toolkit[all]) (1.3.0)\n",
+      "Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.5)\n",
+      "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown->nemo_toolkit[all]) (2.5)\n",
+      "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata->diffusers>=0.19.3->nemo_toolkit[all]) (3.18.2)\n",
+      "INFO: pip is looking at multiple versions of levenshtein to determine which version is compatible with other requirements. This could take a while.\n",
+      "Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
+      "  Downloading Levenshtein-0.25.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Downloading Levenshtein-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Downloading Levenshtein-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (169 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.4/169.4 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Downloading Levenshtein-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (172 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m172.9/172.9 kB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (1.7.1)\n",
+      "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
+      "  Downloading smmap-5.0.1-py3-none-any.whl (24 kB)\n",
+      "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
+      "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (0.1.2)\n",
+      "Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (24.0.1)\n",
+      "Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (23.1.0)\n",
+      "Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.7.2)\n",
+      "Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.10.4)\n",
+      "Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.4)\n",
+      "Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.6.0)\n",
+      "Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.3)\n",
+      "Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
+      "Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.20.0)\n",
+      "Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.0.0)\n",
+      "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.0)\n",
+      "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.6.0)\n",
+      "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (3.2.2)\n",
+      "Collecting shellingham>=1.3.0 (from typer>=0.12.1->pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
+      "  Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n",
+      "Requirement already satisfied: jupyter-server>=1.8 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.24.0)\n",
+      "Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.2.4)\n",
+      "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.1.0)\n",
+      "Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.7.1)\n",
+      "Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.4)\n",
+      "Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.3.0)\n",
+      "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
+      "Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.10.0)\n",
+      "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.5.1)\n",
+      "Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.0)\n",
+      "Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2.19.1)\n",
+      "Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (4.19.2)\n",
+      "Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (21.2.0)\n",
+      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2023.12.1)\n",
+      "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.35.1)\n",
+      "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
+      "Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (3.7.1)\n",
+      "Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.0)\n",
+      "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.5.1)\n",
+      "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.1)\n",
+      "Building wheels for collected packages: causal-conv1d, antlr4-python3-runtime, progress, clip, fasttext, kaldi-python-io, nemo_toolkit, rouge-score, sox, distance, docopt, intervaltree, asciitree, cdifflib\n",
+      "  Building wheel for causal-conv1d (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for causal-conv1d: filename=causal_conv1d-1.2.2.post1-cp310-cp310-linux_x86_64.whl size=103643300 sha256=2bba8823ae89bd79c2d067978e0e533fab8298f69855bfc5d199828b278cf66c\n",
+      "  Stored in directory: /root/.cache/pip/wheels/22/a7/db/0c9482dec3707ad23181b0eb2da40e4b8f26aaed49752fc49f\n",
+      "  Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=5ba620ca9da88d714c879b4a21820b9bdebd36fb76051b0b48a375e2e4f0fcb7\n",
+      "  Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n",
+      "  Building wheel for progress (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for progress: filename=progress-1.6-py3-none-any.whl size=9614 sha256=8102705b8ef612530f059a82dde5ea899c85e387fb8c5e956ed0fef5f2929103\n",
+      "  Stored in directory: /root/.cache/pip/wheels/a2/68/5f/c339b20a41659d856c93ccdce6a33095493eb82c3964aac5a1\n",
+      "  Building wheel for clip (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for clip: filename=clip-0.2.0-py3-none-any.whl size=6989 sha256=d8ab11e4cbc0837cde86e4c7011ffabab187b9937f98e39480bb87ec75a34740\n",
+      "  Stored in directory: /root/.cache/pip/wheels/7f/5c/e6/2c0fdb453a3569188864b17e9676bea8b3b7e160c037117869\n",
+      "  Building wheel for fasttext (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for fasttext: filename=fasttext-0.9.2-cp310-cp310-linux_x86_64.whl size=4227136 sha256=71dc3f2989afb1a6f206ee64ae86bfcfa8381c66960e93ac984be24f2871c66b\n",
+      "  Stored in directory: /root/.cache/pip/wheels/a5/13/75/f811c84a8ab36eedbaef977a6a58a98990e8e0f1967f98f394\n",
+      "  Building wheel for kaldi-python-io (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for kaldi-python-io: filename=kaldi_python_io-1.2.2-py3-none-any.whl size=8949 sha256=959e8f93e517267e62e51f1e26455214c6b2aba320bb5621fa506730d4ad2ceb\n",
+      "  Stored in directory: /root/.cache/pip/wheels/b7/23/5f/49d3a826be576faf61d84e8028e1914bb36a5586ee2613b087\n",
+      "  Building wheel for nemo_toolkit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for nemo_toolkit: filename=nemo_toolkit-2.0.0rc1-py3-none-any.whl size=3709778 sha256=458c9cb158a12a8ddc8c570fde72f15111afe5767ac2a0e485966d2d76e1bda8\n",
+      "  Stored in directory: /tmp/pip-ephem-wheel-cache-992hxcpb/wheels/c3/4e/45/ab3d29aa73df619f27b371cacf809d5330a18f794879163c1b\n",
+      "  Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=076fda87c1a21e7a9fe88f0b3b9a26f7b76171063d76812353b4a30ebe02da51\n",
+      "  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n",
+      "  Building wheel for sox (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for sox: filename=sox-1.5.0-py3-none-any.whl size=40038 sha256=717f5186772b8ce84cbbf7b1a01931be688fc591982575b518b2bc327460675d\n",
+      "  Stored in directory: /root/.cache/pip/wheels/74/e7/7b/8033be3ec5e4994595d01269fc9657c8fd83a0dcbf8536666a\n",
+      "  Building wheel for distance (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for distance: filename=Distance-0.1.3-py3-none-any.whl size=16258 sha256=4863022ee11d6ede70f4b4362c6554629a2e734cb7a0a0212904aeafac36f78e\n",
+      "  Stored in directory: /root/.cache/pip/wheels/e8/bb/de/f71bf63559ea9a921059a5405806f7ff6ed612a9231c4a9309\n",
+      "  Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=80b5355530de1ea759d79fc19047cdd59679e6b0a014b51dbca811111b1aad36\n",
+      "  Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac\n",
+      "  Building wheel for intervaltree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for intervaltree: filename=intervaltree-3.1.0-py2.py3-none-any.whl size=26096 sha256=e831b80cc0232f925c293997e7be035697c58f5a834060f1b1f6a097fa5502b7\n",
+      "  Stored in directory: /root/.cache/pip/wheels/fa/80/8c/43488a924a046b733b64de3fac99252674c892a4c3801c0a61\n",
+      "  Building wheel for asciitree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for asciitree: filename=asciitree-0.3.3-py3-none-any.whl size=5034 sha256=483b21d8a257179dcd00b430bc43c6fce9f97d0442b44433ac2794d3030a48e1\n",
+      "  Stored in directory: /root/.cache/pip/wheels/7f/4e/be/1171b40f43b918087657ec57cf3b81fa1a2e027d8755baa184\n",
+      "  Building wheel for cdifflib (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+      "  Created wheel for cdifflib: filename=cdifflib-1.2.6-cp310-cp310-linux_x86_64.whl size=27681 sha256=a4929dc925e36d0e71a89e124ce85f0ada2ec5862708eb18c9136da35649ebc1\n",
+      "  Stored in directory: /root/.cache/pip/wheels/87/a7/fd/8061e24ed08689045cb6d1ca303768dc463b20a5a338174841\n",
+      "Successfully built causal-conv1d antlr4-python3-runtime progress clip fasttext kaldi-python-io nemo_toolkit rouge-score sox distance docopt intervaltree asciitree cdifflib\n",
+      "Installing collected packages: trampoline, pydub, progress, plac, pangu, opencc, ninja, ijson, docopt, distance, clip, braceexpand, asciitree, antlr4-python3-runtime, aniso8601, addict, xxhash, webdataset, trimesh, textdistance, sox, smmap, shellingham, setproctitle, sentry-sdk, ruamel.yaml.clib, rapidfuzz, pytest-runner, pypinyin, pynini, pybind11, portalocker, pathspec, parameterized, onnx, omegaconf, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numcodecs, mypy-extensions, marshmallow, markdown2, loguru, lilcom, lightning-utilities, libcst, latexcodec, kornia-rs, kaldiio, kaldi-python-io, jmespath, jedi, isort, intervaltree, ftfy, fasteners, faiss-cpu, einops, docutils, docker-pycreds, dill, decord, cytoolz, colorama, click, cdifflib, attrdict, zarr, sacremoses, sacrebleu, ruamel.yaml, resampy, pytest-mock, pypinyin-dict, PyMCubes, pyloudnorm, pybtex, pyannote.core, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, Levenshtein, jiwer, hydra-core, gitdb, fiddle, fasttext, einops-exts, botocore, black, typer, texterrors, s3transfer, rouge-score, pybtex-docutils, nvidia-cusolver-cu12, lhotse, gitpython, flask-restful, diffusers, wandb, transformers, sphinxcontrib-bibtex, pyannote.database, g2p-en, datasets, boto3, torchsde, torchmetrics, torchdiffeq, sentence-transformers, pyannote.metrics, nerfacc, nemo_toolkit, nemo-text-processing, kornia, causal-conv1d, accelerated-scan, timm, pytorch-lightning, taming-transformers, open-clip-torch\n",
+      "  Attempting uninstall: docutils\n",
+      "    Found existing installation: docutils 0.18.1\n",
+      "    Uninstalling docutils-0.18.1:\n",
+      "      Successfully uninstalled docutils-0.18.1\n",
+      "  Attempting uninstall: click\n",
+      "    Found existing installation: click 8.1.7\n",
+      "    Uninstalling click-8.1.7:\n",
+      "      Successfully uninstalled click-8.1.7\n",
+      "  Attempting uninstall: typer\n",
+      "    Found existing installation: typer 0.9.4\n",
+      "    Uninstalling typer-0.9.4:\n",
+      "      Successfully uninstalled typer-0.9.4\n",
+      "  Attempting uninstall: transformers\n",
+      "    Found existing installation: transformers 4.41.0\n",
+      "    Uninstalling transformers-4.41.0:\n",
+      "      Successfully uninstalled transformers-4.41.0\n",
+      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+      "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
+      "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n",
+      "\u001b[0mSuccessfully installed Levenshtein-0.22.0 PyMCubes-0.1.4 accelerated-scan-0.2.0 addict-2.4.0 aniso8601-9.0.1 antlr4-python3-runtime-4.9.3 asciitree-0.3.3 attrdict-2.0.1 black-24.4.2 boto3-1.34.113 botocore-1.34.113 braceexpand-0.1.7 causal-conv1d-1.2.2.post1 cdifflib-1.2.6 click-8.0.2 clip-0.2.0 colorama-0.4.6 cytoolz-0.12.3 datasets-2.19.1 decord-0.6.0 diffusers-0.28.0 dill-0.3.8 distance-0.1.3 docker-pycreds-0.4.0 docopt-0.6.2 docutils-0.17.1 einops-0.8.0 einops-exts-0.0.4 faiss-cpu-1.8.0 fasteners-0.19 fasttext-0.9.2 fiddle-0.3.0 flask-restful-0.3.10 ftfy-6.2.0 g2p-en-2.1.0 gitdb-4.0.11 gitpython-3.1.43 hydra-core-1.3.2 ijson-3.2.3 intervaltree-3.1.0 isort-5.13.2 jedi-0.19.1 jiwer-2.5.2 jmespath-1.0.1 kaldi-python-io-1.2.2 kaldiio-2.18.0 kornia-0.7.2 kornia-rs-0.1.3 latexcodec-3.0.0 lhotse-1.23.0 libcst-1.4.0 lightning-utilities-0.11.2 lilcom-1.7 loguru-0.7.2 markdown2-2.4.13 marshmallow-3.21.2 multiprocess-0.70.16 mypy-extensions-1.0.0 nemo-text-processing-1.0.2 nemo_toolkit-2.0.0rc1 nerfacc-0.5.3 ninja-1.11.1.1 numcodecs-0.12.1 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.40 nvidia-nvtx-cu12-12.1.105 omegaconf-2.3.0 onnx-1.16.1 open-clip-torch-2.24.0 opencc-1.1.6 pangu-4.0.6.1 parameterized-0.9.0 pathspec-0.12.1 plac-1.4.3 portalocker-2.8.2 progress-1.6 pyannote.core-5.0.0 pyannote.database-5.1.0 pyannote.metrics-3.2.1 pybind11-2.12.0 pybtex-0.24.0 pybtex-docutils-1.0.3 pydub-0.25.1 pyloudnorm-0.1.1 pynini-2.1.5 pypinyin-0.51.0 pypinyin-dict-0.8.0 pytest-mock-3.14.0 pytest-runner-6.0.1 pytorch-lightning-2.2.5 rapidfuzz-2.13.7 resampy-0.4.3 rouge-score-0.1.2 ruamel.yaml-0.18.6 ruamel.yaml.clib-0.2.8 s3transfer-0.10.1 sacrebleu-2.4.2 sacremoses-0.1.1 
sentence-transformers-2.7.0 sentry-sdk-2.3.1 setproctitle-1.3.3 shellingham-1.5.4 smmap-5.0.1 sox-1.5.0 sphinxcontrib-bibtex-2.6.2 taming-transformers-0.0.1 textdistance-4.6.2 texterrors-0.4.4 timm-1.0.3 torchdiffeq-0.2.3 torchmetrics-1.4.0.post0 torchsde-0.2.6 trampoline-0.1.2 transformers-4.40.2 trimesh-4.4.0 typer-0.12.3 wandb-0.17.0 webdataset-0.2.86 xxhash-3.4.1 zarr-2.18.2\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install wget\n",
+    "!apt-get install sox libsndfile1 ffmpeg\n",
+    "\n",
+    "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "import hydra\n",
+    "import soundfile as sf\n",
+    "import torch\n",
+    "from omegaconf import OmegaConf"
+   ],
+   "metadata": {
+    "id": "cBz_fQ6KbzrZ"
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Downloading config, weights and audio example"
+   ],
+   "metadata": {
+    "id": "3Fvy0phvhr0G"
+   }
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "import locale\n",
+    "\n",
+    "locale.getpreferredencoding = lambda: \"UTF-8\"\n",
+    "\n",
+    "# Loading weights, config and example wav for the GigaAM (SSL) encoder\n",
+    "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ssl_model_weights.ckpt\n",
+    "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/encoder_config.yaml\n",
+    "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "0EHgk_I6hrGI",
+    "outputId": "4c9ac38d-eeca-4da4-af26-aa41becfed00"
+   },
+   "execution_count": null,
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "--2024-05-28 07:12:41--  https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav\n",
+      "Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
+      "Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
+      "HTTP request sent, awaiting response... 200 OK\n",
+      "Length: 361324 (353K) [application/octet-stream]\n",
+      "Saving to: ‘example.wav’\n",
+      "\n",
+      "example.wav         100%[===================>] 352.86K   583KB/s    in 0.6s    \n",
+      "\n",
+      "2024-05-28 07:12:42 (583 KB/s) - ‘example.wav’ saved [361324/361324]\n",
+      "\n"
+     ]
+    }
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Model instantiating and *inference*"
+   ],
+   "metadata": {
+    "id": "FUA6Ah1blyHv"
+   }
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "class SpecScaler(torch.nn.Module):\n",
+    "    def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
+    "        return torch.log(x.clamp_(1e-9, 1e9))\n",
+    "\n",
+    "\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "encoder_config = \"encoder_config.yaml\"\n",
+    "model_weights = \"ssl_model_weights.ckpt\"\n",
+    "audio_path = \"example.wav\"\n",
+    "\n",
+    "conf = OmegaConf.load(encoder_config)\n",
+    "\n",
+    "encoder = hydra.utils.instantiate(conf.encoder)\n",
+    "ckpt = torch.load(model_weights, map_location=\"cpu\")\n",
+    "encoder.load_state_dict(ckpt, strict=True)\n",
+    "encoder.to(device)\n",
+    "\n",
+    "feature_extractor = hydra.utils.instantiate(conf.feature_extractor)\n",
+    "\n",
+    "audio_signal, _ = sf.read(audio_path, dtype=\"float32\")\n",
+    "features = feature_extractor(torch.tensor(audio_signal).float())\n",
+    "features = features.to(device)\n",
+    "\n",
+    "encoded, _ = encoder.forward(\n",
+    "    audio_signal=features.unsqueeze(0),\n",
+    "    length=torch.tensor([features.shape[-1]]).to(device),\n",
+    ")\n",
+    "print(f\"encoded signal shape: {encoded.shape}\")"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "AsUapeJKh3cz",
+    "outputId": "ee5bc82b-4526-4364-ef7c-decd59cdbc5f"
+   },
+   "execution_count": null,
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "encoded signal shape: torch.Size([1, 768, 283])\n"
+     ]
+    }
+   ]
+  },
+  {
+   "cell_type": "code",
+   "source": [],
+   "metadata": {
+    "id": "p1yWHEU5Dn60"
+   },
+   "execution_count": null,
+   "outputs": []
+  }
+ ]
+}

Examples/notebooks/GigaAM_RNNT_Model_Usage_Example.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

Examples/rnnt_inference.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import argparse
+import torch
+import torchaudio
+from nemo.collections.asr.models import EncDecRNNTBPEModel
+from nemo.collections.asr.modules.audio_preprocessing import (
+    AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
+)
+from nemo.collections.asr.parts.preprocessing.features import (
+    FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
+)
+from omegaconf import OmegaConf, open_dict
+class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
+    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
+        if "window_size" in kwargs:
+            del kwargs["window_size"]
+        if "window_stride" in kwargs:
+            del kwargs["window_stride"]
+        super().__init__(**kwargs)
+        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(
+            sample_rate=self._sample_rate,
+            win_length=self.win_length,
+            hop_length=self.hop_length,
+            n_mels=kwargs["nfilt"],
+            window_fn=self.torch_windows[kwargs["window"]],
+            mel_scale=mel_scale,
+            norm=kwargs["mel_norm"],
+            n_fft=kwargs["n_fft"],
+            f_max=kwargs.get("highfreq", None),
+            f_min=kwargs.get("lowfreq", 0),
+            wkwargs=wkwargs,
+        )
+class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
+    def __init__(self, mel_scale: str = "htk", **kwargs):
+        super().__init__(**kwargs)
+        kwargs["nfilt"] = kwargs["features"]
+        del kwargs["features"]
+        self.featurizer = (
+            FilterbankFeaturesTA(  # Deprecated arguments; kept for config compatibility
+                mel_scale=mel_scale,
+                **kwargs,
+            )
+        )
+def _parse_args():
+    parser = argparse.ArgumentParser(
+        description="Run inference using GigaAM-RNNT checkpoint"
+    )
+    parser.add_argument(
+        "--model_config", help="Path to GigaAM-RNNT config file (.yaml)"
+    )
+    parser.add_argument(
+        "--model_weights", help="Path to GigaAM-RNNT checkpoint file (.ckpt)"
+    )
+    parser.add_argument("--tokenizer_path", help="Path to tokenizer directory")
+    parser.add_argument("--audio_path", help="Path to audio signal")
+    parser.add_argument("--device", help="Device: cpu / cuda")
+    return parser.parse_args()
+def main(
+    model_config: str,
+    model_weights: str,
+    tokenizer_path: str,
+    device: str,
+    audio_path: str,
+):
+    config = OmegaConf.load(model_config)
+    with open_dict(config):
+        config.tokenizer.dir = tokenizer_path
+    model = EncDecRNNTBPEModel.from_config_dict(config)
+    ckpt = torch.load(model_weights, map_location="cpu")
+    model.load_state_dict(ckpt, strict=False)
+    model = model.to(device)
+    model.eval()
+    transcription = model.transcribe([audio_path])[0][0]
+    print(f"transcription: {transcription}")
+if __name__ == "__main__":
+    args = _parse_args()
+    main(
+        model_config=args.model_config,
+        model_weights=args.model_weights,
+        tokenizer_path=args.tokenizer_path,
+        device=args.device,
+        audio_path=args.audio_path,
+    )

Examples/rnnt_longform_inference.py ADDED Viewed

	@@ -0,0 +1,210 @@

+import argparse
+from io import BytesIO
+from typing import List, Tuple
+import numpy as np
+import torch
+import torchaudio
+from nemo.collections.asr.models import EncDecRNNTBPEModel
+from nemo.collections.asr.modules.audio_preprocessing import (
+    AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
+)
+from nemo.collections.asr.parts.preprocessing.features import (
+    FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
+)
+from omegaconf import OmegaConf, open_dict
+from pyannote.audio import Pipeline
+from pydub import AudioSegment
+class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
+    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
+        if "window_size" in kwargs:
+            del kwargs["window_size"]
+        if "window_stride" in kwargs:
+            del kwargs["window_stride"]
+        super().__init__(**kwargs)
+        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(
+            sample_rate=self._sample_rate,
+            win_length=self.win_length,
+            hop_length=self.hop_length,
+            n_mels=kwargs["nfilt"],
+            window_fn=self.torch_windows[kwargs["window"]],
+            mel_scale=mel_scale,
+            norm=kwargs["mel_norm"],
+            n_fft=kwargs["n_fft"],
+            f_max=kwargs.get("highfreq", None),
+            f_min=kwargs.get("lowfreq", 0),
+            wkwargs=wkwargs,
+        )
+class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
+    def __init__(self, mel_scale: str = "htk", **kwargs):
+        super().__init__(**kwargs)
+        kwargs["nfilt"] = kwargs["features"]
+        del kwargs["features"]
+        self.featurizer = (
+            FilterbankFeaturesTA(  # Deprecated arguments; kept for config compatibility
+                mel_scale=mel_scale,
+                **kwargs,
+            )
+        )
+def audiosegment_to_numpy(audiosegment: AudioSegment) -> np.ndarray:
+    """Convert AudioSegment to numpy array."""
+    samples = np.array(audiosegment.get_array_of_samples())
+    if audiosegment.channels == 2:
+        samples = samples.reshape((-1, 2))
+    samples = samples.astype(np.float32, order="C") / 32768.0
+    return samples
+def format_time(seconds: float) -> str:
+    hours = int(seconds // 3600)
+    minutes = int((seconds % 3600) // 60)
+    seconds = seconds % 60
+    full_seconds = int(seconds)
+    milliseconds = int((seconds - full_seconds) * 100)
+    if hours > 0:
+        return f"{hours:02}:{minutes:02}:{full_seconds:02}:{milliseconds:02}"
+    else:
+        return f"{minutes:02}:{full_seconds:02}:{milliseconds:02}"
+def segment_audio(
+    audio_path: str,
+    pipeline: Pipeline,
+    max_duration: float = 22.0,
+    min_duration: float = 15.0,
+    new_chunk_threshold: float = 0.2,
+) -> Tuple[List[np.ndarray], List[List[float]]]:
+    # Prepare audio for pyannote vad pipeline
+    audio = AudioSegment.from_wav(audio_path)
+    audio_bytes = BytesIO()
+    audio.export(audio_bytes, format="wav")
+    audio_bytes.seek(0)
+    # Process audio with pipeline to obtain segments with speech activity
+    sad_segments = pipeline({"uri": "filename", "audio": audio_bytes})
+    segments = []
+    curr_duration = 0
+    curr_start = 0
+    curr_end = 0
+    boundaries = []
+    # Concat segments from pipeline into chunks for asr according to max/min duration
+    for segment in sad_segments.get_timeline().support():
+        start = max(0, segment.start)
+        end = min(len(audio) / 1000, segment.end)
+        if (
+            curr_duration > min_duration and start - curr_end > new_chunk_threshold
+        ) or (curr_duration + (end - curr_end) > max_duration):
+            audio_segment = audiosegment_to_numpy(
+                audio[curr_start * 1000 : curr_end * 1000]
+            )
+            segments.append(audio_segment)
+            boundaries.append([curr_start, curr_end])
+            curr_start = start
+        curr_end = end
+        curr_duration = curr_end - curr_start
+    if curr_duration != 0:
+        audio_segment = audiosegment_to_numpy(
+            audio[curr_start * 1000 : curr_end * 1000]
+        )
+        segments.append(audio_segment)
+        boundaries.append([curr_start, curr_end])
+    return segments, boundaries
+def _parse_args():
+    parser = argparse.ArgumentParser(
+        description="Run long-form inference using GigaAM-RNNT checkpoint"
+    )
+    parser.add_argument(
+        "--model_config", help="Path to GigaAM-RNNT config file (.yaml)"
+    )
+    parser.add_argument(
+        "--model_weights", help="Path to GigaAM-RNNT checkpoint file (.ckpt)"
+    )
+    parser.add_argument("--tokenizer_path", help="Path to tokenizer directory")
+    parser.add_argument("--audio_path", help="Path to audio signal")
+    parser.add_argument(
+        "--hf_token", help="HuggingFace token for using pyannote Pipeline"
+    )
+    parser.add_argument("--device", help="Device: cpu / cuda")
+    parser.add_argument("--fp16", help="Run in FP16 mode", default=True)
+    parser.add_argument(
+        "--batch_size", help="Batch size for acoustic model inference", default=10
+    )
+    return parser.parse_args()
+def main(
+    model_config: str,
+    model_weights: str,
+    tokenizer_path: str,
+    device: str,
+    audio_path: str,
+    hf_token: str,
+    fp16: bool,
+    batch_size: int = 10,
+):
+    # Initialize model
+    config = OmegaConf.load(model_config)
+    with open_dict(config):
+        config.tokenizer.dir = tokenizer_path
+    model = EncDecRNNTBPEModel.from_config_dict(config)
+    ckpt = torch.load(model_weights, map_location="cpu")
+    model.load_state_dict(ckpt, strict=False)
+    model = model.to(device)
+    if device != "cpu" and fp16:
+        model = model.half()
+        model.preprocessor = model.preprocessor.float()
+    model.eval()
+    # Initialize pyannote pipeline
+    pipeline = Pipeline.from_pretrained(
+        "pyannote/voice-activity-detection", use_auth_token=hf_token
+    )
+    pipeline = pipeline.to(torch.device(device))
+    # Segment audio
+    segments, boundaries = segment_audio(audio_path, pipeline)
+    # Transcribe segments
+    transcriptions = []
+    if device != "cpu" and fp16:
+        with torch.autocast(device_type="cuda", dtype=torch.float16):
+            transcriptions = model.transcribe(segments, batch_size=batch_size)[0]
+    else:
+        transcriptions = model.transcribe(segments, batch_size=batch_size)[0]
+    for transcription, boundary in zip(transcriptions, boundaries):
+        print(
+            f"[{format_time(boundary[0])} - {format_time(boundary[1])}]: {transcription}\n"
+        )
+if __name__ == "__main__":
+    args = _parse_args()
+    main(
+        model_config=args.model_config,
+        model_weights=args.model_weights,
+        tokenizer_path=args.tokenizer_path,
+        device=args.device,
+        audio_path=args.audio_path,
+        hf_token=args.hf_token,
+        fp16=args.fp16,
+        batch_size=args.batch_size,
+    )

Examples/ssl_inference.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import argparse
+import hydra
+import soundfile
+import torch
+from omegaconf import OmegaConf
+class SpecScaler(torch.nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.log(x.clamp_(1e-9, 1e9))
+def _parse_args():
+    parser = argparse.ArgumentParser(
+        description="Run inference using GigaAM checkpoint"
+    )
+    parser.add_argument("--encoder_config", help="Path to GigaAM config file (.yaml)")
+    parser.add_argument(
+        "--model_weights", help="Path to GigaAM checkpoint file (.ckpt)"
+    )
+    parser.add_argument("--audio_path", help="Path to audio signal")
+    parser.add_argument("--device", help="Device: cpu / cuda")
+    return parser.parse_args()
+def main(encoder_config: str, model_weights: str, device: str, audio_path: str):
+    conf = OmegaConf.load(encoder_config)
+    encoder = hydra.utils.instantiate(conf.encoder)
+    ckpt = torch.load(model_weights, map_location="cpu")
+    encoder.load_state_dict(ckpt, strict=True)
+    encoder.to(device)
+    feature_extractor = hydra.utils.instantiate(conf.feature_extractor)
+    audio_signal, _ = soundfile.read(audio_path, dtype="float32")
+    features = feature_extractor(torch.tensor(audio_signal).float())
+    features = features.to(device)
+    encoded, _ = encoder.forward(
+        audio_signal=features.unsqueeze(0),
+        length=torch.tensor([features.shape[-1]]).to(device),
+    )
+    print(f"encoded signal shape: {encoded.shape}")
+if __name__ == "__main__":
+    args = _parse_args()
+    main(
+        encoder_config=args.encoder_config,
+        model_weights=args.model_weights,
+        device=args.device,
+        audio_path=args.audio_path,
+    )

GigaAM-CTC/ctc_model_config.yaml ADDED Viewed

	@@ -0,0 +1,271 @@

+model_class: enc_dec_ctc_char
+sample_rate: 16000
+log_prediction: true
+ctc_reduction: mean_batch
+labels:
+- ' '
+- а
+- б
+- в
+- г
+- д
+- е
+- ж
+- з
+- и
+- й
+- к
+- л
+- м
+- н
+- о
+- п
+- р
+- с
+- т
+- у
+- ф
+- х
+- ц
+- ч
+- ш
+- щ
+- ъ
+- ы
+- ь
+- э
+- ю
+- я
+preprocessor:
+  _target_: __main__.AudioToMelSpectrogramPreprocessor
+  sample_rate: 16000
+  n_fft: 400
+  n_window_size: 400
+  window_size: null
+  n_window_stride: 160
+  window_stride: null
+  features: 64
+  dither: 0.0
+  preemph: null
+  log: true
+  log_zero_guard_type: clamp
+  normalize: null
+  pad_to: 0
+  mel_norm: null
+  window: hann
+  log_zero_guard_value: 1e-9
+train_ds:
+  batch_size: 10
+  trim_silence: false
+  max_duration: 25.0
+  min_duration: 0.1
+  shuffle: true
+  is_tarred: false
+  num_workers: 8
+  pin_memory: true
+  manifest_filepath: null
+  labels:
+  - ' '
+  - а
+  - б
+  - в
+  - г
+  - д
+  - е
+  - ж
+  - з
+  - и
+  - й
+  - к
+  - л
+  - м
+  - н
+  - о
+  - п
+  - р
+  - с
+  - т
+  - у
+  - ф
+  - х
+  - ц
+  - ч
+  - ш
+  - щ
+  - ъ
+  - ы
+  - ь
+  - э
+  - ю
+  - я
+validation_ds:
+  batch_size: 20
+  shuffle: false
+  num_workers: 4
+  min_duration: 0.1
+  pin_memory: true
+  manifest_filepath: null
+  labels:
+  - ' '
+  - а
+  - б
+  - в
+  - г
+  - д
+  - е
+  - ж
+  - з
+  - и
+  - й
+  - к
+  - л
+  - м
+  - н
+  - о
+  - п
+  - р
+  - с
+  - т
+  - у
+  - ф
+  - х
+  - ц
+  - ч
+  - ш
+  - щ
+  - ъ
+  - ы
+  - ь
+  - э
+  - ю
+  - я
+test_ds:
+  manifest_filepath: null
+  batch_size: 100
+  shuffle: false
+  num_workers: 4
+  pin_memory: true
+  labels:
+  - ' '
+  - а
+  - б
+  - в
+  - г
+  - д
+  - е
+  - ж
+  - з
+  - и
+  - й
+  - к
+  - л
+  - м
+  - н
+  - о
+  - п
+  - р
+  - с
+  - т
+  - у
+  - ф
+  - х
+  - ц
+  - ч
+  - ш
+  - щ
+  - ъ
+  - ы
+  - ь
+  - э
+  - ю
+  - я
+spec_augment:
+  _target_: nemo.collections.asr.modules.SpectrogramAugmentation
+  freq_masks: 2
+  time_masks: 10
+  freq_width: 27
+  time_width: 0.05
+encoder:
+  _target_: nemo.collections.asr.modules.ConformerEncoder
+  feat_in: 64
+  feat_out: -1
+  n_layers: 16
+  d_model: 768
+  subsampling: striding
+  subsampling_factor: 4
+  subsampling_conv_channels: 768
+  ff_expansion_factor: 4
+  self_attention_model: rel_pos
+  pos_emb_max_len: 5000
+  n_heads: 16
+  xscaling: false
+  untie_biases: true
+  conv_kernel_size: 31
+  dropout: 0.1
+  dropout_emb: 0.1
+  dropout_att: 0.1
+decoder:
+  _target_: nemo.collections.asr.modules.ConvASRDecoder
+  feat_in: 768
+  num_classes: 33
+  vocabulary:
+  - ' '
+  - а
+  - б
+  - в
+  - г
+  - д
+  - е
+  - ж
+  - з
+  - и
+  - й
+  - к
+  - л
+  - м
+  - н
+  - о
+  - п
+  - р
+  - с
+  - т
+  - у
+  - ф
+  - х
+  - ц
+  - ч
+  - ш
+  - щ
+  - ъ
+  - ы
+  - ь
+  - э
+  - ю
+  - я
+optim:
+  name: adamw
+  lr: 5.0e-05
+  betas:
+  - 0.9
+  - 0.98
+  weight_decay: 0.01
+  sched:
+    name: CosineAnnealing
+    warmup_steps: 10000
+    warmup_ratio: null
+    min_lr: 1.0e-07
+nemo_version: 1.12.0
+decoding:
+  strategy: greedy
+  preserve_alignments: null
+  compute_timestamps: null
+  word_seperator: ' '
+  ctc_timestamp_type: all
+  batch_dim_index: 0
+  greedy:
+    preserve_alignments: false
+    compute_timestamps: false

GigaAM-CTC/ctc_model_weights.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6813e52607414d6006ac30a392087cb8d716afce7e0319a38bcb744ba741d2dc
+size 968535213

GigaAM-Emo/emo_model_config.yaml ADDED Viewed

	@@ -0,0 +1,38 @@

+id2name:
+  - 'angry'
+  - 'sad'
+  - 'neutral'
+  - 'positive'
+feature_extractor:
+  _target_: torch.nn.Sequential
+  _args_:
+    - _target_: torchaudio.transforms.MelSpectrogram
+      sample_rate: 16000
+      n_fft: 400
+      win_length: 400
+      hop_length: 160
+      n_mels: 64
+    - _target_: __main__.SpecScaler
+encoder:
+  _target_: nemo.collections.asr.modules.ConformerEncoder
+  feat_in: 64
+  feat_out: -1
+  n_layers: 16
+  d_model: 768
+  subsampling: striding
+  subsampling_factor: 4
+  subsampling_conv_channels: 768
+  ff_expansion_factor: 4
+  self_attention_model: rel_pos
+  pos_emb_max_len: 5000
+  n_heads: 16
+  xscaling: false
+  untie_biases: true
+  conv_kernel_size: 31
+classification_head:
+  _target_: torch.nn.Linear
+  in_features: 768
+  out_features: 4

GigaAM-Emo/emo_model_weights.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0a8530d7573e0f0cd78c48c91345bd67c09a8eb4b15913baab77590140b9ecb0
+size 968409626

GigaAM-RNNT/rnnt_model_config.yaml ADDED Viewed

	@@ -0,0 +1,109 @@

+model_class: enc_dec_rnnt_bpe
+sample_rate: 16000
+log_prediction: true
+model_defaults:
+  enc_hidden: 768
+  pred_hidden: 320
+  join_hidden: 320
+preprocessor:
+  _target_: __main__.AudioToMelSpectrogramPreprocessor
+  sample_rate: 16000
+  n_fft: 400
+  n_window_size: 400
+  window_size: null
+  n_window_stride: 160
+  window_stride: null
+  features: 64
+  dither: 0.0
+  preemph: null
+  log: true
+  log_zero_guard_type: clamp
+  normalize: null
+  pad_to: 0
+  mel_norm: null
+  window: hann
+  log_zero_guard_value: 1e-9
+tokenizer:
+  dir: tokenizer_all_sets/
+  type: bpe
+validation_ds:
+  shuffle: False
+  manifest_filepath: null
+encoder:
+  _target_: nemo.collections.asr.modules.ConformerEncoder
+  feat_in: 64
+  feat_out: -1
+  n_layers: 16
+  d_model: 768
+  subsampling: striding
+  subsampling_factor: 4
+  subsampling_conv_channels: 768
+  ff_expansion_factor: 4
+  self_attention_model: rel_pos
+  pos_emb_max_len: 5000
+  n_heads: 16
+  xscaling: false
+  untie_biases: true
+  conv_kernel_size: 31
+  dropout: 0.1
+  dropout_emb: 0.1
+  dropout_att: 0.1
+decoder:
+  _target_: nemo.collections.asr.modules.RNNTDecoder
+  normalization_mode: null
+  random_state_sampling: false
+  blank_as_pad: true
+  vocab_size: 512
+  prednet:
+    pred_hidden: 320
+    pred_rnn_layers: 1
+    t_max: null
+    dropout: 0.0
+joint:
+  _target_: nemo.collections.asr.modules.RNNTJoint
+  log_softmax: null
+  fuse_loss_wer: false
+  fused_batch_size: 1
+  jointnet:
+    joint_hidden: 320
+    activation: relu
+    dropout: 0.0
+    encoder_hidden: 768
+optim:
+  name: adamw
+  lr: 5.0e-05
+  betas:
+  - 0.9
+  - 0.98
+  weight_decay: 0.01
+  sched:
+    name: CosineAnnealing
+    warmup_steps: 10000
+    warmup_ratio: null
+    min_lr: 1.0e-07
+nemo_version: 1.12.0
+decoding:
+  strategy: greedy_batch
+  preserve_alignments: false
+  greedy:
+    max_symbols: 3
+  beam:
+    beam_size: 5
+    score_norm: true
+loss:
+  loss_name: default
+  mwer: false
+  rnnt_reduction: mean_batch
+  wer_coef: false
+  subtract_mean: true
+  warprnnt_numba_kwargs:
+    fastemit_lambda: 0.0
+    clamp: -1.0
+  rnnt_weight: 0.1
+  unique_hyp: true

GigaAM-RNNT/rnnt_model_weights.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f9311712a085aba1b103c325f4965faa7b32e950bf0b724720103a94d204d2a9
+size 974419733

GigaAM/encoder_config.yaml ADDED Viewed

	@@ -0,0 +1,27 @@

+feature_extractor:
+  _target_: torch.nn.Sequential
+  _args_:
+    - _target_: torchaudio.transforms.MelSpectrogram
+      sample_rate: 16000
+      n_fft: 400
+      win_length: 400
+      hop_length: 160
+      n_mels: 64
+    - _target_: __main__.SpecScaler
+encoder:
+  _target_: nemo.collections.asr.modules.ConformerEncoder
+  feat_in: 64
+  feat_out: -1
+  n_layers: 16
+  d_model: 768
+  subsampling: striding
+  subsampling_factor: 4
+  subsampling_conv_channels: 768
+  ff_expansion_factor: 4
+  self_attention_model: rel_pos
+  pos_emb_max_len: 5000
+  n_heads: 16
+  xscaling: false
+  untie_biases: true
+  conv_kernel_size: 31

GigaAM/ssl_model_weights.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fea2e9cee640c931a159667c9f1d82519e789087966ed412c77c0b7e69a35073
+size 968385941

README.md ADDED Viewed

	@@ -0,0 +1,84 @@

+# GigaAM: the family of open-source acoustic models for speech processing
+![plot](./gigaam_scheme.svg)
+## Table of contents
+* [GigaAM](#gigaam)
+* [GigaAM for Speech Recognition](#gigaam-for-speech-recognition)
+  * [GigaAM-CTC](#gigaam-ctc)
+  * [GigaAM-RNNT](#gigaam-rnnt)
+* [GigaAM-Emo](#gigaam-emo)
+* [Links](#links)
+## GigaAM
+GigaAM (**Giga** **A**coustic **M**odel) is a [Conformer](https://arxiv.org/pdf/2005.08100.pdf)-based [wav2vec2](https://arxiv.org/pdf/2006.11477.pdf) foundational model (around 240M parameters). We trained GigaAM on nearly 50 thousand hours of diversified speech audio in the Russian language.
+Resources:
+* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ssl_model_weights.ckpt)
+* [Encoder config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/encoder_config.yaml)
+* [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Model_Usage_Example.ipynb)
+* [Docker example](./Examples/README.md)
+## GigaAM for Speech Recognition
+We fine-tuned the GigaAM encoder for Speech Recognition with two different decoders:
+* GigaAM-CTC was fine-tuned with [Connectionist Temporal Classification](https://www.cs.toronto.edu/~graves/icml_2006.pdf) and character-based tokenizer.
+* GigaAM-RNNT was fine-tuned with [RNN Transducer loss](https://arxiv.org/abs/1211.3711) and subword tokenizer.
+Both models were trained using [the NeMo toolkit](https://github.com/NVIDIA/NeMo) on publicly available Russian labeled data:
+| dataset | size, hours | weight |
+| --- | --- | --- |
+| [Golos](https://arxiv.org/pdf/2106.10161.pdf) | 1227 | 0.6 |
+| [SOVA](https://github.com/sovaai/sova-dataset) | 369 | 0.2 |
+| [Russian Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | 207 | 0.1 |
+| [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) | 93 | 0.1 |
+Resources:
+* ### GigaAM-CTC:
+  * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_weights.ckpt)
+  * [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_config.yaml)
+  * [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_CTC_Model_Usage_Example.ipynb)
+  * [Docker example](./Examples/README.md)
+* ### GigaAM-RNNT:
+  * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_weights.ckpt)
+  * [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_config.yaml)
+  * [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_RNNT_Model_Usage_Example.ipynb)
+  * [Docker example](./Examples/README.md)
+The following table summarizes the performance of different models in terms of Word Error Rate on open Russian datasets:
+| model | parameters | [Golos Crowd](https://arxiv.org/abs/2106.10161) | [Golos Farfield](https://arxiv.org/abs/2106.10161) | [OpenSTT Youtube](https://github.com/snakers4/open_stt) | [OpenSTT Phone calls](https://github.com/snakers4/open_stt) | [OpenSTT Audiobooks](https://github.com/snakers4/open_stt) | [Mozilla Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| [Whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) | 1.5B | 17.4 | 14.5 | 21.1 | 31.2 | 17.0 | 5.3 | 9.0 |
+| [NVIDIA Ru-FastConformer-RNNT](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc) | 115M | 2.6 | 6.6 | 23.8 | 32.9 | 16.4 | 2.7 | 11.6 |
+| GigaAM-CTC | 242M | 3.1 | 5.7 | 18.4 | 25.6 | 15.1| 1.7 | 8.1 |
+| GigaAM-RNNT | 243M | <span style="color:green">2.3</span> | <span style="color:green">4.4</span> | <span style="color:green">16.7</span> | <span style="color:green">22.9</span> | <span style="color:green">13.9</span> | <span style="color:green">0.9</span> | <span style="color:green">7.4</span> |
+## GigaAM-Emo
+GigaAM-Emo is an acoustic model for Emotion Recognition. We fine-tuned the GigaAM Encoder on the [Dusha](https://arxiv.org/pdf/2212.12266.pdf) dataset.
+Resources:
+* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt)
+* [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml)
+* [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Emo_Model_Usage_Example.ipynb)
+* [Docker example](./Examples/README.md)
+The following table summarizes the performance of different models on the [Dusha](https://arxiv.org/pdf/2212.12266.pdf) dataset:
+|  |  | Crowd |  |  | Podcast |  |
+| --- | --- | --- | --- | --- | --- | --- |
+|  | Unweighted Accuracy | Weighted Accuracy | Macro F1-score | Unweighted Accuracy | Weighted Accuracy | Macro F1-score |
+| [DUSHA](https://arxiv.org/pdf/2212.12266.pdf) baseline <br/> ([MobileNetV2](https://arxiv.org/abs/1801.04381) + [Self-Attention](https://arxiv.org/pdf/1805.08318.pdf)) | 0.83 | 0.76 | 0.77 | 0.89 | 0.53 | 0.54 |
+| [АБК](https://aij.ru/archive?albumId=2&videoId=337) ([TIM-Net](https://arxiv.org/pdf/2211.08233.pdf)) | 0.84 | 0.77 | 0.78 | <span style="color:green">0.90</span> | 0.50 | 0.55 |
+| GigaAM-Emo | <span style="color:green">0.90</span> | <span style="color:green">0.87</span> | <span style="color:green">0.84</span> | <span style="color:green">0.90</span> | <span style="color:green">0.76</span> | <span style="color:green">0.67</span> |
+## Links
+* [[habr] GigaAM: класс открытых моделей для обработки звучащей речи](https://habr.com/ru/companies/sberdevices/articles/805569)
+* [[youtube] GigaAM: Семейство акустических моделей для русского языка](https://youtu.be/PvZuTUnZa2Q?t=26442)
+* [[youtube] Speech-only Pre-training: обучение универсального аудиоэнкодера](https://www.youtube.com/watch?v=ktO4Mx6UMNk)

README_ru.md ADDED Viewed

	@@ -0,0 +1,83 @@

+# GigaAM: семейство акустических моделей для обработки звучащей речи
+![plot](./gigaam_scheme.svg)
+## Содержание
+* [GigaAM](#gigaam)
+* [GigaAM для распознавания речи](#gigaam-для-распознавания-речи)
+  * [GigaAM-CTC](#gigaam-ctc)
+  * [GigaAM-RNNT](#gigaam-rnnt)
+* [GigaAM-Emo](#gigaam-emo)
+* [Ссылки](#ссылки)
+## GigaAM
+GigaAM (**Giga** **A**coustic **M**odel) — фундаментальная акустическая модель, основанная на [Conformer](https://arxiv.org/pdf/2005.08100.pdf) энкодере (около 240M параметров). Мы предобучали GigaAM в [wav2vec2](https://arxiv.org/pdf/2006.11477.pdf) режиме на 50 тысячах часов разнообразных русскоязычных данных.
+Материалы:
+* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ssl_model_weights.ckpt)
+* [Encoder config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/encoder_config.yaml)
+* [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Model_Usage_Example.ipynb)
+* [Пример использования в docker](./Examples/README.md)
+## GigaAM для распознавания речи
+Мы дообучали GigaAM энкодер для задачи распознавания речи с двумя разными декодерами:
+* GigaAM-CTC была дообучена с [CTC](https://www.cs.toronto.edu/~graves/icml_2006.pdf) функцией потерь и посимвольной токенизацией.
+* GigaAM-RNNT была дообучена с [RNN-T](https://arxiv.org/abs/1211.3711) функцией потерь и subword-токенизацией.
+Для обучения обеих моделей использовался [фреймворк NeMo](https://github.com/NVIDIA/NeMo) и следующие открытые данные:
+| dataset | size, hours | weight |
+| --- | --- | --- |
+| [Golos](https://arxiv.org/pdf/2106.10161.pdf) | 1227 | 0.6 |
+| [SOVA](https://github.com/sovaai/sova-dataset) | 369 | 0.2 |
+| [Russian Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | 207 | 0.1 |
+| [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) | 93 | 0.1 |
+Материалы:
+* ### GigaAM-CTC:
+  * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_weights.ckpt)
+  * [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_config.yaml)
+  * [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_CTC_Model_Usage_Example.ipynb)
+  * [Пример использования в docker](./Examples/README.md)
+* ### GigaAM-RNNT:
+  * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_weights.ckpt)
+  * [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_config.yaml)
+  * [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_RNNT_Model_Usage_Example.ipynb)
+  * [Пример использования в docker](./Examples/README.md)
+В таблице ниже приведены оценки Word Error Rate различных моделей на открытых русскоязычных наборах данных:
+| model | parameters | [Golos Crowd](https://arxiv.org/abs/2106.10161) | [Golos Farfield](https://arxiv.org/abs/2106.10161) | [OpenSTT Youtube](https://github.com/snakers4/open_stt) | [OpenSTT Phone calls](https://github.com/snakers4/open_stt) | [OpenSTT Audiobooks](https://github.com/snakers4/open_stt) | [Mozilla Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| [Whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) | 1.5B | 17.4 | 14.5 | 21.1 | 31.2 | 17.0 | 5.3 | 9.0 |
+| [NVIDIA Ru-FastConformer-RNNT](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc) | 115M | 2.6 | 6.6 | 23.8 | 32.9 | 16.4 | 2.7 | 11.6 |
+| GigaAM-CTC | 242M | 3.1 | 5.7 | 18.4 | 25.6 | 15.1| 1.7 | 8.1 |
+| GigaAM-RNNT | 243M | <span style="color:green">2.3</span> | <span style="color:green">4.4</span> | <span style="color:green">16.7</span> | <span style="color:green">22.9</span> | <span style="color:green">13.9</span> | <span style="color:green">0.9</span> | <span style="color:green">7.4</span> |
+## GigaAM-Emo
+GigaAM-Emo — акустическая модель для определения эмоций. Мы доучивали GigaAM на датасете [Dusha](https://arxiv.org/pdf/2212.12266.pdf).
+Материалы:
+* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt)
+* [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml)
+* [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Emo_Model_Usage_Example.ipynb)
+* [Пример использования в docker](./Examples/README.md)
+В таблице ниже приведены метрики качества открытых моделей на датасете [Dusha](https://arxiv.org/pdf/2212.12266.pdf):
+|  |  | Crowd |  |  | Podcast |  |
+| --- | --- | --- | --- | --- | --- | --- |
+|  | Unweighted Accuracy | Weighted Accuracy | Macro F1-score | Unweighted Accuracy | Weighted Accuracy | Macro F1-score |
+| [DUSHA](https://arxiv.org/pdf/2212.12266.pdf) baseline <br/> ([MobileNetV2](https://arxiv.org/abs/1801.04381) + [Self-Attention](https://arxiv.org/pdf/1805.08318.pdf)) | 0.83 | 0.76 | 0.77 | 0.89 | 0.53 | 0.54 |
+| [АБК](https://aij.ru/archive?albumId=2&videoId=337) ([TIM-Net](https://arxiv.org/pdf/2211.08233.pdf)) | 0.84 | 0.77 | 0.78 | <span style="color:green">0.90</span> | 0.50 | 0.55 |
+| GigaAM-Emo | <span style="color:green">0.90</span> | <span style="color:green">0.87</span> | <span style="color:green">0.84</span> | <span style="color:green">0.90</span> | <span style="color:green">0.76</span> | <span style="color:green">0.67</span> |
+## Ссылки
+* [[habr] GigaAM: класс открытых моделей для обработки звучащей речи](https://habr.com/ru/companies/sberdevices/articles/805569)
+* [[youtube] GigaAM: Семейство акустических моделей для русского языка](https://youtu.be/PvZuTUnZa2Q?t=26442)
+* [[youtube] Speech-only Pre-training: обучение универсального аудиоэнкодера](https://www.youtube.com/watch?v=ktO4Mx6UMNk)

gigaam_scheme.svg ADDED Viewed